aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,596 @@
1
+ """
2
+ Advanced Retrieval Strategies
3
+
4
+ Provides sophisticated retrieval methods including Personalized PageRank,
5
+ multi-hop neighbor retrieval, filtered retrieval, and query caching.
6
+ """
7
+
8
+ import asyncio
9
+ from typing import List, Dict, Set, Optional, Tuple, Any, Callable
10
+ from collections import defaultdict, deque
11
+ import hashlib
12
+ import json
13
+ import time
14
+ from aiecs.domain.knowledge_graph.models.entity import Entity
15
+ from aiecs.infrastructure.graph_storage.base import GraphStore
16
+
17
+
18
class PersonalizedPageRank:
    """
    Personalized PageRank Retrieval

    Computes importance scores for entities in the graph based on
    a random walk with restart from seed entities. Useful for finding
    entities most relevant to a given starting point.

    Algorithm:
    1. Start from seed entities
    2. Random walk with probability alpha to restart at seeds
    3. Iterate until convergence
    4. Return entities ranked by visit frequency

    Example:
        ```python
        ppr = PersonalizedPageRank(graph_store)

        results = await ppr.retrieve(
            seed_entity_ids=["person_1"],
            max_results=10,
            alpha=0.15,  # restart probability
            max_iterations=100
        )

        for entity, score in results:
            print(f"{entity.id}: {score:.4f}")
        ```
    """

    def __init__(self, graph_store: GraphStore):
        """
        Initialize Personalized PageRank retrieval

        Args:
            graph_store: Graph storage backend
        """
        self.graph_store = graph_store

    async def retrieve(
        self,
        seed_entity_ids: List[str],
        max_results: int = 20,
        alpha: float = 0.15,
        max_iterations: int = 100,
        convergence_threshold: float = 1e-6,
    ) -> List[Tuple[Entity, float]]:
        """
        Retrieve entities using Personalized PageRank

        Args:
            seed_entity_ids: Starting entities for random walk. Duplicate
                IDs are collapsed so every distinct seed gets equal weight.
            max_results: Maximum number of results to return
            alpha: Restart probability (0.0-1.0)
            max_iterations: Maximum number of iterations
            convergence_threshold: Stop once the largest per-entity score
                change in an iteration falls below this value

        Returns:
            List of (entity, score) tuples sorted by score descending.
            IDs for which the store returns no entity are skipped.
        """
        if not seed_entity_ids:
            return []

        # Deduplicate seeds (order preserved). With duplicates the initial
        # assignment would overwrite while the restart step would add once
        # per occurrence, so the scores would no longer form a distribution.
        seeds = list(dict.fromkeys(seed_entity_ids))
        initial_score = 1.0 / len(seeds)
        scores: Dict[str, float] = {seed_id: initial_score for seed_id in seeds}

        # Outgoing neighbor IDs per entity, fetched once and reused
        adjacency: Dict[str, List[str]] = {}

        for _ in range(max_iterations):
            new_scores: Dict[str, float] = defaultdict(float)

            # Restart probability: distribute to seeds
            for seed_id in seeds:
                new_scores[seed_id] += alpha * initial_score

            # Random walk probability: distribute from current nodes
            for entity_id, score in scores.items():
                if score == 0:
                    continue

                if entity_id not in adjacency:
                    neighbors = await self.graph_store.get_neighbors(
                        entity_id, direction="outgoing"
                    )
                    adjacency[entity_id] = [n.id for n in neighbors]

                neighbor_ids = adjacency[entity_id]

                if neighbor_ids:
                    share = (1 - alpha) * score / len(neighbor_ids)
                    for neighbor_id in neighbor_ids:
                        new_scores[neighbor_id] += share
                else:
                    # No outgoing edges: the walk restarts at the seeds
                    for seed_id in seeds:
                        new_scores[seed_id] += (1 - alpha) * score * initial_score

            # Largest change across all entities seen so far. .get() is used
            # so the check does not insert zero entries into either mapping.
            max_delta = max(
                (
                    abs(new_scores.get(entity_id, 0.0) - scores.get(entity_id, 0.0))
                    for entity_id in scores.keys() | new_scores.keys()
                ),
                default=0.0,
            )

            scores = new_scores

            if max_delta < convergence_threshold:
                break

        # Rank IDs first, then fetch entities only until enough are found,
        # instead of loading every scored entity before truncating.
        ranked = sorted(
            ((entity_id, score) for entity_id, score in scores.items() if score > 0),
            key=lambda item: item[1],
            reverse=True,
        )

        results: List[Tuple[Entity, float]] = []
        for entity_id, score in ranked:
            if len(results) >= max_results:
                break
            entity = await self.graph_store.get_entity(entity_id)
            if entity:
                results.append((entity, score))

        return results
149
+
150
+
151
class MultiHopRetrieval:
    """
    Multi-Hop Neighbor Retrieval

    Retrieves entities within N hops from seed entities, with configurable
    aggregation and scoring strategies.

    Features:
    - Breadth-first expansion from seeds
    - Hop-distance based scoring
    - Relation type filtering
    - Entity deduplication

    Example:
        ```python
        retrieval = MultiHopRetrieval(graph_store)

        results = await retrieval.retrieve(
            seed_entity_ids=["entity_1"],
            max_hops=2,
            max_results=20,
            relation_types=["RELATED_TO"]  # Optional filter
        )
        ```
    """

    def __init__(self, graph_store: GraphStore):
        """
        Initialize multi-hop retrieval

        Args:
            graph_store: Graph storage backend
        """
        self.graph_store = graph_store

    async def _get_outgoing_neighbors(
        self, entity_id: str, relation_types: Optional[List[str]]
    ) -> List[Entity]:
        """Return outgoing neighbors of entity_id, limited to relation_types when given."""
        if not relation_types:
            # None or an empty list means "no filter"
            return await self.graph_store.get_neighbors(
                entity_id, relation_type=None, direction="outgoing"
            )

        # The store is queried once per distinct allowed relation type.
        # A neighbor reachable via several types may appear more than once;
        # the caller collects IDs into a set.
        neighbors: List[Entity] = []
        for relation_type in dict.fromkeys(relation_types):
            neighbors.extend(
                await self.graph_store.get_neighbors(
                    entity_id, relation_type=relation_type, direction="outgoing"
                )
            )
        return neighbors

    async def retrieve(
        self,
        seed_entity_ids: List[str],
        max_hops: int = 2,
        max_results: int = 50,
        relation_types: Optional[List[str]] = None,
        score_decay: float = 0.5,
        include_seeds: bool = True,
    ) -> List[Tuple[Entity, float]]:
        """
        Retrieve entities within N hops from seeds

        Args:
            seed_entity_ids: Starting entities
            max_hops: Maximum number of hops
            max_results: Maximum number of results
            relation_types: Optional list of allowed relation types; only
                edges of these types are followed. None or an empty list
                follows every outgoing edge.
            score_decay: Score decay factor per hop (0.0-1.0); an entity
                first reached at hop h scores score_decay ** h
            include_seeds: Whether to include seed entities in results

        Returns:
            List of (entity, score) tuples sorted by score descending.
            IDs for which the store returns no entity are skipped.
        """
        if not seed_entity_ids:
            return []

        # Doubles as the visited set: an ID is scored exactly once, at the
        # first (shortest) hop distance at which BFS reaches it.
        entity_scores: Dict[str, float] = {}

        # BFS expansion
        current_level: Set[str] = set(seed_entity_ids)

        for hop in range(max_hops + 1):
            if not current_level:
                break

            hop_score = score_decay**hop
            next_level: Set[str] = set()

            for entity_id in current_level:
                if entity_id in entity_scores:
                    continue

                entity_scores[entity_id] = hop_score

                # Expand only while another hop is still allowed
                if hop < max_hops:
                    neighbors = await self._get_outgoing_neighbors(
                        entity_id, relation_types
                    )
                    for neighbor in neighbors:
                        if neighbor.id not in entity_scores:
                            next_level.add(neighbor.id)

            current_level = next_level

        # Filter out seeds if requested
        if not include_seeds:
            for seed_id in seed_entity_ids:
                entity_scores.pop(seed_id, None)

        # Rank IDs first, then fetch entities only until enough are found
        ranked = sorted(entity_scores.items(), key=lambda item: item[1], reverse=True)

        results: List[Tuple[Entity, float]] = []
        for entity_id, score in ranked:
            if len(results) >= max_results:
                break
            entity = await self.graph_store.get_entity(entity_id)
            if entity:
                results.append((entity, score))

        return results
274
+
275
+
276
class FilteredRetrieval:
    """
    Filtered Retrieval Strategy

    Selects entities from the graph by combining any of:
    - Entity type
    - Property values
    - Property existence
    - Custom filter functions

    Example:
        ```python
        retrieval = FilteredRetrieval(graph_store)

        # Filter by entity type and property
        results = await retrieval.retrieve(
            entity_type="Person",
            property_filters={"role": "Engineer"},
            max_results=20
        )

        # Custom filter function
        results = await retrieval.retrieve(
            filter_fn=lambda e: e.properties.get("age", 0) > 30,
            max_results=10
        )
        ```
    """

    def __init__(self, graph_store: GraphStore):
        """
        Initialize filtered retrieval

        Args:
            graph_store: Graph storage backend
        """
        self.graph_store = graph_store

    async def retrieve(
        self,
        entity_type: Optional[str] = None,
        property_filters: Optional[Dict[str, Any]] = None,
        property_exists: Optional[List[str]] = None,
        filter_fn: Optional[Callable[[Entity], bool]] = None,
        max_results: int = 100,
        score_by_match_count: bool = False,
    ) -> List[Tuple[Entity, float]]:
        """
        Retrieve entities with flexible filtering

        Every supplied criterion must hold for an entity to be returned.

        Args:
            entity_type: Filter by entity type. Candidates are only loaded
                when this is given; without it the result is empty.
            property_filters: Filter by property values (key: value)
            property_exists: Filter by property existence (list of keys)
            filter_fn: Custom filter function; an entity for which it
                raises is skipped
            max_results: Maximum number of results
            score_by_match_count: Score as matched criteria / total criteria
                instead of a constant 1.0

        Returns:
            List of (entity, score) tuples
        """
        # Candidate loading is a workaround: the store is asked for a vector
        # search with a placeholder embedding and a zero threshold, because
        # no direct entity-scan call is used here.
        if entity_type:
            hits = await self.graph_store.vector_search(
                query_embedding=[0.0] * 128,  # Placeholder
                entity_type=entity_type,
                max_results=1000,
                score_threshold=0.0,
            )
            pool = [candidate for candidate, _ in hits]
        else:
            # No type given: nothing to scan, so nothing can match
            pool = []

        scored = []

        for candidate in pool:
            matched = 0
            criteria = 0

            # Entity type (re-checked against what the store returned)
            if entity_type:
                criteria += 1
                if not (candidate.entity_type == entity_type):
                    continue
                matched += 1

            # Property values: all listed pairs must be equal
            if property_filters:
                criteria += len(property_filters)
                values_ok = all(
                    candidate.properties.get(name) == wanted
                    for name, wanted in property_filters.items()
                )
                if not values_ok:
                    continue
                matched += len(property_filters)

            # Property existence: all listed keys must be present
            if property_exists:
                criteria += len(property_exists)
                keys_ok = all(name in candidate.properties for name in property_exists)
                if not keys_ok:
                    continue
                matched += len(property_exists)

            # Custom predicate; a raising predicate rejects the entity
            if filter_fn:
                criteria += 1
                try:
                    accepted = bool(filter_fn(candidate))
                except Exception:
                    continue
                if not accepted:
                    continue
                matched += 1

            if score_by_match_count and criteria > 0:
                score = matched / criteria
            else:
                score = 1.0

            scored.append((candidate, score))

            if len(scored) >= max_results:
                break

        # Highest score first
        scored.sort(key=lambda pair: pair[1], reverse=True)

        return scored[:max_results]
427
+
428
+
429
class RetrievalCache:
    """
    Query Caching for Retrieval

    Caches retrieval results to improve performance for frequent queries.
    Uses LRU eviction policy and TTL-based expiration.

    Features:
    - LRU cache with configurable size
    - TTL-based expiration
    - Query fingerprinting
    - Cache statistics

    Example:
        ```python
        cache = RetrievalCache(max_size=100, ttl=300)  # 5 minutes TTL

        # Wrap retrieval operation
        results = await cache.get_or_compute(
            cache_key="query_1",
            compute_fn=lambda: retrieval.retrieve(...)
        )

        # Check cache statistics
        stats = cache.get_stats()
        print(f"Hit rate: {stats['hit_rate']:.2%}")
        ```
    """

    def __init__(self, max_size: int = 1000, ttl: int = 300):
        """
        Initialize retrieval cache

        Args:
            max_size: Maximum number of cached entries
            ttl: Time-to-live for cache entries in seconds
        """
        self.max_size = max_size
        self.ttl = ttl
        # key -> (value, timestamp)
        self._cache: Dict[str, Tuple[Any, float]] = {}
        # Keys ordered from least to most recently used
        self._access_order: deque = deque()
        self._hits = 0
        self._misses = 0

    def _generate_key(self, **kwargs) -> str:
        """
        Generate cache key from query parameters

        Args:
            **kwargs: Query parameters (must be JSON-serializable)

        Returns:
            Cache key string (MD5 hex digest; used as a fingerprint only)
        """
        # Sort keys for consistent hashing
        sorted_items = sorted(kwargs.items())
        key_str = json.dumps(sorted_items, sort_keys=True)
        return hashlib.md5(key_str.encode()).hexdigest()

    def _is_expired(self, timestamp: float) -> bool:
        """Check if cache entry is expired"""
        return (time.time() - timestamp) > self.ttl

    def _touch(self, cache_key: str) -> None:
        """Mark cache_key as most recently used, keeping it in the order queue once."""
        if cache_key in self._access_order:
            self._access_order.remove(cache_key)
        self._access_order.append(cache_key)

    def _evict_lru(self):
        """Evict least recently used entry"""
        if self._access_order:
            lru_key = self._access_order.popleft()
            self._cache.pop(lru_key, None)

    async def get_or_compute(
        self,
        cache_key: Optional[str] = None,
        compute_fn: Optional[Callable] = None,
        **kwargs,
    ) -> Any:
        """
        Get cached result or compute and cache

        Args:
            cache_key: Optional explicit cache key
            compute_fn: Zero-argument callable used on a cache miss. It may
                be a coroutine function, or a plain callable (such as a
                lambda) that returns either a value or an awaitable; an
                awaitable is awaited before the result is cached.
            **kwargs: Parameters for cache key generation

        Returns:
            Cached or computed result, or None on a miss when no
            compute_fn is given
        """
        import inspect

        # Generate cache key
        if cache_key is None:
            cache_key = self._generate_key(**kwargs)

        # Check cache
        if cache_key in self._cache:
            result, timestamp = self._cache[cache_key]

            if not self._is_expired(timestamp):
                # Cache hit
                self._hits += 1
                self._touch(cache_key)
                return result

            # Expired, remove
            del self._cache[cache_key]
            if cache_key in self._access_order:
                self._access_order.remove(cache_key)

        # Cache miss
        self._misses += 1

        if compute_fn is None:
            return None

        # Await whatever awaitable the callable hands back. Checking only
        # whether compute_fn is a coroutine function misses lambdas and
        # partials that return a coroutine, which would then be cached
        # un-awaited.
        result = compute_fn()
        if inspect.isawaitable(result):
            result = await result

        # Store in cache
        self._cache[cache_key] = (result, time.time())
        self._touch(cache_key)

        # Evict if over size limit. The queue check prevents spinning
        # forever when nothing is left to evict (e.g. negative max_size).
        while len(self._cache) > self.max_size and self._access_order:
            self._evict_lru()

        return result

    def invalidate(self, cache_key: str):
        """Invalidate a specific cache entry"""
        if cache_key in self._cache:
            del self._cache[cache_key]
        if cache_key in self._access_order:
            self._access_order.remove(cache_key)

    def clear(self):
        """Clear all cache entries"""
        self._cache.clear()
        self._access_order.clear()

    def get_stats(self) -> Dict[str, Any]:
        """
        Get cache statistics

        Returns:
            Dictionary with hits, misses, total_requests, hit_rate,
            cache_size, max_size and ttl
        """
        total_requests = self._hits + self._misses
        hit_rate = self._hits / total_requests if total_requests > 0 else 0.0

        return {
            "hits": self._hits,
            "misses": self._misses,
            "total_requests": total_requests,
            "hit_rate": hit_rate,
            "cache_size": len(self._cache),
            "max_size": self.max_size,
            "ttl": self.ttl,
        }
594
+
595
+
596
+ # NOTE: asyncio is imported at the top of this module; no import is needed here.
@@ -0,0 +1,59 @@
1
+ """
2
+ Knowledge Graph Search Application Layer
3
+
4
+ Advanced search strategies including hybrid search and text similarity utilities.
5
+ """
6
+
7
+ from aiecs.application.knowledge_graph.search.hybrid_search import (
8
+ HybridSearchStrategy,
9
+ HybridSearchConfig,
10
+ SearchMode,
11
+ )
12
+ from aiecs.application.knowledge_graph.search.text_similarity import (
13
+ TextSimilarity,
14
+ BM25Scorer,
15
+ jaccard_similarity,
16
+ jaccard_similarity_text,
17
+ cosine_similarity_text,
18
+ levenshtein_distance,
19
+ normalized_levenshtein_similarity,
20
+ fuzzy_match,
21
+ )
22
+ from aiecs.application.knowledge_graph.search.reranker import (
23
+ RerankerStrategy,
24
+ ResultReranker,
25
+ ScoreCombinationMethod,
26
+ normalize_scores,
27
+ combine_scores,
28
+ )
29
+ from aiecs.application.knowledge_graph.search.reranker_strategies import (
30
+ TextSimilarityReranker,
31
+ SemanticReranker,
32
+ StructuralReranker,
33
+ HybridReranker,
34
+ CrossEncoderReranker,
35
+ )
36
+
37
+ __all__ = [
38
+ "HybridSearchStrategy",
39
+ "HybridSearchConfig",
40
+ "SearchMode",
41
+ "TextSimilarity",
42
+ "BM25Scorer",
43
+ "jaccard_similarity",
44
+ "jaccard_similarity_text",
45
+ "cosine_similarity_text",
46
+ "levenshtein_distance",
47
+ "normalized_levenshtein_similarity",
48
+ "fuzzy_match",
49
+ "RerankerStrategy",
50
+ "ResultReranker",
51
+ "ScoreCombinationMethod",
52
+ "normalize_scores",
53
+ "combine_scores",
54
+ "TextSimilarityReranker",
55
+ "SemanticReranker",
56
+ "StructuralReranker",
57
+ "HybridReranker",
58
+ "CrossEncoderReranker",
59
+ ]