aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,343 @@
1
+ """
2
+ Entity Linker
3
+
4
+ Links newly extracted entities to existing entities in the knowledge graph.
5
+ """
6
+
7
+ from typing import List, Optional
8
+ from aiecs.domain.knowledge_graph.models.entity import Entity
9
+ from aiecs.infrastructure.graph_storage.base import GraphStore
10
+
11
+
12
class EntityLinker:
    """
    Link new entities to existing entities in the graph

    When extracting entities from new documents, many entities may already exist
    in the knowledge graph. This class identifies such matches and links them,
    preventing duplication across the entire graph.

    Features:
    - Exact ID matching
    - Name-based fuzzy matching
    - Embedding-based similarity search
    - Type-aware linking
    - Confidence scoring

    Workflow:
    1. For each new entity, search graph for similar existing entities
    2. If match found, return existing entity ID (link)
    3. If no match, entity is new and should be added

    Example:
    ```python
    linker = EntityLinker(graph_store, similarity_threshold=0.85)

    new_entity = Entity(type="Person", properties={"name": "Alice Smith"})

    # Check if Alice already exists
    link_result = await linker.link_entity(new_entity)

    if link_result.linked:
        print(f"Linked to existing entity: {link_result.existing_entity.id}")
        # Use existing entity instead of creating new one
    else:
        print("New entity - add to graph")
        # Add new_entity to graph
    ```
    """

    # Embedding score at or above which a candidate is linked even when the
    # name sanity check fails.
    HIGH_CONFIDENCE_EMBEDDING_SCORE = 0.95

    # Score reported when one normalized name is contained in the other.
    SUBSTRING_MATCH_SCORE = 0.95

    # Property keys probed, in priority order, to find an entity's name.
    NAME_PROPERTY_KEYS = ("name", "title", "text")

    def __init__(
        self,
        graph_store: GraphStore,
        similarity_threshold: float = 0.85,
        use_embeddings: bool = True,
        embedding_threshold: float = 0.90,
    ):
        """
        Initialize entity linker

        Args:
            graph_store: Graph storage to search for existing entities
            similarity_threshold: Minimum similarity to link entities (0.0-1.0)
            use_embeddings: Use embedding similarity for matching
            embedding_threshold: Minimum embedding similarity for linking (0.0-1.0)
        """
        self.graph_store = graph_store
        self.similarity_threshold = similarity_threshold
        self.use_embeddings = use_embeddings
        self.embedding_threshold = embedding_threshold

    async def link_entity(self, new_entity: Entity, candidate_limit: int = 10) -> "LinkResult":
        """
        Link a new entity to existing entity in graph (if match found)

        Strategies are tried in order and the first successful one wins:
        exact ID lookup, then embedding search (only when enabled and the
        entity carries an embedding), then name-based matching.

        Args:
            new_entity: Entity to link
            candidate_limit: Maximum number of candidates to consider

        Returns:
            LinkResult with linking decision and matched entity (if any)
        """
        # Try exact ID match first
        existing = await self.graph_store.get_entity(new_entity.id)
        if existing:
            return LinkResult(
                linked=True,
                existing_entity=existing,
                new_entity=new_entity,
                similarity=1.0,
                link_type="exact_id",
            )

        # Try embedding-based search (fast, semantic)
        if self.use_embeddings and new_entity.embedding:
            link_result = await self._link_by_embedding(new_entity, candidate_limit)
            if link_result.linked:
                return link_result

        # Try name-based search (fallback)
        return await self._link_by_name(new_entity, candidate_limit)

    async def link_entities(
        self, new_entities: List[Entity], candidate_limit: int = 10
    ) -> List["LinkResult"]:
        """
        Link multiple entities in batch

        Entities are processed one after another, in input order.

        Args:
            new_entities: List of entities to link
            candidate_limit: Maximum candidates per entity

        Returns:
            List of LinkResult objects (one per input entity)
        """
        return [await self.link_entity(entity, candidate_limit) for entity in new_entities]

    async def _link_by_embedding(self, new_entity: Entity, candidate_limit: int) -> "LinkResult":
        """
        Link entity using embedding similarity search

        A candidate is accepted when its score reaches ``embedding_threshold``
        and either the names look alike or the score reaches
        ``HIGH_CONFIDENCE_EMBEDDING_SCORE``.

        Args:
            new_entity: Entity to link
            candidate_limit: Maximum candidates to consider

        Returns:
            LinkResult (unlinked when there is no embedding, no acceptable
            candidate, or the search fails)
        """
        if not new_entity.embedding:
            return LinkResult(linked=False, new_entity=new_entity)

        try:
            # Vector search in graph
            candidates = await self.graph_store.vector_search(
                query_embedding=new_entity.embedding,
                entity_type=new_entity.entity_type,
                max_results=candidate_limit,
                score_threshold=self.embedding_threshold,
            )

            if not candidates:
                return LinkResult(linked=False, new_entity=new_entity)

            # Candidates are (entity, score) pairs. Pick the highest score
            # explicitly instead of trusting the store's ordering; for a
            # store that already sorts best-first this selects the same pair.
            best_entity, best_score = max(candidates, key=lambda pair: pair[1])

            # Check if score meets threshold
            if best_score >= self.embedding_threshold:
                # Also verify name similarity (sanity check)
                name_match = self._check_name_similarity(new_entity, best_entity)

                # High embedding score = trust it even without a name match
                if name_match or best_score >= self.HIGH_CONFIDENCE_EMBEDDING_SCORE:
                    return LinkResult(
                        linked=True,
                        existing_entity=best_entity,
                        new_entity=new_entity,
                        similarity=best_score,
                        link_type="embedding",
                    )

        except NotImplementedError:
            # Graph store doesn't support vector search
            pass
        except Exception as e:
            # Best effort: report the failure but let the caller fall back
            self._log_warning("Embedding search failed", e)

        return LinkResult(linked=False, new_entity=new_entity)

    async def _link_by_name(self, new_entity: Entity, candidate_limit: int) -> "LinkResult":
        """
        Link entity using name-based matching

        This is slower than embedding search but works without embeddings.

        Strategy:
        1. Get all entities of same type (if feasible)
        2. Compare names using fuzzy matching
        3. Return best match if above threshold

        Args:
            new_entity: Entity to link
            candidate_limit: Maximum candidates to consider

        Returns:
            LinkResult (unlinked when the entity has no name, there are no
            candidates, no candidate reaches ``similarity_threshold``, or the
            lookup fails)
        """
        new_name = self._get_entity_name(new_entity)
        if not new_name:
            return LinkResult(linked=False, new_entity=new_entity)

        try:
            # Get candidate entities of same type
            # Note: This is a simplified implementation
            # In production, you'd want an indexed search or LIKE query
            candidates = await self._get_candidate_entities(new_entity.entity_type, candidate_limit)

            if not candidates:
                return LinkResult(linked=False, new_entity=new_entity)

            # Find best match; a candidate must score strictly above 0.0
            best_match = None
            best_score = 0.0

            for candidate in candidates:
                candidate_name = self._get_entity_name(candidate)
                if not candidate_name:
                    continue
                score = self._name_similarity(new_name, candidate_name)
                if score > best_score:
                    best_score = score
                    best_match = candidate

            # Check threshold
            if best_match and best_score >= self.similarity_threshold:
                return LinkResult(
                    linked=True,
                    existing_entity=best_match,
                    new_entity=new_entity,
                    similarity=best_score,
                    link_type="name",
                )

        except Exception as e:
            # Best effort: report the failure and treat the entity as new
            self._log_warning("Name-based linking failed", e)

        return LinkResult(linked=False, new_entity=new_entity)

    async def _get_candidate_entities(self, entity_type: str, limit: int) -> List[Entity]:
        """
        Get candidate entities for linking

        This is a placeholder - in production, you'd want:
        - Indexed search by entity type
        - LIKE queries for name matching
        - Pagination for large result sets

        Because this currently returns no candidates, name-based linking
        never produces a link unless this method is overridden.

        Args:
            entity_type: Entity type to filter by
            limit: Maximum candidates

        Returns:
            List of candidate entities (always empty in this implementation)
        """
        # TODO: Implement efficient candidate retrieval
        # For now, return empty list (will rely on embedding search primarily)
        # In Phase 3 (SQLite) and Phase 6 (PostgreSQL), we'll implement
        # efficient queries for this
        return []

    def _check_name_similarity(self, entity1: Entity, entity2: Entity) -> bool:
        """
        Quick name similarity check

        Args:
            entity1: First entity
            entity2: Second entity

        Returns:
            True if both entities have a name and the names reach
            ``similarity_threshold``; False otherwise
        """
        name1 = self._get_entity_name(entity1)
        name2 = self._get_entity_name(entity2)

        if not name1 or not name2:
            return False

        return self._name_similarity(name1, name2) >= self.similarity_threshold

    def _get_entity_name(self, entity: Entity) -> str:
        """
        Extract entity name from properties

        Probes ``NAME_PROPERTY_KEYS`` in order and returns the first truthy
        value. Non-string values are converted with ``str()`` so callers can
        always apply string operations to the result.

        Args:
            entity: Entity whose ``properties`` mapping is inspected

        Returns:
            The name as a string, or "" when no name-like property is set
        """
        properties = entity.properties or {}
        for key in self.NAME_PROPERTY_KEYS:
            value = properties.get(key)
            if value:
                return value if isinstance(value, str) else str(value)
        return ""

    def _name_similarity(self, name1: str, name2: str) -> float:
        """
        Compute name similarity using fuzzy matching

        Names are compared case-insensitively with surrounding whitespace
        removed.

        Args:
            name1: First name
            name2: Second name

        Returns:
            Similarity score (0.0-1.0): 0.0 if either name is blank, 1.0 for
            equal names, ``SUBSTRING_MATCH_SCORE`` when one name contains the
            other, otherwise the ``difflib.SequenceMatcher`` ratio
        """
        from difflib import SequenceMatcher

        # Normalize
        n1 = name1.lower().strip()
        n2 = name2.lower().strip()

        # A blank name carries no evidence. Without this guard the substring
        # test below would succeed for every pair, because "" is contained in
        # any string.
        if not n1 or not n2:
            return 0.0

        # Exact match
        if n1 == n2:
            return 1.0

        # Substring match
        if n1 in n2 or n2 in n1:
            return self.SUBSTRING_MATCH_SCORE

        # Fuzzy match
        return SequenceMatcher(None, n1, n2).ratio()

    @staticmethod
    def _log_warning(message: str, error: Exception) -> None:
        """Report a non-fatal linking failure through the logging system."""
        # Function-scope import keeps this class self-contained.
        import logging

        logging.getLogger(__name__).warning("%s: %s", message, error)
310
+
311
+
312
class LinkResult:
    """
    Outcome of one entity linking attempt

    Attributes:
        linked: True when a matching entity was found in the graph
        existing_entity: The matched graph entity, or None when unlinked
        new_entity: The entity that was being linked
        similarity: Score of the match (0.0-1.0); 0.0 by default
        link_type: How the match was made ("exact_id", "embedding", "name",
            or "none" by default)
    """

    def __init__(
        self,
        linked: bool,
        new_entity: Entity,
        existing_entity: Optional[Entity] = None,
        similarity: float = 0.0,
        link_type: str = "none",
    ):
        # Linking decision and the two entities involved
        self.linked, self.existing_entity, self.new_entity = linked, existing_entity, new_entity
        # How strong the match was and which strategy produced it
        self.similarity, self.link_type = similarity, link_type

    def __repr__(self) -> str:
        # Unlinked results show no further detail
        if not self.linked:
            return "LinkResult(linked=False)"
        return f"LinkResult(linked=True, type={self.link_type}, similarity={self.similarity:.2f})"