mcp-sqlite-memory-bank 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_sqlite_memory_bank/__main__.py +59 -0
- mcp_sqlite_memory_bank/server.py +157 -1
- mcp_sqlite_memory_bank/tools/__init__.py +11 -0
- mcp_sqlite_memory_bank/tools/discovery.py +1176 -0
- {mcp_sqlite_memory_bank-1.5.0.dist-info → mcp_sqlite_memory_bank-1.5.1.dist-info}/METADATA +1 -1
- {mcp_sqlite_memory_bank-1.5.0.dist-info → mcp_sqlite_memory_bank-1.5.1.dist-info}/RECORD +10 -8
- {mcp_sqlite_memory_bank-1.5.0.dist-info → mcp_sqlite_memory_bank-1.5.1.dist-info}/WHEEL +0 -0
- {mcp_sqlite_memory_bank-1.5.0.dist-info → mcp_sqlite_memory_bank-1.5.1.dist-info}/entry_points.txt +0 -0
- {mcp_sqlite_memory_bank-1.5.0.dist-info → mcp_sqlite_memory_bank-1.5.1.dist-info}/licenses/LICENSE +0 -0
- {mcp_sqlite_memory_bank-1.5.0.dist-info → mcp_sqlite_memory_bank-1.5.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1176 @@
|
|
1
|
+
"""
|
2
|
+
Discovery and Exploration Tools for SQLite Memory Bank
|
3
|
+
=====================================================
|
4
|
+
|
5
|
+
This module contains advanced discovery tools that help LLM agents intelligently
|
6
|
+
explore and understand memory bank content through guided workflows and orchestrated
|
7
|
+
discovery processes.
|
8
|
+
|
9
|
+
Author: Robert Meisner
|
10
|
+
"""
|
11
|
+
|
12
|
+
import logging
|
13
|
+
from typing import Any, Dict, List, Optional, cast, Union
|
14
|
+
from datetime import datetime
|
15
|
+
|
16
|
+
from ..database import get_database
|
17
|
+
from ..semantic import is_semantic_search_available
|
18
|
+
from ..types import ToolResponse
|
19
|
+
from ..utils import catch_errors
|
20
|
+
|
21
|
+
|
22
|
+
@catch_errors
def intelligent_discovery(
    discovery_goal: str = "understand_content",
    focus_area: Optional[str] = None,
    depth: str = "moderate",
    agent_id: Optional[str] = None,
) -> ToolResponse:
    """
    🧠 **INTELLIGENT DISCOVERY** - AI-guided exploration of your memory bank!

    Orchestrates multiple discovery tools based on your exploration goals.
    Provides step-by-step guidance and actionable insights tailored to your needs.

    Args:
        discovery_goal (str): What you want to achieve
            - "understand_content": Learn what data is available and how it's organized
            - "find_patterns": Discover themes, relationships, and content patterns
            - "explore_structure": Understand database schema and organization
            - "assess_quality": Evaluate content quality and completeness
            - "prepare_search": Get ready for effective content searching
        focus_area (Optional[str]): Specific table or topic to focus on (default: all)
        depth (str): How thorough the discovery should be
            - "quick": Fast overview with key insights
            - "moderate": Balanced analysis with actionable recommendations
            - "comprehensive": Deep dive with detailed analysis
        agent_id (Optional[str]): Agent identifier for learning discovery patterns

    Returns:
        ToolResponse: On success: {"success": True, "discovery": Dict, "next_steps": List}
                      On error: {"success": False, "error": str, "category": str, "details": dict}

    Examples:
        >>> intelligent_discovery("understand_content")
        {"success": True, "discovery": {
            "overview": {"total_tables": 5, "total_rows": 234},
            "content_summary": {...},
            "recommendations": [...]
        }, "next_steps": ["Use auto_smart_search() for specific queries"]}

        >>> intelligent_discovery("find_patterns", focus_area="technical_decisions")
        {"success": True, "discovery": {
            "patterns": {"decision_themes": [...], "temporal_trends": [...]},
            "insights": [...]
        }}

    FastMCP Tool Info:
        - **COMPLETELY AUTOMATED**: No manual tool chaining required
        - **GOAL-ORIENTED**: Tailored discovery based on your specific objectives
        - **ACTIONABLE INSIGHTS**: Always includes concrete next steps
        - **LEARNING**: Improves recommendations based on usage patterns
        - **PERFECT FOR AGENTS**: Single tool that orchestrates complex discovery workflows
    """
    try:
        # NOTE(review): imported locally rather than at module top — presumably to
        # avoid a circular import with the server module; confirm before moving.
        from .. import server
        db = get_database(server.DB_PATH)

        # Initialize discovery session
        # Mixed value types (str, None, lists), so annotate as Dict[str, Any].
        discovery_session: Dict[str, Any] = {
            "goal": discovery_goal,
            "focus_area": focus_area,
            "depth": depth,
            "timestamp": datetime.now().isoformat(),
            "agent_id": agent_id,
            "steps_completed": [],
            "insights": [],
            "recommendations": []
        }

        # Step 1: Basic overview
        discovery_session["steps_completed"].append("basic_overview")
        tables_result = db.list_tables()
        if not tables_result.get("success"):
            return cast(ToolResponse, {
                "success": False,
                "error": "Failed to get basic overview",
                "category": "DISCOVERY_ERROR",
                "details": tables_result
            })

        tables = tables_result.get("tables", [])
        overview = {
            "total_tables": len(tables),
            "available_tables": tables,
            "semantic_search_available": is_semantic_search_available()
        }

        # Steps 2-5 merge analysis dicts into `overview` via update(); a later
        # analysis can overwrite a key written by an earlier one if they share
        # key names, so the step order below is significant.

        # Step 2: Content analysis based on goal
        if discovery_goal in ["understand_content", "find_patterns", "assess_quality"]:
            discovery_session["steps_completed"].append("content_analysis")
            content_analysis = _analyze_content_for_discovery(db, tables, focus_area, depth)
            overview.update(content_analysis)

        # Step 3: Schema analysis for structure exploration
        if discovery_goal in ["explore_structure", "understand_content"]:
            discovery_session["steps_completed"].append("schema_analysis")
            schema_analysis = _analyze_schema_for_discovery(db, tables, focus_area, depth)
            overview.update(schema_analysis)

        # Step 4: Quality assessment
        if discovery_goal in ["assess_quality", "find_patterns"]:
            discovery_session["steps_completed"].append("quality_assessment")
            quality_analysis = _assess_content_quality(db, tables, focus_area, depth)
            overview.update(quality_analysis)

        # Step 5: Search readiness for search preparation
        if discovery_goal in ["prepare_search", "understand_content"]:
            discovery_session["steps_completed"].append("search_readiness")
            search_analysis = _analyze_search_readiness(db, tables, focus_area)
            overview.update(search_analysis)

        # Step 6: Generate insights and recommendations
        insights, recommendations, next_steps = _generate_discovery_insights(
            discovery_goal, overview, focus_area, depth
        )

        discovery_session["insights"] = insights
        discovery_session["recommendations"] = recommendations

        # Step 7: Store discovery pattern for learning (if agent_id provided)
        if agent_id:
            _store_discovery_pattern(db, discovery_session)

        return cast(ToolResponse, {
            "success": True,
            "discovery": {
                "goal": discovery_goal,
                "overview": overview,
                "insights": insights,
                "recommendations": recommendations,
                "focus_area": focus_area,
                "depth": depth,
                "steps_completed": discovery_session["steps_completed"]
            },
            "next_steps": next_steps,
            "discovery_session": discovery_session,
            "quick_actions": _generate_quick_actions(discovery_goal, overview, focus_area)
        })

    except Exception as e:
        # Local catch keeps error details richer than @catch_errors alone would;
        # the decorator remains as an outer safety net.
        return cast(ToolResponse, {
            "success": False,
            "error": f"Intelligent discovery failed: {str(e)}",
            "category": "DISCOVERY_ERROR",
            "details": {
                "goal": discovery_goal,
                "focus_area": focus_area,
                "depth": depth,
                "agent_id": agent_id
            }
        })
|
172
|
+
|
173
|
+
|
174
|
+
@catch_errors
def discovery_templates(
    template_type: str = "first_time_exploration",
    customize_for: Optional[str] = None
) -> ToolResponse:
    """
    📋 **DISCOVERY TEMPLATES** - Pre-built exploration workflows for common scenarios!

    Provides step-by-step discovery templates optimized for specific agent use cases.
    Each template includes the exact sequence of tools to call and what to look for.

    Args:
        template_type (str): Type of discovery template to provide
            - "first_time_exploration": Complete workflow for new agents
            - "content_audit": Systematic content quality review
            - "search_optimization": Prepare memory bank for optimal searching
            - "relationship_mapping": Discover connections between data
            - "problem_solving": Find information to solve specific problems
            - "knowledge_extraction": Extract insights from stored knowledge
        customize_for (Optional[str]): Customize template for specific domain/topic

    Returns:
        ToolResponse: {"success": True, "template": Dict, "workflow": List}

    Examples:
        >>> discovery_templates("first_time_exploration")
        {"success": True, "template": {
            "name": "First Time Exploration",
            "description": "Complete discovery workflow for new agents",
            "workflow": [
                {"step": 1, "tool": "intelligent_discovery", "params": {...}},
                {"step": 2, "tool": "explore_tables", "params": {...}}
            ]
        }}

    FastMCP Tool Info:
        - **PROVEN WORKFLOWS**: Battle-tested discovery sequences
        - **STEP-BY-STEP GUIDANCE**: Exact tools and parameters to use
        - **CUSTOMIZABLE**: Adapt templates to your specific needs
        - **LEARNING-OPTIMIZED**: Based on successful discovery patterns
    """
    try:
        # FIX: previously only four of the six documented template types were
        # implemented; requesting "relationship_mapping" or "knowledge_extraction"
        # returned a TEMPLATE_ERROR despite the docstring advertising them.
        # Both are now implemented below.
        templates = {
            "first_time_exploration": {
                "name": "First Time Exploration",
                "description": "Complete discovery workflow for agents new to this memory bank",
                "estimated_time": "2-3 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Get Overview",
                        "tool": "intelligent_discovery",
                        "params": {"discovery_goal": "understand_content", "depth": "moderate"},
                        "purpose": "Understand what data is available and how it's organized",
                        "look_for": ["total tables", "content types", "data volume"]
                    },
                    {
                        "step": 2,
                        "action": "Explore Structure",
                        "tool": "explore_tables",
                        "params": {"include_row_counts": True},
                        "purpose": "See detailed table schemas and sample data",
                        "look_for": ["column types", "sample content", "data relationships"]
                    },
                    {
                        "step": 3,
                        "action": "Test Search Capabilities",
                        "tool": "auto_smart_search",
                        "params": {"query": "recent important information", "limit": 5},
                        "purpose": "Understand search capabilities and content accessibility",
                        "look_for": ["search quality", "result relevance", "content types found"]
                    },
                    {
                        "step": 4,
                        "action": "Assess Quality",
                        "tool": "get_content_health_score",
                        "params": {},
                        "purpose": "Understand overall memory bank quality and opportunities",
                        "look_for": ["health score", "improvement recommendations", "strengths"]
                    }
                ],
                "success_criteria": [
                    "Understand what types of information are stored",
                    "Know which tables contain the most valuable content",
                    "Identify best search strategies for this memory bank",
                    "Have actionable next steps for productive use"
                ]
            },

            "content_audit": {
                "name": "Content Quality Audit",
                "description": "Systematic review of content quality and completeness",
                "estimated_time": "5-7 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Quality Assessment",
                        "tool": "get_content_health_score",
                        "params": {},
                        "purpose": "Get overall quality metrics and problem areas",
                        "look_for": ["quality scores", "problem tables", "recommendations"]
                    },
                    {
                        "step": 2,
                        "action": "Pattern Analysis",
                        "tool": "analyze_memory_patterns",
                        "params": {},
                        "purpose": "Identify content patterns and organizational issues",
                        "look_for": ["content distribution", "sparse tables", "organization gaps"]
                    },
                    {
                        "step": 3,
                        "action": "Table-by-Table Review",
                        "tool": "explore_tables",
                        "params": {"include_row_counts": True},
                        "purpose": "Detailed examination of each table's content",
                        "look_for": ["empty tables", "low-quality content", "missing data"]
                    },
                    {
                        "step": 4,
                        "action": "Search Readiness",
                        "tool": "intelligent_discovery",
                        "params": {"discovery_goal": "prepare_search", "depth": "comprehensive"},
                        "purpose": "Ensure content is optimally searchable",
                        "look_for": ["embedding coverage", "search optimization opportunities"]
                    }
                ],
                "success_criteria": [
                    "Identify all content quality issues",
                    "Have specific recommendations for improvement",
                    "Understand which content areas need attention",
                    "Know how to optimize for better searchability"
                ]
            },

            "search_optimization": {
                "name": "Search Optimization Setup",
                "description": "Prepare memory bank for optimal content discovery and searching",
                "estimated_time": "3-5 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Search Capability Assessment",
                        "tool": "intelligent_discovery",
                        "params": {"discovery_goal": "prepare_search", "depth": "comprehensive"},
                        "purpose": "Understand current search capabilities and gaps",
                        "look_for": ["semantic readiness", "text column identification", "embedding status"]
                    },
                    {
                        "step": 2,
                        "action": "Content Analysis for Search",
                        "tool": "analyze_memory_patterns",
                        "params": {},
                        "purpose": "Identify high-value content for search optimization",
                        "look_for": ["text-rich tables", "high-value content", "search opportunities"]
                    },
                    {
                        "step": 3,
                        "action": "Test Current Search",
                        "tool": "search_content",
                        "params": {"query": "test search capabilities", "limit": 10},
                        "purpose": "Baseline current search performance",
                        "look_for": ["search result quality", "coverage", "relevance"]
                    },
                    {
                        "step": 4,
                        "action": "Semantic Search Setup",
                        "tool": "auto_semantic_search",
                        "params": {"query": "important valuable content", "limit": 5},
                        "purpose": "Enable and test semantic search capabilities",
                        "look_for": ["automatic embedding generation", "semantic result quality"]
                    }
                ],
                "success_criteria": [
                    "Semantic search is enabled for key tables",
                    "Both keyword and semantic search work effectively",
                    "Search performance meets quality standards",
                    "Clear strategy for ongoing search optimization"
                ]
            },

            # New in 1.5.1: implements the documented "relationship_mapping" type.
            "relationship_mapping": {
                "name": "Relationship Mapping",
                "description": "Discover structural and semantic connections between data areas",
                "estimated_time": "3-5 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Structure Overview",
                        "tool": "intelligent_discovery",
                        "params": {"discovery_goal": "explore_structure", "depth": "moderate"},
                        "purpose": "Understand table organization before mapping relationships",
                        "look_for": ["table schemas", "shared column names", "content types"]
                    },
                    {
                        "step": 2,
                        "action": "Automated Relationship Discovery",
                        "tool": "discover_relationships",
                        "params": {"similarity_threshold": 0.6},
                        "purpose": "Find structural, semantic, temporal, and naming relationships",
                        "look_for": ["foreign key references", "semantically similar tables", "temporal patterns"]
                    },
                    {
                        "step": 3,
                        "action": "Validate Semantic Connections",
                        "tool": "auto_semantic_search",
                        "params": {"query": "REPLACE_WITH_SHARED_TOPIC", "similarity_threshold": 0.4},
                        "purpose": "Confirm discovered relationships reflect real content overlap",
                        "look_for": ["cross-table results", "common themes", "content overlap"]
                    }
                ],
                "customization_note": "Replace REPLACE_WITH_SHARED_TOPIC with a topic you expect to span multiple tables",
                "success_criteria": [
                    "Know which tables are structurally related",
                    "Understand content-level connections between tables",
                    "Have a map of the strongest data relationships"
                ]
            },

            "problem_solving": {
                "name": "Problem-Solving Discovery",
                "description": "Find information to solve specific problems or answer questions",
                "estimated_time": "2-4 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Quick Content Survey",
                        "tool": "intelligent_discovery",
                        "params": {"discovery_goal": "understand_content", "depth": "quick"},
                        "purpose": "Rapid overview of available information",
                        "look_for": ["relevant content areas", "potential information sources"]
                    },
                    {
                        "step": 2,
                        "action": "Targeted Search",
                        "tool": "auto_smart_search",
                        "params": {"query": "REPLACE_WITH_PROBLEM_KEYWORDS", "limit": 10},
                        "purpose": "Find directly relevant information",
                        "look_for": ["directly applicable content", "related information", "context clues"]
                    },
                    {
                        "step": 3,
                        "action": "Related Content Discovery",
                        "tool": "auto_semantic_search",
                        "params": {"query": "REPLACE_WITH_CONCEPTUAL_TERMS", "similarity_threshold": 0.3},
                        "purpose": "Find conceptually related information",
                        "look_for": ["broader context", "related concepts", "background information"]
                    },
                    {
                        "step": 4,
                        "action": "Information Gap Analysis",
                        "tool": "explore_tables",
                        "params": {"include_row_counts": True},
                        "purpose": "Identify what information might be missing",
                        "look_for": ["information gaps", "additional context sources", "related data"]
                    }
                ],
                "customization_note": "Replace REPLACE_WITH_PROBLEM_KEYWORDS and REPLACE_WITH_CONCEPTUAL_TERMS with your specific problem terms",
                "success_criteria": [
                    "Found directly relevant information",
                    "Identified related/contextual information",
                    "Understand what information might be missing",
                    "Have clear next steps for problem resolution"
                ]
            },

            # New in 1.5.1: implements the documented "knowledge_extraction" type.
            "knowledge_extraction": {
                "name": "Knowledge Extraction",
                "description": "Extract actionable insights from stored knowledge",
                "estimated_time": "4-6 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Identify High-Value Content",
                        "tool": "intelligent_discovery",
                        "params": {"discovery_goal": "find_patterns", "depth": "comprehensive"},
                        "purpose": "Locate the richest content areas worth mining",
                        "look_for": ["high-value tables", "content themes", "quality scores"]
                    },
                    {
                        "step": 2,
                        "action": "Targeted Knowledge Search",
                        "tool": "auto_smart_search",
                        "params": {"query": "REPLACE_WITH_KNOWLEDGE_TOPIC", "limit": 10},
                        "purpose": "Pull the most relevant stored knowledge on the topic",
                        "look_for": ["key facts", "decisions and rationale", "supporting context"]
                    },
                    {
                        "step": 3,
                        "action": "Conceptual Expansion",
                        "tool": "auto_semantic_search",
                        "params": {"query": "REPLACE_WITH_KNOWLEDGE_TOPIC", "similarity_threshold": 0.3, "limit": 10},
                        "purpose": "Surface related knowledge that keyword search misses",
                        "look_for": ["related concepts", "background information", "implicit connections"]
                    },
                    {
                        "step": 4,
                        "action": "Gap Review",
                        "tool": "explore_tables",
                        "params": {"include_row_counts": True},
                        "purpose": "Check whether important knowledge areas are missing or thin",
                        "look_for": ["sparse tables", "missing topics", "follow-up questions"]
                    }
                ],
                "customization_note": "Replace REPLACE_WITH_KNOWLEDGE_TOPIC with the subject you want to extract knowledge about",
                "success_criteria": [
                    "Extracted the key knowledge on the target topic",
                    "Found related and background information",
                    "Identified knowledge gaps worth filling"
                ]
            }
        }

        if template_type not in templates:
            available_templates = list(templates.keys())
            return cast(ToolResponse, {
                "success": False,
                "error": f"Template '{template_type}' not found",
                "category": "TEMPLATE_ERROR",
                "details": {
                    "available_templates": available_templates,
                    "requested_template": template_type
                }
            })

        template = templates[template_type]

        # Customize template if requested
        if customize_for:
            template = _customize_template(template, customize_for)

        return cast(ToolResponse, {
            "success": True,
            "template": template,
            "template_type": template_type,
            "customized_for": customize_for,
            "available_templates": list(templates.keys()),
            "usage_tip": "Follow the workflow steps in order, adapting parameters as needed for your specific situation"
        })

    except Exception as e:
        return cast(ToolResponse, {
            "success": False,
            "error": f"Discovery template generation failed: {str(e)}",
            "category": "TEMPLATE_ERROR",
            "details": {"template_type": template_type, "customize_for": customize_for}
        })
|
437
|
+
|
438
|
+
|
439
|
+
@catch_errors
def discover_relationships(
    table_name: Optional[str] = None,
    relationship_types: Optional[List[str]] = None,
    similarity_threshold: float = 0.6
) -> ToolResponse:
    """
    🔗 **RELATIONSHIP DISCOVERY** - Find hidden connections in your data!

    Automatically discovers relationships between tables and content areas using
    both structural analysis and semantic similarity to reveal data connections.

    Args:
        table_name (Optional[str]): Focus on relationships for specific table (default: all)
        relationship_types (Optional[List[str]]): Types of relationships to discover
            (default: ["foreign_keys", "semantic_similarity", "temporal_patterns"])
            - "foreign_keys": Structural relationships via foreign keys
            - "semantic_similarity": Content-based relationships via semantic analysis
            - "temporal_patterns": Time-based relationships and patterns
            - "naming_patterns": Relationships based on naming conventions
        similarity_threshold (float): Minimum similarity for semantic relationships (0.0-1.0)

    Returns:
        ToolResponse: {"success": True, "relationships": Dict, "insights": List}

    Examples:
        >>> discover_relationships("users")
        {"success": True, "relationships": {
            "users": {
                "foreign_key_refs": ["posts.user_id", "comments.user_id"],
                "semantic_similar": [{"table": "profiles", "similarity": 0.8}],
                "temporal_related": ["user_sessions"]
            }
        }}

    FastMCP Tool Info:
        - **AUTOMATIC DETECTION**: Finds relationships you might not notice manually
        - **MULTIPLE METHODS**: Combines structural, semantic, and temporal analysis
        - **ACTIONABLE INSIGHTS**: Suggests how to leverage discovered relationships
        - **PERFECT FOR EXPLORATION**: Reveals hidden data organization patterns
    """
    # FIX: the previous signature used a mutable list literal as the default,
    # which is evaluated once at definition time and shared across all calls —
    # a caller mutating it would silently change the default for everyone.
    # Resolving a None sentinel here preserves the same effective default.
    if relationship_types is None:
        relationship_types = ["foreign_keys", "semantic_similarity", "temporal_patterns"]

    try:
        # NOTE(review): local import presumably avoids a circular dependency
        # with the server module — confirm before moving to module level.
        from .. import server
        db = get_database(server.DB_PATH)

        # Get all tables or focus on specific table
        tables_result = db.list_tables()
        if not tables_result.get("success"):
            return cast(ToolResponse, tables_result)

        all_tables = tables_result.get("tables", [])
        target_tables = [table_name] if table_name else all_tables

        relationships = {}
        insights = []

        for target_table in target_tables:
            # Skip unknown table names rather than failing the whole request.
            if target_table not in all_tables:
                continue

            table_relationships = {
                "foreign_key_refs": [],
                "semantic_similar": [],
                "temporal_related": [],
                "naming_related": []
            }

            # Discover foreign key relationships
            if "foreign_keys" in relationship_types:
                fk_relationships = _discover_foreign_keys(db, target_table, all_tables)
                table_relationships["foreign_key_refs"] = fk_relationships
                if fk_relationships:
                    insights.append(f"Table '{target_table}' has structural relationships with {len(fk_relationships)} other tables")

            # Discover semantic similarity relationships (needs optional deps)
            if "semantic_similarity" in relationship_types and is_semantic_search_available():
                semantic_relationships = _discover_semantic_relationships(
                    db, target_table, all_tables, similarity_threshold
                )
                table_relationships["semantic_similar"] = semantic_relationships
                if semantic_relationships:
                    insights.append(f"Table '{target_table}' has semantic similarity with {len(semantic_relationships)} tables")

            # Discover temporal patterns
            if "temporal_patterns" in relationship_types:
                temporal_relationships = _discover_temporal_relationships(db, target_table, all_tables)
                table_relationships["temporal_related"] = temporal_relationships
                if temporal_relationships:
                    insights.append(f"Table '{target_table}' shows temporal patterns with {len(temporal_relationships)} tables")

            # Discover naming pattern relationships
            if "naming_patterns" in relationship_types:
                naming_relationships = _discover_naming_relationships(target_table, all_tables)
                table_relationships["naming_related"] = naming_relationships
                if naming_relationships:
                    insights.append(f"Table '{target_table}' has naming pattern relationships with {len(naming_relationships)} tables")

            relationships[target_table] = table_relationships

        # Generate relationship insights
        total_relationships = sum(
            len(rel["foreign_key_refs"]) + len(rel["semantic_similar"]) +
            len(rel["temporal_related"]) + len(rel["naming_related"])
            for rel in relationships.values()
        )

        if total_relationships == 0:
            insights.append("No strong relationships discovered. Consider adding more content or setting up semantic search.")
        else:
            insights.append(f"Discovered {total_relationships} total relationships across {len(relationships)} tables")

        return cast(ToolResponse, {
            "success": True,
            "relationships": relationships,
            "insights": insights,
            "relationship_summary": {
                "total_relationships": total_relationships,
                "tables_analyzed": len(relationships),
                "strongest_connections": _identify_strongest_connections(relationships)
            },
            "recommendations": _generate_relationship_recommendations(relationships, insights)
        })

    except Exception as e:
        return cast(ToolResponse, {
            "success": False,
            "error": f"Relationship discovery failed: {str(e)}",
            "category": "RELATIONSHIP_ERROR",
            "details": {
                "table_name": table_name,
                "relationship_types": relationship_types,
                "similarity_threshold": similarity_threshold
            }
        })
|
572
|
+
|
573
|
+
|
574
|
+
# Helper functions for discovery orchestration
|
575
|
+
|
576
|
+
def _analyze_content_for_discovery(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
|
577
|
+
"""Analyze content patterns and distribution."""
|
578
|
+
content_analysis = {
|
579
|
+
"total_rows": 0,
|
580
|
+
"content_distribution": {},
|
581
|
+
"text_rich_tables": [],
|
582
|
+
"sparse_tables": [],
|
583
|
+
"high_value_tables": []
|
584
|
+
}
|
585
|
+
|
586
|
+
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
587
|
+
|
588
|
+
for table_name in target_tables:
|
589
|
+
try:
|
590
|
+
rows_result = db.read_rows(table_name)
|
591
|
+
if rows_result.get("success"):
|
592
|
+
rows = rows_result.get("rows", [])
|
593
|
+
row_count = len(rows)
|
594
|
+
content_analysis["total_rows"] += row_count
|
595
|
+
content_analysis["content_distribution"][table_name] = row_count
|
596
|
+
|
597
|
+
# Analyze content quality if depth allows
|
598
|
+
if depth in ["moderate", "comprehensive"] and rows:
|
599
|
+
# Sample content quality
|
600
|
+
sample_size = min(3, len(rows))
|
601
|
+
total_content_length = 0
|
602
|
+
|
603
|
+
for row in rows[:sample_size]:
|
604
|
+
for value in row.values():
|
605
|
+
if isinstance(value, str):
|
606
|
+
total_content_length += len(value)
|
607
|
+
|
608
|
+
avg_content_length = total_content_length / sample_size if sample_size > 0 else 0
|
609
|
+
|
610
|
+
if avg_content_length > 200:
|
611
|
+
content_analysis["text_rich_tables"].append(table_name)
|
612
|
+
if avg_content_length > 500:
|
613
|
+
content_analysis["high_value_tables"].append(table_name)
|
614
|
+
if row_count < 5:
|
615
|
+
content_analysis["sparse_tables"].append(table_name)
|
616
|
+
|
617
|
+
except Exception:
|
618
|
+
continue
|
619
|
+
|
620
|
+
return content_analysis
|
621
|
+
|
622
|
+
|
623
|
+
def _analyze_schema_for_discovery(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
|
624
|
+
"""Analyze schema structure and organization."""
|
625
|
+
schema_analysis = {
|
626
|
+
"total_columns": 0,
|
627
|
+
"text_columns_by_table": {},
|
628
|
+
"well_structured_tables": [],
|
629
|
+
"schema_issues": []
|
630
|
+
}
|
631
|
+
|
632
|
+
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
633
|
+
|
634
|
+
for table_name in target_tables:
|
635
|
+
try:
|
636
|
+
schema_result = db.describe_table(table_name)
|
637
|
+
if schema_result.get("success"):
|
638
|
+
columns = schema_result.get("columns", [])
|
639
|
+
schema_analysis["total_columns"] += len(columns)
|
640
|
+
|
641
|
+
# Find text columns
|
642
|
+
text_columns = [col for col in columns if "TEXT" in col.get("type", "").upper()]
|
643
|
+
schema_analysis["text_columns_by_table"][table_name] = len(text_columns)
|
644
|
+
|
645
|
+
# Check for well-structured tables
|
646
|
+
has_id = any(col.get("name") == "id" for col in columns)
|
647
|
+
has_timestamp = any("timestamp" in col.get("name", "").lower() for col in columns)
|
648
|
+
has_text_content = len(text_columns) > 0
|
649
|
+
|
650
|
+
if has_id and has_timestamp and has_text_content:
|
651
|
+
schema_analysis["well_structured_tables"].append(table_name)
|
652
|
+
|
653
|
+
# Identify schema issues
|
654
|
+
if len(columns) < 2:
|
655
|
+
schema_analysis["schema_issues"].append(f"Table '{table_name}' has very few columns")
|
656
|
+
if not has_id:
|
657
|
+
schema_analysis["schema_issues"].append(f"Table '{table_name}' lacks ID column")
|
658
|
+
|
659
|
+
except Exception:
|
660
|
+
continue
|
661
|
+
|
662
|
+
return schema_analysis
|
663
|
+
|
664
|
+
|
665
|
+
def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
|
666
|
+
"""Assess overall content quality."""
|
667
|
+
quality_analysis = {
|
668
|
+
"quality_scores": {},
|
669
|
+
"overall_quality": 0.0,
|
670
|
+
"improvement_opportunities": [],
|
671
|
+
"quality_distribution": {"high": 0, "medium": 0, "low": 0}
|
672
|
+
}
|
673
|
+
|
674
|
+
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
675
|
+
total_score = 0
|
676
|
+
table_count = 0
|
677
|
+
|
678
|
+
for table_name in target_tables:
|
679
|
+
try:
|
680
|
+
rows_result = db.read_rows(table_name)
|
681
|
+
if rows_result.get("success"):
|
682
|
+
rows = rows_result.get("rows", [])
|
683
|
+
|
684
|
+
if not rows:
|
685
|
+
quality_analysis["quality_scores"][table_name] = 0.0
|
686
|
+
quality_analysis["improvement_opportunities"].append(f"Table '{table_name}' is empty")
|
687
|
+
quality_analysis["quality_distribution"]["low"] += 1
|
688
|
+
continue
|
689
|
+
|
690
|
+
# Calculate quality score
|
691
|
+
sample_size = min(5, len(rows))
|
692
|
+
content_scores = []
|
693
|
+
|
694
|
+
for row in rows[:sample_size]:
|
695
|
+
row_score = 0
|
696
|
+
non_null_fields = sum(1 for v in row.values() if v is not None and str(v).strip())
|
697
|
+
total_content_length = sum(len(str(v)) for v in row.values() if v is not None)
|
698
|
+
|
699
|
+
# Score based on completeness and content richness
|
700
|
+
if non_null_fields > 2:
|
701
|
+
row_score += 3
|
702
|
+
if total_content_length > 100:
|
703
|
+
row_score += 4
|
704
|
+
if total_content_length > 500:
|
705
|
+
row_score += 3
|
706
|
+
|
707
|
+
content_scores.append(min(10, row_score))
|
708
|
+
|
709
|
+
table_quality = sum(content_scores) / len(content_scores) if content_scores else 0
|
710
|
+
quality_analysis["quality_scores"][table_name] = round(table_quality, 1)
|
711
|
+
|
712
|
+
# Categorize quality
|
713
|
+
if table_quality >= 7:
|
714
|
+
quality_analysis["quality_distribution"]["high"] += 1
|
715
|
+
elif table_quality >= 4:
|
716
|
+
quality_analysis["quality_distribution"]["medium"] += 1
|
717
|
+
else:
|
718
|
+
quality_analysis["quality_distribution"]["low"] += 1
|
719
|
+
quality_analysis["improvement_opportunities"].append(
|
720
|
+
f"Table '{table_name}' has low content quality (score: {table_quality:.1f})"
|
721
|
+
)
|
722
|
+
|
723
|
+
total_score += table_quality
|
724
|
+
table_count += 1
|
725
|
+
|
726
|
+
except Exception:
|
727
|
+
continue
|
728
|
+
|
729
|
+
quality_analysis["overall_quality"] = round(total_score / table_count, 1) if table_count > 0 else 0.0
|
730
|
+
|
731
|
+
return quality_analysis
|
732
|
+
|
733
|
+
|
734
|
+
def _analyze_search_readiness(db, tables: List[str], focus_area: Optional[str]) -> Dict[str, Any]:
|
735
|
+
"""Analyze readiness for effective searching."""
|
736
|
+
search_analysis = {
|
737
|
+
"semantic_ready_tables": [],
|
738
|
+
"text_searchable_tables": [],
|
739
|
+
"search_optimization_needed": [],
|
740
|
+
"embedding_coverage": {}
|
741
|
+
}
|
742
|
+
|
743
|
+
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
744
|
+
|
745
|
+
for table_name in target_tables:
|
746
|
+
try:
|
747
|
+
# Check schema for text content
|
748
|
+
schema_result = db.describe_table(table_name)
|
749
|
+
if schema_result.get("success"):
|
750
|
+
columns = schema_result.get("columns", [])
|
751
|
+
text_columns = [col for col in columns if "TEXT" in col.get("type", "").upper()]
|
752
|
+
|
753
|
+
if text_columns:
|
754
|
+
search_analysis["text_searchable_tables"].append(table_name)
|
755
|
+
|
756
|
+
# Check semantic search readiness if available
|
757
|
+
if is_semantic_search_available():
|
758
|
+
embedding_stats = db.get_embedding_stats(table_name)
|
759
|
+
if embedding_stats.get("success"):
|
760
|
+
coverage = embedding_stats.get("coverage_percent", 0)
|
761
|
+
search_analysis["embedding_coverage"][table_name] = coverage
|
762
|
+
|
763
|
+
if coverage > 80:
|
764
|
+
search_analysis["semantic_ready_tables"].append(table_name)
|
765
|
+
elif len(text_columns) > 0:
|
766
|
+
search_analysis["search_optimization_needed"].append(table_name)
|
767
|
+
|
768
|
+
except Exception:
|
769
|
+
continue
|
770
|
+
|
771
|
+
return search_analysis
|
772
|
+
|
773
|
+
|
774
|
+
def _generate_discovery_insights(discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str], depth: str) -> tuple:
|
775
|
+
"""Generate insights and recommendations based on discovery results."""
|
776
|
+
insights = []
|
777
|
+
recommendations = []
|
778
|
+
next_steps = []
|
779
|
+
|
780
|
+
total_tables = overview.get("total_tables", 0)
|
781
|
+
total_rows = overview.get("total_rows", 0)
|
782
|
+
|
783
|
+
# Goal-specific insights
|
784
|
+
if discovery_goal == "understand_content":
|
785
|
+
insights.append(f"Memory bank contains {total_tables} tables with {total_rows} total rows")
|
786
|
+
|
787
|
+
high_value_tables = overview.get("high_value_tables", [])
|
788
|
+
if high_value_tables:
|
789
|
+
insights.append(f"High-value content found in: {', '.join(high_value_tables[:3])}")
|
790
|
+
recommendations.append(f"Focus search efforts on high-value tables: {', '.join(high_value_tables)}")
|
791
|
+
next_steps.append(f"Use auto_smart_search() to explore content in {high_value_tables[0]}")
|
792
|
+
|
793
|
+
sparse_tables = overview.get("sparse_tables", [])
|
794
|
+
if sparse_tables:
|
795
|
+
insights.append(f"Sparse tables detected: {', '.join(sparse_tables)}")
|
796
|
+
recommendations.append("Consider consolidating or expanding sparse tables")
|
797
|
+
|
798
|
+
elif discovery_goal == "find_patterns":
|
799
|
+
text_rich_tables = overview.get("text_rich_tables", [])
|
800
|
+
if text_rich_tables:
|
801
|
+
insights.append(f"Text-rich content found in {len(text_rich_tables)} tables")
|
802
|
+
next_steps.append("Use semantic search to find content patterns")
|
803
|
+
|
804
|
+
quality_scores = overview.get("quality_scores", {})
|
805
|
+
if quality_scores:
|
806
|
+
avg_quality = sum(quality_scores.values()) / len(quality_scores)
|
807
|
+
insights.append(f"Average content quality: {avg_quality:.1f}/10")
|
808
|
+
|
809
|
+
elif discovery_goal == "explore_structure":
|
810
|
+
well_structured = overview.get("well_structured_tables", [])
|
811
|
+
if well_structured:
|
812
|
+
insights.append(f"Well-structured tables: {', '.join(well_structured)}")
|
813
|
+
recommendations.append("Use well-structured tables as primary data sources")
|
814
|
+
|
815
|
+
schema_issues = overview.get("schema_issues", [])
|
816
|
+
if schema_issues:
|
817
|
+
insights.extend(schema_issues[:3]) # Show first 3 issues
|
818
|
+
|
819
|
+
elif discovery_goal == "assess_quality":
|
820
|
+
overall_quality = overview.get("overall_quality", 0)
|
821
|
+
insights.append(f"Overall content quality score: {overall_quality}/10")
|
822
|
+
|
823
|
+
improvement_opportunities = overview.get("improvement_opportunities", [])
|
824
|
+
recommendations.extend(improvement_opportunities[:3])
|
825
|
+
|
826
|
+
elif discovery_goal == "prepare_search":
|
827
|
+
semantic_ready = overview.get("semantic_ready_tables", [])
|
828
|
+
optimization_needed = overview.get("search_optimization_needed", [])
|
829
|
+
|
830
|
+
if semantic_ready:
|
831
|
+
insights.append(f"Semantic search ready for {len(semantic_ready)} tables")
|
832
|
+
next_steps.append("Use auto_semantic_search() for conceptual queries")
|
833
|
+
|
834
|
+
if optimization_needed:
|
835
|
+
insights.append(f"Search optimization needed for {len(optimization_needed)} tables")
|
836
|
+
next_steps.append(f"Set up embeddings for: {', '.join(optimization_needed[:2])}")
|
837
|
+
|
838
|
+
# Universal recommendations
|
839
|
+
if overview.get("semantic_search_available"):
|
840
|
+
recommendations.append("Use auto_smart_search() for best search results")
|
841
|
+
else:
|
842
|
+
recommendations.append("Install sentence-transformers for semantic search capabilities")
|
843
|
+
|
844
|
+
if not next_steps:
|
845
|
+
next_steps.append("Use explore_tables() for detailed content examination")
|
846
|
+
next_steps.append("Try auto_smart_search() to find specific information")
|
847
|
+
|
848
|
+
return insights, recommendations, next_steps
|
849
|
+
|
850
|
+
|
851
|
+
def _generate_quick_actions(discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str]) -> List[Dict[str, Any]]:
|
852
|
+
"""Generate quick action suggestions."""
|
853
|
+
actions = []
|
854
|
+
|
855
|
+
high_value_tables = overview.get("high_value_tables", [])
|
856
|
+
|
857
|
+
if discovery_goal == "understand_content" and high_value_tables:
|
858
|
+
actions.append({
|
859
|
+
"action": "Explore High-Value Content",
|
860
|
+
"tool": "read_rows",
|
861
|
+
"params": {"table_name": high_value_tables[0]},
|
862
|
+
"description": f"Examine content in {high_value_tables[0]} table"
|
863
|
+
})
|
864
|
+
|
865
|
+
if overview.get("semantic_search_available"):
|
866
|
+
actions.append({
|
867
|
+
"action": "Smart Search",
|
868
|
+
"tool": "auto_smart_search",
|
869
|
+
"params": {"query": "important recent information", "limit": 5},
|
870
|
+
"description": "Find important content using intelligent search"
|
871
|
+
})
|
872
|
+
|
873
|
+
actions.append({
|
874
|
+
"action": "Quality Assessment",
|
875
|
+
"tool": "get_content_health_score",
|
876
|
+
"params": {},
|
877
|
+
"description": "Get detailed quality metrics and recommendations"
|
878
|
+
})
|
879
|
+
|
880
|
+
return actions
|
881
|
+
|
882
|
+
|
883
|
+
def _store_discovery_pattern(db, discovery_session: Dict[str, Any]) -> None:
|
884
|
+
"""Store discovery pattern for learning (if agent learning table exists)."""
|
885
|
+
try:
|
886
|
+
# Check if discovery_patterns table exists
|
887
|
+
tables_result = db.list_tables()
|
888
|
+
if tables_result.get("success") and "discovery_patterns" in tables_result.get("tables", []):
|
889
|
+
# Store the discovery session
|
890
|
+
db.insert_row("discovery_patterns", {
|
891
|
+
"agent_id": discovery_session.get("agent_id"),
|
892
|
+
"goal": discovery_session.get("goal"),
|
893
|
+
"focus_area": discovery_session.get("focus_area"),
|
894
|
+
"depth": discovery_session.get("depth"),
|
895
|
+
"steps_completed": str(discovery_session.get("steps_completed", [])),
|
896
|
+
"success": True,
|
897
|
+
"timestamp": discovery_session.get("timestamp")
|
898
|
+
})
|
899
|
+
except Exception:
|
900
|
+
# Silently fail if learning storage isn't available
|
901
|
+
pass
|
902
|
+
|
903
|
+
|
904
|
+
def _customize_template(template: Dict[str, Any], customize_for: str) -> Dict[str, Any]:
|
905
|
+
"""Customize template for specific domain or topic."""
|
906
|
+
customized = template.copy()
|
907
|
+
|
908
|
+
# Add customization note
|
909
|
+
customized["customized_for"] = customize_for
|
910
|
+
customized["customization_note"] = f"Template customized for: {customize_for}"
|
911
|
+
|
912
|
+
# Modify search queries in workflow to include customization
|
913
|
+
for step in customized.get("workflow", []):
|
914
|
+
if step.get("tool") in ["auto_smart_search", "auto_semantic_search", "search_content"]:
|
915
|
+
params = step.get("params", {})
|
916
|
+
if "query" in params and params["query"].startswith("REPLACE_WITH"):
|
917
|
+
# Keep the placeholder for user customization
|
918
|
+
continue
|
919
|
+
elif "query" in params:
|
920
|
+
# Add customization to existing query
|
921
|
+
params["query"] = f"{customize_for} {params['query']}"
|
922
|
+
|
923
|
+
return customized
|
924
|
+
|
925
|
+
|
926
|
+
# Relationship discovery helper functions
|
927
|
+
|
928
|
+
def _discover_foreign_keys(db, target_table: str, all_tables: List[str]) -> List[str]:
|
929
|
+
"""Discover foreign key relationships."""
|
930
|
+
relationships = []
|
931
|
+
|
932
|
+
try:
|
933
|
+
# Get target table schema
|
934
|
+
target_schema = db.describe_table(target_table)
|
935
|
+
if not target_schema.get("success"):
|
936
|
+
return relationships
|
937
|
+
|
938
|
+
target_columns = target_schema.get("columns", [])
|
939
|
+
target_col_names = [col.get("name", "") for col in target_columns]
|
940
|
+
|
941
|
+
# Check other tables for potential foreign key references
|
942
|
+
for other_table in all_tables:
|
943
|
+
if other_table == target_table:
|
944
|
+
continue
|
945
|
+
|
946
|
+
try:
|
947
|
+
other_schema = db.describe_table(other_table)
|
948
|
+
if other_schema.get("success"):
|
949
|
+
other_columns = other_schema.get("columns", [])
|
950
|
+
|
951
|
+
for col in other_columns:
|
952
|
+
col_name = col.get("name", "")
|
953
|
+
# Look for naming patterns that suggest foreign keys
|
954
|
+
if col_name.endswith("_id") or col_name.endswith("Id"):
|
955
|
+
potential_ref = col_name.replace("_id", "").replace("Id", "")
|
956
|
+
if potential_ref == target_table or f"{potential_ref}s" == target_table:
|
957
|
+
relationships.append(f"{other_table}.{col_name}")
|
958
|
+
|
959
|
+
# Look for exact column name matches (potential shared keys)
|
960
|
+
if col_name in target_col_names and col_name != "id":
|
961
|
+
relationships.append(f"{other_table}.{col_name} (shared key)")
|
962
|
+
|
963
|
+
except Exception:
|
964
|
+
continue
|
965
|
+
|
966
|
+
except Exception:
|
967
|
+
pass
|
968
|
+
|
969
|
+
return relationships
|
970
|
+
|
971
|
+
|
972
|
+
def _discover_semantic_relationships(db, target_table: str, all_tables: List[str], threshold: float) -> List[Dict[str, Any]]:
    """Find tables whose content is semantically similar to *target_table*.

    Builds a short probe query from the target's first row and runs a
    semantic search against every other table, keeping those whose average
    similarity meets *threshold*.  Returns an empty list when the semantic
    stack is unavailable or the target has no usable text.

    Args:
        db: Database wrapper exposing ``read_rows`` and ``semantic_search``.
        target_table: Table providing the probe content.
        all_tables: Every table name to compare against.
        threshold: Minimum average similarity score to count as related.

    Returns:
        Dicts with ``table``, ``similarity`` and ``related_content_count``.
    """
    related: List[Dict[str, Any]] = []

    # Without the optional embedding stack there is nothing to compare.
    if not is_semantic_search_available():
        return related

    try:
        sample = db.read_rows(target_table)
        if not sample.get("success") or not sample.get("rows"):
            return related

        # Build a short probe query from the first row's non-null values.
        first_row = sample["rows"][0]
        probe = " ".join(str(value) for value in first_row.values() if value is not None)[:200]
        if len(probe.strip()) < 10:
            # Too little text to produce a meaningful similarity signal.
            return related

        for candidate in all_tables:
            if candidate == target_table:
                continue

            try:
                hits = db.semantic_search(
                    probe, [candidate], "embedding", None, threshold, 3, "all-MiniLM-L6-v2"
                )
                if not (hits.get("success") and hits.get("results")):
                    continue

                scored = hits["results"]
                mean_score = sum(item.get("similarity_score", 0) for item in scored) / len(scored)
                if mean_score >= threshold:
                    related.append({
                        "table": candidate,
                        "similarity": round(mean_score, 2),
                        "related_content_count": len(scored),
                    })
            except Exception:
                continue

    except Exception:
        pass

    return related
|
1021
|
+
|
1022
|
+
|
1023
|
+
def _discover_temporal_relationships(db, target_table: str, all_tables: List[str]) -> List[str]:
|
1024
|
+
"""Discover temporal pattern relationships."""
|
1025
|
+
relationships = []
|
1026
|
+
|
1027
|
+
try:
|
1028
|
+
# Check if target table has timestamp columns
|
1029
|
+
target_schema = db.describe_table(target_table)
|
1030
|
+
if not target_schema.get("success"):
|
1031
|
+
return relationships
|
1032
|
+
|
1033
|
+
target_columns = target_schema.get("columns", [])
|
1034
|
+
target_has_timestamp = any("timestamp" in col.get("name", "").lower() or
|
1035
|
+
"date" in col.get("name", "").lower() or
|
1036
|
+
"time" in col.get("name", "").lower()
|
1037
|
+
for col in target_columns)
|
1038
|
+
|
1039
|
+
if not target_has_timestamp:
|
1040
|
+
return relationships
|
1041
|
+
|
1042
|
+
# Check other tables for similar timestamp patterns
|
1043
|
+
for other_table in all_tables:
|
1044
|
+
if other_table == target_table:
|
1045
|
+
continue
|
1046
|
+
|
1047
|
+
try:
|
1048
|
+
other_schema = db.describe_table(other_table)
|
1049
|
+
if other_schema.get("success"):
|
1050
|
+
other_columns = other_schema.get("columns", [])
|
1051
|
+
other_has_timestamp = any("timestamp" in col.get("name", "").lower() or
|
1052
|
+
"date" in col.get("name", "").lower() or
|
1053
|
+
"time" in col.get("name", "").lower()
|
1054
|
+
for col in other_columns)
|
1055
|
+
|
1056
|
+
if other_has_timestamp:
|
1057
|
+
relationships.append(other_table)
|
1058
|
+
|
1059
|
+
except Exception:
|
1060
|
+
continue
|
1061
|
+
|
1062
|
+
except Exception:
|
1063
|
+
pass
|
1064
|
+
|
1065
|
+
return relationships
|
1066
|
+
|
1067
|
+
|
1068
|
+
def _discover_naming_relationships(target_table: str, all_tables: List[str]) -> List[str]:
|
1069
|
+
"""Discover relationships based on naming conventions."""
|
1070
|
+
relationships = []
|
1071
|
+
|
1072
|
+
# Look for tables with similar names or naming patterns
|
1073
|
+
target_lower = target_table.lower()
|
1074
|
+
|
1075
|
+
for other_table in all_tables:
|
1076
|
+
if other_table == target_table:
|
1077
|
+
continue
|
1078
|
+
|
1079
|
+
other_lower = other_table.lower()
|
1080
|
+
|
1081
|
+
# Check for plural/singular relationships
|
1082
|
+
if (target_lower.endswith('s') and other_lower == target_lower[:-1]) or \
|
1083
|
+
(other_lower.endswith('s') and target_lower == other_lower[:-1]):
|
1084
|
+
relationships.append(other_table)
|
1085
|
+
continue
|
1086
|
+
|
1087
|
+
# Check for common prefixes or suffixes
|
1088
|
+
if len(target_lower) > 3 and len(other_lower) > 3:
|
1089
|
+
# Common prefix (at least 4 characters)
|
1090
|
+
if target_lower[:4] == other_lower[:4]:
|
1091
|
+
relationships.append(other_table)
|
1092
|
+
continue
|
1093
|
+
|
1094
|
+
# Common suffix (at least 4 characters)
|
1095
|
+
if target_lower[-4:] == other_lower[-4:]:
|
1096
|
+
relationships.append(other_table)
|
1097
|
+
continue
|
1098
|
+
|
1099
|
+
# Check for semantic name relationships
|
1100
|
+
name_words = set(target_lower.split('_'))
|
1101
|
+
other_words = set(other_lower.split('_'))
|
1102
|
+
|
1103
|
+
# If tables share significant word overlap
|
1104
|
+
if len(name_words.intersection(other_words)) > 0:
|
1105
|
+
relationships.append(other_table)
|
1106
|
+
|
1107
|
+
return relationships
|
1108
|
+
|
1109
|
+
|
1110
|
+
def _identify_strongest_connections(relationships: Dict[str, Any]) -> List[Dict[str, Any]]:
|
1111
|
+
"""Identify the strongest connections across all relationships."""
|
1112
|
+
connections = []
|
1113
|
+
|
1114
|
+
for table, rels in relationships.items():
|
1115
|
+
# Count total connections for this table
|
1116
|
+
total_connections = (len(rels.get("foreign_key_refs", [])) +
|
1117
|
+
len(rels.get("semantic_similar", [])) +
|
1118
|
+
len(rels.get("temporal_related", [])) +
|
1119
|
+
len(rels.get("naming_related", [])))
|
1120
|
+
|
1121
|
+
if total_connections > 0:
|
1122
|
+
connections.append({
|
1123
|
+
"table": table,
|
1124
|
+
"total_connections": total_connections,
|
1125
|
+
"connection_types": {
|
1126
|
+
"structural": len(rels.get("foreign_key_refs", [])),
|
1127
|
+
"semantic": len(rels.get("semantic_similar", [])),
|
1128
|
+
"temporal": len(rels.get("temporal_related", [])),
|
1129
|
+
"naming": len(rels.get("naming_related", []))
|
1130
|
+
}
|
1131
|
+
})
|
1132
|
+
|
1133
|
+
# Sort by total connections and return top 5
|
1134
|
+
connections.sort(key=lambda x: x["total_connections"], reverse=True)
|
1135
|
+
return connections[:5]
|
1136
|
+
|
1137
|
+
|
1138
|
+
def _generate_relationship_recommendations(relationships: Dict[str, Any], insights: List[str]) -> List[str]:
|
1139
|
+
"""Generate actionable recommendations based on discovered relationships."""
|
1140
|
+
recommendations = []
|
1141
|
+
|
1142
|
+
# Find tables with many connections
|
1143
|
+
highly_connected = []
|
1144
|
+
for table, rels in relationships.items():
|
1145
|
+
total_connections = (len(rels.get("foreign_key_refs", [])) +
|
1146
|
+
len(rels.get("semantic_similar", [])) +
|
1147
|
+
len(rels.get("temporal_related", [])) +
|
1148
|
+
len(rels.get("naming_related", [])))
|
1149
|
+
if total_connections >= 3:
|
1150
|
+
highly_connected.append(table)
|
1151
|
+
|
1152
|
+
if highly_connected:
|
1153
|
+
recommendations.append(f"Focus queries on highly connected tables: {', '.join(highly_connected[:3])}")
|
1154
|
+
|
1155
|
+
# Find tables with semantic relationships
|
1156
|
+
semantic_tables = []
|
1157
|
+
for table, rels in relationships.items():
|
1158
|
+
if rels.get("semantic_similar"):
|
1159
|
+
semantic_tables.append(table)
|
1160
|
+
|
1161
|
+
if semantic_tables:
|
1162
|
+
recommendations.append(f"Use semantic search across related tables: {', '.join(semantic_tables[:3])}")
|
1163
|
+
|
1164
|
+
# Find tables with temporal relationships
|
1165
|
+
temporal_tables = []
|
1166
|
+
for table, rels in relationships.items():
|
1167
|
+
if rels.get("temporal_related"):
|
1168
|
+
temporal_tables.append(table)
|
1169
|
+
|
1170
|
+
if temporal_tables:
|
1171
|
+
recommendations.append(f"Consider temporal analysis for time-related tables: {', '.join(temporal_tables[:3])}")
|
1172
|
+
|
1173
|
+
if not recommendations:
|
1174
|
+
recommendations.append("Consider adding more structured relationships or content to improve discoverability")
|
1175
|
+
|
1176
|
+
return recommendations
|