mcp-sqlite-memory-bank 1.5.1__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_sqlite_memory_bank/__init__.py +2 -2
- mcp_sqlite_memory_bank/__main__.py +20 -11
- mcp_sqlite_memory_bank/database.py +234 -68
- mcp_sqlite_memory_bank/prompts.py +76 -52
- mcp_sqlite_memory_bank/resources.py +250 -150
- mcp_sqlite_memory_bank/semantic.py +50 -17
- mcp_sqlite_memory_bank/server.py +203 -31
- mcp_sqlite_memory_bank/tools/__init__.py +26 -29
- mcp_sqlite_memory_bank/tools/analytics.py +225 -139
- mcp_sqlite_memory_bank/tools/basic.py +417 -7
- mcp_sqlite_memory_bank/tools/discovery.py +636 -384
- mcp_sqlite_memory_bank/tools/search.py +159 -72
- mcp_sqlite_memory_bank/types.py +6 -1
- mcp_sqlite_memory_bank/utils.py +165 -107
- {mcp_sqlite_memory_bank-1.5.1.dist-info → mcp_sqlite_memory_bank-1.6.0.dist-info}/METADATA +54 -6
- mcp_sqlite_memory_bank-1.6.0.dist-info/RECORD +21 -0
- mcp_sqlite_memory_bank-1.5.1.dist-info/RECORD +0 -21
- {mcp_sqlite_memory_bank-1.5.1.dist-info → mcp_sqlite_memory_bank-1.6.0.dist-info}/WHEEL +0 -0
- {mcp_sqlite_memory_bank-1.5.1.dist-info → mcp_sqlite_memory_bank-1.6.0.dist-info}/entry_points.txt +0 -0
- {mcp_sqlite_memory_bank-1.5.1.dist-info → mcp_sqlite_memory_bank-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {mcp_sqlite_memory_bank-1.5.1.dist-info → mcp_sqlite_memory_bank-1.6.0.dist-info}/top_level.txt +0 -0
@@ -35,14 +35,14 @@ def intelligent_discovery(
|
|
35
35
|
Args:
|
36
36
|
discovery_goal (str): What you want to achieve
|
37
37
|
- "understand_content": Learn what data is available and how it's organized
|
38
|
-
- "find_patterns": Discover themes, relationships, and content patterns
|
38
|
+
- "find_patterns": Discover themes, relationships, and content patterns
|
39
39
|
- "explore_structure": Understand database schema and organization
|
40
40
|
- "assess_quality": Evaluate content quality and completeness
|
41
41
|
- "prepare_search": Get ready for effective content searching
|
42
42
|
focus_area (Optional[str]): Specific table or topic to focus on (default: all)
|
43
43
|
depth (str): How thorough the discovery should be
|
44
44
|
- "quick": Fast overview with key insights
|
45
|
-
- "moderate": Balanced analysis with actionable recommendations
|
45
|
+
- "moderate": Balanced analysis with actionable recommendations
|
46
46
|
- "comprehensive": Deep dive with detailed analysis
|
47
47
|
agent_id (Optional[str]): Agent identifier for learning discovery patterns
|
48
48
|
|
@@ -73,8 +73,9 @@ def intelligent_discovery(
|
|
73
73
|
"""
|
74
74
|
try:
|
75
75
|
from .. import server
|
76
|
+
|
76
77
|
db = get_database(server.DB_PATH)
|
77
|
-
|
78
|
+
|
78
79
|
# Initialize discovery session
|
79
80
|
discovery_session = {
|
80
81
|
"goal": discovery_goal,
|
@@ -84,97 +85,111 @@ def intelligent_discovery(
|
|
84
85
|
"agent_id": agent_id,
|
85
86
|
"steps_completed": [],
|
86
87
|
"insights": [],
|
87
|
-
"recommendations": []
|
88
|
+
"recommendations": [],
|
88
89
|
}
|
89
|
-
|
90
|
+
|
90
91
|
# Step 1: Basic overview
|
91
92
|
discovery_session["steps_completed"].append("basic_overview")
|
92
93
|
tables_result = db.list_tables()
|
93
94
|
if not tables_result.get("success"):
|
94
|
-
return cast(
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
95
|
+
return cast(
|
96
|
+
ToolResponse,
|
97
|
+
{
|
98
|
+
"success": False,
|
99
|
+
"error": "Failed to get basic overview",
|
100
|
+
"category": "DISCOVERY_ERROR",
|
101
|
+
"details": tables_result,
|
102
|
+
},
|
103
|
+
)
|
104
|
+
|
101
105
|
tables = tables_result.get("tables", [])
|
102
106
|
overview = {
|
103
107
|
"total_tables": len(tables),
|
104
108
|
"available_tables": tables,
|
105
|
-
"semantic_search_available": is_semantic_search_available()
|
109
|
+
"semantic_search_available": is_semantic_search_available(),
|
106
110
|
}
|
107
|
-
|
111
|
+
|
108
112
|
# Step 2: Content analysis based on goal
|
109
113
|
if discovery_goal in ["understand_content", "find_patterns", "assess_quality"]:
|
110
114
|
discovery_session["steps_completed"].append("content_analysis")
|
111
|
-
content_analysis = _analyze_content_for_discovery(
|
115
|
+
content_analysis = _analyze_content_for_discovery(
|
116
|
+
db, tables, focus_area, depth
|
117
|
+
)
|
112
118
|
overview.update(content_analysis)
|
113
|
-
|
119
|
+
|
114
120
|
# Step 3: Schema analysis for structure exploration
|
115
121
|
if discovery_goal in ["explore_structure", "understand_content"]:
|
116
122
|
discovery_session["steps_completed"].append("schema_analysis")
|
117
|
-
schema_analysis = _analyze_schema_for_discovery(
|
123
|
+
schema_analysis = _analyze_schema_for_discovery(
|
124
|
+
db, tables, focus_area, depth
|
125
|
+
)
|
118
126
|
overview.update(schema_analysis)
|
119
|
-
|
127
|
+
|
120
128
|
# Step 4: Quality assessment
|
121
129
|
if discovery_goal in ["assess_quality", "find_patterns"]:
|
122
130
|
discovery_session["steps_completed"].append("quality_assessment")
|
123
131
|
quality_analysis = _assess_content_quality(db, tables, focus_area, depth)
|
124
132
|
overview.update(quality_analysis)
|
125
|
-
|
133
|
+
|
126
134
|
# Step 5: Search readiness for search preparation
|
127
135
|
if discovery_goal in ["prepare_search", "understand_content"]:
|
128
136
|
discovery_session["steps_completed"].append("search_readiness")
|
129
137
|
search_analysis = _analyze_search_readiness(db, tables, focus_area)
|
130
138
|
overview.update(search_analysis)
|
131
|
-
|
139
|
+
|
132
140
|
# Step 6: Generate insights and recommendations
|
133
141
|
insights, recommendations, next_steps = _generate_discovery_insights(
|
134
142
|
discovery_goal, overview, focus_area, depth
|
135
143
|
)
|
136
|
-
|
144
|
+
|
137
145
|
discovery_session["insights"] = insights
|
138
146
|
discovery_session["recommendations"] = recommendations
|
139
|
-
|
147
|
+
|
140
148
|
# Step 7: Store discovery pattern for learning (if agent_id provided)
|
141
149
|
if agent_id:
|
142
150
|
_store_discovery_pattern(db, discovery_session)
|
143
|
-
|
144
|
-
return cast(
|
145
|
-
|
146
|
-
|
147
|
-
"
|
148
|
-
"
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
151
|
+
|
152
|
+
return cast(
|
153
|
+
ToolResponse,
|
154
|
+
{
|
155
|
+
"success": True,
|
156
|
+
"discovery": {
|
157
|
+
"goal": discovery_goal,
|
158
|
+
"overview": overview,
|
159
|
+
"insights": insights,
|
160
|
+
"recommendations": recommendations,
|
161
|
+
"focus_area": focus_area,
|
162
|
+
"depth": depth,
|
163
|
+
"steps_completed": discovery_session["steps_completed"],
|
164
|
+
},
|
165
|
+
"next_steps": next_steps,
|
166
|
+
"discovery_session": discovery_session,
|
167
|
+
"quick_actions": _generate_quick_actions(
|
168
|
+
discovery_goal, overview, focus_area
|
169
|
+
),
|
154
170
|
},
|
155
|
-
|
156
|
-
|
157
|
-
"quick_actions": _generate_quick_actions(discovery_goal, overview, focus_area)
|
158
|
-
})
|
159
|
-
|
171
|
+
)
|
172
|
+
|
160
173
|
except Exception as e:
|
161
|
-
return cast(
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
"
|
167
|
-
"
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
174
|
+
return cast(
|
175
|
+
ToolResponse,
|
176
|
+
{
|
177
|
+
"success": False,
|
178
|
+
"error": f"Intelligent discovery failed: {str(e)}",
|
179
|
+
"category": "DISCOVERY_ERROR",
|
180
|
+
"details": {
|
181
|
+
"goal": discovery_goal,
|
182
|
+
"focus_area": focus_area,
|
183
|
+
"depth": depth,
|
184
|
+
"agent_id": agent_id,
|
185
|
+
},
|
186
|
+
},
|
187
|
+
)
|
172
188
|
|
173
189
|
|
174
190
|
@catch_errors
|
175
191
|
def discovery_templates(
|
176
|
-
template_type: str = "first_time_exploration",
|
177
|
-
customize_for: Optional[str] = None
|
192
|
+
template_type: str = "first_time_exploration", customize_for: Optional[str] = None
|
178
193
|
) -> ToolResponse:
|
179
194
|
"""
|
180
195
|
📋 **DISCOVERY TEMPLATES** - Pre-built exploration workflows for common scenarios!
|
@@ -207,7 +222,7 @@ def discovery_templates(
|
|
207
222
|
}}
|
208
223
|
|
209
224
|
FastMCP Tool Info:
|
210
|
-
- **PROVEN WORKFLOWS**: Battle-tested discovery sequences
|
225
|
+
- **PROVEN WORKFLOWS**: Battle-tested discovery sequences
|
211
226
|
- **STEP-BY-STEP GUIDANCE**: Exact tools and parameters to use
|
212
227
|
- **CUSTOMIZABLE**: Adapt templates to your specific needs
|
213
228
|
- **LEARNING-OPTIMIZED**: Based on successful discovery patterns
|
@@ -223,9 +238,12 @@ def discovery_templates(
|
|
223
238
|
"step": 1,
|
224
239
|
"action": "Get Overview",
|
225
240
|
"tool": "intelligent_discovery",
|
226
|
-
"params": {
|
241
|
+
"params": {
|
242
|
+
"discovery_goal": "understand_content",
|
243
|
+
"depth": "moderate",
|
244
|
+
},
|
227
245
|
"purpose": "Understand what data is available and how it's organized",
|
228
|
-
"look_for": ["total tables", "content types", "data volume"]
|
246
|
+
"look_for": ["total tables", "content types", "data volume"],
|
229
247
|
},
|
230
248
|
{
|
231
249
|
"step": 2,
|
@@ -233,7 +251,11 @@ def discovery_templates(
|
|
233
251
|
"tool": "explore_tables",
|
234
252
|
"params": {"include_row_counts": True},
|
235
253
|
"purpose": "See detailed table schemas and sample data",
|
236
|
-
"look_for": [
|
254
|
+
"look_for": [
|
255
|
+
"column types",
|
256
|
+
"sample content",
|
257
|
+
"data relationships",
|
258
|
+
],
|
237
259
|
},
|
238
260
|
{
|
239
261
|
"step": 3,
|
@@ -241,7 +263,11 @@ def discovery_templates(
|
|
241
263
|
"tool": "auto_smart_search",
|
242
264
|
"params": {"query": "recent important information", "limit": 5},
|
243
265
|
"purpose": "Understand search capabilities and content accessibility",
|
244
|
-
"look_for": [
|
266
|
+
"look_for": [
|
267
|
+
"search quality",
|
268
|
+
"result relevance",
|
269
|
+
"content types found",
|
270
|
+
],
|
245
271
|
},
|
246
272
|
{
|
247
273
|
"step": 4,
|
@@ -249,17 +275,20 @@ def discovery_templates(
|
|
249
275
|
"tool": "get_content_health_score",
|
250
276
|
"params": {},
|
251
277
|
"purpose": "Understand overall memory bank quality and opportunities",
|
252
|
-
"look_for": [
|
253
|
-
|
278
|
+
"look_for": [
|
279
|
+
"health score",
|
280
|
+
"improvement recommendations",
|
281
|
+
"strengths",
|
282
|
+
],
|
283
|
+
},
|
254
284
|
],
|
255
285
|
"success_criteria": [
|
256
286
|
"Understand what types of information are stored",
|
257
287
|
"Know which tables contain the most valuable content",
|
258
288
|
"Identify best search strategies for this memory bank",
|
259
|
-
"Have actionable next steps for productive use"
|
260
|
-
]
|
289
|
+
"Have actionable next steps for productive use",
|
290
|
+
],
|
261
291
|
},
|
262
|
-
|
263
292
|
"content_audit": {
|
264
293
|
"name": "Content Quality Audit",
|
265
294
|
"description": "Systematic review of content quality and completeness",
|
@@ -271,7 +300,11 @@ def discovery_templates(
|
|
271
300
|
"tool": "get_content_health_score",
|
272
301
|
"params": {},
|
273
302
|
"purpose": "Get overall quality metrics and problem areas",
|
274
|
-
"look_for": [
|
303
|
+
"look_for": [
|
304
|
+
"quality scores",
|
305
|
+
"problem tables",
|
306
|
+
"recommendations",
|
307
|
+
],
|
275
308
|
},
|
276
309
|
{
|
277
310
|
"step": 2,
|
@@ -279,7 +312,11 @@ def discovery_templates(
|
|
279
312
|
"tool": "analyze_memory_patterns",
|
280
313
|
"params": {},
|
281
314
|
"purpose": "Identify content patterns and organizational issues",
|
282
|
-
"look_for": [
|
315
|
+
"look_for": [
|
316
|
+
"content distribution",
|
317
|
+
"sparse tables",
|
318
|
+
"organization gaps",
|
319
|
+
],
|
283
320
|
},
|
284
321
|
{
|
285
322
|
"step": 3,
|
@@ -287,25 +324,34 @@ def discovery_templates(
|
|
287
324
|
"tool": "explore_tables",
|
288
325
|
"params": {"include_row_counts": True},
|
289
326
|
"purpose": "Detailed examination of each table's content",
|
290
|
-
"look_for": [
|
327
|
+
"look_for": [
|
328
|
+
"empty tables",
|
329
|
+
"low-quality content",
|
330
|
+
"missing data",
|
331
|
+
],
|
291
332
|
},
|
292
333
|
{
|
293
334
|
"step": 4,
|
294
335
|
"action": "Search Readiness",
|
295
336
|
"tool": "intelligent_discovery",
|
296
|
-
"params": {
|
337
|
+
"params": {
|
338
|
+
"discovery_goal": "prepare_search",
|
339
|
+
"depth": "comprehensive",
|
340
|
+
},
|
297
341
|
"purpose": "Ensure content is optimally searchable",
|
298
|
-
"look_for": [
|
299
|
-
|
342
|
+
"look_for": [
|
343
|
+
"embedding coverage",
|
344
|
+
"search optimization opportunities",
|
345
|
+
],
|
346
|
+
},
|
300
347
|
],
|
301
348
|
"success_criteria": [
|
302
349
|
"Identify all content quality issues",
|
303
350
|
"Have specific recommendations for improvement",
|
304
351
|
"Understand which content areas need attention",
|
305
|
-
"Know how to optimize for better searchability"
|
306
|
-
]
|
352
|
+
"Know how to optimize for better searchability",
|
353
|
+
],
|
307
354
|
},
|
308
|
-
|
309
355
|
"search_optimization": {
|
310
356
|
"name": "Search Optimization Setup",
|
311
357
|
"description": "Prepare memory bank for optimal content discovery and searching",
|
@@ -315,9 +361,16 @@ def discovery_templates(
|
|
315
361
|
"step": 1,
|
316
362
|
"action": "Search Capability Assessment",
|
317
363
|
"tool": "intelligent_discovery",
|
318
|
-
"params": {
|
364
|
+
"params": {
|
365
|
+
"discovery_goal": "prepare_search",
|
366
|
+
"depth": "comprehensive",
|
367
|
+
},
|
319
368
|
"purpose": "Understand current search capabilities and gaps",
|
320
|
-
"look_for": [
|
369
|
+
"look_for": [
|
370
|
+
"semantic readiness",
|
371
|
+
"text column identification",
|
372
|
+
"embedding status",
|
373
|
+
],
|
321
374
|
},
|
322
375
|
{
|
323
376
|
"step": 2,
|
@@ -325,7 +378,11 @@ def discovery_templates(
|
|
325
378
|
"tool": "analyze_memory_patterns",
|
326
379
|
"params": {},
|
327
380
|
"purpose": "Identify high-value content for search optimization",
|
328
|
-
"look_for": [
|
381
|
+
"look_for": [
|
382
|
+
"text-rich tables",
|
383
|
+
"high-value content",
|
384
|
+
"search opportunities",
|
385
|
+
],
|
329
386
|
},
|
330
387
|
{
|
331
388
|
"step": 3,
|
@@ -333,7 +390,7 @@ def discovery_templates(
|
|
333
390
|
"tool": "search_content",
|
334
391
|
"params": {"query": "test search capabilities", "limit": 10},
|
335
392
|
"purpose": "Baseline current search performance",
|
336
|
-
"look_for": ["search result quality", "coverage", "relevance"]
|
393
|
+
"look_for": ["search result quality", "coverage", "relevance"],
|
337
394
|
},
|
338
395
|
{
|
339
396
|
"step": 4,
|
@@ -341,17 +398,19 @@ def discovery_templates(
|
|
341
398
|
"tool": "auto_semantic_search",
|
342
399
|
"params": {"query": "important valuable content", "limit": 5},
|
343
400
|
"purpose": "Enable and test semantic search capabilities",
|
344
|
-
"look_for": [
|
345
|
-
|
401
|
+
"look_for": [
|
402
|
+
"automatic embedding generation",
|
403
|
+
"semantic result quality",
|
404
|
+
],
|
405
|
+
},
|
346
406
|
],
|
347
407
|
"success_criteria": [
|
348
408
|
"Semantic search is enabled for key tables",
|
349
409
|
"Both keyword and semantic search work effectively",
|
350
410
|
"Search performance meets quality standards",
|
351
|
-
"Clear strategy for ongoing search optimization"
|
352
|
-
]
|
411
|
+
"Clear strategy for ongoing search optimization",
|
412
|
+
],
|
353
413
|
},
|
354
|
-
|
355
414
|
"problem_solving": {
|
356
415
|
"name": "Problem-Solving Discovery",
|
357
416
|
"description": "Find information to solve specific problems or answer questions",
|
@@ -361,25 +420,45 @@ def discovery_templates(
|
|
361
420
|
"step": 1,
|
362
421
|
"action": "Quick Content Survey",
|
363
422
|
"tool": "intelligent_discovery",
|
364
|
-
"params": {
|
423
|
+
"params": {
|
424
|
+
"discovery_goal": "understand_content",
|
425
|
+
"depth": "quick",
|
426
|
+
},
|
365
427
|
"purpose": "Rapid overview of available information",
|
366
|
-
"look_for": [
|
428
|
+
"look_for": [
|
429
|
+
"relevant content areas",
|
430
|
+
"potential information sources",
|
431
|
+
],
|
367
432
|
},
|
368
433
|
{
|
369
434
|
"step": 2,
|
370
435
|
"action": "Targeted Search",
|
371
436
|
"tool": "auto_smart_search",
|
372
|
-
"params": {
|
437
|
+
"params": {
|
438
|
+
"query": "REPLACE_WITH_PROBLEM_KEYWORDS",
|
439
|
+
"limit": 10,
|
440
|
+
},
|
373
441
|
"purpose": "Find directly relevant information",
|
374
|
-
"look_for": [
|
442
|
+
"look_for": [
|
443
|
+
"directly applicable content",
|
444
|
+
"related information",
|
445
|
+
"context clues",
|
446
|
+
],
|
375
447
|
},
|
376
448
|
{
|
377
449
|
"step": 3,
|
378
450
|
"action": "Related Content Discovery",
|
379
451
|
"tool": "auto_semantic_search",
|
380
|
-
"params": {
|
452
|
+
"params": {
|
453
|
+
"query": "REPLACE_WITH_CONCEPTUAL_TERMS",
|
454
|
+
"similarity_threshold": 0.3,
|
455
|
+
},
|
381
456
|
"purpose": "Find conceptually related information",
|
382
|
-
"look_for": [
|
457
|
+
"look_for": [
|
458
|
+
"broader context",
|
459
|
+
"related concepts",
|
460
|
+
"background information",
|
461
|
+
],
|
383
462
|
},
|
384
463
|
{
|
385
464
|
"step": 4,
|
@@ -387,60 +466,80 @@ def discovery_templates(
|
|
387
466
|
"tool": "explore_tables",
|
388
467
|
"params": {"include_row_counts": True},
|
389
468
|
"purpose": "Identify what information might be missing",
|
390
|
-
"look_for": [
|
391
|
-
|
469
|
+
"look_for": [
|
470
|
+
"information gaps",
|
471
|
+
"additional context sources",
|
472
|
+
"related data",
|
473
|
+
],
|
474
|
+
},
|
392
475
|
],
|
393
476
|
"customization_note": "Replace REPLACE_WITH_PROBLEM_KEYWORDS and REPLACE_WITH_CONCEPTUAL_TERMS with your specific problem terms",
|
394
477
|
"success_criteria": [
|
395
478
|
"Found directly relevant information",
|
396
479
|
"Identified related/contextual information",
|
397
480
|
"Understand what information might be missing",
|
398
|
-
"Have clear next steps for problem resolution"
|
399
|
-
]
|
400
|
-
}
|
481
|
+
"Have clear next steps for problem resolution",
|
482
|
+
],
|
483
|
+
},
|
401
484
|
}
|
402
|
-
|
485
|
+
|
403
486
|
if template_type not in templates:
|
404
487
|
available_templates = list(templates.keys())
|
405
|
-
return cast(
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
"
|
411
|
-
"
|
412
|
-
|
413
|
-
|
414
|
-
|
488
|
+
return cast(
|
489
|
+
ToolResponse,
|
490
|
+
{
|
491
|
+
"success": False,
|
492
|
+
"error": f"Template '{template_type}' not found",
|
493
|
+
"category": "TEMPLATE_ERROR",
|
494
|
+
"details": {
|
495
|
+
"available_templates": available_templates,
|
496
|
+
"requested_template": template_type,
|
497
|
+
},
|
498
|
+
},
|
499
|
+
)
|
500
|
+
|
415
501
|
template = templates[template_type]
|
416
|
-
|
502
|
+
|
417
503
|
# Customize template if requested
|
418
504
|
if customize_for:
|
419
505
|
template = _customize_template(template, customize_for)
|
420
|
-
|
421
|
-
return cast(
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
506
|
+
|
507
|
+
return cast(
|
508
|
+
ToolResponse,
|
509
|
+
{
|
510
|
+
"success": True,
|
511
|
+
"template": template,
|
512
|
+
"template_type": template_type,
|
513
|
+
"customized_for": customize_for,
|
514
|
+
"available_templates": list(templates.keys()),
|
515
|
+
"usage_tip": "Follow the workflow steps in order, adapting parameters as needed for your specific situation",
|
516
|
+
},
|
517
|
+
)
|
518
|
+
|
430
519
|
except Exception as e:
|
431
|
-
return cast(
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
520
|
+
return cast(
|
521
|
+
ToolResponse,
|
522
|
+
{
|
523
|
+
"success": False,
|
524
|
+
"error": f"Discovery template generation failed: {str(e)}",
|
525
|
+
"category": "TEMPLATE_ERROR",
|
526
|
+
"details": {
|
527
|
+
"template_type": template_type,
|
528
|
+
"customize_for": customize_for,
|
529
|
+
},
|
530
|
+
},
|
531
|
+
)
|
437
532
|
|
438
533
|
|
439
534
|
@catch_errors
|
440
535
|
def discover_relationships(
|
441
536
|
table_name: Optional[str] = None,
|
442
|
-
relationship_types: List[str] = [
|
443
|
-
|
537
|
+
relationship_types: List[str] = [
|
538
|
+
"foreign_keys",
|
539
|
+
"semantic_similarity",
|
540
|
+
"temporal_patterns",
|
541
|
+
],
|
542
|
+
similarity_threshold: float = 0.6,
|
444
543
|
) -> ToolResponse:
|
445
544
|
"""
|
446
545
|
🔗 **RELATIONSHIP DISCOVERY** - Find hidden connections in your data!
|
@@ -478,113 +577,148 @@ def discover_relationships(
|
|
478
577
|
"""
|
479
578
|
try:
|
480
579
|
from .. import server
|
580
|
+
|
481
581
|
db = get_database(server.DB_PATH)
|
482
|
-
|
582
|
+
|
483
583
|
# Get all tables or focus on specific table
|
484
584
|
tables_result = db.list_tables()
|
485
585
|
if not tables_result.get("success"):
|
486
586
|
return cast(ToolResponse, tables_result)
|
487
|
-
|
587
|
+
|
488
588
|
all_tables = tables_result.get("tables", [])
|
489
589
|
target_tables = [table_name] if table_name else all_tables
|
490
|
-
|
590
|
+
|
491
591
|
relationships = {}
|
492
592
|
insights = []
|
493
|
-
|
593
|
+
|
494
594
|
for target_table in target_tables:
|
495
595
|
if target_table not in all_tables:
|
496
596
|
continue
|
497
|
-
|
597
|
+
|
498
598
|
table_relationships = {
|
499
599
|
"foreign_key_refs": [],
|
500
600
|
"semantic_similar": [],
|
501
601
|
"temporal_related": [],
|
502
|
-
"naming_related": []
|
602
|
+
"naming_related": [],
|
503
603
|
}
|
504
|
-
|
604
|
+
|
505
605
|
# Discover foreign key relationships
|
506
606
|
if "foreign_keys" in relationship_types:
|
507
607
|
fk_relationships = _discover_foreign_keys(db, target_table, all_tables)
|
508
608
|
table_relationships["foreign_key_refs"] = fk_relationships
|
509
609
|
if fk_relationships:
|
510
|
-
insights.append(
|
511
|
-
|
610
|
+
insights.append(
|
611
|
+
f"Table '{target_table}' has structural relationships with {len(fk_relationships)} other tables"
|
612
|
+
)
|
613
|
+
|
512
614
|
# Discover semantic similarity relationships
|
513
|
-
if
|
615
|
+
if (
|
616
|
+
"semantic_similarity" in relationship_types
|
617
|
+
and is_semantic_search_available()
|
618
|
+
):
|
514
619
|
semantic_relationships = _discover_semantic_relationships(
|
515
620
|
db, target_table, all_tables, similarity_threshold
|
516
621
|
)
|
517
622
|
table_relationships["semantic_similar"] = semantic_relationships
|
518
623
|
if semantic_relationships:
|
519
|
-
insights.append(
|
520
|
-
|
624
|
+
insights.append(
|
625
|
+
f"Table '{target_table}' has semantic similarity with {len(semantic_relationships)} tables"
|
626
|
+
)
|
627
|
+
|
521
628
|
# Discover temporal patterns
|
522
629
|
if "temporal_patterns" in relationship_types:
|
523
|
-
temporal_relationships = _discover_temporal_relationships(
|
630
|
+
temporal_relationships = _discover_temporal_relationships(
|
631
|
+
db, target_table, all_tables
|
632
|
+
)
|
524
633
|
table_relationships["temporal_related"] = temporal_relationships
|
525
634
|
if temporal_relationships:
|
526
|
-
insights.append(
|
527
|
-
|
635
|
+
insights.append(
|
636
|
+
f"Table '{target_table}' shows temporal patterns with {len(temporal_relationships)} tables"
|
637
|
+
)
|
638
|
+
|
528
639
|
# Discover naming pattern relationships
|
529
640
|
if "naming_patterns" in relationship_types:
|
530
|
-
naming_relationships = _discover_naming_relationships(
|
641
|
+
naming_relationships = _discover_naming_relationships(
|
642
|
+
target_table, all_tables
|
643
|
+
)
|
531
644
|
table_relationships["naming_related"] = naming_relationships
|
532
645
|
if naming_relationships:
|
533
|
-
insights.append(
|
534
|
-
|
646
|
+
insights.append(
|
647
|
+
f"Table '{target_table}' has naming pattern relationships with {len(naming_relationships)} tables"
|
648
|
+
)
|
649
|
+
|
535
650
|
relationships[target_table] = table_relationships
|
536
|
-
|
651
|
+
|
537
652
|
# Generate relationship insights
|
538
653
|
total_relationships = sum(
|
539
|
-
len(rel["foreign_key_refs"])
|
540
|
-
|
654
|
+
len(rel["foreign_key_refs"])
|
655
|
+
+ len(rel["semantic_similar"])
|
656
|
+
+ len(rel["temporal_related"])
|
657
|
+
+ len(rel["naming_related"])
|
541
658
|
for rel in relationships.values()
|
542
659
|
)
|
543
|
-
|
660
|
+
|
544
661
|
if total_relationships == 0:
|
545
|
-
insights.append(
|
662
|
+
insights.append(
|
663
|
+
"No strong relationships discovered. Consider adding more content or setting up semantic search."
|
664
|
+
)
|
546
665
|
else:
|
547
|
-
insights.append(
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
"
|
555
|
-
"
|
556
|
-
"
|
666
|
+
insights.append(
|
667
|
+
f"Discovered {total_relationships} total relationships across {len(relationships)} tables"
|
668
|
+
)
|
669
|
+
|
670
|
+
return cast(
|
671
|
+
ToolResponse,
|
672
|
+
{
|
673
|
+
"success": True,
|
674
|
+
"relationships": relationships,
|
675
|
+
"insights": insights,
|
676
|
+
"relationship_summary": {
|
677
|
+
"total_relationships": total_relationships,
|
678
|
+
"tables_analyzed": len(relationships),
|
679
|
+
"strongest_connections": _identify_strongest_connections(
|
680
|
+
relationships
|
681
|
+
),
|
682
|
+
},
|
683
|
+
"recommendations": _generate_relationship_recommendations(
|
684
|
+
relationships, insights
|
685
|
+
),
|
557
686
|
},
|
558
|
-
|
559
|
-
|
560
|
-
|
687
|
+
)
|
688
|
+
|
561
689
|
except Exception as e:
|
562
|
-
return cast(
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
"
|
568
|
-
"
|
569
|
-
|
570
|
-
|
571
|
-
|
690
|
+
return cast(
|
691
|
+
ToolResponse,
|
692
|
+
{
|
693
|
+
"success": False,
|
694
|
+
"error": f"Relationship discovery failed: {str(e)}",
|
695
|
+
"category": "RELATIONSHIP_ERROR",
|
696
|
+
"details": {
|
697
|
+
"table_name": table_name,
|
698
|
+
"relationship_types": relationship_types,
|
699
|
+
"similarity_threshold": similarity_threshold,
|
700
|
+
},
|
701
|
+
},
|
702
|
+
)
|
572
703
|
|
573
704
|
|
574
705
|
# Helper functions for discovery orchestration
|
575
706
|
|
576
|
-
|
707
|
+
|
708
|
+
def _analyze_content_for_discovery(
|
709
|
+
db, tables: List[str], focus_area: Optional[str], depth: str
|
710
|
+
) -> Dict[str, Any]:
|
577
711
|
"""Analyze content patterns and distribution."""
|
578
712
|
content_analysis = {
|
579
713
|
"total_rows": 0,
|
580
714
|
"content_distribution": {},
|
581
715
|
"text_rich_tables": [],
|
582
716
|
"sparse_tables": [],
|
583
|
-
"high_value_tables": []
|
717
|
+
"high_value_tables": [],
|
584
718
|
}
|
585
|
-
|
719
|
+
|
586
720
|
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
587
|
-
|
721
|
+
|
588
722
|
for table_name in target_tables:
|
589
723
|
try:
|
590
724
|
rows_result = db.read_rows(table_name)
|
@@ -593,109 +727,129 @@ def _analyze_content_for_discovery(db, tables: List[str], focus_area: Optional[s
|
|
593
727
|
row_count = len(rows)
|
594
728
|
content_analysis["total_rows"] += row_count
|
595
729
|
content_analysis["content_distribution"][table_name] = row_count
|
596
|
-
|
730
|
+
|
597
731
|
# Analyze content quality if depth allows
|
598
732
|
if depth in ["moderate", "comprehensive"] and rows:
|
599
733
|
# Sample content quality
|
600
734
|
sample_size = min(3, len(rows))
|
601
735
|
total_content_length = 0
|
602
|
-
|
736
|
+
|
603
737
|
for row in rows[:sample_size]:
|
604
738
|
for value in row.values():
|
605
739
|
if isinstance(value, str):
|
606
740
|
total_content_length += len(value)
|
607
|
-
|
608
|
-
avg_content_length =
|
609
|
-
|
741
|
+
|
742
|
+
avg_content_length = (
|
743
|
+
total_content_length / sample_size if sample_size > 0 else 0
|
744
|
+
)
|
745
|
+
|
610
746
|
if avg_content_length > 200:
|
611
747
|
content_analysis["text_rich_tables"].append(table_name)
|
612
748
|
if avg_content_length > 500:
|
613
749
|
content_analysis["high_value_tables"].append(table_name)
|
614
750
|
if row_count < 5:
|
615
751
|
content_analysis["sparse_tables"].append(table_name)
|
616
|
-
|
752
|
+
|
617
753
|
except Exception:
|
618
754
|
continue
|
619
|
-
|
755
|
+
|
620
756
|
return content_analysis
|
621
757
|
|
622
758
|
|
623
|
-
def _analyze_schema_for_discovery(
|
759
|
+
def _analyze_schema_for_discovery(
|
760
|
+
db, tables: List[str], focus_area: Optional[str], depth: str
|
761
|
+
) -> Dict[str, Any]:
|
624
762
|
"""Analyze schema structure and organization."""
|
625
763
|
schema_analysis = {
|
626
764
|
"total_columns": 0,
|
627
765
|
"text_columns_by_table": {},
|
628
766
|
"well_structured_tables": [],
|
629
|
-
"schema_issues": []
|
767
|
+
"schema_issues": [],
|
630
768
|
}
|
631
|
-
|
769
|
+
|
632
770
|
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
633
|
-
|
771
|
+
|
634
772
|
for table_name in target_tables:
|
635
773
|
try:
|
636
774
|
schema_result = db.describe_table(table_name)
|
637
775
|
if schema_result.get("success"):
|
638
776
|
columns = schema_result.get("columns", [])
|
639
777
|
schema_analysis["total_columns"] += len(columns)
|
640
|
-
|
778
|
+
|
641
779
|
# Find text columns
|
642
|
-
text_columns = [
|
780
|
+
text_columns = [
|
781
|
+
col for col in columns if "TEXT" in col.get("type", "").upper()
|
782
|
+
]
|
643
783
|
schema_analysis["text_columns_by_table"][table_name] = len(text_columns)
|
644
|
-
|
784
|
+
|
645
785
|
# Check for well-structured tables
|
646
786
|
has_id = any(col.get("name") == "id" for col in columns)
|
647
|
-
has_timestamp = any(
|
787
|
+
has_timestamp = any(
|
788
|
+
"timestamp" in col.get("name", "").lower() for col in columns
|
789
|
+
)
|
648
790
|
has_text_content = len(text_columns) > 0
|
649
|
-
|
791
|
+
|
650
792
|
if has_id and has_timestamp and has_text_content:
|
651
793
|
schema_analysis["well_structured_tables"].append(table_name)
|
652
|
-
|
794
|
+
|
653
795
|
# Identify schema issues
|
654
796
|
if len(columns) < 2:
|
655
|
-
schema_analysis["schema_issues"].append(
|
797
|
+
schema_analysis["schema_issues"].append(
|
798
|
+
f"Table '{table_name}' has very few columns"
|
799
|
+
)
|
656
800
|
if not has_id:
|
657
|
-
schema_analysis["schema_issues"].append(
|
658
|
-
|
801
|
+
schema_analysis["schema_issues"].append(
|
802
|
+
f"Table '{table_name}' lacks ID column"
|
803
|
+
)
|
804
|
+
|
659
805
|
except Exception:
|
660
806
|
continue
|
661
|
-
|
807
|
+
|
662
808
|
return schema_analysis
|
663
809
|
|
664
810
|
|
665
|
-
def _assess_content_quality(
|
811
|
+
def _assess_content_quality(
|
812
|
+
db, tables: List[str], focus_area: Optional[str], depth: str
|
813
|
+
) -> Dict[str, Any]:
|
666
814
|
"""Assess overall content quality."""
|
667
815
|
quality_analysis = {
|
668
816
|
"quality_scores": {},
|
669
817
|
"overall_quality": 0.0,
|
670
818
|
"improvement_opportunities": [],
|
671
|
-
"quality_distribution": {"high": 0, "medium": 0, "low": 0}
|
819
|
+
"quality_distribution": {"high": 0, "medium": 0, "low": 0},
|
672
820
|
}
|
673
|
-
|
821
|
+
|
674
822
|
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
675
823
|
total_score = 0
|
676
824
|
table_count = 0
|
677
|
-
|
825
|
+
|
678
826
|
for table_name in target_tables:
|
679
827
|
try:
|
680
828
|
rows_result = db.read_rows(table_name)
|
681
829
|
if rows_result.get("success"):
|
682
830
|
rows = rows_result.get("rows", [])
|
683
|
-
|
831
|
+
|
684
832
|
if not rows:
|
685
833
|
quality_analysis["quality_scores"][table_name] = 0.0
|
686
|
-
quality_analysis["improvement_opportunities"].append(
|
834
|
+
quality_analysis["improvement_opportunities"].append(
|
835
|
+
f"Table '{table_name}' is empty"
|
836
|
+
)
|
687
837
|
quality_analysis["quality_distribution"]["low"] += 1
|
688
838
|
continue
|
689
|
-
|
839
|
+
|
690
840
|
# Calculate quality score
|
691
841
|
sample_size = min(5, len(rows))
|
692
842
|
content_scores = []
|
693
|
-
|
843
|
+
|
694
844
|
for row in rows[:sample_size]:
|
695
845
|
row_score = 0
|
696
|
-
non_null_fields = sum(
|
697
|
-
|
698
|
-
|
846
|
+
non_null_fields = sum(
|
847
|
+
1 for v in row.values() if v is not None and str(v).strip()
|
848
|
+
)
|
849
|
+
total_content_length = sum(
|
850
|
+
len(str(v)) for v in row.values() if v is not None
|
851
|
+
)
|
852
|
+
|
699
853
|
# Score based on completeness and content richness
|
700
854
|
if non_null_fields > 2:
|
701
855
|
row_score += 3
|
@@ -703,12 +857,14 @@ def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], de
|
|
703
857
|
row_score += 4
|
704
858
|
if total_content_length > 500:
|
705
859
|
row_score += 3
|
706
|
-
|
860
|
+
|
707
861
|
content_scores.append(min(10, row_score))
|
708
|
-
|
709
|
-
table_quality =
|
862
|
+
|
863
|
+
table_quality = (
|
864
|
+
sum(content_scores) / len(content_scores) if content_scores else 0
|
865
|
+
)
|
710
866
|
quality_analysis["quality_scores"][table_name] = round(table_quality, 1)
|
711
|
-
|
867
|
+
|
712
868
|
# Categorize quality
|
713
869
|
if table_quality >= 7:
|
714
870
|
quality_analysis["quality_distribution"]["high"] += 1
|
@@ -719,164 +875,200 @@ def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], de
|
|
719
875
|
quality_analysis["improvement_opportunities"].append(
|
720
876
|
f"Table '{table_name}' has low content quality (score: {table_quality:.1f})"
|
721
877
|
)
|
722
|
-
|
878
|
+
|
723
879
|
total_score += table_quality
|
724
880
|
table_count += 1
|
725
|
-
|
881
|
+
|
726
882
|
except Exception:
|
727
883
|
continue
|
728
|
-
|
729
|
-
quality_analysis["overall_quality"] =
|
730
|
-
|
884
|
+
|
885
|
+
quality_analysis["overall_quality"] = (
|
886
|
+
round(total_score / table_count, 1) if table_count > 0 else 0.0
|
887
|
+
)
|
888
|
+
|
731
889
|
return quality_analysis
|
732
890
|
|
733
891
|
|
734
|
-
def _analyze_search_readiness(
|
892
|
+
def _analyze_search_readiness(
|
893
|
+
db, tables: List[str], focus_area: Optional[str]
|
894
|
+
) -> Dict[str, Any]:
|
735
895
|
"""Analyze readiness for effective searching."""
|
736
896
|
search_analysis = {
|
737
897
|
"semantic_ready_tables": [],
|
738
898
|
"text_searchable_tables": [],
|
739
899
|
"search_optimization_needed": [],
|
740
|
-
"embedding_coverage": {}
|
900
|
+
"embedding_coverage": {},
|
741
901
|
}
|
742
|
-
|
902
|
+
|
743
903
|
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
744
|
-
|
904
|
+
|
745
905
|
for table_name in target_tables:
|
746
906
|
try:
|
747
907
|
# Check schema for text content
|
748
908
|
schema_result = db.describe_table(table_name)
|
749
909
|
if schema_result.get("success"):
|
750
910
|
columns = schema_result.get("columns", [])
|
751
|
-
text_columns = [
|
752
|
-
|
911
|
+
text_columns = [
|
912
|
+
col for col in columns if "TEXT" in col.get("type", "").upper()
|
913
|
+
]
|
914
|
+
|
753
915
|
if text_columns:
|
754
916
|
search_analysis["text_searchable_tables"].append(table_name)
|
755
|
-
|
917
|
+
|
756
918
|
# Check semantic search readiness if available
|
757
919
|
if is_semantic_search_available():
|
758
920
|
embedding_stats = db.get_embedding_stats(table_name)
|
759
921
|
if embedding_stats.get("success"):
|
760
922
|
coverage = embedding_stats.get("coverage_percent", 0)
|
761
923
|
search_analysis["embedding_coverage"][table_name] = coverage
|
762
|
-
|
924
|
+
|
763
925
|
if coverage > 80:
|
764
|
-
search_analysis["semantic_ready_tables"].append(
|
926
|
+
search_analysis["semantic_ready_tables"].append(
|
927
|
+
table_name
|
928
|
+
)
|
765
929
|
elif len(text_columns) > 0:
|
766
|
-
search_analysis["search_optimization_needed"].append(
|
767
|
-
|
930
|
+
search_analysis["search_optimization_needed"].append(
|
931
|
+
table_name
|
932
|
+
)
|
933
|
+
|
768
934
|
except Exception:
|
769
935
|
continue
|
770
|
-
|
936
|
+
|
771
937
|
return search_analysis
|
772
938
|
|
773
939
|
|
774
|
-
def _generate_discovery_insights(
|
940
|
+
def _generate_discovery_insights(
|
941
|
+
discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str], depth: str
|
942
|
+
) -> tuple:
|
775
943
|
"""Generate insights and recommendations based on discovery results."""
|
776
944
|
insights = []
|
777
945
|
recommendations = []
|
778
946
|
next_steps = []
|
779
|
-
|
947
|
+
|
780
948
|
total_tables = overview.get("total_tables", 0)
|
781
949
|
total_rows = overview.get("total_rows", 0)
|
782
|
-
|
950
|
+
|
783
951
|
# Goal-specific insights
|
784
952
|
if discovery_goal == "understand_content":
|
785
|
-
insights.append(
|
786
|
-
|
953
|
+
insights.append(
|
954
|
+
f"Memory bank contains {total_tables} tables with {total_rows} total rows"
|
955
|
+
)
|
956
|
+
|
787
957
|
high_value_tables = overview.get("high_value_tables", [])
|
788
958
|
if high_value_tables:
|
789
|
-
insights.append(
|
790
|
-
|
791
|
-
|
792
|
-
|
959
|
+
insights.append(
|
960
|
+
f"High-value content found in: {', '.join(high_value_tables[:3])}"
|
961
|
+
)
|
962
|
+
recommendations.append(
|
963
|
+
f"Focus search efforts on high-value tables: {', '.join(high_value_tables)}"
|
964
|
+
)
|
965
|
+
next_steps.append(
|
966
|
+
f"Use auto_smart_search() to explore content in {high_value_tables[0]}"
|
967
|
+
)
|
968
|
+
|
793
969
|
sparse_tables = overview.get("sparse_tables", [])
|
794
970
|
if sparse_tables:
|
795
971
|
insights.append(f"Sparse tables detected: {', '.join(sparse_tables)}")
|
796
972
|
recommendations.append("Consider consolidating or expanding sparse tables")
|
797
|
-
|
973
|
+
|
798
974
|
elif discovery_goal == "find_patterns":
|
799
975
|
text_rich_tables = overview.get("text_rich_tables", [])
|
800
976
|
if text_rich_tables:
|
801
|
-
insights.append(
|
977
|
+
insights.append(
|
978
|
+
f"Text-rich content found in {len(text_rich_tables)} tables"
|
979
|
+
)
|
802
980
|
next_steps.append("Use semantic search to find content patterns")
|
803
|
-
|
981
|
+
|
804
982
|
quality_scores = overview.get("quality_scores", {})
|
805
983
|
if quality_scores:
|
806
984
|
avg_quality = sum(quality_scores.values()) / len(quality_scores)
|
807
985
|
insights.append(f"Average content quality: {avg_quality:.1f}/10")
|
808
|
-
|
986
|
+
|
809
987
|
elif discovery_goal == "explore_structure":
|
810
988
|
well_structured = overview.get("well_structured_tables", [])
|
811
989
|
if well_structured:
|
812
990
|
insights.append(f"Well-structured tables: {', '.join(well_structured)}")
|
813
991
|
recommendations.append("Use well-structured tables as primary data sources")
|
814
|
-
|
992
|
+
|
815
993
|
schema_issues = overview.get("schema_issues", [])
|
816
994
|
if schema_issues:
|
817
995
|
insights.extend(schema_issues[:3]) # Show first 3 issues
|
818
|
-
|
996
|
+
|
819
997
|
elif discovery_goal == "assess_quality":
|
820
998
|
overall_quality = overview.get("overall_quality", 0)
|
821
999
|
insights.append(f"Overall content quality score: {overall_quality}/10")
|
822
|
-
|
1000
|
+
|
823
1001
|
improvement_opportunities = overview.get("improvement_opportunities", [])
|
824
1002
|
recommendations.extend(improvement_opportunities[:3])
|
825
|
-
|
1003
|
+
|
826
1004
|
elif discovery_goal == "prepare_search":
|
827
1005
|
semantic_ready = overview.get("semantic_ready_tables", [])
|
828
1006
|
optimization_needed = overview.get("search_optimization_needed", [])
|
829
|
-
|
1007
|
+
|
830
1008
|
if semantic_ready:
|
831
1009
|
insights.append(f"Semantic search ready for {len(semantic_ready)} tables")
|
832
1010
|
next_steps.append("Use auto_semantic_search() for conceptual queries")
|
833
|
-
|
1011
|
+
|
834
1012
|
if optimization_needed:
|
835
|
-
insights.append(
|
836
|
-
|
837
|
-
|
1013
|
+
insights.append(
|
1014
|
+
f"Search optimization needed for {len(optimization_needed)} tables"
|
1015
|
+
)
|
1016
|
+
next_steps.append(
|
1017
|
+
f"Set up embeddings for: {', '.join(optimization_needed[:2])}"
|
1018
|
+
)
|
1019
|
+
|
838
1020
|
# Universal recommendations
|
839
1021
|
if overview.get("semantic_search_available"):
|
840
1022
|
recommendations.append("Use auto_smart_search() for best search results")
|
841
1023
|
else:
|
842
|
-
recommendations.append(
|
843
|
-
|
1024
|
+
recommendations.append(
|
1025
|
+
"Install sentence-transformers for semantic search capabilities"
|
1026
|
+
)
|
1027
|
+
|
844
1028
|
if not next_steps:
|
845
1029
|
next_steps.append("Use explore_tables() for detailed content examination")
|
846
1030
|
next_steps.append("Try auto_smart_search() to find specific information")
|
847
|
-
|
1031
|
+
|
848
1032
|
return insights, recommendations, next_steps
|
849
1033
|
|
850
1034
|
|
851
|
-
def _generate_quick_actions(
|
1035
|
+
def _generate_quick_actions(
|
1036
|
+
discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str]
|
1037
|
+
) -> List[Dict[str, Any]]:
|
852
1038
|
"""Generate quick action suggestions."""
|
853
1039
|
actions = []
|
854
|
-
|
1040
|
+
|
855
1041
|
high_value_tables = overview.get("high_value_tables", [])
|
856
|
-
|
1042
|
+
|
857
1043
|
if discovery_goal == "understand_content" and high_value_tables:
|
858
|
-
actions.append(
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
863
|
-
|
864
|
-
|
1044
|
+
actions.append(
|
1045
|
+
{
|
1046
|
+
"action": "Explore High-Value Content",
|
1047
|
+
"tool": "read_rows",
|
1048
|
+
"params": {"table_name": high_value_tables[0]},
|
1049
|
+
"description": f"Examine content in {high_value_tables[0]} table",
|
1050
|
+
}
|
1051
|
+
)
|
1052
|
+
|
865
1053
|
if overview.get("semantic_search_available"):
|
866
|
-
actions.append(
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
1054
|
+
actions.append(
|
1055
|
+
{
|
1056
|
+
"action": "Smart Search",
|
1057
|
+
"tool": "auto_smart_search",
|
1058
|
+
"params": {"query": "important recent information", "limit": 5},
|
1059
|
+
"description": "Find important content using intelligent search",
|
1060
|
+
}
|
1061
|
+
)
|
1062
|
+
|
1063
|
+
actions.append(
|
1064
|
+
{
|
1065
|
+
"action": "Quality Assessment",
|
1066
|
+
"tool": "get_content_health_score",
|
1067
|
+
"params": {},
|
1068
|
+
"description": "Get detailed quality metrics and recommendations",
|
1069
|
+
}
|
1070
|
+
)
|
1071
|
+
|
880
1072
|
return actions
|
881
1073
|
|
882
1074
|
|
@@ -885,17 +1077,24 @@ def _store_discovery_pattern(db, discovery_session: Dict[str, Any]) -> None:
|
|
885
1077
|
try:
|
886
1078
|
# Check if discovery_patterns table exists
|
887
1079
|
tables_result = db.list_tables()
|
888
|
-
if tables_result.get("success") and "discovery_patterns" in tables_result.get(
|
1080
|
+
if tables_result.get("success") and "discovery_patterns" in tables_result.get(
|
1081
|
+
"tables", []
|
1082
|
+
):
|
889
1083
|
# Store the discovery session
|
890
|
-
db.insert_row(
|
891
|
-
"
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
1084
|
+
db.insert_row(
|
1085
|
+
"discovery_patterns",
|
1086
|
+
{
|
1087
|
+
"agent_id": discovery_session.get("agent_id"),
|
1088
|
+
"goal": discovery_session.get("goal"),
|
1089
|
+
"focus_area": discovery_session.get("focus_area"),
|
1090
|
+
"depth": discovery_session.get("depth"),
|
1091
|
+
"steps_completed": str(
|
1092
|
+
discovery_session.get("steps_completed", [])
|
1093
|
+
),
|
1094
|
+
"success": True,
|
1095
|
+
"timestamp": discovery_session.get("timestamp"),
|
1096
|
+
},
|
1097
|
+
)
|
899
1098
|
except Exception:
|
900
1099
|
# Silently fail if learning storage isn't available
|
901
1100
|
pass
|
@@ -904,14 +1103,18 @@ def _store_discovery_pattern(db, discovery_session: Dict[str, Any]) -> None:
|
|
904
1103
|
def _customize_template(template: Dict[str, Any], customize_for: str) -> Dict[str, Any]:
|
905
1104
|
"""Customize template for specific domain or topic."""
|
906
1105
|
customized = template.copy()
|
907
|
-
|
1106
|
+
|
908
1107
|
# Add customization note
|
909
1108
|
customized["customized_for"] = customize_for
|
910
1109
|
customized["customization_note"] = f"Template customized for: {customize_for}"
|
911
|
-
|
1110
|
+
|
912
1111
|
# Modify search queries in workflow to include customization
|
913
1112
|
for step in customized.get("workflow", []):
|
914
|
-
if step.get("tool") in [
|
1113
|
+
if step.get("tool") in [
|
1114
|
+
"auto_smart_search",
|
1115
|
+
"auto_semantic_search",
|
1116
|
+
"search_content",
|
1117
|
+
]:
|
915
1118
|
params = step.get("params", {})
|
916
1119
|
if "query" in params and params["query"].startswith("REPLACE_WITH"):
|
917
1120
|
# Keep the placeholder for user customization
|
@@ -919,258 +1122,307 @@ def _customize_template(template: Dict[str, Any], customize_for: str) -> Dict[st
|
|
919
1122
|
elif "query" in params:
|
920
1123
|
# Add customization to existing query
|
921
1124
|
params["query"] = f"{customize_for} {params['query']}"
|
922
|
-
|
1125
|
+
|
923
1126
|
return customized
|
924
1127
|
|
925
1128
|
|
926
1129
|
# Relationship discovery helper functions
|
927
1130
|
|
1131
|
+
|
928
1132
|
def _discover_foreign_keys(db, target_table: str, all_tables: List[str]) -> List[str]:
|
929
1133
|
"""Discover foreign key relationships."""
|
930
1134
|
relationships = []
|
931
|
-
|
1135
|
+
|
932
1136
|
try:
|
933
1137
|
# Get target table schema
|
934
1138
|
target_schema = db.describe_table(target_table)
|
935
1139
|
if not target_schema.get("success"):
|
936
1140
|
return relationships
|
937
|
-
|
1141
|
+
|
938
1142
|
target_columns = target_schema.get("columns", [])
|
939
1143
|
target_col_names = [col.get("name", "") for col in target_columns]
|
940
|
-
|
1144
|
+
|
941
1145
|
# Check other tables for potential foreign key references
|
942
1146
|
for other_table in all_tables:
|
943
1147
|
if other_table == target_table:
|
944
1148
|
continue
|
945
|
-
|
1149
|
+
|
946
1150
|
try:
|
947
1151
|
other_schema = db.describe_table(other_table)
|
948
1152
|
if other_schema.get("success"):
|
949
1153
|
other_columns = other_schema.get("columns", [])
|
950
|
-
|
1154
|
+
|
951
1155
|
for col in other_columns:
|
952
1156
|
col_name = col.get("name", "")
|
953
1157
|
# Look for naming patterns that suggest foreign keys
|
954
1158
|
if col_name.endswith("_id") or col_name.endswith("Id"):
|
955
|
-
potential_ref = col_name.replace("_id", "").replace(
|
956
|
-
|
1159
|
+
potential_ref = col_name.replace("_id", "").replace(
|
1160
|
+
"Id", ""
|
1161
|
+
)
|
1162
|
+
if (
|
1163
|
+
potential_ref == target_table
|
1164
|
+
or f"{potential_ref}s" == target_table
|
1165
|
+
):
|
957
1166
|
relationships.append(f"{other_table}.{col_name}")
|
958
|
-
|
1167
|
+
|
959
1168
|
# Look for exact column name matches (potential shared keys)
|
960
1169
|
if col_name in target_col_names and col_name != "id":
|
961
|
-
relationships.append(
|
962
|
-
|
1170
|
+
relationships.append(
|
1171
|
+
f"{other_table}.{col_name} (shared key)"
|
1172
|
+
)
|
1173
|
+
|
963
1174
|
except Exception:
|
964
1175
|
continue
|
965
|
-
|
1176
|
+
|
966
1177
|
except Exception:
|
967
1178
|
pass
|
968
|
-
|
1179
|
+
|
969
1180
|
return relationships
|
970
1181
|
|
971
1182
|
|
972
|
-
def _discover_semantic_relationships(
|
1183
|
+
def _discover_semantic_relationships(
|
1184
|
+
db, target_table: str, all_tables: List[str], threshold: float
|
1185
|
+
) -> List[Dict[str, Any]]:
|
973
1186
|
"""Discover semantic similarity relationships."""
|
974
1187
|
relationships = []
|
975
|
-
|
1188
|
+
|
976
1189
|
if not is_semantic_search_available():
|
977
1190
|
return relationships
|
978
|
-
|
1191
|
+
|
979
1192
|
try:
|
980
1193
|
# Get sample content from target table
|
981
1194
|
target_rows = db.read_rows(target_table)
|
982
1195
|
if not target_rows.get("success") or not target_rows.get("rows"):
|
983
1196
|
return relationships
|
984
|
-
|
1197
|
+
|
985
1198
|
# Create a sample query from target table content
|
986
1199
|
sample_row = target_rows["rows"][0]
|
987
|
-
sample_text = " ".join(str(v) for v in sample_row.values() if v is not None)[
|
988
|
-
|
1200
|
+
sample_text = " ".join(str(v) for v in sample_row.values() if v is not None)[
|
1201
|
+
:200
|
1202
|
+
]
|
1203
|
+
|
989
1204
|
if len(sample_text.strip()) < 10:
|
990
1205
|
return relationships
|
991
|
-
|
1206
|
+
|
992
1207
|
# Search for similar content in other tables
|
993
1208
|
for other_table in all_tables:
|
994
1209
|
if other_table == target_table:
|
995
1210
|
continue
|
996
|
-
|
1211
|
+
|
997
1212
|
try:
|
998
1213
|
# Try semantic search in the other table
|
999
1214
|
search_result = db.semantic_search(
|
1000
|
-
sample_text,
|
1215
|
+
sample_text,
|
1216
|
+
[other_table],
|
1217
|
+
"embedding",
|
1218
|
+
None,
|
1219
|
+
threshold,
|
1220
|
+
3,
|
1221
|
+
"all-MiniLM-L6-v2",
|
1001
1222
|
)
|
1002
|
-
|
1223
|
+
|
1003
1224
|
if search_result.get("success") and search_result.get("results"):
|
1004
1225
|
results = search_result["results"]
|
1005
|
-
avg_similarity = sum(
|
1006
|
-
|
1226
|
+
avg_similarity = sum(
|
1227
|
+
r.get("similarity_score", 0) for r in results
|
1228
|
+
) / len(results)
|
1229
|
+
|
1007
1230
|
if avg_similarity >= threshold:
|
1008
|
-
relationships.append(
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1231
|
+
relationships.append(
|
1232
|
+
{
|
1233
|
+
"table": other_table,
|
1234
|
+
"similarity": round(avg_similarity, 2),
|
1235
|
+
"related_content_count": len(results),
|
1236
|
+
}
|
1237
|
+
)
|
1238
|
+
|
1014
1239
|
except Exception:
|
1015
1240
|
continue
|
1016
|
-
|
1241
|
+
|
1017
1242
|
except Exception:
|
1018
1243
|
pass
|
1019
|
-
|
1244
|
+
|
1020
1245
|
return relationships
|
1021
1246
|
|
1022
1247
|
|
1023
|
-
def _discover_temporal_relationships(
|
1248
|
+
def _discover_temporal_relationships(
|
1249
|
+
db, target_table: str, all_tables: List[str]
|
1250
|
+
) -> List[str]:
|
1024
1251
|
"""Discover temporal pattern relationships."""
|
1025
1252
|
relationships = []
|
1026
|
-
|
1253
|
+
|
1027
1254
|
try:
|
1028
1255
|
# Check if target table has timestamp columns
|
1029
1256
|
target_schema = db.describe_table(target_table)
|
1030
1257
|
if not target_schema.get("success"):
|
1031
1258
|
return relationships
|
1032
|
-
|
1259
|
+
|
1033
1260
|
target_columns = target_schema.get("columns", [])
|
1034
|
-
target_has_timestamp = any(
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1038
|
-
|
1261
|
+
target_has_timestamp = any(
|
1262
|
+
"timestamp" in col.get("name", "").lower()
|
1263
|
+
or "date" in col.get("name", "").lower()
|
1264
|
+
or "time" in col.get("name", "").lower()
|
1265
|
+
for col in target_columns
|
1266
|
+
)
|
1267
|
+
|
1039
1268
|
if not target_has_timestamp:
|
1040
1269
|
return relationships
|
1041
|
-
|
1270
|
+
|
1042
1271
|
# Check other tables for similar timestamp patterns
|
1043
1272
|
for other_table in all_tables:
|
1044
1273
|
if other_table == target_table:
|
1045
1274
|
continue
|
1046
|
-
|
1275
|
+
|
1047
1276
|
try:
|
1048
1277
|
other_schema = db.describe_table(other_table)
|
1049
1278
|
if other_schema.get("success"):
|
1050
1279
|
other_columns = other_schema.get("columns", [])
|
1051
|
-
other_has_timestamp = any(
|
1052
|
-
|
1053
|
-
|
1054
|
-
|
1055
|
-
|
1280
|
+
other_has_timestamp = any(
|
1281
|
+
"timestamp" in col.get("name", "").lower()
|
1282
|
+
or "date" in col.get("name", "").lower()
|
1283
|
+
or "time" in col.get("name", "").lower()
|
1284
|
+
for col in other_columns
|
1285
|
+
)
|
1286
|
+
|
1056
1287
|
if other_has_timestamp:
|
1057
1288
|
relationships.append(other_table)
|
1058
|
-
|
1289
|
+
|
1059
1290
|
except Exception:
|
1060
1291
|
continue
|
1061
|
-
|
1292
|
+
|
1062
1293
|
except Exception:
|
1063
1294
|
pass
|
1064
|
-
|
1295
|
+
|
1065
1296
|
return relationships
|
1066
1297
|
|
1067
1298
|
|
1068
|
-
def _discover_naming_relationships(
|
1299
|
+
def _discover_naming_relationships(
|
1300
|
+
target_table: str, all_tables: List[str]
|
1301
|
+
) -> List[str]:
|
1069
1302
|
"""Discover relationships based on naming conventions."""
|
1070
1303
|
relationships = []
|
1071
|
-
|
1304
|
+
|
1072
1305
|
# Look for tables with similar names or naming patterns
|
1073
1306
|
target_lower = target_table.lower()
|
1074
|
-
|
1307
|
+
|
1075
1308
|
for other_table in all_tables:
|
1076
1309
|
if other_table == target_table:
|
1077
1310
|
continue
|
1078
|
-
|
1311
|
+
|
1079
1312
|
other_lower = other_table.lower()
|
1080
|
-
|
1313
|
+
|
1081
1314
|
# Check for plural/singular relationships
|
1082
|
-
if (target_lower.endswith(
|
1083
|
-
|
1315
|
+
if (target_lower.endswith("s") and other_lower == target_lower[:-1]) or (
|
1316
|
+
other_lower.endswith("s") and target_lower == other_lower[:-1]
|
1317
|
+
):
|
1084
1318
|
relationships.append(other_table)
|
1085
1319
|
continue
|
1086
|
-
|
1320
|
+
|
1087
1321
|
# Check for common prefixes or suffixes
|
1088
1322
|
if len(target_lower) > 3 and len(other_lower) > 3:
|
1089
1323
|
# Common prefix (at least 4 characters)
|
1090
1324
|
if target_lower[:4] == other_lower[:4]:
|
1091
1325
|
relationships.append(other_table)
|
1092
1326
|
continue
|
1093
|
-
|
1327
|
+
|
1094
1328
|
# Common suffix (at least 4 characters)
|
1095
1329
|
if target_lower[-4:] == other_lower[-4:]:
|
1096
1330
|
relationships.append(other_table)
|
1097
1331
|
continue
|
1098
|
-
|
1332
|
+
|
1099
1333
|
# Check for semantic name relationships
|
1100
|
-
name_words = set(target_lower.split(
|
1101
|
-
other_words = set(other_lower.split(
|
1102
|
-
|
1334
|
+
name_words = set(target_lower.split("_"))
|
1335
|
+
other_words = set(other_lower.split("_"))
|
1336
|
+
|
1103
1337
|
# If tables share significant word overlap
|
1104
1338
|
if len(name_words.intersection(other_words)) > 0:
|
1105
1339
|
relationships.append(other_table)
|
1106
|
-
|
1340
|
+
|
1107
1341
|
return relationships
|
1108
1342
|
|
1109
1343
|
|
1110
|
-
def _identify_strongest_connections(
|
1344
|
+
def _identify_strongest_connections(
|
1345
|
+
relationships: Dict[str, Any],
|
1346
|
+
) -> List[Dict[str, Any]]:
|
1111
1347
|
"""Identify the strongest connections across all relationships."""
|
1112
1348
|
connections = []
|
1113
|
-
|
1349
|
+
|
1114
1350
|
for table, rels in relationships.items():
|
1115
1351
|
# Count total connections for this table
|
1116
|
-
total_connections = (
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1352
|
+
total_connections = (
|
1353
|
+
len(rels.get("foreign_key_refs", []))
|
1354
|
+
+ len(rels.get("semantic_similar", []))
|
1355
|
+
+ len(rels.get("temporal_related", []))
|
1356
|
+
+ len(rels.get("naming_related", []))
|
1357
|
+
)
|
1358
|
+
|
1121
1359
|
if total_connections > 0:
|
1122
|
-
connections.append(
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
"
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1360
|
+
connections.append(
|
1361
|
+
{
|
1362
|
+
"table": table,
|
1363
|
+
"total_connections": total_connections,
|
1364
|
+
"connection_types": {
|
1365
|
+
"structural": len(rels.get("foreign_key_refs", [])),
|
1366
|
+
"semantic": len(rels.get("semantic_similar", [])),
|
1367
|
+
"temporal": len(rels.get("temporal_related", [])),
|
1368
|
+
"naming": len(rels.get("naming_related", [])),
|
1369
|
+
},
|
1130
1370
|
}
|
1131
|
-
|
1132
|
-
|
1371
|
+
)
|
1372
|
+
|
1133
1373
|
# Sort by total connections and return top 5
|
1134
1374
|
connections.sort(key=lambda x: x["total_connections"], reverse=True)
|
1135
1375
|
return connections[:5]
|
1136
1376
|
|
1137
1377
|
|
1138
|
-
def _generate_relationship_recommendations(
|
1378
|
+
def _generate_relationship_recommendations(
|
1379
|
+
relationships: Dict[str, Any], insights: List[str]
|
1380
|
+
) -> List[str]:
|
1139
1381
|
"""Generate actionable recommendations based on discovered relationships."""
|
1140
1382
|
recommendations = []
|
1141
|
-
|
1383
|
+
|
1142
1384
|
# Find tables with many connections
|
1143
1385
|
highly_connected = []
|
1144
1386
|
for table, rels in relationships.items():
|
1145
|
-
total_connections = (
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1387
|
+
total_connections = (
|
1388
|
+
len(rels.get("foreign_key_refs", []))
|
1389
|
+
+ len(rels.get("semantic_similar", []))
|
1390
|
+
+ len(rels.get("temporal_related", []))
|
1391
|
+
+ len(rels.get("naming_related", []))
|
1392
|
+
)
|
1149
1393
|
if total_connections >= 3:
|
1150
1394
|
highly_connected.append(table)
|
1151
|
-
|
1395
|
+
|
1152
1396
|
if highly_connected:
|
1153
|
-
recommendations.append(
|
1154
|
-
|
1397
|
+
recommendations.append(
|
1398
|
+
f"Focus queries on highly connected tables: {', '.join(highly_connected[:3])}"
|
1399
|
+
)
|
1400
|
+
|
1155
1401
|
# Find tables with semantic relationships
|
1156
1402
|
semantic_tables = []
|
1157
1403
|
for table, rels in relationships.items():
|
1158
1404
|
if rels.get("semantic_similar"):
|
1159
1405
|
semantic_tables.append(table)
|
1160
|
-
|
1406
|
+
|
1161
1407
|
if semantic_tables:
|
1162
|
-
recommendations.append(
|
1163
|
-
|
1408
|
+
recommendations.append(
|
1409
|
+
f"Use semantic search across related tables: {', '.join(semantic_tables[:3])}"
|
1410
|
+
)
|
1411
|
+
|
1164
1412
|
# Find tables with temporal relationships
|
1165
1413
|
temporal_tables = []
|
1166
1414
|
for table, rels in relationships.items():
|
1167
1415
|
if rels.get("temporal_related"):
|
1168
1416
|
temporal_tables.append(table)
|
1169
|
-
|
1417
|
+
|
1170
1418
|
if temporal_tables:
|
1171
|
-
recommendations.append(
|
1172
|
-
|
1419
|
+
recommendations.append(
|
1420
|
+
f"Consider temporal analysis for time-related tables: {', '.join(temporal_tables[:3])}"
|
1421
|
+
)
|
1422
|
+
|
1173
1423
|
if not recommendations:
|
1174
|
-
recommendations.append(
|
1175
|
-
|
1424
|
+
recommendations.append(
|
1425
|
+
"Consider adding more structured relationships or content to improve discoverability"
|
1426
|
+
)
|
1427
|
+
|
1176
1428
|
return recommendations
|