mcp-sqlite-memory-bank 1.5.1__py3-none-any.whl → 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_sqlite_memory_bank/__init__.py +3 -3
- mcp_sqlite_memory_bank/__main__.py +8 -7
- mcp_sqlite_memory_bank/database.py +166 -48
- mcp_sqlite_memory_bank/prompts.py +64 -48
- mcp_sqlite_memory_bank/resources.py +218 -144
- mcp_sqlite_memory_bank/semantic.py +25 -13
- mcp_sqlite_memory_bank/server.py +174 -32
- mcp_sqlite_memory_bank/tools/__init__.py +26 -29
- mcp_sqlite_memory_bank/tools/analytics.py +179 -130
- mcp_sqlite_memory_bank/tools/basic.py +417 -4
- mcp_sqlite_memory_bank/tools/discovery.py +549 -360
- mcp_sqlite_memory_bank/tools/search.py +147 -71
- mcp_sqlite_memory_bank/types.py +6 -1
- mcp_sqlite_memory_bank/utils.py +154 -105
- {mcp_sqlite_memory_bank-1.5.1.dist-info → mcp_sqlite_memory_bank-1.6.2.dist-info}/METADATA +54 -6
- mcp_sqlite_memory_bank-1.6.2.dist-info/RECORD +21 -0
- mcp_sqlite_memory_bank-1.5.1.dist-info/RECORD +0 -21
- {mcp_sqlite_memory_bank-1.5.1.dist-info → mcp_sqlite_memory_bank-1.6.2.dist-info}/WHEEL +0 -0
- {mcp_sqlite_memory_bank-1.5.1.dist-info → mcp_sqlite_memory_bank-1.6.2.dist-info}/entry_points.txt +0 -0
- {mcp_sqlite_memory_bank-1.5.1.dist-info → mcp_sqlite_memory_bank-1.6.2.dist-info}/licenses/LICENSE +0 -0
- {mcp_sqlite_memory_bank-1.5.1.dist-info → mcp_sqlite_memory_bank-1.6.2.dist-info}/top_level.txt +0 -0
@@ -9,8 +9,7 @@ discovery processes.
|
|
9
9
|
Author: Robert Meisner
|
10
10
|
"""
|
11
11
|
|
12
|
-
import
|
13
|
-
from typing import Any, Dict, List, Optional, cast, Union
|
12
|
+
from typing import Any, Dict, List, Optional, cast
|
14
13
|
from datetime import datetime
|
15
14
|
|
16
15
|
from ..database import get_database
|
@@ -35,14 +34,14 @@ def intelligent_discovery(
|
|
35
34
|
Args:
|
36
35
|
discovery_goal (str): What you want to achieve
|
37
36
|
- "understand_content": Learn what data is available and how it's organized
|
38
|
-
- "find_patterns": Discover themes, relationships, and content patterns
|
37
|
+
- "find_patterns": Discover themes, relationships, and content patterns
|
39
38
|
- "explore_structure": Understand database schema and organization
|
40
39
|
- "assess_quality": Evaluate content quality and completeness
|
41
40
|
- "prepare_search": Get ready for effective content searching
|
42
41
|
focus_area (Optional[str]): Specific table or topic to focus on (default: all)
|
43
42
|
depth (str): How thorough the discovery should be
|
44
43
|
- "quick": Fast overview with key insights
|
45
|
-
- "moderate": Balanced analysis with actionable recommendations
|
44
|
+
- "moderate": Balanced analysis with actionable recommendations
|
46
45
|
- "comprehensive": Deep dive with detailed analysis
|
47
46
|
agent_id (Optional[str]): Agent identifier for learning discovery patterns
|
48
47
|
|
@@ -73,8 +72,9 @@ def intelligent_discovery(
|
|
73
72
|
"""
|
74
73
|
try:
|
75
74
|
from .. import server
|
75
|
+
|
76
76
|
db = get_database(server.DB_PATH)
|
77
|
-
|
77
|
+
|
78
78
|
# Initialize discovery session
|
79
79
|
discovery_session = {
|
80
80
|
"goal": discovery_goal,
|
@@ -84,97 +84,105 @@ def intelligent_discovery(
|
|
84
84
|
"agent_id": agent_id,
|
85
85
|
"steps_completed": [],
|
86
86
|
"insights": [],
|
87
|
-
"recommendations": []
|
87
|
+
"recommendations": [],
|
88
88
|
}
|
89
|
-
|
89
|
+
|
90
90
|
# Step 1: Basic overview
|
91
91
|
discovery_session["steps_completed"].append("basic_overview")
|
92
92
|
tables_result = db.list_tables()
|
93
93
|
if not tables_result.get("success"):
|
94
|
-
return cast(
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
94
|
+
return cast(
|
95
|
+
ToolResponse,
|
96
|
+
{
|
97
|
+
"success": False,
|
98
|
+
"error": "Failed to get basic overview",
|
99
|
+
"category": "DISCOVERY_ERROR",
|
100
|
+
"details": tables_result,
|
101
|
+
},
|
102
|
+
)
|
103
|
+
|
101
104
|
tables = tables_result.get("tables", [])
|
102
105
|
overview = {
|
103
106
|
"total_tables": len(tables),
|
104
107
|
"available_tables": tables,
|
105
|
-
"semantic_search_available": is_semantic_search_available()
|
108
|
+
"semantic_search_available": is_semantic_search_available(),
|
106
109
|
}
|
107
|
-
|
110
|
+
|
108
111
|
# Step 2: Content analysis based on goal
|
109
112
|
if discovery_goal in ["understand_content", "find_patterns", "assess_quality"]:
|
110
113
|
discovery_session["steps_completed"].append("content_analysis")
|
111
114
|
content_analysis = _analyze_content_for_discovery(db, tables, focus_area, depth)
|
112
115
|
overview.update(content_analysis)
|
113
|
-
|
116
|
+
|
114
117
|
# Step 3: Schema analysis for structure exploration
|
115
118
|
if discovery_goal in ["explore_structure", "understand_content"]:
|
116
119
|
discovery_session["steps_completed"].append("schema_analysis")
|
117
120
|
schema_analysis = _analyze_schema_for_discovery(db, tables, focus_area, depth)
|
118
121
|
overview.update(schema_analysis)
|
119
|
-
|
122
|
+
|
120
123
|
# Step 4: Quality assessment
|
121
124
|
if discovery_goal in ["assess_quality", "find_patterns"]:
|
122
125
|
discovery_session["steps_completed"].append("quality_assessment")
|
123
126
|
quality_analysis = _assess_content_quality(db, tables, focus_area, depth)
|
124
127
|
overview.update(quality_analysis)
|
125
|
-
|
128
|
+
|
126
129
|
# Step 5: Search readiness for search preparation
|
127
130
|
if discovery_goal in ["prepare_search", "understand_content"]:
|
128
131
|
discovery_session["steps_completed"].append("search_readiness")
|
129
132
|
search_analysis = _analyze_search_readiness(db, tables, focus_area)
|
130
133
|
overview.update(search_analysis)
|
131
|
-
|
134
|
+
|
132
135
|
# Step 6: Generate insights and recommendations
|
133
136
|
insights, recommendations, next_steps = _generate_discovery_insights(
|
134
137
|
discovery_goal, overview, focus_area, depth
|
135
138
|
)
|
136
|
-
|
139
|
+
|
137
140
|
discovery_session["insights"] = insights
|
138
141
|
discovery_session["recommendations"] = recommendations
|
139
|
-
|
142
|
+
|
140
143
|
# Step 7: Store discovery pattern for learning (if agent_id provided)
|
141
144
|
if agent_id:
|
142
145
|
_store_discovery_pattern(db, discovery_session)
|
143
|
-
|
144
|
-
return cast(
|
145
|
-
|
146
|
-
|
147
|
-
"
|
148
|
-
"
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
146
|
+
|
147
|
+
return cast(
|
148
|
+
ToolResponse,
|
149
|
+
{
|
150
|
+
"success": True,
|
151
|
+
"discovery": {
|
152
|
+
"goal": discovery_goal,
|
153
|
+
"overview": overview,
|
154
|
+
"insights": insights,
|
155
|
+
"recommendations": recommendations,
|
156
|
+
"focus_area": focus_area,
|
157
|
+
"depth": depth,
|
158
|
+
"steps_completed": discovery_session["steps_completed"],
|
159
|
+
},
|
160
|
+
"next_steps": next_steps,
|
161
|
+
"discovery_session": discovery_session,
|
162
|
+
"quick_actions": _generate_quick_actions(discovery_goal, overview, focus_area),
|
154
163
|
},
|
155
|
-
|
156
|
-
|
157
|
-
"quick_actions": _generate_quick_actions(discovery_goal, overview, focus_area)
|
158
|
-
})
|
159
|
-
|
164
|
+
)
|
165
|
+
|
160
166
|
except Exception as e:
|
161
|
-
return cast(
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
"
|
167
|
-
"
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
167
|
+
return cast(
|
168
|
+
ToolResponse,
|
169
|
+
{
|
170
|
+
"success": False,
|
171
|
+
"error": f"Intelligent discovery failed: {str(e)}",
|
172
|
+
"category": "DISCOVERY_ERROR",
|
173
|
+
"details": {
|
174
|
+
"goal": discovery_goal,
|
175
|
+
"focus_area": focus_area,
|
176
|
+
"depth": depth,
|
177
|
+
"agent_id": agent_id,
|
178
|
+
},
|
179
|
+
},
|
180
|
+
)
|
172
181
|
|
173
182
|
|
174
183
|
@catch_errors
|
175
184
|
def discovery_templates(
|
176
|
-
template_type: str = "first_time_exploration",
|
177
|
-
customize_for: Optional[str] = None
|
185
|
+
template_type: str = "first_time_exploration", customize_for: Optional[str] = None
|
178
186
|
) -> ToolResponse:
|
179
187
|
"""
|
180
188
|
📋 **DISCOVERY TEMPLATES** - Pre-built exploration workflows for common scenarios!
|
@@ -207,7 +215,7 @@ def discovery_templates(
|
|
207
215
|
}}
|
208
216
|
|
209
217
|
FastMCP Tool Info:
|
210
|
-
- **PROVEN WORKFLOWS**: Battle-tested discovery sequences
|
218
|
+
- **PROVEN WORKFLOWS**: Battle-tested discovery sequences
|
211
219
|
- **STEP-BY-STEP GUIDANCE**: Exact tools and parameters to use
|
212
220
|
- **CUSTOMIZABLE**: Adapt templates to your specific needs
|
213
221
|
- **LEARNING-OPTIMIZED**: Based on successful discovery patterns
|
@@ -223,9 +231,12 @@ def discovery_templates(
|
|
223
231
|
"step": 1,
|
224
232
|
"action": "Get Overview",
|
225
233
|
"tool": "intelligent_discovery",
|
226
|
-
"params": {
|
234
|
+
"params": {
|
235
|
+
"discovery_goal": "understand_content",
|
236
|
+
"depth": "moderate",
|
237
|
+
},
|
227
238
|
"purpose": "Understand what data is available and how it's organized",
|
228
|
-
"look_for": ["total tables", "content types", "data volume"]
|
239
|
+
"look_for": ["total tables", "content types", "data volume"],
|
229
240
|
},
|
230
241
|
{
|
231
242
|
"step": 2,
|
@@ -233,7 +244,11 @@ def discovery_templates(
|
|
233
244
|
"tool": "explore_tables",
|
234
245
|
"params": {"include_row_counts": True},
|
235
246
|
"purpose": "See detailed table schemas and sample data",
|
236
|
-
"look_for": [
|
247
|
+
"look_for": [
|
248
|
+
"column types",
|
249
|
+
"sample content",
|
250
|
+
"data relationships",
|
251
|
+
],
|
237
252
|
},
|
238
253
|
{
|
239
254
|
"step": 3,
|
@@ -241,7 +256,11 @@ def discovery_templates(
|
|
241
256
|
"tool": "auto_smart_search",
|
242
257
|
"params": {"query": "recent important information", "limit": 5},
|
243
258
|
"purpose": "Understand search capabilities and content accessibility",
|
244
|
-
"look_for": [
|
259
|
+
"look_for": [
|
260
|
+
"search quality",
|
261
|
+
"result relevance",
|
262
|
+
"content types found",
|
263
|
+
],
|
245
264
|
},
|
246
265
|
{
|
247
266
|
"step": 4,
|
@@ -249,17 +268,20 @@ def discovery_templates(
|
|
249
268
|
"tool": "get_content_health_score",
|
250
269
|
"params": {},
|
251
270
|
"purpose": "Understand overall memory bank quality and opportunities",
|
252
|
-
"look_for": [
|
253
|
-
|
271
|
+
"look_for": [
|
272
|
+
"health score",
|
273
|
+
"improvement recommendations",
|
274
|
+
"strengths",
|
275
|
+
],
|
276
|
+
},
|
254
277
|
],
|
255
278
|
"success_criteria": [
|
256
279
|
"Understand what types of information are stored",
|
257
280
|
"Know which tables contain the most valuable content",
|
258
281
|
"Identify best search strategies for this memory bank",
|
259
|
-
"Have actionable next steps for productive use"
|
260
|
-
]
|
282
|
+
"Have actionable next steps for productive use",
|
283
|
+
],
|
261
284
|
},
|
262
|
-
|
263
285
|
"content_audit": {
|
264
286
|
"name": "Content Quality Audit",
|
265
287
|
"description": "Systematic review of content quality and completeness",
|
@@ -271,7 +293,11 @@ def discovery_templates(
|
|
271
293
|
"tool": "get_content_health_score",
|
272
294
|
"params": {},
|
273
295
|
"purpose": "Get overall quality metrics and problem areas",
|
274
|
-
"look_for": [
|
296
|
+
"look_for": [
|
297
|
+
"quality scores",
|
298
|
+
"problem tables",
|
299
|
+
"recommendations",
|
300
|
+
],
|
275
301
|
},
|
276
302
|
{
|
277
303
|
"step": 2,
|
@@ -279,7 +305,11 @@ def discovery_templates(
|
|
279
305
|
"tool": "analyze_memory_patterns",
|
280
306
|
"params": {},
|
281
307
|
"purpose": "Identify content patterns and organizational issues",
|
282
|
-
"look_for": [
|
308
|
+
"look_for": [
|
309
|
+
"content distribution",
|
310
|
+
"sparse tables",
|
311
|
+
"organization gaps",
|
312
|
+
],
|
283
313
|
},
|
284
314
|
{
|
285
315
|
"step": 3,
|
@@ -287,25 +317,34 @@ def discovery_templates(
|
|
287
317
|
"tool": "explore_tables",
|
288
318
|
"params": {"include_row_counts": True},
|
289
319
|
"purpose": "Detailed examination of each table's content",
|
290
|
-
"look_for": [
|
320
|
+
"look_for": [
|
321
|
+
"empty tables",
|
322
|
+
"low-quality content",
|
323
|
+
"missing data",
|
324
|
+
],
|
291
325
|
},
|
292
326
|
{
|
293
327
|
"step": 4,
|
294
328
|
"action": "Search Readiness",
|
295
329
|
"tool": "intelligent_discovery",
|
296
|
-
"params": {
|
330
|
+
"params": {
|
331
|
+
"discovery_goal": "prepare_search",
|
332
|
+
"depth": "comprehensive",
|
333
|
+
},
|
297
334
|
"purpose": "Ensure content is optimally searchable",
|
298
|
-
"look_for": [
|
299
|
-
|
335
|
+
"look_for": [
|
336
|
+
"embedding coverage",
|
337
|
+
"search optimization opportunities",
|
338
|
+
],
|
339
|
+
},
|
300
340
|
],
|
301
341
|
"success_criteria": [
|
302
342
|
"Identify all content quality issues",
|
303
343
|
"Have specific recommendations for improvement",
|
304
344
|
"Understand which content areas need attention",
|
305
|
-
"Know how to optimize for better searchability"
|
306
|
-
]
|
345
|
+
"Know how to optimize for better searchability",
|
346
|
+
],
|
307
347
|
},
|
308
|
-
|
309
348
|
"search_optimization": {
|
310
349
|
"name": "Search Optimization Setup",
|
311
350
|
"description": "Prepare memory bank for optimal content discovery and searching",
|
@@ -315,9 +354,16 @@ def discovery_templates(
|
|
315
354
|
"step": 1,
|
316
355
|
"action": "Search Capability Assessment",
|
317
356
|
"tool": "intelligent_discovery",
|
318
|
-
"params": {
|
357
|
+
"params": {
|
358
|
+
"discovery_goal": "prepare_search",
|
359
|
+
"depth": "comprehensive",
|
360
|
+
},
|
319
361
|
"purpose": "Understand current search capabilities and gaps",
|
320
|
-
"look_for": [
|
362
|
+
"look_for": [
|
363
|
+
"semantic readiness",
|
364
|
+
"text column identification",
|
365
|
+
"embedding status",
|
366
|
+
],
|
321
367
|
},
|
322
368
|
{
|
323
369
|
"step": 2,
|
@@ -325,7 +371,11 @@ def discovery_templates(
|
|
325
371
|
"tool": "analyze_memory_patterns",
|
326
372
|
"params": {},
|
327
373
|
"purpose": "Identify high-value content for search optimization",
|
328
|
-
"look_for": [
|
374
|
+
"look_for": [
|
375
|
+
"text-rich tables",
|
376
|
+
"high-value content",
|
377
|
+
"search opportunities",
|
378
|
+
],
|
329
379
|
},
|
330
380
|
{
|
331
381
|
"step": 3,
|
@@ -333,7 +383,7 @@ def discovery_templates(
|
|
333
383
|
"tool": "search_content",
|
334
384
|
"params": {"query": "test search capabilities", "limit": 10},
|
335
385
|
"purpose": "Baseline current search performance",
|
336
|
-
"look_for": ["search result quality", "coverage", "relevance"]
|
386
|
+
"look_for": ["search result quality", "coverage", "relevance"],
|
337
387
|
},
|
338
388
|
{
|
339
389
|
"step": 4,
|
@@ -341,17 +391,19 @@ def discovery_templates(
|
|
341
391
|
"tool": "auto_semantic_search",
|
342
392
|
"params": {"query": "important valuable content", "limit": 5},
|
343
393
|
"purpose": "Enable and test semantic search capabilities",
|
344
|
-
"look_for": [
|
345
|
-
|
394
|
+
"look_for": [
|
395
|
+
"automatic embedding generation",
|
396
|
+
"semantic result quality",
|
397
|
+
],
|
398
|
+
},
|
346
399
|
],
|
347
400
|
"success_criteria": [
|
348
401
|
"Semantic search is enabled for key tables",
|
349
402
|
"Both keyword and semantic search work effectively",
|
350
403
|
"Search performance meets quality standards",
|
351
|
-
"Clear strategy for ongoing search optimization"
|
352
|
-
]
|
404
|
+
"Clear strategy for ongoing search optimization",
|
405
|
+
],
|
353
406
|
},
|
354
|
-
|
355
407
|
"problem_solving": {
|
356
408
|
"name": "Problem-Solving Discovery",
|
357
409
|
"description": "Find information to solve specific problems or answer questions",
|
@@ -361,25 +413,45 @@ def discovery_templates(
|
|
361
413
|
"step": 1,
|
362
414
|
"action": "Quick Content Survey",
|
363
415
|
"tool": "intelligent_discovery",
|
364
|
-
"params": {
|
416
|
+
"params": {
|
417
|
+
"discovery_goal": "understand_content",
|
418
|
+
"depth": "quick",
|
419
|
+
},
|
365
420
|
"purpose": "Rapid overview of available information",
|
366
|
-
"look_for": [
|
421
|
+
"look_for": [
|
422
|
+
"relevant content areas",
|
423
|
+
"potential information sources",
|
424
|
+
],
|
367
425
|
},
|
368
426
|
{
|
369
427
|
"step": 2,
|
370
428
|
"action": "Targeted Search",
|
371
429
|
"tool": "auto_smart_search",
|
372
|
-
"params": {
|
430
|
+
"params": {
|
431
|
+
"query": "REPLACE_WITH_PROBLEM_KEYWORDS",
|
432
|
+
"limit": 10,
|
433
|
+
},
|
373
434
|
"purpose": "Find directly relevant information",
|
374
|
-
"look_for": [
|
435
|
+
"look_for": [
|
436
|
+
"directly applicable content",
|
437
|
+
"related information",
|
438
|
+
"context clues",
|
439
|
+
],
|
375
440
|
},
|
376
441
|
{
|
377
442
|
"step": 3,
|
378
443
|
"action": "Related Content Discovery",
|
379
444
|
"tool": "auto_semantic_search",
|
380
|
-
"params": {
|
445
|
+
"params": {
|
446
|
+
"query": "REPLACE_WITH_CONCEPTUAL_TERMS",
|
447
|
+
"similarity_threshold": 0.3,
|
448
|
+
},
|
381
449
|
"purpose": "Find conceptually related information",
|
382
|
-
"look_for": [
|
450
|
+
"look_for": [
|
451
|
+
"broader context",
|
452
|
+
"related concepts",
|
453
|
+
"background information",
|
454
|
+
],
|
383
455
|
},
|
384
456
|
{
|
385
457
|
"step": 4,
|
@@ -387,60 +459,80 @@ def discovery_templates(
|
|
387
459
|
"tool": "explore_tables",
|
388
460
|
"params": {"include_row_counts": True},
|
389
461
|
"purpose": "Identify what information might be missing",
|
390
|
-
"look_for": [
|
391
|
-
|
462
|
+
"look_for": [
|
463
|
+
"information gaps",
|
464
|
+
"additional context sources",
|
465
|
+
"related data",
|
466
|
+
],
|
467
|
+
},
|
392
468
|
],
|
393
469
|
"customization_note": "Replace REPLACE_WITH_PROBLEM_KEYWORDS and REPLACE_WITH_CONCEPTUAL_TERMS with your specific problem terms",
|
394
470
|
"success_criteria": [
|
395
471
|
"Found directly relevant information",
|
396
472
|
"Identified related/contextual information",
|
397
473
|
"Understand what information might be missing",
|
398
|
-
"Have clear next steps for problem resolution"
|
399
|
-
]
|
400
|
-
}
|
474
|
+
"Have clear next steps for problem resolution",
|
475
|
+
],
|
476
|
+
},
|
401
477
|
}
|
402
|
-
|
478
|
+
|
403
479
|
if template_type not in templates:
|
404
480
|
available_templates = list(templates.keys())
|
405
|
-
return cast(
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
"
|
411
|
-
"
|
412
|
-
|
413
|
-
|
414
|
-
|
481
|
+
return cast(
|
482
|
+
ToolResponse,
|
483
|
+
{
|
484
|
+
"success": False,
|
485
|
+
"error": f"Template '{template_type}' not found",
|
486
|
+
"category": "TEMPLATE_ERROR",
|
487
|
+
"details": {
|
488
|
+
"available_templates": available_templates,
|
489
|
+
"requested_template": template_type,
|
490
|
+
},
|
491
|
+
},
|
492
|
+
)
|
493
|
+
|
415
494
|
template = templates[template_type]
|
416
|
-
|
495
|
+
|
417
496
|
# Customize template if requested
|
418
497
|
if customize_for:
|
419
498
|
template = _customize_template(template, customize_for)
|
420
|
-
|
421
|
-
return cast(
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
499
|
+
|
500
|
+
return cast(
|
501
|
+
ToolResponse,
|
502
|
+
{
|
503
|
+
"success": True,
|
504
|
+
"template": template,
|
505
|
+
"template_type": template_type,
|
506
|
+
"customized_for": customize_for,
|
507
|
+
"available_templates": list(templates.keys()),
|
508
|
+
"usage_tip": "Follow the workflow steps in order, adapting parameters as needed for your specific situation",
|
509
|
+
},
|
510
|
+
)
|
511
|
+
|
430
512
|
except Exception as e:
|
431
|
-
return cast(
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
513
|
+
return cast(
|
514
|
+
ToolResponse,
|
515
|
+
{
|
516
|
+
"success": False,
|
517
|
+
"error": f"Discovery template generation failed: {str(e)}",
|
518
|
+
"category": "TEMPLATE_ERROR",
|
519
|
+
"details": {
|
520
|
+
"template_type": template_type,
|
521
|
+
"customize_for": customize_for,
|
522
|
+
},
|
523
|
+
},
|
524
|
+
)
|
437
525
|
|
438
526
|
|
439
527
|
@catch_errors
|
440
528
|
def discover_relationships(
|
441
529
|
table_name: Optional[str] = None,
|
442
|
-
relationship_types: List[str] = [
|
443
|
-
|
530
|
+
relationship_types: List[str] = [
|
531
|
+
"foreign_keys",
|
532
|
+
"semantic_similarity",
|
533
|
+
"temporal_patterns",
|
534
|
+
],
|
535
|
+
similarity_threshold: float = 0.6,
|
444
536
|
) -> ToolResponse:
|
445
537
|
"""
|
446
538
|
🔗 **RELATIONSHIP DISCOVERY** - Find hidden connections in your data!
|
@@ -478,37 +570,40 @@ def discover_relationships(
|
|
478
570
|
"""
|
479
571
|
try:
|
480
572
|
from .. import server
|
573
|
+
|
481
574
|
db = get_database(server.DB_PATH)
|
482
|
-
|
575
|
+
|
483
576
|
# Get all tables or focus on specific table
|
484
577
|
tables_result = db.list_tables()
|
485
578
|
if not tables_result.get("success"):
|
486
579
|
return cast(ToolResponse, tables_result)
|
487
|
-
|
580
|
+
|
488
581
|
all_tables = tables_result.get("tables", [])
|
489
582
|
target_tables = [table_name] if table_name else all_tables
|
490
|
-
|
583
|
+
|
491
584
|
relationships = {}
|
492
585
|
insights = []
|
493
|
-
|
586
|
+
|
494
587
|
for target_table in target_tables:
|
495
588
|
if target_table not in all_tables:
|
496
589
|
continue
|
497
|
-
|
590
|
+
|
498
591
|
table_relationships = {
|
499
592
|
"foreign_key_refs": [],
|
500
593
|
"semantic_similar": [],
|
501
594
|
"temporal_related": [],
|
502
|
-
"naming_related": []
|
595
|
+
"naming_related": [],
|
503
596
|
}
|
504
|
-
|
597
|
+
|
505
598
|
# Discover foreign key relationships
|
506
599
|
if "foreign_keys" in relationship_types:
|
507
600
|
fk_relationships = _discover_foreign_keys(db, target_table, all_tables)
|
508
601
|
table_relationships["foreign_key_refs"] = fk_relationships
|
509
602
|
if fk_relationships:
|
510
|
-
insights.append(
|
511
|
-
|
603
|
+
insights.append(
|
604
|
+
f"Table '{target_table}' has structural relationships with {len(fk_relationships)} other tables"
|
605
|
+
)
|
606
|
+
|
512
607
|
# Discover semantic similarity relationships
|
513
608
|
if "semantic_similarity" in relationship_types and is_semantic_search_available():
|
514
609
|
semantic_relationships = _discover_semantic_relationships(
|
@@ -516,75 +611,98 @@ def discover_relationships(
|
|
516
611
|
)
|
517
612
|
table_relationships["semantic_similar"] = semantic_relationships
|
518
613
|
if semantic_relationships:
|
519
|
-
insights.append(
|
520
|
-
|
614
|
+
insights.append(
|
615
|
+
f"Table '{target_table}' has semantic similarity with {len(semantic_relationships)} tables"
|
616
|
+
)
|
617
|
+
|
521
618
|
# Discover temporal patterns
|
522
619
|
if "temporal_patterns" in relationship_types:
|
523
|
-
temporal_relationships = _discover_temporal_relationships(
|
620
|
+
temporal_relationships = _discover_temporal_relationships(
|
621
|
+
db, target_table, all_tables
|
622
|
+
)
|
524
623
|
table_relationships["temporal_related"] = temporal_relationships
|
525
624
|
if temporal_relationships:
|
526
|
-
insights.append(
|
527
|
-
|
625
|
+
insights.append(
|
626
|
+
f"Table '{target_table}' shows temporal patterns with {len(temporal_relationships)} tables"
|
627
|
+
)
|
628
|
+
|
528
629
|
# Discover naming pattern relationships
|
529
630
|
if "naming_patterns" in relationship_types:
|
530
631
|
naming_relationships = _discover_naming_relationships(target_table, all_tables)
|
531
632
|
table_relationships["naming_related"] = naming_relationships
|
532
633
|
if naming_relationships:
|
533
|
-
insights.append(
|
534
|
-
|
634
|
+
insights.append(
|
635
|
+
f"Table '{target_table}' has naming pattern relationships with {len(naming_relationships)} tables"
|
636
|
+
)
|
637
|
+
|
535
638
|
relationships[target_table] = table_relationships
|
536
|
-
|
639
|
+
|
537
640
|
# Generate relationship insights
|
538
641
|
total_relationships = sum(
|
539
|
-
len(rel["foreign_key_refs"])
|
540
|
-
|
642
|
+
len(rel["foreign_key_refs"])
|
643
|
+
+ len(rel["semantic_similar"])
|
644
|
+
+ len(rel["temporal_related"])
|
645
|
+
+ len(rel["naming_related"])
|
541
646
|
for rel in relationships.values()
|
542
647
|
)
|
543
|
-
|
648
|
+
|
544
649
|
if total_relationships == 0:
|
545
|
-
insights.append(
|
650
|
+
insights.append(
|
651
|
+
"No strong relationships discovered. Consider adding more content or setting up semantic search."
|
652
|
+
)
|
546
653
|
else:
|
547
|
-
insights.append(
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
"
|
555
|
-
"
|
556
|
-
"
|
654
|
+
insights.append(
|
655
|
+
f"Discovered {total_relationships} total relationships across {len(relationships)} tables"
|
656
|
+
)
|
657
|
+
|
658
|
+
return cast(
|
659
|
+
ToolResponse,
|
660
|
+
{
|
661
|
+
"success": True,
|
662
|
+
"relationships": relationships,
|
663
|
+
"insights": insights,
|
664
|
+
"relationship_summary": {
|
665
|
+
"total_relationships": total_relationships,
|
666
|
+
"tables_analyzed": len(relationships),
|
667
|
+
"strongest_connections": _identify_strongest_connections(relationships),
|
668
|
+
},
|
669
|
+
"recommendations": _generate_relationship_recommendations(relationships, insights),
|
557
670
|
},
|
558
|
-
|
559
|
-
|
560
|
-
|
671
|
+
)
|
672
|
+
|
561
673
|
except Exception as e:
|
562
|
-
return cast(
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
"
|
568
|
-
"
|
569
|
-
|
570
|
-
|
571
|
-
|
674
|
+
return cast(
|
675
|
+
ToolResponse,
|
676
|
+
{
|
677
|
+
"success": False,
|
678
|
+
"error": f"Relationship discovery failed: {str(e)}",
|
679
|
+
"category": "RELATIONSHIP_ERROR",
|
680
|
+
"details": {
|
681
|
+
"table_name": table_name,
|
682
|
+
"relationship_types": relationship_types,
|
683
|
+
"similarity_threshold": similarity_threshold,
|
684
|
+
},
|
685
|
+
},
|
686
|
+
)
|
572
687
|
|
573
688
|
|
574
689
|
# Helper functions for discovery orchestration
|
575
690
|
|
576
|
-
|
691
|
+
|
692
|
+
def _analyze_content_for_discovery(
|
693
|
+
db, tables: List[str], focus_area: Optional[str], depth: str
|
694
|
+
) -> Dict[str, Any]:
|
577
695
|
"""Analyze content patterns and distribution."""
|
578
696
|
content_analysis = {
|
579
697
|
"total_rows": 0,
|
580
698
|
"content_distribution": {},
|
581
699
|
"text_rich_tables": [],
|
582
700
|
"sparse_tables": [],
|
583
|
-
"high_value_tables": []
|
701
|
+
"high_value_tables": [],
|
584
702
|
}
|
585
|
-
|
703
|
+
|
586
704
|
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
587
|
-
|
705
|
+
|
588
706
|
for table_name in target_tables:
|
589
707
|
try:
|
590
708
|
rows_result = db.read_rows(table_name)
|
@@ -593,109 +711,121 @@ def _analyze_content_for_discovery(db, tables: List[str], focus_area: Optional[s
|
|
593
711
|
row_count = len(rows)
|
594
712
|
content_analysis["total_rows"] += row_count
|
595
713
|
content_analysis["content_distribution"][table_name] = row_count
|
596
|
-
|
714
|
+
|
597
715
|
# Analyze content quality if depth allows
|
598
716
|
if depth in ["moderate", "comprehensive"] and rows:
|
599
717
|
# Sample content quality
|
600
718
|
sample_size = min(3, len(rows))
|
601
719
|
total_content_length = 0
|
602
|
-
|
720
|
+
|
603
721
|
for row in rows[:sample_size]:
|
604
722
|
for value in row.values():
|
605
723
|
if isinstance(value, str):
|
606
724
|
total_content_length += len(value)
|
607
|
-
|
608
|
-
avg_content_length =
|
609
|
-
|
725
|
+
|
726
|
+
avg_content_length = (
|
727
|
+
total_content_length / sample_size if sample_size > 0 else 0
|
728
|
+
)
|
729
|
+
|
610
730
|
if avg_content_length > 200:
|
611
731
|
content_analysis["text_rich_tables"].append(table_name)
|
612
732
|
if avg_content_length > 500:
|
613
733
|
content_analysis["high_value_tables"].append(table_name)
|
614
734
|
if row_count < 5:
|
615
735
|
content_analysis["sparse_tables"].append(table_name)
|
616
|
-
|
736
|
+
|
617
737
|
except Exception:
|
618
738
|
continue
|
619
|
-
|
739
|
+
|
620
740
|
return content_analysis
|
621
741
|
|
622
742
|
|
623
|
-
def _analyze_schema_for_discovery(
|
743
|
+
def _analyze_schema_for_discovery(
|
744
|
+
db, tables: List[str], focus_area: Optional[str], depth: str
|
745
|
+
) -> Dict[str, Any]:
|
624
746
|
"""Analyze schema structure and organization."""
|
625
747
|
schema_analysis = {
|
626
748
|
"total_columns": 0,
|
627
749
|
"text_columns_by_table": {},
|
628
750
|
"well_structured_tables": [],
|
629
|
-
"schema_issues": []
|
751
|
+
"schema_issues": [],
|
630
752
|
}
|
631
|
-
|
753
|
+
|
632
754
|
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
633
|
-
|
755
|
+
|
634
756
|
for table_name in target_tables:
|
635
757
|
try:
|
636
758
|
schema_result = db.describe_table(table_name)
|
637
759
|
if schema_result.get("success"):
|
638
760
|
columns = schema_result.get("columns", [])
|
639
761
|
schema_analysis["total_columns"] += len(columns)
|
640
|
-
|
762
|
+
|
641
763
|
# Find text columns
|
642
764
|
text_columns = [col for col in columns if "TEXT" in col.get("type", "").upper()]
|
643
765
|
schema_analysis["text_columns_by_table"][table_name] = len(text_columns)
|
644
|
-
|
766
|
+
|
645
767
|
# Check for well-structured tables
|
646
768
|
has_id = any(col.get("name") == "id" for col in columns)
|
647
769
|
has_timestamp = any("timestamp" in col.get("name", "").lower() for col in columns)
|
648
770
|
has_text_content = len(text_columns) > 0
|
649
|
-
|
771
|
+
|
650
772
|
if has_id and has_timestamp and has_text_content:
|
651
773
|
schema_analysis["well_structured_tables"].append(table_name)
|
652
|
-
|
774
|
+
|
653
775
|
# Identify schema issues
|
654
776
|
if len(columns) < 2:
|
655
|
-
schema_analysis["schema_issues"].append(
|
777
|
+
schema_analysis["schema_issues"].append(
|
778
|
+
f"Table '{table_name}' has very few columns"
|
779
|
+
)
|
656
780
|
if not has_id:
|
657
781
|
schema_analysis["schema_issues"].append(f"Table '{table_name}' lacks ID column")
|
658
|
-
|
782
|
+
|
659
783
|
except Exception:
|
660
784
|
continue
|
661
|
-
|
785
|
+
|
662
786
|
return schema_analysis
|
663
787
|
|
664
788
|
|
665
|
-
def _assess_content_quality(
|
789
|
+
def _assess_content_quality(
|
790
|
+
db, tables: List[str], focus_area: Optional[str], depth: str
|
791
|
+
) -> Dict[str, Any]:
|
666
792
|
"""Assess overall content quality."""
|
667
793
|
quality_analysis = {
|
668
794
|
"quality_scores": {},
|
669
795
|
"overall_quality": 0.0,
|
670
796
|
"improvement_opportunities": [],
|
671
|
-
"quality_distribution": {"high": 0, "medium": 0, "low": 0}
|
797
|
+
"quality_distribution": {"high": 0, "medium": 0, "low": 0},
|
672
798
|
}
|
673
|
-
|
799
|
+
|
674
800
|
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
675
801
|
total_score = 0
|
676
802
|
table_count = 0
|
677
|
-
|
803
|
+
|
678
804
|
for table_name in target_tables:
|
679
805
|
try:
|
680
806
|
rows_result = db.read_rows(table_name)
|
681
807
|
if rows_result.get("success"):
|
682
808
|
rows = rows_result.get("rows", [])
|
683
|
-
|
809
|
+
|
684
810
|
if not rows:
|
685
811
|
quality_analysis["quality_scores"][table_name] = 0.0
|
686
|
-
quality_analysis["improvement_opportunities"].append(
|
812
|
+
quality_analysis["improvement_opportunities"].append(
|
813
|
+
f"Table '{table_name}' is empty"
|
814
|
+
)
|
687
815
|
quality_analysis["quality_distribution"]["low"] += 1
|
688
816
|
continue
|
689
|
-
|
817
|
+
|
690
818
|
# Calculate quality score
|
691
819
|
sample_size = min(5, len(rows))
|
692
820
|
content_scores = []
|
693
|
-
|
821
|
+
|
694
822
|
for row in rows[:sample_size]:
|
695
823
|
row_score = 0
|
696
|
-
non_null_fields = sum(
|
824
|
+
non_null_fields = sum(
|
825
|
+
1 for v in row.values() if v is not None and str(v).strip()
|
826
|
+
)
|
697
827
|
total_content_length = sum(len(str(v)) for v in row.values() if v is not None)
|
698
|
-
|
828
|
+
|
699
829
|
# Score based on completeness and content richness
|
700
830
|
if non_null_fields > 2:
|
701
831
|
row_score += 3
|
@@ -703,12 +833,12 @@ def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], de
|
|
703
833
|
row_score += 4
|
704
834
|
if total_content_length > 500:
|
705
835
|
row_score += 3
|
706
|
-
|
836
|
+
|
707
837
|
content_scores.append(min(10, row_score))
|
708
|
-
|
838
|
+
|
709
839
|
table_quality = sum(content_scores) / len(content_scores) if content_scores else 0
|
710
840
|
quality_analysis["quality_scores"][table_name] = round(table_quality, 1)
|
711
|
-
|
841
|
+
|
712
842
|
# Categorize quality
|
713
843
|
if table_quality >= 7:
|
714
844
|
quality_analysis["quality_distribution"]["high"] += 1
|
@@ -719,15 +849,17 @@ def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], de
|
|
719
849
|
quality_analysis["improvement_opportunities"].append(
|
720
850
|
f"Table '{table_name}' has low content quality (score: {table_quality:.1f})"
|
721
851
|
)
|
722
|
-
|
852
|
+
|
723
853
|
total_score += table_quality
|
724
854
|
table_count += 1
|
725
|
-
|
855
|
+
|
726
856
|
except Exception:
|
727
857
|
continue
|
728
|
-
|
729
|
-
quality_analysis["overall_quality"] =
|
730
|
-
|
858
|
+
|
859
|
+
quality_analysis["overall_quality"] = (
|
860
|
+
round(total_score / table_count, 1) if table_count > 0 else 0.0
|
861
|
+
)
|
862
|
+
|
731
863
|
return quality_analysis
|
732
864
|
|
733
865
|
|
@@ -737,11 +869,11 @@ def _analyze_search_readiness(db, tables: List[str], focus_area: Optional[str])
|
|
737
869
|
"semantic_ready_tables": [],
|
738
870
|
"text_searchable_tables": [],
|
739
871
|
"search_optimization_needed": [],
|
740
|
-
"embedding_coverage": {}
|
872
|
+
"embedding_coverage": {},
|
741
873
|
}
|
742
|
-
|
874
|
+
|
743
875
|
target_tables = [focus_area] if focus_area and focus_area in tables else tables
|
744
|
-
|
876
|
+
|
745
877
|
for table_name in target_tables:
|
746
878
|
try:
|
747
879
|
# Check schema for text content
|
@@ -749,134 +881,148 @@ def _analyze_search_readiness(db, tables: List[str], focus_area: Optional[str])
|
|
749
881
|
if schema_result.get("success"):
|
750
882
|
columns = schema_result.get("columns", [])
|
751
883
|
text_columns = [col for col in columns if "TEXT" in col.get("type", "").upper()]
|
752
|
-
|
884
|
+
|
753
885
|
if text_columns:
|
754
886
|
search_analysis["text_searchable_tables"].append(table_name)
|
755
|
-
|
887
|
+
|
756
888
|
# Check semantic search readiness if available
|
757
889
|
if is_semantic_search_available():
|
758
890
|
embedding_stats = db.get_embedding_stats(table_name)
|
759
891
|
if embedding_stats.get("success"):
|
760
892
|
coverage = embedding_stats.get("coverage_percent", 0)
|
761
893
|
search_analysis["embedding_coverage"][table_name] = coverage
|
762
|
-
|
894
|
+
|
763
895
|
if coverage > 80:
|
764
896
|
search_analysis["semantic_ready_tables"].append(table_name)
|
765
897
|
elif len(text_columns) > 0:
|
766
898
|
search_analysis["search_optimization_needed"].append(table_name)
|
767
|
-
|
899
|
+
|
768
900
|
except Exception:
|
769
901
|
continue
|
770
|
-
|
902
|
+
|
771
903
|
return search_analysis
|
772
904
|
|
773
905
|
|
774
|
-
def _generate_discovery_insights(
    discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str], depth: str
) -> tuple:
    """Turn a discovery overview into insights, recommendations and next steps.

    Args:
        discovery_goal: Selects the goal-specific messages. Recognised values
            are "understand_content", "find_patterns", "explore_structure",
            "assess_quality" and "prepare_search"; any other value produces
            only the universal messages.
        overview: Aggregated discovery results. Every key is read with
            ``.get`` so missing entries fall back to empty defaults.
        focus_area: Accepted for signature compatibility; not read here.
        depth: Accepted for signature compatibility; not read here.

    Returns:
        A 3-tuple ``(insights, recommendations, next_steps)`` of string lists.
    """
    insights: List[str] = []
    recommendations: List[str] = []
    next_steps: List[str] = []

    table_total = overview.get("total_tables", 0)
    row_total = overview.get("total_rows", 0)

    def _for_understand_content() -> None:
        insights.append(f"Memory bank contains {table_total} tables with {row_total} total rows")

        valuable = overview.get("high_value_tables", [])
        if valuable:
            insights.append(f"High-value content found in: {', '.join(valuable[:3])}")
            recommendations.append(
                f"Focus search efforts on high-value tables: {', '.join(valuable)}"
            )
            next_steps.append(f"Use auto_smart_search() to explore content in {valuable[0]}")

        sparse = overview.get("sparse_tables", [])
        if sparse:
            insights.append(f"Sparse tables detected: {', '.join(sparse)}")
            recommendations.append("Consider consolidating or expanding sparse tables")

    def _for_find_patterns() -> None:
        text_rich = overview.get("text_rich_tables", [])
        if text_rich:
            insights.append(f"Text-rich content found in {len(text_rich)} tables")
            next_steps.append("Use semantic search to find content patterns")

        scores = overview.get("quality_scores", {})
        if scores:
            mean_quality = sum(scores.values()) / len(scores)
            insights.append(f"Average content quality: {mean_quality:.1f}/10")

    def _for_explore_structure() -> None:
        solid_tables = overview.get("well_structured_tables", [])
        if solid_tables:
            insights.append(f"Well-structured tables: {', '.join(solid_tables)}")
            recommendations.append("Use well-structured tables as primary data sources")

        issues = overview.get("schema_issues", [])
        if issues:
            # Only the first three issues are surfaced.
            insights.extend(issues[:3])

    def _for_assess_quality() -> None:
        quality = overview.get("overall_quality", 0)
        insights.append(f"Overall content quality score: {quality}/10")
        recommendations.extend(overview.get("improvement_opportunities", [])[:3])

    def _for_prepare_search() -> None:
        ready = overview.get("semantic_ready_tables", [])
        needs_work = overview.get("search_optimization_needed", [])

        if ready:
            insights.append(f"Semantic search ready for {len(ready)} tables")
            next_steps.append("Use auto_semantic_search() for conceptual queries")

        if needs_work:
            insights.append(f"Search optimization needed for {len(needs_work)} tables")
            next_steps.append(f"Set up embeddings for: {', '.join(needs_work[:2])}")

    # Goal-specific messages: the first goal equal to discovery_goal wins.
    goal_handlers = (
        ("understand_content", _for_understand_content),
        ("find_patterns", _for_find_patterns),
        ("explore_structure", _for_explore_structure),
        ("assess_quality", _for_assess_quality),
        ("prepare_search", _for_prepare_search),
    )
    for goal_name, handler in goal_handlers:
        if discovery_goal == goal_name:
            handler()
            break

    # Universal recommendation, added whatever the goal.
    recommendations.append(
        "Use auto_smart_search() for best search results"
        if overview.get("semantic_search_available")
        else "Install sentence-transformers for semantic search capabilities"
    )

    # Fall back to generic next steps when the goal produced none.
    if not next_steps:
        next_steps.extend(
            [
                "Use explore_tables() for detailed content examination",
                "Try auto_smart_search() to find specific information",
            ]
        )

    return insights, recommendations, next_steps
|
849
987
|
|
850
988
|
|
851
|
-
def _generate_quick_actions(
    discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str]
) -> List[Dict[str, Any]]:
    """Build the list of suggested follow-up tool calls.

    Args:
        discovery_goal: Goal name; only "understand_content" adds a
            goal-specific action.
        overview: Discovery results; reads "high_value_tables" and
            "semantic_search_available".
        focus_area: Accepted for signature compatibility; not read here.

    Returns:
        Action dicts, each with "action", "tool", "params" and "description"
        keys. The quality-assessment action is always the last entry.
    """
    suggestions: List[Dict[str, Any]] = []

    valuable = overview.get("high_value_tables", [])
    if discovery_goal == "understand_content" and valuable:
        top_table = valuable[0]
        suggestions.append(
            {
                "action": "Explore High-Value Content",
                "tool": "read_rows",
                "params": {"table_name": top_table},
                "description": f"Examine content in {top_table} table",
            }
        )

    if overview.get("semantic_search_available"):
        smart_search = {
            "action": "Smart Search",
            "tool": "auto_smart_search",
            "params": {"query": "important recent information", "limit": 5},
            "description": "Find important content using intelligent search",
        }
        suggestions.append(smart_search)

    quality_check = {
        "action": "Quality Assessment",
        "tool": "get_content_health_score",
        "params": {},
        "description": "Get detailed quality metrics and recommendations",
    }
    suggestions.append(quality_check)

    return suggestions
|
881
1027
|
|
882
1028
|
|
@@ -887,15 +1033,18 @@ def _store_discovery_pattern(db, discovery_session: Dict[str, Any]) -> None:
|
|
887
1033
|
tables_result = db.list_tables()
|
888
1034
|
if tables_result.get("success") and "discovery_patterns" in tables_result.get("tables", []):
|
889
1035
|
# Store the discovery session
|
890
|
-
db.insert_row(
|
891
|
-
"
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
1036
|
+
db.insert_row(
|
1037
|
+
"discovery_patterns",
|
1038
|
+
{
|
1039
|
+
"agent_id": discovery_session.get("agent_id"),
|
1040
|
+
"goal": discovery_session.get("goal"),
|
1041
|
+
"focus_area": discovery_session.get("focus_area"),
|
1042
|
+
"depth": discovery_session.get("depth"),
|
1043
|
+
"steps_completed": str(discovery_session.get("steps_completed", [])),
|
1044
|
+
"success": True,
|
1045
|
+
"timestamp": discovery_session.get("timestamp"),
|
1046
|
+
},
|
1047
|
+
)
|
899
1048
|
except Exception:
|
900
1049
|
# Silently fail if learning storage isn't available
|
901
1050
|
pass
|
@@ -904,14 +1053,18 @@ def _store_discovery_pattern(db, discovery_session: Dict[str, Any]) -> None:
|
|
904
1053
|
def _customize_template(template: Dict[str, Any], customize_for: str) -> Dict[str, Any]:
|
905
1054
|
"""Customize template for specific domain or topic."""
|
906
1055
|
customized = template.copy()
|
907
|
-
|
1056
|
+
|
908
1057
|
# Add customization note
|
909
1058
|
customized["customized_for"] = customize_for
|
910
1059
|
customized["customization_note"] = f"Template customized for: {customize_for}"
|
911
|
-
|
1060
|
+
|
912
1061
|
# Modify search queries in workflow to include customization
|
913
1062
|
for step in customized.get("workflow", []):
|
914
|
-
if step.get("tool") in [
|
1063
|
+
if step.get("tool") in [
|
1064
|
+
"auto_smart_search",
|
1065
|
+
"auto_semantic_search",
|
1066
|
+
"search_content",
|
1067
|
+
]:
|
915
1068
|
params = step.get("params", {})
|
916
1069
|
if "query" in params and params["query"].startswith("REPLACE_WITH"):
|
917
1070
|
# Keep the placeholder for user customization
|
@@ -919,35 +1072,36 @@ def _customize_template(template: Dict[str, Any], customize_for: str) -> Dict[st
|
|
919
1072
|
elif "query" in params:
|
920
1073
|
# Add customization to existing query
|
921
1074
|
params["query"] = f"{customize_for} {params['query']}"
|
922
|
-
|
1075
|
+
|
923
1076
|
return customized
|
924
1077
|
|
925
1078
|
|
926
1079
|
# Relationship discovery helper functions
|
927
1080
|
|
1081
|
+
|
928
1082
|
def _discover_foreign_keys(db, target_table: str, all_tables: List[str]) -> List[str]:
|
929
1083
|
"""Discover foreign key relationships."""
|
930
1084
|
relationships = []
|
931
|
-
|
1085
|
+
|
932
1086
|
try:
|
933
1087
|
# Get target table schema
|
934
1088
|
target_schema = db.describe_table(target_table)
|
935
1089
|
if not target_schema.get("success"):
|
936
1090
|
return relationships
|
937
|
-
|
1091
|
+
|
938
1092
|
target_columns = target_schema.get("columns", [])
|
939
1093
|
target_col_names = [col.get("name", "") for col in target_columns]
|
940
|
-
|
1094
|
+
|
941
1095
|
# Check other tables for potential foreign key references
|
942
1096
|
for other_table in all_tables:
|
943
1097
|
if other_table == target_table:
|
944
1098
|
continue
|
945
|
-
|
1099
|
+
|
946
1100
|
try:
|
947
1101
|
other_schema = db.describe_table(other_table)
|
948
1102
|
if other_schema.get("success"):
|
949
1103
|
other_columns = other_schema.get("columns", [])
|
950
|
-
|
1104
|
+
|
951
1105
|
for col in other_columns:
|
952
1106
|
col_name = col.get("name", "")
|
953
1107
|
# Look for naming patterns that suggest foreign keys
|
@@ -955,222 +1109,257 @@ def _discover_foreign_keys(db, target_table: str, all_tables: List[str]) -> List
|
|
955
1109
|
potential_ref = col_name.replace("_id", "").replace("Id", "")
|
956
1110
|
if potential_ref == target_table or f"{potential_ref}s" == target_table:
|
957
1111
|
relationships.append(f"{other_table}.{col_name}")
|
958
|
-
|
1112
|
+
|
959
1113
|
# Look for exact column name matches (potential shared keys)
|
960
1114
|
if col_name in target_col_names and col_name != "id":
|
961
1115
|
relationships.append(f"{other_table}.{col_name} (shared key)")
|
962
|
-
|
1116
|
+
|
963
1117
|
except Exception:
|
964
1118
|
continue
|
965
|
-
|
1119
|
+
|
966
1120
|
except Exception:
|
967
1121
|
pass
|
968
|
-
|
1122
|
+
|
969
1123
|
return relationships
|
970
1124
|
|
971
1125
|
|
972
|
-
def _discover_semantic_relationships(
    db, target_table: str, all_tables: List[str], threshold: float
) -> List[Dict[str, Any]]:
    """Find tables whose content is semantically close to the target table.

    The first row of ``target_table`` is flattened into a text probe of at
    most 200 characters, which is then run as a semantic search against every
    other table. A table is reported when the mean similarity of its hits
    reaches ``threshold``.

    Args:
        db: Database object providing ``read_rows`` and ``semantic_search``.
        target_table: Table whose first row supplies the probe text.
        all_tables: Candidate table names; the target itself is skipped.
        threshold: Minimum similarity, passed to the search and also applied
            to the mean score of the returned hits.

    Returns:
        Dicts with "table", "similarity" (rounded to 2 decimals) and
        "related_content_count". Empty when semantic search is unavailable,
        the target has no usable content, or an error occurs.
    """
    related: List[Dict[str, Any]] = []

    if not is_semantic_search_available():
        return related

    try:
        fetched = db.read_rows(target_table)
        if not (fetched.get("success") and fetched.get("rows")):
            return related

        # Build the probe text from the non-null values of the first row.
        first_row = fetched["rows"][0]
        pieces = [str(value) for value in first_row.values() if value is not None]
        probe = " ".join(pieces)[:200]

        # Probes shorter than 10 characters are treated as too thin to search with.
        if len(probe.strip()) < 10:
            return related

        for candidate in all_tables:
            if candidate == target_table:
                continue

            try:
                outcome = db.semantic_search(
                    probe,
                    [candidate],
                    "embedding",
                    None,
                    threshold,
                    3,
                    "all-MiniLM-L6-v2",
                )

                if not (outcome.get("success") and outcome.get("results")):
                    continue

                hits = outcome["results"]
                mean_score = sum(hit.get("similarity_score", 0) for hit in hits) / len(hits)
                if mean_score < threshold:
                    continue

                related.append(
                    {
                        "table": candidate,
                        "similarity": round(mean_score, 2),
                        "related_content_count": len(hits),
                    }
                )
            except Exception:
                # Best effort: a failure on one table must not stop the scan.
                continue

    except Exception:
        # Best effort: return whatever was collected before the failure.
        pass

    return related
|
1021
1187
|
|
1022
1188
|
|
1023
1189
|
def _discover_temporal_relationships(db, target_table: str, all_tables: List[str]) -> List[str]:
    """Find other tables that, like the target, have a time-related column.

    A column counts as time-related when its lower-cased name contains
    "date" or "time" (which also covers "timestamp"). If the target table has
    no such column, nothing is reported.

    Args:
        db: Database object providing ``describe_table``.
        target_table: Table that must itself have a time-related column.
        all_tables: Candidate table names; the target itself is skipped.

    Returns:
        Names of the other tables with a time-related column, in the order
        given by ``all_tables``. Empty when the target cannot be described,
        has no time-related column, or an error occurs.
    """
    relationships: List[str] = []

    def _has_temporal_column(schema: Dict[str, Any]) -> bool:
        """Return True when any column name in *schema* contains "date" or "time"."""
        for col in schema.get("columns", []):
            name = col.get("name", "").lower()
            # "time" is a substring of "timestamp", so one check covers both.
            if "date" in name or "time" in name:
                return True
        return False

    try:
        target_schema = db.describe_table(target_table)
        if not target_schema.get("success"):
            return relationships

        if not _has_temporal_column(target_schema):
            return relationships

        for other_table in all_tables:
            if other_table == target_table:
                continue

            try:
                other_schema = db.describe_table(other_table)
                if other_schema.get("success") and _has_temporal_column(other_schema):
                    relationships.append(other_table)
            except Exception:
                # Best effort: a table that cannot be described is skipped.
                continue

    except Exception:
        # Best effort: return whatever was collected before the failure.
        pass

    return relationships
|
1066
1236
|
|
1067
1237
|
|
1068
1238
|
def _discover_naming_relationships(target_table: str, all_tables: List[str]) -> List[str]:
    """Find tables whose names suggest a relationship with the target table.

    Names are compared case-insensitively. Another table is reported when any
    of these holds, checked in this order:

    1. One name is the other plus a trailing "s" (plural/singular pair).
    2. Both names are longer than 3 characters and share their first 4 or
       their last 4 characters.
    3. The names share at least one underscore-separated word.

    Args:
        target_table: Table name to compare against.
        all_tables: Candidate table names; an entry equal to the target is
            skipped.

    Returns:
        Related table names in the order given by ``all_tables``, each at
        most once per occurrence in that list.
    """
    relationships: List[str] = []

    target_lower = target_table.lower()
    # Leading, trailing or doubled underscores make split() emit empty strings.
    # They are dropped so that "_foo" and "_bar" do not match on "".
    target_words = {word for word in target_lower.split("_") if word}

    for other_table in all_tables:
        if other_table == target_table:
            continue

        other_lower = other_table.lower()

        # Plural/singular pair, in either direction.
        is_plural_pair = (target_lower.endswith("s") and other_lower == target_lower[:-1]) or (
            other_lower.endswith("s") and target_lower == other_lower[:-1]
        )
        if is_plural_pair:
            relationships.append(other_table)
            continue

        # Shared 4-character prefix or suffix, only for names longer than 3.
        if len(target_lower) > 3 and len(other_lower) > 3:
            same_prefix = target_lower[:4] == other_lower[:4]
            same_suffix = target_lower[-4:] == other_lower[-4:]
            if same_prefix or same_suffix:
                relationships.append(other_table)
                continue

        # Shared underscore-separated word.
        other_words = {word for word in other_lower.split("_") if word}
        if target_words & other_words:
            relationships.append(other_table)

    return relationships
|
1108
1279
|
|
1109
1280
|
|
1110
|
-
def _identify_strongest_connections(
    relationships: Dict[str, Any],
) -> List[Dict[str, Any]]:
    """Rank tables by how many relationships were discovered for them.

    Args:
        relationships: Mapping of table name to its relationship dict. The
            lists under "foreign_key_refs", "semantic_similar",
            "temporal_related" and "naming_related" are counted; a missing
            key counts as zero.

    Returns:
        Up to five entries, highest total first, each with "table",
        "total_connections" and a per-kind "connection_types" breakdown.
        Tables with no connections are left out; ties keep input order.
    """
    ranked: List[Dict[str, Any]] = []

    for table_name, table_rels in relationships.items():
        # Count each kind once; the total is the sum of this breakdown.
        by_kind = {
            "structural": len(table_rels.get("foreign_key_refs", [])),
            "semantic": len(table_rels.get("semantic_similar", [])),
            "temporal": len(table_rels.get("temporal_related", [])),
            "naming": len(table_rels.get("naming_related", [])),
        }
        link_total = sum(by_kind.values())
        if link_total <= 0:
            continue

        ranked.append(
            {
                "table": table_name,
                "total_connections": link_total,
                "connection_types": by_kind,
            }
        )

    # The sort is stable, so equal totals stay in their original order.
    ordered = sorted(ranked, key=lambda entry: entry["total_connections"], reverse=True)
    return ordered[:5]
|
1136
1313
|
|
1137
1314
|
|
1138
|
-
def _generate_relationship_recommendations(
    relationships: Dict[str, Any], insights: List[str]
) -> List[str]:
    """Derive actionable recommendations from discovered relationships.

    Args:
        relationships: Mapping of table name to its relationship dict, with
            lists under "foreign_key_refs", "semantic_similar",
            "temporal_related" and "naming_related".
        insights: Accepted for signature compatibility; not read here.

    Returns:
        Recommendation strings, each naming at most three tables. A single
        generic recommendation is returned when nothing specific applies.
    """
    counted_kinds = (
        "foreign_key_refs",
        "semantic_similar",
        "temporal_related",
        "naming_related",
    )

    # Tables with three or more connections across all kinds.
    hubs = [
        name
        for name, rels in relationships.items()
        if sum(len(rels.get(kind, [])) for kind in counted_kinds) >= 3
    ]
    with_semantic = [name for name, rels in relationships.items() if rels.get("semantic_similar")]
    with_temporal = [name for name, rels in relationships.items() if rels.get("temporal_related")]

    advice: List[str] = []

    if hubs:
        advice.append(f"Focus queries on highly connected tables: {', '.join(hubs[:3])}")

    if with_semantic:
        advice.append(
            f"Use semantic search across related tables: {', '.join(with_semantic[:3])}"
        )

    if with_temporal:
        advice.append(
            f"Consider temporal analysis for time-related tables: {', '.join(with_temporal[:3])}"
        )

    if not advice:
        advice.append(
            "Consider adding more structured relationships or content to improve discoverability"
        )

    return advice
|