mcp-sqlite-memory-bank 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1428 @@
1
+ """
2
+ Discovery and Exploration Tools for SQLite Memory Bank
3
+ =====================================================
4
+
5
+ This module contains advanced discovery tools that help LLM agents intelligently
6
+ explore and understand memory bank content through guided workflows and orchestrated
7
+ discovery processes.
8
+
9
+ Author: Robert Meisner
10
+ """
11
+
12
+ import logging
13
+ from typing import Any, Dict, List, Optional, cast, Union
14
+ from datetime import datetime
15
+
16
+ from ..database import get_database
17
+ from ..semantic import is_semantic_search_available
18
+ from ..types import ToolResponse
19
+ from ..utils import catch_errors
20
+
21
+
22
@catch_errors
def intelligent_discovery(
    discovery_goal: str = "understand_content",
    focus_area: Optional[str] = None,
    depth: str = "moderate",
    agent_id: Optional[str] = None,
) -> ToolResponse:
    """
    🧠 **INTELLIGENT DISCOVERY** - AI-guided exploration of your memory bank!

    Orchestrates multiple discovery tools based on your exploration goals.
    Provides step-by-step guidance and actionable insights tailored to your needs.

    Args:
        discovery_goal (str): What you want to achieve
            - "understand_content": Learn what data is available and how it's organized
            - "find_patterns": Discover themes, relationships, and content patterns
            - "explore_structure": Understand database schema and organization
            - "assess_quality": Evaluate content quality and completeness
            - "prepare_search": Get ready for effective content searching
        focus_area (Optional[str]): Specific table or topic to focus on (default: all)
        depth (str): How thorough the discovery should be
            - "quick": Fast overview with key insights
            - "moderate": Balanced analysis with actionable recommendations
            - "comprehensive": Deep dive with detailed analysis
        agent_id (Optional[str]): Agent identifier for learning discovery patterns

    Returns:
        ToolResponse: On success: {"success": True, "discovery": Dict, "next_steps": List}
                     On error: {"success": False, "error": str, "category": str, "details": dict}

    Examples:
        >>> intelligent_discovery("understand_content")
        {"success": True, "discovery": {
            "overview": {"total_tables": 5, "total_rows": 234},
            "content_summary": {...},
            "recommendations": [...]
        }, "next_steps": ["Use auto_smart_search() for specific queries"]}

        >>> intelligent_discovery("find_patterns", focus_area="technical_decisions")
        {"success": True, "discovery": {
            "patterns": {"decision_themes": [...], "temporal_trends": [...]},
            "insights": [...]
        }}

    FastMCP Tool Info:
        - **COMPLETELY AUTOMATED**: No manual tool chaining required
        - **GOAL-ORIENTED**: Tailored discovery based on your specific objectives
        - **ACTIONABLE INSIGHTS**: Always includes concrete next steps
        - **LEARNING**: Improves recommendations based on usage patterns
        - **PERFECT FOR AGENTS**: Single tool that orchestrates complex discovery workflows
    """
    # NOTE(review): the explicit try/except below duplicates what @catch_errors
    # presumably provides, but it produces a goal-specific error payload, so it
    # is kept as-is.
    try:
        # Imported lazily to avoid a circular import at module load time.
        from .. import server

        db = get_database(server.DB_PATH)

        # Initialize discovery session.  This dict is both returned to the
        # caller (as "discovery_session") and persisted via
        # _store_discovery_pattern() when an agent_id is supplied.
        discovery_session: Dict[str, Any] = {
            "goal": discovery_goal,
            "focus_area": focus_area,
            "depth": depth,
            "timestamp": datetime.now().isoformat(),
            "agent_id": agent_id,
            "steps_completed": [],
            "insights": [],
            "recommendations": [],
        }

        # Step 1: Basic overview (always runs, regardless of goal).
        discovery_session["steps_completed"].append("basic_overview")
        tables_result = db.list_tables()
        if not tables_result.get("success"):
            return cast(
                ToolResponse,
                {
                    "success": False,
                    "error": "Failed to get basic overview",
                    "category": "DISCOVERY_ERROR",
                    "details": tables_result,
                },
            )

        tables = tables_result.get("tables", [])
        # The helper analyses below merge their results into this flat
        # `overview` dict, which _generate_discovery_insights() then reads.
        overview = {
            "total_tables": len(tables),
            "available_tables": tables,
            "semantic_search_available": is_semantic_search_available(),
        }

        # Step 2: Content analysis based on goal
        if discovery_goal in ["understand_content", "find_patterns", "assess_quality"]:
            discovery_session["steps_completed"].append("content_analysis")
            content_analysis = _analyze_content_for_discovery(
                db, tables, focus_area, depth
            )
            overview.update(content_analysis)

        # Step 3: Schema analysis for structure exploration
        if discovery_goal in ["explore_structure", "understand_content"]:
            discovery_session["steps_completed"].append("schema_analysis")
            schema_analysis = _analyze_schema_for_discovery(
                db, tables, focus_area, depth
            )
            overview.update(schema_analysis)

        # Step 4: Quality assessment
        if discovery_goal in ["assess_quality", "find_patterns"]:
            discovery_session["steps_completed"].append("quality_assessment")
            quality_analysis = _assess_content_quality(db, tables, focus_area, depth)
            overview.update(quality_analysis)

        # Step 5: Search readiness for search preparation
        if discovery_goal in ["prepare_search", "understand_content"]:
            discovery_session["steps_completed"].append("search_readiness")
            search_analysis = _analyze_search_readiness(db, tables, focus_area)
            overview.update(search_analysis)

        # Step 6: Generate insights and recommendations from the merged overview.
        insights, recommendations, next_steps = _generate_discovery_insights(
            discovery_goal, overview, focus_area, depth
        )

        discovery_session["insights"] = insights
        discovery_session["recommendations"] = recommendations

        # Step 7: Store discovery pattern for learning (if agent_id provided).
        # _store_discovery_pattern is defined elsewhere in this module/package.
        if agent_id:
            _store_discovery_pattern(db, discovery_session)

        return cast(
            ToolResponse,
            {
                "success": True,
                "discovery": {
                    "goal": discovery_goal,
                    "overview": overview,
                    "insights": insights,
                    "recommendations": recommendations,
                    "focus_area": focus_area,
                    "depth": depth,
                    "steps_completed": discovery_session["steps_completed"],
                },
                "next_steps": next_steps,
                "discovery_session": discovery_session,
                "quick_actions": _generate_quick_actions(
                    discovery_goal, overview, focus_area
                ),
            },
        )

    except Exception as e:
        # Degrade to a structured error response so agent callers always get
        # a ToolResponse-shaped payload.
        return cast(
            ToolResponse,
            {
                "success": False,
                "error": f"Intelligent discovery failed: {str(e)}",
                "category": "DISCOVERY_ERROR",
                "details": {
                    "goal": discovery_goal,
                    "focus_area": focus_area,
                    "depth": depth,
                    "agent_id": agent_id,
                },
            },
        )
+
189
+
190
@catch_errors
def discovery_templates(
    template_type: str = "first_time_exploration", customize_for: Optional[str] = None
) -> ToolResponse:
    """
    📋 **DISCOVERY TEMPLATES** - Pre-built exploration workflows for common scenarios!

    Provides step-by-step discovery templates optimized for specific agent use cases.
    Each template includes the exact sequence of tools to call and what to look for.

    Args:
        template_type (str): Type of discovery template to provide
            - "first_time_exploration": Complete workflow for new agents
            - "content_audit": Systematic content quality review
            - "search_optimization": Prepare memory bank for optimal searching
            - "relationship_mapping": Discover connections between data
            - "problem_solving": Find information to solve specific problems
            - "knowledge_extraction": Extract insights from stored knowledge
        customize_for (Optional[str]): Customize template for specific domain/topic

    Returns:
        ToolResponse: {"success": True, "template": Dict, "workflow": List}

    Examples:
        >>> discovery_templates("first_time_exploration")
        {"success": True, "template": {
            "name": "First Time Exploration",
            "description": "Complete discovery workflow for new agents",
            "workflow": [
                {"step": 1, "tool": "intelligent_discovery", "params": {...}},
                {"step": 2, "tool": "explore_tables", "params": {...}}
            ]
        }}

    FastMCP Tool Info:
        - **PROVEN WORKFLOWS**: Battle-tested discovery sequences
        - **STEP-BY-STEP GUIDANCE**: Exact tools and parameters to use
        - **CUSTOMIZABLE**: Adapt templates to your specific needs
        - **LEARNING-OPTIMIZED**: Based on successful discovery patterns
    """
    try:
        # Static registry of workflow templates keyed by template_type.
        # NOTE(review): the docstring advertises "relationship_mapping" and
        # "knowledge_extraction" templates that are not defined below —
        # confirm whether they should be added or removed from the docs.
        templates: Dict[str, Dict[str, Any]] = {
            "first_time_exploration": {
                "name": "First Time Exploration",
                "description": "Complete discovery workflow for agents new to this memory bank",
                "estimated_time": "2-3 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Get Overview",
                        "tool": "intelligent_discovery",
                        "params": {
                            "discovery_goal": "understand_content",
                            "depth": "moderate",
                        },
                        "purpose": "Understand what data is available and how it's organized",
                        "look_for": ["total tables", "content types", "data volume"],
                    },
                    {
                        "step": 2,
                        "action": "Explore Structure",
                        "tool": "explore_tables",
                        "params": {"include_row_counts": True},
                        "purpose": "See detailed table schemas and sample data",
                        "look_for": [
                            "column types",
                            "sample content",
                            "data relationships",
                        ],
                    },
                    {
                        "step": 3,
                        "action": "Test Search Capabilities",
                        "tool": "auto_smart_search",
                        "params": {"query": "recent important information", "limit": 5},
                        "purpose": "Understand search capabilities and content accessibility",
                        "look_for": [
                            "search quality",
                            "result relevance",
                            "content types found",
                        ],
                    },
                    {
                        "step": 4,
                        "action": "Assess Quality",
                        "tool": "get_content_health_score",
                        "params": {},
                        "purpose": "Understand overall memory bank quality and opportunities",
                        "look_for": [
                            "health score",
                            "improvement recommendations",
                            "strengths",
                        ],
                    },
                ],
                "success_criteria": [
                    "Understand what types of information are stored",
                    "Know which tables contain the most valuable content",
                    "Identify best search strategies for this memory bank",
                    "Have actionable next steps for productive use",
                ],
            },
            "content_audit": {
                "name": "Content Quality Audit",
                "description": "Systematic review of content quality and completeness",
                "estimated_time": "5-7 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Quality Assessment",
                        "tool": "get_content_health_score",
                        "params": {},
                        "purpose": "Get overall quality metrics and problem areas",
                        "look_for": [
                            "quality scores",
                            "problem tables",
                            "recommendations",
                        ],
                    },
                    {
                        "step": 2,
                        "action": "Pattern Analysis",
                        "tool": "analyze_memory_patterns",
                        "params": {},
                        "purpose": "Identify content patterns and organizational issues",
                        "look_for": [
                            "content distribution",
                            "sparse tables",
                            "organization gaps",
                        ],
                    },
                    {
                        "step": 3,
                        "action": "Table-by-Table Review",
                        "tool": "explore_tables",
                        "params": {"include_row_counts": True},
                        "purpose": "Detailed examination of each table's content",
                        "look_for": [
                            "empty tables",
                            "low-quality content",
                            "missing data",
                        ],
                    },
                    {
                        "step": 4,
                        "action": "Search Readiness",
                        "tool": "intelligent_discovery",
                        "params": {
                            "discovery_goal": "prepare_search",
                            "depth": "comprehensive",
                        },
                        "purpose": "Ensure content is optimally searchable",
                        "look_for": [
                            "embedding coverage",
                            "search optimization opportunities",
                        ],
                    },
                ],
                "success_criteria": [
                    "Identify all content quality issues",
                    "Have specific recommendations for improvement",
                    "Understand which content areas need attention",
                    "Know how to optimize for better searchability",
                ],
            },
            "search_optimization": {
                "name": "Search Optimization Setup",
                "description": "Prepare memory bank for optimal content discovery and searching",
                "estimated_time": "3-5 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Search Capability Assessment",
                        "tool": "intelligent_discovery",
                        "params": {
                            "discovery_goal": "prepare_search",
                            "depth": "comprehensive",
                        },
                        "purpose": "Understand current search capabilities and gaps",
                        "look_for": [
                            "semantic readiness",
                            "text column identification",
                            "embedding status",
                        ],
                    },
                    {
                        "step": 2,
                        "action": "Content Analysis for Search",
                        "tool": "analyze_memory_patterns",
                        "params": {},
                        "purpose": "Identify high-value content for search optimization",
                        "look_for": [
                            "text-rich tables",
                            "high-value content",
                            "search opportunities",
                        ],
                    },
                    {
                        "step": 3,
                        "action": "Test Current Search",
                        "tool": "search_content",
                        "params": {"query": "test search capabilities", "limit": 10},
                        "purpose": "Baseline current search performance",
                        "look_for": ["search result quality", "coverage", "relevance"],
                    },
                    {
                        "step": 4,
                        "action": "Semantic Search Setup",
                        "tool": "auto_semantic_search",
                        "params": {"query": "important valuable content", "limit": 5},
                        "purpose": "Enable and test semantic search capabilities",
                        "look_for": [
                            "automatic embedding generation",
                            "semantic result quality",
                        ],
                    },
                ],
                "success_criteria": [
                    "Semantic search is enabled for key tables",
                    "Both keyword and semantic search work effectively",
                    "Search performance meets quality standards",
                    "Clear strategy for ongoing search optimization",
                ],
            },
            "problem_solving": {
                "name": "Problem-Solving Discovery",
                "description": "Find information to solve specific problems or answer questions",
                "estimated_time": "2-4 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Quick Content Survey",
                        "tool": "intelligent_discovery",
                        "params": {
                            "discovery_goal": "understand_content",
                            "depth": "quick",
                        },
                        "purpose": "Rapid overview of available information",
                        "look_for": [
                            "relevant content areas",
                            "potential information sources",
                        ],
                    },
                    {
                        "step": 2,
                        "action": "Targeted Search",
                        "tool": "auto_smart_search",
                        "params": {
                            "query": "REPLACE_WITH_PROBLEM_KEYWORDS",
                            "limit": 10,
                        },
                        "purpose": "Find directly relevant information",
                        "look_for": [
                            "directly applicable content",
                            "related information",
                            "context clues",
                        ],
                    },
                    {
                        "step": 3,
                        "action": "Related Content Discovery",
                        "tool": "auto_semantic_search",
                        "params": {
                            "query": "REPLACE_WITH_CONCEPTUAL_TERMS",
                            "similarity_threshold": 0.3,
                        },
                        "purpose": "Find conceptually related information",
                        "look_for": [
                            "broader context",
                            "related concepts",
                            "background information",
                        ],
                    },
                    {
                        "step": 4,
                        "action": "Information Gap Analysis",
                        "tool": "explore_tables",
                        "params": {"include_row_counts": True},
                        "purpose": "Identify what information might be missing",
                        "look_for": [
                            "information gaps",
                            "additional context sources",
                            "related data",
                        ],
                    },
                ],
                "customization_note": "Replace REPLACE_WITH_PROBLEM_KEYWORDS and REPLACE_WITH_CONCEPTUAL_TERMS with your specific problem terms",
                "success_criteria": [
                    "Found directly relevant information",
                    "Identified related/contextual information",
                    "Understand what information might be missing",
                    "Have clear next steps for problem resolution",
                ],
            },
        }

        # Unknown template: return a structured error listing valid choices.
        if template_type not in templates:
            available_templates = list(templates.keys())
            return cast(
                ToolResponse,
                {
                    "success": False,
                    "error": f"Template '{template_type}' not found",
                    "category": "TEMPLATE_ERROR",
                    "details": {
                        "available_templates": available_templates,
                        "requested_template": template_type,
                    },
                },
            )

        template = templates[template_type]

        # Customize template if requested.
        # _customize_template is defined elsewhere in this module/package.
        if customize_for:
            template = _customize_template(template, customize_for)

        return cast(
            ToolResponse,
            {
                "success": True,
                "template": template,
                "template_type": template_type,
                "customized_for": customize_for,
                "available_templates": list(templates.keys()),
                "usage_tip": "Follow the workflow steps in order, adapting parameters as needed for your specific situation",
            },
        )

    except Exception as e:
        # Degrade to a structured error payload rather than raising.
        return cast(
            ToolResponse,
            {
                "success": False,
                "error": f"Discovery template generation failed: {str(e)}",
                "category": "TEMPLATE_ERROR",
                "details": {
                    "template_type": template_type,
                    "customize_for": customize_for,
                },
            },
        )
532
+
533
+
534
@catch_errors
def discover_relationships(
    table_name: Optional[str] = None,
    relationship_types: Optional[List[str]] = None,
    similarity_threshold: float = 0.6,
) -> ToolResponse:
    """
    🔗 **RELATIONSHIP DISCOVERY** - Find hidden connections in your data!

    Automatically discovers relationships between tables and content areas using
    both structural analysis and semantic similarity to reveal data connections.

    Args:
        table_name (Optional[str]): Focus on relationships for specific table (default: all)
        relationship_types (Optional[List[str]]): Types of relationships to discover.
            Defaults to ["foreign_keys", "semantic_similarity", "temporal_patterns"].
            - "foreign_keys": Structural relationships via foreign keys
            - "semantic_similarity": Content-based relationships via semantic analysis
            - "temporal_patterns": Time-based relationships and patterns
            - "naming_patterns": Relationships based on naming conventions
        similarity_threshold (float): Minimum similarity for semantic relationships (0.0-1.0)

    Returns:
        ToolResponse: {"success": True, "relationships": Dict, "insights": List}

    Examples:
        >>> discover_relationships("users")
        {"success": True, "relationships": {
            "users": {
                "foreign_key_refs": ["posts.user_id", "comments.user_id"],
                "semantic_similar": [{"table": "profiles", "similarity": 0.8}],
                "temporal_related": ["user_sessions"]
            }
        }}

    FastMCP Tool Info:
        - **AUTOMATIC DETECTION**: Finds relationships you might not notice manually
        - **MULTIPLE METHODS**: Combines structural, semantic, and temporal analysis
        - **ACTIONABLE INSIGHTS**: Suggests how to leverage discovered relationships
        - **PERFECT FOR EXPLORATION**: Reveals hidden data organization patterns
    """
    # Bug fix: the previous signature used a mutable list literal as the
    # default value, which is created once and shared across all calls
    # (classic Python pitfall).  Use a None sentinel instead; passing no
    # argument yields exactly the same default behavior as before.
    if relationship_types is None:
        relationship_types = [
            "foreign_keys",
            "semantic_similarity",
            "temporal_patterns",
        ]

    try:
        # Imported lazily to avoid a circular import at module load time.
        from .. import server

        db = get_database(server.DB_PATH)

        # Get all tables or focus on specific table
        tables_result = db.list_tables()
        if not tables_result.get("success"):
            return cast(ToolResponse, tables_result)

        all_tables = tables_result.get("tables", [])
        target_tables = [table_name] if table_name else all_tables

        relationships: Dict[str, Any] = {}
        insights: List[str] = []

        for target_table in target_tables:
            # Skip unknown table names silently (e.g. a stale focus table).
            if target_table not in all_tables:
                continue

            table_relationships: Dict[str, List[Any]] = {
                "foreign_key_refs": [],
                "semantic_similar": [],
                "temporal_related": [],
                "naming_related": [],
            }

            # Discover foreign key relationships (structural links).
            if "foreign_keys" in relationship_types:
                fk_relationships = _discover_foreign_keys(db, target_table, all_tables)
                table_relationships["foreign_key_refs"] = fk_relationships
                if fk_relationships:
                    insights.append(
                        f"Table '{target_table}' has structural relationships with {len(fk_relationships)} other tables"
                    )

            # Discover semantic similarity relationships (content-based);
            # only when the optional semantic stack is installed.
            if (
                "semantic_similarity" in relationship_types
                and is_semantic_search_available()
            ):
                semantic_relationships = _discover_semantic_relationships(
                    db, target_table, all_tables, similarity_threshold
                )
                table_relationships["semantic_similar"] = semantic_relationships
                if semantic_relationships:
                    insights.append(
                        f"Table '{target_table}' has semantic similarity with {len(semantic_relationships)} tables"
                    )

            # Discover temporal patterns (time-based co-occurrence).
            if "temporal_patterns" in relationship_types:
                temporal_relationships = _discover_temporal_relationships(
                    db, target_table, all_tables
                )
                table_relationships["temporal_related"] = temporal_relationships
                if temporal_relationships:
                    insights.append(
                        f"Table '{target_table}' shows temporal patterns with {len(temporal_relationships)} tables"
                    )

            # Discover naming pattern relationships (naming conventions).
            if "naming_patterns" in relationship_types:
                naming_relationships = _discover_naming_relationships(
                    target_table, all_tables
                )
                table_relationships["naming_related"] = naming_relationships
                if naming_relationships:
                    insights.append(
                        f"Table '{target_table}' has naming pattern relationships with {len(naming_relationships)} tables"
                    )

            relationships[target_table] = table_relationships

        # Generate relationship insights: total across all discovery methods.
        total_relationships = sum(
            len(rel["foreign_key_refs"])
            + len(rel["semantic_similar"])
            + len(rel["temporal_related"])
            + len(rel["naming_related"])
            for rel in relationships.values()
        )

        if total_relationships == 0:
            insights.append(
                "No strong relationships discovered. Consider adding more content or setting up semantic search."
            )
        else:
            insights.append(
                f"Discovered {total_relationships} total relationships across {len(relationships)} tables"
            )

        return cast(
            ToolResponse,
            {
                "success": True,
                "relationships": relationships,
                "insights": insights,
                "relationship_summary": {
                    "total_relationships": total_relationships,
                    "tables_analyzed": len(relationships),
                    "strongest_connections": _identify_strongest_connections(
                        relationships
                    ),
                },
                "recommendations": _generate_relationship_recommendations(
                    relationships, insights
                ),
            },
        )

    except Exception as e:
        # Degrade to a structured error payload so agent callers always get
        # a ToolResponse-shaped result.
        return cast(
            ToolResponse,
            {
                "success": False,
                "error": f"Relationship discovery failed: {str(e)}",
                "category": "RELATIONSHIP_ERROR",
                "details": {
                    "table_name": table_name,
                    "relationship_types": relationship_types,
                    "similarity_threshold": similarity_threshold,
                },
            },
        )
+ )
703
+
704
+
705
+ # Helper functions for discovery orchestration
706
+
707
+
708
+ def _analyze_content_for_discovery(
709
+ db, tables: List[str], focus_area: Optional[str], depth: str
710
+ ) -> Dict[str, Any]:
711
+ """Analyze content patterns and distribution."""
712
+ content_analysis = {
713
+ "total_rows": 0,
714
+ "content_distribution": {},
715
+ "text_rich_tables": [],
716
+ "sparse_tables": [],
717
+ "high_value_tables": [],
718
+ }
719
+
720
+ target_tables = [focus_area] if focus_area and focus_area in tables else tables
721
+
722
+ for table_name in target_tables:
723
+ try:
724
+ rows_result = db.read_rows(table_name)
725
+ if rows_result.get("success"):
726
+ rows = rows_result.get("rows", [])
727
+ row_count = len(rows)
728
+ content_analysis["total_rows"] += row_count
729
+ content_analysis["content_distribution"][table_name] = row_count
730
+
731
+ # Analyze content quality if depth allows
732
+ if depth in ["moderate", "comprehensive"] and rows:
733
+ # Sample content quality
734
+ sample_size = min(3, len(rows))
735
+ total_content_length = 0
736
+
737
+ for row in rows[:sample_size]:
738
+ for value in row.values():
739
+ if isinstance(value, str):
740
+ total_content_length += len(value)
741
+
742
+ avg_content_length = (
743
+ total_content_length / sample_size if sample_size > 0 else 0
744
+ )
745
+
746
+ if avg_content_length > 200:
747
+ content_analysis["text_rich_tables"].append(table_name)
748
+ if avg_content_length > 500:
749
+ content_analysis["high_value_tables"].append(table_name)
750
+ if row_count < 5:
751
+ content_analysis["sparse_tables"].append(table_name)
752
+
753
+ except Exception:
754
+ continue
755
+
756
+ return content_analysis
757
+
758
+
759
+ def _analyze_schema_for_discovery(
760
+ db, tables: List[str], focus_area: Optional[str], depth: str
761
+ ) -> Dict[str, Any]:
762
+ """Analyze schema structure and organization."""
763
+ schema_analysis = {
764
+ "total_columns": 0,
765
+ "text_columns_by_table": {},
766
+ "well_structured_tables": [],
767
+ "schema_issues": [],
768
+ }
769
+
770
+ target_tables = [focus_area] if focus_area and focus_area in tables else tables
771
+
772
+ for table_name in target_tables:
773
+ try:
774
+ schema_result = db.describe_table(table_name)
775
+ if schema_result.get("success"):
776
+ columns = schema_result.get("columns", [])
777
+ schema_analysis["total_columns"] += len(columns)
778
+
779
+ # Find text columns
780
+ text_columns = [
781
+ col for col in columns if "TEXT" in col.get("type", "").upper()
782
+ ]
783
+ schema_analysis["text_columns_by_table"][table_name] = len(text_columns)
784
+
785
+ # Check for well-structured tables
786
+ has_id = any(col.get("name") == "id" for col in columns)
787
+ has_timestamp = any(
788
+ "timestamp" in col.get("name", "").lower() for col in columns
789
+ )
790
+ has_text_content = len(text_columns) > 0
791
+
792
+ if has_id and has_timestamp and has_text_content:
793
+ schema_analysis["well_structured_tables"].append(table_name)
794
+
795
+ # Identify schema issues
796
+ if len(columns) < 2:
797
+ schema_analysis["schema_issues"].append(
798
+ f"Table '{table_name}' has very few columns"
799
+ )
800
+ if not has_id:
801
+ schema_analysis["schema_issues"].append(
802
+ f"Table '{table_name}' lacks ID column"
803
+ )
804
+
805
+ except Exception:
806
+ continue
807
+
808
+ return schema_analysis
809
+
810
+
811
+ def _assess_content_quality(
812
+ db, tables: List[str], focus_area: Optional[str], depth: str
813
+ ) -> Dict[str, Any]:
814
+ """Assess overall content quality."""
815
+ quality_analysis = {
816
+ "quality_scores": {},
817
+ "overall_quality": 0.0,
818
+ "improvement_opportunities": [],
819
+ "quality_distribution": {"high": 0, "medium": 0, "low": 0},
820
+ }
821
+
822
+ target_tables = [focus_area] if focus_area and focus_area in tables else tables
823
+ total_score = 0
824
+ table_count = 0
825
+
826
+ for table_name in target_tables:
827
+ try:
828
+ rows_result = db.read_rows(table_name)
829
+ if rows_result.get("success"):
830
+ rows = rows_result.get("rows", [])
831
+
832
+ if not rows:
833
+ quality_analysis["quality_scores"][table_name] = 0.0
834
+ quality_analysis["improvement_opportunities"].append(
835
+ f"Table '{table_name}' is empty"
836
+ )
837
+ quality_analysis["quality_distribution"]["low"] += 1
838
+ continue
839
+
840
+ # Calculate quality score
841
+ sample_size = min(5, len(rows))
842
+ content_scores = []
843
+
844
+ for row in rows[:sample_size]:
845
+ row_score = 0
846
+ non_null_fields = sum(
847
+ 1 for v in row.values() if v is not None and str(v).strip()
848
+ )
849
+ total_content_length = sum(
850
+ len(str(v)) for v in row.values() if v is not None
851
+ )
852
+
853
+ # Score based on completeness and content richness
854
+ if non_null_fields > 2:
855
+ row_score += 3
856
+ if total_content_length > 100:
857
+ row_score += 4
858
+ if total_content_length > 500:
859
+ row_score += 3
860
+
861
+ content_scores.append(min(10, row_score))
862
+
863
+ table_quality = (
864
+ sum(content_scores) / len(content_scores) if content_scores else 0
865
+ )
866
+ quality_analysis["quality_scores"][table_name] = round(table_quality, 1)
867
+
868
+ # Categorize quality
869
+ if table_quality >= 7:
870
+ quality_analysis["quality_distribution"]["high"] += 1
871
+ elif table_quality >= 4:
872
+ quality_analysis["quality_distribution"]["medium"] += 1
873
+ else:
874
+ quality_analysis["quality_distribution"]["low"] += 1
875
+ quality_analysis["improvement_opportunities"].append(
876
+ f"Table '{table_name}' has low content quality (score: {table_quality:.1f})"
877
+ )
878
+
879
+ total_score += table_quality
880
+ table_count += 1
881
+
882
+ except Exception:
883
+ continue
884
+
885
+ quality_analysis["overall_quality"] = (
886
+ round(total_score / table_count, 1) if table_count > 0 else 0.0
887
+ )
888
+
889
+ return quality_analysis
890
+
891
+
892
def _analyze_search_readiness(
    db, tables: List[str], focus_area: Optional[str]
) -> Dict[str, Any]:
    """Evaluate how ready each table is for keyword and semantic search.

    A table is keyword-searchable when it has at least one TEXT column.
    When the semantic stack is available, tables with > 80% embedding
    coverage are "semantic ready"; text-bearing tables below that threshold
    are flagged as needing optimization.

    Args:
        db: Database accessor exposing ``describe_table`` and
            ``get_embedding_stats``.
        tables: Candidate table names.
        focus_area: If set and present in ``tables``, restrict to that table.

    Returns:
        Dict with ``semantic_ready_tables``, ``text_searchable_tables``,
        ``search_optimization_needed``, and per-table ``embedding_coverage``
        percentages.
    """
    readiness: Dict[str, Any] = {
        "semantic_ready_tables": [],
        "text_searchable_tables": [],
        "search_optimization_needed": [],
        "embedding_coverage": {},
    }

    scope = [focus_area] if focus_area and focus_area in tables else tables

    for name in scope:
        try:
            described = db.describe_table(name)
            if not described.get("success"):
                continue

            cols = described.get("columns", [])
            textual = [c for c in cols if "TEXT" in c.get("type", "").upper()]
            if textual:
                readiness["text_searchable_tables"].append(name)

            # Semantic-side checks only apply when the optional stack exists.
            if not is_semantic_search_available():
                continue
            stats = db.get_embedding_stats(name)
            if not stats.get("success"):
                continue

            pct = stats.get("coverage_percent", 0)
            readiness["embedding_coverage"][name] = pct
            if pct > 80:
                readiness["semantic_ready_tables"].append(name)
            elif textual:
                readiness["search_optimization_needed"].append(name)

        except Exception:
            # Best-effort analysis: skip tables that cannot be inspected.
            continue

    return readiness
938
+
939
+
940
+ def _generate_discovery_insights(
941
+ discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str], depth: str
942
+ ) -> tuple:
943
+ """Generate insights and recommendations based on discovery results."""
944
+ insights = []
945
+ recommendations = []
946
+ next_steps = []
947
+
948
+ total_tables = overview.get("total_tables", 0)
949
+ total_rows = overview.get("total_rows", 0)
950
+
951
+ # Goal-specific insights
952
+ if discovery_goal == "understand_content":
953
+ insights.append(
954
+ f"Memory bank contains {total_tables} tables with {total_rows} total rows"
955
+ )
956
+
957
+ high_value_tables = overview.get("high_value_tables", [])
958
+ if high_value_tables:
959
+ insights.append(
960
+ f"High-value content found in: {', '.join(high_value_tables[:3])}"
961
+ )
962
+ recommendations.append(
963
+ f"Focus search efforts on high-value tables: {', '.join(high_value_tables)}"
964
+ )
965
+ next_steps.append(
966
+ f"Use auto_smart_search() to explore content in {high_value_tables[0]}"
967
+ )
968
+
969
+ sparse_tables = overview.get("sparse_tables", [])
970
+ if sparse_tables:
971
+ insights.append(f"Sparse tables detected: {', '.join(sparse_tables)}")
972
+ recommendations.append("Consider consolidating or expanding sparse tables")
973
+
974
+ elif discovery_goal == "find_patterns":
975
+ text_rich_tables = overview.get("text_rich_tables", [])
976
+ if text_rich_tables:
977
+ insights.append(
978
+ f"Text-rich content found in {len(text_rich_tables)} tables"
979
+ )
980
+ next_steps.append("Use semantic search to find content patterns")
981
+
982
+ quality_scores = overview.get("quality_scores", {})
983
+ if quality_scores:
984
+ avg_quality = sum(quality_scores.values()) / len(quality_scores)
985
+ insights.append(f"Average content quality: {avg_quality:.1f}/10")
986
+
987
+ elif discovery_goal == "explore_structure":
988
+ well_structured = overview.get("well_structured_tables", [])
989
+ if well_structured:
990
+ insights.append(f"Well-structured tables: {', '.join(well_structured)}")
991
+ recommendations.append("Use well-structured tables as primary data sources")
992
+
993
+ schema_issues = overview.get("schema_issues", [])
994
+ if schema_issues:
995
+ insights.extend(schema_issues[:3]) # Show first 3 issues
996
+
997
+ elif discovery_goal == "assess_quality":
998
+ overall_quality = overview.get("overall_quality", 0)
999
+ insights.append(f"Overall content quality score: {overall_quality}/10")
1000
+
1001
+ improvement_opportunities = overview.get("improvement_opportunities", [])
1002
+ recommendations.extend(improvement_opportunities[:3])
1003
+
1004
+ elif discovery_goal == "prepare_search":
1005
+ semantic_ready = overview.get("semantic_ready_tables", [])
1006
+ optimization_needed = overview.get("search_optimization_needed", [])
1007
+
1008
+ if semantic_ready:
1009
+ insights.append(f"Semantic search ready for {len(semantic_ready)} tables")
1010
+ next_steps.append("Use auto_semantic_search() for conceptual queries")
1011
+
1012
+ if optimization_needed:
1013
+ insights.append(
1014
+ f"Search optimization needed for {len(optimization_needed)} tables"
1015
+ )
1016
+ next_steps.append(
1017
+ f"Set up embeddings for: {', '.join(optimization_needed[:2])}"
1018
+ )
1019
+
1020
+ # Universal recommendations
1021
+ if overview.get("semantic_search_available"):
1022
+ recommendations.append("Use auto_smart_search() for best search results")
1023
+ else:
1024
+ recommendations.append(
1025
+ "Install sentence-transformers for semantic search capabilities"
1026
+ )
1027
+
1028
+ if not next_steps:
1029
+ next_steps.append("Use explore_tables() for detailed content examination")
1030
+ next_steps.append("Try auto_smart_search() to find specific information")
1031
+
1032
+ return insights, recommendations, next_steps
1033
+
1034
+
1035
+ def _generate_quick_actions(
1036
+ discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str]
1037
+ ) -> List[Dict[str, Any]]:
1038
+ """Generate quick action suggestions."""
1039
+ actions = []
1040
+
1041
+ high_value_tables = overview.get("high_value_tables", [])
1042
+
1043
+ if discovery_goal == "understand_content" and high_value_tables:
1044
+ actions.append(
1045
+ {
1046
+ "action": "Explore High-Value Content",
1047
+ "tool": "read_rows",
1048
+ "params": {"table_name": high_value_tables[0]},
1049
+ "description": f"Examine content in {high_value_tables[0]} table",
1050
+ }
1051
+ )
1052
+
1053
+ if overview.get("semantic_search_available"):
1054
+ actions.append(
1055
+ {
1056
+ "action": "Smart Search",
1057
+ "tool": "auto_smart_search",
1058
+ "params": {"query": "important recent information", "limit": 5},
1059
+ "description": "Find important content using intelligent search",
1060
+ }
1061
+ )
1062
+
1063
+ actions.append(
1064
+ {
1065
+ "action": "Quality Assessment",
1066
+ "tool": "get_content_health_score",
1067
+ "params": {},
1068
+ "description": "Get detailed quality metrics and recommendations",
1069
+ }
1070
+ )
1071
+
1072
+ return actions
1073
+
1074
+
1075
+ def _store_discovery_pattern(db, discovery_session: Dict[str, Any]) -> None:
1076
+ """Store discovery pattern for learning (if agent learning table exists)."""
1077
+ try:
1078
+ # Check if discovery_patterns table exists
1079
+ tables_result = db.list_tables()
1080
+ if tables_result.get("success") and "discovery_patterns" in tables_result.get(
1081
+ "tables", []
1082
+ ):
1083
+ # Store the discovery session
1084
+ db.insert_row(
1085
+ "discovery_patterns",
1086
+ {
1087
+ "agent_id": discovery_session.get("agent_id"),
1088
+ "goal": discovery_session.get("goal"),
1089
+ "focus_area": discovery_session.get("focus_area"),
1090
+ "depth": discovery_session.get("depth"),
1091
+ "steps_completed": str(
1092
+ discovery_session.get("steps_completed", [])
1093
+ ),
1094
+ "success": True,
1095
+ "timestamp": discovery_session.get("timestamp"),
1096
+ },
1097
+ )
1098
+ except Exception:
1099
+ # Silently fail if learning storage isn't available
1100
+ pass
1101
+
1102
+
1103
+ def _customize_template(template: Dict[str, Any], customize_for: str) -> Dict[str, Any]:
1104
+ """Customize template for specific domain or topic."""
1105
+ customized = template.copy()
1106
+
1107
+ # Add customization note
1108
+ customized["customized_for"] = customize_for
1109
+ customized["customization_note"] = f"Template customized for: {customize_for}"
1110
+
1111
+ # Modify search queries in workflow to include customization
1112
+ for step in customized.get("workflow", []):
1113
+ if step.get("tool") in [
1114
+ "auto_smart_search",
1115
+ "auto_semantic_search",
1116
+ "search_content",
1117
+ ]:
1118
+ params = step.get("params", {})
1119
+ if "query" in params and params["query"].startswith("REPLACE_WITH"):
1120
+ # Keep the placeholder for user customization
1121
+ continue
1122
+ elif "query" in params:
1123
+ # Add customization to existing query
1124
+ params["query"] = f"{customize_for} {params['query']}"
1125
+
1126
+ return customized
1127
+
1128
+
1129
+ # Relationship discovery helper functions
1130
+
1131
+
1132
+ def _discover_foreign_keys(db, target_table: str, all_tables: List[str]) -> List[str]:
1133
+ """Discover foreign key relationships."""
1134
+ relationships = []
1135
+
1136
+ try:
1137
+ # Get target table schema
1138
+ target_schema = db.describe_table(target_table)
1139
+ if not target_schema.get("success"):
1140
+ return relationships
1141
+
1142
+ target_columns = target_schema.get("columns", [])
1143
+ target_col_names = [col.get("name", "") for col in target_columns]
1144
+
1145
+ # Check other tables for potential foreign key references
1146
+ for other_table in all_tables:
1147
+ if other_table == target_table:
1148
+ continue
1149
+
1150
+ try:
1151
+ other_schema = db.describe_table(other_table)
1152
+ if other_schema.get("success"):
1153
+ other_columns = other_schema.get("columns", [])
1154
+
1155
+ for col in other_columns:
1156
+ col_name = col.get("name", "")
1157
+ # Look for naming patterns that suggest foreign keys
1158
+ if col_name.endswith("_id") or col_name.endswith("Id"):
1159
+ potential_ref = col_name.replace("_id", "").replace(
1160
+ "Id", ""
1161
+ )
1162
+ if (
1163
+ potential_ref == target_table
1164
+ or f"{potential_ref}s" == target_table
1165
+ ):
1166
+ relationships.append(f"{other_table}.{col_name}")
1167
+
1168
+ # Look for exact column name matches (potential shared keys)
1169
+ if col_name in target_col_names and col_name != "id":
1170
+ relationships.append(
1171
+ f"{other_table}.{col_name} (shared key)"
1172
+ )
1173
+
1174
+ except Exception:
1175
+ continue
1176
+
1177
+ except Exception:
1178
+ pass
1179
+
1180
+ return relationships
1181
+
1182
+
1183
def _discover_semantic_relationships(
    db, target_table: str, all_tables: List[str], threshold: float
) -> List[Dict[str, Any]]:
    """Discover semantic similarity relationships.

    Uses a ~200-character sample built from the first row of *target_table*
    as a probe query against every other table's embeddings.  A table is
    reported when the average similarity of its top matches reaches
    *threshold*.  Returns an empty list when semantic search is unavailable
    or the target has no usable sample content.
    """
    matches: List[Dict[str, Any]] = []

    if not is_semantic_search_available():
        return matches

    try:
        rows_result = db.read_rows(target_table)
        if not rows_result.get("success") or not rows_result.get("rows"):
            return matches

        # Build a short probe string from the first row's non-null values.
        first_row = rows_result["rows"][0]
        probe = " ".join(
            str(value) for value in first_row.values() if value is not None
        )[:200]
        if len(probe.strip()) < 10:
            # Too little text to produce a meaningful similarity signal.
            return matches

        for candidate in all_tables:
            if candidate == target_table:
                continue

            try:
                result = db.semantic_search(
                    probe,
                    [candidate],
                    "embedding",
                    None,
                    threshold,
                    3,
                    "all-MiniLM-L6-v2",
                )
                if not result.get("success") or not result.get("results"):
                    continue

                hits = result["results"]
                mean_similarity = sum(
                    hit.get("similarity_score", 0) for hit in hits
                ) / len(hits)

                if mean_similarity >= threshold:
                    matches.append(
                        {
                            "table": candidate,
                            "similarity": round(mean_similarity, 2),
                            "related_content_count": len(hits),
                        }
                    )
            except Exception:
                continue

    except Exception:
        pass

    return matches
1246
+
1247
+
1248
+ def _discover_temporal_relationships(
1249
+ db, target_table: str, all_tables: List[str]
1250
+ ) -> List[str]:
1251
+ """Discover temporal pattern relationships."""
1252
+ relationships = []
1253
+
1254
+ try:
1255
+ # Check if target table has timestamp columns
1256
+ target_schema = db.describe_table(target_table)
1257
+ if not target_schema.get("success"):
1258
+ return relationships
1259
+
1260
+ target_columns = target_schema.get("columns", [])
1261
+ target_has_timestamp = any(
1262
+ "timestamp" in col.get("name", "").lower()
1263
+ or "date" in col.get("name", "").lower()
1264
+ or "time" in col.get("name", "").lower()
1265
+ for col in target_columns
1266
+ )
1267
+
1268
+ if not target_has_timestamp:
1269
+ return relationships
1270
+
1271
+ # Check other tables for similar timestamp patterns
1272
+ for other_table in all_tables:
1273
+ if other_table == target_table:
1274
+ continue
1275
+
1276
+ try:
1277
+ other_schema = db.describe_table(other_table)
1278
+ if other_schema.get("success"):
1279
+ other_columns = other_schema.get("columns", [])
1280
+ other_has_timestamp = any(
1281
+ "timestamp" in col.get("name", "").lower()
1282
+ or "date" in col.get("name", "").lower()
1283
+ or "time" in col.get("name", "").lower()
1284
+ for col in other_columns
1285
+ )
1286
+
1287
+ if other_has_timestamp:
1288
+ relationships.append(other_table)
1289
+
1290
+ except Exception:
1291
+ continue
1292
+
1293
+ except Exception:
1294
+ pass
1295
+
1296
+ return relationships
1297
+
1298
+
1299
+ def _discover_naming_relationships(
1300
+ target_table: str, all_tables: List[str]
1301
+ ) -> List[str]:
1302
+ """Discover relationships based on naming conventions."""
1303
+ relationships = []
1304
+
1305
+ # Look for tables with similar names or naming patterns
1306
+ target_lower = target_table.lower()
1307
+
1308
+ for other_table in all_tables:
1309
+ if other_table == target_table:
1310
+ continue
1311
+
1312
+ other_lower = other_table.lower()
1313
+
1314
+ # Check for plural/singular relationships
1315
+ if (target_lower.endswith("s") and other_lower == target_lower[:-1]) or (
1316
+ other_lower.endswith("s") and target_lower == other_lower[:-1]
1317
+ ):
1318
+ relationships.append(other_table)
1319
+ continue
1320
+
1321
+ # Check for common prefixes or suffixes
1322
+ if len(target_lower) > 3 and len(other_lower) > 3:
1323
+ # Common prefix (at least 4 characters)
1324
+ if target_lower[:4] == other_lower[:4]:
1325
+ relationships.append(other_table)
1326
+ continue
1327
+
1328
+ # Common suffix (at least 4 characters)
1329
+ if target_lower[-4:] == other_lower[-4:]:
1330
+ relationships.append(other_table)
1331
+ continue
1332
+
1333
+ # Check for semantic name relationships
1334
+ name_words = set(target_lower.split("_"))
1335
+ other_words = set(other_lower.split("_"))
1336
+
1337
+ # If tables share significant word overlap
1338
+ if len(name_words.intersection(other_words)) > 0:
1339
+ relationships.append(other_table)
1340
+
1341
+ return relationships
1342
+
1343
+
1344
+ def _identify_strongest_connections(
1345
+ relationships: Dict[str, Any],
1346
+ ) -> List[Dict[str, Any]]:
1347
+ """Identify the strongest connections across all relationships."""
1348
+ connections = []
1349
+
1350
+ for table, rels in relationships.items():
1351
+ # Count total connections for this table
1352
+ total_connections = (
1353
+ len(rels.get("foreign_key_refs", []))
1354
+ + len(rels.get("semantic_similar", []))
1355
+ + len(rels.get("temporal_related", []))
1356
+ + len(rels.get("naming_related", []))
1357
+ )
1358
+
1359
+ if total_connections > 0:
1360
+ connections.append(
1361
+ {
1362
+ "table": table,
1363
+ "total_connections": total_connections,
1364
+ "connection_types": {
1365
+ "structural": len(rels.get("foreign_key_refs", [])),
1366
+ "semantic": len(rels.get("semantic_similar", [])),
1367
+ "temporal": len(rels.get("temporal_related", [])),
1368
+ "naming": len(rels.get("naming_related", [])),
1369
+ },
1370
+ }
1371
+ )
1372
+
1373
+ # Sort by total connections and return top 5
1374
+ connections.sort(key=lambda x: x["total_connections"], reverse=True)
1375
+ return connections[:5]
1376
+
1377
+
1378
+ def _generate_relationship_recommendations(
1379
+ relationships: Dict[str, Any], insights: List[str]
1380
+ ) -> List[str]:
1381
+ """Generate actionable recommendations based on discovered relationships."""
1382
+ recommendations = []
1383
+
1384
+ # Find tables with many connections
1385
+ highly_connected = []
1386
+ for table, rels in relationships.items():
1387
+ total_connections = (
1388
+ len(rels.get("foreign_key_refs", []))
1389
+ + len(rels.get("semantic_similar", []))
1390
+ + len(rels.get("temporal_related", []))
1391
+ + len(rels.get("naming_related", []))
1392
+ )
1393
+ if total_connections >= 3:
1394
+ highly_connected.append(table)
1395
+
1396
+ if highly_connected:
1397
+ recommendations.append(
1398
+ f"Focus queries on highly connected tables: {', '.join(highly_connected[:3])}"
1399
+ )
1400
+
1401
+ # Find tables with semantic relationships
1402
+ semantic_tables = []
1403
+ for table, rels in relationships.items():
1404
+ if rels.get("semantic_similar"):
1405
+ semantic_tables.append(table)
1406
+
1407
+ if semantic_tables:
1408
+ recommendations.append(
1409
+ f"Use semantic search across related tables: {', '.join(semantic_tables[:3])}"
1410
+ )
1411
+
1412
+ # Find tables with temporal relationships
1413
+ temporal_tables = []
1414
+ for table, rels in relationships.items():
1415
+ if rels.get("temporal_related"):
1416
+ temporal_tables.append(table)
1417
+
1418
+ if temporal_tables:
1419
+ recommendations.append(
1420
+ f"Consider temporal analysis for time-related tables: {', '.join(temporal_tables[:3])}"
1421
+ )
1422
+
1423
+ if not recommendations:
1424
+ recommendations.append(
1425
+ "Consider adding more structured relationships or content to improve discoverability"
1426
+ )
1427
+
1428
+ return recommendations