mcp-sqlite-memory-bank 1.5.0__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1176 @@
1
+ """
2
+ Discovery and Exploration Tools for SQLite Memory Bank
3
+ =====================================================
4
+
5
+ This module contains advanced discovery tools that help LLM agents intelligently
6
+ explore and understand memory bank content through guided workflows and orchestrated
7
+ discovery processes.
8
+
9
+ Author: Robert Meisner
10
+ """
11
+
12
+ import logging
13
+ from typing import Any, Dict, List, Optional, cast, Union
14
+ from datetime import datetime
15
+
16
+ from ..database import get_database
17
+ from ..semantic import is_semantic_search_available
18
+ from ..types import ToolResponse
19
+ from ..utils import catch_errors
20
+
21
+
22
@catch_errors
def intelligent_discovery(
    discovery_goal: str = "understand_content",
    focus_area: Optional[str] = None,
    depth: str = "moderate",
    agent_id: Optional[str] = None,
) -> ToolResponse:
    """
    🧠 **INTELLIGENT DISCOVERY** - AI-guided exploration of your memory bank!

    Orchestrates multiple discovery tools based on your exploration goals.
    Provides step-by-step guidance and actionable insights tailored to your needs.

    Args:
        discovery_goal (str): What you want to achieve
            - "understand_content": Learn what data is available and how it's organized
            - "find_patterns": Discover themes, relationships, and content patterns
            - "explore_structure": Understand database schema and organization
            - "assess_quality": Evaluate content quality and completeness
            - "prepare_search": Get ready for effective content searching
        focus_area (Optional[str]): Specific table or topic to focus on (default: all)
        depth (str): How thorough the discovery should be
            - "quick": Fast overview with key insights
            - "moderate": Balanced analysis with actionable recommendations
            - "comprehensive": Deep dive with detailed analysis
        agent_id (Optional[str]): Agent identifier for learning discovery patterns

    Returns:
        ToolResponse: On success: {"success": True, "discovery": Dict, "next_steps": List}
                     On error: {"success": False, "error": str, "category": str, "details": dict}

    Examples:
        >>> intelligent_discovery("understand_content")
        {"success": True, "discovery": {
            "overview": {"total_tables": 5, "total_rows": 234},
            "content_summary": {...},
            "recommendations": [...]
        }, "next_steps": ["Use auto_smart_search() for specific queries"]}

        >>> intelligent_discovery("find_patterns", focus_area="technical_decisions")
        {"success": True, "discovery": {
            "patterns": {"decision_themes": [...], "temporal_trends": [...]},
            "insights": [...]
        }}

    FastMCP Tool Info:
        - **COMPLETELY AUTOMATED**: No manual tool chaining required
        - **GOAL-ORIENTED**: Tailored discovery based on your specific objectives
        - **ACTIONABLE INSIGHTS**: Always includes concrete next steps
        - **LEARNING**: Improves recommendations based on usage patterns
        - **PERFECT FOR AGENTS**: Single tool that orchestrates complex discovery workflows
    """
    # NOTE(review): the local try/except duplicates the @catch_errors decorator;
    # presumably it exists so failures return a goal-specific DISCOVERY_ERROR
    # payload rather than the decorator's generic envelope — confirm against
    # utils.catch_errors.
    try:
        # Imported lazily inside the function; presumably avoids a circular
        # import between server and the tool modules — TODO confirm.
        from .. import server
        db = get_database(server.DB_PATH)

        # Initialize discovery session: a record of which steps ran and what
        # was learned; returned verbatim to the caller for transparency.
        discovery_session = {
            "goal": discovery_goal,
            "focus_area": focus_area,
            "depth": depth,
            "timestamp": datetime.now().isoformat(),
            "agent_id": agent_id,
            "steps_completed": [],
            "insights": [],
            "recommendations": []
        }

        # Step 1: Basic overview (always runs, regardless of goal)
        discovery_session["steps_completed"].append("basic_overview")
        tables_result = db.list_tables()
        if not tables_result.get("success"):
            return cast(ToolResponse, {
                "success": False,
                "error": "Failed to get basic overview",
                "category": "DISCOVERY_ERROR",
                "details": tables_result
            })

        tables = tables_result.get("tables", [])
        overview = {
            "total_tables": len(tables),
            "available_tables": tables,
            "semantic_search_available": is_semantic_search_available()
        }

        # Steps 2-5 are conditional on the goal; each helper returns a dict of
        # extra overview keys that is merged into `overview` in place.

        # Step 2: Content analysis based on goal
        if discovery_goal in ["understand_content", "find_patterns", "assess_quality"]:
            discovery_session["steps_completed"].append("content_analysis")
            content_analysis = _analyze_content_for_discovery(db, tables, focus_area, depth)
            overview.update(content_analysis)

        # Step 3: Schema analysis for structure exploration
        if discovery_goal in ["explore_structure", "understand_content"]:
            discovery_session["steps_completed"].append("schema_analysis")
            schema_analysis = _analyze_schema_for_discovery(db, tables, focus_area, depth)
            overview.update(schema_analysis)

        # Step 4: Quality assessment
        if discovery_goal in ["assess_quality", "find_patterns"]:
            discovery_session["steps_completed"].append("quality_assessment")
            quality_analysis = _assess_content_quality(db, tables, focus_area, depth)
            overview.update(quality_analysis)

        # Step 5: Search readiness for search preparation
        if discovery_goal in ["prepare_search", "understand_content"]:
            discovery_session["steps_completed"].append("search_readiness")
            search_analysis = _analyze_search_readiness(db, tables, focus_area)
            overview.update(search_analysis)

        # Step 6: Turn the merged overview into human-readable insights,
        # recommendations and concrete next steps.
        insights, recommendations, next_steps = _generate_discovery_insights(
            discovery_goal, overview, focus_area, depth
        )

        discovery_session["insights"] = insights
        discovery_session["recommendations"] = recommendations

        # Step 7: Store discovery pattern for learning (if agent_id provided)
        if agent_id:
            _store_discovery_pattern(db, discovery_session)

        return cast(ToolResponse, {
            "success": True,
            "discovery": {
                "goal": discovery_goal,
                "overview": overview,
                "insights": insights,
                "recommendations": recommendations,
                "focus_area": focus_area,
                "depth": depth,
                "steps_completed": discovery_session["steps_completed"]
            },
            "next_steps": next_steps,
            "discovery_session": discovery_session,
            "quick_actions": _generate_quick_actions(discovery_goal, overview, focus_area)
        })

    except Exception as e:
        # Echo the request parameters so the caller can diagnose which
        # discovery configuration failed.
        return cast(ToolResponse, {
            "success": False,
            "error": f"Intelligent discovery failed: {str(e)}",
            "category": "DISCOVERY_ERROR",
            "details": {
                "goal": discovery_goal,
                "focus_area": focus_area,
                "depth": depth,
                "agent_id": agent_id
            }
        })
172
+
173
+
174
@catch_errors
def discovery_templates(
    template_type: str = "first_time_exploration",
    customize_for: Optional[str] = None
) -> ToolResponse:
    """
    📋 **DISCOVERY TEMPLATES** - Pre-built exploration workflows for common scenarios!

    Provides step-by-step discovery templates optimized for specific agent use cases.
    Each template includes the exact sequence of tools to call and what to look for.

    Args:
        template_type (str): Type of discovery template to provide
            - "first_time_exploration": Complete workflow for new agents
            - "content_audit": Systematic content quality review
            - "search_optimization": Prepare memory bank for optimal searching
            - "problem_solving": Find information to solve specific problems
            - "relationship_mapping": (not yet implemented — returns TEMPLATE_ERROR)
            - "knowledge_extraction": (not yet implemented — returns TEMPLATE_ERROR)
        customize_for (Optional[str]): Customize template for specific domain/topic

    Returns:
        ToolResponse: {"success": True, "template": Dict, "workflow": List}

    Examples:
        >>> discovery_templates("first_time_exploration")
        {"success": True, "template": {
            "name": "First Time Exploration",
            "description": "Complete discovery workflow for new agents",
            "workflow": [
                {"step": 1, "tool": "intelligent_discovery", "params": {...}},
                {"step": 2, "tool": "explore_tables", "params": {...}}
            ]
        }}

    FastMCP Tool Info:
        - **PROVEN WORKFLOWS**: Battle-tested discovery sequences
        - **STEP-BY-STEP GUIDANCE**: Exact tools and parameters to use
        - **CUSTOMIZABLE**: Adapt templates to your specific needs
        - **LEARNING-OPTIMIZED**: Based on successful discovery patterns
    """
    try:
        # Static catalog of workflows. Each workflow step is a plain dict the
        # agent follows manually: the tool to call, the params to pass, why the
        # step exists, and what to look for in the result.
        templates = {
            "first_time_exploration": {
                "name": "First Time Exploration",
                "description": "Complete discovery workflow for agents new to this memory bank",
                "estimated_time": "2-3 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Get Overview",
                        "tool": "intelligent_discovery",
                        "params": {"discovery_goal": "understand_content", "depth": "moderate"},
                        "purpose": "Understand what data is available and how it's organized",
                        "look_for": ["total tables", "content types", "data volume"]
                    },
                    {
                        "step": 2,
                        "action": "Explore Structure",
                        "tool": "explore_tables",
                        "params": {"include_row_counts": True},
                        "purpose": "See detailed table schemas and sample data",
                        "look_for": ["column types", "sample content", "data relationships"]
                    },
                    {
                        "step": 3,
                        "action": "Test Search Capabilities",
                        "tool": "auto_smart_search",
                        "params": {"query": "recent important information", "limit": 5},
                        "purpose": "Understand search capabilities and content accessibility",
                        "look_for": ["search quality", "result relevance", "content types found"]
                    },
                    {
                        "step": 4,
                        "action": "Assess Quality",
                        "tool": "get_content_health_score",
                        "params": {},
                        "purpose": "Understand overall memory bank quality and opportunities",
                        "look_for": ["health score", "improvement recommendations", "strengths"]
                    }
                ],
                "success_criteria": [
                    "Understand what types of information are stored",
                    "Know which tables contain the most valuable content",
                    "Identify best search strategies for this memory bank",
                    "Have actionable next steps for productive use"
                ]
            },

            "content_audit": {
                "name": "Content Quality Audit",
                "description": "Systematic review of content quality and completeness",
                "estimated_time": "5-7 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Quality Assessment",
                        "tool": "get_content_health_score",
                        "params": {},
                        "purpose": "Get overall quality metrics and problem areas",
                        "look_for": ["quality scores", "problem tables", "recommendations"]
                    },
                    {
                        "step": 2,
                        "action": "Pattern Analysis",
                        "tool": "analyze_memory_patterns",
                        "params": {},
                        "purpose": "Identify content patterns and organizational issues",
                        "look_for": ["content distribution", "sparse tables", "organization gaps"]
                    },
                    {
                        "step": 3,
                        "action": "Table-by-Table Review",
                        "tool": "explore_tables",
                        "params": {"include_row_counts": True},
                        "purpose": "Detailed examination of each table's content",
                        "look_for": ["empty tables", "low-quality content", "missing data"]
                    },
                    {
                        "step": 4,
                        "action": "Search Readiness",
                        "tool": "intelligent_discovery",
                        "params": {"discovery_goal": "prepare_search", "depth": "comprehensive"},
                        "purpose": "Ensure content is optimally searchable",
                        "look_for": ["embedding coverage", "search optimization opportunities"]
                    }
                ],
                "success_criteria": [
                    "Identify all content quality issues",
                    "Have specific recommendations for improvement",
                    "Understand which content areas need attention",
                    "Know how to optimize for better searchability"
                ]
            },

            "search_optimization": {
                "name": "Search Optimization Setup",
                "description": "Prepare memory bank for optimal content discovery and searching",
                "estimated_time": "3-5 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Search Capability Assessment",
                        "tool": "intelligent_discovery",
                        "params": {"discovery_goal": "prepare_search", "depth": "comprehensive"},
                        "purpose": "Understand current search capabilities and gaps",
                        "look_for": ["semantic readiness", "text column identification", "embedding status"]
                    },
                    {
                        "step": 2,
                        "action": "Content Analysis for Search",
                        "tool": "analyze_memory_patterns",
                        "params": {},
                        "purpose": "Identify high-value content for search optimization",
                        "look_for": ["text-rich tables", "high-value content", "search opportunities"]
                    },
                    {
                        "step": 3,
                        "action": "Test Current Search",
                        "tool": "search_content",
                        "params": {"query": "test search capabilities", "limit": 10},
                        "purpose": "Baseline current search performance",
                        "look_for": ["search result quality", "coverage", "relevance"]
                    },
                    {
                        "step": 4,
                        "action": "Semantic Search Setup",
                        "tool": "auto_semantic_search",
                        "params": {"query": "important valuable content", "limit": 5},
                        "purpose": "Enable and test semantic search capabilities",
                        "look_for": ["automatic embedding generation", "semantic result quality"]
                    }
                ],
                "success_criteria": [
                    "Semantic search is enabled for key tables",
                    "Both keyword and semantic search work effectively",
                    "Search performance meets quality standards",
                    "Clear strategy for ongoing search optimization"
                ]
            },

            "problem_solving": {
                "name": "Problem-Solving Discovery",
                "description": "Find information to solve specific problems or answer questions",
                "estimated_time": "2-4 minutes",
                "workflow": [
                    {
                        "step": 1,
                        "action": "Quick Content Survey",
                        "tool": "intelligent_discovery",
                        "params": {"discovery_goal": "understand_content", "depth": "quick"},
                        "purpose": "Rapid overview of available information",
                        "look_for": ["relevant content areas", "potential information sources"]
                    },
                    {
                        "step": 2,
                        "action": "Targeted Search",
                        "tool": "auto_smart_search",
                        # Placeholder params: the agent substitutes its own terms
                        # (see "customization_note" below).
                        "params": {"query": "REPLACE_WITH_PROBLEM_KEYWORDS", "limit": 10},
                        "purpose": "Find directly relevant information",
                        "look_for": ["directly applicable content", "related information", "context clues"]
                    },
                    {
                        "step": 3,
                        "action": "Related Content Discovery",
                        "tool": "auto_semantic_search",
                        "params": {"query": "REPLACE_WITH_CONCEPTUAL_TERMS", "similarity_threshold": 0.3},
                        "purpose": "Find conceptually related information",
                        "look_for": ["broader context", "related concepts", "background information"]
                    },
                    {
                        "step": 4,
                        "action": "Information Gap Analysis",
                        "tool": "explore_tables",
                        "params": {"include_row_counts": True},
                        "purpose": "Identify what information might be missing",
                        "look_for": ["information gaps", "additional context sources", "related data"]
                    }
                ],
                "customization_note": "Replace REPLACE_WITH_PROBLEM_KEYWORDS and REPLACE_WITH_CONCEPTUAL_TERMS with your specific problem terms",
                "success_criteria": [
                    "Found directly relevant information",
                    "Identified related/contextual information",
                    "Understand what information might be missing",
                    "Have clear next steps for problem resolution"
                ]
            }
        }

        # Unknown template: return the catalog so the caller can self-correct.
        if template_type not in templates:
            available_templates = list(templates.keys())
            return cast(ToolResponse, {
                "success": False,
                "error": f"Template '{template_type}' not found",
                "category": "TEMPLATE_ERROR",
                "details": {
                    "available_templates": available_templates,
                    "requested_template": template_type
                }
            })

        template = templates[template_type]

        # Customize template if requested
        if customize_for:
            template = _customize_template(template, customize_for)

        return cast(ToolResponse, {
            "success": True,
            "template": template,
            "template_type": template_type,
            "customized_for": customize_for,
            "available_templates": list(templates.keys()),
            "usage_tip": "Follow the workflow steps in order, adapting parameters as needed for your specific situation"
        })

    except Exception as e:
        return cast(ToolResponse, {
            "success": False,
            "error": f"Discovery template generation failed: {str(e)}",
            "category": "TEMPLATE_ERROR",
            "details": {"template_type": template_type, "customize_for": customize_for}
        })
437
+
438
+
439
@catch_errors
def discover_relationships(
    table_name: Optional[str] = None,
    # NOTE(review): mutable default argument. Harmless today because the list
    # is only read, never mutated, in this function — but consider
    # `Optional[List[str]] = None` with an in-body default to be safe.
    relationship_types: List[str] = ["foreign_keys", "semantic_similarity", "temporal_patterns"],
    similarity_threshold: float = 0.6
) -> ToolResponse:
    """
    🔗 **RELATIONSHIP DISCOVERY** - Find hidden connections in your data!

    Automatically discovers relationships between tables and content areas using
    both structural analysis and semantic similarity to reveal data connections.

    Args:
        table_name (Optional[str]): Focus on relationships for specific table (default: all)
        relationship_types (List[str]): Types of relationships to discover
            - "foreign_keys": Structural relationships via foreign keys
            - "semantic_similarity": Content-based relationships via semantic analysis
            - "temporal_patterns": Time-based relationships and patterns
            - "naming_patterns": Relationships based on naming conventions
        similarity_threshold (float): Minimum similarity for semantic relationships (0.0-1.0)

    Returns:
        ToolResponse: {"success": True, "relationships": Dict, "insights": List}

    Examples:
        >>> discover_relationships("users")
        {"success": True, "relationships": {
            "users": {
                "foreign_key_refs": ["posts.user_id", "comments.user_id"],
                "semantic_similar": [{"table": "profiles", "similarity": 0.8}],
                "temporal_related": ["user_sessions"]
            }
        }}

    FastMCP Tool Info:
        - **AUTOMATIC DETECTION**: Finds relationships you might not notice manually
        - **MULTIPLE METHODS**: Combines structural, semantic, and temporal analysis
        - **ACTIONABLE INSIGHTS**: Suggests how to leverage discovered relationships
        - **PERFECT FOR EXPLORATION**: Reveals hidden data organization patterns
    """
    try:
        # Lazy import — presumably avoids a circular import with server; TODO confirm.
        from .. import server
        db = get_database(server.DB_PATH)

        # Get all tables or focus on specific table
        tables_result = db.list_tables()
        if not tables_result.get("success"):
            return cast(ToolResponse, tables_result)

        all_tables = tables_result.get("tables", [])
        target_tables = [table_name] if table_name else all_tables

        relationships = {}
        insights = []

        for target_table in target_tables:
            # A caller-supplied table_name that doesn't exist is silently
            # skipped, yielding an empty `relationships` dict.
            if target_table not in all_tables:
                continue

            # Per-table result shape; each key is filled by one analysis kind.
            table_relationships = {
                "foreign_key_refs": [],
                "semantic_similar": [],
                "temporal_related": [],
                "naming_related": []
            }

            # Discover foreign key relationships
            if "foreign_keys" in relationship_types:
                fk_relationships = _discover_foreign_keys(db, target_table, all_tables)
                table_relationships["foreign_key_refs"] = fk_relationships
                if fk_relationships:
                    insights.append(f"Table '{target_table}' has structural relationships with {len(fk_relationships)} other tables")

            # Discover semantic similarity relationships (requires the optional
            # semantic backend to be installed/available).
            if "semantic_similarity" in relationship_types and is_semantic_search_available():
                semantic_relationships = _discover_semantic_relationships(
                    db, target_table, all_tables, similarity_threshold
                )
                table_relationships["semantic_similar"] = semantic_relationships
                if semantic_relationships:
                    insights.append(f"Table '{target_table}' has semantic similarity with {len(semantic_relationships)} tables")

            # Discover temporal patterns
            if "temporal_patterns" in relationship_types:
                temporal_relationships = _discover_temporal_relationships(db, target_table, all_tables)
                table_relationships["temporal_related"] = temporal_relationships
                if temporal_relationships:
                    insights.append(f"Table '{target_table}' shows temporal patterns with {len(temporal_relationships)} tables")

            # Discover naming pattern relationships
            if "naming_patterns" in relationship_types:
                naming_relationships = _discover_naming_relationships(target_table, all_tables)
                table_relationships["naming_related"] = naming_relationships
                if naming_relationships:
                    insights.append(f"Table '{target_table}' has naming pattern relationships with {len(naming_relationships)} tables")

            relationships[target_table] = table_relationships

        # Generate relationship insights: total count across all four kinds.
        total_relationships = sum(
            len(rel["foreign_key_refs"]) + len(rel["semantic_similar"]) +
            len(rel["temporal_related"]) + len(rel["naming_related"])
            for rel in relationships.values()
        )

        if total_relationships == 0:
            insights.append("No strong relationships discovered. Consider adding more content or setting up semantic search.")
        else:
            insights.append(f"Discovered {total_relationships} total relationships across {len(relationships)} tables")

        return cast(ToolResponse, {
            "success": True,
            "relationships": relationships,
            "insights": insights,
            "relationship_summary": {
                "total_relationships": total_relationships,
                "tables_analyzed": len(relationships),
                "strongest_connections": _identify_strongest_connections(relationships)
            },
            "recommendations": _generate_relationship_recommendations(relationships, insights)
        })

    except Exception as e:
        # Echo the request parameters for diagnosis.
        return cast(ToolResponse, {
            "success": False,
            "error": f"Relationship discovery failed: {str(e)}",
            "category": "RELATIONSHIP_ERROR",
            "details": {
                "table_name": table_name,
                "relationship_types": relationship_types,
                "similarity_threshold": similarity_threshold
            }
        })
572
+
573
+
574
+ # Helper functions for discovery orchestration
575
+
576
+ def _analyze_content_for_discovery(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
577
+ """Analyze content patterns and distribution."""
578
+ content_analysis = {
579
+ "total_rows": 0,
580
+ "content_distribution": {},
581
+ "text_rich_tables": [],
582
+ "sparse_tables": [],
583
+ "high_value_tables": []
584
+ }
585
+
586
+ target_tables = [focus_area] if focus_area and focus_area in tables else tables
587
+
588
+ for table_name in target_tables:
589
+ try:
590
+ rows_result = db.read_rows(table_name)
591
+ if rows_result.get("success"):
592
+ rows = rows_result.get("rows", [])
593
+ row_count = len(rows)
594
+ content_analysis["total_rows"] += row_count
595
+ content_analysis["content_distribution"][table_name] = row_count
596
+
597
+ # Analyze content quality if depth allows
598
+ if depth in ["moderate", "comprehensive"] and rows:
599
+ # Sample content quality
600
+ sample_size = min(3, len(rows))
601
+ total_content_length = 0
602
+
603
+ for row in rows[:sample_size]:
604
+ for value in row.values():
605
+ if isinstance(value, str):
606
+ total_content_length += len(value)
607
+
608
+ avg_content_length = total_content_length / sample_size if sample_size > 0 else 0
609
+
610
+ if avg_content_length > 200:
611
+ content_analysis["text_rich_tables"].append(table_name)
612
+ if avg_content_length > 500:
613
+ content_analysis["high_value_tables"].append(table_name)
614
+ if row_count < 5:
615
+ content_analysis["sparse_tables"].append(table_name)
616
+
617
+ except Exception:
618
+ continue
619
+
620
+ return content_analysis
621
+
622
+
623
+ def _analyze_schema_for_discovery(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
624
+ """Analyze schema structure and organization."""
625
+ schema_analysis = {
626
+ "total_columns": 0,
627
+ "text_columns_by_table": {},
628
+ "well_structured_tables": [],
629
+ "schema_issues": []
630
+ }
631
+
632
+ target_tables = [focus_area] if focus_area and focus_area in tables else tables
633
+
634
+ for table_name in target_tables:
635
+ try:
636
+ schema_result = db.describe_table(table_name)
637
+ if schema_result.get("success"):
638
+ columns = schema_result.get("columns", [])
639
+ schema_analysis["total_columns"] += len(columns)
640
+
641
+ # Find text columns
642
+ text_columns = [col for col in columns if "TEXT" in col.get("type", "").upper()]
643
+ schema_analysis["text_columns_by_table"][table_name] = len(text_columns)
644
+
645
+ # Check for well-structured tables
646
+ has_id = any(col.get("name") == "id" for col in columns)
647
+ has_timestamp = any("timestamp" in col.get("name", "").lower() for col in columns)
648
+ has_text_content = len(text_columns) > 0
649
+
650
+ if has_id and has_timestamp and has_text_content:
651
+ schema_analysis["well_structured_tables"].append(table_name)
652
+
653
+ # Identify schema issues
654
+ if len(columns) < 2:
655
+ schema_analysis["schema_issues"].append(f"Table '{table_name}' has very few columns")
656
+ if not has_id:
657
+ schema_analysis["schema_issues"].append(f"Table '{table_name}' lacks ID column")
658
+
659
+ except Exception:
660
+ continue
661
+
662
+ return schema_analysis
663
+
664
+
665
+ def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
666
+ """Assess overall content quality."""
667
+ quality_analysis = {
668
+ "quality_scores": {},
669
+ "overall_quality": 0.0,
670
+ "improvement_opportunities": [],
671
+ "quality_distribution": {"high": 0, "medium": 0, "low": 0}
672
+ }
673
+
674
+ target_tables = [focus_area] if focus_area and focus_area in tables else tables
675
+ total_score = 0
676
+ table_count = 0
677
+
678
+ for table_name in target_tables:
679
+ try:
680
+ rows_result = db.read_rows(table_name)
681
+ if rows_result.get("success"):
682
+ rows = rows_result.get("rows", [])
683
+
684
+ if not rows:
685
+ quality_analysis["quality_scores"][table_name] = 0.0
686
+ quality_analysis["improvement_opportunities"].append(f"Table '{table_name}' is empty")
687
+ quality_analysis["quality_distribution"]["low"] += 1
688
+ continue
689
+
690
+ # Calculate quality score
691
+ sample_size = min(5, len(rows))
692
+ content_scores = []
693
+
694
+ for row in rows[:sample_size]:
695
+ row_score = 0
696
+ non_null_fields = sum(1 for v in row.values() if v is not None and str(v).strip())
697
+ total_content_length = sum(len(str(v)) for v in row.values() if v is not None)
698
+
699
+ # Score based on completeness and content richness
700
+ if non_null_fields > 2:
701
+ row_score += 3
702
+ if total_content_length > 100:
703
+ row_score += 4
704
+ if total_content_length > 500:
705
+ row_score += 3
706
+
707
+ content_scores.append(min(10, row_score))
708
+
709
+ table_quality = sum(content_scores) / len(content_scores) if content_scores else 0
710
+ quality_analysis["quality_scores"][table_name] = round(table_quality, 1)
711
+
712
+ # Categorize quality
713
+ if table_quality >= 7:
714
+ quality_analysis["quality_distribution"]["high"] += 1
715
+ elif table_quality >= 4:
716
+ quality_analysis["quality_distribution"]["medium"] += 1
717
+ else:
718
+ quality_analysis["quality_distribution"]["low"] += 1
719
+ quality_analysis["improvement_opportunities"].append(
720
+ f"Table '{table_name}' has low content quality (score: {table_quality:.1f})"
721
+ )
722
+
723
+ total_score += table_quality
724
+ table_count += 1
725
+
726
+ except Exception:
727
+ continue
728
+
729
+ quality_analysis["overall_quality"] = round(total_score / table_count, 1) if table_count > 0 else 0.0
730
+
731
+ return quality_analysis
732
+
733
+
734
def _analyze_search_readiness(db, tables: List[str], focus_area: Optional[str]) -> Dict[str, Any]:
    """Evaluate how ready each table is for keyword and semantic search.

    A table with at least one TEXT column is "text searchable". When the
    semantic backend is available, embedding coverage above 80% marks a table
    "semantic ready"; text-bearing tables below that need optimization.

    Args:
        db: Database accessor; uses ``describe_table`` and, when semantic
            search is available, ``get_embedding_stats``.
        tables: All known table names.
        focus_area: If set and present in ``tables``, restrict the scan to it.

    Returns:
        Dict with keys: semantic_ready_tables, text_searchable_tables,
        search_optimization_needed, embedding_coverage.
    """
    readiness: Dict[str, Any] = {
        "semantic_ready_tables": [],
        "text_searchable_tables": [],
        "search_optimization_needed": [],
        "embedding_coverage": {},
    }

    scan_list = [focus_area] if focus_area and focus_area in tables else tables

    for tbl in scan_list:
        try:
            described = db.describe_table(tbl)
            if not described.get("success"):
                continue

            cols = described.get("columns", [])
            text_cols = [c for c in cols if "TEXT" in c.get("type", "").upper()]

            if text_cols:
                readiness["text_searchable_tables"].append(tbl)

            # Semantic readiness is only assessed when the backend exists.
            if not is_semantic_search_available():
                continue

            stats = db.get_embedding_stats(tbl)
            if not stats.get("success"):
                continue

            coverage = stats.get("coverage_percent", 0)
            readiness["embedding_coverage"][tbl] = coverage

            if coverage > 80:
                readiness["semantic_ready_tables"].append(tbl)
            elif len(text_cols) > 0:
                readiness["search_optimization_needed"].append(tbl)

        except Exception:
            # Skip tables whose schema or stats cannot be read.
            continue

    return readiness
772
+
773
+
774
+ def _generate_discovery_insights(discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str], depth: str) -> tuple:
775
+ """Generate insights and recommendations based on discovery results."""
776
+ insights = []
777
+ recommendations = []
778
+ next_steps = []
779
+
780
+ total_tables = overview.get("total_tables", 0)
781
+ total_rows = overview.get("total_rows", 0)
782
+
783
+ # Goal-specific insights
784
+ if discovery_goal == "understand_content":
785
+ insights.append(f"Memory bank contains {total_tables} tables with {total_rows} total rows")
786
+
787
+ high_value_tables = overview.get("high_value_tables", [])
788
+ if high_value_tables:
789
+ insights.append(f"High-value content found in: {', '.join(high_value_tables[:3])}")
790
+ recommendations.append(f"Focus search efforts on high-value tables: {', '.join(high_value_tables)}")
791
+ next_steps.append(f"Use auto_smart_search() to explore content in {high_value_tables[0]}")
792
+
793
+ sparse_tables = overview.get("sparse_tables", [])
794
+ if sparse_tables:
795
+ insights.append(f"Sparse tables detected: {', '.join(sparse_tables)}")
796
+ recommendations.append("Consider consolidating or expanding sparse tables")
797
+
798
+ elif discovery_goal == "find_patterns":
799
+ text_rich_tables = overview.get("text_rich_tables", [])
800
+ if text_rich_tables:
801
+ insights.append(f"Text-rich content found in {len(text_rich_tables)} tables")
802
+ next_steps.append("Use semantic search to find content patterns")
803
+
804
+ quality_scores = overview.get("quality_scores", {})
805
+ if quality_scores:
806
+ avg_quality = sum(quality_scores.values()) / len(quality_scores)
807
+ insights.append(f"Average content quality: {avg_quality:.1f}/10")
808
+
809
+ elif discovery_goal == "explore_structure":
810
+ well_structured = overview.get("well_structured_tables", [])
811
+ if well_structured:
812
+ insights.append(f"Well-structured tables: {', '.join(well_structured)}")
813
+ recommendations.append("Use well-structured tables as primary data sources")
814
+
815
+ schema_issues = overview.get("schema_issues", [])
816
+ if schema_issues:
817
+ insights.extend(schema_issues[:3]) # Show first 3 issues
818
+
819
+ elif discovery_goal == "assess_quality":
820
+ overall_quality = overview.get("overall_quality", 0)
821
+ insights.append(f"Overall content quality score: {overall_quality}/10")
822
+
823
+ improvement_opportunities = overview.get("improvement_opportunities", [])
824
+ recommendations.extend(improvement_opportunities[:3])
825
+
826
+ elif discovery_goal == "prepare_search":
827
+ semantic_ready = overview.get("semantic_ready_tables", [])
828
+ optimization_needed = overview.get("search_optimization_needed", [])
829
+
830
+ if semantic_ready:
831
+ insights.append(f"Semantic search ready for {len(semantic_ready)} tables")
832
+ next_steps.append("Use auto_semantic_search() for conceptual queries")
833
+
834
+ if optimization_needed:
835
+ insights.append(f"Search optimization needed for {len(optimization_needed)} tables")
836
+ next_steps.append(f"Set up embeddings for: {', '.join(optimization_needed[:2])}")
837
+
838
+ # Universal recommendations
839
+ if overview.get("semantic_search_available"):
840
+ recommendations.append("Use auto_smart_search() for best search results")
841
+ else:
842
+ recommendations.append("Install sentence-transformers for semantic search capabilities")
843
+
844
+ if not next_steps:
845
+ next_steps.append("Use explore_tables() for detailed content examination")
846
+ next_steps.append("Try auto_smart_search() to find specific information")
847
+
848
+ return insights, recommendations, next_steps
849
+
850
+
851
+ def _generate_quick_actions(discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str]) -> List[Dict[str, Any]]:
852
+ """Generate quick action suggestions."""
853
+ actions = []
854
+
855
+ high_value_tables = overview.get("high_value_tables", [])
856
+
857
+ if discovery_goal == "understand_content" and high_value_tables:
858
+ actions.append({
859
+ "action": "Explore High-Value Content",
860
+ "tool": "read_rows",
861
+ "params": {"table_name": high_value_tables[0]},
862
+ "description": f"Examine content in {high_value_tables[0]} table"
863
+ })
864
+
865
+ if overview.get("semantic_search_available"):
866
+ actions.append({
867
+ "action": "Smart Search",
868
+ "tool": "auto_smart_search",
869
+ "params": {"query": "important recent information", "limit": 5},
870
+ "description": "Find important content using intelligent search"
871
+ })
872
+
873
+ actions.append({
874
+ "action": "Quality Assessment",
875
+ "tool": "get_content_health_score",
876
+ "params": {},
877
+ "description": "Get detailed quality metrics and recommendations"
878
+ })
879
+
880
+ return actions
881
+
882
+
883
+ def _store_discovery_pattern(db, discovery_session: Dict[str, Any]) -> None:
884
+ """Store discovery pattern for learning (if agent learning table exists)."""
885
+ try:
886
+ # Check if discovery_patterns table exists
887
+ tables_result = db.list_tables()
888
+ if tables_result.get("success") and "discovery_patterns" in tables_result.get("tables", []):
889
+ # Store the discovery session
890
+ db.insert_row("discovery_patterns", {
891
+ "agent_id": discovery_session.get("agent_id"),
892
+ "goal": discovery_session.get("goal"),
893
+ "focus_area": discovery_session.get("focus_area"),
894
+ "depth": discovery_session.get("depth"),
895
+ "steps_completed": str(discovery_session.get("steps_completed", [])),
896
+ "success": True,
897
+ "timestamp": discovery_session.get("timestamp")
898
+ })
899
+ except Exception:
900
+ # Silently fail if learning storage isn't available
901
+ pass
902
+
903
+
904
+ def _customize_template(template: Dict[str, Any], customize_for: str) -> Dict[str, Any]:
905
+ """Customize template for specific domain or topic."""
906
+ customized = template.copy()
907
+
908
+ # Add customization note
909
+ customized["customized_for"] = customize_for
910
+ customized["customization_note"] = f"Template customized for: {customize_for}"
911
+
912
+ # Modify search queries in workflow to include customization
913
+ for step in customized.get("workflow", []):
914
+ if step.get("tool") in ["auto_smart_search", "auto_semantic_search", "search_content"]:
915
+ params = step.get("params", {})
916
+ if "query" in params and params["query"].startswith("REPLACE_WITH"):
917
+ # Keep the placeholder for user customization
918
+ continue
919
+ elif "query" in params:
920
+ # Add customization to existing query
921
+ params["query"] = f"{customize_for} {params['query']}"
922
+
923
+ return customized
924
+
925
+
926
+ # Relationship discovery helper functions
927
+
928
+ def _discover_foreign_keys(db, target_table: str, all_tables: List[str]) -> List[str]:
929
+ """Discover foreign key relationships."""
930
+ relationships = []
931
+
932
+ try:
933
+ # Get target table schema
934
+ target_schema = db.describe_table(target_table)
935
+ if not target_schema.get("success"):
936
+ return relationships
937
+
938
+ target_columns = target_schema.get("columns", [])
939
+ target_col_names = [col.get("name", "") for col in target_columns]
940
+
941
+ # Check other tables for potential foreign key references
942
+ for other_table in all_tables:
943
+ if other_table == target_table:
944
+ continue
945
+
946
+ try:
947
+ other_schema = db.describe_table(other_table)
948
+ if other_schema.get("success"):
949
+ other_columns = other_schema.get("columns", [])
950
+
951
+ for col in other_columns:
952
+ col_name = col.get("name", "")
953
+ # Look for naming patterns that suggest foreign keys
954
+ if col_name.endswith("_id") or col_name.endswith("Id"):
955
+ potential_ref = col_name.replace("_id", "").replace("Id", "")
956
+ if potential_ref == target_table or f"{potential_ref}s" == target_table:
957
+ relationships.append(f"{other_table}.{col_name}")
958
+
959
+ # Look for exact column name matches (potential shared keys)
960
+ if col_name in target_col_names and col_name != "id":
961
+ relationships.append(f"{other_table}.{col_name} (shared key)")
962
+
963
+ except Exception:
964
+ continue
965
+
966
+ except Exception:
967
+ pass
968
+
969
+ return relationships
970
+
971
+
972
def _discover_semantic_relationships(db, target_table: str, all_tables: List[str], threshold: float) -> List[Dict[str, Any]]:
    """Find tables whose content is semantically similar to *target_table*.

    Builds a short probe text from the first row of the target table and
    runs a semantic search against every other table, keeping tables whose
    average similarity score meets *threshold*.

    Returns:
        A list of ``{"table", "similarity", "related_content_count"}`` dicts;
        empty when semantic search is unavailable, the target has no usable
        text, or any top-level error occurs (best-effort).
    """
    matches: List[Dict[str, Any]] = []

    if not is_semantic_search_available():
        return matches

    try:
        rows_result = db.read_rows(target_table)
        rows = rows_result.get("rows") if rows_result.get("success") else None
        if not rows:
            return matches

        # Probe query: the first row's values mashed together, capped at 200 chars.
        probe = " ".join(str(value) for value in rows[0].values() if value is not None)[:200]
        if len(probe.strip()) < 10:
            return matches

        for candidate in all_tables:
            if candidate == target_table:
                continue

            try:
                result = db.semantic_search(
                    probe, [candidate], "embedding", None, threshold, 3, "all-MiniLM-L6-v2"
                )
                hits = result.get("results") if result.get("success") else None
                if hits:
                    mean_score = sum(hit.get("similarity_score", 0) for hit in hits) / len(hits)
                    if mean_score >= threshold:
                        matches.append({
                            "table": candidate,
                            "similarity": round(mean_score, 2),
                            "related_content_count": len(hits),
                        })
            except Exception:
                # Per-table failures (e.g. missing embeddings) are non-fatal.
                continue

    except Exception:
        # Best-effort discovery: swallow errors and return what we have.
        pass

    return matches
1021
+
1022
+
1023
+ def _discover_temporal_relationships(db, target_table: str, all_tables: List[str]) -> List[str]:
1024
+ """Discover temporal pattern relationships."""
1025
+ relationships = []
1026
+
1027
+ try:
1028
+ # Check if target table has timestamp columns
1029
+ target_schema = db.describe_table(target_table)
1030
+ if not target_schema.get("success"):
1031
+ return relationships
1032
+
1033
+ target_columns = target_schema.get("columns", [])
1034
+ target_has_timestamp = any("timestamp" in col.get("name", "").lower() or
1035
+ "date" in col.get("name", "").lower() or
1036
+ "time" in col.get("name", "").lower()
1037
+ for col in target_columns)
1038
+
1039
+ if not target_has_timestamp:
1040
+ return relationships
1041
+
1042
+ # Check other tables for similar timestamp patterns
1043
+ for other_table in all_tables:
1044
+ if other_table == target_table:
1045
+ continue
1046
+
1047
+ try:
1048
+ other_schema = db.describe_table(other_table)
1049
+ if other_schema.get("success"):
1050
+ other_columns = other_schema.get("columns", [])
1051
+ other_has_timestamp = any("timestamp" in col.get("name", "").lower() or
1052
+ "date" in col.get("name", "").lower() or
1053
+ "time" in col.get("name", "").lower()
1054
+ for col in other_columns)
1055
+
1056
+ if other_has_timestamp:
1057
+ relationships.append(other_table)
1058
+
1059
+ except Exception:
1060
+ continue
1061
+
1062
+ except Exception:
1063
+ pass
1064
+
1065
+ return relationships
1066
+
1067
+
1068
+ def _discover_naming_relationships(target_table: str, all_tables: List[str]) -> List[str]:
1069
+ """Discover relationships based on naming conventions."""
1070
+ relationships = []
1071
+
1072
+ # Look for tables with similar names or naming patterns
1073
+ target_lower = target_table.lower()
1074
+
1075
+ for other_table in all_tables:
1076
+ if other_table == target_table:
1077
+ continue
1078
+
1079
+ other_lower = other_table.lower()
1080
+
1081
+ # Check for plural/singular relationships
1082
+ if (target_lower.endswith('s') and other_lower == target_lower[:-1]) or \
1083
+ (other_lower.endswith('s') and target_lower == other_lower[:-1]):
1084
+ relationships.append(other_table)
1085
+ continue
1086
+
1087
+ # Check for common prefixes or suffixes
1088
+ if len(target_lower) > 3 and len(other_lower) > 3:
1089
+ # Common prefix (at least 4 characters)
1090
+ if target_lower[:4] == other_lower[:4]:
1091
+ relationships.append(other_table)
1092
+ continue
1093
+
1094
+ # Common suffix (at least 4 characters)
1095
+ if target_lower[-4:] == other_lower[-4:]:
1096
+ relationships.append(other_table)
1097
+ continue
1098
+
1099
+ # Check for semantic name relationships
1100
+ name_words = set(target_lower.split('_'))
1101
+ other_words = set(other_lower.split('_'))
1102
+
1103
+ # If tables share significant word overlap
1104
+ if len(name_words.intersection(other_words)) > 0:
1105
+ relationships.append(other_table)
1106
+
1107
+ return relationships
1108
+
1109
+
1110
+ def _identify_strongest_connections(relationships: Dict[str, Any]) -> List[Dict[str, Any]]:
1111
+ """Identify the strongest connections across all relationships."""
1112
+ connections = []
1113
+
1114
+ for table, rels in relationships.items():
1115
+ # Count total connections for this table
1116
+ total_connections = (len(rels.get("foreign_key_refs", [])) +
1117
+ len(rels.get("semantic_similar", [])) +
1118
+ len(rels.get("temporal_related", [])) +
1119
+ len(rels.get("naming_related", [])))
1120
+
1121
+ if total_connections > 0:
1122
+ connections.append({
1123
+ "table": table,
1124
+ "total_connections": total_connections,
1125
+ "connection_types": {
1126
+ "structural": len(rels.get("foreign_key_refs", [])),
1127
+ "semantic": len(rels.get("semantic_similar", [])),
1128
+ "temporal": len(rels.get("temporal_related", [])),
1129
+ "naming": len(rels.get("naming_related", []))
1130
+ }
1131
+ })
1132
+
1133
+ # Sort by total connections and return top 5
1134
+ connections.sort(key=lambda x: x["total_connections"], reverse=True)
1135
+ return connections[:5]
1136
+
1137
+
1138
+ def _generate_relationship_recommendations(relationships: Dict[str, Any], insights: List[str]) -> List[str]:
1139
+ """Generate actionable recommendations based on discovered relationships."""
1140
+ recommendations = []
1141
+
1142
+ # Find tables with many connections
1143
+ highly_connected = []
1144
+ for table, rels in relationships.items():
1145
+ total_connections = (len(rels.get("foreign_key_refs", [])) +
1146
+ len(rels.get("semantic_similar", [])) +
1147
+ len(rels.get("temporal_related", [])) +
1148
+ len(rels.get("naming_related", [])))
1149
+ if total_connections >= 3:
1150
+ highly_connected.append(table)
1151
+
1152
+ if highly_connected:
1153
+ recommendations.append(f"Focus queries on highly connected tables: {', '.join(highly_connected[:3])}")
1154
+
1155
+ # Find tables with semantic relationships
1156
+ semantic_tables = []
1157
+ for table, rels in relationships.items():
1158
+ if rels.get("semantic_similar"):
1159
+ semantic_tables.append(table)
1160
+
1161
+ if semantic_tables:
1162
+ recommendations.append(f"Use semantic search across related tables: {', '.join(semantic_tables[:3])}")
1163
+
1164
+ # Find tables with temporal relationships
1165
+ temporal_tables = []
1166
+ for table, rels in relationships.items():
1167
+ if rels.get("temporal_related"):
1168
+ temporal_tables.append(table)
1169
+
1170
+ if temporal_tables:
1171
+ recommendations.append(f"Consider temporal analysis for time-related tables: {', '.join(temporal_tables[:3])}")
1172
+
1173
+ if not recommendations:
1174
+ recommendations.append("Consider adding more structured relationships or content to improve discoverability")
1175
+
1176
+ return recommendations