mcp-sqlite-memory-bank 1.5.1__py3-none-any.whl → 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,8 +9,7 @@ discovery processes.
 Author: Robert Meisner
 """
 
-import logging
-from typing import Any, Dict, List, Optional, cast, Union
+from typing import Any, Dict, List, Optional, cast
 from datetime import datetime
 
 from ..database import get_database
@@ -35,14 +34,14 @@ def intelligent_discovery(
     Args:
         discovery_goal (str): What you want to achieve
             - "understand_content": Learn what data is available and how it's organized
-            - "find_patterns": Discover themes, relationships, and content patterns 
+            - "find_patterns": Discover themes, relationships, and content patterns
             - "explore_structure": Understand database schema and organization
             - "assess_quality": Evaluate content quality and completeness
             - "prepare_search": Get ready for effective content searching
         focus_area (Optional[str]): Specific table or topic to focus on (default: all)
         depth (str): How thorough the discovery should be
             - "quick": Fast overview with key insights
-            - "moderate": Balanced analysis with actionable recommendations 
+            - "moderate": Balanced analysis with actionable recommendations
             - "comprehensive": Deep dive with detailed analysis
         agent_id (Optional[str]): Agent identifier for learning discovery patterns
 
@@ -73,8 +72,9 @@ def intelligent_discovery(
     """
     try:
         from .. import server
+
         db = get_database(server.DB_PATH)
-        
+
         # Initialize discovery session
         discovery_session = {
             "goal": discovery_goal,
@@ -84,97 +84,105 @@ def intelligent_discovery(
             "agent_id": agent_id,
             "steps_completed": [],
             "insights": [],
-            "recommendations": []
+            "recommendations": [],
         }
-        
+
         # Step 1: Basic overview
         discovery_session["steps_completed"].append("basic_overview")
         tables_result = db.list_tables()
         if not tables_result.get("success"):
-            return cast(ToolResponse, {
-                "success": False,
-                "error": "Failed to get basic overview",
-                "category": "DISCOVERY_ERROR",
-                "details": tables_result
-            })
-        
+            return cast(
+                ToolResponse,
+                {
+                    "success": False,
+                    "error": "Failed to get basic overview",
+                    "category": "DISCOVERY_ERROR",
+                    "details": tables_result,
+                },
+            )
+
         tables = tables_result.get("tables", [])
         overview = {
             "total_tables": len(tables),
             "available_tables": tables,
-            "semantic_search_available": is_semantic_search_available()
+            "semantic_search_available": is_semantic_search_available(),
         }
-        
+
         # Step 2: Content analysis based on goal
         if discovery_goal in ["understand_content", "find_patterns", "assess_quality"]:
            discovery_session["steps_completed"].append("content_analysis")
            content_analysis = _analyze_content_for_discovery(db, tables, focus_area, depth)
            overview.update(content_analysis)
-        
+
         # Step 3: Schema analysis for structure exploration
         if discovery_goal in ["explore_structure", "understand_content"]:
             discovery_session["steps_completed"].append("schema_analysis")
             schema_analysis = _analyze_schema_for_discovery(db, tables, focus_area, depth)
             overview.update(schema_analysis)
-        
+
         # Step 4: Quality assessment
         if discovery_goal in ["assess_quality", "find_patterns"]:
             discovery_session["steps_completed"].append("quality_assessment")
             quality_analysis = _assess_content_quality(db, tables, focus_area, depth)
             overview.update(quality_analysis)
-        
+
         # Step 5: Search readiness for search preparation
         if discovery_goal in ["prepare_search", "understand_content"]:
             discovery_session["steps_completed"].append("search_readiness")
             search_analysis = _analyze_search_readiness(db, tables, focus_area)
             overview.update(search_analysis)
-        
+
         # Step 6: Generate insights and recommendations
         insights, recommendations, next_steps = _generate_discovery_insights(
             discovery_goal, overview, focus_area, depth
         )
-        
+
         discovery_session["insights"] = insights
         discovery_session["recommendations"] = recommendations
-        
+
         # Step 7: Store discovery pattern for learning (if agent_id provided)
         if agent_id:
             _store_discovery_pattern(db, discovery_session)
-        
-        return cast(ToolResponse, {
-            "success": True,
-            "discovery": {
-                "goal": discovery_goal,
-                "overview": overview,
-                "insights": insights,
-                "recommendations": recommendations,
-                "focus_area": focus_area,
-                "depth": depth,
-                "steps_completed": discovery_session["steps_completed"]
+
+        return cast(
+            ToolResponse,
+            {
+                "success": True,
+                "discovery": {
+                    "goal": discovery_goal,
+                    "overview": overview,
+                    "insights": insights,
+                    "recommendations": recommendations,
+                    "focus_area": focus_area,
+                    "depth": depth,
+                    "steps_completed": discovery_session["steps_completed"],
+                },
+                "next_steps": next_steps,
+                "discovery_session": discovery_session,
+                "quick_actions": _generate_quick_actions(discovery_goal, overview, focus_area),
             },
-            "next_steps": next_steps,
-            "discovery_session": discovery_session,
-            "quick_actions": _generate_quick_actions(discovery_goal, overview, focus_area)
-        })
-        
+        )
+
     except Exception as e:
-        return cast(ToolResponse, {
-            "success": False,
-            "error": f"Intelligent discovery failed: {str(e)}",
-            "category": "DISCOVERY_ERROR",
-            "details": {
-                "goal": discovery_goal,
-                "focus_area": focus_area,
-                "depth": depth,
-                "agent_id": agent_id
-            }
-        })
+        return cast(
+            ToolResponse,
+            {
+                "success": False,
+                "error": f"Intelligent discovery failed: {str(e)}",
+                "category": "DISCOVERY_ERROR",
+                "details": {
+                    "goal": discovery_goal,
+                    "focus_area": focus_area,
+                    "depth": depth,
+                    "agent_id": agent_id,
+                },
+            },
+        )
 
 
 @catch_errors
 def discovery_templates(
-    template_type: str = "first_time_exploration",
-    customize_for: Optional[str] = None
+    template_type: str = "first_time_exploration", customize_for: Optional[str] = None
 ) -> ToolResponse:
     """
     📋 **DISCOVERY TEMPLATES** - Pre-built exploration workflows for common scenarios!
@@ -207,7 +215,7 @@ def discovery_templates(
     }}
 
     FastMCP Tool Info:
-    - **PROVEN WORKFLOWS**: Battle-tested discovery sequences 
+    - **PROVEN WORKFLOWS**: Battle-tested discovery sequences
     - **STEP-BY-STEP GUIDANCE**: Exact tools and parameters to use
     - **CUSTOMIZABLE**: Adapt templates to your specific needs
     - **LEARNING-OPTIMIZED**: Based on successful discovery patterns
@@ -223,9 +231,12 @@ def discovery_templates(
                         "step": 1,
                         "action": "Get Overview",
                         "tool": "intelligent_discovery",
-                        "params": {"discovery_goal": "understand_content", "depth": "moderate"},
+                        "params": {
+                            "discovery_goal": "understand_content",
+                            "depth": "moderate",
+                        },
                         "purpose": "Understand what data is available and how it's organized",
-                        "look_for": ["total tables", "content types", "data volume"]
+                        "look_for": ["total tables", "content types", "data volume"],
                     },
                     {
                         "step": 2,
@@ -233,7 +244,11 @@ def discovery_templates(
                         "tool": "explore_tables",
                         "params": {"include_row_counts": True},
                         "purpose": "See detailed table schemas and sample data",
-                        "look_for": ["column types", "sample content", "data relationships"]
+                        "look_for": [
+                            "column types",
+                            "sample content",
+                            "data relationships",
+                        ],
                     },
                     {
                         "step": 3,
@@ -241,7 +256,11 @@ def discovery_templates(
                         "tool": "auto_smart_search",
                         "params": {"query": "recent important information", "limit": 5},
                         "purpose": "Understand search capabilities and content accessibility",
-                        "look_for": ["search quality", "result relevance", "content types found"]
+                        "look_for": [
+                            "search quality",
+                            "result relevance",
+                            "content types found",
+                        ],
                     },
                     {
                         "step": 4,
@@ -249,17 +268,20 @@ def discovery_templates(
                         "tool": "get_content_health_score",
                         "params": {},
                         "purpose": "Understand overall memory bank quality and opportunities",
-                        "look_for": ["health score", "improvement recommendations", "strengths"]
-                    }
+                        "look_for": [
+                            "health score",
+                            "improvement recommendations",
+                            "strengths",
+                        ],
+                    },
                 ],
                 "success_criteria": [
                     "Understand what types of information are stored",
                     "Know which tables contain the most valuable content",
                     "Identify best search strategies for this memory bank",
-                    "Have actionable next steps for productive use"
-                ]
+                    "Have actionable next steps for productive use",
+                ],
             },
-
             "content_audit": {
                 "name": "Content Quality Audit",
                 "description": "Systematic review of content quality and completeness",
@@ -271,7 +293,11 @@ def discovery_templates(
                         "tool": "get_content_health_score",
                         "params": {},
                         "purpose": "Get overall quality metrics and problem areas",
-                        "look_for": ["quality scores", "problem tables", "recommendations"]
+                        "look_for": [
+                            "quality scores",
+                            "problem tables",
+                            "recommendations",
+                        ],
                     },
                     {
                         "step": 2,
@@ -279,7 +305,11 @@ def discovery_templates(
                         "tool": "analyze_memory_patterns",
                         "params": {},
                         "purpose": "Identify content patterns and organizational issues",
-                        "look_for": ["content distribution", "sparse tables", "organization gaps"]
+                        "look_for": [
+                            "content distribution",
+                            "sparse tables",
+                            "organization gaps",
+                        ],
                     },
                     {
                         "step": 3,
@@ -287,25 +317,34 @@ def discovery_templates(
                         "tool": "explore_tables",
                         "params": {"include_row_counts": True},
                         "purpose": "Detailed examination of each table's content",
-                        "look_for": ["empty tables", "low-quality content", "missing data"]
+                        "look_for": [
+                            "empty tables",
+                            "low-quality content",
+                            "missing data",
+                        ],
                     },
                     {
                         "step": 4,
                         "action": "Search Readiness",
                         "tool": "intelligent_discovery",
-                        "params": {"discovery_goal": "prepare_search", "depth": "comprehensive"},
+                        "params": {
+                            "discovery_goal": "prepare_search",
+                            "depth": "comprehensive",
+                        },
                         "purpose": "Ensure content is optimally searchable",
-                        "look_for": ["embedding coverage", "search optimization opportunities"]
-                    }
+                        "look_for": [
+                            "embedding coverage",
+                            "search optimization opportunities",
+                        ],
+                    },
                 ],
                 "success_criteria": [
                     "Identify all content quality issues",
                     "Have specific recommendations for improvement",
                     "Understand which content areas need attention",
-                    "Know how to optimize for better searchability"
-                ]
+                    "Know how to optimize for better searchability",
+                ],
             },
-
             "search_optimization": {
                 "name": "Search Optimization Setup",
                 "description": "Prepare memory bank for optimal content discovery and searching",
@@ -315,9 +354,16 @@ def discovery_templates(
                         "step": 1,
                         "action": "Search Capability Assessment",
                         "tool": "intelligent_discovery",
-                        "params": {"discovery_goal": "prepare_search", "depth": "comprehensive"},
+                        "params": {
+                            "discovery_goal": "prepare_search",
+                            "depth": "comprehensive",
+                        },
                         "purpose": "Understand current search capabilities and gaps",
-                        "look_for": ["semantic readiness", "text column identification", "embedding status"]
+                        "look_for": [
+                            "semantic readiness",
+                            "text column identification",
+                            "embedding status",
+                        ],
                     },
                     {
                         "step": 2,
@@ -325,7 +371,11 @@ def discovery_templates(
                         "tool": "analyze_memory_patterns",
                         "params": {},
                         "purpose": "Identify high-value content for search optimization",
-                        "look_for": ["text-rich tables", "high-value content", "search opportunities"]
+                        "look_for": [
+                            "text-rich tables",
+                            "high-value content",
+                            "search opportunities",
+                        ],
                     },
                     {
                         "step": 3,
@@ -333,7 +383,7 @@ def discovery_templates(
                         "tool": "search_content",
                         "params": {"query": "test search capabilities", "limit": 10},
                         "purpose": "Baseline current search performance",
-                        "look_for": ["search result quality", "coverage", "relevance"]
+                        "look_for": ["search result quality", "coverage", "relevance"],
                     },
                     {
                         "step": 4,
@@ -341,17 +391,19 @@ def discovery_templates(
                         "tool": "auto_semantic_search",
                         "params": {"query": "important valuable content", "limit": 5},
                         "purpose": "Enable and test semantic search capabilities",
-                        "look_for": ["automatic embedding generation", "semantic result quality"]
-                    }
+                        "look_for": [
+                            "automatic embedding generation",
+                            "semantic result quality",
+                        ],
+                    },
                 ],
                 "success_criteria": [
                     "Semantic search is enabled for key tables",
                     "Both keyword and semantic search work effectively",
                     "Search performance meets quality standards",
-                    "Clear strategy for ongoing search optimization"
-                ]
+                    "Clear strategy for ongoing search optimization",
+                ],
             },
-
             "problem_solving": {
                 "name": "Problem-Solving Discovery",
                 "description": "Find information to solve specific problems or answer questions",
@@ -361,25 +413,45 @@ def discovery_templates(
                         "step": 1,
                         "action": "Quick Content Survey",
                         "tool": "intelligent_discovery",
-                        "params": {"discovery_goal": "understand_content", "depth": "quick"},
+                        "params": {
+                            "discovery_goal": "understand_content",
+                            "depth": "quick",
+                        },
                         "purpose": "Rapid overview of available information",
-                        "look_for": ["relevant content areas", "potential information sources"]
+                        "look_for": [
+                            "relevant content areas",
+                            "potential information sources",
+                        ],
                     },
                     {
                         "step": 2,
                         "action": "Targeted Search",
                         "tool": "auto_smart_search",
-                        "params": {"query": "REPLACE_WITH_PROBLEM_KEYWORDS", "limit": 10},
+                        "params": {
+                            "query": "REPLACE_WITH_PROBLEM_KEYWORDS",
+                            "limit": 10,
+                        },
                         "purpose": "Find directly relevant information",
-                        "look_for": ["directly applicable content", "related information", "context clues"]
+                        "look_for": [
+                            "directly applicable content",
+                            "related information",
+                            "context clues",
+                        ],
                     },
                     {
                         "step": 3,
                         "action": "Related Content Discovery",
                         "tool": "auto_semantic_search",
-                        "params": {"query": "REPLACE_WITH_CONCEPTUAL_TERMS", "similarity_threshold": 0.3},
+                        "params": {
+                            "query": "REPLACE_WITH_CONCEPTUAL_TERMS",
+                            "similarity_threshold": 0.3,
+                        },
                         "purpose": "Find conceptually related information",
-                        "look_for": ["broader context", "related concepts", "background information"]
+                        "look_for": [
+                            "broader context",
+                            "related concepts",
+                            "background information",
+                        ],
                     },
                     {
                         "step": 4,
@@ -387,60 +459,80 @@ def discovery_templates(
                         "tool": "explore_tables",
                         "params": {"include_row_counts": True},
                         "purpose": "Identify what information might be missing",
-                        "look_for": ["information gaps", "additional context sources", "related data"]
-                    }
+                        "look_for": [
+                            "information gaps",
+                            "additional context sources",
+                            "related data",
+                        ],
+                    },
                 ],
                 "customization_note": "Replace REPLACE_WITH_PROBLEM_KEYWORDS and REPLACE_WITH_CONCEPTUAL_TERMS with your specific problem terms",
                 "success_criteria": [
                     "Found directly relevant information",
                     "Identified related/contextual information",
                     "Understand what information might be missing",
-                    "Have clear next steps for problem resolution"
-                ]
-            }
+                    "Have clear next steps for problem resolution",
+                ],
+            },
         }
-        
+
         if template_type not in templates:
             available_templates = list(templates.keys())
-            return cast(ToolResponse, {
-                "success": False,
-                "error": f"Template '{template_type}' not found",
-                "category": "TEMPLATE_ERROR",
-                "details": {
-                    "available_templates": available_templates,
-                    "requested_template": template_type
-                }
-            })
-        
+            return cast(
+                ToolResponse,
+                {
+                    "success": False,
+                    "error": f"Template '{template_type}' not found",
+                    "category": "TEMPLATE_ERROR",
+                    "details": {
+                        "available_templates": available_templates,
+                        "requested_template": template_type,
+                    },
+                },
+            )
+
         template = templates[template_type]
-        
+
         # Customize template if requested
         if customize_for:
             template = _customize_template(template, customize_for)
-        
-        return cast(ToolResponse, {
-            "success": True,
-            "template": template,
-            "template_type": template_type,
-            "customized_for": customize_for,
-            "available_templates": list(templates.keys()),
-            "usage_tip": "Follow the workflow steps in order, adapting parameters as needed for your specific situation"
-        })
-        
+
+        return cast(
+            ToolResponse,
+            {
+                "success": True,
+                "template": template,
+                "template_type": template_type,
+                "customized_for": customize_for,
+                "available_templates": list(templates.keys()),
+                "usage_tip": "Follow the workflow steps in order, adapting parameters as needed for your specific situation",
+            },
+        )
+
     except Exception as e:
-        return cast(ToolResponse, {
-            "success": False,
-            "error": f"Discovery template generation failed: {str(e)}",
-            "category": "TEMPLATE_ERROR",
-            "details": {"template_type": template_type, "customize_for": customize_for}
-        })
+        return cast(
+            ToolResponse,
+            {
+                "success": False,
+                "error": f"Discovery template generation failed: {str(e)}",
+                "category": "TEMPLATE_ERROR",
+                "details": {
+                    "template_type": template_type,
+                    "customize_for": customize_for,
+                },
+            },
+        )
 
 
 @catch_errors
 def discover_relationships(
     table_name: Optional[str] = None,
-    relationship_types: List[str] = ["foreign_keys", "semantic_similarity", "temporal_patterns"],
-    similarity_threshold: float = 0.6
+    relationship_types: List[str] = [
+        "foreign_keys",
+        "semantic_similarity",
+        "temporal_patterns",
+    ],
+    similarity_threshold: float = 0.6,
 ) -> ToolResponse:
     """
     🔗 **RELATIONSHIP DISCOVERY** - Find hidden connections in your data!
@@ -478,37 +570,40 @@ def discover_relationships(
     """
     try:
         from .. import server
+
         db = get_database(server.DB_PATH)
-        
+
         # Get all tables or focus on specific table
         tables_result = db.list_tables()
         if not tables_result.get("success"):
             return cast(ToolResponse, tables_result)
-        
+
         all_tables = tables_result.get("tables", [])
         target_tables = [table_name] if table_name else all_tables
-        
+
         relationships = {}
         insights = []
-        
+
         for target_table in target_tables:
             if target_table not in all_tables:
                 continue
-            
+
             table_relationships = {
                 "foreign_key_refs": [],
                 "semantic_similar": [],
                 "temporal_related": [],
-                "naming_related": []
+                "naming_related": [],
             }
-            
+
             # Discover foreign key relationships
             if "foreign_keys" in relationship_types:
                 fk_relationships = _discover_foreign_keys(db, target_table, all_tables)
                 table_relationships["foreign_key_refs"] = fk_relationships
                 if fk_relationships:
-                    insights.append(f"Table '{target_table}' has structural relationships with {len(fk_relationships)} other tables")
-            
+                    insights.append(
+                        f"Table '{target_table}' has structural relationships with {len(fk_relationships)} other tables"
+                    )
+
             # Discover semantic similarity relationships
             if "semantic_similarity" in relationship_types and is_semantic_search_available():
                 semantic_relationships = _discover_semantic_relationships(
@@ -516,75 +611,98 @@ def discover_relationships(
                 )
                 table_relationships["semantic_similar"] = semantic_relationships
                 if semantic_relationships:
-                    insights.append(f"Table '{target_table}' has semantic similarity with {len(semantic_relationships)} tables")
-            
+                    insights.append(
+                        f"Table '{target_table}' has semantic similarity with {len(semantic_relationships)} tables"
+                    )
+
             # Discover temporal patterns
             if "temporal_patterns" in relationship_types:
-                temporal_relationships = _discover_temporal_relationships(db, target_table, all_tables)
+                temporal_relationships = _discover_temporal_relationships(
+                    db, target_table, all_tables
+                )
                 table_relationships["temporal_related"] = temporal_relationships
                 if temporal_relationships:
-                    insights.append(f"Table '{target_table}' shows temporal patterns with {len(temporal_relationships)} tables")
-            
+                    insights.append(
+                        f"Table '{target_table}' shows temporal patterns with {len(temporal_relationships)} tables"
+                    )
+
             # Discover naming pattern relationships
             if "naming_patterns" in relationship_types:
                 naming_relationships = _discover_naming_relationships(target_table, all_tables)
                 table_relationships["naming_related"] = naming_relationships
                 if naming_relationships:
-                    insights.append(f"Table '{target_table}' has naming pattern relationships with {len(naming_relationships)} tables")
-            
+                    insights.append(
+                        f"Table '{target_table}' has naming pattern relationships with {len(naming_relationships)} tables"
+                    )
+
             relationships[target_table] = table_relationships
-        
+
         # Generate relationship insights
         total_relationships = sum(
-            len(rel["foreign_key_refs"]) + len(rel["semantic_similar"]) +
-            len(rel["temporal_related"]) + len(rel["naming_related"])
+            len(rel["foreign_key_refs"])
+            + len(rel["semantic_similar"])
+            + len(rel["temporal_related"])
+            + len(rel["naming_related"])
            for rel in relationships.values()
        )
-        
+
         if total_relationships == 0:
-            insights.append("No strong relationships discovered. Consider adding more content or setting up semantic search.")
+            insights.append(
+                "No strong relationships discovered. Consider adding more content or setting up semantic search."
+            )
         else:
-            insights.append(f"Discovered {total_relationships} total relationships across {len(relationships)} tables")
-        
-        return cast(ToolResponse, {
-            "success": True,
-            "relationships": relationships,
-            "insights": insights,
-            "relationship_summary": {
-                "total_relationships": total_relationships,
-                "tables_analyzed": len(relationships),
-                "strongest_connections": _identify_strongest_connections(relationships)
+            insights.append(
+                f"Discovered {total_relationships} total relationships across {len(relationships)} tables"
+            )
+
+        return cast(
+            ToolResponse,
+            {
+                "success": True,
+                "relationships": relationships,
+                "insights": insights,
+                "relationship_summary": {
+                    "total_relationships": total_relationships,
+                    "tables_analyzed": len(relationships),
+                    "strongest_connections": _identify_strongest_connections(relationships),
+                },
+                "recommendations": _generate_relationship_recommendations(relationships, insights),
             },
-            "recommendations": _generate_relationship_recommendations(relationships, insights)
-        })
-        
+        )
+
     except Exception as e:
-        return cast(ToolResponse, {
-            "success": False,
-            "error": f"Relationship discovery failed: {str(e)}",
-            "category": "RELATIONSHIP_ERROR",
-            "details": {
-                "table_name": table_name,
-                "relationship_types": relationship_types,
-                "similarity_threshold": similarity_threshold
-            }
-        })
+        return cast(
+            ToolResponse,
+            {
+                "success": False,
+                "error": f"Relationship discovery failed: {str(e)}",
+                "category": "RELATIONSHIP_ERROR",
+                "details": {
+                    "table_name": table_name,
+                    "relationship_types": relationship_types,
+                    "similarity_threshold": similarity_threshold,
+                },
+            },
+        )
 
 
 # Helper functions for discovery orchestration
 
-def _analyze_content_for_discovery(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
+
+def _analyze_content_for_discovery(
+    db, tables: List[str], focus_area: Optional[str], depth: str
+) -> Dict[str, Any]:
     """Analyze content patterns and distribution."""
     content_analysis = {
         "total_rows": 0,
         "content_distribution": {},
         "text_rich_tables": [],
         "sparse_tables": [],
-        "high_value_tables": []
+        "high_value_tables": [],
     }
-    
+
     target_tables = [focus_area] if focus_area and focus_area in tables else tables
-    
+
     for table_name in target_tables:
         try:
             rows_result = db.read_rows(table_name)
@@ -593,109 +711,121 @@ def _analyze_content_for_discovery(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
                 row_count = len(rows)
                 content_analysis["total_rows"] += row_count
                 content_analysis["content_distribution"][table_name] = row_count
-                
+
                 # Analyze content quality if depth allows
                 if depth in ["moderate", "comprehensive"] and rows:
                     # Sample content quality
                     sample_size = min(3, len(rows))
                     total_content_length = 0
-                    
+
                     for row in rows[:sample_size]:
                         for value in row.values():
                             if isinstance(value, str):
                                 total_content_length += len(value)
-                    
-                    avg_content_length = total_content_length / sample_size if sample_size > 0 else 0
-                    
+
+                    avg_content_length = (
+                        total_content_length / sample_size if sample_size > 0 else 0
+                    )
+
                     if avg_content_length > 200:
                         content_analysis["text_rich_tables"].append(table_name)
                     if avg_content_length > 500:
                         content_analysis["high_value_tables"].append(table_name)
                 if row_count < 5:
                     content_analysis["sparse_tables"].append(table_name)
-        
+
         except Exception:
             continue
-    
+
     return content_analysis
 
 
-def _analyze_schema_for_discovery(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
+def _analyze_schema_for_discovery(
+    db, tables: List[str], focus_area: Optional[str], depth: str
+) -> Dict[str, Any]:
     """Analyze schema structure and organization."""
     schema_analysis = {
         "total_columns": 0,
         "text_columns_by_table": {},
         "well_structured_tables": [],
-        "schema_issues": []
+        "schema_issues": [],
     }
-    
+
     target_tables = [focus_area] if focus_area and focus_area in tables else tables
-    
+
     for table_name in target_tables:
         try:
             schema_result = db.describe_table(table_name)
             if schema_result.get("success"):
                 columns = schema_result.get("columns", [])
                 schema_analysis["total_columns"] += len(columns)
-                
+
                 # Find text columns
                 text_columns = [col for col in columns if "TEXT" in col.get("type", "").upper()]
                 schema_analysis["text_columns_by_table"][table_name] = len(text_columns)
-                
+
                 # Check for well-structured tables
                 has_id = any(col.get("name") == "id" for col in columns)
                 has_timestamp = any("timestamp" in col.get("name", "").lower() for col in columns)
                 has_text_content = len(text_columns) > 0
-                
+
                 if has_id and has_timestamp and has_text_content:
                     schema_analysis["well_structured_tables"].append(table_name)
-                
+
                 # Identify schema issues
                 if len(columns) < 2:
-                    schema_analysis["schema_issues"].append(f"Table '{table_name}' has very few columns")
+                    schema_analysis["schema_issues"].append(
+                        f"Table '{table_name}' has very few columns"
+                    )
                 if not has_id:
                     schema_analysis["schema_issues"].append(f"Table '{table_name}' lacks ID column")
-        
+
         except Exception:
             continue
-    
+
     return schema_analysis
 
 
-def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
+def _assess_content_quality(
+    db, tables: List[str], focus_area: Optional[str], depth: str
+) -> Dict[str, Any]:
     """Assess overall content quality."""
     quality_analysis = {
         "quality_scores": {},
         "overall_quality": 0.0,
        "improvement_opportunities": [],
-        "quality_distribution": {"high": 0, "medium": 0, "low": 0}
+        "quality_distribution": {"high": 0, "medium": 0, "low": 0},
    }
-    
+
    target_tables = [focus_area] if focus_area and focus_area in tables else tables
    total_score = 0
    table_count = 0
-    
+
    for table_name in target_tables:
        try:
            rows_result = db.read_rows(table_name)
            if rows_result.get("success"):
                rows = rows_result.get("rows", [])
-                
+
                if not rows:
                    quality_analysis["quality_scores"][table_name] = 0.0
-                    quality_analysis["improvement_opportunities"].append(f"Table '{table_name}' is empty")
+                    quality_analysis["improvement_opportunities"].append(
+                        f"Table '{table_name}' is empty"
+                    )
                    quality_analysis["quality_distribution"]["low"] += 1
                    continue
-                
+
                # Calculate quality score
                sample_size = min(5, len(rows))
                content_scores = []
-                
+
                for row in rows[:sample_size]:
                    row_score = 0
-                    non_null_fields = sum(1 for v in row.values() if v is not None and str(v).strip())
+                    non_null_fields = sum(
+                        1 for v in row.values() if v is not None and str(v).strip()
+                    )
                    total_content_length = sum(len(str(v)) for v in row.values() if v is not None)
-                    
+
                    # Score based on completeness and content richness
                    if non_null_fields > 2:
                        row_score += 3
@@ -703,12 +833,12 @@ def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
                         row_score += 4
                     if total_content_length > 500:
                         row_score += 3
-                    
+
                     content_scores.append(min(10, row_score))
-                
+
                 table_quality = sum(content_scores) / len(content_scores) if content_scores else 0
                 quality_analysis["quality_scores"][table_name] = round(table_quality, 1)
-                
+
                 # Categorize quality
                 if table_quality >= 7:
                     quality_analysis["quality_distribution"]["high"] += 1
@@ -719,15 +849,17 @@ def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
                     quality_analysis["improvement_opportunities"].append(
                         f"Table '{table_name}' has low content quality (score: {table_quality:.1f})"
                     )
-                
+
                 total_score += table_quality
                 table_count += 1
-        
+
         except Exception:
             continue
-    
-    quality_analysis["overall_quality"] = round(total_score / table_count, 1) if table_count > 0 else 0.0
-    
+
+    quality_analysis["overall_quality"] = (
+        round(total_score / table_count, 1) if table_count > 0 else 0.0
+    )
+
     return quality_analysis
 
 
@@ -737,11 +869,11 @@ def _analyze_search_readiness(db, tables: List[str], focus_area: Optional[str])
         "semantic_ready_tables": [],
         "text_searchable_tables": [],
         "search_optimization_needed": [],
-        "embedding_coverage": {}
+        "embedding_coverage": {},
     }
-    
+
     target_tables = [focus_area] if focus_area and focus_area in tables else tables
-    
+
     for table_name in target_tables:
         try:
             # Check schema for text content
  # Check schema for text content
@@ -749,134 +881,148 @@ def _analyze_search_readiness(db, tables: List[str], focus_area: Optional[str])
749
881
  if schema_result.get("success"):
750
882
  columns = schema_result.get("columns", [])
751
883
  text_columns = [col for col in columns if "TEXT" in col.get("type", "").upper()]
752
-
884
+
753
885
  if text_columns:
754
886
  search_analysis["text_searchable_tables"].append(table_name)
755
-
887
+
756
888
  # Check semantic search readiness if available
757
889
  if is_semantic_search_available():
758
890
  embedding_stats = db.get_embedding_stats(table_name)
759
891
  if embedding_stats.get("success"):
760
892
  coverage = embedding_stats.get("coverage_percent", 0)
761
893
  search_analysis["embedding_coverage"][table_name] = coverage
762
-
894
+
763
895
  if coverage > 80:
764
896
  search_analysis["semantic_ready_tables"].append(table_name)
765
897
  elif len(text_columns) > 0:
766
898
  search_analysis["search_optimization_needed"].append(table_name)
767
-
899
+
768
900
  except Exception:
769
901
  continue
770
-
902
+
771
903
  return search_analysis
772
904
 
773
905
 
774
- def _generate_discovery_insights(discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str], depth: str) -> tuple:
906
+ def _generate_discovery_insights(
907
+ discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str], depth: str
908
+ ) -> tuple:
775
909
  """Generate insights and recommendations based on discovery results."""
776
910
  insights = []
777
911
  recommendations = []
778
912
  next_steps = []
779
-
913
+
780
914
  total_tables = overview.get("total_tables", 0)
781
915
  total_rows = overview.get("total_rows", 0)
782
-
916
+
783
917
  # Goal-specific insights
784
918
  if discovery_goal == "understand_content":
785
919
  insights.append(f"Memory bank contains {total_tables} tables with {total_rows} total rows")
786
-
920
+
787
921
  high_value_tables = overview.get("high_value_tables", [])
788
922
  if high_value_tables:
789
923
  insights.append(f"High-value content found in: {', '.join(high_value_tables[:3])}")
790
- recommendations.append(f"Focus search efforts on high-value tables: {', '.join(high_value_tables)}")
791
- next_steps.append(f"Use auto_smart_search() to explore content in {high_value_tables[0]}")
792
-
924
+ recommendations.append(
925
+ f"Focus search efforts on high-value tables: {', '.join(high_value_tables)}"
926
+ )
927
+ next_steps.append(
928
+ f"Use auto_smart_search() to explore content in {high_value_tables[0]}"
929
+ )
930
+
793
931
  sparse_tables = overview.get("sparse_tables", [])
794
932
  if sparse_tables:
795
933
  insights.append(f"Sparse tables detected: {', '.join(sparse_tables)}")
796
934
  recommendations.append("Consider consolidating or expanding sparse tables")
797
-
935
+
798
936
  elif discovery_goal == "find_patterns":
799
937
  text_rich_tables = overview.get("text_rich_tables", [])
800
938
  if text_rich_tables:
801
939
  insights.append(f"Text-rich content found in {len(text_rich_tables)} tables")
802
940
  next_steps.append("Use semantic search to find content patterns")
803
-
941
+
804
942
  quality_scores = overview.get("quality_scores", {})
805
943
  if quality_scores:
806
944
  avg_quality = sum(quality_scores.values()) / len(quality_scores)
807
945
  insights.append(f"Average content quality: {avg_quality:.1f}/10")
808
-
946
+
809
947
  elif discovery_goal == "explore_structure":
810
948
  well_structured = overview.get("well_structured_tables", [])
811
949
  if well_structured:
812
950
  insights.append(f"Well-structured tables: {', '.join(well_structured)}")
813
951
  recommendations.append("Use well-structured tables as primary data sources")
814
-
952
+
815
953
  schema_issues = overview.get("schema_issues", [])
816
954
  if schema_issues:
817
955
  insights.extend(schema_issues[:3]) # Show first 3 issues
818
-
956
+
819
957
  elif discovery_goal == "assess_quality":
820
958
  overall_quality = overview.get("overall_quality", 0)
821
959
  insights.append(f"Overall content quality score: {overall_quality}/10")
822
-
960
+
823
961
  improvement_opportunities = overview.get("improvement_opportunities", [])
824
962
  recommendations.extend(improvement_opportunities[:3])
825
-
963
+
826
964
  elif discovery_goal == "prepare_search":
827
965
  semantic_ready = overview.get("semantic_ready_tables", [])
828
966
  optimization_needed = overview.get("search_optimization_needed", [])
829
-
967
+
830
968
  if semantic_ready:
831
969
  insights.append(f"Semantic search ready for {len(semantic_ready)} tables")
832
970
  next_steps.append("Use auto_semantic_search() for conceptual queries")
833
-
971
+
834
972
  if optimization_needed:
835
973
  insights.append(f"Search optimization needed for {len(optimization_needed)} tables")
836
974
  next_steps.append(f"Set up embeddings for: {', '.join(optimization_needed[:2])}")
837
-
975
+
838
976
  # Universal recommendations
839
977
  if overview.get("semantic_search_available"):
840
978
  recommendations.append("Use auto_smart_search() for best search results")
841
979
  else:
842
980
  recommendations.append("Install sentence-transformers for semantic search capabilities")
843
-
981
+
844
982
  if not next_steps:
845
983
  next_steps.append("Use explore_tables() for detailed content examination")
846
984
  next_steps.append("Try auto_smart_search() to find specific information")
847
-
985
+
848
986
  return insights, recommendations, next_steps
849
987
 
850
988
 
851
- def _generate_quick_actions(discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str]) -> List[Dict[str, Any]]:
989
+ def _generate_quick_actions(
990
+ discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str]
991
+ ) -> List[Dict[str, Any]]:
852
992
  """Generate quick action suggestions."""
853
993
  actions = []
854
-
994
+
855
995
  high_value_tables = overview.get("high_value_tables", [])
856
-
996
+
857
997
  if discovery_goal == "understand_content" and high_value_tables:
858
- actions.append({
859
- "action": "Explore High-Value Content",
860
- "tool": "read_rows",
861
- "params": {"table_name": high_value_tables[0]},
862
- "description": f"Examine content in {high_value_tables[0]} table"
863
- })
864
-
998
+ actions.append(
999
+ {
1000
+ "action": "Explore High-Value Content",
1001
+ "tool": "read_rows",
1002
+ "params": {"table_name": high_value_tables[0]},
1003
+ "description": f"Examine content in {high_value_tables[0]} table",
1004
+ }
1005
+ )
1006
+
865
1007
  if overview.get("semantic_search_available"):
866
- actions.append({
867
- "action": "Smart Search",
868
- "tool": "auto_smart_search",
869
- "params": {"query": "important recent information", "limit": 5},
870
- "description": "Find important content using intelligent search"
871
- })
872
-
873
- actions.append({
874
- "action": "Quality Assessment",
875
- "tool": "get_content_health_score",
876
- "params": {},
877
- "description": "Get detailed quality metrics and recommendations"
878
- })
879
-
1008
+ actions.append(
1009
+ {
1010
+ "action": "Smart Search",
1011
+ "tool": "auto_smart_search",
1012
+ "params": {"query": "important recent information", "limit": 5},
1013
+ "description": "Find important content using intelligent search",
1014
+ }
1015
+ )
1016
+
1017
+ actions.append(
1018
+ {
1019
+ "action": "Quality Assessment",
1020
+ "tool": "get_content_health_score",
1021
+ "params": {},
1022
+ "description": "Get detailed quality metrics and recommendations",
1023
+ }
1024
+ )
1025
+
880
1026
  return actions
881
1027
 
882
1028
 
@@ -887,15 +1033,18 @@ def _store_discovery_pattern(db, discovery_session: Dict[str, Any]) -> None:
         tables_result = db.list_tables()
         if tables_result.get("success") and "discovery_patterns" in tables_result.get("tables", []):
             # Store the discovery session
-            db.insert_row("discovery_patterns", {
-                "agent_id": discovery_session.get("agent_id"),
-                "goal": discovery_session.get("goal"),
-                "focus_area": discovery_session.get("focus_area"),
-                "depth": discovery_session.get("depth"),
-                "steps_completed": str(discovery_session.get("steps_completed", [])),
-                "success": True,
-                "timestamp": discovery_session.get("timestamp")
-            })
+            db.insert_row(
+                "discovery_patterns",
+                {
+                    "agent_id": discovery_session.get("agent_id"),
+                    "goal": discovery_session.get("goal"),
+                    "focus_area": discovery_session.get("focus_area"),
+                    "depth": discovery_session.get("depth"),
+                    "steps_completed": str(discovery_session.get("steps_completed", [])),
+                    "success": True,
+                    "timestamp": discovery_session.get("timestamp"),
+                },
+            )
     except Exception:
         # Silently fail if learning storage isn't available
         pass
@@ -904,14 +1053,18 @@ def _store_discovery_pattern(db, discovery_session: Dict[str, Any]) -> None:
 def _customize_template(template: Dict[str, Any], customize_for: str) -> Dict[str, Any]:
     """Customize template for specific domain or topic."""
     customized = template.copy()
-    
+
     # Add customization note
     customized["customized_for"] = customize_for
     customized["customization_note"] = f"Template customized for: {customize_for}"
-    
+
     # Modify search queries in workflow to include customization
     for step in customized.get("workflow", []):
-        if step.get("tool") in ["auto_smart_search", "auto_semantic_search", "search_content"]:
+        if step.get("tool") in [
+            "auto_smart_search",
+            "auto_semantic_search",
+            "search_content",
+        ]:
             params = step.get("params", {})
             if "query" in params and params["query"].startswith("REPLACE_WITH"):
                 # Keep the placeholder for user customization
@@ -919,35 +1072,36 @@ def _customize_template(template: Dict[str, Any], customize_for: str) -> Dict[str, Any]:
                 pass
             elif "query" in params:
                 # Add customization to existing query
                 params["query"] = f"{customize_for} {params['query']}"
-    
+
     return customized
 
 # Relationship discovery helper functions
 
+
 def _discover_foreign_keys(db, target_table: str, all_tables: List[str]) -> List[str]:
     """Discover foreign key relationships."""
     relationships = []
-    
+
     try:
         # Get target table schema
         target_schema = db.describe_table(target_table)
         if not target_schema.get("success"):
             return relationships
-        
+
         target_columns = target_schema.get("columns", [])
         target_col_names = [col.get("name", "") for col in target_columns]
-        
+
         # Check other tables for potential foreign key references
         for other_table in all_tables:
             if other_table == target_table:
                 continue
-            
+
             try:
                 other_schema = db.describe_table(other_table)
                 if other_schema.get("success"):
                     other_columns = other_schema.get("columns", [])
-                    
+
                     for col in other_columns:
                         col_name = col.get("name", "")
                         # Look for naming patterns that suggest foreign keys
@@ -955,222 +1109,257 @@ def _discover_foreign_keys(db, target_table: str, all_tables: List[str]) -> List[str]:
                             potential_ref = col_name.replace("_id", "").replace("Id", "")
                             if potential_ref == target_table or f"{potential_ref}s" == target_table:
                                 relationships.append(f"{other_table}.{col_name}")
-                        
+
                         # Look for exact column name matches (potential shared keys)
                         if col_name in target_col_names and col_name != "id":
                             relationships.append(f"{other_table}.{col_name} (shared key)")
-            
+
             except Exception:
                 continue
-    
+
     except Exception:
         pass
-    
+
     return relationships
 
 
-def _discover_semantic_relationships(db, target_table: str, all_tables: List[str], threshold: float) -> List[Dict[str, Any]]:
+def _discover_semantic_relationships(
+    db, target_table: str, all_tables: List[str], threshold: float
+) -> List[Dict[str, Any]]:
     """Discover semantic similarity relationships."""
     relationships = []
-    
+
     if not is_semantic_search_available():
         return relationships
-    
+
     try:
         # Get sample content from target table
         target_rows = db.read_rows(target_table)
         if not target_rows.get("success") or not target_rows.get("rows"):
             return relationships
-        
+
         # Create a sample query from target table content
         sample_row = target_rows["rows"][0]
         sample_text = " ".join(str(v) for v in sample_row.values() if v is not None)[:200]
-        
+
         if len(sample_text.strip()) < 10:
             return relationships
-        
+
         # Search for similar content in other tables
         for other_table in all_tables:
             if other_table == target_table:
                 continue
-            
+
             try:
                 # Try semantic search in the other table
                 search_result = db.semantic_search(
-                    sample_text, [other_table], "embedding", None, threshold, 3, "all-MiniLM-L6-v2"
+                    sample_text,
+                    [other_table],
+                    "embedding",
+                    None,
+                    threshold,
+                    3,
+                    "all-MiniLM-L6-v2",
                 )
-                
+
                 if search_result.get("success") and search_result.get("results"):
                     results = search_result["results"]
-                    avg_similarity = sum(r.get("similarity_score", 0) for r in results) / len(results)
-                    
+                    avg_similarity = sum(r.get("similarity_score", 0) for r in results) / len(
+                        results
+                    )
+
                     if avg_similarity >= threshold:
-                        relationships.append({
-                            "table": other_table,
-                            "similarity": round(avg_similarity, 2),
-                            "related_content_count": len(results)
-                        })
-            
+                        relationships.append(
+                            {
+                                "table": other_table,
+                                "similarity": round(avg_similarity, 2),
+                                "related_content_count": len(results),
+                            }
+                        )
+
             except Exception:
                 continue
-    
+
     except Exception:
         pass
-    
+
     return relationships
 
 
 def _discover_temporal_relationships(db, target_table: str, all_tables: List[str]) -> List[str]:
     """Discover temporal pattern relationships."""
     relationships = []
-    
+
     try:
         # Check if target table has timestamp columns
         target_schema = db.describe_table(target_table)
         if not target_schema.get("success"):
             return relationships
-        
+
         target_columns = target_schema.get("columns", [])
-        target_has_timestamp = any("timestamp" in col.get("name", "").lower() or
-                                   "date" in col.get("name", "").lower() or
-                                   "time" in col.get("name", "").lower()
-                                   for col in target_columns)
-        
+        target_has_timestamp = any(
+            "timestamp" in col.get("name", "").lower()
+            or "date" in col.get("name", "").lower()
+            or "time" in col.get("name", "").lower()
+            for col in target_columns
+        )
+
         if not target_has_timestamp:
             return relationships
-        
+
         # Check other tables for similar timestamp patterns
         for other_table in all_tables:
             if other_table == target_table:
                 continue
-            
+
             try:
                 other_schema = db.describe_table(other_table)
                 if other_schema.get("success"):
                     other_columns = other_schema.get("columns", [])
-                    other_has_timestamp = any("timestamp" in col.get("name", "").lower() or
-                                              "date" in col.get("name", "").lower() or
-                                              "time" in col.get("name", "").lower()
-                                              for col in other_columns)
-                    
+                    other_has_timestamp = any(
+                        "timestamp" in col.get("name", "").lower()
+                        or "date" in col.get("name", "").lower()
+                        or "time" in col.get("name", "").lower()
+                        for col in other_columns
+                    )
+
                     if other_has_timestamp:
                         relationships.append(other_table)
-            
+
             except Exception:
                 continue
-    
+
     except Exception:
         pass
-    
+
     return relationships
 
 
 def _discover_naming_relationships(target_table: str, all_tables: List[str]) -> List[str]:
     """Discover relationships based on naming conventions."""
     relationships = []
-    
+
     # Look for tables with similar names or naming patterns
     target_lower = target_table.lower()
-    
+
     for other_table in all_tables:
         if other_table == target_table:
             continue
-        
+
         other_lower = other_table.lower()
-        
+
         # Check for plural/singular relationships
-        if (target_lower.endswith('s') and other_lower == target_lower[:-1]) or \
-           (other_lower.endswith('s') and target_lower == other_lower[:-1]):
+        if (target_lower.endswith("s") and other_lower == target_lower[:-1]) or (
+            other_lower.endswith("s") and target_lower == other_lower[:-1]
+        ):
             relationships.append(other_table)
             continue
-        
+
         # Check for common prefixes or suffixes
         if len(target_lower) > 3 and len(other_lower) > 3:
             # Common prefix (at least 4 characters)
             if target_lower[:4] == other_lower[:4]:
                 relationships.append(other_table)
                 continue
-            
+
             # Common suffix (at least 4 characters)
             if target_lower[-4:] == other_lower[-4:]:
                 relationships.append(other_table)
                 continue
-        
+
         # Check for semantic name relationships
-        name_words = set(target_lower.split('_'))
-        other_words = set(other_lower.split('_'))
-        
+        name_words = set(target_lower.split("_"))
+        other_words = set(other_lower.split("_"))
+
         # If tables share significant word overlap
         if len(name_words.intersection(other_words)) > 0:
             relationships.append(other_table)
-    
+
     return relationships
 
 
-def _identify_strongest_connections(relationships: Dict[str, Any]) -> List[Dict[str, Any]]:
+def _identify_strongest_connections(
+    relationships: Dict[str, Any],
+) -> List[Dict[str, Any]]:
     """Identify the strongest connections across all relationships."""
     connections = []
-    
+
     for table, rels in relationships.items():
         # Count total connections for this table
-        total_connections = (len(rels.get("foreign_key_refs", [])) +
-                             len(rels.get("semantic_similar", [])) +
-                             len(rels.get("temporal_related", [])) +
-                             len(rels.get("naming_related", [])))
-        
+        total_connections = (
+            len(rels.get("foreign_key_refs", []))
+            + len(rels.get("semantic_similar", []))
+            + len(rels.get("temporal_related", []))
+            + len(rels.get("naming_related", []))
+        )
+
         if total_connections > 0:
-            connections.append({
-                "table": table,
-                "total_connections": total_connections,
-                "connection_types": {
-                    "structural": len(rels.get("foreign_key_refs", [])),
-                    "semantic": len(rels.get("semantic_similar", [])),
-                    "temporal": len(rels.get("temporal_related", [])),
-                    "naming": len(rels.get("naming_related", []))
+            connections.append(
+                {
+                    "table": table,
+                    "total_connections": total_connections,
+                    "connection_types": {
+                        "structural": len(rels.get("foreign_key_refs", [])),
+                        "semantic": len(rels.get("semantic_similar", [])),
+                        "temporal": len(rels.get("temporal_related", [])),
+                        "naming": len(rels.get("naming_related", [])),
+                    },
                 }
-            })
-    
+            )
+
     # Sort by total connections and return top 5
     connections.sort(key=lambda x: x["total_connections"], reverse=True)
     return connections[:5]
 
 
-def _generate_relationship_recommendations(relationships: Dict[str, Any], insights: List[str]) -> List[str]:
+def _generate_relationship_recommendations(
+    relationships: Dict[str, Any], insights: List[str]
+) -> List[str]:
     """Generate actionable recommendations based on discovered relationships."""
     recommendations = []
-    
+
     # Find tables with many connections
     highly_connected = []
     for table, rels in relationships.items():
-        total_connections = (len(rels.get("foreign_key_refs", [])) +
-                             len(rels.get("semantic_similar", [])) +
-                             len(rels.get("temporal_related", [])) +
-                             len(rels.get("naming_related", [])))
+        total_connections = (
+            len(rels.get("foreign_key_refs", []))
+            + len(rels.get("semantic_similar", []))
+            + len(rels.get("temporal_related", []))
+            + len(rels.get("naming_related", []))
+        )
         if total_connections >= 3:
             highly_connected.append(table)
-    
+
     if highly_connected:
-        recommendations.append(f"Focus queries on highly connected tables: {', '.join(highly_connected[:3])}")
-    
+        recommendations.append(
+            f"Focus queries on highly connected tables: {', '.join(highly_connected[:3])}"
+        )
+
     # Find tables with semantic relationships
     semantic_tables = []
     for table, rels in relationships.items():
         if rels.get("semantic_similar"):
             semantic_tables.append(table)
-    
+
     if semantic_tables:
-        recommendations.append(f"Use semantic search across related tables: {', '.join(semantic_tables[:3])}")
-    
+        recommendations.append(
+            f"Use semantic search across related tables: {', '.join(semantic_tables[:3])}"
+        )
+
     # Find tables with temporal relationships
     temporal_tables = []
     for table, rels in relationships.items():
         if rels.get("temporal_related"):
             temporal_tables.append(table)
-    
+
     if temporal_tables:
-        recommendations.append(f"Consider temporal analysis for time-related tables: {', '.join(temporal_tables[:3])}")
-    
+        recommendations.append(
+            f"Consider temporal analysis for time-related tables: {', '.join(temporal_tables[:3])}"
+        )
+
     if not recommendations:
-        recommendations.append("Consider adding more structured relationships or content to improve discoverability")
-    
+        recommendations.append(
+            "Consider adding more structured relationships or content to improve discoverability"
+        )
+
     return recommendations