mcp-sqlite-memory-bank 1.5.1__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,14 +35,14 @@ def intelligent_discovery(
  Args:
  discovery_goal (str): What you want to achieve
  - "understand_content": Learn what data is available and how it's organized
- - "find_patterns": Discover themes, relationships, and content patterns
+ - "find_patterns": Discover themes, relationships, and content patterns
  - "explore_structure": Understand database schema and organization
  - "assess_quality": Evaluate content quality and completeness
  - "prepare_search": Get ready for effective content searching
  focus_area (Optional[str]): Specific table or topic to focus on (default: all)
  depth (str): How thorough the discovery should be
  - "quick": Fast overview with key insights
- - "moderate": Balanced analysis with actionable recommendations
+ - "moderate": Balanced analysis with actionable recommendations
  - "comprehensive": Deep dive with detailed analysis
  agent_id (Optional[str]): Agent identifier for learning discovery patterns

@@ -73,8 +73,9 @@ def intelligent_discovery(
  """
  try:
  from .. import server
+
  db = get_database(server.DB_PATH)
-
+
  # Initialize discovery session
  discovery_session = {
  "goal": discovery_goal,
@@ -84,97 +85,111 @@ def intelligent_discovery(
  "agent_id": agent_id,
  "steps_completed": [],
  "insights": [],
- "recommendations": []
+ "recommendations": [],
  }
-
+
  # Step 1: Basic overview
  discovery_session["steps_completed"].append("basic_overview")
  tables_result = db.list_tables()
  if not tables_result.get("success"):
- return cast(ToolResponse, {
- "success": False,
- "error": "Failed to get basic overview",
- "category": "DISCOVERY_ERROR",
- "details": tables_result
- })
-
+ return cast(
+ ToolResponse,
+ {
+ "success": False,
+ "error": "Failed to get basic overview",
+ "category": "DISCOVERY_ERROR",
+ "details": tables_result,
+ },
+ )
+
  tables = tables_result.get("tables", [])
  overview = {
  "total_tables": len(tables),
  "available_tables": tables,
- "semantic_search_available": is_semantic_search_available()
+ "semantic_search_available": is_semantic_search_available(),
  }
-
+
  # Step 2: Content analysis based on goal
  if discovery_goal in ["understand_content", "find_patterns", "assess_quality"]:
  discovery_session["steps_completed"].append("content_analysis")
- content_analysis = _analyze_content_for_discovery(db, tables, focus_area, depth)
+ content_analysis = _analyze_content_for_discovery(
+ db, tables, focus_area, depth
+ )
  overview.update(content_analysis)
-
+
  # Step 3: Schema analysis for structure exploration
  if discovery_goal in ["explore_structure", "understand_content"]:
  discovery_session["steps_completed"].append("schema_analysis")
- schema_analysis = _analyze_schema_for_discovery(db, tables, focus_area, depth)
+ schema_analysis = _analyze_schema_for_discovery(
+ db, tables, focus_area, depth
+ )
  overview.update(schema_analysis)
-
+
  # Step 4: Quality assessment
  if discovery_goal in ["assess_quality", "find_patterns"]:
  discovery_session["steps_completed"].append("quality_assessment")
  quality_analysis = _assess_content_quality(db, tables, focus_area, depth)
  overview.update(quality_analysis)
-
+
  # Step 5: Search readiness for search preparation
  if discovery_goal in ["prepare_search", "understand_content"]:
  discovery_session["steps_completed"].append("search_readiness")
  search_analysis = _analyze_search_readiness(db, tables, focus_area)
  overview.update(search_analysis)
-
+
  # Step 6: Generate insights and recommendations
  insights, recommendations, next_steps = _generate_discovery_insights(
  discovery_goal, overview, focus_area, depth
  )
-
+
  discovery_session["insights"] = insights
  discovery_session["recommendations"] = recommendations
-
+
  # Step 7: Store discovery pattern for learning (if agent_id provided)
  if agent_id:
  _store_discovery_pattern(db, discovery_session)
-
- return cast(ToolResponse, {
- "success": True,
- "discovery": {
- "goal": discovery_goal,
- "overview": overview,
- "insights": insights,
- "recommendations": recommendations,
- "focus_area": focus_area,
- "depth": depth,
- "steps_completed": discovery_session["steps_completed"]
+
+ return cast(
+ ToolResponse,
+ {
+ "success": True,
+ "discovery": {
+ "goal": discovery_goal,
+ "overview": overview,
+ "insights": insights,
+ "recommendations": recommendations,
+ "focus_area": focus_area,
+ "depth": depth,
+ "steps_completed": discovery_session["steps_completed"],
+ },
+ "next_steps": next_steps,
+ "discovery_session": discovery_session,
+ "quick_actions": _generate_quick_actions(
+ discovery_goal, overview, focus_area
+ ),
  },
- "next_steps": next_steps,
- "discovery_session": discovery_session,
- "quick_actions": _generate_quick_actions(discovery_goal, overview, focus_area)
- })
-
+ )
+
  except Exception as e:
- return cast(ToolResponse, {
- "success": False,
- "error": f"Intelligent discovery failed: {str(e)}",
- "category": "DISCOVERY_ERROR",
- "details": {
- "goal": discovery_goal,
- "focus_area": focus_area,
- "depth": depth,
- "agent_id": agent_id
- }
- })
+ return cast(
+ ToolResponse,
+ {
+ "success": False,
+ "error": f"Intelligent discovery failed: {str(e)}",
+ "category": "DISCOVERY_ERROR",
+ "details": {
+ "goal": discovery_goal,
+ "focus_area": focus_area,
+ "depth": depth,
+ "agent_id": agent_id,
+ },
+ },
+ )


  @catch_errors
  def discovery_templates(
- template_type: str = "first_time_exploration",
- customize_for: Optional[str] = None
+ template_type: str = "first_time_exploration", customize_for: Optional[str] = None
  ) -> ToolResponse:
  """
  📋 **DISCOVERY TEMPLATES** - Pre-built exploration workflows for common scenarios!
@@ -207,7 +222,7 @@ def discovery_templates(
  }}

  FastMCP Tool Info:
- - **PROVEN WORKFLOWS**: Battle-tested discovery sequences
+ - **PROVEN WORKFLOWS**: Battle-tested discovery sequences
  - **STEP-BY-STEP GUIDANCE**: Exact tools and parameters to use
  - **CUSTOMIZABLE**: Adapt templates to your specific needs
  - **LEARNING-OPTIMIZED**: Based on successful discovery patterns
@@ -223,9 +238,12 @@ def discovery_templates(
  "step": 1,
  "action": "Get Overview",
  "tool": "intelligent_discovery",
- "params": {"discovery_goal": "understand_content", "depth": "moderate"},
+ "params": {
+ "discovery_goal": "understand_content",
+ "depth": "moderate",
+ },
  "purpose": "Understand what data is available and how it's organized",
- "look_for": ["total tables", "content types", "data volume"]
+ "look_for": ["total tables", "content types", "data volume"],
  },
  {
  "step": 2,
@@ -233,7 +251,11 @@ def discovery_templates(
  "tool": "explore_tables",
  "params": {"include_row_counts": True},
  "purpose": "See detailed table schemas and sample data",
- "look_for": ["column types", "sample content", "data relationships"]
+ "look_for": [
+ "column types",
+ "sample content",
+ "data relationships",
+ ],
  },
  {
  "step": 3,
@@ -241,7 +263,11 @@ def discovery_templates(
  "tool": "auto_smart_search",
  "params": {"query": "recent important information", "limit": 5},
  "purpose": "Understand search capabilities and content accessibility",
- "look_for": ["search quality", "result relevance", "content types found"]
+ "look_for": [
+ "search quality",
+ "result relevance",
+ "content types found",
+ ],
  },
  {
  "step": 4,
@@ -249,17 +275,20 @@ def discovery_templates(
  "tool": "get_content_health_score",
  "params": {},
  "purpose": "Understand overall memory bank quality and opportunities",
- "look_for": ["health score", "improvement recommendations", "strengths"]
- }
+ "look_for": [
+ "health score",
+ "improvement recommendations",
+ "strengths",
+ ],
+ },
  ],
  "success_criteria": [
  "Understand what types of information are stored",
  "Know which tables contain the most valuable content",
  "Identify best search strategies for this memory bank",
- "Have actionable next steps for productive use"
- ]
+ "Have actionable next steps for productive use",
+ ],
  },
-
  "content_audit": {
  "name": "Content Quality Audit",
  "description": "Systematic review of content quality and completeness",
@@ -271,7 +300,11 @@ def discovery_templates(
  "tool": "get_content_health_score",
  "params": {},
  "purpose": "Get overall quality metrics and problem areas",
- "look_for": ["quality scores", "problem tables", "recommendations"]
+ "look_for": [
+ "quality scores",
+ "problem tables",
+ "recommendations",
+ ],
  },
  {
  "step": 2,
@@ -279,7 +312,11 @@ def discovery_templates(
  "tool": "analyze_memory_patterns",
  "params": {},
  "purpose": "Identify content patterns and organizational issues",
- "look_for": ["content distribution", "sparse tables", "organization gaps"]
+ "look_for": [
+ "content distribution",
+ "sparse tables",
+ "organization gaps",
+ ],
  },
  {
  "step": 3,
@@ -287,25 +324,34 @@ def discovery_templates(
  "tool": "explore_tables",
  "params": {"include_row_counts": True},
  "purpose": "Detailed examination of each table's content",
- "look_for": ["empty tables", "low-quality content", "missing data"]
+ "look_for": [
+ "empty tables",
+ "low-quality content",
+ "missing data",
+ ],
  },
  {
  "step": 4,
  "action": "Search Readiness",
  "tool": "intelligent_discovery",
- "params": {"discovery_goal": "prepare_search", "depth": "comprehensive"},
+ "params": {
+ "discovery_goal": "prepare_search",
+ "depth": "comprehensive",
+ },
  "purpose": "Ensure content is optimally searchable",
- "look_for": ["embedding coverage", "search optimization opportunities"]
- }
+ "look_for": [
+ "embedding coverage",
+ "search optimization opportunities",
+ ],
+ },
  ],
  "success_criteria": [
  "Identify all content quality issues",
  "Have specific recommendations for improvement",
  "Understand which content areas need attention",
- "Know how to optimize for better searchability"
- ]
+ "Know how to optimize for better searchability",
+ ],
  },
-
  "search_optimization": {
  "name": "Search Optimization Setup",
  "description": "Prepare memory bank for optimal content discovery and searching",
@@ -315,9 +361,16 @@ def discovery_templates(
  "step": 1,
  "action": "Search Capability Assessment",
  "tool": "intelligent_discovery",
- "params": {"discovery_goal": "prepare_search", "depth": "comprehensive"},
+ "params": {
+ "discovery_goal": "prepare_search",
+ "depth": "comprehensive",
+ },
  "purpose": "Understand current search capabilities and gaps",
- "look_for": ["semantic readiness", "text column identification", "embedding status"]
+ "look_for": [
+ "semantic readiness",
+ "text column identification",
+ "embedding status",
+ ],
  },
  {
  "step": 2,
@@ -325,7 +378,11 @@ def discovery_templates(
  "tool": "analyze_memory_patterns",
  "params": {},
  "purpose": "Identify high-value content for search optimization",
- "look_for": ["text-rich tables", "high-value content", "search opportunities"]
+ "look_for": [
+ "text-rich tables",
+ "high-value content",
+ "search opportunities",
+ ],
  },
  {
  "step": 3,
@@ -333,7 +390,7 @@ def discovery_templates(
  "tool": "search_content",
  "params": {"query": "test search capabilities", "limit": 10},
  "purpose": "Baseline current search performance",
- "look_for": ["search result quality", "coverage", "relevance"]
+ "look_for": ["search result quality", "coverage", "relevance"],
  },
  {
  "step": 4,
@@ -341,17 +398,19 @@ def discovery_templates(
  "tool": "auto_semantic_search",
  "params": {"query": "important valuable content", "limit": 5},
  "purpose": "Enable and test semantic search capabilities",
- "look_for": ["automatic embedding generation", "semantic result quality"]
- }
+ "look_for": [
+ "automatic embedding generation",
+ "semantic result quality",
+ ],
+ },
  ],
  "success_criteria": [
  "Semantic search is enabled for key tables",
  "Both keyword and semantic search work effectively",
  "Search performance meets quality standards",
- "Clear strategy for ongoing search optimization"
- ]
+ "Clear strategy for ongoing search optimization",
+ ],
  },
-
  "problem_solving": {
  "name": "Problem-Solving Discovery",
  "description": "Find information to solve specific problems or answer questions",
@@ -361,25 +420,45 @@ def discovery_templates(
  "step": 1,
  "action": "Quick Content Survey",
  "tool": "intelligent_discovery",
- "params": {"discovery_goal": "understand_content", "depth": "quick"},
+ "params": {
+ "discovery_goal": "understand_content",
+ "depth": "quick",
+ },
  "purpose": "Rapid overview of available information",
- "look_for": ["relevant content areas", "potential information sources"]
+ "look_for": [
+ "relevant content areas",
+ "potential information sources",
+ ],
  },
  {
  "step": 2,
  "action": "Targeted Search",
  "tool": "auto_smart_search",
- "params": {"query": "REPLACE_WITH_PROBLEM_KEYWORDS", "limit": 10},
+ "params": {
+ "query": "REPLACE_WITH_PROBLEM_KEYWORDS",
+ "limit": 10,
+ },
  "purpose": "Find directly relevant information",
- "look_for": ["directly applicable content", "related information", "context clues"]
+ "look_for": [
+ "directly applicable content",
+ "related information",
+ "context clues",
+ ],
  },
  {
  "step": 3,
  "action": "Related Content Discovery",
  "tool": "auto_semantic_search",
- "params": {"query": "REPLACE_WITH_CONCEPTUAL_TERMS", "similarity_threshold": 0.3},
+ "params": {
+ "query": "REPLACE_WITH_CONCEPTUAL_TERMS",
+ "similarity_threshold": 0.3,
+ },
  "purpose": "Find conceptually related information",
- "look_for": ["broader context", "related concepts", "background information"]
+ "look_for": [
+ "broader context",
+ "related concepts",
+ "background information",
+ ],
  },
  {
  "step": 4,
@@ -387,60 +466,80 @@ def discovery_templates(
  "tool": "explore_tables",
  "params": {"include_row_counts": True},
  "purpose": "Identify what information might be missing",
- "look_for": ["information gaps", "additional context sources", "related data"]
- }
+ "look_for": [
+ "information gaps",
+ "additional context sources",
+ "related data",
+ ],
+ },
  ],
  "customization_note": "Replace REPLACE_WITH_PROBLEM_KEYWORDS and REPLACE_WITH_CONCEPTUAL_TERMS with your specific problem terms",
  "success_criteria": [
  "Found directly relevant information",
  "Identified related/contextual information",
  "Understand what information might be missing",
- "Have clear next steps for problem resolution"
- ]
- }
+ "Have clear next steps for problem resolution",
+ ],
+ },
  }
-
+
  if template_type not in templates:
  available_templates = list(templates.keys())
- return cast(ToolResponse, {
- "success": False,
- "error": f"Template '{template_type}' not found",
- "category": "TEMPLATE_ERROR",
- "details": {
- "available_templates": available_templates,
- "requested_template": template_type
- }
- })
-
+ return cast(
+ ToolResponse,
+ {
+ "success": False,
+ "error": f"Template '{template_type}' not found",
+ "category": "TEMPLATE_ERROR",
+ "details": {
+ "available_templates": available_templates,
+ "requested_template": template_type,
+ },
+ },
+ )
+
  template = templates[template_type]
-
+
  # Customize template if requested
  if customize_for:
  template = _customize_template(template, customize_for)
-
- return cast(ToolResponse, {
- "success": True,
- "template": template,
- "template_type": template_type,
- "customized_for": customize_for,
- "available_templates": list(templates.keys()),
- "usage_tip": "Follow the workflow steps in order, adapting parameters as needed for your specific situation"
- })
-
+
+ return cast(
+ ToolResponse,
+ {
+ "success": True,
+ "template": template,
+ "template_type": template_type,
+ "customized_for": customize_for,
+ "available_templates": list(templates.keys()),
+ "usage_tip": "Follow the workflow steps in order, adapting parameters as needed for your specific situation",
+ },
+ )
+
  except Exception as e:
- return cast(ToolResponse, {
- "success": False,
- "error": f"Discovery template generation failed: {str(e)}",
- "category": "TEMPLATE_ERROR",
- "details": {"template_type": template_type, "customize_for": customize_for}
- })
+ return cast(
+ ToolResponse,
+ {
+ "success": False,
+ "error": f"Discovery template generation failed: {str(e)}",
+ "category": "TEMPLATE_ERROR",
+ "details": {
+ "template_type": template_type,
+ "customize_for": customize_for,
+ },
+ },
+ )


  @catch_errors
  def discover_relationships(
  table_name: Optional[str] = None,
- relationship_types: List[str] = ["foreign_keys", "semantic_similarity", "temporal_patterns"],
- similarity_threshold: float = 0.6
+ relationship_types: List[str] = [
+ "foreign_keys",
+ "semantic_similarity",
+ "temporal_patterns",
+ ],
+ similarity_threshold: float = 0.6,
  ) -> ToolResponse:
  """
  🔗 **RELATIONSHIP DISCOVERY** - Find hidden connections in your data!
@@ -478,113 +577,148 @@ def discover_relationships(
  """
  try:
  from .. import server
+
  db = get_database(server.DB_PATH)
-
+
  # Get all tables or focus on specific table
  tables_result = db.list_tables()
  if not tables_result.get("success"):
  return cast(ToolResponse, tables_result)
-
+
  all_tables = tables_result.get("tables", [])
  target_tables = [table_name] if table_name else all_tables
-
+
  relationships = {}
  insights = []
-
+
  for target_table in target_tables:
  if target_table not in all_tables:
  continue
-
+
  table_relationships = {
  "foreign_key_refs": [],
  "semantic_similar": [],
  "temporal_related": [],
- "naming_related": []
+ "naming_related": [],
  }
-
+
  # Discover foreign key relationships
  if "foreign_keys" in relationship_types:
  fk_relationships = _discover_foreign_keys(db, target_table, all_tables)
  table_relationships["foreign_key_refs"] = fk_relationships
  if fk_relationships:
- insights.append(f"Table '{target_table}' has structural relationships with {len(fk_relationships)} other tables")
-
+ insights.append(
+ f"Table '{target_table}' has structural relationships with {len(fk_relationships)} other tables"
+ )
+
  # Discover semantic similarity relationships
- if "semantic_similarity" in relationship_types and is_semantic_search_available():
+ if (
+ "semantic_similarity" in relationship_types
+ and is_semantic_search_available()
+ ):
  semantic_relationships = _discover_semantic_relationships(
  db, target_table, all_tables, similarity_threshold
  )
  table_relationships["semantic_similar"] = semantic_relationships
  if semantic_relationships:
- insights.append(f"Table '{target_table}' has semantic similarity with {len(semantic_relationships)} tables")
-
+ insights.append(
+ f"Table '{target_table}' has semantic similarity with {len(semantic_relationships)} tables"
+ )
+
  # Discover temporal patterns
  if "temporal_patterns" in relationship_types:
- temporal_relationships = _discover_temporal_relationships(db, target_table, all_tables)
+ temporal_relationships = _discover_temporal_relationships(
+ db, target_table, all_tables
+ )
  table_relationships["temporal_related"] = temporal_relationships
  if temporal_relationships:
- insights.append(f"Table '{target_table}' shows temporal patterns with {len(temporal_relationships)} tables")
-
+ insights.append(
+ f"Table '{target_table}' shows temporal patterns with {len(temporal_relationships)} tables"
+ )
+
  # Discover naming pattern relationships
  if "naming_patterns" in relationship_types:
- naming_relationships = _discover_naming_relationships(target_table, all_tables)
+ naming_relationships = _discover_naming_relationships(
+ target_table, all_tables
+ )
  table_relationships["naming_related"] = naming_relationships
  if naming_relationships:
- insights.append(f"Table '{target_table}' has naming pattern relationships with {len(naming_relationships)} tables")
-
+ insights.append(
+ f"Table '{target_table}' has naming pattern relationships with {len(naming_relationships)} tables"
+ )
+
  relationships[target_table] = table_relationships
-
+
  # Generate relationship insights
  total_relationships = sum(
- len(rel["foreign_key_refs"]) + len(rel["semantic_similar"]) +
- len(rel["temporal_related"]) + len(rel["naming_related"])
+ len(rel["foreign_key_refs"])
+ + len(rel["semantic_similar"])
+ + len(rel["temporal_related"])
+ + len(rel["naming_related"])
  for rel in relationships.values()
  )
-
+
  if total_relationships == 0:
- insights.append("No strong relationships discovered. Consider adding more content or setting up semantic search.")
+ insights.append(
+ "No strong relationships discovered. Consider adding more content or setting up semantic search."
+ )
  else:
- insights.append(f"Discovered {total_relationships} total relationships across {len(relationships)} tables")
-
- return cast(ToolResponse, {
- "success": True,
- "relationships": relationships,
- "insights": insights,
- "relationship_summary": {
- "total_relationships": total_relationships,
- "tables_analyzed": len(relationships),
- "strongest_connections": _identify_strongest_connections(relationships)
+ insights.append(
+ f"Discovered {total_relationships} total relationships across {len(relationships)} tables"
+ )
+
+ return cast(
+ ToolResponse,
+ {
+ "success": True,
+ "relationships": relationships,
+ "insights": insights,
+ "relationship_summary": {
+ "total_relationships": total_relationships,
+ "tables_analyzed": len(relationships),
+ "strongest_connections": _identify_strongest_connections(
+ relationships
+ ),
+ },
+ "recommendations": _generate_relationship_recommendations(
+ relationships, insights
+ ),
  },
- "recommendations": _generate_relationship_recommendations(relationships, insights)
- })
-
+ )
+
  except Exception as e:
- return cast(ToolResponse, {
- "success": False,
- "error": f"Relationship discovery failed: {str(e)}",
- "category": "RELATIONSHIP_ERROR",
- "details": {
- "table_name": table_name,
- "relationship_types": relationship_types,
- "similarity_threshold": similarity_threshold
- }
- })
+ return cast(
+ ToolResponse,
+ {
+ "success": False,
+ "error": f"Relationship discovery failed: {str(e)}",
+ "category": "RELATIONSHIP_ERROR",
+ "details": {
+ "table_name": table_name,
+ "relationship_types": relationship_types,
+ "similarity_threshold": similarity_threshold,
+ },
+ },
+ )


  # Helper functions for discovery orchestration

- def _analyze_content_for_discovery(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
+
+ def _analyze_content_for_discovery(
+ db, tables: List[str], focus_area: Optional[str], depth: str
+ ) -> Dict[str, Any]:
  """Analyze content patterns and distribution."""
  content_analysis = {
  "total_rows": 0,
  "content_distribution": {},
  "text_rich_tables": [],
  "sparse_tables": [],
- "high_value_tables": []
+ "high_value_tables": [],
  }
-
+
  target_tables = [focus_area] if focus_area and focus_area in tables else tables
-
+
  for table_name in target_tables:
  try:
  rows_result = db.read_rows(table_name)
@@ -593,109 +727,129 @@ def _analyze_content_for_discovery(db, tables: List[str], focus_area: Optional[s
  row_count = len(rows)
  content_analysis["total_rows"] += row_count
  content_analysis["content_distribution"][table_name] = row_count
-
+
  # Analyze content quality if depth allows
  if depth in ["moderate", "comprehensive"] and rows:
  # Sample content quality
  sample_size = min(3, len(rows))
  total_content_length = 0
-
+
  for row in rows[:sample_size]:
  for value in row.values():
  if isinstance(value, str):
  total_content_length += len(value)
-
- avg_content_length = total_content_length / sample_size if sample_size > 0 else 0
-
+
+ avg_content_length = (
+ total_content_length / sample_size if sample_size > 0 else 0
+ )
+
  if avg_content_length > 200:
  content_analysis["text_rich_tables"].append(table_name)
  if avg_content_length > 500:
  content_analysis["high_value_tables"].append(table_name)
  if row_count < 5:
  content_analysis["sparse_tables"].append(table_name)
-
+
  except Exception:
  continue
-
+
  return content_analysis


- def _analyze_schema_for_discovery(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
+ def _analyze_schema_for_discovery(
+ db, tables: List[str], focus_area: Optional[str], depth: str
+ ) -> Dict[str, Any]:
  """Analyze schema structure and organization."""
  schema_analysis = {
  "total_columns": 0,
  "text_columns_by_table": {},
  "well_structured_tables": [],
- "schema_issues": []
+ "schema_issues": [],
  }
-
+
  target_tables = [focus_area] if focus_area and focus_area in tables else tables
-
+
  for table_name in target_tables:
  try:
  schema_result = db.describe_table(table_name)
  if schema_result.get("success"):
  columns = schema_result.get("columns", [])
  schema_analysis["total_columns"] += len(columns)
-
+
  # Find text columns
- text_columns = [col for col in columns if "TEXT" in col.get("type", "").upper()]
+ text_columns = [
+ col for col in columns if "TEXT" in col.get("type", "").upper()
+ ]
  schema_analysis["text_columns_by_table"][table_name] = len(text_columns)
-
+
  # Check for well-structured tables
  has_id = any(col.get("name") == "id" for col in columns)
- has_timestamp = any("timestamp" in col.get("name", "").lower() for col in columns)
+ has_timestamp = any(
+ "timestamp" in col.get("name", "").lower() for col in columns
+ )
  has_text_content = len(text_columns) > 0
-
+
  if has_id and has_timestamp and has_text_content:
  schema_analysis["well_structured_tables"].append(table_name)
-
+
  # Identify schema issues
  if len(columns) < 2:
- schema_analysis["schema_issues"].append(f"Table '{table_name}' has very few columns")
+ schema_analysis["schema_issues"].append(
+ f"Table '{table_name}' has very few columns"
+ )
  if not has_id:
- schema_analysis["schema_issues"].append(f"Table '{table_name}' lacks ID column")
-
+ schema_analysis["schema_issues"].append(
+ f"Table '{table_name}' lacks ID column"
+ )
+
  except Exception:
  continue
-
+
  return schema_analysis


- def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], depth: str) -> Dict[str, Any]:
+ def _assess_content_quality(
+ db, tables: List[str], focus_area: Optional[str], depth: str
+ ) -> Dict[str, Any]:
  """Assess overall content quality."""
  quality_analysis = {
  "quality_scores": {},
  "overall_quality": 0.0,
  "improvement_opportunities": [],
- "quality_distribution": {"high": 0, "medium": 0, "low": 0}
+ "quality_distribution": {"high": 0, "medium": 0, "low": 0},
  }
-
+
  target_tables = [focus_area] if focus_area and focus_area in tables else tables
  total_score = 0
  table_count = 0
-
+
  for table_name in target_tables:
  try:
  rows_result = db.read_rows(table_name)
  if rows_result.get("success"):
  rows = rows_result.get("rows", [])
-
+
  if not rows:
  quality_analysis["quality_scores"][table_name] = 0.0
- quality_analysis["improvement_opportunities"].append(f"Table '{table_name}' is empty")
+ quality_analysis["improvement_opportunities"].append(
+ f"Table '{table_name}' is empty"
+ )
  quality_analysis["quality_distribution"]["low"] += 1
  continue
-
+
  # Calculate quality score
  sample_size = min(5, len(rows))
  content_scores = []
-
+
  for row in rows[:sample_size]:
  row_score = 0
- non_null_fields = sum(1 for v in row.values() if v is not None and str(v).strip())
- total_content_length = sum(len(str(v)) for v in row.values() if v is not None)
-
+ non_null_fields = sum(
+ 1 for v in row.values() if v is not None and str(v).strip()
+ )
+ total_content_length = sum(
+ len(str(v)) for v in row.values() if v is not None
+ )
+
  # Score based on completeness and content richness
  if non_null_fields > 2:
  row_score += 3
@@ -703,12 +857,14 @@ def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], de
  row_score += 4
  if total_content_length > 500:
  row_score += 3
-
+
  content_scores.append(min(10, row_score))
-
- table_quality = sum(content_scores) / len(content_scores) if content_scores else 0
+
+ table_quality = (
+ sum(content_scores) / len(content_scores) if content_scores else 0
+ )
  quality_analysis["quality_scores"][table_name] = round(table_quality, 1)
-
+
  # Categorize quality
  if table_quality >= 7:
  quality_analysis["quality_distribution"]["high"] += 1
@@ -719,164 +875,200 @@ def _assess_content_quality(db, tables: List[str], focus_area: Optional[str], de
  quality_analysis["improvement_opportunities"].append(
  f"Table '{table_name}' has low content quality (score: {table_quality:.1f})"
  )
-
+
  total_score += table_quality
  table_count += 1
-
+
  except Exception:
  continue
-
- quality_analysis["overall_quality"] = round(total_score / table_count, 1) if table_count > 0 else 0.0
-
+
+ quality_analysis["overall_quality"] = (
+ round(total_score / table_count, 1) if table_count > 0 else 0.0
+ )
+
  return quality_analysis


- def _analyze_search_readiness(db, tables: List[str], focus_area: Optional[str]) -> Dict[str, Any]:
+ def _analyze_search_readiness(
+ db, tables: List[str], focus_area: Optional[str]
+ ) -> Dict[str, Any]:
  """Analyze readiness for effective searching."""
  search_analysis = {
  "semantic_ready_tables": [],
  "text_searchable_tables": [],
  "search_optimization_needed": [],
- "embedding_coverage": {}
+ "embedding_coverage": {},
  }
-
+
  target_tables = [focus_area] if focus_area and focus_area in tables else tables
-
+
  for table_name in target_tables:
  try:
  # Check schema for text content
  schema_result = db.describe_table(table_name)
  if schema_result.get("success"):
  columns = schema_result.get("columns", [])
- text_columns = [col for col in columns if "TEXT" in col.get("type", "").upper()]
-
+ text_columns = [
+ col for col in columns if "TEXT" in col.get("type", "").upper()
+ ]
+
  if text_columns:
  search_analysis["text_searchable_tables"].append(table_name)
-
+
  # Check semantic search readiness if available
  if is_semantic_search_available():
  embedding_stats = db.get_embedding_stats(table_name)
  if embedding_stats.get("success"):
  coverage = embedding_stats.get("coverage_percent", 0)
  search_analysis["embedding_coverage"][table_name] = coverage
-
+
  if coverage > 80:
- search_analysis["semantic_ready_tables"].append(table_name)
+ search_analysis["semantic_ready_tables"].append(
+ table_name
+ )
  elif len(text_columns) > 0:
- search_analysis["search_optimization_needed"].append(table_name)
-
+ search_analysis["search_optimization_needed"].append(
+ table_name
+ )
+
  except Exception:
  continue
-
+
  return search_analysis


- def _generate_discovery_insights(discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str], depth: str) -> tuple:
+ def _generate_discovery_insights(
+ discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str], depth: str
+ ) -> tuple:
  """Generate insights and recommendations based on discovery results."""
  insights = []
  recommendations = []
  next_steps = []
-
+
  total_tables = overview.get("total_tables", 0)
  total_rows = overview.get("total_rows", 0)
-
+
  # Goal-specific insights
  if discovery_goal == "understand_content":
- insights.append(f"Memory bank contains {total_tables} tables with {total_rows} total rows")
-
+ insights.append(
+ f"Memory bank contains {total_tables} tables with {total_rows} total rows"
+ )
+
  high_value_tables = overview.get("high_value_tables", [])
  if high_value_tables:
- insights.append(f"High-value content found in: {', '.join(high_value_tables[:3])}")
- recommendations.append(f"Focus search efforts on high-value tables: {', '.join(high_value_tables)}")
- next_steps.append(f"Use auto_smart_search() to explore content in {high_value_tables[0]}")
-
+ insights.append(
+ f"High-value content found in: {', '.join(high_value_tables[:3])}"
+ )
+ recommendations.append(
+ f"Focus search efforts on high-value tables: {', '.join(high_value_tables)}"
+ )
+ next_steps.append(
+ f"Use auto_smart_search() to explore content in {high_value_tables[0]}"
+ )
+
  sparse_tables = overview.get("sparse_tables", [])
  if sparse_tables:
  insights.append(f"Sparse tables detected: {', '.join(sparse_tables)}")
  recommendations.append("Consider consolidating or expanding sparse tables")
-
+
  elif discovery_goal == "find_patterns":
  text_rich_tables = overview.get("text_rich_tables", [])
  if text_rich_tables:
- insights.append(f"Text-rich content found in {len(text_rich_tables)} tables")
+ insights.append(
+ f"Text-rich content found in {len(text_rich_tables)} tables"
+ )
  next_steps.append("Use semantic search to find content patterns")
-
+
  quality_scores = overview.get("quality_scores", {})
  if quality_scores:
  avg_quality = sum(quality_scores.values()) / len(quality_scores)
  insights.append(f"Average content quality: {avg_quality:.1f}/10")
-
+
  elif discovery_goal == "explore_structure":
  well_structured = overview.get("well_structured_tables", [])
  if well_structured:
  insights.append(f"Well-structured tables: {', '.join(well_structured)}")
  recommendations.append("Use well-structured tables as primary data sources")
-
+
  schema_issues = overview.get("schema_issues", [])
  if schema_issues:
  insights.extend(schema_issues[:3]) # Show first 3 issues
-
+
  elif discovery_goal == "assess_quality":
  overall_quality = overview.get("overall_quality", 0)
  insights.append(f"Overall content quality score: {overall_quality}/10")
-
+
  improvement_opportunities = overview.get("improvement_opportunities", [])
  recommendations.extend(improvement_opportunities[:3])
-
+
  elif discovery_goal == "prepare_search":
  semantic_ready = overview.get("semantic_ready_tables", [])
  optimization_needed = overview.get("search_optimization_needed", [])
-
+
  if semantic_ready:
  insights.append(f"Semantic search ready for {len(semantic_ready)} tables")
  next_steps.append("Use auto_semantic_search() for conceptual queries")
-
+
  if optimization_needed:
- insights.append(f"Search optimization needed for {len(optimization_needed)} tables")
- next_steps.append(f"Set up embeddings for: {', '.join(optimization_needed[:2])}")
-
+ insights.append(
+ f"Search optimization needed for {len(optimization_needed)} tables"
+ )
+ next_steps.append(
+ f"Set up embeddings for: {', '.join(optimization_needed[:2])}"
+ )
+
  # Universal recommendations
  if overview.get("semantic_search_available"):
  recommendations.append("Use auto_smart_search() for best search results")
  else:
- recommendations.append("Install sentence-transformers for semantic search capabilities")
-
+ recommendations.append(
+ "Install sentence-transformers for semantic search capabilities"
+ )
+
  if not next_steps:
  next_steps.append("Use explore_tables() for detailed content examination")
  next_steps.append("Try auto_smart_search() to find specific information")
-
+
  return insights, recommendations, next_steps


- def _generate_quick_actions(discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str]) -> List[Dict[str, Any]]:
+ def _generate_quick_actions(
+ discovery_goal: str, overview: Dict[str, Any], focus_area: Optional[str]
+ ) -> List[Dict[str, Any]]:
  """Generate quick action suggestions."""
  actions = []
-
+
  high_value_tables = overview.get("high_value_tables", [])
-
+
  if discovery_goal == "understand_content" and high_value_tables:
- actions.append({
- "action": "Explore High-Value Content",
- "tool": "read_rows",
- "params": {"table_name": high_value_tables[0]},
- "description": f"Examine content in {high_value_tables[0]} table"
- })
-
+ actions.append(
+ {
+ "action": "Explore High-Value Content",
+ "tool": "read_rows",
+ "params": {"table_name": high_value_tables[0]},
+ "description": f"Examine content in {high_value_tables[0]} table",
+ }
+ )
+
  if overview.get("semantic_search_available"):
- actions.append({
- "action": "Smart Search",
- "tool": "auto_smart_search",
- "params": {"query": "important recent information", "limit": 5},
- "description": "Find important content using intelligent search"
- })
-
- actions.append({
- "action": "Quality Assessment",
- "tool": "get_content_health_score",
- "params": {},
- "description": "Get detailed quality metrics and recommendations"
- })
-
+ actions.append(
+ {
+ "action": "Smart Search",
+ "tool": "auto_smart_search",
+ "params": {"query": "important recent information", "limit": 5},
+ "description": "Find important content using intelligent search",
+ }
+ )
+
+ actions.append(
+ {
+ "action": "Quality Assessment",
+ "tool": "get_content_health_score",
+ "params": {},
+ "description": "Get detailed quality metrics and recommendations",
+ }
+ )
+
  return actions


@@ -885,17 +1077,24 @@ def _store_discovery_pattern(db, discovery_session: Dict[str, Any]) -> None:
  try:
  # Check if discovery_patterns table exists
  tables_result = db.list_tables()
- if tables_result.get("success") and "discovery_patterns" in tables_result.get("tables", []):
+ if tables_result.get("success") and "discovery_patterns" in tables_result.get(
+ "tables", []
+ ):
  # Store the discovery session
- db.insert_row("discovery_patterns", {
- "agent_id": discovery_session.get("agent_id"),
- "goal": discovery_session.get("goal"),
- "focus_area": discovery_session.get("focus_area"),
- "depth": discovery_session.get("depth"),
- "steps_completed": str(discovery_session.get("steps_completed", [])),
- "success": True,
- "timestamp": discovery_session.get("timestamp")
- })
+ db.insert_row(
+ "discovery_patterns",
+ {
+ "agent_id": discovery_session.get("agent_id"),
+ "goal": discovery_session.get("goal"),
+ "focus_area": discovery_session.get("focus_area"),
+ "depth": discovery_session.get("depth"),
+ "steps_completed": str(
+ discovery_session.get("steps_completed", [])
+ ),
+ "success": True,
+ "timestamp": discovery_session.get("timestamp"),
+ },
+ )
  except Exception:
  # Silently fail if learning storage isn't available
  pass
@@ -904,14 +1103,18 @@ def _store_discovery_pattern(db, discovery_session: Dict[str, Any]) -> None:
  def _customize_template(template: Dict[str, Any], customize_for: str) -> Dict[str, Any]:
  """Customize template for specific domain or topic."""
  customized = template.copy()
-
+
  # Add customization note
  customized["customized_for"] = customize_for
  customized["customization_note"] = f"Template customized for: {customize_for}"
-
+
  # Modify search queries in workflow to include customization
  for step in customized.get("workflow", []):
- if step.get("tool") in ["auto_smart_search", "auto_semantic_search", "search_content"]:
+ if step.get("tool") in [
+ "auto_smart_search",
+ "auto_semantic_search",
+ "search_content",
+ ]:
  params = step.get("params", {})
  if "query" in params and params["query"].startswith("REPLACE_WITH"):
  # Keep the placeholder for user customization
@@ -919,258 +1122,307 @@ def _customize_template(template: Dict[str, Any], customize_for: str) -> Dict[st
  elif "query" in params:
  # Add customization to existing query
  params["query"] = f"{customize_for} {params['query']}"
-
+
  return customized


  # Relationship discovery helper functions

+
  def _discover_foreign_keys(db, target_table: str, all_tables: List[str]) -> List[str]:
  """Discover foreign key relationships."""
  relationships = []
-
+
  try:
  # Get target table schema
  target_schema = db.describe_table(target_table)
  if not target_schema.get("success"):
  return relationships
-
+
  target_columns = target_schema.get("columns", [])
  target_col_names = [col.get("name", "") for col in target_columns]
-
+
  # Check other tables for potential foreign key references
  for other_table in all_tables:
  if other_table == target_table:
  continue
-
+
  try:
  other_schema = db.describe_table(other_table)
  if other_schema.get("success"):
  other_columns = other_schema.get("columns", [])
-
+
  for col in other_columns:
  col_name = col.get("name", "")
  # Look for naming patterns that suggest foreign keys
  if col_name.endswith("_id") or col_name.endswith("Id"):
- potential_ref = col_name.replace("_id", "").replace("Id", "")
- if potential_ref == target_table or f"{potential_ref}s" == target_table:
+ potential_ref = col_name.replace("_id", "").replace(
+ "Id", ""
+ )
+ if (
+ potential_ref == target_table
+ or f"{potential_ref}s" == target_table
+ ):
  relationships.append(f"{other_table}.{col_name}")
-
+
  # Look for exact column name matches (potential shared keys)
  if col_name in target_col_names and col_name != "id":
- relationships.append(f"{other_table}.{col_name} (shared key)")
-
+ relationships.append(
+ f"{other_table}.{col_name} (shared key)"
+ )
+
  except Exception:
  continue
-
+
  except Exception:
  pass
-
+
  return relationships


- def _discover_semantic_relationships(db, target_table: str, all_tables: List[str], threshold: float) -> List[Dict[str, Any]]:
+ def _discover_semantic_relationships(
+ db, target_table: str, all_tables: List[str], threshold: float
+ ) -> List[Dict[str, Any]]:
  """Discover semantic similarity relationships."""
  relationships = []
-
+
  if not is_semantic_search_available():
  return relationships
-
+
  try:
  # Get sample content from target table
  target_rows = db.read_rows(target_table)
  if not target_rows.get("success") or not target_rows.get("rows"):
  return relationships
-
+
  # Create a sample query from target table content
  sample_row = target_rows["rows"][0]
- sample_text = " ".join(str(v) for v in sample_row.values() if v is not None)[:200]
-
+ sample_text = " ".join(str(v) for v in sample_row.values() if v is not None)[
+ :200
+ ]
+
  if len(sample_text.strip()) < 10:
  return relationships
-
+
  # Search for similar content in other tables
  for other_table in all_tables:
  if other_table == target_table:
  continue
-
+
  try:
  # Try semantic search in the other table
  search_result = db.semantic_search(
- sample_text, [other_table], "embedding", None, threshold, 3, "all-MiniLM-L6-v2"
+ sample_text,
+ [other_table],
+ "embedding",
+ None,
+ threshold,
+ 3,
+ "all-MiniLM-L6-v2",
  )
-
+
  if search_result.get("success") and search_result.get("results"):
  results = search_result["results"]
- avg_similarity = sum(r.get("similarity_score", 0) for r in results) / len(results)
-
+ avg_similarity = sum(
+ r.get("similarity_score", 0) for r in results
+ ) / len(results)
+
  if avg_similarity >= threshold:
- relationships.append({
- "table": other_table,
- "similarity": round(avg_similarity, 2),
- "related_content_count": len(results)
- })
-
+ relationships.append(
+ {
+ "table": other_table,
+ "similarity": round(avg_similarity, 2),
+ "related_content_count": len(results),
+ }
+ )
+
  except Exception:
  continue
-
+
  except Exception:
  pass
-
+
  return relationships


- def _discover_temporal_relationships(db, target_table: str, all_tables: List[str]) -> List[str]:
+ def _discover_temporal_relationships(
+ db, target_table: str, all_tables: List[str]
+ ) -> List[str]:
  """Discover temporal pattern relationships."""
  relationships = []
-
+
  try:
  # Check if target table has timestamp columns
  target_schema = db.describe_table(target_table)
  if not target_schema.get("success"):
  return relationships
-
+
  target_columns = target_schema.get("columns", [])
- target_has_timestamp = any("timestamp" in col.get("name", "").lower() or
- "date" in col.get("name", "").lower() or
- "time" in col.get("name", "").lower()
- for col in target_columns)
-
+ target_has_timestamp = any(
+ "timestamp" in col.get("name", "").lower()
+ or "date" in col.get("name", "").lower()
+ or "time" in col.get("name", "").lower()
+ for col in target_columns
+ )
+
  if not target_has_timestamp:
  return relationships
-
+
  # Check other tables for similar timestamp patterns
  for other_table in all_tables:
  if other_table == target_table:
  continue
-
+
  try:
  other_schema = db.describe_table(other_table)
  if other_schema.get("success"):
  other_columns = other_schema.get("columns", [])
- other_has_timestamp = any("timestamp" in col.get("name", "").lower() or
- "date" in col.get("name", "").lower() or
- "time" in col.get("name", "").lower()
- for col in other_columns)
-
+ other_has_timestamp = any(
+ "timestamp" in col.get("name", "").lower()
+ or "date" in col.get("name", "").lower()
+ or "time" in col.get("name", "").lower()
+ for col in other_columns
+ )
+
  if other_has_timestamp:
  relationships.append(other_table)
-
+
  except Exception:
  continue
-
+
  except Exception:
  pass
-
+
  return relationships


- def _discover_naming_relationships(target_table: str, all_tables: List[str]) -> List[str]:
+ def _discover_naming_relationships(
+ target_table: str, all_tables: List[str]
+ ) -> List[str]:
  """Discover relationships based on naming conventions."""
  relationships = []
-
+
  # Look for tables with similar names or naming patterns
  target_lower = target_table.lower()
-
+
  for other_table in all_tables:
  if other_table == target_table:
  continue
-
+
  other_lower = other_table.lower()
-
+
  # Check for plural/singular relationships
- if (target_lower.endswith('s') and other_lower == target_lower[:-1]) or \
- (other_lower.endswith('s') and target_lower == other_lower[:-1]):
+ if (target_lower.endswith("s") and other_lower == target_lower[:-1]) or (
+ other_lower.endswith("s") and target_lower == other_lower[:-1]
+ ):
  relationships.append(other_table)
  continue
-
+
  # Check for common prefixes or suffixes
  if len(target_lower) > 3 and len(other_lower) > 3:
  # Common prefix (at least 4 characters)
  if target_lower[:4] == other_lower[:4]:
  relationships.append(other_table)
  continue
-
+
  # Common suffix (at least 4 characters)
  if target_lower[-4:] == other_lower[-4:]:
  relationships.append(other_table)
  continue
-
+
  # Check for semantic name relationships
- name_words = set(target_lower.split('_'))
- other_words = set(other_lower.split('_'))
-
+ name_words = set(target_lower.split("_"))
+ other_words = set(other_lower.split("_"))
+
  # If tables share significant word overlap
  if len(name_words.intersection(other_words)) > 0:
  relationships.append(other_table)
-
+
  return relationships


- def _identify_strongest_connections(relationships: Dict[str, Any]) -> List[Dict[str, Any]]:
+ def _identify_strongest_connections(
+ relationships: Dict[str, Any],
+ ) -> List[Dict[str, Any]]:
  """Identify the strongest connections across all relationships."""
  connections = []
-
+
  for table, rels in relationships.items():
  # Count total connections for this table
- total_connections = (len(rels.get("foreign_key_refs", [])) +
- len(rels.get("semantic_similar", [])) +
- len(rels.get("temporal_related", [])) +
- len(rels.get("naming_related", [])))
-
+ total_connections = (
+ len(rels.get("foreign_key_refs", []))
+ + len(rels.get("semantic_similar", []))
+ + len(rels.get("temporal_related", []))
+ + len(rels.get("naming_related", []))
+ )
+
  if total_connections > 0:
- connections.append({
- "table": table,
- "total_connections": total_connections,
- "connection_types": {
- "structural": len(rels.get("foreign_key_refs", [])),
- "semantic": len(rels.get("semantic_similar", [])),
- "temporal": len(rels.get("temporal_related", [])),
- "naming": len(rels.get("naming_related", []))
+ connections.append(
+ {
+ "table": table,
+ "total_connections": total_connections,
+ "connection_types": {
+ "structural": len(rels.get("foreign_key_refs", [])),
+ "semantic": len(rels.get("semantic_similar", [])),
+ "temporal": len(rels.get("temporal_related", [])),
+ "naming": len(rels.get("naming_related", [])),
+ },
  }
- })
-
+ )
+
  # Sort by total connections and return top 5
  connections.sort(key=lambda x: x["total_connections"], reverse=True)
  return connections[:5]


- def _generate_relationship_recommendations(relationships: Dict[str, Any], insights: List[str]) -> List[str]:
+ def _generate_relationship_recommendations(
+ relationships: Dict[str, Any], insights: List[str]
+ ) -> List[str]:
  """Generate actionable recommendations based on discovered relationships."""
  recommendations = []
-
+
  # Find tables with many connections
  highly_connected = []
  for table, rels in relationships.items():
- total_connections = (len(rels.get("foreign_key_refs", [])) +
- len(rels.get("semantic_similar", [])) +
- len(rels.get("temporal_related", [])) +
- len(rels.get("naming_related", [])))
+ total_connections = (
+ len(rels.get("foreign_key_refs", []))
+ + len(rels.get("semantic_similar", []))
+ + len(rels.get("temporal_related", []))
+ + len(rels.get("naming_related", []))
+ )
  if total_connections >= 3:
  highly_connected.append(table)
-
+
  if highly_connected:
- recommendations.append(f"Focus queries on highly connected tables: {', '.join(highly_connected[:3])}")
-
+ recommendations.append(
+ f"Focus queries on highly connected tables: {', '.join(highly_connected[:3])}"
+ )
+
  # Find tables with semantic relationships
  semantic_tables = []
  for table, rels in relationships.items():
  if rels.get("semantic_similar"):
  semantic_tables.append(table)
-
+
  if semantic_tables:
- recommendations.append(f"Use semantic search across related tables: {', '.join(semantic_tables[:3])}")
-
+ recommendations.append(
+ f"Use semantic search across related tables: {', '.join(semantic_tables[:3])}"
+ )
+
  # Find tables with temporal relationships
  temporal_tables = []
  for table, rels in relationships.items():
  if rels.get("temporal_related"):
  temporal_tables.append(table)
-
+
  if temporal_tables:
- recommendations.append(f"Consider temporal analysis for time-related tables: {', '.join(temporal_tables[:3])}")
-
+ recommendations.append(
+ f"Consider temporal analysis for time-related tables: {', '.join(temporal_tables[:3])}"
+ )
+
  if not recommendations:
- recommendations.append("Consider adding more structured relationships or content to improve discoverability")
-
+ recommendations.append(
+ "Consider adding more structured relationships or content to improve discoverability"
+ )
+
  return recommendations