mcp-sqlite-memory-bank 1.4.3-py3-none-any.whl → 1.5.0-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
@@ -25,15 +25,25 @@ from .server import (
     update_rows,
     delete_rows,
     run_select_query,
-    # Search tools
-    search_content,
-    explore_tables,
     # FastMCP app
     app,
    # Constants
     DB_PATH,
 )
 
+# Import search tools from the tools module
+from .tools import (
+    # Search tools
+    search_content,
+    explore_tables,
+    add_embeddings,
+    semantic_search,
+    find_related,
+    smart_search,
+    embedding_stats,
+    auto_semantic_search,
+    auto_smart_search,
+)
 
 from .types import (
     # Response types
@@ -81,6 +91,13 @@ __all__ = [
     # Search tools
     "search_content",
     "explore_tables",
+    "add_embeddings",
+    "semantic_search",
+    "find_related",
+    "smart_search",
+    "embedding_stats",
+    "auto_semantic_search",
+    "auto_smart_search",
     # FastMCP app
     "app",
     # Constants
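
The two hunks above (apparently from the package's `__init__.py`) move `search_content` and `explore_tables` out of the `server` module and re-export them, together with seven new semantic-search tools, from the new `tools` module. A minimal sketch of what this means for downstream imports, assuming the import package is named `mcp_sqlite_memory_bank`:

```python
# Hedged sketch: package-level imports keep working after the refactor,
# and the 1.5.0 semantic tools are importable from the same place.
from mcp_sqlite_memory_bank import (
    search_content,     # moved from .server to .tools, still re-exported
    explore_tables,     # likewise
    semantic_search,    # new in 1.5.0
    auto_smart_search,  # new in 1.5.0
)
```

Because the second hunk extends `__all__`, `from mcp_sqlite_memory_bank import *` also picks up the new tool names.
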
@@ -403,27 +403,65 @@ class SQLiteMemoryDatabase:
         for row in conn.execute(stmt).fetchall():
             row_dict = dict(row._mapping)
 
-            # Calculate relevance and matched content
-            relevance = 0.0
+            # Enhanced relevance calculation with multiple scoring factors
+            relevance_scores = []
             matched_content = []
             query_lower = query.lower()
+            query_terms = query_lower.split()
 
             for col in text_columns:
                 if col.name in row_dict and row_dict[col.name]:
                     content = str(row_dict[col.name]).lower()
+                    content_length = len(content)
+
                     if query_lower in content:
-                        frequency = content.count(query_lower)
-                        relevance += frequency / len(content)
-                        matched_content.append(f"{col.name}: {row_dict[col.name]}")
-
-            if relevance > 0:
+                        # Factor 1: Exact phrase frequency (weighted higher)
+                        exact_frequency = content.count(query_lower)
+                        exact_score = (exact_frequency * 2.0) / content_length if content_length > 0 else 0
+
+                        # Factor 2: Individual term frequency
+                        term_score = 0.0
+                        for term in query_terms:
+                            if term in content:
+                                term_score += content.count(term) / content_length if content_length > 0 else 0
+
+                        # Factor 3: Position bonus (early matches score higher)
+                        position_bonus = 0.0
+                        first_occurrence = content.find(query_lower)
+                        if first_occurrence != -1:
+                            position_bonus = (content_length - first_occurrence) / content_length * 0.1
+
+                        # Factor 4: Column importance (title/name columns get bonus)
+                        column_bonus = 0.0
+                        if any(keyword in col.name.lower() for keyword in ['title', 'name', 'summary', 'description']):
+                            column_bonus = 0.2
+
+                        # Combined relevance score
+                        col_relevance = exact_score + term_score + position_bonus + column_bonus
+                        relevance_scores.append(col_relevance)
+
+                        # Enhanced matched content with context
+                        snippet_start = max(0, first_occurrence - 50)
+                        snippet_end = min(len(row_dict[col.name]), first_occurrence + len(query) + 50)
+                        snippet = str(row_dict[col.name])[snippet_start:snippet_end]
+                        if snippet_start > 0:
+                            snippet = "..." + snippet
+                        if snippet_end < len(str(row_dict[col.name])):
+                            snippet = snippet + "..."
+
+                        matched_content.append(f"{col.name}: {snippet}")
+
+            total_relevance = sum(relevance_scores)
+            if total_relevance > 0:
                 results.append(
                     {
                         "table": table_name,
                         "row_id": row_dict.get("id"),
                         "row_data": row_dict,
                         "matched_content": matched_content,
-                        "relevance": round(relevance, 3),
+                        "relevance": round(total_relevance, 4),
+                        "match_quality": "high" if total_relevance > 0.5 else "medium" if total_relevance > 0.1 else "low",
+                        "match_count": len(relevance_scores)
                     }
                 )
 
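
The old scorer was a single term-frequency ratio; 1.5.0 sums four factors per matching column and reports a quality band plus a context snippet. A standalone sketch of the same formula (illustrative names; the real code runs inside `SQLiteMemoryDatabase` over SQLAlchemy rows):

```python
# Replicates the 1.5.0 per-column scoring above for a single string value.
def score_column(query: str, column_name: str, value: str) -> float:
    content = value.lower()
    query_lower = query.lower()
    n = len(content)
    if n == 0 or query_lower not in content:
        return 0.0
    exact_score = (content.count(query_lower) * 2.0) / n                  # Factor 1
    term_score = sum(content.count(t) / n                                 # Factor 2
                     for t in query_lower.split() if t in content)
    first = content.find(query_lower)
    position_bonus = (n - first) / n * 0.1                                # Factor 3
    column_bonus = 0.2 if any(k in column_name.lower()                    # Factor 4
                              for k in ("title", "name", "summary", "description")) else 0.0
    return exact_score + term_score + position_bonus + column_bonus

print(round(score_column("vector search", "title", "Vector search notes"), 4))  # 0.5105
```

Here the exact-phrase factor contributes about 0.105, the two term frequencies another 0.105, the match at position 0 earns the full 0.1 position bonus, and the `title` column adds 0.2, so the row lands just above the 0.5 threshold for a "high" `match_quality`.
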
@@ -910,10 +948,20 @@ class SQLiteMemoryDatabase:
         )
 
         # Enhance with text matching scores
-        semantic_engine = get_semantic_engine(model_name)
-        enhanced_results = semantic_engine.hybrid_search(
-            query, semantic_results, text_columns or [], embedding_column, semantic_weight, text_weight, limit
-        )
+        try:
+            semantic_engine = get_semantic_engine(model_name)
+
+            # Verify the engine has the required method
+            if not hasattr(semantic_engine, 'hybrid_search') or not callable(getattr(semantic_engine, 'hybrid_search')):
+                raise DatabaseError("Semantic engine hybrid_search method is not callable")
+
+            enhanced_results = semantic_engine.hybrid_search(
+                query, semantic_results, text_columns or [], embedding_column, semantic_weight, text_weight, limit
+            )
+        except Exception as e:
+            # If semantic enhancement fails, return semantic results without text enhancement
+            logging.warning(f"Semantic enhancement failed: {e}")
+            enhanced_results = semantic_results[:limit]
 
         return {
             "success": True,
@@ -14,6 +14,7 @@ Author: Robert Meisner
 from typing import Dict, Any, cast
 from fastmcp import FastMCP
 from .database import get_database
+from .semantic import is_semantic_search_available
 import json
 
 
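
The resources module now branches on `is_semantic_search_available()`. Judging by the `SENTENCE_TRANSFORMERS_AVAILABLE` flag visible in the last hunk below, the check is presumably a plain import-time probe; a common-pattern sketch (the actual `.semantic` implementation may differ):

```python
# Sketch of a typical optional-dependency feature flag (assumption, not the
# verified contents of .semantic).
try:
    import sentence_transformers  # noqa: F401  -- optional dependency
    SENTENCE_TRANSFORMERS_AVAILABLE = True
except ImportError:
    SENTENCE_TRANSFORMERS_AVAILABLE = False

def is_semantic_search_available() -> bool:
    return SENTENCE_TRANSFORMERS_AVAILABLE
```
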
@@ -169,6 +170,261 @@ class MemoryBankResources:
             }
 
             return json.dumps(resource_content, indent=2)
+
+        @self.mcp.resource("memory://live/recent-activity")
+        async def get_recent_activity() -> str:
+            """Real-time feed of recent memory bank changes and activity."""
+            db = get_database(self.db_path)
+
+            # Get tables with timestamp columns for activity tracking
+            tables_result = cast(Dict[str, Any], db.list_tables())
+            if not tables_result.get("success"):
+                return json.dumps({"error": "Failed to get tables", "details": tables_result})
+
+            recent_activity = []
+            tables = tables_result.get("tables", [])
+
+            for table_name in tables:
+                try:
+                    # Check if table has timestamp column
+                    schema_result = cast(Dict[str, Any], db.describe_table(table_name))
+                    if not schema_result.get("success"):
+                        continue
+
+                    columns = schema_result.get("columns", [])
+                    timestamp_cols = [col for col in columns if "timestamp" in col.get("name", "").lower()]
+
+                    if timestamp_cols:
+                        # Get recent entries (last 10)
+                        recent_result = cast(Dict[str, Any], db.read_rows(table_name, None, 10))
+                        if recent_result.get("success"):
+                            rows = recent_result.get("rows", [])
+                            for row in rows:
+                                activity_entry = {
+                                    "table": table_name,
+                                    "action": "content_added",
+                                    "timestamp": row.get(timestamp_cols[0]["name"]),
+                                    "content_preview": str(row).replace('"', "'")[:100] + "..." if len(str(row)) > 100 else str(row),
+                                    "row_id": row.get("id")
+                                }
+                                recent_activity.append(activity_entry)
+
+                except Exception as e:
+                    continue
+
+            # Sort by timestamp (most recent first)
+            recent_activity.sort(key=lambda x: x.get("timestamp", ""), reverse=True)
+            recent_activity = recent_activity[:20]  # Limit to 20 most recent
+
+            resource_content = {
+                "resource_type": "recent_activity",
+                "description": "Recent changes and additions to the memory bank",
+                "activities": recent_activity,
+                "activity_count": len(recent_activity),
+                "last_updated": "real-time",
+                "refresh_rate": "dynamic"
+            }
+
+            return json.dumps(resource_content, indent=2)
+
+        @self.mcp.resource("memory://live/content-suggestions")
+        async def get_content_suggestions() -> str:
+            """AI-powered suggestions for content improvements and organization."""
+            db = get_database(self.db_path)
+
+            suggestions = {
+                "organization_suggestions": [],
+                "content_gaps": [],
+                "semantic_opportunities": [],
+                "quality_improvements": []
+            }
+
+            try:
+                # Get basic analysis
+                tables_result = cast(Dict[str, Any], db.list_tables())
+                if not tables_result.get("success"):
+                    return json.dumps({"error": "Failed to analyze content", "details": tables_result})
+
+                tables = tables_result.get("tables", [])
+
+                for table_name in tables:
+                    try:
+                        # Analyze table content
+                        rows_result = cast(Dict[str, Any], db.read_rows(table_name))
+                        if not rows_result.get("success"):
+                            continue
+
+                        rows = rows_result.get("rows", [])
+
+                        # Check for organization opportunities
+                        if len(rows) > 50:
+                            suggestions["organization_suggestions"].append({
+                                "table": table_name,
+                                "suggestion": "Consider adding categories or tags for better organization",
+                                "reason": f"Large table with {len(rows)} rows could benefit from categorization"
+                            })
+
+                        # Check for semantic search opportunities
+                        if is_semantic_search_available():
+                            embedding_stats = cast(Dict[str, Any], db.get_embedding_stats(table_name))
+                            if embedding_stats.get("success") and embedding_stats.get("coverage_percent", 0) == 0:
+                                schema_result = cast(Dict[str, Any], db.describe_table(table_name))
+                                if schema_result.get("success"):
+                                    text_cols = [col for col in schema_result.get("columns", [])
+                                                 if "TEXT" in col.get("type", "").upper()]
+                                    if text_cols and len(rows) > 5:
+                                        suggestions["semantic_opportunities"].append({
+                                            "table": table_name,
+                                            "suggestion": "Set up semantic search for better content discovery",
+                                            "reason": f"Table has {len(text_cols)} text columns and {len(rows)} rows",
+                                            "action": f"Use add_embeddings('{table_name}', {[col['name'] for col in text_cols[:3]]})"
+                                        })
+
+                        # Check for content gaps (sparse tables)
+                        if 1 <= len(rows) <= 5:
+                            suggestions["content_gaps"].append({
+                                "table": table_name,
+                                "suggestion": "Consider adding more content or consolidating with other tables",
+                                "reason": f"Table has only {len(rows)} rows - might be underutilized"
+                            })
+
+                        # Sample content for quality analysis
+                        if rows:
+                            sample_row = rows[0]
+                            short_values = [k for k, v in sample_row.items()
+                                            if isinstance(v, str) and 0 < len(v) < 10]
+                            if len(short_values) > 2:
+                                suggestions["quality_improvements"].append({
+                                    "table": table_name,
+                                    "suggestion": "Consider adding more detailed content",
+                                    "reason": f"Several columns have very short values: {short_values[:3]}"
+                                })
+
+                    except Exception as e:
+                        continue
+
+                # Prioritize suggestions
+                priority_order = ["semantic_opportunities", "organization_suggestions", "quality_improvements", "content_gaps"]
+                prioritized = {}
+                for category in priority_order:
+                    if suggestions[category]:
+                        prioritized[category] = suggestions[category]
+
+                resource_content = {
+                    "resource_type": "content_suggestions",
+                    "description": "AI-powered suggestions for improving your memory bank",
+                    "suggestions": prioritized,
+                    "total_suggestions": sum(len(v) for v in suggestions.values()),
+                    "last_updated": "real-time",
+                    "next_actions": [
+                        "Review semantic opportunities for high-value tables",
+                        "Consider organization improvements for large tables",
+                        "Add more detailed content where suggested"
+                    ]
+                }
+
+                return json.dumps(resource_content, indent=2)
+
+            except Exception as e:
+                return json.dumps({
+                    "error": f"Failed to generate content suggestions: {str(e)}",
+                    "suggestions": suggestions
+                })
+
+        @self.mcp.resource("memory://analytics/insights")
+        async def get_memory_insights() -> str:
+            """Real-time analytics and insights about memory bank usage and patterns."""
+            db = get_database(self.db_path)
+
+            insights = {
+                "usage_patterns": {},
+                "content_trends": {},
+                "search_recommendations": {},
+                "health_indicators": {}
+            }
+
+            try:
+                tables_result = cast(Dict[str, Any], db.list_tables())
+                if not tables_result.get("success"):
+                    return json.dumps({"error": "Failed to get insights", "details": tables_result})
+
+                tables = tables_result.get("tables", [])
+                total_rows = 0
+                content_quality_scores = []
+
+                for table_name in tables:
+                    rows_result = cast(Dict[str, Any], db.read_rows(table_name))
+                    if rows_result.get("success"):
+                        rows = rows_result.get("rows", [])
+                        row_count = len(rows)
+                        total_rows += row_count
+
+                        # Calculate content quality score for this table
+                        if rows:
+                            # Sample content to estimate quality
+                            sample_size = min(5, len(rows))
+                            total_content_length = 0
+                            for row in rows[:sample_size]:
+                                for value in row.values():
+                                    if isinstance(value, str):
+                                        total_content_length += len(value)
+
+                            avg_content_length = total_content_length / sample_size if sample_size > 0 else 0
+                            quality_score = min(10, avg_content_length / 50)  # Normalize to 0-10
+                            content_quality_scores.append(quality_score)
+
+                            insights["usage_patterns"][table_name] = {
+                                "row_count": row_count,
+                                "avg_content_length": round(avg_content_length),
+                                "quality_score": round(quality_score, 1),
+                                "category": "high_value" if quality_score > 7 else "medium_value" if quality_score > 3 else "low_value"
+                            }
+
+                # Overall health indicators
+                avg_quality = sum(content_quality_scores) / len(content_quality_scores) if content_quality_scores else 0
+                insights["health_indicators"] = {
+                    "total_tables": len(tables),
+                    "total_content_rows": total_rows,
+                    "average_content_quality": round(avg_quality, 1),
+                    "content_distribution": "balanced" if len(tables) > 0 and total_rows / len(tables) > 10 else "sparse",
+                    "semantic_readiness": "available" if is_semantic_search_available() else "unavailable"
+                }
+
+                # Search recommendations
+                high_value_tables = [name for name, data in insights["usage_patterns"].items()
+                                     if data.get("category") == "high_value"]
+
+                if high_value_tables:
+                    insights["search_recommendations"]["intelligent_search"] = {
+                        "recommended_tables": high_value_tables,
+                        "strategy": "Use intelligent_search() for best results across high-value content"
+                    }
+
+                if is_semantic_search_available():
+                    insights["search_recommendations"]["semantic_opportunities"] = {
+                        "suggestion": "Consider semantic search for conceptual queries",
+                        "best_for": "Finding related concepts, patterns, and thematic content"
+                    }
+
+                resource_content = {
+                    "resource_type": "memory_insights",
+                    "description": "Real-time analytics and insights about your memory bank",
+                    "insights": insights,
+                    "last_updated": "real-time",
+                    "recommendations": [
+                        f"Focus on high-value tables: {', '.join(high_value_tables[:3])}" if high_value_tables else "Add more detailed content to improve value",
+                        "Use intelligent_search() for optimal search results",
+                        "Consider semantic search setup for better content discovery" if is_semantic_search_available() else "Install sentence-transformers for semantic search"
+                    ]
+                }
+
+                return json.dumps(resource_content, indent=2)
+
+            except Exception as e:
+                return json.dumps({
+                    "error": f"Failed to generate insights: {str(e)}",
+                    "insights": insights
+                })
 
 
 def setup_mcp_resources(mcp_app: FastMCP, db_path: str) -> MemoryBankResources:
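
The three new resources (`memory://live/recent-activity`, `memory://live/content-suggestions`, `memory://analytics/insights`) each return a JSON string. Two details worth noting from the code: `read_rows(table_name, None, 10)` relies on the method accepting a limit argument, and the activity feed labels every row `content_added` because a timestamp column is the only change signal available in plain SQLite. A hedged client-side sketch for reading one of them, assuming a FastMCP 2.x `Client` and a hypothetical HTTP transport URL:

```python
import asyncio
import json

from fastmcp import Client

async def main() -> None:
    # Connect to a running sqlite-memory-bank server (transport/URL assumed)
    async with Client("http://localhost:8000/mcp") as client:
        contents = await client.read_resource("memory://live/recent-activity")
        activity = json.loads(contents[0].text)  # the resource returns a JSON string
        print(activity["activity_count"], "recent changes")

asyncio.run(main())
```
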
@@ -387,10 +387,20 @@ def get_semantic_engine(model_name: str = "all-MiniLM-L6-v2") -> SemanticSearchEngine:
     """Get or create the global semantic search engine."""
     global _semantic_engine
 
-    if _semantic_engine is None or _semantic_engine.model_name != model_name:
-        _semantic_engine = SemanticSearchEngine(model_name)
-
-    return _semantic_engine
+    try:
+        if _semantic_engine is None or _semantic_engine.model_name != model_name:
+            if not SENTENCE_TRANSFORMERS_AVAILABLE:
+                raise ValueError("Sentence transformers not available for semantic search")
+            _semantic_engine = SemanticSearchEngine(model_name)
+
+        # Verify the engine is properly initialized
+        if not hasattr(_semantic_engine, 'hybrid_search'):
+            raise ValueError("Semantic engine missing hybrid_search method")
+
+        return _semantic_engine
+
+    except Exception as e:
+        raise DatabaseError(f"Failed to initialize semantic engine: {e}")
 
 
 def is_semantic_search_available() -> bool:
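
Because the whole body now sits inside `try/except Exception`, even the internal `ValueError`s are re-raised as `DatabaseError`, so callers see a single failure type for every initialization problem. A sketch of the caller-side contract (import paths assumed; `DatabaseError` may live elsewhere in the package):

```python
from mcp_sqlite_memory_bank.semantic import get_semantic_engine
from mcp_sqlite_memory_bank.types import DatabaseError  # path assumed

try:
    engine = get_semantic_engine("all-MiniLM-L6-v2")
except DatabaseError as exc:
    # e.g. sentence-transformers not installed, or a bad model name
    print(f"Semantic search unavailable: {exc}")
```
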