mcp-sqlite-memory-bank 1.4.1__tar.gz → 1.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcp_sqlite_memory_bank-1.4.1/src/mcp_sqlite_memory_bank.egg-info → mcp_sqlite_memory_bank-1.4.3}/PKG-INFO +1 -1
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/pyproject.toml +1 -1
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/database.py +32 -8
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/prompts.py +17 -13
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/resources.py +19 -7
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/semantic.py +4 -4
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/server.py +259 -5
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/types.py +3 -1
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3/src/mcp_sqlite_memory_bank.egg-info}/PKG-INFO +1 -1
- mcp_sqlite_memory_bank-1.4.3/tests/test_api.py +984 -0
- mcp_sqlite_memory_bank-1.4.1/tests/test_api.py +0 -434
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/LICENSE +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/MANIFEST.in +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/README.md +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/setup.cfg +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/__init__.py +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/py.typed +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/utils.py +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank.egg-info/SOURCES.txt +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank.egg-info/dependency_links.txt +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank.egg-info/entry_points.txt +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank.egg-info/requires.txt +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank.egg-info/top_level.txt +0 -0
- {mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/tests/test_server.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: mcp_sqlite_memory_bank
|
3
|
-
Version: 1.4.1
|
3
|
+
Version: 1.4.3
|
4
4
|
Summary: A dynamic, agent/LLM-friendly SQLite memory bank for MCP servers with semantic search capabilities.
|
5
5
|
Author-email: Robert Meisner <robert@catchit.pl>
|
6
6
|
License-Expression: MIT
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "mcp_sqlite_memory_bank"
|
7
|
-
version = "1.4.1"
|
7
|
+
version = "1.4.3"
|
8
8
|
description = "A dynamic, agent/LLM-friendly SQLite memory bank for MCP servers with semantic search capabilities."
|
9
9
|
authors = [
|
10
10
|
{ name="Robert Meisner", email="robert@catchit.pl" }
|
{mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/database.py
RENAMED
@@ -27,7 +27,6 @@ from .types import (
|
|
27
27
|
SemanticSearchResponse,
|
28
28
|
RelatedContentResponse,
|
29
29
|
HybridSearchResponse,
|
30
|
-
EmbeddingStatsResponse,
|
31
30
|
)
|
32
31
|
from .semantic import get_semantic_engine, is_semantic_search_available
|
33
32
|
|
@@ -429,7 +428,12 @@ class SQLiteMemoryDatabase:
|
|
429
428
|
)
|
430
429
|
|
431
430
|
# Sort by relevance and limit results
|
432
|
-
|
431
|
+
def get_relevance(x: Dict[str, Any]) -> float:
|
432
|
+
rel = x.get("relevance", 0)
|
433
|
+
if isinstance(rel, (int, float)):
|
434
|
+
return float(rel)
|
435
|
+
return 0.0
|
436
|
+
results.sort(key=get_relevance, reverse=True)
|
433
437
|
results = results[:limit]
|
434
438
|
|
435
439
|
return {
|
@@ -453,7 +457,7 @@ class SQLiteMemoryDatabase:
|
|
453
457
|
if pattern:
|
454
458
|
table_names = [name for name in table_names if pattern.replace("%", "") in name]
|
455
459
|
|
456
|
-
exploration = {"tables": [], "total_tables": len(table_names), "total_rows": 0}
|
460
|
+
exploration: Dict[str, Any] = {"tables": [], "total_tables": len(table_names), "total_rows": 0}
|
457
461
|
|
458
462
|
with self.get_connection() as conn:
|
459
463
|
for table_name in table_names:
|
@@ -461,7 +465,7 @@ class SQLiteMemoryDatabase:
|
|
461
465
|
|
462
466
|
# Build column info and identify text columns
|
463
467
|
columns = []
|
464
|
-
text_columns = []
|
468
|
+
text_columns: List[str] = []
|
465
469
|
|
466
470
|
for col in table.columns:
|
467
471
|
col_data = {
|
@@ -476,7 +480,7 @@ class SQLiteMemoryDatabase:
|
|
476
480
|
if "TEXT" in str(col.type).upper() or "VARCHAR" in str(col.type).upper():
|
477
481
|
text_columns.append(col.name)
|
478
482
|
|
479
|
-
table_info = {"name": table_name, "columns": columns, "text_columns": text_columns}
|
483
|
+
table_info: Dict[str, Any] = {"name": table_name, "columns": columns, "text_columns": text_columns}
|
480
484
|
|
481
485
|
# Add row count if requested
|
482
486
|
if include_row_counts:
|
@@ -493,11 +497,11 @@ class SQLiteMemoryDatabase:
|
|
493
497
|
|
494
498
|
# Add content preview for text columns
|
495
499
|
if text_columns:
|
496
|
-
content_preview = {}
|
500
|
+
content_preview: Dict[str, List[Any]] = {}
|
497
501
|
for col_name in text_columns[:3]: # Limit to first 3 text columns
|
498
502
|
col = table.c[col_name]
|
499
503
|
preview_result = conn.execute(select(col).distinct().where(col.isnot(None)).limit(5))
|
500
|
-
unique_values = [row[0] for row in preview_result.fetchall() if row[0]]
|
504
|
+
unique_values: List[Any] = [row[0] for row in preview_result.fetchall() if row[0]]
|
501
505
|
if unique_values:
|
502
506
|
content_preview[col_name] = unique_values
|
503
507
|
|
@@ -927,11 +931,28 @@ class SQLiteMemoryDatabase:
|
|
927
931
|
raise e
|
928
932
|
raise DatabaseError(f"Hybrid search failed: {str(e)}")
|
929
933
|
|
930
|
-
def get_embedding_stats(self, table_name: str, embedding_column: str = "embedding") -> EmbeddingStatsResponse:
|
934
|
+
def get_embedding_stats(self, table_name: str, embedding_column: str = "embedding") -> ToolResponse:
|
931
935
|
"""Get statistics about embeddings in a table."""
|
932
936
|
try:
|
933
937
|
table = self._ensure_table_exists(table_name)
|
934
938
|
|
939
|
+
# Check if embedding column exists
|
940
|
+
if embedding_column not in [col.name for col in table.columns]:
|
941
|
+
# Return 0% coverage when column doesn't exist (for compatibility with tests)
|
942
|
+
total_count = 0
|
943
|
+
with self.get_connection() as conn:
|
944
|
+
total_count = conn.execute(select(text("COUNT(*)")).select_from(table)).scalar() or 0
|
945
|
+
|
946
|
+
return {
|
947
|
+
"success": True,
|
948
|
+
"table_name": table_name,
|
949
|
+
"total_rows": total_count,
|
950
|
+
"embedded_rows": 0,
|
951
|
+
"coverage_percent": 0.0,
|
952
|
+
"embedding_dimensions": None,
|
953
|
+
"embedding_column": embedding_column,
|
954
|
+
}
|
955
|
+
|
935
956
|
with self.get_connection() as conn:
|
936
957
|
# Count total rows
|
937
958
|
total_count = conn.execute(select(text("COUNT(*)")).select_from(table)).scalar() or 0
|
@@ -1001,6 +1022,9 @@ def get_database(db_path: Optional[str] = None) -> SQLiteMemoryDatabase:
|
|
1001
1022
|
global _db_instance
|
1002
1023
|
|
1003
1024
|
actual_path = db_path or os.environ.get("DB_PATH", "./test.db")
|
1025
|
+
if actual_path is None:
|
1026
|
+
actual_path = "./test.db"
|
1027
|
+
|
1004
1028
|
if _db_instance is None or (db_path and db_path != _db_instance.db_path):
|
1005
1029
|
# Close previous instance if it exists
|
1006
1030
|
if _db_instance is not None:
|
{mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/prompts.py
RENAMED
@@ -11,7 +11,7 @@ stored memory content into LLM conversations.
|
|
11
11
|
Author: Robert Meisner
|
12
12
|
"""
|
13
13
|
|
14
|
-
from typing import Optional
|
14
|
+
from typing import Optional, Dict, List, Any, cast
|
15
15
|
from fastmcp import FastMCP
|
16
16
|
from .database import get_database
|
17
17
|
import json
|
@@ -35,7 +35,7 @@ class MemoryBankPrompts:
|
|
35
35
|
|
36
36
|
if table_name:
|
37
37
|
# Analyze specific table
|
38
|
-
result = db.read_rows(table_name, {})
|
38
|
+
result = cast(Dict[str, Any], db.read_rows(table_name, {}))
|
39
39
|
if not result.get("success"):
|
40
40
|
return f"Error: Could not access table '{table_name}'. Please check if it exists."
|
41
41
|
|
@@ -55,7 +55,7 @@ Please provide:
|
|
55
55
|
Focus on actionable insights that could help improve how this information is stored and retrieved."""
|
56
56
|
else:
|
57
57
|
# Analyze all tables
|
58
|
-
tables_result = db.list_tables()
|
58
|
+
tables_result = cast(Dict[str, Any], db.list_tables())
|
59
59
|
if not tables_result.get("success"):
|
60
60
|
return "Error: Could not access memory bank tables."
|
61
61
|
|
@@ -63,10 +63,11 @@ Focus on actionable insights that could help improve how this information is sto
|
|
63
63
|
overview = {"tables": len(tables), "total_content": []}
|
64
64
|
|
65
65
|
for table in tables[:5]: # Limit to first 5 tables
|
66
|
-
rows_result = db.read_rows(table, {})
|
66
|
+
rows_result = cast(Dict[str, Any], db.read_rows(table, {}))
|
67
67
|
if rows_result.get("success"):
|
68
68
|
rows = rows_result.get("rows", [])
|
69
|
-
overview["total_content"].append({
|
69
|
+
total_content = cast(List[Any], overview["total_content"])
|
70
|
+
total_content.append({
|
70
71
|
"table": table,
|
71
72
|
"rows": len(rows),
|
72
73
|
"sample": rows[:2] if rows else []
|
@@ -93,7 +94,7 @@ Focus on high-level strategic insights about the memory bank's utility and organ
|
|
93
94
|
db = get_database(self.db_path)
|
94
95
|
|
95
96
|
# Perform search
|
96
|
-
result = db.search_content(query, None, max_results or 10)
|
97
|
+
result = cast(Dict[str, Any], db.search_content(query, None, max_results or 10))
|
97
98
|
if not result.get("success"):
|
98
99
|
return f"Error: Could not search for '{query}'. {result.get('error', 'Unknown error')}"
|
99
100
|
|
@@ -130,7 +131,7 @@ Use this information to provide a thorough, well-organized response that synthes
|
|
130
131
|
db = get_database(self.db_path)
|
131
132
|
|
132
133
|
# Try to find technical_decisions table
|
133
|
-
tables_result = db.list_tables()
|
134
|
+
tables_result = cast(Dict[str, Any], db.list_tables())
|
134
135
|
if not tables_result.get("success"):
|
135
136
|
return "Error: Could not access memory bank."
|
136
137
|
|
@@ -162,7 +163,8 @@ The table should include fields like: decision_name, chosen_approach, rationale,
|
|
162
163
|
|
163
164
|
# Format decisions for analysis
|
164
165
|
formatted_decisions = []
|
165
|
-
for i, decision in enumerate(decisions, 1):
|
166
|
+
decisions_list = cast(List[Dict[str, Any]], decisions)
|
167
|
+
for i, decision in enumerate(decisions_list, 1):
|
166
168
|
formatted_decisions.append(f"{i}. Decision: {decision.get('decision_name', 'Unknown')}")
|
167
169
|
formatted_decisions.append(f" Approach: {decision.get('chosen_approach', 'Not specified')}")
|
168
170
|
formatted_decisions.append(f" Rationale: {decision.get('rationale', 'Not provided')}")
|
@@ -215,19 +217,21 @@ Focus on actionable insights that can improve technical decision-making processe
|
|
215
217
|
}
|
216
218
|
|
217
219
|
if context_type == "brief":
|
220
|
+
tables_list = cast(List[str], tables)
|
218
221
|
prompt = f"""Memory Bank Context (Brief):
|
219
|
-
Available tables: {', '.join(tables)}
|
220
|
-
Total tables: {len(tables)}
|
222
|
+
Available tables: {', '.join(tables_list)}
|
223
|
+
Total tables: {len(tables_list)}
|
221
224
|
|
222
225
|
This memory bank contains structured information that can be searched and analyzed. Use the available tools to access specific content as needed."""
|
223
226
|
else:
|
224
227
|
# Get sample content from a few tables
|
225
228
|
sample_content = {}
|
226
|
-
for table in tables[:3]: # Sample from first 3 tables
|
229
|
+
tables_list = cast(List[str], tables)
|
230
|
+
for table in tables_list[:3]: # Sample from first 3 tables
|
227
231
|
try:
|
228
|
-
result = db.read_rows(table, {})
|
232
|
+
result = cast(Dict[str, Any], db.read_rows(table, {}))
|
229
233
|
if result.get("success"):
|
230
|
-
rows = result.get("rows", [])
|
234
|
+
rows = cast(List[Any], result.get("rows", []))
|
231
235
|
sample_content[table] = {
|
232
236
|
"row_count": len(rows),
|
233
237
|
"sample_row": rows[0] if rows else None
|
@@ -11,6 +11,7 @@ the standardized MCP protocol.
|
|
11
11
|
Author: Robert Meisner
|
12
12
|
"""
|
13
13
|
|
14
|
+
from typing import Dict, Any, cast
|
14
15
|
from fastmcp import FastMCP
|
15
16
|
from .database import get_database
|
16
17
|
import json
|
@@ -31,7 +32,7 @@ class MemoryBankResources:
|
|
31
32
|
async def get_tables_list() -> str:
|
32
33
|
"""Provide a list of all available tables as an MCP resource."""
|
33
34
|
db = get_database(self.db_path)
|
34
|
-
result = db.list_tables()
|
35
|
+
result = cast(Dict[str, Any], db.list_tables())
|
35
36
|
|
36
37
|
if not result.get("success"):
|
37
38
|
return json.dumps({"error": "Failed to fetch tables", "details": result})
|
@@ -50,7 +51,7 @@ class MemoryBankResources:
|
|
50
51
|
async def get_table_schema(table_name: str) -> str:
|
51
52
|
"""Provide table schema information as an MCP resource."""
|
52
53
|
db = get_database(self.db_path)
|
53
|
-
result = db.describe_table(table_name)
|
54
|
+
result = cast(Dict[str, Any], db.describe_table(table_name))
|
54
55
|
|
55
56
|
if not result.get("success"):
|
56
57
|
return json.dumps({"error": f"Failed to fetch schema for table '{table_name}'", "details": result})
|
@@ -70,7 +71,7 @@ class MemoryBankResources:
|
|
70
71
|
async def get_table_data(table_name: str) -> str:
|
71
72
|
"""Provide table data as an MCP resource."""
|
72
73
|
db = get_database(self.db_path)
|
73
|
-
result = db.read_rows(table_name, {})
|
74
|
+
result = cast(Dict[str, Any], db.read_rows(table_name, {}))
|
74
75
|
|
75
76
|
if not result.get("success"):
|
76
77
|
return json.dumps({"error": f"Failed to fetch data for table '{table_name}'", "details": result})
|
@@ -91,7 +92,7 @@ class MemoryBankResources:
|
|
91
92
|
async def search_memory_content(query: str) -> str:
|
92
93
|
"""Provide search results as an MCP resource."""
|
93
94
|
db = get_database(self.db_path)
|
94
|
-
result = db.search_content(query, None, 50) # Search all tables, limit to 50 results
|
95
|
+
result = cast(Dict[str, Any], db.search_content(query, None, 50)) # Search all tables, limit to 50 results
|
95
96
|
|
96
97
|
if not result.get("success"):
|
97
98
|
return json.dumps({"error": f"Failed to search for '{query}'", "details": result})
|
@@ -114,7 +115,7 @@ class MemoryBankResources:
|
|
114
115
|
db = get_database(self.db_path)
|
115
116
|
|
116
117
|
# Get table list
|
117
|
-
tables_result = db.list_tables()
|
118
|
+
tables_result = cast(Dict[str, Any], db.list_tables())
|
118
119
|
if not tables_result.get("success"):
|
119
120
|
return json.dumps({"error": "Failed to fetch memory overview", "details": tables_result})
|
120
121
|
|
@@ -125,7 +126,7 @@ class MemoryBankResources:
|
|
125
126
|
# Get row counts for each table
|
126
127
|
for table in tables:
|
127
128
|
try:
|
128
|
-
rows_result = db.read_rows(table, {})
|
129
|
+
rows_result = cast(Dict[str, Any], db.read_rows(table, {}))
|
129
130
|
if rows_result.get("success"):
|
130
131
|
row_count = len(rows_result.get("rows", []))
|
131
132
|
table_stats[table] = {
|
@@ -144,13 +145,24 @@ class MemoryBankResources:
|
|
144
145
|
"status": f"error: {str(e)}"
|
145
146
|
}
|
146
147
|
|
148
|
+
# Find largest table
|
149
|
+
largest_table = None
|
150
|
+
if table_stats:
|
151
|
+
max_rows = 0
|
152
|
+
for table_name, stats in table_stats.items():
|
153
|
+
row_count_obj = stats.get("row_count", 0)
|
154
|
+
row_count = int(row_count_obj) if isinstance(row_count_obj, (int, str)) else 0
|
155
|
+
if row_count > max_rows:
|
156
|
+
max_rows = row_count
|
157
|
+
largest_table = table_name
|
158
|
+
|
147
159
|
resource_content = {
|
148
160
|
"resource_type": "memory_overview",
|
149
161
|
"description": "Overview of memory bank contents and usage",
|
150
162
|
"summary": {
|
151
163
|
"total_tables": len(tables),
|
152
164
|
"total_rows": total_rows,
|
153
|
-
"largest_table":
|
165
|
+
"largest_table": largest_table
|
154
166
|
},
|
155
167
|
"table_statistics": table_stats,
|
156
168
|
"last_updated": "dynamic"
|
{mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/semantic.py
RENAMED
@@ -19,8 +19,8 @@ try:
|
|
19
19
|
SENTENCE_TRANSFORMERS_AVAILABLE = True
|
20
20
|
except ImportError:
|
21
21
|
SENTENCE_TRANSFORMERS_AVAILABLE = False
|
22
|
-
SentenceTransformer = None
|
23
|
-
util = None
|
22
|
+
SentenceTransformer = None # type: ignore
|
23
|
+
util = None # type: ignore
|
24
24
|
logging.warning("sentence-transformers not available. Install with: pip install sentence-transformers")
|
25
25
|
|
26
26
|
try:
|
@@ -29,7 +29,7 @@ try:
|
|
29
29
|
TORCH_AVAILABLE = True
|
30
30
|
except ImportError:
|
31
31
|
TORCH_AVAILABLE = False
|
32
|
-
torch = None
|
32
|
+
torch = None # type: ignore
|
33
33
|
logging.warning("torch not available. Install with: pip install torch")
|
34
34
|
|
35
35
|
from .types import ValidationError, DatabaseError
|
@@ -50,7 +50,7 @@ class SemanticSearchEngine:
|
|
50
50
|
"""Initialize the semantic search engine."""
|
51
51
|
self.model_name = model_name
|
52
52
|
self._model = None
|
53
|
-
self._embedding_cache = {}
|
53
|
+
self._embedding_cache: Dict[str, Any] = {}
|
54
54
|
|
55
55
|
if not SENTENCE_TRANSFORMERS_AVAILABLE:
|
56
56
|
raise ValueError(
|
{mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/server.py
RENAMED
@@ -470,7 +470,12 @@ def add_embeddings(
|
|
470
470
|
table_name: str, text_columns: List[str], embedding_column: str = "embedding", model_name: str = "all-MiniLM-L6-v2"
|
471
471
|
) -> ToolResponse:
|
472
472
|
"""
|
473
|
+
⚠️ **ADVANCED TOOL** - Most agents should use auto_smart_search() instead!
|
474
|
+
|
473
475
|
Generate and store vector embeddings for semantic search on table content.
|
476
|
+
|
477
|
+
**RECOMMENDATION**: Use auto_smart_search() or auto_semantic_search() for automatic setup.
|
478
|
+
This tool is for advanced users who need manual control over embedding generation.
|
474
479
|
|
475
480
|
This tool enables intelligent knowledge discovery by creating vector representations
|
476
481
|
of text content that can be searched semantically rather than just by exact keywords.
|
@@ -511,7 +516,12 @@ def semantic_search(
|
|
511
516
|
model_name: str = "all-MiniLM-L6-v2",
|
512
517
|
) -> ToolResponse:
|
513
518
|
"""
|
519
|
+
⚠️ **ADVANCED TOOL** - Most agents should use auto_smart_search() instead!
|
520
|
+
|
514
521
|
Find content using natural language semantic similarity rather than exact keyword matching.
|
522
|
+
|
523
|
+
**RECOMMENDATION**: Use auto_smart_search() for automatic setup and hybrid search capabilities.
|
524
|
+
This tool requires manual embedding setup via add_embeddings() first.
|
515
525
|
|
516
526
|
This enables intelligent knowledge discovery - find related concepts even when
|
517
527
|
they use different terminology or phrasing.
|
@@ -610,7 +620,12 @@ def smart_search(
|
|
610
620
|
model_name: str = "all-MiniLM-L6-v2",
|
611
621
|
) -> ToolResponse:
|
612
622
|
"""
|
623
|
+
⚠️ **ADVANCED TOOL** - Most agents should use auto_smart_search() instead!
|
624
|
+
|
613
625
|
Intelligent hybrid search combining semantic understanding with keyword matching.
|
626
|
+
|
627
|
+
**RECOMMENDATION**: Use auto_smart_search() for the same functionality with automatic setup.
|
628
|
+
This tool requires manual embedding setup via add_embeddings() first.
|
614
629
|
|
615
630
|
Provides the best of both worlds - semantic similarity for concept discovery
|
616
631
|
plus exact text matching for precise searches.
|
@@ -639,7 +654,7 @@ def smart_search(
|
|
639
654
|
- Provides separate scores for transparency
|
640
655
|
- Falls back gracefully if semantic search unavailable
|
641
656
|
- Optimal for both exploratory and precise searches
|
642
|
-
-
|
657
|
+
- Perfect for agents - ultimate search tool that just works!
|
643
658
|
"""
|
644
659
|
return cast(
|
645
660
|
ToolResponse,
|
@@ -649,8 +664,250 @@ def smart_search(
|
|
649
664
|
)
|
650
665
|
|
651
666
|
|
667
|
+
# --- Auto-Embedding Semantic Search Tools ---
|
668
|
+
|
669
|
+
|
670
|
+
@mcp.tool
|
671
|
+
@catch_errors
|
672
|
+
def auto_semantic_search(
|
673
|
+
query: str,
|
674
|
+
tables: Optional[List[str]] = None,
|
675
|
+
similarity_threshold: float = 0.5,
|
676
|
+
limit: int = 10,
|
677
|
+
model_name: str = "all-MiniLM-L6-v2",
|
678
|
+
) -> ToolResponse:
|
679
|
+
"""
|
680
|
+
🚀 **ZERO-SETUP SEMANTIC SEARCH** - Just search, embeddings are handled automatically!
|
681
|
+
|
682
|
+
Find content using natural language semantic similarity. If embeddings don't exist,
|
683
|
+
they will be automatically generated for text columns. This is the easiest way to
|
684
|
+
do semantic search - no manual setup required!
|
685
|
+
|
686
|
+
Args:
|
687
|
+
query (str): Natural language search query
|
688
|
+
tables (Optional[List[str]]): Specific tables to search (default: all tables)
|
689
|
+
similarity_threshold (float): Minimum similarity score (0.0-1.0, default: 0.5)
|
690
|
+
limit (int): Maximum number of results to return (default: 10)
|
691
|
+
model_name (str): Model to use for embeddings (default: "all-MiniLM-L6-v2")
|
692
|
+
|
693
|
+
Returns:
|
694
|
+
ToolResponse: On success: {"success": True, "results": List[...], "auto_embedded_tables": List[str]}
|
695
|
+
On error: {"success": False, "error": str, "category": str, "details": dict}
|
696
|
+
|
697
|
+
Examples:
|
698
|
+
>>> auto_semantic_search("API design patterns")
|
699
|
+
{"success": True, "results": [
|
700
|
+
{"table_name": "technical_decisions", "similarity_score": 0.87, "decision_name": "REST API Structure", ...}
|
701
|
+
], "auto_embedded_tables": ["technical_decisions"]}
|
702
|
+
|
703
|
+
>>> auto_semantic_search("machine learning concepts")
|
704
|
+
# Finds content about "ML", "AI", "neural networks", etc.
|
705
|
+
# Automatically creates embeddings if they don't exist!
|
706
|
+
|
707
|
+
FastMCP Tool Info:
|
708
|
+
- **COMPLETELY AUTOMATIC**: No manual embedding setup required
|
709
|
+
- Auto-detects text columns and creates embeddings as needed
|
710
|
+
- Works across multiple tables simultaneously
|
711
|
+
- Finds conceptually similar content regardless of exact wording
|
712
|
+
- Returns relevance scores for ranking results
|
713
|
+
- Supports fuzzy matching and concept discovery
|
714
|
+
- Perfect for agents - just search and it works!
|
715
|
+
"""
|
716
|
+
try:
|
717
|
+
db = get_database(DB_PATH)
|
718
|
+
auto_embedded_tables: List[str] = []
|
719
|
+
|
720
|
+
# Get tables to search
|
721
|
+
search_tables: List[str]
|
722
|
+
if tables:
|
723
|
+
search_tables = tables
|
724
|
+
else:
|
725
|
+
tables_result = db.list_tables()
|
726
|
+
if not tables_result.get("success"):
|
727
|
+
return cast(ToolResponse, tables_result)
|
728
|
+
all_tables = tables_result.get("tables", [])
|
729
|
+
if isinstance(all_tables, list):
|
730
|
+
search_tables = all_tables
|
731
|
+
else:
|
732
|
+
search_tables = []
|
733
|
+
|
734
|
+
# Auto-embed text columns in tables that don't have embeddings
|
735
|
+
for table_name in search_tables:
|
736
|
+
try:
|
737
|
+
# Check if table has embeddings
|
738
|
+
stats_result = db.get_embedding_stats(table_name, "embedding")
|
739
|
+
coverage_percent = stats_result.get("coverage_percent", 0)
|
740
|
+
if stats_result.get("success") and isinstance(coverage_percent, (int, float)) and coverage_percent > 0:
|
741
|
+
continue # Table already has embeddings
|
742
|
+
|
743
|
+
# Get table schema to find text columns
|
744
|
+
schema_result = db.describe_table(table_name)
|
745
|
+
if not schema_result.get("success"):
|
746
|
+
continue
|
747
|
+
|
748
|
+
# Find text columns
|
749
|
+
text_columns = []
|
750
|
+
columns = schema_result.get("columns", [])
|
751
|
+
if isinstance(columns, list):
|
752
|
+
for col in columns:
|
753
|
+
if isinstance(col, dict) and "TEXT" in col.get("type", "").upper():
|
754
|
+
text_columns.append(col["name"])
|
755
|
+
|
756
|
+
# Auto-embed text columns
|
757
|
+
if text_columns:
|
758
|
+
embed_result = db.generate_embeddings(table_name, text_columns, "embedding", model_name)
|
759
|
+
if embed_result.get("success"):
|
760
|
+
auto_embedded_tables.append(table_name)
|
761
|
+
|
762
|
+
except Exception:
|
763
|
+
# If auto-embedding fails, continue without it
|
764
|
+
continue
|
765
|
+
|
766
|
+
# Perform semantic search
|
767
|
+
search_result = db.semantic_search(
|
768
|
+
query, search_tables, "embedding", None, similarity_threshold, limit, model_name
|
769
|
+
)
|
770
|
+
|
771
|
+
# Add auto-embedding info to result
|
772
|
+
if isinstance(search_result, dict):
|
773
|
+
search_result["auto_embedded_tables"] = auto_embedded_tables
|
774
|
+
if auto_embedded_tables:
|
775
|
+
search_result["auto_embedding_note"] = f"Automatically generated embeddings for {len(auto_embedded_tables)} table(s)"
|
776
|
+
|
777
|
+
return cast(ToolResponse, search_result)
|
778
|
+
|
779
|
+
except Exception as e:
|
780
|
+
return cast(ToolResponse, {
|
781
|
+
"success": False,
|
782
|
+
"error": f"Auto semantic search failed: {str(e)}",
|
783
|
+
"category": "SEMANTIC_SEARCH_ERROR",
|
784
|
+
"details": {"query": query, "tables": tables}
|
785
|
+
})
|
786
|
+
|
787
|
+
|
652
788
|
@mcp.tool
|
653
789
|
@catch_errors
|
790
|
+
def auto_smart_search(
|
791
|
+
query: str,
|
792
|
+
tables: Optional[List[str]] = None,
|
793
|
+
semantic_weight: float = 0.7,
|
794
|
+
text_weight: float = 0.3,
|
795
|
+
limit: int = 10,
|
796
|
+
model_name: str = "all-MiniLM-L6-v2",
|
797
|
+
) -> ToolResponse:
|
798
|
+
"""
|
799
|
+
🚀 **ZERO-SETUP HYBRID SEARCH** - Best of both worlds with automatic embedding!
|
800
|
+
|
801
|
+
Intelligent hybrid search combining semantic understanding with keyword matching.
|
802
|
+
Automatically generates embeddings for text columns when needed. This is the
|
803
|
+
ultimate search tool - no manual setup required!
|
804
|
+
|
805
|
+
Args:
|
806
|
+
query (str): Search query (natural language or keywords)
|
807
|
+
tables (Optional[List[str]]): Tables to search (default: all)
|
808
|
+
semantic_weight (float): Weight for semantic similarity (0.0-1.0, default: 0.7)
|
809
|
+
text_weight (float): Weight for keyword matching (0.0-1.0, default: 0.3)
|
810
|
+
limit (int): Maximum results (default: 10)
|
811
|
+
model_name (str): Semantic model to use (default: "all-MiniLM-L6-v2")
|
812
|
+
|
813
|
+
Returns:
|
814
|
+
ToolResponse: On success: {"success": True, "results": List[...], "search_type": "auto_hybrid"}
|
815
|
+
On error: {"success": False, "error": str, "category": str, "details": dict}
|
816
|
+
|
817
|
+
Examples:
|
818
|
+
>>> auto_smart_search("user authentication security")
|
819
|
+
{"success": True, "results": [
|
820
|
+
{"combined_score": 0.89, "semantic_score": 0.92, "text_score": 0.82, ...}
|
821
|
+
], "search_type": "auto_hybrid", "auto_embedded_tables": ["user_data"]}
|
822
|
+
|
823
|
+
FastMCP Tool Info:
|
824
|
+
- **COMPLETELY AUTOMATIC**: No manual embedding setup required
|
825
|
+
- Automatically balances semantic and keyword search
|
826
|
+
- Auto-detects text columns and creates embeddings as needed
|
827
|
+
- Provides separate scores for transparency
|
828
|
+
- Falls back gracefully if semantic search unavailable
|
829
|
+
- Optimal for both exploratory and precise searches
|
830
|
+
- Perfect for agents - ultimate search tool that just works!
|
831
|
+
"""
|
832
|
+
try:
|
833
|
+
db = get_database(DB_PATH)
|
834
|
+
auto_embedded_tables: List[str] = []
|
835
|
+
|
836
|
+
# Get tables to search
|
837
|
+
search_tables: List[str]
|
838
|
+
if tables:
|
839
|
+
search_tables = tables
|
840
|
+
else:
|
841
|
+
tables_result = db.list_tables()
|
842
|
+
if not tables_result.get("success"):
|
843
|
+
return cast(ToolResponse, tables_result)
|
844
|
+
all_tables = tables_result.get("tables", [])
|
845
|
+
if isinstance(all_tables, list):
|
846
|
+
search_tables = all_tables
|
847
|
+
else:
|
848
|
+
search_tables = []
|
849
|
+
|
850
|
+
# Auto-embed text columns in tables that don't have embeddings
|
851
|
+
for table_name in search_tables:
|
852
|
+
try:
|
853
|
+
# Check if table has embeddings
|
854
|
+
stats_result = db.get_embedding_stats(table_name, "embedding")
|
855
|
+
coverage_percent = stats_result.get("coverage_percent", 0)
|
856
|
+
if stats_result.get("success") and isinstance(coverage_percent, (int, float)) and coverage_percent > 0:
|
857
|
+
continue # Table already has embeddings
|
858
|
+
|
859
|
+
# Get table schema to find text columns
|
860
|
+
schema_result = db.describe_table(table_name)
|
861
|
+
if not schema_result.get("success"):
|
862
|
+
continue
|
863
|
+
|
864
|
+
# Find text columns
|
865
|
+
text_columns = []
|
866
|
+
columns = schema_result.get("columns", [])
|
867
|
+
if isinstance(columns, list):
|
868
|
+
for col in columns:
|
869
|
+
if isinstance(col, dict) and "TEXT" in col.get("type", "").upper():
|
870
|
+
text_columns.append(col["name"])
|
871
|
+
|
872
|
+
# Auto-embed text columns
|
873
|
+
if text_columns:
|
874
|
+
embed_result = db.generate_embeddings(table_name, text_columns, "embedding", model_name)
|
875
|
+
if embed_result.get("success"):
|
876
|
+
auto_embedded_tables.append(table_name)
|
877
|
+
|
878
|
+
except Exception:
|
879
|
+
# If auto-embedding fails, continue without it
|
880
|
+
continue
|
881
|
+
|
882
|
+
# Now perform hybrid search
|
883
|
+
db = get_database(DB_PATH)
|
884
|
+
hybrid_result = db.hybrid_search(
|
885
|
+
query, tables, None, "embedding", semantic_weight, text_weight, limit, model_name
|
886
|
+
)
|
887
|
+
|
888
|
+
# Add auto-embedding info to result
|
889
|
+
if isinstance(hybrid_result, dict) and hybrid_result.get("success"):
|
890
|
+
# Convert to mutable dict to add extra fields
|
891
|
+
final_result = dict(hybrid_result)
|
892
|
+
final_result["search_type"] = "auto_hybrid"
|
893
|
+
final_result["auto_embedded_tables"] = auto_embedded_tables
|
894
|
+
if auto_embedded_tables:
|
895
|
+
final_result["auto_embedding_note"] = f"Automatically generated embeddings for {len(auto_embedded_tables)} table(s)"
|
896
|
+
return cast(ToolResponse, final_result)
|
897
|
+
else:
|
898
|
+
return cast(ToolResponse, hybrid_result)
|
899
|
+
|
900
|
+
except Exception as e:
|
901
|
+
return cast(ToolResponse, {
|
902
|
+
"success": False,
|
903
|
+
"error": f"Auto smart search failed: {str(e)}",
|
904
|
+
"category": "HYBRID_SEARCH_ERROR",
|
905
|
+
"details": {"query": query, "tables": tables}
|
906
|
+
})
|
907
|
+
|
908
|
+
|
909
|
+
@mcp.tool
|
910
|
+
@catch_errors
|
654
911
|
def embedding_stats(table_name: str, embedding_column: str = "embedding") -> ToolResponse:
|
655
912
|
"""
|
656
913
|
Get statistics about semantic search readiness for a table.
|
@@ -672,7 +929,7 @@ def embedding_stats(table_name: str, embedding_column: str = "embedding") -> Too
|
|
672
929
|
|
673
930
|
FastMCP Tool Info:
|
674
931
|
- Shows how much content is ready for semantic search
|
675
|
-
- Helps identify tables that need embedding generation
|
932
|
+
- Helps identify tables that need embedding generation
|
676
933
|
- Provides embedding dimension info for debugging
|
677
934
|
- Useful for monitoring semantic search capabilities
|
678
935
|
"""
|
@@ -792,9 +1049,6 @@ def get_tools_by_category(category: str) -> ToolResponse:
|
|
792
1049
|
})
|
793
1050
|
|
794
1051
|
|
795
|
-
# ...existing code...
|
796
|
-
|
797
|
-
|
798
1052
|
# Export the FastMCP app for use in other modules and server runners
|
799
1053
|
app = mcp
|
800
1054
|
|
{mcp_sqlite_memory_bank-1.4.1 → mcp_sqlite_memory_bank-1.4.3}/src/mcp_sqlite_memory_bank/types.py
RENAMED
@@ -180,7 +180,7 @@ class ExploreTablesResponse(SuccessResponse):
|
|
180
180
|
|
181
181
|
|
182
182
|
# Semantic Search Response Types
|
183
|
-
class SemanticSearchResponse(TypedDict):
|
183
|
+
class SemanticSearchResponse(TypedDict, total=False):
|
184
184
|
"""Response type for semantic search operations."""
|
185
185
|
|
186
186
|
success: bool
|
@@ -190,6 +190,8 @@ class SemanticSearchResponse(TypedDict):
|
|
190
190
|
total_results: int
|
191
191
|
model: str
|
192
192
|
similarity_threshold: float
|
193
|
+
auto_embedded_tables: List[str] # Tables that had embeddings auto-generated
|
194
|
+
auto_embedding_note: str # Message about auto-embedding
|
193
195
|
|
194
196
|
|
195
197
|
class RelatedContentResponse(TypedDict, total=False):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: mcp_sqlite_memory_bank
|
3
|
-
Version: 1.4.1
|
3
|
+
Version: 1.4.3
|
4
4
|
Summary: A dynamic, agent/LLM-friendly SQLite memory bank for MCP servers with semantic search capabilities.
|
5
5
|
Author-email: Robert Meisner <robert@catchit.pl>
|
6
6
|
License-Expression: MIT
|