mcp-sqlite-memory-bank 1.5.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_sqlite_memory_bank/__init__.py +2 -2
- mcp_sqlite_memory_bank/__main__.py +68 -0
- mcp_sqlite_memory_bank/database.py +234 -68
- mcp_sqlite_memory_bank/prompts.py +76 -52
- mcp_sqlite_memory_bank/resources.py +250 -150
- mcp_sqlite_memory_bank/semantic.py +50 -17
- mcp_sqlite_memory_bank/server.py +351 -23
- mcp_sqlite_memory_bank/tools/__init__.py +33 -25
- mcp_sqlite_memory_bank/tools/analytics.py +225 -139
- mcp_sqlite_memory_bank/tools/basic.py +417 -7
- mcp_sqlite_memory_bank/tools/discovery.py +1428 -0
- mcp_sqlite_memory_bank/tools/search.py +159 -72
- mcp_sqlite_memory_bank/types.py +6 -1
- mcp_sqlite_memory_bank/utils.py +165 -107
- {mcp_sqlite_memory_bank-1.5.0.dist-info → mcp_sqlite_memory_bank-1.6.0.dist-info}/METADATA +54 -6
- mcp_sqlite_memory_bank-1.6.0.dist-info/RECORD +21 -0
- mcp_sqlite_memory_bank-1.5.0.dist-info/RECORD +0 -19
- {mcp_sqlite_memory_bank-1.5.0.dist-info → mcp_sqlite_memory_bank-1.6.0.dist-info}/WHEEL +0 -0
- {mcp_sqlite_memory_bank-1.5.0.dist-info → mcp_sqlite_memory_bank-1.6.0.dist-info}/entry_points.txt +0 -0
- {mcp_sqlite_memory_bank-1.5.0.dist-info → mcp_sqlite_memory_bank-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {mcp_sqlite_memory_bank-1.5.0.dist-info → mcp_sqlite_memory_bank-1.6.0.dist-info}/top_level.txt +0 -0
mcp_sqlite_memory_bank/database.py

```diff
@@ -12,7 +12,19 @@ import json
 import logging
 from functools import wraps
 from typing import Dict, List, Any, Optional, Callable, cast
-from sqlalchemy import create_engine, MetaData, Table, select, insert, update, delete, text, inspect, and_, or_
+from sqlalchemy import (
+    create_engine,
+    MetaData,
+    Table,
+    select,
+    insert,
+    update,
+    delete,
+    text,
+    inspect,
+    and_,
+    or_,
+)
 from sqlalchemy.engine import Engine
 from sqlalchemy.exc import SQLAlchemyError
 from contextlib import contextmanager
```
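Nearly every hunk below is this same mechanical reflow: 1.6.0 appears to have been run through a Black-style formatter (88-column limit, magic trailing commas), so long single-line statements get split across parenthesized lines with no behavioral change. A quick reviewer-side check for such hunks — not part of the package — is to compare the old and new fragments' ASTs, which ignore layout:

```python
# Reviewer aid (not package code): two Python sources that parse to
# identical ASTs differ only in formatting.
import ast

def same_behavior(old_src: str, new_src: str) -> bool:
    """True if the two sources produce identical ASTs (layout ignored)."""
    return ast.dump(ast.parse(old_src)) == ast.dump(ast.parse(new_src))

old = "from sqlalchemy import create_engine, MetaData"
new = "from sqlalchemy import (\n    create_engine,\n    MetaData,\n)"
assert same_behavior(old, new)  # the parenthesized reflow is equivalent
```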
```diff
@@ -63,7 +75,7 @@ class SQLiteMemoryDatabase:
         except Exception as e:
             logging.warning(f"Error closing database: {e}")

-    def __del__(self):
+    def __del__(self) -> None:
         """Ensure cleanup when object is garbage collected."""
         self.close()

```
```diff
@@ -76,7 +88,7 @@
             logging.warning(f"Failed to refresh metadata: {e}")

     @contextmanager
-    def get_connection(self):
+    def get_connection(self) -> Any:
         """Get a database connection with automatic cleanup."""
         conn = self.engine.connect()
         try:
```
```diff
@@ -96,12 +108,17 @@

         return self.metadata.tables[table_name]

-    def _validate_columns(self, table: Table, column_names: List[str], context: str = "operation") -> None:
+    def _validate_columns(
+        self, table: Table, column_names: List[str], context: str = "operation"
+    ) -> None:
         """Validate that all column names exist in the table."""
         valid_columns = set(col.name for col in table.columns)
         for col_name in column_names:
             if col_name not in valid_columns:
-                raise ValidationError(f"Invalid column '{col_name}' for table '{table.name}' in {context}")
+                raise ValidationError(
+                    f"Invalid column '{col_name}' for table "
+                    f"'{table.name}' in {context}"
+                )

     def _build_where_conditions(self, table: Table, where: Dict[str, Any]) -> List:
         """Build SQLAlchemy WHERE conditions from a dictionary."""
```
```diff
@@ -135,7 +152,9 @@

         return decorator

-    def create_table(self, table_name: str, columns: List[Dict[str, str]]) -> ToolResponse:
+    def create_table(
+        self, table_name: str, columns: List[Dict[str, str]]
+    ) -> ToolResponse:
         """Create a new table with the specified columns."""
         # Input validation
         if not table_name or not table_name.isidentifier():
```
```diff
@@ -170,7 +189,11 @@
         try:
             with self.get_connection() as conn:
                 inspector = inspect(conn)
-                tables = [name for name in inspector.get_table_names() if not name.startswith("sqlite_")]
+                tables = [
+                    name
+                    for name in inspector.get_table_names()
+                    if not name.startswith("sqlite_")
+                ]
                 return {"success": True, "tables": tables}
         except SQLAlchemyError as e:
             raise DatabaseError(f"Failed to list tables: {str(e)}")
```
```diff
@@ -229,7 +252,9 @@
         except (ValidationError, SQLAlchemyError) as e:
             if isinstance(e, ValidationError):
                 raise e
-            raise DatabaseError(f"Failed to rename table from {old_name} to {new_name}: {str(e)}")
+            raise DatabaseError(
+                f"Failed to rename table from {old_name} to {new_name}: {str(e)}"
+            )

     def insert_row(self, table_name: str, data: Dict[str, Any]) -> ToolResponse:
         """Insert a row into a table."""
```
```diff
@@ -248,7 +273,10 @@
             raise DatabaseError(f"Failed to insert into table {table_name}: {str(e)}")

     def read_rows(
-        self, table_name: str, where: Optional[Dict[str, Any]] = None, limit: Optional[int] = None
+        self,
+        table_name: str,
+        where: Optional[Dict[str, Any]] = None,
+        limit: Optional[int] = None,
     ) -> ToolResponse:
         """Read rows from a table with optional filtering."""
         try:
```
```diff
@@ -275,7 +303,10 @@
             raise DatabaseError(f"Failed to read from table {table_name}: {str(e)}")

     def update_rows(
-        self, table_name: str, data: Dict[str, Any], where: Optional[Dict[str, Any]] = None
+        self,
+        table_name: str,
+        data: Dict[str, Any],
+        where: Optional[Dict[str, Any]] = None,
     ) -> ToolResponse:
         """Update rows in a table."""
         if not data:
```
```diff
@@ -299,7 +330,9 @@
                 raise e
             raise DatabaseError(f"Failed to update table {table_name}: {str(e)}")

-    def delete_rows(self, table_name: str, where: Optional[Dict[str, Any]] = None) -> ToolResponse:
+    def delete_rows(
+        self, table_name: str, where: Optional[Dict[str, Any]] = None
+    ) -> ToolResponse:
         """Delete rows from a table."""
         try:
             table = self._ensure_table_exists(table_name)
```
```diff
@@ -310,7 +343,9 @@
             if conditions:
                 stmt = stmt.where(and_(*conditions))
             else:
-                logging.warning(f"delete_rows called without WHERE clause on table {table_name}")
+                logging.warning(
+                    f"delete_rows called without WHERE clause on table {table_name}"
+                )

             result = self._execute_with_commit(stmt)
             return {"success": True, "rows_affected": result.rowcount}
```
```diff
@@ -363,13 +398,16 @@
         try:
             self._refresh_metadata()
             schemas = {
-                table_name: [col.name for col in table.columns] for table_name, table in self.metadata.tables.items()
+                table_name: [col.name for col in table.columns]
+                for table_name, table in self.metadata.tables.items()
             }
             return {"success": True, "schemas": schemas}
         except SQLAlchemyError as e:
             raise DatabaseError(f"Failed to list all columns: {str(e)}")

-    def search_content(self, query: str, tables: Optional[List[str]] = None, limit: int = 50) -> ToolResponse:
+    def search_content(
+        self, query: str, tables: Optional[List[str]] = None, limit: int = 50
+    ) -> ToolResponse:
         """Perform full-text search across table content."""
         if not query or not query.strip():
             raise ValidationError("Search query cannot be empty")
```
```diff
@@ -390,7 +428,8 @@
             text_columns = [
                 col
                 for col in table.columns
-                if "TEXT" in str(col.type).upper()
+                if "TEXT" in str(col.type).upper()
+                or "VARCHAR" in str(col.type).upper()
             ]

             if not text_columns:
```
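The functional change in this hunk (repeated later in explore_tables and the semantic-search path) is that text-column detection now accepts VARCHAR(n) declarations in addition to TEXT. As a standalone sketch — is_searchable_text is a hypothetical name, not a helper the package defines:

```python
from sqlalchemy import Column, String

def is_searchable_text(col: Column) -> bool:
    """Mirror of the broadened 1.6.0 check: TEXT affinity, including VARCHAR."""
    type_name = str(col.type).upper()
    return "TEXT" in type_name or "VARCHAR" in type_name

# A Column("title", String(120)) renders as "VARCHAR(120)", so it was
# skipped by the 1.5.0 TEXT-only check and is now included.
```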
```diff
@@ -413,42 +452,72 @@
                         if col.name in row_dict and row_dict[col.name]:
                             content = str(row_dict[col.name]).lower()
                             content_length = len(content)
-
+
                             if query_lower in content:
                                 # Factor 1: Exact phrase frequency (weighted higher)
                                 exact_frequency = content.count(query_lower)
-                                exact_score = (exact_frequency * 2.0) / content_length if content_length > 0 else 0
-
+                                exact_score = (
+                                    (exact_frequency * 2.0) / content_length
+                                    if content_length > 0
+                                    else 0
+                                )
+
                                 # Factor 2: Individual term frequency
                                 term_score = 0.0
                                 for term in query_terms:
                                     if term in content:
-                                        term_score += content.count(term) / content_length if content_length > 0 else 0
-
+                                        term_score += (
+                                            content.count(term) / content_length
+                                            if content_length > 0
+                                            else 0
+                                        )
+
                                 # Factor 3: Position bonus (early matches score higher)
                                 position_bonus = 0.0
                                 first_occurrence = content.find(query_lower)
                                 if first_occurrence != -1:
-                                    position_bonus = (content_length - first_occurrence) / content_length * 0.1
-
+                                    position_bonus = (
+                                        (content_length - first_occurrence)
+                                        / content_length
+                                        * 0.1
+                                    )
+
                                 # Factor 4: Column importance (title/name columns get bonus)
                                 column_bonus = 0.0
-                                if any(keyword in col.name.lower() for keyword in ["title", "name", "summary", "description"]):
+                                if any(
+                                    keyword in col.name.lower()
+                                    for keyword in [
+                                        "title",
+                                        "name",
+                                        "summary",
+                                        "description",
+                                    ]
+                                ):
                                     column_bonus = 0.2
-
+
                                 # Combined relevance score
-                                col_relevance = exact_score + term_score + position_bonus + column_bonus
+                                col_relevance = (
+                                    exact_score
+                                    + term_score
+                                    + position_bonus
+                                    + column_bonus
+                                )
                                 relevance_scores.append(col_relevance)
-
+
                                 # Enhanced matched content with context
                                 snippet_start = max(0, first_occurrence - 50)
-                                snippet_end = min(len(row_dict[col.name]), first_occurrence + len(query) + 50)
-                                snippet = str(row_dict[col.name])[snippet_start:snippet_end]
+                                snippet_end = min(
+                                    len(row_dict[col.name]),
+                                    first_occurrence + len(query) + 50,
+                                )
+                                snippet = str(row_dict[col.name])[
+                                    snippet_start:snippet_end
+                                ]
                                 if snippet_start > 0:
                                     snippet = "..." + snippet
                                 if snippet_end < len(str(row_dict[col.name])):
                                     snippet = snippet + "..."
-
+
                                 matched_content.append(f"{col.name}: {snippet}")

                     total_relevance = sum(relevance_scores)
```
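For readers tracing the reflowed block above, the per-column relevance score it computes reduces to four additive factors. A minimal standalone restatement of the same arithmetic (function name and parameters are illustrative, not package API):

```python
def relevance(content: str, query: str, column_name: str) -> float:
    """Per-column score: phrase frequency, term frequency, position, column name."""
    content, query = content.lower(), query.lower()
    n = len(content)
    if n == 0 or query not in content:
        return 0.0
    exact_score = content.count(query) * 2.0 / n                    # Factor 1
    term_score = sum(content.count(t) / n for t in query.split())   # Factor 2
    first = content.find(query)
    position_bonus = (n - first) / n * 0.1                          # Factor 3
    important = ("title", "name", "summary", "description")
    column_bonus = 0.2 if any(k in column_name.lower() for k in important) else 0.0  # Factor 4
    return exact_score + term_score + position_bonus + column_bonus
```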
```diff
@@ -471,6 +546,7 @@
                 if isinstance(rel, (int, float)):
                     return float(rel)
                 return 0.0
+
             results.sort(key=get_relevance, reverse=True)
             results = results[:limit]

```
```diff
@@ -486,16 +562,24 @@
                 raise e
             raise DatabaseError(f"Failed to search content: {str(e)}")

-    def explore_tables(self, pattern: Optional[str] = None, include_row_counts: bool = True) -> ToolResponse:
+    def explore_tables(
+        self, pattern: Optional[str] = None, include_row_counts: bool = True
+    ) -> ToolResponse:
         """Explore table structures and content."""
         try:
             self._refresh_metadata()
             table_names = list(self.metadata.tables.keys())

             if pattern:
-                table_names = [name for name in table_names if pattern.replace("%", "") in name]
-
-            exploration: Dict[str, Any] = {"tables": [], "total_tables": len(table_names), "total_rows": 0}
+                table_names = [
+                    name for name in table_names if pattern.replace("%", "") in name
+                ]
+
+            exploration: Dict[str, Any] = {
+                "tables": [],
+                "total_tables": len(table_names),
+                "total_rows": 0,
+            }

             with self.get_connection() as conn:
                 for table_name in table_names:
```
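Note how explore_tables treats its pattern argument: SQL LIKE wildcards are stripped and matching degrades to a plain substring test, so "user%" and "%user%" filter identically. The same expression, on illustrative names:

```python
# Same filtering expression as the hunk above.
table_names = ["users", "user_notes", "sessions"]
pattern = "user%"
filtered = [name for name in table_names if pattern.replace("%", "") in name]
assert filtered == ["users", "user_notes"]
```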
|
@@ -515,31 +599,48 @@ class SQLiteMemoryDatabase:
|
|
515
599
|
}
|
516
600
|
columns.append(col_data)
|
517
601
|
|
518
|
-
if
|
602
|
+
if (
|
603
|
+
"TEXT" in str(col.type).upper()
|
604
|
+
or "VARCHAR" in str(col.type).upper()
|
605
|
+
):
|
519
606
|
text_columns.append(col.name)
|
520
607
|
|
521
|
-
table_info: Dict[str, Any] = {
|
608
|
+
table_info: Dict[str, Any] = {
|
609
|
+
"name": table_name,
|
610
|
+
"columns": columns,
|
611
|
+
"text_columns": text_columns,
|
612
|
+
}
|
522
613
|
|
523
614
|
# Add row count if requested
|
524
615
|
if include_row_counts:
|
525
|
-
count_result = conn.execute(
|
616
|
+
count_result = conn.execute(
|
617
|
+
select(text("COUNT(*)")).select_from(table)
|
618
|
+
)
|
526
619
|
row_count = count_result.scalar()
|
527
620
|
table_info["row_count"] = row_count
|
528
621
|
exploration["total_rows"] += row_count
|
529
622
|
|
530
623
|
# Add sample data
|
531
624
|
sample_result = conn.execute(select(table).limit(3))
|
532
|
-
sample_rows = [
|
625
|
+
sample_rows = [
|
626
|
+
dict(row._mapping) for row in sample_result.fetchall()
|
627
|
+
]
|
533
628
|
if sample_rows:
|
534
629
|
table_info["sample_data"] = sample_rows
|
535
630
|
|
536
631
|
# Add content preview for text columns
|
537
632
|
if text_columns:
|
538
633
|
content_preview: Dict[str, List[Any]] = {}
|
539
|
-
for col_name in text_columns[
|
634
|
+
for col_name in text_columns[
|
635
|
+
:3
|
636
|
+
]: # Limit to first 3 text columns
|
540
637
|
col = table.c[col_name]
|
541
|
-
preview_result = conn.execute(
|
542
|
-
|
638
|
+
preview_result = conn.execute(
|
639
|
+
select(col).distinct().where(col.isnot(None)).limit(5)
|
640
|
+
)
|
641
|
+
unique_values: List[Any] = [
|
642
|
+
row[0] for row in preview_result.fetchall() if row[0]
|
643
|
+
]
|
543
644
|
if unique_values:
|
544
645
|
content_preview[col_name] = unique_values
|
545
646
|
|
```diff
@@ -554,22 +655,32 @@

     # --- Semantic Search Methods ---

-    def add_embedding_column(self, table_name: str, embedding_column: str = "embedding") -> EmbeddingColumnResponse:
+    def add_embedding_column(
+        self, table_name: str, embedding_column: str = "embedding"
+    ) -> EmbeddingColumnResponse:
         """Add an embedding column to a table for semantic search."""
         try:
             table = self._ensure_table_exists(table_name)

             # Check if embedding column already exists
             if embedding_column in [col.name for col in table.columns]:
-                return {"success": True, "message": f"Embedding column '{embedding_column}' already exists"}
+                return {
+                    "success": True,
+                    "message": f"Embedding column '{embedding_column}' already exists",
+                }

             # Add embedding column as TEXT (JSON storage)
             with self.get_connection() as conn:
-                conn.execute(text(f"ALTER TABLE {table_name} ADD COLUMN {embedding_column} TEXT"))
+                conn.execute(
+                    text(f"ALTER TABLE {table_name} ADD COLUMN {embedding_column} TEXT")
+                )
                 conn.commit()

             self._refresh_metadata()
-            return {"success": True, "message": f"Added embedding column '{embedding_column}' to table '{table_name}'"}
+            return {
+                "success": True,
+                "message": f"Added embedding column '{embedding_column}' to table '{table_name}'",
+            }

         except (ValidationError, SQLAlchemyError) as e:
             if isinstance(e, ValidationError):
```
```diff
@@ -586,7 +697,9 @@
     ) -> GenerateEmbeddingsResponse:
         """Generate embeddings for text content in a table."""
         if not is_semantic_search_available():
-            raise ValidationError("Semantic search is not available. Please install sentence-transformers.")
+            raise ValidationError(
+                "Semantic search is not available. Please install sentence-transformers."
+            )

         try:
             table = self._ensure_table_exists(table_name)
```
```diff
@@ -596,7 +709,9 @@
             table_columns = [col.name for col in table.columns]
             for col in text_columns:
                 if col not in table_columns:
-                    raise ValidationError(f"Column '{col}' not found in table '{table_name}'")
+                    raise ValidationError(
+                        f"Column '{col}' not found in table '{table_name}'"
+                    )

             # Add embedding column if it doesn't exist
             if embedding_column not in table_columns:
```
```diff
@@ -642,7 +757,9 @@
                     combined_text = " ".join(text_parts)

                     # Generate embedding
-                    embedding = semantic_engine.generate_embedding(combined_text)
+                    embedding = semantic_engine.generate_embedding(
+                        combined_text
+                    )
                     embedding_json = json.dumps(embedding)

                     # Update row with embedding
```
```diff
@@ -656,14 +773,17 @@
                     processed += 1

                 conn.commit()
-                logging.info(f"Generated embeddings for batch {i//batch_size + 1}, processed {processed} rows")
+                logging.info(
+                    f"Generated embeddings for batch {i//batch_size + 1}, processed {processed} rows"
+                )

             return {
                 "success": True,
                 "message": f"Generated embeddings for {processed} rows",
                 "processed": processed,
                 "model": model_name,
-                "embedding_dimension": semantic_engine.get_embedding_dimensions() or 0,
+                "embedding_dimension": semantic_engine.get_embedding_dimensions()
+                or 0,
             }

         except (ValidationError, SQLAlchemyError) as e:
```
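The batch loop above delegates to semantic_engine.generate_embedding and stores the vector as JSON text. Judging by the "Please install sentence-transformers" errors elsewhere in this file, that call plausibly wraps something like the following sketch (the model name is an assumption, not confirmed by this diff):

```python
from sentence_transformers import SentenceTransformer
import json

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model, not verified
embedding = model.encode("combined row text").tolist()  # JSON-serializable floats
embedding_json = json.dumps(embedding)  # what lands in the TEXT embedding column
```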
```diff
@@ -683,7 +803,9 @@
     ) -> SemanticSearchResponse:
         """Perform semantic search across tables using vector embeddings."""
         if not is_semantic_search_available():
-            raise ValidationError("Semantic search is not available. Please install sentence-transformers.")
+            raise ValidationError(
+                "Semantic search is not available. Please install sentence-transformers."
+            )

         if not query or not query.strip():
             raise ValidationError("Search query cannot be empty")
```
```diff
@@ -704,7 +826,9 @@

                 # Check if table has embedding column
                 if embedding_column not in [col.name for col in table.columns]:
-                    logging.warning(f"Table '{table_name}' does not have embedding column '{embedding_column}'")
+                    logging.warning(
+                        f"Table '{table_name}' does not have embedding column '{embedding_column}'"
+                    )
                     continue

                 # Get all rows with embeddings
```
```diff
@@ -728,7 +852,8 @@
                     text_cols = [
                         col.name
                         for col in table.columns
-                        if "TEXT" in str(col.type).upper()
+                        if "TEXT" in str(col.type).upper()
+                        or "VARCHAR" in str(col.type).upper()
                     ]
                 else:
                     text_cols = text_columns
```
```diff
@@ -753,6 +878,11 @@
             all_results.sort(key=lambda x: x.get("similarity_score", 0), reverse=True)
             final_results = all_results[:limit]

+            # Remove embedding data from results to keep LLM responses clean
+            for result in final_results:
+                if embedding_column in result:
+                    del result[embedding_column]
+
             return {
                 "success": True,
                 "results": final_results,
```
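The addition in this hunk is genuinely new behavior: search results now have the raw embedding column deleted before being returned, since a JSON-serialized vector is hundreds of floats of noise in an LLM-facing response. The same cleanup reappears below in the related-content path. Reduced to a sketch (names illustrative, not package API):

```python
from typing import Any, Dict, List

def strip_embeddings(rows: List[Dict[str, Any]], embedding_column: str = "embedding") -> None:
    """Drop the stored vector payload from result rows in place."""
    for row in rows:
        row.pop(embedding_column, None)

results = [{"id": 1, "content": "note", "embedding": "[0.1, -0.2]", "similarity_score": 0.91}]
strip_embeddings(results)
assert results == [{"id": 1, "content": "note", "similarity_score": 0.91}]
```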
```diff
@@ -779,7 +909,9 @@
     ) -> RelatedContentResponse:
         """Find content related to a specific row by semantic similarity."""
         if not is_semantic_search_available():
-            raise ValidationError("Semantic search is not available. Please install sentence-transformers.")
+            raise ValidationError(
+                "Semantic search is not available. Please install sentence-transformers."
+            )

         try:
             table = self._ensure_table_exists(table_name)
```
```diff
@@ -791,7 +923,9 @@
                 target_row = conn.execute(target_stmt).fetchone()

                 if not target_row:
-                    raise ValidationError(f"Row with id {row_id} not found in table '{table_name}'")
+                    raise ValidationError(
+                        f"Row with id {row_id} not found in table '{table_name}'"
+                    )

                 target_dict = dict(target_row._mapping)

```
```diff
@@ -862,13 +996,23 @@
                 for candidate_idx, similarity_score in similar_indices:
                     original_idx = valid_indices[candidate_idx]
                     row_dict = content_data[original_idx].copy()
+
+                    # Remove embedding data to avoid polluting LLM responses
+                    if embedding_column in row_dict:
+                        del row_dict[embedding_column]
+
                     row_dict["similarity_score"] = round(similarity_score, 3)
                     results.append(row_dict)

+                # Remove embedding from target_row as well
+                target_dict_clean = target_dict.copy()
+                if embedding_column in target_dict_clean:
+                    del target_dict_clean[embedding_column]
+
                 return {
                     "success": True,
                     "results": results,
-                    "target_row": target_dict,
+                    "target_row": target_dict_clean,
                     "total_results": len(results),
                     "similarity_threshold": similarity_threshold,
                     "model": model_name,
```
```diff
@@ -950,13 +1094,23 @@
             # Enhance with text matching scores
             try:
                 semantic_engine = get_semantic_engine(model_name)
-
+
                 # Verify the engine has the required method
-                if not hasattr(semantic_engine, "hybrid_search") or not callable(getattr(semantic_engine, "hybrid_search")):
-                    raise DatabaseError("Semantic engine hybrid_search method is not callable")
-
+                if not hasattr(semantic_engine, "hybrid_search") or not callable(
+                    getattr(semantic_engine, "hybrid_search")
+                ):
+                    raise DatabaseError(
+                        "Semantic engine hybrid_search method is not callable"
+                    )
+
                 enhanced_results = semantic_engine.hybrid_search(
-                    query, semantic_results, text_columns or [], embedding_column, semantic_weight, text_weight, limit
+                    query,
+                    semantic_results,
+                    text_columns or [],
+                    embedding_column,
+                    semantic_weight,
+                    text_weight,
+                    limit,
                 )
             except Exception as e:
                 # If semantic enhancement fails, return semantic results without text enhancement
```
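The guard ahead of the hybrid_search call is a generic defensive pattern: resolve the attribute once and test callability rather than trusting a duck-typed engine object. A compact equivalent (helper name is hypothetical):

```python
class DatabaseError(Exception):
    pass

def require_callable(obj: object, name: str):
    """Return obj.<name> if it exists and is callable, else raise."""
    method = getattr(obj, name, None)
    if not callable(method):
        raise DatabaseError(f"Semantic engine {name} method is not callable")
    return method

# hybrid_search = require_callable(semantic_engine, "hybrid_search")
```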
```diff
@@ -979,7 +1133,9 @@
                 raise e
             raise DatabaseError(f"Hybrid search failed: {str(e)}")

-    def get_embedding_stats(self, table_name: str, embedding_column: str = "embedding") -> ToolResponse:
+    def get_embedding_stats(
+        self, table_name: str, embedding_column: str = "embedding"
+    ) -> ToolResponse:
         """Get statistics about embeddings in a table."""
         try:
             table = self._ensure_table_exists(table_name)
```
```diff
@@ -989,8 +1145,13 @@
             # Return 0% coverage when column doesn't exist (for compatibility with tests)
             total_count = 0
             with self.get_connection() as conn:
-                total_count = conn.execute(select(text("COUNT(*)")).select_from(table)).scalar() or 0
-
+                total_count = (
+                    conn.execute(
+                        select(text("COUNT(*)")).select_from(table)
+                    ).scalar()
+                    or 0
+                )
+
             return {
                 "success": True,
                 "table_name": table_name,
```
```diff
@@ -1003,7 +1164,10 @@

             with self.get_connection() as conn:
                 # Count total rows
-                total_count = conn.execute(select(text("COUNT(*)")).select_from(table)).scalar() or 0
+                total_count = (
+                    conn.execute(select(text("COUNT(*)")).select_from(table)).scalar()
+                    or 0
+                )

                 # Count rows with embeddings
                 embedded_count = (
```
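Both embedding-stats branches count rows with select(text("COUNT(*)")).select_from(table) and coerce a None scalar to 0. A self-contained sketch of that query shape against a throwaway SQLite table (SQLAlchemy 1.4/2.x assumed):

```python
from sqlalchemy import Column, Integer, MetaData, Table, create_engine, select, text

engine = create_engine("sqlite:///:memory:")
metadata = MetaData()
notes = Table("notes", metadata, Column("id", Integer, primary_key=True))
metadata.create_all(engine)

with engine.connect() as conn:
    # .scalar() yields the single COUNT(*) value; `or 0` guards a None result
    total_count = conn.execute(select(text("COUNT(*)")).select_from(notes)).scalar() or 0
    assert total_count == 0
```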
```diff
@@ -1043,7 +1207,9 @@
             except json.JSONDecodeError:
                 pass

-            coverage_percent = (embedded_count / total_count * 100) if total_count > 0 else 0.0
+            coverage_percent = (
+                (embedded_count / total_count * 100) if total_count > 0 else 0.0
+            )

             return {
                 "success": True,
```
```diff
@@ -1072,7 +1238,7 @@ def get_database(db_path: Optional[str] = None) -> SQLiteMemoryDatabase:
     actual_path = db_path or os.environ.get("DB_PATH", "./test.db")
     if actual_path is None:
         actual_path = "./test.db"
-
+
     if _db_instance is None or (db_path and db_path != _db_instance.db_path):
         # Close previous instance if it exists
         if _db_instance is not None:
```
|