mcp-sqlite-memory-bank 1.3.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_sqlite_memory_bank/database.py +247 -160
- mcp_sqlite_memory_bank/prompts.py +252 -0
- mcp_sqlite_memory_bank/resources.py +164 -0
- mcp_sqlite_memory_bank/semantic.py +107 -95
- mcp_sqlite_memory_bank/server.py +183 -33
- mcp_sqlite_memory_bank/types.py +6 -0
- mcp_sqlite_memory_bank/utils.py +5 -2
- {mcp_sqlite_memory_bank-1.3.0.dist-info → mcp_sqlite_memory_bank-1.4.1.dist-info}/METADATA +168 -4
- mcp_sqlite_memory_bank-1.4.1.dist-info/RECORD +15 -0
- mcp_sqlite_memory_bank-1.3.0.dist-info/RECORD +0 -13
- {mcp_sqlite_memory_bank-1.3.0.dist-info → mcp_sqlite_memory_bank-1.4.1.dist-info}/WHEEL +0 -0
- {mcp_sqlite_memory_bank-1.3.0.dist-info → mcp_sqlite_memory_bank-1.4.1.dist-info}/entry_points.txt +0 -0
- {mcp_sqlite_memory_bank-1.3.0.dist-info → mcp_sqlite_memory_bank-1.4.1.dist-info}/licenses/LICENSE +0 -0
- {mcp_sqlite_memory_bank-1.3.0.dist-info → mcp_sqlite_memory_bank-1.4.1.dist-info}/top_level.txt +0 -0
@@ -17,7 +17,18 @@ from sqlalchemy.engine import Engine
|
|
17
17
|
from sqlalchemy.exc import SQLAlchemyError
|
18
18
|
from contextlib import contextmanager
|
19
19
|
|
20
|
-
from .types import
|
20
|
+
from .types import (
|
21
|
+
ValidationError,
|
22
|
+
DatabaseError,
|
23
|
+
SchemaError,
|
24
|
+
ToolResponse,
|
25
|
+
EmbeddingColumnResponse,
|
26
|
+
GenerateEmbeddingsResponse,
|
27
|
+
SemanticSearchResponse,
|
28
|
+
RelatedContentResponse,
|
29
|
+
HybridSearchResponse,
|
30
|
+
EmbeddingStatsResponse,
|
31
|
+
)
|
21
32
|
from .semantic import get_semantic_engine, is_semantic_search_available
|
22
33
|
|
23
34
|
|
@@ -237,7 +248,9 @@ class SQLiteMemoryDatabase:
|
|
237
248
|
raise e
|
238
249
|
raise DatabaseError(f"Failed to insert into table {table_name}: {str(e)}")
|
239
250
|
|
240
|
-
def read_rows(
|
251
|
+
def read_rows(
|
252
|
+
self, table_name: str, where: Optional[Dict[str, Any]] = None, limit: Optional[int] = None
|
253
|
+
) -> ToolResponse:
|
241
254
|
"""Read rows from a table with optional filtering."""
|
242
255
|
try:
|
243
256
|
table = self._ensure_table_exists(table_name)
|
@@ -262,7 +275,9 @@ class SQLiteMemoryDatabase:
|
|
262
275
|
raise e
|
263
276
|
raise DatabaseError(f"Failed to read from table {table_name}: {str(e)}")
|
264
277
|
|
265
|
-
def update_rows(
|
278
|
+
def update_rows(
|
279
|
+
self, table_name: str, data: Dict[str, Any], where: Optional[Dict[str, Any]] = None
|
280
|
+
) -> ToolResponse:
|
266
281
|
"""Update rows in a table."""
|
267
282
|
if not data:
|
268
283
|
raise ValidationError("Update data cannot be empty")
|
@@ -306,7 +321,11 @@ class SQLiteMemoryDatabase:
|
|
306
321
|
raise DatabaseError(f"Failed to delete from table {table_name}: {str(e)}")
|
307
322
|
|
308
323
|
def select_query(
|
309
|
-
self,
|
324
|
+
self,
|
325
|
+
table_name: str,
|
326
|
+
columns: Optional[List[str]] = None,
|
327
|
+
where: Optional[Dict[str, Any]] = None,
|
328
|
+
limit: int = 100,
|
310
329
|
) -> ToolResponse:
|
311
330
|
"""Run a SELECT query with specified columns and conditions."""
|
312
331
|
if limit < 1:
|
@@ -344,7 +363,9 @@ class SQLiteMemoryDatabase:
|
|
344
363
|
"""List all columns for all tables."""
|
345
364
|
try:
|
346
365
|
self._refresh_metadata()
|
347
|
-
schemas = {
|
366
|
+
schemas = {
|
367
|
+
table_name: [col.name for col in table.columns] for table_name, table in self.metadata.tables.items()
|
368
|
+
}
|
348
369
|
return {"success": True, "schemas": schemas}
|
349
370
|
except SQLAlchemyError as e:
|
350
371
|
raise DatabaseError(f"Failed to list all columns: {str(e)}")
|
@@ -368,7 +389,9 @@ class SQLiteMemoryDatabase:
|
|
368
389
|
|
369
390
|
table = self.metadata.tables[table_name]
|
370
391
|
text_columns = [
|
371
|
-
col
|
392
|
+
col
|
393
|
+
for col in table.columns
|
394
|
+
if "TEXT" in str(col.type).upper() or "VARCHAR" in str(col.type).upper()
|
372
395
|
]
|
373
396
|
|
374
397
|
if not text_columns:
|
@@ -493,174 +516,201 @@ class SQLiteMemoryDatabase:
|
|
493
516
|
"""Add an embedding column to a table for semantic search."""
|
494
517
|
try:
|
495
518
|
table = self._ensure_table_exists(table_name)
|
496
|
-
|
519
|
+
|
497
520
|
# Check if embedding column already exists
|
498
521
|
if embedding_column in [col.name for col in table.columns]:
|
499
522
|
return {"success": True, "message": f"Embedding column '{embedding_column}' already exists"}
|
500
|
-
|
523
|
+
|
501
524
|
# Add embedding column as TEXT (JSON storage)
|
502
525
|
with self.get_connection() as conn:
|
503
526
|
conn.execute(text(f"ALTER TABLE {table_name} ADD COLUMN {embedding_column} TEXT"))
|
504
527
|
conn.commit()
|
505
|
-
|
528
|
+
|
506
529
|
self._refresh_metadata()
|
507
530
|
return {"success": True, "message": f"Added embedding column '{embedding_column}' to table '{table_name}'"}
|
508
|
-
|
531
|
+
|
509
532
|
except (ValidationError, SQLAlchemyError) as e:
|
510
533
|
if isinstance(e, ValidationError):
|
511
534
|
raise e
|
512
535
|
raise DatabaseError(f"Failed to add embedding column: {str(e)}")
|
513
536
|
|
514
|
-
def generate_embeddings(
    self,
    table_name: str,
    text_columns: List[str],
    embedding_column: str = "embedding",
    model_name: str = "all-MiniLM-L6-v2",
    batch_size: int = 50,
) -> GenerateEmbeddingsResponse:
    """Generate embeddings for text content in a table.

    Selects every row whose ``embedding_column`` is NULL, ``""`` or the
    string ``"null"``, joins the non-empty values of ``text_columns`` with a
    single space, asks the semantic engine for an embedding of that text and
    stores it JSON-encoded in ``embedding_column``. Rows are matched for
    update by their ``id`` column. Work is committed once per batch.

    Args:
        table_name: Table whose rows should receive embeddings.
        text_columns: Columns whose values are concatenated into the text
            that is embedded. Every name must exist in the table.
        embedding_column: Column that stores the JSON-encoded embedding. It
            is created via ``add_embedding_column`` when missing.
        model_name: Model identifier passed to ``get_semantic_engine``.
        batch_size: Number of rows handled between commits; must be >= 1.

    Returns:
        A dict with ``success``, ``message``, ``processed`` (number of rows
        updated), ``model`` and ``embedding_dimension``. Rows in which every
        listed text column is empty are skipped and not counted.

    Raises:
        ValidationError: Semantic search is unavailable, ``batch_size`` is
            smaller than 1, a text column is missing, or the table has no
            ``id`` column to address rows by.
        DatabaseError: The underlying SQLAlchemy operation failed.
    """
    if not is_semantic_search_available():
        raise ValidationError("Semantic search is not available. Please install sentence-transformers.")

    # A step of 0 makes range() raise a bare ValueError, and a negative step
    # yields an empty range, which would report success without embedding
    # anything. Reject both up front, as select_query does for its limit.
    if batch_size < 1:
        raise ValidationError("batch_size must be at least 1")

    try:
        table = self._ensure_table_exists(table_name)
        semantic_engine = get_semantic_engine(model_name)

        # Validate text columns exist
        table_columns = [col.name for col in table.columns]
        for col in text_columns:
            if col not in table_columns:
                raise ValidationError(f"Column '{col}' not found in table '{table_name}'")

        # Rows are updated through table.c["id"]; without this check a table
        # lacking that column would fail with an unhandled KeyError.
        if "id" not in table_columns:
            raise ValidationError(f"Table '{table_name}' must have an 'id' column to store embeddings")

        # Add embedding column if it doesn't exist
        if embedding_column not in table_columns:
            self.add_embedding_column(table_name, embedding_column)
            table = self._ensure_table_exists(table_name)  # Refresh

        with self.get_connection() as conn:
            # Select rows without embeddings or with null embeddings
            stmt = select(table).where(
                or_(
                    table.c[embedding_column].is_(None),
                    table.c[embedding_column] == "",
                    table.c[embedding_column] == "null",
                )
            )
            rows = conn.execute(stmt).fetchall()

            if not rows:
                embedding_dim = semantic_engine.get_embedding_dimensions() or 0
                return {
                    "success": True,
                    "message": "All rows already have embeddings",
                    "processed": 0,
                    "model": model_name,
                    "embedding_dimension": embedding_dim,
                }

            processed = 0
            for i in range(0, len(rows), batch_size):
                batch = rows[i : i + batch_size]

                for row in batch:
                    row_dict = dict(row._mapping)

                    # Combine text from specified columns, ignoring empty values
                    text_parts = [str(row_dict[col]) for col in text_columns if row_dict.get(col)]
                    if not text_parts:
                        # Nothing to embed for this row; leave it untouched.
                        continue

                    combined_text = " ".join(text_parts)

                    # Generate embedding and store it as JSON text
                    # (presumably a list of floats -- whatever the engine
                    # returns must be JSON-serializable).
                    embedding = semantic_engine.generate_embedding(combined_text)
                    embedding_json = json.dumps(embedding)

                    # Update row with embedding
                    update_stmt = (
                        update(table)
                        .where(table.c["id"] == row_dict["id"])
                        .values({embedding_column: embedding_json})
                    )

                    conn.execute(update_stmt)
                    processed += 1

                # Commit per batch so earlier batches are already committed
                # if a later one fails.
                conn.commit()
                logging.info(
                    "Generated embeddings for batch %d, processed %d rows",
                    i // batch_size + 1,
                    processed,
                )

            return {
                "success": True,
                "message": f"Generated embeddings for {processed} rows",
                "processed": processed,
                "model": model_name,
                "embedding_dimension": semantic_engine.get_embedding_dimensions() or 0,
            }

    except ValidationError:
        # Validation problems are reported to the caller unchanged.
        raise
    except SQLAlchemyError as e:
        raise DatabaseError(f"Failed to generate embeddings: {str(e)}") from e
|
594
631
|
|
595
|
-
def semantic_search(
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
632
|
+
def semantic_search(
|
633
|
+
self,
|
634
|
+
query: str,
|
635
|
+
tables: Optional[List[str]] = None,
|
636
|
+
embedding_column: str = "embedding",
|
637
|
+
text_columns: Optional[List[str]] = None,
|
638
|
+
similarity_threshold: float = 0.5,
|
639
|
+
limit: int = 10,
|
640
|
+
model_name: str = "all-MiniLM-L6-v2",
|
641
|
+
) -> SemanticSearchResponse:
|
601
642
|
"""Perform semantic search across tables using vector embeddings."""
|
602
643
|
if not is_semantic_search_available():
|
603
644
|
raise ValidationError("Semantic search is not available. Please install sentence-transformers.")
|
604
|
-
|
645
|
+
|
605
646
|
if not query or not query.strip():
|
606
647
|
raise ValidationError("Search query cannot be empty")
|
607
|
-
|
648
|
+
|
608
649
|
try:
|
609
650
|
self._refresh_metadata()
|
610
651
|
search_tables = tables or list(self.metadata.tables.keys())
|
611
652
|
semantic_engine = get_semantic_engine(model_name)
|
612
|
-
|
653
|
+
|
613
654
|
all_results = []
|
614
|
-
|
655
|
+
|
615
656
|
with self.get_connection() as conn:
|
616
657
|
for table_name in search_tables:
|
617
658
|
if table_name not in self.metadata.tables:
|
618
659
|
continue
|
619
|
-
|
660
|
+
|
620
661
|
table = self.metadata.tables[table_name]
|
621
|
-
|
662
|
+
|
622
663
|
# Check if table has embedding column
|
623
664
|
if embedding_column not in [col.name for col in table.columns]:
|
624
665
|
logging.warning(f"Table '{table_name}' does not have embedding column '{embedding_column}'")
|
625
666
|
continue
|
626
|
-
|
667
|
+
|
627
668
|
# Get all rows with embeddings
|
628
669
|
stmt = select(table).where(
|
629
|
-
and_(
|
630
|
-
|
631
|
-
|
670
|
+
and_(
|
671
|
+
table.c[embedding_column].isnot(None),
|
672
|
+
table.c[embedding_column] != "",
|
673
|
+
table.c[embedding_column] != "null",
|
674
|
+
)
|
632
675
|
)
|
633
676
|
rows = conn.execute(stmt).fetchall()
|
634
|
-
|
677
|
+
|
635
678
|
if not rows:
|
636
679
|
continue
|
637
|
-
|
680
|
+
|
638
681
|
# Convert to list of dicts for semantic search
|
639
682
|
content_data = [dict(row._mapping) for row in rows]
|
640
|
-
|
683
|
+
|
641
684
|
# Determine text columns for highlighting
|
642
685
|
if text_columns is None:
|
643
|
-
text_cols = [
|
644
|
-
|
686
|
+
text_cols = [
|
687
|
+
col.name
|
688
|
+
for col in table.columns
|
689
|
+
if "TEXT" in str(col.type).upper() or "VARCHAR" in str(col.type).upper()
|
690
|
+
]
|
645
691
|
else:
|
646
692
|
text_cols = text_columns
|
647
|
-
|
693
|
+
|
648
694
|
# Perform semantic search on this table
|
649
695
|
table_results = semantic_engine.semantic_search(
|
650
|
-
query,
|
651
|
-
|
696
|
+
query,
|
697
|
+
content_data,
|
698
|
+
embedding_column,
|
699
|
+
text_cols,
|
700
|
+
similarity_threshold,
|
701
|
+
limit * 2, # Get more for global ranking
|
652
702
|
)
|
653
|
-
|
703
|
+
|
654
704
|
# Add table name to results
|
655
705
|
for result in table_results:
|
656
706
|
result["table_name"] = table_name
|
657
|
-
|
707
|
+
|
658
708
|
all_results.extend(table_results)
|
659
|
-
|
709
|
+
|
660
710
|
# Sort all results by similarity score and limit
|
661
711
|
all_results.sort(key=lambda x: x.get("similarity_score", 0), reverse=True)
|
662
712
|
final_results = all_results[:limit]
|
663
|
-
|
713
|
+
|
664
714
|
return {
|
665
715
|
"success": True,
|
666
716
|
"results": final_results,
|
@@ -668,55 +718,63 @@ class SQLiteMemoryDatabase:
|
|
668
718
|
"tables_searched": search_tables,
|
669
719
|
"total_results": len(final_results),
|
670
720
|
"model": model_name,
|
671
|
-
"similarity_threshold": similarity_threshold
|
721
|
+
"similarity_threshold": similarity_threshold,
|
672
722
|
}
|
673
|
-
|
723
|
+
|
674
724
|
except (ValidationError, SQLAlchemyError) as e:
|
675
725
|
if isinstance(e, ValidationError):
|
676
726
|
raise e
|
677
727
|
raise DatabaseError(f"Semantic search failed: {str(e)}")
|
678
728
|
|
679
|
-
def find_related_content(
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
729
|
+
def find_related_content(
|
730
|
+
self,
|
731
|
+
table_name: str,
|
732
|
+
row_id: int,
|
733
|
+
embedding_column: str = "embedding",
|
734
|
+
similarity_threshold: float = 0.5,
|
735
|
+
limit: int = 5,
|
736
|
+
model_name: str = "all-MiniLM-L6-v2",
|
737
|
+
) -> RelatedContentResponse:
|
684
738
|
"""Find content related to a specific row by semantic similarity."""
|
685
739
|
if not is_semantic_search_available():
|
686
740
|
raise ValidationError("Semantic search is not available. Please install sentence-transformers.")
|
687
|
-
|
741
|
+
|
688
742
|
try:
|
689
743
|
table = self._ensure_table_exists(table_name)
|
690
744
|
semantic_engine = get_semantic_engine(model_name)
|
691
|
-
|
745
|
+
|
692
746
|
with self.get_connection() as conn:
|
693
747
|
# Get the target row
|
694
748
|
target_stmt = select(table).where(table.c["id"] == row_id)
|
695
749
|
target_row = conn.execute(target_stmt).fetchone()
|
696
|
-
|
750
|
+
|
697
751
|
if not target_row:
|
698
752
|
raise ValidationError(f"Row with id {row_id} not found in table '{table_name}'")
|
699
|
-
|
753
|
+
|
700
754
|
target_dict = dict(target_row._mapping)
|
701
|
-
|
755
|
+
|
702
756
|
# Check if target has embedding
|
703
|
-
if (
|
704
|
-
not target_dict
|
705
|
-
target_dict[embedding_column]
|
757
|
+
if (
|
758
|
+
embedding_column not in target_dict
|
759
|
+
or not target_dict[embedding_column]
|
760
|
+
or target_dict[embedding_column] in ["", "null"]
|
761
|
+
):
|
706
762
|
raise ValidationError(f"Row {row_id} does not have an embedding")
|
707
|
-
|
763
|
+
|
708
764
|
# Get target embedding
|
709
765
|
target_embedding = json.loads(target_dict[embedding_column])
|
710
|
-
|
766
|
+
|
711
767
|
# Get all other rows with embeddings
|
712
768
|
stmt = select(table).where(
|
713
|
-
and_(
|
714
|
-
|
715
|
-
|
716
|
-
|
769
|
+
and_(
|
770
|
+
table.c["id"] != row_id,
|
771
|
+
table.c[embedding_column].isnot(None),
|
772
|
+
table.c[embedding_column] != "",
|
773
|
+
table.c[embedding_column] != "null",
|
774
|
+
)
|
717
775
|
)
|
718
776
|
rows = conn.execute(stmt).fetchall()
|
719
|
-
|
777
|
+
|
720
778
|
if not rows:
|
721
779
|
return {
|
722
780
|
"success": True,
|
@@ -725,14 +783,14 @@ class SQLiteMemoryDatabase:
|
|
725
783
|
"total_results": 0,
|
726
784
|
"similarity_threshold": similarity_threshold,
|
727
785
|
"model": model_name,
|
728
|
-
"message": "No other rows with embeddings found"
|
786
|
+
"message": "No other rows with embeddings found",
|
729
787
|
}
|
730
|
-
|
788
|
+
|
731
789
|
# Find similar rows
|
732
790
|
content_data = [dict(row._mapping) for row in rows]
|
733
791
|
candidate_embeddings = []
|
734
792
|
valid_indices = []
|
735
|
-
|
793
|
+
|
736
794
|
for idx, row_dict in enumerate(content_data):
|
737
795
|
try:
|
738
796
|
embedding = json.loads(row_dict[embedding_column])
|
@@ -740,7 +798,7 @@ class SQLiteMemoryDatabase:
|
|
740
798
|
valid_indices.append(idx)
|
741
799
|
except json.JSONDecodeError:
|
742
800
|
continue
|
743
|
-
|
801
|
+
|
744
802
|
if not candidate_embeddings:
|
745
803
|
return {
|
746
804
|
"success": True,
|
@@ -749,15 +807,14 @@ class SQLiteMemoryDatabase:
|
|
749
807
|
"total_results": 0,
|
750
808
|
"similarity_threshold": similarity_threshold,
|
751
809
|
"model": model_name,
|
752
|
-
"message": "No valid embeddings found for comparison"
|
810
|
+
"message": "No valid embeddings found for comparison",
|
753
811
|
}
|
754
|
-
|
812
|
+
|
755
813
|
# Calculate similarities
|
756
814
|
similar_indices = semantic_engine.find_similar_embeddings(
|
757
|
-
target_embedding, candidate_embeddings,
|
758
|
-
similarity_threshold, limit
|
815
|
+
target_embedding, candidate_embeddings, similarity_threshold, limit
|
759
816
|
)
|
760
|
-
|
817
|
+
|
761
818
|
# Build results
|
762
819
|
results = []
|
763
820
|
for candidate_idx, similarity_score in similar_indices:
|
@@ -765,7 +822,7 @@ class SQLiteMemoryDatabase:
|
|
765
822
|
row_dict = content_data[original_idx].copy()
|
766
823
|
row_dict["similarity_score"] = round(similarity_score, 3)
|
767
824
|
results.append(row_dict)
|
768
|
-
|
825
|
+
|
769
826
|
return {
|
770
827
|
"success": True,
|
771
828
|
"results": results,
|
@@ -773,70 +830,87 @@ class SQLiteMemoryDatabase:
|
|
773
830
|
"total_results": len(results),
|
774
831
|
"similarity_threshold": similarity_threshold,
|
775
832
|
"model": model_name,
|
776
|
-
"message": f"Found {len(results)} related items"
|
833
|
+
"message": f"Found {len(results)} related items",
|
777
834
|
}
|
778
|
-
|
835
|
+
|
779
836
|
except (ValidationError, SQLAlchemyError) as e:
|
780
837
|
if isinstance(e, ValidationError):
|
781
838
|
raise e
|
782
839
|
raise DatabaseError(f"Failed to find related content: {str(e)}")
|
783
840
|
|
784
|
-
def hybrid_search(
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
841
|
+
def hybrid_search(
|
842
|
+
self,
|
843
|
+
query: str,
|
844
|
+
tables: Optional[List[str]] = None,
|
845
|
+
text_columns: Optional[List[str]] = None,
|
846
|
+
embedding_column: str = "embedding",
|
847
|
+
semantic_weight: float = 0.7,
|
848
|
+
text_weight: float = 0.3,
|
849
|
+
limit: int = 10,
|
850
|
+
model_name: str = "all-MiniLM-L6-v2",
|
851
|
+
) -> HybridSearchResponse:
|
791
852
|
"""Combine semantic search with keyword matching for optimal results."""
|
792
853
|
if not is_semantic_search_available():
|
793
854
|
# Fallback to text search only
|
794
855
|
fallback_result = self.search_content(query, tables, limit)
|
795
856
|
# Convert to HybridSearchResponse format
|
796
|
-
return cast(
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
857
|
+
return cast(
|
858
|
+
HybridSearchResponse,
|
859
|
+
{
|
860
|
+
**fallback_result,
|
861
|
+
"search_type": "text_only",
|
862
|
+
"semantic_weight": 0.0,
|
863
|
+
"text_weight": 1.0,
|
864
|
+
"model": "none",
|
865
|
+
},
|
866
|
+
)
|
867
|
+
|
804
868
|
try:
|
805
869
|
# Get semantic search results
|
806
870
|
semantic_response = self.semantic_search(
|
807
|
-
query,
|
808
|
-
|
871
|
+
query,
|
872
|
+
tables,
|
873
|
+
embedding_column,
|
874
|
+
text_columns,
|
875
|
+
similarity_threshold=0.3,
|
876
|
+
limit=limit * 2,
|
877
|
+
model_name=model_name,
|
809
878
|
)
|
810
|
-
|
879
|
+
|
811
880
|
if not semantic_response.get("success"):
|
812
|
-
return cast(
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
881
|
+
return cast(
|
882
|
+
HybridSearchResponse,
|
883
|
+
{
|
884
|
+
**semantic_response,
|
885
|
+
"search_type": "semantic_failed",
|
886
|
+
"semantic_weight": semantic_weight,
|
887
|
+
"text_weight": text_weight,
|
888
|
+
"model": model_name,
|
889
|
+
},
|
890
|
+
)
|
891
|
+
|
820
892
|
semantic_results = semantic_response.get("results", [])
|
821
|
-
|
893
|
+
|
822
894
|
if not semantic_results:
|
823
895
|
# Fallback to text search
|
824
896
|
fallback_result = self.search_content(query, tables, limit)
|
825
|
-
return cast(
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
897
|
+
return cast(
|
898
|
+
HybridSearchResponse,
|
899
|
+
{
|
900
|
+
**fallback_result,
|
901
|
+
"search_type": "text_fallback",
|
902
|
+
"semantic_weight": semantic_weight,
|
903
|
+
"text_weight": text_weight,
|
904
|
+
"model": model_name,
|
905
|
+
},
|
906
|
+
)
|
907
|
+
|
833
908
|
# Enhance with text matching scores
|
834
909
|
semantic_engine = get_semantic_engine(model_name)
|
835
910
|
enhanced_results = semantic_engine.hybrid_search(
|
836
|
-
query, semantic_results, text_columns or [],
|
837
|
-
embedding_column, semantic_weight, text_weight, limit
|
911
|
+
query, semantic_results, text_columns or [], embedding_column, semantic_weight, text_weight, limit
|
838
912
|
)
|
839
|
-
|
913
|
+
|
840
914
|
return {
|
841
915
|
"success": True,
|
842
916
|
"results": enhanced_results,
|
@@ -845,9 +919,9 @@ class SQLiteMemoryDatabase:
|
|
845
919
|
"semantic_weight": semantic_weight,
|
846
920
|
"text_weight": text_weight,
|
847
921
|
"total_results": len(enhanced_results),
|
848
|
-
"model": model_name
|
922
|
+
"model": model_name,
|
849
923
|
}
|
850
|
-
|
924
|
+
|
851
925
|
except (ValidationError, SQLAlchemyError) as e:
|
852
926
|
if isinstance(e, ValidationError):
|
853
927
|
raise e
|
@@ -857,27 +931,40 @@ class SQLiteMemoryDatabase:
|
|
857
931
|
"""Get statistics about embeddings in a table."""
|
858
932
|
try:
|
859
933
|
table = self._ensure_table_exists(table_name)
|
860
|
-
|
934
|
+
|
861
935
|
with self.get_connection() as conn:
|
862
936
|
# Count total rows
|
863
937
|
total_count = conn.execute(select(text("COUNT(*)")).select_from(table)).scalar() or 0
|
864
|
-
|
938
|
+
|
865
939
|
# Count rows with embeddings
|
866
|
-
embedded_count =
|
867
|
-
|
868
|
-
|
869
|
-
|
870
|
-
|
871
|
-
|
872
|
-
|
873
|
-
|
940
|
+
embedded_count = (
|
941
|
+
conn.execute(
|
942
|
+
select(text("COUNT(*)"))
|
943
|
+
.select_from(table)
|
944
|
+
.where(
|
945
|
+
and_(
|
946
|
+
table.c[embedding_column].isnot(None),
|
947
|
+
table.c[embedding_column] != "",
|
948
|
+
table.c[embedding_column] != "null",
|
949
|
+
)
|
950
|
+
)
|
951
|
+
).scalar()
|
952
|
+
or 0
|
953
|
+
)
|
954
|
+
|
874
955
|
# Get sample embedding to check dimensions
|
875
|
-
sample_stmt =
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
|
880
|
-
|
956
|
+
sample_stmt = (
|
957
|
+
select(table.c[embedding_column])
|
958
|
+
.where(
|
959
|
+
and_(
|
960
|
+
table.c[embedding_column].isnot(None),
|
961
|
+
table.c[embedding_column] != "",
|
962
|
+
table.c[embedding_column] != "null",
|
963
|
+
)
|
964
|
+
)
|
965
|
+
.limit(1)
|
966
|
+
)
|
967
|
+
|
881
968
|
sample_result = conn.execute(sample_stmt).fetchone()
|
882
969
|
dimensions = None
|
883
970
|
if sample_result and sample_result[0]:
|
@@ -886,9 +973,9 @@ class SQLiteMemoryDatabase:
|
|
886
973
|
dimensions = len(sample_embedding)
|
887
974
|
except json.JSONDecodeError:
|
888
975
|
pass
|
889
|
-
|
976
|
+
|
890
977
|
coverage_percent = (embedded_count / total_count * 100) if total_count > 0 else 0.0
|
891
|
-
|
978
|
+
|
892
979
|
return {
|
893
980
|
"success": True,
|
894
981
|
"table_name": table_name,
|
@@ -896,9 +983,9 @@ class SQLiteMemoryDatabase:
|
|
896
983
|
"embedded_rows": embedded_count,
|
897
984
|
"coverage_percent": round(coverage_percent, 1),
|
898
985
|
"embedding_dimensions": dimensions,
|
899
|
-
"embedding_column": embedding_column
|
986
|
+
"embedding_column": embedding_column,
|
900
987
|
}
|
901
|
-
|
988
|
+
|
902
989
|
except (ValidationError, SQLAlchemyError) as e:
|
903
990
|
if isinstance(e, ValidationError):
|
904
991
|
raise e
|