mcp-sqlite-memory-bank 1.3.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,7 +17,18 @@ from sqlalchemy.engine import Engine
17
17
  from sqlalchemy.exc import SQLAlchemyError
18
18
  from contextlib import contextmanager
19
19
 
20
- from .types import ValidationError, DatabaseError, SchemaError, ToolResponse, EmbeddingColumnResponse, GenerateEmbeddingsResponse, SemanticSearchResponse, RelatedContentResponse, HybridSearchResponse, EmbeddingStatsResponse
20
+ from .types import (
21
+ ValidationError,
22
+ DatabaseError,
23
+ SchemaError,
24
+ ToolResponse,
25
+ EmbeddingColumnResponse,
26
+ GenerateEmbeddingsResponse,
27
+ SemanticSearchResponse,
28
+ RelatedContentResponse,
29
+ HybridSearchResponse,
30
+ EmbeddingStatsResponse,
31
+ )
21
32
  from .semantic import get_semantic_engine, is_semantic_search_available
22
33
 
23
34
 
@@ -237,7 +248,9 @@ class SQLiteMemoryDatabase:
237
248
  raise e
238
249
  raise DatabaseError(f"Failed to insert into table {table_name}: {str(e)}")
239
250
 
240
- def read_rows(self, table_name: str, where: Optional[Dict[str, Any]] = None, limit: Optional[int] = None) -> ToolResponse:
251
+ def read_rows(
252
+ self, table_name: str, where: Optional[Dict[str, Any]] = None, limit: Optional[int] = None
253
+ ) -> ToolResponse:
241
254
  """Read rows from a table with optional filtering."""
242
255
  try:
243
256
  table = self._ensure_table_exists(table_name)
@@ -262,7 +275,9 @@ class SQLiteMemoryDatabase:
262
275
  raise e
263
276
  raise DatabaseError(f"Failed to read from table {table_name}: {str(e)}")
264
277
 
265
- def update_rows(self, table_name: str, data: Dict[str, Any], where: Optional[Dict[str, Any]] = None) -> ToolResponse:
278
+ def update_rows(
279
+ self, table_name: str, data: Dict[str, Any], where: Optional[Dict[str, Any]] = None
280
+ ) -> ToolResponse:
266
281
  """Update rows in a table."""
267
282
  if not data:
268
283
  raise ValidationError("Update data cannot be empty")
@@ -306,7 +321,11 @@ class SQLiteMemoryDatabase:
306
321
  raise DatabaseError(f"Failed to delete from table {table_name}: {str(e)}")
307
322
 
308
323
  def select_query(
309
- self, table_name: str, columns: Optional[List[str]] = None, where: Optional[Dict[str, Any]] = None, limit: int = 100
324
+ self,
325
+ table_name: str,
326
+ columns: Optional[List[str]] = None,
327
+ where: Optional[Dict[str, Any]] = None,
328
+ limit: int = 100,
310
329
  ) -> ToolResponse:
311
330
  """Run a SELECT query with specified columns and conditions."""
312
331
  if limit < 1:
@@ -344,7 +363,9 @@ class SQLiteMemoryDatabase:
344
363
  """List all columns for all tables."""
345
364
  try:
346
365
  self._refresh_metadata()
347
- schemas = {table_name: [col.name for col in table.columns] for table_name, table in self.metadata.tables.items()}
366
+ schemas = {
367
+ table_name: [col.name for col in table.columns] for table_name, table in self.metadata.tables.items()
368
+ }
348
369
  return {"success": True, "schemas": schemas}
349
370
  except SQLAlchemyError as e:
350
371
  raise DatabaseError(f"Failed to list all columns: {str(e)}")
@@ -368,7 +389,9 @@ class SQLiteMemoryDatabase:
368
389
 
369
390
  table = self.metadata.tables[table_name]
370
391
  text_columns = [
371
- col for col in table.columns if "TEXT" in str(col.type).upper() or "VARCHAR" in str(col.type).upper()
392
+ col
393
+ for col in table.columns
394
+ if "TEXT" in str(col.type).upper() or "VARCHAR" in str(col.type).upper()
372
395
  ]
373
396
 
374
397
  if not text_columns:
@@ -493,174 +516,201 @@ class SQLiteMemoryDatabase:
493
516
  """Add an embedding column to a table for semantic search."""
494
517
  try:
495
518
  table = self._ensure_table_exists(table_name)
496
-
519
+
497
520
  # Check if embedding column already exists
498
521
  if embedding_column in [col.name for col in table.columns]:
499
522
  return {"success": True, "message": f"Embedding column '{embedding_column}' already exists"}
500
-
523
+
501
524
  # Add embedding column as TEXT (JSON storage)
502
525
  with self.get_connection() as conn:
503
526
  conn.execute(text(f"ALTER TABLE {table_name} ADD COLUMN {embedding_column} TEXT"))
504
527
  conn.commit()
505
-
528
+
506
529
  self._refresh_metadata()
507
530
  return {"success": True, "message": f"Added embedding column '{embedding_column}' to table '{table_name}'"}
508
-
531
+
509
532
  except (ValidationError, SQLAlchemyError) as e:
510
533
  if isinstance(e, ValidationError):
511
534
  raise e
512
535
  raise DatabaseError(f"Failed to add embedding column: {str(e)}")
513
536
 
514
- def generate_embeddings(self, table_name: str, text_columns: List[str],
515
- embedding_column: str = "embedding",
516
- model_name: str = "all-MiniLM-L6-v2",
517
- batch_size: int = 50) -> GenerateEmbeddingsResponse:
537
+ def generate_embeddings(
538
+ self,
539
+ table_name: str,
540
+ text_columns: List[str],
541
+ embedding_column: str = "embedding",
542
+ model_name: str = "all-MiniLM-L6-v2",
543
+ batch_size: int = 50,
544
+ ) -> GenerateEmbeddingsResponse:
518
545
  """Generate embeddings for text content in a table."""
519
546
  if not is_semantic_search_available():
520
547
  raise ValidationError("Semantic search is not available. Please install sentence-transformers.")
521
-
548
+
522
549
  try:
523
550
  table = self._ensure_table_exists(table_name)
524
551
  semantic_engine = get_semantic_engine(model_name)
525
-
552
+
526
553
  # Validate text columns exist
527
554
  table_columns = [col.name for col in table.columns]
528
555
  for col in text_columns:
529
556
  if col not in table_columns:
530
557
  raise ValidationError(f"Column '{col}' not found in table '{table_name}'")
531
-
558
+
532
559
  # Add embedding column if it doesn't exist
533
560
  if embedding_column not in table_columns:
534
561
  self.add_embedding_column(table_name, embedding_column)
535
562
  table = self._ensure_table_exists(table_name) # Refresh
536
-
563
+
537
564
  # Get all rows that need embeddings
538
565
  with self.get_connection() as conn:
539
566
  # Select rows without embeddings or with null embeddings
540
567
  stmt = select(table).where(
541
- or_(table.c[embedding_column].is_(None),
568
+ or_(
569
+ table.c[embedding_column].is_(None),
542
570
  table.c[embedding_column] == "",
543
- table.c[embedding_column] == "null")
571
+ table.c[embedding_column] == "null",
572
+ )
544
573
  )
545
574
  rows = conn.execute(stmt).fetchall()
546
-
575
+
547
576
  if not rows:
548
577
  embedding_dim = semantic_engine.get_embedding_dimensions() or 0
549
- return {"success": True, "message": "All rows already have embeddings", "processed": 0, "model": model_name, "embedding_dimension": embedding_dim}
550
-
578
+ return {
579
+ "success": True,
580
+ "message": "All rows already have embeddings",
581
+ "processed": 0,
582
+ "model": model_name,
583
+ "embedding_dimension": embedding_dim,
584
+ }
585
+
551
586
  processed = 0
552
587
  for i in range(0, len(rows), batch_size):
553
- batch = rows[i:i + batch_size]
554
-
588
+ batch = rows[i : i + batch_size]
589
+
555
590
  for row in batch:
556
591
  row_dict = dict(row._mapping)
557
-
592
+
558
593
  # Combine text from specified columns
559
594
  text_parts = []
560
595
  for col in text_columns:
561
596
  if col in row_dict and row_dict[col]:
562
597
  text_parts.append(str(row_dict[col]))
563
-
598
+
564
599
  if text_parts:
565
600
  combined_text = " ".join(text_parts)
566
-
601
+
567
602
  # Generate embedding
568
603
  embedding = semantic_engine.generate_embedding(combined_text)
569
604
  embedding_json = json.dumps(embedding)
570
-
605
+
571
606
  # Update row with embedding
572
- update_stmt = update(table).where(
573
- table.c["id"] == row_dict["id"]
574
- ).values({embedding_column: embedding_json})
575
-
607
+ update_stmt = (
608
+ update(table)
609
+ .where(table.c["id"] == row_dict["id"])
610
+ .values({embedding_column: embedding_json})
611
+ )
612
+
576
613
  conn.execute(update_stmt)
577
614
  processed += 1
578
-
615
+
579
616
  conn.commit()
580
617
  logging.info(f"Generated embeddings for batch {i//batch_size + 1}, processed {processed} rows")
581
-
618
+
582
619
  return {
583
- "success": True,
620
+ "success": True,
584
621
  "message": f"Generated embeddings for {processed} rows",
585
622
  "processed": processed,
586
623
  "model": model_name,
587
- "embedding_dimension": semantic_engine.get_embedding_dimensions() or 0
624
+ "embedding_dimension": semantic_engine.get_embedding_dimensions() or 0,
588
625
  }
589
-
626
+
590
627
  except (ValidationError, SQLAlchemyError) as e:
591
628
  if isinstance(e, ValidationError):
592
629
  raise e
593
630
  raise DatabaseError(f"Failed to generate embeddings: {str(e)}")
594
631
 
595
- def semantic_search(self, query: str, tables: Optional[List[str]] = None,
596
- embedding_column: str = "embedding",
597
- text_columns: Optional[List[str]] = None,
598
- similarity_threshold: float = 0.5,
599
- limit: int = 10,
600
- model_name: str = "all-MiniLM-L6-v2") -> SemanticSearchResponse:
632
+ def semantic_search(
633
+ self,
634
+ query: str,
635
+ tables: Optional[List[str]] = None,
636
+ embedding_column: str = "embedding",
637
+ text_columns: Optional[List[str]] = None,
638
+ similarity_threshold: float = 0.5,
639
+ limit: int = 10,
640
+ model_name: str = "all-MiniLM-L6-v2",
641
+ ) -> SemanticSearchResponse:
601
642
  """Perform semantic search across tables using vector embeddings."""
602
643
  if not is_semantic_search_available():
603
644
  raise ValidationError("Semantic search is not available. Please install sentence-transformers.")
604
-
645
+
605
646
  if not query or not query.strip():
606
647
  raise ValidationError("Search query cannot be empty")
607
-
648
+
608
649
  try:
609
650
  self._refresh_metadata()
610
651
  search_tables = tables or list(self.metadata.tables.keys())
611
652
  semantic_engine = get_semantic_engine(model_name)
612
-
653
+
613
654
  all_results = []
614
-
655
+
615
656
  with self.get_connection() as conn:
616
657
  for table_name in search_tables:
617
658
  if table_name not in self.metadata.tables:
618
659
  continue
619
-
660
+
620
661
  table = self.metadata.tables[table_name]
621
-
662
+
622
663
  # Check if table has embedding column
623
664
  if embedding_column not in [col.name for col in table.columns]:
624
665
  logging.warning(f"Table '{table_name}' does not have embedding column '{embedding_column}'")
625
666
  continue
626
-
667
+
627
668
  # Get all rows with embeddings
628
669
  stmt = select(table).where(
629
- and_(table.c[embedding_column].isnot(None),
630
- table.c[embedding_column] != "",
631
- table.c[embedding_column] != "null")
670
+ and_(
671
+ table.c[embedding_column].isnot(None),
672
+ table.c[embedding_column] != "",
673
+ table.c[embedding_column] != "null",
674
+ )
632
675
  )
633
676
  rows = conn.execute(stmt).fetchall()
634
-
677
+
635
678
  if not rows:
636
679
  continue
637
-
680
+
638
681
  # Convert to list of dicts for semantic search
639
682
  content_data = [dict(row._mapping) for row in rows]
640
-
683
+
641
684
  # Determine text columns for highlighting
642
685
  if text_columns is None:
643
- text_cols = [col.name for col in table.columns
644
- if "TEXT" in str(col.type).upper() or "VARCHAR" in str(col.type).upper()]
686
+ text_cols = [
687
+ col.name
688
+ for col in table.columns
689
+ if "TEXT" in str(col.type).upper() or "VARCHAR" in str(col.type).upper()
690
+ ]
645
691
  else:
646
692
  text_cols = text_columns
647
-
693
+
648
694
  # Perform semantic search on this table
649
695
  table_results = semantic_engine.semantic_search(
650
- query, content_data, embedding_column, text_cols,
651
- similarity_threshold, limit * 2 # Get more for global ranking
696
+ query,
697
+ content_data,
698
+ embedding_column,
699
+ text_cols,
700
+ similarity_threshold,
701
+ limit * 2, # Get more for global ranking
652
702
  )
653
-
703
+
654
704
  # Add table name to results
655
705
  for result in table_results:
656
706
  result["table_name"] = table_name
657
-
707
+
658
708
  all_results.extend(table_results)
659
-
709
+
660
710
  # Sort all results by similarity score and limit
661
711
  all_results.sort(key=lambda x: x.get("similarity_score", 0), reverse=True)
662
712
  final_results = all_results[:limit]
663
-
713
+
664
714
  return {
665
715
  "success": True,
666
716
  "results": final_results,
@@ -668,55 +718,63 @@ class SQLiteMemoryDatabase:
668
718
  "tables_searched": search_tables,
669
719
  "total_results": len(final_results),
670
720
  "model": model_name,
671
- "similarity_threshold": similarity_threshold
721
+ "similarity_threshold": similarity_threshold,
672
722
  }
673
-
723
+
674
724
  except (ValidationError, SQLAlchemyError) as e:
675
725
  if isinstance(e, ValidationError):
676
726
  raise e
677
727
  raise DatabaseError(f"Semantic search failed: {str(e)}")
678
728
 
679
- def find_related_content(self, table_name: str, row_id: int,
680
- embedding_column: str = "embedding",
681
- similarity_threshold: float = 0.5,
682
- limit: int = 5,
683
- model_name: str = "all-MiniLM-L6-v2") -> RelatedContentResponse:
729
+ def find_related_content(
730
+ self,
731
+ table_name: str,
732
+ row_id: int,
733
+ embedding_column: str = "embedding",
734
+ similarity_threshold: float = 0.5,
735
+ limit: int = 5,
736
+ model_name: str = "all-MiniLM-L6-v2",
737
+ ) -> RelatedContentResponse:
684
738
  """Find content related to a specific row by semantic similarity."""
685
739
  if not is_semantic_search_available():
686
740
  raise ValidationError("Semantic search is not available. Please install sentence-transformers.")
687
-
741
+
688
742
  try:
689
743
  table = self._ensure_table_exists(table_name)
690
744
  semantic_engine = get_semantic_engine(model_name)
691
-
745
+
692
746
  with self.get_connection() as conn:
693
747
  # Get the target row
694
748
  target_stmt = select(table).where(table.c["id"] == row_id)
695
749
  target_row = conn.execute(target_stmt).fetchone()
696
-
750
+
697
751
  if not target_row:
698
752
  raise ValidationError(f"Row with id {row_id} not found in table '{table_name}'")
699
-
753
+
700
754
  target_dict = dict(target_row._mapping)
701
-
755
+
702
756
  # Check if target has embedding
703
- if (embedding_column not in target_dict or
704
- not target_dict[embedding_column] or
705
- target_dict[embedding_column] in ["", "null"]):
757
+ if (
758
+ embedding_column not in target_dict
759
+ or not target_dict[embedding_column]
760
+ or target_dict[embedding_column] in ["", "null"]
761
+ ):
706
762
  raise ValidationError(f"Row {row_id} does not have an embedding")
707
-
763
+
708
764
  # Get target embedding
709
765
  target_embedding = json.loads(target_dict[embedding_column])
710
-
766
+
711
767
  # Get all other rows with embeddings
712
768
  stmt = select(table).where(
713
- and_(table.c["id"] != row_id,
714
- table.c[embedding_column].isnot(None),
715
- table.c[embedding_column] != "",
716
- table.c[embedding_column] != "null")
769
+ and_(
770
+ table.c["id"] != row_id,
771
+ table.c[embedding_column].isnot(None),
772
+ table.c[embedding_column] != "",
773
+ table.c[embedding_column] != "null",
774
+ )
717
775
  )
718
776
  rows = conn.execute(stmt).fetchall()
719
-
777
+
720
778
  if not rows:
721
779
  return {
722
780
  "success": True,
@@ -725,14 +783,14 @@ class SQLiteMemoryDatabase:
725
783
  "total_results": 0,
726
784
  "similarity_threshold": similarity_threshold,
727
785
  "model": model_name,
728
- "message": "No other rows with embeddings found"
786
+ "message": "No other rows with embeddings found",
729
787
  }
730
-
788
+
731
789
  # Find similar rows
732
790
  content_data = [dict(row._mapping) for row in rows]
733
791
  candidate_embeddings = []
734
792
  valid_indices = []
735
-
793
+
736
794
  for idx, row_dict in enumerate(content_data):
737
795
  try:
738
796
  embedding = json.loads(row_dict[embedding_column])
@@ -740,7 +798,7 @@ class SQLiteMemoryDatabase:
740
798
  valid_indices.append(idx)
741
799
  except json.JSONDecodeError:
742
800
  continue
743
-
801
+
744
802
  if not candidate_embeddings:
745
803
  return {
746
804
  "success": True,
@@ -749,15 +807,14 @@ class SQLiteMemoryDatabase:
749
807
  "total_results": 0,
750
808
  "similarity_threshold": similarity_threshold,
751
809
  "model": model_name,
752
- "message": "No valid embeddings found for comparison"
810
+ "message": "No valid embeddings found for comparison",
753
811
  }
754
-
812
+
755
813
  # Calculate similarities
756
814
  similar_indices = semantic_engine.find_similar_embeddings(
757
- target_embedding, candidate_embeddings,
758
- similarity_threshold, limit
815
+ target_embedding, candidate_embeddings, similarity_threshold, limit
759
816
  )
760
-
817
+
761
818
  # Build results
762
819
  results = []
763
820
  for candidate_idx, similarity_score in similar_indices:
@@ -765,7 +822,7 @@ class SQLiteMemoryDatabase:
765
822
  row_dict = content_data[original_idx].copy()
766
823
  row_dict["similarity_score"] = round(similarity_score, 3)
767
824
  results.append(row_dict)
768
-
825
+
769
826
  return {
770
827
  "success": True,
771
828
  "results": results,
@@ -773,70 +830,87 @@ class SQLiteMemoryDatabase:
773
830
  "total_results": len(results),
774
831
  "similarity_threshold": similarity_threshold,
775
832
  "model": model_name,
776
- "message": f"Found {len(results)} related items"
833
+ "message": f"Found {len(results)} related items",
777
834
  }
778
-
835
+
779
836
  except (ValidationError, SQLAlchemyError) as e:
780
837
  if isinstance(e, ValidationError):
781
838
  raise e
782
839
  raise DatabaseError(f"Failed to find related content: {str(e)}")
783
840
 
784
- def hybrid_search(self, query: str, tables: Optional[List[str]] = None,
785
- text_columns: Optional[List[str]] = None,
786
- embedding_column: str = "embedding",
787
- semantic_weight: float = 0.7,
788
- text_weight: float = 0.3,
789
- limit: int = 10,
790
- model_name: str = "all-MiniLM-L6-v2") -> HybridSearchResponse:
841
+ def hybrid_search(
842
+ self,
843
+ query: str,
844
+ tables: Optional[List[str]] = None,
845
+ text_columns: Optional[List[str]] = None,
846
+ embedding_column: str = "embedding",
847
+ semantic_weight: float = 0.7,
848
+ text_weight: float = 0.3,
849
+ limit: int = 10,
850
+ model_name: str = "all-MiniLM-L6-v2",
851
+ ) -> HybridSearchResponse:
791
852
  """Combine semantic search with keyword matching for optimal results."""
792
853
  if not is_semantic_search_available():
793
854
  # Fallback to text search only
794
855
  fallback_result = self.search_content(query, tables, limit)
795
856
  # Convert to HybridSearchResponse format
796
- return cast(HybridSearchResponse, {
797
- **fallback_result,
798
- "search_type": "text_only",
799
- "semantic_weight": 0.0,
800
- "text_weight": 1.0,
801
- "model": "none"
802
- })
803
-
857
+ return cast(
858
+ HybridSearchResponse,
859
+ {
860
+ **fallback_result,
861
+ "search_type": "text_only",
862
+ "semantic_weight": 0.0,
863
+ "text_weight": 1.0,
864
+ "model": "none",
865
+ },
866
+ )
867
+
804
868
  try:
805
869
  # Get semantic search results
806
870
  semantic_response = self.semantic_search(
807
- query, tables, embedding_column, text_columns,
808
- similarity_threshold=0.3, limit=limit * 2, model_name=model_name
871
+ query,
872
+ tables,
873
+ embedding_column,
874
+ text_columns,
875
+ similarity_threshold=0.3,
876
+ limit=limit * 2,
877
+ model_name=model_name,
809
878
  )
810
-
879
+
811
880
  if not semantic_response.get("success"):
812
- return cast(HybridSearchResponse, {
813
- **semantic_response,
814
- "search_type": "semantic_failed",
815
- "semantic_weight": semantic_weight,
816
- "text_weight": text_weight,
817
- "model": model_name
818
- })
819
-
881
+ return cast(
882
+ HybridSearchResponse,
883
+ {
884
+ **semantic_response,
885
+ "search_type": "semantic_failed",
886
+ "semantic_weight": semantic_weight,
887
+ "text_weight": text_weight,
888
+ "model": model_name,
889
+ },
890
+ )
891
+
820
892
  semantic_results = semantic_response.get("results", [])
821
-
893
+
822
894
  if not semantic_results:
823
895
  # Fallback to text search
824
896
  fallback_result = self.search_content(query, tables, limit)
825
- return cast(HybridSearchResponse, {
826
- **fallback_result,
827
- "search_type": "text_fallback",
828
- "semantic_weight": semantic_weight,
829
- "text_weight": text_weight,
830
- "model": model_name
831
- })
832
-
897
+ return cast(
898
+ HybridSearchResponse,
899
+ {
900
+ **fallback_result,
901
+ "search_type": "text_fallback",
902
+ "semantic_weight": semantic_weight,
903
+ "text_weight": text_weight,
904
+ "model": model_name,
905
+ },
906
+ )
907
+
833
908
  # Enhance with text matching scores
834
909
  semantic_engine = get_semantic_engine(model_name)
835
910
  enhanced_results = semantic_engine.hybrid_search(
836
- query, semantic_results, text_columns or [],
837
- embedding_column, semantic_weight, text_weight, limit
911
+ query, semantic_results, text_columns or [], embedding_column, semantic_weight, text_weight, limit
838
912
  )
839
-
913
+
840
914
  return {
841
915
  "success": True,
842
916
  "results": enhanced_results,
@@ -845,9 +919,9 @@ class SQLiteMemoryDatabase:
845
919
  "semantic_weight": semantic_weight,
846
920
  "text_weight": text_weight,
847
921
  "total_results": len(enhanced_results),
848
- "model": model_name
922
+ "model": model_name,
849
923
  }
850
-
924
+
851
925
  except (ValidationError, SQLAlchemyError) as e:
852
926
  if isinstance(e, ValidationError):
853
927
  raise e
@@ -857,27 +931,40 @@ class SQLiteMemoryDatabase:
857
931
  """Get statistics about embeddings in a table."""
858
932
  try:
859
933
  table = self._ensure_table_exists(table_name)
860
-
934
+
861
935
  with self.get_connection() as conn:
862
936
  # Count total rows
863
937
  total_count = conn.execute(select(text("COUNT(*)")).select_from(table)).scalar() or 0
864
-
938
+
865
939
  # Count rows with embeddings
866
- embedded_count = conn.execute(
867
- select(text("COUNT(*)")).select_from(table).where(
868
- and_(table.c[embedding_column].isnot(None),
869
- table.c[embedding_column] != "",
870
- table.c[embedding_column] != "null")
871
- )
872
- ).scalar() or 0
873
-
940
+ embedded_count = (
941
+ conn.execute(
942
+ select(text("COUNT(*)"))
943
+ .select_from(table)
944
+ .where(
945
+ and_(
946
+ table.c[embedding_column].isnot(None),
947
+ table.c[embedding_column] != "",
948
+ table.c[embedding_column] != "null",
949
+ )
950
+ )
951
+ ).scalar()
952
+ or 0
953
+ )
954
+
874
955
  # Get sample embedding to check dimensions
875
- sample_stmt = select(table.c[embedding_column]).where(
876
- and_(table.c[embedding_column].isnot(None),
877
- table.c[embedding_column] != "",
878
- table.c[embedding_column] != "null")
879
- ).limit(1)
880
-
956
+ sample_stmt = (
957
+ select(table.c[embedding_column])
958
+ .where(
959
+ and_(
960
+ table.c[embedding_column].isnot(None),
961
+ table.c[embedding_column] != "",
962
+ table.c[embedding_column] != "null",
963
+ )
964
+ )
965
+ .limit(1)
966
+ )
967
+
881
968
  sample_result = conn.execute(sample_stmt).fetchone()
882
969
  dimensions = None
883
970
  if sample_result and sample_result[0]:
@@ -886,9 +973,9 @@ class SQLiteMemoryDatabase:
886
973
  dimensions = len(sample_embedding)
887
974
  except json.JSONDecodeError:
888
975
  pass
889
-
976
+
890
977
  coverage_percent = (embedded_count / total_count * 100) if total_count > 0 else 0.0
891
-
978
+
892
979
  return {
893
980
  "success": True,
894
981
  "table_name": table_name,
@@ -896,9 +983,9 @@ class SQLiteMemoryDatabase:
896
983
  "embedded_rows": embedded_count,
897
984
  "coverage_percent": round(coverage_percent, 1),
898
985
  "embedding_dimensions": dimensions,
899
- "embedding_column": embedding_column
986
+ "embedding_column": embedding_column,
900
987
  }
901
-
988
+
902
989
  except (ValidationError, SQLAlchemyError) as e:
903
990
  if isinstance(e, ValidationError):
904
991
  raise e