agent-runtime-core 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- agent_runtime_core/__init__.py +108 -1
- agent_runtime_core/agentic_loop.py +254 -0
- agent_runtime_core/config.py +54 -4
- agent_runtime_core/config_schema.py +307 -0
- agent_runtime_core/interfaces.py +106 -0
- agent_runtime_core/json_runtime.py +509 -0
- agent_runtime_core/llm/__init__.py +80 -7
- agent_runtime_core/llm/anthropic.py +133 -12
- agent_runtime_core/llm/models_config.py +180 -0
- agent_runtime_core/memory/__init__.py +70 -0
- agent_runtime_core/memory/manager.py +554 -0
- agent_runtime_core/memory/mixin.py +294 -0
- agent_runtime_core/multi_agent.py +569 -0
- agent_runtime_core/persistence/__init__.py +2 -0
- agent_runtime_core/persistence/file.py +277 -0
- agent_runtime_core/rag/__init__.py +65 -0
- agent_runtime_core/rag/chunking.py +224 -0
- agent_runtime_core/rag/indexer.py +253 -0
- agent_runtime_core/rag/retriever.py +261 -0
- agent_runtime_core/runner.py +193 -15
- agent_runtime_core/tool_calling_agent.py +88 -130
- agent_runtime_core/tools.py +179 -0
- agent_runtime_core/vectorstore/__init__.py +193 -0
- agent_runtime_core/vectorstore/base.py +138 -0
- agent_runtime_core/vectorstore/embeddings.py +242 -0
- agent_runtime_core/vectorstore/sqlite_vec.py +328 -0
- agent_runtime_core/vectorstore/vertex.py +295 -0
- {agent_runtime_core-0.7.0.dist-info → agent_runtime_core-0.7.1.dist-info}/METADATA +202 -1
- agent_runtime_core-0.7.1.dist-info/RECORD +57 -0
- agent_runtime_core-0.7.0.dist-info/RECORD +0 -39
- {agent_runtime_core-0.7.0.dist-info → agent_runtime_core-0.7.1.dist-info}/WHEEL +0 -0
- {agent_runtime_core-0.7.0.dist-info → agent_runtime_core-0.7.1.dist-info}/licenses/LICENSE +0 -0
agent_runtime_core/vectorstore/embeddings.py (new file)
@@ -0,0 +1,242 @@
"""
Embedding client interfaces and implementations.

Provides abstract interface for generating embeddings and concrete implementations
for OpenAI and Vertex AI embedding models.
"""

from abc import ABC, abstractmethod
from typing import Optional


class EmbeddingClient(ABC):
    """
    Abstract interface for generating embeddings.

    Embedding clients convert text into vector representations that can be
    stored in a VectorStore for similarity search.
    """

    @abstractmethod
    async def embed(self, text: str) -> list[float]:
        """
        Generate embedding for a single text.

        Args:
            text: The text to embed

        Returns:
            The embedding vector
        """
        ...

    @abstractmethod
    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """
        Generate embeddings for multiple texts.

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors in the same order as input
        """
        ...

    @property
    @abstractmethod
    def dimensions(self) -> int:
        """Return the embedding dimensions."""
        ...

    @property
    @abstractmethod
    def model_name(self) -> str:
        """Return the model name."""
        ...

    async def close(self) -> None:
        """Close any connections. Override if needed."""
        pass


class OpenAIEmbeddings(EmbeddingClient):
    """
    OpenAI embedding client using text-embedding-3-small or text-embedding-3-large.

    Requires: pip install openai
    """

    # Model dimensions mapping
    MODEL_DIMENSIONS = {
        "text-embedding-3-small": 1536,
        "text-embedding-3-large": 3072,
        "text-embedding-ada-002": 1536,
    }

    def __init__(
        self,
        model: str = "text-embedding-3-small",
        api_key: Optional[str] = None,
        dimensions: Optional[int] = None,
    ):
        """
        Initialize OpenAI embedding client.

        Args:
            model: Model name (text-embedding-3-small, text-embedding-3-large)
            api_key: OpenAI API key (uses OPENAI_API_KEY env var if not provided)
            dimensions: Optional dimension override for text-embedding-3-* models
        """
        self._model = model
        self._api_key = api_key
        self._dimensions_override = dimensions
        self._client: Optional["openai.AsyncOpenAI"] = None  # type: ignore

    def _get_client(self) -> "openai.AsyncOpenAI":  # type: ignore
        """Get or create the OpenAI client."""
        if self._client is None:
            try:
                import openai
            except ImportError:
                raise ImportError(
                    "OpenAI package not installed. Install with: pip install openai"
                )
            self._client = openai.AsyncOpenAI(api_key=self._api_key)
        return self._client

    @property
    def dimensions(self) -> int:
        """Return the embedding dimensions."""
        if self._dimensions_override:
            return self._dimensions_override
        return self.MODEL_DIMENSIONS.get(self._model, 1536)

    @property
    def model_name(self) -> str:
        """Return the model name."""
        return self._model

    async def embed(self, text: str) -> list[float]:
        """Generate embedding for a single text."""
        client = self._get_client()
        kwargs = {"model": self._model, "input": text}
        if self._dimensions_override and self._model.startswith("text-embedding-3"):
            kwargs["dimensions"] = self._dimensions_override
        response = await client.embeddings.create(**kwargs)
        return response.data[0].embedding

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for multiple texts."""
        if not texts:
            return []
        client = self._get_client()
        kwargs = {"model": self._model, "input": texts}
        if self._dimensions_override and self._model.startswith("text-embedding-3"):
            kwargs["dimensions"] = self._dimensions_override
        response = await client.embeddings.create(**kwargs)
        # Sort by index to ensure correct order
        sorted_data = sorted(response.data, key=lambda x: x.index)
        return [item.embedding for item in sorted_data]

    async def close(self) -> None:
        """Close the client."""
        if self._client is not None:
            await self._client.close()
            self._client = None


class VertexAIEmbeddings(EmbeddingClient):
    """
    Vertex AI embedding client using text-embedding-004 or text-multilingual-embedding-002.

    Requires: pip install google-cloud-aiplatform
    """

    # Model dimensions mapping
    MODEL_DIMENSIONS = {
        "text-embedding-004": 768,
        "text-multilingual-embedding-002": 768,
        "textembedding-gecko@003": 768,
        "textembedding-gecko-multilingual@001": 768,
    }

    def __init__(
        self,
        model: str = "text-embedding-004",
        project_id: Optional[str] = None,
        location: str = "us-central1",
    ):
        """
        Initialize Vertex AI embedding client.

        Args:
            model: Model name (text-embedding-004, text-multilingual-embedding-002)
            project_id: Google Cloud project ID (uses default if not provided)
            location: Google Cloud region
        """
        self._model = model
        self._project_id = project_id
        self._location = location
        self._initialized = False

    def _ensure_initialized(self) -> None:
        """Initialize Vertex AI SDK if not already done."""
        if self._initialized:
            return
        try:
            from google.cloud import aiplatform
        except ImportError:
            raise ImportError(
                "Google Cloud AI Platform package not installed. "
                "Install with: pip install google-cloud-aiplatform"
            )
        aiplatform.init(project=self._project_id, location=self._location)
        self._initialized = True

    @property
    def dimensions(self) -> int:
        """Return the embedding dimensions."""
        return self.MODEL_DIMENSIONS.get(self._model, 768)

    @property
    def model_name(self) -> str:
        """Return the model name."""
        return self._model

    async def embed(self, text: str) -> list[float]:
        """Generate embedding for a single text."""
        self._ensure_initialized()
        from vertexai.language_models import TextEmbeddingModel

        model = TextEmbeddingModel.from_pretrained(self._model)
        # Run in executor since Vertex AI SDK is synchronous
        import asyncio

        loop = asyncio.get_event_loop()
        embeddings = await loop.run_in_executor(None, lambda: model.get_embeddings([text]))
        return embeddings[0].values

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for multiple texts."""
        if not texts:
            return []
        self._ensure_initialized()
        from vertexai.language_models import TextEmbeddingModel

        model = TextEmbeddingModel.from_pretrained(self._model)
        # Run in executor since Vertex AI SDK is synchronous
        import asyncio

        loop = asyncio.get_event_loop()
        # Vertex AI has a limit of 250 texts per batch
        batch_size = 250
        all_embeddings = []
        for i in range(0, len(texts), batch_size):
            batch = texts[i : i + batch_size]
            embeddings = await loop.run_in_executor(
                None, lambda b=batch: model.get_embeddings(b)
            )
            all_embeddings.extend([e.values for e in embeddings])
        return all_embeddings
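Both clients implement the same EmbeddingClient interface and import their provider SDK lazily, so only the provider package that is actually used needs to be installed. Below is a minimal usage sketch for OpenAIEmbeddings based on the constructor and methods in the hunk above; the asyncio scaffolding and sample strings are illustrative, and it assumes the openai package is installed and OPENAI_API_KEY is set. VertexAIEmbeddings would be driven the same way.

import asyncio

from agent_runtime_core.vectorstore.embeddings import OpenAIEmbeddings


async def main() -> None:
    # api_key is omitted, so the client falls back to the OPENAI_API_KEY env var.
    client = OpenAIEmbeddings(model="text-embedding-3-small")
    try:
        # Single text -> one vector; batch -> one vector per input, in order.
        single = await client.embed("hello world")
        batch = await client.embed_batch(["first document", "second document"])
        print(client.model_name, client.dimensions, len(single), len(batch))
    finally:
        await client.close()


if __name__ == "__main__":
    asyncio.run(main())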
agent_runtime_core/vectorstore/sqlite_vec.py (new file)
@@ -0,0 +1,328 @@
"""
SQLite-vec vector store implementation.

Uses the sqlite-vec extension for vector similarity search.
Ideal for local development and small to medium datasets.

Requires: pip install sqlite-vec
"""

import json
import sqlite3
import struct
from typing import Optional

from agent_runtime_core.vectorstore.base import (
    VectorStore,
    VectorRecord,
    VectorSearchResult,
)


def _serialize_vector(vector: list[float]) -> bytes:
    """Serialize a vector to bytes for sqlite-vec."""
    return struct.pack(f"{len(vector)}f", *vector)


def _deserialize_vector(data: bytes) -> list[float]:
    """Deserialize bytes to a vector."""
    n = len(data) // 4  # 4 bytes per float
    return list(struct.unpack(f"{n}f", data))


class SqliteVecStore(VectorStore):
    """
    Vector store using sqlite-vec extension.

    This implementation stores vectors in a SQLite database with the sqlite-vec
    extension for efficient similarity search. It's ideal for:
    - Local development
    - Small to medium datasets (up to millions of vectors)
    - Embedded applications without external dependencies

    The store creates two tables:
    - {table_name}_vec: Virtual table for vector storage and search
    - {table_name}_meta: Regular table for content and metadata
    """

    def __init__(
        self,
        path: str = ":memory:",
        table_name: str = "vectors",
    ):
        """
        Initialize SQLite-vec store.

        Args:
            path: Database path (":memory:" for in-memory, or file path)
            table_name: Base name for the tables
        """
        self._path = path
        self._table_name = table_name
        self._conn: Optional[sqlite3.Connection] = None
        self._dimensions: Optional[int] = None

    def _get_connection(self) -> sqlite3.Connection:
        """Get or create the database connection."""
        if self._conn is None:
            try:
                import sqlite_vec
            except ImportError:
                raise ImportError(
                    "sqlite-vec package not installed. Install with: pip install sqlite-vec"
                )
            self._conn = sqlite3.connect(self._path, check_same_thread=False)
            self._conn.enable_load_extension(True)
            sqlite_vec.load(self._conn)
            self._conn.enable_load_extension(False)
        return self._conn

    def _ensure_tables(self, dimensions: int) -> None:
        """Ensure the required tables exist."""
        conn = self._get_connection()
        cursor = conn.cursor()

        # Check if tables already exist
        cursor.execute(
            f"SELECT name FROM sqlite_master WHERE type='table' AND name='{self._table_name}_meta'"
        )
        if cursor.fetchone() is not None:
            # Tables exist, verify dimensions match
            if self._dimensions is None:
                # Get dimensions from existing virtual table
                cursor.execute(f"PRAGMA table_info({self._table_name}_vec)")
                # The virtual table structure varies, so we'll trust the stored dimensions
                self._dimensions = dimensions
            return

        self._dimensions = dimensions

        # Create metadata table
        cursor.execute(f"""
            CREATE TABLE IF NOT EXISTS {self._table_name}_meta (
                id TEXT PRIMARY KEY,
                content TEXT NOT NULL,
                metadata TEXT NOT NULL DEFAULT '{{}}'
            )
        """)

        # Create virtual table for vectors
        cursor.execute(f"""
            CREATE VIRTUAL TABLE IF NOT EXISTS {self._table_name}_vec
            USING vec0(
                id TEXT PRIMARY KEY,
                embedding float[{dimensions}]
            )
        """)

        conn.commit()

    async def add(
        self,
        id: str,
        vector: list[float],
        content: str,
        metadata: Optional[dict] = None,
    ) -> None:
        """Add a vector with its content and metadata."""
        self._ensure_tables(len(vector))
        conn = self._get_connection()
        cursor = conn.cursor()

        metadata_json = json.dumps(metadata or {})
        vector_bytes = _serialize_vector(vector)

        # Insert or replace in both tables
        cursor.execute(
            f"INSERT OR REPLACE INTO {self._table_name}_meta (id, content, metadata) VALUES (?, ?, ?)",
            (id, content, metadata_json),
        )
        cursor.execute(
            f"INSERT OR REPLACE INTO {self._table_name}_vec (id, embedding) VALUES (?, ?)",
            (id, vector_bytes),
        )
        conn.commit()

    async def add_batch(
        self,
        items: list[tuple[str, list[float], str, Optional[dict]]],
    ) -> None:
        """Add multiple vectors efficiently."""
        if not items:
            return

        # Get dimensions from first item
        self._ensure_tables(len(items[0][1]))
        conn = self._get_connection()
        cursor = conn.cursor()

        meta_data = []
        vec_data = []
        for id, vector, content, metadata in items:
            meta_data.append((id, content, json.dumps(metadata or {})))
            vec_data.append((id, _serialize_vector(vector)))

        cursor.executemany(
            f"INSERT OR REPLACE INTO {self._table_name}_meta (id, content, metadata) VALUES (?, ?, ?)",
            meta_data,
        )
        cursor.executemany(
            f"INSERT OR REPLACE INTO {self._table_name}_vec (id, embedding) VALUES (?, ?)",
            vec_data,
        )
        conn.commit()

    async def search(
        self,
        query_vector: list[float],
        limit: int = 10,
        filter: Optional[dict] = None,
    ) -> list[VectorSearchResult]:
        """Search for similar vectors."""
        if self._dimensions is None:
            # No vectors added yet
            return []

        conn = self._get_connection()
        cursor = conn.cursor()
        query_bytes = _serialize_vector(query_vector)

        # sqlite-vec uses distance (lower = more similar), we convert to similarity score
        if filter:
            # Build filter conditions for metadata
            filter_conditions = []
            filter_values = []
            for key, value in filter.items():
                filter_conditions.append(f"json_extract(m.metadata, '$.{key}') = ?")
                filter_values.append(json.dumps(value) if not isinstance(value, str) else value)

            filter_sql = " AND ".join(filter_conditions)
            cursor.execute(
                f"""
                SELECT v.id, v.distance, m.content, m.metadata
                FROM {self._table_name}_vec v
                JOIN {self._table_name}_meta m ON v.id = m.id
                WHERE v.embedding MATCH ? AND k = ?
                AND {filter_sql}
                ORDER BY v.distance
                """,
                [query_bytes, limit] + filter_values,
            )
        else:
            cursor.execute(
                f"""
                SELECT v.id, v.distance, m.content, m.metadata
                FROM {self._table_name}_vec v
                JOIN {self._table_name}_meta m ON v.id = m.id
                WHERE v.embedding MATCH ? AND k = ?
                ORDER BY v.distance
                """,
                (query_bytes, limit),
            )

        results = []
        for row in cursor.fetchall():
            id, distance, content, metadata_json = row
            # Convert distance to similarity score (1 / (1 + distance))
            score = 1.0 / (1.0 + distance)
            results.append(
                VectorSearchResult(
                    id=id,
                    content=content,
                    score=score,
                    metadata=json.loads(metadata_json),
                )
            )
        return results

    async def delete(self, id: str) -> bool:
        """Delete a vector by ID."""
        conn = self._get_connection()
        cursor = conn.cursor()

        cursor.execute(f"DELETE FROM {self._table_name}_meta WHERE id = ?", (id,))
        deleted_meta = cursor.rowcount > 0

        cursor.execute(f"DELETE FROM {self._table_name}_vec WHERE id = ?", (id,))
        conn.commit()

        return deleted_meta

    async def delete_by_filter(self, filter: dict) -> int:
        """Delete vectors matching filter."""
        conn = self._get_connection()
        cursor = conn.cursor()

        # Build filter conditions
        filter_conditions = []
        filter_values = []
        for key, value in filter.items():
            filter_conditions.append(f"json_extract(metadata, '$.{key}') = ?")
            filter_values.append(json.dumps(value) if not isinstance(value, str) else value)

        filter_sql = " AND ".join(filter_conditions)

        # Get IDs to delete
        cursor.execute(
            f"SELECT id FROM {self._table_name}_meta WHERE {filter_sql}",
            filter_values,
        )
        ids_to_delete = [row[0] for row in cursor.fetchall()]

        if not ids_to_delete:
            return 0

        # Delete from both tables
        placeholders = ",".join("?" * len(ids_to_delete))
        cursor.execute(
            f"DELETE FROM {self._table_name}_meta WHERE id IN ({placeholders})",
            ids_to_delete,
        )
        cursor.execute(
            f"DELETE FROM {self._table_name}_vec WHERE id IN ({placeholders})",
            ids_to_delete,
        )
        conn.commit()

        return len(ids_to_delete)

    async def get(self, id: str) -> Optional[VectorRecord]:
        """Get a vector by ID."""
        conn = self._get_connection()
        cursor = conn.cursor()

        # Get metadata
        cursor.execute(
            f"SELECT content, metadata FROM {self._table_name}_meta WHERE id = ?",
            (id,),
        )
        meta_row = cursor.fetchone()
        if meta_row is None:
            return None

        content, metadata_json = meta_row

        # Get vector
        cursor.execute(
            f"SELECT embedding FROM {self._table_name}_vec WHERE id = ?",
            (id,),
        )
        vec_row = cursor.fetchone()
        if vec_row is None:
            return None

        vector = _deserialize_vector(vec_row[0])

        return VectorRecord(
            id=id,
            vector=vector,
            content=content,
            metadata=json.loads(metadata_json),
        )

    async def close(self) -> None:
        """Close the database connection."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None
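Below is a minimal sketch of how SqliteVecStore might be exercised end to end, based on the methods in the hunk above (add, add_batch, search, get, delete_by_filter, close). The toy 3-dimensional vectors, ids, and table name are illustrative stand-ins; real vectors would normally come from an EmbeddingClient such as OpenAIEmbeddings, and the example assumes the sqlite-vec package is installed.

import asyncio

from agent_runtime_core.vectorstore.sqlite_vec import SqliteVecStore


async def main() -> None:
    # In-memory store; pass a file path instead to persist between runs.
    store = SqliteVecStore(path=":memory:", table_name="docs")
    try:
        # Hand-made 3-dimensional vectors stand in for real embeddings.
        await store.add("a", [1.0, 0.0, 0.0], "alpha", {"source": "unit-a"})
        await store.add_batch(
            [
                ("b", [0.0, 1.0, 0.0], "beta", {"source": "unit-b"}),
                ("c", [0.9, 0.1, 0.0], "gamma", {"source": "unit-a"}),
            ]
        )

        # Nearest neighbours by distance, converted to a similarity score.
        hits = await store.search([1.0, 0.0, 0.0], limit=2)
        for hit in hits:
            print(hit.id, round(hit.score, 3), hit.content)

        # Metadata filter restricts candidates via json_extract on the meta table.
        only_unit_a = await store.search([1.0, 0.0, 0.0], limit=2, filter={"source": "unit-a"})
        print([h.id for h in only_unit_a])

        record = await store.get("b")
        removed = await store.delete_by_filter({"source": "unit-b"})
        print(record is not None, removed)
    finally:
        await store.close()


if __name__ == "__main__":
    asyncio.run(main())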