mcp-code-indexer 4.2.15__py3-none-any.whl → 4.2.17__py3-none-any.whl
This diff compares the contents of package versions that were publicly released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
- mcp_code_indexer/database/database.py +334 -115
- mcp_code_indexer/database/database_factory.py +1 -1
- mcp_code_indexer/database/exceptions.py +1 -1
- mcp_code_indexer/database/models.py +66 -24
- mcp_code_indexer/database/retry_executor.py +15 -5
- mcp_code_indexer/file_scanner.py +107 -12
- mcp_code_indexer/main.py +43 -30
- mcp_code_indexer/server/mcp_server.py +201 -7
- mcp_code_indexer/vector_mode/chunking/ast_chunker.py +103 -84
- mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +1 -0
- mcp_code_indexer/vector_mode/config.py +113 -45
- mcp_code_indexer/vector_mode/const.py +24 -0
- mcp_code_indexer/vector_mode/daemon.py +860 -98
- mcp_code_indexer/vector_mode/monitoring/change_detector.py +113 -97
- mcp_code_indexer/vector_mode/monitoring/file_watcher.py +175 -121
- mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +291 -98
- mcp_code_indexer/vector_mode/providers/voyage_client.py +140 -38
- mcp_code_indexer/vector_mode/services/__init__.py +9 -0
- mcp_code_indexer/vector_mode/services/embedding_service.py +389 -0
- mcp_code_indexer/vector_mode/services/vector_mode_tools_service.py +459 -0
- mcp_code_indexer/vector_mode/services/vector_storage_service.py +580 -0
- mcp_code_indexer/vector_mode/types.py +46 -0
- mcp_code_indexer/vector_mode/utils.py +50 -0
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/METADATA +13 -10
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/RECORD +28 -21
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/WHEEL +1 -1
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info/licenses}/LICENSE +0 -0
The remaining content is the diff of `mcp_code_indexer/vector_mode/providers/turbopuffer_client.py`, where the client was rewritten against the TurboPuffer v0.5+ SDK.

```diff
--- a/mcp_code_indexer/vector_mode/providers/turbopuffer_client.py
+++ b/mcp_code_indexer/vector_mode/providers/turbopuffer_client.py
@@ -14,171 +14,354 @@ import uuid
 from typing import List, Dict, Any, Optional
 import turbopuffer
 
+
+from turbopuffer.types import Row
+
 from ..config import VectorConfig
 
 logger = logging.getLogger(__name__)
 
+
 class TurbopufferClient:
     """Clean Turbopuffer client using official SDK."""
-
+
     def __init__(self, api_key: str, region: str = "gcp-europe-west3"):
         self.api_key = api_key
         self.region = region
-
+
         # Initialize official TurboPuffer client
-        self.client = turbopuffer.Turbopuffer(
-            api_key=api_key,
-            region=region
-        )
-        logger.info(f"Initialized TurboPuffer client with region {region}")
-
+        self.client = turbopuffer.Turbopuffer(api_key=api_key, region=region)
+
     def health_check(self) -> bool:
         """Check if Turbopuffer service is healthy."""
         try:
-
+            self.client.namespaces()
             return True
         except Exception as e:
             logger.warning(f"Turbopuffer health check failed: {e}")
             return False
-
+
+    def validate_api_access(self) -> None:
+        """
+        Validate API key and access to Turbopuffer service.
+
+        Raises:
+            RuntimeError: If API access validation fails with specific error details
+        """
+        logger.info("Validating Turbopuffer API access...")
+        try:
+            self.client.namespaces()
+            logger.debug("Turbopuffer API access validated successfully")
+        except Exception as e:
+            error_msg = str(e).lower()
+
+            if "401" in error_msg or "unauthorized" in error_msg:
+                raise RuntimeError(
+                    f"Turbopuffer API authentication failed: Invalid or expired API key. "
+                    f"Please check your TURBOPUFFER_API_KEY. Error: {e}"
+                )
+            elif "403" in error_msg or "forbidden" in error_msg:
+                raise RuntimeError(
+                    f"Turbopuffer API access denied: API key lacks required permissions. Error: {e}"
+                )
+            elif "429" in error_msg or "rate limit" in error_msg:
+                raise RuntimeError(
+                    f"Turbopuffer API rate limit exceeded: Too many requests. Error: {e}"
+                )
+            elif "5" in error_msg and ("error" in error_msg or "server" in error_msg):
+                raise RuntimeError(
+                    f"Turbopuffer service unavailable: Server error. Error: {e}"
+                )
+            else:
+                raise RuntimeError(f"Turbopuffer API access validation failed: {e}")
+
     def generate_vector_id(self, project_id: str, chunk_id: int) -> str:
         """Generate a unique vector ID."""
         return f"{project_id}_{chunk_id}_{uuid.uuid4().hex[:8]}"
-
+
     def upsert_vectors(
-        self,
-        vectors: List[Dict[str, Any]],
-        namespace: str,
-        **kwargs
+        self, vectors: List[Dict[str, Any]], namespace: str, **kwargs
     ) -> Dict[str, Any]:
         """Store or update vectors in the database."""
         if not vectors:
             return {"upserted": 0}
-
+
         logger.info(f"Upserting {len(vectors)} vectors to namespace '{namespace}'")
-
-        #
-
+
+        # Convert row-based data to columnar format for v0.5+ API
+        if not all("id" in vector and "values" in vector for vector in vectors):
+            raise ValueError("Each vector must have 'id' and 'values' fields")
+
+        # Build columnar data structure
+        data = {
+            "id": [str(vector["id"]) for vector in vectors],
+            "vector": [vector["values"] for vector in vectors],
+        }
+
+        # Add metadata attributes as separate columns
+        all_metadata_keys = set()
         for vector in vectors:
-
-
-
-
-
-
-
-            }
-            formatted_vectors.append(formatted_vector)
-
+            metadata = vector.get("metadata", {})
+            all_metadata_keys.update(metadata.keys())
+
+        # Add each metadata attribute as a column
+        for key in all_metadata_keys:
+            data[key] = [vector.get("metadata", {}).get(key) for vector in vectors]
+
         try:
+            # Get namespace object and use write() with upsert_columns
             ns = self.client.namespace(namespace)
-            ns.
-
-
-
-
+            response = ns.write(
+                upsert_columns=data,
+                distance_metric="cosine_distance",  # Default metric TODO: which one to use?
+            )
+            # Log actual results from the response
+            rows_affected = getattr(response, "rows_affected", len(vectors))
+            logger.info(
+                f"Upsert operation completed: for namespace '{namespace}'. Requested {len(vectors)} vectors, "
+                f"actually affected {rows_affected} rows. Response status: {response.status}, response message: {response.message}"
+            )
+
+            return {"upserted": rows_affected}
+
         except Exception as e:
             logger.error(f"Failed to upsert vectors: {e}")
            raise RuntimeError(f"Vector upsert failed: {e}")
-
+
```
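Where the old code built a list of per-row dicts (`formatted_vectors`), the new `upsert_vectors` pivots the input into one column per field before calling `ns.write(upsert_columns=...)`. A minimal sketch of that pivot, with hypothetical ids, embeddings, and metadata that are not taken from the diff:

```python
# Hypothetical input in the row-based shape upsert_vectors() accepts.
vectors = [
    {"id": "p1_1_ab12cd34", "values": [0.1, 0.2],
     "metadata": {"file_path": "a.py", "chunk_type": "function"}},
    {"id": "p1_2_ef56gh78", "values": [0.3, 0.4],
     "metadata": {"file_path": "b.py"}},
]

# The columnar layout the method builds and sends as upsert_columns.
# Every metadata key seen anywhere in the batch becomes a column;
# vectors missing that key get None in the corresponding slot.
data = {
    "id": ["p1_1_ab12cd34", "p1_2_ef56gh78"],
    "vector": [[0.1, 0.2], [0.3, 0.4]],
    "file_path": ["a.py", "b.py"],
    "chunk_type": ["function", None],
}
```

The diff continues with the new `upsert_vectors_batch` method: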
```diff
+    def upsert_vectors_batch(
+        self, all_vectors: List[Dict[str, Any]], namespace: str, **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Store or update vectors from multiple files in a single batch operation.
+
+        Args:
+            all_vectors: List of all vector dictionaries from multiple files
+            namespace: Target namespace for storage
+            **kwargs: Additional arguments for vector storage
+
+        Returns:
+            Dictionary with upsert results
+
+        Raises:
+            RuntimeError: If batch upsert fails
+        """
+        if not all_vectors:
+            return {"upserted": 0}
+
+        logger.info(
+            f"Batch upserting {len(all_vectors)} vectors to namespace '{namespace}'"
+        )
+
+        # Validate vector structure
+        if not all("id" in vector and "values" in vector for vector in all_vectors):
+            raise ValueError("Each vector must have 'id' and 'values' fields")
+
+        try:
+            # Process vectors in sub-batches to respect TurboPuffer limits
+            max_batch_size = 1000  # TurboPuffer recommended limit
+            total_upserted = 0
+
+            for i in range(0, len(all_vectors), max_batch_size):
+                sub_batch = all_vectors[i : i + max_batch_size]
+
+                logger.debug(
+                    f"Processing sub-batch {i//max_batch_size + 1}: {len(sub_batch)} vectors"
+                )
+
+                # Build columnar data structure for this sub-batch
+                data = {
+                    "id": [str(vector["id"]) for vector in sub_batch],
+                    "vector": [vector["values"] for vector in sub_batch],
+                }
+
+                # Add metadata attributes as separate columns
+                all_metadata_keys = set()
+                for vector in sub_batch:
+                    metadata = vector.get("metadata", {})
+                    all_metadata_keys.update(metadata.keys())
+
+                # Add each metadata attribute as a column
+                for key in all_metadata_keys:
+                    data[key] = [
+                        vector.get("metadata", {}).get(key) for vector in sub_batch
+                    ]
+
+                # Upsert this sub-batch
+                ns = self.client.namespace(namespace)
+                response = ns.write(
+                    upsert_columns=data,
+                    distance_metric="cosine_distance",
+                )
+
+                rows_affected = getattr(response, "rows_affected", len(sub_batch))
+                total_upserted += rows_affected
+
+                logger.debug(
+                    f"Sub-batch {i//max_batch_size + 1} upserted: "
+                    f"requested {len(sub_batch)}, affected {rows_affected} rows"
+                )
+
+            logger.info(
+                f"Batch upsert operation completed for namespace '{namespace}'. "
+                f"Requested {len(all_vectors)} vectors, actually affected {total_upserted} rows"
+            )
+
+            return {"upserted": total_upserted}
+
+        except Exception as e:
+            logger.error(f"Failed to batch upsert vectors: {e}")
+            raise RuntimeError(f"Batch vector upsert failed: {e}")
+
```
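The sub-batching walks the input in fixed strides, so the only arithmetic worth verifying is the slicing. A quick self-contained check, with hypothetical sizes:

```python
# Hypothetical sanity check of the stride-based slicing used in
# upsert_vectors_batch: 2500 vectors at max_batch_size=1000 should
# split into sub-batches of 1000, 1000, and 500.
all_vectors = [{"id": str(n), "values": [0.0]} for n in range(2500)]
max_batch_size = 1000

batches = [
    all_vectors[i : i + max_batch_size]
    for i in range(0, len(all_vectors), max_batch_size)
]
print([len(b) for b in batches])  # -> [1000, 1000, 500]
```

The diff continues with `search_vectors`, whose signature and return type change: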
```diff
     def search_vectors(
         self,
         query_vector: List[float],
         top_k: int = 10,
         namespace: str = "default",
-        filters:
-        **kwargs
-    ) -> List[
+        filters: turbopuffer.types.Filter | turbopuffer.NotGiven = turbopuffer.NotGiven,
+        **kwargs,
+    ) -> List[Row] | None:
         """Search for similar vectors."""
-        logger.
-
+        logger.info(f"Searching {top_k} vectors in namespace '{namespace}'")
+
         try:
             ns = self.client.namespace(namespace)
-
             results = ns.query(
-                rank_by=
+                rank_by=("vector", "ANN", query_vector),  # Use tuple format for v0.5+
                 top_k=top_k,
                 filters=filters,
-
+                exclude_attributes=["vector"],
             )
-
-
-
-
+            # Return only rows if present, otherwise None
+            if hasattr(results, "rows") and results.rows:
+                logger.debug(f"Found {len(results.rows)} similar vectors")
+                return results.rows
+            else:
+                logger.debug("Found 0 similar vectors")
+                return None
+
         except Exception as e:
             logger.error(f"Vector search failed: {e}")
             raise RuntimeError(f"Vector search failed: {e}")
-
+
```
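Because `search_vectors` now returns `List[Row] | None` instead of always returning a list, callers have to handle the empty case explicitly. A minimal caller sketch; the API key, namespace, and embedding values are invented:

```python
# Hypothetical usage of the updated search_vectors API.
client = TurbopufferClient(api_key="tpuf_...", region="gcp-europe-west3")

rows = client.search_vectors(
    query_vector=[0.12, -0.03, 0.88],  # embedding of the query text
    top_k=5,
    namespace="mcp_code_myproject",
)

if rows is None:
    print("no matches")
else:
    for row in rows:
        # Vector payloads are excluded via exclude_attributes=["vector"],
        # so each row carries only its id and metadata attributes.
        print(row.id)
```

The diff continues with `delete_vectors` and the namespace helpers: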
```diff
     def delete_vectors(
-        self,
-        vector_ids: List[str],
-        namespace: str,
-        **kwargs
+        self, vector_ids: List[str], namespace: str, **kwargs
     ) -> Dict[str, Any]:
         """Delete vectors by ID."""
         if not vector_ids:
             return {"deleted": 0}
-
+
         logger.info(f"Deleting {len(vector_ids)} vectors from namespace '{namespace}'")
-
+
         try:
             ns = self.client.namespace(namespace)
-
-
-
-
-
+
+            # Use the write method with deletes parameter (v0.5+ API)
+            response = ns.write(deletes=vector_ids)
+
+            # Log actual results from the response
+            rows_affected = getattr(response, "rows_affected", 0)
+            logger.info(
+                f"Delete operation completed: requested {len(vector_ids)} vectors, "
+                f"actually affected {rows_affected} rows"
+            )
+
+            return {"deleted": rows_affected}
+
         except Exception as e:
             logger.error(f"Failed to delete vectors: {e}")
             raise RuntimeError(f"Vector deletion failed: {e}")
-
+
     def list_namespaces(self) -> List[str]:
         """List all available namespaces."""
         try:
             namespaces = self.client.namespaces()
-            return [ns.
-
+            return [ns.id for ns in namespaces.namespaces]
+
         except Exception as e:
             logger.error(f"Failed to list namespaces: {e}")
             raise RuntimeError(f"Namespace listing failed: {e}")
-
-    def create_namespace(self, namespace: str, dimension: int, **kwargs) -> Dict[str, Any]:
-        """Create a new namespace."""
-        logger.info(f"Creating namespace '{namespace}' with dimension {dimension}")
-
-        try:
-            self.client.create_namespace(
-                name=namespace,
-                dimension=dimension
-            )
-
-            logger.info(f"Successfully created namespace '{namespace}'")
-            return {"name": namespace, "dimension": dimension}
-
-        except Exception as e:
-            logger.error(f"Failed to create namespace: {e}")
-            raise RuntimeError(f"Namespace creation failed: {e}")
-
+
     def delete_namespace(self, namespace: str) -> Dict[str, Any]:
         """Delete a namespace and all its vectors."""
         logger.warning(f"Deleting namespace '{namespace}' and all its vectors")
-
         try:
-            self.client.
-
-
+            ns = self.client.namespace(namespace)
+            # Use delete_all method to delete the namespace (v0.5+ API)
+            response = ns.delete_all()
+
+            logger.info(
+                f"Namespace deletion completed: '{namespace}' deleted, "
+                f"status: {response.status}, "
+            )
             return {"deleted": namespace}
-
+
         except Exception as e:
             logger.error(f"Failed to delete namespace: {e}")
             raise RuntimeError(f"Namespace deletion failed: {e}")
-
+
     def get_namespace_for_project(self, project_id: str) -> str:
         """Get the namespace name for a project."""
         # Use project ID as namespace, with prefix for safety
-        safe_project_id = "".join(
+        safe_project_id = "".join(
+            c if c.isalnum() or c in "-_" else "_" for c in project_id
+        )
         return f"mcp_code_{safe_project_id}".lower()
-
+
```
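`get_namespace_for_project` is pure string manipulation, so its behavior is easy to pin down. A hypothetical input/output pair (the project id is invented):

```python
# Hypothetical example of the namespace sanitization: alphanumerics,
# "-" and "_" pass through, everything else becomes "_", and the result
# is lowercased under an "mcp_code_" prefix.
project_id = "My Repo/v2.0"
safe_project_id = "".join(
    c if c.isalnum() or c in "-_" else "_" for c in project_id
)
print(f"mcp_code_{safe_project_id}".lower())  # -> mcp_code_my_repo_v2_0
```

The diff continues with the new `delete_vectors_for_file` helper and the updated `search_with_metadata_filter`: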
```diff
+    def delete_vectors_for_file(self, namespace: str, file_path: str) -> Dict[str, Any]:
+        """
+        Delete all vectors associated with a specific file.
+
+        Args:
+            namespace: The namespace to delete from
+            file_path: Path to the source file
+
+        Returns:
+            Dictionary with deletion results
+
+        Raises:
+            RuntimeError: If deletion fails
+        """
+        logger.info(
+            f"Deleting vectors for file '{file_path}' in namespace '{namespace}'"
+        )
+
+        try:
+            ns = self.client.namespace(namespace)
+
+            # First, query for vectors with matching file_path
+            filter_condition = ("file_path", "Eq", file_path)
+            results = ns.query(
+                filters=filter_condition,
+                top_k=1200,  # Set high enough to catch all chunks for a single file. 1200 is max
+                include_attributes=False,  # We only need IDs
+            )
+
+            if not hasattr(results, "rows") or not results.rows:
+                logger.info(
+                    f"No vectors found for file '{file_path}' in namespace '{namespace}'"
+                )
+                return {"deleted": 0, "file_path": file_path}
+
+            # Extract vector IDs to delete
+            ids_to_delete = [row.id for row in results.rows]
+            logger.info(
+                f"Found {len(ids_to_delete)} vectors to delete for file '{file_path}'"
+            )
+
+            # Delete vectors by ID using existing method
+            delete_result = self.delete_vectors(ids_to_delete, namespace)
+
+            logger.info(
+                f"File deletion completed: removed {delete_result['deleted']} vectors "
+                f"for file '{file_path}' from namespace '{namespace}'"
+            )
+
+            return {"deleted": delete_result["deleted"], "file_path": file_path}
+
+        except Exception as e:
+            logger.error(f"Failed to delete vectors for file '{file_path}': {e}")
+            raise RuntimeError(f"File vector deletion failed: {e}")
+
     def search_with_metadata_filter(
         self,
         query_vector: List[float],
@@ -186,31 +369,41 @@ class TurbopufferClient:
         chunk_type: Optional[str] = None,
         file_path: Optional[str] = None,
         top_k: int = 10,
-        **kwargs
-    ) -> List[
+        **kwargs,
+    ) -> List[Row] | None:
         """Search vectors with metadata filtering."""
         namespace = self.get_namespace_for_project(project_id)
-
-        # Build metadata filters
-
+
+        # Build metadata filters using tuple format (compatible with TurboPuffer v0.5+ API)
+        filter_conditions = [("project_id", "Eq", project_id)]
+
         if chunk_type:
-
+            filter_conditions.append(("chunk_type", "Eq", chunk_type))
         if file_path:
-
-
+            filter_conditions.append(("file_path", "Eq", file_path))
+
+        # Use appropriate filter format based on number of conditions
+        if len(filter_conditions) == 1:
+            # Single condition - use simple tuple format
+            filters = filter_conditions[0]
+        else:
+            # Multiple conditions - use And format
+            filters = ("And", filter_conditions)
+
         return self.search_vectors(
             query_vector=query_vector,
             top_k=top_k,
             namespace=namespace,
             filters=filters,
-            **kwargs
+            **kwargs,
         )
 
+
 def create_turbopuffer_client(config: VectorConfig) -> TurbopufferClient:
     """Create a Turbopuffer client from configuration."""
     if not config.turbopuffer_api_key:
         raise ValueError("TURBOPUFFER_API_KEY is required for vector storage")
-
+
     return TurbopufferClient(
         api_key=config.turbopuffer_api_key,
         region=config.turbopuffer_region,
```