agentrun-mem0ai 0.0.11__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentrun_mem0/__init__.py +6 -0
- agentrun_mem0/client/__init__.py +0 -0
- agentrun_mem0/client/main.py +1747 -0
- agentrun_mem0/client/project.py +931 -0
- agentrun_mem0/client/utils.py +115 -0
- agentrun_mem0/configs/__init__.py +0 -0
- agentrun_mem0/configs/base.py +90 -0
- agentrun_mem0/configs/embeddings/__init__.py +0 -0
- agentrun_mem0/configs/embeddings/base.py +110 -0
- agentrun_mem0/configs/enums.py +7 -0
- agentrun_mem0/configs/llms/__init__.py +0 -0
- agentrun_mem0/configs/llms/anthropic.py +56 -0
- agentrun_mem0/configs/llms/aws_bedrock.py +192 -0
- agentrun_mem0/configs/llms/azure.py +57 -0
- agentrun_mem0/configs/llms/base.py +62 -0
- agentrun_mem0/configs/llms/deepseek.py +56 -0
- agentrun_mem0/configs/llms/lmstudio.py +59 -0
- agentrun_mem0/configs/llms/ollama.py +56 -0
- agentrun_mem0/configs/llms/openai.py +79 -0
- agentrun_mem0/configs/llms/vllm.py +56 -0
- agentrun_mem0/configs/prompts.py +459 -0
- agentrun_mem0/configs/rerankers/__init__.py +0 -0
- agentrun_mem0/configs/rerankers/base.py +17 -0
- agentrun_mem0/configs/rerankers/cohere.py +15 -0
- agentrun_mem0/configs/rerankers/config.py +12 -0
- agentrun_mem0/configs/rerankers/huggingface.py +17 -0
- agentrun_mem0/configs/rerankers/llm.py +48 -0
- agentrun_mem0/configs/rerankers/sentence_transformer.py +16 -0
- agentrun_mem0/configs/rerankers/zero_entropy.py +28 -0
- agentrun_mem0/configs/vector_stores/__init__.py +0 -0
- agentrun_mem0/configs/vector_stores/alibabacloud_mysql.py +64 -0
- agentrun_mem0/configs/vector_stores/aliyun_tablestore.py +32 -0
- agentrun_mem0/configs/vector_stores/azure_ai_search.py +57 -0
- agentrun_mem0/configs/vector_stores/azure_mysql.py +84 -0
- agentrun_mem0/configs/vector_stores/baidu.py +27 -0
- agentrun_mem0/configs/vector_stores/chroma.py +58 -0
- agentrun_mem0/configs/vector_stores/databricks.py +61 -0
- agentrun_mem0/configs/vector_stores/elasticsearch.py +65 -0
- agentrun_mem0/configs/vector_stores/faiss.py +37 -0
- agentrun_mem0/configs/vector_stores/langchain.py +30 -0
- agentrun_mem0/configs/vector_stores/milvus.py +42 -0
- agentrun_mem0/configs/vector_stores/mongodb.py +25 -0
- agentrun_mem0/configs/vector_stores/neptune.py +27 -0
- agentrun_mem0/configs/vector_stores/opensearch.py +41 -0
- agentrun_mem0/configs/vector_stores/pgvector.py +52 -0
- agentrun_mem0/configs/vector_stores/pinecone.py +55 -0
- agentrun_mem0/configs/vector_stores/qdrant.py +47 -0
- agentrun_mem0/configs/vector_stores/redis.py +24 -0
- agentrun_mem0/configs/vector_stores/s3_vectors.py +28 -0
- agentrun_mem0/configs/vector_stores/supabase.py +44 -0
- agentrun_mem0/configs/vector_stores/upstash_vector.py +34 -0
- agentrun_mem0/configs/vector_stores/valkey.py +15 -0
- agentrun_mem0/configs/vector_stores/vertex_ai_vector_search.py +28 -0
- agentrun_mem0/configs/vector_stores/weaviate.py +41 -0
- agentrun_mem0/embeddings/__init__.py +0 -0
- agentrun_mem0/embeddings/aws_bedrock.py +100 -0
- agentrun_mem0/embeddings/azure_openai.py +55 -0
- agentrun_mem0/embeddings/base.py +31 -0
- agentrun_mem0/embeddings/configs.py +30 -0
- agentrun_mem0/embeddings/gemini.py +39 -0
- agentrun_mem0/embeddings/huggingface.py +44 -0
- agentrun_mem0/embeddings/langchain.py +35 -0
- agentrun_mem0/embeddings/lmstudio.py +29 -0
- agentrun_mem0/embeddings/mock.py +11 -0
- agentrun_mem0/embeddings/ollama.py +53 -0
- agentrun_mem0/embeddings/openai.py +49 -0
- agentrun_mem0/embeddings/together.py +31 -0
- agentrun_mem0/embeddings/vertexai.py +64 -0
- agentrun_mem0/exceptions.py +503 -0
- agentrun_mem0/graphs/__init__.py +0 -0
- agentrun_mem0/graphs/configs.py +105 -0
- agentrun_mem0/graphs/neptune/__init__.py +0 -0
- agentrun_mem0/graphs/neptune/base.py +497 -0
- agentrun_mem0/graphs/neptune/neptunedb.py +511 -0
- agentrun_mem0/graphs/neptune/neptunegraph.py +474 -0
- agentrun_mem0/graphs/tools.py +371 -0
- agentrun_mem0/graphs/utils.py +97 -0
- agentrun_mem0/llms/__init__.py +0 -0
- agentrun_mem0/llms/anthropic.py +87 -0
- agentrun_mem0/llms/aws_bedrock.py +665 -0
- agentrun_mem0/llms/azure_openai.py +141 -0
- agentrun_mem0/llms/azure_openai_structured.py +91 -0
- agentrun_mem0/llms/base.py +131 -0
- agentrun_mem0/llms/configs.py +34 -0
- agentrun_mem0/llms/deepseek.py +107 -0
- agentrun_mem0/llms/gemini.py +201 -0
- agentrun_mem0/llms/groq.py +88 -0
- agentrun_mem0/llms/langchain.py +94 -0
- agentrun_mem0/llms/litellm.py +87 -0
- agentrun_mem0/llms/lmstudio.py +114 -0
- agentrun_mem0/llms/ollama.py +117 -0
- agentrun_mem0/llms/openai.py +147 -0
- agentrun_mem0/llms/openai_structured.py +52 -0
- agentrun_mem0/llms/sarvam.py +89 -0
- agentrun_mem0/llms/together.py +88 -0
- agentrun_mem0/llms/vllm.py +107 -0
- agentrun_mem0/llms/xai.py +52 -0
- agentrun_mem0/memory/__init__.py +0 -0
- agentrun_mem0/memory/base.py +63 -0
- agentrun_mem0/memory/graph_memory.py +698 -0
- agentrun_mem0/memory/kuzu_memory.py +713 -0
- agentrun_mem0/memory/main.py +2229 -0
- agentrun_mem0/memory/memgraph_memory.py +689 -0
- agentrun_mem0/memory/setup.py +56 -0
- agentrun_mem0/memory/storage.py +218 -0
- agentrun_mem0/memory/telemetry.py +90 -0
- agentrun_mem0/memory/utils.py +208 -0
- agentrun_mem0/proxy/__init__.py +0 -0
- agentrun_mem0/proxy/main.py +189 -0
- agentrun_mem0/reranker/__init__.py +9 -0
- agentrun_mem0/reranker/base.py +20 -0
- agentrun_mem0/reranker/cohere_reranker.py +85 -0
- agentrun_mem0/reranker/huggingface_reranker.py +147 -0
- agentrun_mem0/reranker/llm_reranker.py +142 -0
- agentrun_mem0/reranker/sentence_transformer_reranker.py +107 -0
- agentrun_mem0/reranker/zero_entropy_reranker.py +96 -0
- agentrun_mem0/utils/factory.py +283 -0
- agentrun_mem0/utils/gcp_auth.py +167 -0
- agentrun_mem0/vector_stores/__init__.py +0 -0
- agentrun_mem0/vector_stores/alibabacloud_mysql.py +547 -0
- agentrun_mem0/vector_stores/aliyun_tablestore.py +252 -0
- agentrun_mem0/vector_stores/azure_ai_search.py +396 -0
- agentrun_mem0/vector_stores/azure_mysql.py +463 -0
- agentrun_mem0/vector_stores/baidu.py +368 -0
- agentrun_mem0/vector_stores/base.py +58 -0
- agentrun_mem0/vector_stores/chroma.py +332 -0
- agentrun_mem0/vector_stores/configs.py +67 -0
- agentrun_mem0/vector_stores/databricks.py +761 -0
- agentrun_mem0/vector_stores/elasticsearch.py +237 -0
- agentrun_mem0/vector_stores/faiss.py +479 -0
- agentrun_mem0/vector_stores/langchain.py +180 -0
- agentrun_mem0/vector_stores/milvus.py +250 -0
- agentrun_mem0/vector_stores/mongodb.py +310 -0
- agentrun_mem0/vector_stores/neptune_analytics.py +467 -0
- agentrun_mem0/vector_stores/opensearch.py +292 -0
- agentrun_mem0/vector_stores/pgvector.py +404 -0
- agentrun_mem0/vector_stores/pinecone.py +382 -0
- agentrun_mem0/vector_stores/qdrant.py +270 -0
- agentrun_mem0/vector_stores/redis.py +295 -0
- agentrun_mem0/vector_stores/s3_vectors.py +176 -0
- agentrun_mem0/vector_stores/supabase.py +237 -0
- agentrun_mem0/vector_stores/upstash_vector.py +293 -0
- agentrun_mem0/vector_stores/valkey.py +824 -0
- agentrun_mem0/vector_stores/vertex_ai_vector_search.py +635 -0
- agentrun_mem0/vector_stores/weaviate.py +343 -0
- agentrun_mem0ai-0.0.11.data/data/README.md +205 -0
- agentrun_mem0ai-0.0.11.dist-info/METADATA +277 -0
- agentrun_mem0ai-0.0.11.dist-info/RECORD +150 -0
- agentrun_mem0ai-0.0.11.dist-info/WHEEL +4 -0
- agentrun_mem0ai-0.0.11.dist-info/licenses/LICENSE +201 -0
agentrun_mem0/vector_stores/supabase.py

@@ -0,0 +1,237 @@

```python
import logging
import uuid
from typing import List, Optional

from pydantic import BaseModel

try:
    import vecs
except ImportError:
    raise ImportError("The 'vecs' library is required. Please install it using 'pip install vecs'.")

from agentrun_mem0.configs.vector_stores.supabase import IndexMeasure, IndexMethod
from agentrun_mem0.vector_stores.base import VectorStoreBase

logger = logging.getLogger(__name__)


class OutputData(BaseModel):
    id: Optional[str]
    score: Optional[float]
    payload: Optional[dict]


class Supabase(VectorStoreBase):
    def __init__(
        self,
        connection_string: str,
        collection_name: str,
        embedding_model_dims: int,
        index_method: IndexMethod = IndexMethod.AUTO,
        index_measure: IndexMeasure = IndexMeasure.COSINE,
    ):
        """
        Initialize the Supabase vector store using vecs.

        Args:
            connection_string (str): PostgreSQL connection string
            collection_name (str): Collection name
            embedding_model_dims (int): Dimension of the embedding vector
            index_method (IndexMethod): Index method to use. Defaults to AUTO.
            index_measure (IndexMeasure): Distance measure to use. Defaults to COSINE.
        """
        self.db = vecs.create_client(connection_string)
        self.collection_name = collection_name
        self.embedding_model_dims = embedding_model_dims
        self.index_method = index_method
        self.index_measure = index_measure

        collections = self.list_cols()
        if collection_name not in collections:
            self.create_col(embedding_model_dims)

    def _preprocess_filters(self, filters: Optional[dict] = None) -> Optional[dict]:
        """
        Preprocess filters to be compatible with vecs.

        Args:
            filters (dict, optional): Filters to preprocess. Multiple filters are
                combined with AND logic.
        """
        if filters is None:
            return None

        if len(filters) == 1:
            # For a single filter, keep the simple format
            key, value = next(iter(filters.items()))
            return {key: {"$eq": value}}

        # For multiple filters, use an $and clause
        return {"$and": [{key: {"$eq": value}} for key, value in filters.items()]}

    def create_col(self, embedding_model_dims: Optional[int] = None) -> None:
        """
        Create a new collection with vector support and initialize the vector search index.

        Args:
            embedding_model_dims (int, optional): Dimension of the embedding vector.
                If not provided, uses the dimension specified at initialization.
        """
        dims = embedding_model_dims or self.embedding_model_dims
        if not dims:
            raise ValueError(
                "embedding_model_dims must be provided either during initialization or when creating a collection"
            )

        logger.info(f"Creating new collection: {self.collection_name}")
        try:
            self.collection = self.db.get_or_create_collection(name=self.collection_name, dimension=dims)
            self.collection.create_index(method=self.index_method.value, measure=self.index_measure.value)
            logger.info(f"Successfully created collection {self.collection_name} with dimension {dims}")
        except Exception as e:
            logger.error(f"Failed to create collection: {e}")
            raise

    def insert(
        self, vectors: List[List[float]], payloads: Optional[List[dict]] = None, ids: Optional[List[str]] = None
    ):
        """
        Insert vectors into the collection.

        Args:
            vectors (List[List[float]]): List of vectors to insert
            payloads (List[dict], optional): List of payloads corresponding to vectors
            ids (List[str], optional): List of IDs corresponding to vectors
        """
        logger.info(f"Inserting {len(vectors)} vectors into collection {self.collection_name}")

        if not ids:
            ids = [str(uuid.uuid4()) for _ in vectors]
        if not payloads:
            payloads = [{} for _ in vectors]

        records = list(zip(ids, vectors, payloads))

        self.collection.upsert(records)

    def search(
        self, query: str, vectors: List[float], limit: int = 5, filters: Optional[dict] = None
    ) -> List[OutputData]:
        """
        Search for similar vectors.

        Args:
            query (str): Query text (unused by this backend; kept for interface compatibility).
            vectors (List[float]): Query vector.
            limit (int, optional): Number of results to return. Defaults to 5.
            filters (dict, optional): Filters to apply to the search. Defaults to None.

        Returns:
            List[OutputData]: Search results
        """
        filters = self._preprocess_filters(filters)
        results = self.collection.query(
            data=vectors, limit=limit, filters=filters, include_metadata=True, include_value=True
        )

        return [OutputData(id=str(result[0]), score=float(result[1]), payload=result[2]) for result in results]

    def delete(self, vector_id: str):
        """
        Delete a vector by ID.

        Args:
            vector_id (str): ID of the vector to delete
        """
        self.collection.delete([vector_id])

    def update(self, vector_id: str, vector: Optional[List[float]] = None, payload: Optional[dict] = None):
        """
        Update a vector and/or its payload.

        Args:
            vector_id (str): ID of the vector to update
            vector (List[float], optional): Updated vector
            payload (dict, optional): Updated payload
        """
        if vector is None:
            # If only updating metadata, we need the existing vector
            existing = self.get(vector_id)
            if existing and existing.payload:
                vector = existing.payload.get("vector", [])

        if vector:
            self.collection.upsert([(vector_id, vector, payload or {})])

    def get(self, vector_id: str) -> Optional[OutputData]:
        """
        Retrieve a vector by ID.

        Args:
            vector_id (str): ID of the vector to retrieve

        Returns:
            Optional[OutputData]: Retrieved vector data, or None if not found
        """
        result = self.collection.fetch([vector_id])
        if not result:
            return None

        record = result[0]
        return OutputData(id=str(record[0]), score=None, payload=record[2])

    def list_cols(self) -> List[str]:
        """
        List all collections.

        Returns:
            List[str]: List of collection names
        """
        return self.db.list_collections()

    def delete_col(self):
        """Delete the collection."""
        self.db.delete_collection(self.collection_name)

    def col_info(self) -> dict:
        """
        Get information about the collection.

        Returns:
            dict: Collection information including name and configuration
        """
        info = self.collection.describe()
        return {
            "name": info.name,
            "count": info.vectors,
            "dimension": info.dimension,
            "index": {"method": info.index_method, "metric": info.distance_metric},
        }

    def list(self, filters: Optional[dict] = None, limit: int = 100) -> List[List[OutputData]]:
        """
        List vectors in the collection.

        Args:
            filters (dict, optional): Filters to apply
            limit (int, optional): Maximum number of results to return. Defaults to 100.

        Returns:
            List[List[OutputData]]: A single-element list containing the matching vectors
        """
        filters = self._preprocess_filters(filters)
        # Query with a zero vector to enumerate records matching the filters
        query = [0] * self.embedding_model_dims
        results = self.collection.query(
            data=query, limit=limit, filters=filters, include_metadata=True, include_value=False
        )
        ids = [result[0] for result in results]
        records = self.collection.fetch(ids=ids)

        return [[OutputData(id=str(record[0]), score=None, payload=record[2]) for record in records]]

    def reset(self):
        """Reset the index by deleting and recreating it."""
        logger.warning(f"Resetting index {self.collection_name}...")
        self.delete_col()
        self.create_col(self.embedding_model_dims)
```
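For orientation, here is a minimal sketch of how this store could be driven directly, assuming a reachable Postgres database with the pgvector extension; the connection string, the 1536-dimension figure, and the sample payloads below are illustrative placeholders, not values from the package:

```python
from agentrun_mem0.vector_stores.supabase import Supabase

# Hypothetical connection string; any pgvector-enabled Postgres works for vecs.
DB_URL = "postgresql://user:password@localhost:5432/postgres"

store = Supabase(
    connection_string=DB_URL,
    collection_name="memories",
    embedding_model_dims=1536,  # must match the embedding model paired with the store
)

# Insert two toy vectors with payloads; IDs are generated when omitted.
store.insert(
    vectors=[[0.1] * 1536, [0.2] * 1536],
    payloads=[{"user_id": "alice"}, {"user_id": "bob"}],
)

# Query by vector; the `query` text argument is accepted but unused by this backend.
hits = store.search(query="", vectors=[0.1] * 1536, limit=1, filters={"user_id": "alice"})
for hit in hits:
    print(hit.id, hit.score, hit.payload)
```

Since `create_col` goes through `get_or_create_collection`, the same snippet works against an empty database: the collection and its index are created on first use.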
agentrun_mem0/vector_stores/upstash_vector.py

@@ -0,0 +1,293 @@

```python
import logging
from typing import Dict, List, Optional

from pydantic import BaseModel

from agentrun_mem0.vector_stores.base import VectorStoreBase

try:
    from upstash_vector import Index
except ImportError:
    raise ImportError(
        "The 'upstash_vector' library is required. Please install it using 'pip install upstash_vector'."
    )


logger = logging.getLogger(__name__)


class OutputData(BaseModel):
    id: Optional[str]  # memory id
    score: Optional[float]  # None for the `get` method
    payload: Optional[Dict]  # metadata


class UpstashVector(VectorStoreBase):
    def __init__(
        self,
        collection_name: str,
        url: Optional[str] = None,
        token: Optional[str] = None,
        client: Optional[Index] = None,
        enable_embeddings: bool = False,
    ):
        """
        Initialize the UpstashVector vector store.

        Args:
            collection_name (str): Namespace to use within the Upstash Vector index.
            url (str, optional): URL for the Upstash Vector index. Defaults to None.
            token (str, optional): Token for the Upstash Vector index. Defaults to None.
            client (Index, optional): Existing `upstash_vector.Index` client instance. Defaults to None.
            enable_embeddings (bool): Whether to let Upstash embed the `data` field server-side.
                Defaults to False.
        """
        if client:
            self.client = client
        elif url and token:
            self.client = Index(url, token)
        else:
            raise ValueError("Either a client or a URL and token must be provided.")

        self.collection_name = collection_name
        self.enable_embeddings = enable_embeddings

    def insert(
        self,
        vectors: List[list],
        payloads: Optional[List[Dict]] = None,
        ids: Optional[List[str]] = None,
    ):
        """
        Insert vectors.

        Args:
            vectors (list): List of vectors to insert.
            payloads (list, optional): List of payloads corresponding to vectors. These are
                passed as metadata to the Upstash Vector client. Defaults to None.
            ids (list, optional): List of IDs corresponding to vectors. Defaults to None.
        """
        logger.info(f"Inserting {len(vectors)} vectors into namespace {self.collection_name}")

        if self.enable_embeddings:
            if not payloads or any("data" not in m or m["data"] is None for m in payloads):
                raise ValueError("When embeddings are enabled, all payloads must contain a 'data' field.")
            processed_vectors = [
                {
                    "id": ids[i] if ids else None,
                    "data": payloads[i]["data"],
                    "metadata": payloads[i],
                }
                for i, v in enumerate(vectors)
            ]
        else:
            processed_vectors = [
                {
                    "id": ids[i] if ids else None,
                    "vector": vectors[i],
                    "metadata": payloads[i] if payloads else None,
                }
                for i, v in enumerate(vectors)
            ]

        self.client.upsert(
            vectors=processed_vectors,
            namespace=self.collection_name,
        )

    def _stringify(self, x):
        return f'"{x}"' if isinstance(x, str) else x

    def search(
        self,
        query: str,
        vectors: List[list],
        limit: int = 5,
        filters: Optional[Dict] = None,
    ) -> List[OutputData]:
        """
        Search for similar vectors.

        Args:
            query (str): Query text; used only when embeddings are enabled.
            vectors (List[list]): Query vectors; used when embeddings are disabled.
            limit (int, optional): Number of results to return. Defaults to 5.
            filters (Dict, optional): Filters to apply to the search.

        Returns:
            List[OutputData]: Search results.
        """
        filters_str = " AND ".join([f"{k} = {self._stringify(v)}" for k, v in filters.items()]) if filters else None

        response = []

        if self.enable_embeddings:
            response = self.client.query(
                data=query,
                top_k=limit,
                filter=filters_str or "",
                include_metadata=True,
                namespace=self.collection_name,
            )
        else:
            queries = [
                {
                    "vector": v,
                    "top_k": limit,
                    "filter": filters_str or "",
                    "include_metadata": True,
                    "namespace": self.collection_name,
                }
                for v in vectors
            ]
            responses = self.client.query_many(queries=queries)
            # Flatten the per-query result lists
            response = [res for res_list in responses for res in res_list]

        return [
            OutputData(
                id=res.id,
                score=res.score,
                payload=res.metadata,
            )
            for res in response
        ]

    def delete(self, vector_id: int):
        """
        Delete a vector by ID.

        Args:
            vector_id (int): ID of the vector to delete.
        """
        self.client.delete(
            ids=[str(vector_id)],
            namespace=self.collection_name,
        )

    def update(
        self,
        vector_id: int,
        vector: Optional[list] = None,
        payload: Optional[dict] = None,
    ):
        """
        Update a vector and its payload.

        Args:
            vector_id (int): ID of the vector to update.
            vector (list, optional): Updated vector. Defaults to None.
            payload (dict, optional): Updated payload. Defaults to None.
        """
        self.client.update(
            id=str(vector_id),
            vector=vector,
            data=payload.get("data") if payload else None,
            metadata=payload,
            namespace=self.collection_name,
        )

    def get(self, vector_id: int) -> Optional[OutputData]:
        """
        Retrieve a vector by ID.

        Args:
            vector_id (int): ID of the vector to retrieve.

        Returns:
            Optional[OutputData]: Retrieved vector, or None if not found.
        """
        response = self.client.fetch(
            ids=[str(vector_id)],
            namespace=self.collection_name,
            include_metadata=True,
        )
        if len(response) == 0:
            return None
        vector = response[0]
        if not vector:
            return None
        return OutputData(id=vector.id, score=None, payload=vector.metadata)

    def list(self, filters: Optional[Dict] = None, limit: int = 100) -> List[List[OutputData]]:
        """
        List all memories.

        Args:
            filters (Dict, optional): Filters to apply to the search. Defaults to None.
            limit (int, optional): Number of results to return. Defaults to 100.

        Returns:
            List[List[OutputData]]: A single-element list containing the results.
        """
        filters_str = " AND ".join([f"{k} = {self._stringify(v)}" for k, v in filters.items()]) if filters else None

        info = self.client.info()
        ns_info = info.namespaces.get(self.collection_name)

        if not ns_info or ns_info.vector_count == 0:
            return [[]]

        # There is no scan API, so page through a resumable query with a constant vector
        constant_vector = [1.0] * info.dimension

        results, query = self.client.resumable_query(
            vector=constant_vector,
            filter=filters_str or "",
            include_metadata=True,
            namespace=self.collection_name,
            top_k=100,
        )
        with query:
            while True:
                if len(results) >= limit:
                    break
                res = query.fetch_next(100)
                if not res:
                    break
                results.extend(res)

        parsed_result = [
            OutputData(
                id=res.id,
                score=res.score,
                payload=res.metadata,
            )
            for res in results
        ]
        return [parsed_result]

    def create_col(self, name, vector_size, distance):
        """
        Upstash Vector has namespaces instead of collections. A namespace is created when
        the first vector is inserted into it.

        This method is a placeholder to maintain the interface.
        """
        pass

    def list_cols(self) -> List[str]:
        """
        List all namespaces in the Upstash Vector index.

        Returns:
            List[str]: List of namespaces.
        """
        return self.client.list_namespaces()

    def delete_col(self):
        """
        Delete the namespace and all vectors in it.
        """
        self.client.reset(namespace=self.collection_name)

    def col_info(self):
        """
        Return general information about the Upstash Vector index:

        - Total number of vectors across all namespaces
        - Total number of vectors waiting to be indexed across all namespaces
        - Total size of the index on disk in bytes
        - Vector dimension
        - Similarity function used
        - Per-namespace vector and pending vector counts
        """
        return self.client.info()

    def reset(self):
        """
        Reset the Upstash Vector index.
        """
        self.delete_col()
```
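Likewise, a minimal sketch for the Upstash store, assuming an existing Upstash Vector index; the URL, token, and 256-dimension vectors are illustrative placeholders:

```python
from agentrun_mem0.vector_stores.upstash_vector import UpstashVector

# Hypothetical credentials for an index created in the Upstash console.
UPSTASH_URL = "https://example-index.upstash.io"
UPSTASH_TOKEN = "..."

store = UpstashVector(
    collection_name="memories",  # used as the Upstash namespace
    url=UPSTASH_URL,
    token=UPSTASH_TOKEN,
)

# Pass explicit IDs: unlike the Supabase store, this backend does not
# generate IDs client-side when they are omitted.
store.insert(
    vectors=[[0.1] * 256, [0.2] * 256],
    payloads=[{"user_id": "alice"}, {"user_id": "bob"}],
    ids=["m1", "m2"],
)

# `search` takes a list of query vectors and flattens the per-query results.
hits = store.search(query="", vectors=[[0.1] * 256], limit=1, filters={"user_id": "alice"})
for hit in hits:
    print(hit.id, hit.score, hit.payload)
```

With `enable_embeddings=True`, the same `search` call would instead send the `query` text and let Upstash embed it server-side, which is why both a `query` string and raw `vectors` appear in the signature.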