letta-nightly 0.6.2.dev20241210030340__py3-none-any.whl → 0.6.2.dev20241211031658__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic. Click here for more details.
- letta/agent.py +32 -43
- letta/agent_store/db.py +12 -54
- letta/agent_store/storage.py +10 -9
- letta/cli/cli.py +1 -0
- letta/client/client.py +4 -3
- letta/config.py +2 -2
- letta/data_sources/connectors.py +4 -3
- letta/embeddings.py +29 -9
- letta/functions/function_sets/base.py +36 -11
- letta/metadata.py +13 -2
- letta/o1_agent.py +2 -3
- letta/offline_memory_agent.py +2 -1
- letta/orm/__init__.py +1 -0
- letta/orm/file.py +1 -0
- letta/orm/mixins.py +12 -2
- letta/orm/organization.py +3 -0
- letta/orm/passage.py +72 -0
- letta/orm/sqlalchemy_base.py +66 -10
- letta/orm/sqlite_functions.py +140 -0
- letta/orm/user.py +1 -1
- letta/schemas/agent.py +4 -3
- letta/schemas/letta_message.py +5 -1
- letta/schemas/letta_request.py +3 -3
- letta/schemas/passage.py +6 -4
- letta/schemas/sandbox_config.py +1 -0
- letta/schemas/tool_rule.py +0 -3
- letta/server/rest_api/app.py +34 -12
- letta/server/rest_api/routers/v1/agents.py +20 -7
- letta/server/server.py +76 -52
- letta/server/static_files/assets/{index-4848e3d7.js → index-048c9598.js} +1 -1
- letta/server/static_files/assets/{index-43ab4d62.css → index-0e31b727.css} +1 -1
- letta/server/static_files/index.html +2 -2
- letta/services/message_manager.py +3 -0
- letta/services/passage_manager.py +225 -0
- letta/services/source_manager.py +2 -1
- letta/services/tool_execution_sandbox.py +19 -7
- letta/settings.py +2 -0
- {letta_nightly-0.6.2.dev20241210030340.dist-info → letta_nightly-0.6.2.dev20241211031658.dist-info}/METADATA +10 -15
- {letta_nightly-0.6.2.dev20241210030340.dist-info → letta_nightly-0.6.2.dev20241211031658.dist-info}/RECORD +42 -40
- letta/agent_store/chroma.py +0 -297
- {letta_nightly-0.6.2.dev20241210030340.dist-info → letta_nightly-0.6.2.dev20241211031658.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.2.dev20241210030340.dist-info → letta_nightly-0.6.2.dev20241211031658.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.2.dev20241210030340.dist-info → letta_nightly-0.6.2.dev20241211031658.dist-info}/entry_points.txt +0 -0
letta/agent_store/chroma.py
DELETED
|
@@ -1,297 +0,0 @@
|
|
|
1
|
-
from typing import Dict, List, Optional, Tuple, cast
|
|
2
|
-
|
|
3
|
-
import chromadb
|
|
4
|
-
from chromadb.api.types import Include
|
|
5
|
-
|
|
6
|
-
from letta.agent_store.storage import StorageConnector, TableType
|
|
7
|
-
from letta.config import LettaConfig
|
|
8
|
-
from letta.schemas.embedding_config import EmbeddingConfig
|
|
9
|
-
from letta.schemas.passage import Passage
|
|
10
|
-
from letta.utils import datetime_to_timestamp, printd, timestamp_to_datetime
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class ChromaStorageConnector(StorageConnector):
|
|
14
|
-
"""Storage via Chroma"""
|
|
15
|
-
|
|
16
|
-
# WARNING: This is not thread safe. Do NOT do concurrent access to the same collection.
|
|
17
|
-
# Timestamps are converted to integer timestamps for chroma (datetime not supported)
|
|
18
|
-
|
|
19
|
-
def __init__(self, table_type: str, config: LettaConfig, user_id, agent_id=None):
|
|
20
|
-
super().__init__(table_type=table_type, config=config, user_id=user_id, agent_id=agent_id)
|
|
21
|
-
|
|
22
|
-
assert table_type == TableType.ARCHIVAL_MEMORY or table_type == TableType.PASSAGES, "Chroma only supports archival memory"
|
|
23
|
-
|
|
24
|
-
# create chroma client
|
|
25
|
-
if config.archival_storage_path:
|
|
26
|
-
self.client = chromadb.PersistentClient(config.archival_storage_path)
|
|
27
|
-
else:
|
|
28
|
-
# assume uri={ip}:{port}
|
|
29
|
-
ip = config.archival_storage_uri.split(":")[0]
|
|
30
|
-
port = config.archival_storage_uri.split(":")[1]
|
|
31
|
-
self.client = chromadb.HttpClient(host=ip, port=port)
|
|
32
|
-
|
|
33
|
-
# get a collection or create if it doesn't exist already
|
|
34
|
-
self.collection = self.client.get_or_create_collection(self.table_name)
|
|
35
|
-
self.include: Include = ["documents", "embeddings", "metadatas"]
|
|
36
|
-
|
|
37
|
-
def get_filters(self, filters: Optional[Dict] = {}) -> Tuple[list, dict]:
|
|
38
|
-
# get all filters for query
|
|
39
|
-
if filters is not None:
|
|
40
|
-
filter_conditions = {**self.filters, **filters}
|
|
41
|
-
else:
|
|
42
|
-
filter_conditions = self.filters
|
|
43
|
-
|
|
44
|
-
# convert to chroma format
|
|
45
|
-
chroma_filters = []
|
|
46
|
-
ids = []
|
|
47
|
-
for key, value in filter_conditions.items():
|
|
48
|
-
# filter by id
|
|
49
|
-
if key == "id":
|
|
50
|
-
ids = [str(value)]
|
|
51
|
-
continue
|
|
52
|
-
|
|
53
|
-
# filter by other keys
|
|
54
|
-
chroma_filters.append({key: {"$eq": value}})
|
|
55
|
-
|
|
56
|
-
if len(chroma_filters) > 1:
|
|
57
|
-
chroma_filters = {"$and": chroma_filters}
|
|
58
|
-
elif len(chroma_filters) == 0:
|
|
59
|
-
chroma_filters = {}
|
|
60
|
-
else:
|
|
61
|
-
chroma_filters = chroma_filters[0]
|
|
62
|
-
return ids, chroma_filters
|
|
63
|
-
|
|
64
|
-
def get_all_paginated(self, filters: Optional[Dict] = {}, page_size: int = 1000, offset: int = 0):
|
|
65
|
-
ids, filters = self.get_filters(filters)
|
|
66
|
-
while True:
|
|
67
|
-
# Retrieve a chunk of records with the given page_size
|
|
68
|
-
results = self.collection.get(ids=ids, offset=offset, limit=page_size, include=self.include, where=filters)
|
|
69
|
-
|
|
70
|
-
# If the chunk is empty, we've retrieved all records
|
|
71
|
-
assert results["embeddings"] is not None, f"results['embeddings'] was None"
|
|
72
|
-
if len(results["embeddings"]) == 0:
|
|
73
|
-
break
|
|
74
|
-
|
|
75
|
-
# Yield a list of Record objects converted from the chunk
|
|
76
|
-
yield self.results_to_records(results)
|
|
77
|
-
|
|
78
|
-
# Increment the offset to get the next chunk in the next iteration
|
|
79
|
-
offset += page_size
|
|
80
|
-
|
|
81
|
-
def results_to_records(self, results):
|
|
82
|
-
# convert timestamps to datetime
|
|
83
|
-
for metadata in results["metadatas"]:
|
|
84
|
-
if "created_at" in metadata:
|
|
85
|
-
metadata["created_at"] = timestamp_to_datetime(metadata["created_at"])
|
|
86
|
-
if results["embeddings"]: # may not be returned, depending on table type
|
|
87
|
-
passages = []
|
|
88
|
-
for text, record_id, embedding, metadata in zip(
|
|
89
|
-
results["documents"], results["ids"], results["embeddings"], results["metadatas"]
|
|
90
|
-
):
|
|
91
|
-
args = {}
|
|
92
|
-
for field in EmbeddingConfig.__fields__.keys():
|
|
93
|
-
if field in metadata:
|
|
94
|
-
args[field] = metadata[field]
|
|
95
|
-
del metadata[field]
|
|
96
|
-
embedding_config = EmbeddingConfig(**args)
|
|
97
|
-
passages.append(Passage(text=text, embedding=embedding, id=record_id, embedding_config=embedding_config, **metadata))
|
|
98
|
-
# return [
|
|
99
|
-
# Passage(text=text, embedding=embedding, id=record_id, embedding_config=EmbeddingConfig(), **metadatas)
|
|
100
|
-
# for (text, record_id, embedding, metadatas) in zip(
|
|
101
|
-
# results["documents"], results["ids"], results["embeddings"], results["metadatas"]
|
|
102
|
-
# )
|
|
103
|
-
# ]
|
|
104
|
-
return passages
|
|
105
|
-
else:
|
|
106
|
-
# no embeddings
|
|
107
|
-
passages = []
|
|
108
|
-
for text, id, metadata in zip(results["documents"], results["ids"], results["metadatas"]):
|
|
109
|
-
args = {}
|
|
110
|
-
for field in EmbeddingConfig.__fields__.keys():
|
|
111
|
-
if field in metadata:
|
|
112
|
-
args[field] = metadata[field]
|
|
113
|
-
del metadata[field]
|
|
114
|
-
embedding_config = EmbeddingConfig(**args)
|
|
115
|
-
passages.append(Passage(text=text, embedding=None, id=id, embedding_config=embedding_config, **metadata))
|
|
116
|
-
return passages
|
|
117
|
-
|
|
118
|
-
# return [
|
|
119
|
-
# #cast(Passage, self.type(text=text, id=uuid.UUID(id), **metadatas)) # type: ignore
|
|
120
|
-
# Passage(text=text, embedding=None, id=id, **metadatas)
|
|
121
|
-
# for (text, id, metadatas) in zip(results["documents"], results["ids"], results["metadatas"])
|
|
122
|
-
# ]
|
|
123
|
-
|
|
124
|
-
def get_all(self, filters: Optional[Dict] = {}, limit=None):
|
|
125
|
-
ids, filters = self.get_filters(filters)
|
|
126
|
-
if self.collection.count() == 0:
|
|
127
|
-
return []
|
|
128
|
-
if ids == []:
|
|
129
|
-
ids = None
|
|
130
|
-
if limit:
|
|
131
|
-
results = self.collection.get(ids=ids, include=self.include, where=filters, limit=limit)
|
|
132
|
-
else:
|
|
133
|
-
results = self.collection.get(ids=ids, include=self.include, where=filters)
|
|
134
|
-
return self.results_to_records(results)
|
|
135
|
-
|
|
136
|
-
def get(self, id: str):
|
|
137
|
-
results = self.collection.get(ids=[str(id)])
|
|
138
|
-
if len(results["ids"]) == 0:
|
|
139
|
-
return None
|
|
140
|
-
return self.results_to_records(results)[0]
|
|
141
|
-
|
|
142
|
-
def format_records(self, records):
|
|
143
|
-
assert all([isinstance(r, Passage) for r in records])
|
|
144
|
-
|
|
145
|
-
recs = []
|
|
146
|
-
ids = []
|
|
147
|
-
documents = []
|
|
148
|
-
embeddings = []
|
|
149
|
-
|
|
150
|
-
# de-duplication of ids
|
|
151
|
-
exist_ids = set()
|
|
152
|
-
for i in range(len(records)):
|
|
153
|
-
record = records[i]
|
|
154
|
-
if record.id in exist_ids:
|
|
155
|
-
continue
|
|
156
|
-
exist_ids.add(record.id)
|
|
157
|
-
recs.append(cast(Passage, record))
|
|
158
|
-
ids.append(str(record.id))
|
|
159
|
-
documents.append(record.text)
|
|
160
|
-
embeddings.append(record.embedding)
|
|
161
|
-
|
|
162
|
-
# collect/format record metadata
|
|
163
|
-
metadatas = []
|
|
164
|
-
for record in recs:
|
|
165
|
-
embedding_config = vars(record.embedding_config)
|
|
166
|
-
metadata = vars(record)
|
|
167
|
-
metadata.pop("id")
|
|
168
|
-
metadata.pop("text")
|
|
169
|
-
metadata.pop("embedding")
|
|
170
|
-
metadata.pop("embedding_config")
|
|
171
|
-
metadata.pop("metadata_")
|
|
172
|
-
if "created_at" in metadata:
|
|
173
|
-
metadata["created_at"] = datetime_to_timestamp(metadata["created_at"])
|
|
174
|
-
if "metadata_" in metadata and metadata["metadata_"] is not None:
|
|
175
|
-
record_metadata = dict(metadata["metadata_"])
|
|
176
|
-
metadata.pop("metadata_")
|
|
177
|
-
else:
|
|
178
|
-
record_metadata = {}
|
|
179
|
-
|
|
180
|
-
metadata = {**metadata, **record_metadata} # merge with metadata
|
|
181
|
-
metadata = {**metadata, **embedding_config} # merge with embedding config
|
|
182
|
-
metadata = {key: value for key, value in metadata.items() if value is not None} # null values not allowed
|
|
183
|
-
|
|
184
|
-
# convert uuids to strings
|
|
185
|
-
metadatas.append(metadata)
|
|
186
|
-
return ids, documents, embeddings, metadatas
|
|
187
|
-
|
|
188
|
-
def insert(self, record):
|
|
189
|
-
ids, documents, embeddings, metadatas = self.format_records([record])
|
|
190
|
-
if any([e is None for e in embeddings]):
|
|
191
|
-
raise ValueError("Embeddings must be provided to chroma")
|
|
192
|
-
self.collection.upsert(documents=documents, embeddings=[e for e in embeddings if e is not None], ids=ids, metadatas=metadatas)
|
|
193
|
-
|
|
194
|
-
def insert_many(self, records, show_progress=False):
|
|
195
|
-
ids, documents, embeddings, metadatas = self.format_records(records)
|
|
196
|
-
if any([e is None for e in embeddings]):
|
|
197
|
-
raise ValueError("Embeddings must be provided to chroma")
|
|
198
|
-
self.collection.upsert(documents=documents, embeddings=[e for e in embeddings if e is not None], ids=ids, metadatas=metadatas)
|
|
199
|
-
|
|
200
|
-
def delete(self, filters: Optional[Dict] = {}):
|
|
201
|
-
ids, filters = self.get_filters(filters)
|
|
202
|
-
self.collection.delete(ids=ids, where=filters)
|
|
203
|
-
|
|
204
|
-
def delete_table(self):
|
|
205
|
-
# drop collection
|
|
206
|
-
self.client.delete_collection(self.collection.name)
|
|
207
|
-
|
|
208
|
-
def save(self):
|
|
209
|
-
# save to persistence file (nothing needs to be done)
|
|
210
|
-
printd("Saving chroma")
|
|
211
|
-
|
|
212
|
-
def size(self, filters: Optional[Dict] = {}) -> int:
|
|
213
|
-
# unfortunately, need to use pagination to get filtering
|
|
214
|
-
# warning: poor performance for large datasets
|
|
215
|
-
return len(self.get_all(filters=filters))
|
|
216
|
-
|
|
217
|
-
def list_data_sources(self):
|
|
218
|
-
raise NotImplementedError
|
|
219
|
-
|
|
220
|
-
def query(self, query: str, query_vec: List[float], top_k: int = 10, filters: Optional[Dict] = {}):
|
|
221
|
-
ids, filters = self.get_filters(filters)
|
|
222
|
-
results = self.collection.query(query_embeddings=[query_vec], n_results=top_k, include=self.include, where=filters)
|
|
223
|
-
|
|
224
|
-
# flatten, since we only have one query vector
|
|
225
|
-
flattened_results = {}
|
|
226
|
-
for key, value in results.items():
|
|
227
|
-
if value:
|
|
228
|
-
# value is an Optional[List] type according to chromadb.api.types
|
|
229
|
-
flattened_results[key] = value[0] # type: ignore
|
|
230
|
-
assert len(value) == 1, f"Value is size {len(value)}: {value}" # type: ignore
|
|
231
|
-
else:
|
|
232
|
-
flattened_results[key] = value
|
|
233
|
-
|
|
234
|
-
return self.results_to_records(flattened_results)
|
|
235
|
-
|
|
236
|
-
def query_date(self, start_date, end_date, start=None, count=None):
|
|
237
|
-
raise ValueError("Cannot run query_date with chroma")
|
|
238
|
-
# filters = self.get_filters(filters)
|
|
239
|
-
# filters["created_at"] = {
|
|
240
|
-
# "$gte": start_date,
|
|
241
|
-
# "$lte": end_date,
|
|
242
|
-
# }
|
|
243
|
-
# results = self.collection.query(where=filters)
|
|
244
|
-
# start = 0 if start is None else start
|
|
245
|
-
# count = len(results) if count is None else count
|
|
246
|
-
# results = results[start : start + count]
|
|
247
|
-
# return self.results_to_records(results)
|
|
248
|
-
|
|
249
|
-
def query_text(self, query, count=None, start=None, filters: Optional[Dict] = {}):
|
|
250
|
-
raise ValueError("Cannot run query_text with chroma")
|
|
251
|
-
# filters = self.get_filters(filters)
|
|
252
|
-
# results = self.collection.query(where_document={"$contains": {"text": query}}, where=filters)
|
|
253
|
-
# start = 0 if start is None else start
|
|
254
|
-
# count = len(results) if count is None else count
|
|
255
|
-
# results = results[start : start + count]
|
|
256
|
-
# return self.results_to_records(results)
|
|
257
|
-
|
|
258
|
-
def get_all_cursor(
|
|
259
|
-
self,
|
|
260
|
-
filters: Optional[Dict] = {},
|
|
261
|
-
after: str = None,
|
|
262
|
-
before: str = None,
|
|
263
|
-
limit: Optional[int] = 1000,
|
|
264
|
-
order_by: str = "created_at",
|
|
265
|
-
reverse: bool = False,
|
|
266
|
-
):
|
|
267
|
-
records = self.get_all(filters=filters)
|
|
268
|
-
|
|
269
|
-
# WARNING: very hacky and slow implementation
|
|
270
|
-
def get_index(id, record_list):
|
|
271
|
-
for i in range(len(record_list)):
|
|
272
|
-
if record_list[i].id == id:
|
|
273
|
-
return i
|
|
274
|
-
assert False, f"Could not find id {id} in record list"
|
|
275
|
-
|
|
276
|
-
# sort by custom field
|
|
277
|
-
records = sorted(records, key=lambda x: getattr(x, order_by), reverse=reverse)
|
|
278
|
-
if after:
|
|
279
|
-
index = get_index(after, records)
|
|
280
|
-
if index + 1 >= len(records):
|
|
281
|
-
return None, []
|
|
282
|
-
records = records[index + 1 :]
|
|
283
|
-
if before:
|
|
284
|
-
index = get_index(before, records)
|
|
285
|
-
if index == 0:
|
|
286
|
-
return None, []
|
|
287
|
-
|
|
288
|
-
# TODO: not sure if this is correct
|
|
289
|
-
records = records[:index]
|
|
290
|
-
|
|
291
|
-
if len(records) == 0:
|
|
292
|
-
return None, []
|
|
293
|
-
|
|
294
|
-
# enforce limit
|
|
295
|
-
if limit:
|
|
296
|
-
records = records[:limit]
|
|
297
|
-
return records[-1].id, records
|
|
File without changes
|
|
File without changes
|
|
File without changes
|