letta-nightly 0.4.1.dev20241013104006__py3-none-any.whl → 0.5.0.dev20241015014828__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly has been flagged as potentially problematic.
- letta/__init__.py +2 -2
- letta/agent.py +51 -65
- letta/agent_store/db.py +18 -7
- letta/agent_store/lancedb.py +2 -2
- letta/agent_store/milvus.py +1 -1
- letta/agent_store/qdrant.py +1 -1
- letta/agent_store/storage.py +12 -10
- letta/cli/cli_load.py +1 -1
- letta/client/client.py +51 -0
- letta/data_sources/connectors.py +124 -124
- letta/data_sources/connectors_helper.py +97 -0
- letta/llm_api/mistral.py +47 -0
- letta/main.py +19 -9
- letta/metadata.py +58 -0
- letta/providers.py +44 -0
- letta/schemas/file.py +31 -0
- letta/schemas/job.py +1 -1
- letta/schemas/letta_request.py +3 -3
- letta/schemas/llm_config.py +1 -0
- letta/schemas/message.py +6 -2
- letta/schemas/passage.py +3 -3
- letta/schemas/source.py +2 -2
- letta/server/rest_api/routers/v1/agents.py +10 -16
- letta/server/rest_api/routers/v1/jobs.py +17 -1
- letta/server/rest_api/routers/v1/sources.py +7 -9
- letta/server/server.py +137 -24
- letta/server/static_files/assets/{index-9a9c449b.js → index-dc228d4a.js} +4 -4
- letta/server/static_files/index.html +1 -1
- {letta_nightly-0.4.1.dev20241013104006.dist-info → letta_nightly-0.5.0.dev20241015014828.dist-info}/METADATA +1 -1
- {letta_nightly-0.4.1.dev20241013104006.dist-info → letta_nightly-0.5.0.dev20241015014828.dist-info}/RECORD +33 -31
- letta/schemas/document.py +0 -21
- {letta_nightly-0.4.1.dev20241013104006.dist-info → letta_nightly-0.5.0.dev20241015014828.dist-info}/LICENSE +0 -0
- {letta_nightly-0.4.1.dev20241013104006.dist-info → letta_nightly-0.5.0.dev20241015014828.dist-info}/WHEEL +0 -0
- {letta_nightly-0.4.1.dev20241013104006.dist-info → letta_nightly-0.5.0.dev20241015014828.dist-info}/entry_points.txt +0 -0
letta/data_sources/connectors.py
CHANGED
@@ -1,11 +1,15 @@
-from typing import Dict, Iterator, List,
+from typing import Dict, Iterator, List, Tuple
 
 import typer
-from llama_index.core import Document as LlamaIndexDocument
 
 from letta.agent_store.storage import StorageConnector
+from letta.data_sources.connectors_helper import (
+    assert_all_files_exist_locally,
+    extract_metadata_from_files,
+    get_filenames_in_dir,
+)
 from letta.embeddings import embedding_model
-from letta.schemas.
+from letta.schemas.file import FileMetadata
 from letta.schemas.passage import Passage
 from letta.schemas.source import Source
 from letta.utils import create_uuid_from_string
@@ -13,23 +17,23 @@ from letta.utils import create_uuid_from_string
 
 class DataConnector:
     """
-    Base class for data connectors that can be extended to generate
+    Base class for data connectors that can be extended to generate files and passages from a custom data source.
     """
 
-    def
+    def find_files(self, source: Source) -> Iterator[FileMetadata]:
         """
-        Generate
+        Generate file metadata from a data source.
 
         Returns:
-
+            files (Iterator[FileMetadata]): Generate file metadata for each file found.
         """
 
-    def generate_passages(self,
+    def generate_passages(self, file: FileMetadata, chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Passage]:
         """
-        Generate passage text and metadata from a list of
+        Generate passage text and metadata from a list of files.
 
         Args:
-
+            file (FileMetadata): The document to generate passages from.
             chunk_size (int, optional): Chunk size for splitting passages. Defaults to 1024.
 
         Returns:
@@ -41,33 +45,25 @@ def load_data(
     connector: DataConnector,
     source: Source,
     passage_store: StorageConnector,
-
+    file_metadata_store: StorageConnector,
 ):
-    """Load data from a connector (generates
+    """Load data from a connector (generates file and passages) into a specified source_id, associatedw with a user_id."""
     embedding_config = source.embedding_config
 
     # embedding model
    embed_model = embedding_model(embedding_config)
 
-    # insert passages/
+    # insert passages/file
     passages = []
     embedding_to_document_name = {}
     passage_count = 0
-
-    for
-
-
-            text=document_text,
-            metadata_=document_metadata,
-            source_id=source.id,
-            user_id=source.user_id,
-        )
-        document_count += 1
-        if document_store:
-            document_store.insert(document)
+    file_count = 0
+    for file_metadata in connector.find_files(source):
+        file_count += 1
+        file_metadata_store.insert(file_metadata)
 
         # generate passages
-        for passage_text, passage_metadata in connector.generate_passages(
+        for passage_text, passage_metadata in connector.generate_passages(file_metadata, chunk_size=embedding_config.embedding_chunk_size):
            # for some reason, llama index parsers sometimes return empty strings
             if len(passage_text) == 0:
                 typer.secho(
@@ -89,7 +85,7 @@ def load_data(
             passage = Passage(
                 id=create_uuid_from_string(f"{str(source.id)}_{passage_text}"),
                 text=passage_text,
-
+                file_id=file_metadata.id,
                 source_id=source.id,
                 metadata_=passage_metadata,
                 user_id=source.user_id,
@@ -98,16 +94,16 @@ def load_data(
             )
 
             hashable_embedding = tuple(passage.embedding)
-
+            file_name = file_metadata.file_name
             if hashable_embedding in embedding_to_document_name:
                 typer.secho(
-                    f"Warning: Duplicate embedding found for passage in {
+                    f"Warning: Duplicate embedding found for passage in {file_name} (already exists in {embedding_to_document_name[hashable_embedding]}), skipping insert into VectorDB.",
                     fg=typer.colors.YELLOW,
                 )
                 continue
 
             passages.append(passage)
-            embedding_to_document_name[hashable_embedding] =
+            embedding_to_document_name[hashable_embedding] = file_name
             if len(passages) >= 100:
                 # insert passages into passage store
                 passage_store.insert_many(passages)
@@ -120,7 +116,7 @@ def load_data(
         passage_store.insert_many(passages)
         passage_count += len(passages)
 
-    return passage_count,
+    return passage_count, file_count
 
 
 class DirectoryConnector(DataConnector):
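Taken together, the hunks above replace the old Document pipeline with a file-centric one: load_data now takes a file_metadata_store, counts files instead of documents, and tags each Passage with a file_id. A minimal sketch of driving the new interface; the connector class and the source/store objects below are illustrative placeholders, not code from this release:

# Hypothetical toy connector exercising the new find_files/generate_passages
# contract; only the call shapes are taken from the diff above.
from typing import Dict, Iterator, Tuple

from letta.data_sources.connectors import DataConnector, load_data
from letta.schemas.file import FileMetadata
from letta.schemas.source import Source


class InMemoryConnector(DataConnector):
    def find_files(self, source: Source) -> Iterator[FileMetadata]:
        # one synthetic "file" per source
        yield FileMetadata(user_id=source.user_id, source_id=source.id, file_name="inline.txt")

    def generate_passages(self, file: FileMetadata, chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:
        yield "hello from an in-memory connector", None


# passage_count, file_count = load_data(
#     connector=InMemoryConnector(),
#     source=source,                    # an existing Source
#     passage_store=passage_store,      # a StorageConnector for passages
#     file_metadata_store=file_store,   # a StorageConnector for file metadata
# )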
@@ -143,105 +139,109 @@ class DirectoryConnector(DataConnector):
         if self.recursive == True:
             assert self.input_directory is not None, "Must provide input directory if recursive is True."
 
-    def
-        from llama_index.core import SimpleDirectoryReader
-
+    def find_files(self, source: Source) -> Iterator[FileMetadata]:
         if self.input_directory is not None:
-
+            files = get_filenames_in_dir(
                 input_dir=self.input_directory,
                 recursive=self.recursive,
                 required_exts=[ext.strip() for ext in str(self.extensions).split(",")],
+                exclude=["*png", "*jpg", "*jpeg"],
             )
         else:
-
-
-
-
-
-
-
-
-
-
-
-
-
+            files = self.input_files
+
+        # Check that file paths are valid
+        assert_all_files_exist_locally(files)
+
+        for metadata in extract_metadata_from_files(files):
+            yield FileMetadata(
+                user_id=source.user_id,
+                source_id=source.id,
+                file_name=metadata.get("file_name"),
+                file_path=metadata.get("file_path"),
+                file_type=metadata.get("file_type"),
+                file_size=metadata.get("file_size"),
+                file_creation_date=metadata.get("file_creation_date"),
+                file_last_modified_date=metadata.get("file_last_modified_date"),
+            )
+
+    def generate_passages(self, file: FileMetadata, chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:
+        from llama_index.core import SimpleDirectoryReader
         from llama_index.core.node_parser import TokenTextSplitter
 
         parser = TokenTextSplitter(chunk_size=chunk_size)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-class
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            yield text, {"embedding": embedding}
+        documents = SimpleDirectoryReader(input_files=[file.file_path]).load_data()
+        nodes = parser.get_nodes_from_documents(documents)
+        for node in nodes:
+            yield node.text, None
+
+
+"""
+The below isn't used anywhere, it isn't tested, and pretty much should be deleted.
+- Matt
+"""
+# class WebConnector(DirectoryConnector):
+#     def __init__(self, urls: List[str] = None, html_to_text: bool = True):
+#         self.urls = urls
+#         self.html_to_text = html_to_text
+#
+#     def generate_files(self) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Document]:
+#         from llama_index.readers.web import SimpleWebPageReader
+#
+#         files = SimpleWebPageReader(html_to_text=self.html_to_text).load_data(self.urls)
+#         for document in files:
+#             yield document.text, {"url": document.id_}
+#
+#
+# class VectorDBConnector(DataConnector):
+#     # NOTE: this class has not been properly tested, so is unlikely to work
+#     # TODO: allow loading multiple tables (1:1 mapping between FileMetadata and Table)
+#
+#     def __init__(
+#         self,
+#         name: str,
+#         uri: str,
+#         table_name: str,
+#         text_column: str,
+#         embedding_column: str,
+#         embedding_dim: int,
+#     ):
+#         self.name = name
+#         self.uri = uri
+#         self.table_name = table_name
+#         self.text_column = text_column
+#         self.embedding_column = embedding_column
+#         self.embedding_dim = embedding_dim
+#
+#         # connect to db table
+#         from sqlalchemy import create_engine
+#
+#         self.engine = create_engine(uri)
+#
+#     def generate_files(self) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Document]:
+#         yield self.table_name, None
+#
+#     def generate_passages(self, file_text: str, file: FileMetadata, chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Passage]:
+#         from pgvector.sqlalchemy import Vector
+#         from sqlalchemy import Inspector, MetaData, Table, select
+#
+#         metadata = MetaData()
+#         # Create an inspector to inspect the database
+#         inspector = Inspector.from_engine(self.engine)
+#         table_names = inspector.get_table_names()
+#         assert self.table_name in table_names, f"Table {self.table_name} not found in database: tables that exist {table_names}."
+#
+#         table = Table(self.table_name, metadata, autoload_with=self.engine)
+#
+#         # Prepare a select statement
+#         select_statement = select(table.c[self.text_column], table.c[self.embedding_column].cast(Vector(self.embedding_dim)))
+#
+#         # Execute the query and fetch the results
+#         # TODO: paginate results
+#         with self.engine.connect() as connection:
+#             result = connection.execute(select_statement).fetchall()
+#
+#         for text, embedding in result:
+#             # assume that embeddings are the same model as in config
+#             # TODO: don't re-compute embedding
+#             yield text, {"embedding": embedding}
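DirectoryConnector now discovers files itself via get_filenames_in_dir and parses one file at a time in generate_passages. A rough usage sketch; the constructor keywords are inferred from the attributes the class reads (its __init__ falls outside this hunk), so treat them as assumptions:

from letta.data_sources.connectors import DirectoryConnector

# Keyword names inferred from self.input_directory / self.recursive / self.extensions.
connector = DirectoryConnector(input_directory="./docs", recursive=True, extensions="txt,md")

for file_metadata in connector.find_files(source=source):  # source: an existing Source
    for text, _metadata in connector.generate_passages(file_metadata, chunk_size=512):
        print(file_metadata.file_name, len(text))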
letta/data_sources/connectors_helper.py
ADDED
@@ -0,0 +1,97 @@
+import mimetypes
+import os
+from datetime import datetime
+from pathlib import Path
+from typing import List, Optional
+
+
+def extract_file_metadata(file_path) -> dict:
+    """Extracts metadata from a single file."""
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(file_path)
+
+    file_metadata = {
+        "file_name": os.path.basename(file_path),
+        "file_path": file_path,
+        "file_type": mimetypes.guess_type(file_path)[0] or "unknown",
+        "file_size": os.path.getsize(file_path),
+        "file_creation_date": datetime.fromtimestamp(os.path.getctime(file_path)).strftime("%Y-%m-%d"),
+        "file_last_modified_date": datetime.fromtimestamp(os.path.getmtime(file_path)).strftime("%Y-%m-%d"),
+    }
+    return file_metadata
+
+
+def extract_metadata_from_files(file_list):
+    """Extracts metadata for a list of files."""
+    metadata = []
+    for file_path in file_list:
+        file_metadata = extract_file_metadata(file_path)
+        if file_metadata:
+            metadata.append(file_metadata)
+    return metadata
+
+
+def get_filenames_in_dir(
+    input_dir: str, recursive: bool = True, required_exts: Optional[List[str]] = None, exclude: Optional[List[str]] = None
+):
+    """
+    Recursively reads files from the directory, applying required_exts and exclude filters.
+    Ensures that required_exts and exclude do not overlap.
+
+    Args:
+        input_dir (str): The directory to scan for files.
+        recursive (bool): Whether to scan directories recursively.
+        required_exts (list): List of file extensions to include (e.g., ['pdf', 'txt']).
+            If None or empty, matches any file extension.
+        exclude (list): List of file patterns to exclude (e.g., ['*png', '*jpg']).
+
+    Returns:
+        list: A list of matching file paths.
+    """
+    required_exts = required_exts or []
+    exclude = exclude or []
+
+    # Ensure required_exts and exclude do not overlap
+    ext_set = set(required_exts)
+    exclude_set = set(exclude)
+    overlap = ext_set & exclude_set
+    if overlap:
+        raise ValueError(f"Extensions in required_exts and exclude overlap: {overlap}")
+
+    def is_excluded(file_name):
+        """Check if a file matches any pattern in the exclude list."""
+        for pattern in exclude:
+            if Path(file_name).match(pattern):
+                return True
+        return False
+
+    files = []
+    search_pattern = "**/*" if recursive else "*"
+
+    for file_path in Path(input_dir).glob(search_pattern):
+        if file_path.is_file() and not is_excluded(file_path.name):
+            ext = file_path.suffix.lstrip(".")
+            # If required_exts is empty, match any file
+            if not required_exts or ext in required_exts:
+                files.append(file_path)
+
+    return files
+
+
+def assert_all_files_exist_locally(file_paths: List[str]) -> bool:
+    """
+    Checks if all file paths in the provided list exist locally.
+    Raises a FileNotFoundError with a list of missing files if any do not exist.
+
+    Args:
+        file_paths (List[str]): List of file paths to check.
+
+    Returns:
+        bool: True if all files exist, raises FileNotFoundError if any file is missing.
+    """
+    missing_files = [file_path for file_path in file_paths if not Path(file_path).exists()]
+
+    if missing_files:
+        raise FileNotFoundError(missing_files)
+
+    return True
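The helpers are self-contained, so they can be exercised directly. A small sketch, assuming a local ./docs directory exists; note that get_filenames_in_dir returns Path objects, converted to strings here for the other helpers:

from letta.data_sources.connectors_helper import (
    assert_all_files_exist_locally,
    extract_metadata_from_files,
    get_filenames_in_dir,
)

paths = [
    str(p)
    for p in get_filenames_in_dir("./docs", recursive=True, required_exts=["txt", "md"], exclude=["*png", "*jpg"])
]
assert_all_files_exist_locally(paths)  # raises FileNotFoundError listing any missing paths
for meta in extract_metadata_from_files(paths):
    print(meta["file_name"], meta["file_type"], meta["file_size"])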
letta/llm_api/mistral.py
ADDED
@@ -0,0 +1,47 @@
+import requests
+
+from letta.utils import printd, smart_urljoin
+
+
+def mistral_get_model_list(url: str, api_key: str) -> dict:
+    url = smart_urljoin(url, "models")
+
+    headers = {"Content-Type": "application/json"}
+    if api_key is not None:
+        headers["Authorization"] = f"Bearer {api_key}"
+
+    printd(f"Sending request to {url}")
+    response = None
+    try:
+        # TODO add query param "tool" to be true
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
+        response_json = response.json()  # convert to dict from string
+        return response_json
+    except requests.exceptions.HTTPError as http_err:
+        # Handle HTTP errors (e.g., response 4XX, 5XX)
+        try:
+            if response:
+                response = response.json()
+        except:
+            pass
+        printd(f"Got HTTPError, exception={http_err}, response={response}")
+        raise http_err
+    except requests.exceptions.RequestException as req_err:
+        # Handle other requests-related errors (e.g., connection error)
+        try:
+            if response:
+                response = response.json()
+        except:
+            pass
+        printd(f"Got RequestException, exception={req_err}, response={response}")
+        raise req_err
+    except Exception as e:
+        # Handle other potential errors
+        try:
+            if response:
+                response = response.json()
+        except:
+            pass
+        printd(f"Got unknown Exception, exception={e}, response={response}")
+        raise e
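The function returns the raw JSON from Mistral's /v1/models endpoint and re-raises any request failure after logging it through printd. A minimal invocation sketch; reading the key from a MISTRAL_API_KEY environment variable is an assumption:

import os

from letta.llm_api.mistral import mistral_get_model_list

response = mistral_get_model_list("https://api.mistral.ai/v1", api_key=os.environ["MISTRAL_API_KEY"])
for model in response.get("data", []):
    print(model["id"])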
letta/main.py
CHANGED
@@ -356,19 +356,29 @@ def run_agent_loop(
         else:
             # If message did not begin with command prefix, pass inputs to Letta
             # Handle user message and append to messages
-            user_message =
+            user_message = str(user_input)
 
         skip_next_user_input = False
 
         def process_agent_step(user_message, no_verify):
-
-
-
-
-
-
-
-
+            if user_message is None:
+                step_response = letta_agent.step(
+                    messages=[],
+                    first_message=False,
+                    skip_verify=no_verify,
+                    stream=stream,
+                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
+                    ms=ms,
+                )
+            else:
+                step_response = letta_agent.step_user_message(
+                    user_message_str=user_message,
+                    first_message=False,
+                    skip_verify=no_verify,
+                    stream=stream,
+                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
+                    ms=ms,
+                )
             new_messages = step_response.messages
             heartbeat_request = step_response.heartbeat_request
             function_failed = step_response.function_failed
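The new branch separates a step with no fresh input (user_message is None, so the agent advances on an empty message list) from a normal user turn routed through step_user_message. Condensed to its shape, with the surrounding loop's locals passed in explicitly, the dispatch looks like this (a restatement for clarity, not new API surface):

# Condensed restatement of the dispatch added above; the keyword arguments
# mirror the diff, and letta_agent/stream/inner_thoughts_in_kwargs/ms are
# the enclosing loop's locals in the real code.
def process_agent_step(letta_agent, user_message, no_verify, stream, inner_thoughts_in_kwargs, ms):
    common = dict(
        first_message=False,
        skip_verify=no_verify,
        stream=stream,
        inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
        ms=ms,
    )
    if user_message is None:
        return letta_agent.step(messages=[], **common)  # heartbeat-style step
    return letta_agent.step_user_message(user_message_str=user_message, **common)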
letta/metadata.py
CHANGED
@@ -11,6 +11,7 @@ from sqlalchemy import (
     Column,
     DateTime,
     Index,
+    Integer,
     String,
     TypeDecorator,
     desc,
@@ -24,6 +25,7 @@ from letta.schemas.api_key import APIKey
 from letta.schemas.block import Block, Human, Persona
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.enums import JobStatus
+from letta.schemas.file import FileMetadata
 from letta.schemas.job import Job
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.memory import Memory
@@ -38,6 +40,41 @@ from letta.settings import settings
 from letta.utils import enforce_types, get_utc_time, printd
 
 
+class FileMetadataModel(Base):
+    __tablename__ = "files"
+    __table_args__ = {"extend_existing": True}
+
+    id = Column(String, primary_key=True, nullable=False)
+    user_id = Column(String, nullable=False)
+    # TODO: Investigate why this breaks during table creation due to FK
+    # source_id = Column(String, ForeignKey("sources.id"), nullable=False)
+    source_id = Column(String, nullable=False)
+    file_name = Column(String, nullable=True)
+    file_path = Column(String, nullable=True)
+    file_type = Column(String, nullable=True)
+    file_size = Column(Integer, nullable=True)
+    file_creation_date = Column(String, nullable=True)
+    file_last_modified_date = Column(String, nullable=True)
+    created_at = Column(DateTime(timezone=True), server_default=func.now())
+
+    def __repr__(self):
+        return f"<FileMetadata(id='{self.id}', source_id='{self.source_id}', file_name='{self.file_name}')>"
+
+    def to_record(self):
+        return FileMetadata(
+            id=self.id,
+            user_id=self.user_id,
+            source_id=self.source_id,
+            file_name=self.file_name,
+            file_path=self.file_path,
+            file_type=self.file_type,
+            file_size=self.file_size,
+            file_creation_date=self.file_creation_date,
+            file_last_modified_date=self.file_last_modified_date,
+            created_at=self.created_at,
+        )
+
+
 class LLMConfigColumn(TypeDecorator):
     """Custom type for storing LLMConfig as JSON"""
 
@@ -865,6 +902,27 @@ class MetadataStore:
             session.add(JobModel(**vars(job)))
             session.commit()
 
+    @enforce_types
+    def list_files_from_source(self, source_id: str, limit: int, cursor: Optional[str]):
+        with self.session_maker() as session:
+            # Start with the basic query filtered by source_id
+            query = session.query(FileMetadataModel).filter(FileMetadataModel.source_id == source_id)
+
+            if cursor:
+                # Assuming cursor is the ID of the last file in the previous page
+                query = query.filter(FileMetadataModel.id > cursor)
+
+            # Order by ID or other ordering criteria to ensure correct pagination
+            query = query.order_by(FileMetadataModel.id)
+
+            # Limit the number of results returned
+            results = query.limit(limit).all()
+
+            # Convert the results to the required FileMetadata objects
+            files = [r.to_record() for r in results]
+
+            return files
+
     def delete_job(self, job_id: str):
         with self.session_maker() as session:
             session.query(JobModel).filter(JobModel.id == job_id).delete()
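list_files_from_source implements keyset pagination: the cursor is the last file ID seen, and each page filters on FileMetadataModel.id > cursor. A sketch of paging through every file for a source; ms stands in for a live MetadataStore and source_id for an existing source's ID:

# Hypothetical paging loop over the new MetadataStore.list_files_from_source.
cursor = None
while True:
    page = ms.list_files_from_source(source_id=source_id, limit=100, cursor=cursor)
    if not page:
        break
    for f in page:
        print(f.file_name, f.file_size)
    cursor = page[-1].id  # the last ID on the page becomes the next cursor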
letta/providers.py
CHANGED
@@ -139,6 +139,50 @@ class AnthropicProvider(Provider):
         return []
 
 
+class MistralProvider(Provider):
+    name: str = "mistral"
+    api_key: str = Field(..., description="API key for the Mistral API.")
+    base_url: str = "https://api.mistral.ai/v1"
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.mistral import mistral_get_model_list
+
+        # Some hardcoded support for OpenRouter (so that we only get models with tool calling support)...
+        # See: https://openrouter.ai/docs/requests
+        response = mistral_get_model_list(self.base_url, api_key=self.api_key)
+
+        assert "data" in response, f"Mistral model query response missing 'data' field: {response}"
+
+        configs = []
+        for model in response["data"]:
+            # If model has chat completions and function calling enabled
+            if model["capabilities"]["completion_chat"] and model["capabilities"]["function_calling"]:
+                configs.append(
+                    LLMConfig(
+                        model=model["id"],
+                        model_endpoint_type="openai",
+                        model_endpoint=self.base_url,
+                        context_window=model["max_context_length"],
+                    )
+                )
+
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        # Not supported for mistral
+        return []
+
+    def get_model_context_window(self, model_name: str) -> Optional[int]:
+        # Redoing this is fine because it's a pretty lightweight call
+        models = self.list_llm_models()
+
+        for m in models:
+            if model_name in m["id"]:
+                return int(m["max_context_length"])
+
+        return None
+
+
 class OllamaProvider(OpenAIProvider):
     """Ollama provider that uses the native /api/generate endpoint
 
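A short sketch of wiring up the new provider; the environment variable is an assumption. The models it returns are advertised with model_endpoint_type="openai", i.e. they are served through the OpenAI-compatible chat path:

import os

from letta.providers import MistralProvider

provider = MistralProvider(api_key=os.environ["MISTRAL_API_KEY"])
for config in provider.list_llm_models():  # only chat plus function-calling capable models
    print(config.model, config.context_window)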