letta-nightly 0.4.1.dev20241013104006__py3-none-any.whl → 0.5.0.dev20241015014828__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (34)
  1. letta/__init__.py +2 -2
  2. letta/agent.py +51 -65
  3. letta/agent_store/db.py +18 -7
  4. letta/agent_store/lancedb.py +2 -2
  5. letta/agent_store/milvus.py +1 -1
  6. letta/agent_store/qdrant.py +1 -1
  7. letta/agent_store/storage.py +12 -10
  8. letta/cli/cli_load.py +1 -1
  9. letta/client/client.py +51 -0
  10. letta/data_sources/connectors.py +124 -124
  11. letta/data_sources/connectors_helper.py +97 -0
  12. letta/llm_api/mistral.py +47 -0
  13. letta/main.py +19 -9
  14. letta/metadata.py +58 -0
  15. letta/providers.py +44 -0
  16. letta/schemas/file.py +31 -0
  17. letta/schemas/job.py +1 -1
  18. letta/schemas/letta_request.py +3 -3
  19. letta/schemas/llm_config.py +1 -0
  20. letta/schemas/message.py +6 -2
  21. letta/schemas/passage.py +3 -3
  22. letta/schemas/source.py +2 -2
  23. letta/server/rest_api/routers/v1/agents.py +10 -16
  24. letta/server/rest_api/routers/v1/jobs.py +17 -1
  25. letta/server/rest_api/routers/v1/sources.py +7 -9
  26. letta/server/server.py +137 -24
  27. letta/server/static_files/assets/{index-9a9c449b.js → index-dc228d4a.js} +4 -4
  28. letta/server/static_files/index.html +1 -1
  29. {letta_nightly-0.4.1.dev20241013104006.dist-info → letta_nightly-0.5.0.dev20241015014828.dist-info}/METADATA +1 -1
  30. {letta_nightly-0.4.1.dev20241013104006.dist-info → letta_nightly-0.5.0.dev20241015014828.dist-info}/RECORD +33 -31
  31. letta/schemas/document.py +0 -21
  32. {letta_nightly-0.4.1.dev20241013104006.dist-info → letta_nightly-0.5.0.dev20241015014828.dist-info}/LICENSE +0 -0
  33. {letta_nightly-0.4.1.dev20241013104006.dist-info → letta_nightly-0.5.0.dev20241015014828.dist-info}/WHEEL +0 -0
  34. {letta_nightly-0.4.1.dev20241013104006.dist-info → letta_nightly-0.5.0.dev20241015014828.dist-info}/entry_points.txt +0 -0
letta/data_sources/connectors.py CHANGED

@@ -1,11 +1,15 @@
- from typing import Dict, Iterator, List, Optional, Tuple
+ from typing import Dict, Iterator, List, Tuple

  import typer
- from llama_index.core import Document as LlamaIndexDocument

  from letta.agent_store.storage import StorageConnector
+ from letta.data_sources.connectors_helper import (
+     assert_all_files_exist_locally,
+     extract_metadata_from_files,
+     get_filenames_in_dir,
+ )
  from letta.embeddings import embedding_model
- from letta.schemas.document import Document
+ from letta.schemas.file import FileMetadata
  from letta.schemas.passage import Passage
  from letta.schemas.source import Source
  from letta.utils import create_uuid_from_string
@@ -13,23 +17,23 @@ from letta.utils import create_uuid_from_string

  class DataConnector:
      """
-     Base class for data connectors that can be extended to generate documents and passages from a custom data source.
+     Base class for data connectors that can be extended to generate files and passages from a custom data source.
      """

-     def generate_documents(self) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Document]:
+     def find_files(self, source: Source) -> Iterator[FileMetadata]:
          """
-         Generate document text and metadata from a data source.
+         Generate file metadata from a data source.

          Returns:
-             documents (Iterator[Tuple[str, Dict]]): Generate a tuple of string text and metadata dictionary for each document.
+             files (Iterator[FileMetadata]): Generate file metadata for each file found.
          """

-     def generate_passages(self, documents: List[Document], chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Passage]:
+     def generate_passages(self, file: FileMetadata, chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Passage]:
          """
-         Generate passage text and metadata from a list of documents.
+         Generate passage text and metadata from a list of files.

          Args:
-             documents (List[Document]): List of documents to generate passages from.
+             file (FileMetadata): The document to generate passages from.
              chunk_size (int, optional): Chunk size for splitting passages. Defaults to 1024.

          Returns:
@@ -41,33 +45,25 @@ def load_data(
      connector: DataConnector,
      source: Source,
      passage_store: StorageConnector,
-     document_store: Optional[StorageConnector] = None,
+     file_metadata_store: StorageConnector,
  ):
-     """Load data from a connector (generates documents and passages) into a specified source_id, associatedw with a user_id."""
+     """Load data from a connector (generates file and passages) into a specified source_id, associatedw with a user_id."""
      embedding_config = source.embedding_config

      # embedding model
      embed_model = embedding_model(embedding_config)

-     # insert passages/documents
+     # insert passages/file
      passages = []
      embedding_to_document_name = {}
      passage_count = 0
-     document_count = 0
-     for document_text, document_metadata in connector.generate_documents():
-         # insert document into storage
-         document = Document(
-             text=document_text,
-             metadata_=document_metadata,
-             source_id=source.id,
-             user_id=source.user_id,
-         )
-         document_count += 1
-         if document_store:
-             document_store.insert(document)
+     file_count = 0
+     for file_metadata in connector.find_files(source):
+         file_count += 1
+         file_metadata_store.insert(file_metadata)

          # generate passages
-         for passage_text, passage_metadata in connector.generate_passages([document], chunk_size=embedding_config.embedding_chunk_size):
+         for passage_text, passage_metadata in connector.generate_passages(file_metadata, chunk_size=embedding_config.embedding_chunk_size):
              # for some reason, llama index parsers sometimes return empty strings
              if len(passage_text) == 0:
                  typer.secho(
@@ -89,7 +85,7 @@ load_data(
              passage = Passage(
                  id=create_uuid_from_string(f"{str(source.id)}_{passage_text}"),
                  text=passage_text,
-                 doc_id=document.id,
+                 file_id=file_metadata.id,
                  source_id=source.id,
                  metadata_=passage_metadata,
                  user_id=source.user_id,
@@ -98,16 +94,16 @@ load_data(
              )

              hashable_embedding = tuple(passage.embedding)
-             document_name = document.metadata_.get("file_path", document.id)
+             file_name = file_metadata.file_name
              if hashable_embedding in embedding_to_document_name:
                  typer.secho(
-                     f"Warning: Duplicate embedding found for passage in {document_name} (already exists in {embedding_to_document_name[hashable_embedding]}), skipping insert into VectorDB.",
+                     f"Warning: Duplicate embedding found for passage in {file_name} (already exists in {embedding_to_document_name[hashable_embedding]}), skipping insert into VectorDB.",
                      fg=typer.colors.YELLOW,
                  )
                  continue

              passages.append(passage)
-             embedding_to_document_name[hashable_embedding] = document_name
+             embedding_to_document_name[hashable_embedding] = file_name
              if len(passages) >= 100:
                  # insert passages into passage store
                  passage_store.insert_many(passages)
@@ -120,7 +116,7 @@ load_data(
          passage_store.insert_many(passages)
          passage_count += len(passages)

-     return passage_count, document_count
+     return passage_count, file_count


  class DirectoryConnector(DataConnector):
@@ -143,105 +139,109 @@ class DirectoryConnector(DataConnector):
          if self.recursive == True:
              assert self.input_directory is not None, "Must provide input directory if recursive is True."

-     def generate_documents(self) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Document]:
-         from llama_index.core import SimpleDirectoryReader
-
+     def find_files(self, source: Source) -> Iterator[FileMetadata]:
          if self.input_directory is not None:
-             reader = SimpleDirectoryReader(
+             files = get_filenames_in_dir(
                  input_dir=self.input_directory,
                  recursive=self.recursive,
                  required_exts=[ext.strip() for ext in str(self.extensions).split(",")],
+                 exclude=["*png", "*jpg", "*jpeg"],
              )
          else:
-             assert self.input_files is not None, "Must provide input files if input_dir is None"
-             reader = SimpleDirectoryReader(input_files=[str(f) for f in self.input_files])
-
-         llama_index_docs = reader.load_data(show_progress=True)
-         for llama_index_doc in llama_index_docs:
-             # TODO: add additional metadata?
-             # doc = Document(text=llama_index_doc.text, metadata=llama_index_doc.metadata)
-             # docs.append(doc)
-             yield llama_index_doc.text, llama_index_doc.metadata
-
-     def generate_passages(self, documents: List[Document], chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Passage]:
-         # use llama index to run embeddings code
-         # from llama_index.core.node_parser import SentenceSplitter
+             files = self.input_files
+
+         # Check that file paths are valid
+         assert_all_files_exist_locally(files)
+
+         for metadata in extract_metadata_from_files(files):
+             yield FileMetadata(
+                 user_id=source.user_id,
+                 source_id=source.id,
+                 file_name=metadata.get("file_name"),
+                 file_path=metadata.get("file_path"),
+                 file_type=metadata.get("file_type"),
+                 file_size=metadata.get("file_size"),
+                 file_creation_date=metadata.get("file_creation_date"),
+                 file_last_modified_date=metadata.get("file_last_modified_date"),
+             )
+
+     def generate_passages(self, file: FileMetadata, chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:
+         from llama_index.core import SimpleDirectoryReader
          from llama_index.core.node_parser import TokenTextSplitter

          parser = TokenTextSplitter(chunk_size=chunk_size)
-         for document in documents:
-             llama_index_docs = [LlamaIndexDocument(text=document.text, metadata=document.metadata_)]
-             nodes = parser.get_nodes_from_documents(llama_index_docs)
-             for node in nodes:
-                 # passage = Passage(
-                 #     text=node.text,
-                 #     doc_id=document.id,
-                 # )
-                 yield node.text, None
-
-
- class WebConnector(DirectoryConnector):
-     def __init__(self, urls: List[str] = None, html_to_text: bool = True):
-         self.urls = urls
-         self.html_to_text = html_to_text
-
-     def generate_documents(self) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Document]:
-         from llama_index.readers.web import SimpleWebPageReader
-
-         documents = SimpleWebPageReader(html_to_text=self.html_to_text).load_data(self.urls)
-         for document in documents:
-             yield document.text, {"url": document.id_}
-
-
- class VectorDBConnector(DataConnector):
-     # NOTE: this class has not been properly tested, so is unlikely to work
-     # TODO: allow loading multiple tables (1:1 mapping between Document and Table)
-
-     def __init__(
-         self,
-         name: str,
-         uri: str,
-         table_name: str,
-         text_column: str,
-         embedding_column: str,
-         embedding_dim: int,
-     ):
-         self.name = name
-         self.uri = uri
-         self.table_name = table_name
-         self.text_column = text_column
-         self.embedding_column = embedding_column
-         self.embedding_dim = embedding_dim
-
-         # connect to db table
-         from sqlalchemy import create_engine
-
-         self.engine = create_engine(uri)
-
-     def generate_documents(self) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Document]:
-         yield self.table_name, None
-
-     def generate_passages(self, documents: List[Document], chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Passage]:
-         from pgvector.sqlalchemy import Vector
-         from sqlalchemy import Inspector, MetaData, Table, select
-
-         metadata = MetaData()
-         # Create an inspector to inspect the database
-         inspector = Inspector.from_engine(self.engine)
-         table_names = inspector.get_table_names()
-         assert self.table_name in table_names, f"Table {self.table_name} not found in database: tables that exist {table_names}."
-
-         table = Table(self.table_name, metadata, autoload_with=self.engine)
-
-         # Prepare a select statement
-         select_statement = select(table.c[self.text_column], table.c[self.embedding_column].cast(Vector(self.embedding_dim)))
-
-         # Execute the query and fetch the results
-         # TODO: paginate results
-         with self.engine.connect() as connection:
-             result = connection.execute(select_statement).fetchall()
-
-         for text, embedding in result:
-             # assume that embeddings are the same model as in config
-             # TODO: don't re-compute embedding
-             yield text, {"embedding": embedding}
+         documents = SimpleDirectoryReader(input_files=[file.file_path]).load_data()
+         nodes = parser.get_nodes_from_documents(documents)
+         for node in nodes:
+             yield node.text, None
+
+
+ """
+ The below isn't used anywhere, it isn't tested, and pretty much should be deleted.
+ - Matt
+ """
+ # class WebConnector(DirectoryConnector):
+ #     def __init__(self, urls: List[str] = None, html_to_text: bool = True):
+ #         self.urls = urls
+ #         self.html_to_text = html_to_text
+ #
+ #     def generate_files(self) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Document]:
+ #         from llama_index.readers.web import SimpleWebPageReader
+ #
+ #         files = SimpleWebPageReader(html_to_text=self.html_to_text).load_data(self.urls)
+ #         for document in files:
+ #             yield document.text, {"url": document.id_}
+ #
+ #
+ # class VectorDBConnector(DataConnector):
+ #     # NOTE: this class has not been properly tested, so is unlikely to work
+ #     # TODO: allow loading multiple tables (1:1 mapping between FileMetadata and Table)
+ #
+ #     def __init__(
+ #         self,
+ #         name: str,
+ #         uri: str,
+ #         table_name: str,
+ #         text_column: str,
+ #         embedding_column: str,
+ #         embedding_dim: int,
+ #     ):
+ #         self.name = name
+ #         self.uri = uri
+ #         self.table_name = table_name
+ #         self.text_column = text_column
+ #         self.embedding_column = embedding_column
+ #         self.embedding_dim = embedding_dim
+ #
+ #         # connect to db table
+ #         from sqlalchemy import create_engine
+ #
+ #         self.engine = create_engine(uri)
+ #
+ #     def generate_files(self) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Document]:
+ #         yield self.table_name, None
+ #
+ #     def generate_passages(self, file_text: str, file: FileMetadata, chunk_size: int = 1024) -> Iterator[Tuple[str, Dict]]:  # -> Iterator[Passage]:
+ #         from pgvector.sqlalchemy import Vector
+ #         from sqlalchemy import Inspector, MetaData, Table, select
+ #
+ #         metadata = MetaData()
+ #         # Create an inspector to inspect the database
+ #         inspector = Inspector.from_engine(self.engine)
+ #         table_names = inspector.get_table_names()
+ #         assert self.table_name in table_names, f"Table {self.table_name} not found in database: tables that exist {table_names}."
+ #
+ #         table = Table(self.table_name, metadata, autoload_with=self.engine)
+ #
+ #         # Prepare a select statement
+ #         select_statement = select(table.c[self.text_column], table.c[self.embedding_column].cast(Vector(self.embedding_dim)))
+ #
+ #         # Execute the query and fetch the results
+ #         # TODO: paginate results
+ #         with self.engine.connect() as connection:
+ #             result = connection.execute(select_statement).fetchall()
+ #
+ #         for text, embedding in result:
+ #             # assume that embeddings are the same model as in config
+ #             # TODO: don't re-compute embedding
+ #             yield text, {"embedding": embedding}
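For orientation, a rough sketch of how the reworked connector API is driven: find_files(source) yields FileMetadata records that load_data persists through the new required file_metadata_store, and generate_passages(file) is then called once per file. The DirectoryConnector constructor arguments and every variable below (source, passage_store, file_metadata_store, the directory path) are illustrative assumptions, not taken from this diff.

    from letta.data_sources.connectors import DirectoryConnector, load_data

    # Hypothetical setup: scan a local folder (constructor args assumed from the attributes above).
    connector = DirectoryConnector(input_directory="/tmp/my_docs", recursive=True)

    # source, passage_store, and file_metadata_store would come from the Letta server setup;
    # they are placeholders here. Note the new required file_metadata_store argument.
    passage_count, file_count = load_data(
        connector=connector,
        source=source,
        passage_store=passage_store,
        file_metadata_store=file_metadata_store,
    )
    print(f"Inserted {passage_count} passages from {file_count} files")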
letta/data_sources/connectors_helper.py ADDED

@@ -0,0 +1,97 @@
+ import mimetypes
+ import os
+ from datetime import datetime
+ from pathlib import Path
+ from typing import List, Optional
+
+
+ def extract_file_metadata(file_path) -> dict:
+     """Extracts metadata from a single file."""
+     if not os.path.exists(file_path):
+         raise FileNotFoundError(file_path)
+
+     file_metadata = {
+         "file_name": os.path.basename(file_path),
+         "file_path": file_path,
+         "file_type": mimetypes.guess_type(file_path)[0] or "unknown",
+         "file_size": os.path.getsize(file_path),
+         "file_creation_date": datetime.fromtimestamp(os.path.getctime(file_path)).strftime("%Y-%m-%d"),
+         "file_last_modified_date": datetime.fromtimestamp(os.path.getmtime(file_path)).strftime("%Y-%m-%d"),
+     }
+     return file_metadata
+
+
+ def extract_metadata_from_files(file_list):
+     """Extracts metadata for a list of files."""
+     metadata = []
+     for file_path in file_list:
+         file_metadata = extract_file_metadata(file_path)
+         if file_metadata:
+             metadata.append(file_metadata)
+     return metadata
+
+
+ def get_filenames_in_dir(
+     input_dir: str, recursive: bool = True, required_exts: Optional[List[str]] = None, exclude: Optional[List[str]] = None
+ ):
+     """
+     Recursively reads files from the directory, applying required_exts and exclude filters.
+     Ensures that required_exts and exclude do not overlap.
+
+     Args:
+         input_dir (str): The directory to scan for files.
+         recursive (bool): Whether to scan directories recursively.
+         required_exts (list): List of file extensions to include (e.g., ['pdf', 'txt']).
+             If None or empty, matches any file extension.
+         exclude (list): List of file patterns to exclude (e.g., ['*png', '*jpg']).
+
+     Returns:
+         list: A list of matching file paths.
+     """
+     required_exts = required_exts or []
+     exclude = exclude or []
+
+     # Ensure required_exts and exclude do not overlap
+     ext_set = set(required_exts)
+     exclude_set = set(exclude)
+     overlap = ext_set & exclude_set
+     if overlap:
+         raise ValueError(f"Extensions in required_exts and exclude overlap: {overlap}")
+
+     def is_excluded(file_name):
+         """Check if a file matches any pattern in the exclude list."""
+         for pattern in exclude:
+             if Path(file_name).match(pattern):
+                 return True
+         return False
+
+     files = []
+     search_pattern = "**/*" if recursive else "*"
+
+     for file_path in Path(input_dir).glob(search_pattern):
+         if file_path.is_file() and not is_excluded(file_path.name):
+             ext = file_path.suffix.lstrip(".")
+             # If required_exts is empty, match any file
+             if not required_exts or ext in required_exts:
+                 files.append(file_path)
+
+     return files
+
+
+ def assert_all_files_exist_locally(file_paths: List[str]) -> bool:
+     """
+     Checks if all file paths in the provided list exist locally.
+     Raises a FileNotFoundError with a list of missing files if any do not exist.
+
+     Args:
+         file_paths (List[str]): List of file paths to check.
+
+     Returns:
+         bool: True if all files exist, raises FileNotFoundError if any file is missing.
+     """
+     missing_files = [file_path for file_path in file_paths if not Path(file_path).exists()]
+
+     if missing_files:
+         raise FileNotFoundError(missing_files)
+
+     return True
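A quick, hedged illustration of the new helpers; the directory path and extension filters below are made-up examples:

    from letta.data_sources.connectors_helper import (
        assert_all_files_exist_locally,
        extract_metadata_from_files,
        get_filenames_in_dir,
    )

    # Collect text-like files under a hypothetical folder, skipping image patterns.
    paths = get_filenames_in_dir(
        input_dir="/tmp/my_docs",
        recursive=True,
        required_exts=["txt", "md", "pdf"],
        exclude=["*png", "*jpg", "*jpeg"],
    )
    assert_all_files_exist_locally([str(p) for p in paths])

    for meta in extract_metadata_from_files(paths):
        print(meta["file_name"], meta["file_type"], meta["file_size"])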
letta/llm_api/mistral.py ADDED

@@ -0,0 +1,47 @@
+ import requests
+
+ from letta.utils import printd, smart_urljoin
+
+
+ def mistral_get_model_list(url: str, api_key: str) -> dict:
+     url = smart_urljoin(url, "models")
+
+     headers = {"Content-Type": "application/json"}
+     if api_key is not None:
+         headers["Authorization"] = f"Bearer {api_key}"
+
+     printd(f"Sending request to {url}")
+     response = None
+     try:
+         # TODO add query param "tool" to be true
+         response = requests.get(url, headers=headers)
+         response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
+         response_json = response.json()  # convert to dict from string
+         return response_json
+     except requests.exceptions.HTTPError as http_err:
+         # Handle HTTP errors (e.g., response 4XX, 5XX)
+         try:
+             if response:
+                 response = response.json()
+         except:
+             pass
+         printd(f"Got HTTPError, exception={http_err}, response={response}")
+         raise http_err
+     except requests.exceptions.RequestException as req_err:
+         # Handle other requests-related errors (e.g., connection error)
+         try:
+             if response:
+                 response = response.json()
+         except:
+             pass
+         printd(f"Got RequestException, exception={req_err}, response={response}")
+         raise req_err
+     except Exception as e:
+         # Handle other potential errors
+         try:
+             if response:
+                 response = response.json()
+         except:
+             pass
+         printd(f"Got unknown Exception, exception={e}, response={response}")
+         raise e
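A hedged sketch of calling the new helper directly; the API key is a placeholder, and the fields read from the response ("data", "capabilities", "max_context_length") are the ones the MistralProvider change in providers.py below relies on:

    from letta.llm_api.mistral import mistral_get_model_list

    response = mistral_get_model_list("https://api.mistral.ai/v1", api_key="<MISTRAL_API_KEY>")  # placeholder key
    for model in response.get("data", []):
        caps = model.get("capabilities", {})
        # Keep only chat models that support function calling, mirroring providers.py
        if caps.get("completion_chat") and caps.get("function_calling"):
            print(model["id"], model.get("max_context_length"))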
letta/main.py CHANGED
@@ -356,19 +356,29 @@ def run_agent_loop(
          else:
              # If message did not begin with command prefix, pass inputs to Letta
              # Handle user message and append to messages
-             user_message = system.package_user_message(user_input)
+             user_message = str(user_input)

          skip_next_user_input = False

          def process_agent_step(user_message, no_verify):
-             step_response = letta_agent.step(
-                 user_message,
-                 first_message=False,
-                 skip_verify=no_verify,
-                 stream=stream,
-                 inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
-                 ms=ms,
-             )
+             if user_message is None:
+                 step_response = letta_agent.step(
+                     messages=[],
+                     first_message=False,
+                     skip_verify=no_verify,
+                     stream=stream,
+                     inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
+                     ms=ms,
+                 )
+             else:
+                 step_response = letta_agent.step_user_message(
+                     user_message_str=user_message,
+                     first_message=False,
+                     skip_verify=no_verify,
+                     stream=stream,
+                     inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
+                     ms=ms,
+                 )
              new_messages = step_response.messages
              heartbeat_request = step_response.heartbeat_request
              function_failed = step_response.function_failed
letta/metadata.py CHANGED
@@ -11,6 +11,7 @@ from sqlalchemy import (
      Column,
      DateTime,
      Index,
+     Integer,
      String,
      TypeDecorator,
      desc,
@@ -24,6 +25,7 @@ from letta.schemas.api_key import APIKey
  from letta.schemas.block import Block, Human, Persona
  from letta.schemas.embedding_config import EmbeddingConfig
  from letta.schemas.enums import JobStatus
+ from letta.schemas.file import FileMetadata
  from letta.schemas.job import Job
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.memory import Memory
@@ -38,6 +40,41 @@ from letta.settings import settings
  from letta.utils import enforce_types, get_utc_time, printd


+ class FileMetadataModel(Base):
+     __tablename__ = "files"
+     __table_args__ = {"extend_existing": True}
+
+     id = Column(String, primary_key=True, nullable=False)
+     user_id = Column(String, nullable=False)
+     # TODO: Investigate why this breaks during table creation due to FK
+     # source_id = Column(String, ForeignKey("sources.id"), nullable=False)
+     source_id = Column(String, nullable=False)
+     file_name = Column(String, nullable=True)
+     file_path = Column(String, nullable=True)
+     file_type = Column(String, nullable=True)
+     file_size = Column(Integer, nullable=True)
+     file_creation_date = Column(String, nullable=True)
+     file_last_modified_date = Column(String, nullable=True)
+     created_at = Column(DateTime(timezone=True), server_default=func.now())
+
+     def __repr__(self):
+         return f"<FileMetadata(id='{self.id}', source_id='{self.source_id}', file_name='{self.file_name}')>"
+
+     def to_record(self):
+         return FileMetadata(
+             id=self.id,
+             user_id=self.user_id,
+             source_id=self.source_id,
+             file_name=self.file_name,
+             file_path=self.file_path,
+             file_type=self.file_type,
+             file_size=self.file_size,
+             file_creation_date=self.file_creation_date,
+             file_last_modified_date=self.file_last_modified_date,
+             created_at=self.created_at,
+         )
+
+
  class LLMConfigColumn(TypeDecorator):
      """Custom type for storing LLMConfig as JSON"""

@@ -865,6 +902,27 @@ class MetadataStore:
              session.add(JobModel(**vars(job)))
              session.commit()

+     @enforce_types
+     def list_files_from_source(self, source_id: str, limit: int, cursor: Optional[str]):
+         with self.session_maker() as session:
+             # Start with the basic query filtered by source_id
+             query = session.query(FileMetadataModel).filter(FileMetadataModel.source_id == source_id)
+
+             if cursor:
+                 # Assuming cursor is the ID of the last file in the previous page
+                 query = query.filter(FileMetadataModel.id > cursor)
+
+             # Order by ID or other ordering criteria to ensure correct pagination
+             query = query.order_by(FileMetadataModel.id)
+
+             # Limit the number of results returned
+             results = query.limit(limit).all()
+
+             # Convert the results to the required FileMetadata objects
+             files = [r.to_record() for r in results]
+
+             return files
+
      def delete_job(self, job_id: str):
          with self.session_maker() as session:
              session.query(JobModel).filter(JobModel.id == job_id).delete()
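The new list_files_from_source pages by file id: the cursor is the id of the last row from the previous page. A minimal sketch of walking all pages, assuming ms is an existing MetadataStore and the source id is a placeholder:

    all_files = []
    cursor = None
    while True:
        page = ms.list_files_from_source(source_id="source-123", limit=100, cursor=cursor)
        if not page:
            break
        all_files.extend(page)
        cursor = page[-1].id  # last FileMetadata id becomes the next cursor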
letta/providers.py CHANGED
@@ -139,6 +139,50 @@ class AnthropicProvider(Provider):
          return []


+ class MistralProvider(Provider):
+     name: str = "mistral"
+     api_key: str = Field(..., description="API key for the Mistral API.")
+     base_url: str = "https://api.mistral.ai/v1"
+
+     def list_llm_models(self) -> List[LLMConfig]:
+         from letta.llm_api.mistral import mistral_get_model_list
+
+         # Some hardcoded support for OpenRouter (so that we only get models with tool calling support)...
+         # See: https://openrouter.ai/docs/requests
+         response = mistral_get_model_list(self.base_url, api_key=self.api_key)
+
+         assert "data" in response, f"Mistral model query response missing 'data' field: {response}"
+
+         configs = []
+         for model in response["data"]:
+             # If model has chat completions and function calling enabled
+             if model["capabilities"]["completion_chat"] and model["capabilities"]["function_calling"]:
+                 configs.append(
+                     LLMConfig(
+                         model=model["id"],
+                         model_endpoint_type="openai",
+                         model_endpoint=self.base_url,
+                         context_window=model["max_context_length"],
+                     )
+                 )
+
+         return configs
+
+     def list_embedding_models(self) -> List[EmbeddingConfig]:
+         # Not supported for mistral
+         return []
+
+     def get_model_context_window(self, model_name: str) -> Optional[int]:
+         # Redoing this is fine because it's a pretty lightweight call
+         models = self.list_llm_models()
+
+         for m in models:
+             if model_name in m["id"]:
+                 return int(m["max_context_length"])
+
+         return None
+
+
  class OllamaProvider(OpenAIProvider):
      """Ollama provider that uses the native /api/generate endpoint
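A short, hedged usage sketch for the new provider (placeholder API key); list_llm_models() filters the /v1/models listing to chat models with function calling and returns LLMConfig entries pointed at Mistral's OpenAI-compatible endpoint:

    from letta.providers import MistralProvider

    provider = MistralProvider(api_key="<MISTRAL_API_KEY>")  # placeholder key
    for config in provider.list_llm_models():
        print(config.model, config.context_window)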