letta-nightly 0.8.10.dev20250707035305__py3-none-any.whl → 0.8.11.dev20250708000504__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/constants.py +1 -0
- letta/helpers/pinecone_utils.py +12 -2
- letta/jobs/scheduler.py +45 -91
- letta/schemas/source_metadata.py +32 -0
- letta/server/db.py +31 -0
- letta/server/rest_api/routers/v1/sources.py +23 -6
- letta/services/file_manager.py +70 -0
- letta/services/file_processor/chunker/line_chunker.py +32 -6
- letta/services/file_processor/chunker/llama_index_chunker.py +134 -84
- letta/services/file_processor/embedder/openai_embedder.py +1 -1
- letta/services/file_processor/file_processor.py +59 -15
- letta/services/tool_executor/files_tool_executor.py +1 -1
- letta/settings.py +1 -1
- {letta_nightly-0.8.10.dev20250707035305.dist-info → letta_nightly-0.8.11.dev20250708000504.dist-info}/METADATA +1 -1
- {letta_nightly-0.8.10.dev20250707035305.dist-info → letta_nightly-0.8.11.dev20250708000504.dist-info}/RECORD +19 -18
- {letta_nightly-0.8.10.dev20250707035305.dist-info → letta_nightly-0.8.11.dev20250708000504.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.10.dev20250707035305.dist-info → letta_nightly-0.8.11.dev20250708000504.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.10.dev20250707035305.dist-info → letta_nightly-0.8.11.dev20250708000504.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/constants.py
CHANGED
letta/helpers/pinecone_utils.py
CHANGED
@@ -2,7 +2,14 @@ from typing import Any, Dict, List

 from pinecone import PineconeAsyncio

-from letta.constants import
+from letta.constants import (
+    PINECONE_CLOUD,
+    PINECONE_EMBEDDING_MODEL,
+    PINECONE_MAX_BATCH_SIZE,
+    PINECONE_METRIC,
+    PINECONE_REGION,
+    PINECONE_TEXT_FIELD_NAME,
+)
 from letta.log import get_logger
 from letta.schemas.user import User
 from letta.settings import settings
@@ -90,7 +97,10 @@ async def upsert_records_to_pinecone_index(records: List[dict], actor: User):
     async with PineconeAsyncio(api_key=settings.pinecone_api_key) as pc:
         description = await pc.describe_index(name=settings.pinecone_source_index)
         async with pc.IndexAsyncio(host=description.index.host) as dense_index:
-
+            # Process records in batches to avoid exceeding Pinecone limits
+            for i in range(0, len(records), PINECONE_MAX_BATCH_SIZE):
+                batch = records[i : i + PINECONE_MAX_BATCH_SIZE]
+                await dense_index.upsert_records(actor.organization_id, batch)


 async def search_pinecone_index(query: str, limit: int, filter: Dict[str, Any], actor: User) -> Dict[str, Any]:
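The substance of this change is the batching loop: upserts are now sliced into PINECONE_MAX_BATCH_SIZE records per request instead of being sent in one call. A minimal standalone sketch of the same slicing pattern follows; the value 96 is only a stand-in, since the diff does not show the real constant.

# Sketch of the batching pattern above; 96 is an assumed placeholder for
# PINECONE_MAX_BATCH_SIZE, whose real value lives in letta/constants.py.
from typing import Any, Dict, List

PINECONE_MAX_BATCH_SIZE = 96  # assumed stand-in, not the actual constant


def iter_batches(records: List[Dict[str, Any]], batch_size: int = PINECONE_MAX_BATCH_SIZE):
    """Yield fixed-size slices so no single upsert exceeds the API limit."""
    for i in range(0, len(records), batch_size):
        yield records[i : i + batch_size]


# Each batch would be passed to dense_index.upsert_records(...)
for batch in iter_batches([{"_id": str(n)} for n in range(250)]):
    print(len(batch))  # 96, 96, 58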
letta/jobs/scheduler.py
CHANGED
@@ -4,10 +4,11 @@ from typing import Optional

 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.interval import IntervalTrigger
+from sqlalchemy import text

 from letta.jobs.llm_batch_job_polling import poll_running_llm_batches
 from letta.log import get_logger
-from letta.server.db import
+from letta.server.db import db_registry
 from letta.server.server import SyncServer
 from letta.settings import settings

@@ -16,68 +17,54 @@ scheduler = AsyncIOScheduler()
 logger = get_logger(__name__)
 ADVISORY_LOCK_KEY = 0x12345678ABCDEF00

-
-_advisory_lock_cur = None  # Holds the cursor for the lock connection if leader
+_advisory_lock_session = None  # Holds the async session if leader
 _lock_retry_task: Optional[asyncio.Task] = None  # Background task handle for non-leaders
 _is_scheduler_leader = False  # Flag indicating if this instance runs the scheduler


 async def _try_acquire_lock_and_start_scheduler(server: SyncServer) -> bool:
     """Attempts to acquire lock, starts scheduler if successful."""
-    global
+    global _advisory_lock_session, _is_scheduler_leader, scheduler

     if _is_scheduler_leader:
         return True  # Already leading

-
-    cur = None
+    lock_session = None
     acquired_lock = False
     try:
-
-        with db_context() as session:
+        async with db_registry.async_session() as session:
             engine = session.get_bind()
             engine_name = engine.name
             logger.info(f"Database engine type: {engine_name}")
-
             if engine_name != "postgresql":
                 logger.warning(f"Advisory locks not supported for {engine_name} database. Starting scheduler without leader election.")
-                acquired_lock = True
+                acquired_lock = True
             else:
-
-
-
-
-
-                acquired_lock = cur.fetchone()[0]
+                lock_session = db_registry.get_async_session_factory()()
+                result = await lock_session.execute(
+                    text("SELECT pg_try_advisory_lock(CAST(:lock_key AS bigint))"), {"lock_key": ADVISORY_LOCK_KEY}
+                )
+                acquired_lock = result.scalar()

         if not acquired_lock:
-            if
-
-            if raw_conn:
-                raw_conn.close()
+            if lock_session:
+                await lock_session.close()
             logger.info("Scheduler lock held by another instance.")
             return False

-        # --- Lock Acquired ---
         if engine_name == "postgresql":
             logger.info("Acquired PostgreSQL advisory lock.")
-
-
-            raw_conn = None  # Prevent closing in finally block
-            cur = None  # Prevent closing in finally block
+            _advisory_lock_session = lock_session
+            lock_session = None
         else:
             logger.info("Starting scheduler for non-PostgreSQL database.")
-
-
-
-            if raw_conn:
-                raw_conn.close()
-                raw_conn = None
-            cur = None
+            if lock_session:
+                await lock_session.close()
+                lock_session = None

         trigger = IntervalTrigger(
             seconds=settings.poll_running_llm_batches_interval_seconds,
-            jitter=10,
+            jitter=10,
         )
         scheduler.add_job(
             poll_running_llm_batches,
@@ -91,7 +78,7 @@ async def _try_acquire_lock_and_start_scheduler(server: SyncServer) -> bool:

         if not scheduler.running:
             scheduler.start()
-        elif scheduler.state == 2:
+        elif scheduler.state == 2:
             scheduler.resume()

         _is_scheduler_leader = True
@@ -99,38 +86,27 @@ async def _try_acquire_lock_and_start_scheduler(server: SyncServer) -> bool:

     except Exception as e:
         logger.error(f"Error during lock acquisition/scheduler start: {e}", exc_info=True)
-        if acquired_lock:
+        if acquired_lock:
             logger.warning("Attempting to release lock due to error during startup.")
             try:
-
-
-                _advisory_lock_conn = raw_conn
-                await _release_advisory_lock()  # Attempt cleanup
+                _advisory_lock_session = lock_session
+                await _release_advisory_lock()
             except Exception as unlock_err:
                 logger.error(f"Failed to release lock during error handling: {unlock_err}", exc_info=True)
             finally:
-
-                _advisory_lock_cur = None
-                _advisory_lock_conn = None
+                _advisory_lock_session = None
         _is_scheduler_leader = False

-        # Ensure scheduler is stopped if we failed partially
         if scheduler.running:
             try:
                 scheduler.shutdown(wait=False)
             except:
-                pass
+                pass
         return False
     finally:
-
-        if cur:
-            try:
-                cur.close()
-            except:
-                pass
-        if raw_conn:
+        if lock_session:
             try:
-
+                await lock_session.close()
             except:
                 pass

@@ -141,63 +117,50 @@ async def _background_lock_retry_loop(server: SyncServer):
     logger.info("Starting background task to periodically check for scheduler lock.")

     while True:
-        if _is_scheduler_leader:
+        if _is_scheduler_leader:
             break
         try:
             wait_time = settings.poll_lock_retry_interval_seconds
             await asyncio.sleep(wait_time)

-            # Re-check state before attempting lock
             if _is_scheduler_leader or _lock_retry_task is None:
-                break
+                break

             acquired = await _try_acquire_lock_and_start_scheduler(server)
             if acquired:
                 logger.info("Background task acquired lock and started scheduler.")
-                _lock_retry_task = None
-                break
+                _lock_retry_task = None
+                break

         except asyncio.CancelledError:
             logger.info("Background lock retry task cancelled.")
             break
         except Exception as e:
             logger.error(f"Error in background lock retry loop: {e}", exc_info=True)
-            # Avoid tight loop on persistent errors
             await asyncio.sleep(settings.poll_lock_retry_interval_seconds)


 async def _release_advisory_lock():
-    """Releases the advisory lock using the stored
-    global
+    """Releases the advisory lock using the stored session."""
+    global _advisory_lock_session

-
-
-    _advisory_lock_cur = None  # Clear global immediately
-    _advisory_lock_conn = None  # Clear global immediately
+    lock_session = _advisory_lock_session
+    _advisory_lock_session = None

-    if
+    if lock_session is not None:
         logger.info(f"Attempting to release PostgreSQL advisory lock {ADVISORY_LOCK_KEY}")
         try:
-
-            lock_cur.execute("SELECT pg_advisory_unlock(CAST(%s AS bigint))", (ADVISORY_LOCK_KEY,))
-            lock_cur.fetchone()  # Consume result
-            lock_conn.commit()
+            await lock_session.execute(text("SELECT pg_advisory_unlock(CAST(:lock_key AS bigint))"), {"lock_key": ADVISORY_LOCK_KEY})
             logger.info(f"Executed pg_advisory_unlock for lock {ADVISORY_LOCK_KEY}")
         except Exception as e:
             logger.error(f"Error executing pg_advisory_unlock: {e}", exc_info=True)
         finally:
-            # Ensure resources are closed regardless of unlock success
-            try:
-                if lock_cur:
-                    lock_cur.close()
-            except Exception as e:
-                logger.error(f"Error closing advisory lock cursor: {e}", exc_info=True)
             try:
-                if
-
-                    logger.info("Closed database
+                if lock_session:
+                    await lock_session.close()
+                    logger.info("Closed database session that held advisory lock.")
             except Exception as e:
-                logger.error(f"Error closing advisory lock
+                logger.error(f"Error closing advisory lock session: {e}", exc_info=True)
     else:
         logger.info("No PostgreSQL advisory lock to release (likely using SQLite or non-PostgreSQL database).")

@@ -220,7 +183,6 @@ async def start_scheduler_with_leader_election(server: SyncServer):
     acquired_immediately = await _try_acquire_lock_and_start_scheduler(server)

     if not acquired_immediately and _lock_retry_task is None:
-        # Failed initial attempt, start background retry task
         loop = asyncio.get_running_loop()
         _lock_retry_task = loop.create_task(_background_lock_retry_loop(server))

@@ -232,48 +194,40 @@ async def shutdown_scheduler_and_release_lock():
     """
     global _is_scheduler_leader, _lock_retry_task, scheduler

-    # 1. Cancel retry task if running (for non-leaders)
     if _lock_retry_task is not None:
         logger.info("Shutting down: Cancelling background lock retry task.")
         current_task = _lock_retry_task
-        _lock_retry_task = None
+        _lock_retry_task = None
         current_task.cancel()
         try:
-            await current_task
+            await current_task
         except asyncio.CancelledError:
             logger.info("Background lock retry task successfully cancelled.")
         except Exception as e:
             logger.warning(f"Exception waiting for cancelled retry task: {e}", exc_info=True)

-    # 2. Shutdown scheduler and release lock if we were the leader
     if _is_scheduler_leader:
         logger.info("Shutting down: Leader instance stopping scheduler and releasing lock.")
         if scheduler.running:
             try:
-                # Force synchronous shutdown to prevent callback scheduling
                 scheduler.shutdown(wait=True)

-                # wait for any internal cleanup to complete
                 await asyncio.sleep(0.1)

                 logger.info("APScheduler shut down.")
             except Exception as e:
-                # Handle SchedulerNotRunningError and other shutdown exceptions
                 logger.warning(f"Exception during APScheduler shutdown: {e}")
                 if "not running" not in str(e).lower():
                     logger.error(f"Unexpected error shutting down APScheduler: {e}", exc_info=True)

         await _release_advisory_lock()
-        _is_scheduler_leader = False
+        _is_scheduler_leader = False
     else:
         logger.info("Shutting down: Non-leader instance.")

-    # Final cleanup check for scheduler state (belt and suspenders)
-    # This should rarely be needed if shutdown logic above worked correctly
     try:
         if scheduler.running:
             logger.warning("Scheduler still running after shutdown logic completed? Forcing shutdown.")
             scheduler.shutdown(wait=False)
     except Exception as e:
-        # Catch SchedulerNotRunningError and other shutdown exceptions
         logger.debug(f"Expected exception during final scheduler cleanup: {e}")
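The rewrite replaces raw psycopg cursors with an async SQLAlchemy session, but the leader-election idea is unchanged: whichever instance wins pg_try_advisory_lock runs the scheduler, and the lock is held as long as that database session stays open. A minimal standalone sketch of the pattern, assuming SQLAlchemy 2.x async support and a placeholder asyncpg DSN:

# Sketch of session-level advisory-lock leader election; the DSN is a
# placeholder, the lock key matches the constant in the diff above.
import asyncio

from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine

ADVISORY_LOCK_KEY = 0x12345678ABCDEF00
DSN = "postgresql+asyncpg://user:pass@localhost/letta"  # placeholder


async def try_become_leader() -> bool:
    engine = create_async_engine(DSN)
    # The connection must stay open while we hold the lock: a session-level
    # advisory lock is released automatically when its session ends.
    conn = await engine.connect()
    result = await conn.execute(
        text("SELECT pg_try_advisory_lock(CAST(:k AS bigint))"), {"k": ADVISORY_LOCK_KEY}
    )
    if result.scalar():
        return True  # keep conn alive; later release via pg_advisory_unlock
    await conn.close()
    return False


# asyncio.run(try_become_leader())  # requires a reachable Postgres at DSN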
letta/schemas/source_metadata.py
ADDED

@@ -0,0 +1,32 @@
+from typing import List, Optional
+
+from pydantic import Field
+
+from letta.schemas.letta_base import LettaBase
+
+
+class FileStats(LettaBase):
+    """File statistics for metadata endpoint"""
+
+    file_id: str = Field(..., description="Unique identifier of the file")
+    file_name: str = Field(..., description="Name of the file")
+    file_size: Optional[int] = Field(None, description="Size of the file in bytes")
+
+
+class SourceStats(LettaBase):
+    """Aggregated metadata for a source"""
+
+    source_id: str = Field(..., description="Unique identifier of the source")
+    source_name: str = Field(..., description="Name of the source")
+    file_count: int = Field(0, description="Number of files in the source")
+    total_size: int = Field(0, description="Total size of all files in bytes")
+    files: List[FileStats] = Field(default_factory=list, description="List of file statistics")
+
+
+class OrganizationSourcesStats(LettaBase):
+    """Complete metadata response for organization sources"""
+
+    total_sources: int = Field(0, description="Total number of sources")
+    total_files: int = Field(0, description="Total number of files across all sources")
+    total_size: int = Field(0, description="Total size of all files in bytes")
+    sources: List[SourceStats] = Field(default_factory=list, description="List of source metadata")
letta/server/db.py
CHANGED
@@ -226,6 +226,32 @@ class DatabaseRegistry:
     @contextmanager
     def session(self, name: str = "default") -> Generator[Any, None, None]:
         """Context manager for database sessions."""
+        caller_info = "unknown caller"
+        try:
+            import inspect
+
+            frame = inspect.currentframe()
+            stack = inspect.getouterframes(frame)
+
+            for i, frame_info in enumerate(stack):
+                module = inspect.getmodule(frame_info.frame)
+                module_name = module.__name__ if module else "unknown"
+
+                if module_name != "contextlib" and "db.py" not in frame_info.filename:
+                    caller_module = module_name
+                    caller_function = frame_info.function
+                    caller_lineno = frame_info.lineno
+                    caller_file = frame_info.filename.split("/")[-1]
+
+                    caller_info = f"{caller_module}.{caller_function}:{caller_lineno} ({caller_file})"
+                    break
+        except:
+            pass
+        finally:
+            del frame
+
+        self.session_caller_trace(caller_info)
+
         session_factory = self.get_session_factory(name)
         if not session_factory:
             raise ValueError(f"No session factory found for '{name}'")
@@ -250,6 +276,11 @@ class DatabaseRegistry:
         finally:
             await session.close()

+    @trace_method
+    def session_caller_trace(self, caller_info: str):
+        """Trace sync db caller information for debugging purposes."""
+        pass  # wrapper used for otel tracing only
+

 # Create a singleton instance
 db_registry = DatabaseRegistry()
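The new block walks the call stack outward until it leaves db.py and contextlib, so OTel traces record which service actually opened the session. The same stack-walking trick, reduced to a runnable sketch (names here are illustrative, not letta APIs):

# Sketch of caller identification via inspect, mirroring the filter above.
import inspect


def describe_caller() -> str:
    frame = inspect.currentframe()
    try:
        # [0] is this function's own frame; walk outward from the caller,
        # skipping contextlib wrappers the way the diff does.
        for frame_info in inspect.getouterframes(frame)[1:]:
            module = inspect.getmodule(frame_info.frame)
            module_name = module.__name__ if module else "unknown"
            if module_name == "contextlib":
                continue
            short_file = frame_info.filename.split("/")[-1]
            return f"{module_name}.{frame_info.function}:{frame_info.lineno} ({short_file})"
        return "unknown caller"
    finally:
        del frame  # break the frame reference cycle, as the diff does


def open_session():
    # In db.py this string is handed to session_caller_trace() for tracing
    print(describe_caller())


open_session()  # prints something like "__main__.open_session:24 (example.py)"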
letta/server/rest_api/routers/v1/sources.py
CHANGED

@@ -23,10 +23,10 @@ from letta.schemas.enums import FileProcessingStatus
 from letta.schemas.file import FileMetadata
 from letta.schemas.passage import Passage
 from letta.schemas.source import Source, SourceCreate, SourceUpdate
+from letta.schemas.source_metadata import OrganizationSourcesStats
 from letta.schemas.user import User
 from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
-from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker
 from letta.services.file_processor.embedder.openai_embedder import OpenAIEmbedder
 from letta.services.file_processor.embedder.pinecone_embedder import PineconeEmbedder
 from letta.services.file_processor.file_processor import FileProcessor
@@ -95,6 +95,24 @@ async def get_source_id_by_name(
     return source.id


+@router.get("/metadata", response_model=OrganizationSourcesStats, operation_id="get_sources_metadata")
+async def get_sources_metadata(
+    server: "SyncServer" = Depends(get_letta_server),
+    actor_id: Optional[str] = Header(None, alias="user_id"),
+):
+    """
+    Get aggregated metadata for all sources in an organization.
+
+    Returns structured metadata including:
+    - Total number of sources
+    - Total number of files across all sources
+    - Total size of all files
+    - Per-source breakdown with file details (file_name, file_size per file)
+    """
+    actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
+    return await server.file_manager.get_organization_sources_metadata(actor=actor)
+
+
 @router.get("/", response_model=List[Source], operation_id="list_sources")
 async def list_sources(
     server: "SyncServer" = Depends(get_letta_server),
@@ -344,7 +362,9 @@ async def get_file_metadata(

     if should_use_pinecone() and not file_metadata.is_processing_terminal():
         ids = await list_pinecone_index_for_files(file_id=file_id, actor=actor, limit=file_metadata.total_chunks)
-        logger.info(
+        logger.info(
+            f"Embedded chunks {len(ids)}/{file_metadata.total_chunks} for {file_id} ({file_metadata.file_name}) in organization {actor.organization_id}"
+        )

         if len(ids) != file_metadata.chunks_embedded or len(ids) == file_metadata.total_chunks:
             if len(ids) != file_metadata.total_chunks:
@@ -424,15 +444,12 @@ async def load_file_to_source_cloud(
     file_metadata: FileMetadata,
 ):
     file_processor = MistralFileParser()
-    text_chunker = LlamaIndexChunker(chunk_size=embedding_config.embedding_chunk_size)
     using_pinecone = should_use_pinecone()
     if using_pinecone:
         embedder = PineconeEmbedder()
     else:
         embedder = OpenAIEmbedder(embedding_config=embedding_config)
-    file_processor = FileProcessor(
-        file_parser=file_processor, text_chunker=text_chunker, embedder=embedder, actor=actor, using_pinecone=using_pinecone
-    )
+    file_processor = FileProcessor(file_parser=file_processor, embedder=embedder, actor=actor, using_pinecone=using_pinecone)
     await file_processor.process(
         server=server, agent_states=agent_states, source_id=source_id, content=content, file_metadata=file_metadata
     )
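Calling the new endpoint is a plain GET with the actor resolved from the user_id header. A hedged sketch; the base URL, port, and header value are placeholders for whatever your deployment uses, and the /v1/sources prefix is assumed from the router this route is registered on:

# Sketch of calling the new metadata route with httpx.
import httpx

resp = httpx.get(
    "http://localhost:8283/v1/sources/metadata",  # assumed host/port/prefix
    headers={"user_id": "user-00000000"},          # placeholder actor id
)
resp.raise_for_status()
stats = resp.json()
print(stats["total_sources"], stats["total_files"], stats["total_size"])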
letta/services/file_manager.py
CHANGED
@@ -16,6 +16,7 @@ from letta.otel.tracing import trace_method
 from letta.schemas.enums import FileProcessingStatus
 from letta.schemas.file import FileMetadata as PydanticFileMetadata
 from letta.schemas.source import Source as PydanticSource
+from letta.schemas.source_metadata import FileStats, OrganizationSourcesStats, SourceStats
 from letta.schemas.user import User as PydanticUser
 from letta.server.db import db_registry
 from letta.utils import enforce_types
@@ -272,3 +273,72 @@ class FileManager:
         else:
             # Add numeric suffix
             return f"{source.name}/{base}_({count}){ext}"
+
+    @enforce_types
+    @trace_method
+    async def get_organization_sources_metadata(self, actor: PydanticUser) -> OrganizationSourcesStats:
+        """
+        Get aggregated metadata for all sources in an organization with optimized queries.
+
+        Returns structured metadata including:
+        - Total number of sources
+        - Total number of files across all sources
+        - Total size of all files
+        - Per-source breakdown with file details
+        """
+        async with db_registry.async_session() as session:
+            # Import here to avoid circular imports
+            from letta.orm.source import Source as SourceModel
+
+            # Single optimized query to get all sources with their file aggregations
+            query = (
+                select(
+                    SourceModel.id,
+                    SourceModel.name,
+                    func.count(FileMetadataModel.id).label("file_count"),
+                    func.coalesce(func.sum(FileMetadataModel.file_size), 0).label("total_size"),
+                )
+                .outerjoin(FileMetadataModel, (FileMetadataModel.source_id == SourceModel.id) & (FileMetadataModel.is_deleted == False))
+                .where(SourceModel.organization_id == actor.organization_id)
+                .where(SourceModel.is_deleted == False)
+                .group_by(SourceModel.id, SourceModel.name)
+                .order_by(SourceModel.name)
+            )
+
+            result = await session.execute(query)
+            source_aggregations = result.fetchall()
+
+            # Build response
+            metadata = OrganizationSourcesStats()
+
+            for row in source_aggregations:
+                source_id, source_name, file_count, total_size = row
+
+                # Get individual file details for this source
+                files_query = (
+                    select(FileMetadataModel.id, FileMetadataModel.file_name, FileMetadataModel.file_size)
+                    .where(
+                        FileMetadataModel.source_id == source_id,
+                        FileMetadataModel.organization_id == actor.organization_id,
+                        FileMetadataModel.is_deleted == False,
+                    )
+                    .order_by(FileMetadataModel.file_name)
+                )
+
+                files_result = await session.execute(files_query)
+                files_rows = files_result.fetchall()
+
+                # Build file stats
+                files = [FileStats(file_id=file_row[0], file_name=file_row[1], file_size=file_row[2]) for file_row in files_rows]
+
+                # Build source metadata
+                source_metadata = SourceStats(
+                    source_id=source_id, source_name=source_name, file_count=file_count, total_size=total_size, files=files
+                )
+
+                metadata.sources.append(source_metadata)
+                metadata.total_files += file_count
+                metadata.total_size += total_size
+
+            metadata.total_sources = len(metadata.sources)
+            return metadata
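The core of the aggregation is a LEFT OUTER JOIN with COUNT and COALESCE(SUM, 0), so sources with no files still appear with zero counts. Here is a self-contained toy version of that query using SQLAlchemy Core against in-memory SQLite; table and column names are simplified stand-ins for letta's ORM models:

# Toy version of the per-source aggregation query.
from sqlalchemy import (
    Boolean, Column, Integer, MetaData, String, Table, create_engine, func, select,
)

meta = MetaData()
sources = Table(
    "sources", meta,
    Column("id", String, primary_key=True),
    Column("name", String),
    Column("is_deleted", Boolean, default=False),
)
files = Table(
    "files", meta,
    Column("id", String, primary_key=True),
    Column("source_id", String),
    Column("file_size", Integer),
    Column("is_deleted", Boolean, default=False),
)

engine = create_engine("sqlite://")
meta.create_all(engine)

with engine.begin() as conn:
    conn.execute(sources.insert(), [{"id": "s1", "name": "docs", "is_deleted": False}])
    conn.execute(files.insert(), [
        {"id": "f1", "source_id": "s1", "file_size": 100, "is_deleted": False},
        {"id": "f2", "source_id": "s1", "file_size": 200, "is_deleted": False},
    ])
    # LEFT OUTER JOIN keeps sources with no files; COALESCE turns NULL sums into 0
    query = (
        select(
            sources.c.id,
            sources.c.name,
            func.count(files.c.id).label("file_count"),
            func.coalesce(func.sum(files.c.file_size), 0).label("total_size"),
        )
        .outerjoin(files, (files.c.source_id == sources.c.id) & (files.c.is_deleted == False))
        .where(sources.c.is_deleted == False)
        .group_by(sources.c.id, sources.c.name)
    )
    for row in conn.execute(query):
        print(row)  # ('s1', 'docs', 2, 300)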
letta/services/file_processor/chunker/line_chunker.py
CHANGED

@@ -99,7 +99,12 @@ class LineChunker:
         return [line for line in lines if line.strip()]

     def chunk_text(
-        self,
+        self,
+        file_metadata: FileMetadata,
+        start: Optional[int] = None,
+        end: Optional[int] = None,
+        add_metadata: bool = True,
+        validate_range: bool = False,
     ) -> List[str]:
         """Content-aware text chunking based on file type"""
         strategy = self._determine_chunking_strategy(file_metadata)
@@ -116,11 +121,31 @@ class LineChunker:
         content_lines = self._chunk_by_lines(text, preserve_indentation=False)

         total_chunks = len(content_lines)
+        chunk_type = (
+            "sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "chunks" if strategy == ChunkingStrategy.PROSE else "lines"
+        )
+
+        # Validate range if requested
+        if validate_range and (start is not None or end is not None):
+            if start is not None and start >= total_chunks:
+                # Convert to 1-indexed for user-friendly error message
+                start_display = start + 1
+                raise ValueError(
+                    f"File {file_metadata.file_name} has only {total_chunks} lines, but requested offset {start_display} is out of range"
+                )
+
+            if start is not None and end is not None and end > total_chunks:
+                # Convert to 1-indexed for user-friendly error message
+                start_display = start + 1
+                end_display = end
+                raise ValueError(
+                    f"File {file_metadata.file_name} has only {total_chunks} lines, but requested range {start_display} to {end_display} extends beyond file bounds"
+                )

         # Handle start/end slicing
-        if start is not None
+        if start is not None or end is not None:
             content_lines = content_lines[start:end]
-            line_offset = start
+            line_offset = start if start is not None else 0
         else:
             line_offset = 0

@@ -129,14 +154,15 @@ class LineChunker:

         # Add metadata about total chunks
         if add_metadata:
-            chunk_type = (
-                "sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "chunks" if strategy == ChunkingStrategy.PROSE else "lines"
-            )
             if start is not None and end is not None:
                 # Display 1-indexed ranges for users
                 start_display = start + 1
                 end_display = end
                 content_lines.insert(0, f"[Viewing {chunk_type} {start_display} to {end_display} (out of {total_chunks} {chunk_type})]")
+            elif start is not None:
+                # Only start specified - viewing from start to end
+                start_display = start + 1
+                content_lines.insert(0, f"[Viewing {chunk_type} {start_display} to end (out of {total_chunks} {chunk_type})]")
             else:
                 content_lines.insert(0, f"[Viewing file start (out of {total_chunks} {chunk_type})]")

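The new validate_range flag turns silently-empty slices into errors: offsets are 0-indexed internally and reported 1-indexed to users. A condensed illustration of that behavior, with the file-type logic stripped out:

# Sketch of the range-validation semantics added above.
from typing import List, Optional


def slice_lines(
    lines: List[str],
    start: Optional[int] = None,
    end: Optional[int] = None,
    validate_range: bool = False,
) -> List[str]:
    total = len(lines)
    if validate_range and start is not None and start >= total:
        raise ValueError(f"file has only {total} lines, but requested offset {start + 1} is out of range")
    if validate_range and start is not None and end is not None and end > total:
        raise ValueError(f"file has only {total} lines, but requested range {start + 1} to {end} extends beyond file bounds")
    return lines[start:end]


lines = [f"line {n}" for n in range(1, 6)]
print(slice_lines(lines, start=2, end=4))          # ['line 3', 'line 4']
# slice_lines(lines, start=10, validate_range=True)  # raises ValueError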
letta/services/file_processor/chunker/llama_index_chunker.py
CHANGED

@@ -1,119 +1,169 @@
-from typing import List,
+from typing import List, Optional, Union

 from mistralai import OCRPageObject

 from letta.log import get_logger
 from letta.otel.tracing import trace_method
+from letta.services.file_processor.file_types import ChunkingStrategy, file_type_registry

 logger = get_logger(__name__)


 class LlamaIndexChunker:
-    """LlamaIndex-based text chunking"""
+    """LlamaIndex-based text chunking with automatic splitter selection"""

-
+    # Conservative default chunk sizes for fallback scenarios
+    DEFAULT_CONSERVATIVE_CHUNK_SIZE = 384
+    DEFAULT_CONSERVATIVE_CHUNK_OVERLAP = 25
+
+    def __init__(self, chunk_size: int = 512, chunk_overlap: int = 50, file_type: Optional[str] = None):
         self.chunk_size = chunk_size
         self.chunk_overlap = chunk_overlap
+        self.file_type = file_type

-
+        # Create appropriate parser based on file type
+        self.parser = self._create_parser_for_file_type(file_type, chunk_size, chunk_overlap)

-
+        # Log which parser was selected
+        parser_name = type(self.parser).__name__
+        logger.info(f"LlamaIndexChunker initialized with {parser_name} for file type: {file_type}")
+
+    def _create_parser_for_file_type(self, file_type: Optional[str], chunk_size: int, chunk_overlap: int):
+        """Create appropriate parser based on file type"""
+        if not file_type:
+            # Default fallback
+            from llama_index.core.node_parser import SentenceSplitter
+
+            return SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

-    # TODO: Make this more general beyond Mistral
-    @trace_method
-    def chunk_text(self, page: OCRPageObject) -> List[str]:
-        """Chunk text using LlamaIndex splitter"""
         try:
-
+            # Get chunking strategy from file type registry
+            chunking_strategy = file_type_registry.get_chunking_strategy_by_mime_type(file_type)
+            logger.debug(f"Chunking strategy for {file_type}: {chunking_strategy}")

-
-
-            raise
+            if chunking_strategy == ChunkingStrategy.CODE:
+                from llama_index.core.node_parser import CodeSplitter

+                return CodeSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

-
-
+            elif chunking_strategy == ChunkingStrategy.DOCUMENTATION:
+                if file_type in ["text/markdown", "text/x-markdown"]:
+                    from llama_index.core.node_parser import MarkdownNodeParser

-
-
-
+                    return MarkdownNodeParser()
+                elif file_type in ["text/html"]:
+                    from llama_index.core.node_parser import HTMLNodeParser
+
+                    return HTMLNodeParser()
+                else:
+                    # Fall back to sentence splitter for other documentation
+                    from llama_index.core.node_parser import SentenceSplitter
+
+                    return SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+
+            elif chunking_strategy == ChunkingStrategy.STRUCTURED_DATA:
+                if file_type in ["application/json", "application/jsonl"]:
+                    from llama_index.core.node_parser import JSONNodeParser

-
+                    return JSONNodeParser()
+                else:
+                    # Fall back to sentence splitter for other structured data
+                    from llama_index.core.node_parser import SentenceSplitter

-
+                    return SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

-
-
-
+            else:
+                # Default to sentence splitter for PROSE and LINE_BASED
+                from llama_index.core.node_parser import SentenceSplitter

-
-
-
+                return SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+
+        except Exception as e:
+            logger.warning(f"Failed to create specialized parser for {file_type}: {str(e)}. Using default SentenceSplitter.")
+            from llama_index.core.node_parser import SentenceSplitter
+
+            return SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+
+    @trace_method
+    def chunk_text(self, content: Union[OCRPageObject, str]) -> List[str]:
+        """Chunk text using LlamaIndex splitter"""
         try:
-            #
-
+            # Handle different input types
+            if isinstance(content, OCRPageObject):
+                # Extract markdown from OCR page object
+                text_content = content.markdown
+            else:
+                # Assume it's a string
+                text_content = content
+
+            # Use the selected parser
+            if hasattr(self.parser, "split_text"):
+                # Most parsers have split_text method
+                return self.parser.split_text(text_content)
+            elif hasattr(self.parser, "get_nodes_from_documents"):
+                # Some parsers need Document objects
+                from llama_index.core import Document
+                from llama_index.core.node_parser import SentenceSplitter
+
+                document = Document(text=text_content)
+                nodes = self.parser.get_nodes_from_documents([document])
+
+                # Further split nodes that exceed chunk_size using SentenceSplitter
+                final_chunks = []
+                sentence_splitter = SentenceSplitter(chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap)
+
+                for node in nodes:
+                    if len(node.text) > self.chunk_size:
+                        # Split oversized nodes with sentence splitter
+                        sub_chunks = sentence_splitter.split_text(node.text)
+                        final_chunks.extend(sub_chunks)
+                    else:
+                        final_chunks.append(node.text)
+
+                return final_chunks
+            else:
+                # Fallback - try to call the parser directly
+                return self.parser(text_content)

-
-
+        except Exception as e:
+            logger.error(f"Chunking failed with {type(self.parser).__name__}: {str(e)}")
+            # Try fallback with SentenceSplitter
+            try:
+                logger.info("Attempting fallback to SentenceSplitter")
+                from llama_index.core.node_parser import SentenceSplitter

-
-            nodes = self.parser.get_nodes_from_documents([document])
+                fallback_parser = SentenceSplitter(chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap)

-
+                # Extract text content if needed
+                if isinstance(content, OCRPageObject):
+                    text_content = content.markdown
+                else:
+                    text_content = content

-
-
+                return fallback_parser.split_text(text_content)
+            except Exception as fallback_error:
+                logger.error(f"Fallback chunking also failed: {str(fallback_error)}")
+                raise e  # Raise the original error

-
-
+    @trace_method
+    def default_chunk_text(self, content: Union[OCRPageObject, str], chunk_size: int = None, chunk_overlap: int = None) -> List[str]:
+        """Chunk text using default SentenceSplitter regardless of file type with conservative defaults"""
+        try:
+            from llama_index.core.node_parser import SentenceSplitter

-
+            # Use provided defaults or fallback to conservative values
+            chunk_size = chunk_size if chunk_size is not None else self.DEFAULT_CONSERVATIVE_CHUNK_SIZE
+            chunk_overlap = chunk_overlap if chunk_overlap is not None else self.DEFAULT_CONSERVATIVE_CHUNK_OVERLAP
+            default_parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

-
+            # Handle different input types
+            if isinstance(content, OCRPageObject):
+                text_content = content.markdown
+            else:
+                text_content = content
+
+            return default_parser.split_text(text_content)

         except Exception as e:
-            logger.error(f"
-
-            return self._fallback_line_chunking(markdown_content)
-
-    def _find_line_numbers(self, chunk_text: str, lines: List[str]) -> Tuple[int, int]:
-        """Find the start and end line numbers for a given chunk of text."""
-        chunk_lines = chunk_text.split("\n")
-
-        # Find the first line of the chunk in the original document
-        start_line = 1
-        for i, line in enumerate(lines):
-            if chunk_lines[0].strip() in line.strip() and len(chunk_lines[0].strip()) > 10:  # Avoid matching short lines
-                start_line = i + 1
-                break
-
-        # Calculate end line
-        end_line = start_line + len(chunk_lines) - 1
-
-        return start_line, min(end_line, len(lines))
-
-    def _fallback_line_chunking(self, markdown_content: str) -> List[Tuple[str, int, int]]:
-        """Fallback chunking method that simply splits by lines with no overlap."""
-        lines = markdown_content.split("\n")
-        chunks = []
-
-        i = 0
-        while i < len(lines):
-            chunk_lines = []
-            start_line = i + 1
-            char_count = 0
-
-            # Build chunk until we hit size limit
-            while i < len(lines) and char_count < self.chunk_size:
-                line = lines[i]
-                chunk_lines.append(line)
-                char_count += len(line) + 1  # +1 for newline
-                i += 1
-
-            end_line = i
-            chunk_text = "\n".join(chunk_lines)
-            chunks.append((chunk_text, start_line, end_line))
-
-            # No overlap - continue from where we left off
-
-        return chunks
+            logger.error(f"Default chunking failed: {str(e)}")
+            raise
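Note that chunk_text() dispatches on the parser's interface rather than its type: llama-index splitters expose split_text, node parsers expose get_nodes_from_documents, and anything else is tried as a callable. The same duck-typing dispatch, shown with minimal stand-in parser classes so it runs without llama-index installed:

# Stand-in illustration of the hasattr-based parser dispatch above.
from typing import List


class SplitTextParser:
    def split_text(self, text: str) -> List[str]:
        return text.split(". ")


class Node:
    def __init__(self, text: str):
        self.text = text


class NodeParser:
    def get_nodes_from_documents(self, docs) -> List[Node]:
        return [Node(d) for d in docs]


def dispatch_chunk(parser, text: str) -> List[str]:
    if hasattr(parser, "split_text"):                   # most splitters
        return parser.split_text(text)
    elif hasattr(parser, "get_nodes_from_documents"):   # node parsers
        return [node.text for node in parser.get_nodes_from_documents([text])]
    return parser(text)                                  # last-resort callable


print(dispatch_chunk(SplitTextParser(), "One. Two. Three"))
print(dispatch_chunk(NodeParser(), "whole document"))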
letta/services/file_processor/embedder/openai_embedder.py
CHANGED

@@ -91,7 +91,7 @@ class OpenAIEmbedder(BaseEmbedder):
         try:
             return await self._embed_batch(batch, indices)
         except Exception as e:
-            logger.error(
+            logger.error("Failed to embed batch of size %s: %s", len(batch), e)
             log_event("embedder.batch_failed", {"batch_size": len(batch), "error": str(e), "error_type": type(e).__name__})
             raise

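The replacement uses logging's %-style argument passing rather than an f-string: with %-style arguments the message is only interpolated if the record is actually emitted, which is the standard-library convention for log calls. A tiny comparison:

# Lazy vs. eager formatting in the stdlib logging module.
import logging

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger("demo")

batch = ["a", "b", "c"]
err = RuntimeError("boom")

logger.error("Failed to embed batch of size %s: %s", len(batch), err)      # lazy
# logger.error(f"Failed to embed batch of size {len(batch)}: {err}")       # eager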
letta/services/file_processor/file_processor.py
CHANGED

@@ -26,14 +26,12 @@ class FileProcessor:
     def __init__(
         self,
         file_parser: MistralFileParser,
-        text_chunker: LlamaIndexChunker,
         embedder: BaseEmbedder,
         actor: User,
         using_pinecone: bool,
         max_file_size: int = 50 * 1024 * 1024,  # 50MB default
     ):
         self.file_parser = file_parser
-        self.text_chunker = text_chunker
         self.line_chunker = LineChunker()
         self.embedder = embedder
         self.max_file_size = max_file_size
@@ -44,6 +42,61 @@ class FileProcessor:
         self.actor = actor
         self.using_pinecone = using_pinecone

+    async def _chunk_and_embed_with_fallback(self, file_metadata: FileMetadata, ocr_response, source_id: str) -> List:
+        """Chunk text and generate embeddings with fallback to default chunker if needed"""
+        filename = file_metadata.file_name
+
+        # Create file-type-specific chunker
+        text_chunker = LlamaIndexChunker(file_type=file_metadata.file_type)
+
+        # First attempt with file-specific chunker
+        try:
+            all_chunks = []
+            for page in ocr_response.pages:
+                chunks = text_chunker.chunk_text(page)
+                if not chunks:
+                    log_event("file_processor.chunking_failed", {"filename": filename, "page_index": ocr_response.pages.index(page)})
+                    raise ValueError("No chunks created from text")
+                all_chunks.extend(chunks)
+
+            all_passages = await self.embedder.generate_embedded_passages(
+                file_id=file_metadata.id, source_id=source_id, chunks=all_chunks, actor=self.actor
+            )
+            return all_passages
+
+        except Exception as e:
+            logger.warning(f"Failed to chunk/embed with file-specific chunker for {filename}: {str(e)}. Retrying with default chunker.")
+            log_event("file_processor.embedding_failed_retrying", {"filename": filename, "error": str(e), "error_type": type(e).__name__})
+
+            # Retry with default chunker
+            try:
+                logger.info(f"Retrying chunking with default SentenceSplitter for {filename}")
+                all_chunks = []
+
+                for page in ocr_response.pages:
+                    chunks = text_chunker.default_chunk_text(page)
+                    if not chunks:
+                        log_event(
+                            "file_processor.default_chunking_failed", {"filename": filename, "page_index": ocr_response.pages.index(page)}
+                        )
+                        raise ValueError("No chunks created from text with default chunker")
+                    all_chunks.extend(chunks)
+
+                all_passages = await self.embedder.generate_embedded_passages(
+                    file_id=file_metadata.id, source_id=source_id, chunks=all_chunks, actor=self.actor
+                )
+                logger.info(f"Successfully generated passages with default chunker for {filename}")
+                log_event("file_processor.default_chunking_success", {"filename": filename, "total_chunks": len(all_chunks)})
+                return all_passages
+
+            except Exception as fallback_error:
+                logger.error("Default chunking also failed for %s: %s", filename, fallback_error)
+                log_event(
+                    "file_processor.default_chunking_also_failed",
+                    {"filename": filename, "fallback_error": str(fallback_error), "fallback_error_type": type(fallback_error).__name__},
+                )
+                raise fallback_error
+
     # TODO: Factor this function out of SyncServer
     @trace_method
     async def process(
@@ -111,19 +164,10 @@ class FileProcessor:

         logger.info("Chunking extracted text")
         log_event("file_processor.chunking_started", {"filename": filename, "pages_to_process": len(ocr_response.pages)})
-        all_chunks = []
-
-        for page in ocr_response.pages:
-            chunks = self.text_chunker.chunk_text(page)

-
-
-
-            all_chunks.extend(self.text_chunker.chunk_text(page))
-
-        all_passages = await self.embedder.generate_embedded_passages(
-            file_id=file_metadata.id, source_id=source_id, chunks=all_chunks, actor=self.actor
+        # Chunk and embed with fallback logic
+        all_passages = await self._chunk_and_embed_with_fallback(
+            file_metadata=file_metadata, ocr_response=ocr_response, source_id=source_id
         )

         if not self.using_pinecone:
@@ -156,7 +200,7 @@ class FileProcessor:
             return all_passages

         except Exception as e:
-            logger.error(
+            logger.error("File processing failed for %s: %s", filename, e)
             log_event(
                 "file_processor.processing_failed",
                 {
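The new helper is a two-stage retry: chunk with the file-type-specific splitter, and on any failure (including an empty result) redo the whole document with the conservative default before giving up. The shape of that logic, reduced to a runnable skeleton with illustrative names rather than letta APIs:

# Skeleton of the try-specialized-then-fall-back pattern above.
from typing import Callable, List


def chunk_with_fallback(
    pages: List[str],
    primary: Callable[[str], List[str]],
    fallback: Callable[[str], List[str]],
) -> List[str]:
    try:
        chunks = [c for page in pages for c in primary(page)]
        if not chunks:
            raise ValueError("No chunks created from text")
        return chunks
    except Exception:
        # Retry the whole document with the conservative splitter
        chunks = [c for page in pages for c in fallback(page)]
        if not chunks:
            raise ValueError("No chunks created from text with default chunker")
        return chunks


pages = ["alpha beta", "gamma delta"]
print(chunk_with_fallback(pages, primary=lambda p: [], fallback=str.split))
# ['alpha', 'beta', 'gamma', 'delta']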
letta/services/tool_executor/files_tool_executor.py
CHANGED

@@ -175,7 +175,7 @@ class LettaFileToolExecutor(ToolExecutor):
         file = await self.file_manager.get_file_by_id(file_id=file_id, actor=self.actor, include_content=True)

         # Process file content
-        content_lines = LineChunker().chunk_text(file_metadata=file, start=start, end=end)
+        content_lines = LineChunker().chunk_text(file_metadata=file, start=start, end=end, validate_range=True)
         visible_content = "\n".join(content_lines)

         # Handle LRU eviction and file opening
letta/settings.py
CHANGED
@@ -247,7 +247,7 @@ class Settings(BaseSettings):
     # cron job parameters
     enable_batch_job_polling: bool = False
     poll_running_llm_batches_interval_seconds: int = 5 * 60
-    poll_lock_retry_interval_seconds: int =
+    poll_lock_retry_interval_seconds: int = 8 * 60
     batch_job_polling_lookback_weeks: int = 2
     batch_job_polling_batch_size: Optional[int] = None

{letta_nightly-0.8.10.dev20250707035305.dist-info → letta_nightly-0.8.11.dev20250708000504.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-letta/__init__.py,sha256=
+letta/__init__.py,sha256=MY4Eqs94gdtwy8X6KBPrk0zKk7LUqwyQWvQU-ssrpes,1223
 letta/agent.py,sha256=esW2W5hBzO7aPr7ghEDb_fLnUxgYqBYDq_VWtQDrB0c,89153
 letta/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/agents/base_agent.py,sha256=Z1jgCTMFRTLnaLRcfdo8TmsP8tuCYqNcOM8ov9kviMA,6869
@@ -17,7 +17,7 @@ letta/client/client.py,sha256=l_yKUUzl1-qfxFkDHsOMHxSwyzOBbx-2mi0GfI3WlJE,84906
 letta/client/streaming.py,sha256=UsDS_tDTsA3HgYryIDvGGmx_dWfnfQwtmEwLi4Z89Ik,4701
 letta/client/utils.py,sha256=VCGV-op5ZSmurd4yw7Vhf93XDQ0BkyBT8qsuV7EqfiU,2859
 letta/config.py,sha256=JFGY4TWW0Wm5fTbZamOwWqk5G8Nn-TXyhgByGoAqy2c,12375
-letta/constants.py,sha256=
+letta/constants.py,sha256=eGXwXbvIZyvxiX2Trg28i8a7ABfhWqtqb36D3ciyr_8,14733
 letta/data_sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/data_sources/connectors.py,sha256=V8mUgE3V6CX-CcOyvkPSQ_ZWP2VtuqgTEXkCN1j0p68,7920
 letta/data_sources/connectors_helper.py,sha256=oQpVlc-BjSz9sTZ7sp4PsJSXJbBKpZPi3Dam03CURTQ,3376
@@ -59,7 +59,7 @@ letta/helpers/datetime_helpers.py,sha256=8AwZInX-NX_XQiqej2arozYqfC2ysnWpCJ9ETv8
 letta/helpers/decorators.py,sha256=jyywXMxO5XPDSe93ybVXIOjTWkGX514S9BMcy_gP0j8,5891
 letta/helpers/json_helpers.py,sha256=PWZ5HhSqGXO4e563dM_8M72q7ScirjXQ4Rv1ckohaV8,396
 letta/helpers/message_helper.py,sha256=Xzf_VCMAXT0Ys8LVUh1ySVtgJwabSQYksOdPr7P4EJU,3549
-letta/helpers/pinecone_utils.py,sha256=
+letta/helpers/pinecone_utils.py,sha256=LMfrynzenK_IuVGEsZmULz4AAtZ58Wof02ENj-NwtLQ,6511
 letta/helpers/singleton.py,sha256=Y4dG_ZBCcrogvl9iZ69bSLq-QltrdP8wHqKkhef8OBI,370
 letta/helpers/tool_execution_helper.py,sha256=BgBgVLZzbc-JTdOGwyU9miV_-zM3A30jkMpwH1otxaU,7599
 letta/helpers/tool_rule_solver.py,sha256=avRMQzqxE2r6gRvw7oTImYmkSvuoMHlADPND0__feBw,11620
@@ -75,7 +75,7 @@ letta/interfaces/utils.py,sha256=c6jvO0dBYHh8DQnlN-B0qeNC64d3CSunhfqlFA4pJTY,278
 letta/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/jobs/helpers.py,sha256=kO4aj954xsQ1RAmkjY6LQQ7JEIGuhaxB1e9pzrYKHAY,914
 letta/jobs/llm_batch_job_polling.py,sha256=r_6D5RcqEJQgrdh-rnN7vdLD0GAQl-GGmIfCnV0naHQ,10299
-letta/jobs/scheduler.py,sha256=
+letta/jobs/scheduler.py,sha256=bnwvgT_72ULlKmSFG-26T6tfmiEzxSrm9ARl1MuOEvA,8818
 letta/jobs/types.py,sha256=K8GKEnqEgAT6Kq4F2hUrBC4ZAFM9OkfOjVMStzxKuXQ,742
 letta/llm_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/llm_api/anthropic.py,sha256=tbMy4483TySrEmbXD3juM6TpPRrV9_M3Fgp59sDBcqE,47935
@@ -265,6 +265,7 @@ letta/schemas/response_format.py,sha256=pXNsjbtpA3Tf8HsDyIa40CSmoUbVR_7n2WOfQaX4
 letta/schemas/run.py,sha256=1lVOWlHVbk9MYIOiIrE1gCoQvBhErKo7UMSeWyMExbw,2089
 letta/schemas/sandbox_config.py,sha256=thI4p7R4nnW1W-F_PBNkpmyHXpSH_lorlQX8YxDXSe0,5252
 letta/schemas/source.py,sha256=ZDeTjkNp1rKamG7xZzoUHeCptjpW9WNLzAcJ9QQRxlM,3444
+letta/schemas/source_metadata.py,sha256=_dGjuXhGcVMlc53ja9yuk16Uj64ggEzilRDgmkqYfNs,1334
 letta/schemas/step.py,sha256=QudHSpLMcNVC-oI26Uy48lsp3FOrcd3JYAp5ubDEIHY,2651
 letta/schemas/tool.py,sha256=C2HdnmwrjAWoBjB8H2lpO8oIys3HlkRrRWtR8uMcUfc,14375
 letta/schemas/tool_execution_result.py,sha256=4P77llsUsZBnRd0PtPiC4VzGjx7i_-fUNgXQfCpMS9U,896
@@ -283,7 +284,7 @@ letta/serialize_schemas/marshmallow_tool.py,sha256=jwU69BDCakPlYPSk-ta21kuvsURKO
 letta/serialize_schemas/pydantic_agent_schema.py,sha256=CqGqSFzArYE2CzFsIU8LXVmH1A1jYFQpFy7Sj62n_4A,3171
 letta/server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/constants.py,sha256=yAdGbLkzlOU_dLTx0lKDmAnj0ZgRXCEaIcPJWO69eaE,92
-letta/server/db.py,sha256=
+letta/server/db.py,sha256=q5wCpTxpcbutk2HuF2ZpyhlPGCdUuLpRFZ0QE5VkZ38,11751
 letta/server/generate_openapi_schema.sh,sha256=0OtBhkC1g6CobVmNEd_m2B6sTdppjbJLXaM95icejvE,371
 letta/server/rest_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/rest_api/app.py,sha256=JEJlOKBPKfnkQeM6BAd5Olf4EM7O2TKD4_MHObFuci8,18084
@@ -310,7 +311,7 @@ letta/server/rest_api/routers/v1/organizations.py,sha256=5NEjTOdGKWrfN584jfPpJhA
 letta/server/rest_api/routers/v1/providers.py,sha256=8SJ_RsSk7L4nh1f_uFE31JOxefmGhOfN-fMJ0Sp6SJo,4353
 letta/server/rest_api/routers/v1/runs.py,sha256=vieUp7uTvRTdAte0Nw1bqX2APMATZhKTr2R1HVNJT74,8879
 letta/server/rest_api/routers/v1/sandbox_configs.py,sha256=pKuy88GD3atrBkKa7VVfKTjg8Y07e1vVtdw4TtxkQBk,8910
-letta/server/rest_api/routers/v1/sources.py,sha256=
+letta/server/rest_api/routers/v1/sources.py,sha256=RfCp7XCFpEIF6eTHfbKXvE7mAdHFUrpDU_AXbwvJj3o,19584
 letta/server/rest_api/routers/v1/steps.py,sha256=N863b0Oyzz64rKHqpyQnXEQBw0SCQ8kAxWaZ7huV1Rk,4925
 letta/server/rest_api/routers/v1/tags.py,sha256=ef94QitUSJ3NQVffWF1ZqANUZ2b2jRyGHp_I3UUjhno,912
 letta/server/rest_api/routers/v1/telemetry.py,sha256=z53BW3Pefi3eWy47FPJyGhFWbZicX9jPJUi5LC5c3sk,790
@@ -339,16 +340,16 @@ letta/services/block_manager.py,sha256=YwDGdy6f6MNXVXVOxIMOOP6IEWT8h-k5uQlveof0p
 letta/services/context_window_calculator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/services/context_window_calculator/context_window_calculator.py,sha256=H0-Ello1DHV28MnzMseWrg--jarDc6YwCcgwPlWjtZk,6527
 letta/services/context_window_calculator/token_counter.py,sha256=Ai9-aPkNvhhMTj9zlvdiQAdVqroTzIyAn0TrHpHNQZY,2954
-letta/services/file_manager.py,sha256=
+letta/services/file_manager.py,sha256=Zm0wK4pkKz_rkPtANZCaxp2mXgi6rWg9moJ-nic6Bms,14277
 letta/services/file_processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/services/file_processor/chunker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-letta/services/file_processor/chunker/line_chunker.py,sha256=
-letta/services/file_processor/chunker/llama_index_chunker.py,sha256=
+letta/services/file_processor/chunker/line_chunker.py,sha256=m02molsKXU_RUEebbHhMA6LNxg3JmFlCTOuX6kZcz3E,7024
+letta/services/file_processor/chunker/llama_index_chunker.py,sha256=zHjwQUE4QTJonxHpG09sd_0fgt4KTUyjRJawUcGDAyI,7615
 letta/services/file_processor/embedder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/services/file_processor/embedder/base_embedder.py,sha256=cuHF2kAlBFL9Hr63Q5vJQYYrfyDNtm31vYvW5boUQ58,518
-letta/services/file_processor/embedder/openai_embedder.py,sha256=
+letta/services/file_processor/embedder/openai_embedder.py,sha256=qafYDdbbBDCv5Mg-gdZozc5qFCdraaG8B8OCLd8_3vY,5715
 letta/services/file_processor/embedder/pinecone_embedder.py,sha256=O33NGvDyOG07Iz-tEhZDu_PKq7NfWIaBzjJuLi8hDiU,2841
-letta/services/file_processor/file_processor.py,sha256=
+letta/services/file_processor/file_processor.py,sha256=E2lEoootYA8mAgNKGq2u2KCVqJqg8QCAO3-3pVaV60U,10047
 letta/services/file_processor/file_types.py,sha256=9k3Lt_bquQjJ7T6L12fPS9IS5wldhJ2puSkH6rhfCaE,13128
 letta/services/file_processor/parser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/services/file_processor/parser/base_parser.py,sha256=WfnXP6fL-xQz4eIHEWa6-ZNEAARbF_alowqH4BAUzJo,238
@@ -385,7 +386,7 @@ letta/services/tool_executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
 letta/services/tool_executor/builtin_tool_executor.py,sha256=4hVeFnExExxuZHPIOD3LK9fPlFUt0s-b_TpKuvyxLwk,17384
 letta/services/tool_executor/composio_tool_executor.py,sha256=ia2AA_WDOseR8Ylam-HEayR7OiyfNSb1sSUrjwqlmFM,2308
 letta/services/tool_executor/core_tool_executor.py,sha256=YfUTxo78FNFPFNc6714RR_ztrf9I1GATs4cZhSbiEag,20503
-letta/services/tool_executor/files_tool_executor.py,sha256=
+letta/services/tool_executor/files_tool_executor.py,sha256=QZM0Q9zTsTSOnXq3gV5doy9HB6Mey_QdbAXWHpYlPDU,29045
 letta/services/tool_executor/mcp_tool_executor.py,sha256=x8V8J4Xi1ZVbwfaR_IwnUGRrD9w5wgV4G54sjraVBw4,1676
 letta/services/tool_executor/multi_agent_tool_executor.py,sha256=dfaZeldEnzJDg2jGHlGy3YXKjsJpokJW1tvVeoCCDrk,5496
 letta/services/tool_executor/tool_execution_manager.py,sha256=flCbTmtxZvYNcTGNC2MrYWkdPIatqFPTWnI8oJUWTIY,6399
@@ -398,7 +399,7 @@ letta/services/tool_sandbox/base.py,sha256=Vt4CnxuY5otUD6Kv8PpJNrAtl9eI8tjfcwkOd
 letta/services/tool_sandbox/e2b_sandbox.py,sha256=TrWWav56H1AsnaKgNZuq0RI-FeWHOZvOubtUywPH72s,11125
 letta/services/tool_sandbox/local_sandbox.py,sha256=RQ3iSZqP1nndSdU8pN8GoNDbQr8PRuRIK2-BinIYtK4,11810
 letta/services/user_manager.py,sha256=Neik-mxXgf9jc9jBiiBIlK38UukJonUy9NRS2soFR98,10405
-letta/settings.py,sha256=
+letta/settings.py,sha256=lWaLL1t06s9pp4VK5ojQvTiI7D85VcJPjaVKak5LFxs,11304
 letta/streaming_interface.py,sha256=c-T7zoMTXGXFwDWJJXrv7UypeMPXwPOmNHeuuh0b9zk,16398
 letta/streaming_utils.py,sha256=jLqFTVhUL76FeOuYk8TaRQHmPTf3HSRc2EoJwxJNK6U,11946
 letta/system.py,sha256=-cfh9Xpl2Ef_H7N3oZQtNuJqb1EEskdDXNa-VwKsF0A,8977
@@ -409,8 +410,8 @@ letta/templates/summary_request_text.j2,sha256=ZttQwXonW2lk4pJLYzLK0pmo4EO4EtUUI
 letta/templates/template_helper.py,sha256=uHWO1PukgMoIIvgqQdPyHq3o3CQ6mcjUjTGvx9VLGkk,409
 letta/types/__init__.py,sha256=hokKjCVFGEfR7SLMrtZsRsBfsC7yTIbgKPLdGg4K1eY,147
 letta/utils.py,sha256=4segcFYPNsPrzMpiouYoV6Qzj4TIHuqtCyzVwAMildM,36172
-letta_nightly-0.8.
-letta_nightly-0.8.
-letta_nightly-0.8.
-letta_nightly-0.8.
-letta_nightly-0.8.
+letta_nightly-0.8.11.dev20250708000504.dist-info/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
+letta_nightly-0.8.11.dev20250708000504.dist-info/METADATA,sha256=8fhj7YWfX0Qt65Shn8EfP5GjgmIU8bs86OMIfRo4pjg,22892
+letta_nightly-0.8.11.dev20250708000504.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+letta_nightly-0.8.11.dev20250708000504.dist-info/entry_points.txt,sha256=2zdiyGNEZGV5oYBuS-y2nAAgjDgcC9yM_mHJBFSRt5U,40
+letta_nightly-0.8.11.dev20250708000504.dist-info/RECORD,,

File without changes

File without changes