llama-deploy-appserver 0.2.7a1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
Files changed (38)
  1. llama_deploy/appserver/app.py +274 -26
  2. llama_deploy/appserver/bootstrap.py +55 -25
  3. llama_deploy/appserver/configure_logging.py +189 -0
  4. llama_deploy/appserver/correlation_id.py +24 -0
  5. llama_deploy/appserver/deployment.py +70 -412
  6. llama_deploy/appserver/deployment_config_parser.py +12 -130
  7. llama_deploy/appserver/interrupts.py +55 -0
  8. llama_deploy/appserver/process_utils.py +214 -0
  9. llama_deploy/appserver/py.typed +0 -0
  10. llama_deploy/appserver/routers/__init__.py +4 -3
  11. llama_deploy/appserver/routers/deployments.py +163 -382
  12. llama_deploy/appserver/routers/status.py +4 -31
  13. llama_deploy/appserver/routers/ui_proxy.py +255 -0
  14. llama_deploy/appserver/settings.py +99 -49
  15. llama_deploy/appserver/types.py +0 -3
  16. llama_deploy/appserver/workflow_loader.py +431 -0
  17. llama_deploy/appserver/workflow_store/agent_data_store.py +100 -0
  18. llama_deploy/appserver/workflow_store/keyed_lock.py +32 -0
  19. llama_deploy/appserver/workflow_store/lru_cache.py +49 -0
  20. llama_deploy_appserver-0.3.0.dist-info/METADATA +25 -0
  21. llama_deploy_appserver-0.3.0.dist-info/RECORD +24 -0
  22. {llama_deploy_appserver-0.2.7a1.dist-info → llama_deploy_appserver-0.3.0.dist-info}/WHEEL +1 -1
  23. llama_deploy/appserver/__main__.py +0 -14
  24. llama_deploy/appserver/client/__init__.py +0 -3
  25. llama_deploy/appserver/client/base.py +0 -30
  26. llama_deploy/appserver/client/client.py +0 -49
  27. llama_deploy/appserver/client/models/__init__.py +0 -4
  28. llama_deploy/appserver/client/models/apiserver.py +0 -356
  29. llama_deploy/appserver/client/models/model.py +0 -82
  30. llama_deploy/appserver/run_autodeploy.py +0 -141
  31. llama_deploy/appserver/server.py +0 -60
  32. llama_deploy/appserver/source_managers/__init__.py +0 -5
  33. llama_deploy/appserver/source_managers/base.py +0 -33
  34. llama_deploy/appserver/source_managers/git.py +0 -48
  35. llama_deploy/appserver/source_managers/local.py +0 -51
  36. llama_deploy/appserver/tracing.py +0 -237
  37. llama_deploy_appserver-0.2.7a1.dist-info/METADATA +0 -23
  38. llama_deploy_appserver-0.2.7a1.dist-info/RECORD +0 -28
llama_deploy/appserver/routers/deployments.py
@@ -1,433 +1,214 @@
 import asyncio
 import json
-import logging
-from typing import Annotated, AsyncGenerator, List, Optional
+from typing import AsyncGenerator

-import httpx
-import websockets
 from fastapi import (
     APIRouter,
-    Depends,
-    File,
     HTTPException,
-    Request,
-    UploadFile,
-    WebSocket,
 )
-from fastapi.responses import JSONResponse, StreamingResponse
+from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
 from llama_deploy.appserver.deployment import Deployment
-from llama_deploy.appserver.deployment_config_parser import DeploymentConfig
-from llama_deploy.appserver.server import manager
 from llama_deploy.appserver.types import (
-    DeploymentDefinition,
     EventDefinition,
     SessionDefinition,
     TaskDefinition,
     TaskResult,
     generate_id,
 )
-from starlette.background import BackgroundTask
+from llama_deploy.appserver.workflow_loader import DEFAULT_SERVICE_ID
 from workflows import Context
 from workflows.context import JsonSerializer
 from workflows.handler import WorkflowHandler

-deployments_router = APIRouter(
-    prefix="/deployments",
-)
-logger = logging.getLogger(__name__)
-
-
-def deployment(deployment_name: str) -> Deployment:
-    """FastAPI dependency to retrieve a Deployment instance"""
-    deployment = manager.get_deployment(deployment_name)
-    if deployment is None:
-        raise HTTPException(status_code=404, detail="Deployment not found")
-    return deployment
-
-
-@deployments_router.get("/")
-async def read_deployments() -> list[DeploymentDefinition]:
-    """Returns a list of active deployments."""
-    return [DeploymentDefinition(name=k) for k in manager._deployments.keys()]

+def create_base_router(name: str) -> APIRouter:
+    base_router = APIRouter(
+        prefix="",
+    )

-@deployments_router.get("/{deployment_name}")
-async def read_deployment(
-    deployment: Annotated[Deployment, Depends(deployment)],
-) -> DeploymentDefinition:
-    """Returns the details of a specific deployment."""
-
-    return DeploymentDefinition(name=deployment.name)
-
+    @base_router.get("/", response_model=None, include_in_schema=False)
+    async def root() -> RedirectResponse:
+        return RedirectResponse(f"/deployments/{name}/")

-@deployments_router.post("/create")
-async def create_deployment(
-    base_path: str = ".",
-    config_file: UploadFile = File(...),
-    reload: bool = False,
-    local: bool = False,
-) -> DeploymentDefinition:
-    """Creates a new deployment by uploading a configuration file."""
-    config = DeploymentConfig.from_yaml_bytes(await config_file.read())
-    await manager.deploy(config, base_path, reload, local)
+    return base_router

-    return DeploymentDefinition(name=config.name)

+def create_deployments_router(name: str, deployment: Deployment) -> APIRouter:
+    deployments_router = APIRouter(
+        prefix="/deployments",
+    )

-@deployments_router.post("/{deployment_name}/tasks/run")
-async def create_deployment_task(
-    deployment: Annotated[Deployment, Depends(deployment)],
-    task_definition: TaskDefinition,
-    session_id: str | None = None,
-) -> JSONResponse:
-    """Create a task for the deployment, wait for result and delete associated session."""
+    @deployments_router.post(f"/{name}/tasks/run", include_in_schema=False)
+    async def create_deployment_task(
+        task_definition: TaskDefinition,
+        session_id: str | None = None,
+    ) -> JSONResponse:
+        """Create a task for the deployment, wait for result and delete associated session."""
+
+        service_id = task_definition.service_id or DEFAULT_SERVICE_ID
+
+        if service_id not in deployment.service_names:
+            raise HTTPException(
+                status_code=404,
+                detail=(
+                    "There is no default service for this deployment. service_id is required"
+                    if not task_definition.service_id
+                    else f"Service '{service_id}' not found in deployment 'deployment_name'"
+                ),
+            )

-    service_id = task_definition.service_id or deployment.default_service
-    if service_id is None:
-        raise HTTPException(
-            status_code=400,
-            detail="Service is None and deployment has no default service",
+        run_kwargs = json.loads(task_definition.input) if task_definition.input else {}
+        result = await deployment.run_workflow(
+            service_id=service_id, session_id=session_id, **run_kwargs
         )
+        return JSONResponse(result)
+
+    @deployments_router.post(f"/{name}/tasks/create", include_in_schema=False)
+    async def create_deployment_task_nowait(
+        task_definition: TaskDefinition,
+        session_id: str | None = None,
+    ) -> TaskDefinition:
+        """Create a task for the deployment but don't wait for result."""
+        service_id = task_definition.service_id or DEFAULT_SERVICE_ID
+        if service_id not in deployment.service_names:
+            raise HTTPException(
+                status_code=404,
+                detail=(
+                    "There is no default service for this deployment. service_id is required"
+                    if not task_definition.service_id
+                    else f"Service '{service_id}' not found in deployment 'deployment_name'"
+                ),
+            )

-    if service_id not in deployment.service_names:
-        raise HTTPException(
-            status_code=404,
-            detail=f"Service '{task_definition.service_id}' not found in deployment 'deployment_name'",
+        run_kwargs = json.loads(task_definition.input) if task_definition.input else {}
+        handler_id, session_id = deployment.run_workflow_no_wait(
+            service_id=service_id, session_id=session_id, **run_kwargs
         )

-    run_kwargs = json.loads(task_definition.input) if task_definition.input else {}
-    result = await deployment.run_workflow(
-        service_id=service_id, session_id=session_id, **run_kwargs
-    )
-    return JSONResponse(result)
-
-
-@deployments_router.post("/{deployment_name}/tasks/create")
-async def create_deployment_task_nowait(
-    deployment: Annotated[Deployment, Depends(deployment)],
-    task_definition: TaskDefinition,
-    session_id: str | None = None,
-) -> TaskDefinition:
-    """Create a task for the deployment but don't wait for result."""
-    service_id = task_definition.service_id or deployment.default_service
-    if service_id is None:
-        raise HTTPException(
-            status_code=400,
-            detail="Service is None and deployment has no default service",
-        )
+        task_definition.session_id = session_id
+        task_definition.task_id = handler_id

-    if service_id not in deployment.service_names:
-        raise HTTPException(
-            status_code=404,
-            detail=f"Service '{task_definition.service_id}' not found in deployment 'deployment_name'",
-        )
+        return task_definition

-    run_kwargs = json.loads(task_definition.input) if task_definition.input else {}
-    handler_id, session_id = deployment.run_workflow_no_wait(
-        service_id=service_id, session_id=session_id, **run_kwargs
+    @deployments_router.post(
+        f"/{name}/tasks/{{task_id}}/events", include_in_schema=False
     )
+    async def send_event(
+        task_id: str,
+        session_id: str,
+        event_def: EventDefinition,
+    ) -> EventDefinition:
+        """Send a human response event to a service for a specific task and session."""
+        ctx = deployment._contexts[session_id]
+        serializer = JsonSerializer()
+        event = serializer.deserialize(event_def.event_obj_str)
+        ctx.send_event(event)

-    task_definition.session_id = session_id
-    task_definition.task_id = handler_id
-
-    return task_definition
-
-
-@deployments_router.post("/{deployment_name}/tasks/{task_id}/events")
-async def send_event(
-    deployment: Annotated[Deployment, Depends(deployment)],
-    task_id: str,
-    session_id: str,
-    event_def: EventDefinition,
-) -> EventDefinition:
-    """Send a human response event to a service for a specific task and session."""
-    ctx = deployment._contexts[session_id]
-    serializer = JsonSerializer()
-    event = serializer.deserialize(event_def.event_obj_str)
-    ctx.send_event(event)
-
-    return event_def
-
-
-@deployments_router.get("/{deployment_name}/tasks/{task_id}/events")
-async def get_events(
-    deployment: Annotated[Deployment, Depends(deployment)],
-    session_id: str,
-    task_id: str,
-    raw_event: bool = False,
-) -> StreamingResponse:
-    """
-    Get the stream of events from a given task and session.
-
-    Args:
-        raw_event (bool, default=False): Whether to return the raw event object
-            or just the event data.
-    """
+        return event_def

-    async def event_stream(handler: WorkflowHandler) -> AsyncGenerator[str, None]:
-        serializer = JsonSerializer()
-        # this will hang indefinitely if done and queue is empty. Bail
-        if (
-            handler.is_done()
-            and handler.ctx is not None
-            and handler.ctx.streaming_queue.empty()
-        ):
-            return
-        async for event in handler.stream_events():
-            data = json.loads(serializer.serialize(event))
-            if raw_event:
-                yield json.dumps(data) + "\n"
-            else:
-                yield json.dumps(data.get("value")) + "\n"
-            await asyncio.sleep(0.01)
-        await handler
-
-    return StreamingResponse(
-        event_stream(deployment._handlers[task_id]),
-        media_type="application/x-ndjson",
+    @deployments_router.get(
+        f"/{name}/tasks/{{task_id}}/events", include_in_schema=False
     )
+    async def get_events(
+        session_id: str,
+        task_id: str,
+        raw_event: bool = False,
+    ) -> StreamingResponse:
+        """
+        Get the stream of events from a given task and session.
+
+        Args:
+            raw_event (bool, default=False): Whether to return the raw event object
+                or just the event data.
+        """
+
+        async def event_stream(handler: WorkflowHandler) -> AsyncGenerator[str, None]:
+            serializer = JsonSerializer()
+            # this will hang indefinitely if done and queue is empty. Bail
+            if (
+                handler.is_done()
+                and handler.ctx is not None
+                and handler.ctx.streaming_queue.empty()
+            ):
+                return
+            async for event in handler.stream_events():
+                data = json.loads(serializer.serialize(event))
+                if raw_event:
+                    yield json.dumps(data) + "\n"
+                else:
+                    yield json.dumps(data.get("value")) + "\n"
+                await asyncio.sleep(0.01)
+            await handler

-
-@deployments_router.get("/{deployment_name}/tasks/{task_id}/results")
-async def get_task_result(
-    deployment: Annotated[Deployment, Depends(deployment)],
-    session_id: str,
-    task_id: str,
-) -> TaskResult | None:
-    """Get the task result associated with a task and session."""
-
-    handler = deployment._handlers[task_id]
-    return TaskResult(task_id=task_id, history=[], result=await handler)
-
-
-@deployments_router.get("/{deployment_name}/tasks")
-async def get_tasks(
-    deployment: Annotated[Deployment, Depends(deployment)],
-) -> list[TaskDefinition]:
-    """Get all the tasks from all the sessions in a given deployment."""
-
-    tasks: list[TaskDefinition] = []
-    for task_id, handler in deployment._handlers.items():
-        if handler.is_done():
-            continue
-        tasks.append(
-            TaskDefinition(
-                task_id=task_id,
-                input=deployment._handler_inputs[task_id],
-            )
+        return StreamingResponse(
+            event_stream(deployment._handlers[task_id]),
+            media_type="application/x-ndjson",
         )

-    return tasks
-
-
-@deployments_router.get("/{deployment_name}/sessions")
-async def get_sessions(
-    deployment: Annotated[Deployment, Depends(deployment)],
-) -> list[SessionDefinition]:
-    """Get the active sessions in a deployment and service."""
-
-    return [SessionDefinition(session_id=k) for k in deployment._contexts.keys()]
-
-
-@deployments_router.get("/{deployment_name}/sessions/{session_id}")
-async def get_session(
-    deployment: Annotated[Deployment, Depends(deployment)], session_id: str
-) -> SessionDefinition:
-    """Get the definition of a session by ID."""
-
-    return SessionDefinition(session_id=session_id)
-
-
-@deployments_router.post("/{deployment_name}/sessions/create")
-async def create_session(
-    deployment: Annotated[Deployment, Depends(deployment)],
-) -> SessionDefinition:
-    """Create a new session for a deployment."""
-
-    workflow = deployment._workflow_services[deployment.default_service]
-    session_id = generate_id()
-    deployment._contexts[session_id] = Context(workflow)
-
-    return SessionDefinition(session_id=session_id)
-
-
-@deployments_router.post("/{deployment_name}/sessions/delete")
-async def delete_session(
-    deployment: Annotated[Deployment, Depends(deployment)], session_id: str
-) -> None:
-    """Get the active sessions in a deployment and service."""
-
-    deployment._contexts.pop(session_id)
-
-
-async def _ws_proxy(ws: WebSocket, upstream_url: str) -> None:
-    """Proxy WebSocket connection to upstream server."""
-    await ws.accept()
-
-    # Forward most headers except WebSocket-specific ones
-    header_blacklist = {
-        "host",
-        "connection",
-        "upgrade",
-        "sec-websocket-key",
-        "sec-websocket-version",
-        "sec-websocket-extensions",
-    }
-    hdrs = [(k, v) for k, v in ws.headers.items() if k.lower() not in header_blacklist]
-
-    try:
-        # Parse subprotocols if present
-        subprotocols: Optional[List[websockets.Subprotocol]] = None
-        if "sec-websocket-protocol" in ws.headers:
-            # Parse comma-separated subprotocols
-            subprotocols = [
-                websockets.Subprotocol(p.strip())
-                for p in ws.headers["sec-websocket-protocol"].split(",")
-            ]
-
-        # Open upstream WebSocket connection
-        async with websockets.connect(
-            upstream_url,
-            additional_headers=hdrs,
-            subprotocols=subprotocols,
-            open_timeout=None,
-            ping_interval=None,
-        ) as upstream:
-
-            async def client_to_upstream() -> None:
-                try:
-                    while True:
-                        msg = await ws.receive()
-                        if msg["type"] == "websocket.receive":
-                            if "text" in msg:
-                                await upstream.send(msg["text"])
-                            elif "bytes" in msg:
-                                await upstream.send(msg["bytes"])
-                        elif msg["type"] == "websocket.disconnect":
-                            break
-                except Exception as e:
-                    logger.debug(f"Client to upstream connection ended: {e}")
-
-            async def upstream_to_client() -> None:
-                try:
-                    async for message in upstream:
-                        if isinstance(message, str):
-                            await ws.send_text(message)
-                        else:
-                            await ws.send_bytes(message)
-                except Exception as e:
-                    logger.debug(f"Upstream to client connection ended: {e}")
-
-            # Pump both directions concurrently
-            await asyncio.gather(
-                client_to_upstream(), upstream_to_client(), return_exceptions=True
+    @deployments_router.get(
+        f"/{name}/tasks/{{task_id}}/results", include_in_schema=False
+    )
+    async def get_task_result(
+        session_id: str,
+        task_id: str,
+    ) -> TaskResult | None:
+        """Get the task result associated with a task and session."""
+
+        handler = deployment._handlers[task_id]
+        return TaskResult(task_id=task_id, history=[], result=await handler)
+
+    @deployments_router.get(f"/{name}/tasks", include_in_schema=False)
+    async def get_tasks() -> list[TaskDefinition]:
+        """Get all the tasks from all the sessions in a given deployment."""
+
+        tasks: list[TaskDefinition] = []
+        for task_id, handler in deployment._handlers.items():
+            if handler.is_done():
+                continue
+            tasks.append(
+                TaskDefinition(
+                    task_id=task_id,
+                    input=deployment._handler_inputs[task_id],
+                )
             )

-    except Exception as e:
-        logger.error(f"WebSocket proxy error: {e}")
-    finally:
-        try:
-            await ws.close()
-        except Exception as e:
-            logger.debug(f"Error closing client connection: {e}")
+        return tasks

+    @deployments_router.get(f"/{name}/sessions", include_in_schema=False)
+    async def get_sessions() -> list[SessionDefinition]:
+        """Get the active sessions in a deployment and service."""

-@deployments_router.websocket("/{deployment_name}/ui/{path:path}")
-@deployments_router.websocket("/{deployment_name}/ui")
-async def websocket_proxy(
-    websocket: WebSocket,
-    deployment: Annotated[Deployment, Depends(deployment)],
-    path: str | None = None,
-) -> None:
-    if deployment._config.ui is None:
-        raise HTTPException(status_code=404, detail="Deployment has no ui configured")
+        return [SessionDefinition(session_id=k) for k in deployment._contexts.keys()]

-    # Build the upstream WebSocket URL using FastAPI's extracted path parameter
-    slash_path = f"/{path}" if path else ""
-    upstream_path = f"/deployments/{deployment.name}/ui{slash_path}"
+    @deployments_router.get(f"/{name}/sessions/{{session_id}}", include_in_schema=False)
+    async def get_session(
+        session_id: str,
+    ) -> SessionDefinition:
+        """Get the definition of a session by ID."""

-    # Convert to WebSocket URL
-    upstream_url = f"ws://localhost:{deployment._config.ui.port}{upstream_path}"
-    if websocket.url.query:
-        upstream_url += f"?{websocket.url.query}"
+        return SessionDefinition(session_id=session_id)

-    logger.debug(f"Proxying WebSocket {websocket.url} -> {upstream_url}")
+    @deployments_router.post(f"/{name}/sessions/create", include_in_schema=False)
+    async def create_session() -> SessionDefinition:
+        """Create a new session for a deployment."""

-    await _ws_proxy(websocket, upstream_url)
-
-
-@deployments_router.api_route(
-    "/{deployment_name}/ui/{path:path}",
-    methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH"],
-)
-@deployments_router.api_route(
-    "/{deployment_name}/ui",
-    methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH"],
-)
-async def proxy(
-    request: Request,
-    deployment: Annotated[Deployment, Depends(deployment)],
-    path: str | None = None,
-) -> StreamingResponse:
-    if deployment._config.ui is None:
-        raise HTTPException(status_code=404, detail="Deployment has no ui configured")
-
-    # Build the upstream URL using FastAPI's extracted path parameter
-    slash_path = f"/{path}" if path else ""
-    upstream_path = f"/deployments/{deployment.name}/ui{slash_path}"
-
-    upstream_url = httpx.URL(
-        f"http://localhost:{deployment._config.ui.port}{upstream_path}"
-    ).copy_with(params=request.query_params)
-
-    # Debug logging
-    logger.debug(f"Proxying {request.method} {request.url} -> {upstream_url}")
-
-    # Strip hop-by-hop headers + host
-    hop_by_hop = {
-        "connection",
-        "keep-alive",
-        "proxy-authenticate",
-        "proxy-authorization",
-        "te",  # codespell:ignore
-        "trailers",
-        "transfer-encoding",
-        "upgrade",
-        "host",
-    }
-    headers = {k: v for k, v in request.headers.items() if k.lower() not in hop_by_hop}
-
-    try:
-        client = httpx.AsyncClient(timeout=None)
-
-        req = client.build_request(
-            request.method,
-            upstream_url,
-            headers=headers,
-            content=request.stream(),  # stream uploads
-        )
-        upstream = await client.send(req, stream=True)
+        workflow = deployment.default_service
+        if workflow is None:
+            raise HTTPException(
+                status_code=400,
+                detail="There is no default service for this deployment",
+            )
+        session_id = generate_id()
+        deployment._contexts[session_id] = Context(workflow)

-        resp_headers = {
-            k: v for k, v in upstream.headers.items() if k.lower() not in hop_by_hop
-        }
+        return SessionDefinition(session_id=session_id)

-        # Close client when upstream response is done
-        async def cleanup() -> None:
-            await upstream.aclose()
-            await client.aclose()
+    @deployments_router.post(f"/{name}/sessions/delete", include_in_schema=False)
+    async def delete_session(
+        session_id: str,
+    ) -> None:
+        """Get the active sessions in a deployment and service."""

-        return StreamingResponse(
-            upstream.aiter_raw(),  # stream downloads
-            status_code=upstream.status_code,
-            headers=resp_headers,
-            background=BackgroundTask(cleanup),  # tidy up when finished
-        )
+        deployment._contexts.pop(session_id)

-    except httpx.ConnectError:
-        raise HTTPException(status_code=502, detail="Upstream server unavailable")
-    except httpx.TimeoutException:
-        raise HTTPException(status_code=504, detail="Upstream server timeout")
-    except Exception as e:
-        logger.error(f"Proxy error: {e}")
-        raise HTTPException(status_code=502, detail="Proxy error")
+    return deployments_router
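In 0.3.0 this module no longer exposes a module-level deployments_router that resolves deployments via Depends(deployment) against the global manager; it now exports factories that build routers bound to one named deployment, and the deployment-listing, /create, and UI-proxy endpoints leave this file (a new routers/ui_proxy.py appears in this release). Below is a minimal sketch of how an application might mount the new routers: create_base_router and create_deployments_router come from the diff above, while the FastAPI wiring, the build_app helper, and the deployment name are illustrative assumptions (the real wiring lives in llama_deploy/appserver/app.py, which is not part of this hunk).

from fastapi import FastAPI

from llama_deploy.appserver.deployment import Deployment
from llama_deploy.appserver.routers.deployments import (
    create_base_router,
    create_deployments_router,
)


def build_app(name: str, deployment: Deployment) -> FastAPI:
    """Hypothetical wiring: mount the per-deployment routers on a single app."""
    app = FastAPI()
    # "/" redirects to /deployments/{name}/ via the base router.
    app.include_router(create_base_router(name))
    # Task and session endpoints are registered under /deployments/{name}/...
    # and close over the single Deployment instance instead of using Depends().
    app.include_router(create_deployments_router(name, deployment))
    return app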
llama_deploy/appserver/routers/status.py
@@ -1,40 +1,13 @@
-import httpx
 from fastapi import APIRouter
-from fastapi.exceptions import HTTPException
-from fastapi.responses import PlainTextResponse
-from llama_deploy.appserver.server import manager
-from llama_deploy.appserver.settings import settings
 from llama_deploy.appserver.types import Status, StatusEnum

-status_router = APIRouter(
-    prefix="/status",
+health_router = APIRouter(
+    prefix="/health",
 )


-@status_router.get("/")
-async def status() -> Status:
+@health_router.get("", include_in_schema=False)
+async def health() -> Status:
     return Status(
         status=StatusEnum.HEALTHY,
-        max_deployments=manager._max_deployments,
-        deployments=list(manager._deployments.keys()),
-        status_message="",
     )
-
-
-@status_router.get("/metrics")
-async def metrics() -> PlainTextResponse:
-    """Proxies the Prometheus metrics endpoint through the API Server.
-
-    This endpoint is mostly used in serverless environments where the LlamaDeploy
-    container cannot expose more than one port (e.g. Knative, Google Cloud Run).
-    If Prometheus is not enabled, this endpoint returns an empty HTTP-204 response.
-    """
-    if not settings.prometheus_enabled:
-        return PlainTextResponse(status_code=204)
-
-    try:
-        async with httpx.AsyncClient() as client:
-            response = await client.get(f"http://127.0.0.1:{settings.prometheus_port}/")
-            return PlainTextResponse(content=response.text)
-    except httpx.RequestError as exc:
-        raise HTTPException(status_code=500, detail=str(exc))
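The status router is reduced to a bare health probe: the /status prefix becomes /health, the handler drops the deployment-manager fields (max_deployments, deployments, status_message), and the Prometheus /metrics proxy is removed. A quick sketch of exercising the new endpoint follows, assuming the router is simply included on an app; the FastAPI and TestClient usage here is illustrative and not part of the package.

from fastapi import FastAPI
from fastapi.testclient import TestClient

from llama_deploy.appserver.routers.status import health_router

app = FastAPI()
app.include_router(health_router)  # now serves GET /health instead of GET /status/

client = TestClient(app)
resp = client.get("/health")
# Expect HTTP 200 with the serialized Status(status=StatusEnum.HEALTHY);
# the exact JSON shape depends on the Status model in appserver/types.py.
print(resp.status_code, resp.json())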