llama-deploy-appserver 0.2.7a1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff compares the contents of publicly available package versions as published to their public registries. It is provided for informational purposes only.
- llama_deploy/appserver/app.py +274 -26
- llama_deploy/appserver/bootstrap.py +55 -25
- llama_deploy/appserver/configure_logging.py +189 -0
- llama_deploy/appserver/correlation_id.py +24 -0
- llama_deploy/appserver/deployment.py +70 -412
- llama_deploy/appserver/deployment_config_parser.py +12 -130
- llama_deploy/appserver/interrupts.py +55 -0
- llama_deploy/appserver/process_utils.py +214 -0
- llama_deploy/appserver/py.typed +0 -0
- llama_deploy/appserver/routers/__init__.py +4 -3
- llama_deploy/appserver/routers/deployments.py +163 -382
- llama_deploy/appserver/routers/status.py +4 -31
- llama_deploy/appserver/routers/ui_proxy.py +255 -0
- llama_deploy/appserver/settings.py +99 -49
- llama_deploy/appserver/types.py +0 -3
- llama_deploy/appserver/workflow_loader.py +431 -0
- llama_deploy/appserver/workflow_store/agent_data_store.py +100 -0
- llama_deploy/appserver/workflow_store/keyed_lock.py +32 -0
- llama_deploy/appserver/workflow_store/lru_cache.py +49 -0
- llama_deploy_appserver-0.3.0.dist-info/METADATA +25 -0
- llama_deploy_appserver-0.3.0.dist-info/RECORD +24 -0
- {llama_deploy_appserver-0.2.7a1.dist-info → llama_deploy_appserver-0.3.0.dist-info}/WHEEL +1 -1
- llama_deploy/appserver/__main__.py +0 -14
- llama_deploy/appserver/client/__init__.py +0 -3
- llama_deploy/appserver/client/base.py +0 -30
- llama_deploy/appserver/client/client.py +0 -49
- llama_deploy/appserver/client/models/__init__.py +0 -4
- llama_deploy/appserver/client/models/apiserver.py +0 -356
- llama_deploy/appserver/client/models/model.py +0 -82
- llama_deploy/appserver/run_autodeploy.py +0 -141
- llama_deploy/appserver/server.py +0 -60
- llama_deploy/appserver/source_managers/__init__.py +0 -5
- llama_deploy/appserver/source_managers/base.py +0 -33
- llama_deploy/appserver/source_managers/git.py +0 -48
- llama_deploy/appserver/source_managers/local.py +0 -51
- llama_deploy/appserver/tracing.py +0 -237
- llama_deploy_appserver-0.2.7a1.dist-info/METADATA +0 -23
- llama_deploy_appserver-0.2.7a1.dist-info/RECORD +0 -28
llama_deploy/appserver/routers/deployments.py

```diff
@@ -1,433 +1,214 @@
 import asyncio
 import json
-import
-from typing import Annotated, AsyncGenerator, List, Optional
+from typing import AsyncGenerator
 
-import httpx
-import websockets
 from fastapi import (
     APIRouter,
-    Depends,
-    File,
     HTTPException,
-    Request,
-    UploadFile,
-    WebSocket,
 )
-from fastapi.responses import JSONResponse, StreamingResponse
+from fastapi.responses import JSONResponse, RedirectResponse, StreamingResponse
 from llama_deploy.appserver.deployment import Deployment
-from llama_deploy.appserver.deployment_config_parser import DeploymentConfig
-from llama_deploy.appserver.server import manager
 from llama_deploy.appserver.types import (
-    DeploymentDefinition,
     EventDefinition,
     SessionDefinition,
     TaskDefinition,
     TaskResult,
     generate_id,
 )
-from
+from llama_deploy.appserver.workflow_loader import DEFAULT_SERVICE_ID
 from workflows import Context
 from workflows.context import JsonSerializer
 from workflows.handler import WorkflowHandler
 
-deployments_router = APIRouter(
-    prefix="/deployments",
-)
-logger = logging.getLogger(__name__)
-
-
-def deployment(deployment_name: str) -> Deployment:
-    """FastAPI dependency to retrieve a Deployment instance"""
-    deployment = manager.get_deployment(deployment_name)
-    if deployment is None:
-        raise HTTPException(status_code=404, detail="Deployment not found")
-    return deployment
-
-
-@deployments_router.get("/")
-async def read_deployments() -> list[DeploymentDefinition]:
-    """Returns a list of active deployments."""
-    return [DeploymentDefinition(name=k) for k in manager._deployments.keys()]
 
+def create_base_router(name: str) -> APIRouter:
+    base_router = APIRouter(
+        prefix="",
+    )
 
-@
-async def
-
-) -> DeploymentDefinition:
-    """Returns the details of a specific deployment."""
-
-    return DeploymentDefinition(name=deployment.name)
-
+    @base_router.get("/", response_model=None, include_in_schema=False)
+    async def root() -> RedirectResponse:
+        return RedirectResponse(f"/deployments/{name}/")
 
-
-async def create_deployment(
-    base_path: str = ".",
-    config_file: UploadFile = File(...),
-    reload: bool = False,
-    local: bool = False,
-) -> DeploymentDefinition:
-    """Creates a new deployment by uploading a configuration file."""
-    config = DeploymentConfig.from_yaml_bytes(await config_file.read())
-    await manager.deploy(config, base_path, reload, local)
+    return base_router
 
-    return DeploymentDefinition(name=config.name)
 
+def create_deployments_router(name: str, deployment: Deployment) -> APIRouter:
+    deployments_router = APIRouter(
+        prefix="/deployments",
+    )
 
-@deployments_router.post("/{
-async def create_deployment_task(
-
-
-
-
-
+    @deployments_router.post(f"/{name}/tasks/run", include_in_schema=False)
+    async def create_deployment_task(
+        task_definition: TaskDefinition,
+        session_id: str | None = None,
+    ) -> JSONResponse:
+        """Create a task for the deployment, wait for result and delete associated session."""
+
+        service_id = task_definition.service_id or DEFAULT_SERVICE_ID
+
+        if service_id not in deployment.service_names:
+            raise HTTPException(
+                status_code=404,
+                detail=(
+                    "There is no default service for this deployment. service_id is required"
+                    if not task_definition.service_id
+                    else f"Service '{service_id}' not found in deployment 'deployment_name'"
+                ),
+            )
 
-
-
-
-            status_code=400,
-            detail="Service is None and deployment has no default service",
+        run_kwargs = json.loads(task_definition.input) if task_definition.input else {}
+        result = await deployment.run_workflow(
+            service_id=service_id, session_id=session_id, **run_kwargs
         )
+        return JSONResponse(result)
+
+    @deployments_router.post(f"/{name}/tasks/create", include_in_schema=False)
+    async def create_deployment_task_nowait(
+        task_definition: TaskDefinition,
+        session_id: str | None = None,
+    ) -> TaskDefinition:
+        """Create a task for the deployment but don't wait for result."""
+        service_id = task_definition.service_id or DEFAULT_SERVICE_ID
+        if service_id not in deployment.service_names:
+            raise HTTPException(
+                status_code=404,
+                detail=(
+                    "There is no default service for this deployment. service_id is required"
+                    if not task_definition.service_id
+                    else f"Service '{service_id}' not found in deployment 'deployment_name'"
+                ),
+            )
 
-
-
-
-            detail=f"Service '{task_definition.service_id}' not found in deployment 'deployment_name'",
+        run_kwargs = json.loads(task_definition.input) if task_definition.input else {}
+        handler_id, session_id = deployment.run_workflow_no_wait(
+            service_id=service_id, session_id=session_id, **run_kwargs
         )
 
-
-
-        service_id=service_id, session_id=session_id, **run_kwargs
-    )
-    return JSONResponse(result)
-
-
-@deployments_router.post("/{deployment_name}/tasks/create")
-async def create_deployment_task_nowait(
-    deployment: Annotated[Deployment, Depends(deployment)],
-    task_definition: TaskDefinition,
-    session_id: str | None = None,
-) -> TaskDefinition:
-    """Create a task for the deployment but don't wait for result."""
-    service_id = task_definition.service_id or deployment.default_service
-    if service_id is None:
-        raise HTTPException(
-            status_code=400,
-            detail="Service is None and deployment has no default service",
-        )
+        task_definition.session_id = session_id
+        task_definition.task_id = handler_id
 
-
-        raise HTTPException(
-            status_code=404,
-            detail=f"Service '{task_definition.service_id}' not found in deployment 'deployment_name'",
-        )
+        return task_definition
 
-
-
-        service_id=service_id, session_id=session_id, **run_kwargs
+    @deployments_router.post(
+        f"/{name}/tasks/{{task_id}}/events", include_in_schema=False
     )
+    async def send_event(
+        task_id: str,
+        session_id: str,
+        event_def: EventDefinition,
+    ) -> EventDefinition:
+        """Send a human response event to a service for a specific task and session."""
+        ctx = deployment._contexts[session_id]
+        serializer = JsonSerializer()
+        event = serializer.deserialize(event_def.event_obj_str)
+        ctx.send_event(event)
 
-
-    task_definition.task_id = handler_id
-
-    return task_definition
-
-
-@deployments_router.post("/{deployment_name}/tasks/{task_id}/events")
-async def send_event(
-    deployment: Annotated[Deployment, Depends(deployment)],
-    task_id: str,
-    session_id: str,
-    event_def: EventDefinition,
-) -> EventDefinition:
-    """Send a human response event to a service for a specific task and session."""
-    ctx = deployment._contexts[session_id]
-    serializer = JsonSerializer()
-    event = serializer.deserialize(event_def.event_obj_str)
-    ctx.send_event(event)
-
-    return event_def
-
-
-@deployments_router.get("/{deployment_name}/tasks/{task_id}/events")
-async def get_events(
-    deployment: Annotated[Deployment, Depends(deployment)],
-    session_id: str,
-    task_id: str,
-    raw_event: bool = False,
-) -> StreamingResponse:
-    """
-    Get the stream of events from a given task and session.
-
-    Args:
-        raw_event (bool, default=False): Whether to return the raw event object
-            or just the event data.
-    """
+        return event_def
 
-
-
-        # this will hang indefinitely if done and queue is empty. Bail
-        if (
-            handler.is_done()
-            and handler.ctx is not None
-            and handler.ctx.streaming_queue.empty()
-        ):
-            return
-        async for event in handler.stream_events():
-            data = json.loads(serializer.serialize(event))
-            if raw_event:
-                yield json.dumps(data) + "\n"
-            else:
-                yield json.dumps(data.get("value")) + "\n"
-            await asyncio.sleep(0.01)
-        await handler
-
-    return StreamingResponse(
-        event_stream(deployment._handlers[task_id]),
-        media_type="application/x-ndjson",
+    @deployments_router.get(
+        f"/{name}/tasks/{{task_id}}/events", include_in_schema=False
     )
+    async def get_events(
+        session_id: str,
+        task_id: str,
+        raw_event: bool = False,
+    ) -> StreamingResponse:
+        """
+        Get the stream of events from a given task and session.
+
+        Args:
+            raw_event (bool, default=False): Whether to return the raw event object
+                or just the event data.
+        """
+
+        async def event_stream(handler: WorkflowHandler) -> AsyncGenerator[str, None]:
+            serializer = JsonSerializer()
+            # this will hang indefinitely if done and queue is empty. Bail
+            if (
+                handler.is_done()
+                and handler.ctx is not None
+                and handler.ctx.streaming_queue.empty()
+            ):
+                return
+            async for event in handler.stream_events():
+                data = json.loads(serializer.serialize(event))
+                if raw_event:
+                    yield json.dumps(data) + "\n"
+                else:
+                    yield json.dumps(data.get("value")) + "\n"
+                await asyncio.sleep(0.01)
+            await handler
 
-
-
-
-    deployment: Annotated[Deployment, Depends(deployment)],
-    session_id: str,
-    task_id: str,
-) -> TaskResult | None:
-    """Get the task result associated with a task and session."""
-
-    handler = deployment._handlers[task_id]
-    return TaskResult(task_id=task_id, history=[], result=await handler)
-
-
-@deployments_router.get("/{deployment_name}/tasks")
-async def get_tasks(
-    deployment: Annotated[Deployment, Depends(deployment)],
-) -> list[TaskDefinition]:
-    """Get all the tasks from all the sessions in a given deployment."""
-
-    tasks: list[TaskDefinition] = []
-    for task_id, handler in deployment._handlers.items():
-        if handler.is_done():
-            continue
-        tasks.append(
-            TaskDefinition(
-                task_id=task_id,
-                input=deployment._handler_inputs[task_id],
-            )
+        return StreamingResponse(
+            event_stream(deployment._handlers[task_id]),
+            media_type="application/x-ndjson",
         )
 
-
-
-
-
-
-
-) ->
-
-
-
-
-
-@deployments_router.get("/{
-async def
-
-
-
-
-
-
-
-
-
-
-)
-    """Create a new session for a deployment."""
-
-    workflow = deployment._workflow_services[deployment.default_service]
-    session_id = generate_id()
-    deployment._contexts[session_id] = Context(workflow)
-
-    return SessionDefinition(session_id=session_id)
-
-
-@deployments_router.post("/{deployment_name}/sessions/delete")
-async def delete_session(
-    deployment: Annotated[Deployment, Depends(deployment)], session_id: str
-) -> None:
-    """Get the active sessions in a deployment and service."""
-
-    deployment._contexts.pop(session_id)
-
-
-async def _ws_proxy(ws: WebSocket, upstream_url: str) -> None:
-    """Proxy WebSocket connection to upstream server."""
-    await ws.accept()
-
-    # Forward most headers except WebSocket-specific ones
-    header_blacklist = {
-        "host",
-        "connection",
-        "upgrade",
-        "sec-websocket-key",
-        "sec-websocket-version",
-        "sec-websocket-extensions",
-    }
-    hdrs = [(k, v) for k, v in ws.headers.items() if k.lower() not in header_blacklist]
-
-    try:
-        # Parse subprotocols if present
-        subprotocols: Optional[List[websockets.Subprotocol]] = None
-        if "sec-websocket-protocol" in ws.headers:
-            # Parse comma-separated subprotocols
-            subprotocols = [
-                websockets.Subprotocol(p.strip())
-                for p in ws.headers["sec-websocket-protocol"].split(",")
-            ]
-
-        # Open upstream WebSocket connection
-        async with websockets.connect(
-            upstream_url,
-            additional_headers=hdrs,
-            subprotocols=subprotocols,
-            open_timeout=None,
-            ping_interval=None,
-        ) as upstream:
-
-            async def client_to_upstream() -> None:
-                try:
-                    while True:
-                        msg = await ws.receive()
-                        if msg["type"] == "websocket.receive":
-                            if "text" in msg:
-                                await upstream.send(msg["text"])
-                            elif "bytes" in msg:
-                                await upstream.send(msg["bytes"])
-                        elif msg["type"] == "websocket.disconnect":
-                            break
-                except Exception as e:
-                    logger.debug(f"Client to upstream connection ended: {e}")
-
-            async def upstream_to_client() -> None:
-                try:
-                    async for message in upstream:
-                        if isinstance(message, str):
-                            await ws.send_text(message)
-                        else:
-                            await ws.send_bytes(message)
-                except Exception as e:
-                    logger.debug(f"Upstream to client connection ended: {e}")
-
-            # Pump both directions concurrently
-            await asyncio.gather(
-                client_to_upstream(), upstream_to_client(), return_exceptions=True
+    @deployments_router.get(
+        f"/{name}/tasks/{{task_id}}/results", include_in_schema=False
+    )
+    async def get_task_result(
+        session_id: str,
+        task_id: str,
+    ) -> TaskResult | None:
+        """Get the task result associated with a task and session."""
+
+        handler = deployment._handlers[task_id]
+        return TaskResult(task_id=task_id, history=[], result=await handler)
+
+    @deployments_router.get(f"/{name}/tasks", include_in_schema=False)
+    async def get_tasks() -> list[TaskDefinition]:
+        """Get all the tasks from all the sessions in a given deployment."""
+
+        tasks: list[TaskDefinition] = []
+        for task_id, handler in deployment._handlers.items():
+            if handler.is_done():
+                continue
+            tasks.append(
+                TaskDefinition(
+                    task_id=task_id,
+                    input=deployment._handler_inputs[task_id],
+                )
             )
 
-
-        logger.error(f"WebSocket proxy error: {e}")
-    finally:
-        try:
-            await ws.close()
-        except Exception as e:
-            logger.debug(f"Error closing client connection: {e}")
+        return tasks
 
+    @deployments_router.get(f"/{name}/sessions", include_in_schema=False)
+    async def get_sessions() -> list[SessionDefinition]:
+        """Get the active sessions in a deployment and service."""
 
-
-@deployments_router.websocket("/{deployment_name}/ui")
-async def websocket_proxy(
-    websocket: WebSocket,
-    deployment: Annotated[Deployment, Depends(deployment)],
-    path: str | None = None,
-) -> None:
-    if deployment._config.ui is None:
-        raise HTTPException(status_code=404, detail="Deployment has no ui configured")
+        return [SessionDefinition(session_id=k) for k in deployment._contexts.keys()]
 
-
-
-
+    @deployments_router.get(f"/{name}/sessions/{{session_id}}", include_in_schema=False)
+    async def get_session(
+        session_id: str,
+    ) -> SessionDefinition:
+        """Get the definition of a session by ID."""
 
-
-    upstream_url = f"ws://localhost:{deployment._config.ui.port}{upstream_path}"
-    if websocket.url.query:
-        upstream_url += f"?{websocket.url.query}"
+        return SessionDefinition(session_id=session_id)
 
-
+    @deployments_router.post(f"/{name}/sessions/create", include_in_schema=False)
+    async def create_session() -> SessionDefinition:
+        """Create a new session for a deployment."""
 
-
-
-
-
-
-
-)
-
-    "/{deployment_name}/ui",
-    methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH"],
-)
-async def proxy(
-    request: Request,
-    deployment: Annotated[Deployment, Depends(deployment)],
-    path: str | None = None,
-) -> StreamingResponse:
-    if deployment._config.ui is None:
-        raise HTTPException(status_code=404, detail="Deployment has no ui configured")
-
-    # Build the upstream URL using FastAPI's extracted path parameter
-    slash_path = f"/{path}" if path else ""
-    upstream_path = f"/deployments/{deployment.name}/ui{slash_path}"
-
-    upstream_url = httpx.URL(
-        f"http://localhost:{deployment._config.ui.port}{upstream_path}"
-    ).copy_with(params=request.query_params)
-
-    # Debug logging
-    logger.debug(f"Proxying {request.method} {request.url} -> {upstream_url}")
-
-    # Strip hop-by-hop headers + host
-    hop_by_hop = {
-        "connection",
-        "keep-alive",
-        "proxy-authenticate",
-        "proxy-authorization",
-        "te", # codespell:ignore
-        "trailers",
-        "transfer-encoding",
-        "upgrade",
-        "host",
-    }
-    headers = {k: v for k, v in request.headers.items() if k.lower() not in hop_by_hop}
-
-    try:
-        client = httpx.AsyncClient(timeout=None)
-
-        req = client.build_request(
-            request.method,
-            upstream_url,
-            headers=headers,
-            content=request.stream(), # stream uploads
-        )
-        upstream = await client.send(req, stream=True)
+        workflow = deployment.default_service
+        if workflow is None:
+            raise HTTPException(
+                status_code=400,
+                detail="There is no default service for this deployment",
+            )
+        session_id = generate_id()
+        deployment._contexts[session_id] = Context(workflow)
 
-
-            k: v for k, v in upstream.headers.items() if k.lower() not in hop_by_hop
-        }
+        return SessionDefinition(session_id=session_id)
 
-
-
-
-
+    @deployments_router.post(f"/{name}/sessions/delete", include_in_schema=False)
+    async def delete_session(
+        session_id: str,
+    ) -> None:
+        """Get the active sessions in a deployment and service."""
 
-
-            upstream.aiter_raw(), # stream downloads
-            status_code=upstream.status_code,
-            headers=resp_headers,
-            background=BackgroundTask(cleanup), # tidy up when finished
-        )
+        deployment._contexts.pop(session_id)
 
-
-        raise HTTPException(status_code=502, detail="Upstream server unavailable")
-    except httpx.TimeoutException:
-        raise HTTPException(status_code=504, detail="Upstream server timeout")
-    except Exception as e:
-        logger.error(f"Proxy error: {e}")
-        raise HTTPException(status_code=502, detail="Proxy error")
+    return deployments_router
```
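The hunk above removes the shared `deployments_router` and its `Depends(deployment)` lookup (along with the inline UI proxy, now living in `ui_proxy.py`) in favor of per-deployment router factories whose routes are bound to a fixed deployment name at registration time. A minimal client-side sketch of the resulting endpoints follows; the base URL, deployment name, and request payload values are assumptions for illustration, while the paths, the `TaskDefinition` fields (`input` as a JSON-encoded string, optional `service_id`), and the NDJSON event stream come from the diff.

```python
# Hypothetical client calls against the per-deployment routes added above.
# BASE and NAME are assumptions; the paths and payload shape follow the diff.
import json

import httpx

BASE = "http://localhost:4501"  # assumed appserver address
NAME = "my-deployment"          # assumed deployment name

# Run a workflow and wait for its result (POST /deployments/{name}/tasks/run).
# `input` carries JSON-encoded workflow kwargs; omitting `service_id` falls
# back to DEFAULT_SERVICE_ID on the server side.
task = {"input": json.dumps({"topic": "llamas"})}
result = httpx.post(f"{BASE}/deployments/{NAME}/tasks/run", json=task).json()

# Fire-and-forget variant (POST /deployments/{name}/tasks/create): returns the
# TaskDefinition with task_id and session_id filled in by the server.
pending = httpx.post(f"{BASE}/deployments/{NAME}/tasks/create", json=task).json()

# Stream NDJSON events for that task
# (GET /deployments/{name}/tasks/{task_id}/events?session_id=...).
with httpx.stream(
    "GET",
    f"{BASE}/deployments/{NAME}/tasks/{pending['task_id']}/events",
    params={"session_id": pending["session_id"]},
    timeout=None,
) as events:
    for line in events.iter_lines():
        if line:
            print(json.loads(line))
```

Because the routes are generated once per deployment (and marked `include_in_schema=False`), the deployment is resolved at app build time rather than per request through the old `deployment_name` path parameter.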
llama_deploy/appserver/routers/status.py

```diff
@@ -1,40 +1,13 @@
-import httpx
 from fastapi import APIRouter
-from fastapi.exceptions import HTTPException
-from fastapi.responses import PlainTextResponse
-from llama_deploy.appserver.server import manager
-from llama_deploy.appserver.settings import settings
 from llama_deploy.appserver.types import Status, StatusEnum
 
-
-    prefix="/
+health_router = APIRouter(
+    prefix="/health",
 )
 
 
-@
-async def
+@health_router.get("", include_in_schema=False)
+async def health() -> Status:
     return Status(
         status=StatusEnum.HEALTHY,
-        max_deployments=manager._max_deployments,
-        deployments=list(manager._deployments.keys()),
-        status_message="",
     )
-
-
-@status_router.get("/metrics")
-async def metrics() -> PlainTextResponse:
-    """Proxies the Prometheus metrics endpoint through the API Server.
-
-    This endpoint is mostly used in serverless environments where the LlamaDeploy
-    container cannot expose more than one port (e.g. Knative, Google Cloud Run).
-    If Prometheus is not enabled, this endpoint returns an empty HTTP-204 response.
-    """
-    if not settings.prometheus_enabled:
-        return PlainTextResponse(status_code=204)
-
-    try:
-        async with httpx.AsyncClient() as client:
-            response = await client.get(f"http://127.0.0.1:{settings.prometheus_port}/")
-            return PlainTextResponse(content=response.text)
-    except httpx.RequestError as exc:
-        raise HTTPException(status_code=500, detail=str(exc))
```