kalavai-client 0.6.16__py3-none-any.whl → 0.6.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalavai_client/__init__.py +1 -1
- kalavai_client/assets/apps.yaml +9 -7
- kalavai_client/assets/apps_values.yaml +26 -1
- kalavai_client/assets/pool_config_template.yaml +1 -0
- kalavai_client/assets/pool_config_values.yaml +1 -9
- kalavai_client/bridge_api.py +203 -89
- kalavai_client/bridge_models.py +2 -5
- kalavai_client/cli.py +38 -13
- kalavai_client/core.py +25 -13
- kalavai_client/utils.py +25 -1
- {kalavai_client-0.6.16.dist-info → kalavai_client-0.6.18.dist-info}/METADATA +3 -8
- kalavai_client-0.6.18.dist-info/RECORD +25 -0
- kalavai_client-0.6.16.dist-info/RECORD +0 -25
- {kalavai_client-0.6.16.dist-info → kalavai_client-0.6.18.dist-info}/LICENSE +0 -0
- {kalavai_client-0.6.16.dist-info → kalavai_client-0.6.18.dist-info}/WHEEL +0 -0
- {kalavai_client-0.6.16.dist-info → kalavai_client-0.6.18.dist-info}/entry_points.txt +0 -0
kalavai_client/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
|
2
|
-
__version__ = "0.6.16"
|
2
|
+
__version__ = "0.6.18"
|
kalavai_client/assets/apps.yaml
CHANGED
@@ -18,6 +18,8 @@ repositories:
|
|
18
18
|
url: https://opencost.github.io/opencost-helm-chart
|
19
19
|
- name: minio
|
20
20
|
url: https://charts.min.io/
|
21
|
+
- name: langfuse
|
22
|
+
url: https://langfuse.github.io/langfuse-k8s
|
21
23
|
|
22
24
|
releases:
|
23
25
|
- name: datashim
|
@@ -28,7 +30,7 @@ releases:
|
|
28
30
|
- name: lago
|
29
31
|
namespace: kalavai
|
30
32
|
chart: kalavai/lago
|
31
|
-
installed: {{
|
33
|
+
installed: {{deploy_lago}}
|
32
34
|
set:
|
33
35
|
- name: external.api.nodePort
|
34
36
|
value: 32000
|
@@ -79,7 +81,7 @@ releases:
|
|
79
81
|
namespace: kalavai
|
80
82
|
chart: kalavai/kalavai-helios
|
81
83
|
version: "0.1.11"
|
82
|
-
installed: false
|
84
|
+
installed: false
|
83
85
|
set:
|
84
86
|
- name: deployment.watcher_endpoint
|
85
87
|
value: "http://{{watcher_service}}"
|
@@ -94,11 +96,11 @@ releases:
|
|
94
96
|
- name: opencost
|
95
97
|
namespace: opencost
|
96
98
|
chart: opencost-charts/opencost
|
97
|
-
installed: {{
|
99
|
+
installed: {{deploy_opencost}}
|
98
100
|
- name: prometheus
|
99
101
|
namespace: prometheus-system
|
100
102
|
chart: prometheus/prometheus
|
101
|
-
installed: {{
|
103
|
+
installed: {{deploy_prometheus}}
|
102
104
|
set:
|
103
105
|
- name: prometheus-pushgateway.enabled
|
104
106
|
value: false
|
@@ -111,7 +113,7 @@ releases:
|
|
111
113
|
- name: kuberay
|
112
114
|
namespace: kuberay
|
113
115
|
chart: kuberay/kuberay-operator
|
114
|
-
installed:
|
116
|
+
installed: {{deploy_kuberay}}
|
115
117
|
version: "1.2.2"
|
116
118
|
- name: kuberay-apiserver
|
117
119
|
namespace: kuberay
|
@@ -152,7 +154,7 @@ releases:
|
|
152
154
|
- name: replicas
|
153
155
|
value: 1
|
154
156
|
- name: image_tag
|
155
|
-
value: "v2025.
|
157
|
+
value: "v2025.07.31"
|
156
158
|
- name: deployment.in_cluster
|
157
159
|
value: "True"
|
158
160
|
- name: deployment.kalavai_username_key
|
@@ -182,7 +184,7 @@ releases:
|
|
182
184
|
- name: nvidia-gpu-operator
|
183
185
|
namespace: kalavai
|
184
186
|
chart: kalavai/gpu
|
185
|
-
installed:
|
187
|
+
installed: false
|
186
188
|
- name: hami-vgpu
|
187
189
|
namespace: kalavai
|
188
190
|
chart: kalavai/hami
|
@@ -1,3 +1,28 @@
|
|
1
|
+
### APS ###
|
2
|
+
- name: deploy_lago
|
3
|
+
default: "False"
|
4
|
+
description: "Deploy Lago payment system"
|
5
|
+
|
6
|
+
- name: deploy_opencost
|
7
|
+
default: "False"
|
8
|
+
description: "Deploy Opencost cost monitoring system"
|
9
|
+
|
10
|
+
- name: deploy_prometheus
|
11
|
+
default: "False"
|
12
|
+
description: "Deploy Prometheus system monitoring system"
|
13
|
+
|
14
|
+
- name: deploy_langfuse
|
15
|
+
default: "False"
|
16
|
+
description: "Deploy Langfuse LLM tracing system"
|
17
|
+
|
18
|
+
- name: deploy_kuberay
|
19
|
+
default: "False"
|
20
|
+
description: "Deploy Langfuse LLM tracing system"
|
21
|
+
|
22
|
+
######
|
23
|
+
|
24
|
+
### VARIABLES ###
|
25
|
+
|
1
26
|
- name: kalavai_api_endpoint
|
2
27
|
default: https://platform.kalavai.net/_/api
|
3
28
|
description: ""
|
@@ -78,4 +103,4 @@
|
|
78
103
|
|
79
104
|
- name: minio_rootPassword
|
80
105
|
default: "password"
|
81
|
-
description: ""
|
106
|
+
description: ""
|
@@ -1,12 +1,4 @@
|
|
1
|
-
# STORAGE #
|
2
|
-
- name: storage_label_selector
|
3
|
-
default: "kalavai.storage.enabled:True"
|
4
|
-
description: ""
|
5
|
-
|
6
1
|
- name: storage_class_name
|
7
|
-
default: "longhorn
|
2
|
+
default: "longhorn"
|
8
3
|
description: ""
|
9
4
|
|
10
|
-
- name: storage_replicas
|
11
|
-
default: 1
|
12
|
-
description: ""
|
kalavai_client/bridge_api.py
CHANGED
@@ -2,10 +2,13 @@
|
|
2
2
|
Core kalavai service.
|
3
3
|
Used as a bridge between the kalavai-client app and the reflex frontend
|
4
4
|
"""
|
5
|
-
from fastapi import FastAPI, HTTPException, Depends
|
5
|
+
from fastapi import FastAPI, HTTPException, Depends, Query, Body
|
6
|
+
from typing import Optional, List
|
7
|
+
from fastapi_mcp import FastApiMCP
|
6
8
|
from starlette.requests import Request
|
7
9
|
import uvicorn
|
8
10
|
|
11
|
+
from kalavai_client.core import Job
|
9
12
|
from kalavai_client.bridge_models import (
|
10
13
|
CreatePoolRequest,
|
11
14
|
InvitesRequest,
|
@@ -15,8 +18,7 @@ from kalavai_client.bridge_models import (
|
|
15
18
|
DeleteJobRequest,
|
16
19
|
JobDetailsRequest,
|
17
20
|
NodesActionRequest,
|
18
|
-
NodeLabelsRequest
|
19
|
-
GetNodeLabelsRequest
|
21
|
+
NodeLabelsRequest
|
20
22
|
)
|
21
23
|
from kalavai_client.core import (
|
22
24
|
create_pool,
|
@@ -52,7 +54,10 @@ from kalavai_client.core import (
|
|
52
54
|
get_node_labels,
|
53
55
|
TokenType
|
54
56
|
)
|
55
|
-
from kalavai_client.utils import load_user_id
|
57
|
+
from kalavai_client.utils import (
|
58
|
+
load_user_id,
|
59
|
+
extract_auth_token
|
60
|
+
)
|
56
61
|
|
57
62
|
app = FastAPI(
|
58
63
|
title="Kalavai Bridge API",
|
@@ -73,14 +78,16 @@ async def verify_api_key(request: Request):
|
|
73
78
|
user_id = load_user_id()
|
74
79
|
if user_id is None:
|
75
80
|
return None
|
76
|
-
api_key = request.headers
|
81
|
+
api_key = extract_auth_token(headers=request.headers)
|
77
82
|
if api_key != user_id:
|
78
83
|
raise HTTPException(status_code=401, detail="Request requires API Key")
|
79
84
|
return api_key
|
80
85
|
|
81
86
|
@app.post("/create_pool",
|
82
|
-
|
83
|
-
|
87
|
+
operation_id="create_pool",
|
88
|
+
summary="Create a new Kalavai compute pool",
|
89
|
+
tags=["pool_management"],
|
90
|
+
description="Creates a new distributed compute pool that allows multiple nodes to join and share GPU resources. The pool acts as a Kubernetes cluster where users can deploy and manage machine learning jobs across multiple devices.",
|
84
91
|
response_description="Result of pool creation")
|
85
92
|
def pool_create(request: CreatePoolRequest, api_key: str = Depends(verify_api_key)):
|
86
93
|
"""
|
@@ -109,8 +116,10 @@ def pool_create(request: CreatePoolRequest, api_key: str = Depends(verify_api_ke
|
|
109
116
|
return result
|
110
117
|
|
111
118
|
@app.post("/join_pool",
|
112
|
-
|
113
|
-
|
119
|
+
operation_id="join_pool",
|
120
|
+
summary="Join an existing Kalavai pool as a compute node",
|
121
|
+
description="Joins a running Kalavai pool by providing a valid join token. This endpoint registers the current machine as a compute node in the pool, making its GPU resources available for job scheduling. The node will receive workloads based on the pool's scheduling policy.",
|
122
|
+
tags=["pool_management"],
|
114
123
|
response_description="Result of joining the pool")
|
115
124
|
def pool_join(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)):
|
116
125
|
"""
|
@@ -130,8 +139,10 @@ def pool_join(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)):
|
|
130
139
|
return result
|
131
140
|
|
132
141
|
@app.post("/attach_to_pool",
|
133
|
-
|
134
|
-
|
142
|
+
operation_id="attach_to_pool",
|
143
|
+
summary="Attach to a pool for management purposes",
|
144
|
+
description="Attaches to an existing Kalavai pool for administrative and monitoring purposes without contributing compute resources. This is typically used by frontend applications or management tools that need to interact with the pool but don't provide GPU resources.",
|
145
|
+
tags=["pool_management"],
|
135
146
|
response_description="Result of attaching to the pool")
|
136
147
|
def pool_attach(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)):
|
137
148
|
"""
|
@@ -149,8 +160,10 @@ def pool_attach(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)
|
|
149
160
|
return result
|
150
161
|
|
151
162
|
@app.post("/stop_pool",
|
152
|
-
|
153
|
-
|
163
|
+
operation_id="stop_pool",
|
164
|
+
summary="Stop and clean up the current Kalavai pool",
|
165
|
+
description="Gracefully shuts down the current Kalavai pool, terminating all running jobs and optionally removing all compute nodes from the cluster. This operation is irreversible and will disconnect all nodes from the pool.",
|
166
|
+
tags=["pool_management"],
|
154
167
|
response_description="Result of stopping the pool")
|
155
168
|
def pool_stop(request: StopPoolRequest, api_key: str = Depends(verify_api_key)):
|
156
169
|
"""
|
@@ -164,8 +177,10 @@ def pool_stop(request: StopPoolRequest, api_key: str = Depends(verify_api_key)):
|
|
164
177
|
return result
|
165
178
|
|
166
179
|
@app.post("/delete_nodes",
|
167
|
-
|
168
|
-
|
180
|
+
operation_id="delete_nodes",
|
181
|
+
summary="Remove specific nodes from the pool",
|
182
|
+
description="Removes specified compute nodes from the Kalavai pool. This operation will terminate any jobs running on the target nodes and clean up their resources. Use with caution as it may interrupt running workloads.",
|
183
|
+
tags=["pool_management"],
|
169
184
|
response_description="Result of node deletion")
|
170
185
|
def device_delete(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
|
171
186
|
"""
|
@@ -179,8 +194,10 @@ def device_delete(request: NodesActionRequest, api_key: str = Depends(verify_api
|
|
179
194
|
return result
|
180
195
|
|
181
196
|
@app.post("/cordon_nodes",
|
182
|
-
|
183
|
-
|
197
|
+
operation_id="cordon_nodes",
|
198
|
+
summary="Mark nodes as unschedulable",
|
199
|
+
description="Marks specified nodes as unschedulable, preventing new jobs from being assigned to them while allowing existing jobs to complete. This is useful for maintenance operations or when you want to gradually remove nodes from the pool.",
|
200
|
+
tags=["pool_management"],
|
184
201
|
response_description="Result of cordoning nodes")
|
185
202
|
def device_cordon(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
|
186
203
|
"""
|
@@ -194,8 +211,10 @@ def device_cordon(request: NodesActionRequest, api_key: str = Depends(verify_api
|
|
194
211
|
return result
|
195
212
|
|
196
213
|
@app.post("/uncordon_nodes",
|
197
|
-
|
198
|
-
|
214
|
+
operation_id="uncordon_nodes",
|
215
|
+
summary="Mark nodes as schedulable again",
|
216
|
+
description="Re-enables job scheduling on previously cordoned nodes, allowing them to receive new workloads. This reverses the effect of the cordon operation.",
|
217
|
+
tags=["pool_management"],
|
199
218
|
response_description="Result of uncordoning nodes")
|
200
219
|
def device_uncordon(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
|
201
220
|
"""
|
@@ -209,8 +228,10 @@ def device_uncordon(request: NodesActionRequest, api_key: str = Depends(verify_a
|
|
209
228
|
return result
|
210
229
|
|
211
230
|
@app.get("/get_pool_token",
|
212
|
-
|
213
|
-
|
231
|
+
operation_id="get_pool_token",
|
232
|
+
summary="Generate a token for pool access",
|
233
|
+
description="Generates a secure token that can be used to join or attach to the current Kalavai pool. Different token types provide different levels of access - join tokens allow nodes to contribute resources, while attach tokens allow management access.",
|
234
|
+
tags=["auth"],
|
214
235
|
response_description="Pool token")
|
215
236
|
def get_token(mode: int, api_key: str = Depends(verify_api_key)):
|
216
237
|
"""
|
@@ -221,16 +242,20 @@ def get_token(mode: int, api_key: str = Depends(verify_api_key)):
|
|
221
242
|
return get_pool_token(mode=TokenType(mode))
|
222
243
|
|
223
244
|
@app.get("/fetch_devices",
|
224
|
-
|
225
|
-
|
245
|
+
operation_id="fetch_devices",
|
246
|
+
summary="Get list of all compute devices in the pool",
|
247
|
+
description="Retrieves information about all compute devices (nodes) currently connected to the Kalavai pool, including their status, available resources, and current workload distribution.",
|
248
|
+
tags=["info"],
|
226
249
|
response_description="List of devices")
|
227
250
|
def get_devices(api_key: str = Depends(verify_api_key)):
|
228
251
|
"""Get list of available devices"""
|
229
252
|
return fetch_devices()
|
230
253
|
|
231
254
|
@app.post("/send_pool_invites",
|
232
|
-
|
233
|
-
|
255
|
+
operation_id="send_pool_invites",
|
256
|
+
summary="Send invitations to join the pool",
|
257
|
+
description="Sends invitations to potential users or nodes to join the current Kalavai pool. Invitees will receive tokens that allow them to connect to the pool and contribute their resources.",
|
258
|
+
tags=["avoid"],
|
234
259
|
response_description="Result of sending invites")
|
235
260
|
def send_pool_invites(request: InvitesRequest, api_key: str = Depends(verify_api_key)):
|
236
261
|
"""
|
@@ -241,24 +266,30 @@ def send_pool_invites(request: InvitesRequest, api_key: str = Depends(verify_api
|
|
241
266
|
return send_invites(invitees=request.invitees)
|
242
267
|
|
243
268
|
@app.get("/fetch_resources",
|
244
|
-
|
245
|
-
|
269
|
+
operation_id="fetch_resources",
|
270
|
+
summary="Get resource utilization for specific nodes",
|
271
|
+
description="Retrieves detailed resource information (CPU, memory, GPU usage) for the pool; optionally for a list of specified nodes in the pool (as {'nodes': node_list}). This helps monitor resource utilization and plan workload distribution.",
|
272
|
+
tags=["info"],
|
246
273
|
response_description="Resource information")
|
247
|
-
def resources(api_key: str = Depends(verify_api_key)):
|
274
|
+
def resources(request: Optional[NodesActionRequest]=NodesActionRequest(), api_key: str = Depends(verify_api_key)):
|
248
275
|
"""Get available resources"""
|
249
|
-
return fetch_resources()
|
276
|
+
return fetch_resources(node_names=request.nodes)
|
250
277
|
|
251
278
|
@app.get("/fetch_job_names",
|
252
|
-
|
253
|
-
|
279
|
+
operation_id="fetch_job_names",
|
280
|
+
summary="Get list of all jobs (model deployments) in the pool",
|
281
|
+
description="Retrieves the names of all jobs and models currently deployed or scheduled in the Kalavai pool. This provides an overview of all workloads in the system.",
|
282
|
+
tags=["info"],
|
254
283
|
response_description="List of job names")
|
255
284
|
def job_names(api_key: str = Depends(verify_api_key)):
|
256
285
|
"""Get list of job names"""
|
257
286
|
return fetch_job_names()
|
258
287
|
|
259
288
|
@app.get("/fetch_gpus",
|
260
|
-
|
261
|
-
|
289
|
+
operation_id="fetch_gpus",
|
290
|
+
summary="Get GPU information across the pool",
|
291
|
+
description="Retrieves detailed information about all GPUs in the Kalavai pool, including their availability status, current utilization, and which jobs are using them. Can filter to show only available GPUs.",
|
292
|
+
tags=["info"],
|
262
293
|
response_description="List of GPUs")
|
263
294
|
def gpus(available: bool = False, api_key: str = Depends(verify_api_key)):
|
264
295
|
"""
|
@@ -269,26 +300,26 @@ def gpus(available: bool = False, api_key: str = Depends(verify_api_key)):
|
|
269
300
|
return fetch_gpus(available=available)
|
270
301
|
|
271
302
|
@app.post("/fetch_job_details",
|
272
|
-
|
273
|
-
|
303
|
+
operation_id="fetch_job_details",
|
304
|
+
summary="Get detailed information about specific job and model deployments",
|
305
|
+
description="Given a list of jobs (as {'jobs': [{'name': job_name}]}'), retrieves comprehensive information about specified jobs or models including their status, resource usage, runtime, and configuration. Useful for monitoring and debugging job execution.",
|
306
|
+
tags=["info"],
|
274
307
|
response_description="Job details")
|
275
308
|
def job_details(request: JobDetailsRequest, api_key: str = Depends(verify_api_key)):
|
276
|
-
"""
|
277
|
-
Get job details with the following parameters:
|
278
|
-
|
279
|
-
- **jobs**: List of jobs to get details for
|
280
|
-
"""
|
309
|
+
"""Get job details"""
|
281
310
|
return fetch_job_details(jobs=request.jobs)
|
282
311
|
|
283
312
|
@app.get("/fetch_job_logs",
|
284
|
-
|
285
|
-
|
313
|
+
operation_id="fetch_job_logs",
|
314
|
+
summary="Get execution logs for a specific job",
|
315
|
+
description="Retrieves the execution logs for a specified job, providing real-time or historical output from the job's containers. Useful for debugging, monitoring progress, and understanding job behavior.",
|
316
|
+
tags=["info", "avoid"],
|
286
317
|
response_description="Job logs")
|
287
318
|
def job_logs(
|
288
319
|
job_name: str,
|
289
|
-
force_namespace: str = None,
|
290
|
-
pod_name: str = None,
|
291
|
-
tail: int = 100,
|
320
|
+
force_namespace: str = Query(None),
|
321
|
+
pod_name: str = Query(None),
|
322
|
+
tail: int = Query(100),
|
292
323
|
api_key: str = Depends(verify_api_key)
|
293
324
|
):
|
294
325
|
"""
|
@@ -307,28 +338,68 @@ def job_logs(
|
|
307
338
|
)
|
308
339
|
|
309
340
|
@app.get("/fetch_job_templates",
|
310
|
-
|
311
|
-
|
341
|
+
operation_id="fetch_job_templates",
|
342
|
+
summary="Get available job templates",
|
343
|
+
description="Retrieves a list of all available job templates that can be used to deploy workloads. Templates provide predefined configurations for frameworks.",
|
344
|
+
tags=["info"],
|
312
345
|
response_description="List of job templates")
|
313
346
|
def job_templates(api_key: str = Depends(verify_api_key)):
|
314
|
-
"""Get available job templates"""
|
315
347
|
return fetch_job_templates()
|
316
348
|
|
349
|
+
@app.get("/fetch_model_templates",
|
350
|
+
operation_id="fetch_model_templates",
|
351
|
+
summary="Get available model engines templates",
|
352
|
+
description="Retrieves a list of all available model engine templates that can be used to deploy models. Templates provide predefined configurations for model engine frameworks.",
|
353
|
+
tags=["info"],
|
354
|
+
response_description="List of model engine templates")
|
355
|
+
def model_templates(api_key: str = Depends(verify_api_key)):
|
356
|
+
return fetch_job_templates(type="model")
|
357
|
+
|
317
358
|
@app.get("/fetch_job_defaults",
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
""
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
359
|
+
operation_id="fetch_job_defaults",
|
360
|
+
summary="Get default values for a job or model engine template deployment",
|
361
|
+
description="Retrieves the default values for a specific job or model engine template deployment. This helps users understand what parameters are required and what their default values are before deploying a job.",
|
362
|
+
tags=["info"],
|
363
|
+
response_description="Job and model engine default values")
|
364
|
+
def job_defaults(name: str, api_key: str = Depends(verify_api_key)):
|
365
|
+
result = fetch_job_defaults(name=name)
|
366
|
+
return result["defaults"]
|
367
|
+
|
368
|
+
@app.get("/fetch_job_metadata",
|
369
|
+
operation_id="fetch_job_metadata",
|
370
|
+
summary="Get metadata with information about a given job or model engine template deployment",
|
371
|
+
description="Retrieves the metadata associated with a specific job or model engine template deployment. This helps users understand what the template can be used for.",
|
372
|
+
tags=["info"],
|
373
|
+
response_description="Job and model engine metadata values")
|
374
|
+
def job_metadata(name: str, api_key: str = Depends(verify_api_key)):
|
375
|
+
result = fetch_job_defaults(name=name)
|
376
|
+
return result["metadata"]
|
377
|
+
|
378
|
+
@app.get("/fetch_job_rules",
|
379
|
+
operation_id="fetch_job_rules",
|
380
|
+
summary="Get the rules associated with the use of a given job or model engine template",
|
381
|
+
description="Retrieves the rules associated with a specific job or model engine template deployment. This helps users and AI agents determine if a given model engine template is adequate for the task.",
|
382
|
+
tags=["info"],
|
383
|
+
response_description="Job and model engine rules")
|
384
|
+
def job_rules(name: str, api_key: str = Depends(verify_api_key)):
|
385
|
+
result = job_metadata(name=name)
|
386
|
+
return result["template_rules"]
|
387
|
+
|
388
|
+
@app.get("/fetch_job_values_rules",
|
389
|
+
operation_id="fetch_job_values_rules",
|
390
|
+
summary="Get information on how to provide values to the parameters of a specific job or model engine template",
|
391
|
+
description="Retrieves information necessary to fill up the values required to deploy a specific job or model engine template. This helps users and AI agents generate the values dictionary for a job or model engine template deployment.",
|
392
|
+
tags=["info"],
|
393
|
+
response_description="Job and model engine info for values")
|
394
|
+
def job_values_rules(name: str, api_key: str = Depends(verify_api_key)):
|
395
|
+
result = job_metadata(name=name)
|
396
|
+
return result["values_rules"]
|
328
397
|
|
329
398
|
@app.post("/deploy_job",
|
330
|
-
|
331
|
-
|
399
|
+
operation_id="deploy_job",
|
400
|
+
summary="Deploy a new job to the pool",
|
401
|
+
description="Deploys a new job to the Kalavai pool using a specified template and configuration. The job will be scheduled on appropriate nodes based on resource availability and any specified target labels.",
|
402
|
+
tags=["job_management"],
|
332
403
|
response_description="Result of job deployment")
|
333
404
|
def job_deploy(request: DeployJobRequest, api_key: str = Depends(verify_api_key)):
|
334
405
|
"""
|
@@ -348,8 +419,10 @@ def job_deploy(request: DeployJobRequest, api_key: str = Depends(verify_api_key)
|
|
348
419
|
return result
|
349
420
|
|
350
421
|
@app.post("/delete_job",
|
351
|
-
|
352
|
-
|
422
|
+
operation_id="delete_job",
|
423
|
+
summary="Terminate and remove a job from the pool",
|
424
|
+
description="Terminates a running job and removes it from the Kalavai pool. This will stop all containers associated with the job and free up the resources they were using.",
|
425
|
+
tags=["job_management"],
|
353
426
|
response_description="Result of job deletion")
|
354
427
|
def job_delete(request: DeleteJobRequest, api_key: str = Depends(verify_api_key)):
|
355
428
|
"""
|
@@ -365,8 +438,10 @@ def job_delete(request: DeleteJobRequest, api_key: str = Depends(verify_api_key)
|
|
365
438
|
return result
|
366
439
|
|
367
440
|
@app.get("/authenticate_user",
|
368
|
-
|
369
|
-
|
441
|
+
operation_id="authenticate_user",
|
442
|
+
summary="Authenticate a user with the Kalavai system",
|
443
|
+
description="Authenticates a user against the Kalavai system, establishing their identity and permissions. This is required for accessing pool management features and deploying jobs.",
|
444
|
+
tags=["info", "auth"],
|
370
445
|
response_description="Authentication result")
|
371
446
|
def user_authenticate(user_id: str, api_key: str = Depends(verify_api_key)):
|
372
447
|
"""
|
@@ -380,8 +455,10 @@ def user_authenticate(user_id: str, api_key: str = Depends(verify_api_key)):
|
|
380
455
|
return result
|
381
456
|
|
382
457
|
@app.get("/load_user_session",
|
383
|
-
|
384
|
-
|
458
|
+
operation_id="load_user_session",
|
459
|
+
summary="Load current user session information",
|
460
|
+
description="Retrieves information about the currently authenticated user's session, including their identity, permissions, and any active connections to pools.",
|
461
|
+
tags=["info", "auth"],
|
385
462
|
response_description="User session information")
|
386
463
|
def user_session(api_key: str = Depends(verify_api_key)):
|
387
464
|
"""Load the current user session"""
|
@@ -389,8 +466,10 @@ def user_session(api_key: str = Depends(verify_api_key)):
|
|
389
466
|
return result
|
390
467
|
|
391
468
|
@app.get("/user_logout",
|
392
|
-
|
393
|
-
|
469
|
+
operation_id="user_logout",
|
470
|
+
summary="Log out the current user",
|
471
|
+
description="Terminates the current user's session and clears authentication credentials. This should be called when the user is done using the system to ensure proper cleanup.",
|
472
|
+
tags=["auth"],
|
394
473
|
response_description="Logout result")
|
395
474
|
def logout_user():
|
396
475
|
"""Log out the current user"""
|
@@ -398,8 +477,10 @@ def logout_user():
|
|
398
477
|
return result
|
399
478
|
|
400
479
|
@app.get("/is_connected",
|
401
|
-
|
402
|
-
|
480
|
+
operation_id="is_connected",
|
481
|
+
summary="Check if connected to a Kalavai pool",
|
482
|
+
description="Verifies whether the current instance is connected to a Kalavai pool. Returns connection status and pool information if connected.",
|
483
|
+
tags=["agent_management"],
|
403
484
|
response_description="Connection status")
|
404
485
|
def pool_connected():
|
405
486
|
"""Check if connected to a pool"""
|
@@ -407,8 +488,10 @@ def pool_connected():
|
|
407
488
|
return result
|
408
489
|
|
409
490
|
@app.get("/is_agent_running",
|
410
|
-
|
411
|
-
|
491
|
+
operation_id="is_agent_running",
|
492
|
+
summary="Check if the Kalavai agent is running",
|
493
|
+
description="Verifies whether the Kalavai agent service is currently running on this machine. The agent is responsible for managing pool connections and job execution.",
|
494
|
+
tags=["agent_management"],
|
412
495
|
response_description="Agent status")
|
413
496
|
def agent_running():
|
414
497
|
"""Check if the agent is running"""
|
@@ -416,8 +499,10 @@ def agent_running():
|
|
416
499
|
return result
|
417
500
|
|
418
501
|
@app.get("/is_server",
|
419
|
-
|
420
|
-
|
502
|
+
operation_id="is_server",
|
503
|
+
summary="Check if running as a pool server",
|
504
|
+
description="Determines whether this instance is running as a Kalavai pool server (coordinator) or as a client node. Server instances manage the pool while client instances contribute resources.",
|
505
|
+
tags=["agent_management"],
|
421
506
|
response_description="Server status")
|
422
507
|
def server():
|
423
508
|
"""Check if running as server"""
|
@@ -425,8 +510,10 @@ def server():
|
|
425
510
|
return result
|
426
511
|
|
427
512
|
@app.post("/pause_agent",
|
428
|
-
|
429
|
-
|
513
|
+
operation_id="pause_agent",
|
514
|
+
summary="Pause the Kalavai agent service",
|
515
|
+
description="Temporarily pauses the Kalavai agent, stopping it from accepting new jobs or participating in pool operations. Existing jobs will continue running until completion.",
|
516
|
+
tags=["agent_management"],
|
430
517
|
response_description="Result of pausing agent")
|
431
518
|
def agent_pause():
|
432
519
|
"""Pause the agent"""
|
@@ -434,8 +521,10 @@ def agent_pause():
|
|
434
521
|
return result
|
435
522
|
|
436
523
|
@app.post("/resume_agent",
|
437
|
-
|
438
|
-
|
524
|
+
operation_id="resume_agent",
|
525
|
+
summary="Resume the Kalavai agent service",
|
526
|
+
description="Resumes the previously paused Kalavai agent, allowing it to accept new jobs and participate in pool operations again.",
|
527
|
+
tags=["agent_management"],
|
439
528
|
response_description="Result of resuming agent")
|
440
529
|
def agent_resume():
|
441
530
|
"""Resume the agent"""
|
@@ -443,8 +532,10 @@ def agent_resume():
|
|
443
532
|
return result
|
444
533
|
|
445
534
|
@app.get("/get_ip_addresses",
|
446
|
-
|
447
|
-
|
535
|
+
operation_id="get_ip_addresses",
|
536
|
+
summary="Get available IP addresses for pool configuration",
|
537
|
+
description="Retrieves a list of available IP addresses that can be used for pool configuration. Optionally filters by subnet to help with network planning and pool setup.",
|
538
|
+
tags=["agent_management"],
|
448
539
|
response_description="List of IP addresses")
|
449
540
|
def ip_addresses(subnet: str = None, api_key: str = Depends(verify_api_key)):
|
450
541
|
"""
|
@@ -456,8 +547,10 @@ def ip_addresses(subnet: str = None, api_key: str = Depends(verify_api_key)):
|
|
456
547
|
return result
|
457
548
|
|
458
549
|
@app.get("/list_available_pools",
|
459
|
-
|
460
|
-
|
550
|
+
operation_id="list_available_pools",
|
551
|
+
summary="List all available Kalavai pools",
|
552
|
+
description="Retrieves a list of all Kalavai pools that are currently available for connection. Can filter to show only pools owned by the current user or all public pools.",
|
553
|
+
tags=["agent_management"],
|
461
554
|
response_description="List of available pools")
|
462
555
|
def pool_connected(user_only: bool = False, api_key: str = Depends(verify_api_key)):
|
463
556
|
"""
|
@@ -469,8 +562,10 @@ def pool_connected(user_only: bool = False, api_key: str = Depends(verify_api_ke
|
|
469
562
|
return result
|
470
563
|
|
471
564
|
@app.post("/add_node_labels",
|
472
|
-
|
473
|
-
|
565
|
+
operation_id="add_node_labels",
|
566
|
+
summary="Add custom labels to a compute node",
|
567
|
+
description="Adds custom labels to a specific compute node in the pool. Labels can be used for job scheduling, resource allocation, and organizational purposes. Labels are key-value pairs that help categorize and identify nodes.",
|
568
|
+
tags=["pool_management"],
|
474
569
|
response_description="Result of adding labels")
|
475
570
|
def node_labels(request: NodeLabelsRequest, api_key: str = Depends(verify_api_key)):
|
476
571
|
"""
|
@@ -485,21 +580,40 @@ def node_labels(request: NodeLabelsRequest, api_key: str = Depends(verify_api_ke
|
|
485
580
|
)
|
486
581
|
return result
|
487
582
|
|
488
|
-
@app.
|
489
|
-
|
490
|
-
|
583
|
+
@app.get("/get_node_labels",
|
584
|
+
operation_id="get_node_labels",
|
585
|
+
summary="Get labels for specified compute nodes",
|
586
|
+
description="Retrieves all labels associated with specified compute nodes in the pool. Labels provide metadata about nodes and can be used for filtering and scheduling decisions.",
|
587
|
+
tags=["info"],
|
491
588
|
response_description="Node labels")
|
492
|
-
def node_labels_get(request: GetNodeLabelsRequest, api_key: str = Depends(verify_api_key)):
|
589
|
+
def node_labels_get(request: Optional[NodesActionRequest]=NodesActionRequest(), api_key: str = Depends(verify_api_key)):
|
493
590
|
"""
|
494
591
|
Get node labels with the following parameters:
|
495
592
|
|
496
593
|
- **node_names**: List of node names to get labels for
|
497
594
|
"""
|
498
595
|
result = get_node_labels(
|
499
|
-
node_names=request.node_names
|
596
|
+
node_names=request.nodes
|
500
597
|
)
|
501
598
|
return result
|
502
599
|
|
600
|
+
### BUILD MCP WRAPPER ###
|
601
|
+
mcp = FastApiMCP(
|
602
|
+
app,
|
603
|
+
name="Protected MCP",
|
604
|
+
#exclude_operations=[],
|
605
|
+
exclude_tags=[
|
606
|
+
"auth",
|
607
|
+
"agent_management",
|
608
|
+
"job_management",
|
609
|
+
"pool_management",
|
610
|
+
"avoid"
|
611
|
+
]
|
612
|
+
)
|
613
|
+
mcp.mount()
|
614
|
+
##########################
|
615
|
+
|
616
|
+
|
503
617
|
def run_api(host="0.0.0.0", port=8001, log_level="critical"):
|
504
618
|
uvicorn.run(
|
505
619
|
app,
|
kalavai_client/bridge_models.py
CHANGED
@@ -18,7 +18,7 @@ class CreatePoolRequest(BaseModel):
|
|
18
18
|
description: str = Field("", description="Description of the pool")
|
19
19
|
|
20
20
|
class NodesActionRequest(BaseModel):
|
21
|
-
nodes: list[str] = Field(description="List of node names to perform the action on")
|
21
|
+
nodes: list[str] = Field(None, description="List of node names to perform the action on, defaults to None")
|
22
22
|
|
23
23
|
class JoinPoolRequest(BaseModel):
|
24
24
|
token: str = Field(description="Token to join the pool")
|
@@ -26,10 +26,10 @@ class JoinPoolRequest(BaseModel):
|
|
26
26
|
node_name: str = Field(None, description="Name of the node")
|
27
27
|
num_gpus: int = Field(None, description="Number of GPUs to allocate")
|
28
28
|
frontend: bool = Field(False, description="Whether this is a frontend request")
|
29
|
+
|
29
30
|
class JobDetailsRequest(BaseModel):
|
30
31
|
jobs: list[Job] = Field(description="List of jobs to get details for")
|
31
32
|
|
32
|
-
|
33
33
|
class StopPoolRequest(BaseModel):
|
34
34
|
skip_node_deletion: bool = Field(False, description="Whether to skip node deletion when stopping the pool")
|
35
35
|
|
@@ -46,6 +46,3 @@ class DeleteJobRequest(BaseModel):
|
|
46
46
|
class NodeLabelsRequest(BaseModel):
|
47
47
|
node_name: str = Field(description="Name of the node to add labels to")
|
48
48
|
labels: Dict[str, str] = Field(description="Dictionary of labels to add to the node")
|
49
|
-
|
50
|
-
class GetNodeLabelsRequest(BaseModel):
|
51
|
-
node_names: List[str] = Field(description="List of node names to get labels for")
|
kalavai_client/cli.py
CHANGED
@@ -207,28 +207,43 @@ def input_gpus(non_interactive=False):
|
|
207
207
|
@arguably.command
|
208
208
|
def gui__start(
|
209
209
|
*others,
|
210
|
-
gui_frontend_port=3000,
|
211
|
-
gui_backend_port=8000,
|
212
|
-
bridge_port=8001,
|
213
210
|
log_level="critical",
|
214
211
|
backend_only=False
|
215
212
|
):
|
216
213
|
"""Run GUI (docker) and kalavai core backend (api)"""
|
217
|
-
|
218
|
-
|
219
|
-
|
214
|
+
ports_needed = 1 if backend_only else 3
|
215
|
+
# find 3 available ports
|
216
|
+
ip = socket.gethostbyname (socket.gethostname())
|
217
|
+
ports = []
|
218
|
+
for port in range(49152,65535):
|
219
|
+
try:
|
220
|
+
serv = socket.socket(socket.AF_INET,socket.SOCK_STREAM) # create a new socket
|
221
|
+
serv.bind((ip, port)) # bind socket with address
|
222
|
+
serv.close()
|
223
|
+
ports.append(port)
|
224
|
+
except:
|
225
|
+
#port closed
|
226
|
+
pass
|
227
|
+
if len(ports) >= ports_needed:
|
228
|
+
break
|
220
229
|
|
230
|
+
if len(ports) < ports_needed:
|
231
|
+
# if not found, error
|
232
|
+
console.log(f"[red]Cannot initialise GUI: Could not find {ports_needed} free ports in your machine")
|
233
|
+
return
|
234
|
+
console.log(f"Using ports: {ports}")
|
235
|
+
|
221
236
|
user_key = load_user_id()
|
222
237
|
if user_key is not None:
|
223
238
|
console.log(f"[green]Using user key: {user_key}")
|
224
239
|
if not backend_only:
|
225
240
|
values = {
|
226
|
-
"gui_frontend_port":
|
227
|
-
"gui_backend_port":
|
228
|
-
"bridge_port":
|
241
|
+
"gui_frontend_port": ports[1],
|
242
|
+
"gui_backend_port": ports[2],
|
243
|
+
"bridge_port": ports[0],
|
229
244
|
"path": user_path("", create_path=True),
|
230
245
|
"protected_access": user_key
|
231
|
-
|
246
|
+
}
|
232
247
|
compose_yaml = load_template(
|
233
248
|
template_path=DOCKER_COMPOSE_GUI,
|
234
249
|
values=values)
|
@@ -237,11 +252,11 @@ def gui__start(
|
|
237
252
|
|
238
253
|
run_cmd(f"docker compose --file {USER_GUI_COMPOSE_FILE} up -d")
|
239
254
|
|
240
|
-
console.log(f"[green]Loading GUI, may take a few minutes. It will be available at http://localhost:{
|
255
|
+
console.log(f"[green]Loading GUI, may take a few minutes. It will be available at http://localhost:{ports[1]}")
|
241
256
|
print(
|
242
257
|
"Deploying bridge API"
|
243
258
|
)
|
244
|
-
run_api(port=
|
259
|
+
run_api(port=ports[0], log_level=log_level)
|
245
260
|
|
246
261
|
if not backend_only:
|
247
262
|
run_cmd(f"docker compose --file {USER_GUI_COMPOSE_FILE} down")
|
@@ -1096,12 +1111,22 @@ def job__defaults(template_name, *others):
|
|
1096
1111
|
return
|
1097
1112
|
|
1098
1113
|
# deploy template with kube-watcher
|
1099
|
-
|
1114
|
+
data = fetch_job_defaults(name=template_name)
|
1115
|
+
metadata = data["metadata"]
|
1116
|
+
defaults = data["defaults"]
|
1100
1117
|
if "error" in defaults:
|
1101
1118
|
console.log(f"[red]Error when fetching job defaults: {defaults}")
|
1102
1119
|
print(
|
1103
1120
|
json.dumps(defaults, indent=3)
|
1104
1121
|
)
|
1122
|
+
print(
|
1123
|
+
"*****************",
|
1124
|
+
"Metadata",
|
1125
|
+
"*****************"
|
1126
|
+
)
|
1127
|
+
print(
|
1128
|
+
json.dumps(metadata, indent=3)
|
1129
|
+
)
|
1105
1130
|
|
1106
1131
|
|
1107
1132
|
@arguably.command
|
kalavai_client/core.py
CHANGED
@@ -13,7 +13,6 @@ import re
|
|
13
13
|
|
14
14
|
from kalavai_client.cluster import CLUSTER
|
15
15
|
from kalavai_client.utils import (
|
16
|
-
DEPLOY_LLM_SIDECARS_KEY,
|
17
16
|
NODE_ROLE_LABEL,
|
18
17
|
check_gpu_drivers,
|
19
18
|
generate_join_token,
|
@@ -75,11 +74,12 @@ from kalavai_client.env import (
|
|
75
74
|
)
|
76
75
|
|
77
76
|
class Job(BaseModel):
|
78
|
-
owner: Optional[str] =
|
77
|
+
owner: Optional[str] = "default"
|
79
78
|
name: Optional[str] = None
|
80
79
|
workers: Optional[str] = None
|
81
80
|
endpoint: Optional[str] = None
|
82
81
|
status: Optional[str] = None
|
82
|
+
host_nodes: Optional[str] = None
|
83
83
|
|
84
84
|
class DeviceStatus(BaseModel):
|
85
85
|
name: str
|
@@ -198,19 +198,22 @@ def get_ip_addresses(subnet=None):
|
|
198
198
|
raise ValueError(f"No IPs available on subnet {subnet}")
|
199
199
|
return ips
|
200
200
|
|
201
|
-
def fetch_resources():
|
201
|
+
def fetch_resources(node_names: list[str]=None):
|
202
|
+
data = {}
|
203
|
+
if node_names is not None:
|
204
|
+
data["node_names"] = node_names
|
202
205
|
try:
|
203
206
|
total = request_to_server(
|
204
207
|
method="get",
|
205
208
|
endpoint="/v1/get_cluster_total_resources",
|
206
|
-
data=
|
209
|
+
data=data,
|
207
210
|
server_creds=USER_LOCAL_SERVER_FILE,
|
208
211
|
user_cookie=USER_COOKIE
|
209
212
|
)
|
210
213
|
available = request_to_server(
|
211
214
|
method="get",
|
212
215
|
endpoint="/v1/get_cluster_available_resources",
|
213
|
-
data=
|
216
|
+
data=data,
|
214
217
|
server_creds=USER_LOCAL_SERVER_FILE,
|
215
218
|
user_cookie=USER_COOKIE
|
216
219
|
)
|
@@ -224,24 +227,28 @@ def fetch_job_defaults(name):
|
|
224
227
|
"template": name
|
225
228
|
}
|
226
229
|
try:
|
227
|
-
|
230
|
+
metadata = request_to_server(
|
228
231
|
method="get",
|
229
232
|
endpoint="/v1/job_defaults",
|
230
233
|
data=data,
|
231
234
|
server_creds=USER_LOCAL_SERVER_FILE,
|
232
235
|
user_cookie=USER_COOKIE
|
233
236
|
)
|
234
|
-
return
|
237
|
+
return metadata
|
235
238
|
except Exception as e:
|
236
239
|
return {"error": str(e)}
|
237
240
|
|
238
|
-
def fetch_job_templates():
|
241
|
+
def fetch_job_templates(type: str=None):
|
242
|
+
data = None
|
243
|
+
if type is not None:
|
244
|
+
data = {"type": type}
|
239
245
|
try:
|
240
246
|
templates = request_to_server(
|
241
247
|
method="get",
|
242
248
|
endpoint="/v1/get_job_templates",
|
243
249
|
server_creds=USER_LOCAL_SERVER_FILE,
|
244
250
|
data=None,
|
251
|
+
params=data,
|
245
252
|
user_cookie=USER_COOKIE
|
246
253
|
)
|
247
254
|
return templates
|
@@ -296,14 +303,18 @@ def fetch_job_details(jobs: list[Job]):
|
|
296
303
|
)
|
297
304
|
workers_status = defaultdict(int)
|
298
305
|
restart_counts = 0
|
306
|
+
host_nodes = set()
|
299
307
|
for ns, ss in result.items():
|
300
308
|
if ns != namespace: # same job name, different namespace
|
301
309
|
continue
|
302
310
|
for _, values in ss.items():
|
303
|
-
# TODO: get nodes involved in deployment (needs kubewatcher)
|
304
311
|
if "conditions" in values and values["conditions"] is not None:
|
305
312
|
restart_counts = sum([c["restart_count"] for c in values["conditions"]])
|
306
313
|
workers_status[values["status"]] += 1
|
314
|
+
# get nodes involved in deployment (needs kubewatcher)
|
315
|
+
if "node_name" in values:
|
316
|
+
host_nodes.add(values["node_name"])
|
317
|
+
|
307
318
|
workers = "\n".join([f"{k}: {v}" for k, v in workers_status.items()])
|
308
319
|
if restart_counts > 0:
|
309
320
|
workers += f"\n({restart_counts} restart)"
|
@@ -320,7 +331,8 @@ def fetch_job_details(jobs: list[Job]):
|
|
320
331
|
server_creds=USER_LOCAL_SERVER_FILE,
|
321
332
|
user_cookie=USER_COOKIE
|
322
333
|
)
|
323
|
-
node_ports = [f"{p['node_port']} (mapped to {p['port']})" for s in result.values() for p in s["ports"]]
|
334
|
+
#node_ports = [f"{p['node_port']} (mapped to {p['port']})" for s in result.values() for p in s["ports"]]
|
335
|
+
node_ports = [f"{p['node_port']}" for s in result.values() for p in s["ports"]]
|
324
336
|
|
325
337
|
urls = [f"http://{load_server_info(data_key=SERVER_IP_KEY, file=USER_LOCAL_SERVER_FILE)}:{node_port}" for node_port in node_ports]
|
326
338
|
if "Ready" in workers_status and len(workers_status) == 1:
|
@@ -338,7 +350,8 @@ def fetch_job_details(jobs: list[Job]):
|
|
338
350
|
name=deployment,
|
339
351
|
workers=workers,
|
340
352
|
endpoint="\n".join(urls),
|
341
|
-
status=str(status)
|
353
|
+
status=str(status),
|
354
|
+
host_nodes=" ".join(host_nodes))
|
342
355
|
)
|
343
356
|
|
344
357
|
except Exception as e:
|
@@ -802,8 +815,7 @@ def create_pool(
|
|
802
815
|
WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
|
803
816
|
WATCHER_SERVICE_KEY: watcher_service,
|
804
817
|
USER_NODE_LABEL_KEY: USER_NODE_LABEL,
|
805
|
-
ALLOW_UNREGISTERED_USER_KEY: True, # Change this if only registered users are allowed
|
806
|
-
DEPLOY_LLM_SIDECARS_KEY: location is not None
|
818
|
+
ALLOW_UNREGISTERED_USER_KEY: True, # Change this if only registered users are allowed
|
807
819
|
}
|
808
820
|
|
809
821
|
store_server_info(
|
kalavai_client/utils.py
CHANGED
@@ -38,7 +38,6 @@ CLUSTER_NAME_KEY = "cluster_name"
|
|
38
38
|
AUTH_KEY = "watcher_admin_key"
|
39
39
|
WRITE_AUTH_KEY = "watcher_write_key"
|
40
40
|
ALLOW_UNREGISTERED_USER_KEY = "watcher_allow_unregistered_user"
|
41
|
-
DEPLOY_LLM_SIDECARS_KEY = "deploy_llm_sidecars"
|
42
41
|
NODE_ROLE_LABEL = "kalavai.node_role"
|
43
42
|
USER_API_KEY = "user_api_key"
|
44
43
|
READONLY_AUTH_KEY = "watcher_readonly_key"
|
@@ -106,6 +105,29 @@ def is_storage_compatible():
|
|
106
105
|
return False
|
107
106
|
################
|
108
107
|
|
108
|
+
def extract_auth_token(headers):
|
109
|
+
"""
|
110
|
+
Extract auth token. Valid headers:
|
111
|
+
X-API-KEY: token
|
112
|
+
X-API-Key: token
|
113
|
+
Authorization: Bearer token
|
114
|
+
authorization: Bearer token
|
115
|
+
"""
|
116
|
+
#return headers.get("X-API-Key")
|
117
|
+
bearer = None
|
118
|
+
try:
|
119
|
+
for header in ["Authorization", "authorization", "X-API-KEY", "X-API-Key"]:
|
120
|
+
bearer = headers.get(header, None)
|
121
|
+
if bearer is not None:
|
122
|
+
break
|
123
|
+
if bearer is not None and " " in bearer:
|
124
|
+
return bearer.split()[-1]
|
125
|
+
else:
|
126
|
+
return bearer
|
127
|
+
except Exception as e:
|
128
|
+
return {"error": str(e)}
|
129
|
+
|
130
|
+
|
109
131
|
def generate_compose_config(role, node_name, target_platform="amd64", write_to_file=True, node_ip_address="0.0.0.0", num_gpus=0, node_labels=None, pool_ip=None, vpn_token=None, pool_token=None):
|
110
132
|
|
111
133
|
if node_labels is not None:
|
@@ -269,6 +291,7 @@ def request_to_server(
|
|
269
291
|
endpoint,
|
270
292
|
data,
|
271
293
|
server_creds,
|
294
|
+
params=None,
|
272
295
|
force_url=None,
|
273
296
|
force_key=None,
|
274
297
|
user_cookie=None,
|
@@ -297,6 +320,7 @@ def request_to_server(
|
|
297
320
|
method=method,
|
298
321
|
url=f"http://{service_url}{endpoint}",
|
299
322
|
json=data,
|
323
|
+
params=params,
|
300
324
|
headers=headers,
|
301
325
|
timeout=timeout
|
302
326
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: kalavai-client
|
3
|
-
Version: 0.6.
|
3
|
+
Version: 0.6.18
|
4
4
|
Summary: Client app for kalavai platform
|
5
5
|
License: Apache-2.0
|
6
6
|
Keywords: LLM,platform
|
@@ -8,15 +8,9 @@ Author: Carlos Fernandez Musoles
|
|
8
8
|
Author-email: carlos@kalavai.net
|
9
9
|
Maintainer: Carlos Fernandez Musoles
|
10
10
|
Maintainer-email: carlos@kalavai.net
|
11
|
-
Requires-Python: >=3.
|
11
|
+
Requires-Python: >=3.10
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
14
|
-
Classifier: Programming Language :: Python :: 3.4
|
15
|
-
Classifier: Programming Language :: Python :: 3.5
|
16
|
-
Classifier: Programming Language :: Python :: 3.6
|
17
|
-
Classifier: Programming Language :: Python :: 3.7
|
18
|
-
Classifier: Programming Language :: Python :: 3.8
|
19
|
-
Classifier: Programming Language :: Python :: 3.9
|
20
14
|
Classifier: Programming Language :: Python :: 3.10
|
21
15
|
Classifier: Programming Language :: Python :: 3.11
|
22
16
|
Classifier: Programming Language :: Python :: 3.12
|
@@ -26,6 +20,7 @@ Requires-Dist: Pillow (==10.3.0)
|
|
26
20
|
Requires-Dist: arguably (>=1.2.5)
|
27
21
|
Requires-Dist: build ; extra == "dev"
|
28
22
|
Requires-Dist: fastapi (==0.115.8)
|
23
|
+
Requires-Dist: fastapi-mcp (==0.3.0)
|
29
24
|
Requires-Dist: importlib_resources (==6.5.2)
|
30
25
|
Requires-Dist: jinja2 (==3.1.4)
|
31
26
|
Requires-Dist: netifaces (==0.11.0)
|
@@ -0,0 +1,25 @@
|
|
1
|
+
kalavai_client/__init__.py,sha256=ErdtY8HWYl_n6MmTR8hlH878NUP9glayQd1egl02vKY,23
|
2
|
+
kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
|
3
|
+
kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
kalavai_client/assets/apps.yaml,sha256=17JuXSv-Qj5Az6ZTRyiEaQXVbI325uTrZzKk2irts2g,6410
|
5
|
+
kalavai_client/assets/apps_values.yaml,sha256=LeSNd3PwkIx0wkTIlEk2KNz3Yy4sXSaHALQEkopdhKE,2165
|
6
|
+
kalavai_client/assets/docker-compose-gui.yaml,sha256=shqN78YLw0QP7bqTKveI4ppz5E-5b1JowmsSB4OG3nA,778
|
7
|
+
kalavai_client/assets/docker-compose-template.yaml,sha256=KHIwJ2WWX7Y7wQKiXRr82Jqd3IKRyls5zhTyl8mSmrc,1805
|
8
|
+
kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
|
9
|
+
kalavai_client/assets/pool_config_template.yaml,sha256=MhBZQsEMKrBgbUVSKgIGmXWhybeGKG6l5XvJb38y5GI,577
|
10
|
+
kalavai_client/assets/pool_config_values.yaml,sha256=_iAnugramLiwJaaDcPSetThvOdR7yFiCffdMri-SQCU,68
|
11
|
+
kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
|
12
|
+
kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
|
13
|
+
kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
|
14
|
+
kalavai_client/bridge_api.py,sha256=Hd7whTX2TAiNYX1G237hv2rqtKUBGRJkzUoWOMZm44A,25562
|
15
|
+
kalavai_client/bridge_models.py,sha256=3mHCqIHVysLLkQvGT-DKqKOrtAlQSfEOdrwSq2yTRRU,2439
|
16
|
+
kalavai_client/cli.py,sha256=SzKG7_ZG0ehMQsECQRWSvqj2Fju2Gd5O7uBa60bFBAY,47830
|
17
|
+
kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
|
18
|
+
kalavai_client/core.py,sha256=weg54lc03gp2qGwEXl90XEnXGdwFFlaTqZjxyKsngj4,34765
|
19
|
+
kalavai_client/env.py,sha256=YsfZj7LWf6ABquDsoIFFkXCFYwenpDk8zVnGsf7qv98,2823
|
20
|
+
kalavai_client/utils.py,sha256=GeX1rKUdlQoOW_K2relER8jRQEN1M0UdhsLKOkv5D_g,13428
|
21
|
+
kalavai_client-0.6.18.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
22
|
+
kalavai_client-0.6.18.dist-info/METADATA,sha256=nCvnC5f8QM1sHV4wk3HI9YjH0c_vkpOIkcVpMFIKEx0,12393
|
23
|
+
kalavai_client-0.6.18.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
24
|
+
kalavai_client-0.6.18.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
|
25
|
+
kalavai_client-0.6.18.dist-info/RECORD,,
|
@@ -1,25 +0,0 @@
|
|
1
|
-
kalavai_client/__init__.py,sha256=1--FABNdIxbiNn1wQox38stjIswkk1wPeOgoYJXMsNU,23
|
2
|
-
kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
|
3
|
-
kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
kalavai_client/assets/apps.yaml,sha256=Qe3RtY4kQbzZnF9K724FUbtqnkuCGfNUfK-WWtamATg,6365
|
5
|
-
kalavai_client/assets/apps_values.yaml,sha256=WRew3bS1MztjzcJfphuJcKn0n2T1ICRupPpr_Csjt_s,1644
|
6
|
-
kalavai_client/assets/docker-compose-gui.yaml,sha256=shqN78YLw0QP7bqTKveI4ppz5E-5b1JowmsSB4OG3nA,778
|
7
|
-
kalavai_client/assets/docker-compose-template.yaml,sha256=KHIwJ2WWX7Y7wQKiXRr82Jqd3IKRyls5zhTyl8mSmrc,1805
|
8
|
-
kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
|
9
|
-
kalavai_client/assets/pool_config_template.yaml,sha256=fFz4w2-fMKD5KvyzFdfcWD_jSneRlmnjLc8hCctweX0,576
|
10
|
-
kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaYgl1pszniY_JUtemk,233
|
11
|
-
kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
|
12
|
-
kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
|
13
|
-
kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
|
14
|
-
kalavai_client/bridge_api.py,sha256=O65aIh5lUl0KldRekHzLC-xdv1YJmrR14kt5-3UgCco,15351
|
15
|
-
kalavai_client/bridge_models.py,sha256=5ALGbkb6cxKwXbrzeTa9ja0kiZkJBvnY3J1IsmXTn0U,2540
|
16
|
-
kalavai_client/cli.py,sha256=_LK5OrCM5PYcYZo7lwXyfI3mlNzLFhL-BicKYbJkxeY,47123
|
17
|
-
kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
|
18
|
-
kalavai_client/core.py,sha256=R8UBTTzMHVPHuM9nB70cIxUxVCHyBspEq1cAWH1OyOQ,34304
|
19
|
-
kalavai_client/env.py,sha256=YsfZj7LWf6ABquDsoIFFkXCFYwenpDk8zVnGsf7qv98,2823
|
20
|
-
kalavai_client/utils.py,sha256=kGtfEuXVG5LgMJk289ksFgYrsMHwKXN7yvS5wCIou8s,12781
|
21
|
-
kalavai_client-0.6.16.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
22
|
-
kalavai_client-0.6.16.dist-info/METADATA,sha256=K5mzqy8pSDdK6WWFSt8YZNTJLENfeV3OOGELq417dYs,12655
|
23
|
-
kalavai_client-0.6.16.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
24
|
-
kalavai_client-0.6.16.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
|
25
|
-
kalavai_client-0.6.16.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|