kalavai-client 0.6.13__tar.gz → 0.6.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/PKG-INFO +1 -1
  2. kalavai_client-0.6.14/kalavai_client/__init__.py +2 -0
  3. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/assets/apps.yaml +1 -1
  4. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/assets/docker-compose-template.yaml +4 -0
  5. kalavai_client-0.6.14/kalavai_client/bridge_api.py +518 -0
  6. kalavai_client-0.6.14/kalavai_client/bridge_models.py +53 -0
  7. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/utils.py +1 -0
  8. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/pyproject.toml +1 -1
  9. kalavai_client-0.6.13/kalavai_client/__init__.py +0 -2
  10. kalavai_client-0.6.13/kalavai_client/bridge_api.py +0 -276
  11. kalavai_client-0.6.13/kalavai_client/bridge_models.py +0 -53
  12. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/LICENSE +0 -0
  13. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/README.md +0 -0
  14. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/__main__.py +0 -0
  15. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/assets/__init__.py +0 -0
  16. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/assets/apps_values.yaml +0 -0
  17. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/assets/docker-compose-gui.yaml +0 -0
  18. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/assets/nginx.conf +0 -0
  19. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/assets/pool_config_template.yaml +0 -0
  20. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/assets/pool_config_values.yaml +0 -0
  21. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/assets/user_workspace.yaml +0 -0
  22. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/assets/user_workspace_values.yaml +0 -0
  23. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/auth.py +0 -0
  24. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/cli.py +0 -0
  25. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/cluster.py +0 -0
  26. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/core.py +0 -0
  27. {kalavai_client-0.6.13 → kalavai_client-0.6.14}/kalavai_client/env.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: kalavai-client
3
- Version: 0.6.13
3
+ Version: 0.6.14
4
4
  Summary: Client app for kalavai platform
5
5
  License: Apache-2.0
6
6
  Keywords: LLM,platform
@@ -0,0 +1,2 @@
1
+
2
+ __version__ = "0.6.14"
@@ -152,7 +152,7 @@ releases:
152
152
  - name: replicas
153
153
  value: 1
154
154
  - name: image_tag
155
- value: "v2025.05.2"
155
+ value: "v2025.06.6"
156
156
  - name: deployment.in_cluster
157
157
  value: "True"
158
158
  - name: deployment.kalavai_username_key
@@ -18,6 +18,7 @@ services:
18
18
  {%if command %}
19
19
  {{service_name}}:
20
20
  image: docker.io/bundenth/kalavai-runner:gpu-latest
21
+ pull_policy: always
21
22
  container_name: {{service_name}}
22
23
  {% if vpn %}
23
24
  depends_on:
@@ -35,6 +36,9 @@ services:
35
36
  {% endif %}
36
37
  --node_name="{{node_name}}"
37
38
  --node_ip="{{node_ip_address}}"
39
+ {% if random_suffix %}
40
+ --random_suffix="{{random_suffix}}"
41
+ {% endif %}
38
42
  {% if command == "server" %}
39
43
  --port_range="30000-32767"
40
44
  {% else %}
@@ -0,0 +1,518 @@
1
+ """
2
+ Core kalavai service.
3
+ Used as a bridge between the kalavai-client app and the reflex frontend
4
+ """
5
+ from fastapi import FastAPI, HTTPException, Depends
6
+ from starlette.requests import Request
7
+ import uvicorn
8
+
9
+ from kalavai_client.bridge_models import (
10
+ CreatePoolRequest,
11
+ InvitesRequest,
12
+ JoinPoolRequest,
13
+ StopPoolRequest,
14
+ DeployJobRequest,
15
+ DeleteJobRequest,
16
+ JobDetailsRequest,
17
+ NodesActionRequest,
18
+ NodeLabelsRequest,
19
+ GetNodeLabelsRequest
20
+ )
21
+ from kalavai_client.core import (
22
+ create_pool,
23
+ join_pool,
24
+ attach_to_pool,
25
+ send_invites,
26
+ stop_pool,
27
+ fetch_devices,
28
+ fetch_resources,
29
+ fetch_job_names,
30
+ fetch_gpus,
31
+ fetch_job_details,
32
+ fetch_job_logs,
33
+ fetch_job_templates,
34
+ fetch_job_defaults,
35
+ deploy_job,
36
+ delete_job,
37
+ authenticate_user,
38
+ load_user_session,
39
+ user_logout,
40
+ is_connected,
41
+ list_available_pools,
42
+ is_agent_running,
43
+ is_server,
44
+ pause_agent,
45
+ resume_agent,
46
+ get_ip_addresses,
47
+ get_pool_token,
48
+ delete_nodes,
49
+ cordon_nodes,
50
+ uncordon_nodes,
51
+ add_node_labels,
52
+ get_node_labels,
53
+ TokenType
54
+ )
55
+ from kalavai_client.utils import load_user_id
56
+
57
+ app = FastAPI(
58
+ title="Kalavai Bridge API",
59
+ description="API for managing Kalavai pools, jobs, and nodes",
60
+ version="1.0.0",
61
+ docs_url="/docs",
62
+ redoc_url="/redoc",
63
+ )
64
+
65
+ ################################
66
+ ## API Key Validation methods ##
67
+ ################################
68
+ async def verify_api_key(request: Request):
69
+ """
70
+ Verify the API key from the request headers.
71
+ The API key must match the user ID.
72
+ """
73
+ user_id = load_user_id()
74
+ if user_id is None:
75
+ return None
76
+ api_key = request.headers.get("X-API-KEY")
77
+ if api_key != user_id:
78
+ raise HTTPException(status_code=401, detail="Request requires API Key")
79
+ return api_key
80
+
81
+ @app.post("/create_pool",
82
+ summary="Create a new pool",
83
+ description="Creates a new pool with the specified configuration",
84
+ response_description="Result of pool creation")
85
+ def pool_create(request: CreatePoolRequest, api_key: str = Depends(verify_api_key)):
86
+ """
87
+ Create a new pool with the following parameters:
88
+
89
+ - **cluster_name**: Name of the cluster
90
+ - **ip_address**: IP address for the pool
91
+ - **app_values**: Application configuration values
92
+ - **num_gpus**: Number of GPUs to allocate
93
+ - **node_name**: Name of the node
94
+ - **only_registered_users**: Whether to restrict to registered users
95
+ - **location**: Location of the pool
96
+ - **description**: Pool description
97
+ - **token_mode**: Token type for authentication
98
+ - **frontend**: Whether this is a frontend request
99
+ """
100
+ result = create_pool(
101
+ cluster_name=request.cluster_name,
102
+ ip_address=request.ip_address,
103
+ app_values=request.app_values,
104
+ num_gpus=request.num_gpus,
105
+ node_name=request.node_name,
106
+ only_registered_users=request.only_registered_users,
107
+ location=request.location,
108
+ description=request.description,
109
+ token_mode=request.token_mode,
110
+ frontend=request.frontend
111
+ )
112
+ return result
113
+
114
+ @app.post("/join_pool",
115
+ summary="Join an existing pool",
116
+ description="Join a pool using a token",
117
+ response_description="Result of joining the pool")
118
+ def pool_join(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)):
119
+ """
120
+ Join a pool with the following parameters:
121
+
122
+ - **token**: Pool join token
123
+ - **ip_address**: IP address for the node
124
+ - **node_name**: Name of the node
125
+ - **num_gpus**: Number of GPUs to allocate
126
+ - **frontend**: Whether this is a frontend request
127
+ """
128
+ result = join_pool(
129
+ token=request.token,
130
+ num_gpus=request.num_gpus,
131
+ node_name=request.node_name,
132
+ ip_address=request.ip_address,
133
+ frontend=request.frontend
134
+ )
135
+ return result
136
+
137
+ @app.post("/attach_to_pool",
138
+ summary="Attach to an existing pool",
139
+ description="Attach to a pool using a token",
140
+ response_description="Result of attaching to the pool")
141
+ def pool_attach(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)):
142
+ """
143
+ Attach to a pool with the following parameters:
144
+
145
+ - **token**: Pool token
146
+ - **node_name**: Name of the node
147
+ - **frontend**: Whether this is a frontend request
148
+ """
149
+ result = attach_to_pool(
150
+ token=request.token,
151
+ node_name=request.node_name,
152
+ frontend=request.frontend
153
+ )
154
+ return result
155
+
156
+ @app.post("/stop_pool",
157
+ summary="Stop a pool",
158
+ description="Stop the current pool",
159
+ response_description="Result of stopping the pool")
160
+ def pool_stop(request: StopPoolRequest, api_key: str = Depends(verify_api_key)):
161
+ """
162
+ Stop the pool with the following parameters:
163
+
164
+ - **skip_node_deletion**: Whether to skip node deletion
165
+ """
166
+ result = stop_pool(
167
+ skip_node_deletion=request.skip_node_deletion
168
+ )
169
+ return result
170
+
171
+ @app.post("/delete_nodes",
172
+ summary="Delete nodes",
173
+ description="Delete specified nodes from the pool",
174
+ response_description="Result of node deletion")
175
+ def device_delete(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
176
+ """
177
+ Delete nodes with the following parameters:
178
+
179
+ - **nodes**: List of node names to delete
180
+ """
181
+ result = delete_nodes(
182
+ nodes=request.nodes
183
+ )
184
+ return result
185
+
186
+ @app.post("/cordon_nodes",
187
+ summary="Cordon nodes",
188
+ description="Mark nodes as unschedulable",
189
+ response_description="Result of cordoning nodes")
190
+ def device_cordon(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
191
+ """
192
+ Cordon nodes with the following parameters:
193
+
194
+ - **nodes**: List of node names to cordon
195
+ """
196
+ result = cordon_nodes(
197
+ nodes=request.nodes
198
+ )
199
+ return result
200
+
201
+ @app.post("/uncordon_nodes",
202
+ summary="Uncordon nodes",
203
+ description="Mark nodes as schedulable",
204
+ response_description="Result of uncordoning nodes")
205
+ def device_uncordon(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
206
+ """
207
+ Uncordon nodes with the following parameters:
208
+
209
+ - **nodes**: List of node names to uncordon
210
+ """
211
+ result = uncordon_nodes(
212
+ nodes=request.nodes
213
+ )
214
+ return result
215
+
216
+ @app.get("/get_pool_token",
217
+ summary="Get pool token",
218
+ description="Get a token for the pool",
219
+ response_description="Pool token")
220
+ def get_token(mode: int, api_key: str = Depends(verify_api_key)):
221
+ """
222
+ Get pool token with the following parameters:
223
+
224
+ - **mode**: Token type mode
225
+ """
226
+ return get_pool_token(mode=TokenType(mode))
227
+
228
+ @app.get("/fetch_devices",
229
+ summary="Fetch devices",
230
+ description="Get list of available devices",
231
+ response_description="List of devices")
232
+ def get_devices(api_key: str = Depends(verify_api_key)):
233
+ """Get list of available devices"""
234
+ return fetch_devices()
235
+
236
+ @app.post("/send_pool_invites",
237
+ summary="Send pool invites",
238
+ description="Send invites to join the pool",
239
+ response_description="Result of sending invites")
240
+ def send_pool_invites(request: InvitesRequest, api_key: str = Depends(verify_api_key)):
241
+ """
242
+ Send pool invites with the following parameters:
243
+
244
+ - **invitees**: List of invitee identifiers
245
+ """
246
+ return send_invites(invitees=request.invitees)
247
+
248
+ @app.get("/fetch_resources",
249
+ summary="Fetch resources",
250
+ description="Get available resources",
251
+ response_description="Resource information")
252
+ def resources(api_key: str = Depends(verify_api_key)):
253
+ """Get available resources"""
254
+ return fetch_resources()
255
+
256
+ @app.get("/fetch_job_names",
257
+ summary="Fetch job names",
258
+ description="Get list of job names",
259
+ response_description="List of job names")
260
+ def job_names(api_key: str = Depends(verify_api_key)):
261
+ """Get list of job names"""
262
+ return fetch_job_names()
263
+
264
+ @app.get("/fetch_gpus",
265
+ summary="Fetch GPUs",
266
+ description="Get list of available GPUs",
267
+ response_description="List of GPUs")
268
+ def gpus(available: bool = False, api_key: str = Depends(verify_api_key)):
269
+ """
270
+ Get list of GPUs with the following parameters:
271
+
272
+ - **available**: Whether to show only available GPUs
273
+ """
274
+ return fetch_gpus(available=available)
275
+
276
+ @app.post("/fetch_job_details",
277
+ summary="Fetch job details",
278
+ description="Get details for specified jobs",
279
+ response_description="Job details")
280
+ def job_details(request: JobDetailsRequest, api_key: str = Depends(verify_api_key)):
281
+ """
282
+ Get job details with the following parameters:
283
+
284
+ - **jobs**: List of jobs to get details for
285
+ """
286
+ return fetch_job_details(jobs=request.jobs)
287
+
288
+ @app.get("/fetch_job_logs",
289
+ summary="Fetch job logs",
290
+ description="Get logs for a specific job",
291
+ response_description="Job logs")
292
+ def job_logs(
293
+ job_name: str,
294
+ force_namespace: str = None,
295
+ pod_name: str = None,
296
+ tail: int = 100,
297
+ api_key: str = Depends(verify_api_key)
298
+ ):
299
+ """
300
+ Get job logs with the following parameters:
301
+
302
+ - **job_name**: Name of the job
303
+ - **force_namespace**: Optional namespace override
304
+ - **pod_name**: Optional pod name
305
+ - **tail**: Number of log lines to return
306
+ """
307
+ return fetch_job_logs(
308
+ job_name=job_name,
309
+ force_namespace=force_namespace,
310
+ pod_name=pod_name,
311
+ tail=tail
312
+ )
313
+
314
+ @app.get("/fetch_job_templates",
315
+ summary="Fetch job templates",
316
+ description="Get available job templates",
317
+ response_description="List of job templates")
318
+ def job_templates(api_key: str = Depends(verify_api_key)):
319
+ """Get available job templates"""
320
+ return fetch_job_templates()
321
+
322
+ @app.get("/fetch_job_defaults",
323
+ summary="Fetch job defaults",
324
+ description="Get default values for a job template",
325
+ response_description="Job default values")
326
+ def job_templates(name: str, api_key: str = Depends(verify_api_key)):
327
+ """
328
+ Get job defaults with the following parameters:
329
+
330
+ - **name**: Name of the job template
331
+ """
332
+ return fetch_job_defaults(name=name)
333
+
334
+ @app.post("/deploy_job",
335
+ summary="Deploy job",
336
+ description="Deploy a new job",
337
+ response_description="Result of job deployment")
338
+ def job_deploy(request: DeployJobRequest, api_key: str = Depends(verify_api_key)):
339
+ """
340
+ Deploy a job with the following parameters:
341
+
342
+ - **template_name**: Name of the job template
343
+ - **values**: Job configuration values
344
+ - **force_namespace**: Optional namespace override
345
+ - **target_labels**: Optional target node labels
346
+ """
347
+ result = deploy_job(
348
+ template_name=request.template_name,
349
+ values_dict=request.values,
350
+ force_namespace=request.force_namespace,
351
+ target_labels=request.target_labels
352
+ )
353
+ return result
354
+
355
+ @app.post("/delete_job",
356
+ summary="Delete job",
357
+ description="Delete a job",
358
+ response_description="Result of job deletion")
359
+ def job_delete(request: DeleteJobRequest, api_key: str = Depends(verify_api_key)):
360
+ """
361
+ Delete a job with the following parameters:
362
+
363
+ - **name**: Name of the job to delete
364
+ - **force_namespace**: Optional namespace override
365
+ """
366
+ result = delete_job(
367
+ name=request.name,
368
+ force_namespace=request.force_namespace
369
+ )
370
+ return result
371
+
372
+ @app.get("/authenticate_user",
373
+ summary="Authenticate user",
374
+ description="Authenticate a user",
375
+ response_description="Authentication result")
376
+ def user_authenticate(user_id: str, api_key: str = Depends(verify_api_key)):
377
+ """
378
+ Authenticate user with the following parameters:
379
+
380
+ - **user_id**: User identifier
381
+ """
382
+ result = authenticate_user(
383
+ user_id=user_id
384
+ )
385
+ return result
386
+
387
+ @app.get("/load_user_session",
388
+ summary="Load user session",
389
+ description="Load the current user session",
390
+ response_description="User session information")
391
+ def user_session(api_key: str = Depends(verify_api_key)):
392
+ """Load the current user session"""
393
+ result = load_user_session()
394
+ return result
395
+
396
+ @app.get("/user_logout",
397
+ summary="User logout",
398
+ description="Log out the current user",
399
+ response_description="Logout result")
400
+ def logout_user():
401
+ """Log out the current user"""
402
+ result = user_logout()
403
+ return result
404
+
405
+ @app.get("/is_connected",
406
+ summary="Check connection",
407
+ description="Check if connected to a pool",
408
+ response_description="Connection status")
409
+ def pool_connected():
410
+ """Check if connected to a pool"""
411
+ result = is_connected()
412
+ return result
413
+
414
+ @app.get("/is_agent_running",
415
+ summary="Check agent status",
416
+ description="Check if the agent is running",
417
+ response_description="Agent status")
418
+ def agent_running():
419
+ """Check if the agent is running"""
420
+ result = is_agent_running()
421
+ return result
422
+
423
+ @app.get("/is_server",
424
+ summary="Check server status",
425
+ description="Check if running as server",
426
+ response_description="Server status")
427
+ def server():
428
+ """Check if running as server"""
429
+ result = is_server()
430
+ return result
431
+
432
+ @app.post("/pause_agent",
433
+ summary="Pause agent",
434
+ description="Pause the agent",
435
+ response_description="Result of pausing agent")
436
+ def agent_pause():
437
+ """Pause the agent"""
438
+ result = pause_agent()
439
+ return result
440
+
441
+ @app.post("/resume_agent",
442
+ summary="Resume agent",
443
+ description="Resume the agent",
444
+ response_description="Result of resuming agent")
445
+ def agent_resume():
446
+ """Resume the agent"""
447
+ result = resume_agent()
448
+ return result
449
+
450
+ @app.get("/get_ip_addresses",
451
+ summary="Get IP addresses",
452
+ description="Get available IP addresses",
453
+ response_description="List of IP addresses")
454
+ def ip_addresses(subnet: str = None, api_key: str = Depends(verify_api_key)):
455
+ """
456
+ Get IP addresses with the following parameters:
457
+
458
+ - **subnet**: Optional subnet to filter by
459
+ """
460
+ result = get_ip_addresses(subnet=subnet)
461
+ return result
462
+
463
+ @app.get("/list_available_pools",
464
+ summary="List available pools",
465
+ description="Get list of available pools",
466
+ response_description="List of available pools")
467
+ def pool_connected(user_only: bool = False, api_key: str = Depends(verify_api_key)):
468
+ """
469
+ List available pools with the following parameters:
470
+
471
+ - **user_only**: Whether to show only user's pools
472
+ """
473
+ result = list_available_pools(user_only=user_only)
474
+ return result
475
+
476
+ @app.post("/add_node_labels",
477
+ summary="Add node labels",
478
+ description="Add labels to a node",
479
+ response_description="Result of adding labels")
480
+ def node_labels(request: NodeLabelsRequest, api_key: str = Depends(verify_api_key)):
481
+ """
482
+ Add node labels with the following parameters:
483
+
484
+ - **node_name**: Name of the node
485
+ - **labels**: Dictionary of labels to add
486
+ """
487
+ result = add_node_labels(
488
+ node_name=request.node_name,
489
+ labels=request.labels
490
+ )
491
+ return result
492
+
493
+ @app.post("/get_node_labels",
494
+ summary="Get node labels",
495
+ description="Get labels for specified nodes",
496
+ response_description="Node labels")
497
+ def node_labels_get(request: GetNodeLabelsRequest, api_key: str = Depends(verify_api_key)):
498
+ """
499
+ Get node labels with the following parameters:
500
+
501
+ - **node_names**: List of node names to get labels for
502
+ """
503
+ result = get_node_labels(
504
+ node_names=request.node_names
505
+ )
506
+ return result
507
+
508
+ def run_api(host="0.0.0.0", port=8001, log_level="critical"):
509
+ uvicorn.run(
510
+ app,
511
+ host=host,
512
+ port=port,
513
+ log_level=log_level
514
+ )
515
+
516
+ if __name__ == "__main__":
517
+ run_api()
518
+
@@ -0,0 +1,53 @@
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Dict, Optional
3
+
4
+ from kalavai_client.core import Job, TokenType
5
+
6
+
7
+ class InvitesRequest(BaseModel):
8
+ invitees: list[str] = Field(description="List of user identifiers to invite to the pool")
9
+
10
+ class CreatePoolRequest(BaseModel):
11
+ cluster_name: str = Field(description="Name of the cluster to create")
12
+ ip_address: str = Field(description="IP address for the pool")
13
+ app_values: dict = Field(None, description="Application configuration values")
14
+ num_gpus: int = Field(None, description="Number of GPUs to allocate")
15
+ node_name: str = Field(None, description="Name of the node")
16
+ only_registered_users: bool = Field(False, description="Whether to restrict access to registered users only")
17
+ location: str = Field(None, description="Geographic location of the pool")
18
+ token_mode: TokenType = Field(TokenType.USER, description="Token type for authentication")
19
+ description: str = Field("", description="Description of the pool")
20
+ frontend: bool = Field(False, description="Whether this is a frontend request")
21
+
22
+ class NodesActionRequest(BaseModel):
23
+ nodes: list[str] = Field(description="List of node names to perform the action on")
24
+
25
+ class JoinPoolRequest(BaseModel):
26
+ token: str = Field(description="Token to join the pool")
27
+ ip_address: str = Field(None, description="IP address for the node")
28
+ node_name: str = Field(None, description="Name of the node")
29
+ num_gpus: int = Field(None, description="Number of GPUs to allocate")
30
+ frontend: bool = Field(False, description="Whether this is a frontend request")
31
+ class JobDetailsRequest(BaseModel):
32
+ jobs: list[Job] = Field(description="List of jobs to get details for")
33
+
34
+
35
+ class StopPoolRequest(BaseModel):
36
+ skip_node_deletion: bool = Field(False, description="Whether to skip node deletion when stopping the pool")
37
+
38
+ class DeployJobRequest(BaseModel):
39
+ template_name: str = Field(description="Name of the job template to use")
40
+ values: dict = Field(description="Job configuration values")
41
+ force_namespace: str = Field(None, description="Optional namespace override")
42
+ target_labels: dict[str, str] = Field(None, description="Optional target node labels")
43
+
44
+ class DeleteJobRequest(BaseModel):
45
+ name: str = Field(description="Name of the job to delete")
46
+ force_namespace: str = Field(None, description="Optional namespace override")
47
+
48
+ class NodeLabelsRequest(BaseModel):
49
+ node_name: str = Field(description="Name of the node to add labels to")
50
+ labels: Dict[str, str] = Field(description="Dictionary of labels to add to the node")
51
+
52
+ class GetNodeLabelsRequest(BaseModel):
53
+ node_names: List[str] = Field(description="List of node names to get labels for")
@@ -126,6 +126,7 @@ def generate_compose_config(role, node_name, write_to_file=True, node_ip_address
126
126
  "num_gpus": num_gpus,
127
127
  "k3s_path": f"{CONTAINER_HOST_PATH}/{rand_suffix}/k3s",
128
128
  "etc_path": f"{CONTAINER_HOST_PATH}/{rand_suffix}/etc",
129
+ "random_suffix": rand_suffix,
129
130
  "node_labels": node_labels,
130
131
  "flannel_iface": DEFAULT_FLANNEL_IFACE if vpn_token is not None else "",
131
132
  "user_id": load_user_id()
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "kalavai-client"
3
- version = "0.6.13"
3
+ version = "0.6.14"
4
4
  authors = [
5
5
  {name = "Carlos Fernandez Musoles", email = "carlos@kalavai.net"}
6
6
  ]
@@ -1,2 +0,0 @@
1
-
2
- __version__ = "0.6.13"
@@ -1,276 +0,0 @@
1
- """
2
- Core kalavai service.
3
- Used as a bridge between the kalavai-client app and the reflex frontend
4
- """
5
- from fastapi import FastAPI, HTTPException, Depends
6
- from starlette.requests import Request
7
- import uvicorn
8
-
9
- from kalavai_client.bridge_models import (
10
- CreatePoolRequest,
11
- InvitesRequest,
12
- JoinPoolRequest,
13
- StopPoolRequest,
14
- DeployJobRequest,
15
- DeleteJobRequest,
16
- JobDetailsRequest,
17
- NodesActionRequest,
18
- NodeLabelsRequest,
19
- GetNodeLabelsRequest
20
- )
21
- from kalavai_client.core import (
22
- create_pool,
23
- join_pool,
24
- attach_to_pool,
25
- send_invites,
26
- stop_pool,
27
- fetch_devices,
28
- fetch_resources,
29
- fetch_job_names,
30
- fetch_gpus,
31
- fetch_job_details,
32
- fetch_job_logs,
33
- fetch_job_templates,
34
- fetch_job_defaults,
35
- deploy_job,
36
- delete_job,
37
- authenticate_user,
38
- load_user_session,
39
- user_logout,
40
- is_connected,
41
- list_available_pools,
42
- is_agent_running,
43
- is_server,
44
- pause_agent,
45
- resume_agent,
46
- get_ip_addresses,
47
- get_pool_token,
48
- delete_nodes,
49
- cordon_nodes,
50
- uncordon_nodes,
51
- add_node_labels,
52
- get_node_labels,
53
- TokenType
54
- )
55
- from kalavai_client.utils import load_user_id
56
-
57
- app = FastAPI()
58
-
59
- ################################
60
- ## API Key Validation methods ##
61
- ################################
62
- async def verify_api_key(request: Request):
63
- user_id = load_user_id()
64
- if user_id is None:
65
- return None
66
- api_key = request.headers.get("X-API-KEY")
67
- if api_key != user_id:
68
- raise HTTPException(status_code=401, detail="Request requires API Key")
69
- return api_key
70
-
71
- @app.post("/create_pool")
72
- def pool_create(request: CreatePoolRequest, api_key: str = Depends(verify_api_key)):
73
- result = create_pool(
74
- cluster_name=request.cluster_name,
75
- ip_address=request.ip_address,
76
- app_values=request.app_values,
77
- num_gpus=request.num_gpus,
78
- node_name=request.node_name,
79
- only_registered_users=request.only_registered_users,
80
- location=request.location,
81
- description=request.description,
82
- token_mode=request.token_mode,
83
- frontend=request.frontend
84
- )
85
- return result
86
-
87
- @app.post("/join_pool")
88
- def pool_join(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)):
89
- result = join_pool(
90
- token=request.token,
91
- num_gpus=request.num_gpus,
92
- node_name=request.node_name,
93
- ip_address=request.ip_address,
94
- frontend=request.frontend
95
- )
96
- return result
97
-
98
- @app.post("/attach_to_pool")
99
- def pool_attach(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)):
100
- result = attach_to_pool(
101
- token=request.token,
102
- node_name=request.node_name,
103
- frontend=request.frontend
104
- )
105
- return result
106
-
107
- @app.post("/stop_pool")
108
- def pool_stop(request: StopPoolRequest, api_key: str = Depends(verify_api_key)):
109
- result = stop_pool(
110
- skip_node_deletion=request.skip_node_deletion
111
- )
112
- return result
113
-
114
- @app.post("/delete_nodes")
115
- def device_delete(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
116
- result = delete_nodes(
117
- nodes=request.nodes
118
- )
119
- return result
120
-
121
- @app.post("/cordon_nodes")
122
- def device_cordon(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
123
- result = cordon_nodes(
124
- nodes=request.nodes
125
- )
126
- return result
127
-
128
- @app.post("/uncordon_nodes")
129
- def device_uncordon(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
130
- result = uncordon_nodes(
131
- nodes=request.nodes
132
- )
133
- return result
134
-
135
- @app.get("/get_pool_token")
136
- def get_token(mode: int, api_key: str = Depends(verify_api_key)):
137
-
138
- return get_pool_token(mode=TokenType(mode))
139
-
140
- @app.get("/fetch_devices")
141
- def get_devices(api_key: str = Depends(verify_api_key)):
142
- return fetch_devices()
143
-
144
- @app.post("/send_pool_invites")
145
- def send_pool_invites(request: InvitesRequest, api_key: str = Depends(verify_api_key)):
146
- return send_invites(invitees=request.invitees)
147
-
148
- @app.get("/fetch_resources")
149
- def resources(api_key: str = Depends(verify_api_key)):
150
- return fetch_resources()
151
-
152
- @app.get("/fetch_job_names")
153
- def job_names(api_key: str = Depends(verify_api_key)):
154
- return fetch_job_names()
155
-
156
- @app.get("/fetch_gpus")
157
- def gpus(available: bool = False, api_key: str = Depends(verify_api_key)):
158
- return fetch_gpus(available=available)
159
-
160
- @app.post("/fetch_job_details")
161
- def job_details(request: JobDetailsRequest, api_key: str = Depends(verify_api_key)):
162
- return fetch_job_details(jobs=request.jobs)
163
-
164
- @app.get("/fetch_job_logs")
165
- def job_logs(job_name: str, force_namespace: str=None, pod_name: str=None, tail: int=100, api_key: str = Depends(verify_api_key)):
166
- return fetch_job_logs(
167
- job_name=job_name,
168
- force_namespace=force_namespace,
169
- pod_name=pod_name,
170
- tail=tail
171
- )
172
-
173
- @app.get("/fetch_job_templates")
174
- def job_templates(api_key: str = Depends(verify_api_key)):
175
- return fetch_job_templates()
176
-
177
- @app.get("/fetch_job_defaults")
178
- def job_templates(name: str, api_key: str = Depends(verify_api_key)):
179
- return fetch_job_defaults(name=name)
180
-
181
- @app.post("/deploy_job")
182
- def job_deploy(request: DeployJobRequest, api_key: str = Depends(verify_api_key)):
183
- result = deploy_job(
184
- template_name=request.template_name,
185
- values_dict=request.values,
186
- force_namespace=request.force_namespace,
187
- target_labels=request.target_labels
188
- )
189
- return result
190
-
191
- @app.post("/delete_job")
192
- def job_delete(request: DeleteJobRequest, api_key: str = Depends(verify_api_key)):
193
- result = delete_job(
194
- name=request.name,
195
- force_namespace=request.force_namespace
196
- )
197
- return result
198
-
199
- @app.get("/authenticate_user")
200
- def user_authenticate(user_id: str, api_key: str = Depends(verify_api_key)):
201
- result = authenticate_user(
202
- user_id=user_id
203
- )
204
- return result
205
-
206
- @app.get("/load_user_session")
207
- def user_session(api_key: str = Depends(verify_api_key)):
208
- result = load_user_session()
209
- return result
210
-
211
- @app.get("/user_logout")
212
- def logout_user():
213
- result = user_logout()
214
- return result
215
-
216
- @app.get("/is_connected")
217
- def pool_connected():
218
- result = is_connected()
219
- return result
220
-
221
- @app.get("/is_agent_running")
222
- def agent_running():
223
- result = is_agent_running()
224
- return result
225
-
226
- @app.get("/is_server")
227
- def server():
228
- result = is_server()
229
- return result
230
-
231
- @app.post("/pause_agent")
232
- def agent_pause():
233
- result = pause_agent()
234
- return result
235
-
236
- @app.post("/resume_agent")
237
- def agent_resume():
238
- result = resume_agent()
239
- return result
240
-
241
- @app.get("/get_ip_addresses")
242
- def ip_addresses(subnet: str=None, api_key: str = Depends(verify_api_key)):
243
- result = get_ip_addresses(subnet=subnet)
244
- return result
245
-
246
- @app.get("/list_available_pools")
247
- def pool_connected(user_only: bool=False, api_key: str = Depends(verify_api_key)):
248
- result = list_available_pools(user_only=user_only)
249
- return result
250
-
251
- @app.post("/add_node_labels")
252
- def node_labels(request: NodeLabelsRequest, api_key: str = Depends(verify_api_key)):
253
- result = add_node_labels(
254
- node_name=request.node_name,
255
- labels=request.labels
256
- )
257
- return result
258
-
259
- @app.post("/get_node_labels")
260
- def node_labels_get(request: GetNodeLabelsRequest, api_key: str = Depends(verify_api_key)):
261
- result = get_node_labels(
262
- node_names=request.node_names
263
- )
264
- return result
265
-
266
- def run_api(host="0.0.0.0", port=8001, log_level="critical"):
267
- uvicorn.run(
268
- app,
269
- host=host,
270
- port=port,
271
- log_level=log_level
272
- )
273
-
274
- if __name__ == "__main__":
275
- run_api()
276
-
@@ -1,53 +0,0 @@
1
- from pydantic import BaseModel
2
- from typing import List, Dict, Optional
3
-
4
- from kalavai_client.core import Job, TokenType
5
-
6
-
7
- class InvitesRequest(BaseModel):
8
- invitees: list[str]
9
-
10
- class CreatePoolRequest(BaseModel):
11
- cluster_name: str
12
- ip_address: str
13
- app_values: dict = None
14
- num_gpus: int = None
15
- node_name: str = None
16
- only_registered_users: bool = False
17
- location: str = None
18
- token_mode: TokenType = TokenType.USER
19
- description: str = ""
20
- frontend: bool = False
21
-
22
- class NodesActionRequest(BaseModel):
23
- nodes: list[str]
24
-
25
- class JoinPoolRequest(BaseModel):
26
- token: str
27
- ip_address: str = None
28
- node_name: str = None
29
- num_gpus: int = None
30
- frontend: bool = False
31
- class JobDetailsRequest(BaseModel):
32
- jobs: list[Job]
33
-
34
-
35
- class StopPoolRequest(BaseModel):
36
- skip_node_deletion: bool = False
37
-
38
- class DeployJobRequest(BaseModel):
39
- template_name: str
40
- values: dict
41
- force_namespace: str = None
42
- target_labels: dict[str, str] = None
43
-
44
- class DeleteJobRequest(BaseModel):
45
- name: str
46
- force_namespace: str = None
47
-
48
- class NodeLabelsRequest(BaseModel):
49
- node_name: str
50
- labels: Dict[str, str]
51
-
52
- class GetNodeLabelsRequest(BaseModel):
53
- node_names: List[str]
File without changes