kalavai-client 0.6.20__tar.gz → 0.6.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/PKG-INFO +1 -1
- kalavai_client-0.6.22/kalavai_client/__init__.py +2 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/apps.yaml +25 -10
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/docker-compose-template.yaml +2 -1
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/bridge_api.py +32 -1
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/bridge_models.py +9 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/cli.py +22 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/core.py +21 -5
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/env.py +2 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/utils.py +0 -1
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/pyproject.toml +1 -1
- kalavai_client-0.6.20/kalavai_client/__init__.py +0 -2
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/LICENSE +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/README.md +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/__main__.py +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/__init__.py +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/apps_values.yaml +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/docker-compose-gui.yaml +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/nginx.conf +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/pool_config_template.yaml +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/pool_config_values.yaml +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/user_workspace.yaml +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/user_workspace_values.yaml +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/auth.py +0 -0
- {kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/cluster.py +0 -0
{kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/apps.yaml
RENAMED
@@ -20,6 +20,8 @@ repositories:
|
|
20
20
|
url: https://charts.min.io/
|
21
21
|
- name: langfuse
|
22
22
|
url: https://langfuse.github.io/langfuse-k8s
|
23
|
+
- name: hami-charts
|
24
|
+
url: https://project-hami.github.io/HAMi
|
23
25
|
|
24
26
|
releases:
|
25
27
|
- name: datashim
|
@@ -154,7 +156,7 @@ releases:
|
|
154
156
|
- name: replicas
|
155
157
|
value: 1
|
156
158
|
- name: image_tag
|
157
|
-
value: "v2025.07.
|
159
|
+
value: "v2025.07.34"
|
158
160
|
- name: deployment.in_cluster
|
159
161
|
value: "True"
|
160
162
|
- name: deployment.kalavai_username_key
|
@@ -185,9 +187,30 @@ releases:
|
|
185
187
|
namespace: kalavai
|
186
188
|
chart: kalavai/gpu
|
187
189
|
installed: false
|
190
|
+
# - name: hami-vgpu
|
191
|
+
# namespace: kalavai
|
192
|
+
# chart: kalavai/hami
|
193
|
+
# installed: true
|
194
|
+
# set:
|
195
|
+
# - name: resourceCores
|
196
|
+
# value: "nvidia.com/gpucores"
|
197
|
+
# - name: devicePlugin.runtimeClassName
|
198
|
+
# value: "nvidia"
|
199
|
+
# - name: scheduler.defaultSchedulerPolicy.nodeSchedulerPolicy
|
200
|
+
# value: "binpack"
|
201
|
+
# - name: scheduler.defaultSchedulerPolicy.gpuSchedulerPolicy
|
202
|
+
# value: "binpack"
|
203
|
+
# - name: scheduler.defaultCores
|
204
|
+
# value: "100"
|
205
|
+
# - name: scheduler.kubeScheduler.imageTag
|
206
|
+
# value: v1.31.1
|
207
|
+
# - name: devicePlugin.deviceMemoryScaling
|
208
|
+
# value: "1"
|
209
|
+
# - name: devicePlugin.deviceSplitCount
|
210
|
+
# value: "1"
|
188
211
|
- name: hami-vgpu
|
189
212
|
namespace: kalavai
|
190
|
-
chart:
|
213
|
+
chart: hami-charts/hami
|
191
214
|
installed: true
|
192
215
|
set:
|
193
216
|
- name: resourceCores
|
@@ -206,13 +229,5 @@ releases:
|
|
206
229
|
value: "1"
|
207
230
|
- name: devicePlugin.deviceSplitCount
|
208
231
|
value: "1"
|
209
|
-
# - name: scheduler.customWebhook.port
|
210
|
-
# value: "30498"
|
211
|
-
# - name: scheduler.service.schedulerPort
|
212
|
-
# value: "30498"
|
213
|
-
# - name: scheduler.service.monitorPort
|
214
|
-
# value: "30493"
|
215
|
-
# - name: devicePlugin.service.httpPort
|
216
|
-
# value: "30492"
|
217
232
|
|
218
233
|
|
{kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/docker-compose-template.yaml
RENAMED
@@ -3,7 +3,7 @@ services:
|
|
3
3
|
{{vpn_name}}:
|
4
4
|
image: gravitl/netclient:v0.90.0
|
5
5
|
container_name: {{vpn_name}}
|
6
|
-
platform: linux/
|
6
|
+
platform: linux/{{target_platform}}
|
7
7
|
cap_add:
|
8
8
|
- NET_ADMIN
|
9
9
|
- SYS_MODULE
|
@@ -21,6 +21,7 @@ services:
|
|
21
21
|
image: docker.io/bundenth/kalavai-runner:{{target_platform}}-latest
|
22
22
|
pull_policy: always
|
23
23
|
container_name: {{service_name}}
|
24
|
+
platform: linux/{{target_platform}}
|
24
25
|
{% if vpn %}
|
25
26
|
depends_on:
|
26
27
|
- {{vpn_name}}
|
{kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/bridge_api.py
RENAMED
@@ -9,6 +9,10 @@ from starlette.requests import Request
|
|
9
9
|
import uvicorn
|
10
10
|
|
11
11
|
from kalavai_client.core import Job
|
12
|
+
from kalavai_client.env import (
|
13
|
+
KALAVAI_SERVICE_LABEL,
|
14
|
+
KALAVAI_SERVICE_LABEL_VALUE
|
15
|
+
)
|
12
16
|
from kalavai_client.bridge_models import (
|
13
17
|
CreatePoolRequest,
|
14
18
|
InvitesRequest,
|
@@ -18,7 +22,8 @@ from kalavai_client.bridge_models import (
|
|
18
22
|
DeleteJobRequest,
|
19
23
|
JobDetailsRequest,
|
20
24
|
NodesActionRequest,
|
21
|
-
NodeLabelsRequest
|
25
|
+
NodeLabelsRequest,
|
26
|
+
WorkerConfigRequest
|
22
27
|
)
|
23
28
|
from kalavai_client.core import (
|
24
29
|
create_pool,
|
@@ -34,6 +39,7 @@ from kalavai_client.core import (
|
|
34
39
|
fetch_job_logs,
|
35
40
|
fetch_job_templates,
|
36
41
|
fetch_job_defaults,
|
42
|
+
fetch_pod_logs,
|
37
43
|
deploy_job,
|
38
44
|
delete_job,
|
39
45
|
authenticate_user,
|
@@ -52,6 +58,7 @@ from kalavai_client.core import (
|
|
52
58
|
uncordon_nodes,
|
53
59
|
add_node_labels,
|
54
60
|
get_node_labels,
|
61
|
+
generate_worker_package,
|
55
62
|
TokenType
|
56
63
|
)
|
57
64
|
from kalavai_client.utils import (
|
@@ -241,6 +248,21 @@ def get_token(mode: int, api_key: str = Depends(verify_api_key)):
|
|
241
248
|
"""
|
242
249
|
return get_pool_token(mode=TokenType(mode))
|
243
250
|
|
251
|
+
@app.post("/generate_worker_config",
|
252
|
+
operation_id="generate_worker_config",
|
253
|
+
summary="Generate a config file for a remote worker to connect to the pool",
|
254
|
+
description="Generate a config file for a remote worker to connect to the pool. Different token types provide different levels of access - join tokens allow nodes to contribute resources, while attach tokens allow management access.",
|
255
|
+
tags=["pool_management"],
|
256
|
+
response_description="Worker config file")
|
257
|
+
def generate_worker_config(request: WorkerConfigRequest, api_key: str = Depends(verify_api_key)):
|
258
|
+
return generate_worker_package(
|
259
|
+
node_name=request.node_name,
|
260
|
+
mode=TokenType(request.mode),
|
261
|
+
target_platform=request.target_platform,
|
262
|
+
num_gpus=request.num_gpus,
|
263
|
+
ip_address=request.ip_address,
|
264
|
+
storage_compatible=request.storage_compatible)
|
265
|
+
|
244
266
|
@app.get("/fetch_devices",
|
245
267
|
operation_id="fetch_devices",
|
246
268
|
summary="Get list of all compute devices in the pool",
|
@@ -251,6 +273,15 @@ def get_devices(api_key: str = Depends(verify_api_key)):
|
|
251
273
|
"""Get list of available devices"""
|
252
274
|
return fetch_devices()
|
253
275
|
|
276
|
+
@app.get("/fetch_service_logs",
|
277
|
+
operation_id="fetch_service_logs",
|
278
|
+
summary="Get logs for the kalavai API service",
|
279
|
+
description="Get logs for the kalavai API service, including internal logs, debugging messages and status of the service.",
|
280
|
+
tags=["info"],
|
281
|
+
response_description="Logs")
|
282
|
+
def get_service_logs(api_key: str = Depends(verify_api_key)):
|
283
|
+
return fetch_pod_logs(label_key=KALAVAI_SERVICE_LABEL, label_value=KALAVAI_SERVICE_LABEL_VALUE, force_namespace="kalavai")
|
284
|
+
|
254
285
|
@app.post("/send_pool_invites",
|
255
286
|
operation_id="send_pool_invites",
|
256
287
|
summary="Send invitations to join the pool",
|
{kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/bridge_models.py
RENAMED
@@ -17,6 +17,15 @@ class CreatePoolRequest(BaseModel):
|
|
17
17
|
token_mode: TokenType = Field(TokenType.USER, description="Token type for authentication")
|
18
18
|
description: str = Field("", description="Description of the pool")
|
19
19
|
|
20
|
+
class WorkerConfigRequest(BaseModel):
|
21
|
+
node_name: str = Field(None, description="Name for the worker node")
|
22
|
+
mode: int = Field(2, description="Access mode for the worker (admin, worker or user)")
|
23
|
+
target_platform: str = Field("amd64", description="Target platform architecture for the worker (amd64 or arm64)")
|
24
|
+
num_gpus: int = Field(0, description="Number of GPUs to use on the worker node")
|
25
|
+
ip_address: str = Field("0.0.0.0", description="IP address of the worker node")
|
26
|
+
storage_compatible: bool = Field(True, description="Whether to use the node's storage capacity for volumes")
|
27
|
+
|
28
|
+
|
20
29
|
class NodesActionRequest(BaseModel):
|
21
30
|
nodes: list[str] = Field(None, description="List of node names to perform the action on, defaults to None")
|
22
31
|
|
{kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/cli.py
RENAMED
@@ -27,6 +27,8 @@ from kalavai_client.env import (
|
|
27
27
|
USER_TEMPLATES_FOLDER,
|
28
28
|
DOCKER_COMPOSE_GUI,
|
29
29
|
USER_GUI_COMPOSE_FILE,
|
30
|
+
KALAVAI_SERVICE_LABEL,
|
31
|
+
KALAVAI_SERVICE_LABEL_VALUE,
|
30
32
|
user_path,
|
31
33
|
resource_path,
|
32
34
|
)
|
@@ -37,6 +39,7 @@ from kalavai_client.core import (
|
|
37
39
|
fetch_job_details,
|
38
40
|
fetch_devices,
|
39
41
|
fetch_job_logs,
|
42
|
+
fetch_pod_logs,
|
40
43
|
fetch_gpus,
|
41
44
|
generate_worker_package,
|
42
45
|
load_gpu_models,
|
@@ -689,6 +692,25 @@ def pool__update(*others):
|
|
689
692
|
else:
|
690
693
|
console.log(f"[green]{result}")
|
691
694
|
|
695
|
+
@arguably.command
|
696
|
+
def pool__logs(*others):
|
697
|
+
"""
|
698
|
+
Get the logs for the Kalavai API
|
699
|
+
"""
|
700
|
+
logs = []
|
701
|
+
|
702
|
+
logs.append("Getting Kalavai API logs...")
|
703
|
+
|
704
|
+
logs = fetch_pod_logs(
|
705
|
+
label_key=KALAVAI_SERVICE_LABEL,
|
706
|
+
label_value=KALAVAI_SERVICE_LABEL_VALUE,
|
707
|
+
force_namespace="kalavai"
|
708
|
+
)
|
709
|
+
for name, log in logs.items():
|
710
|
+
console.log(f"[yellow]LOGS for service: {name}")
|
711
|
+
for key, value in log.items():
|
712
|
+
console.log(f"[yellow]{key}")
|
713
|
+
console.log(json.dumps(value, indent=2))
|
692
714
|
|
693
715
|
@arguably.command
|
694
716
|
def pool__status(*others, log_file=None):
|
{kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/core.py
RENAMED
@@ -313,7 +313,7 @@ def fetch_job_details(jobs: list[Job]):
|
|
313
313
|
restart_counts = sum([c["restart_count"] for c in values["conditions"]])
|
314
314
|
workers_status[values["status"]] += 1
|
315
315
|
# get nodes involved in deployment (needs kubewatcher)
|
316
|
-
if "node_name" in values:
|
316
|
+
if "node_name" in values and values["node_name"] is not None:
|
317
317
|
host_nodes.add(values["node_name"])
|
318
318
|
|
319
319
|
workers = "\n".join([f"{k}: {v}" for k, v in workers_status.items()])
|
@@ -454,9 +454,18 @@ def fetch_devices():
|
|
454
454
|
return {"error": str(e)}
|
455
455
|
|
456
456
|
def fetch_job_logs(job_name, force_namespace=None, pod_name=None, tail=100):
|
457
|
+
return fetch_pod_logs(
|
458
|
+
label_key=TEMPLATE_LABEL,
|
459
|
+
label_value=job_name,
|
460
|
+
pod_name=pod_name,
|
461
|
+
force_namespace=force_namespace,
|
462
|
+
tail=tail
|
463
|
+
)
|
464
|
+
|
465
|
+
def fetch_pod_logs(label_key, label_value, force_namespace=None, pod_name=None, tail=100):
|
457
466
|
data = {
|
458
|
-
"label": TEMPLATE_LABEL,
|
459
|
-
"value": job_name,
|
467
|
+
"label": label_key,
|
468
|
+
"value": label_value,
|
460
469
|
"tail": tail
|
461
470
|
}
|
462
471
|
if force_namespace is not None:
|
@@ -609,9 +618,16 @@ def attach_to_pool(token, node_name=None):
|
|
609
618
|
|
610
619
|
return cluster_name
|
611
620
|
|
612
|
-
def generate_worker_package(
|
621
|
+
def generate_worker_package(
|
622
|
+
target_platform="amd64",
|
623
|
+
num_gpus=0,
|
624
|
+
node_name=None,
|
625
|
+
ip_address="0.0.0.0",
|
626
|
+
storage_compatible=True,
|
627
|
+
mode=TokenType.WORKER
|
628
|
+
):
|
613
629
|
# get pool data from token
|
614
|
-
token = get_pool_token(mode=TokenType.WORKER)
|
630
|
+
token = get_pool_token(mode=mode)
|
615
631
|
if "error" in token:
|
616
632
|
return {"error": f"[red]Error when getting pool token: {token['error']}"}
|
617
633
|
|
{kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/env.py
RENAMED
@@ -32,6 +32,8 @@ def resource_path(relative_path: str):
|
|
32
32
|
|
33
33
|
|
34
34
|
TEMPLATE_LABEL = "kalavai.job.name"
|
35
|
+
KALAVAI_SERVICE_LABEL = "app"
|
36
|
+
KALAVAI_SERVICE_LABEL_VALUE = "kube-watcher-api"
|
35
37
|
STORAGE_CLASS_LABEL = "kalavai.storage.enabled"
|
36
38
|
USER_NODE_LABEL = "kalavai.cluster.user"
|
37
39
|
SERVER_IP_KEY = "server_ip"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/docker-compose-gui.yaml
RENAMED
File without changes
|
File without changes
|
{kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/pool_config_template.yaml
RENAMED
File without changes
|
{kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/pool_config_values.yaml
RENAMED
File without changes
|
File without changes
|
{kalavai_client-0.6.20 → kalavai_client-0.6.22}/kalavai_client/assets/user_workspace_values.yaml
RENAMED
File without changes
|
File without changes
|
File without changes
|