kalavai-client 0.5.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalavai_client/__init__.py +2 -0
- kalavai_client/__main__.py +5 -0
- kalavai_client/assets/apps.yaml +201 -0
- kalavai_client/assets/apps_values.yaml +83 -0
- kalavai_client/assets/docker-compose-template.yaml +55 -0
- kalavai_client/assets/pool_config_template.yaml +19 -0
- kalavai_client/assets/pool_config_values.yaml +12 -0
- kalavai_client/assets/user_workspace.yaml +19 -0
- kalavai_client/assets/user_workspace_values.yaml +29 -0
- kalavai_client/assets/vpn-template.yaml +13 -0
- kalavai_client/auth.py +68 -0
- kalavai_client/cli.py +1989 -0
- kalavai_client/cluster.py +308 -0
- kalavai_client/utils.py +456 -0
- kalavai_client-0.5.0.dist-info/LICENSE +201 -0
- kalavai_client-0.5.0.dist-info/METADATA +279 -0
- kalavai_client-0.5.0.dist-info/RECORD +19 -0
- kalavai_client-0.5.0.dist-info/WHEEL +4 -0
- kalavai_client-0.5.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,201 @@
|
|
1
|
+
helmDefaults:
|
2
|
+
timeout: 1200
|
3
|
+
|
4
|
+
repositories:
|
5
|
+
- name: kuberay
|
6
|
+
url: https://ray-project.github.io/kuberay-helm/
|
7
|
+
- name: nvidia
|
8
|
+
url: https://helm.ngc.nvidia.com/nvidia
|
9
|
+
- name: kalavai
|
10
|
+
url: https://kalavai-net.github.io/helm-charts/
|
11
|
+
- name: longhorn
|
12
|
+
url: https://charts.longhorn.io
|
13
|
+
- name: volcano-sh
|
14
|
+
url: https://volcano-sh.github.io/helm-charts
|
15
|
+
- name: prometheus
|
16
|
+
url: https://prometheus-community.github.io/helm-charts
|
17
|
+
- name: opencost-charts
|
18
|
+
url: https://opencost.github.io/opencost-helm-chart
|
19
|
+
- name: minio
|
20
|
+
url: https://charts.min.io/
|
21
|
+
|
22
|
+
releases:
|
23
|
+
- name: datashim
|
24
|
+
namespace: dlf
|
25
|
+
chart: kalavai/kalavai-datashim
|
26
|
+
version: "0.4.0"
|
27
|
+
installed: false
|
28
|
+
- name: minio
|
29
|
+
needs:
|
30
|
+
- kalavai/longhorn
|
31
|
+
namespace: minio
|
32
|
+
chart: minio/minio
|
33
|
+
installed: false
|
34
|
+
set:
|
35
|
+
- name: replicas
|
36
|
+
value: {{minio_replicas}}
|
37
|
+
- name: resources.requests.memory
|
38
|
+
value: "{{minio_resources_memory}}"
|
39
|
+
- name: persistence.enabled
|
40
|
+
value: true
|
41
|
+
- name: persistence.storageClass
|
42
|
+
value: {{minio_persistence_storageClass}}
|
43
|
+
- name: persistence.size
|
44
|
+
value: "{{minio_persistence_size}}"
|
45
|
+
- name: persistence.accessMode
|
46
|
+
value: "ReadWriteMany"
|
47
|
+
- name: rootUser
|
48
|
+
value: {{minio_rootUser}}
|
49
|
+
- name: rootPassword
|
50
|
+
value: {{minio_rootPassword}}
|
51
|
+
- name: service.type
|
52
|
+
value: "NodePort"
|
53
|
+
- name: service.nodePort
|
54
|
+
value: {{minio_service_port}}
|
55
|
+
- name: consoleService.type
|
56
|
+
value: "NodePort"
|
57
|
+
- name: consoleService.nodePort
|
58
|
+
value: {{minio_console_port}}
|
59
|
+
- name: buckets[0].name
|
60
|
+
value: "llm-storage"
|
61
|
+
- name: buckets[0].policy
|
62
|
+
value: "public"
|
63
|
+
- name: buckets[0].purge
|
64
|
+
value: false
|
65
|
+
- name: helios
|
66
|
+
namespace: kalavai
|
67
|
+
chart: kalavai/kalavai-helios
|
68
|
+
version: "0.1.9"
|
69
|
+
installed: {{not watcher_allow_unregistered_user}}
|
70
|
+
set:
|
71
|
+
- name: deployment.watcher_endpoint
|
72
|
+
value: "http://{{watcher_service}}"
|
73
|
+
- name: deployment.watcher_auth_key
|
74
|
+
value: "{{watcher_readonly_key}}"
|
75
|
+
- name: deployment.kalavai_api_endpoint
|
76
|
+
value: {{kalavai_api_endpoint}}
|
77
|
+
- name: deployment.user_node_label
|
78
|
+
value: "{{user_node_label}}"
|
79
|
+
- name: deployment.sleep_interval
|
80
|
+
value: "{{helios_harvest_interval}}"
|
81
|
+
- name: opencost
|
82
|
+
namespace: opencost
|
83
|
+
chart: opencost-charts/opencost
|
84
|
+
installed: {{not watcher_allow_unregistered_user}}
|
85
|
+
- name: prometheus
|
86
|
+
namespace: prometheus-system
|
87
|
+
chart: prometheus/prometheus
|
88
|
+
installed: {{not watcher_allow_unregistered_user}}
|
89
|
+
set:
|
90
|
+
- name: prometheus-pushgateway.enabled
|
91
|
+
value: false
|
92
|
+
- name: alertmanager.enabled
|
93
|
+
value: false
|
94
|
+
- name: volcano-sh
|
95
|
+
namespace: kalavai
|
96
|
+
chart: volcano-sh/volcano
|
97
|
+
installed: true
|
98
|
+
- name: kuberay
|
99
|
+
namespace: kuberay
|
100
|
+
chart: kuberay/kuberay-operator
|
101
|
+
installed: true
|
102
|
+
version: "1.2.2"
|
103
|
+
- name: kuberay-apiserver
|
104
|
+
namespace: kuberay
|
105
|
+
chart: kuberay/kuberay-apiserver
|
106
|
+
installed: false
|
107
|
+
- name: longhorn
|
108
|
+
namespace: kalavai
|
109
|
+
chart: longhorn/longhorn
|
110
|
+
installed: false
|
111
|
+
set:
|
112
|
+
# Security risk: the settings below expose the Longhorn UI and manager services via NodePort; enable for testing only
|
113
|
+
- name: service.ui.type
|
114
|
+
value: NodePort
|
115
|
+
- name: service.ui.nodePort
|
116
|
+
value: {{longhorn_ui_port}}
|
117
|
+
- name: service.manager.type
|
118
|
+
value: NodePort
|
119
|
+
- name: service.manager.nodePort
|
120
|
+
value: {{longhorn_manager_port}}
|
121
|
+
- name: persistence.defaultClassReplicaCount
|
122
|
+
value: {{longhorn_replicas}}
|
123
|
+
- name: global.nodeSelector.{{longhorn_label_selector_key}}
|
124
|
+
value: "{{longhorn_label_selector_value}}"
|
125
|
+
- name: defaultSettings.storageMinimalAvailablePercentage
|
126
|
+
value: {{longhorn_minimal_available_percentage}}
|
127
|
+
- name: lws
|
128
|
+
namespace: kalavai
|
129
|
+
chart: kalavai/lws
|
130
|
+
installed: false
|
131
|
+
- name: kalavai-watcher
|
132
|
+
namespace: kalavai
|
133
|
+
chart: kalavai/kalavai-watcher
|
134
|
+
version: "0.3.6"
|
135
|
+
installed: true
|
136
|
+
set:
|
137
|
+
- name: namespace
|
138
|
+
value: kalavai
|
139
|
+
- name: replicas
|
140
|
+
value: 2
|
141
|
+
- name: image_tag
|
142
|
+
value: "v2025.01"
|
143
|
+
- name: deployment.in_cluster
|
144
|
+
value: "True"
|
145
|
+
- name: deployment.use_auth_key
|
146
|
+
value: "True"
|
147
|
+
- name: deployment.admin_key
|
148
|
+
value: "{{watcher_admin_key}}"
|
149
|
+
- name: deployment.write_key
|
150
|
+
value: "{{watcher_write_key}}"
|
151
|
+
- name: deployment.readonly_key
|
152
|
+
value: "{{watcher_readonly_key}}"
|
153
|
+
- name: deployment.is_shared_pool
|
154
|
+
value: {{watcher_is_shared_pool}}
|
155
|
+
- name: deployment.allow_unregistered_user
|
156
|
+
value: "{{watcher_allow_unregistered_user}}"
|
157
|
+
- name: deployment.kalavai_api_endpoint
|
158
|
+
value: {{kalavai_api_endpoint}}
|
159
|
+
- name: deployment.prometheus_endpoint
|
160
|
+
value: {{prometheus_endpoint}}
|
161
|
+
- name: deployment.opencost_endpoint
|
162
|
+
value: {{opencost_endpoint}}
|
163
|
+
- name: deployment.longhorn_manager_endpoint
|
164
|
+
value: {{longhorn_manager_endpoint}}
|
165
|
+
- name: service.nodePort
|
166
|
+
value: {{watcher_port}}
|
167
|
+
- name: nvidia-gpu-operator
|
168
|
+
namespace: kalavai
|
169
|
+
chart: kalavai/gpu
|
170
|
+
installed: true
|
171
|
+
- name: hami-vgpu
|
172
|
+
namespace: kalavai
|
173
|
+
chart: kalavai/hami
|
174
|
+
installed: true
|
175
|
+
set:
|
176
|
+
- name: resourceCores
|
177
|
+
value: "nvidia.com/gpucores"
|
178
|
+
- name: devicePlugin.runtimeClassName
|
179
|
+
value: "nvidia"
|
180
|
+
- name: scheduler.defaultSchedulerPolicy.nodeSchedulerPolicy
|
181
|
+
value: "binpack"
|
182
|
+
- name: scheduler.defaultSchedulerPolicy.gpuSchedulerPolicy
|
183
|
+
value: "binpack"
|
184
|
+
- name: scheduler.defaultCores
|
185
|
+
value: "100"
|
186
|
+
- name: scheduler.kubeScheduler.imageTag
|
187
|
+
value: v1.31.1
|
188
|
+
- name: devicePlugin.deviceMemoryScaling
|
189
|
+
value: "1"
|
190
|
+
- name: devicePlugin.deviceSplitCount
|
191
|
+
value: "1"
|
192
|
+
- name: scheduler.customWebhook.port
|
193
|
+
value: "30498"
|
194
|
+
- name: scheduler.service.schedulerPort
|
195
|
+
value: "30498"
|
196
|
+
- name: scheduler.service.monitorPort
|
197
|
+
value: "30493"
|
198
|
+
- name: devicePlugin.service.httpPort
|
199
|
+
value: "30492"
|
200
|
+
|
201
|
+
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# TODO: add helm versions here
|
2
|
+
|
3
|
+
- name: kalavai_api_endpoint
|
4
|
+
default: https://platform.kalavai.net/_/api
|
5
|
+
description: ""
|
6
|
+
|
7
|
+
- name: prometheus_endpoint
|
8
|
+
default: http://prometheus-server.prometheus-system.svc.cluster.local:80
|
9
|
+
description: ""
|
10
|
+
|
11
|
+
- name: opencost_endpoint
|
12
|
+
default: http://opencost.opencost.svc.cluster.local:9003
|
13
|
+
description: ""
|
14
|
+
|
15
|
+
- name: longhorn_manager_endpoint
|
16
|
+
default: http://longhorn-backend.kalavai.svc.cluster.local:9500
|
17
|
+
description: ""
|
18
|
+
|
19
|
+
- name: helios_harvest_interval
|
20
|
+
default: 120
|
21
|
+
description: "Interval (minutes) at which to report resource usage in public pools"
|
22
|
+
|
23
|
+
- name: watcher_is_shared_pool
|
24
|
+
default: "True"
|
25
|
+
description: "All users see each other's work"
|
26
|
+
|
27
|
+
## Longhorn
|
28
|
+
- name: longhorn_ui_port
|
29
|
+
default: 30000
|
30
|
+
description: ""
|
31
|
+
|
32
|
+
- name: longhorn_manager_port
|
33
|
+
default: "" #31011
|
34
|
+
description: ""
|
35
|
+
|
36
|
+
- name: longhorn_minimal_available_percentage
|
37
|
+
default: 5
|
38
|
+
description: ""
|
39
|
+
|
40
|
+
- name: longhorn_label_selector_key
|
41
|
+
default: "kalavai.storage.enabled"
|
42
|
+
description: ""
|
43
|
+
|
44
|
+
- name: longhorn_label_selector
|
45
|
+
default: "True"
|
46
|
+
description: ""
|
47
|
+
|
48
|
+
- name: longhorn_replicas
|
49
|
+
default: 2
|
50
|
+
description: ""
|
51
|
+
|
52
|
+
## MinIO
|
53
|
+
- name: minio_replicas
|
54
|
+
default: 2
|
55
|
+
description: ""
|
56
|
+
|
57
|
+
- name: minio_resources_memory
|
58
|
+
default: "1Gi"
|
59
|
+
description: ""
|
60
|
+
|
61
|
+
- name: minio_persistence_storageClass
|
62
|
+
default: "longhorn"
|
63
|
+
description: ""
|
64
|
+
|
65
|
+
- name: minio_persistence_size
|
66
|
+
default: "10Gi"
|
67
|
+
description: ""
|
68
|
+
|
69
|
+
- name: minio_service_port
|
70
|
+
default: 32000
|
71
|
+
description: ""
|
72
|
+
|
73
|
+
- name: minio_console_port
|
74
|
+
default: 32001
|
75
|
+
description: ""
|
76
|
+
|
77
|
+
- name: minio_rootUser
|
78
|
+
default: "admin"
|
79
|
+
description: ""
|
80
|
+
|
81
|
+
- name: minio_rootPassword
|
82
|
+
default: "password"
|
83
|
+
description: ""
|
@@ -0,0 +1,55 @@
|
|
1
|
+
services:
|
2
|
+
{{service_name}}-{{command}}:
|
3
|
+
image: bundenth/kalavai-runner:gpu-latest
|
4
|
+
container_name: {{service_name}}
|
5
|
+
hostname: {{hostname}}
|
6
|
+
privileged: true
|
7
|
+
restart: unless-stopped
|
8
|
+
ports:
|
9
|
+
- "6443:6443" # kube server
|
10
|
+
- "8472:8472" # flannel vxlan
|
11
|
+
- "51820:51820" # flannel wireguard
|
12
|
+
{% if command == "server" %}
|
13
|
+
- "30000-30500:30000-30500"
|
14
|
+
{% endif %}
|
15
|
+
networks:
|
16
|
+
- custom-network
|
17
|
+
command: >
|
18
|
+
{{command}}
|
19
|
+
{% if command == "server" %}
|
20
|
+
--flannel-backend wireguard-native
|
21
|
+
--service-node-port-range "30000-30500"
|
22
|
+
{% else %}
|
23
|
+
--server {{pool_ip}}
|
24
|
+
--token {{token}}
|
25
|
+
{% endif %}
|
26
|
+
--node-label role={{command}}
|
27
|
+
{% if node_labels %}
|
28
|
+
{{node_labels}}
|
29
|
+
{% endif %}
|
30
|
+
{% if num_gpus and num_gpus > 0 %}
|
31
|
+
--node-label gpu=on
|
32
|
+
{% else %}
|
33
|
+
--node-label gpu=off
|
34
|
+
{% endif %}
|
35
|
+
--node-ip {{ip_address}}
|
36
|
+
--node-external-ip {{ip_address}}
|
37
|
+
{% if flannel_iface %}
|
38
|
+
--flannel-iface {{flannel_iface}}
|
39
|
+
{% endif %}
|
40
|
+
volumes:
|
41
|
+
- {{k3s_path}}:/var/lib/rancher/k3s # Persist data
|
42
|
+
- {{etc_path}}:/etc/rancher/k3s # Config files
|
43
|
+
{% if num_gpus and num_gpus > 0 %}
|
44
|
+
deploy:
|
45
|
+
resources:
|
46
|
+
reservations:
|
47
|
+
devices:
|
48
|
+
- driver: nvidia
|
49
|
+
count: {{num_gpus}}
|
50
|
+
capabilities: [gpu]
|
51
|
+
{% endif %}
|
52
|
+
|
53
|
+
networks:
|
54
|
+
custom-network:
|
55
|
+
driver: bridge
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# # # storage class for model weights (temporary)
|
2
|
+
# kind: StorageClass
|
3
|
+
# apiVersion: storage.k8s.io/v1
|
4
|
+
# metadata:
|
5
|
+
# name: {{storage_class_name}}
|
6
|
+
# provisioner: driver.longhorn.io
|
7
|
+
# allowVolumeExpansion: true
|
8
|
+
# reclaimPolicy: Delete
|
9
|
+
# volumeBindingMode: Immediate
|
10
|
+
# parameters:
|
11
|
+
# shareManagerNodeSelector: {{storage_label_selector}}
|
12
|
+
# numberOfReplicas: "{{storage_replicas}}"
|
13
|
+
# staleReplicaTimeout: "10"
|
14
|
+
# fromBackup: ""
|
15
|
+
# fsType: "ext4"
|
16
|
+
# dataLocality: "disabled"
|
17
|
+
# unmapMarkSnapChainRemoved: "ignored"
|
18
|
+
# disableRevisionCounter: "true"
|
19
|
+
# dataEngine: "v1"
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# apiVersion: v1
|
2
|
+
# kind: Secret
|
3
|
+
# metadata:
|
4
|
+
# name: {{dataset_secret_name}}
|
5
|
+
# stringData:
|
6
|
+
# accessKeyID: "{{dataset_access_key_id}}"
|
7
|
+
# secretAccessKey: "{{dataset_secret_access_key}}"
|
8
|
+
# ---
|
9
|
+
# apiVersion: datashim.io/v1alpha1
|
10
|
+
# kind: Dataset
|
11
|
+
# metadata:
|
12
|
+
# name: {{dataset_name}}
|
13
|
+
# spec:
|
14
|
+
# local:
|
15
|
+
# type: "COS"
|
16
|
+
# secret-name: {{dataset_secret_name}}
|
17
|
+
# endpoint: "{{dataset_endpoint}}"
|
18
|
+
# bucket: {{dataset_bucket}}
|
19
|
+
# readonly: "{{dataset_readonly}}"
|
@@ -0,0 +1,29 @@
|
|
1
|
+
- name: dataset_secret_name
|
2
|
+
default: "kalavai-s3-secret"
|
3
|
+
description: ""
|
4
|
+
|
5
|
+
- name: dataset_access_key_id
|
6
|
+
default: "admin"
|
7
|
+
description: ""
|
8
|
+
|
9
|
+
# Must match the MinIO root password (minio_rootPassword)
|
10
|
+
- name: dataset_secret_access_key
|
11
|
+
default: "password"
|
12
|
+
description: ""
|
13
|
+
|
14
|
+
- name: dataset_name
|
15
|
+
default: "models-hub"
|
16
|
+
description: ""
|
17
|
+
|
18
|
+
- name: dataset_endpoint
|
19
|
+
default: "http://minio.minio.svc.cluster.local:9000"
|
20
|
+
description: ""
|
21
|
+
|
22
|
+
- name: dataset_bucket
|
23
|
+
default: "llm-storage"
|
24
|
+
description: ""
|
25
|
+
|
26
|
+
- name: dataset_readonly
|
27
|
+
default: "false"
|
28
|
+
description: ""
|
29
|
+
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# https://docs.netmaker.io/docs/netclient#docker
|
2
|
+
services:
|
3
|
+
{{service_name}}:
|
4
|
+
network_mode: host
|
5
|
+
privileged: true
|
6
|
+
restart: always
|
7
|
+
environment:
|
8
|
+
- TOKEN={{vpn_token}}
|
9
|
+
- IFACE_NAME={{flannel_iface}}
|
10
|
+
volumes:
|
11
|
+
- '{{etc_path}}/netclient:/etc/netclient'
|
12
|
+
container_name: {{service_name}}
|
13
|
+
image: 'gravitl/netclient:latest'
|
kalavai_client/auth.py
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
import os
|
2
|
+
import pickle
|
3
|
+
|
4
|
+
import anvil.server
|
5
|
+
import anvil.users
|
6
|
+
|
7
|
+
|
8
|
+
# Key passed to anvil.server.connect() by KalavaiAuthClient. It is read from the
# ANVIL_UPLINK_KEY environment variable, falling back to the embedded default.
# NOTE(review): the fallback is a credential committed to source; presumably a
# public client-scoped key -- confirm, and rotate it if it grants more than that.
AUTH_UPLINK_KEY = os.getenv("ANVIL_UPLINK_KEY", "client_AOPKTWK227ZV3R4ENTMOQRIY-ADMVMYW5OIRPH75P")
|
9
|
+
|
10
|
+
class KalavaiAuthClient:
    """Client for user authentication through the Anvil uplink.

    The client connects to the Anvil server on construction and can optionally
    persist the logged-in user object to a local file (``user_cookie_file``) so
    that later runs can reuse it.
    """

    def __init__(self, user_cookie_file=None):
        """Connect to the Anvil server and probe for an existing user session.

        Args:
            user_cookie_file: Optional path of the file where the user object
                is pickled on login and read back by ``load_user_session``.
                When ``None``, nothing is persisted.
        """
        anvil.server.connect(AUTH_UPLINK_KEY, quiet=True)
        self.user_cookie_file = user_cookie_file
        # The result is intentionally discarded; the call is kept so that
        # construction performs the same session lookup it always did.
        self.load_user_session()

    def login(self, username, password):
        """Log in with email and password.

        Args:
            username: Email address passed to ``anvil.users.login_with_email``.
            password: Password passed to ``anvil.users.login_with_email``.

        Returns:
            The user object returned by Anvil, or ``None`` if the login call
            raised an error.
        """
        try:
            user = anvil.users.login_with_email(username, password, remember=True)
        except Exception:
            # Best-effort: any login failure is reported as None. Unlike a bare
            # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
            return None

        if self.user_cookie_file:
            with open(self.user_cookie_file, "wb") as f:
                pickle.dump(user, f)
        return user

    def logout(self):
        """Log out of Anvil and delete the persisted user file, if any."""
        anvil.users.logout()
        if not self.user_cookie_file:
            # No file was configured, so there is nothing to clean up.
            return
        try:
            os.remove(self.user_cookie_file)
        except OSError:
            # A missing or undeletable file is not an error for logout.
            pass

    def is_logged_in(self):
        """Return ``True`` when ``load_user_session`` yields a user."""
        return self.load_user_session() is not None

    def load_user_session(self):
        """Return the current user, falling back to the persisted user file.

        Returns:
            The user reported by ``anvil.users.get_user()`` if truthy;
            otherwise the object unpickled from ``user_cookie_file``; or
            ``None`` when no file is configured or it cannot be read.
        """
        user = anvil.users.get_user()
        if user:
            return user
        if not self.user_cookie_file:
            return None
        try:
            # SECURITY: pickle.load can execute arbitrary code. Only point
            # user_cookie_file at a file written by this client in a location
            # other users cannot modify.
            with open(self.user_cookie_file, "rb") as f:
                return pickle.load(f)
        except Exception:
            # Missing, unreadable or corrupt file: treat as "no session".
            return None

    def call_function(self, fn, *args):
        """Call the server function ``fn`` with ``args`` via ``anvil.server.call``.

        Returns:
            Whatever ``anvil.server.call`` returns.
        """
        return anvil.server.call(fn, *args)
|
54
|
+
|
55
|
+
|
56
|
+
|
57
|
+
if __name__ == "__main__":
    # Manual smoke test: reuse a stored session if there is one, otherwise
    # attempt a login, then report the outcome.
    client = KalavaiAuthClient(user_cookie_file="here.pickle")
    session_user = client.load_user_session() or client.login(
        username="carlos@kalavai.net", password="wrong_pass"
    )
    print("Failed to login" if session_user is None else session_user)
|