kalavai-client 0.6.13__py3-none-any.whl → 0.6.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
1
 
2
- __version__ = "0.6.13"
2
+ __version__ = "0.6.16"
@@ -152,7 +152,7 @@ releases:
152
152
  - name: replicas
153
153
  value: 1
154
154
  - name: image_tag
155
- value: "v2025.05.2"
155
+ value: "v2025.06.7"
156
156
  - name: deployment.in_cluster
157
157
  value: "True"
158
158
  - name: deployment.kalavai_username_key
@@ -2,6 +2,7 @@ services:
2
2
  kalavai_gui:
3
3
  container_name: kalavai_gui
4
4
  image: bundenth/kalavai-gui:latest
5
+ platform: linux/amd64
5
6
  extra_hosts:
6
7
  - "host.docker.internal:host-gateway"
7
8
  networks:
@@ -3,6 +3,7 @@ services:
3
3
  {{vpn_name}}:
4
4
  image: gravitl/netclient:v0.90.0
5
5
  container_name: {{vpn_name}}
6
+ platform: linux/amd64
6
7
  cap_add:
7
8
  - NET_ADMIN
8
9
  - SYS_MODULE
@@ -17,7 +18,8 @@ services:
17
18
  # run worker only if command is set
18
19
  {%if command %}
19
20
  {{service_name}}:
20
- image: docker.io/bundenth/kalavai-runner:gpu-latest
21
+ image: docker.io/bundenth/kalavai-runner:{{target_platform}}-latest
22
+ pull_policy: always
21
23
  container_name: {{service_name}}
22
24
  {% if vpn %}
23
25
  depends_on:
@@ -35,6 +37,9 @@ services:
35
37
  {% endif %}
36
38
  --node_name="{{node_name}}"
37
39
  --node_ip="{{node_ip_address}}"
40
+ {% if random_suffix %}
41
+ --random_suffix="{{random_suffix}}"
42
+ {% endif %}
38
43
  {% if command == "server" %}
39
44
  --port_range="30000-32767"
40
45
  {% else %}
@@ -54,12 +54,22 @@ from kalavai_client.core import (
54
54
  )
55
55
  from kalavai_client.utils import load_user_id
56
56
 
57
- app = FastAPI()
57
+ app = FastAPI(
58
+ title="Kalavai Bridge API",
59
+ description="API for managing Kalavai pools, jobs, and nodes",
60
+ version="1.0.0",
61
+ docs_url="/docs",
62
+ redoc_url="/redoc",
63
+ )
58
64
 
59
65
  ################################
60
66
  ## API Key Validation methods ##
61
67
  ################################
62
68
  async def verify_api_key(request: Request):
69
+ """
70
+ Verify the API key from the request headers.
71
+ The API key must match the user ID.
72
+ """
63
73
  user_id = load_user_id()
64
74
  if user_id is None:
65
75
  return None
@@ -68,35 +78,69 @@ async def verify_api_key(request: Request):
68
78
  raise HTTPException(status_code=401, detail="Request requires API Key")
69
79
  return api_key
70
80
 
71
- @app.post("/create_pool")
81
+ @app.post("/create_pool",
82
+ summary="Create a new pool",
83
+ description="Creates a new pool with the specified configuration",
84
+ response_description="Result of pool creation")
72
85
  def pool_create(request: CreatePoolRequest, api_key: str = Depends(verify_api_key)):
86
+ """
87
+ Create a new pool with the following parameters:
88
+
89
+ - **cluster_name**: Name of the cluster
90
+ - **ip_address**: IP address for the pool
91
+ - **app_values**: Application configuration values
92
+ - **num_gpus**: Number of GPUs to allocate
93
+ - **node_name**: Name of the node
94
+ - **only_registered_users**: Whether to restrict to registered users
95
+ - **location**: Location of the pool
96
+ - **description**: Pool description
97
+ - **token_mode**: Token type for authentication
98
+ """
73
99
  result = create_pool(
74
100
  cluster_name=request.cluster_name,
75
101
  ip_address=request.ip_address,
76
102
  app_values=request.app_values,
77
103
  num_gpus=request.num_gpus,
78
104
  node_name=request.node_name,
79
- only_registered_users=request.only_registered_users,
80
105
  location=request.location,
81
106
  description=request.description,
82
- token_mode=request.token_mode,
83
- frontend=request.frontend
107
+ token_mode=request.token_mode
84
108
  )
85
109
  return result
86
110
 
87
- @app.post("/join_pool")
111
+ @app.post("/join_pool",
112
+ summary="Join an existing pool",
113
+ description="Join a pool using a token",
114
+ response_description="Result of joining the pool")
88
115
  def pool_join(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)):
116
+ """
117
+ Join a pool with the following parameters:
118
+
119
+ - **token**: Pool join token
120
+ - **ip_address**: IP address for the node
121
+ - **node_name**: Name of the node
122
+ - **num_gpus**: Number of GPUs to allocate
123
+ """
89
124
  result = join_pool(
90
125
  token=request.token,
91
126
  num_gpus=request.num_gpus,
92
127
  node_name=request.node_name,
93
- ip_address=request.ip_address,
94
- frontend=request.frontend
128
+ ip_address=request.ip_address
95
129
  )
96
130
  return result
97
131
 
98
- @app.post("/attach_to_pool")
132
+ @app.post("/attach_to_pool",
133
+ summary="Attach to an existing pool",
134
+ description="Attach to a pool using a token",
135
+ response_description="Result of attaching to the pool")
99
136
  def pool_attach(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)):
137
+ """
138
+ Attach to a pool with the following parameters:
139
+
140
+ - **token**: Pool token
141
+ - **node_name**: Name of the node
142
+ - **frontend**: Whether this is a frontend request
143
+ """
100
144
  result = attach_to_pool(
101
145
  token=request.token,
102
146
  node_name=request.node_name,
@@ -104,65 +148,157 @@ def pool_attach(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)
104
148
  )
105
149
  return result
106
150
 
107
- @app.post("/stop_pool")
151
+ @app.post("/stop_pool",
152
+ summary="Stop a pool",
153
+ description="Stop the current pool",
154
+ response_description="Result of stopping the pool")
108
155
  def pool_stop(request: StopPoolRequest, api_key: str = Depends(verify_api_key)):
156
+ """
157
+ Stop the pool with the following parameters:
158
+
159
+ - **skip_node_deletion**: Whether to skip node deletion
160
+ """
109
161
  result = stop_pool(
110
162
  skip_node_deletion=request.skip_node_deletion
111
163
  )
112
164
  return result
113
165
 
114
- @app.post("/delete_nodes")
166
+ @app.post("/delete_nodes",
167
+ summary="Delete nodes",
168
+ description="Delete specified nodes from the pool",
169
+ response_description="Result of node deletion")
115
170
  def device_delete(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
171
+ """
172
+ Delete nodes with the following parameters:
173
+
174
+ - **nodes**: List of node names to delete
175
+ """
116
176
  result = delete_nodes(
117
177
  nodes=request.nodes
118
178
  )
119
179
  return result
120
180
 
121
- @app.post("/cordon_nodes")
181
+ @app.post("/cordon_nodes",
182
+ summary="Cordon nodes",
183
+ description="Mark nodes as unschedulable",
184
+ response_description="Result of cordoning nodes")
122
185
  def device_cordon(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
186
+ """
187
+ Cordon nodes with the following parameters:
188
+
189
+ - **nodes**: List of node names to cordon
190
+ """
123
191
  result = cordon_nodes(
124
192
  nodes=request.nodes
125
193
  )
126
194
  return result
127
195
 
128
- @app.post("/uncordon_nodes")
196
+ @app.post("/uncordon_nodes",
197
+ summary="Uncordon nodes",
198
+ description="Mark nodes as schedulable",
199
+ response_description="Result of uncordoning nodes")
129
200
  def device_uncordon(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
201
+ """
202
+ Uncordon nodes with the following parameters:
203
+
204
+ - **nodes**: List of node names to uncordon
205
+ """
130
206
  result = uncordon_nodes(
131
207
  nodes=request.nodes
132
208
  )
133
209
  return result
134
210
 
135
- @app.get("/get_pool_token")
211
+ @app.get("/get_pool_token",
212
+ summary="Get pool token",
213
+ description="Get a token for the pool",
214
+ response_description="Pool token")
136
215
  def get_token(mode: int, api_key: str = Depends(verify_api_key)):
137
-
216
+ """
217
+ Get pool token with the following parameters:
218
+
219
+ - **mode**: Token type mode
220
+ """
138
221
  return get_pool_token(mode=TokenType(mode))
139
222
 
140
- @app.get("/fetch_devices")
223
+ @app.get("/fetch_devices",
224
+ summary="Fetch devices",
225
+ description="Get list of available devices",
226
+ response_description="List of devices")
141
227
  def get_devices(api_key: str = Depends(verify_api_key)):
228
+ """Get list of available devices"""
142
229
  return fetch_devices()
143
230
 
144
- @app.post("/send_pool_invites")
231
+ @app.post("/send_pool_invites",
232
+ summary="Send pool invites",
233
+ description="Send invites to join the pool",
234
+ response_description="Result of sending invites")
145
235
  def send_pool_invites(request: InvitesRequest, api_key: str = Depends(verify_api_key)):
236
+ """
237
+ Send pool invites with the following parameters:
238
+
239
+ - **invitees**: List of invitee identifiers
240
+ """
146
241
  return send_invites(invitees=request.invitees)
147
242
 
148
- @app.get("/fetch_resources")
243
+ @app.get("/fetch_resources",
244
+ summary="Fetch resources",
245
+ description="Get available resources",
246
+ response_description="Resource information")
149
247
  def resources(api_key: str = Depends(verify_api_key)):
248
+ """Get available resources"""
150
249
  return fetch_resources()
151
250
 
152
- @app.get("/fetch_job_names")
251
+ @app.get("/fetch_job_names",
252
+ summary="Fetch job names",
253
+ description="Get list of job names",
254
+ response_description="List of job names")
153
255
  def job_names(api_key: str = Depends(verify_api_key)):
256
+ """Get list of job names"""
154
257
  return fetch_job_names()
155
258
 
156
- @app.get("/fetch_gpus")
259
+ @app.get("/fetch_gpus",
260
+ summary="Fetch GPUs",
261
+ description="Get list of available GPUs",
262
+ response_description="List of GPUs")
157
263
  def gpus(available: bool = False, api_key: str = Depends(verify_api_key)):
264
+ """
265
+ Get list of GPUs with the following parameters:
266
+
267
+ - **available**: Whether to show only available GPUs
268
+ """
158
269
  return fetch_gpus(available=available)
159
270
 
160
- @app.post("/fetch_job_details")
271
+ @app.post("/fetch_job_details",
272
+ summary="Fetch job details",
273
+ description="Get details for specified jobs",
274
+ response_description="Job details")
161
275
  def job_details(request: JobDetailsRequest, api_key: str = Depends(verify_api_key)):
276
+ """
277
+ Get job details with the following parameters:
278
+
279
+ - **jobs**: List of jobs to get details for
280
+ """
162
281
  return fetch_job_details(jobs=request.jobs)
163
282
 
164
- @app.get("/fetch_job_logs")
165
- def job_logs(job_name: str, force_namespace: str=None, pod_name: str=None, tail: int=100, api_key: str = Depends(verify_api_key)):
283
+ @app.get("/fetch_job_logs",
284
+ summary="Fetch job logs",
285
+ description="Get logs for a specific job",
286
+ response_description="Job logs")
287
+ def job_logs(
288
+ job_name: str,
289
+ force_namespace: str = None,
290
+ pod_name: str = None,
291
+ tail: int = 100,
292
+ api_key: str = Depends(verify_api_key)
293
+ ):
294
+ """
295
+ Get job logs with the following parameters:
296
+
297
+ - **job_name**: Name of the job
298
+ - **force_namespace**: Optional namespace override
299
+ - **pod_name**: Optional pod name
300
+ - **tail**: Number of log lines to return
301
+ """
166
302
  return fetch_job_logs(
167
303
  job_name=job_name,
168
304
  force_namespace=force_namespace,
@@ -170,16 +306,39 @@ def job_logs(job_name: str, force_namespace: str=None, pod_name: str=None, tail:
170
306
  tail=tail
171
307
  )
172
308
 
173
- @app.get("/fetch_job_templates")
309
+ @app.get("/fetch_job_templates",
310
+ summary="Fetch job templates",
311
+ description="Get available job templates",
312
+ response_description="List of job templates")
174
313
  def job_templates(api_key: str = Depends(verify_api_key)):
314
+ """Get available job templates"""
175
315
  return fetch_job_templates()
176
316
 
177
- @app.get("/fetch_job_defaults")
317
+ @app.get("/fetch_job_defaults",
318
+ summary="Fetch job defaults",
319
+ description="Get default values for a job template",
320
+ response_description="Job default values")
178
321
  def job_templates(name: str, api_key: str = Depends(verify_api_key)):
322
+ """
323
+ Get job defaults with the following parameters:
324
+
325
+ - **name**: Name of the job template
326
+ """
179
327
  return fetch_job_defaults(name=name)
180
328
 
181
- @app.post("/deploy_job")
329
+ @app.post("/deploy_job",
330
+ summary="Deploy job",
331
+ description="Deploy a new job",
332
+ response_description="Result of job deployment")
182
333
  def job_deploy(request: DeployJobRequest, api_key: str = Depends(verify_api_key)):
334
+ """
335
+ Deploy a job with the following parameters:
336
+
337
+ - **template_name**: Name of the job template
338
+ - **values**: Job configuration values
339
+ - **force_namespace**: Optional namespace override
340
+ - **target_labels**: Optional target node labels
341
+ """
183
342
  result = deploy_job(
184
343
  template_name=request.template_name,
185
344
  values_dict=request.values,
@@ -188,76 +347,154 @@ def job_deploy(request: DeployJobRequest, api_key: str = Depends(verify_api_key)
188
347
  )
189
348
  return result
190
349
 
191
- @app.post("/delete_job")
350
+ @app.post("/delete_job",
351
+ summary="Delete job",
352
+ description="Delete a job",
353
+ response_description="Result of job deletion")
192
354
  def job_delete(request: DeleteJobRequest, api_key: str = Depends(verify_api_key)):
355
+ """
356
+ Delete a job with the following parameters:
357
+
358
+ - **name**: Name of the job to delete
359
+ - **force_namespace**: Optional namespace override
360
+ """
193
361
  result = delete_job(
194
362
  name=request.name,
195
363
  force_namespace=request.force_namespace
196
364
  )
197
365
  return result
198
366
 
199
- @app.get("/authenticate_user")
367
+ @app.get("/authenticate_user",
368
+ summary="Authenticate user",
369
+ description="Authenticate a user",
370
+ response_description="Authentication result")
200
371
  def user_authenticate(user_id: str, api_key: str = Depends(verify_api_key)):
372
+ """
373
+ Authenticate user with the following parameters:
374
+
375
+ - **user_id**: User identifier
376
+ """
201
377
  result = authenticate_user(
202
378
  user_id=user_id
203
379
  )
204
380
  return result
205
381
 
206
- @app.get("/load_user_session")
382
+ @app.get("/load_user_session",
383
+ summary="Load user session",
384
+ description="Load the current user session",
385
+ response_description="User session information")
207
386
  def user_session(api_key: str = Depends(verify_api_key)):
387
+ """Load the current user session"""
208
388
  result = load_user_session()
209
389
  return result
210
390
 
211
- @app.get("/user_logout")
391
+ @app.get("/user_logout",
392
+ summary="User logout",
393
+ description="Log out the current user",
394
+ response_description="Logout result")
212
395
  def logout_user():
396
+ """Log out the current user"""
213
397
  result = user_logout()
214
398
  return result
215
399
 
216
- @app.get("/is_connected")
400
+ @app.get("/is_connected",
401
+ summary="Check connection",
402
+ description="Check if connected to a pool",
403
+ response_description="Connection status")
217
404
  def pool_connected():
405
+ """Check if connected to a pool"""
218
406
  result = is_connected()
219
407
  return result
220
408
 
221
- @app.get("/is_agent_running")
409
+ @app.get("/is_agent_running",
410
+ summary="Check agent status",
411
+ description="Check if the agent is running",
412
+ response_description="Agent status")
222
413
  def agent_running():
414
+ """Check if the agent is running"""
223
415
  result = is_agent_running()
224
416
  return result
225
417
 
226
- @app.get("/is_server")
418
+ @app.get("/is_server",
419
+ summary="Check server status",
420
+ description="Check if running as server",
421
+ response_description="Server status")
227
422
  def server():
423
+ """Check if running as server"""
228
424
  result = is_server()
229
425
  return result
230
426
 
231
- @app.post("/pause_agent")
427
+ @app.post("/pause_agent",
428
+ summary="Pause agent",
429
+ description="Pause the agent",
430
+ response_description="Result of pausing agent")
232
431
  def agent_pause():
432
+ """Pause the agent"""
233
433
  result = pause_agent()
234
434
  return result
235
435
 
236
- @app.post("/resume_agent")
436
+ @app.post("/resume_agent",
437
+ summary="Resume agent",
438
+ description="Resume the agent",
439
+ response_description="Result of resuming agent")
237
440
  def agent_resume():
441
+ """Resume the agent"""
238
442
  result = resume_agent()
239
443
  return result
240
444
 
241
- @app.get("/get_ip_addresses")
242
- def ip_addresses(subnet: str=None, api_key: str = Depends(verify_api_key)):
445
+ @app.get("/get_ip_addresses",
446
+ summary="Get IP addresses",
447
+ description="Get available IP addresses",
448
+ response_description="List of IP addresses")
449
+ def ip_addresses(subnet: str = None, api_key: str = Depends(verify_api_key)):
450
+ """
451
+ Get IP addresses with the following parameters:
452
+
453
+ - **subnet**: Optional subnet to filter by
454
+ """
243
455
  result = get_ip_addresses(subnet=subnet)
244
456
  return result
245
457
 
246
- @app.get("/list_available_pools")
247
- def pool_connected(user_only: bool=False, api_key: str = Depends(verify_api_key)):
458
+ @app.get("/list_available_pools",
459
+ summary="List available pools",
460
+ description="Get list of available pools",
461
+ response_description="List of available pools")
462
+ def pool_connected(user_only: bool = False, api_key: str = Depends(verify_api_key)):
463
+ """
464
+ List available pools with the following parameters:
465
+
466
+ - **user_only**: Whether to show only user's pools
467
+ """
248
468
  result = list_available_pools(user_only=user_only)
249
469
  return result
250
470
 
251
- @app.post("/add_node_labels")
471
+ @app.post("/add_node_labels",
472
+ summary="Add node labels",
473
+ description="Add labels to a node",
474
+ response_description="Result of adding labels")
252
475
  def node_labels(request: NodeLabelsRequest, api_key: str = Depends(verify_api_key)):
476
+ """
477
+ Add node labels with the following parameters:
478
+
479
+ - **node_name**: Name of the node
480
+ - **labels**: Dictionary of labels to add
481
+ """
253
482
  result = add_node_labels(
254
483
  node_name=request.node_name,
255
484
  labels=request.labels
256
485
  )
257
486
  return result
258
487
 
259
- @app.post("/get_node_labels")
488
+ @app.post("/get_node_labels",
489
+ summary="Get node labels",
490
+ description="Get labels for specified nodes",
491
+ response_description="Node labels")
260
492
  def node_labels_get(request: GetNodeLabelsRequest, api_key: str = Depends(verify_api_key)):
493
+ """
494
+ Get node labels with the following parameters:
495
+
496
+ - **node_names**: List of node names to get labels for
497
+ """
261
498
  result = get_node_labels(
262
499
  node_names=request.node_names
263
500
  )
@@ -1,53 +1,51 @@
1
- from pydantic import BaseModel
1
+ from pydantic import BaseModel, Field
2
2
  from typing import List, Dict, Optional
3
3
 
4
4
  from kalavai_client.core import Job, TokenType
5
5
 
6
6
 
7
7
  class InvitesRequest(BaseModel):
8
- invitees: list[str]
8
+ invitees: list[str] = Field(description="List of user identifiers to invite to the pool")
9
9
 
10
10
  class CreatePoolRequest(BaseModel):
11
- cluster_name: str
12
- ip_address: str
13
- app_values: dict = None
14
- num_gpus: int = None
15
- node_name: str = None
16
- only_registered_users: bool = False
17
- location: str = None
18
- token_mode: TokenType = TokenType.USER
19
- description: str = ""
20
- frontend: bool = False
11
+ cluster_name: str = Field(description="Name of the cluster to create")
12
+ ip_address: str = Field(description="IP address for the pool")
13
+ app_values: dict = Field(None, description="Application configuration values")
14
+ num_gpus: int = Field(None, description="Number of GPUs to allocate")
15
+ node_name: str = Field(None, description="Name of the node")
16
+ location: str = Field(None, description="Geographic location of the pool")
17
+ token_mode: TokenType = Field(TokenType.USER, description="Token type for authentication")
18
+ description: str = Field("", description="Description of the pool")
21
19
 
22
20
  class NodesActionRequest(BaseModel):
23
- nodes: list[str]
21
+ nodes: list[str] = Field(description="List of node names to perform the action on")
24
22
 
25
23
  class JoinPoolRequest(BaseModel):
26
- token: str
27
- ip_address: str = None
28
- node_name: str = None
29
- num_gpus: int = None
30
- frontend: bool = False
24
+ token: str = Field(description="Token to join the pool")
25
+ ip_address: str = Field(None, description="IP address for the node")
26
+ node_name: str = Field(None, description="Name of the node")
27
+ num_gpus: int = Field(None, description="Number of GPUs to allocate")
28
+ frontend: bool = Field(False, description="Whether this is a frontend request")
31
29
  class JobDetailsRequest(BaseModel):
32
- jobs: list[Job]
30
+ jobs: list[Job] = Field(description="List of jobs to get details for")
33
31
 
34
32
 
35
33
  class StopPoolRequest(BaseModel):
36
- skip_node_deletion: bool = False
34
+ skip_node_deletion: bool = Field(False, description="Whether to skip node deletion when stopping the pool")
37
35
 
38
36
  class DeployJobRequest(BaseModel):
39
- template_name: str
40
- values: dict
41
- force_namespace: str = None
42
- target_labels: dict[str, str] = None
37
+ template_name: str = Field(description="Name of the job template to use")
38
+ values: dict = Field(description="Job configuration values")
39
+ force_namespace: str = Field(None, description="Optional namespace override")
40
+ target_labels: dict[str, str] = Field(None, description="Optional target node labels")
43
41
 
44
42
  class DeleteJobRequest(BaseModel):
45
- name: str
46
- force_namespace: str = None
43
+ name: str = Field(description="Name of the job to delete")
44
+ force_namespace: str = Field(None, description="Optional namespace override")
47
45
 
48
46
  class NodeLabelsRequest(BaseModel):
49
- node_name: str
50
- labels: Dict[str, str]
47
+ node_name: str = Field(description="Name of the node to add labels to")
48
+ labels: Dict[str, str] = Field(description="Dictionary of labels to add to the node")
51
49
 
52
50
  class GetNodeLabelsRequest(BaseModel):
53
- node_names: List[str]
51
+ node_names: List[str] = Field(description="List of node names to get labels for")
kalavai_client/cli.py CHANGED
@@ -325,7 +325,7 @@ def pool__unpublish(cluster_name=None, *others):
325
325
  console.log(f"[green]Your cluster has been removed from {KALAVAI_PLATFORM_URL}")
326
326
 
327
327
  @arguably.command
328
- def pool__package_worker(output_file, *others, num_gpus=0, ip_address="0.0.0.0", node_name=None, storage_compatible=True):
328
+ def pool__package_worker(output_file, *others, platform="amd64", num_gpus=0, ip_address="0.0.0.0", node_name=None, storage_compatible=True):
329
329
  """
330
330
  [AUTH]Package a worker for distribution (docker compose only)
331
331
  """
@@ -335,6 +335,7 @@ def pool__package_worker(output_file, *others, num_gpus=0, ip_address="0.0.0.0",
335
335
  return
336
336
 
337
337
  compose = generate_worker_package(
338
+ target_platform=platform,
338
339
  num_gpus=num_gpus,
339
340
  ip_address=ip_address,
340
341
  node_name=node_name,
@@ -374,7 +375,7 @@ def pool__list(*others, user_only=False):
374
375
 
375
376
 
376
377
  @arguably.command
377
- def pool__start(cluster_name, *others, ip_address: str=None, location: str=None, app_values: str=None, pool_config_values: str=None, non_interactive: bool=False):
378
+ def pool__start(cluster_name, *others, platform="amd64", ip_address: str=None, location: str=None, app_values: str=None, pool_config_values: str=None, non_interactive: bool=False):
378
379
  """
379
380
  Start Kalavai pool and start/resume sharing resources.
380
381
 
@@ -411,6 +412,7 @@ def pool__start(cluster_name, *others, ip_address: str=None, location: str=None
411
412
  console.log(f"[green]Creating {cluster_name} pool, this may take a few minutes...")
412
413
 
413
414
  result = create_pool(
415
+ target_platform=platform,
414
416
  cluster_name=cluster_name,
415
417
  ip_address=ip_address,
416
418
  app_values=app_values,
@@ -472,7 +474,7 @@ def pool__check_token(token, *others, public=False):
472
474
  return True
473
475
 
474
476
  @arguably.command
475
- def pool__join(token, *others, node_name=None, non_interactive=False):
477
+ def pool__join(token, *others, platform="amd64", node_name=None, non_interactive=False):
476
478
  """
477
479
  Join Kalavai pool and start/resume sharing resources.
478
480
 
@@ -522,6 +524,7 @@ def pool__join(token, *others, node_name=None, non_interactive=False):
522
524
 
523
525
  console.log("Connecting worker to the pool...")
524
526
  result = join_pool(
527
+ target_platform=platform,
525
528
  token=token,
526
529
  node_name=node_name,
527
530
  num_gpus=num_gpus,
kalavai_client/core.py CHANGED
@@ -595,7 +595,7 @@ def attach_to_pool(token, node_name=None):
595
595
 
596
596
  return cluster_name
597
597
 
598
- def generate_worker_package(num_gpus=0, node_name=None, ip_address="0.0.0.0", storage_compatible=True):
598
+ def generate_worker_package(target_platform="amd64", num_gpus=0, node_name=None, ip_address="0.0.0.0", storage_compatible=True):
599
599
  # get pool data from token
600
600
  token = get_pool_token(mode=TokenType.WORKER)
601
601
  if "error" in token:
@@ -620,6 +620,7 @@ def generate_worker_package(num_gpus=0, node_name=None, ip_address="0.0.0.0", st
620
620
  }
621
621
  # Generate docker compose recipe
622
622
  compose = generate_compose_config(
623
+ target_platform=target_platform,
623
624
  write_to_file=False,
624
625
  role="agent",
625
626
  node_ip_address=ip_address,
@@ -633,7 +634,13 @@ def generate_worker_package(num_gpus=0, node_name=None, ip_address="0.0.0.0", st
633
634
  return compose
634
635
 
635
636
 
636
- def join_pool(token, num_gpus=None, node_name=None, ip_address=None):
637
+ def join_pool(
638
+ token,
639
+ num_gpus=None,
640
+ node_name=None,
641
+ ip_address=None,
642
+ target_platform="amd64"
643
+ ):
637
644
  compatibility = check_worker_compatibility()
638
645
  if len(compatibility["issues"]) > 0:
639
646
  return {"error": compatibility["issues"]}
@@ -668,6 +675,7 @@ def join_pool(token, num_gpus=None, node_name=None, ip_address=None):
668
675
  # local agent join
669
676
  # Generate docker compose recipe
670
677
  generate_compose_config(
678
+ target_platform=target_platform,
671
679
  role="agent",
672
680
  node_ip_address=ip_address,
673
681
  pool_ip=f"https://{kalavai_seed_ip}:6443",
@@ -722,7 +730,8 @@ def create_pool(
722
730
  pool_config_values: str=None,
723
731
  num_gpus: int=0,
724
732
  node_name: str=None,
725
- location: str=None
733
+ location: str=None,
734
+ target_platform: str="amd64"
726
735
  ):
727
736
 
728
737
  if not check_seed_compatibility():
@@ -756,6 +765,7 @@ def create_pool(
756
765
 
757
766
  # Generate docker compose recipe
758
767
  generate_compose_config(
768
+ target_platform=target_platform,
759
769
  role="server",
760
770
  vpn_token=location,
761
771
  node_ip_address=ip_address,
kalavai_client/utils.py CHANGED
@@ -106,12 +106,13 @@ def is_storage_compatible():
106
106
  return False
107
107
  ################
108
108
 
109
- def generate_compose_config(role, node_name, write_to_file=True, node_ip_address="0.0.0.0", num_gpus=0, node_labels=None, pool_ip=None, vpn_token=None, pool_token=None):
109
+ def generate_compose_config(role, node_name, target_platform="amd64", write_to_file=True, node_ip_address="0.0.0.0", num_gpus=0, node_labels=None, pool_ip=None, vpn_token=None, pool_token=None):
110
110
 
111
111
  if node_labels is not None:
112
112
  node_labels = " ".join([f"--node-label {key}={value}" for key, value in node_labels.items()])
113
113
  rand_suffix = uuid.uuid4().hex[:8]
114
114
  compose_values = {
115
+ "target_platform": target_platform,
115
116
  "user_path": user_path(""),
116
117
  "service_name": DEFAULT_CONTAINER_NAME,
117
118
  "vpn": vpn_token is not None,
@@ -126,6 +127,7 @@ def generate_compose_config(role, node_name, write_to_file=True, node_ip_address
126
127
  "num_gpus": num_gpus,
127
128
  "k3s_path": f"{CONTAINER_HOST_PATH}/{rand_suffix}/k3s",
128
129
  "etc_path": f"{CONTAINER_HOST_PATH}/{rand_suffix}/etc",
130
+ "random_suffix": rand_suffix,
129
131
  "node_labels": node_labels,
130
132
  "flannel_iface": DEFAULT_FLANNEL_IFACE if vpn_token is not None else "",
131
133
  "user_id": load_user_id()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: kalavai-client
3
- Version: 0.6.13
3
+ Version: 0.6.16
4
4
  Summary: Client app for kalavai platform
5
5
  License: Apache-2.0
6
6
  Keywords: LLM,platform
@@ -52,30 +52,28 @@ Description-Content-Type: text/markdown
52
52
 
53
53
  </div>
54
54
 
55
- ⭐⭐⭐ **Kalavai and our AI pools are open source, and free to use in both commercial and non-commercial purposes. If you find it useful, consider supporting us by [giving a star to our GitHub project](https://github.com/kalavai-net/kalavai-client), joining our [discord channel](https://discord.gg/YN6ThTJKbM), follow our [Substack](https://kalavainet.substack.com/) and give us a [review on Product Hunt](https://www.producthunt.com/products/kalavai/reviews/new).**
55
+ ⭐⭐⭐ **Kalavai platform is open source, and free to use for both commercial and non-commercial purposes. If you find it useful, consider supporting us by [giving a star to our GitHub project](https://github.com/kalavai-net/kalavai-client), joining our [discord channel](https://discord.gg/YN6ThTJKbM) and following our [Substack](https://kalavainet.substack.com/).**
56
56
 
57
57
 
58
58
  # Kalavai: turn your devices into a scalable AI platform
59
59
 
60
- ### Taming the adoption of Large Language Models
60
+ > AI in the cloud is not aligned with you, it's aligned with the company that owns it. Make sure you own your AI
61
61
 
62
- > Kalavai is an **open source** tool that turns **everyday devices** into your very own LLM platform. It aggregates resources from multiple machines, including desktops and laptops, and is **compatible with most model engines** to make LLM deployment and orchestration simple and reliable.
62
+ ### Taming the adoption of self-hosted GenAI
63
63
 
64
- <div align="center">
65
-
66
- <a href="https://www.producthunt.com/products/kalavai/reviews?utm_source=badge-product_review&utm_medium=badge&utm_souce=badge-kalavai" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/product_review.svg?product_id=720725&theme=neutral" alt="Kalavai - The&#0032;first&#0032;platform&#0032;to&#0032;crowdsource&#0032;AI&#0032;computation | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
67
-
68
- </div>
64
+ Kalavai is an **open source** tool that turns **any device** into a self-hosted AI platform. It aggregates resources from multiple machines, including cloud, on-premises and personal computers, and is **compatible with most model engines** to make model deployment and orchestration simple and reliable.
69
65
 
70
66
 
71
67
  ## What can Kalavai do?
72
68
 
73
- Kalavai's goal is to make using AI (LLMs, AI agents) in real applications accessible and affordable to all. It's a _magic box_ that **integrates all the components required to make AI useful in the age of massive computing**, from model deployment and orchestration to Agentic AI.
69
+ Kalavai's goal is to make using self-hosted AI (GenAI models and agents) in real applications accessible and affordable to all. It's a tool that transforms machines into a _magic box_ that **integrates all the components required to make AI useful in the age of massive computing**, from model deployment and orchestration to Agentic AI.
74
70
 
75
71
  ### Core features
76
72
 
77
73
  - Manage **multiple devices resources as one**. One pool of RAM, CPUs and GPUs
78
- - **Deploy Large Language Models seamlessly across devices**, wherever they are (multiple clouds, on premises, personal devices)
74
+ - **Deploy open source models seamlessly across devices**, wherever they are (cloud, on premises, personal devices)
75
+ - Beyond LLMs: not just for large language models, but also text-to-speech, speech-to-text, image understanding, code generation and embedding models.
76
+ - The hybrid dream: build on your laptop, move to the cloud (any!) with zero changes
79
77
  - Auto-discovery: all **models are automatically exposed** through a single OpenAI-like API and a ChatGPT-like UI playground
80
78
  - Compatible with [most popular model engines](#support-for-llm-engines)
81
79
  - [Easy to expand](https://github.com/kalavai-net/kube-watcher/tree/main/templates) to custom workloads
@@ -83,19 +81,19 @@ Kalavai's goal is to make using AI (LLMs, AI agents) in real applications access
83
81
 
84
82
  <details>
85
83
 
86
- **<summary>Video tutorials</summary>**
84
+ **<summary>Powered by Kalavai</summary>**
87
85
 
88
- ### Self-hosted LLM pools
89
-
90
- https://github.com/user-attachments/assets/0d2316f3-79ea-46ac-b41e-8ef720f52672
86
+ - [CoGen AI](https://cogenai.kalavai.net): A community-hosted alternative to the OpenAI API for unlimited inference.
87
+ - [Create your own Free Cursor/Windsurf Clone](https://www.youtube.com/watch?v=6zHSo7oeCDQ&t=21s)
91
88
 
92
89
 
93
90
  </details>
94
91
 
92
+
95
93
  ### Latest updates
96
94
 
95
+ - 11 June 2025: Native support for Mac and Raspberry Pi devices (ARM).
97
96
  - 20 February 2025: New shiny GUI interface to control LLM pools and deploy models
98
- - 6 February 2025: 🔥🔥🔥 Access **DeepSeek R1 model for free** when you join our [public LLM pool](https://kalavai-net.github.io/kalavai-client/public_llm_pool/)
99
97
  - 31 January 2025: `kalavai-client` is now a [PyPI package](https://pypi.org/project/kalavai-client/), easier to install than ever!
100
98
  <details>
101
99
  <summary>More news</summary>
@@ -148,8 +146,6 @@ The `kalavai-client` is the main tool to interact with the Kalavai platform, to
148
146
 
149
147
  <summary>Requirements</summary>
150
148
 
151
- ### Requirements
152
-
153
149
  For workers sharing resources with the pool:
154
150
 
155
151
  - A laptop, desktop or Virtual Machine
@@ -157,37 +153,8 @@ For workers sharing resources with the pool:
157
153
 
158
154
  > **Support for Windows and MacOS workers is experimental**: kalavai workers run on docker containers that require access to the host network interfaces, thus systems that do not support containers natively (Windows and MacOS) may have difficulties finding each other.
159
155
 
160
- Any system that runs python 3.6+ is able to run the `kalavai-client` and therefore connect and operate an LLM pool, [without sharing with the pool](). Your computer won't be adding its capacity to the pool, but it wil be able to deploy jobs and interact with models.
161
-
162
156
  </details>
163
157
 
164
- <details>
165
-
166
- <summary> Common issues</summary>
167
-
168
- If you see the following error:
169
-
170
- ```bash
171
- fatal error: Python.h: No such file or directory | #include <Python.h>
172
- ```
173
-
174
- Make sure you also install python3-dev package. For ubuntu distros:
175
-
176
- ```bash
177
- sudo apt install python3-dev
178
- ```
179
-
180
- If you see:
181
- ```bash
182
- AttributeError: install_layout. Did you mean: 'install_platlib'?
183
- [end of output]
184
- ```
185
-
186
- Upgrade your setuptools:
187
- ```bash
188
- pip install -U setuptools
189
- ```
190
- </details>
191
158
 
192
159
  ### Install the client
193
160
 
@@ -230,6 +197,8 @@ If your system is not currently supported, [open an issue](https://github.com/ka
230
197
 
231
198
  ### OS compatibility
232
199
 
200
+ Currently **seed nodes** are supported exclusively on Linux machines (x86_64 platform). However, Kalavai supports mixed pools, i.e. having Windows and macOS computers as workers.
201
+
233
202
  Since **worker nodes** run inside docker, any machine that can run docker **should** be compatible with Kalavai. Here are instructions for [linux](https://docs.docker.com/engine/install/), [Windows](https://docs.docker.com/desktop/setup/install/windows-install/) and [MacOS](https://docs.docker.com/desktop/setup/install/mac-install/).
234
203
 
235
204
  The kalavai client, which controls and accesses pools, can be installed on any machine that has Python 3.10+.
@@ -237,9 +206,10 @@ The kalavai client, which controls and access pools, can be installed on any mac
237
206
 
238
207
  ### Hardware compatibility:
239
208
 
240
- - `amd64` or `x86_64` CPU architecture
209
+ - `amd64` or `x86_64` CPU architecture for seed and worker nodes.
210
+ - `arm64` CPU architecture for worker nodes.
241
211
  - NVIDIA GPU
242
- - AMD and Intel GPUs are currently not supported ([interested in helping us test it?](https://kalavai-net.github.io/kalavai-client/compatibility/#help-testing-amd-gpus))
212
+ - Mac M series, AMD and Intel GPUs are currently not supported ([interested in helping us test it?](https://kalavai-net.github.io/kalavai-client/compatibility/#help-testing-amd-gpus))
243
213
 
244
214
  </details>
245
215
 
@@ -247,15 +217,15 @@ The kalavai client, which controls and access pools, can be installed on any mac
247
217
 
248
218
  - [x] Kalavai client on Linux
249
219
  - [x] [TEMPLATE] Distributed LLM deployment
250
- - [x] Kalavai client on Windows (with WSL2)
220
+ - [x] Kalavai client on Windows (worker only)
221
+ - [x] Kalavai client on Windows WSL2 (seed and worker)
251
222
  - [x] Self-hosted LLM pools
252
223
  - [x] Collaborative LLM deployment
253
224
  - [x] Ray cluster support
254
- - [x] Kalavai client on Mac
225
+ - [x] Kalavai client on Mac (worker only)
255
226
  - [x] Kalavai pools UI
256
- - [ ] [TEMPLATE] [GPUStack](https://github.com/gpustack/gpustack) support
257
- - [ ] [TEMPLATE] [exo](https://github.com/exo-explore/exo) support
258
227
  - [ ] Support for AMD GPUs
228
+ - [ ] Support for Mac M GPUs
259
229
  - [x] Docker install path
260
230
 
261
231
 
@@ -0,0 +1,25 @@
1
+ kalavai_client/__init__.py,sha256=1--FABNdIxbiNn1wQox38stjIswkk1wPeOgoYJXMsNU,23
2
+ kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
3
+ kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ kalavai_client/assets/apps.yaml,sha256=Qe3RtY4kQbzZnF9K724FUbtqnkuCGfNUfK-WWtamATg,6365
5
+ kalavai_client/assets/apps_values.yaml,sha256=WRew3bS1MztjzcJfphuJcKn0n2T1ICRupPpr_Csjt_s,1644
6
+ kalavai_client/assets/docker-compose-gui.yaml,sha256=shqN78YLw0QP7bqTKveI4ppz5E-5b1JowmsSB4OG3nA,778
7
+ kalavai_client/assets/docker-compose-template.yaml,sha256=KHIwJ2WWX7Y7wQKiXRr82Jqd3IKRyls5zhTyl8mSmrc,1805
8
+ kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
9
+ kalavai_client/assets/pool_config_template.yaml,sha256=fFz4w2-fMKD5KvyzFdfcWD_jSneRlmnjLc8hCctweX0,576
10
+ kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaYgl1pszniY_JUtemk,233
11
+ kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
12
+ kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
13
+ kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
14
+ kalavai_client/bridge_api.py,sha256=O65aIh5lUl0KldRekHzLC-xdv1YJmrR14kt5-3UgCco,15351
15
+ kalavai_client/bridge_models.py,sha256=5ALGbkb6cxKwXbrzeTa9ja0kiZkJBvnY3J1IsmXTn0U,2540
16
+ kalavai_client/cli.py,sha256=_LK5OrCM5PYcYZo7lwXyfI3mlNzLFhL-BicKYbJkxeY,47123
17
+ kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
18
+ kalavai_client/core.py,sha256=R8UBTTzMHVPHuM9nB70cIxUxVCHyBspEq1cAWH1OyOQ,34304
19
+ kalavai_client/env.py,sha256=YsfZj7LWf6ABquDsoIFFkXCFYwenpDk8zVnGsf7qv98,2823
20
+ kalavai_client/utils.py,sha256=kGtfEuXVG5LgMJk289ksFgYrsMHwKXN7yvS5wCIou8s,12781
21
+ kalavai_client-0.6.16.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
+ kalavai_client-0.6.16.dist-info/METADATA,sha256=K5mzqy8pSDdK6WWFSt8YZNTJLENfeV3OOGELq417dYs,12655
23
+ kalavai_client-0.6.16.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
+ kalavai_client-0.6.16.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
+ kalavai_client-0.6.16.dist-info/RECORD,,
@@ -1,25 +0,0 @@
1
- kalavai_client/__init__.py,sha256=RtAfZnVOn5ru1jOt5ukVyb0s2GiJ7s39Qp5_KBjCL-A,23
2
- kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
3
- kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- kalavai_client/assets/apps.yaml,sha256=AhTA3VZI27y05xHoHJCA9nvGnk8sWMhFDruBI2is3LM,6365
5
- kalavai_client/assets/apps_values.yaml,sha256=WRew3bS1MztjzcJfphuJcKn0n2T1ICRupPpr_Csjt_s,1644
6
- kalavai_client/assets/docker-compose-gui.yaml,sha256=DGCyGYzz1kH6kkMbo62FJHe3F9vcAmA8DOHw-c_o0Kw,752
7
- kalavai_client/assets/docker-compose-template.yaml,sha256=Nz_JzeBnQCzPCyWP5cEQHFeZzPwQqqBJ3C_xrToWlMA,1654
8
- kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
9
- kalavai_client/assets/pool_config_template.yaml,sha256=fFz4w2-fMKD5KvyzFdfcWD_jSneRlmnjLc8hCctweX0,576
10
- kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaYgl1pszniY_JUtemk,233
11
- kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
12
- kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
13
- kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
14
- kalavai_client/bridge_api.py,sha256=ZLyFOOz_o4agm-7DrHzoSBFrH65y__hZUoEe4diBTOA,7557
15
- kalavai_client/bridge_models.py,sha256=GbIaqGFAVs-3ikVUQZldwTTc06SsxmP6iAifH0oVDro,1219
16
- kalavai_client/cli.py,sha256=mmwLqqSYfl9k6vqveMcbHTq7g5FFd84YUUQCSH4J0k0,46967
17
- kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
18
- kalavai_client/core.py,sha256=u8a4uYqGS0mMJh0ArcXG2hwp2uDUSuwM5ROGXRQkHZg,34051
19
- kalavai_client/env.py,sha256=YsfZj7LWf6ABquDsoIFFkXCFYwenpDk8zVnGsf7qv98,2823
20
- kalavai_client/utils.py,sha256=yHz9n4hGwhpGUTc1ZcG5JHtesoUEzSngmOSTvu2jDic,12674
21
- kalavai_client-0.6.13.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
- kalavai_client-0.6.13.dist-info/METADATA,sha256=YUeJh_a81RDslXa8QNlgNwmXfQLHxAbqcCK2HWP6t-4,13354
23
- kalavai_client-0.6.13.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
- kalavai_client-0.6.13.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
- kalavai_client-0.6.13.dist-info/RECORD,,