konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. konduktor/__init__.py +49 -0
  2. konduktor/adaptors/__init__.py +0 -0
  3. konduktor/adaptors/aws.py +221 -0
  4. konduktor/adaptors/common.py +118 -0
  5. konduktor/adaptors/gcp.py +126 -0
  6. konduktor/authentication.py +124 -0
  7. konduktor/backends/__init__.py +6 -0
  8. konduktor/backends/backend.py +86 -0
  9. konduktor/backends/constants.py +21 -0
  10. konduktor/backends/deployment.py +204 -0
  11. konduktor/backends/deployment_utils.py +1351 -0
  12. konduktor/backends/jobset.py +225 -0
  13. konduktor/backends/jobset_utils.py +726 -0
  14. konduktor/backends/pod_utils.py +501 -0
  15. konduktor/check.py +184 -0
  16. konduktor/cli.py +1945 -0
  17. konduktor/config.py +420 -0
  18. konduktor/constants.py +36 -0
  19. konduktor/controller/__init__.py +0 -0
  20. konduktor/controller/constants.py +56 -0
  21. konduktor/controller/launch.py +44 -0
  22. konduktor/controller/node.py +116 -0
  23. konduktor/controller/parse.py +111 -0
  24. konduktor/dashboard/README.md +30 -0
  25. konduktor/dashboard/backend/main.py +169 -0
  26. konduktor/dashboard/backend/sockets.py +154 -0
  27. konduktor/dashboard/frontend/.eslintrc.json +3 -0
  28. konduktor/dashboard/frontend/.gitignore +36 -0
  29. konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
  30. konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
  31. konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
  32. konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
  33. konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
  34. konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
  35. konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
  36. konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
  37. konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
  38. konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
  39. konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
  40. konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
  41. konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
  42. konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
  43. konduktor/dashboard/frontend/app/favicon.ico +0 -0
  44. konduktor/dashboard/frontend/app/globals.css +120 -0
  45. konduktor/dashboard/frontend/app/jobs/page.js +10 -0
  46. konduktor/dashboard/frontend/app/layout.js +22 -0
  47. konduktor/dashboard/frontend/app/logs/page.js +11 -0
  48. konduktor/dashboard/frontend/app/page.js +12 -0
  49. konduktor/dashboard/frontend/jsconfig.json +7 -0
  50. konduktor/dashboard/frontend/next.config.mjs +4 -0
  51. konduktor/dashboard/frontend/package-lock.json +6687 -0
  52. konduktor/dashboard/frontend/package.json +37 -0
  53. konduktor/dashboard/frontend/postcss.config.mjs +8 -0
  54. konduktor/dashboard/frontend/server.js +64 -0
  55. konduktor/dashboard/frontend/tailwind.config.js +17 -0
  56. konduktor/data/__init__.py +9 -0
  57. konduktor/data/aws/__init__.py +15 -0
  58. konduktor/data/aws/s3.py +1138 -0
  59. konduktor/data/constants.py +7 -0
  60. konduktor/data/data_utils.py +268 -0
  61. konduktor/data/gcp/__init__.py +19 -0
  62. konduktor/data/gcp/constants.py +42 -0
  63. konduktor/data/gcp/gcs.py +994 -0
  64. konduktor/data/gcp/utils.py +9 -0
  65. konduktor/data/registry.py +19 -0
  66. konduktor/data/storage.py +812 -0
  67. konduktor/data/storage_utils.py +535 -0
  68. konduktor/execution.py +447 -0
  69. konduktor/kube_client.py +237 -0
  70. konduktor/logging.py +111 -0
  71. konduktor/manifests/aibrix-setup.yaml +430 -0
  72. konduktor/manifests/apoxy-setup.yaml +184 -0
  73. konduktor/manifests/apoxy-setup2.yaml +98 -0
  74. konduktor/manifests/controller_deployment.yaml +69 -0
  75. konduktor/manifests/dashboard_deployment.yaml +131 -0
  76. konduktor/manifests/dmesg_daemonset.yaml +57 -0
  77. konduktor/manifests/pod_cleanup_controller.yaml +129 -0
  78. konduktor/resource.py +546 -0
  79. konduktor/serving.py +153 -0
  80. konduktor/task.py +949 -0
  81. konduktor/templates/deployment.yaml.j2 +191 -0
  82. konduktor/templates/jobset.yaml.j2 +43 -0
  83. konduktor/templates/pod.yaml.j2 +563 -0
  84. konduktor/usage/__init__.py +0 -0
  85. konduktor/usage/constants.py +21 -0
  86. konduktor/utils/__init__.py +0 -0
  87. konduktor/utils/accelerator_registry.py +17 -0
  88. konduktor/utils/annotations.py +62 -0
  89. konduktor/utils/base64_utils.py +95 -0
  90. konduktor/utils/common_utils.py +426 -0
  91. konduktor/utils/constants.py +5 -0
  92. konduktor/utils/env_options.py +55 -0
  93. konduktor/utils/exceptions.py +234 -0
  94. konduktor/utils/kubernetes_enums.py +8 -0
  95. konduktor/utils/kubernetes_utils.py +763 -0
  96. konduktor/utils/log_utils.py +467 -0
  97. konduktor/utils/loki_utils.py +102 -0
  98. konduktor/utils/rich_utils.py +123 -0
  99. konduktor/utils/schemas.py +625 -0
  100. konduktor/utils/subprocess_utils.py +273 -0
  101. konduktor/utils/ux_utils.py +247 -0
  102. konduktor/utils/validator.py +461 -0
  103. konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
  104. konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
  105. konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
  106. konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
  107. konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,625 @@
1
+ """This module contains schemas used to validate objects.
2
+
3
+ Schemas conform to the JSON Schema specification as defined at
4
+ https://json-schema.org/
5
+ """
6
+
7
+ import enum
8
+ from typing import Any, Dict, List, Tuple
9
+
10
# Paths into the config schema (one tuple of nested keys per path) that
# `_experimental_task_schema` keeps when it filters the config schema for
# a task's `experimental.config_overrides` section.
OVERRIDEABLE_CONFIG_KEYS: List[Tuple[str, ...]] = [
    ('kubernetes', leaf_key) for leaf_key in ('pod_config', 'provision_timeout')
]
14
+
15
+
16
+ def _check_not_both_fields_present(field1: str, field2: str):
17
+ return {
18
+ 'oneOf': [
19
+ {'required': [field1], 'not': {'required': [field2]}},
20
+ {'required': [field2], 'not': {'required': [field1]}},
21
+ {'not': {'anyOf': [{'required': [field1]}, {'required': [field2]}]}},
22
+ ]
23
+ }
24
+
25
+
26
+ def _get_single_resources_schema():
27
+ """Schema for a single resource in a resources list."""
28
+ # To avoid circular imports, only import when needed.
29
+ # pylint: disable=import-outside-toplevel
30
+ return {
31
+ '$schema': 'https://json-schema.org/draft/2020-12/schema',
32
+ 'type': 'object',
33
+ 'required': [],
34
+ 'additionalProperties': False,
35
+ 'properties': {
36
+ 'cpus': {
37
+ 'anyOf': [
38
+ {
39
+ 'type': 'string',
40
+ },
41
+ {
42
+ 'type': 'number',
43
+ },
44
+ ],
45
+ },
46
+ 'memory': {
47
+ 'anyOf': [
48
+ {
49
+ 'type': 'string',
50
+ },
51
+ {
52
+ 'type': 'number',
53
+ },
54
+ ],
55
+ },
56
+ 'accelerators': {
57
+ 'anyOf': [
58
+ {
59
+ 'type': 'string',
60
+ },
61
+ {
62
+ 'type': 'object',
63
+ 'required': [],
64
+ 'maxProperties': 1,
65
+ 'additionalProperties': {'type': 'number'},
66
+ },
67
+ ]
68
+ },
69
+ 'disk_size': {
70
+ 'type': 'integer',
71
+ },
72
+ 'labels': {'type': 'object', 'additionalProperties': {'type': 'string'}},
73
+ 'image_id': {
74
+ 'anyOf': [
75
+ {
76
+ 'type': 'string',
77
+ },
78
+ {
79
+ 'type': 'object',
80
+ 'required': [],
81
+ },
82
+ {
83
+ 'type': 'null',
84
+ },
85
+ ]
86
+ },
87
+ '_cluster_config_overrides': {
88
+ 'type': 'object',
89
+ },
90
+ 'job_config': {'type': 'object'},
91
+ },
92
+ }
93
+
94
+
95
def _get_multi_resources_schema():
    """Return the single-resource schema without its '$schema' keyword.

    The result is used as the item schema of the 'any_of' / 'ordered'
    arrays in `get_resources_schema`.
    """
    item_schema = dict(_get_single_resources_schema())
    # Validation may fail if $schema is included.
    item_schema.pop('$schema', None)
    return item_schema
103
+
104
+
105
def get_resources_schema():
    """Build the schema for the resources section of a task config.

    Returns:
        A draft-07 schema dict containing every single-resource property,
        a wider 'accelerators' definition, and the 'any_of' / 'ordered'
        arrays whose items follow the multi-resource schema.
    """
    # Start from the single-resource properties, leaving 'accelerators'
    # out so it can be redefined (and re-inserted after the others) below.
    properties = {
        name: sub_schema
        for name, sub_schema in _get_single_resources_schema()['properties'].items()
        if name != 'accelerators'
    }
    candidate_schema = _get_multi_resources_schema()

    # We redefine the 'accelerators' field to allow one line list or
    # a set of accelerators.
    # {'V100:1', 'A100:1'} will be read as a string and converted to dict.
    accelerator_count = {'anyOf': [{'type': 'null'}, {'type': 'number'}]}
    properties['accelerators'] = {
        'anyOf': [
            {'type': 'string'},
            {
                'type': 'object',
                'required': [],
                'additionalProperties': accelerator_count,
            },
            {'type': 'array', 'items': {'type': 'string'}},
        ]
    }
    # Both arrays share the same item schema object.
    properties['any_of'] = {'type': 'array', 'items': candidate_schema}
    properties['ordered'] = {'type': 'array', 'items': candidate_schema}

    return {
        '$schema': 'http://json-schema.org/draft-07/schema#',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': properties,
    }
158
+
159
+
160
+ def _filter_schema(schema: dict, keys_to_keep: List[Tuple[str, ...]]) -> dict:
161
+ """Recursively filter a schema to include only certain keys.
162
+
163
+ Args:
164
+ schema: The original schema dictionary.
165
+ keys_to_keep: List of tuples with the path of keys to retain.
166
+
167
+ Returns:
168
+ The filtered schema.
169
+ """
170
+ # Convert list of tuples to a dictionary for easier access
171
+ paths_dict: Dict[str, Any] = {}
172
+ for path in keys_to_keep:
173
+ current = paths_dict
174
+ for step in path:
175
+ if step not in current:
176
+ current[step] = {}
177
+ current = current[step]
178
+
179
+ def keep_keys(
180
+ current_schema: dict, current_path_dict: dict, new_schema: dict
181
+ ) -> dict:
182
+ # Base case: if we reach a leaf in the path_dict, we stop.
183
+ if (
184
+ not current_path_dict
185
+ or not isinstance(current_schema, dict)
186
+ or not current_schema.get('properties')
187
+ ):
188
+ return current_schema
189
+
190
+ if 'properties' not in new_schema:
191
+ new_schema = {
192
+ key: current_schema[key]
193
+ for key in current_schema
194
+ # We do not support the handling of `oneOf`, `anyOf`, `allOf`,
195
+ # `required` for now.
196
+ if key not in {'properties', 'oneOf', 'anyOf', 'allOf', 'required'}
197
+ }
198
+ new_schema['properties'] = {}
199
+ for key, sub_schema in current_schema['properties'].items():
200
+ if key in current_path_dict:
201
+ # Recursively keep keys if further path dict exists
202
+ new_schema['properties'][key] = {}
203
+ current_path_value = current_path_dict.pop(key)
204
+ new_schema['properties'][key] = keep_keys(
205
+ sub_schema, current_path_value, new_schema['properties'][key]
206
+ )
207
+
208
+ return new_schema
209
+
210
+ # Start the recursive filtering
211
+ new_schema = keep_keys(schema, paths_dict, {})
212
+ assert not paths_dict, f'Unprocessed keys: {paths_dict}'
213
+ return new_schema
214
+
215
+
216
def _experimental_task_schema() -> dict:
    """Build the 'experimental' property entry of the task schema.

    Returns:
        A one-key dict mapping 'experimental' to an object schema whose
        only allowed property, 'config_overrides', follows the config
        schema filtered down to `OVERRIDEABLE_CONFIG_KEYS`.
    """
    full_config_schema = get_config_schema()
    overrides_schema = _filter_schema(full_config_schema, OVERRIDEABLE_CONFIG_KEYS)
    experimental = {
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': {'config_overrides': overrides_schema},
    }
    return {'experimental': experimental}
230
+
231
+
232
def get_task_schema():
    """Build the top-level schema of a task config.

    Returns:
        A draft 2020-12 schema dict that rejects unknown keys. Nested
        sections ('resources', 'file_mounts', 'service', 'serving') are
        only typed as objects here; per the comments below they are
        validated separately.
    """

    def text() -> dict:
        return {'type': 'string'}

    def mapping() -> dict:
        return {'type': 'object'}

    def single_numeric_entry() -> dict:
        # An object holding at most one key, whose value is a number.
        return {
            'type': 'object',
            'required': [],
            'maxProperties': 1,
            'additionalProperties': {'type': 'number'},
        }

    properties = {
        'name': text(),
        'workdir': text(),
        'event_callback': text(),
        'num_nodes': {'type': 'integer'},
        # resources config is validated separately using RESOURCES_SCHEMA
        'resources': mapping(),
        # storage config is validated separately using STORAGE_SCHEMA
        'file_mounts': mapping(),
        # service config is validated separately using SERVICE_SCHEMA
        'service': mapping(),
        # serving config is validated separately using SERVING_SCHEMA
        'serving': mapping(),
        'setup': text(),
        'run': text(),
        'envs': {
            'type': 'object',
            'required': [],
            'patternProperties': {
                # Checks env keys are valid env var names.
                '^[a-zA-Z_][a-zA-Z0-9_]*$': {'type': ['string', 'null']}
            },
            'additionalProperties': False,
        },
        # inputs and outputs are experimental
        'inputs': single_numeric_entry(),
        'outputs': single_numeric_entry(),
    }
    # Adds the 'experimental' section last.
    properties.update(_experimental_task_schema())

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': properties,
    }
298
+
299
+
300
def get_cluster_schema():
    """Build the schema of a cluster description.

    Returns:
        A draft 2020-12 schema dict requiring a 'cluster' object (with
        'ips' and 'name') and an 'auth' object (with 'ssh_user' and
        'ssh_private_key'); an optional 'python' string is also allowed.
        Unknown keys are rejected at every level.
    """

    def strict_object(required: list, properties: dict) -> dict:
        # Object schema that forbids keys outside `properties`.
        return {
            'type': 'object',
            'required': required,
            'additionalProperties': False,
            'properties': properties,
        }

    cluster = strict_object(
        ['ips', 'name'],
        {
            'ips': {'type': 'array', 'items': {'type': 'string'}},
            'name': {'type': 'string'},
        },
    )
    auth = strict_object(
        ['ssh_user', 'ssh_private_key'],
        {
            'ssh_user': {'type': 'string'},
            'ssh_private_key': {'type': 'string'},
        },
    )

    schema = {'$schema': 'https://json-schema.org/draft/2020-12/schema'}
    schema.update(
        strict_object(
            ['cluster', 'auth'],
            {'cluster': cluster, 'auth': auth, 'python': {'type': 'string'}},
        )
    )
    return schema
341
+
342
+
343
class RemoteIdentityOptions(enum.Enum):
    """Named choices for the remote identity setting.

    Only the fixed, enumerated choices live here. Some clouds (e.g., AWS,
    Kubernetes) additionally accept an arbitrary string naming the service
    account/role to use; such free-form values are not members of this
    enum.
    """

    # Each member's value is the member's own name as a string.
    LOCAL_CREDENTIALS = 'LOCAL_CREDENTIALS'
    SERVICE_ACCOUNT = 'SERVICE_ACCOUNT'
    NO_UPLOAD = 'NO_UPLOAD'
354
+
355
+
356
def get_default_remote_identity(cloud: str) -> str:
    """Return the default remote identity value for ``cloud``.

    Args:
        cloud: Cloud name; compared case-sensitively against 'kubernetes'.

    Returns:
        The SERVICE_ACCOUNT value for 'kubernetes', otherwise the
        LOCAL_CREDENTIALS value.
    """
    default_option = (
        RemoteIdentityOptions.SERVICE_ACCOUNT
        if cloud == 'kubernetes'
        else RemoteIdentityOptions.LOCAL_CREDENTIALS
    )
    return default_option.value
361
+
362
+
363
# Schema fragment for the 'remote_identity' property: a string restricted
# (via the custom 'case_insensitive_enum' keyword) to the values of
# `RemoteIdentityOptions`.
_REMOTE_IDENTITY_SCHEMA = {
    'remote_identity': {
        'type': 'string',
        'case_insensitive_enum': [
            member.value for member in RemoteIdentityOptions
        ],
    }
}
369
+
370
+ _REMOTE_IDENTITY_SCHEMA_KUBERNETES = {
371
+ 'remote_identity': {
372
+ 'anyOf': [
373
+ {'type': 'string'},
374
+ {'type': 'object', 'additionalProperties': {'type': 'string'}},
375
+ ]
376
+ },
377
+ }
378
+
379
+
380
def get_serving_schema():
    """Build the schema of a task's serving section.

    Returns:
        A draft 2020-12 schema dict that rejects unknown keys and requires
        at least one of 'min_replicas' / 'max_replicas' to be present.
    """
    min_replicas = {
        'type': 'integer',
        'minimum': 0,
        'description': 'Minimum number of replicas for autoscaling.',
    }
    max_replicas = {
        'type': 'integer',
        'minimum': 1,
        'description': 'Maximum number of replicas for autoscaling.',
    }
    # Deliberately typed as 'number' rather than 'integer': the original
    # author kept it loose so a float can later be used to exercise the
    # JSON schema validator.
    ports = {
        'type': 'number',
        'minimum': 1,
        'description': (
            'The containerPort and service port used by the model server.'
        ),
    }
    probe = {
        'type': 'string',
        'description': (
            'The livenessProbe, readinessProbe, and startupProbe '
            'path used by the model server.'
        ),
    }

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'anyOf': [
            {'required': ['min_replicas']},
            {'required': ['max_replicas']},
        ],
        'additionalProperties': False,
        'properties': {
            'min_replicas': min_replicas,
            'max_replicas': max_replicas,
            'ports': ports,
            'probe': probe,
        },
    }
416
+
417
+
418
def get_storage_schema():
    """Build the schema of a storage (file mount) specification.

    Returns:
        A draft 2020-12 schema dict that rejects unknown keys and requires
        none. 'store' is limited to the entries of the storage registry
        and 'mode' to the values of `storage.StorageMode`, both through
        the custom 'case_insensitive_enum' keyword.
    """
    # Imported lazily, like the other schema builders in this module.
    # pylint: disable=import-outside-toplevel
    # The registry module lives under `konduktor.data` (the same module
    # `get_config_schema` imports); the package ships no
    # `konduktor.registry`, so importing from there would fail.
    from konduktor.data import registry
    from konduktor.data import storage

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': {
            'name': {
                'type': 'string',
            },
            # A single source path, or a non-empty list of them.
            'source': {
                'anyOf': [
                    {
                        'type': 'string',
                    },
                    {'type': 'array', 'minItems': 1, 'items': {'type': 'string'}},
                ]
            },
            'store': {
                'type': 'string',
                # Iterating the registry yields its entries; `list` avoids
                # a comprehension variable that shadows the builtin `type`.
                'case_insensitive_enum': list(registry._REGISTRY),
            },
            'persistent': {
                'type': 'boolean',
            },
            'mode': {
                'type': 'string',
                'case_insensitive_enum': [mode.value for mode in storage.StorageMode],
            },
            '_bucket_sub_path': {
                'type': 'string',
            },
            '_force_delete': {
                'type': 'boolean',
            },
        },
    }
459
+
460
+
461
def get_job_schema():
    """Build the schema of a job spec, which is defined under resources.

    Returns:
        A draft 2020-12 schema dict allowing only the integer keys
        'completions' (at least 1) and 'max_restarts', neither required.
    """
    job_properties = {
        'completions': {'type': 'integer', 'minimum': 1},
        'max_restarts': {'type': 'integer'},
    }
    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': job_properties,
    }
478
+
479
+
480
def get_config_schema():
    """Build the schema of the top-level configuration.

    Returns:
        A draft 2020-12 schema dict that rejects unknown keys. It covers
        'admin_policy', 'nvidia_gpus', 'allowed_clouds', 'logs',
        'tailscale', 'ssh', 'serving', and one section per cloud (only
        'kubernetes' is defined here).
    """
    # pylint: disable=import-outside-toplevel
    from konduktor.data import registry
    from konduktor.utils import kubernetes_enums

    def closed_object(properties: dict) -> dict:
        # Object schema with no required keys that forbids unknown ones.
        return {
            'type': 'object',
            'required': [],
            'additionalProperties': False,
            'properties': properties,
        }

    autoscaler_names = [
        autoscaler.value for autoscaler in kubernetes_enums.KubernetesAutoscalerType
    ]
    kubernetes_config = closed_object(
        {
            'pod_config': {
                'type': 'object',
                'required': [],
                # Allow arbitrary keys since validating pod spec is hard
                'additionalProperties': True,
            },
            'custom_metadata': {
                'type': 'object',
                'required': [],
                # Allow arbitrary keys since validating metadata is hard
                'additionalProperties': True,
                # Disallow 'name' and 'namespace' keys in this dict
                'not': {
                    'anyOf': [{'required': ['name']}, {'required': ['namespace']}]
                },
            },
            'allowed_contexts': {
                'type': 'array',
                'items': {'type': 'string'},
                'maxItems': 1,
            },
            'provision_timeout': {'type': 'integer'},
            'autoscaler': {
                'type': 'string',
                'case_insensitive_enum': autoscaler_names,
            },
        }
    )
    cloud_configs = {'kubernetes': kubernetes_config}

    # Check regex to be a valid python module path
    admin_policy_schema = {
        'type': 'string',
        'pattern': r'^[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)+$',
    }

    # A list of cloud names that are allowed to be used
    allowed_clouds = {
        'type': 'array',
        'required': ['items'],
        'items': {
            'type': 'string',
            'case_insensitive_enum': list(registry._REGISTRY.keys()),
        },
    }

    logs_configs = closed_object(
        {
            'backend': {
                'type': 'string',
                'case_insensitive_enum': ['loki', 'victoria'],
            },
            'timeout': {'type': 'integer', 'minimum': 1},
        }
    )
    gpu_configs = closed_object({'disable_ecc': {'type': 'boolean'}})
    tailscale_configs = closed_object({'secret_name': {'type': 'string'}})
    ssh_configs = closed_object({'enable': {'type': 'boolean'}})
    serving_configs = closed_object(
        {
            'endpoint': {
                'type': 'string',
                'case_insensitive_enum': ['trainy', 'direct'],
                'default': 'trainy',
            },
        }
    )

    # Every cloud section gets a 'remote_identity' property; Kubernetes
    # uses its own, more permissive variant.
    for cloud_name, cloud_schema in cloud_configs.items():
        identity_schema = (
            _REMOTE_IDENTITY_SCHEMA_KUBERNETES
            if cloud_name == 'kubernetes'
            else _REMOTE_IDENTITY_SCHEMA
        )
        cloud_schema['properties'].update(identity_schema)

    top_level_properties = {
        'admin_policy': admin_policy_schema,
        'nvidia_gpus': gpu_configs,
        'allowed_clouds': allowed_clouds,
        'logs': logs_configs,
        'tailscale': tailscale_configs,
        'ssh': ssh_configs,
        'serving': serving_configs,
    }
    top_level_properties.update(cloud_configs)

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': top_level_properties,
    }