konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +49 -0
- konduktor/adaptors/__init__.py +0 -0
- konduktor/adaptors/aws.py +221 -0
- konduktor/adaptors/common.py +118 -0
- konduktor/adaptors/gcp.py +126 -0
- konduktor/authentication.py +124 -0
- konduktor/backends/__init__.py +6 -0
- konduktor/backends/backend.py +86 -0
- konduktor/backends/constants.py +21 -0
- konduktor/backends/deployment.py +204 -0
- konduktor/backends/deployment_utils.py +1351 -0
- konduktor/backends/jobset.py +225 -0
- konduktor/backends/jobset_utils.py +726 -0
- konduktor/backends/pod_utils.py +501 -0
- konduktor/check.py +184 -0
- konduktor/cli.py +1945 -0
- konduktor/config.py +420 -0
- konduktor/constants.py +36 -0
- konduktor/controller/__init__.py +0 -0
- konduktor/controller/constants.py +56 -0
- konduktor/controller/launch.py +44 -0
- konduktor/controller/node.py +116 -0
- konduktor/controller/parse.py +111 -0
- konduktor/dashboard/README.md +30 -0
- konduktor/dashboard/backend/main.py +169 -0
- konduktor/dashboard/backend/sockets.py +154 -0
- konduktor/dashboard/frontend/.eslintrc.json +3 -0
- konduktor/dashboard/frontend/.gitignore +36 -0
- konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
- konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
- konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
- konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
- konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
- konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
- konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
- konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
- konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
- konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
- konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
- konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
- konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
- konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
- konduktor/dashboard/frontend/app/favicon.ico +0 -0
- konduktor/dashboard/frontend/app/globals.css +120 -0
- konduktor/dashboard/frontend/app/jobs/page.js +10 -0
- konduktor/dashboard/frontend/app/layout.js +22 -0
- konduktor/dashboard/frontend/app/logs/page.js +11 -0
- konduktor/dashboard/frontend/app/page.js +12 -0
- konduktor/dashboard/frontend/jsconfig.json +7 -0
- konduktor/dashboard/frontend/next.config.mjs +4 -0
- konduktor/dashboard/frontend/package-lock.json +6687 -0
- konduktor/dashboard/frontend/package.json +37 -0
- konduktor/dashboard/frontend/postcss.config.mjs +8 -0
- konduktor/dashboard/frontend/server.js +64 -0
- konduktor/dashboard/frontend/tailwind.config.js +17 -0
- konduktor/data/__init__.py +9 -0
- konduktor/data/aws/__init__.py +15 -0
- konduktor/data/aws/s3.py +1138 -0
- konduktor/data/constants.py +7 -0
- konduktor/data/data_utils.py +268 -0
- konduktor/data/gcp/__init__.py +19 -0
- konduktor/data/gcp/constants.py +42 -0
- konduktor/data/gcp/gcs.py +994 -0
- konduktor/data/gcp/utils.py +9 -0
- konduktor/data/registry.py +19 -0
- konduktor/data/storage.py +812 -0
- konduktor/data/storage_utils.py +535 -0
- konduktor/execution.py +447 -0
- konduktor/kube_client.py +237 -0
- konduktor/logging.py +111 -0
- konduktor/manifests/aibrix-setup.yaml +430 -0
- konduktor/manifests/apoxy-setup.yaml +184 -0
- konduktor/manifests/apoxy-setup2.yaml +98 -0
- konduktor/manifests/controller_deployment.yaml +69 -0
- konduktor/manifests/dashboard_deployment.yaml +131 -0
- konduktor/manifests/dmesg_daemonset.yaml +57 -0
- konduktor/manifests/pod_cleanup_controller.yaml +129 -0
- konduktor/resource.py +546 -0
- konduktor/serving.py +153 -0
- konduktor/task.py +949 -0
- konduktor/templates/deployment.yaml.j2 +191 -0
- konduktor/templates/jobset.yaml.j2 +43 -0
- konduktor/templates/pod.yaml.j2 +563 -0
- konduktor/usage/__init__.py +0 -0
- konduktor/usage/constants.py +21 -0
- konduktor/utils/__init__.py +0 -0
- konduktor/utils/accelerator_registry.py +17 -0
- konduktor/utils/annotations.py +62 -0
- konduktor/utils/base64_utils.py +95 -0
- konduktor/utils/common_utils.py +426 -0
- konduktor/utils/constants.py +5 -0
- konduktor/utils/env_options.py +55 -0
- konduktor/utils/exceptions.py +234 -0
- konduktor/utils/kubernetes_enums.py +8 -0
- konduktor/utils/kubernetes_utils.py +763 -0
- konduktor/utils/log_utils.py +467 -0
- konduktor/utils/loki_utils.py +102 -0
- konduktor/utils/rich_utils.py +123 -0
- konduktor/utils/schemas.py +625 -0
- konduktor/utils/subprocess_utils.py +273 -0
- konduktor/utils/ux_utils.py +247 -0
- konduktor/utils/validator.py +461 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
- konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,625 @@
|
|
|
1
|
+
"""This module contains schemas used to validate objects.
|
|
2
|
+
|
|
3
|
+
Schemas conform to the JSON Schema specification as defined at
|
|
4
|
+
https://json-schema.org/
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import enum
|
|
8
|
+
from typing import Any, Dict, List, Tuple
|
|
9
|
+
|
|
10
|
+
# Paths into the config schema that may be overridden per task.
# `_experimental_task_schema` passes this list to `_filter_schema` to build
# the schema of `experimental.config_overrides`.
OVERRIDEABLE_CONFIG_KEYS: List[Tuple[str, ...]] = [
    ('kubernetes', 'pod_config'),
    ('kubernetes', 'provision_timeout'),
]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _check_not_both_fields_present(field1: str, field2: str):
|
|
17
|
+
return {
|
|
18
|
+
'oneOf': [
|
|
19
|
+
{'required': [field1], 'not': {'required': [field2]}},
|
|
20
|
+
{'required': [field2], 'not': {'required': [field1]}},
|
|
21
|
+
{'not': {'anyOf': [{'required': [field1]}, {'required': [field2]}]}},
|
|
22
|
+
]
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _get_single_resources_schema():
|
|
27
|
+
"""Schema for a single resource in a resources list."""
|
|
28
|
+
# To avoid circular imports, only import when needed.
|
|
29
|
+
# pylint: disable=import-outside-toplevel
|
|
30
|
+
return {
|
|
31
|
+
'$schema': 'https://json-schema.org/draft/2020-12/schema',
|
|
32
|
+
'type': 'object',
|
|
33
|
+
'required': [],
|
|
34
|
+
'additionalProperties': False,
|
|
35
|
+
'properties': {
|
|
36
|
+
'cpus': {
|
|
37
|
+
'anyOf': [
|
|
38
|
+
{
|
|
39
|
+
'type': 'string',
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
'type': 'number',
|
|
43
|
+
},
|
|
44
|
+
],
|
|
45
|
+
},
|
|
46
|
+
'memory': {
|
|
47
|
+
'anyOf': [
|
|
48
|
+
{
|
|
49
|
+
'type': 'string',
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
'type': 'number',
|
|
53
|
+
},
|
|
54
|
+
],
|
|
55
|
+
},
|
|
56
|
+
'accelerators': {
|
|
57
|
+
'anyOf': [
|
|
58
|
+
{
|
|
59
|
+
'type': 'string',
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
'type': 'object',
|
|
63
|
+
'required': [],
|
|
64
|
+
'maxProperties': 1,
|
|
65
|
+
'additionalProperties': {'type': 'number'},
|
|
66
|
+
},
|
|
67
|
+
]
|
|
68
|
+
},
|
|
69
|
+
'disk_size': {
|
|
70
|
+
'type': 'integer',
|
|
71
|
+
},
|
|
72
|
+
'labels': {'type': 'object', 'additionalProperties': {'type': 'string'}},
|
|
73
|
+
'image_id': {
|
|
74
|
+
'anyOf': [
|
|
75
|
+
{
|
|
76
|
+
'type': 'string',
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
'type': 'object',
|
|
80
|
+
'required': [],
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
'type': 'null',
|
|
84
|
+
},
|
|
85
|
+
]
|
|
86
|
+
},
|
|
87
|
+
'_cluster_config_overrides': {
|
|
88
|
+
'type': 'object',
|
|
89
|
+
},
|
|
90
|
+
'job_config': {'type': 'object'},
|
|
91
|
+
},
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _get_multi_resources_schema():
    """Return the single-resource schema without its ``$schema`` key.

    Returns:
        A shallow copy of ``_get_single_resources_schema()`` with the
        top-level ``$schema`` entry removed.
    """
    schema = dict(_get_single_resources_schema())
    # Validation may fail if $schema is included.
    schema.pop('$schema', None)
    return schema
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def get_resources_schema():
    """Resource schema in task config.

    Starts from the properties of the single-resource schema, replaces
    ``accelerators`` with a wider definition, and adds the ``any_of`` and
    ``ordered`` lists whose items follow the multi-resource schema.

    Returns:
        A JSON-schema dict for the ``resources`` section.
    """
    properties = dict(_get_single_resources_schema()['properties'])
    del properties['accelerators']

    # We redefine the 'accelerators' field to allow one line list or
    # a set of accelerators.
    # {'V100:1', 'A100:1'} will be read as a string and converted to dict.
    properties['accelerators'] = {
        'anyOf': [
            {'type': 'string'},
            {
                'type': 'object',
                'required': [],
                'additionalProperties': {
                    'anyOf': [{'type': 'null'}, {'type': 'number'}]
                },
            },
            {'type': 'array', 'items': {'type': 'string'}},
        ]
    }

    # Both candidate lists reference the same item schema object.
    candidate_schema = _get_multi_resources_schema()
    for list_key in ('any_of', 'ordered'):
        properties[list_key] = {'type': 'array', 'items': candidate_schema}

    return {
        '$schema': 'http://json-schema.org/draft-07/schema#',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': properties,
    }
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _filter_schema(schema: dict, keys_to_keep: List[Tuple[str, ...]]) -> dict:
|
|
161
|
+
"""Recursively filter a schema to include only certain keys.
|
|
162
|
+
|
|
163
|
+
Args:
|
|
164
|
+
schema: The original schema dictionary.
|
|
165
|
+
keys_to_keep: List of tuples with the path of keys to retain.
|
|
166
|
+
|
|
167
|
+
Returns:
|
|
168
|
+
The filtered schema.
|
|
169
|
+
"""
|
|
170
|
+
# Convert list of tuples to a dictionary for easier access
|
|
171
|
+
paths_dict: Dict[str, Any] = {}
|
|
172
|
+
for path in keys_to_keep:
|
|
173
|
+
current = paths_dict
|
|
174
|
+
for step in path:
|
|
175
|
+
if step not in current:
|
|
176
|
+
current[step] = {}
|
|
177
|
+
current = current[step]
|
|
178
|
+
|
|
179
|
+
def keep_keys(
|
|
180
|
+
current_schema: dict, current_path_dict: dict, new_schema: dict
|
|
181
|
+
) -> dict:
|
|
182
|
+
# Base case: if we reach a leaf in the path_dict, we stop.
|
|
183
|
+
if (
|
|
184
|
+
not current_path_dict
|
|
185
|
+
or not isinstance(current_schema, dict)
|
|
186
|
+
or not current_schema.get('properties')
|
|
187
|
+
):
|
|
188
|
+
return current_schema
|
|
189
|
+
|
|
190
|
+
if 'properties' not in new_schema:
|
|
191
|
+
new_schema = {
|
|
192
|
+
key: current_schema[key]
|
|
193
|
+
for key in current_schema
|
|
194
|
+
# We do not support the handling of `oneOf`, `anyOf`, `allOf`,
|
|
195
|
+
# `required` for now.
|
|
196
|
+
if key not in {'properties', 'oneOf', 'anyOf', 'allOf', 'required'}
|
|
197
|
+
}
|
|
198
|
+
new_schema['properties'] = {}
|
|
199
|
+
for key, sub_schema in current_schema['properties'].items():
|
|
200
|
+
if key in current_path_dict:
|
|
201
|
+
# Recursively keep keys if further path dict exists
|
|
202
|
+
new_schema['properties'][key] = {}
|
|
203
|
+
current_path_value = current_path_dict.pop(key)
|
|
204
|
+
new_schema['properties'][key] = keep_keys(
|
|
205
|
+
sub_schema, current_path_value, new_schema['properties'][key]
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
return new_schema
|
|
209
|
+
|
|
210
|
+
# Start the recursive filtering
|
|
211
|
+
new_schema = keep_keys(schema, paths_dict, {})
|
|
212
|
+
assert not paths_dict, f'Unprocessed keys: {paths_dict}'
|
|
213
|
+
return new_schema
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _experimental_task_schema() -> dict:
    """Return the ``experimental`` entry of the task schema.

    Its only property, ``config_overrides``, is the config schema filtered
    down to the paths listed in ``OVERRIDEABLE_CONFIG_KEYS``.

    Returns:
        A dict with the single key ``experimental``.
    """
    experimental_properties = {
        'config_overrides': _filter_schema(
            get_config_schema(), OVERRIDEABLE_CONFIG_KEYS
        ),
    }
    experimental = {
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': experimental_properties,
    }
    return {'experimental': experimental}
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def get_task_schema():
    """Return the JSON schema (draft 2020-12) for a task config.

    Unknown top-level keys are rejected and no key is required.

    Returns:
        The task schema dict, including the ``experimental`` entry from
        ``_experimental_task_schema()``.
    """
    # Properties that are only checked for their JSON type, in order. The
    # object-typed sections (resources, file_mounts, service, serving) are
    # only checked to be objects here; their contents are validated
    # separately with their own schemas.
    simple_types = (
        ('name', 'string'),
        ('workdir', 'string'),
        ('event_callback', 'string'),
        ('num_nodes', 'integer'),
        ('resources', 'object'),
        ('file_mounts', 'object'),
        ('service', 'object'),
        ('serving', 'object'),
        ('setup', 'string'),
        ('run', 'string'),
    )
    properties = {key: {'type': json_type} for key, json_type in simple_types}

    properties['envs'] = {
        'type': 'object',
        'required': [],
        'patternProperties': {
            # Checks env keys are valid env var names.
            '^[a-zA-Z_][a-zA-Z0-9_]*$': {'type': ['string', 'null']}
        },
        'additionalProperties': False,
    }

    # inputs and outputs are experimental
    for io_key in ('inputs', 'outputs'):
        properties[io_key] = {
            'type': 'object',
            'required': [],
            'maxProperties': 1,
            'additionalProperties': {'type': 'number'},
        }

    properties.update(_experimental_task_schema())

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': properties,
    }
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def get_cluster_schema():
    """Return the JSON schema (draft 2020-12) for a cluster spec.

    ``cluster`` (with ``ips`` and ``name``) and ``auth`` (with ``ssh_user``
    and ``ssh_private_key``) are required; ``python`` is an optional string.
    Unknown keys are rejected at every level.
    """

    def _strict_object(required: list, properties: dict) -> dict:
        # An object schema that forbids keys outside `properties`.
        return {
            'type': 'object',
            'required': required,
            'additionalProperties': False,
            'properties': properties,
        }

    cluster = _strict_object(
        ['ips', 'name'],
        {
            'ips': {'type': 'array', 'items': {'type': 'string'}},
            'name': {'type': 'string'},
        },
    )
    auth = _strict_object(
        ['ssh_user', 'ssh_private_key'],
        {
            'ssh_user': {'type': 'string'},
            'ssh_private_key': {'type': 'string'},
        },
    )
    top_level = _strict_object(
        ['cluster', 'auth'],
        {'cluster': cluster, 'auth': auth, 'python': {'type': 'string'}},
    )
    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        **top_level,
    }
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
class RemoteIdentityOptions(enum.Enum):
    """Named remote identity types.

    Free-form string identities, which some clouds (e.g., AWS, Kubernetes)
    accept to name the service account/role to use, are deliberately not
    members of this enum.
    """

    # Each member's value is identical to its name.
    LOCAL_CREDENTIALS = 'LOCAL_CREDENTIALS'
    SERVICE_ACCOUNT = 'SERVICE_ACCOUNT'
    NO_UPLOAD = 'NO_UPLOAD'
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def get_default_remote_identity(cloud: str) -> str:
    """Get the default remote identity for the specified cloud.

    Args:
        cloud: Cloud name; compared exactly against ``'kubernetes'``.

    Returns:
        ``'SERVICE_ACCOUNT'`` for ``'kubernetes'``, otherwise
        ``'LOCAL_CREDENTIALS'``.
    """
    default = (
        RemoteIdentityOptions.SERVICE_ACCOUNT
        if cloud == 'kubernetes'
        else RemoteIdentityOptions.LOCAL_CREDENTIALS
    )
    return default.value
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
# Schema fragment for `remote_identity`: a string checked against the
# values of RemoteIdentityOptions via the `case_insensitive_enum` keyword.
_REMOTE_IDENTITY_SCHEMA = {
    'remote_identity': {
        'type': 'string',
        'case_insensitive_enum': [
            member.value for member in RemoteIdentityOptions
        ],
    }
}
|
|
369
|
+
|
|
370
|
+
_REMOTE_IDENTITY_SCHEMA_KUBERNETES = {
|
|
371
|
+
'remote_identity': {
|
|
372
|
+
'anyOf': [
|
|
373
|
+
{'type': 'string'},
|
|
374
|
+
{'type': 'object', 'additionalProperties': {'type': 'string'}},
|
|
375
|
+
]
|
|
376
|
+
},
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def get_serving_schema():
    """Return the JSON schema (draft 2020-12) for a serving spec.

    At least one of ``min_replicas`` or ``max_replicas`` must be present,
    and unknown keys are rejected.
    """
    min_replicas = {
        'type': 'integer',
        'minimum': 0,
        'description': 'Minimum number of replicas for autoscaling.',
    }
    max_replicas = {
        'type': 'integer',
        'minimum': 1,
        'description': 'Maximum number of replicas for autoscaling.',
    }
    # this could easily be an integer, but it was made more vague on
    # purpose so a float can be used to test the json schema validator
    # later down the line
    ports = {
        'type': 'number',
        'minimum': 1,
        'description': (
            'The containerPort and service port used by the model server.'
        ),
    }
    probe = {
        'type': 'string',
        'description': (
            'The livenessProbe, readinessProbe, and startupProbe '
            'path used by the model server.'
        ),
    }
    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'anyOf': [
            {'required': ['min_replicas']},
            {'required': ['max_replicas']},
        ],
        'additionalProperties': False,
        'properties': {
            'min_replicas': min_replicas,
            'max_replicas': max_replicas,
            'ports': ports,
            'probe': probe,
        },
    }
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
def get_storage_schema():
    """Return the JSON schema (draft 2020-12) for a storage spec.

    Unknown keys are rejected and no key is required. ``source`` is a
    string or a non-empty list of strings; ``store`` is checked against
    the keys of ``registry._REGISTRY`` and ``mode`` against the values of
    ``storage.StorageMode``, both via ``case_insensitive_enum``.
    """
    # Imported lazily, inside the function.
    # pylint: disable=import-outside-toplevel
    from konduktor.data import storage
    # Take the registry from konduktor.data, the same module that
    # get_config_schema reads `_REGISTRY` from.
    from konduktor.data import registry

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': {
            'name': {
                'type': 'string',
            },
            'source': {
                'anyOf': [
                    {
                        'type': 'string',
                    },
                    {'type': 'array', 'minItems': 1, 'items': {'type': 'string'}},
                ]
            },
            'store': {
                'type': 'string',
                # Iterating the registry yields its keys.
                'case_insensitive_enum': list(registry._REGISTRY),
            },
            'persistent': {
                'type': 'boolean',
            },
            'mode': {
                'type': 'string',
                'case_insensitive_enum': [mode.value for mode in storage.StorageMode],
            },
            '_bucket_sub_path': {
                'type': 'string',
            },
            '_force_delete': {
                'type': 'boolean',
            },
        },
    }
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def get_job_schema():
    """Schema for a job spec, which is defined under resources.

    Returns:
        A JSON-schema dict (draft 2020-12) allowing only ``completions``
        (integer, at least 1) and ``max_restarts`` (integer).
    """
    job_properties = {
        'completions': {'type': 'integer', 'minimum': 1},
        'max_restarts': {'type': 'integer'},
    }
    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': job_properties,
    }
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def get_config_schema():
    """Return the JSON schema (draft 2020-12) for the config file.

    Top-level sections: ``admin_policy``, ``nvidia_gpus``,
    ``allowed_clouds``, ``logs``, ``tailscale``, ``ssh``, ``serving`` and
    one section per cloud (currently only ``kubernetes``). Unknown keys are
    rejected at the top level and in each closed section.
    """
    # pylint: disable=import-outside-toplevel
    from konduktor.data import registry
    from konduktor.utils import kubernetes_enums

    def _closed_object(properties: dict) -> dict:
        # Object schema with no required keys that rejects unknown keys.
        return {
            'type': 'object',
            'required': [],
            'additionalProperties': False,
            'properties': properties,
        }

    kubernetes_properties = {
        'pod_config': {
            'type': 'object',
            'required': [],
            # Allow arbitrary keys since validating pod spec is hard
            'additionalProperties': True,
        },
        'custom_metadata': {
            'type': 'object',
            'required': [],
            # Allow arbitrary keys since validating metadata is hard
            'additionalProperties': True,
            # Disallow 'name' and 'namespace' keys in this dict
            'not': {
                'anyOf': [{'required': ['name']}, {'required': ['namespace']}]
            },
        },
        'allowed_contexts': {
            'type': 'array',
            'items': {'type': 'string'},
            'maxItems': 1,
        },
        'provision_timeout': {'type': 'integer'},
        'autoscaler': {
            'type': 'string',
            'case_insensitive_enum': [
                autoscaler.value
                for autoscaler in kubernetes_enums.KubernetesAutoscalerType
            ],
        },
    }
    cloud_configs = {'kubernetes': _closed_object(kubernetes_properties)}

    admin_policy_schema = {
        'type': 'string',
        # Check regex to be a valid python module path
        'pattern': r'^[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)+$',
    }

    # A list of cloud names that are allowed to be used
    allowed_clouds = {
        'type': 'array',
        # NOTE(review): `required` only constrains objects in JSON Schema,
        # so on an array schema it has no effect -- confirm before removing.
        'required': ['items'],
        'items': {
            'type': 'string',
            'case_insensitive_enum': list(registry._REGISTRY.keys()),
        },
    }

    logs_configs = _closed_object(
        {
            'backend': {
                'type': 'string',
                'case_insensitive_enum': ['loki', 'victoria'],
            },
            'timeout': {'type': 'integer', 'minimum': 1},
        }
    )
    gpu_configs = _closed_object({'disable_ecc': {'type': 'boolean'}})
    tailscale_configs = _closed_object({'secret_name': {'type': 'string'}})
    ssh_configs = _closed_object({'enable': {'type': 'boolean'}})
    serving_configs = _closed_object(
        {
            'endpoint': {
                'type': 'string',
                'case_insensitive_enum': ['trainy', 'direct'],
                'default': 'trainy',
            },
        }
    )

    # Attach the `remote_identity` property to every cloud section;
    # kubernetes gets its own, more permissive variant.
    for cloud_name, cloud_schema in cloud_configs.items():
        identity_schema = (
            _REMOTE_IDENTITY_SCHEMA_KUBERNETES
            if cloud_name == 'kubernetes'
            else _REMOTE_IDENTITY_SCHEMA
        )
        cloud_schema['properties'].update(identity_schema)

    top_level_properties = {
        'admin_policy': admin_policy_schema,
        'nvidia_gpus': gpu_configs,
        'allowed_clouds': allowed_clouds,
        'logs': logs_configs,
        'tailscale': tailscale_configs,
        'ssh': ssh_configs,
        'serving': serving_configs,
    }
    top_level_properties.update(cloud_configs)

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': top_level_properties,
    }
|