konduktor-nightly 0.1.0.dev20250805105421__py3-none-any.whl → 0.1.0.dev20250807105334__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of konduktor-nightly might be problematic.
- konduktor/__init__.py +4 -7
- konduktor/backends/__init__.py +2 -4
- konduktor/backends/constants.py +12 -0
- konduktor/backends/deployment.py +179 -0
- konduktor/backends/deployment_utils.py +835 -0
- konduktor/backends/jobset.py +2 -2
- konduktor/backends/jobset_utils.py +16 -266
- konduktor/backends/pod_utils.py +392 -0
- konduktor/cli.py +343 -8
- konduktor/controller/launch.py +1 -1
- konduktor/execution.py +5 -2
- konduktor/kube_client.py +8 -0
- konduktor/resource.py +20 -0
- konduktor/serving.py +149 -0
- konduktor/task.py +61 -0
- konduktor/templates/deployment.yaml.j2 +142 -0
- konduktor/templates/pod.yaml.j2 +36 -0
- konduktor/utils/accelerator_registry.py +1 -1
- konduktor/utils/log_utils.py +1 -1
- konduktor/utils/schemas.py +42 -0
- konduktor/utils/validator.py +51 -16
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/METADATA +1 -1
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/RECORD +26 -21
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/entry_points.txt +0 -0
konduktor/task.py
CHANGED
@@ -23,6 +23,7 @@ import yaml

 if typing.TYPE_CHECKING:
     import konduktor.resource as resources_lib
+    import konduktor.serving as servings_lib

 import konduktor
 from konduktor import constants, logging
@@ -193,6 +194,7 @@ class Task:
         self.num_nodes = num_nodes  # type: ignore
         # Default to CPU VM
         self.resources: Optional[konduktor.Resources] = None
+        self.serving: Optional[konduktor.Serving] = None

         self.file_mounts: Optional[Dict[str, str]] = None
         self.best_resources = None  # (asaiacai): this is unused consider removing
@@ -386,6 +388,14 @@
         )
         resources_config['_cluster_config_overrides'] = cluster_config_override
         task.set_resources(konduktor.Resources.from_yaml_config(resources_config))
+
+        # Parse serving field.
+        serving_config = config.pop('serving', None)
+        if serving_config is not None:
+            serving = konduktor.Serving.from_yaml_config(serving_config, task.run)
+            if serving is not None:
+                task.set_serving(serving)
+
         assert not config, f'Invalid task args: {config.keys()}'
         return task

@@ -526,6 +536,52 @@ class Task:
         self.set_resources(new_resources)
         return self

+    def set_serving(
+        self,
+        serving: 'servings_lib.Serving',
+    ) -> 'Task':
+        """Sets the serving configuration for this task.
+
+        Args:
+            serving: konduktor.Serving object
+
+        Returns:
+            self: The current task, with serving set.
+        """
+        if self._num_nodes and self._num_nodes != 1:
+            with ux_utils.print_exception_no_traceback():
+                raise ValueError(
+                    f'Only single node serving is supported (num_nodes: 1). '
+                    f'Got: {self.num_nodes}'
+                )
+
+        if serving.max_replicas < serving.min_replicas:
+            with ux_utils.print_exception_no_traceback():
+                raise ValueError(
+                    f'max_replicas ({serving.max_replicas}) cannot be '
+                    f'less than min_replicas ({serving.min_replicas})'
+                )
+
+        if isinstance(serving, konduktor.Serving):
+            serving = serving
+        self.serving = serving
+        return self
+
+    def set_serving_override(self, override_params: Dict[str, Any]) -> 'Task':
+        """Sets the override parameters for the serving config."""
+        assert self.serving is not None, 'Serving config is required'
+        new_serving = konduktor.Serving(
+            min_replicas=override_params.get('min_replicas', self.serving.min_replicas),
+            max_replicas=override_params.get('max_replicas', self.serving.max_replicas),
+            ports=override_params.get('ports', self.serving.ports),
+            probe=override_params.get('probe', self.serving.probe),
+        )
+
+        self.num_nodes = override_params.get('num_nodes', self.num_nodes)
+
+        self.set_serving(new_serving)
+        return self
+
     def set_file_mounts(self, file_mounts: Optional[Dict[str, str]]) -> 'Task':
         """Sets the file mounts for this task.

@@ -870,6 +926,11 @@ class Task:
         if self.file_mounts is not None:
             config['file_mounts'].update(self.file_mounts)

+        add_if_not_none('serving', {})
+
+        if self.serving is not None:
+            add_if_not_none('serving', self.serving.to_yaml_config())
+
         if self.storage_mounts is not None:
             config['file_mounts'].update(
                 {
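For orientation, the new serving field on Task takes the same keys that get_serving_schema() (added to konduktor/utils/schemas.py later in this diff) accepts: min_replicas, max_replicas, ports, and probe. A minimal usage sketch follows; the Serving keyword arguments are inferred from set_serving_override() above, and the Task constructor call is hypothetical, so treat this as an illustration rather than the package's documented API.

# Hedged sketch of the serving hooks added in this diff; signatures are
# inferred from the code above, not from konduktor documentation.
import konduktor

serving = konduktor.Serving(
    min_replicas=1,    # minimum replicas for autoscaling (schema minimum: 1)
    max_replicas=3,    # must be >= min_replicas; Task.set_serving() enforces this
    ports=8000,        # containerPort / Service port used by the model server
    probe='/health',   # path shared by the liveness/readiness/startup probes
)

task = konduktor.Task(run='python serve.py')   # hypothetical; serving requires num_nodes == 1
task.set_serving(serving)
task.set_serving_override({'max_replicas': 5})  # rebuilds the Serving with overridden fields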
konduktor/templates/deployment.yaml.j2
ADDED
@@ -0,0 +1,142 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    {% if not general %}
+    {{ model_name_label }}: {{ name }}
+    model.aibrix.ai/port: "{{ ports }}"
+    {% endif %}
+    {{ deployment_name_label }}: "{{ name }}"
+    {{ deployment_user_label }}: "{{ user }}"
+    {{ deployment_accelerator_label }}: "{{ accelerator_type }}"
+    {{ deployment_num_accelerators_label }}: "{{ num_accelerators }}"
+    trainy.ai/has-autoscaler: "{{ autoscaler }}"
+    trainy.ai/konduktor-managed: "true"
+  name: {{ name }}
+  namespace: default
+spec:
+  replicas: {{ min_replicas }}
+  selector:
+    matchLabels:
+      {% if not general %}
+      {{ model_name_label }}: {{ name }}
+      {% endif %}
+      {{ deployment_name_label }}: "{{ name }}"
+  template: {}
+
+---
+
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    {% if not general %}
+    {{ model_name_label }}: {{ name }}
+    {% endif %}
+    prometheus-discovery: "true"
+    {{ deployment_name_label }}: "{{ name }}"
+    {{ deployment_user_label }}: "{{ user }}"
+    trainy.ai/has-autoscaler: "{{ autoscaler }}"
+  {% if not general %}
+  annotations:
+    prometheus.io/scrape: "true"
+    prometheus.io/port: "8080"
+  {% endif %}
+  name: {{ name }}
+  namespace: default
+spec:
+  ports:
+  - name: serve
+    port: {{ ports }}
+    protocol: TCP
+    targetPort: {{ ports }}
+  {% if not general %}
+  - name: http
+    port: 8080
+    protocol: TCP
+    targetPort: 8080
+  {% endif %}
+  selector:
+    {% if not general %}
+    {{ model_name_label }}: {{ name }}
+    {% endif %}
+    {{ deployment_name_label }}: "{{ name }}"
+  {% if general %}
+  type: LoadBalancer
+  {% else %}
+  type: ClusterIP
+  {% endif %}
+
+{% if not general %}
+---
+apiVersion: autoscaling.aibrix.ai/v1alpha1
+kind: PodAutoscaler
+metadata:
+  name: {{ name }}-apa
+  namespace: default
+  labels:
+    {{ model_name_label }}: {{ name }}
+    app.kubernetes.io/name: aibrix
+    app.kubernetes.io/managed-by: kustomize
+    {{ deployment_name_label }}: "{{ name }}"
+    {{ deployment_user_label }}: "{{ user }}"
+  annotations:
+    autoscaling.aibrix.ai/up-fluctuation-tolerance: '0.1'
+    autoscaling.aibrix.ai/down-fluctuation-tolerance: '0.2'
+    apa.autoscaling.aibrix.ai/window: 30s
+spec:
+  scalingStrategy: APA
+  minReplicas: {{ min_replicas }}
+  maxReplicas: {{ max_replicas }}
+  metricsSources:
+    - metricSourceType: pod
+      protocolType: http
+      port: "{{ ports }}"
+      path: metrics
+      targetMetric: gpu_cache_usage_perc
+      targetValue: '0.5'
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ name }}
+{% endif %}
+
+{% if general %}
+---
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ name }}-hpa
+  namespace: default
+  labels:
+    {{ deployment_name_label }}: "{{ name }}"
+    {{ deployment_user_label }}: "{{ user }}"
+    trainy.ai/has-autoscaler: "{{ autoscaler }}"
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ name }}
+  minReplicas: {{ min_replicas }}
+  maxReplicas: {{ max_replicas }}
+  metrics:
+  - type: Resource
+    resource:
+      name: cpu
+      target:
+        type: Utilization
+        averageUtilization: 50
+  behavior:
+    scaleDown:
+      stabilizationWindowSeconds: 60
+      policies:
+      - type: Percent
+        value: 100
+        periodSeconds: 15
+    scaleUp:
+      stabilizationWindowSeconds: 20
+      policies:
+      - type: Percent
+        value: 100
+        periodSeconds: 15
+{% endif %}
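A rough sketch of rendering this new template with jinja2 to see which variables it expects and what it emits. The deployment_*_label and model_name_label values passed below are placeholders; the real keys come from konduktor/backends/constants.py, which is not shown in this diff.

# Hedged sketch: render deployment.yaml.j2 and parse the resulting manifests.
import jinja2
import yaml

env = jinja2.Environment(
    loader=jinja2.FileSystemLoader('konduktor/templates'),
    trim_blocks=True,
    lstrip_blocks=True,
)
rendered = env.get_template('deployment.yaml.j2').render(
    general=True,               # True: LoadBalancer Service + HPA; False: aibrix model-serving path
    name='demo-server',
    user='alice',
    ports=8000,
    min_replicas=1,
    max_replicas=3,
    autoscaler=True,
    accelerator_type='H100',
    num_accelerators=1,
    # Placeholder label keys; real values live in konduktor/backends/constants.py.
    deployment_name_label='trainy.ai/deployment-name',
    deployment_user_label='trainy.ai/deployment-user',
    deployment_accelerator_label='trainy.ai/accelerator',
    deployment_num_accelerators_label='trainy.ai/num-accelerators',
    model_name_label='model.aibrix.ai/name',
)
docs = [d for d in yaml.safe_load_all(rendered) if d]
print([d['kind'] for d in docs])  # ['Deployment', 'Service', 'HorizontalPodAutoscaler']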
konduktor/templates/pod.yaml.j2
CHANGED
@@ -23,6 +23,42 @@ kubernetes:
 - name: ssh
   containerPort: {{ konduktor_ssh_port }}
 {% endif %}
+{% if serving %}
+ports:
+  - containerPort: {{ ports }}
+# TODO (ryan): allow modification of thresholds and timings
+{% if probe %}
+livenessProbe:
+  httpGet:
+    path: {{ probe }}
+    port: {{ ports }}
+    scheme: HTTP
+  initialDelaySeconds: 60
+  failureThreshold: 3
+  periodSeconds: 10
+  successThreshold: 1
+  timeoutSeconds: 1
+readinessProbe:
+  httpGet:
+    path: {{ probe }}
+    port: {{ ports }}
+    scheme: HTTP
+  initialDelaySeconds: 60
+  failureThreshold: 10
+  periodSeconds: 5
+  successThreshold: 1
+  timeoutSeconds: 1
+startupProbe:
+  httpGet:
+    path: {{ probe }}
+    port: {{ ports }}
+    scheme: HTTP
+  failureThreshold: 60
+  periodSeconds: 30
+  successThreshold: 1
+  timeoutSeconds: 1
+{% endif %}
+{% endif %}
 image: {{ image_id }}
 # this is set during jobset definition since we need to know the jobset
 # name and number of nodes to set all the environment variables correctly here
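The probe thresholds and timings above are hard-coded for now (the TODO notes this). A quick back-of-the-envelope check of what they imply under standard Kubernetes probe semantics:

# Rough timings implied by the probe settings added above. Kubernetes keeps the
# liveness and readiness probes disabled until the startup probe succeeds, and
# restarts the container if the startup probe exhausts its failure budget.
startup_budget_s = 60 * 30     # failureThreshold * periodSeconds -> up to 1800s to come up
liveness_restart_s = 3 * 10    # 3 consecutive failures, 10s apart -> ~30s to a restart
readiness_unready_s = 10 * 5   # 10 consecutive failures, 5s apart -> ~50s out of the Service
print(startup_budget_s, liveness_restart_s, readiness_unready_s)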
konduktor/utils/log_utils.py
CHANGED
@@ -374,7 +374,7 @@ def tail_vicky_logs(
     logger.debug(f'Making request to {vicky_url} with query: {query}')
     with requests.post(
         vicky_url, data=query, stream=True, timeout=timeout
-    ) as response:
+    ) as response:  # type: requests.Response
         logger.debug(f'Response status: {response.status_code}')
         if response.status_code != 200:
             logger.error(
konduktor/utils/schemas.py
CHANGED
@@ -260,6 +260,10 @@ def get_task_schema():
             'service': {
                 'type': 'object',
             },
+            # serving config is validated separately using SERVING_SCHEMA
+            'serving': {
+                'type': 'object',
+            },
             'setup': {
                 'type': 'string',
             },
@@ -373,6 +377,44 @@ _REMOTE_IDENTITY_SCHEMA_KUBERNETES = {
 }


+def get_serving_schema():
+    return {
+        '$schema': 'https://json-schema.org/draft/2020-12/schema',
+        'type': 'object',
+        'anyOf': [
+            {'required': ['min_replicas']},
+            {'required': ['max_replicas']},
+        ],
+        'additionalProperties': False,
+        'properties': {
+            'min_replicas': {
+                'type': 'integer',
+                'minimum': 1,
+                'description': 'Minimum number of replicas for autoscaling.',
+            },
+            'max_replicas': {
+                'type': 'integer',
+                'minimum': 1,
+                'description': 'Maximum number of replicas for autoscaling.',
+            },
+            'ports': {
+                # this could easily be an integer, but I made it
+                # more vague on purpose so I can use a float to test
+                # the json schema validator later down the line
+                'type': 'number',
+                'minimum': 1,
+                'description': 'The containerPort and service port '
+                'used by the model server.',
+            },
+            'probe': {
+                'type': 'string',
+                'description': 'The livenessProbe, readinessProbe, and startupProbe '
+                'path used by the model server.',
+            },
+        },
+    }
+
+
 def get_storage_schema():
     # pylint: disable=import-outside-toplevel
     from konduktor.data import storage
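Because the task schema above only declares serving as a bare object, the real validation happens against get_serving_schema(). A quick sketch, assuming a jsonschema release that understands the 2020-12 draft referenced in the schema:

# Hedged sketch: validate a serving block against the schema added above.
import jsonschema
from konduktor.utils.schemas import get_serving_schema

ok = {'min_replicas': 1, 'max_replicas': 3, 'ports': 8000, 'probe': '/metrics'}
jsonschema.validate(ok, get_serving_schema())  # passes

bad = {'ports': 8000}  # neither min_replicas nor max_replicas -> fails the anyOf
try:
    jsonschema.validate(bad, get_serving_schema())
except jsonschema.ValidationError as err:
    print(err.message)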
konduktor/utils/validator.py
CHANGED
@@ -17,11 +17,18 @@ from filelock import FileLock
 from konduktor import logging

 SCHEMA_VERSION = 'v1.32.0-standalone-strict'
-
-
-SCHEMA_LOCK_PATH = SCHEMA_CACHE_PATH.with_suffix('.lock')
+SCHEMA_CACHE_PATH = Path.home() / '.konduktor/schemas'
+SCHEMA_LOCK_PATH = SCHEMA_CACHE_PATH / '.lock'
 CACHE_MAX_AGE_SECONDS = 86400  # 24 hours

+# Schema URLs for different Kubernetes resources
+SCHEMA_URLS = {
+    'podspec': f'https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/{SCHEMA_VERSION}/podspec.json',
+    'deployment': f'https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/{SCHEMA_VERSION}/deployment.json',
+    'service': f'https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/{SCHEMA_VERSION}/service.json',
+    'horizontalpodautoscaler': f'https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/{SCHEMA_VERSION}/horizontalpodautoscaler-autoscaling-v2.json',
+}
+
 logger = logging.get_logger(__name__)


@@ -37,33 +44,41 @@ SchemaValidator = jsonschema.validators.extend(
 )


-def get_cached_schema() -> dict:
+def get_cached_schema(schema_type: str) -> dict:
+    """Get cached schema for a specific Kubernetes resource type."""
+    schema_url = SCHEMA_URLS.get(schema_type)
+    if not schema_url:
+        raise ValueError(f'Unknown schema type: {schema_type}')
+
+    schema_file = SCHEMA_CACHE_PATH / f'{schema_type}.json'
     lock = FileLock(str(SCHEMA_LOCK_PATH))
+
     with lock:
         # Check if schema file exists and is fresh
-        if
-            age = time.time() -
-            # if
+        if schema_file.exists():
+            age = time.time() - schema_file.stat().st_mtime
+            # if fresh
             if age < CACHE_MAX_AGE_SECONDS:
-                with open(
+                with open(schema_file, 'r') as f:
                     return json.load(f)

         # Download schema
-        resp = requests.get(
+        resp = requests.get(schema_url)
         resp.raise_for_status()

-        SCHEMA_CACHE_PATH.
-        with open(
+        SCHEMA_CACHE_PATH.mkdir(parents=True, exist_ok=True)
+        with open(schema_file, 'w') as f:
             f.write(resp.text)

         return resp.json()


-def
-
+def _validate_k8s_spec(spec: dict, schema_type: str, resource_name: str) -> None:
+    """Generic validation function for Kubernetes specs."""
+    schema = get_cached_schema(schema_type)

     validator = jsonschema.Draft7Validator(schema)
-    errors = sorted(validator.iter_errors(
+    errors = sorted(validator.iter_errors(spec), key=lambda e: e.path)

     if not errors:
         return
@@ -75,7 +90,7 @@ def validate_pod_spec(pod_spec: dict) -> None:
     ]

     # Clean log
-    logger.debug('Invalid k8s
+    logger.debug('Invalid k8s %s spec/config:\n%s', resource_name, '\n'.join(formatted))

     # Color only in CLI
     formatted_colored = [
@@ -86,6 +101,26 @@ def validate_pod_spec(pod_spec: dict) -> None:
     ]

     raise ValueError(
-        f'\n{Fore.RED}Invalid k8s
+        f'\n{Fore.RED}Invalid k8s {resource_name} spec/config: {Style.RESET_ALL}\n'
         + '\n'.join(formatted_colored)
     )
+
+
+def validate_pod_spec(pod_spec: dict) -> None:
+    """Validate a Kubernetes pod spec."""
+    _validate_k8s_spec(pod_spec, 'podspec', 'pod')
+
+
+def validate_deployment_spec(deployment_spec: dict) -> None:
+    """Validate a Kubernetes deployment spec."""
+    _validate_k8s_spec(deployment_spec, 'deployment', 'deployment')
+
+
+def validate_service_spec(service_spec: dict) -> None:
+    """Validate a Kubernetes service spec."""
+    _validate_k8s_spec(service_spec, 'service', 'service')
+
+
+def validate_horizontalpodautoscaler_spec(hpa_spec: dict) -> None:
+    """Validate a Kubernetes HorizontalPodAutoscaler spec."""
+    _validate_k8s_spec(hpa_spec, 'horizontalpodautoscaler', 'horizontalpodautoscaler')
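A short sketch of how the refactored validators might be exercised. It assumes network access on first use, since get_cached_schema() downloads the yannh/kubernetes-json-schema files listed in SCHEMA_URLS and caches them under ~/.konduktor/schemas.

# Hedged sketch: check a minimal Deployment with the new per-resource helpers.
from konduktor.utils import validator

deployment = {
    'apiVersion': 'apps/v1',
    'kind': 'Deployment',
    'metadata': {'name': 'demo'},
    'spec': {
        'replicas': 1,
        'selector': {'matchLabels': {'app': 'demo'}},
        'template': {
            'metadata': {'labels': {'app': 'demo'}},
            'spec': {'containers': [{'name': 'demo', 'image': 'nginx:1.27'}]},
        },
    },
}
validator.validate_deployment_spec(deployment)  # raises ValueError listing schema errors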
{konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/RECORD
CHANGED
@@ -1,21 +1,24 @@
-konduktor/__init__.py,sha256=
+konduktor/__init__.py,sha256=q3tcNX556xQdRe2M42K9PxHx1wCiqql_aGIr3DTKpAk,1574
 konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
 konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
 konduktor/adaptors/gcp.py,sha256=ierTF4z7vwpJ9BsC7LSiwv4uLcjGXscwZOwQrddr2vM,4102
 konduktor/authentication.py,sha256=_mVy3eqoKohicHostFiGwG1-2ybxP-l7ouofQ0LRlCY,4570
-konduktor/backends/__init__.py,sha256=
+konduktor/backends/__init__.py,sha256=usWJ8HdZJEyg7MIsN8Zcz9rk9e2Lq5dWJ8dv6hCN3ys,199
 konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
-konduktor/backends/constants.py,sha256=
-konduktor/backends/
-konduktor/backends/
+konduktor/backends/constants.py,sha256=eqURY4RU_YXX_WRcge4AZHjr4nwuxTmmVGBnDrD_Qa4,441
+konduktor/backends/deployment.py,sha256=EHfB2uLeKFQ3maek9tx6XL4_sjQ-ax59DZA79Q3EkVs,5519
+konduktor/backends/deployment_utils.py,sha256=VGuL01rKe7p7PoVRI_cP4tiZRxHZ13nnTMG-bmDf7P0,28975
+konduktor/backends/jobset.py,sha256=OwgDog9nH-FoUmNU_H--C3U5jx70reTKL1l849M1k5A,8430
+konduktor/backends/jobset_utils.py,sha256=MRZf-Wcn084lnig0SfgXlF9Q3RyC7m7THeJQQlpdSw8,12931
+konduktor/backends/pod_utils.py,sha256=K0y2kRTzrmIWFbmyJSEMhw7gueiIFF7VAlXtaBMNHkM,15237
 konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
-konduktor/cli.py,sha256=
+konduktor/cli.py,sha256=PRMOseG1v1qXj0-KbW7Hzsg5rVRfNZw1oykSJWHO_qA,45481
 konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
 konduktor/constants.py,sha256=T3AeXXxuQHINW_bAWyztvDeS8r4g8kXBGIwIq13cys0,1814
 konduktor/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 konduktor/controller/constants.py,sha256=SGAgu9yTDWYXyVwxlaw1vfRJFOflPR549mKwgdzbI9w,1124
-konduktor/controller/launch.py,sha256=
+konduktor/controller/launch.py,sha256=rmxBKYiAKObbYDiJEOx2S1B3pj5BX2avabbFYBEYEyk,1530
 konduktor/controller/node.py,sha256=9uKFtgxmonxtr-73DRAd7qJjHUjyfS1E4sgXT0agzPg,2982
 konduktor/controller/parse.py,sha256=U1G747N6Hef2cEgXvoRI2V1NEU316VDDHLCY1rYtnNc,3840
 konduktor/dashboard/README.md,sha256=xOeItNLb3t0k8AiiQcjqleRfrlcWGK-n6yJF-rLv4M0,718
@@ -62,21 +65,23 @@ konduktor/data/gcp/utils.py,sha256=FJQcMXZqtMIzjZ98b3lTTc0UbdPUKTDLsOsfJaaH5-s,2
 konduktor/data/registry.py,sha256=CUbMsN_Q17Pf4wRHkqZrycErEjTP7cLEdgcfwVGcEpc,696
 konduktor/data/storage.py,sha256=o2So-bY9glvgbGdoN7AQNYmNnvGf1AUDPpImtadRL90,35213
 konduktor/data/storage_utils.py,sha256=n4GivkN0KMqmyOTDznF0Z-hzsJvm7KCEh5i5HgFAT-4,20806
-konduktor/execution.py,sha256=
-konduktor/kube_client.py,sha256=
+konduktor/execution.py,sha256=d0EP79iSrW2uFsoqn0YV_4kgIupPIqpMOParXx0y3kg,18519
+konduktor/kube_client.py,sha256=WELs9jClRW9r-imNJF3gJi3Z7ygkFDnYDmMXu5nJhEM,6213
 konduktor/logging.py,sha256=mA1JCCWPCqQMRqEpE4l6D6vOYdbtbQXr0BuEk9RR790,3177
 konduktor/manifests/controller_deployment.yaml,sha256=6p3oSLkEVONZsvKZGqVop0Dhn4bo3lrigRmhf8NXBHE,1730
 konduktor/manifests/dashboard_deployment.yaml,sha256=xJLd4FbPMAosI0fIv5_8y7dV9bw0Vsf81l-w4MB_aU8,2837
 konduktor/manifests/dmesg_daemonset.yaml,sha256=pSWt7YOeTYjS0l0iki1fvHOs7MhY-sH-RQfVW6JJyno,1391
 konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw1vcq9_3gwHCu75Bi0A,3939
-konduktor/resource.py,sha256=
-konduktor/
+konduktor/resource.py,sha256=qQhMlI6gvTaoGfYb9NNgSrUavgNqfcYVfb9V_oC5pLE,20411
+konduktor/serving.py,sha256=sh8TPAUXg23Bkt0ByatIMdxFFqzRm18HJTEkt3wHzdo,5147
+konduktor/task.py,sha256=jrr-6mNWOB8I199N6OqAZIEmXoW17Xs1ZC9-y_N7P3w,37480
+konduktor/templates/deployment.yaml.j2,sha256=uXFjDQaimbpFdAn2RJGaIvS_PzDY136cw_L3QMjz3ZA,3452
 konduktor/templates/jobset.yaml.j2,sha256=rdURknodtgLp4zoA2PX86Nn4wPpi3tr5l4IG55aWBRg,1059
-konduktor/templates/pod.yaml.j2,sha256=
+konduktor/templates/pod.yaml.j2,sha256=3uXx0ls2v8x-NL_Ypze5u9RoJS8F5bzoyOJcYwzf8Z0,18240
 konduktor/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 konduktor/usage/constants.py,sha256=gCL8afIHZhO0dcxbJGpESE9sCC1cBSbeRnQ8GwNOY4M,612
 konduktor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-konduktor/utils/accelerator_registry.py,sha256=
+konduktor/utils/accelerator_registry.py,sha256=ythz3ynulP1DSSU7Jj5VUsQeBzSYRkxCVDZ5oOg0xtc,560
 konduktor/utils/annotations.py,sha256=oy2-BLydkFt3KWkXDuaGY84d6b7iISuy4eAT9uXk0Fc,2225
 konduktor/utils/base64_utils.py,sha256=mF-Tw98mFRG70YE4w6s9feuQSCYZHOb8YatBZwMugyI,3130
 konduktor/utils/common_utils.py,sha256=4yG5Kjvu1hu6x2nKNaaCUKQNrheUaG61Qe913MFPry8,15060
@@ -85,15 +90,15 @@ konduktor/utils/env_options.py,sha256=T41Slzf4Mzl-n45CGXXqdy2fCrYhPNZQ7RP5vmnN4x
 konduktor/utils/exceptions.py,sha256=5IFnN5bIUSBJv4KRRrCepk5jyY9EG5vWWQqbjCmP3NU,6682
 konduktor/utils/kubernetes_enums.py,sha256=SabUueF6Bpzbpa57gyH5VB65xla2N9l8CZmAeYTfGmM,176
 konduktor/utils/kubernetes_utils.py,sha256=VG7qatUFyWHY-PCQ8fYWh2kn2TMwfg84cn-VkXdCwI8,26077
-konduktor/utils/log_utils.py,sha256=
+konduktor/utils/log_utils.py,sha256=CC6McrURGIcM8R4ICBCmn_Y7oXbWh1fwl4__la6Zotw,16677
 konduktor/utils/loki_utils.py,sha256=h2ZvZQr1nE_wXXsKsGMjhG2s2MXknNd4icydTR_ruKU,3539
 konduktor/utils/rich_utils.py,sha256=ycADW6Ij3wX3uT8ou7T8qxX519RxlkJivsLvUahQaJo,3583
-konduktor/utils/schemas.py,sha256=
+konduktor/utils/schemas.py,sha256=tBrKhnkfn9uKDYdlb4L2KgooW-muuhww7U8fu9zX-ms,18336
 konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
 konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
-konduktor/utils/validator.py,sha256=
-konduktor_nightly-0.1.0.
-konduktor_nightly-0.1.0.
-konduktor_nightly-0.1.0.
-konduktor_nightly-0.1.0.
-konduktor_nightly-0.1.0.
+konduktor/utils/validator.py,sha256=5C1kE57Eyj1OPnAbvojqMNHHtf5fnl47FK_vEttd8aw,4331
+konduktor_nightly-0.1.0.dev20250807105334.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
+konduktor_nightly-0.1.0.dev20250807105334.dist-info/METADATA,sha256=agt8lbfGconzOtVJBStu3_Q3AI1Pf5dANidlxA4TICQ,4247
+konduktor_nightly-0.1.0.dev20250807105334.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+konduktor_nightly-0.1.0.dev20250807105334.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
+konduktor_nightly-0.1.0.dev20250807105334.dist-info/RECORD,,