konduktor-nightly 0.1.0.dev20250915104603__py3-none-any.whl → 0.1.0.dev20251107104752__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
konduktor/task.py CHANGED
@@ -29,7 +29,7 @@ import konduktor
29
29
  from konduktor import constants, logging
30
30
  from konduktor.data import data_utils
31
31
  from konduktor.data import storage as storage_lib
32
- from konduktor.utils import common_utils, exceptions, schemas, ux_utils, validator
32
+ from konduktor.utils import common_utils, exceptions, schemas, ux_utils
33
33
 
34
34
  logger = logging.get_logger(__name__)
35
35
 
@@ -388,10 +388,6 @@ class Task:
388
388
  )
389
389
  resources_config['_cluster_config_overrides'] = cluster_config_override
390
390
 
391
- # Validate Docker image if specified in resources
392
- if 'image_id' in resources_config and resources_config['image_id']:
393
- validator.validate_and_warn_image(resources_config['image_id'], 'task')
394
-
395
391
  task.set_resources(konduktor.Resources.from_yaml_config(resources_config))
396
392
 
397
393
  # Parse serving field.
@@ -567,6 +563,13 @@ class Task:
567
563
  f'less than min_replicas ({serving.min_replicas})'
568
564
  )
569
565
 
566
+ if serving.max_replicas == 0 and serving.min_replicas == 0:
567
+ with ux_utils.print_exception_no_traceback():
568
+ raise ValueError(
569
+ f'max_replicas ({serving.max_replicas}) and '
570
+ f'min_replicas ({serving.min_replicas}) cannot both be 0'
571
+ )
572
+
570
573
  if isinstance(serving, konduktor.Serving):
571
574
  serving = serving
572
575
  self.serving = serving
@@ -12,6 +12,10 @@ metadata:
12
12
  {{ deployment_num_accelerators_label }}: "{{ num_accelerators }}"
13
13
  trainy.ai/has-autoscaler: "{{ autoscaler }}"
14
14
  trainy.ai/konduktor-managed: "true"
15
+ {% if autoscaler == 'true' %}
16
+ trainy.ai/original-min-replicas: "{{ min_replicas }}"
17
+ trainy.ai/original-max-replicas: "{{ max_replicas }}"
18
+ {% endif %}
15
19
  name: {{ name }}
16
20
  namespace: default
17
21
  spec:
@@ -37,11 +41,9 @@ metadata:
37
41
  {{ deployment_name_label }}: "{{ name }}"
38
42
  {{ deployment_user_label }}: "{{ user }}"
39
43
  trainy.ai/has-autoscaler: "{{ autoscaler }}"
40
- {% if not general %}
41
44
  annotations:
42
45
  prometheus.io/scrape: "true"
43
- prometheus.io/port: "8080"
44
- {% endif %}
46
+ prometheus.io/port: "9000"
45
47
  name: {{ name }}
46
48
  namespace: default
47
49
  spec:
@@ -61,18 +63,15 @@ spec:
61
63
  {{ model_name_label }}: {{ name }}
62
64
  {% endif %}
63
65
  {{ deployment_name_label }}: "{{ name }}"
64
- {% if general %}
65
- type: LoadBalancer
66
- {% else %}
67
66
  type: ClusterIP
68
- {% endif %}
69
67
 
70
- {% if not general %}
68
+ # AIBRIX PODAUTOSCALER STUFF (KPA)
69
+ {% if not general and autoscaler == 'true' %}
71
70
  ---
72
71
  apiVersion: autoscaling.aibrix.ai/v1alpha1
73
72
  kind: PodAutoscaler
74
73
  metadata:
75
- name: {{ name }}-apa
74
+ name: {{ name }}-pa
76
75
  namespace: default
77
76
  labels:
78
77
  {{ model_name_label }}: {{ name }}
@@ -80,63 +79,113 @@ metadata:
80
79
  app.kubernetes.io/managed-by: kustomize
81
80
  {{ deployment_name_label }}: "{{ name }}"
82
81
  {{ deployment_user_label }}: "{{ user }}"
83
- annotations:
84
- autoscaling.aibrix.ai/up-fluctuation-tolerance: '0.1'
85
- autoscaling.aibrix.ai/down-fluctuation-tolerance: '0.2'
86
- apa.autoscaling.aibrix.ai/window: 30s
87
82
  spec:
88
- scalingStrategy: APA
83
+ scalingStrategy: KPA
89
84
  minReplicas: {{ min_replicas }}
90
85
  maxReplicas: {{ max_replicas }}
91
86
  metricsSources:
92
- - metricSourceType: pod
87
+ - metricSourceType: domain
93
88
  protocolType: http
94
- port: "{{ ports }}"
95
- path: metrics
96
- targetMetric: gpu_cache_usage_perc
97
- targetValue: '0.5'
89
+ endpoint: aibrix-activator.aibrix-activator.svc.cluster.local:8080
90
+ path: /metrics/default/{{ name }}
91
+ targetMetric: vllm:deployment_replicas
92
+ targetValue: "1"
98
93
  scaleTargetRef:
99
94
  apiVersion: apps/v1
100
95
  kind: Deployment
101
96
  name: {{ name }}
102
97
  {% endif %}
103
98
 
99
+ # KEDA HTTP ADD-ON STUFF (1 per deployment)
104
100
  {% if general %}
101
+ {% if autoscaler == 'true' %}
102
+ # HTTPScaledObject (1 per deployment) - only when autoscaling enabled
105
103
  ---
106
- apiVersion: autoscaling/v2
107
- kind: HorizontalPodAutoscaler
104
+ apiVersion: http.keda.sh/v1alpha1
105
+ kind: HTTPScaledObject
108
106
  metadata:
109
- name: {{ name }}-hpa
107
+ name: {{ name }}-httpscaledobject
110
108
  namespace: default
111
109
  labels:
112
110
  {{ deployment_name_label }}: "{{ name }}"
113
111
  {{ deployment_user_label }}: "{{ user }}"
114
- trainy.ai/has-autoscaler: "{{ autoscaler }}"
115
112
  spec:
113
+ hosts:
114
+ - {{ name }}
115
+ pathPrefixes:
116
+ - "/"
117
+ {% if probe_path %}
118
+ - "{{ probe_path }}"
119
+ {% endif %}
116
120
  scaleTargetRef:
117
- apiVersion: apps/v1
121
+ name: "{{ name }}"
118
122
  kind: Deployment
119
- name: {{ name }}
120
- minReplicas: {{ min_replicas }}
121
- maxReplicas: {{ max_replicas }}
122
- metrics:
123
- - type: Resource
124
- resource:
125
- name: cpu
126
- target:
127
- type: Utilization
128
- averageUtilization: 50
129
- behavior:
130
- scaleDown:
131
- stabilizationWindowSeconds: 60
132
- policies:
133
- - type: Percent
134
- value: 100
135
- periodSeconds: 15
136
- scaleUp:
137
- stabilizationWindowSeconds: 20
138
- policies:
139
- - type: Percent
140
- value: 100
141
- periodSeconds: 15
123
+ apiVersion: apps/v1
124
+ service: "{{ name }}"
125
+ port: {{ ports }}
126
+ replicas:
127
+ min: {{ min_replicas }}
128
+ max: {{ max_replicas }}
129
+ scaledownPeriod: 1200 # 20 minutes
130
+ scalingMetric:
131
+ requestRate:
132
+ targetValue: 4
133
+ granularity: "1s"
134
+ window: "30s"
135
+ {% endif %}
136
+
137
+ # INGRESS (1 per deployment)
138
+ ---
139
+ apiVersion: networking.k8s.io/v1
140
+ kind: Ingress
141
+ metadata:
142
+ name: {{ name }}-ingress
143
+ labels:
144
+ {{ deployment_name_label }}: "{{ name }}"
145
+ {{ deployment_user_label }}: "{{ user }}"
146
+ trainy.ai/konduktor-managed: "true"
147
+ annotations:
148
+ nginx.ingress.kubernetes.io/use-regex: "true"
149
+ nginx.ingress.kubernetes.io/rewrite-target: /$1
150
+ {% if autoscaler == 'true' %}
151
+ nginx.ingress.kubernetes.io/upstream-vhost: "{{ name }}"
152
+ {% endif %}
153
+ spec:
154
+ ingressClassName: nginx
155
+ rules:
156
+ - host: {{ general_base_host }}
157
+ http:
158
+ paths:
159
+ - path: /{{ name }}(.*)
160
+ pathType: ImplementationSpecific
161
+ backend:
162
+ service:
163
+ {% if autoscaler == 'true' %}
164
+ # Use KEDA interceptor for autoscaling
165
+ name: keda-proxy
166
+ port:
167
+ number: 8080
168
+ {% else %}
169
+ # Direct to app service for fixed replicas
170
+ name: {{ name }}
171
+ port:
172
+ number: {{ ports }}
173
+ {% endif %}
174
+ # Direct access convenience rule (via LB IP + Host: {{ name }})
175
+ - host: {{ name }}
176
+ http:
177
+ paths:
178
+ - path: /(.*)
179
+ pathType: ImplementationSpecific
180
+ backend:
181
+ service:
182
+ {% if autoscaler == 'true' %}
183
+ name: keda-proxy
184
+ port:
185
+ number: 8080
186
+ {% else %}
187
+ name: {{ name }}
188
+ port:
189
+ number: {{ ports }}
190
+ {% endif %}
142
191
  {% endif %}
@@ -28,16 +28,21 @@ kubernetes:
28
28
  containers:
29
29
  # TODO(asaiacai): should decide here whether we add the fabric interfaces/containers init etc.
30
30
  - name: konduktor-container
31
- {% if enable_ssh %}
31
+ {% if enable_ssh or serving %}
32
32
  ports:
33
+ {% if enable_ssh %}
33
34
  - name: ssh
34
35
  containerPort: {{ konduktor_ssh_port }}
36
+ {% endif %}
37
+
38
+ {% if serving %}
39
+ - name: serving
40
+ containerPort: {{ ports }}
41
+ {% endif %}
35
42
  {% endif %}
36
- {% if serving %}
37
- ports:
38
- - containerPort: {{ ports }}
43
+
44
+ {% if serving and probe %}
39
45
  # TODO (ryan): allow modification of thresholds and timings
40
- {% if probe %}
41
46
  livenessProbe:
42
47
  httpGet:
43
48
  path: {{ probe }}
@@ -68,7 +73,6 @@ kubernetes:
68
73
  successThreshold: 1
69
74
  timeoutSeconds: 1
70
75
  {% endif %}
71
- {% endif %}
72
76
  image: {{ image_id }}
73
77
  # this is set during jobset definition since we need to know the jobset
74
78
  # name and number of nodes to set all the environment variables correctly here
@@ -77,6 +81,10 @@ kubernetes:
77
81
  # flush logs immediately to stdout for more reactive log streaming
78
82
  - name: PYTHONUNBUFFERED
79
83
  value: "0"
84
+ - name: KONDUKTOR_NODENAME
85
+ valueFrom:
86
+ fieldRef:
87
+ fieldPath: spec.nodeName
80
88
  - name: KONDUKTOR_JOB_NAME
81
89
  value: "{{ job_name }}"
82
90
  - name: NODE_HOST_IPS
@@ -134,6 +142,8 @@ kubernetes:
134
142
  {% if default_secrets %}
135
143
  - name: KONDUKTOR_DEFAULT_SECRETS
136
144
  value: "/konduktor/default-secrets"
145
+ - name: KONDUKTOR_DEFAULT_SECRETS_EXPANDED
146
+ value: "/run/konduktor/expanded-default-secrets"
137
147
  {% endif %}
138
148
  # these are for compatibility with skypilot
139
149
  - name: SKYPILOT_NODE_IPS
@@ -146,6 +156,10 @@ kubernetes:
146
156
  value: "{{ num_nodes }}"
147
157
  - name: SKYPILOT_NUM_GPUS_PER_NODE
148
158
  value: "{{ num_gpus }}"
159
+ - name: RESTART_ATTEMPT
160
+ valueFrom:
161
+ fieldRef:
162
+ fieldPath: metadata.labels['jobset.sigs.k8s.io/restart-attempt']
149
163
  volumeMounts:
150
164
  - name: shared-memory
151
165
  mountPath: /dev/shm
@@ -159,6 +173,10 @@ kubernetes:
159
173
  - name: default-secret-{{ secret.mount_name }}
160
174
  mountPath: /konduktor/default-secrets/{{ secret.mount_name }}
161
175
  {% endfor %}
176
+ {% if default_secrets %}
177
+ - name: default-secrets-expanded
178
+ mountPath: /run/konduktor/expanded-default-secrets
179
+ {% endif %}
162
180
  {% if git_ssh %}
163
181
  - name: git-ssh-secret
164
182
  mountPath: /run/konduktor/git-ssh-secret
@@ -192,7 +210,7 @@ kubernetes:
192
210
  {% if 'curl' in run_cmd or 'curl' in setup_cmd or tailscale_secret %}
193
211
  PACKAGES="$PACKAGES curl";
194
212
  {% endif %}
195
- {% if 'gs' in mount_secrets or 's3' in mount_secrets %}
213
+ {% if 'gs' in mount_secrets or 's3' in mount_secrets or default_secrets %}
196
214
  PACKAGES="$PACKAGES unzip wget";
197
215
  {% endif %}
198
216
  {% if 'git' in run_cmd or 'git' in setup_cmd %}
@@ -231,7 +249,7 @@ kubernetes:
231
249
  fi;
232
250
  end_epoch=$(date +%s);
233
251
 
234
- echo "Exposing ENV variables"
252
+ echo "===== KONDUKTOR: Exposing ENV variables ====="
235
253
  $(prefix_cmd) env -0 | awk -v RS='\0' '
236
254
  {
237
255
  gsub(/\\/,"\\\\"); # escape existing backslashes first
@@ -346,8 +364,41 @@ kubernetes:
346
364
 
347
365
  $(prefix_cmd) echo "===== KONDUKTOR: Installing packages took $((end_epoch - start_epoch)) seconds ====="
348
366
 
367
+ $(prefix_cmd) echo "===== KONDUKTOR: Environment variable summary ====="
368
+ start_epoch=$(date +%s);
369
+
370
+ print_bucket () {
371
+ title="$1"; list="${2:-}"
372
+ echo "--- $title ---"
373
+ if [ -n "$list" ]; then
374
+ echo "$list" | tr ',' '\n' | sed "s/^/[$title] /"
375
+ else
376
+ echo "[none]"
377
+ fi
378
+ }
379
+
380
+ # Secrets: prefer detailed mapping if available
381
+ echo "--- env secret ---"
382
+ if [ -n "${KONDUKTOR_ENV_SECRETS_MAP_HOPEFULLY_NO_NAME_COLLISION:-}" ]; then
383
+ echo "${KONDUKTOR_ENV_SECRETS_MAP_HOPEFULLY_NO_NAME_COLLISION}" \
384
+ | tr ',' '\n' \
385
+ | awk -F'=' '{ printf("[secret: %s] %s\n", $2, $1) }'
386
+ elif [ -n "${KONDUKTOR_ENV_SECRETS_HOPEFULLY_NO_NAME_COLLISION:-}" ]; then
387
+ echo "${KONDUKTOR_ENV_SECRETS_HOPEFULLY_NO_NAME_COLLISION}" \
388
+ | tr ',' '\n' | sed 's/^/[secret] /'
389
+ else
390
+ echo "[none]"
391
+ fi
392
+
393
+ print_bucket "CLI + task.yaml" "${KONDUKTOR_ENV_TASK_ALL_HOPEFULLY_NO_NAME_COLLISION}"
394
+ print_bucket "config.yaml" "${KONDUKTOR_ENV_CONFIG_HOPEFULLY_NO_NAME_COLLISION}"
395
+ print_bucket "other" "${KONDUKTOR_ENV_OTHER_HOPEFULLY_NO_NAME_COLLISION}"
396
+
397
+ end_epoch=$(date +%s);
398
+ $(prefix_cmd) echo "===== KONDUKTOR: Environment variable summary took $((end_epoch - start_epoch)) seconds ====="
399
+
349
400
  # unpack secrets credentials
350
- $(prefix_cmd) echo "===== KONDUKTOR: Unpacking secrets credentials ====="
401
+ $(prefix_cmd) echo "===== KONDUKTOR: Unpacking cloud storage secret credentials ====="
351
402
  start_epoch=$(date +%s);
352
403
  mkdir -p ~/.konduktor
353
404
  mkdir -p {{ remote_workdir }}
@@ -362,12 +413,71 @@ kubernetes:
362
413
  $(prefix_cmd) unzip /run/konduktor/s3-secret/awscredentials -d ~/.aws
363
414
  {% endif %}
364
415
  {% endfor %}
416
+
417
+ {% if default_secrets %}
418
+ $(prefix_cmd) echo "===== KONDUKTOR: Unpacking default secrets ====="
419
+ $(prefix_cmd) mkdir -p "${KONDUKTOR_DEFAULT_SECRETS_EXPANDED}"
420
+
421
+ # For each mounted default secret folder:
422
+ # - if payload.zip exists, unzip it into the expanded dir
423
+ # - otherwise, copy the files as-is
424
+ for src in "${KONDUKTOR_DEFAULT_SECRETS}"/*; do
425
+ [ -d "$src" ] || continue
426
+ name="$(basename "$src")"
427
+ dst="${KONDUKTOR_DEFAULT_SECRETS_EXPANDED}/${name}"
428
+ $(prefix_cmd) mkdir -p "$dst"
429
+
430
+ if [ -f "${src}/payload.zip" ]; then
431
+ $(prefix_cmd) unzip -oq "${src}/payload.zip" -d "$dst"
432
+ else
433
+ $(prefix_cmd) cp -a "${src}/." "$dst/"
434
+ fi
435
+ done
436
+
437
+ # Point callers to the expanded (writable) path going forward
438
+ export KONDUKTOR_DEFAULT_SECRETS="${KONDUKTOR_DEFAULT_SECRETS_EXPANDED}"
439
+ $(prefix_cmd) echo "KONDUKTOR_DEFAULT_SECRETS=${KONDUKTOR_DEFAULT_SECRETS_EXPANDED}" >> /etc/environment
440
+ {% endif %}
441
+
365
442
  {% if git_ssh %}
366
443
  $(prefix_cmd) echo "Unpacking GIT-SSH secret"
367
444
  {% endif %}
368
445
  end_epoch=$(date +%s);
369
446
  $(prefix_cmd) echo "===== KONDUKTOR: Unpacking secrets credentials took $((end_epoch - start_epoch)) seconds ====="
370
447
 
448
+ $(prefix_cmd) echo "===== KONDUKTOR: Default secret summary ====="
449
+ start_epoch=$(date +%s)
450
+
451
+ root="${KONDUKTOR_DEFAULT_SECRETS:-}"
452
+ if [[ -z "$root" || ! -d "$root" ]]; then
453
+ $(prefix_cmd) echo "NO DEFAULT SECRETS FOUND."
454
+ else
455
+ for dir in "$root"/*; do
456
+ [ -d "$dir" ] || continue
457
+ name="$(basename "$dir")"
458
+
459
+ # Pretty header that mirrors the logical mount base:
460
+ $(prefix_cmd) echo "/konduktor/default-secrets/${name}:"
461
+
462
+ # Print relative paths only; skip macOS junk and k8s secret internals
463
+ (
464
+ cd "$dir"
465
+ out="$(find . \
466
+ \( -name '.DS_Store' -o -name '__MACOSX' -o -name '..data' -o -name '..*' \) -prune -o \
467
+ \( -type f -o -type l \) -print \
468
+ | sed 's|^\./||' \
469
+ | sort)"
470
+ if [ -n "$out" ]; then
471
+ printf "%s\n" "$out"
472
+ fi
473
+ )
474
+ done
475
+ fi
476
+
477
+ end_epoch=$(date +%s)
478
+ $(prefix_cmd) echo "===== KONDUKTOR: Default secret summary took $((end_epoch - start_epoch)) seconds ====="
479
+
480
+
371
481
  # sync file mounts
372
482
  {% for mkdir_command in mkdir_commands %}
373
483
  $(prefix_cmd) {{ mkdir_command }}
@@ -436,6 +546,10 @@ kubernetes:
436
546
  secret:
437
547
  secretName: {{ secret.k8s_name }}
438
548
  {% endfor %}
549
+ {% if default_secrets %}
550
+ - name: default-secrets-expanded
551
+ emptyDir: {}
552
+ {% endif %}
439
553
  {% if git_ssh %}
440
554
  - name: git-ssh-secret
441
555
  secret:
@@ -44,6 +44,8 @@ def zip_base64encode(files: List[str]) -> str:
44
44
  else:
45
45
  for root, _, files in os.walk(item_path):
46
46
  for file in files:
47
+ if file == '.DS_Store':
48
+ continue
47
49
  file_path = os.path.join(root, file)
48
50
  arcname = os.path.relpath(file_path, temp_dir)
49
51
  zipf.write(file_path, arcname)
@@ -389,7 +389,7 @@ def get_serving_schema():
389
389
  'properties': {
390
390
  'min_replicas': {
391
391
  'type': 'integer',
392
- 'minimum': 1,
392
+ 'minimum': 0,
393
393
  'description': 'Minimum number of replicas for autoscaling.',
394
394
  },
395
395
  'max_replicas': {
@@ -37,6 +37,11 @@ SCHEMA_URLS = {
37
37
  logger = logging.get_logger(__name__)
38
38
 
39
39
 
40
+ def _skip_image_checks() -> bool:
41
+ val = os.getenv('KONDUKTOR_SKIP_IMAGE_CHECK', '')
42
+ return val.lower() in ('1', 'true', 'yes', 'y')
43
+
44
+
40
45
  def case_insensitive_enum(validator, enums, instance, schema):
41
46
  del validator, schema # Unused.
42
47
  if instance.lower() not in [enum.lower() for enum in enums]:
@@ -419,6 +424,13 @@ def validate_and_warn_image(image_id: str, context: str = 'task') -> None:
419
424
  if not image_id:
420
425
  return
421
426
 
427
+ if _skip_image_checks():
428
+ logger.info(
429
+ 'Skipping Docker image validation for %s',
430
+ image_id,
431
+ )
432
+ return
433
+
422
434
  status, message = validate_docker_image(image_id)
423
435
 
424
436
  if status == 'invalid':
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250915104603
3
+ Version: 0.1.0.dev20251107104752
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -1,4 +1,4 @@
1
- konduktor/__init__.py,sha256=gW1PYl7NzKJnsnwHEY8-7kKncqEDCGzCOpYT5TGj6So,1574
1
+ konduktor/__init__.py,sha256=mHmTi0owXeaxTt6NwGboUKlwfKWw6xwzbdcUjq9-1DM,1574
2
2
  konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
4
4
  konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
@@ -6,14 +6,14 @@ konduktor/adaptors/gcp.py,sha256=ierTF4z7vwpJ9BsC7LSiwv4uLcjGXscwZOwQrddr2vM,410
6
6
  konduktor/authentication.py,sha256=_mVy3eqoKohicHostFiGwG1-2ybxP-l7ouofQ0LRlCY,4570
7
7
  konduktor/backends/__init__.py,sha256=usWJ8HdZJEyg7MIsN8Zcz9rk9e2Lq5dWJ8dv6hCN3ys,199
8
8
  konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
9
- konduktor/backends/constants.py,sha256=nt9G9AmFCOMwO4GuKgRQSzJJuKapOmaROp4_Y0tMF5A,732
10
- konduktor/backends/deployment.py,sha256=fswN9hX_7NwcEogYmo1xn3WgWF8XCcGDvV5yx54_CA0,5860
11
- konduktor/backends/deployment_utils.py,sha256=qcuoLPeMvEVqgD_h71hQZXAp4ZCdXsFeSBxhtXW6pAA,39846
9
+ konduktor/backends/constants.py,sha256=uAU-edQ_9DNYnu6x7fwNYXIEM7KMfJMOrnG74rlZ8mY,766
10
+ konduktor/backends/deployment.py,sha256=d0a3F7dxDbnRKIt4ZO_kQ0_vet0pZvg4bWYzVZ8DZIQ,6640
11
+ konduktor/backends/deployment_utils.py,sha256=9CmB9CYC_3wxIfIOmTSCN2hbURZ5MpEMTvPwYMUXBRM,49272
12
12
  konduktor/backends/jobset.py,sha256=drt8Gc0iYQx18JWXBU6XfhUvC2xCKd8szSJ2JC4O20Q,8640
13
- konduktor/backends/jobset_utils.py,sha256=If4pv5peB_yXrJJwjkySgVzbjcxDEDWfkOQxUkwSlOk,26386
14
- konduktor/backends/pod_utils.py,sha256=KP_PAgsdNHFgt4Od-5gAtpifAKIL7DMBg7NJ44uqikg,14885
13
+ konduktor/backends/jobset_utils.py,sha256=g49NY8RFhL_NNd4c1adRLG_Bq3UTFtRURxcAzxnMEYw,26524
14
+ konduktor/backends/pod_utils.py,sha256=kOi3cLbTI3abZFCNQswWrkrOiBBm3gW_9N4INjxeS-w,19276
15
15
  konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
16
- konduktor/cli.py,sha256=OwVdT4ibAQoAJO79YzlThQv_VKlHpsD-CHRwOzehGQ8,57613
16
+ konduktor/cli.py,sha256=B3Pp3RCwkGj8r9YgH-TgC85XU4zcc3eema1kpcDTQ3I,58452
17
17
  konduktor/config.py,sha256=9upqgCCYvcu6fKw7tovEYC1MWTkAAir0_WHPdayylbI,15536
18
18
  konduktor/constants.py,sha256=T3AeXXxuQHINW_bAWyztvDeS8r4g8kXBGIwIq13cys0,1814
19
19
  konduktor/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -68,25 +68,25 @@ konduktor/data/storage_utils.py,sha256=n4GivkN0KMqmyOTDznF0Z-hzsJvm7KCEh5i5HgFAT
68
68
  konduktor/execution.py,sha256=d0EP79iSrW2uFsoqn0YV_4kgIupPIqpMOParXx0y3kg,18519
69
69
  konduktor/kube_client.py,sha256=HtM3d-_GigHnfGINRANchApR9_OigqczBgeYJ6Dj4j0,8504
70
70
  konduktor/logging.py,sha256=xtcCdnecmC3rqMTyunK-klQRINojI7NI4Apag78i9jM,3221
71
- konduktor/manifests/apoxy-setup.yaml,sha256=HDZu7Evm_siIpK1E4tNZ9WVTnFV2LhBXwjJlFOYSTcU,3319
72
- konduktor/manifests/apoxy-setup2.yaml,sha256=BhXsgcVrLBruLXnF7xlj0Ej6YVJFYMABJIpYtwakQMo,731
71
+ konduktor/manifests/aibrix-setup.yaml,sha256=Foe3M1C0zVt-CVSJGr5SlQkMaNGs9kU2CvIZoANm3f8,14133
72
+ konduktor/manifests/apoxy-setup.yaml,sha256=EipknCq33aBdxu9BIo6y5novjO0B_d_DCWqY44zYNuU,4262
73
+ konduktor/manifests/apoxy-setup2.yaml,sha256=fc1tDwVopPVFzvUygkqxDGVqHHeo1cF9ERTnzUdgaGs,2517
73
74
  konduktor/manifests/controller_deployment.yaml,sha256=6p3oSLkEVONZsvKZGqVop0Dhn4bo3lrigRmhf8NXBHE,1730
74
75
  konduktor/manifests/dashboard_deployment.yaml,sha256=xJLd4FbPMAosI0fIv5_8y7dV9bw0Vsf81l-w4MB_aU8,2837
75
76
  konduktor/manifests/dmesg_daemonset.yaml,sha256=pSWt7YOeTYjS0l0iki1fvHOs7MhY-sH-RQfVW6JJyno,1391
76
77
  konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw1vcq9_3gwHCu75Bi0A,3939
77
- konduktor/resource.py,sha256=kfdhnUR_9kDLSm2sUAkv1sLQXyAkI08p3wIzCz1p7-M,20791
78
- konduktor/serving.py,sha256=sh8TPAUXg23Bkt0ByatIMdxFFqzRm18HJTEkt3wHzdo,5147
79
- konduktor/task.py,sha256=oFRHdMevg7lGYkHugCHl89FUREfq9M-l0Qd3N-rjHMA,37727
80
- konduktor/templates/apoxy-deployment.yaml.j2,sha256=_EdT7w0rBK3if1INHT6GGUEugy0mOkRfYOWRgBcKLdo,942
81
- konduktor/templates/deployment.yaml.j2,sha256=uXFjDQaimbpFdAn2RJGaIvS_PzDY136cw_L3QMjz3ZA,3452
78
+ konduktor/resource.py,sha256=JqEE3LZiBBd5vqAiHDk-nlLve_VUQHhgdo6BIgx2Xfk,21215
79
+ konduktor/serving.py,sha256=4s8cQhsVjf-HByZF65pbMxuqaV319hUSQE9pC8gP4Sg,5405
80
+ konduktor/task.py,sha256=FIWm_rC_63GPBoe-Hi8a_eJ0H8Szw747SwXYPrNtOWE,37820
81
+ konduktor/templates/deployment.yaml.j2,sha256=0Cer53I8YHtYgUeEBQ_NVgC36FdOcjMNejgfP8teJC4,4964
82
82
  konduktor/templates/jobset.yaml.j2,sha256=NQcVeRNsTLLmTnJRnkL1vr45mSeth-b11YShXn_RoSg,1323
83
- konduktor/templates/pod.yaml.j2,sha256=p9yE-AQkCF2Tgjd1QQiOLtzgI6Gbpps-MZYJZ9fQWIs,19159
83
+ konduktor/templates/pod.yaml.j2,sha256=gGYwdXsPxStiua9Mm-OF7byVfuKjcH-TYNjoQmdpX_Q,24107
84
84
  konduktor/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
85
  konduktor/usage/constants.py,sha256=gCL8afIHZhO0dcxbJGpESE9sCC1cBSbeRnQ8GwNOY4M,612
86
86
  konduktor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
87
  konduktor/utils/accelerator_registry.py,sha256=ythz3ynulP1DSSU7Jj5VUsQeBzSYRkxCVDZ5oOg0xtc,560
88
88
  konduktor/utils/annotations.py,sha256=oy2-BLydkFt3KWkXDuaGY84d6b7iISuy4eAT9uXk0Fc,2225
89
- konduktor/utils/base64_utils.py,sha256=mF-Tw98mFRG70YE4w6s9feuQSCYZHOb8YatBZwMugyI,3130
89
+ konduktor/utils/base64_utils.py,sha256=TzKxe_SPHrurJ1lTOwOi4OmGUNkGxomstFcTWcWLQhw,3223
90
90
  konduktor/utils/common_utils.py,sha256=8gBpzYiC1bQ8sbgHIFLkKCGT5nLs1afpejod60kVSos,15076
91
91
  konduktor/utils/constants.py,sha256=1DneiTR21lvKUcWdBGwC4I4fD4uPjbjLUilEnJS7rzA,216
92
92
  konduktor/utils/env_options.py,sha256=T41Slzf4Mzl-n45CGXXqdy2fCrYhPNZQ7RP5vmnN4xc,2258
@@ -96,12 +96,12 @@ konduktor/utils/kubernetes_utils.py,sha256=XleYxzG64hciZb-CjzBDjX8BOMhFATIIHZlXD
96
96
  konduktor/utils/log_utils.py,sha256=VUyTtN819BJnSwm33-73-h8aaD51Y5Gawt6ek2kU1tk,18181
97
97
  konduktor/utils/loki_utils.py,sha256=eOGiD7dZNuwzmyXKiifyqz00EVh2nwcUPFSiPkac9y0,4050
98
98
  konduktor/utils/rich_utils.py,sha256=ycADW6Ij3wX3uT8ou7T8qxX519RxlkJivsLvUahQaJo,3583
99
- konduktor/utils/schemas.py,sha256=X2q-Nuk71EfMQXl4QFOtFWlQgd8tC_jFAo5dScmlEQc,19067
99
+ konduktor/utils/schemas.py,sha256=cr39nEAgjluhXoUYnvIwCwLBH8rLds37MBsF1uQv1rw,19067
100
100
  konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
101
101
  konduktor/utils/ux_utils.py,sha256=LSH4b5lckD157qDF4keThxtkGdxNrAfGKmH1ewhZkm4,8646
102
- konduktor/utils/validator.py,sha256=gCB5v9Up9bCWD_92fS5ChfRRXj_m56Ky9uzd_77wXGI,16927
103
- konduktor_nightly-0.1.0.dev20250915104603.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
104
- konduktor_nightly-0.1.0.dev20250915104603.dist-info/METADATA,sha256=noibgctWCSonSUXnlV-7BQS_PAMNwkajGxAHNpvdwWM,4247
105
- konduktor_nightly-0.1.0.dev20250915104603.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
106
- konduktor_nightly-0.1.0.dev20250915104603.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
107
- konduktor_nightly-0.1.0.dev20250915104603.dist-info/RECORD,,
102
+ konduktor/utils/validator.py,sha256=UcLvZCk9Cpbbhw8r_ZJtTpMSTfY1NKqcyciKsPzRPZM,17222
103
+ konduktor_nightly-0.1.0.dev20251107104752.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
104
+ konduktor_nightly-0.1.0.dev20251107104752.dist-info/METADATA,sha256=EEA9KjVBKhzBk4hO1-mWEacCmBul0d5GqMbB_VUKWbQ,4247
105
+ konduktor_nightly-0.1.0.dev20251107104752.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
106
+ konduktor_nightly-0.1.0.dev20251107104752.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
107
+ konduktor_nightly-0.1.0.dev20251107104752.dist-info/RECORD,,
@@ -1,33 +0,0 @@
1
- ---
2
- # Apoxy Backend for general deployment
3
- apiVersion: core.apoxy.dev/v1alpha
4
- kind: Backend
5
- metadata:
6
- name: {{ unique_cluster_name }}-backend-{{ deployment_number }}
7
- labels:
8
- task_name: {{ name }}
9
- endpoint_name: {{ cluster_name }}-{{ deployment_number }}.trainy.us
10
- spec:
11
- endpoints:
12
- - fqdn: {{ name }}.default.{{ unique_cluster_name }}.tun.apoxy.net
13
- ---
14
- # Apoxy Route for general deployment
15
- apiVersion: gateway.apoxy.dev/v1
16
- kind: HTTPRoute
17
- metadata:
18
- name: {{ unique_cluster_name }}-route-{{ deployment_number }}
19
- labels:
20
- task_name: {{ name }}
21
- endpoint_name: {{ cluster_name }}-{{ deployment_number }}.trainy.us
22
- spec:
23
- parentRefs:
24
- - name: default
25
- kind: Gateway
26
- port: 443
27
- hostnames:
28
- - '{{ cluster_name }}-{{ deployment_number }}.trainy.us'
29
- rules:
30
- - backendRefs:
31
- - kind: Backend
32
- name: {{ unique_cluster_name }}-backend-{{ deployment_number }}
33
- port: {{ ports }}