konduktor-nightly 0.1.0.dev20250512104920__py3-none-any.whl → 0.1.0.dev20250514104854__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
konduktor/__init__.py CHANGED
@@ -14,7 +14,7 @@ __all__ = [
14
14
  ]
15
15
 
16
16
  # Replaced with the current commit when building the wheels.
17
- _KONDUKTOR_COMMIT_SHA = '2b0d682b6fc8ff0d4e5ea417c4e324090f3c5f9b'
17
+ _KONDUKTOR_COMMIT_SHA = '05c7d9e243ae23c6e9abb0a4a034bfc0815fd587'
18
18
  os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
19
19
 
20
20
 
@@ -48,5 +48,5 @@ def _get_git_commit():
48
48
 
49
49
 
50
50
  __commit__ = _get_git_commit()
51
- __version__ = '1.0.0.dev0.1.0.dev20250512104920'
51
+ __version__ = '1.0.0.dev0.1.0.dev20250514104854'
52
52
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
@@ -0,0 +1,124 @@
1
+ # Proprietary Changes made for Trainy under the Trainy Software License
2
+ # Original source: skypilot: https://github.com/skypilot-org/skypilot
3
+ # which is Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """
14
+ The local machine's public key should not be uploaded to the remote VM, because
15
+ it will cause private/public key pair mismatch when the user tries to launch new
16
+ VM from that remote VM using SkyPilot, e.g., the node is used as a jobs
17
+ controller. (Lambda cloud is an exception, due to the limitation of the cloud
18
+ provider. See the comments in setup_lambda_authentication)
19
+ """
20
+
21
+ import functools
22
+ import os
23
+ from typing import Tuple
24
+
25
+ import filelock
26
+
27
+ from konduktor import logging
28
+ from konduktor.utils import common_utils
29
+
30
+ logger = logging.get_logger(__name__)
31
+
32
+ _SSH_KEY_PATH_PREFIX = '~/.konduktor/clients/{user_hash}/ssh'
33
+
34
+ MAX_TRIALS = 64
35
+
36
+
37
def get_ssh_key_and_lock_path() -> Tuple[str, str, str]:
    """Return the per-user SSH key locations and the lock file guarding them.

    The directory is derived from ``_SSH_KEY_PATH_PREFIX`` and the current
    user hash. As a side effect the (``~``-expanded) directory is created
    with mode ``0o700`` if it does not exist yet.

    Returns:
        ``(private_key_path, public_key_path, lock_path)``. The paths are
        built from the unexpanded prefix, so they still start with ``~``;
        callers must run them through ``os.path.expanduser``.
    """
    key_dir = _SSH_KEY_PATH_PREFIX.format(user_hash=common_utils.get_user_hash())
    os.makedirs(os.path.expanduser(key_dir), exist_ok=True, mode=0o700)

    private_key_path, public_key_path, lock_path = (
        os.path.join(key_dir, file_name)
        for file_name in (
            'konduktor-key',
            'konduktor-key.pub',
            '.__internal-konduktor-key.lock',
        )
    )
    return private_key_path, public_key_path, lock_path
45
+
46
+
47
def _generate_rsa_key_pair() -> Tuple[str, str]:
    """Create a fresh 2048-bit RSA key pair (public exponent 65537).

    Returns:
        ``(public_key, private_key)`` -- note that the PUBLIC key comes
        first. The public key is serialized in OpenSSH format; the private
        key is an unencrypted PEM in the TraditionalOpenSSL format. Both are
        decoded as UTF-8 with surrounding whitespace stripped.
    """
    # cryptography is imported lazily so that the cost of this third-party
    # import is only paid when a key actually has to be generated.
    # pylint: disable=import-outside-toplevel
    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import serialization
    from cryptography.hazmat.primitives.asymmetric import rsa

    def _as_text(raw: bytes) -> str:
        return raw.decode('utf-8').strip()

    rsa_key = rsa.generate_private_key(
        backend=default_backend(), public_exponent=65537, key_size=2048
    )

    private_pem = rsa_key.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.TraditionalOpenSSL,
        encryption_algorithm=serialization.NoEncryption(),
    )
    public_openssh = rsa_key.public_key().public_bytes(
        serialization.Encoding.OpenSSH, serialization.PublicFormat.OpenSSH
    )

    return _as_text(public_openssh), _as_text(private_pem)
79
+
80
+
81
def _save_key_pair(
    private_key_path: str, public_key_path: str, private_key: str, public_key: str
) -> None:
    """Write an SSH key pair to disk with restrictive permissions.

    Args:
        private_key_path: Destination of the private key. Its parent
            directory is created with mode ``0o700`` when missing.
        public_key_path: Destination of the public key.
        private_key: Private key text to write.
        public_key: Public key text to write.

    Existing files are overwritten.
    """
    key_dir = os.path.dirname(private_key_path)
    # A bare file name has no directory component; os.makedirs('') would raise.
    if key_dir:
        os.makedirs(key_dir, exist_ok=True, mode=0o700)

    with open(
        private_key_path,
        'w',
        encoding='utf-8',
        opener=functools.partial(os.open, mode=0o600),
    ) as f:
        # The mode passed to os.open() is only honoured when the file is
        # created. If a file already existed at this path it keeps its old
        # (possibly group/world readable) permissions, so tighten them
        # explicitly before any key material is written.
        os.chmod(private_key_path, 0o600)
        f.write(private_key)

    with open(
        public_key_path,
        'w',
        encoding='utf-8',
        opener=functools.partial(os.open, mode=0o644),
    ) as f:
        f.write(public_key)
102
+
103
+
104
def get_or_generate_keys() -> Tuple[str, str]:
    """Return the absolute private and public key paths, creating keys if needed.

    When no private key exists yet, a new RSA key pair is generated and
    saved while holding a file lock (10 second timeout), so concurrent
    callers do not write the key files at the same time.

    Returns:
        ``(private_key_path, public_key_path)`` with ``~`` expanded.

    Raises:
        AssertionError: if the public key file is missing after the
            private key has been found or generated.
    """
    private_key_path, public_key_path, lock_path = (
        os.path.expanduser(path) for path in get_ssh_key_and_lock_path()
    )

    # The directory holding the lock file (and the keys) is created with
    # owner-only (0o700) permissions.
    os.makedirs(os.path.dirname(lock_path), exist_ok=True, mode=0o700)

    with filelock.FileLock(lock_path, timeout=10):
        private_key_missing = not os.path.exists(private_key_path)
        if private_key_missing:
            # NB: the generator returns the public key first.
            public_key, private_key = _generate_rsa_key_pair()
            _save_key_pair(private_key_path, public_key_path, private_key, public_key)

    assert os.path.exists(public_key_path), (
        'Private key found, but associated public key '
        f'{public_key_path} does not exist.'
    )
    return private_key_path, public_key_path
@@ -70,25 +70,26 @@ def _wait_for_jobset_start(namespace: str, job_name: str):
70
70
  assert jobsets is not None, (
71
71
  f'Jobset {job_name} ' f'not found in namespace {namespace}'
72
72
  )
73
- if jobsets['status']['replicatedJobsStatus'][0]['ready']:
74
- logger.info(
75
- f'task '
76
- f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}{job_name}'
77
- f'{colorama.Style.RESET_ALL} ready'
78
- )
79
- break
80
- elif jobsets['status']['replicatedJobsStatus'][0]['succeeded']:
81
- return
82
- elif jobsets['status']['replicatedJobsStatus'][0]['failed']:
83
- logger.info(
84
- f'job '
85
- f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}{job_name}'
86
- f'{colorama.Style.RESET_ALL} '
87
- f'{colorama.Fore.RED}{colorama.Style.BRIGHT}failed{colorama.Style.RESET_ALL}'
88
- )
89
- job = jobset_utils.get_job(namespace, job_name)
90
- _raise_job_error(job)
91
- return
73
+ if 'status' in jobsets:
74
+ if jobsets['status']['replicatedJobsStatus'][0]['ready']:
75
+ logger.info(
76
+ f'task '
77
+ f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}{job_name}'
78
+ f'{colorama.Style.RESET_ALL} ready'
79
+ )
80
+ break
81
+ elif jobsets['status']['replicatedJobsStatus'][0]['succeeded']:
82
+ return
83
+ elif jobsets['status']['replicatedJobsStatus'][0]['failed']:
84
+ logger.info(
85
+ f'job '
86
+ f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}{job_name}'
87
+ f'{colorama.Style.RESET_ALL} '
88
+ f'{colorama.Fore.RED}{colorama.Style.BRIGHT}failed{colorama.Style.RESET_ALL}'
89
+ )
90
+ job = jobset_utils.get_job(namespace, job_name)
91
+ _raise_job_error(job)
92
+ return
92
93
  if timeout != -1 and time.time() - start > timeout:
93
94
  logger.error(
94
95
  f'{colorama.Style.BRIGHT}'
@@ -1,5 +1,6 @@
1
1
  """Jobset utils: wraps CRUD operations for jobsets"""
2
2
 
3
+ import base64
3
4
  import enum
4
5
  import json
5
6
  import os
@@ -15,7 +16,7 @@ if typing.TYPE_CHECKING:
15
16
  from datetime import timedelta
16
17
 
17
18
  import konduktor
18
- from konduktor import config, constants, kube_client, logging
19
+ from konduktor import authentication, config, constants, kube_client, logging
19
20
  from konduktor.data import registry
20
21
  from konduktor.utils import (
21
22
  common_utils,
@@ -93,6 +94,10 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
93
94
  else:
94
95
  accelerator_type = None
95
96
 
97
+ assert task.resources.cpus is not None, 'Task resources cpus are required'
98
+ assert task.resources.memory is not None, 'Task resources memory are required'
99
+ assert task.resources.image_id is not None, 'Task resources image_id are required'
100
+
96
101
  # template the commands to run on the container for syncing files. At this point
97
102
  # task.stores is Dict[str, storage_utils.Storage] which is (dst, storage_obj_src)
98
103
  # first we iterate through storage_mounts and then file_mounts.
@@ -150,10 +155,35 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
150
155
  f'though specified by `tailscale.secret_name`: {err}'
151
156
  )
152
157
 
153
- assert task.resources is not None, 'Task resources are required'
154
- assert task.resources.cpus is not None, 'Task resources cpus are required'
155
- assert task.resources.memory is not None, 'Task resources memory are required'
156
- assert task.resources.image_id is not None, 'Task resources image_id are required'
158
+ enable_ssh = config.get_nested(('ssh', 'enable'), False)
159
+ secret_name = None
160
+ if enable_ssh:
161
+ private_key_path, public_key_path = authentication.get_or_generate_keys()
162
+ with (
163
+ open(private_key_path, 'rb') as private_key_file,
164
+ open(public_key_path, 'rb') as public_key_file,
165
+ ):
166
+ private_key, public_key = private_key_file.read(), public_key_file.read()
167
+ user_hash = common_utils.get_user_hash()
168
+ context = kubernetes_utils.get_current_kube_config_context_name()
169
+ namespace = kubernetes_utils.get_kube_config_context_namespace(
170
+ context_name=context
171
+ )
172
+ secret_name = f'konduktor-ssh-keys-{user_hash}'
173
+ ok, result = kubernetes_utils.set_secret(
174
+ secret_name=secret_name,
175
+ namespace=namespace,
176
+ context=context,
177
+ data={
178
+ 'PUBKEY': base64.b64encode(public_key).decode(),
179
+ 'PRIVKEY': base64.b64encode(private_key).decode(),
180
+ },
181
+ )
182
+ if not ok:
183
+ raise exceptions.CreateSecretError(
184
+ f'Failed to set k8s secret {secret_name}: \n{result}'
185
+ )
186
+
157
187
  with tempfile.NamedTemporaryFile() as temp:
158
188
  common_utils.fill_template(
159
189
  'pod.yaml.j2',
@@ -166,6 +196,7 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
166
196
  'master_addr': master_addr,
167
197
  'num_nodes': task.num_nodes,
168
198
  'job_name': task.name, # append timestamp and user id here?
199
+ 'setup_cmd': task.setup or '',
169
200
  'run_cmd': task.run,
170
201
  'node_hostnames': node_hostnames,
171
202
  'accelerator_type': accelerator_type,
@@ -176,6 +207,9 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
176
207
  'user': common_utils.get_cleaned_username(),
177
208
  # Tailscale credentials
178
209
  'tailscale_secret': tailscale_secret,
210
+ # SSH
211
+ 'enable_ssh': enable_ssh,
212
+ 'secret_name': secret_name,
179
213
  },
180
214
  temp.name,
181
215
  )
@@ -183,6 +217,13 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
183
217
  # merge with `~/.konduktor/config.yaml``
184
218
  kubernetes_utils.combine_pod_config_fields(temp.name, pod_config)
185
219
  pod_config = common_utils.read_yaml(temp.name)
220
+
221
+ for env_var in pod_config['kubernetes']['pod_config']['spec']['containers'][0][
222
+ 'env'
223
+ ]:
224
+ if env_var['name'] in task.envs:
225
+ env_var['value'] = task.envs.pop(env_var['name'])
226
+
186
227
  for k, v in task.envs.items():
187
228
  pod_config['kubernetes']['pod_config']['spec']['containers'][0][
188
229
  'env'
@@ -221,6 +262,7 @@ def create_jobset(
221
262
  'user': common_utils.get_cleaned_username(),
222
263
  'accelerator_type': accelerator_type,
223
264
  'num_accelerators': num_accelerators,
265
+ 'completions': task.resources.get_completions(),
224
266
  **_JOBSET_METADATA_LABELS,
225
267
  },
226
268
  temp.name,
konduktor/data/aws/s3.py CHANGED
@@ -1037,8 +1037,11 @@ class S3Store(storage_utils.AbstractStore):
1037
1037
  secret_name=cls._AWS_SECRET_NAME,
1038
1038
  namespace=namespace,
1039
1039
  context=context,
1040
- secret_key=cls._AWS_CREDENTIALS_KEY,
1041
- secret_value=base64_utils.zip_base64encode(credentials_files),
1040
+ data={
1041
+ cls._AWS_CREDENTIALS_KEY: base64_utils.zip_base64encode(
1042
+ credentials_files
1043
+ )
1044
+ },
1042
1045
  )
1043
1046
  if not ok:
1044
1047
  logger.error(f'Failed to set AWS credentials in k8s secret: \n{result}')
konduktor/data/gcp/gcs.py CHANGED
@@ -891,8 +891,11 @@ class GcsStore(storage_utils.AbstractStore):
891
891
  secret_name=cls._GCP_SECRET_NAME,
892
892
  namespace=namespace,
893
893
  context=context,
894
- secret_key=cls._GCP_CREDENTIALS_KEY,
895
- secret_value=base64_utils.zip_base64encode(credentials_files),
894
+ data={
895
+ cls._GCP_CREDENTIALS_KEY: base64_utils.zip_base64encode(
896
+ credentials_files
897
+ )
898
+ },
896
899
  )
897
900
  if not ok:
898
901
  logger.error(f'Failed to set GCP credentials in k8s secret: \n{result}')
konduktor/resource.py CHANGED
@@ -49,6 +49,7 @@ class Resources:
49
49
  image_id: Union[str, None] = None,
50
50
  disk_size: Optional[int] = None,
51
51
  labels: Optional[Dict[str, str]] = None,
52
+ job_config: Optional[Dict[str, Union[int, str]]] = None,
52
53
  # Internal use only.
53
54
  # pylint: disable=invalid-name
54
55
  _cluster_config_overrides: Optional[Dict[str, Any]] = None,
@@ -91,6 +92,7 @@ class Resources:
91
92
  instance tags. On GCP, labels map to instance labels. On
92
93
  Kubernetes, labels map to pod labels. On other clouds, labels are
93
94
  not supported and will be ignored.
95
+ job_config: the configuration of the job spec
94
96
  Raises:
95
97
  ValueError: if some attributes are invalid.
96
98
  exceptions.NoCloudAccessError: if no public cloud is enabled.
@@ -122,6 +124,7 @@ class Resources:
122
124
  self._set_cpus(cpus)
123
125
  self._set_memory(memory)
124
126
  self._set_accelerators(accelerators)
127
+ self.job_config = job_config
125
128
 
126
129
  # TODO: move these out of init to prevent repeated calls.
127
130
  self._try_validate_cpus_mem()
@@ -382,6 +385,11 @@ class Resources:
382
385
  accel_str = f'{accel_name}:{accel_count}'
383
386
  return accel_str
384
387
 
388
def get_completions(self) -> Optional[int]:
    """Return the ``completions`` value from ``job_config`` as an int.

    Returns:
        ``int(job_config['completions'])`` when ``job_config`` is set and
        holds a truthy ``completions`` entry; ``None`` when ``job_config``
        is unset/empty, has no ``completions`` key, or the value is falsy.
    """
    if not self.job_config:
        return None
    # Use .get(): job_config may be passed to the constructor directly and
    # need not contain 'completions'; indexing would raise KeyError.
    completions = self.job_config.get('completions')
    if not completions:
        return None
    return int(completions)
392
+
385
393
  def copy(self, **override) -> 'Resources':
386
394
  """Returns a copy of the given Resources."""
387
395
  resources = Resources(
@@ -392,6 +400,7 @@ class Resources:
392
400
  disk_size=override.pop('disk_size', self.disk_size),
393
401
  image_id=override.pop('image_id', self.image_id),
394
402
  labels=override.pop('labels', self.labels),
403
+ job_config=override.pop('job_config', self.job_config),
395
404
  )
396
405
  assert len(override) == 0
397
406
  return resources
@@ -404,6 +413,13 @@ class Resources:
404
413
  config, schemas.get_resources_schema(), 'Invalid resources YAML: '
405
414
  )
406
415
 
416
+ if config.get('job_config', None):
417
+ common_utils.validate_schema(
418
+ config['job_config'],
419
+ schemas.get_job_schema(),
420
+ 'Invalid job config YAML',
421
+ )
422
+
407
423
  def _override_resources(
408
424
  base_resource_config: Dict[str, Any], override_configs: List[Dict[str, Any]]
409
425
  ) -> List[Resources]:
@@ -446,6 +462,7 @@ class Resources:
446
462
  resources_fields['disk_size'] = config.pop('disk_size', None)
447
463
  resources_fields['image_id'] = config.pop('image_id', None)
448
464
  resources_fields['labels'] = config.pop('labels', None)
465
+ resources_fields['job_config'] = config.pop('job_config', None)
449
466
 
450
467
  if resources_fields['cpus'] is not None:
451
468
  resources_fields['cpus'] = str(resources_fields['cpus'])
@@ -475,4 +492,5 @@ class Resources:
475
492
  add_if_not_none('disk_size', self.disk_size)
476
493
  add_if_not_none('image_id', self.image_id)
477
494
  add_if_not_none('labels', self.labels)
495
+ add_if_not_none('job_config', self.job_config)
478
496
  return config
konduktor/task.py CHANGED
@@ -181,8 +181,7 @@ class Task:
181
181
  """
182
182
  assert name is not None, 'Task name is required'
183
183
  self.name = name
184
- if setup is not None:
185
- raise ValueError('`setup` is being deprecated and not supported')
184
+ self.setup = setup
186
185
  self.run = run
187
186
  self.storage_mounts: Dict[str, storage_lib.Storage] = {}
188
187
  self.storage_plans: Dict[storage_lib.Storage, storage_lib.StoreType] = {}
@@ -320,6 +319,7 @@ class Task:
320
319
 
321
320
  task = Task(
322
321
  config.pop('name', None),
322
+ setup=config.pop('setup', None),
323
323
  run=config.pop('run', None),
324
324
  workdir=config.pop('workdir', None),
325
325
  num_nodes=config.pop('num_nodes', None),
@@ -1,12 +1,12 @@
1
1
  kubernetes:
2
2
  pod_config:
3
3
  metadata:
4
- {% if accelerator_type %}
5
4
  labels:
6
5
  parent: trainy
7
- trainy.ai/accelerator: {{ accelerator_type }}
8
6
  trainy.ai/username: {{ user }}
9
- {% endif %}
7
+ {% if accelerator_type %}
8
+ trainy.ai/accelerator: {{ accelerator_type }}
9
+ {% endif %}
10
10
  spec:
11
11
  restartPolicy: "Never"
12
12
  # trigger this on GPU request
@@ -15,9 +15,39 @@ kubernetes:
15
15
  - key: "nvidia.com/gpu"
16
16
  operator: "Exists"
17
17
  {% endif %}
18
+ initContainers:
19
+ - name: setup-synchronizer
20
+ image: "alpine:3.19"
21
+ restartPolicy: Always
22
+ command: ["/bin/sh", "-c"]
23
+ args:
24
+ - |
25
+ apk add --no-cache socat
26
+ wget https://raw.githubusercontent.com/asaiacai/dumb_barrier/refs/heads/main/dumb_barrier.sh
27
+ sh -x dumb_barrier.sh
28
+ volumeMounts:
29
+ - name: sync
30
+ mountPath: /tmp/konduktor
31
+ env:
32
+ - name: MASTER_ADDR
33
+ value: "{{ master_addr }}"
34
+ - name: RANK
35
+ valueFrom:
36
+ fieldRef:
37
+ fieldPath: metadata.annotations['batch.kubernetes.io/job-completion-index']
38
+ - name: WORLD_SIZE
39
+ value: "{{ num_nodes }}"
40
+ - name: MASTER_PORT
41
+ value: "11111"
42
+ - name: GO_PORT
43
+ value: "11112"
18
44
  containers:
19
45
  # TODO(asaiacai): should decide here whether we add the fabric interfaces/containers init etc.
20
46
  - name: konduktor-container
47
+ {% if enable_ssh %}
48
+ ports:
49
+ - containerPort: 2222
50
+ {% endif %}
21
51
  image: {{ image_id }}
22
52
  # this is set during jobset definition since we need to know the jobset
23
53
  # name and number of nodes to set all the environment variables correctly here
@@ -56,6 +86,18 @@ kubernetes:
56
86
  fieldRef:
57
87
  fieldPath: metadata.uid
58
88
  {% endif %}
89
+ {% if enable_ssh %}
90
+ - name: KONDUKTOR_SSHPUB
91
+ valueFrom:
92
+ secretKeyRef:
93
+ name: {{ secret_name }}
94
+ key: PUBKEY
95
+ - name: KONDUKTOR_SSHPRIV
96
+ valueFrom:
97
+ secretKeyRef:
98
+ name: {{ secret_name }}
99
+ key: PRIVKEY
100
+ {% endif %}
59
101
  # these are for compatibility with skypilot
60
102
  - name: SKYPILOT_NODE_IPS
61
103
  value: "{{ node_hostnames }}"
@@ -70,6 +112,8 @@ kubernetes:
70
112
  volumeMounts:
71
113
  - name: shared-memory
72
114
  mountPath: /dev/shm
115
+ - name: sync
116
+ mountPath: /tmp/konduktor
73
117
  {% for secret_type, secret_name in mount_secrets.items() %}
74
118
  - name: {{ secret_type }}-secret
75
119
  mountPath: /run/konduktor/{{ secret_type }}-secret
@@ -89,22 +133,22 @@ kubernetes:
89
133
 
90
134
 
91
135
  PACKAGES="";
92
- {% if 'rsync' in run_cmd %}
136
+ {% if 'rsync' in run_cmd or 'rsync' in setup_cmd %}
93
137
  PACKAGES="$PACKAGES rsync";
94
138
  {% endif %}
95
- {% if 'curl' in run_cmd or tailscale_secret %}
139
+ {% if 'curl' in run_cmd or 'curl' in setup_cmd or tailscale_secret %}
96
140
  PACKAGES="$PACKAGES curl";
97
141
  {% endif %}
98
142
  {% if 'gs' in mount_secrets or 's3' in mount_secrets %}
99
143
  PACKAGES="$PACKAGES unzip wget";
100
144
  {% endif %}
101
- {% if 'git' in run_cmd %}
145
+ {% if 'git' in run_cmd or 'git' in setup_cmd %}
102
146
  PACKAGES="$PACKAGES git";
103
147
  {% endif %}
104
148
 
105
149
  if [ ! -z "${PACKAGES}" ]; then
106
150
  # Run apt update, install missing packages
107
- DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get update > ~/.konduktor/tmp/apt-update.log 2>&1 || \
151
+ DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get update 2>&1 | tee -a ~/.konduktor/tmp/apt-update.log 2>&1 || \
108
152
  $(prefix_cmd) echo "Warning: apt-get update failed. Continuing anyway..." >> ~/.konduktor/tmp/apt-update.log
109
153
  fi
110
154
 
@@ -125,22 +169,112 @@ kubernetes:
125
169
  done;
126
170
  if [ ! -z "$INSTALL_FIRST" ]; then
127
171
  $(prefix_cmd) echo "Installing core packages: $INSTALL_FIRST";
128
- DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y $INSTALL_FIRST >> ~/.konduktor/tmp/apt-install.log;
172
+ DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y $INSTALL_FIRST 2>&1 | tee -a ~/.konduktor/tmp/apt-install.log;
129
173
  fi;
130
174
 
131
175
  if [ ! -z "$MISSING_PACKAGES" ]; then
132
176
  $(prefix_cmd) echo "Installing missing packages: $MISSING_PACKAGES";
133
- DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y $MISSING_PACKAGES >> ~/.konduktor/tmp/apt-install.log;
177
+ DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y $MISSING_PACKAGES 2>&1 | tee -a ~/.konduktor/tmp/apt-install.log;
134
178
  fi;
135
179
  end_epoch=$(date +%s);
136
180
 
181
+ {% if enable_ssh %}
182
+
183
+ function InstallSSH {
184
+ export DEBIAN_FRONTEND=noninteractive
185
+ export TZ=Etc/UTC
186
+ if service sshd status > /dev/null 2>&1; then
187
+ $(prefix_cmd) echo "OpenSSH server is already started."
188
+ return
189
+ fi
190
+ # Check if OpenSSH server is already installed
191
+ if ! command -v sshd &> /dev/null; then
192
+ $(prefix_cmd) echo "OpenSSH server is not installed. Installing..."
193
+
194
+ DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt update
195
+ DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt install -y openssh-server
196
+
197
+ $(prefix_cmd) echo "OpenSSH server installation complete."
198
+ else
199
+ $(prefix_cmd) echo "OpenSSH server is already installed."
200
+ fi
201
+
202
+ # Set root password if SSHKEY is provided
203
+ # Enable root login in SSH configuration
204
+ $(prefix_cmd) sed -i '/^#PermitRootLogin/c\PermitRootLogin yes' /etc/ssh/sshd_config
205
+ $(prefix_cmd) sed -i '/^PermitRootLogin/c\PermitRootLogin yes' /etc/ssh/sshd_config
206
+ $(prefix_cmd) echo "Root login is enabled."
207
+
208
+ # Create the .ssh directory and authorized_keys file if they don't exist
209
+ if [ ! -d "$HOME/.ssh" ]; then
210
+ $(prefix_cmd) mkdir -p "$HOME/.ssh"
211
+ $(prefix_cmd) chmod 0700 "$HOME/.ssh"
212
+ $(prefix_cmd) echo "Directory $HOME/.ssh created."
213
+ fi
214
+ if [ ! -f "$HOME/.ssh/authorized_keys" ]; then
215
+ $(prefix_cmd) touch "$HOME/.ssh/authorized_keys"
216
+ $(prefix_cmd) chmod 0600 "$HOME/.ssh/authorized_keys"
217
+ $(prefix_cmd) echo "File $HOME/.ssh/authorized_keys created."
218
+ fi
219
+ # Check if the public key is not already present in authorized_keys
220
+ if ! grep -q "${KONDUKTOR_SSHPUB}" "$HOME/.ssh/authorized_keys"; then
221
+ # Append the public key to authorized_keys
222
+ $(prefix_cmd) echo "${KONDUKTOR_SSHPUB}" >> "$HOME/.ssh/authorized_keys"
223
+ $(prefix_cmd) echo "Public key added."
224
+ fi
225
+ if [ ! -f "$HOME/.ssh/konduktor-key" ]; then
226
+ # create the private key to authorized_keys
227
+ $(prefix_cmd) touch "$HOME/.ssh/konduktor-key"
228
+ $(prefix_cmd) chmod 0600 "$HOME/.ssh/konduktor-key"
229
+ $(prefix_cmd) echo "${KONDUKTOR_SSHPRIV}" >> "$HOME/.ssh/konduktor-key"
230
+ $(prefix_cmd) echo "private key added."
231
+ fi
232
+ if [ ! -f "$HOME/.ssh/config" ]; then
233
+ # create the private key to authorized_keys
234
+ $(prefix_cmd) touch "$HOME/.ssh/config"
235
+ $(prefix_cmd) chmod 0600 "$HOME/.ssh/config"
236
+ $(prefix_cmd) printf '\nHost *\n StrictHostKeyChecking no\n' >> "$HOME/.ssh/config"
237
+ $(prefix_cmd) echo "ssh config set"
238
+ fi
239
+
240
+ # turn off PAM to fix sshd login issue
241
+ $(prefix_cmd) sed -i 's/UsePAM yes/UsePAM no/' /etc/ssh/sshd_config
242
+
243
+ # set default port to 2222
244
+ $(prefix_cmd) sed -i 's/#Port 22/Port 2222/' /etc/ssh/sshd_config
245
+
246
+ echo "Exposing ENV variables"
247
+ env -0 | awk -v RS='\0' '
248
+ {
249
+ gsub(/\\/,"\\\\"); # escape existing backslashes first
250
+ gsub(/"/,"\\\""); # escape any double quotes
251
+ gsub(/\n/,"\\n"); # turn real newlines into the two characters \n
252
+ sub(/=/,"=\""); # open the value-quoting
253
+ print $0 "\""; # close the quote and add a newline record separator
254
+ }
255
+ ' > /etc/environment
256
+ echo "set -a; source /etc/environment; set +a;" >> /root/.bashrc
257
+
258
+ $(prefix_cmd) mkdir /run/sshd
259
+ $(prefix_cmd) chmod 0755 /run/sshd
260
+
261
+ $(prefix_cmd) service ssh start
262
+ $(prefix_cmd) echo "sshd service started"
263
+ }
264
+
265
+ InstallSSH
266
+ {% endif %}
267
+
137
268
  {% if tailscale_secret %}
138
- if ! command -v tailscale >/dev/null 2>&1; then
139
- export TS_HOSTNAME=$(echo "$POD_NAME" | sed 's/-[^-]*$//')
140
- $(prefix_cmd) curl -fsSL https://tailscale.com/install.sh | DEBIAN_FRONTEND=noninteractive $(prefix_cmd) sh >> ~/.konduktor/tmp/tailscale-install.log
141
- $(prefix_cmd) tailscaled --tun=userspace-networking >/dev/null 2>&1 &
142
- $(prefix_cmd) tailscale up --auth-key=${TS_AUTHKEY} --ssh --hostname=${TS_HOSTNAME} >/dev/null 2>&1
143
- fi
269
+ function InstallTailscale {
270
+ if ! command -v tailscale >/dev/null 2>&1; then
271
+ export TS_HOSTNAME=$(echo "$POD_NAME" | sed 's/-[^-]*$//')
272
+ $(prefix_cmd) curl -fsSL https://tailscale.com/install.sh | DEBIAN_FRONTEND=noninteractive $(prefix_cmd) sh 2>&1 | tee -a ~/.konduktor/tmp/tailscale-install.log
273
+ $(prefix_cmd) tailscaled --tun=userspace-networking >/dev/null 2>&1 &
274
+ $(prefix_cmd) tailscale up --auth-key=${TS_AUTHKEY} --ssh --hostname=${TS_HOSTNAME} >/dev/null 2>&1
275
+ fi
276
+ }
277
+ InstallTailscale &
144
278
  {% endif %}
145
279
  end_epoch=$(date +%s);
146
280
 
@@ -182,14 +316,42 @@ kubernetes:
182
316
  end_setup_time=$((end_epoch - start_setup));
183
317
  ulimit -Sc 0 && ulimit -Hc 0
184
318
  $(prefix_cmd) echo "===== KONDUKTOR: Initialization took $end_setup_time seconds ====="
185
- # run task
319
+ set +eo pipefail
186
320
  $(prefix_cmd) cd {{ remote_workdir }}
321
+ {% if setup_cmd %}
322
+ # setup task
323
+ $(prefix_cmd) echo "===== KONDUKTOR: Running setup ======="
324
+ {{ setup_cmd | indent( width=14 ) }}
325
+ {% endif %}
326
+
327
+ # synchronize workers before executing `run`
328
+ set -e
329
+ touch "/tmp/konduktor/SETUP"
330
+ # TODO(asaiacai): should we make this value tuneable for users?
331
+ TIMEOUT=3600
332
+ start_sync=$(date +%s);
333
+ DEADLINE=$(( $(date +%s) + TIMEOUT ))
334
+
335
+ echo "[KONDUKTOR: main] Waiting for workers to synchronize"
336
+ while [ ! -f "/tmp/konduktor/READY" ]; do
337
+ if [ "$(date +%s)" -ge "$DEADLINE" ]; then
338
+ echo "[KONDUKTOR: main] ERROR: Timed out after 2 minutes of waiting for worker synchronization"
339
+ exit 1
340
+ fi
341
+ sleep 0.5
342
+ done
343
+ echo "[KONDUKTOR: main] All workers have joined"
344
+ end_sync=$(date +%s);
345
+ echo "[KONDUKTOR: main] Synchronization took $((end_sync - start_sync)) seconds"
187
346
  set +eo pipefail
347
+ # run task
348
+ $(prefix_cmd) cd {{ remote_workdir }}
188
349
  $(prefix_cmd) echo "===== KONDUKTOR: Running task ====="
189
350
  start_epoch=$(date +%s);
190
351
  {{ run_cmd | indent( width=14 ) }}
191
352
  end_epoch=$(date +%s);
192
353
  exit_code=$?
354
+ set +ex
193
355
  $(prefix_cmd) echo "===== KONDUKTOR: Running task took $((end_epoch - start_epoch)) seconds and finished with exit code: $exit_code ====="
194
356
  exit $exit_code
195
357
  resources:
@@ -216,6 +378,8 @@ kubernetes:
216
378
  emptyDir:
217
379
  medium: "Memory"
218
380
  sizeLimit: 4Gi
381
+ - name: sync
382
+ emptyDir: {}
219
383
  {% for secret_type, secret_name in mount_secrets.items() %}
220
384
  - name: {{ secret_type }}-secret
221
385
  secret:
@@ -130,6 +130,10 @@ class CommandError(Exception):
130
130
  pass
131
131
 
132
132
 
133
class CreateSecretError(Exception):
    """Raised when setting a Kubernetes secret fails."""
135
+
136
+
133
137
  class MissingSecretError(Exception):
134
138
  pass
135
139
 
@@ -54,7 +54,7 @@ NO_ACCELERATOR_HELP_MESSAGE = (
54
54
  '(e.g. `nvidia.com/gpu` are setup correctly. '
55
55
  )
56
56
 
57
- _K8S_CLIENT_LOCK_PATH = '~/.konduktor/k8s_client.lock'
57
+ _K8S_CLIENT_LOCK_PATH = os.path.expanduser('~/.konduktor/k8s_client.lock')
58
58
  _K8s_CLIENT_LOCK = filelock.FileLock(_K8S_CLIENT_LOCK_PATH)
59
59
 
60
60
  logger = logging.get_logger(__name__)
@@ -578,11 +578,14 @@ def set_secret(
578
578
  secret_name: str,
579
579
  namespace: str,
580
580
  context: Optional[str],
581
- secret_key: str,
582
- secret_value: str,
581
+ data: Dict[str, str],
583
582
  ) -> Tuple[bool, Optional[str]]:
584
583
  """
585
584
  Create/update a secret in a namespace. Values are encoded to base64.
585
+ `secret` must be base64 encoded ie
586
+ ```
587
+ base64.b64encode(secret).decode()
588
+ ```
586
589
  """
587
590
  with _K8s_CLIENT_LOCK:
588
591
  secret_exists, response = check_secret_exists(
@@ -598,7 +601,7 @@ def set_secret(
598
601
  secret = kubernetes.client.V1Secret(
599
602
  metadata=kubernetes.client.V1ObjectMeta(**secret_metadata),
600
603
  type='Opaque',
601
- data={secret_key: secret_value},
604
+ data=data,
602
605
  )
603
606
 
604
607
  try:
@@ -67,7 +67,8 @@ def tail_loki_logs_ws(
67
67
  logger.debug(f'Loki URL: {loki_url}')
68
68
  params = {
69
69
  'query': urllib.parse.quote(
70
- f'{{k8s_job_name="{job_name}-workers-0"}} '
70
+ r'{' + f'k8s_job_name="{job_name}-workers-0",'
71
+ r' k8s_container_name="konduktor-container"} '
71
72
  f' | batch_kubernetes_io_job_completion_index = `{worker_id}`'
72
73
  ),
73
74
  'limit': num_logs,
@@ -87,6 +87,7 @@ def _get_single_resources_schema():
87
87
  '_cluster_config_overrides': {
88
88
  'type': 'object',
89
89
  },
90
+ 'job_config': {'type': 'object'},
90
91
  },
91
92
  }
92
93
 
@@ -153,8 +154,6 @@ def get_resources_schema():
153
154
  'items': multi_resources_schema,
154
155
  },
155
156
  },
156
- # Avoid job_recovery and spot_recovery being present at the same time.
157
- **_check_not_both_fields_present('job_recovery', 'spot_recovery'),
158
157
  }
159
158
 
160
159
 
@@ -337,84 +336,6 @@ def get_cluster_schema():
337
336
  }
338
337
 
339
338
 
340
- _NETWORK_CONFIG_SCHEMA = {
341
- 'vpc_name': {
342
- 'oneOf': [
343
- {
344
- 'type': 'string',
345
- },
346
- {
347
- 'type': 'null',
348
- },
349
- ],
350
- },
351
- 'use_internal_ips': {
352
- 'type': 'boolean',
353
- },
354
- 'ssh_proxy_command': {
355
- 'oneOf': [
356
- {
357
- 'type': 'string',
358
- },
359
- {
360
- 'type': 'null',
361
- },
362
- {
363
- 'type': 'object',
364
- 'required': [],
365
- 'additionalProperties': {
366
- 'anyOf': [
367
- {'type': 'string'},
368
- {'type': 'null'},
369
- ]
370
- },
371
- },
372
- ]
373
- },
374
- }
375
-
376
- _LABELS_SCHEMA = {
377
- # Deprecated: 'instance_tags' is replaced by 'labels'. Keeping for backward
378
- # compatibility. Will be removed after 0.8.0.
379
- 'instance_tags': {
380
- 'type': 'object',
381
- 'required': [],
382
- 'additionalProperties': {
383
- 'type': 'string',
384
- },
385
- },
386
- 'labels': {
387
- 'type': 'object',
388
- 'required': [],
389
- 'additionalProperties': {
390
- 'type': 'string',
391
- },
392
- },
393
- }
394
-
395
- _PRORPERTY_NAME_OR_CLUSTER_NAME_TO_PROPERTY = {
396
- 'oneOf': [
397
- {'type': 'string'},
398
- {
399
- # A list of single-element dict to pretain the
400
- # order.
401
- # Example:
402
- # property_name:
403
- # - my-cluster1-*: my-property-1
404
- # - my-cluster2-*: my-property-2
405
- # - "*"": my-property-3
406
- 'type': 'array',
407
- 'items': {
408
- 'type': 'object',
409
- 'additionalProperties': {'type': 'string'},
410
- 'maxProperties': 1,
411
- 'minProperties': 1,
412
- },
413
- },
414
- ]
415
- }
416
-
417
-
418
339
  class RemoteIdentityOptions(enum.Enum):
419
340
  """Enum for remote identity types.
420
341
 
@@ -454,9 +375,8 @@ _REMOTE_IDENTITY_SCHEMA_KUBERNETES = {
454
375
 
455
376
  def get_storage_schema():
456
377
  # pylint: disable=import-outside-toplevel
457
- from knoduktor.registry import registry
458
-
459
378
  from konduktor.data import storage
379
+ from konduktor.registry import registry
460
380
 
461
381
  return {
462
382
  '$schema': 'https://json-schema.org/draft/2020-12/schema',
@@ -496,6 +416,21 @@ def get_storage_schema():
496
416
  }
497
417
 
498
418
 
419
+ def get_job_schema():
420
+ """Schema for a job spec, which is defined under resources."""
421
+ return {
422
+ '$schema': 'https://json-schema.org/draft/2020-12/schema',
423
+ 'type': 'object',
424
+ 'required': [],
425
+ 'additionalProperties': False,
426
+ 'properties': {
427
+ 'completions': {
428
+ 'type': 'number',
429
+ },
430
+ },
431
+ }
432
+
433
+
499
434
  def get_config_schema():
500
435
  # pylint: disable=import-outside-toplevel
501
436
  from konduktor.data import registry
@@ -574,6 +509,17 @@ def get_config_schema():
574
509
  },
575
510
  }
576
511
 
512
+ ssh_configs = {
513
+ 'type': 'object',
514
+ 'required': [],
515
+ 'additionalProperties': False,
516
+ 'properties': {
517
+ 'enable': {
518
+ 'type': 'boolean',
519
+ },
520
+ },
521
+ }
522
+
577
523
  for cloud, config in cloud_configs.items():
578
524
  if cloud == 'kubernetes':
579
525
  config['properties'].update(_REMOTE_IDENTITY_SCHEMA_KUBERNETES)
@@ -589,6 +535,7 @@ def get_config_schema():
589
535
  'nvidia_gpus': gpu_configs,
590
536
  'allowed_clouds': allowed_clouds,
591
537
  'tailscale': tailscale_configs,
538
+ 'ssh': ssh_configs,
592
539
  **cloud_configs,
593
540
  },
594
541
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250512104920
3
+ Version: 0.1.0.dev20250514104854
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -1,12 +1,13 @@
1
- konduktor/__init__.py,sha256=FMLSHfj8jo627JKX7MyYFZH99jqw1EKMC7kpIpPgAPU,1540
1
+ konduktor/__init__.py,sha256=ODIjRocI7dlxyMFYh5S2VYJai-4MWm98MSyiSzaGDbA,1540
2
2
  konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
4
4
  konduktor/adaptors/common.py,sha256=uTdpKvgBSwYMmynx9wR5kiZQyTrdaw9ZI4KH6Z2E5Hw,4296
5
5
  konduktor/adaptors/gcp.py,sha256=ierTF4z7vwpJ9BsC7LSiwv4uLcjGXscwZOwQrddr2vM,4102
6
+ konduktor/authentication.py,sha256=jhw_virbyvrY_9WZqOXX3LyOP_HkpfmTssWUMxJVyBg,4564
6
7
  konduktor/backends/__init__.py,sha256=1Q6sqqdeMYarpTX_U-QVywJYf7idiUTRsyP-E4BQSOw,129
7
8
  konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
8
- konduktor/backends/jobset.py,sha256=veptYGXtk-ugWxBsBV5SnqI4rGKOlGfm_N3wApvNhSQ,8326
9
- konduktor/backends/jobset_utils.py,sha256=B0N0sx-pWF9_CDeuSXAU4nm3ZIwroyVcq6aUAlNZZRs,18376
9
+ konduktor/backends/jobset.py,sha256=UdhwAuZODLMbLY51Y2zOBsh6wg4Pb84oHVvUKzx3Z2w,8434
10
+ konduktor/backends/jobset_utils.py,sha256=4vMYOhTENfBL9khzFuj69-Vy4g0sBkUpXX-1bfPnVys,20054
10
11
  konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
11
12
  konduktor/cli.py,sha256=Ii9-2mrc-1f2ksLasA-xRb-JnEi_9ZeCXZ3lJ1GG8H8,23515
12
13
  konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
@@ -50,12 +51,12 @@ konduktor/dashboard/frontend/server.js,sha256=jcp6_Ww9YJD3uKY07jR3KMlAM6n1QZdxZn
50
51
  konduktor/dashboard/frontend/tailwind.config.js,sha256=fCnc48wvioIDOe5ldQ_6RE7F76cP7aU7pDrxBPJx-Fk,366
51
52
  konduktor/data/__init__.py,sha256=KMR2i3E9YcIpiIuCxtRdS7BQ1w2vUAbbve7agziJrLo,213
52
53
  konduktor/data/aws/__init__.py,sha256=_6zWfNNAK1QGgyKqg_yPYWcXlnffchyvIMErYa6tw_U,331
53
- konduktor/data/aws/s3.py,sha256=2hvbgZ9NuwXY88blxfdjSbONSXcyWF0CtheDZkMYorQ,48296
54
+ konduktor/data/aws/s3.py,sha256=T4FnCxilNp35bsgmE7j5O3j15FVbgWRdUH8YFXCiwSw,48335
54
55
  konduktor/data/constants.py,sha256=yXVEoTI2we1xOjVSU-bjRCQCLpVvpEvJ0GedXvSwEfw,127
55
56
  konduktor/data/data_utils.py,sha256=yrnu8_cY63TXqfWfFG3yqY2w_tE9UQK9jIQAFQCDVg0,9668
56
57
  konduktor/data/gcp/__init__.py,sha256=rlQxACBC_Vu36mdgPyJgUy4mGc_6Nt_a96JAuaPz2pQ,489
57
58
  konduktor/data/gcp/constants.py,sha256=dMfOiFccM8O6rUi9kClJcbvw1K1VnS1JzzQk3apq8ho,1483
58
- konduktor/data/gcp/gcs.py,sha256=kDbUzf8ALYzsw_G3sBRn_enQ8fjI-UKV0jeWuFZiULA,42018
59
+ konduktor/data/gcp/gcs.py,sha256=nqhCvQuGpHFPoxT5SKgxL25KtZuSg377Nh1bICiQwlc,42057
59
60
  konduktor/data/gcp/utils.py,sha256=FJQcMXZqtMIzjZ98b3lTTc0UbdPUKTDLsOsfJaaH5-s,214
60
61
  konduktor/data/registry.py,sha256=CUbMsN_Q17Pf4wRHkqZrycErEjTP7cLEdgcfwVGcEpc,696
61
62
  konduktor/data/storage.py,sha256=SDKRWDd7PCT9ytuz4cH0CejZj5QmWG_EZhUMVoTzWsc,35308
@@ -67,10 +68,10 @@ konduktor/manifests/controller_deployment.yaml,sha256=6p3oSLkEVONZsvKZGqVop0Dhn4
67
68
  konduktor/manifests/dashboard_deployment.yaml,sha256=xJLd4FbPMAosI0fIv5_8y7dV9bw0Vsf81l-w4MB_aU8,2837
68
69
  konduktor/manifests/dmesg_daemonset.yaml,sha256=pSWt7YOeTYjS0l0iki1fvHOs7MhY-sH-RQfVW6JJyno,1391
69
70
  konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw1vcq9_3gwHCu75Bi0A,3939
70
- konduktor/resource.py,sha256=68z8gC8Ivqktwv0R6ylMn9ZNocgkcRT0yIRGGKOdwcM,18491
71
- konduktor/task.py,sha256=Vu1TzYtLvSBz-HyHY2gsM2cMcUhMNQu44L3CWmYRXKE,35232
71
+ konduktor/resource.py,sha256=w2PdIrmQaJWA-GLSmVBcg4lxwuxvPulz35_YSKa5o24,19254
72
+ konduktor/task.py,sha256=2JOHRS4JE2FdN-M3qZKhII1hkUvWHbreNtkf30Mo2lo,35196
72
73
  konduktor/templates/jobset.yaml.j2,sha256=onYiHtXAgk-XBtji994hPu_g0hxnLzvmfxwjbdKdeZc,960
73
- konduktor/templates/pod.yaml.j2,sha256=s3eECjLevUWR-zvyeI8WjQWxQYJh_AMk1tdQVGNXpEM,9835
74
+ konduktor/templates/pod.yaml.j2,sha256=AobmCpvXRnZuQjfT000vN72Nuk380CCmWPHC_BVrUhM,17161
74
75
  konduktor/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
76
  konduktor/usage/constants.py,sha256=gCL8afIHZhO0dcxbJGpESE9sCC1cBSbeRnQ8GwNOY4M,612
76
77
  konduktor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -80,18 +81,18 @@ konduktor/utils/base64_utils.py,sha256=mF-Tw98mFRG70YE4w6s9feuQSCYZHOb8YatBZwMug
80
81
  konduktor/utils/common_utils.py,sha256=F5x7k4AdBB44u8PYRkaugORnZKnK3JLqGn1jHOKgUYo,14960
81
82
  konduktor/utils/constants.py,sha256=1DneiTR21lvKUcWdBGwC4I4fD4uPjbjLUilEnJS7rzA,216
82
83
  konduktor/utils/env_options.py,sha256=T41Slzf4Mzl-n45CGXXqdy2fCrYhPNZQ7RP5vmnN4xc,2258
83
- konduktor/utils/exceptions.py,sha256=IHyaP5ERZpPvWZeKWV3MVTyKsxo2Fq-13nhI0PRNQzk,6629
84
+ konduktor/utils/exceptions.py,sha256=bOYHk3SHR3XO__p9bPwVPz8g9k6weIRxGRFNkyzgZOA,6676
84
85
  konduktor/utils/kubernetes_enums.py,sha256=SabUueF6Bpzbpa57gyH5VB65xla2N9l8CZmAeYTfGmM,176
85
- konduktor/utils/kubernetes_utils.py,sha256=ivFVh90Gez19_JD5U4bgCO5zNtQUflF0hJsM5nZLj8A,23864
86
+ konduktor/utils/kubernetes_utils.py,sha256=1MZHwU4vy-exA4TA5_oTiV-zm1A2ayfeA0T_75DMFM8,23937
86
87
  konduktor/utils/log_utils.py,sha256=lgHCq4OdtJNfbpso-uYGONUCVNsUrUkUWjROarsHt6s,9897
87
- konduktor/utils/loki_utils.py,sha256=ND1pbbbFhLhLKw3870j44LpR_9MB0EkDJSs5K7nWdY4,3473
88
+ konduktor/utils/loki_utils.py,sha256=h2ZvZQr1nE_wXXsKsGMjhG2s2MXknNd4icydTR_ruKU,3539
88
89
  konduktor/utils/rich_utils.py,sha256=kdjNe6S2LlpOxyzhFHqMzCz7g4ROC4e7TPWgcbRsrQE,3577
89
- konduktor/utils/schemas.py,sha256=_VCWnsSgyP3u5cpACEmJeuqcy5mzu_fr0McHyZdiXd8,17757
90
+ konduktor/utils/schemas.py,sha256=2fHsTi3t9q3LXqOPrcpkmPsMbaoJBnuJstd6ULmDiUo,16455
90
91
  konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
91
92
  konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
92
93
  konduktor/utils/validator.py,sha256=tgBghVyedyzGx84-U2Qfoh_cJBE3oUk9gclMW90ORks,691
93
- konduktor_nightly-0.1.0.dev20250512104920.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
94
- konduktor_nightly-0.1.0.dev20250512104920.dist-info/METADATA,sha256=Ega2hwBGc5bYNmkhPKQdMzLWCO1ru_UcvdczmGosZBE,4366
95
- konduktor_nightly-0.1.0.dev20250512104920.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
96
- konduktor_nightly-0.1.0.dev20250512104920.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
97
- konduktor_nightly-0.1.0.dev20250512104920.dist-info/RECORD,,
94
+ konduktor_nightly-0.1.0.dev20250514104854.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
95
+ konduktor_nightly-0.1.0.dev20250514104854.dist-info/METADATA,sha256=ErMUfOWxJPkbM0by718uNtBgUv-2w7m5sqFzJ_cHc64,4366
96
+ konduktor_nightly-0.1.0.dev20250514104854.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
97
+ konduktor_nightly-0.1.0.dev20250514104854.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
98
+ konduktor_nightly-0.1.0.dev20250514104854.dist-info/RECORD,,