konduktor-nightly 0.1.0.dev20250430104745__py3-none-any.whl → 0.1.0.dev20250502104736__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +2 -2
- konduktor/backends/jobset_utils.py +25 -3
- konduktor/templates/pod.yaml.j2 +34 -5
- konduktor/utils/exceptions.py +4 -0
- konduktor/utils/schemas.py +12 -0
- konduktor/utils/ux_utils.py +6 -0
- {konduktor_nightly-0.1.0.dev20250430104745.dist-info → konduktor_nightly-0.1.0.dev20250502104736.dist-info}/METADATA +1 -1
- {konduktor_nightly-0.1.0.dev20250430104745.dist-info → konduktor_nightly-0.1.0.dev20250502104736.dist-info}/RECORD +11 -11
- {konduktor_nightly-0.1.0.dev20250430104745.dist-info → konduktor_nightly-0.1.0.dev20250502104736.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250430104745.dist-info → konduktor_nightly-0.1.0.dev20250502104736.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250430104745.dist-info → konduktor_nightly-0.1.0.dev20250502104736.dist-info}/entry_points.txt +0 -0
konduktor/__init__.py
CHANGED
@@ -14,7 +14,7 @@ __all__ = [
|
|
14
14
|
]
|
15
15
|
|
16
16
|
# Replaced with the current commit when building the wheels.
|
17
|
-
_KONDUKTOR_COMMIT_SHA = '
|
17
|
+
_KONDUKTOR_COMMIT_SHA = '8035a53e2bd3595665be51149e8287dc721d5edb'
|
18
18
|
os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
|
19
19
|
|
20
20
|
|
@@ -48,5 +48,5 @@ def _get_git_commit():
|
|
48
48
|
|
49
49
|
|
50
50
|
__commit__ = _get_git_commit()
|
51
|
-
__version__ = '1.0.0.dev0.1.0.
|
51
|
+
__version__ = '1.0.0.dev0.1.0.dev20250502104736'
|
52
52
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
konduktor/backends/jobset_utils.py
CHANGED
@@ -15,9 +15,15 @@ if typing.TYPE_CHECKING:
|
|
15
15
|
from datetime import timedelta
|
16
16
|
|
17
17
|
import konduktor
|
18
|
-
from konduktor import constants, kube_client, logging
|
18
|
+
from konduktor import config, constants, kube_client, logging
|
19
19
|
from konduktor.data import registry
|
20
|
-
from konduktor.utils import
|
20
|
+
from konduktor.utils import (
|
21
|
+
common_utils,
|
22
|
+
exceptions,
|
23
|
+
kubernetes_utils,
|
24
|
+
log_utils,
|
25
|
+
ux_utils,
|
26
|
+
)
|
21
27
|
|
22
28
|
if typing.TYPE_CHECKING:
|
23
29
|
pass
|
@@ -42,7 +48,7 @@ _JOBSET_METADATA_LABELS = {
|
|
42
48
|
'jobset_accelerator_label': JOBSET_ACCELERATOR_LABEL,
|
43
49
|
'jobset_num_accelerators_label': JOBSET_NUM_ACCELERATORS_LABEL,
|
44
50
|
}
|
45
|
-
|
51
|
+
|
46
52
|
_RUN_DURATION_ANNOTATION_KEY = 'kueue.x-k8s.io/maxRunDurationSeconds'
|
47
53
|
|
48
54
|
|
@@ -130,6 +136,20 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
|
|
130
136
|
storage_secrets[store_scheme] = cloud_store._STORE.get_k8s_credential_name()
|
131
137
|
sync_commands.append(cloud_store.make_sync_file_command(src, dst))
|
132
138
|
|
139
|
+
tailscale_secret = config.get_nested(('tailscale', 'secret_name'), None)
|
140
|
+
if tailscale_secret:
|
141
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
142
|
+
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
143
|
+
secret_exist, err = kubernetes_utils.check_secret_exists(
|
144
|
+
tailscale_secret, namespace, context
|
145
|
+
)
|
146
|
+
if not secret_exist:
|
147
|
+
with ux_utils.print_exception_no_traceback():
|
148
|
+
raise exceptions.MissingSecretError(
|
149
|
+
f'No tailscale auth-key secret `{tailscale_secret}` found even '
|
150
|
+
f'though specified by `tailscale.secret_name`: {err}'
|
151
|
+
)
|
152
|
+
|
133
153
|
assert task.resources is not None, 'Task resources are required'
|
134
154
|
assert task.resources.cpus is not None, 'Task resources cpus are required'
|
135
155
|
assert task.resources.memory is not None, 'Task resources memory are required'
|
@@ -154,6 +174,8 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
|
|
154
174
|
'mount_secrets': storage_secrets,
|
155
175
|
'remote_workdir': constants.KONDUKTOR_REMOTE_WORKDIR,
|
156
176
|
'user': common_utils.get_cleaned_username(),
|
177
|
+
# Tailscale credentials
|
178
|
+
'tailscale_secret': tailscale_secret,
|
157
179
|
},
|
158
180
|
temp.name,
|
159
181
|
)
|
konduktor/templates/pod.yaml.j2
CHANGED
@@ -16,8 +16,7 @@ kubernetes:
|
|
16
16
|
operator: "Exists"
|
17
17
|
{% endif %}
|
18
18
|
containers:
|
19
|
-
# TODO(asaiacai): should decide here whether we add the fabric interfaces/containers init etc.
|
20
|
-
# TODO(asaiacai): add ulimits
|
19
|
+
# TODO(asaiacai): should decide here whether we add the fabric interfaces/containers init etc.
|
21
20
|
- name: konduktor-container
|
22
21
|
image: {{ image_id }}
|
23
22
|
# this is set during jobset definition since we need to know the jobset
|
@@ -39,6 +38,24 @@ kubernetes:
|
|
39
38
|
value: "{{ num_nodes }}"
|
40
39
|
- name: NUM_GPUS_PER_NODE
|
41
40
|
value: "{{ num_gpus }}"
|
41
|
+
{% if tailscale_secret %}
|
42
|
+
- name: TS_USERSPACE
|
43
|
+
value: "true"
|
44
|
+
- name: TS_AUTHKEY
|
45
|
+
valueFrom:
|
46
|
+
secretKeyRef:
|
47
|
+
name: {{ tailscale_secret }}
|
48
|
+
key: TS_AUTHKEY
|
49
|
+
optional: true
|
50
|
+
- name: POD_NAME
|
51
|
+
valueFrom:
|
52
|
+
fieldRef:
|
53
|
+
fieldPath: metadata.name
|
54
|
+
- name: POD_UID
|
55
|
+
valueFrom:
|
56
|
+
fieldRef:
|
57
|
+
fieldPath: metadata.uid
|
58
|
+
{% endif %}
|
42
59
|
# these are for compatibility with skypilot
|
43
60
|
- name: SKYPILOT_NODE_IPS
|
44
61
|
value: "{{ node_hostnames }}"
|
@@ -58,7 +75,6 @@ kubernetes:
|
|
58
75
|
mountPath: /run/konduktor/{{ secret_type }}-secret
|
59
76
|
{% endfor %}
|
60
77
|
command: ["bash", "-c"]
|
61
|
-
# TODO(asaiacai): should we just mount this as a configmap instead? - Edit: probably not
|
62
78
|
args:
|
63
79
|
- |
|
64
80
|
# TODO(asaiacai): add debug environment variable for printing the apt-update, apt-install, sync-files output
|
@@ -76,7 +92,7 @@ kubernetes:
|
|
76
92
|
{% if 'rsync' in run_cmd %}
|
77
93
|
PACKAGES="$PACKAGES rsync";
|
78
94
|
{% endif %}
|
79
|
-
{% if 'curl' in run_cmd %}
|
95
|
+
{% if 'curl' in run_cmd or tailscale_secret %}
|
80
96
|
PACKAGES="$PACKAGES curl";
|
81
97
|
{% endif %}
|
82
98
|
{% if 'gs' in mount_secrets or 's3' in mount_secrets %}
|
@@ -117,6 +133,17 @@ kubernetes:
|
|
117
133
|
DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get install -y $MISSING_PACKAGES >> ~/.konduktor/tmp/apt-install.log;
|
118
134
|
fi;
|
119
135
|
end_epoch=$(date +%s);
|
136
|
+
|
137
|
+
{% if tailscale_secret %}
|
138
|
+
if ! command -v tailscale >/dev/null 2>&1; then
|
139
|
+
export TS_HOSTNAME=$(echo "$POD_NAME" | sed 's/-[^-]*$//')
|
140
|
+
$(prefix_cmd) curl -fsSL https://tailscale.com/install.sh | DEBIAN_FRONTEND=noninteractive $(prefix_cmd) sh >> ~/.konduktor/tmp/tailscale-install.log
|
141
|
+
$(prefix_cmd) tailscaled --tun=userspace-networking >/dev/null 2>&1 &
|
142
|
+
$(prefix_cmd) tailscale up --auth-key=${TS_AUTHKEY} --ssh --hostname=${TS_HOSTNAME} >/dev/null 2>&1
|
143
|
+
fi
|
144
|
+
{% endif %}
|
145
|
+
end_epoch=$(date +%s);
|
146
|
+
|
120
147
|
$(prefix_cmd) echo "===== KONDUKTOR: Installing packages took $((end_epoch - start_epoch)) seconds ====="
|
121
148
|
|
122
149
|
# unpack secrets credentials
|
@@ -153,6 +180,7 @@ kubernetes:
|
|
153
180
|
{% endif %}
|
154
181
|
end_epoch=$(date +%s);
|
155
182
|
end_setup_time=$((end_epoch - start_setup));
|
183
|
+
ulimit -Sc 0 && ulimit -Hc 0
|
156
184
|
$(prefix_cmd) echo "===== KONDUKTOR: Initialization took $end_setup_time seconds ====="
|
157
185
|
# run task
|
158
186
|
$(prefix_cmd) cd {{ remote_workdir }}
|
@@ -180,7 +208,8 @@ kubernetes:
|
|
180
208
|
{% endif %}
|
181
209
|
securityContext:
|
182
210
|
capabilities:
|
183
|
-
add:
|
211
|
+
add:
|
212
|
+
- "IPC_LOCK" # May be needed for memlock
|
184
213
|
|
185
214
|
volumes:
|
186
215
|
- name: shared-memory
|
konduktor/utils/exceptions.py
CHANGED
konduktor/utils/schemas.py
CHANGED
@@ -563,6 +563,17 @@ def get_config_schema():
|
|
563
563
|
},
|
564
564
|
}
|
565
565
|
|
566
|
+
tailscale_configs = {
|
567
|
+
'type': 'object',
|
568
|
+
'required': [],
|
569
|
+
'additionalProperties': False,
|
570
|
+
'properties': {
|
571
|
+
'secret_name': {
|
572
|
+
'type': 'string',
|
573
|
+
},
|
574
|
+
},
|
575
|
+
}
|
576
|
+
|
566
577
|
for cloud, config in cloud_configs.items():
|
567
578
|
if cloud == 'kubernetes':
|
568
579
|
config['properties'].update(_REMOTE_IDENTITY_SCHEMA_KUBERNETES)
|
@@ -577,6 +588,7 @@ def get_config_schema():
|
|
577
588
|
'admin_policy': admin_policy_schema,
|
578
589
|
'nvidia_gpus': gpu_configs,
|
579
590
|
'allowed_clouds': allowed_clouds,
|
591
|
+
'tailscale': tailscale_configs,
|
580
592
|
**cloud_configs,
|
581
593
|
},
|
582
594
|
}
|
konduktor/utils/ux_utils.py
CHANGED
@@ -11,6 +11,7 @@ from typing import Callable, Optional, Union
|
|
11
11
|
import colorama
|
12
12
|
import rich.console as rich_console
|
13
13
|
|
14
|
+
from konduktor import config
|
14
15
|
from konduktor import logging as konduktor_logging
|
15
16
|
|
16
17
|
if typing.TYPE_CHECKING:
|
@@ -211,6 +212,11 @@ def command_hint_messages(hint_type: CommandHintType, job_id: str) -> str:
|
|
211
212
|
)
|
212
213
|
hint_str = '\n📋 Useful Commands'
|
213
214
|
hint_str += f'{job_hint_str}'
|
215
|
+
if config.get_nested(('tailscale', 'secret_name'), None) is not None:
|
216
|
+
hint_str += (
|
217
|
+
f'\n{INDENT_SYMBOL}To tailscale ssh:\t\t'
|
218
|
+
f'{BOLD}ssh root@{job_id}-workers-0-0 {RESET_BOLD}'
|
219
|
+
)
|
214
220
|
return hint_str
|
215
221
|
else:
|
216
222
|
raise ValueError(f'Invalid hint type: {hint_type}')
|
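{konduktor_nightly-0.1.0.dev20250430104745.dist-info → konduktor_nightly-0.1.0.dev20250502104736.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@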
|
|
1
|
-
konduktor/__init__.py,sha256=
|
1
|
+
konduktor/__init__.py,sha256=jMTLcTV0GSBv8eb57oCZaR9dTjrcnVas0dkuTq_7HAg,1540
|
2
2
|
konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
|
4
4
|
konduktor/adaptors/common.py,sha256=uTdpKvgBSwYMmynx9wR5kiZQyTrdaw9ZI4KH6Z2E5Hw,4296
|
@@ -6,7 +6,7 @@ konduktor/adaptors/gcp.py,sha256=ierTF4z7vwpJ9BsC7LSiwv4uLcjGXscwZOwQrddr2vM,410
|
|
6
6
|
konduktor/backends/__init__.py,sha256=1Q6sqqdeMYarpTX_U-QVywJYf7idiUTRsyP-E4BQSOw,129
|
7
7
|
konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
|
8
8
|
konduktor/backends/jobset.py,sha256=veptYGXtk-ugWxBsBV5SnqI4rGKOlGfm_N3wApvNhSQ,8326
|
9
|
-
konduktor/backends/jobset_utils.py,sha256=
|
9
|
+
konduktor/backends/jobset_utils.py,sha256=B0N0sx-pWF9_CDeuSXAU4nm3ZIwroyVcq6aUAlNZZRs,18376
|
10
10
|
konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
|
11
11
|
konduktor/cli.py,sha256=Ii9-2mrc-1f2ksLasA-xRb-JnEi_9ZeCXZ3lJ1GG8H8,23515
|
12
12
|
konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
|
@@ -70,7 +70,7 @@ konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw
|
|
70
70
|
konduktor/resource.py,sha256=68z8gC8Ivqktwv0R6ylMn9ZNocgkcRT0yIRGGKOdwcM,18491
|
71
71
|
konduktor/task.py,sha256=Vu1TzYtLvSBz-HyHY2gsM2cMcUhMNQu44L3CWmYRXKE,35232
|
72
72
|
konduktor/templates/jobset.yaml.j2,sha256=onYiHtXAgk-XBtji994hPu_g0hxnLzvmfxwjbdKdeZc,960
|
73
|
-
konduktor/templates/pod.yaml.j2,sha256=
|
73
|
+
konduktor/templates/pod.yaml.j2,sha256=s3eECjLevUWR-zvyeI8WjQWxQYJh_AMk1tdQVGNXpEM,9835
|
74
74
|
konduktor/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
75
75
|
konduktor/usage/constants.py,sha256=gCL8afIHZhO0dcxbJGpESE9sCC1cBSbeRnQ8GwNOY4M,612
|
76
76
|
konduktor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -80,18 +80,18 @@ konduktor/utils/base64_utils.py,sha256=mF-Tw98mFRG70YE4w6s9feuQSCYZHOb8YatBZwMug
|
|
80
80
|
konduktor/utils/common_utils.py,sha256=F5x7k4AdBB44u8PYRkaugORnZKnK3JLqGn1jHOKgUYo,14960
|
81
81
|
konduktor/utils/constants.py,sha256=1DneiTR21lvKUcWdBGwC4I4fD4uPjbjLUilEnJS7rzA,216
|
82
82
|
konduktor/utils/env_options.py,sha256=T41Slzf4Mzl-n45CGXXqdy2fCrYhPNZQ7RP5vmnN4xc,2258
|
83
|
-
konduktor/utils/exceptions.py,sha256=
|
83
|
+
konduktor/utils/exceptions.py,sha256=IHyaP5ERZpPvWZeKWV3MVTyKsxo2Fq-13nhI0PRNQzk,6629
|
84
84
|
konduktor/utils/kubernetes_enums.py,sha256=SabUueF6Bpzbpa57gyH5VB65xla2N9l8CZmAeYTfGmM,176
|
85
85
|
konduktor/utils/kubernetes_utils.py,sha256=ivFVh90Gez19_JD5U4bgCO5zNtQUflF0hJsM5nZLj8A,23864
|
86
86
|
konduktor/utils/log_utils.py,sha256=lgHCq4OdtJNfbpso-uYGONUCVNsUrUkUWjROarsHt6s,9897
|
87
87
|
konduktor/utils/loki_utils.py,sha256=ND1pbbbFhLhLKw3870j44LpR_9MB0EkDJSs5K7nWdY4,3473
|
88
88
|
konduktor/utils/rich_utils.py,sha256=kdjNe6S2LlpOxyzhFHqMzCz7g4ROC4e7TPWgcbRsrQE,3577
|
89
|
-
konduktor/utils/schemas.py,sha256=
|
89
|
+
konduktor/utils/schemas.py,sha256=_VCWnsSgyP3u5cpACEmJeuqcy5mzu_fr0McHyZdiXd8,17757
|
90
90
|
konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
|
91
|
-
konduktor/utils/ux_utils.py,sha256=
|
91
|
+
konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
|
92
92
|
konduktor/utils/validator.py,sha256=tgBghVyedyzGx84-U2Qfoh_cJBE3oUk9gclMW90ORks,691
|
93
|
-
konduktor_nightly-0.1.0.
|
94
|
-
konduktor_nightly-0.1.0.
|
95
|
-
konduktor_nightly-0.1.0.
|
96
|
-
konduktor_nightly-0.1.0.
|
97
|
-
konduktor_nightly-0.1.0.
|
93
|
+
konduktor_nightly-0.1.0.dev20250502104736.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
|
94
|
+
konduktor_nightly-0.1.0.dev20250502104736.dist-info/METADATA,sha256=smj8TlY9k4hFEtZNDLlnl5W-b8UjtFepi-iTqGiuI9o,4366
|
95
|
+
konduktor_nightly-0.1.0.dev20250502104736.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
96
|
+
konduktor_nightly-0.1.0.dev20250502104736.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
|
97
|
+
konduktor_nightly-0.1.0.dev20250502104736.dist-info/RECORD,,
|
File without changes
|
File without changes
|