konduktor-nightly 0.1.0.dev20250519104943__py3-none-any.whl → 0.1.0.dev20250521104900__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +2 -2
- konduktor/templates/pod.yaml.j2 +0 -59
- {konduktor_nightly-0.1.0.dev20250519104943.dist-info → konduktor_nightly-0.1.0.dev20250521104900.dist-info}/METADATA +1 -1
- {konduktor_nightly-0.1.0.dev20250519104943.dist-info → konduktor_nightly-0.1.0.dev20250521104900.dist-info}/RECORD +7 -7
- {konduktor_nightly-0.1.0.dev20250519104943.dist-info → konduktor_nightly-0.1.0.dev20250521104900.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250519104943.dist-info → konduktor_nightly-0.1.0.dev20250521104900.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250519104943.dist-info → konduktor_nightly-0.1.0.dev20250521104900.dist-info}/entry_points.txt +0 -0
konduktor/__init__.py
CHANGED
@@ -14,7 +14,7 @@ __all__ = [
|
|
14
14
|
]
|
15
15
|
|
16
16
|
# Replaced with the current commit when building the wheels.
|
17
|
-
_KONDUKTOR_COMMIT_SHA = '
|
17
|
+
_KONDUKTOR_COMMIT_SHA = '46626c8c0df4bbbd5a8fc164b2d2f66c26dbdd33'
|
18
18
|
os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
|
19
19
|
|
20
20
|
|
@@ -48,5 +48,5 @@ def _get_git_commit():
|
|
48
48
|
|
49
49
|
|
50
50
|
__commit__ = _get_git_commit()
|
51
|
-
__version__ = '1.0.0.dev0.1.0.
|
51
|
+
__version__ = '1.0.0.dev0.1.0.dev20250521104900'
|
52
52
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
konduktor/templates/pod.yaml.j2
CHANGED
@@ -15,36 +15,6 @@ kubernetes:
|
|
15
15
|
- key: "nvidia.com/gpu"
|
16
16
|
operator: "Exists"
|
17
17
|
{% endif %}
|
18
|
-
initContainers:
|
19
|
-
- name: setup-synchronizer
|
20
|
-
image: "alpine:3.19"
|
21
|
-
restartPolicy: Always
|
22
|
-
command: ["/bin/sh", "-c"]
|
23
|
-
args:
|
24
|
-
- |
|
25
|
-
apk add --no-cache socat
|
26
|
-
wget https://raw.githubusercontent.com/asaiacai/dumb_barrier/refs/heads/main/dumb_barrier.sh
|
27
|
-
sh -x dumb_barrier.sh
|
28
|
-
volumeMounts:
|
29
|
-
- name: sync
|
30
|
-
mountPath: /tmp/konduktor
|
31
|
-
env:
|
32
|
-
- name: MASTER_ADDR
|
33
|
-
value: "{{ master_addr }}"
|
34
|
-
- name: RANK
|
35
|
-
valueFrom:
|
36
|
-
fieldRef:
|
37
|
-
fieldPath: metadata.annotations['batch.kubernetes.io/job-completion-index']
|
38
|
-
- name: WORLD_SIZE
|
39
|
-
value: "{{ num_nodes }}"
|
40
|
-
- name: MASTER_PORT
|
41
|
-
value: "11111"
|
42
|
-
- name: GO_PORT
|
43
|
-
value: "11112"
|
44
|
-
- name: POD_NAMESPACE
|
45
|
-
valueFrom:
|
46
|
-
fieldRef:
|
47
|
-
fieldPath: metadata.namespace
|
48
18
|
containers:
|
49
19
|
# TODO(asaiacai): should decide here whether we add the fabric interfaces/containers init etc.
|
50
20
|
- name: konduktor-container
|
@@ -327,33 +297,6 @@ kubernetes:
|
|
327
297
|
ulimit -Sc 0 && ulimit -Hc 0
|
328
298
|
$(prefix_cmd) echo "===== KONDUKTOR: Initialization took $end_setup_time seconds ====="
|
329
299
|
set +eo pipefail
|
330
|
-
$(prefix_cmd) cd {{ remote_workdir }}
|
331
|
-
{% if setup_cmd %}
|
332
|
-
# setup task
|
333
|
-
$(prefix_cmd) echo "===== KONDUKTOR: Running setup ======="
|
334
|
-
{{ setup_cmd | indent( width=14 ) }}
|
335
|
-
{% endif %}
|
336
|
-
|
337
|
-
# synchronize workers before executing `run`
|
338
|
-
set -e
|
339
|
-
touch "/tmp/konduktor/SETUP"
|
340
|
-
# TODO(asaiacai): should we make this value tuneable for users?
|
341
|
-
TIMEOUT=300
|
342
|
-
start_sync=$(date +%s);
|
343
|
-
DEADLINE=$(( $(date +%s) + TIMEOUT ))
|
344
|
-
|
345
|
-
echo "[KONDUKTOR: main] Waiting for workers to synchronize"
|
346
|
-
while [ ! -f "/tmp/konduktor/READY" ]; do
|
347
|
-
if [ "$(date +%s)" -ge "$DEADLINE" ]; then
|
348
|
-
echo "[KONDUKTOR: main] ERROR: Timed out after 2 minutes of waiting for worker synchronization"
|
349
|
-
exit 1
|
350
|
-
fi
|
351
|
-
sleep 0.5
|
352
|
-
done
|
353
|
-
echo "[KONDUKTOR: main] All workers have joined"
|
354
|
-
end_sync=$(date +%s);
|
355
|
-
echo "[KONDUKTOR: main] Synchronization took $((end_sync - start_sync)) seconds"
|
356
|
-
set +eo pipefail
|
357
300
|
# run task
|
358
301
|
$(prefix_cmd) cd {{ remote_workdir }}
|
359
302
|
$(prefix_cmd) echo "===== KONDUKTOR: Running task ====="
|
@@ -395,8 +338,6 @@ kubernetes:
|
|
395
338
|
secret:
|
396
339
|
secretName: {{ secret_name }}
|
397
340
|
{% endfor %}
|
398
|
-
|
399
|
-
|
400
341
|
|
401
342
|
# TODO(asaiacai): should we add nodeSelectors here or leave to
|
402
343
|
# kueue resource flavors. leaning towards defining
|
@@ -1,4 +1,4 @@
|
|
1
|
-
konduktor/__init__.py,sha256=
|
1
|
+
konduktor/__init__.py,sha256=BpU070cC1XqK9i4AWlWvYixgyNGfMsI40UCLdAsI8us,1540
|
2
2
|
konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
|
4
4
|
konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
|
@@ -71,7 +71,7 @@ konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw
|
|
71
71
|
konduktor/resource.py,sha256=w2PdIrmQaJWA-GLSmVBcg4lxwuxvPulz35_YSKa5o24,19254
|
72
72
|
konduktor/task.py,sha256=ofwd8WIhfD6C3ThLcv6X3GUzQHyZ6ddjUagE-umF4K0,35207
|
73
73
|
konduktor/templates/jobset.yaml.j2,sha256=onYiHtXAgk-XBtji994hPu_g0hxnLzvmfxwjbdKdeZc,960
|
74
|
-
konduktor/templates/pod.yaml.j2,sha256=
|
74
|
+
konduktor/templates/pod.yaml.j2,sha256=v0s_gLmr7bBMVtdiElHbBvt36bJzAxBptqWVLzvdvE4,15520
|
75
75
|
konduktor/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
76
76
|
konduktor/usage/constants.py,sha256=gCL8afIHZhO0dcxbJGpESE9sCC1cBSbeRnQ8GwNOY4M,612
|
77
77
|
konduktor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -91,8 +91,8 @@ konduktor/utils/schemas.py,sha256=2fHsTi3t9q3LXqOPrcpkmPsMbaoJBnuJstd6ULmDiUo,16
|
|
91
91
|
konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
|
92
92
|
konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
|
93
93
|
konduktor/utils/validator.py,sha256=tgBghVyedyzGx84-U2Qfoh_cJBE3oUk9gclMW90ORks,691
|
94
|
-
konduktor_nightly-0.1.0.
|
95
|
-
konduktor_nightly-0.1.0.
|
96
|
-
konduktor_nightly-0.1.0.
|
97
|
-
konduktor_nightly-0.1.0.
|
98
|
-
konduktor_nightly-0.1.0.
|
94
|
+
konduktor_nightly-0.1.0.dev20250521104900.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
|
95
|
+
konduktor_nightly-0.1.0.dev20250521104900.dist-info/METADATA,sha256=Zy1uJzy9wocXmmbkhqofIoykrQ_Re5xwCHvnj6jxJGM,4366
|
96
|
+
konduktor_nightly-0.1.0.dev20250521104900.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
97
|
+
konduktor_nightly-0.1.0.dev20250521104900.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
|
98
|
+
konduktor_nightly-0.1.0.dev20250521104900.dist-info/RECORD,,
|
File without changes
|
File without changes
|