konduktor-nightly 0.1.0.dev20250714105226__py3-none-any.whl → 0.1.0.dev20250716105229__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
konduktor/__init__.py CHANGED
@@ -14,7 +14,7 @@ __all__ = [
14
14
  ]
15
15
 
16
16
  # Replaced with the current commit when building the wheels.
17
- _KONDUKTOR_COMMIT_SHA = '5760f2c18ed487270e9244d22c7209eee12821c1'
17
+ _KONDUKTOR_COMMIT_SHA = 'd7075fa88a909a1b4ccf58487d0de364c2a74e7d'
18
18
  os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
19
19
 
20
20
 
@@ -48,5 +48,5 @@ def _get_git_commit():
48
48
 
49
49
 
50
50
  __commit__ = _get_git_commit()
51
- __version__ = '1.0.0.dev0.1.0.dev20250714105226'
51
+ __version__ = '1.0.0.dev0.1.0.dev20250716105229'
52
52
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
@@ -79,6 +79,8 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
79
79
  Returns:
80
80
  Dict[str, Any]: k8s pod spec
81
81
  """
82
+ context = kubernetes_utils.get_current_kube_config_context_name()
83
+ namespace = kubernetes_utils.get_kube_config_context_namespace(context)
82
84
 
83
85
  # fill out the templating variables
84
86
  assert task.resources is not None, 'Task resources are required'
@@ -107,7 +109,6 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
107
109
  sync_commands = []
108
110
  mkdir_commands = []
109
111
  storage_secrets = {}
110
-
111
112
  # first do storage_mount sync
112
113
  for dst, store in task.storage_mounts.items():
113
114
  # TODO(asaiacai) idk why but theres an extra storage mount for the
@@ -122,6 +123,13 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
122
123
  # should impelement a method here instead of raw dog dict access
123
124
  cloud_store = registry._REGISTRY[store_scheme]
124
125
  storage_secrets[store_scheme] = cloud_store._STORE.get_k8s_credential_name()
126
+ exists, _ = kubernetes_utils.check_secret_exists(
127
+ storage_secrets[store_scheme], namespace=namespace, context=context
128
+ )
129
+ assert exists, (
130
+ f"secret {storage_secrets[store_scheme]} doesn't "
131
+ f'exist in namespace {namespace}'
132
+ )
125
133
  mkdir_commands.append(
126
134
  f'cd {constants.KONDUKTOR_REMOTE_WORKDIR};' f'mkdir -p {dst}'
127
135
  )
@@ -142,10 +150,15 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
142
150
  f'mkdir -p {os.path.dirname(dst)}'
143
151
  )
144
152
  storage_secrets[store_scheme] = cloud_store._STORE.get_k8s_credential_name()
153
+ exists, reason = kubernetes_utils.check_secret_exists(
154
+ storage_secrets[store_scheme], namespace=namespace, context=context
155
+ )
156
+ assert exists, (
157
+ f'secret {storage_secrets[store_scheme]} '
158
+ f"doesn't exist in namespace {namespace}"
159
+ )
145
160
  sync_commands.append(cloud_store.make_sync_file_command(src, dst))
146
161
 
147
- context = kubernetes_utils.get_current_kube_config_context_name()
148
- namespace = kubernetes_utils.get_kube_config_context_namespace(context)
149
162
  tailscale_secret = config.get_nested(('tailscale', 'secret_name'), None)
150
163
  if tailscale_secret:
151
164
  secret_exist, err = kubernetes_utils.check_secret_exists(
@@ -188,8 +201,6 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
188
201
  env_secret_envs = []
189
202
  default_secrets = []
190
203
 
191
- context = kubernetes_utils.get_current_kube_config_context_name()
192
- namespace = kubernetes_utils.get_kube_config_context_namespace(context)
193
204
  user_hash = common_utils.get_user_hash()
194
205
  label_selector = f'konduktor/owner={user_hash}'
195
206
  user_secrets = kubernetes_utils.list_secrets(
@@ -251,6 +262,8 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
251
262
  'git_ssh': git_ssh_secret_name,
252
263
  # --kind default
253
264
  'default_secrets': default_secrets,
265
+ # KONDUKTOR_DEBUG
266
+ 'konduktor_debug': os.getenv('KONDUKTOR_DEBUG', 0),
254
267
  },
255
268
  temp.name,
256
269
  )
konduktor/cli.py CHANGED
@@ -713,6 +713,13 @@ def launch(
713
713
  required=False,
714
714
  )
715
715
  @click.option('--all', '-a', default=None, is_flag=True, help='Tear down all jobs.')
716
+ @click.option(
717
+ '--all-users',
718
+ '--all_users',
719
+ default=False,
720
+ is_flag=True,
721
+ help='Include other users for teardown',
722
+ )
716
723
  @click.option(
717
724
  '--yes',
718
725
  '-y',
@@ -723,7 +730,8 @@ def launch(
723
730
  )
724
731
  def down(
725
732
  jobs: List[str],
726
- all: Optional[bool], # pylint: disable=redefined-builtin
733
+ all: Optional[bool],
734
+ all_users: Optional[bool],
727
735
  yes: bool,
728
736
  ):
729
737
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
@@ -753,26 +761,39 @@ def down(
753
761
  # Tear down all jobs matching a pattern.
754
762
  konduktor down "test-*"
755
763
  \b
756
- # Tear down all existing jobs.
764
+ # Tear down all of this users jobs.
757
765
  konduktor down -a
766
+ konduktor down --all
767
+
768
+ # Tear down all jobs across all users
769
+ konduktor down --all --all-users
758
770
 
759
771
  """
760
772
 
761
773
  context = kubernetes_utils.get_current_kube_config_context_name()
762
774
  namespace = kubernetes_utils.get_kube_config_context_namespace(context)
775
+ jobs_response = jobset_utils.list_jobset(namespace)
776
+ assert jobs_response
777
+ jobs_specs = [
778
+ job
779
+ for job in jobs_response['items']
780
+ if (
781
+ job['metadata']['labels'][jobset_utils.JOBSET_USERID_LABEL]
782
+ == common_utils.user_and_hostname_hash()
783
+ and not all_users
784
+ )
785
+ ]
763
786
 
764
787
  if all:
765
- jobs_specs = jobset_utils.list_jobset(namespace)
766
788
  assert jobs_specs is not None, f'No jobs found in namespace {namespace}'
767
789
  assert len(jobs_specs) > 0, f'No jobs found in namespace {namespace}'
768
- jobs = [job['metadata']['name'] for job in jobs_specs['items']]
790
+ jobs = [job['metadata']['name'] for job in jobs_specs]
769
791
  elif jobs:
770
792
  # Get all available jobs to match against patterns
771
- jobs_specs = jobset_utils.list_jobset(namespace)
772
- if jobs_specs is None or len(jobs_specs.get('items', [])) == 0:
793
+ if len(jobs_specs) == 0:
773
794
  raise click.ClickException(f'No jobs found in namespace {namespace}')
774
795
 
775
- all_job_names = [job['metadata']['name'] for job in jobs_specs['items']]
796
+ all_job_names = [job['metadata']['name'] for job in jobs_specs]
776
797
  matched_jobs = []
777
798
 
778
799
  for job_pattern in jobs:
konduktor/data/gcp/gcs.py CHANGED
@@ -162,8 +162,6 @@ class GcsStore(storage_utils.AbstractStore):
162
162
  # This will generate
163
163
  # ~/.config/gcloud/application_default_credentials.json.
164
164
  f'{_INDENT_PREFIX} $ gcloud auth application-default login\n'
165
- f'{_INDENT_PREFIX}For more info: '
166
- 'https://konduktor.readthedocs.io/en/latest/getting-started/installation.html#google-cloud-platform-gcp' # noqa: E501
167
165
  )
168
166
  _APPLICATION_CREDENTIAL_HINT = (
169
167
  'Run the following commands:\n'
@@ -171,8 +169,6 @@ class GcsStore(storage_utils.AbstractStore):
171
169
  f'{_INDENT_PREFIX}Or set the environment variable '
172
170
  'GOOGLE_APPLICATION_CREDENTIALS '
173
171
  'to the path of your service account key file.\n'
174
- f'{_INDENT_PREFIX}For more info: '
175
- 'https://konduktor.readthedocs.ioo/en/latest/getting-started/installation.html#google-cloud-platform-gcp' # noqa: E501
176
172
  )
177
173
 
178
174
  _REPR = 'GcsStore'
@@ -113,7 +113,10 @@ kubernetes:
113
113
  - |
114
114
  # TODO(asaiacai): add debug environment variable for printing the apt-update, apt-install, sync-files output
115
115
  # Helper function to conditionally use sudo
116
- set -eo pipefail
116
+ # set -eo pipefail
117
+ {% if konduktor_debug %}
118
+ set -x
119
+ {% endif %}
117
120
  mkdir -p ~/.konduktor/tmp
118
121
  start_epoch=$(date +%s);
119
122
  start_setup=$(date +%s);
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250714105226
3
+ Version: 0.1.0.dev20250716105229
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -1,4 +1,4 @@
1
- konduktor/__init__.py,sha256=fTO5XnjmXkLYm8IU_vP4XVo4xLbL1UiVk64XF0wAapk,1540
1
+ konduktor/__init__.py,sha256=zYoBNEW7JjgSjqIRFfaHeNgMTT2K8KGedqy-SUCRYa8,1540
2
2
  konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
4
4
  konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
@@ -7,9 +7,9 @@ konduktor/authentication.py,sha256=_mVy3eqoKohicHostFiGwG1-2ybxP-l7ouofQ0LRlCY,4
7
7
  konduktor/backends/__init__.py,sha256=1Q6sqqdeMYarpTX_U-QVywJYf7idiUTRsyP-E4BQSOw,129
8
8
  konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
9
9
  konduktor/backends/jobset.py,sha256=UdhwAuZODLMbLY51Y2zOBsh6wg4Pb84oHVvUKzx3Z2w,8434
10
- konduktor/backends/jobset_utils.py,sha256=u5Z3SYv7rmkS1M0l1sHR5QEnJ-mIMk5hrM9WoPjJCoE,22283
10
+ konduktor/backends/jobset_utils.py,sha256=sE3USQ15rcXeTCmkysN5YYugiYcaqhWS65vdJRZvlJA,22827
11
11
  konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
12
- konduktor/cli.py,sha256=qtktD8N17IRC5MYEdaE0o3pv8EI36cvyyQkYUFi5_nQ,35590
12
+ konduktor/cli.py,sha256=GXmm4DGLHuvfnqDG_3PsWXK7mOI3XWvlC3VeI6JDoqI,36004
13
13
  konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
14
14
  konduktor/constants.py,sha256=T3AeXXxuQHINW_bAWyztvDeS8r4g8kXBGIwIq13cys0,1814
15
15
  konduktor/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -56,7 +56,7 @@ konduktor/data/constants.py,sha256=yXVEoTI2we1xOjVSU-bjRCQCLpVvpEvJ0GedXvSwEfw,1
56
56
  konduktor/data/data_utils.py,sha256=IG1jgb_La997wi90xCvxYYsHQRlmm8Aooq04ZSf8EDI,9670
57
57
  konduktor/data/gcp/__init__.py,sha256=rlQxACBC_Vu36mdgPyJgUy4mGc_6Nt_a96JAuaPz2pQ,489
58
58
  konduktor/data/gcp/constants.py,sha256=dMfOiFccM8O6rUi9kClJcbvw1K1VnS1JzzQk3apq8ho,1483
59
- konduktor/data/gcp/gcs.py,sha256=Zc1LXrjoeNU9EDK229evrKxjVqsKIUicbtYlugA_TiY,42229
59
+ konduktor/data/gcp/gcs.py,sha256=fFaQydgFj0zJbZrVCrOq7goXE6gT19i3f1NQpe_Hdq4,41888
60
60
  konduktor/data/gcp/utils.py,sha256=FJQcMXZqtMIzjZ98b3lTTc0UbdPUKTDLsOsfJaaH5-s,214
61
61
  konduktor/data/registry.py,sha256=CUbMsN_Q17Pf4wRHkqZrycErEjTP7cLEdgcfwVGcEpc,696
62
62
  konduktor/data/storage.py,sha256=o2So-bY9glvgbGdoN7AQNYmNnvGf1AUDPpImtadRL90,35213
@@ -71,7 +71,7 @@ konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw
71
71
  konduktor/resource.py,sha256=nHgPWXCbWj5sWyslNngrFypMN1K0Dksb0yHbJqWaei8,19612
72
72
  konduktor/task.py,sha256=ofwd8WIhfD6C3ThLcv6X3GUzQHyZ6ddjUagE-umF4K0,35207
73
73
  konduktor/templates/jobset.yaml.j2,sha256=rdURknodtgLp4zoA2PX86Nn4wPpi3tr5l4IG55aWBRg,1059
74
- konduktor/templates/pod.yaml.j2,sha256=7512UVJHv_8mQu0WO9yuOR3HGjWYtHOLe2IsatKeDH0,16635
74
+ konduktor/templates/pod.yaml.j2,sha256=PagIrQnON2L32m6A1tdFnO2ieF2Lzggm8AwISWQB-Kk,16723
75
75
  konduktor/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
76
  konduktor/usage/constants.py,sha256=gCL8afIHZhO0dcxbJGpESE9sCC1cBSbeRnQ8GwNOY4M,612
77
77
  konduktor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -91,8 +91,8 @@ konduktor/utils/schemas.py,sha256=VGPERAso2G4sVAznsJ80qT2Q-I_EFxXw6Rfcw-vkYgQ,16
91
91
  konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
92
92
  konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
93
93
  konduktor/utils/validator.py,sha256=uCRlScO1NYxsbTNKY9dkoqvlO8S0ISIIB8XmX2ItcO8,2793
94
- konduktor_nightly-0.1.0.dev20250714105226.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
95
- konduktor_nightly-0.1.0.dev20250714105226.dist-info/METADATA,sha256=xLxu-PCvFmq9dmqE_SaBq3S6OavTH4ULA9RiXcqFIU8,4247
96
- konduktor_nightly-0.1.0.dev20250714105226.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
97
- konduktor_nightly-0.1.0.dev20250714105226.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
98
- konduktor_nightly-0.1.0.dev20250714105226.dist-info/RECORD,,
94
+ konduktor_nightly-0.1.0.dev20250716105229.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
95
+ konduktor_nightly-0.1.0.dev20250716105229.dist-info/METADATA,sha256=b7CQSOl3ZWKLIni_LBzurM0ff7pjhOXpMH8KVRxNOzE,4247
96
+ konduktor_nightly-0.1.0.dev20250716105229.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
97
+ konduktor_nightly-0.1.0.dev20250716105229.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
98
+ konduktor_nightly-0.1.0.dev20250716105229.dist-info/RECORD,,