konduktor-nightly 0.1.0.dev20250808105243__py3-none-any.whl → 0.1.0.dev20250809104842__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of konduktor-nightly might be problematic. Click here for more details.

konduktor/__init__.py CHANGED
@@ -11,7 +11,7 @@ from konduktor.task import Task
11
11
  __all__ = ['launch', 'Resources', 'Task', 'Serving']
12
12
 
13
13
  # Replaced with the current commit when building the wheels.
14
- _KONDUKTOR_COMMIT_SHA = '45add2e516f1b7bb1f16ed063e5af87b8e5609cf'
14
+ _KONDUKTOR_COMMIT_SHA = '0f0b36c3a67aa7c60d6cb33240631b7c8ccaed03'
15
15
  os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
16
16
 
17
17
 
@@ -45,5 +45,5 @@ def _get_git_commit():
45
45
 
46
46
 
47
47
  __commit__ = _get_git_commit()
48
- __version__ = '1.0.0.dev0.1.0.dev20250808105243'
48
+ __version__ = '1.0.0.dev0.1.0.dev20250809104842'
49
49
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
@@ -9,5 +9,11 @@ USER_LABEL = 'trainy.ai/username'
9
9
  ACCELERATOR_LABEL = 'trainy.ai/accelerator'
10
10
  NUM_ACCELERATORS_LABEL = 'trainy.ai/num-accelerators'
11
11
 
12
+ # Start/stop/status related labels
13
+ STOP_USERID_LABEL = 'trainy.ai/stop-userid'
14
+ STOP_USERNAME_LABEL = 'trainy.ai/stop-username'
15
+
12
16
  # Secret labels
13
- SECRET_BASENAME_LABEL = 'konduktor/basename'
17
+ SECRET_BASENAME_LABEL = 'trainy.ai/secret-basename'
18
+ SECRET_KIND_LABEL = 'trainy.ai/secret-kind'
19
+ SECRET_OWNER_LABEL = 'trainy.ai/secret-owner'
@@ -3,10 +3,12 @@
3
3
  import enum
4
4
  import json
5
5
  import tempfile
6
+ import time
6
7
  import typing
7
8
  from datetime import datetime, timezone
8
9
  from typing import Any, Dict, Optional, Tuple
9
10
 
11
+ import click
10
12
  import colorama
11
13
 
12
14
  if typing.TYPE_CHECKING:
@@ -217,6 +219,131 @@ def delete_jobset(namespace: str, job_name: str) -> Optional[Dict[str, Any]]:
217
219
  return None
218
220
 
219
221
 
222
# Coordinates of the Kueue Workload custom resource patched alongside a JobSet.
_KUEUE_API_GROUP = 'kueue.x-k8s.io'
_KUEUE_API_VERSION = 'v1beta1'
_KUEUE_WORKLOAD_PLURAL = 'workloads'


def _workload_belongs_to_jobset(workload: Dict[str, Any], job_name: str) -> bool:
    """Return True if the Kueue workload object was created for `job_name`.

    A plain ``startswith('jobset-<job_name>-')`` test also matches the
    workloads of *other* jobsets whose names merely begin with `job_name`
    (job ``foo`` vs. workload ``jobset-foo-bar-1a2b3``), so the match is
    tightened here.

    Args:
        workload: A Workload object as returned by the custom objects API.
        job_name: Name of the JobSet being stopped/started.
    """
    metadata = workload.get('metadata', {})

    # Preferred signal: an owner reference pointing at exactly this JobSet.
    for owner in metadata.get('ownerReferences') or []:
        if owner.get('kind') == 'JobSet' and owner.get('name') == job_name:
            return True

    # Fallback on the name convention. NOTE(review): presumably Kueue names
    # workloads 'jobset-<name>-<hash>' with a hyphen-free hash suffix --
    # confirm against the deployed Kueue version.
    prefix = f'jobset-{job_name}-'
    name = metadata.get('name', '')
    if not name.startswith(prefix):
        return False
    suffix = name[len(prefix):]
    return bool(suffix) and '-' not in suffix


def _set_kueue_workload_active(
    context: Optional[str], namespace: str, job_name: str, active: bool
) -> None:
    """Best-effort toggle of ``spec.active`` on the job's Kueue workload(s).

    Failures never propagate (the JobSet patch has already been applied by
    the time this runs), but they are reported on stderr instead of being
    silently discarded.
    """
    try:
        crd_api = kube_client.crd_api(context=context)
        workloads = crd_api.list_namespaced_custom_object(
            group=_KUEUE_API_GROUP,
            version=_KUEUE_API_VERSION,
            namespace=namespace,
            plural=_KUEUE_WORKLOAD_PLURAL,
        )
        for workload in workloads.get('items', []):
            if not _workload_belongs_to_jobset(workload, job_name):
                continue
            crd_api.patch_namespaced_custom_object(
                group=_KUEUE_API_GROUP,
                version=_KUEUE_API_VERSION,
                namespace=namespace,
                plural=_KUEUE_WORKLOAD_PLURAL,
                name=workload['metadata']['name'],
                body={'spec': {'active': active}},
            )
    except Exception as e:  # pylint: disable=broad-except
        # Deliberately non-fatal: the JobSet itself was patched successfully.
        click.secho(
            f'Warning: could not set Kueue workload active={active} '
            f'for job {job_name}: {e}',
            fg='yellow',
            err=True,
        )


def _patch_jobset_suspend(
    namespace: str,
    job_name: str,
    suspend: bool,
    annotations: Dict[str, Optional[str]],
) -> Optional[Dict[str, Any]]:
    """Patch ``spec.suspend`` and annotations on a JobSet, then sync Kueue.

    Args:
        namespace: Namespace containing the JobSet.
        job_name: Name of the JobSet.
        suspend: Value written to ``spec.suspend``.
        annotations: Annotation patch; a ``None`` value is sent as null in
            the patch body.

    Returns:
        The API response of the JobSet patch call.

    Raises:
        JobNotFoundError: If the API answers 404 for this JobSet.
    """
    context = kubernetes_utils.get_current_kube_config_context_name()
    try:
        # First check if the job exists
        get_jobset(namespace, job_name)

        response = kube_client.crd_api(context=context).patch_namespaced_custom_object(
            group=JOBSET_API_GROUP,
            version=JOBSET_API_VERSION,
            namespace=namespace,
            plural=JOBSET_PLURAL,
            name=job_name,
            body={
                'spec': {'suspend': suspend},
                'metadata': {'annotations': annotations},
            },
        )
    except kube_client.api_exception() as e:
        if e.status == 404:
            raise JobNotFoundError(
                f'Job {job_name} not found in namespace {namespace}'
            ) from e
        raise

    # Keep the Kueue workload in step so Kueue does not undo the change
    # (e.g. automatically resuming a manually stopped job).
    _set_kueue_workload_active(context, namespace, job_name, active=not suspend)
    return response


def stop_jobset(namespace: str, job_name: str) -> Optional[Dict[str, Any]]:
    """Stops (suspends) the jobset in this namespace.

    Sets ``spec.suspend`` to True and records who stopped the job in the
    stop-userid / stop-username annotations, then deactivates the matching
    Kueue workload on a best-effort basis.

    Args:
        namespace: Namespace containing the JobSet.
        job_name: Name of the JobSet to suspend.

    Returns:
        The API response of the JobSet patch call.

    Raises:
        JobNotFoundError: If the JobSet does not exist (API status 404).
    """
    return _patch_jobset_suspend(
        namespace,
        job_name,
        suspend=True,
        annotations={
            backend_constants.STOP_USERID_LABEL: common_utils.user_and_hostname_hash(),
            backend_constants.STOP_USERNAME_LABEL: common_utils.get_cleaned_username(),
        },
    )


def start_jobset(namespace: str, job_name: str) -> Optional[Dict[str, Any]]:
    """Starts (resumes) the jobset in this namespace.

    Sets ``spec.suspend`` to False and nulls out the stop-userid /
    stop-username annotations, then reactivates the matching Kueue workload
    on a best-effort basis.

    Args:
        namespace: Namespace containing the JobSet.
        job_name: Name of the JobSet to resume.

    Returns:
        The API response of the JobSet patch call.

    Raises:
        JobNotFoundError: If the JobSet does not exist (API status 404).
    """
    return _patch_jobset_suspend(
        namespace,
        job_name,
        suspend=False,
        annotations={
            # None values clear the annotations written by stop_jobset.
            backend_constants.STOP_USERID_LABEL: None,
            backend_constants.STOP_USERNAME_LABEL: None,
        },
    )
345
+
346
+
220
347
  def get_job(namespace: str, job_name: str) -> Optional[Dict[str, Any]]:
221
348
  """Gets a specific job from a jobset by name and worker index
222
349
 
@@ -251,16 +378,82 @@ def get_job(namespace: str, job_name: str) -> Optional[Dict[str, Any]]:
251
378
  return None
252
379
 
253
380
 
254
- def show_status_table(namespace: str, all_users: bool):
255
- """Compute cluster table values and display.
381
def _parse_timestamp_filter(timestamp_str: str) -> datetime:
    """Parse a timestamp filter string into a timezone-aware UTC datetime.

    Supported formats:
    - "08/06/25 03:54PM" (full datetime, interpreted as local time)
    - "08/06/25" (date only, taken as 00:00 UTC of that date)
    - "03:54PM" (time only, uses today's local date)

    Args:
        timestamp_str: User-supplied timestamp text.

    Returns:
        An aware datetime in UTC.

    Raises:
        ValueError: If `timestamp_str` matches none of the supported formats.
    """
    full_fmt = '%m/%d/%y %I:%M%p'  # 08/06/25 03:54PM
    date_fmt = '%m/%d/%y'  # 08/06/25
    time_fmt = '%I:%M%p'  # 03:54PM

    for fmt in (full_fmt, date_fmt, time_fmt):
        try:
            parsed = datetime.strptime(timestamp_str, fmt)
        except ValueError:
            continue

        if fmt == date_fmt:
            # NOTE(review): date-only input has always been pinned to UTC
            # midnight rather than local midnight; kept as-is -- confirm
            # whether local midnight is the intended meaning.
            return parsed.replace(tzinfo=timezone.utc)

        if fmt == time_fmt:
            # strptime fills in 1900-01-01; substitute today's local date.
            parsed = datetime.combine(datetime.now().date(), parsed.time())

        # astimezone() on a naive datetime treats it as local time and applies
        # the UTC offset in effect at that moment (DST-aware, correct sign on
        # both sides of UTC), unlike manual time.timezone/altzone arithmetic.
        return parsed.astimezone(timezone.utc)

    raise ValueError(
        f"Unable to parse timestamp '{timestamp_str}'. "
        f"Supported formats: '08/06/25 03:54PM', '08/06/25', '03:54PM'"
    )
427
+
428
+
429
+ def show_status_table(
430
+ namespace: str,
431
+ all_users: bool,
432
+ limit: Optional[int] = None,
433
+ after: Optional[str] = None,
434
+ before: Optional[str] = None,
435
+ ):
436
+ """Compute cluster table values and display with optional filtering and pagination.
437
+
438
+ Args:
439
+ namespace: Kubernetes namespace to search
440
+ all_users: Whether to show jobs from all users
441
+ limit: Maximum number of jobs to display
442
+ after: Show jobs created after this timestamp
443
+ before: Show jobs created before this timestamp
260
444
  """
261
445
  # TODO(zhwu): Update the information for autostop clusters.
262
446
 
263
- def _get_status_string_colorized(status: Dict[str, Any]) -> str:
447
+ def _get_status_string_colorized(
448
+ status: Dict[str, Any], job: Dict[str, Any]
449
+ ) -> str:
450
+ # Handle case where status might be empty or missing
451
+ if not status:
452
+ return (
453
+ f'{colorama.Fore.YELLOW}'
454
+ f'{JobStatus.PENDING.name}{colorama.Style.RESET_ALL}'
455
+ )
456
+
264
457
  terminalState = status.get('terminalState', None)
265
458
  if terminalState and terminalState.upper() == JobStatus.COMPLETED.name.upper():
266
459
  return (
@@ -272,16 +465,28 @@ def show_status_table(namespace: str, all_users: bool):
272
465
  f'{colorama.Fore.RED}'
273
466
  f'{JobStatus.FAILED.name}{colorama.Style.RESET_ALL}'
274
467
  )
275
- elif status['replicatedJobsStatus'][0]['ready']:
468
+ elif status.get('replicatedJobsStatus', [{}])[0].get('ready', False):
276
469
  return (
277
470
  f'{colorama.Fore.CYAN}'
278
471
  f'{JobStatus.ACTIVE.name}{colorama.Style.RESET_ALL}'
279
472
  )
280
- elif status['replicatedJobsStatus'][0]['suspended']:
281
- return (
282
- f'{colorama.Fore.BLUE}'
283
- f'{JobStatus.SUSPENDED.name}{colorama.Style.RESET_ALL}'
284
- )
473
+ elif status.get('replicatedJobsStatus', [{}])[0].get('suspended', False):
474
+ # Check if this was manually suspended
475
+ annotations = job.get('metadata', {}).get('annotations', {})
476
+ if annotations.get(backend_constants.STOP_USERID_LABEL):
477
+ username = annotations.get(
478
+ backend_constants.STOP_USERNAME_LABEL, 'unknown'
479
+ )
480
+ return (
481
+ f'{colorama.Fore.BLUE}'
482
+ f'{JobStatus.SUSPENDED.name} '
483
+ f'(by {username}){colorama.Style.RESET_ALL}'
484
+ )
485
+ else:
486
+ return (
487
+ f'{colorama.Fore.BLUE}'
488
+ f'{JobStatus.SUSPENDED.name} (by system){colorama.Style.RESET_ALL}'
489
+ )
285
490
  else:
286
491
  return (
287
492
  f'{colorama.Fore.YELLOW}'
@@ -296,13 +501,32 @@ def show_status_table(namespace: str, all_users: bool):
296
501
 
297
502
  days, remainder = divmod(total_seconds, 86400) # 86400 seconds in a day
298
503
  hours, remainder = divmod(remainder, 3600) # 3600 seconds in an hour
299
- minutes, _ = divmod(remainder, 60) # 60 seconds in a minute
504
+ minutes, seconds = divmod(remainder, 60) # 60 seconds in a minute
505
+
506
+ days_str = f'{days} day{"s" if days != 1 else ""}, ' if days > 0 else ''
507
+ hours_str = f'{hours} hr{"s" if hours != 1 else ""}, ' if hours > 0 else ''
508
+ minutes_str = (
509
+ f'{minutes} min{"s" if minutes != 1 else ""}'
510
+ if minutes > 0 and days == 0
511
+ else ''
512
+ )
513
+ seconds_str = (
514
+ f'{seconds} sec{"s" if seconds != 1 else ""}'
515
+ if seconds > 0 and days == 0 and hours == 0 and minutes == 0
516
+ else ''
517
+ )
300
518
 
301
- days_str = f'{days} days, ' if days > 0 else ''
302
- hours_str = f'{hours} hours, ' if hours > 0 else ''
303
- minutes_str = f'{minutes} minutes' if minutes > 0 else ''
519
+ result = f'{days_str}{hours_str}{minutes_str}{seconds_str}'
520
+ return result if result else '<1 minute', delta
304
521
 
305
- return f'{days_str}{hours_str}{minutes_str}', delta
522
+ def _format_timestamp(timestamp: str) -> str:
523
+ """Format timestamp as MM/DD/YY HH:MMAM/PM in local timezone"""
524
+ # Parse UTC timestamp and convert to local time
525
+ dt_utc = datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ').replace(
526
+ tzinfo=timezone.utc
527
+ )
528
+ dt_local = dt_utc.astimezone() # Convert to local timezone
529
+ return dt_local.strftime('%m/%d/%y %I:%M%p')
306
530
 
307
531
  def _get_resources(job: Dict[str, Any]) -> str:
308
532
  num_pods = int(
@@ -314,27 +538,86 @@ def show_status_table(namespace: str, all_users: bool):
314
538
  cpu, memory = resources['cpu'], resources['memory']
315
539
  accelerator = job['metadata']['labels'].get(JOBSET_ACCELERATOR_LABEL, None)
316
540
  if accelerator:
317
- return f'{num_pods}x({cpu}CPU, memory {memory}, {accelerator})'
541
+ return f'{num_pods}x({cpu}CPU, {memory}MEM, {accelerator})'
318
542
  else:
319
- return f'{num_pods}x({cpu}CPU, memory {memory}GB)'
543
+ return f'{num_pods}x({cpu}CPU, {memory}MEM)'
320
544
 
321
545
  if all_users:
322
- columns = ['NAME', 'USER', 'STATUS', 'RESOURCES', 'SUBMITTED']
546
+ columns = [
547
+ 'NAME',
548
+ 'USER',
549
+ 'STATUS',
550
+ 'RESOURCES',
551
+ 'SUBMITTED',
552
+ 'START TIME',
553
+ 'END TIME',
554
+ ]
323
555
  else:
324
- columns = ['NAME', 'STATUS', 'RESOURCES', 'SUBMITTED']
556
+ columns = ['NAME', 'STATUS', 'RESOURCES', 'SUBMITTED', 'START TIME', 'END TIME']
325
557
  job_table = log_utils.create_table(columns)
326
558
  job_specs = list_jobset(namespace)
327
559
  assert job_specs is not None, 'Retrieving jobs failed'
560
+
561
+ # Parse timestamp filters if provided
562
+ after_dt = None
563
+ before_dt = None
564
+ if after:
565
+ try:
566
+ after_dt = _parse_timestamp_filter(after)
567
+ except ValueError as e:
568
+ click.secho(f'Error parsing --after timestamp: {e}', fg='red', err=True)
569
+ return
570
+ if before:
571
+ try:
572
+ before_dt = _parse_timestamp_filter(before)
573
+ except ValueError as e:
574
+ click.secho(f'Error parsing --before timestamp: {e}', fg='red', err=True)
575
+ return
576
+
328
577
  rows = []
329
578
  for job in job_specs['items']:
579
+ # Apply timestamp filtering
580
+ if after_dt or before_dt:
581
+ job_creation_time = datetime.strptime(
582
+ job['metadata']['creationTimestamp'], '%Y-%m-%dT%H:%M:%SZ'
583
+ ).replace(tzinfo=timezone.utc)
584
+
585
+ if after_dt and job_creation_time <= after_dt:
586
+ continue
587
+ if before_dt and job_creation_time >= before_dt:
588
+ continue
589
+ # Get start time
590
+ start_time = _format_timestamp(job['metadata']['creationTimestamp'])
591
+
592
+ # Get submitted time (how long ago)
593
+ submitted_time, _ = _get_time_delta(job['metadata']['creationTimestamp'])
594
+
595
+ # Get end time (from JobSet conditions)
596
+ def _get_end_time_from_conditions(job: Dict[str, Any]) -> str:
597
+ """Extract end time from JobSet conditions (Completed or Failed)"""
598
+ conditions = job.get('status', {}).get('conditions', [])
599
+ for condition in conditions:
600
+ # Look for terminal conditions with status=True
601
+ if (
602
+ condition.get('type') in ['Completed', 'Failed']
603
+ and condition.get('status') == 'True'
604
+ ):
605
+ return _format_timestamp(condition.get('lastTransitionTime', ''))
606
+ return '-'
607
+
608
+ end_time = _get_end_time_from_conditions(job)
609
+
330
610
  if all_users:
331
611
  rows.append(
332
612
  [
333
613
  job['metadata']['name'],
334
614
  job['metadata']['labels'][JOBSET_USERID_LABEL],
335
- _get_status_string_colorized(job['status']),
615
+ _get_status_string_colorized(job.get('status', {}), job),
336
616
  _get_resources(job),
337
- *_get_time_delta(job['metadata']['creationTimestamp']),
617
+ submitted_time,
618
+ start_time,
619
+ end_time,
620
+ job['metadata']['creationTimestamp'],
338
621
  ]
339
622
  )
340
623
  elif (
@@ -345,13 +628,41 @@ def show_status_table(namespace: str, all_users: bool):
345
628
  rows.append(
346
629
  [
347
630
  job['metadata']['name'],
348
- _get_status_string_colorized(job.get('status', {})),
631
+ _get_status_string_colorized(job.get('status', {}), job),
349
632
  _get_resources(job),
350
- *_get_time_delta(job['metadata']['creationTimestamp']),
633
+ submitted_time,
634
+ start_time,
635
+ end_time,
636
+ job['metadata']['creationTimestamp'],
351
637
  ]
352
638
  )
353
- rows = [row[:-1] for row in sorted(rows, key=lambda x: x[-1])]
354
- # have the most recently submitted jobs at the top
639
+
640
+ # Sort by creation timestamp (most recent first)
641
+ rows = sorted(rows, key=lambda x: x[-1], reverse=True)
642
+
643
+ # Apply limit if specified
644
+ if limit and limit > 0:
645
+ rows = rows[:limit]
646
+
647
+ # Show pagination info if applicable
648
+ total_jobs = len(job_specs['items'])
649
+ filtered_jobs = len(rows)
650
+
651
+ if limit or after or before:
652
+ filter_info = []
653
+ if after:
654
+ filter_info.append(f'after {after}')
655
+ if before:
656
+ filter_info.append(f'before {before}')
657
+ if limit:
658
+ filter_info.append(f'limit {limit}')
659
+
660
+ filter_str = ', '.join(filter_info)
661
+ click.secho(f'Showing {filtered_jobs} jobs ({filter_str})', fg='yellow')
662
+ if total_jobs != filtered_jobs:
663
+ click.secho(f'Total jobs in namespace: {total_jobs}', fg='yellow')
664
+
665
+ # Remove the sorting timestamp and add rows to table
355
666
  for row in rows:
356
- job_table.add_row(row)
667
+ job_table.add_row(row[:-1])
357
668
  print(job_table)
@@ -154,7 +154,7 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
154
154
  default_secrets = []
155
155
 
156
156
  user_hash = common_utils.get_user_hash()
157
- label_selector = f'konduktor/owner={user_hash}'
157
+ label_selector = f'{backend_constants.SECRET_OWNER_LABEL}={user_hash}'
158
158
  user_secrets = kubernetes_utils.list_secrets(
159
159
  namespace, context, label_filter=label_selector
160
160
  )
konduktor/cli.py CHANGED
@@ -45,12 +45,13 @@ import click
45
45
  import colorama
46
46
  import dotenv
47
47
  import prettytable
48
- import yaml
48
+ import yaml # type: ignore
49
49
  from rich.progress import track
50
50
 
51
51
  import konduktor
52
52
  from konduktor import check as konduktor_check
53
53
  from konduktor import logging
54
+ from konduktor.backends import constants as backend_constants
54
55
  from konduktor.backends import deployment_utils, jobset_utils
55
56
  from konduktor.utils import (
56
57
  common_utils,
@@ -606,21 +607,67 @@ def cli():
606
607
  required=False,
607
608
  help='Show all clusters, including those not owned by the ' 'current user.',
608
609
  )
610
+ @click.option(
611
+ '--limit',
612
+ '-l',
613
+ default=None,
614
+ type=int,
615
+ help='Maximum number of jobs to display (e.g., --limit 100)',
616
+ )
617
+ @click.option(
618
+ '--after',
619
+ default=None,
620
+ type=str,
621
+ help=(
622
+ 'Show jobs created after this timestamp '
623
+ '(e.g., --after "08/06/25 03:54PM", --after "08/06/25", --after "03:54PM")'
624
+ ),
625
+ )
626
+ @click.option(
627
+ '--before',
628
+ default=None,
629
+ type=str,
630
+ help=(
631
+ 'Show jobs created before this timestamp '
632
+ '(e.g., --before "08/06/25 03:54PM", --before "08/06/25", --before "03:54PM")'
633
+ ),
634
+ )
609
635
  # pylint: disable=redefined-builtin
610
- def status(all_users: bool):
636
+ def status(
637
+ all_users: bool, limit: Optional[int], after: Optional[str], before: Optional[str]
638
+ ):
611
639
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
612
- """Shows list of all the jobs
640
+ """Shows list of all the jobs with optional filtering and pagination
613
641
 
614
642
  Args:
615
- all_users (bool): whether to show all jobs
616
- regardless of the user in this namespace
643
+ all_users (bool): whether to show all jobs for all users
644
+ limit (Optional[int]): maximum number of jobs to display
645
+ after (Optional[str]): show jobs created after this timestamp
646
+ before (Optional[str]): show jobs created before this timestamp
647
+
648
+ Examples:
649
+ konduktor status --limit 10
650
+ konduktor status --before "08/06/25 03:53PM"
651
+ konduktor status --all-users --limit 10 --after "08/06/25 03:53PM"
652
+
653
+ Note:
654
+ When using --before or --after timestamps, passing in "08/06/25" is
655
+ equivalent to passing in "08/06/25 00:00".
656
+ When using --before or --after timestamps, passing in "03:53PM" is
657
+ equivalent to passing in "03:53:00PM".
658
+ Timestamps shown in "konduktor startus" are truncated and are in the
659
+ local timezone. ex. "03:53:55PM" --> "03:53PM"
660
+ and would show up in --after "03:53PM" but not in --before "03:53PM"
661
+ despite status showing as "03:53PM".
617
662
  """
618
663
  context = kubernetes_utils.get_current_kube_config_context_name()
619
664
  namespace = kubernetes_utils.get_kube_config_context_namespace(context)
620
665
  user = common_utils.user_and_hostname_hash() if not all_users else 'All'
621
666
  click.secho(f'User: {user}', fg='green', bold=True)
622
667
  click.secho('Jobs', fg='cyan', bold=True)
623
- jobset_utils.show_status_table(namespace, all_users=all_users)
668
+ jobset_utils.show_status_table(
669
+ namespace, all_users=all_users, limit=limit, after=after, before=before
670
+ )
624
671
 
625
672
 
626
673
  @cli.command()
@@ -959,7 +1006,294 @@ def down(
959
1006
 
960
1007
 
961
1008
def _jobsets_for_start_stop(namespace: str, all_users: Optional[bool]) -> List[dict]:
    """Return the jobset objects the stop/start commands may act on.

    With `all_users` every jobset in the namespace is returned; otherwise
    only those whose user-id label equals the current user's hash.

    Raises:
        click.ClickException: If the jobsets could not be listed.
    """
    jobs_response = jobset_utils.list_jobset(namespace)
    if not jobs_response:
        raise click.ClickException(f'Unable to list jobs in namespace {namespace}')

    items = jobs_response['items']
    if all_users:
        # --all-users widens the selection; it must not empty it.
        return list(items)

    my_hash = common_utils.user_and_hostname_hash()
    return [
        job
        for job in items
        if job['metadata'].get('labels', {}).get(jobset_utils.JOBSET_USERID_LABEL)
        == my_hash
    ]


def _expand_job_patterns(
    jobs_specs: List[dict], patterns: List[str], namespace: str
) -> List[str]:
    """Resolve job names / wildcard patterns to existing, de-duplicated job names.

    A warning is printed to stderr for every pattern that matches nothing.

    Raises:
        click.ClickException: If there are no candidate jobs at all, or no
            pattern matched any job.
    """
    if not jobs_specs:
        raise click.ClickException(f'No jobs found in namespace {namespace}')

    all_job_names = [job['metadata']['name'] for job in jobs_specs]
    matched_jobs: List[str] = []
    for job_pattern in patterns:
        # fnmatch handles both wildcard and exact names
        pattern_matches = fnmatch.filter(all_job_names, job_pattern)
        if not pattern_matches:
            click.secho(
                f'Warning: No jobs found matching pattern "{job_pattern}"',
                fg='yellow',
                err=True,
            )
        matched_jobs.extend(pattern_matches)

    # dict.fromkeys removes duplicates while preserving first-seen order
    unique_jobs = list(dict.fromkeys(matched_jobs))
    if not unique_jobs:
        raise click.ClickException(
            f'No matching jobs found check status with '
            f'{colorama.Style.BRIGHT}konduktor status{colorama.Style.RESET_ALL}'
        )
    return unique_jobs


def _confirm_job_action(verb: str, jobs: List[str]) -> None:
    """Ask the user to confirm `verb` (e.g. 'Suspending') on `jobs`; abort on no."""
    prompt = (
        f'{verb} job(s) {colorama.Style.BRIGHT} '
        f'{colorama.Fore.GREEN}{jobs}{colorama.Style.RESET_ALL}. '
        'Proceed?'
    )
    click.confirm(prompt, default=True, abort=True, show_default=True)


@cli.command(cls=_DocumentedCodeCommand)
@click.argument(
    'jobs',
    nargs=-1,
    required=False,
)
@click.option('--all', '-a', default=None, is_flag=True, help='Suspend all jobs.')
@click.option(
    '--all-users',
    '--all_users',
    default=False,
    is_flag=True,
    help='Include other users for suspension',
)
@click.option(
    '--yes',
    '-y',
    is_flag=True,
    default=False,
    required=False,
    help='Skip confirmation prompt.',
)
# pylint: disable=redefined-builtin
def stop(
    jobs: List[str],
    all: Optional[bool],
    all_users: Optional[bool],
    yes: bool,
):
    """Suspend job(s) (manual/user-initiated).

    JOB is the name of the job to suspend. If both
    JOB and ``--all`` are supplied, the latter takes precedence.

    Suspending a job will pause execution and mark the job as SUSPENDED (by user).
    The job can be resumed later with `konduktor start`.

    If a job is suspended by the system (e.g., due to queueing),
    it will show as SUSPENDED (by system).

    Wildcard patterns are supported using * characters.
    Examples: "my_job-*" matches all jobs starting with "my_job-",
    "*-gpu" matches all jobs ending with "-gpu".

    Examples:

    .. code-block:: bash

        # Suspend a specific job.
        konduktor stop my_job
        \b
        # Suspend multiple jobs.
        konduktor stop my_job1 my_job2
        \b
        # Suspend all jobs matching a pattern.
        konduktor stop "my_job-*"
        \b
        # Suspend all of this users jobs.
        konduktor stop -a
        konduktor stop --all

        # Suspend all jobs across all users
        konduktor stop --all --all-users

    """

    context = kubernetes_utils.get_current_kube_config_context_name()
    namespace = kubernetes_utils.get_kube_config_context_namespace(context)
    jobs_specs = _jobsets_for_start_stop(namespace, all_users)

    if all:
        # Raise a CLI error rather than assert: asserts vanish under -O and
        # surface as a raw traceback.
        if not jobs_specs:
            raise click.ClickException(f'No jobs found in namespace {namespace}')
        job_names = [job['metadata']['name'] for job in jobs_specs]
    elif jobs:
        job_names = _expand_job_patterns(jobs_specs, list(jobs), namespace)
    else:
        raise click.ClickException(
            'No jobs specified. Use --all to suspend '
            'all jobs or specify job names/patterns.'
        )

    if not yes:
        _confirm_job_action('Suspending', job_names)

    for job in track(job_names, description='Suspending job(s)...'):
        jobset_utils.stop_jobset(namespace, job)


@cli.command(cls=_DocumentedCodeCommand)
@click.argument(
    'jobs',
    nargs=-1,
    required=False,
)
@click.option(
    '--all', '-a', default=None, is_flag=True, help='Resume all suspended jobs.'
)
@click.option(
    '--all-users',
    '--all_users',
    default=False,
    is_flag=True,
    help='Include other users for resumption',
)
@click.option(
    '--yes',
    '-y',
    is_flag=True,
    default=False,
    required=False,
    help='Skip confirmation prompt.',
)
# pylint: disable=redefined-builtin
def start(
    jobs: List[str],
    all: Optional[bool],
    all_users: Optional[bool],
    yes: bool,
):
    """Resume suspended job(s) (manual/user-initiated).

    JOB is the name of the job to resume. If both
    JOB and ``--all`` are supplied, the latter takes precedence.

    Resuming a job will restart execution from where it was suspended.
    Only suspended jobs can be resumed.

    This command works for both manually suspended jobs (SUSPENDED by user)
    and system-suspended jobs (SUSPENDED by system).

    Wildcard patterns are supported using * characters.
    Examples: "my_job-*" matches all jobs starting with "my_job-",
    "*-gpu" matches all jobs ending with "-gpu".

    Examples:

    .. code-block:: bash

        # Resume a specific job.
        konduktor start my_job
        \b
        # Resume multiple jobs.
        konduktor start my_job1 my_job2
        \b
        # Resume all jobs matching a pattern.
        konduktor start "my_job-*"
        \b
        # Resume all of this users suspended jobs.
        konduktor start -a
        konduktor start --all

        # Resume all suspended jobs across all users
        konduktor start --all --all-users

    """

    context = kubernetes_utils.get_current_kube_config_context_name()
    namespace = kubernetes_utils.get_kube_config_context_namespace(context)
    jobs_specs = _jobsets_for_start_stop(namespace, all_users)

    if all:
        # Only get suspended jobs when using --all
        job_names = []
        for job in jobs_specs:
            replicated = job.get('status', {}).get('replicatedJobsStatus')
            # `or [{}]` also covers a present-but-empty list, which a bare
            # [0] would turn into an IndexError.
            if (replicated or [{}])[0].get('suspended', False):
                job_names.append(job['metadata']['name'])
        if not job_names:
            raise click.ClickException(
                f'No suspended jobs found in namespace {namespace}'
            )
    elif jobs:
        job_names = _expand_job_patterns(jobs_specs, list(jobs), namespace)
    else:
        raise click.ClickException(
            'No jobs specified. Use --all to resume '
            'all suspended jobs or specify job names/patterns.'
        )

    if not yes:
        _confirm_job_action('Resuming', job_names)

    for job in track(job_names, description='Resuming job(s)...'):
        jobset_utils.start_jobset(namespace, job)
1288
+
1289
+
1290
+ @cli.command(cls=_DocumentedCodeCommand)
1291
+ @click.argument(
1292
+ 'clouds',
1293
+ required=True,
1294
+ type=str,
1295
+ nargs=-1,
1296
+ )
963
1297
  def check(clouds: Tuple[str]):
964
1298
  """Check which clouds are available to use for storage
965
1299
 
@@ -1143,9 +1477,9 @@ def create(kind, from_file, from_directory, inline, name):
1143
1477
  'name': secret_name,
1144
1478
  'labels': {
1145
1479
  'parent': 'konduktor',
1146
- 'konduktor/owner': common_utils.get_user_hash(),
1147
- 'konduktor/basename': basename,
1148
- 'konduktor/secret-kind': kind or None,
1480
+ backend_constants.SECRET_OWNER_LABEL: common_utils.get_user_hash(),
1481
+ backend_constants.SECRET_BASENAME_LABEL: basename,
1482
+ backend_constants.SECRET_KIND_LABEL: kind or None,
1149
1483
  },
1150
1484
  }
1151
1485
 
@@ -1153,13 +1487,13 @@ def create(kind, from_file, from_directory, inline, name):
1153
1487
  # Overwrites if user trying to create more than 1
1154
1488
  if kind == 'git-ssh':
1155
1489
  user_hash = common_utils.get_user_hash()
1156
- label_selector = f'konduktor/owner={user_hash}'
1490
+ label_selector = f'{backend_constants.SECRET_OWNER_LABEL}={user_hash}'
1157
1491
  existing = kubernetes_utils.list_secrets(
1158
1492
  namespace, context, label_filter=label_selector
1159
1493
  )
1160
1494
  for s in existing:
1161
1495
  labels = s.metadata.labels or {}
1162
- if labels.get('konduktor/secret-kind') == 'git-ssh':
1496
+ if labels.get(backend_constants.SECRET_KIND_LABEL) == 'git-ssh':
1163
1497
  old_name = s.metadata.name
1164
1498
  click.echo(f'Found existing git-ssh secret: {old_name}, deleting it.')
1165
1499
  kubernetes_utils.delete_secret(
@@ -1188,7 +1522,7 @@ def delete(name):
1188
1522
  namespace = kubernetes_utils.get_kube_config_context_namespace(context)
1189
1523
  user_hash = common_utils.get_user_hash()
1190
1524
 
1191
- label_selector = f'konduktor/owner={user_hash}'
1525
+ label_selector = f'{backend_constants.SECRET_OWNER_LABEL}={user_hash}'
1192
1526
  secrets = kubernetes_utils.list_secrets(
1193
1527
  namespace, context, label_filter=label_selector
1194
1528
  )
@@ -1196,7 +1530,8 @@ def delete(name):
1196
1530
  matches = [
1197
1531
  s
1198
1532
  for s in secrets
1199
- if s.metadata.labels and s.metadata.labels.get('konduktor/basename') == name
1533
+ if s.metadata.labels
1534
+ and s.metadata.labels.get(backend_constants.SECRET_BASENAME_LABEL) == name
1200
1535
  ]
1201
1536
 
1202
1537
  if not matches:
@@ -1233,7 +1568,7 @@ def list_secrets(all_users: bool):
1233
1568
  if not all_users:
1234
1569
  user_hash = common_utils.get_user_hash()
1235
1570
  username = common_utils.get_cleaned_username()
1236
- label_selector = f'konduktor/owner={user_hash}'
1571
+ label_selector = f'{backend_constants.SECRET_OWNER_LABEL}={user_hash}'
1237
1572
  secrets = kubernetes_utils.list_secrets(
1238
1573
  namespace, context, label_filter=label_selector
1239
1574
  )
@@ -1254,9 +1589,9 @@ def list_secrets(all_users: bool):
1254
1589
 
1255
1590
  for s in secrets:
1256
1591
  labels = s.metadata.labels or {}
1257
- basename = labels.get('konduktor/basename', s.metadata.name)
1258
- kind = labels.get('konduktor/secret-kind', '(none)')
1259
- owner = labels.get('konduktor/owner', '(none)')
1592
+ basename = labels.get(backend_constants.SECRET_BASENAME_LABEL, s.metadata.name)
1593
+ kind = labels.get(backend_constants.SECRET_KIND_LABEL, '(none)')
1594
+ owner = labels.get(backend_constants.SECRET_OWNER_LABEL, '(none)')
1260
1595
 
1261
1596
  if all_users:
1262
1597
  click.echo(f'{basename:30} kind={kind:10} owner={owner}')
konduktor/config.py CHANGED
@@ -65,7 +65,7 @@ import os
65
65
  import pprint
66
66
  from typing import Any, Dict, List, Optional, Tuple
67
67
 
68
- import yaml
68
+ import yaml # type: ignore
69
69
 
70
70
  from konduktor import logging
71
71
  from konduktor.utils import common_utils, schemas, ux_utils
konduktor/data/aws/s3.py CHANGED
@@ -29,6 +29,7 @@ import colorama
29
29
  from konduktor import config, logging
30
30
  from konduktor.adaptors import aws
31
31
  from konduktor.adaptors.aws import boto3
32
+ from konduktor.backends import constants as backend_constants
32
33
  from konduktor.data import constants, data_utils, storage_utils
33
34
  from konduktor.utils import (
34
35
  annotations,
@@ -1036,7 +1037,7 @@ class S3Store(storage_utils.AbstractStore):
1036
1037
 
1037
1038
  secret_metadata = {
1038
1039
  'labels': {
1039
- 'konduktor/secret-kind': 'S3',
1040
+ backend_constants.SECRET_KIND_LABEL: 'S3',
1040
1041
  },
1041
1042
  }
1042
1043
 
konduktor/data/gcp/gcs.py CHANGED
@@ -28,6 +28,7 @@ if typing.TYPE_CHECKING:
28
28
 
29
29
  from konduktor import logging
30
30
  from konduktor.adaptors import gcp
31
+ from konduktor.backends import constants as backend_constants
31
32
  from konduktor.data import constants, data_utils, storage_utils
32
33
  from konduktor.data.gcp import utils
33
34
  from konduktor.utils import (
@@ -886,7 +887,7 @@ class GcsStore(storage_utils.AbstractStore):
886
887
 
887
888
  secret_metadata = {
888
889
  'labels': {
889
- 'konduktor/secret-kind': 'GCS',
890
+ backend_constants.SECRET_KIND_LABEL: 'GCS',
890
891
  },
891
892
  }
892
893
 
konduktor/task.py CHANGED
@@ -19,7 +19,7 @@ import re
19
19
  import typing
20
20
  from typing import Any, Dict, List, Optional, Tuple, Union
21
21
 
22
- import yaml
22
+ import yaml # type: ignore
23
23
 
24
24
  if typing.TYPE_CHECKING:
25
25
  import konduktor.resource as resources_lib
@@ -15,7 +15,7 @@ jobset:
15
15
  parent: "trainy"
16
16
  annotations: {}
17
17
  spec:
18
- ttlSecondsAfterFinished: 259200 # 3 days
18
+ ttlSecondsAfterFinished: 31536000 # 1 year (365 days)
19
19
  {% if max_restarts %}
20
20
  failurePolicy:
21
21
  maxRestarts: {{ max_restarts }}
@@ -26,7 +26,7 @@ from typing import Any, Callable, Dict, List, Optional, Union
26
26
 
27
27
  import jinja2
28
28
  import jsonschema
29
- import yaml
29
+ import yaml # type: ignore
30
30
 
31
31
  from konduktor.utils import annotations, constants, ux_utils, validator
32
32
 
@@ -21,9 +21,10 @@ from typing import Any, Dict, List, Optional, Tuple, Union
21
21
 
22
22
  import filelock
23
23
  import kubernetes
24
- import yaml
24
+ import yaml # type: ignore
25
25
 
26
26
  from konduktor import config, kube_client, logging
27
+ from konduktor.backends import constants as backend_constants
27
28
  from konduktor.utils import common_utils, kubernetes_enums
28
29
 
29
30
  if typing.TYPE_CHECKING:
@@ -604,8 +605,8 @@ def set_secret(
604
605
  'name': full_name,
605
606
  'labels': {
606
607
  'parent': 'konduktor',
607
- 'konduktor/owner': user_hash,
608
- 'konduktor/basename': secret_name,
608
+ backend_constants.SECRET_OWNER_LABEL: user_hash,
609
+ backend_constants.SECRET_BASENAME_LABEL: secret_name,
609
610
  },
610
611
  }
611
612
 
@@ -680,7 +681,7 @@ def delete_secret(
680
681
  def get_secret_kind(secret: kubernetes.client.V1Secret) -> Optional[str]:
681
682
  """Get the konduktor-specific kind of a secret, if labeled."""
682
683
  if secret.metadata.labels:
683
- return secret.metadata.labels.get('konduktor/secret-kind')
684
+ return secret.metadata.labels.get(backend_constants.SECRET_KIND_LABEL)
684
685
  return None
685
686
 
686
687
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250808105243
3
+ Version: 0.1.0.dev20250809104842
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -1,4 +1,4 @@
1
- konduktor/__init__.py,sha256=agpWvH79AhK6OerLHZEAQtdxWOaV_sshDBcCks8gWz0,1574
1
+ konduktor/__init__.py,sha256=bTZfonclg2KPH9idiGwsQys1xaoPFVgSruIPRdy8GSQ,1574
2
2
  konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
4
4
  konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
@@ -6,15 +6,15 @@ konduktor/adaptors/gcp.py,sha256=ierTF4z7vwpJ9BsC7LSiwv4uLcjGXscwZOwQrddr2vM,410
6
6
  konduktor/authentication.py,sha256=_mVy3eqoKohicHostFiGwG1-2ybxP-l7ouofQ0LRlCY,4570
7
7
  konduktor/backends/__init__.py,sha256=usWJ8HdZJEyg7MIsN8Zcz9rk9e2Lq5dWJ8dv6hCN3ys,199
8
8
  konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
9
- konduktor/backends/constants.py,sha256=eqURY4RU_YXX_WRcge4AZHjr4nwuxTmmVGBnDrD_Qa4,441
9
+ konduktor/backends/constants.py,sha256=NfdhY1PQnewvDCjgRKXj6EZDcVH8k_0GGxnMo7w6HDU,666
10
10
  konduktor/backends/deployment.py,sha256=EHfB2uLeKFQ3maek9tx6XL4_sjQ-ax59DZA79Q3EkVs,5519
11
11
  konduktor/backends/deployment_utils.py,sha256=VGuL01rKe7p7PoVRI_cP4tiZRxHZ13nnTMG-bmDf7P0,28975
12
12
  konduktor/backends/jobset.py,sha256=OwgDog9nH-FoUmNU_H--C3U5jx70reTKL1l849M1k5A,8430
13
- konduktor/backends/jobset_utils.py,sha256=MRZf-Wcn084lnig0SfgXlF9Q3RyC7m7THeJQQlpdSw8,12931
14
- konduktor/backends/pod_utils.py,sha256=K0y2kRTzrmIWFbmyJSEMhw7gueiIFF7VAlXtaBMNHkM,15237
13
+ konduktor/backends/jobset_utils.py,sha256=YPbGxbM9FIPNLlvu3_189iGDopWrGLqL_kJO17McRUU,24567
14
+ konduktor/backends/pod_utils.py,sha256=Jfv_CY8suF0e7QEaeQiNRRxRnOueLgPR8SfLEO7lnwc,15260
15
15
  konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
16
- konduktor/cli.py,sha256=NOJGSa7iakFpIcC6ny2uhGCugGNyku-U02rlvjgri5Q,45844
17
- konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
16
+ konduktor/cli.py,sha256=lEZmfrswuxMAyU5hmndMHqk4GkJyohk_TOHBx-0h90M,56316
17
+ konduktor/config.py,sha256=9upqgCCYvcu6fKw7tovEYC1MWTkAAir0_WHPdayylbI,15536
18
18
  konduktor/constants.py,sha256=T3AeXXxuQHINW_bAWyztvDeS8r4g8kXBGIwIq13cys0,1814
19
19
  konduktor/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
20
  konduktor/controller/constants.py,sha256=SGAgu9yTDWYXyVwxlaw1vfRJFOflPR549mKwgdzbI9w,1124
@@ -55,12 +55,12 @@ konduktor/dashboard/frontend/server.js,sha256=jcp6_Ww9YJD3uKY07jR3KMlAM6n1QZdxZn
55
55
  konduktor/dashboard/frontend/tailwind.config.js,sha256=fCnc48wvioIDOe5ldQ_6RE7F76cP7aU7pDrxBPJx-Fk,366
56
56
  konduktor/data/__init__.py,sha256=KMR2i3E9YcIpiIuCxtRdS7BQ1w2vUAbbve7agziJrLo,213
57
57
  konduktor/data/aws/__init__.py,sha256=_6zWfNNAK1QGgyKqg_yPYWcXlnffchyvIMErYa6tw_U,331
58
- konduktor/data/aws/s3.py,sha256=lNgI02wacyXudyrIfXPKrscH4o153Wa_o5qpQR-jjLQ,48506
58
+ konduktor/data/aws/s3.py,sha256=vW79oNoCwKm97iyUQvDScf2i-bXZ6he55UU-kViFa7I,48580
59
59
  konduktor/data/constants.py,sha256=yXVEoTI2we1xOjVSU-bjRCQCLpVvpEvJ0GedXvSwEfw,127
60
60
  konduktor/data/data_utils.py,sha256=IG1jgb_La997wi90xCvxYYsHQRlmm8Aooq04ZSf8EDI,9670
61
61
  konduktor/data/gcp/__init__.py,sha256=rlQxACBC_Vu36mdgPyJgUy4mGc_6Nt_a96JAuaPz2pQ,489
62
62
  konduktor/data/gcp/constants.py,sha256=dMfOiFccM8O6rUi9kClJcbvw1K1VnS1JzzQk3apq8ho,1483
63
- konduktor/data/gcp/gcs.py,sha256=fFaQydgFj0zJbZrVCrOq7goXE6gT19i3f1NQpe_Hdq4,41888
63
+ konduktor/data/gcp/gcs.py,sha256=ZYHkupCewphSlVwQ5HDAvHG0scwYri9JkklvK9AwcPc,41962
64
64
  konduktor/data/gcp/utils.py,sha256=FJQcMXZqtMIzjZ98b3lTTc0UbdPUKTDLsOsfJaaH5-s,214
65
65
  konduktor/data/registry.py,sha256=CUbMsN_Q17Pf4wRHkqZrycErEjTP7cLEdgcfwVGcEpc,696
66
66
  konduktor/data/storage.py,sha256=o2So-bY9glvgbGdoN7AQNYmNnvGf1AUDPpImtadRL90,35213
@@ -74,9 +74,9 @@ konduktor/manifests/dmesg_daemonset.yaml,sha256=pSWt7YOeTYjS0l0iki1fvHOs7MhY-sH-
74
74
  konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw1vcq9_3gwHCu75Bi0A,3939
75
75
  konduktor/resource.py,sha256=qQhMlI6gvTaoGfYb9NNgSrUavgNqfcYVfb9V_oC5pLE,20411
76
76
  konduktor/serving.py,sha256=sh8TPAUXg23Bkt0ByatIMdxFFqzRm18HJTEkt3wHzdo,5147
77
- konduktor/task.py,sha256=jrr-6mNWOB8I199N6OqAZIEmXoW17Xs1ZC9-y_N7P3w,37480
77
+ konduktor/task.py,sha256=97iLCo62qpN9wLGNPeFw64E8k1nch7AyySY3BUXHPWY,37496
78
78
  konduktor/templates/deployment.yaml.j2,sha256=uXFjDQaimbpFdAn2RJGaIvS_PzDY136cw_L3QMjz3ZA,3452
79
- konduktor/templates/jobset.yaml.j2,sha256=rdURknodtgLp4zoA2PX86Nn4wPpi3tr5l4IG55aWBRg,1059
79
+ konduktor/templates/jobset.yaml.j2,sha256=67yGuY4XdE4KBWN3DKvMJjlypQ0VpdiioRUAhpa3zA4,1072
80
80
  konduktor/templates/pod.yaml.j2,sha256=3uXx0ls2v8x-NL_Ypze5u9RoJS8F5bzoyOJcYwzf8Z0,18240
81
81
  konduktor/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
82
  konduktor/usage/constants.py,sha256=gCL8afIHZhO0dcxbJGpESE9sCC1cBSbeRnQ8GwNOY4M,612
@@ -84,12 +84,12 @@ konduktor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
84
  konduktor/utils/accelerator_registry.py,sha256=ythz3ynulP1DSSU7Jj5VUsQeBzSYRkxCVDZ5oOg0xtc,560
85
85
  konduktor/utils/annotations.py,sha256=oy2-BLydkFt3KWkXDuaGY84d6b7iISuy4eAT9uXk0Fc,2225
86
86
  konduktor/utils/base64_utils.py,sha256=mF-Tw98mFRG70YE4w6s9feuQSCYZHOb8YatBZwMugyI,3130
87
- konduktor/utils/common_utils.py,sha256=4yG5Kjvu1hu6x2nKNaaCUKQNrheUaG61Qe913MFPry8,15060
87
+ konduktor/utils/common_utils.py,sha256=8gBpzYiC1bQ8sbgHIFLkKCGT5nLs1afpejod60kVSos,15076
88
88
  konduktor/utils/constants.py,sha256=1DneiTR21lvKUcWdBGwC4I4fD4uPjbjLUilEnJS7rzA,216
89
89
  konduktor/utils/env_options.py,sha256=T41Slzf4Mzl-n45CGXXqdy2fCrYhPNZQ7RP5vmnN4xc,2258
90
90
  konduktor/utils/exceptions.py,sha256=5IFnN5bIUSBJv4KRRrCepk5jyY9EG5vWWQqbjCmP3NU,6682
91
91
  konduktor/utils/kubernetes_enums.py,sha256=SabUueF6Bpzbpa57gyH5VB65xla2N9l8CZmAeYTfGmM,176
92
- konduktor/utils/kubernetes_utils.py,sha256=VG7qatUFyWHY-PCQ8fYWh2kn2TMwfg84cn-VkXdCwI8,26077
92
+ konduktor/utils/kubernetes_utils.py,sha256=7RThCOiyaALRqbwHZ40qMnBsbAgt669k0NHkxtfx7Bs,26205
93
93
  konduktor/utils/log_utils.py,sha256=k4Qo0OlUZYQmLcbSD9tDWe6_Q5XcsLO_K8uVWjlTEU0,16938
94
94
  konduktor/utils/loki_utils.py,sha256=h2ZvZQr1nE_wXXsKsGMjhG2s2MXknNd4icydTR_ruKU,3539
95
95
  konduktor/utils/rich_utils.py,sha256=ycADW6Ij3wX3uT8ou7T8qxX519RxlkJivsLvUahQaJo,3583
@@ -97,8 +97,8 @@ konduktor/utils/schemas.py,sha256=tBrKhnkfn9uKDYdlb4L2KgooW-muuhww7U8fu9zX-ms,18
97
97
  konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
98
98
  konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
99
99
  konduktor/utils/validator.py,sha256=5C1kE57Eyj1OPnAbvojqMNHHtf5fnl47FK_vEttd8aw,4331
100
- konduktor_nightly-0.1.0.dev20250808105243.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
101
- konduktor_nightly-0.1.0.dev20250808105243.dist-info/METADATA,sha256=oECwmID0juizuwdOYpPMrX0n8Er1lPAaXZaMYmCA7-Y,4247
102
- konduktor_nightly-0.1.0.dev20250808105243.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
103
- konduktor_nightly-0.1.0.dev20250808105243.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
104
- konduktor_nightly-0.1.0.dev20250808105243.dist-info/RECORD,,
100
+ konduktor_nightly-0.1.0.dev20250809104842.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
101
+ konduktor_nightly-0.1.0.dev20250809104842.dist-info/METADATA,sha256=cOmfOuxpidR03qJMivceD6nkUWRwmNzm_D3OVKGF2q8,4247
102
+ konduktor_nightly-0.1.0.dev20250809104842.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
103
+ konduktor_nightly-0.1.0.dev20250809104842.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
104
+ konduktor_nightly-0.1.0.dev20250809104842.dist-info/RECORD,,