skypilot-nightly 1.0.0.dev20250729__py3-none-any.whl → 1.0.0.dev20250730__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (142)
  1. sky/__init__.py +2 -2
  2. sky/client/cli/command.py +0 -7
  3. sky/client/common.py +12 -9
  4. sky/clouds/nebius.py +1 -1
  5. sky/clouds/utils/gcp_utils.py +1 -1
  6. sky/clouds/vast.py +1 -2
  7. sky/dashboard/out/404.html +1 -1
  8. sky/dashboard/out/_next/static/_r2LwCFLjlWjZDUIJQG_V/_buildManifest.js +1 -0
  9. sky/dashboard/out/_next/static/chunks/1043-928582d4860fef92.js +1 -0
  10. sky/dashboard/out/_next/static/chunks/1141-3f10a5a9f697c630.js +11 -0
  11. sky/dashboard/out/_next/static/chunks/1559-6c00e20454194859.js +30 -0
  12. sky/dashboard/out/_next/static/chunks/1664-22b00e32c9ff96a4.js +1 -0
  13. sky/dashboard/out/_next/static/chunks/1871-1df8b686a51f3e3a.js +6 -0
  14. sky/dashboard/out/_next/static/chunks/2003.f90b06bb1f914295.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/2350.fab69e61bac57b23.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/2369.fc20f0c2c8ed9fe7.js +15 -0
  17. sky/dashboard/out/_next/static/chunks/2641.142718b6b78a6f9b.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/3698-7874720877646365.js +1 -0
  19. sky/dashboard/out/_next/static/chunks/3785.95524bc443db8260.js +1 -0
  20. sky/dashboard/out/_next/static/chunks/3937.210053269f121201.js +1 -0
  21. sky/dashboard/out/_next/static/chunks/4725.42f21f250f91f65b.js +1 -0
  22. sky/dashboard/out/_next/static/chunks/4869.18e6a4361a380763.js +16 -0
  23. sky/dashboard/out/_next/static/chunks/4937.d6bf67771e353356.js +15 -0
  24. sky/dashboard/out/_next/static/chunks/5230-f3bb2663e442e86c.js +1 -0
  25. sky/dashboard/out/_next/static/chunks/5739-d67458fcb1386c92.js +8 -0
  26. sky/dashboard/out/_next/static/chunks/6135-d0e285ac5f3f2485.js +1 -0
  27. sky/dashboard/out/_next/static/chunks/616-3d59f75e2ccf9321.js +39 -0
  28. sky/dashboard/out/_next/static/chunks/6212-7bd06f60ba693125.js +13 -0
  29. sky/dashboard/out/_next/static/chunks/6601-234b1cf963c7280b.js +1 -0
  30. sky/dashboard/out/_next/static/chunks/691.6d99cbfba347cebf.js +55 -0
  31. sky/dashboard/out/_next/static/chunks/6989-983d3ae7a874de98.js +1 -0
  32. sky/dashboard/out/_next/static/chunks/6990-08b2a1cae076a943.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/7411-b15471acd2cba716.js +41 -0
  34. sky/dashboard/out/_next/static/chunks/8969-9a8cca241b30db83.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/9025.7937c16bc8623516.js +6 -0
  36. sky/dashboard/out/_next/static/chunks/938-40d15b6261ec8dc1.js +1 -0
  37. sky/dashboard/out/_next/static/chunks/9847.4c46c5e229c78704.js +30 -0
  38. sky/dashboard/out/_next/static/chunks/9984.78ee6d2c6fa4b0e8.js +1 -0
  39. sky/dashboard/out/_next/static/chunks/fd9d1056-86323a29a8f7e46a.js +1 -0
  40. sky/dashboard/out/_next/static/chunks/framework-cf60a09ccd051a10.js +33 -0
  41. sky/dashboard/out/_next/static/chunks/main-app-587214043926b3cc.js +1 -0
  42. sky/dashboard/out/_next/static/chunks/main-f15ccb73239a3bf1.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/pages/_app-a67ae198457b9886.js +34 -0
  44. sky/dashboard/out/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-fa63e8b1d203f298.js +11 -0
  46. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-665fa5d96dd41d67.js +1 -0
  47. sky/dashboard/out/_next/static/chunks/pages/clusters-956ad430075efee8.js +1 -0
  48. sky/dashboard/out/_next/static/chunks/pages/config-8620d099cbef8608.js +1 -0
  49. sky/dashboard/out/_next/static/chunks/pages/index-444f1804401f04ea.js +1 -0
  50. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-9cfd875eecb6eaf5.js +1 -0
  51. sky/dashboard/out/_next/static/chunks/pages/infra-0fbdc9072f19fbe2.js +1 -0
  52. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-b25c109d6e41bcf4.js +11 -0
  53. sky/dashboard/out/_next/static/chunks/pages/jobs-6393a9edc7322b54.js +1 -0
  54. sky/dashboard/out/_next/static/chunks/pages/users-34d6bb10c3b3ee3d.js +1 -0
  55. sky/dashboard/out/_next/static/chunks/pages/volumes-225c8dae0634eb7f.js +1 -0
  56. sky/dashboard/out/_next/static/chunks/pages/workspace/new-92f741084a89e27b.js +1 -0
  57. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-4d41c9023287f59a.js +1 -0
  58. sky/dashboard/out/_next/static/chunks/pages/workspaces-e4cb7e97d37e93ad.js +1 -0
  59. sky/dashboard/out/_next/static/chunks/webpack-5adfc4d4b3db6f71.js +1 -0
  60. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  61. sky/dashboard/out/clusters/[cluster].html +1 -1
  62. sky/dashboard/out/clusters.html +1 -1
  63. sky/dashboard/out/config.html +1 -1
  64. sky/dashboard/out/index.html +1 -1
  65. sky/dashboard/out/infra/[context].html +1 -1
  66. sky/dashboard/out/infra.html +1 -1
  67. sky/dashboard/out/jobs/[job].html +1 -1
  68. sky/dashboard/out/jobs.html +1 -1
  69. sky/dashboard/out/users.html +1 -1
  70. sky/dashboard/out/volumes.html +1 -1
  71. sky/dashboard/out/workspace/new.html +1 -1
  72. sky/dashboard/out/workspaces/[name].html +1 -1
  73. sky/dashboard/out/workspaces.html +1 -1
  74. sky/data/data_utils.py +25 -0
  75. sky/data/storage.py +1219 -1775
  76. sky/global_user_state.py +18 -8
  77. sky/jobs/state.py +35 -7
  78. sky/jobs/utils.py +35 -17
  79. sky/logs/aws.py +4 -2
  80. sky/provision/kubernetes/utils.py +6 -4
  81. sky/provision/vast/instance.py +2 -1
  82. sky/provision/vast/utils.py +9 -6
  83. sky/resources.py +8 -2
  84. sky/server/server.py +6 -1
  85. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/METADATA +1 -1
  86. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/RECORD +91 -90
  87. sky/dashboard/out/_next/static/Q2sVXboB_t7cgvntL-6nD/_buildManifest.js +0 -1
  88. sky/dashboard/out/_next/static/chunks/1043-869d9c78bf5dd3df.js +0 -1
  89. sky/dashboard/out/_next/static/chunks/1141-e49a159c30a6c4a7.js +0 -11
  90. sky/dashboard/out/_next/static/chunks/1559-18717d96ef2fcbe9.js +0 -30
  91. sky/dashboard/out/_next/static/chunks/1664-d65361e92b85e786.js +0 -1
  92. sky/dashboard/out/_next/static/chunks/1871-ea0e7283886407ca.js +0 -6
  93. sky/dashboard/out/_next/static/chunks/2003.b82e6db40ec4c463.js +0 -1
  94. sky/dashboard/out/_next/static/chunks/2350.23778a2b19aabd33.js +0 -1
  95. sky/dashboard/out/_next/static/chunks/2369.2d6e4757f8dfc2b7.js +0 -15
  96. sky/dashboard/out/_next/static/chunks/2641.74c19c4d45a2c034.js +0 -1
  97. sky/dashboard/out/_next/static/chunks/3698-9fa11dafb5cad4a6.js +0 -1
  98. sky/dashboard/out/_next/static/chunks/3785.59705416215ff08b.js +0 -1
  99. sky/dashboard/out/_next/static/chunks/3937.d7f1c55d1916c7f2.js +0 -1
  100. sky/dashboard/out/_next/static/chunks/4725.66125dcd9832aa5d.js +0 -1
  101. sky/dashboard/out/_next/static/chunks/4869.da729a7db3a31f43.js +0 -16
  102. sky/dashboard/out/_next/static/chunks/4937.d75809403fc264ac.js +0 -15
  103. sky/dashboard/out/_next/static/chunks/5230-df791914b54d91d9.js +0 -1
  104. sky/dashboard/out/_next/static/chunks/5739-5ea3ffa10fc884f2.js +0 -8
  105. sky/dashboard/out/_next/static/chunks/6135-2abbd0352f8ee061.js +0 -1
  106. sky/dashboard/out/_next/static/chunks/616-162f3033ffcd3d31.js +0 -39
  107. sky/dashboard/out/_next/static/chunks/6601-d4a381403a8bae91.js +0 -1
  108. sky/dashboard/out/_next/static/chunks/691.488b4aef97c28727.js +0 -55
  109. sky/dashboard/out/_next/static/chunks/6989-eab0e9c16b64fd9f.js +0 -1
  110. sky/dashboard/out/_next/static/chunks/6990-f64e03df359e04f7.js +0 -1
  111. sky/dashboard/out/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js +0 -41
  112. sky/dashboard/out/_next/static/chunks/8969-8e0b2055bf5dd499.js +0 -1
  113. sky/dashboard/out/_next/static/chunks/9025.4a9099bdf3ed4875.js +0 -6
  114. sky/dashboard/out/_next/static/chunks/938-7ee806653aef0609.js +0 -1
  115. sky/dashboard/out/_next/static/chunks/9847.387abf8a14d722db.js +0 -30
  116. sky/dashboard/out/_next/static/chunks/9984.0460de9d3adf5582.js +0 -1
  117. sky/dashboard/out/_next/static/chunks/fd9d1056-61f2257a9cd8b32b.js +0 -1
  118. sky/dashboard/out/_next/static/chunks/framework-efc06c2733009cd3.js +0 -33
  119. sky/dashboard/out/_next/static/chunks/main-app-68c028b1bc5e1b72.js +0 -1
  120. sky/dashboard/out/_next/static/chunks/main-c0a4f1ea606d48d2.js +0 -1
  121. sky/dashboard/out/_next/static/chunks/pages/_app-da491665d4289aae.js +0 -34
  122. sky/dashboard/out/_next/static/chunks/pages/_error-c72a1f77a3c0be1b.js +0 -1
  123. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-2186770cc2de1623.js +0 -11
  124. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-95afb019ab85801c.js +0 -6
  125. sky/dashboard/out/_next/static/chunks/pages/clusters-3d4be4961e1c94eb.js +0 -1
  126. sky/dashboard/out/_next/static/chunks/pages/config-a2673b256b6d416f.js +0 -1
  127. sky/dashboard/out/_next/static/chunks/pages/index-89e7daf7b7df02e0.js +0 -1
  128. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-a90b4fe4616dc501.js +0 -1
  129. sky/dashboard/out/_next/static/chunks/pages/infra-0d3d1f890c5d188a.js +0 -1
  130. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-dc0299ffefebcdbe.js +0 -16
  131. sky/dashboard/out/_next/static/chunks/pages/jobs-49f790d12a85027c.js +0 -1
  132. sky/dashboard/out/_next/static/chunks/pages/users-6790fcefd5487b13.js +0 -1
  133. sky/dashboard/out/_next/static/chunks/pages/volumes-61ea7ba7e56f8d06.js +0 -1
  134. sky/dashboard/out/_next/static/chunks/pages/workspace/new-5629d4e551dba1ee.js +0 -1
  135. sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-6bcd4b20914d76c9.js +0 -1
  136. sky/dashboard/out/_next/static/chunks/pages/workspaces-5f7fe4b7d55b8612.js +0 -1
  137. sky/dashboard/out/_next/static/chunks/webpack-a305898dc479711e.js +0 -1
  138. /sky/dashboard/out/_next/static/{Q2sVXboB_t7cgvntL-6nD → _r2LwCFLjlWjZDUIJQG_V}/_ssgManifest.js +0 -0
  139. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/WHEEL +0 -0
  140. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/entry_points.txt +0 -0
  141. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/licenses/LICENSE +0 -0
  142. {skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/top_level.txt +0 -0
sky/global_user_state.py CHANGED
@@ -11,6 +11,7 @@ import json
11
11
  import os
12
12
  import pickle
13
13
  import re
14
+ import threading
14
15
  import time
15
16
  import typing
16
17
  from typing import Any, Dict, List, Optional, Set, Tuple
@@ -47,6 +48,7 @@ _ENABLED_CLOUDS_KEY_PREFIX = 'enabled_clouds_'
47
48
  _ALLOWED_CLOUDS_KEY_PREFIX = 'allowed_clouds_'
48
49
 
49
50
  _SQLALCHEMY_ENGINE: Optional[sqlalchemy.engine.Engine] = None
51
+ _SQLALCHEMY_ENGINE_LOCK = threading.Lock()
50
52
 
51
53
  Base = declarative.declarative_base()
52
54
 
@@ -241,21 +243,29 @@ def create_table(engine: sqlalchemy.engine.Engine):
241
243
  migration_utils.GLOBAL_USER_STATE_VERSION)
242
244
 
243
245
 
246
+ # We wrap the sqlalchemy engine initialization in a thread
247
+ # lock to ensure that multiple threads do not initialize the
248
+ # engine which could result in a rare race condition where
249
+ # a session has already been created with _SQLALCHEMY_ENGINE = e1,
250
+ # and then another thread overwrites _SQLALCHEMY_ENGINE = e2
251
+ # which could result in e1 being garbage collected unexpectedly.
244
252
  def initialize_and_get_db() -> sqlalchemy.engine.Engine:
245
253
  global _SQLALCHEMY_ENGINE
246
254
 
247
255
  if _SQLALCHEMY_ENGINE is not None:
248
256
  return _SQLALCHEMY_ENGINE
257
+ with _SQLALCHEMY_ENGINE_LOCK:
258
+ if _SQLALCHEMY_ENGINE is not None:
259
+ return _SQLALCHEMY_ENGINE
260
+ # get an engine to the db
261
+ engine = migration_utils.get_engine('state')
249
262
 
250
- # get an engine to the db
251
- engine = migration_utils.get_engine('state')
263
+ # run migrations if needed
264
+ create_table(engine)
252
265
 
253
- # run migrations if needed
254
- create_table(engine)
255
-
256
- # return engine
257
- _SQLALCHEMY_ENGINE = engine
258
- return _SQLALCHEMY_ENGINE
266
+ # return engine
267
+ _SQLALCHEMY_ENGINE = engine
268
+ return _SQLALCHEMY_ENGINE
259
269
 
260
270
 
261
271
  def _init_db(func):
sky/jobs/state.py CHANGED
@@ -4,6 +4,7 @@
4
4
  import enum
5
5
  import functools
6
6
  import json
7
+ import threading
7
8
  import time
8
9
  import typing
9
10
  from typing import Any, Callable, Dict, List, Optional, Tuple, Union
@@ -33,6 +34,7 @@ CallbackType = Callable[[str], None]
33
34
  logger = sky_logging.init_logger(__name__)
34
35
 
35
36
  _SQLALCHEMY_ENGINE: Optional[sqlalchemy.engine.Engine] = None
37
+ _SQLALCHEMY_ENGINE_LOCK = threading.Lock()
36
38
 
37
39
  Base = declarative.declarative_base()
38
40
 
@@ -131,21 +133,30 @@ def create_table(engine: sqlalchemy.engine.Engine):
131
133
  migration_utils.SPOT_JOBS_VERSION)
132
134
 
133
135
 
136
+ # We wrap the sqlalchemy engine initialization in a thread
137
+ # lock to ensure that multiple threads do not initialize the
138
+ # engine which could result in a rare race condition where
139
+ # a session has already been created with _SQLALCHEMY_ENGINE = e1,
140
+ # and then another thread overwrites _SQLALCHEMY_ENGINE = e2
141
+ # which could result in e1 being garbage collected unexpectedly.
134
142
  def initialize_and_get_db() -> sqlalchemy.engine.Engine:
135
143
  global _SQLALCHEMY_ENGINE
136
144
 
137
145
  if _SQLALCHEMY_ENGINE is not None:
138
146
  return _SQLALCHEMY_ENGINE
139
147
 
140
- # get an engine to the db
141
- engine = migration_utils.get_engine('spot_jobs')
148
+ with _SQLALCHEMY_ENGINE_LOCK:
149
+ if _SQLALCHEMY_ENGINE is not None:
150
+ return _SQLALCHEMY_ENGINE
151
+ # get an engine to the db
152
+ engine = migration_utils.get_engine('spot_jobs')
142
153
 
143
- # run migrations if needed
144
- create_table(engine)
154
+ # run migrations if needed
155
+ create_table(engine)
145
156
 
146
- # return engine
147
- _SQLALCHEMY_ENGINE = engine
148
- return _SQLALCHEMY_ENGINE
157
+ # return engine
158
+ _SQLALCHEMY_ENGINE = engine
159
+ return _SQLALCHEMY_ENGINE
149
160
 
150
161
 
151
162
  def _init_db(func):
@@ -1045,6 +1056,23 @@ def _get_all_task_ids_statuses(
1045
1056
  return [(row[0], ManagedJobStatus(row[1])) for row in id_statuses]
1046
1057
 
1047
1058
 
1059
+ @_init_db
1060
+ def get_all_task_ids_names_statuses_logs(
1061
+ job_id: int) -> List[Tuple[int, str, ManagedJobStatus, str]]:
1062
+ assert _SQLALCHEMY_ENGINE is not None
1063
+ with orm.Session(_SQLALCHEMY_ENGINE) as session:
1064
+ id_names = session.execute(
1065
+ sqlalchemy.select(
1066
+ spot_table.c.task_id,
1067
+ spot_table.c.task_name,
1068
+ spot_table.c.status,
1069
+ spot_table.c.local_log_file,
1070
+ ).where(spot_table.c.spot_job_id == job_id).order_by(
1071
+ spot_table.c.task_id.asc())).fetchall()
1072
+ return [(row[0], row[1], ManagedJobStatus(row[2]), row[3])
1073
+ for row in id_names]
1074
+
1075
+
1048
1076
  @_init_db
1049
1077
  def get_job_status_with_task_id(job_id: int,
1050
1078
  task_id: int) -> Optional[ManagedJobStatus]:
sky/jobs/utils.py CHANGED
@@ -716,23 +716,41 @@ def stream_logs_by_id(job_id: int,
716
716
  if managed_job_status.is_failed():
717
717
  job_msg = ('\nFailure reason: '
718
718
  f'{managed_job_state.get_failure_reason(job_id)}')
719
- log_file = managed_job_state.get_local_log_file(job_id, None)
720
- if log_file is not None:
721
- with open(os.path.expanduser(log_file), 'r',
722
- encoding='utf-8') as f:
723
- # Stream the logs to the console without reading the whole
724
- # file into memory.
725
- start_streaming = False
726
- read_from: Union[TextIO, Deque[str]] = f
727
- if tail is not None:
728
- assert tail > 0
729
- # Read only the last 'tail' lines using deque
730
- read_from = collections.deque(f, maxlen=tail)
731
- for line in read_from:
732
- if log_lib.LOG_FILE_START_STREAMING_AT in line:
733
- start_streaming = True
734
- if start_streaming:
735
- print(line, end='', flush=True)
719
+ log_file_exists = False
720
+ task_info = managed_job_state.get_all_task_ids_names_statuses_logs(
721
+ job_id)
722
+ num_tasks = len(task_info)
723
+ for task_id, task_name, task_status, log_file in task_info:
724
+ if log_file:
725
+ log_file_exists = True
726
+ task_str = (f'Task {task_name}({task_id})'
727
+ if task_name else f'Task {task_id}')
728
+ if num_tasks > 1:
729
+ print(f'=== {task_str} ===')
730
+ with open(os.path.expanduser(log_file),
731
+ 'r',
732
+ encoding='utf-8') as f:
733
+ # Stream the logs to the console without reading the
734
+ # whole file into memory.
735
+ start_streaming = False
736
+ read_from: Union[TextIO, Deque[str]] = f
737
+ if tail is not None:
738
+ assert tail > 0
739
+ # Read only the last 'tail' lines using deque
740
+ read_from = collections.deque(f, maxlen=tail)
741
+ for line in read_from:
742
+ if log_lib.LOG_FILE_START_STREAMING_AT in line:
743
+ start_streaming = True
744
+ if start_streaming:
745
+ print(line, end='', flush=True)
746
+ if num_tasks > 1:
747
+ # Add the "Task finished" message for terminal states
748
+ if task_status.is_terminal():
749
+ print(ux_utils.finishing_message(
750
+ f'{task_str} finished '
751
+ f'(status: {task_status.value}).'),
752
+ flush=True)
753
+ if log_file_exists:
736
754
  # Add the "Job finished" message for terminal states
737
755
  if managed_job_status.is_terminal():
738
756
  print(ux_utils.finishing_message(
sky/logs/aws.py CHANGED
@@ -9,6 +9,8 @@ from sky.skylet import constants
9
9
  from sky.utils import common_utils
10
10
  from sky.utils import resources_utils
11
11
 
12
+ EC2_MD_URL = '"${AWS_EC2_METADATA_SERVICE_ENDPOINT:-http://169.254.169.254/}"'
13
+
12
14
 
13
15
  class _CloudwatchLoggingConfig(pydantic.BaseModel):
14
16
  """Configuration for AWS CloudWatch logging agent."""
@@ -109,8 +111,8 @@ class CloudwatchLoggingAgent(FluentbitAgent):
109
111
  # Check if we're running on EC2 with an IAM role or if
110
112
  # AWS credentials are available in the environment
111
113
  pre_cmd = (
112
- 'if ! curl -s -m 1 http://169.254.169.254'
113
- '/latest/meta-data/iam/security-credentials/ > /dev/null; '
114
+ f'if ! curl -s -m 1 {EC2_MD_URL}'
115
+ 'latest/meta-data/iam/security-credentials/ > /dev/null; '
114
116
  'then '
115
117
  # failed EC2 check, look for env vars
116
118
  'if [ -z "$AWS_ACCESS_KEY_ID" ] || '
sky/provision/kubernetes/utils.py CHANGED
@@ -3179,10 +3179,12 @@ def get_skypilot_pods(context: Optional[str] = None) -> List[Any]:
3179
3179
  return pods
3180
3180
 
3181
3181
 
3182
- def is_tpu_on_gke(accelerator: str) -> bool:
3182
+ def is_tpu_on_gke(accelerator: str, normalize: bool = True) -> bool:
3183
3183
  """Determines if the given accelerator is a TPU supported on GKE."""
3184
- normalized, _ = normalize_tpu_accelerator_name(accelerator)
3185
- return normalized in GKE_TPU_ACCELERATOR_TO_GENERATION
3184
+ if normalize:
3185
+ normalized, _ = normalize_tpu_accelerator_name(accelerator)
3186
+ return normalized in GKE_TPU_ACCELERATOR_TO_GENERATION
3187
+ return accelerator in GKE_TPU_ACCELERATOR_TO_GENERATION
3186
3188
 
3187
3189
 
3188
3190
  def get_node_accelerator_count(context: Optional[str],
@@ -3384,7 +3386,7 @@ def process_skypilot_pods(
3384
3386
 
3385
3387
  def _gpu_resource_key_helper(context: Optional[str]) -> str:
3386
3388
  """Helper function to get the GPU resource key."""
3387
- gpu_resource_key = SUPPORTED_GPU_RESOURCE_KEYS['amd']
3389
+ gpu_resource_key = SUPPORTED_GPU_RESOURCE_KEYS['nvidia']
3388
3390
  try:
3389
3391
  nodes = kubernetes.core_api(context).list_node().items
3390
3392
  for gpu_key in SUPPORTED_GPU_RESOURCE_KEYS.values():
sky/provision/vast/instance.py CHANGED
@@ -97,7 +97,8 @@ def run_instances(region: str, cluster_name_on_cloud: str,
97
97
  region=region,
98
98
  disk_size=config.node_config['DiskSize'],
99
99
  preemptible=config.node_config['Preemptible'],
100
- image_name=config.node_config['ImageId'])
100
+ image_name=config.node_config['ImageId'],
101
+ ports=config.ports_to_open_on_launch)
101
102
  except Exception as e: # pylint: disable=broad-except
102
103
  logger.warning(f'run_instances error: {e}')
103
104
  raise
sky/provision/vast/utils.py CHANGED
@@ -5,7 +5,7 @@
5
5
  # python sdk.
6
6
  #
7
7
  """Vast library wrapper for SkyPilot."""
8
- from typing import Any, Dict, List
8
+ from typing import Any, Dict, List, Optional
9
9
 
10
10
  from sky import sky_logging
11
11
  from sky.adaptors import vast
@@ -34,7 +34,8 @@ def list_instances() -> Dict[str, Dict[str, Any]]:
34
34
 
35
35
 
36
36
  def launch(name: str, instance_type: str, region: str, disk_size: int,
37
- image_name: str, preemptible: bool) -> str:
37
+ image_name: str, ports: Optional[List[int]],
38
+ preemptible: bool) -> str:
38
39
  """Launches an instance with the given parameters.
39
40
 
40
41
  Converts the instance_type to the Vast GPU name, finds the specs for the
@@ -58,6 +59,8 @@ def launch(name: str, instance_type: str, region: str, disk_size: int,
58
59
  The disk size {xx} GB is not exactly matched the requested
59
60
  size {yy} GB. It is possible to charge extra cost on disk.
60
61
 
62
+ * `ports`: This is a feature flag to expose ports to the internet.
63
+
61
64
  * `geolocation`: Geolocation on Vast can be as specific as the
62
65
  host chooses to be. They can say, for instance, "Yutakachō,
63
66
  Shinagawa District, Tokyo, JP." Such a specific geolocation
@@ -79,9 +82,7 @@ def launch(name: str, instance_type: str, region: str, disk_size: int,
79
82
 
80
83
  * Vast instance types are an invention for skypilot. Refer to
81
84
  catalog/vast_catalog.py for the current construction
82
- of the type.
83
-
84
- """
85
+ of the type."""
85
86
  cpu_ram = float(instance_type.split('-')[-1]) / 1024
86
87
  gpu_name = instance_type.split('-')[1].replace('_', ' ')
87
88
  num_gpus = int(instance_type.split('-')[0].replace('x', ''))
@@ -104,11 +105,13 @@ def launch(name: str, instance_type: str, region: str, disk_size: int,
104
105
 
105
106
  instance_touse = instance_list[0]
106
107
 
108
+ port_map = ' '.join([f'-p {p}:{p}' for p in ports]) if ports else ''
109
+
107
110
  launch_params = {
108
111
  'id': instance_touse['id'],
109
112
  'direct': True,
110
113
  'ssh': True,
111
- 'env': '-e __SOURCE=skypilot',
114
+ 'env': f'-e __SOURCE=skypilot {port_map}',
112
115
  'onstart_cmd': ';'.join([
113
116
  'touch ~/.no_auto_tmux',
114
117
  f'echo "{vast.vast().api_key_access}" > ~/.vast_api_key',
sky/resources.py CHANGED
@@ -797,8 +797,13 @@ class Resources:
797
797
 
798
798
  acc, _ = list(accelerators.items())[0]
799
799
  if 'tpu' in acc.lower():
800
+ # TODO(syang): GCP TPU names are supported on both GCP and
801
+ # kubernetes (GKE), but this logic automatically assumes
802
+ # GCP TPUs can only be used on GCP.
803
+ # Fix the logic such that GCP TPU names can failover between
804
+ # GCP and kubernetes.
800
805
  if self.cloud is None:
801
- if kubernetes_utils.is_tpu_on_gke(acc):
806
+ if kubernetes_utils.is_tpu_on_gke(acc, normalize=False):
802
807
  self._cloud = clouds.Kubernetes()
803
808
  else:
804
809
  self._cloud = clouds.GCP()
@@ -813,7 +818,8 @@ class Resources:
813
818
 
814
819
  use_tpu_vm = accelerator_args.get('tpu_vm', True)
815
820
  if (self.cloud.is_same_cloud(clouds.GCP()) and
816
- not kubernetes_utils.is_tpu_on_gke(acc)):
821
+ not kubernetes_utils.is_tpu_on_gke(acc,
822
+ normalize=False)):
817
823
  if 'runtime_version' not in accelerator_args:
818
824
 
819
825
  def _get_default_runtime_version() -> str:
sky/server/server.py CHANGED
@@ -882,10 +882,15 @@ async def upload_zip_file(request: fastapi.Request, user_hash: str,
882
882
  upload_ids_to_cleanup[(upload_id,
883
883
  user_hash)] = (datetime.datetime.now() +
884
884
  _DEFAULT_UPLOAD_EXPIRATION_TIME)
885
+ # For anonymous access, use the user hash from client
886
+ user_id = user_hash
887
+ if request.state.auth_user is not None:
888
+ # Otherwise, the authenticated identity should be used.
889
+ user_id = request.state.auth_user.id
885
890
 
886
891
  # TODO(SKY-1271): We need to double check security of uploading zip file.
887
892
  client_file_mounts_dir = (
888
- common.API_SERVER_CLIENT_DIR.expanduser().resolve() / user_hash /
893
+ common.API_SERVER_CLIENT_DIR.expanduser().resolve() / user_id /
889
894
  'file_mounts')
890
895
  client_file_mounts_dir.mkdir(parents=True, exist_ok=True)
891
896
 
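{skypilot_nightly-1.0.0.dev20250729.dist-info → skypilot_nightly-1.0.0.dev20250730.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@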
1
1
  Metadata-Version: 2.4
2
2
  Name: skypilot-nightly
3
- Version: 1.0.0.dev20250729
3
+ Version: 1.0.0.dev20250730
4
4
  Summary: SkyPilot: Run AI on Any Infra — Unified, Faster, Cheaper.
5
5
  Author: SkyPilot Team
6
6
  License: Apache 2.0