skypilot_nightly-1.0.0.dev20251009-py3-none-any.whl → skypilot_nightly-1.0.0.dev20251107-py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.


Files changed (231)
  1. sky/__init__.py +6 -2
  2. sky/adaptors/aws.py +25 -7
  3. sky/adaptors/coreweave.py +278 -0
  4. sky/adaptors/kubernetes.py +64 -0
  5. sky/adaptors/shadeform.py +89 -0
  6. sky/admin_policy.py +20 -0
  7. sky/authentication.py +59 -149
  8. sky/backends/backend_utils.py +104 -63
  9. sky/backends/cloud_vm_ray_backend.py +84 -39
  10. sky/catalog/data_fetchers/fetch_runpod.py +698 -0
  11. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  12. sky/catalog/kubernetes_catalog.py +24 -28
  13. sky/catalog/runpod_catalog.py +5 -1
  14. sky/catalog/shadeform_catalog.py +165 -0
  15. sky/check.py +25 -13
  16. sky/client/cli/command.py +335 -86
  17. sky/client/cli/flags.py +4 -2
  18. sky/client/cli/table_utils.py +17 -9
  19. sky/client/sdk.py +59 -12
  20. sky/cloud_stores.py +73 -0
  21. sky/clouds/__init__.py +2 -0
  22. sky/clouds/aws.py +71 -16
  23. sky/clouds/azure.py +12 -5
  24. sky/clouds/cloud.py +19 -9
  25. sky/clouds/cudo.py +12 -5
  26. sky/clouds/do.py +4 -1
  27. sky/clouds/fluidstack.py +12 -5
  28. sky/clouds/gcp.py +12 -5
  29. sky/clouds/hyperbolic.py +12 -5
  30. sky/clouds/ibm.py +12 -5
  31. sky/clouds/kubernetes.py +62 -25
  32. sky/clouds/lambda_cloud.py +12 -5
  33. sky/clouds/nebius.py +12 -5
  34. sky/clouds/oci.py +12 -5
  35. sky/clouds/paperspace.py +4 -1
  36. sky/clouds/primeintellect.py +4 -1
  37. sky/clouds/runpod.py +12 -5
  38. sky/clouds/scp.py +12 -5
  39. sky/clouds/seeweb.py +4 -1
  40. sky/clouds/shadeform.py +400 -0
  41. sky/clouds/ssh.py +4 -2
  42. sky/clouds/vast.py +12 -5
  43. sky/clouds/vsphere.py +4 -1
  44. sky/core.py +12 -11
  45. sky/dashboard/out/404.html +1 -1
  46. sky/dashboard/out/_next/static/chunks/1141-e6aa9ab418717c59.js +11 -0
  47. sky/dashboard/out/_next/static/chunks/{1871-49141c317f3a9020.js → 1871-74503c8e80fd253b.js} +1 -1
  48. sky/dashboard/out/_next/static/chunks/2260-7703229c33c5ebd5.js +1 -0
  49. sky/dashboard/out/_next/static/chunks/2755.fff53c4a3fcae910.js +26 -0
  50. sky/dashboard/out/_next/static/chunks/3294.72362fa129305b19.js +1 -0
  51. sky/dashboard/out/_next/static/chunks/{3785.a19328ba41517b8b.js → 3785.ad6adaa2a0fa9768.js} +1 -1
  52. sky/dashboard/out/_next/static/chunks/{4725.10f7a9a5d3ea8208.js → 4725.a830b5c9e7867c92.js} +1 -1
  53. sky/dashboard/out/_next/static/chunks/6856-ef8ba11f96d8c4a3.js +1 -0
  54. sky/dashboard/out/_next/static/chunks/6990-32b6e2d3822301fa.js +1 -0
  55. sky/dashboard/out/_next/static/chunks/7615-3301e838e5f25772.js +1 -0
  56. sky/dashboard/out/_next/static/chunks/8969-1e4613c651bf4051.js +1 -0
  57. sky/dashboard/out/_next/static/chunks/9025.fa408f3242e9028d.js +6 -0
  58. sky/dashboard/out/_next/static/chunks/9353-cff34f7e773b2e2b.js +1 -0
  59. sky/dashboard/out/_next/static/chunks/9360.7310982cf5a0dc79.js +31 -0
  60. sky/dashboard/out/_next/static/chunks/pages/{_app-ce361c6959bc2001.js → _app-bde01e4a2beec258.js} +1 -1
  61. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c736ead69c2d86ec.js +16 -0
  62. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-477555ab7c0b13d8.js → [cluster]-a37d2063af475a1c.js} +1 -1
  63. sky/dashboard/out/_next/static/chunks/pages/{clusters-2f61f65487f6d8ff.js → clusters-d44859594e6f8064.js} +1 -1
  64. sky/dashboard/out/_next/static/chunks/pages/infra/{[context]-553b8b5cb65e100b.js → [context]-c0b5935149902e6f.js} +1 -1
  65. sky/dashboard/out/_next/static/chunks/pages/{infra-910a22500c50596f.js → infra-aed0ea19df7cf961.js} +1 -1
  66. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-5796e8d6aea291a0.js +16 -0
  67. sky/dashboard/out/_next/static/chunks/pages/jobs/pools/{[pool]-bc979970c247d8f3.js → [pool]-6edeb7d06032adfc.js} +2 -2
  68. sky/dashboard/out/_next/static/chunks/pages/{jobs-a35a9dc3c5ccd657.js → jobs-479dde13399cf270.js} +1 -1
  69. sky/dashboard/out/_next/static/chunks/pages/{users-98d2ed979084162a.js → users-5ab3b907622cf0fe.js} +1 -1
  70. sky/dashboard/out/_next/static/chunks/pages/{volumes-835d14ba94808f79.js → volumes-b84b948ff357c43e.js} +1 -1
  71. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-e8688c35c06f0ac5.js → [name]-c5a3eeee1c218af1.js} +1 -1
  72. sky/dashboard/out/_next/static/chunks/pages/{workspaces-69c80d677d3c2949.js → workspaces-22b23febb3e89ce1.js} +1 -1
  73. sky/dashboard/out/_next/static/chunks/webpack-2679be77fc08a2f8.js +1 -0
  74. sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
  75. sky/dashboard/out/_next/static/zB0ed6ge_W1MDszVHhijS/_buildManifest.js +1 -0
  76. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  77. sky/dashboard/out/clusters/[cluster].html +1 -1
  78. sky/dashboard/out/clusters.html +1 -1
  79. sky/dashboard/out/config.html +1 -1
  80. sky/dashboard/out/index.html +1 -1
  81. sky/dashboard/out/infra/[context].html +1 -1
  82. sky/dashboard/out/infra.html +1 -1
  83. sky/dashboard/out/jobs/[job].html +1 -1
  84. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  85. sky/dashboard/out/jobs.html +1 -1
  86. sky/dashboard/out/users.html +1 -1
  87. sky/dashboard/out/volumes.html +1 -1
  88. sky/dashboard/out/workspace/new.html +1 -1
  89. sky/dashboard/out/workspaces/[name].html +1 -1
  90. sky/dashboard/out/workspaces.html +1 -1
  91. sky/data/data_utils.py +92 -1
  92. sky/data/mounting_utils.py +143 -19
  93. sky/data/storage.py +168 -11
  94. sky/exceptions.py +13 -1
  95. sky/execution.py +13 -0
  96. sky/global_user_state.py +189 -113
  97. sky/jobs/client/sdk.py +32 -10
  98. sky/jobs/client/sdk_async.py +9 -3
  99. sky/jobs/constants.py +3 -1
  100. sky/jobs/controller.py +164 -192
  101. sky/jobs/file_content_utils.py +80 -0
  102. sky/jobs/log_gc.py +201 -0
  103. sky/jobs/recovery_strategy.py +59 -82
  104. sky/jobs/scheduler.py +20 -9
  105. sky/jobs/server/core.py +105 -23
  106. sky/jobs/server/server.py +40 -28
  107. sky/jobs/server/utils.py +32 -11
  108. sky/jobs/state.py +588 -110
  109. sky/jobs/utils.py +442 -209
  110. sky/logs/agent.py +1 -1
  111. sky/metrics/utils.py +45 -6
  112. sky/optimizer.py +1 -1
  113. sky/provision/__init__.py +7 -0
  114. sky/provision/aws/instance.py +2 -1
  115. sky/provision/azure/instance.py +2 -1
  116. sky/provision/common.py +2 -0
  117. sky/provision/cudo/instance.py +2 -1
  118. sky/provision/do/instance.py +2 -1
  119. sky/provision/fluidstack/instance.py +4 -3
  120. sky/provision/gcp/instance.py +2 -1
  121. sky/provision/hyperbolic/instance.py +2 -1
  122. sky/provision/instance_setup.py +10 -2
  123. sky/provision/kubernetes/constants.py +0 -1
  124. sky/provision/kubernetes/instance.py +222 -89
  125. sky/provision/kubernetes/network.py +12 -8
  126. sky/provision/kubernetes/utils.py +114 -53
  127. sky/provision/kubernetes/volume.py +5 -4
  128. sky/provision/lambda_cloud/instance.py +2 -1
  129. sky/provision/nebius/instance.py +2 -1
  130. sky/provision/oci/instance.py +2 -1
  131. sky/provision/paperspace/instance.py +2 -1
  132. sky/provision/provisioner.py +11 -2
  133. sky/provision/runpod/instance.py +2 -1
  134. sky/provision/scp/instance.py +2 -1
  135. sky/provision/seeweb/instance.py +3 -3
  136. sky/provision/shadeform/__init__.py +11 -0
  137. sky/provision/shadeform/config.py +12 -0
  138. sky/provision/shadeform/instance.py +351 -0
  139. sky/provision/shadeform/shadeform_utils.py +83 -0
  140. sky/provision/vast/instance.py +2 -1
  141. sky/provision/vsphere/instance.py +2 -1
  142. sky/resources.py +1 -1
  143. sky/schemas/api/responses.py +9 -5
  144. sky/schemas/db/skypilot_config/001_initial_schema.py +30 -0
  145. sky/schemas/db/spot_jobs/004_job_file_contents.py +42 -0
  146. sky/schemas/db/spot_jobs/005_logs_gc.py +38 -0
  147. sky/schemas/generated/jobsv1_pb2.py +52 -52
  148. sky/schemas/generated/jobsv1_pb2.pyi +4 -2
  149. sky/schemas/generated/managed_jobsv1_pb2.py +39 -35
  150. sky/schemas/generated/managed_jobsv1_pb2.pyi +21 -5
  151. sky/serve/client/impl.py +11 -3
  152. sky/serve/replica_managers.py +5 -2
  153. sky/serve/serve_utils.py +9 -2
  154. sky/serve/server/impl.py +7 -2
  155. sky/serve/server/server.py +18 -15
  156. sky/serve/service.py +2 -2
  157. sky/server/auth/oauth2_proxy.py +2 -5
  158. sky/server/common.py +31 -28
  159. sky/server/constants.py +5 -1
  160. sky/server/daemons.py +27 -19
  161. sky/server/requests/executor.py +138 -74
  162. sky/server/requests/payloads.py +9 -1
  163. sky/server/requests/preconditions.py +13 -10
  164. sky/server/requests/request_names.py +120 -0
  165. sky/server/requests/requests.py +485 -153
  166. sky/server/requests/serializers/decoders.py +26 -13
  167. sky/server/requests/serializers/encoders.py +56 -11
  168. sky/server/requests/threads.py +106 -0
  169. sky/server/rest.py +70 -18
  170. sky/server/server.py +283 -104
  171. sky/server/stream_utils.py +233 -59
  172. sky/server/uvicorn.py +18 -17
  173. sky/setup_files/alembic.ini +4 -0
  174. sky/setup_files/dependencies.py +32 -13
  175. sky/sky_logging.py +0 -2
  176. sky/skylet/constants.py +30 -7
  177. sky/skylet/events.py +7 -0
  178. sky/skylet/log_lib.py +8 -2
  179. sky/skylet/log_lib.pyi +1 -1
  180. sky/skylet/services.py +26 -13
  181. sky/skylet/subprocess_daemon.py +103 -29
  182. sky/skypilot_config.py +87 -75
  183. sky/ssh_node_pools/server.py +9 -8
  184. sky/task.py +67 -54
  185. sky/templates/kubernetes-ray.yml.j2 +8 -1
  186. sky/templates/nebius-ray.yml.j2 +1 -0
  187. sky/templates/shadeform-ray.yml.j2 +72 -0
  188. sky/templates/websocket_proxy.py +142 -12
  189. sky/users/permission.py +8 -1
  190. sky/utils/admin_policy_utils.py +16 -3
  191. sky/utils/asyncio_utils.py +78 -0
  192. sky/utils/auth_utils.py +153 -0
  193. sky/utils/cli_utils/status_utils.py +8 -2
  194. sky/utils/command_runner.py +11 -0
  195. sky/utils/common.py +3 -1
  196. sky/utils/common_utils.py +7 -4
  197. sky/utils/context.py +57 -51
  198. sky/utils/context_utils.py +30 -12
  199. sky/utils/controller_utils.py +35 -8
  200. sky/utils/db/db_utils.py +37 -10
  201. sky/utils/db/migration_utils.py +8 -4
  202. sky/utils/locks.py +24 -6
  203. sky/utils/resource_checker.py +4 -1
  204. sky/utils/resources_utils.py +53 -29
  205. sky/utils/schemas.py +23 -4
  206. sky/utils/subprocess_utils.py +17 -4
  207. sky/volumes/server/server.py +7 -6
  208. sky/workspaces/server.py +13 -12
  209. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/METADATA +306 -55
  210. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/RECORD +215 -195
  211. sky/dashboard/out/_next/static/chunks/1121-d0782b9251f0fcd3.js +0 -1
  212. sky/dashboard/out/_next/static/chunks/1141-3b40c39626f99c89.js +0 -11
  213. sky/dashboard/out/_next/static/chunks/2755.97300e1362fe7c98.js +0 -26
  214. sky/dashboard/out/_next/static/chunks/3015-8d748834fcc60b46.js +0 -1
  215. sky/dashboard/out/_next/static/chunks/3294.1fafbf42b3bcebff.js +0 -1
  216. sky/dashboard/out/_next/static/chunks/6135-4b4d5e824b7f9d3c.js +0 -1
  217. sky/dashboard/out/_next/static/chunks/6856-5fdc9b851a18acdb.js +0 -1
  218. sky/dashboard/out/_next/static/chunks/6990-f6818c84ed8f1c86.js +0 -1
  219. sky/dashboard/out/_next/static/chunks/8969-66237729cdf9749e.js +0 -1
  220. sky/dashboard/out/_next/static/chunks/9025.c12318fb6a1a9093.js +0 -6
  221. sky/dashboard/out/_next/static/chunks/9360.71e83b2ddc844ec2.js +0 -31
  222. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-8f058b0346db2aff.js +0 -16
  223. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-4f7079dcab6ed653.js +0 -16
  224. sky/dashboard/out/_next/static/chunks/webpack-6a5ddd0184bfa22c.js +0 -1
  225. sky/dashboard/out/_next/static/css/4614e06482d7309e.css +0 -3
  226. sky/dashboard/out/_next/static/hIViZcQBkn0HE8SpaSsUU/_buildManifest.js +0 -1
  227. /sky/dashboard/out/_next/static/{hIViZcQBkn0HE8SpaSsUU → zB0ed6ge_W1MDszVHhijS}/_ssgManifest.js +0 -0
  228. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/WHEEL +0 -0
  229. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/entry_points.txt +0 -0
  230. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/licenses/LICENSE +0 -0
  231. {skypilot_nightly-1.0.0.dev20251009.dist-info → skypilot_nightly-1.0.0.dev20251107.dist-info}/top_level.txt +0 -0
sky/task.py CHANGED
@@ -1,6 +1,5 @@
 """Task: a coarse-grained stage in an application."""
 import collections
-import inspect
 import json
 import os
 import re
@@ -29,10 +28,6 @@ from sky.utils import yaml_utils
 
 logger = sky_logging.init_logger(__name__)
 
-# A lambda generating commands (node rank_i, node addrs -> cmd_i).
-CommandGen = Callable[[int, List[str]], Optional[str]]
-CommandOrCommandGen = Union[str, CommandGen]
-
 _VALID_NAME_REGEX = '[a-zA-Z0-9]+(?:[._-]{1,2}[a-zA-Z0-9]+)*'
 _VALID_NAME_DESCR = ('ASCII characters and may contain lowercase and'
                     ' uppercase letters, digits, underscores, periods,'
@@ -236,7 +231,7 @@ class Task:
         name: Optional[str] = None,
         *,
         setup: Optional[Union[str, List[str]]] = None,
-        run: Optional[Union[CommandOrCommandGen, List[str]]] = None,
+        run: Optional[Union[str, List[str]]] = None,
         envs: Optional[Dict[str, str]] = None,
         secrets: Optional[Dict[str, str]] = None,
         workdir: Optional[Union[str, Dict[str, Any]]] = None,
@@ -349,7 +344,7 @@ class Task:
         self._volumes = volumes or {}
 
         # concatenate commands if given as list
-        def _concat(commands):
+        def _concat(commands: Optional[Union[str, List[str]]]) -> Optional[str]:
             if isinstance(commands, list):
                 return '\n'.join(commands)
             return commands
@@ -447,42 +442,9 @@ class Task:
 
     def validate_run(self):
         """Validates if the run command is valid."""
-        if callable(self.run):
-            run_sig = inspect.signature(self.run)
-            # Check that run is a function with 2 arguments.
-            if len(run_sig.parameters) != 2:
-                with ux_utils.print_exception_no_traceback():
-                    raise ValueError(_RUN_FN_CHECK_FAIL_MSG.format(run_sig))
-
-            type_list = [int, List[str]]
-            # Check annotations, if exists
-            for i, param in enumerate(run_sig.parameters.values()):
-                if param.annotation != inspect.Parameter.empty:
-                    if param.annotation != type_list[i]:
-                        with ux_utils.print_exception_no_traceback():
-                            raise ValueError(
-                                _RUN_FN_CHECK_FAIL_MSG.format(run_sig))
-
-            # Check self containedness.
-            run_closure = inspect.getclosurevars(self.run)
-            if run_closure.nonlocals:
-                with ux_utils.print_exception_no_traceback():
-                    raise ValueError(
-                        'run command generator must be self contained. '
-                        f'Found nonlocals: {run_closure.nonlocals}')
-            if run_closure.globals:
-                with ux_utils.print_exception_no_traceback():
-                    raise ValueError(
-                        'run command generator must be self contained. '
-                        f'Found globals: {run_closure.globals}')
-            if run_closure.unbound:
-                # Do not raise an error here. Import statements, which are
-                # allowed, will be considered as unbounded.
-                pass
-        elif self.run is not None and not isinstance(self.run, str):
+        if self.run is not None and not isinstance(self.run, str):
             with ux_utils.print_exception_no_traceback():
-                raise ValueError('run must be either a shell script (str) or '
-                                 f'a command generator ({CommandGen}). '
+                raise ValueError('run must be a shell script (str). '
                                  f'Got {type(self.run)}')
 
     def expand_and_validate_file_mounts(self):
@@ -1130,7 +1092,7 @@ class Task:
     def set_resources(
         self, resources: Union['resources_lib.Resources',
                                List['resources_lib.Resources'],
-                               Set['resources_lib.Resources']]
+                               Set['resources_lib.Resources'], Dict[str, Any]]
     ) -> 'Task':
         """Sets the required resources to execute this task.
 
@@ -1144,7 +1106,9 @@ class Task:
         Returns:
             self: The current task, with resources set.
         """
-        if isinstance(resources, resources_lib.Resources):
+        if isinstance(resources, dict):
+            resources = resources_lib.Resources.from_yaml_config(resources)
+        elif isinstance(resources, resources_lib.Resources):
             resources = {resources}
         # TODO(woosuk): Check if the resources are None.
         self.resources = _with_docker_login_config(resources, self.envs,
@@ -1172,6 +1136,10 @@ class Task:
             self.set_resources(type(self.resources)(new_resources_list))
         return self
 
+    def get_resource_config(self) -> Dict[str, Any]:
+        return _resources_to_config(self.resources,
+                                    factor_out_common_fields=True)
+
     @property
     def service(self) -> Optional[service_spec.SkyServiceSpec]:
         return self._service
@@ -1552,6 +1520,16 @@ class Task:
             self.update_file_mounts({
                 mnt_path: blob_path,
             })
+        elif store_type is storage_lib.StoreType.COREWEAVE:
+            if storage.source is not None and not isinstance(
+                    storage.source,
+                    list) and storage.source.startswith('cw://'):
+                blob_path = storage.source
+            else:
+                blob_path = 'cw://' + storage.name
+            self.update_file_mounts({
+                mnt_path: blob_path,
+            })
         else:
             with ux_utils.print_exception_no_traceback():
                 raise ValueError(f'Storage Type {store_type} '
@@ -1688,16 +1666,7 @@ class Task:
 
         add_if_not_none('name', self.name)
 
-        tmp_resource_config: Union[Dict[str, Union[str, int]],
-                                   Dict[str, List[Dict[str, Union[str, int]]]]]
-        if len(self.resources) > 1:
-            resource_list = []
-            for r in self.resources:
-                resource_list.append(r.to_yaml_config())
-            key = 'ordered' if isinstance(self.resources, list) else 'any_of'
-            tmp_resource_config = {key: resource_list}
-        else:
-            tmp_resource_config = list(self.resources)[0].to_yaml_config()
+        tmp_resource_config = _resources_to_config(self.resources)
 
         add_if_not_none('resources', tmp_resource_config)
 
@@ -1810,3 +1779,47 @@ class Task:
         else:
             s += '\n resources: default instances'
         return s
+
+
+def _resources_to_config(
+        resources: Union[List['resources_lib.Resources'],
+                         Set['resources_lib.Resources']],
+        factor_out_common_fields: bool = False) -> Dict[str, Any]:
+    if len(resources) > 1:
+        resource_list: List[Dict[str, Union[str, int]]] = []
+        for r in resources:
+            resource_list.append(r.to_yaml_config())
+        group_key = 'ordered' if isinstance(resources, list) else 'any_of'
+        if factor_out_common_fields:
+            return _factor_out_common_resource_fields(resource_list, group_key)
+        return {group_key: resource_list}
+    else:
+        return list(resources)[0].to_yaml_config()
+
+
+def _factor_out_common_resource_fields(
+        configs: List[Dict[str, Union[str, int]]],
+        group_key: str) -> Dict[str, Any]:
+    """Factors out the fields that are common to all resources."""
+    return_config: Dict[str, Any] = configs[0].copy()
+    if len(configs) > 1:
+        for config in configs[1:]:
+            for key, value in config.items():
+                if key in return_config and return_config[key] != value:
+                    del return_config[key]
+    num_empty_configs = 0
+    for config in configs:
+        keys_to_delete = []
+        for key, value in config.items():
+            if key in return_config:
+                keys_to_delete.append(key)
+        for key in keys_to_delete:
+            del config[key]
+        if not config:
+            num_empty_configs += 1
+
+    if num_empty_configs == len(configs):
+        return return_config
+    if len(configs) > 0:
+        return_config[group_key] = configs
+    return return_config
sky/templates/kubernetes-ray.yml.j2 CHANGED
@@ -209,7 +209,9 @@ provider:
     metadata:
       labels:
         parent: skypilot
+        # TODO (kyuds): remove this label for v0.12.0, as skypilot-cluster label is deprecated in favor of skypilot-cluster-name.
         skypilot-cluster: {{cluster_name_on_cloud}}
+        skypilot-cluster-name: {{cluster_name_on_cloud}}
         skypilot-user: {{ user }}
       name: {{cluster_name_on_cloud}}-head-ssh
     spec:
@@ -227,7 +229,9 @@ provider:
     metadata:
       labels:
         parent: skypilot
+        # TODO (kyuds): remove this label for v0.12.0, as skypilot-cluster label is deprecated in favor of skypilot-cluster-name.
         skypilot-cluster: {{cluster_name_on_cloud}}
+        skypilot-cluster-name: {{cluster_name_on_cloud}}
         skypilot-user: {{ user }}
       # NOTE: If you're running multiple Ray clusters with services
       # on one Kubernetes cluster, they must have unique service
@@ -247,7 +251,9 @@ provider:
     metadata:
       labels:
         parent: skypilot
+        # TODO (kyuds): remove this label for v0.12.0, as skypilot-cluster label is deprecated in favor of skypilot-cluster-name.
         skypilot-cluster: {{cluster_name_on_cloud}}
+        skypilot-cluster-name: {{cluster_name_on_cloud}}
         skypilot-user: {{ user }}
       name: {{cluster_name_on_cloud}}-worker{{ worker_id }}
     spec:
@@ -272,6 +278,7 @@ available_node_types:
       labels:
         parent: skypilot
         # component will be set for the head node pod to be the same as the head node service selector above if a
+        # TODO (kyuds): remove this label for v0.11.0, as skypilot-cluster label is deprecated in favor of skypilot-cluster-name.
         skypilot-cluster: {{cluster_name_on_cloud}}
         skypilot-user: {{ user }}
         # Custom tags for the pods
@@ -1059,7 +1066,7 @@ available_node_types:
       # Also, skip the jobs that are waiting to be scheduled as those does not have a controller process running.
       # For SkyServe, this will be None and every service will be recovered. This is because SkyServe
      # will delete the service from the database after it is terminated so everything in the database is running.
-      ALL_IN_PROGRESS_JOBS=$({{sky_python_cmd}} -c "from sky.jobs import state; jobs = state.get_managed_jobs(); print(' '.join({str(job['job_id']) for job in jobs if job['schedule_state'] not in [state.ManagedJobScheduleState.DONE, state.ManagedJobScheduleState.WAITING]}) if jobs else None)")
+      ALL_IN_PROGRESS_JOBS=$({{sky_python_cmd}} -c "from sky.jobs import state; jobs, _ = state.get_managed_jobs_with_filters(fields=['job_id', 'schedule_state']); print(' '.join({str(job['job_id']) for job in jobs if job['schedule_state'] not in [state.ManagedJobScheduleState.DONE, state.ManagedJobScheduleState.WAITING]}) if jobs else None)")
       if [ "$ALL_IN_PROGRESS_JOBS" != "None" ]; then
         read -ra ALL_IN_PROGRESS_JOBS_SEQ <<< "$ALL_IN_PROGRESS_JOBS"
       fi
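
For illustration (not part of the diff): during the transition both labels are applied, so a cluster's pods can be selected by either the deprecated `skypilot-cluster` label or the new `skypilot-cluster-name` label. A minimal sketch using the official Kubernetes Python client; the cluster name below is hypothetical:

    from kubernetes import client, config

    config.load_kube_config()
    v1 = client.CoreV1Api()
    # Select SkyPilot pods by the new label.
    pods = v1.list_pod_for_all_namespaces(
        label_selector='skypilot-cluster-name=my-cluster')
    for pod in pods.items:
        print(pod.metadata.namespace, pod.metadata.name)
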
sky/templates/nebius-ray.yml.j2 CHANGED
@@ -156,6 +156,7 @@ setup_commands:
   echo '{{env_var}}={{env_value}}' | sudo tee -a /etc/environment;
   {%- endfor %}
   {%- endif %}
+  IP=$(hostname -I | awk '{print $1}'); echo "$IP $(hostname)" | sudo tee -a /etc/hosts;
   sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
   sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
   mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n IdentityFile ~/.ssh/sky-cluster-key\n IdentityFile ~/.ssh/id_rsa\n" >> ~/.ssh/config;
sky/templates/shadeform-ray.yml.j2 ADDED
@@ -0,0 +1,72 @@
+cluster_name: {{cluster_name_on_cloud}}
+
+# The maximum number of workers nodes to launch in addition to the head node.
+max_workers: {{num_nodes - 1}}
+upscaling_speed: {{num_nodes - 1}}
+idle_timeout_minutes: 60
+
+provider:
+  type: external
+  module: sky.provision.shadeform
+  region: "{{region}}"
+  disable_launch_config_check: true
+
+auth:
+  ssh_user: shadeform
+  ssh_private_key: {{ssh_private_key}}
+  ssh_key_id: {{ssh_key_id}}
+
+available_node_types:
+  ray_head_default:
+    {%- if custom_resources %}
+    resources: {{custom_resources}}
+    {%- else %}
+    resources: {}
+    {%- endif %}
+    node_config:
+      InstanceType: {{instance_type}}
+      PublicKey: |-
+        skypilot:ssh_public_key_content
+
+head_node_type: ray_head_default
+
+# Format: `REMOTE_PATH : LOCAL_PATH`
+file_mounts: {
+  "{{sky_ray_yaml_remote_path}}": "{{sky_ray_yaml_local_path}}",
+  "{{sky_remote_path}}/{{sky_wheel_hash}}": "{{sky_local_path}}",
+{%- for remote_path, local_path in credentials.items() %}
+  "{{remote_path}}": "{{local_path}}",
+{%- endfor %}
+}
+
+rsync_exclude: []
+
+initialization_commands: []
+
+# List of shell commands to run to set up nodes.
+# NOTE: these are very performance-sensitive. Each new item opens/closes an SSH
+# connection, which is expensive. Try your best to co-locate commands into fewer
+# items!
+#
+# Increment the following for catching performance bugs easier:
+# current num items (num SSH connections): 1
+setup_commands:
+  # Create ~/.ssh/config file in case the file does not exist in the image.
+  # Line 'rm ..': there is another installation of pip.
+  # Line 'sudo bash ..': set the ulimit as suggested by ray docs for performance. https://docs.ray.io/en/latest/cluster/vms/user-guides/large-cluster-best-practices.html#system-configuration
+  # Line 'sudo grep ..': set the number of threads per process to unlimited to avoid ray job submit stucking issue when the number of running ray jobs increase.
+  # Line 'mkdir -p ..': disable host key check
+  # Line 'python3 -c ..': patch the buggy ray files and enable `-o allow_other` option for `goofys`
+  - {%- for initial_setup_command in initial_setup_commands %}
+    {{ initial_setup_command }}
+    {%- endfor %}
+    mkdir -p ~/.ssh; touch ~/.ssh/config; which patch > /dev/null || sudo apt install -y patch;
+    {{ conda_installation_commands }}
+    {{ ray_skypilot_installation_commands }}
+    sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
+    sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
+    (grep -Pzo -q "Host \*\n StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n" >> ~/.ssh/config;
+    {{ ssh_max_sessions_config }}
+
+# Command to start ray clusters are now placed in `sky.provision.instance_setup`.
+# We do not need to list it here anymore.
sky/templates/websocket_proxy.py CHANGED
@@ -11,15 +11,23 @@ This script is useful for users who do not have local Kubernetes credentials.
 import asyncio
 from http.cookiejar import MozillaCookieJar
 import os
+import struct
 import sys
-from typing import Dict
+import time
+from typing import Dict, Optional
 from urllib.request import Request
 
+import requests
 import websockets
 from websockets.asyncio.client import ClientConnection
 from websockets.asyncio.client import connect
 
+from sky.server import constants
+from sky.server.server import KubernetesSSHMessageType
+from sky.skylet import constants as skylet_constants
+
 BUFFER_SIZE = 2**16  # 64KB
+HEARTBEAT_INTERVAL_SECONDS = 10
 
 # Environment variable for a file path to the API cookie file.
 # Keep in sync with server/constants.py
@@ -28,6 +36,8 @@ API_COOKIE_FILE_ENV_VAR = 'SKYPILOT_API_COOKIE_FILE'
 # Keep in sync with server/constants.py
 API_COOKIE_FILE_DEFAULT_LOCATION = '~/.sky/cookies.txt'
 
+MAX_UNANSWERED_PINGS = 100
+
 
 def _get_cookie_header(url: str) -> Dict[str, str]:
     """Extract Cookie header value from a cookie jar for a specific URL"""
@@ -49,7 +59,7 @@ def _get_cookie_header(url: str) -> Dict[str, str]:
     return {'Cookie': cookie_header}
 
 
-async def main(url: str) -> None:
+async def main(url: str, timestamps_supported: bool) -> None:
     cookie_header = _get_cookie_header(url)
     async with connect(url,
                        ping_interval=None,
@@ -75,45 +85,149 @@ async def main(url: str) -> None:
             asyncio.streams.FlowControlMixin, sys.stdout)  # type: ignore
         stdout_writer = asyncio.StreamWriter(transport, protocol, None,
                                              loop)
+        # Dictionary to store last ping time for latency measurement
+        last_ping_time_dict: Optional[Dict[int, float]] = None
+        if timestamps_supported:
+            last_ping_time_dict = {}
+
+        # Use an Event to signal when websocket is closed
+        websocket_closed_event = asyncio.Event()
+        websocket_lock = asyncio.Lock()
 
-        await asyncio.gather(stdin_to_websocket(stdin_reader, websocket),
-                             websocket_to_stdout(websocket, stdout_writer))
+        await asyncio.gather(
+            stdin_to_websocket(stdin_reader, websocket,
+                               timestamps_supported, websocket_closed_event,
+                               websocket_lock),
+            websocket_to_stdout(websocket, stdout_writer,
+                                timestamps_supported, last_ping_time_dict,
+                                websocket_closed_event, websocket_lock),
+            latency_monitor(websocket, last_ping_time_dict,
                            websocket_closed_event, websocket_lock),
+            return_exceptions=True)
     finally:
         if old_settings:
             termios.tcsetattr(sys.stdin.fileno(), termios.TCSADRAIN,
                               old_settings)
 
 
+async def latency_monitor(websocket: ClientConnection,
+                          last_ping_time_dict: Optional[dict],
+                          websocket_closed_event: asyncio.Event,
+                          websocket_lock: asyncio.Lock):
+    """Periodically send PING messages (type 1) to measure latency."""
+    if last_ping_time_dict is None:
+        return
+    next_id = 0
+    while not websocket_closed_event.is_set():
+        try:
+            await asyncio.sleep(HEARTBEAT_INTERVAL_SECONDS)
+            if len(last_ping_time_dict) >= MAX_UNANSWERED_PINGS:
+                # We are not getting responses, clear the dictionary so
+                # as not to grow unbounded.
+                last_ping_time_dict.clear()
+            ping_time = time.time()
+            next_id += 1
+            last_ping_time_dict[next_id] = ping_time
+            message_header_bytes = struct.pack(
+                '!BI', KubernetesSSHMessageType.PINGPONG.value, next_id)
+            try:
+                async with websocket_lock:
+                    await websocket.send(message_header_bytes)
+            except websockets.exceptions.ConnectionClosed as e:
+                # Websocket is already closed.
+                print(f'Failed to send PING message: {e}', file=sys.stderr)
+                break
+        except Exception as e:
+            print(f'Error in latency_monitor: {e}', file=sys.stderr)
+            websocket_closed_event.set()
+            raise e
+
+
 async def stdin_to_websocket(reader: asyncio.StreamReader,
-                             websocket: ClientConnection):
+                             websocket: ClientConnection,
+                             timestamps_supported: bool,
+                             websocket_closed_event: asyncio.Event,
+                             websocket_lock: asyncio.Lock):
     try:
-        while True:
+        while not websocket_closed_event.is_set():
             # Read at most BUFFER_SIZE bytes, this not affect
             # responsiveness since it will return as soon as
             # there is at least one byte.
             # The BUFFER_SIZE is chosen to be large enough to improve
             # throughput.
             data = await reader.read(BUFFER_SIZE)
+
             if not data:
                 break
-            await websocket.send(data)
+            if timestamps_supported:
+                # Send message with type 0 to indicate data.
+                message_type_bytes = struct.pack(
+                    '!B', KubernetesSSHMessageType.REGULAR_DATA.value)
+                data = message_type_bytes + data
+            async with websocket_lock:
+                await websocket.send(data)
+
     except Exception as e:  # pylint: disable=broad-except
         print(f'Error in stdin_to_websocket: {e}', file=sys.stderr)
     finally:
-        await websocket.close()
+        async with websocket_lock:
+            await websocket.close()
+        websocket_closed_event.set()
 
 
 async def websocket_to_stdout(websocket: ClientConnection,
-                              writer: asyncio.StreamWriter):
+                              writer: asyncio.StreamWriter,
+                              timestamps_supported: bool,
+                              last_ping_time_dict: Optional[dict],
+                              websocket_closed_event: asyncio.Event,
+                              websocket_lock: asyncio.Lock):
     try:
-        while True:
+        while not websocket_closed_event.is_set():
             message = await websocket.recv()
+            if (timestamps_supported and len(message) > 0 and
+                    last_ping_time_dict is not None):
+                message_type = struct.unpack('!B', message[:1])[0]
+                if message_type == KubernetesSSHMessageType.REGULAR_DATA.value:
+                    # Regular data - strip type byte and write to stdout
+                    message = message[1:]
+                elif message_type == KubernetesSSHMessageType.PINGPONG.value:
+                    # PONG response - calculate latency and send measurement
+                    if not len(message) == struct.calcsize('!BI'):
+                        raise ValueError(
+                            f'Invalid PONG message length: {len(message)}')
+                    pong_id = struct.unpack('!I', message[1:5])[0]
+                    pong_time = time.time()
+
+                    ping_time = last_ping_time_dict.pop(pong_id, None)
+
+                    if ping_time is None:
+                        continue
+
+                    latency_seconds = pong_time - ping_time
+                    latency_ms = int(latency_seconds * 1000)
+
+                    # Send latency measurement (type 2)
+                    message_type_bytes = struct.pack(
+                        '!B',
+                        KubernetesSSHMessageType.LATENCY_MEASUREMENT.value)
+                    latency_bytes = struct.pack('!Q', latency_ms)
+                    message = message_type_bytes + latency_bytes
+                    # Send to server.
+                    async with websocket_lock:
+                        await websocket.send(message)
+                    continue
+            # No timestamps support, write directly
             writer.write(message)
             await writer.drain()
     except websockets.exceptions.ConnectionClosed:
         print('WebSocket connection closed', file=sys.stderr)
     except Exception as e:  # pylint: disable=broad-except
         print(f'Error in websocket_to_stdout: {e}', file=sys.stderr)
+        raise e
+    finally:
+        async with websocket_lock:
+            await websocket.close()
+        websocket_closed_event.set()
 
 
 if __name__ == '__main__':
@@ -123,11 +237,27 @@ if __name__ == '__main__':
         # TODO(aylei): Remove this after 0.10.0
         server_url = f'http://{server_url}'
 
+    disable_latency_measurement = os.environ.get(
+        skylet_constants.SSH_DISABLE_LATENCY_MEASUREMENT_ENV_VAR, '0') == '1'
+    if disable_latency_measurement:
+        timestamps_are_supported = False
+    else:
+        health_url = f'{server_url}/api/health'
+        cookie_hdr = _get_cookie_header(health_url)
+        health_response = requests.get(health_url, headers=cookie_hdr)
+        health_data = health_response.json()
+        timestamps_are_supported = int(health_data.get('api_version', 0)) > 21
+
     server_proto, server_fqdn = server_url.split('://')
     websocket_proto = 'ws'
     if server_proto == 'https':
         websocket_proto = 'wss'
     server_url = f'{websocket_proto}://{server_fqdn}'
+
+    client_version_str = (f'&client_version={constants.API_VERSION}'
+                          if timestamps_are_supported else '')
+
     websocket_url = (f'{server_url}/kubernetes-pod-ssh-proxy'
-                     f'?cluster_name={sys.argv[2]}')
-    asyncio.run(main(websocket_url))
+                     f'?cluster_name={sys.argv[2]}'
+                     f'{client_version_str}')
+    asyncio.run(main(websocket_url, timestamps_are_supported))
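
For illustration (not part of the diff): the wire framing introduced above is a one-byte message type, optionally followed by a payload. Data frames (type 0) carry raw terminal bytes, PING/PONG frames (type 1) carry a 4-byte unsigned message id ('!BI'), and latency reports (type 2) carry an 8-byte millisecond count ('!BQ'). A minimal sketch of encoding and decoding these frames, with the type values hard-coded to match the comments in the diff:

    import struct

    REGULAR_DATA, PINGPONG, LATENCY_MEASUREMENT = 0, 1, 2  # values per the comments above

    ping = struct.pack('!BI', PINGPONG, 7)                # 5-byte PING with id 7
    report = struct.pack('!BQ', LATENCY_MEASUREMENT, 42)  # 42 ms latency report

    msg_type = struct.unpack('!B', ping[:1])[0]
    if msg_type == PINGPONG:
        (ping_id,) = struct.unpack('!I', ping[1:5])       # -> 7
    elif msg_type == REGULAR_DATA:
        payload = ping[1:]                                # raw data bytes
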
sky/users/permission.py CHANGED
@@ -14,6 +14,7 @@ from sky import models
 from sky import sky_logging
 from sky.skylet import constants
 from sky.users import rbac
+from sky.utils import annotations
 from sky.utils import common_utils
 from sky.utils.db import db_utils
 
@@ -42,7 +43,6 @@ class PermissionService:
         with _policy_lock():
             global _enforcer_instance
             if _enforcer_instance is None:
-                _enforcer_instance = self
                 engine = global_user_state.initialize_and_get_db()
                 db_utils.add_all_tables_to_db_sqlalchemy(
                     sqlalchemy_adapter.Base.metadata, engine)
@@ -52,6 +52,10 @@ class PermissionService:
                                           'model.conf')
                 enforcer = casbin.Enforcer(model_path, adapter)
                 self.enforcer = enforcer
+                # Only set the enforcer instance once the enforcer
+                # is successfully initialized, if we change it and then fail
+                # we will set it to None and all subsequent calls will fail.
+                _enforcer_instance = self
                 self._maybe_initialize_policies()
                 self._maybe_initialize_basic_auth_user()
             else:
@@ -254,6 +258,9 @@ class PermissionService:
         with _policy_lock():
             self._load_policy_no_lock()
 
+    # Right now, not a lot of users are using multiple workspaces,
+    # so 5 should be more than enough.
+    @annotations.lru_cache(scope='request', maxsize=5)
     def check_workspace_permission(self, user_id: str,
                                    workspace_name: str) -> bool:
         """Check workspace permission.
@@ -2,6 +2,7 @@
2
2
  import contextlib
3
3
  import copy
4
4
  import importlib
5
+ import typing
5
6
  from typing import Iterator, Optional, Tuple, Union
6
7
  import urllib.parse
7
8
 
@@ -13,12 +14,16 @@ from sky import exceptions
13
14
  from sky import sky_logging
14
15
  from sky import skypilot_config
15
16
  from sky import task as task_lib
17
+ from sky.server.requests import request_names
16
18
  from sky.utils import common_utils
17
19
  from sky.utils import config_utils
18
20
  from sky.utils import ux_utils
19
21
 
20
22
  logger = sky_logging.init_logger(__name__)
21
23
 
24
+ if typing.TYPE_CHECKING:
25
+ from sky import models
26
+
22
27
 
23
28
  def _is_url(policy_string: str) -> bool:
24
29
  """Check if the policy string is a URL."""
@@ -73,6 +78,7 @@ def _get_policy_impl(
73
78
  @contextlib.contextmanager
74
79
  def apply_and_use_config_in_current_request(
75
80
  entrypoint: Union['dag_lib.Dag', 'task_lib.Task'],
81
+ request_name: request_names.AdminPolicyRequestName,
76
82
  request_options: Optional[admin_policy.RequestOptions] = None,
77
83
  at_client_side: bool = False,
78
84
  ) -> Iterator['dag_lib.Dag']:
@@ -86,7 +92,8 @@ def apply_and_use_config_in_current_request(
86
92
  Refer to `apply()` for more details.
87
93
  """
88
94
  original_config = skypilot_config.to_dict()
89
- dag, mutated_config = apply(entrypoint, request_options, at_client_side)
95
+ dag, mutated_config = apply(entrypoint, request_name, request_options,
96
+ at_client_side)
90
97
  if mutated_config != original_config:
91
98
  with skypilot_config.replace_skypilot_config(mutated_config):
92
99
  yield dag
@@ -96,6 +103,7 @@ def apply_and_use_config_in_current_request(
96
103
 
97
104
  def apply(
98
105
  entrypoint: Union['dag_lib.Dag', 'task_lib.Task'],
106
+ request_name: request_names.AdminPolicyRequestName,
99
107
  request_options: Optional[admin_policy.RequestOptions] = None,
100
108
  at_client_side: bool = False,
101
109
  ) -> Tuple['dag_lib.Dag', config_utils.Config]:
@@ -126,9 +134,13 @@ def apply(
126
134
  if policy is None:
127
135
  return dag, skypilot_config.to_dict()
128
136
 
137
+ user = None
129
138
  if at_client_side:
130
139
  logger.info(f'Applying client admin policy: {policy}')
131
140
  else:
141
+ # When being called by the server, the middleware has set the
142
+ # current user and this information is available at this point.
143
+ user = common_utils.get_current_user()
132
144
  logger.info(f'Applying server admin policy: {policy}')
133
145
  config = copy.deepcopy(skypilot_config.to_dict())
134
146
  mutated_dag = dag_lib.Dag()
@@ -136,8 +148,9 @@ def apply(
136
148
 
137
149
  mutated_config = None
138
150
  for task in dag.tasks:
139
- user_request = admin_policy.UserRequest(task, config, request_options,
140
- at_client_side)
151
+ user_request = admin_policy.UserRequest(task, config, request_name,
152
+ request_options, at_client_side,
153
+ user)
141
154
  try:
142
155
  mutated_user_request = policy.apply(user_request)
143
156
  # Avoid duplicate exception wrapping.