skypilot-nightly 1.0.0.dev20250603__py3-none-any.whl → 1.0.0.dev20250605__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. sky/__init__.py +3 -3
  2. sky/adaptors/kubernetes.py +8 -0
  3. sky/admin_policy.py +5 -0
  4. sky/backends/backend_utils.py +1 -0
  5. sky/backends/cloud_vm_ray_backend.py +8 -4
  6. sky/{clouds/service_catalog → catalog}/__init__.py +6 -17
  7. sky/{clouds/service_catalog → catalog}/aws_catalog.py +3 -3
  8. sky/{clouds/service_catalog → catalog}/azure_catalog.py +2 -2
  9. sky/{clouds/service_catalog → catalog}/common.py +2 -2
  10. sky/{clouds/service_catalog → catalog}/cudo_catalog.py +1 -1
  11. sky/{clouds/service_catalog → catalog}/data_fetchers/analyze.py +1 -1
  12. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_aws.py +1 -1
  13. sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vsphere.py +1 -1
  14. sky/{clouds/service_catalog → catalog}/do_catalog.py +1 -1
  15. sky/{clouds/service_catalog → catalog}/fluidstack_catalog.py +1 -1
  16. sky/{clouds/service_catalog → catalog}/gcp_catalog.py +2 -2
  17. sky/{clouds/service_catalog → catalog}/ibm_catalog.py +1 -1
  18. sky/{clouds/service_catalog → catalog}/kubernetes_catalog.py +2 -2
  19. sky/{clouds/service_catalog → catalog}/lambda_catalog.py +1 -1
  20. sky/{clouds/service_catalog → catalog}/nebius_catalog.py +1 -1
  21. sky/{clouds/service_catalog → catalog}/oci_catalog.py +1 -1
  22. sky/{clouds/service_catalog → catalog}/paperspace_catalog.py +1 -1
  23. sky/{clouds/service_catalog → catalog}/runpod_catalog.py +1 -1
  24. sky/{clouds/service_catalog → catalog}/scp_catalog.py +1 -1
  25. sky/{clouds/service_catalog → catalog}/ssh_catalog.py +3 -3
  26. sky/{clouds/service_catalog → catalog}/vast_catalog.py +1 -1
  27. sky/{clouds/service_catalog → catalog}/vsphere_catalog.py +1 -1
  28. sky/cli.py +16 -13
  29. sky/client/cli.py +16 -13
  30. sky/client/sdk.py +30 -12
  31. sky/clouds/aws.py +41 -40
  32. sky/clouds/azure.py +31 -34
  33. sky/clouds/cloud.py +8 -8
  34. sky/clouds/cudo.py +26 -26
  35. sky/clouds/do.py +24 -24
  36. sky/clouds/fluidstack.py +27 -29
  37. sky/clouds/gcp.py +42 -42
  38. sky/clouds/ibm.py +26 -26
  39. sky/clouds/kubernetes.py +24 -12
  40. sky/clouds/lambda_cloud.py +28 -30
  41. sky/clouds/nebius.py +26 -28
  42. sky/clouds/oci.py +32 -32
  43. sky/clouds/paperspace.py +24 -26
  44. sky/clouds/runpod.py +26 -28
  45. sky/clouds/scp.py +37 -36
  46. sky/clouds/utils/gcp_utils.py +3 -2
  47. sky/clouds/vast.py +27 -27
  48. sky/clouds/vsphere.py +12 -15
  49. sky/core.py +2 -2
  50. sky/dashboard/out/404.html +1 -1
  51. sky/dashboard/out/_next/static/chunks/614-635a84e87800f99e.js +66 -0
  52. sky/dashboard/out/_next/static/chunks/{856-f1b1f7f47edde2e8.js → 856-3a32da4b84176f6d.js} +1 -1
  53. sky/dashboard/out/_next/static/chunks/937.3759f538f11a0953.js +1 -0
  54. sky/dashboard/out/_next/static/chunks/pages/config-1a1eeb949dab8897.js +6 -0
  55. sky/dashboard/out/_next/static/chunks/pages/users-262aab38b9baaf3a.js +16 -0
  56. sky/dashboard/out/_next/static/chunks/pages/workspaces-384ea5fa0cea8f28.js +1 -0
  57. sky/dashboard/out/_next/static/chunks/{webpack-f27c9a32aa3d9c6d.js → webpack-65d465f948974c0d.js} +1 -1
  58. sky/dashboard/out/_next/static/css/667d941a2888ce6e.css +3 -0
  59. sky/dashboard/out/_next/static/qjhIe-yC6nHcLKBqpzO1M/_buildManifest.js +1 -0
  60. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  61. sky/dashboard/out/clusters/[cluster].html +1 -1
  62. sky/dashboard/out/clusters.html +1 -1
  63. sky/dashboard/out/config.html +1 -1
  64. sky/dashboard/out/index.html +1 -1
  65. sky/dashboard/out/infra/[context].html +1 -1
  66. sky/dashboard/out/infra.html +1 -1
  67. sky/dashboard/out/jobs/[job].html +1 -1
  68. sky/dashboard/out/jobs.html +1 -1
  69. sky/dashboard/out/users.html +1 -1
  70. sky/dashboard/out/workspace/new.html +1 -1
  71. sky/dashboard/out/workspaces/[name].html +1 -1
  72. sky/dashboard/out/workspaces.html +1 -1
  73. sky/data/storage_utils.py +5 -2
  74. sky/execution.py +44 -46
  75. sky/global_user_state.py +119 -86
  76. sky/jobs/client/sdk.py +4 -1
  77. sky/jobs/server/core.py +6 -2
  78. sky/models.py +1 -0
  79. sky/optimizer.py +1 -1
  80. sky/provision/cudo/cudo_machine_type.py +1 -1
  81. sky/provision/kubernetes/utils.py +35 -22
  82. sky/provision/vast/utils.py +1 -1
  83. sky/provision/vsphere/common/vim_utils.py +1 -2
  84. sky/provision/vsphere/instance.py +1 -1
  85. sky/provision/vsphere/vsphere_utils.py +7 -11
  86. sky/resources.py +24 -3
  87. sky/serve/server/core.py +1 -1
  88. sky/server/constants.py +3 -1
  89. sky/server/requests/executor.py +4 -1
  90. sky/server/requests/payloads.py +25 -0
  91. sky/server/requests/serializers/decoders.py +1 -1
  92. sky/server/server.py +33 -12
  93. sky/server/stream_utils.py +2 -38
  94. sky/setup_files/MANIFEST.in +1 -0
  95. sky/setup_files/dependencies.py +2 -0
  96. sky/skylet/constants.py +10 -4
  97. sky/skypilot_config.py +92 -39
  98. sky/templates/websocket_proxy.py +11 -1
  99. sky/usage/usage_lib.py +4 -3
  100. sky/users/__init__.py +0 -0
  101. sky/users/model.conf +15 -0
  102. sky/users/permission.py +178 -0
  103. sky/users/rbac.py +86 -0
  104. sky/users/server.py +66 -0
  105. sky/utils/accelerator_registry.py +3 -3
  106. sky/utils/kubernetes/deploy_remote_cluster.py +2 -1
  107. sky/utils/schemas.py +20 -10
  108. sky/workspaces/core.py +2 -2
  109. {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/METADATA +3 -1
  110. {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/RECORD +134 -130
  111. sky/clouds/service_catalog/constants.py +0 -8
  112. sky/dashboard/out/_next/static/chunks/614-3d29f98e0634b179.js +0 -66
  113. sky/dashboard/out/_next/static/chunks/937.f97f83652028e944.js +0 -1
  114. sky/dashboard/out/_next/static/chunks/pages/config-35383adcb0edb5e2.js +0 -6
  115. sky/dashboard/out/_next/static/chunks/pages/users-07b523ccb19317ad.js +0 -6
  116. sky/dashboard/out/_next/static/chunks/pages/workspaces-f54921ec9eb20965.js +0 -1
  117. sky/dashboard/out/_next/static/css/63d3995d8b528eb1.css +0 -3
  118. sky/dashboard/out/_next/static/zTAFq_Iv6_yxQj3fXvJWR/_buildManifest.js +0 -1
  119. /sky/{clouds/service_catalog → catalog}/config.py +0 -0
  120. /sky/{clouds/service_catalog → catalog}/data_fetchers/__init__.py +0 -0
  121. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_azure.py +0 -0
  122. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_cudo.py +0 -0
  123. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_fluidstack.py +0 -0
  124. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_gcp.py +0 -0
  125. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_ibm.py +0 -0
  126. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_lambda_cloud.py +0 -0
  127. /sky/{clouds/service_catalog → catalog}/data_fetchers/fetch_vast.py +0 -0
  128. /sky/dashboard/out/_next/static/chunks/{121-8f55ee3fa6301784.js → 121-865d2bf8a3b84c6a.js} +0 -0
  129. /sky/dashboard/out/_next/static/chunks/{236-fef38aa6e5639300.js → 236-4c0dc6f63ccc6319.js} +0 -0
  130. /sky/dashboard/out/_next/static/chunks/{37-947904ccc5687bac.js → 37-beedd583fea84cc8.js} +0 -0
  131. /sky/dashboard/out/_next/static/chunks/{682-2be9b0f169727f2f.js → 682-6647f0417d5662f0.js} +0 -0
  132. /sky/dashboard/out/_next/static/chunks/{843-a097338acb89b7d7.js → 843-c296541442d4af88.js} +0 -0
  133. /sky/dashboard/out/_next/static/chunks/{969-d7b6fb7f602bfcb3.js → 969-c7abda31c10440ac.js} +0 -0
  134. /sky/dashboard/out/_next/static/chunks/pages/{_app-67925f5e6382e22f.js → _app-cb81dc4d27f4d009.js} +0 -0
  135. /sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-158b70da336d8607.js → [job]-65d04d5d77cbb6b6.js} +0 -0
  136. /sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-62c9982dc3675725.js → [cluster]-beabbcd7606c1a23.js} +0 -0
  137. /sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-a62a3c65dc9bc57c.js → [job]-86c47edc500f15f9.js} +0 -0
  138. /sky/dashboard/out/_next/static/{zTAFq_Iv6_yxQj3fXvJWR → qjhIe-yC6nHcLKBqpzO1M}/_ssgManifest.js +0 -0
  139. {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/WHEEL +0 -0
  140. {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/entry_points.txt +0 -0
  141. {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/licenses/LICENSE +0 -0
  142. {skypilot_nightly-1.0.0.dev20250603.dist-info → skypilot_nightly-1.0.0.dev20250605.dist-info}/top_level.txt +0 -0
sky/skypilot_config.py CHANGED
@@ -55,7 +55,7 @@ import os
55
55
  import tempfile
56
56
  import threading
57
57
  import typing
58
- from typing import Any, Dict, Iterator, List, Optional, Tuple
58
+ from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
59
59
 
60
60
  import filelock
61
61
 
@@ -163,11 +163,23 @@ def _set_loaded_config(config: config_utils.Config) -> None:
163
163
  _get_config_context().config = config
164
164
 
165
165
 
166
- def _get_loaded_config_path() -> Optional[str]:
167
- return _get_config_context().config_path
166
def _get_loaded_config_path() -> List[Optional[str]]:
    """Return the config path list recorded on the current config context.

    The context stores the list as a JSON string; this decodes it.

    Returns:
        The decoded list of paths (entries may be None). An empty list is
        returned when nothing is recorded or when the stored JSON decodes to
        ``null``; a stored JSON string is wrapped into a one-element list.
    """
    serialized = _get_config_context().config_path
    if not serialized:
        return []
    paths = json.loads(serialized)
    # The stored value can be the JSON text 'null' (json.dumps(None)), which
    # decodes to None. Callers iterate and concatenate the result, so always
    # hand back a list as the annotation promises.
    if paths is None:
        return []
    if isinstance(paths, str):
        return [paths]
    return paths
171
+
172
+
173
def _set_loaded_config_path(
        path: Optional[Union[str, List[Optional[str]]]]) -> None:
    """Record the config file path(s) on the current config context.

    Args:
        path: A single path, a list of paths (entries may be None), or a
            falsy value (None, '' or an empty list) to clear the record.

    A non-empty value is stored as a JSON-encoded list; a single string is
    wrapped into a one-element list first. A falsy value stores None.
    """
    if not path:
        _get_config_context().config_path = None
        # Stop here: falling through would overwrite the None stored above
        # with json.dumps(path), e.g. the string 'null' for path=None.
        return
    if isinstance(path, str):
        path = [path]
    _get_config_context().config_path = json.dumps(path)
168
180
 
169
181
 
170
- def _set_loaded_config_path(path: Optional[str]) -> None:
182
+ def _set_loaded_config_path_serialized(path: Optional[str]) -> None:
171
183
  _get_config_context().config_path = path
172
184
 
173
185
 
@@ -184,9 +196,14 @@ def get_user_config_path() -> str:
184
196
  return _GLOBAL_CONFIG_PATH
185
197
 
186
198
 
187
- def get_user_config() -> config_utils.Config:
188
- """Returns the user config."""
189
- # find the user config file
199
def _get_config_from_path(path: Optional[str]) -> config_utils.Config:
    """Load the config stored at ``path``.

    Args:
        path: Config file path, or None when no file was resolved.

    Returns:
        The result of ``parse_and_validate_config_file(path)``, or an empty
        ``config_utils.Config`` when ``path`` is None.
    """
    if path is not None:
        return parse_and_validate_config_file(path)
    return config_utils.Config()
203
+
204
+
205
+ def _resolve_user_config_path() -> Optional[str]:
206
+ # find the user config file path, None if not resolved.
190
207
  user_config_path = _get_config_file_path(ENV_VAR_GLOBAL_CONFIG)
191
208
  if user_config_path:
192
209
  logger.debug('using user config file specified by '
@@ -203,16 +220,17 @@ def get_user_config() -> config_utils.Config:
203
220
  user_config_path = get_user_config_path()
204
221
  logger.debug(f'using default user config file: {user_config_path}')
205
222
  user_config_path = os.path.expanduser(user_config_path)
206
-
207
- # load the user config file
208
223
  if os.path.exists(user_config_path):
209
- user_config = parse_and_validate_config_file(user_config_path)
210
- else:
211
- user_config = config_utils.Config()
212
- return user_config
224
+ return user_config_path
225
+ return None
213
226
 
214
227
 
215
- def _get_project_config() -> config_utils.Config:
228
def get_user_config() -> config_utils.Config:
    """Returns the user config."""
    # Resolve the path first, then load it; an unresolved (None) path is
    # handled by _get_config_from_path.
    resolved_path = _resolve_user_config_path()
    return _get_config_from_path(resolved_path)
231
+
232
+
233
+ def _resolve_project_config_path() -> Optional[str]:
216
234
  # find the project config file
217
235
  project_config_path = _get_config_file_path(ENV_VAR_PROJECT_CONFIG)
218
236
  if project_config_path:
@@ -231,17 +249,17 @@ def _get_project_config() -> config_utils.Config:
231
249
  f'using default project config file: {_PROJECT_CONFIG_PATH}')
232
250
  project_config_path = _PROJECT_CONFIG_PATH
233
251
  project_config_path = os.path.expanduser(project_config_path)
234
-
235
- # load the project config file
236
252
  if os.path.exists(project_config_path):
237
- project_config = parse_and_validate_config_file(project_config_path)
238
- else:
239
- project_config = config_utils.Config()
240
- return project_config
253
+ return project_config_path
254
+ return None
241
255
 
242
256
 
243
- def get_server_config() -> config_utils.Config:
244
- """Returns the server config."""
257
def _get_project_config() -> config_utils.Config:
    """Returns the project config."""
    # Resolve the path first, then load it; an unresolved (None) path is
    # handled by _get_config_from_path.
    resolved_path = _resolve_project_config_path()
    return _get_config_from_path(resolved_path)
260
+
261
+
262
+ def _resolve_server_config_path() -> Optional[str]:
245
263
  # find the server config file
246
264
  server_config_path = _get_config_file_path(ENV_VAR_GLOBAL_CONFIG)
247
265
  if server_config_path:
@@ -259,13 +277,14 @@ def get_server_config() -> config_utils.Config:
259
277
  server_config_path = _GLOBAL_CONFIG_PATH
260
278
  logger.debug(f'using default server config file: {server_config_path}')
261
279
  server_config_path = os.path.expanduser(server_config_path)
262
-
263
- # load the server config file
264
280
  if os.path.exists(server_config_path):
265
- server_config = parse_and_validate_config_file(server_config_path)
266
- else:
267
- server_config = config_utils.Config()
268
- return server_config
281
+ return server_config_path
282
+ return None
283
+
284
+
285
def get_server_config() -> config_utils.Config:
    """Returns the server config."""
    # Resolve the path first, then load it; an unresolved (None) path is
    # handled by _get_config_from_path.
    resolved_path = _resolve_server_config_path()
    return _get_config_from_path(resolved_path)
269
288
 
270
289
 
271
290
  def get_nested(keys: Tuple[str, ...],
@@ -487,9 +506,11 @@ def _reload_config_from_internal_file(internal_config_path: str) -> None:
487
506
  def _reload_config_as_server() -> None:
488
507
  # Reset the global variables, to avoid using stale values.
489
508
  _set_loaded_config(config_utils.Config())
509
+ _set_loaded_config_path(None)
490
510
 
491
511
  overrides: List[config_utils.Config] = []
492
- server_config = get_server_config()
512
+ server_config_path = _resolve_server_config_path()
513
+ server_config = _get_config_from_path(server_config_path)
493
514
  if server_config:
494
515
  overrides.append(server_config)
495
516
 
@@ -503,17 +524,21 @@ def _reload_config_as_server() -> None:
503
524
  f'server config: \n'
504
525
  f'{common_utils.dump_yaml_str(dict(overlaid_server_config))}')
505
526
  _set_loaded_config(overlaid_server_config)
527
+ _set_loaded_config_path(server_config_path)
506
528
 
507
529
 
508
530
  def _reload_config_as_client() -> None:
509
531
  # Reset the global variables, to avoid using stale values.
510
532
  _set_loaded_config(config_utils.Config())
533
+ _set_loaded_config_path(None)
511
534
 
512
535
  overrides: List[config_utils.Config] = []
513
- user_config = get_user_config()
536
+ user_config_path = _resolve_user_config_path()
537
+ user_config = _get_config_from_path(user_config_path)
514
538
  if user_config:
515
539
  overrides.append(user_config)
516
- project_config = _get_project_config()
540
+ project_config_path = _resolve_project_config_path()
541
+ project_config = _get_config_from_path(project_config_path)
517
542
  if project_config:
518
543
  overrides.append(project_config)
519
544
 
@@ -527,14 +552,26 @@ def _reload_config_as_client() -> None:
527
552
  f'client config (before task and CLI overrides): \n'
528
553
  f'{common_utils.dump_yaml_str(dict(overlaid_client_config))}')
529
554
  _set_loaded_config(overlaid_client_config)
555
+ _set_loaded_config_path([user_config_path, project_config_path])
530
556
 
531
557
 
532
558
  def loaded_config_path() -> Optional[str]:
533
- """Returns the path to the loaded config file, or
534
- '<overridden>' if the config is overridden."""
535
- if _is_config_overridden():
536
- return '<overridden>'
537
- return _get_loaded_config_path()
559
+ """Returns the path to the loaded config file, or '<overridden>' if the
560
+ config is overridden."""
561
+ path = [p for p in set(_get_loaded_config_path()) if p is not None]
562
+ if len(path) == 0:
563
+ return '<overridden>' if _is_config_overridden() else None
564
+ if len(path) == 1:
565
+ return path[0]
566
+
567
+ header = 'overridden' if _is_config_overridden() else 'merged'
568
+ path_str = ', '.join(p for p in path if p is not None)
569
+ return f'<{header} ({path_str})>'
570
+
571
+
572
def loaded_config_path_serialized() -> Optional[str]:
    """Returns the json serialized config path list"""
    # Hand back the raw value stored on the context, without decoding it.
    config_context = _get_config_context()
    return config_context.config_path
538
575
 
539
576
 
540
577
  # Load on import, synchronization is guaranteed by python interpreter.
@@ -548,7 +585,9 @@ def loaded() -> bool:
548
585
 
549
586
  @contextlib.contextmanager
550
587
  def override_skypilot_config(
551
- override_configs: Optional[Dict[str, Any]]) -> Iterator[None]:
588
+ override_configs: Optional[Dict[str, Any]],
589
+ override_config_path_serialized: Optional[str] = None
590
+ ) -> Iterator[None]:
552
591
  """Overrides the user configurations."""
553
592
  # TODO(SKY-1215): allow admin user to extend the disallowed keys or specify
554
593
  # allowed keys.
@@ -557,7 +596,13 @@ def override_skypilot_config(
557
596
  yield
558
597
  return
559
598
  original_config = _get_loaded_config()
599
+ original_config_path = loaded_config_path_serialized()
560
600
  override_configs = config_utils.Config(override_configs)
601
+ if override_config_path_serialized is None:
602
+ override_config_path = []
603
+ else:
604
+ override_config_path = json.loads(override_config_path_serialized)
605
+
561
606
  disallowed_diff_keys = []
562
607
  for key in constants.SKIPPED_CLIENT_OVERRIDE_KEYS:
563
608
  value = override_configs.pop_nested(key, default_value=None)
@@ -602,6 +647,8 @@ def override_skypilot_config(
602
647
  skip_none=False)
603
648
  _set_config_overridden(True)
604
649
  _set_loaded_config(config)
650
+ _set_loaded_config_path(_get_loaded_config_path() +
651
+ override_config_path)
605
652
  yield
606
653
  except exceptions.InvalidSkyPilotConfigError as e:
607
654
  with ux_utils.print_exception_no_traceback():
@@ -616,6 +663,7 @@ def override_skypilot_config(
616
663
  finally:
617
664
  _set_loaded_config(original_config)
618
665
  _set_config_overridden(False)
666
+ _set_loaded_config_path_serialized(original_config_path)
619
667
 
620
668
 
621
669
  @contextlib.contextmanager
@@ -628,6 +676,7 @@ def replace_skypilot_config(new_configs: config_utils.Config) -> Iterator[None]:
628
676
  sky_utils.context for more details.
629
677
  """
630
678
  original_config = _get_loaded_config()
679
+ original_config_path = loaded_config_path_serialized()
631
680
  original_env_var = os.environ.get(ENV_VAR_SKYPILOT_CONFIG)
632
681
  if new_configs != original_config:
633
682
  # Modify the global config of current process or context
@@ -642,9 +691,11 @@ def replace_skypilot_config(new_configs: config_utils.Config) -> Iterator[None]:
642
691
  # Note that this code modifies os.environ directly because it
643
692
  # will be hijacked to be context-aware if a context is active.
644
693
  os.environ[ENV_VAR_SKYPILOT_CONFIG] = temp_file.name
694
+ _set_loaded_config_path(temp_file.name)
645
695
  yield
646
696
  # Restore the original config and env var.
647
697
  _set_loaded_config(original_config)
698
+ _set_loaded_config_path_serialized(original_config_path)
648
699
  if original_env_var:
649
700
  os.environ[ENV_VAR_SKYPILOT_CONFIG] = original_env_var
650
701
  else:
@@ -705,13 +756,15 @@ def apply_cli_config(cli_config: Optional[List[str]]) -> Dict[str, Any]:
705
756
  return parsed_config
706
757
 
707
758
 
708
- def update_config_no_lock(config: config_utils.Config) -> None:
759
+ def update_api_server_config_no_lock(config: config_utils.Config) -> None:
709
760
  """Dumps the new config to a file and syncs to ConfigMap if in Kubernetes.
710
761
 
711
762
  Args:
712
763
  config: The config to save and sync.
713
764
  """
714
- global_config_path = os.path.expanduser(get_user_config_path())
765
+ global_config_path = _resolve_server_config_path()
766
+ if global_config_path is None:
767
+ global_config_path = get_user_config_path()
715
768
 
716
769
  # Always save to the local file (PVC in Kubernetes, local file otherwise)
717
770
  common_utils.dump_yaml(global_config_path, dict(config))
@@ -21,11 +21,21 @@ from websockets.asyncio.client import connect
21
21
 
22
22
  BUFFER_SIZE = 2**16 # 64KB
23
23
 
24
+ # Environment variable for a file path to the API cookie file.
25
+ # Keep in sync with server/constants.py
26
+ API_COOKIE_FILE_ENV_VAR = 'SKYPILOT_API_COOKIE_FILE'
27
+ # Default file if unset.
28
+ # Keep in sync with server/constants.py
29
+ API_COOKIE_FILE_DEFAULT_LOCATION = '~/.sky/cookies.txt'
30
+
24
31
 
25
32
  def _get_cookie_header(url: str) -> Dict[str, str]:
26
33
  """Extract Cookie header value from a cookie jar for a specific URL"""
27
- cookie_path = os.environ.get('SKYPILOT_API_COOKIE_FILE')
34
+ cookie_path = os.environ.get(API_COOKIE_FILE_ENV_VAR)
28
35
  if cookie_path is None:
36
+ cookie_path = API_COOKIE_FILE_DEFAULT_LOCATION
37
+ cookie_path = os.path.expanduser(cookie_path)
38
+ if not os.path.exists(cookie_path):
29
39
  return {}
30
40
 
31
41
  request = Request(url)
sky/usage/usage_lib.py CHANGED
@@ -205,8 +205,8 @@ class UsageMessageToReport(MessageToReport):
205
205
  logger.debug('Multiple accelerators are not supported: '
206
206
  f'{resources.accelerators}.')
207
207
  self.task_accelerators = list(resources.accelerators.keys())[0]
208
- self.task_num_accelerators = resources.accelerators[
209
- self.task_accelerators]
208
+ self.task_num_accelerators = int(
209
+ resources.accelerators[self.task_accelerators])
210
210
  else:
211
211
  self.task_accelerators = None
212
212
  self.task_num_accelerators = None
@@ -245,7 +245,8 @@ class UsageMessageToReport(MessageToReport):
245
245
  logger.debug('Multiple accelerators are not supported: '
246
246
  f'{resources.accelerators}.')
247
247
  self.accelerators = list(resources.accelerators.keys())[0]
248
- self.num_accelerators = resources.accelerators[self.accelerators]
248
+ self.num_accelerators = int(
249
+ resources.accelerators[self.accelerators])
249
250
  else:
250
251
  self.accelerators = None
251
252
  self.num_accelerators = None
sky/users/__init__.py ADDED
File without changes
sky/users/model.conf ADDED
@@ -0,0 +1,15 @@
1
+ # rbac_model.conf
2
+ [request_definition]
3
+ r = sub, obj, act
4
+
5
+ [policy_definition]
6
+ p = sub, obj, act
7
+
8
+ [role_definition]
9
+ g = _, _
10
+
11
+ [policy_effect]
12
+ e = some(where (p.eft == allow))
13
+
14
+ [matchers]
15
+ m = g(r.sub, p.sub) && r.obj == p.obj && r.act == p.act
@@ -0,0 +1,178 @@
1
+ """Permission service for SkyPilot API Server."""
2
+ import contextlib
3
+ import logging
4
+ import os
5
+ import threading
6
+ from typing import List
7
+
8
+ import casbin
9
+ import filelock
10
+ import sqlalchemy_adapter
11
+
12
+ from sky import global_user_state
13
+ from sky import sky_logging
14
+ from sky.users import rbac
15
+
16
+ logger = sky_logging.init_logger(__name__)
17
+
18
+ # Filelocks for the policy update.
19
+ POLICY_UPDATE_LOCK_PATH = os.path.expanduser('~/.sky/.policy_update.lock')
20
+ POLICY_UPDATE_LOCK_TIMEOUT_SECONDS = 20
21
+
22
+ _enforcer_instance = None
23
+ _lock = threading.Lock()
24
+
25
+
26
class PermissionService:
    """Permission service for SkyPilot API Server.

    Every instance in the process shares one casbin enforcer: the first
    instance builds it and later instances reuse it.
    """

    def __init__(self):
        global _enforcer_instance
        # Do the whole check-then-publish sequence under the lock so that
        # (a) every instance leaves with `self.enforcer` set, and
        # (b) the shared instance is published only after its enforcer has
        #     been built, so nobody can observe it without `.enforcer` and a
        #     failed build does not leave a broken singleton behind.
        with _lock:
            if _enforcer_instance is None:
                engine = global_user_state.SQLALCHEMY_ENGINE
                adapter = sqlalchemy_adapter.Adapter(engine)
                model_path = os.path.join(os.path.dirname(__file__),
                                          'model.conf')
                enforcer = casbin.Enforcer(model_path, adapter)
                logging.getLogger('casbin.policy').setLevel(
                    sky_logging.ERROR)
                logging.getLogger('casbin.role').setLevel(sky_logging.ERROR)
                self.enforcer = enforcer
                _enforcer_instance = self
            else:
                # For different threads, we share the same enforcer instance.
                self.enforcer = _enforcer_instance.enforcer
        self._maybe_initialize_policies()

    @staticmethod
    def _expected_policies(role_permissions: dict) -> List[List[str]]:
        """Flatten role blocklists into ``[role, path, method]`` rules.

        Args:
            role_permissions: Mapping of role name to a dict whose
                'permissions' entry may hold a 'blocklist' of
                ``{'path': ..., 'method': ...}`` items.

        Returns:
            One ``[role, path, method]`` list per blocklist item, in
            iteration order. Roles whose 'permissions' value is falsy or has
            no 'blocklist' key contribute nothing.
        """
        expected: List[List[str]] = []
        for role, permissions in role_permissions.items():
            role_rules = permissions['permissions']
            if role_rules and 'blocklist' in role_rules:
                for item in role_rules['blocklist']:
                    expected.append([role, item['path'], item['method']])
        return expected

    def _maybe_initialize_policies(self):
        """Initialize policies if they don't already exist."""
        logger.debug(f'Initializing policies in process: {os.getpid()}')

        # Check if policies are already initialized by looking for existing
        # permission policies in the enforcer.
        existing_policies = self.enforcer.get_policy()
        expected_policies = self._expected_policies(
            rbac.get_role_permissions())

        # If every expected policy is already present, skip initialization.
        policies_exist = all(
            expected in existing_policies for expected in expected_policies)

        if not policies_exist:
            # Only clear and reinitialize if policies don't exist or are
            # incomplete.
            logger.debug('Policies not found or incomplete, initializing...')
            # Only clear p policies (permission policies),
            # keep g policies (role policies)
            self.enforcer.remove_filtered_policy(0)
            for role, path, method in expected_policies:
                self.enforcer.add_policy(role, path, method)
            self.enforcer.save_policy()
        else:
            logger.debug('Policies already exist, skipping initialization')

        # Always ensure users have default roles (this is idempotent)
        all_users = global_user_state.get_all_users()
        for user in all_users:
            self.add_user_if_not_exists(user.id)

    def add_user_if_not_exists(self, user: str) -> None:
        """Give ``user`` the default role when the user has no role yet.

        Args:
            user: The user ID to check.
        """
        with _policy_lock():
            user_roles = self.enforcer.get_roles_for_user(user)
            if not user_roles:
                default_role = rbac.get_default_role()
                logger.info(f'User {user} has no roles, adding'
                            f' default role {default_role}')
                self.enforcer.add_grouping_policy(user, default_role)
                self.enforcer.save_policy()

    def update_role(self, user: str, new_role: str):
        """Replace the user's current role with ``new_role``.

        Args:
            user: The user ID to update.
            new_role: The role name to assign.
        """
        with _policy_lock():
            # Reload first so the decision is based on the stored policy.
            self._load_policy_no_lock()
            # Avoid calling get_user_roles, as it will require the lock.
            current_roles = self.enforcer.get_roles_for_user(user)
            if not current_roles:
                logger.warning(f'User {user} has no roles')
            else:
                # TODO(hailong): how to handle multiple roles?
                current_role = current_roles[0]
                if current_role == new_role:
                    logger.info(f'User {user} already has role {new_role}')
                    return
                self.enforcer.remove_grouping_policy(user, current_role)

            # Update user role
            self.enforcer.add_grouping_policy(user, new_role)
            self.enforcer.save_policy()

    def get_user_roles(self, user: str) -> List[str]:
        """Get the roles recorded for a user.

        Reloads the policy under the policy lock, then returns whatever
        ``enforcer.get_roles_for_user`` reports.

        NOTE(review): whether inherited roles are included depends on that
        casbin call - confirm before relying on it.

        Args:
            user: The user ID to get roles for.

        Returns:
            A list of role names that the user has.
        """
        self._load_policy()
        return self.enforcer.get_roles_for_user(user)

    def check_permission(self, user: str, path: str, method: str) -> bool:
        """Return the enforcer's decision for ``(user, path, method)``."""
        # We intentionally don't load the policy here, as it is a hot path, and
        # we don't support updating the policy.
        # We don't hold the lock for checking permission, as it is read only and
        # it is a hot path in every request. It is ok to have a stale policy,
        # as long as it is eventually consistent.
        return self.enforcer.enforce(user, path, method)

    def _load_policy_no_lock(self):
        """Load policy from storage."""
        self.enforcer.load_policy()

    def _load_policy(self):
        """Load policy from storage with lock."""
        with _policy_lock():
            self._load_policy_no_lock()
164
+
165
+
166
@contextlib.contextmanager
def _policy_lock():
    """Context manager for policy update lock.

    Holds a file lock on POLICY_UPDATE_LOCK_PATH while the managed block
    runs.

    Raises:
        RuntimeError: If ``filelock.Timeout`` is raised, i.e. the lock could
            not be acquired within POLICY_UPDATE_LOCK_TIMEOUT_SECONDS.
    """
    policy_file_lock = filelock.FileLock(POLICY_UPDATE_LOCK_PATH,
                                         POLICY_UPDATE_LOCK_TIMEOUT_SECONDS)
    try:
        with policy_file_lock:
            yield
    except filelock.Timeout as timeout_error:
        message = (f'Failed to load policy due to a timeout '
                   f'when trying to acquire the lock at '
                   f'{POLICY_UPDATE_LOCK_PATH}. '
                   'Please try again or manually remove the lock '
                   f'file if you believe it is stale.')
        raise RuntimeError(message) from timeout_error
sky/users/rbac.py ADDED
@@ -0,0 +1,86 @@
1
+ """RBAC (Role-Based Access Control) functionality for SkyPilot API Server."""
2
+
3
+ import enum
4
+ from typing import Dict, List
5
+
6
+ from sky import sky_logging
7
+ from sky import skypilot_config
8
+
9
+ logger = sky_logging.init_logger(__name__)
10
+
11
+ # Default user blocklist for user role
12
+ # Cannot access workspace CUD operations
13
+ _DEFAULT_USER_BLOCKLIST = [{
14
+ 'path': '/workspaces/config',
15
+ 'method': 'POST'
16
+ }, {
17
+ 'path': '/workspaces/update',
18
+ 'method': 'POST'
19
+ }, {
20
+ 'path': '/workspaces/create',
21
+ 'method': 'POST'
22
+ }, {
23
+ 'path': '/workspaces/delete',
24
+ 'method': 'POST'
25
+ }, {
26
+ 'path': '/users/update',
27
+ 'method': 'POST'
28
+ }]
29
+
30
+
31
+ # Define roles
32
class RoleName(str, enum.Enum):
    """Names of the roles known to the RBAC layer.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """
    ADMIN = 'admin'
    USER = 'user'
35
+
36
+
37
def get_supported_roles() -> List[str]:
    """Return the string value of every ``RoleName`` member."""
    return [member.value for member in RoleName]
39
+
40
+
41
def get_default_role() -> str:
    """Return the configured default role name.

    Reads ``rbac.default_role`` from the SkyPilot config and falls back to
    the admin role value when the key is not set.
    """
    fallback_role = RoleName.ADMIN.value
    return skypilot_config.get_nested(('rbac', 'default_role'),
                                      default_value=fallback_role)
44
+
45
+
46
def get_role_permissions(
) -> Dict[str, Dict[str, Dict[str, List[Dict[str, str]]]]]:
    """Get all role permissions from config.

    Role names read from ``rbac.roles`` are lower-cased. Names that are not
    in ``get_supported_roles()`` are logged and left out of the result. When
    no 'user' role is configured, one is added with a copy of the default
    user blocklist.

    The result is a new dict: the object returned by the config lookup is
    neither modified nor iterated while being modified.

    Returns:
        Dictionary containing all roles and their permissions configuration.
        Example:
        {
            'admin': {
                'permissions': {
                    'blocklist': []
                }
            },
            'user': {
                'permissions': {
                    'blocklist': [
                        {'path': '/workspaces/config', 'method': 'POST'},
                        {'path': '/workspaces/update', 'method': 'POST'}
                    ]
                }
            }
        }
    """
    # Get all roles from the config. `or {}` covers a key that is present
    # but empty.
    configured_roles = skypilot_config.get_nested(('rbac', 'roles'),
                                                  default_value={}) or {}
    supported_roles = get_supported_roles()

    # Collect into a fresh dict. Inserting the lower-cased key into the dict
    # being iterated raises "dictionary changed size during iteration" as
    # soon as a configured role name is not already lowercase.
    role_permissions = {}
    for role, permissions in configured_roles.items():
        role_name = role.lower()
        if role_name not in supported_roles:
            logger.warning(f'Invalid role: {role_name}')
            continue
        role_permissions[role_name] = permissions

    # Add default roles if not present
    if 'user' not in role_permissions:
        role_permissions['user'] = {
            'permissions': {
                # Copy the entries so callers cannot alter the module-level
                # default through the returned structure.
                'blocklist': [dict(item) for item in _DEFAULT_USER_BLOCKLIST]
            }
        }
    return role_permissions
sky/users/server.py ADDED
@@ -0,0 +1,66 @@
1
+ """REST API for user management."""
2
+
3
+ import hashlib
4
+ from typing import Any, Dict, List
5
+
6
+ import fastapi
7
+
8
+ from sky import global_user_state
9
+ from sky import sky_logging
10
+ from sky.server.requests import payloads
11
+ from sky.users import permission
12
+ from sky.users import rbac
13
+ from sky.utils import common_utils
14
+
15
+ logger = sky_logging.init_logger(__name__)
16
+
17
+ router = fastapi.APIRouter()
18
+
19
+ permission_service = permission.PermissionService()
20
+
21
+
22
@router.get('')
async def users() -> List[Dict[str, Any]]:
    """Gets all users.

    Returns:
        One dict per user with keys 'id', 'name' and 'role'. 'role' is the
        first role reported for the user, or '' when there is none.
    """
    user_entries: List[Dict[str, Any]] = []
    for record in global_user_state.get_all_users():
        roles = permission_service.get_user_roles(record.id)
        primary_role = roles[0] if roles else ''
        user_entries.append({
            'id': record.id,
            'name': record.name,
            'role': primary_role
        })
    return user_entries
35
+
36
+
37
@router.get('/role')
async def get_current_user_role(request: fastapi.Request):
    """Get current user's role.

    Returns:
        A dict with keys 'name' and 'role'. Without the
        'X-Auth-Request-Email' header the name is '' and the role is the
        admin role value; otherwise the name is the header value and the
        role is the first role found for the derived user hash ('' if none).
    """
    # TODO(hailong): is there a reliable way to get the user
    # hash for the request without 'X-Auth-Request-Email' header?
    headers = request.headers
    if 'X-Auth-Request-Email' not in headers:
        return {'name': '', 'role': rbac.RoleName.ADMIN.value}

    user_name = headers['X-Auth-Request-Email']
    # The user hash is the MD5 hex digest of the header value, truncated to
    # USER_HASH_LENGTH characters.
    digest = hashlib.md5(user_name.encode()).hexdigest()
    user_hash = digest[:common_utils.USER_HASH_LENGTH]
    roles = permission_service.get_user_roles(user_hash)
    current_role = roles[0] if roles else ''
    return {'name': user_name, 'role': current_role}
49
+
50
+
51
@router.post('/update')
async def user_update(user_update_body: payloads.UserUpdateBody) -> None:
    """Updates the user role.

    Raises:
        fastapi.HTTPException: With status 400 when the requested role is
            not a supported role, or when the looked-up user has no name.
    """
    user_id, role = user_update_body.user_id, user_update_body.role

    # Reject roles that are not in the supported set.
    if role not in rbac.get_supported_roles():
        raise fastapi.HTTPException(status_code=400,
                                    detail=f'Invalid role: {role}')

    # A looked-up user without a name is treated as non-existent.
    if not global_user_state.get_user(user_id).name:
        raise fastapi.HTTPException(status_code=400,
                                    detail=f'User {user_id} does not exist')

    # Update user role in casbin policy
    permission_service.update_role(user_id, role)