skypilot-nightly 1.0.0.dev20250624__py3-none-any.whl → 1.0.0.dev20250625__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +26 -11
  3. sky/backends/cloud_vm_ray_backend.py +16 -5
  4. sky/client/cli/command.py +222 -4
  5. sky/client/sdk.py +110 -82
  6. sky/clouds/aws.py +10 -7
  7. sky/clouds/azure.py +10 -7
  8. sky/clouds/cloud.py +2 -0
  9. sky/clouds/cudo.py +2 -0
  10. sky/clouds/do.py +10 -7
  11. sky/clouds/fluidstack.py +2 -0
  12. sky/clouds/gcp.py +10 -7
  13. sky/clouds/hyperbolic.py +10 -7
  14. sky/clouds/ibm.py +2 -0
  15. sky/clouds/kubernetes.py +26 -9
  16. sky/clouds/lambda_cloud.py +10 -7
  17. sky/clouds/nebius.py +10 -7
  18. sky/clouds/oci.py +10 -7
  19. sky/clouds/paperspace.py +10 -7
  20. sky/clouds/runpod.py +10 -7
  21. sky/clouds/scp.py +10 -7
  22. sky/clouds/vast.py +10 -7
  23. sky/clouds/vsphere.py +2 -0
  24. sky/core.py +1 -0
  25. sky/dag.py +14 -0
  26. sky/dashboard/out/404.html +1 -1
  27. sky/dashboard/out/_next/static/ZWdSYkqVe3WjnFR8ocqoG/_buildManifest.js +1 -0
  28. sky/dashboard/out/_next/static/chunks/230-d6e363362017ff3a.js +1 -0
  29. sky/dashboard/out/_next/static/chunks/310.2671028c20e892c7.js +16 -0
  30. sky/dashboard/out/_next/static/chunks/{37-4650f214e2119168.js → 37-1f1e94f5a561202a.js} +2 -2
  31. sky/dashboard/out/_next/static/chunks/42.bc85e5b1a4debf22.js +6 -0
  32. sky/dashboard/out/_next/static/chunks/470-92dd1614396389be.js +1 -0
  33. sky/dashboard/out/_next/static/chunks/544.110e53813fb98e2e.js +1 -0
  34. sky/dashboard/out/_next/static/chunks/645.961f08e39b8ce447.js +1 -0
  35. sky/dashboard/out/_next/static/chunks/66-66ae330df2d3c1c7.js +1 -0
  36. sky/dashboard/out/_next/static/chunks/682.00e56a220dd26fe1.js +6 -0
  37. sky/dashboard/out/_next/static/chunks/697.6460bf72e760addd.js +20 -0
  38. sky/dashboard/out/_next/static/chunks/{856-bfddc18e16f3873c.js → 856-cdf66268ec878d0c.js} +1 -1
  39. sky/dashboard/out/_next/static/chunks/pages/{_app-ce31493da9747ef4.js → _app-0ef7418d1a3822f3.js} +1 -1
  40. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-aff040d7bc5d0086.js +6 -0
  41. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-32ce4f49f2261f55.js +6 -0
  42. sky/dashboard/out/_next/static/chunks/pages/clusters-4aa031d1f42723d8.js +1 -0
  43. sky/dashboard/out/_next/static/chunks/pages/config-3102d02a188f04b3.js +1 -0
  44. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-6f1e02e31eecb5ce.js +1 -0
  45. sky/dashboard/out/_next/static/chunks/pages/infra-fd5dc8a91bd9169a.js +1 -0
  46. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-e4b23128db0774cd.js +16 -0
  47. sky/dashboard/out/_next/static/chunks/pages/jobs-26da173e20af16e4.js +1 -0
  48. sky/dashboard/out/_next/static/chunks/pages/users-ce29e7420385563d.js +1 -0
  49. sky/dashboard/out/_next/static/chunks/pages/volumes-476b670ef33d1ecd.js +1 -0
  50. sky/dashboard/out/_next/static/chunks/pages/workspace/new-09ae0f6f972aa871.js +1 -0
  51. sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-ecc5a7003776cfa7.js → [name]-0b4c662a25e4747a.js} +1 -1
  52. sky/dashboard/out/_next/static/chunks/pages/workspaces-862b120406461b10.js +1 -0
  53. sky/dashboard/out/_next/static/chunks/webpack-6133dc1e928bd0b5.js +1 -0
  54. sky/dashboard/out/_next/static/css/b23cb0257bf96c51.css +3 -0
  55. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  56. sky/dashboard/out/clusters/[cluster].html +1 -1
  57. sky/dashboard/out/clusters.html +1 -1
  58. sky/dashboard/out/config.html +1 -1
  59. sky/dashboard/out/index.html +1 -1
  60. sky/dashboard/out/infra/[context].html +1 -1
  61. sky/dashboard/out/infra.html +1 -1
  62. sky/dashboard/out/jobs/[job].html +1 -1
  63. sky/dashboard/out/jobs.html +1 -1
  64. sky/dashboard/out/users.html +1 -1
  65. sky/dashboard/out/volumes.html +1 -0
  66. sky/dashboard/out/workspace/new.html +1 -1
  67. sky/dashboard/out/workspaces/[name].html +1 -1
  68. sky/dashboard/out/workspaces.html +1 -1
  69. sky/data/storage_utils.py +2 -4
  70. sky/exceptions.py +15 -0
  71. sky/execution.py +5 -0
  72. sky/global_user_state.py +129 -0
  73. sky/jobs/client/sdk.py +13 -11
  74. sky/jobs/server/core.py +4 -0
  75. sky/models.py +16 -0
  76. sky/provision/__init__.py +26 -0
  77. sky/provision/kubernetes/__init__.py +3 -0
  78. sky/provision/kubernetes/instance.py +38 -77
  79. sky/provision/kubernetes/utils.py +52 -2
  80. sky/provision/kubernetes/volume.py +147 -0
  81. sky/resources.py +20 -76
  82. sky/serve/client/sdk.py +13 -13
  83. sky/serve/server/core.py +5 -1
  84. sky/server/common.py +40 -5
  85. sky/server/constants.py +5 -1
  86. sky/server/metrics.py +105 -0
  87. sky/server/requests/executor.py +30 -14
  88. sky/server/requests/payloads.py +16 -0
  89. sky/server/requests/requests.py +35 -1
  90. sky/server/rest.py +152 -0
  91. sky/server/server.py +66 -16
  92. sky/server/state.py +20 -0
  93. sky/server/stream_utils.py +8 -3
  94. sky/server/uvicorn.py +153 -13
  95. sky/setup_files/dependencies.py +2 -0
  96. sky/skylet/constants.py +14 -3
  97. sky/task.py +141 -18
  98. sky/templates/kubernetes-ray.yml.j2 +30 -1
  99. sky/users/permission.py +2 -0
  100. sky/utils/context.py +3 -1
  101. sky/utils/resources_utils.py +66 -0
  102. sky/utils/rich_utils.py +6 -0
  103. sky/utils/schemas.py +146 -3
  104. sky/utils/status_lib.py +10 -0
  105. sky/utils/validator.py +11 -1
  106. sky/volumes/__init__.py +0 -0
  107. sky/volumes/client/__init__.py +0 -0
  108. sky/volumes/client/sdk.py +64 -0
  109. sky/volumes/server/__init__.py +0 -0
  110. sky/volumes/server/core.py +199 -0
  111. sky/volumes/server/server.py +85 -0
  112. sky/volumes/utils.py +158 -0
  113. sky/volumes/volume.py +198 -0
  114. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/METADATA +2 -1
  115. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/RECORD +123 -108
  116. sky/dashboard/out/_next/static/chunks/350.9e123a4551f68b0d.js +0 -1
  117. sky/dashboard/out/_next/static/chunks/42.2273cc2415291ceb.js +0 -6
  118. sky/dashboard/out/_next/static/chunks/470-1494c899266cf5c9.js +0 -1
  119. sky/dashboard/out/_next/static/chunks/641.c8e452bc5070a630.js +0 -1
  120. sky/dashboard/out/_next/static/chunks/682.4dd5dc116f740b5f.js +0 -6
  121. sky/dashboard/out/_next/static/chunks/760-a89d354797ce7af5.js +0 -1
  122. sky/dashboard/out/_next/static/chunks/901-b424d293275e1fd7.js +0 -1
  123. sky/dashboard/out/_next/static/chunks/984.ae8c08791d274ca0.js +0 -50
  124. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-4e065c812a52460b.js +0 -6
  125. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]-520ec1ab65e2f2a4.js +0 -6
  126. sky/dashboard/out/_next/static/chunks/pages/clusters-7e9736af1c6345a6.js +0 -1
  127. sky/dashboard/out/_next/static/chunks/pages/config-e4f473661889e7cd.js +0 -1
  128. sky/dashboard/out/_next/static/chunks/pages/infra/[context]-00fd23b9577492ca.js +0 -1
  129. sky/dashboard/out/_next/static/chunks/pages/infra-8a4bf7370d4d9bb7.js +0 -1
  130. sky/dashboard/out/_next/static/chunks/pages/jobs/[job]-171c27f4ca94861c.js +0 -16
  131. sky/dashboard/out/_next/static/chunks/pages/jobs-55e5bcb16d563231.js +0 -1
  132. sky/dashboard/out/_next/static/chunks/pages/users-c9f4d785cdaa52d8.js +0 -1
  133. sky/dashboard/out/_next/static/chunks/pages/workspace/new-31aa8bdcb7592635.js +0 -1
  134. sky/dashboard/out/_next/static/chunks/pages/workspaces-f00cba35691483b1.js +0 -1
  135. sky/dashboard/out/_next/static/chunks/webpack-c85998e6a5722f21.js +0 -1
  136. sky/dashboard/out/_next/static/css/6ab927686b492a4a.css +0 -3
  137. sky/dashboard/out/_next/static/zsALxITkbP8J8NVwSDwMo/_buildManifest.js +0 -1
  138. /sky/dashboard/out/_next/static/{zsALxITkbP8J8NVwSDwMo → ZWdSYkqVe3WjnFR8ocqoG}/_ssgManifest.js +0 -0
  139. /sky/dashboard/out/_next/static/chunks/{843-bde186946d353355.js → 843-07d25a7e64462fd8.js} +0 -0
  140. /sky/dashboard/out/_next/static/chunks/{938-ce7991c156584b06.js → 938-068520cc11738deb.js} +0 -0
  141. /sky/dashboard/out/_next/static/chunks/{973-56412c7976b4655b.js → 973-5b5019ba333e8d62.js} +0 -0
  142. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/WHEEL +0 -0
  143. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/entry_points.txt +0 -0
  144. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/licenses/LICENSE +0 -0
  145. {skypilot_nightly-1.0.0.dev20250624.dist-info → skypilot_nightly-1.0.0.dev20250625.dist-info}/top_level.txt +0 -0
sky/utils/rich_utils.py CHANGED
@@ -7,6 +7,7 @@ import threading
7
7
  import typing
8
8
  from typing import Callable, Iterator, Optional, Tuple, Union
9
9
 
10
+ from sky import exceptions
10
11
  from sky.adaptors import common as adaptors_common
11
12
  from sky.utils import annotations
12
13
  from sky.utils import context
@@ -58,6 +59,7 @@ class Control(enum.Enum):
58
59
  EXIT = 'rich_exit'
59
60
  UPDATE = 'rich_update'
60
61
  HEARTBEAT = 'heartbeat'
62
+ RETRY = 'retry'
61
63
 
62
64
  def encode(self, msg: str) -> str:
63
65
  return f'<{self.value}>{msg}</{self.value}>'
@@ -365,6 +367,10 @@ def decode_rich_status(
365
367
  yield line
366
368
  continue
367
369
 
370
+ if control == Control.RETRY:
371
+ raise exceptions.ServerTemporarilyUnavailableError(
372
+ 'The server is temporarily unavailable. Please try '
373
+ 'again.')
368
374
  # control is not None, i.e. it is a rich status control message.
369
375
  if threading.current_thread() is not threading.main_thread():
370
376
  yield None
sky/utils/schemas.py CHANGED
@@ -70,8 +70,36 @@ _AUTOSTOP_SCHEMA = {
70
70
  }
71
71
 
72
72
 
73
- def _get_single_resources_schema():
74
- """Schema for a single resource in a resources list."""
# Note: This is similar to _get_infra_pattern()
# but without the wildcard patterns.
def _get_volume_infra_pattern():
    """Builds the regex used to validate the `infra` field of a volume.

    Accepted forms:
      1. cloud[/region[/zone]] for every entry of constants.ALL_CLOUDS other
         than 'kubernetes' (cloud name matched case-insensitively).
      2. "kubernetes" or "k8s" (case-insensitive), optionally followed by
         "/<context>", where the context name may itself contain slashes.

    Unlike the resources infra pattern, wildcards are not accepted here.

    Returns:
        The anchored (^...$) pattern string.
    """
    # Kubernetes gets its own alternative below, so leave it out of the
    # generic cloud alternation. Filtering (rather than list.remove) does not
    # raise if 'kubernetes' is ever absent from ALL_CLOUDS.
    all_clouds = [
        cloud for cloud in constants.ALL_CLOUDS if cloud != 'kubernetes'
    ]
    cloud_pattern = f'(?i:({"|".join(all_clouds)}))'

    # Optional /region followed by optional /zone.
    # /[^/]+ matches a slash followed by any characters except slash (region
    # or zone name). The outer (?:...)? makes the whole part optional.
    region_zone_pattern = '(?:/[^/]+(?:/[^/]+)?)?'

    # Kubernetes specific pattern - matches:
    # 1. Just the word "kubernetes" or "k8s" by itself
    # 2. "k8s/" or "kubernetes/" followed by any context name (which may
    #    contain slashes)
    kubernetes_pattern = '(?i:kubernetes|k8s)(?:/.+)?'

    # Combine the alternatives; ^ marks start of string, $ marks the end.
    infra_pattern = (f'^(?:{cloud_pattern}{region_zone_pattern}|'
                     f'{kubernetes_pattern})$')
    return infra_pattern
100
+
101
+
102
+ def _get_infra_pattern():
75
103
  # Building the regex pattern for the infra field
76
104
  # Format: cloud[/region[/zone]] or wildcards or kubernetes context
77
105
  # Match any cloud name (case insensitive)
@@ -103,7 +131,11 @@ def _get_single_resources_schema():
103
131
  infra_pattern = (f'^(?:{cloud_pattern}{region_zone_pattern}|'
104
132
  f'{wildcard_cloud}{wildcard_with_region}|'
105
133
  f'{kubernetes_pattern})$')
134
+ return infra_pattern
106
135
 
136
+
137
+ def _get_single_resources_schema():
138
+ """Schema for a single resource in a resources list."""
107
139
  return {
108
140
  '$schema': 'https://json-schema.org/draft/2020-12/schema',
109
141
  'type': 'object',
@@ -133,7 +165,7 @@ def _get_single_resources_schema():
133
165
  # 3. Kubernetes patterns - e.g. "kubernetes/my-context",
134
166
  # "k8s/context-name",
135
167
  # "k8s/aws:eks:us-east-1:123456789012:cluster/my-cluster"
136
- 'pattern': infra_pattern,
168
+ 'pattern': _get_infra_pattern(),
137
169
  },
138
170
  'cpus': {
139
171
  'anyOf': [{
@@ -383,6 +415,66 @@ def get_resources_schema():
383
415
  }
384
416
 
385
417
 
def get_volume_schema():
    """Schema for a volume definition.

    `name`, `type` and `infra` are required; unknown top-level keys are
    rejected. `type` and `config.access_mode` must match one of the values of
    volume.VolumeType / volume.VolumeAccessMode exactly (case sensitive).
    """
    # pylint: disable=import-outside-toplevel
    from sky.volumes import volume

    type_values = [volume_type.value for volume_type in volume.VolumeType]
    infra_schema = {
        'type': 'string',
        'description': ('Infrastructure specification in format: '
                        'cloud[/region[/zone]].'),
        # Pattern validates:
        # 1. cloud[/region[/zone]] - e.g. "aws", "aws/us-east-1",
        #    "aws/us-east-1/us-east-1a"
        # 2. Kubernetes patterns - e.g. "kubernetes/my-context",
        #    "k8s/context-name",
        #    "k8s/aws:eks:us-east-1:123456789012:cluster/my-cluster"
        'pattern': _get_volume_infra_pattern(),
    }
    size_schema = {
        'type': 'string',
        'pattern': constants.MEMORY_SIZE_PATTERN,
    }
    access_mode_values = [mode.value for mode in volume.VolumeAccessMode]
    config_schema = {
        'type': 'object',
        'required': [],
        'properties': {
            'storage_class_name': {
                'type': 'string',
            },
            'access_mode': {
                'type': 'string',
                'case_sensitive_enum': access_mode_values,
            },
            'namespace': {
                'type': 'string',
            },
        },
    }

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': ['name', 'type', 'infra'],
        'additionalProperties': False,
        'properties': {
            'name': {
                'type': 'string',
            },
            'type': {
                'type': 'string',
                'case_sensitive_enum': type_values,
            },
            'infra': infra_schema,
            'size': size_schema,
            'resource_name': {
                'type': 'string',
            },
            'config': config_schema,
        }
    }
476
+
477
+
386
478
  def get_storage_schema():
387
479
  # pylint: disable=import-outside-toplevel
388
480
  from sky.data import storage
@@ -457,6 +549,49 @@ def get_storage_schema():
457
549
  }
458
550
 
459
551
 
def get_volume_mount_schema():
    """Schema for volume mount object in task config (internal use only).

    No key is required and unknown top-level keys are rejected, while
    `volume_config` accepts additional keys beyond cloud/region/zone.
    """
    # region and zone share the same shape: a string or an explicit null.
    # Separate dict objects are built so the two entries are not aliased.
    region_schema = {'anyOf': [{'type': 'string'}, {'type': 'null'}]}
    zone_schema = {'anyOf': [{'type': 'string'}, {'type': 'null'}]}

    volume_config_schema = {
        'type': 'object',
        'required': [],
        'additionalProperties': True,
        'properties': {
            'cloud': {
                'type': 'string',
                'case_insensitive_enum': list(constants.ALL_CLOUDS)
            },
            'region': region_schema,
            'zone': zone_schema,
        },
    }

    return {
        '$schema': 'https://json-schema.org/draft/2020-12/schema',
        'type': 'object',
        'required': [],
        'additionalProperties': False,
        'properties': {
            'path': {
                'type': 'string',
            },
            'volume_name': {
                'type': 'string',
            },
            'volume_config': volume_config_schema,
        }
    }
593
+
594
+
460
595
  def get_service_schema():
461
596
  """Schema for top-level `service:` field (for SkyServe)."""
462
597
  # To avoid circular imports, only import when needed.
@@ -723,6 +858,14 @@ def get_task_schema():
723
858
  'config': _filter_schema(
724
859
  get_config_schema(),
725
860
  constants.OVERRIDEABLE_CONFIG_KEYS_IN_TASK),
861
+ # volumes config is validated separately using get_volume_schema
862
+ 'volumes': {
863
+ 'type': 'object',
864
+ },
865
+ 'volume_mounts': {
866
+ 'type': 'array',
867
+ 'items': get_volume_mount_schema(),
868
+ },
726
869
  **_experimental_task_schema(),
727
870
  }
728
871
  }
sky/utils/status_lib.py CHANGED
@@ -54,3 +54,13 @@ class StorageStatus(enum.Enum):
54
54
 
55
55
  # Finished uploading, in terminal state
56
56
  READY = 'READY'
57
+
58
+
class VolumeStatus(enum.Enum):
    """Volume status as recorded in table 'volumes'."""

    # The volume exists and is ready to be used.
    READY = 'READY'

    # The volume is currently being used.
    IN_USE = 'IN_USE'
sky/utils/validator.py CHANGED
@@ -14,9 +14,19 @@ def case_insensitive_enum(validator, enums, instance, schema):
14
14
  f'{instance!r} is not one of {enums!r}')
15
15
 
16
16
 
def case_sensitive_enum(validator, enums, instance, schema):
    """Validation keyword: `instance` must be exactly one of `enums`.

    Yields a single jsonschema.ValidationError when the value is not a
    member of `enums`; yields nothing otherwise.
    """
    del validator, schema  # Unused.
    if instance in enums:
        return
    yield jsonschema.ValidationError(f'{instance!r} is not one of {enums!r}')
22
+
23
+
# Move this to a function to delay initialization
def get_schema_validator():
    """Get the schema validator class, initializing it only when needed.

    The returned class extends jsonschema.Draft7Validator with the custom
    `case_insensitive_enum` and `case_sensitive_enum` keywords.
    """
    custom_keywords = {
        'case_insensitive_enum': case_insensitive_enum,
        'case_sensitive_enum': case_sensitive_enum,
    }
    return jsonschema.validators.extend(jsonschema.Draft7Validator,
                                        validators=custom_keywords)
File without changes
File without changes
@@ -0,0 +1,64 @@
1
+ """SDK functions for volumes."""
2
+ import json
3
+ import typing
4
+ from typing import List
5
+
6
+ from sky import sky_logging
7
+ from sky.adaptors import common as adaptors_common
8
+ from sky.server import common as server_common
9
+ from sky.server.requests import payloads
10
+ from sky.usage import usage_lib
11
+ from sky.utils import annotations
12
+ from sky.utils import context
13
+ from sky.volumes import volume as volume_lib
14
+
15
+ if typing.TYPE_CHECKING:
16
+ import requests
17
+ else:
18
+ requests = adaptors_common.LazyImport('requests')
19
+
20
+ logger = sky_logging.init_logger(__name__)
21
+
22
+
23
+ @context.contextual
24
+ @usage_lib.entrypoint
25
+ @server_common.check_server_healthy_or_start
26
+ @annotations.client_api
27
+ def apply(volume: volume_lib.Volume) -> server_common.RequestId:
28
+ """Creates or registers a volume.
29
+ """
30
+ body = payloads.VolumeApplyBody(name=volume.name,
31
+ volume_type=volume.type,
32
+ cloud=volume.cloud,
33
+ region=volume.region,
34
+ zone=volume.zone,
35
+ size=volume.size,
36
+ config=volume.config)
37
+ response = requests.post(f'{server_common.get_server_url()}/volumes/apply',
38
+ json=json.loads(body.model_dump_json()),
39
+ cookies=server_common.get_api_cookie_jar())
40
+ return server_common.get_request_id(response)
41
+
42
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def ls() -> server_common.RequestId:
    """Lists all volumes.

    Returns:
        The request ID obtained from the server response.
    """
    endpoint = f'{server_common.get_server_url()}/volumes'
    response = requests.get(endpoint,
                            cookies=server_common.get_api_cookie_jar())
    return server_common.get_request_id(response)
52
+
53
+
@context.contextual
@usage_lib.entrypoint
@server_common.check_server_healthy_or_start
@annotations.client_api
def delete(names: List[str]) -> server_common.RequestId:
    """Deletes the volumes with the given names.

    Args:
        names: Names of the volumes to delete.

    Returns:
        The request ID obtained from the server response.
    """
    payload = payloads.VolumeDeleteBody(names=names)
    endpoint = f'{server_common.get_server_url()}/volumes/delete'
    response = requests.post(endpoint,
                             json=json.loads(payload.model_dump_json()),
                             cookies=server_common.get_api_cookie_jar())
    return server_common.get_request_id(response)
File without changes
@@ -0,0 +1,199 @@
1
+ """Volume management core."""
2
+
3
+ import contextlib
4
+ import os
5
+ from typing import Any, Dict, Generator, List, Optional
6
+ import uuid
7
+
8
+ import filelock
9
+
10
+ import sky
11
+ from sky import global_user_state
12
+ from sky import models
13
+ from sky import provision
14
+ from sky import sky_logging
15
+ from sky.utils import common_utils
16
+ from sky.utils import status_lib
17
+
18
+ logger = sky_logging.init_logger(__name__)
19
+
20
+ # Filelocks for the volume management.
21
+ VOLUME_LOCK_PATH = os.path.expanduser('~/.sky/.{volume_name}.lock')
22
+ VOLUME_LOCK_TIMEOUT_SECONDS = 20
23
+
24
+
def volume_refresh():
    """Refreshes the volume status.

    For every recorded volume, asks the provisioner who is using it and
    reconciles the stored status: READY when nothing uses the volume, IN_USE
    otherwise. Volumes without a handle are skipped with a warning.
    """
    for volume in global_user_state.get_volumes():
        volume_name = volume.get('name')
        config = volume.get('handle')
        if config is None:
            # The trailing space keeps the two sentences from running
            # together in the log output.
            logger.warning(f'Volume {volume_name} has no handle. '
                           'Skipping status refresh...')
            continue
        # The usage query runs before the lock is taken.
        usedby = provision.get_volume_usedby(config.cloud, config)
        with _volume_lock(volume_name):
            # Re-read the record under the lock: it may have been deleted or
            # updated since the listing above.
            latest_volume = global_user_state.get_volume_by_name(volume_name)
            if latest_volume is None:
                logger.warning(f'Volume {volume_name} not found.')
                continue
            status = latest_volume.get('status')
            if usedby:
                if status != status_lib.VolumeStatus.IN_USE:
                    logger.info(f'Update volume {volume_name} '
                                f'status to IN_USE, usedby: {usedby}')
                    global_user_state.update_volume_status(
                        volume_name, status=status_lib.VolumeStatus.IN_USE)
            elif status != status_lib.VolumeStatus.READY:
                logger.info(f'Update volume {volume_name} '
                            'status to READY')
                global_user_state.update_volume_status(
                    volume_name, status=status_lib.VolumeStatus.READY)
55
+
56
+
def volume_list() -> List[Dict[str, Any]]:
    """Gets the volumes.

    Volumes whose record has no handle are logged and left out of the result.

    Returns:
        [
            {
                'name': str,
                'launched_at': int timestamp of creation,
                'user_hash': str,
                'user_name': str, '' if the user hash is unknown,
                'workspace': str,
                'last_attached_at': int timestamp of last attachment,
                'last_use': last command,
                'status': str, the VolumeStatus value or '' if unset,
                'type': str,
                'cloud': str,
                'region': str,
                'zone': str,
                'size': str,
                'config': Dict[str, Any],
                'name_on_cloud': str,
            }
        ]
    """
    volumes = global_user_state.get_volumes()
    user_map = {
        user.id: user.name for user in global_user_state.get_all_users()
    }
    records = []
    for volume in volumes:
        volume_name = volume.get('name')
        config = volume.get('handle')
        if config is None:
            # Without a handle there is nothing to report about the volume's
            # placement, so skip it before building the record.
            logger.warning(f'Volume {volume_name} has no handle.')
            continue
        status = volume.get('status')
        records.append({
            'name': volume_name,
            'launched_at': volume.get('launched_at'),
            'user_hash': volume.get('user_hash'),
            'user_name': user_map.get(volume.get('user_hash'), ''),
            'workspace': volume.get('workspace'),
            'last_attached_at': volume.get('last_attached_at'),
            'last_use': volume.get('last_use'),
            # Expose the plain enum value so the record stays serializable.
            'status': status.value if status is not None else '',
            'type': config.type,
            'cloud': config.cloud,
            'region': config.region,
            'zone': config.zone,
            'size': config.size,
            'config': config.config,
            'name_on_cloud': config.name_on_cloud,
        })
    return records
113
+
114
+
def volume_delete(names: List[str]) -> None:
    """Deletes volumes.

    Volumes are processed in the order given; the first failure aborts the
    loop, so volumes listed before it have already been deleted.

    Args:
        names: List of volume names to delete.

    Raises:
        ValueError: If the volume does not exist
            or is in use or has no handle.
    """
    for name in names:
        # Fail fast on unknown names before acquiring the volume lock.
        if global_user_state.get_volume_by_name(name) is None:
            raise ValueError(f'Volume {name} not found.')
        with _volume_lock(name):
            # Validate under the lock so the status/handle that the decision
            # is based on cannot change between the check and the deletion
            # (same pattern as volume_apply).
            volume = global_user_state.get_volume_by_name(name)
            if volume is None:
                raise ValueError(f'Volume {name} not found.')
            if volume.get('status') == status_lib.VolumeStatus.IN_USE:
                raise ValueError(f'Volume {name} is in use.')
            config = volume.get('handle')
            if config is None:
                raise ValueError(f'Volume {name} has no handle.')
            logger.debug(f'Deleting volume {name} with config {config}')
            provision.delete_volume(config.cloud, config)
            global_user_state.delete_volume(name)
139
+
140
+
def volume_apply(name: str, volume_type: str, cloud: str, region: Optional[str],
                 zone: Optional[str], size: Optional[str],
                 config: Dict[str, Any]) -> None:
    """Creates or registers a volume.

    If a volume with the same name is already recorded, this is a no-op
    (an info message is logged).

    Args:
        name: The name of the volume.
        volume_type: The type of the volume.
        cloud: The cloud of the volume.
        region: The region of the volume.
        zone: The zone of the volume.
        size: The size of the volume.
        config: The configuration of the volume.

    Raises:
        ValueError: If `cloud` does not resolve to a registered cloud.
    """
    cloud_obj = sky.CLOUD_REGISTRY.from_str(cloud)
    if cloud_obj is None:
        # Explicit check instead of `assert`, which is stripped under -O.
        raise ValueError(f'Invalid cloud: {cloud}')
    # Reuse the method for cluster name on cloud to
    # generate the volume name on cloud.
    name_uuid = str(uuid.uuid4())[:6]
    name_on_cloud = common_utils.make_cluster_name_on_cloud(
        name, max_length=cloud_obj.max_cluster_name_length())
    # NOTE(review): the '-<uuid>' suffix is appended after the name was
    # built with max_length, so the result may exceed that limit - confirm
    # this is acceptable for volume names.
    name_on_cloud += '-' + name_uuid
    # Kept in a separate variable so the `config` dict parameter is not
    # rebound to an object of a different type.
    volume_config = models.VolumeConfig(
        name=name,
        type=volume_type,
        cloud=str(cloud_obj),
        region=region,
        zone=zone,
        size=size,
        config=config,
        name_on_cloud=name_on_cloud,
    )
    logger.debug(f'Creating volume {name} on cloud {cloud} with config '
                 f'{volume_config}')
    with _volume_lock(name):
        if global_user_state.get_volume_by_name(name) is not None:
            logger.info(f'Volume {name} already exists.')
            return
        volume_config = provision.apply_volume(cloud, volume_config)
        global_user_state.add_volume(name, volume_config,
                                     status_lib.VolumeStatus.READY)
184
+
185
+
@contextlib.contextmanager
def _volume_lock(volume_name: str) -> Generator[None, None, None]:
    """Context manager holding the per-volume file lock.

    Args:
        volume_name: Name of the volume; selects the lock file derived from
            VOLUME_LOCK_PATH.

    Raises:
        RuntimeError: If a filelock.Timeout is raised while acquiring the
            lock (VOLUME_LOCK_TIMEOUT_SECONDS) or from the managed body.
    """
    lock_path = VOLUME_LOCK_PATH.format(volume_name=volume_name)
    try:
        with filelock.FileLock(lock_path, VOLUME_LOCK_TIMEOUT_SECONDS):
            yield
    except filelock.Timeout as e:
        raise RuntimeError(
            f'Failed to update volume {volume_name} due to a timeout '
            f'when trying to acquire the lock at '
            f'{lock_path}. '
            'Please try again or manually remove the lock '
            'file if you believe it is stale.') from e
@@ -0,0 +1,85 @@
1
+ """REST API for volume management."""
2
+
3
+ import fastapi
4
+
5
+ import sky
6
+ from sky import clouds
7
+ from sky import sky_logging
8
+ from sky.server.requests import executor
9
+ from sky.server.requests import payloads
10
+ from sky.server.requests import requests as requests_lib
11
+ from sky.volumes import volume
12
+ from sky.volumes.server import core
13
+
14
+ logger = sky_logging.init_logger(__name__)
15
+
16
+ router = fastapi.APIRouter()
17
+
18
+
@router.get('')
async def volume_list(request: fastapi.Request) -> None:
    """Gets the volumes.

    Schedules core.volume_list as a SHORT request under the request ID
    carried in `request.state`.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='volume_list',
        request_body=payloads.RequestBody(),
        func=core.volume_list,
        schedule_type=requests_lib.ScheduleType.SHORT,
    )
29
+
30
+
@router.post('/delete')
async def volume_delete(request: fastapi.Request,
                        volume_delete_body: payloads.VolumeDeleteBody) -> None:
    """Deletes volumes.

    Schedules core.volume_delete as a LONG request with the posted body.
    """
    request_id = request.state.request_id
    executor.schedule_request(
        request_id=request_id,
        request_name='volume_delete',
        request_body=volume_delete_body,
        func=core.volume_delete,
        schedule_type=requests_lib.ScheduleType.LONG,
    )
42
+
43
+
@router.post('/apply')
async def volume_apply(request: fastapi.Request,
                       volume_apply_body: payloads.VolumeApplyBody) -> None:
    """Creates or registers a volume.

    Validates the volume type and cloud, and for PVC volumes also the cloud
    (Kubernetes only) and the access mode, then schedules core.volume_apply
    as a LONG request.

    Raises:
        fastapi.HTTPException: 400 on an invalid volume type, cloud or access
            mode, or when a PVC volume targets a cloud other than Kubernetes.
    """
    volume_cloud = volume_apply_body.cloud
    volume_type = volume_apply_body.volume_type
    volume_config = volume_apply_body.config

    supported_volume_types = [
        supported_type.value for supported_type in volume.VolumeType
    ]
    if volume_type not in supported_volume_types:
        raise fastapi.HTTPException(
            status_code=400, detail=f'Invalid volume type: {volume_type}')
    cloud = sky.CLOUD_REGISTRY.from_str(volume_cloud)
    if cloud is None:
        raise fastapi.HTTPException(status_code=400,
                                    detail=f'Invalid cloud: {volume_cloud}')
    if volume_type == volume.VolumeType.PVC.value:
        if not cloud.is_same_cloud(clouds.Kubernetes()):
            raise fastapi.HTTPException(
                status_code=400,
                detail='PVC storage is only supported on Kubernetes')
        supported_access_modes = [
            supported_mode.value for supported_mode in volume.VolumeAccessMode
        ]
        if volume_config is None:
            # Attach the fresh dict to the body; otherwise the default access
            # mode set below would live only in this local variable and never
            # reach core.volume_apply.
            volume_config = {}
            volume_apply_body.config = volume_config
        access_mode = volume_config.get('access_mode')
        if access_mode is None:
            volume_config[
                'access_mode'] = volume.VolumeAccessMode.READ_WRITE_ONCE.value
        elif access_mode not in supported_access_modes:
            raise fastapi.HTTPException(
                status_code=400, detail=f'Invalid access mode: {access_mode}')
    executor.schedule_request(
        request_id=request.state.request_id,
        request_name='volume_apply',
        request_body=volume_apply_body,
        func=core.volume_apply,
        schedule_type=requests_lib.ScheduleType.LONG,
    )