skypilot-nightly 1.0.0.dev20250718__py3-none-any.whl → 1.0.0.dev20250723__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of skypilot-nightly might be problematic. Click here for more details.
- sky/__init__.py +4 -2
- sky/admin_policy.py +11 -4
- sky/backends/backend_utils.py +50 -24
- sky/backends/cloud_vm_ray_backend.py +41 -38
- sky/catalog/__init__.py +3 -1
- sky/catalog/aws_catalog.py +8 -5
- sky/catalog/azure_catalog.py +8 -5
- sky/catalog/common.py +8 -2
- sky/catalog/cudo_catalog.py +5 -2
- sky/catalog/do_catalog.py +4 -1
- sky/catalog/fluidstack_catalog.py +5 -2
- sky/catalog/gcp_catalog.py +8 -5
- sky/catalog/hyperbolic_catalog.py +5 -2
- sky/catalog/ibm_catalog.py +8 -5
- sky/catalog/lambda_catalog.py +8 -5
- sky/catalog/nebius_catalog.py +8 -5
- sky/catalog/oci_catalog.py +8 -5
- sky/catalog/paperspace_catalog.py +4 -1
- sky/catalog/runpod_catalog.py +5 -2
- sky/catalog/scp_catalog.py +8 -5
- sky/catalog/vast_catalog.py +5 -2
- sky/catalog/vsphere_catalog.py +4 -1
- sky/client/cli/command.py +63 -25
- sky/client/sdk.py +61 -11
- sky/clouds/aws.py +12 -7
- sky/clouds/azure.py +12 -7
- sky/clouds/cloud.py +9 -8
- sky/clouds/cudo.py +13 -7
- sky/clouds/do.py +12 -7
- sky/clouds/fluidstack.py +11 -6
- sky/clouds/gcp.py +12 -7
- sky/clouds/hyperbolic.py +11 -6
- sky/clouds/ibm.py +11 -6
- sky/clouds/kubernetes.py +7 -3
- sky/clouds/lambda_cloud.py +11 -6
- sky/clouds/nebius.py +14 -12
- sky/clouds/oci.py +12 -7
- sky/clouds/paperspace.py +12 -7
- sky/clouds/runpod.py +12 -7
- sky/clouds/scp.py +11 -6
- sky/clouds/vast.py +14 -8
- sky/clouds/vsphere.py +11 -6
- sky/core.py +6 -1
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/chunks/{1043-734e57d2b27dfe5d.js → 1043-869d9c78bf5dd3df.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{1141-d8c6404a7c6fffe6.js → 1141-e49a159c30a6c4a7.js} +1 -1
- sky/dashboard/out/_next/static/chunks/1559-18717d96ef2fcbe9.js +30 -0
- sky/dashboard/out/_next/static/chunks/1871-ea0e7283886407ca.js +6 -0
- sky/dashboard/out/_next/static/chunks/2003.b82e6db40ec4c463.js +1 -0
- sky/dashboard/out/_next/static/chunks/2350.23778a2b19aabd33.js +1 -0
- sky/dashboard/out/_next/static/chunks/2369.2d6e4757f8dfc2b7.js +15 -0
- sky/dashboard/out/_next/static/chunks/{2641.35edc9ccaeaad9e3.js → 2641.74c19c4d45a2c034.js} +1 -1
- sky/dashboard/out/_next/static/chunks/3785.59705416215ff08b.js +1 -0
- sky/dashboard/out/_next/static/chunks/{4725.4c849b1e05c8e9ad.js → 4725.66125dcd9832aa5d.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4869.da729a7db3a31f43.js +16 -0
- sky/dashboard/out/_next/static/chunks/4937.d75809403fc264ac.js +15 -0
- sky/dashboard/out/_next/static/chunks/6135-2abbd0352f8ee061.js +1 -0
- sky/dashboard/out/_next/static/chunks/691.488b4aef97c28727.js +55 -0
- sky/dashboard/out/_next/static/chunks/6990-f64e03df359e04f7.js +1 -0
- sky/dashboard/out/_next/static/chunks/7411-2cc31dc0fdf2a9ad.js +41 -0
- sky/dashboard/out/_next/static/chunks/9025.4a9099bdf3ed4875.js +6 -0
- sky/dashboard/out/_next/static/chunks/938-7ee806653aef0609.js +1 -0
- sky/dashboard/out/_next/static/chunks/9847.387abf8a14d722db.js +30 -0
- sky/dashboard/out/_next/static/chunks/{9984.2b5e3fa69171bff9.js → 9984.0460de9d3adf5582.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/_app-da491665d4289aae.js +34 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/{[job]-fa406155b4223d0d.js → [job]-2186770cc2de1623.js} +2 -2
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-0c37ee1ac5f3474d.js → [cluster]-95afb019ab85801c.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/clusters-3d4be4961e1c94eb.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/index-89e7daf7b7df02e0.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-a90b4fe4616dc501.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/infra-0d3d1f890c5d188a.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/jobs/{[job]-c5b357bfd9502fbe.js → [job]-dc0299ffefebcdbe.js} +2 -2
- sky/dashboard/out/_next/static/chunks/pages/jobs-49f790d12a85027c.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/{users-19e98664bdd61643.js → users-6790fcefd5487b13.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-6bcd4b20914d76c9.js +1 -0
- sky/dashboard/out/_next/static/chunks/pages/workspaces-5f7fe4b7d55b8612.js +1 -0
- sky/dashboard/out/_next/static/chunks/webpack-a305898dc479711e.js +1 -0
- sky/dashboard/out/_next/static/css/b3227360726f12eb.css +3 -0
- sky/dashboard/out/_next/static/mym3Ciwp-zqU7ZpOLGnrW/_buildManifest.js +1 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/mounting_utils.py +93 -32
- sky/global_user_state.py +12 -143
- sky/jobs/state.py +9 -88
- sky/jobs/utils.py +28 -13
- sky/provision/nebius/utils.py +3 -6
- sky/schemas/db/README +4 -0
- sky/schemas/db/env.py +90 -0
- sky/schemas/db/global_user_state/001_initial_schema.py +124 -0
- sky/schemas/db/script.py.mako +28 -0
- sky/schemas/db/spot_jobs/001_initial_schema.py +97 -0
- sky/serve/client/sdk.py +6 -2
- sky/serve/controller.py +7 -3
- sky/serve/serve_state.py +1 -1
- sky/serve/serve_utils.py +171 -75
- sky/serve/server/core.py +17 -6
- sky/server/common.py +4 -3
- sky/server/requests/payloads.py +2 -0
- sky/server/requests/requests.py +1 -1
- sky/setup_files/MANIFEST.in +2 -0
- sky/setup_files/alembic.ini +148 -0
- sky/setup_files/dependencies.py +1 -0
- sky/skylet/configs.py +1 -1
- sky/skylet/constants.py +4 -0
- sky/skylet/job_lib.py +1 -1
- sky/skypilot_config.py +1 -1
- sky/users/permission.py +1 -1
- sky/utils/common_utils.py +85 -3
- sky/utils/config_utils.py +15 -0
- sky/utils/db/__init__.py +0 -0
- sky/utils/{db_utils.py → db/db_utils.py} +59 -0
- sky/utils/db/migration_utils.py +93 -0
- sky/utils/locks.py +319 -0
- sky/utils/schemas.py +38 -34
- sky/utils/timeline.py +41 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/METADATA +2 -1
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/RECORD +134 -125
- sky/dashboard/out/_next/static/FUjweqdImyeYhMYFON-Se/_buildManifest.js +0 -1
- sky/dashboard/out/_next/static/chunks/1746.27d40aedc22bd2d6.js +0 -60
- sky/dashboard/out/_next/static/chunks/1871-76491ac174a95278.js +0 -6
- sky/dashboard/out/_next/static/chunks/2544.27f70672535675ed.js +0 -1
- sky/dashboard/out/_next/static/chunks/2875.c24c6d57dc82e436.js +0 -25
- sky/dashboard/out/_next/static/chunks/3785.95b94f18aaec7233.js +0 -1
- sky/dashboard/out/_next/static/chunks/3947-b059261d6fa88a1f.js +0 -35
- sky/dashboard/out/_next/static/chunks/430.ed51037d1a4a438b.js +0 -1
- sky/dashboard/out/_next/static/chunks/4869.bdd42f14b51d1d6f.js +0 -16
- sky/dashboard/out/_next/static/chunks/5491.918ffed0ba7a5294.js +0 -20
- sky/dashboard/out/_next/static/chunks/6990-dcb411b566e64cde.js +0 -1
- sky/dashboard/out/_next/static/chunks/804-9f5e98ce84d46bdd.js +0 -21
- sky/dashboard/out/_next/static/chunks/9025.133e9ba5c780afeb.js +0 -6
- sky/dashboard/out/_next/static/chunks/938-6a9ffdaa21eee969.js +0 -1
- sky/dashboard/out/_next/static/chunks/9470-b6f6a35283863a6f.js +0 -1
- sky/dashboard/out/_next/static/chunks/9847.46e613d000c55859.js +0 -30
- sky/dashboard/out/_next/static/chunks/pages/_app-771a40cde532309b.js +0 -20
- sky/dashboard/out/_next/static/chunks/pages/clusters-102d169e87913ba1.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/index-927ddeebe57a8ac3.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra/[context]-8b0809f59034d509.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/infra-ae9d2f705ce582c9.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/jobs-5bbdc71878f0a068.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/[name]-7c0187f43757a548.js +0 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces-a1e43d9ef51a9cea.js +0 -1
- sky/dashboard/out/_next/static/chunks/webpack-6b0575ea521af4f3.js +0 -1
- sky/dashboard/out/_next/static/css/219887b94512388c.css +0 -3
- sky/dashboard/out/_next/static/{FUjweqdImyeYhMYFON-Se → mym3Ciwp-zqU7ZpOLGnrW}/_ssgManifest.js +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20250718.dist-info → skypilot_nightly-1.0.0.dev20250723.dist-info}/top_level.txt +0 -0
sky/catalog/lambda_catalog.py
CHANGED
|
@@ -56,10 +56,12 @@ def get_vcpus_mem_from_instance_type(
|
|
|
56
56
|
return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
|
57
57
|
|
|
58
58
|
|
|
59
|
-
def get_default_instance_type(
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
59
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
60
|
+
memory: Optional[str] = None,
|
|
61
|
+
disk_tier: Optional[
|
|
62
|
+
resources_utils.DiskTier] = None,
|
|
63
|
+
region: Optional[str] = None,
|
|
64
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
63
65
|
del disk_tier # unused
|
|
64
66
|
if cpus is None and memory is None:
|
|
65
67
|
cpus = f'{_DEFAULT_NUM_VCPUS}+'
|
|
@@ -68,7 +70,8 @@ def get_default_instance_type(
|
|
|
68
70
|
else:
|
|
69
71
|
memory_gb_or_ratio = memory
|
|
70
72
|
return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
|
|
71
|
-
memory_gb_or_ratio
|
|
73
|
+
memory_gb_or_ratio,
|
|
74
|
+
region, zone)
|
|
72
75
|
|
|
73
76
|
|
|
74
77
|
def get_accelerators_from_instance_type(
|
sky/catalog/nebius_catalog.py
CHANGED
|
@@ -51,12 +51,15 @@ def get_vcpus_mem_from_instance_type(
|
|
|
51
51
|
return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
|
52
52
|
|
|
53
53
|
|
|
54
|
-
def get_default_instance_type(
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
54
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
55
|
+
memory: Optional[str] = None,
|
|
56
|
+
disk_tier: Optional[
|
|
57
|
+
resources_utils.DiskTier] = None,
|
|
58
|
+
region: Optional[str] = None,
|
|
59
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
58
60
|
del disk_tier # unused
|
|
59
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
61
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
62
|
+
zone)
|
|
60
63
|
|
|
61
64
|
|
|
62
65
|
def get_accelerators_from_instance_type(
|
sky/catalog/oci_catalog.py
CHANGED
|
@@ -101,10 +101,12 @@ def get_hourly_cost(instance_type: str,
|
|
|
101
101
|
region, zone)
|
|
102
102
|
|
|
103
103
|
|
|
104
|
-
def get_default_instance_type(
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
104
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
105
|
+
memory: Optional[str] = None,
|
|
106
|
+
disk_tier: Optional[
|
|
107
|
+
resources_utils.DiskTier] = None,
|
|
108
|
+
region: Optional[str] = None,
|
|
109
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
108
110
|
if cpus is None:
|
|
109
111
|
cpus = f'{oci_utils.oci_config.DEFAULT_NUM_VCPUS}+'
|
|
110
112
|
|
|
@@ -127,7 +129,8 @@ def get_default_instance_type(
|
|
|
127
129
|
|
|
128
130
|
logger.debug(f'# get_default_instance_type: {df}')
|
|
129
131
|
return common.get_instance_type_for_cpus_mem_impl(df, cpus,
|
|
130
|
-
memory_gb_or_ratio
|
|
132
|
+
memory_gb_or_ratio,
|
|
133
|
+
region, zone)
|
|
131
134
|
|
|
132
135
|
|
|
133
136
|
def get_accelerators_from_instance_type(
|
|
@@ -52,11 +52,14 @@ def get_default_instance_type(
|
|
|
52
52
|
cpus: Optional[str] = None,
|
|
53
53
|
memory: Optional[str] = None,
|
|
54
54
|
disk_tier: Optional[str] = None,
|
|
55
|
+
region: Optional[str] = None,
|
|
56
|
+
zone: Optional[str] = None,
|
|
55
57
|
) -> Optional[str]:
|
|
56
58
|
# NOTE: After expanding catalog to multiple entries, you may
|
|
57
59
|
# want to specify a default instance type or family.
|
|
58
60
|
del disk_tier # unused
|
|
59
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
61
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
62
|
+
zone)
|
|
60
63
|
|
|
61
64
|
|
|
62
65
|
def get_accelerators_from_instance_type(
|
sky/catalog/runpod_catalog.py
CHANGED
|
@@ -41,11 +41,14 @@ def get_vcpus_mem_from_instance_type(
|
|
|
41
41
|
|
|
42
42
|
def get_default_instance_type(cpus: Optional[str] = None,
|
|
43
43
|
memory: Optional[str] = None,
|
|
44
|
-
disk_tier: Optional[str] = None
|
|
44
|
+
disk_tier: Optional[str] = None,
|
|
45
|
+
region: Optional[str] = None,
|
|
46
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
45
47
|
del disk_tier # RunPod does not support disk tiers.
|
|
46
48
|
# NOTE: After expanding catalog to multiple entries, you may
|
|
47
49
|
# want to specify a default instance type or family.
|
|
48
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
50
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
51
|
+
zone)
|
|
49
52
|
|
|
50
53
|
|
|
51
54
|
def get_accelerators_from_instance_type(
|
sky/catalog/scp_catalog.py
CHANGED
|
@@ -51,10 +51,12 @@ def get_vcpus_mem_from_instance_type(
|
|
|
51
51
|
return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type)
|
|
52
52
|
|
|
53
53
|
|
|
54
|
-
def get_default_instance_type(
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
54
|
+
def get_default_instance_type(cpus: Optional[str] = None,
|
|
55
|
+
memory: Optional[str] = None,
|
|
56
|
+
disk_tier: Optional[
|
|
57
|
+
resources_utils.DiskTier] = None,
|
|
58
|
+
region: Optional[str] = None,
|
|
59
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
58
60
|
del disk_tier # unused
|
|
59
61
|
if cpus is None and memory is None:
|
|
60
62
|
cpus = str(_DEFAULT_NUM_VCPUS)
|
|
@@ -63,7 +65,8 @@ def get_default_instance_type(
|
|
|
63
65
|
else:
|
|
64
66
|
memory_gb_or_ratio = memory
|
|
65
67
|
return common.get_instance_type_for_cpus_mem_impl(_df, cpus,
|
|
66
|
-
memory_gb_or_ratio
|
|
68
|
+
memory_gb_or_ratio,
|
|
69
|
+
region, zone)
|
|
67
70
|
|
|
68
71
|
|
|
69
72
|
def get_accelerators_from_instance_type(
|
sky/catalog/vast_catalog.py
CHANGED
|
@@ -48,11 +48,14 @@ def get_vcpus_mem_from_instance_type(
|
|
|
48
48
|
|
|
49
49
|
def get_default_instance_type(cpus: Optional[str] = None,
|
|
50
50
|
memory: Optional[str] = None,
|
|
51
|
-
disk_tier: Optional[str] = None
|
|
51
|
+
disk_tier: Optional[str] = None,
|
|
52
|
+
region: Optional[str] = None,
|
|
53
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
52
54
|
del disk_tier
|
|
53
55
|
# NOTE: After expanding catalog to multiple entries, you may
|
|
54
56
|
# want to specify a default instance type or family.
|
|
55
|
-
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory
|
|
57
|
+
return common.get_instance_type_for_cpus_mem_impl(_df, cpus, memory, region,
|
|
58
|
+
zone)
|
|
56
59
|
|
|
57
60
|
|
|
58
61
|
def get_accelerators_from_instance_type(
|
sky/catalog/vsphere_catalog.py
CHANGED
|
@@ -72,6 +72,8 @@ def get_default_instance_type(
|
|
|
72
72
|
cpus: Optional[str] = None,
|
|
73
73
|
memory: Optional[str] = None,
|
|
74
74
|
disk_tier: Optional[str] = None,
|
|
75
|
+
region: Optional[str] = None,
|
|
76
|
+
zone: Optional[str] = None,
|
|
75
77
|
) -> Optional[str]:
|
|
76
78
|
del disk_tier # unused
|
|
77
79
|
if cpus is None and memory is None:
|
|
@@ -81,7 +83,8 @@ def get_default_instance_type(
|
|
|
81
83
|
else:
|
|
82
84
|
memory_gb_or_ratio = memory
|
|
83
85
|
return common.get_instance_type_for_cpus_mem_impl(_get_df(), cpus,
|
|
84
|
-
memory_gb_or_ratio
|
|
86
|
+
memory_gb_or_ratio,
|
|
87
|
+
region, zone)
|
|
85
88
|
|
|
86
89
|
|
|
87
90
|
def get_accelerators_from_instance_type(
|
sky/client/cli/command.py
CHANGED
|
@@ -3023,17 +3023,18 @@ def _down_or_stop_clusters(
|
|
|
3023
3023
|
click.echo(common_utils.format_exception(e))
|
|
3024
3024
|
else:
|
|
3025
3025
|
raise
|
|
3026
|
-
|
|
3027
|
-
|
|
3028
|
-
|
|
3029
|
-
|
|
3030
|
-
|
|
3031
|
-
|
|
3032
|
-
|
|
3033
|
-
|
|
3034
|
-
|
|
3035
|
-
|
|
3036
|
-
|
|
3026
|
+
if not purge:
|
|
3027
|
+
confirm_str = 'delete'
|
|
3028
|
+
user_input = click.prompt(
|
|
3029
|
+
f'To proceed, please type {colorama.Style.BRIGHT}'
|
|
3030
|
+
f'{confirm_str!r}{colorama.Style.RESET_ALL}',
|
|
3031
|
+
type=str)
|
|
3032
|
+
if user_input != confirm_str:
|
|
3033
|
+
raise click.Abort()
|
|
3034
|
+
else:
|
|
3035
|
+
click.echo('Since --purge is set, errors will be ignored '
|
|
3036
|
+
'and controller will be removed from '
|
|
3037
|
+
'local state.\nSkipping confirmation.')
|
|
3037
3038
|
no_confirm = True
|
|
3038
3039
|
names += controllers
|
|
3039
3040
|
|
|
@@ -5114,6 +5115,12 @@ def serve_down(
|
|
|
5114
5115
|
default=False,
|
|
5115
5116
|
help='Sync down logs to the local machine. Can be combined with '
|
|
5116
5117
|
'--controller, --load-balancer, or a replica ID to narrow scope.')
|
|
5118
|
+
@click.option(
|
|
5119
|
+
'--tail',
|
|
5120
|
+
default=None,
|
|
5121
|
+
type=int,
|
|
5122
|
+
help='The number of lines to display from the end of the log file. '
|
|
5123
|
+
'Default is None, which means print all lines.')
|
|
5117
5124
|
@click.argument('service_name', required=True, type=str)
|
|
5118
5125
|
@click.argument('replica_ids', required=False, type=int, nargs=-1)
|
|
5119
5126
|
@usage_lib.entrypoint
|
|
@@ -5126,6 +5133,7 @@ def serve_logs(
|
|
|
5126
5133
|
load_balancer: bool,
|
|
5127
5134
|
replica_ids: Tuple[int, ...],
|
|
5128
5135
|
sync_down: bool,
|
|
5136
|
+
tail: Optional[int],
|
|
5129
5137
|
):
|
|
5130
5138
|
"""Tail or sync down logs of a service.
|
|
5131
5139
|
|
|
@@ -5145,12 +5153,26 @@ def serve_logs(
|
|
|
5145
5153
|
# Tail the logs of replica 1
|
|
5146
5154
|
sky serve logs [SERVICE_NAME] 1
|
|
5147
5155
|
\b
|
|
5156
|
+
# Show the last 100 lines of the controller logs
|
|
5157
|
+
sky serve logs --controller --tail 100 [SERVICE_NAME]
|
|
5158
|
+
\b
|
|
5148
5159
|
# Sync down all logs of the service (controller, LB, all replicas)
|
|
5149
5160
|
sky serve logs [SERVICE_NAME] --sync-down
|
|
5150
5161
|
\b
|
|
5151
5162
|
# Sync down controller logs and logs for replicas 1 and 3
|
|
5152
5163
|
sky serve logs [SERVICE_NAME] 1 3 --controller --sync-down
|
|
5153
5164
|
"""
|
|
5165
|
+
if tail is not None:
|
|
5166
|
+
if tail < 0:
|
|
5167
|
+
raise click.UsageError('--tail must be a non-negative integer.')
|
|
5168
|
+
# TODO(arda): We could add ability to tail and follow logs together.
|
|
5169
|
+
if follow:
|
|
5170
|
+
follow = False
|
|
5171
|
+
logger.warning(
|
|
5172
|
+
f'{colorama.Fore.YELLOW}'
|
|
5173
|
+
'--tail and --follow cannot be used together. '
|
|
5174
|
+
f'Changed the mode to --no-follow.{colorama.Style.RESET_ALL}')
|
|
5175
|
+
|
|
5154
5176
|
chosen_components: Set[serve_lib.ServiceComponent] = set()
|
|
5155
5177
|
if controller:
|
|
5156
5178
|
chosen_components.add(serve_lib.ServiceComponent.CONTROLLER)
|
|
@@ -5185,7 +5207,8 @@ def serve_logs(
|
|
|
5185
5207
|
serve_lib.sync_down_logs(service_name,
|
|
5186
5208
|
local_dir=str(log_dir),
|
|
5187
5209
|
targets=targets_to_sync,
|
|
5188
|
-
replica_ids=list(replica_ids)
|
|
5210
|
+
replica_ids=list(replica_ids),
|
|
5211
|
+
tail=tail)
|
|
5189
5212
|
style = colorama.Style
|
|
5190
5213
|
fore = colorama.Fore
|
|
5191
5214
|
logger.info(f'{fore.CYAN}Service {service_name} logs: '
|
|
@@ -5227,7 +5250,8 @@ def serve_logs(
|
|
|
5227
5250
|
serve_lib.tail_logs(service_name,
|
|
5228
5251
|
target=target_component,
|
|
5229
5252
|
replica_id=target_replica_id,
|
|
5230
|
-
follow=follow
|
|
5253
|
+
follow=follow,
|
|
5254
|
+
tail=tail)
|
|
5231
5255
|
except exceptions.ClusterNotUpError:
|
|
5232
5256
|
with ux_utils.print_exception_no_traceback():
|
|
5233
5257
|
raise
|
|
@@ -5485,19 +5509,27 @@ def api_status(request_ids: Optional[List[str]], all_status: bool,
|
|
|
5485
5509
|
columns.append('Cluster')
|
|
5486
5510
|
columns.extend(['Created', 'Status'])
|
|
5487
5511
|
table = log_utils.create_table(columns)
|
|
5488
|
-
|
|
5489
|
-
|
|
5490
|
-
|
|
5491
|
-
|
|
5492
|
-
|
|
5493
|
-
|
|
5512
|
+
if len(request_list) > 0:
|
|
5513
|
+
for request in request_list:
|
|
5514
|
+
r_id = request.request_id
|
|
5515
|
+
if not verbose:
|
|
5516
|
+
r_id = common_utils.truncate_long_string(r_id, 36)
|
|
5517
|
+
req_status = requests.RequestStatus(request.status)
|
|
5518
|
+
row = [r_id, request.user_name, request.name]
|
|
5519
|
+
if verbose:
|
|
5520
|
+
row.append(request.cluster_name)
|
|
5521
|
+
row.extend([
|
|
5522
|
+
log_utils.readable_time_duration(request.created_at),
|
|
5523
|
+
req_status.colored_str()
|
|
5524
|
+
])
|
|
5525
|
+
table.add_row(row)
|
|
5526
|
+
else:
|
|
5527
|
+
# add dummy data for when api server is down.
|
|
5528
|
+
dummy_row = ['-'] * 5
|
|
5494
5529
|
if verbose:
|
|
5495
|
-
|
|
5496
|
-
|
|
5497
|
-
|
|
5498
|
-
req_status.colored_str()
|
|
5499
|
-
])
|
|
5500
|
-
table.add_row(row)
|
|
5530
|
+
dummy_row.append('-')
|
|
5531
|
+
table.add_row(dummy_row)
|
|
5532
|
+
click.echo()
|
|
5501
5533
|
click.echo(table)
|
|
5502
5534
|
|
|
5503
5535
|
|
|
@@ -5545,6 +5577,12 @@ def api_login(endpoint: Optional[str], relogin: bool,
|
|
|
5545
5577
|
sdk.api_login(endpoint, relogin, service_account_token)
|
|
5546
5578
|
|
|
5547
5579
|
|
|
5580
|
+
@api.command('logout', cls=_DocumentedCodeCommand)
|
|
5581
|
+
def api_logout():
|
|
5582
|
+
"""Logs out of the api server"""
|
|
5583
|
+
sdk.api_logout()
|
|
5584
|
+
|
|
5585
|
+
|
|
5548
5586
|
@api.command('info', cls=_DocumentedCodeCommand)
|
|
5549
5587
|
@flags.config_option(expose_value=False)
|
|
5550
5588
|
@usage_lib.entrypoint
|
sky/client/sdk.py
CHANGED
|
@@ -29,7 +29,6 @@ import colorama
|
|
|
29
29
|
import filelock
|
|
30
30
|
|
|
31
31
|
from sky import admin_policy
|
|
32
|
-
from sky import backends
|
|
33
32
|
from sky import exceptions
|
|
34
33
|
from sky import sky_logging
|
|
35
34
|
from sky import skypilot_config
|
|
@@ -64,6 +63,7 @@ if typing.TYPE_CHECKING:
|
|
|
64
63
|
import requests
|
|
65
64
|
|
|
66
65
|
import sky
|
|
66
|
+
from sky import backends
|
|
67
67
|
else:
|
|
68
68
|
psutil = adaptors_common.LazyImport('psutil')
|
|
69
69
|
|
|
@@ -73,6 +73,11 @@ logging.getLogger('httpx').setLevel(logging.CRITICAL)
|
|
|
73
73
|
_LINE_PROCESSED_KEY = 'line_processed'
|
|
74
74
|
|
|
75
75
|
|
|
76
|
+
def reload_config() -> None:
|
|
77
|
+
"""Reloads the client-side config."""
|
|
78
|
+
skypilot_config.safe_reload_config()
|
|
79
|
+
|
|
80
|
+
|
|
76
81
|
def stream_response(request_id: Optional[str],
|
|
77
82
|
response: 'requests.Response',
|
|
78
83
|
output_stream: Optional['io.TextIOBase'] = None,
|
|
@@ -372,7 +377,7 @@ def launch(
|
|
|
372
377
|
idle_minutes_to_autostop: Optional[int] = None,
|
|
373
378
|
dryrun: bool = False,
|
|
374
379
|
down: bool = False, # pylint: disable=redefined-outer-name
|
|
375
|
-
backend: Optional[backends.Backend] = None,
|
|
380
|
+
backend: Optional['backends.Backend'] = None,
|
|
376
381
|
optimize_target: common.OptimizeTarget = common.OptimizeTarget.COST,
|
|
377
382
|
no_setup: bool = False,
|
|
378
383
|
clone_disk_from: Optional[str] = None,
|
|
@@ -530,7 +535,7 @@ def _launch(
|
|
|
530
535
|
idle_minutes_to_autostop: Optional[int] = None,
|
|
531
536
|
dryrun: bool = False,
|
|
532
537
|
down: bool = False, # pylint: disable=redefined-outer-name
|
|
533
|
-
backend: Optional[backends.Backend] = None,
|
|
538
|
+
backend: Optional['backends.Backend'] = None,
|
|
534
539
|
optimize_target: common.OptimizeTarget = common.OptimizeTarget.COST,
|
|
535
540
|
no_setup: bool = False,
|
|
536
541
|
clone_disk_from: Optional[str] = None,
|
|
@@ -639,7 +644,7 @@ def exec( # pylint: disable=redefined-builtin
|
|
|
639
644
|
cluster_name: Optional[str] = None,
|
|
640
645
|
dryrun: bool = False,
|
|
641
646
|
down: bool = False, # pylint: disable=redefined-outer-name
|
|
642
|
-
backend: Optional[backends.Backend] = None,
|
|
647
|
+
backend: Optional['backends.Backend'] = None,
|
|
643
648
|
) -> server_common.RequestId:
|
|
644
649
|
"""Executes a task on an existing cluster.
|
|
645
650
|
|
|
@@ -1849,6 +1854,18 @@ def api_cancel(request_ids: Optional[Union[str, List[str]]] = None,
|
|
|
1849
1854
|
return server_common.get_request_id(response)
|
|
1850
1855
|
|
|
1851
1856
|
|
|
1857
|
+
def _local_api_server_running(kill: bool = False) -> bool:
|
|
1858
|
+
"""Checks if the local api server is running."""
|
|
1859
|
+
for process in psutil.process_iter(attrs=['pid', 'cmdline']):
|
|
1860
|
+
cmdline = process.info['cmdline']
|
|
1861
|
+
if cmdline and server_common.API_SERVER_CMD in ' '.join(cmdline):
|
|
1862
|
+
if kill:
|
|
1863
|
+
subprocess_utils.kill_children_processes(
|
|
1864
|
+
parent_pids=[process.pid], force=True)
|
|
1865
|
+
return True
|
|
1866
|
+
return False
|
|
1867
|
+
|
|
1868
|
+
|
|
1852
1869
|
@usage_lib.entrypoint
|
|
1853
1870
|
@annotations.client_api
|
|
1854
1871
|
def api_status(
|
|
@@ -1867,6 +1884,10 @@ def api_status(
|
|
|
1867
1884
|
Returns:
|
|
1868
1885
|
A list of request payloads.
|
|
1869
1886
|
"""
|
|
1887
|
+
if server_common.is_api_server_local() and not _local_api_server_running():
|
|
1888
|
+
logger.info('SkyPilot API server is not running.')
|
|
1889
|
+
return []
|
|
1890
|
+
|
|
1870
1891
|
body = payloads.RequestStatusBody(request_ids=request_ids,
|
|
1871
1892
|
all_status=all_status)
|
|
1872
1893
|
response = server_common.make_authenticated_request(
|
|
@@ -1987,13 +2008,7 @@ def api_stop() -> None:
|
|
|
1987
2008
|
f'Cannot kill the API server at {server_url} because it is not '
|
|
1988
2009
|
f'the default SkyPilot API server started locally.')
|
|
1989
2010
|
|
|
1990
|
-
found =
|
|
1991
|
-
for process in psutil.process_iter(attrs=['pid', 'cmdline']):
|
|
1992
|
-
cmdline = process.info['cmdline']
|
|
1993
|
-
if cmdline and server_common.API_SERVER_CMD in ' '.join(cmdline):
|
|
1994
|
-
subprocess_utils.kill_children_processes(parent_pids=[process.pid],
|
|
1995
|
-
force=True)
|
|
1996
|
-
found = True
|
|
2011
|
+
found = _local_api_server_running(kill=True)
|
|
1997
2012
|
|
|
1998
2013
|
# Remove the database for requests.
|
|
1999
2014
|
server_common.clear_local_api_server_database()
|
|
@@ -2062,6 +2077,22 @@ def _save_config_updates(endpoint: Optional[str] = None,
|
|
|
2062
2077
|
skypilot_config.reload_config()
|
|
2063
2078
|
|
|
2064
2079
|
|
|
2080
|
+
def _clear_api_server_config() -> None:
|
|
2081
|
+
"""Clear endpoint and service account token from config file."""
|
|
2082
|
+
config_path = pathlib.Path(
|
|
2083
|
+
skypilot_config.get_user_config_path()).expanduser()
|
|
2084
|
+
with filelock.FileLock(config_path.with_suffix('.lock')):
|
|
2085
|
+
if not config_path.exists():
|
|
2086
|
+
return
|
|
2087
|
+
|
|
2088
|
+
config = skypilot_config.get_user_config()
|
|
2089
|
+
config = dict(config)
|
|
2090
|
+
del config['api_server']
|
|
2091
|
+
|
|
2092
|
+
common_utils.dump_yaml(str(config_path), config, blank=True)
|
|
2093
|
+
skypilot_config.reload_config()
|
|
2094
|
+
|
|
2095
|
+
|
|
2065
2096
|
def _validate_endpoint(endpoint: Optional[str]) -> str:
|
|
2066
2097
|
"""Validate and normalize the endpoint URL."""
|
|
2067
2098
|
if endpoint is None:
|
|
@@ -2318,3 +2349,22 @@ def api_login(endpoint: Optional[str] = None,
|
|
|
2318
2349
|
endpoint)
|
|
2319
2350
|
_show_logged_in_message(endpoint, dashboard_url, final_api_server_info.user,
|
|
2320
2351
|
server_status)
|
|
2352
|
+
|
|
2353
|
+
|
|
2354
|
+
@usage_lib.entrypoint
|
|
2355
|
+
@annotations.client_api
|
|
2356
|
+
def api_logout() -> None:
|
|
2357
|
+
"""Logout of the API server.
|
|
2358
|
+
|
|
2359
|
+
Clears all cookies and settings stored in ~/.sky/config.yaml"""
|
|
2360
|
+
if server_common.is_api_server_local():
|
|
2361
|
+
with ux_utils.print_exception_no_traceback():
|
|
2362
|
+
raise RuntimeError('Local api server cannot be logged out. '
|
|
2363
|
+
'Use `sky api stop` instead.')
|
|
2364
|
+
|
|
2365
|
+
# no need to clear cookies if it doesn't exist.
|
|
2366
|
+
server_common.set_api_cookie_jar(cookiejar.MozillaCookieJar(),
|
|
2367
|
+
create_if_not_exists=False)
|
|
2368
|
+
_clear_api_server_config()
|
|
2369
|
+
logger.info(f'{colorama.Fore.GREEN}Logged out of SkyPilot API server.'
|
|
2370
|
+
f'{colorama.Style.RESET_ALL}')
|
sky/clouds/aws.py
CHANGED
|
@@ -404,15 +404,18 @@ class AWS(clouds.Cloud):
|
|
|
404
404
|
return cost
|
|
405
405
|
|
|
406
406
|
@classmethod
|
|
407
|
-
def get_default_instance_type(
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
407
|
+
def get_default_instance_type(cls,
|
|
408
|
+
cpus: Optional[str] = None,
|
|
409
|
+
memory: Optional[str] = None,
|
|
410
|
+
disk_tier: Optional[
|
|
411
|
+
resources_utils.DiskTier] = None,
|
|
412
|
+
region: Optional[str] = None,
|
|
413
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
413
414
|
return catalog.get_default_instance_type(cpus=cpus,
|
|
414
415
|
memory=memory,
|
|
415
416
|
disk_tier=disk_tier,
|
|
417
|
+
region=region,
|
|
418
|
+
zone=zone,
|
|
416
419
|
clouds='aws')
|
|
417
420
|
|
|
418
421
|
# TODO: factor the following three methods, as they are the same logic
|
|
@@ -554,7 +557,9 @@ class AWS(clouds.Cloud):
|
|
|
554
557
|
default_instance_type = AWS.get_default_instance_type(
|
|
555
558
|
cpus=resources.cpus,
|
|
556
559
|
memory=resources.memory,
|
|
557
|
-
disk_tier=resources.disk_tier
|
|
560
|
+
disk_tier=resources.disk_tier,
|
|
561
|
+
region=resources.region,
|
|
562
|
+
zone=resources.zone)
|
|
558
563
|
if default_instance_type is None:
|
|
559
564
|
return resources_utils.FeasibleResources([], [], None)
|
|
560
565
|
else:
|
sky/clouds/azure.py
CHANGED
|
@@ -154,15 +154,18 @@ class Azure(clouds.Cloud):
|
|
|
154
154
|
return cost
|
|
155
155
|
|
|
156
156
|
@classmethod
|
|
157
|
-
def get_default_instance_type(
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
157
|
+
def get_default_instance_type(cls,
|
|
158
|
+
cpus: Optional[str] = None,
|
|
159
|
+
memory: Optional[str] = None,
|
|
160
|
+
disk_tier: Optional[
|
|
161
|
+
resources_utils.DiskTier] = None,
|
|
162
|
+
region: Optional[str] = None,
|
|
163
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
163
164
|
return catalog.get_default_instance_type(cpus=cpus,
|
|
164
165
|
memory=memory,
|
|
165
166
|
disk_tier=disk_tier,
|
|
167
|
+
region=region,
|
|
168
|
+
zone=zone,
|
|
166
169
|
clouds='azure')
|
|
167
170
|
|
|
168
171
|
@classmethod
|
|
@@ -499,7 +502,9 @@ class Azure(clouds.Cloud):
|
|
|
499
502
|
default_instance_type = Azure.get_default_instance_type(
|
|
500
503
|
cpus=resources.cpus,
|
|
501
504
|
memory=resources.memory,
|
|
502
|
-
disk_tier=resources.disk_tier
|
|
505
|
+
disk_tier=resources.disk_tier,
|
|
506
|
+
region=resources.region,
|
|
507
|
+
zone=resources.zone)
|
|
503
508
|
if default_instance_type is None:
|
|
504
509
|
return resources_utils.FeasibleResources([], [], None)
|
|
505
510
|
else:
|
sky/clouds/cloud.py
CHANGED
|
@@ -341,14 +341,15 @@ class Cloud:
|
|
|
341
341
|
raise NotImplementedError
|
|
342
342
|
|
|
343
343
|
@classmethod
|
|
344
|
-
def get_default_instance_type(
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
344
|
+
def get_default_instance_type(cls,
|
|
345
|
+
cpus: Optional[str] = None,
|
|
346
|
+
memory: Optional[str] = None,
|
|
347
|
+
disk_tier: Optional[
|
|
348
|
+
resources_utils.DiskTier] = None,
|
|
349
|
+
region: Optional[str] = None,
|
|
350
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
351
|
+
"""Returns the default instance type with the given #vCPUs, memory,
|
|
352
|
+
disk tier, region, and zone.
|
|
352
353
|
|
|
353
354
|
For example, if cpus='4', this method returns the default instance type
|
|
354
355
|
with 4 vCPUs. If cpus='4+', this method returns the default instance
|
sky/clouds/cudo.py
CHANGED
|
@@ -175,14 +175,18 @@ class Cudo(clouds.Cloud):
|
|
|
175
175
|
return 0.0
|
|
176
176
|
|
|
177
177
|
@classmethod
|
|
178
|
-
def get_default_instance_type(
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
178
|
+
def get_default_instance_type(cls,
|
|
179
|
+
cpus: Optional[str] = None,
|
|
180
|
+
memory: Optional[str] = None,
|
|
181
|
+
disk_tier: Optional[
|
|
182
|
+
resources_utils.DiskTier] = None,
|
|
183
|
+
region: Optional[str] = None,
|
|
184
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
184
185
|
return catalog.get_default_instance_type(cpus=cpus,
|
|
185
186
|
memory=memory,
|
|
187
|
+
disk_tier=disk_tier,
|
|
188
|
+
region=region,
|
|
189
|
+
zone=zone,
|
|
186
190
|
clouds='cudo')
|
|
187
191
|
|
|
188
192
|
@classmethod
|
|
@@ -251,7 +255,9 @@ class Cudo(clouds.Cloud):
|
|
|
251
255
|
default_instance_type = Cudo.get_default_instance_type(
|
|
252
256
|
cpus=resources.cpus,
|
|
253
257
|
memory=resources.memory,
|
|
254
|
-
disk_tier=resources.disk_tier
|
|
258
|
+
disk_tier=resources.disk_tier,
|
|
259
|
+
region=resources.region,
|
|
260
|
+
zone=resources.zone)
|
|
255
261
|
if default_instance_type is None:
|
|
256
262
|
return resources_utils.FeasibleResources([], [], None)
|
|
257
263
|
else:
|
sky/clouds/do.py
CHANGED
|
@@ -156,16 +156,19 @@ class DO(clouds.Cloud):
|
|
|
156
156
|
return self._REPR
|
|
157
157
|
|
|
158
158
|
@classmethod
|
|
159
|
-
def get_default_instance_type(
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
159
|
+
def get_default_instance_type(cls,
|
|
160
|
+
cpus: Optional[str] = None,
|
|
161
|
+
memory: Optional[str] = None,
|
|
162
|
+
disk_tier: Optional[
|
|
163
|
+
resources_utils.DiskTier] = None,
|
|
164
|
+
region: Optional[str] = None,
|
|
165
|
+
zone: Optional[str] = None) -> Optional[str]:
|
|
165
166
|
"""Returns the default instance type for DO."""
|
|
166
167
|
return catalog.get_default_instance_type(cpus=cpus,
|
|
167
168
|
memory=memory,
|
|
168
169
|
disk_tier=disk_tier,
|
|
170
|
+
region=region,
|
|
171
|
+
zone=zone,
|
|
169
172
|
clouds='DO')
|
|
170
173
|
|
|
171
174
|
@classmethod
|
|
@@ -246,7 +249,9 @@ class DO(clouds.Cloud):
|
|
|
246
249
|
default_instance_type = DO.get_default_instance_type(
|
|
247
250
|
cpus=resources.cpus,
|
|
248
251
|
memory=resources.memory,
|
|
249
|
-
disk_tier=resources.disk_tier
|
|
252
|
+
disk_tier=resources.disk_tier,
|
|
253
|
+
region=resources.region,
|
|
254
|
+
zone=resources.zone)
|
|
250
255
|
if default_instance_type is None:
|
|
251
256
|
return resources_utils.FeasibleResources([], [], None)
|
|
252
257
|
else:
|