skypilot-nightly 1.0.0.dev20251012__py3-none-any.whl → 1.0.0.dev20251013__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (45) hide show
  1. sky/__init__.py +4 -2
  2. sky/adaptors/shadeform.py +89 -0
  3. sky/authentication.py +43 -0
  4. sky/backends/backend_utils.py +2 -0
  5. sky/backends/cloud_vm_ray_backend.py +2 -0
  6. sky/catalog/data_fetchers/fetch_shadeform.py +142 -0
  7. sky/catalog/shadeform_catalog.py +165 -0
  8. sky/clouds/__init__.py +2 -0
  9. sky/clouds/shadeform.py +393 -0
  10. sky/dashboard/out/404.html +1 -1
  11. sky/dashboard/out/_next/static/chunks/{webpack-66f23594d38c7f16.js → webpack-ac3a34c8f9fef041.js} +1 -1
  12. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  13. sky/dashboard/out/clusters/[cluster].html +1 -1
  14. sky/dashboard/out/clusters.html +1 -1
  15. sky/dashboard/out/config.html +1 -1
  16. sky/dashboard/out/index.html +1 -1
  17. sky/dashboard/out/infra/[context].html +1 -1
  18. sky/dashboard/out/infra.html +1 -1
  19. sky/dashboard/out/jobs/[job].html +1 -1
  20. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  21. sky/dashboard/out/jobs.html +1 -1
  22. sky/dashboard/out/users.html +1 -1
  23. sky/dashboard/out/volumes.html +1 -1
  24. sky/dashboard/out/workspace/new.html +1 -1
  25. sky/dashboard/out/workspaces/[name].html +1 -1
  26. sky/dashboard/out/workspaces.html +1 -1
  27. sky/provision/__init__.py +1 -0
  28. sky/provision/shadeform/__init__.py +11 -0
  29. sky/provision/shadeform/config.py +12 -0
  30. sky/provision/shadeform/instance.py +351 -0
  31. sky/provision/shadeform/shadeform_utils.py +83 -0
  32. sky/server/requests/executor.py +22 -2
  33. sky/server/server.py +4 -3
  34. sky/setup_files/dependencies.py +1 -0
  35. sky/skylet/constants.py +1 -1
  36. sky/templates/shadeform-ray.yml.j2 +72 -0
  37. sky/utils/context_utils.py +13 -9
  38. {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/METADATA +35 -34
  39. {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/RECORD +45 -36
  40. /sky/dashboard/out/_next/static/{yOfMelBaFp8uL5F9atyAK → MtlDUf-nH1hhcy7xwbCj3}/_buildManifest.js +0 -0
  41. /sky/dashboard/out/_next/static/{yOfMelBaFp8uL5F9atyAK → MtlDUf-nH1hhcy7xwbCj3}/_ssgManifest.js +0 -0
  42. {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/WHEEL +0 -0
  43. {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/entry_points.txt +0 -0
  44. {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/licenses/LICENSE +0 -0
  45. {skypilot_nightly-1.0.0.dev20251012.dist-info → skypilot_nightly-1.0.0.dev20251013.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-835d14ba94808f79.js" defer=""></script><script src="/dashboard/_next/static/yOfMelBaFp8uL5F9atyAK/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/yOfMelBaFp8uL5F9atyAK/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/volumes","query":{},"buildId":"yOfMelBaFp8uL5F9atyAK","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-ac3a34c8f9fef041.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/volumes-835d14ba94808f79.js" defer=""></script><script src="/dashboard/_next/static/MtlDUf-nH1hhcy7xwbCj3/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/MtlDUf-nH1hhcy7xwbCj3/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/volumes","query":{},"buildId":"MtlDUf-nH1hhcy7xwbCj3","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js" defer=""></script><script src="/dashboard/_next/static/yOfMelBaFp8uL5F9atyAK/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/yOfMelBaFp8uL5F9atyAK/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"yOfMelBaFp8uL5F9atyAK","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-ac3a34c8f9fef041.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js" defer=""></script><script src="/dashboard/_next/static/MtlDUf-nH1hhcy7xwbCj3/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/MtlDUf-nH1hhcy7xwbCj3/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspace/new","query":{},"buildId":"MtlDUf-nH1hhcy7xwbCj3","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/7359-c8d04e06886000b3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-01359c57e018caa4.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-66237729cdf9749e.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-f6818c84ed8f1c86.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-4b4d5e824b7f9d3c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1121-d0782b9251f0fcd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-06114c982db410b6.js" defer=""></script><script 
src="/dashboard/_next/static/chunks/3015-7e0e8f06bb2f881c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-3b40c39626f99c89.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-e8688c35c06f0ac5.js" defer=""></script><script src="/dashboard/_next/static/yOfMelBaFp8uL5F9atyAK/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/yOfMelBaFp8uL5F9atyAK/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"yOfMelBaFp8uL5F9atyAK","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-ac3a34c8f9fef041.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/616-3d59f75e2ccf9321.js" defer=""></script><script src="/dashboard/_next/static/chunks/6130-2be46d70a38f1e82.js" defer=""></script><script src="/dashboard/_next/static/chunks/5739-d67458fcb1386c92.js" defer=""></script><script src="/dashboard/_next/static/chunks/7411-b15471acd2cba716.js" defer=""></script><script src="/dashboard/_next/static/chunks/1272-1ef0bf0237faccdb.js" defer=""></script><script src="/dashboard/_next/static/chunks/7359-c8d04e06886000b3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6989-01359c57e018caa4.js" defer=""></script><script src="/dashboard/_next/static/chunks/3850-ff4a9a69d978632b.js" defer=""></script><script src="/dashboard/_next/static/chunks/8969-66237729cdf9749e.js" defer=""></script><script src="/dashboard/_next/static/chunks/6990-f6818c84ed8f1c86.js" defer=""></script><script src="/dashboard/_next/static/chunks/6135-4b4d5e824b7f9d3c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1121-d0782b9251f0fcd3.js" defer=""></script><script src="/dashboard/_next/static/chunks/6601-06114c982db410b6.js" defer=""></script><script 
src="/dashboard/_next/static/chunks/3015-7e0e8f06bb2f881c.js" defer=""></script><script src="/dashboard/_next/static/chunks/1141-3b40c39626f99c89.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces/%5Bname%5D-e8688c35c06f0ac5.js" defer=""></script><script src="/dashboard/_next/static/MtlDUf-nH1hhcy7xwbCj3/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/MtlDUf-nH1hhcy7xwbCj3/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces/[name]","query":{},"buildId":"MtlDUf-nH1hhcy7xwbCj3","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-66f23594d38c7f16.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-69c80d677d3c2949.js" defer=""></script><script src="/dashboard/_next/static/yOfMelBaFp8uL5F9atyAK/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/yOfMelBaFp8uL5F9atyAK/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"yOfMelBaFp8uL5F9atyAK","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-ac3a34c8f9fef041.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/workspaces-69c80d677d3c2949.js" defer=""></script><script src="/dashboard/_next/static/MtlDUf-nH1hhcy7xwbCj3/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/MtlDUf-nH1hhcy7xwbCj3/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{}},"page":"/workspaces","query":{},"buildId":"MtlDUf-nH1hhcy7xwbCj3","assetPrefix":"/dashboard","nextExport":true,"autoExport":true,"isFallback":false,"scriptLoader":[]}</script></body></html>
sky/provision/__init__.py CHANGED
@@ -28,6 +28,7 @@ from sky.provision import primeintellect
28
28
  from sky.provision import runpod
29
29
  from sky.provision import scp
30
30
  from sky.provision import seeweb
31
+ from sky.provision import shadeform
31
32
  from sky.provision import ssh
32
33
  from sky.provision import vast
33
34
  from sky.provision import vsphere
@@ -0,0 +1,11 @@
1
+ """Shadeform provisioner."""
2
+
3
+ from sky.provision.shadeform.config import bootstrap_instances
4
+ from sky.provision.shadeform.instance import cleanup_ports
5
+ from sky.provision.shadeform.instance import get_cluster_info
6
+ from sky.provision.shadeform.instance import open_ports
7
+ from sky.provision.shadeform.instance import query_instances
8
+ from sky.provision.shadeform.instance import run_instances
9
+ from sky.provision.shadeform.instance import stop_instances
10
+ from sky.provision.shadeform.instance import terminate_instances
11
+ from sky.provision.shadeform.instance import wait_instances
@@ -0,0 +1,12 @@
1
+ """Shadeform configuration bootstrapping."""
2
+
3
+ from sky.provision import common
4
+
5
+
6
def bootstrap_instances(
        region: str, cluster_name: str,
        config: common.ProvisionConfig) -> common.ProvisionConfig:
    """Return the provisioning config untouched.

    This function performs no work of its own: ``region`` and
    ``cluster_name`` are accepted but ignored.

    Args:
        region: Ignored.
        cluster_name: Ignored.
        config: Provisioning configuration for the cluster.

    Returns:
        The same ``config`` object that was passed in.
    """
    del region, cluster_name  # neither is needed here
    return config
@@ -0,0 +1,351 @@
1
+ """Shadeform instance provisioning."""
2
+ import time
3
+ from typing import Any, Dict, List, Optional, Tuple
4
+
5
+ import requests
6
+
7
+ from sky import sky_logging
8
+ from sky.provision import common
9
+ from sky.provision.shadeform import shadeform_utils
10
+ from sky.utils import status_lib
11
+
12
# Seconds to sleep between two consecutive readiness polls.
POLL_INTERVAL = 10
# Default cap, in seconds, on how long to wait for instances to become ready.
INSTANCE_READY_TIMEOUT = 3600

logger = sky_logging.init_logger(__name__)

# Translation table from Shadeform instance status strings to SkyPilot
# cluster statuses.
SHADEFORM_STATUS_MAP = {
    # These three states are all reported as INIT.
    'creating': status_lib.ClusterStatus.INIT,
    'pending_provider': status_lib.ClusterStatus.INIT,
    'pending': status_lib.ClusterStatus.INIT,
    'active': status_lib.ClusterStatus.UP,
    # 'deleted' is reported as STOPPED; query_instances drops STOPPED
    # entries when non_terminated_only is set.
    'deleted': status_lib.ClusterStatus.STOPPED,
}
25
+
26
+
27
def _get_cluster_instances(cluster_name_on_cloud: str) -> Dict[str, Any]:
    """Collect the Shadeform instances that belong to one cluster.

    Membership is decided purely by instance name: an instance belongs to
    the cluster when its name is exactly ``<cluster>-head`` or
    ``<cluster>-worker``.

    Args:
        cluster_name_on_cloud: Cluster name used as the instance name prefix.

    Returns:
        A dict mapping instance id to the instance record returned by the
        API. An empty dict is returned both when nothing matches and when
        the listing fails (the failure is logged as a warning), so callers
        cannot tell those two cases apart.
    """
    wanted_names = (f'{cluster_name_on_cloud}-head',
                    f'{cluster_name_on_cloud}-worker')
    try:
        listing = shadeform_utils.get_instances()
        return {
            record['id']: record
            for record in listing.get('instances', [])
            if record.get('name') in wanted_names
        }
    except (ValueError, KeyError, requests.exceptions.RequestException) as e:
        logger.warning(f'Failed to get instances: {e}')
        return {}
46
+
47
+
48
def _get_head_instance_id(instances: Dict[str, Any]) -> Optional[str]:
    """Return the id of the first instance whose name ends with '-head'.

    Args:
        instances: Mapping of instance id to instance record. A record
            without a 'name' key is treated as having an empty name.

    Returns:
        The id of the first matching instance in dict iteration order, or
        None when no record matches.
    """
    head_ids = (instance_id for instance_id, record in instances.items()
                if record.get('name', '').endswith('-head'))
    return next(head_ids, None)
54
+
55
+
56
def _wait_for_instances_ready(cluster_name_on_cloud: str,
                              expected_count: int,
                              timeout: int = INSTANCE_READY_TIMEOUT) -> bool:
    """Poll until enough cluster instances are ready or the timeout elapses.

    An instance counts as ready when its status is 'active' and both its
    'ip' and 'ssh_port' fields are set.

    Args:
        cluster_name_on_cloud: Cluster whose instances are polled.
        expected_count: Number of ready instances required.
        timeout: Maximum time to keep polling, in seconds.

    Returns:
        True as soon as at least ``expected_count`` instances are ready,
        False if ``timeout`` seconds pass first.
    """
    # Use the monotonic clock: wall-clock adjustments (NTP sync, manual
    # changes) must neither stretch nor cut short the wait.
    start_time = time.monotonic()

    while time.monotonic() - start_time < timeout:
        instances = _get_cluster_instances(cluster_name_on_cloud)
        ready_count = sum(
            1 for instance in instances.values()
            if (instance.get('status') == 'active' and
                instance.get('ip') is not None and
                instance.get('ssh_port') is not None))

        logger.info(f'Waiting for instances to be ready: '
                    f'({ready_count}/{expected_count})')

        if ready_count >= expected_count:
            return True

        time.sleep(POLL_INTERVAL)

    return False
81
+
82
+
83
def _parse_instance_type(instance_type_full: Any) -> Tuple[str, str]:
    """Split a full instance type into (cloud, Shadeform instance type).

    The input must look like ``{cloud}_{instance_type}``: everything before
    the first underscore is the cloud provider (massedcompute, scaleway,
    lambda, etc.) and the remainder is the instance type, which is then
    rewritten into the spelling sent to Shadeform.

    Raises:
        AssertionError: If the value is not a string containing an
            underscore, or if either part ends up empty.
    """
    assert (isinstance(instance_type_full, str) and
            '_' in instance_type_full), \
        f'InstanceType must be in format cloud_instance_type, got: ' \
        f'{instance_type_full}'

    # Example: "massedcompute_A6000-basex2" -> cloud="massedcompute",
    # instance_type="A6000-basex2" (before the rewrites below).
    cloud, _, instance_type = instance_type_full.partition('_')

    # Shadeform uses underscores instead of hyphens
    instance_type = instance_type.replace('-', '_')

    # Drop a single trailing 'B' (e.g. "..._80GB" becomes "..._80G").
    if instance_type.endswith('B'):
        instance_type = instance_type[:-1]

    # Replace "GBx" with "Gx" (case sensitive)
    instance_type = instance_type.replace('GBx', 'Gx')

    assert cloud, 'Cloud provider cannot be empty'
    assert instance_type, 'Instance type cannot be empty'
    return cloud, instance_type


def run_instances(region: str, cluster_name: str, cluster_name_on_cloud: str,
                  config: common.ProvisionConfig) -> common.ProvisionRecord:
    """Create instances until the cluster has ``config.count`` active ones.

    Only instances whose status is 'active' count towards the target. The
    first instance created when the cluster has no head is named
    ``<cluster>-head``; every other one is named ``<cluster>-worker``.

    Args:
        region: Region passed to Shadeform for every new instance.
        cluster_name: Unused; ``cluster_name_on_cloud`` is used instead.
        cluster_name_on_cloud: Cluster name used as the instance name prefix.
        config: Provisioning config; ``count``, ``node_config`` and
            ``authentication_config`` are read from it.

    Returns:
        A ProvisionRecord naming the head instance and the ids of the
        instances created by this call.

    Raises:
        RuntimeError: If the cluster already has enough active instances
            but no head node, or if the new instances do not become ready
            in time.
        AssertionError: If ``node_config`` lacks a well-formed
            'InstanceType'.
        Exception: Any error from instance creation is re-raised after the
            instances created by this call have been deleted.
    """
    del cluster_name  # unused - we use cluster_name_on_cloud
    logger.info(f'Running instances for cluster {cluster_name_on_cloud} '
                f'in region {region}')
    logger.debug(f'DEBUG: region type={type(region)}, value={region!r}')
    logger.debug(f'DEBUG: config node_config={config.node_config}')

    # Check existing instances
    existing_instances = _get_cluster_instances(cluster_name_on_cloud)
    head_instance_id = _get_head_instance_id(existing_instances)

    # Filter active instances
    active_instances = {
        iid: inst
        for iid, inst in existing_instances.items()
        if inst.get('status') == 'active'
    }

    current_count = len(active_instances)
    target_count = config.count

    logger.info(f'Current instances: {current_count}, target: {target_count}')

    if current_count >= target_count:
        if head_instance_id is None:
            raise RuntimeError(
                f'Cluster {cluster_name_on_cloud} has no head node')
        logger.info(f'Cluster already has {current_count} instances, '
                    f'no need to start more')
        return common.ProvisionRecord(
            provider_name='shadeform',
            cluster_name=cluster_name_on_cloud,
            region=region,
            zone=None,  # Shadeform doesn't use separate zones
            head_instance_id=head_instance_id,
            resumed_instance_ids=[],
            created_instance_ids=[])

    # Everything below is identical for every instance created by this
    # call, so it is resolved once before the creation loop.
    # The node_config contains instance specs including InstanceType
    # which follows the format: {cloud_provider}_{instance_type}
    # (e.g., "massedcompute_A6000_basex2")
    node_config = config.node_config
    assert 'InstanceType' in node_config, \
        'InstanceType must be present in node_config'
    cloud, instance_type = _parse_instance_type(node_config['InstanceType'])

    # Get SSH key ID for authentication - this is optional and may be None
    ssh_key_id = config.authentication_config.get('ssh_key_id')

    # Create new instances
    to_create = target_count - current_count
    created_instance_ids: List[str] = []

    for _ in range(to_create):
        node_type = 'head' if head_instance_id is None else 'worker'
        instance_name = f'{cluster_name_on_cloud}-{node_type}'

        create_config = {
            'cloud': cloud,
            'region': region,
            'shade_instance_type': instance_type,
            'name': instance_name,
            'ssh_key_id': ssh_key_id
        }

        try:
            logger.info(f'Creating {node_type} instance: {instance_name}')
            response = shadeform_utils.create_instance(create_config)
            instance_id = response['id']
            created_instance_ids.append(instance_id)

            if head_instance_id is None:
                head_instance_id = instance_id

            logger.info(f'Created instance {instance_id} ({node_type})')

        except Exception as e:
            logger.error(f'Failed to create instance: {e}')
            # Clean up any created instances. The rollback is best-effort:
            # a failure to delete one instance must not stop the others
            # from being deleted, nor replace the original error raised
            # below.
            for iid in created_instance_ids:
                try:
                    shadeform_utils.delete_instance(iid)
                except Exception as cleanup_e:  # pylint: disable=broad-except
                    logger.warning(
                        f'Failed to cleanup instance {iid}: {cleanup_e}')
            raise

    # Wait for all instances to be ready
    logger.info('Waiting for instances to become ready...')
    if not _wait_for_instances_ready(cluster_name_on_cloud, target_count):
        raise RuntimeError('Timed out waiting for instances to be ready')

    assert head_instance_id is not None, 'head_instance_id should not be None'

    # NOTE(review): this path reports zone=region while the early return
    # above reports zone=None - confirm which one callers expect.
    return common.ProvisionRecord(provider_name='shadeform',
                                  cluster_name=cluster_name_on_cloud,
                                  region=region,
                                  zone=region,
                                  head_instance_id=head_instance_id,
                                  resumed_instance_ids=[],
                                  created_instance_ids=created_instance_ids)
220
+
221
+
222
def wait_instances(region: str, cluster_name_on_cloud: str,
                   state: Optional[status_lib.ClusterStatus]) -> None:
    """Do nothing; all three arguments are ignored.

    run_instances already waits for the instances it creates to reach the
    'active' status, so no further waiting is done here.
    """
    del region, cluster_name_on_cloud, state  # unused
    return None
228
+
229
+
230
def stop_instances(cluster_name_on_cloud: str,
                   provider_config: Optional[Dict[str, Any]] = None,
                   worker_only: bool = False) -> None:
    """Reject the request: stopping is not supported by Shadeform.

    All arguments are ignored.

    Raises:
        NotImplementedError: Always.
    """
    del cluster_name_on_cloud, provider_config, worker_only  # unused
    unsupported = NotImplementedError(
        'Stopping instances is not supported by Shadeform')
    raise unsupported
237
+
238
+
239
def terminate_instances(cluster_name_on_cloud: str,
                        provider_config: Optional[Dict[str, Any]] = None,
                        worker_only: bool = False) -> None:
    """Delete the instances of a cluster.

    Args:
        cluster_name_on_cloud: Cluster whose instances are deleted.
        provider_config: Unused.
        worker_only: When True, instances whose name ends with '-head' are
            kept and only the remaining ones are deleted.

    NOTE(review): a failed delete request is only logged as a warning, so
    this function can return normally while an instance still exists -
    confirm that callers are fine with that.
    """
    del provider_config  # unused
    logger.info(f'Terminating instances for cluster {cluster_name_on_cloud}')

    instances = _get_cluster_instances(cluster_name_on_cloud)
    if not instances:
        logger.info(f'No instances found for cluster {cluster_name_on_cloud}')
        return

    if worker_only:
        # Only delete worker nodes, not head
        instances_to_delete = {}
        for iid, inst in instances.items():
            if inst.get('name', '').endswith('-head'):
                continue
            instances_to_delete[iid] = inst
    else:
        instances_to_delete = instances

    for instance_id, instance in instances_to_delete.items():
        try:
            logger.info(
                f'Terminating instance {instance_id} ({instance.get("name")})')
            shadeform_utils.delete_instance(instance_id)
        except requests.exceptions.RequestException as e:
            logger.warning(f'Failed to terminate instance {instance_id}: {e}')
268
+
269
+
270
def get_cluster_info(
        region: str,
        cluster_name_on_cloud: str,
        provider_config: Optional[Dict[str, Any]] = None) -> common.ClusterInfo:
    """Describe the instances of a cluster as a ClusterInfo.

    Args:
        region: Unused.
        cluster_name_on_cloud: Cluster to describe.
        provider_config: Unused.

    Returns:
        A ClusterInfo with one InstanceInfo per instance. The instance's
        'ip' field is used as both internal and external address, and the
        SSH port falls back to 22 when the record has none. The SSH user is
        taken from the head instance's 'ssh_user' field, defaulting to
        'shadeform'. When the cluster has no instances, an empty
        ClusterInfo without a head is returned.
    """
    del region, provider_config  # unused
    instances = _get_cluster_instances(cluster_name_on_cloud)

    if not instances:
        return common.ClusterInfo(instances={},
                                  head_instance_id=None,
                                  provider_name='shadeform')

    head_instance_id = _get_head_instance_id(instances)

    # ClusterInfo expects Dict[InstanceId, List[InstanceInfo]], hence the
    # single-element lists.
    cluster_instances = {
        instance_id: [
            common.InstanceInfo(
                instance_id=instance_id,
                internal_ip=record.get('ip', ''),
                external_ip=record.get('ip', ''),
                ssh_port=record.get('ssh_port', 22),
                tags={},
            )
        ] for instance_id, record in instances.items()
    }

    # Without a head instance there is no record to read the user from, so
    # the lookup below falls through to the default.
    head_record = ({} if head_instance_id is None else instances.get(
        head_instance_id, {}))
    ssh_user = head_record.get('ssh_user', 'shadeform')

    return common.ClusterInfo(instances=cluster_instances,
                              head_instance_id=head_instance_id,
                              provider_name='shadeform',
                              ssh_user=ssh_user)
307
+
308
+
309
def query_instances(
    cluster_name: str,
    cluster_name_on_cloud: str,
    provider_config: Optional[Dict[str, Any]] = None,
    non_terminated_only: bool = True,
) -> Dict[str, Tuple[Optional['status_lib.ClusterStatus'], Optional[str]]]:
    """Report the SkyPilot status of every instance in a cluster.

    Args:
        cluster_name: Unused.
        cluster_name_on_cloud: Cluster whose instances are queried.
        provider_config: Unused.
        non_terminated_only: When True, instances whose status maps to
            STOPPED are left out of the result.

    Returns:
        A dict mapping instance id to ``(status, None)``. Shadeform status
        strings missing from SHADEFORM_STATUS_MAP are reported as INIT. The
        dict is empty when the cluster has no instances.
    """
    del cluster_name, provider_config  # unused
    stopped = status_lib.ClusterStatus.STOPPED
    fallback = status_lib.ClusterStatus.INIT

    status_map: Dict[str, Tuple[Optional['status_lib.ClusterStatus'],
                                Optional[str]]] = {}
    cluster_members = _get_cluster_instances(cluster_name_on_cloud)
    for instance_id, record in cluster_members.items():
        sky_status = SHADEFORM_STATUS_MAP.get(record.get('status', 'unknown'),
                                              fallback)
        hidden = non_terminated_only and sky_status == stopped
        if not hidden:
            status_map[instance_id] = (sky_status, None)

    return status_map
336
+
337
+
338
def open_ports(cluster_name_on_cloud: str,
               ports: List[str],
               provider_config: Optional[Dict[str, Any]] = None) -> None:
    """Reject the request: opening ports is not supported by Shadeform.

    All arguments are ignored.

    Raises:
        NotImplementedError: Always, with a message naming the unsupported
            operation (matching stop_instances).
    """
    del cluster_name_on_cloud, ports, provider_config  # unused
    raise NotImplementedError('Opening ports is not supported by Shadeform')
344
+
345
+
346
def cleanup_ports(cluster_name_on_cloud: str,
                  ports: List[str],
                  provider_config: Optional[Dict[str, Any]] = None) -> None:
    """Do nothing; all arguments are ignored.

    Ports are never opened dynamically for Shadeform (open_ports is not
    supported), so there is nothing to undo.
    """
    del cluster_name_on_cloud, ports, provider_config  # unused
    return None
@@ -0,0 +1,83 @@
1
+ """Shadeform API utilities."""
2
+
3
+ import os
4
+ from typing import Any, Dict
5
+
6
+ from sky.adaptors import common
7
+
8
+ # Lazy import to avoid dependency on external packages
9
+ requests = common.LazyImport('requests')
10
+
11
# Shadeform API configuration
SHADEFORM_API_BASE = 'https://api.shadeform.ai/v1'
SHADEFORM_API_KEY_PATH = '~/.shadeform/api_key'


def get_api_key() -> str:
    """Read the Shadeform API key from SHADEFORM_API_KEY_PATH.

    The path is expanded with os.path.expanduser and the file content is
    stripped of surrounding whitespace.

    Returns:
        The non-empty API key.

    Raises:
        FileNotFoundError: If the key file does not exist.
        ValueError: If the key file is empty or contains only whitespace.
    """
    api_key_path = os.path.expanduser(SHADEFORM_API_KEY_PATH)
    # Open directly instead of checking os.path.exists() first: the file can
    # disappear between the check and the open, which would surface as an
    # error without the setup hint below.
    try:
        with open(api_key_path, 'r', encoding='utf-8') as f:
            api_key = f.read().strip()
    except FileNotFoundError as e:
        raise FileNotFoundError(
            f'Shadeform API key not found at {api_key_path}. '
            'Please save your API key to this file.') from e

    if not api_key:
        raise ValueError(f'Shadeform API key is empty in {api_key_path}')

    return api_key
31
+
32
+
33
def make_request(method: str, endpoint: str, **kwargs) -> Any:
    """Send an authenticated request to the Shadeform API.

    Args:
        method: HTTP method name, e.g. 'GET' or 'POST'.
        endpoint: Path relative to SHADEFORM_API_BASE; leading slashes are
            stripped before it is appended.
        **kwargs: Forwarded to requests.request (e.g. json=...). A 'timeout'
            passed here overrides the default applied below.

    Returns:
        The decoded JSON body, or an empty dict when the response body is
        empty or whitespace-only.

    Raises:
        FileNotFoundError, ValueError: From get_api_key() when the API key
            file is missing or empty.
        Exception: Whatever response.raise_for_status() raises for an
            unsuccessful HTTP status.
    """
    url = f'{SHADEFORM_API_BASE}/{endpoint.lstrip("/")}'
    headers = {
        'X-API-KEY': get_api_key(),
        'Content-Type': 'application/json',
    }

    # requests waits indefinitely unless a timeout is given, so a stalled
    # connection would block the caller forever. Apply a default (seconds)
    # while still letting callers choose their own.
    kwargs.setdefault('timeout', 60)

    response = requests.request(method, url, headers=headers, **kwargs)
    response.raise_for_status()

    # Some APIs (like delete) return empty responses with just 200 status;
    # report those as an empty dict instead of failing to decode JSON.
    if response.text.strip():
        return response.json()
    return {}
50
+
51
+
52
def get_instances() -> Dict[str, Any]:
    """Fetch the instance listing via GET /instances."""
    listing = make_request('GET', '/instances')
    return listing
55
+
56
+
57
def get_instance_info(instance_id: str) -> Dict[str, Any]:
    """Fetch the details of one instance via GET /instances/<id>/info."""
    endpoint = f'/instances/{instance_id}/info'
    return make_request('GET', endpoint)
60
+
61
+
62
def create_instance(config: Dict[str, Any]) -> Dict[str, Any]:
    """Create an instance by POSTing `config` as JSON to /instances/create."""
    created = make_request('POST', '/instances/create', json=config)
    return created
65
+
66
+
67
def delete_instance(instance_id: str) -> Dict[str, Any]:
    """Request deletion of an instance via POST /instances/<id>/delete.

    Note: the Shadeform delete API answers with status 200 and an empty
    response body.
    """
    endpoint = f'/instances/{instance_id}/delete'
    return make_request('POST', endpoint)
73
+
74
+
75
def get_ssh_keys() -> Dict[str, Any]:
    """Fetch the SSH key listing via GET /sshkeys."""
    keys = make_request('GET', '/sshkeys')
    return keys
78
+
79
+
80
def add_ssh_key(name: str, public_key: str) -> Dict[str, Any]:
    """Register an SSH public key under `name` via POST /sshkeys/add."""
    payload = dict(name=name, public_key=public_key)
    return make_request('POST', '/sshkeys/add', json=payload)
@@ -81,6 +81,26 @@ logger = sky_logging.init_logger(__name__)
81
81
  # platforms, including macOS.
82
82
  multiprocessing.set_start_method('spawn', force=True)
83
83
 
84
# Worker-thread cap, chosen to equal the number of workers in the event
# loop's default thread pool executor.
_REQUEST_THREADS_LIMIT = min(32, (os.cpu_count() or 0) + 4)

# Serializes the one-time creation of _REQUEST_THREAD_EXECUTOR.
_REQUEST_THREAD_EXECUTOR_LOCK = threading.Lock()
# Thread pool reserved for running synchronous requests from coroutines;
# stays None until the first call to get_request_thread_executor().
_REQUEST_THREAD_EXECUTOR: Optional[concurrent.futures.ThreadPoolExecutor] = None


def get_request_thread_executor() -> concurrent.futures.ThreadPoolExecutor:
    """Return this process's request thread executor, creating it on first use.

    The executor is created at most once, with _REQUEST_THREADS_LIMIT workers.
    The global is read once without the lock and, if still unset, read again
    under the lock before the executor is constructed.
    """
    global _REQUEST_THREAD_EXECUTOR
    executor = _REQUEST_THREAD_EXECUTOR
    if executor is None:
        with _REQUEST_THREAD_EXECUTOR_LOCK:
            # Another thread may have created it while we waited for the lock.
            executor = _REQUEST_THREAD_EXECUTOR
            if executor is None:
                executor = concurrent.futures.ThreadPoolExecutor(
                    max_workers=_REQUEST_THREADS_LIMIT)
                _REQUEST_THREAD_EXECUTOR = executor
    return executor
103
+
84
104
 
85
105
  class RequestQueue:
86
106
  """The queue for the requests, either redis or multiprocessing.
@@ -576,8 +596,8 @@ async def _execute_request_coroutine(request: api_requests.Request):
576
596
  # 1. skypilot config is not contextual
577
597
  # 2. envs that read directly from os.environ are not contextual
578
598
  ctx.override_envs(request_body.env_vars)
579
- fut: asyncio.Future = context_utils.to_thread(func,
580
- **request_body.to_kwargs())
599
+ fut: asyncio.Future = context_utils.to_thread_with_executor(
600
+ get_request_thread_executor(), func, **request_body.to_kwargs())
581
601
 
582
602
  async def poll_task(request_id: str) -> bool:
583
603
  req_status = await api_requests.get_request_status_async(request_id)
sky/server/server.py CHANGED
@@ -3,6 +3,7 @@
3
3
  import argparse
4
4
  import asyncio
5
5
  import base64
6
+ from concurrent.futures import ThreadPoolExecutor
6
7
  import contextlib
7
8
  import datetime
8
9
  import hashlib
@@ -1731,9 +1732,9 @@ async def kubernetes_pod_ssh_proxy(websocket: fastapi.WebSocket,
1731
1732
  logger.info(f'WebSocket connection accepted for cluster: {cluster_name}')
1732
1733
 
1733
1734
  # Run core.status in another thread to avoid blocking the event loop.
1734
- cluster_records = await context_utils.to_thread(core.status,
1735
- cluster_name,
1736
- all_users=True)
1735
+ with ThreadPoolExecutor(max_workers=1) as thread_pool_executor:
1736
+ cluster_records = await context_utils.to_thread_with_executor(
1737
+ thread_pool_executor, core.status, cluster_name, all_users=True)
1737
1738
  cluster_record = cluster_records[0]
1738
1739
  if cluster_record['status'] != status_lib.ClusterStatus.UP:
1739
1740
  raise fastapi.HTTPException(
@@ -222,6 +222,7 @@ extras_require: Dict[str, List[str]] = {
222
222
  'hyperbolic': [], # No dependencies needed for hyperbolic
223
223
  'seeweb': ['ecsapi>=0.2.0'],
224
224
  'server': server_dependencies,
225
+ 'shadeform': [], # No dependencies needed for shadeform
225
226
  }
226
227
 
227
228
  # Calculate which clouds should be included in the [all] installation.
sky/skylet/constants.py CHANGED
@@ -471,7 +471,7 @@ CATALOG_DIR = '~/.sky/catalogs'
471
471
  ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci',
472
472
  'kubernetes', 'runpod', 'vast', 'vsphere', 'cudo', 'fluidstack',
473
473
  'paperspace', 'primeintellect', 'do', 'nebius', 'ssh',
474
- 'hyperbolic', 'seeweb')
474
+ 'hyperbolic', 'seeweb', 'shadeform')
475
475
  # END constants used for service catalog.
476
476
 
477
477
  # The user ID of the SkyPilot system.