skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +64 -32
- sky/adaptors/aws.py +23 -6
- sky/adaptors/azure.py +432 -15
- sky/adaptors/cloudflare.py +5 -5
- sky/adaptors/common.py +19 -9
- sky/adaptors/do.py +20 -0
- sky/adaptors/gcp.py +3 -2
- sky/adaptors/kubernetes.py +122 -88
- sky/adaptors/nebius.py +100 -0
- sky/adaptors/oci.py +39 -1
- sky/adaptors/vast.py +29 -0
- sky/admin_policy.py +101 -0
- sky/authentication.py +117 -98
- sky/backends/backend.py +52 -20
- sky/backends/backend_utils.py +669 -557
- sky/backends/cloud_vm_ray_backend.py +1099 -808
- sky/backends/local_docker_backend.py +14 -8
- sky/backends/wheel_utils.py +38 -20
- sky/benchmark/benchmark_utils.py +22 -23
- sky/check.py +76 -27
- sky/cli.py +1586 -1139
- sky/client/__init__.py +1 -0
- sky/client/cli.py +5683 -0
- sky/client/common.py +345 -0
- sky/client/sdk.py +1765 -0
- sky/cloud_stores.py +283 -19
- sky/clouds/__init__.py +7 -2
- sky/clouds/aws.py +303 -112
- sky/clouds/azure.py +185 -179
- sky/clouds/cloud.py +115 -37
- sky/clouds/cudo.py +29 -22
- sky/clouds/do.py +313 -0
- sky/clouds/fluidstack.py +44 -54
- sky/clouds/gcp.py +206 -65
- sky/clouds/ibm.py +26 -21
- sky/clouds/kubernetes.py +345 -91
- sky/clouds/lambda_cloud.py +40 -29
- sky/clouds/nebius.py +297 -0
- sky/clouds/oci.py +129 -90
- sky/clouds/paperspace.py +22 -18
- sky/clouds/runpod.py +53 -34
- sky/clouds/scp.py +28 -24
- sky/clouds/service_catalog/__init__.py +19 -13
- sky/clouds/service_catalog/aws_catalog.py +29 -12
- sky/clouds/service_catalog/azure_catalog.py +33 -6
- sky/clouds/service_catalog/common.py +95 -75
- sky/clouds/service_catalog/constants.py +3 -3
- sky/clouds/service_catalog/cudo_catalog.py +13 -3
- sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
- sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
- sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
- sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
- sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
- sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
- sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
- sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
- sky/clouds/service_catalog/do_catalog.py +111 -0
- sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
- sky/clouds/service_catalog/gcp_catalog.py +16 -2
- sky/clouds/service_catalog/ibm_catalog.py +2 -2
- sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
- sky/clouds/service_catalog/lambda_catalog.py +8 -3
- sky/clouds/service_catalog/nebius_catalog.py +116 -0
- sky/clouds/service_catalog/oci_catalog.py +31 -4
- sky/clouds/service_catalog/paperspace_catalog.py +2 -2
- sky/clouds/service_catalog/runpod_catalog.py +2 -2
- sky/clouds/service_catalog/scp_catalog.py +2 -2
- sky/clouds/service_catalog/vast_catalog.py +104 -0
- sky/clouds/service_catalog/vsphere_catalog.py +2 -2
- sky/clouds/utils/aws_utils.py +65 -0
- sky/clouds/utils/azure_utils.py +91 -0
- sky/clouds/utils/gcp_utils.py +5 -9
- sky/clouds/utils/oci_utils.py +47 -5
- sky/clouds/utils/scp_utils.py +4 -3
- sky/clouds/vast.py +280 -0
- sky/clouds/vsphere.py +22 -18
- sky/core.py +361 -107
- sky/dag.py +41 -28
- sky/data/data_transfer.py +37 -0
- sky/data/data_utils.py +211 -32
- sky/data/mounting_utils.py +182 -30
- sky/data/storage.py +2118 -270
- sky/data/storage_utils.py +126 -5
- sky/exceptions.py +179 -8
- sky/execution.py +158 -85
- sky/global_user_state.py +150 -34
- sky/jobs/__init__.py +12 -10
- sky/jobs/client/__init__.py +0 -0
- sky/jobs/client/sdk.py +302 -0
- sky/jobs/constants.py +49 -11
- sky/jobs/controller.py +161 -99
- sky/jobs/dashboard/dashboard.py +171 -25
- sky/jobs/dashboard/templates/index.html +572 -60
- sky/jobs/recovery_strategy.py +157 -156
- sky/jobs/scheduler.py +307 -0
- sky/jobs/server/__init__.py +1 -0
- sky/jobs/server/core.py +598 -0
- sky/jobs/server/dashboard_utils.py +69 -0
- sky/jobs/server/server.py +190 -0
- sky/jobs/state.py +627 -122
- sky/jobs/utils.py +615 -206
- sky/models.py +27 -0
- sky/optimizer.py +142 -83
- sky/provision/__init__.py +20 -5
- sky/provision/aws/config.py +124 -42
- sky/provision/aws/instance.py +130 -53
- sky/provision/azure/__init__.py +7 -0
- sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
- sky/provision/azure/config.py +220 -0
- sky/provision/azure/instance.py +1012 -37
- sky/provision/common.py +31 -3
- sky/provision/constants.py +25 -0
- sky/provision/cudo/__init__.py +2 -1
- sky/provision/cudo/cudo_utils.py +112 -0
- sky/provision/cudo/cudo_wrapper.py +37 -16
- sky/provision/cudo/instance.py +28 -12
- sky/provision/do/__init__.py +11 -0
- sky/provision/do/config.py +14 -0
- sky/provision/do/constants.py +10 -0
- sky/provision/do/instance.py +287 -0
- sky/provision/do/utils.py +301 -0
- sky/provision/docker_utils.py +82 -46
- sky/provision/fluidstack/fluidstack_utils.py +57 -125
- sky/provision/fluidstack/instance.py +15 -43
- sky/provision/gcp/config.py +19 -9
- sky/provision/gcp/constants.py +7 -1
- sky/provision/gcp/instance.py +55 -34
- sky/provision/gcp/instance_utils.py +339 -80
- sky/provision/gcp/mig_utils.py +210 -0
- sky/provision/instance_setup.py +172 -133
- sky/provision/kubernetes/__init__.py +1 -0
- sky/provision/kubernetes/config.py +104 -90
- sky/provision/kubernetes/constants.py +8 -0
- sky/provision/kubernetes/instance.py +680 -325
- sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
- sky/provision/kubernetes/network.py +54 -20
- sky/provision/kubernetes/network_utils.py +70 -21
- sky/provision/kubernetes/utils.py +1370 -251
- sky/provision/lambda_cloud/__init__.py +11 -0
- sky/provision/lambda_cloud/config.py +10 -0
- sky/provision/lambda_cloud/instance.py +265 -0
- sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
- sky/provision/logging.py +1 -1
- sky/provision/nebius/__init__.py +11 -0
- sky/provision/nebius/config.py +11 -0
- sky/provision/nebius/instance.py +285 -0
- sky/provision/nebius/utils.py +318 -0
- sky/provision/oci/__init__.py +15 -0
- sky/provision/oci/config.py +51 -0
- sky/provision/oci/instance.py +436 -0
- sky/provision/oci/query_utils.py +681 -0
- sky/provision/paperspace/constants.py +6 -0
- sky/provision/paperspace/instance.py +4 -3
- sky/provision/paperspace/utils.py +2 -0
- sky/provision/provisioner.py +207 -130
- sky/provision/runpod/__init__.py +1 -0
- sky/provision/runpod/api/__init__.py +3 -0
- sky/provision/runpod/api/commands.py +119 -0
- sky/provision/runpod/api/pods.py +142 -0
- sky/provision/runpod/instance.py +64 -8
- sky/provision/runpod/utils.py +239 -23
- sky/provision/vast/__init__.py +10 -0
- sky/provision/vast/config.py +11 -0
- sky/provision/vast/instance.py +247 -0
- sky/provision/vast/utils.py +162 -0
- sky/provision/vsphere/common/vim_utils.py +1 -1
- sky/provision/vsphere/instance.py +8 -18
- sky/provision/vsphere/vsphere_utils.py +1 -1
- sky/resources.py +247 -102
- sky/serve/__init__.py +9 -9
- sky/serve/autoscalers.py +361 -299
- sky/serve/client/__init__.py +0 -0
- sky/serve/client/sdk.py +366 -0
- sky/serve/constants.py +12 -3
- sky/serve/controller.py +106 -36
- sky/serve/load_balancer.py +63 -12
- sky/serve/load_balancing_policies.py +84 -2
- sky/serve/replica_managers.py +42 -34
- sky/serve/serve_state.py +62 -32
- sky/serve/serve_utils.py +271 -160
- sky/serve/server/__init__.py +0 -0
- sky/serve/{core.py → server/core.py} +271 -90
- sky/serve/server/server.py +112 -0
- sky/serve/service.py +52 -16
- sky/serve/service_spec.py +95 -32
- sky/server/__init__.py +1 -0
- sky/server/common.py +430 -0
- sky/server/constants.py +21 -0
- sky/server/html/log.html +174 -0
- sky/server/requests/__init__.py +0 -0
- sky/server/requests/executor.py +472 -0
- sky/server/requests/payloads.py +487 -0
- sky/server/requests/queues/__init__.py +0 -0
- sky/server/requests/queues/mp_queue.py +76 -0
- sky/server/requests/requests.py +567 -0
- sky/server/requests/serializers/__init__.py +0 -0
- sky/server/requests/serializers/decoders.py +192 -0
- sky/server/requests/serializers/encoders.py +166 -0
- sky/server/server.py +1106 -0
- sky/server/stream_utils.py +141 -0
- sky/setup_files/MANIFEST.in +2 -5
- sky/setup_files/dependencies.py +159 -0
- sky/setup_files/setup.py +14 -125
- sky/sky_logging.py +59 -14
- sky/skylet/autostop_lib.py +2 -2
- sky/skylet/constants.py +183 -50
- sky/skylet/events.py +22 -10
- sky/skylet/job_lib.py +403 -258
- sky/skylet/log_lib.py +111 -71
- sky/skylet/log_lib.pyi +6 -0
- sky/skylet/providers/command_runner.py +6 -8
- sky/skylet/providers/ibm/node_provider.py +2 -2
- sky/skylet/providers/scp/config.py +11 -3
- sky/skylet/providers/scp/node_provider.py +8 -8
- sky/skylet/skylet.py +3 -1
- sky/skylet/subprocess_daemon.py +69 -17
- sky/skypilot_config.py +119 -57
- sky/task.py +205 -64
- sky/templates/aws-ray.yml.j2 +37 -7
- sky/templates/azure-ray.yml.j2 +27 -82
- sky/templates/cudo-ray.yml.j2 +7 -3
- sky/templates/do-ray.yml.j2 +98 -0
- sky/templates/fluidstack-ray.yml.j2 +7 -4
- sky/templates/gcp-ray.yml.j2 +26 -6
- sky/templates/ibm-ray.yml.j2 +3 -2
- sky/templates/jobs-controller.yaml.j2 +46 -11
- sky/templates/kubernetes-ingress.yml.j2 +7 -0
- sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
- sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
- sky/templates/kubernetes-ray.yml.j2 +292 -25
- sky/templates/lambda-ray.yml.j2 +30 -40
- sky/templates/nebius-ray.yml.j2 +79 -0
- sky/templates/oci-ray.yml.j2 +18 -57
- sky/templates/paperspace-ray.yml.j2 +10 -6
- sky/templates/runpod-ray.yml.j2 +26 -4
- sky/templates/scp-ray.yml.j2 +3 -2
- sky/templates/sky-serve-controller.yaml.j2 +12 -1
- sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
- sky/templates/vast-ray.yml.j2 +70 -0
- sky/templates/vsphere-ray.yml.j2 +8 -3
- sky/templates/websocket_proxy.py +64 -0
- sky/usage/constants.py +10 -1
- sky/usage/usage_lib.py +130 -37
- sky/utils/accelerator_registry.py +35 -51
- sky/utils/admin_policy_utils.py +147 -0
- sky/utils/annotations.py +51 -0
- sky/utils/cli_utils/status_utils.py +81 -23
- sky/utils/cluster_utils.py +356 -0
- sky/utils/command_runner.py +452 -89
- sky/utils/command_runner.pyi +77 -3
- sky/utils/common.py +54 -0
- sky/utils/common_utils.py +319 -108
- sky/utils/config_utils.py +204 -0
- sky/utils/control_master_utils.py +48 -0
- sky/utils/controller_utils.py +548 -266
- sky/utils/dag_utils.py +93 -32
- sky/utils/db_utils.py +18 -4
- sky/utils/env_options.py +29 -7
- sky/utils/kubernetes/create_cluster.sh +8 -60
- sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
- sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
- sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
- sky/utils/kubernetes/gpu_labeler.py +4 -4
- sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
- sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
- sky/utils/kubernetes/rsync_helper.sh +24 -0
- sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
- sky/utils/log_utils.py +240 -33
- sky/utils/message_utils.py +81 -0
- sky/utils/registry.py +127 -0
- sky/utils/resources_utils.py +94 -22
- sky/utils/rich_utils.py +247 -18
- sky/utils/schemas.py +284 -64
- sky/{status_lib.py → utils/status_lib.py} +12 -7
- sky/utils/subprocess_utils.py +212 -46
- sky/utils/timeline.py +12 -7
- sky/utils/ux_utils.py +168 -15
- skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
- skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
- sky/clouds/cloud_registry.py +0 -31
- sky/jobs/core.py +0 -330
- sky/skylet/providers/azure/__init__.py +0 -2
- sky/skylet/providers/azure/azure-vm-template.json +0 -301
- sky/skylet/providers/azure/config.py +0 -170
- sky/skylet/providers/azure/node_provider.py +0 -466
- sky/skylet/providers/lambda_cloud/__init__.py +0 -2
- sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
- sky/skylet/providers/oci/__init__.py +0 -2
- sky/skylet/providers/oci/node_provider.py +0 -488
- sky/skylet/providers/oci/query_helper.py +0 -383
- sky/skylet/providers/oci/utils.py +0 -21
- sky/utils/cluster_yaml_utils.py +0 -24
- sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
- skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
- skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
@@ -9,98 +9,9 @@ import os
|
|
9
9
|
|
10
10
|
import cudo_compute
|
11
11
|
|
12
|
-
|
12
|
+
import sky.provision.cudo.cudo_utils as utils
|
13
13
|
|
14
|
-
|
15
|
-
'NVIDIA V100': 'V100',
|
16
|
-
'NVIDIA A40': 'A40',
|
17
|
-
'RTX 3080': 'RTX3080',
|
18
|
-
'RTX A4000': 'RTXA4000',
|
19
|
-
'RTX A4500': 'RTXA4500',
|
20
|
-
'RTX A5000': 'RTXA5000',
|
21
|
-
'RTX A6000': 'RTXA6000',
|
22
|
-
}
|
23
|
-
|
24
|
-
cudo_gpu_mem = {
|
25
|
-
'RTX3080': 12,
|
26
|
-
'A40': 48,
|
27
|
-
'RTXA4000': 16,
|
28
|
-
'RTXA4500': 20,
|
29
|
-
'RTXA5000': 24,
|
30
|
-
'RTXA6000': 48,
|
31
|
-
'V100': 16,
|
32
|
-
}
|
33
|
-
|
34
|
-
machine_specs = [
|
35
|
-
# Low
|
36
|
-
{
|
37
|
-
'vcpu': 2,
|
38
|
-
'mem': 4,
|
39
|
-
'gpu': 1,
|
40
|
-
},
|
41
|
-
{
|
42
|
-
'vcpu': 4,
|
43
|
-
'mem': 8,
|
44
|
-
'gpu': 1,
|
45
|
-
},
|
46
|
-
{
|
47
|
-
'vcpu': 8,
|
48
|
-
'mem': 16,
|
49
|
-
'gpu': 2,
|
50
|
-
},
|
51
|
-
{
|
52
|
-
'vcpu': 16,
|
53
|
-
'mem': 32,
|
54
|
-
'gpu': 2,
|
55
|
-
},
|
56
|
-
{
|
57
|
-
'vcpu': 32,
|
58
|
-
'mem': 64,
|
59
|
-
'gpu': 4,
|
60
|
-
},
|
61
|
-
{
|
62
|
-
'vcpu': 64,
|
63
|
-
'mem': 128,
|
64
|
-
'gpu': 8,
|
65
|
-
},
|
66
|
-
# Mid
|
67
|
-
{
|
68
|
-
'vcpu': 96,
|
69
|
-
'mem': 192,
|
70
|
-
'gpu': 8
|
71
|
-
},
|
72
|
-
{
|
73
|
-
'vcpu': 48,
|
74
|
-
'mem': 96,
|
75
|
-
'gpu': 4
|
76
|
-
},
|
77
|
-
{
|
78
|
-
'vcpu': 24,
|
79
|
-
'mem': 48,
|
80
|
-
'gpu': 2
|
81
|
-
},
|
82
|
-
{
|
83
|
-
'vcpu': 12,
|
84
|
-
'mem': 24,
|
85
|
-
'gpu': 1
|
86
|
-
},
|
87
|
-
# Hi
|
88
|
-
{
|
89
|
-
'vcpu': 96,
|
90
|
-
'mem': 192,
|
91
|
-
'gpu': 4
|
92
|
-
},
|
93
|
-
{
|
94
|
-
'vcpu': 48,
|
95
|
-
'mem': 96,
|
96
|
-
'gpu': 2
|
97
|
-
},
|
98
|
-
{
|
99
|
-
'vcpu': 24,
|
100
|
-
'mem': 48,
|
101
|
-
'gpu': 1
|
102
|
-
},
|
103
|
-
]
|
14
|
+
VMS_CSV = 'cudo/vms.csv'
|
104
15
|
|
105
16
|
|
106
17
|
def cudo_api():
|
@@ -110,28 +21,8 @@ def cudo_api():
|
|
110
21
|
return cudo_compute.VirtualMachinesApi(client)
|
111
22
|
|
112
23
|
|
113
|
-
def cudo_gpu_to_skypilot_gpu(model):
|
114
|
-
if model in cudo_gpu_model:
|
115
|
-
return cudo_gpu_model[model]
|
116
|
-
else:
|
117
|
-
return model
|
118
|
-
|
119
|
-
|
120
|
-
def skypilot_gpu_to_cudo_gpu(model):
|
121
|
-
for key, value in cudo_gpu_model.items():
|
122
|
-
if value == model:
|
123
|
-
return key
|
124
|
-
return model
|
125
|
-
|
126
|
-
|
127
|
-
def gpu_exists(model):
|
128
|
-
if model in cudo_gpu_model:
|
129
|
-
return True
|
130
|
-
return False
|
131
|
-
|
132
|
-
|
133
24
|
def get_gpu_info(count, model):
|
134
|
-
mem = cudo_gpu_mem[model]
|
25
|
+
mem = utils.cudo_gpu_mem[model]
|
135
26
|
# pylint: disable=line-too-long
|
136
27
|
# {'Name': 'A4000', 'Manufacturer': 'NVIDIA', 'Count': 1.0, 'MemoryInfo': {'SizeInMiB': 16384}}], 'TotalGpuMemoryInMiB': 16384}"
|
137
28
|
info = {
|
@@ -168,16 +59,16 @@ def machine_types(gpu_model, mem_gib, vcpu_count, gpu_count):
|
|
168
59
|
|
169
60
|
def update_prices():
|
170
61
|
rows = []
|
171
|
-
for spec in machine_specs:
|
62
|
+
for spec in utils.machine_specs:
|
172
63
|
mts = machine_types('', spec['mem'], spec['vcpu'], spec['gpu'])
|
173
64
|
for hc in mts['host_configs']:
|
174
|
-
if not gpu_exists(hc['gpu_model']):
|
65
|
+
if not utils.gpu_exists(hc['gpu_model']):
|
175
66
|
continue
|
176
|
-
accelerator_name = cudo_gpu_to_skypilot_gpu(hc['gpu_model'])
|
67
|
+
accelerator_name = utils.cudo_gpu_to_skypilot_gpu(hc['gpu_model'])
|
177
68
|
row = {
|
178
69
|
'instance_type': get_instance_type(hc['machine_type'],
|
179
|
-
spec['
|
180
|
-
spec['
|
70
|
+
spec['vcpu'], spec['mem'],
|
71
|
+
spec['gpu']),
|
181
72
|
'accelerator_name': accelerator_name,
|
182
73
|
'accelerator_count': str(spec['gpu']) + '.0',
|
183
74
|
'vcpus': str(spec['vcpu']),
|
@@ -11,16 +11,167 @@ from typing import List
|
|
11
11
|
|
12
12
|
import requests
|
13
13
|
|
14
|
-
ENDPOINT = 'https://
|
14
|
+
ENDPOINT = 'https://platform.fluidstack.io/list_available_configurations'
|
15
15
|
DEFAULT_FLUIDSTACK_API_KEY_PATH = os.path.expanduser('~/.fluidstack/api_key')
|
16
|
-
|
17
|
-
|
16
|
+
|
17
|
+
plan_vcpus_memory = [{
|
18
|
+
'gpu_type': 'H100_SXM5_80GB',
|
19
|
+
'gpu_count': 1,
|
20
|
+
'min_cpu_count': 52,
|
21
|
+
'min_memory': 450
|
22
|
+
}, {
|
23
|
+
'gpu_type': 'H100_SXM5_80GB',
|
24
|
+
'gpu_count': 2,
|
25
|
+
'min_cpu_count': 52,
|
26
|
+
'min_memory': 450
|
27
|
+
}, {
|
28
|
+
'gpu_type': 'H100_SXM5_80GB',
|
29
|
+
'gpu_count': 4,
|
30
|
+
'min_cpu_count': 104,
|
31
|
+
'min_memory': 900
|
32
|
+
}, {
|
33
|
+
'gpu_type': 'H100_SXM5_80GB',
|
34
|
+
'gpu_count': 8,
|
35
|
+
'min_cpu_count': 192,
|
36
|
+
'min_memory': 1800
|
37
|
+
}, {
|
38
|
+
'gpu_type': 'RTX_A6000_48GB',
|
39
|
+
'gpu_count': 2,
|
40
|
+
'min_cpu_count': 12,
|
41
|
+
'min_memory': 110.0
|
42
|
+
}, {
|
43
|
+
'gpu_type': 'RTX_A6000_48GB',
|
44
|
+
'gpu_count': 4,
|
45
|
+
'min_cpu_count': 24,
|
46
|
+
'min_memory': 220.0
|
47
|
+
}, {
|
48
|
+
'gpu_type': 'A100_NVLINK_80GB',
|
49
|
+
'gpu_count': 8,
|
50
|
+
'min_cpu_count': 252,
|
51
|
+
'min_memory': 960.0
|
52
|
+
}, {
|
53
|
+
'gpu_type': 'H100_PCIE_80GB',
|
54
|
+
'gpu_count': 8,
|
55
|
+
'min_cpu_count': 252,
|
56
|
+
'min_memory': 1440.0
|
57
|
+
}, {
|
58
|
+
'gpu_type': 'RTX_A4000_16GB',
|
59
|
+
'gpu_count': 2,
|
60
|
+
'min_cpu_count': 12,
|
61
|
+
'min_memory': 48.0
|
62
|
+
}, {
|
63
|
+
'gpu_type': 'H100_PCIE_80GB',
|
64
|
+
'gpu_count': 2,
|
65
|
+
'min_cpu_count': 60,
|
66
|
+
'min_memory': 360.0
|
67
|
+
}, {
|
68
|
+
'gpu_type': 'RTX_A6000_48GB',
|
69
|
+
'gpu_count': 8,
|
70
|
+
'min_cpu_count': 252,
|
71
|
+
'min_memory': 464.0
|
72
|
+
}, {
|
73
|
+
'gpu_type': 'H100_NVLINK_80GB',
|
74
|
+
'gpu_count': 8,
|
75
|
+
'min_cpu_count': 252,
|
76
|
+
'min_memory': 1440.0
|
77
|
+
}, {
|
78
|
+
'gpu_type': 'H100_PCIE_80GB',
|
79
|
+
'gpu_count': 1,
|
80
|
+
'min_cpu_count': 28,
|
81
|
+
'min_memory': 180.0
|
82
|
+
}, {
|
83
|
+
'gpu_type': 'RTX_A5000_24GB',
|
84
|
+
'gpu_count': 1,
|
85
|
+
'min_cpu_count': 8,
|
86
|
+
'min_memory': 30.0
|
87
|
+
}, {
|
88
|
+
'gpu_type': 'RTX_A5000_24GB',
|
89
|
+
'gpu_count': 2,
|
90
|
+
'min_cpu_count': 16,
|
91
|
+
'min_memory': 60.0
|
92
|
+
}, {
|
93
|
+
'gpu_type': 'L40_48GB',
|
94
|
+
'gpu_count': 2,
|
95
|
+
'min_cpu_count': 64,
|
96
|
+
'min_memory': 120.0
|
97
|
+
}, {
|
98
|
+
'gpu_type': 'RTX_A4000_16GB',
|
99
|
+
'gpu_count': 8,
|
100
|
+
'min_cpu_count': 48,
|
101
|
+
'min_memory': 192.0
|
102
|
+
}, {
|
103
|
+
'gpu_type': 'RTX_A4000_16GB',
|
104
|
+
'gpu_count': 1,
|
105
|
+
'min_cpu_count': 6,
|
106
|
+
'min_memory': 24.0
|
107
|
+
}, {
|
108
|
+
'gpu_type': 'RTX_A4000_16GB',
|
109
|
+
'gpu_count': 4,
|
110
|
+
'min_cpu_count': 24,
|
111
|
+
'min_memory': 96.0
|
112
|
+
}, {
|
113
|
+
'gpu_type': 'A100_PCIE_80GB',
|
114
|
+
'gpu_count': 4,
|
115
|
+
'min_cpu_count': 124,
|
116
|
+
'min_memory': 480.0
|
117
|
+
}, {
|
118
|
+
'gpu_type': 'H100_PCIE_80GB',
|
119
|
+
'gpu_count': 4,
|
120
|
+
'min_cpu_count': 124,
|
121
|
+
'min_memory': 720.0
|
122
|
+
}, {
|
123
|
+
'gpu_type': 'L40_48GB',
|
124
|
+
'gpu_count': 8,
|
125
|
+
'min_cpu_count': 252,
|
126
|
+
'min_memory': 480.0
|
127
|
+
}, {
|
128
|
+
'gpu_type': 'RTX_A5000_24GB',
|
129
|
+
'gpu_count': 8,
|
130
|
+
'min_cpu_count': 64,
|
131
|
+
'min_memory': 240.0
|
132
|
+
}, {
|
133
|
+
'gpu_type': 'L40_48GB',
|
134
|
+
'gpu_count': 1,
|
135
|
+
'min_cpu_count': 32,
|
136
|
+
'min_memory': 60.0
|
137
|
+
}, {
|
138
|
+
'gpu_type': 'RTX_A6000_48GB',
|
139
|
+
'gpu_count': 1,
|
140
|
+
'min_cpu_count': 6,
|
141
|
+
'min_memory': 55.0
|
142
|
+
}, {
|
143
|
+
'gpu_type': 'L40_48GB',
|
144
|
+
'gpu_count': 4,
|
145
|
+
'min_cpu_count': 126,
|
146
|
+
'min_memory': 240.0
|
147
|
+
}, {
|
148
|
+
'gpu_type': 'A100_PCIE_80GB',
|
149
|
+
'gpu_count': 1,
|
150
|
+
'min_cpu_count': 28,
|
151
|
+
'min_memory': 120.0
|
152
|
+
}, {
|
153
|
+
'gpu_type': 'A100_PCIE_80GB',
|
154
|
+
'gpu_count': 8,
|
155
|
+
'min_cpu_count': 252,
|
156
|
+
'min_memory': 1440.0
|
157
|
+
}, {
|
158
|
+
'gpu_type': 'A100_PCIE_80GB',
|
159
|
+
'gpu_count': 2,
|
160
|
+
'min_cpu_count': 60,
|
161
|
+
'min_memory': 240.0
|
162
|
+
}, {
|
163
|
+
'gpu_type': 'RTX_A5000_24GB',
|
164
|
+
'gpu_count': 4,
|
165
|
+
'min_cpu_count': 32,
|
166
|
+
'min_memory': 120.0
|
167
|
+
}]
|
18
168
|
|
19
169
|
GPU_MAP = {
|
20
170
|
'H100_PCIE_80GB': 'H100',
|
21
171
|
'H100_NVLINK_80GB': 'H100',
|
22
172
|
'A100_NVLINK_80GB': 'A100-80GB',
|
23
|
-
'A100_SXM4_80GB': 'A100-80GB',
|
173
|
+
'A100_SXM4_80GB': 'A100-80GB-SXM',
|
174
|
+
'H100_SXM5_80GB': 'H100-SXM',
|
24
175
|
'A100_PCIE_80GB': 'A100-80GB',
|
25
176
|
'A100_SXM4_40GB': 'A100',
|
26
177
|
'A100_PCIE_40GB': 'A100',
|
@@ -47,19 +198,15 @@ def get_regions(plans: List) -> dict:
|
|
47
198
|
regions = {}
|
48
199
|
for plan in plans:
|
49
200
|
for region in plan.get('regions', []):
|
50
|
-
regions[region
|
201
|
+
regions[region] = region
|
51
202
|
return regions
|
52
203
|
|
53
204
|
|
54
205
|
def create_catalog(output_dir: str) -> None:
|
55
|
-
|
206
|
+
with open(DEFAULT_FLUIDSTACK_API_KEY_PATH, 'r', encoding='UTF-8') as f:
|
207
|
+
api_key = f.read().strip()
|
208
|
+
response = requests.get(ENDPOINT, headers={'api-key': api_key})
|
56
209
|
plans = response.json()
|
57
|
-
#plans = [plan for plan in plans if len(plan['regions']) > 0]
|
58
|
-
plans = [
|
59
|
-
plan for plan in plans if plan['minimum_commitment'] == 'hourly' and
|
60
|
-
plan['type'] in ['preconfigured'] and
|
61
|
-
plan['gpu_type'] not in ['NO GPU', 'RTX_3080_10GB', 'RTX_3090_24GB']
|
62
|
-
]
|
63
210
|
|
64
211
|
with open(os.path.join(output_dir, 'vms.csv'), mode='w',
|
65
212
|
encoding='utf-8') as f:
|
@@ -81,39 +228,45 @@ def create_catalog(output_dir: str) -> None:
|
|
81
228
|
except KeyError:
|
82
229
|
#print(f'Could not map {plan["gpu_type"]}')
|
83
230
|
continue
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
'
|
94
|
-
|
95
|
-
|
96
|
-
'MemoryInfo': {
|
97
|
-
'SizeInMiB': int(gpu_memory)
|
98
|
-
},
|
99
|
-
}],
|
100
|
-
'TotalGpuMemoryInMiB': int(gpu_memory * gpu_cnt),
|
101
|
-
}
|
102
|
-
gpuinfo = json.dumps(gpuinfo).replace('"', "'") # pylint: disable=invalid-string-quote
|
103
|
-
for r in plan.get('regions', []):
|
104
|
-
if r['id'] == 'india_2':
|
231
|
+
for gpu_cnt in plan['gpu_counts']:
|
232
|
+
gpu_memory = float(plan['gpu_type'].split('_')[-1].replace(
|
233
|
+
'GB', '')) * 1024
|
234
|
+
try:
|
235
|
+
vcpus_mem = [
|
236
|
+
x for x in plan_vcpus_memory
|
237
|
+
if x['gpu_type'] == plan['gpu_type'] and
|
238
|
+
x['gpu_count'] == gpu_cnt
|
239
|
+
][0]
|
240
|
+
vcpus = vcpus_mem['min_cpu_count']
|
241
|
+
mem = vcpus_mem['min_memory']
|
242
|
+
except IndexError:
|
105
243
|
continue
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
244
|
+
price = float(plan['price_per_gpu_hr']) * gpu_cnt
|
245
|
+
gpuinfo = {
|
246
|
+
'Gpus': [{
|
247
|
+
'Name': gpu,
|
248
|
+
'Manufacturer': 'NVIDIA',
|
249
|
+
'Count': gpu_cnt,
|
250
|
+
'MemoryInfo': {
|
251
|
+
'SizeInMiB': int(gpu_memory)
|
252
|
+
},
|
253
|
+
}],
|
254
|
+
'TotalGpuMemoryInMiB': int(gpu_memory * gpu_cnt),
|
255
|
+
}
|
256
|
+
gpuinfo = json.dumps(gpuinfo).replace('"', "'") # pylint: disable=invalid-string-quote
|
257
|
+
instance_type = f'{plan["gpu_type"]}::{gpu_cnt}'
|
258
|
+
for region in plan.get('regions', []):
|
259
|
+
writer.writerow([
|
260
|
+
instance_type,
|
261
|
+
gpu,
|
262
|
+
gpu_cnt,
|
263
|
+
vcpus,
|
264
|
+
mem,
|
265
|
+
price,
|
266
|
+
region,
|
267
|
+
gpuinfo,
|
268
|
+
'',
|
269
|
+
])
|
117
270
|
|
118
271
|
|
119
272
|
if __name__ == '__main__':
|