skypilot-nightly 1.0.0.dev2024053101__py3-none-any.whl → 1.0.0.dev2025022801__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. sky/__init__.py +64 -32
  2. sky/adaptors/aws.py +23 -6
  3. sky/adaptors/azure.py +432 -15
  4. sky/adaptors/cloudflare.py +5 -5
  5. sky/adaptors/common.py +19 -9
  6. sky/adaptors/do.py +20 -0
  7. sky/adaptors/gcp.py +3 -2
  8. sky/adaptors/kubernetes.py +122 -88
  9. sky/adaptors/nebius.py +100 -0
  10. sky/adaptors/oci.py +39 -1
  11. sky/adaptors/vast.py +29 -0
  12. sky/admin_policy.py +101 -0
  13. sky/authentication.py +117 -98
  14. sky/backends/backend.py +52 -20
  15. sky/backends/backend_utils.py +669 -557
  16. sky/backends/cloud_vm_ray_backend.py +1099 -808
  17. sky/backends/local_docker_backend.py +14 -8
  18. sky/backends/wheel_utils.py +38 -20
  19. sky/benchmark/benchmark_utils.py +22 -23
  20. sky/check.py +76 -27
  21. sky/cli.py +1586 -1139
  22. sky/client/__init__.py +1 -0
  23. sky/client/cli.py +5683 -0
  24. sky/client/common.py +345 -0
  25. sky/client/sdk.py +1765 -0
  26. sky/cloud_stores.py +283 -19
  27. sky/clouds/__init__.py +7 -2
  28. sky/clouds/aws.py +303 -112
  29. sky/clouds/azure.py +185 -179
  30. sky/clouds/cloud.py +115 -37
  31. sky/clouds/cudo.py +29 -22
  32. sky/clouds/do.py +313 -0
  33. sky/clouds/fluidstack.py +44 -54
  34. sky/clouds/gcp.py +206 -65
  35. sky/clouds/ibm.py +26 -21
  36. sky/clouds/kubernetes.py +345 -91
  37. sky/clouds/lambda_cloud.py +40 -29
  38. sky/clouds/nebius.py +297 -0
  39. sky/clouds/oci.py +129 -90
  40. sky/clouds/paperspace.py +22 -18
  41. sky/clouds/runpod.py +53 -34
  42. sky/clouds/scp.py +28 -24
  43. sky/clouds/service_catalog/__init__.py +19 -13
  44. sky/clouds/service_catalog/aws_catalog.py +29 -12
  45. sky/clouds/service_catalog/azure_catalog.py +33 -6
  46. sky/clouds/service_catalog/common.py +95 -75
  47. sky/clouds/service_catalog/constants.py +3 -3
  48. sky/clouds/service_catalog/cudo_catalog.py +13 -3
  49. sky/clouds/service_catalog/data_fetchers/fetch_aws.py +36 -21
  50. sky/clouds/service_catalog/data_fetchers/fetch_azure.py +31 -4
  51. sky/clouds/service_catalog/data_fetchers/fetch_cudo.py +8 -117
  52. sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py +197 -44
  53. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +224 -36
  54. sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py +44 -24
  55. sky/clouds/service_catalog/data_fetchers/fetch_vast.py +147 -0
  56. sky/clouds/service_catalog/data_fetchers/fetch_vsphere.py +1 -1
  57. sky/clouds/service_catalog/do_catalog.py +111 -0
  58. sky/clouds/service_catalog/fluidstack_catalog.py +2 -2
  59. sky/clouds/service_catalog/gcp_catalog.py +16 -2
  60. sky/clouds/service_catalog/ibm_catalog.py +2 -2
  61. sky/clouds/service_catalog/kubernetes_catalog.py +192 -70
  62. sky/clouds/service_catalog/lambda_catalog.py +8 -3
  63. sky/clouds/service_catalog/nebius_catalog.py +116 -0
  64. sky/clouds/service_catalog/oci_catalog.py +31 -4
  65. sky/clouds/service_catalog/paperspace_catalog.py +2 -2
  66. sky/clouds/service_catalog/runpod_catalog.py +2 -2
  67. sky/clouds/service_catalog/scp_catalog.py +2 -2
  68. sky/clouds/service_catalog/vast_catalog.py +104 -0
  69. sky/clouds/service_catalog/vsphere_catalog.py +2 -2
  70. sky/clouds/utils/aws_utils.py +65 -0
  71. sky/clouds/utils/azure_utils.py +91 -0
  72. sky/clouds/utils/gcp_utils.py +5 -9
  73. sky/clouds/utils/oci_utils.py +47 -5
  74. sky/clouds/utils/scp_utils.py +4 -3
  75. sky/clouds/vast.py +280 -0
  76. sky/clouds/vsphere.py +22 -18
  77. sky/core.py +361 -107
  78. sky/dag.py +41 -28
  79. sky/data/data_transfer.py +37 -0
  80. sky/data/data_utils.py +211 -32
  81. sky/data/mounting_utils.py +182 -30
  82. sky/data/storage.py +2118 -270
  83. sky/data/storage_utils.py +126 -5
  84. sky/exceptions.py +179 -8
  85. sky/execution.py +158 -85
  86. sky/global_user_state.py +150 -34
  87. sky/jobs/__init__.py +12 -10
  88. sky/jobs/client/__init__.py +0 -0
  89. sky/jobs/client/sdk.py +302 -0
  90. sky/jobs/constants.py +49 -11
  91. sky/jobs/controller.py +161 -99
  92. sky/jobs/dashboard/dashboard.py +171 -25
  93. sky/jobs/dashboard/templates/index.html +572 -60
  94. sky/jobs/recovery_strategy.py +157 -156
  95. sky/jobs/scheduler.py +307 -0
  96. sky/jobs/server/__init__.py +1 -0
  97. sky/jobs/server/core.py +598 -0
  98. sky/jobs/server/dashboard_utils.py +69 -0
  99. sky/jobs/server/server.py +190 -0
  100. sky/jobs/state.py +627 -122
  101. sky/jobs/utils.py +615 -206
  102. sky/models.py +27 -0
  103. sky/optimizer.py +142 -83
  104. sky/provision/__init__.py +20 -5
  105. sky/provision/aws/config.py +124 -42
  106. sky/provision/aws/instance.py +130 -53
  107. sky/provision/azure/__init__.py +7 -0
  108. sky/{skylet/providers → provision}/azure/azure-config-template.json +19 -7
  109. sky/provision/azure/config.py +220 -0
  110. sky/provision/azure/instance.py +1012 -37
  111. sky/provision/common.py +31 -3
  112. sky/provision/constants.py +25 -0
  113. sky/provision/cudo/__init__.py +2 -1
  114. sky/provision/cudo/cudo_utils.py +112 -0
  115. sky/provision/cudo/cudo_wrapper.py +37 -16
  116. sky/provision/cudo/instance.py +28 -12
  117. sky/provision/do/__init__.py +11 -0
  118. sky/provision/do/config.py +14 -0
  119. sky/provision/do/constants.py +10 -0
  120. sky/provision/do/instance.py +287 -0
  121. sky/provision/do/utils.py +301 -0
  122. sky/provision/docker_utils.py +82 -46
  123. sky/provision/fluidstack/fluidstack_utils.py +57 -125
  124. sky/provision/fluidstack/instance.py +15 -43
  125. sky/provision/gcp/config.py +19 -9
  126. sky/provision/gcp/constants.py +7 -1
  127. sky/provision/gcp/instance.py +55 -34
  128. sky/provision/gcp/instance_utils.py +339 -80
  129. sky/provision/gcp/mig_utils.py +210 -0
  130. sky/provision/instance_setup.py +172 -133
  131. sky/provision/kubernetes/__init__.py +1 -0
  132. sky/provision/kubernetes/config.py +104 -90
  133. sky/provision/kubernetes/constants.py +8 -0
  134. sky/provision/kubernetes/instance.py +680 -325
  135. sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml +3 -0
  136. sky/provision/kubernetes/network.py +54 -20
  137. sky/provision/kubernetes/network_utils.py +70 -21
  138. sky/provision/kubernetes/utils.py +1370 -251
  139. sky/provision/lambda_cloud/__init__.py +11 -0
  140. sky/provision/lambda_cloud/config.py +10 -0
  141. sky/provision/lambda_cloud/instance.py +265 -0
  142. sky/{clouds/utils → provision/lambda_cloud}/lambda_utils.py +24 -23
  143. sky/provision/logging.py +1 -1
  144. sky/provision/nebius/__init__.py +11 -0
  145. sky/provision/nebius/config.py +11 -0
  146. sky/provision/nebius/instance.py +285 -0
  147. sky/provision/nebius/utils.py +318 -0
  148. sky/provision/oci/__init__.py +15 -0
  149. sky/provision/oci/config.py +51 -0
  150. sky/provision/oci/instance.py +436 -0
  151. sky/provision/oci/query_utils.py +681 -0
  152. sky/provision/paperspace/constants.py +6 -0
  153. sky/provision/paperspace/instance.py +4 -3
  154. sky/provision/paperspace/utils.py +2 -0
  155. sky/provision/provisioner.py +207 -130
  156. sky/provision/runpod/__init__.py +1 -0
  157. sky/provision/runpod/api/__init__.py +3 -0
  158. sky/provision/runpod/api/commands.py +119 -0
  159. sky/provision/runpod/api/pods.py +142 -0
  160. sky/provision/runpod/instance.py +64 -8
  161. sky/provision/runpod/utils.py +239 -23
  162. sky/provision/vast/__init__.py +10 -0
  163. sky/provision/vast/config.py +11 -0
  164. sky/provision/vast/instance.py +247 -0
  165. sky/provision/vast/utils.py +162 -0
  166. sky/provision/vsphere/common/vim_utils.py +1 -1
  167. sky/provision/vsphere/instance.py +8 -18
  168. sky/provision/vsphere/vsphere_utils.py +1 -1
  169. sky/resources.py +247 -102
  170. sky/serve/__init__.py +9 -9
  171. sky/serve/autoscalers.py +361 -299
  172. sky/serve/client/__init__.py +0 -0
  173. sky/serve/client/sdk.py +366 -0
  174. sky/serve/constants.py +12 -3
  175. sky/serve/controller.py +106 -36
  176. sky/serve/load_balancer.py +63 -12
  177. sky/serve/load_balancing_policies.py +84 -2
  178. sky/serve/replica_managers.py +42 -34
  179. sky/serve/serve_state.py +62 -32
  180. sky/serve/serve_utils.py +271 -160
  181. sky/serve/server/__init__.py +0 -0
  182. sky/serve/{core.py → server/core.py} +271 -90
  183. sky/serve/server/server.py +112 -0
  184. sky/serve/service.py +52 -16
  185. sky/serve/service_spec.py +95 -32
  186. sky/server/__init__.py +1 -0
  187. sky/server/common.py +430 -0
  188. sky/server/constants.py +21 -0
  189. sky/server/html/log.html +174 -0
  190. sky/server/requests/__init__.py +0 -0
  191. sky/server/requests/executor.py +472 -0
  192. sky/server/requests/payloads.py +487 -0
  193. sky/server/requests/queues/__init__.py +0 -0
  194. sky/server/requests/queues/mp_queue.py +76 -0
  195. sky/server/requests/requests.py +567 -0
  196. sky/server/requests/serializers/__init__.py +0 -0
  197. sky/server/requests/serializers/decoders.py +192 -0
  198. sky/server/requests/serializers/encoders.py +166 -0
  199. sky/server/server.py +1106 -0
  200. sky/server/stream_utils.py +141 -0
  201. sky/setup_files/MANIFEST.in +2 -5
  202. sky/setup_files/dependencies.py +159 -0
  203. sky/setup_files/setup.py +14 -125
  204. sky/sky_logging.py +59 -14
  205. sky/skylet/autostop_lib.py +2 -2
  206. sky/skylet/constants.py +183 -50
  207. sky/skylet/events.py +22 -10
  208. sky/skylet/job_lib.py +403 -258
  209. sky/skylet/log_lib.py +111 -71
  210. sky/skylet/log_lib.pyi +6 -0
  211. sky/skylet/providers/command_runner.py +6 -8
  212. sky/skylet/providers/ibm/node_provider.py +2 -2
  213. sky/skylet/providers/scp/config.py +11 -3
  214. sky/skylet/providers/scp/node_provider.py +8 -8
  215. sky/skylet/skylet.py +3 -1
  216. sky/skylet/subprocess_daemon.py +69 -17
  217. sky/skypilot_config.py +119 -57
  218. sky/task.py +205 -64
  219. sky/templates/aws-ray.yml.j2 +37 -7
  220. sky/templates/azure-ray.yml.j2 +27 -82
  221. sky/templates/cudo-ray.yml.j2 +7 -3
  222. sky/templates/do-ray.yml.j2 +98 -0
  223. sky/templates/fluidstack-ray.yml.j2 +7 -4
  224. sky/templates/gcp-ray.yml.j2 +26 -6
  225. sky/templates/ibm-ray.yml.j2 +3 -2
  226. sky/templates/jobs-controller.yaml.j2 +46 -11
  227. sky/templates/kubernetes-ingress.yml.j2 +7 -0
  228. sky/templates/kubernetes-loadbalancer.yml.j2 +7 -0
  229. sky/templates/{kubernetes-port-forward-proxy-command.sh.j2 → kubernetes-port-forward-proxy-command.sh} +51 -7
  230. sky/templates/kubernetes-ray.yml.j2 +292 -25
  231. sky/templates/lambda-ray.yml.j2 +30 -40
  232. sky/templates/nebius-ray.yml.j2 +79 -0
  233. sky/templates/oci-ray.yml.j2 +18 -57
  234. sky/templates/paperspace-ray.yml.j2 +10 -6
  235. sky/templates/runpod-ray.yml.j2 +26 -4
  236. sky/templates/scp-ray.yml.j2 +3 -2
  237. sky/templates/sky-serve-controller.yaml.j2 +12 -1
  238. sky/templates/skypilot-server-kubernetes-proxy.sh +36 -0
  239. sky/templates/vast-ray.yml.j2 +70 -0
  240. sky/templates/vsphere-ray.yml.j2 +8 -3
  241. sky/templates/websocket_proxy.py +64 -0
  242. sky/usage/constants.py +10 -1
  243. sky/usage/usage_lib.py +130 -37
  244. sky/utils/accelerator_registry.py +35 -51
  245. sky/utils/admin_policy_utils.py +147 -0
  246. sky/utils/annotations.py +51 -0
  247. sky/utils/cli_utils/status_utils.py +81 -23
  248. sky/utils/cluster_utils.py +356 -0
  249. sky/utils/command_runner.py +452 -89
  250. sky/utils/command_runner.pyi +77 -3
  251. sky/utils/common.py +54 -0
  252. sky/utils/common_utils.py +319 -108
  253. sky/utils/config_utils.py +204 -0
  254. sky/utils/control_master_utils.py +48 -0
  255. sky/utils/controller_utils.py +548 -266
  256. sky/utils/dag_utils.py +93 -32
  257. sky/utils/db_utils.py +18 -4
  258. sky/utils/env_options.py +29 -7
  259. sky/utils/kubernetes/create_cluster.sh +8 -60
  260. sky/utils/kubernetes/deploy_remote_cluster.sh +243 -0
  261. sky/utils/kubernetes/exec_kubeconfig_converter.py +73 -0
  262. sky/utils/kubernetes/generate_kubeconfig.sh +336 -0
  263. sky/utils/kubernetes/gpu_labeler.py +4 -4
  264. sky/utils/kubernetes/k8s_gpu_labeler_job.yaml +4 -3
  265. sky/utils/kubernetes/kubernetes_deploy_utils.py +228 -0
  266. sky/utils/kubernetes/rsync_helper.sh +24 -0
  267. sky/utils/kubernetes/ssh_jump_lifecycle_manager.py +1 -1
  268. sky/utils/log_utils.py +240 -33
  269. sky/utils/message_utils.py +81 -0
  270. sky/utils/registry.py +127 -0
  271. sky/utils/resources_utils.py +94 -22
  272. sky/utils/rich_utils.py +247 -18
  273. sky/utils/schemas.py +284 -64
  274. sky/{status_lib.py → utils/status_lib.py} +12 -7
  275. sky/utils/subprocess_utils.py +212 -46
  276. sky/utils/timeline.py +12 -7
  277. sky/utils/ux_utils.py +168 -15
  278. skypilot_nightly-1.0.0.dev2025022801.dist-info/METADATA +363 -0
  279. skypilot_nightly-1.0.0.dev2025022801.dist-info/RECORD +352 -0
  280. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/WHEEL +1 -1
  281. sky/clouds/cloud_registry.py +0 -31
  282. sky/jobs/core.py +0 -330
  283. sky/skylet/providers/azure/__init__.py +0 -2
  284. sky/skylet/providers/azure/azure-vm-template.json +0 -301
  285. sky/skylet/providers/azure/config.py +0 -170
  286. sky/skylet/providers/azure/node_provider.py +0 -466
  287. sky/skylet/providers/lambda_cloud/__init__.py +0 -2
  288. sky/skylet/providers/lambda_cloud/node_provider.py +0 -320
  289. sky/skylet/providers/oci/__init__.py +0 -2
  290. sky/skylet/providers/oci/node_provider.py +0 -488
  291. sky/skylet/providers/oci/query_helper.py +0 -383
  292. sky/skylet/providers/oci/utils.py +0 -21
  293. sky/utils/cluster_yaml_utils.py +0 -24
  294. sky/utils/kubernetes/generate_static_kubeconfig.sh +0 -137
  295. skypilot_nightly-1.0.0.dev2024053101.dist-info/METADATA +0 -315
  296. skypilot_nightly-1.0.0.dev2024053101.dist-info/RECORD +0 -275
  297. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/LICENSE +0 -0
  298. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/entry_points.txt +0 -0
  299. {skypilot_nightly-1.0.0.dev2024053101.dist-info → skypilot_nightly-1.0.0.dev2025022801.dist-info}/top_level.txt +0 -0
@@ -9,98 +9,9 @@ import os
9
9
 
10
10
  import cudo_compute
11
11
 
12
- VMS_CSV = 'cudo/vms.csv'
12
+ import sky.provision.cudo.cudo_utils as utils
13
13
 
14
- cudo_gpu_model = {
15
- 'NVIDIA V100': 'V100',
16
- 'NVIDIA A40': 'A40',
17
- 'RTX 3080': 'RTX3080',
18
- 'RTX A4000': 'RTXA4000',
19
- 'RTX A4500': 'RTXA4500',
20
- 'RTX A5000': 'RTXA5000',
21
- 'RTX A6000': 'RTXA6000',
22
- }
23
-
24
- cudo_gpu_mem = {
25
- 'RTX3080': 12,
26
- 'A40': 48,
27
- 'RTXA4000': 16,
28
- 'RTXA4500': 20,
29
- 'RTXA5000': 24,
30
- 'RTXA6000': 48,
31
- 'V100': 16,
32
- }
33
-
34
- machine_specs = [
35
- # Low
36
- {
37
- 'vcpu': 2,
38
- 'mem': 4,
39
- 'gpu': 1,
40
- },
41
- {
42
- 'vcpu': 4,
43
- 'mem': 8,
44
- 'gpu': 1,
45
- },
46
- {
47
- 'vcpu': 8,
48
- 'mem': 16,
49
- 'gpu': 2,
50
- },
51
- {
52
- 'vcpu': 16,
53
- 'mem': 32,
54
- 'gpu': 2,
55
- },
56
- {
57
- 'vcpu': 32,
58
- 'mem': 64,
59
- 'gpu': 4,
60
- },
61
- {
62
- 'vcpu': 64,
63
- 'mem': 128,
64
- 'gpu': 8,
65
- },
66
- # Mid
67
- {
68
- 'vcpu': 96,
69
- 'mem': 192,
70
- 'gpu': 8
71
- },
72
- {
73
- 'vcpu': 48,
74
- 'mem': 96,
75
- 'gpu': 4
76
- },
77
- {
78
- 'vcpu': 24,
79
- 'mem': 48,
80
- 'gpu': 2
81
- },
82
- {
83
- 'vcpu': 12,
84
- 'mem': 24,
85
- 'gpu': 1
86
- },
87
- # Hi
88
- {
89
- 'vcpu': 96,
90
- 'mem': 192,
91
- 'gpu': 4
92
- },
93
- {
94
- 'vcpu': 48,
95
- 'mem': 96,
96
- 'gpu': 2
97
- },
98
- {
99
- 'vcpu': 24,
100
- 'mem': 48,
101
- 'gpu': 1
102
- },
103
- ]
14
+ VMS_CSV = 'cudo/vms.csv'
104
15
 
105
16
 
106
17
  def cudo_api():
@@ -110,28 +21,8 @@ def cudo_api():
110
21
  return cudo_compute.VirtualMachinesApi(client)
111
22
 
112
23
 
113
- def cudo_gpu_to_skypilot_gpu(model):
114
- if model in cudo_gpu_model:
115
- return cudo_gpu_model[model]
116
- else:
117
- return model
118
-
119
-
120
- def skypilot_gpu_to_cudo_gpu(model):
121
- for key, value in cudo_gpu_model.items():
122
- if value == model:
123
- return key
124
- return model
125
-
126
-
127
- def gpu_exists(model):
128
- if model in cudo_gpu_model:
129
- return True
130
- return False
131
-
132
-
133
24
  def get_gpu_info(count, model):
134
- mem = cudo_gpu_mem[model]
25
+ mem = utils.cudo_gpu_mem[model]
135
26
  # pylint: disable=line-too-long
136
27
  # {'Name': 'A4000', 'Manufacturer': 'NVIDIA', 'Count': 1.0, 'MemoryInfo': {'SizeInMiB': 16384}}], 'TotalGpuMemoryInMiB': 16384}"
137
28
  info = {
@@ -168,16 +59,16 @@ def machine_types(gpu_model, mem_gib, vcpu_count, gpu_count):
168
59
 
169
60
  def update_prices():
170
61
  rows = []
171
- for spec in machine_specs:
62
+ for spec in utils.machine_specs:
172
63
  mts = machine_types('', spec['mem'], spec['vcpu'], spec['gpu'])
173
64
  for hc in mts['host_configs']:
174
- if not gpu_exists(hc['gpu_model']):
65
+ if not utils.gpu_exists(hc['gpu_model']):
175
66
  continue
176
- accelerator_name = cudo_gpu_to_skypilot_gpu(hc['gpu_model'])
67
+ accelerator_name = utils.cudo_gpu_to_skypilot_gpu(hc['gpu_model'])
177
68
  row = {
178
69
  'instance_type': get_instance_type(hc['machine_type'],
179
- spec['gpu'], spec['vcpu'],
180
- spec['mem']),
70
+ spec['vcpu'], spec['mem'],
71
+ spec['gpu']),
181
72
  'accelerator_name': accelerator_name,
182
73
  'accelerator_count': str(spec['gpu']) + '.0',
183
74
  'vcpus': str(spec['vcpu']),
@@ -11,16 +11,167 @@ from typing import List
11
11
 
12
12
  import requests
13
13
 
14
- ENDPOINT = 'https://api.fluidstack.io/v1/plans'
14
+ ENDPOINT = 'https://platform.fluidstack.io/list_available_configurations'
15
15
  DEFAULT_FLUIDSTACK_API_KEY_PATH = os.path.expanduser('~/.fluidstack/api_key')
16
- DEFAULT_FLUIDSTACK_API_TOKEN_PATH = os.path.expanduser(
17
- '~/.fluidstack/api_token')
16
+
17
+ plan_vcpus_memory = [{
18
+ 'gpu_type': 'H100_SXM5_80GB',
19
+ 'gpu_count': 1,
20
+ 'min_cpu_count': 52,
21
+ 'min_memory': 450
22
+ }, {
23
+ 'gpu_type': 'H100_SXM5_80GB',
24
+ 'gpu_count': 2,
25
+ 'min_cpu_count': 52,
26
+ 'min_memory': 450
27
+ }, {
28
+ 'gpu_type': 'H100_SXM5_80GB',
29
+ 'gpu_count': 4,
30
+ 'min_cpu_count': 104,
31
+ 'min_memory': 900
32
+ }, {
33
+ 'gpu_type': 'H100_SXM5_80GB',
34
+ 'gpu_count': 8,
35
+ 'min_cpu_count': 192,
36
+ 'min_memory': 1800
37
+ }, {
38
+ 'gpu_type': 'RTX_A6000_48GB',
39
+ 'gpu_count': 2,
40
+ 'min_cpu_count': 12,
41
+ 'min_memory': 110.0
42
+ }, {
43
+ 'gpu_type': 'RTX_A6000_48GB',
44
+ 'gpu_count': 4,
45
+ 'min_cpu_count': 24,
46
+ 'min_memory': 220.0
47
+ }, {
48
+ 'gpu_type': 'A100_NVLINK_80GB',
49
+ 'gpu_count': 8,
50
+ 'min_cpu_count': 252,
51
+ 'min_memory': 960.0
52
+ }, {
53
+ 'gpu_type': 'H100_PCIE_80GB',
54
+ 'gpu_count': 8,
55
+ 'min_cpu_count': 252,
56
+ 'min_memory': 1440.0
57
+ }, {
58
+ 'gpu_type': 'RTX_A4000_16GB',
59
+ 'gpu_count': 2,
60
+ 'min_cpu_count': 12,
61
+ 'min_memory': 48.0
62
+ }, {
63
+ 'gpu_type': 'H100_PCIE_80GB',
64
+ 'gpu_count': 2,
65
+ 'min_cpu_count': 60,
66
+ 'min_memory': 360.0
67
+ }, {
68
+ 'gpu_type': 'RTX_A6000_48GB',
69
+ 'gpu_count': 8,
70
+ 'min_cpu_count': 252,
71
+ 'min_memory': 464.0
72
+ }, {
73
+ 'gpu_type': 'H100_NVLINK_80GB',
74
+ 'gpu_count': 8,
75
+ 'min_cpu_count': 252,
76
+ 'min_memory': 1440.0
77
+ }, {
78
+ 'gpu_type': 'H100_PCIE_80GB',
79
+ 'gpu_count': 1,
80
+ 'min_cpu_count': 28,
81
+ 'min_memory': 180.0
82
+ }, {
83
+ 'gpu_type': 'RTX_A5000_24GB',
84
+ 'gpu_count': 1,
85
+ 'min_cpu_count': 8,
86
+ 'min_memory': 30.0
87
+ }, {
88
+ 'gpu_type': 'RTX_A5000_24GB',
89
+ 'gpu_count': 2,
90
+ 'min_cpu_count': 16,
91
+ 'min_memory': 60.0
92
+ }, {
93
+ 'gpu_type': 'L40_48GB',
94
+ 'gpu_count': 2,
95
+ 'min_cpu_count': 64,
96
+ 'min_memory': 120.0
97
+ }, {
98
+ 'gpu_type': 'RTX_A4000_16GB',
99
+ 'gpu_count': 8,
100
+ 'min_cpu_count': 48,
101
+ 'min_memory': 192.0
102
+ }, {
103
+ 'gpu_type': 'RTX_A4000_16GB',
104
+ 'gpu_count': 1,
105
+ 'min_cpu_count': 6,
106
+ 'min_memory': 24.0
107
+ }, {
108
+ 'gpu_type': 'RTX_A4000_16GB',
109
+ 'gpu_count': 4,
110
+ 'min_cpu_count': 24,
111
+ 'min_memory': 96.0
112
+ }, {
113
+ 'gpu_type': 'A100_PCIE_80GB',
114
+ 'gpu_count': 4,
115
+ 'min_cpu_count': 124,
116
+ 'min_memory': 480.0
117
+ }, {
118
+ 'gpu_type': 'H100_PCIE_80GB',
119
+ 'gpu_count': 4,
120
+ 'min_cpu_count': 124,
121
+ 'min_memory': 720.0
122
+ }, {
123
+ 'gpu_type': 'L40_48GB',
124
+ 'gpu_count': 8,
125
+ 'min_cpu_count': 252,
126
+ 'min_memory': 480.0
127
+ }, {
128
+ 'gpu_type': 'RTX_A5000_24GB',
129
+ 'gpu_count': 8,
130
+ 'min_cpu_count': 64,
131
+ 'min_memory': 240.0
132
+ }, {
133
+ 'gpu_type': 'L40_48GB',
134
+ 'gpu_count': 1,
135
+ 'min_cpu_count': 32,
136
+ 'min_memory': 60.0
137
+ }, {
138
+ 'gpu_type': 'RTX_A6000_48GB',
139
+ 'gpu_count': 1,
140
+ 'min_cpu_count': 6,
141
+ 'min_memory': 55.0
142
+ }, {
143
+ 'gpu_type': 'L40_48GB',
144
+ 'gpu_count': 4,
145
+ 'min_cpu_count': 126,
146
+ 'min_memory': 240.0
147
+ }, {
148
+ 'gpu_type': 'A100_PCIE_80GB',
149
+ 'gpu_count': 1,
150
+ 'min_cpu_count': 28,
151
+ 'min_memory': 120.0
152
+ }, {
153
+ 'gpu_type': 'A100_PCIE_80GB',
154
+ 'gpu_count': 8,
155
+ 'min_cpu_count': 252,
156
+ 'min_memory': 1440.0
157
+ }, {
158
+ 'gpu_type': 'A100_PCIE_80GB',
159
+ 'gpu_count': 2,
160
+ 'min_cpu_count': 60,
161
+ 'min_memory': 240.0
162
+ }, {
163
+ 'gpu_type': 'RTX_A5000_24GB',
164
+ 'gpu_count': 4,
165
+ 'min_cpu_count': 32,
166
+ 'min_memory': 120.0
167
+ }]
18
168
 
19
169
  GPU_MAP = {
20
170
  'H100_PCIE_80GB': 'H100',
21
171
  'H100_NVLINK_80GB': 'H100',
22
172
  'A100_NVLINK_80GB': 'A100-80GB',
23
- 'A100_SXM4_80GB': 'A100-80GB',
173
+ 'A100_SXM4_80GB': 'A100-80GB-SXM',
174
+ 'H100_SXM5_80GB': 'H100-SXM',
24
175
  'A100_PCIE_80GB': 'A100-80GB',
25
176
  'A100_SXM4_40GB': 'A100',
26
177
  'A100_PCIE_40GB': 'A100',
@@ -47,19 +198,15 @@ def get_regions(plans: List) -> dict:
47
198
  regions = {}
48
199
  for plan in plans:
49
200
  for region in plan.get('regions', []):
50
- regions[region['id']] = region['id']
201
+ regions[region] = region
51
202
  return regions
52
203
 
53
204
 
54
205
  def create_catalog(output_dir: str) -> None:
55
- response = requests.get(ENDPOINT)
206
+ with open(DEFAULT_FLUIDSTACK_API_KEY_PATH, 'r', encoding='UTF-8') as f:
207
+ api_key = f.read().strip()
208
+ response = requests.get(ENDPOINT, headers={'api-key': api_key})
56
209
  plans = response.json()
57
- #plans = [plan for plan in plans if len(plan['regions']) > 0]
58
- plans = [
59
- plan for plan in plans if plan['minimum_commitment'] == 'hourly' and
60
- plan['type'] in ['preconfigured'] and
61
- plan['gpu_type'] not in ['NO GPU', 'RTX_3080_10GB', 'RTX_3090_24GB']
62
- ]
63
210
 
64
211
  with open(os.path.join(output_dir, 'vms.csv'), mode='w',
65
212
  encoding='utf-8') as f:
@@ -81,39 +228,45 @@ def create_catalog(output_dir: str) -> None:
81
228
  except KeyError:
82
229
  #print(f'Could not map {plan["gpu_type"]}')
83
230
  continue
84
- gpu_memory = int(
85
- str(plan['configuration']['gpu_memory']).replace('GB',
86
- '')) * 1024
87
- gpu_cnt = int(plan['configuration']['gpu_count'])
88
- vcpus = float(plan['configuration']['core_count'])
89
- mem = float(plan['configuration']['ram'])
90
- price = float(plan['price']['hourly']) * gpu_cnt
91
- gpuinfo = {
92
- 'Gpus': [{
93
- 'Name': gpu,
94
- 'Manufacturer': 'NVIDIA',
95
- 'Count': gpu_cnt,
96
- 'MemoryInfo': {
97
- 'SizeInMiB': int(gpu_memory)
98
- },
99
- }],
100
- 'TotalGpuMemoryInMiB': int(gpu_memory * gpu_cnt),
101
- }
102
- gpuinfo = json.dumps(gpuinfo).replace('"', "'") # pylint: disable=invalid-string-quote
103
- for r in plan.get('regions', []):
104
- if r['id'] == 'india_2':
231
+ for gpu_cnt in plan['gpu_counts']:
232
+ gpu_memory = float(plan['gpu_type'].split('_')[-1].replace(
233
+ 'GB', '')) * 1024
234
+ try:
235
+ vcpus_mem = [
236
+ x for x in plan_vcpus_memory
237
+ if x['gpu_type'] == plan['gpu_type'] and
238
+ x['gpu_count'] == gpu_cnt
239
+ ][0]
240
+ vcpus = vcpus_mem['min_cpu_count']
241
+ mem = vcpus_mem['min_memory']
242
+ except IndexError:
105
243
  continue
106
- writer.writerow([
107
- plan['plan_id'],
108
- gpu,
109
- gpu_cnt,
110
- vcpus,
111
- mem,
112
- price,
113
- r['id'],
114
- gpuinfo,
115
- '',
116
- ])
244
+ price = float(plan['price_per_gpu_hr']) * gpu_cnt
245
+ gpuinfo = {
246
+ 'Gpus': [{
247
+ 'Name': gpu,
248
+ 'Manufacturer': 'NVIDIA',
249
+ 'Count': gpu_cnt,
250
+ 'MemoryInfo': {
251
+ 'SizeInMiB': int(gpu_memory)
252
+ },
253
+ }],
254
+ 'TotalGpuMemoryInMiB': int(gpu_memory * gpu_cnt),
255
+ }
256
+ gpuinfo = json.dumps(gpuinfo).replace('"', "'") # pylint: disable=invalid-string-quote
257
+ instance_type = f'{plan["gpu_type"]}::{gpu_cnt}'
258
+ for region in plan.get('regions', []):
259
+ writer.writerow([
260
+ instance_type,
261
+ gpu,
262
+ gpu_cnt,
263
+ vcpus,
264
+ mem,
265
+ price,
266
+ region,
267
+ gpuinfo,
268
+ '',
269
+ ])
117
270
 
118
271
 
119
272
  if __name__ == '__main__':