skypilot-nightly 1.0.0.dev20250916__py3-none-any.whl → 1.0.0.dev20250919__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skypilot-nightly might be problematic. Click here for more details.

Files changed (81) hide show
  1. sky/__init__.py +4 -2
  2. sky/adaptors/primeintellect.py +1 -0
  3. sky/adaptors/seeweb.py +68 -4
  4. sky/authentication.py +25 -0
  5. sky/backends/__init__.py +3 -2
  6. sky/backends/backend_utils.py +16 -12
  7. sky/backends/cloud_vm_ray_backend.py +57 -0
  8. sky/catalog/primeintellect_catalog.py +95 -0
  9. sky/clouds/__init__.py +2 -0
  10. sky/clouds/primeintellect.py +314 -0
  11. sky/core.py +77 -48
  12. sky/dashboard/out/404.html +1 -1
  13. sky/dashboard/out/_next/static/{y8s7LlyyfhMzpzCkxuD2r → VvaUqYDvHOcHZRnvMBmax}/_buildManifest.js +1 -1
  14. sky/dashboard/out/_next/static/chunks/1121-4ff1ec0dbc5792ab.js +1 -0
  15. sky/dashboard/out/_next/static/chunks/3015-88c7c8d69b0b6dba.js +1 -0
  16. sky/dashboard/out/_next/static/chunks/{6856-e0754534b3015377.js → 6856-9a2538f38c004652.js} +1 -1
  17. sky/dashboard/out/_next/static/chunks/8969-a39efbadcd9fde80.js +1 -0
  18. sky/dashboard/out/_next/static/chunks/9037-472ee1222cb1e158.js +6 -0
  19. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1e9248ddbddcd122.js +16 -0
  20. sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-0b4b35dc1dfe046c.js → [cluster]-9525660179df3605.js} +1 -1
  21. sky/dashboard/out/_next/static/chunks/{webpack-05f82d90d6fd7f82.js → webpack-b2a3938c22b6647b.js} +1 -1
  22. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  23. sky/dashboard/out/clusters/[cluster].html +1 -1
  24. sky/dashboard/out/clusters.html +1 -1
  25. sky/dashboard/out/config.html +1 -1
  26. sky/dashboard/out/index.html +1 -1
  27. sky/dashboard/out/infra/[context].html +1 -1
  28. sky/dashboard/out/infra.html +1 -1
  29. sky/dashboard/out/jobs/[job].html +1 -1
  30. sky/dashboard/out/jobs/pools/[pool].html +1 -1
  31. sky/dashboard/out/jobs.html +1 -1
  32. sky/dashboard/out/users.html +1 -1
  33. sky/dashboard/out/volumes.html +1 -1
  34. sky/dashboard/out/workspace/new.html +1 -1
  35. sky/dashboard/out/workspaces/[name].html +1 -1
  36. sky/dashboard/out/workspaces.html +1 -1
  37. sky/global_user_state.py +99 -62
  38. sky/jobs/server/server.py +14 -1
  39. sky/jobs/state.py +26 -1
  40. sky/metrics/utils.py +174 -8
  41. sky/provision/__init__.py +1 -0
  42. sky/provision/docker_utils.py +6 -2
  43. sky/provision/primeintellect/__init__.py +10 -0
  44. sky/provision/primeintellect/config.py +11 -0
  45. sky/provision/primeintellect/instance.py +454 -0
  46. sky/provision/primeintellect/utils.py +398 -0
  47. sky/resources.py +9 -1
  48. sky/schemas/generated/jobsv1_pb2.py +40 -40
  49. sky/schemas/generated/servev1_pb2.py +58 -0
  50. sky/schemas/generated/servev1_pb2.pyi +115 -0
  51. sky/schemas/generated/servev1_pb2_grpc.py +322 -0
  52. sky/serve/serve_rpc_utils.py +179 -0
  53. sky/serve/serve_utils.py +29 -12
  54. sky/serve/server/core.py +37 -19
  55. sky/serve/server/impl.py +221 -129
  56. sky/server/metrics.py +52 -158
  57. sky/server/requests/executor.py +12 -8
  58. sky/server/requests/payloads.py +6 -0
  59. sky/server/requests/requests.py +1 -1
  60. sky/server/requests/serializers/encoders.py +3 -2
  61. sky/server/server.py +5 -41
  62. sky/setup_files/dependencies.py +1 -0
  63. sky/skylet/constants.py +10 -5
  64. sky/skylet/job_lib.py +14 -15
  65. sky/skylet/services.py +98 -0
  66. sky/skylet/skylet.py +3 -1
  67. sky/templates/kubernetes-ray.yml.j2 +22 -12
  68. sky/templates/primeintellect-ray.yml.j2 +71 -0
  69. sky/utils/locks.py +41 -10
  70. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250919.dist-info}/METADATA +36 -35
  71. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250919.dist-info}/RECORD +76 -64
  72. sky/dashboard/out/_next/static/chunks/1121-408ed10b2f9fce17.js +0 -1
  73. sky/dashboard/out/_next/static/chunks/3015-2ea98b57e318bd6e.js +0 -1
  74. sky/dashboard/out/_next/static/chunks/8969-0487dfbf149d9e53.js +0 -1
  75. sky/dashboard/out/_next/static/chunks/9037-f9800e64eb05dd1c.js +0 -6
  76. sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js +0 -16
  77. /sky/dashboard/out/_next/static/{y8s7LlyyfhMzpzCkxuD2r → VvaUqYDvHOcHZRnvMBmax}/_ssgManifest.js +0 -0
  78. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250919.dist-info}/WHEEL +0 -0
  79. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250919.dist-info}/entry_points.txt +0 -0
  80. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250919.dist-info}/licenses/LICENSE +0 -0
  81. {skypilot_nightly-1.0.0.dev20250916.dist-info → skypilot_nightly-1.0.0.dev20250919.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,314 @@
1
+ """ Prime Intellect Cloud. """
2
+ import json
3
+ import os
4
+ import typing
5
+ from typing import Dict, Iterator, List, Optional, Tuple, Union
6
+
7
+ from sky import catalog
8
+ from sky import clouds
9
+ from sky.provision.primeintellect import utils
10
+ from sky.utils import registry
11
+ from sky.utils import resources_utils
12
+
13
+ if typing.TYPE_CHECKING:
14
+ from sky import resources as resources_lib
15
+ from sky.utils import volume as volume_lib
16
+
17
# Result type of a credential check: (ok, optional error message or a
# per-endpoint mapping of error messages).
CredentialCheckResult = Tuple[bool, Optional[Union[str, Dict[str, str]]]]

# Files under ~/.prime that hold Prime Intellect credentials; they are
# mounted onto provisioned nodes so the cluster can call the API.
_CREDENTIAL_FILES = [
    'config.json',
]
22
+
23
+
24
@registry.CLOUD_REGISTRY.register
class PrimeIntellect(clouds.Cloud):
    """Prime Intellect GPU Cloud."""

    _REPR = 'PrimeIntellect'
    # Features SkyPilot cannot use on this cloud, mapped to the reason
    # surfaced to the user when the feature is requested.
    _CLOUD_UNSUPPORTED_FEATURES = {
        clouds.CloudImplementationFeatures.AUTOSTOP: 'Stopping not supported.',
        clouds.CloudImplementationFeatures.AUTODOWN:
            ('Auto down not supported yet.'),
        clouds.CloudImplementationFeatures.STOP: 'Stopping not supported.',
        clouds.CloudImplementationFeatures.MULTI_NODE:
            ('Multi-node not supported yet.'),
        clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER:
            ('Custom disk tier not supported yet.'),
        clouds.CloudImplementationFeatures.CUSTOM_NETWORK_TIER:
            ('Custom network tier not supported yet.'),
        clouds.CloudImplementationFeatures.CUSTOM_MULTI_NETWORK:
            ('Customized multiple network interfaces are not supported'),
        clouds.CloudImplementationFeatures.IMAGE_ID:
            ('Custom image not supported yet.'),
        clouds.CloudImplementationFeatures.DOCKER_IMAGE:
            ('Custom docker image not supported yet.'),
    }
    PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
    STATUS_VERSION = clouds.StatusVersion.SKYPILOT
    _MAX_CLUSTER_NAME_LEN_LIMIT = 120
    _regions: List[clouds.Region] = []

    @classmethod
    def _cloud_unsupported_features(
            cls) -> Dict[clouds.CloudImplementationFeatures, str]:
        """Returns the features this cloud does not support, with reasons."""
        return cls._CLOUD_UNSUPPORTED_FEATURES

    @classmethod
    def _max_cluster_name_length(cls) -> Optional[int]:
        """Returns the maximum allowed cluster name length."""
        return cls._MAX_CLUSTER_NAME_LEN_LIMIT

    @classmethod
    def regions_with_offering(
        cls,
        instance_type: str,
        accelerators: Optional[Dict[str, int]],
        use_spot: bool,
        region: Optional[str],
        zone: Optional[str],
    ) -> List[clouds.Region]:
        """Returns the regions that offer the specified resources.

        ``accelerators`` is unused: the catalog lookup is keyed on the
        instance type, which already encodes the accelerator.
        """
        del accelerators
        regions = catalog.get_region_zones_for_instance_type(
            instance_type, use_spot, 'primeintellect')

        if region is not None:
            regions = [r for r in regions if r.name == region]
        if zone is not None:
            # Narrow each region's zones to the requested one, then drop
            # regions left with no matching zone.
            for r in regions:
                assert r.zones is not None, r
                r.set_zones([z for z in r.zones if z.name == zone])
            regions = [r for r in regions if r.zones]
        return regions

    @classmethod
    def get_vcpus_mem_from_instance_type(
        cls,
        instance_type: str,
    ) -> Tuple[Optional[float], Optional[float]]:
        """Returns the #vCPUs and memory that the instance type offers."""
        return catalog.get_vcpus_mem_from_instance_type(instance_type,
                                                        clouds='primeintellect')

    @classmethod
    def zones_provision_loop(
        cls,
        *,
        region: str,
        num_nodes: int,
        instance_type: str,
        accelerators: Optional[Dict[str, int]] = None,
        use_spot: bool = False,
    ) -> Iterator[Optional[List['clouds.Zone']]]:
        """Yields zone lists (one per matching region) for provisioning."""
        regions = cls.regions_with_offering(instance_type,
                                            accelerators,
                                            use_spot,
                                            region=region,
                                            zone=None)
        for r in regions:
            assert r.zones is not None, r
            yield r.zones

    def instance_type_to_hourly_cost(self,
                                     instance_type: str,
                                     use_spot: bool,
                                     region: Optional[str] = None,
                                     zone: Optional[str] = None) -> float:
        """Returns the cost, or the cheapest cost among all zones for spot."""
        return catalog.get_hourly_cost(instance_type,
                                       use_spot=use_spot,
                                       region=region,
                                       zone=zone,
                                       clouds='primeintellect')

    def accelerators_to_hourly_cost(self,
                                    accelerators: Dict[str, int],
                                    use_spot: bool,
                                    region: Optional[str] = None,
                                    zone: Optional[str] = None) -> float:
        """Returns 0.0: accelerator cost is folded into the instance price."""
        del accelerators, use_spot, region, zone  # Unused.
        return 0.0

    def get_egress_cost(self, num_gigabytes: float) -> float:
        """Returns 0.0: egress is not billed separately on this cloud."""
        del num_gigabytes  # Unused.
        return 0.0

    def is_same_cloud(self, other: clouds.Cloud) -> bool:
        """Returns whether ``other`` is also a PrimeIntellect cloud."""
        return isinstance(other, PrimeIntellect)

    @classmethod
    def get_default_instance_type(cls,
                                  cpus: Optional[str] = None,
                                  memory: Optional[str] = None,
                                  disk_tier: Optional[
                                      resources_utils.DiskTier] = None,
                                  region: Optional[str] = None,
                                  zone: Optional[str] = None) -> Optional[str]:
        """Returns the default instance type for Prime Intellect."""
        return catalog.get_default_instance_type(cpus=cpus,
                                                 memory=memory,
                                                 disk_tier=disk_tier,
                                                 region=region,
                                                 zone=zone,
                                                 clouds='primeintellect')

    @classmethod
    def get_accelerators_from_instance_type(
            cls, instance_type: str) -> Optional[Dict[str, Union[int, float]]]:
        """Returns the accelerators (name -> count) of an instance type."""
        return catalog.get_accelerators_from_instance_type(
            instance_type, clouds='primeintellect')

    @classmethod
    def get_zone_shell_cmd(cls) -> Optional[str]:
        """No shell command is needed to detect the zone on this cloud."""
        return None

    def make_deploy_resources_variables(
        self,
        resources: 'resources_lib.Resources',
        cluster_name: resources_utils.ClusterName,
        region: 'clouds.Region',
        zones: Optional[List['clouds.Zone']],
        num_nodes: int,
        dryrun: bool = False,
        volume_mounts: Optional[List['volume_lib.VolumeMount']] = None
    ) -> Dict[str, Optional[str]]:
        """Returns the template variables used to render the deploy config."""
        del dryrun, cluster_name, num_nodes, volume_mounts
        assert zones is not None, (region, zones)

        resources = resources.assert_launchable()
        acc_dict = self.get_accelerators_from_instance_type(
            resources.instance_type)
        if acc_dict is not None:
            custom_resources = json.dumps(acc_dict, separators=(',', ':'))
        else:
            custom_resources = None

        # Only a single zone is used for provisioning.
        return {
            'instance_type': resources.instance_type,
            'custom_resources': custom_resources,
            'region': region.name,
            'zones': zones[0].name,
            'availability_zone': zones[0].name,
        }

    def _get_feasible_launchable_resources(
        self, resources: 'resources_lib.Resources'
    ) -> 'resources_utils.FeasibleResources':
        """Returns a list of feasible resources for the given resources."""
        if resources.instance_type is not None:
            assert resources.is_launchable(), resources
            # The instance type already determines the accelerators.
            resources = resources.copy(accelerators=None)
            return resources_utils.FeasibleResources([resources], [], None)

        def _make(instance_list):
            # Attach this cloud and each candidate instance type to a copy
            # of the requested resources.
            resource_list = []
            for instance_type in instance_list:
                r = resources.copy(
                    cloud=PrimeIntellect(),
                    instance_type=instance_type,
                    accelerators=None,
                    cpus=None,
                )
                resource_list.append(r)
            return resource_list

        # Currently, handle a filter on accelerators only.
        accelerators = resources.accelerators
        if accelerators is None:
            default_instance_type = PrimeIntellect.get_default_instance_type(
                cpus=resources.cpus,
                memory=resources.memory,
                disk_tier=resources.disk_tier)
            if default_instance_type is None:
                # TODO(pokgak): Add hints to all return values in this method
                # to help users understand why the resources are not
                # launchable.
                return resources_utils.FeasibleResources([], [], None)
            else:
                return resources_utils.FeasibleResources(
                    _make([default_instance_type]), [], None)

        assert len(accelerators) == 1, resources
        acc, acc_count = list(accelerators.items())[0]
        (instance_list,
         fuzzy_candidate_list) = catalog.get_instance_type_for_accelerator(
             acc,
             acc_count,
             use_spot=resources.use_spot,
             cpus=resources.cpus,
             region=resources.region,
             zone=resources.zone,
             clouds='primeintellect')
        if instance_list is None:
            return resources_utils.FeasibleResources([], fuzzy_candidate_list,
                                                     None)
        return resources_utils.FeasibleResources(_make(instance_list),
                                                 fuzzy_candidate_list, None)

    @classmethod
    def _check_credentials(cls) -> Tuple[bool, Optional[str]]:
        """Verify that the user has valid credentials for Prime Intellect.

        Returns:
            A (valid, reason) tuple; ``reason`` explains the failure when
            ``valid`` is False.
        """
        primeintellect_config_file = '~/.prime/config.json'
        config_path = os.path.expanduser(primeintellect_config_file)
        if not os.path.isfile(config_path):
            return (False, f'{primeintellect_config_file} does not exist.')

        with open(config_path, encoding='UTF-8') as f:
            data = json.load(f)
        api_key = data.get('api_key')
        if not api_key:
            # A missing/empty key can never authenticate; fail fast instead
            # of printing a warning and continuing.
            return (False, 'API key is missing or empty in '
                    f'{primeintellect_config_file}. Run \'prime login\' to '
                    'configure a new API key.')

        client = utils.PrimeIntellectAPIClient()
        try:
            client.list_instances()
        except utils.PrimeintellectAPIError as e:
            if e.status_code == 403:
                return False, (
                    'Please check that your API key has the correct '
                    'permissions, generate a new one at '
                    'https://app.primeintellect.ai/dashboard/tokens, '
                    'or run \'prime login\' to configure a new API key.')
            # Any other API error also means the credentials could not be
            # validated; report it instead of silently claiming success.
            return False, f'Failed to query the Prime Intellect API: {e}'
        return True, None

    @classmethod
    def _check_compute_credentials(cls) -> CredentialCheckResult:
        """Checks if the user has access credentials to Prime Intellect's
        compute service."""
        return cls._check_credentials()

    def get_credential_file_mounts(self) -> Dict[str, str]:
        """Returns a dict of credential file paths to mount paths."""
        # Mount each credential file to the same path on the remote node.
        return {
            f'~/.prime/{filename}': f'~/.prime/{filename}'
            for filename in _CREDENTIAL_FILES
        }

    @classmethod
    def get_current_user_identity(cls) -> Optional[List[str]]:
        """User identity is not tracked for this cloud."""
        return None

    def instance_type_exists(self, instance_type: str) -> bool:
        """Returns whether the instance type exists in the catalog."""
        return catalog.instance_type_exists(instance_type, 'primeintellect')

    def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
        """Validates the region/zone pair against the catalog."""
        return catalog.validate_region_zone(region,
                                            zone,
                                            clouds='primeintellect')

    @classmethod
    def _unsupported_features_for_resources(
        cls, resources: 'resources_lib.Resources'
    ) -> Dict[clouds.CloudImplementationFeatures, str]:
        """The features not supported based on the resources provided.

        This method is used by check_features_are_supported() to check if the
        cloud implementation supports all the requested features.

        Returns:
            A dict of {feature: reason} for the features not supported by the
            cloud implementation.
        """
        del resources  # unused
        return cls._CLOUD_UNSUPPORTED_FEATURES
sky/core.py CHANGED
@@ -182,9 +182,16 @@ def status(
182
182
  cluster_names=cluster_names,
183
183
  all_users=all_users,
184
184
  include_credentials=include_credentials)
185
- return [
186
- responses.StatusResponse.model_validate(cluster) for cluster in clusters
187
- ]
185
+
186
+ status_responses = []
187
+ for cluster in clusters:
188
+ try:
189
+ status_responses.append(
190
+ responses.StatusResponse.model_validate(cluster))
191
+ except Exception as e: # pylint: disable=broad-except
192
+ logger.warning('Failed to validate status responses for cluster '
193
+ f'{cluster.get("name")}: {e}')
194
+ return status_responses
188
195
 
189
196
 
190
197
  def status_kubernetes(
@@ -293,7 +300,10 @@ def endpoints(cluster: str,
293
300
 
294
301
 
295
302
  @usage_lib.entrypoint
296
- def cost_report(days: Optional[int] = None) -> List[Dict[str, Any]]:
303
+ def cost_report(
304
+ days: Optional[int] = None,
305
+ dashboard_summary_response: bool = False,
306
+ cluster_hashes: Optional[List[str]] = None) -> List[Dict[str, Any]]:
297
307
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
298
308
  """Get all cluster cost reports, including those that have been downed.
299
309
 
@@ -339,7 +349,12 @@ def cost_report(days: Optional[int] = None) -> List[Dict[str, Any]]:
339
349
  if days is None:
340
350
  days = constants.COST_REPORT_DEFAULT_DAYS
341
351
 
342
- cluster_reports = global_user_state.get_clusters_from_history(days=days)
352
+ abbreviate_response = dashboard_summary_response and cluster_hashes is None
353
+
354
+ cluster_reports = global_user_state.get_clusters_from_history(
355
+ days=days,
356
+ abbreviate_response=abbreviate_response,
357
+ cluster_hashes=cluster_hashes)
343
358
  logger.debug(
344
359
  f'{len(cluster_reports)} clusters found from history with {days} days.')
345
360
 
@@ -357,43 +372,6 @@ def cost_report(days: Optional[int] = None) -> List[Dict[str, Any]]:
357
372
  cost = (launched_resources.get_cost(duration) * launched_nodes)
358
373
  return cost
359
374
 
360
- def _update_record_with_resources(record: Dict[str, Any]) -> None:
361
- """Add resource fields for dashboard compatibility."""
362
- if record is None:
363
- return
364
- resources = record.get('resources')
365
- if resources is None:
366
- return
367
- fields = ['cloud', 'region', 'cpus', 'memory', 'accelerators']
368
- for field in fields:
369
- try:
370
- record[field] = str(getattr(resources, field))
371
- except Exception as e: # pylint: disable=broad-except
372
- # Ok to skip the fields as this is just for display
373
- # purposes.
374
- logger.debug(f'Failed to get resources.{field} for cluster '
375
- f'{record["name"]}: {str(e)}')
376
- record[field] = None
377
-
378
- # Add resources_str and resources_str_full for dashboard
379
- # compatibility
380
- num_nodes = record.get('num_nodes', 1)
381
- try:
382
- resource_str_simple = resources_utils.format_resource(
383
- resources, simplify=True)
384
- resource_str_full = resources_utils.format_resource(
385
- resources, simplify=False)
386
- record['resources_str'] = f'{num_nodes}x{resource_str_simple}'
387
- record[
388
- 'resources_str_full'] = f'{num_nodes}x{resource_str_full}'
389
- except Exception as e: # pylint: disable=broad-except
390
- logger.debug(f'Failed to get resources_str for cluster '
391
- f'{record["name"]}: {str(e)}')
392
- for field in fields:
393
- record[field] = None
394
- record['resources_str'] = '-'
395
- record['resources_str_full'] = '-'
396
-
397
375
  try:
398
376
  report['total_cost'] = get_total_cost(report)
399
377
  except Exception as e: # pylint: disable=broad-except
@@ -402,17 +380,66 @@ def cost_report(days: Optional[int] = None) -> List[Dict[str, Any]]:
402
380
  f'{report["name"]}: {str(e)}')
403
381
  report['total_cost'] = 0.0
404
382
 
405
- _update_record_with_resources(report)
406
383
  return report
407
384
 
408
385
  # Process clusters in parallel
409
386
  if not cluster_reports:
410
387
  return []
411
388
 
412
- processed_reports = subprocess_utils.run_in_parallel(
413
- _process_cluster_report, cluster_reports)
389
+ if not abbreviate_response:
390
+ cluster_reports = subprocess_utils.run_in_parallel(
391
+ _process_cluster_report, cluster_reports)
392
+
393
+ def _update_record_with_resources(record: Dict[str, Any]) -> None:
394
+ """Add resource fields for dashboard compatibility."""
395
+ if record is None:
396
+ return
397
+ resources = record.get('resources')
398
+ if resources is None:
399
+ return
400
+ if not dashboard_summary_response:
401
+ fields = ['cloud', 'region', 'cpus', 'memory', 'accelerators']
402
+ else:
403
+ fields = ['cloud']
404
+ for field in fields:
405
+ try:
406
+ record[field] = str(getattr(resources, field))
407
+ except Exception as e: # pylint: disable=broad-except
408
+ # Ok to skip the fields as this is just for display
409
+ # purposes.
410
+ logger.debug(f'Failed to get resources.{field} for cluster '
411
+ f'{record["name"]}: {str(e)}')
412
+ record[field] = None
413
+
414
+ # Add resources_str and resources_str_full for dashboard
415
+ # compatibility
416
+ num_nodes = record.get('num_nodes', 1)
417
+ try:
418
+ resource_str_simple = resources_utils.format_resource(resources,
419
+ simplify=True)
420
+ record['resources_str'] = f'{num_nodes}x{resource_str_simple}'
421
+ if not abbreviate_response:
422
+ resource_str_full = resources_utils.format_resource(
423
+ resources, simplify=False)
424
+ record[
425
+ 'resources_str_full'] = f'{num_nodes}x{resource_str_full}'
426
+ except Exception as e: # pylint: disable=broad-except
427
+ logger.debug(f'Failed to get resources_str for cluster '
428
+ f'{record["name"]}: {str(e)}')
429
+ for field in fields:
430
+ record[field] = None
431
+ record['resources_str'] = '-'
432
+ if not abbreviate_response:
433
+ record['resources_str_full'] = '-'
434
+
435
+ for report in cluster_reports:
436
+ _update_record_with_resources(report)
437
+ if dashboard_summary_response:
438
+ report.pop('usage_intervals')
439
+ report.pop('user_hash')
440
+ report.pop('resources')
414
441
 
415
- return processed_reports
442
+ return cluster_reports
416
443
 
417
444
 
418
445
  def _start(
@@ -840,8 +867,10 @@ def queue(cluster_name: str,
840
867
  'submitted_at': job_info.submitted_at,
841
868
  'status': job_lib.JobStatus.from_protobuf(job_info.status),
842
869
  'run_timestamp': job_info.run_timestamp,
843
- 'start_at': job_info.start_at,
844
- 'end_at': job_info.end_at,
870
+ 'start_at': job_info.start_at
871
+ if job_info.HasField('start_at') else None,
872
+ 'end_at': job_info.end_at
873
+ if job_info.HasField('end_at') else None,
845
874
  'resources': job_info.resources,
846
875
  'log_path': job_info.log_path,
847
876
  'user_hash': job_info.username,
@@ -1 +1 @@
1
- <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-05f82d90d6fd7f82.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js" defer=""></script><script src="/dashboard/_next/static/y8s7LlyyfhMzpzCkxuD2r/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/y8s7LlyyfhMzpzCkxuD2r/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"y8s7LlyyfhMzpzCkxuD2r","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
1
+ <!DOCTYPE html><html><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width"/><meta name="next-head-count" content="2"/><link rel="preload" href="/dashboard/_next/static/css/4614e06482d7309e.css" as="style"/><link rel="stylesheet" href="/dashboard/_next/static/css/4614e06482d7309e.css" data-n-g=""/><noscript data-n-css=""></noscript><script defer="" nomodule="" src="/dashboard/_next/static/chunks/polyfills-78c92fac7aa8fdd8.js"></script><script src="/dashboard/_next/static/chunks/webpack-b2a3938c22b6647b.js" defer=""></script><script src="/dashboard/_next/static/chunks/framework-cf60a09ccd051a10.js" defer=""></script><script src="/dashboard/_next/static/chunks/main-f15ccb73239a3bf1.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_app-ce361c6959bc2001.js" defer=""></script><script src="/dashboard/_next/static/chunks/pages/_error-c66a4e8afc46f17b.js" defer=""></script><script src="/dashboard/_next/static/VvaUqYDvHOcHZRnvMBmax/_buildManifest.js" defer=""></script><script src="/dashboard/_next/static/VvaUqYDvHOcHZRnvMBmax/_ssgManifest.js" defer=""></script></head><body><div id="__next"></div><script id="__NEXT_DATA__" type="application/json">{"props":{"pageProps":{"statusCode":404}},"page":"/_error","query":{},"buildId":"VvaUqYDvHOcHZRnvMBmax","assetPrefix":"/dashboard","nextExport":true,"isFallback":false,"gip":true,"scriptLoader":[]}</script></body></html>
@@ -1 +1 @@
1
- self.__BUILD_MANIFEST=function(s,c,e,a,t,f,b,u,n,o,j,i,r,k){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/":["static/chunks/pages/index-444f1804401f04ea.js"],"/_error":["static/chunks/pages/_error-c66a4e8afc46f17b.js"],"/clusters":["static/chunks/pages/clusters-469814d711d63b1b.js"],"/clusters/[cluster]":[s,c,e,f,b,"static/chunks/4676-9da7fdbde90b5549.js",o,a,t,u,j,n,i,"static/chunks/6856-e0754534b3015377.js",r,k,"static/chunks/9037-f9800e64eb05dd1c.js","static/chunks/pages/clusters/[cluster]-0b4b35dc1dfe046c.js"],"/clusters/[cluster]/[job]":[s,c,e,f,a,t,n,"static/chunks/pages/clusters/[cluster]/[job]-1cbba24bd1bd35f8.js"],"/config":["static/chunks/pages/config-dfb9bf07b13045f4.js"],"/infra":["static/chunks/pages/infra-aabba60d57826e0f.js"],"/infra/[context]":["static/chunks/pages/infra/[context]-6563820e094f68ca.js"],"/jobs":["static/chunks/pages/jobs-1f70d9faa564804f.js"],"/jobs/pools/[pool]":[s,c,e,b,o,a,t,u,"static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js"],"/jobs/[job]":[s,c,e,f,b,o,a,t,u,n,"static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js"],"/users":["static/chunks/pages/users-018bf31cda52e11b.js"],"/volumes":["static/chunks/pages/volumes-739726d6b823f532.js"],"/workspace/new":["static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js"],"/workspaces":["static/chunks/pages/workspaces-7528cc0ef8c522c5.js"],"/workspaces/[name]":[s,c,e,f,b,"static/chunks/1836-37fede578e2da5f8.js",a,t,u,j,n,i,r,k,"static/chunks/1141-159df2d4c441a9d1.js","static/chunks/pages/workspaces/[name]-af76bb06dbb3954f.js"],sortedPages:["/","/_app","/_error","/clusters","/clusters/[cluster]","/clusters/[cluster]/[job]","/config","/infra","/infra/[context]","/jobs","/jobs/pools/[pool]","/jobs/[job]","/users","/volumes","/workspace/new","/workspaces","/workspaces/[name]"]}}("static/chunks/616-3d59f75e2ccf9321.js","static/chunks/6130-2be46d70a38f1e82.js","static/chunks/5739-d67458fcb1386c92.js","static/chunks/6989-01359c57e018caa4.js","static/chunks/3850-ff4a9a69d9786
32b.js","static/chunks/7411-b15471acd2cba716.js","static/chunks/1272-1ef0bf0237faccdb.js","static/chunks/8969-0487dfbf149d9e53.js","static/chunks/6135-4b4d5e824b7f9d3c.js","static/chunks/754-d0da8ab45f9509e9.js","static/chunks/6990-f6818c84ed8f1c86.js","static/chunks/1121-408ed10b2f9fce17.js","static/chunks/6601-06114c982db410b6.js","static/chunks/3015-2ea98b57e318bd6e.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
1
+ self.__BUILD_MANIFEST=function(s,c,a,e,t,f,u,n,b,o,j,i,r,d){return{__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/":["static/chunks/pages/index-444f1804401f04ea.js"],"/_error":["static/chunks/pages/_error-c66a4e8afc46f17b.js"],"/clusters":["static/chunks/pages/clusters-469814d711d63b1b.js"],"/clusters/[cluster]":[s,c,a,f,u,"static/chunks/4676-9da7fdbde90b5549.js",o,e,t,n,j,b,i,"static/chunks/6856-9a2538f38c004652.js",r,d,"static/chunks/9037-472ee1222cb1e158.js","static/chunks/pages/clusters/[cluster]-9525660179df3605.js"],"/clusters/[cluster]/[job]":[s,c,a,f,e,t,b,"static/chunks/pages/clusters/[cluster]/[job]-1e9248ddbddcd122.js"],"/config":["static/chunks/pages/config-dfb9bf07b13045f4.js"],"/infra":["static/chunks/pages/infra-aabba60d57826e0f.js"],"/infra/[context]":["static/chunks/pages/infra/[context]-6563820e094f68ca.js"],"/jobs":["static/chunks/pages/jobs-1f70d9faa564804f.js"],"/jobs/pools/[pool]":[s,c,a,u,o,e,t,n,"static/chunks/pages/jobs/pools/[pool]-07349868f7905d37.js"],"/jobs/[job]":[s,c,a,f,u,o,e,t,n,b,"static/chunks/pages/jobs/[job]-dd64309c3fe67ed2.js"],"/users":["static/chunks/pages/users-018bf31cda52e11b.js"],"/volumes":["static/chunks/pages/volumes-739726d6b823f532.js"],"/workspace/new":["static/chunks/pages/workspace/new-3f88a1c7e86a3f86.js"],"/workspaces":["static/chunks/pages/workspaces-7528cc0ef8c522c5.js"],"/workspaces/[name]":[s,c,a,f,u,"static/chunks/1836-37fede578e2da5f8.js",e,t,n,j,b,i,r,d,"static/chunks/1141-159df2d4c441a9d1.js","static/chunks/pages/workspaces/[name]-af76bb06dbb3954f.js"],sortedPages:["/","/_app","/_error","/clusters","/clusters/[cluster]","/clusters/[cluster]/[job]","/config","/infra","/infra/[context]","/jobs","/jobs/pools/[pool]","/jobs/[job]","/users","/volumes","/workspace/new","/workspaces","/workspaces/[name]"]}}("static/chunks/616-3d59f75e2ccf9321.js","static/chunks/6130-2be46d70a38f1e82.js","static/chunks/5739-d67458fcb1386c92.js","static/chunks/6989-01359c57e018caa4.js","static/chunks/3850-ff4a9a69d9786
32b.js","static/chunks/7411-b15471acd2cba716.js","static/chunks/1272-1ef0bf0237faccdb.js","static/chunks/8969-a39efbadcd9fde80.js","static/chunks/6135-4b4d5e824b7f9d3c.js","static/chunks/754-d0da8ab45f9509e9.js","static/chunks/6990-f6818c84ed8f1c86.js","static/chunks/1121-4ff1ec0dbc5792ab.js","static/chunks/6601-06114c982db410b6.js","static/chunks/3015-88c7c8d69b0b6dba.js"),self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();
@@ -0,0 +1 @@
1
+ "use strict";(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[1121],{50326:function(e,t,a){a.d(t,{$N:function(){return _},Be:function(){return h},Vq:function(){return c},cN:function(){return m},cZ:function(){return d},fK:function(){return f}});var r=a(85893),s=a(67294),o=a(6327),n=a(32350),l=a(43767);let c=o.fC;o.xz;let u=o.h_;o.x8;let i=s.forwardRef((e,t)=>{let{className:a,...s}=e;return(0,r.jsx)(o.aV,{ref:t,className:(0,n.cn)("fixed inset-0 z-50 bg-black/50 backdrop-blur-sm data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0",a),...s})});i.displayName=o.aV.displayName;let d=s.forwardRef((e,t)=>{let{className:a,children:s,...c}=e;return(0,r.jsxs)(u,{children:[(0,r.jsx)(i,{}),(0,r.jsxs)(o.VY,{ref:t,className:(0,n.cn)("fixed left-[50%] top-[50%] z-50 grid w-full max-w-lg translate-x-[-50%] translate-y-[-50%] gap-4 border border-gray-200 bg-white p-6 shadow-lg duration-200 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[state=closed]:slide-out-to-left-1/2 data-[state=closed]:slide-out-to-top-[48%] data-[state=open]:slide-in-from-left-1/2 data-[state=open]:slide-in-from-top-[48%] sm:rounded-lg",a),...c,children:[s,(0,r.jsxs)(o.x8,{className:"absolute right-4 top-4 rounded-sm opacity-70 ring-offset-white transition-opacity hover:opacity-100 focus:outline-none focus:ring-2 focus:ring-gray-400 focus:ring-offset-2 disabled:pointer-events-none data-[state=open]:bg-gray-100 data-[state=open]:text-gray-500",children:[(0,r.jsx)(l.Z,{className:"h-4 w-4"}),(0,r.jsx)("span",{className:"sr-only",children:"Close"})]})]})]})});d.displayName=o.VY.displayName;let f=e=>{let{className:t,...a}=e;return(0,r.jsx)("div",{className:(0,n.cn)("flex flex-col space-y-1.5 text-center sm:text-left",t),...a})};f.displayName="DialogHeader";let 
m=e=>{let{className:t,...a}=e;return(0,r.jsx)("div",{className:(0,n.cn)("flex flex-col-reverse sm:flex-row sm:justify-end sm:space-x-2",t),...a})};m.displayName="DialogFooter";let _=s.forwardRef((e,t)=>{let{className:a,...s}=e;return(0,r.jsx)(o.Dx,{ref:t,className:(0,n.cn)("text-lg font-semibold leading-none tracking-tight",a),...s})});_.displayName=o.Dx.displayName;let h=s.forwardRef((e,t)=>{let{className:a,...s}=e;return(0,r.jsx)(o.dk,{ref:t,className:(0,n.cn)("text-sm text-gray-500",a),...s})});h.displayName=o.dk.displayName},23266:function(e,t,a){a.d(t,{GH:function(){return f},QL:function(){return _},Sl:function(){return d},getClusters:function(){return u},uR:function(){return i}});var r=a(67294),s=a(15821),o=a(47145),n=a(93225),l=a(6378);let c={UP:"RUNNING",STOPPED:"STOPPED",INIT:"LAUNCHING",null:"TERMINATED"};async function u(){let{clusterNames:e=null}=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{};try{return(await o.x.fetch("/status",{cluster_names:e,all_users:!0,include_credentials:!1})).map(e=>{let t="",a=t=e.zone?e.zone:e.region;return t&&t.length>25&&(t=function(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:15;if(!e||e.length<=t)return e;if(t<=3)return"...";let a=Math.floor((t-3)/2),r=a+(t-3)%2;return 0===a?e.substring(0,r)+"...":e.substring(0,r)+"..."+e.substring(e.length-a)}(t,25)),{status:c[e.status],cluster:e.name,user:e.user_name,user_hash:e.user_hash,cluster_hash:e.cluster_hash,cloud:e.cloud,region:e.region,infra:t?e.cloud+" ("+t+")":e.cloud,full_infra:a?"".concat(e.cloud," (").concat(a,")"):e.cloud,cpus:e.cpus,mem:e.memory,gpus:e.accelerators,resources_str:e.resources_str,resources_str_full:e.resources_str_full,time:new Date(1e3*e.launched_at),num_nodes:e.nodes,workspace:e.workspace,autostop:e.autostop,last_event:e.last_event,to_down:e.to_down,jobs:[],command:e.last_creation_command||e.last_use,task_yaml:e.last_creation_yaml||"{}",events:[{time:new Date(1e3*e.launched_at),event:"Cluster created."}]}})}catch(e){return 
console.error("Error fetching clusters:",e),[]}}async function i(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:null;try{let t={days:30,dashboard_summary_response:!0};e&&(t.cluster_hashes=[e]);let a=await o.x.fetch("/cost_report",t);console.log("Raw cluster history data:",a);let r=a.map(e=>{let t="Unknown";e.cloud?t=e.cloud:e.resources&&e.resources.cloud&&(t=e.resources.cloud);let a=e.user_name||"-";return{status:e.status?c[e.status]:"TERMINATED",cluster:e.name,user:a,user_hash:e.user_hash,cluster_hash:e.cluster_hash,cloud:t,region:"",infra:t,full_infra:t,resources_str:e.resources_str,resources_str_full:e.resources_str_full,time:e.launched_at?new Date(1e3*e.launched_at):null,num_nodes:e.num_nodes||1,duration:e.duration,total_cost:e.total_cost,workspace:e.workspace||"default",autostop:-1,last_event:e.last_event,to_down:!1,usage_intervals:e.usage_intervals,command:e.last_creation_command||"",task_yaml:e.last_creation_yaml||"{}",events:[{time:e.launched_at?new Date(1e3*e.launched_at):new Date,event:"Cluster created."}]}});return console.log("Processed cluster history data:",r),r}catch(e){return console.error("Error fetching cluster history:",e),[]}}async function d(e){let{clusterName:t,jobId:a,onNewLog:r,workspace:n}=e;try{await o.x.stream("/logs",{follow:!1,cluster_name:t,job_id:a,tail:1e4,override_skypilot_config:{active_workspace:n||"default"}},r)}catch(e){console.error("Error in streamClusterJobLogs:",e),(0,s.C)("Error in streamClusterJobLogs: ".concat(e.message),"error")}}async function f(e){let{clusterName:t,jobIds:a=null,workspace:r}=e;try{let e=await o.x.fetch("/download_logs",{cluster_name:t,job_ids:a?a.map(String):null,override_skypilot_config:{active_workspace:r||"default"}}),l=Object.values(e||{});if(!l.length){(0,s.C)("No logs found to download.","warning");return}let c=window.location.origin,u="".concat(c).concat(n.f4,"/download"),i=await 
fetch("".concat(u,"?relative=items"),{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({folder_paths:l})});if(!i.ok){let e=await i.text();throw Error("Download failed: ".concat(i.status," ").concat(e))}let d=await i.blob(),f=window.URL.createObjectURL(d),m=document.createElement("a"),_=new Date().toISOString().replace(/[:.]/g,"-"),h=a&&1===a.length?"job-".concat(a[0]):"jobs";m.href=f,m.download="".concat(t,"-").concat(h,"-logs-").concat(_,".zip"),document.body.appendChild(m),m.click(),m.remove(),window.URL.revokeObjectURL(f)}catch(e){console.error("Error downloading logs:",e),(0,s.C)("Error downloading logs: ".concat(e.message),"error")}}async function m(e){let{clusterName:t,workspace:a}=e;try{return(await o.x.fetch("/queue",{cluster_name:t,all_users:!0,override_skypilot_config:{active_workspace:a}})).map(e=>{var r;let s=e.end_at?e.end_at:Date.now()/1e3,o=0,n=0;return e.submitted_at&&(o=s-e.submitted_at),e.start_at&&(n=s-e.start_at),{id:e.job_id,status:e.status,job:e.job_name,user:e.username,user_hash:e.user_hash,gpus:e.accelerators||{},submitted_at:e.submitted_at?new Date(1e3*e.submitted_at):null,resources:e.resources,cluster:t,total_duration:o,job_duration:n,infra:"",logs:"",workspace:a||"default",git_commit:(null===(r=e.metadata)||void 0===r?void 0:r.git_commit)||"-"}})}catch(e){return console.error("Error fetching cluster jobs:",e),[]}}function _(e){let{cluster:t,job:a=null}=e,[s,o]=(0,r.useState)(null),[n,c]=(0,r.useState)(null),[i,d]=(0,r.useState)(!0),[f,_]=(0,r.useState)(!0),h=(0,r.useCallback)(async()=>{if(t)try{d(!0);let e=await l.default.get(u,[{clusterNames:[t]}]);return o(e[0]),e[0]}catch(e){console.error("Error fetching cluster data:",e)}finally{d(!1)}return null},[t]),g=(0,r.useCallback)(async e=>{if(t)try{_(!0);let a=await l.default.get(m,[{clusterName:t,workspace:e||"default"}]);c(a)}catch(e){console.error("Error fetching cluster job 
data:",e)}finally{_(!1)}},[t]),p=(0,r.useCallback)(async()=>{l.default.invalidate(u,[{clusterNames:[t]}]);let e=await h();e&&(l.default.invalidate(m,[{clusterName:t,workspace:e.workspace||"default"}]),await g(e.workspace))},[h,g,t]),w=(0,r.useCallback)(async()=>{s&&(l.default.invalidate(m,[{clusterName:t,workspace:s.workspace||"default"}]),await g(s.workspace))},[g,s,t]);return(0,r.useEffect)(()=>{(async()=>{let e=await h();e&&g(e.workspace)})()},[t,a,h,g]),{clusterData:s,clusterJobData:n,loading:i,clusterDetailsLoading:i,clusterJobsLoading:f,refreshData:p,refreshClusterJobsOnly:w}}},53081:function(e,t,a){a.d(t,{R:function(){return s}}),a(23266),a(68969);var r=a(47145);async function s(){try{let e=await r.x.get("/users");if(!e.ok)throw Error("HTTP error! status: ".concat(e.status));return(await e.json()).map(e=>({userId:e.id,username:e.name,role:e.role,created_at:e.created_at}))||[]}catch(e){return console.error("Failed to fetch users:",e),[]}}}}]);