skypilot-nightly 1.0.0.dev20240925__py3-none-any.whl → 1.0.0.dev20240927__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,42 +28,6 @@ TAG_RAY_CLUSTER_NAME = 'ray-cluster-name'
28
28
  TAG_SKYPILOT_CLUSTER_NAME = 'skypilot-cluster-name'
29
29
  TAG_POD_INITIALIZED = 'skypilot-initialized'
30
30
 
31
- POD_STATUSES = {
32
- 'Pending', 'Running', 'Succeeded', 'Failed', 'Unknown', 'Terminating'
33
- }
34
-
35
-
36
- def to_label_selector(tags):
37
- label_selector = ''
38
- for k, v in tags.items():
39
- if label_selector != '':
40
- label_selector += ','
41
- label_selector += '{}={}'.format(k, v)
42
- return label_selector
43
-
44
-
45
- def _filter_pods(namespace: str, context: str, tag_filters: Dict[str, str],
46
- status_filters: Optional[List[str]]) -> Dict[str, Any]:
47
- """Filters pods by tags and status."""
48
- non_included_pod_statuses = POD_STATUSES.copy()
49
-
50
- field_selector = ''
51
- if status_filters is not None:
52
- non_included_pod_statuses -= set(status_filters)
53
- field_selector = ','.join(
54
- [f'status.phase!={status}' for status in non_included_pod_statuses])
55
-
56
- label_selector = to_label_selector(tag_filters)
57
- pod_list = kubernetes.core_api(context).list_namespaced_pod(
58
- namespace, field_selector=field_selector, label_selector=label_selector)
59
-
60
- # Don't return pods marked for deletion,
61
- # i.e. pods with non-null metadata.DeletionTimestamp.
62
- pods = [
63
- pod for pod in pod_list.items if pod.metadata.deletion_timestamp is None
64
- ]
65
- return {pod.metadata.name: pod for pod in pods}
66
-
67
31
 
68
32
  def _get_head_pod_name(pods: Dict[str, Any]) -> Optional[str]:
69
33
  head_pod_name = None
@@ -475,7 +439,8 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
475
439
  pod_spec['metadata']['labels'].update(
476
440
  {TAG_SKYPILOT_CLUSTER_NAME: cluster_name_on_cloud})
477
441
 
478
- terminating_pods = _filter_pods(namespace, context, tags, ['Terminating'])
442
+ terminating_pods = kubernetes_utils.filter_pods(namespace, context, tags,
443
+ ['Terminating'])
479
444
  start_time = time.time()
480
445
  while (len(terminating_pods) > 0 and
481
446
  time.time() - start_time < _TIMEOUT_FOR_POD_TERMINATION):
@@ -483,8 +448,8 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
483
448
  'terminating pods. Waiting them to finish: '
484
449
  f'{list(terminating_pods.keys())}')
485
450
  time.sleep(POLL_INTERVAL)
486
- terminating_pods = _filter_pods(namespace, context, tags,
487
- ['Terminating'])
451
+ terminating_pods = kubernetes_utils.filter_pods(namespace, context,
452
+ tags, ['Terminating'])
488
453
 
489
454
  if len(terminating_pods) > 0:
490
455
  # If there are still terminating pods, we force delete them.
@@ -501,8 +466,8 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
501
466
  _request_timeout=config_lib.DELETION_TIMEOUT,
502
467
  grace_period_seconds=0)
503
468
 
504
- running_pods = _filter_pods(namespace, context, tags,
505
- ['Pending', 'Running'])
469
+ running_pods = kubernetes_utils.filter_pods(namespace, context, tags,
470
+ ['Pending', 'Running'])
506
471
  head_pod_name = _get_head_pod_name(running_pods)
507
472
  logger.debug(f'Found {len(running_pods)} existing pods: '
508
473
  f'{list(running_pods.keys())}')
@@ -583,7 +548,8 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
583
548
  if head_pod_name is None:
584
549
  head_pod_name = pod.metadata.name
585
550
 
586
- wait_pods_dict = _filter_pods(namespace, context, tags, ['Pending'])
551
+ wait_pods_dict = kubernetes_utils.filter_pods(namespace, context, tags,
552
+ ['Pending'])
587
553
  wait_pods = list(wait_pods_dict.values())
588
554
 
589
555
  networking_mode = network_utils.get_networking_mode(
@@ -613,8 +579,9 @@ def _create_pods(region: str, cluster_name_on_cloud: str,
613
579
  logger.debug(f'run_instances: all pods are scheduled and running: '
614
580
  f'{list(wait_pods_dict.keys())}')
615
581
 
616
- running_pods = _filter_pods(namespace, context, tags, ['Running'])
617
- initialized_pods = _filter_pods(namespace, context, {
582
+ running_pods = kubernetes_utils.filter_pods(namespace, context, tags,
583
+ ['Running'])
584
+ initialized_pods = kubernetes_utils.filter_pods(namespace, context, {
618
585
  TAG_POD_INITIALIZED: 'true',
619
586
  **tags
620
587
  }, ['Running'])
@@ -722,7 +689,7 @@ def terminate_instances(
722
689
  tag_filters = {
723
690
  TAG_RAY_CLUSTER_NAME: cluster_name_on_cloud,
724
691
  }
725
- pods = _filter_pods(namespace, context, tag_filters, None)
692
+ pods = kubernetes_utils.filter_pods(namespace, context, tag_filters, None)
726
693
 
727
694
  def _is_head(pod) -> bool:
728
695
  return pod.metadata.labels[constants.TAG_RAY_NODE_KIND] == 'head'
@@ -746,7 +713,9 @@ def get_cluster_info(
746
713
  TAG_RAY_CLUSTER_NAME: cluster_name_on_cloud,
747
714
  }
748
715
 
749
- running_pods = _filter_pods(namespace, context, tag_filters, ['Running'])
716
+ running_pods = kubernetes_utils.filter_pods(namespace, context, tag_filters,
717
+ ['Running'])
718
+
750
719
  pods: Dict[str, List[common.InstanceInfo]] = {}
751
720
  head_pod_name = None
752
721
 
@@ -79,13 +79,14 @@ def _open_ports_using_ingress(
79
79
  )
80
80
 
81
81
  # Prepare service names, ports, for template rendering
82
- service_details = [(f'{cluster_name_on_cloud}--skypilot-svc--{port}', port,
83
- _PATH_PREFIX.format(
84
- cluster_name_on_cloud=cluster_name_on_cloud,
85
- port=port,
86
- namespace=kubernetes_utils.
87
- get_current_kube_config_context_namespace()).rstrip(
88
- '/').lstrip('/')) for port in ports]
82
+ service_details = [
83
+ (f'{cluster_name_on_cloud}--skypilot-svc--{port}', port,
84
+ _PATH_PREFIX.format(
85
+ cluster_name_on_cloud=cluster_name_on_cloud,
86
+ port=port,
87
+ namespace=kubernetes_utils.get_kube_config_context_namespace(
88
+ context)).rstrip('/').lstrip('/')) for port in ports
89
+ ]
89
90
 
90
91
  # Generate ingress and services specs
91
92
  # We batch ingress rule creation because each rule triggers a hot reload of
@@ -171,7 +172,8 @@ def _cleanup_ports_for_ingress(
171
172
  for port in ports:
172
173
  service_name = f'{cluster_name_on_cloud}--skypilot-svc--{port}'
173
174
  network_utils.delete_namespaced_service(
174
- namespace=provider_config.get('namespace', 'default'),
175
+ namespace=provider_config.get('namespace',
176
+ kubernetes_utils.DEFAULT_NAMESPACE),
175
177
  service_name=service_name,
176
178
  )
177
179
 
@@ -208,11 +210,13 @@ def query_ports(
208
210
  return _query_ports_for_ingress(
209
211
  cluster_name_on_cloud=cluster_name_on_cloud,
210
212
  ports=ports,
213
+ provider_config=provider_config,
211
214
  )
212
215
  elif port_mode == kubernetes_enums.KubernetesPortMode.PODIP:
213
216
  return _query_ports_for_podip(
214
217
  cluster_name_on_cloud=cluster_name_on_cloud,
215
218
  ports=ports,
219
+ provider_config=provider_config,
216
220
  )
217
221
  else:
218
222
  return {}
@@ -231,8 +235,14 @@ def _query_ports_for_loadbalancer(
231
235
  result: Dict[int, List[common.Endpoint]] = {}
232
236
  service_name = _LOADBALANCER_SERVICE_NAME.format(
233
237
  cluster_name_on_cloud=cluster_name_on_cloud)
238
+ context = provider_config.get(
239
+ 'context', kubernetes_utils.get_current_kube_config_context_name())
240
+ namespace = provider_config.get(
241
+ 'namespace',
242
+ kubernetes_utils.get_kube_config_context_namespace(context))
234
243
  external_ip = network_utils.get_loadbalancer_ip(
235
- namespace=provider_config.get('namespace', 'default'),
244
+ context=context,
245
+ namespace=namespace,
236
246
  service_name=service_name,
237
247
  # Timeout is set so that we can retry the query when the
238
248
  # cluster is firstly created and the load balancer is not ready yet.
@@ -251,19 +261,24 @@ def _query_ports_for_loadbalancer(
251
261
  def _query_ports_for_ingress(
252
262
  cluster_name_on_cloud: str,
253
263
  ports: List[int],
264
+ provider_config: Dict[str, Any],
254
265
  ) -> Dict[int, List[common.Endpoint]]:
255
- ingress_details = network_utils.get_ingress_external_ip_and_ports()
266
+ context = provider_config.get(
267
+ 'context', kubernetes_utils.get_current_kube_config_context_name())
268
+ ingress_details = network_utils.get_ingress_external_ip_and_ports(context)
256
269
  external_ip, external_ports = ingress_details
257
270
  if external_ip is None:
258
271
  return {}
259
272
 
273
+ namespace = provider_config.get(
274
+ 'namespace',
275
+ kubernetes_utils.get_kube_config_context_namespace(context))
260
276
  result: Dict[int, List[common.Endpoint]] = {}
261
277
  for port in ports:
262
278
  path_prefix = _PATH_PREFIX.format(
263
279
  cluster_name_on_cloud=cluster_name_on_cloud,
264
280
  port=port,
265
- namespace=kubernetes_utils.
266
- get_current_kube_config_context_namespace())
281
+ namespace=namespace)
267
282
 
268
283
  http_port, https_port = external_ports \
269
284
  if external_ports is not None else (None, None)
@@ -282,10 +297,15 @@ def _query_ports_for_ingress(
282
297
  def _query_ports_for_podip(
283
298
  cluster_name_on_cloud: str,
284
299
  ports: List[int],
300
+ provider_config: Dict[str, Any],
285
301
  ) -> Dict[int, List[common.Endpoint]]:
286
- namespace = kubernetes_utils.get_current_kube_config_context_namespace()
302
+ context = provider_config.get(
303
+ 'context', kubernetes_utils.get_current_kube_config_context_name())
304
+ namespace = provider_config.get(
305
+ 'namespace',
306
+ kubernetes_utils.get_kube_config_context_namespace(context))
287
307
  pod_name = kubernetes_utils.get_head_pod_name(cluster_name_on_cloud)
288
- pod_ip = network_utils.get_pod_ip(namespace, pod_name)
308
+ pod_ip = network_utils.get_pod_ip(context, namespace, pod_name)
289
309
 
290
310
  result: Dict[int, List[common.Endpoint]] = {}
291
311
  if pod_ip is None:
@@ -220,10 +220,11 @@ def ingress_controller_exists(context: str,
220
220
 
221
221
 
222
222
  def get_ingress_external_ip_and_ports(
223
+ context: str,
223
224
  namespace: str = 'ingress-nginx'
224
225
  ) -> Tuple[Optional[str], Optional[Tuple[int, int]]]:
225
226
  """Returns external ip and ports for the ingress controller."""
226
- core_api = kubernetes.core_api()
227
+ core_api = kubernetes.core_api(context)
227
228
  ingress_services = [
228
229
  item for item in core_api.list_namespaced_service(
229
230
  namespace, _request_timeout=kubernetes.API_TIMEOUT).items
@@ -257,11 +258,12 @@ def get_ingress_external_ip_and_ports(
257
258
  return external_ip, None
258
259
 
259
260
 
260
- def get_loadbalancer_ip(namespace: str,
261
+ def get_loadbalancer_ip(context: str,
262
+ namespace: str,
261
263
  service_name: str,
262
264
  timeout: int = 0) -> Optional[str]:
263
265
  """Returns the IP address of the load balancer."""
264
- core_api = kubernetes.core_api()
266
+ core_api = kubernetes.core_api(context)
265
267
 
266
268
  ip = None
267
269
 
@@ -282,9 +284,9 @@ def get_loadbalancer_ip(namespace: str,
282
284
  return ip
283
285
 
284
286
 
285
- def get_pod_ip(namespace: str, pod_name: str) -> Optional[str]:
287
+ def get_pod_ip(context: str, namespace: str, pod_name: str) -> Optional[str]:
286
288
  """Returns the IP address of the pod."""
287
- core_api = kubernetes.core_api()
289
+ core_api = kubernetes.core_api(context)
288
290
  pod = core_api.read_namespaced_pod(pod_name,
289
291
  namespace,
290
292
  _request_timeout=kubernetes.API_TIMEOUT)