skypilot-nightly 1.0.0.dev20241114__py3-none-any.whl → 1.0.0.dev20241115__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = 'a2e670d347b666f40edb7f675af87c86faec3971'
8
+ _SKYPILOT_COMMIT_SHA = 'a404e3fc9bee7f0865f4118cfdd158de2b51ee28'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20241114'
38
+ __version__ = '1.0.0.dev20241115'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
@@ -2604,15 +2604,18 @@ def check_stale_runtime_on_remote(returncode: int, stderr: str,
2604
2604
  pattern = re.compile(r'AttributeError: module \'sky\.(.*)\' has no '
2605
2605
  r'attribute \'(.*)\'')
2606
2606
  if returncode != 0:
2607
+ # TODO(zhwu): Backward compatibility for old SkyPilot runtime version on
2608
+ # the remote cluster. Remove this after 0.10.0 is released.
2607
2609
  attribute_error = re.findall(pattern, stderr)
2608
- if attribute_error:
2610
+ if attribute_error or 'SkyPilot runtime is too old' in stderr:
2609
2611
  with ux_utils.print_exception_no_traceback():
2610
2612
  raise RuntimeError(
2611
2613
  f'{colorama.Fore.RED}SkyPilot runtime needs to be updated '
2612
- 'on the remote cluster. To update, run (existing jobs are '
2613
- f'not interrupted): {colorama.Style.BRIGHT}sky start -f -y '
2614
+ f'on the remote cluster: {cluster_name}. To update, run '
2615
+ '(existing jobs will not be interrupted): '
2616
+ f'{colorama.Style.BRIGHT}sky start -f -y '
2614
2617
  f'{cluster_name}{colorama.Style.RESET_ALL}'
2615
- f'\n--- Details ---\n{stderr.strip()}\n')
2618
+ f'\n--- Details ---\n{stderr.strip()}\n') from None
2616
2619
 
2617
2620
 
2618
2621
  def get_endpoints(cluster: str,
@@ -276,6 +276,7 @@ class RayCodeGen:
276
276
  from sky.skylet import constants
277
277
  from sky.skylet import job_lib
278
278
  from sky.utils import log_utils
279
+ from sky.utils import subprocess_utils
279
280
 
280
281
  SKY_REMOTE_WORKDIR = {constants.SKY_REMOTE_WORKDIR!r}
281
282
 
@@ -3275,14 +3276,13 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
3275
3276
  encoded_script = shlex.quote(codegen)
3276
3277
  create_script_code = (f'{{ echo {encoded_script} > {script_path}; }}')
3277
3278
  job_submit_cmd = (
3278
- f'RAY_DASHBOARD_PORT=$({constants.SKY_PYTHON_CMD} -c "from sky.skylet import job_lib; print(job_lib.get_job_submission_port())" 2> /dev/null || echo 8265);' # pylint: disable=line-too-long
3279
- f'{cd} && {constants.SKY_RAY_CMD} job submit '
3280
- '--address=http://127.0.0.1:$RAY_DASHBOARD_PORT '
3281
- f'--submission-id {job_id}-$(whoami) --no-wait '
3282
- f'"{constants.SKY_PYTHON_CMD} -u {script_path} '
3279
+ # JOB_CMD_IDENTIFIER is used to verify that the process retrieved
3280
+ # by pid is the same driver process.
3281
+ f'{job_lib.JOB_CMD_IDENTIFIER.format(job_id)} && '
3282
+ f'{cd} && {constants.SKY_PYTHON_CMD} -u {script_path}'
3283
3283
  # Do not use &>, which is not POSIX and may not work.
3284
3284
  # Note that the order of ">filename 2>&1" matters.
3285
- f'> {remote_log_path} 2>&1"')
3285
+ f'> {remote_log_path} 2>&1')
3286
3286
 
3287
3287
  code = job_lib.JobLibCodeGen.queue_job(job_id, job_submit_cmd)
3288
3288
  job_submit_cmd = ' && '.join([mkdir_code, create_script_code, code])
@@ -3330,6 +3330,10 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
3330
3330
  job_submit_cmd,
3331
3331
  stream_logs=False,
3332
3332
  require_outputs=True)
3333
+ # Happens when someone calls `sky exec` but remote is outdated for
3334
+ # running a job, necessitating calling `sky launch`.
3335
+ backend_utils.check_stale_runtime_on_remote(returncode, stderr,
3336
+ handle.cluster_name)
3333
3337
  if returncode == 255 and 'too long' in stdout + stderr:
3334
3338
  # If the generated script is too long, we retry it with dumping
3335
3339
  # the script to a file and running it with SSH. We use a general
@@ -3344,10 +3348,6 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
3344
3348
  stream_logs=False,
3345
3349
  require_outputs=True)
3346
3350
 
3347
- # Happens when someone calls `sky exec` but remote is outdated
3348
- # necessitating calling `sky launch`.
3349
- backend_utils.check_stale_runtime_on_remote(returncode, stdout,
3350
- handle.cluster_name)
3351
3351
  subprocess_utils.handle_returncode(returncode,
3352
3352
  job_submit_cmd,
3353
3353
  f'Failed to submit job {job_id}.',
@@ -3417,6 +3417,10 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
3417
3417
  stream_logs=False,
3418
3418
  require_outputs=True,
3419
3419
  separate_stderr=True)
3420
+ # Happens when someone calls `sky exec` but remote is outdated for
3421
+ # adding a job, necessitating calling `sky launch`.
3422
+ backend_utils.check_stale_runtime_on_remote(returncode, stderr,
3423
+ handle.cluster_name)
3420
3424
  # TODO(zhwu): this sometimes will unexpectedly fail, we can add
3421
3425
  # retry for this, after we figure out the reason.
3422
3426
  subprocess_utils.handle_returncode(returncode, code,
sky/clouds/oci.py CHANGED
@@ -75,8 +75,6 @@ class OCI(clouds.Cloud):
75
75
  (f'Docker image is currently not supported on {cls._REPR}. '
76
76
  'You can try running docker command inside the '
77
77
  '`run` section in task.yaml.'),
78
- clouds.CloudImplementationFeatures.OPEN_PORTS:
79
- (f'Opening ports is currently not supported on {cls._REPR}.'),
80
78
  }
81
79
  if resources.use_spot:
82
80
  features[clouds.CloudImplementationFeatures.STOP] = (
@@ -4,6 +4,8 @@ History:
4
4
  - Zhanghao Wu @ Oct 2023: Formatting and refactoring
5
5
  - Hysun He (hysun.he@oracle.com) @ Oct, 2024: Add default image OS
6
6
  configuration.
7
+ - Hysun He (hysun.he@oracle.com) @ Nov.12, 2024: Add the constant
8
+ SERVICE_PORT_RULE_TAG
7
9
  """
8
10
  import os
9
11
 
@@ -42,6 +44,9 @@ class OCIConfig:
42
44
  VCN_CIDR_INTERNET = '0.0.0.0/0'
43
45
  VCN_CIDR = '192.168.0.0/16'
44
46
  VCN_SUBNET_CIDR = '192.168.0.0/18'
47
+ SERVICE_PORT_RULE_TAG = 'SkyServe-Service-Port'
48
+ # NSG name template
49
+ NSG_NAME_TEMPLATE = 'nsg_{cluster_name}'
45
50
 
46
51
  MAX_RETRY_COUNT = 3
47
52
  RETRY_INTERVAL_BASE_SECONDS = 5
sky/execution.py CHANGED
@@ -11,10 +11,10 @@ import sky
11
11
  from sky import admin_policy
12
12
  from sky import backends
13
13
  from sky import clouds
14
- from sky import exceptions
15
14
  from sky import global_user_state
16
15
  from sky import optimizer
17
16
  from sky import sky_logging
17
+ from sky import status_lib
18
18
  from sky.backends import backend_utils
19
19
  from sky.usage import usage_lib
20
20
  from sky.utils import admin_policy_utils
@@ -463,28 +463,43 @@ def launch(
463
463
  stages = None
464
464
  # Check if cluster exists and we are doing fast provisioning
465
465
  if fast and cluster_name is not None:
466
- maybe_handle = global_user_state.get_handle_from_cluster_name(
467
- cluster_name)
468
- if maybe_handle is not None:
469
- try:
470
- # This will throw if the cluster is not available
471
- backend_utils.check_cluster_available(
466
+ cluster_status, maybe_handle = (
467
+ backend_utils.refresh_cluster_status_handle(cluster_name))
468
+ if cluster_status == status_lib.ClusterStatus.INIT:
469
+ # If the cluster is INIT, it may be provisioning. We want to prevent
470
+ # concurrent calls from queueing up many sequential reprovision
471
+ # attempts. Since provisioning will hold the cluster status lock, we
472
+ # wait to hold that lock by force refreshing the status. This will
473
+ # block until the cluster finishes provisioning, then correctly see
474
+ # that it is UP.
475
+ # TODO(cooperc): If multiple processes launched in parallel see that
476
+ # the cluster is STOPPED or does not exist, they will still all try
477
+ # to provision it, since we do not hold the lock continuously from
478
+ # the status check until the provision call. Fixing this requires a
479
+ # bigger refactor.
480
+ cluster_status, maybe_handle = (
481
+ backend_utils.refresh_cluster_status_handle(
472
482
  cluster_name,
473
- operation='executing tasks',
474
- check_cloud_vm_ray_backend=False,
475
- dryrun=dryrun)
476
- handle = maybe_handle
477
- # Get all stages
478
- stages = [
479
- Stage.SYNC_WORKDIR,
480
- Stage.SYNC_FILE_MOUNTS,
481
- Stage.PRE_EXEC,
482
- Stage.EXEC,
483
- Stage.DOWN,
484
- ]
485
- except exceptions.ClusterNotUpError:
486
- # Proceed with normal provisioning
487
- pass
483
+ force_refresh_statuses=[
484
+ # If the cluster is INIT, we want to try to grab the
485
+ # status lock, which should block until provisioning is
486
+ # finished.
487
+ status_lib.ClusterStatus.INIT,
488
+ ],
489
+ # Wait indefinitely to obtain the lock, so that we don't
490
+ # have multiple processes launching the same cluster at
491
+ # once.
492
+ cluster_status_lock_timeout=-1,
493
+ ))
494
+ if cluster_status == status_lib.ClusterStatus.UP:
495
+ handle = maybe_handle
496
+ stages = [
497
+ Stage.SYNC_WORKDIR,
498
+ Stage.SYNC_FILE_MOUNTS,
499
+ Stage.PRE_EXEC,
500
+ Stage.EXEC,
501
+ Stage.DOWN,
502
+ ]
488
503
 
489
504
  return _execute(
490
505
  entrypoint=entrypoint,
sky/jobs/core.py CHANGED
@@ -133,7 +133,6 @@ def launch(
133
133
  controller_task.set_resources(controller_resources)
134
134
 
135
135
  controller_task.managed_job_dag = dag
136
- assert len(controller_task.resources) == 1, controller_task
137
136
 
138
137
  sky_logging.print(
139
138
  f'{colorama.Fore.YELLOW}'
sky/jobs/utils.py CHANGED
@@ -85,7 +85,8 @@ def get_job_status(backend: 'backends.CloudVmRayBackend',
85
85
  cluster_name: str) -> Optional['job_lib.JobStatus']:
86
86
  """Check the status of the job running on a managed job cluster.
87
87
 
88
- It can be None, INIT, RUNNING, SUCCEEDED, FAILED, FAILED_SETUP or CANCELLED.
88
+ It can be None, INIT, RUNNING, SUCCEEDED, FAILED, FAILED_DRIVER,
89
+ FAILED_SETUP or CANCELLED.
89
90
  """
90
91
  handle = global_user_state.get_handle_from_cluster_name(cluster_name)
91
92
  assert isinstance(handle, backends.CloudVmRayResourceHandle), handle
@@ -866,7 +867,7 @@ class ManagedJobCodeGen:
866
867
  code += inspect.getsource(stream_logs)
867
868
  code += textwrap.dedent(f"""\
868
869
 
869
- msg = stream_logs({job_id!r}, {job_name!r},
870
+ msg = stream_logs({job_id!r}, {job_name!r},
870
871
  follow={follow}, controller={controller})
871
872
  print(msg, flush=True)
872
873
  """)
@@ -883,7 +884,7 @@ class ManagedJobCodeGen:
883
884
  resources_str = backend_utils.get_task_resources_str(
884
885
  task, is_managed_job=True)
885
886
  code += textwrap.dedent(f"""\
886
- managed_job_state.set_pending({job_id}, {task_id},
887
+ managed_job_state.set_pending({job_id}, {task_id},
887
888
  {task.name!r}, {resources_str!r})
888
889
  """)
889
890
  return cls._build(code)
@@ -2,6 +2,8 @@
2
2
 
3
3
  History:
4
4
  - Hysun He (hysun.he@oracle.com) @ Oct.16, 2024: Initial implementation
5
+ - Hysun He (hysun.he@oracle.com) @ Nov.13, 2024: Implement open_ports
6
+ and cleanup_ports for supporting SkyServe.
5
7
  """
6
8
 
7
9
  import copy
@@ -292,11 +294,11 @@ def open_ports(
292
294
  provider_config: Optional[Dict[str, Any]] = None,
293
295
  ) -> None:
294
296
  """Open ports for inbound traffic."""
295
- # OCI ports in security groups are opened while creating the new
296
- # VCN (skypilot_vcn). If user configure to use existing VCN, it is
297
- # intended to let user to manage the ports instead of automatically
298
- # opening ports here.
299
- del cluster_name_on_cloud, ports, provider_config
297
+ assert provider_config is not None, cluster_name_on_cloud
298
+ region = provider_config['region']
299
+ query_helper.create_nsg_rules(region=region,
300
+ cluster_name=cluster_name_on_cloud,
301
+ ports=ports)
300
302
 
301
303
 
302
304
  @query_utils.debug_enabled(logger)
@@ -306,12 +308,11 @@ def cleanup_ports(
306
308
  provider_config: Optional[Dict[str, Any]] = None,
307
309
  ) -> None:
308
310
  """Delete any opened ports."""
309
- del cluster_name_on_cloud, ports, provider_config
310
- # OCI ports in security groups are opened while creating the new
311
- # VCN (skypilot_vcn). The VCN will only be created at the first
312
- # time when it is not existed. We'll not automatically delete the
313
- # VCN while teardown clusters. it is intended to let user to decide
314
- # to delete the VCN or not from OCI console, for example.
311
+ assert provider_config is not None, cluster_name_on_cloud
312
+ region = provider_config['region']
313
+ del ports
314
+ query_helper.remove_cluster_nsg(region=region,
315
+ cluster_name=cluster_name_on_cloud)
315
316
 
316
317
 
317
318
  @query_utils.debug_enabled(logger)
@@ -5,6 +5,8 @@ History:
5
5
  migrated from the old provisioning API.
6
6
  - Hysun He (hysun.he@oracle.com) @ Oct.18, 2024: Enhancement.
7
7
  find_compartment: allow search subtree when find a compartment.
8
+ - Hysun He (hysun.he@oracle.com) @ Nov.12, 2024: Add methods to
9
+ add/remove security rules: create_nsg_rules & remove_cluster_nsg
8
10
  """
9
11
  from datetime import datetime
10
12
  import functools
@@ -13,12 +15,15 @@ import re
13
15
  import time
14
16
  import traceback
15
17
  import typing
16
- from typing import Optional
18
+ from typing import List, Optional, Tuple
17
19
 
20
+ from sky import exceptions
18
21
  from sky import sky_logging
19
22
  from sky.adaptors import common as adaptors_common
20
23
  from sky.adaptors import oci as oci_adaptor
21
24
  from sky.clouds.utils import oci_utils
25
+ from sky.provision import constants
26
+ from sky.utils import resources_utils
22
27
 
23
28
  if typing.TYPE_CHECKING:
24
29
  import pandas as pd
@@ -81,19 +86,33 @@ class QueryHelper:
81
86
  return result_set
82
87
 
83
88
  @classmethod
89
+ @debug_enabled(logger)
84
90
  def terminate_instances_by_tags(cls, tag_filters, region) -> int:
85
91
  logger.debug(f'Terminate instance by tags: {tag_filters}')
92
+
93
+ cluster_name = tag_filters[constants.TAG_RAY_CLUSTER_NAME]
94
+ nsg_name = oci_utils.oci_config.NSG_NAME_TEMPLATE.format(
95
+ cluster_name=cluster_name)
96
+ nsg_id = cls.find_nsg(region, nsg_name, create_if_not_exist=False)
97
+
98
+ core_client = oci_adaptor.get_core_client(
99
+ region, oci_utils.oci_config.get_profile())
100
+
86
101
  insts = cls.query_instances_by_tags(tag_filters, region)
87
102
  fail_count = 0
88
103
  for inst in insts:
89
104
  inst_id = inst.identifier
90
- logger.debug(f'Got instance(to be terminated): {inst_id}')
105
+ logger.debug(f'Terminating instance {inst_id}')
91
106
 
92
107
  try:
93
- oci_adaptor.get_core_client(
94
- region,
95
- oci_utils.oci_config.get_profile()).terminate_instance(
96
- inst_id)
108
+ # Release the NSG reference so that the NSG can be
109
+ # deleted without waiting for the instance to be terminated.
110
+ if nsg_id is not None:
111
+ cls.detach_nsg(region, inst, nsg_id)
112
+
113
+ # Terminate the instance
114
+ core_client.terminate_instance(inst_id)
115
+
97
116
  except oci_adaptor.oci.exceptions.ServiceError as e:
98
117
  fail_count += 1
99
118
  logger.error(f'Terminate instance failed: {str(e)}\n: {inst}')
@@ -468,5 +487,192 @@ class QueryHelper:
468
487
  logger.error(
469
488
  f'Delete VCN {oci_utils.oci_config.VCN_NAME} Error: {str(e)}')
470
489
 
490
+ @classmethod
491
+ @debug_enabled(logger)
492
+ def find_nsg(cls, region: str, nsg_name: str,
493
+ create_if_not_exist: bool) -> Optional[str]:
494
+ net_client = oci_adaptor.get_net_client(
495
+ region, oci_utils.oci_config.get_profile())
496
+
497
+ compartment = cls.find_compartment(region)
498
+
499
+ list_vcns_resp = net_client.list_vcns(
500
+ compartment_id=compartment,
501
+ display_name=oci_utils.oci_config.VCN_NAME,
502
+ lifecycle_state='AVAILABLE',
503
+ )
504
+
505
+ if not list_vcns_resp:
506
+ raise exceptions.ResourcesUnavailableError(
507
+ 'The VCN is not available')
508
+
509
+ # Use the first VCN returned by the listing.
510
+ assert len(list_vcns_resp.data) > 0
511
+ vcn = list_vcns_resp.data[0]
512
+
513
+ list_nsg_resp = net_client.list_network_security_groups(
514
+ compartment_id=compartment,
515
+ vcn_id=vcn.id,
516
+ limit=1,
517
+ display_name=nsg_name,
518
+ )
519
+
520
+ nsgs = list_nsg_resp.data
521
+ if nsgs:
522
+ assert len(nsgs) == 1
523
+ return nsgs[0].id
524
+ elif not create_if_not_exist:
525
+ return None
526
+
527
+ # Continue to create new NSG if not exists
528
+ create_nsg_resp = net_client.create_network_security_group(
529
+ create_network_security_group_details=oci_adaptor.oci.core.models.
530
+ CreateNetworkSecurityGroupDetails(
531
+ compartment_id=compartment,
532
+ vcn_id=vcn.id,
533
+ display_name=nsg_name,
534
+ ))
535
+ get_nsg_resp = net_client.get_network_security_group(
536
+ network_security_group_id=create_nsg_resp.data.id)
537
+ oci_adaptor.oci.wait_until(
538
+ net_client,
539
+ get_nsg_resp,
540
+ 'lifecycle_state',
541
+ 'AVAILABLE',
542
+ )
543
+
544
+ return get_nsg_resp.data.id
545
+
546
+ @classmethod
547
+ def get_range_min_max(cls, port_range: str) -> Tuple[int, int]:
548
+ range_list = port_range.split('-')
549
+ if len(range_list) == 1:
550
+ return (int(range_list[0]), int(range_list[0]))
551
+ from_port, to_port = range_list
552
+ return (int(from_port), int(to_port))
553
+
554
+ @classmethod
555
+ @debug_enabled(logger)
556
+ def create_nsg_rules(cls, region: str, cluster_name: str,
557
+ ports: List[str]) -> None:
558
+ """ Create per-cluster NSG with ingress rules """
559
+ if not ports:
560
+ return
561
+
562
+ net_client = oci_adaptor.get_net_client(
563
+ region, oci_utils.oci_config.get_profile())
564
+
565
+ nsg_name = oci_utils.oci_config.NSG_NAME_TEMPLATE.format(
566
+ cluster_name=cluster_name)
567
+ nsg_id = cls.find_nsg(region, nsg_name, create_if_not_exist=True)
568
+
569
+ filters = {constants.TAG_RAY_CLUSTER_NAME: cluster_name}
570
+ insts = query_helper.query_instances_by_tags(filters, region)
571
+ for inst in insts:
572
+ vnic = cls.get_instance_primary_vnic(
573
+ region=region,
574
+ inst_info={
575
+ 'inst_id': inst.identifier,
576
+ 'ad': inst.availability_domain,
577
+ 'compartment': inst.compartment_id,
578
+ })
579
+ nsg_ids = vnic.nsg_ids
580
+ if not nsg_ids:
581
+ net_client.update_vnic(
582
+ vnic_id=vnic.id,
583
+ update_vnic_details=oci_adaptor.oci.core.models.
584
+ UpdateVnicDetails(nsg_ids=[nsg_id],
585
+ skip_source_dest_check=False),
586
+ )
587
+
588
+ # pylint: disable=line-too-long
589
+ list_nsg_rules_resp = net_client.list_network_security_group_security_rules(
590
+ network_security_group_id=nsg_id,
591
+ direction='INGRESS',
592
+ sort_by='TIMECREATED',
593
+ sort_order='DESC',
594
+ )
595
+
596
+ ingress_rules: List = list_nsg_rules_resp.data
597
+ existing_port_ranges: List[str] = []
598
+ for r in ingress_rules:
599
+ if r.tcp_options:
600
+ options_range = r.tcp_options.destination_port_range
601
+ rule_port_range = f'{options_range.min}-{options_range.max}'
602
+ existing_port_ranges.append(rule_port_range)
603
+
604
+ new_ports = resources_utils.port_ranges_to_set(ports)
605
+ existing_ports = resources_utils.port_ranges_to_set(
606
+ existing_port_ranges)
607
+ if new_ports.issubset(existing_ports):
608
+ # All requested ports are already covered by the existing rules; nothing to add.
609
+ return
610
+
611
+ # Determine the ports to be added, without overlapping.
612
+ ports_to_open = new_ports - existing_ports
613
+ port_ranges_to_open = resources_utils.port_set_to_ranges(ports_to_open)
614
+
615
+ new_rules = []
616
+ for port_range in port_ranges_to_open:
617
+ port_range_min, port_range_max = cls.get_range_min_max(port_range)
618
+ new_rules.append(
619
+ oci_adaptor.oci.core.models.AddSecurityRuleDetails(
620
+ direction='INGRESS',
621
+ protocol='6',
622
+ is_stateless=False,
623
+ source=oci_utils.oci_config.VCN_CIDR_INTERNET,
624
+ source_type='CIDR_BLOCK',
625
+ tcp_options=oci_adaptor.oci.core.models.TcpOptions(
626
+ destination_port_range=oci_adaptor.oci.core.models.
627
+ PortRange(min=port_range_min, max=port_range_max),),
628
+ description=oci_utils.oci_config.SERVICE_PORT_RULE_TAG,
629
+ ))
630
+
631
+ net_client.add_network_security_group_security_rules(
632
+ network_security_group_id=nsg_id,
633
+ add_network_security_group_security_rules_details=oci_adaptor.oci.
634
+ core.models.AddNetworkSecurityGroupSecurityRulesDetails(
635
+ security_rules=new_rules),
636
+ )
637
+
638
+ @classmethod
639
+ @debug_enabled(logger)
640
+ def detach_nsg(cls, region: str, inst, nsg_id: Optional[str]) -> None:
641
+ if nsg_id is None:
642
+ return
643
+
644
+ vnic = cls.get_instance_primary_vnic(
645
+ region=region,
646
+ inst_info={
647
+ 'inst_id': inst.identifier,
648
+ 'ad': inst.availability_domain,
649
+ 'compartment': inst.compartment_id,
650
+ })
651
+
652
+ # Detach the NSG before removing it.
653
+ oci_adaptor.get_net_client(region, oci_utils.oci_config.get_profile(
654
+ )).update_vnic(
655
+ vnic_id=vnic.id,
656
+ update_vnic_details=oci_adaptor.oci.core.models.UpdateVnicDetails(
657
+ nsg_ids=[], skip_source_dest_check=False),
658
+ )
659
+
660
+ @classmethod
661
+ @debug_enabled(logger)
662
+ def remove_cluster_nsg(cls, region: str, cluster_name: str) -> None:
663
+ """ Remove NSG of the cluster """
664
+ net_client = oci_adaptor.get_net_client(
665
+ region, oci_utils.oci_config.get_profile())
666
+
667
+ nsg_name = oci_utils.oci_config.NSG_NAME_TEMPLATE.format(
668
+ cluster_name=cluster_name)
669
+ nsg_id = cls.find_nsg(region, nsg_name, create_if_not_exist=False)
670
+ if nsg_id is None:
671
+ return
672
+
673
+ # Delete the NSG
674
+ net_client.delete_network_security_group(
675
+ network_security_group_id=nsg_id)
676
+
471
677
 
472
678
  query_helper = QueryHelper()
sky/serve/core.py CHANGED
@@ -701,6 +701,7 @@ def tail_logs(
701
701
  with ux_utils.print_exception_no_traceback():
702
702
  raise ValueError(f'`target` must be a string or '
703
703
  f'sky.serve.ServiceComponent, got {type(target)}.')
704
+
704
705
  if target == serve_utils.ServiceComponent.REPLICA:
705
706
  if replica_id is None:
706
707
  with ux_utils.print_exception_no_traceback():