skypilot-nightly 1.0.0.dev20241108__py3-none-any.whl → 1.0.0.dev20241110__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. sky/__init__.py +2 -2
  2. sky/backends/cloud_vm_ray_backend.py +6 -21
  3. sky/backends/wheel_utils.py +5 -1
  4. sky/cli.py +25 -1
  5. sky/clouds/oci.py +11 -21
  6. sky/clouds/service_catalog/oci_catalog.py +1 -1
  7. sky/clouds/utils/oci_utils.py +16 -2
  8. sky/core.py +3 -2
  9. sky/dag.py +20 -15
  10. sky/data/mounting_utils.py +4 -16
  11. sky/exceptions.py +4 -1
  12. sky/execution.py +10 -8
  13. sky/jobs/core.py +3 -1
  14. sky/provision/__init__.py +1 -0
  15. sky/provision/aws/config.py +25 -5
  16. sky/provision/oci/__init__.py +15 -0
  17. sky/provision/oci/config.py +51 -0
  18. sky/provision/oci/instance.py +430 -0
  19. sky/{skylet/providers/oci/query_helper.py → provision/oci/query_utils.py} +148 -59
  20. sky/serve/core.py +11 -1
  21. sky/setup_files/MANIFEST.in +0 -1
  22. sky/skylet/constants.py +1 -1
  23. sky/skylet/job_lib.py +39 -20
  24. sky/skylet/log_lib.py +77 -8
  25. sky/templates/kubernetes-ray.yml.j2 +3 -1
  26. sky/templates/oci-ray.yml.j2 +3 -53
  27. sky/utils/admin_policy_utils.py +1 -0
  28. sky/utils/command_runner.py +14 -2
  29. sky/utils/control_master_utils.py +49 -0
  30. {skypilot_nightly-1.0.0.dev20241108.dist-info → skypilot_nightly-1.0.0.dev20241110.dist-info}/METADATA +1 -1
  31. {skypilot_nightly-1.0.0.dev20241108.dist-info → skypilot_nightly-1.0.0.dev20241110.dist-info}/RECORD +35 -34
  32. sky/skylet/providers/oci/__init__.py +0 -2
  33. sky/skylet/providers/oci/node_provider.py +0 -488
  34. sky/skylet/providers/oci/utils.py +0 -21
  35. {skypilot_nightly-1.0.0.dev20241108.dist-info → skypilot_nightly-1.0.0.dev20241110.dist-info}/LICENSE +0 -0
  36. {skypilot_nightly-1.0.0.dev20241108.dist-info → skypilot_nightly-1.0.0.dev20241110.dist-info}/WHEEL +0 -0
  37. {skypilot_nightly-1.0.0.dev20241108.dist-info → skypilot_nightly-1.0.0.dev20241110.dist-info}/entry_points.txt +0 -0
  38. {skypilot_nightly-1.0.0.dev20241108.dist-info → skypilot_nightly-1.0.0.dev20241110.dist-info}/top_level.txt +0 -0
sky/__init__.py CHANGED
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import urllib.request
6
6
 
7
7
  # Replaced with the current commit when building the wheels.
8
- _SKYPILOT_COMMIT_SHA = '7bea46813b47a46ce7b00501413cac637ee400a3'
8
+ _SKYPILOT_COMMIT_SHA = 'dddd65187953a5d6b32f762bea78eed1f109ec3c'
9
9
 
10
10
 
11
11
  def _get_git_commit():
@@ -35,7 +35,7 @@ def _get_git_commit():
35
35
 
36
36
 
37
37
  __commit__ = _get_git_commit()
38
- __version__ = '1.0.0.dev20241108'
38
+ __version__ = '1.0.0.dev20241110'
39
39
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
40
40
 
41
41
 
sky/backends/cloud_vm_ray_backend.py CHANGED
@@ -3711,7 +3711,8 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
3711
3711
  handle: CloudVmRayResourceHandle,
3712
3712
  job_id: Optional[int],
3713
3713
  managed_job_id: Optional[int] = None,
3714
- follow: bool = True) -> int:
3714
+ follow: bool = True,
3715
+ tail: int = 0) -> int:
3715
3716
  """Tail the logs of a job.
3716
3717
 
3717
3718
  Args:
@@ -3719,10 +3720,13 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
3719
3720
  job_id: The job ID to tail the logs of.
3720
3721
  managed_job_id: The managed job ID for display purpose only.
3721
3722
  follow: Whether to follow the logs.
3723
+ tail: The number of lines to display from the end of the
3724
+ log file. If 0, print all lines.
3722
3725
  """
3723
3726
  code = job_lib.JobLibCodeGen.tail_logs(job_id,
3724
3727
  managed_job_id=managed_job_id,
3725
- follow=follow)
3728
+ follow=follow,
3729
+ tail=tail)
3726
3730
  if job_id is None and managed_job_id is None:
3727
3731
  logger.info(
3728
3732
  'Job ID not provided. Streaming the logs of the latest job.')
@@ -3975,25 +3979,6 @@ class CloudVmRayBackend(backends.Backend['CloudVmRayResourceHandle']):
3975
3979
  stdout = ''
3976
3980
  stderr = str(e)
3977
3981
 
3978
- # Apr, 2023 by Hysun(hysun.he@oracle.com): Added support for OCI
3979
- # May, 2023 by Hysun: Allow terminate INIT cluster which may have
3980
- # some instances provisioning in background but not completed.
3981
- elif (isinstance(cloud, clouds.OCI) and terminate and
3982
- prev_cluster_status in (status_lib.ClusterStatus.STOPPED,
3983
- status_lib.ClusterStatus.INIT)):
3984
- region = config['provider']['region']
3985
-
3986
- # pylint: disable=import-outside-toplevel
3987
- from ray.autoscaler.tags import TAG_RAY_CLUSTER_NAME
3988
-
3989
- from sky.skylet.providers.oci.query_helper import oci_query_helper
3990
-
3991
- # 0: All terminated successfully, failed count otherwise
3992
- returncode = oci_query_helper.terminate_instances_by_tags(
3993
- {TAG_RAY_CLUSTER_NAME: cluster_name_on_cloud}, region)
3994
-
3995
- # To avoid undefined local variables error.
3996
- stdout = stderr = ''
3997
3982
  else:
3998
3983
  config['provider']['cache_stopped_nodes'] = not terminate
3999
3984
  with tempfile.NamedTemporaryFile('w',
sky/backends/wheel_utils.py CHANGED
@@ -129,7 +129,11 @@ def _build_sky_wheel() -> pathlib.Path:
129
129
 
130
130
  wheel_dir = WHEEL_DIR / hash_of_latest_wheel
131
131
  wheel_dir.mkdir(parents=True, exist_ok=True)
132
- shutil.move(str(wheel_path), wheel_dir)
132
+ # shutil.move will fail when the file already exists and is being
133
+ # moved across filesystems.
134
+ if not os.path.exists(
135
+ os.path.join(wheel_dir, os.path.basename(wheel_path))):
136
+ shutil.move(str(wheel_path), wheel_dir)
133
137
  return wheel_dir / wheel_path.name
134
138
 
135
139
 
sky/cli.py CHANGED
@@ -46,6 +46,7 @@ from rich import progress as rich_progress
46
46
  import yaml
47
47
 
48
48
  import sky
49
+ from sky import admin_policy
49
50
  from sky import backends
50
51
  from sky import check as sky_check
51
52
  from sky import clouds as sky_clouds
@@ -67,6 +68,7 @@ from sky.skylet import constants
67
68
  from sky.skylet import job_lib
68
69
  from sky.skylet import log_lib
69
70
  from sky.usage import usage_lib
71
+ from sky.utils import admin_policy_utils
70
72
  from sky.utils import common_utils
71
73
  from sky.utils import controller_utils
72
74
  from sky.utils import dag_utils
@@ -582,6 +584,15 @@ def _launch_with_confirm(
582
584
  with ux_utils.print_exception_no_traceback():
583
585
  raise RuntimeError(f'{colorama.Fore.YELLOW}{e}'
584
586
  f'{colorama.Style.RESET_ALL}') from e
587
+ dag, _ = admin_policy_utils.apply(
588
+ dag,
589
+ request_options=admin_policy.RequestOptions(
590
+ cluster_name=cluster,
591
+ idle_minutes_to_autostop=idle_minutes_to_autostop,
592
+ down=down,
593
+ dryrun=dryrun,
594
+ ),
595
+ )
585
596
  dag = sky.optimize(dag)
586
597
  task = dag.tasks[0]
587
598
 
@@ -2011,6 +2022,12 @@ def queue(clusters: List[str], skip_finished: bool, all_users: bool):
2011
2022
  help=('Follow the logs of a job. '
2012
2023
  'If --no-follow is specified, print the log so far and exit. '
2013
2024
  '[default: --follow]'))
2025
+ @click.option(
2026
+ '--tail',
2027
+ default=0,
2028
+ type=int,
2029
+ help=('The number of lines to display from the end of the log file. '
2030
+ 'Default is 0, which means print all lines.'))
2014
2031
  @click.argument('cluster',
2015
2032
  required=True,
2016
2033
  type=str,
@@ -2024,6 +2041,7 @@ def logs(
2024
2041
  sync_down: bool,
2025
2042
  status: bool, # pylint: disable=redefined-outer-name
2026
2043
  follow: bool,
2044
+ tail: int,
2027
2045
  ):
2028
2046
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
2029
2047
  """Tail the log of a job.
@@ -2090,7 +2108,7 @@ def logs(
2090
2108
  click.secho(f'Job {id_str}not found', fg='red')
2091
2109
  sys.exit(1)
2092
2110
 
2093
- core.tail_logs(cluster, job_id, follow)
2111
+ core.tail_logs(cluster, job_id, follow, tail)
2094
2112
 
2095
2113
 
2096
2114
  @cli.command()
@@ -3667,6 +3685,8 @@ def jobs_launch(
3667
3685
 
3668
3686
  click.secho(f'Managed job {dag.name!r} will be launched on (estimated):',
3669
3687
  fg='cyan')
3688
+ dag, _ = admin_policy_utils.apply(
3689
+ dag, use_mutated_config_in_current_request=False)
3670
3690
  dag = sky.optimize(dag)
3671
3691
 
3672
3692
  if not yes:
@@ -4145,6 +4165,8 @@ def serve_up(
4145
4165
  fg='cyan')
4146
4166
  with sky.Dag() as dag:
4147
4167
  dag.add(task)
4168
+ dag, _ = admin_policy_utils.apply(
4169
+ dag, use_mutated_config_in_current_request=False)
4148
4170
  sky.optimize(dag)
4149
4171
 
4150
4172
  if not yes:
@@ -4261,6 +4283,8 @@ def serve_update(
4261
4283
  fg='cyan')
4262
4284
  with sky.Dag() as dag:
4263
4285
  dag.add(task)
4286
+ dag, _ = admin_policy_utils.apply(
4287
+ dag, use_mutated_config_in_current_request=False)
4264
4288
  sky.optimize(dag)
4265
4289
 
4266
4290
  if not yes:
sky/clouds/oci.py CHANGED
@@ -31,6 +31,7 @@ from sky import status_lib
31
31
  from sky.adaptors import oci as oci_adaptor
32
32
  from sky.clouds import service_catalog
33
33
  from sky.clouds.utils import oci_utils
34
+ from sky.provision.oci.query_utils import query_helper
34
35
  from sky.utils import common_utils
35
36
  from sky.utils import resources_utils
36
37
  from sky.utils import ux_utils
@@ -60,6 +61,9 @@ class OCI(clouds.Cloud):
60
61
  {resources_utils.DiskTier.ULTRA})
61
62
  _BEST_DISK_TIER = resources_utils.DiskTier.HIGH
62
63
 
64
+ PROVISIONER_VERSION = clouds.ProvisionerVersion.SKYPILOT
65
+ STATUS_VERSION = clouds.StatusVersion.SKYPILOT
66
+
63
67
  @classmethod
64
68
  def _unsupported_features_for_resources(
65
69
  cls, resources: 'resources_lib.Resources'
@@ -433,7 +437,7 @@ class OCI(clouds.Cloud):
433
437
  return True, None
434
438
  except (oci_adaptor.oci.exceptions.ConfigFileNotFound,
435
439
  oci_adaptor.oci.exceptions.InvalidConfig,
436
- oci_adaptor.service_exception()) as e:
440
+ oci_adaptor.oci.exceptions.ServiceError) as e:
437
441
  return False, (
438
442
  f'OCI credential is not correctly set. '
439
443
  f'Check the credential file at {conf_file}\n'
@@ -597,25 +601,11 @@ class OCI(clouds.Cloud):
597
601
  region: Optional[str], zone: Optional[str],
598
602
  **kwargs) -> List[status_lib.ClusterStatus]:
599
603
  del zone, kwargs # Unused.
600
- # Check the lifecycleState definition from the page
601
- # https://docs.oracle.com/en-us/iaas/api/#/en/iaas/latest/Instance/
602
- status_map = {
603
- 'PROVISIONING': status_lib.ClusterStatus.INIT,
604
- 'STARTING': status_lib.ClusterStatus.INIT,
605
- 'RUNNING': status_lib.ClusterStatus.UP,
606
- 'STOPPING': status_lib.ClusterStatus.STOPPED,
607
- 'STOPPED': status_lib.ClusterStatus.STOPPED,
608
- 'TERMINATED': None,
609
- 'TERMINATING': None,
610
- }
611
-
612
- # pylint: disable=import-outside-toplevel
613
- from sky.skylet.providers.oci.query_helper import oci_query_helper
614
604
 
615
605
  status_list = []
616
606
  try:
617
- vms = oci_query_helper.query_instances_by_tags(
618
- tag_filters=tag_filters, region=region)
607
+ vms = query_helper.query_instances_by_tags(tag_filters=tag_filters,
608
+ region=region)
619
609
  except Exception as e: # pylint: disable=broad-except
620
610
  with ux_utils.print_exception_no_traceback():
621
611
  raise exceptions.ClusterStatusFetchingError(
@@ -625,9 +615,9 @@ class OCI(clouds.Cloud):
625
615
 
626
616
  for node in vms:
627
617
  vm_status = node.lifecycle_state
628
- if vm_status in status_map:
629
- sky_status = status_map[vm_status]
630
- if sky_status is not None:
631
- status_list.append(sky_status)
618
+ sky_status = oci_utils.oci_config.STATE_MAPPING_OCI_TO_SKY.get(
619
+ vm_status, None)
620
+ if sky_status is not None:
621
+ status_list.append(sky_status)
632
622
 
633
623
  return status_list
sky/clouds/service_catalog/oci_catalog.py CHANGED
@@ -66,7 +66,7 @@ def _get_df() -> 'pd.DataFrame':
66
66
  logger.debug(f'It is OK goes here when testing: {str(e)}')
67
67
  subscribed_regions = []
68
68
 
69
- except oci_adaptor.service_exception() as e:
69
+ except oci_adaptor.oci.exceptions.ServiceError as e:
70
70
  # Should never expect going here. However, we still catch
71
71
  # it so that if any OCI call failed, the program can still
72
72
  # proceed with try-and-error way.
sky/clouds/utils/oci_utils.py CHANGED
@@ -5,13 +5,14 @@ History:
5
5
  - Hysun He (hysun.he@oracle.com) @ Oct, 2024: Add default image OS
6
6
  configuration.
7
7
  """
8
- import logging
9
8
  import os
10
9
 
10
+ from sky import sky_logging
11
11
  from sky import skypilot_config
12
+ from sky import status_lib
12
13
  from sky.utils import resources_utils
13
14
 
14
- logger = logging.getLogger(__name__)
15
+ logger = sky_logging.init_logger(__name__)
15
16
 
16
17
 
17
18
  class OCIConfig:
@@ -77,6 +78,19 @@ class OCIConfig:
77
78
  resources_utils.DiskTier.HIGH: DISK_TIER_HIGH,
78
79
  }
79
80
 
81
+ # Oracle instance's lifecycle state to sky state mapping.
82
+ # For Oracle VM instance's lifecyle state, please refer to the link:
83
+ # https://docs.oracle.com/en-us/iaas/api/#/en/iaas/latest/Instance/
84
+ STATE_MAPPING_OCI_TO_SKY = {
85
+ 'PROVISIONING': status_lib.ClusterStatus.INIT,
86
+ 'STARTING': status_lib.ClusterStatus.INIT,
87
+ 'RUNNING': status_lib.ClusterStatus.UP,
88
+ 'STOPPING': status_lib.ClusterStatus.STOPPED,
89
+ 'STOPPED': status_lib.ClusterStatus.STOPPED,
90
+ 'TERMINATED': None,
91
+ 'TERMINATING': None,
92
+ }
93
+
80
94
  @classmethod
81
95
  def get_compartment(cls, region):
82
96
  # Allow task(cluster)-specific compartment/VCN parameters.
sky/core.py CHANGED
@@ -742,7 +742,8 @@ def cancel(
742
742
  @usage_lib.entrypoint
743
743
  def tail_logs(cluster_name: str,
744
744
  job_id: Optional[int],
745
- follow: bool = True) -> None:
745
+ follow: bool = True,
746
+ tail: int = 0) -> None:
746
747
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
747
748
  """Tail the logs of a job.
748
749
 
@@ -775,7 +776,7 @@ def tail_logs(cluster_name: str,
775
776
  f'{colorama.Style.RESET_ALL}')
776
777
 
777
778
  usage_lib.record_cluster_name_for_current_operation(cluster_name)
778
- backend.tail_logs(handle, job_id, follow=follow)
779
+ backend.tail_logs(handle, job_id, follow=follow, tail=tail)
779
780
 
780
781
 
781
782
  @usage_lib.entrypoint
sky/dag.py CHANGED
@@ -23,6 +23,7 @@ class Dag:
23
23
 
24
24
  self.graph = nx.DiGraph()
25
25
  self.name: Optional[str] = None
26
+ self.policy_applied: bool = False
26
27
 
27
28
  def add(self, task: 'task.Task') -> None:
28
29
  self.graph.add_node(task)
@@ -55,21 +56,25 @@ class Dag:
55
56
  return self.graph
56
57
 
57
58
  def is_chain(self) -> bool:
58
- # NOTE: this method assumes that the graph has no cycle.
59
- is_chain = True
60
- visited_zero_out_degree = False
61
- for node in self.graph.nodes:
62
- out_degree = self.graph.out_degree(node)
63
- if out_degree > 1:
64
- is_chain = False
65
- break
66
- elif out_degree == 0:
67
- if visited_zero_out_degree:
68
- is_chain = False
69
- break
70
- else:
71
- visited_zero_out_degree = True
72
- return is_chain
59
+ """Check if the DAG is a linear chain of tasks."""
60
+
61
+ nodes = list(self.graph.nodes)
62
+
63
+ if len(nodes) == 0:
64
+ return True
65
+
66
+ in_degrees = [self.graph.in_degree(node) for node in nodes]
67
+ out_degrees = [self.graph.out_degree(node) for node in nodes]
68
+
69
+ # Check out-degrees: all <= 1 and exactly one node has out_degree == 0
70
+ out_degree_condition = (all(degree <= 1 for degree in out_degrees) and
71
+ sum(degree == 0 for degree in out_degrees) == 1)
72
+
73
+ # Check in-degrees: all <= 1 and exactly one node has in_degree == 0
74
+ in_degree_condition = (all(degree <= 1 for degree in in_degrees) and
75
+ sum(degree == 0 for degree in in_degrees) == 1)
76
+
77
+ return out_degree_condition and in_degree_condition
73
78
 
74
79
 
75
80
  class _DagContext(threading.local):
sky/data/mounting_utils.py CHANGED
@@ -276,23 +276,11 @@ def get_mounting_command(
276
276
  script = get_mounting_script(mount_path, mount_cmd, install_cmd,
277
277
  version_check_cmd)
278
278
 
279
- # TODO(romilb): Get direct bash script to work like so:
280
- # command = f'bash <<-\EOL' \
281
- # f'{script}' \
282
- # 'EOL'
283
-
284
- # TODO(romilb): This heredoc should have EOF after script, but it
285
- # fails with sky's ssh pipeline. Instead, we don't use EOF and use )
286
- # as the end of heredoc. This raises a warning (here-document delimited
287
- # by end-of-file) that can be safely ignored.
288
-
289
279
  # While these commands are run sequentially for each storage object,
290
280
  # we add random int to be on the safer side and avoid collisions.
291
281
  script_path = f'~/.sky/mount_{random.randint(0, 1000000)}.sh'
292
- first_line = r'(cat <<-\EOF > {}'.format(script_path)
293
- command = (f'{first_line}'
294
- f'{script}'
295
- f') && chmod +x {script_path}'
296
- f' && bash {script_path}'
297
- f' && rm {script_path}')
282
+ command = (f'echo {shlex.quote(script)} > {script_path} && '
283
+ f'chmod +x {script_path} && '
284
+ f'bash {script_path} && '
285
+ f'rm {script_path}')
298
286
  return command
sky/exceptions.py CHANGED
@@ -3,6 +3,8 @@ import enum
3
3
  import typing
4
4
  from typing import List, Optional, Sequence
5
5
 
6
+ from sky.utils import env_options
7
+
6
8
  if typing.TYPE_CHECKING:
7
9
  from sky import status_lib
8
10
  from sky.backends import backend
@@ -104,7 +106,8 @@ class CommandError(Exception):
104
106
  if not command:
105
107
  message = error_msg
106
108
  else:
107
- if len(command) > 100:
109
+ if (len(command) > 100 and
110
+ not env_options.Options.SHOW_DEBUG_INFO.get()):
108
111
  # Chunck the command to avoid overflow.
109
112
  command = command[:100] + '...'
110
113
  message = (f'Command {command} failed with return code '
sky/execution.py CHANGED
@@ -160,14 +160,16 @@ def _execute(
160
160
  """
161
161
 
162
162
  dag = dag_utils.convert_entrypoint_to_dag(entrypoint)
163
- dag, _ = admin_policy_utils.apply(
164
- dag,
165
- request_options=admin_policy.RequestOptions(
166
- cluster_name=cluster_name,
167
- idle_minutes_to_autostop=idle_minutes_to_autostop,
168
- down=down,
169
- dryrun=dryrun,
170
- ))
163
+ if not dag.policy_applied:
164
+ dag, _ = admin_policy_utils.apply(
165
+ dag,
166
+ request_options=admin_policy.RequestOptions(
167
+ cluster_name=cluster_name,
168
+ idle_minutes_to_autostop=idle_minutes_to_autostop,
169
+ down=down,
170
+ dryrun=dryrun,
171
+ ),
172
+ )
171
173
  assert len(dag) == 1, f'We support 1 task for now. {dag}'
172
174
  task = dag.tasks[0]
173
175
 
sky/jobs/core.py CHANGED
@@ -59,8 +59,10 @@ def launch(
59
59
  """
60
60
  entrypoint = task
61
61
  dag_uuid = str(uuid.uuid4().hex[:4])
62
-
63
62
  dag = dag_utils.convert_entrypoint_to_dag(entrypoint)
63
+ # Always apply the policy again here, even though it might have been applied
64
+ # in the CLI. This is to ensure that we apply the policy to the final DAG
65
+ # and get the mutated config.
64
66
  dag, mutated_user_config = admin_policy_utils.apply(
65
67
  dag, use_mutated_config_in_current_request=False)
66
68
  if not dag.is_chain():
sky/provision/__init__.py CHANGED
@@ -20,6 +20,7 @@ from sky.provision import fluidstack
20
20
  from sky.provision import gcp
21
21
  from sky.provision import kubernetes
22
22
  from sky.provision import lambda_cloud
23
+ from sky.provision import oci
23
24
  from sky.provision import runpod
24
25
  from sky.provision import vsphere
25
26
  from sky.utils import command_runner
sky/provision/aws/config.py CHANGED
@@ -42,8 +42,9 @@ def _skypilot_log_error_and_exit_for_failover(error: str) -> None:
42
42
  Mainly used for handling VPC/subnet errors before nodes are launched.
43
43
  """
44
44
  # NOTE: keep. The backend looks for this to know no nodes are launched.
45
- prefix = 'SKYPILOT_ERROR_NO_NODES_LAUNCHED: '
46
- raise RuntimeError(prefix + error)
45
+ full_error = f'SKYPILOT_ERROR_NO_NODES_LAUNCHED: {error}'
46
+ logger.error(full_error)
47
+ raise RuntimeError(full_error)
47
48
 
48
49
 
49
50
  def bootstrap_instances(
@@ -222,10 +223,27 @@ def _configure_iam_role(iam) -> Dict[str, Any]:
222
223
 
223
224
 
224
225
  @functools.lru_cache(maxsize=128) # Keep bounded.
225
- def _get_route_tables(ec2, vpc_id: Optional[str], main: bool) -> List[Any]:
226
+ def _get_route_tables(ec2, vpc_id: Optional[str], region: str,
227
+ main: bool) -> List[Any]:
228
+ """Get route tables associated with a VPC and region
229
+
230
+ Args:
231
+ ec2: ec2 resource object
232
+ vpc_id: vpc_id is optional, if not provided, all route tables in the
233
+ region will be returned
234
+ region: region is mandatory to allow the lru cache
235
+ to return the corect results
236
+ main: if True, only main route tables will be returned otherwise
237
+ only non-main route tables will be returned
238
+
239
+ Returns:
240
+ A list of route tables associated with the options VPC and region
241
+ """
226
242
  filters = [{'Name': 'association.main', 'Values': [str(main).lower()]}]
227
243
  if vpc_id is not None:
228
244
  filters.append({'Name': 'vpc-id', 'Values': [vpc_id]})
245
+ logger.debug(
246
+ f'Getting route tables with filters: {filters} in region: {region}')
229
247
  return ec2.meta.client.describe_route_tables(Filters=filters).get(
230
248
  'RouteTables', [])
231
249
 
@@ -238,7 +256,8 @@ def _is_subnet_public(ec2, subnet_id, vpc_id: Optional[str]) -> bool:
238
256
  https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Internet_Gateway.html
239
257
  """
240
258
  # Get the route tables associated with the subnet
241
- all_route_tables = _get_route_tables(ec2, vpc_id, main=False)
259
+ region = ec2.meta.client.meta.region_name
260
+ all_route_tables = _get_route_tables(ec2, vpc_id, region, main=False)
242
261
  route_tables = [
243
262
  rt for rt in all_route_tables
244
263
  # An RT can be associated with multiple subnets, i.e.,
@@ -267,7 +286,8 @@ def _is_subnet_public(ec2, subnet_id, vpc_id: Optional[str]) -> bool:
267
286
  # subnets. Since the associations are implicit, the filter above won't find
268
287
  # any. Check there exists a main route table with routes pointing to an IGW.
269
288
  logger.debug('Checking main route table')
270
- main_route_tables = _get_route_tables(ec2, vpc_id, main=True)
289
+ region = ec2.meta.client.meta.region_name
290
+ main_route_tables = _get_route_tables(ec2, vpc_id, region, main=True)
271
291
  return _has_igw_route(main_route_tables)
272
292
 
273
293
 
sky/provision/oci/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """OCI provisioner for SkyPilot.
2
+
3
+ History:
4
+ - Hysun He (hysun.he@oracle.com) @ Oct.16, 2024: Initial implementation
5
+ """
6
+
7
+ from sky.provision.oci.config import bootstrap_instances
8
+ from sky.provision.oci.instance import cleanup_ports
9
+ from sky.provision.oci.instance import get_cluster_info
10
+ from sky.provision.oci.instance import open_ports
11
+ from sky.provision.oci.instance import query_instances
12
+ from sky.provision.oci.instance import run_instances
13
+ from sky.provision.oci.instance import stop_instances
14
+ from sky.provision.oci.instance import terminate_instances
15
+ from sky.provision.oci.instance import wait_instances
sky/provision/oci/config.py ADDED
@@ -0,0 +1,51 @@
1
+ """OCI configuration bootstrapping.
2
+
3
+ Creates the resource group and deploys the configuration template to OCI for
4
+ a cluster to be launched.
5
+
6
+ History:
7
+ - Hysun He (hysun.he@oracle.com) @ Oct.16, 2024: Initial implementation
8
+ """
9
+
10
+ from sky import exceptions
11
+ from sky import sky_logging
12
+ from sky.adaptors import oci as oci_adaptor
13
+ from sky.clouds.utils import oci_utils
14
+ from sky.provision import common
15
+ from sky.provision.oci.query_utils import query_helper
16
+
17
+ logger = sky_logging.init_logger(__name__)
18
+
19
+
20
+ @common.log_function_start_end
21
+ def bootstrap_instances(
22
+ region: str, cluster_name_on_cloud: str,
23
+ config: common.ProvisionConfig) -> common.ProvisionConfig:
24
+ """See sky/provision/__init__.py"""
25
+ # OCI module import and oci client
26
+ oci_adaptor.get_core_client(region, oci_utils.oci_config.get_profile())
27
+
28
+ # Find / create a compartment for creating instances.
29
+ compartment = query_helper.find_compartment(region)
30
+
31
+ # Find the configured VCN, or create a new one.
32
+ vcn = query_helper.find_create_vcn_subnet(region)
33
+ if vcn is None:
34
+ # pylint: disable=line-too-long
35
+ raise exceptions.ResourcesUnavailableError(
36
+ 'Failed to create a new VCN, possibly you hit the resource limitation.'
37
+ )
38
+
39
+ node_config = config.node_config
40
+
41
+ # Subscribe the image if it is from Marketplace listing.
42
+ query_helper.subscribe_image(
43
+ compartment_id=compartment,
44
+ listing_id=node_config['AppCatalogListingId'],
45
+ resource_version=node_config['ResourceVersion'],
46
+ region=region,
47
+ )
48
+
49
+ logger.info(f'Using cluster name: {cluster_name_on_cloud}')
50
+
51
+ return config