skypilot-nightly: 1.0.0.dev20250513-py3-none-any.whl → 1.0.0.dev20250514-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. sky/__init__.py +2 -2
  2. sky/backends/backend_utils.py +0 -3
  3. sky/backends/cloud_vm_ray_backend.py +22 -10
  4. sky/clouds/gcp.py +24 -8
  5. sky/clouds/service_catalog/data_fetchers/fetch_gcp.py +33 -11
  6. sky/clouds/service_catalog/gcp_catalog.py +7 -1
  7. sky/dashboard/out/404.html +1 -1
  8. sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
  9. sky/dashboard/out/clusters/[cluster].html +1 -1
  10. sky/dashboard/out/clusters.html +1 -1
  11. sky/dashboard/out/index.html +1 -1
  12. sky/dashboard/out/jobs/[job].html +1 -1
  13. sky/dashboard/out/jobs.html +1 -1
  14. sky/global_user_state.py +0 -2
  15. sky/resources.py +4 -0
  16. sky/server/requests/executor.py +22 -114
  17. sky/server/requests/requests.py +0 -15
  18. sky/server/server.py +7 -12
  19. sky/server/uvicorn.py +2 -12
  20. sky/sky_logging.py +2 -40
  21. sky/skylet/log_lib.py +11 -51
  22. sky/templates/nebius-ray.yml.j2 +3 -1
  23. sky/utils/command_runner.py +0 -3
  24. sky/utils/rich_utils.py +37 -81
  25. sky/utils/subprocess_utils.py +2 -8
  26. {skypilot_nightly-1.0.0.dev20250513.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/METADATA +1 -1
  27. {skypilot_nightly-1.0.0.dev20250513.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/RECORD +33 -35
  28. sky/utils/context.py +0 -264
  29. sky/utils/context_utils.py +0 -172
  30. /sky/dashboard/out/_next/static/{2dkponv64SfFShA8Rnw0D → tdxxQrPV6NW90a983oHXe}/_buildManifest.js +0 -0
  31. /sky/dashboard/out/_next/static/{2dkponv64SfFShA8Rnw0D → tdxxQrPV6NW90a983oHXe}/_ssgManifest.js +0 -0
  32. {skypilot_nightly-1.0.0.dev20250513.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/WHEEL +0 -0
  33. {skypilot_nightly-1.0.0.dev20250513.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/entry_points.txt +0 -0
  34. {skypilot_nightly-1.0.0.dev20250513.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/licenses/LICENSE +0 -0
  35. {skypilot_nightly-1.0.0.dev20250513.dist-info → skypilot_nightly-1.0.0.dev20250514.dist-info}/top_level.txt +0 -0
sky/server/requests/executor.py CHANGED
@@ -18,10 +18,7 @@ The number of the workers is determined by the system resources.
 
 See the [README.md](../README.md) for detailed architecture of the executor.
 """
-import asyncio
 import contextlib
-import contextvars
-import functools
 import multiprocessing
 import os
 import queue as queue_lib
@@ -50,7 +47,6 @@ from sky.server.requests.queues import mp_queue
 from sky.skylet import constants
 from sky.utils import annotations
 from sky.utils import common_utils
-from sky.utils import context
 from sky.utils import subprocess_utils
 from sky.utils import timeline
 
@@ -64,6 +60,7 @@ else:
     from typing_extensions import ParamSpec
 
 P = ParamSpec('P')
+
 logger = sky_logging.init_logger(__name__)
 
 # On macOS, the default start method for multiprocessing is 'fork', which
@@ -344,114 +341,6 @@ def _request_execution_wrapper(request_id: str,
     logger.info(f'Request {request_id} finished')
 
 
-async def execute_request_coroutine(request: api_requests.Request):
-    """Execute a request in current event loop.
-
-    Similar to _request_execution_wrapper, but executed as coroutine in current
-    event loop. This is designed for executing tasks that are not CPU
-    intensive, e.g. sky logs.
-    """
-    ctx = context.get()
-    if ctx is None:
-        raise ValueError('Context is not initialized')
-    logger.info(f'Executing request {request.request_id} in coroutine')
-    func = request.entrypoint
-    request_body = request.request_body
-    with api_requests.update_request(request.request_id) as request_task:
-        request_task.status = api_requests.RequestStatus.RUNNING
-    # Redirect stdout and stderr to the request log path.
-    original_output = ctx.redirect_log(request.log_path)
-    # Override environment variables that backs env_options.Options
-    # TODO(aylei): compared to process executor, running task in coroutine has
-    # two issues to fix:
-    # 1. skypilot config is not contextual
-    # 2. envs that read directly from os.environ are not contextual
-    ctx.override_envs(request_body.env_vars)
-    loop = asyncio.get_running_loop()
-    pyctx = contextvars.copy_context()
-    func_call = functools.partial(pyctx.run, func, **request_body.to_kwargs())
-    fut: asyncio.Future = loop.run_in_executor(None, func_call)
-
-    async def poll_task(request_id: str) -> bool:
-        request = api_requests.get_request(request_id)
-        if request is None:
-            raise RuntimeError('Request not found')
-
-        if request.status == api_requests.RequestStatus.CANCELLED:
-            ctx.cancel()
-            return True
-
-        if fut.done():
-            try:
-                result = await fut
-                api_requests.set_request_succeeded(request_id, result)
-            except asyncio.CancelledError:
-                # The task is cancelled by ctx.cancel(), where the status
-                # should already be set to CANCELLED.
-                pass
-            except Exception as e:  # pylint: disable=broad-except
-                ctx.redirect_log(original_output)
-                api_requests.set_request_failed(request_id, e)
-                logger.error(f'Request {request_id} failed due to '
-                             f'{common_utils.format_exception(e)}')
-            return True
-        return False
-
-    try:
-        while True:
-            res = await poll_task(request.request_id)
-            if res:
-                break
-            await asyncio.sleep(0.5)
-    except asyncio.CancelledError:
-        # Current coroutine is cancelled due to client disconnect, set the
-        # request status for consistency.
-        api_requests.set_request_cancelled(request.request_id)
-        pass
-    # pylint: disable=broad-except
-    except (Exception, KeyboardInterrupt, SystemExit) as e:
-        # Handle any other error
-        ctx.redirect_log(original_output)
-        ctx.cancel()
-        api_requests.set_request_failed(request.request_id, e)
-        logger.error(f'Request {request.request_id} interrupted due to '
-                     f'unhandled exception: {common_utils.format_exception(e)}')
-        raise
-
-
-def prepare_request(
-    request_id: str,
-    request_name: str,
-    request_body: payloads.RequestBody,
-    func: Callable[P, Any],
-    request_cluster_name: Optional[str] = None,
-    schedule_type: api_requests.ScheduleType = (api_requests.ScheduleType.LONG),
-    is_skypilot_system: bool = False,
-) -> api_requests.Request:
-    """Prepare a request for execution."""
-    user_id = request_body.env_vars[constants.USER_ID_ENV_VAR]
-    if is_skypilot_system:
-        user_id = server_constants.SKYPILOT_SYSTEM_USER_ID
-        global_user_state.add_or_update_user(
-            models.User(id=user_id, name=user_id))
-    request = api_requests.Request(request_id=request_id,
-                                   name=server_constants.REQUEST_NAME_PREFIX +
-                                   request_name,
-                                   entrypoint=func,
-                                   request_body=request_body,
-                                   status=api_requests.RequestStatus.PENDING,
-                                   created_at=time.time(),
-                                   schedule_type=schedule_type,
-                                   user_id=user_id,
-                                   cluster_name=request_cluster_name)
-
-    if not api_requests.create_if_not_exists(request):
-        raise RuntimeError(f'Request {request_id} already exists.')
-
-    request.log_path.touch()
-    return request
-
-
 def schedule_request(
     request_id: str,
     request_name: str,
@@ -483,8 +372,27 @@ def schedule_request(
            The precondition is waited asynchronously and does not block the
            caller.
    """
-    prepare_request(request_id, request_name, request_body, func,
-                    request_cluster_name, schedule_type, is_skypilot_system)
+    user_id = request_body.env_vars[constants.USER_ID_ENV_VAR]
+    if is_skypilot_system:
+        user_id = server_constants.SKYPILOT_SYSTEM_USER_ID
+        global_user_state.add_or_update_user(
+            models.User(id=user_id, name=user_id))
+    request = api_requests.Request(request_id=request_id,
+                                   name=server_constants.REQUEST_NAME_PREFIX +
+                                   request_name,
+                                   entrypoint=func,
+                                   request_body=request_body,
+                                   status=api_requests.RequestStatus.PENDING,
+                                   created_at=time.time(),
+                                   schedule_type=schedule_type,
+                                   user_id=user_id,
+                                   cluster_name=request_cluster_name)
+
+    if not api_requests.create_if_not_exists(request):
+        logger.debug(f'Request {request_id} already exists.')
+        return
+
+    request.log_path.touch()
 
     def enqueue():
         input_tuple = (request_id, ignore_return_value)
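
Note: with prepare_request folded back into schedule_request, a duplicate request_id is no longer an error. A minimal caller-side sketch of the resulting behavior, assuming a prepared RequestBody (`body`) and entrypoint (`func`) are in scope:

    from sky.server.requests import executor

    # First call creates and enqueues the request.
    executor.schedule_request(request_id='abc123', request_name='logs',
                              request_body=body, func=func)
    # A retried call with the same id is now a silent no-op:
    # create_if_not_exists() returns False, and schedule_request() logs at
    # debug level and returns instead of raising RuntimeError.
    executor.schedule_request(request_id='abc123', request_name='logs',
                              request_body=body, func=func)

This makes scheduling idempotent for clients that retry requests.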
sky/server/requests/requests.py CHANGED
@@ -606,18 +606,3 @@ def set_request_failed(request_id: str, e: BaseException) -> None:
         assert request_task is not None, request_id
         request_task.status = RequestStatus.FAILED
         request_task.set_error(e)
-
-
-def set_request_succeeded(request_id: str, result: Any) -> None:
-    """Set a request to succeeded and populate the result."""
-    with update_request(request_id) as request_task:
-        assert request_task is not None, request_id
-        request_task.status = RequestStatus.SUCCEEDED
-        request_task.set_return_value(result)
-
-
-def set_request_cancelled(request_id: str) -> None:
-    """Set a request to cancelled."""
-    with update_request(request_id) as request_task:
-        assert request_task is not None, request_id
-        request_task.status = RequestStatus.CANCELLED
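
Note: set_request_succeeded and set_request_cancelled were only used by the now-removed coroutine executor. Surviving status transitions go through the update_request context manager, as set_request_failed above still does; a sketch of that pattern, assuming update_request persists the record when the block exits:

    from sky.server.requests.requests import RequestStatus, update_request

    def mark_cancelled(request_id: str) -> None:
        # update_request() yields the stored request record and writes it
        # back on exit, mirroring the helpers removed in this hunk.
        with update_request(request_id) as request_task:
            assert request_task is not None, request_id
            request_task.status = RequestStatus.CANCELLED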
sky/server/server.py CHANGED
@@ -47,7 +47,6 @@ from sky.usage import usage_lib
 from sky.utils import admin_policy_utils
 from sky.utils import common as common_lib
 from sky.utils import common_utils
-from sky.utils import context
 from sky.utils import dag_utils
 from sky.utils import env_options
 from sky.utils import status_lib
@@ -674,28 +673,24 @@ async def logs(
    # TODO(zhwu): This should wait for the request on the cluster, e.g., async
    # launch, to finish, so that a user does not need to manually pull the
    # request status.
-    # Only initialize the context in logs handler to limit the scope of this
-    # experimental change.
-    # TODO(aylei): init in lifespan() to enable SkyPilot context in all APIs.
-    context.initialize()
-    request_task = executor.prepare_request(
+    executor.schedule_request(
         request_id=request.state.request_id,
         request_name='logs',
         request_body=cluster_job_body,
         func=core.tail_logs,
+        # TODO(aylei): We have tail logs scheduled as SHORT request, because it
+        # should be responsive. However, it can be long running if the user's
+        # job keeps running, and we should avoid it taking the SHORT worker.
         schedule_type=requests_lib.ScheduleType.SHORT,
+        request_cluster_name=cluster_job_body.cluster_name,
     )
-    task = asyncio.create_task(executor.execute_request_coroutine(request_task))
 
-    def cancel_task():
-        task.cancel()
+    request_task = requests_lib.get_request(request.state.request_id)
 
-    # Cancel the task after the request is done or client disconnects
-    background_tasks.add_task(cancel_task)
    # TODO(zhwu): This makes viewing logs in browser impossible. We should adopt
    # the same approach as /stream.
    return stream_utils.stream_response(
-        request_id=request.state.request_id,
+        request_id=request_task.request_id,
         logs_path=request_task.log_path,
         background_tasks=background_tasks,
    )
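
Note: /logs now flows through the same executor queue as every other request and is streamed from the request's log file. A hypothetical client sketch; the endpoint path, port, and body shape are illustrative assumptions, not confirmed by this diff:

    import httpx

    # Stream the chunked log response line by line (hypothetical request).
    with httpx.stream('POST', 'http://localhost:46580/api/v1/logs',
                      json={'cluster_name': 'my-cluster', 'follow': True},
                      timeout=None) as resp:
        for line in resp.iter_lines():
            print(line)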
sky/server/uvicorn.py CHANGED
@@ -3,7 +3,6 @@
 This module is a wrapper around uvicorn to customize the behavior of the
 server.
 """
-import functools
 import os
 import threading
 from typing import Optional
@@ -11,7 +10,6 @@ from typing import Optional
 import uvicorn
 from uvicorn.supervisors import multiprocess
 
-from sky.utils import context_utils
 from sky.utils import subprocess_utils
 
 
@@ -23,27 +21,19 @@ def run(config: uvicorn.Config):
        # guard by an exception.
        raise ValueError('Reload is not supported yet.')
    server = uvicorn.Server(config=config)
-    run_server_process = functools.partial(_run_server_process, server)
    try:
        if config.workers is not None and config.workers > 1:
            sock = config.bind_socket()
-            SlowStartMultiprocess(config,
-                                  target=run_server_process,
+            SlowStartMultiprocess(config, target=server.run,
                                   sockets=[sock]).run()
        else:
-            run_server_process()
+            server.run()
    finally:
        # Copied from unvicorn.run()
        if config.uds and os.path.exists(config.uds):
            os.remove(config.uds)
 
 
-def _run_server_process(server: uvicorn.Server, *args, **kwargs):
-    """Run the server process with contextually aware."""
-    context_utils.hijack_sys_attrs()
-    server.run(*args, **kwargs)
-
-
 class SlowStartMultiprocess(multiprocess.Multiprocess):
     """Uvicorn Multiprocess wrapper with slow start.
 
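Note: with the context hijack gone, worker processes invoke uvicorn's server.run directly. SlowStartMultiprocess (kept below this hunk) staggers worker startup; a minimal sketch of the slow-start idea, not uvicorn's or SkyPilot's actual implementation:

    import multiprocessing
    import time
    from typing import Callable, List

    def slow_start(target: Callable[[], None], workers: int,
                   delay: float = 1.0) -> List[multiprocessing.Process]:
        # Launch workers one at a time so they do not all pay the
        # import/cold-start cost simultaneously.
        procs = []
        for i in range(workers):
            proc = multiprocessing.Process(target=target)
            proc.start()
            procs.append(proc)
            if i < workers - 1:
                time.sleep(delay)
        return procs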
sky/sky_logging.py CHANGED
@@ -10,7 +10,6 @@ import threading
 import colorama
 
 from sky.skylet import constants
-from sky.utils import context
 from sky.utils import env_options
 from sky.utils import rich_utils
 
@@ -48,43 +47,6 @@ class NewLineFormatter(logging.Formatter):
         return msg
 
 
-class EnvAwareHandler(rich_utils.RichSafeStreamHandler):
-    """A handler that awares environment variables.
-
-    This handler dynamically reflects the log level from environment variables.
-    """
-
-    def __init__(self, stream=None, level=logging.NOTSET, sensitive=False):
-        super().__init__(stream)
-        self.level = level
-        self._sensitive = sensitive
-
-    @property
-    def level(self):
-        # Only refresh log level if we are in a context, since the log level
-        # has already been reloaded eagerly in multi-processing. Refresh again
-        # is a no-op and can be avoided.
-        # TODO(aylei): unify the mechanism for coroutine context and
-        # multi-processing.
-        if context.get() is not None:
-            if self._sensitive:
-                # For sensitive logger, suppress debug log despite the
-                # SKYPILOT_DEBUG env var if SUPPRESS_SENSITIVE_LOG is set
-                if env_options.Options.SUPPRESS_SENSITIVE_LOG.get():
-                    return logging.INFO
-            if env_options.Options.SHOW_DEBUG_INFO.get():
-                return logging.DEBUG
-            else:
-                return self._level
-        else:
-            return self._level
-
-    @level.setter
-    def level(self, level):
-        # pylint: disable=protected-access
-        self._level = logging._checkLevel(level)
-
-
 _root_logger = logging.getLogger('sky')
 _default_handler = None
 _logging_config = threading.local()
@@ -105,7 +67,7 @@ def _setup_logger():
     _root_logger.setLevel(logging.DEBUG)
     global _default_handler
     if _default_handler is None:
-        _default_handler = EnvAwareHandler(sys.stdout)
+        _default_handler = rich_utils.RichSafeStreamHandler(sys.stdout)
         _default_handler.flush = sys.stdout.flush  # type: ignore
         if env_options.Options.SHOW_DEBUG_INFO.get():
             _default_handler.setLevel(logging.DEBUG)
@@ -125,7 +87,7 @@ def _setup_logger():
    # for certain loggers.
    for logger_name in _SENSITIVE_LOGGER:
        logger = logging.getLogger(logger_name)
-        handler_to_logger = EnvAwareHandler(sys.stdout, sensitive=True)
+        handler_to_logger = rich_utils.RichSafeStreamHandler(sys.stdout)
        handler_to_logger.flush = sys.stdout.flush  # type: ignore
        logger.addHandler(handler_to_logger)
        logger.setLevel(logging.INFO)
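
Note: removing EnvAwareHandler means the handler level is resolved once in _setup_logger rather than re-read from the environment on every emit. A sketch of the static pattern; the SKYPILOT_DEBUG variable name is an assumption about what backs env_options.Options.SHOW_DEBUG_INFO:

    import logging
    import os
    import sys

    def make_default_handler() -> logging.Handler:
        handler = logging.StreamHandler(sys.stdout)
        # The level is read from the environment once, at handler creation,
        # instead of per log record as the removed property did.
        debug = os.environ.get('SKYPILOT_DEBUG') == '1'
        handler.setLevel(logging.DEBUG if debug else logging.INFO)
        return handler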
sky/skylet/log_lib.py CHANGED
@@ -4,7 +4,6 @@ This is a remote utility module that provides logging functionality.
 """
 import collections
 import copy
-import functools
 import io
 import multiprocessing.pool
 import os
@@ -22,8 +21,6 @@ import colorama
 from sky import sky_logging
 from sky.skylet import constants
 from sky.skylet import job_lib
-from sky.utils import context
-from sky.utils import context_utils
 from sky.utils import log_utils
 from sky.utils import subprocess_utils
 from sky.utils import ux_utils
@@ -80,9 +77,6 @@ def _handle_io_stream(io_stream, out_stream, args: _ProcessingArgs):
    with open(args.log_path, 'a', encoding='utf-8') as fout:
        with line_processor:
            while True:
-                ctx = context.get()
-                if ctx is not None and ctx.is_canceled():
-                    return
                line = out_io.readline()
                if not line:
                    break
@@ -117,29 +111,30 @@ def _handle_io_stream(io_stream, out_stream, args: _ProcessingArgs):
    return ''.join(out)
 
 
-def process_subprocess_stream(proc, stdout_stream_handler,
-                              stderr_stream_handler) -> Tuple[str, str]:
-    """Process the stream of a process in threads, blocking."""
+def process_subprocess_stream(proc, args: _ProcessingArgs) -> Tuple[str, str]:
+    """Redirect the process's filtered stdout/stderr to both stream and file"""
    if proc.stderr is not None:
        # Asyncio does not work as the output processing can be executed in a
        # different thread.
        # selectors is possible to handle the multiplexing of stdout/stderr,
        # but it introduces buffering making the output not streaming.
        with multiprocessing.pool.ThreadPool(processes=1) as pool:
-            stderr_fut = pool.apply_async(stderr_stream_handler,
-                                          args=(proc.stderr, sys.stderr))
+            err_args = copy.copy(args)
+            err_args.line_processor = None
+            stderr_fut = pool.apply_async(_handle_io_stream,
+                                          args=(proc.stderr, sys.stderr,
+                                                err_args))
            # Do not launch a thread for stdout as the rich.status does not
            # work in a thread, which is used in
            # log_utils.RayUpLineProcessor.
-            stdout = stdout_stream_handler(proc.stdout, sys.stdout)
+            stdout = _handle_io_stream(proc.stdout, sys.stdout, args)
            stderr = stderr_fut.get()
    else:
-        stdout = stdout_stream_handler(proc.stdout, sys.stdout)
+        stdout = _handle_io_stream(proc.stdout, sys.stdout, args)
        stderr = ''
    return stdout, stderr
 
 
-@context_utils.cancellation_guard
 def run_with_log(
    cmd: Union[List[str], str],
    log_path: str,
@@ -181,12 +176,7 @@ def run_with_log(
    # Redirect stderr to stdout when using ray, to preserve the order of
    # stdout and stderr.
    stdout_arg = stderr_arg = None
-    ctx = context.get()
-    if process_stream or ctx is not None:
-        # Capture stdout/stderr of the subprocess if:
-        # 1. Post-processing is needed (process_stream=True)
-        # 2. Potential contextual handling is needed (ctx is not None)
-        # TODO(aylei): can we always capture the stdout/stderr?
+    if process_stream:
        stdout_arg = subprocess.PIPE
        stderr_arg = subprocess.PIPE if not with_ray else subprocess.STDOUT
    # Use stdin=subprocess.DEVNULL by default, as allowing inputs will mess up
@@ -207,8 +197,6 @@ def run_with_log(
        subprocess_utils.kill_process_daemon(proc.pid)
        stdout = ''
        stderr = ''
-        stdout_stream_handler = None
-        stderr_stream_handler = None
 
        if process_stream:
            if skip_lines is None:
@@ -235,35 +223,7 @@ def run_with_log(
                replace_crlf=with_ray,
                streaming_prefix=streaming_prefix,
            )
-            stdout_stream_handler = functools.partial(
-                _handle_io_stream,
-                args=args,
-            )
-            if proc.stderr is not None:
-                err_args = copy.copy(args)
-                err_args.line_processor = None
-                stderr_stream_handler = functools.partial(
-                    _handle_io_stream,
-                    args=err_args,
-                )
-        if ctx is not None:
-            # When runs in a coroutine, always process the subprocess
-            # stream to:
-            # 1. handle context cancellation
-            # 2. redirect subprocess stdout/stderr to the contextual
-            #    stdout/stderr of current coroutine.
-            stdout, stderr = context_utils.pipe_and_wait_process(
-                ctx,
-                proc,
-                cancel_callback=subprocess_utils.kill_children_processes,
-                stdout_stream_handler=stdout_stream_handler,
-                stderr_stream_handler=stderr_stream_handler)
-        elif process_stream:
-            # When runs in a process, only process subprocess stream if
-            # necessary to avoid unnecessary stream handling overhead.
-            stdout, stderr = process_subprocess_stream(
-                proc, stdout_stream_handler, stderr_stream_handler)
-        # Ensure returncode is set.
+            stdout, stderr = process_subprocess_stream(proc, args)
        proc.wait()
        if require_outputs:
            return proc.returncode, stdout, stderr
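
Note: process_subprocess_stream reverts to taking the _ProcessingArgs directly. As the hunk above shows, stderr is drained on a single ThreadPool thread while stdout is processed on the calling thread (rich.status cannot run off the main thread). A generic, self-contained sketch of that tee pattern:

    import multiprocessing.pool
    import subprocess
    import sys
    from typing import IO, Tuple

    def _drain(stream: IO[bytes], mirror) -> str:
        # Mirror each line to the given stream while accumulating it.
        out = []
        for raw in iter(stream.readline, b''):
            line = raw.decode('utf-8', errors='replace')
            mirror.write(line)
            out.append(line)
        return ''.join(out)

    def tee(proc: subprocess.Popen) -> Tuple[str, str]:
        # One worker thread for stderr; stdout stays on this thread.
        with multiprocessing.pool.ThreadPool(processes=1) as pool:
            stderr_fut = pool.apply_async(_drain, (proc.stderr, sys.stderr))
            stdout = _drain(proc.stdout, sys.stdout)
            return stdout, stderr_fut.get()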
sky/templates/nebius-ray.yml.j2 CHANGED
@@ -105,6 +105,7 @@ file_mounts: {
   "{{sky_remote_path}}/{{sky_wheel_hash}}": "{{sky_local_path}}",
 {%- for remote_path, local_path in credentials.items() %}
   "{{remote_path}}": "{{local_path}}",
+  "~/.ssh/sky-cluster-key": "{{ssh_private_key}}",
 {%- endfor %}
 }
 
@@ -120,6 +121,7 @@ initialization_commands: []
 # Increment the following for catching performance bugs easier:
 #   current num items (num SSH connections): 1
 setup_commands:
+  # Add ~/.ssh/sky-cluster-key to SSH config to allow nodes within a cluster to connect to each other
   # Disable `unattended-upgrades` to prevent apt-get from hanging. It should be called at the beginning before the process started to avoid being blocked. (This is a temporary fix.)
   # Create ~/.ssh/config file in case the file does not exist in the image.
   # Line 'rm ..': there is another installation of pip.
@@ -142,6 +144,6 @@ setup_commands:
   {{ ray_skypilot_installation_commands }}
   sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
   sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
-  mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n  StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n  StrictHostKeyChecking no\n" >> ~/.ssh/config;
+  mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n  StrictHostKeyChecking no\n  IdentityFile ~/.ssh/sky-cluster-key\n  IdentityFile ~/.ssh/id_rsa" ~/.ssh/config) || printf "Host *\n  StrictHostKeyChecking no\n  IdentityFile ~/.ssh/sky-cluster-key\n  IdentityFile ~/.ssh/id_rsa\n" >> ~/.ssh/config;
   [ -f /etc/fuse.conf ] && sudo sed -i 's/#user_allow_other/user_allow_other/g' /etc/fuse.conf || (sudo sh -c 'echo "user_allow_other" > /etc/fuse.conf');
   {{ ssh_max_sessions_config }}
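
Note: the template change mounts the cluster-wide private key and appends IdentityFile entries to ~/.ssh/config only when the stanza is absent, so re-running setup stays idempotent. A Python rendition of the grep || printf one-liner, for illustration only:

    from pathlib import Path

    STANZA = ('Host *\n'
              '  StrictHostKeyChecking no\n'
              '  IdentityFile ~/.ssh/sky-cluster-key\n'
              '  IdentityFile ~/.ssh/id_rsa\n')

    def ensure_ssh_config(path: Path = Path.home() / '.ssh' / 'config') -> None:
        # Append the stanza only if it is not already present.
        path.parent.mkdir(mode=0o700, parents=True, exist_ok=True)
        existing = path.read_text() if path.exists() else ''
        if STANZA not in existing:
            with path.open('a') as f:
                f.write(STANZA)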
sky/utils/command_runner.py CHANGED
@@ -11,7 +11,6 @@ from sky import sky_logging
 from sky.skylet import constants
 from sky.skylet import log_lib
 from sky.utils import common_utils
-from sky.utils import context_utils
 from sky.utils import control_master_utils
 from sky.utils import subprocess_utils
 from sky.utils import timeline
@@ -575,7 +574,6 @@ class SSHCommandRunner(CommandRunner):
                            shell=True)
 
    @timeline.event
-    @context_utils.cancellation_guard
    def run(
        self,
        cmd: Union[str, List[str]],
@@ -781,7 +779,6 @@ class KubernetesCommandRunner(CommandRunner):
        return kubectl_cmd
 
    @timeline.event
-    @context_utils.cancellation_guard
    def run(
        self,
        cmd: Union[str, List[str]],
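
Note: both run() overrides drop @context_utils.cancellation_guard, since the entire sky/utils/context machinery was deleted in this release (files 28 and 29 above). For orientation only, a hypothetical reconstruction of what such a guard decorator typically looks like; this is not SkyPilot's deleted code:

    import functools

    def cancellation_guard(func):
        """Refuse to start func if the current context is already canceled."""
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            ctx = get_current_context()  # hypothetical context accessor
            if ctx is not None and ctx.is_canceled():
                raise RuntimeError('Operation canceled by request context')
            return func(*args, **kwargs)
        return wrapper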