skypilot-nightly 1.0.0.dev20251027__py3-none-any.whl → 1.0.0.dev20251029__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sky/__init__.py +2 -2
- sky/adaptors/coreweave.py +278 -0
- sky/backends/backend_utils.py +9 -6
- sky/backends/cloud_vm_ray_backend.py +2 -3
- sky/check.py +25 -13
- sky/client/cli/command.py +5 -1
- sky/cloud_stores.py +73 -0
- sky/core.py +7 -5
- sky/dashboard/out/404.html +1 -1
- sky/dashboard/out/_next/static/{YP5Vc3ROcDnTGta0XAhcs → DabuSAKsc_y0wyJxpTIdQ}/_buildManifest.js +1 -1
- sky/dashboard/out/_next/static/chunks/{1141-d5204f35a3388bf4.js → 1141-c3c10e2c6ed71a8f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/2755.a239c652bf8684dd.js +26 -0
- sky/dashboard/out/_next/static/chunks/3294.87a13fba0058865b.js +1 -0
- sky/dashboard/out/_next/static/chunks/{3785.538eb23a098fc304.js → 3785.170be320e0060eaf.js} +1 -1
- sky/dashboard/out/_next/static/chunks/4282-49b2065b7336e496.js +1 -0
- sky/dashboard/out/_next/static/chunks/7615-80aa7b09f45a86d2.js +1 -0
- sky/dashboard/out/_next/static/chunks/8969-4ed9236db997b42b.js +1 -0
- sky/dashboard/out/_next/static/chunks/9360.10a3aac7aad5e3aa.js +31 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-ac4a217f17b087cb.js +16 -0
- sky/dashboard/out/_next/static/chunks/pages/clusters/{[cluster]-fbf2907ce2bb67e2.js → [cluster]-1704039ccaf997cf.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{jobs-0dc34cf9a8710a9f.js → jobs-7eee823559e5cf9f.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{users-96d6b8bb2dec055f.js → users-2b172f13f8538a7a.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/workspaces/{[name]-fb1b4d3bfb047cad.js → [name]-bbfe5860c93470fd.js} +1 -1
- sky/dashboard/out/_next/static/chunks/pages/{workspaces-6fc994fa1ee6c6bf.js → workspaces-1891376c08050940.js} +1 -1
- sky/dashboard/out/_next/static/chunks/{webpack-585d805f693dbceb.js → webpack-485984ca04e021d0.js} +1 -1
- sky/dashboard/out/_next/static/css/0748ce22df867032.css +3 -0
- sky/dashboard/out/clusters/[cluster]/[job].html +1 -1
- sky/dashboard/out/clusters/[cluster].html +1 -1
- sky/dashboard/out/clusters.html +1 -1
- sky/dashboard/out/config.html +1 -1
- sky/dashboard/out/index.html +1 -1
- sky/dashboard/out/infra/[context].html +1 -1
- sky/dashboard/out/infra.html +1 -1
- sky/dashboard/out/jobs/[job].html +1 -1
- sky/dashboard/out/jobs/pools/[pool].html +1 -1
- sky/dashboard/out/jobs.html +1 -1
- sky/dashboard/out/users.html +1 -1
- sky/dashboard/out/volumes.html +1 -1
- sky/dashboard/out/workspace/new.html +1 -1
- sky/dashboard/out/workspaces/[name].html +1 -1
- sky/dashboard/out/workspaces.html +1 -1
- sky/data/data_utils.py +92 -1
- sky/data/mounting_utils.py +39 -0
- sky/data/storage.py +166 -9
- sky/global_user_state.py +14 -18
- sky/jobs/server/server.py +2 -2
- sky/jobs/utils.py +5 -6
- sky/optimizer.py +1 -1
- sky/provision/kubernetes/instance.py +88 -19
- sky/provision/kubernetes/volume.py +2 -2
- sky/schemas/api/responses.py +2 -5
- sky/serve/replica_managers.py +2 -2
- sky/serve/serve_utils.py +9 -2
- sky/server/requests/payloads.py +2 -0
- sky/server/requests/requests.py +137 -102
- sky/server/requests/serializers/decoders.py +0 -6
- sky/server/requests/serializers/encoders.py +33 -6
- sky/server/server.py +2 -1
- sky/server/stream_utils.py +56 -13
- sky/setup_files/dependencies.py +2 -0
- sky/task.py +10 -0
- sky/templates/nebius-ray.yml.j2 +1 -0
- sky/utils/cli_utils/status_utils.py +8 -2
- sky/utils/context_utils.py +13 -1
- sky/utils/resources_utils.py +53 -29
- {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251029.dist-info}/METADATA +52 -36
- {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251029.dist-info}/RECORD +73 -72
- sky/dashboard/out/_next/static/chunks/2755.227c84f5adf75c6b.js +0 -26
- sky/dashboard/out/_next/static/chunks/3015-2dcace420c8939f4.js +0 -1
- sky/dashboard/out/_next/static/chunks/3294.6d5054a953a818cb.js +0 -1
- sky/dashboard/out/_next/static/chunks/4282-d2f3ef2fbf78e347.js +0 -1
- sky/dashboard/out/_next/static/chunks/8969-0389e2cb52412db3.js +0 -1
- sky/dashboard/out/_next/static/chunks/9360.07d78b8552bc9d17.js +0 -31
- sky/dashboard/out/_next/static/chunks/pages/clusters/[cluster]/[job]-c815b90e296b8075.js +0 -16
- sky/dashboard/out/_next/static/css/4c052b4444e52a58.css +0 -3
- /sky/dashboard/out/_next/static/{YP5Vc3ROcDnTGta0XAhcs → DabuSAKsc_y0wyJxpTIdQ}/_ssgManifest.js +0 -0
- /sky/dashboard/out/_next/static/chunks/pages/{_app-513d332313670f2a.js → _app-bde01e4a2beec258.js} +0 -0
- {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251029.dist-info}/WHEEL +0 -0
- {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251029.dist-info}/entry_points.txt +0 -0
- {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251029.dist-info}/licenses/LICENSE +0 -0
- {skypilot_nightly-1.0.0.dev20251027.dist-info → skypilot_nightly-1.0.0.dev20251029.dist-info}/top_level.txt +0 -0
sky/schemas/api/responses.py
CHANGED

@@ -90,7 +90,7 @@ class StatusResponse(ResponseBaseModel):
     # This is an internally facing field anyway, so it's less
     # of a problem that it's not typed.
     handle: Optional[Any] = None
-    last_use: str
+    last_use: Optional[str] = None
     status: status_lib.ClusterStatus
     autostop: int
     to_down: bool
@@ -98,11 +98,8 @@ class StatusResponse(ResponseBaseModel):
     # metadata is a JSON, so we use Any here.
     metadata: Optional[Dict[str, Any]] = None
     cluster_hash: str
-    # pydantic cannot generate the pydantic-core schema for
-    # storage_mounts_metadata, so we use Any here.
-    storage_mounts_metadata: Optional[Dict[str, Any]] = None
     cluster_ever_up: bool
-    status_updated_at: int
+    status_updated_at: Optional[int] = None
     user_hash: str
     user_name: str
     config_hash: Optional[str] = None
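For context, a minimal standalone sketch (assuming pydantic v2; the model below is a stand-in, not the real sky.schemas.api.responses.StatusResponse) of why loosening these fields matters: a required `last_use: str` rejects payloads that omit the field, while `Optional[str] = None` accepts them, which is what lets newer servers drop the field for summary responses.

    from typing import Optional
    import pydantic


    class StrictModel(pydantic.BaseModel):
        last_use: str  # required: validation fails if the field is missing


    class RelaxedModel(pydantic.BaseModel):
        last_use: Optional[str] = None  # optional: a missing field becomes None


    try:
        StrictModel.model_validate({})
    except pydantic.ValidationError as e:
        print('strict model rejects an empty payload:', e.error_count(), 'error')

    print(RelaxedModel.model_validate({}).last_use)  # -> None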
sky/serve/replica_managers.py
CHANGED

@@ -495,8 +495,8 @@ class ReplicaInfo:
         info_dict['cloud'] = repr(handle.launched_resources.cloud)
         info_dict['region'] = handle.launched_resources.region
         info_dict['resources_str'] = (
-            resources_utils.get_readable_resources_repr(
-                …
+            resources_utils.get_readable_resources_repr(
+                handle, simplified_only=True)[0])
         return info_dict

     def __repr__(self) -> str:
sky/serve/serve_utils.py
CHANGED

@@ -1550,8 +1550,15 @@ def _format_replica_table(replica_records: List[Dict[str, Any]], show_all: bool,
             'handle']
         if replica_handle is not None:
             infra = replica_handle.launched_resources.infra.formatted_str()
-            …
-            …
+            simplified = not show_all
+            resources_str_simple, resources_str_full = (
+                resources_utils.get_readable_resources_repr(
+                    replica_handle, simplified_only=simplified))
+            if simplified:
+                resources_str = resources_str_simple
+            else:
+                assert resources_str_full is not None
+                resources_str = resources_str_full

         replica_values = [
             service_name,
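Both call sites above consume the same two-element return value. A rough sketch of the contract they appear to rely on (the helper below is a hypothetical stand-in for `resources_utils.get_readable_resources_repr`, whose real implementation lives in sky/utils/resources_utils.py): the first element is always the simplified string, and the second is the full string only when `simplified_only=False`.

    from typing import Optional, Tuple

    # Stand-in: returns a (simplified, full) pair, computing the full form
    # only when the caller asks for it.
    def get_repr_sketch(handle_desc: str,
                        simplified_only: bool) -> Tuple[str, Optional[str]]:
        simple = handle_desc.split(',')[0]  # e.g. '1x A100'
        full = None if simplified_only else handle_desc
        return simple, full

    # Simplified view (as in ReplicaInfo.to_info_dict): take element [0].
    print(get_repr_sketch('1x A100, us-east-1, spot', simplified_only=True)[0])

    # Verbose view (as in _format_replica_table with show_all=True): the full
    # string is expected to be non-None when simplified_only=False.
    simple, full = get_repr_sketch('1x A100, us-east-1, spot',
                                   simplified_only=False)
    assert full is not None
    print(full)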
sky/server/requests/payloads.py
CHANGED

@@ -319,6 +319,8 @@ class StatusBody(RequestBody):
     # Only return fields that are needed for the
     # dashboard / CLI summary response
     summary_response: bool = False
+    # Include the cluster handle in the response
+    include_handle: bool = True


 class StartBody(RequestBody):
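A hedged sketch of what the new flag enables (the class below mirrors only the two flags visible in this diff; the real class is sky.server.requests.payloads.StatusBody): a summary caller can opt out of the pickled cluster handle, which is the heaviest field in the status payload.

    import pydantic

    # Hypothetical stand-in mirroring the flags shown in the diff.
    class StatusBodySketch(pydantic.BaseModel):
        summary_response: bool = False
        include_handle: bool = True

    # A dashboard-style caller that only needs summary columns can now skip
    # the expensive handle field:
    body = StatusBodySketch(summary_response=True, include_handle=False)
    print(body.model_dump_json())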
sky/server/requests/requests.py
CHANGED

@@ -5,7 +5,6 @@ import contextlib
 import dataclasses
 import enum
 import functools
-import json
 import os
 import pathlib
 import shutil
@@ -21,6 +20,7 @@ import uuid
 import anyio
 import colorama
 import filelock
+import orjson

 from sky import exceptions
 from sky import global_user_state
@@ -213,8 +213,8 @@ class Request:
             entrypoint=self.entrypoint.__name__,
             request_body=self.request_body.model_dump_json(),
             status=self.status.value,
-            return_value=json.dumps(None),
-            error=json.dumps(None),
+            return_value=orjson.dumps(None).decode('utf-8'),
+            error=orjson.dumps(None).decode('utf-8'),
             pid=None,
             created_at=self.created_at,
             schedule_type=self.schedule_type.value,
@@ -237,8 +237,8 @@ class Request:
             entrypoint=encoders.pickle_and_encode(self.entrypoint),
             request_body=encoders.pickle_and_encode(self.request_body),
             status=self.status.value,
-            return_value=json.dumps(self.return_value),
-            error=json.dumps(self.error),
+            return_value=orjson.dumps(self.return_value).decode('utf-8'),
+            error=orjson.dumps(self.error).decode('utf-8'),
             pid=self.pid,
             created_at=self.created_at,
             schedule_type=self.schedule_type.value,
@@ -270,8 +270,8 @@ class Request:
             entrypoint=decoders.decode_and_unpickle(payload.entrypoint),
             request_body=decoders.decode_and_unpickle(payload.request_body),
             status=RequestStatus(payload.status),
-            return_value=json.loads(payload.return_value),
-            error=json.loads(payload.error),
+            return_value=orjson.loads(payload.return_value),
+            error=orjson.loads(payload.error),
             pid=payload.pid,
             created_at=payload.created_at,
             schedule_type=ScheduleType(payload.schedule_type),
@@ -328,10 +328,11 @@ def encode_requests(requests: List[Request]) -> List[payloads.RequestPayload]:
             entrypoint=request.entrypoint.__name__
             if request.entrypoint is not None else '',
             request_body=request.request_body.model_dump_json()
-            if request.request_body is not None else json.dumps(None),
+            if request.request_body is not None else
+            orjson.dumps(None).decode('utf-8'),
             status=request.status.value,
-            return_value=json.dumps(None),
-            error=json.dumps(None),
+            return_value=orjson.dumps(None).decode('utf-8'),
+            error=orjson.dumps(None).decode('utf-8'),
             pid=None,
             created_at=request.created_at,
             schedule_type=request.schedule_type.value,
@@ -372,9 +373,9 @@ def _update_request_row_fields(
     if 'user_id' not in fields:
         content['user_id'] = ''
     if 'return_value' not in fields:
-        content['return_value'] = json.dumps(None)
+        content['return_value'] = orjson.dumps(None).decode('utf-8')
     if 'error' not in fields:
-        content['error'] = json.dumps(None)
+        content['error'] = orjson.dumps(None).decode('utf-8')
     if 'schedule_type' not in fields:
         content['schedule_type'] = ScheduleType.SHORT.value
     # Optional fields in RequestPayload
@@ -393,94 +394,6 @@ def _update_request_row_fields(
     return tuple(content[col] for col in REQUEST_COLUMNS)


-def kill_cluster_requests(cluster_name: str, exclude_request_name: str):
-    """Kill all pending and running requests for a cluster.
-
-    Args:
-        cluster_name: the name of the cluster.
-        exclude_request_names: exclude requests with these names. This is to
-            prevent killing the caller request.
-    """
-    request_ids = [
-        request_task.request_id
-        for request_task in get_request_tasks(req_filter=RequestTaskFilter(
-            status=[RequestStatus.PENDING, RequestStatus.RUNNING],
-            exclude_request_names=[exclude_request_name],
-            cluster_names=[cluster_name],
-            fields=['request_id']))
-    ]
-    kill_requests(request_ids)
-
-
-def kill_requests_with_prefix(request_ids: Optional[List[str]] = None,
-                              user_id: Optional[str] = None) -> List[str]:
-    """Kill requests with a given request ID prefix."""
-    expanded_request_ids: Optional[List[str]] = None
-    if request_ids is not None:
-        expanded_request_ids = []
-        for request_id in request_ids:
-            request_tasks = get_requests_with_prefix(request_id,
-                                                     fields=['request_id'])
-            if request_tasks is None or len(request_tasks) == 0:
-                continue
-            if len(request_tasks) > 1:
-                raise ValueError(f'Multiple requests found for '
-                                 f'request ID prefix: {request_id}')
-            expanded_request_ids.append(request_tasks[0].request_id)
-    return kill_requests(request_ids=expanded_request_ids, user_id=user_id)
-
-
-def kill_requests(request_ids: Optional[List[str]] = None,
-                  user_id: Optional[str] = None) -> List[str]:
-    """Kill a SkyPilot API request and set its status to cancelled.
-
-    Args:
-        request_ids: The request IDs to kill. If None, all requests for the
-            user are killed.
-        user_id: The user ID to kill requests for. If None, all users are
-            killed.
-
-    Returns:
-        A list of request IDs that were cancelled.
-    """
-    if request_ids is None:
-        request_ids = [
-            request_task.request_id
-            for request_task in get_request_tasks(req_filter=RequestTaskFilter(
-                status=[RequestStatus.PENDING, RequestStatus.RUNNING],
-                # Avoid cancelling the cancel request itself.
-                exclude_request_names=['sky.api_cancel'],
-                user_id=user_id,
-                fields=['request_id']))
-        ]
-    cancelled_request_ids = []
-    for request_id in request_ids:
-        with update_request(request_id) as request_record:
-            if request_record is None:
-                logger.debug(f'No request ID {request_id}')
-                continue
-            # Skip internal requests. The internal requests are scheduled with
-            # request_id in range(len(INTERNAL_REQUEST_EVENTS)).
-            if request_record.request_id in set(
-                    event.id for event in daemons.INTERNAL_REQUEST_DAEMONS):
-                continue
-            if request_record.status > RequestStatus.RUNNING:
-                logger.debug(f'Request {request_id} already finished')
-                continue
-            if request_record.pid is not None:
-                logger.debug(f'Killing request process {request_record.pid}')
-                # Use SIGTERM instead of SIGKILL:
-                # - The executor can handle SIGTERM gracefully
-                # - After SIGTERM, the executor can reuse the request process
-                #   for other requests, avoiding the overhead of forking a new
-                #   process for each request.
-                os.kill(request_record.pid, signal.SIGTERM)
-            request_record.status = RequestStatus.CANCELLED
-            request_record.finished_at = time.time()
-            cancelled_request_ids.append(request_id)
-    return cancelled_request_ids
-
-
 def create_table(cursor, conn):
     # Enable WAL mode to avoid locking issues.
     # See: issue #1441 and PR #1509
@@ -624,6 +537,128 @@ def request_lock_path(request_id: str) -> str:
     return os.path.join(lock_path, f'.{request_id}.lock')


+def kill_cluster_requests(cluster_name: str, exclude_request_name: str):
+    """Kill all pending and running requests for a cluster.
+
+    Args:
+        cluster_name: the name of the cluster.
+        exclude_request_names: exclude requests with these names. This is to
+            prevent killing the caller request.
+    """
+    request_ids = [
+        request_task.request_id
+        for request_task in get_request_tasks(req_filter=RequestTaskFilter(
+            status=[RequestStatus.PENDING, RequestStatus.RUNNING],
+            exclude_request_names=[exclude_request_name],
+            cluster_names=[cluster_name],
+            fields=['request_id']))
+    ]
+    _kill_requests(request_ids)
+
+
+def kill_requests_with_prefix(request_ids: Optional[List[str]] = None,
+                              user_id: Optional[str] = None) -> List[str]:
+    """Kill requests with a given request ID prefix."""
+    expanded_request_ids: Optional[List[str]] = None
+    if request_ids is not None:
+        expanded_request_ids = []
+        for request_id in request_ids:
+            request_tasks = get_requests_with_prefix(request_id,
+                                                     fields=['request_id'])
+            if request_tasks is None or len(request_tasks) == 0:
+                continue
+            if len(request_tasks) > 1:
+                raise ValueError(f'Multiple requests found for '
+                                 f'request ID prefix: {request_id}')
+            expanded_request_ids.append(request_tasks[0].request_id)
+    return _kill_requests(request_ids=expanded_request_ids, user_id=user_id)
+
+
+def _should_kill_request(request_id: str,
+                         request_record: Optional[Request]) -> bool:
+    if request_record is None:
+        logger.debug(f'No request ID {request_id}')
+        return False
+    # Skip internal requests. The internal requests are scheduled with
+    # request_id in range(len(INTERNAL_REQUEST_EVENTS)).
+    if request_record.request_id in set(
+            event.id for event in daemons.INTERNAL_REQUEST_DAEMONS):
+        return False
+    if request_record.status > RequestStatus.RUNNING:
+        logger.debug(f'Request {request_id} already finished')
+        return False
+    return True
+
+
+def _kill_requests(request_ids: Optional[List[str]] = None,
+                   user_id: Optional[str] = None) -> List[str]:
+    """Kill a SkyPilot API request and set its status to cancelled.
+
+    Args:
+        request_ids: The request IDs to kill. If None, all requests for the
+            user are killed.
+        user_id: The user ID to kill requests for. If None, all users are
+            killed.
+
+    Returns:
+        A list of request IDs that were cancelled.
+    """
+    if request_ids is None:
+        request_ids = [
+            request_task.request_id
+            for request_task in get_request_tasks(req_filter=RequestTaskFilter(
+                status=[RequestStatus.PENDING, RequestStatus.RUNNING],
+                # Avoid cancelling the cancel request itself.
+                exclude_request_names=['sky.api_cancel'],
+                user_id=user_id,
+                fields=['request_id']))
+        ]
+    cancelled_request_ids = []
+    for request_id in request_ids:
+        with update_request(request_id) as request_record:
+            if not _should_kill_request(request_id, request_record):
+                continue
+            if request_record.pid is not None:
+                logger.debug(f'Killing request process {request_record.pid}')
+                # Use SIGTERM instead of SIGKILL:
+                # - The executor can handle SIGTERM gracefully
+                # - After SIGTERM, the executor can reuse the request process
+                #   for other requests, avoiding the overhead of forking a new
+                #   process for each request.
+                os.kill(request_record.pid, signal.SIGTERM)
+            request_record.status = RequestStatus.CANCELLED
+            request_record.finished_at = time.time()
+            cancelled_request_ids.append(request_id)
+    return cancelled_request_ids
+
+
+@init_db_async
+@asyncio_utils.shield
+async def kill_request_async(request_id: str) -> bool:
+    """Kill a SkyPilot API request and set its status to cancelled.
+
+    Returns:
+        True if the request was killed, False otherwise.
+    """
+    async with filelock.AsyncFileLock(request_lock_path(request_id)):
+        request = await _get_request_no_lock_async(request_id)
+        if not _should_kill_request(request_id, request):
+            return False
+        assert request is not None
+        if request.pid is not None:
+            logger.debug(f'Killing request process {request.pid}')
+            # Use SIGTERM instead of SIGKILL:
+            # - The executor can handle SIGTERM gracefully
+            # - After SIGTERM, the executor can reuse the request process
+            #   for other requests, avoiding the overhead of forking a new
+            #   process for each request.
+            os.kill(request.pid, signal.SIGTERM)
+        request.status = RequestStatus.CANCELLED
+        request.finished_at = time.time()
+        await _add_or_update_request_no_lock_async(request)
+        return True
+
+
 @contextlib.contextmanager
 @init_db
 @metrics_lib.time_me
@@ -638,7 +673,7 @@ def update_request(request_id: str) -> Generator[Optional[Request], None, None]:
     _add_or_update_request_no_lock(request)


-@…
+@init_db_async
 @metrics_lib.time_me
 @asyncio_utils.shield
 async def update_status_async(request_id: str, status: RequestStatus) -> None:
@@ -650,7 +685,7 @@ async def update_status_async(request_id: str, status: RequestStatus) -> None:
     await _add_or_update_request_no_lock_async(request)


-@…
+@init_db_async
 @metrics_lib.time_me
 @asyncio_utils.shield
 async def update_status_msg_async(request_id: str, status_msg: str) -> None:
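The json-to-orjson swap above changes one detail worth noting: orjson.dumps returns bytes rather than str, hence the .decode('utf-8') at every call site so the stored DB column stays a string. A quick standalone illustration of the round-trip:

    import json
    import orjson

    # json.dumps returns str; orjson.dumps returns bytes.
    assert json.dumps(None) == 'null'
    assert orjson.dumps(None) == b'null'

    # Decoding keeps the stored value identical to the old json-based code.
    assert orjson.dumps(None).decode('utf-8') == json.dumps(None)

    # Round-trip as used by Request.encode / Request.from_payload
    # (orjson.loads accepts both str and bytes input):
    payload = orjson.dumps({'ok': True, 'n': 3}).decode('utf-8')
    assert orjson.loads(payload) == {'ok': True, 'n': 3}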
sky/server/requests/serializers/decoders.py
CHANGED

@@ -60,12 +60,6 @@ def decode_status(
     if 'handle' in cluster and cluster['handle'] is not None:
         cluster['handle'] = decode_and_unpickle(cluster['handle'])
     cluster['status'] = status_lib.ClusterStatus(cluster['status'])
-    # this field is to be deprecated in the future.
-    # do not decode this field if it is not present.
-    if ('storage_mounts_metadata' in cluster and
-            cluster['storage_mounts_metadata'] is not None):
-        cluster['storage_mounts_metadata'] = decode_and_unpickle(
-            cluster['storage_mounts_metadata'])
     if 'is_managed' not in cluster:
         cluster['is_managed'] = False
     response.append(responses.StatusResponse.model_validate(cluster))
sky/server/requests/serializers/encoders.py
CHANGED

@@ -60,13 +60,23 @@ def encode_status(
         clusters: List[responses.StatusResponse]) -> List[Dict[str, Any]]:
     response = []
     for cluster in clusters:
-        response_cluster = cluster.model_dump()
+        response_cluster = cluster.model_dump(exclude_none=True)
+        # These default setting is needed because last_use and status_updated_at
+        # used to be not optional.
+        # TODO(syang): remove this after v0.10.7 or v0.11.0
+        if 'last_use' not in response_cluster:
+            response_cluster['last_use'] = ''
+        if 'status_updated_at' not in response_cluster:
+            response_cluster['status_updated_at'] = 0
         response_cluster['status'] = cluster['status'].value
         handle = serialize_utils.prepare_handle_for_backwards_compatibility(
             cluster['handle'])
         response_cluster['handle'] = pickle_and_encode(handle)
+        # TODO (syang) We still need to return this field for backwards
+        # compatibility.
+        # Remove this field at or after v0.10.7 or v0.11.0
         response_cluster['storage_mounts_metadata'] = pickle_and_encode(
-            …
+            None)  # Always returns None.
         response.append(response_cluster)
     return response

@@ -206,10 +216,11 @@ def encode_enabled_clouds(clouds: List['clouds.Cloud']) -> List[str]:
 @register_encoder('storage_ls')
 def encode_storage_ls(
         return_value: List[responses.StorageRecord]) -> List[Dict[str, Any]]:
-    for storage_info in return_value:
+    response_list = [storage_info.model_dump() for storage_info in return_value]
+    for storage_info in response_list:
         storage_info['status'] = storage_info['status'].value
         storage_info['store'] = [store.value for store in storage_info['store']]
-    return return_value
+    return response_list


 @register_encoder('volume_list')
@@ -219,11 +230,11 @@ def encode_volume_list(


 @register_encoder('job_status')
-def encode_job_status(return_value: Dict[int, Any]) -> Dict[…
+def encode_job_status(return_value: Dict[int, Any]) -> Dict[str, str]:
     for job_id in return_value.keys():
         if return_value[job_id] is not None:
             return_value[job_id] = return_value[job_id].value
-    return return_value
+    return {str(k): v for k, v in return_value.items()}


 @register_encoder('kubernetes_node_info')
@@ -235,3 +246,19 @@ def encode_kubernetes_node_info(
 @register_encoder('endpoints')
 def encode_endpoints(return_value: Dict[int, str]) -> Dict[str, str]:
     return {str(k): v for k, v in return_value.items()}
+
+
+@register_encoder('realtime_kubernetes_gpu_availability')
+def encode_realtime_gpu_availability(
+    return_value: List[Tuple[str,
+                             List[Any]]]) -> List[Tuple[str, List[List[Any]]]]:
+    # Convert RealtimeGpuAvailability namedtuples to lists
+    # for JSON serialization.
+    result = []
+    for context, gpu_list in return_value:
+        gpu_availability_list = []
+        for gpu in gpu_list:
+            gpu_list_item = [gpu.gpu, gpu.counts, gpu.capacity, gpu.available]
+            gpu_availability_list.append(gpu_list_item)
+        result.append((context, gpu_availability_list))
+    return result
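The encode_status change pairs `model_dump(exclude_none=True)` with explicit fallbacks, so older clients that still expect `last_use` and `status_updated_at` keep receiving values. A minimal sketch of that pattern (a hypothetical stand-in model, not the real StatusResponse; assumes pydantic v2):

    from typing import Optional
    import pydantic

    class ClusterSketch(pydantic.BaseModel):
        name: str
        last_use: Optional[str] = None
        status_updated_at: Optional[int] = None

    cluster = ClusterSketch(name='demo')

    # exclude_none drops unset optionals from the dict entirely...
    dumped = cluster.model_dump(exclude_none=True)
    assert 'last_use' not in dumped

    # ...so backwards-compatible defaults are re-inserted for old clients.
    dumped.setdefault('last_use', '')
    dumped.setdefault('status_updated_at', 0)
    print(dumped)  # {'name': 'demo', 'last_use': '', 'status_updated_at': 0}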
sky/server/server.py
CHANGED

@@ -25,6 +25,7 @@ import zipfile
 import aiofiles
 import anyio
 import fastapi
+from fastapi import responses as fastapi_responses
 from fastapi.middleware import cors
 import starlette.middleware.base
 import uvloop
@@ -1512,7 +1513,7 @@ async def get_expanded_request_id(request_id: str) -> str:


 # === API server related APIs ===
-@app.get('/api/get')
+@app.get('/api/get', response_class=fastapi_responses.ORJSONResponse)
 async def api_get(request_id: str) -> payloads.RequestPayload:
     """Gets a request with a given request ID prefix."""
     # Validate request_id prefix matches a single request.
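`response_class=ORJSONResponse` swaps FastAPI's default JSON serializer for orjson on this one route (it requires orjson to be installed, which matches the new dependency added below). A minimal standalone example of the same pattern, with a hypothetical route name:

    import fastapi
    from fastapi import responses as fastapi_responses

    app = fastapi.FastAPI()

    # Only this route serializes with orjson; other routes keep the default
    # JSONResponse. Useful when a payload is large, e.g. a request record
    # carrying big encoded fields.
    @app.get('/items', response_class=fastapi_responses.ORJSONResponse)
    async def items() -> dict:
        return {'items': list(range(5))}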
sky/server/stream_utils.py
CHANGED

@@ -25,6 +25,8 @@ logger = sky_logging.init_logger(__name__)
 _BUFFER_SIZE = 8 * 1024  # 8KB
 _BUFFER_TIMEOUT = 0.02  # 20ms
 _HEARTBEAT_INTERVAL = 30
+_READ_CHUNK_SIZE = 256 * 1024  # 256KB chunks for file reading
+
 # If a SHORT request has been stuck in pending for
 # _SHORT_REQUEST_SPINNER_TIMEOUT seconds, we show the waiting spinner
 _SHORT_REQUEST_SPINNER_TIMEOUT = 2
@@ -235,6 +237,9 @@ async def _tail_log_file(
     buffer_bytes = 0
     last_flush_time = asyncio.get_event_loop().time()

+    # Read file in chunks instead of line-by-line for better performance
+    incomplete_line = b''  # Buffer for incomplete lines across chunks
+
     async def flush_buffer() -> AsyncGenerator[str, None]:
         nonlocal buffer, buffer_bytes, last_flush_time
         if buffer:
@@ -255,8 +260,23 @@ async def _tail_log_file(
             async for chunk in flush_buffer():
                 yield chunk

-            …
-            …
+            # Read file in chunks for better I/O performance
+            file_chunk: bytes = await f.read(_READ_CHUNK_SIZE)
+            if not file_chunk:
+                # Process any remaining incomplete line
+                if incomplete_line:
+                    line_str = incomplete_line.decode('utf-8')
+                    if plain_logs:
+                        is_payload, line_str = message_utils.decode_payload(
+                            line_str, raise_for_mismatch=False)
+                        if not is_payload:
+                            buffer.append(line_str)
+                            buffer_bytes += len(line_str.encode('utf-8'))
+                    else:
+                        buffer.append(line_str)
+                        buffer_bytes += len(line_str.encode('utf-8'))
+                    incomplete_line = b''
+
                 # Avoid checking the status too frequently to avoid overloading the
                 # DB.
                 should_check_status = (current_time -
@@ -328,16 +348,39 @@ async def _tail_log_file(
                 # performance but it helps avoid unnecessary heartbeat strings
                 # being printed when the client runs in an old version.
                 last_heartbeat_time = asyncio.get_event_loop().time()
-            … (10 removed lines of the old line-by-line read loop; not captured in this view)
+
+            # Combine with any incomplete line from previous chunk
+            file_chunk = incomplete_line + file_chunk
+            incomplete_line = b''
+
+            # Split chunk into lines, preserving line structure
+            lines_bytes = file_chunk.split(b'\n')
+
+            # If chunk doesn't end with newline, the last element is incomplete
+            if file_chunk and not file_chunk.endswith(b'\n'):
+                incomplete_line = lines_bytes[-1]
+                lines_bytes = lines_bytes[:-1]
+            else:
+                # If ends with \n, split creates an empty last element we should
+                # ignore
+                if lines_bytes and lines_bytes[-1] == b'':
+                    lines_bytes = lines_bytes[:-1]
+
+            # Process all complete lines in this chunk
+            for line_bytes in lines_bytes:
+                # Reconstruct line with newline (since split removed it)
+                line_str = line_bytes.decode('utf-8') + '\n'
+
+                if plain_logs:
+                    is_payload, line_str = message_utils.decode_payload(
+                        line_str, raise_for_mismatch=False)
+                    # TODO(aylei): implement heartbeat mechanism for plain logs,
+                    # sending invisible characters might be okay.
+                    if is_payload:
+                        continue
+
+                buffer.append(line_str)
+                buffer_bytes += len(line_str.encode('utf-8'))

             # Flush remaining lines in the buffer.
             async for chunk in flush_buffer():
@@ -373,7 +416,7 @@ def stream_response(
     async def on_disconnect():
         logger.info(f'User terminated the connection for request '
                     f'{request_id}')
-        requests_lib.…
+        await requests_lib.kill_request_async(request_id)

     # The background task will be run after returning a response.
     # https://fastapi.tiangolo.com/tutorial/background-tasks/
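The core of the chunked tailing logic is the carry buffer for a line that straddles two reads. A self-contained sketch of just that splitting step (a pure function with a hypothetical name, no async I/O), mirroring the diff's handling of trailing newlines:

    from typing import List, Tuple

    def split_chunk(carry: bytes, chunk: bytes) -> Tuple[List[bytes], bytes]:
        """Split carry+chunk into complete lines plus a new carry.

        Lines are split on b'\n'; if the data does not end with a newline,
        the tail is carried over to the next read.
        """
        data = carry + chunk
        lines = data.split(b'\n')
        if data and not data.endswith(b'\n'):
            return lines[:-1], lines[-1]
        # A trailing newline leaves an empty last element to drop.
        if lines and lines[-1] == b'':
            lines = lines[:-1]
        return lines, b''

    lines, carry = split_chunk(b'', b'first\nseco')
    assert lines == [b'first'] and carry == b'seco'
    lines, carry = split_chunk(carry, b'nd\n')
    assert lines == [b'second'] and carry == b''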
sky/setup_files/dependencies.py
CHANGED

@@ -49,6 +49,7 @@ install_requires = [
     # <= 3.13 may encounter https://github.com/ultralytics/yolov5/issues/414
     'pyyaml > 3.13, != 5.4.*',
     'ijson',
+    'orjson',
     'requests',
     # SkyPilot inherits from uvicorn.Server to customize the behavior of
     # uvicorn, so we need to pin uvicorn version to avoid potential break
@@ -187,6 +188,7 @@ cloud_dependencies: Dict[str, List[str]] = {
     'docker': ['docker'] + local_ray,
     'lambda': [],  # No dependencies needed for lambda
     'cloudflare': aws_dependencies,
+    'coreweave': aws_dependencies,
     'scp': local_ray,
     'oci': ['oci'],
     # Kubernetes 32.0.0 has an authentication bug: https://github.com/kubernetes-client/python/issues/2333  # pylint: disable=line-too-long
sky/task.py
CHANGED

@@ -1552,6 +1552,16 @@ class Task:
                 self.update_file_mounts({
                     mnt_path: blob_path,
                 })
+            elif store_type is storage_lib.StoreType.COREWEAVE:
+                if storage.source is not None and not isinstance(
+                        storage.source,
+                        list) and storage.source.startswith('cw://'):
+                    blob_path = storage.source
+                else:
+                    blob_path = 'cw://' + storage.name
+                self.update_file_mounts({
+                    mnt_path: blob_path,
+                })
             else:
                 with ux_utils.print_exception_no_traceback():
                     raise ValueError(f'Storage Type {store_type} '
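The COREWEAVE branch follows the same pattern as the other store types: reuse the source when it is already a cw:// URI, otherwise derive one from the storage name. A sketch of that decision in isolation (hypothetical helper name; 'cw://' is the prefix shown in the diff):

    from typing import List, Optional, Union

    def coreweave_blob_path(source: Optional[Union[str, List[str]]],
                            name: str) -> str:
        # A single string source that is already a cw:// URI is used verbatim;
        # anything else (None, a local path, a list of paths) maps to the
        # bucket named after the storage object.
        if (source is not None and not isinstance(source, list) and
                source.startswith('cw://')):
            return source
        return 'cw://' + name

    assert coreweave_blob_path('cw://my-bucket/data', 'x') == 'cw://my-bucket/data'
    assert coreweave_blob_path('/local/dir', 'my-store') == 'cw://my-store'
    assert coreweave_blob_path(None, 'my-store') == 'cw://my-store'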
sky/templates/nebius-ray.yml.j2
CHANGED

@@ -156,6 +156,7 @@ setup_commands:
       echo '{{env_var}}={{env_value}}' | sudo tee -a /etc/environment;
       {%- endfor %}
       {%- endif %}
+      IP=$(hostname -I | awk '{print $1}'); echo "$IP $(hostname)" | sudo tee -a /etc/hosts;
       sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf';
       sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload;
       mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n  StrictHostKeyChecking no\n  IdentityFile ~/.ssh/sky-cluster-key\n  IdentityFile ~/.ssh/id_rsa" ~/.ssh/config) || printf "Host *\n  StrictHostKeyChecking no\n  IdentityFile ~/.ssh/sky-cluster-key\n  IdentityFile ~/.ssh/id_rsa\n" >> ~/.ssh/config;
sky/utils/cli_utils/status_utils.py
CHANGED

@@ -282,8 +282,14 @@ def _get_resources(cluster_record: _ClusterRecord,
             if resources_str_full is not None:
                 resources_str = resources_str_full
         if resources_str is None:
-            …
-            …
+            resources_str_simple, resources_str_full = (
+                resources_utils.get_readable_resources_repr(
+                    handle, simplified_only=truncate))
+            if truncate:
+                resources_str = resources_str_simple
+            else:
+                assert resources_str_full is not None
+                resources_str = resources_str_full

         return resources_str
     return '-'