dstack 0.19.10__py3-none-any.whl → 0.19.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- dstack/_internal/cli/commands/metrics.py +25 -10
- dstack/_internal/cli/commands/offer.py +2 -0
- dstack/_internal/cli/services/configurators/run.py +1 -1
- dstack/_internal/cli/utils/updates.py +13 -1
- dstack/_internal/core/backends/aws/compute.py +21 -9
- dstack/_internal/core/backends/azure/compute.py +7 -5
- dstack/_internal/core/backends/base/compute.py +9 -4
- dstack/_internal/core/backends/gcp/compute.py +43 -20
- dstack/_internal/core/backends/gcp/resources.py +18 -2
- dstack/_internal/core/backends/local/compute.py +4 -2
- dstack/_internal/core/models/configurations.py +2 -1
- dstack/_internal/core/models/runs.py +2 -1
- dstack/_internal/proxy/gateway/resources/nginx/00-log-format.conf +11 -1
- dstack/_internal/proxy/gateway/resources/nginx/service.jinja2 +12 -6
- dstack/_internal/proxy/gateway/services/stats.py +17 -3
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +3 -3
- dstack/_internal/server/routers/repos.py +9 -4
- dstack/_internal/server/services/fleets.py +2 -2
- dstack/_internal/server/services/gateways/__init__.py +1 -1
- dstack/_internal/server/services/jobs/__init__.py +4 -4
- dstack/_internal/server/services/jobs/configurators/base.py +15 -1
- dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +3 -1
- dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +3 -1
- dstack/_internal/server/services/plugins.py +64 -32
- dstack/_internal/server/services/runs.py +2 -2
- dstack/_internal/server/services/volumes.py +1 -1
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-b4803049eac16aea9a49.js → main-5b9786c955b42bf93581.js} +5 -5
- dstack/_internal/server/statics/{main-b4803049eac16aea9a49.js.map → main-5b9786c955b42bf93581.js.map} +1 -1
- dstack/plugins/builtin/__init__.py +0 -0
- dstack/plugins/builtin/rest_plugin/__init__.py +18 -0
- dstack/plugins/builtin/rest_plugin/_models.py +48 -0
- dstack/plugins/builtin/rest_plugin/_plugin.py +127 -0
- dstack/version.py +2 -2
- {dstack-0.19.10.dist-info → dstack-0.19.11.dist-info}/METADATA +1 -2
- {dstack-0.19.10.dist-info → dstack-0.19.11.dist-info}/RECORD +39 -35
- {dstack-0.19.10.dist-info → dstack-0.19.11.dist-info}/WHEEL +0 -0
- {dstack-0.19.10.dist-info → dstack-0.19.11.dist-info}/entry_points.txt +0 -0
- {dstack-0.19.10.dist-info → dstack-0.19.11.dist-info}/licenses/LICENSE.md +0 -0

dstack/_internal/cli/commands/metrics.py

@@ -39,8 +39,6 @@ class MetricsCommand(APIBaseCommand):
  run = self.api.runs.get(run_name=args.run_name)
  if run is None:
  raise CLIError(f"Run {args.run_name} not found")
- if run.status.is_finished():
- raise CLIError(f"Run {args.run_name} is finished")
  metrics = _get_run_jobs_metrics(api=self.api, run=run)

  if not args.watch:
@@ -55,8 +53,6 @@ class MetricsCommand(APIBaseCommand):
  run = self.api.runs.get(run_name=args.run_name)
  if run is None:
  raise CLIError(f"Run {args.run_name} not found")
- if run.status.is_finished():
- raise CLIError(f"Run {args.run_name} is finished")
  metrics = _get_run_jobs_metrics(api=self.api, run=run)
  except KeyboardInterrupt:
  pass
@@ -78,11 +74,12 @@ def _get_run_jobs_metrics(api: Client, run: Run) -> List[JobMetrics]:
  def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
  table = Table(box=None)
  table.add_column("NAME", style="bold", no_wrap=True)
+ table.add_column("STATUS")
  table.add_column("CPU")
  table.add_column("MEMORY")
  table.add_column("GPU")

- run_row: Dict[Union[str, int], Any] = {"NAME": run.name}
+ run_row: Dict[Union[str, int], Any] = {"NAME": run.name, "STATUS": run.status.value}
  if len(run._run.jobs) != 1:
  add_row_from_dict(table, run_row)

@@ -101,9 +98,9 @@ def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
  cpu_usage = f"{cpu_usage:.0f}%"
  memory_usage = _get_metric_value(job_metrics, "memory_working_set_bytes")
  if memory_usage is not None:
- memory_usage =
+ memory_usage = _format_memory(memory_usage, 2)
  if resources is not None:
- memory_usage += f"/{resources.memory_mib}
+ memory_usage += f"/{_format_memory(resources.memory_mib * 1024 * 1024, 2)}"
  gpu_metrics = ""
  gpus_detected_num = _get_metric_value(job_metrics, "gpus_detected_num")
  if gpus_detected_num is not None:
@@ -113,13 +110,16 @@ def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
  if gpu_memory_usage is not None:
  if i != 0:
  gpu_metrics += "\n"
- gpu_metrics += f"
+ gpu_metrics += f"gpu={i} mem={_format_memory(gpu_memory_usage, 2)}"
  if resources is not None:
- gpu_metrics +=
-
+ gpu_metrics += (
+ f"/{_format_memory(resources.gpus[i].memory_mib * 1024 * 1024, 2)}"
+ )
+ gpu_metrics += f" util={gpu_util_percent}%"

  job_row: Dict[Union[str, int], Any] = {
  "NAME": f" replica={job.job_spec.replica_num} job={job.job_spec.job_num}",
+ "STATUS": job.job_submissions[-1].status.value,
  "CPU": cpu_usage or "-",
  "MEMORY": memory_usage or "-",
  "GPU": gpu_metrics or "-",
@@ -136,3 +136,18 @@ def _get_metric_value(job_metrics: JobMetrics, name: str) -> Optional[Any]:
  if metric.name == name:
  return metric.values[-1]
  return None
+
+
+ def _format_memory(memory_bytes: int, decimal_places: int) -> str:
+ """See test_format_memory in tests/_internal/cli/commands/test_metrics.py for examples."""
+ memory_mb = memory_bytes / 1024 / 1024
+ if memory_mb >= 1024:
+ value = memory_mb / 1024
+ unit = "GB"
+ else:
+ value = memory_mb
+ unit = "MB"
+
+ if decimal_places == 0:
+ return f"{round(value)}{unit}"
+ return f"{value:.{decimal_places}f}".rstrip("0").rstrip(".") + unit
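
The new _format_memory helper drives the MEMORY and GPU columns. The following standalone sketch mirrors the function added above (the local format_memory name and the sample byte counts are illustrative, not part of dstack) and shows the values the table would render:

def format_memory(memory_bytes: int, decimal_places: int) -> str:
    # Mirrors dstack's new _format_memory: MB below 1 GiB, GB above, trailing zeros trimmed.
    memory_mb = memory_bytes / 1024 / 1024
    if memory_mb >= 1024:
        value, unit = memory_mb / 1024, "GB"
    else:
        value, unit = memory_mb, "MB"
    if decimal_places == 0:
        return f"{round(value)}{unit}"
    return f"{value:.{decimal_places}f}".rstrip("0").rstrip(".") + unit

print(format_memory(512 * 1024 * 1024, 2))   # 512MB
print(format_memory(1536 * 1024 * 1024, 2))  # 1.5GB
print(format_memory(8 * 1024**3, 0))         # 8GB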

dstack/_internal/cli/commands/offer.py

@@ -84,6 +84,8 @@ class OfferCommand(APIBaseCommand):
  job_plan = run_plan.job_plans[0]

  if args.format == "json":
+ # FIXME: Should use effective_run_spec from run_plan,
+ # since the spec can be changed by the server and plugins
  output = {
  "project": run_plan.project_name,
  "user": run_plan.user,

dstack/_internal/cli/services/configurators/run.py

@@ -105,7 +105,7 @@ class BaseRunConfigurator(ApplyEnvVarsConfiguratorMixin, BaseApplyConfigurator):
  changed_fields = []
  if run_plan.action == ApplyAction.UPDATE:
  diff = diff_models(
- run_plan.
+ run_plan.get_effective_run_spec().configuration,
  run_plan.current_resource.run_spec.configuration,
  )
  changed_fields = list(diff.keys())

dstack/_internal/cli/utils/updates.py

@@ -57,10 +57,22 @@ def _is_last_check_time_outdated() -> bool:
  )


+ def is_update_available(current_version: str, latest_version: str) -> bool:
+ """
+ Return True if latest_version is newer than current_version.
+ Pre-releases are only considered if the current version is also a pre-release.
+ """
+ _current_version = pkg_version.parse(str(current_version))
+ _latest_version = pkg_version.parse(str(latest_version))
+ return _current_version < _latest_version and (
+ not _latest_version.is_prerelease or _current_version.is_prerelease
+ )
+
+
  def _check_version():
  latest_version = get_latest_version()
  if latest_version is not None:
- if
+ if is_update_available(version.__version__, latest_version):
  console.print(f"A new version of dstack is available: [code]{latest_version}[/]\n")
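
A quick way to see the pre-release rule in action is to replicate the helper with the packaging library (assuming pkg_version in the hunk above is packaging.version, which provides parse() and is_prerelease):

from packaging import version as pkg_version

def is_update_available(current_version: str, latest_version: str) -> bool:
    current = pkg_version.parse(str(current_version))
    latest = pkg_version.parse(str(latest_version))
    # A newer pre-release only counts if the installed version is itself a pre-release.
    return current < latest and (not latest.is_prerelease or current.is_prerelease)

print(is_update_available("0.19.10", "0.19.11"))     # True
print(is_update_available("0.19.10", "0.19.11rc1"))  # False - stable installs ignore pre-releases
print(is_update_available("0.19.11rc1", "0.19.11"))  # True
print(is_update_available("0.19.11", "0.19.11"))     # False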

dstack/_internal/core/backends/aws/compute.py

@@ -611,9 +611,12 @@ class AWSCompute(
  raise e
  logger.debug("Deleted EBS volume %s", volume.configuration.name)

- def attach_volume(
+ def attach_volume(
+ self, volume: Volume, provisioning_data: JobProvisioningData
+ ) -> VolumeAttachmentData:
  ec2_client = self.session.client("ec2", region_name=volume.configuration.region)

+ instance_id = provisioning_data.instance_id
  device_names = aws_resources.list_available_device_names(
  ec2_client=ec2_client, instance_id=instance_id
  )
@@ -646,9 +649,12 @@ class AWSCompute(
  logger.debug("Attached EBS volume %s to instance %s", volume.volume_id, instance_id)
  return VolumeAttachmentData(device_name=device_name)

- def detach_volume(
+ def detach_volume(
+ self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
+ ):
  ec2_client = self.session.client("ec2", region_name=volume.configuration.region)

+ instance_id = provisioning_data.instance_id
  logger.debug("Detaching EBS volume %s from instance %s", volume.volume_id, instance_id)
  attachment_data = get_or_error(volume.get_attachment_data_for_instance(instance_id))
  try:
@@ -667,9 +673,10 @@ class AWSCompute(
  raise e
  logger.debug("Detached EBS volume %s from instance %s", volume.volume_id, instance_id)

- def is_volume_detached(self, volume: Volume,
+ def is_volume_detached(self, volume: Volume, provisioning_data: JobProvisioningData) -> bool:
  ec2_client = self.session.client("ec2", region_name=volume.configuration.region)

+ instance_id = provisioning_data.instance_id
  logger.debug("Getting EBS volume %s status", volume.volume_id)
  response = ec2_client.describe_volumes(VolumeIds=[volume.volume_id])
  volumes_infos = response.get("Volumes")
@@ -819,18 +826,23 @@ def _get_regions_to_zones(session: boto3.Session, regions: List[str]) -> Dict[st

  def _supported_instances(offer: InstanceOffer) -> bool:
  for family in [
+ "m7i.",
+ "c7i.",
+ "r7i.",
+ "t3.",
  "t2.small",
  "c5.",
  "m5.",
- "
- "
+ "p5.",
+ "p5e.",
+ "p4d.",
+ "p4de.",
+ "p3.",
  "g6.",
  "g6e.",
  "gr6.",
- "
- "
- "p4de.",
- "p5.",
+ "g5.",
+ "g4dn.",
  ]:
  if offer.instance.name.startswith(family):
  return True
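
The allow-list is matched purely by name prefix. A small hypothetical check (using only the family prefixes visible in the hunk above; a few removed entries are truncated in this rendering) illustrates the behaviour:

FAMILIES = [
    "m7i.", "c7i.", "r7i.", "t3.", "t2.small", "c5.", "m5.",
    "p5.", "p5e.", "p4d.", "p4de.", "p3.", "g6.", "g6e.", "gr6.", "g5.", "g4dn.",
]

def is_supported(instance_name: str) -> bool:
    # Same prefix check as _supported_instances above.
    return any(instance_name.startswith(family) for family in FAMILIES)

print(is_supported("g4dn.xlarge"))   # True
print(is_supported("p5e.48xlarge"))  # True
print(is_supported("t2.micro"))      # False - only t2.small is allowed from the t2 family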

dstack/_internal/core/backends/azure/compute.py

@@ -391,11 +391,8 @@ class VMImageVariant(enum.Enum):


  _SUPPORTED_VM_SERIES_PATTERNS = [
- #
- #
- r"D(\d+)s_v3", # Dsv3-series (general purpose)
- r"E(\d+)i?s_v4", # Esv4-series (memory optimized)
- r"E(\d+)-(\d+)s_v4", # Esv4-series (constrained vCPU)
+ r"D(\d+)s_v6", # Dsv6-series (general purpose)
+ r"E(\d+)i?s_v6", # Esv6-series (memory optimized)
  r"F(\d+)s_v2", # Fsv2-series (compute optimized)
  r"NC(\d+)s_v3", # NCv3-series [V100 16GB]
  r"NC(\d+)as_T4_v3", # NCasT4_v3-series [T4]
@@ -404,6 +401,11 @@ _SUPPORTED_VM_SERIES_PATTERNS = [
  r"NC(\d+)ads_A100_v4", # NC A100 v4-series [A100 80GB]
  r"ND(\d+)asr_v4", # ND A100 v4-series [8xA100 40GB]
  r"ND(\d+)amsr_A100_v4", # NDm A100 v4-series [8xA100 80GB]
+ # Deprecated series
+ # TODO: Remove after several releases
+ r"D(\d+)s_v3", # Dsv3-series (general purpose)
+ r"E(\d+)i?s_v4", # Esv4-series (memory optimized)
+ r"E(\d+)-(\d+)s_v4", # Esv4-series (constrained vCPU)
  ]
  _SUPPORTED_VM_SERIES_PATTERN = (
  "^Standard_(" + "|".join(f"({s})" for s in _SUPPORTED_VM_SERIES_PATTERNS) + ")$"
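
The series list is compiled into a single anchored regular expression, so both the new v6 patterns and the deprecated v3/v4 ones keep matching. A reduced sketch with a subset of the patterns shown above:

import re

patterns = [
    r"D(\d+)s_v6",        # new Dsv6-series
    r"E(\d+)i?s_v6",      # new Esv6-series
    r"D(\d+)s_v3",        # deprecated, still accepted
    r"E(\d+)i?s_v4",      # deprecated, still accepted
    r"E(\d+)-(\d+)s_v4",  # deprecated, still accepted
]
supported = re.compile("^Standard_(" + "|".join(f"({s})" for s in patterns) + ")$")

print(bool(supported.match("Standard_D8s_v6")))  # True
print(bool(supported.match("Standard_D8s_v3")))  # True
print(bool(supported.match("Standard_D8s_v5")))  # False - never in the list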

dstack/_internal/core/backends/base/compute.py

@@ -19,6 +19,7 @@ from dstack._internal.core.consts import (
  DSTACK_RUNNER_SSH_PORT,
  DSTACK_SHIM_HTTP_PORT,
  )
+ from dstack._internal.core.models.configurations import DEFAULT_REPO_DIR
  from dstack._internal.core.models.gateways import (
  GatewayComputeConfiguration,
  GatewayProvisioningData,
@@ -335,7 +336,9 @@ class ComputeWithVolumeSupport(ABC):
  """
  raise NotImplementedError()

- def attach_volume(
+ def attach_volume(
+ self, volume: Volume, provisioning_data: JobProvisioningData
+ ) -> VolumeAttachmentData:
  """
  Attaches a volume to the instance.
  If the volume is not found, it should raise `ComputeError()`.
@@ -344,7 +347,9 @@ class ComputeWithVolumeSupport(ABC):
  """
  raise NotImplementedError()

- def detach_volume(
+ def detach_volume(
+ self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
+ ):
  """
  Detaches a volume from the instance.
  Implement only if compute may return `VolumeProvisioningData.detachable`.
@@ -352,7 +357,7 @@ class ComputeWithVolumeSupport(ABC):
  """
  raise NotImplementedError()

- def is_volume_detached(self, volume: Volume,
+ def is_volume_detached(self, volume: Volume, provisioning_data: JobProvisioningData) -> bool:
  """
  Checks if a volume was detached from the instance.
  If `detach_volume()` may fail to detach volume,
@@ -754,7 +759,7 @@ def get_docker_commands(
  f" --ssh-port {DSTACK_RUNNER_SSH_PORT}"
  " --temp-dir /tmp/runner"
  " --home-dir /root"
- " --working-dir
+ f" --working-dir {DEFAULT_REPO_DIR}"
  ),
  ]
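
For backend authors, the signature change means the volume methods now receive the whole JobProvisioningData instead of a bare instance id. A minimal sketch with stand-in dataclasses (the real Volume, JobProvisioningData, and VolumeAttachmentData models carry many more fields) shows the shape of an implementation:

from dataclasses import dataclass
from typing import Optional

@dataclass
class Volume:                # stand-in for the dstack model
    volume_id: str

@dataclass
class JobProvisioningData:   # stand-in for the dstack model
    instance_id: str

@dataclass
class VolumeAttachmentData:  # stand-in for the dstack model
    device_name: Optional[str] = None

class ExampleCompute:
    def attach_volume(
        self, volume: Volume, provisioning_data: JobProvisioningData
    ) -> VolumeAttachmentData:
        # The instance id now comes from the provisioning data.
        print(f"attach {volume.volume_id} to {provisioning_data.instance_id}")
        return VolumeAttachmentData(device_name="/dev/sdf")

    def detach_volume(
        self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
    ):
        print(f"detach {volume.volume_id} (force={force})")

    def is_volume_detached(self, volume: Volume, provisioning_data: JobProvisioningData) -> bool:
        return True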

dstack/_internal/core/backends/gcp/compute.py

@@ -649,13 +649,24 @@ class GCPCompute(
  pass
  logger.debug("Deleted persistent disk for volume %s", volume.name)

- def attach_volume(
+ def attach_volume(
+ self, volume: Volume, provisioning_data: JobProvisioningData
+ ) -> VolumeAttachmentData:
+ instance_id = provisioning_data.instance_id
  logger.debug(
  "Attaching persistent disk for volume %s to instance %s",
  volume.volume_id,
  instance_id,
  )
+ if not gcp_resources.instance_type_supports_persistent_disk(
+ provisioning_data.instance_type.name
+ ):
+ raise ComputeError(
+ f"Instance type {provisioning_data.instance_type.name} does not support Persistent disk volumes"
+ )
+
  zone = get_or_error(volume.provisioning_data).availability_zone
+ is_tpu = _is_tpu_provisioning_data(provisioning_data)
  try:
  disk = self.disk_client.get(
  project=self.config.project_id,
@@ -663,18 +674,16 @@ class GCPCompute(
  disk=volume.volume_id,
  )
  disk_url = disk.self_link
+ except google.api_core.exceptions.NotFound:
+ raise ComputeError("Persistent disk found")

-
-
- try:
+ try:
+ if is_tpu:
  get_node_request = tpu_v2.GetNodeRequest(
  name=f"projects/{self.config.project_id}/locations/{zone}/nodes/{instance_id}",
  )
  tpu_node = self.tpu_client.get_node(get_node_request)
- except google.api_core.exceptions.NotFound:
- tpu_node = None

- if tpu_node is not None:
  # Python API to attach a disk to a TPU is not documented,
  # so we follow the code from the gcloud CLI:
  # https://github.com/twistedpair/google-cloud-sdk/blob/26ab5a281d56b384cc25750f3279a27afe5b499f/google-cloud-sdk/lib/googlecloudsdk/command_lib/compute/tpus/tpu_vm/util.py#L113
@@ -711,7 +720,6 @@ class GCPCompute(
  attached_disk.auto_delete = False
  attached_disk.device_name = f"pd-{volume.volume_id}"
  device_name = attached_disk.device_name
-
  operation = self.instances_client.attach_disk(
  project=self.config.project_id,
  zone=zone,
@@ -720,13 +728,16 @@ class GCPCompute(
  )
  gcp_resources.wait_for_extended_operation(operation, "persistent disk attachment")
  except google.api_core.exceptions.NotFound:
- raise ComputeError("
+ raise ComputeError("Disk or instance not found")
  logger.debug(
  "Attached persistent disk for volume %s to instance %s", volume.volume_id, instance_id
  )
  return VolumeAttachmentData(device_name=device_name)

- def detach_volume(
+ def detach_volume(
+ self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
+ ):
+ instance_id = provisioning_data.instance_id
  logger.debug(
  "Detaching persistent disk for volume %s from instance %s",
  volume.volume_id,
@@ -734,17 +745,16 @@ class GCPCompute(
  )
  zone = get_or_error(volume.provisioning_data).availability_zone
  attachment_data = get_or_error(volume.get_attachment_data_for_instance(instance_id))
- … (removed lines truncated in the source rendering)
+ is_tpu = _is_tpu_provisioning_data(provisioning_data)
+ if is_tpu:
+ try:
+ get_node_request = tpu_v2.GetNodeRequest(
+ name=f"projects/{self.config.project_id}/locations/{zone}/nodes/{instance_id}",
+ )
+ tpu_node = self.tpu_client.get_node(get_node_request)
+ except google.api_core.exceptions.NotFound:
+ raise ComputeError("Instance not found")

- if tpu_node is not None:
  source_disk = (
  f"projects/{self.config.project_id}/zones/{zone}/disks/{volume.volume_id}"
  )
@@ -815,6 +825,11 @@ def _supported_instances_and_zones(
  if _is_tpu(offer.instance.name) and not _is_single_host_tpu(offer.instance.name):
  return False
  for family in [
+ "m4-",
+ "c4-",
+ "n4-",
+ "h3-",
+ "n2-",
  "e2-medium",
  "e2-standard-",
  "e2-highmem-",
@@ -1001,3 +1016,11 @@ def _get_tpu_data_disk_for_volume(project_id: str, volume: Volume) -> tpu_v2.Att
  mode=tpu_v2.AttachedDisk.DiskMode.READ_WRITE,
  )
  return attached_disk
+
+
+ def _is_tpu_provisioning_data(provisioning_data: JobProvisioningData) -> bool:
+ is_tpu = False
+ if provisioning_data.backend_data:
+ backend_data_dict = json.loads(provisioning_data.backend_data)
+ is_tpu = backend_data_dict.get("is_tpu", False)
+ return is_tpu
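
The TPU branch is selected by a flag stored in the provisioning data's backend_data JSON. A standalone sketch of that check, with a stand-in JobProvisioningData holding only the backend_data field:

import json
from dataclasses import dataclass
from typing import Optional

@dataclass
class JobProvisioningData:  # stand-in; the real model has many more fields
    backend_data: Optional[str] = None

def is_tpu_provisioning_data(provisioning_data: JobProvisioningData) -> bool:
    # Same logic as _is_tpu_provisioning_data above.
    is_tpu = False
    if provisioning_data.backend_data:
        is_tpu = json.loads(provisioning_data.backend_data).get("is_tpu", False)
    return is_tpu

print(is_tpu_provisioning_data(JobProvisioningData()))                                 # False
print(is_tpu_provisioning_data(JobProvisioningData(backend_data='{"is_tpu": true}')))  # True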

dstack/_internal/core/backends/gcp/resources.py

@@ -140,7 +140,10 @@ def create_instance_struct(
  initialize_params = compute_v1.AttachedDiskInitializeParams()
  initialize_params.source_image = image_id
  initialize_params.disk_size_gb = disk_size
-
+ if instance_type_supports_persistent_disk(machine_type):
+ initialize_params.disk_type = f"zones/{zone}/diskTypes/pd-balanced"
+ else:
+ initialize_params.disk_type = f"zones/{zone}/diskTypes/hyperdisk-balanced"
  disk.initialize_params = initialize_params
  instance.disks = [disk]

@@ -421,7 +424,7 @@ def wait_for_extended_operation(

  if operation.error_code:
  # Write only debug logs here.
- # The unexpected errors will be propagated and logged
+ # The unexpected errors will be propagated and logged appropriately by the caller.
  logger.debug(
  "Error during %s: [Code: %s]: %s",
  verbose_name,
@@ -462,3 +465,16 @@ def get_placement_policy_resource_name(
  placement_policy: str,
  ) -> str:
  return f"projects/{project_id}/regions/{region}/resourcePolicies/{placement_policy}"
+
+
+ def instance_type_supports_persistent_disk(instance_type_name: str) -> bool:
+ return not any(
+ instance_type_name.startswith(series)
+ for series in [
+ "m4-",
+ "c4-",
+ "n4-",
+ "h3-",
+ "v6e",
+ ]
+ )
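
instance_type_supports_persistent_disk gates both volume attachment and the boot-disk type chosen in create_instance_struct. The sketch below mirrors that helper and the disk-type branch (the zone-qualified prefix of the real disk-type URL is omitted here):

def instance_type_supports_persistent_disk(instance_type_name: str) -> bool:
    # Newer machine series (and v6e TPUs) only take Hyperdisk, not Persistent Disk.
    return not any(
        instance_type_name.startswith(series)
        for series in ["m4-", "c4-", "n4-", "h3-", "v6e"]
    )

for machine_type in ["n2-standard-8", "c4-standard-8", "v6e-1"]:
    disk_type = (
        "pd-balanced"
        if instance_type_supports_persistent_disk(machine_type)
        else "hyperdisk-balanced"
    )
    print(machine_type, "->", disk_type)
# n2-standard-8 -> pd-balanced
# c4-standard-8 -> hyperdisk-balanced
# v6e-1 -> hyperdisk-balanced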

dstack/_internal/core/backends/local/compute.py

@@ -110,8 +110,10 @@ class LocalCompute(
  def delete_volume(self, volume: Volume):
  pass

- def attach_volume(self, volume: Volume,
+ def attach_volume(self, volume: Volume, provisioning_data: JobProvisioningData):
  pass

- def detach_volume(
+ def detach_volume(
+ self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
+ ):
  pass

dstack/_internal/core/models/configurations.py

@@ -26,6 +26,7 @@ STRIP_PREFIX_DEFAULT = True
  RUN_PRIOTIRY_MIN = 0
  RUN_PRIOTIRY_MAX = 100
  RUN_PRIORITY_DEFAULT = 0
+ DEFAULT_REPO_DIR = "/workflow"


  class RunConfigurationType(str, Enum):
@@ -181,7 +182,7 @@ class BaseRunConfiguration(CoreModel):
  Field(
  description=(
  "The path to the working directory inside the container."
- " It's specified relative to the repository directory (
+ f" It's specified relative to the repository directory (`{DEFAULT_REPO_DIR}`) and should be inside it."
  ' Defaults to `"."` '
  )
  ),

dstack/_internal/core/models/runs.py

@@ -8,6 +8,7 @@ from typing_extensions import Annotated
  from dstack._internal.core.models.backends.base import BackendType
  from dstack._internal.core.models.common import ApplyAction, CoreModel, NetworkMode, RegistryAuth
  from dstack._internal.core.models.configurations import (
+ DEFAULT_REPO_DIR,
  AnyRunConfiguration,
  RunConfiguration,
  )
@@ -338,7 +339,7 @@ class RunSpec(CoreModel):
  Field(
  description=(
  "The path to the working directory inside the container."
- " It's specified relative to the repository directory (
+ f" It's specified relative to the repository directory (`{DEFAULT_REPO_DIR}`) and should be inside it."
  ' Defaults to `"."`.'
  )
  ),

dstack/_internal/proxy/gateway/resources/nginx/00-log-format.conf

@@ -1 +1,11 @@
- log_format dstack_stat '$time_iso8601 $host $status $request_time';
+ log_format dstack_stat '$time_iso8601 $host $status $request_time $dstack_replica_hit';
+
+
+ # A hack to avoid this Nginx reload error when no services are registered:
+ # nginx: [emerg] unknown "dstack_replica_hit" variable
+ server {
+ listen unix:/tmp/dstack-dummy-nginx.sock;
+ server_name placeholder.local;
+ deny all;
+ set $dstack_replica_hit 0;
+ }

dstack/_internal/proxy/gateway/resources/nginx/service.jinja2

@@ -14,6 +14,7 @@ upstream {{ domain }}.upstream {
  server {
  server_name {{ domain }};
  limit_req_status 429;
+ set $dstack_replica_hit 0;
  access_log {{ access_log_path }} dstack_stat;
  client_max_body_size {{ client_max_body_size }};

@@ -23,11 +24,7 @@ server {
  auth_request /_dstack_auth;
  {% endif %}

- {% if replicas %}
  try_files /nonexistent @$http_upgrade;
- {% else %}
- return 503;
- {% endif %}

  {% if location.limit_req %}
  limit_req zone={{ location.limit_req.zone }}{% if location.limit_req.burst %} burst={{ location.limit_req.burst }} nodelay{% endif %};
@@ -35,8 +32,9 @@ server {
  }
  {% endfor %}

- {% if replicas %}
  location @websocket {
+ set $dstack_replica_hit 1;
+ {% if replicas %}
  proxy_pass http://{{ domain }}.upstream;
  proxy_set_header X-Real-IP $remote_addr;
  proxy_set_header Host $host;
@@ -44,19 +42,27 @@ server {
  proxy_set_header Upgrade $http_upgrade;
  proxy_set_header Connection "Upgrade";
  proxy_read_timeout 300s;
+ {% else %}
+ return 503;
+ {% endif %}
  }
  location @ {
+ set $dstack_replica_hit 1;
+ {% if replicas %}
  proxy_pass http://{{ domain }}.upstream;
  proxy_set_header X-Real-IP $remote_addr;
  proxy_set_header Host $host;
  proxy_read_timeout 300s;
+ {% else %}
+ return 503;
+ {% endif %}
  }
- {% endif %}

  {% if auth %}
  location = /_dstack_auth {
  internal;
  if ($remote_addr = 127.0.0.1) {
+ # for requests from the gateway app, e.g. from the OpenAI-compatible API
  return 200;
  }
  proxy_pass http://localhost:{{ proxy_port }}/api/auth/{{ project_name }};

dstack/_internal/proxy/gateway/services/stats.py

@@ -11,10 +11,10 @@ from pydantic import BaseModel

  from dstack._internal.proxy.gateway.repo.repo import GatewayProxyRepo
  from dstack._internal.proxy.gateway.schemas.stats import PerWindowStats, ServiceStats, Stat
+ from dstack._internal.proxy.lib.errors import UnexpectedProxyError
  from dstack._internal.utils.common import run_async

  logger = logging.getLogger(__name__)
- IGNORE_STATUSES = {403, 404}
  WINDOWS = (30, 60, 300)
  TTL = WINDOWS[-1]
  EMPTY_STATS = {window: Stat(requests=0, request_time=0.0) for window in WINDOWS}
@@ -35,6 +35,7 @@ class LogEntry(BaseModel):
  host: str
  status: int
  request_time: float
+ is_replica_hit: bool


  class StatsCollector:
@@ -87,7 +88,8 @@ class StatsCollector:
  now = datetime.datetime.now(tz=datetime.timezone.utc)

  for entry in self._read_access_log(now - datetime.timedelta(seconds=TTL)):
-
+ # only include requests that hit or should hit a service replica
+ if not entry.is_replica_hit:
  continue

  frame_timestamp = int(entry.timestamp.timestamp())
@@ -119,7 +121,10 @@ class StatsCollector:
  line = self._file.readline()
  if not line:
  break
-
+ cells = line.split()
+ if len(cells) == 4:  # compatibility with pre-0.19.11 logs
+ cells.append("0" if cells[2] in ["403", "404"] else "1")
+ timestamp_str, host, status, request_time, dstack_replica_hit = cells
  timestamp = datetime.datetime.fromisoformat(timestamp_str)
  if timestamp < after:
  continue
@@ -128,6 +133,7 @@ class StatsCollector:
  host=host,
  status=int(status),
  request_time=float(request_time),
+ is_replica_hit=_parse_nginx_bool(dstack_replica_hit),
  )
  if os.fstat(self._file.fileno()).st_ino != st_ino:
  # file was rotated
@@ -154,3 +160,11 @@ async def get_service_stats(
  )
  for service in services
  ]
+
+
+ def _parse_nginx_bool(v: str) -> bool:
+ if v == "0":
+ return False
+ if v == "1":
+ return True
+ raise UnexpectedProxyError(f"Cannot parse boolean value: expected '0' or '1', got {v!r}")
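
With the extra $dstack_replica_hit field, an access-log line now has five space-separated cells. A small standalone sketch of the parsing shown above, including the fallback for four-field logs written before 0.19.11 (the sample lines are illustrative):

def parse_access_log_line(line: str) -> dict:
    cells = line.split()
    if len(cells) == 4:  # compatibility with pre-0.19.11 logs
        cells.append("0" if cells[2] in ["403", "404"] else "1")
    timestamp_str, host, status, request_time, replica_hit = cells
    return {
        "timestamp": timestamp_str,
        "host": host,
        "status": int(status),
        "request_time": float(request_time),
        "is_replica_hit": replica_hit == "1",
    }

print(parse_access_log_line("2025-06-02T10:15:30+00:00 svc.example.com 200 0.042 1"))
print(parse_access_log_line("2025-06-02T10:15:31+00:00 svc.example.com 404 0.001"))  # old format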

dstack/_internal/server/background/tasks/process_submitted_jobs.py

@@ -659,7 +659,7 @@ async def _attach_volumes(
  backend=backend,
  volume_model=volume_model,
  instance=instance,
-
+ jpd=job_provisioning_data,
  )
  job_runtime_data.volume_names.append(volume.name)
  break # attach next mount point
@@ -685,7 +685,7 @@ async def _attach_volume(
  backend: Backend,
  volume_model: VolumeModel,
  instance: InstanceModel,
-
+ jpd: JobProvisioningData,
  ):
  compute = backend.compute()
  assert isinstance(compute, ComputeWithVolumeSupport)
@@ -697,7 +697,7 @@ async def _attach_volume(
  attachment_data = await common_utils.run_async(
  compute.attach_volume,
  volume=volume,
-
+ provisioning_data=jpd,
  )
  volume_attachment_model = VolumeAttachmentModel(
  volume=volume_model,