dstack 0.19.10__py3-none-any.whl → 0.19.11rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dstack might be problematic.
Files changed (33)
  1. dstack/_internal/cli/commands/metrics.py +25 -10
  2. dstack/_internal/cli/utils/updates.py +13 -1
  3. dstack/_internal/core/backends/aws/compute.py +21 -9
  4. dstack/_internal/core/backends/azure/compute.py +7 -5
  5. dstack/_internal/core/backends/base/compute.py +9 -4
  6. dstack/_internal/core/backends/gcp/compute.py +43 -20
  7. dstack/_internal/core/backends/gcp/resources.py +18 -2
  8. dstack/_internal/core/backends/local/compute.py +4 -2
  9. dstack/_internal/core/models/configurations.py +2 -1
  10. dstack/_internal/core/models/runs.py +2 -1
  11. dstack/_internal/proxy/gateway/resources/nginx/00-log-format.conf +11 -1
  12. dstack/_internal/proxy/gateway/resources/nginx/service.jinja2 +12 -6
  13. dstack/_internal/proxy/gateway/services/stats.py +17 -3
  14. dstack/_internal/server/background/tasks/process_submitted_jobs.py +3 -3
  15. dstack/_internal/server/routers/repos.py +9 -4
  16. dstack/_internal/server/services/jobs/__init__.py +4 -4
  17. dstack/_internal/server/services/jobs/configurators/base.py +15 -1
  18. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +3 -1
  19. dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +3 -1
  20. dstack/_internal/server/services/plugins.py +61 -30
  21. dstack/_internal/server/statics/index.html +1 -1
  22. dstack/_internal/server/statics/{main-b4803049eac16aea9a49.js → main-5b9786c955b42bf93581.js} +5 -5
  23. dstack/_internal/server/statics/{main-b4803049eac16aea9a49.js.map → main-5b9786c955b42bf93581.js.map} +1 -1
  24. dstack/plugins/builtin/__init__.py +0 -0
  25. dstack/plugins/builtin/rest_plugin/__init__.py +18 -0
  26. dstack/plugins/builtin/rest_plugin/_models.py +48 -0
  27. dstack/plugins/builtin/rest_plugin/_plugin.py +127 -0
  28. dstack/version.py +2 -2
  29. {dstack-0.19.10.dist-info → dstack-0.19.11rc2.dist-info}/METADATA +1 -2
  30. {dstack-0.19.10.dist-info → dstack-0.19.11rc2.dist-info}/RECORD +33 -29
  31. {dstack-0.19.10.dist-info → dstack-0.19.11rc2.dist-info}/WHEEL +0 -0
  32. {dstack-0.19.10.dist-info → dstack-0.19.11rc2.dist-info}/entry_points.txt +0 -0
  33. {dstack-0.19.10.dist-info → dstack-0.19.11rc2.dist-info}/licenses/LICENSE.md +0 -0
dstack/_internal/cli/commands/metrics.py

@@ -39,8 +39,6 @@ class MetricsCommand(APIBaseCommand):
         run = self.api.runs.get(run_name=args.run_name)
         if run is None:
             raise CLIError(f"Run {args.run_name} not found")
-        if run.status.is_finished():
-            raise CLIError(f"Run {args.run_name} is finished")
         metrics = _get_run_jobs_metrics(api=self.api, run=run)

         if not args.watch:
@@ -55,8 +53,6 @@ class MetricsCommand(APIBaseCommand):
                 run = self.api.runs.get(run_name=args.run_name)
                 if run is None:
                     raise CLIError(f"Run {args.run_name} not found")
-                if run.status.is_finished():
-                    raise CLIError(f"Run {args.run_name} is finished")
                 metrics = _get_run_jobs_metrics(api=self.api, run=run)
         except KeyboardInterrupt:
             pass
@@ -78,11 +74,12 @@ def _get_run_jobs_metrics(api: Client, run: Run) -> List[JobMetrics]:
 def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
     table = Table(box=None)
     table.add_column("NAME", style="bold", no_wrap=True)
+    table.add_column("STATUS")
     table.add_column("CPU")
     table.add_column("MEMORY")
     table.add_column("GPU")

-    run_row: Dict[Union[str, int], Any] = {"NAME": run.name}
+    run_row: Dict[Union[str, int], Any] = {"NAME": run.name, "STATUS": run.status.value}
     if len(run._run.jobs) != 1:
         add_row_from_dict(table, run_row)

@@ -101,9 +98,9 @@ def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
             cpu_usage = f"{cpu_usage:.0f}%"
         memory_usage = _get_metric_value(job_metrics, "memory_working_set_bytes")
         if memory_usage is not None:
-            memory_usage = f"{round(memory_usage / 1024 / 1024)}MB"
+            memory_usage = _format_memory(memory_usage, 2)
             if resources is not None:
-                memory_usage += f"/{resources.memory_mib}MB"
+                memory_usage += f"/{_format_memory(resources.memory_mib * 1024 * 1024, 2)}"
         gpu_metrics = ""
         gpus_detected_num = _get_metric_value(job_metrics, "gpus_detected_num")
         if gpus_detected_num is not None:
@@ -113,13 +110,16 @@ def _get_metrics_table(run: Run, metrics: List[JobMetrics]) -> Table:
                 if gpu_memory_usage is not None:
                     if i != 0:
                         gpu_metrics += "\n"
-                    gpu_metrics += f"#{i} {round(gpu_memory_usage / 1024 / 1024)}MB"
+                    gpu_metrics += f"gpu={i} mem={_format_memory(gpu_memory_usage, 2)}"
                     if resources is not None:
-                        gpu_metrics += f"/{resources.gpus[i].memory_mib}MB"
-                    gpu_metrics += f" {gpu_util_percent}% Util"
+                        gpu_metrics += (
+                            f"/{_format_memory(resources.gpus[i].memory_mib * 1024 * 1024, 2)}"
+                        )
+                    gpu_metrics += f" util={gpu_util_percent}%"

         job_row: Dict[Union[str, int], Any] = {
             "NAME": f" replica={job.job_spec.replica_num} job={job.job_spec.job_num}",
+            "STATUS": job.job_submissions[-1].status.value,
             "CPU": cpu_usage or "-",
             "MEMORY": memory_usage or "-",
             "GPU": gpu_metrics or "-",
@@ -136,3 +136,18 @@ def _get_metric_value(job_metrics: JobMetrics, name: str) -> Optional[Any]:
         if metric.name == name:
             return metric.values[-1]
     return None
+
+
+def _format_memory(memory_bytes: int, decimal_places: int) -> str:
+    """See test_format_memory in tests/_internal/cli/commands/test_metrics.py for examples."""
+    memory_mb = memory_bytes / 1024 / 1024
+    if memory_mb >= 1024:
+        value = memory_mb / 1024
+        unit = "GB"
+    else:
+        value = memory_mb
+        unit = "MB"
+
+    if decimal_places == 0:
+        return f"{round(value)}{unit}"
+    return f"{value:.{decimal_places}f}".rstrip("0").rstrip(".") + unit
dstack/_internal/cli/utils/updates.py

@@ -57,10 +57,22 @@ def _is_last_check_time_outdated() -> bool:
     )


+def is_update_available(current_version: str, latest_version: str) -> bool:
+    """
+    Return True if latest_version is newer than current_version.
+    Pre-releases are only considered if the current version is also a pre-release.
+    """
+    _current_version = pkg_version.parse(str(current_version))
+    _latest_version = pkg_version.parse(str(latest_version))
+    return _current_version < _latest_version and (
+        not _latest_version.is_prerelease or _current_version.is_prerelease
+    )
+
+
 def _check_version():
     latest_version = get_latest_version()
     if latest_version is not None:
-        if pkg_version.parse(str(version.__version__)) < pkg_version.parse(latest_version):
+        if is_update_available(version.__version__, latest_version):
             console.print(f"A new version of dstack is available: [code]{latest_version}[/]\n")
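
The helper changes the notification policy: a pre-release is only advertised when the installed version is itself a pre-release. Under packaging's version semantics (the pkg_version alias above), that plays out as:

    from packaging import version as pkg_version

    def is_update_available(current: str, latest: str) -> bool:
        _current = pkg_version.parse(current)
        _latest = pkg_version.parse(latest)
        return _current < _latest and (not _latest.is_prerelease or _current.is_prerelease)

    assert is_update_available("0.19.10", "0.19.11")         # stable -> newer stable
    assert not is_update_available("0.19.10", "0.19.11rc2")  # rc is hidden from stable users
    assert is_update_available("0.19.11rc1", "0.19.11rc2")   # rc users see newer rc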
dstack/_internal/core/backends/aws/compute.py

@@ -611,9 +611,12 @@ class AWSCompute(
                 raise e
         logger.debug("Deleted EBS volume %s", volume.configuration.name)

-    def attach_volume(self, volume: Volume, instance_id: str) -> VolumeAttachmentData:
+    def attach_volume(
+        self, volume: Volume, provisioning_data: JobProvisioningData
+    ) -> VolumeAttachmentData:
         ec2_client = self.session.client("ec2", region_name=volume.configuration.region)

+        instance_id = provisioning_data.instance_id
         device_names = aws_resources.list_available_device_names(
             ec2_client=ec2_client, instance_id=instance_id
         )
@@ -646,9 +649,12 @@ class AWSCompute(
         logger.debug("Attached EBS volume %s to instance %s", volume.volume_id, instance_id)
         return VolumeAttachmentData(device_name=device_name)

-    def detach_volume(self, volume: Volume, instance_id: str, force: bool = False):
+    def detach_volume(
+        self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
+    ):
         ec2_client = self.session.client("ec2", region_name=volume.configuration.region)

+        instance_id = provisioning_data.instance_id
         logger.debug("Detaching EBS volume %s from instance %s", volume.volume_id, instance_id)
         attachment_data = get_or_error(volume.get_attachment_data_for_instance(instance_id))
         try:
@@ -667,9 +673,10 @@ class AWSCompute(
                 raise e
         logger.debug("Detached EBS volume %s from instance %s", volume.volume_id, instance_id)

-    def is_volume_detached(self, volume: Volume, instance_id: str) -> bool:
+    def is_volume_detached(self, volume: Volume, provisioning_data: JobProvisioningData) -> bool:
         ec2_client = self.session.client("ec2", region_name=volume.configuration.region)

+        instance_id = provisioning_data.instance_id
         logger.debug("Getting EBS volume %s status", volume.volume_id)
         response = ec2_client.describe_volumes(VolumeIds=[volume.volume_id])
         volumes_infos = response.get("Volumes")
@@ -819,18 +826,23 @@ def _get_regions_to_zones(session: boto3.Session, regions: List[str]) -> Dict[st

 def _supported_instances(offer: InstanceOffer) -> bool:
     for family in [
+        "m7i.",
+        "c7i.",
+        "r7i.",
+        "t3.",
         "t2.small",
         "c5.",
         "m5.",
-        "g4dn.",
-        "g5.",
+        "p5.",
+        "p5e.",
+        "p4d.",
+        "p4de.",
+        "p3.",
         "g6.",
         "g6e.",
         "gr6.",
-        "p3.",
-        "p4d.",
-        "p4de.",
-        "p5.",
+        "g5.",
+        "g4dn.",
     ]:
         if offer.instance.name.startswith(family):
             return True
dstack/_internal/core/backends/azure/compute.py

@@ -391,11 +391,8 @@ class VMImageVariant(enum.Enum):


 _SUPPORTED_VM_SERIES_PATTERNS = [
-    # TODO: Support newer CPU series (Dsv6, Esv6).
-    # They are NVMe-only and require marking the VM image as NVMe.
-    r"D(\d+)s_v3",  # Dsv3-series (general purpose)
-    r"E(\d+)i?s_v4",  # Esv4-series (memory optimized)
-    r"E(\d+)-(\d+)s_v4",  # Esv4-series (constrained vCPU)
+    r"D(\d+)s_v6",  # Dsv6-series (general purpose)
+    r"E(\d+)i?s_v6",  # Esv6-series (memory optimized)
     r"F(\d+)s_v2",  # Fsv2-series (compute optimized)
     r"NC(\d+)s_v3",  # NCv3-series [V100 16GB]
     r"NC(\d+)as_T4_v3",  # NCasT4_v3-series [T4]
@@ -404,6 +401,11 @@ _SUPPORTED_VM_SERIES_PATTERNS = [
     r"NC(\d+)ads_A100_v4",  # NC A100 v4-series [A100 80GB]
     r"ND(\d+)asr_v4",  # ND A100 v4-series [8xA100 40GB]
     r"ND(\d+)amsr_A100_v4",  # NDm A100 v4-series [8xA100 80GB]
+    # Deprecated series
+    # TODO: Remove after several releases
+    r"D(\d+)s_v3",  # Dsv3-series (general purpose)
+    r"E(\d+)i?s_v4",  # Esv4-series (memory optimized)
+    r"E(\d+)-(\d+)s_v4",  # Esv4-series (constrained vCPU)
 ]
 _SUPPORTED_VM_SERIES_PATTERN = (
     "^Standard_(" + "|".join(f"({s})" for s in _SUPPORTED_VM_SERIES_PATTERNS) + ")$"
dstack/_internal/core/backends/base/compute.py

@@ -19,6 +19,7 @@ from dstack._internal.core.consts import (
     DSTACK_RUNNER_SSH_PORT,
     DSTACK_SHIM_HTTP_PORT,
 )
+from dstack._internal.core.models.configurations import DEFAULT_REPO_DIR
 from dstack._internal.core.models.gateways import (
     GatewayComputeConfiguration,
     GatewayProvisioningData,
@@ -335,7 +336,9 @@ class ComputeWithVolumeSupport(ABC):
         """
         raise NotImplementedError()

-    def attach_volume(self, volume: Volume, instance_id: str) -> VolumeAttachmentData:
+    def attach_volume(
+        self, volume: Volume, provisioning_data: JobProvisioningData
+    ) -> VolumeAttachmentData:
         """
         Attaches a volume to the instance.
         If the volume is not found, it should raise `ComputeError()`.
@@ -344,7 +347,9 @@ class ComputeWithVolumeSupport(ABC):
         """
         raise NotImplementedError()

-    def detach_volume(self, volume: Volume, instance_id: str, force: bool = False):
+    def detach_volume(
+        self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
+    ):
         """
         Detaches a volume from the instance.
         Implement only if compute may return `VolumeProvisioningData.detachable`.
@@ -352,7 +357,7 @@ class ComputeWithVolumeSupport(ABC):
         """
         raise NotImplementedError()

-    def is_volume_detached(self, volume: Volume, instance_id: str) -> bool:
+    def is_volume_detached(self, volume: Volume, provisioning_data: JobProvisioningData) -> bool:
         """
         Checks if a volume was detached from the instance.
         If `detach_volume()` may fail to detach volume,
@@ -754,7 +759,7 @@ def get_docker_commands(
             f" --ssh-port {DSTACK_RUNNER_SSH_PORT}"
             " --temp-dir /tmp/runner"
             " --home-dir /root"
-            " --working-dir /workflow"
+            f" --working-dir {DEFAULT_REPO_DIR}"
         ),
     ]
dstack/_internal/core/backends/gcp/compute.py

@@ -649,13 +649,24 @@ class GCPCompute(
             pass
         logger.debug("Deleted persistent disk for volume %s", volume.name)

-    def attach_volume(self, volume: Volume, instance_id: str) -> VolumeAttachmentData:
+    def attach_volume(
+        self, volume: Volume, provisioning_data: JobProvisioningData
+    ) -> VolumeAttachmentData:
+        instance_id = provisioning_data.instance_id
         logger.debug(
             "Attaching persistent disk for volume %s to instance %s",
             volume.volume_id,
             instance_id,
         )
+        if not gcp_resources.instance_type_supports_persistent_disk(
+            provisioning_data.instance_type.name
+        ):
+            raise ComputeError(
+                f"Instance type {provisioning_data.instance_type.name} does not support Persistent disk volumes"
+            )
+
         zone = get_or_error(volume.provisioning_data).availability_zone
+        is_tpu = _is_tpu_provisioning_data(provisioning_data)
         try:
             disk = self.disk_client.get(
                 project=self.config.project_id,
@@ -663,18 +674,16 @@ class GCPCompute(
                 disk=volume.volume_id,
             )
             disk_url = disk.self_link
+        except google.api_core.exceptions.NotFound:
+            raise ComputeError("Persistent disk not found")

-        # This method has no information if the instance is a TPU or a VM,
-        # so we first try to see if there is a TPU with such name
-        try:
+        try:
+            if is_tpu:
                 get_node_request = tpu_v2.GetNodeRequest(
                     name=f"projects/{self.config.project_id}/locations/{zone}/nodes/{instance_id}",
                 )
                 tpu_node = self.tpu_client.get_node(get_node_request)
-        except google.api_core.exceptions.NotFound:
-            tpu_node = None

-        if tpu_node is not None:
                 # Python API to attach a disk to a TPU is not documented,
                 # so we follow the code from the gcloud CLI:
                 # https://github.com/twistedpair/google-cloud-sdk/blob/26ab5a281d56b384cc25750f3279a27afe5b499f/google-cloud-sdk/lib/googlecloudsdk/command_lib/compute/tpus/tpu_vm/util.py#L113
@@ -711,7 +720,6 @@ class GCPCompute(
                 attached_disk.auto_delete = False
                 attached_disk.device_name = f"pd-{volume.volume_id}"
                 device_name = attached_disk.device_name
-
                 operation = self.instances_client.attach_disk(
                     project=self.config.project_id,
                     zone=zone,
@@ -720,13 +728,16 @@ class GCPCompute(
                 )
                 gcp_resources.wait_for_extended_operation(operation, "persistent disk attachment")
         except google.api_core.exceptions.NotFound:
-            raise ComputeError("Persistent disk or instance not found")
+            raise ComputeError("Disk or instance not found")
         logger.debug(
             "Attached persistent disk for volume %s to instance %s", volume.volume_id, instance_id
         )
         return VolumeAttachmentData(device_name=device_name)

-    def detach_volume(self, volume: Volume, instance_id: str, force: bool = False):
+    def detach_volume(
+        self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
+    ):
+        instance_id = provisioning_data.instance_id
         logger.debug(
             "Detaching persistent disk for volume %s from instance %s",
             volume.volume_id,
@@ -734,17 +745,16 @@ class GCPCompute(
         )
         zone = get_or_error(volume.provisioning_data).availability_zone
         attachment_data = get_or_error(volume.get_attachment_data_for_instance(instance_id))
-        # This method has no information if the instance is a TPU or a VM,
-        # so we first try to see if there is a TPU with such name
-        try:
-            get_node_request = tpu_v2.GetNodeRequest(
-                name=f"projects/{self.config.project_id}/locations/{zone}/nodes/{instance_id}",
-            )
-            tpu_node = self.tpu_client.get_node(get_node_request)
-        except google.api_core.exceptions.NotFound:
-            tpu_node = None
+        is_tpu = _is_tpu_provisioning_data(provisioning_data)
+        if is_tpu:
+            try:
+                get_node_request = tpu_v2.GetNodeRequest(
+                    name=f"projects/{self.config.project_id}/locations/{zone}/nodes/{instance_id}",
+                )
+                tpu_node = self.tpu_client.get_node(get_node_request)
+            except google.api_core.exceptions.NotFound:
+                raise ComputeError("Instance not found")

-        if tpu_node is not None:
             source_disk = (
                 f"projects/{self.config.project_id}/zones/{zone}/disks/{volume.volume_id}"
             )
@@ -815,6 +825,11 @@ def _supported_instances_and_zones(
     if _is_tpu(offer.instance.name) and not _is_single_host_tpu(offer.instance.name):
         return False
     for family in [
+        "m4-",
+        "c4-",
+        "n4-",
+        "h3-",
+        "n2-",
         "e2-medium",
         "e2-standard-",
         "e2-highmem-",
@@ -1001,3 +1016,11 @@ def _get_tpu_data_disk_for_volume(project_id: str, volume: Volume) -> tpu_v2.Att
         mode=tpu_v2.AttachedDisk.DiskMode.READ_WRITE,
     )
     return attached_disk
+
+
+def _is_tpu_provisioning_data(provisioning_data: JobProvisioningData) -> bool:
+    is_tpu = False
+    if provisioning_data.backend_data:
+        backend_data_dict = json.loads(provisioning_data.backend_data)
+        is_tpu = backend_data_dict.get("is_tpu", False)
+    return is_tpu
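
_is_tpu_provisioning_data replaces the old probe-the-TPU-API-and-catch-NotFound approach: whether the instance is a TPU is now read from the JSON backend_data blob recorded at provisioning time. A sketch of just that check (the "zone" key is illustrative; only "is_tpu" matters here):

    import json
    from typing import Optional

    def is_tpu(backend_data: Optional[str]) -> bool:
        if not backend_data:
            return False
        return json.loads(backend_data).get("is_tpu", False)

    assert is_tpu('{"is_tpu": true, "zone": "us-central1-a"}')
    assert not is_tpu('{"zone": "us-central1-a"}')  # flag absent -> regular VM
    assert not is_tpu(None)                         # no backend data -> regular VM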
dstack/_internal/core/backends/gcp/resources.py

@@ -140,7 +140,10 @@ def create_instance_struct(
     initialize_params = compute_v1.AttachedDiskInitializeParams()
     initialize_params.source_image = image_id
     initialize_params.disk_size_gb = disk_size
-    initialize_params.disk_type = f"zones/{zone}/diskTypes/pd-balanced"
+    if instance_type_supports_persistent_disk(machine_type):
+        initialize_params.disk_type = f"zones/{zone}/diskTypes/pd-balanced"
+    else:
+        initialize_params.disk_type = f"zones/{zone}/diskTypes/hyperdisk-balanced"
     disk.initialize_params = initialize_params
     instance.disks = [disk]

@@ -421,7 +424,7 @@ def wait_for_extended_operation(

     if operation.error_code:
         # Write only debug logs here.
-        # The unexpected errors will be propagated and logged appropriatly by the caller.
+        # The unexpected errors will be propagated and logged appropriately by the caller.
         logger.debug(
             "Error during %s: [Code: %s]: %s",
             verbose_name,
@@ -462,3 +465,16 @@ def get_placement_policy_resource_name(
     placement_policy: str,
 ) -> str:
     return f"projects/{project_id}/regions/{region}/resourcePolicies/{placement_policy}"
+
+
+def instance_type_supports_persistent_disk(instance_type_name: str) -> bool:
+    return not any(
+        instance_type_name.startswith(series)
+        for series in [
+            "m4-",
+            "c4-",
+            "n4-",
+            "h3-",
+            "v6e",
+        ]
+    )
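
instance_type_supports_persistent_disk encodes which machine series cannot use Persistent Disk: create_instance_struct uses it to fall back to a hyperdisk-balanced boot disk, and attach_volume uses it to reject Persistent Disk volumes early. Its behavior, mirroring the helper above:

    def instance_type_supports_persistent_disk(instance_type_name: str) -> bool:
        return not any(
            instance_type_name.startswith(series)
            for series in ["m4-", "c4-", "n4-", "h3-", "v6e"]
        )

    assert instance_type_supports_persistent_disk("n2-standard-4")      # pd-balanced
    assert not instance_type_supports_persistent_disk("c4-standard-8")  # hyperdisk-balanced
    assert not instance_type_supports_persistent_disk("v6e-8")          # TPU v6e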
dstack/_internal/core/backends/local/compute.py

@@ -110,8 +110,10 @@ class LocalCompute(
     def delete_volume(self, volume: Volume):
         pass

-    def attach_volume(self, volume: Volume, instance_id: str):
+    def attach_volume(self, volume: Volume, provisioning_data: JobProvisioningData):
         pass

-    def detach_volume(self, volume: Volume, instance_id: str, force: bool = False):
+    def detach_volume(
+        self, volume: Volume, provisioning_data: JobProvisioningData, force: bool = False
+    ):
         pass
dstack/_internal/core/models/configurations.py

@@ -26,6 +26,7 @@ STRIP_PREFIX_DEFAULT = True
 RUN_PRIOTIRY_MIN = 0
 RUN_PRIOTIRY_MAX = 100
 RUN_PRIORITY_DEFAULT = 0
+DEFAULT_REPO_DIR = "/workflow"


 class RunConfigurationType(str, Enum):
@@ -181,7 +182,7 @@ class BaseRunConfiguration(CoreModel):
         Field(
             description=(
                 "The path to the working directory inside the container."
-                " It's specified relative to the repository directory (`/workflow`) and should be inside it."
+                f" It's specified relative to the repository directory (`{DEFAULT_REPO_DIR}`) and should be inside it."
                 ' Defaults to `"."`.'
             )
         ),
dstack/_internal/core/models/runs.py

@@ -8,6 +8,7 @@ from typing_extensions import Annotated
 from dstack._internal.core.models.backends.base import BackendType
 from dstack._internal.core.models.common import ApplyAction, CoreModel, NetworkMode, RegistryAuth
 from dstack._internal.core.models.configurations import (
+    DEFAULT_REPO_DIR,
     AnyRunConfiguration,
     RunConfiguration,
 )
@@ -338,7 +339,7 @@ class RunSpec(CoreModel):
         Field(
             description=(
                 "The path to the working directory inside the container."
-                " It's specified relative to the repository directory (`/workflow`) and should be inside it."
+                f" It's specified relative to the repository directory (`{DEFAULT_REPO_DIR}`) and should be inside it."
                 ' Defaults to `"."`.'
             )
         ),
dstack/_internal/proxy/gateway/resources/nginx/00-log-format.conf

@@ -1 +1,11 @@
-log_format dstack_stat '$time_iso8601 $host $status $request_time';
+log_format dstack_stat '$time_iso8601 $host $status $request_time $dstack_replica_hit';
+
+
+# A hack to avoid this Nginx reload error when no services are registered:
+# nginx: [emerg] unknown "dstack_replica_hit" variable
+server {
+    listen unix:/tmp/dstack-dummy-nginx.sock;
+    server_name placeholder.local;
+    deny all;
+    set $dstack_replica_hit 0;
+}
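
With the extra $dstack_replica_hit variable, every access-log entry records whether the request was (or, absent replicas, would have been) routed to a service replica. An illustrative five-field line (hypothetical values):

    2025-01-01T12:00:00+00:00 myservice.example.com 200 0.012 1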
dstack/_internal/proxy/gateway/resources/nginx/service.jinja2

@@ -14,6 +14,7 @@ upstream {{ domain }}.upstream {
 server {
     server_name {{ domain }};
     limit_req_status 429;
+    set $dstack_replica_hit 0;
     access_log {{ access_log_path }} dstack_stat;
     client_max_body_size {{ client_max_body_size }};

@@ -23,11 +24,7 @@ server {
         auth_request /_dstack_auth;
         {% endif %}

-        {% if replicas %}
         try_files /nonexistent @$http_upgrade;
-        {% else %}
-        return 503;
-        {% endif %}

         {% if location.limit_req %}
         limit_req zone={{ location.limit_req.zone }}{% if location.limit_req.burst %} burst={{ location.limit_req.burst }} nodelay{% endif %};
@@ -35,8 +32,9 @@ server {
     }
     {% endfor %}

-    {% if replicas %}
     location @websocket {
+        set $dstack_replica_hit 1;
+        {% if replicas %}
         proxy_pass http://{{ domain }}.upstream;
         proxy_set_header X-Real-IP $remote_addr;
         proxy_set_header Host $host;
@@ -44,19 +42,27 @@ server {
         proxy_set_header Upgrade $http_upgrade;
         proxy_set_header Connection "Upgrade";
         proxy_read_timeout 300s;
+        {% else %}
+        return 503;
+        {% endif %}
     }
     location @ {
+        set $dstack_replica_hit 1;
+        {% if replicas %}
         proxy_pass http://{{ domain }}.upstream;
         proxy_set_header X-Real-IP $remote_addr;
         proxy_set_header Host $host;
         proxy_read_timeout 300s;
+        {% else %}
+        return 503;
+        {% endif %}
     }
-    {% endif %}

     {% if auth %}
     location = /_dstack_auth {
         internal;
         if ($remote_addr = 127.0.0.1) {
+            # for requests from the gateway app, e.g. from the OpenAI-compatible API
             return 200;
         }
         proxy_pass http://localhost:{{ proxy_port }}/api/auth/{{ project_name }};
dstack/_internal/proxy/gateway/services/stats.py

@@ -11,10 +11,10 @@ from pydantic import BaseModel

 from dstack._internal.proxy.gateway.repo.repo import GatewayProxyRepo
 from dstack._internal.proxy.gateway.schemas.stats import PerWindowStats, ServiceStats, Stat
+from dstack._internal.proxy.lib.errors import UnexpectedProxyError
 from dstack._internal.utils.common import run_async

 logger = logging.getLogger(__name__)
-IGNORE_STATUSES = {403, 404}
 WINDOWS = (30, 60, 300)
 TTL = WINDOWS[-1]
 EMPTY_STATS = {window: Stat(requests=0, request_time=0.0) for window in WINDOWS}
@@ -35,6 +35,7 @@ class LogEntry(BaseModel):
     host: str
     status: int
     request_time: float
+    is_replica_hit: bool


 class StatsCollector:
@@ -87,7 +88,8 @@ class StatsCollector:
         now = datetime.datetime.now(tz=datetime.timezone.utc)

         for entry in self._read_access_log(now - datetime.timedelta(seconds=TTL)):
-            if entry.status in IGNORE_STATUSES:
+            # only include requests that hit or should hit a service replica
+            if not entry.is_replica_hit:
                 continue

             frame_timestamp = int(entry.timestamp.timestamp())
@@ -119,7 +121,10 @@
             line = self._file.readline()
             if not line:
                 break
-            timestamp_str, host, status, request_time = line.split()
+            cells = line.split()
+            if len(cells) == 4:  # compatibility with pre-0.19.11 logs
+                cells.append("0" if cells[2] in ["403", "404"] else "1")
+            timestamp_str, host, status, request_time, dstack_replica_hit = cells
             timestamp = datetime.datetime.fromisoformat(timestamp_str)
             if timestamp < after:
                 continue
@@ -128,6 +133,7 @@
                 host=host,
                 status=int(status),
                 request_time=float(request_time),
+                is_replica_hit=_parse_nginx_bool(dstack_replica_hit),
             )
             if os.fstat(self._file.fileno()).st_ino != st_ino:
                 # file was rotated
@@ -154,3 +160,11 @@ async def get_service_stats(
         )
         for service in services
     ]
+
+
+def _parse_nginx_bool(v: str) -> bool:
+    if v == "0":
+        return False
+    if v == "1":
+        return True
+    raise UnexpectedProxyError(f"Cannot parse boolean value: expected '0' or '1', got {v!r}")
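
A compact sketch of the backward-compatible parsing above: four-field lines come from pre-0.19.11 gateways, where 403/404 were the proxy's own responses and never reached a replica, while five-field lines carry $dstack_replica_hit explicitly (the log lines are hypothetical):

    def parse_is_replica_hit(line: str) -> bool:
        cells = line.split()
        if len(cells) == 4:  # pre-0.19.11 log format
            cells.append("0" if cells[2] in ["403", "404"] else "1")
        return cells[4] == "1"

    assert parse_is_replica_hit("2025-01-01T00:00:00+00:00 svc.local 200 0.012 1")
    assert not parse_is_replica_hit("2025-01-01T00:00:00+00:00 svc.local 404 0.001")  # old format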
dstack/_internal/server/background/tasks/process_submitted_jobs.py

@@ -659,7 +659,7 @@ async def _attach_volumes(
                         backend=backend,
                         volume_model=volume_model,
                         instance=instance,
-                        instance_id=job_provisioning_data.instance_id,
+                        jpd=job_provisioning_data,
                     )
                     job_runtime_data.volume_names.append(volume.name)
                     break  # attach next mount point
@@ -685,7 +685,7 @@ async def _attach_volume(
     backend: Backend,
     volume_model: VolumeModel,
    instance: InstanceModel,
-    instance_id: str,
+    jpd: JobProvisioningData,
 ):
     compute = backend.compute()
     assert isinstance(compute, ComputeWithVolumeSupport)
@@ -697,7 +697,7 @@ async def _attach_volume(
     attachment_data = await common_utils.run_async(
         compute.attach_volume,
         volume=volume,
-        instance_id=instance_id,
+        provisioning_data=jpd,
     )
     volume_attachment_model = VolumeAttachmentModel(
         volume=volume_model,
dstack/_internal/server/routers/repos.py

@@ -1,7 +1,6 @@
 from typing import List, Tuple

 from fastapi import APIRouter, Depends, Request, UploadFile
-from humanize import naturalsize
 from sqlalchemy.ext.asyncio import AsyncSession

 from dstack._internal.core.errors import ResourceNotExistsError, ServerClientError
@@ -20,6 +19,7 @@ from dstack._internal.server.utils.routers import (
     get_base_api_additional_responses,
     get_request_size,
 )
+from dstack._internal.utils.common import sizeof_fmt

 router = APIRouter(
     prefix="/api/project/{project_name}/repos",
@@ -98,10 +98,15 @@ async def upload_code(
 ):
     request_size = get_request_size(request)
     if SERVER_CODE_UPLOAD_LIMIT > 0 and request_size > SERVER_CODE_UPLOAD_LIMIT:
+        diff_size_fmt = sizeof_fmt(request_size)
+        limit_fmt = sizeof_fmt(SERVER_CODE_UPLOAD_LIMIT)
+        if diff_size_fmt == limit_fmt:
+            diff_size_fmt = f"{request_size}B"
+            limit_fmt = f"{SERVER_CODE_UPLOAD_LIMIT}B"
         raise ServerClientError(
-            f"Repo diff size is {naturalsize(request_size)}, which exceeds the limit of "
-            f"{naturalsize(SERVER_CODE_UPLOAD_LIMIT)}. Use .gitignore to exclude large files from the repo. This "
-            f"limit can be modified by setting the DSTACK_SERVER_CODE_UPLOAD_LIMIT_BYTES environment variable"
+            f"Repo diff size is {diff_size_fmt}, which exceeds the limit of {limit_fmt}."
+            " Use .gitignore to exclude large files from the repo."
+            " This limit can be modified by setting the DSTACK_SERVER_CODE_UPLOAD_LIMIT environment variable."
         )
     _, project = user_project
     await repos.upload_code(
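
The equality check guards against a rounding collision: a human-readable formatter can render two different byte counts identically, which would produce a confusing "2.0MiB exceeds the limit of 2.0MiB" error, so the message falls back to exact byte counts. A sketch with a stand-in formatter (dstack's sizeof_fmt may differ in detail):

    def sizeof_fmt(num: float) -> str:  # stand-in, assumed to round to one decimal
        for unit in ["B", "KiB", "MiB", "GiB"]:
            if abs(num) < 1024:
                return f"{num:.1f}{unit}"
            num /= 1024
        return f"{num:.1f}TiB"

    limit = 2 * 1024 * 1024   # 2097152 bytes
    request_size = limit + 1  # 2097153 bytes

    assert sizeof_fmt(request_size) == sizeof_fmt(limit) == "2.0MiB"
    # Fallback: "Repo diff size is 2097153B, which exceeds the limit of 2097152B."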