dstack-0.19.25-py3-none-any.whl → dstack-0.19.26-py3-none-any.whl


This version of dstack has been flagged as potentially problematic.

Files changed (128):
  1. dstack/_internal/cli/commands/__init__.py +2 -2
  2. dstack/_internal/cli/commands/apply.py +3 -61
  3. dstack/_internal/cli/commands/attach.py +1 -1
  4. dstack/_internal/cli/commands/completion.py +1 -1
  5. dstack/_internal/cli/commands/delete.py +2 -2
  6. dstack/_internal/cli/commands/fleet.py +1 -1
  7. dstack/_internal/cli/commands/gateway.py +2 -2
  8. dstack/_internal/cli/commands/init.py +56 -24
  9. dstack/_internal/cli/commands/logs.py +1 -1
  10. dstack/_internal/cli/commands/metrics.py +1 -1
  11. dstack/_internal/cli/commands/offer.py +45 -7
  12. dstack/_internal/cli/commands/project.py +2 -2
  13. dstack/_internal/cli/commands/secrets.py +2 -2
  14. dstack/_internal/cli/commands/server.py +1 -1
  15. dstack/_internal/cli/commands/stop.py +1 -1
  16. dstack/_internal/cli/commands/volume.py +1 -1
  17. dstack/_internal/cli/main.py +2 -2
  18. dstack/_internal/cli/services/completion.py +2 -2
  19. dstack/_internal/cli/services/configurators/__init__.py +6 -2
  20. dstack/_internal/cli/services/configurators/base.py +6 -7
  21. dstack/_internal/cli/services/configurators/fleet.py +1 -3
  22. dstack/_internal/cli/services/configurators/gateway.py +2 -4
  23. dstack/_internal/cli/services/configurators/run.py +195 -55
  24. dstack/_internal/cli/services/configurators/volume.py +2 -4
  25. dstack/_internal/cli/services/profile.py +1 -1
  26. dstack/_internal/cli/services/repos.py +51 -47
  27. dstack/_internal/core/backends/aws/configurator.py +11 -7
  28. dstack/_internal/core/backends/azure/configurator.py +11 -7
  29. dstack/_internal/core/backends/base/configurator.py +25 -13
  30. dstack/_internal/core/backends/cloudrift/configurator.py +13 -7
  31. dstack/_internal/core/backends/cudo/configurator.py +11 -7
  32. dstack/_internal/core/backends/datacrunch/compute.py +5 -1
  33. dstack/_internal/core/backends/datacrunch/configurator.py +13 -7
  34. dstack/_internal/core/backends/gcp/configurator.py +11 -7
  35. dstack/_internal/core/backends/hotaisle/configurator.py +13 -7
  36. dstack/_internal/core/backends/kubernetes/configurator.py +13 -7
  37. dstack/_internal/core/backends/lambdalabs/configurator.py +11 -7
  38. dstack/_internal/core/backends/nebius/compute.py +1 -1
  39. dstack/_internal/core/backends/nebius/configurator.py +11 -7
  40. dstack/_internal/core/backends/nebius/resources.py +21 -11
  41. dstack/_internal/core/backends/oci/configurator.py +11 -7
  42. dstack/_internal/core/backends/runpod/configurator.py +11 -7
  43. dstack/_internal/core/backends/template/configurator.py.jinja +11 -7
  44. dstack/_internal/core/backends/tensordock/configurator.py +13 -7
  45. dstack/_internal/core/backends/vastai/configurator.py +11 -7
  46. dstack/_internal/core/backends/vultr/configurator.py +11 -4
  47. dstack/_internal/core/compatibility/gpus.py +13 -0
  48. dstack/_internal/core/compatibility/runs.py +1 -0
  49. dstack/_internal/core/models/common.py +3 -3
  50. dstack/_internal/core/models/configurations.py +172 -27
  51. dstack/_internal/core/models/files.py +1 -1
  52. dstack/_internal/core/models/fleets.py +5 -1
  53. dstack/_internal/core/models/profiles.py +41 -11
  54. dstack/_internal/core/models/resources.py +46 -42
  55. dstack/_internal/core/models/runs.py +4 -0
  56. dstack/_internal/core/services/configs/__init__.py +2 -2
  57. dstack/_internal/core/services/profiles.py +2 -2
  58. dstack/_internal/core/services/repos.py +5 -3
  59. dstack/_internal/core/services/ssh/ports.py +1 -1
  60. dstack/_internal/proxy/lib/deps.py +6 -2
  61. dstack/_internal/server/app.py +22 -17
  62. dstack/_internal/server/background/tasks/process_gateways.py +4 -1
  63. dstack/_internal/server/background/tasks/process_instances.py +10 -2
  64. dstack/_internal/server/background/tasks/process_probes.py +1 -1
  65. dstack/_internal/server/background/tasks/process_running_jobs.py +10 -4
  66. dstack/_internal/server/background/tasks/process_runs.py +1 -1
  67. dstack/_internal/server/background/tasks/process_submitted_jobs.py +54 -43
  68. dstack/_internal/server/background/tasks/process_terminating_jobs.py +2 -2
  69. dstack/_internal/server/background/tasks/process_volumes.py +1 -1
  70. dstack/_internal/server/db.py +8 -4
  71. dstack/_internal/server/models.py +1 -0
  72. dstack/_internal/server/routers/gpus.py +1 -6
  73. dstack/_internal/server/schemas/runner.py +10 -0
  74. dstack/_internal/server/services/backends/__init__.py +14 -8
  75. dstack/_internal/server/services/backends/handlers.py +6 -1
  76. dstack/_internal/server/services/docker.py +5 -5
  77. dstack/_internal/server/services/fleets.py +14 -13
  78. dstack/_internal/server/services/gateways/__init__.py +2 -0
  79. dstack/_internal/server/services/gateways/client.py +5 -2
  80. dstack/_internal/server/services/gateways/connection.py +1 -1
  81. dstack/_internal/server/services/gpus.py +50 -49
  82. dstack/_internal/server/services/instances.py +41 -1
  83. dstack/_internal/server/services/jobs/__init__.py +15 -4
  84. dstack/_internal/server/services/jobs/configurators/base.py +7 -11
  85. dstack/_internal/server/services/jobs/configurators/dev.py +5 -0
  86. dstack/_internal/server/services/jobs/configurators/extensions/cursor.py +3 -3
  87. dstack/_internal/server/services/jobs/configurators/extensions/vscode.py +3 -3
  88. dstack/_internal/server/services/jobs/configurators/service.py +1 -0
  89. dstack/_internal/server/services/jobs/configurators/task.py +3 -0
  90. dstack/_internal/server/services/locking.py +5 -5
  91. dstack/_internal/server/services/logging.py +10 -2
  92. dstack/_internal/server/services/logs/__init__.py +8 -6
  93. dstack/_internal/server/services/logs/aws.py +330 -327
  94. dstack/_internal/server/services/logs/filelog.py +7 -6
  95. dstack/_internal/server/services/logs/gcp.py +141 -139
  96. dstack/_internal/server/services/plugins.py +1 -1
  97. dstack/_internal/server/services/projects.py +2 -5
  98. dstack/_internal/server/services/proxy/repo.py +5 -1
  99. dstack/_internal/server/services/requirements/__init__.py +0 -0
  100. dstack/_internal/server/services/requirements/combine.py +259 -0
  101. dstack/_internal/server/services/runner/client.py +7 -0
  102. dstack/_internal/server/services/runs.py +1 -1
  103. dstack/_internal/server/services/services/__init__.py +8 -2
  104. dstack/_internal/server/services/services/autoscalers.py +2 -0
  105. dstack/_internal/server/services/ssh.py +2 -1
  106. dstack/_internal/server/services/storage/__init__.py +5 -6
  107. dstack/_internal/server/services/storage/gcs.py +49 -49
  108. dstack/_internal/server/services/storage/s3.py +52 -52
  109. dstack/_internal/server/statics/index.html +1 -1
  110. dstack/_internal/server/testing/common.py +1 -1
  111. dstack/_internal/server/utils/logging.py +3 -3
  112. dstack/_internal/server/utils/provisioning.py +3 -3
  113. dstack/_internal/utils/json_schema.py +3 -1
  114. dstack/_internal/utils/typing.py +14 -0
  115. dstack/api/_public/repos.py +21 -2
  116. dstack/api/_public/runs.py +5 -7
  117. dstack/api/server/__init__.py +17 -19
  118. dstack/api/server/_gpus.py +2 -1
  119. dstack/api/server/_group.py +4 -3
  120. dstack/api/server/_repos.py +20 -3
  121. dstack/plugins/builtin/rest_plugin/_plugin.py +1 -0
  122. dstack/version.py +1 -1
  123. {dstack-0.19.25.dist-info → dstack-0.19.26.dist-info}/METADATA +1 -1
  124. {dstack-0.19.25.dist-info → dstack-0.19.26.dist-info}/RECORD +127 -124
  125. dstack/api/huggingface/__init__.py +0 -73
  126. {dstack-0.19.25.dist-info → dstack-0.19.26.dist-info}/WHEEL +0 -0
  127. {dstack-0.19.25.dist-info → dstack-0.19.26.dist-info}/entry_points.txt +0 -0
  128. {dstack-0.19.25.dist-info → dstack-0.19.26.dist-info}/licenses/LICENSE.md +0 -0
dstack/_internal/server/services/logs/filelog.py

@@ -48,7 +48,7 @@ class FileLogStorage(LogStorage):
     ) -> JobSubmissionLogs:
         start_line = 0
         if request.next_token:
-            start_line = self._next_token(request)
+            start_line = self._parse_next_token(request.next_token)

         logs = []
         next_token = None
@@ -97,7 +97,9 @@ class FileLogStorage(LogStorage):
     def _poll_logs_descending(
         self, log_file_path: Path, request: PollLogsRequest
     ) -> JobSubmissionLogs:
-        start_offset = self._next_token(request)
+        start_offset = None
+        if request.next_token is not None:
+            start_offset = self._parse_next_token(request.next_token)

         candidate_logs = []

@@ -123,12 +125,12 @@ class FileLogStorage(LogStorage):
         except FileNotFoundError:
             return JobSubmissionLogs(logs=[], next_token=None)

-        logs = [log for log, offset in candidate_logs[: request.limit]]
+        logs = [log for log, _ in candidate_logs[: request.limit]]
         next_token = None
         if len(candidate_logs) > request.limit:
             # We fetched one more than the limit, so there are more pages.
             # The next token should point to the start of the last log we are returning.
-            _last_log_event, last_log_offset = candidate_logs[request.limit - 1]
+            _, last_log_offset = candidate_logs[request.limit - 1]
             next_token = str(last_log_offset)

         return JobSubmissionLogs(logs=logs, next_token=next_token)
@@ -245,8 +247,7 @@ class FileLogStorage(LogStorage):
             message=runner_log_event.message.decode(errors="replace"),
         )

-    def _next_token(self, request: PollLogsRequest) -> Optional[int]:
-        next_token = request.next_token
+    def _parse_next_token(self, next_token: str) -> int:
         if next_token is None:
             return None
         try:
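
A note on the token handling above: next_token travels to the client as an opaque string, and FileLogStorage now parses it back into a line number or byte offset via _parse_next_token. A caller pages through logs by feeding each returned token into its next request. A minimal consumption sketch (the fetch_all_logs helper and its loop are illustrative, not part of the dstack API):

    # Illustrative pagination loop over the log storage above.
    def fetch_all_logs(storage, project, request):
        events = []
        while True:
            page = storage.poll_logs(project, request)
            events.extend(page.logs)
            if page.next_token is None:
                # No token means this was the last page.
                return events
            # Resume from the offset encoded in the returned token.
            request.next_token = page.next_token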
dstack/_internal/server/services/logs/gcp.py

@@ -20,6 +20,9 @@ from dstack._internal.server.services.logs.base import (
 from dstack._internal.utils.common import batched
 from dstack._internal.utils.logging import get_logger

+logger = get_logger(__name__)
+
+
 GCP_LOGGING_AVAILABLE = True
 try:
     import google.api_core.exceptions
@@ -28,152 +31,151 @@ try:
     from google.cloud.logging_v2.types import ListLogEntriesRequest
 except ImportError:
     GCP_LOGGING_AVAILABLE = False
-
-
-logger = get_logger(__name__)
-
-
-class GCPLogStorage(LogStorage):
-    # Max expected message size from runner is 32KB.
-    # Max expected LogEntry size is 32KB + metadata < 50KB < 256KB limit.
-    # With MAX_BATCH_SIZE = 100, max write request size < 5MB < 10 MB limit.
-    # See: https://cloud.google.com/logging/quotas.
-    MAX_RUNNER_MESSAGE_SIZE = 32 * 1024
-    MAX_BATCH_SIZE = 100
-
-    # Use the same log name for all run logs so that it's easy to manage all dstack-related logs.
-    LOG_NAME = "dstack-run-logs"
-    # Logs from different jobs belong to different "streams".
-    # GCP Logging has no built-in concepts of streams, so we implement them with labels.
-    # It should be fast to filter by labels since labels are indexed by default
-    # (https://cloud.google.com/logging/docs/analyze/custom-index).
-
-    def __init__(self, project_id: str):
-        self.project_id = project_id
-        try:
-            self.client = logging_v2.Client(project=project_id)
-            self.logger = self.client.logger(name=self.LOG_NAME)
-            self.logger.list_entries(max_results=1)
-            # Python client doesn't seem to support dry_run,
-            # so emit an empty log to check permissions.
-            self.logger.log_empty()
-        except google.auth.exceptions.DefaultCredentialsError:
-            raise LogStorageError("Default credentials not found")
-        except google.api_core.exceptions.NotFound:
-            raise LogStorageError(f"Project {project_id} not found")
-        except google.api_core.exceptions.PermissionDenied:
-            raise LogStorageError("Insufficient permissions")
-
-    def poll_logs(self, project: ProjectModel, request: PollLogsRequest) -> JobSubmissionLogs:
-        # TODO: GCP may return logs in random order when events have the same timestamp.
-        producer = LogProducer.RUNNER if request.diagnose else LogProducer.JOB
-        stream_name = self._get_stream_name(
-            project_name=project.name,
-            run_name=request.run_name,
-            job_submission_id=request.job_submission_id,
-            producer=producer,
-        )
-        log_filters = [f'labels.stream = "{stream_name}"']
-        if request.start_time:
-            log_filters.append(f'timestamp > "{request.start_time.isoformat()}"')
-        if request.end_time:
-            log_filters.append(f'timestamp < "{request.end_time.isoformat()}"')
-        log_filter = " AND ".join(log_filters)
-
-        order_by = logging_v2.DESCENDING if request.descending else logging_v2.ASCENDING
-        try:
-            # Use low-level API to get access to next_page_token
-            request_obj = ListLogEntriesRequest(
-                resource_names=[f"projects/{self.client.project}"],
-                filter=log_filter,
-                order_by=order_by,
-                page_size=request.limit,
-                page_token=request.next_token,
-            )
-            response = self.client._logging_api._gapic_api.list_log_entries(request=request_obj)
-
-            logs = [
-                LogEvent(
-                    timestamp=entry.timestamp,
-                    message=entry.json_payload.get("message"),
-                    log_source=LogEventSource.STDOUT,
-                )
-                for entry in response.entries
-            ]
-            next_token = response.next_page_token or None
-        except google.api_core.exceptions.ResourceExhausted as e:
-            logger.warning("GCP Logging exception: %s", repr(e))
-            # GCP Logging has severely low quota of 60 reads/min for entries.list
-            raise ServerClientError(
-                "GCP Logging read request limit exceeded."
-                " It's recommended to increase default entries.list request quota from 60 per minute."
-            )
-        return JobSubmissionLogs(
-            logs=logs,
-            external_url=self._get_stream_extrnal_url(stream_name),
-            next_token=next_token if len(logs) > 0 else None,
-        )
-
-    def write_logs(
-        self,
-        project: ProjectModel,
-        run_name: str,
-        job_submission_id: UUID,
-        runner_logs: List[RunnerLogEvent],
-        job_logs: List[RunnerLogEvent],
-    ):
-        producers_with_logs = [(LogProducer.RUNNER, runner_logs), (LogProducer.JOB, job_logs)]
-        for producer, producer_logs in producers_with_logs:
+else:
+
+    class GCPLogStorage(LogStorage):
+        # Max expected message size from runner is 32KB.
+        # Max expected LogEntry size is 32KB + metadata < 50KB < 256KB limit.
+        # With MAX_BATCH_SIZE = 100, max write request size < 5MB < 10 MB limit.
+        # See: https://cloud.google.com/logging/quotas.
+        MAX_RUNNER_MESSAGE_SIZE = 32 * 1024
+        MAX_BATCH_SIZE = 100
+
+        # Use the same log name for all run logs so that it's easy to manage all dstack-related logs.
+        LOG_NAME = "dstack-run-logs"
+        # Logs from different jobs belong to different "streams".
+        # GCP Logging has no built-in concepts of streams, so we implement them with labels.
+        # It should be fast to filter by labels since labels are indexed by default
+        # (https://cloud.google.com/logging/docs/analyze/custom-index).
+
+        def __init__(self, project_id: str):
+            self.project_id = project_id
+            try:
+                self.client = logging_v2.Client(project=project_id)
+                self.logger = self.client.logger(name=self.LOG_NAME)
+                self.logger.list_entries(max_results=1)
+                # Python client doesn't seem to support dry_run,
+                # so emit an empty log to check permissions.
+                self.logger.log_empty()
+            except google.auth.exceptions.DefaultCredentialsError:
+                raise LogStorageError("Default credentials not found")
+            except google.api_core.exceptions.NotFound:
+                raise LogStorageError(f"Project {project_id} not found")
+            except google.api_core.exceptions.PermissionDenied:
+                raise LogStorageError("Insufficient permissions")
+
+        def poll_logs(self, project: ProjectModel, request: PollLogsRequest) -> JobSubmissionLogs:
+            # TODO: GCP may return logs in random order when events have the same timestamp.
+            producer = LogProducer.RUNNER if request.diagnose else LogProducer.JOB
             stream_name = self._get_stream_name(
                 project_name=project.name,
-                run_name=run_name,
-                job_submission_id=job_submission_id,
+                run_name=request.run_name,
+                job_submission_id=request.job_submission_id,
                 producer=producer,
             )
-            self._write_logs_to_stream(
-                stream_name=stream_name,
-                logs=producer_logs,
+            log_filters = [f'labels.stream = "{stream_name}"']
+            if request.start_time:
+                log_filters.append(f'timestamp > "{request.start_time.isoformat()}"')
+            if request.end_time:
+                log_filters.append(f'timestamp < "{request.end_time.isoformat()}"')
+            log_filter = " AND ".join(log_filters)
+
+            order_by = logging_v2.DESCENDING if request.descending else logging_v2.ASCENDING
+            try:
+                # Use low-level API to get access to next_page_token
+                request_obj = ListLogEntriesRequest(
+                    resource_names=[f"projects/{self.client.project}"],
+                    filter=log_filter,
+                    order_by=order_by,
+                    page_size=request.limit,
+                    page_token=request.next_token,
+                )
+                response = self.client._logging_api._gapic_api.list_log_entries(  # type: ignore[attr-defined]
+                    request=request_obj
+                )
+
+                logs = [
+                    LogEvent(
+                        timestamp=entry.timestamp,
+                        message=entry.json_payload.get("message"),
+                        log_source=LogEventSource.STDOUT,
+                    )
+                    for entry in response.entries
+                ]
+                next_token = response.next_page_token or None
+            except google.api_core.exceptions.ResourceExhausted as e:
+                logger.warning("GCP Logging exception: %s", repr(e))
+                # GCP Logging has severely low quota of 60 reads/min for entries.list
+                raise ServerClientError(
+                    "GCP Logging read request limit exceeded."
+                    " It's recommended to increase default entries.list request quota from 60 per minute."
+                )
+            return JobSubmissionLogs(
+                logs=logs,
+                external_url=self._get_stream_extrnal_url(stream_name),
+                next_token=next_token if len(logs) > 0 else None,
             )

-    def close(self):
-        self.client.close()
-
-    def _write_logs_to_stream(self, stream_name: str, logs: List[RunnerLogEvent]):
-        with self.logger.batch() as batcher:
-            for batch in batched(logs, self.MAX_BATCH_SIZE):
-                for log in batch:
-                    message = log.message.decode(errors="replace")
-                    timestamp = unix_time_ms_to_datetime(log.timestamp)
-                    if len(log.message) > self.MAX_RUNNER_MESSAGE_SIZE:
-                        logger.error(
-                            "Stream %s: skipping event at %s, message exceeds max size: %d > %d",
-                            stream_name,
-                            timestamp.isoformat(),
-                            len(log.message),
-                            self.MAX_RUNNER_MESSAGE_SIZE,
+        def write_logs(
+            self,
+            project: ProjectModel,
+            run_name: str,
+            job_submission_id: UUID,
+            runner_logs: List[RunnerLogEvent],
+            job_logs: List[RunnerLogEvent],
+        ):
+            producers_with_logs = [(LogProducer.RUNNER, runner_logs), (LogProducer.JOB, job_logs)]
+            for producer, producer_logs in producers_with_logs:
+                stream_name = self._get_stream_name(
+                    project_name=project.name,
+                    run_name=run_name,
+                    job_submission_id=job_submission_id,
+                    producer=producer,
+                )
+                self._write_logs_to_stream(
+                    stream_name=stream_name,
+                    logs=producer_logs,
+                )
+
+        def close(self):
+            self.client.close()
+
+        def _write_logs_to_stream(self, stream_name: str, logs: List[RunnerLogEvent]):
+            with self.logger.batch() as batcher:
+                for batch in batched(logs, self.MAX_BATCH_SIZE):
+                    for log in batch:
+                        message = log.message.decode(errors="replace")
+                        timestamp = unix_time_ms_to_datetime(log.timestamp)
+                        if len(log.message) > self.MAX_RUNNER_MESSAGE_SIZE:
+                            logger.error(
+                                "Stream %s: skipping event at %s, message exceeds max size: %d > %d",
+                                stream_name,
+                                timestamp.isoformat(),
+                                len(log.message),
+                                self.MAX_RUNNER_MESSAGE_SIZE,
+                            )
+                            continue
+                        batcher.log_struct(
+                            {
+                                "message": message,
+                            },
+                            labels={
+                                "stream": stream_name,
+                            },
+                            timestamp=timestamp,
                         )
-                        continue
-                    batcher.log_struct(
-                        {
-                            "message": message,
-                        },
-                        labels={
-                            "stream": stream_name,
-                        },
-                        timestamp=timestamp,
-                    )
-                batcher.commit()
+                    batcher.commit()

-    def _get_stream_name(
-        self, project_name: str, run_name: str, job_submission_id: UUID, producer: LogProducer
-    ) -> str:
-        return f"{project_name}-{run_name}-{job_submission_id}-{producer.value}"
+        def _get_stream_name(
+            self, project_name: str, run_name: str, job_submission_id: UUID, producer: LogProducer
+        ) -> str:
+            return f"{project_name}-{run_name}-{job_submission_id}-{producer.value}"

-    def _get_stream_extrnal_url(self, stream_name: str) -> str:
-        log_name_resource_name = self._get_log_name_resource_name()
-        query = f'logName="{log_name_resource_name}" AND labels.stream="{stream_name}"'
-        quoted_query = urllib.parse.quote(query, safe="")
-        return f"https://console.cloud.google.com/logs/query;query={quoted_query}?project={self.project_id}"
+        def _get_stream_extrnal_url(self, stream_name: str) -> str:
+            log_name_resource_name = self._get_log_name_resource_name()
+            query = f'logName="{log_name_resource_name}" AND labels.stream="{stream_name}"'
+            quoted_query = urllib.parse.quote(query, safe="")
+            return f"https://console.cloud.google.com/logs/query;query={quoted_query}?project={self.project_id}"

-    def _get_log_name_resource_name(self) -> str:
-        return f"projects/{self.project_id}/logs/{self.LOG_NAME}"
+        def _get_log_name_resource_name(self) -> str:
+            return f"projects/{self.project_id}/logs/{self.LOG_NAME}"
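
The structural change above is an optional-dependency guard: GCPLogStorage now lives in the else branch of the import try/except, so the module still imports when google-cloud-logging is absent and the class is simply never defined, with GCP_LOGGING_AVAILABLE left False for callers to check. A minimal sketch of the pattern, with placeholder names:

    AVAILABLE = True
    try:
        import optional_dependency  # stand-in for google.cloud.logging_v2
    except ImportError:
        AVAILABLE = False
    else:
        # Defined only when the import succeeded, so the class body can
        # reference the optional module without a NameError at import time.
        class DependencyBackedStorage:
            def __init__(self) -> None:
                self.client = optional_dependency.Client()

Compared with defining the class unconditionally, this also keeps references to the optional modules on code paths where they are guaranteed to exist.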
dstack/_internal/server/services/plugins.py

@@ -60,7 +60,7 @@ def load_plugins(enabled_plugins: list[str]):
     _PLUGINS.clear()
     entrypoints: dict[str, PluginEntrypoint] = {}
     plugins_to_load = enabled_plugins.copy()
-    for entrypoint in entry_points(group="dstack.plugins"):
+    for entrypoint in entry_points(group="dstack.plugins"):  # type: ignore[call-arg]
        if entrypoint.name not in enabled_plugins:
            logger.info(
                ("Found not enabled plugin %s. Plugin will not be loaded."),
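
The # type: ignore[call-arg] reflects that importlib.metadata.entry_points accepts selection keywords such as group= only on Python 3.10+; on 3.9 and earlier it takes no arguments and returns a mapping of group name to entry points. A hedged compatibility sketch (iter_plugin_entrypoints is illustrative, not dstack code):

    from importlib.metadata import entry_points

    def iter_plugin_entrypoints(group: str):
        try:
            # Python 3.10+: selectable entry points.
            yield from entry_points(group=group)
        except TypeError:
            # Python <= 3.9: entry_points() returns {group: [EntryPoint, ...]}.
            yield from entry_points().get(group, [])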
dstack/_internal/server/services/projects.py

@@ -19,7 +19,7 @@ from dstack._internal.server.models import MemberModel, ProjectModel, UserModel
 from dstack._internal.server.schemas.projects import MemberSetting
 from dstack._internal.server.services import users
 from dstack._internal.server.services.backends import (
-    get_backend_config_from_backend_model,
+    get_backend_config_without_creds_from_backend_model,
 )
 from dstack._internal.server.services.permissions import get_default_permissions
 from dstack._internal.server.settings import DEFAULT_PROJECT_NAME
@@ -313,7 +313,6 @@ async def add_project_members(
             member_num=None,
             commit=False,
         )
-        member_by_user_id[user_to_add.id] = None

     await session.commit()

@@ -544,9 +543,7 @@ def project_model_to_project(
                 b.type.value,
             )
             continue
-        backend_config = get_backend_config_from_backend_model(
-            configurator, b, include_creds=False
-        )
+        backend_config = get_backend_config_without_creds_from_backend_model(configurator, b)
         if isinstance(backend_config, DstackBackendConfig):
             for backend_type in backend_config.base_backends:
                 backends.append(
dstack/_internal/server/services/proxy/repo.py

@@ -74,6 +74,8 @@ class ServerProxyRepo(BaseProxyRepo):
         jpd: JobProvisioningData = JobProvisioningData.__response__.parse_raw(
             job.job_provisioning_data
         )
+        assert jpd.hostname is not None
+        assert jpd.ssh_port is not None
         if not jpd.dockerized:
             ssh_destination = f"{jpd.username}@{jpd.hostname}"
             ssh_port = jpd.ssh_port
@@ -140,7 +142,7 @@ class ServerProxyRepo(BaseProxyRepo):
             model_options_obj = service_spec.options.get("openai", {}).get("model")
             if model_spec is None or model_options_obj is None:
                 continue
-            model_options = pydantic.parse_obj_as(AnyModel, model_options_obj)
+            model_options = pydantic.parse_obj_as(AnyModel, model_options_obj)  # type: ignore[arg-type]
             model = ChatModel(
                 project_name=project_name,
                 name=model_spec.name,
@@ -175,6 +177,8 @@ def _model_options_to_format_spec(model: AnyModel) -> AnyModelFormat:
     if model.format == "openai":
         return OpenAIChatModelFormat(prefix=model.prefix)
     elif model.format == "tgi":
+        assert model.chat_template is not None
+        assert model.eos_token is not None
         return TGIChatModelFormat(
             chat_template=model.chat_template,
             eos_token=model.eos_token,
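
The asserts added here (and on jpd.hostname / jpd.ssh_port above) are type-narrowing guards rather than runtime validation: on these branches the Optional fields are known to be populated, and the assert lets a static checker treat them as non-None. A tiny illustrative sketch with a hypothetical model:

    from typing import Optional

    class Box:
        value: Optional[str] = None

    def unwrap(box: Box) -> str:
        # Checkers reject box.value.upper() while the type is Optional[str];
        # the assert narrows it to str on this code path.
        assert box.value is not None
        return box.value.upper()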
dstack/_internal/server/services/requirements/combine.py (new file)

@@ -0,0 +1,259 @@
+from typing import Callable, Optional, Protocol, TypeVar
+
+from pydantic import BaseModel
+from typing_extensions import Self
+
+from dstack._internal.core.models.profiles import Profile, SpotPolicy
+from dstack._internal.core.models.resources import (
+    CPUSpec,
+    DiskSpec,
+    GPUSpec,
+    Memory,
+    Range,
+    ResourcesSpec,
+)
+from dstack._internal.core.models.runs import Requirements
+from dstack._internal.utils.typing import SupportsRichComparison
+
+
+class CombineError(ValueError):
+    pass
+
+
+def combine_fleet_and_run_profiles(
+    fleet_profile: Profile, run_profile: Profile
+) -> Optional[Profile]:
+    """
+    Combines fleet and run profile parameters that affect offer selection or provisioning.
+    """
+    try:
+        return Profile(
+            backends=_intersect_lists_optional(fleet_profile.backends, run_profile.backends),
+            regions=_intersect_lists_optional(fleet_profile.regions, run_profile.regions),
+            availability_zones=_intersect_lists_optional(
+                fleet_profile.availability_zones, run_profile.availability_zones
+            ),
+            instance_types=_intersect_lists_optional(
+                fleet_profile.instance_types, run_profile.instance_types
+            ),
+            reservation=_get_single_value_optional(
+                fleet_profile.reservation, run_profile.reservation
+            ),
+            spot_policy=_combine_spot_policy_optional(
+                fleet_profile.spot_policy, run_profile.spot_policy
+            ),
+            max_price=_get_min_optional(fleet_profile.max_price, run_profile.max_price),
+            idle_duration=_combine_idle_duration_optional(
+                fleet_profile.idle_duration, run_profile.idle_duration
+            ),
+            tags=_combine_tags_optional(fleet_profile.tags, run_profile.tags),
+        )
+    except CombineError:
+        return None
+
+
+def combine_fleet_and_run_requirements(
+    fleet_requirements: Requirements, run_requirements: Requirements
+) -> Optional[Requirements]:
+    try:
+        return Requirements(
+            resources=_combine_resources(fleet_requirements.resources, run_requirements.resources),
+            max_price=_get_min_optional(fleet_requirements.max_price, run_requirements.max_price),
+            spot=_combine_spot_optional(fleet_requirements.spot, run_requirements.spot),
+            reservation=_get_single_value_optional(
+                fleet_requirements.reservation, run_requirements.reservation
+            ),
+        )
+    except CombineError:
+        return None
+
+
+_T = TypeVar("_T")
+_ModelT = TypeVar("_ModelT", bound=BaseModel)
+_CompT = TypeVar("_CompT", bound=SupportsRichComparison)
+
+
+class _SupportsCopy(Protocol):
+    def copy(self) -> Self: ...
+
+
+_CopyT = TypeVar("_CopyT", bound=_SupportsCopy)
+
+
+def _intersect_lists_optional(
+    list1: Optional[list[_T]], list2: Optional[list[_T]]
+) -> Optional[list[_T]]:
+    if list1 is None:
+        if list2 is None:
+            return None
+        return list2.copy()
+    if list2 is None:
+        return list1.copy()
+    return [x for x in list1 if x in list2]
+
+
+def _get_min(value1: _CompT, value2: _CompT) -> _CompT:
+    return min(value1, value2)
+
+
+def _get_min_optional(value1: Optional[_CompT], value2: Optional[_CompT]) -> Optional[_CompT]:
+    return _combine_optional(value1, value2, _get_min)
+
+
+def _get_single_value(value1: _T, value2: _T) -> _T:
+    if value1 == value2:
+        return value1
+    raise CombineError(f"Values {value1} and {value2} cannot be combined")
+
+
+def _get_single_value_optional(value1: Optional[_T], value2: Optional[_T]) -> Optional[_T]:
+    return _combine_optional(value1, value2, _get_single_value)
+
+
+def _combine_spot_policy(value1: SpotPolicy, value2: SpotPolicy) -> SpotPolicy:
+    if value1 == SpotPolicy.AUTO:
+        return value2
+    if value2 == SpotPolicy.AUTO:
+        return value1
+    if value1 == value2:
+        return value1
+    raise CombineError(f"spot_policy values {value1} and {value2} cannot be combined")
+
+
+def _combine_spot_policy_optional(
+    value1: Optional[SpotPolicy], value2: Optional[SpotPolicy]
+) -> Optional[SpotPolicy]:
+    return _combine_optional(value1, value2, _combine_spot_policy)
+
+
+def _combine_idle_duration(value1: int, value2: int) -> int:
+    if value1 < 0 and value2 >= 0 or value2 < 0 and value1 >= 0:
+        raise CombineError(f"idle_duration values {value1} and {value2} cannot be combined")
+    return min(value1, value2)
+
+
+def _combine_idle_duration_optional(value1: Optional[int], value2: Optional[int]) -> Optional[int]:
+    return _combine_optional(value1, value2, _combine_idle_duration)
+
+
+def _combine_tags_optional(
+    value1: Optional[dict[str, str]], value2: Optional[dict[str, str]]
+) -> Optional[dict[str, str]]:
+    return _combine_copy_optional(value1, value2, _combine_tags)
+
+
+def _combine_tags(value1: dict[str, str], value2: dict[str, str]) -> dict[str, str]:
+    return value1 | value2
+
+
+def _combine_resources(value1: ResourcesSpec, value2: ResourcesSpec) -> ResourcesSpec:
+    return ResourcesSpec(
+        cpu=_combine_cpu(value1.cpu, value2.cpu),  # type: ignore[attr-defined]
+        memory=_combine_memory(value1.memory, value2.memory),
+        shm_size=_combine_shm_size_optional(value1.shm_size, value2.shm_size),
+        gpu=_combine_gpu_optional(value1.gpu, value2.gpu),
+        disk=_combine_disk_optional(value1.disk, value2.disk),
+    )
+
+
+def _combine_cpu(value1: CPUSpec, value2: CPUSpec) -> CPUSpec:
+    return CPUSpec(
+        arch=_get_single_value_optional(value1.arch, value2.arch),
+        count=_combine_range(value1.count, value2.count),
+    )
+
+
+def _combine_memory(value1: Range[Memory], value2: Range[Memory]) -> Range[Memory]:
+    return _combine_range(value1, value2)
+
+
+def _combine_shm_size_optional(
+    value1: Optional[Memory], value2: Optional[Memory]
+) -> Optional[Memory]:
+    return _get_min_optional(value1, value2)
+
+
+def _combine_gpu(value1: GPUSpec, value2: GPUSpec) -> GPUSpec:
+    return GPUSpec(
+        vendor=_get_single_value_optional(value1.vendor, value2.vendor),
+        name=_intersect_lists_optional(value1.name, value2.name),
+        count=_combine_range(value1.count, value2.count),
+        memory=_combine_range_optional(value1.memory, value2.memory),
+        total_memory=_combine_range_optional(value1.total_memory, value2.total_memory),
+        compute_capability=_get_min_optional(value1.compute_capability, value2.compute_capability),
+    )
+
+
+def _combine_gpu_optional(
+    value1: Optional[GPUSpec], value2: Optional[GPUSpec]
+) -> Optional[GPUSpec]:
+    return _combine_models_optional(value1, value2, _combine_gpu)
+
+
+def _combine_disk(value1: DiskSpec, value2: DiskSpec) -> DiskSpec:
+    return DiskSpec(size=_combine_range(value1.size, value2.size))
+
+
+def _combine_disk_optional(
+    value1: Optional[DiskSpec], value2: Optional[DiskSpec]
+) -> Optional[DiskSpec]:
+    return _combine_models_optional(value1, value2, _combine_disk)
+
+
+def _combine_spot(value1: bool, value2: bool) -> bool:
+    if value1 != value2:
+        raise CombineError(f"spot values {value1} and {value2} cannot be combined")
+    return value1
+
+
+def _combine_spot_optional(value1: Optional[bool], value2: Optional[bool]) -> Optional[bool]:
+    return _combine_optional(value1, value2, _combine_spot)
+
+
+def _combine_range(value1: Range, value2: Range) -> Range:
+    res = value1.intersect(value2)
+    if res is None:
+        raise CombineError(f"Ranges {value1} and {value2} cannot be combined")
+    return res
+
+
+def _combine_range_optional(value1: Optional[Range], value2: Optional[Range]) -> Optional[Range]:
+    return _combine_models_optional(value1, value2, _combine_range)
+
+
+def _combine_optional(
+    value1: Optional[_T], value2: Optional[_T], combiner: Callable[[_T, _T], _T]
+) -> Optional[_T]:
+    if value1 is None:
+        return value2
+    if value2 is None:
+        return value1
+    return combiner(value1, value2)
+
+
+def _combine_models_optional(
+    value1: Optional[_ModelT],
+    value2: Optional[_ModelT],
+    combiner: Callable[[_ModelT, _ModelT], _ModelT],
+) -> Optional[_ModelT]:
+    if value1 is None:
+        if value2 is not None:
+            return value2.copy(deep=True)
+        return None
+    if value2 is None:
+        return value1.copy(deep=True)
+    return combiner(value1, value2)
+
+
+def _combine_copy_optional(
+    value1: Optional[_CopyT],
+    value2: Optional[_CopyT],
+    combiner: Callable[[_CopyT, _CopyT], _CopyT],
+) -> Optional[_CopyT]:
+    if value1 is None:
+        if value2 is not None:
+            return value2.copy()
+        return None
+    if value2 is None:
+        return value1.copy()
+    return combiner(value1, value2)
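
The new combine module merges fleet-level and run-level constraints by taking the strictest interpretation of each field: lists intersect, ranges intersect, prices take the minimum, and scalar values must agree, with any impossible combination surfacing as None from the two public functions rather than an exception. A hedged usage sketch (the field values are invented; Profile is the real model imported above):

    from dstack._internal.core.models.profiles import Profile

    fleet_profile = Profile(name="fleet", backends=["aws", "gcp"], max_price=2.0)
    run_profile = Profile(name="run", backends=["gcp", "runpod"], max_price=3.0)

    combined = combine_fleet_and_run_profiles(fleet_profile, run_profile)
    # Expected under the rules above (illustrative):
    #   combined.backends  -> ["gcp"]  (list intersection)
    #   combined.max_price -> 2.0      (the stricter, i.e. lower, cap)
    # Had the two profiles pinned different reservations, _get_single_value
    # would raise CombineError internally and the result would be None.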