dstack 0.18.40rc1__py3-none-any.whl → 0.18.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dstack/_internal/cli/commands/apply.py +8 -5
- dstack/_internal/cli/services/configurators/base.py +4 -2
- dstack/_internal/cli/services/configurators/fleet.py +21 -9
- dstack/_internal/cli/services/configurators/gateway.py +15 -0
- dstack/_internal/cli/services/configurators/run.py +6 -5
- dstack/_internal/cli/services/configurators/volume.py +15 -0
- dstack/_internal/cli/services/repos.py +3 -3
- dstack/_internal/cli/utils/fleet.py +44 -33
- dstack/_internal/cli/utils/run.py +27 -7
- dstack/_internal/cli/utils/volume.py +30 -9
- dstack/_internal/core/backends/aws/compute.py +94 -53
- dstack/_internal/core/backends/aws/resources.py +22 -12
- dstack/_internal/core/backends/azure/compute.py +2 -0
- dstack/_internal/core/backends/base/compute.py +20 -2
- dstack/_internal/core/backends/gcp/compute.py +32 -24
- dstack/_internal/core/backends/gcp/resources.py +0 -15
- dstack/_internal/core/backends/oci/compute.py +10 -5
- dstack/_internal/core/backends/oci/resources.py +23 -26
- dstack/_internal/core/backends/remote/provisioning.py +65 -27
- dstack/_internal/core/backends/runpod/compute.py +1 -0
- dstack/_internal/core/models/backends/azure.py +3 -1
- dstack/_internal/core/models/configurations.py +24 -1
- dstack/_internal/core/models/fleets.py +46 -0
- dstack/_internal/core/models/instances.py +5 -1
- dstack/_internal/core/models/pools.py +4 -1
- dstack/_internal/core/models/profiles.py +10 -4
- dstack/_internal/core/models/runs.py +23 -3
- dstack/_internal/core/models/volumes.py +26 -0
- dstack/_internal/core/services/ssh/attach.py +92 -53
- dstack/_internal/core/services/ssh/tunnel.py +58 -31
- dstack/_internal/proxy/gateway/routers/registry.py +2 -0
- dstack/_internal/proxy/gateway/schemas/registry.py +2 -0
- dstack/_internal/proxy/gateway/services/registry.py +4 -0
- dstack/_internal/proxy/lib/models.py +3 -0
- dstack/_internal/proxy/lib/services/service_connection.py +8 -1
- dstack/_internal/server/background/tasks/process_instances.py +73 -35
- dstack/_internal/server/background/tasks/process_metrics.py +9 -9
- dstack/_internal/server/background/tasks/process_running_jobs.py +77 -26
- dstack/_internal/server/background/tasks/process_runs.py +2 -12
- dstack/_internal/server/background/tasks/process_submitted_jobs.py +121 -49
- dstack/_internal/server/background/tasks/process_terminating_jobs.py +14 -3
- dstack/_internal/server/background/tasks/process_volumes.py +11 -1
- dstack/_internal/server/migrations/versions/1338b788b612_reverse_job_instance_relationship.py +71 -0
- dstack/_internal/server/migrations/versions/1e76fb0dde87_add_jobmodel_inactivity_secs.py +32 -0
- dstack/_internal/server/migrations/versions/51d45659d574_add_instancemodel_blocks_fields.py +43 -0
- dstack/_internal/server/migrations/versions/63c3f19cb184_add_jobterminationreason_inactivity_.py +83 -0
- dstack/_internal/server/migrations/versions/a751ef183f27_move_attachment_data_to_volumes_.py +34 -0
- dstack/_internal/server/models.py +27 -23
- dstack/_internal/server/routers/runs.py +1 -0
- dstack/_internal/server/schemas/runner.py +1 -0
- dstack/_internal/server/services/backends/configurators/azure.py +34 -8
- dstack/_internal/server/services/config.py +9 -0
- dstack/_internal/server/services/fleets.py +32 -3
- dstack/_internal/server/services/gateways/client.py +9 -1
- dstack/_internal/server/services/jobs/__init__.py +217 -45
- dstack/_internal/server/services/jobs/configurators/base.py +47 -2
- dstack/_internal/server/services/offers.py +96 -10
- dstack/_internal/server/services/pools.py +98 -14
- dstack/_internal/server/services/proxy/repo.py +17 -3
- dstack/_internal/server/services/runner/client.py +9 -6
- dstack/_internal/server/services/runner/ssh.py +33 -5
- dstack/_internal/server/services/runs.py +48 -179
- dstack/_internal/server/services/services/__init__.py +9 -1
- dstack/_internal/server/services/volumes.py +68 -9
- dstack/_internal/server/statics/index.html +1 -1
- dstack/_internal/server/statics/{main-11ec5e4a00ea6ec833e3.js → main-2ac66bfcbd2e39830b88.js} +30 -31
- dstack/_internal/server/statics/{main-11ec5e4a00ea6ec833e3.js.map → main-2ac66bfcbd2e39830b88.js.map} +1 -1
- dstack/_internal/server/statics/{main-fc56d1f4af8e57522a1c.css → main-ad5150a441de98cd8987.css} +1 -1
- dstack/_internal/server/testing/common.py +130 -61
- dstack/_internal/utils/common.py +22 -8
- dstack/_internal/utils/env.py +14 -0
- dstack/_internal/utils/ssh.py +1 -1
- dstack/api/server/_fleets.py +25 -1
- dstack/api/server/_runs.py +23 -2
- dstack/api/server/_volumes.py +12 -1
- dstack/version.py +1 -1
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/METADATA +1 -1
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/RECORD +104 -93
- tests/_internal/cli/services/configurators/test_profile.py +3 -3
- tests/_internal/core/services/ssh/test_tunnel.py +56 -4
- tests/_internal/proxy/gateway/routers/test_registry.py +30 -7
- tests/_internal/server/background/tasks/test_process_instances.py +138 -20
- tests/_internal/server/background/tasks/test_process_metrics.py +12 -0
- tests/_internal/server/background/tasks/test_process_running_jobs.py +193 -0
- tests/_internal/server/background/tasks/test_process_runs.py +27 -3
- tests/_internal/server/background/tasks/test_process_submitted_jobs.py +53 -6
- tests/_internal/server/background/tasks/test_process_terminating_jobs.py +135 -17
- tests/_internal/server/routers/test_fleets.py +15 -2
- tests/_internal/server/routers/test_pools.py +6 -0
- tests/_internal/server/routers/test_runs.py +27 -0
- tests/_internal/server/routers/test_volumes.py +9 -2
- tests/_internal/server/services/jobs/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/__init__.py +0 -0
- tests/_internal/server/services/jobs/configurators/test_base.py +72 -0
- tests/_internal/server/services/runner/test_client.py +22 -3
- tests/_internal/server/services/test_offers.py +167 -0
- tests/_internal/server/services/test_pools.py +109 -1
- tests/_internal/server/services/test_runs.py +5 -41
- tests/_internal/utils/test_common.py +21 -0
- tests/_internal/utils/test_env.py +38 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/LICENSE.md +0 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/WHEEL +0 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/entry_points.txt +0 -0
- {dstack-0.18.40rc1.dist-info → dstack-0.18.42.dist-info}/top_level.txt +0 -0
|
@@ -5,7 +5,6 @@ from typing import Callable, List, Optional, Union
|
|
|
5
5
|
from sqlalchemy import (
|
|
6
6
|
BigInteger,
|
|
7
7
|
Boolean,
|
|
8
|
-
Column,
|
|
9
8
|
DateTime,
|
|
10
9
|
Enum,
|
|
11
10
|
Float,
|
|
@@ -15,7 +14,6 @@ from sqlalchemy import (
|
|
|
15
14
|
LargeBinary,
|
|
16
15
|
MetaData,
|
|
17
16
|
String,
|
|
18
|
-
Table,
|
|
19
17
|
Text,
|
|
20
18
|
TypeDecorator,
|
|
21
19
|
UniqueConstraint,
|
|
@@ -351,13 +349,17 @@ class JobModel(BaseModel):
|
|
|
351
349
|
job_spec_data: Mapped[str] = mapped_column(Text)
|
|
352
350
|
job_provisioning_data: Mapped[Optional[str]] = mapped_column(Text)
|
|
353
351
|
runner_timestamp: Mapped[Optional[int]] = mapped_column(BigInteger)
|
|
352
|
+
inactivity_secs: Mapped[Optional[int]] = mapped_column(Integer) # 0 - active, None - N/A
|
|
354
353
|
# `removed` is used to ensure that the instance is killed after the job is finished
|
|
355
354
|
remove_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
|
|
356
355
|
volumes_detached_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
|
|
357
356
|
# `instance_assigned` means instance assignment was done.
|
|
358
357
|
# if `instance_assigned` is True and `instance` is None, no instance was assiged.
|
|
359
358
|
instance_assigned: Mapped[bool] = mapped_column(Boolean, default=False)
|
|
360
|
-
|
|
359
|
+
instance_id: Mapped[Optional[uuid.UUID]] = mapped_column(
|
|
360
|
+
ForeignKey("instances.id", ondelete="CASCADE")
|
|
361
|
+
)
|
|
362
|
+
instance: Mapped[Optional["InstanceModel"]] = relationship(back_populates="jobs")
|
|
361
363
|
used_instance_id: Mapped[Optional[uuid.UUID]] = mapped_column(UUIDType(binary=False))
|
|
362
364
|
replica_num: Mapped[int] = mapped_column(Integer)
|
|
363
365
|
job_runtime_data: Mapped[Optional[str]] = mapped_column(Text)
|
|
@@ -543,15 +545,19 @@ class InstanceModel(BaseModel):
|
|
|
543
545
|
|
|
544
546
|
remote_connection_info: Mapped[Optional[str]] = mapped_column(Text)
|
|
545
547
|
|
|
546
|
-
#
|
|
547
|
-
|
|
548
|
-
|
|
548
|
+
# NULL means `auto` (only during provisioning, when ready it's not NULL)
|
|
549
|
+
total_blocks: Mapped[Optional[int]] = mapped_column(Integer)
|
|
550
|
+
busy_blocks: Mapped[int] = mapped_column(Integer, default=0)
|
|
551
|
+
|
|
552
|
+
jobs: Mapped[list["JobModel"]] = relationship(back_populates="instance", lazy="joined")
|
|
549
553
|
last_job_processed_at: Mapped[Optional[datetime]] = mapped_column(NaiveDateTime)
|
|
550
554
|
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
+
volume_attachments: Mapped[List["VolumeAttachmentModel"]] = relationship(
|
|
556
|
+
back_populates="instance",
|
|
557
|
+
# Add delete-orphan option so that removing entries from volume_attachments
|
|
558
|
+
# automatically marks them for deletion.
|
|
559
|
+
# SQLalchemy requires delete when using delete-orphan.
|
|
560
|
+
cascade="save-update, merge, delete-orphan, delete",
|
|
555
561
|
)
|
|
556
562
|
|
|
557
563
|
|
|
@@ -581,23 +587,21 @@ class VolumeModel(BaseModel):
|
|
|
581
587
|
|
|
582
588
|
configuration: Mapped[str] = mapped_column(Text)
|
|
583
589
|
volume_provisioning_data: Mapped[Optional[str]] = mapped_column(Text)
|
|
584
|
-
|
|
585
|
-
|
|
590
|
+
|
|
591
|
+
attachments: Mapped[List["VolumeAttachmentModel"]] = relationship(back_populates="volume")
|
|
592
|
+
|
|
593
|
+
# Deprecated in favor of VolumeAttachmentModel.attachment_data
|
|
586
594
|
volume_attachment_data: Mapped[Optional[str]] = mapped_column(Text)
|
|
587
595
|
|
|
588
|
-
# instances the volume is attached to
|
|
589
|
-
instances: Mapped[List["InstanceModel"]] = relationship(
|
|
590
|
-
secondary="volumes_attachments",
|
|
591
|
-
back_populates="volumes",
|
|
592
|
-
)
|
|
593
596
|
|
|
597
|
+
class VolumeAttachmentModel(BaseModel):
|
|
598
|
+
__tablename__ = "volumes_attachments"
|
|
594
599
|
|
|
595
|
-
|
|
596
|
-
"
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
)
|
|
600
|
+
volume_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("volumes.id"), primary_key=True)
|
|
601
|
+
volume: Mapped[VolumeModel] = relationship(back_populates="attachments")
|
|
602
|
+
instance_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("instances.id"), primary_key=True)
|
|
603
|
+
instance: Mapped[InstanceModel] = relationship(back_populates="volume_attachments")
|
|
604
|
+
attachment_data: Mapped[Optional[str]] = mapped_column(Text)
|
|
601
605
|
|
|
602
606
|
|
|
603
607
|
class PlacementGroupModel(BaseModel):
|
|
@@ -47,6 +47,7 @@ async def list_runs(
|
|
|
47
47
|
"""
|
|
48
48
|
Returns all runs visible to user sorted by descending `submitted_at`.
|
|
49
49
|
`project_name`, `repo_id`, `username`, and `only_active` can be specified as filters.
|
|
50
|
+
Setting `only_active` to `true` excludes finished runs and deleted runs.
|
|
50
51
|
Specifying `repo_id` without `project_name` returns no runs.
|
|
51
52
|
|
|
52
53
|
The results are paginated. To get the next page, pass `submitted_at` and `id` of
|
|
@@ -2,6 +2,7 @@ import json
|
|
|
2
2
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
3
3
|
from typing import List, Optional, Tuple
|
|
4
4
|
|
|
5
|
+
import azure.core.exceptions
|
|
5
6
|
from azure.core.credentials import TokenCredential
|
|
6
7
|
from azure.mgmt import network as network_mgmt
|
|
7
8
|
from azure.mgmt import resource as resource_mgmt
|
|
@@ -154,16 +155,17 @@ class AzureConfigurator(Configurator):
|
|
|
154
155
|
if is_core_model_instance(config.creds, AzureClientCreds):
|
|
155
156
|
self._set_client_creds_tenant_id(config.creds, config.tenant_id)
|
|
156
157
|
credential, _ = auth.authenticate(config.creds)
|
|
157
|
-
resource_group
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
158
|
+
if config.resource_group is None:
|
|
159
|
+
config.resource_group = self._create_resource_group(
|
|
160
|
+
credential=credential,
|
|
161
|
+
subscription_id=config.subscription_id,
|
|
162
|
+
location=MAIN_LOCATION,
|
|
163
|
+
project_name=project.name,
|
|
164
|
+
)
|
|
163
165
|
self._create_network_resources(
|
|
164
166
|
credential=credential,
|
|
165
167
|
subscription_id=config.subscription_id,
|
|
166
|
-
resource_group=resource_group,
|
|
168
|
+
resource_group=config.resource_group,
|
|
167
169
|
locations=config.locations,
|
|
168
170
|
create_default_network=config.vpc_ids is None,
|
|
169
171
|
)
|
|
@@ -172,7 +174,6 @@ class AzureConfigurator(Configurator):
|
|
|
172
174
|
type=self.TYPE.value,
|
|
173
175
|
config=AzureStoredConfig(
|
|
174
176
|
**AzureConfigInfo.__response__.parse_obj(config).dict(),
|
|
175
|
-
resource_group=resource_group,
|
|
176
177
|
).json(),
|
|
177
178
|
auth=DecryptedString(plaintext=AzureCreds.parse_obj(config.creds).__root__.json()),
|
|
178
179
|
)
|
|
@@ -322,6 +323,7 @@ class AzureConfigurator(Configurator):
|
|
|
322
323
|
self, config: AzureConfigInfoWithCredsPartial, credential: auth.AzureCredential
|
|
323
324
|
):
|
|
324
325
|
self._check_tags_config(config)
|
|
326
|
+
self._check_resource_group(config=config, credential=credential)
|
|
325
327
|
self._check_vpc_config(config=config, credential=credential)
|
|
326
328
|
|
|
327
329
|
def _check_tags_config(self, config: AzureConfigInfoWithCredsPartial):
|
|
@@ -336,6 +338,18 @@ class AzureConfigurator(Configurator):
|
|
|
336
338
|
except BackendError as e:
|
|
337
339
|
raise ServerClientError(e.args[0])
|
|
338
340
|
|
|
341
|
+
def _check_resource_group(
|
|
342
|
+
self, config: AzureConfigInfoWithCredsPartial, credential: auth.AzureCredential
|
|
343
|
+
):
|
|
344
|
+
if config.resource_group is None:
|
|
345
|
+
return
|
|
346
|
+
resource_manager = ResourceManager(
|
|
347
|
+
credential=credential,
|
|
348
|
+
subscription_id=config.subscription_id,
|
|
349
|
+
)
|
|
350
|
+
if not resource_manager.resource_group_exists(config.resource_group):
|
|
351
|
+
raise ServerClientError(f"Resource group {config.resource_group} not found")
|
|
352
|
+
|
|
339
353
|
def _check_vpc_config(
|
|
340
354
|
self, config: AzureConfigInfoWithCredsPartial, credential: auth.AzureCredential
|
|
341
355
|
):
|
|
@@ -406,6 +420,18 @@ class ResourceManager:
|
|
|
406
420
|
)
|
|
407
421
|
return resource_group.name
|
|
408
422
|
|
|
423
|
+
def resource_group_exists(
|
|
424
|
+
self,
|
|
425
|
+
name: str,
|
|
426
|
+
) -> bool:
|
|
427
|
+
try:
|
|
428
|
+
self.resource_client.resource_groups.get(
|
|
429
|
+
resource_group_name=name,
|
|
430
|
+
)
|
|
431
|
+
except azure.core.exceptions.ResourceNotFoundError:
|
|
432
|
+
return False
|
|
433
|
+
return True
|
|
434
|
+
|
|
409
435
|
|
|
410
436
|
class NetworkManager:
|
|
411
437
|
def __init__(self, credential: TokenCredential, subscription_id: str):
|
|
@@ -124,6 +124,15 @@ class AzureConfig(CoreModel):
|
|
|
124
124
|
type: Annotated[Literal["azure"], Field(description="The type of the backend")] = "azure"
|
|
125
125
|
tenant_id: Annotated[str, Field(description="The tenant ID")]
|
|
126
126
|
subscription_id: Annotated[str, Field(description="The subscription ID")]
|
|
127
|
+
resource_group: Annotated[
|
|
128
|
+
Optional[str],
|
|
129
|
+
Field(
|
|
130
|
+
description=(
|
|
131
|
+
"The resource group for resources created by `dstack`."
|
|
132
|
+
" If not specified, `dstack` will create a new resource group"
|
|
133
|
+
)
|
|
134
|
+
),
|
|
135
|
+
] = None
|
|
127
136
|
regions: Annotated[
|
|
128
137
|
Optional[List[str]],
|
|
129
138
|
Field(description="The list of Azure regions (locations). Omit to use all regions"),
|
|
@@ -2,7 +2,7 @@ import random
|
|
|
2
2
|
import string
|
|
3
3
|
import uuid
|
|
4
4
|
from datetime import datetime, timezone
|
|
5
|
-
from typing import List, Optional, Tuple, Union, cast
|
|
5
|
+
from typing import List, Literal, Optional, Tuple, Union, cast
|
|
6
6
|
|
|
7
7
|
from sqlalchemy import and_, func, or_, select
|
|
8
8
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
@@ -31,6 +31,7 @@ from dstack._internal.core.models.instances import (
|
|
|
31
31
|
InstanceOfferWithAvailability,
|
|
32
32
|
InstanceStatus,
|
|
33
33
|
RemoteConnectionInfo,
|
|
34
|
+
SSHConnectionParams,
|
|
34
35
|
SSHKey,
|
|
35
36
|
)
|
|
36
37
|
from dstack._internal.core.models.pools import Instance
|
|
@@ -256,6 +257,8 @@ async def get_plan(
|
|
|
256
257
|
project=project,
|
|
257
258
|
profile=spec.merged_profile,
|
|
258
259
|
requirements=_get_fleet_requirements(spec),
|
|
260
|
+
fleet_spec=spec,
|
|
261
|
+
blocks=spec.configuration.blocks,
|
|
259
262
|
)
|
|
260
263
|
offers = [offer for _, offer in offers_with_backends]
|
|
261
264
|
_remove_fleet_spec_sensitive_info(spec)
|
|
@@ -275,11 +278,15 @@ async def get_create_instance_offers(
|
|
|
275
278
|
project: ProjectModel,
|
|
276
279
|
profile: Profile,
|
|
277
280
|
requirements: Requirements,
|
|
278
|
-
|
|
281
|
+
fleet_spec: Optional[FleetSpec] = None,
|
|
279
282
|
fleet_model: Optional[FleetModel] = None,
|
|
283
|
+
blocks: Union[int, Literal["auto"]] = 1,
|
|
284
|
+
exclude_not_available: bool = False,
|
|
280
285
|
) -> List[Tuple[Backend, InstanceOfferWithAvailability]]:
|
|
281
286
|
multinode = False
|
|
282
287
|
master_job_provisioning_data = None
|
|
288
|
+
if fleet_spec is not None:
|
|
289
|
+
multinode = fleet_spec.configuration.placement == InstanceGroupPlacement.CLUSTER
|
|
283
290
|
if fleet_model is not None:
|
|
284
291
|
fleet = fleet_model_to_fleet(fleet_model)
|
|
285
292
|
multinode = fleet.spec.configuration.placement == InstanceGroupPlacement.CLUSTER
|
|
@@ -296,6 +303,7 @@ async def get_create_instance_offers(
|
|
|
296
303
|
exclude_not_available=exclude_not_available,
|
|
297
304
|
multinode=multinode,
|
|
298
305
|
master_job_provisioning_data=master_job_provisioning_data,
|
|
306
|
+
blocks=blocks,
|
|
299
307
|
)
|
|
300
308
|
offers = [
|
|
301
309
|
(backend, offer)
|
|
@@ -406,6 +414,7 @@ async def create_fleet_instance_model(
|
|
|
406
414
|
instance_num=instance_num,
|
|
407
415
|
placement_group_name=placement_group_name,
|
|
408
416
|
reservation=reservation,
|
|
417
|
+
blocks=spec.configuration.blocks,
|
|
409
418
|
)
|
|
410
419
|
return instance_model
|
|
411
420
|
|
|
@@ -424,18 +433,33 @@ async def create_fleet_ssh_instance_model(
|
|
|
424
433
|
ssh_user = ssh_params.user
|
|
425
434
|
ssh_key = ssh_params.ssh_key
|
|
426
435
|
port = ssh_params.port
|
|
436
|
+
proxy_jump = ssh_params.proxy_jump
|
|
427
437
|
internal_ip = None
|
|
438
|
+
blocks = 1
|
|
428
439
|
else:
|
|
429
440
|
hostname = host.hostname
|
|
430
441
|
ssh_user = host.user or ssh_params.user
|
|
431
442
|
ssh_key = host.ssh_key or ssh_params.ssh_key
|
|
432
443
|
port = host.port or ssh_params.port
|
|
444
|
+
proxy_jump = host.proxy_jump or ssh_params.proxy_jump
|
|
433
445
|
internal_ip = host.internal_ip
|
|
446
|
+
blocks = host.blocks
|
|
434
447
|
|
|
435
448
|
if ssh_user is None or ssh_key is None:
|
|
436
449
|
# This should not be reachable but checked by fleet spec validation
|
|
437
450
|
raise ServerClientError("ssh key or user not specified")
|
|
438
451
|
|
|
452
|
+
if proxy_jump is not None:
|
|
453
|
+
ssh_proxy = SSHConnectionParams(
|
|
454
|
+
hostname=proxy_jump.hostname,
|
|
455
|
+
port=proxy_jump.port or 22,
|
|
456
|
+
username=proxy_jump.user,
|
|
457
|
+
)
|
|
458
|
+
ssh_proxy_keys = [proxy_jump.ssh_key]
|
|
459
|
+
else:
|
|
460
|
+
ssh_proxy = None
|
|
461
|
+
ssh_proxy_keys = None
|
|
462
|
+
|
|
439
463
|
instance_model = await pools_services.create_ssh_instance_model(
|
|
440
464
|
project=project,
|
|
441
465
|
pool=pool,
|
|
@@ -445,10 +469,13 @@ async def create_fleet_ssh_instance_model(
|
|
|
445
469
|
host=hostname,
|
|
446
470
|
ssh_user=ssh_user,
|
|
447
471
|
ssh_keys=[ssh_key],
|
|
472
|
+
ssh_proxy=ssh_proxy,
|
|
473
|
+
ssh_proxy_keys=ssh_proxy_keys,
|
|
448
474
|
env=env,
|
|
449
475
|
internal_ip=internal_ip,
|
|
450
476
|
instance_network=ssh_params.network,
|
|
451
477
|
port=port or 22,
|
|
478
|
+
blocks=blocks,
|
|
452
479
|
)
|
|
453
480
|
return instance_model
|
|
454
481
|
|
|
@@ -544,7 +571,7 @@ async def generate_fleet_name(session: AsyncSession, project: ProjectModel) -> s
|
|
|
544
571
|
|
|
545
572
|
|
|
546
573
|
def is_fleet_in_use(fleet_model: FleetModel, instance_nums: Optional[List[int]] = None) -> bool:
|
|
547
|
-
instances_in_use = [i for i in fleet_model.instances if i.
|
|
574
|
+
instances_in_use = [i for i in fleet_model.instances if i.jobs and not i.deleted]
|
|
548
575
|
selected_instance_in_use = instances_in_use
|
|
549
576
|
if instance_nums is not None:
|
|
550
577
|
selected_instance_in_use = [i for i in instances_in_use if i.instance_num in instance_nums]
|
|
@@ -606,6 +633,8 @@ async def create_instance(
|
|
|
606
633
|
instance_configuration=None,
|
|
607
634
|
termination_policy=termination_policy,
|
|
608
635
|
termination_idle_time=termination_idle_time,
|
|
636
|
+
total_blocks=1,
|
|
637
|
+
busy_blocks=0,
|
|
609
638
|
)
|
|
610
639
|
logger.info(
|
|
611
640
|
"Added a new instance %s",
|
|
@@ -74,10 +74,18 @@ class GatewayClient:
|
|
|
74
74
|
resp.raise_for_status()
|
|
75
75
|
self.is_server_ready = True
|
|
76
76
|
|
|
77
|
-
async def register_replica(
|
|
77
|
+
async def register_replica(
|
|
78
|
+
self,
|
|
79
|
+
run: Run,
|
|
80
|
+
job_submission: JobSubmission,
|
|
81
|
+
ssh_head_proxy: Optional[SSHConnectionParams],
|
|
82
|
+
ssh_head_proxy_private_key: Optional[str],
|
|
83
|
+
):
|
|
78
84
|
payload = {
|
|
79
85
|
"job_id": job_submission.id.hex,
|
|
80
86
|
"app_port": run.run_spec.configuration.port.container_port,
|
|
87
|
+
"ssh_head_proxy": ssh_head_proxy.dict() if ssh_head_proxy is not None else None,
|
|
88
|
+
"ssh_head_proxy_private_key": ssh_head_proxy_private_key,
|
|
81
89
|
}
|
|
82
90
|
jpd = job_submission.job_provisioning_data
|
|
83
91
|
if not jpd.dockerized:
|