dagster-cloud 1.10.11__py3-none-any.whl → 1.12.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_cloud/__init__.py +3 -3
- dagster_cloud/agent/dagster_cloud_agent.py +64 -20
- dagster_cloud/agent/instrumentation/run_launch.py +2 -2
- dagster_cloud/agent/instrumentation/schedule.py +1 -1
- dagster_cloud/agent/instrumentation/sensor.py +1 -1
- dagster_cloud/anomaly_detection/__init__.py +2 -2
- dagster_cloud/anomaly_detection/defs.py +11 -8
- dagster_cloud/api/dagster_cloud_api.py +7 -5
- dagster_cloud/auth/constants.py +21 -5
- dagster_cloud/batching/__init__.py +1 -1
- dagster_cloud/dagster_insights/__init__.py +12 -6
- dagster_cloud/dagster_insights/bigquery/dbt_wrapper.py +8 -2
- dagster_cloud/dagster_insights/bigquery/insights_bigquery_resource.py +4 -2
- dagster_cloud/dagster_insights/insights_utils.py +1 -1
- dagster_cloud/dagster_insights/metrics_utils.py +1 -1
- dagster_cloud/dagster_insights/snowflake/dagster_snowflake_insights.py +1 -9
- dagster_cloud/dagster_insights/snowflake/dbt_wrapper.py +9 -2
- dagster_cloud/dagster_insights/snowflake/definitions.py +5 -4
- dagster_cloud/dagster_insights/snowflake/insights_snowflake_resource.py +1 -2
- dagster_cloud/definitions/__init__.py +0 -0
- dagster_cloud/definitions/job_selection.py +36 -0
- dagster_cloud/execution/utils/process.py +1 -1
- dagster_cloud/instance/__init__.py +81 -42
- dagster_cloud/metadata/source_code.py +3 -1
- dagster_cloud/opentelemetry/config/exporter.py +1 -1
- dagster_cloud/opentelemetry/controller.py +1 -1
- dagster_cloud/opentelemetry/observers/dagster_exception_handler.py +1 -1
- dagster_cloud/opentelemetry/observers/execution_observer.py +4 -2
- dagster_cloud/pex/grpc/__init__.py +2 -2
- dagster_cloud/pex/grpc/client.py +2 -2
- dagster_cloud/pex/grpc/server/__init__.py +2 -2
- dagster_cloud/pex/grpc/server/cli/__init__.py +2 -2
- dagster_cloud/pex/grpc/server/manager.py +5 -4
- dagster_cloud/pex/grpc/server/registry.py +15 -8
- dagster_cloud/pex/grpc/server/server.py +17 -8
- dagster_cloud/secrets/__init__.py +1 -1
- dagster_cloud/serverless/__init__.py +1 -1
- dagster_cloud/serverless/io_manager.py +4 -1
- dagster_cloud/storage/compute_logs/__init__.py +3 -1
- dagster_cloud/storage/compute_logs/compute_log_manager.py +17 -13
- dagster_cloud/storage/defs_state/__init__.py +3 -0
- dagster_cloud/storage/defs_state/queries.py +15 -0
- dagster_cloud/storage/defs_state/storage.py +113 -0
- dagster_cloud/storage/event_logs/__init__.py +3 -1
- dagster_cloud/storage/event_logs/storage.py +9 -2
- dagster_cloud/storage/event_logs/utils.py +1 -3
- dagster_cloud/storage/runs/__init__.py +1 -1
- dagster_cloud/storage/runs/queries.py +15 -0
- dagster_cloud/storage/runs/storage.py +30 -3
- dagster_cloud/storage/schedules/__init__.py +1 -1
- dagster_cloud/storage/schedules/storage.py +1 -1
- dagster_cloud/util/errors.py +0 -91
- dagster_cloud/version.py +1 -1
- dagster_cloud/workspace/config_schema/__init__.py +43 -5
- dagster_cloud/workspace/docker/__init__.py +8 -7
- dagster_cloud/workspace/docker/utils.py +1 -1
- dagster_cloud/workspace/ecs/__init__.py +1 -1
- dagster_cloud/workspace/ecs/client.py +23 -18
- dagster_cloud/workspace/ecs/launcher.py +19 -5
- dagster_cloud/workspace/ecs/run_launcher.py +1 -2
- dagster_cloud/workspace/ecs/utils.py +5 -2
- dagster_cloud/workspace/kubernetes/__init__.py +1 -1
- dagster_cloud/workspace/kubernetes/launcher.py +11 -12
- dagster_cloud/workspace/kubernetes/utils.py +1 -2
- dagster_cloud/workspace/user_code_launcher/__init__.py +5 -3
- dagster_cloud/workspace/user_code_launcher/process.py +2 -3
- dagster_cloud/workspace/user_code_launcher/user_code_launcher.py +71 -34
- dagster_cloud/workspace/user_code_launcher/utils.py +7 -0
- {dagster_cloud-1.10.11.dist-info → dagster_cloud-1.12.6.dist-info}/METADATA +9 -8
- dagster_cloud-1.12.6.dist-info/RECORD +134 -0
- {dagster_cloud-1.10.11.dist-info → dagster_cloud-1.12.6.dist-info}/WHEEL +1 -1
- dagster_cloud-1.10.11.dist-info/RECORD +0 -129
- {dagster_cloud-1.10.11.dist-info → dagster_cloud-1.12.6.dist-info}/top_level.txt +0 -0
|
@@ -64,6 +64,7 @@ class Client:
|
|
|
64
64
|
grace_period: int = DEFAULT_ECS_GRACE_PERIOD,
|
|
65
65
|
launch_type: str = "FARGATE",
|
|
66
66
|
show_debug_cluster_info: bool = True,
|
|
67
|
+
assign_public_ip: Optional[bool] = None,
|
|
67
68
|
):
|
|
68
69
|
self.ecs = ecs_client if ecs_client else boto3.client("ecs", config=config)
|
|
69
70
|
self.logs = boto3.client("logs", config=config)
|
|
@@ -85,6 +86,7 @@ class Client:
|
|
|
85
86
|
self.grace_period = check.int_param(grace_period, "grace_period")
|
|
86
87
|
self.launch_type = check.str_param(launch_type, "launch_type")
|
|
87
88
|
self._namespace: Optional[str] = None
|
|
89
|
+
self._assign_public_ip_override = assign_public_ip
|
|
88
90
|
|
|
89
91
|
@property
|
|
90
92
|
def ec2(self):
|
|
@@ -110,19 +112,22 @@ class Client:
|
|
|
110
112
|
name="serviceLongArnFormat",
|
|
111
113
|
effectiveSettings=True,
|
|
112
114
|
)
|
|
113
|
-
return settings["settings"][0]["value"] == "enabled"
|
|
115
|
+
return settings["settings"][0]["value"] == "enabled"
|
|
114
116
|
|
|
115
117
|
@property
|
|
116
118
|
@cached_method
|
|
117
119
|
def network_configuration(self):
|
|
120
|
+
if self.launch_type != "FARGATE":
|
|
121
|
+
assign_public_ip = None
|
|
122
|
+
elif self._assign_public_ip_override is not None:
|
|
123
|
+
assign_public_ip = "ENABLED" if self._assign_public_ip_override else "DISABLED"
|
|
124
|
+
else:
|
|
125
|
+
assign_public_ip = self._infer_assign_public_ip()
|
|
126
|
+
|
|
118
127
|
network_configuration = {
|
|
119
128
|
"awsvpcConfiguration": {
|
|
120
129
|
"subnets": self.subnet_ids,
|
|
121
|
-
**(
|
|
122
|
-
{"assignPublicIp": self._assign_public_ip()}
|
|
123
|
-
if self.launch_type == "FARGATE"
|
|
124
|
-
else {}
|
|
125
|
-
),
|
|
130
|
+
**({"assignPublicIp": assign_public_ip} if assign_public_ip else {}),
|
|
126
131
|
},
|
|
127
132
|
}
|
|
128
133
|
|
|
@@ -155,7 +160,7 @@ class Client:
|
|
|
155
160
|
):
|
|
156
161
|
task_definition_arn = (
|
|
157
162
|
self.ecs.register_task_definition(
|
|
158
|
-
**desired_task_definition_config.task_definition_dict()
|
|
163
|
+
**desired_task_definition_config.task_definition_dict()
|
|
159
164
|
)
|
|
160
165
|
.get("taskDefinition")
|
|
161
166
|
.get("taskDefinitionArn")
|
|
@@ -461,10 +466,10 @@ class Client:
|
|
|
461
466
|
|
|
462
467
|
task_arn = (
|
|
463
468
|
self.ecs.run_task(
|
|
464
|
-
taskDefinition=task_definition_arn,
|
|
469
|
+
taskDefinition=task_definition_arn,
|
|
465
470
|
cluster=self.cluster_name,
|
|
466
|
-
launchType=self.launch_type,
|
|
467
|
-
networkConfiguration=self.network_configuration,
|
|
471
|
+
launchType=self.launch_type,
|
|
472
|
+
networkConfiguration=self.network_configuration,
|
|
468
473
|
)
|
|
469
474
|
.get("tasks", [{}])[0]
|
|
470
475
|
.get("taskArn")
|
|
@@ -472,14 +477,14 @@ class Client:
|
|
|
472
477
|
|
|
473
478
|
self.ecs.get_waiter("tasks_stopped").wait(
|
|
474
479
|
cluster=self.cluster_name,
|
|
475
|
-
tasks=[task_arn],
|
|
480
|
+
tasks=[task_arn],
|
|
476
481
|
WaiterConfig={"Delay": 1, "MaxAttempts": self.timeout},
|
|
477
482
|
)
|
|
478
483
|
|
|
479
484
|
exit_code = (
|
|
480
485
|
self.ecs.describe_tasks(
|
|
481
486
|
cluster=self.cluster_name,
|
|
482
|
-
tasks=[task_arn],
|
|
487
|
+
tasks=[task_arn],
|
|
483
488
|
)
|
|
484
489
|
.get("tasks", [{}])[0]
|
|
485
490
|
.get("containers", [{}])[0]
|
|
@@ -541,7 +546,7 @@ class Client:
|
|
|
541
546
|
for key, value in tags.items()
|
|
542
547
|
]
|
|
543
548
|
|
|
544
|
-
arn = self.ecs.create_service(**params).get("service").get("serviceArn")
|
|
549
|
+
arn = self.ecs.create_service(**params).get("service").get("serviceArn")
|
|
545
550
|
|
|
546
551
|
return Service(client=self, arn=arn)
|
|
547
552
|
|
|
@@ -716,7 +721,7 @@ class Client:
|
|
|
716
721
|
|
|
717
722
|
stopped_tasks = sorted(
|
|
718
723
|
stopped_tasks,
|
|
719
|
-
key=lambda task: task["createdAt"].timestamp(),
|
|
724
|
+
key=lambda task: task["createdAt"].timestamp(),
|
|
720
725
|
reverse=True,
|
|
721
726
|
)
|
|
722
727
|
return stopped_tasks
|
|
@@ -760,7 +765,7 @@ class Client:
|
|
|
760
765
|
if service["Name"] == service_name:
|
|
761
766
|
return service["Id"]
|
|
762
767
|
|
|
763
|
-
def _assign_public_ip(self):
|
|
768
|
+
def _infer_assign_public_ip(self):
|
|
764
769
|
# https://docs.aws.amazon.com/AmazonECS/latest/userguide/fargate-task-networking.html
|
|
765
770
|
# Assign a public IP if any of the subnets are public
|
|
766
771
|
route_tables = self.ec2.route_tables.filter(
|
|
@@ -780,14 +785,14 @@ class Client:
|
|
|
780
785
|
task = self.ecs.describe_tasks(cluster=self.cluster_name, tasks=[task_arn]).get("tasks")[0]
|
|
781
786
|
|
|
782
787
|
task_definition_arn = task.get("taskDefinitionArn")
|
|
783
|
-
task_definition = self.ecs.describe_task_definition(taskDefinition=task_definition_arn).get(
|
|
788
|
+
task_definition = self.ecs.describe_task_definition(taskDefinition=task_definition_arn).get(
|
|
784
789
|
"taskDefinition"
|
|
785
790
|
)
|
|
786
791
|
|
|
787
792
|
matching_container_definitions = [
|
|
788
793
|
container_definition
|
|
789
794
|
for container_definition in task_definition.get("containerDefinitions", [])
|
|
790
|
-
if container_definition["name"] == container_name
|
|
795
|
+
if container_definition["name"] == container_name
|
|
791
796
|
]
|
|
792
797
|
if not matching_container_definitions:
|
|
793
798
|
raise Exception(f"Could not find container with name {container_name}")
|
|
@@ -795,7 +800,7 @@ class Client:
|
|
|
795
800
|
container_definition = matching_container_definitions[0]
|
|
796
801
|
|
|
797
802
|
log_stream_prefix = (
|
|
798
|
-
container_definition.get("logConfiguration").get("options").get("awslogs-stream-prefix")
|
|
803
|
+
container_definition.get("logConfiguration").get("options").get("awslogs-stream-prefix")
|
|
799
804
|
)
|
|
800
805
|
container_name = container_definition.get("name")
|
|
801
806
|
task_id = task_arn.split("/")[-1]
|
|
@@ -31,9 +31,15 @@ from dagster_cloud.workspace.ecs.client import (
|
|
|
31
31
|
DEFAULT_ECS_TIMEOUT,
|
|
32
32
|
ECS_EXEC_LINUX_PARAMETERS,
|
|
33
33
|
Client,
|
|
34
|
+
get_debug_ecs_prompt,
|
|
34
35
|
)
|
|
36
|
+
from dagster_cloud.workspace.ecs.run_launcher import CloudEcsRunLauncher
|
|
35
37
|
from dagster_cloud.workspace.ecs.service import Service
|
|
36
|
-
from dagster_cloud.workspace.ecs.utils import
|
|
38
|
+
from dagster_cloud.workspace.ecs.utils import (
|
|
39
|
+
get_ecs_human_readable_label,
|
|
40
|
+
get_server_task_definition_family,
|
|
41
|
+
unique_ecs_resource_name,
|
|
42
|
+
)
|
|
37
43
|
from dagster_cloud.workspace.user_code_launcher import (
|
|
38
44
|
DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT,
|
|
39
45
|
SHARED_USER_CODE_LAUNCHER_CONFIG,
|
|
@@ -50,10 +56,6 @@ from dagster_cloud.workspace.user_code_launcher.utils import (
|
|
|
50
56
|
get_grpc_server_env,
|
|
51
57
|
)
|
|
52
58
|
|
|
53
|
-
from .client import get_debug_ecs_prompt
|
|
54
|
-
from .run_launcher import CloudEcsRunLauncher
|
|
55
|
-
from .utils import get_server_task_definition_family
|
|
56
|
-
|
|
57
59
|
EcsServerHandleType = Service
|
|
58
60
|
|
|
59
61
|
CONTAINER_NAME = "dagster"
|
|
@@ -90,6 +92,7 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
|
|
|
90
92
|
enable_ecs_exec=False,
|
|
91
93
|
server_task_definition_prefix: str = "server",
|
|
92
94
|
run_task_definition_prefix: str = "run",
|
|
95
|
+
assign_public_ip: Optional[bool] = None,
|
|
93
96
|
**kwargs,
|
|
94
97
|
):
|
|
95
98
|
self.ecs = boto3.client("ecs")
|
|
@@ -182,6 +185,7 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
|
|
|
182
185
|
timeout=self._ecs_timeout,
|
|
183
186
|
grace_period=self._ecs_grace_period,
|
|
184
187
|
launch_type=self.launch_type,
|
|
188
|
+
assign_public_ip=assign_public_ip,
|
|
185
189
|
)
|
|
186
190
|
super().__init__(**kwargs)
|
|
187
191
|
|
|
@@ -297,6 +301,16 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
|
|
|
297
301
|
"run_task_definition_prefix": Field(
|
|
298
302
|
str, is_required=False, default_value="dagsterrun"
|
|
299
303
|
),
|
|
304
|
+
"assign_public_ip": Field(
|
|
305
|
+
Noneable(bool),
|
|
306
|
+
is_required=False,
|
|
307
|
+
default_value=None,
|
|
308
|
+
description=(
|
|
309
|
+
"When using the FARGATE launch type, the launcher will attempt to automatically determine if it is "
|
|
310
|
+
"necessary to assign a public IP to the ECS task. In complex network topologies, this automatic "
|
|
311
|
+
"determination may not be accurate. In this case, you can explicitly set this value to True or False."
|
|
312
|
+
),
|
|
313
|
+
),
|
|
300
314
|
},
|
|
301
315
|
SHARED_ECS_CONFIG,
|
|
302
316
|
SHARED_USER_CODE_LAUNCHER_CONFIG,
|
|
@@ -2,8 +2,7 @@ import dagster._check as check
|
|
|
2
2
|
from dagster_aws.ecs import EcsRunLauncher
|
|
3
3
|
|
|
4
4
|
from dagster_cloud.instance import DagsterCloudAgentInstance
|
|
5
|
-
|
|
6
|
-
from .utils import get_run_task_definition_family
|
|
5
|
+
from dagster_cloud.workspace.ecs.utils import get_run_task_definition_family
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
class CloudEcsRunLauncher(EcsRunLauncher[DagsterCloudAgentInstance]):
|
|
@@ -2,10 +2,13 @@ import hashlib
|
|
|
2
2
|
import re
|
|
3
3
|
from typing import Optional
|
|
4
4
|
|
|
5
|
-
from dagster._core.
|
|
5
|
+
from dagster._core.remote_origin import RemoteJobOrigin
|
|
6
6
|
from dagster_aws.ecs.utils import sanitize_family
|
|
7
7
|
|
|
8
|
-
from
|
|
8
|
+
from dagster_cloud.workspace.user_code_launcher.utils import (
|
|
9
|
+
get_human_readable_label,
|
|
10
|
+
unique_resource_name,
|
|
11
|
+
)
|
|
9
12
|
|
|
10
13
|
|
|
11
14
|
def unique_ecs_resource_name(deployment_name, location_name):
|
|
@@ -1 +1 @@
|
|
|
1
|
-
from .launcher import K8sUserCodeLauncher as K8sUserCodeLauncher
|
|
1
|
+
from dagster_cloud.workspace.kubernetes.launcher import K8sUserCodeLauncher as K8sUserCodeLauncher
|
|
@@ -33,17 +33,7 @@ from dagster_cloud.api.dagster_cloud_api import UserCodeDeploymentType
|
|
|
33
33
|
from dagster_cloud.constants import RESERVED_ENV_VAR_NAMES
|
|
34
34
|
from dagster_cloud.execution.cloud_run_launcher.k8s import CloudK8sRunLauncher
|
|
35
35
|
from dagster_cloud.execution.monitoring import CloudContainerResourceLimits
|
|
36
|
-
|
|
37
|
-
from ..user_code_launcher import (
|
|
38
|
-
DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT,
|
|
39
|
-
SHARED_USER_CODE_LAUNCHER_CONFIG,
|
|
40
|
-
DagsterCloudGrpcServer,
|
|
41
|
-
DagsterCloudUserCodeLauncher,
|
|
42
|
-
ServerEndpoint,
|
|
43
|
-
UserCodeLauncherEntry,
|
|
44
|
-
)
|
|
45
|
-
from ..user_code_launcher.utils import deterministic_label_for_location
|
|
46
|
-
from .utils import (
|
|
36
|
+
from dagster_cloud.workspace.kubernetes.utils import (
|
|
47
37
|
SERVICE_PORT,
|
|
48
38
|
construct_code_location_deployment,
|
|
49
39
|
construct_code_location_service,
|
|
@@ -51,11 +41,20 @@ from .utils import (
|
|
|
51
41
|
unique_k8s_resource_name,
|
|
52
42
|
wait_for_deployment_complete,
|
|
53
43
|
)
|
|
44
|
+
from dagster_cloud.workspace.user_code_launcher import (
|
|
45
|
+
DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT,
|
|
46
|
+
SHARED_USER_CODE_LAUNCHER_CONFIG,
|
|
47
|
+
DagsterCloudGrpcServer,
|
|
48
|
+
DagsterCloudUserCodeLauncher,
|
|
49
|
+
ServerEndpoint,
|
|
50
|
+
UserCodeLauncherEntry,
|
|
51
|
+
)
|
|
52
|
+
from dagster_cloud.workspace.user_code_launcher.utils import deterministic_label_for_location
|
|
54
53
|
|
|
55
54
|
DEFAULT_DEPLOYMENT_STARTUP_TIMEOUT = 300
|
|
56
55
|
DEFAULT_IMAGE_PULL_GRACE_PERIOD = 30
|
|
57
56
|
|
|
58
|
-
from
|
|
57
|
+
from dagster_cloud.workspace.config_schema.kubernetes import SHARED_K8S_CONFIG
|
|
59
58
|
|
|
60
59
|
|
|
61
60
|
class K8sHandle(NamedTuple):
|
|
@@ -11,8 +11,7 @@ from dagster_k8s.models import k8s_model_from_dict
|
|
|
11
11
|
from kubernetes import client
|
|
12
12
|
|
|
13
13
|
from dagster_cloud.instance import DagsterCloudAgentInstance
|
|
14
|
-
|
|
15
|
-
from ..user_code_launcher.utils import (
|
|
14
|
+
from dagster_cloud.workspace.user_code_launcher.utils import (
|
|
16
15
|
deterministic_label_for_location,
|
|
17
16
|
get_grpc_server_env,
|
|
18
17
|
get_human_readable_label,
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
from .process import
|
|
2
|
-
|
|
1
|
+
from dagster_cloud.workspace.user_code_launcher.process import (
|
|
2
|
+
ProcessUserCodeLauncher as ProcessUserCodeLauncher,
|
|
3
|
+
)
|
|
4
|
+
from dagster_cloud.workspace.user_code_launcher.user_code_launcher import (
|
|
3
5
|
DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT as DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT,
|
|
4
6
|
DEFAULT_SERVER_TTL_SECONDS as DEFAULT_SERVER_TTL_SECONDS,
|
|
5
7
|
SHARED_USER_CODE_LAUNCHER_CONFIG as SHARED_USER_CODE_LAUNCHER_CONFIG,
|
|
@@ -8,7 +10,7 @@ from .user_code_launcher import (
|
|
|
8
10
|
ServerEndpoint as ServerEndpoint,
|
|
9
11
|
UserCodeLauncherEntry as UserCodeLauncherEntry,
|
|
10
12
|
)
|
|
11
|
-
from .utils import (
|
|
13
|
+
from dagster_cloud.workspace.user_code_launcher.utils import (
|
|
12
14
|
deterministic_label_for_location as deterministic_label_for_location,
|
|
13
15
|
get_human_readable_label as get_human_readable_label,
|
|
14
16
|
unique_resource_name as unique_resource_name,
|
|
@@ -26,9 +26,7 @@ from dagster_cloud.api.dagster_cloud_api import UserCodeDeploymentType
|
|
|
26
26
|
from dagster_cloud.execution.cloud_run_launcher.process import CloudProcessRunLauncher
|
|
27
27
|
from dagster_cloud.execution.monitoring import CloudContainerResourceLimits
|
|
28
28
|
from dagster_cloud.pex.grpc import MultiPexGrpcClient
|
|
29
|
-
from dagster_cloud.workspace.user_code_launcher.
|
|
30
|
-
|
|
31
|
-
from .user_code_launcher import (
|
|
29
|
+
from dagster_cloud.workspace.user_code_launcher.user_code_launcher import (
|
|
32
30
|
DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT,
|
|
33
31
|
SHARED_USER_CODE_LAUNCHER_CONFIG,
|
|
34
32
|
DagsterCloudGrpcServer,
|
|
@@ -36,6 +34,7 @@ from .user_code_launcher import (
|
|
|
36
34
|
ServerEndpoint,
|
|
37
35
|
UserCodeLauncherEntry,
|
|
38
36
|
)
|
|
37
|
+
from dagster_cloud.workspace.user_code_launcher.utils import get_grpc_server_env
|
|
39
38
|
|
|
40
39
|
CLEANUP_ZOMBIE_PROCESSES_INTERVAL = 5
|
|
41
40
|
|
|
@@ -27,13 +27,13 @@ from dagster._core.definitions.selector import JobSelector
|
|
|
27
27
|
from dagster._core.errors import DagsterUserCodeUnreachableError
|
|
28
28
|
from dagster._core.instance import MayHaveInstanceWeakref
|
|
29
29
|
from dagster._core.launcher import RunLauncher
|
|
30
|
-
from dagster._core.
|
|
31
|
-
from dagster._core.remote_representation.external_data import (
|
|
32
|
-
extract_serialized_job_snap_from_serialized_job_data_snap,
|
|
33
|
-
)
|
|
34
|
-
from dagster._core.remote_representation.origin import (
|
|
30
|
+
from dagster._core.remote_origin import (
|
|
35
31
|
CodeLocationOrigin,
|
|
36
32
|
RegisteredCodeLocationOrigin,
|
|
33
|
+
RemoteRepositoryOrigin,
|
|
34
|
+
)
|
|
35
|
+
from dagster._core.remote_representation.external_data import (
|
|
36
|
+
extract_serialized_job_snap_from_serialized_job_data_snap,
|
|
37
37
|
)
|
|
38
38
|
from dagster._grpc.client import DagsterGrpcClient
|
|
39
39
|
from dagster._grpc.types import GetCurrentImageResult, ListRepositoriesResponse
|
|
@@ -45,7 +45,11 @@ from dagster._serdes import (
|
|
|
45
45
|
whitelist_for_serdes,
|
|
46
46
|
)
|
|
47
47
|
from dagster._time import get_current_timestamp
|
|
48
|
-
from dagster._utils.error import
|
|
48
|
+
from dagster._utils.error import (
|
|
49
|
+
SerializableErrorInfo,
|
|
50
|
+
serializable_error_info_from_exc_info,
|
|
51
|
+
truncate_serialized_error,
|
|
52
|
+
)
|
|
49
53
|
from dagster._utils.merger import merge_dicts
|
|
50
54
|
from dagster._utils.typed_dict import init_optional_typeddict
|
|
51
55
|
from dagster_cloud_cli.core.errors import raise_http_error
|
|
@@ -91,7 +95,6 @@ from dagster_cloud.pex.grpc.types import (
|
|
|
91
95
|
ShutdownPexServerArgs,
|
|
92
96
|
)
|
|
93
97
|
from dagster_cloud.util import diff_serializable_namedtuple_map
|
|
94
|
-
from dagster_cloud.util.errors import truncate_serialized_error
|
|
95
98
|
|
|
96
99
|
DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT = 180
|
|
97
100
|
DEFAULT_MAX_TTL_SERVERS = 25
|
|
@@ -398,6 +401,7 @@ class DagsterCloudUserCodeLauncher(
|
|
|
398
401
|
self._run_worker_statuses_dict: dict[str, list[CloudRunWorkerStatus]] = {}
|
|
399
402
|
self._run_worker_monitoring_lock = threading.Lock()
|
|
400
403
|
|
|
404
|
+
self._in_progress_reconcile_start_time = time.time()
|
|
401
405
|
self._reconcile_count = 0
|
|
402
406
|
self._reconcile_grpc_metadata_shutdown_event = threading.Event()
|
|
403
407
|
self._reconcile_grpc_metadata_thread = None
|
|
@@ -641,6 +645,7 @@ class DagsterCloudUserCodeLauncher(
|
|
|
641
645
|
response = self._instance.requests_managed_retries_session.put(
|
|
642
646
|
url=upload_data.presigned_put_url,
|
|
643
647
|
data=file,
|
|
648
|
+
timeout=self._instance.dagster_cloud_api_timeout,
|
|
644
649
|
)
|
|
645
650
|
raise_http_error(response)
|
|
646
651
|
|
|
@@ -884,23 +889,32 @@ class DagsterCloudUserCodeLauncher(
|
|
|
884
889
|
repository_name,
|
|
885
890
|
code_pointer,
|
|
886
891
|
) in list_repositories_response.repository_code_pointer_dict.items():
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
892
|
+
if os.getenv("DAGSTER_CLOUD_USE_STREAMING_EXTERNAL_REPOSITORY"):
|
|
893
|
+
external_repository_chunks = [
|
|
894
|
+
chunk
|
|
895
|
+
async for chunk in client.gen_streaming_external_repository(
|
|
896
|
+
remote_repository_origin=RemoteRepositoryOrigin(
|
|
897
|
+
location_origin,
|
|
898
|
+
repository_name,
|
|
899
|
+
),
|
|
900
|
+
defer_snapshots=True,
|
|
901
|
+
)
|
|
902
|
+
]
|
|
903
|
+
|
|
904
|
+
serialized_repository_data = "".join(
|
|
905
|
+
[
|
|
906
|
+
chunk["serialized_external_repository_chunk"]
|
|
907
|
+
for chunk in external_repository_chunks
|
|
908
|
+
]
|
|
909
|
+
)
|
|
910
|
+
else:
|
|
911
|
+
serialized_repository_data = await client.gen_external_repository(
|
|
890
912
|
remote_repository_origin=RemoteRepositoryOrigin(
|
|
891
913
|
location_origin,
|
|
892
914
|
repository_name,
|
|
893
915
|
),
|
|
894
916
|
defer_snapshots=True,
|
|
895
917
|
)
|
|
896
|
-
]
|
|
897
|
-
|
|
898
|
-
serialized_repository_data = "".join(
|
|
899
|
-
[
|
|
900
|
-
chunk["serialized_external_repository_chunk"]
|
|
901
|
-
for chunk in external_repository_chunks
|
|
902
|
-
]
|
|
903
|
-
)
|
|
904
918
|
|
|
905
919
|
# Don't deserialize in case there are breaking changes - let the server do it
|
|
906
920
|
upload_repo_datas.append(
|
|
@@ -1476,6 +1490,8 @@ class DagsterCloudUserCodeLauncher(
|
|
|
1476
1490
|
self._logger.exception("Failed to refresh actual entries.")
|
|
1477
1491
|
self._last_refreshed_actual_entries = now
|
|
1478
1492
|
|
|
1493
|
+
self._in_progress_reconcile_start_time = time.time()
|
|
1494
|
+
|
|
1479
1495
|
self._reconcile(
|
|
1480
1496
|
desired_entries,
|
|
1481
1497
|
upload_locations,
|
|
@@ -1492,6 +1508,7 @@ class DagsterCloudUserCodeLauncher(
|
|
|
1492
1508
|
f"Started polling for requests from {self._instance.dagster_cloud_url}"
|
|
1493
1509
|
)
|
|
1494
1510
|
|
|
1511
|
+
self._in_progress_reconcile_start_time = None
|
|
1495
1512
|
self._reconcile_count += 1
|
|
1496
1513
|
|
|
1497
1514
|
def _update_metrics_thread(self, shutdown_event):
|
|
@@ -1516,9 +1533,29 @@ class DagsterCloudUserCodeLauncher(
|
|
|
1516
1533
|
# thread-safe since reconcile_count is an integer
|
|
1517
1534
|
return self._reconcile_count > 0
|
|
1518
1535
|
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
return
|
|
1536
|
+
@property
|
|
1537
|
+
def in_progress_reconcile_start_time(self) -> Optional[float]:
|
|
1538
|
+
return self._in_progress_reconcile_start_time
|
|
1539
|
+
|
|
1540
|
+
def _make_check_on_running_server_endpoint(
|
|
1541
|
+
self, server_endpoint: ServerEndpoint
|
|
1542
|
+
) -> Callable[[], Union[ListRepositoriesResponse, SerializableErrorInfo]]:
|
|
1543
|
+
return lambda: deserialize_value(
|
|
1544
|
+
server_endpoint.create_client().list_repositories(),
|
|
1545
|
+
(ListRepositoriesResponse, SerializableErrorInfo),
|
|
1546
|
+
)
|
|
1547
|
+
|
|
1548
|
+
def _trigger_recovery_server_restart(self, deployment_location: DeploymentAndLocation):
|
|
1549
|
+
del self._actual_entries[deployment_location]
|
|
1550
|
+
|
|
1551
|
+
if deployment_location in self._first_unavailable_times:
|
|
1552
|
+
del self._first_unavailable_times[deployment_location]
|
|
1553
|
+
|
|
1554
|
+
# redeploy the multipex server in this case as well to ensure a fresh start
|
|
1555
|
+
# if it resource contrained (and ensure that we don't try to create the same
|
|
1556
|
+
# PexServerHandle again and delete the code location in a loop)
|
|
1557
|
+
if deployment_location in self._multipex_servers:
|
|
1558
|
+
del self._multipex_servers[deployment_location]
|
|
1522
1559
|
|
|
1523
1560
|
def _refresh_actual_entries(self) -> None:
|
|
1524
1561
|
for deployment_location, multipex_server in self._multipex_servers.items():
|
|
@@ -1549,11 +1586,11 @@ class DagsterCloudUserCodeLauncher(
|
|
|
1549
1586
|
if isinstance(grpc_server_or_error, DagsterCloudGrpcServer):
|
|
1550
1587
|
self._logger.warning(
|
|
1551
1588
|
"Pex servers disappeared for running code location %s:%s. Removing actual entries to"
|
|
1552
|
-
" activate reconciliation logic.",
|
|
1589
|
+
" activate reconciliation logic and deploy a new code server and multipex server.",
|
|
1553
1590
|
deployment_name,
|
|
1554
1591
|
location_name,
|
|
1555
1592
|
)
|
|
1556
|
-
|
|
1593
|
+
self._trigger_recovery_server_restart(deployment_location)
|
|
1557
1594
|
|
|
1558
1595
|
# Check to see if any servers have become unresponsive
|
|
1559
1596
|
unavailable_server_timeout = int(
|
|
@@ -1596,15 +1633,22 @@ class DagsterCloudUserCodeLauncher(
|
|
|
1596
1633
|
|
|
1597
1634
|
deployment_name, location_name = deployment_location
|
|
1598
1635
|
try:
|
|
1599
|
-
future.result()
|
|
1600
|
-
|
|
1636
|
+
response_or_error = future.result()
|
|
1601
1637
|
# Successful ping resets the tracked last unavailable time for this code server, if set
|
|
1602
1638
|
self._first_unavailable_times.pop(deployment_location, None)
|
|
1639
|
+
if isinstance(response_or_error, SerializableErrorInfo):
|
|
1640
|
+
# This can happen if the server was previously healthy but restarted
|
|
1641
|
+
# and moved into an error state - attempt to recover
|
|
1642
|
+
self._logger.exception(
|
|
1643
|
+
f"Code server for {deployment_name}:{location_name} unexpectedly moved into an error state. Deploying a new code server. Observed error: \n{response_or_error.to_string()}"
|
|
1644
|
+
)
|
|
1645
|
+
self._trigger_recovery_server_restart(deployment_location)
|
|
1603
1646
|
except Exception as e:
|
|
1604
1647
|
if (
|
|
1605
1648
|
isinstance(e, DagsterUserCodeUnreachableError)
|
|
1606
1649
|
and isinstance(e.__cause__, grpc.RpcError)
|
|
1607
|
-
and cast("grpc.RpcError", e.__cause__).code()
|
|
1650
|
+
and cast("grpc.RpcError", e.__cause__).code()
|
|
1651
|
+
in {grpc.StatusCode.UNAVAILABLE, grpc.StatusCode.UNKNOWN}
|
|
1608
1652
|
):
|
|
1609
1653
|
first_unavailable_time = self._first_unavailable_times.get(
|
|
1610
1654
|
deployment_location
|
|
@@ -1622,14 +1666,7 @@ class DagsterCloudUserCodeLauncher(
|
|
|
1622
1666
|
self._logger.warning(
|
|
1623
1667
|
f"Code server for {deployment_name}:{location_name} has been unresponsive for more than {unavailable_server_timeout} seconds. Deploying a new code server."
|
|
1624
1668
|
)
|
|
1625
|
-
|
|
1626
|
-
del self._first_unavailable_times[deployment_location]
|
|
1627
|
-
|
|
1628
|
-
# redeploy the multipex server in this case as well to ensure a fresh start
|
|
1629
|
-
# (and ensure that we don't try to create the same PexServerHandle again and
|
|
1630
|
-
# delete the code location in a loop)
|
|
1631
|
-
if deployment_location in self._multipex_servers:
|
|
1632
|
-
del self._multipex_servers[deployment_location]
|
|
1669
|
+
self._trigger_recovery_server_restart(deployment_location)
|
|
1633
1670
|
|
|
1634
1671
|
else:
|
|
1635
1672
|
self._logger.exception(
|
|
@@ -125,4 +125,11 @@ def get_grpc_server_env(
|
|
|
125
125
|
if code_location_deploy_data.executable_path
|
|
126
126
|
else {}
|
|
127
127
|
),
|
|
128
|
+
**(
|
|
129
|
+
{
|
|
130
|
+
"DAGSTER_CLI_API_GRPC_AUTOLOAD_DEFS_MODULE_NAME": code_location_deploy_data.autoload_defs_module_name
|
|
131
|
+
}
|
|
132
|
+
if code_location_deploy_data.autoload_defs_module_name
|
|
133
|
+
else {}
|
|
134
|
+
),
|
|
128
135
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dagster-cloud
|
|
3
|
-
Version: 1.10.11
|
|
3
|
+
Version: 1.12.6
|
|
4
4
|
Author-email: Elementl <support@elementl.com>
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Project-URL: Homepage, https://dagster.io/cloud
|
|
@@ -26,15 +26,16 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
26
26
|
Classifier: Topic :: System :: Monitoring
|
|
27
27
|
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
28
28
|
Classifier: Operating System :: OS Independent
|
|
29
|
-
Requires-Python: <3.
|
|
29
|
+
Requires-Python: <3.14,>=3.9
|
|
30
30
|
Description-Content-Type: text/markdown
|
|
31
|
-
Requires-Dist: dagster==1.10.11
|
|
32
|
-
Requires-Dist: dagster-
|
|
31
|
+
Requires-Dist: dagster==1.12.6
|
|
32
|
+
Requires-Dist: dagster-shared==1.12.6
|
|
33
|
+
Requires-Dist: dagster-cloud-cli==1.12.6
|
|
33
34
|
Requires-Dist: opentelemetry-api<2,>=1.27.0
|
|
34
35
|
Requires-Dist: opentelemetry-sdk<2,>=1.27.0
|
|
35
36
|
Requires-Dist: opentelemetry-exporter-otlp-proto-grpc<2,>=1.27.0
|
|
36
37
|
Requires-Dist: opentelemetry-exporter-otlp-proto-http<2,>=1.27.0
|
|
37
|
-
Requires-Dist: pex<
|
|
38
|
+
Requires-Dist: pex<2.60.0,>=2.1.132
|
|
38
39
|
Requires-Dist: questionary
|
|
39
40
|
Requires-Dist: requests
|
|
40
41
|
Requires-Dist: typer
|
|
@@ -64,12 +65,12 @@ Provides-Extra: insights
|
|
|
64
65
|
Requires-Dist: pyarrow; extra == "insights"
|
|
65
66
|
Provides-Extra: docker
|
|
66
67
|
Requires-Dist: docker; extra == "docker"
|
|
67
|
-
Requires-Dist: dagster-docker==0.
|
|
68
|
+
Requires-Dist: dagster-docker==0.28.6; extra == "docker"
|
|
68
69
|
Provides-Extra: kubernetes
|
|
69
70
|
Requires-Dist: kubernetes; extra == "kubernetes"
|
|
70
|
-
Requires-Dist: dagster-k8s==0.
|
|
71
|
+
Requires-Dist: dagster-k8s==0.28.6; extra == "kubernetes"
|
|
71
72
|
Provides-Extra: ecs
|
|
72
|
-
Requires-Dist: dagster-aws==0.
|
|
73
|
+
Requires-Dist: dagster-aws==0.28.6; extra == "ecs"
|
|
73
74
|
Requires-Dist: boto3; extra == "ecs"
|
|
74
75
|
Provides-Extra: sandbox
|
|
75
76
|
Requires-Dist: supervisor; extra == "sandbox"
|