dagster-cloud 1.12.10__py3-none-any.whl → 1.12.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_cloud/agent/dagster_cloud_agent.py +43 -86
- dagster_cloud/instance/__init__.py +1 -28
- dagster_cloud/pex/grpc/server/manager.py +3 -3
- dagster_cloud/version.py +1 -1
- dagster_cloud/workspace/docker/__init__.py +2 -1
- dagster_cloud/workspace/ecs/client.py +1 -1
- dagster_cloud/workspace/ecs/launcher.py +8 -4
- dagster_cloud/workspace/kubernetes/launcher.py +5 -3
- dagster_cloud/workspace/kubernetes/utils.py +6 -3
- dagster_cloud/workspace/user_code_launcher/user_code_launcher.py +41 -104
- dagster_cloud/workspace/user_code_launcher/utils.py +14 -0
- {dagster_cloud-1.12.10.dist-info → dagster_cloud-1.12.12.dist-info}/METADATA +16 -13
- {dagster_cloud-1.12.10.dist-info → dagster_cloud-1.12.12.dist-info}/RECORD +15 -37
- {dagster_cloud-1.12.10.dist-info → dagster_cloud-1.12.12.dist-info}/WHEEL +1 -1
- dagster_cloud/agent/instrumentation/__init__.py +0 -0
- dagster_cloud/agent/instrumentation/constants.py +0 -2
- dagster_cloud/agent/instrumentation/run_launch.py +0 -23
- dagster_cloud/agent/instrumentation/schedule.py +0 -34
- dagster_cloud/agent/instrumentation/sensor.py +0 -34
- dagster_cloud/opentelemetry/__init__.py +0 -0
- dagster_cloud/opentelemetry/config/__init__.py +0 -73
- dagster_cloud/opentelemetry/config/exporter.py +0 -81
- dagster_cloud/opentelemetry/config/log_record_processor.py +0 -40
- dagster_cloud/opentelemetry/config/logging_handler.py +0 -14
- dagster_cloud/opentelemetry/config/meter_provider.py +0 -9
- dagster_cloud/opentelemetry/config/metric_reader.py +0 -39
- dagster_cloud/opentelemetry/controller.py +0 -319
- dagster_cloud/opentelemetry/enum.py +0 -58
- dagster_cloud/opentelemetry/factories/__init__.py +0 -1
- dagster_cloud/opentelemetry/factories/logs.py +0 -113
- dagster_cloud/opentelemetry/factories/metrics.py +0 -121
- dagster_cloud/opentelemetry/metrics/__init__.py +0 -0
- dagster_cloud/opentelemetry/metrics/meter.py +0 -140
- dagster_cloud/opentelemetry/observers/__init__.py +0 -0
- dagster_cloud/opentelemetry/observers/dagster_exception_handler.py +0 -40
- dagster_cloud/opentelemetry/observers/execution_observer.py +0 -178
- {dagster_cloud-1.12.10.dist-info → dagster_cloud-1.12.12.dist-info}/top_level.txt +0 -0
|
@@ -30,11 +30,8 @@ from dagster._utils.merger import merge_dicts
|
|
|
30
30
|
from dagster._utils.typed_dict import init_optional_typeddict
|
|
31
31
|
from dagster_cloud_cli.core.errors import DagsterCloudHTTPError, raise_http_error
|
|
32
32
|
from dagster_cloud_cli.core.workspace import CodeLocationDeployData
|
|
33
|
+
from dagster_shared.record import replace
|
|
33
34
|
|
|
34
|
-
from dagster_cloud.agent.instrumentation.constants import DAGSTER_CLOUD_AGENT_METRIC_PREFIX
|
|
35
|
-
from dagster_cloud.agent.instrumentation.run_launch import extract_run_attributes
|
|
36
|
-
from dagster_cloud.agent.instrumentation.schedule import inspect_schedule_result
|
|
37
|
-
from dagster_cloud.agent.instrumentation.sensor import inspect_sensor_result
|
|
38
35
|
from dagster_cloud.agent.queries import (
|
|
39
36
|
ADD_AGENT_HEARTBEATS_MUTATION,
|
|
40
37
|
DEPLOYMENTS_QUERY,
|
|
@@ -58,7 +55,6 @@ from dagster_cloud.api.dagster_cloud_api import (
|
|
|
58
55
|
)
|
|
59
56
|
from dagster_cloud.batching import Batcher
|
|
60
57
|
from dagster_cloud.instance import DagsterCloudAgentInstance
|
|
61
|
-
from dagster_cloud.opentelemetry.observers.execution_observer import observe_execution
|
|
62
58
|
from dagster_cloud.util import SERVER_HANDLE_TAG, compressed_namedtuple_upload_file, is_isolated_run
|
|
63
59
|
from dagster_cloud.version import __version__
|
|
64
60
|
from dagster_cloud.workspace.user_code_launcher import (
|
|
@@ -907,8 +903,9 @@ class DagsterCloudAgent:
|
|
|
907
903
|
user_code_launcher, deployment_name, cast("str", location_name)
|
|
908
904
|
)
|
|
909
905
|
serialized_snapshot_or_error = client.execution_plan_snapshot(
|
|
910
|
-
execution_plan_snapshot_args=
|
|
911
|
-
|
|
906
|
+
execution_plan_snapshot_args=replace(
|
|
907
|
+
request.request_args,
|
|
908
|
+
instance_ref=self._get_user_code_instance_ref(deployment_name),
|
|
912
909
|
)
|
|
913
910
|
)
|
|
914
911
|
return DagsterCloudApiGrpcResponse(
|
|
@@ -974,64 +971,34 @@ class DagsterCloudAgent:
|
|
|
974
971
|
user_code_launcher, deployment_name, cast("str", location_name)
|
|
975
972
|
)
|
|
976
973
|
|
|
977
|
-
args =
|
|
978
|
-
|
|
974
|
+
args = replace(
|
|
975
|
+
request.request_args,
|
|
976
|
+
instance_ref=self._get_user_code_instance_ref(deployment_name),
|
|
979
977
|
)
|
|
980
978
|
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
with observe_execution(
|
|
989
|
-
opentelemetry=self._instance.opentelemetry,
|
|
990
|
-
event_key=f"{DAGSTER_CLOUD_AGENT_METRIC_PREFIX}.schedule.evaluation",
|
|
991
|
-
short_description="schedule evaluation requests",
|
|
992
|
-
attributes=schedule_attributes,
|
|
993
|
-
result_evaluator_callback=inspect_schedule_result,
|
|
994
|
-
) as observer:
|
|
995
|
-
serialized_schedule_data_or_error = client.external_schedule_execution(
|
|
996
|
-
external_schedule_execution_args=args,
|
|
997
|
-
)
|
|
998
|
-
observer.evaluate_result(
|
|
999
|
-
serialized_data_or_error=serialized_schedule_data_or_error,
|
|
1000
|
-
)
|
|
1001
|
-
return DagsterCloudApiGrpcResponse(
|
|
1002
|
-
serialized_response_or_error=serialized_schedule_data_or_error
|
|
1003
|
-
)
|
|
979
|
+
serialized_schedule_data_or_error = client.external_schedule_execution(
|
|
980
|
+
external_schedule_execution_args=args,
|
|
981
|
+
)
|
|
982
|
+
return DagsterCloudApiGrpcResponse(
|
|
983
|
+
serialized_response_or_error=serialized_schedule_data_or_error
|
|
984
|
+
)
|
|
1004
985
|
|
|
1005
986
|
elif api_name == DagsterCloudApi.GET_EXTERNAL_SENSOR_EXECUTION_DATA:
|
|
1006
987
|
client = self._get_grpc_client(
|
|
1007
988
|
user_code_launcher, deployment_name, cast("str", location_name)
|
|
1008
989
|
)
|
|
1009
990
|
|
|
1010
|
-
args =
|
|
1011
|
-
|
|
991
|
+
args = replace(
|
|
992
|
+
request.request_args,
|
|
993
|
+
instance_ref=self._get_user_code_instance_ref(deployment_name),
|
|
1012
994
|
)
|
|
1013
995
|
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
with observe_execution(
|
|
1022
|
-
opentelemetry=self._instance.opentelemetry,
|
|
1023
|
-
event_key=f"{DAGSTER_CLOUD_AGENT_METRIC_PREFIX}.sensor.evaluation",
|
|
1024
|
-
short_description="sensor evaluation requests",
|
|
1025
|
-
attributes=sensor_attributes,
|
|
1026
|
-
result_evaluator_callback=inspect_sensor_result,
|
|
1027
|
-
) as observer:
|
|
1028
|
-
serialized_sensor_data_or_error = client.external_sensor_execution(
|
|
1029
|
-
sensor_execution_args=args,
|
|
1030
|
-
)
|
|
1031
|
-
observer.evaluate_result(serialized_sensor_data_or_error)
|
|
1032
|
-
return DagsterCloudApiGrpcResponse(
|
|
1033
|
-
serialized_response_or_error=serialized_sensor_data_or_error
|
|
1034
|
-
)
|
|
996
|
+
serialized_sensor_data_or_error = client.external_sensor_execution(
|
|
997
|
+
sensor_execution_args=args,
|
|
998
|
+
)
|
|
999
|
+
return DagsterCloudApiGrpcResponse(
|
|
1000
|
+
serialized_response_or_error=serialized_sensor_data_or_error
|
|
1001
|
+
)
|
|
1035
1002
|
elif api_name == DagsterCloudApi.GET_EXTERNAL_NOTEBOOK_DATA:
|
|
1036
1003
|
client = self._get_grpc_client(
|
|
1037
1004
|
user_code_launcher, deployment_name, cast("str", location_name)
|
|
@@ -1062,42 +1029,32 @@ class DagsterCloudAgent:
|
|
|
1062
1029
|
),
|
|
1063
1030
|
)
|
|
1064
1031
|
|
|
1065
|
-
|
|
1066
|
-
with observe_execution(
|
|
1067
|
-
opentelemetry=self._instance.opentelemetry,
|
|
1068
|
-
event_key=f"{DAGSTER_CLOUD_AGENT_METRIC_PREFIX}.run.launches",
|
|
1069
|
-
short_description="run execution requests",
|
|
1070
|
-
attributes=run_attributes,
|
|
1071
|
-
) as observer:
|
|
1072
|
-
launcher = scoped_instance.get_run_launcher_for_run(run) # type: ignore # (instance subclass)
|
|
1032
|
+
launcher = scoped_instance.get_run_launcher_for_run(run) # type: ignore # (instance subclass)
|
|
1073
1033
|
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1034
|
+
if is_isolated_run(run):
|
|
1035
|
+
launcher.launch_run(LaunchRunContext(dagster_run=run, workspace=None))
|
|
1036
|
+
else:
|
|
1037
|
+
scoped_instance.report_engine_event(
|
|
1038
|
+
f"Launching {run.run_id} without an isolated run environment.",
|
|
1039
|
+
run,
|
|
1040
|
+
cls=self.__class__,
|
|
1041
|
+
)
|
|
1082
1042
|
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1043
|
+
run_location_name = cast(
|
|
1044
|
+
"str",
|
|
1045
|
+
run.remote_job_origin.repository_origin.code_location_origin.location_name,
|
|
1046
|
+
)
|
|
1087
1047
|
|
|
1088
|
-
|
|
1089
|
-
deployment_name, run_location_name
|
|
1090
|
-
)
|
|
1048
|
+
server = user_code_launcher.get_grpc_server(deployment_name, run_location_name)
|
|
1091
1049
|
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1050
|
+
# Record the server handle that we launched it on to for run monitoring
|
|
1051
|
+
scoped_instance.add_run_tags(
|
|
1052
|
+
run.run_id, new_tags={SERVER_HANDLE_TAG: str(server.server_handle)}
|
|
1053
|
+
)
|
|
1096
1054
|
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
observer.evaluate_result(run=run)
|
|
1055
|
+
launcher.launch_run_from_grpc_client(
|
|
1056
|
+
scoped_instance, run, server.server_endpoint.create_client()
|
|
1057
|
+
)
|
|
1101
1058
|
|
|
1102
1059
|
return DagsterCloudApiSuccess()
|
|
1103
1060
|
elif api_name == DagsterCloudApi.TERMINATE_RUN:
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import copy
|
|
2
2
|
import socket
|
|
3
3
|
import uuid
|
|
4
|
-
from collections.abc import Mapping, Sequence
|
|
4
|
+
from collections.abc import Sequence
|
|
5
5
|
from contextlib import ExitStack
|
|
6
6
|
from functools import lru_cache
|
|
7
7
|
from typing import TYPE_CHECKING, Any, Optional, Union
|
|
@@ -34,11 +34,8 @@ from urllib3 import Retry
|
|
|
34
34
|
|
|
35
35
|
from dagster_cloud.agent import AgentQueuesConfig
|
|
36
36
|
from dagster_cloud.auth.constants import decode_agent_token
|
|
37
|
-
from dagster_cloud.opentelemetry.config import opentelemetry_config_schema
|
|
38
|
-
from dagster_cloud.opentelemetry.controller import OpenTelemetryController
|
|
39
37
|
from dagster_cloud.storage.client import dagster_cloud_api_config
|
|
40
38
|
from dagster_cloud.util import get_env_names_from_config, is_isolated_run
|
|
41
|
-
from dagster_cloud.version import __version__
|
|
42
39
|
|
|
43
40
|
if TYPE_CHECKING:
|
|
44
41
|
from requests import Session
|
|
@@ -83,7 +80,6 @@ class DagsterCloudAgentInstance(DagsterCloudInstance):
|
|
|
83
80
|
allowed_full_deployment_locations=None,
|
|
84
81
|
allowed_branch_deployment_locations=None,
|
|
85
82
|
agent_metrics=None,
|
|
86
|
-
opentelemetry=None,
|
|
87
83
|
**kwargs,
|
|
88
84
|
):
|
|
89
85
|
super().__init__(*args, **kwargs)
|
|
@@ -155,12 +151,6 @@ class DagsterCloudAgentInstance(DagsterCloudInstance):
|
|
|
155
151
|
allowed_branch_deployment_locations
|
|
156
152
|
)
|
|
157
153
|
|
|
158
|
-
self._opentelemetry_config: Optional[Mapping[str, Any]] = self._get_processed_config(
|
|
159
|
-
"opentelemetry", opentelemetry, opentelemetry_config_schema()
|
|
160
|
-
)
|
|
161
|
-
|
|
162
|
-
self._opentelemetry_controller: Optional[OpenTelemetryController] = None
|
|
163
|
-
|
|
164
154
|
self._instance_uuid = str(uuid.uuid4())
|
|
165
155
|
|
|
166
156
|
@property
|
|
@@ -582,9 +572,6 @@ instance_class:
|
|
|
582
572
|
is_required=False,
|
|
583
573
|
description="List of allowed location names for branch deployments",
|
|
584
574
|
),
|
|
585
|
-
"opentelemetry": Field(
|
|
586
|
-
opentelemetry_config_schema(), is_required=False, default_value={"enabled": False}
|
|
587
|
-
),
|
|
588
575
|
}
|
|
589
576
|
|
|
590
577
|
@classmethod
|
|
@@ -667,9 +654,6 @@ instance_class:
|
|
|
667
654
|
|
|
668
655
|
def dispose(self) -> None:
|
|
669
656
|
super().dispose()
|
|
670
|
-
if self._opentelemetry_controller:
|
|
671
|
-
self._opentelemetry_controller.dispose()
|
|
672
|
-
self._opentelemetry_controller = None
|
|
673
657
|
self._exit_stack.close()
|
|
674
658
|
|
|
675
659
|
@property
|
|
@@ -690,17 +674,6 @@ instance_class:
|
|
|
690
674
|
# potentially overridden interval in the serverless user code launcher
|
|
691
675
|
return 30
|
|
692
676
|
|
|
693
|
-
@property
|
|
694
|
-
def opentelemetry(self) -> OpenTelemetryController:
|
|
695
|
-
if not self._opentelemetry_controller:
|
|
696
|
-
self._opentelemetry_controller = OpenTelemetryController(
|
|
697
|
-
instance_id=self.instance_uuid,
|
|
698
|
-
version=__version__,
|
|
699
|
-
config=self._opentelemetry_config,
|
|
700
|
-
)
|
|
701
|
-
|
|
702
|
-
return self._opentelemetry_controller
|
|
703
|
-
|
|
704
677
|
|
|
705
678
|
@lru_cache(maxsize=100) # Scales on order of active branch deployments
|
|
706
679
|
def _cached_inject_deployment(
|
|
@@ -16,7 +16,7 @@ from dagster._utils.error import SerializableErrorInfo, serializable_error_info_
|
|
|
16
16
|
from dagster_cloud_cli.core.workspace import CodeLocationDeployData, PexMetadata
|
|
17
17
|
from dagster_shared import seven
|
|
18
18
|
from dagster_shared.ipc import open_ipc_subprocess
|
|
19
|
-
from pydantic import BaseModel, Extra
|
|
19
|
+
from pydantic import BaseModel
|
|
20
20
|
|
|
21
21
|
from dagster_cloud.pex.grpc.server.registry import PexS3Registry
|
|
22
22
|
from dagster_cloud.pex.grpc.types import PexServerHandle
|
|
@@ -25,7 +25,7 @@ from dagster_cloud.workspace.user_code_launcher.utils import get_grpc_server_env
|
|
|
25
25
|
logger = logging.getLogger("dagster.multipex")
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
class PexProcessEntry(BaseModel, frozen=True, extra=Extra.forbid, arbitrary_types_allowed=True):
|
|
28
|
+
class PexProcessEntry(BaseModel, frozen=True, extra="forbid", arbitrary_types_allowed=True):
|
|
29
29
|
pex_server_handle: PexServerHandle
|
|
30
30
|
grpc_server_process: subprocess.Popen
|
|
31
31
|
grpc_client: DagsterGrpcClient
|
|
@@ -33,7 +33,7 @@ class PexProcessEntry(BaseModel, frozen=True, extra=Extra.forbid, arbitrary_type
|
|
|
33
33
|
heartbeat_thread: threading.Thread
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
class PexErrorEntry(BaseModel, frozen=True, extra=Extra.forbid, arbitrary_types_allowed=True):
|
|
36
|
+
class PexErrorEntry(BaseModel, frozen=True, extra="forbid", arbitrary_types_allowed=True):
|
|
37
37
|
pex_server_handle: PexServerHandle
|
|
38
38
|
error: SerializableErrorInfo
|
|
39
39
|
|
dagster_cloud/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.12.10"
|
|
1
|
+
__version__ = "1.12.12"
|
|
@@ -40,6 +40,7 @@ from dagster_cloud.workspace.user_code_launcher import (
|
|
|
40
40
|
from dagster_cloud.workspace.user_code_launcher.user_code_launcher import UserCodeLauncherEntry
|
|
41
41
|
from dagster_cloud.workspace.user_code_launcher.utils import (
|
|
42
42
|
deterministic_label_for_location,
|
|
43
|
+
get_code_server_port,
|
|
43
44
|
get_grpc_server_env,
|
|
44
45
|
)
|
|
45
46
|
|
|
@@ -293,7 +294,7 @@ class DockerUserCodeLauncher(
|
|
|
293
294
|
|
|
294
295
|
has_network = len(self._networks) > 0
|
|
295
296
|
if has_network:
|
|
296
|
-
grpc_port =
|
|
297
|
+
grpc_port = get_code_server_port()
|
|
297
298
|
hostname = container_name
|
|
298
299
|
else:
|
|
299
300
|
grpc_port = find_free_port()
|
|
@@ -17,7 +17,7 @@ from dagster_aws.ecs.utils import is_transient_task_stopped_reason, task_definit
|
|
|
17
17
|
|
|
18
18
|
from dagster_cloud.workspace.ecs.service import Service
|
|
19
19
|
|
|
20
|
-
DEFAULT_ECS_TIMEOUT =
|
|
20
|
+
DEFAULT_ECS_TIMEOUT = 600
|
|
21
21
|
DEFAULT_ECS_GRACE_PERIOD = 30
|
|
22
22
|
|
|
23
23
|
STOPPED_TASK_GRACE_PERIOD = 30
|
|
@@ -53,13 +53,13 @@ from dagster_cloud.workspace.user_code_launcher.user_code_launcher import (
|
|
|
53
53
|
)
|
|
54
54
|
from dagster_cloud.workspace.user_code_launcher.utils import (
|
|
55
55
|
deterministic_label_for_location,
|
|
56
|
+
get_code_server_port,
|
|
56
57
|
get_grpc_server_env,
|
|
57
58
|
)
|
|
58
59
|
|
|
59
60
|
EcsServerHandleType = Service
|
|
60
61
|
|
|
61
62
|
CONTAINER_NAME = "dagster"
|
|
62
|
-
PORT = 4000
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], ConfigurableClass):
|
|
@@ -405,7 +405,8 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
|
|
|
405
405
|
|
|
406
406
|
if metadata.pex_metadata:
|
|
407
407
|
command = metadata.get_multipex_server_command(
|
|
408
|
-
|
|
408
|
+
get_code_server_port(),
|
|
409
|
+
metrics_enabled=self._instance.user_code_launcher.code_server_metrics_enabled,
|
|
409
410
|
)
|
|
410
411
|
additional_env = metadata.get_multipex_server_env()
|
|
411
412
|
tags = {
|
|
@@ -418,7 +419,10 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
|
|
|
418
419
|
metrics_enabled=self._instance.user_code_launcher.code_server_metrics_enabled
|
|
419
420
|
)
|
|
420
421
|
additional_env = get_grpc_server_env(
|
|
421
|
-
metadata,
|
|
422
|
+
metadata,
|
|
423
|
+
get_code_server_port(),
|
|
424
|
+
location_name,
|
|
425
|
+
self._instance.ref_for_deployment(deployment_name),
|
|
422
426
|
)
|
|
423
427
|
tags = {
|
|
424
428
|
"dagster/grpc_server": "1",
|
|
@@ -524,7 +528,7 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
|
|
|
524
528
|
|
|
525
529
|
endpoint = ServerEndpoint(
|
|
526
530
|
host=service.hostname,
|
|
527
|
-
port=PORT,
|
|
531
|
+
port=get_code_server_port(),
|
|
528
532
|
socket=None,
|
|
529
533
|
)
|
|
530
534
|
|
|
@@ -34,7 +34,6 @@ from dagster_cloud.constants import RESERVED_ENV_VAR_NAMES
|
|
|
34
34
|
from dagster_cloud.execution.cloud_run_launcher.k8s import CloudK8sRunLauncher
|
|
35
35
|
from dagster_cloud.execution.monitoring import CloudContainerResourceLimits
|
|
36
36
|
from dagster_cloud.workspace.kubernetes.utils import (
|
|
37
|
-
SERVICE_PORT,
|
|
38
37
|
construct_code_location_deployment,
|
|
39
38
|
construct_code_location_service,
|
|
40
39
|
get_deployment_failure_debug_info,
|
|
@@ -49,7 +48,10 @@ from dagster_cloud.workspace.user_code_launcher import (
|
|
|
49
48
|
ServerEndpoint,
|
|
50
49
|
UserCodeLauncherEntry,
|
|
51
50
|
)
|
|
52
|
-
from dagster_cloud.workspace.user_code_launcher.utils import deterministic_label_for_location
|
|
51
|
+
from dagster_cloud.workspace.user_code_launcher.utils import (
|
|
52
|
+
deterministic_label_for_location,
|
|
53
|
+
get_code_server_port,
|
|
54
|
+
)
|
|
53
55
|
|
|
54
56
|
DEFAULT_DEPLOYMENT_STARTUP_TIMEOUT = 300
|
|
55
57
|
DEFAULT_IMAGE_PULL_GRACE_PERIOD = 30
|
|
@@ -506,7 +508,7 @@ class K8sUserCodeLauncher(DagsterCloudUserCodeLauncher[K8sHandle], ConfigurableC
|
|
|
506
508
|
|
|
507
509
|
endpoint = ServerEndpoint(
|
|
508
510
|
host=host,
|
|
509
|
-
port=SERVICE_PORT,
|
|
511
|
+
port=get_code_server_port(),
|
|
510
512
|
socket=None,
|
|
511
513
|
)
|
|
512
514
|
|
|
@@ -13,13 +13,13 @@ from kubernetes import client
|
|
|
13
13
|
from dagster_cloud.instance import DagsterCloudAgentInstance
|
|
14
14
|
from dagster_cloud.workspace.user_code_launcher.utils import (
|
|
15
15
|
deterministic_label_for_location,
|
|
16
|
+
get_code_server_port,
|
|
16
17
|
get_grpc_server_env,
|
|
17
18
|
get_human_readable_label,
|
|
18
19
|
unique_resource_name,
|
|
19
20
|
)
|
|
20
21
|
|
|
21
22
|
MANAGED_RESOURCES_LABEL = {"managed_by": "K8sUserCodeLauncher"}
|
|
22
|
-
SERVICE_PORT = 4000
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
def _get_dagster_k8s_labels(
|
|
@@ -106,7 +106,7 @@ def construct_code_location_service(
|
|
|
106
106
|
},
|
|
107
107
|
"spec": {
|
|
108
108
|
"selector": {"user-deployment": service_name},
|
|
109
|
-
"ports": [{"name": "grpc", "protocol": "TCP", "port": SERVICE_PORT}],
|
|
109
|
+
"ports": [{"name": "grpc", "protocol": "TCP", "port": get_code_server_port()}],
|
|
110
110
|
},
|
|
111
111
|
},
|
|
112
112
|
)
|
|
@@ -123,7 +123,10 @@ def construct_code_location_deployment(
|
|
|
123
123
|
server_timestamp: float,
|
|
124
124
|
):
|
|
125
125
|
env = get_grpc_server_env(
|
|
126
|
-
metadata,
|
|
126
|
+
metadata,
|
|
127
|
+
get_code_server_port(),
|
|
128
|
+
location_name,
|
|
129
|
+
instance.ref_for_deployment(deployment_name),
|
|
127
130
|
)
|
|
128
131
|
|
|
129
132
|
user_defined_config = container_context.server_k8s_config
|
|
@@ -56,7 +56,6 @@ from dagster_cloud_cli.core.errors import raise_http_error
|
|
|
56
56
|
from dagster_cloud_cli.core.workspace import CodeLocationDeployData
|
|
57
57
|
from typing_extensions import Self, TypeAlias
|
|
58
58
|
|
|
59
|
-
from dagster_cloud.agent.instrumentation.constants import DAGSTER_CLOUD_AGENT_METRIC_PREFIX
|
|
60
59
|
from dagster_cloud.agent.queries import GET_AGENTS_QUERY
|
|
61
60
|
from dagster_cloud.api.dagster_cloud_api import (
|
|
62
61
|
CheckSnapshotResult,
|
|
@@ -85,8 +84,6 @@ from dagster_cloud.execution.monitoring import (
|
|
|
85
84
|
start_run_worker_monitoring_thread,
|
|
86
85
|
)
|
|
87
86
|
from dagster_cloud.instance import DagsterCloudAgentInstance
|
|
88
|
-
from dagster_cloud.opentelemetry.controller import OpenTelemetryController
|
|
89
|
-
from dagster_cloud.opentelemetry.observers.execution_observer import observe_execution
|
|
90
87
|
from dagster_cloud.pex.grpc.client import MultiPexGrpcClient
|
|
91
88
|
from dagster_cloud.pex.grpc.types import (
|
|
92
89
|
CreatePexServerArgs,
|
|
@@ -1125,11 +1122,6 @@ class DagsterCloudUserCodeLauncher(
|
|
|
1125
1122
|
):
|
|
1126
1123
|
deployment_name, location_name = to_update_key
|
|
1127
1124
|
|
|
1128
|
-
attributes = {
|
|
1129
|
-
"deployment": deployment_name,
|
|
1130
|
-
"location": location_name,
|
|
1131
|
-
}
|
|
1132
|
-
|
|
1133
1125
|
code_location_deploy_data = desired_entry.code_location_deploy_data
|
|
1134
1126
|
pex_metadata = code_location_deploy_data.pex_metadata
|
|
1135
1127
|
deployment_info = (
|
|
@@ -1142,19 +1134,13 @@ class DagsterCloudUserCodeLauncher(
|
|
|
1142
1134
|
self._logger.info(
|
|
1143
1135
|
f"Waiting for new grpc server for {deployment_name}:{location_name} for {deployment_info} to be ready..."
|
|
1144
1136
|
)
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
deployment_name,
|
|
1153
|
-
location_name,
|
|
1154
|
-
desired_entry,
|
|
1155
|
-
server_or_error.server_handle,
|
|
1156
|
-
server_or_error.server_endpoint,
|
|
1157
|
-
)
|
|
1137
|
+
await self._wait_for_new_server_ready(
|
|
1138
|
+
deployment_name,
|
|
1139
|
+
location_name,
|
|
1140
|
+
desired_entry,
|
|
1141
|
+
server_or_error.server_handle,
|
|
1142
|
+
server_or_error.server_endpoint,
|
|
1143
|
+
)
|
|
1158
1144
|
except Exception:
|
|
1159
1145
|
error_info = serializable_error_info_from_exc_info(sys.exc_info())
|
|
1160
1146
|
self._logger.error(
|
|
@@ -1164,18 +1150,12 @@ class DagsterCloudUserCodeLauncher(
|
|
|
1164
1150
|
server_or_error = error_info
|
|
1165
1151
|
|
|
1166
1152
|
if should_upload:
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
)
|
|
1173
|
-
await self._try_update_location_data(
|
|
1174
|
-
deployment_name,
|
|
1175
|
-
location_name,
|
|
1176
|
-
server_or_error,
|
|
1177
|
-
desired_entry.code_location_deploy_data,
|
|
1178
|
-
)
|
|
1153
|
+
await self._try_update_location_data(
|
|
1154
|
+
deployment_name,
|
|
1155
|
+
location_name,
|
|
1156
|
+
server_or_error,
|
|
1157
|
+
desired_entry.code_location_deploy_data,
|
|
1158
|
+
)
|
|
1179
1159
|
|
|
1180
1160
|
# Once we've verified that the new server has uploaded its data successfully, swap in
|
|
1181
1161
|
# the server to start serving new requests
|
|
@@ -1558,7 +1538,7 @@ class DagsterCloudUserCodeLauncher(
|
|
|
1558
1538
|
del self._multipex_servers[deployment_location]
|
|
1559
1539
|
|
|
1560
1540
|
def _refresh_actual_entries(self) -> None:
|
|
1561
|
-
for deployment_location, multipex_server in self._multipex_servers.items():
|
|
1541
|
+
for deployment_location, multipex_server in self._multipex_servers.copy().items():
|
|
1562
1542
|
if deployment_location in self._actual_entries:
|
|
1563
1543
|
# If a multipex server exists, we query it over gRPC
|
|
1564
1544
|
# to make sure the pex server is still available.
|
|
@@ -1836,28 +1816,16 @@ class DagsterCloudUserCodeLauncher(
|
|
|
1836
1816
|
# confirm it's a valid image since _start_new_server_spinup will launch a container
|
|
1837
1817
|
self._check_for_image(desired_entry.code_location_deploy_data)
|
|
1838
1818
|
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
1844
|
-
|
|
1845
|
-
|
|
1846
|
-
|
|
1847
|
-
|
|
1848
|
-
|
|
1849
|
-
attributes=attributes,
|
|
1850
|
-
):
|
|
1851
|
-
multipex_server = self._start_new_server_spinup(
|
|
1852
|
-
deployment_name, location_name, desired_entry
|
|
1853
|
-
)
|
|
1854
|
-
self._multipex_servers[to_update_key] = multipex_server
|
|
1855
|
-
assert self._get_multipex_server(
|
|
1856
|
-
deployment_name,
|
|
1857
|
-
location_name,
|
|
1858
|
-
desired_entry.code_location_deploy_data,
|
|
1859
|
-
)
|
|
1860
|
-
new_multipex_servers[to_update_key] = multipex_server
|
|
1819
|
+
multipex_server = self._start_new_server_spinup(
|
|
1820
|
+
deployment_name, location_name, desired_entry
|
|
1821
|
+
)
|
|
1822
|
+
self._multipex_servers[to_update_key] = multipex_server
|
|
1823
|
+
assert self._get_multipex_server(
|
|
1824
|
+
deployment_name,
|
|
1825
|
+
location_name,
|
|
1826
|
+
desired_entry.code_location_deploy_data,
|
|
1827
|
+
)
|
|
1828
|
+
new_multipex_servers[to_update_key] = multipex_server
|
|
1861
1829
|
else:
|
|
1862
1830
|
self._logger.info(
|
|
1863
1831
|
f"Found running multipex server for {multipex_server_repr}"
|
|
@@ -2173,11 +2141,6 @@ class DagsterCloudUserCodeLauncher(
|
|
|
2173
2141
|
def _start_new_dagster_server(
|
|
2174
2142
|
self, deployment_name: str, location_name: str, desired_entry: UserCodeLauncherEntry
|
|
2175
2143
|
) -> DagsterCloudGrpcServer:
|
|
2176
|
-
attributes = {
|
|
2177
|
-
"deployment": deployment_name,
|
|
2178
|
-
"location": location_name,
|
|
2179
|
-
}
|
|
2180
|
-
|
|
2181
2144
|
if desired_entry.code_location_deploy_data.pex_metadata:
|
|
2182
2145
|
multipex_server = self._get_multipex_server(
|
|
2183
2146
|
deployment_name, location_name, desired_entry.code_location_deploy_data
|
|
@@ -2185,45 +2148,26 @@ class DagsterCloudUserCodeLauncher(
|
|
|
2185
2148
|
|
|
2186
2149
|
assert multipex_server # should have been started earlier or we should never reach here
|
|
2187
2150
|
|
|
2188
|
-
|
|
2189
|
-
attributes["python_version"] = (
|
|
2190
|
-
desired_entry.code_location_deploy_data.pex_metadata.python_version
|
|
2191
|
-
)
|
|
2151
|
+
self._create_pex_server(deployment_name, location_name, desired_entry, multipex_server)
|
|
2192
2152
|
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
event_key=f"{DAGSTER_CLOUD_AGENT_METRIC_PREFIX}.user_code.pex_server.start",
|
|
2196
|
-
short_description="starting new pex server",
|
|
2197
|
-
attributes=attributes,
|
|
2198
|
-
):
|
|
2199
|
-
self._create_pex_server(
|
|
2200
|
-
deployment_name, location_name, desired_entry, multipex_server
|
|
2201
|
-
)
|
|
2153
|
+
server_handle = multipex_server.server_handle
|
|
2154
|
+
multipex_endpoint = multipex_server.server_endpoint
|
|
2202
2155
|
|
|
2203
|
-
|
|
2204
|
-
|
|
2205
|
-
|
|
2206
|
-
|
|
2207
|
-
|
|
2208
|
-
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
|
|
2214
|
-
|
|
2215
|
-
|
|
2216
|
-
),
|
|
2217
|
-
desired_entry.code_location_deploy_data,
|
|
2218
|
-
)
|
|
2156
|
+
# start a new pex server on the multipexer, which we can count on already existing
|
|
2157
|
+
return DagsterCloudGrpcServer(
|
|
2158
|
+
server_handle,
|
|
2159
|
+
multipex_endpoint.with_metadata(
|
|
2160
|
+
[
|
|
2161
|
+
("has_pex", "1"),
|
|
2162
|
+
("deployment", deployment_name),
|
|
2163
|
+
("location", location_name),
|
|
2164
|
+
("timestamp", str(int(desired_entry.update_timestamp))),
|
|
2165
|
+
],
|
|
2166
|
+
),
|
|
2167
|
+
desired_entry.code_location_deploy_data,
|
|
2168
|
+
)
|
|
2219
2169
|
else:
|
|
2220
|
-
|
|
2221
|
-
opentelemetry=self.opentelemetry,
|
|
2222
|
-
event_key=f"{DAGSTER_CLOUD_AGENT_METRIC_PREFIX}.user_code.code_server.start",
|
|
2223
|
-
short_description="new code server spin up",
|
|
2224
|
-
attributes=attributes,
|
|
2225
|
-
):
|
|
2226
|
-
return self._start_new_server_spinup(deployment_name, location_name, desired_entry)
|
|
2170
|
+
return self._start_new_server_spinup(deployment_name, location_name, desired_entry)
|
|
2227
2171
|
|
|
2228
2172
|
def get_grpc_endpoint(
|
|
2229
2173
|
self,
|
|
@@ -2443,13 +2387,6 @@ class DagsterCloudUserCodeLauncher(
|
|
|
2443
2387
|
)
|
|
2444
2388
|
return response
|
|
2445
2389
|
|
|
2446
|
-
@property
|
|
2447
|
-
def opentelemetry(self) -> Optional[OpenTelemetryController]:
|
|
2448
|
-
if not self.has_instance:
|
|
2449
|
-
return None
|
|
2450
|
-
else:
|
|
2451
|
-
return self._instance.opentelemetry
|
|
2452
|
-
|
|
2453
2390
|
def upload_job_snap_direct(
|
|
2454
2391
|
self,
|
|
2455
2392
|
deployment_name: str,
|