dagster-cloud 1.8.2__py3-none-any.whl → 1.12.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_cloud/__init__.py +3 -3
- dagster_cloud/agent/__init__.py +4 -4
- dagster_cloud/agent/cli/__init__.py +56 -17
- dagster_cloud/agent/dagster_cloud_agent.py +360 -172
- dagster_cloud/agent/instrumentation/__init__.py +0 -0
- dagster_cloud/agent/instrumentation/constants.py +2 -0
- dagster_cloud/agent/instrumentation/run_launch.py +23 -0
- dagster_cloud/agent/instrumentation/schedule.py +34 -0
- dagster_cloud/agent/instrumentation/sensor.py +34 -0
- dagster_cloud/anomaly_detection/__init__.py +2 -2
- dagster_cloud/anomaly_detection/defs.py +17 -12
- dagster_cloud/anomaly_detection/types.py +3 -3
- dagster_cloud/api/dagster_cloud_api.py +209 -293
- dagster_cloud/auth/constants.py +21 -5
- dagster_cloud/batching/__init__.py +1 -0
- dagster_cloud/batching/batcher.py +210 -0
- dagster_cloud/dagster_insights/__init__.py +12 -6
- dagster_cloud/dagster_insights/bigquery/bigquery_utils.py +3 -2
- dagster_cloud/dagster_insights/bigquery/dbt_wrapper.py +39 -12
- dagster_cloud/dagster_insights/bigquery/insights_bigquery_resource.py +8 -6
- dagster_cloud/dagster_insights/insights_utils.py +18 -8
- dagster_cloud/dagster_insights/metrics_utils.py +12 -12
- dagster_cloud/dagster_insights/snowflake/dagster_snowflake_insights.py +5 -12
- dagster_cloud/dagster_insights/snowflake/dbt_wrapper.py +34 -8
- dagster_cloud/dagster_insights/snowflake/definitions.py +38 -12
- dagster_cloud/dagster_insights/snowflake/insights_snowflake_resource.py +11 -23
- dagster_cloud/definitions/__init__.py +0 -0
- dagster_cloud/definitions/job_selection.py +36 -0
- dagster_cloud/execution/cloud_run_launcher/k8s.py +1 -1
- dagster_cloud/execution/cloud_run_launcher/process.py +3 -3
- dagster_cloud/execution/monitoring/__init__.py +27 -33
- dagster_cloud/execution/utils/process.py +3 -3
- dagster_cloud/instance/__init__.py +125 -38
- dagster_cloud/instrumentation/__init__.py +32 -0
- dagster_cloud/metadata/source_code.py +13 -8
- dagster_cloud/metrics/__init__.py +0 -0
- dagster_cloud/metrics/tracer.py +59 -0
- dagster_cloud/opentelemetry/__init__.py +0 -0
- dagster_cloud/opentelemetry/config/__init__.py +73 -0
- dagster_cloud/opentelemetry/config/exporter.py +81 -0
- dagster_cloud/opentelemetry/config/log_record_processor.py +40 -0
- dagster_cloud/opentelemetry/config/logging_handler.py +14 -0
- dagster_cloud/opentelemetry/config/meter_provider.py +9 -0
- dagster_cloud/opentelemetry/config/metric_reader.py +39 -0
- dagster_cloud/opentelemetry/controller.py +319 -0
- dagster_cloud/opentelemetry/enum.py +58 -0
- dagster_cloud/opentelemetry/factories/__init__.py +1 -0
- dagster_cloud/opentelemetry/factories/logs.py +113 -0
- dagster_cloud/opentelemetry/factories/metrics.py +121 -0
- dagster_cloud/opentelemetry/metrics/__init__.py +0 -0
- dagster_cloud/opentelemetry/metrics/meter.py +140 -0
- dagster_cloud/opentelemetry/observers/__init__.py +0 -0
- dagster_cloud/opentelemetry/observers/dagster_exception_handler.py +40 -0
- dagster_cloud/opentelemetry/observers/execution_observer.py +178 -0
- dagster_cloud/pex/grpc/__generated__/multi_pex_api_pb2.pyi +175 -0
- dagster_cloud/pex/grpc/__init__.py +2 -2
- dagster_cloud/pex/grpc/client.py +4 -4
- dagster_cloud/pex/grpc/compile.py +2 -2
- dagster_cloud/pex/grpc/server/__init__.py +2 -2
- dagster_cloud/pex/grpc/server/cli/__init__.py +31 -19
- dagster_cloud/pex/grpc/server/manager.py +60 -42
- dagster_cloud/pex/grpc/server/registry.py +28 -21
- dagster_cloud/pex/grpc/server/server.py +23 -14
- dagster_cloud/pex/grpc/types.py +5 -5
- dagster_cloud/py.typed +0 -0
- dagster_cloud/secrets/__init__.py +1 -1
- dagster_cloud/secrets/loader.py +3 -3
- dagster_cloud/serverless/__init__.py +1 -1
- dagster_cloud/serverless/io_manager.py +36 -53
- dagster_cloud/storage/client.py +54 -17
- dagster_cloud/storage/compute_logs/__init__.py +3 -1
- dagster_cloud/storage/compute_logs/compute_log_manager.py +22 -17
- dagster_cloud/storage/defs_state/__init__.py +3 -0
- dagster_cloud/storage/defs_state/queries.py +15 -0
- dagster_cloud/storage/defs_state/storage.py +113 -0
- dagster_cloud/storage/event_logs/__init__.py +3 -1
- dagster_cloud/storage/event_logs/queries.py +102 -4
- dagster_cloud/storage/event_logs/storage.py +266 -73
- dagster_cloud/storage/event_logs/utils.py +88 -7
- dagster_cloud/storage/runs/__init__.py +1 -1
- dagster_cloud/storage/runs/queries.py +17 -2
- dagster_cloud/storage/runs/storage.py +88 -42
- dagster_cloud/storage/schedules/__init__.py +1 -1
- dagster_cloud/storage/schedules/storage.py +6 -8
- dagster_cloud/storage/tags.py +66 -1
- dagster_cloud/util/__init__.py +10 -12
- dagster_cloud/util/errors.py +49 -64
- dagster_cloud/version.py +1 -1
- dagster_cloud/workspace/config_schema/__init__.py +55 -13
- dagster_cloud/workspace/docker/__init__.py +76 -25
- dagster_cloud/workspace/docker/utils.py +1 -1
- dagster_cloud/workspace/ecs/__init__.py +1 -1
- dagster_cloud/workspace/ecs/client.py +51 -33
- dagster_cloud/workspace/ecs/launcher.py +76 -22
- dagster_cloud/workspace/ecs/run_launcher.py +3 -3
- dagster_cloud/workspace/ecs/utils.py +14 -5
- dagster_cloud/workspace/kubernetes/__init__.py +1 -1
- dagster_cloud/workspace/kubernetes/launcher.py +61 -29
- dagster_cloud/workspace/kubernetes/utils.py +34 -22
- dagster_cloud/workspace/user_code_launcher/__init__.py +5 -3
- dagster_cloud/workspace/user_code_launcher/process.py +16 -14
- dagster_cloud/workspace/user_code_launcher/user_code_launcher.py +552 -172
- dagster_cloud/workspace/user_code_launcher/utils.py +105 -1
- {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/METADATA +48 -42
- dagster_cloud-1.12.6.dist-info/RECORD +134 -0
- {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/WHEEL +1 -1
- dagster_cloud-1.8.2.dist-info/RECORD +0 -100
- {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/top_level.txt +0 -0
|
@@ -69,7 +69,7 @@ def protoc(generated_dir: str):
|
|
|
69
69
|
generated_grpc_path,
|
|
70
70
|
tempfile_path,
|
|
71
71
|
)
|
|
72
|
-
with open(tempfile_path,
|
|
72
|
+
with open(tempfile_path, encoding="utf8") as generated:
|
|
73
73
|
with open(generated_grpc_path, "w", encoding="utf8") as rewritten:
|
|
74
74
|
for line in GENERATED_HEADER:
|
|
75
75
|
rewritten.write(line)
|
|
@@ -85,7 +85,7 @@ def protoc(generated_dir: str):
|
|
|
85
85
|
generated_pb2_path,
|
|
86
86
|
tempfile_path,
|
|
87
87
|
)
|
|
88
|
-
with open(tempfile_path,
|
|
88
|
+
with open(tempfile_path, encoding="utf8") as generated:
|
|
89
89
|
with open(generated_pb2_path, "w", encoding="utf8") as rewritten:
|
|
90
90
|
for line in GENERATED_HEADER:
|
|
91
91
|
rewritten.write(line)
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
from .manager import MultiPexManager as MultiPexManager
|
|
2
|
-
from .server import run_multipex_server as run_multipex_server
|
|
1
|
+
from dagster_cloud.pex.grpc.server.manager import MultiPexManager as MultiPexManager
|
|
2
|
+
from dagster_cloud.pex.grpc.server.server import run_multipex_server as run_multipex_server
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
|
+
import signal
|
|
3
4
|
import subprocess
|
|
4
5
|
from typing import Optional
|
|
5
6
|
|
|
6
7
|
from dagster._serdes import deserialize_value
|
|
7
|
-
from dagster._utils.interrupts import
|
|
8
|
+
from dagster._utils.interrupts import setup_interrupt_handlers
|
|
8
9
|
from dagster_cloud_cli.core.workspace import PexMetadata
|
|
9
10
|
from typer import Option, Typer
|
|
10
11
|
|
|
11
|
-
from
|
|
12
|
-
from
|
|
12
|
+
from dagster_cloud.pex.grpc.server.registry import PexS3Registry
|
|
13
|
+
from dagster_cloud.pex.grpc.server.server import run_multipex_server
|
|
13
14
|
|
|
14
15
|
app = Typer(hidden=True)
|
|
15
16
|
|
|
@@ -48,20 +49,31 @@ def execute_run(
|
|
|
48
49
|
default="/tmp/pex-files", envvar="LOCAL_PEX_FILES_DIR"
|
|
49
50
|
),
|
|
50
51
|
):
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
52
|
+
setup_interrupt_handlers()
|
|
53
|
+
pex_metadata = deserialize_value(pex_metadata_json, PexMetadata)
|
|
54
|
+
executable = PexS3Registry(local_pex_files_dir).get_pex_executable(pex_metadata)
|
|
54
55
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
56
|
+
run_process = subprocess.Popen(
|
|
57
|
+
[
|
|
58
|
+
executable.source_path,
|
|
59
|
+
"-m",
|
|
60
|
+
"dagster",
|
|
61
|
+
"api",
|
|
62
|
+
"execute_run",
|
|
63
|
+
input_json,
|
|
64
|
+
],
|
|
65
|
+
env={**os.environ.copy(), **executable.environ},
|
|
66
|
+
cwd=executable.working_directory,
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
logger = logging.getLogger("dagster.pex_run")
|
|
70
|
+
|
|
71
|
+
try:
|
|
72
|
+
return_code = run_process.wait()
|
|
73
|
+
if return_code != 0:
|
|
74
|
+
raise Exception(f"PEX subprocess returned with exit code {return_code}")
|
|
75
|
+
except KeyboardInterrupt:
|
|
76
|
+
logger.info("Forwarding interrupt to PEX subprocess")
|
|
77
|
+
run_process.send_signal(signal.SIGINT)
|
|
78
|
+
run_process.wait()
|
|
79
|
+
raise
|
|
@@ -5,21 +5,24 @@ import sys
|
|
|
5
5
|
import threading
|
|
6
6
|
import time
|
|
7
7
|
from contextlib import AbstractContextManager
|
|
8
|
-
from typing import
|
|
8
|
+
from typing import Optional, Union, cast
|
|
9
9
|
|
|
10
|
-
import dagster._seven as seven
|
|
11
10
|
from dagster import _check as check
|
|
12
11
|
from dagster._core.errors import DagsterUserCodeUnreachableError
|
|
13
12
|
from dagster._core.instance.ref import InstanceRef
|
|
14
13
|
from dagster._grpc.client import DagsterGrpcClient, client_heartbeat_thread
|
|
15
|
-
from dagster._serdes.ipc import open_ipc_subprocess
|
|
16
14
|
from dagster._utils import find_free_port, safe_tempfile_path_unmanaged
|
|
17
15
|
from dagster._utils.error import SerializableErrorInfo, serializable_error_info_from_exc_info
|
|
18
16
|
from dagster_cloud_cli.core.workspace import CodeLocationDeployData, PexMetadata
|
|
17
|
+
from dagster_shared import seven
|
|
18
|
+
from dagster_shared.ipc import open_ipc_subprocess
|
|
19
19
|
from pydantic import BaseModel, Extra
|
|
20
20
|
|
|
21
|
-
from
|
|
22
|
-
from .
|
|
21
|
+
from dagster_cloud.pex.grpc.server.registry import PexS3Registry
|
|
22
|
+
from dagster_cloud.pex.grpc.types import PexServerHandle
|
|
23
|
+
from dagster_cloud.workspace.user_code_launcher.utils import get_grpc_server_env
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger("dagster.multipex")
|
|
23
26
|
|
|
24
27
|
|
|
25
28
|
class PexProcessEntry(BaseModel, frozen=True, extra=Extra.forbid, arbitrary_types_allowed=True):
|
|
@@ -43,11 +46,11 @@ class MultiPexManager(AbstractContextManager):
|
|
|
43
46
|
enable_metrics: bool = False,
|
|
44
47
|
):
|
|
45
48
|
# Keyed by hash of PexServerHandle
|
|
46
|
-
self._pex_servers:
|
|
47
|
-
self._pending_startup_pex_servers:
|
|
48
|
-
self._pending_shutdown_pex_servers:
|
|
49
|
+
self._pex_servers: dict[str, Union[PexProcessEntry, PexErrorEntry]] = {}
|
|
50
|
+
self._pending_startup_pex_servers: set[str] = set()
|
|
51
|
+
self._pending_shutdown_pex_servers: set[str] = set()
|
|
49
52
|
self._pex_servers_lock = threading.RLock()
|
|
50
|
-
self._pex_metadata_for_handle:
|
|
53
|
+
self._pex_metadata_for_handle: dict[
|
|
51
54
|
str, Optional[PexMetadata]
|
|
52
55
|
] = {} # maps handle id to the pex tag
|
|
53
56
|
self._heartbeat_ttl = 60
|
|
@@ -62,13 +65,13 @@ class MultiPexManager(AbstractContextManager):
|
|
|
62
65
|
daemon=True,
|
|
63
66
|
)
|
|
64
67
|
self._watchdog_thread.start()
|
|
65
|
-
|
|
68
|
+
logger.info(
|
|
66
69
|
"Created a watchdog thread %s for MultiPexManager with watchdog_run_interval=%s",
|
|
67
70
|
self._watchdog_thread.name,
|
|
68
71
|
watchdog_run_interval,
|
|
69
72
|
)
|
|
70
73
|
else:
|
|
71
|
-
|
|
74
|
+
logger.info(
|
|
72
75
|
"No watchdog thread started for MultiPexManager (watchdog_run_interval=%s)",
|
|
73
76
|
watchdog_run_interval,
|
|
74
77
|
)
|
|
@@ -85,7 +88,7 @@ class MultiPexManager(AbstractContextManager):
|
|
|
85
88
|
returncode = server.grpc_server_process.poll()
|
|
86
89
|
if returncode is not None:
|
|
87
90
|
dead_server_returncodes.append((server, returncode))
|
|
88
|
-
|
|
91
|
+
logger.error(
|
|
89
92
|
"watchdog: pex subprocesss %s unexpectedly exited with returncode %s -"
|
|
90
93
|
" changing state to error",
|
|
91
94
|
server_id,
|
|
@@ -93,7 +96,7 @@ class MultiPexManager(AbstractContextManager):
|
|
|
93
96
|
)
|
|
94
97
|
self._mark_servers_unexpected_termination(dead_server_returncodes)
|
|
95
98
|
if dead_server_returncodes:
|
|
96
|
-
|
|
99
|
+
logger.warning(
|
|
97
100
|
"watchdog: inspected %s active servers %s, of which %s were found unexpectedly"
|
|
98
101
|
" terminated",
|
|
99
102
|
len(active_servers),
|
|
@@ -102,7 +105,7 @@ class MultiPexManager(AbstractContextManager):
|
|
|
102
105
|
)
|
|
103
106
|
|
|
104
107
|
def _mark_servers_unexpected_termination(
|
|
105
|
-
self, dead_server_returncodes:
|
|
108
|
+
self, dead_server_returncodes: list[tuple[PexProcessEntry, int]]
|
|
106
109
|
) -> None:
|
|
107
110
|
with self._pex_servers_lock:
|
|
108
111
|
for server, returncode in dead_server_returncodes:
|
|
@@ -137,9 +140,9 @@ class MultiPexManager(AbstractContextManager):
|
|
|
137
140
|
if isinstance(pex_server_or_error, PexErrorEntry):
|
|
138
141
|
return pex_server_or_error.error
|
|
139
142
|
|
|
140
|
-
return cast(PexProcessEntry, self._pex_servers[handle_id]).grpc_client
|
|
143
|
+
return cast("PexProcessEntry", self._pex_servers[handle_id]).grpc_client
|
|
141
144
|
|
|
142
|
-
def get_active_pex_servers(self) ->
|
|
145
|
+
def get_active_pex_servers(self) -> list[PexProcessEntry]:
|
|
143
146
|
with self._pex_servers_lock:
|
|
144
147
|
return [
|
|
145
148
|
server
|
|
@@ -147,7 +150,7 @@ class MultiPexManager(AbstractContextManager):
|
|
|
147
150
|
if self.is_server_active(server_id) and isinstance(server, PexProcessEntry)
|
|
148
151
|
]
|
|
149
152
|
|
|
150
|
-
def get_error_pex_servers(self) ->
|
|
153
|
+
def get_error_pex_servers(self) -> list[PexErrorEntry]:
|
|
151
154
|
with self._pex_servers_lock:
|
|
152
155
|
return [
|
|
153
156
|
server
|
|
@@ -157,7 +160,7 @@ class MultiPexManager(AbstractContextManager):
|
|
|
157
160
|
|
|
158
161
|
def get_active_pex_server_handles(
|
|
159
162
|
self, deployment_name, location_name: str
|
|
160
|
-
) ->
|
|
163
|
+
) -> list[PexServerHandle]:
|
|
161
164
|
return [
|
|
162
165
|
server.pex_server_handle
|
|
163
166
|
for server in self.get_active_pex_servers()
|
|
@@ -167,7 +170,7 @@ class MultiPexManager(AbstractContextManager):
|
|
|
167
170
|
|
|
168
171
|
def get_error_pex_server_handles(
|
|
169
172
|
self, deployment_name, location_name: str
|
|
170
|
-
) ->
|
|
173
|
+
) -> list[PexServerHandle]:
|
|
171
174
|
return [
|
|
172
175
|
server.pex_server_handle
|
|
173
176
|
for server in self.get_error_pex_servers()
|
|
@@ -175,7 +178,7 @@ class MultiPexManager(AbstractContextManager):
|
|
|
175
178
|
and server.pex_server_handle.location_name == location_name
|
|
176
179
|
]
|
|
177
180
|
|
|
178
|
-
def get_all_pex_grpc_clients_map(self) ->
|
|
181
|
+
def get_all_pex_grpc_clients_map(self) -> dict[str, DagsterGrpcClient]:
|
|
179
182
|
with self._pex_servers_lock:
|
|
180
183
|
return {
|
|
181
184
|
server.pex_server_handle.get_id(): server.grpc_client
|
|
@@ -209,14 +212,14 @@ class MultiPexManager(AbstractContextManager):
|
|
|
209
212
|
pex_executable = self._registry.get_pex_executable(
|
|
210
213
|
check.not_none(code_location_deploy_data.pex_metadata)
|
|
211
214
|
)
|
|
212
|
-
|
|
215
|
+
logger.info(
|
|
213
216
|
"Installed pex executable %s at %s",
|
|
214
217
|
code_location_deploy_data.pex_metadata,
|
|
215
218
|
pex_executable.source_path,
|
|
216
219
|
)
|
|
217
220
|
|
|
218
221
|
metadata = code_location_deploy_data
|
|
219
|
-
|
|
222
|
+
logger.info("Launching subprocess %s", pex_executable.source_path)
|
|
220
223
|
subprocess_args = [
|
|
221
224
|
pex_executable.source_path,
|
|
222
225
|
"-m",
|
|
@@ -243,7 +246,8 @@ class MultiPexManager(AbstractContextManager):
|
|
|
243
246
|
port = None
|
|
244
247
|
socket = safe_tempfile_path_unmanaged()
|
|
245
248
|
|
|
246
|
-
additional_env =
|
|
249
|
+
additional_env = get_grpc_server_env(
|
|
250
|
+
code_location_deploy_data=metadata,
|
|
247
251
|
port=port,
|
|
248
252
|
location_name=server_handle.location_name,
|
|
249
253
|
instance_ref=instance_ref,
|
|
@@ -274,7 +278,7 @@ class MultiPexManager(AbstractContextManager):
|
|
|
274
278
|
daemon=True,
|
|
275
279
|
)
|
|
276
280
|
heartbeat_thread.start()
|
|
277
|
-
|
|
281
|
+
logger.info(
|
|
278
282
|
"Created a heartbeat thread %s for %s",
|
|
279
283
|
heartbeat_thread.name,
|
|
280
284
|
server_handle.get_id(),
|
|
@@ -287,7 +291,7 @@ class MultiPexManager(AbstractContextManager):
|
|
|
287
291
|
error=serializable_error_info_from_exc_info(sys.exc_info()),
|
|
288
292
|
)
|
|
289
293
|
self._pending_startup_pex_servers.remove(server_handle.get_id())
|
|
290
|
-
|
|
294
|
+
logger.exception(
|
|
291
295
|
"Creating new pex server for %s:%s failed",
|
|
292
296
|
server_handle.deployment_name,
|
|
293
297
|
server_handle.location_name,
|
|
@@ -307,7 +311,7 @@ class MultiPexManager(AbstractContextManager):
|
|
|
307
311
|
with self._pex_servers_lock:
|
|
308
312
|
handle_id = server_handle.get_id()
|
|
309
313
|
if handle_id in self._pending_startup_pex_servers:
|
|
310
|
-
|
|
314
|
+
logger.info(
|
|
311
315
|
"Ignoring request to create pex server for %s - an identical server is"
|
|
312
316
|
" already pending start",
|
|
313
317
|
handle_id,
|
|
@@ -316,12 +320,12 @@ class MultiPexManager(AbstractContextManager):
|
|
|
316
320
|
self._pending_startup_pex_servers.add(handle_id)
|
|
317
321
|
if handle_id in self._pex_servers:
|
|
318
322
|
# clear any previous error state since we're attempting to start server again
|
|
319
|
-
|
|
323
|
+
logger.info(
|
|
320
324
|
"Clearing previous state for %s: %s", handle_id, self._pex_servers[handle_id]
|
|
321
325
|
)
|
|
322
326
|
del self._pex_servers[handle_id]
|
|
323
327
|
|
|
324
|
-
|
|
328
|
+
logger.info(
|
|
325
329
|
"Creating new pex server for %s:%s",
|
|
326
330
|
server_handle.deployment_name,
|
|
327
331
|
server_handle.location_name,
|
|
@@ -332,7 +336,7 @@ class MultiPexManager(AbstractContextManager):
|
|
|
332
336
|
handle_id = server_handle.get_id()
|
|
333
337
|
with self._pex_servers_lock:
|
|
334
338
|
if handle_id in self._pex_servers or handle_id in self._pending_startup_pex_servers:
|
|
335
|
-
|
|
339
|
+
logger.info("Server %s marked for shutdown", handle_id)
|
|
336
340
|
self._pending_shutdown_pex_servers.add(handle_id)
|
|
337
341
|
|
|
338
342
|
def cleanup_pending_shutdown_pex_servers(self) -> None:
|
|
@@ -343,14 +347,23 @@ class MultiPexManager(AbstractContextManager):
|
|
|
343
347
|
if handle_id not in self._pex_servers:
|
|
344
348
|
continue
|
|
345
349
|
server = self._pex_servers[handle_id]
|
|
346
|
-
if (
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
350
|
+
if isinstance(server, PexProcessEntry):
|
|
351
|
+
if server.grpc_server_process.poll() is not None:
|
|
352
|
+
# Server process shut down
|
|
353
|
+
to_remove.add(handle_id)
|
|
354
|
+
else:
|
|
355
|
+
try:
|
|
356
|
+
server.grpc_client.ping("")
|
|
357
|
+
except DagsterUserCodeUnreachableError:
|
|
358
|
+
logger.warning(
|
|
359
|
+
"server process is still running but the server is unreachable - killing the process",
|
|
360
|
+
exc_info=True,
|
|
361
|
+
)
|
|
362
|
+
server.grpc_server_process.kill()
|
|
363
|
+
to_remove.add(handle_id)
|
|
351
364
|
|
|
352
365
|
for handle_id in to_remove:
|
|
353
|
-
|
|
366
|
+
logger.info("Server %s completely shutdown, cleaning up", handle_id)
|
|
354
367
|
self._pending_shutdown_pex_servers.remove(handle_id)
|
|
355
368
|
del self._pex_servers[handle_id]
|
|
356
369
|
|
|
@@ -359,32 +372,37 @@ class MultiPexManager(AbstractContextManager):
|
|
|
359
372
|
pex_server = self._pex_servers.get(handle_id)
|
|
360
373
|
if not pex_server:
|
|
361
374
|
# still in _pending_startup_pex_servers
|
|
362
|
-
|
|
375
|
+
logger.info("Server %s not up yet, will request shutdown later", handle_id)
|
|
363
376
|
continue
|
|
364
377
|
|
|
365
378
|
if isinstance(pex_server, PexErrorEntry):
|
|
366
|
-
|
|
379
|
+
logger.debug("Server %s was in an error state, no shutdown needed", handle_id)
|
|
367
380
|
continue
|
|
368
381
|
|
|
369
382
|
if pex_server.heartbeat_shutdown_event.is_set():
|
|
370
383
|
# already requested shutdown
|
|
371
|
-
logging.info("Already requested shutdown for server %s", handle_id)
|
|
372
384
|
continue
|
|
373
385
|
|
|
374
|
-
|
|
386
|
+
logger.info("Requesting shutdown for server %s", handle_id)
|
|
375
387
|
pex_server.heartbeat_shutdown_event.set()
|
|
376
388
|
pex_server.heartbeat_thread.join()
|
|
377
389
|
try:
|
|
378
390
|
pex_server.grpc_client.shutdown_server()
|
|
379
391
|
except DagsterUserCodeUnreachableError:
|
|
380
|
-
|
|
381
|
-
|
|
392
|
+
logger.warning(
|
|
393
|
+
"Server shutdown for %s over grpc failed, killing the process",
|
|
394
|
+
handle_id,
|
|
395
|
+
exc_info=True,
|
|
396
|
+
)
|
|
397
|
+
pex_server.grpc_server_process.kill()
|
|
382
398
|
|
|
383
399
|
# Delete any registry files not in use anymore
|
|
384
400
|
# - ensure that resources for servers starting up or shutting down are not removed
|
|
385
401
|
# - important to do this while holding the lock to avoid race conditions
|
|
386
402
|
running_server_ids = {
|
|
387
|
-
proc.pex_server_handle.get_id()
|
|
403
|
+
proc.pex_server_handle.get_id()
|
|
404
|
+
for proc in self._pex_servers.values()
|
|
405
|
+
if isinstance(proc, PexProcessEntry)
|
|
388
406
|
}
|
|
389
407
|
in_use_handle_ids = self._pending_startup_pex_servers.union(
|
|
390
408
|
self._pending_shutdown_pex_servers
|
|
@@ -9,7 +9,7 @@ import threading
|
|
|
9
9
|
from dataclasses import dataclass
|
|
10
10
|
from os.path import expanduser
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import
|
|
12
|
+
from typing import NamedTuple, Optional
|
|
13
13
|
from uuid import uuid4
|
|
14
14
|
|
|
15
15
|
from dagster import _check as check
|
|
@@ -19,6 +19,8 @@ DEFAULT_PEX_FILES_DIR = "/tmp/pex-files"
|
|
|
19
19
|
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html
|
|
20
20
|
MULTIPART_DOWNLOAD_THREADS = 20 # Double the boto3 default of 10
|
|
21
21
|
|
|
22
|
+
logger = logging.getLogger("dagster.multipex")
|
|
23
|
+
|
|
22
24
|
|
|
23
25
|
def _download_from_s3(filename: str, local_filepath: str):
|
|
24
26
|
# Lazy import boto3 to avoid a hard dependency during module load
|
|
@@ -28,7 +30,11 @@ def _download_from_s3(filename: str, local_filepath: str):
|
|
|
28
30
|
|
|
29
31
|
config = Config(retries={"max_attempts": 3, "mode": "standard"})
|
|
30
32
|
|
|
31
|
-
s3 = boto3.client(
|
|
33
|
+
s3 = boto3.client(
|
|
34
|
+
"s3",
|
|
35
|
+
region_name=os.getenv("DAGSTER_CLOUD_SERVERLESS_REGION", "us-west-2"),
|
|
36
|
+
config=config,
|
|
37
|
+
)
|
|
32
38
|
|
|
33
39
|
# TODO: move the bucket and prefix to pex_metdata
|
|
34
40
|
s3_bucket_name = os.environ["DAGSTER_CLOUD_SERVERLESS_STORAGE_S3_BUCKET"]
|
|
@@ -52,22 +58,22 @@ class PexExecutable(
|
|
|
52
58
|
"_PexExecutable",
|
|
53
59
|
[
|
|
54
60
|
("source_path", str),
|
|
55
|
-
("all_paths",
|
|
56
|
-
("environ",
|
|
61
|
+
("all_paths", list[str]),
|
|
62
|
+
("environ", dict[str, str]),
|
|
57
63
|
("working_directory", Optional[str]),
|
|
58
|
-
("venv_dirs",
|
|
64
|
+
("venv_dirs", list[str]),
|
|
59
65
|
],
|
|
60
66
|
)
|
|
61
67
|
):
|
|
62
68
|
def __new__(
|
|
63
69
|
cls,
|
|
64
70
|
source_path: str,
|
|
65
|
-
all_paths:
|
|
66
|
-
environ:
|
|
71
|
+
all_paths: list[str],
|
|
72
|
+
environ: dict[str, str],
|
|
67
73
|
working_directory: Optional[str],
|
|
68
|
-
venv_dirs:
|
|
74
|
+
venv_dirs: list[str],
|
|
69
75
|
):
|
|
70
|
-
return super(
|
|
76
|
+
return super().__new__(
|
|
71
77
|
cls,
|
|
72
78
|
check.str_param(source_path, "source_path"),
|
|
73
79
|
check.list_param(all_paths, "all_paths", str),
|
|
@@ -96,12 +102,12 @@ class PexS3Registry:
|
|
|
96
102
|
local_pex_files_dir if local_pex_files_dir else DEFAULT_PEX_FILES_DIR
|
|
97
103
|
)
|
|
98
104
|
os.makedirs(self._local_pex_files_dir, exist_ok=True)
|
|
99
|
-
self.working_dirs:
|
|
105
|
+
self.working_dirs: dict[
|
|
100
106
|
str, str
|
|
101
107
|
] = {} # once unpacked, working dirs dont change so we cache them
|
|
102
108
|
|
|
103
109
|
# keep track of local files and directories used by each pex tag
|
|
104
|
-
self.local_paths_for_pex_tag:
|
|
110
|
+
self.local_paths_for_pex_tag: dict[str, set[str]] = {}
|
|
105
111
|
|
|
106
112
|
# lock to do safe install and cleanup
|
|
107
113
|
self._install_lock = threading.RLock()
|
|
@@ -149,7 +155,7 @@ class PexS3Registry:
|
|
|
149
155
|
deps_pex_filepaths.append(local_filepath)
|
|
150
156
|
|
|
151
157
|
if not source_pex_filepath:
|
|
152
|
-
raise ValueError("Invalid pex_tag has no source pex:
|
|
158
|
+
raise ValueError(f"Invalid pex_tag has no source pex: {pex_metadata.pex_tag!r}")
|
|
153
159
|
|
|
154
160
|
# we unpack each pex file into its own venv
|
|
155
161
|
source_venv = self.venv_for(source_pex_filepath)
|
|
@@ -188,11 +194,11 @@ class PexS3Registry:
|
|
|
188
194
|
venv_dirs=[str(source_venv.path)] + [str(deps_venv.path) for deps_venv in deps_venvs],
|
|
189
195
|
)
|
|
190
196
|
|
|
191
|
-
def cleanup_unused_files(self, in_use_pex_metadatas:
|
|
197
|
+
def cleanup_unused_files(self, in_use_pex_metadatas: list[PexMetadata]) -> None:
|
|
192
198
|
with self._install_lock:
|
|
193
199
|
return self._cleanup_unused_files(in_use_pex_metadatas)
|
|
194
200
|
|
|
195
|
-
def _cleanup_unused_files(self, in_use_pex_metadatas:
|
|
201
|
+
def _cleanup_unused_files(self, in_use_pex_metadatas: list[PexMetadata]) -> None:
|
|
196
202
|
"""Cleans up all local files and directories that are not associated with any PexMetadata provided."""
|
|
197
203
|
in_use_pex_tags = [pex_metadata.pex_tag for pex_metadata in in_use_pex_metadatas]
|
|
198
204
|
|
|
@@ -207,7 +213,7 @@ class PexS3Registry:
|
|
|
207
213
|
unused_local_paths = all_local_paths - in_use_local_paths
|
|
208
214
|
unused_paths_present = [path for path in unused_local_paths if os.path.exists(path)]
|
|
209
215
|
if unused_paths_present:
|
|
210
|
-
|
|
216
|
+
logger.info(
|
|
211
217
|
"Cleaning up %s unused local paths: %r",
|
|
212
218
|
len(unused_paths_present),
|
|
213
219
|
unused_paths_present,
|
|
@@ -219,13 +225,13 @@ class PexS3Registry:
|
|
|
219
225
|
else:
|
|
220
226
|
os.remove(path)
|
|
221
227
|
except OSError:
|
|
222
|
-
|
|
228
|
+
logger.exception("Ignoring failure to clean up local unused path %s", path)
|
|
223
229
|
|
|
224
230
|
def venv_for(self, pex_filepath) -> PexVenv:
|
|
225
231
|
_, pex_filename = os.path.split(pex_filepath)
|
|
226
232
|
venv_dir = self.venv_dir_for(pex_filepath)
|
|
227
233
|
if os.path.exists(venv_dir):
|
|
228
|
-
|
|
234
|
+
logger.info("Reusing existing venv %r for %r", venv_dir, pex_filepath)
|
|
229
235
|
else:
|
|
230
236
|
self.install_venv(venv_dir, pex_filepath)
|
|
231
237
|
if not os.path.exists(venv_dir):
|
|
@@ -259,6 +265,7 @@ class PexS3Registry:
|
|
|
259
265
|
# since we combine multiple venvs, we need non hermetic scripts
|
|
260
266
|
"--non-hermetic-scripts",
|
|
261
267
|
venv_dir,
|
|
268
|
+
"--pip",
|
|
262
269
|
],
|
|
263
270
|
stderr=subprocess.STDOUT,
|
|
264
271
|
)
|
|
@@ -267,7 +274,7 @@ class PexS3Registry:
|
|
|
267
274
|
raise PexInstallationError(
|
|
268
275
|
f"Could not install venv. Pex output: {e.output}", pex_filepath
|
|
269
276
|
) from e
|
|
270
|
-
|
|
277
|
+
logger.info(
|
|
271
278
|
"Unpacked pex file %r into venv at %r",
|
|
272
279
|
pex_filepath,
|
|
273
280
|
venv_dir,
|
|
@@ -283,7 +290,7 @@ class PexS3Registry:
|
|
|
283
290
|
if not proc.returncode:
|
|
284
291
|
return Path(proc.stdout.decode("utf-8").strip()).absolute()
|
|
285
292
|
else:
|
|
286
|
-
|
|
293
|
+
logger.error(
|
|
287
294
|
"Cannot determine site-packages for venv at %r: %s\n%s",
|
|
288
295
|
venv_path,
|
|
289
296
|
proc.stdout.decode("utf-8"),
|
|
@@ -315,12 +322,12 @@ class PexS3Registry:
|
|
|
315
322
|
|
|
316
323
|
except subprocess.CalledProcessError:
|
|
317
324
|
# working_directory package is optional, just log a message
|
|
318
|
-
|
|
325
|
+
logger.info("Cannot import working_directory package - not setting current directory.")
|
|
319
326
|
return None
|
|
320
327
|
except OSError:
|
|
321
328
|
# some issue with pex not being runnable, log an error but don't fail yet
|
|
322
329
|
# might fail later if we try to run this again
|
|
323
|
-
|
|
330
|
+
logger.exception(
|
|
324
331
|
"Ignoring failure to run pex file to determine working_directory %r", pex_path
|
|
325
332
|
)
|
|
326
333
|
return None
|
|
@@ -9,12 +9,12 @@ from typing import Optional, cast
|
|
|
9
9
|
import dagster._check as check
|
|
10
10
|
import grpc
|
|
11
11
|
from dagster._core.errors import DagsterUserCodeUnreachableError
|
|
12
|
-
from dagster._grpc.__generated__ import
|
|
13
|
-
from dagster._grpc.__generated__.
|
|
12
|
+
from dagster._grpc.__generated__ import dagster_api_pb2
|
|
13
|
+
from dagster._grpc.__generated__.dagster_api_pb2_grpc import (
|
|
14
14
|
DagsterApiServicer,
|
|
15
15
|
add_DagsterApiServicer_to_server,
|
|
16
16
|
)
|
|
17
|
-
from dagster._grpc.client import DEFAULT_GRPC_TIMEOUT
|
|
17
|
+
from dagster._grpc.client import DEFAULT_GRPC_TIMEOUT, DEFAULT_REPOSITORY_GRPC_TIMEOUT
|
|
18
18
|
from dagster._grpc.server import server_termination_target
|
|
19
19
|
from dagster._grpc.types import GetCurrentRunsResult, SensorExecutionArgs
|
|
20
20
|
from dagster._grpc.utils import max_rx_bytes, max_send_bytes
|
|
@@ -22,12 +22,13 @@ from dagster._serdes import deserialize_value, serialize_value
|
|
|
22
22
|
from dagster._utils.error import SerializableErrorInfo, serializable_error_info_from_exc_info
|
|
23
23
|
from grpc_health.v1 import health, health_pb2, health_pb2_grpc
|
|
24
24
|
|
|
25
|
-
from
|
|
26
|
-
from
|
|
25
|
+
from dagster_cloud.pex.grpc.__generated__ import multi_pex_api_pb2
|
|
26
|
+
from dagster_cloud.pex.grpc.__generated__.multi_pex_api_pb2_grpc import (
|
|
27
27
|
MultiPexApiServicer,
|
|
28
28
|
add_MultiPexApiServicer_to_server,
|
|
29
29
|
)
|
|
30
|
-
from
|
|
30
|
+
from dagster_cloud.pex.grpc.server.manager import MultiPexManager
|
|
31
|
+
from dagster_cloud.pex.grpc.types import (
|
|
31
32
|
CreatePexServerArgs,
|
|
32
33
|
CreatePexServerResponse,
|
|
33
34
|
GetCrashedPexServersArgs,
|
|
@@ -38,7 +39,6 @@ from ..types import (
|
|
|
38
39
|
ShutdownPexServerArgs,
|
|
39
40
|
ShutdownPexServerResponse,
|
|
40
41
|
)
|
|
41
|
-
from .manager import MultiPexManager
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
class MultiPexApiServer(MultiPexApiServicer):
|
|
@@ -186,10 +186,15 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
|
|
|
186
186
|
self._get_handle_from_metadata(context)
|
|
187
187
|
)
|
|
188
188
|
if isinstance(client_or_error, SerializableErrorInfo):
|
|
189
|
-
return
|
|
189
|
+
return dagster_api_pb2.ListRepositoriesReply(
|
|
190
190
|
serialized_list_repositories_response_or_error=serialize_value(client_or_error)
|
|
191
191
|
)
|
|
192
|
-
|
|
192
|
+
|
|
193
|
+
try:
|
|
194
|
+
return client_or_error._get_response("ListRepositories", request) # noqa: SLF001
|
|
195
|
+
except grpc.RpcError as e:
|
|
196
|
+
# Surface the grpc error to the caller
|
|
197
|
+
context.abort(e.code(), e.details())
|
|
193
198
|
|
|
194
199
|
def Ping(self, request, context):
|
|
195
200
|
return self._query("Ping", request, context)
|
|
@@ -201,7 +206,9 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
|
|
|
201
206
|
return self._query("GetCurrentImage", request, context)
|
|
202
207
|
|
|
203
208
|
def StreamingExternalRepository(self, request, context):
|
|
204
|
-
return self._streaming_query(
|
|
209
|
+
return self._streaming_query(
|
|
210
|
+
"StreamingExternalRepository", request, context, timeout=DEFAULT_REPOSITORY_GRPC_TIMEOUT
|
|
211
|
+
)
|
|
205
212
|
|
|
206
213
|
def Heartbeat(self, request, context):
|
|
207
214
|
return self._query("Heartbeat", request, context)
|
|
@@ -228,7 +235,9 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
|
|
|
228
235
|
return self._query("ExternalPipelineSubsetSnapshot", request, context)
|
|
229
236
|
|
|
230
237
|
def ExternalRepository(self, request, context):
|
|
231
|
-
return self._query(
|
|
238
|
+
return self._query(
|
|
239
|
+
"ExternalRepository", request, context, timeout=DEFAULT_REPOSITORY_GRPC_TIMEOUT
|
|
240
|
+
)
|
|
232
241
|
|
|
233
242
|
def ExternalJob(self, request, context):
|
|
234
243
|
return self._query("ExternalJob", request, context)
|
|
@@ -242,7 +251,7 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
|
|
|
242
251
|
except Exception as e:
|
|
243
252
|
if (
|
|
244
253
|
isinstance(e, grpc.RpcError)
|
|
245
|
-
and cast(grpc.RpcError, e).code() == grpc.StatusCode.UNIMPLEMENTED
|
|
254
|
+
and cast("grpc.RpcError", e).code() == grpc.StatusCode.UNIMPLEMENTED
|
|
246
255
|
):
|
|
247
256
|
context.abort(
|
|
248
257
|
grpc.StatusCode.UNIMPLEMENTED,
|
|
@@ -278,7 +287,7 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
|
|
|
278
287
|
except Exception as e:
|
|
279
288
|
if (
|
|
280
289
|
isinstance(e, grpc.RpcError)
|
|
281
|
-
and cast(grpc.RpcError, e).code() == grpc.StatusCode.UNIMPLEMENTED
|
|
290
|
+
and cast("grpc.RpcError", e).code() == grpc.StatusCode.UNIMPLEMENTED
|
|
282
291
|
):
|
|
283
292
|
context.abort(
|
|
284
293
|
grpc.StatusCode.UNIMPLEMENTED,
|
|
@@ -337,7 +346,7 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
|
|
|
337
346
|
f"Active server hit error:\n{e}",
|
|
338
347
|
)
|
|
339
348
|
|
|
340
|
-
return
|
|
349
|
+
return dagster_api_pb2.GetCurrentRunsReply(
|
|
341
350
|
serialized_current_runs=serialize_value(
|
|
342
351
|
GetCurrentRunsResult(current_runs=all_run_ids, serializable_error_info=None)
|
|
343
352
|
)
|