dagster-cloud 1.8.2__py3-none-any.whl → 1.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. dagster_cloud/__init__.py +3 -3
  2. dagster_cloud/agent/__init__.py +4 -4
  3. dagster_cloud/agent/cli/__init__.py +56 -17
  4. dagster_cloud/agent/dagster_cloud_agent.py +360 -172
  5. dagster_cloud/agent/instrumentation/__init__.py +0 -0
  6. dagster_cloud/agent/instrumentation/constants.py +2 -0
  7. dagster_cloud/agent/instrumentation/run_launch.py +23 -0
  8. dagster_cloud/agent/instrumentation/schedule.py +34 -0
  9. dagster_cloud/agent/instrumentation/sensor.py +34 -0
  10. dagster_cloud/anomaly_detection/__init__.py +2 -2
  11. dagster_cloud/anomaly_detection/defs.py +17 -12
  12. dagster_cloud/anomaly_detection/types.py +3 -3
  13. dagster_cloud/api/dagster_cloud_api.py +209 -293
  14. dagster_cloud/auth/constants.py +21 -5
  15. dagster_cloud/batching/__init__.py +1 -0
  16. dagster_cloud/batching/batcher.py +210 -0
  17. dagster_cloud/dagster_insights/__init__.py +12 -6
  18. dagster_cloud/dagster_insights/bigquery/bigquery_utils.py +3 -2
  19. dagster_cloud/dagster_insights/bigquery/dbt_wrapper.py +39 -12
  20. dagster_cloud/dagster_insights/bigquery/insights_bigquery_resource.py +8 -6
  21. dagster_cloud/dagster_insights/insights_utils.py +18 -8
  22. dagster_cloud/dagster_insights/metrics_utils.py +12 -12
  23. dagster_cloud/dagster_insights/snowflake/dagster_snowflake_insights.py +5 -12
  24. dagster_cloud/dagster_insights/snowflake/dbt_wrapper.py +34 -8
  25. dagster_cloud/dagster_insights/snowflake/definitions.py +38 -12
  26. dagster_cloud/dagster_insights/snowflake/insights_snowflake_resource.py +11 -23
  27. dagster_cloud/definitions/__init__.py +0 -0
  28. dagster_cloud/definitions/job_selection.py +36 -0
  29. dagster_cloud/execution/cloud_run_launcher/k8s.py +1 -1
  30. dagster_cloud/execution/cloud_run_launcher/process.py +3 -3
  31. dagster_cloud/execution/monitoring/__init__.py +27 -33
  32. dagster_cloud/execution/utils/process.py +3 -3
  33. dagster_cloud/instance/__init__.py +125 -38
  34. dagster_cloud/instrumentation/__init__.py +32 -0
  35. dagster_cloud/metadata/source_code.py +13 -8
  36. dagster_cloud/metrics/__init__.py +0 -0
  37. dagster_cloud/metrics/tracer.py +59 -0
  38. dagster_cloud/opentelemetry/__init__.py +0 -0
  39. dagster_cloud/opentelemetry/config/__init__.py +73 -0
  40. dagster_cloud/opentelemetry/config/exporter.py +81 -0
  41. dagster_cloud/opentelemetry/config/log_record_processor.py +40 -0
  42. dagster_cloud/opentelemetry/config/logging_handler.py +14 -0
  43. dagster_cloud/opentelemetry/config/meter_provider.py +9 -0
  44. dagster_cloud/opentelemetry/config/metric_reader.py +39 -0
  45. dagster_cloud/opentelemetry/controller.py +319 -0
  46. dagster_cloud/opentelemetry/enum.py +58 -0
  47. dagster_cloud/opentelemetry/factories/__init__.py +1 -0
  48. dagster_cloud/opentelemetry/factories/logs.py +113 -0
  49. dagster_cloud/opentelemetry/factories/metrics.py +121 -0
  50. dagster_cloud/opentelemetry/metrics/__init__.py +0 -0
  51. dagster_cloud/opentelemetry/metrics/meter.py +140 -0
  52. dagster_cloud/opentelemetry/observers/__init__.py +0 -0
  53. dagster_cloud/opentelemetry/observers/dagster_exception_handler.py +40 -0
  54. dagster_cloud/opentelemetry/observers/execution_observer.py +178 -0
  55. dagster_cloud/pex/grpc/__generated__/multi_pex_api_pb2.pyi +175 -0
  56. dagster_cloud/pex/grpc/__init__.py +2 -2
  57. dagster_cloud/pex/grpc/client.py +4 -4
  58. dagster_cloud/pex/grpc/compile.py +2 -2
  59. dagster_cloud/pex/grpc/server/__init__.py +2 -2
  60. dagster_cloud/pex/grpc/server/cli/__init__.py +31 -19
  61. dagster_cloud/pex/grpc/server/manager.py +60 -42
  62. dagster_cloud/pex/grpc/server/registry.py +28 -21
  63. dagster_cloud/pex/grpc/server/server.py +23 -14
  64. dagster_cloud/pex/grpc/types.py +5 -5
  65. dagster_cloud/py.typed +0 -0
  66. dagster_cloud/secrets/__init__.py +1 -1
  67. dagster_cloud/secrets/loader.py +3 -3
  68. dagster_cloud/serverless/__init__.py +1 -1
  69. dagster_cloud/serverless/io_manager.py +36 -53
  70. dagster_cloud/storage/client.py +54 -17
  71. dagster_cloud/storage/compute_logs/__init__.py +3 -1
  72. dagster_cloud/storage/compute_logs/compute_log_manager.py +22 -17
  73. dagster_cloud/storage/defs_state/__init__.py +3 -0
  74. dagster_cloud/storage/defs_state/queries.py +15 -0
  75. dagster_cloud/storage/defs_state/storage.py +113 -0
  76. dagster_cloud/storage/event_logs/__init__.py +3 -1
  77. dagster_cloud/storage/event_logs/queries.py +102 -4
  78. dagster_cloud/storage/event_logs/storage.py +266 -73
  79. dagster_cloud/storage/event_logs/utils.py +88 -7
  80. dagster_cloud/storage/runs/__init__.py +1 -1
  81. dagster_cloud/storage/runs/queries.py +17 -2
  82. dagster_cloud/storage/runs/storage.py +88 -42
  83. dagster_cloud/storage/schedules/__init__.py +1 -1
  84. dagster_cloud/storage/schedules/storage.py +6 -8
  85. dagster_cloud/storage/tags.py +66 -1
  86. dagster_cloud/util/__init__.py +10 -12
  87. dagster_cloud/util/errors.py +49 -64
  88. dagster_cloud/version.py +1 -1
  89. dagster_cloud/workspace/config_schema/__init__.py +55 -13
  90. dagster_cloud/workspace/docker/__init__.py +76 -25
  91. dagster_cloud/workspace/docker/utils.py +1 -1
  92. dagster_cloud/workspace/ecs/__init__.py +1 -1
  93. dagster_cloud/workspace/ecs/client.py +51 -33
  94. dagster_cloud/workspace/ecs/launcher.py +76 -22
  95. dagster_cloud/workspace/ecs/run_launcher.py +3 -3
  96. dagster_cloud/workspace/ecs/utils.py +14 -5
  97. dagster_cloud/workspace/kubernetes/__init__.py +1 -1
  98. dagster_cloud/workspace/kubernetes/launcher.py +61 -29
  99. dagster_cloud/workspace/kubernetes/utils.py +34 -22
  100. dagster_cloud/workspace/user_code_launcher/__init__.py +5 -3
  101. dagster_cloud/workspace/user_code_launcher/process.py +16 -14
  102. dagster_cloud/workspace/user_code_launcher/user_code_launcher.py +552 -172
  103. dagster_cloud/workspace/user_code_launcher/utils.py +105 -1
  104. {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/METADATA +48 -42
  105. dagster_cloud-1.12.6.dist-info/RECORD +134 -0
  106. {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/WHEEL +1 -1
  107. dagster_cloud-1.8.2.dist-info/RECORD +0 -100
  108. {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/top_level.txt +0 -0
@@ -69,7 +69,7 @@ def protoc(generated_dir: str):
69
69
  generated_grpc_path,
70
70
  tempfile_path,
71
71
  )
72
- with open(tempfile_path, "r", encoding="utf8") as generated:
72
+ with open(tempfile_path, encoding="utf8") as generated:
73
73
  with open(generated_grpc_path, "w", encoding="utf8") as rewritten:
74
74
  for line in GENERATED_HEADER:
75
75
  rewritten.write(line)
@@ -85,7 +85,7 @@ def protoc(generated_dir: str):
85
85
  generated_pb2_path,
86
86
  tempfile_path,
87
87
  )
88
- with open(tempfile_path, "r", encoding="utf8") as generated:
88
+ with open(tempfile_path, encoding="utf8") as generated:
89
89
  with open(generated_pb2_path, "w", encoding="utf8") as rewritten:
90
90
  for line in GENERATED_HEADER:
91
91
  rewritten.write(line)
@@ -1,2 +1,2 @@
1
- from .manager import MultiPexManager as MultiPexManager
2
- from .server import run_multipex_server as run_multipex_server
1
+ from dagster_cloud.pex.grpc.server.manager import MultiPexManager as MultiPexManager
2
+ from dagster_cloud.pex.grpc.server.server import run_multipex_server as run_multipex_server
@@ -1,15 +1,16 @@
1
1
  import logging
2
2
  import os
3
+ import signal
3
4
  import subprocess
4
5
  from typing import Optional
5
6
 
6
7
  from dagster._serdes import deserialize_value
7
- from dagster._utils.interrupts import capture_interrupts
8
+ from dagster._utils.interrupts import setup_interrupt_handlers
8
9
  from dagster_cloud_cli.core.workspace import PexMetadata
9
10
  from typer import Option, Typer
10
11
 
11
- from ..registry import PexS3Registry
12
- from ..server import run_multipex_server
12
+ from dagster_cloud.pex.grpc.server.registry import PexS3Registry
13
+ from dagster_cloud.pex.grpc.server.server import run_multipex_server
13
14
 
14
15
  app = Typer(hidden=True)
15
16
 
@@ -48,20 +49,31 @@ def execute_run(
48
49
  default="/tmp/pex-files", envvar="LOCAL_PEX_FILES_DIR"
49
50
  ),
50
51
  ):
51
- with capture_interrupts():
52
- pex_metadata = deserialize_value(pex_metadata_json, PexMetadata)
53
- executable = PexS3Registry(local_pex_files_dir).get_pex_executable(pex_metadata)
52
+ setup_interrupt_handlers()
53
+ pex_metadata = deserialize_value(pex_metadata_json, PexMetadata)
54
+ executable = PexS3Registry(local_pex_files_dir).get_pex_executable(pex_metadata)
54
55
 
55
- subprocess.run(
56
- [
57
- executable.source_path,
58
- "-m",
59
- "dagster",
60
- "api",
61
- "execute_run",
62
- input_json,
63
- ],
64
- check=True,
65
- env={**os.environ.copy(), **executable.environ},
66
- cwd=executable.working_directory,
67
- )
56
+ run_process = subprocess.Popen(
57
+ [
58
+ executable.source_path,
59
+ "-m",
60
+ "dagster",
61
+ "api",
62
+ "execute_run",
63
+ input_json,
64
+ ],
65
+ env={**os.environ.copy(), **executable.environ},
66
+ cwd=executable.working_directory,
67
+ )
68
+
69
+ logger = logging.getLogger("dagster.pex_run")
70
+
71
+ try:
72
+ return_code = run_process.wait()
73
+ if return_code != 0:
74
+ raise Exception(f"PEX subprocess returned with exit code {return_code}")
75
+ except KeyboardInterrupt:
76
+ logger.info("Forwarding interrupt to PEX subprocess")
77
+ run_process.send_signal(signal.SIGINT)
78
+ run_process.wait()
79
+ raise
@@ -5,21 +5,24 @@ import sys
5
5
  import threading
6
6
  import time
7
7
  from contextlib import AbstractContextManager
8
- from typing import Dict, List, Optional, Set, Tuple, Union, cast
8
+ from typing import Optional, Union, cast
9
9
 
10
- import dagster._seven as seven
11
10
  from dagster import _check as check
12
11
  from dagster._core.errors import DagsterUserCodeUnreachableError
13
12
  from dagster._core.instance.ref import InstanceRef
14
13
  from dagster._grpc.client import DagsterGrpcClient, client_heartbeat_thread
15
- from dagster._serdes.ipc import open_ipc_subprocess
16
14
  from dagster._utils import find_free_port, safe_tempfile_path_unmanaged
17
15
  from dagster._utils.error import SerializableErrorInfo, serializable_error_info_from_exc_info
18
16
  from dagster_cloud_cli.core.workspace import CodeLocationDeployData, PexMetadata
17
+ from dagster_shared import seven
18
+ from dagster_shared.ipc import open_ipc_subprocess
19
19
  from pydantic import BaseModel, Extra
20
20
 
21
- from ..types import PexServerHandle
22
- from .registry import PexS3Registry
21
+ from dagster_cloud.pex.grpc.server.registry import PexS3Registry
22
+ from dagster_cloud.pex.grpc.types import PexServerHandle
23
+ from dagster_cloud.workspace.user_code_launcher.utils import get_grpc_server_env
24
+
25
+ logger = logging.getLogger("dagster.multipex")
23
26
 
24
27
 
25
28
  class PexProcessEntry(BaseModel, frozen=True, extra=Extra.forbid, arbitrary_types_allowed=True):
@@ -43,11 +46,11 @@ class MultiPexManager(AbstractContextManager):
43
46
  enable_metrics: bool = False,
44
47
  ):
45
48
  # Keyed by hash of PexServerHandle
46
- self._pex_servers: Dict[str, Union[PexProcessEntry, PexErrorEntry]] = {}
47
- self._pending_startup_pex_servers: Set[str] = set()
48
- self._pending_shutdown_pex_servers: Set[str] = set()
49
+ self._pex_servers: dict[str, Union[PexProcessEntry, PexErrorEntry]] = {}
50
+ self._pending_startup_pex_servers: set[str] = set()
51
+ self._pending_shutdown_pex_servers: set[str] = set()
49
52
  self._pex_servers_lock = threading.RLock()
50
- self._pex_metadata_for_handle: Dict[
53
+ self._pex_metadata_for_handle: dict[
51
54
  str, Optional[PexMetadata]
52
55
  ] = {} # maps handle id to the pex tag
53
56
  self._heartbeat_ttl = 60
@@ -62,13 +65,13 @@ class MultiPexManager(AbstractContextManager):
62
65
  daemon=True,
63
66
  )
64
67
  self._watchdog_thread.start()
65
- logging.info(
68
+ logger.info(
66
69
  "Created a watchdog thread %s for MultiPexManager with watchdog_run_interval=%s",
67
70
  self._watchdog_thread.name,
68
71
  watchdog_run_interval,
69
72
  )
70
73
  else:
71
- logging.info(
74
+ logger.info(
72
75
  "No watchdog thread started for MultiPexManager (watchdog_run_interval=%s)",
73
76
  watchdog_run_interval,
74
77
  )
@@ -85,7 +88,7 @@ class MultiPexManager(AbstractContextManager):
85
88
  returncode = server.grpc_server_process.poll()
86
89
  if returncode is not None:
87
90
  dead_server_returncodes.append((server, returncode))
88
- logging.error(
91
+ logger.error(
89
92
  "watchdog: pex subprocesss %s unexpectedly exited with returncode %s -"
90
93
  " changing state to error",
91
94
  server_id,
@@ -93,7 +96,7 @@ class MultiPexManager(AbstractContextManager):
93
96
  )
94
97
  self._mark_servers_unexpected_termination(dead_server_returncodes)
95
98
  if dead_server_returncodes:
96
- logging.warning(
99
+ logger.warning(
97
100
  "watchdog: inspected %s active servers %s, of which %s were found unexpectedly"
98
101
  " terminated",
99
102
  len(active_servers),
@@ -102,7 +105,7 @@ class MultiPexManager(AbstractContextManager):
102
105
  )
103
106
 
104
107
  def _mark_servers_unexpected_termination(
105
- self, dead_server_returncodes: List[Tuple[PexProcessEntry, int]]
108
+ self, dead_server_returncodes: list[tuple[PexProcessEntry, int]]
106
109
  ) -> None:
107
110
  with self._pex_servers_lock:
108
111
  for server, returncode in dead_server_returncodes:
@@ -137,9 +140,9 @@ class MultiPexManager(AbstractContextManager):
137
140
  if isinstance(pex_server_or_error, PexErrorEntry):
138
141
  return pex_server_or_error.error
139
142
 
140
- return cast(PexProcessEntry, self._pex_servers[handle_id]).grpc_client
143
+ return cast("PexProcessEntry", self._pex_servers[handle_id]).grpc_client
141
144
 
142
- def get_active_pex_servers(self) -> List[PexProcessEntry]:
145
+ def get_active_pex_servers(self) -> list[PexProcessEntry]:
143
146
  with self._pex_servers_lock:
144
147
  return [
145
148
  server
@@ -147,7 +150,7 @@ class MultiPexManager(AbstractContextManager):
147
150
  if self.is_server_active(server_id) and isinstance(server, PexProcessEntry)
148
151
  ]
149
152
 
150
- def get_error_pex_servers(self) -> List[PexErrorEntry]:
153
+ def get_error_pex_servers(self) -> list[PexErrorEntry]:
151
154
  with self._pex_servers_lock:
152
155
  return [
153
156
  server
@@ -157,7 +160,7 @@ class MultiPexManager(AbstractContextManager):
157
160
 
158
161
  def get_active_pex_server_handles(
159
162
  self, deployment_name, location_name: str
160
- ) -> List[PexServerHandle]:
163
+ ) -> list[PexServerHandle]:
161
164
  return [
162
165
  server.pex_server_handle
163
166
  for server in self.get_active_pex_servers()
@@ -167,7 +170,7 @@ class MultiPexManager(AbstractContextManager):
167
170
 
168
171
  def get_error_pex_server_handles(
169
172
  self, deployment_name, location_name: str
170
- ) -> List[PexServerHandle]:
173
+ ) -> list[PexServerHandle]:
171
174
  return [
172
175
  server.pex_server_handle
173
176
  for server in self.get_error_pex_servers()
@@ -175,7 +178,7 @@ class MultiPexManager(AbstractContextManager):
175
178
  and server.pex_server_handle.location_name == location_name
176
179
  ]
177
180
 
178
- def get_all_pex_grpc_clients_map(self) -> Dict[str, DagsterGrpcClient]:
181
+ def get_all_pex_grpc_clients_map(self) -> dict[str, DagsterGrpcClient]:
179
182
  with self._pex_servers_lock:
180
183
  return {
181
184
  server.pex_server_handle.get_id(): server.grpc_client
@@ -209,14 +212,14 @@ class MultiPexManager(AbstractContextManager):
209
212
  pex_executable = self._registry.get_pex_executable(
210
213
  check.not_none(code_location_deploy_data.pex_metadata)
211
214
  )
212
- logging.info(
215
+ logger.info(
213
216
  "Installed pex executable %s at %s",
214
217
  code_location_deploy_data.pex_metadata,
215
218
  pex_executable.source_path,
216
219
  )
217
220
 
218
221
  metadata = code_location_deploy_data
219
- logging.info("Launching subprocess %s", pex_executable.source_path)
222
+ logger.info("Launching subprocess %s", pex_executable.source_path)
220
223
  subprocess_args = [
221
224
  pex_executable.source_path,
222
225
  "-m",
@@ -243,7 +246,8 @@ class MultiPexManager(AbstractContextManager):
243
246
  port = None
244
247
  socket = safe_tempfile_path_unmanaged()
245
248
 
246
- additional_env = metadata.get_grpc_server_env(
249
+ additional_env = get_grpc_server_env(
250
+ code_location_deploy_data=metadata,
247
251
  port=port,
248
252
  location_name=server_handle.location_name,
249
253
  instance_ref=instance_ref,
@@ -274,7 +278,7 @@ class MultiPexManager(AbstractContextManager):
274
278
  daemon=True,
275
279
  )
276
280
  heartbeat_thread.start()
277
- logging.info(
281
+ logger.info(
278
282
  "Created a heartbeat thread %s for %s",
279
283
  heartbeat_thread.name,
280
284
  server_handle.get_id(),
@@ -287,7 +291,7 @@ class MultiPexManager(AbstractContextManager):
287
291
  error=serializable_error_info_from_exc_info(sys.exc_info()),
288
292
  )
289
293
  self._pending_startup_pex_servers.remove(server_handle.get_id())
290
- logging.exception(
294
+ logger.exception(
291
295
  "Creating new pex server for %s:%s failed",
292
296
  server_handle.deployment_name,
293
297
  server_handle.location_name,
@@ -307,7 +311,7 @@ class MultiPexManager(AbstractContextManager):
307
311
  with self._pex_servers_lock:
308
312
  handle_id = server_handle.get_id()
309
313
  if handle_id in self._pending_startup_pex_servers:
310
- logging.info(
314
+ logger.info(
311
315
  "Ignoring request to create pex server for %s - an identical server is"
312
316
  " already pending start",
313
317
  handle_id,
@@ -316,12 +320,12 @@ class MultiPexManager(AbstractContextManager):
316
320
  self._pending_startup_pex_servers.add(handle_id)
317
321
  if handle_id in self._pex_servers:
318
322
  # clear any previous error state since we're attempting to start server again
319
- logging.info(
323
+ logger.info(
320
324
  "Clearing previous state for %s: %s", handle_id, self._pex_servers[handle_id]
321
325
  )
322
326
  del self._pex_servers[handle_id]
323
327
 
324
- logging.info(
328
+ logger.info(
325
329
  "Creating new pex server for %s:%s",
326
330
  server_handle.deployment_name,
327
331
  server_handle.location_name,
@@ -332,7 +336,7 @@ class MultiPexManager(AbstractContextManager):
332
336
  handle_id = server_handle.get_id()
333
337
  with self._pex_servers_lock:
334
338
  if handle_id in self._pex_servers or handle_id in self._pending_startup_pex_servers:
335
- logging.info("Server %s marked for shutdown", handle_id)
339
+ logger.info("Server %s marked for shutdown", handle_id)
336
340
  self._pending_shutdown_pex_servers.add(handle_id)
337
341
 
338
342
  def cleanup_pending_shutdown_pex_servers(self) -> None:
@@ -343,14 +347,23 @@ class MultiPexManager(AbstractContextManager):
343
347
  if handle_id not in self._pex_servers:
344
348
  continue
345
349
  server = self._pex_servers[handle_id]
346
- if (
347
- isinstance(server, PexProcessEntry)
348
- and server.grpc_server_process.poll() is not None
349
- ):
350
- to_remove.add(handle_id)
350
+ if isinstance(server, PexProcessEntry):
351
+ if server.grpc_server_process.poll() is not None:
352
+ # Server process shut down
353
+ to_remove.add(handle_id)
354
+ else:
355
+ try:
356
+ server.grpc_client.ping("")
357
+ except DagsterUserCodeUnreachableError:
358
+ logger.warning(
359
+ "server process is still running but the server is unreachable - killing the process",
360
+ exc_info=True,
361
+ )
362
+ server.grpc_server_process.kill()
363
+ to_remove.add(handle_id)
351
364
 
352
365
  for handle_id in to_remove:
353
- logging.info("Server %s completely shutdown, cleaning up", handle_id)
366
+ logger.info("Server %s completely shutdown, cleaning up", handle_id)
354
367
  self._pending_shutdown_pex_servers.remove(handle_id)
355
368
  del self._pex_servers[handle_id]
356
369
 
@@ -359,32 +372,37 @@ class MultiPexManager(AbstractContextManager):
359
372
  pex_server = self._pex_servers.get(handle_id)
360
373
  if not pex_server:
361
374
  # still in _pending_startup_pex_servers
362
- logging.info("Server %s not up yet, will request shutdown later", handle_id)
375
+ logger.info("Server %s not up yet, will request shutdown later", handle_id)
363
376
  continue
364
377
 
365
378
  if isinstance(pex_server, PexErrorEntry):
366
- logging.debug("Server %s was in an error state, no shutdown needed", handle_id)
379
+ logger.debug("Server %s was in an error state, no shutdown needed", handle_id)
367
380
  continue
368
381
 
369
382
  if pex_server.heartbeat_shutdown_event.is_set():
370
383
  # already requested shutdown
371
- logging.info("Already requested shutdown for server %s", handle_id)
372
384
  continue
373
385
 
374
- logging.info("Requesting shutdown for server %s", handle_id)
386
+ logger.info("Requesting shutdown for server %s", handle_id)
375
387
  pex_server.heartbeat_shutdown_event.set()
376
388
  pex_server.heartbeat_thread.join()
377
389
  try:
378
390
  pex_server.grpc_client.shutdown_server()
379
391
  except DagsterUserCodeUnreachableError:
380
- # Server already shutdown
381
- pass
392
+ logger.warning(
393
+ "Server shutdown for %s over grpc failed, killing the process",
394
+ handle_id,
395
+ exc_info=True,
396
+ )
397
+ pex_server.grpc_server_process.kill()
382
398
 
383
399
  # Delete any registry files not in use anymore
384
400
  # - ensure that resources for servers starting up or shutting down are not removed
385
401
  # - important to do this while holding the lock to avoid race conditions
386
402
  running_server_ids = {
387
- proc.pex_server_handle.get_id() for proc in self._pex_servers.values()
403
+ proc.pex_server_handle.get_id()
404
+ for proc in self._pex_servers.values()
405
+ if isinstance(proc, PexProcessEntry)
388
406
  }
389
407
  in_use_handle_ids = self._pending_startup_pex_servers.union(
390
408
  self._pending_shutdown_pex_servers
@@ -9,7 +9,7 @@ import threading
9
9
  from dataclasses import dataclass
10
10
  from os.path import expanduser
11
11
  from pathlib import Path
12
- from typing import Dict, List, NamedTuple, Optional, Set
12
+ from typing import NamedTuple, Optional
13
13
  from uuid import uuid4
14
14
 
15
15
  from dagster import _check as check
@@ -19,6 +19,8 @@ DEFAULT_PEX_FILES_DIR = "/tmp/pex-files"
19
19
  # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html
20
20
  MULTIPART_DOWNLOAD_THREADS = 20 # Double the boto3 default of 10
21
21
 
22
+ logger = logging.getLogger("dagster.multipex")
23
+
22
24
 
23
25
  def _download_from_s3(filename: str, local_filepath: str):
24
26
  # Lazy import boto3 to avoid a hard dependency during module load
@@ -28,7 +30,11 @@ def _download_from_s3(filename: str, local_filepath: str):
28
30
 
29
31
  config = Config(retries={"max_attempts": 3, "mode": "standard"})
30
32
 
31
- s3 = boto3.client("s3", region_name="us-west-2", config=config)
33
+ s3 = boto3.client(
34
+ "s3",
35
+ region_name=os.getenv("DAGSTER_CLOUD_SERVERLESS_REGION", "us-west-2"),
36
+ config=config,
37
+ )
32
38
 
33
39
  # TODO: move the bucket and prefix to pex_metdata
34
40
  s3_bucket_name = os.environ["DAGSTER_CLOUD_SERVERLESS_STORAGE_S3_BUCKET"]
@@ -52,22 +58,22 @@ class PexExecutable(
52
58
  "_PexExecutable",
53
59
  [
54
60
  ("source_path", str),
55
- ("all_paths", List[str]),
56
- ("environ", Dict[str, str]),
61
+ ("all_paths", list[str]),
62
+ ("environ", dict[str, str]),
57
63
  ("working_directory", Optional[str]),
58
- ("venv_dirs", List[str]),
64
+ ("venv_dirs", list[str]),
59
65
  ],
60
66
  )
61
67
  ):
62
68
  def __new__(
63
69
  cls,
64
70
  source_path: str,
65
- all_paths: List[str],
66
- environ: Dict[str, str],
71
+ all_paths: list[str],
72
+ environ: dict[str, str],
67
73
  working_directory: Optional[str],
68
- venv_dirs: List[str],
74
+ venv_dirs: list[str],
69
75
  ):
70
- return super(PexExecutable, cls).__new__(
76
+ return super().__new__(
71
77
  cls,
72
78
  check.str_param(source_path, "source_path"),
73
79
  check.list_param(all_paths, "all_paths", str),
@@ -96,12 +102,12 @@ class PexS3Registry:
96
102
  local_pex_files_dir if local_pex_files_dir else DEFAULT_PEX_FILES_DIR
97
103
  )
98
104
  os.makedirs(self._local_pex_files_dir, exist_ok=True)
99
- self.working_dirs: Dict[
105
+ self.working_dirs: dict[
100
106
  str, str
101
107
  ] = {} # once unpacked, working dirs dont change so we cache them
102
108
 
103
109
  # keep track of local files and directories used by each pex tag
104
- self.local_paths_for_pex_tag: Dict[str, Set[str]] = {}
110
+ self.local_paths_for_pex_tag: dict[str, set[str]] = {}
105
111
 
106
112
  # lock to do safe install and cleanup
107
113
  self._install_lock = threading.RLock()
@@ -149,7 +155,7 @@ class PexS3Registry:
149
155
  deps_pex_filepaths.append(local_filepath)
150
156
 
151
157
  if not source_pex_filepath:
152
- raise ValueError("Invalid pex_tag has no source pex: %r" % pex_metadata.pex_tag)
158
+ raise ValueError(f"Invalid pex_tag has no source pex: {pex_metadata.pex_tag!r}")
153
159
 
154
160
  # we unpack each pex file into its own venv
155
161
  source_venv = self.venv_for(source_pex_filepath)
@@ -188,11 +194,11 @@ class PexS3Registry:
188
194
  venv_dirs=[str(source_venv.path)] + [str(deps_venv.path) for deps_venv in deps_venvs],
189
195
  )
190
196
 
191
- def cleanup_unused_files(self, in_use_pex_metadatas: List[PexMetadata]) -> None:
197
+ def cleanup_unused_files(self, in_use_pex_metadatas: list[PexMetadata]) -> None:
192
198
  with self._install_lock:
193
199
  return self._cleanup_unused_files(in_use_pex_metadatas)
194
200
 
195
- def _cleanup_unused_files(self, in_use_pex_metadatas: List[PexMetadata]) -> None:
201
+ def _cleanup_unused_files(self, in_use_pex_metadatas: list[PexMetadata]) -> None:
196
202
  """Cleans up all local files and directories that are not associated with any PexMetadata provided."""
197
203
  in_use_pex_tags = [pex_metadata.pex_tag for pex_metadata in in_use_pex_metadatas]
198
204
 
@@ -207,7 +213,7 @@ class PexS3Registry:
207
213
  unused_local_paths = all_local_paths - in_use_local_paths
208
214
  unused_paths_present = [path for path in unused_local_paths if os.path.exists(path)]
209
215
  if unused_paths_present:
210
- logging.info(
216
+ logger.info(
211
217
  "Cleaning up %s unused local paths: %r",
212
218
  len(unused_paths_present),
213
219
  unused_paths_present,
@@ -219,13 +225,13 @@ class PexS3Registry:
219
225
  else:
220
226
  os.remove(path)
221
227
  except OSError:
222
- logging.exception("Ignoring failure to clean up local unused path %s", path)
228
+ logger.exception("Ignoring failure to clean up local unused path %s", path)
223
229
 
224
230
  def venv_for(self, pex_filepath) -> PexVenv:
225
231
  _, pex_filename = os.path.split(pex_filepath)
226
232
  venv_dir = self.venv_dir_for(pex_filepath)
227
233
  if os.path.exists(venv_dir):
228
- logging.info("Reusing existing venv %r for %r", venv_dir, pex_filepath)
234
+ logger.info("Reusing existing venv %r for %r", venv_dir, pex_filepath)
229
235
  else:
230
236
  self.install_venv(venv_dir, pex_filepath)
231
237
  if not os.path.exists(venv_dir):
@@ -259,6 +265,7 @@ class PexS3Registry:
259
265
  # since we combine multiple venvs, we need non hermetic scripts
260
266
  "--non-hermetic-scripts",
261
267
  venv_dir,
268
+ "--pip",
262
269
  ],
263
270
  stderr=subprocess.STDOUT,
264
271
  )
@@ -267,7 +274,7 @@ class PexS3Registry:
267
274
  raise PexInstallationError(
268
275
  f"Could not install venv. Pex output: {e.output}", pex_filepath
269
276
  ) from e
270
- logging.info(
277
+ logger.info(
271
278
  "Unpacked pex file %r into venv at %r",
272
279
  pex_filepath,
273
280
  venv_dir,
@@ -283,7 +290,7 @@ class PexS3Registry:
283
290
  if not proc.returncode:
284
291
  return Path(proc.stdout.decode("utf-8").strip()).absolute()
285
292
  else:
286
- logging.error(
293
+ logger.error(
287
294
  "Cannot determine site-packages for venv at %r: %s\n%s",
288
295
  venv_path,
289
296
  proc.stdout.decode("utf-8"),
@@ -315,12 +322,12 @@ class PexS3Registry:
315
322
 
316
323
  except subprocess.CalledProcessError:
317
324
  # working_directory package is optional, just log a message
318
- logging.info("Cannot import working_directory package - not setting current directory.")
325
+ logger.info("Cannot import working_directory package - not setting current directory.")
319
326
  return None
320
327
  except OSError:
321
328
  # some issue with pex not being runnable, log an error but don't fail yet
322
329
  # might fail later if we try to run this again
323
- logging.exception(
330
+ logger.exception(
324
331
  "Ignoring failure to run pex file to determine working_directory %r", pex_path
325
332
  )
326
333
  return None
@@ -9,12 +9,12 @@ from typing import Optional, cast
9
9
  import dagster._check as check
10
10
  import grpc
11
11
  from dagster._core.errors import DagsterUserCodeUnreachableError
12
- from dagster._grpc.__generated__ import api_pb2
13
- from dagster._grpc.__generated__.api_pb2_grpc import (
12
+ from dagster._grpc.__generated__ import dagster_api_pb2
13
+ from dagster._grpc.__generated__.dagster_api_pb2_grpc import (
14
14
  DagsterApiServicer,
15
15
  add_DagsterApiServicer_to_server,
16
16
  )
17
- from dagster._grpc.client import DEFAULT_GRPC_TIMEOUT
17
+ from dagster._grpc.client import DEFAULT_GRPC_TIMEOUT, DEFAULT_REPOSITORY_GRPC_TIMEOUT
18
18
  from dagster._grpc.server import server_termination_target
19
19
  from dagster._grpc.types import GetCurrentRunsResult, SensorExecutionArgs
20
20
  from dagster._grpc.utils import max_rx_bytes, max_send_bytes
@@ -22,12 +22,13 @@ from dagster._serdes import deserialize_value, serialize_value
22
22
  from dagster._utils.error import SerializableErrorInfo, serializable_error_info_from_exc_info
23
23
  from grpc_health.v1 import health, health_pb2, health_pb2_grpc
24
24
 
25
- from ..__generated__ import multi_pex_api_pb2
26
- from ..__generated__.multi_pex_api_pb2_grpc import (
25
+ from dagster_cloud.pex.grpc.__generated__ import multi_pex_api_pb2
26
+ from dagster_cloud.pex.grpc.__generated__.multi_pex_api_pb2_grpc import (
27
27
  MultiPexApiServicer,
28
28
  add_MultiPexApiServicer_to_server,
29
29
  )
30
- from ..types import (
30
+ from dagster_cloud.pex.grpc.server.manager import MultiPexManager
31
+ from dagster_cloud.pex.grpc.types import (
31
32
  CreatePexServerArgs,
32
33
  CreatePexServerResponse,
33
34
  GetCrashedPexServersArgs,
@@ -38,7 +39,6 @@ from ..types import (
38
39
  ShutdownPexServerArgs,
39
40
  ShutdownPexServerResponse,
40
41
  )
41
- from .manager import MultiPexManager
42
42
 
43
43
 
44
44
  class MultiPexApiServer(MultiPexApiServicer):
@@ -186,10 +186,15 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
186
186
  self._get_handle_from_metadata(context)
187
187
  )
188
188
  if isinstance(client_or_error, SerializableErrorInfo):
189
- return api_pb2.ListRepositoriesReply(
189
+ return dagster_api_pb2.ListRepositoriesReply(
190
190
  serialized_list_repositories_response_or_error=serialize_value(client_or_error)
191
191
  )
192
- return client_or_error._get_response("ListRepositories", request) # noqa: SLF001
192
+
193
+ try:
194
+ return client_or_error._get_response("ListRepositories", request) # noqa: SLF001
195
+ except grpc.RpcError as e:
196
+ # Surface the grpc error to the caller
197
+ context.abort(e.code(), e.details())
193
198
 
194
199
  def Ping(self, request, context):
195
200
  return self._query("Ping", request, context)
@@ -201,7 +206,9 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
201
206
  return self._query("GetCurrentImage", request, context)
202
207
 
203
208
  def StreamingExternalRepository(self, request, context):
204
- return self._streaming_query("StreamingExternalRepository", request, context)
209
+ return self._streaming_query(
210
+ "StreamingExternalRepository", request, context, timeout=DEFAULT_REPOSITORY_GRPC_TIMEOUT
211
+ )
205
212
 
206
213
  def Heartbeat(self, request, context):
207
214
  return self._query("Heartbeat", request, context)
@@ -228,7 +235,9 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
228
235
  return self._query("ExternalPipelineSubsetSnapshot", request, context)
229
236
 
230
237
  def ExternalRepository(self, request, context):
231
- return self._query("ExternalRepository", request, context)
238
+ return self._query(
239
+ "ExternalRepository", request, context, timeout=DEFAULT_REPOSITORY_GRPC_TIMEOUT
240
+ )
232
241
 
233
242
  def ExternalJob(self, request, context):
234
243
  return self._query("ExternalJob", request, context)
@@ -242,7 +251,7 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
242
251
  except Exception as e:
243
252
  if (
244
253
  isinstance(e, grpc.RpcError)
245
- and cast(grpc.RpcError, e).code() == grpc.StatusCode.UNIMPLEMENTED
254
+ and cast("grpc.RpcError", e).code() == grpc.StatusCode.UNIMPLEMENTED
246
255
  ):
247
256
  context.abort(
248
257
  grpc.StatusCode.UNIMPLEMENTED,
@@ -278,7 +287,7 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
278
287
  except Exception as e:
279
288
  if (
280
289
  isinstance(e, grpc.RpcError)
281
- and cast(grpc.RpcError, e).code() == grpc.StatusCode.UNIMPLEMENTED
290
+ and cast("grpc.RpcError", e).code() == grpc.StatusCode.UNIMPLEMENTED
282
291
  ):
283
292
  context.abort(
284
293
  grpc.StatusCode.UNIMPLEMENTED,
@@ -337,7 +346,7 @@ class DagsterPexProxyApiServer(DagsterApiServicer):
337
346
  f"Active server hit error:\n{e}",
338
347
  )
339
348
 
340
- return api_pb2.GetCurrentRunsReply(
349
+ return dagster_api_pb2.GetCurrentRunsReply(
341
350
  serialized_current_runs=serialize_value(
342
351
  GetCurrentRunsResult(current_runs=all_run_ids, serializable_error_info=None)
343
352
  )