dagster-cloud 1.8.2__py3-none-any.whl → 1.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. dagster_cloud/__init__.py +3 -3
  2. dagster_cloud/agent/__init__.py +4 -4
  3. dagster_cloud/agent/cli/__init__.py +56 -17
  4. dagster_cloud/agent/dagster_cloud_agent.py +360 -172
  5. dagster_cloud/agent/instrumentation/__init__.py +0 -0
  6. dagster_cloud/agent/instrumentation/constants.py +2 -0
  7. dagster_cloud/agent/instrumentation/run_launch.py +23 -0
  8. dagster_cloud/agent/instrumentation/schedule.py +34 -0
  9. dagster_cloud/agent/instrumentation/sensor.py +34 -0
  10. dagster_cloud/anomaly_detection/__init__.py +2 -2
  11. dagster_cloud/anomaly_detection/defs.py +17 -12
  12. dagster_cloud/anomaly_detection/types.py +3 -3
  13. dagster_cloud/api/dagster_cloud_api.py +209 -293
  14. dagster_cloud/auth/constants.py +21 -5
  15. dagster_cloud/batching/__init__.py +1 -0
  16. dagster_cloud/batching/batcher.py +210 -0
  17. dagster_cloud/dagster_insights/__init__.py +12 -6
  18. dagster_cloud/dagster_insights/bigquery/bigquery_utils.py +3 -2
  19. dagster_cloud/dagster_insights/bigquery/dbt_wrapper.py +39 -12
  20. dagster_cloud/dagster_insights/bigquery/insights_bigquery_resource.py +8 -6
  21. dagster_cloud/dagster_insights/insights_utils.py +18 -8
  22. dagster_cloud/dagster_insights/metrics_utils.py +12 -12
  23. dagster_cloud/dagster_insights/snowflake/dagster_snowflake_insights.py +5 -12
  24. dagster_cloud/dagster_insights/snowflake/dbt_wrapper.py +34 -8
  25. dagster_cloud/dagster_insights/snowflake/definitions.py +38 -12
  26. dagster_cloud/dagster_insights/snowflake/insights_snowflake_resource.py +11 -23
  27. dagster_cloud/definitions/__init__.py +0 -0
  28. dagster_cloud/definitions/job_selection.py +36 -0
  29. dagster_cloud/execution/cloud_run_launcher/k8s.py +1 -1
  30. dagster_cloud/execution/cloud_run_launcher/process.py +3 -3
  31. dagster_cloud/execution/monitoring/__init__.py +27 -33
  32. dagster_cloud/execution/utils/process.py +3 -3
  33. dagster_cloud/instance/__init__.py +125 -38
  34. dagster_cloud/instrumentation/__init__.py +32 -0
  35. dagster_cloud/metadata/source_code.py +13 -8
  36. dagster_cloud/metrics/__init__.py +0 -0
  37. dagster_cloud/metrics/tracer.py +59 -0
  38. dagster_cloud/opentelemetry/__init__.py +0 -0
  39. dagster_cloud/opentelemetry/config/__init__.py +73 -0
  40. dagster_cloud/opentelemetry/config/exporter.py +81 -0
  41. dagster_cloud/opentelemetry/config/log_record_processor.py +40 -0
  42. dagster_cloud/opentelemetry/config/logging_handler.py +14 -0
  43. dagster_cloud/opentelemetry/config/meter_provider.py +9 -0
  44. dagster_cloud/opentelemetry/config/metric_reader.py +39 -0
  45. dagster_cloud/opentelemetry/controller.py +319 -0
  46. dagster_cloud/opentelemetry/enum.py +58 -0
  47. dagster_cloud/opentelemetry/factories/__init__.py +1 -0
  48. dagster_cloud/opentelemetry/factories/logs.py +113 -0
  49. dagster_cloud/opentelemetry/factories/metrics.py +121 -0
  50. dagster_cloud/opentelemetry/metrics/__init__.py +0 -0
  51. dagster_cloud/opentelemetry/metrics/meter.py +140 -0
  52. dagster_cloud/opentelemetry/observers/__init__.py +0 -0
  53. dagster_cloud/opentelemetry/observers/dagster_exception_handler.py +40 -0
  54. dagster_cloud/opentelemetry/observers/execution_observer.py +178 -0
  55. dagster_cloud/pex/grpc/__generated__/multi_pex_api_pb2.pyi +175 -0
  56. dagster_cloud/pex/grpc/__init__.py +2 -2
  57. dagster_cloud/pex/grpc/client.py +4 -4
  58. dagster_cloud/pex/grpc/compile.py +2 -2
  59. dagster_cloud/pex/grpc/server/__init__.py +2 -2
  60. dagster_cloud/pex/grpc/server/cli/__init__.py +31 -19
  61. dagster_cloud/pex/grpc/server/manager.py +60 -42
  62. dagster_cloud/pex/grpc/server/registry.py +28 -21
  63. dagster_cloud/pex/grpc/server/server.py +23 -14
  64. dagster_cloud/pex/grpc/types.py +5 -5
  65. dagster_cloud/py.typed +0 -0
  66. dagster_cloud/secrets/__init__.py +1 -1
  67. dagster_cloud/secrets/loader.py +3 -3
  68. dagster_cloud/serverless/__init__.py +1 -1
  69. dagster_cloud/serverless/io_manager.py +36 -53
  70. dagster_cloud/storage/client.py +54 -17
  71. dagster_cloud/storage/compute_logs/__init__.py +3 -1
  72. dagster_cloud/storage/compute_logs/compute_log_manager.py +22 -17
  73. dagster_cloud/storage/defs_state/__init__.py +3 -0
  74. dagster_cloud/storage/defs_state/queries.py +15 -0
  75. dagster_cloud/storage/defs_state/storage.py +113 -0
  76. dagster_cloud/storage/event_logs/__init__.py +3 -1
  77. dagster_cloud/storage/event_logs/queries.py +102 -4
  78. dagster_cloud/storage/event_logs/storage.py +266 -73
  79. dagster_cloud/storage/event_logs/utils.py +88 -7
  80. dagster_cloud/storage/runs/__init__.py +1 -1
  81. dagster_cloud/storage/runs/queries.py +17 -2
  82. dagster_cloud/storage/runs/storage.py +88 -42
  83. dagster_cloud/storage/schedules/__init__.py +1 -1
  84. dagster_cloud/storage/schedules/storage.py +6 -8
  85. dagster_cloud/storage/tags.py +66 -1
  86. dagster_cloud/util/__init__.py +10 -12
  87. dagster_cloud/util/errors.py +49 -64
  88. dagster_cloud/version.py +1 -1
  89. dagster_cloud/workspace/config_schema/__init__.py +55 -13
  90. dagster_cloud/workspace/docker/__init__.py +76 -25
  91. dagster_cloud/workspace/docker/utils.py +1 -1
  92. dagster_cloud/workspace/ecs/__init__.py +1 -1
  93. dagster_cloud/workspace/ecs/client.py +51 -33
  94. dagster_cloud/workspace/ecs/launcher.py +76 -22
  95. dagster_cloud/workspace/ecs/run_launcher.py +3 -3
  96. dagster_cloud/workspace/ecs/utils.py +14 -5
  97. dagster_cloud/workspace/kubernetes/__init__.py +1 -1
  98. dagster_cloud/workspace/kubernetes/launcher.py +61 -29
  99. dagster_cloud/workspace/kubernetes/utils.py +34 -22
  100. dagster_cloud/workspace/user_code_launcher/__init__.py +5 -3
  101. dagster_cloud/workspace/user_code_launcher/process.py +16 -14
  102. dagster_cloud/workspace/user_code_launcher/user_code_launcher.py +552 -172
  103. dagster_cloud/workspace/user_code_launcher/utils.py +105 -1
  104. {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/METADATA +48 -42
  105. dagster_cloud-1.12.6.dist-info/RECORD +134 -0
  106. {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/WHEEL +1 -1
  107. dagster_cloud-1.8.2.dist-info/RECORD +0 -100
  108. {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ import json
3
3
  import logging
4
4
  import os
5
5
  import time
6
- from typing import List, Optional
6
+ from typing import Optional
7
7
 
8
8
  import boto3
9
9
  import botocore
@@ -13,7 +13,7 @@ from botocore.exceptions import ClientError
13
13
  from dagster._utils.backoff import backoff
14
14
  from dagster._utils.cached_method import cached_method
15
15
  from dagster_aws.ecs.tasks import DagsterEcsTaskDefinitionConfig
16
- from dagster_aws.ecs.utils import task_definitions_match
16
+ from dagster_aws.ecs.utils import is_transient_task_stopped_reason, task_definitions_match
17
17
 
18
18
  from dagster_cloud.workspace.ecs.service import Service
19
19
 
@@ -57,13 +57,14 @@ class Client:
57
57
  cluster_name: str,
58
58
  service_discovery_namespace_id: str,
59
59
  log_group: str,
60
- subnet_ids: Optional[List[str]] = None,
61
- security_group_ids: Optional[List[str]] = None,
60
+ subnet_ids: Optional[list[str]] = None,
61
+ security_group_ids: Optional[list[str]] = None,
62
62
  ecs_client=None,
63
63
  timeout: int = DEFAULT_ECS_TIMEOUT,
64
64
  grace_period: int = DEFAULT_ECS_GRACE_PERIOD,
65
65
  launch_type: str = "FARGATE",
66
66
  show_debug_cluster_info: bool = True,
67
+ assign_public_ip: Optional[bool] = None,
67
68
  ):
68
69
  self.ecs = ecs_client if ecs_client else boto3.client("ecs", config=config)
69
70
  self.logs = boto3.client("logs", config=config)
@@ -85,6 +86,7 @@ class Client:
85
86
  self.grace_period = check.int_param(grace_period, "grace_period")
86
87
  self.launch_type = check.str_param(launch_type, "launch_type")
87
88
  self._namespace: Optional[str] = None
89
+ self._assign_public_ip_override = assign_public_ip
88
90
 
89
91
  @property
90
92
  def ec2(self):
@@ -115,14 +117,17 @@ class Client:
115
117
  @property
116
118
  @cached_method
117
119
  def network_configuration(self):
120
+ if self.launch_type != "FARGATE":
121
+ assign_public_ip = None
122
+ elif self._assign_public_ip_override is not None:
123
+ assign_public_ip = "ENABLED" if self._assign_public_ip_override else "DISABLED"
124
+ else:
125
+ assign_public_ip = self._infer_assign_public_ip()
126
+
118
127
  network_configuration = {
119
128
  "awsvpcConfiguration": {
120
129
  "subnets": self.subnet_ids,
121
- **(
122
- {"assignPublicIp": self._assign_public_ip()}
123
- if self.launch_type == "FARGATE"
124
- else {}
125
- ),
130
+ **({"assignPublicIp": assign_public_ip} if assign_public_ip else {}),
126
131
  },
127
132
  }
128
133
 
@@ -162,7 +167,7 @@ class Client:
162
167
  )
163
168
  logger.info(f"Created new task definition {task_definition_arn}")
164
169
  else:
165
- task_definition_arn = existing_task_definition["taskDefinitionArn"]
170
+ task_definition_arn = check.not_none(existing_task_definition.get("taskDefinitionArn"))
166
171
  logger.info(f"Re-using existing task definition {task_definition_arn}")
167
172
 
168
173
  return task_definition_arn
@@ -333,7 +338,7 @@ class Client:
333
338
  service=service.name,
334
339
  desiredCount=0,
335
340
  )
336
- except botocore.exceptions.ClientError as error:
341
+ except botocore.exceptions.ClientError as error: # pyright: ignore[reportAttributeAccessIssue]
337
342
  if error.response["Error"]["Code"] in [
338
343
  "ServiceNotFoundException",
339
344
  "ServiceNotActiveException",
@@ -416,7 +421,7 @@ class Client:
416
421
  if resource_arn in actual_services:
417
422
  services.append(Service(client=self, arn=resource_arn))
418
423
 
419
- except botocore.exceptions.ClientError as error:
424
+ except botocore.exceptions.ClientError as error: # pyright: ignore[reportAttributeAccessIssue]
420
425
  if error.response["Error"]["Code"] == "AccessDeniedException":
421
426
  self._use_legacy_tag_filtering = True
422
427
  logger.warning(
@@ -487,7 +492,7 @@ class Client:
487
492
  )
488
493
 
489
494
  if exit_code:
490
- raise Exception(self.get_task_logs(task_arn))
495
+ raise Exception(self.get_task_logs(task_arn)) # pyright: ignore[reportCallIssue]
491
496
 
492
497
  return True
493
498
 
@@ -608,7 +613,7 @@ class Client:
608
613
 
609
614
  async def check_service_has_running_tasks(
610
615
  self, service_name, container_name, logger=None
611
- ) -> List[str]:
616
+ ) -> list[str]:
612
617
  # return the ARN of the task if it starts
613
618
  logger = logger or logging.getLogger("dagster_cloud.EcsClient")
614
619
  start_time = time.time()
@@ -622,33 +627,38 @@ class Client:
622
627
  cluster=self.cluster_name,
623
628
  services=[service_name],
624
629
  )
625
- if not services:
630
+ if not services or not services.get("services"):
626
631
  raise Exception(
627
632
  f"Service description not found for {self.cluster_name}/{service_name}"
628
633
  )
629
634
 
630
- service = services.get("services")[0]
635
+ service = services["services"][0]
631
636
  desired_count = service.get("desiredCount")
632
637
  running_count = service.get("runningCount")
633
638
 
634
639
  # If the service has reached the desired count, we can start tracking the tasks
635
- if desired_count == running_count:
640
+ if desired_count and (desired_count > 0) and (desired_count == running_count):
636
641
  running_tasks = self.ecs.list_tasks(
637
642
  cluster=self.cluster_name,
638
643
  serviceName=service_name,
639
644
  desiredStatus="RUNNING",
640
645
  ).get("taskArns")
641
646
 
642
- if not running_tasks:
643
- raise Exception(
644
- f"Unexpected error obtaining tasks for {service_name} in {self.cluster_name}"
645
- )
647
+ if running_tasks:
648
+ tasks_to_track = running_tasks
649
+
650
+ if not tasks_to_track and time.time() > start_time + STOPPED_TASK_GRACE_PERIOD:
651
+ # If there are still no running_tasks tasks after a certain grace period, check for stopped tasks
652
+ stopped_tasks = self._check_for_stopped_tasks(service_name)
653
+ if stopped_tasks:
654
+ latest_stopped_task = stopped_tasks[0]
655
+ stopped_reason = latest_stopped_task.get("stoppedReason", "")
646
656
 
647
- tasks_to_track = running_tasks
648
- elif time.time() > start_time + STOPPED_TASK_GRACE_PERIOD:
649
- # If there are still no running_tasks tasks after a certain grace period, check for stopped tasks
650
- stopped_tasks = self._check_for_stopped_tasks(service_name)
651
- if stopped_tasks:
657
+ if is_transient_task_stopped_reason(stopped_reason):
658
+ logger.warning(
659
+ f"Task stopped with a transient stoppedReason: {stopped_reason} - waiting for the service to launch a new task"
660
+ )
661
+ else:
652
662
  self._raise_failed_task(stopped_tasks[0], container_name, logger)
653
663
 
654
664
  if tasks_to_track:
@@ -663,7 +673,15 @@ class Client:
663
673
  if not self._check_all_essential_containers_are_running(task):
664
674
  all_tasks_running = False
665
675
  elif task.get("lastStatus") == "STOPPED":
666
- self._raise_failed_task(task, container_name, logger)
676
+ stopped_reason = task.get("stoppedReason", "")
677
+ if is_transient_task_stopped_reason(stopped_reason):
678
+ logger.warning(
679
+ f"Running task stopped with a transient stoppedReason: {stopped_reason} - waiting for the service to launch a new task"
680
+ )
681
+ tasks_to_track = []
682
+ all_tasks_running = False
683
+ else:
684
+ self._raise_failed_task(task, container_name, logger)
667
685
 
668
686
  if all_tasks_running:
669
687
  return tasks_to_track
@@ -680,7 +698,7 @@ class Client:
680
698
  )
681
699
  if response.get("services"):
682
700
  service = response["services"][0]
683
- service_events = [event.get("message") for event in service.get("events")]
701
+ service_events = [str(event.get("message")) for event in service.get("events", [])]
684
702
  service_events_str = "Service events:\n" + "\n".join(service_events)
685
703
  except:
686
704
  logger.exception(f"Error trying to get service event logs from service {service_name}")
@@ -715,9 +733,9 @@ class Client:
715
733
  ).get("taskDefinition")
716
734
 
717
735
  essential_containers = {
718
- container["name"]
719
- for container in task_definition["containerDefinitions"]
720
- if container["essential"]
736
+ check.not_none(container.get("name"))
737
+ for container in task_definition.get("containerDefinitions", [])
738
+ if container.get("essential") and container.get("name")
721
739
  }
722
740
 
723
741
  # Just because the task is RUNNING doesn't mean everything has started up correctly -
@@ -729,7 +747,7 @@ class Client:
729
747
  )
730
748
 
731
749
  def _get_service_discovery_id(self, hostname):
732
- service_name = hostname.split("." + self.namespace)[0]
750
+ service_name = hostname.split("." + self.namespace)[0] # pyright: ignore[reportOperatorIssue]
733
751
 
734
752
  paginator = self.service_discovery.get_paginator("list_services")
735
753
  for page in paginator.paginate(
@@ -747,7 +765,7 @@ class Client:
747
765
  if service["Name"] == service_name:
748
766
  return service["Id"]
749
767
 
750
- def _assign_public_ip(self):
768
+ def _infer_assign_public_ip(self):
751
769
  # https://docs.aws.amazon.com/AmazonECS/latest/userguide/fargate-task-networking.html
752
770
  # Assign a public IP if any of the subnets are public
753
771
  route_tables = self.ec2.route_tables.filter(
@@ -1,7 +1,8 @@
1
1
  import asyncio
2
2
  import os
3
+ from collections.abc import Collection, Mapping, Sequence
3
4
  from pathlib import Path
4
- from typing import Any, Collection, Dict, List, Mapping, Optional, Sequence, cast
5
+ from typing import Any, Optional, cast
5
6
 
6
7
  import boto3
7
8
  import grpc
@@ -30,9 +31,15 @@ from dagster_cloud.workspace.ecs.client import (
30
31
  DEFAULT_ECS_TIMEOUT,
31
32
  ECS_EXEC_LINUX_PARAMETERS,
32
33
  Client,
34
+ get_debug_ecs_prompt,
33
35
  )
36
+ from dagster_cloud.workspace.ecs.run_launcher import CloudEcsRunLauncher
34
37
  from dagster_cloud.workspace.ecs.service import Service
35
- from dagster_cloud.workspace.ecs.utils import get_ecs_human_readable_label, unique_ecs_resource_name
38
+ from dagster_cloud.workspace.ecs.utils import (
39
+ get_ecs_human_readable_label,
40
+ get_server_task_definition_family,
41
+ unique_ecs_resource_name,
42
+ )
36
43
  from dagster_cloud.workspace.user_code_launcher import (
37
44
  DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT,
38
45
  SHARED_USER_CODE_LAUNCHER_CONFIG,
@@ -44,11 +51,10 @@ from dagster_cloud.workspace.user_code_launcher.user_code_launcher import (
44
51
  UserCodeLauncherEntry,
45
52
  async_serialize_exceptions,
46
53
  )
47
- from dagster_cloud.workspace.user_code_launcher.utils import deterministic_label_for_location
48
-
49
- from .client import get_debug_ecs_prompt
50
- from .run_launcher import CloudEcsRunLauncher
51
- from .utils import get_server_task_definition_family
54
+ from dagster_cloud.workspace.user_code_launcher.utils import (
55
+ deterministic_label_for_location,
56
+ get_grpc_server_env,
57
+ )
52
58
 
53
59
  EcsServerHandleType = Service
54
60
 
@@ -60,12 +66,12 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
60
66
  def __init__(
61
67
  self,
62
68
  cluster: str,
63
- subnets: List[str],
69
+ subnets: list[str],
64
70
  execution_role_arn: str,
65
71
  log_group: str,
66
72
  service_discovery_namespace_id: str,
67
73
  task_role_arn: Optional[str] = None,
68
- security_group_ids: Optional[List[str]] = None,
74
+ security_group_ids: Optional[list[str]] = None,
69
75
  inst_data: Optional[ConfigurableClassData] = None,
70
76
  secrets=None,
71
77
  secrets_tag=None,
@@ -83,6 +89,10 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
83
89
  server_ecs_tags: Optional[Sequence[Mapping[str, Optional[str]]]] = None,
84
90
  run_ecs_tags: Optional[Sequence[Mapping[str, Optional[str]]]] = None,
85
91
  server_health_check: Optional[Mapping[str, Any]] = None,
92
+ enable_ecs_exec=False,
93
+ server_task_definition_prefix: str = "server",
94
+ run_task_definition_prefix: str = "run",
95
+ assign_public_ip: Optional[bool] = None,
86
96
  **kwargs,
87
97
  ):
88
98
  self.ecs = boto3.client("ecs")
@@ -140,6 +150,22 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
140
150
  run_sidecar_containers, "run_sidecar_containers"
141
151
  )
142
152
 
153
+ self.server_task_definition_prefix = check.str_param(
154
+ server_task_definition_prefix, "server_task_definition_prefix"
155
+ )
156
+ check.invariant(
157
+ len(self.server_task_definition_prefix) <= 16,
158
+ "server_task_definition_prefix must be at most 16 characters",
159
+ )
160
+ self.run_task_definition_prefix = check.str_param(
161
+ run_task_definition_prefix, "run_task_definition_prefix"
162
+ )
163
+
164
+ check.invariant(
165
+ len(self.run_task_definition_prefix) <= 16,
166
+ "run_task_definition_prefix must be at most 16 characters",
167
+ )
168
+
143
169
  self.server_ecs_tags = check.opt_sequence_param(server_ecs_tags, "server_ecs_tags")
144
170
  self.run_ecs_tags = check.opt_sequence_param(run_ecs_tags, "run_ecs_tags")
145
171
 
@@ -147,6 +173,8 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
147
173
  server_health_check, "server_health_check"
148
174
  )
149
175
 
176
+ self._enable_ecs_exec = enable_ecs_exec
177
+
150
178
  self.client = Client(
151
179
  cluster_name=self.cluster,
152
180
  subnet_ids=self.subnets,
@@ -157,8 +185,9 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
157
185
  timeout=self._ecs_timeout,
158
186
  grace_period=self._ecs_grace_period,
159
187
  launch_type=self.launch_type,
188
+ assign_public_ip=assign_public_ip,
160
189
  )
161
- super(EcsUserCodeLauncher, self).__init__(**kwargs)
190
+ super().__init__(**kwargs)
162
191
 
163
192
  @property
164
193
  def show_debug_cluster_info(self) -> bool:
@@ -261,13 +290,34 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
261
290
  {"enabled": Field(bool, is_required=False, default_value=False)},
262
291
  is_required=False,
263
292
  ),
293
+ "enable_ecs_exec": Field(
294
+ bool,
295
+ is_required=False,
296
+ default_value=False,
297
+ ),
298
+ "server_task_definition_prefix": Field(
299
+ str, is_required=False, default_value="server"
300
+ ),
301
+ "run_task_definition_prefix": Field(
302
+ str, is_required=False, default_value="dagsterrun"
303
+ ),
304
+ "assign_public_ip": Field(
305
+ Noneable(bool),
306
+ is_required=False,
307
+ default_value=None,
308
+ description=(
309
+ "When using the FARGATE launch type, the launcher will attempt to automatically determine if it is "
310
+ "necessary to assign a public IP to the ECS task. In complex network topologies, this automatic "
311
+ "determination may not be accurate. In this case, you can explicitly set this value to True or False."
312
+ ),
313
+ ),
264
314
  },
265
315
  SHARED_ECS_CONFIG,
266
316
  SHARED_USER_CODE_LAUNCHER_CONFIG,
267
317
  )
268
318
 
269
319
  @classmethod
270
- def from_config_value(cls, inst_data: ConfigurableClassData, config_value: Dict[str, Any]): # pyright: ignore[reportIncompatibleMethodOverride], fix me!
320
+ def from_config_value(cls, inst_data: ConfigurableClassData, config_value: dict[str, Any]): # pyright: ignore[reportIncompatibleMethodOverride], fix me!
271
321
  return EcsUserCodeLauncher(inst_data=inst_data, **config_value)
272
322
 
273
323
  @property
@@ -314,12 +364,12 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
314
364
  return container_context.repository_credentials
315
365
 
316
366
  def _get_enable_ecs_exec(self) -> bool:
317
- return False
367
+ return self._enable_ecs_exec
318
368
 
319
- def _get_additional_grpc_server_env(self) -> Dict[str, str]:
369
+ def _get_additional_grpc_server_env(self) -> dict[str, str]:
320
370
  return {}
321
371
 
322
- def _get_dagster_tags(self, deployment_name: str, location_name: str) -> Dict[str, str]:
372
+ def _get_dagster_tags(self, deployment_name: str, location_name: str) -> dict[str, str]:
323
373
  return {
324
374
  "dagster/deployment_name": get_ecs_human_readable_label(deployment_name),
325
375
  "dagster/location_name": get_ecs_human_readable_label(
@@ -337,7 +387,7 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
337
387
  f"Getting resource limits for {deployment_name}:{location_name}. resources: {self.server_resources}"
338
388
  )
339
389
  metadata = self._actual_entries[(deployment_name, location_name)].code_location_deploy_data
340
- resources = metadata.container_context.get("ecs", {}).get("server_resources")
390
+ resources = metadata.container_context.get("ecs", {}).get("server_resources", {})
341
391
  return {
342
392
  "ecs": {
343
393
  "cpu_limit": resources.get("cpu"),
@@ -367,8 +417,8 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
367
417
  command = metadata.get_grpc_server_command(
368
418
  metrics_enabled=self._instance.user_code_launcher.code_server_metrics_enabled
369
419
  )
370
- additional_env = metadata.get_grpc_server_env(
371
- PORT, location_name, self._instance.ref_for_deployment(deployment_name)
420
+ additional_env = get_grpc_server_env(
421
+ metadata, PORT, location_name, self._instance.ref_for_deployment(deployment_name)
372
422
  )
373
423
  tags = {
374
424
  "dagster/grpc_server": "1",
@@ -420,7 +470,10 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
420
470
  self._logger.info(f"Creating a new service for {deployment_name}:{location_name}...")
421
471
 
422
472
  family = get_server_task_definition_family(
423
- self._instance.organization_name, deployment_name, location_name
473
+ self.server_task_definition_prefix,
474
+ self._instance.organization_name,
475
+ deployment_name,
476
+ location_name,
424
477
  )
425
478
 
426
479
  system_tags = {**self._get_dagster_tags(deployment_name, location_name), **tags}
@@ -524,7 +577,7 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
524
577
  task_logs = "Task logs:\n" + "\n".join(logs) if logs else "No logs in task."
525
578
  sections.append(task_logs)
526
579
  except:
527
- self._logger.exception("Error trying to get logs for failed task", task_arn=task_arn)
580
+ self._logger.exception("Error trying to get logs for failed task", task_arn=task_arn) # pyright: ignore[reportCallIssue]
528
581
 
529
582
  if self.show_debug_cluster_info:
530
583
  sections.append(get_debug_ecs_prompt(self.cluster, task_arn))
@@ -575,7 +628,7 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
575
628
  except Exception as e:
576
629
  if (
577
630
  isinstance(e.__cause__, grpc.RpcError)
578
- and cast(grpc.RpcError, e.__cause__).code() == grpc.StatusCode.UNIMPLEMENTED
631
+ and cast("grpc.RpcError", e.__cause__).code() == grpc.StatusCode.UNIMPLEMENTED
579
632
  ):
580
633
  # New gRPC method not implemented on old multipex server versions
581
634
  pass
@@ -640,7 +693,7 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
640
693
  }
641
694
  return self.client.list_services(tags)
642
695
 
643
- def _list_server_handles(self) -> List[EcsServerHandleType]:
696
+ def _list_server_handles(self) -> list[EcsServerHandleType]:
644
697
  return [
645
698
  service
646
699
  for service in self.client.list_services()
@@ -654,7 +707,7 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
654
707
  def get_server_create_timestamp(self, handle: EcsServerHandleType) -> Optional[float]:
655
708
  return handle.create_timestamp
656
709
 
657
- def _run_launcher_kwargs(self) -> Dict[str, Any]:
710
+ def _run_launcher_kwargs(self) -> dict[str, Any]:
658
711
  return dict(
659
712
  task_definition={
660
713
  "log_group": self.log_group,
@@ -688,6 +741,7 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
688
741
  run_ecs_tags=self.run_ecs_tags,
689
742
  container_name=CONTAINER_NAME,
690
743
  run_resources=self.run_resources,
744
+ task_definition_prefix=self.run_task_definition_prefix,
691
745
  )
692
746
 
693
747
  def run_launcher(self) -> CloudEcsRunLauncher: # pyright: ignore[reportIncompatibleMethodOverride], fix me!
@@ -2,14 +2,14 @@ import dagster._check as check
2
2
  from dagster_aws.ecs import EcsRunLauncher
3
3
 
4
4
  from dagster_cloud.instance import DagsterCloudAgentInstance
5
-
6
- from .utils import get_run_task_definition_family
5
+ from dagster_cloud.workspace.ecs.utils import get_run_task_definition_family
7
6
 
8
7
 
9
8
  class CloudEcsRunLauncher(EcsRunLauncher[DagsterCloudAgentInstance]):
10
9
  def _get_run_task_definition_family(self, run) -> str:
11
10
  return get_run_task_definition_family(
11
+ self._task_definition_prefix,
12
12
  self._instance.organization_name,
13
13
  check.not_none(self._instance.deployment_name),
14
- check.not_none(run.external_job_origin),
14
+ check.not_none(run.remote_job_origin),
15
15
  )
@@ -2,10 +2,13 @@ import hashlib
2
2
  import re
3
3
  from typing import Optional
4
4
 
5
- from dagster._core.remote_representation.origin import RemoteJobOrigin
5
+ from dagster._core.remote_origin import RemoteJobOrigin
6
6
  from dagster_aws.ecs.utils import sanitize_family
7
7
 
8
- from ..user_code_launcher.utils import get_human_readable_label, unique_resource_name
8
+ from dagster_cloud.workspace.user_code_launcher.utils import (
9
+ get_human_readable_label,
10
+ unique_resource_name,
11
+ )
9
12
 
10
13
 
11
14
  def unique_ecs_resource_name(deployment_name, location_name):
@@ -29,10 +32,11 @@ def _get_family_hash(name, max_length=32, hash_size=8):
29
32
  m = hashlib.sha1()
30
33
  m.update(name.encode("utf-8"))
31
34
  name_hash = m.hexdigest()[:hash_size]
32
- return f"{name[:(max_length-hash_size-1)]}_{name_hash}"
35
+ return f"{name[: (max_length - hash_size - 1)]}_{name_hash}"
33
36
 
34
37
 
35
38
  def get_server_task_definition_family(
39
+ task_definition_prefix: str,
36
40
  organization_name: Optional[str],
37
41
  deployment_name: str,
38
42
  location_name: str,
@@ -43,9 +47,12 @@ def get_server_task_definition_family(
43
47
  m = hashlib.sha1()
44
48
  m.update(location_name.encode("utf-8"))
45
49
 
50
+ # '{16}_{64}_{64}_{64}': max 211 characters
46
51
  truncated_location_name = _get_family_hash(location_name, max_length=64)
47
52
 
48
- final_family = f"server_{organization_name}_{deployment_name}_{truncated_location_name}"
53
+ final_family: str = (
54
+ f"{task_definition_prefix}_{organization_name}_{deployment_name}_{truncated_location_name}"
55
+ )
49
56
 
50
57
  assert len(final_family) <= 255
51
58
 
@@ -53,6 +60,7 @@ def get_server_task_definition_family(
53
60
 
54
61
 
55
62
  def get_run_task_definition_family(
63
+ task_definition_prefix: str,
56
64
  organization_name: Optional[str],
57
65
  deployment_name: str,
58
66
  job_origin: RemoteJobOrigin,
@@ -64,12 +72,13 @@ def get_run_task_definition_family(
64
72
  repo_name = job_origin.repository_origin.repository_name
65
73
  location_name = job_origin.repository_origin.code_location_origin.location_name
66
74
 
75
+ assert len(task_definition_prefix) <= 16
67
76
  assert len(str(organization_name)) <= 64
68
77
  assert len(deployment_name) <= 64
69
78
 
70
79
  # '{16}_{64}_{64}_{32}_{32}_{32}': max 245 characters
71
80
 
72
- final_family = f"run_{organization_name}_{deployment_name}_{_get_family_hash(location_name)}_{_get_family_hash(repo_name)}_{_get_family_hash(job_name)}"
81
+ final_family = f"{task_definition_prefix}_{organization_name}_{deployment_name}_{_get_family_hash(location_name)}_{_get_family_hash(repo_name)}_{_get_family_hash(job_name)}"
73
82
 
74
83
  assert len(final_family) <= 255
75
84
 
@@ -1 +1 @@
1
- from .launcher import K8sUserCodeLauncher as K8sUserCodeLauncher
1
+ from dagster_cloud.workspace.kubernetes.launcher import K8sUserCodeLauncher as K8sUserCodeLauncher