dagster-cloud 1.10.11__py3-none-any.whl → 1.12.6__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (73)
  1. dagster_cloud/__init__.py +3 -3
  2. dagster_cloud/agent/dagster_cloud_agent.py +64 -20
  3. dagster_cloud/agent/instrumentation/run_launch.py +2 -2
  4. dagster_cloud/agent/instrumentation/schedule.py +1 -1
  5. dagster_cloud/agent/instrumentation/sensor.py +1 -1
  6. dagster_cloud/anomaly_detection/__init__.py +2 -2
  7. dagster_cloud/anomaly_detection/defs.py +11 -8
  8. dagster_cloud/api/dagster_cloud_api.py +7 -5
  9. dagster_cloud/auth/constants.py +21 -5
  10. dagster_cloud/batching/__init__.py +1 -1
  11. dagster_cloud/dagster_insights/__init__.py +12 -6
  12. dagster_cloud/dagster_insights/bigquery/dbt_wrapper.py +8 -2
  13. dagster_cloud/dagster_insights/bigquery/insights_bigquery_resource.py +4 -2
  14. dagster_cloud/dagster_insights/insights_utils.py +1 -1
  15. dagster_cloud/dagster_insights/metrics_utils.py +1 -1
  16. dagster_cloud/dagster_insights/snowflake/dagster_snowflake_insights.py +1 -9
  17. dagster_cloud/dagster_insights/snowflake/dbt_wrapper.py +9 -2
  18. dagster_cloud/dagster_insights/snowflake/definitions.py +5 -4
  19. dagster_cloud/dagster_insights/snowflake/insights_snowflake_resource.py +1 -2
  20. dagster_cloud/definitions/__init__.py +0 -0
  21. dagster_cloud/definitions/job_selection.py +36 -0
  22. dagster_cloud/execution/utils/process.py +1 -1
  23. dagster_cloud/instance/__init__.py +81 -42
  24. dagster_cloud/metadata/source_code.py +3 -1
  25. dagster_cloud/opentelemetry/config/exporter.py +1 -1
  26. dagster_cloud/opentelemetry/controller.py +1 -1
  27. dagster_cloud/opentelemetry/observers/dagster_exception_handler.py +1 -1
  28. dagster_cloud/opentelemetry/observers/execution_observer.py +4 -2
  29. dagster_cloud/pex/grpc/__init__.py +2 -2
  30. dagster_cloud/pex/grpc/client.py +2 -2
  31. dagster_cloud/pex/grpc/server/__init__.py +2 -2
  32. dagster_cloud/pex/grpc/server/cli/__init__.py +2 -2
  33. dagster_cloud/pex/grpc/server/manager.py +5 -4
  34. dagster_cloud/pex/grpc/server/registry.py +15 -8
  35. dagster_cloud/pex/grpc/server/server.py +17 -8
  36. dagster_cloud/secrets/__init__.py +1 -1
  37. dagster_cloud/serverless/__init__.py +1 -1
  38. dagster_cloud/serverless/io_manager.py +4 -1
  39. dagster_cloud/storage/compute_logs/__init__.py +3 -1
  40. dagster_cloud/storage/compute_logs/compute_log_manager.py +17 -13
  41. dagster_cloud/storage/defs_state/__init__.py +3 -0
  42. dagster_cloud/storage/defs_state/queries.py +15 -0
  43. dagster_cloud/storage/defs_state/storage.py +113 -0
  44. dagster_cloud/storage/event_logs/__init__.py +3 -1
  45. dagster_cloud/storage/event_logs/storage.py +9 -2
  46. dagster_cloud/storage/event_logs/utils.py +1 -3
  47. dagster_cloud/storage/runs/__init__.py +1 -1
  48. dagster_cloud/storage/runs/queries.py +15 -0
  49. dagster_cloud/storage/runs/storage.py +30 -3
  50. dagster_cloud/storage/schedules/__init__.py +1 -1
  51. dagster_cloud/storage/schedules/storage.py +1 -1
  52. dagster_cloud/util/errors.py +0 -91
  53. dagster_cloud/version.py +1 -1
  54. dagster_cloud/workspace/config_schema/__init__.py +43 -5
  55. dagster_cloud/workspace/docker/__init__.py +8 -7
  56. dagster_cloud/workspace/docker/utils.py +1 -1
  57. dagster_cloud/workspace/ecs/__init__.py +1 -1
  58. dagster_cloud/workspace/ecs/client.py +23 -18
  59. dagster_cloud/workspace/ecs/launcher.py +19 -5
  60. dagster_cloud/workspace/ecs/run_launcher.py +1 -2
  61. dagster_cloud/workspace/ecs/utils.py +5 -2
  62. dagster_cloud/workspace/kubernetes/__init__.py +1 -1
  63. dagster_cloud/workspace/kubernetes/launcher.py +11 -12
  64. dagster_cloud/workspace/kubernetes/utils.py +1 -2
  65. dagster_cloud/workspace/user_code_launcher/__init__.py +5 -3
  66. dagster_cloud/workspace/user_code_launcher/process.py +2 -3
  67. dagster_cloud/workspace/user_code_launcher/user_code_launcher.py +71 -34
  68. dagster_cloud/workspace/user_code_launcher/utils.py +7 -0
  69. {dagster_cloud-1.10.11.dist-info → dagster_cloud-1.12.6.dist-info}/METADATA +9 -8
  70. dagster_cloud-1.12.6.dist-info/RECORD +134 -0
  71. {dagster_cloud-1.10.11.dist-info → dagster_cloud-1.12.6.dist-info}/WHEEL +1 -1
  72. dagster_cloud-1.10.11.dist-info/RECORD +0 -129
  73. {dagster_cloud-1.10.11.dist-info → dagster_cloud-1.12.6.dist-info}/top_level.txt +0 -0

dagster_cloud/workspace/ecs/client.py

@@ -64,6 +64,7 @@ class Client:
         grace_period: int = DEFAULT_ECS_GRACE_PERIOD,
         launch_type: str = "FARGATE",
         show_debug_cluster_info: bool = True,
+        assign_public_ip: Optional[bool] = None,
     ):
         self.ecs = ecs_client if ecs_client else boto3.client("ecs", config=config)
         self.logs = boto3.client("logs", config=config)
@@ -85,6 +86,7 @@ class Client:
         self.grace_period = check.int_param(grace_period, "grace_period")
         self.launch_type = check.str_param(launch_type, "launch_type")
         self._namespace: Optional[str] = None
+        self._assign_public_ip_override = assign_public_ip
 
     @property
     def ec2(self):
@@ -110,19 +112,22 @@ class Client:
             name="serviceLongArnFormat",
             effectiveSettings=True,
         )
-        return settings["settings"][0]["value"] == "enabled"  # pyright: ignore[reportTypedDictNotRequiredAccess]
+        return settings["settings"][0]["value"] == "enabled"
 
     @property
     @cached_method
     def network_configuration(self):
+        if self.launch_type != "FARGATE":
+            assign_public_ip = None
+        elif self._assign_public_ip_override is not None:
+            assign_public_ip = "ENABLED" if self._assign_public_ip_override else "DISABLED"
+        else:
+            assign_public_ip = self._infer_assign_public_ip()
+
         network_configuration = {
             "awsvpcConfiguration": {
                 "subnets": self.subnet_ids,
-                **(
-                    {"assignPublicIp": self._assign_public_ip()}
-                    if self.launch_type == "FARGATE"
-                    else {}
-                ),
+                **({"assignPublicIp": assign_public_ip} if assign_public_ip else {}),
             },
         }
 
@@ -155,7 +160,7 @@ class Client:
     ):
         task_definition_arn = (
             self.ecs.register_task_definition(
-                **desired_task_definition_config.task_definition_dict()  # pyright: ignore[reportArgumentType]
+                **desired_task_definition_config.task_definition_dict()
             )
             .get("taskDefinition")
             .get("taskDefinitionArn")
@@ -461,10 +466,10 @@ class Client:
 
         task_arn = (
             self.ecs.run_task(
-                taskDefinition=task_definition_arn,  # pyright: ignore[reportArgumentType]
+                taskDefinition=task_definition_arn,
                 cluster=self.cluster_name,
-                launchType=self.launch_type,  # pyright: ignore[reportArgumentType]
-                networkConfiguration=self.network_configuration,  # pyright: ignore[reportArgumentType]
+                launchType=self.launch_type,
+                networkConfiguration=self.network_configuration,
             )
             .get("tasks", [{}])[0]
             .get("taskArn")
@@ -472,14 +477,14 @@ class Client:
 
         self.ecs.get_waiter("tasks_stopped").wait(
             cluster=self.cluster_name,
-            tasks=[task_arn],  # pyright: ignore[reportArgumentType]
+            tasks=[task_arn],
             WaiterConfig={"Delay": 1, "MaxAttempts": self.timeout},
         )
 
         exit_code = (
             self.ecs.describe_tasks(
                 cluster=self.cluster_name,
-                tasks=[task_arn],  # pyright: ignore[reportArgumentType]
+                tasks=[task_arn],
             )
             .get("tasks", [{}])[0]
             .get("containers", [{}])[0]
@@ -541,7 +546,7 @@ class Client:
             for key, value in tags.items()
         ]
 
-        arn = self.ecs.create_service(**params).get("service").get("serviceArn")  # pyright: ignore[reportArgumentType]
+        arn = self.ecs.create_service(**params).get("service").get("serviceArn")
 
         return Service(client=self, arn=arn)
 
@@ -716,7 +721,7 @@ class Client:
 
         stopped_tasks = sorted(
             stopped_tasks,
-            key=lambda task: task["createdAt"].timestamp(),  # pyright: ignore[reportTypedDictNotRequiredAccess]
+            key=lambda task: task["createdAt"].timestamp(),
             reverse=True,
         )
         return stopped_tasks
@@ -760,7 +765,7 @@ class Client:
             if service["Name"] == service_name:
                 return service["Id"]
 
-    def _assign_public_ip(self):
+    def _infer_assign_public_ip(self):
         # https://docs.aws.amazon.com/AmazonECS/latest/userguide/fargate-task-networking.html
         # Assign a public IP if any of the subnets are public
         route_tables = self.ec2.route_tables.filter(
@@ -780,14 +785,14 @@ class Client:
         task = self.ecs.describe_tasks(cluster=self.cluster_name, tasks=[task_arn]).get("tasks")[0]
 
         task_definition_arn = task.get("taskDefinitionArn")
-        task_definition = self.ecs.describe_task_definition(taskDefinition=task_definition_arn).get(  # pyright: ignore[reportArgumentType]
+        task_definition = self.ecs.describe_task_definition(taskDefinition=task_definition_arn).get(
             "taskDefinition"
         )
 
         matching_container_definitions = [
             container_definition
             for container_definition in task_definition.get("containerDefinitions", [])
-            if container_definition["name"] == container_name  # pyright: ignore[reportTypedDictNotRequiredAccess]
+            if container_definition["name"] == container_name
         ]
         if not matching_container_definitions:
             raise Exception(f"Could not find container with name {container_name}")
@@ -795,7 +800,7 @@ class Client:
         container_definition = matching_container_definitions[0]
 
         log_stream_prefix = (
-            container_definition.get("logConfiguration").get("options").get("awslogs-stream-prefix")  # pyright: ignore[reportOptionalMemberAccess]
+            container_definition.get("logConfiguration").get("options").get("awslogs-stream-prefix")
         )
         container_name = container_definition.get("name")
         task_id = task_arn.split("/")[-1]
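
The net effect of these hunks is a three-way decision for the awsvpcConfiguration: a non-FARGATE launch type never sets assignPublicIp, an explicit override wins when one is provided, and otherwise the value is inferred from the subnets' route tables (the renamed _infer_assign_public_ip). A minimal standalone sketch of that decision, with simplified names rather than the packaged Client class:

    from typing import Optional


    def resolve_assign_public_ip(
        launch_type: str, override: Optional[bool], inferred_public_subnet: bool
    ) -> Optional[str]:
        """Return "ENABLED"/"DISABLED" for FARGATE tasks, or None to omit the field entirely."""
        if launch_type != "FARGATE":
            return None  # assignPublicIp is not applicable to the EC2 launch type
        if override is not None:
            return "ENABLED" if override else "DISABLED"
        # the real client infers this by checking the subnets' route tables for an internet gateway
        return "ENABLED" if inferred_public_subnet else "DISABLED"


    def build_network_configuration(subnet_ids: list, assign_public_ip: Optional[str]) -> dict:
        # mirrors the new dict construction: the key is only present when a value was resolved
        return {
            "awsvpcConfiguration": {
                "subnets": subnet_ids,
                **({"assignPublicIp": assign_public_ip} if assign_public_ip else {}),
            }
        }


    # An explicit override of False forces DISABLED even when the subnet looks public.
    print(build_network_configuration(["subnet-123"], resolve_assign_public_ip("FARGATE", False, True)))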

dagster_cloud/workspace/ecs/launcher.py

@@ -31,9 +31,15 @@ from dagster_cloud.workspace.ecs.client import (
     DEFAULT_ECS_TIMEOUT,
     ECS_EXEC_LINUX_PARAMETERS,
     Client,
+    get_debug_ecs_prompt,
 )
+from dagster_cloud.workspace.ecs.run_launcher import CloudEcsRunLauncher
 from dagster_cloud.workspace.ecs.service import Service
-from dagster_cloud.workspace.ecs.utils import get_ecs_human_readable_label, unique_ecs_resource_name
+from dagster_cloud.workspace.ecs.utils import (
+    get_ecs_human_readable_label,
+    get_server_task_definition_family,
+    unique_ecs_resource_name,
+)
 from dagster_cloud.workspace.user_code_launcher import (
     DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT,
     SHARED_USER_CODE_LAUNCHER_CONFIG,
@@ -50,10 +56,6 @@ from dagster_cloud.workspace.user_code_launcher.utils import (
     get_grpc_server_env,
 )
 
-from .client import get_debug_ecs_prompt
-from .run_launcher import CloudEcsRunLauncher
-from .utils import get_server_task_definition_family
-
 EcsServerHandleType = Service
 
 CONTAINER_NAME = "dagster"
@@ -90,6 +92,7 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
         enable_ecs_exec=False,
         server_task_definition_prefix: str = "server",
         run_task_definition_prefix: str = "run",
+        assign_public_ip: Optional[bool] = None,
         **kwargs,
     ):
         self.ecs = boto3.client("ecs")
@@ -182,6 +185,7 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
            timeout=self._ecs_timeout,
            grace_period=self._ecs_grace_period,
            launch_type=self.launch_type,
+           assign_public_ip=assign_public_ip,
        )
        super().__init__(**kwargs)
 
@@ -297,6 +301,16 @@ class EcsUserCodeLauncher(DagsterCloudUserCodeLauncher[EcsServerHandleType], Con
                "run_task_definition_prefix": Field(
                    str, is_required=False, default_value="dagsterrun"
                ),
+               "assign_public_ip": Field(
+                   Noneable(bool),
+                   is_required=False,
+                   default_value=None,
+                   description=(
+                       "When using the FARGATE launch type, the launcher will attempt to automatically determine if it is "
+                       "necessary to assign a public IP to the ECS task. In complex network topologies, this automatic "
+                       "determination may not be accurate. In this case, you can explicitly set this value to True or False."
+                   ),
+               ),
            },
            SHARED_ECS_CONFIG,
            SHARED_USER_CODE_LAUNCHER_CONFIG,
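
The new assign_public_ip field is a tri-state (Noneable) setting that is passed straight through to the ECS Client shown above. Purely as an illustration, a user_code_launcher block that forces the value off could look like the following, expressed here as a Python dict; the surrounding key names and placeholder values are assumptions rather than something this diff confirms, and in practice the block would live in the agent's dagster.yaml:

    # Illustrative only: placeholder cluster/subnet values, assumed surrounding layout.
    ECS_USER_CODE_LAUNCHER_CONFIG = {
        "module": "dagster_cloud.workspace.ecs",
        "class": "EcsUserCodeLauncher",
        "config": {
            "cluster": "my-ecs-cluster",        # placeholder
            "subnets": ["subnet-0123456789"],   # placeholder
            "launch_type": "FARGATE",
            # New tri-state field: omit (or None) to keep the automatic route-table
            # inference; set True or False to force the value.
            "assign_public_ip": False,
        },
    }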

dagster_cloud/workspace/ecs/run_launcher.py

@@ -2,8 +2,7 @@ import dagster._check as check
 from dagster_aws.ecs import EcsRunLauncher
 
 from dagster_cloud.instance import DagsterCloudAgentInstance
-
-from .utils import get_run_task_definition_family
+from dagster_cloud.workspace.ecs.utils import get_run_task_definition_family
 
 
 class CloudEcsRunLauncher(EcsRunLauncher[DagsterCloudAgentInstance]):

dagster_cloud/workspace/ecs/utils.py

@@ -2,10 +2,13 @@ import hashlib
 import re
 from typing import Optional
 
-from dagster._core.remote_representation.origin import RemoteJobOrigin
+from dagster._core.remote_origin import RemoteJobOrigin
 from dagster_aws.ecs.utils import sanitize_family
 
-from ..user_code_launcher.utils import get_human_readable_label, unique_resource_name
+from dagster_cloud.workspace.user_code_launcher.utils import (
+    get_human_readable_label,
+    unique_resource_name,
+)
 
 
 def unique_ecs_resource_name(deployment_name, location_name):

dagster_cloud/workspace/kubernetes/__init__.py

@@ -1 +1 @@
-from .launcher import K8sUserCodeLauncher as K8sUserCodeLauncher
+from dagster_cloud.workspace.kubernetes.launcher import K8sUserCodeLauncher as K8sUserCodeLauncher

dagster_cloud/workspace/kubernetes/launcher.py

@@ -33,17 +33,7 @@ from dagster_cloud.api.dagster_cloud_api import UserCodeDeploymentType
 from dagster_cloud.constants import RESERVED_ENV_VAR_NAMES
 from dagster_cloud.execution.cloud_run_launcher.k8s import CloudK8sRunLauncher
 from dagster_cloud.execution.monitoring import CloudContainerResourceLimits
-
-from ..user_code_launcher import (
-    DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT,
-    SHARED_USER_CODE_LAUNCHER_CONFIG,
-    DagsterCloudGrpcServer,
-    DagsterCloudUserCodeLauncher,
-    ServerEndpoint,
-    UserCodeLauncherEntry,
-)
-from ..user_code_launcher.utils import deterministic_label_for_location
-from .utils import (
+from dagster_cloud.workspace.kubernetes.utils import (
     SERVICE_PORT,
     construct_code_location_deployment,
     construct_code_location_service,
@@ -51,11 +41,20 @@ from .utils import (
     unique_k8s_resource_name,
     wait_for_deployment_complete,
 )
+from dagster_cloud.workspace.user_code_launcher import (
+    DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT,
+    SHARED_USER_CODE_LAUNCHER_CONFIG,
+    DagsterCloudGrpcServer,
+    DagsterCloudUserCodeLauncher,
+    ServerEndpoint,
+    UserCodeLauncherEntry,
+)
+from dagster_cloud.workspace.user_code_launcher.utils import deterministic_label_for_location
 
 DEFAULT_DEPLOYMENT_STARTUP_TIMEOUT = 300
 DEFAULT_IMAGE_PULL_GRACE_PERIOD = 30
 
-from ..config_schema.kubernetes import SHARED_K8S_CONFIG
+from dagster_cloud.workspace.config_schema.kubernetes import SHARED_K8S_CONFIG
 
 
 class K8sHandle(NamedTuple):

dagster_cloud/workspace/kubernetes/utils.py

@@ -11,8 +11,7 @@ from dagster_k8s.models import k8s_model_from_dict
 from kubernetes import client
 
 from dagster_cloud.instance import DagsterCloudAgentInstance
-
-from ..user_code_launcher.utils import (
+from dagster_cloud.workspace.user_code_launcher.utils import (
     deterministic_label_for_location,
     get_grpc_server_env,
     get_human_readable_label,

dagster_cloud/workspace/user_code_launcher/__init__.py

@@ -1,5 +1,7 @@
-from .process import ProcessUserCodeLauncher as ProcessUserCodeLauncher
-from .user_code_launcher import (
+from dagster_cloud.workspace.user_code_launcher.process import (
+    ProcessUserCodeLauncher as ProcessUserCodeLauncher,
+)
+from dagster_cloud.workspace.user_code_launcher.user_code_launcher import (
     DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT as DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT,
     DEFAULT_SERVER_TTL_SECONDS as DEFAULT_SERVER_TTL_SECONDS,
     SHARED_USER_CODE_LAUNCHER_CONFIG as SHARED_USER_CODE_LAUNCHER_CONFIG,
@@ -8,7 +10,7 @@ from .user_code_launcher import (
     ServerEndpoint as ServerEndpoint,
     UserCodeLauncherEntry as UserCodeLauncherEntry,
 )
-from .utils import (
+from dagster_cloud.workspace.user_code_launcher.utils import (
     deterministic_label_for_location as deterministic_label_for_location,
     get_human_readable_label as get_human_readable_label,
     unique_resource_name as unique_resource_name,

dagster_cloud/workspace/user_code_launcher/process.py

@@ -26,9 +26,7 @@ from dagster_cloud.api.dagster_cloud_api import UserCodeDeploymentType
 from dagster_cloud.execution.cloud_run_launcher.process import CloudProcessRunLauncher
 from dagster_cloud.execution.monitoring import CloudContainerResourceLimits
 from dagster_cloud.pex.grpc import MultiPexGrpcClient
-from dagster_cloud.workspace.user_code_launcher.utils import get_grpc_server_env
-
-from .user_code_launcher import (
+from dagster_cloud.workspace.user_code_launcher.user_code_launcher import (
     DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT,
     SHARED_USER_CODE_LAUNCHER_CONFIG,
     DagsterCloudGrpcServer,
@@ -36,6 +34,7 @@ from .user_code_launcher import (
     ServerEndpoint,
     UserCodeLauncherEntry,
 )
+from dagster_cloud.workspace.user_code_launcher.utils import get_grpc_server_env
 
 CLEANUP_ZOMBIE_PROCESSES_INTERVAL = 5
 

dagster_cloud/workspace/user_code_launcher/user_code_launcher.py

@@ -27,13 +27,13 @@ from dagster._core.definitions.selector import JobSelector
 from dagster._core.errors import DagsterUserCodeUnreachableError
 from dagster._core.instance import MayHaveInstanceWeakref
 from dagster._core.launcher import RunLauncher
-from dagster._core.remote_representation import RemoteRepositoryOrigin
-from dagster._core.remote_representation.external_data import (
-    extract_serialized_job_snap_from_serialized_job_data_snap,
-)
-from dagster._core.remote_representation.origin import (
+from dagster._core.remote_origin import (
     CodeLocationOrigin,
     RegisteredCodeLocationOrigin,
+    RemoteRepositoryOrigin,
+)
+from dagster._core.remote_representation.external_data import (
+    extract_serialized_job_snap_from_serialized_job_data_snap,
 )
 from dagster._grpc.client import DagsterGrpcClient
 from dagster._grpc.types import GetCurrentImageResult, ListRepositoriesResponse
@@ -45,7 +45,11 @@ from dagster._serdes (
     whitelist_for_serdes,
 )
 from dagster._time import get_current_timestamp
-from dagster._utils.error import SerializableErrorInfo, serializable_error_info_from_exc_info
+from dagster._utils.error import (
+    SerializableErrorInfo,
+    serializable_error_info_from_exc_info,
+    truncate_serialized_error,
+)
 from dagster._utils.merger import merge_dicts
 from dagster._utils.typed_dict import init_optional_typeddict
 from dagster_cloud_cli.core.errors import raise_http_error
@@ -91,7 +95,6 @@ from dagster_cloud.pex.grpc.types import (
     ShutdownPexServerArgs,
 )
 from dagster_cloud.util import diff_serializable_namedtuple_map
-from dagster_cloud.util.errors import truncate_serialized_error
 
 DEFAULT_SERVER_PROCESS_STARTUP_TIMEOUT = 180
 DEFAULT_MAX_TTL_SERVERS = 25
@@ -398,6 +401,7 @@ class DagsterCloudUserCodeLauncher(
         self._run_worker_statuses_dict: dict[str, list[CloudRunWorkerStatus]] = {}
         self._run_worker_monitoring_lock = threading.Lock()
 
+        self._in_progress_reconcile_start_time = time.time()
         self._reconcile_count = 0
         self._reconcile_grpc_metadata_shutdown_event = threading.Event()
         self._reconcile_grpc_metadata_thread = None
@@ -641,6 +645,7 @@ class DagsterCloudUserCodeLauncher(
            response = self._instance.requests_managed_retries_session.put(
                url=upload_data.presigned_put_url,
                data=file,
+               timeout=self._instance.dagster_cloud_api_timeout,
            )
            raise_http_error(response)
 
@@ -884,23 +889,32 @@ class DagsterCloudUserCodeLauncher(
            repository_name,
            code_pointer,
        ) in list_repositories_response.repository_code_pointer_dict.items():
-           external_repository_chunks = [
-               chunk
-               async for chunk in client.gen_streaming_external_repository(
+           if os.getenv("DAGSTER_CLOUD_USE_STREAMING_EXTERNAL_REPOSITORY"):
+               external_repository_chunks = [
+                   chunk
+                   async for chunk in client.gen_streaming_external_repository(
+                       remote_repository_origin=RemoteRepositoryOrigin(
+                           location_origin,
+                           repository_name,
+                       ),
+                       defer_snapshots=True,
+                   )
+               ]
+
+               serialized_repository_data = "".join(
+                   [
+                       chunk["serialized_external_repository_chunk"]
+                       for chunk in external_repository_chunks
+                   ]
+               )
+           else:
+               serialized_repository_data = await client.gen_external_repository(
                    remote_repository_origin=RemoteRepositoryOrigin(
                        location_origin,
                        repository_name,
                    ),
                    defer_snapshots=True,
                )
-           ]
-
-           serialized_repository_data = "".join(
-               [
-                   chunk["serialized_external_repository_chunk"]
-                   for chunk in external_repository_chunks
-               ]
-           )
 
            # Don't deserialize in case there are breaking changes - let the server do it
            upload_repo_datas.append(
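
With this hunk the repository upload path is gated on the DAGSTER_CLOUD_USE_STREAMING_EXTERNAL_REPOSITORY environment variable: when it is set, the snapshot is still fetched as a stream of chunks and re-joined; otherwise a single non-streaming call returns the serialized payload directly. A compressed, self-contained sketch of that branch, using stand-in functions rather than the launcher's gRPC client:

    import asyncio
    import os
    from typing import AsyncIterator


    async def fetch_chunks() -> AsyncIterator[dict]:
        # stand-in for client.gen_streaming_external_repository(...)
        for part in ('{"repo":', ' "demo"}'):
            yield {"serialized_external_repository_chunk": part}


    async def fetch_whole() -> str:
        # stand-in for client.gen_external_repository(...)
        return '{"repo": "demo"}'


    async def get_serialized_repository_data() -> str:
        if os.getenv("DAGSTER_CLOUD_USE_STREAMING_EXTERNAL_REPOSITORY"):
            chunks = [chunk async for chunk in fetch_chunks()]
            return "".join(c["serialized_external_repository_chunk"] for c in chunks)
        return await fetch_whole()


    print(asyncio.run(get_serialized_repository_data()))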

@@ -1476,6 +1490,8 @@ class DagsterCloudUserCodeLauncher(
                self._logger.exception("Failed to refresh actual entries.")
            self._last_refreshed_actual_entries = now
 
+        self._in_progress_reconcile_start_time = time.time()
+
        self._reconcile(
            desired_entries,
            upload_locations,
@@ -1492,6 +1508,7 @@ class DagsterCloudUserCodeLauncher(
                f"Started polling for requests from {self._instance.dagster_cloud_url}"
            )
 
+        self._in_progress_reconcile_start_time = None
        self._reconcile_count += 1
 
     def _update_metrics_thread(self, shutdown_event):
@@ -1516,9 +1533,29 @@ class DagsterCloudUserCodeLauncher(
         # thread-safe since reconcile_count is an integer
         return self._reconcile_count > 0
 
-    def _make_check_on_running_server_endpoint(self, server_endpoint: ServerEndpoint):
-        # Ensure that server_endpoint is bound correctly
-        return lambda: server_endpoint.create_client().ping("")
+    @property
+    def in_progress_reconcile_start_time(self) -> Optional[float]:
+        return self._in_progress_reconcile_start_time
+
+    def _make_check_on_running_server_endpoint(
+        self, server_endpoint: ServerEndpoint
+    ) -> Callable[[], Union[ListRepositoriesResponse, SerializableErrorInfo]]:
+        return lambda: deserialize_value(
+            server_endpoint.create_client().list_repositories(),
+            (ListRepositoriesResponse, SerializableErrorInfo),
+        )
+
+    def _trigger_recovery_server_restart(self, deployment_location: DeploymentAndLocation):
+        del self._actual_entries[deployment_location]
+
+        if deployment_location in self._first_unavailable_times:
+            del self._first_unavailable_times[deployment_location]
+
+        # redeploy the multipex server in this case as well to ensure a fresh start
+        # if it resource contrained (and ensure that we don't try to create the same
+        # PexServerHandle again and delete the code location in a loop)
+        if deployment_location in self._multipex_servers:
+            del self._multipex_servers[deployment_location]
 
     def _refresh_actual_entries(self) -> None:
         for deployment_location, multipex_server in self._multipex_servers.items():
@@ -1549,11 +1586,11 @@ class DagsterCloudUserCodeLauncher(
                if isinstance(grpc_server_or_error, DagsterCloudGrpcServer):
                    self._logger.warning(
                        "Pex servers disappeared for running code location %s:%s. Removing actual entries to"
-                        " activate reconciliation logic.",
+                        " activate reconciliation logic and deploy a new code server and multipex server.",
                        deployment_name,
                        location_name,
                    )
-                    del self._actual_entries[deployment_location]
+                    self._trigger_recovery_server_restart(deployment_location)
 
        # Check to see if any servers have become unresponsive
        unavailable_server_timeout = int(
@@ -1596,15 +1633,22 @@ class DagsterCloudUserCodeLauncher(
 
            deployment_name, location_name = deployment_location
            try:
-               future.result()
-
+               response_or_error = future.result()
                # Successful ping resets the tracked last unavailable time for this code server, if set
                self._first_unavailable_times.pop(deployment_location, None)
+               if isinstance(response_or_error, SerializableErrorInfo):
+                   # This can happen if the server was previously healthy but restarted
+                   # and moved into an error state - attempt to recover
+                   self._logger.exception(
+                       f"Code server for {deployment_name}:{location_name} unexpectedly moved into an error state. Deploying a new code server. Observed error: \n{response_or_error.to_string()}"
+                   )
+                   self._trigger_recovery_server_restart(deployment_location)
            except Exception as e:
                if (
                    isinstance(e, DagsterUserCodeUnreachableError)
                    and isinstance(e.__cause__, grpc.RpcError)
-                    and cast("grpc.RpcError", e.__cause__).code() == grpc.StatusCode.UNAVAILABLE
+                    and cast("grpc.RpcError", e.__cause__).code()
+                    in {grpc.StatusCode.UNAVAILABLE, grpc.StatusCode.UNKNOWN}
                ):
                    first_unavailable_time = self._first_unavailable_times.get(
                        deployment_location
@@ -1622,14 +1666,7 @@ class DagsterCloudUserCodeLauncher(
                        self._logger.warning(
                            f"Code server for {deployment_name}:{location_name} has been unresponsive for more than {unavailable_server_timeout} seconds. Deploying a new code server."
                        )
-                        del self._actual_entries[deployment_location]
-                        del self._first_unavailable_times[deployment_location]
-
-                        # redeploy the multipex server in this case as well to ensure a fresh start
-                        # (and ensure that we don't try to create the same PexServerHandle again and
-                        # delete the code location in a loop)
-                        if deployment_location in self._multipex_servers:
-                            del self._multipex_servers[deployment_location]
+                        self._trigger_recovery_server_restart(deployment_location)
 
                    else:
                        self._logger.exception(
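
Taken together, these hunks route every recovery path through _trigger_recovery_server_restart and add a second trigger: a code server that answers its health check but returns a SerializableErrorInfo is restarted immediately, while an unreachable server is restarted only once it has been unavailable for longer than the configured timeout. A minimal sketch of that bookkeeping, with assumed names and timeout rather than the launcher's own state:

    import time
    from dataclasses import dataclass, field
    from typing import Dict, Optional


    @dataclass
    class RecoveryTracker:
        unavailable_timeout: float = 240.0
        first_unavailable_times: Dict[str, float] = field(default_factory=dict)

        def record_success(self, location: str) -> None:
            # a successful check clears any tracked outage for this location
            self.first_unavailable_times.pop(location, None)

        def record_error_state(self, location: str) -> bool:
            # the server answered but reported an error payload: restart right away
            self.first_unavailable_times.pop(location, None)
            return True

        def record_unreachable(self, location: str, now: Optional[float] = None) -> bool:
            # unreachable server: restart only once the outage exceeds the timeout
            now = time.time() if now is None else now
            first = self.first_unavailable_times.setdefault(location, now)
            return (now - first) > self.unavailable_timeout


    tracker = RecoveryTracker(unavailable_timeout=60)
    assert tracker.record_unreachable("prod:data", now=0) is False   # outage starts
    assert tracker.record_unreachable("prod:data", now=61) is True   # past the timeout: restart
    assert tracker.record_error_state("prod:etl") is True            # error state: restart immediately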

dagster_cloud/workspace/user_code_launcher/utils.py

@@ -125,4 +125,11 @@ def get_grpc_server_env(
            if code_location_deploy_data.executable_path
            else {}
        ),
+        **(
+            {
+                "DAGSTER_CLI_API_GRPC_AUTOLOAD_DEFS_MODULE_NAME": code_location_deploy_data.autoload_defs_module_name
+            }
+            if code_location_deploy_data.autoload_defs_module_name
+            else {}
+        ),
     }
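
The pattern here is the same conditional splat used for the other optional settings: a key is added to the gRPC server environment only when the corresponding code location setting is present. A simplified sketch under assumed names; only the DAGSTER_CLI_API_GRPC_AUTOLOAD_DEFS_MODULE_NAME key comes from the diff, and the base entry is a placeholder:

    from typing import Dict, Optional


    def build_grpc_server_env(autoload_defs_module_name: Optional[str]) -> Dict[str, str]:
        base_env = {"EXAMPLE_BASE_SETTING": "1"}  # placeholder for the unconditional entries
        return {
            **base_env,
            # only added when the code location configures an autoload defs module,
            # mirroring the conditional splat in the diff above
            **(
                {"DAGSTER_CLI_API_GRPC_AUTOLOAD_DEFS_MODULE_NAME": autoload_defs_module_name}
                if autoload_defs_module_name
                else {}
            ),
        }


    print(build_grpc_server_env("my_project.defs"))  # includes the autoload key
    print(build_grpc_server_env(None))               # omits it entirely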

{dagster_cloud-1.10.11.dist-info → dagster_cloud-1.12.6.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dagster-cloud
-Version: 1.10.11
+Version: 1.12.6
 Author-email: Elementl <support@elementl.com>
 License: Apache-2.0
 Project-URL: Homepage, https://dagster.io/cloud
@@ -26,15 +26,16 @@ Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Topic :: System :: Monitoring
 Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
 Classifier: Operating System :: OS Independent
-Requires-Python: <3.13,>=3.9
+Requires-Python: <3.14,>=3.9
 Description-Content-Type: text/markdown
-Requires-Dist: dagster==1.10.11
-Requires-Dist: dagster-cloud-cli==1.10.11
+Requires-Dist: dagster==1.12.6
+Requires-Dist: dagster-shared==1.12.6
+Requires-Dist: dagster-cloud-cli==1.12.6
 Requires-Dist: opentelemetry-api<2,>=1.27.0
 Requires-Dist: opentelemetry-sdk<2,>=1.27.0
 Requires-Dist: opentelemetry-exporter-otlp-proto-grpc<2,>=1.27.0
 Requires-Dist: opentelemetry-exporter-otlp-proto-http<2,>=1.27.0
-Requires-Dist: pex<3,>=2.1.132
+Requires-Dist: pex<2.60.0,>=2.1.132
 Requires-Dist: questionary
 Requires-Dist: requests
 Requires-Dist: typer
@@ -64,12 +65,12 @@ Provides-Extra: insights
 Requires-Dist: pyarrow; extra == "insights"
 Provides-Extra: docker
 Requires-Dist: docker; extra == "docker"
-Requires-Dist: dagster-docker==0.26.11; extra == "docker"
+Requires-Dist: dagster-docker==0.28.6; extra == "docker"
 Provides-Extra: kubernetes
 Requires-Dist: kubernetes; extra == "kubernetes"
-Requires-Dist: dagster-k8s==0.26.11; extra == "kubernetes"
+Requires-Dist: dagster-k8s==0.28.6; extra == "kubernetes"
 Provides-Extra: ecs
-Requires-Dist: dagster-aws==0.26.11; extra == "ecs"
+Requires-Dist: dagster-aws==0.28.6; extra == "ecs"
 Requires-Dist: boto3; extra == "ecs"
 Provides-Extra: sandbox
 Requires-Dist: supervisor; extra == "sandbox"