dagster-cloud 1.8.2__py3-none-any.whl → 1.12.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. dagster_cloud/__init__.py +3 -3
  2. dagster_cloud/agent/__init__.py +4 -4
  3. dagster_cloud/agent/cli/__init__.py +56 -17
  4. dagster_cloud/agent/dagster_cloud_agent.py +360 -172
  5. dagster_cloud/agent/instrumentation/__init__.py +0 -0
  6. dagster_cloud/agent/instrumentation/constants.py +2 -0
  7. dagster_cloud/agent/instrumentation/run_launch.py +23 -0
  8. dagster_cloud/agent/instrumentation/schedule.py +34 -0
  9. dagster_cloud/agent/instrumentation/sensor.py +34 -0
  10. dagster_cloud/anomaly_detection/__init__.py +2 -2
  11. dagster_cloud/anomaly_detection/defs.py +17 -12
  12. dagster_cloud/anomaly_detection/types.py +3 -3
  13. dagster_cloud/api/dagster_cloud_api.py +209 -293
  14. dagster_cloud/auth/constants.py +21 -5
  15. dagster_cloud/batching/__init__.py +1 -0
  16. dagster_cloud/batching/batcher.py +210 -0
  17. dagster_cloud/dagster_insights/__init__.py +12 -6
  18. dagster_cloud/dagster_insights/bigquery/bigquery_utils.py +3 -2
  19. dagster_cloud/dagster_insights/bigquery/dbt_wrapper.py +39 -12
  20. dagster_cloud/dagster_insights/bigquery/insights_bigquery_resource.py +8 -6
  21. dagster_cloud/dagster_insights/insights_utils.py +18 -8
  22. dagster_cloud/dagster_insights/metrics_utils.py +12 -12
  23. dagster_cloud/dagster_insights/snowflake/dagster_snowflake_insights.py +5 -12
  24. dagster_cloud/dagster_insights/snowflake/dbt_wrapper.py +34 -8
  25. dagster_cloud/dagster_insights/snowflake/definitions.py +38 -12
  26. dagster_cloud/dagster_insights/snowflake/insights_snowflake_resource.py +11 -23
  27. dagster_cloud/definitions/__init__.py +0 -0
  28. dagster_cloud/definitions/job_selection.py +36 -0
  29. dagster_cloud/execution/cloud_run_launcher/k8s.py +1 -1
  30. dagster_cloud/execution/cloud_run_launcher/process.py +3 -3
  31. dagster_cloud/execution/monitoring/__init__.py +27 -33
  32. dagster_cloud/execution/utils/process.py +3 -3
  33. dagster_cloud/instance/__init__.py +125 -38
  34. dagster_cloud/instrumentation/__init__.py +32 -0
  35. dagster_cloud/metadata/source_code.py +13 -8
  36. dagster_cloud/metrics/__init__.py +0 -0
  37. dagster_cloud/metrics/tracer.py +59 -0
  38. dagster_cloud/opentelemetry/__init__.py +0 -0
  39. dagster_cloud/opentelemetry/config/__init__.py +73 -0
  40. dagster_cloud/opentelemetry/config/exporter.py +81 -0
  41. dagster_cloud/opentelemetry/config/log_record_processor.py +40 -0
  42. dagster_cloud/opentelemetry/config/logging_handler.py +14 -0
  43. dagster_cloud/opentelemetry/config/meter_provider.py +9 -0
  44. dagster_cloud/opentelemetry/config/metric_reader.py +39 -0
  45. dagster_cloud/opentelemetry/controller.py +319 -0
  46. dagster_cloud/opentelemetry/enum.py +58 -0
  47. dagster_cloud/opentelemetry/factories/__init__.py +1 -0
  48. dagster_cloud/opentelemetry/factories/logs.py +113 -0
  49. dagster_cloud/opentelemetry/factories/metrics.py +121 -0
  50. dagster_cloud/opentelemetry/metrics/__init__.py +0 -0
  51. dagster_cloud/opentelemetry/metrics/meter.py +140 -0
  52. dagster_cloud/opentelemetry/observers/__init__.py +0 -0
  53. dagster_cloud/opentelemetry/observers/dagster_exception_handler.py +40 -0
  54. dagster_cloud/opentelemetry/observers/execution_observer.py +178 -0
  55. dagster_cloud/pex/grpc/__generated__/multi_pex_api_pb2.pyi +175 -0
  56. dagster_cloud/pex/grpc/__init__.py +2 -2
  57. dagster_cloud/pex/grpc/client.py +4 -4
  58. dagster_cloud/pex/grpc/compile.py +2 -2
  59. dagster_cloud/pex/grpc/server/__init__.py +2 -2
  60. dagster_cloud/pex/grpc/server/cli/__init__.py +31 -19
  61. dagster_cloud/pex/grpc/server/manager.py +60 -42
  62. dagster_cloud/pex/grpc/server/registry.py +28 -21
  63. dagster_cloud/pex/grpc/server/server.py +23 -14
  64. dagster_cloud/pex/grpc/types.py +5 -5
  65. dagster_cloud/py.typed +0 -0
  66. dagster_cloud/secrets/__init__.py +1 -1
  67. dagster_cloud/secrets/loader.py +3 -3
  68. dagster_cloud/serverless/__init__.py +1 -1
  69. dagster_cloud/serverless/io_manager.py +36 -53
  70. dagster_cloud/storage/client.py +54 -17
  71. dagster_cloud/storage/compute_logs/__init__.py +3 -1
  72. dagster_cloud/storage/compute_logs/compute_log_manager.py +22 -17
  73. dagster_cloud/storage/defs_state/__init__.py +3 -0
  74. dagster_cloud/storage/defs_state/queries.py +15 -0
  75. dagster_cloud/storage/defs_state/storage.py +113 -0
  76. dagster_cloud/storage/event_logs/__init__.py +3 -1
  77. dagster_cloud/storage/event_logs/queries.py +102 -4
  78. dagster_cloud/storage/event_logs/storage.py +266 -73
  79. dagster_cloud/storage/event_logs/utils.py +88 -7
  80. dagster_cloud/storage/runs/__init__.py +1 -1
  81. dagster_cloud/storage/runs/queries.py +17 -2
  82. dagster_cloud/storage/runs/storage.py +88 -42
  83. dagster_cloud/storage/schedules/__init__.py +1 -1
  84. dagster_cloud/storage/schedules/storage.py +6 -8
  85. dagster_cloud/storage/tags.py +66 -1
  86. dagster_cloud/util/__init__.py +10 -12
  87. dagster_cloud/util/errors.py +49 -64
  88. dagster_cloud/version.py +1 -1
  89. dagster_cloud/workspace/config_schema/__init__.py +55 -13
  90. dagster_cloud/workspace/docker/__init__.py +76 -25
  91. dagster_cloud/workspace/docker/utils.py +1 -1
  92. dagster_cloud/workspace/ecs/__init__.py +1 -1
  93. dagster_cloud/workspace/ecs/client.py +51 -33
  94. dagster_cloud/workspace/ecs/launcher.py +76 -22
  95. dagster_cloud/workspace/ecs/run_launcher.py +3 -3
  96. dagster_cloud/workspace/ecs/utils.py +14 -5
  97. dagster_cloud/workspace/kubernetes/__init__.py +1 -1
  98. dagster_cloud/workspace/kubernetes/launcher.py +61 -29
  99. dagster_cloud/workspace/kubernetes/utils.py +34 -22
  100. dagster_cloud/workspace/user_code_launcher/__init__.py +5 -3
  101. dagster_cloud/workspace/user_code_launcher/process.py +16 -14
  102. dagster_cloud/workspace/user_code_launcher/user_code_launcher.py +552 -172
  103. dagster_cloud/workspace/user_code_launcher/utils.py +105 -1
  104. {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/METADATA +48 -42
  105. dagster_cloud-1.12.6.dist-info/RECORD +134 -0
  106. {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/WHEEL +1 -1
  107. dagster_cloud-1.8.2.dist-info/RECORD +0 -100
  108. {dagster_cloud-1.8.2.dist-info → dagster_cloud-1.12.6.dist-info}/top_level.txt +0 -0
--- a/dagster_cloud/auth/constants.py
+++ b/dagster_cloud/auth/constants.py
@@ -1,3 +1,4 @@
+import uuid
 from typing import Optional
 
 from dagster._core.errors import DagsterInvariantViolationError
@@ -19,12 +20,27 @@ def get_organization_public_id_from_api_token(api_token: str) -> Optional[str]:
     return split_token[2]
 
 
-def get_organization_name_from_agent_token(agent_token: str) -> Optional[str]:
+def decode_region_from_uuid(regional_token: str) -> Optional[str]:
+    try:
+        regional_uuid = uuid.UUID(regional_token)
+    except ValueError:
+        # if it's not an actual uuid, we can't decode region
+        return None
+
+    # custom uuids contain region subdomains in the first 2 bytes
+    if regional_uuid.version != 8 or regional_uuid.variant != uuid.RFC_4122:
+        return None
+
+    uuid_bytes = regional_uuid.bytes
+    return uuid_bytes[:2].decode("ascii")
+
+
+def decode_agent_token(agent_token: str) -> tuple[Optional[str], Optional[str]]:
     split_token = agent_token.split(":")
 
     # Legacy agent token format - organization must be specified in dagster.yaml
     if len(split_token) == 1:
-        return None
+        return None, None
 
     token_type, *token = split_token
 
@@ -35,6 +51,6 @@ def get_organization_name_from_agent_token(agent_token: str) -> Optional[str]:
         "Generate a new agent token in Dagster Cloud."
     )
 
-    organization, _identifier = token
-
-    return organization
+    # token format: agent:<org>:<uuid>
+    organization, uuid_str = token
+    return organization, decode_region_from_uuid(uuid_str)
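To make the UUIDv8 region scheme above concrete, here is a minimal sketch of a token that decode_agent_token would accept (the make_regional_uuid helper and the organization name are hypothetical, written only to mirror what decode_region_from_uuid checks for):

    import uuid

    def make_regional_uuid(region: str) -> uuid.UUID:
        # Hypothetical helper: pack a 2-character ASCII region code into the
        # first two bytes of an RFC 4122 version-8 UUID, the shape that
        # decode_region_from_uuid looks for.
        raw = bytearray(region.encode("ascii")[:2] + uuid.uuid4().bytes[2:])
        raw[6] = (raw[6] & 0x0F) | 0x80  # set the version nibble to 8
        raw[8] = (raw[8] & 0x3F) | 0x80  # set the RFC 4122 variant bits
        return uuid.UUID(bytes=bytes(raw))

    token = f"agent:hooli:{make_regional_uuid('us')}"
    # decode_agent_token(token) -> ("hooli", "us")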
--- /dev/null
+++ b/dagster_cloud/batching/__init__.py
@@ -0,0 +1 @@
+from dagster_cloud.batching.batcher import Batcher as Batcher
--- /dev/null
+++ b/dagster_cloud/batching/batcher.py
@@ -0,0 +1,210 @@
+import logging
+import os
+from collections.abc import Generator
+from concurrent.futures import Future, TimeoutError
+from contextlib import contextmanager
+from queue import Empty, Full, Queue
+from threading import Lock
+from typing import Callable, Generic, Optional, TypeVar
+
+import dagster._check as check
+
+from dagster_cloud.instrumentation import Instrumentation, NoOpInstrumentation
+
+logger = logging.getLogger(__name__)
+
+I = TypeVar("I")  # noqa: E741
+O = TypeVar("O")  # noqa: E741
+QueueItem = tuple[I, Future[O]]
+
+
+DEFAULT_MAX_WAIT_MS = 1000
+DEFAULT_MAX_BATCH_SIZE = 100
+DEFAULT_MAX_QUEUE_SIZE = 1000
+
+
+def _get_override_for_name(setting: str, name: str) -> Optional[int]:
+    env_name = f"DAGSTER_BATCHING__{name.upper().replace('-', '_')}__{setting.upper()}"
+    value = os.getenv(env_name)
+    if value is None:
+        return None
+
+    try:
+        value_int = int(value)
+        if value_int <= 0:
+            logger.warning(
+                f"Environment variable misconfiguration for {env_name} (should be positive int, got: '{value}')"
+            )
+            return None
+        return value_int
+    except ValueError:
+        logger.warning(
+            f"Environment variable misconfiguration for {env_name} (should be positive int, got: '{value}')"
+        )
+        return None
+
+
+def _get_config(
+    setting: str, name: str, passed_in_default: Optional[int], global_default: int
+) -> int:
+    override = _get_override_for_name(setting, name)
+    if override is not None:
+        return override
+
+    if passed_in_default is not None:
+        return passed_in_default
+
+    return global_default
+
+
+class Batcher(Generic[I, O]):
+    """the basic algorithm is.
+
+    1. insert (item, future) into queue
+    2. wait for future to complete, with max timeout
+       2a. if future completes, return result
+       2b. on timeout, acquire lock, then drain the queue until
+           the future completes
+
+    NOTE: if the queue is full, submit() will raise an exception
+    NOTE: the lock means that only one thread will ever be running the batcher_fn
+          at a time. the algorithm would still be correct without the lock but
+          locking leads to larger batches. HOWEVER without the lock we might try
+          to submit empty batches, which there is currently an invariant to protect
+          against
+    NOTE: the max queue size is meant to cap the number of inflight requests
+          in order to fail faster if the underlying function is taking too long
+          (database issues).
+
+    Configuration for queue size, max wait, and batch size is specified (by priority order) by:
+
+    1. an env var override (of the form DAGSTER_BATCHING__TEST__MAX_WAIT_MS -- see _get_override_for_name)
+    2. the passed in value
+    3. the default (specified in this file)
+    """
+
+    def __init__(
+        self,
+        name: str,
+        batcher_fn: Callable[[list[I]], list[O]],
+        max_queue_size: Optional[int] = None,
+        max_batch_size: Optional[int] = None,
+        max_wait_ms: Optional[int] = None,
+        instrumentation: Optional[Instrumentation] = None,
+    ) -> None:
+        check.invariant(
+            max_wait_ms is None or max_wait_ms > 0,
+            "max wait, if provided, must be set to a positive integer",
+        )
+        check.invariant(
+            max_queue_size is None or max_queue_size > 0,
+            "max queue size, if provided, must be set to a positive integer",
+        )
+        check.invariant(
+            max_batch_size is None or max_batch_size > 0,
+            "max batch size, if provided, must be set to a positive integer",
+        )
+        if max_queue_size and max_batch_size:
+            check.invariant(
+                max_batch_size <= max_queue_size,
+                "if max batch size and max queue size are provided, max batch size must be "
+                "less than or equal to max queue size",
+            )
+        self._name = name
+        self._batcher_fn = batcher_fn
+        self._max_batch_size = _get_config(
+            "max_batch_size", name, max_batch_size, DEFAULT_MAX_BATCH_SIZE
+        )
+        self._max_wait_ms: float = _get_config(
+            "max_wait_ms", name, max_wait_ms, DEFAULT_MAX_WAIT_MS
+        )
+        config_max_queue_size = _get_config(
+            "max_queue_size", name, max_queue_size, DEFAULT_MAX_QUEUE_SIZE
+        )
+        self._queue: Queue[QueueItem] = Queue(maxsize=config_max_queue_size)
+        self._drain_lock = Lock()
+        self._instrumentation = (instrumentation or NoOpInstrumentation()).tags([f"batcher:{name}"])
+
+    def _submit_batch(self, batch: list[QueueItem]) -> None:
+        check.invariant(len(batch) > 0, "should never submit an empty batch")
+        self._instrument_batch_size(len(batch))
+        try:
+            with self._time("batcher_fn"):
+                results = self._batcher_fn([i for i, _ in batch])
+        except Exception as e:
+            for _, fut in batch:
+                fut.set_exception(e)
+        else:
+            check.invariant(
+                len(results) == len(batch), "batcher returned fewer results than expected"
+            )
+            for (_, fut), result in zip(batch, results):
+                fut.set_result(result)
+
+    def _build_batch(self) -> list[QueueItem]:
+        batch = []
+        for _ in range(self._max_batch_size):
+            try:
+                batch.append(self._queue.get(block=False))
+            except Empty:
+                break
+        return batch
+
+    @contextmanager
+    def _lock(self) -> Generator[None, None, None]:
+        with self._time("lock_acquisition"):
+            self._drain_lock.acquire()
+        try:
+            yield
+        finally:
+            self._drain_lock.release()
+
+    def _drain_batch(self, fut: Future[O]) -> O:
+        with self._lock(), self._time("drain_batch"):
+            while not fut.done():
+                self._submit_batch(self._build_batch())
+            return fut.result()
+
+    def submit(self, i: I) -> O:
+        with self._time("submit"):
+            fut: Future[O] = Future()
+            try:
+                self._queue.put((i, fut), block=False)
+            except Full:
+                self._instrumentation.increment("dagster.batching.full")
+                logger.exception(f"Batching queue for batcher {self._name} is full!")
+                raise
+            else:
+                try:
+                    queue_size = self._queue.qsize()
+                    self._instrument_queue_size(queue_size)
+                    timeout = 0 if queue_size >= self._max_batch_size else self._max_wait_ms / 1000
+                    return fut.result(timeout=timeout)
+                except TimeoutError:
+                    self._instrumentation.increment("dagster.batching.timeout")
+                    self._drain_batch(fut)
+                    return fut.result()
+
+    def _instrument_queue_size(self, queue_size: int) -> None:
+        self._instrumentation.histogram("dagster.batching.queue_size", queue_size)
+        for bucket in [5, 10, 100]:
+            if queue_size >= bucket:
+                self._instrumentation.increment(f"dagster.batching.queue_size.ge_{bucket}")
+            else:
+                break
+
+    def _instrument_batch_size(self, batch_size: int) -> None:
+        self._instrumentation.histogram("dagster.batching.batch_size", batch_size)
+        for bucket in [5, 10, 100]:
+            if batch_size >= bucket:
+                self._instrumentation.increment(f"dagster.batching.batch_size.ge_{bucket}")
+            else:
+                break
+
+    @contextmanager
+    def _time(self, metric_name: str) -> Generator[None, None, None]:
+        with self._instrumentation.instrument_context(
+            f"dagster.batching.{metric_name}",
+            buckets_ms=[10, 100, 500, 1000],
+        ):
+            yield
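For orientation, a minimal usage sketch of the new Batcher (the lookup function and inputs here are hypothetical): many threads call submit() with single items, and concurrent calls are coalesced into one batcher_fn invocation of up to max_batch_size items.

    from concurrent.futures import ThreadPoolExecutor

    from dagster_cloud.batching import Batcher

    def fetch_rows(keys: list[str]) -> list[dict]:
        # Stand-in for a real batched database/RPC call; it must return
        # exactly one result per input, in the same order.
        return [{"key": k} for k in keys]

    batcher: Batcher[str, dict] = Batcher(
        "row-fetch", fetch_rows, max_batch_size=50, max_wait_ms=20
    )

    # Concurrent submit() calls from many threads are batched together.
    with ThreadPoolExecutor(max_workers=8) as pool:
        results = list(pool.map(batcher.submit, ["a", "b", "c"]))

Following _get_override_for_name, an operator could then set DAGSTER_BATCHING__ROW_FETCH__MAX_BATCH_SIZE=25 to override the batch size for this batcher without a code change.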
--- a/dagster_cloud/dagster_insights/__init__.py
+++ b/dagster_cloud/dagster_insights/__init__.py
@@ -1,15 +1,19 @@
 import sys
 from typing import Any
 
-from .snowflake.dbt_wrapper import dbt_with_snowflake_insights as dbt_with_snowflake_insights
-from .snowflake.definitions import (
+from dagster_cloud.dagster_insights.snowflake.dbt_wrapper import (
+    dbt_with_snowflake_insights as dbt_with_snowflake_insights,
+)
+from dagster_cloud.dagster_insights.snowflake.definitions import (
     create_snowflake_insights_asset_and_schedule as create_snowflake_insights_asset_and_schedule,
 )
-from .snowflake.snowflake_utils import meter_snowflake_query as meter_snowflake_query
+from dagster_cloud.dagster_insights.snowflake.snowflake_utils import (
+    meter_snowflake_query as meter_snowflake_query,
+)
 
 dagster_snowflake_req_imports = {"InsightsSnowflakeResource"}
 try:
-    from .snowflake.insights_snowflake_resource import (
+    from dagster_cloud.dagster_insights.snowflake.insights_snowflake_resource import (
         InsightsSnowflakeResource as InsightsSnowflakeResource,
     )
 except ImportError:
@@ -17,8 +21,10 @@ except ImportError:
 
 dagster_bigquery_req_imports = {"InsightsBigQueryResource", "dbt_with_bigquery_insights"}
 try:
-    from .bigquery.dbt_wrapper import dbt_with_bigquery_insights as dbt_with_bigquery_insights
-    from .bigquery.insights_bigquery_resource import (
+    from dagster_cloud.dagster_insights.bigquery.dbt_wrapper import (
+        dbt_with_bigquery_insights as dbt_with_bigquery_insights,
+    )
+    from dagster_cloud.dagster_insights.bigquery.insights_bigquery_resource import (
         InsightsBigQueryResource as InsightsBigQueryResource,
     )
 except ImportError:
--- a/dagster_cloud/dagster_insights/bigquery/bigquery_utils.py
+++ b/dagster_cloud/dagster_insights/bigquery/bigquery_utils.py
@@ -1,4 +1,5 @@
-from typing import Any, List, Mapping, Optional
+from collections.abc import Mapping
+from typing import Any, Optional
 
 from dagster import AssetKey, JobDefinition
 
@@ -15,7 +16,7 @@ def marker_asset_key_for_job(
 
 
 def build_bigquery_cost_metadata(
-    job_ids: Optional[List[str]], bytes_billed: int, slots_ms: int
+    job_ids: Optional[list[str]], bytes_billed: int, slots_ms: int
 ) -> Mapping[str, Any]:
     metadata: Mapping[str, Any] = {
         BIGQUERY_METADATA_BYTES_BILLED: bytes_billed,
--- a/dagster_cloud/dagster_insights/bigquery/dbt_wrapper.py
+++ b/dagster_cloud/dagster_insights/bigquery/dbt_wrapper.py
@@ -1,9 +1,11 @@
 from collections import defaultdict
+from collections.abc import Iterable, Iterator
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Iterable, Iterator, Optional, Union
+from typing import TYPE_CHECKING, Optional, Union
 
 import yaml
 from dagster import (
+    AssetCheckEvaluation,
     AssetCheckResult,
     AssetExecutionContext,
     AssetKey,
@@ -16,8 +18,14 @@ from dagster_dbt import DbtCliInvocation
 from dagster_dbt.version import __version__ as dagster_dbt_version
 from packaging import version
 
-from ..insights_utils import extract_asset_info_from_event, handle_raise_on_error
-from .bigquery_utils import build_bigquery_cost_metadata, marker_asset_key_for_job
+from dagster_cloud.dagster_insights.bigquery.bigquery_utils import (
+    build_bigquery_cost_metadata,
+    marker_asset_key_for_job,
+)
+from dagster_cloud.dagster_insights.insights_utils import (
+    extract_asset_info_from_event,
+    handle_raise_on_error,
+)
 
 if TYPE_CHECKING:
     from dbt.adapters.base.impl import BaseAdapter
@@ -52,11 +60,21 @@ def dbt_with_bigquery_insights(
     context: Union[OpExecutionContext, AssetExecutionContext],
     dbt_cli_invocation: DbtCliInvocation,
     dagster_events: Optional[
-        Iterable[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult]]
+        Iterable[
+            Union[
+                Output,
+                AssetMaterialization,
+                AssetObservation,
+                AssetCheckResult,
+                AssetCheckEvaluation,
+            ]
+        ]
     ] = None,
     skip_config_check=False,
     record_observation_usage: bool = True,
-) -> Iterator[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult]]:
+) -> Iterator[
+    Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation]
+]:
     """Wraps a dagster-dbt invocation to associate each BigQuery query with the produced
     asset materializations. This allows the cost of each query to be associated with the asset
     materialization that it produced.
@@ -67,7 +85,7 @@ def dbt_with_bigquery_insights(
     Args:
         context (AssetExecutionContext): The context of the asset that is being materialized.
         dbt_cli_invocation (DbtCliInvocation): The invocation of the dbt CLI to wrap.
-        dagster_events (Optional[Iterable[Union[Output, AssetObservation, AssetCheckResult]]]):
+        dagster_events (Optional[Iterable[Union[Output, AssetObservation, AssetCheckResult, AssetCheckEvaluation]]]):
            The events that were produced by the dbt CLI invocation. If not provided, it is assumed
            that the dbt CLI invocation has not yet been run, and it will be run and the events
            will be streamed.
@@ -116,7 +134,14 @@
     asset_info_by_unique_id = {}
     for dagster_event in dagster_events:
         if isinstance(
-            dagster_event, (AssetMaterialization, AssetObservation, Output, AssetCheckResult)
+            dagster_event,
+            (
+                AssetMaterialization,
+                AssetObservation,
+                Output,
+                AssetCheckResult,
+                AssetCheckEvaluation,
+            ),
         ):
             unique_id = dagster_event.metadata["unique_id"].value
             asset_key, partition = extract_asset_info_from_event(
@@ -133,7 +158,7 @@
     invocation_id = run_results_json["metadata"]["invocation_id"]
 
     # backcompat-proof in case the invocation does not have an instantiated adapter on it
-    adapter: Optional["BaseAdapter"] = getattr(dbt_cli_invocation, "adapter", None)
+    adapter: Optional[BaseAdapter] = getattr(dbt_cli_invocation, "adapter", None)
     if not adapter:
         if version.parse(dagster_dbt_version) < version.parse(MIN_DAGSTER_DBT_VERSION):
             upgrade_message = f" Extracting cost information requires dagster_dbt>={MIN_DAGSTER_DBT_VERSION} (found {dagster_dbt_version}). "
@@ -149,14 +174,16 @@
     cost_by_asset = defaultdict(list)
     try:
         with adapter.connection_named("dagster_insights:bigquery_cost"):
-            client: "bigquery.Client" = adapter.connections.get_thread_connection().handle
-            if client.location and client.project:
+            client: bigquery.Client = adapter.connections.get_thread_connection().handle  # pyright: ignore[reportAssignmentType]
+
+            if (client.location or adapter.config.credentials.location) and client.project:
                 # we should populate the location/project from the client, and use that to determine
                 # the correct INFORMATION_SCHEMA.JOBS table to query for cost information
-                location = client.location
+                # If the client doesn't have a location, fall back to the location provided
+                # in the dbt profile config
+                location = client.location or adapter.config.credentials.location
                 project = client.project
             else:
-                # try fetching the default dataset from the schema, if it exists
                 dataset = client.get_dataset(adapter.config.credentials.schema)
                 location = dataset.location if dataset else None
                 project = client.project or dataset.project
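A typical call site for dbt_with_bigquery_insights, sketched under the assumption of a standard dagster-dbt setup (the manifest path and asset name are hypothetical); the wrapper takes the place of the usual `yield from dbt.cli(...).stream()`:

    from dagster import AssetExecutionContext
    from dagster_dbt import DbtCliResource, dbt_assets

    from dagster_cloud.dagster_insights import dbt_with_bigquery_insights

    @dbt_assets(manifest="target/manifest.json")  # hypothetical manifest path
    def my_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource):
        dbt_cli_invocation = dbt.cli(["build"], context=context)
        # Re-yields the invocation's events while attributing each BigQuery
        # query's cost to the asset it materialized.
        yield from dbt_with_bigquery_insights(context, dbt_cli_invocation)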
--- a/dagster_cloud/dagster_insights/bigquery/insights_bigquery_resource.py
+++ b/dagster_cloud/dagster_insights/bigquery/insights_bigquery_resource.py
@@ -1,16 +1,18 @@
+from collections.abc import Iterator
 from contextlib import contextmanager, nullcontext
-from typing import Iterator, List
 
 from dagster import AssetObservation
-from dagster._annotations import experimental
+from dagster._annotations import beta
 from dagster_gcp import BigQueryResource
 from dagster_gcp.bigquery.utils import setup_gcp_creds
 from google.cloud import bigquery
 
+from dagster_cloud.dagster_insights.bigquery.bigquery_utils import (
+    build_bigquery_cost_metadata,
+    marker_asset_key_for_job,
+)
 from dagster_cloud.dagster_insights.insights_utils import get_current_context_and_asset_key
 
-from .bigquery_utils import build_bigquery_cost_metadata, marker_asset_key_for_job
-
 OUTPUT_NON_ASSET_SIGIL = "__bigquery_query_metadata_"
 
 
@@ -29,7 +31,7 @@ class WrappedBigQueryClient(bigquery.Client):
         return bq_job
 
     @property
-    def job_ids(self) -> List[str]:
+    def job_ids(self) -> list[str]:
         return self._job_ids
 
     @property
@@ -41,7 +43,7 @@ class WrappedBigQueryClient(bigquery.Client):
         return sum([x for x in self._query_slots_ms])
 
 
-@experimental
+@beta
 class InsightsBigQueryResource(BigQueryResource):
     """A wrapper around :py:class:`BigQueryResource` which automatically collects metadata about
     BigQuery costs which can be attributed to Dagster jobs and assets.
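Because InsightsBigQueryResource subclasses BigQueryResource, it is meant to slot in wherever the plain resource was used; a hedged sketch (the project and asset names are hypothetical):

    from dagster import Definitions, asset

    from dagster_cloud.dagster_insights import InsightsBigQueryResource

    @asset
    def revenue_table(bigquery: InsightsBigQueryResource) -> None:
        with bigquery.get_client() as client:
            # Query costs observed on this client are attributed to the asset.
            client.query("SELECT 1").result()

    defs = Definitions(
        assets=[revenue_table],
        resources={"bigquery": InsightsBigQueryResource(project="my-gcp-project")},
    )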
--- a/dagster_cloud/dagster_insights/insights_utils.py
+++ b/dagster_cloud/dagster_insights/insights_utils.py
@@ -1,8 +1,9 @@
 from dataclasses import replace
-from typing import Optional, Tuple, Union
+from typing import Optional, Union
 
 import dagster._check as check
 from dagster import (
+    AssetCheckEvaluation,
     AssetCheckResult,
     AssetExecutionContext,
     AssetKey,
@@ -15,9 +16,9 @@ from dagster import (
 from dagster._core.errors import DagsterInvalidPropertyError
 
 
-def get_current_context_and_asset_key() -> (
-    Tuple[Union[OpExecutionContext, AssetExecutionContext], Optional[AssetKey]]
-):
+def get_current_context_and_asset_key() -> tuple[
+    Union[OpExecutionContext, AssetExecutionContext], Optional[AssetKey]
+]:
     asset_key = None
     try:
         context = AssetExecutionContext.get()
@@ -32,7 +33,7 @@ def get_current_context_and_asset_key() -> (
 def get_asset_key_for_output(
     context: Union[OpExecutionContext, AssetExecutionContext], output_name: str
 ) -> Optional[AssetKey]:
-    asset_key = context.job_def.asset_layer.asset_key_for_output(
+    asset_key = context.job_def.asset_layer.get_asset_key_for_node_output(
         node_handle=context.op_handle, output_name=output_name
     )
     if asset_key is None:
@@ -40,15 +41,24 @@ def get_asset_key_for_output(
     return asset_key
 
 
-def extract_asset_info_from_event(context, dagster_event, record_observation_usage):
+def extract_asset_info_from_event(
+    context,
+    dagster_event: Union[
+        Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation
+    ],
+    record_observation_usage,
+):
     if isinstance(dagster_event, AssetMaterialization):
         return dagster_event.asset_key, dagster_event.partition
 
-    if isinstance(dagster_event, (AssetCheckResult, AssetObservation)) and record_observation_usage:
+    if (
+        isinstance(dagster_event, (AssetCheckResult, AssetObservation, AssetCheckEvaluation))
+        and record_observation_usage
+    ):
         partition = dagster_event.partition if isinstance(dagster_event, AssetObservation) else None
         return dagster_event.asset_key, partition
 
-    if isinstance(dagster_event, (AssetCheckResult, AssetObservation)):
+    if isinstance(dagster_event, (AssetCheckResult, AssetObservation, AssetCheckEvaluation)):
        return None, None
 
     if isinstance(dagster_event, Output):
--- a/dagster_cloud/dagster_insights/metrics_utils.py
+++ b/dagster_cloud/dagster_insights/metrics_utils.py
@@ -1,19 +1,19 @@
 import os
 import tempfile
-from typing import Dict, List, NamedTuple, Optional, Tuple, Union
+from typing import NamedTuple, Optional, Union
 
 import requests
 from dagster import AssetExecutionContext, DagsterInstance, OpExecutionContext
-from dagster._annotations import experimental
+from dagster._annotations import beta
 from dagster_cloud_cli.core.errors import raise_http_error
 from dagster_cloud_cli.core.headers.auth import DagsterCloudInstanceScope
 
 from dagster_cloud.instance import DagsterCloudAgentInstance
 
 
-@experimental
+@beta
 class DagsterMetric(NamedTuple):
-    """Experimental: This class gives information about a Metric.
+    """Beta: This class gives information about a Metric.
 
     Args:
         metric_name (str): name of the metric
@@ -24,16 +24,16 @@ class DagsterMetric(NamedTuple):
     metric_value: float
 
 
-def get_url_and_token_from_instance(instance: DagsterInstance) -> Tuple[str, str]:
+def get_url_and_token_from_instance(instance: DagsterInstance) -> tuple[str, str]:
     if not isinstance(instance, DagsterCloudAgentInstance):
         raise RuntimeError("This asset only functions in a running Dagster Cloud instance")
 
     return f"{instance.dagit_url}graphql", instance.dagster_cloud_agent_token
 
 
-def get_post_request_params(
+def get_insights_upload_request_params(
     instance: DagsterInstance,
-) -> Tuple[requests.Session, str, Dict[str, str], int, Optional[Dict[str, str]]]:
+) -> tuple[requests.Session, str, dict[str, str], int, Optional[dict[str, str]]]:
     if not isinstance(instance, DagsterCloudAgentInstance):
         raise RuntimeError("This asset only functions in a running Dagster Cloud instance")
 
@@ -49,7 +49,7 @@
 def upload_cost_information(
     context: Union[OpExecutionContext, AssetExecutionContext],
     metric_name: str,
-    cost_information: List[Tuple[str, float, str]],
+    cost_information: list[tuple[str, float, str]],
 ):
     import pyarrow as pa
     import pyarrow.parquet as pq
@@ -70,9 +70,9 @@
     )
 
     instance = context.instance
-    session, url, headers, timeout, proxies = get_post_request_params(instance)
+    session, url, headers, timeout, proxies = get_insights_upload_request_params(instance)
 
-    resp = session.post(url, headers=headers, timeout=timeout, proxies=proxies)
+    resp = session.get(url, headers=headers, timeout=timeout, proxies=proxies)
     raise_http_error(resp)
     resp_data = resp.json()
 
@@ -86,11 +86,11 @@
     )
 
 
-@experimental
+@beta
 def put_cost_information(
     context: Union[OpExecutionContext, AssetExecutionContext],
     metric_name: str,
-    cost_information: List[Tuple[str, float, str]],
+    cost_information: list[tuple[str, float, str]],
     start: float,
     end: float,
 ) -> None:
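For reference, put_cost_information takes (opaque id, cost, query id) triples plus a reporting window; a hypothetical call site (the metric name and values are made up, and start/end are assumed to be Unix timestamps):

    import time

    put_cost_information(
        context,  # an OpExecutionContext or AssetExecutionContext
        metric_name="snowflake_credits",  # hypothetical metric name
        cost_information=[("opaque-id-abc", 0.42, "query-id-123")],
        start=time.time() - 3600,
        end=time.time(),
    )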
--- a/dagster_cloud/dagster_insights/snowflake/dagster_snowflake_insights.py
+++ b/dagster_cloud/dagster_insights/snowflake/dagster_snowflake_insights.py
@@ -1,11 +1,12 @@
 import json
+from collections.abc import Sequence
 from dataclasses import dataclass
 from datetime import datetime, timedelta
-from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple
+from typing import TYPE_CHECKING, Optional
 
 from dagster import AssetKey, AssetsDefinition, ScheduleDefinition
 
-from .snowflake_utils import OPAQUE_ID_SQL_SIGIL
+from dagster_cloud.dagster_insights.snowflake.snowflake_utils import OPAQUE_ID_SQL_SIGIL
 
 if TYPE_CHECKING:
     from dagster_snowflake import SnowflakeConnection
@@ -34,7 +35,7 @@ def get_cost_data_for_hour(
     snowflake: "SnowflakeConnection",
     start_hour: datetime,
     end_hour: datetime,
-) -> List[Tuple[str, float, str]]:
+) -> list[tuple[str, float, str]]:
     """Given a date range, queries the Snowflake query_history table for all queries that were run
     during that time period and returns a mapping from AssetMaterializationId to the cost of the
     query that produced it, as estimated by Snowflake. The cost is in Snowflake credits.
@@ -75,11 +76,7 @@ HAVING ARRAY_SIZE(opaque_ids) > 0
     assert result
     results = result.fetchall()
 
-    costs: List[Tuple[str, float, str]] = []
-
-    print(
-        f"{len(results) if results else 0} annotated queries returned from snowflake query_history"
-    )
+    costs: list[tuple[str, float, str]] = []
 
     if not results:
         return []
@@ -93,8 +90,4 @@ HAVING ARRAY_SIZE(opaque_ids) > 0
         for opaque_id in opaque_ids:
             costs.append((opaque_id, float(cost), query_id))
 
-    print(
-        f"Reported costs for {len(costs)} of {total} asset materializations found in the"
-        " query_history."
-    )
     return costs
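And a short sketch of driving the cleaned-up get_cost_data_for_hour directly (the SnowflakeConnection resource named snowflake is assumed, not shown in this diff):

    from datetime import datetime, timedelta, timezone

    # Meter the previous full hour of annotated query history.
    end_hour = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0)
    start_hour = end_hour - timedelta(hours=1)

    costs = get_cost_data_for_hour(snowflake, start_hour, end_hour)
    for opaque_id, credits_used, query_id in costs:
        ...  # each triple can feed put_cost_information, as above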