dagster-dbt 0.23.3__py3-none-any.whl → 0.28.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_dbt/__init__.py +41 -140
- dagster_dbt/asset_decorator.py +49 -230
- dagster_dbt/asset_specs.py +65 -0
- dagster_dbt/asset_utils.py +655 -338
- dagster_dbt/cli/app.py +44 -43
- dagster_dbt/cloud/__init__.py +6 -4
- dagster_dbt/cloud/asset_defs.py +119 -177
- dagster_dbt/cloud/cli.py +3 -4
- dagster_dbt/cloud/ops.py +9 -6
- dagster_dbt/cloud/resources.py +9 -4
- dagster_dbt/cloud/types.py +12 -7
- dagster_dbt/cloud/utils.py +186 -0
- dagster_dbt/cloud_v2/__init__.py +10 -0
- dagster_dbt/cloud_v2/asset_decorator.py +81 -0
- dagster_dbt/cloud_v2/cli_invocation.py +67 -0
- dagster_dbt/cloud_v2/client.py +438 -0
- dagster_dbt/cloud_v2/resources.py +462 -0
- dagster_dbt/cloud_v2/run_handler.py +229 -0
- dagster_dbt/cloud_v2/sensor_builder.py +254 -0
- dagster_dbt/cloud_v2/types.py +143 -0
- dagster_dbt/compat.py +107 -0
- dagster_dbt/components/__init__.py +0 -0
- dagster_dbt/components/dbt_project/__init__.py +0 -0
- dagster_dbt/components/dbt_project/component.py +545 -0
- dagster_dbt/components/dbt_project/scaffolder.py +65 -0
- dagster_dbt/core/__init__.py +0 -10
- dagster_dbt/core/dbt_cli_event.py +612 -0
- dagster_dbt/core/dbt_cli_invocation.py +474 -0
- dagster_dbt/core/dbt_event_iterator.py +399 -0
- dagster_dbt/core/resource.py +733 -0
- dagster_dbt/core/utils.py +14 -279
- dagster_dbt/dagster_dbt_translator.py +317 -74
- dagster_dbt/dbt_core_version.py +1 -0
- dagster_dbt/dbt_manifest.py +6 -5
- dagster_dbt/dbt_manifest_asset_selection.py +62 -22
- dagster_dbt/dbt_project.py +179 -40
- dagster_dbt/dbt_project_manager.py +173 -0
- dagster_dbt/dbt_version.py +0 -0
- dagster_dbt/errors.py +9 -84
- dagster_dbt/freshness_builder.py +147 -0
- dagster_dbt/include/pyproject.toml.jinja +21 -0
- dagster_dbt/include/scaffold/assets.py.jinja +1 -8
- dagster_dbt/include/scaffold/definitions.py.jinja +0 -15
- dagster_dbt/include/scaffold/project.py.jinja +1 -0
- dagster_dbt/include/setup.py.jinja +2 -3
- dagster_dbt/metadata_set.py +18 -0
- dagster_dbt/utils.py +136 -234
- dagster_dbt/version.py +1 -1
- dagster_dbt-0.28.4.dist-info/METADATA +47 -0
- dagster_dbt-0.28.4.dist-info/RECORD +59 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/WHEEL +1 -1
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/entry_points.txt +3 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info/licenses}/LICENSE +1 -1
- dagster_dbt/asset_defs.py +0 -1049
- dagster_dbt/core/resources.py +0 -527
- dagster_dbt/core/resources_v2.py +0 -1542
- dagster_dbt/core/types.py +0 -63
- dagster_dbt/dbt_resource.py +0 -220
- dagster_dbt/include/scaffold/constants.py.jinja +0 -21
- dagster_dbt/ops.py +0 -134
- dagster_dbt/types.py +0 -22
- dagster_dbt-0.23.3.dist-info/METADATA +0 -31
- dagster_dbt-0.23.3.dist-info/RECORD +0 -43
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from collections.abc import Sequence
|
|
3
|
+
from functools import cached_property
|
|
4
|
+
from typing import NamedTuple, Optional, Union
|
|
5
|
+
|
|
6
|
+
from dagster import (
|
|
7
|
+
AssetCheckSpec,
|
|
8
|
+
AssetExecutionContext,
|
|
9
|
+
AssetSpec,
|
|
10
|
+
ConfigurableResource,
|
|
11
|
+
Definitions,
|
|
12
|
+
_check as check,
|
|
13
|
+
get_dagster_logger,
|
|
14
|
+
multi_asset_check,
|
|
15
|
+
)
|
|
16
|
+
from dagster._annotations import public
|
|
17
|
+
from dagster._config.pythonic_config.resource import ResourceDependency
|
|
18
|
+
from dagster._core.definitions.definitions_load_context import StateBackedDefinitionsLoader
|
|
19
|
+
from dagster._record import record
|
|
20
|
+
from dagster._utils.cached_method import cached_method
|
|
21
|
+
from pydantic import Field
|
|
22
|
+
|
|
23
|
+
from dagster_dbt.asset_utils import (
|
|
24
|
+
DBT_DEFAULT_EXCLUDE,
|
|
25
|
+
DBT_DEFAULT_SELECT,
|
|
26
|
+
DBT_DEFAULT_SELECTOR,
|
|
27
|
+
build_dbt_specs,
|
|
28
|
+
get_updated_cli_invocation_params_for_context,
|
|
29
|
+
)
|
|
30
|
+
from dagster_dbt.cloud_v2.cli_invocation import DbtCloudCliInvocation
|
|
31
|
+
from dagster_dbt.cloud_v2.client import DbtCloudWorkspaceClient
|
|
32
|
+
from dagster_dbt.cloud_v2.run_handler import DbtCloudJobRunHandler
|
|
33
|
+
from dagster_dbt.cloud_v2.types import (
|
|
34
|
+
DbtCloudAccount,
|
|
35
|
+
DbtCloudEnvironment,
|
|
36
|
+
DbtCloudJob,
|
|
37
|
+
DbtCloudProject,
|
|
38
|
+
DbtCloudWorkspaceData,
|
|
39
|
+
)
|
|
40
|
+
from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator, validate_opt_translator
|
|
41
|
+
from dagster_dbt.utils import clean_name
|
|
42
|
+
|
|
43
|
+
# Prefix placed at the start of the ad hoc job name generated by
# `get_dagster_adhoc_job_name`.
DAGSTER_ADHOC_PREFIX = "DAGSTER_ADHOC_JOB__"
|
|
44
|
+
# Prefix of the `defs_key` built by `DbtCloudWorkspaceDefsLoader`; the key is this
# prefix, a dot, and the workspace's `unique_id`.
DBT_CLOUD_RECONSTRUCTION_METADATA_KEY_PREFIX = "__dbt_cloud"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_dagster_adhoc_job_name(
    project_id: int,
    project_name: Optional[str],
    environment_id: int,
    environment_name: Optional[str],
) -> str:
    """Build the default name of the ad hoc dbt Cloud job used by Dagster.

    The name is ``DAGSTER_ADHOC_PREFIX`` followed by the project and environment
    parts joined with a double underscore. Each part is the name when it is
    truthy, otherwise the numeric ID. The result is passed through
    ``clean_name`` and upper-cased.

    Args:
        project_id (int): ID of the dbt Cloud project, used when no name is given.
        project_name (Optional[str]): Name of the dbt Cloud project, if any.
        environment_id (int): ID of the environment, used when no name is given.
        environment_name (Optional[str]): Name of the environment, if any.

    Returns:
        str: The cleaned, upper-cased job name.
    """
    # Fall back to the numeric ID whenever the name is missing or empty.
    project_part = project_name or project_id
    environment_part = environment_name or environment_id
    raw_name = f"{DAGSTER_ADHOC_PREFIX}{project_part}__{environment_part}"
    return clean_name(raw_name).upper()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@public
class DbtCloudCredentials(NamedTuple):
    """Credentials needed to reach a dbt Cloud workspace.

    Args:
        account_id (int): ID of the dbt Cloud account.
        token (str): API token for dbt Cloud.
        access_url (str): URL of the dbt Cloud workspace.
    """

    # Identifier of the dbt Cloud account.
    account_id: int
    # API token sent to dbt Cloud.
    token: str
    # Workspace URL the client connects to.
    access_url: str
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@public
class DbtCloudWorkspace(ConfigurableResource):
    """Resource describing one dbt Cloud workspace (a project/environment pair),
    with helpers built on top of the dbt Cloud API client.

    Args:
        credentials (DbtCloudCredentials): Account ID, API token and access URL
            used to build the dbt Cloud client.
        project_id (int): ID of the dbt Cloud project this resource targets.
        environment_id (int): ID of the environment, within that project, this
            resource targets.
        adhoc_job_name (Optional[str]): Name of the ad hoc dbt Cloud job that
            Dagster looks up, or creates when missing, to parse the project and
            run dbt commands. When omitted, a name is generated from the project
            and environment names, falling back to their IDs.
        request_max_retries (int): Maximum number of retries for requests to the
            dbt Cloud API before failing.
        request_retry_delay (float): Seconds to wait between request retries.
        request_timeout (int): Seconds after which a request to dbt Cloud is
            considered timed out.
    """

    credentials: ResourceDependency[DbtCloudCredentials]
    project_id: int = Field(description="The ID of the dbt Cloud project to use for this resource.")
    environment_id: int = Field(
        description="The ID of environment to use for the dbt Cloud project used in this resource."
    )
    adhoc_job_name: Optional[str] = Field(
        default=None,
        description=(
            "The name of the ad hoc job that will be created by Dagster in your dbt Cloud workspace. "
            "This ad hoc job is used to parse your project and materialize your dbt Cloud assets. "
            "If not provided, this job name will be generated using your project ID and environment ID."
        ),
    )
    request_max_retries: int = Field(
        default=3,
        description=(
            "The maximum number of times requests to the dbt Cloud API should be retried "
            "before failing."
        ),
    )
    request_retry_delay: float = Field(
        default=0.25,
        description="Time (in seconds) to wait between each request retry.",
    )
    request_timeout: int = Field(
        default=15,
        description="Time (in seconds) after which the requests to dbt Cloud are declared timed out.",
    )

    @property
    @cached_method
    def _log(self) -> logging.Logger:
        """Logger obtained from ``get_dagster_logger``."""
        return get_dagster_logger()

    @property
    def unique_id(self) -> str:
        """Identifier of this workspace: the project ID and environment ID joined by a dash.

        Returns:
            str: the unique ID for this dbt Cloud workspace.
        """
        return "-".join((str(self.project_id), str(self.environment_id)))

    @cached_property
    def account_name(self) -> Optional[str]:
        """Name of the dbt Cloud account, as reported by the dbt Cloud API.

        A warning is logged when the API returns no name.

        Returns:
            Optional[str]: the account name, or the empty/missing value returned by the API.
        """
        details = self.get_client().get_account_details()
        account = DbtCloudAccount.from_account_details(account_details=details)
        if account.name:
            return account.name
        self._log.warning(
            f"Account name was not returned by the dbt Cloud API for account ID `{account.id}`. "
            f"Make sure to set a name for this account in dbt Cloud."
        )
        return account.name

    @cached_property
    def project_name(self) -> Optional[str]:
        """Name of the dbt Cloud project, as reported by the dbt Cloud API.

        A warning is logged when the API returns no name.

        Returns:
            Optional[str]: the project name, or the empty/missing value returned by the API.
        """
        details = self.get_client().get_project_details(project_id=self.project_id)
        project = DbtCloudProject.from_project_details(project_details=details)
        if project.name:
            return project.name
        self._log.warning(
            f"Project name was not returned by the dbt Cloud API for project ID `{project.id}`. "
            f"Make sure to set a name for this project in dbt Cloud."
        )
        return project.name

    @cached_property
    def environment_name(self) -> Optional[str]:
        """Name of the dbt Cloud environment, as reported by the dbt Cloud API.

        A warning is logged when the API returns no name.

        Returns:
            Optional[str]: the environment name, or the empty/missing value returned by the API.
        """
        details = self.get_client().get_environment_details(environment_id=self.environment_id)
        environment = DbtCloudEnvironment.from_environment_details(environment_details=details)
        if environment.name:
            return environment.name
        self._log.warning(
            f"Environment name was not returned by the dbt Cloud API for environment ID `{environment.id}`. "
            f"Make sure to set a name for this environment in dbt Cloud."
        )
        return environment.name

    @cached_method
    def get_client(self) -> DbtCloudWorkspaceClient:
        """Build the client used to talk to this dbt Cloud workspace.

        Returns:
            DbtCloudWorkspaceClient: client configured from the credentials and the
            request retry/timeout settings of this resource.
        """
        creds = self.credentials
        return DbtCloudWorkspaceClient(
            account_id=creds.account_id,
            token=creds.token,
            access_url=creds.access_url,
            request_max_retries=self.request_max_retries,
            request_retry_delay=self.request_retry_delay,
            request_timeout=self.request_timeout,
        )

    def _get_or_create_dagster_adhoc_job(self) -> DbtCloudJob:
        """Return the ad hoc job of this project/environment, creating it when absent.

        Returns:
            DbtCloudJob: Internal representation of the dbt Cloud job.
        """
        client = self.get_client()
        # An explicitly configured name wins; otherwise derive one from the
        # project/environment names (or IDs).
        job_name = self.adhoc_job_name or get_dagster_adhoc_job_name(
            project_id=self.project_id,
            project_name=self.project_name,
            environment_id=self.environment_id,
            environment_name=self.environment_name,
        )
        existing_jobs = [
            DbtCloudJob.from_job_details(details)
            for details in client.list_jobs(
                project_id=self.project_id,
                environment_id=self.environment_id,
            )
        ]

        # Reuse the first job carrying the expected name, if there is one.
        for job in existing_jobs:
            if job.name == job_name:
                return job

        created_details = client.create_job(
            project_id=self.project_id,
            environment_id=self.environment_id,
            job_name=job_name,
            description=(
                "This job is used by Dagster to parse your dbt Cloud workspace "
                "and to kick off runs of dbt Cloud models."
            ),
        )
        return DbtCloudJob.from_job_details(created_details)

    @cached_method
    def fetch_workspace_data(self) -> DbtCloudWorkspaceData:
        """Run ``dbt parse`` through the ad hoc job and collect the workspace data.

        Returns:
            DbtCloudWorkspaceData: project/environment IDs, the ad hoc job ID, the
            manifest produced by the parse run and the jobs listed for this
            project/environment.
        """
        adhoc_job = self._get_or_create_dagster_adhoc_job()
        parse_run = DbtCloudJobRunHandler.run(
            job_id=adhoc_job.id,
            args=["parse"],
            client=self.get_client(),
        )
        # Block until the run is finished, then surface a failed run.
        parse_run.wait().raise_for_status()

        manifest = parse_run.get_manifest()
        jobs = self.get_client().list_jobs(
            project_id=self.project_id,
            environment_id=self.environment_id,
        )
        return DbtCloudWorkspaceData(
            project_id=self.project_id,
            environment_id=self.environment_id,
            adhoc_job_id=adhoc_job.id,
            manifest=manifest,
            jobs=jobs,
        )

    def get_or_fetch_workspace_data(self) -> DbtCloudWorkspaceData:
        """Return the workspace data through a loader built with the default translator and selection."""
        loader = DbtCloudWorkspaceDefsLoader(
            workspace=self,
            translator=DagsterDbtTranslator(),
            select=DBT_DEFAULT_SELECT,
            exclude=DBT_DEFAULT_EXCLUDE,
            selector=DBT_DEFAULT_SELECTOR,
        )
        return loader.get_or_fetch_state()

    # Cache spec retrieval for a specific translator class and dbt selection args.
    @cached_method
    def load_specs(
        self,
        select: str,
        exclude: str,
        selector: str,
        dagster_dbt_translator: Optional[DagsterDbtTranslator] = None,
    ) -> Sequence[Union[AssetSpec, AssetCheckSpec]]:
        """Load the asset specs followed by the asset check specs of this workspace.

        Args:
            select (str): dbt selection string.
            exclude (str): dbt exclusion string.
            selector (str): dbt selector name.
            dagster_dbt_translator (Optional[DagsterDbtTranslator]): Translator used
                to build the specs; a default ``DagsterDbtTranslator`` when omitted.

        Returns:
            Sequence[Union[AssetSpec, AssetCheckSpec]]: asset specs first, then check specs.
        """
        translator = dagster_dbt_translator or DagsterDbtTranslator()

        with self.process_config_and_initialize_cm() as initialized_workspace:
            loader = DbtCloudWorkspaceDefsLoader(
                workspace=initialized_workspace,
                translator=translator,
                select=select,
                exclude=exclude,
                selector=selector,
            )
            defs = loader.build_defs()
            asset_specs = check.is_list(defs.assets, AssetSpec)
            # Flatten the check specs of every asset checks definition.
            flattened_check_specs = [
                spec for checks_def in defs.asset_checks or [] for spec in checks_def.check_specs
            ]
            asset_check_specs = check.is_list(flattened_check_specs, AssetCheckSpec)
            return [*asset_specs, *asset_check_specs]

    def load_asset_specs(
        self,
        select: str,
        exclude: str,
        selector: str,
        dagster_dbt_translator: Optional[DagsterDbtTranslator] = None,
    ) -> Sequence[AssetSpec]:
        """Return only the ``AssetSpec`` entries produced by ``load_specs``."""
        specs = self.load_specs(
            dagster_dbt_translator=dagster_dbt_translator,
            select=select,
            exclude=exclude,
            selector=selector,
        )
        return [candidate for candidate in specs if isinstance(candidate, AssetSpec)]

    def load_check_specs(
        self,
        select: str,
        exclude: str,
        selector: str,
        dagster_dbt_translator: Optional[DagsterDbtTranslator] = None,
    ) -> Sequence[AssetCheckSpec]:
        """Return only the ``AssetCheckSpec`` entries produced by ``load_specs``."""
        specs = self.load_specs(
            dagster_dbt_translator=dagster_dbt_translator,
            select=select,
            exclude=exclude,
            selector=selector,
        )
        return [candidate for candidate in specs if isinstance(candidate, AssetCheckSpec)]

    @public
    def cli(
        self,
        args: Sequence[str],
        dagster_dbt_translator: Optional[DagsterDbtTranslator] = None,
        context: Optional[AssetExecutionContext] = None,
    ) -> DbtCloudCliInvocation:
        """Creates a dbt CLI invocation with the dbt Cloud client.

        Args:
            args: (Sequence[str]): The dbt CLI command to execute.
            dagster_dbt_translator (Optional[DagsterDbtTranslator]): Allows customizing how to map
                dbt models, seeds, etc. to asset keys and asset metadata.
            context (Optional[AssetExecutionContext]): The execution context.

        Returns:
            DbtCloudCliInvocation: the invocation started on the ad hoc job.
        """
        translator = validate_opt_translator(dagster_dbt_translator) or DagsterDbtTranslator()

        client = self.get_client()
        workspace_data = self.get_or_fetch_workspace_data()

        params = get_updated_cli_invocation_params_for_context(
            context=context,
            manifest=workspace_data.manifest,
            dagster_dbt_translator=translator,
        )

        extra_args = list(params.selection_args)
        # set dbt indirect selection if needed to execute specific dbt tests due to asset check
        # selection
        if params.indirect_selection:
            extra_args.append(f"--indirect-selection {params.indirect_selection}")

        # We pass the manifest instead of the workspace data
        # because we use the manifest included in the asset definitions
        # when this method is called inside a function decorated with `@dbt_cloud_assets`
        return DbtCloudCliInvocation.run(
            job_id=workspace_data.adhoc_job_id,
            args=[*args, *extra_args],
            client=client,
            manifest=params.manifest,
            dagster_dbt_translator=params.dagster_dbt_translator,
            context=context,
        )
|
|
394
|
+
|
|
395
|
+
|
|
396
|
+
def load_dbt_cloud_asset_specs(
    workspace: DbtCloudWorkspace,
    dagster_dbt_translator: Optional[DagsterDbtTranslator] = None,
    select: str = DBT_DEFAULT_SELECT,
    exclude: str = DBT_DEFAULT_EXCLUDE,
    selector: str = DBT_DEFAULT_SELECTOR,
) -> Sequence[AssetSpec]:
    """Load the asset specs of a dbt Cloud workspace.

    Thin wrapper that forwards every argument to ``workspace.load_asset_specs``.

    Args:
        workspace (DbtCloudWorkspace): The workspace to load specs from.
        dagster_dbt_translator (Optional[DagsterDbtTranslator]): Translator
            forwarded to the workspace.
        select (str): dbt selection string.
        exclude (str): dbt exclusion string.
        selector (str): dbt selector name.

    Returns:
        Sequence[AssetSpec]: The asset specs returned by the workspace.
    """
    forwarded = dict(
        dagster_dbt_translator=dagster_dbt_translator,
        select=select,
        exclude=exclude,
        selector=selector,
    )
    return workspace.load_asset_specs(**forwarded)
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def load_dbt_cloud_check_specs(
    workspace: DbtCloudWorkspace,
    dagster_dbt_translator: Optional[DagsterDbtTranslator] = None,
    select: str = DBT_DEFAULT_SELECT,
    exclude: str = DBT_DEFAULT_EXCLUDE,
    selector: str = DBT_DEFAULT_SELECTOR,
) -> Sequence[AssetCheckSpec]:
    """Load the asset check specs of a dbt Cloud workspace.

    Thin wrapper that forwards every argument to ``workspace.load_check_specs``.

    Args:
        workspace (DbtCloudWorkspace): The workspace to load specs from.
        dagster_dbt_translator (Optional[DagsterDbtTranslator]): Translator
            forwarded to the workspace.
        select (str): dbt selection string.
        exclude (str): dbt exclusion string.
        selector (str): dbt selector name.

    Returns:
        Sequence[AssetCheckSpec]: The check specs returned by the workspace.
    """
    forwarded = dict(
        dagster_dbt_translator=dagster_dbt_translator,
        select=select,
        exclude=exclude,
        selector=selector,
    )
    return workspace.load_check_specs(**forwarded)
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
@record
class DbtCloudWorkspaceDefsLoader(StateBackedDefinitionsLoader[DbtCloudWorkspaceData]):
    """State-backed loader that turns dbt Cloud workspace data into Dagster definitions."""

    # Workspace the state is fetched from.
    workspace: DbtCloudWorkspace
    # Translator handed to `build_dbt_specs`.
    translator: DagsterDbtTranslator
    # dbt selection arguments handed to `build_dbt_specs`.
    select: str
    exclude: str
    selector: str

    @property
    def defs_key(self) -> str:
        """Key of this loader: the dbt Cloud prefix, a dot, and the workspace unique ID."""
        workspace_id = self.workspace.unique_id
        return f"{DBT_CLOUD_RECONSTRUCTION_METADATA_KEY_PREFIX}.{workspace_id}"

    def fetch_state(self) -> DbtCloudWorkspaceData:
        """Fetch the workspace data from the workspace resource."""
        return self.workspace.fetch_workspace_data()

    def defs_from_state(self, state: DbtCloudWorkspaceData) -> Definitions:
        """Build the definitions (asset specs and asset checks) from the workspace data.

        Args:
            state (DbtCloudWorkspaceData): Workspace data whose manifest is used to
                build the specs.

        Returns:
            Definitions: asset specs tagged with the ``dbtcloud`` kind, plus a single
            multi-asset check carrying every check spec.
        """
        asset_specs, check_specs = build_dbt_specs(
            manifest=state.manifest,
            translator=self.translator,
            select=self.select,
            exclude=self.exclude,
            selector=self.selector,
            io_manager_key=None,
            project=None,
        )

        relabeled_specs = []
        for spec in asset_specs:
            # Swap the "dbt" kind for "dbtcloud" while keeping every other kind.
            kinds = {"dbtcloud"} | (spec.kinds - {"dbt"})
            relabeled_specs.append(spec.replace_attributes(kinds=kinds))

        # External facing checks are not supported yet
        # https://linear.app/dagster-labs/issue/AD-915/support-external-asset-checks-in-dbt-cloud-v2
        @multi_asset_check(specs=check_specs)
        def _all_asset_checks(): ...

        return Definitions(assets=relabeled_specs, asset_checks=[_all_asset_checks])
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
from collections.abc import Iterator, Mapping, Sequence
|
|
2
|
+
from typing import Any, Optional, Union
|
|
3
|
+
|
|
4
|
+
from dagster import (
|
|
5
|
+
AssetCheckEvaluation,
|
|
6
|
+
AssetCheckResult,
|
|
7
|
+
AssetCheckSeverity,
|
|
8
|
+
AssetExecutionContext,
|
|
9
|
+
AssetMaterialization,
|
|
10
|
+
MetadataValue,
|
|
11
|
+
Output,
|
|
12
|
+
get_dagster_logger,
|
|
13
|
+
)
|
|
14
|
+
from dagster._record import record
|
|
15
|
+
from dateutil import parser
|
|
16
|
+
|
|
17
|
+
from dagster_dbt.asset_utils import build_dbt_specs, get_asset_check_key_for_test
|
|
18
|
+
from dagster_dbt.cloud_v2.client import DbtCloudWorkspaceClient
|
|
19
|
+
from dagster_dbt.cloud_v2.types import DbtCloudRun
|
|
20
|
+
from dagster_dbt.compat import REFABLE_NODE_TYPES, NodeStatus, NodeType, TestStatus
|
|
21
|
+
from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator
|
|
22
|
+
|
|
23
|
+
# Metadata key under which the completion timestamp of a dbt node result is
# attached to the events emitted by `DbtCloudJobRunResults.to_default_asset_events`.
COMPLETED_AT_TIMESTAMP_METADATA_KEY = "dagster_dbt/completed_at_timestamp"
|
|
24
|
+
|
|
25
|
+
# Module-level Dagster logger; used to warn about run results whose unique ID
# is missing from the manifest.
logger = get_dagster_logger()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@record
class DbtCloudJobRunHandler:
    """Handle on one dbt Cloud job run: trigger it, wait for it, read its artifacts."""

    # ID of the dbt Cloud job that was triggered.
    job_id: int
    # ID of the run created by triggering the job.
    run_id: int
    # dbt arguments the run was triggered with (without the leading "dbt").
    args: Sequence[str]
    # Client used for every call to dbt Cloud.
    client: DbtCloudWorkspaceClient

    @classmethod
    def run(
        cls, job_id: int, args: Sequence[str], client: DbtCloudWorkspaceClient
    ) -> "DbtCloudJobRunHandler":
        """Trigger a run of the job and return a handler bound to that run.

        Args:
            job_id (int): ID of the dbt Cloud job to trigger.
            args (Sequence[str]): dbt arguments; they are joined with spaces behind
                ``dbt`` to form the single step override of the run.
            client (DbtCloudWorkspaceClient): Client used to trigger the run.

        Returns:
            DbtCloudJobRunHandler: handler for the triggered run.
        """
        dbt_command = " ".join(("dbt", *args))
        triggered = client.trigger_job_run(job_id, steps_override=[dbt_command])
        triggered_run = DbtCloudRun.from_run_details(run_details=triggered)
        return DbtCloudJobRunHandler(
            job_id=job_id,
            run_id=triggered_run.id,
            args=args,
            client=client,
        )

    def wait(self, timeout: Optional[float] = None) -> DbtCloudRun:
        """Poll the run through the client and return its details.

        Args:
            timeout (Optional[float]): Forwarded to the client as ``poll_timeout``.

        Returns:
            DbtCloudRun: the run built from the polled run details.
        """
        polled = self.client.poll_run(run_id=self.run_id, poll_timeout=timeout)
        return DbtCloudRun.from_run_details(run_details=polled)

    def get_run_results(self) -> Mapping[str, Any]:
        """Return the run results JSON of this run, as fetched by the client."""
        return self.client.get_run_results_json(run_id=self.run_id)

    def get_manifest(self) -> Mapping[str, Any]:
        """Return the manifest JSON of this run, as fetched by the client."""
        return self.client.get_run_manifest_json(run_id=self.run_id)

    def list_run_artifacts(self) -> Sequence[str]:
        """Return the artifacts listed by the client for this run."""
        return self.client.list_run_artifacts(run_id=self.run_id)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def get_completed_at_timestamp(result: Mapping[str, Any]) -> float:
    """Return the completion time of a run result as a POSIX timestamp.

    Args:
        result (Mapping[str, Any]): One entry of the ``results`` list of
            ``run_results.json``.

    Returns:
        float: timestamp parsed from the ``completed_at`` field of the last
        timing entry.
    """
    # result["timing"] is a list of events in run_results.json
    # For successful models and passing tests,
    # the last item of that list includes the timing details of the execution.
    # NOTE(review): assumes the timing list is non-empty; an empty list would
    # raise IndexError here — confirm for skipped or errored nodes.
    last_timing = result["timing"][-1]
    completed_at = parser.parse(last_timing["completed_at"])
    return completed_at.timestamp()
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@record
class DbtCloudJobRunResults:
    """Wrapper around the ``run_results.json`` payload of one dbt Cloud job run."""

    # ID of the dbt Cloud run the results belong to.
    run_id: int
    # Parsed content of run_results.json.
    run_results: Mapping[str, Any]

    @classmethod
    def from_run_results_json(cls, run_results_json: Mapping[str, Any]) -> "DbtCloudJobRunResults":
        """Build the wrapper, reading the run ID from ``metadata.env.DBT_CLOUD_RUN_ID``."""
        env = run_results_json["metadata"]["env"]
        return cls(
            run_id=int(env["DBT_CLOUD_RUN_ID"]),
            run_results=run_results_json,
        )

    def to_default_asset_events(
        self,
        client: DbtCloudWorkspaceClient,
        manifest: Mapping[str, Any],
        dagster_dbt_translator: Optional[DagsterDbtTranslator] = None,
        context: Optional[AssetExecutionContext] = None,
    ) -> Iterator[Union[AssetCheckEvaluation, AssetCheckResult, AssetMaterialization, Output]]:
        """Translate the results of the dbt Cloud run into Dagster events.

        Args:
            client (DbtCloudWorkspaceClient): The client for the dbt Cloud workspace.
            manifest (Mapping[str, Any]): The dbt manifest blob.
            dagster_dbt_translator (DagsterDbtTranslator): Optionally, a custom translator for
                linking dbt nodes to Dagster assets.
            context (Optional[AssetExecutionContext]): The execution context.

        Returns:
            Iterator[Union[AssetCheckEvaluation, AssetCheckResult, AssetMaterialization, Output]]:
                A set of corresponding Dagster events.

                In a Dagster asset definition, the following are yielded:
                - Output for refables (e.g. models, seeds, snapshots.)
                - AssetCheckResult for dbt tests.

                For ad hoc usage, the following are yielded:
                - AssetMaterialization for refables (e.g. models, seeds, snapshots.)
                - AssetCheckEvaluation for dbt tests.
        """
        translator = dagster_dbt_translator or DagsterDbtTranslator()
        has_asset_def: bool = bool(context and context.has_assets_def)

        run_details = client.get_run_details(run_id=self.run_id)
        run = DbtCloudRun.from_run_details(run_details=run_details)

        invocation_id: str = self.run_results["metadata"]["invocation_id"]
        for result in self.run_results["results"]:
            unique_id: str = result["unique_id"]
            node_props: Mapping[str, Any] = manifest["nodes"].get(unique_id)
            if not node_props:
                logger.warning(
                    f"Unique ID {unique_id} not found in manifest. "
                    f"This can happen if you are parsing old runs fetched via the sensor, "
                    f"or if your manifest is out of date. "
                    f"Reloading your code location will fix the latter."
                )
                continue
            # Select exactly this node through its fully qualified name.
            node_selection: str = ".".join(node_props["fqn"])

            base_metadata = {
                "unique_id": unique_id,
                "invocation_id": invocation_id,
                "execution_duration": result["execution_time"],
            }
            if run.url:
                base_metadata["run_url"] = MetadataValue.url(run.url)

            resource_type: str = node_props["resource_type"]
            status: str = result["status"]
            materialization: str = node_props["config"]["materialized"]
            is_ephemeral = materialization == "ephemeral"

            # Build the specs for the given unique ID
            node_specs, _ = build_dbt_specs(
                manifest=manifest,
                translator=translator,
                select=node_selection,
                exclude="",
                selector="",
                io_manager_key=None,
                project=None,
            )

            is_materialized_refable = (
                resource_type in REFABLE_NODE_TYPES
                and status == NodeStatus.Success
                and not is_ephemeral
            )
            if is_materialized_refable:
                spec = node_specs[0]
                completed_at = MetadataValue.timestamp(get_completed_at_timestamp(result=result))
                metadata = {
                    **base_metadata,
                    COMPLETED_AT_TIMESTAMP_METADATA_KEY: completed_at,
                }
                if context and has_asset_def:
                    yield Output(
                        value=None,
                        output_name=spec.key.to_python_identifier(),
                        metadata=metadata,
                    )
                else:
                    yield AssetMaterialization(
                        asset_key=spec.key,
                        metadata=metadata,
                    )
            elif resource_type == NodeType.Test:
                completed_at = MetadataValue.timestamp(get_completed_at_timestamp(result=result))
                metadata = {
                    **base_metadata,
                    "status": status,
                    COMPLETED_AT_TIMESTAMP_METADATA_KEY: completed_at,
                }
                if result["failures"] is not None:
                    metadata["dagster_dbt/failed_row_count"] = result["failures"]

                check_key = get_asset_check_key_for_test(
                    manifest=manifest,
                    dagster_dbt_translator=translator,
                    test_unique_id=unique_id,
                    project=None,
                )
                # Without a check key no event is emitted for this test.
                if check_key is None:
                    continue

                passed = status == TestStatus.Pass
                if status == TestStatus.Warn:
                    severity = AssetCheckSeverity.WARN
                else:
                    severity = AssetCheckSeverity.ERROR

                if context and has_asset_def and check_key in context.selected_asset_check_keys:
                    # The test is an asset check in an asset, so yield an `AssetCheckResult`.
                    yield AssetCheckResult(
                        passed=passed,
                        asset_key=check_key.asset_key,
                        check_name=check_key.name,
                        metadata=metadata,
                        severity=severity,
                    )
                elif not has_asset_def:
                    yield AssetCheckEvaluation(
                        passed=passed,
                        asset_key=check_key.asset_key,
                        check_name=check_key.name,
                        metadata=metadata,
                        severity=severity,
                    )
|