dagster-dbt 0.23.3__py3-none-any.whl → 0.28.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_dbt/__init__.py +41 -140
- dagster_dbt/asset_decorator.py +49 -230
- dagster_dbt/asset_specs.py +65 -0
- dagster_dbt/asset_utils.py +655 -338
- dagster_dbt/cli/app.py +44 -43
- dagster_dbt/cloud/__init__.py +6 -4
- dagster_dbt/cloud/asset_defs.py +119 -177
- dagster_dbt/cloud/cli.py +3 -4
- dagster_dbt/cloud/ops.py +9 -6
- dagster_dbt/cloud/resources.py +9 -4
- dagster_dbt/cloud/types.py +12 -7
- dagster_dbt/cloud/utils.py +186 -0
- dagster_dbt/cloud_v2/__init__.py +10 -0
- dagster_dbt/cloud_v2/asset_decorator.py +81 -0
- dagster_dbt/cloud_v2/cli_invocation.py +67 -0
- dagster_dbt/cloud_v2/client.py +438 -0
- dagster_dbt/cloud_v2/resources.py +462 -0
- dagster_dbt/cloud_v2/run_handler.py +229 -0
- dagster_dbt/cloud_v2/sensor_builder.py +254 -0
- dagster_dbt/cloud_v2/types.py +143 -0
- dagster_dbt/compat.py +107 -0
- dagster_dbt/components/__init__.py +0 -0
- dagster_dbt/components/dbt_project/__init__.py +0 -0
- dagster_dbt/components/dbt_project/component.py +545 -0
- dagster_dbt/components/dbt_project/scaffolder.py +65 -0
- dagster_dbt/core/__init__.py +0 -10
- dagster_dbt/core/dbt_cli_event.py +612 -0
- dagster_dbt/core/dbt_cli_invocation.py +474 -0
- dagster_dbt/core/dbt_event_iterator.py +399 -0
- dagster_dbt/core/resource.py +733 -0
- dagster_dbt/core/utils.py +14 -279
- dagster_dbt/dagster_dbt_translator.py +317 -74
- dagster_dbt/dbt_core_version.py +1 -0
- dagster_dbt/dbt_manifest.py +6 -5
- dagster_dbt/dbt_manifest_asset_selection.py +62 -22
- dagster_dbt/dbt_project.py +179 -40
- dagster_dbt/dbt_project_manager.py +173 -0
- dagster_dbt/dbt_version.py +0 -0
- dagster_dbt/errors.py +9 -84
- dagster_dbt/freshness_builder.py +147 -0
- dagster_dbt/include/pyproject.toml.jinja +21 -0
- dagster_dbt/include/scaffold/assets.py.jinja +1 -8
- dagster_dbt/include/scaffold/definitions.py.jinja +0 -15
- dagster_dbt/include/scaffold/project.py.jinja +1 -0
- dagster_dbt/include/setup.py.jinja +2 -3
- dagster_dbt/metadata_set.py +18 -0
- dagster_dbt/utils.py +136 -234
- dagster_dbt/version.py +1 -1
- dagster_dbt-0.28.4.dist-info/METADATA +47 -0
- dagster_dbt-0.28.4.dist-info/RECORD +59 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/WHEEL +1 -1
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/entry_points.txt +3 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info/licenses}/LICENSE +1 -1
- dagster_dbt/asset_defs.py +0 -1049
- dagster_dbt/core/resources.py +0 -527
- dagster_dbt/core/resources_v2.py +0 -1542
- dagster_dbt/core/types.py +0 -63
- dagster_dbt/dbt_resource.py +0 -220
- dagster_dbt/include/scaffold/constants.py.jinja +0 -21
- dagster_dbt/ops.py +0 -134
- dagster_dbt/types.py +0 -22
- dagster_dbt-0.23.3.dist-info/METADATA +0 -31
- dagster_dbt-0.23.3.dist-info/RECORD +0 -43
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/top_level.txt +0 -0
dagster_dbt/asset_utils.py
CHANGED
|
@@ -1,66 +1,83 @@
|
|
|
1
1
|
import hashlib
|
|
2
|
+
import os
|
|
3
|
+
import shutil
|
|
4
|
+
import tempfile
|
|
2
5
|
import textwrap
|
|
3
|
-
from
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Dict,
|
|
8
|
-
FrozenSet,
|
|
9
|
-
List,
|
|
10
|
-
Mapping,
|
|
11
|
-
Optional,
|
|
12
|
-
Sequence,
|
|
13
|
-
Set,
|
|
14
|
-
Tuple,
|
|
15
|
-
cast,
|
|
16
|
-
)
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from collections.abc import Iterable, Mapping, Sequence
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import TYPE_CHECKING, AbstractSet, Annotated, Any, Final, Optional, Union # noqa: UP035
|
|
17
10
|
|
|
11
|
+
import yaml
|
|
18
12
|
from dagster import (
|
|
19
13
|
AssetCheckKey,
|
|
20
14
|
AssetCheckSpec,
|
|
15
|
+
AssetExecutionContext,
|
|
21
16
|
AssetKey,
|
|
22
17
|
AssetsDefinition,
|
|
23
18
|
AssetSelection,
|
|
19
|
+
AssetSpec,
|
|
24
20
|
AutoMaterializePolicy,
|
|
21
|
+
DagsterInvalidDefinitionError,
|
|
25
22
|
DagsterInvariantViolationError,
|
|
26
23
|
DefaultScheduleStatus,
|
|
27
|
-
|
|
28
|
-
In,
|
|
29
|
-
Nothing,
|
|
30
|
-
Out,
|
|
24
|
+
OpExecutionContext,
|
|
31
25
|
RunConfig,
|
|
32
26
|
ScheduleDefinition,
|
|
33
27
|
TableColumn,
|
|
34
28
|
TableSchema,
|
|
35
29
|
_check as check,
|
|
36
30
|
define_asset_job,
|
|
31
|
+
get_dagster_logger,
|
|
37
32
|
)
|
|
38
|
-
from dagster._core.definitions.
|
|
39
|
-
_validate_and_assign_output_names_to_check_specs,
|
|
40
|
-
)
|
|
33
|
+
from dagster._core.definitions.assets.definition.asset_spec import SYSTEM_METADATA_KEY_DAGSTER_TYPE
|
|
41
34
|
from dagster._core.definitions.metadata import TableMetadataSet
|
|
42
|
-
from dagster.
|
|
43
|
-
from dagster.
|
|
44
|
-
from
|
|
45
|
-
from
|
|
35
|
+
from dagster._core.errors import DagsterInvalidPropertyError
|
|
36
|
+
from dagster._core.types.dagster_type import Nothing
|
|
37
|
+
from dagster._record import ImportFrom, record
|
|
38
|
+
from dagster_shared.record import replace
|
|
46
39
|
|
|
47
|
-
from .
|
|
40
|
+
from dagster_dbt.dbt_project import DbtProject
|
|
41
|
+
from dagster_dbt.metadata_set import DbtMetadataSet
|
|
42
|
+
from dagster_dbt.utils import ASSET_RESOURCE_TYPES, dagster_name_fn, select_unique_ids
|
|
48
43
|
|
|
49
44
|
if TYPE_CHECKING:
|
|
50
|
-
from .dagster_dbt_translator import DagsterDbtTranslator, DbtManifestWrapper
|
|
45
|
+
from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator, DbtManifestWrapper
|
|
51
46
|
|
|
52
47
|
DAGSTER_DBT_MANIFEST_METADATA_KEY = "dagster_dbt/manifest"
|
|
53
48
|
DAGSTER_DBT_TRANSLATOR_METADATA_KEY = "dagster_dbt/dagster_dbt_translator"
|
|
49
|
+
DAGSTER_DBT_PROJECT_METADATA_KEY = "dagster_dbt/project"
|
|
54
50
|
DAGSTER_DBT_SELECT_METADATA_KEY = "dagster_dbt/select"
|
|
55
51
|
DAGSTER_DBT_EXCLUDE_METADATA_KEY = "dagster_dbt/exclude"
|
|
52
|
+
DAGSTER_DBT_SELECTOR_METADATA_KEY = "dagster_dbt/selector"
|
|
53
|
+
DAGSTER_DBT_UNIQUE_ID_METADATA_KEY = "dagster_dbt/unique_id"
|
|
54
|
+
|
|
55
|
+
DBT_DEFAULT_SELECT = "fqn:*"
|
|
56
|
+
DBT_DEFAULT_EXCLUDE = ""
|
|
57
|
+
DBT_DEFAULT_SELECTOR = ""
|
|
58
|
+
|
|
59
|
+
DBT_INDIRECT_SELECTION_ENV: Final[str] = "DBT_INDIRECT_SELECTION"
|
|
60
|
+
DBT_EMPTY_INDIRECT_SELECTION: Final[str] = "empty"
|
|
61
|
+
|
|
62
|
+
# Threshold for switching to selector file to avoid CLI argument length limits
|
|
63
|
+
# https://github.com/dagster-io/dagster/issues/16997
|
|
64
|
+
_SELECTION_ARGS_THRESHOLD: Final[int] = 200
|
|
65
|
+
|
|
66
|
+
DUPLICATE_ASSET_KEY_ERROR_MESSAGE = (
|
|
67
|
+
"The following dbt resources are configured with identical Dagster asset keys."
|
|
68
|
+
" Please ensure that each dbt resource generates a unique Dagster asset key."
|
|
69
|
+
" See the reference for configuring Dagster asset keys for your dbt project:"
|
|
70
|
+
" https://docs.dagster.io/integrations/libraries/dbt/reference#customizing-asset-keys."
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
logger = get_dagster_logger()
|
|
56
74
|
|
|
57
75
|
|
|
58
76
|
def get_asset_key_for_model(dbt_assets: Sequence[AssetsDefinition], model_name: str) -> AssetKey:
|
|
59
77
|
"""Return the corresponding Dagster asset key for a dbt model, seed, or snapshot.
|
|
60
78
|
|
|
61
79
|
Args:
|
|
62
|
-
dbt_assets (AssetsDefinition): An AssetsDefinition object produced by
|
|
63
|
-
load_assets_from_dbt_project, load_assets_from_dbt_manifest, or @dbt_assets.
|
|
80
|
+
dbt_assets (AssetsDefinition): An AssetsDefinition object produced by @dbt_assets.
|
|
64
81
|
model_name (str): The name of the dbt model, seed, or snapshot.
|
|
65
82
|
|
|
66
83
|
Returns:
|
|
@@ -84,18 +101,24 @@ def get_asset_key_for_model(dbt_assets: Sequence[AssetsDefinition], model_name:
|
|
|
84
101
|
check.sequence_param(dbt_assets, "dbt_assets", of_type=AssetsDefinition)
|
|
85
102
|
check.str_param(model_name, "model_name")
|
|
86
103
|
|
|
87
|
-
manifest, dagster_dbt_translator = get_manifest_and_translator_from_dbt_assets(
|
|
104
|
+
manifest, dagster_dbt_translator, dbt_project = get_manifest_and_translator_from_dbt_assets(
|
|
105
|
+
dbt_assets
|
|
106
|
+
)
|
|
88
107
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
for value in manifest["nodes"].
|
|
108
|
+
matching_model_ids = [
|
|
109
|
+
unique_id
|
|
110
|
+
for unique_id, value in manifest["nodes"].items()
|
|
92
111
|
if value["name"] == model_name and value["resource_type"] in ASSET_RESOURCE_TYPES
|
|
93
112
|
]
|
|
94
113
|
|
|
95
|
-
if len(
|
|
114
|
+
if len(matching_model_ids) == 0:
|
|
96
115
|
raise KeyError(f"Could not find a dbt model, seed, or snapshot with name: {model_name}")
|
|
97
116
|
|
|
98
|
-
return dagster_dbt_translator.
|
|
117
|
+
return dagster_dbt_translator.get_asset_spec(
|
|
118
|
+
manifest,
|
|
119
|
+
next(iter(matching_model_ids)),
|
|
120
|
+
dbt_project,
|
|
121
|
+
).key
|
|
99
122
|
|
|
100
123
|
|
|
101
124
|
def get_asset_keys_by_output_name_for_source(
|
|
@@ -138,18 +161,24 @@ def get_asset_keys_by_output_name_for_source(
|
|
|
138
161
|
check.sequence_param(dbt_assets, "dbt_assets", of_type=AssetsDefinition)
|
|
139
162
|
check.str_param(source_name, "source_name")
|
|
140
163
|
|
|
141
|
-
manifest, dagster_dbt_translator = get_manifest_and_translator_from_dbt_assets(
|
|
164
|
+
manifest, dagster_dbt_translator, dbt_project = get_manifest_and_translator_from_dbt_assets(
|
|
165
|
+
dbt_assets
|
|
166
|
+
)
|
|
142
167
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
168
|
+
matching = {
|
|
169
|
+
unique_id: value
|
|
170
|
+
for unique_id, value in manifest["sources"].items()
|
|
171
|
+
if value["source_name"] == source_name
|
|
172
|
+
}
|
|
146
173
|
|
|
147
|
-
if len(
|
|
174
|
+
if len(matching) == 0:
|
|
148
175
|
raise KeyError(f"Could not find a dbt source with name: {source_name}")
|
|
149
176
|
|
|
150
177
|
return {
|
|
151
|
-
dagster_name_fn(value): dagster_dbt_translator.
|
|
152
|
-
|
|
178
|
+
dagster_name_fn(value): dagster_dbt_translator.get_asset_spec(
|
|
179
|
+
manifest, unique_id, dbt_project
|
|
180
|
+
).key
|
|
181
|
+
for unique_id, value in matching.items()
|
|
153
182
|
}
|
|
154
183
|
|
|
155
184
|
|
|
@@ -194,8 +223,9 @@ def get_asset_key_for_source(dbt_assets: Sequence[AssetsDefinition], source_name
|
|
|
194
223
|
|
|
195
224
|
def build_dbt_asset_selection(
|
|
196
225
|
dbt_assets: Sequence[AssetsDefinition],
|
|
197
|
-
dbt_select: str =
|
|
198
|
-
dbt_exclude: Optional[str] =
|
|
226
|
+
dbt_select: str = DBT_DEFAULT_SELECT,
|
|
227
|
+
dbt_exclude: Optional[str] = DBT_DEFAULT_EXCLUDE,
|
|
228
|
+
dbt_selector: Optional[str] = DBT_DEFAULT_SELECTOR,
|
|
199
229
|
) -> AssetSelection:
|
|
200
230
|
"""Build an asset selection for a dbt selection string.
|
|
201
231
|
|
|
@@ -249,24 +279,35 @@ def build_dbt_asset_selection(
|
|
|
249
279
|
bar_plus_and_foo_and_downstream_selection = bar_plus_and_foo_selection.downstream()
|
|
250
280
|
|
|
251
281
|
"""
|
|
252
|
-
manifest, dagster_dbt_translator = get_manifest_and_translator_from_dbt_assets(
|
|
282
|
+
manifest, dagster_dbt_translator, dbt_project = get_manifest_and_translator_from_dbt_assets(
|
|
283
|
+
dbt_assets
|
|
284
|
+
)
|
|
253
285
|
[dbt_assets_definition] = dbt_assets
|
|
254
286
|
|
|
255
287
|
dbt_assets_select = dbt_assets_definition.op.tags[DAGSTER_DBT_SELECT_METADATA_KEY]
|
|
256
|
-
dbt_assets_exclude = dbt_assets_definition.op.tags.get(
|
|
288
|
+
dbt_assets_exclude = dbt_assets_definition.op.tags.get(
|
|
289
|
+
DAGSTER_DBT_EXCLUDE_METADATA_KEY, DBT_DEFAULT_EXCLUDE
|
|
290
|
+
)
|
|
291
|
+
dbt_assets_selector = dbt_assets_definition.op.tags.get(
|
|
292
|
+
DAGSTER_DBT_SELECTOR_METADATA_KEY, DBT_DEFAULT_SELECTOR
|
|
293
|
+
)
|
|
257
294
|
|
|
258
|
-
from .dbt_manifest_asset_selection import DbtManifestAssetSelection
|
|
295
|
+
from dagster_dbt.dbt_manifest_asset_selection import DbtManifestAssetSelection
|
|
259
296
|
|
|
260
297
|
return DbtManifestAssetSelection.build(
|
|
261
298
|
manifest=manifest,
|
|
262
299
|
dagster_dbt_translator=dagster_dbt_translator,
|
|
263
300
|
select=dbt_assets_select,
|
|
264
301
|
exclude=dbt_assets_exclude,
|
|
302
|
+
selector=dbt_assets_selector,
|
|
303
|
+
project=dbt_project,
|
|
265
304
|
) & DbtManifestAssetSelection.build(
|
|
266
305
|
manifest=manifest,
|
|
267
306
|
dagster_dbt_translator=dagster_dbt_translator,
|
|
268
307
|
select=dbt_select,
|
|
269
|
-
exclude=dbt_exclude,
|
|
308
|
+
exclude=dbt_exclude or DBT_DEFAULT_EXCLUDE,
|
|
309
|
+
selector=dbt_selector or DBT_DEFAULT_SELECTOR,
|
|
310
|
+
project=dbt_project,
|
|
270
311
|
)
|
|
271
312
|
|
|
272
313
|
|
|
@@ -274,8 +315,9 @@ def build_schedule_from_dbt_selection(
|
|
|
274
315
|
dbt_assets: Sequence[AssetsDefinition],
|
|
275
316
|
job_name: str,
|
|
276
317
|
cron_schedule: str,
|
|
277
|
-
dbt_select: str =
|
|
278
|
-
dbt_exclude: Optional[str] =
|
|
318
|
+
dbt_select: str = DBT_DEFAULT_SELECT,
|
|
319
|
+
dbt_exclude: Optional[str] = DBT_DEFAULT_EXCLUDE,
|
|
320
|
+
dbt_selector: str = DBT_DEFAULT_SELECTOR,
|
|
279
321
|
schedule_name: Optional[str] = None,
|
|
280
322
|
tags: Optional[Mapping[str, str]] = None,
|
|
281
323
|
config: Optional[RunConfig] = None,
|
|
@@ -292,6 +334,7 @@ def build_schedule_from_dbt_selection(
|
|
|
292
334
|
cron_schedule (str): The cron schedule to define the schedule.
|
|
293
335
|
dbt_select (str): A dbt selection string to specify a set of dbt resources.
|
|
294
336
|
dbt_exclude (Optional[str]): A dbt selection string to exclude a set of dbt resources.
|
|
337
|
+
dbt_selector (str): A dbt selector to select resources to materialize.
|
|
295
338
|
schedule_name (Optional[str]): The name of the dbt schedule to create.
|
|
296
339
|
tags (Optional[Mapping[str, str]]): A dictionary of tags (string key-value pairs) to attach
|
|
297
340
|
to the scheduled runs.
|
|
@@ -327,7 +370,8 @@ def build_schedule_from_dbt_selection(
|
|
|
327
370
|
selection=build_dbt_asset_selection(
|
|
328
371
|
dbt_assets,
|
|
329
372
|
dbt_select=dbt_select,
|
|
330
|
-
dbt_exclude=dbt_exclude,
|
|
373
|
+
dbt_exclude=dbt_exclude or DBT_DEFAULT_EXCLUDE,
|
|
374
|
+
dbt_selector=dbt_selector,
|
|
331
375
|
),
|
|
332
376
|
config=config,
|
|
333
377
|
tags=tags,
|
|
@@ -339,31 +383,131 @@ def build_schedule_from_dbt_selection(
|
|
|
339
383
|
|
|
340
384
|
def get_manifest_and_translator_from_dbt_assets(
|
|
341
385
|
dbt_assets: Sequence[AssetsDefinition],
|
|
342
|
-
) ->
|
|
386
|
+
) -> tuple[Mapping[str, Any], "DagsterDbtTranslator", Optional[DbtProject]]:
|
|
343
387
|
check.invariant(len(dbt_assets) == 1, "Exactly one dbt AssetsDefinition is required")
|
|
344
388
|
dbt_assets_def = dbt_assets[0]
|
|
345
389
|
metadata_by_key = dbt_assets_def.metadata_by_key or {}
|
|
346
390
|
first_asset_key = next(iter(dbt_assets_def.metadata_by_key.keys()))
|
|
347
391
|
first_metadata = metadata_by_key.get(first_asset_key, {})
|
|
348
|
-
manifest_wrapper: Optional[
|
|
392
|
+
manifest_wrapper: Optional[DbtManifestWrapper] = first_metadata.get(
|
|
349
393
|
DAGSTER_DBT_MANIFEST_METADATA_KEY
|
|
350
394
|
)
|
|
395
|
+
project = first_metadata.get(DAGSTER_DBT_PROJECT_METADATA_KEY)
|
|
351
396
|
if manifest_wrapper is None:
|
|
352
397
|
raise DagsterInvariantViolationError(
|
|
353
398
|
f"Expected to find dbt manifest metadata on asset {first_asset_key.to_user_string()},"
|
|
354
|
-
" but did not. Did you pass in assets that weren't generated by"
|
|
355
|
-
" load_assets_from_dbt_project, load_assets_from_dbt_manifest, or @dbt_assets?"
|
|
399
|
+
" but did not. Did you pass in assets that weren't generated by @dbt_assets?"
|
|
356
400
|
)
|
|
357
401
|
|
|
358
402
|
dagster_dbt_translator = first_metadata.get(DAGSTER_DBT_TRANSLATOR_METADATA_KEY)
|
|
359
403
|
if dagster_dbt_translator is None:
|
|
360
404
|
raise DagsterInvariantViolationError(
|
|
361
405
|
f"Expected to find dbt translator metadata on asset {first_asset_key.to_user_string()},"
|
|
362
|
-
" but did not. Did you pass in assets that weren't generated by"
|
|
363
|
-
" load_assets_from_dbt_project, load_assets_from_dbt_manifest, or @dbt_assets?"
|
|
406
|
+
" but did not. Did you pass in assets that weren't generated by @dbt_assets?"
|
|
364
407
|
)
|
|
365
408
|
|
|
366
|
-
return manifest_wrapper.manifest, dagster_dbt_translator
|
|
409
|
+
return manifest_wrapper.manifest, dagster_dbt_translator, project
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
def get_asset_keys_to_resource_props(
|
|
413
|
+
manifest: Mapping[str, Any],
|
|
414
|
+
translator: "DagsterDbtTranslator",
|
|
415
|
+
) -> Mapping[AssetKey, Mapping[str, Any]]:
|
|
416
|
+
return {
|
|
417
|
+
translator.get_asset_key(node): node
|
|
418
|
+
for node in manifest["nodes"].values()
|
|
419
|
+
if node["resource_type"] in ASSET_RESOURCE_TYPES
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
@record
|
|
424
|
+
class DbtCliInvocationPartialParams:
|
|
425
|
+
manifest: Mapping[str, Any]
|
|
426
|
+
dagster_dbt_translator: Annotated[
|
|
427
|
+
"DagsterDbtTranslator", ImportFrom("dagster_dbt.dagster_dbt_translator")
|
|
428
|
+
]
|
|
429
|
+
selection_args: Sequence[str]
|
|
430
|
+
indirect_selection: Optional[str]
|
|
431
|
+
dbt_project: Optional[DbtProject]
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def get_updated_cli_invocation_params_for_context(
|
|
435
|
+
context: Optional[Union[OpExecutionContext, AssetExecutionContext]],
|
|
436
|
+
manifest: Mapping[str, Any],
|
|
437
|
+
dagster_dbt_translator: "DagsterDbtTranslator",
|
|
438
|
+
) -> DbtCliInvocationPartialParams:
|
|
439
|
+
try:
|
|
440
|
+
assets_def = context.assets_def if context else None
|
|
441
|
+
except DagsterInvalidPropertyError:
|
|
442
|
+
# If assets_def is None in an OpExecutionContext, we raise a DagsterInvalidPropertyError,
|
|
443
|
+
# but we don't want to raise the error here.
|
|
444
|
+
assets_def = None
|
|
445
|
+
|
|
446
|
+
selection_args: list[str] = []
|
|
447
|
+
indirect_selection = os.getenv(DBT_INDIRECT_SELECTION_ENV, None)
|
|
448
|
+
dbt_project = None
|
|
449
|
+
if context and assets_def is not None:
|
|
450
|
+
manifest, dagster_dbt_translator, dbt_project = get_manifest_and_translator_from_dbt_assets(
|
|
451
|
+
[assets_def]
|
|
452
|
+
)
|
|
453
|
+
|
|
454
|
+
# Get project_dir from dbt_project if available
|
|
455
|
+
project_dir = Path(dbt_project.project_dir) if dbt_project else None
|
|
456
|
+
target_project = dbt_project
|
|
457
|
+
|
|
458
|
+
selection_args, indirect_selection_override = get_subset_selection_for_context(
|
|
459
|
+
context=context,
|
|
460
|
+
manifest=manifest,
|
|
461
|
+
select=context.op.tags.get(DAGSTER_DBT_SELECT_METADATA_KEY),
|
|
462
|
+
exclude=context.op.tags.get(DAGSTER_DBT_EXCLUDE_METADATA_KEY),
|
|
463
|
+
selector=context.op.tags.get(DAGSTER_DBT_SELECTOR_METADATA_KEY),
|
|
464
|
+
dagster_dbt_translator=dagster_dbt_translator,
|
|
465
|
+
current_dbt_indirect_selection_env=indirect_selection,
|
|
466
|
+
)
|
|
467
|
+
if (
|
|
468
|
+
selection_args[0] == "--select"
|
|
469
|
+
and project_dir
|
|
470
|
+
and len(resources := selection_args[1].split(" ")) > _SELECTION_ARGS_THRESHOLD
|
|
471
|
+
):
|
|
472
|
+
temp_project_dir = tempfile.mkdtemp()
|
|
473
|
+
shutil.copytree(project_dir, temp_project_dir, dirs_exist_ok=True)
|
|
474
|
+
selectors_path = Path(temp_project_dir) / "selectors.yml"
|
|
475
|
+
|
|
476
|
+
# Delete any existing selectors, we need to create our own
|
|
477
|
+
if selectors_path.exists():
|
|
478
|
+
selectors_path.unlink()
|
|
479
|
+
|
|
480
|
+
selector_name = f"dagster_run_{context.run_id}"
|
|
481
|
+
temp_selectors = {
|
|
482
|
+
"selectors": [
|
|
483
|
+
{
|
|
484
|
+
"name": selector_name,
|
|
485
|
+
"definition": {"union": list(resources)},
|
|
486
|
+
}
|
|
487
|
+
]
|
|
488
|
+
}
|
|
489
|
+
selectors_path.write_text(yaml.safe_dump(temp_selectors))
|
|
490
|
+
logger.info(
|
|
491
|
+
f"DBT selection of {len(resources)} resources exceeds threshold of {_SELECTION_ARGS_THRESHOLD}. "
|
|
492
|
+
"This may exceed system argument length limits. "
|
|
493
|
+
f"Executing materialization against temporary copy of DBT project at {temp_project_dir} with ephemeral selector."
|
|
494
|
+
)
|
|
495
|
+
selection_args = ["--selector", selector_name]
|
|
496
|
+
target_project = replace(dbt_project, project_dir=Path(temp_project_dir))
|
|
497
|
+
|
|
498
|
+
indirect_selection = (
|
|
499
|
+
indirect_selection_override if indirect_selection_override else indirect_selection
|
|
500
|
+
)
|
|
501
|
+
else:
|
|
502
|
+
target_project = dbt_project
|
|
503
|
+
|
|
504
|
+
return DbtCliInvocationPartialParams(
|
|
505
|
+
manifest=manifest,
|
|
506
|
+
dagster_dbt_translator=dagster_dbt_translator,
|
|
507
|
+
selection_args=selection_args,
|
|
508
|
+
indirect_selection=indirect_selection,
|
|
509
|
+
dbt_project=target_project,
|
|
510
|
+
)
|
|
367
511
|
|
|
368
512
|
|
|
369
513
|
###################
|
|
@@ -407,24 +551,40 @@ def default_asset_key_fn(dbt_resource_props: Mapping[str, Any]) -> AssetKey:
|
|
|
407
551
|
def default_metadata_from_dbt_resource_props(
|
|
408
552
|
dbt_resource_props: Mapping[str, Any],
|
|
409
553
|
) -> Mapping[str, Any]:
|
|
410
|
-
|
|
554
|
+
column_schema = None
|
|
411
555
|
columns = dbt_resource_props.get("columns", {})
|
|
412
556
|
if len(columns) > 0:
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
description=column_info.get("description"),
|
|
421
|
-
)
|
|
422
|
-
for column_name, column_info in columns.items()
|
|
423
|
-
]
|
|
557
|
+
column_schema = TableSchema(
|
|
558
|
+
columns=[
|
|
559
|
+
TableColumn(
|
|
560
|
+
name=column_name,
|
|
561
|
+
type=column_info.get("data_type") or "?",
|
|
562
|
+
description=column_info.get("description"),
|
|
563
|
+
tags={tag_name: "" for tag_name in column_info.get("tags", [])},
|
|
424
564
|
)
|
|
425
|
-
|
|
565
|
+
for column_name, column_info in columns.items()
|
|
566
|
+
]
|
|
426
567
|
)
|
|
427
|
-
|
|
568
|
+
|
|
569
|
+
relation_parts = [
|
|
570
|
+
relation_part
|
|
571
|
+
for relation_part in [
|
|
572
|
+
dbt_resource_props.get("database"),
|
|
573
|
+
dbt_resource_props.get("schema"),
|
|
574
|
+
dbt_resource_props.get("alias"),
|
|
575
|
+
]
|
|
576
|
+
if relation_part
|
|
577
|
+
]
|
|
578
|
+
relation_name = ".".join(relation_parts) if relation_parts else None
|
|
579
|
+
|
|
580
|
+
materialization_type = dbt_resource_props.get("config", {}).get("materialized")
|
|
581
|
+
return {
|
|
582
|
+
**DbtMetadataSet(materialization_type=materialization_type),
|
|
583
|
+
**TableMetadataSet(
|
|
584
|
+
column_schema=column_schema,
|
|
585
|
+
table_name=relation_name,
|
|
586
|
+
),
|
|
587
|
+
}
|
|
428
588
|
|
|
429
589
|
|
|
430
590
|
def default_group_from_dbt_resource_props(dbt_resource_props: Mapping[str, Any]) -> Optional[str]:
|
|
@@ -458,16 +618,6 @@ def group_from_dbt_resource_props_fallback_to_directory(
|
|
|
458
618
|
|
|
459
619
|
Args:
|
|
460
620
|
dbt_resource_props (Mapping[str, Any]): A dictionary representing the dbt resource.
|
|
461
|
-
|
|
462
|
-
Examples:
|
|
463
|
-
.. code-block:: python
|
|
464
|
-
|
|
465
|
-
from dagster_dbt import group_from_dbt_resource_props_fallback_to_directory
|
|
466
|
-
|
|
467
|
-
dbt_assets = load_assets_from_dbt_manifest(
|
|
468
|
-
manifest=manifest,
|
|
469
|
-
node_info_to_group_fn=group_from_dbt_resource_props_fallback_to_directory,
|
|
470
|
-
)
|
|
471
621
|
"""
|
|
472
622
|
group_name = default_group_from_dbt_resource_props(dbt_resource_props)
|
|
473
623
|
if group_name is not None:
|
|
@@ -489,48 +639,14 @@ def default_owners_from_dbt_resource_props(
|
|
|
489
639
|
if owners_config:
|
|
490
640
|
return owners_config
|
|
491
641
|
|
|
492
|
-
owner: Optional[str] = (
|
|
642
|
+
owner: Optional[Union[str, Sequence[str]]] = (
|
|
643
|
+
(dbt_resource_props.get("group") or {}).get("owner", {}).get("email")
|
|
644
|
+
)
|
|
493
645
|
|
|
494
646
|
if not owner:
|
|
495
647
|
return None
|
|
496
648
|
|
|
497
|
-
return [owner]
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
def default_freshness_policy_fn(dbt_resource_props: Mapping[str, Any]) -> Optional[FreshnessPolicy]:
|
|
501
|
-
dagster_metadata = dbt_resource_props.get("meta", {}).get("dagster", {})
|
|
502
|
-
freshness_policy_config = dagster_metadata.get("freshness_policy", {})
|
|
503
|
-
|
|
504
|
-
freshness_policy = _legacy_freshness_policy_fn(freshness_policy_config)
|
|
505
|
-
if freshness_policy:
|
|
506
|
-
return freshness_policy
|
|
507
|
-
|
|
508
|
-
legacy_freshness_policy_config = dbt_resource_props["config"].get(
|
|
509
|
-
"dagster_freshness_policy", {}
|
|
510
|
-
)
|
|
511
|
-
legacy_freshness_policy = _legacy_freshness_policy_fn(legacy_freshness_policy_config)
|
|
512
|
-
|
|
513
|
-
if legacy_freshness_policy:
|
|
514
|
-
deprecation_warning(
|
|
515
|
-
"dagster_freshness_policy",
|
|
516
|
-
"0.21.0",
|
|
517
|
-
"Instead, configure a Dagster freshness policy on a dbt model using"
|
|
518
|
-
" +meta.dagster.freshness_policy.",
|
|
519
|
-
)
|
|
520
|
-
|
|
521
|
-
return legacy_freshness_policy
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
def _legacy_freshness_policy_fn(
|
|
525
|
-
freshness_policy_config: Mapping[str, Any],
|
|
526
|
-
) -> Optional[FreshnessPolicy]:
|
|
527
|
-
if freshness_policy_config:
|
|
528
|
-
return FreshnessPolicy(
|
|
529
|
-
maximum_lag_minutes=float(freshness_policy_config["maximum_lag_minutes"]),
|
|
530
|
-
cron_schedule=freshness_policy_config.get("cron_schedule"),
|
|
531
|
-
cron_schedule_timezone=freshness_policy_config.get("cron_schedule_timezone"),
|
|
532
|
-
)
|
|
533
|
-
return None
|
|
649
|
+
return [owner] if isinstance(owner, str) else owner
|
|
534
650
|
|
|
535
651
|
|
|
536
652
|
def default_auto_materialize_policy_fn(
|
|
@@ -539,31 +655,6 @@ def default_auto_materialize_policy_fn(
|
|
|
539
655
|
dagster_metadata = dbt_resource_props.get("meta", {}).get("dagster", {})
|
|
540
656
|
auto_materialize_policy_config = dagster_metadata.get("auto_materialize_policy", {})
|
|
541
657
|
|
|
542
|
-
auto_materialize_policy = _auto_materialize_policy_fn(auto_materialize_policy_config)
|
|
543
|
-
if auto_materialize_policy:
|
|
544
|
-
return auto_materialize_policy
|
|
545
|
-
|
|
546
|
-
legacy_auto_materialize_policy_config = dbt_resource_props["config"].get(
|
|
547
|
-
"dagster_auto_materialize_policy", {}
|
|
548
|
-
)
|
|
549
|
-
legacy_auto_materialize_policy = _auto_materialize_policy_fn(
|
|
550
|
-
legacy_auto_materialize_policy_config
|
|
551
|
-
)
|
|
552
|
-
|
|
553
|
-
if legacy_auto_materialize_policy:
|
|
554
|
-
deprecation_warning(
|
|
555
|
-
"dagster_auto_materialize_policy",
|
|
556
|
-
"0.21.0",
|
|
557
|
-
"Instead, configure a Dagster auto-materialize policy on a dbt model using"
|
|
558
|
-
" +meta.dagster.auto_materialize_policy.",
|
|
559
|
-
)
|
|
560
|
-
|
|
561
|
-
return legacy_auto_materialize_policy
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
def _auto_materialize_policy_fn(
|
|
565
|
-
auto_materialize_policy_config: Mapping[str, Any],
|
|
566
|
-
) -> Optional[AutoMaterializePolicy]:
|
|
567
658
|
if auto_materialize_policy_config.get("type") == "eager":
|
|
568
659
|
return AutoMaterializePolicy.eager()
|
|
569
660
|
elif auto_materialize_policy_config.get("type") == "lazy":
|
|
@@ -576,56 +667,62 @@ def default_description_fn(dbt_resource_props: Mapping[str, Any], display_raw_sq
|
|
|
576
667
|
dbt_resource_props.get("raw_sql") or dbt_resource_props.get("raw_code", ""), " "
|
|
577
668
|
)
|
|
578
669
|
description_sections = [
|
|
579
|
-
dbt_resource_props
|
|
670
|
+
dbt_resource_props.get("description")
|
|
580
671
|
or f"dbt {dbt_resource_props['resource_type']} {dbt_resource_props['name']}",
|
|
581
672
|
]
|
|
582
673
|
if display_raw_sql:
|
|
583
|
-
description_sections.append(f"#### Raw SQL:\n
|
|
674
|
+
description_sections.append(f"#### Raw SQL:\n```sql\n{code_block}\n```")
|
|
584
675
|
return "\n\n".join(filter(None, description_sections))
|
|
585
676
|
|
|
586
677
|
|
|
587
678
|
def default_asset_check_fn(
|
|
588
679
|
manifest: Mapping[str, Any],
|
|
589
|
-
dbt_nodes: Mapping[str, Any],
|
|
590
680
|
dagster_dbt_translator: "DagsterDbtTranslator",
|
|
591
681
|
asset_key: AssetKey,
|
|
592
682
|
test_unique_id: str,
|
|
683
|
+
project: Optional[DbtProject],
|
|
593
684
|
) -> Optional[AssetCheckSpec]:
|
|
594
685
|
if not dagster_dbt_translator.settings.enable_asset_checks:
|
|
595
686
|
return None
|
|
596
687
|
|
|
597
|
-
test_resource_props =
|
|
598
|
-
parent_unique_ids:
|
|
688
|
+
test_resource_props = get_node(manifest, test_unique_id)
|
|
689
|
+
parent_unique_ids: set[str] = set(manifest["parent_map"].get(test_unique_id, []))
|
|
599
690
|
|
|
600
691
|
asset_check_key = get_asset_check_key_for_test(
|
|
601
692
|
manifest=manifest,
|
|
602
693
|
dagster_dbt_translator=dagster_dbt_translator,
|
|
603
694
|
test_unique_id=test_unique_id,
|
|
695
|
+
project=project,
|
|
604
696
|
)
|
|
605
697
|
|
|
606
698
|
if not (asset_check_key and asset_check_key.asset_key == asset_key):
|
|
607
699
|
return None
|
|
608
700
|
|
|
609
701
|
additional_deps = {
|
|
610
|
-
dagster_dbt_translator.
|
|
702
|
+
dagster_dbt_translator.get_asset_spec(manifest, parent_id, project).key
|
|
611
703
|
for parent_id in parent_unique_ids
|
|
612
704
|
}
|
|
613
705
|
additional_deps.discard(asset_key)
|
|
614
706
|
|
|
707
|
+
severity = test_resource_props.get("config", {}).get("severity", "error")
|
|
708
|
+
blocking = severity.lower() == "error"
|
|
709
|
+
|
|
615
710
|
return AssetCheckSpec(
|
|
616
711
|
name=test_resource_props["name"],
|
|
617
712
|
asset=asset_key,
|
|
618
713
|
description=test_resource_props.get("meta", {}).get("description"),
|
|
619
714
|
additional_deps=additional_deps,
|
|
715
|
+
metadata={DAGSTER_DBT_UNIQUE_ID_METADATA_KEY: test_unique_id},
|
|
716
|
+
blocking=blocking,
|
|
620
717
|
)
|
|
621
718
|
|
|
622
719
|
|
|
623
|
-
def default_code_version_fn(dbt_resource_props: Mapping[str, Any]) -> str:
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
).
|
|
720
|
+
def default_code_version_fn(dbt_resource_props: Mapping[str, Any]) -> Optional[str]:
|
|
721
|
+
code: Optional[str] = dbt_resource_props.get("raw_sql") or dbt_resource_props.get("raw_code")
|
|
722
|
+
if code:
|
|
723
|
+
return hashlib.sha1(code.encode("utf-8")).hexdigest()
|
|
724
|
+
|
|
725
|
+
return dbt_resource_props.get("checksum", {}).get("checksum")
|
|
629
726
|
|
|
630
727
|
|
|
631
728
|
###################
|
|
@@ -641,194 +738,181 @@ def is_non_asset_node(dbt_resource_props: Mapping[str, Any]):
|
|
|
641
738
|
[
|
|
642
739
|
resource_type == "metric",
|
|
643
740
|
resource_type == "semantic_model",
|
|
741
|
+
resource_type == "saved_query",
|
|
644
742
|
resource_type == "model"
|
|
645
743
|
and dbt_resource_props.get("config", {}).get("materialized") == "ephemeral",
|
|
646
744
|
]
|
|
647
745
|
)
|
|
648
746
|
|
|
649
747
|
|
|
650
|
-
def
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
asset_resource_types: List[str],
|
|
654
|
-
) -> Mapping[str, FrozenSet[str]]:
|
|
655
|
-
def _valid_parent_node(dbt_resource_props):
|
|
656
|
-
# sources are valid parents, but not assets
|
|
657
|
-
return dbt_resource_props["resource_type"] in asset_resource_types + ["source"]
|
|
658
|
-
|
|
659
|
-
asset_deps: Dict[str, Set[str]] = {}
|
|
660
|
-
for unique_id in selected_unique_ids:
|
|
661
|
-
dbt_resource_props = dbt_nodes[unique_id]
|
|
662
|
-
node_resource_type = dbt_resource_props["resource_type"]
|
|
663
|
-
|
|
664
|
-
# skip non-assets, such as semantic models, metrics, tests, and ephemeral models
|
|
665
|
-
if is_non_asset_node(dbt_resource_props) or node_resource_type not in asset_resource_types:
|
|
666
|
-
continue
|
|
667
|
-
|
|
668
|
-
asset_deps[unique_id] = set()
|
|
669
|
-
for parent_unique_id in dbt_resource_props.get("depends_on", {}).get("nodes", []):
|
|
670
|
-
parent_node_info = dbt_nodes[parent_unique_id]
|
|
671
|
-
# for metrics or ephemeral dbt models, BFS to find valid parents
|
|
672
|
-
if is_non_asset_node(parent_node_info):
|
|
673
|
-
visited = set()
|
|
674
|
-
replaced_parent_ids = set()
|
|
675
|
-
# make a copy to avoid mutating the actual dictionary
|
|
676
|
-
queue = list(parent_node_info.get("depends_on", {}).get("nodes", []))
|
|
677
|
-
while queue:
|
|
678
|
-
candidate_parent_id = queue.pop()
|
|
679
|
-
if candidate_parent_id in visited:
|
|
680
|
-
continue
|
|
681
|
-
visited.add(candidate_parent_id)
|
|
748
|
+
def is_valid_upstream_node(dbt_resource_props: Mapping[str, Any]) -> bool:
|
|
749
|
+
# sources are valid parents, but not assets
|
|
750
|
+
return dbt_resource_props["resource_type"] in ASSET_RESOURCE_TYPES + ["source"]
|
|
682
751
|
|
|
683
|
-
candidate_parent_info = dbt_nodes[candidate_parent_id]
|
|
684
|
-
if is_non_asset_node(candidate_parent_info):
|
|
685
|
-
queue.extend(candidate_parent_info.get("depends_on", {}).get("nodes", []))
|
|
686
|
-
elif _valid_parent_node(candidate_parent_info):
|
|
687
|
-
replaced_parent_ids.add(candidate_parent_id)
|
|
688
752
|
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
753
|
+
def get_upstream_unique_ids(
|
|
754
|
+
manifest: Mapping[str, Any],
|
|
755
|
+
dbt_resource_props: Mapping[str, Any],
|
|
756
|
+
) -> AbstractSet[str]:
|
|
757
|
+
upstreams = set()
|
|
758
|
+
for parent_unique_id in dbt_resource_props.get("depends_on", {}).get("nodes", []):
|
|
759
|
+
parent_node_info = get_node(manifest, parent_unique_id)
|
|
760
|
+
# for metrics or ephemeral dbt models, BFS to find valid parents
|
|
761
|
+
if is_non_asset_node(parent_node_info):
|
|
762
|
+
visited = set()
|
|
763
|
+
replaced_parent_ids = set()
|
|
764
|
+
# make a copy to avoid mutating the actual dictionary
|
|
765
|
+
queue = list(parent_node_info.get("depends_on", {}).get("nodes", []))
|
|
766
|
+
while queue:
|
|
767
|
+
candidate_parent_id = queue.pop()
|
|
768
|
+
if candidate_parent_id in visited:
|
|
769
|
+
continue
|
|
770
|
+
visited.add(candidate_parent_id)
|
|
771
|
+
|
|
772
|
+
candidate_parent_info = get_node(manifest, candidate_parent_id)
|
|
773
|
+
if is_non_asset_node(candidate_parent_info):
|
|
774
|
+
queue.extend(candidate_parent_info.get("depends_on", {}).get("nodes", []))
|
|
775
|
+
elif is_valid_upstream_node(candidate_parent_info):
|
|
776
|
+
replaced_parent_ids.add(candidate_parent_id)
|
|
777
|
+
|
|
778
|
+
upstreams |= replaced_parent_ids
|
|
779
|
+
# ignore nodes which are not assets / sources
|
|
780
|
+
elif is_valid_upstream_node(parent_node_info):
|
|
781
|
+
upstreams.add(parent_unique_id)
|
|
782
|
+
|
|
783
|
+
return upstreams
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
def _build_child_map(manifest: Mapping[str, Any]) -> Mapping[str, AbstractSet[str]]:
|
|
787
|
+
"""Manifests produced by early versions of dbt Fusion do not contain a child map, so we need to build it manually."""
|
|
788
|
+
if manifest.get("child_map"):
|
|
789
|
+
return manifest["child_map"]
|
|
790
|
+
|
|
791
|
+
child_map = defaultdict(set)
|
|
792
|
+
for unique_id, node in manifest["nodes"].items():
|
|
793
|
+
for upstream_unique_id in get_upstream_unique_ids(manifest, node):
|
|
794
|
+
child_map[upstream_unique_id].add(unique_id)
|
|
795
|
+
return child_map
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
def build_dbt_specs(
|
|
799
|
+
*,
|
|
800
|
+
translator: "DagsterDbtTranslator",
|
|
801
|
+
manifest: Mapping[str, Any],
|
|
802
|
+
select: str,
|
|
803
|
+
exclude: str,
|
|
804
|
+
selector: str,
|
|
805
|
+
io_manager_key: Optional[str],
|
|
806
|
+
project: Optional[DbtProject],
|
|
807
|
+
) -> tuple[Sequence[AssetSpec], Sequence[AssetCheckSpec]]:
|
|
808
|
+
selected_unique_ids = select_unique_ids(
|
|
809
|
+
select=select, exclude=exclude, selector=selector, project=project, manifest_json=manifest
|
|
810
|
+
)
|
|
693
811
|
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
}
|
|
812
|
+
specs: list[AssetSpec] = []
|
|
813
|
+
check_specs: dict[str, AssetCheckSpec] = {}
|
|
814
|
+
key_by_unique_id: dict[str, AssetKey] = {}
|
|
697
815
|
|
|
698
|
-
|
|
816
|
+
child_map = _build_child_map(manifest)
|
|
817
|
+
for unique_id in selected_unique_ids:
|
|
818
|
+
resource_props = get_node(manifest, unique_id)
|
|
819
|
+
resource_type = resource_props["resource_type"]
|
|
699
820
|
|
|
821
|
+
# skip non-assets, such as semantic models, metrics, tests, and ephemeral models
|
|
822
|
+
if is_non_asset_node(resource_props) or resource_type not in ASSET_RESOURCE_TYPES:
|
|
823
|
+
continue
|
|
700
824
|
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
dagster_dbt_translator: "DagsterDbtTranslator",
|
|
707
|
-
) -> Tuple[
|
|
708
|
-
Dict[AssetKey, Set[AssetKey]],
|
|
709
|
-
Dict[AssetKey, Tuple[str, In]],
|
|
710
|
-
Dict[AssetKey, Tuple[str, Out]],
|
|
711
|
-
Dict[AssetKey, str],
|
|
712
|
-
Dict[AssetKey, FreshnessPolicy],
|
|
713
|
-
Dict[AssetKey, AutoMaterializePolicy],
|
|
714
|
-
Dict[str, AssetCheckSpec],
|
|
715
|
-
Dict[str, List[str]],
|
|
716
|
-
Dict[str, Dict[str, Any]],
|
|
717
|
-
]:
|
|
718
|
-
from .dagster_dbt_translator import DbtManifestWrapper, validate_translator
|
|
719
|
-
|
|
720
|
-
dagster_dbt_translator = validate_translator(dagster_dbt_translator)
|
|
721
|
-
|
|
722
|
-
asset_deps: Dict[AssetKey, Set[AssetKey]] = {}
|
|
723
|
-
asset_ins: Dict[AssetKey, Tuple[str, In]] = {}
|
|
724
|
-
asset_outs: Dict[AssetKey, Tuple[str, Out]] = {}
|
|
725
|
-
|
|
726
|
-
# These dicts could be refactored as a single dict, mapping from output name to arbitrary
|
|
727
|
-
# metadata that we need to store for reference.
|
|
728
|
-
group_names_by_key: Dict[AssetKey, str] = {}
|
|
729
|
-
freshness_policies_by_key: Dict[AssetKey, FreshnessPolicy] = {}
|
|
730
|
-
auto_materialize_policies_by_key: Dict[AssetKey, AutoMaterializePolicy] = {}
|
|
731
|
-
check_specs_by_key: Dict[AssetCheckKey, AssetCheckSpec] = {}
|
|
732
|
-
fqns_by_output_name: Dict[str, List[str]] = {}
|
|
733
|
-
metadata_by_output_name: Dict[str, Dict[str, Any]] = {}
|
|
734
|
-
|
|
735
|
-
for unique_id, parent_unique_ids in deps.items():
|
|
736
|
-
dbt_resource_props = dbt_nodes[unique_id]
|
|
737
|
-
|
|
738
|
-
output_name = dagster_name_fn(dbt_resource_props)
|
|
739
|
-
fqns_by_output_name[output_name] = dbt_resource_props["fqn"]
|
|
740
|
-
|
|
741
|
-
metadata_by_output_name[output_name] = {
|
|
742
|
-
key: dbt_resource_props[key] for key in ["unique_id", "resource_type"]
|
|
743
|
-
}
|
|
744
|
-
|
|
745
|
-
asset_key = dagster_dbt_translator.get_asset_key(dbt_resource_props)
|
|
746
|
-
|
|
747
|
-
asset_deps[asset_key] = set()
|
|
748
|
-
|
|
749
|
-
metadata = merge_dicts(
|
|
750
|
-
dagster_dbt_translator.get_metadata(dbt_resource_props),
|
|
751
|
-
{
|
|
752
|
-
DAGSTER_DBT_MANIFEST_METADATA_KEY: DbtManifestWrapper(manifest=manifest)
|
|
753
|
-
if manifest
|
|
754
|
-
else None,
|
|
755
|
-
DAGSTER_DBT_TRANSLATOR_METADATA_KEY: dagster_dbt_translator,
|
|
756
|
-
},
|
|
757
|
-
)
|
|
758
|
-
asset_outs[asset_key] = (
|
|
759
|
-
output_name,
|
|
760
|
-
Out(
|
|
761
|
-
io_manager_key=io_manager_key,
|
|
762
|
-
description=dagster_dbt_translator.get_description(dbt_resource_props),
|
|
763
|
-
metadata=metadata,
|
|
764
|
-
is_required=False,
|
|
765
|
-
dagster_type=Nothing,
|
|
766
|
-
code_version=default_code_version_fn(dbt_resource_props),
|
|
767
|
-
),
|
|
825
|
+
# get the spec for the given node
|
|
826
|
+
spec = translator.get_asset_spec(
|
|
827
|
+
manifest,
|
|
828
|
+
unique_id,
|
|
829
|
+
project,
|
|
768
830
|
)
|
|
831
|
+
key_by_unique_id[unique_id] = spec.key
|
|
769
832
|
|
|
770
|
-
|
|
771
|
-
if
|
|
772
|
-
|
|
833
|
+
# add the io manager key and set the dagster type to Nothing
|
|
834
|
+
if io_manager_key is not None:
|
|
835
|
+
spec = spec.with_io_manager_key(io_manager_key)
|
|
836
|
+
spec = spec.merge_attributes(metadata={SYSTEM_METADATA_KEY_DAGSTER_TYPE: Nothing})
|
|
773
837
|
|
|
774
|
-
|
|
775
|
-
if freshness_policy is not None:
|
|
776
|
-
freshness_policies_by_key[asset_key] = freshness_policy
|
|
838
|
+
specs.append(spec)
|
|
777
839
|
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
for child_unique_id in manifest["child_map"][unique_id]
|
|
789
|
-
if child_unique_id.startswith("test")
|
|
790
|
-
]
|
|
791
|
-
|
|
792
|
-
for test_unique_id in test_unique_ids:
|
|
793
|
-
check_spec = default_asset_check_fn(
|
|
794
|
-
manifest,
|
|
795
|
-
dbt_nodes,
|
|
796
|
-
dagster_dbt_translator,
|
|
797
|
-
asset_key,
|
|
798
|
-
test_unique_id,
|
|
799
|
-
)
|
|
800
|
-
if check_spec:
|
|
801
|
-
check_specs_by_key[check_spec.key] = check_spec
|
|
840
|
+
# add check specs associated with the asset
|
|
841
|
+
for child_unique_id in child_map.get(unique_id, []):
|
|
842
|
+
if child_unique_id not in selected_unique_ids or not child_unique_id.startswith("test"):
|
|
843
|
+
continue
|
|
844
|
+
check_spec = translator.get_asset_check_spec(
|
|
845
|
+
asset_spec=spec,
|
|
846
|
+
manifest=manifest,
|
|
847
|
+
unique_id=child_unique_id,
|
|
848
|
+
project=project,
|
|
849
|
+
)
|
|
802
850
|
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
851
|
+
if check_spec:
|
|
852
|
+
check_specs[check_spec.get_python_identifier()] = check_spec
|
|
853
|
+
|
|
854
|
+
# update the keys_by_unqiue_id dictionary to include keys created for upstream
|
|
855
|
+
# assets. note that this step may need to change once the translator is updated
|
|
856
|
+
# to no longer rely on `get_asset_key` as a standalone method
|
|
857
|
+
for upstream_id in get_upstream_unique_ids(manifest, resource_props):
|
|
858
|
+
spec = translator.get_asset_spec(manifest, upstream_id, project)
|
|
859
|
+
key_by_unique_id[upstream_id] = spec.key
|
|
860
|
+
if (
|
|
861
|
+
upstream_id.startswith("source")
|
|
862
|
+
and translator.settings.enable_source_tests_as_checks
|
|
863
|
+
):
|
|
864
|
+
for child_unique_id in child_map.get(upstream_id, []):
|
|
865
|
+
if not child_unique_id.startswith("test"):
|
|
866
|
+
continue
|
|
867
|
+
check_spec = translator.get_asset_check_spec(
|
|
868
|
+
asset_spec=spec,
|
|
869
|
+
manifest=manifest,
|
|
870
|
+
unique_id=child_unique_id,
|
|
871
|
+
project=project,
|
|
872
|
+
)
|
|
873
|
+
if check_spec:
|
|
874
|
+
check_specs[check_spec.get_python_identifier()] = check_spec
|
|
806
875
|
|
|
807
|
-
|
|
876
|
+
_validate_asset_keys(translator, manifest, key_by_unique_id)
|
|
877
|
+
return specs, list(check_specs.values())
|
|
808
878
|
|
|
809
|
-
# if this parent is not one of the selected nodes, it's an input
|
|
810
|
-
if parent_unique_id not in deps:
|
|
811
|
-
input_name = dagster_name_fn(parent_node_info)
|
|
812
|
-
asset_ins[parent_asset_key] = (input_name, In(Nothing))
|
|
813
879
|
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
)
|
|
880
|
+
def _validate_asset_keys(
|
|
881
|
+
translator: "DagsterDbtTranslator",
|
|
882
|
+
manifest: Mapping[str, Any],
|
|
883
|
+
key_by_unique_id: Mapping[str, AssetKey],
|
|
884
|
+
) -> None:
|
|
885
|
+
unique_ids_by_key = defaultdict(set)
|
|
886
|
+
for unique_id, key in key_by_unique_id.items():
|
|
887
|
+
unique_ids_by_key[key].add(unique_id)
|
|
888
|
+
|
|
889
|
+
error_messages = []
|
|
890
|
+
for key, unique_ids in unique_ids_by_key.items():
|
|
891
|
+
if len(unique_ids) == 1:
|
|
892
|
+
continue
|
|
893
|
+
if translator.settings.enable_duplicate_source_asset_keys:
|
|
894
|
+
resource_types = {
|
|
895
|
+
get_node(manifest, unique_id)["resource_type"] for unique_id in unique_ids
|
|
896
|
+
}
|
|
897
|
+
if resource_types == {"source"}:
|
|
898
|
+
continue
|
|
899
|
+
formatted_ids = [
|
|
900
|
+
f" - `{id}` ({get_node(manifest, id)['original_file_path']})"
|
|
901
|
+
for id in sorted(unique_ids)
|
|
902
|
+
]
|
|
903
|
+
error_messages.append(
|
|
904
|
+
"\n".join(
|
|
905
|
+
[
|
|
906
|
+
f"The following dbt resources have the asset key `{key.path}`:",
|
|
907
|
+
*formatted_ids,
|
|
908
|
+
]
|
|
909
|
+
)
|
|
910
|
+
)
|
|
820
911
|
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
group_names_by_key,
|
|
826
|
-
freshness_policies_by_key,
|
|
827
|
-
auto_materialize_policies_by_key,
|
|
828
|
-
check_specs_by_output_name,
|
|
829
|
-
fqns_by_output_name,
|
|
830
|
-
metadata_by_output_name,
|
|
831
|
-
)
|
|
912
|
+
if error_messages:
|
|
913
|
+
raise DagsterInvalidDefinitionError(
|
|
914
|
+
"\n\n".join([DUPLICATE_ASSET_KEY_ERROR_MESSAGE, *error_messages])
|
|
915
|
+
)
|
|
832
916
|
|
|
833
917
|
|
|
834
918
|
def has_self_dependency(dbt_resource_props: Mapping[str, Any]) -> bool:
|
|
@@ -842,11 +926,12 @@ def get_asset_check_key_for_test(
|
|
|
842
926
|
manifest: Mapping[str, Any],
|
|
843
927
|
dagster_dbt_translator: "DagsterDbtTranslator",
|
|
844
928
|
test_unique_id: str,
|
|
929
|
+
project: Optional[DbtProject],
|
|
845
930
|
) -> Optional[AssetCheckKey]:
|
|
846
931
|
if not test_unique_id.startswith("test"):
|
|
847
932
|
return None
|
|
848
933
|
|
|
849
|
-
test_resource_props = manifest
|
|
934
|
+
test_resource_props = get_node(manifest, test_unique_id)
|
|
850
935
|
upstream_unique_ids: AbstractSet[str] = set(test_resource_props["depends_on"]["nodes"])
|
|
851
936
|
|
|
852
937
|
# If the test is generic, it will have an attached node that we can use.
|
|
@@ -868,35 +953,267 @@ def get_asset_check_key_for_test(
|
|
|
868
953
|
)
|
|
869
954
|
|
|
870
955
|
# Attempt to find the attached node from the ref.
|
|
871
|
-
if attached_node_ref
|
|
956
|
+
if attached_node_ref:
|
|
872
957
|
ref_name, ref_package, ref_version = (
|
|
873
958
|
attached_node_ref["name"],
|
|
874
959
|
attached_node_ref.get("package"),
|
|
875
960
|
attached_node_ref.get("version"),
|
|
876
961
|
)
|
|
877
962
|
|
|
878
|
-
project_name = manifest
|
|
963
|
+
project_name = manifest.get("metadata", {})["project_name"]
|
|
879
964
|
if not ref_package:
|
|
880
965
|
ref_package = project_name
|
|
881
966
|
|
|
882
|
-
|
|
883
|
-
|
|
967
|
+
attached_node_unique_id = None
|
|
968
|
+
for unique_id, dbt_resource_props in manifest["nodes"].items():
|
|
969
|
+
if (ref_name, ref_package, ref_version) == (
|
|
884
970
|
dbt_resource_props["name"],
|
|
885
971
|
dbt_resource_props["package_name"],
|
|
886
972
|
dbt_resource_props.get("version"),
|
|
887
|
-
):
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
attached_node_unique_id = unique_id_by_ref.get((ref_name, ref_package, ref_version))
|
|
973
|
+
):
|
|
974
|
+
attached_node_unique_id = unique_id
|
|
975
|
+
break
|
|
892
976
|
|
|
893
977
|
if not attached_node_unique_id:
|
|
894
978
|
return None
|
|
895
979
|
|
|
896
980
|
return AssetCheckKey(
|
|
897
981
|
name=test_resource_props["name"],
|
|
898
|
-
asset_key=dagster_dbt_translator.
|
|
899
|
-
manifest
|
|
900
|
-
|
|
901
|
-
|
|
982
|
+
asset_key=dagster_dbt_translator.get_asset_spec(
|
|
983
|
+
manifest,
|
|
984
|
+
attached_node_unique_id,
|
|
985
|
+
project,
|
|
986
|
+
).key,
|
|
987
|
+
)
|
|
988
|
+
|
|
989
|
+
|
|
990
|
+
def get_checks_on_sources_upstream_of_selected_assets(
|
|
991
|
+
assets_def: AssetsDefinition, selected_asset_keys: AbstractSet[AssetKey]
|
|
992
|
+
) -> AbstractSet[AssetCheckKey]:
|
|
993
|
+
upstream_source_keys = assets_def.get_upstream_input_keys(frozenset(selected_asset_keys))
|
|
994
|
+
return assets_def.get_checks_targeting_keys(frozenset(upstream_source_keys))
|
|
995
|
+
|
|
996
|
+
|
|
997
|
+
def get_subset_selection_for_context(
|
|
998
|
+
context: Union[OpExecutionContext, AssetExecutionContext],
|
|
999
|
+
manifest: Mapping[str, Any],
|
|
1000
|
+
select: Optional[str],
|
|
1001
|
+
exclude: Optional[str],
|
|
1002
|
+
selector: Optional[str],
|
|
1003
|
+
dagster_dbt_translator: "DagsterDbtTranslator",
|
|
1004
|
+
current_dbt_indirect_selection_env: Optional[str],
|
|
1005
|
+
) -> tuple[list[str], Optional[str]]:
|
|
1006
|
+
"""Generate a dbt selection string and DBT_INDIRECT_SELECTION setting to execute the selected
|
|
1007
|
+
resources in a subsetted execution context.
|
|
1008
|
+
|
|
1009
|
+
See https://docs.getdbt.com/reference/node-selection/syntax#how-does-selection-work.
|
|
1010
|
+
|
|
1011
|
+
Args:
|
|
1012
|
+
context (Union[OpExecutionContext, AssetExecutionContext]): The execution context for the current execution step.
|
|
1013
|
+
manifest (Mapping[str, Any]): The dbt manifest blob.
|
|
1014
|
+
select (Optional[str]): A dbt selection string to select resources to materialize.
|
|
1015
|
+
exclude (Optional[str]): A dbt selection string to exclude resources from materializing.
|
|
1016
|
+
selector (Optional[str]): A dbt selector to select resources to materialize.
|
|
1017
|
+
dagster_dbt_translator (DagsterDbtTranslator): The translator to link dbt nodes to Dagster
|
|
1018
|
+
assets.
|
|
1019
|
+
current_dbt_indirect_selection_env (Optional[str]): The user's value for the DBT_INDIRECT_SELECTION
|
|
1020
|
+
environment variable.
|
|
1021
|
+
|
|
1022
|
+
|
|
1023
|
+
Returns:
|
|
1024
|
+
List[str]: dbt CLI arguments to materialize the selected resources in a
|
|
1025
|
+
subsetted execution context.
|
|
1026
|
+
|
|
1027
|
+
If the current execution context is not performing a subsetted execution,
|
|
1028
|
+
return CLI arguments composed of the inputed selection and exclusion arguments.
|
|
1029
|
+
Optional[str]: A value for the DBT_INDIRECT_SELECTION environment variable. If None, then
|
|
1030
|
+
the environment variable is not set and will either use dbt's default (eager) or the
|
|
1031
|
+
user's setting.
|
|
1032
|
+
"""
|
|
1033
|
+
default_dbt_selection = []
|
|
1034
|
+
if select:
|
|
1035
|
+
default_dbt_selection += ["--select", select]
|
|
1036
|
+
if exclude:
|
|
1037
|
+
default_dbt_selection += ["--exclude", exclude]
|
|
1038
|
+
if selector:
|
|
1039
|
+
default_dbt_selection += ["--selector", selector]
|
|
1040
|
+
|
|
1041
|
+
assets_def = context.assets_def
|
|
1042
|
+
is_asset_subset = assets_def.keys_by_output_name != assets_def.node_keys_by_output_name
|
|
1043
|
+
is_checks_subset = (
|
|
1044
|
+
assets_def.check_specs_by_output_name != assets_def.node_check_specs_by_output_name
|
|
902
1045
|
)
|
|
1046
|
+
|
|
1047
|
+
# It's nice to use the default dbt selection arguments when not subsetting for readability. We
|
|
1048
|
+
# also use dbt indirect selection to avoid hitting cli arg length limits.
|
|
1049
|
+
# https://github.com/dagster-io/dagster/issues/16997#issuecomment-1832443279
|
|
1050
|
+
# A biproduct is that we'll run singular dbt tests (not currently modeled as asset checks) in
|
|
1051
|
+
# cases when we can use indirection selection, an not when we need to turn it off.
|
|
1052
|
+
if not (is_asset_subset or is_checks_subset):
|
|
1053
|
+
logger.info(
|
|
1054
|
+
"A dbt subsetted execution is not being performed. Using the default dbt selection"
|
|
1055
|
+
f" arguments `{default_dbt_selection}`."
|
|
1056
|
+
)
|
|
1057
|
+
# default eager indirect selection. This means we'll also run any singular tests (which
|
|
1058
|
+
# aren't modeled as asset checks currently).
|
|
1059
|
+
return default_dbt_selection, None
|
|
1060
|
+
|
|
1061
|
+
# Explicitly select a dbt resource by its path. Selecting a resource by path is more terse
|
|
1062
|
+
# than selecting it by its fully qualified name.
|
|
1063
|
+
# https://docs.getdbt.com/reference/node-selection/methods#the-path-method
|
|
1064
|
+
selected_asset_resources = get_dbt_resource_names_for_asset_keys(
|
|
1065
|
+
dagster_dbt_translator, manifest, assets_def, context.selected_asset_keys
|
|
1066
|
+
)
|
|
1067
|
+
|
|
1068
|
+
# We explicitly use node_check_specs_by_output_name because it contains every single check spec, not just those selected in the currently
|
|
1069
|
+
# executing subset.
|
|
1070
|
+
checks_targeting_selected_sources = get_checks_on_sources_upstream_of_selected_assets(
|
|
1071
|
+
assets_def=assets_def, selected_asset_keys=context.selected_asset_keys
|
|
1072
|
+
)
|
|
1073
|
+
selected_check_keys = {*context.selected_asset_check_keys, *checks_targeting_selected_sources}
|
|
1074
|
+
|
|
1075
|
+
# if all asset checks for the subsetted assets are selected, then we can just select the
|
|
1076
|
+
# assets and use indirect selection for the tests. We verify that
|
|
1077
|
+
# 1. all the selected checks are for selected assets
|
|
1078
|
+
# 2. no checks for selected assets are excluded
|
|
1079
|
+
# This also means we'll run any singular tests.
|
|
1080
|
+
selected_checks_on_non_selected_assets = {
|
|
1081
|
+
check_key
|
|
1082
|
+
for check_key in selected_check_keys
|
|
1083
|
+
if check_key.asset_key not in context.selected_asset_keys
|
|
1084
|
+
}
|
|
1085
|
+
all_check_keys = {
|
|
1086
|
+
check_spec.key for check_spec in assets_def.node_check_specs_by_output_name.values()
|
|
1087
|
+
}
|
|
1088
|
+
excluded_checks = all_check_keys.difference(selected_check_keys)
|
|
1089
|
+
excluded_checks_on_selected_assets = [
|
|
1090
|
+
check_key
|
|
1091
|
+
for check_key in excluded_checks
|
|
1092
|
+
if check_key.asset_key in context.selected_asset_keys
|
|
1093
|
+
]
|
|
1094
|
+
|
|
1095
|
+
# note that this will always be false if checks are disabled (which means the assets_def has no
|
|
1096
|
+
# check specs)
|
|
1097
|
+
if excluded_checks_on_selected_assets:
|
|
1098
|
+
# select all assets and tests explicitly, and turn off indirect selection. This risks
|
|
1099
|
+
# hitting the CLI argument length limit, but in the common scenarios that can be launched from the UI
|
|
1100
|
+
# (all checks disabled, only one check and no assets) it's not a concern.
|
|
1101
|
+
# Since we're setting DBT_INDIRECT_SELECTION=empty, we won't run any singular tests.
|
|
1102
|
+
selected_dbt_resources = [
|
|
1103
|
+
*selected_asset_resources,
|
|
1104
|
+
*get_dbt_test_names_for_check_keys(
|
|
1105
|
+
dagster_dbt_translator, manifest, assets_def, context.selected_asset_check_keys
|
|
1106
|
+
),
|
|
1107
|
+
]
|
|
1108
|
+
indirect_selection_override = DBT_EMPTY_INDIRECT_SELECTION
|
|
1109
|
+
logger.info(
|
|
1110
|
+
"Overriding default `DBT_INDIRECT_SELECTION` "
|
|
1111
|
+
f"{current_dbt_indirect_selection_env or 'eager'} with "
|
|
1112
|
+
f"`{indirect_selection_override}` due to additional checks "
|
|
1113
|
+
f"{', '.join([c.to_user_string() for c in selected_checks_on_non_selected_assets])} "
|
|
1114
|
+
f"and excluded checks {', '.join([c.to_user_string() for c in excluded_checks_on_selected_assets])}."
|
|
1115
|
+
)
|
|
1116
|
+
elif selected_checks_on_non_selected_assets:
|
|
1117
|
+
# explicitly select the tests that won't be run via indirect selection
|
|
1118
|
+
selected_dbt_resources = [
|
|
1119
|
+
*selected_asset_resources,
|
|
1120
|
+
*get_dbt_test_names_for_check_keys(
|
|
1121
|
+
dagster_dbt_translator,
|
|
1122
|
+
manifest,
|
|
1123
|
+
assets_def,
|
|
1124
|
+
selected_checks_on_non_selected_assets,
|
|
1125
|
+
),
|
|
1126
|
+
]
|
|
1127
|
+
indirect_selection_override = None
|
|
1128
|
+
else:
|
|
1129
|
+
selected_dbt_resources = selected_asset_resources
|
|
1130
|
+
indirect_selection_override = None
|
|
1131
|
+
|
|
1132
|
+
logger.info(
|
|
1133
|
+
"A dbt subsetted execution is being performed. Overriding default dbt selection"
|
|
1134
|
+
f" arguments `{default_dbt_selection}` with arguments: `{selected_dbt_resources}`."
|
|
1135
|
+
)
|
|
1136
|
+
|
|
1137
|
+
# Take the union of all the selected resources.
|
|
1138
|
+
# https://docs.getdbt.com/reference/node-selection/set-operators#unions
|
|
1139
|
+
union_selected_dbt_resources = ["--select"] + [" ".join(selected_dbt_resources)]
|
|
1140
|
+
|
|
1141
|
+
return union_selected_dbt_resources, indirect_selection_override
|
|
1142
|
+
|
|
1143
|
+
|
|
1144
|
+
def get_dbt_resource_names_for_asset_keys(
|
|
1145
|
+
translator: "DagsterDbtTranslator",
|
|
1146
|
+
manifest: Mapping[str, Any],
|
|
1147
|
+
assets_def: AssetsDefinition,
|
|
1148
|
+
asset_keys: Iterable[AssetKey],
|
|
1149
|
+
) -> Sequence[str]:
|
|
1150
|
+
dbt_resource_props_gen = (
|
|
1151
|
+
get_node(
|
|
1152
|
+
manifest,
|
|
1153
|
+
assets_def.get_asset_spec(key).metadata[DAGSTER_DBT_UNIQUE_ID_METADATA_KEY],
|
|
1154
|
+
)
|
|
1155
|
+
for key in asset_keys
|
|
1156
|
+
)
|
|
1157
|
+
|
|
1158
|
+
# Explicitly select a dbt resource by its file name.
|
|
1159
|
+
# https://docs.getdbt.com/reference/node-selection/methods#the-file-method
|
|
1160
|
+
if translator.settings.enable_dbt_selection_by_name:
|
|
1161
|
+
return [
|
|
1162
|
+
Path(dbt_resource_props["original_file_path"]).stem
|
|
1163
|
+
for dbt_resource_props in dbt_resource_props_gen
|
|
1164
|
+
]
|
|
1165
|
+
|
|
1166
|
+
# Explictly select a dbt resource by its fully qualified name (FQN).
|
|
1167
|
+
# https://docs.getdbt.com/reference/node-selection/methods#the-file-or-fqn-method
|
|
1168
|
+
return [".".join(dbt_resource_props["fqn"]) for dbt_resource_props in dbt_resource_props_gen]
|
|
1169
|
+
|
|
1170
|
+
|
|
1171
|
+
def get_dbt_test_names_for_check_keys(
|
|
1172
|
+
translator: "DagsterDbtTranslator",
|
|
1173
|
+
manifest: Mapping[str, Any],
|
|
1174
|
+
assets_def: AssetsDefinition,
|
|
1175
|
+
check_keys: Iterable[AssetCheckKey],
|
|
1176
|
+
) -> Sequence[str]:
|
|
1177
|
+
dbt_resource_props_gen = (
|
|
1178
|
+
get_node(
|
|
1179
|
+
manifest,
|
|
1180
|
+
(assets_def.get_spec_for_check_key(key).metadata or {})[
|
|
1181
|
+
DAGSTER_DBT_UNIQUE_ID_METADATA_KEY
|
|
1182
|
+
],
|
|
1183
|
+
)
|
|
1184
|
+
for key in check_keys
|
|
1185
|
+
)
|
|
1186
|
+
# Explicitly select a dbt test by its test name.
|
|
1187
|
+
# https://docs.getdbt.com/reference/node-selection/test-selection-examples#more-complex-selection.
|
|
1188
|
+
if translator.settings.enable_dbt_selection_by_name:
|
|
1189
|
+
return [asset_check_key.name for asset_check_key in check_keys]
|
|
1190
|
+
|
|
1191
|
+
# Explictly select a dbt test by its fully qualified name (FQN).
|
|
1192
|
+
# https://docs.getdbt.com/reference/node-selection/methods#the-file-or-fqn-method
|
|
1193
|
+
return [".".join(dbt_resource_props["fqn"]) for dbt_resource_props in dbt_resource_props_gen]
|
|
1194
|
+
|
|
1195
|
+
|
|
1196
|
+
def get_node(manifest: Mapping[str, Any], unique_id: str) -> Mapping[str, Any]:
|
|
1197
|
+
"""Find a node by unique_id in manifest_json."""
|
|
1198
|
+
if unique_id in manifest["nodes"]:
|
|
1199
|
+
return manifest["nodes"][unique_id]
|
|
1200
|
+
|
|
1201
|
+
if unique_id in manifest["sources"]:
|
|
1202
|
+
return manifest["sources"][unique_id]
|
|
1203
|
+
|
|
1204
|
+
if unique_id in manifest["exposures"]:
|
|
1205
|
+
return manifest["exposures"][unique_id]
|
|
1206
|
+
|
|
1207
|
+
if unique_id in manifest["metrics"]:
|
|
1208
|
+
return manifest["metrics"][unique_id]
|
|
1209
|
+
|
|
1210
|
+
if unique_id in manifest.get("semantic_models", {}):
|
|
1211
|
+
return manifest["semantic_models"][unique_id]
|
|
1212
|
+
|
|
1213
|
+
if unique_id in manifest.get("saved_queries", {}):
|
|
1214
|
+
return manifest["saved_queries"][unique_id]
|
|
1215
|
+
|
|
1216
|
+
if unique_id in manifest.get("unit_tests", {}):
|
|
1217
|
+
return manifest["unit_tests"][unique_id]
|
|
1218
|
+
|
|
1219
|
+
check.failed(f"Could not find {unique_id} in dbt manifest")
|