dagster-dbt 0.23.3__py3-none-any.whl → 0.28.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. dagster_dbt/__init__.py +41 -140
  2. dagster_dbt/asset_decorator.py +49 -230
  3. dagster_dbt/asset_specs.py +65 -0
  4. dagster_dbt/asset_utils.py +655 -338
  5. dagster_dbt/cli/app.py +44 -43
  6. dagster_dbt/cloud/__init__.py +6 -4
  7. dagster_dbt/cloud/asset_defs.py +119 -177
  8. dagster_dbt/cloud/cli.py +3 -4
  9. dagster_dbt/cloud/ops.py +9 -6
  10. dagster_dbt/cloud/resources.py +9 -4
  11. dagster_dbt/cloud/types.py +12 -7
  12. dagster_dbt/cloud/utils.py +186 -0
  13. dagster_dbt/cloud_v2/__init__.py +10 -0
  14. dagster_dbt/cloud_v2/asset_decorator.py +81 -0
  15. dagster_dbt/cloud_v2/cli_invocation.py +67 -0
  16. dagster_dbt/cloud_v2/client.py +438 -0
  17. dagster_dbt/cloud_v2/resources.py +462 -0
  18. dagster_dbt/cloud_v2/run_handler.py +229 -0
  19. dagster_dbt/cloud_v2/sensor_builder.py +254 -0
  20. dagster_dbt/cloud_v2/types.py +143 -0
  21. dagster_dbt/compat.py +107 -0
  22. dagster_dbt/components/__init__.py +0 -0
  23. dagster_dbt/components/dbt_project/__init__.py +0 -0
  24. dagster_dbt/components/dbt_project/component.py +545 -0
  25. dagster_dbt/components/dbt_project/scaffolder.py +65 -0
  26. dagster_dbt/core/__init__.py +0 -10
  27. dagster_dbt/core/dbt_cli_event.py +612 -0
  28. dagster_dbt/core/dbt_cli_invocation.py +474 -0
  29. dagster_dbt/core/dbt_event_iterator.py +399 -0
  30. dagster_dbt/core/resource.py +733 -0
  31. dagster_dbt/core/utils.py +14 -279
  32. dagster_dbt/dagster_dbt_translator.py +317 -74
  33. dagster_dbt/dbt_core_version.py +1 -0
  34. dagster_dbt/dbt_manifest.py +6 -5
  35. dagster_dbt/dbt_manifest_asset_selection.py +62 -22
  36. dagster_dbt/dbt_project.py +179 -40
  37. dagster_dbt/dbt_project_manager.py +173 -0
  38. dagster_dbt/dbt_version.py +0 -0
  39. dagster_dbt/errors.py +9 -84
  40. dagster_dbt/freshness_builder.py +147 -0
  41. dagster_dbt/include/pyproject.toml.jinja +21 -0
  42. dagster_dbt/include/scaffold/assets.py.jinja +1 -8
  43. dagster_dbt/include/scaffold/definitions.py.jinja +0 -15
  44. dagster_dbt/include/scaffold/project.py.jinja +1 -0
  45. dagster_dbt/include/setup.py.jinja +2 -3
  46. dagster_dbt/metadata_set.py +18 -0
  47. dagster_dbt/utils.py +136 -234
  48. dagster_dbt/version.py +1 -1
  49. dagster_dbt-0.28.4.dist-info/METADATA +47 -0
  50. dagster_dbt-0.28.4.dist-info/RECORD +59 -0
  51. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/WHEEL +1 -1
  52. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/entry_points.txt +3 -0
  53. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info/licenses}/LICENSE +1 -1
  54. dagster_dbt/asset_defs.py +0 -1049
  55. dagster_dbt/core/resources.py +0 -527
  56. dagster_dbt/core/resources_v2.py +0 -1542
  57. dagster_dbt/core/types.py +0 -63
  58. dagster_dbt/dbt_resource.py +0 -220
  59. dagster_dbt/include/scaffold/constants.py.jinja +0 -21
  60. dagster_dbt/ops.py +0 -134
  61. dagster_dbt/types.py +0 -22
  62. dagster_dbt-0.23.3.dist-info/METADATA +0 -31
  63. dagster_dbt-0.23.3.dist-info/RECORD +0 -43
  64. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/top_level.txt +0 -0
@@ -1,66 +1,83 @@
1
1
  import hashlib
2
+ import os
3
+ import shutil
4
+ import tempfile
2
5
  import textwrap
3
- from typing import (
4
- TYPE_CHECKING,
5
- AbstractSet,
6
- Any,
7
- Dict,
8
- FrozenSet,
9
- List,
10
- Mapping,
11
- Optional,
12
- Sequence,
13
- Set,
14
- Tuple,
15
- cast,
16
- )
6
+ from collections import defaultdict
7
+ from collections.abc import Iterable, Mapping, Sequence
8
+ from pathlib import Path
9
+ from typing import TYPE_CHECKING, AbstractSet, Annotated, Any, Final, Optional, Union # noqa: UP035
17
10
 
11
+ import yaml
18
12
  from dagster import (
19
13
  AssetCheckKey,
20
14
  AssetCheckSpec,
15
+ AssetExecutionContext,
21
16
  AssetKey,
22
17
  AssetsDefinition,
23
18
  AssetSelection,
19
+ AssetSpec,
24
20
  AutoMaterializePolicy,
21
+ DagsterInvalidDefinitionError,
25
22
  DagsterInvariantViolationError,
26
23
  DefaultScheduleStatus,
27
- FreshnessPolicy,
28
- In,
29
- Nothing,
30
- Out,
24
+ OpExecutionContext,
31
25
  RunConfig,
32
26
  ScheduleDefinition,
33
27
  TableColumn,
34
28
  TableSchema,
35
29
  _check as check,
36
30
  define_asset_job,
31
+ get_dagster_logger,
37
32
  )
38
- from dagster._core.definitions.decorators.asset_decorator import (
39
- _validate_and_assign_output_names_to_check_specs,
40
- )
33
+ from dagster._core.definitions.assets.definition.asset_spec import SYSTEM_METADATA_KEY_DAGSTER_TYPE
41
34
  from dagster._core.definitions.metadata import TableMetadataSet
42
- from dagster._utils.merger import merge_dicts
43
- from dagster._utils.warnings import deprecation_warning
44
- from dbt.version import __version__ as dbt_version
45
- from packaging import version
35
+ from dagster._core.errors import DagsterInvalidPropertyError
36
+ from dagster._core.types.dagster_type import Nothing
37
+ from dagster._record import ImportFrom, record
38
+ from dagster_shared.record import replace
46
39
 
47
- from .utils import ASSET_RESOURCE_TYPES, dagster_name_fn
40
+ from dagster_dbt.dbt_project import DbtProject
41
+ from dagster_dbt.metadata_set import DbtMetadataSet
42
+ from dagster_dbt.utils import ASSET_RESOURCE_TYPES, dagster_name_fn, select_unique_ids
48
43
 
49
44
  if TYPE_CHECKING:
50
- from .dagster_dbt_translator import DagsterDbtTranslator, DbtManifestWrapper
45
+ from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator, DbtManifestWrapper
51
46
 
52
47
  DAGSTER_DBT_MANIFEST_METADATA_KEY = "dagster_dbt/manifest"
53
48
  DAGSTER_DBT_TRANSLATOR_METADATA_KEY = "dagster_dbt/dagster_dbt_translator"
49
+ DAGSTER_DBT_PROJECT_METADATA_KEY = "dagster_dbt/project"
54
50
  DAGSTER_DBT_SELECT_METADATA_KEY = "dagster_dbt/select"
55
51
  DAGSTER_DBT_EXCLUDE_METADATA_KEY = "dagster_dbt/exclude"
52
+ DAGSTER_DBT_SELECTOR_METADATA_KEY = "dagster_dbt/selector"
53
+ DAGSTER_DBT_UNIQUE_ID_METADATA_KEY = "dagster_dbt/unique_id"
54
+
55
+ DBT_DEFAULT_SELECT = "fqn:*"
56
+ DBT_DEFAULT_EXCLUDE = ""
57
+ DBT_DEFAULT_SELECTOR = ""
58
+
59
+ DBT_INDIRECT_SELECTION_ENV: Final[str] = "DBT_INDIRECT_SELECTION"
60
+ DBT_EMPTY_INDIRECT_SELECTION: Final[str] = "empty"
61
+
62
+ # Threshold for switching to selector file to avoid CLI argument length limits
63
+ # https://github.com/dagster-io/dagster/issues/16997
64
+ _SELECTION_ARGS_THRESHOLD: Final[int] = 200
65
+
66
+ DUPLICATE_ASSET_KEY_ERROR_MESSAGE = (
67
+ "The following dbt resources are configured with identical Dagster asset keys."
68
+ " Please ensure that each dbt resource generates a unique Dagster asset key."
69
+ " See the reference for configuring Dagster asset keys for your dbt project:"
70
+ " https://docs.dagster.io/integrations/libraries/dbt/reference#customizing-asset-keys."
71
+ )
72
+
73
+ logger = get_dagster_logger()
56
74
 
57
75
 
58
76
  def get_asset_key_for_model(dbt_assets: Sequence[AssetsDefinition], model_name: str) -> AssetKey:
59
77
  """Return the corresponding Dagster asset key for a dbt model, seed, or snapshot.
60
78
 
61
79
  Args:
62
- dbt_assets (AssetsDefinition): An AssetsDefinition object produced by
63
- load_assets_from_dbt_project, load_assets_from_dbt_manifest, or @dbt_assets.
80
+ dbt_assets (AssetsDefinition): An AssetsDefinition object produced by @dbt_assets.
64
81
  model_name (str): The name of the dbt model, seed, or snapshot.
65
82
 
66
83
  Returns:
@@ -84,18 +101,24 @@ def get_asset_key_for_model(dbt_assets: Sequence[AssetsDefinition], model_name:
84
101
  check.sequence_param(dbt_assets, "dbt_assets", of_type=AssetsDefinition)
85
102
  check.str_param(model_name, "model_name")
86
103
 
87
- manifest, dagster_dbt_translator = get_manifest_and_translator_from_dbt_assets(dbt_assets)
104
+ manifest, dagster_dbt_translator, dbt_project = get_manifest_and_translator_from_dbt_assets(
105
+ dbt_assets
106
+ )
88
107
 
89
- matching_models = [
90
- value
91
- for value in manifest["nodes"].values()
108
+ matching_model_ids = [
109
+ unique_id
110
+ for unique_id, value in manifest["nodes"].items()
92
111
  if value["name"] == model_name and value["resource_type"] in ASSET_RESOURCE_TYPES
93
112
  ]
94
113
 
95
- if len(matching_models) == 0:
114
+ if len(matching_model_ids) == 0:
96
115
  raise KeyError(f"Could not find a dbt model, seed, or snapshot with name: {model_name}")
97
116
 
98
- return dagster_dbt_translator.get_asset_key(next(iter(matching_models)))
117
+ return dagster_dbt_translator.get_asset_spec(
118
+ manifest,
119
+ next(iter(matching_model_ids)),
120
+ dbt_project,
121
+ ).key
99
122
 
100
123
 
101
124
  def get_asset_keys_by_output_name_for_source(
@@ -138,18 +161,24 @@ def get_asset_keys_by_output_name_for_source(
138
161
  check.sequence_param(dbt_assets, "dbt_assets", of_type=AssetsDefinition)
139
162
  check.str_param(source_name, "source_name")
140
163
 
141
- manifest, dagster_dbt_translator = get_manifest_and_translator_from_dbt_assets(dbt_assets)
164
+ manifest, dagster_dbt_translator, dbt_project = get_manifest_and_translator_from_dbt_assets(
165
+ dbt_assets
166
+ )
142
167
 
143
- matching_nodes = [
144
- value for value in manifest["sources"].values() if value["source_name"] == source_name
145
- ]
168
+ matching = {
169
+ unique_id: value
170
+ for unique_id, value in manifest["sources"].items()
171
+ if value["source_name"] == source_name
172
+ }
146
173
 
147
- if len(matching_nodes) == 0:
174
+ if len(matching) == 0:
148
175
  raise KeyError(f"Could not find a dbt source with name: {source_name}")
149
176
 
150
177
  return {
151
- dagster_name_fn(value): dagster_dbt_translator.get_asset_key(value)
152
- for value in matching_nodes
178
+ dagster_name_fn(value): dagster_dbt_translator.get_asset_spec(
179
+ manifest, unique_id, dbt_project
180
+ ).key
181
+ for unique_id, value in matching.items()
153
182
  }
154
183
 
155
184
 
@@ -194,8 +223,9 @@ def get_asset_key_for_source(dbt_assets: Sequence[AssetsDefinition], source_name
194
223
 
195
224
  def build_dbt_asset_selection(
196
225
  dbt_assets: Sequence[AssetsDefinition],
197
- dbt_select: str = "fqn:*",
198
- dbt_exclude: Optional[str] = None,
226
+ dbt_select: str = DBT_DEFAULT_SELECT,
227
+ dbt_exclude: Optional[str] = DBT_DEFAULT_EXCLUDE,
228
+ dbt_selector: Optional[str] = DBT_DEFAULT_SELECTOR,
199
229
  ) -> AssetSelection:
200
230
  """Build an asset selection for a dbt selection string.
201
231
 
@@ -249,24 +279,35 @@ def build_dbt_asset_selection(
249
279
  bar_plus_and_foo_and_downstream_selection = bar_plus_and_foo_selection.downstream()
250
280
 
251
281
  """
252
- manifest, dagster_dbt_translator = get_manifest_and_translator_from_dbt_assets(dbt_assets)
282
+ manifest, dagster_dbt_translator, dbt_project = get_manifest_and_translator_from_dbt_assets(
283
+ dbt_assets
284
+ )
253
285
  [dbt_assets_definition] = dbt_assets
254
286
 
255
287
  dbt_assets_select = dbt_assets_definition.op.tags[DAGSTER_DBT_SELECT_METADATA_KEY]
256
- dbt_assets_exclude = dbt_assets_definition.op.tags.get(DAGSTER_DBT_EXCLUDE_METADATA_KEY)
288
+ dbt_assets_exclude = dbt_assets_definition.op.tags.get(
289
+ DAGSTER_DBT_EXCLUDE_METADATA_KEY, DBT_DEFAULT_EXCLUDE
290
+ )
291
+ dbt_assets_selector = dbt_assets_definition.op.tags.get(
292
+ DAGSTER_DBT_SELECTOR_METADATA_KEY, DBT_DEFAULT_SELECTOR
293
+ )
257
294
 
258
- from .dbt_manifest_asset_selection import DbtManifestAssetSelection
295
+ from dagster_dbt.dbt_manifest_asset_selection import DbtManifestAssetSelection
259
296
 
260
297
  return DbtManifestAssetSelection.build(
261
298
  manifest=manifest,
262
299
  dagster_dbt_translator=dagster_dbt_translator,
263
300
  select=dbt_assets_select,
264
301
  exclude=dbt_assets_exclude,
302
+ selector=dbt_assets_selector,
303
+ project=dbt_project,
265
304
  ) & DbtManifestAssetSelection.build(
266
305
  manifest=manifest,
267
306
  dagster_dbt_translator=dagster_dbt_translator,
268
307
  select=dbt_select,
269
- exclude=dbt_exclude,
308
+ exclude=dbt_exclude or DBT_DEFAULT_EXCLUDE,
309
+ selector=dbt_selector or DBT_DEFAULT_SELECTOR,
310
+ project=dbt_project,
270
311
  )
271
312
 
272
313
 
@@ -274,8 +315,9 @@ def build_schedule_from_dbt_selection(
274
315
  dbt_assets: Sequence[AssetsDefinition],
275
316
  job_name: str,
276
317
  cron_schedule: str,
277
- dbt_select: str = "fqn:*",
278
- dbt_exclude: Optional[str] = None,
318
+ dbt_select: str = DBT_DEFAULT_SELECT,
319
+ dbt_exclude: Optional[str] = DBT_DEFAULT_EXCLUDE,
320
+ dbt_selector: str = DBT_DEFAULT_SELECTOR,
279
321
  schedule_name: Optional[str] = None,
280
322
  tags: Optional[Mapping[str, str]] = None,
281
323
  config: Optional[RunConfig] = None,
@@ -292,6 +334,7 @@ def build_schedule_from_dbt_selection(
292
334
  cron_schedule (str): The cron schedule to define the schedule.
293
335
  dbt_select (str): A dbt selection string to specify a set of dbt resources.
294
336
  dbt_exclude (Optional[str]): A dbt selection string to exclude a set of dbt resources.
337
+ dbt_selector (str): A dbt selector to select resources to materialize.
295
338
  schedule_name (Optional[str]): The name of the dbt schedule to create.
296
339
  tags (Optional[Mapping[str, str]]): A dictionary of tags (string key-value pairs) to attach
297
340
  to the scheduled runs.
@@ -327,7 +370,8 @@ def build_schedule_from_dbt_selection(
327
370
  selection=build_dbt_asset_selection(
328
371
  dbt_assets,
329
372
  dbt_select=dbt_select,
330
- dbt_exclude=dbt_exclude,
373
+ dbt_exclude=dbt_exclude or DBT_DEFAULT_EXCLUDE,
374
+ dbt_selector=dbt_selector,
331
375
  ),
332
376
  config=config,
333
377
  tags=tags,
@@ -339,31 +383,131 @@ def build_schedule_from_dbt_selection(
339
383
 
340
384
  def get_manifest_and_translator_from_dbt_assets(
341
385
  dbt_assets: Sequence[AssetsDefinition],
342
- ) -> Tuple[Mapping[str, Any], "DagsterDbtTranslator"]:
386
+ ) -> tuple[Mapping[str, Any], "DagsterDbtTranslator", Optional[DbtProject]]:
343
387
  check.invariant(len(dbt_assets) == 1, "Exactly one dbt AssetsDefinition is required")
344
388
  dbt_assets_def = dbt_assets[0]
345
389
  metadata_by_key = dbt_assets_def.metadata_by_key or {}
346
390
  first_asset_key = next(iter(dbt_assets_def.metadata_by_key.keys()))
347
391
  first_metadata = metadata_by_key.get(first_asset_key, {})
348
- manifest_wrapper: Optional["DbtManifestWrapper"] = first_metadata.get(
392
+ manifest_wrapper: Optional[DbtManifestWrapper] = first_metadata.get(
349
393
  DAGSTER_DBT_MANIFEST_METADATA_KEY
350
394
  )
395
+ project = first_metadata.get(DAGSTER_DBT_PROJECT_METADATA_KEY)
351
396
  if manifest_wrapper is None:
352
397
  raise DagsterInvariantViolationError(
353
398
  f"Expected to find dbt manifest metadata on asset {first_asset_key.to_user_string()},"
354
- " but did not. Did you pass in assets that weren't generated by"
355
- " load_assets_from_dbt_project, load_assets_from_dbt_manifest, or @dbt_assets?"
399
+ " but did not. Did you pass in assets that weren't generated by @dbt_assets?"
356
400
  )
357
401
 
358
402
  dagster_dbt_translator = first_metadata.get(DAGSTER_DBT_TRANSLATOR_METADATA_KEY)
359
403
  if dagster_dbt_translator is None:
360
404
  raise DagsterInvariantViolationError(
361
405
  f"Expected to find dbt translator metadata on asset {first_asset_key.to_user_string()},"
362
- " but did not. Did you pass in assets that weren't generated by"
363
- " load_assets_from_dbt_project, load_assets_from_dbt_manifest, or @dbt_assets?"
406
+ " but did not. Did you pass in assets that weren't generated by @dbt_assets?"
364
407
  )
365
408
 
366
- return manifest_wrapper.manifest, dagster_dbt_translator
409
+ return manifest_wrapper.manifest, dagster_dbt_translator, project
410
+
411
+
412
+ def get_asset_keys_to_resource_props(
413
+ manifest: Mapping[str, Any],
414
+ translator: "DagsterDbtTranslator",
415
+ ) -> Mapping[AssetKey, Mapping[str, Any]]:
416
+ return {
417
+ translator.get_asset_key(node): node
418
+ for node in manifest["nodes"].values()
419
+ if node["resource_type"] in ASSET_RESOURCE_TYPES
420
+ }
421
+
422
+
423
+ @record
424
+ class DbtCliInvocationPartialParams:
425
+ manifest: Mapping[str, Any]
426
+ dagster_dbt_translator: Annotated[
427
+ "DagsterDbtTranslator", ImportFrom("dagster_dbt.dagster_dbt_translator")
428
+ ]
429
+ selection_args: Sequence[str]
430
+ indirect_selection: Optional[str]
431
+ dbt_project: Optional[DbtProject]
432
+
433
+
434
+ def get_updated_cli_invocation_params_for_context(
435
+ context: Optional[Union[OpExecutionContext, AssetExecutionContext]],
436
+ manifest: Mapping[str, Any],
437
+ dagster_dbt_translator: "DagsterDbtTranslator",
438
+ ) -> DbtCliInvocationPartialParams:
439
+ try:
440
+ assets_def = context.assets_def if context else None
441
+ except DagsterInvalidPropertyError:
442
+ # If assets_def is None in an OpExecutionContext, we raise a DagsterInvalidPropertyError,
443
+ # but we don't want to raise the error here.
444
+ assets_def = None
445
+
446
+ selection_args: list[str] = []
447
+ indirect_selection = os.getenv(DBT_INDIRECT_SELECTION_ENV, None)
448
+ dbt_project = None
449
+ if context and assets_def is not None:
450
+ manifest, dagster_dbt_translator, dbt_project = get_manifest_and_translator_from_dbt_assets(
451
+ [assets_def]
452
+ )
453
+
454
+ # Get project_dir from dbt_project if available
455
+ project_dir = Path(dbt_project.project_dir) if dbt_project else None
456
+ target_project = dbt_project
457
+
458
+ selection_args, indirect_selection_override = get_subset_selection_for_context(
459
+ context=context,
460
+ manifest=manifest,
461
+ select=context.op.tags.get(DAGSTER_DBT_SELECT_METADATA_KEY),
462
+ exclude=context.op.tags.get(DAGSTER_DBT_EXCLUDE_METADATA_KEY),
463
+ selector=context.op.tags.get(DAGSTER_DBT_SELECTOR_METADATA_KEY),
464
+ dagster_dbt_translator=dagster_dbt_translator,
465
+ current_dbt_indirect_selection_env=indirect_selection,
466
+ )
467
+ if (
468
+ selection_args[0] == "--select"
469
+ and project_dir
470
+ and len(resources := selection_args[1].split(" ")) > _SELECTION_ARGS_THRESHOLD
471
+ ):
472
+ temp_project_dir = tempfile.mkdtemp()
473
+ shutil.copytree(project_dir, temp_project_dir, dirs_exist_ok=True)
474
+ selectors_path = Path(temp_project_dir) / "selectors.yml"
475
+
476
+ # Delete any existing selectors, we need to create our own
477
+ if selectors_path.exists():
478
+ selectors_path.unlink()
479
+
480
+ selector_name = f"dagster_run_{context.run_id}"
481
+ temp_selectors = {
482
+ "selectors": [
483
+ {
484
+ "name": selector_name,
485
+ "definition": {"union": list(resources)},
486
+ }
487
+ ]
488
+ }
489
+ selectors_path.write_text(yaml.safe_dump(temp_selectors))
490
+ logger.info(
491
+ f"DBT selection of {len(resources)} resources exceeds threshold of {_SELECTION_ARGS_THRESHOLD}. "
492
+ "This may exceed system argument length limits. "
493
+ f"Executing materialization against temporary copy of DBT project at {temp_project_dir} with ephemeral selector."
494
+ )
495
+ selection_args = ["--selector", selector_name]
496
+ target_project = replace(dbt_project, project_dir=Path(temp_project_dir))
497
+
498
+ indirect_selection = (
499
+ indirect_selection_override if indirect_selection_override else indirect_selection
500
+ )
501
+ else:
502
+ target_project = dbt_project
503
+
504
+ return DbtCliInvocationPartialParams(
505
+ manifest=manifest,
506
+ dagster_dbt_translator=dagster_dbt_translator,
507
+ selection_args=selection_args,
508
+ indirect_selection=indirect_selection,
509
+ dbt_project=target_project,
510
+ )
367
511
 
368
512
 
369
513
  ###################
@@ -407,24 +551,40 @@ def default_asset_key_fn(dbt_resource_props: Mapping[str, Any]) -> AssetKey:
407
551
  def default_metadata_from_dbt_resource_props(
408
552
  dbt_resource_props: Mapping[str, Any],
409
553
  ) -> Mapping[str, Any]:
410
- metadata: Dict[str, Any] = {}
554
+ column_schema = None
411
555
  columns = dbt_resource_props.get("columns", {})
412
556
  if len(columns) > 0:
413
- return dict(
414
- TableMetadataSet(
415
- column_schema=TableSchema(
416
- columns=[
417
- TableColumn(
418
- name=column_name,
419
- type=column_info.get("data_type") or "?",
420
- description=column_info.get("description"),
421
- )
422
- for column_name, column_info in columns.items()
423
- ]
557
+ column_schema = TableSchema(
558
+ columns=[
559
+ TableColumn(
560
+ name=column_name,
561
+ type=column_info.get("data_type") or "?",
562
+ description=column_info.get("description"),
563
+ tags={tag_name: "" for tag_name in column_info.get("tags", [])},
424
564
  )
425
- )
565
+ for column_name, column_info in columns.items()
566
+ ]
426
567
  )
427
- return metadata
568
+
569
+ relation_parts = [
570
+ relation_part
571
+ for relation_part in [
572
+ dbt_resource_props.get("database"),
573
+ dbt_resource_props.get("schema"),
574
+ dbt_resource_props.get("alias"),
575
+ ]
576
+ if relation_part
577
+ ]
578
+ relation_name = ".".join(relation_parts) if relation_parts else None
579
+
580
+ materialization_type = dbt_resource_props.get("config", {}).get("materialized")
581
+ return {
582
+ **DbtMetadataSet(materialization_type=materialization_type),
583
+ **TableMetadataSet(
584
+ column_schema=column_schema,
585
+ table_name=relation_name,
586
+ ),
587
+ }
428
588
 
429
589
 
430
590
  def default_group_from_dbt_resource_props(dbt_resource_props: Mapping[str, Any]) -> Optional[str]:
@@ -458,16 +618,6 @@ def group_from_dbt_resource_props_fallback_to_directory(
458
618
 
459
619
  Args:
460
620
  dbt_resource_props (Mapping[str, Any]): A dictionary representing the dbt resource.
461
-
462
- Examples:
463
- .. code-block:: python
464
-
465
- from dagster_dbt import group_from_dbt_resource_props_fallback_to_directory
466
-
467
- dbt_assets = load_assets_from_dbt_manifest(
468
- manifest=manifest,
469
- node_info_to_group_fn=group_from_dbt_resource_props_fallback_to_directory,
470
- )
471
621
  """
472
622
  group_name = default_group_from_dbt_resource_props(dbt_resource_props)
473
623
  if group_name is not None:
@@ -489,48 +639,14 @@ def default_owners_from_dbt_resource_props(
489
639
  if owners_config:
490
640
  return owners_config
491
641
 
492
- owner: Optional[str] = (dbt_resource_props.get("group") or {}).get("owner", {}).get("email")
642
+ owner: Optional[Union[str, Sequence[str]]] = (
643
+ (dbt_resource_props.get("group") or {}).get("owner", {}).get("email")
644
+ )
493
645
 
494
646
  if not owner:
495
647
  return None
496
648
 
497
- return [owner]
498
-
499
-
500
- def default_freshness_policy_fn(dbt_resource_props: Mapping[str, Any]) -> Optional[FreshnessPolicy]:
501
- dagster_metadata = dbt_resource_props.get("meta", {}).get("dagster", {})
502
- freshness_policy_config = dagster_metadata.get("freshness_policy", {})
503
-
504
- freshness_policy = _legacy_freshness_policy_fn(freshness_policy_config)
505
- if freshness_policy:
506
- return freshness_policy
507
-
508
- legacy_freshness_policy_config = dbt_resource_props["config"].get(
509
- "dagster_freshness_policy", {}
510
- )
511
- legacy_freshness_policy = _legacy_freshness_policy_fn(legacy_freshness_policy_config)
512
-
513
- if legacy_freshness_policy:
514
- deprecation_warning(
515
- "dagster_freshness_policy",
516
- "0.21.0",
517
- "Instead, configure a Dagster freshness policy on a dbt model using"
518
- " +meta.dagster.freshness_policy.",
519
- )
520
-
521
- return legacy_freshness_policy
522
-
523
-
524
- def _legacy_freshness_policy_fn(
525
- freshness_policy_config: Mapping[str, Any],
526
- ) -> Optional[FreshnessPolicy]:
527
- if freshness_policy_config:
528
- return FreshnessPolicy(
529
- maximum_lag_minutes=float(freshness_policy_config["maximum_lag_minutes"]),
530
- cron_schedule=freshness_policy_config.get("cron_schedule"),
531
- cron_schedule_timezone=freshness_policy_config.get("cron_schedule_timezone"),
532
- )
533
- return None
649
+ return [owner] if isinstance(owner, str) else owner
534
650
 
535
651
 
536
652
  def default_auto_materialize_policy_fn(
@@ -539,31 +655,6 @@ def default_auto_materialize_policy_fn(
539
655
  dagster_metadata = dbt_resource_props.get("meta", {}).get("dagster", {})
540
656
  auto_materialize_policy_config = dagster_metadata.get("auto_materialize_policy", {})
541
657
 
542
- auto_materialize_policy = _auto_materialize_policy_fn(auto_materialize_policy_config)
543
- if auto_materialize_policy:
544
- return auto_materialize_policy
545
-
546
- legacy_auto_materialize_policy_config = dbt_resource_props["config"].get(
547
- "dagster_auto_materialize_policy", {}
548
- )
549
- legacy_auto_materialize_policy = _auto_materialize_policy_fn(
550
- legacy_auto_materialize_policy_config
551
- )
552
-
553
- if legacy_auto_materialize_policy:
554
- deprecation_warning(
555
- "dagster_auto_materialize_policy",
556
- "0.21.0",
557
- "Instead, configure a Dagster auto-materialize policy on a dbt model using"
558
- " +meta.dagster.auto_materialize_policy.",
559
- )
560
-
561
- return legacy_auto_materialize_policy
562
-
563
-
564
- def _auto_materialize_policy_fn(
565
- auto_materialize_policy_config: Mapping[str, Any],
566
- ) -> Optional[AutoMaterializePolicy]:
567
658
  if auto_materialize_policy_config.get("type") == "eager":
568
659
  return AutoMaterializePolicy.eager()
569
660
  elif auto_materialize_policy_config.get("type") == "lazy":
@@ -576,56 +667,62 @@ def default_description_fn(dbt_resource_props: Mapping[str, Any], display_raw_sq
576
667
  dbt_resource_props.get("raw_sql") or dbt_resource_props.get("raw_code", ""), " "
577
668
  )
578
669
  description_sections = [
579
- dbt_resource_props["description"]
670
+ dbt_resource_props.get("description")
580
671
  or f"dbt {dbt_resource_props['resource_type']} {dbt_resource_props['name']}",
581
672
  ]
582
673
  if display_raw_sql:
583
- description_sections.append(f"#### Raw SQL:\n```\n{code_block}\n```")
674
+ description_sections.append(f"#### Raw SQL:\n```sql\n{code_block}\n```")
584
675
  return "\n\n".join(filter(None, description_sections))
585
676
 
586
677
 
587
678
  def default_asset_check_fn(
588
679
  manifest: Mapping[str, Any],
589
- dbt_nodes: Mapping[str, Any],
590
680
  dagster_dbt_translator: "DagsterDbtTranslator",
591
681
  asset_key: AssetKey,
592
682
  test_unique_id: str,
683
+ project: Optional[DbtProject],
593
684
  ) -> Optional[AssetCheckSpec]:
594
685
  if not dagster_dbt_translator.settings.enable_asset_checks:
595
686
  return None
596
687
 
597
- test_resource_props = dbt_nodes[test_unique_id]
598
- parent_unique_ids: Set[str] = set(manifest["parent_map"].get(test_unique_id, []))
688
+ test_resource_props = get_node(manifest, test_unique_id)
689
+ parent_unique_ids: set[str] = set(manifest["parent_map"].get(test_unique_id, []))
599
690
 
600
691
  asset_check_key = get_asset_check_key_for_test(
601
692
  manifest=manifest,
602
693
  dagster_dbt_translator=dagster_dbt_translator,
603
694
  test_unique_id=test_unique_id,
695
+ project=project,
604
696
  )
605
697
 
606
698
  if not (asset_check_key and asset_check_key.asset_key == asset_key):
607
699
  return None
608
700
 
609
701
  additional_deps = {
610
- dagster_dbt_translator.get_asset_key(dbt_nodes[parent_id])
702
+ dagster_dbt_translator.get_asset_spec(manifest, parent_id, project).key
611
703
  for parent_id in parent_unique_ids
612
704
  }
613
705
  additional_deps.discard(asset_key)
614
706
 
707
+ severity = test_resource_props.get("config", {}).get("severity", "error")
708
+ blocking = severity.lower() == "error"
709
+
615
710
  return AssetCheckSpec(
616
711
  name=test_resource_props["name"],
617
712
  asset=asset_key,
618
713
  description=test_resource_props.get("meta", {}).get("description"),
619
714
  additional_deps=additional_deps,
715
+ metadata={DAGSTER_DBT_UNIQUE_ID_METADATA_KEY: test_unique_id},
716
+ blocking=blocking,
620
717
  )
621
718
 
622
719
 
623
- def default_code_version_fn(dbt_resource_props: Mapping[str, Any]) -> str:
624
- return hashlib.sha1(
625
- (dbt_resource_props.get("raw_sql") or dbt_resource_props.get("raw_code", "")).encode(
626
- "utf-8"
627
- )
628
- ).hexdigest()
720
+ def default_code_version_fn(dbt_resource_props: Mapping[str, Any]) -> Optional[str]:
721
+ code: Optional[str] = dbt_resource_props.get("raw_sql") or dbt_resource_props.get("raw_code")
722
+ if code:
723
+ return hashlib.sha1(code.encode("utf-8")).hexdigest()
724
+
725
+ return dbt_resource_props.get("checksum", {}).get("checksum")
629
726
 
630
727
 
631
728
  ###################
@@ -641,194 +738,181 @@ def is_non_asset_node(dbt_resource_props: Mapping[str, Any]):
641
738
  [
642
739
  resource_type == "metric",
643
740
  resource_type == "semantic_model",
741
+ resource_type == "saved_query",
644
742
  resource_type == "model"
645
743
  and dbt_resource_props.get("config", {}).get("materialized") == "ephemeral",
646
744
  ]
647
745
  )
648
746
 
649
747
 
650
- def get_deps(
651
- dbt_nodes: Mapping[str, Any],
652
- selected_unique_ids: AbstractSet[str],
653
- asset_resource_types: List[str],
654
- ) -> Mapping[str, FrozenSet[str]]:
655
- def _valid_parent_node(dbt_resource_props):
656
- # sources are valid parents, but not assets
657
- return dbt_resource_props["resource_type"] in asset_resource_types + ["source"]
658
-
659
- asset_deps: Dict[str, Set[str]] = {}
660
- for unique_id in selected_unique_ids:
661
- dbt_resource_props = dbt_nodes[unique_id]
662
- node_resource_type = dbt_resource_props["resource_type"]
663
-
664
- # skip non-assets, such as semantic models, metrics, tests, and ephemeral models
665
- if is_non_asset_node(dbt_resource_props) or node_resource_type not in asset_resource_types:
666
- continue
667
-
668
- asset_deps[unique_id] = set()
669
- for parent_unique_id in dbt_resource_props.get("depends_on", {}).get("nodes", []):
670
- parent_node_info = dbt_nodes[parent_unique_id]
671
- # for metrics or ephemeral dbt models, BFS to find valid parents
672
- if is_non_asset_node(parent_node_info):
673
- visited = set()
674
- replaced_parent_ids = set()
675
- # make a copy to avoid mutating the actual dictionary
676
- queue = list(parent_node_info.get("depends_on", {}).get("nodes", []))
677
- while queue:
678
- candidate_parent_id = queue.pop()
679
- if candidate_parent_id in visited:
680
- continue
681
- visited.add(candidate_parent_id)
748
+ def is_valid_upstream_node(dbt_resource_props: Mapping[str, Any]) -> bool:
749
+ # sources are valid parents, but not assets
750
+ return dbt_resource_props["resource_type"] in ASSET_RESOURCE_TYPES + ["source"]
682
751
 
683
- candidate_parent_info = dbt_nodes[candidate_parent_id]
684
- if is_non_asset_node(candidate_parent_info):
685
- queue.extend(candidate_parent_info.get("depends_on", {}).get("nodes", []))
686
- elif _valid_parent_node(candidate_parent_info):
687
- replaced_parent_ids.add(candidate_parent_id)
688
752
 
689
- asset_deps[unique_id] |= replaced_parent_ids
690
- # ignore nodes which are not assets / sources
691
- elif _valid_parent_node(parent_node_info):
692
- asset_deps[unique_id].add(parent_unique_id)
753
+ def get_upstream_unique_ids(
754
+ manifest: Mapping[str, Any],
755
+ dbt_resource_props: Mapping[str, Any],
756
+ ) -> AbstractSet[str]:
757
+ upstreams = set()
758
+ for parent_unique_id in dbt_resource_props.get("depends_on", {}).get("nodes", []):
759
+ parent_node_info = get_node(manifest, parent_unique_id)
760
+ # for metrics or ephemeral dbt models, BFS to find valid parents
761
+ if is_non_asset_node(parent_node_info):
762
+ visited = set()
763
+ replaced_parent_ids = set()
764
+ # make a copy to avoid mutating the actual dictionary
765
+ queue = list(parent_node_info.get("depends_on", {}).get("nodes", []))
766
+ while queue:
767
+ candidate_parent_id = queue.pop()
768
+ if candidate_parent_id in visited:
769
+ continue
770
+ visited.add(candidate_parent_id)
771
+
772
+ candidate_parent_info = get_node(manifest, candidate_parent_id)
773
+ if is_non_asset_node(candidate_parent_info):
774
+ queue.extend(candidate_parent_info.get("depends_on", {}).get("nodes", []))
775
+ elif is_valid_upstream_node(candidate_parent_info):
776
+ replaced_parent_ids.add(candidate_parent_id)
777
+
778
+ upstreams |= replaced_parent_ids
779
+ # ignore nodes which are not assets / sources
780
+ elif is_valid_upstream_node(parent_node_info):
781
+ upstreams.add(parent_unique_id)
782
+
783
+ return upstreams
784
+
785
+
786
+ def _build_child_map(manifest: Mapping[str, Any]) -> Mapping[str, AbstractSet[str]]:
787
+ """Manifests produced by early versions of dbt Fusion do not contain a child map, so we need to build it manually."""
788
+ if manifest.get("child_map"):
789
+ return manifest["child_map"]
790
+
791
+ child_map = defaultdict(set)
792
+ for unique_id, node in manifest["nodes"].items():
793
+ for upstream_unique_id in get_upstream_unique_ids(manifest, node):
794
+ child_map[upstream_unique_id].add(unique_id)
795
+ return child_map
796
+
797
+
798
+ def build_dbt_specs(
799
+ *,
800
+ translator: "DagsterDbtTranslator",
801
+ manifest: Mapping[str, Any],
802
+ select: str,
803
+ exclude: str,
804
+ selector: str,
805
+ io_manager_key: Optional[str],
806
+ project: Optional[DbtProject],
807
+ ) -> tuple[Sequence[AssetSpec], Sequence[AssetCheckSpec]]:
808
+ selected_unique_ids = select_unique_ids(
809
+ select=select, exclude=exclude, selector=selector, project=project, manifest_json=manifest
810
+ )
693
811
 
694
- frozen_asset_deps = {
695
- unique_id: frozenset(parent_ids) for unique_id, parent_ids in asset_deps.items()
696
- }
812
+ specs: list[AssetSpec] = []
813
+ check_specs: dict[str, AssetCheckSpec] = {}
814
+ key_by_unique_id: dict[str, AssetKey] = {}
697
815
 
698
- return frozen_asset_deps
816
+ child_map = _build_child_map(manifest)
817
+ for unique_id in selected_unique_ids:
818
+ resource_props = get_node(manifest, unique_id)
819
+ resource_type = resource_props["resource_type"]
699
820
 
821
+ # skip non-assets, such as semantic models, metrics, tests, and ephemeral models
822
+ if is_non_asset_node(resource_props) or resource_type not in ASSET_RESOURCE_TYPES:
823
+ continue
700
824
 
701
- def get_asset_deps(
702
- dbt_nodes,
703
- deps,
704
- io_manager_key,
705
- manifest: Optional[Mapping[str, Any]],
706
- dagster_dbt_translator: "DagsterDbtTranslator",
707
- ) -> Tuple[
708
- Dict[AssetKey, Set[AssetKey]],
709
- Dict[AssetKey, Tuple[str, In]],
710
- Dict[AssetKey, Tuple[str, Out]],
711
- Dict[AssetKey, str],
712
- Dict[AssetKey, FreshnessPolicy],
713
- Dict[AssetKey, AutoMaterializePolicy],
714
- Dict[str, AssetCheckSpec],
715
- Dict[str, List[str]],
716
- Dict[str, Dict[str, Any]],
717
- ]:
718
- from .dagster_dbt_translator import DbtManifestWrapper, validate_translator
719
-
720
- dagster_dbt_translator = validate_translator(dagster_dbt_translator)
721
-
722
- asset_deps: Dict[AssetKey, Set[AssetKey]] = {}
723
- asset_ins: Dict[AssetKey, Tuple[str, In]] = {}
724
- asset_outs: Dict[AssetKey, Tuple[str, Out]] = {}
725
-
726
- # These dicts could be refactored as a single dict, mapping from output name to arbitrary
727
- # metadata that we need to store for reference.
728
- group_names_by_key: Dict[AssetKey, str] = {}
729
- freshness_policies_by_key: Dict[AssetKey, FreshnessPolicy] = {}
730
- auto_materialize_policies_by_key: Dict[AssetKey, AutoMaterializePolicy] = {}
731
- check_specs_by_key: Dict[AssetCheckKey, AssetCheckSpec] = {}
732
- fqns_by_output_name: Dict[str, List[str]] = {}
733
- metadata_by_output_name: Dict[str, Dict[str, Any]] = {}
734
-
735
- for unique_id, parent_unique_ids in deps.items():
736
- dbt_resource_props = dbt_nodes[unique_id]
737
-
738
- output_name = dagster_name_fn(dbt_resource_props)
739
- fqns_by_output_name[output_name] = dbt_resource_props["fqn"]
740
-
741
- metadata_by_output_name[output_name] = {
742
- key: dbt_resource_props[key] for key in ["unique_id", "resource_type"]
743
- }
744
-
745
- asset_key = dagster_dbt_translator.get_asset_key(dbt_resource_props)
746
-
747
- asset_deps[asset_key] = set()
748
-
749
- metadata = merge_dicts(
750
- dagster_dbt_translator.get_metadata(dbt_resource_props),
751
- {
752
- DAGSTER_DBT_MANIFEST_METADATA_KEY: DbtManifestWrapper(manifest=manifest)
753
- if manifest
754
- else None,
755
- DAGSTER_DBT_TRANSLATOR_METADATA_KEY: dagster_dbt_translator,
756
- },
757
- )
758
- asset_outs[asset_key] = (
759
- output_name,
760
- Out(
761
- io_manager_key=io_manager_key,
762
- description=dagster_dbt_translator.get_description(dbt_resource_props),
763
- metadata=metadata,
764
- is_required=False,
765
- dagster_type=Nothing,
766
- code_version=default_code_version_fn(dbt_resource_props),
767
- ),
825
+ # get the spec for the given node
826
+ spec = translator.get_asset_spec(
827
+ manifest,
828
+ unique_id,
829
+ project,
768
830
  )
831
+ key_by_unique_id[unique_id] = spec.key
769
832
 
770
- group_name = dagster_dbt_translator.get_group_name(dbt_resource_props)
771
- if group_name is not None:
772
- group_names_by_key[asset_key] = group_name
833
+ # add the io manager key and set the dagster type to Nothing
834
+ if io_manager_key is not None:
835
+ spec = spec.with_io_manager_key(io_manager_key)
836
+ spec = spec.merge_attributes(metadata={SYSTEM_METADATA_KEY_DAGSTER_TYPE: Nothing})
773
837
 
774
- freshness_policy = dagster_dbt_translator.get_freshness_policy(dbt_resource_props)
775
- if freshness_policy is not None:
776
- freshness_policies_by_key[asset_key] = freshness_policy
838
+ specs.append(spec)
777
839
 
778
- auto_materialize_policy = dagster_dbt_translator.get_auto_materialize_policy(
779
- dbt_resource_props
780
- )
781
- if auto_materialize_policy is not None:
782
- auto_materialize_policies_by_key[asset_key] = auto_materialize_policy
783
-
784
- test_unique_ids = []
785
- if manifest:
786
- test_unique_ids = [
787
- child_unique_id
788
- for child_unique_id in manifest["child_map"][unique_id]
789
- if child_unique_id.startswith("test")
790
- ]
791
-
792
- for test_unique_id in test_unique_ids:
793
- check_spec = default_asset_check_fn(
794
- manifest,
795
- dbt_nodes,
796
- dagster_dbt_translator,
797
- asset_key,
798
- test_unique_id,
799
- )
800
- if check_spec:
801
- check_specs_by_key[check_spec.key] = check_spec
840
+ # add check specs associated with the asset
841
+ for child_unique_id in child_map.get(unique_id, []):
842
+ if child_unique_id not in selected_unique_ids or not child_unique_id.startswith("test"):
843
+ continue
844
+ check_spec = translator.get_asset_check_spec(
845
+ asset_spec=spec,
846
+ manifest=manifest,
847
+ unique_id=child_unique_id,
848
+ project=project,
849
+ )
802
850
 
803
- for parent_unique_id in parent_unique_ids:
804
- parent_node_info = dbt_nodes[parent_unique_id]
805
- parent_asset_key = dagster_dbt_translator.get_asset_key(parent_node_info)
851
+ if check_spec:
852
+ check_specs[check_spec.get_python_identifier()] = check_spec
853
+
854
+ # update the keys_by_unqiue_id dictionary to include keys created for upstream
855
+ # assets. note that this step may need to change once the translator is updated
856
+ # to no longer rely on `get_asset_key` as a standalone method
857
+ for upstream_id in get_upstream_unique_ids(manifest, resource_props):
858
+ spec = translator.get_asset_spec(manifest, upstream_id, project)
859
+ key_by_unique_id[upstream_id] = spec.key
860
+ if (
861
+ upstream_id.startswith("source")
862
+ and translator.settings.enable_source_tests_as_checks
863
+ ):
864
+ for child_unique_id in child_map.get(upstream_id, []):
865
+ if not child_unique_id.startswith("test"):
866
+ continue
867
+ check_spec = translator.get_asset_check_spec(
868
+ asset_spec=spec,
869
+ manifest=manifest,
870
+ unique_id=child_unique_id,
871
+ project=project,
872
+ )
873
+ if check_spec:
874
+ check_specs[check_spec.get_python_identifier()] = check_spec
806
875
 
807
- asset_deps[asset_key].add(parent_asset_key)
876
+ _validate_asset_keys(translator, manifest, key_by_unique_id)
877
+ return specs, list(check_specs.values())
808
878
 
809
- # if this parent is not one of the selected nodes, it's an input
810
- if parent_unique_id not in deps:
811
- input_name = dagster_name_fn(parent_node_info)
812
- asset_ins[parent_asset_key] = (input_name, In(Nothing))
813
879
 
814
- check_specs_by_output_name = cast(
815
- Dict[str, AssetCheckSpec],
816
- _validate_and_assign_output_names_to_check_specs(
817
- list(check_specs_by_key.values()), list(asset_outs.keys())
818
- ),
819
- )
880
+ def _validate_asset_keys(
881
+ translator: "DagsterDbtTranslator",
882
+ manifest: Mapping[str, Any],
883
+ key_by_unique_id: Mapping[str, AssetKey],
884
+ ) -> None:
885
+ unique_ids_by_key = defaultdict(set)
886
+ for unique_id, key in key_by_unique_id.items():
887
+ unique_ids_by_key[key].add(unique_id)
888
+
889
+ error_messages = []
890
+ for key, unique_ids in unique_ids_by_key.items():
891
+ if len(unique_ids) == 1:
892
+ continue
893
+ if translator.settings.enable_duplicate_source_asset_keys:
894
+ resource_types = {
895
+ get_node(manifest, unique_id)["resource_type"] for unique_id in unique_ids
896
+ }
897
+ if resource_types == {"source"}:
898
+ continue
899
+ formatted_ids = [
900
+ f" - `{id}` ({get_node(manifest, id)['original_file_path']})"
901
+ for id in sorted(unique_ids)
902
+ ]
903
+ error_messages.append(
904
+ "\n".join(
905
+ [
906
+ f"The following dbt resources have the asset key `{key.path}`:",
907
+ *formatted_ids,
908
+ ]
909
+ )
910
+ )
820
911
 
821
- return (
822
- asset_deps,
823
- asset_ins,
824
- asset_outs,
825
- group_names_by_key,
826
- freshness_policies_by_key,
827
- auto_materialize_policies_by_key,
828
- check_specs_by_output_name,
829
- fqns_by_output_name,
830
- metadata_by_output_name,
831
- )
912
+ if error_messages:
913
+ raise DagsterInvalidDefinitionError(
914
+ "\n\n".join([DUPLICATE_ASSET_KEY_ERROR_MESSAGE, *error_messages])
915
+ )
832
916
 
833
917
 
834
918
  def has_self_dependency(dbt_resource_props: Mapping[str, Any]) -> bool:
@@ -842,11 +926,12 @@ def get_asset_check_key_for_test(
842
926
  manifest: Mapping[str, Any],
843
927
  dagster_dbt_translator: "DagsterDbtTranslator",
844
928
  test_unique_id: str,
929
+ project: Optional[DbtProject],
845
930
  ) -> Optional[AssetCheckKey]:
846
931
  if not test_unique_id.startswith("test"):
847
932
  return None
848
933
 
849
- test_resource_props = manifest["nodes"][test_unique_id]
934
+ test_resource_props = get_node(manifest, test_unique_id)
850
935
  upstream_unique_ids: AbstractSet[str] = set(test_resource_props["depends_on"]["nodes"])
851
936
 
852
937
  # If the test is generic, it will have an attached node that we can use.
@@ -868,35 +953,267 @@ def get_asset_check_key_for_test(
868
953
  )
869
954
 
870
955
  # Attempt to find the attached node from the ref.
871
- if attached_node_ref and version.parse(dbt_version) >= version.parse("1.6.0"):
956
+ if attached_node_ref:
872
957
  ref_name, ref_package, ref_version = (
873
958
  attached_node_ref["name"],
874
959
  attached_node_ref.get("package"),
875
960
  attached_node_ref.get("version"),
876
961
  )
877
962
 
878
- project_name = manifest["metadata"]["project_name"]
963
+ project_name = manifest.get("metadata", {})["project_name"]
879
964
  if not ref_package:
880
965
  ref_package = project_name
881
966
 
882
- unique_id_by_ref: Mapping[Tuple[str, str, Optional[str]], str] = {
883
- (
967
+ attached_node_unique_id = None
968
+ for unique_id, dbt_resource_props in manifest["nodes"].items():
969
+ if (ref_name, ref_package, ref_version) == (
884
970
  dbt_resource_props["name"],
885
971
  dbt_resource_props["package_name"],
886
972
  dbt_resource_props.get("version"),
887
- ): unique_id
888
- for unique_id, dbt_resource_props in manifest["nodes"].items()
889
- }
890
-
891
- attached_node_unique_id = unique_id_by_ref.get((ref_name, ref_package, ref_version))
973
+ ):
974
+ attached_node_unique_id = unique_id
975
+ break
892
976
 
893
977
  if not attached_node_unique_id:
894
978
  return None
895
979
 
896
980
  return AssetCheckKey(
897
981
  name=test_resource_props["name"],
898
- asset_key=dagster_dbt_translator.get_asset_key(
899
- manifest["nodes"].get(attached_node_unique_id)
900
- or manifest["sources"].get(attached_node_unique_id)
901
- ),
982
+ asset_key=dagster_dbt_translator.get_asset_spec(
983
+ manifest,
984
+ attached_node_unique_id,
985
+ project,
986
+ ).key,
987
+ )
988
+
989
+
990
+ def get_checks_on_sources_upstream_of_selected_assets(
991
+ assets_def: AssetsDefinition, selected_asset_keys: AbstractSet[AssetKey]
992
+ ) -> AbstractSet[AssetCheckKey]:
993
+ upstream_source_keys = assets_def.get_upstream_input_keys(frozenset(selected_asset_keys))
994
+ return assets_def.get_checks_targeting_keys(frozenset(upstream_source_keys))
995
+
996
+
997
+ def get_subset_selection_for_context(
998
+ context: Union[OpExecutionContext, AssetExecutionContext],
999
+ manifest: Mapping[str, Any],
1000
+ select: Optional[str],
1001
+ exclude: Optional[str],
1002
+ selector: Optional[str],
1003
+ dagster_dbt_translator: "DagsterDbtTranslator",
1004
+ current_dbt_indirect_selection_env: Optional[str],
1005
+ ) -> tuple[list[str], Optional[str]]:
1006
+ """Generate a dbt selection string and DBT_INDIRECT_SELECTION setting to execute the selected
1007
+ resources in a subsetted execution context.
1008
+
1009
+ See https://docs.getdbt.com/reference/node-selection/syntax#how-does-selection-work.
1010
+
1011
+ Args:
1012
+ context (Union[OpExecutionContext, AssetExecutionContext]): The execution context for the current execution step.
1013
+ manifest (Mapping[str, Any]): The dbt manifest blob.
1014
+ select (Optional[str]): A dbt selection string to select resources to materialize.
1015
+ exclude (Optional[str]): A dbt selection string to exclude resources from materializing.
1016
+ selector (Optional[str]): A dbt selector to select resources to materialize.
1017
+ dagster_dbt_translator (DagsterDbtTranslator): The translator to link dbt nodes to Dagster
1018
+ assets.
1019
+ current_dbt_indirect_selection_env (Optional[str]): The user's value for the DBT_INDIRECT_SELECTION
1020
+ environment variable.
1021
+
1022
+
1023
+ Returns:
1024
+ List[str]: dbt CLI arguments to materialize the selected resources in a
1025
+ subsetted execution context.
1026
+
1027
+ If the current execution context is not performing a subsetted execution,
1028
+ return CLI arguments composed of the inputed selection and exclusion arguments.
1029
+ Optional[str]: A value for the DBT_INDIRECT_SELECTION environment variable. If None, then
1030
+ the environment variable is not set and will either use dbt's default (eager) or the
1031
+ user's setting.
1032
+ """
1033
+ default_dbt_selection = []
1034
+ if select:
1035
+ default_dbt_selection += ["--select", select]
1036
+ if exclude:
1037
+ default_dbt_selection += ["--exclude", exclude]
1038
+ if selector:
1039
+ default_dbt_selection += ["--selector", selector]
1040
+
1041
+ assets_def = context.assets_def
1042
+ is_asset_subset = assets_def.keys_by_output_name != assets_def.node_keys_by_output_name
1043
+ is_checks_subset = (
1044
+ assets_def.check_specs_by_output_name != assets_def.node_check_specs_by_output_name
902
1045
  )
1046
+
1047
+ # It's nice to use the default dbt selection arguments when not subsetting for readability. We
1048
+ # also use dbt indirect selection to avoid hitting cli arg length limits.
1049
+ # https://github.com/dagster-io/dagster/issues/16997#issuecomment-1832443279
1050
+ # A biproduct is that we'll run singular dbt tests (not currently modeled as asset checks) in
1051
+ # cases when we can use indirection selection, an not when we need to turn it off.
1052
+ if not (is_asset_subset or is_checks_subset):
1053
+ logger.info(
1054
+ "A dbt subsetted execution is not being performed. Using the default dbt selection"
1055
+ f" arguments `{default_dbt_selection}`."
1056
+ )
1057
+ # default eager indirect selection. This means we'll also run any singular tests (which
1058
+ # aren't modeled as asset checks currently).
1059
+ return default_dbt_selection, None
1060
+
1061
+ # Explicitly select a dbt resource by its path. Selecting a resource by path is more terse
1062
+ # than selecting it by its fully qualified name.
1063
+ # https://docs.getdbt.com/reference/node-selection/methods#the-path-method
1064
+ selected_asset_resources = get_dbt_resource_names_for_asset_keys(
1065
+ dagster_dbt_translator, manifest, assets_def, context.selected_asset_keys
1066
+ )
1067
+
1068
+ # We explicitly use node_check_specs_by_output_name because it contains every single check spec, not just those selected in the currently
1069
+ # executing subset.
1070
+ checks_targeting_selected_sources = get_checks_on_sources_upstream_of_selected_assets(
1071
+ assets_def=assets_def, selected_asset_keys=context.selected_asset_keys
1072
+ )
1073
+ selected_check_keys = {*context.selected_asset_check_keys, *checks_targeting_selected_sources}
1074
+
1075
+ # if all asset checks for the subsetted assets are selected, then we can just select the
1076
+ # assets and use indirect selection for the tests. We verify that
1077
+ # 1. all the selected checks are for selected assets
1078
+ # 2. no checks for selected assets are excluded
1079
+ # This also means we'll run any singular tests.
1080
+ selected_checks_on_non_selected_assets = {
1081
+ check_key
1082
+ for check_key in selected_check_keys
1083
+ if check_key.asset_key not in context.selected_asset_keys
1084
+ }
1085
+ all_check_keys = {
1086
+ check_spec.key for check_spec in assets_def.node_check_specs_by_output_name.values()
1087
+ }
1088
+ excluded_checks = all_check_keys.difference(selected_check_keys)
1089
+ excluded_checks_on_selected_assets = [
1090
+ check_key
1091
+ for check_key in excluded_checks
1092
+ if check_key.asset_key in context.selected_asset_keys
1093
+ ]
1094
+
1095
+ # note that this will always be false if checks are disabled (which means the assets_def has no
1096
+ # check specs)
1097
+ if excluded_checks_on_selected_assets:
1098
+ # select all assets and tests explicitly, and turn off indirect selection. This risks
1099
+ # hitting the CLI argument length limit, but in the common scenarios that can be launched from the UI
1100
+ # (all checks disabled, only one check and no assets) it's not a concern.
1101
+ # Since we're setting DBT_INDIRECT_SELECTION=empty, we won't run any singular tests.
1102
+ selected_dbt_resources = [
1103
+ *selected_asset_resources,
1104
+ *get_dbt_test_names_for_check_keys(
1105
+ dagster_dbt_translator, manifest, assets_def, context.selected_asset_check_keys
1106
+ ),
1107
+ ]
1108
+ indirect_selection_override = DBT_EMPTY_INDIRECT_SELECTION
1109
+ logger.info(
1110
+ "Overriding default `DBT_INDIRECT_SELECTION` "
1111
+ f"{current_dbt_indirect_selection_env or 'eager'} with "
1112
+ f"`{indirect_selection_override}` due to additional checks "
1113
+ f"{', '.join([c.to_user_string() for c in selected_checks_on_non_selected_assets])} "
1114
+ f"and excluded checks {', '.join([c.to_user_string() for c in excluded_checks_on_selected_assets])}."
1115
+ )
1116
+ elif selected_checks_on_non_selected_assets:
1117
+ # explicitly select the tests that won't be run via indirect selection
1118
+ selected_dbt_resources = [
1119
+ *selected_asset_resources,
1120
+ *get_dbt_test_names_for_check_keys(
1121
+ dagster_dbt_translator,
1122
+ manifest,
1123
+ assets_def,
1124
+ selected_checks_on_non_selected_assets,
1125
+ ),
1126
+ ]
1127
+ indirect_selection_override = None
1128
+ else:
1129
+ selected_dbt_resources = selected_asset_resources
1130
+ indirect_selection_override = None
1131
+
1132
+ logger.info(
1133
+ "A dbt subsetted execution is being performed. Overriding default dbt selection"
1134
+ f" arguments `{default_dbt_selection}` with arguments: `{selected_dbt_resources}`."
1135
+ )
1136
+
1137
+ # Take the union of all the selected resources.
1138
+ # https://docs.getdbt.com/reference/node-selection/set-operators#unions
1139
+ union_selected_dbt_resources = ["--select"] + [" ".join(selected_dbt_resources)]
1140
+
1141
+ return union_selected_dbt_resources, indirect_selection_override
1142
+
1143
+
1144
+ def get_dbt_resource_names_for_asset_keys(
1145
+ translator: "DagsterDbtTranslator",
1146
+ manifest: Mapping[str, Any],
1147
+ assets_def: AssetsDefinition,
1148
+ asset_keys: Iterable[AssetKey],
1149
+ ) -> Sequence[str]:
1150
+ dbt_resource_props_gen = (
1151
+ get_node(
1152
+ manifest,
1153
+ assets_def.get_asset_spec(key).metadata[DAGSTER_DBT_UNIQUE_ID_METADATA_KEY],
1154
+ )
1155
+ for key in asset_keys
1156
+ )
1157
+
1158
+ # Explicitly select a dbt resource by its file name.
1159
+ # https://docs.getdbt.com/reference/node-selection/methods#the-file-method
1160
+ if translator.settings.enable_dbt_selection_by_name:
1161
+ return [
1162
+ Path(dbt_resource_props["original_file_path"]).stem
1163
+ for dbt_resource_props in dbt_resource_props_gen
1164
+ ]
1165
+
1166
+ # Explictly select a dbt resource by its fully qualified name (FQN).
1167
+ # https://docs.getdbt.com/reference/node-selection/methods#the-file-or-fqn-method
1168
+ return [".".join(dbt_resource_props["fqn"]) for dbt_resource_props in dbt_resource_props_gen]
1169
+
1170
+
1171
+ def get_dbt_test_names_for_check_keys(
1172
+ translator: "DagsterDbtTranslator",
1173
+ manifest: Mapping[str, Any],
1174
+ assets_def: AssetsDefinition,
1175
+ check_keys: Iterable[AssetCheckKey],
1176
+ ) -> Sequence[str]:
1177
+ dbt_resource_props_gen = (
1178
+ get_node(
1179
+ manifest,
1180
+ (assets_def.get_spec_for_check_key(key).metadata or {})[
1181
+ DAGSTER_DBT_UNIQUE_ID_METADATA_KEY
1182
+ ],
1183
+ )
1184
+ for key in check_keys
1185
+ )
1186
+ # Explicitly select a dbt test by its test name.
1187
+ # https://docs.getdbt.com/reference/node-selection/test-selection-examples#more-complex-selection.
1188
+ if translator.settings.enable_dbt_selection_by_name:
1189
+ return [asset_check_key.name for asset_check_key in check_keys]
1190
+
1191
+ # Explictly select a dbt test by its fully qualified name (FQN).
1192
+ # https://docs.getdbt.com/reference/node-selection/methods#the-file-or-fqn-method
1193
+ return [".".join(dbt_resource_props["fqn"]) for dbt_resource_props in dbt_resource_props_gen]
1194
+
1195
+
1196
+ def get_node(manifest: Mapping[str, Any], unique_id: str) -> Mapping[str, Any]:
1197
+ """Find a node by unique_id in manifest_json."""
1198
+ if unique_id in manifest["nodes"]:
1199
+ return manifest["nodes"][unique_id]
1200
+
1201
+ if unique_id in manifest["sources"]:
1202
+ return manifest["sources"][unique_id]
1203
+
1204
+ if unique_id in manifest["exposures"]:
1205
+ return manifest["exposures"][unique_id]
1206
+
1207
+ if unique_id in manifest["metrics"]:
1208
+ return manifest["metrics"][unique_id]
1209
+
1210
+ if unique_id in manifest.get("semantic_models", {}):
1211
+ return manifest["semantic_models"][unique_id]
1212
+
1213
+ if unique_id in manifest.get("saved_queries", {}):
1214
+ return manifest["saved_queries"][unique_id]
1215
+
1216
+ if unique_id in manifest.get("unit_tests", {}):
1217
+ return manifest["unit_tests"][unique_id]
1218
+
1219
+ check.failed(f"Could not find {unique_id} in dbt manifest")