dagster-dbt 0.23.3__py3-none-any.whl → 0.28.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. dagster_dbt/__init__.py +41 -140
  2. dagster_dbt/asset_decorator.py +49 -230
  3. dagster_dbt/asset_specs.py +65 -0
  4. dagster_dbt/asset_utils.py +655 -338
  5. dagster_dbt/cli/app.py +44 -43
  6. dagster_dbt/cloud/__init__.py +6 -4
  7. dagster_dbt/cloud/asset_defs.py +119 -177
  8. dagster_dbt/cloud/cli.py +3 -4
  9. dagster_dbt/cloud/ops.py +9 -6
  10. dagster_dbt/cloud/resources.py +9 -4
  11. dagster_dbt/cloud/types.py +12 -7
  12. dagster_dbt/cloud/utils.py +186 -0
  13. dagster_dbt/cloud_v2/__init__.py +10 -0
  14. dagster_dbt/cloud_v2/asset_decorator.py +81 -0
  15. dagster_dbt/cloud_v2/cli_invocation.py +67 -0
  16. dagster_dbt/cloud_v2/client.py +438 -0
  17. dagster_dbt/cloud_v2/resources.py +462 -0
  18. dagster_dbt/cloud_v2/run_handler.py +229 -0
  19. dagster_dbt/cloud_v2/sensor_builder.py +254 -0
  20. dagster_dbt/cloud_v2/types.py +143 -0
  21. dagster_dbt/compat.py +107 -0
  22. dagster_dbt/components/__init__.py +0 -0
  23. dagster_dbt/components/dbt_project/__init__.py +0 -0
  24. dagster_dbt/components/dbt_project/component.py +545 -0
  25. dagster_dbt/components/dbt_project/scaffolder.py +65 -0
  26. dagster_dbt/core/__init__.py +0 -10
  27. dagster_dbt/core/dbt_cli_event.py +612 -0
  28. dagster_dbt/core/dbt_cli_invocation.py +474 -0
  29. dagster_dbt/core/dbt_event_iterator.py +399 -0
  30. dagster_dbt/core/resource.py +733 -0
  31. dagster_dbt/core/utils.py +14 -279
  32. dagster_dbt/dagster_dbt_translator.py +317 -74
  33. dagster_dbt/dbt_core_version.py +1 -0
  34. dagster_dbt/dbt_manifest.py +6 -5
  35. dagster_dbt/dbt_manifest_asset_selection.py +62 -22
  36. dagster_dbt/dbt_project.py +179 -40
  37. dagster_dbt/dbt_project_manager.py +173 -0
  38. dagster_dbt/dbt_version.py +0 -0
  39. dagster_dbt/errors.py +9 -84
  40. dagster_dbt/freshness_builder.py +147 -0
  41. dagster_dbt/include/pyproject.toml.jinja +21 -0
  42. dagster_dbt/include/scaffold/assets.py.jinja +1 -8
  43. dagster_dbt/include/scaffold/definitions.py.jinja +0 -15
  44. dagster_dbt/include/scaffold/project.py.jinja +1 -0
  45. dagster_dbt/include/setup.py.jinja +2 -3
  46. dagster_dbt/metadata_set.py +18 -0
  47. dagster_dbt/utils.py +136 -234
  48. dagster_dbt/version.py +1 -1
  49. dagster_dbt-0.28.4.dist-info/METADATA +47 -0
  50. dagster_dbt-0.28.4.dist-info/RECORD +59 -0
  51. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/WHEEL +1 -1
  52. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/entry_points.txt +3 -0
  53. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info/licenses}/LICENSE +1 -1
  54. dagster_dbt/asset_defs.py +0 -1049
  55. dagster_dbt/core/resources.py +0 -527
  56. dagster_dbt/core/resources_v2.py +0 -1542
  57. dagster_dbt/core/types.py +0 -63
  58. dagster_dbt/dbt_resource.py +0 -220
  59. dagster_dbt/include/scaffold/constants.py.jinja +0 -21
  60. dagster_dbt/ops.py +0 -134
  61. dagster_dbt/types.py +0 -22
  62. dagster_dbt-0.23.3.dist-info/METADATA +0 -31
  63. dagster_dbt-0.23.3.dist-info/RECORD +0 -43
  64. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,733 @@
1
+ import os
2
+ import re
3
+ import shutil
4
+ import uuid
5
+ from argparse import ArgumentParser, Namespace
6
+ from collections.abc import Sequence
7
+ from functools import cache, cached_property
8
+ from pathlib import Path
9
+ from subprocess import check_output
10
+ from typing import Any, Optional, Union, cast
11
+
12
+ import yaml
13
+ from dagster import (
14
+ AssetExecutionContext,
15
+ ConfigurableResource,
16
+ OpExecutionContext,
17
+ get_dagster_logger,
18
+ )
19
+ from dagster._annotations import public
20
+ from dagster._core.execution.context.init import InitResourceContext
21
+ from dagster._utils import pushd
22
+ from packaging import version
23
+ from pydantic import Field, ValidationInfo, field_validator, model_validator
24
+
25
+ from dagster_dbt.asset_utils import (
26
+ DBT_INDIRECT_SELECTION_ENV,
27
+ get_updated_cli_invocation_params_for_context,
28
+ )
29
+ from dagster_dbt.compat import DBT_PYTHON_VERSION, BaseAdapter
30
+ from dagster_dbt.core.dbt_cli_invocation import DbtCliInvocation, _get_dbt_target_path
31
+ from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator, validate_opt_translator
32
+ from dagster_dbt.dbt_manifest import DbtManifestParam, validate_manifest
33
+ from dagster_dbt.dbt_project import DbtProject
34
+
35
+ logger = get_dagster_logger()
36
+
37
+
38
@cache
def _get_dbt_executable() -> str:
    """Return the name of the dbt executable used by default.

    ``dbtf`` is chosen when it can be found on ``PATH``; otherwise ``dbt`` is returned. The
    result is cached, so the ``PATH`` lookup happens only once per process.
    """
    return "dbtf" if shutil.which("dbtf") else "dbt"


# Default executable, resolved once at import time.
DBT_EXECUTABLE = _get_dbt_executable()
# File names expected inside the project directory and the profiles directory, respectively.
DBT_PROJECT_YML_NAME = "dbt_project.yml"
DBT_PROFILES_YML_NAME = "profiles.yml"
49
+
50
+
51
DAGSTER_GITHUB_REPO_DBT_PACKAGE = "https://github.com/dagster-io/dagster.git"


def _dbt_packages_has_dagster_dbt(packages_file: Path) -> bool:
    """Check whether any package in the passed yaml file is the Dagster dbt package.

    Args:
        packages_file (Path): Path to a yaml file with an optional top-level ``packages`` list.

    Returns:
        bool: True if some entry's ``git`` value equals ``DAGSTER_GITHUB_REPO_DBT_PACKAGE``.
            False when the file is empty, is not a mapping, or has no (or an empty)
            ``packages`` key.
    """
    contents = yaml.safe_load(packages_file.read_text())
    # `safe_load` returns None for an empty document, and may return a non-mapping for
    # unexpected content; neither can contain a `packages` list.
    if not isinstance(contents, dict):
        return False

    # A bare `packages:` key loads as None, so fall back to an empty list.
    packages = cast("list[dict[str, Any]]", contents.get("packages") or [])
    return any(
        isinstance(package, dict) and package.get("git") == DAGSTER_GITHUB_REPO_DBT_PACKAGE
        for package in packages
    )
60
+
61
+
62
class DbtCliResource(ConfigurableResource):
    """A resource used to execute dbt CLI commands.

    Args:
        project_dir (str): The path to the dbt project directory. This directory should contain a
            `dbt_project.yml`. See https://docs.getdbt.com/reference/dbt_project.yml for more
            information.
        global_config_flags (List[str]): A list of global flags configuration to pass to the dbt CLI
            invocation. Invoke `dbt --help` to see a full list of global flags.
        profiles_dir (Optional[str]): The path to the directory containing your dbt `profiles.yml`.
            By default, the current working directory is used, which is the dbt project directory.
            See https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for more
            information.
        profile (Optional[str]): The profile from your dbt `profiles.yml` to use for execution. See
            https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for more
            information.
        target (Optional[str]): The target from your dbt `profiles.yml` to use for execution. See
            https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for more
            information.
        dbt_executable (str): The path to the dbt executable. Defaults to `dbtf` if available,
            otherwise `dbt`.
        state_path (Optional[str]): The path, relative to the project directory, to a directory of
            dbt artifacts to be used with `--state` / `--defer-state`.

    Examples:
        Creating a dbt resource with only a reference to ``project_dir``:

        .. code-block:: python

            from dagster_dbt import DbtCliResource

            dbt = DbtCliResource(project_dir="/path/to/dbt/project")

        Creating a dbt resource with a custom ``profiles_dir``:

        .. code-block:: python

            from dagster_dbt import DbtCliResource

            dbt = DbtCliResource(
                project_dir="/path/to/dbt/project",
                profiles_dir="/path/to/dbt/project/profiles",
            )

        Creating a dbt resource with a custom ``profile`` and ``target``:

        .. code-block:: python

            from dagster_dbt import DbtCliResource

            dbt = DbtCliResource(
                project_dir="/path/to/dbt/project",
                profiles_dir="/path/to/dbt/project/profiles",
                profile="jaffle_shop",
                target="dev",
            )

        Creating a dbt resource with global configs, e.g. disabling colored logs with ``--no-use-color``:

        .. code-block:: python

            from dagster_dbt import DbtCliResource

            dbt = DbtCliResource(
                project_dir="/path/to/dbt/project",
                global_config_flags=["--no-use-color"],
            )

        Creating a dbt resource with custom dbt executable path:

        .. code-block:: python

            from dagster_dbt import DbtCliResource

            dbt = DbtCliResource(
                project_dir="/path/to/dbt/project",
                dbt_executable="/path/to/dbt/executable",
            )
    """

    project_dir: str = Field(
        description=(
            "The path to your dbt project directory. This directory should contain a"
            " `dbt_project.yml`. See https://docs.getdbt.com/reference/dbt_project.yml for more"
            " information."
        ),
    )
    global_config_flags: list[str] = Field(
        default=[],
        description=(
            "A list of global flags configuration to pass to the dbt CLI invocation. See"
            " https://docs.getdbt.com/reference/global-configs for a full list of configuration."
        ),
    )
    profiles_dir: Optional[str] = Field(
        default=None,
        description=(
            "The path to the directory containing your dbt `profiles.yml`. By default, the current"
            " working directory is used, which is the dbt project directory."
            " See https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for "
            " more information."
        ),
    )
    profile: Optional[str] = Field(
        default=None,
        description=(
            "The profile from your dbt `profiles.yml` to use for execution. See"
            " https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for more"
            " information."
        ),
    )
    target: Optional[str] = Field(
        default=None,
        description=(
            "The target from your dbt `profiles.yml` to use for execution. See"
            " https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for more"
            " information."
        ),
    )
    dbt_executable: str = Field(
        default=DBT_EXECUTABLE,
        description="The path to the dbt executable. Defaults to `dbtf` if available, otherwise `dbt`.",
    )
    state_path: Optional[str] = Field(
        default=None,
        description=(
            "The path, relative to the project directory, to a directory of dbt artifacts to be"
            " used with --state / --defer-state."
            " This can be used with methods such as get_defer_args to allow for a @dbt_assets to"
            " use defer in the appropriate environments."
        ),
    )

    def __init__(
        self,
        project_dir: Union[str, Path, DbtProject],
        global_config_flags: Optional[list[str]] = None,
        profiles_dir: Optional[Union[str, Path]] = None,
        profile: Optional[str] = None,
        target: Optional[str] = None,
        dbt_executable: Union[str, Path] = DBT_EXECUTABLE,
        state_path: Optional[Union[str, Path]] = None,
        **kwargs,  # allow custom subclasses to add fields
    ):
        """Normalize the constructor arguments and hand them to the resource base class.

        When ``project_dir`` is a ``DbtProject``, any of ``state_path``, ``profiles_dir``,
        ``profile`` and ``target`` that were not supplied are taken from the project, and the
        project's own ``project_dir`` is used. Otherwise, a relative ``state_path`` is joined
        onto ``project_dir``.
        """
        if isinstance(project_dir, DbtProject):
            if not state_path and project_dir.state_path:
                state_path = project_dir.state_path

            if not profiles_dir and project_dir.profiles_dir:
                profiles_dir = project_dir.profiles_dir

            if not profile and project_dir.profile:
                profile = project_dir.profile

            if not target and project_dir.target:
                target = project_dir.target

            project_dir = project_dir.project_dir

        # DbtProject handles making state_path relative to project_dir
        # when directly instantiated we have to join it
        elif state_path and not Path(state_path).is_absolute():
            state_path = os.path.join(project_dir, state_path)

        project_dir = os.fspath(project_dir)
        state_path = state_path and os.fspath(state_path)

        # static typing doesn't understand whats going on here, thinks these fields dont exist
        super().__init__(
            project_dir=project_dir,  # type: ignore
            global_config_flags=global_config_flags or [],  # type: ignore
            profiles_dir=profiles_dir,  # type: ignore
            profile=profile,  # type: ignore
            target=target,  # type: ignore
            dbt_executable=dbt_executable,  # type: ignore
            state_path=state_path,  # type: ignore
            **kwargs,
        )

    @classmethod
    def _validate_absolute_path_exists(cls, path: Union[str, Path]) -> Path:
        """Return the absolute, resolved form of ``path``.

        Raises:
            ValueError: If the absolute path does not exist.
        """
        absolute_path = Path(path).absolute()
        try:
            resolved_path = absolute_path.resolve(strict=True)
        except FileNotFoundError as err:
            # Chain the original error so the underlying cause stays visible in tracebacks.
            raise ValueError(
                f"The absolute path of '{path}' ('{absolute_path}') does not exist"
            ) from err

        return resolved_path

    @classmethod
    def _validate_path_contains_file(cls, path: Path, file_name: str, error_message: str):
        """Raise ``ValueError(error_message)`` unless ``path`` contains ``file_name``."""
        if not path.joinpath(file_name).exists():
            raise ValueError(error_message)

    @field_validator("project_dir", "profiles_dir", "dbt_executable", mode="before")
    def convert_path_to_str(cls, v: Any) -> Any:
        """Validate that the path is converted to a string.

        ``Path`` values are resolved to an existing absolute path and returned as a string;
        every other value is returned unchanged.

        Raises:
            ValueError: If a ``Path`` value does not exist.
        """
        if isinstance(v, Path):
            # The helper performs the existence check and resolution; no need to repeat it here.
            return os.fspath(cls._validate_absolute_path_exists(v))

        return v

    @field_validator("project_dir")
    def validate_project_dir(cls, project_dir: str) -> str:
        """Resolve ``project_dir`` and require that it contains a ``dbt_project.yml`` file.

        Raises:
            ValueError: If the path does not exist or lacks the project file.
        """
        resolved_project_dir = cls._validate_absolute_path_exists(project_dir)

        cls._validate_path_contains_file(
            path=resolved_project_dir,
            file_name=DBT_PROJECT_YML_NAME,
            error_message=(
                f"{resolved_project_dir} does not contain a {DBT_PROJECT_YML_NAME} file. Please"
                " specify a valid path to a dbt project."
            ),
        )

        return os.fspath(resolved_project_dir)

    @field_validator("profiles_dir")
    def validate_profiles_dir(cls, profiles_dir: Optional[str]) -> Optional[str]:
        """Resolve ``profiles_dir`` (when set) and require that it contains a ``profiles.yml`` file.

        Raises:
            ValueError: If the path does not exist or lacks the profiles file.
        """
        if profiles_dir is None:
            return None

        resolved_profiles_dir = cls._validate_absolute_path_exists(profiles_dir)

        cls._validate_path_contains_file(
            path=resolved_profiles_dir,
            file_name=DBT_PROFILES_YML_NAME,
            error_message=(
                f"{resolved_profiles_dir} does not contain a {DBT_PROFILES_YML_NAME} file. Please"
                " specify a valid path to a dbt profile directory."
            ),
        )

        return os.fspath(resolved_profiles_dir)

    @field_validator("dbt_executable")
    def validate_dbt_executable(cls, dbt_executable: str) -> str:
        """Require that ``dbt_executable`` can be located with ``shutil.which``.

        The value is returned as given, not replaced by the located path.

        Raises:
            ValueError: If the executable cannot be found.
        """
        resolved_dbt_executable = shutil.which(dbt_executable)
        if not resolved_dbt_executable:
            raise ValueError(
                f"The dbt executable '{dbt_executable}' does not exist. Please specify a valid"
                " path to a dbt executable."
            )

        return dbt_executable

    @model_validator(mode="before")
    def validate_dbt_version(cls, values: dict[str, Any]) -> dict[str, Any]:
        """Validate that the dbt version is supported.

        Raises:
            ValueError: If dbt-core is installed and older than 1.7.0.
        """
        if DBT_PYTHON_VERSION is None:
            # dbt-core is not installed, so assume fusion is installed
            return values

        if DBT_PYTHON_VERSION < version.parse("1.7.0"):
            raise ValueError(
                "To use `dagster_dbt.DbtCliResource`, you must use `dbt-core>=1.7.0` or dbt Fusion. Currently,"
                f" you are using `dbt-core=={DBT_PYTHON_VERSION.base_version}`. Please install a compatible dbt engine."
            )

        return values

    @field_validator("state_path")
    def validate_state_path(cls, state_path: Optional[str], info: ValidationInfo) -> Optional[str]:
        """Return ``state_path`` as an absolute, resolved path string, or None when unset.

        Unlike the other path validators, the path is not required to exist.
        """
        if state_path is None:
            return None

        return os.fspath(Path(state_path).absolute().resolve())

    def _get_unique_target_path(
        self, *, context: Optional[Union[OpExecutionContext, AssetExecutionContext]]
    ) -> Path:
        """Get a unique target path for the dbt CLI invocation.

        Args:
            context (Optional[Union[OpExecutionContext, AssetExecutionContext]]): The execution context.

        Returns:
            Path: A unique target path for the dbt CLI invocation.
        """
        unique_id = str(uuid.uuid4())[:7]
        path = unique_id
        if context:
            # Prefix with the op name and a short run id so the directory is traceable to a run.
            path = f"{context.op_execution_context.op.name}-{context.run.run_id[:7]}-{unique_id}"

        current_target_path = _get_dbt_target_path()

        return current_target_path.joinpath(path)

    def _initialize_dbt_core_adapter(self, args: Sequence[str]) -> BaseAdapter:
        """Build and register a dbt-core adapter for this resource's project and profile.

        Args:
            args (Sequence[str]): The dbt CLI arguments; only ``--vars`` is read from them.

        Returns:
            BaseAdapter: The adapter obtained from dbt for the loaded configuration.
        """
        from dbt.adapters.factory import get_adapter, register_adapter, reset_adapters
        from dbt.config import RuntimeConfig
        from dbt.config.runtime import load_profile, load_project
        from dbt.config.utils import parse_cli_vars
        from dbt.flags import get_flags, set_from_args

        parser = ArgumentParser(description="Parse cli vars from dbt command")
        parser.add_argument("--vars")
        var_args, _ = parser.parse_known_args(args)
        if not var_args.vars:
            cli_vars = {}
        else:
            cli_vars = parse_cli_vars(var_args.vars)

        if DBT_PYTHON_VERSION >= version.parse("1.8.0"):
            from dbt_common.context import set_invocation_context

            set_invocation_context(os.environ.copy())

        # constructs a dummy set of flags, using the `run` command (ensures profile/project reqs get loaded)
        profiles_dir = self.profiles_dir if self.profiles_dir else self.project_dir
        set_from_args(Namespace(profiles_dir=profiles_dir), None)
        flags = get_flags()

        profile = load_profile(self.project_dir, cli_vars, self.profile, self.target)
        project = load_project(self.project_dir, False, profile, cli_vars)
        config = RuntimeConfig.from_parts(project, profile, flags)

        # these flags are required for the adapter to be able to look up
        # relations correctly
        new_flags = Namespace()
        for key, val in config.args.__dict__.items():
            setattr(new_flags, key, val)

        setattr(new_flags, "profile", profile.profile_name)
        setattr(new_flags, "target", profile.target_name)
        config.args = new_flags

        # If the dbt adapter is DuckDB, set the access mode to READ_ONLY, since DuckDB only allows
        # simultaneous connections for read-only access.

        if config.credentials and config.credentials.__class__.__name__ == "DuckDBCredentials":
            from dbt.adapters.duckdb.credentials import DuckDBCredentials

            if isinstance(config.credentials, DuckDBCredentials):
                if not config.credentials.config_options:
                    config.credentials.config_options = {}
                config.credentials.config_options["access_mode"] = "READ_ONLY"
                # convert adapter duckdb filepath to absolute path, since the Python
                # working directory may not be the same as the dbt project directory
                with pushd(self.project_dir):
                    config.credentials.path = os.fspath(Path(config.credentials.path).absolute())

        if DBT_PYTHON_VERSION < version.parse("1.8.0"):
            from dbt.events.functions import cleanup_event_logger  # type: ignore
        else:
            from dbt_common.events.event_manager_client import cleanup_event_logger

        cleanup_event_logger()

        # reset adapters list in case we have instantiated an adapter before in this process
        reset_adapters()
        if DBT_PYTHON_VERSION < version.parse("1.8.0"):
            register_adapter(config)  # type: ignore
        else:
            from dbt.adapters.protocol import MacroContextGeneratorCallable  # noqa: TC002
            from dbt.context.providers import generate_runtime_macro_context
            from dbt.mp_context import get_mp_context
            from dbt.parser.manifest import ManifestLoader

            register_adapter(config, get_mp_context())
            adapter = get_adapter(config)
            manifest = ManifestLoader.load_macros(
                config,
                adapter.connections.set_query_header,
                base_macros_only=True,
            )
            adapter.set_macro_resolver(manifest)
            adapter.set_macro_context_generator(
                cast("MacroContextGeneratorCallable", generate_runtime_macro_context)
            )

        adapter = cast("BaseAdapter", get_adapter(config))

        return adapter

    @cached_property
    def _cli_version(self) -> version.Version:
        """Gets the version of the currently-installed dbt executable.

        This may differ from the version of the dbt-core package, most obviously if dbt-core is not
        installed due to the fusion engine being used.

        Raises:
            ValueError: If no ``X.Y.Z`` version can be found in the ``--version`` output.
        """
        raw_output = check_output([self.dbt_executable, "--version"]).decode("utf-8").strip()
        match = re.search(r"(\d+\.\d+\.\d+)", raw_output)
        if not match:
            raise ValueError(f"Could not parse dbt version from output: {raw_output}")
        return version.parse(match.group(1))

    @public
    def get_defer_args(self) -> Sequence[str]:
        """Build the defer arguments for the dbt CLI command, using the supplied state directory.
        If no state directory is supplied, or the state directory does not have a manifest for
        comparison, an empty list of arguments is returned.

        Returns:
            Sequence[str]: The defer arguments for the dbt CLI command.
        """
        if not (self.state_path and Path(self.state_path).joinpath("manifest.json").exists()):
            return []

        return ["--defer", "--defer-state", self.state_path]

    @public
    def get_state_args(self) -> Sequence[str]:
        """Build the state arguments for the dbt CLI command, using the supplied state directory.
        If no state directory is supplied, or the state directory does not have a manifest for
        comparison, an empty list of arguments is returned.

        Returns:
            Sequence[str]: The state arguments for the dbt CLI command.
        """
        if not (self.state_path and Path(self.state_path).joinpath("manifest.json").exists()):
            return []

        return ["--state", self.state_path]

    @public
    def cli(
        self,
        args: Sequence[str],
        *,
        raise_on_error: bool = True,
        manifest: Optional[DbtManifestParam] = None,
        dagster_dbt_translator: Optional[DagsterDbtTranslator] = None,
        context: Optional[Union[OpExecutionContext, AssetExecutionContext]] = None,
        target_path: Optional[Path] = None,
    ) -> DbtCliInvocation:
        """Create a subprocess to execute a dbt CLI command.

        Args:
            args (Sequence[str]): The dbt CLI command to execute.
            raise_on_error (bool): Whether to raise an exception if the dbt CLI command fails.
            manifest (Optional[Union[Mapping[str, Any], str, Path]]): The dbt manifest blob. If an
                execution context from within `@dbt_assets` is provided to the context argument,
                then the manifest provided to `@dbt_assets` will be used.
            dagster_dbt_translator (Optional[DagsterDbtTranslator]): The translator to link dbt
                nodes to Dagster assets. If an execution context from within `@dbt_assets` is
                provided to the context argument, then the dagster_dbt_translator provided to
                `@dbt_assets` will be used.
            context (Optional[Union[OpExecutionContext, AssetExecutionContext]]): The execution context from within `@dbt_assets`.
                If an AssetExecutionContext is passed, its underlying OpExecutionContext will be used.
            target_path (Optional[Path]): An explicit path to a target folder to use to store and
                retrieve dbt artifacts when running a dbt CLI command. If not provided, a unique
                target path will be generated.

        Returns:
            DbtCliInvocation: A invocation instance that can be used to retrieve the output of the
                dbt CLI command.

        Examples:
            Streaming Dagster events for dbt asset materializations and observations:

            .. code-block:: python

                from pathlib import Path

                from dagster import AssetExecutionContext
                from dagster_dbt import DbtCliResource, dbt_assets


                @dbt_assets(manifest=Path("target", "manifest.json"))
                def my_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource):
                    yield from dbt.cli(["run"], context=context).stream()

            Retrieving a dbt artifact after streaming the Dagster events:

            .. code-block:: python

                from pathlib import Path

                from dagster import AssetExecutionContext
                from dagster_dbt import DbtCliResource, dbt_assets


                @dbt_assets(manifest=Path("target", "manifest.json"))
                def my_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource):
                    dbt_run_invocation = dbt.cli(["run"], context=context)

                    yield from dbt_run_invocation.stream()

                    # Retrieve the `run_results.json` dbt artifact as a dictionary:
                    run_results_json = dbt_run_invocation.get_artifact("run_results.json")

                    # Retrieve the `run_results.json` dbt artifact as a file path:
                    run_results_path = dbt_run_invocation.target_path.joinpath("run_results.json")

            Customizing the asset materialization metadata when streaming the Dagster events:

            .. code-block:: python

                from pathlib import Path

                from dagster import AssetExecutionContext, Output
                from dagster_dbt import DbtCliResource, dbt_assets


                @dbt_assets(manifest=Path("target", "manifest.json"))
                def my_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource):
                    dbt_cli_invocation = dbt.cli(["run"], context=context)

                    for dagster_event in dbt_cli_invocation.stream():
                        if isinstance(dagster_event, Output):
                            context.add_output_metadata(
                                metadata={
                                    "my_custom_metadata": "my_custom_metadata_value",
                                },
                                output_name=dagster_event.output_name,
                            )

                        yield dagster_event

            Suppressing exceptions from a dbt CLI command when a non-zero exit code is returned:

            .. code-block:: python

                from pathlib import Path

                from dagster import AssetExecutionContext
                from dagster_dbt import DbtCliResource, dbt_assets


                @dbt_assets(manifest=Path("target", "manifest.json"))
                def my_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource):
                    dbt_run_invocation = dbt.cli(["run"], context=context, raise_on_error=False)

                    if dbt_run_invocation.is_successful():
                        yield from dbt_run_invocation.stream()
                    else:
                        ...

            Invoking a dbt CLI command in a custom asset or op:

            .. code-block:: python

                import json

                from dagster import Nothing, Out, asset, op
                from dagster_dbt import DbtCliResource


                @asset
                def my_dbt_asset(dbt: DbtCliResource):
                    dbt_macro_args = {"key": "value"}
                    dbt.cli(["run-operation", "my-macro", json.dumps(dbt_macro_args)]).wait()


                @op(out=Out(Nothing))
                def my_dbt_op(dbt: DbtCliResource):
                    dbt_macro_args = {"key": "value"}
                    yield from dbt.cli(["run-operation", "my-macro", json.dumps(dbt_macro_args)]).stream()
        """
        dagster_dbt_translator = validate_opt_translator(dagster_dbt_translator)
        dagster_dbt_translator = dagster_dbt_translator or DagsterDbtTranslator()
        manifest = validate_manifest(manifest) if manifest else {}

        updated_params = get_updated_cli_invocation_params_for_context(
            context=context, manifest=manifest, dagster_dbt_translator=dagster_dbt_translator
        )
        manifest = updated_params.manifest
        dagster_dbt_translator = updated_params.dagster_dbt_translator
        selection_args = updated_params.selection_args
        indirect_selection = updated_params.indirect_selection
        target_path = target_path or self._get_unique_target_path(context=context)
        project_dir = Path(
            updated_params.dbt_project.project_dir
            if updated_params.dbt_project
            else self.project_dir
        )
        env = {
            # Allow IO streaming when running in Windows.
            # Also, allow it to be overriden by the current environment.
            "PYTHONLEGACYWINDOWSSTDIO": "1",
            # Pass the current environment variables to the dbt CLI invocation.
            **os.environ.copy(),
            # An environment variable to indicate that the dbt CLI is being invoked from Dagster.
            "DAGSTER_DBT_CLI": "true",
            # Run dbt with unbuffered output.
            "PYTHONUNBUFFERED": "1",
            # Disable anonymous usage statistics for performance.
            "DBT_SEND_ANONYMOUS_USAGE_STATS": "false",
            # The DBT_LOG_FORMAT environment variable must be set to `json`. We use this
            # environment variable to ensure that the dbt CLI outputs structured logs.
            "DBT_LOG_FORMAT": "json",
            # The DBT_TARGET_PATH environment variable is set to a unique value for each dbt
            # invocation so that artifact paths are separated.
            # See https://discourse.getdbt.com/t/multiple-run-results-json-and-manifest-json-files/7555
            # for more information.
            "DBT_TARGET_PATH": os.fspath(target_path),
            # The DBT_LOG_PATH environment variable is set to the same value as DBT_TARGET_PATH
            # so that logs for each dbt invocation has separate log files.
            "DBT_LOG_PATH": os.fspath(target_path),
            # The DBT_PROFILES_DIR environment variable is set to the path containing the dbt
            # profiles.yml file.
            # See https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles#advanced-customizing-a-profile-directory
            # for more information.
            **({"DBT_PROFILES_DIR": self.profiles_dir} if self.profiles_dir else {}),
            # The DBT_PROJECT_DIR environment variable is set to the path containing the dbt project
            # See https://docs.getdbt.com/reference/dbt_project.yml for more information.
            "DBT_PROJECT_DIR": str(project_dir),
        }

        # set dbt indirect selection if needed to execute specific dbt tests due to asset check
        # selection
        if indirect_selection:
            env[DBT_INDIRECT_SELECTION_ENV] = indirect_selection

        # TODO: verify that args does not have any selection flags if the context and manifest
        # are passed to this function.
        profile_args: list[str] = []
        if self.profile:
            profile_args = ["--profile", self.profile]

        if self.target:
            profile_args += ["--target", self.target]

        full_dbt_args = [
            self.dbt_executable,
            *self.global_config_flags,
            *args,
            *profile_args,
            *selection_args,
        ]

        if not target_path.is_absolute():
            target_path = project_dir.joinpath(target_path)

        # `self._cli_version` runs `<dbt_executable> --version` (cached after the first access).
        adapter: Optional[BaseAdapter] = None
        with pushd(str(project_dir)):
            # we do not need to initialize the adapter if we are using the fusion engine
            if self._cli_version.major < 2:
                try:
                    adapter = self._initialize_dbt_core_adapter(args)
                except Exception:
                    # Adapter creation is best-effort: log and continue without one. Only
                    # `Exception` is caught so KeyboardInterrupt/SystemExit still propagate.
                    logger.warning(
                        "An error was encountered when creating a handle to the dbt adapter in Dagster.",
                        exc_info=True,
                    )

        return DbtCliInvocation.run(
            args=full_dbt_args,
            env=env,
            manifest=manifest,
            dagster_dbt_translator=dagster_dbt_translator,
            project_dir=project_dir,
            target_path=target_path,
            raise_on_error=raise_on_error,
            context=context,
            adapter=adapter,
            cli_version=self._cli_version,
            dbt_project=updated_params.dbt_project,
        )

    def setup_for_execution(self, context: InitResourceContext) -> None:
        """Log a deprecation warning when the project depends on the Dagster dbt package.

        Both `packages.yml` and `dependencies.yml` in the project directory are checked, when
        they exist. Nothing is logged if the context has no logger.
        """
        packages_yaml = Path(self.project_dir).joinpath("packages.yml")
        dependencies_yaml = Path(self.project_dir).joinpath("dependencies.yml")

        if context.log and (
            (packages_yaml.exists() and _dbt_packages_has_dagster_dbt(packages_yaml))
            or (dependencies_yaml.exists() and _dbt_packages_has_dagster_dbt(dependencies_yaml))
        ):
            # `warning` replaces the deprecated `warn` alias.
            context.log.warning(
                "Fetching column metadata using `log_column_level_metadata` macro is deprecated and will be"
                " removed in dagster-dbt 0.24.0. Use the `fetch_column_metadata` method in your asset definition"
                " to fetch column metadata instead."
            )