dagster-dbt 0.23.3__py3-none-any.whl → 0.28.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_dbt/__init__.py +41 -140
- dagster_dbt/asset_decorator.py +49 -230
- dagster_dbt/asset_specs.py +65 -0
- dagster_dbt/asset_utils.py +655 -338
- dagster_dbt/cli/app.py +44 -43
- dagster_dbt/cloud/__init__.py +6 -4
- dagster_dbt/cloud/asset_defs.py +119 -177
- dagster_dbt/cloud/cli.py +3 -4
- dagster_dbt/cloud/ops.py +9 -6
- dagster_dbt/cloud/resources.py +9 -4
- dagster_dbt/cloud/types.py +12 -7
- dagster_dbt/cloud/utils.py +186 -0
- dagster_dbt/cloud_v2/__init__.py +10 -0
- dagster_dbt/cloud_v2/asset_decorator.py +81 -0
- dagster_dbt/cloud_v2/cli_invocation.py +67 -0
- dagster_dbt/cloud_v2/client.py +438 -0
- dagster_dbt/cloud_v2/resources.py +462 -0
- dagster_dbt/cloud_v2/run_handler.py +229 -0
- dagster_dbt/cloud_v2/sensor_builder.py +254 -0
- dagster_dbt/cloud_v2/types.py +143 -0
- dagster_dbt/compat.py +107 -0
- dagster_dbt/components/__init__.py +0 -0
- dagster_dbt/components/dbt_project/__init__.py +0 -0
- dagster_dbt/components/dbt_project/component.py +545 -0
- dagster_dbt/components/dbt_project/scaffolder.py +65 -0
- dagster_dbt/core/__init__.py +0 -10
- dagster_dbt/core/dbt_cli_event.py +612 -0
- dagster_dbt/core/dbt_cli_invocation.py +474 -0
- dagster_dbt/core/dbt_event_iterator.py +399 -0
- dagster_dbt/core/resource.py +733 -0
- dagster_dbt/core/utils.py +14 -279
- dagster_dbt/dagster_dbt_translator.py +317 -74
- dagster_dbt/dbt_core_version.py +1 -0
- dagster_dbt/dbt_manifest.py +6 -5
- dagster_dbt/dbt_manifest_asset_selection.py +62 -22
- dagster_dbt/dbt_project.py +179 -40
- dagster_dbt/dbt_project_manager.py +173 -0
- dagster_dbt/dbt_version.py +0 -0
- dagster_dbt/errors.py +9 -84
- dagster_dbt/freshness_builder.py +147 -0
- dagster_dbt/include/pyproject.toml.jinja +21 -0
- dagster_dbt/include/scaffold/assets.py.jinja +1 -8
- dagster_dbt/include/scaffold/definitions.py.jinja +0 -15
- dagster_dbt/include/scaffold/project.py.jinja +1 -0
- dagster_dbt/include/setup.py.jinja +2 -3
- dagster_dbt/metadata_set.py +18 -0
- dagster_dbt/utils.py +136 -234
- dagster_dbt/version.py +1 -1
- dagster_dbt-0.28.4.dist-info/METADATA +47 -0
- dagster_dbt-0.28.4.dist-info/RECORD +59 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/WHEEL +1 -1
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/entry_points.txt +3 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info/licenses}/LICENSE +1 -1
- dagster_dbt/asset_defs.py +0 -1049
- dagster_dbt/core/resources.py +0 -527
- dagster_dbt/core/resources_v2.py +0 -1542
- dagster_dbt/core/types.py +0 -63
- dagster_dbt/dbt_resource.py +0 -220
- dagster_dbt/include/scaffold/constants.py.jinja +0 -21
- dagster_dbt/ops.py +0 -134
- dagster_dbt/types.py +0 -22
- dagster_dbt-0.23.3.dist-info/METADATA +0 -31
- dagster_dbt-0.23.3.dist-info/RECORD +0 -43
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,733 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import shutil
|
|
4
|
+
import uuid
|
|
5
|
+
from argparse import ArgumentParser, Namespace
|
|
6
|
+
from collections.abc import Sequence
|
|
7
|
+
from functools import cache, cached_property
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from subprocess import check_output
|
|
10
|
+
from typing import Any, Optional, Union, cast
|
|
11
|
+
|
|
12
|
+
import yaml
|
|
13
|
+
from dagster import (
|
|
14
|
+
AssetExecutionContext,
|
|
15
|
+
ConfigurableResource,
|
|
16
|
+
OpExecutionContext,
|
|
17
|
+
get_dagster_logger,
|
|
18
|
+
)
|
|
19
|
+
from dagster._annotations import public
|
|
20
|
+
from dagster._core.execution.context.init import InitResourceContext
|
|
21
|
+
from dagster._utils import pushd
|
|
22
|
+
from packaging import version
|
|
23
|
+
from pydantic import Field, ValidationInfo, field_validator, model_validator
|
|
24
|
+
|
|
25
|
+
from dagster_dbt.asset_utils import (
|
|
26
|
+
DBT_INDIRECT_SELECTION_ENV,
|
|
27
|
+
get_updated_cli_invocation_params_for_context,
|
|
28
|
+
)
|
|
29
|
+
from dagster_dbt.compat import DBT_PYTHON_VERSION, BaseAdapter
|
|
30
|
+
from dagster_dbt.core.dbt_cli_invocation import DbtCliInvocation, _get_dbt_target_path
|
|
31
|
+
from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator, validate_opt_translator
|
|
32
|
+
from dagster_dbt.dbt_manifest import DbtManifestParam, validate_manifest
|
|
33
|
+
from dagster_dbt.dbt_project import DbtProject
|
|
34
|
+
|
|
35
|
+
logger = get_dagster_logger()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@cache
|
|
39
|
+
def _get_dbt_executable() -> str:
|
|
40
|
+
if shutil.which("dbtf"):
|
|
41
|
+
return "dbtf"
|
|
42
|
+
else:
|
|
43
|
+
return "dbt"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
DBT_EXECUTABLE = _get_dbt_executable()
|
|
47
|
+
DBT_PROJECT_YML_NAME = "dbt_project.yml"
|
|
48
|
+
DBT_PROFILES_YML_NAME = "profiles.yml"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
DAGSTER_GITHUB_REPO_DBT_PACKAGE = "https://github.com/dagster-io/dagster.git"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _dbt_packages_has_dagster_dbt(packages_file: Path) -> bool:
    """Check whether any package in the passed yaml file is the Dagster dbt package.

    Args:
        packages_file (Path): Path to a yaml file that may define a top-level ``packages``
            list of mappings.

    Returns:
        bool: True if any entry's ``git`` value equals ``DAGSTER_GITHUB_REPO_DBT_PACKAGE``.
            An empty file, a missing ``packages`` key, or a ``packages`` key with no value
            all yield False.
    """
    # `yaml.safe_load` returns None for an empty document, and a bare `packages:` key
    # parses to None as well; normalize both so the lookup and iteration cannot fail.
    contents = yaml.safe_load(packages_file.read_text()) or {}
    packages = cast("list[dict[str, Any]]", contents.get("packages") or [])
    return any(package.get("git") == DAGSTER_GITHUB_REPO_DBT_PACKAGE for package in packages)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class DbtCliResource(ConfigurableResource):
|
|
63
|
+
"""A resource used to execute dbt CLI commands.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
project_dir (str): The path to the dbt project directory. This directory should contain a
|
|
67
|
+
`dbt_project.yml`. See https://docs.getdbt.com/reference/dbt_project.yml for more
|
|
68
|
+
information.
|
|
69
|
+
global_config_flags (List[str]): A list of global flags configuration to pass to the dbt CLI
|
|
70
|
+
invocation. Invoke `dbt --help` to see a full list of global flags.
|
|
71
|
+
profiles_dir (Optional[str]): The path to the directory containing your dbt `profiles.yml`.
|
|
72
|
+
By default, the current working directory is used, which is the dbt project directory.
|
|
73
|
+
See https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for more
|
|
74
|
+
information.
|
|
75
|
+
profile (Optional[str]): The profile from your dbt `profiles.yml` to use for execution. See
|
|
76
|
+
https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for more
|
|
77
|
+
information.
|
|
78
|
+
target (Optional[str]): The target from your dbt `profiles.yml` to use for execution. See
|
|
79
|
+
https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for more
|
|
80
|
+
information.
|
|
81
|
+
dbt_executable (str): The path to the dbt executable. By default, this is `dbtf` if it is available, otherwise `dbt`.
|
|
82
|
+
state_path (Optional[str]): The path, relative to the project directory, to a directory of
|
|
83
|
+
dbt artifacts to be used with `--state` / `--defer-state`.
|
|
84
|
+
|
|
85
|
+
Examples:
|
|
86
|
+
Creating a dbt resource with only a reference to ``project_dir``:
|
|
87
|
+
|
|
88
|
+
.. code-block:: python
|
|
89
|
+
|
|
90
|
+
from dagster_dbt import DbtCliResource
|
|
91
|
+
|
|
92
|
+
dbt = DbtCliResource(project_dir="/path/to/dbt/project")
|
|
93
|
+
|
|
94
|
+
Creating a dbt resource with a custom ``profiles_dir``:
|
|
95
|
+
|
|
96
|
+
.. code-block:: python
|
|
97
|
+
|
|
98
|
+
from dagster_dbt import DbtCliResource
|
|
99
|
+
|
|
100
|
+
dbt = DbtCliResource(
|
|
101
|
+
project_dir="/path/to/dbt/project",
|
|
102
|
+
profiles_dir="/path/to/dbt/project/profiles",
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
Creating a dbt resource with a custom ``profile`` and ``target``:
|
|
106
|
+
|
|
107
|
+
.. code-block:: python
|
|
108
|
+
|
|
109
|
+
from dagster_dbt import DbtCliResource
|
|
110
|
+
|
|
111
|
+
dbt = DbtCliResource(
|
|
112
|
+
project_dir="/path/to/dbt/project",
|
|
113
|
+
profiles_dir="/path/to/dbt/project/profiles",
|
|
114
|
+
profile="jaffle_shop",
|
|
115
|
+
target="dev",
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
Creating a dbt resource with global configs, e.g. disabling colored logs with ``--no-use-color``:
|
|
119
|
+
|
|
120
|
+
.. code-block:: python
|
|
121
|
+
|
|
122
|
+
from dagster_dbt import DbtCliResource
|
|
123
|
+
|
|
124
|
+
dbt = DbtCliResource(
|
|
125
|
+
project_dir="/path/to/dbt/project",
|
|
126
|
+
global_config_flags=["--no-use-color"],
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
Creating a dbt resource with custom dbt executable path:
|
|
130
|
+
|
|
131
|
+
.. code-block:: python
|
|
132
|
+
|
|
133
|
+
from dagster_dbt import DbtCliResource
|
|
134
|
+
|
|
135
|
+
dbt = DbtCliResource(
|
|
136
|
+
project_dir="/path/to/dbt/project",
|
|
137
|
+
dbt_executable="/path/to/dbt/executable",
|
|
138
|
+
)
|
|
139
|
+
"""
|
|
140
|
+
|
|
141
|
+
project_dir: str = Field(
|
|
142
|
+
description=(
|
|
143
|
+
"The path to your dbt project directory. This directory should contain a"
|
|
144
|
+
" `dbt_project.yml`. See https://docs.getdbt.com/reference/dbt_project.yml for more"
|
|
145
|
+
" information."
|
|
146
|
+
),
|
|
147
|
+
)
|
|
148
|
+
global_config_flags: list[str] = Field(
|
|
149
|
+
default=[],
|
|
150
|
+
description=(
|
|
151
|
+
"A list of global flags configuration to pass to the dbt CLI invocation. See"
|
|
152
|
+
" https://docs.getdbt.com/reference/global-configs for a full list of configuration."
|
|
153
|
+
),
|
|
154
|
+
)
|
|
155
|
+
profiles_dir: Optional[str] = Field(
|
|
156
|
+
default=None,
|
|
157
|
+
description=(
|
|
158
|
+
"The path to the directory containing your dbt `profiles.yml`. By default, the current"
|
|
159
|
+
" working directory is used, which is the dbt project directory."
|
|
160
|
+
" See https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for "
|
|
161
|
+
" more information."
|
|
162
|
+
),
|
|
163
|
+
)
|
|
164
|
+
profile: Optional[str] = Field(
|
|
165
|
+
default=None,
|
|
166
|
+
description=(
|
|
167
|
+
"The profile from your dbt `profiles.yml` to use for execution. See"
|
|
168
|
+
" https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for more"
|
|
169
|
+
" information."
|
|
170
|
+
),
|
|
171
|
+
)
|
|
172
|
+
target: Optional[str] = Field(
|
|
173
|
+
default=None,
|
|
174
|
+
description=(
|
|
175
|
+
"The target from your dbt `profiles.yml` to use for execution. See"
|
|
176
|
+
" https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles for more"
|
|
177
|
+
" information."
|
|
178
|
+
),
|
|
179
|
+
)
|
|
180
|
+
dbt_executable: str = Field(
|
|
181
|
+
default=DBT_EXECUTABLE,
|
|
182
|
+
description="The path to the dbt executable. Defaults to `dbtf` if available, otherwise `dbt`.",
|
|
183
|
+
)
|
|
184
|
+
state_path: Optional[str] = Field(
|
|
185
|
+
default=None,
|
|
186
|
+
description=(
|
|
187
|
+
"The path, relative to the project directory, to a directory of dbt artifacts to be"
|
|
188
|
+
" used with --state / --defer-state."
|
|
189
|
+
" This can be used with methods such as get_defer_args to allow for a @dbt_assets to"
|
|
190
|
+
" use defer in the appropriate environments."
|
|
191
|
+
),
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
def __init__(
    self,
    project_dir: Union[str, Path, DbtProject],
    global_config_flags: Optional[list[str]] = None,
    profiles_dir: Optional[Union[str, Path]] = None,
    profile: Optional[str] = None,
    target: Optional[str] = None,
    dbt_executable: Union[str, Path] = DBT_EXECUTABLE,
    state_path: Optional[Union[str, Path]] = None,
    **kwargs,  # allow custom subclasses to add fields
):
    """Normalize the constructor arguments and pass them to the base class initializer.

    When ``project_dir`` is a ``DbtProject``, each of ``state_path``, ``profiles_dir``,
    ``profile`` and ``target`` that was not supplied (falsy) is taken from the project
    when the project defines it, and the project's own ``project_dir`` is used.
    Otherwise, a relative ``state_path`` is joined onto ``project_dir``.
    """
    if isinstance(project_dir, DbtProject):
        dbt_project = project_dir

        # Explicit arguments win; only falsy ones fall back to the project's settings.
        if not state_path:
            state_path = dbt_project.state_path or state_path
        if not profiles_dir:
            profiles_dir = dbt_project.profiles_dir or profiles_dir
        if not profile:
            profile = dbt_project.profile or profile
        if not target:
            target = dbt_project.target or target

        project_dir = dbt_project.project_dir
    elif state_path and not Path(state_path).is_absolute():
        # DbtProject handles making state_path relative to project_dir
        # when directly instantiated we have to join it
        state_path = os.path.join(project_dir, state_path)

    project_dir = os.fspath(project_dir)
    if state_path:
        state_path = os.fspath(state_path)

    # static typing doesn't understand whats going on here, thinks these fields dont exist
    super().__init__(
        project_dir=project_dir,  # type: ignore
        global_config_flags=global_config_flags or [],  # type: ignore
        profiles_dir=profiles_dir,  # type: ignore
        profile=profile,  # type: ignore
        target=target,  # type: ignore
        dbt_executable=dbt_executable,  # type: ignore
        state_path=state_path,  # type: ignore
        **kwargs,
    )
|
|
239
|
+
|
|
240
|
+
@classmethod
|
|
241
|
+
def _validate_absolute_path_exists(cls, path: Union[str, Path]) -> Path:
|
|
242
|
+
absolute_path = Path(path).absolute()
|
|
243
|
+
try:
|
|
244
|
+
resolved_path = absolute_path.resolve(strict=True)
|
|
245
|
+
except FileNotFoundError:
|
|
246
|
+
raise ValueError(f"The absolute path of '{path}' ('{absolute_path}') does not exist")
|
|
247
|
+
|
|
248
|
+
return resolved_path
|
|
249
|
+
|
|
250
|
+
@classmethod
|
|
251
|
+
def _validate_path_contains_file(cls, path: Path, file_name: str, error_message: str):
|
|
252
|
+
if not path.joinpath(file_name).exists():
|
|
253
|
+
raise ValueError(error_message)
|
|
254
|
+
|
|
255
|
+
@field_validator("project_dir", "profiles_dir", "dbt_executable", mode="before")
def convert_path_to_str(cls, v: Any) -> Any:
    """Convert a ``Path`` value to a string before field validation runs.

    Args:
        v (Any): The raw value supplied for the field.

    Returns:
        Any: For a ``Path``, the string form of its strictly resolved absolute path;
            any other value is returned unchanged.

    Raises:
        ValueError: If ``v`` is a ``Path`` that does not exist.
    """
    if isinstance(v, Path):
        # The shared helper already resolves the path and raises the same ValueError,
        # so there is no need to repeat the resolution inline.
        return os.fspath(cls._validate_absolute_path_exists(v))

    return v
|
|
269
|
+
|
|
270
|
+
@field_validator("project_dir")
def validate_project_dir(cls, project_dir: str) -> str:
    """Validate that ``project_dir`` exists and contains a ``dbt_project.yml`` file.

    Returns:
        str: The resolved project directory as a string.

    Raises:
        ValueError: If the directory does not exist or lacks the project file.
    """
    project_path = cls._validate_absolute_path_exists(project_dir)
    missing_file_message = (
        f"{project_path} does not contain a {DBT_PROJECT_YML_NAME} file. Please"
        " specify a valid path to a dbt project."
    )
    cls._validate_path_contains_file(
        path=project_path,
        file_name=DBT_PROJECT_YML_NAME,
        error_message=missing_file_message,
    )

    return os.fspath(project_path)
|
|
284
|
+
|
|
285
|
+
@field_validator("profiles_dir")
def validate_profiles_dir(cls, profiles_dir: Optional[str]) -> Optional[str]:
    """Validate that ``profiles_dir``, when given, exists and contains a ``profiles.yml``.

    Returns:
        Optional[str]: None when no directory was given, otherwise the resolved
            directory as a string.

    Raises:
        ValueError: If the directory does not exist or lacks the profiles file.
    """
    if profiles_dir is None:
        return None

    profiles_path = cls._validate_absolute_path_exists(profiles_dir)
    missing_file_message = (
        f"{profiles_path} does not contain a {DBT_PROFILES_YML_NAME} file. Please"
        " specify a valid path to a dbt profile directory."
    )
    cls._validate_path_contains_file(
        path=profiles_path,
        file_name=DBT_PROFILES_YML_NAME,
        error_message=missing_file_message,
    )

    return os.fspath(profiles_path)
|
|
302
|
+
|
|
303
|
+
@field_validator("dbt_executable")
def validate_dbt_executable(cls, dbt_executable: str) -> str:
    """Validate that ``dbt_executable`` can be located by ``shutil.which``.

    Returns:
        str: The value as supplied (not the resolved location).

    Raises:
        ValueError: If ``shutil.which`` cannot find the executable.
    """
    if shutil.which(dbt_executable):
        return dbt_executable

    raise ValueError(
        f"The dbt executable '{dbt_executable}' does not exist. Please specify a valid"
        " path to a dbt executable."
    )
|
|
313
|
+
|
|
314
|
+
@model_validator(mode="before")
def validate_dbt_version(cls, values: dict[str, Any]) -> dict[str, Any]:
    """Reject unsupported dbt-core versions before the model is built.

    Returns:
        dict[str, Any]: ``values``, unchanged.

    Raises:
        ValueError: If ``DBT_PYTHON_VERSION`` is set and is older than 1.7.0.
    """
    # A missing version means dbt-core is not installed, so assume fusion is installed
    # and skip the check.
    if DBT_PYTHON_VERSION is not None and DBT_PYTHON_VERSION < version.parse("1.7.0"):
        raise ValueError(
            "To use `dagster_dbt.DbtCliResource`, you must use `dbt-core>=1.7.0` or dbt Fusion. Currently,"
            f" you are using `dbt-core=={DBT_PYTHON_VERSION.base_version}`. Please install a compatible dbt engine."
        )

    return values
|
|
328
|
+
|
|
329
|
+
@field_validator("state_path")
def validate_state_path(cls, state_path: Optional[str], info: ValidationInfo) -> Optional[str]:
    """Normalize ``state_path`` to a resolved absolute path string.

    The path is not required to exist. ``info`` is accepted but not used.

    Returns:
        Optional[str]: None when no state path was given, otherwise the resolved
            absolute path as a string.
    """
    if state_path is not None:
        return os.fspath(Path(state_path).absolute().resolve())

    return None
|
|
335
|
+
|
|
336
|
+
def _get_unique_target_path(
    self, *, context: Optional[Union[OpExecutionContext, AssetExecutionContext]]
) -> Path:
    """Get a unique target path for the dbt CLI invocation.

    The directory name is the first 7 characters of a random UUID. When a context is
    given, it is prefixed with the op name and the first 7 characters of the run id.

    Args:
        context (Optional[Union[OpExecutionContext, AssetExecutionContext]]): The execution context.

    Returns:
        Path: The unique directory, located under the path returned by
            ``_get_dbt_target_path()``.
    """
    suffix = str(uuid.uuid4())[:7]
    if context:
        directory_name = (
            f"{context.op_execution_context.op.name}-{context.run.run_id[:7]}-{suffix}"
        )
    else:
        directory_name = suffix

    return _get_dbt_target_path().joinpath(directory_name)
|
|
355
|
+
|
|
356
|
+
def _initialize_dbt_core_adapter(self, args: Sequence[str]) -> BaseAdapter:
    """Build and register a dbt-core adapter for this resource's project and profile.

    Args:
        args (Sequence[str]): dbt CLI arguments. Only a ``--vars`` option, if present,
            is read from them here; all other arguments are ignored.

    Returns:
        BaseAdapter: The adapter that ``get_adapter`` returns for the assembled runtime
            config.
    """
    # dbt-core is imported inside the method rather than at module import time --
    # presumably because dbt-core may not be installed (TODO confirm).
    from dbt.adapters.factory import get_adapter, register_adapter, reset_adapters
    from dbt.config import RuntimeConfig
    from dbt.config.runtime import load_profile, load_project
    from dbt.config.utils import parse_cli_vars
    from dbt.flags import get_flags, set_from_args

    # Extract only `--vars` from the CLI arguments; unknown arguments are tolerated.
    parser = ArgumentParser(description="Parse cli vars from dbt command")
    parser.add_argument("--vars")
    var_args, _ = parser.parse_known_args(args)
    if not var_args.vars:
        cli_vars = {}
    else:
        cli_vars = parse_cli_vars(var_args.vars)

    # On dbt-core >= 1.8.0, set the invocation context from a copy of the current
    # environment variables.
    if DBT_PYTHON_VERSION >= version.parse("1.8.0"):
        from dbt_common.context import set_invocation_context

        set_invocation_context(os.environ.copy())

    # constructs a dummy set of flags, using the `run` command (ensures profile/project reqs get loaded)
    profiles_dir = self.profiles_dir if self.profiles_dir else self.project_dir
    set_from_args(Namespace(profiles_dir=profiles_dir), None)
    flags = get_flags()

    profile = load_profile(self.project_dir, cli_vars, self.profile, self.target)
    project = load_project(self.project_dir, False, profile, cli_vars)
    config = RuntimeConfig.from_parts(project, profile, flags)

    # these flags are required for the adapter to be able to look up
    # relations correctly
    new_flags = Namespace()
    for key, val in config.args.__dict__.items():
        setattr(new_flags, key, val)

    setattr(new_flags, "profile", profile.profile_name)
    setattr(new_flags, "target", profile.target_name)
    config.args = new_flags

    # If the dbt adapter is DuckDB, set the access mode to READ_ONLY, since DuckDB only allows
    # simultaneous connections for read-only access.

    # The class-name check comes first so the DuckDB adapter package is only imported
    # when the credentials already look like DuckDB credentials.
    if config.credentials and config.credentials.__class__.__name__ == "DuckDBCredentials":
        from dbt.adapters.duckdb.credentials import DuckDBCredentials

        if isinstance(config.credentials, DuckDBCredentials):
            if not config.credentials.config_options:
                config.credentials.config_options = {}
            config.credentials.config_options["access_mode"] = "READ_ONLY"
            # convert adapter duckdb filepath to absolute path, since the Python
            # working directory may not be the same as the dbt project directory
            with pushd(self.project_dir):
                config.credentials.path = os.fspath(Path(config.credentials.path).absolute())

    # `cleanup_event_logger` is imported from a different module depending on the
    # dbt-core version.
    if DBT_PYTHON_VERSION < version.parse("1.8.0"):
        from dbt.events.functions import cleanup_event_logger  # type: ignore
    else:
        from dbt_common.events.event_manager_client import cleanup_event_logger

    cleanup_event_logger()

    # reset adapters list in case we have instantiated an adapter before in this process
    reset_adapters()
    if DBT_PYTHON_VERSION < version.parse("1.8.0"):
        register_adapter(config)  # type: ignore
    else:
        from dbt.adapters.protocol import MacroContextGeneratorCallable  # noqa: TC002
        from dbt.context.providers import generate_runtime_macro_context
        from dbt.mp_context import get_mp_context
        from dbt.parser.manifest import ManifestLoader

        # On dbt-core >= 1.8.0 the adapter is registered with a multiprocessing context
        # and is then given a macro resolver and a macro context generator.
        register_adapter(config, get_mp_context())
        adapter = get_adapter(config)
        manifest = ManifestLoader.load_macros(
            config,
            adapter.connections.set_query_header,
            base_macros_only=True,
        )
        adapter.set_macro_resolver(manifest)
        adapter.set_macro_context_generator(
            cast("MacroContextGeneratorCallable", generate_runtime_macro_context)
        )

    # Fetched again for both version branches; the cast only informs static typing.
    adapter = cast("BaseAdapter", get_adapter(config))

    return adapter
|
|
442
|
+
|
|
443
|
+
@cached_property
def _cli_version(self) -> version.Version:
    """Return the version reported by the configured dbt executable.

    Runs ``<dbt_executable> --version`` and parses the first ``X.Y.Z`` sequence found in
    its output. This may differ from the version of the dbt-core package, most obviously
    if dbt-core is not installed due to the fusion engine being used.

    Raises:
        ValueError: If no ``X.Y.Z`` version can be found in the command output.
    """
    command_output = check_output([self.dbt_executable, "--version"])
    raw_output = command_output.decode("utf-8").strip()

    found = re.search(r"(\d+\.\d+\.\d+)", raw_output)
    if found is None:
        raise ValueError(f"Could not parse dbt version from output: {raw_output}")

    return version.parse(found.group(1))
|
|
455
|
+
|
|
456
|
+
@public
def get_defer_args(self) -> Sequence[str]:
    """Build the defer arguments for the dbt CLI command, using the supplied state directory.

    If no state directory is supplied, or the state directory does not contain a
    ``manifest.json`` for comparison, an empty list of arguments is returned.

    Returns:
        Sequence[str]: The defer arguments for the dbt CLI command.
    """
    if not self.state_path:
        return []

    if not Path(self.state_path).joinpath("manifest.json").exists():
        return []

    return ["--defer", "--defer-state", self.state_path]
|
|
469
|
+
|
|
470
|
+
@public
def get_state_args(self) -> Sequence[str]:
    """Build the state arguments for the dbt CLI command, using the supplied state directory.

    If no state directory is supplied, or the state directory does not contain a
    ``manifest.json`` for comparison, an empty list of arguments is returned.

    Returns:
        Sequence[str]: The state arguments for the dbt CLI command.
    """
    if not self.state_path:
        return []

    if not Path(self.state_path).joinpath("manifest.json").exists():
        return []

    return ["--state", self.state_path]
|
|
483
|
+
|
|
484
|
+
@public
|
|
485
|
+
def cli(
|
|
486
|
+
self,
|
|
487
|
+
args: Sequence[str],
|
|
488
|
+
*,
|
|
489
|
+
raise_on_error: bool = True,
|
|
490
|
+
manifest: Optional[DbtManifestParam] = None,
|
|
491
|
+
dagster_dbt_translator: Optional[DagsterDbtTranslator] = None,
|
|
492
|
+
context: Optional[Union[OpExecutionContext, AssetExecutionContext]] = None,
|
|
493
|
+
target_path: Optional[Path] = None,
|
|
494
|
+
) -> DbtCliInvocation:
|
|
495
|
+
"""Create a subprocess to execute a dbt CLI command.
|
|
496
|
+
|
|
497
|
+
Args:
|
|
498
|
+
args (Sequence[str]): The dbt CLI command to execute.
|
|
499
|
+
raise_on_error (bool): Whether to raise an exception if the dbt CLI command fails.
|
|
500
|
+
manifest (Optional[Union[Mapping[str, Any], str, Path]]): The dbt manifest blob. If an
|
|
501
|
+
execution context from within `@dbt_assets` is provided to the context argument,
|
|
502
|
+
then the manifest provided to `@dbt_assets` will be used.
|
|
503
|
+
dagster_dbt_translator (Optional[DagsterDbtTranslator]): The translator to link dbt
|
|
504
|
+
nodes to Dagster assets. If an execution context from within `@dbt_assets` is
|
|
505
|
+
provided to the context argument, then the dagster_dbt_translator provided to
|
|
506
|
+
`@dbt_assets` will be used.
|
|
507
|
+
context (Optional[Union[OpExecutionContext, AssetExecutionContext]]): The execution context from within `@dbt_assets`.
|
|
508
|
+
If an AssetExecutionContext is passed, its underlying OpExecutionContext will be used.
|
|
509
|
+
target_path (Optional[Path]): An explicit path to a target folder to use to store and
|
|
510
|
+
retrieve dbt artifacts when running a dbt CLI command. If not provided, a unique
|
|
511
|
+
target path will be generated.
|
|
512
|
+
|
|
513
|
+
Returns:
|
|
514
|
+
DbtCliInvocation: An invocation instance that can be used to retrieve the output of the
|
|
515
|
+
dbt CLI command.
|
|
516
|
+
|
|
517
|
+
Examples:
|
|
518
|
+
Streaming Dagster events for dbt asset materializations and observations:
|
|
519
|
+
|
|
520
|
+
.. code-block:: python
|
|
521
|
+
|
|
522
|
+
from pathlib import Path
|
|
523
|
+
|
|
524
|
+
from dagster import AssetExecutionContext
|
|
525
|
+
from dagster_dbt import DbtCliResource, dbt_assets
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
@dbt_assets(manifest=Path("target", "manifest.json"))
|
|
529
|
+
def my_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource):
|
|
530
|
+
yield from dbt.cli(["run"], context=context).stream()
|
|
531
|
+
|
|
532
|
+
Retrieving a dbt artifact after streaming the Dagster events:
|
|
533
|
+
|
|
534
|
+
.. code-block:: python
|
|
535
|
+
|
|
536
|
+
from pathlib import Path
|
|
537
|
+
|
|
538
|
+
from dagster import AssetExecutionContext
|
|
539
|
+
from dagster_dbt import DbtCliResource, dbt_assets
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
@dbt_assets(manifest=Path("target", "manifest.json"))
|
|
543
|
+
def my_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource):
|
|
544
|
+
dbt_run_invocation = dbt.cli(["run"], context=context)
|
|
545
|
+
|
|
546
|
+
yield from dbt_run_invocation.stream()
|
|
547
|
+
|
|
548
|
+
# Retrieve the `run_results.json` dbt artifact as a dictionary:
|
|
549
|
+
run_results_json = dbt_run_invocation.get_artifact("run_results.json")
|
|
550
|
+
|
|
551
|
+
# Retrieve the `run_results.json` dbt artifact as a file path:
|
|
552
|
+
run_results_path = dbt_run_invocation.target_path.joinpath("run_results.json")
|
|
553
|
+
|
|
554
|
+
Customizing the asset materialization metadata when streaming the Dagster events:
|
|
555
|
+
|
|
556
|
+
.. code-block:: python
|
|
557
|
+
|
|
558
|
+
from pathlib import Path
|
|
559
|
+
|
|
560
|
+
from dagster import AssetExecutionContext
|
|
561
|
+
from dagster_dbt import DbtCliResource, dbt_assets
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
@dbt_assets(manifest=Path("target", "manifest.json"))
|
|
565
|
+
def my_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource):
|
|
566
|
+
dbt_cli_invocation = dbt.cli(["run"], context=context)
|
|
567
|
+
|
|
568
|
+
for dagster_event in dbt_cli_invocation.stream():
|
|
569
|
+
if isinstance(dagster_event, Output):
|
|
570
|
+
context.add_output_metadata(
|
|
571
|
+
metadata={
|
|
572
|
+
"my_custom_metadata": "my_custom_metadata_value",
|
|
573
|
+
},
|
|
574
|
+
output_name=dagster_event.output_name,
|
|
575
|
+
)
|
|
576
|
+
|
|
577
|
+
yield dagster_event
|
|
578
|
+
|
|
579
|
+
Suppressing exceptions from a dbt CLI command when a non-zero exit code is returned:
|
|
580
|
+
|
|
581
|
+
.. code-block:: python
|
|
582
|
+
|
|
583
|
+
from pathlib import Path
|
|
584
|
+
|
|
585
|
+
from dagster import AssetExecutionContext
|
|
586
|
+
from dagster_dbt import DbtCliResource, dbt_assets
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
@dbt_assets(manifest=Path("target", "manifest.json"))
|
|
590
|
+
def my_dbt_assets(context: AssetExecutionContext, dbt: DbtCliResource):
|
|
591
|
+
dbt_run_invocation = dbt.cli(["run"], context=context, raise_on_error=False)
|
|
592
|
+
|
|
593
|
+
if dbt_run_invocation.is_successful():
|
|
594
|
+
yield from dbt_run_invocation.stream()
|
|
595
|
+
else:
|
|
596
|
+
...
|
|
597
|
+
|
|
598
|
+
Invoking a dbt CLI command in a custom asset or op:
|
|
599
|
+
|
|
600
|
+
.. code-block:: python
|
|
601
|
+
|
|
602
|
+
import json
|
|
603
|
+
|
|
604
|
+
from dagster import Nothing, Out, asset, op
|
|
605
|
+
from dagster_dbt import DbtCliResource
|
|
606
|
+
|
|
607
|
+
|
|
608
|
+
@asset
|
|
609
|
+
def my_dbt_asset(dbt: DbtCliResource):
|
|
610
|
+
dbt_macro_args = {"key": "value"}
|
|
611
|
+
dbt.cli(["run-operation", "my-macro", json.dumps(dbt_macro_args)]).wait()
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
@op(out=Out(Nothing))
|
|
615
|
+
def my_dbt_op(dbt: DbtCliResource):
|
|
616
|
+
dbt_macro_args = {"key": "value"}
|
|
617
|
+
yield from dbt.cli(["run-operation", "my-macro", json.dumps(dbt_macro_args)]).stream()
|
|
618
|
+
"""
|
|
619
|
+
dagster_dbt_translator = validate_opt_translator(dagster_dbt_translator)
|
|
620
|
+
dagster_dbt_translator = dagster_dbt_translator or DagsterDbtTranslator()
|
|
621
|
+
manifest = validate_manifest(manifest) if manifest else {}
|
|
622
|
+
|
|
623
|
+
updated_params = get_updated_cli_invocation_params_for_context(
|
|
624
|
+
context=context, manifest=manifest, dagster_dbt_translator=dagster_dbt_translator
|
|
625
|
+
)
|
|
626
|
+
manifest = updated_params.manifest
|
|
627
|
+
dagster_dbt_translator = updated_params.dagster_dbt_translator
|
|
628
|
+
selection_args = updated_params.selection_args
|
|
629
|
+
indirect_selection = updated_params.indirect_selection
|
|
630
|
+
target_path = target_path or self._get_unique_target_path(context=context)
|
|
631
|
+
project_dir = Path(
|
|
632
|
+
updated_params.dbt_project.project_dir
|
|
633
|
+
if updated_params.dbt_project
|
|
634
|
+
else self.project_dir
|
|
635
|
+
)
|
|
636
|
+
env = {
|
|
637
|
+
# Allow IO streaming when running in Windows.
|
|
638
|
+
# Also, allow it to be overridden by the current environment.
|
|
639
|
+
"PYTHONLEGACYWINDOWSSTDIO": "1",
|
|
640
|
+
# Pass the current environment variables to the dbt CLI invocation.
|
|
641
|
+
**os.environ.copy(),
|
|
642
|
+
# An environment variable to indicate that the dbt CLI is being invoked from Dagster.
|
|
643
|
+
"DAGSTER_DBT_CLI": "true",
|
|
644
|
+
# Run dbt with unbuffered output.
|
|
645
|
+
"PYTHONUNBUFFERED": "1",
|
|
646
|
+
# Disable anonymous usage statistics for performance.
|
|
647
|
+
"DBT_SEND_ANONYMOUS_USAGE_STATS": "false",
|
|
648
|
+
# The DBT_LOG_FORMAT environment variable must be set to `json`. We use this
|
|
649
|
+
# environment variable to ensure that the dbt CLI outputs structured logs.
|
|
650
|
+
"DBT_LOG_FORMAT": "json",
|
|
651
|
+
# The DBT_TARGET_PATH environment variable is set to a unique value for each dbt
|
|
652
|
+
# invocation so that artifact paths are separated.
|
|
653
|
+
# See https://discourse.getdbt.com/t/multiple-run-results-json-and-manifest-json-files/7555
|
|
654
|
+
# for more information.
|
|
655
|
+
"DBT_TARGET_PATH": os.fspath(target_path),
|
|
656
|
+
# The DBT_LOG_PATH environment variable is set to the same value as DBT_TARGET_PATH
|
|
657
|
+
# so that each dbt invocation has separate log files.
|
|
658
|
+
"DBT_LOG_PATH": os.fspath(target_path),
|
|
659
|
+
# The DBT_PROFILES_DIR environment variable is set to the path containing the dbt
|
|
660
|
+
# profiles.yml file.
|
|
661
|
+
# See https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles#advanced-customizing-a-profile-directory
|
|
662
|
+
# for more information.
|
|
663
|
+
**({"DBT_PROFILES_DIR": self.profiles_dir} if self.profiles_dir else {}),
|
|
664
|
+
# The DBT_PROJECT_DIR environment variable is set to the path containing the dbt project
|
|
665
|
+
# See https://docs.getdbt.com/reference/dbt_project.yml for more information.
|
|
666
|
+
"DBT_PROJECT_DIR": str(project_dir),
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
# set dbt indirect selection if needed to execute specific dbt tests due to asset check
|
|
670
|
+
# selection
|
|
671
|
+
if indirect_selection:
|
|
672
|
+
env[DBT_INDIRECT_SELECTION_ENV] = indirect_selection
|
|
673
|
+
|
|
674
|
+
# TODO: verify that args does not have any selection flags if the context and manifest
|
|
675
|
+
# are passed to this function.
|
|
676
|
+
profile_args: list[str] = []
|
|
677
|
+
if self.profile:
|
|
678
|
+
profile_args = ["--profile", self.profile]
|
|
679
|
+
|
|
680
|
+
if self.target:
|
|
681
|
+
profile_args += ["--target", self.target]
|
|
682
|
+
|
|
683
|
+
full_dbt_args = [
|
|
684
|
+
self.dbt_executable,
|
|
685
|
+
*self.global_config_flags,
|
|
686
|
+
*args,
|
|
687
|
+
*profile_args,
|
|
688
|
+
*selection_args,
|
|
689
|
+
]
|
|
690
|
+
|
|
691
|
+
if not target_path.is_absolute():
|
|
692
|
+
target_path = project_dir.joinpath(target_path)
|
|
693
|
+
|
|
694
|
+
# run dbt --version to get the dbt core version
|
|
695
|
+
adapter: Optional[BaseAdapter] = None
|
|
696
|
+
with pushd(str(project_dir)):
|
|
697
|
+
# we do not need to initialize the adapter if we are using the fusion engine
|
|
698
|
+
if self._cli_version.major < 2:
|
|
699
|
+
try:
|
|
700
|
+
adapter = self._initialize_dbt_core_adapter(args)
|
|
701
|
+
except:
|
|
702
|
+
logger.warning(
|
|
703
|
+
"An error was encountered when creating a handle to the dbt adapter in Dagster.",
|
|
704
|
+
exc_info=True,
|
|
705
|
+
)
|
|
706
|
+
|
|
707
|
+
return DbtCliInvocation.run(
|
|
708
|
+
args=full_dbt_args,
|
|
709
|
+
env=env,
|
|
710
|
+
manifest=manifest,
|
|
711
|
+
dagster_dbt_translator=dagster_dbt_translator,
|
|
712
|
+
project_dir=project_dir,
|
|
713
|
+
target_path=target_path,
|
|
714
|
+
raise_on_error=raise_on_error,
|
|
715
|
+
context=context,
|
|
716
|
+
adapter=adapter,
|
|
717
|
+
cli_version=self._cli_version,
|
|
718
|
+
dbt_project=updated_params.dbt_project,
|
|
719
|
+
)
|
|
720
|
+
|
|
721
|
+
def setup_for_execution(self, context: InitResourceContext) -> None:
    """Warn at resource setup when the dbt project still references dagster-dbt as a package.

    Looks for ``packages.yml`` and ``dependencies.yml`` directly under
    ``self.project_dir``. If either file exists and
    ``_dbt_packages_has_dagster_dbt`` reports a match for it, a deprecation
    warning about the ``log_column_level_metadata`` macro is logged.

    Args:
        context: The resource initialization context. Nothing is checked or
            logged when ``context.log`` is falsy.
    """
    # Without a logger there is nowhere to report the deprecation, so skip the
    # filesystem checks entirely (same short-circuit order as before).
    if not context.log:
        return

    project_dir = Path(self.project_dir)
    package_files = (
        project_dir.joinpath("packages.yml"),
        project_dir.joinpath("dependencies.yml"),
    )

    # NOTE(review): `_dbt_packages_has_dagster_dbt` presumably inspects the file for a
    # dagster-dbt package entry -- confirm against its definition.
    if any(
        package_file.exists() and _dbt_packages_has_dagster_dbt(package_file)
        for package_file in package_files
    ):
        # `Logger.warn` is a deprecated alias that emits a DeprecationWarning;
        # `warning` is the supported spelling and logs the same record.
        context.log.warning(
            "Fetching column metadata using `log_column_level_metadata` macro is deprecated and will be"
            " removed in dagster-dbt 0.24.0. Use the `fetch_column_metadata` method in your asset definition"
            " to fetch column metadata instead."
        )
|