dagster-dbt 0.23.3__py3-none-any.whl → 0.28.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_dbt/__init__.py +41 -140
- dagster_dbt/asset_decorator.py +49 -230
- dagster_dbt/asset_specs.py +65 -0
- dagster_dbt/asset_utils.py +655 -338
- dagster_dbt/cli/app.py +44 -43
- dagster_dbt/cloud/__init__.py +6 -4
- dagster_dbt/cloud/asset_defs.py +119 -177
- dagster_dbt/cloud/cli.py +3 -4
- dagster_dbt/cloud/ops.py +9 -6
- dagster_dbt/cloud/resources.py +9 -4
- dagster_dbt/cloud/types.py +12 -7
- dagster_dbt/cloud/utils.py +186 -0
- dagster_dbt/cloud_v2/__init__.py +10 -0
- dagster_dbt/cloud_v2/asset_decorator.py +81 -0
- dagster_dbt/cloud_v2/cli_invocation.py +67 -0
- dagster_dbt/cloud_v2/client.py +438 -0
- dagster_dbt/cloud_v2/resources.py +462 -0
- dagster_dbt/cloud_v2/run_handler.py +229 -0
- dagster_dbt/cloud_v2/sensor_builder.py +254 -0
- dagster_dbt/cloud_v2/types.py +143 -0
- dagster_dbt/compat.py +107 -0
- dagster_dbt/components/__init__.py +0 -0
- dagster_dbt/components/dbt_project/__init__.py +0 -0
- dagster_dbt/components/dbt_project/component.py +545 -0
- dagster_dbt/components/dbt_project/scaffolder.py +65 -0
- dagster_dbt/core/__init__.py +0 -10
- dagster_dbt/core/dbt_cli_event.py +612 -0
- dagster_dbt/core/dbt_cli_invocation.py +474 -0
- dagster_dbt/core/dbt_event_iterator.py +399 -0
- dagster_dbt/core/resource.py +733 -0
- dagster_dbt/core/utils.py +14 -279
- dagster_dbt/dagster_dbt_translator.py +317 -74
- dagster_dbt/dbt_core_version.py +1 -0
- dagster_dbt/dbt_manifest.py +6 -5
- dagster_dbt/dbt_manifest_asset_selection.py +62 -22
- dagster_dbt/dbt_project.py +179 -40
- dagster_dbt/dbt_project_manager.py +173 -0
- dagster_dbt/dbt_version.py +0 -0
- dagster_dbt/errors.py +9 -84
- dagster_dbt/freshness_builder.py +147 -0
- dagster_dbt/include/pyproject.toml.jinja +21 -0
- dagster_dbt/include/scaffold/assets.py.jinja +1 -8
- dagster_dbt/include/scaffold/definitions.py.jinja +0 -15
- dagster_dbt/include/scaffold/project.py.jinja +1 -0
- dagster_dbt/include/setup.py.jinja +2 -3
- dagster_dbt/metadata_set.py +18 -0
- dagster_dbt/utils.py +136 -234
- dagster_dbt/version.py +1 -1
- dagster_dbt-0.28.4.dist-info/METADATA +47 -0
- dagster_dbt-0.28.4.dist-info/RECORD +59 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/WHEEL +1 -1
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/entry_points.txt +3 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info/licenses}/LICENSE +1 -1
- dagster_dbt/asset_defs.py +0 -1049
- dagster_dbt/core/resources.py +0 -527
- dagster_dbt/core/resources_v2.py +0 -1542
- dagster_dbt/core/types.py +0 -63
- dagster_dbt/dbt_resource.py +0 -220
- dagster_dbt/include/scaffold/constants.py.jinja +0 -21
- dagster_dbt/ops.py +0 -134
- dagster_dbt/types.py +0 -22
- dagster_dbt-0.23.3.dist-info/METADATA +0 -31
- dagster_dbt-0.23.3.dist-info/RECORD +0 -43
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,474 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
import copy
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import signal
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
from collections.abc import Iterator, Mapping, Sequence
|
|
9
|
+
from dataclasses import dataclass, field, replace
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Final, Literal, NamedTuple, Optional, Union, cast
|
|
12
|
+
|
|
13
|
+
import orjson
|
|
14
|
+
from dagster import (
|
|
15
|
+
AssetCheckEvaluation,
|
|
16
|
+
AssetCheckResult,
|
|
17
|
+
AssetExecutionContext,
|
|
18
|
+
AssetMaterialization,
|
|
19
|
+
AssetObservation,
|
|
20
|
+
OpExecutionContext,
|
|
21
|
+
Output,
|
|
22
|
+
get_dagster_logger,
|
|
23
|
+
)
|
|
24
|
+
from dagster._annotations import public
|
|
25
|
+
from dagster._core.errors import DagsterExecutionInterruptedError
|
|
26
|
+
from packaging import version
|
|
27
|
+
|
|
28
|
+
from dagster_dbt.compat import BaseAdapter, BaseColumn, BaseRelation
|
|
29
|
+
from dagster_dbt.core.dbt_cli_event import (
|
|
30
|
+
DbtCliEventMessage,
|
|
31
|
+
DbtCoreCliEventMessage,
|
|
32
|
+
DbtFusionCliEventMessage,
|
|
33
|
+
)
|
|
34
|
+
from dagster_dbt.core.dbt_event_iterator import DbtDagsterEventType, DbtEventIterator
|
|
35
|
+
from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator
|
|
36
|
+
from dagster_dbt.dbt_project import DbtProject
|
|
37
|
+
from dagster_dbt.errors import DagsterDbtCliRuntimeError
|
|
38
|
+
|
|
39
|
+
# File name of the partial-parse state file that is looked up in (and copied between)
# dbt target directories.
PARTIAL_PARSE_FILE_NAME = "partial_parse.msgpack"

# Number of seconds to wait for the dbt process to exit after an interrupt has been
# forwarded to it. Read once at import time from the environment; defaults to 25.
DAGSTER_DBT_TERMINATION_TIMEOUT_SECONDS = int(
    os.environ.get("DAGSTER_DBT_TERMINATION_TIMEOUT_SECONDS", "25")
)

# Default value for `DbtCliInvocation.postprocessing_threadpool_num_threads`.
DEFAULT_EVENT_POSTPROCESSING_THREADPOOL_SIZE: Final[int] = 4
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Module-level logger obtained from Dagster; used for the progress messages emitted while
# launching, interrupting and finishing dbt commands.
logger = get_dagster_logger()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _get_dbt_target_path() -> Path:
|
|
50
|
+
return Path(os.getenv("DBT_TARGET_PATH", "target"))
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class RelationKey(NamedTuple):
    """Hashable (database, schema, identifier) triple that identifies a relation in a database.

    Being a tuple, instances can be used directly as dictionary keys.
    """

    # Name of the database containing the relation.
    database: str
    # Name of the schema containing the relation.
    schema: str
    # Name of the relation itself within the schema.
    identifier: str
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class RelationData(NamedTuple):
    """Metadata about a relation, as queried from a database."""

    # String form of the relation the columns were fetched for.
    name: str
    # Column objects returned by the adapter for the relation.
    columns: list[BaseColumn]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _get_relation_from_adapter(adapter: BaseAdapter, relation_key: RelationKey) -> BaseRelation:
    """Create the adapter's relation object for the relation that ``relation_key`` identifies.

    Args:
        adapter (BaseAdapter): The adapter whose ``Relation`` class is used to build the relation.
        relation_key (RelationKey): The database, schema and identifier of the relation.

    Returns:
        BaseRelation: The relation created by ``adapter.Relation.create``.
    """
    database, schema, identifier = relation_key
    relation_cls = adapter.Relation
    return relation_cls.create(database=database, schema=schema, identifier=identifier)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass
class DbtCliInvocation:
    """The representation of an invoked dbt command.

    Args:
        process (subprocess.Popen): The process running the dbt command.
        manifest (Mapping[str, Any]): The dbt manifest blob.
        dagster_dbt_translator (DagsterDbtTranslator): The translator forwarded when dbt events
            are converted to Dagster events.
        project_dir (Path): The path to the dbt project.
        target_path (Path): The path to the dbt target folder.
        raise_on_error (bool): Whether to raise an exception if the dbt command fails.
        cli_version (version.Version): The version of the dbt CLI. A major version below 2
            selects ``DbtCoreCliEventMessage`` for parsing events, otherwise
            ``DbtFusionCliEventMessage`` is used.
        project (Optional[DbtProject]): The dbt project.
        context (Optional[Union[OpExecutionContext, AssetExecutionContext]]): The execution
            context forwarded when dbt events are converted to Dagster events.
        adapter (Optional[BaseAdapter]): The dbt adapter associated with the invocation, if any.
    """

    process: subprocess.Popen
    manifest: Mapping[str, Any]
    dagster_dbt_translator: DagsterDbtTranslator
    project_dir: Path
    target_path: Path
    raise_on_error: bool
    cli_version: version.Version
    project: Optional[DbtProject] = field(default=None)
    context: Optional[Union[OpExecutionContext, AssetExecutionContext]] = field(
        default=None, repr=False
    )
    # How long to wait for dbt to exit after an interrupt has been forwarded to it.
    termination_timeout_seconds: float = field(
        init=False, default=DAGSTER_DBT_TERMINATION_TIMEOUT_SECONDS
    )
    adapter: Optional[BaseAdapter] = field(default=None)
    postprocessing_threadpool_num_threads: int = field(
        init=False, default=DEFAULT_EVENT_POSTPROCESSING_THREADPOOL_SIZE
    )
    # Output drained by `is_successful` before it was streamed. Entries are parsed JSON events,
    # or the raw line when a line could not be parsed as a structured event.
    _stdout: list[Union[str, dict[str, Any]]] = field(init=False, default_factory=list)
    # Messages of all structured events whose level was "error".
    _error_messages: list[str] = field(init=False, default_factory=list)

    # Caches fetching relation column metadata to avoid redundant queries to the database.
    _relation_column_metadata_cache: dict[RelationKey, RelationData] = field(
        init=False, default_factory=dict
    )

    def _get_columns_from_dbt_resource_props(
        self, adapter: BaseAdapter, dbt_resource_props: dict[str, Any]
    ) -> RelationData:
        """Given a dbt resource properties dictionary, fetches the resource's column metadata from
        the database, or returns the cached metadata if it has already been fetched.

        Args:
            adapter (BaseAdapter): The adapter used to build the relation and query its columns.
            dbt_resource_props (dict[str, Any]): The dbt resource properties. ``database``,
                ``schema`` and ``unique_id`` are read, plus ``identifier`` when the unique id
                starts with ``source`` and ``alias`` otherwise.

        Returns:
            RelationData: The relation's string form and its columns.
        """
        relation_key = RelationKey(
            database=dbt_resource_props["database"],
            schema=dbt_resource_props["schema"],
            identifier=(
                dbt_resource_props["identifier"]
                if dbt_resource_props["unique_id"].startswith("source")
                else dbt_resource_props["alias"]
            ),
        )
        if relation_key in self._relation_column_metadata_cache:
            return self._relation_column_metadata_cache[relation_key]

        relation = _get_relation_from_adapter(adapter=adapter, relation_key=relation_key)
        cols: list = adapter.get_columns_in_relation(relation=relation)
        # `setdefault` keeps whichever entry was stored first for this key.
        return self._relation_column_metadata_cache.setdefault(
            relation_key, RelationData(name=str(relation), columns=cols)
        )

    @classmethod
    def run(
        cls,
        args: Sequence[str],
        env: dict[str, str],
        manifest: Mapping[str, Any],
        dagster_dbt_translator: DagsterDbtTranslator,
        project_dir: Path,
        target_path: Path,
        raise_on_error: bool,
        context: Optional[Union[OpExecutionContext, AssetExecutionContext]],
        adapter: Optional[BaseAdapter],
        cli_version: version.Version,
        dbt_project: Optional[DbtProject] = None,
    ) -> "DbtCliInvocation":
        """Start the dbt CLI as a subprocess and return an invocation object wrapping it.

        Args:
            args (Sequence[str]): The full command line to execute.
            env (dict[str, str]): The environment for the subprocess.
            manifest (Mapping[str, Any]): The dbt manifest blob.
            dagster_dbt_translator (DagsterDbtTranslator): Stored on the invocation.
            project_dir (Path): The dbt project directory; used as the subprocess working
                directory.
            target_path (Path): The target folder for this invocation.
            raise_on_error (bool): Whether a failed command should raise once streamed.
            context: The execution context, if any; stored on the invocation.
            adapter (Optional[BaseAdapter]): The dbt adapter, if any; stored on the invocation.
            cli_version (version.Version): The version of the dbt CLI.
            dbt_project (Optional[DbtProject]): The dbt project, if any.

        Returns:
            DbtCliInvocation: The invocation wrapping the started process.
        """
        # Attempt to take advantage of partial parsing. If there is a `partial_parse.msgpack`
        # in the target folder, then copy it to the dynamic target path.
        #
        # This effectively allows us to skip the parsing of the manifest, which can be expensive.
        # See https://docs.getdbt.com/reference/programmatic-invocations#reusing-objects for more
        # details.
        current_target_path = _get_dbt_target_path()
        partial_parse_file_path = (
            current_target_path.joinpath(PARTIAL_PARSE_FILE_NAME)
            if current_target_path.is_absolute()
            else project_dir.joinpath(current_target_path, PARTIAL_PARSE_FILE_NAME)
        )
        partial_parse_destination_target_path = target_path.joinpath(PARTIAL_PARSE_FILE_NAME)

        if partial_parse_file_path.exists() and not partial_parse_destination_target_path.exists():
            logger.info(
                f"Copying `{partial_parse_file_path}` to `{partial_parse_destination_target_path}`"
                " to take advantage of partial parsing."
            )

            partial_parse_destination_target_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(partial_parse_file_path, partial_parse_destination_target_path)

        # Create a subprocess that runs the dbt CLI command. stderr is merged into stdout so
        # that a single pipe carries all of the process output.
        process = subprocess.Popen(
            args=args,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env=env,
            cwd=project_dir,
        )

        dbt_cli_invocation = cls(
            process=process,
            manifest=manifest,
            project=dbt_project,
            dagster_dbt_translator=dagster_dbt_translator,
            project_dir=project_dir,
            target_path=target_path,
            raise_on_error=raise_on_error,
            context=context,
            adapter=adapter,
            cli_version=cli_version,
        )
        logger.info(f"Running dbt command: `{dbt_cli_invocation.dbt_command}`.")

        return dbt_cli_invocation

    @public
    def wait(self) -> "DbtCliInvocation":
        """Wait for the dbt CLI process to complete.

        Returns:
            DbtCliInvocation: The current representation of the dbt CLI invocation.

        Examples:
            .. code-block:: python

                from dagster_dbt import DbtCliResource

                dbt = DbtCliResource(project_dir="/path/to/dbt/project")

                dbt_cli_invocation = dbt.cli(["run"]).wait()
        """
        # Exhaust the event stream for its side effects only; the events themselves are not
        # needed, so they are not collected into a list.
        for _ in self.stream_raw_events():
            pass

        return self

    @public
    def is_successful(self) -> bool:
        """Return whether the dbt CLI process completed successfully.

        Returns:
            bool: True, if the dbt CLI process returns with a zero exit code and no error-level
                messages were parsed from its output, and False otherwise.

        Examples:
            .. code-block:: python

                from dagster_dbt import DbtCliResource

                dbt = DbtCliResource(project_dir="/path/to/dbt/project")

                dbt_cli_invocation = dbt.cli(["run"], raise_on_error=False)

                if dbt_cli_invocation.is_successful():
                    ...
        """
        # Drain any output that has not been read yet so the process can exit and so that
        # `stream_raw_events` can still replay it later. Extend instead of rebinding: once the
        # pipe has been drained it is closed, so a repeated call reads nothing and must not
        # discard the events buffered by an earlier call.
        self._stdout.extend(self._stream_stdout())

        return self.process.wait() == 0 and not self._error_messages

    @public
    def get_error(self) -> Optional[Exception]:
        """Return an exception if the dbt CLI process failed.

        Returns:
            Optional[Exception]: An exception if the dbt CLI process failed, and None otherwise.

        Examples:
            .. code-block:: python

                from dagster_dbt import DbtCliResource

                dbt = DbtCliResource(project_dir="/path/to/dbt/project")

                dbt_cli_invocation = dbt.cli(["run"], raise_on_error=False)

                error = dbt_cli_invocation.get_error()
                if error:
                    logger.error(error)
        """
        if self.is_successful():
            return None

        log_path = self.target_path.joinpath("dbt.log")
        extra_description = ""

        # Only point at the debug log when it actually exists in the target folder.
        if log_path.exists():
            extra_description = f", or view the dbt debug log: {log_path}"

        return DagsterDbtCliRuntimeError(
            description=(
                f"The dbt CLI process with command\n\n"
                f"`{self.dbt_command}`\n\n"
                f"failed with exit code `{self.process.returncode}`."
                " Check the stdout in the Dagster compute logs for the full information about"
                f" the error{extra_description}.{self._format_error_messages()}"
            ),
        )

    def _stream_asset_events(
        self,
    ) -> Iterator[DbtDagsterEventType]:
        """Stream the dbt CLI events and convert them to Dagster events."""
        for event in self.stream_raw_events():
            yield from event.to_default_asset_events(
                manifest=self.manifest,
                dagster_dbt_translator=self.dagster_dbt_translator,
                context=self.context,
                target_path=self.target_path,
                project=self.project,
            )

    @public
    def stream(
        self,
    ) -> "DbtEventIterator[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation]]":
        """Stream the events from the dbt CLI process and convert them to Dagster events.

        Returns:
            Iterator[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation]]:
                A set of corresponding Dagster events.

                In a Dagster asset definition, the following are yielded:
                - Output for refables (e.g. models, seeds, snapshots.)
                - AssetCheckResult for dbt test results that are enabled as asset checks.
                - AssetObservation for dbt test results that are not enabled as asset checks.

                In a Dagster op definition, the following are yielded:
                - AssetMaterialization refables (e.g. models, seeds, snapshots.)
                - AssetCheckEvaluation for dbt test results that are enabled as asset checks.
                - AssetObservation for dbt test results that are not enabled as asset checks.

        Examples:
            .. code-block:: python

                from pathlib import Path
                from dagster_dbt import DbtCliResource, dbt_assets

                @dbt_assets(manifest=Path("target", "manifest.json"))
                def my_dbt_assets(context, dbt: DbtCliResource):
                    yield from dbt.cli(["run"], context=context).stream()
        """
        return DbtEventIterator(
            self._stream_asset_events(),
            self,
        )

    @public
    def stream_raw_events(self) -> Iterator[DbtCliEventMessage]:
        """Stream the events from the dbt CLI process.

        Lines that could not be parsed as structured events are written to stdout verbatim and
        are not yielded. Once the output is exhausted, an error is raised if the command failed
        and ``raise_on_error`` is set.

        Returns:
            Iterator[DbtCliEventMessage]: An iterator of events from the dbt CLI process.
        """
        event_history_metadata_by_unique_id: dict[str, dict[str, Any]] = {}

        # Replay output that `is_successful` already drained, if any; otherwise read live.
        for raw_event in self._stdout or self._stream_stdout():
            if isinstance(raw_event, str):
                # If we can't parse the event, then just emit it as a raw log.
                sys.stdout.write(raw_event + "\n")
                sys.stdout.flush()
                continue

            unique_id: Optional[str] = raw_event["data"].get("node_info", {}).get("unique_id")

            if self.cli_version.major < 2:
                event = DbtCoreCliEventMessage(raw_event=raw_event, event_history_metadata={})
            else:
                event = DbtFusionCliEventMessage(raw_event=raw_event, event_history_metadata={})

            if unique_id and event.is_result_event:
                # Deep-copy so the event owns its metadata independently of the shared history.
                event_history_metadata = copy.deepcopy(
                    event_history_metadata_by_unique_id.get(unique_id, {})
                )
                event = replace(event, event_history_metadata=event_history_metadata)

            # Attempt to parse the column level metadata from the event message.
            # If it exists, save it as historical metadata to attach to the NodeFinished event.
            if event.raw_event["info"]["name"] == "JinjaLogInfo":
                with contextlib.suppress(orjson.JSONDecodeError):
                    column_level_metadata = orjson.loads(event.raw_event["info"]["msg"])

                    # Only a JSON object can be metadata. Other valid JSON (e.g. a logged
                    # number) is an ordinary log line and is re-emitted like any other event.
                    if isinstance(column_level_metadata, dict):
                        event_history_metadata_by_unique_id[cast("str", unique_id)] = (
                            column_level_metadata
                        )

                        # Don't show this message in stdout
                        continue

            # Re-emit the logs from dbt CLI process into stdout.
            sys.stdout.write(str(event) + "\n")
            sys.stdout.flush()

            yield event

        # Any buffered output has now been consumed; drop it so that a later call does not
        # replay the same events a second time.
        self._stdout = []

        # Ensure that the dbt CLI process has completed.
        self._raise_on_error()

    @public
    def get_artifact(
        self,
        artifact: Union[
            Literal["manifest.json"],
            Literal["catalog.json"],
            Literal["run_results.json"],
            Literal["sources.json"],
        ],
    ) -> dict[str, Any]:
        """Retrieve a dbt artifact from the target path.

        See https://docs.getdbt.com/reference/artifacts/dbt-artifacts for more information.

        Args:
            artifact (Union[Literal["manifest.json"], Literal["catalog.json"], Literal["run_results.json"], Literal["sources.json"]]): The name of the artifact to retrieve.

        Returns:
            Dict[str, Any]: The artifact as a dictionary.

        Examples:
            .. code-block:: python

                from dagster_dbt import DbtCliResource

                dbt = DbtCliResource(project_dir="/path/to/dbt/project")

                dbt_cli_invocation = dbt.cli(["run"]).wait()

                # Retrieve the run_results.json artifact.
                run_results = dbt_cli_invocation.get_artifact("run_results.json")
        """
        artifact_path = self.target_path.joinpath(artifact)

        return orjson.loads(artifact_path.read_bytes())

    @property
    def dbt_command(self) -> str:
        """The dbt CLI command that was invoked."""
        return " ".join(cast("Sequence[str]", self.process.args))

    def _stream_stdout(self) -> Iterator[Union[str, dict[str, Any]]]:
        """Stream the stdout from the dbt CLI process.

        Each line is yielded as a parsed JSON event when it is a structured dbt log line, and as
        the stripped raw text otherwise. Messages of error-level events are also recorded in
        ``_error_messages``. The pipe is closed once it has been read to the end, so a later call
        yields nothing.

        Raises:
            DagsterExecutionInterruptedError: Re-raised after the interrupt has been forwarded to
                the dbt process and the process has exited (or been killed after the timeout).
        """
        try:
            if not self.process.stdout or self.process.stdout.closed:
                return

            with self.process.stdout:
                for raw_line in self.process.stdout:
                    # Tolerate undecodable bytes: a single bad byte in the dbt output should
                    # not abort the whole stream.
                    raw_event_str = raw_line.decode(errors="replace").strip()

                    parsed_event: Union[str, dict[str, Any]]
                    try:
                        parsed_event = orjson.loads(raw_event_str)

                        # Parse the error message from the event, if it exists.
                        is_error_message = parsed_event["info"]["level"] == "error"
                        if is_error_message:
                            self._error_messages.append(parsed_event["info"]["msg"])
                    except Exception:
                        # Not a structured dbt event (invalid JSON or unexpected shape): fall
                        # back to the raw line. `Exception` rather than a bare `except` so that
                        # interrupts and generator shutdown are never swallowed here.
                        parsed_event = raw_event_str

                    # Yield outside of the `try` so that exceptions raised at the suspension
                    # point (e.g. when the generator is closed) are not misread as parse errors.
                    yield parsed_event

        except DagsterExecutionInterruptedError:
            logger.info(f"Forwarding interrupt signal to dbt command: `{self.dbt_command}`.")
            self.process.send_signal(signal.SIGINT)
            try:
                self.process.wait(timeout=self.termination_timeout_seconds)
            except subprocess.TimeoutExpired:
                # `wait` does not stop the child when the timeout expires. Kill and reap it so
                # that no dbt process is left behind and the interrupt below is not masked.
                logger.warning(
                    "dbt process did not exit within"
                    f" {self.termination_timeout_seconds} seconds of the interrupt; killing it."
                )
                self.process.kill()
                self.process.wait()
            logger.info(f"dbt process terminated with exit code `{self.process.returncode}`.")

            raise

    def _format_error_messages(self) -> str:
        """Format the error messages from the dbt CLI process."""
        if not self._error_messages:
            return ""

        # The leading empty string makes the result start with a blank-line separator.
        return "\n\n".join(
            [
                "",
                "Errors parsed from dbt logs:",
                *self._error_messages,
            ]
        )

    def _raise_on_error(self) -> None:
        """Ensure that the dbt CLI process has completed. If the process has not successfully
        completed, then optionally raise an error.
        """
        logger.info(f"Finished dbt command: `{self.dbt_command}`.")
        error = self.get_error()
        if error and self.raise_on_error:
            raise error
|