dagster-dbt 0.23.3__py3-none-any.whl → 0.28.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_dbt/__init__.py +41 -140
- dagster_dbt/asset_decorator.py +49 -230
- dagster_dbt/asset_specs.py +65 -0
- dagster_dbt/asset_utils.py +655 -338
- dagster_dbt/cli/app.py +44 -43
- dagster_dbt/cloud/__init__.py +6 -4
- dagster_dbt/cloud/asset_defs.py +119 -177
- dagster_dbt/cloud/cli.py +3 -4
- dagster_dbt/cloud/ops.py +9 -6
- dagster_dbt/cloud/resources.py +9 -4
- dagster_dbt/cloud/types.py +12 -7
- dagster_dbt/cloud/utils.py +186 -0
- dagster_dbt/cloud_v2/__init__.py +10 -0
- dagster_dbt/cloud_v2/asset_decorator.py +81 -0
- dagster_dbt/cloud_v2/cli_invocation.py +67 -0
- dagster_dbt/cloud_v2/client.py +438 -0
- dagster_dbt/cloud_v2/resources.py +462 -0
- dagster_dbt/cloud_v2/run_handler.py +229 -0
- dagster_dbt/cloud_v2/sensor_builder.py +254 -0
- dagster_dbt/cloud_v2/types.py +143 -0
- dagster_dbt/compat.py +107 -0
- dagster_dbt/components/__init__.py +0 -0
- dagster_dbt/components/dbt_project/__init__.py +0 -0
- dagster_dbt/components/dbt_project/component.py +545 -0
- dagster_dbt/components/dbt_project/scaffolder.py +65 -0
- dagster_dbt/core/__init__.py +0 -10
- dagster_dbt/core/dbt_cli_event.py +612 -0
- dagster_dbt/core/dbt_cli_invocation.py +474 -0
- dagster_dbt/core/dbt_event_iterator.py +399 -0
- dagster_dbt/core/resource.py +733 -0
- dagster_dbt/core/utils.py +14 -279
- dagster_dbt/dagster_dbt_translator.py +317 -74
- dagster_dbt/dbt_core_version.py +1 -0
- dagster_dbt/dbt_manifest.py +6 -5
- dagster_dbt/dbt_manifest_asset_selection.py +62 -22
- dagster_dbt/dbt_project.py +179 -40
- dagster_dbt/dbt_project_manager.py +173 -0
- dagster_dbt/dbt_version.py +0 -0
- dagster_dbt/errors.py +9 -84
- dagster_dbt/freshness_builder.py +147 -0
- dagster_dbt/include/pyproject.toml.jinja +21 -0
- dagster_dbt/include/scaffold/assets.py.jinja +1 -8
- dagster_dbt/include/scaffold/definitions.py.jinja +0 -15
- dagster_dbt/include/scaffold/project.py.jinja +1 -0
- dagster_dbt/include/setup.py.jinja +2 -3
- dagster_dbt/metadata_set.py +18 -0
- dagster_dbt/utils.py +136 -234
- dagster_dbt/version.py +1 -1
- dagster_dbt-0.28.4.dist-info/METADATA +47 -0
- dagster_dbt-0.28.4.dist-info/RECORD +59 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/WHEEL +1 -1
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/entry_points.txt +3 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info/licenses}/LICENSE +1 -1
- dagster_dbt/asset_defs.py +0 -1049
- dagster_dbt/core/resources.py +0 -527
- dagster_dbt/core/resources_v2.py +0 -1542
- dagster_dbt/core/types.py +0 -63
- dagster_dbt/dbt_resource.py +0 -220
- dagster_dbt/include/scaffold/constants.py.jinja +0 -21
- dagster_dbt/ops.py +0 -134
- dagster_dbt/types.py +0 -22
- dagster_dbt-0.23.3.dist-info/METADATA +0 -31
- dagster_dbt-0.23.3.dist-info/RECORD +0 -43
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,612 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from collections.abc import Iterator, Mapping, Sequence
|
|
3
|
+
from dataclasses import InitVar, dataclass
|
|
4
|
+
from functools import cached_property
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import AbstractSet, Any, NamedTuple, Optional, TypedDict, Union, cast # noqa: UP035
|
|
7
|
+
|
|
8
|
+
import dateutil.parser
|
|
9
|
+
from dagster import (
|
|
10
|
+
AssetCheckResult,
|
|
11
|
+
AssetCheckSeverity,
|
|
12
|
+
AssetExecutionContext,
|
|
13
|
+
AssetMaterialization,
|
|
14
|
+
AssetObservation,
|
|
15
|
+
OpExecutionContext,
|
|
16
|
+
Output,
|
|
17
|
+
TableColumnDep,
|
|
18
|
+
TableColumnLineage,
|
|
19
|
+
get_dagster_logger,
|
|
20
|
+
)
|
|
21
|
+
from dagster._annotations import public
|
|
22
|
+
from dagster._core.definitions.asset_checks.asset_check_evaluation import AssetCheckEvaluation
|
|
23
|
+
from dagster._core.definitions.asset_key import AssetCheckKey, AssetKey
|
|
24
|
+
from dagster._core.definitions.metadata import TableMetadataSet
|
|
25
|
+
from dagster._utils.warnings import disable_dagster_warnings
|
|
26
|
+
from sqlglot import MappingSchema, exp, parse_one, to_table
|
|
27
|
+
from sqlglot.expressions import normalize_table_name
|
|
28
|
+
from sqlglot.lineage import lineage
|
|
29
|
+
from sqlglot.optimizer import optimize
|
|
30
|
+
|
|
31
|
+
from dagster_dbt.asset_utils import (
|
|
32
|
+
default_metadata_from_dbt_resource_props,
|
|
33
|
+
get_asset_check_key_for_test,
|
|
34
|
+
get_checks_on_sources_upstream_of_selected_assets,
|
|
35
|
+
)
|
|
36
|
+
from dagster_dbt.compat import REFABLE_NODE_TYPES, NodeStatus, NodeType, TestStatus
|
|
37
|
+
from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator, validate_translator
|
|
38
|
+
from dagster_dbt.dbt_manifest import DbtManifestParam, validate_manifest
|
|
39
|
+
from dagster_dbt.dbt_project import DbtProject
|
|
40
|
+
|
|
41
|
+
# Module-level logger shared by all event-parsing helpers in this file.
logger = get_dagster_logger()

# depending on the specific dbt version, any of these values
# may be used to indicate an empty value in a log message
_EMPTY_VALUES = {"", "None", None}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class EventHistoryMetadata(NamedTuple):
    """Column and parent-relation metadata accumulated from prior dbt log events.

    Serves as the input to column-level lineage computation in
    ``_build_column_lineage_metadata``.
    """

    # Column name -> column properties (e.g. {"data_type": ...}) for the current node.
    columns: dict[str, dict[str, Any]]
    # Parent relation name -> that parent's column properties, shaped like ``columns``.
    parents: dict[str, dict[str, Any]]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class CheckProperties(TypedDict):
    """Keyword arguments accepted by both ``AssetCheckResult`` and ``AssetCheckEvaluation``."""

    # Convenience to abstract over AssetCheckResult and AssetCheckEvaluation
    passed: bool
    asset_key: AssetKey
    check_name: str
    severity: AssetCheckSeverity
    metadata: Mapping[str, Any]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _build_column_lineage_metadata(
    event_history_metadata: EventHistoryMetadata,
    dbt_resource_props: dict[str, Any],
    manifest: Mapping[str, Any],
    dagster_dbt_translator: DagsterDbtTranslator,
    target_path: Optional[Path],
) -> dict[str, Any]:
    """Process the lineage metadata for a dbt CLI event.

    Args:
        event_history_metadata (EventHistoryMetadata): Unprocessed column type data and map of
            parent relation names to their column type data.
        dbt_resource_props (Dict[str, Any]): The dbt resource properties for the given event.
        manifest (Mapping[str, Any]): The dbt manifest blob.
        dagster_dbt_translator (DagsterDbtTranslator): The translator for dbt nodes to Dagster assets.
        target_path (Optional[Path]): The path to the dbt target folder. If ``None``, no lineage
            can be built since the compiled SQL cannot be located.

    Returns:
        Dict[str, Any]: The lineage metadata, or an empty dict when lineage cannot be computed.
    """
    if (
        # Column lineage can only be built if initial metadata is provided.
        not target_path
    ):
        return {}

    event_node_info: dict[str, Any] = dbt_resource_props
    unique_id: str = event_node_info["unique_id"]

    node_resource_type: str = event_node_info["resource_type"]

    # Only refable nodes (models, seeds, snapshots) materialize relations with lineage.
    if node_resource_type not in REFABLE_NODE_TYPES:
        return {}

    # If the unique_id is a seed, then we don't need to process lineage.
    if unique_id.startswith("seed"):
        return {}

    # 1. Retrieve the current node's SQL file and its parents' column schemas.
    sql_dialect = manifest["metadata"]["adapter_type"]
    sqlglot_mapping_schema = MappingSchema(dialect=sql_dialect)

    parent_relation_metadata_by_relation_name = {
        **event_history_metadata.parents,
        # Include the current node's column schema to optimize self-referential models.
        dbt_resource_props["relation_name"]: event_history_metadata.columns,
    }
    for (
        parent_relation_name,
        parent_relation_metadata,
    ) in parent_relation_metadata_by_relation_name.items():
        sqlglot_mapping_schema.add_table(
            table=to_table(parent_relation_name, dialect=sql_dialect),
            column_mapping={
                column_name: column_meta["data_type"]
                for column_name, column_meta in parent_relation_metadata.items()
            },
            dialect=sql_dialect,
        )

    package_name = dbt_resource_props["package_name"]
    # dbt writes compiled SQL under <target>/compiled/<package>/<original path>; normalize
    # Windows path separators so joinpath works on any platform.
    node_sql_path = target_path.joinpath(
        "compiled",
        package_name,
        dbt_resource_props["original_file_path"].replace("\\", "/"),
    )
    optimized_node_ast = cast(
        "exp.Query",
        optimize(
            parse_one(sql=node_sql_path.read_text(), dialect=sql_dialect),
            schema=sqlglot_mapping_schema,
            dialect=sql_dialect,
        ),
    )

    # 2. Retrieve the column names from the current node.
    schema_column_names = {column.lower() for column in event_history_metadata.columns.keys()}
    sqlglot_column_names = set(optimized_node_ast.named_selects)

    # 3. For each column, retrieve its dependencies on upstream columns from direct parents.
    dbt_parent_resource_props_by_relation_name: dict[str, dict[str, Any]] = {}
    for parent_unique_id in dbt_resource_props["depends_on"]["nodes"]:
        is_resource_type_source = parent_unique_id.startswith("source")
        parent_dbt_resource_props = (
            manifest["sources"] if is_resource_type_source else manifest["nodes"]
        )[parent_unique_id]
        parent_relation_name = normalize_table_name(
            to_table(parent_dbt_resource_props["relation_name"], dialect=sql_dialect),
            dialect=sql_dialect,
        )

        dbt_parent_resource_props_by_relation_name[parent_relation_name] = parent_dbt_resource_props

    normalized_sqlglot_column_names = {
        sqlglot_column.lower() for sqlglot_column in sqlglot_column_names
    }
    # Columns that exist in the warehouse schema but not in the parsed SQL's select list
    # (e.g. produced through `select *` expansion quirks) cannot be traced.
    implicit_alias_column_names = {
        column for column in schema_column_names if column not in normalized_sqlglot_column_names
    }

    deps_by_column: dict[str, Sequence[TableColumnDep]] = {}
    if implicit_alias_column_names:
        logger.warning(
            "The following columns are implicitly aliased and will be marked with an"
            f" empty list of column dependencies: `{implicit_alias_column_names}`."
        )

        deps_by_column = {column: [] for column in implicit_alias_column_names}

    for column_name in sqlglot_column_names:
        if column_name.lower() not in schema_column_names:
            continue

        column_deps: set[TableColumnDep] = set()
        for sqlglot_lineage_node in lineage(
            column=column_name,
            sql=optimized_node_ast,
            schema=sqlglot_mapping_schema,
            dialect=sql_dialect,
        ).walk():
            # Only the leaves of the lineage graph contain relevant information.
            if sqlglot_lineage_node.downstream:
                continue

            # Attempt to find a table in the lineage node.
            table = sqlglot_lineage_node.expression.find(exp.Table)
            if not table:
                continue

            # Attempt to retrieve the table's associated asset key and column.
            parent_column_name = exp.to_column(sqlglot_lineage_node.name).name.lower()
            parent_relation_name = normalize_table_name(table, dialect=sql_dialect)
            parent_resource_props = dbt_parent_resource_props_by_relation_name.get(
                parent_relation_name
            )
            if not parent_resource_props:
                continue

            # Add the column dependency.
            column_deps.add(
                TableColumnDep(
                    asset_key=dagster_dbt_translator.get_asset_key(parent_resource_props),
                    column_name=parent_column_name,
                )
            )

        deps_by_column[column_name.lower()] = list(column_deps)

    # 4. Render the lineage as metadata.
    with disable_dagster_warnings():
        return dict(
            TableMetadataSet(column_lineage=TableColumnLineage(deps_by_column=deps_by_column))
        )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
@dataclass
class DbtCliEventMessage(ABC):
    """The representation of a dbt CLI event.

    Subclasses specialize parsing for a specific dbt engine (Core vs. Fusion).

    Args:
        raw_event (Dict[str, Any]): The raw event dictionary.
            See https://docs.getdbt.com/reference/events-logging#structured-logging for more
            information.
        event_history_metadata (Dict[str, Any]): A dictionary of metadata about the
            current event, gathered from previous historical events.
    """

    raw_event: dict[str, Any]
    # InitVar: consumed by __post_init__ and stored on a private attribute so it does
    # not participate in dataclass equality/repr.
    event_history_metadata: InitVar[dict[str, Any]]

    def __post_init__(self, event_history_metadata: dict[str, Any]):
        self._event_history_metadata = event_history_metadata

    def __str__(self) -> str:
        return self.raw_event["info"]["msg"]

    @property
    def has_column_lineage_metadata(self) -> bool:
        """Whether the event has column level lineage metadata."""
        return bool(self._event_history_metadata) and "parents" in self._event_history_metadata

    @cached_property
    def _unique_id(self) -> str:
        # The dbt unique_id of the node this event refers to.
        return self.raw_event["data"]["node_info"]["unique_id"]

    @cached_property
    def _raw_data(self) -> Mapping[str, Any]:
        return self.raw_event["data"]

    @cached_property
    def _raw_node_info(self) -> Mapping[str, Any]:
        return self.raw_event["data"]["node_info"]

    @property
    @abstractmethod
    def is_result_event(self) -> bool:
        """Whether this raw event represents a per-node execution result."""
        ...

    def _is_model_execution_event(self, manifest: Mapping[str, Any]) -> bool:
        """Whether this event corresponds to a successful materialization of a refable node."""
        resource_props = self._get_resource_props(self._unique_id, manifest)
        materialized_type = (
            # check event info
            self._raw_node_info.get("materialized")
            # then top-level props
            or resource_props.get("materialized")
            # then config
            or resource_props.get("config", {}).get("materialized")
        )
        return (
            resource_props["resource_type"] in REFABLE_NODE_TYPES
            and materialized_type != "ephemeral"
            and self._get_node_status() == NodeStatus.Success
        )

    def _is_test_execution_event(self, manifest: Mapping[str, Any]) -> bool:
        """Whether this event corresponds to a dbt test execution."""
        resource_props = self._get_resource_props(self._unique_id, manifest)
        return resource_props["resource_type"] == NodeType.Test

    def _get_resource_props(self, unique_id: str, manifest: Mapping[str, Any]) -> dict[str, Any]:
        """Look up the manifest node properties for the given unique_id."""
        return manifest["nodes"][unique_id]

    def _get_execution_duration_metadata(self) -> Mapping[str, float]:
        """Compute the ``Execution Duration`` metadata entry from node timestamps.

        Falls back to ``data.execution_time`` when timestamps are absent (e.g. incremental
        microbatch materializations report empty timestamp strings).
        """
        raw_started_at = self._raw_node_info.get("node_started_at")
        raw_finished_at = self._raw_node_info.get("node_finished_at")

        has_started_at = raw_started_at not in _EMPTY_VALUES
        has_finished_at = raw_finished_at not in _EMPTY_VALUES

        if has_started_at and has_finished_at:
            started_at = dateutil.parser.isoparse(cast("str", raw_started_at))
            finished_at = dateutil.parser.isoparse(cast("str", raw_finished_at))
            # Reuse the computed duration instead of recomputing it in the return
            # expression; a 0.0 duration is intentionally still reported here.
            duration = (finished_at - started_at).total_seconds()
            return {"Execution Duration": duration}
        else:
            # if model materialization is incremental microbatch, node_started_at and
            # node_finished_at are empty strings and require fallback to data.execution_time
            duration = self._raw_data.get("execution_time")

        return {"Execution Duration": duration} if duration else {}

    ###############
    # MODEL PARSING
    ###############

    def _get_column_schema_metadata(self, manifest: Mapping[str, Any]) -> Mapping[str, Any]:
        """Build column schema metadata from event history; best-effort, never raises."""
        try:
            return default_metadata_from_dbt_resource_props(self._event_history_metadata)
        except Exception as e:
            logger.warning(
                "An error occurred while building column schema metadata from event history"
                f" `{self._event_history_metadata}` for the dbt resource"
                f" `{self._get_resource_props(self._unique_id, manifest)['original_file_path']}`."
                " Column schema metadata will not be included in the event.\n\n"
                f"Exception: {e}",
                exc_info=True,
            )
            return {}

    def _get_default_metadata(self, manifest: Mapping[str, Any]) -> dict[str, Any]:
        """Metadata common to all emitted Dagster events for this dbt node."""
        return {
            **self._get_column_schema_metadata(manifest),
            **self._get_execution_duration_metadata(),
            "unique_id": self._unique_id,
            "invocation_id": self.raw_event["info"]["invocation_id"],
        }

    def _get_node_status(self) -> str:
        # if model materialization is incremental microbatch, node_status
        # property is "None", hence fall back to status
        raw_node_status = self._raw_node_info.get("node_status")
        return (
            raw_node_status
            if raw_node_status and raw_node_status not in _EMPTY_VALUES
            else self._raw_data["status"].lower()
        )

    def _get_lineage_metadata(
        self,
        translator: DagsterDbtTranslator,
        manifest: Mapping[str, Any],
        target_path: Optional[Path],
    ) -> Mapping[str, Any]:
        """Build column lineage metadata; best-effort, returns {} on any failure."""
        try:
            column_data = self._event_history_metadata.get("columns", {})
            parent_column_data = {
                parent_key: parent_data["columns"]
                for parent_key, parent_data in self._event_history_metadata.get(
                    "parents", {}
                ).items()
            }

            # Column lineage can only be built if initial metadata is provided.
            if self.has_column_lineage_metadata:
                return _build_column_lineage_metadata(
                    event_history_metadata=EventHistoryMetadata(
                        columns=column_data, parents=parent_column_data
                    ),
                    dbt_resource_props=self._get_resource_props(self._unique_id, manifest),
                    manifest=manifest,
                    dagster_dbt_translator=translator,
                    target_path=target_path,
                )
        except Exception as e:
            logger.warning(
                "An error occurred while building column lineage metadata for the dbt resource"
                f" `{self._get_resource_props(self._unique_id, manifest)['original_file_path']}`."
                " Lineage metadata will not be included in the event.\n\n"
                f"Exception: {e}",
                exc_info=True,
            )
        return {}

    def _get_materialization_metadata(
        self,
        translator: DagsterDbtTranslator,
        manifest: Mapping[str, Any],
        target_path: Optional[Path],
    ) -> dict[str, Any]:
        """Metadata attached to Output/AssetMaterialization events."""
        return {
            **self._get_default_metadata(manifest),
            **self._get_lineage_metadata(translator, manifest, target_path),
        }

    def _to_model_events(
        self,
        manifest: Mapping[str, Any],
        dagster_dbt_translator: DagsterDbtTranslator,
        context: Optional[Union[OpExecutionContext, AssetExecutionContext]],
        target_path: Optional[Path],
        project: Optional[DbtProject],
    ) -> Iterator[Union[Output, AssetMaterialization]]:
        """Yield an Output (asset context) or AssetMaterialization (op context) for this node."""
        asset_key = dagster_dbt_translator.get_asset_spec(manifest, self._unique_id, project).key
        metadata = self._get_materialization_metadata(dagster_dbt_translator, manifest, target_path)
        if context and context.has_assets_def:
            yield Output(
                value=None, output_name=asset_key.to_python_identifier(), metadata=metadata
            )
        else:
            yield AssetMaterialization(asset_key=asset_key, metadata=metadata)

    ##############
    # TEST PARSING
    ##############

    def _get_check_execution_metadata(self, manifest: Mapping[str, Any]) -> dict[str, Any]:
        """Metadata attached to check results/evaluations/observations for a dbt test."""
        failure_count = self._raw_data.get("num_failures")
        return {
            **self._get_default_metadata(manifest),
            "status": self._get_node_status(),
            **({} if failure_count is None else {"dagster_dbt/failed_row_count": failure_count}),
        }

    @abstractmethod
    def _get_check_passed(self) -> bool:
        """Engine-specific: whether the test passed."""
        ...

    @abstractmethod
    def _get_check_severity(self) -> AssetCheckSeverity:
        """Engine-specific: severity of a non-passing test."""
        ...

    def _get_check_properties(
        self, key: AssetCheckKey, manifest: Mapping[str, Any]
    ) -> CheckProperties:
        """Assemble the shared kwargs for AssetCheckResult/AssetCheckEvaluation."""
        return CheckProperties(
            passed=self._get_check_passed(),
            asset_key=key.asset_key,
            check_name=key.name,
            severity=self._get_check_severity(),
            metadata=self._get_check_execution_metadata(manifest),
        )

    def _get_result_check_keys(
        self, context: Optional[Union[OpExecutionContext, AssetExecutionContext]]
    ) -> AbstractSet[AssetCheckKey]:
        """Returns the set of check keys for which we should emit AssetCheckResult events."""
        if context is None or not context.has_assets_def:
            return set()
        return {
            *context.selected_asset_check_keys,
            *get_checks_on_sources_upstream_of_selected_assets(
                assets_def=context.assets_def,
                selected_asset_keys=context.selected_asset_keys,
            ),
        }

    def _to_observation_events_for_test(
        self,
        key: Optional[AssetCheckKey],
        dagster_dbt_translator: DagsterDbtTranslator,
        validated_manifest: Mapping[str, Any],
        metadata: Mapping[str, Any],
    ) -> Iterator[AssetObservation]:
        """Yield an AssetObservation on each upstream asset of a test that cannot be
        represented as an asset check (no key, or not selected)."""
        resource_props = self._get_resource_props(self._unique_id, validated_manifest)
        message = None

        # dbt's default indirect selection (eager) will select relationship tests
        # on unselected assets, if they're compared with a selected asset.
        # This doesn't match Dagster's default check selection which is to only
        # select checks on selected assets. When we use eager, we may receive
        # unexpected test results so we log those as observations as if
        # asset checks were disabled.
        if dagster_dbt_translator.settings.enable_asset_checks:
            # If the test did not have an asset key associated with it, it was a singular
            # test with multiple dependencies without a configured asset key.
            test_name = resource_props["name"]
            additional_message = (
                (
                    f"`{test_name}` is a singular test with multiple dependencies."
                    " Configure an asset key in the test's dbt meta to load it as an"
                    " asset check.\n\n"
                )
                if not key
                else ""
            )

            message = (
                "Logging an `AssetObservation` instead of an `AssetCheckResult`"
                f" for dbt test `{test_name}`.\n\n"
                f"{additional_message}"
                "This test was not included in Dagster's asset check"
                " selection, and was likely executed due to dbt indirect selection."
            )
            logger.warning(message)

        for upstream_unique_id in resource_props["depends_on"]["nodes"]:
            upstream_resource_props: dict[str, Any] = validated_manifest["nodes"].get(
                upstream_unique_id
            ) or validated_manifest["sources"].get(upstream_unique_id)
            upstream_asset_key = dagster_dbt_translator.get_asset_key(upstream_resource_props)

            yield AssetObservation(
                asset_key=upstream_asset_key, metadata=metadata, description=message
            )

    def _to_test_events(
        self,
        manifest: Mapping[str, Any],
        translator: DagsterDbtTranslator,
        project: Optional[DbtProject],
        context: Optional[Union[OpExecutionContext, AssetExecutionContext]],
    ) -> Iterator[Union[AssetCheckResult, AssetCheckEvaluation, AssetObservation]]:
        """Converts a dbt CLI event to a set of Dagster events corresponding to a test execution."""
        key = get_asset_check_key_for_test(manifest, translator, self._unique_id, project=project)

        has_assets_def = context is not None and context.has_assets_def

        if key is not None and has_assets_def and key in self._get_result_check_keys(context):
            # key was expected to be evaluated, use AssetCheckResult
            properties = self._get_check_properties(key, manifest)
            yield AssetCheckResult(**properties)
            return
        elif key is not None and not has_assets_def:
            # in an op definition, we don't have an assets def, so we use AssetCheckEvaluation
            properties = self._get_check_properties(key, manifest)
            yield AssetCheckEvaluation(**properties)
            return

        # fallback case, emit observation events if we have no key to associate with the
        # test, or if the test was not expected to be evaluated.
        metadata = self._get_check_execution_metadata(manifest)
        yield from self._to_observation_events_for_test(
            key=key,
            dagster_dbt_translator=translator,
            validated_manifest=manifest,
            metadata=metadata,
        )

    @public
    def to_default_asset_events(
        self,
        manifest: DbtManifestParam,
        dagster_dbt_translator: DagsterDbtTranslator = DagsterDbtTranslator(),
        context: Optional[Union[OpExecutionContext, AssetExecutionContext]] = None,
        target_path: Optional[Path] = None,
        project: Optional[DbtProject] = None,
    ) -> Iterator[
        Union[
            Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation
        ]
    ]:
        """Convert a dbt CLI event to a set of corresponding Dagster events.

        Args:
            manifest (Union[Mapping[str, Any], str, Path]): The dbt manifest blob.
            dagster_dbt_translator (DagsterDbtTranslator): Optionally, a custom translator for
                linking dbt nodes to Dagster assets.
            context (Optional[Union[OpExecutionContext, AssetExecutionContext]]): The execution context.
            target_path (Optional[Path]): An explicit path to a target folder used to retrieve
                dbt artifacts while generating events.
            project (Optional[DbtProject]): Optionally, the dbt project used to resolve asset
                specs and check keys.

        Returns:
            Iterator[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation]]:
                A set of corresponding Dagster events.

                In a Dagster asset definition, the following are yielded:
                - Output for refables (e.g. models, seeds, snapshots.)
                - AssetCheckResult for dbt test results that are enabled as asset checks.
                - AssetObservation for dbt test results that are not enabled as asset checks.

                In a Dagster op definition, the following are yielded:
                - AssetMaterialization refables (e.g. models, seeds, snapshots.)
                - AssetCheckEvaluation for dbt test results that are enabled as asset checks.
                - AssetObservation for dbt test results that are not enabled as asset checks.
        """
        if not self.is_result_event:
            return

        dagster_dbt_translator = validate_translator(dagster_dbt_translator)
        manifest = validate_manifest(manifest)

        if self._is_model_execution_event(manifest):
            yield from self._to_model_events(
                manifest, dagster_dbt_translator, context, target_path, project
            )
        if self._is_test_execution_event(manifest):
            yield from self._to_test_events(manifest, dagster_dbt_translator, project, context)
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
class DbtCoreCliEventMessage(DbtCliEventMessage):
    """Represents a dbt CLI event that was produced using the dbt Core engine."""

    @property
    def is_result_event(self) -> bool:
        """Whether this is a per-node result event (seed/model/snapshot/test).

        Unit test results are excluded because they do not map to Dagster assets
        or asset checks.
        """
        # Use a set literal instead of set([...]) — no throwaway list allocation.
        is_result_name = self.raw_event["info"]["name"] in {
            "LogSeedResult",
            "LogModelResult",
            "LogSnapshotResult",
            "LogTestResult",
        }
        unique_id = self.raw_event["data"].get("node_info", {}).get("unique_id", "")
        return is_result_name and not unique_id.startswith("unit_test")

    def _get_check_passed(self) -> bool:
        """dbt Core reports a passing test with the Pass test status."""
        return self._get_node_status() == TestStatus.Pass

    def _get_check_severity(self) -> AssetCheckSeverity:
        """Map dbt's warn status to WARN severity; everything else is ERROR."""
        node_status = self._get_node_status()
        return (
            AssetCheckSeverity.WARN if node_status == NodeStatus.Warn else AssetCheckSeverity.ERROR
        )
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
class DbtFusionCliEventMessage(DbtCliEventMessage):
    """Represents a dbt CLI event that was produced using the dbt Fusion engine."""

    @property
    def is_result_event(self) -> bool:
        """Fusion emits exactly one result event type: ``NodeFinished``."""
        return self.raw_event["info"]["name"] == "NodeFinished"

    def _get_check_passed(self) -> bool:
        """A Fusion test passes when its node finished with a success status."""
        return self._get_node_status() == NodeStatus.Success

    def _get_check_severity(self) -> AssetCheckSeverity:
        """Map dbt's warn status to WARN severity; everything else is ERROR."""
        if self._get_node_status() == NodeStatus.Warn:
            return AssetCheckSeverity.WARN
        return AssetCheckSeverity.ERROR