dagster-dbt 0.23.3__py3-none-any.whl → 0.28.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. dagster_dbt/__init__.py +41 -140
  2. dagster_dbt/asset_decorator.py +49 -230
  3. dagster_dbt/asset_specs.py +65 -0
  4. dagster_dbt/asset_utils.py +655 -338
  5. dagster_dbt/cli/app.py +44 -43
  6. dagster_dbt/cloud/__init__.py +6 -4
  7. dagster_dbt/cloud/asset_defs.py +119 -177
  8. dagster_dbt/cloud/cli.py +3 -4
  9. dagster_dbt/cloud/ops.py +9 -6
  10. dagster_dbt/cloud/resources.py +9 -4
  11. dagster_dbt/cloud/types.py +12 -7
  12. dagster_dbt/cloud/utils.py +186 -0
  13. dagster_dbt/cloud_v2/__init__.py +10 -0
  14. dagster_dbt/cloud_v2/asset_decorator.py +81 -0
  15. dagster_dbt/cloud_v2/cli_invocation.py +67 -0
  16. dagster_dbt/cloud_v2/client.py +438 -0
  17. dagster_dbt/cloud_v2/resources.py +462 -0
  18. dagster_dbt/cloud_v2/run_handler.py +229 -0
  19. dagster_dbt/cloud_v2/sensor_builder.py +254 -0
  20. dagster_dbt/cloud_v2/types.py +143 -0
  21. dagster_dbt/compat.py +107 -0
  22. dagster_dbt/components/__init__.py +0 -0
  23. dagster_dbt/components/dbt_project/__init__.py +0 -0
  24. dagster_dbt/components/dbt_project/component.py +545 -0
  25. dagster_dbt/components/dbt_project/scaffolder.py +65 -0
  26. dagster_dbt/core/__init__.py +0 -10
  27. dagster_dbt/core/dbt_cli_event.py +612 -0
  28. dagster_dbt/core/dbt_cli_invocation.py +474 -0
  29. dagster_dbt/core/dbt_event_iterator.py +399 -0
  30. dagster_dbt/core/resource.py +733 -0
  31. dagster_dbt/core/utils.py +14 -279
  32. dagster_dbt/dagster_dbt_translator.py +317 -74
  33. dagster_dbt/dbt_core_version.py +1 -0
  34. dagster_dbt/dbt_manifest.py +6 -5
  35. dagster_dbt/dbt_manifest_asset_selection.py +62 -22
  36. dagster_dbt/dbt_project.py +179 -40
  37. dagster_dbt/dbt_project_manager.py +173 -0
  38. dagster_dbt/dbt_version.py +0 -0
  39. dagster_dbt/errors.py +9 -84
  40. dagster_dbt/freshness_builder.py +147 -0
  41. dagster_dbt/include/pyproject.toml.jinja +21 -0
  42. dagster_dbt/include/scaffold/assets.py.jinja +1 -8
  43. dagster_dbt/include/scaffold/definitions.py.jinja +0 -15
  44. dagster_dbt/include/scaffold/project.py.jinja +1 -0
  45. dagster_dbt/include/setup.py.jinja +2 -3
  46. dagster_dbt/metadata_set.py +18 -0
  47. dagster_dbt/utils.py +136 -234
  48. dagster_dbt/version.py +1 -1
  49. dagster_dbt-0.28.4.dist-info/METADATA +47 -0
  50. dagster_dbt-0.28.4.dist-info/RECORD +59 -0
  51. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/WHEEL +1 -1
  52. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/entry_points.txt +3 -0
  53. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info/licenses}/LICENSE +1 -1
  54. dagster_dbt/asset_defs.py +0 -1049
  55. dagster_dbt/core/resources.py +0 -527
  56. dagster_dbt/core/resources_v2.py +0 -1542
  57. dagster_dbt/core/types.py +0 -63
  58. dagster_dbt/dbt_resource.py +0 -220
  59. dagster_dbt/include/scaffold/constants.py.jinja +0 -21
  60. dagster_dbt/ops.py +0 -134
  61. dagster_dbt/types.py +0 -22
  62. dagster_dbt-0.23.3.dist-info/METADATA +0 -31
  63. dagster_dbt-0.23.3.dist-info/RECORD +0 -43
  64. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,399 @@
1
+ from collections.abc import Callable, Iterator
2
+ from concurrent.futures import ThreadPoolExecutor
3
+ from typing import TYPE_CHECKING, Any, Optional, Union, cast
4
+
5
+ from dagster import (
6
+ AssetCheckResult,
7
+ AssetMaterialization,
8
+ AssetObservation,
9
+ Output,
10
+ _check as check,
11
+ get_dagster_logger,
12
+ )
13
+ from dagster._annotations import public
14
+ from dagster._core.definitions.asset_checks.asset_check_evaluation import AssetCheckEvaluation
15
+ from dagster._core.definitions.metadata import TableMetadataSet, TextMetadataValue
16
+ from dagster._core.errors import DagsterInvalidPropertyError
17
+ from dagster._core.utils import exhaust_iterator_and_yield_results_with_exception, imap
18
+ from dagster._utils import pushd
19
+ from typing_extensions import TypeVar
20
+
21
+ from dagster_dbt.asset_utils import default_metadata_from_dbt_resource_props
22
+ from dagster_dbt.compat import DBT_PYTHON_VERSION
23
+ from dagster_dbt.core.dbt_cli_event import EventHistoryMetadata, _build_column_lineage_metadata
24
+
25
+ if TYPE_CHECKING:
26
+ from dagster_dbt.core.dbt_cli_invocation import DbtCliInvocation
27
+
28
+
29
# Module-level logger shared by the threaded metadata-fetching helpers below.
logger = get_dagster_logger()

# Union of the Dagster event types a dbt CLI invocation can emit; the
# post-processing helpers in this module attach metadata to these events.
DbtDagsterEventType = Union[
    Output, AssetMaterialization, AssetCheckResult, AssetObservation, AssetCheckEvaluation
]


# We define DbtEventIterator as a generic type for the sake of type hinting.
# This is so that users who inspect the type of the return value of `DbtCliInvocation.stream()`
# will be able to see the inner type of the iterator, rather than just `DbtEventIterator`.
T = TypeVar("T", bound=DbtDagsterEventType)
40
+
41
+
42
def _get_dbt_resource_props_from_event(
    invocation: "DbtCliInvocation", event: DbtDagsterEventType
) -> dict[str, Any]:
    """Resolve the manifest node properties for the dbt resource that produced *event*.

    The event's ``unique_id`` metadata entry identifies the node in the invocation's
    manifest. The node is required to exist; a missing entry fails via ``check.not_none``.
    """
    unique_id_metadata = cast("TextMetadataValue", event.metadata["unique_id"])
    resource_props = invocation.manifest["nodes"].get(unique_id_metadata.text)
    return check.not_none(resource_props)
47
+
48
+
49
def _fetch_column_metadata(
    invocation: "DbtCliInvocation", event: DbtDagsterEventType, with_column_lineage: bool
) -> Optional[dict[str, Any]]:
    """Threaded task which fetches column schema and lineage metadata for dbt models in a dbt
    run once they are built, returning the metadata to be attached.

    First we use the dbt adapter to obtain the column metadata for the built model. Then we
    retrieve the column metadata for the model's parent models, if they exist. Finally, we
    build the column lineage metadata for the model and attach it to the event.

    Args:
        invocation (DbtCliInvocation): The dbt CLI invocation.
        event (DbtDagsterEventType): The dbt event to append column metadata to.
        with_column_lineage (bool): Whether to include column lineage metadata in the event.

    Returns:
        A dict of metadata entries to merge into the event's metadata. Failures are logged
        and degrade gracefully to a (possibly empty) subset of the metadata.
    """
    adapter = check.not_none(invocation.adapter)

    dbt_resource_props = _get_dbt_resource_props_from_event(invocation, event)

    with (
        pushd(str(invocation.project_dir)),
        adapter.connection_named(f"column_metadata_{dbt_resource_props['unique_id']}"),
    ):
        try:
            cols = invocation._get_columns_from_dbt_resource_props(  # noqa: SLF001
                adapter=adapter, dbt_resource_props=dbt_resource_props
            ).columns
        except Exception as e:
            logger.warning(
                "An error occurred while fetching column schema metadata for the dbt resource"
                f" `{dbt_resource_props['original_file_path']}`."
                " Column metadata will not be included in the event.\n\n"
                f"Exception: {e}",
                exc_info=True,
            )
            return {}

        # Initialize these before the try block: if the dict comprehension below raises,
        # the except handler (which interpolates `col_data`) and the lineage section
        # (which reads `column_schema_data`) would otherwise hit a NameError on unbound
        # locals instead of degrading gracefully.
        column_schema_data: dict[str, Any] = {}
        col_data: dict[str, Any] = {}
        schema_metadata = {}
        try:
            column_schema_data = {col.name: {"data_type": col.data_type} for col in cols}
            col_data = {"columns": column_schema_data}
            schema_metadata = default_metadata_from_dbt_resource_props(col_data)
        except Exception as e:
            logger.warning(
                "An error occurred while building column schema metadata from data"
                f" `{col_data}` for the dbt resource"
                f" `{dbt_resource_props['original_file_path']}`."
                " Column schema metadata will not be included in the event.\n\n"
                f"Exception: {e}",
                exc_info=True,
            )

        lineage_metadata = {}
        if with_column_lineage:
            try:
                # Collect the column schema of every parent (model or source) of this
                # node so that column-level lineage can be resolved against them.
                parents = {}
                parent_unique_ids = invocation.manifest["parent_map"].get(
                    dbt_resource_props["unique_id"], []
                )
                for parent_unique_id in parent_unique_ids:
                    # Parents may live in either "nodes" (models) or "sources".
                    dbt_parent_resource_props = invocation.manifest["nodes"].get(
                        parent_unique_id
                    ) or invocation.manifest["sources"].get(parent_unique_id)

                    parent_name, parent_columns = invocation._get_columns_from_dbt_resource_props(  # noqa: SLF001
                        adapter=adapter, dbt_resource_props=dbt_parent_resource_props
                    )

                    parents[parent_name] = {
                        col.name: {"data_type": col.data_type} for col in parent_columns
                    }

                lineage_metadata = _build_column_lineage_metadata(
                    event_history_metadata=EventHistoryMetadata(
                        columns=column_schema_data,
                        parents=parents,
                    ),
                    dbt_resource_props=dbt_resource_props,
                    manifest=invocation.manifest,
                    dagster_dbt_translator=invocation.dagster_dbt_translator,
                    target_path=invocation.target_path,
                )

            except Exception as e:
                logger.warning(
                    "An error occurred while building column lineage metadata for the dbt resource"
                    f" `{dbt_resource_props['original_file_path']}`."
                    " Lineage metadata will not be included in the event.\n\n"
                    f"Exception: {e}",
                    exc_info=True,
                )

        return {
            **schema_metadata,
            **lineage_metadata,
        }
145
+
146
+
147
def _fetch_row_count_metadata(
    invocation: "DbtCliInvocation",
    event: DbtDagsterEventType,
) -> Optional[dict[str, Any]]:
    """Threaded task which fetches row counts for materialized dbt models in a dbt run
    once they are built, and attaches the row count as metadata to the event.

    Returns None (no metadata) for non-materialization events, for views, and on
    any query failure; failures are logged rather than raised.
    """
    # Only materialization-style events correspond to a built relation with rows to count.
    if not isinstance(event, (AssetMaterialization, Output)):
        return None

    adapter = check.not_none(invocation.adapter)

    dbt_resource_props = _get_dbt_resource_props_from_event(invocation, event)
    is_view = dbt_resource_props["config"]["materialized"] == "view"

    # Avoid counting rows for views, since they may include complex SQL queries
    # that are costly to execute. We can revisit this in the future if there is
    # a demand for it.
    if is_view:
        return None

    unique_id = dbt_resource_props["unique_id"]
    logger.debug("Fetching row count for %s", unique_id)
    # relation_name is the fully-qualified name dbt resolved for the built relation.
    relation_name = dbt_resource_props["relation_name"]

    try:
        with (
            pushd(str(invocation.project_dir)),
            adapter.connection_named(f"row_count_{unique_id}"),
        ):
            # NOTE(review): assumes adapter.execute returns a (response, result_table)
            # pair, hence the [1] index below — confirm against the adapter interface.
            query_result = adapter.execute(
                f"""
                    SELECT
                    count(*) as row_count
                    FROM
                    {relation_name}
                """,
                fetch=True,
            )
            query_result_table = query_result[1]
            # some adapters do not output the column names, so we need
            # to index by position
            row_count = query_result_table[0][0]
            # Spread TableMetadataSet into a plain dict of standard metadata entries.
            return {**TableMetadataSet(row_count=row_count)}

    except Exception as e:
        logger.exception(
            f"An error occurred while fetching row count for {unique_id}. Row count metadata"
            " will not be included in the event.\n\n"
            f"Exception: {e}"
        )
        return None
199
+
200
+
201
class DbtEventIterator(Iterator[T]):
    """A wrapper around an iterator of dbt events which contains additional methods for
    post-processing the events, such as fetching row counts for materialized tables.
    """

    def __init__(
        self,
        events: Iterator[T],
        dbt_cli_invocation: "DbtCliInvocation",
    ) -> None:
        self._inner_iterator = events
        self._dbt_cli_invocation = dbt_cli_invocation

    def __next__(self) -> T:
        return next(self._inner_iterator)

    def __iter__(self) -> "DbtEventIterator[T]":
        return self

    @public
    def fetch_row_counts(
        self,
    ) -> "DbtEventIterator[Union[Output, AssetMaterialization, AssetCheckResult, AssetObservation, AssetCheckEvaluation]]":
        """Functionality which will fetch row counts for materialized dbt
        models in a dbt run once they are built. Note that row counts will not be fetched
        for views, since this requires running the view's SQL query which may be costly.

        Returns:
            Iterator[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation]]:
                A set of corresponding Dagster events for dbt models, with row counts attached,
                yielded in the order they are emitted by dbt.
        """
        return self._attach_metadata(_fetch_row_count_metadata)

    @public
    def fetch_column_metadata(
        self,
        with_column_lineage: bool = True,
    ) -> "DbtEventIterator[Union[Output, AssetMaterialization, AssetCheckResult, AssetObservation, AssetCheckEvaluation]]":
        """Functionality which will fetch column schema metadata for dbt models in a run
        once they're built. It will also fetch schema information for upstream models and generate
        column lineage metadata using sqlglot, if enabled.

        Args:
            with_column_lineage (bool): Whether to generate column lineage metadata using sqlglot.

        Returns:
            Iterator[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation]]:
                A set of corresponding Dagster events for dbt models, with column metadata attached,
                yielded in the order they are emitted by dbt.
        """
        check.invariant(
            DBT_PYTHON_VERSION is not None, "Column metadata not supported for dbt Fusion."
        )

        # Use a named closure rather than an assigned lambda (PEP 8 E731):
        # _attach_metadata uses fn.__name__ as the worker-thread name prefix, so a real
        # name yields readable thread names instead of "<lambda>".
        def fetch_metadata(
            invocation: "DbtCliInvocation", event: DbtDagsterEventType
        ) -> Optional[dict[str, Any]]:
            return _fetch_column_metadata(invocation, event, with_column_lineage)

        return self._attach_metadata(fetch_metadata)

    def _attach_metadata(
        self,
        fn: Callable[["DbtCliInvocation", DbtDagsterEventType], Optional[dict[str, Any]]],
    ) -> "DbtEventIterator[DbtDagsterEventType]":
        """Runs a threaded task to attach metadata to each event in the iterator.

        Args:
            fn (Callable[[DbtCliInvocation, DbtDagsterEventType], Optional[Dict[str, Any]]]):
                A function which takes a DbtCliInvocation and a DbtDagsterEventType and returns
                a dictionary of metadata to attach to the event. Returning None leaves the
                event unchanged.

        Returns:
            Iterator[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult]]:
                A set of corresponding Dagster events for dbt models, with any metadata output
                by the function attached, yielded in the order they are emitted by dbt.
        """

        def _map_fn(event: DbtDagsterEventType) -> DbtDagsterEventType:
            # Run the fetcher from the project directory so relative paths resolve.
            with pushd(str(self._dbt_cli_invocation.project_dir)):
                result = fn(self._dbt_cli_invocation, event)
                if result is None:
                    return event

            return event.with_metadata({**event.metadata, **result})

        # If the adapter is DuckDB, we need to wait for the dbt CLI process to complete
        # so that the DuckDB lock is released. This is because DuckDB does not allow for
        # opening multiple connections to the same database when a write connection, such
        # as the one dbt uses, is open.
        event_stream = self
        if (
            self._dbt_cli_invocation.adapter
            and self._dbt_cli_invocation.adapter.__class__.__name__ == "DuckDBAdapter"
        ):
            # Deferred import: dbt-duckdb is an optional dependency, present only when
            # the project actually uses the DuckDB adapter (class-name check above
            # guards the import itself).
            from dbt.adapters.duckdb import DuckDBAdapter

            if isinstance(self._dbt_cli_invocation.adapter, DuckDBAdapter):
                event_stream = exhaust_iterator_and_yield_results_with_exception(self)

        def _threadpool_wrap_map_fn() -> Iterator[
            Union[
                Output,
                AssetMaterialization,
                AssetObservation,
                AssetCheckResult,
                AssetCheckEvaluation,
            ]
        ]:
            with ThreadPoolExecutor(
                max_workers=self._dbt_cli_invocation.postprocessing_threadpool_num_threads,
                thread_name_prefix=f"dbt_attach_metadata_{fn.__name__}",
            ) as executor:
                # imap fans the metadata fetches out across the pool while preserving
                # the order in which dbt emitted the events.
                yield from imap(
                    executor=executor,
                    iterable=event_stream,
                    func=_map_fn,
                )

        return DbtEventIterator(
            _threadpool_wrap_map_fn(),
            dbt_cli_invocation=self._dbt_cli_invocation,
        )

    @public
    def with_insights(
        self,
        skip_config_check: bool = False,
        record_observation_usage: bool = True,
    ) -> "DbtEventIterator[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation]]":
        """Associate each warehouse query with the produced asset materializations for use in Dagster
        Plus Insights. Currently supports Snowflake and BigQuery.

        For more information, see the documentation for
        `dagster_cloud.dagster_insights.dbt_with_snowflake_insights` and
        `dagster_cloud.dagster_insights.dbt_with_bigquery_insights`.

        Args:
            skip_config_check (bool): If true, skips the check that the dbt project config is set up
                correctly. Defaults to False.
            record_observation_usage (bool): If True, associates the usage associated with
                asset observations with that asset. Default is True.

        **Example:**

        .. code-block:: python

            @dbt_assets(manifest=DBT_MANIFEST_PATH)
            def jaffle_shop_dbt_assets(
                context: AssetExecutionContext,
                dbt: DbtCliResource,
            ):
                yield from dbt.cli(["build"], context=context).stream().with_insights()
        """
        # Dispatch on the warehouse adapter recorded in the manifest metadata.
        adapter_type = self._dbt_cli_invocation.manifest.get("metadata", {}).get("adapter_type")
        if adapter_type == "snowflake":
            try:
                from dagster_cloud.dagster_insights import (  # pyright: ignore[reportMissingImports]
                    dbt_with_snowflake_insights,
                )
            except ImportError as e:
                raise DagsterInvalidPropertyError(
                    "The `dagster_cloud` library is required to use the `with_insights`"
                    " method. Install the library with `pip install dagster-cloud`."
                ) from e

            return DbtEventIterator(
                events=dbt_with_snowflake_insights(
                    context=self._dbt_cli_invocation.context,
                    dbt_cli_invocation=self._dbt_cli_invocation,
                    dagster_events=self,
                    skip_config_check=skip_config_check,
                    record_observation_usage=record_observation_usage,
                ),
                dbt_cli_invocation=self._dbt_cli_invocation,
            )
        elif adapter_type == "bigquery":
            try:
                from dagster_cloud.dagster_insights import (  # pyright: ignore[reportMissingImports]
                    dbt_with_bigquery_insights,
                )
            except ImportError as e:
                raise DagsterInvalidPropertyError(
                    "The `dagster_cloud` library is required to use the `with_insights`"
                    " method. Install the library with `pip install dagster-cloud`."
                ) from e

            return DbtEventIterator(
                events=dbt_with_bigquery_insights(
                    context=self._dbt_cli_invocation.context,
                    dbt_cli_invocation=self._dbt_cli_invocation,
                    dagster_events=self,
                    skip_config_check=skip_config_check,
                    record_observation_usage=record_observation_usage,
                ),
                dbt_cli_invocation=self._dbt_cli_invocation,
            )
        else:
            # check.failed raises, so every adapter_type branch terminates explicitly.
            check.failed(
                f"The `with_insights` method is only supported for Snowflake and BigQuery and is not supported for adapter type `{adapter_type}`"
            )