dagster-dbt 0.23.3__py3-none-any.whl → 0.28.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. dagster_dbt/__init__.py +41 -140
  2. dagster_dbt/asset_decorator.py +49 -230
  3. dagster_dbt/asset_specs.py +65 -0
  4. dagster_dbt/asset_utils.py +655 -338
  5. dagster_dbt/cli/app.py +44 -43
  6. dagster_dbt/cloud/__init__.py +6 -4
  7. dagster_dbt/cloud/asset_defs.py +119 -177
  8. dagster_dbt/cloud/cli.py +3 -4
  9. dagster_dbt/cloud/ops.py +9 -6
  10. dagster_dbt/cloud/resources.py +9 -4
  11. dagster_dbt/cloud/types.py +12 -7
  12. dagster_dbt/cloud/utils.py +186 -0
  13. dagster_dbt/cloud_v2/__init__.py +10 -0
  14. dagster_dbt/cloud_v2/asset_decorator.py +81 -0
  15. dagster_dbt/cloud_v2/cli_invocation.py +67 -0
  16. dagster_dbt/cloud_v2/client.py +438 -0
  17. dagster_dbt/cloud_v2/resources.py +462 -0
  18. dagster_dbt/cloud_v2/run_handler.py +229 -0
  19. dagster_dbt/cloud_v2/sensor_builder.py +254 -0
  20. dagster_dbt/cloud_v2/types.py +143 -0
  21. dagster_dbt/compat.py +107 -0
  22. dagster_dbt/components/__init__.py +0 -0
  23. dagster_dbt/components/dbt_project/__init__.py +0 -0
  24. dagster_dbt/components/dbt_project/component.py +545 -0
  25. dagster_dbt/components/dbt_project/scaffolder.py +65 -0
  26. dagster_dbt/core/__init__.py +0 -10
  27. dagster_dbt/core/dbt_cli_event.py +612 -0
  28. dagster_dbt/core/dbt_cli_invocation.py +474 -0
  29. dagster_dbt/core/dbt_event_iterator.py +399 -0
  30. dagster_dbt/core/resource.py +733 -0
  31. dagster_dbt/core/utils.py +14 -279
  32. dagster_dbt/dagster_dbt_translator.py +317 -74
  33. dagster_dbt/dbt_core_version.py +1 -0
  34. dagster_dbt/dbt_manifest.py +6 -5
  35. dagster_dbt/dbt_manifest_asset_selection.py +62 -22
  36. dagster_dbt/dbt_project.py +179 -40
  37. dagster_dbt/dbt_project_manager.py +173 -0
  38. dagster_dbt/dbt_version.py +0 -0
  39. dagster_dbt/errors.py +9 -84
  40. dagster_dbt/freshness_builder.py +147 -0
  41. dagster_dbt/include/pyproject.toml.jinja +21 -0
  42. dagster_dbt/include/scaffold/assets.py.jinja +1 -8
  43. dagster_dbt/include/scaffold/definitions.py.jinja +0 -15
  44. dagster_dbt/include/scaffold/project.py.jinja +1 -0
  45. dagster_dbt/include/setup.py.jinja +2 -3
  46. dagster_dbt/metadata_set.py +18 -0
  47. dagster_dbt/utils.py +136 -234
  48. dagster_dbt/version.py +1 -1
  49. dagster_dbt-0.28.4.dist-info/METADATA +47 -0
  50. dagster_dbt-0.28.4.dist-info/RECORD +59 -0
  51. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/WHEEL +1 -1
  52. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/entry_points.txt +3 -0
  53. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info/licenses}/LICENSE +1 -1
  54. dagster_dbt/asset_defs.py +0 -1049
  55. dagster_dbt/core/resources.py +0 -527
  56. dagster_dbt/core/resources_v2.py +0 -1542
  57. dagster_dbt/core/types.py +0 -63
  58. dagster_dbt/dbt_resource.py +0 -220
  59. dagster_dbt/include/scaffold/constants.py.jinja +0 -21
  60. dagster_dbt/ops.py +0 -134
  61. dagster_dbt/types.py +0 -22
  62. dagster_dbt-0.23.3.dist-info/METADATA +0 -31
  63. dagster_dbt-0.23.3.dist-info/RECORD +0 -43
  64. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,474 @@
1
+ import contextlib
2
+ import copy
3
+ import os
4
+ import shutil
5
+ import signal
6
+ import subprocess
7
+ import sys
8
+ from collections.abc import Iterator, Mapping, Sequence
9
+ from dataclasses import dataclass, field, replace
10
+ from pathlib import Path
11
+ from typing import Any, Final, Literal, NamedTuple, Optional, Union, cast
12
+
13
+ import orjson
14
+ from dagster import (
15
+ AssetCheckEvaluation,
16
+ AssetCheckResult,
17
+ AssetExecutionContext,
18
+ AssetMaterialization,
19
+ AssetObservation,
20
+ OpExecutionContext,
21
+ Output,
22
+ get_dagster_logger,
23
+ )
24
+ from dagster._annotations import public
25
+ from dagster._core.errors import DagsterExecutionInterruptedError
26
+ from packaging import version
27
+
28
+ from dagster_dbt.compat import BaseAdapter, BaseColumn, BaseRelation
29
+ from dagster_dbt.core.dbt_cli_event import (
30
+ DbtCliEventMessage,
31
+ DbtCoreCliEventMessage,
32
+ DbtFusionCliEventMessage,
33
+ )
34
+ from dagster_dbt.core.dbt_event_iterator import DbtDagsterEventType, DbtEventIterator
35
+ from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator
36
+ from dagster_dbt.dbt_project import DbtProject
37
+ from dagster_dbt.errors import DagsterDbtCliRuntimeError
38
+
39
# File name of dbt's partial-parse cache. `DbtCliInvocation.run` copies this file into the
# invocation's target path when it is available.
PARTIAL_PARSE_FILE_NAME = "partial_parse.msgpack"
# Seconds to wait for the dbt process to exit after an interrupt signal has been forwarded to it.
# Read once at import time from the environment variable of the same name; defaults to 25.
DAGSTER_DBT_TERMINATION_TIMEOUT_SECONDS = int(
    os.getenv("DAGSTER_DBT_TERMINATION_TIMEOUT_SECONDS", "25")
)
# Default value of `DbtCliInvocation.postprocessing_threadpool_num_threads`.
DEFAULT_EVENT_POSTPROCESSING_THREADPOOL_SIZE: Final[int] = 4


# Module-level Dagster logger shared by everything in this file.
logger = get_dagster_logger()
47
+
48
+
49
+ def _get_dbt_target_path() -> Path:
50
+ return Path(os.getenv("DBT_TARGET_PATH", "target"))
51
+
52
+
53
class RelationKey(NamedTuple):
    """Hashable representation of the information needed to identify a relation in a database.

    Used as the dictionary key of the relation column metadata cache on `DbtCliInvocation`.
    """

    # Name of the database that contains the relation.
    database: str
    # Name of the schema that contains the relation.
    schema: str
    # Identifier of the relation itself within the schema.
    identifier: str
59
+
60
+
61
class RelationData(NamedTuple):
    """Relation metadata queried from a database."""

    # String form of the relation the columns were fetched for.
    name: str
    # Columns returned by the adapter for that relation.
    columns: list[BaseColumn]
66
+
67
+
68
def _get_relation_from_adapter(adapter: BaseAdapter, relation_key: RelationKey) -> BaseRelation:
    """Build the adapter's relation object for the given relation key.

    Args:
        adapter (BaseAdapter): The adapter whose ``Relation`` class creates the relation.
        relation_key (RelationKey): The database, schema and identifier of the relation.

    Returns:
        BaseRelation: The relation created by ``adapter.Relation.create``.
    """
    create_relation = adapter.Relation.create
    return create_relation(
        database=relation_key.database,
        schema=relation_key.schema,
        identifier=relation_key.identifier,
    )
74
+
75
+
76
@dataclass
class DbtCliInvocation:
    """The representation of an invoked dbt command.

    Args:
        process (subprocess.Popen): The process running the dbt command.
        manifest (Mapping[str, Any]): The dbt manifest blob.
        dagster_dbt_translator (DagsterDbtTranslator): The translator passed along when dbt
            events are converted to Dagster events.
        project_dir (Path): The path to the dbt project.
        target_path (Path): The path to the dbt target folder.
        raise_on_error (bool): Whether to raise an exception if the dbt command fails.
        cli_version (version.Version): The version of the dbt CLI. A major version below 2
            selects `DbtCoreCliEventMessage` for event parsing, otherwise
            `DbtFusionCliEventMessage` is used.
        project (Optional[DbtProject]): The dbt project.
        context (Optional[Union[OpExecutionContext, AssetExecutionContext]]): The execution
            context passed along when dbt events are converted to Dagster events.
        adapter (Optional[BaseAdapter]): The dbt adapter associated with this invocation, if any.
    """

    process: subprocess.Popen
    manifest: Mapping[str, Any]
    dagster_dbt_translator: DagsterDbtTranslator
    project_dir: Path
    target_path: Path
    raise_on_error: bool
    cli_version: version.Version
    project: Optional[DbtProject] = field(default=None)
    context: Optional[Union[OpExecutionContext, AssetExecutionContext]] = field(
        default=None, repr=False
    )
    # Seconds to wait for the dbt process to exit after an interrupt has been forwarded to it.
    termination_timeout_seconds: float = field(
        init=False, default=DAGSTER_DBT_TERMINATION_TIMEOUT_SECONDS
    )
    adapter: Optional[BaseAdapter] = field(default=None)
    postprocessing_threadpool_num_threads: int = field(
        init=False, default=DEFAULT_EVENT_POSTPROCESSING_THREADPOOL_SIZE
    )
    # Lines read from the process stdout by `is_successful`: parsed events are dictionaries,
    # lines that could not be parsed are kept as strings.
    _stdout: list[Union[str, dict[str, Any]]] = field(init=False, default_factory=list)
    # Messages of all error-level events seen while reading the process stdout.
    _error_messages: list[str] = field(init=False, default_factory=list)

    # Caches fetching relation column metadata to avoid redundant queries to the database.
    _relation_column_metadata_cache: dict[RelationKey, RelationData] = field(
        init=False, default_factory=dict
    )

    def _get_columns_from_dbt_resource_props(
        self, adapter: BaseAdapter, dbt_resource_props: dict[str, Any]
    ) -> RelationData:
        """Given a dbt resource properties dictionary, fetches the resource's column metadata from
        the database, or returns the cached metadata if it has already been fetched.

        Args:
            adapter (BaseAdapter): The adapter used to query the columns of the relation.
            dbt_resource_props (dict[str, Any]): The dbt resource properties. The keys
                ``database``, ``schema`` and ``unique_id`` are read, plus ``identifier`` for
                sources and ``alias`` for every other resource.

        Returns:
            RelationData: The relation name and its columns.
        """
        relation_key = RelationKey(
            database=dbt_resource_props["database"],
            schema=dbt_resource_props["schema"],
            # Resources whose unique id starts with "source" are keyed by `identifier`,
            # everything else by `alias`.
            identifier=(
                dbt_resource_props["identifier"]
                if dbt_resource_props["unique_id"].startswith("source")
                else dbt_resource_props["alias"]
            ),
        )
        if relation_key in self._relation_column_metadata_cache:
            return self._relation_column_metadata_cache[relation_key]

        relation = _get_relation_from_adapter(adapter=adapter, relation_key=relation_key)
        cols: list = adapter.get_columns_in_relation(relation=relation)
        # `setdefault` keeps an entry stored in the meantime instead of overwriting it.
        return self._relation_column_metadata_cache.setdefault(
            relation_key, RelationData(name=str(relation), columns=cols)
        )

    @classmethod
    def run(
        cls,
        args: Sequence[str],
        env: dict[str, str],
        manifest: Mapping[str, Any],
        dagster_dbt_translator: DagsterDbtTranslator,
        project_dir: Path,
        target_path: Path,
        raise_on_error: bool,
        context: Optional[Union[OpExecutionContext, AssetExecutionContext]],
        adapter: Optional[BaseAdapter],
        cli_version: version.Version,
        dbt_project: Optional[DbtProject] = None,
    ) -> "DbtCliInvocation":
        """Start the dbt CLI command in a subprocess and return its invocation object.

        Args:
            args (Sequence[str]): The full command line to execute.
            env (dict[str, str]): The environment variables of the subprocess.
            manifest (Mapping[str, Any]): The dbt manifest blob.
            dagster_dbt_translator (DagsterDbtTranslator): The translator stored on the invocation.
            project_dir (Path): The dbt project directory, used as the working directory.
            target_path (Path): The target folder of this invocation.
            raise_on_error (bool): Whether to raise an exception if the dbt command fails.
            context (Optional[Union[OpExecutionContext, AssetExecutionContext]]): The
                execution context stored on the invocation.
            adapter (Optional[BaseAdapter]): The dbt adapter stored on the invocation.
            cli_version (version.Version): The version of the dbt CLI.
            dbt_project (Optional[DbtProject]): The dbt project.

        Returns:
            DbtCliInvocation: The invocation wrapping the started process.
        """
        # Attempt to take advantage of partial parsing. If there is a `partial_parse.msgpack`
        # in the target folder, then copy it to the dynamic target path.
        #
        # This effectively allows us to skip the parsing of the manifest, which can be expensive.
        # See https://docs.getdbt.com/reference/programmatic-invocations#reusing-objects for more
        # details.
        current_target_path = _get_dbt_target_path()
        partial_parse_file_path = (
            current_target_path.joinpath(PARTIAL_PARSE_FILE_NAME)
            if current_target_path.is_absolute()
            else project_dir.joinpath(current_target_path, PARTIAL_PARSE_FILE_NAME)
        )
        partial_parse_destination_target_path = target_path.joinpath(PARTIAL_PARSE_FILE_NAME)

        if partial_parse_file_path.exists() and not partial_parse_destination_target_path.exists():
            logger.info(
                f"Copying `{partial_parse_file_path}` to `{partial_parse_destination_target_path}`"
                " to take advantage of partial parsing."
            )

            partial_parse_destination_target_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(partial_parse_file_path, partial_parse_destination_target_path)

        # Create a subprocess that runs the dbt CLI command. stderr is merged into stdout so
        # that a single stream has to be consumed.
        process = subprocess.Popen(
            args=args,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            env=env,
            cwd=project_dir,
        )

        dbt_cli_invocation = cls(
            process=process,
            manifest=manifest,
            project=dbt_project,
            dagster_dbt_translator=dagster_dbt_translator,
            project_dir=project_dir,
            target_path=target_path,
            raise_on_error=raise_on_error,
            context=context,
            adapter=adapter,
            cli_version=cli_version,
        )
        logger.info(f"Running dbt command: `{dbt_cli_invocation.dbt_command}`.")

        return dbt_cli_invocation

    @public
    def wait(self) -> "DbtCliInvocation":
        """Wait for the dbt CLI process to complete.

        Returns:
            DbtCliInvocation: The current representation of the dbt CLI invocation.

        Examples:
            .. code-block:: python

                from dagster_dbt import DbtCliResource

                dbt = DbtCliResource(project_dir="/path/to/dbt/project")

                dbt_cli_invocation = dbt.cli(["run"]).wait()
        """
        # Exhausting the raw event stream drains stdout and runs the final error check.
        list(self.stream_raw_events())

        return self

    @public
    def is_successful(self) -> bool:
        """Return whether the dbt CLI process completed successfully.

        Any stdout that has not been read yet is consumed and cached, so that the events can
        still be streamed afterwards.

        Returns:
            bool: True, if the dbt CLI process returns with a zero exit code and no error
                messages were found in its logs, and False otherwise.

        Examples:
            .. code-block:: python

                from dagster_dbt import DbtCliResource

                dbt = DbtCliResource(project_dir="/path/to/dbt/project")

                dbt_cli_invocation = dbt.cli(["run"], raise_on_error=False)

                if dbt_cli_invocation.is_successful():
                    ...
        """
        # Extend instead of reassigning: once stdout is closed the stream is empty, and a
        # reassignment would throw away the events cached by an earlier call.
        self._stdout.extend(self._stream_stdout())

        return self.process.wait() == 0 and not self._error_messages

    @public
    def get_error(self) -> Optional[Exception]:
        """Return an exception if the dbt CLI process failed.

        Returns:
            Optional[Exception]: An exception if the dbt CLI process failed, and None otherwise.

        Examples:
            .. code-block:: python

                from dagster_dbt import DbtCliResource

                dbt = DbtCliResource(project_dir="/path/to/dbt/project")

                dbt_cli_invocation = dbt.cli(["run"], raise_on_error=False)

                error = dbt_cli_invocation.get_error()
                if error:
                    logger.error(error)
        """
        if self.is_successful():
            return None

        log_path = self.target_path.joinpath("dbt.log")
        extra_description = ""

        # Only point at the debug log when it actually exists.
        if log_path.exists():
            extra_description = f", or view the dbt debug log: {log_path}"

        return DagsterDbtCliRuntimeError(
            description=(
                f"The dbt CLI process with command\n\n"
                f"`{self.dbt_command}`\n\n"
                f"failed with exit code `{self.process.returncode}`."
                " Check the stdout in the Dagster compute logs for the full information about"
                f" the error{extra_description}.{self._format_error_messages()}"
            ),
        )

    def _stream_asset_events(
        self,
    ) -> Iterator[DbtDagsterEventType]:
        """Stream the dbt CLI events and convert them to Dagster events."""
        for event in self.stream_raw_events():
            yield from event.to_default_asset_events(
                manifest=self.manifest,
                dagster_dbt_translator=self.dagster_dbt_translator,
                context=self.context,
                target_path=self.target_path,
                project=self.project,
            )

    @public
    def stream(
        self,
    ) -> "DbtEventIterator[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation]]":
        """Stream the events from the dbt CLI process and convert them to Dagster events.

        Returns:
            Iterator[Union[Output, AssetMaterialization, AssetObservation, AssetCheckResult, AssetCheckEvaluation]]:
                A set of corresponding Dagster events.

                In a Dagster asset definition, the following are yielded:
                - Output for refables (e.g. models, seeds, snapshots.)
                - AssetCheckResult for dbt test results that are enabled as asset checks.
                - AssetObservation for dbt test results that are not enabled as asset checks.

                In a Dagster op definition, the following are yielded:
                - AssetMaterialization refables (e.g. models, seeds, snapshots.)
                - AssetCheckEvaluation for dbt test results that are enabled as asset checks.
                - AssetObservation for dbt test results that are not enabled as asset checks.

        Examples:
            .. code-block:: python

                from pathlib import Path
                from dagster_dbt import DbtCliResource, dbt_assets

                @dbt_assets(manifest=Path("target", "manifest.json"))
                def my_dbt_assets(context, dbt: DbtCliResource):
                    yield from dbt.cli(["run"], context=context).stream()
        """
        return DbtEventIterator(
            self._stream_asset_events(),
            self,
        )

    @public
    def stream_raw_events(self) -> Iterator[DbtCliEventMessage]:
        """Stream the events from the dbt CLI process.

        Lines that could not be parsed as events are written to stdout and not yielded. Once
        the stream is exhausted, the process result is checked and an error is raised if the
        command failed and `raise_on_error` is set.

        Returns:
            Iterator[DbtCliEventMessage]: An iterator of events from the dbt CLI process.
        """
        event_history_metadata_by_unique_id: dict[str, dict[str, Any]] = {}

        # Prefer the lines cached by `is_successful`; otherwise read from the live process.
        for raw_event in self._stdout or self._stream_stdout():
            if isinstance(raw_event, str):
                # If we can't parse the event, then just emit it as a raw log.
                sys.stdout.write(raw_event + "\n")
                sys.stdout.flush()

                continue

            unique_id: Optional[str] = raw_event["data"].get("node_info", {}).get("unique_id")

            if self.cli_version.major < 2:
                event = DbtCoreCliEventMessage(raw_event=raw_event, event_history_metadata={})
            else:
                event = DbtFusionCliEventMessage(raw_event=raw_event, event_history_metadata={})

            if unique_id and event.is_result_event:
                # Copy the metadata so that the event does not share state with the history.
                event_history_metadata = copy.deepcopy(
                    event_history_metadata_by_unique_id.get(unique_id, {})
                )

                event = replace(event, event_history_metadata=event_history_metadata)

            # Attempt to parse the column level metadata from the event message.
            # If it exists, save it as historical metadata to attach to the NodeFinished event.
            if event.raw_event["info"]["name"] == "JinjaLogInfo":
                # A message that is not valid JSON is an ordinary log line: the decode error
                # is suppressed and the event falls through to be emitted below.
                with contextlib.suppress(orjson.JSONDecodeError):
                    column_level_metadata = orjson.loads(event.raw_event["info"]["msg"])

                    event_history_metadata_by_unique_id[cast("str", unique_id)] = (
                        column_level_metadata
                    )

                    # Don't show this message in stdout
                    continue

            # Re-emit the logs from dbt CLI process into stdout.
            sys.stdout.write(str(event) + "\n")
            sys.stdout.flush()

            yield event

        # Ensure that the dbt CLI process has completed.
        self._raise_on_error()

    @public
    def get_artifact(
        self,
        artifact: Union[
            Literal["manifest.json"],
            Literal["catalog.json"],
            Literal["run_results.json"],
            Literal["sources.json"],
        ],
    ) -> dict[str, Any]:
        """Retrieve a dbt artifact from the target path.

        See https://docs.getdbt.com/reference/artifacts/dbt-artifacts for more information.

        Args:
            artifact (Union[Literal["manifest.json"], Literal["catalog.json"], Literal["run_results.json"], Literal["sources.json"]]): The name of the artifact to retrieve.

        Returns:
            Dict[str, Any]: The artifact as a dictionary.

        Examples:
            .. code-block:: python

                from dagster_dbt import DbtCliResource

                dbt = DbtCliResource(project_dir="/path/to/dbt/project")

                dbt_cli_invocation = dbt.cli(["run"]).wait()

                # Retrieve the run_results.json artifact.
                run_results = dbt_cli_invocation.get_artifact("run_results.json")
        """
        artifact_path = self.target_path.joinpath(artifact)

        return orjson.loads(artifact_path.read_bytes())

    @property
    def dbt_command(self) -> str:
        """The dbt CLI command that was invoked."""
        return " ".join(cast("Sequence[str]", self.process.args))

    def _stream_stdout(self) -> Iterator[Union[str, dict[str, Any]]]:
        """Stream the stdout from the dbt CLI process.

        Every line is decoded and parsed as a JSON event. Parsed events are yielded as
        dictionaries and the messages of error-level events are recorded. Lines that are not
        valid JSON, or that do not have the expected event structure, are yielded as strings.

        Raises:
            DagsterExecutionInterruptedError: Re-raised after the interrupt signal has been
                forwarded to the dbt process and the process has been waited for.
        """
        try:
            # Nothing to read if there is no stdout pipe or it has already been consumed.
            if not self.process.stdout or self.process.stdout.closed:
                return

            with self.process.stdout:
                for raw_line in self.process.stdout or []:
                    raw_event_str = raw_line.decode().strip()

                    try:
                        raw_event = orjson.loads(raw_event_str)

                        # Parse the error message from the event, if it exists.
                        is_error_message = raw_event["info"]["level"] == "error"
                        if is_error_message:
                            self._error_messages.append(raw_event["info"]["msg"])
                    except (orjson.JSONDecodeError, KeyError, TypeError):
                        # Only parsing failures fall back to the raw line. Interrupts and
                        # `GeneratorExit` must propagate, so no bare `except` here.
                        yield raw_event_str
                    else:
                        # Yield outside of the `try` so that exceptions raised at the yield
                        # point are never mistaken for parsing failures.
                        yield raw_event

        except DagsterExecutionInterruptedError:
            logger.info(f"Forwarding interrupt signal to dbt command: `{self.dbt_command}`.")
            self.process.send_signal(signal.SIGINT)
            self.process.wait(timeout=self.termination_timeout_seconds)
            logger.info(f"dbt process terminated with exit code `{self.process.returncode}`.")

            raise

    def _format_error_messages(self) -> str:
        """Format the error messages from the dbt CLI process.

        Returns:
            str: An empty string if no error messages were recorded, otherwise a heading
                followed by the messages, separated by blank lines.
        """
        if not self._error_messages:
            return ""

        # The leading empty string makes the result start with a blank line.
        return "\n\n".join(
            [
                "",
                "Errors parsed from dbt logs:",
                *self._error_messages,
            ]
        )

    def _raise_on_error(self) -> None:
        """Ensure that the dbt CLI process has completed. If the process has not successfully
        completed, then optionally raise an error.
        """
        logger.info(f"Finished dbt command: `{self.dbt_command}`.")
        error = self.get_error()
        if error and self.raise_on_error:
            raise error