dagster-dbt 0.23.3__py3-none-any.whl → 0.28.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. dagster_dbt/__init__.py +41 -140
  2. dagster_dbt/asset_decorator.py +49 -230
  3. dagster_dbt/asset_specs.py +65 -0
  4. dagster_dbt/asset_utils.py +655 -338
  5. dagster_dbt/cli/app.py +44 -43
  6. dagster_dbt/cloud/__init__.py +6 -4
  7. dagster_dbt/cloud/asset_defs.py +119 -177
  8. dagster_dbt/cloud/cli.py +3 -4
  9. dagster_dbt/cloud/ops.py +9 -6
  10. dagster_dbt/cloud/resources.py +9 -4
  11. dagster_dbt/cloud/types.py +12 -7
  12. dagster_dbt/cloud/utils.py +186 -0
  13. dagster_dbt/cloud_v2/__init__.py +10 -0
  14. dagster_dbt/cloud_v2/asset_decorator.py +81 -0
  15. dagster_dbt/cloud_v2/cli_invocation.py +67 -0
  16. dagster_dbt/cloud_v2/client.py +438 -0
  17. dagster_dbt/cloud_v2/resources.py +462 -0
  18. dagster_dbt/cloud_v2/run_handler.py +229 -0
  19. dagster_dbt/cloud_v2/sensor_builder.py +254 -0
  20. dagster_dbt/cloud_v2/types.py +143 -0
  21. dagster_dbt/compat.py +107 -0
  22. dagster_dbt/components/__init__.py +0 -0
  23. dagster_dbt/components/dbt_project/__init__.py +0 -0
  24. dagster_dbt/components/dbt_project/component.py +545 -0
  25. dagster_dbt/components/dbt_project/scaffolder.py +65 -0
  26. dagster_dbt/core/__init__.py +0 -10
  27. dagster_dbt/core/dbt_cli_event.py +612 -0
  28. dagster_dbt/core/dbt_cli_invocation.py +474 -0
  29. dagster_dbt/core/dbt_event_iterator.py +399 -0
  30. dagster_dbt/core/resource.py +733 -0
  31. dagster_dbt/core/utils.py +14 -279
  32. dagster_dbt/dagster_dbt_translator.py +317 -74
  33. dagster_dbt/dbt_core_version.py +1 -0
  34. dagster_dbt/dbt_manifest.py +6 -5
  35. dagster_dbt/dbt_manifest_asset_selection.py +62 -22
  36. dagster_dbt/dbt_project.py +179 -40
  37. dagster_dbt/dbt_project_manager.py +173 -0
  38. dagster_dbt/dbt_version.py +0 -0
  39. dagster_dbt/errors.py +9 -84
  40. dagster_dbt/freshness_builder.py +147 -0
  41. dagster_dbt/include/pyproject.toml.jinja +21 -0
  42. dagster_dbt/include/scaffold/assets.py.jinja +1 -8
  43. dagster_dbt/include/scaffold/definitions.py.jinja +0 -15
  44. dagster_dbt/include/scaffold/project.py.jinja +1 -0
  45. dagster_dbt/include/setup.py.jinja +2 -3
  46. dagster_dbt/metadata_set.py +18 -0
  47. dagster_dbt/utils.py +136 -234
  48. dagster_dbt/version.py +1 -1
  49. dagster_dbt-0.28.4.dist-info/METADATA +47 -0
  50. dagster_dbt-0.28.4.dist-info/RECORD +59 -0
  51. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/WHEEL +1 -1
  52. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/entry_points.txt +3 -0
  53. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info/licenses}/LICENSE +1 -1
  54. dagster_dbt/asset_defs.py +0 -1049
  55. dagster_dbt/core/resources.py +0 -527
  56. dagster_dbt/core/resources_v2.py +0 -1542
  57. dagster_dbt/core/types.py +0 -63
  58. dagster_dbt/dbt_resource.py +0 -220
  59. dagster_dbt/include/scaffold/constants.py.jinja +0 -21
  60. dagster_dbt/ops.py +0 -134
  61. dagster_dbt/types.py +0 -22
  62. dagster_dbt-0.23.3.dist-info/METADATA +0 -31
  63. dagster_dbt-0.23.3.dist-info/RECORD +0 -43
  64. {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/top_level.txt +0 -0
@@ -1,20 +1,21 @@
1
+ from collections.abc import Mapping
1
2
  from datetime import datetime
2
- from typing import Any, Mapping, Optional
3
+ from typing import Any, Optional
3
4
 
4
5
  import dagster._check as check
5
- from dateutil.parser import isoparse
6
+ from dagster._annotations import beta
7
+ from dagster._vendored.dateutil.parser import isoparse
6
8
 
7
- from ..types import DbtOutput
8
9
 
9
-
10
- class DbtCloudOutput(DbtOutput):
10
+ @beta
11
+ class DbtCloudOutput:
11
12
  """The results of executing a dbt Cloud job, along with additional metadata produced from the
12
13
  job run.
13
14
 
14
15
  Note that users should not construct instances of this class directly. This class is intended
15
16
  to be constructed from the JSON output of dbt Cloud commands.
16
17
 
17
- Attributes:
18
+ Args:
18
19
  run_details (Dict[str, Any]): The raw dictionary data representing the run details returned
19
20
  by the dbt Cloud API. For more info, see: https://docs.getdbt.com/dbt-cloud/api-v2#operation/getRunById
20
21
  result (Dict[str, Any]): Dictionary containing dbt-reported result information
@@ -32,7 +33,11 @@ class DbtCloudOutput(DbtOutput):
32
33
  result: Mapping[str, Any],
33
34
  ):
34
35
  self._run_details = check.mapping_param(run_details, "run_details", key_type=str)
35
- super().__init__(result)
36
+ self._result = check.mapping_param(result, "result", key_type=str)
37
+
38
+ @property
39
+ def result(self) -> Mapping[str, Any]:
40
+ return self._result
36
41
 
37
42
  @property
38
43
  def run_details(self) -> Mapping[str, Any]:
@@ -0,0 +1,186 @@
1
+ from collections.abc import Callable, Iterator, Mapping, Sequence
2
+ from typing import Any, Optional, Union
3
+
4
+ import dateutil
5
+ from dagster import (
6
+ AssetKey,
7
+ AssetMaterialization,
8
+ AssetObservation,
9
+ MetadataValue,
10
+ Output,
11
+ _check as check,
12
+ )
13
+ from dagster._core.definitions.metadata import RawMetadataValue
14
+
15
+ from dagster_dbt.cloud.types import DbtCloudOutput
16
+ from dagster_dbt.utils import ASSET_RESOURCE_TYPES, default_node_info_to_asset_key
17
+
18
+
19
+ def _resource_type(unique_id: str) -> str:
20
+ # returns the type of the node (e.g. model, test, snapshot)
21
+ return unique_id.split(".")[0]
22
+
23
+
24
+ def _node_result_to_metadata(node_result: Mapping[str, Any]) -> Mapping[str, RawMetadataValue]:
25
+ return {
26
+ "Materialization Strategy": node_result["config"]["materialized"],
27
+ "Database": node_result["database"],
28
+ "Schema": node_result["schema"],
29
+ "Alias": node_result["alias"],
30
+ "Description": node_result["description"],
31
+ }
32
+
33
+
34
def _timing_to_metadata(timings: Sequence[Mapping[str, Any]]) -> Mapping[str, RawMetadataValue]:
    """Summarize dbt timing entries as start, completion and duration metadata.

    Only entries named ``"execute"`` and ``"compile"`` are reported; every other
    entry is ignored.

    Args:
        timings: Timing entries, each with a ``name`` and, for the reported entries,
            ISO-8601 ``started_at`` and ``completed_at`` timestamps.

    Returns:
        A mapping with ``"<Phase> Started At"``, ``"<Phase> Completed At"`` (both ISO
        strings with second precision) and ``"<Phase> Duration"`` (seconds) for each
        reported phase, where ``<Phase>`` is ``Execution`` or ``Compilation``.
    """
    # `import dateutil` on its own does not guarantee that the `parser` submodule is
    # loaded, so import it explicitly before using it.
    import dateutil.parser

    phase_labels = {"execute": "Execution", "compile": "Compilation"}

    metadata: dict[str, RawMetadataValue] = {}
    for timing in timings:
        desc = phase_labels.get(timing["name"])
        if desc is None:
            continue

        started_at = dateutil.parser.isoparse(timing["started_at"])
        completed_at = dateutil.parser.isoparse(timing["completed_at"])
        duration = completed_at - started_at
        metadata.update(
            {
                f"{desc} Started At": started_at.isoformat(timespec="seconds"),
                # Report the completion timestamp here, not the start timestamp.
                f"{desc} Completed At": completed_at.isoformat(timespec="seconds"),
                f"{desc} Duration": duration.total_seconds(),
            }
        )
    return metadata
56
+
57
+
58
def result_to_events(
    result: Mapping[str, Any],
    docs_url: Optional[str] = None,
    node_info_to_asset_key: Optional[Callable[[Mapping[str, Any]], AssetKey]] = None,
    manifest_json: Optional[Mapping[str, Any]] = None,
    extra_metadata: Optional[Mapping[str, RawMetadataValue]] = None,
    generate_asset_outputs: bool = False,
) -> Iterator[Union[AssetMaterialization, AssetObservation, Output]]:
    """This is a hacky solution that attempts to consolidate parsing many of the potential formats
    that dbt can provide its results in. This is known to work for CLI Outputs for dbt versions 0.18+,
    as well as RPC responses for a similar time period, but as the RPC response schema is not documented
    nor enforced, this can become out of date easily.

    Args:
        result: A single dbt result entry. It must contain ``execution_time`` and ``timing``,
            and either a ``node`` block or a top-level ``unique_id``. The status is read from
            ``status`` unless a ``fail`` key is present, in which case it is derived from the
            ``fail`` / ``skip`` / ``error`` flags.
        docs_url: When set, a ``docs_url`` metadata entry pointing at this node is added.
        node_info_to_asset_key: Maps dbt resource properties to an asset key. Defaults to
            ``default_node_info_to_asset_key``.
        manifest_json: When provided, the node's full properties are looked up in
            ``manifest_json["nodes"]``. It is required to associate test results with assets.
        extra_metadata: Extra metadata merged last, so it overrides the computed entries.
        generate_asset_outputs: Yield ``Output`` events instead of ``AssetMaterialization``
            events for successful asset nodes.

    Yields:
        One ``Output`` or ``AssetMaterialization`` for a successful node whose resource type is
        in ``ASSET_RESOURCE_TYPES``, or one ``AssetObservation`` per resolvable upstream node or
        source for a non-skipped test result (only when ``manifest_json`` is provided).
    """
    node_info_to_asset_key = check.opt_callable_param(
        node_info_to_asset_key, "node_info_to_asset_key", default=default_node_info_to_asset_key
    )

    # status comes from set of fields rather than "status"
    if "fail" in result:
        if result.get("fail"):
            status = "fail"
        elif result.get("skip"):
            status = "skip"
        elif result.get("error"):
            status = "error"
        else:
            status = "success"
    else:
        status = result["status"]

    # all versions represent timing the same way
    metadata = {"Status": status, "Execution Time (seconds)": result["execution_time"]}
    metadata.update(_timing_to_metadata(result["timing"]))

    # working with a response that contains the node block (RPC and CLI 0.18.x)
    if "node" in result:
        unique_id = result["node"]["unique_id"]
        metadata.update(_node_result_to_metadata(result["node"]))
    else:
        unique_id = result["unique_id"]

    if docs_url:
        metadata["docs_url"] = MetadataValue.url(f"{docs_url}#!/model/{unique_id}")

    if extra_metadata:
        metadata.update(extra_metadata)

    # if you have a manifest available, get the full node info, otherwise just populate unique_id
    dbt_resource_props = (
        manifest_json["nodes"][unique_id] if manifest_json else {"unique_id": unique_id}
    )

    node_resource_type = _resource_type(unique_id)

    if node_resource_type in ASSET_RESOURCE_TYPES and status == "success":
        if generate_asset_outputs:
            yield Output(
                value=None,
                output_name=node_info_to_asset_key(dbt_resource_props).to_python_identifier(),
                metadata=metadata,
            )
        else:
            yield AssetMaterialization(
                asset_key=node_info_to_asset_key(dbt_resource_props),
                description=f"dbt node: {unique_id}",
                metadata=metadata,
            )
    # can only associate tests with assets if we have manifest_json available.
    # The derived status above spells a skipped node "skip", while a raw "status" field may
    # spell it "skipped"; neither should produce observations.
    elif node_resource_type == "test" and manifest_json and status not in ("skip", "skipped"):
        upstream_unique_ids = manifest_json["nodes"][unique_id]["depends_on"]["nodes"]
        # tests can apply to multiple asset keys
        for upstream_id in upstream_unique_ids:
            # the upstream id can reference a node or a source
            dbt_resource_props = manifest_json["nodes"].get(upstream_id) or manifest_json[
                "sources"
            ].get(upstream_id)
            if dbt_resource_props is None:
                continue
            upstream_asset_key = node_info_to_asset_key(dbt_resource_props)
            yield AssetObservation(
                asset_key=upstream_asset_key,
                metadata={
                    # Use the resolved id: results carrying a "node" block may have no
                    # top-level "unique_id" key.
                    "Test ID": unique_id,
                    "Test Status": status,
                    "Test Message": result.get("message") or "",
                },
            )
146
+
147
+
148
def generate_events(
    dbt_output: DbtCloudOutput,
    node_info_to_asset_key: Optional[Callable[[Mapping[str, Any]], AssetKey]] = None,
    manifest_json: Optional[Mapping[str, Any]] = None,
) -> Iterator[Union[AssetMaterialization, AssetObservation]]:
    """Yield the asset events described by a :py:class:`~DbtCloudOutput` object.

    Every entry of ``dbt_output.result["results"]`` is converted with ``result_to_events``,
    which yields :py:class:`dagster.AssetMaterialization` events for models updated by a dbt
    command and :py:class:`dagster.AssetObservation` events for test runs.

    Args:
        dbt_output: The output whose results are converted; its ``docs_url`` is forwarded
            to ``result_to_events``.
        node_info_to_asset_key: Optional function mapping dbt resource properties to an
            asset key, forwarded unchanged.
        manifest_json: Optional manifest, forwarded unchanged.
    """
    allowed_event_types = (AssetMaterialization, AssetObservation)

    for node_result in dbt_output.result["results"]:
        node_events = result_to_events(
            node_result,
            docs_url=dbt_output.docs_url,
            node_info_to_asset_key=node_info_to_asset_key,
            manifest_json=manifest_json,
        )
        for node_event in node_events:
            # Each event is type-checked before it is handed to the caller.
            yield check.inst(node_event, allowed_event_types)
169
+
170
+
171
def generate_materializations(
    dbt_output: DbtCloudOutput,
    asset_key_prefix: Optional[Sequence[str]] = None,
) -> Iterator[AssetMaterialization]:
    """This function yields :py:class:`dagster.AssetMaterialization` events for each model updated by
    a dbt command.

    Args:
        dbt_output: The dbt Cloud output whose results are converted to events.
        asset_key_prefix: Optional path components placed in front of each asset key. The rest
            of the key is the node's ``unique_id`` split on ``"."``.
    """
    asset_key_prefix = check.opt_sequence_param(asset_key_prefix, "asset_key_prefix", of_type=str)

    def _prefixed_asset_key(info: Mapping[str, Any]) -> AssetKey:
        # Build a new list by unpacking rather than using `+`: concatenating a non-list
        # sequence (e.g. a tuple prefix) with the list returned by `split` raises TypeError.
        return AssetKey([*asset_key_prefix, *info["unique_id"].split(".")])

    for event in generate_events(dbt_output, node_info_to_asset_key=_prefixed_asset_key):
        yield check.inst(event, AssetMaterialization)
@@ -0,0 +1,10 @@
1
+ from dagster_dbt.cloud_v2.asset_decorator import dbt_cloud_assets as dbt_cloud_assets
2
+ from dagster_dbt.cloud_v2.resources import (
3
+ DbtCloudCredentials as DbtCloudCredentials,
4
+ DbtCloudWorkspace as DbtCloudWorkspace,
5
+ load_dbt_cloud_asset_specs as load_dbt_cloud_asset_specs,
6
+ load_dbt_cloud_check_specs as load_dbt_cloud_check_specs,
7
+ )
8
+ from dagster_dbt.cloud_v2.sensor_builder import (
9
+ build_dbt_cloud_polling_sensor as build_dbt_cloud_polling_sensor,
10
+ )
@@ -0,0 +1,81 @@
1
+ from collections.abc import Callable
2
+ from typing import Any, Optional
3
+
4
+ from dagster import AssetsDefinition, multi_asset
5
+ from dagster._annotations import public
6
+ from dagster._core.errors import DagsterInvariantViolationError
7
+
8
+ from dagster_dbt.asset_utils import (
9
+ DAGSTER_DBT_EXCLUDE_METADATA_KEY,
10
+ DAGSTER_DBT_SELECT_METADATA_KEY,
11
+ DAGSTER_DBT_SELECTOR_METADATA_KEY,
12
+ DBT_DEFAULT_EXCLUDE,
13
+ DBT_DEFAULT_SELECT,
14
+ DBT_DEFAULT_SELECTOR,
15
+ )
16
+ from dagster_dbt.cloud_v2.resources import DbtCloudWorkspace
17
+ from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator
18
+
19
+
20
@public
def dbt_cloud_assets(
    *,
    workspace: DbtCloudWorkspace,
    select: str = DBT_DEFAULT_SELECT,
    exclude: str = DBT_DEFAULT_EXCLUDE,
    selector: str = DBT_DEFAULT_SELECTOR,
    name: Optional[str] = None,
    group_name: Optional[str] = None,
    dagster_dbt_translator: Optional[DagsterDbtTranslator] = None,
) -> Callable[[Callable[..., Any]], AssetsDefinition]:
    """Create a definition for how to compute a set of dbt Cloud resources,
    described by a manifest.json for a given dbt Cloud workspace.

    Args:
        workspace (DbtCloudWorkspace): The dbt Cloud workspace.
        select (str): A dbt selection string for the models in a project that you want
            to include. Defaults to ``fqn:*``.
        exclude (str): A dbt selection string for the models in a project that you want
            to exclude. Defaults to "".
        selector (str): A dbt selector to select resources to materialize. Defaults to "".
        name (Optional[str], optional): The name of the op.
        group_name (Optional[str], optional): The name of the asset group.
        dagster_dbt_translator (Optional[DagsterDbtTranslator], optional): The translator to use
            to convert dbt Cloud content into :py:class:`dagster.AssetSpec`.
            Defaults to :py:class:`DagsterDbtTranslator`.

    Raises:
        DagsterInvariantViolationError: If ``group_name`` is set while at least one of the
            loaded asset specs already defines its own ``group_name``.
    """
    translator = dagster_dbt_translator or DagsterDbtTranslator()

    # The same selection arguments are used to load both the asset specs and the check specs.
    selection = {"select": select, "exclude": exclude, "selector": selector}

    specs = workspace.load_asset_specs(dagster_dbt_translator=translator, **selection)

    specs_with_own_group = [spec for spec in specs if spec.group_name]
    if any(specs_with_own_group) and group_name:
        raise DagsterInvariantViolationError(
            f"Cannot set group_name parameter on dbt_cloud_assets for dbt Cloud workspace with account "
            f"{workspace.account_name}, project {workspace.project_name} and environment {workspace.environment_name} -"
            f" one or more of the dbt Cloud asset specs have a group_name defined."
        )

    check_specs = workspace.load_check_specs(dagster_dbt_translator=translator, **selection)

    return multi_asset(
        name=name,
        group_name=group_name,
        can_subset=True,
        specs=specs,
        # The selection is recorded on the op as tags.
        op_tags={
            DAGSTER_DBT_SELECT_METADATA_KEY: select,
            DAGSTER_DBT_EXCLUDE_METADATA_KEY: exclude,
            DAGSTER_DBT_SELECTOR_METADATA_KEY: selector,
        },
        check_specs=check_specs,
    )
@@ -0,0 +1,67 @@
1
+ from collections.abc import Iterator, Mapping, Sequence
2
+ from typing import Any, Optional, Union
3
+
4
+ from dagster import (
5
+ AssetCheckEvaluation,
6
+ AssetCheckResult,
7
+ AssetExecutionContext,
8
+ AssetMaterialization,
9
+ Output,
10
+ )
11
+ from dagster._record import record
12
+
13
+ from dagster_dbt.cloud_v2.client import DbtCloudWorkspaceClient
14
+ from dagster_dbt.cloud_v2.run_handler import DbtCloudJobRunHandler, DbtCloudJobRunResults
15
+ from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator
16
+
17
+
18
@record
class DbtCloudCliInvocation:
    """Represents a dbt Cloud cli invocation."""

    # The arguments the dbt Cloud job run was started with.
    args: Sequence[str]
    # Client passed on when converting run results to asset events.
    client: DbtCloudWorkspaceClient
    # The manifest passed on when converting run results to asset events.
    manifest: Mapping[str, Any]
    # Translator passed on when converting run results to asset events.
    dagster_dbt_translator: DagsterDbtTranslator
    # Handler of the job run started by `run`.
    run_handler: DbtCloudJobRunHandler
    # Optional execution context passed on when converting run results to asset events.
    context: Optional[AssetExecutionContext]

    @classmethod
    def run(
        cls,
        job_id: int,
        args: Sequence[str],
        client: DbtCloudWorkspaceClient,
        manifest: Mapping[str, Any],
        dagster_dbt_translator: DagsterDbtTranslator,
        context: Optional[AssetExecutionContext] = None,
    ) -> "DbtCloudCliInvocation":
        """Start a run of the given dbt Cloud job and return an invocation wrapping it.

        The run is started through ``DbtCloudJobRunHandler.run``; the resulting handler is
        stored on the returned invocation together with the other arguments.
        """
        job_run_handler = DbtCloudJobRunHandler.run(job_id=job_id, args=args, client=client)
        return DbtCloudCliInvocation(
            args=args,
            client=client,
            manifest=manifest,
            dagster_dbt_translator=dagster_dbt_translator,
            run_handler=job_run_handler,
            context=context,
        )

    def wait(
        self, timeout: Optional[float] = None
    ) -> Iterator[Union[AssetCheckEvaluation, AssetCheckResult, AssetMaterialization, Output]]:
        """Wait for the run, yield its default asset events, then raise if the run failed.

        Events are only yielded when the run produced a ``run_results.json`` artifact.
        ``raise_for_status`` is called on the finished run after any events were yielded.
        """
        finished_run = self.run_handler.wait(timeout=timeout)

        available_artifacts = self.run_handler.list_run_artifacts()
        if "run_results.json" in available_artifacts:
            results_json = self.run_handler.get_run_results()
            parsed_results = DbtCloudJobRunResults.from_run_results_json(
                run_results_json=results_json
            )
            yield from parsed_results.to_default_asset_events(
                client=self.client,
                manifest=self.manifest,
                dagster_dbt_translator=self.dagster_dbt_translator,
                context=self.context,
            )

        finished_run.raise_for_status()