dagster-dbt 0.23.3__py3-none-any.whl → 0.28.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagster_dbt/__init__.py +41 -140
- dagster_dbt/asset_decorator.py +49 -230
- dagster_dbt/asset_specs.py +65 -0
- dagster_dbt/asset_utils.py +655 -338
- dagster_dbt/cli/app.py +44 -43
- dagster_dbt/cloud/__init__.py +6 -4
- dagster_dbt/cloud/asset_defs.py +119 -177
- dagster_dbt/cloud/cli.py +3 -4
- dagster_dbt/cloud/ops.py +9 -6
- dagster_dbt/cloud/resources.py +9 -4
- dagster_dbt/cloud/types.py +12 -7
- dagster_dbt/cloud/utils.py +186 -0
- dagster_dbt/cloud_v2/__init__.py +10 -0
- dagster_dbt/cloud_v2/asset_decorator.py +81 -0
- dagster_dbt/cloud_v2/cli_invocation.py +67 -0
- dagster_dbt/cloud_v2/client.py +438 -0
- dagster_dbt/cloud_v2/resources.py +462 -0
- dagster_dbt/cloud_v2/run_handler.py +229 -0
- dagster_dbt/cloud_v2/sensor_builder.py +254 -0
- dagster_dbt/cloud_v2/types.py +143 -0
- dagster_dbt/compat.py +107 -0
- dagster_dbt/components/__init__.py +0 -0
- dagster_dbt/components/dbt_project/__init__.py +0 -0
- dagster_dbt/components/dbt_project/component.py +545 -0
- dagster_dbt/components/dbt_project/scaffolder.py +65 -0
- dagster_dbt/core/__init__.py +0 -10
- dagster_dbt/core/dbt_cli_event.py +612 -0
- dagster_dbt/core/dbt_cli_invocation.py +474 -0
- dagster_dbt/core/dbt_event_iterator.py +399 -0
- dagster_dbt/core/resource.py +733 -0
- dagster_dbt/core/utils.py +14 -279
- dagster_dbt/dagster_dbt_translator.py +317 -74
- dagster_dbt/dbt_core_version.py +1 -0
- dagster_dbt/dbt_manifest.py +6 -5
- dagster_dbt/dbt_manifest_asset_selection.py +62 -22
- dagster_dbt/dbt_project.py +179 -40
- dagster_dbt/dbt_project_manager.py +173 -0
- dagster_dbt/dbt_version.py +0 -0
- dagster_dbt/errors.py +9 -84
- dagster_dbt/freshness_builder.py +147 -0
- dagster_dbt/include/pyproject.toml.jinja +21 -0
- dagster_dbt/include/scaffold/assets.py.jinja +1 -8
- dagster_dbt/include/scaffold/definitions.py.jinja +0 -15
- dagster_dbt/include/scaffold/project.py.jinja +1 -0
- dagster_dbt/include/setup.py.jinja +2 -3
- dagster_dbt/metadata_set.py +18 -0
- dagster_dbt/utils.py +136 -234
- dagster_dbt/version.py +1 -1
- dagster_dbt-0.28.4.dist-info/METADATA +47 -0
- dagster_dbt-0.28.4.dist-info/RECORD +59 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/WHEEL +1 -1
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/entry_points.txt +3 -0
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info/licenses}/LICENSE +1 -1
- dagster_dbt/asset_defs.py +0 -1049
- dagster_dbt/core/resources.py +0 -527
- dagster_dbt/core/resources_v2.py +0 -1542
- dagster_dbt/core/types.py +0 -63
- dagster_dbt/dbt_resource.py +0 -220
- dagster_dbt/include/scaffold/constants.py.jinja +0 -21
- dagster_dbt/ops.py +0 -134
- dagster_dbt/types.py +0 -22
- dagster_dbt-0.23.3.dist-info/METADATA +0 -31
- dagster_dbt-0.23.3.dist-info/RECORD +0 -43
- {dagster_dbt-0.23.3.dist-info → dagster_dbt-0.28.4.dist-info}/top_level.txt +0 -0
dagster_dbt/cloud/asset_defs.py
CHANGED
|
@@ -1,58 +1,45 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import shlex
|
|
3
|
-
from argparse import Namespace
|
|
3
|
+
from argparse import ArgumentParser, Namespace
|
|
4
|
+
from collections.abc import Callable, Mapping, Sequence
|
|
4
5
|
from contextlib import suppress
|
|
5
|
-
from typing import
|
|
6
|
-
Any,
|
|
7
|
-
Callable,
|
|
8
|
-
Dict,
|
|
9
|
-
FrozenSet,
|
|
10
|
-
List,
|
|
11
|
-
Mapping,
|
|
12
|
-
Optional,
|
|
13
|
-
Sequence,
|
|
14
|
-
Set,
|
|
15
|
-
Tuple,
|
|
16
|
-
Union,
|
|
17
|
-
cast,
|
|
18
|
-
)
|
|
6
|
+
from typing import Any, Optional, Union, cast
|
|
19
7
|
|
|
20
8
|
import dagster._check as check
|
|
21
9
|
from dagster import (
|
|
22
10
|
AssetExecutionContext,
|
|
23
11
|
AssetKey,
|
|
24
|
-
AssetOut,
|
|
25
12
|
AssetsDefinition,
|
|
26
13
|
AutoMaterializePolicy,
|
|
27
|
-
|
|
14
|
+
LegacyFreshnessPolicy,
|
|
28
15
|
MetadataValue,
|
|
29
16
|
PartitionsDefinition,
|
|
30
17
|
ResourceDefinition,
|
|
31
18
|
multi_asset,
|
|
32
19
|
with_resources,
|
|
33
20
|
)
|
|
34
|
-
from dagster._annotations import
|
|
35
|
-
from dagster._core.definitions.
|
|
21
|
+
from dagster._annotations import beta, beta_param
|
|
22
|
+
from dagster._core.definitions.assets.definition.asset_spec import AssetSpec
|
|
23
|
+
from dagster._core.definitions.assets.definition.cacheable_assets_definition import (
|
|
36
24
|
AssetsDefinitionCacheableData,
|
|
37
25
|
CacheableAssetsDefinition,
|
|
38
26
|
)
|
|
39
27
|
from dagster._core.definitions.metadata import RawMetadataMapping
|
|
40
28
|
from dagster._core.execution.context.init import build_init_resource_context
|
|
41
29
|
|
|
30
|
+
from dagster_dbt.asset_specs import build_dbt_asset_specs
|
|
42
31
|
from dagster_dbt.asset_utils import (
|
|
32
|
+
DAGSTER_DBT_UNIQUE_ID_METADATA_KEY,
|
|
43
33
|
default_asset_key_fn,
|
|
44
34
|
default_auto_materialize_policy_fn,
|
|
45
35
|
default_description_fn,
|
|
46
|
-
default_freshness_policy_fn,
|
|
47
36
|
default_group_from_dbt_resource_props,
|
|
48
|
-
|
|
49
|
-
get_deps,
|
|
37
|
+
get_node,
|
|
50
38
|
)
|
|
39
|
+
from dagster_dbt.cloud.resources import DbtCloudClient, DbtCloudClientResource, DbtCloudRunStatus
|
|
40
|
+
from dagster_dbt.cloud.utils import result_to_events
|
|
51
41
|
from dagster_dbt.dagster_dbt_translator import DagsterDbtTranslator
|
|
52
|
-
|
|
53
|
-
from ..errors import DagsterDbtCloudJobInvariantViolationError
|
|
54
|
-
from ..utils import ASSET_RESOURCE_TYPES, result_to_events
|
|
55
|
-
from .resources import DbtCloudClient, DbtCloudClientResource, DbtCloudRunStatus
|
|
42
|
+
from dagster_dbt.errors import DagsterDbtCloudJobInvariantViolationError
|
|
56
43
|
|
|
57
44
|
DAGSTER_DBT_COMPILE_RUN_ID_ENV_VAR = "DBT_DAGSTER_COMPILE_RUN_ID"
|
|
58
45
|
|
|
@@ -64,7 +51,9 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
64
51
|
job_id: int,
|
|
65
52
|
node_info_to_asset_key: Callable[[Mapping[str, Any]], AssetKey],
|
|
66
53
|
node_info_to_group_fn: Callable[[Mapping[str, Any]], Optional[str]],
|
|
67
|
-
node_info_to_freshness_policy_fn: Callable[
|
|
54
|
+
node_info_to_freshness_policy_fn: Callable[
|
|
55
|
+
[Mapping[str, Any]], Optional[LegacyFreshnessPolicy]
|
|
56
|
+
],
|
|
68
57
|
node_info_to_auto_materialize_policy_fn: Callable[
|
|
69
58
|
[Mapping[str, Any]], Optional[AutoMaterializePolicy]
|
|
70
59
|
],
|
|
@@ -85,7 +74,7 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
85
74
|
self._job_id = job_id
|
|
86
75
|
self._project_id: int
|
|
87
76
|
self._has_generate_docs: bool
|
|
88
|
-
self._job_commands:
|
|
77
|
+
self._job_commands: list[str]
|
|
89
78
|
self._job_materialization_command_step: int
|
|
90
79
|
self._node_info_to_asset_key = node_info_to_asset_key
|
|
91
80
|
self._node_info_to_group_fn = node_info_to_group_fn
|
|
@@ -97,8 +86,8 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
97
86
|
super().__init__(unique_id=f"dbt-cloud-{job_id}")
|
|
98
87
|
|
|
99
88
|
def compute_cacheable_data(self) -> Sequence[AssetsDefinitionCacheableData]:
|
|
100
|
-
|
|
101
|
-
return [self._build_dbt_cloud_assets_cacheable_data(
|
|
89
|
+
manifest_json, executed_unique_ids = self._get_manifest_json_and_executed_unique_ids()
|
|
90
|
+
return [self._build_dbt_cloud_assets_cacheable_data(manifest_json, executed_unique_ids)]
|
|
102
91
|
|
|
103
92
|
def build_definitions(
|
|
104
93
|
self, data: Sequence[AssetsDefinitionCacheableData]
|
|
@@ -113,10 +102,7 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
113
102
|
|
|
114
103
|
@staticmethod
|
|
115
104
|
def parse_dbt_command(dbt_command: str) -> Namespace:
|
|
116
|
-
from dbt.cli.flags import
|
|
117
|
-
Flags,
|
|
118
|
-
args_to_context,
|
|
119
|
-
)
|
|
105
|
+
from dbt.cli.flags import Flags, args_to_context
|
|
120
106
|
|
|
121
107
|
args = shlex.split(dbt_command)[1:]
|
|
122
108
|
|
|
@@ -124,7 +110,7 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
124
110
|
return Namespace(**vars(Flags(args_to_context(args + ["--profiles-dir", "."]))))
|
|
125
111
|
|
|
126
112
|
@staticmethod
|
|
127
|
-
def get_job_materialization_command_step(execute_steps:
|
|
113
|
+
def get_job_materialization_command_step(execute_steps: list[str]) -> int:
|
|
128
114
|
materialization_command_filter = [
|
|
129
115
|
DbtCloudCacheableAssetsDefinition.parse_dbt_command(command).which in ["run", "build"]
|
|
130
116
|
for command in execute_steps
|
|
@@ -139,8 +125,8 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
139
125
|
return materialization_command_filter.index(True)
|
|
140
126
|
|
|
141
127
|
@staticmethod
|
|
142
|
-
def get_compile_filters(parsed_args: Namespace) ->
|
|
143
|
-
dbt_compile_options:
|
|
128
|
+
def get_compile_filters(parsed_args: Namespace) -> list[str]:
|
|
129
|
+
dbt_compile_options: list[str] = []
|
|
144
130
|
|
|
145
131
|
selected_models = parsed_args.select or []
|
|
146
132
|
if selected_models:
|
|
@@ -158,7 +144,7 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
158
144
|
|
|
159
145
|
return dbt_compile_options
|
|
160
146
|
|
|
161
|
-
def _get_cached_compile_dbt_cloud_job_run(self, compile_run_id: int) ->
|
|
147
|
+
def _get_cached_compile_dbt_cloud_job_run(self, compile_run_id: int) -> tuple[int, int]:
|
|
162
148
|
# If the compile run is ongoing, allow it a grace period of 10 minutes to finish.
|
|
163
149
|
with suppress(Exception):
|
|
164
150
|
self._dbt_cloud.poll_run(run_id=compile_run_id, poll_timeout=600)
|
|
@@ -186,7 +172,7 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
186
172
|
|
|
187
173
|
return compile_run_id, compile_job_materialization_command_step
|
|
188
174
|
|
|
189
|
-
def _compile_dbt_cloud_job(self, dbt_cloud_job: Mapping[str, Any]) ->
|
|
175
|
+
def _compile_dbt_cloud_job(self, dbt_cloud_job: Mapping[str, Any]) -> tuple[int, int]:
|
|
190
176
|
# Retrieve the filters options from the dbt Cloud job's materialization command.
|
|
191
177
|
#
|
|
192
178
|
# There are three filters: `--select`, `--exclude`, and `--selector`.
|
|
@@ -246,9 +232,9 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
246
232
|
|
|
247
233
|
return compile_run_dbt_output.run_id, compile_job_materialization_command_step
|
|
248
234
|
|
|
249
|
-
def
|
|
235
|
+
def _get_manifest_json_and_executed_unique_ids(
|
|
250
236
|
self,
|
|
251
|
-
) ->
|
|
237
|
+
) -> tuple[Mapping[str, Any], frozenset[str]]:
|
|
252
238
|
"""For a given dbt Cloud job, fetch the latest run's dependency structure of executed nodes."""
|
|
253
239
|
# Fetch information about the job.
|
|
254
240
|
job = self._dbt_cloud.get_job(job_id=self._job_id)
|
|
@@ -300,12 +286,7 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
300
286
|
)
|
|
301
287
|
|
|
302
288
|
# Filter the manifest to only include the nodes that were executed.
|
|
303
|
-
|
|
304
|
-
**manifest_json.get("nodes", {}),
|
|
305
|
-
**manifest_json.get("sources", {}),
|
|
306
|
-
**manifest_json.get("metrics", {}),
|
|
307
|
-
}
|
|
308
|
-
executed_node_ids: Set[str] = set(
|
|
289
|
+
executed_node_ids: set[str] = set(
|
|
309
290
|
result["unique_id"] for result in run_results_json["results"]
|
|
310
291
|
)
|
|
311
292
|
|
|
@@ -319,17 +300,11 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
319
300
|
f"options applied. Received commands: {self._job_commands}."
|
|
320
301
|
)
|
|
321
302
|
|
|
322
|
-
#
|
|
323
|
-
|
|
324
|
-
dbt_nodes=dbt_nodes,
|
|
325
|
-
selected_unique_ids=executed_node_ids,
|
|
326
|
-
asset_resource_types=ASSET_RESOURCE_TYPES,
|
|
327
|
-
)
|
|
328
|
-
|
|
329
|
-
return dbt_nodes, dbt_dependencies
|
|
303
|
+
# sort to stabilize job snapshots
|
|
304
|
+
return manifest_json, frozenset(sorted(executed_node_ids))
|
|
330
305
|
|
|
331
306
|
def _build_dbt_cloud_assets_cacheable_data(
|
|
332
|
-
self,
|
|
307
|
+
self, manifest_json: Mapping[str, Any], executed_unique_ids: frozenset[str]
|
|
333
308
|
) -> AssetsDefinitionCacheableData:
|
|
334
309
|
"""Given all of the nodes and dependencies for a dbt Cloud job, build the cacheable
|
|
335
310
|
representation that generates the asset definition for the job.
|
|
@@ -337,88 +312,74 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
337
312
|
|
|
338
313
|
class CustomDagsterDbtTranslator(DagsterDbtTranslator):
|
|
339
314
|
@classmethod
|
|
340
|
-
def get_asset_key(cls, dbt_resource_props):
|
|
315
|
+
def get_asset_key(cls, dbt_resource_props): # pyright: ignore[reportIncompatibleMethodOverride]
|
|
341
316
|
return self._node_info_to_asset_key(dbt_resource_props)
|
|
342
317
|
|
|
343
318
|
@classmethod
|
|
344
|
-
def get_description(cls, dbt_resource_props):
|
|
319
|
+
def get_description(cls, dbt_resource_props): # pyright: ignore[reportIncompatibleMethodOverride]
|
|
345
320
|
# We shouldn't display the raw sql. Instead, inspect if dbt docs were generated,
|
|
346
321
|
# and attach metadata to link to the docs.
|
|
347
322
|
return default_description_fn(dbt_resource_props, display_raw_sql=False)
|
|
348
323
|
|
|
349
324
|
@classmethod
|
|
350
|
-
def get_group_name(cls, dbt_resource_props):
|
|
325
|
+
def get_group_name(cls, dbt_resource_props): # pyright: ignore[reportIncompatibleMethodOverride]
|
|
351
326
|
return self._node_info_to_group_fn(dbt_resource_props)
|
|
352
327
|
|
|
353
328
|
@classmethod
|
|
354
|
-
def
|
|
355
|
-
return self._node_info_to_freshness_policy_fn(dbt_resource_props)
|
|
356
|
-
|
|
357
|
-
@classmethod
|
|
358
|
-
def get_auto_materialize_policy(cls, dbt_resource_props):
|
|
329
|
+
def get_auto_materialize_policy(cls, dbt_resource_props): # pyright: ignore[reportIncompatibleMethodOverride]
|
|
359
330
|
return self._node_info_to_auto_materialize_policy_fn(dbt_resource_props)
|
|
360
331
|
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
asset_outs,
|
|
365
|
-
group_names_by_key,
|
|
366
|
-
freshness_policies_by_key,
|
|
367
|
-
auto_materialize_policies_by_key,
|
|
368
|
-
_,
|
|
369
|
-
fqns_by_output_name,
|
|
370
|
-
metadata_by_output_name,
|
|
371
|
-
) = get_asset_deps(
|
|
372
|
-
dbt_nodes=dbt_nodes,
|
|
373
|
-
deps=dbt_dependencies,
|
|
374
|
-
# TODO: In the future, allow the IO manager to be specified.
|
|
375
|
-
io_manager_key=None,
|
|
332
|
+
# generate specs for each executed node
|
|
333
|
+
specs = build_dbt_asset_specs(
|
|
334
|
+
manifest=manifest_json,
|
|
376
335
|
dagster_dbt_translator=CustomDagsterDbtTranslator(),
|
|
377
|
-
|
|
336
|
+
select=" ".join(
|
|
337
|
+
f"fqn:{'.'.join(get_node(manifest_json, unique_id)['fqn'])}"
|
|
338
|
+
for unique_id in executed_unique_ids
|
|
339
|
+
),
|
|
378
340
|
)
|
|
379
341
|
|
|
380
342
|
return AssetsDefinitionCacheableData(
|
|
381
343
|
# TODO: In the future, we should allow additional upstream assets to be specified.
|
|
382
|
-
|
|
383
|
-
input_name: asset_key for asset_key, (input_name, _) in asset_ins.items()
|
|
384
|
-
},
|
|
385
|
-
keys_by_output_name={
|
|
386
|
-
output_name: asset_key for asset_key, (output_name, _) in asset_outs.items()
|
|
387
|
-
},
|
|
344
|
+
keys_by_output_name={spec.key.to_python_identifier(): spec.key for spec in specs},
|
|
388
345
|
internal_asset_deps={
|
|
389
|
-
|
|
346
|
+
spec.key.to_python_identifier(): {dep.asset_key for dep in spec.deps}
|
|
347
|
+
for spec in specs
|
|
390
348
|
},
|
|
391
|
-
# We don't rely on a static group name. Instead, we map over the dbt metadata to
|
|
392
|
-
# determine the group name for each asset.
|
|
393
|
-
group_name=None,
|
|
394
349
|
metadata_by_output_name={
|
|
395
|
-
|
|
396
|
-
|
|
350
|
+
spec.key.to_python_identifier(): self._build_dbt_cloud_assets_metadata(
|
|
351
|
+
get_node(
|
|
352
|
+
manifest_json,
|
|
353
|
+
spec.metadata[DAGSTER_DBT_UNIQUE_ID_METADATA_KEY],
|
|
354
|
+
)
|
|
355
|
+
)
|
|
356
|
+
for spec in specs
|
|
397
357
|
},
|
|
398
|
-
# TODO: In the future, we should allow the key prefix to be specified.
|
|
399
|
-
key_prefix=None,
|
|
400
|
-
can_subset=True,
|
|
401
358
|
extra_metadata={
|
|
402
359
|
"job_id": self._job_id,
|
|
403
360
|
"job_commands": self._job_commands,
|
|
404
361
|
"job_materialization_command_step": self._job_materialization_command_step,
|
|
405
362
|
"group_names_by_output_name": {
|
|
406
|
-
|
|
407
|
-
|
|
363
|
+
spec.key.to_python_identifier(): spec.group_name for spec in specs
|
|
364
|
+
},
|
|
365
|
+
"fqns_by_output_name": {
|
|
366
|
+
spec.key.to_python_identifier(): get_node(
|
|
367
|
+
manifest_json,
|
|
368
|
+
spec.metadata[DAGSTER_DBT_UNIQUE_ID_METADATA_KEY],
|
|
369
|
+
)["fqn"]
|
|
370
|
+
for spec in specs
|
|
408
371
|
},
|
|
409
|
-
"fqns_by_output_name": fqns_by_output_name,
|
|
410
|
-
},
|
|
411
|
-
freshness_policies_by_output_name={
|
|
412
|
-
asset_outs[asset_key][0]: freshness_policy
|
|
413
|
-
for asset_key, freshness_policy in freshness_policies_by_key.items()
|
|
414
372
|
},
|
|
415
373
|
auto_materialize_policies_by_output_name={
|
|
416
|
-
|
|
417
|
-
for
|
|
374
|
+
spec.key.to_python_identifier(): spec.auto_materialize_policy
|
|
375
|
+
for spec in specs
|
|
376
|
+
if spec.auto_materialize_policy
|
|
418
377
|
},
|
|
419
378
|
)
|
|
420
379
|
|
|
421
|
-
def _build_dbt_cloud_assets_metadata(
|
|
380
|
+
def _build_dbt_cloud_assets_metadata(
|
|
381
|
+
self, resource_props: Mapping[str, Any]
|
|
382
|
+
) -> RawMetadataMapping:
|
|
422
383
|
metadata = {
|
|
423
384
|
"dbt Cloud Job": MetadataValue.url(
|
|
424
385
|
self._dbt_cloud.build_url_for_job(
|
|
@@ -432,66 +393,57 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
432
393
|
metadata["dbt Cloud Documentation"] = MetadataValue.url(
|
|
433
394
|
self._dbt_cloud.build_url_for_cloud_docs(
|
|
434
395
|
job_id=self._job_id,
|
|
435
|
-
resource_type=
|
|
436
|
-
unique_id=
|
|
396
|
+
resource_type=resource_props["resource_type"],
|
|
397
|
+
unique_id=resource_props["unique_id"],
|
|
437
398
|
)
|
|
438
399
|
)
|
|
439
400
|
|
|
440
401
|
return metadata
|
|
441
402
|
|
|
403
|
+
def _rebuild_specs(self, cacheable_data: AssetsDefinitionCacheableData) -> Sequence[AssetSpec]:
|
|
404
|
+
specs = []
|
|
405
|
+
for id, key in (cacheable_data.keys_by_output_name or {}).items():
|
|
406
|
+
specs.append(
|
|
407
|
+
AssetSpec(
|
|
408
|
+
key=key,
|
|
409
|
+
group_name=(cacheable_data.extra_metadata or {})[
|
|
410
|
+
"group_names_by_output_name"
|
|
411
|
+
].get(id),
|
|
412
|
+
deps=(cacheable_data.internal_asset_deps or {}).get(id),
|
|
413
|
+
metadata=(cacheable_data.metadata_by_output_name or {}).get(id),
|
|
414
|
+
legacy_freshness_policy=(
|
|
415
|
+
cacheable_data.legacy_freshness_policies_by_output_name or {}
|
|
416
|
+
).get(id),
|
|
417
|
+
auto_materialize_policy=(
|
|
418
|
+
cacheable_data.auto_materialize_policies_by_output_name or {}
|
|
419
|
+
).get(id),
|
|
420
|
+
skippable=False,
|
|
421
|
+
)
|
|
422
|
+
)
|
|
423
|
+
return specs
|
|
424
|
+
|
|
442
425
|
def _build_dbt_cloud_assets_from_cacheable_data(
|
|
443
426
|
self, assets_definition_cacheable_data: AssetsDefinitionCacheableData
|
|
444
427
|
) -> AssetsDefinition:
|
|
445
|
-
metadata = cast(Mapping[str, Any], assets_definition_cacheable_data.extra_metadata)
|
|
446
|
-
job_id = cast(int, metadata["job_id"])
|
|
447
|
-
job_commands = cast(
|
|
448
|
-
job_materialization_command_step = cast(int, metadata["job_materialization_command_step"])
|
|
449
|
-
|
|
450
|
-
fqns_by_output_name = cast(Mapping[str, List[str]], metadata["fqns_by_output_name"])
|
|
428
|
+
metadata = cast("Mapping[str, Any]", assets_definition_cacheable_data.extra_metadata)
|
|
429
|
+
job_id = cast("int", metadata["job_id"])
|
|
430
|
+
job_commands = cast("list[str]", list(metadata["job_commands"]))
|
|
431
|
+
job_materialization_command_step = cast("int", metadata["job_materialization_command_step"])
|
|
432
|
+
fqns_by_output_name = cast("Mapping[str, list[str]]", metadata["fqns_by_output_name"])
|
|
451
433
|
|
|
452
434
|
@multi_asset(
|
|
453
435
|
name=f"dbt_cloud_job_{job_id}",
|
|
454
|
-
|
|
455
|
-
outs={
|
|
456
|
-
output_name: AssetOut(
|
|
457
|
-
key=asset_key,
|
|
458
|
-
group_name=group_names_by_output_name.get(output_name),
|
|
459
|
-
freshness_policy=(
|
|
460
|
-
assets_definition_cacheable_data.freshness_policies_by_output_name or {}
|
|
461
|
-
).get(
|
|
462
|
-
output_name,
|
|
463
|
-
),
|
|
464
|
-
auto_materialize_policy=(
|
|
465
|
-
assets_definition_cacheable_data.auto_materialize_policies_by_output_name
|
|
466
|
-
or {}
|
|
467
|
-
).get(
|
|
468
|
-
output_name,
|
|
469
|
-
),
|
|
470
|
-
metadata=(assets_definition_cacheable_data.metadata_by_output_name or {}).get(
|
|
471
|
-
output_name
|
|
472
|
-
),
|
|
473
|
-
is_required=False,
|
|
474
|
-
)
|
|
475
|
-
for output_name, asset_key in (
|
|
476
|
-
assets_definition_cacheable_data.keys_by_output_name or {}
|
|
477
|
-
).items()
|
|
478
|
-
},
|
|
479
|
-
internal_asset_deps={
|
|
480
|
-
output_name: set(asset_deps)
|
|
481
|
-
for output_name, asset_deps in (
|
|
482
|
-
assets_definition_cacheable_data.internal_asset_deps or {}
|
|
483
|
-
).items()
|
|
484
|
-
},
|
|
436
|
+
specs=self._rebuild_specs(assets_definition_cacheable_data),
|
|
485
437
|
partitions_def=self._partitions_def,
|
|
486
|
-
can_subset=
|
|
438
|
+
can_subset=True,
|
|
487
439
|
required_resource_keys={"dbt_cloud"},
|
|
488
440
|
compute_kind="dbt",
|
|
489
441
|
)
|
|
490
442
|
def _assets(context: AssetExecutionContext):
|
|
491
|
-
dbt_cloud = cast(DbtCloudClient, context.resources.dbt_cloud)
|
|
443
|
+
dbt_cloud = cast("DbtCloudClient", context.resources.dbt_cloud)
|
|
492
444
|
|
|
493
445
|
# Add the partition variable as a variable to the dbt Cloud job command.
|
|
494
|
-
dbt_options:
|
|
446
|
+
dbt_options: list[str] = []
|
|
495
447
|
if context.has_partition_key and self._partition_key_to_vars_fn:
|
|
496
448
|
partition_var = self._partition_key_to_vars_fn(context.partition_key)
|
|
497
449
|
|
|
@@ -502,32 +454,32 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
502
454
|
|
|
503
455
|
# Map the selected outputs to dbt models that should be materialized.
|
|
504
456
|
#
|
|
505
|
-
#
|
|
506
|
-
#
|
|
457
|
+
# From version 1.5.0 dbt allows multiple select args to be used in command,
|
|
458
|
+
# so we cannot just add our arg as last one to be used and need to remove
|
|
459
|
+
# both command-native --select args and --selector arg to run dagster-generated
|
|
460
|
+
# subset of models
|
|
507
461
|
#
|
|
508
|
-
#
|
|
509
|
-
# `--select` options when displayed in the UI, but parsing the command line argument
|
|
510
|
-
# to remove the initial select using argparse.
|
|
462
|
+
# See https://docs.getdbt.com/reference/node-selection/syntax for details.
|
|
511
463
|
if context.is_subset:
|
|
512
464
|
selected_models = [
|
|
513
465
|
".".join(fqns_by_output_name[output_name])
|
|
514
466
|
for output_name in context.op_execution_context.selected_output_names
|
|
467
|
+
# outputs corresponding to asset checks from dbt tests won't be in this dict
|
|
468
|
+
if output_name in fqns_by_output_name
|
|
515
469
|
]
|
|
516
470
|
|
|
517
471
|
dbt_options.append(f"--select {' '.join(sorted(selected_models))}")
|
|
518
472
|
|
|
519
|
-
|
|
520
|
-
#
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
split_materialization_command = shlex.split(materialization_command)
|
|
524
|
-
if "--selector" in split_materialization_command:
|
|
525
|
-
idx = split_materialization_command.index("--selector")
|
|
473
|
+
parser = ArgumentParser(description="Parse selection args from dbt command")
|
|
474
|
+
# Select arg should have nargs="+", but we probably want dbt itself to deal with it
|
|
475
|
+
parser.add_argument("-s", "--select", nargs="*", action="append")
|
|
476
|
+
parser.add_argument("--selector", nargs="*")
|
|
526
477
|
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
478
|
+
split_materialization_command = shlex.split(materialization_command)
|
|
479
|
+
_, non_selection_command_parts = parser.parse_known_args(
|
|
480
|
+
split_materialization_command
|
|
481
|
+
)
|
|
482
|
+
materialization_command = " ".join(non_selection_command_parts)
|
|
531
483
|
|
|
532
484
|
job_commands[job_materialization_command_step] = (
|
|
533
485
|
f"{materialization_command} {' '.join(dbt_options)}".strip()
|
|
@@ -574,9 +526,9 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
|
|
|
574
526
|
return _assets
|
|
575
527
|
|
|
576
528
|
|
|
577
|
-
@
|
|
578
|
-
@
|
|
579
|
-
@
|
|
529
|
+
@beta
|
|
530
|
+
@beta_param(param="partitions_def")
|
|
531
|
+
@beta_param(param="partition_key_to_vars_fn")
|
|
580
532
|
def load_assets_from_dbt_cloud_job(
|
|
581
533
|
dbt_cloud: Union[DbtCloudClientResource, ResourceDefinition],
|
|
582
534
|
job_id: int,
|
|
@@ -584,9 +536,6 @@ def load_assets_from_dbt_cloud_job(
|
|
|
584
536
|
node_info_to_group_fn: Callable[
|
|
585
537
|
[Mapping[str, Any]], Optional[str]
|
|
586
538
|
] = default_group_from_dbt_resource_props,
|
|
587
|
-
node_info_to_freshness_policy_fn: Callable[
|
|
588
|
-
[Mapping[str, Any]], Optional[FreshnessPolicy]
|
|
589
|
-
] = default_freshness_policy_fn,
|
|
590
539
|
node_info_to_auto_materialize_policy_fn: Callable[
|
|
591
540
|
[Mapping[str, Any]], Optional[AutoMaterializePolicy]
|
|
592
541
|
] = default_auto_materialize_policy_fn,
|
|
@@ -608,13 +557,6 @@ def load_assets_from_dbt_cloud_job(
|
|
|
608
557
|
dbt source -> AssetKey([source_name, table_name])
|
|
609
558
|
node_info_to_group_fn (Dict[str, Any] -> Optional[str]): A function that takes a
|
|
610
559
|
dictionary of dbt node info and returns the group that this node should be assigned to.
|
|
611
|
-
node_info_to_freshness_policy_fn (Dict[str, Any] -> Optional[FreshnessPolicy]): A function
|
|
612
|
-
that takes a dictionary of dbt node info and optionally returns a FreshnessPolicy that
|
|
613
|
-
should be applied to this node. By default, freshness policies will be created from
|
|
614
|
-
config applied to dbt models, i.e.:
|
|
615
|
-
`dagster_freshness_policy={"maximum_lag_minutes": 60, "cron_schedule": "0 9 * * *"}`
|
|
616
|
-
will result in that model being assigned
|
|
617
|
-
`FreshnessPolicy(maximum_lag_minutes=60, cron_schedule="0 9 * * *")`
|
|
618
560
|
node_info_to_auto_materialize_policy_fn (Dict[str, Any] -> Optional[AutoMaterializePolicy]):
|
|
619
561
|
A function that takes a dictionary of dbt node info and optionally returns an AutoMaterializePolicy
|
|
620
562
|
that should be applied to this node. By default, AutoMaterializePolicies will be created from
|
|
@@ -669,7 +611,7 @@ def load_assets_from_dbt_cloud_job(
|
|
|
669
611
|
job_id=job_id,
|
|
670
612
|
node_info_to_asset_key=node_info_to_asset_key,
|
|
671
613
|
node_info_to_group_fn=node_info_to_group_fn,
|
|
672
|
-
node_info_to_freshness_policy_fn=
|
|
614
|
+
node_info_to_freshness_policy_fn=lambda _: None,
|
|
673
615
|
node_info_to_auto_materialize_policy_fn=node_info_to_auto_materialize_policy_fn,
|
|
674
616
|
partitions_def=partitions_def,
|
|
675
617
|
partition_key_to_vars_fn=partition_key_to_vars_fn,
|
dagster_dbt/cloud/cli.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Annotated
|
|
2
2
|
|
|
3
3
|
import typer
|
|
4
|
-
from typing_extensions import Annotated
|
|
5
4
|
|
|
6
5
|
from dagster_dbt.cloud.asset_defs import (
|
|
7
6
|
DAGSTER_DBT_COMPILE_RUN_ID_ENV_VAR,
|
|
@@ -77,7 +76,7 @@ def cache_compile_references(
|
|
|
77
76
|
continue
|
|
78
77
|
|
|
79
78
|
# Retrieve the filters for the compile override step
|
|
80
|
-
job_commands:
|
|
79
|
+
job_commands: list[str] = dbt_cloud_job["execute_steps"]
|
|
81
80
|
job_materialization_command_step = (
|
|
82
81
|
DbtCloudCacheableAssetsDefinition.get_job_materialization_command_step(
|
|
83
82
|
execute_steps=job_commands
|
|
@@ -87,7 +86,7 @@ def cache_compile_references(
|
|
|
87
86
|
parsed_args = DbtCloudCacheableAssetsDefinition.parse_dbt_command(
|
|
88
87
|
dbt_materialization_command
|
|
89
88
|
)
|
|
90
|
-
dbt_compile_options:
|
|
89
|
+
dbt_compile_options: list[str] = DbtCloudCacheableAssetsDefinition.get_compile_filters(
|
|
91
90
|
parsed_args=parsed_args
|
|
92
91
|
)
|
|
93
92
|
dbt_compile_command = f"dbt compile {' '.join(dbt_compile_options)}"
|
dagster_dbt/cloud/ops.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import Optional
|
|
2
2
|
|
|
3
3
|
from dagster import Config, In, Nothing, Out, Output, op
|
|
4
|
+
from dagster._annotations import beta
|
|
5
|
+
from dagster._core.storage.tags import COMPUTE_KIND_TAG
|
|
4
6
|
from pydantic import Field
|
|
5
7
|
|
|
6
|
-
from
|
|
7
|
-
from .
|
|
8
|
-
from .
|
|
8
|
+
from dagster_dbt.cloud.resources import DEFAULT_POLL_INTERVAL
|
|
9
|
+
from dagster_dbt.cloud.types import DbtCloudOutput
|
|
10
|
+
from dagster_dbt.cloud.utils import generate_materializations
|
|
9
11
|
|
|
10
12
|
|
|
11
13
|
class DbtCloudRunOpConfig(Config):
|
|
@@ -36,7 +38,7 @@ class DbtCloudRunOpConfig(Config):
|
|
|
36
38
|
),
|
|
37
39
|
)
|
|
38
40
|
|
|
39
|
-
asset_key_prefix:
|
|
41
|
+
asset_key_prefix: list[str] = Field(
|
|
40
42
|
default=["dbt"],
|
|
41
43
|
description=(
|
|
42
44
|
"If provided and yield_materializations is True, these components will be used to "
|
|
@@ -49,8 +51,9 @@ class DbtCloudRunOpConfig(Config):
|
|
|
49
51
|
required_resource_keys={"dbt_cloud"},
|
|
50
52
|
ins={"start_after": In(Nothing)},
|
|
51
53
|
out=Out(DbtCloudOutput, description="Parsed output from running the dbt Cloud job."),
|
|
52
|
-
tags={
|
|
54
|
+
tags={COMPUTE_KIND_TAG: "dbt_cloud"},
|
|
53
55
|
)
|
|
56
|
+
@beta
|
|
54
57
|
def dbt_cloud_run_op(context, config: DbtCloudRunOpConfig):
|
|
55
58
|
"""Initiates a run for a dbt Cloud job, then polls until the run completes. If the job
|
|
56
59
|
fails or is otherwise stopped before succeeding, a `dagster.Failure` exception will be raised,
|
dagster_dbt/cloud/resources.py
CHANGED
|
@@ -2,8 +2,9 @@ import datetime
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import time
|
|
5
|
+
from collections.abc import Mapping, Sequence
|
|
5
6
|
from enum import Enum
|
|
6
|
-
from typing import Any,
|
|
7
|
+
from typing import Any, Optional, cast
|
|
7
8
|
from urllib.parse import urlencode, urljoin
|
|
8
9
|
|
|
9
10
|
import requests
|
|
@@ -17,12 +18,13 @@ from dagster import (
|
|
|
17
18
|
get_dagster_logger,
|
|
18
19
|
resource,
|
|
19
20
|
)
|
|
21
|
+
from dagster._annotations import beta
|
|
20
22
|
from dagster._core.definitions.resource_definition import dagster_maintained_resource
|
|
21
23
|
from dagster._utils.merger import deep_merge_dicts
|
|
22
24
|
from pydantic import Field
|
|
23
25
|
from requests.exceptions import RequestException
|
|
24
26
|
|
|
25
|
-
from .types import DbtCloudOutput
|
|
27
|
+
from dagster_dbt.cloud.types import DbtCloudOutput
|
|
26
28
|
|
|
27
29
|
DBT_DEFAULT_HOST = "https://cloud.getdbt.com/"
|
|
28
30
|
DBT_API_V2_PATH = "api/v2/accounts/"
|
|
@@ -352,7 +354,7 @@ class DbtCloudClient:
|
|
|
352
354
|
"""
|
|
353
355
|
query_params = f"?step={step}" if step else ""
|
|
354
356
|
return cast(
|
|
355
|
-
list,
|
|
357
|
+
"list",
|
|
356
358
|
self.make_request(
|
|
357
359
|
"GET",
|
|
358
360
|
f"{self._account_id}/runs/{run_id}/artifacts/{query_params}",
|
|
@@ -586,10 +588,12 @@ class DbtCloudClient:
|
|
|
586
588
|
)
|
|
587
589
|
|
|
588
590
|
|
|
591
|
+
@beta
|
|
589
592
|
class DbtCloudResource(DbtCloudClient):
|
|
590
593
|
pass
|
|
591
594
|
|
|
592
595
|
|
|
596
|
+
@beta
|
|
593
597
|
class DbtCloudClientResource(ConfigurableResource, IAttachDifferentObjectToOpContext):
|
|
594
598
|
"""This resource helps interact with dbt Cloud connectors."""
|
|
595
599
|
|
|
@@ -655,6 +659,7 @@ class DbtCloudClientResource(ConfigurableResource, IAttachDifferentObjectToOpCon
|
|
|
655
659
|
return self.get_dbt_client()
|
|
656
660
|
|
|
657
661
|
|
|
662
|
+
@beta
|
|
658
663
|
@dagster_maintained_resource
|
|
659
664
|
@resource(
|
|
660
665
|
config_schema=DbtCloudClientResource.to_config_schema(),
|
|
@@ -669,7 +674,7 @@ def dbt_cloud_resource(context) -> DbtCloudResource:
|
|
|
669
674
|
response JSON schemas, see the `dbt Cloud API Docs <https://docs.getdbt.com/dbt-cloud/api-v2>`_.
|
|
670
675
|
|
|
671
676
|
To configure this resource, we recommend using the `configured
|
|
672
|
-
<https://docs.dagster.io/concepts/configuration/configured>`_ method.
|
|
677
|
+
<https://legacy-docs.dagster.io/concepts/configuration/configured>`_ method.
|
|
673
678
|
|
|
674
679
|
**Examples:**
|
|
675
680
|
|