dagster-dbt 0.27.13__tar.gz → 0.28.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dagster_dbt-0.27.13/dagster_dbt.egg-info → dagster_dbt-0.28.8}/PKG-INFO +6 -5
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/asset_decorator.py +2 -2
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/asset_utils.py +52 -23
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud/asset_defs.py +3 -23
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud/utils.py +2 -2
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/asset_decorator.py +2 -1
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/cli_invocation.py +7 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/client.py +91 -18
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/run_handler.py +22 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/sensor_builder.py +1 -1
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/compat.py +1 -2
- dagster_dbt-0.28.8/dagster_dbt/components/dbt_project/component.py +545 -0
- dagster_dbt-0.28.8/dagster_dbt/components/dbt_project/scaffolder.py +65 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/core/dbt_cli_invocation.py +1 -2
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/core/dbt_event_iterator.py +17 -8
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/core/resource.py +8 -5
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/dagster_dbt_translator.py +48 -66
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/dbt_project.py +36 -0
- dagster_dbt-0.28.8/dagster_dbt/dbt_project_manager.py +173 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/freshness_builder.py +6 -2
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/include/pyproject.toml.jinja +1 -1
- dagster_dbt-0.28.8/dagster_dbt/version.py +1 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8/dagster_dbt.egg-info}/PKG-INFO +6 -5
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt.egg-info/SOURCES.txt +1 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt.egg-info/requires.txt +3 -2
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/setup.py +5 -4
- dagster_dbt-0.27.13/dagster_dbt/components/dbt_project/component.py +0 -339
- dagster_dbt-0.27.13/dagster_dbt/components/dbt_project/scaffolder.py +0 -50
- dagster_dbt-0.27.13/dagster_dbt/version.py +0 -1
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/LICENSE +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/MANIFEST.in +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/README.md +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/__init__.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/asset_specs.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cli/__init__.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cli/app.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud/__init__.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud/cli.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud/ops.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud/resources.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud/types.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/__init__.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/resources.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/types.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/components/__init__.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/components/dbt_project/__init__.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/core/__init__.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/core/dbt_cli_event.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/core/utils.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/dbt_core_version.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/dbt_manifest.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/dbt_manifest_asset_selection.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/dbt_version.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/errors.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/include/__init__.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/include/scaffold/__init__.py.jinja +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/include/scaffold/assets.py.jinja +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/include/scaffold/definitions.py.jinja +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/include/scaffold/project.py.jinja +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/include/scaffold/schedules.py.jinja +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/include/setup.py.jinja +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/metadata_set.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/py.typed +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/utils.py +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt.egg-info/dependency_links.txt +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt.egg-info/entry_points.txt +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt.egg-info/not-zip-safe +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt.egg-info/top_level.txt +0 -0
- {dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/setup.cfg +0 -0
{dagster_dbt-0.27.13/dagster_dbt.egg-info → dagster_dbt-0.28.8}/PKG-INFO
@@ -1,28 +1,29 @@
 Metadata-Version: 2.4
 Name: dagster-dbt
-Version: 0.27.13
+Version: 0.28.8
 Summary: A Dagster integration for dbt
 Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-dbt
 Author: Dagster Labs
 Author-email: hello@dagsterlabs.com
 License: Apache-2.0
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
-Requires-Python: >=3.9,<3.14
+Requires-Python: >=3.10,<3.15
 License-File: LICENSE
-Requires-Dist: dagster==1.11.13
+Requires-Dist: dagster==1.12.8
 Requires-Dist: dbt-core<1.11,>=1.7
+Requires-Dist: gitpython
 Requires-Dist: Jinja2
 Requires-Dist: networkx
 Requires-Dist: orjson
 Requires-Dist: requests
 Requires-Dist: rich
-Requires-Dist: sqlglot[rs]
+Requires-Dist: sqlglot[rs]<28.1.0
 Requires-Dist: typer>=0.9.0
 Requires-Dist: packaging
 Provides-Extra: test-bare
{dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/asset_utils.py
@@ -1,11 +1,14 @@
 import hashlib
 import os
+import shutil
+import tempfile
 import textwrap
 from collections import defaultdict
 from collections.abc import Iterable, Mapping, Sequence
 from pathlib import Path
 from typing import TYPE_CHECKING, AbstractSet, Annotated, Any, Final, Optional, Union  # noqa: UP035
 
+import yaml
 from dagster import (
     AssetCheckKey,
     AssetCheckSpec,
@@ -18,7 +21,6 @@ from dagster import (
     DagsterInvalidDefinitionError,
     DagsterInvariantViolationError,
     DefaultScheduleStatus,
-    LegacyFreshnessPolicy,
     OpExecutionContext,
     RunConfig,
     ScheduleDefinition,
@@ -33,6 +35,7 @@ from dagster._core.definitions.metadata import TableMetadataSet
 from dagster._core.errors import DagsterInvalidPropertyError
 from dagster._core.types.dagster_type import Nothing
 from dagster._record import ImportFrom, record
+from dagster_shared.record import replace
 
 from dagster_dbt.dbt_project import DbtProject
 from dagster_dbt.metadata_set import DbtMetadataSet
@@ -56,6 +59,10 @@ DBT_DEFAULT_SELECTOR = ""
 DBT_INDIRECT_SELECTION_ENV: Final[str] = "DBT_INDIRECT_SELECTION"
 DBT_EMPTY_INDIRECT_SELECTION: Final[str] = "empty"
 
+# Threshold for switching to selector file to avoid CLI argument length limits
+# https://github.com/dagster-io/dagster/issues/16997
+_SELECTION_ARGS_THRESHOLD: Final[int] = 200
+
 DUPLICATE_ASSET_KEY_ERROR_MESSAGE = (
     "The following dbt resources are configured with identical Dagster asset keys."
     " Please ensure that each dbt resource generates a unique Dagster asset key."
@@ -168,7 +175,9 @@ def get_asset_keys_by_output_name_for_source(
         raise KeyError(f"Could not find a dbt source with name: {source_name}")
 
     return {
-        dagster_name_fn(value): dagster_dbt_translator.get_asset_spec(manifest, unique_id).key
+        dagster_name_fn(value): dagster_dbt_translator.get_asset_spec(
+            manifest, unique_id, dbt_project
+        ).key
         for unique_id, value in matching.items()
     }
 
@@ -442,6 +451,10 @@ def get_updated_cli_invocation_params_for_context(
         [assets_def]
     )
 
+    # Get project_dir from dbt_project if available
+    project_dir = Path(dbt_project.project_dir) if dbt_project else None
+    target_project = dbt_project
+
     selection_args, indirect_selection_override = get_subset_selection_for_context(
         context=context,
         manifest=manifest,
@@ -451,17 +464,49 @@ def get_updated_cli_invocation_params_for_context(
         dagster_dbt_translator=dagster_dbt_translator,
         current_dbt_indirect_selection_env=indirect_selection,
     )
+    if (
+        selection_args[0] == "--select"
+        and project_dir
+        and len(resources := selection_args[1].split(" ")) > _SELECTION_ARGS_THRESHOLD
+    ):
+        temp_project_dir = tempfile.mkdtemp()
+        shutil.copytree(project_dir, temp_project_dir, dirs_exist_ok=True)
+        selectors_path = Path(temp_project_dir) / "selectors.yml"
+
+        # Delete any existing selectors, we need to create our own
+        if selectors_path.exists():
+            selectors_path.unlink()
+
+        selector_name = f"dagster_run_{context.run_id}"
+        temp_selectors = {
+            "selectors": [
+                {
+                    "name": selector_name,
+                    "definition": {"union": list(resources)},
+                }
+            ]
+        }
+        selectors_path.write_text(yaml.safe_dump(temp_selectors))
+        logger.info(
+            f"DBT selection of {len(resources)} resources exceeds threshold of {_SELECTION_ARGS_THRESHOLD}. "
+            "This may exceed system argument length limits. "
+            f"Executing materialization against temporary copy of DBT project at {temp_project_dir} with ephemeral selector."
+        )
+        selection_args = ["--selector", selector_name]
+        target_project = replace(dbt_project, project_dir=Path(temp_project_dir))
+    else:
+        target_project = dbt_project
 
     indirect_selection = (
         indirect_selection_override if indirect_selection_override else indirect_selection
     )
 
     return DbtCliInvocationPartialParams(
         manifest=manifest,
         dagster_dbt_translator=dagster_dbt_translator,
         selection_args=selection_args,
         indirect_selection=indirect_selection,
-        dbt_project=dbt_project,
+        dbt_project=target_project,
     )
 
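The hunk above is the heart of this change: once a `--select` list grows past `_SELECTION_ARGS_THRESHOLD`, the invocation is rewritten against a temporary copy of the project with a generated `selectors.yml`. The sketch below reproduces the idea in isolation, under stated assumptions: the resource list, selector name, and scratch directory are all hypothetical, and dbt's documented `union` selector definition makes `--selector` equivalent to the original `--select` list.

```python
# Minimal standalone sketch of the ephemeral-selector technique, assuming a
# hypothetical selection of 300 resources; all names below are illustrative.
import tempfile
from pathlib import Path

import yaml

resources = [f"model.my_project.my_model_{i}" for i in range(300)]  # hypothetical
selector_name = "dagster_run_example"

temp_project_dir = Path(tempfile.mkdtemp())
selectors_path = temp_project_dir / "selectors.yml"

# dbt selector file format: a top-level "selectors" list; "union" ORs the
# listed resources together, matching the semantics of repeated --select args.
selectors_path.write_text(
    yaml.safe_dump(
        {"selectors": [{"name": selector_name, "definition": {"union": resources}}]}
    )
)

# The CLI invocation then shrinks from ~300 --select arguments to two:
#   dbt build --selector dagster_run_example
print(f"Wrote {selectors_path}")
```

Writing the selector into a copied project keeps the user's own `selectors.yml` untouched while sidestepping OS-level argument-length limits.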
@@ -527,6 +572,9 @@ def default_metadata_from_dbt_resource_props(
             dbt_resource_props.get("database"),
             dbt_resource_props.get("schema"),
             dbt_resource_props.get("alias"),
+            dbt_resource_props.get("name")
+            if dbt_resource_props.get("resource_type") == "source"
+            else None,
         ]
         if relation_part
     ]
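For context on the hunk above: the surrounding comprehension keeps only the non-empty relation parts, and the new branch lets a dbt source contribute its `name` as the final part. A hedged sketch, assuming the parts are joined with `.` into a relation identifier; the fake node below is illustrative only.

```python
# How the relation parts above combine for a source node (illustrative data).
dbt_resource_props = {
    "resource_type": "source",
    "database": "analytics",
    "schema": "raw",
    "alias": None,
    "name": "orders",
}
relation_parts = [
    relation_part
    for relation_part in [
        dbt_resource_props.get("database"),
        dbt_resource_props.get("schema"),
        dbt_resource_props.get("alias"),
        dbt_resource_props.get("name")
        if dbt_resource_props.get("resource_type") == "source"
        else None,
    ]
    if relation_part
]
print(".".join(relation_parts))  # -> analytics.raw.orders
```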
@@ -604,25 +652,6 @@ def default_owners_from_dbt_resource_props(
     return [owner] if isinstance(owner, str) else owner
 
 
-def default_freshness_policy_fn(
-    dbt_resource_props: Mapping[str, Any],
-) -> Optional[LegacyFreshnessPolicy]:
-    dagster_metadata = dbt_resource_props.get("meta", {}).get("dagster", {})
-    freshness_policy_config = dagster_metadata.get("freshness_policy", {})
-
-    freshness_policy = (
-        LegacyFreshnessPolicy(
-            maximum_lag_minutes=float(freshness_policy_config["maximum_lag_minutes"]),
-            cron_schedule=freshness_policy_config.get("cron_schedule"),
-            cron_schedule_timezone=freshness_policy_config.get("cron_schedule_timezone"),
-        )
-        if freshness_policy_config
-        else None
-    )
-
-    return freshness_policy
-
-
 def default_auto_materialize_policy_fn(
     dbt_resource_props: Mapping[str, Any],
 ) -> Optional[AutoMaterializePolicy]:
@@ -813,7 +842,7 @@ def build_dbt_specs(
 
         # add check specs associated with the asset
         for child_unique_id in child_map.get(unique_id, []):
-            if not child_unique_id.startswith("test"):
+            if child_unique_id not in selected_unique_ids or not child_unique_id.startswith("test"):
                 continue
             check_spec = translator.get_asset_check_spec(
                 asset_spec=spec,
{dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud/asset_defs.py
@@ -1,9 +1,9 @@
 import json
 import shlex
 from argparse import ArgumentParser, Namespace
-from collections.abc import Mapping, Sequence
+from collections.abc import Callable, Mapping, Sequence
 from contextlib import suppress
-from typing import Any, Callable, Optional, Union, cast
+from typing import Any, Optional, Union, cast
 
 import dagster._check as check
 from dagster import (
@@ -33,7 +33,6 @@ from dagster_dbt.asset_utils import (
     default_asset_key_fn,
     default_auto_materialize_policy_fn,
     default_description_fn,
-    default_freshness_policy_fn,
     default_group_from_dbt_resource_props,
     get_node,
 )
@@ -326,10 +325,6 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
             def get_group_name(cls, dbt_resource_props):  # pyright: ignore[reportIncompatibleMethodOverride]
                 return self._node_info_to_group_fn(dbt_resource_props)
 
-            @classmethod
-            def get_freshness_policy(cls, dbt_resource_props):  # pyright: ignore[reportIncompatibleMethodOverride]
-                return self._node_info_to_freshness_policy_fn(dbt_resource_props)
-
             @classmethod
             def get_auto_materialize_policy(cls, dbt_resource_props):  # pyright: ignore[reportIncompatibleMethodOverride]
                 return self._node_info_to_auto_materialize_policy_fn(dbt_resource_props)
@@ -375,11 +370,6 @@ class DbtCloudCacheableAssetsDefinition(CacheableAssetsDefinition):
                     for spec in specs
                 },
             },
-            legacy_freshness_policies_by_output_name={
-                spec.key.to_python_identifier(): spec.legacy_freshness_policy
-                for spec in specs
-                if spec.legacy_freshness_policy
-            },
             auto_materialize_policies_by_output_name={
                 spec.key.to_python_identifier(): spec.auto_materialize_policy
                 for spec in specs
@@ -546,9 +536,6 @@ def load_assets_from_dbt_cloud_job(
     node_info_to_group_fn: Callable[
         [Mapping[str, Any]], Optional[str]
     ] = default_group_from_dbt_resource_props,
-    node_info_to_freshness_policy_fn: Callable[
-        [Mapping[str, Any]], Optional[LegacyFreshnessPolicy]
-    ] = default_freshness_policy_fn,
     node_info_to_auto_materialize_policy_fn: Callable[
         [Mapping[str, Any]], Optional[AutoMaterializePolicy]
     ] = default_auto_materialize_policy_fn,
@@ -570,13 +557,6 @@ def load_assets_from_dbt_cloud_job(
             dbt source -> AssetKey([source_name, table_name])
         node_info_to_group_fn (Dict[str, Any] -> Optional[str]): A function that takes a
             dictionary of dbt node info and returns the group that this node should be assigned to.
-        node_info_to_freshness_policy_fn (Dict[str, Any] -> Optional[FreshnessPolicy]): A function
-            that takes a dictionary of dbt node info and optionally returns a FreshnessPolicy that
-            should be applied to this node. By default, freshness policies will be created from
-            config applied to dbt models, i.e.:
-            `dagster_freshness_policy={"maximum_lag_minutes": 60, "cron_schedule": "0 9 * * *"}`
-            will result in that model being assigned
-            `FreshnessPolicy(maximum_lag_minutes=60, cron_schedule="0 9 * * *")`
         node_info_to_auto_materialize_policy_fn (Dict[str, Any] -> Optional[AutoMaterializePolicy]):
             A function that takes a dictionary of dbt node info and optionally returns a AutoMaterializePolicy
             that should be applied to this node. By default, AutoMaterializePolicies will be created from
@@ -631,7 +611,7 @@ def load_assets_from_dbt_cloud_job(
         job_id=job_id,
         node_info_to_asset_key=node_info_to_asset_key,
         node_info_to_group_fn=node_info_to_group_fn,
-        node_info_to_freshness_policy_fn=node_info_to_freshness_policy_fn,
+        node_info_to_freshness_policy_fn=lambda _: None,
         node_info_to_auto_materialize_policy_fn=node_info_to_auto_materialize_policy_fn,
         partitions_def=partitions_def,
         partition_key_to_vars_fn=partition_key_to_vars_fn,
{dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/cli_invocation.py
@@ -1,3 +1,4 @@
+import sys
 from collections.abc import Iterator, Mapping, Sequence
 from typing import Any, Optional, Union
 
@@ -54,6 +55,12 @@ class DbtCloudCliInvocation:
         self, timeout: Optional[float] = None
     ) -> Iterator[Union[AssetCheckEvaluation, AssetCheckResult, AssetMaterialization, Output]]:
         run = self.run_handler.wait(timeout=timeout)
+
+        # Write dbt Cloud run logs to stdout
+        logs = self.run_handler.get_run_logs()
+        if logs:
+            sys.stdout.write(logs)
+
         if "run_results.json" in self.run_handler.list_run_artifacts():
             run_results = DbtCloudJobRunResults.from_run_results_json(
                 run_results_json=self.run_handler.get_run_results()
{dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/client.py
@@ -92,7 +92,7 @@ class DbtCloudWorkspaceClient(DagsterModel):
         data: Optional[Mapping[str, Any]] = None,
         params: Optional[Mapping[str, Any]] = None,
         session_attr: str = "_get_session",
-    ) -> Mapping[str, Any]:
+    ) -> requests.Response:
         url = f"{base_url}/{endpoint}" if endpoint else base_url
 
         num_retries = 0
@@ -107,7 +107,7 @@ class DbtCloudWorkspaceClient(DagsterModel):
                     timeout=self.request_timeout,
                 )
                 response.raise_for_status()
-                return response.json()
+                return response
             except RequestException as e:
                 self._log.error(
                     f"Request to dbt Cloud API failed for url {url} with method {method} : {e}"
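The two hunks above explain the recurring `)["data"]` → `).json()["data"]` edits throughout this file: `_make_request` now returns the raw `requests.Response` instead of pre-parsed JSON, and each call site decodes explicitly. A minimal sketch of the pattern; the helper name and URL are illustrative, not the package's API.

```python
import requests

def make_request(url: str) -> requests.Response:
    # Return the raw Response; callers choose how (and whether) to decode it.
    response = requests.get(url, timeout=15)
    response.raise_for_status()
    return response

# Call sites now chain .json() explicitly, e.g.:
#   data = make_request("https://cloud.getdbt.com/api/v2/accounts/1/jobs").json()["data"]
```

Returning the `Response` keeps one retrying helper usable for both JSON endpoints and artifact downloads.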
@@ -143,7 +143,7 @@ class DbtCloudWorkspaceClient(DagsterModel):
         """
         if not description:
             description = "A job that runs dbt models, sources, and tests."
-        return self._make_request(
+        response = self._make_request(
             method="post",
             endpoint="jobs",
             base_url=self.api_v2_url,
@@ -155,7 +155,8 @@ class DbtCloudWorkspaceClient(DagsterModel):
                 "description": description,
                 "job_type": "other",
             },
-        )["data"]
+        )
+        return response.json()["data"]
 
     def list_jobs(
         self,
@@ -185,7 +186,7 @@ class DbtCloudWorkspaceClient(DagsterModel):
                     "limit": DAGSTER_DBT_CLOUD_LIST_JOBS_INDIVIDUAL_REQUEST_LIMIT,
                     "offset": len(results),
                 },
-            )["data"]:
+            ).json()["data"]:
                 results.extend(jobs)
                 if len(jobs) < DAGSTER_DBT_CLOUD_LIST_JOBS_INDIVIDUAL_REQUEST_LIMIT:
                     break
@@ -201,7 +202,7 @@ class DbtCloudWorkspaceClient(DagsterModel):
             method="get",
             endpoint=f"jobs/{job_id}",
             base_url=self.api_v2_url,
-        )["data"]
+        ).json()["data"]
 
     def destroy_job(self, job_id: int) -> Mapping[str, Any]:
         """Destroys a given dbt Cloud job.
@@ -213,7 +214,7 @@ class DbtCloudWorkspaceClient(DagsterModel):
             method="delete",
             endpoint=f"jobs/{job_id}",
             base_url=self.api_v2_url,
-        )["data"]
+        ).json()["data"]
 
     def trigger_job_run(
         self, job_id: int, steps_override: Optional[Sequence[str]] = None
@@ -237,7 +238,7 @@ class DbtCloudWorkspaceClient(DagsterModel):
             data={"steps_override": steps_override, "cause": DAGSTER_ADHOC_TRIGGER_CAUSE}
             if steps_override
             else {"cause": DAGSTER_ADHOC_TRIGGER_CAUSE},
-        )["data"]
+        ).json()["data"]
 
     def get_runs_batch(
         self,
@@ -278,26 +279,35 @@ class DbtCloudWorkspaceClient(DagsterModel):
                 "finished_at__range": f"""["{finished_at_lower_bound.isoformat()}", "{finished_at_upper_bound.isoformat()}"]""",
                 "order_by": "finished_at",
             },
-        )
+        ).json()
         data = cast("Sequence[Mapping[str, Any]]", resp["data"])
         total_count = resp["extra"]["pagination"]["total_count"]
         return data, total_count
 
-    def get_run_details(self, run_id: int) -> Mapping[str, Any]:
+    def get_run_details(
+        self, run_id: int, include_related: Optional[Sequence[str]] = None
+    ) -> Mapping[str, Any]:
         """Retrieves the details of a given dbt Cloud Run.
 
         Args:
             run_id (int): The dbt Cloud Run ID. You can retrieve this value from the
                 URL of the given run in the dbt Cloud UI.
+            include_related (Optional[Sequence[str]]): List of related fields to pull with the run.
+                Valid values are "trigger", "job", "debug_logs", and "run_steps".
 
         Returns:
             Dict[str, Any]: Parsed json data representing the API response.
         """
+        params = {}
+        if include_related:
+            params["include_related"] = ",".join(include_related)
+
         return self._make_request(
             method="get",
             endpoint=f"runs/{run_id}",
             base_url=self.api_v2_url,
-        )["data"]
+            params=params,
+        ).json()["data"]
 
     def poll_run(
         self,
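The `include_related` parameter added above is forwarded as the dbt Cloud v2 API query parameter of the same name. A hedged sketch of the equivalent raw request, with placeholder account, run, and token values; the hostname assumes the default multi-tenant instance.

```python
import requests

ACCOUNT_ID, RUN_ID = 1, 123456  # placeholders
headers = {"Authorization": "Token <dbt-cloud-token>"}  # placeholder token

resp = requests.get(
    f"https://cloud.getdbt.com/api/v2/accounts/{ACCOUNT_ID}/runs/{RUN_ID}",
    headers=headers,
    params={"include_related": ",".join(["run_steps"])},
    timeout=15,
)
resp.raise_for_status()
# Each related run step carries its own logs and status fields.
for step in resp.json()["data"].get("run_steps", []):
    print(step.get("name"), step.get("status_humanized"))
```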
@@ -352,21 +362,25 @@ class DbtCloudWorkspaceClient(DagsterModel):
             endpoint=f"runs/{run_id}/artifacts",
             base_url=self.api_v2_url,
             session_attr="_get_artifact_session",
-        )["data"],
+        ).json()["data"],
     )
 
     def get_run_artifact(self, run_id: int, path: str) -> Mapping[str, Any]:
         """Retrieves an artifact at the given path for a given dbt Cloud Run.
 
+        Args:
+            run_id (int): The dbt Cloud Run ID.
+            path (str): The path to the artifact (e.g., "run_results.json", "manifest.json").
+
         Returns:
-            Dict[str, Any]: Parsed json data representing the API response.
+            Dict[str, Any]: Parsed json data representing the artifact.
         """
         return self._make_request(
             method="get",
             endpoint=f"runs/{run_id}/artifacts/{path}",
             base_url=self.api_v2_url,
             session_attr="_get_artifact_session",
-        )
+        ).json()
 
     def get_run_results_json(self, run_id: int) -> Mapping[str, Any]:
         """Retrieves the run_results.json artifact of a given dbt Cloud Run.
@@ -384,6 +398,65 @@ class DbtCloudWorkspaceClient(DagsterModel):
         """
         return self.get_run_artifact(run_id=run_id, path="manifest.json")
 
+    def get_run_logs(self, run_id: int, max_retries: int = 3, retry_delay: float = 2.0) -> str:
+        """Retrieves the stdout/stderr logs from a given dbt Cloud Run.
+
+        This method fetches logs from the run_steps field by calling get_run_details
+        with include_related=["run_steps"]. Each step contains a logs field with
+        the stdout/stderr output for that step.
+
+        Note: There can be a slight delay between when a run completes and when the logs
+        are fully populated in the API. This method will retry a few times if it detects
+        completed steps with empty logs.
+
+        Args:
+            run_id (int): The dbt Cloud Run ID.
+            max_retries (int): Maximum number of times to retry fetching logs if empty. Defaults to 3.
+            retry_delay (float): Time in seconds to wait between retries. Defaults to 2.0.
+
+        Returns:
+            str: The concatenated log text content from all run steps.
+        """
+        for attempt in range(max_retries):
+            run_details = self.get_run_details(run_id=run_id, include_related=["run_steps"])
+
+            logs_parts = []
+            run_steps = run_details.get("run_steps", [])
+            completed_steps_with_empty_logs = 0
+
+            for step in run_steps:
+                step_name = step.get("name", "Unknown Step")
+                step_logs = step.get("logs", "")
+                step_status = step.get("status_humanized", "unknown")
+
+                # Track completed steps with empty logs
+                if step_status == "Success" and not step_logs:
+                    completed_steps_with_empty_logs += 1
+
+                if step_logs:
+                    logs_parts.append(f"=== Step: {step_name} ===")
+                    logs_parts.append(step_logs)
+                    logs_parts.append("")  # Empty line between steps
+
+            # If we have completed steps with empty logs and retries left, wait and try again
+            if completed_steps_with_empty_logs > 0 and attempt < max_retries - 1:
+                self._log.warning(
+                    f"Found {completed_steps_with_empty_logs} completed steps with empty logs for run {run_id}. "
+                    f"Retrying in {retry_delay} seconds..."
+                )
+                time.sleep(retry_delay)
+                continue
+
+            # Either we got all logs or we're out of retries
+            if completed_steps_with_empty_logs > 0:
+                self._log.warning(
+                    f"Still missing logs for {completed_steps_with_empty_logs} completed steps after {max_retries} attempts"
+                )
+
+            return "\n".join(logs_parts) if logs_parts else ""
+
+        return ""
+
     def get_project_details(self, project_id: int) -> Mapping[str, Any]:
         """Retrieves the details of a given dbt Cloud Project.
 
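The retry loop inside `get_run_logs` covers the window between a run finishing and its step logs being populated in the API. The generic shape of that pattern, sketched standalone with a stand-in fetch function (not the package's API):

```python
import time

def fetch_with_retry(fetch, max_retries: int = 3, retry_delay: float = 2.0) -> str:
    """Return fetch()'s result, retrying while it comes back empty."""
    for attempt in range(max_retries):
        result = fetch()
        if result or attempt == max_retries - 1:
            return result
        time.sleep(retry_delay)  # data may lag completion; wait and retry
    return ""

print(fetch_with_retry(lambda: "example log line"))
```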
@@ -398,7 +471,7 @@ class DbtCloudWorkspaceClient(DagsterModel):
             method="get",
             endpoint=f"projects/{project_id}",
             base_url=self.api_v2_url,
-        )["data"]
+        ).json()["data"]
 
     def get_environment_details(self, environment_id: int) -> Mapping[str, Any]:
         """Retrieves the details of a given dbt Cloud Environment.
@@ -414,7 +487,7 @@ class DbtCloudWorkspaceClient(DagsterModel):
             method="get",
             endpoint=f"environments/{environment_id}",
             base_url=self.api_v2_url,
-        )["data"]
+        ).json()["data"]
 
     def get_account_details(self) -> Mapping[str, Any]:
         """Retrieves the details of the account associated to the dbt Cloud workspace.
@@ -426,7 +499,7 @@ class DbtCloudWorkspaceClient(DagsterModel):
             method="get",
             endpoint=None,
             base_url=self.api_v2_url,
-        )["data"]
+        ).json()["data"]
 
     def verify_connection(self) -> None:
         """Verifies the connection to the dbt Cloud REST API."""
{dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/run_handler.py
@@ -12,7 +12,9 @@ from dagster import (
     get_dagster_logger,
 )
 from dagster._record import record
+from dagster._time import get_current_timestamp
 from dateutil import parser
+from requests.exceptions import RequestException
 
 from dagster_dbt.asset_utils import build_dbt_specs, get_asset_check_key_for_test
 from dagster_dbt.cloud_v2.client import DbtCloudWorkspaceClient
@@ -61,8 +63,28 @@ class DbtCloudJobRunHandler:
     def list_run_artifacts(self) -> Sequence[str]:
         return self.client.list_run_artifacts(run_id=self.run_id)
 
+    def get_run_logs(self) -> Optional[str]:
+        """Retrieves the stdout/stderr logs from the completed dbt Cloud run.
+
+        This method fetches logs from the run_steps by calling get_run_details
+        with include_related=["run_steps"].
+
+        Returns:
+            Optional[str]: The concatenated log text content from all run steps,
+                or None if logs are not available.
+        """
+        try:
+            return self.client.get_run_logs(run_id=self.run_id)
+        except RequestException as e:
+            logger.warning(f"Failed to retrieve logs for run {self.run_id}: {e}")
+            return None
+
 
 def get_completed_at_timestamp(result: Mapping[str, Any]) -> float:
+    timing = result["timing"]
+    if len(timing) == 0:
+        # as a fallback, use the current timestamp
+        return get_current_timestamp()
     # result["timing"] is a list of events in run_results.json
     # For successful models and passing tests,
     # the last item of that list includes the timing details of the execution.
{dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/cloud_v2/sensor_builder.py
@@ -142,7 +142,7 @@ def sorted_asset_events(
     return [
         sorted_event[1]
        for sorted_event in sorted(
-            materializations_and_timestamps, key=lambda x: (
+            materializations_and_timestamps, key=lambda x: (topo_aks.index(x[1].asset_key), x[0])
        )
    ]
 
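The one-line change above makes the sort key in `sorted_asset_events` order events primarily by their asset's position in the topological ordering, and only secondarily by timestamp. A standalone sketch with hypothetical asset keys and timestamps:

```python
# Hypothetical data illustrating the new sort key above.
topo_aks = ["raw_orders", "stg_orders", "orders"]  # topological order
events = [(3.0, "orders"), (1.0, "stg_orders"), (2.0, "raw_orders")]  # (timestamp, asset)

ordered = sorted(events, key=lambda x: (topo_aks.index(x[1]), x[0]))
print([asset for _, asset in ordered])  # ['raw_orders', 'stg_orders', 'orders']
```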
{dagster_dbt-0.27.13 → dagster_dbt-0.28.8}/dagster_dbt/compat.py
@@ -1,9 +1,8 @@
 import logging
 from enum import Enum
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, TypeAlias
 
 from packaging import version
-from typing_extensions import TypeAlias
 
 # it's unclear exactly which dbt import adds a handler to the root logger, but something certainly does!
 # on this line, we keep track of the set of handlers that are on the root logger BEFORE any dbt imports