dvt-core 1.11.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in those registries.
Potentially problematic release: this version of dvt-core might be problematic.
- dvt/__init__.py +7 -0
- dvt/_pydantic_shim.py +26 -0
- dvt/adapters/__init__.py +16 -0
- dvt/adapters/multi_adapter_manager.py +268 -0
- dvt/artifacts/__init__.py +0 -0
- dvt/artifacts/exceptions/__init__.py +1 -0
- dvt/artifacts/exceptions/schemas.py +31 -0
- dvt/artifacts/resources/__init__.py +116 -0
- dvt/artifacts/resources/base.py +68 -0
- dvt/artifacts/resources/types.py +93 -0
- dvt/artifacts/resources/v1/analysis.py +10 -0
- dvt/artifacts/resources/v1/catalog.py +23 -0
- dvt/artifacts/resources/v1/components.py +275 -0
- dvt/artifacts/resources/v1/config.py +282 -0
- dvt/artifacts/resources/v1/documentation.py +11 -0
- dvt/artifacts/resources/v1/exposure.py +52 -0
- dvt/artifacts/resources/v1/function.py +53 -0
- dvt/artifacts/resources/v1/generic_test.py +32 -0
- dvt/artifacts/resources/v1/group.py +22 -0
- dvt/artifacts/resources/v1/hook.py +11 -0
- dvt/artifacts/resources/v1/macro.py +30 -0
- dvt/artifacts/resources/v1/metric.py +173 -0
- dvt/artifacts/resources/v1/model.py +146 -0
- dvt/artifacts/resources/v1/owner.py +10 -0
- dvt/artifacts/resources/v1/saved_query.py +112 -0
- dvt/artifacts/resources/v1/seed.py +42 -0
- dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt/artifacts/resources/v1/semantic_model.py +315 -0
- dvt/artifacts/resources/v1/singular_test.py +14 -0
- dvt/artifacts/resources/v1/snapshot.py +92 -0
- dvt/artifacts/resources/v1/source_definition.py +85 -0
- dvt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
- dvt/artifacts/schemas/__init__.py +0 -0
- dvt/artifacts/schemas/base.py +191 -0
- dvt/artifacts/schemas/batch_results.py +24 -0
- dvt/artifacts/schemas/catalog/__init__.py +12 -0
- dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
- dvt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
- dvt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
- dvt/artifacts/schemas/results.py +148 -0
- dvt/artifacts/schemas/run/__init__.py +2 -0
- dvt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt/artifacts/schemas/run/v5/run.py +184 -0
- dvt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt/artifacts/utils/validation.py +153 -0
- dvt/cli/__init__.py +1 -0
- dvt/cli/context.py +16 -0
- dvt/cli/exceptions.py +56 -0
- dvt/cli/flags.py +558 -0
- dvt/cli/main.py +971 -0
- dvt/cli/option_types.py +121 -0
- dvt/cli/options.py +79 -0
- dvt/cli/params.py +803 -0
- dvt/cli/requires.py +478 -0
- dvt/cli/resolvers.py +32 -0
- dvt/cli/types.py +40 -0
- dvt/clients/__init__.py +0 -0
- dvt/clients/checked_load.py +82 -0
- dvt/clients/git.py +164 -0
- dvt/clients/jinja.py +206 -0
- dvt/clients/jinja_static.py +245 -0
- dvt/clients/registry.py +192 -0
- dvt/clients/yaml_helper.py +68 -0
- dvt/compilation.py +833 -0
- dvt/compute/__init__.py +26 -0
- dvt/compute/base.py +288 -0
- dvt/compute/engines/__init__.py +13 -0
- dvt/compute/engines/duckdb_engine.py +368 -0
- dvt/compute/engines/spark_engine.py +273 -0
- dvt/compute/query_analyzer.py +212 -0
- dvt/compute/router.py +483 -0
- dvt/config/__init__.py +4 -0
- dvt/config/catalogs.py +95 -0
- dvt/config/compute_config.py +406 -0
- dvt/config/profile.py +411 -0
- dvt/config/profiles_v2.py +464 -0
- dvt/config/project.py +893 -0
- dvt/config/renderer.py +232 -0
- dvt/config/runtime.py +491 -0
- dvt/config/selectors.py +209 -0
- dvt/config/utils.py +78 -0
- dvt/connectors/.gitignore +6 -0
- dvt/connectors/README.md +306 -0
- dvt/connectors/catalog.yml +217 -0
- dvt/connectors/download_connectors.py +300 -0
- dvt/constants.py +29 -0
- dvt/context/__init__.py +0 -0
- dvt/context/base.py +746 -0
- dvt/context/configured.py +136 -0
- dvt/context/context_config.py +350 -0
- dvt/context/docs.py +82 -0
- dvt/context/exceptions_jinja.py +179 -0
- dvt/context/macro_resolver.py +195 -0
- dvt/context/macros.py +171 -0
- dvt/context/manifest.py +73 -0
- dvt/context/providers.py +2198 -0
- dvt/context/query_header.py +14 -0
- dvt/context/secret.py +59 -0
- dvt/context/target.py +74 -0
- dvt/contracts/__init__.py +0 -0
- dvt/contracts/files.py +413 -0
- dvt/contracts/graph/__init__.py +0 -0
- dvt/contracts/graph/manifest.py +1904 -0
- dvt/contracts/graph/metrics.py +98 -0
- dvt/contracts/graph/model_config.py +71 -0
- dvt/contracts/graph/node_args.py +42 -0
- dvt/contracts/graph/nodes.py +1806 -0
- dvt/contracts/graph/semantic_manifest.py +233 -0
- dvt/contracts/graph/unparsed.py +812 -0
- dvt/contracts/project.py +417 -0
- dvt/contracts/results.py +53 -0
- dvt/contracts/selection.py +23 -0
- dvt/contracts/sql.py +86 -0
- dvt/contracts/state.py +69 -0
- dvt/contracts/util.py +46 -0
- dvt/deprecations.py +347 -0
- dvt/deps/__init__.py +0 -0
- dvt/deps/base.py +153 -0
- dvt/deps/git.py +196 -0
- dvt/deps/local.py +80 -0
- dvt/deps/registry.py +131 -0
- dvt/deps/resolver.py +149 -0
- dvt/deps/tarball.py +121 -0
- dvt/docs/source/_ext/dbt_click.py +118 -0
- dvt/docs/source/conf.py +32 -0
- dvt/env_vars.py +64 -0
- dvt/event_time/event_time.py +40 -0
- dvt/event_time/sample_window.py +60 -0
- dvt/events/__init__.py +16 -0
- dvt/events/base_types.py +37 -0
- dvt/events/core_types_pb2.py +2 -0
- dvt/events/logging.py +109 -0
- dvt/events/types.py +2534 -0
- dvt/exceptions.py +1487 -0
- dvt/flags.py +89 -0
- dvt/graph/__init__.py +11 -0
- dvt/graph/cli.py +248 -0
- dvt/graph/graph.py +172 -0
- dvt/graph/queue.py +213 -0
- dvt/graph/selector.py +375 -0
- dvt/graph/selector_methods.py +976 -0
- dvt/graph/selector_spec.py +223 -0
- dvt/graph/thread_pool.py +18 -0
- dvt/hooks.py +21 -0
- dvt/include/README.md +49 -0
- dvt/include/__init__.py +3 -0
- dvt/include/global_project.py +4 -0
- dvt/include/starter_project/.gitignore +4 -0
- dvt/include/starter_project/README.md +15 -0
- dvt/include/starter_project/__init__.py +3 -0
- dvt/include/starter_project/analyses/.gitkeep +0 -0
- dvt/include/starter_project/dvt_project.yml +36 -0
- dvt/include/starter_project/macros/.gitkeep +0 -0
- dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt/include/starter_project/models/example/schema.yml +21 -0
- dvt/include/starter_project/seeds/.gitkeep +0 -0
- dvt/include/starter_project/snapshots/.gitkeep +0 -0
- dvt/include/starter_project/tests/.gitkeep +0 -0
- dvt/internal_deprecations.py +27 -0
- dvt/jsonschemas/__init__.py +3 -0
- dvt/jsonschemas/jsonschemas.py +309 -0
- dvt/jsonschemas/project/0.0.110.json +4717 -0
- dvt/jsonschemas/project/0.0.85.json +2015 -0
- dvt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt/jsonschemas/resources/latest.json +6773 -0
- dvt/links.py +4 -0
- dvt/materializations/__init__.py +0 -0
- dvt/materializations/incremental/__init__.py +0 -0
- dvt/materializations/incremental/microbatch.py +235 -0
- dvt/mp_context.py +8 -0
- dvt/node_types.py +37 -0
- dvt/parser/__init__.py +23 -0
- dvt/parser/analysis.py +21 -0
- dvt/parser/base.py +549 -0
- dvt/parser/common.py +267 -0
- dvt/parser/docs.py +52 -0
- dvt/parser/fixtures.py +51 -0
- dvt/parser/functions.py +30 -0
- dvt/parser/generic_test.py +100 -0
- dvt/parser/generic_test_builders.py +334 -0
- dvt/parser/hooks.py +119 -0
- dvt/parser/macros.py +137 -0
- dvt/parser/manifest.py +2204 -0
- dvt/parser/models.py +574 -0
- dvt/parser/partial.py +1179 -0
- dvt/parser/read_files.py +445 -0
- dvt/parser/schema_generic_tests.py +423 -0
- dvt/parser/schema_renderer.py +111 -0
- dvt/parser/schema_yaml_readers.py +936 -0
- dvt/parser/schemas.py +1467 -0
- dvt/parser/search.py +149 -0
- dvt/parser/seeds.py +28 -0
- dvt/parser/singular_test.py +20 -0
- dvt/parser/snapshots.py +44 -0
- dvt/parser/sources.py +557 -0
- dvt/parser/sql.py +63 -0
- dvt/parser/unit_tests.py +622 -0
- dvt/plugins/__init__.py +20 -0
- dvt/plugins/contracts.py +10 -0
- dvt/plugins/exceptions.py +2 -0
- dvt/plugins/manager.py +164 -0
- dvt/plugins/manifest.py +21 -0
- dvt/profiler.py +20 -0
- dvt/py.typed +1 -0
- dvt/runners/__init__.py +2 -0
- dvt/runners/exposure_runner.py +7 -0
- dvt/runners/no_op_runner.py +46 -0
- dvt/runners/saved_query_runner.py +7 -0
- dvt/selected_resources.py +8 -0
- dvt/task/__init__.py +0 -0
- dvt/task/base.py +504 -0
- dvt/task/build.py +197 -0
- dvt/task/clean.py +57 -0
- dvt/task/clone.py +162 -0
- dvt/task/compile.py +151 -0
- dvt/task/compute.py +366 -0
- dvt/task/debug.py +650 -0
- dvt/task/deps.py +280 -0
- dvt/task/docs/__init__.py +3 -0
- dvt/task/docs/generate.py +408 -0
- dvt/task/docs/index.html +250 -0
- dvt/task/docs/serve.py +28 -0
- dvt/task/freshness.py +323 -0
- dvt/task/function.py +122 -0
- dvt/task/group_lookup.py +46 -0
- dvt/task/init.py +374 -0
- dvt/task/list.py +237 -0
- dvt/task/printer.py +176 -0
- dvt/task/profiles.py +256 -0
- dvt/task/retry.py +175 -0
- dvt/task/run.py +1146 -0
- dvt/task/run_operation.py +142 -0
- dvt/task/runnable.py +802 -0
- dvt/task/seed.py +104 -0
- dvt/task/show.py +150 -0
- dvt/task/snapshot.py +57 -0
- dvt/task/sql.py +111 -0
- dvt/task/test.py +464 -0
- dvt/tests/fixtures/__init__.py +1 -0
- dvt/tests/fixtures/project.py +620 -0
- dvt/tests/util.py +651 -0
- dvt/tracking.py +529 -0
- dvt/utils/__init__.py +3 -0
- dvt/utils/artifact_upload.py +151 -0
- dvt/utils/utils.py +408 -0
- dvt/version.py +249 -0
- dvt_core-1.11.0b4.dist-info/METADATA +252 -0
- dvt_core-1.11.0b4.dist-info/RECORD +261 -0
- dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
- dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
- dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/task/runnable.py
ADDED
@@ -0,0 +1,802 @@
import os
import time
from abc import abstractmethod
from concurrent.futures import as_completed
from datetime import datetime, timezone
from pathlib import Path
from typing import AbstractSet, Dict, Iterable, List, Optional, Set, Tuple, Type, Union

import dvt.exceptions
import dvt.tracking
import dvt.utils
from dvt.artifacts.schemas.results import (
    BaseResult,
    NodeStatus,
    RunningStatus,
    RunStatus,
)
from dvt.artifacts.schemas.run import RunExecutionResult, RunResult
from dvt.cli.flags import Flags
from dvt.config.runtime import RuntimeConfig
from dvt.constants import RUN_RESULTS_FILE_NAME
from dvt.contracts.graph.manifest import Manifest
from dvt.contracts.graph.nodes import Exposure, ResultNode
from dvt.contracts.state import PreviousState
from dvt.events.types import (
    ArtifactWritten,
    ConcurrencyLine,
    DefaultSelector,
    EndRunResult,
    GenericExceptionOnRun,
    LogCancelLine,
    MarkSkippedChildren,
    NodeFinished,
    NodeStart,
    NothingToDo,
    QueryCancelationUnsupported,
    SkippingDetails,
)
from dvt.exceptions import DbtInternalError, DbtRuntimeError, FailFastError
from dvt.flags import get_flags
from dvt.graph import (
    GraphQueue,
    NodeSelector,
    SelectionSpec,
    UniqueId,
    parse_difference,
)
from dvt.graph.thread_pool import DbtThreadPool
from dvt.parser.manifest import write_manifest
from dvt.task import group_lookup
from dvt.task.base import BaseRunner, ConfiguredTask
from dvt.task.printer import print_run_end_messages, print_run_result_error
from dvt.utils.artifact_upload import add_artifact_produced

import dbt_common.utils.formatting
from dbt.adapters.base import BaseAdapter, BaseRelation
from dbt.adapters.factory import get_adapter
from dbt_common.context import _INVOCATION_CONTEXT_VAR, get_invocation_context
from dbt_common.dataclass_schema import StrEnum
from dbt_common.events.contextvars import log_contextvars, task_contextvars
from dbt_common.events.functions import fire_event, warn_or_error
from dbt_common.events.types import Formatting
from dbt_common.exceptions import NotImplementedError

class GraphRunnableMode(StrEnum):
    Topological = "topological"
    Independent = "independent"


def mark_node_as_skipped(
    node: ResultNode, executed_node_ids: Set[str], message: Optional[str]
) -> Optional[RunResult]:
    if node.unique_id not in executed_node_ids:
        return RunResult.from_node(node, RunStatus.Skipped, message)
    return None

class GraphRunnableTask(ConfiguredTask):
    MARK_DEPENDENT_ERRORS_STATUSES = [NodeStatus.Error, NodeStatus.PartialSuccess]

    def __init__(self, args: Flags, config: RuntimeConfig, manifest: Manifest) -> None:
        super().__init__(args, config, manifest)
        self.config = config
        self._flattened_nodes: Optional[List[ResultNode]] = None
        self._raise_next_tick: Optional[DbtRuntimeError] = None
        self._skipped_children: Dict[str, Optional[RunResult]] = {}
        self.job_queue: Optional[GraphQueue] = None
        self.node_results: List[BaseResult] = []
        self.num_nodes: int = 0
        self.previous_state: Optional[PreviousState] = None
        self.previous_defer_state: Optional[PreviousState] = None
        self.run_count: int = 0
        self.started_at: float = 0

        # DVT: Initialize execution router and multi-adapter manager
        self._init_dvt_execution_layer()

        if self.args.state:
            self.previous_state = PreviousState(
                state_path=self.args.state,
                target_path=Path(self.config.target_path),
                project_root=Path(self.config.project_root),
            )

        if self.args.defer_state:
            self.previous_defer_state = PreviousState(
                state_path=self.args.defer_state,
                target_path=Path(self.config.target_path),
                project_root=Path(self.config.project_root),
            )

    def _init_dvt_execution_layer(self) -> None:
        """
        Initialize DVT execution layer components.

        This sets up:
        - UnifiedProfileConfig: Load all connection profiles
        - MultiAdapterManager: Manage multiple adapter instances
        - ComputeConfig: Load compute layer configuration
        - ExecutionRouter: Route queries to optimal execution engine

        These components enable DVT's data virtualization capabilities.
        """
        from multiprocessing import get_context

        from dvt.adapters import create_multi_adapter_manager
        from dvt.compute.router import ExecutionRouter
        from dvt.config.compute_config import load_compute_config
        from dvt.config.profiles_v2 import load_unified_profiles

        # Load unified profiles (sources + targets)
        project_path = Path(self.config.project_root)
        self.unified_profiles = load_unified_profiles(project_path)

        # Create multi-adapter manager for handling different source connections
        mp_context = get_context("spawn")
        self.multi_adapter_manager = create_multi_adapter_manager(
            self.unified_profiles, mp_context
        )

        # Load compute layer configuration (DuckDB, Spark settings)
        self.compute_config = load_compute_config(project_path)

        # Initialize execution router for query routing decisions
        self.execution_router = ExecutionRouter(
            compute_config=self.compute_config,
            multi_adapter_manager=self.multi_adapter_manager,
            manifest=self.manifest,
        )
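
The method above is the main DVT addition to the upstream run loop. As a reading aid, here is the same initialization sequence as a standalone sketch; it assumes only the call signatures used above, and the project path and manifest are hypothetical placeholders:

from multiprocessing import get_context
from pathlib import Path

from dvt.adapters import create_multi_adapter_manager
from dvt.compute.router import ExecutionRouter
from dvt.config.compute_config import load_compute_config
from dvt.config.profiles_v2 import load_unified_profiles

project_path = Path("/path/to/project")  # hypothetical project root
profiles = load_unified_profiles(project_path)
adapters = create_multi_adapter_manager(profiles, get_context("spawn"))
compute = load_compute_config(project_path)
router = ExecutionRouter(
    compute_config=compute,
    multi_adapter_manager=adapters,
    manifest=None,  # a parsed Manifest in real use; None only for illustration
)
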
    def index_offset(self, value: int) -> int:
        return value

    @property
    def selection_arg(self):
        return self.args.select

    @property
    def exclusion_arg(self):
        return self.args.exclude

    def get_selection_spec(self) -> SelectionSpec:
        default_selector_name = self.config.get_default_selector_name()
        spec: Union[SelectionSpec, bool]
        if hasattr(self.args, "inline") and self.args.inline:
            # We want an empty selection spec.
            spec = parse_difference(None, None)
        elif self.args.selector:
            # use pre-defined selector (--selector)
            spec = self.config.get_selector(self.args.selector)
        elif not (self.selection_arg or self.exclusion_arg) and default_selector_name:
            # use pre-defined selector (--selector) with default: true
            fire_event(DefaultSelector(name=default_selector_name))
            spec = self.config.get_selector(default_selector_name)
        else:
            # This is what's used with no default selector and no selection:
            # use the --select and --exclude args
            spec = parse_difference(self.selection_arg, self.exclusion_arg)
        # mypy complains because the return values of get_selector and parse_difference
        # are different
        return spec  # type: ignore
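
Selection resolution above follows a strict precedence: an inline query yields an empty spec; an explicit --selector wins next; a selector marked `default: true` applies only when neither --select nor --exclude was given; otherwise the --select/--exclude arguments are parsed. A pure-Python sketch of that fallback chain (the names are illustrative, not dvt APIs):

def resolve_selection(inline, selector, select, exclude, default_selector):
    # illustrative only: mirrors the branch order of get_selection_spec
    if inline:
        return ("empty", (None, None))
    if selector:
        return ("named", selector)
    if not (select or exclude) and default_selector:
        return ("named", default_selector)
    return ("difference", (select, exclude))

assert resolve_selection(False, None, None, None, "nightly") == ("named", "nightly")
assert resolve_selection(False, None, ["my_model"], None, "nightly") == (
    "difference",
    (["my_model"], None),
)
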
    @abstractmethod
    def get_node_selector(self) -> NodeSelector:
        raise NotImplementedError(f"get_node_selector not implemented for task {type(self)}")

    def defer_to_manifest(self):
        deferred_manifest = self._get_deferred_manifest()
        if deferred_manifest is None:
            return
        if self.manifest is None:
            raise DbtInternalError(
                "Expected to defer to manifest, but there is no runtime manifest to defer from!"
            )
        self.manifest.merge_from_artifact(other=deferred_manifest)

    def get_graph_queue(self) -> GraphQueue:
        selector = self.get_node_selector()
        # The following uses self.selection_arg and self.exclusion_arg
        spec = self.get_selection_spec()

        preserve_edges = True
        if self.get_run_mode() == GraphRunnableMode.Independent:
            preserve_edges = False

        return selector.get_graph_queue(spec, preserve_edges)

    def get_run_mode(self) -> GraphRunnableMode:
        return GraphRunnableMode.Topological

    def _runtime_initialize(self):
        self.compile_manifest()
        if self.manifest is None or self.graph is None:
            raise DbtInternalError("_runtime_initialize never loaded the graph!")

        self.job_queue = self.get_graph_queue()

        # we use this a couple of times. order does not matter.
        self._flattened_nodes = []
        for uid in self.job_queue.get_selected_nodes():
            if uid in self.manifest.nodes:
                self._flattened_nodes.append(self.manifest.nodes[uid])
            elif uid in self.manifest.sources:
                self._flattened_nodes.append(self.manifest.sources[uid])
            elif uid in self.manifest.saved_queries:
                self._flattened_nodes.append(self.manifest.saved_queries[uid])
            elif uid in self.manifest.unit_tests:
                self._flattened_nodes.append(self.manifest.unit_tests[uid])
            elif uid in self.manifest.exposures:
                self._flattened_nodes.append(self.manifest.exposures[uid])
            elif uid in self.manifest.functions:
                self._flattened_nodes.append(self.manifest.functions[uid])
            else:
                raise DbtInternalError(
                    f"Node selection returned {uid}, expected an exposure, a function, a node, a saved query, a source, or a unit test"
                )

        self.num_nodes = len([n for n in self._flattened_nodes if not n.is_ephemeral_model])

    def raise_on_first_error(self) -> bool:
        return False

    def get_runner_type(self, node) -> Optional[Type[BaseRunner]]:
        raise NotImplementedError("Not Implemented")

    def result_path(self) -> str:
        return os.path.join(self.config.project_target_path, RUN_RESULTS_FILE_NAME)

    def get_runner(self, node) -> BaseRunner:
        adapter = get_adapter(self.config)
        run_count: int = 0
        num_nodes: int = 0

        if node.is_ephemeral_model:
            run_count = 0
            num_nodes = 0
        else:
            self.run_count += 1
            run_count = self.run_count
            num_nodes = self.num_nodes

        cls = self.get_runner_type(node)

        if cls is None:
            raise DbtInternalError("Could not find runner type for node.")

        return cls(self.config, adapter, node, run_count, num_nodes)

    def call_runner(self, runner: BaseRunner) -> RunResult:
        with log_contextvars(node_info=runner.node.node_info):
            runner.node.update_event_status(
                started_at=datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
                node_status=RunningStatus.Started,
            )
            fire_event(
                NodeStart(
                    node_info=runner.node.node_info,
                )
            )

            result = None
            thread_exception: Optional[Union[KeyboardInterrupt, SystemExit, Exception]] = None
            try:
                result = runner.run_with_hooks(self.manifest)
            except (KeyboardInterrupt, SystemExit) as exe:
                result = None
                thread_exception = exe
                raise
            except Exception as e:
                result = None
                thread_exception = e
            finally:
                if result is not None:
                    fire_event(
                        NodeFinished(
                            node_info=runner.node.node_info,
                            run_result=result.to_msg_dict(),
                        )
                    )
                else:
                    msg = f"Exception on worker thread. {thread_exception}"

                    fire_event(
                        GenericExceptionOnRun(
                            unique_id=runner.node.unique_id,
                            exc=str(thread_exception),
                            node_info=runner.node.node_info,
                        )
                    )

                    result = RunResult(
                        status=RunStatus.Error,  # type: ignore
                        timing=[],
                        thread_id="",
                        execution_time=0.0,
                        adapter_response={},
                        message=msg,
                        failures=None,
                        batch_results=None,
                        node=runner.node,
                    )

                # the `_event_status` dict is only used for logging. Make sure
                # it gets deleted when we're done with it
                runner.node.clear_event_status()

        fail_fast = get_flags().FAIL_FAST

        if (
            result.status in (NodeStatus.Error, NodeStatus.Fail, NodeStatus.PartialSuccess)
            and fail_fast
        ):
            self._raise_next_tick = FailFastError(
                msg="Failing early due to test failure or runtime error",
                result=result,
                node=getattr(result, "node", None),
            )
        elif result.status == NodeStatus.Error and self.raise_on_first_error():
            # if we raise inside a thread, it'll just get silently swallowed.
            # stash the error message we want here, and it will be checked on the
            # next 'tick' - which should be soon, since our thread is about to finish!
            self._raise_next_tick = DbtRuntimeError(result.message)

        return result

    def _submit(self, pool, args, callback):
        """If the caller has passed the magic 'single-threaded' flag, call the
        function directly instead of pool.apply_async. The single-threaded flag
        is intended for gathering more useful performance information about
        what happens beneath `call_runner`, since python's default profiling
        tools ignore child threads.

        This does still go through the callback path for result collection.
        """
        if self.config.args.single_threaded:
            callback(self.call_runner(*args))
        else:
            pool.apply_async(self.call_runner, args=args, callback=callback)

    def _raise_set_error(self):
        if self._raise_next_tick is not None:
            raise self._raise_next_tick

    def run_queue(self, pool):
        """Given a pool, submit jobs from the queue to the pool."""
        if self.job_queue is None:
            raise DbtInternalError("Got to run_queue with no job queue set")

        def callback(result):
            """Note: mark_done, at a minimum, must happen here or dbt will
            deadlock during ephemeral result error handling!
            """
            self._handle_result(result)

            if self.job_queue is None:
                raise DbtInternalError("Got to run_queue callback with no job queue set")
            self.job_queue.mark_done(result.node.unique_id)

        while not self.job_queue.empty():
            self.handle_job_queue(pool, callback)

        # block on completion
        if get_flags().FAIL_FAST:
            # check for errors after each completed task, so we can fail fast
            while self.job_queue.wait_until_something_was_done():
                self._raise_set_error()
        else:
            # wait until every task is complete
            self.job_queue.join()

        # if an error got set during join(), raise it.
        self._raise_set_error()

        return
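
An exception raised inside a pool worker or its callback is silently swallowed, which is why call_runner stashes failures in _raise_next_tick and the loops above re-raise via _raise_set_error on the main thread's next pass. A self-contained illustration of that stash-and-raise pattern with a plain thread pool (independent of dvt):

from multiprocessing.dummy import Pool  # thread-backed pool, like DbtThreadPool

stashed = []  # plays the role of _raise_next_tick

def work(i):
    return ("error" if i == 3 else "ok", i)

def callback(result):
    status, i = result
    if status == "error":
        # raising here would be swallowed by the pool; stash instead
        stashed.append(RuntimeError(f"node {i} failed"))

pool = Pool(4)
try:
    for i in range(8):
        if stashed:
            raise stashed[0]  # the "next tick" check, on the main thread
        pool.apply_async(work, args=(i,), callback=callback)
    pool.close()
    pool.join()
    if stashed:
        raise stashed[0]  # check again after join()
except RuntimeError as exc:
    print(f"fail fast: {exc}")
finally:
    pool.terminate()
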
    # The build command overrides this
    def handle_job_queue(self, pool, callback):
        node = self.job_queue.get()
        self._raise_set_error()
        runner = self.get_runner(node)
        # we finally know what we're running! Make sure we haven't decided
        # to skip it due to upstream failures
        if runner.node.unique_id in self._skipped_children:
            cause = self._skipped_children.pop(runner.node.unique_id)
            runner.do_skip(cause=cause)
        args = [runner]
        self._submit(pool, args, callback)

    def _handle_result(self, result: RunResult) -> None:
        """Mark the result as completed, insert the `CompileResultNode` into
        the manifest, and mark any descendants (potentially with a 'cause' if
        the result was an ephemeral model) as skipped.
        """
        is_ephemeral = result.node.is_ephemeral_model
        if not is_ephemeral:
            self.node_results.append(result)

        node = result.node

        if self.manifest is None:
            raise DbtInternalError("manifest was None in _handle_result")

        # If result.status == NodeStatus.Error, plus Fail for the build command
        if result.status in self.MARK_DEPENDENT_ERRORS_STATUSES:
            if is_ephemeral:
                cause = result
            else:
                cause = None
            self._mark_dependent_errors(node.unique_id, result, cause)

    def _cancel_connections(self, pool):
        """Given a pool, cancel all adapter connections and wait until all
        runners terminate gracefully.
        """
        pool.close()
        pool.terminate()

        adapter = get_adapter(self.config)

        if not adapter.is_cancelable():
            fire_event(QueryCancelationUnsupported(type=adapter.type()))
        else:
            with adapter.connection_named("master"):
                for conn_name in adapter.cancel_open_connections():
                    if self.manifest is not None:
                        node = self.manifest.nodes.get(conn_name)
                        if node is not None and node.is_ephemeral_model:
                            continue
                    # if we don't have a manifest/don't have a node, print
                    # anyway.
                    fire_event(LogCancelLine(conn_name=conn_name))

        pool.join()

    def execute_nodes(self):
        num_threads = self.config.threads

        pool = DbtThreadPool(
            num_threads, self._pool_thread_initializer, [get_invocation_context()]
        )
        try:
            self.run_queue(pool)
        except FailFastError as failure:
            self._cancel_connections(pool)

            executed_node_ids = {r.node.unique_id for r in self.node_results}
            message = "Skipping due to fail_fast"

            for node in self._flattened_nodes:
                if node.unique_id not in executed_node_ids:
                    self.node_results.append(
                        mark_node_as_skipped(node, executed_node_ids, message)
                    )

            print_run_result_error(failure.result)
            # ensure information about all nodes is propagated to run results when failing fast
            return self.node_results
        except (KeyboardInterrupt, SystemExit):
            run_result = self.get_result(
                results=self.node_results,
                elapsed_time=time.time() - self.started_at,
                generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
            )

            if self.args.write_json and hasattr(run_result, "write"):
                run_result.write(self.result_path())
                add_artifact_produced(self.result_path())
                fire_event(
                    ArtifactWritten(
                        artifact_type=run_result.__class__.__name__,
                        artifact_path=self.result_path(),
                    )
                )

            self._cancel_connections(pool)
            print_run_end_messages(self.node_results, keyboard_interrupt=True)

            raise

        pool.close()
        pool.join()

        return self.node_results

    @staticmethod
    def _pool_thread_initializer(invocation_context):
        _INVOCATION_CONTEXT_VAR.set(invocation_context)

    def _mark_dependent_errors(
        self, node_id: str, result: RunResult, cause: Optional[RunResult]
    ) -> None:
        if self.graph is None:
            raise DbtInternalError("graph is None in _mark_dependent_errors")
        fire_event(
            MarkSkippedChildren(
                unique_id=node_id,
                status=result.status,
                run_result=result.to_msg_dict(),
            )
        )
        for dep_node_id in self.graph.get_dependent_nodes(UniqueId(node_id)):
            self._skipped_children[dep_node_id] = cause
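
_mark_dependent_errors records every node downstream of the failure in _skipped_children, so handle_job_queue can convert them to skips before submission; get_dependent_nodes is assumed here to return all transitive descendants. The propagation, reduced to a plain BFS over an adjacency map (illustrative only):

from collections import deque

children = {"a": ["b", "c"], "b": ["d"], "c": [], "d": []}  # node -> direct children

def dependent_nodes(graph, failed):
    seen, queue = set(), deque(graph.get(failed, []))
    while queue:
        node = queue.popleft()
        if node not in seen:
            seen.add(node)
            queue.extend(graph.get(node, []))
    return seen

skipped_children = {node: None for node in dependent_nodes(children, "a")}
assert set(skipped_children) == {"b", "c", "d"}
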
    def populate_adapter_cache(
        self, adapter, required_schemas: Optional[Set[BaseRelation]] = None
    ):
        if not self.args.populate_cache:
            return

        if self.manifest is None:
            raise DbtInternalError("manifest was None in populate_adapter_cache")

        start_populate_cache = time.perf_counter()
        # the cache only cares about executable nodes
        cachable_nodes = [
            node
            for node in self.manifest.nodes.values()
            if (node.is_relational and not node.is_ephemeral_model and not node.is_external_node)
        ]

        if get_flags().CACHE_SELECTED_ONLY is True:
            adapter.set_relations_cache(cachable_nodes, required_schemas=required_schemas)
        else:
            adapter.set_relations_cache(cachable_nodes)
        cache_populate_time = time.perf_counter() - start_populate_cache
        if dvt.tracking.active_user is not None:
            dvt.tracking.track_runnable_timing(
                {"adapter_cache_construction_elapsed": cache_populate_time}
            )

    def before_run(self, adapter: BaseAdapter, selected_uids: AbstractSet[str]) -> RunStatus:
        with adapter.connection_named("master"):
            self.defer_to_manifest()
            self.populate_adapter_cache(adapter)
        return RunStatus.Success

    def after_run(self, adapter, results) -> None:
        pass

    def print_results_line(self, node_results, elapsed):
        pass

    def execute_with_hooks(self, selected_uids: AbstractSet[str]):
        adapter = get_adapter(self.config)

        fire_event(Formatting(""))
        fire_event(
            ConcurrencyLine(
                num_threads=self.config.threads,
                target_name=self.config.target_name,
                node_count=self.num_nodes,
            )
        )
        fire_event(Formatting(""))

        self.started_at = time.time()
        try:
            before_run_status = self.before_run(adapter, selected_uids)
            if before_run_status == RunStatus.Success or (
                not get_flags().skip_nodes_if_on_run_start_fails
            ):
                res = self.execute_nodes()
            else:
                executed_node_ids = {
                    r.node.unique_id for r in self.node_results if hasattr(r, "node")
                }

                res = []

                for index, node in enumerate(self._flattened_nodes or []):
                    group = group_lookup.get(node.unique_id)

                    if node.unique_id not in executed_node_ids:
                        fire_event(
                            SkippingDetails(
                                resource_type=node.resource_type,
                                schema=node.schema,
                                node_name=node.name,
                                index=index + 1,
                                total=self.num_nodes,
                                node_info=node.node_info,
                                group=group,
                            )
                        )
                        skipped_node_result = mark_node_as_skipped(node, executed_node_ids, None)
                        if skipped_node_result:
                            self.node_results.append(skipped_node_result)

            self.after_run(adapter, res)
        finally:
            adapter.cleanup_connections()
            elapsed = time.time() - self.started_at
            self.print_results_line(self.node_results, elapsed)
            result = self.get_result(
                results=self.node_results,
                elapsed_time=elapsed,
                generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
            )

        return result

    def run(self):
        """
        Run dbt for the query, based on the graph.
        """
        # We set up a context manager here with "task_contextvars" because we
        # need the project_root in runtime_initialize.
        with task_contextvars(project_root=self.config.project_root):
            self._runtime_initialize()

            if self._flattened_nodes is None:
                raise DbtInternalError(
                    "after _runtime_initialize, _flattened_nodes was still None"
                )

            if len(self._flattened_nodes) == 0:
                warn_or_error(NothingToDo())
                result = self.get_result(
                    results=[],
                    generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
                    elapsed_time=0.0,
                )
            else:
                selected_uids = frozenset(n.unique_id for n in self._flattened_nodes)
                result = self.execute_with_hooks(selected_uids)

        # We have other result types here too, including FreshnessResult
        if isinstance(result, RunExecutionResult):
            result_msgs = [result.to_msg_dict() for result in result.results]
            fire_event(
                EndRunResult(
                    results=result_msgs,
                    generated_at=result.generated_at.strftime("%Y-%m-%dT%H:%M:%SZ"),
                    elapsed_time=result.elapsed_time,
                    success=GraphRunnableTask.interpret_results(result.results),
                )
            )

        if self.args.write_json:
            write_manifest(self.manifest, self.config.project_target_path)
            if hasattr(result, "write"):
                result.write(self.result_path())
                add_artifact_produced(self.result_path())
                fire_event(
                    ArtifactWritten(
                        artifact_type=result.__class__.__name__, artifact_path=self.result_path()
                    )
                )

        self.task_end_messages(result.results)
        return result

    @classmethod
    def interpret_results(cls, results):
        if results is None:
            return False

        num_runtime_errors = len([r for r in results if r.status == NodeStatus.RuntimeErr])
        num_errors = len([r for r in results if r.status == NodeStatus.Error])
        num_fails = len([r for r in results if r.status == NodeStatus.Fail])
        num_skipped = len(
            [
                r
                for r in results
                if r.status == NodeStatus.Skipped and not isinstance(r.node, Exposure)
            ]
        )
        num_partial_success = len([r for r in results if r.status == NodeStatus.PartialSuccess])
        num_total = num_runtime_errors + num_errors + num_fails + num_skipped + num_partial_success
        return num_total == 0

    def get_model_schemas(self, adapter, selected_uids: Iterable[str]) -> Set[BaseRelation]:
        if self.manifest is None:
            raise DbtInternalError("manifest was None in get_model_schemas")
        result: Set[BaseRelation] = set()

        for node in self.manifest.nodes.values():
            if node.unique_id not in selected_uids:
                continue
            if node.is_relational and not node.is_ephemeral:
                relation = adapter.Relation.create_from(self.config, node)
                result.add(relation.without_identifier())

        return result

    def create_schemas(self, adapter, required_schemas: Set[BaseRelation]):
        # we want the string form of the information schema database
        required_databases: Set[BaseRelation] = set()
        for required in required_schemas:
            db_only = required.include(database=True, schema=False, identifier=False)
            required_databases.add(db_only)

        existing_schemas_lowered: Set[Tuple[Optional[str], Optional[str]]]
        existing_schemas_lowered = set()

        def list_schemas(db_only: BaseRelation) -> List[Tuple[Optional[str], str]]:
            # the database can be None on some warehouses that don't support it
            database_quoted: Optional[str]
            db_lowercase = dbt_common.utils.formatting.lowercase(db_only.database)
            if db_only.database is None:
                database_quoted = None
            else:
                database_quoted = str(db_only)

            # we should never create a null schema, so just filter them out
            return [
                (db_lowercase, s.lower())
                for s in adapter.list_schemas(database_quoted)
                if s is not None
            ]

        def create_schema(relation: BaseRelation) -> None:
            db = relation.database or ""
            schema = relation.schema
            with adapter.connection_named(f"create_{db}_{schema}"):
                adapter.create_schema(relation)

        list_futures = []
        create_futures = []

        # TODO: the following has a mypy issue because profile and project config
        # define threads as int while HasThreadingConfig defines it as Optional[int]
        with dbt_common.utils.executor(self.config) as tpe:  # type: ignore
            for req in required_databases:
                if req.database is None:
                    name = "list_schemas"
                else:
                    name = f"list_{req.database}"
                fut = tpe.submit_connected(adapter, name, list_schemas, req)
                list_futures.append(fut)

            for ls_future in as_completed(list_futures):
                existing_schemas_lowered.update(ls_future.result())

            for info in required_schemas:
                if info.schema is None:
                    # we are not in the business of creating null schemas, so
                    # skip this
                    continue
                db: Optional[str] = info.database
                db_lower: Optional[str] = dbt_common.utils.formatting.lowercase(db)
                schema: str = info.schema

                db_schema = (db_lower, schema.lower())
                if db_schema not in existing_schemas_lowered:
                    existing_schemas_lowered.add(db_schema)
                    fut = tpe.submit_connected(
                        adapter, f'create_{info.database or ""}_{info.schema}', create_schema, info
                    )
                    create_futures.append(fut)

            for create_future in as_completed(create_futures):
                # trigger/re-raise any exceptions while creating schemas
                create_future.result()
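
Note how create_schemas stays idempotent and avoids duplicate DDL: existing schemas are listed first, each candidate is compared case-insensitively on (database, schema), and each pair is added to the set as it is submitted, so repeated entries in required_schemas are also collapsed. The core check in isolation (illustrative):

existing = {("analytics", "public")}
required = [("Analytics", "PUBLIC"), ("analytics", "staging"), ("analytics", "Staging")]

to_create = []
for db, schema in required:
    key = (db.lower(), schema.lower())
    if key not in existing:
        existing.add(key)       # prevents a second CREATE for the same pair
        to_create.append((db, schema))

assert to_create == [("analytics", "staging")]
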
    def get_result(self, results, elapsed_time, generated_at):
        return RunExecutionResult(
            results=results,
            elapsed_time=elapsed_time,
            generated_at=generated_at,
            args=dvt.utils.args_to_dict(self.args),
        )

    def task_end_messages(self, results) -> None:
        print_run_end_messages(results)

    def _get_previous_state(self) -> Optional[Manifest]:
        state = self.previous_defer_state or self.previous_state
        if not state:
            raise DbtRuntimeError(
                "--state or --defer-state are required for deferral, but neither was provided"
            )

        if not state.manifest:
            raise DbtRuntimeError(f'Could not find manifest in --state path: "{state.state_path}"')
        return state.manifest

    def _get_deferred_manifest(self) -> Optional[Manifest]:
        return self._get_previous_state() if self.args.defer else None
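
GraphRunnableTask is effectively abstract: a concrete task must supply get_node_selector and get_runner_type, and may override hooks such as get_run_mode, before_run, and after_run. A schematic sketch of that subclass surface; the selector construction and the ModelRunner import are assumptions, not verified dvt APIs:

from typing import Optional, Type

from dvt.graph import NodeSelector
from dvt.task.base import BaseRunner
from dvt.task.runnable import GraphRunnableTask


class MyRunTask(GraphRunnableTask):
    def get_node_selector(self) -> NodeSelector:
        # real tasks wire the compiled graph and manifest into a selector;
        # the constructor arguments here are placeholders
        return NodeSelector(self.graph, self.manifest)

    def get_runner_type(self, node) -> Optional[Type[BaseRunner]]:
        from dvt.task.run import ModelRunner  # assumed runner class; not verified

        return ModelRunner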