dvt_core-1.11.0b4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dvt-core might be problematic.
- dvt/__init__.py +7 -0
- dvt/_pydantic_shim.py +26 -0
- dvt/adapters/__init__.py +16 -0
- dvt/adapters/multi_adapter_manager.py +268 -0
- dvt/artifacts/__init__.py +0 -0
- dvt/artifacts/exceptions/__init__.py +1 -0
- dvt/artifacts/exceptions/schemas.py +31 -0
- dvt/artifacts/resources/__init__.py +116 -0
- dvt/artifacts/resources/base.py +68 -0
- dvt/artifacts/resources/types.py +93 -0
- dvt/artifacts/resources/v1/analysis.py +10 -0
- dvt/artifacts/resources/v1/catalog.py +23 -0
- dvt/artifacts/resources/v1/components.py +275 -0
- dvt/artifacts/resources/v1/config.py +282 -0
- dvt/artifacts/resources/v1/documentation.py +11 -0
- dvt/artifacts/resources/v1/exposure.py +52 -0
- dvt/artifacts/resources/v1/function.py +53 -0
- dvt/artifacts/resources/v1/generic_test.py +32 -0
- dvt/artifacts/resources/v1/group.py +22 -0
- dvt/artifacts/resources/v1/hook.py +11 -0
- dvt/artifacts/resources/v1/macro.py +30 -0
- dvt/artifacts/resources/v1/metric.py +173 -0
- dvt/artifacts/resources/v1/model.py +146 -0
- dvt/artifacts/resources/v1/owner.py +10 -0
- dvt/artifacts/resources/v1/saved_query.py +112 -0
- dvt/artifacts/resources/v1/seed.py +42 -0
- dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt/artifacts/resources/v1/semantic_model.py +315 -0
- dvt/artifacts/resources/v1/singular_test.py +14 -0
- dvt/artifacts/resources/v1/snapshot.py +92 -0
- dvt/artifacts/resources/v1/source_definition.py +85 -0
- dvt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
- dvt/artifacts/schemas/__init__.py +0 -0
- dvt/artifacts/schemas/base.py +191 -0
- dvt/artifacts/schemas/batch_results.py +24 -0
- dvt/artifacts/schemas/catalog/__init__.py +12 -0
- dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
- dvt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
- dvt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
- dvt/artifacts/schemas/results.py +148 -0
- dvt/artifacts/schemas/run/__init__.py +2 -0
- dvt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt/artifacts/schemas/run/v5/run.py +184 -0
- dvt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt/artifacts/utils/validation.py +153 -0
- dvt/cli/__init__.py +1 -0
- dvt/cli/context.py +16 -0
- dvt/cli/exceptions.py +56 -0
- dvt/cli/flags.py +558 -0
- dvt/cli/main.py +971 -0
- dvt/cli/option_types.py +121 -0
- dvt/cli/options.py +79 -0
- dvt/cli/params.py +803 -0
- dvt/cli/requires.py +478 -0
- dvt/cli/resolvers.py +32 -0
- dvt/cli/types.py +40 -0
- dvt/clients/__init__.py +0 -0
- dvt/clients/checked_load.py +82 -0
- dvt/clients/git.py +164 -0
- dvt/clients/jinja.py +206 -0
- dvt/clients/jinja_static.py +245 -0
- dvt/clients/registry.py +192 -0
- dvt/clients/yaml_helper.py +68 -0
- dvt/compilation.py +833 -0
- dvt/compute/__init__.py +26 -0
- dvt/compute/base.py +288 -0
- dvt/compute/engines/__init__.py +13 -0
- dvt/compute/engines/duckdb_engine.py +368 -0
- dvt/compute/engines/spark_engine.py +273 -0
- dvt/compute/query_analyzer.py +212 -0
- dvt/compute/router.py +483 -0
- dvt/config/__init__.py +4 -0
- dvt/config/catalogs.py +95 -0
- dvt/config/compute_config.py +406 -0
- dvt/config/profile.py +411 -0
- dvt/config/profiles_v2.py +464 -0
- dvt/config/project.py +893 -0
- dvt/config/renderer.py +232 -0
- dvt/config/runtime.py +491 -0
- dvt/config/selectors.py +209 -0
- dvt/config/utils.py +78 -0
- dvt/connectors/.gitignore +6 -0
- dvt/connectors/README.md +306 -0
- dvt/connectors/catalog.yml +217 -0
- dvt/connectors/download_connectors.py +300 -0
- dvt/constants.py +29 -0
- dvt/context/__init__.py +0 -0
- dvt/context/base.py +746 -0
- dvt/context/configured.py +136 -0
- dvt/context/context_config.py +350 -0
- dvt/context/docs.py +82 -0
- dvt/context/exceptions_jinja.py +179 -0
- dvt/context/macro_resolver.py +195 -0
- dvt/context/macros.py +171 -0
- dvt/context/manifest.py +73 -0
- dvt/context/providers.py +2198 -0
- dvt/context/query_header.py +14 -0
- dvt/context/secret.py +59 -0
- dvt/context/target.py +74 -0
- dvt/contracts/__init__.py +0 -0
- dvt/contracts/files.py +413 -0
- dvt/contracts/graph/__init__.py +0 -0
- dvt/contracts/graph/manifest.py +1904 -0
- dvt/contracts/graph/metrics.py +98 -0
- dvt/contracts/graph/model_config.py +71 -0
- dvt/contracts/graph/node_args.py +42 -0
- dvt/contracts/graph/nodes.py +1806 -0
- dvt/contracts/graph/semantic_manifest.py +233 -0
- dvt/contracts/graph/unparsed.py +812 -0
- dvt/contracts/project.py +417 -0
- dvt/contracts/results.py +53 -0
- dvt/contracts/selection.py +23 -0
- dvt/contracts/sql.py +86 -0
- dvt/contracts/state.py +69 -0
- dvt/contracts/util.py +46 -0
- dvt/deprecations.py +347 -0
- dvt/deps/__init__.py +0 -0
- dvt/deps/base.py +153 -0
- dvt/deps/git.py +196 -0
- dvt/deps/local.py +80 -0
- dvt/deps/registry.py +131 -0
- dvt/deps/resolver.py +149 -0
- dvt/deps/tarball.py +121 -0
- dvt/docs/source/_ext/dbt_click.py +118 -0
- dvt/docs/source/conf.py +32 -0
- dvt/env_vars.py +64 -0
- dvt/event_time/event_time.py +40 -0
- dvt/event_time/sample_window.py +60 -0
- dvt/events/__init__.py +16 -0
- dvt/events/base_types.py +37 -0
- dvt/events/core_types_pb2.py +2 -0
- dvt/events/logging.py +109 -0
- dvt/events/types.py +2534 -0
- dvt/exceptions.py +1487 -0
- dvt/flags.py +89 -0
- dvt/graph/__init__.py +11 -0
- dvt/graph/cli.py +248 -0
- dvt/graph/graph.py +172 -0
- dvt/graph/queue.py +213 -0
- dvt/graph/selector.py +375 -0
- dvt/graph/selector_methods.py +976 -0
- dvt/graph/selector_spec.py +223 -0
- dvt/graph/thread_pool.py +18 -0
- dvt/hooks.py +21 -0
- dvt/include/README.md +49 -0
- dvt/include/__init__.py +3 -0
- dvt/include/global_project.py +4 -0
- dvt/include/starter_project/.gitignore +4 -0
- dvt/include/starter_project/README.md +15 -0
- dvt/include/starter_project/__init__.py +3 -0
- dvt/include/starter_project/analyses/.gitkeep +0 -0
- dvt/include/starter_project/dvt_project.yml +36 -0
- dvt/include/starter_project/macros/.gitkeep +0 -0
- dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt/include/starter_project/models/example/schema.yml +21 -0
- dvt/include/starter_project/seeds/.gitkeep +0 -0
- dvt/include/starter_project/snapshots/.gitkeep +0 -0
- dvt/include/starter_project/tests/.gitkeep +0 -0
- dvt/internal_deprecations.py +27 -0
- dvt/jsonschemas/__init__.py +3 -0
- dvt/jsonschemas/jsonschemas.py +309 -0
- dvt/jsonschemas/project/0.0.110.json +4717 -0
- dvt/jsonschemas/project/0.0.85.json +2015 -0
- dvt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt/jsonschemas/resources/latest.json +6773 -0
- dvt/links.py +4 -0
- dvt/materializations/__init__.py +0 -0
- dvt/materializations/incremental/__init__.py +0 -0
- dvt/materializations/incremental/microbatch.py +235 -0
- dvt/mp_context.py +8 -0
- dvt/node_types.py +37 -0
- dvt/parser/__init__.py +23 -0
- dvt/parser/analysis.py +21 -0
- dvt/parser/base.py +549 -0
- dvt/parser/common.py +267 -0
- dvt/parser/docs.py +52 -0
- dvt/parser/fixtures.py +51 -0
- dvt/parser/functions.py +30 -0
- dvt/parser/generic_test.py +100 -0
- dvt/parser/generic_test_builders.py +334 -0
- dvt/parser/hooks.py +119 -0
- dvt/parser/macros.py +137 -0
- dvt/parser/manifest.py +2204 -0
- dvt/parser/models.py +574 -0
- dvt/parser/partial.py +1179 -0
- dvt/parser/read_files.py +445 -0
- dvt/parser/schema_generic_tests.py +423 -0
- dvt/parser/schema_renderer.py +111 -0
- dvt/parser/schema_yaml_readers.py +936 -0
- dvt/parser/schemas.py +1467 -0
- dvt/parser/search.py +149 -0
- dvt/parser/seeds.py +28 -0
- dvt/parser/singular_test.py +20 -0
- dvt/parser/snapshots.py +44 -0
- dvt/parser/sources.py +557 -0
- dvt/parser/sql.py +63 -0
- dvt/parser/unit_tests.py +622 -0
- dvt/plugins/__init__.py +20 -0
- dvt/plugins/contracts.py +10 -0
- dvt/plugins/exceptions.py +2 -0
- dvt/plugins/manager.py +164 -0
- dvt/plugins/manifest.py +21 -0
- dvt/profiler.py +20 -0
- dvt/py.typed +1 -0
- dvt/runners/__init__.py +2 -0
- dvt/runners/exposure_runner.py +7 -0
- dvt/runners/no_op_runner.py +46 -0
- dvt/runners/saved_query_runner.py +7 -0
- dvt/selected_resources.py +8 -0
- dvt/task/__init__.py +0 -0
- dvt/task/base.py +504 -0
- dvt/task/build.py +197 -0
- dvt/task/clean.py +57 -0
- dvt/task/clone.py +162 -0
- dvt/task/compile.py +151 -0
- dvt/task/compute.py +366 -0
- dvt/task/debug.py +650 -0
- dvt/task/deps.py +280 -0
- dvt/task/docs/__init__.py +3 -0
- dvt/task/docs/generate.py +408 -0
- dvt/task/docs/index.html +250 -0
- dvt/task/docs/serve.py +28 -0
- dvt/task/freshness.py +323 -0
- dvt/task/function.py +122 -0
- dvt/task/group_lookup.py +46 -0
- dvt/task/init.py +374 -0
- dvt/task/list.py +237 -0
- dvt/task/printer.py +176 -0
- dvt/task/profiles.py +256 -0
- dvt/task/retry.py +175 -0
- dvt/task/run.py +1146 -0
- dvt/task/run_operation.py +142 -0
- dvt/task/runnable.py +802 -0
- dvt/task/seed.py +104 -0
- dvt/task/show.py +150 -0
- dvt/task/snapshot.py +57 -0
- dvt/task/sql.py +111 -0
- dvt/task/test.py +464 -0
- dvt/tests/fixtures/__init__.py +1 -0
- dvt/tests/fixtures/project.py +620 -0
- dvt/tests/util.py +651 -0
- dvt/tracking.py +529 -0
- dvt/utils/__init__.py +3 -0
- dvt/utils/artifact_upload.py +151 -0
- dvt/utils/utils.py +408 -0
- dvt/version.py +249 -0
- dvt_core-1.11.0b4.dist-info/METADATA +252 -0
- dvt_core-1.11.0b4.dist-info/RECORD +261 -0
- dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
- dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
- dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/task/run.py
ADDED
@@ -0,0 +1,1146 @@
from __future__ import annotations

import functools
import threading
import time
from copy import deepcopy
from dataclasses import asdict
from datetime import datetime, timezone
from typing import AbstractSet, Any, Dict, Iterable, List, Optional, Set, Tuple, Type

from dvt import tracking, utils
from dvt.artifacts.resources import Hook
from dvt.artifacts.schemas.batch_results import BatchResults, BatchType
from dvt.artifacts.schemas.results import (
    NodeStatus,
    RunningStatus,
    RunStatus,
    TimingInfo,
    collect_timing_info,
)
from dvt.artifacts.schemas.run import RunResult
from dvt.cli.flags import Flags
from dvt.clients.jinja import MacroGenerator
from dvt.config import RuntimeConfig
from dvt.context.providers import generate_runtime_model_context
from dvt.contracts.graph.manifest import Manifest
from dvt.contracts.graph.nodes import BatchContext, HookNode, ModelNode, ResultNode
from dvt.events.types import (
    GenericExceptionOnRun,
    LogBatchResult,
    LogHookEndLine,
    LogHookStartLine,
    LogModelResult,
    LogStartBatch,
    LogStartLine,
    MicrobatchExecutionDebug,
)
from dvt.exceptions import CompilationError, DbtInternalError, DbtRuntimeError
from dvt.graph import ResourceTypeSelector
from dvt.graph.thread_pool import DbtThreadPool
from dvt.hooks import get_hook_dict
from dvt.materializations.incremental.microbatch import MicrobatchBuilder
from dvt.node_types import NodeType, RunHookType
from dvt.task import group_lookup
from dvt.task.base import BaseRunner
from dvt.task.compile import CompileRunner, CompileTask
from dvt.task.printer import get_counts, print_run_end_messages
from dvt.utils.artifact_upload import add_artifact_produced

from dbt.adapters.base import BaseAdapter, BaseRelation
from dbt.adapters.capability import Capability
from dbt.adapters.events.types import FinishedRunningStats
from dbt.adapters.exceptions import MissingMaterializationError
from dbt_common.clients.jinja import MacroProtocol
from dbt_common.dataclass_schema import dbtClassMixin
from dbt_common.events.base_types import EventLevel
from dbt_common.events.contextvars import log_contextvars
from dbt_common.events.functions import fire_event, get_invocation_id
from dbt_common.events.types import Formatting
from dbt_common.exceptions import DbtValidationError
from dbt_common.invocation import get_invocation_started_at


@functools.total_ordering
class BiggestName(str):
    def __lt__(self, other):
        return True

    def __eq__(self, other):
        return isinstance(other, self.__class__)


def _hook_list() -> List[HookNode]:
    return []


def get_hooks_by_tags(
    nodes: Iterable[ResultNode],
    match_tags: Set[str],
) -> List[HookNode]:
    matched_nodes = []
    for node in nodes:
        if not isinstance(node, HookNode):
            continue
        node_tags = node.tags
        if len(set(node_tags) & match_tags):
            matched_nodes.append(node)
    return matched_nodes


def get_hook(source, index):
    hook_dict = get_hook_dict(source)
    hook_dict.setdefault("index", index)
    Hook.validate(hook_dict)
    return Hook.from_dict(hook_dict)


def get_execution_status(sql: str, adapter: BaseAdapter) -> Tuple[RunStatus, str]:
    if not sql.strip():
        return RunStatus.Success, "OK"

    try:
        response, _ = adapter.execute(sql, auto_begin=False, fetch=False)
        status = RunStatus.Success
        message = response._message
    except (KeyboardInterrupt, SystemExit):
        raise
    except DbtRuntimeError as exc:
        status = RunStatus.Error
        message = exc.msg
    except Exception as exc:
        status = RunStatus.Error
        message = str(exc)

    return (status, message)


def _get_adapter_info(adapter, run_model_result) -> Dict[str, Any]:
    """Each adapter returns a dataclass with a flexible dictionary for
    adapter-specific fields. Only the non-'model_adapter_details' fields
    are guaranteed cross adapter."""
    return asdict(adapter.get_adapter_run_info(run_model_result.node.config)) if adapter else {}


def track_model_run(index, num_nodes, run_model_result, adapter=None):
    if tracking.active_user is None:
        raise DbtInternalError("cannot track model run with no active user")
    invocation_id = get_invocation_id()
    node = run_model_result.node
    has_group = True if hasattr(node, "group") and node.group else False
    if node.resource_type == NodeType.Model:
        access = node.access.value if node.access is not None else None
        contract_enforced = node.contract.enforced
        versioned = True if node.version else False
        incremental_strategy = node.config.incremental_strategy
    else:
        access = None
        contract_enforced = False
        versioned = False
        incremental_strategy = None

    tracking.track_model_run(
        {
            "invocation_id": invocation_id,
            "index": index,
            "total": num_nodes,
            "execution_time": run_model_result.execution_time,
            "run_status": str(run_model_result.status).upper(),
            "run_skipped": run_model_result.status == NodeStatus.Skipped,
            "run_error": run_model_result.status == NodeStatus.Error,
            "model_materialization": node.get_materialization(),
            "model_incremental_strategy": incremental_strategy,
            "model_id": utils.get_hash(node),
            "hashed_contents": utils.get_hashed_contents(node),
            "timing": [t.to_dict(omit_none=True) for t in run_model_result.timing],
            "language": str(node.language),
            "has_group": has_group,
            "contract_enforced": contract_enforced,
            "access": access,
            "versioned": versioned,
            "adapter_info": _get_adapter_info(adapter, run_model_result),
        }
    )


# make sure that we got an ok result back from a materialization
def _validate_materialization_relations_dict(inp: Dict[Any, Any], model) -> List[BaseRelation]:
    try:
        relations_value = inp["relations"]
    except KeyError:
        msg = (
            'Invalid return value from materialization, "relations" '
            "not found, got keys: {}".format(list(inp))
        )
        raise CompilationError(msg, node=model) from None

    if not isinstance(relations_value, list):
        msg = (
            'Invalid return value from materialization, "relations" '
            "not a list, got: {}".format(relations_value)
        )
        raise CompilationError(msg, node=model) from None

    relations: List[BaseRelation] = []
    for relation in relations_value:
        if not isinstance(relation, BaseRelation):
            msg = (
                "Invalid return value from materialization, "
                '"relations" contains non-Relation: {}'.format(relation)
            )
            raise CompilationError(msg, node=model)

        assert isinstance(relation, BaseRelation)
        relations.append(relation)
    return relations


class ModelRunner(CompileRunner):
    def get_node_representation(self):
        display_quote_policy = {"database": False, "schema": False, "identifier": False}
        relation = self.adapter.Relation.create_from(
            self.config, self.node, quote_policy=display_quote_policy
        )
        # exclude the database from output if it's the default
        if self.node.database == self.config.credentials.database:
            relation = relation.include(database=False)
        return str(relation)

    def describe_node(self) -> str:
        # TODO CL 'language' will be moved to node level when we change representation
        return f"{self.node.language} {self.node.get_materialization()} model {self.get_node_representation()}"

    def print_start_line(self):
        fire_event(
            LogStartLine(
                description=self.describe_node(),
                index=self.node_index,
                total=self.num_nodes,
                node_info=self.node.node_info,
            )
        )

    def print_result_line(self, result):
        description = self.describe_node()
        group = group_lookup.get(self.node.unique_id)
        if result.status == NodeStatus.Error:
            status = result.status
            level = EventLevel.ERROR
        else:
            status = result.message
            level = EventLevel.INFO
        fire_event(
            LogModelResult(
                description=description,
                status=status,
                index=self.node_index,
                total=self.num_nodes,
                execution_time=result.execution_time,
                node_info=self.node.node_info,
                group=group,
            ),
            level=level,
        )

    def before_execute(self) -> None:
        self.print_start_line()

    def after_execute(self, result) -> None:
        track_model_run(self.node_index, self.num_nodes, result, adapter=self.adapter)
        self.print_result_line(result)

    def _build_run_model_result(self, model, context, elapsed_time: float = 0.0):
        result = context["load_result"]("main")
        if not result:
            raise DbtRuntimeError("main is not being called during running model")
        adapter_response = {}
        if isinstance(result.response, dbtClassMixin):
            adapter_response = result.response.to_dict(omit_none=True)
        return RunResult(
            node=model,
            status=RunStatus.Success,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=elapsed_time,
            message=str(result.response),
            adapter_response=adapter_response,
            failures=result.get("failures"),
            batch_results=None,
        )

    def _materialization_relations(self, result: Any, model) -> List[BaseRelation]:
        if isinstance(result, str):
            msg = (
                'The materialization ("{}") did not explicitly return a '
                "list of relations to add to the cache.".format(str(model.get_materialization()))
            )
            raise CompilationError(msg, node=model)

        if isinstance(result, dict):
            return _validate_materialization_relations_dict(result, model)

        msg = (
            "Invalid return value from materialization, expected a dict "
            'with key "relations", got: {}'.format(str(result))
        )
        raise CompilationError(msg, node=model)

    def _execute_model(
        self,
        hook_ctx: Any,
        context_config: Any,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:
        try:
            result = MacroGenerator(
                materialization_macro, context, stack=context["context_macro_stack"]
            )()
        finally:
            self.adapter.post_model_hook(context_config, hook_ctx)

        for relation in self._materialization_relations(result, model):
            self.adapter.cache_added(relation.incorporate(dbt_created=True))

        return self._build_run_model_result(model, context)

    def execute(self, model, manifest):
        context = generate_runtime_model_context(model, self.config, manifest)

        materialization_macro = manifest.find_materialization_macro_by_name(
            self.config.project_name, model.get_materialization(), self.adapter.type()
        )

        if materialization_macro is None:
            raise MissingMaterializationError(
                materialization=model.get_materialization(), adapter_type=self.adapter.type()
            )

        if "config" not in context:
            raise DbtInternalError(
                "Invalid materialization context generated, missing config: {}".format(context)
            )
        context_config = context["config"]

        mat_has_supported_langs = hasattr(materialization_macro, "supported_languages")
        model_lang_supported = model.language in materialization_macro.supported_languages
        if mat_has_supported_langs and not model_lang_supported:
            str_langs = [str(lang) for lang in materialization_macro.supported_languages]
            raise DbtValidationError(
                f'Materialization "{materialization_macro.name}" only supports languages {str_langs}; '
                f'got "{model.language}"'
            )

        hook_ctx = self.adapter.pre_model_hook(context_config)

        return self._execute_model(hook_ctx, context_config, model, context, materialization_macro)


class MicrobatchBatchRunner(ModelRunner):
    """Handles the running of individual batches"""

    def __init__(
        self,
        config,
        adapter,
        node,
        node_index: int,
        num_nodes: int,
        batch_idx: int,
        batches: Dict[int, BatchType],
        relation_exists: bool,
        incremental_batch: bool,
    ):
        super().__init__(config, adapter, node, node_index, num_nodes)

        self.batch_idx = batch_idx
        self.batches = batches
        self.relation_exists = relation_exists
        self.incremental_batch = incremental_batch

    def describe_batch(self) -> str:
        batch_start = self.batches[self.batch_idx][0]
        formatted_batch_start = MicrobatchBuilder.format_batch_start(
            batch_start, self.node.config.batch_size
        )
        return f"batch {formatted_batch_start} of {self.get_node_representation()}"

    def print_result_line(self, result: RunResult):
        if result.status == NodeStatus.Error:
            status = result.status
            level = EventLevel.ERROR
        elif result.status == NodeStatus.Skipped:
            status = result.status
            level = EventLevel.INFO
        else:
            status = result.message
            level = EventLevel.INFO

        fire_event(
            LogBatchResult(
                description=self.describe_batch(),
                status=status,
                batch_index=self.batch_idx + 1,
                total_batches=len(self.batches),
                execution_time=result.execution_time,
                node_info=self.node.node_info,
                group=group_lookup.get(self.node.unique_id),
            ),
            level=level,
        )

    def print_start_line(self) -> None:
        fire_event(
            LogStartBatch(
                description=self.describe_batch(),
                batch_index=self.batch_idx + 1,
                total_batches=len(self.batches),
                node_info=self.node.node_info,
            )
        )

    def should_run_in_parallel(self) -> bool:
        if not self.adapter.supports(Capability.MicrobatchConcurrency):
            run_in_parallel = False
        elif not self.relation_exists:
            # If the relation doesn't exist, we can't run in parallel
            run_in_parallel = False
        elif self.node.config.concurrent_batches is not None:
            # If the relation exists and the `concurrent_batches` config isn't None, use the config value
            run_in_parallel = self.node.config.concurrent_batches
        else:
            # If the relation exists and the `concurrent_batches` config is None, check if the model self-references `this`.
            # If the model self-references `this` then we assume the model batches _can't_ be run in parallel
            run_in_parallel = not self.node.has_this

        return run_in_parallel

    def on_skip(self):
        result = RunResult(
            node=self.node,
            status=RunStatus.Skipped,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=0.0,
            message="SKIPPED",
            adapter_response={},
            failures=1,
            batch_results=BatchResults(failed=[self.batches[self.batch_idx]]),
        )
        self.print_result_line(result=result)
        return result

    def error_result(self, node, message, start_time, timing_info):
        """Necessary to return a result with a batch result

        Called by `BaseRunner.safe_run` when an error occurs
        """
        return self._build_run_result(
            node=node,
            start_time=start_time,
            status=RunStatus.Error,
            timing_info=timing_info,
            message=message,
            batch_results=BatchResults(failed=[self.batches[self.batch_idx]]),
        )

    def compile(self, manifest: Manifest):
        batch = self.batches[self.batch_idx]

        # LEGACY: Set start/end in context prior to re-compiling (Will be removed for 1.10+)
        # TODO: REMOVE before 1.10 GA
        self.node.config["__dbt_internal_microbatch_event_time_start"] = batch[0]
        self.node.config["__dbt_internal_microbatch_event_time_end"] = batch[1]
        # Create batch context on model node prior to re-compiling
        self.node.batch = BatchContext(
            id=MicrobatchBuilder.batch_id(batch[0], self.node.config.batch_size),
            event_time_start=batch[0],
            event_time_end=batch[1],
        )
        # Recompile node to re-resolve refs with event time filters rendered, update context
        self.compiler.compile_node(
            self.node,
            manifest,
            {},
            split_suffix=MicrobatchBuilder.format_batch_start(
                batch[0], self.node.config.batch_size
            ),
        )

        return self.node

    def _build_succesful_run_batch_result(
        self,
        model: ModelNode,
        context: Dict[str, Any],
        batch: BatchType,
        elapsed_time: float = 0.0,
    ) -> RunResult:
        run_result = self._build_run_model_result(model, context, elapsed_time)
        run_result.batch_results = BatchResults(successful=[batch])
        return run_result

    def _build_failed_run_batch_result(
        self,
        model: ModelNode,
        batch: BatchType,
        elapsed_time: float = 0.0,
    ) -> RunResult:
        return RunResult(
            node=model,
            status=RunStatus.Error,
            timing=[],
            thread_id=threading.current_thread().name,
            execution_time=elapsed_time,
            message="ERROR",
            adapter_response={},
            failures=1,
            batch_results=BatchResults(failed=[batch]),
        )

    def _execute_microbatch_materialization(
        self,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:

        batch = self.batches[self.batch_idx]
        # call materialization_macro to get a batch-level run result
        start_time = time.perf_counter()
        try:
            # Update jinja context with batch context members
            jinja_context = MicrobatchBuilder.build_jinja_context_for_batch(
                model=model,
                incremental_batch=self.incremental_batch,
            )
            context.update(jinja_context)

            # Materialize batch and cache any materialized relations
            result = MacroGenerator(
                materialization_macro, context, stack=context["context_macro_stack"]
            )()
            for relation in self._materialization_relations(result, model):
                self.adapter.cache_added(relation.incorporate(dbt_created=True))

            # Build result of executed batch
            batch_run_result = self._build_succesful_run_batch_result(
                model, context, batch, time.perf_counter() - start_time
            )
            batch_result = batch_run_result

            # At least one batch has been inserted successfully!
            # Can proceed incrementally + in parallel
            self.relation_exists = True

        except (KeyboardInterrupt, SystemExit):
            # reraise it for GraphRunnableTask.execute_nodes to handle
            raise
        except Exception as e:
            fire_event(
                GenericExceptionOnRun(
                    unique_id=self.node.unique_id,
                    exc=f"Exception on worker thread. {str(e)}",
                    node_info=self.node.node_info,
                )
            )
            batch_run_result = self._build_failed_run_batch_result(
                model, batch, time.perf_counter() - start_time
            )

            batch_result = batch_run_result

        return batch_result

    def _execute_model(
        self,
        hook_ctx: Any,
        context_config: Any,
        model: ModelNode,
        context: Dict[str, Any],
        materialization_macro: MacroProtocol,
    ) -> RunResult:
        try:
            batch_result = self._execute_microbatch_materialization(
                model, context, materialization_macro
            )
        finally:
            self.adapter.post_model_hook(context_config, hook_ctx)

        return batch_result


class MicrobatchModelRunner(ModelRunner):
    """Handles the orchestration of batches to run for a given microbatch model"""

    def __init__(self, config, adapter, node, node_index: int, num_nodes: int):
        super().__init__(config, adapter, node, node_index, num_nodes)

        # The parent task is necessary because we need access to the `_submit_batch` and `submit` methods
        self._parent_task: Optional[RunTask] = None
        # The pool is necessary because we need the batches to be executed within the same thread pool
        self._pool: Optional[DbtThreadPool] = None

    def set_parent_task(self, parent_task: RunTask) -> None:
        self._parent_task = parent_task

    def set_pool(self, pool: DbtThreadPool) -> None:
        self._pool = pool

    @property
    def parent_task(self) -> RunTask:
        if self._parent_task is None:
            raise DbtInternalError(
                msg="Tried to access `parent_task` of `MicrobatchModelRunner` before it was set"
            )

        return self._parent_task

    @property
    def pool(self) -> DbtThreadPool:
        if self._pool is None:
            raise DbtInternalError(
                msg="Tried to access `pool` of `MicrobatchModelRunner` before it was set"
            )

        return self._pool

    def _has_relation(self, model: ModelNode) -> bool:
        """Check whether the relation for the model exists in the data warehouse"""
        relation_info = self.adapter.Relation.create_from(self.config, model)
        relation = self.adapter.get_relation(
            relation_info.database, relation_info.schema, relation_info.name
        )
        return relation is not None

    def _is_incremental(self, model) -> bool:
        """Check whether the model should be run `incrementally` or as `full refresh`"""
        # TODO: Remove this whole function. This should be a temporary method. We're working with adapters on
        # a strategy to ensure we can access the `is_incremental` logic without drift
        relation_info = self.adapter.Relation.create_from(self.config, model)
        relation = self.adapter.get_relation(
            relation_info.database, relation_info.schema, relation_info.name
        )
        if (
            relation is not None
            and relation.type == "table"
            and model.config.materialized == "incremental"
        ):
            if model.config.full_refresh is not None:
                return not model.config.full_refresh
            else:
                return not getattr(self.config.args, "FULL_REFRESH", False)
        else:
            return False

    def _initial_run_microbatch_model_result(self, model: ModelNode) -> RunResult:
        return RunResult(
            node=model,
            status=RunStatus.Success,
            timing=[],
            thread_id=threading.current_thread().name,
            # The execution_time here doesn't get propagated to logs because
            # `safe_run_hooks` handles the elapsed time at the node level
            execution_time=0,
            message="",
            adapter_response={},
            failures=0,
            batch_results=BatchResults(),
        )

    def describe_node(self) -> str:
        return f"{self.node.language} microbatch model {self.get_node_representation()}"

    def merge_batch_results(self, result: RunResult, batch_results: List[RunResult]):
        """merge batch_results into result"""
        if result.batch_results is None:
            result.batch_results = BatchResults()

        for batch_result in batch_results:
            if batch_result.batch_results is not None:
                result.batch_results += batch_result.batch_results
            result.execution_time += batch_result.execution_time

        num_successes = len(result.batch_results.successful)
        num_failures = len(result.batch_results.failed)
        if num_failures == 0:
            status = RunStatus.Success
            msg = "SUCCESS"
        elif num_successes == 0:
            status = RunStatus.Error
            msg = "ERROR"
        else:
            status = RunStatus.PartialSuccess
            msg = f"PARTIAL SUCCESS ({num_successes}/{num_successes + num_failures})"
        result.status = status
        result.message = msg

        result.batch_results.successful = sorted(result.batch_results.successful)
        result.batch_results.failed = sorted(result.batch_results.failed)

        # If retrying, propagate previously successful batches into final result, even though they were not run in this invocation
        if self.node.previous_batch_results is not None:
            result.batch_results.successful += self.node.previous_batch_results.successful

    def _update_result_with_unfinished_batches(
        self, result: RunResult, batches: Dict[int, BatchType]
    ) -> None:
        """This method is really only to be used when the execution of a microbatch model is halted before all batches have had a chance to run"""
        batches_finished: Set[BatchType] = set()

        if result.batch_results:
            # build list of finished batches
            batches_finished = batches_finished.union(set(result.batch_results.successful))
            batches_finished = batches_finished.union(set(result.batch_results.failed))
        else:
            # instantiate `batch_results` if it was `None`
            result.batch_results = BatchResults()

        # skipped batches are any batch that was expected but didn't finish
        batches_expected = {batch for _, batch in batches.items()}
        skipped_batches = batches_expected.difference(batches_finished)

        result.batch_results.failed.extend(list(skipped_batches))

        # We call this method, even though we are merging no new results, as it updates
        # the result with the appropriate status (Success/Partial/Failed)
        self.merge_batch_results(result, [])

    def get_microbatch_builder(self, model: ModelNode) -> MicrobatchBuilder:
        # Initially set the start/end to values from args
        event_time_start = getattr(self.config.args, "EVENT_TIME_START", None)
        event_time_end = getattr(self.config.args, "EVENT_TIME_END", None)

        # If we're in sample mode, alter start/end to sample values
        if getattr(self.config.args, "SAMPLE", None) is not None:
            event_time_start = self.config.args.sample.start
            event_time_end = self.config.args.sample.end

        return MicrobatchBuilder(
            model=model,
            is_incremental=self._is_incremental(model),
            event_time_start=event_time_start,
            event_time_end=event_time_end,
            default_end_time=get_invocation_started_at(),
        )

    def get_batches(self, model: ModelNode) -> Dict[int, BatchType]:
        """Get the batches that should be run for the model"""

        # Note currently (02/23/2025) model.previous_batch_results is only ever _not_ `None`
        # IFF `dbt retry` is being run and the microbatch model had batches which
        # failed on the run of the model (which is being retried)
        if model.previous_batch_results is None:
            microbatch_builder = self.get_microbatch_builder(model)
            end = microbatch_builder.build_end_time()
            start = microbatch_builder.build_start_time(end)
            batches = microbatch_builder.build_batches(start, end)
        else:
            batches = model.previous_batch_results.failed

        return {batch_idx: batches[batch_idx] for batch_idx in range(len(batches))}

    def compile(self, manifest: Manifest):
        """Don't do anything here because this runner doesn't need to compile anything"""
        return self.node

    def execute(self, model: ModelNode, manifest: Manifest) -> RunResult:
        # Execution really means orchestration in this case

        batches = self.get_batches(model=model)
        relation_exists = self._has_relation(model=model)
        result = self._initial_run_microbatch_model_result(model=model)

        # No batches to run, so return initial result
        if len(batches) == 0:
            return result

        batch_results: List[RunResult] = []
        batch_idx = 0

        # Run first batch not in parallel
        relation_exists = self.parent_task._submit_batch(
            node=model,
            adapter=self.adapter,
            relation_exists=relation_exists,
            batches=batches,
            batch_idx=batch_idx,
            batch_results=batch_results,
            pool=self.pool,
            force_sequential_run=True,
            incremental_batch=self._is_incremental(model=model),
        )
        batch_idx += 1
        skip_batches = batch_results[0].status != RunStatus.Success

        # Run all batches except first and last batch, in parallel if possible
        while batch_idx < len(batches) - 1:
            relation_exists = self.parent_task._submit_batch(
                node=model,
                adapter=self.adapter,
                relation_exists=relation_exists,
                batches=batches,
                batch_idx=batch_idx,
                batch_results=batch_results,
                pool=self.pool,
                skip=skip_batches,
            )
            batch_idx += 1

        # Wait until all submitted batches have completed
        while len(batch_results) != batch_idx:
            # Check if the pool was closed, because if it was, then the main thread is trying to exit.
            # If the main thread is trying to exit, we need to shutdown. If we _don't_ shutdown, then
            # batches will continue to execute and we'll delay the run from stopping
            if self.pool.is_closed():
                # It's technically possible for more results to come in while we clean up;
                # instead we're going to say they didn't finish, regardless of whether they
                # finished or not. Thus, let's get a copy of the results as they exist right "now".
                frozen_batch_results = deepcopy(batch_results)
                self.merge_batch_results(result, frozen_batch_results)
                self._update_result_with_unfinished_batches(result, batches)
                return result

            # briefly sleep so that this thread doesn't go brrrrr while waiting
            time.sleep(0.1)

        # Only run "last" batch if there is more than one batch
        if len(batches) != 1:
            # Final batch runs once all others complete to ensure post_hook runs at the end
            self.parent_task._submit_batch(
                node=model,
                adapter=self.adapter,
                relation_exists=relation_exists,
                batches=batches,
                batch_idx=batch_idx,
                batch_results=batch_results,
                pool=self.pool,
                force_sequential_run=True,
                skip=skip_batches,
            )

        # Finalize run: merge results, track model run, and print final result line
        self.merge_batch_results(result, batch_results)

        return result


class RunTask(CompileTask):
    def __init__(
        self,
        args: Flags,
        config: RuntimeConfig,
        manifest: Manifest,
        batch_map: Optional[Dict[str, BatchResults]] = None,
    ) -> None:
        super().__init__(args, config, manifest)
        self.batch_map = batch_map

    def raise_on_first_error(self) -> bool:
        return False

    def get_hook_sql(self, adapter, hook, idx, num_hooks, extra_context) -> str:
        if self.manifest is None:
            raise DbtInternalError("compile_node called before manifest was loaded")

        compiled = self.compiler.compile_node(hook, self.manifest, extra_context)
        statement = compiled.compiled_code
        hook_index = hook.index or num_hooks
        hook_obj = get_hook(statement, index=hook_index)
        return hook_obj.sql or ""

    def handle_job_queue(self, pool, callback):
        node = self.job_queue.get()
        self._raise_set_error()
        runner = self.get_runner(node)
        # we finally know what we're running! Make sure we haven't decided
        # to skip it due to upstream failures
        if runner.node.unique_id in self._skipped_children:
            cause = self._skipped_children.pop(runner.node.unique_id)
            runner.do_skip(cause=cause)

        if isinstance(runner, MicrobatchModelRunner):
            runner.set_parent_task(self)
            runner.set_pool(pool)

        args = [runner]
        self._submit(pool, args, callback)

    def _submit_batch(
        self,
        node: ModelNode,
        adapter: BaseAdapter,
        relation_exists: bool,
        batches: Dict[int, BatchType],
        batch_idx: int,
        batch_results: List[RunResult],
        pool: DbtThreadPool,
        force_sequential_run: bool = False,
        skip: bool = False,
        incremental_batch: bool = True,
    ):
        node_copy = deepcopy(node)
        # Only run pre_hook(s) for first batch
        if batch_idx != 0:
            node_copy.config.pre_hook = []

        # Only run post_hook(s) for last batch
        if batch_idx != len(batches) - 1:
            node_copy.config.post_hook = []

        # TODO: We should be doing self.get_runner, however doing so
        # currently causes the tracking of how many nodes there are to
        # increment when we don't want it to
        batch_runner = MicrobatchBatchRunner(
            self.config,
            adapter,
            node_copy,
            self.run_count,
            self.num_nodes,
            batch_idx,
            batches,
            relation_exists,
            incremental_batch,
        )

        if skip:
            batch_runner.do_skip()

        if not pool.is_closed():
            if not force_sequential_run and batch_runner.should_run_in_parallel():
                fire_event(
                    MicrobatchExecutionDebug(
                        msg=f"{batch_runner.describe_batch()} is being run concurrently"
                    )
                )
                self._submit(pool, [batch_runner], batch_results.append)
            else:
                fire_event(
                    MicrobatchExecutionDebug(
                        msg=f"{batch_runner.describe_batch()} is being run sequentially"
                    )
                )
                batch_results.append(self.call_runner(batch_runner))
                relation_exists = batch_runner.relation_exists
        else:
            batch_results.append(
                batch_runner._build_failed_run_batch_result(node_copy, batches[batch_idx])
            )

        return relation_exists

    def _hook_keyfunc(self, hook: HookNode) -> Tuple[str, Optional[int]]:
        package_name = hook.package_name
        if package_name == self.config.project_name:
            package_name = BiggestName("")
        return package_name, hook.index

    def get_hooks_by_type(self, hook_type: RunHookType) -> List[HookNode]:

        if self.manifest is None:
            raise DbtInternalError("self.manifest was None in get_hooks_by_type")

        nodes = self.manifest.nodes.values()
        # find all hooks defined in the manifest (could be multiple projects)
        hooks: List[HookNode] = get_hooks_by_tags(nodes, {hook_type})
        hooks.sort(key=self._hook_keyfunc)
        return hooks

    def safe_run_hooks(
        self, adapter: BaseAdapter, hook_type: RunHookType, extra_context: Dict[str, Any]
    ) -> RunStatus:
        ordered_hooks = self.get_hooks_by_type(hook_type)

        if hook_type == RunHookType.End and ordered_hooks:
            fire_event(Formatting(""))

        # on-run-* hooks should run outside a transaction. This happens because psycopg2 automatically begins a transaction when a connection is created.
        adapter.clear_transaction()
        if not ordered_hooks:
            return RunStatus.Success

        status = RunStatus.Success
        failed = False
        num_hooks = len(ordered_hooks)

        for idx, hook in enumerate(ordered_hooks, 1):
            with log_contextvars(node_info=hook.node_info):
                hook.index = idx
                hook_name = f"{hook.package_name}.{hook_type}.{hook.index - 1}"
                execution_time = 0.0
                timing: List[TimingInfo] = []
                failures = 1

                if not failed:
                    with collect_timing_info("compile", timing.append):
                        sql = self.get_hook_sql(
                            adapter, hook, hook.index, num_hooks, extra_context
                        )

                    started_at = timing[0].started_at or datetime.now(timezone.utc).replace(
                        tzinfo=None
                    )
                    hook.update_event_status(
                        started_at=started_at.isoformat(), node_status=RunningStatus.Started
                    )

                    fire_event(
                        LogHookStartLine(
                            statement=hook_name,
                            index=hook.index,
                            total=num_hooks,
                            node_info=hook.node_info,
                        )
                    )

                    with collect_timing_info("execute", timing.append):
                        status, message = get_execution_status(sql, adapter)

                    finished_at = timing[1].completed_at or datetime.now(timezone.utc).replace(
                        tzinfo=None
                    )
                    hook.update_event_status(finished_at=finished_at.isoformat())
                    execution_time = (finished_at - started_at).total_seconds()
                    failures = 0 if status == RunStatus.Success else 1

                    if status == RunStatus.Success:
                        message = f"{hook_name} passed"
                    else:
                        message = f"{hook_name} failed, error:\n {message}"
                        failed = True
                else:
                    status = RunStatus.Skipped
                    message = f"{hook_name} skipped"

                hook.update_event_status(node_status=status)

                self.node_results.append(
                    RunResult(
                        status=status,
                        thread_id="main",
                        timing=timing,
                        message=message,
                        adapter_response={},
                        execution_time=execution_time,
                        failures=failures,
                        node=hook,
                    )
                )

                fire_event(
                    LogHookEndLine(
                        statement=hook_name,
                        status=status,
                        index=hook.index,
                        total=num_hooks,
                        execution_time=execution_time,
                        node_info=hook.node_info,
                    )
                )

        if hook_type == RunHookType.Start and ordered_hooks:
            fire_event(Formatting(""))

        return status

    def print_results_line(self, results, execution_time) -> None:
        nodes = [r.node for r in results if hasattr(r, "node")]
        stat_line = get_counts(nodes)

        execution = ""

        if execution_time is not None:
            execution = utils.humanize_execution_time(execution_time=execution_time)

        fire_event(Formatting(""))
        fire_event(
            FinishedRunningStats(
                stat_line=stat_line, execution=execution, execution_time=execution_time
            )
        )

    def populate_microbatch_batches(self, selected_uids: AbstractSet[str]):
        if self.batch_map is not None and self.manifest is not None:
            for uid in selected_uids:
                if uid in self.batch_map:
                    node = self.manifest.ref_lookup.perform_lookup(uid, self.manifest)
                    if isinstance(node, ModelNode):
                        node.previous_batch_results = self.batch_map[uid]

    def before_run(self, adapter: BaseAdapter, selected_uids: AbstractSet[str]) -> RunStatus:
        with adapter.connection_named("master"):
            self.defer_to_manifest()
            required_schemas = self.get_model_schemas(adapter, selected_uids)
            self.create_schemas(adapter, required_schemas)
            self.populate_adapter_cache(adapter, required_schemas)
            self.populate_microbatch_batches(selected_uids)
            group_lookup.init(self.manifest, selected_uids)
            run_hooks_status = self.safe_run_hooks(adapter, RunHookType.Start, {})
            return run_hooks_status

    def after_run(self, adapter, results) -> None:
        # in on-run-end hooks, provide the value 'database_schemas', which is a
        # list of unique (database, schema) pairs that successfully executed
        # models were in. For backwards compatibility, include the old
        # 'schemas', which did not include database information.

        database_schema_set: Set[Tuple[Optional[str], str]] = {
            (r.node.database, r.node.schema)
            for r in results
            if (hasattr(r, "node") and r.node.is_relational)
            and r.status not in (NodeStatus.Error, NodeStatus.Fail, NodeStatus.Skipped)
        }

        extras = {
            "schemas": list({s for _, s in database_schema_set}),
            "results": [
                r for r in results if r.thread_id != "main" or r.status == RunStatus.Error
            ],  # exclude hooks that didn't fail, to preserve backwards compatibility
            "database_schemas": list(database_schema_set),
        }

        try:
            with adapter.connection_named("master"):
                self.safe_run_hooks(adapter, RunHookType.End, extras)
        except (KeyboardInterrupt, SystemExit, DbtRuntimeError):
            run_result = self.get_result(
                results=self.node_results,
                elapsed_time=time.time() - self.started_at,
                generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
            )

            if self.args.write_json and hasattr(run_result, "write"):
                run_result.write(self.result_path())
                add_artifact_produced(self.result_path())

            print_run_end_messages(self.node_results, keyboard_interrupt=True)

            raise

    def get_node_selector(self) -> ResourceTypeSelector:
        if self.manifest is None or self.graph is None:
            raise DbtInternalError("manifest and graph must be set to perform node selection")
        return ResourceTypeSelector(
            graph=self.graph,
            manifest=self.manifest,
            previous_state=self.previous_state,
            resource_types=[NodeType.Model],
        )

    def get_runner_type(self, node) -> Optional[Type[BaseRunner]]:
        if self.manifest is None:
            raise DbtInternalError("manifest must be set prior to calling get_runner_type")

        if (
            node.config.materialized == "incremental"
            and node.config.incremental_strategy == "microbatch"
            and self.manifest.use_microbatch_batches(project_name=self.config.project_name)
        ):
            return MicrobatchModelRunner
        else:
            return ModelRunner

    def task_end_messages(self, results) -> None:
        if results:
            print_run_end_messages(results)