dvt_core-1.11.0b4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dvt-core might be problematic.
- dvt/__init__.py +7 -0
- dvt/_pydantic_shim.py +26 -0
- dvt/adapters/__init__.py +16 -0
- dvt/adapters/multi_adapter_manager.py +268 -0
- dvt/artifacts/__init__.py +0 -0
- dvt/artifacts/exceptions/__init__.py +1 -0
- dvt/artifacts/exceptions/schemas.py +31 -0
- dvt/artifacts/resources/__init__.py +116 -0
- dvt/artifacts/resources/base.py +68 -0
- dvt/artifacts/resources/types.py +93 -0
- dvt/artifacts/resources/v1/analysis.py +10 -0
- dvt/artifacts/resources/v1/catalog.py +23 -0
- dvt/artifacts/resources/v1/components.py +275 -0
- dvt/artifacts/resources/v1/config.py +282 -0
- dvt/artifacts/resources/v1/documentation.py +11 -0
- dvt/artifacts/resources/v1/exposure.py +52 -0
- dvt/artifacts/resources/v1/function.py +53 -0
- dvt/artifacts/resources/v1/generic_test.py +32 -0
- dvt/artifacts/resources/v1/group.py +22 -0
- dvt/artifacts/resources/v1/hook.py +11 -0
- dvt/artifacts/resources/v1/macro.py +30 -0
- dvt/artifacts/resources/v1/metric.py +173 -0
- dvt/artifacts/resources/v1/model.py +146 -0
- dvt/artifacts/resources/v1/owner.py +10 -0
- dvt/artifacts/resources/v1/saved_query.py +112 -0
- dvt/artifacts/resources/v1/seed.py +42 -0
- dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt/artifacts/resources/v1/semantic_model.py +315 -0
- dvt/artifacts/resources/v1/singular_test.py +14 -0
- dvt/artifacts/resources/v1/snapshot.py +92 -0
- dvt/artifacts/resources/v1/source_definition.py +85 -0
- dvt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
- dvt/artifacts/schemas/__init__.py +0 -0
- dvt/artifacts/schemas/base.py +191 -0
- dvt/artifacts/schemas/batch_results.py +24 -0
- dvt/artifacts/schemas/catalog/__init__.py +12 -0
- dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
- dvt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
- dvt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
- dvt/artifacts/schemas/results.py +148 -0
- dvt/artifacts/schemas/run/__init__.py +2 -0
- dvt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt/artifacts/schemas/run/v5/run.py +184 -0
- dvt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt/artifacts/utils/validation.py +153 -0
- dvt/cli/__init__.py +1 -0
- dvt/cli/context.py +16 -0
- dvt/cli/exceptions.py +56 -0
- dvt/cli/flags.py +558 -0
- dvt/cli/main.py +971 -0
- dvt/cli/option_types.py +121 -0
- dvt/cli/options.py +79 -0
- dvt/cli/params.py +803 -0
- dvt/cli/requires.py +478 -0
- dvt/cli/resolvers.py +32 -0
- dvt/cli/types.py +40 -0
- dvt/clients/__init__.py +0 -0
- dvt/clients/checked_load.py +82 -0
- dvt/clients/git.py +164 -0
- dvt/clients/jinja.py +206 -0
- dvt/clients/jinja_static.py +245 -0
- dvt/clients/registry.py +192 -0
- dvt/clients/yaml_helper.py +68 -0
- dvt/compilation.py +833 -0
- dvt/compute/__init__.py +26 -0
- dvt/compute/base.py +288 -0
- dvt/compute/engines/__init__.py +13 -0
- dvt/compute/engines/duckdb_engine.py +368 -0
- dvt/compute/engines/spark_engine.py +273 -0
- dvt/compute/query_analyzer.py +212 -0
- dvt/compute/router.py +483 -0
- dvt/config/__init__.py +4 -0
- dvt/config/catalogs.py +95 -0
- dvt/config/compute_config.py +406 -0
- dvt/config/profile.py +411 -0
- dvt/config/profiles_v2.py +464 -0
- dvt/config/project.py +893 -0
- dvt/config/renderer.py +232 -0
- dvt/config/runtime.py +491 -0
- dvt/config/selectors.py +209 -0
- dvt/config/utils.py +78 -0
- dvt/connectors/.gitignore +6 -0
- dvt/connectors/README.md +306 -0
- dvt/connectors/catalog.yml +217 -0
- dvt/connectors/download_connectors.py +300 -0
- dvt/constants.py +29 -0
- dvt/context/__init__.py +0 -0
- dvt/context/base.py +746 -0
- dvt/context/configured.py +136 -0
- dvt/context/context_config.py +350 -0
- dvt/context/docs.py +82 -0
- dvt/context/exceptions_jinja.py +179 -0
- dvt/context/macro_resolver.py +195 -0
- dvt/context/macros.py +171 -0
- dvt/context/manifest.py +73 -0
- dvt/context/providers.py +2198 -0
- dvt/context/query_header.py +14 -0
- dvt/context/secret.py +59 -0
- dvt/context/target.py +74 -0
- dvt/contracts/__init__.py +0 -0
- dvt/contracts/files.py +413 -0
- dvt/contracts/graph/__init__.py +0 -0
- dvt/contracts/graph/manifest.py +1904 -0
- dvt/contracts/graph/metrics.py +98 -0
- dvt/contracts/graph/model_config.py +71 -0
- dvt/contracts/graph/node_args.py +42 -0
- dvt/contracts/graph/nodes.py +1806 -0
- dvt/contracts/graph/semantic_manifest.py +233 -0
- dvt/contracts/graph/unparsed.py +812 -0
- dvt/contracts/project.py +417 -0
- dvt/contracts/results.py +53 -0
- dvt/contracts/selection.py +23 -0
- dvt/contracts/sql.py +86 -0
- dvt/contracts/state.py +69 -0
- dvt/contracts/util.py +46 -0
- dvt/deprecations.py +347 -0
- dvt/deps/__init__.py +0 -0
- dvt/deps/base.py +153 -0
- dvt/deps/git.py +196 -0
- dvt/deps/local.py +80 -0
- dvt/deps/registry.py +131 -0
- dvt/deps/resolver.py +149 -0
- dvt/deps/tarball.py +121 -0
- dvt/docs/source/_ext/dbt_click.py +118 -0
- dvt/docs/source/conf.py +32 -0
- dvt/env_vars.py +64 -0
- dvt/event_time/event_time.py +40 -0
- dvt/event_time/sample_window.py +60 -0
- dvt/events/__init__.py +16 -0
- dvt/events/base_types.py +37 -0
- dvt/events/core_types_pb2.py +2 -0
- dvt/events/logging.py +109 -0
- dvt/events/types.py +2534 -0
- dvt/exceptions.py +1487 -0
- dvt/flags.py +89 -0
- dvt/graph/__init__.py +11 -0
- dvt/graph/cli.py +248 -0
- dvt/graph/graph.py +172 -0
- dvt/graph/queue.py +213 -0
- dvt/graph/selector.py +375 -0
- dvt/graph/selector_methods.py +976 -0
- dvt/graph/selector_spec.py +223 -0
- dvt/graph/thread_pool.py +18 -0
- dvt/hooks.py +21 -0
- dvt/include/README.md +49 -0
- dvt/include/__init__.py +3 -0
- dvt/include/global_project.py +4 -0
- dvt/include/starter_project/.gitignore +4 -0
- dvt/include/starter_project/README.md +15 -0
- dvt/include/starter_project/__init__.py +3 -0
- dvt/include/starter_project/analyses/.gitkeep +0 -0
- dvt/include/starter_project/dvt_project.yml +36 -0
- dvt/include/starter_project/macros/.gitkeep +0 -0
- dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt/include/starter_project/models/example/schema.yml +21 -0
- dvt/include/starter_project/seeds/.gitkeep +0 -0
- dvt/include/starter_project/snapshots/.gitkeep +0 -0
- dvt/include/starter_project/tests/.gitkeep +0 -0
- dvt/internal_deprecations.py +27 -0
- dvt/jsonschemas/__init__.py +3 -0
- dvt/jsonschemas/jsonschemas.py +309 -0
- dvt/jsonschemas/project/0.0.110.json +4717 -0
- dvt/jsonschemas/project/0.0.85.json +2015 -0
- dvt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt/jsonschemas/resources/latest.json +6773 -0
- dvt/links.py +4 -0
- dvt/materializations/__init__.py +0 -0
- dvt/materializations/incremental/__init__.py +0 -0
- dvt/materializations/incremental/microbatch.py +235 -0
- dvt/mp_context.py +8 -0
- dvt/node_types.py +37 -0
- dvt/parser/__init__.py +23 -0
- dvt/parser/analysis.py +21 -0
- dvt/parser/base.py +549 -0
- dvt/parser/common.py +267 -0
- dvt/parser/docs.py +52 -0
- dvt/parser/fixtures.py +51 -0
- dvt/parser/functions.py +30 -0
- dvt/parser/generic_test.py +100 -0
- dvt/parser/generic_test_builders.py +334 -0
- dvt/parser/hooks.py +119 -0
- dvt/parser/macros.py +137 -0
- dvt/parser/manifest.py +2204 -0
- dvt/parser/models.py +574 -0
- dvt/parser/partial.py +1179 -0
- dvt/parser/read_files.py +445 -0
- dvt/parser/schema_generic_tests.py +423 -0
- dvt/parser/schema_renderer.py +111 -0
- dvt/parser/schema_yaml_readers.py +936 -0
- dvt/parser/schemas.py +1467 -0
- dvt/parser/search.py +149 -0
- dvt/parser/seeds.py +28 -0
- dvt/parser/singular_test.py +20 -0
- dvt/parser/snapshots.py +44 -0
- dvt/parser/sources.py +557 -0
- dvt/parser/sql.py +63 -0
- dvt/parser/unit_tests.py +622 -0
- dvt/plugins/__init__.py +20 -0
- dvt/plugins/contracts.py +10 -0
- dvt/plugins/exceptions.py +2 -0
- dvt/plugins/manager.py +164 -0
- dvt/plugins/manifest.py +21 -0
- dvt/profiler.py +20 -0
- dvt/py.typed +1 -0
- dvt/runners/__init__.py +2 -0
- dvt/runners/exposure_runner.py +7 -0
- dvt/runners/no_op_runner.py +46 -0
- dvt/runners/saved_query_runner.py +7 -0
- dvt/selected_resources.py +8 -0
- dvt/task/__init__.py +0 -0
- dvt/task/base.py +504 -0
- dvt/task/build.py +197 -0
- dvt/task/clean.py +57 -0
- dvt/task/clone.py +162 -0
- dvt/task/compile.py +151 -0
- dvt/task/compute.py +366 -0
- dvt/task/debug.py +650 -0
- dvt/task/deps.py +280 -0
- dvt/task/docs/__init__.py +3 -0
- dvt/task/docs/generate.py +408 -0
- dvt/task/docs/index.html +250 -0
- dvt/task/docs/serve.py +28 -0
- dvt/task/freshness.py +323 -0
- dvt/task/function.py +122 -0
- dvt/task/group_lookup.py +46 -0
- dvt/task/init.py +374 -0
- dvt/task/list.py +237 -0
- dvt/task/printer.py +176 -0
- dvt/task/profiles.py +256 -0
- dvt/task/retry.py +175 -0
- dvt/task/run.py +1146 -0
- dvt/task/run_operation.py +142 -0
- dvt/task/runnable.py +802 -0
- dvt/task/seed.py +104 -0
- dvt/task/show.py +150 -0
- dvt/task/snapshot.py +57 -0
- dvt/task/sql.py +111 -0
- dvt/task/test.py +464 -0
- dvt/tests/fixtures/__init__.py +1 -0
- dvt/tests/fixtures/project.py +620 -0
- dvt/tests/util.py +651 -0
- dvt/tracking.py +529 -0
- dvt/utils/__init__.py +3 -0
- dvt/utils/artifact_upload.py +151 -0
- dvt/utils/utils.py +408 -0
- dvt/version.py +249 -0
- dvt_core-1.11.0b4.dist-info/METADATA +252 -0
- dvt_core-1.11.0b4.dist-info/RECORD +261 -0
- dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
- dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
- dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/parser/manifest.py
ADDED
@@ -0,0 +1,2204 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import pprint
|
|
4
|
+
import time
|
|
5
|
+
import traceback
|
|
6
|
+
from copy import deepcopy
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from datetime import date, datetime, timezone
|
|
9
|
+
from itertools import chain
|
|
10
|
+
from typing import Any, Callable, Dict, List, Mapping, Optional, Set, Tuple, Type, Union
|
|
11
|
+
|
|
12
|
+
import dvt.deprecations
|
|
13
|
+
import dvt.exceptions
|
|
14
|
+
import dvt.tracking
|
|
15
|
+
import dvt.utils
|
|
16
|
+
import msgpack
|
|
17
|
+
from dvt import plugins
|
|
18
|
+
from dvt.artifacts.resources import (
|
|
19
|
+
CatalogWriteIntegrationConfig,
|
|
20
|
+
FileHash,
|
|
21
|
+
NodeRelation,
|
|
22
|
+
NodeVersion,
|
|
23
|
+
)
|
|
24
|
+
from dvt.artifacts.resources.types import BatchSize
|
|
25
|
+
from dvt.artifacts.schemas.base import Writable
|
|
26
|
+
from dvt.clients.jinja import MacroStack, get_rendered
|
|
27
|
+
from dvt.clients.jinja_static import statically_extract_macro_calls
|
|
28
|
+
from dvt.config import Project, RuntimeConfig
|
|
29
|
+
from dvt.constants import (
|
|
30
|
+
MANIFEST_FILE_NAME,
|
|
31
|
+
PARTIAL_PARSE_FILE_NAME,
|
|
32
|
+
SEMANTIC_MANIFEST_FILE_NAME,
|
|
33
|
+
)
|
|
34
|
+
from dvt.context.configured import generate_macro_context
|
|
35
|
+
from dvt.context.docs import generate_runtime_docs_context
|
|
36
|
+
from dvt.context.macro_resolver import MacroResolver, TestMacroNamespace
|
|
37
|
+
from dvt.context.providers import ParseProvider, generate_runtime_macro_context
|
|
38
|
+
from dvt.context.query_header import generate_query_header_context
|
|
39
|
+
from dvt.contracts.files import ParseFileType, SchemaSourceFile
|
|
40
|
+
from dvt.contracts.graph.manifest import (
|
|
41
|
+
Disabled,
|
|
42
|
+
MacroManifest,
|
|
43
|
+
Manifest,
|
|
44
|
+
ManifestStateCheck,
|
|
45
|
+
ParsingInfo,
|
|
46
|
+
)
|
|
47
|
+
from dvt.contracts.graph.nodes import (
|
|
48
|
+
Exposure,
|
|
49
|
+
GenericTestNode,
|
|
50
|
+
Macro,
|
|
51
|
+
ManifestNode,
|
|
52
|
+
Metric,
|
|
53
|
+
ModelNode,
|
|
54
|
+
ResultNode,
|
|
55
|
+
SavedQuery,
|
|
56
|
+
SeedNode,
|
|
57
|
+
SemanticManifestNode,
|
|
58
|
+
SemanticModel,
|
|
59
|
+
SourceDefinition,
|
|
60
|
+
)
|
|
61
|
+
from dvt.contracts.graph.semantic_manifest import SemanticManifest
|
|
62
|
+
from dvt.events.types import (
|
|
63
|
+
ArtifactWritten,
|
|
64
|
+
DeprecatedModel,
|
|
65
|
+
DeprecatedReference,
|
|
66
|
+
InvalidConcurrentBatchesConfig,
|
|
67
|
+
InvalidDisabledTargetInTestNode,
|
|
68
|
+
MicrobatchModelNoEventTimeInputs,
|
|
69
|
+
NodeNotFoundOrDisabled,
|
|
70
|
+
ParsedFileLoadFailed,
|
|
71
|
+
ParsePerfInfoPath,
|
|
72
|
+
PartialParsingError,
|
|
73
|
+
PartialParsingErrorProcessingFile,
|
|
74
|
+
PartialParsingNotEnabled,
|
|
75
|
+
PartialParsingSkipParsing,
|
|
76
|
+
SpacesInResourceNameDeprecation,
|
|
77
|
+
StateCheckVarsHash,
|
|
78
|
+
UnableToPartialParse,
|
|
79
|
+
UpcomingReferenceDeprecation,
|
|
80
|
+
)
|
|
81
|
+
from dvt.exceptions import (
|
|
82
|
+
AmbiguousAliasError,
|
|
83
|
+
InvalidAccessTypeError,
|
|
84
|
+
TargetNotFoundError,
|
|
85
|
+
scrub_secrets,
|
|
86
|
+
)
|
|
87
|
+
from dvt.flags import get_flags
|
|
88
|
+
from dvt.mp_context import get_mp_context
|
|
89
|
+
from dvt.node_types import AccessType, NodeType
|
|
90
|
+
from dvt.parser.analysis import AnalysisParser
|
|
91
|
+
from dvt.parser.base import Parser
|
|
92
|
+
from dvt.parser.docs import DocumentationParser
|
|
93
|
+
from dvt.parser.fixtures import FixtureParser
|
|
94
|
+
from dvt.parser.functions import FunctionParser
|
|
95
|
+
from dvt.parser.generic_test import GenericTestParser
|
|
96
|
+
from dvt.parser.hooks import HookParser
|
|
97
|
+
from dvt.parser.macros import MacroParser
|
|
98
|
+
from dvt.parser.models import ModelParser
|
|
99
|
+
from dvt.parser.partial import PartialParsing, special_override_macros
|
|
100
|
+
from dvt.parser.read_files import (
|
|
101
|
+
FileDiff,
|
|
102
|
+
ReadFiles,
|
|
103
|
+
ReadFilesFromDiff,
|
|
104
|
+
ReadFilesFromFileSystem,
|
|
105
|
+
load_source_file,
|
|
106
|
+
)
|
|
107
|
+
from dvt.parser.schemas import SchemaParser
|
|
108
|
+
from dvt.parser.search import FileBlock
|
|
109
|
+
from dvt.parser.seeds import SeedParser
|
|
110
|
+
from dvt.parser.singular_test import SingularTestParser
|
|
111
|
+
from dvt.parser.snapshots import SnapshotParser
|
|
112
|
+
from dvt.parser.sources import SourcePatcher
|
|
113
|
+
from dvt.parser.unit_tests import process_models_for_unit_test
|
|
114
|
+
from dvt.utils.artifact_upload import add_artifact_produced
|
|
115
|
+
from dvt.version import __version__
|
|
116
|
+
from jinja2.nodes import Call
|
|
117
|
+
|
|
118
|
+
import dbt_common.utils
|
|
119
|
+
from dbt.adapters.capability import Capability
|
|
120
|
+
from dbt.adapters.factory import (
|
|
121
|
+
get_adapter,
|
|
122
|
+
get_adapter_package_names,
|
|
123
|
+
get_relation_class_by_name,
|
|
124
|
+
register_adapter,
|
|
125
|
+
)
|
|
126
|
+
from dbt_common.clients.jinja import parse
|
|
127
|
+
from dbt_common.clients.system import make_directory, path_exists, read_json, write_file
|
|
128
|
+
from dbt_common.constants import SECRET_ENV_PREFIX
|
|
129
|
+
from dbt_common.dataclass_schema import StrEnum, dbtClassMixin
|
|
130
|
+
from dbt_common.events.base_types import EventLevel
|
|
131
|
+
from dbt_common.events.functions import fire_event, get_invocation_id, warn_or_error
|
|
132
|
+
from dbt_common.events.types import Note
|
|
133
|
+
from dbt_common.exceptions.base import DbtValidationError
|
|
134
|
+
from dbt_common.helper_types import PathSet
|
|
135
|
+
from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
|
|
136
|
+
from dbt_semantic_interfaces.type_enums import MetricType
|
|
137
|
+
|
|
138
|
+
PERF_INFO_FILE_NAME = "perf_info.json"
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def extended_mashumaro_encoder(data):
|
|
142
|
+
return msgpack.packb(data, default=extended_msgpack_encoder, use_bin_type=True)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def extended_msgpack_encoder(obj):
|
|
146
|
+
if type(obj) is date:
|
|
147
|
+
date_bytes = msgpack.ExtType(1, obj.isoformat().encode())
|
|
148
|
+
return date_bytes
|
|
149
|
+
elif type(obj) is datetime:
|
|
150
|
+
datetime_bytes = msgpack.ExtType(2, obj.isoformat().encode())
|
|
151
|
+
return datetime_bytes
|
|
152
|
+
|
|
153
|
+
return obj
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def extended_mashumuro_decoder(data):
|
|
157
|
+
return msgpack.unpackb(data, ext_hook=extended_msgpack_decoder, raw=False)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def extended_msgpack_decoder(code, data):
|
|
161
|
+
if code == 1:
|
|
162
|
+
d = date.fromisoformat(data.decode())
|
|
163
|
+
return d
|
|
164
|
+
elif code == 2:
|
|
165
|
+
dt = datetime.fromisoformat(data.decode())
|
|
166
|
+
return dt
|
|
167
|
+
else:
|
|
168
|
+
return msgpack.ExtType(code, data)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def version_to_str(version: Optional[Union[str, int]]) -> str:
|
|
172
|
+
if isinstance(version, int):
|
|
173
|
+
return str(version)
|
|
174
|
+
elif isinstance(version, str):
|
|
175
|
+
return version
|
|
176
|
+
|
|
177
|
+
return ""
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class ReparseReason(StrEnum):
|
|
181
|
+
version_mismatch = "01_version_mismatch"
|
|
182
|
+
file_not_found = "02_file_not_found"
|
|
183
|
+
vars_changed = "03_vars_changed"
|
|
184
|
+
profile_changed = "04_profile_changed"
|
|
185
|
+
deps_changed = "05_deps_changed"
|
|
186
|
+
project_config_changed = "06_project_config_changed"
|
|
187
|
+
load_file_failure = "07_load_file_failure"
|
|
188
|
+
exception = "08_exception"
|
|
189
|
+
proj_env_vars_changed = "09_project_env_vars_changed"
|
|
190
|
+
prof_env_vars_changed = "10_profile_env_vars_changed"
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# Part of saved performance info
|
|
194
|
+
@dataclass
|
|
195
|
+
class ParserInfo(dbtClassMixin):
|
|
196
|
+
parser: str
|
|
197
|
+
elapsed: float
|
|
198
|
+
parsed_path_count: int = 0
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# Part of saved performance info
|
|
202
|
+
@dataclass
|
|
203
|
+
class ProjectLoaderInfo(dbtClassMixin):
|
|
204
|
+
project_name: str
|
|
205
|
+
elapsed: float
|
|
206
|
+
parsers: List[ParserInfo] = field(default_factory=list)
|
|
207
|
+
parsed_path_count: int = 0
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Part of saved performance info
|
|
211
|
+
@dataclass
|
|
212
|
+
class ManifestLoaderInfo(dbtClassMixin, Writable):
|
|
213
|
+
path_count: int = 0
|
|
214
|
+
parsed_path_count: int = 0
|
|
215
|
+
static_analysis_path_count: int = 0
|
|
216
|
+
static_analysis_parsed_path_count: int = 0
|
|
217
|
+
is_partial_parse_enabled: Optional[bool] = None
|
|
218
|
+
is_static_analysis_enabled: Optional[bool] = None
|
|
219
|
+
read_files_elapsed: Optional[float] = None
|
|
220
|
+
load_macros_elapsed: Optional[float] = None
|
|
221
|
+
parse_project_elapsed: Optional[float] = None
|
|
222
|
+
patch_sources_elapsed: Optional[float] = None
|
|
223
|
+
process_manifest_elapsed: Optional[float] = None
|
|
224
|
+
load_all_elapsed: Optional[float] = None
|
|
225
|
+
projects: List[ProjectLoaderInfo] = field(default_factory=list)
|
|
226
|
+
_project_index: Dict[str, ProjectLoaderInfo] = field(default_factory=dict)
|
|
227
|
+
|
|
228
|
+
def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
|
|
229
|
+
del dct["_project_index"]
|
|
230
|
+
return dct
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# The ManifestLoader loads the manifest. The standard way to use the
|
|
234
|
+
# ManifestLoader is using the 'get_full_manifest' class method, but
|
|
235
|
+
# many tests use abbreviated processes.
|
|
236
|
+
class ManifestLoader:
|
|
237
|
+
def __init__(
|
|
238
|
+
self,
|
|
239
|
+
root_project: RuntimeConfig,
|
|
240
|
+
all_projects: Mapping[str, RuntimeConfig],
|
|
241
|
+
macro_hook: Optional[Callable[[Manifest], Any]] = None,
|
|
242
|
+
file_diff: Optional[FileDiff] = None,
|
|
243
|
+
) -> None:
|
|
244
|
+
self.root_project: RuntimeConfig = root_project
|
|
245
|
+
self.all_projects: Mapping[str, RuntimeConfig] = all_projects
|
|
246
|
+
self.file_diff = file_diff
|
|
247
|
+
self.manifest: Manifest = Manifest()
|
|
248
|
+
self.new_manifest = self.manifest
|
|
249
|
+
self.manifest.metadata = root_project.get_metadata()
|
|
250
|
+
self.macro_resolver = None # built after macros are loaded
|
|
251
|
+
self.started_at = time.time()
|
|
252
|
+
# This is a MacroQueryStringSetter callable, which is called
|
|
253
|
+
# later after we set the MacroManifest in the adapter. It sets
|
|
254
|
+
# up the query headers.
|
|
255
|
+
self.macro_hook: Callable[[Manifest], Any]
|
|
256
|
+
if macro_hook is None:
|
|
257
|
+
self.macro_hook = lambda m: None
|
|
258
|
+
else:
|
|
259
|
+
self.macro_hook = macro_hook
|
|
260
|
+
|
|
261
|
+
self._perf_info = self.build_perf_info()
|
|
262
|
+
|
|
263
|
+
# State check determines whether the saved_manifest and the current
|
|
264
|
+
# manifest match well enough to do partial parsing
|
|
265
|
+
self.manifest.state_check = self.build_manifest_state_check()
|
|
266
|
+
# We need to know if we're actually partially parsing. It could
|
|
267
|
+
# have been enabled, but not happening because of some issue.
|
|
268
|
+
self.partially_parsing = False
|
|
269
|
+
self.partial_parser: Optional[PartialParsing] = None
|
|
270
|
+
self.skip_parsing = False
|
|
271
|
+
|
|
272
|
+
# This is a saved manifest from a previous run that's used for partial parsing
|
|
273
|
+
self.saved_manifest: Optional[Manifest] = self.read_manifest_for_partial_parse()
|
|
274
|
+
|
|
275
|
+
# This is the method that builds a complete manifest. We sometimes
|
|
276
|
+
# use an abbreviated process in tests.
|
|
277
|
+
@classmethod
|
|
278
|
+
def get_full_manifest(
|
|
279
|
+
cls,
|
|
280
|
+
config: RuntimeConfig,
|
|
281
|
+
*,
|
|
282
|
+
file_diff: Optional[FileDiff] = None,
|
|
283
|
+
reset: bool = False,
|
|
284
|
+
write_perf_info=False,
|
|
285
|
+
) -> Manifest:
|
|
286
|
+
adapter = get_adapter(config) # type: ignore
|
|
287
|
+
# reset is set in a TaskManager load_manifest call, since
|
|
288
|
+
# the config and adapter may be persistent.
|
|
289
|
+
if reset:
|
|
290
|
+
config.clear_dependencies()
|
|
291
|
+
adapter.clear_macro_resolver()
|
|
292
|
+
macro_hook = adapter.connections.set_query_header
|
|
293
|
+
|
|
294
|
+
flags = get_flags()
|
|
295
|
+
if not flags.PARTIAL_PARSE_FILE_DIFF:
|
|
296
|
+
file_diff = FileDiff.from_dict(
|
|
297
|
+
{
|
|
298
|
+
"deleted": [],
|
|
299
|
+
"changed": [],
|
|
300
|
+
"added": [],
|
|
301
|
+
}
|
|
302
|
+
)
|
|
303
|
+
# Hack to test file_diffs
|
|
304
|
+
elif os.environ.get("DBT_PP_FILE_DIFF_TEST"):
|
|
305
|
+
file_diff_path = "file_diff.json"
|
|
306
|
+
if path_exists(file_diff_path):
|
|
307
|
+
file_diff_dct = read_json(file_diff_path)
|
|
308
|
+
file_diff = FileDiff.from_dict(file_diff_dct)
|
|
309
|
+
|
|
310
|
+
# Start performance counting
|
|
311
|
+
start_load_all = time.perf_counter()
|
|
312
|
+
|
|
313
|
+
projects = config.load_dependencies()
|
|
314
|
+
loader = cls(
|
|
315
|
+
config,
|
|
316
|
+
projects,
|
|
317
|
+
macro_hook=macro_hook,
|
|
318
|
+
file_diff=file_diff,
|
|
319
|
+
)
|
|
320
|
+
|
|
321
|
+
manifest = loader.load()
|
|
322
|
+
|
|
323
|
+
_check_manifest(manifest, config)
|
|
324
|
+
manifest.build_flat_graph()
|
|
325
|
+
|
|
326
|
+
# This needs to happen after loading from a partial parse,
|
|
327
|
+
# so that the adapter has the query headers from the macro_hook.
|
|
328
|
+
loader.save_macros_to_adapter(adapter)
|
|
329
|
+
|
|
330
|
+
# Save performance info
|
|
331
|
+
loader._perf_info.load_all_elapsed = time.perf_counter() - start_load_all
|
|
332
|
+
loader.track_project_load()
|
|
333
|
+
|
|
334
|
+
if write_perf_info:
|
|
335
|
+
loader.write_perf_info(config.project_target_path)
|
|
336
|
+
|
|
337
|
+
return manifest
|
|
338
|
+
|
|
339
|
+
# This is where the main action happens
|
|
340
|
+
def load(self) -> Manifest:
|
|
341
|
+
start_read_files = time.perf_counter()
|
|
342
|
+
|
|
343
|
+
# This updates the "files" dictionary in self.manifest, and creates
|
|
344
|
+
# the partial_parser_files dictionary (see read_files.py),
|
|
345
|
+
# which is a dictionary of projects to a dictionary
|
|
346
|
+
# of parsers to lists of file strings. The file strings are
|
|
347
|
+
# used to get the SourceFiles from the manifest files.
|
|
348
|
+
saved_files = self.saved_manifest.files if self.saved_manifest else {}
|
|
349
|
+
file_reader: Optional[ReadFiles] = None
|
|
350
|
+
if self.file_diff:
|
|
351
|
+
# We're getting files from a file diff
|
|
352
|
+
file_reader = ReadFilesFromDiff(
|
|
353
|
+
all_projects=self.all_projects,
|
|
354
|
+
files=self.manifest.files,
|
|
355
|
+
saved_files=saved_files,
|
|
356
|
+
root_project_name=self.root_project.project_name,
|
|
357
|
+
file_diff=self.file_diff,
|
|
358
|
+
)
|
|
359
|
+
else:
|
|
360
|
+
# We're getting files from the file system
|
|
361
|
+
file_reader = ReadFilesFromFileSystem(
|
|
362
|
+
all_projects=self.all_projects,
|
|
363
|
+
files=self.manifest.files,
|
|
364
|
+
saved_files=saved_files,
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
# Set the files in the manifest and save the project_parser_files
|
|
368
|
+
file_reader.read_files()
|
|
369
|
+
self.manifest.files = file_reader.files
|
|
370
|
+
project_parser_files = orig_project_parser_files = file_reader.project_parser_files
|
|
371
|
+
self._perf_info.path_count = len(self.manifest.files)
|
|
372
|
+
self._perf_info.read_files_elapsed = time.perf_counter() - start_read_files
|
|
373
|
+
|
|
374
|
+
self.skip_parsing = False
|
|
375
|
+
project_parser_files = self.safe_update_project_parser_files_partially(
|
|
376
|
+
project_parser_files
|
|
377
|
+
)
|
|
378
|
+
|
|
379
|
+
if self.manifest._parsing_info is None:
|
|
380
|
+
self.manifest._parsing_info = ParsingInfo()
|
|
381
|
+
|
|
382
|
+
if self.skip_parsing:
|
|
383
|
+
fire_event(PartialParsingSkipParsing())
|
|
384
|
+
else:
|
|
385
|
+
# Load Macros and tests
|
|
386
|
+
# We need to parse the macros first, so they're resolvable when
|
|
387
|
+
# the other files are loaded. Also need to parse tests, specifically
|
|
388
|
+
# generic tests
|
|
389
|
+
start_load_macros = time.perf_counter()
|
|
390
|
+
self.load_and_parse_macros(project_parser_files)
|
|
391
|
+
|
|
392
|
+
# If we're partially parsing check that certain macros have not been changed
|
|
393
|
+
if self.partially_parsing and self.skip_partial_parsing_because_of_macros():
|
|
394
|
+
fire_event(
|
|
395
|
+
UnableToPartialParse(
|
|
396
|
+
reason="change detected to override macro. Starting full parse."
|
|
397
|
+
)
|
|
398
|
+
)
|
|
399
|
+
|
|
400
|
+
# Get new Manifest with original file records and move over the macros
|
|
401
|
+
self.manifest = self.new_manifest # contains newly read files
|
|
402
|
+
project_parser_files = orig_project_parser_files
|
|
403
|
+
self.partially_parsing = False
|
|
404
|
+
self.load_and_parse_macros(project_parser_files)
|
|
405
|
+
|
|
406
|
+
self._perf_info.load_macros_elapsed = time.perf_counter() - start_load_macros
|
|
407
|
+
|
|
408
|
+
# Now that the macros are parsed, parse the rest of the files.
|
|
409
|
+
# This is currently done on a per project basis.
|
|
410
|
+
start_parse_projects = time.perf_counter()
|
|
411
|
+
|
|
412
|
+
# Load the rest of the files except for schema yaml files
|
|
413
|
+
parser_types: List[Type[Parser]] = [
|
|
414
|
+
ModelParser,
|
|
415
|
+
SnapshotParser,
|
|
416
|
+
AnalysisParser,
|
|
417
|
+
SingularTestParser,
|
|
418
|
+
SeedParser,
|
|
419
|
+
DocumentationParser,
|
|
420
|
+
HookParser,
|
|
421
|
+
FixtureParser,
|
|
422
|
+
FunctionParser,
|
|
423
|
+
]
|
|
424
|
+
for project in self.all_projects.values():
|
|
425
|
+
if project.project_name not in project_parser_files:
|
|
426
|
+
continue
|
|
427
|
+
self.parse_project(
|
|
428
|
+
project, project_parser_files[project.project_name], parser_types
|
|
429
|
+
)
|
|
430
|
+
|
|
431
|
+
# Now that we've loaded most of the nodes (except for schema tests, sources, metrics)
|
|
432
|
+
# load up the Lookup objects to resolve them by name, so the SourceFiles store
|
|
433
|
+
# the unique_id instead of the name. Sources are loaded from yaml files, so
|
|
434
|
+
# aren't in place yet
|
|
435
|
+
self.manifest.rebuild_ref_lookup()
|
|
436
|
+
self.manifest.rebuild_doc_lookup()
|
|
437
|
+
self.manifest.rebuild_disabled_lookup()
|
|
438
|
+
|
|
439
|
+
# Load yaml files
|
|
440
|
+
parser_types = [SchemaParser] # type: ignore
|
|
441
|
+
for project in self.all_projects.values():
|
|
442
|
+
if project.project_name not in project_parser_files:
|
|
443
|
+
continue
|
|
444
|
+
self.parse_project(
|
|
445
|
+
project, project_parser_files[project.project_name], parser_types
|
|
446
|
+
)
|
|
447
|
+
|
|
448
|
+
self.cleanup_disabled()
|
|
449
|
+
|
|
450
|
+
self._perf_info.parse_project_elapsed = time.perf_counter() - start_parse_projects
|
|
451
|
+
|
|
452
|
+
# patch_sources converts the UnparsedSourceDefinitions in the
|
|
453
|
+
# Manifest.sources to SourceDefinition via 'patch_source'
|
|
454
|
+
# in SourcePatcher
|
|
455
|
+
start_patch = time.perf_counter()
|
|
456
|
+
patcher = SourcePatcher(self.root_project, self.manifest)
|
|
457
|
+
patcher.construct_sources()
|
|
458
|
+
self.manifest.sources = patcher.sources
|
|
459
|
+
self._perf_info.patch_sources_elapsed = time.perf_counter() - start_patch
|
|
460
|
+
|
|
461
|
+
# We need to rebuild disabled in order to include disabled sources
|
|
462
|
+
self.manifest.rebuild_disabled_lookup()
|
|
463
|
+
|
|
464
|
+
# copy the selectors from the root_project to the manifest
|
|
465
|
+
self.manifest.selectors = self.root_project.manifest_selectors
|
|
466
|
+
|
|
467
|
+
# inject any available external nodes
|
|
468
|
+
self.manifest.build_parent_and_child_maps()
|
|
469
|
+
external_nodes_modified = self.inject_external_nodes()
|
|
470
|
+
if external_nodes_modified:
|
|
471
|
+
self.manifest.rebuild_ref_lookup()
|
|
472
|
+
|
|
473
|
+
# update the refs, sources, docs and metrics depends_on.nodes
|
|
474
|
+
# These check the created_at time on the nodes to
|
|
475
|
+
# determine whether they need processing.
|
|
476
|
+
start_process = time.perf_counter()
|
|
477
|
+
self.process_sources(self.root_project.project_name)
|
|
478
|
+
self.process_refs(self.root_project.project_name, self.root_project.dependencies)
|
|
479
|
+
self.process_unit_tests(self.root_project.project_name)
|
|
480
|
+
self.process_docs(self.root_project)
|
|
481
|
+
self.process_metrics(self.root_project)
|
|
482
|
+
self.process_saved_queries(self.root_project)
|
|
483
|
+
self.process_model_inferred_primary_keys()
|
|
484
|
+
self.process_functions(self.root_project.project_name)
|
|
485
|
+
self.check_valid_group_config()
|
|
486
|
+
self.check_valid_access_property()
|
|
487
|
+
self.check_valid_snapshot_config()
|
|
488
|
+
self.check_valid_microbatch_config()
|
|
489
|
+
|
|
490
|
+
semantic_manifest = SemanticManifest(self.manifest)
|
|
491
|
+
if not semantic_manifest.validate():
|
|
492
|
+
raise dbt.exceptions.ParsingError("Semantic Manifest validation failed.")
|
|
493
|
+
|
|
494
|
+
# update tracking data
|
|
495
|
+
self._perf_info.process_manifest_elapsed = time.perf_counter() - start_process
|
|
496
|
+
self._perf_info.static_analysis_parsed_path_count = (
|
|
497
|
+
self.manifest._parsing_info.static_analysis_parsed_path_count
|
|
498
|
+
)
|
|
499
|
+
self._perf_info.static_analysis_path_count = (
|
|
500
|
+
self.manifest._parsing_info.static_analysis_path_count
|
|
501
|
+
)
|
|
502
|
+
|
|
503
|
+
# Inject any available external nodes, reprocess refs if changes to the manifest were made.
|
|
504
|
+
external_nodes_modified = False
|
|
505
|
+
if self.skip_parsing:
|
|
506
|
+
# If we didn't skip parsing, this will have already run because it must run
|
|
507
|
+
# before process_refs. If we did skip parsing, then it's possible that only
|
|
508
|
+
# external nodes have changed and we need to run this to capture that.
|
|
509
|
+
self.manifest.build_parent_and_child_maps()
|
|
510
|
+
external_nodes_modified = self.inject_external_nodes()
|
|
511
|
+
if external_nodes_modified:
|
|
512
|
+
self.manifest.rebuild_ref_lookup()
|
|
513
|
+
self.process_refs(
|
|
514
|
+
self.root_project.project_name,
|
|
515
|
+
self.root_project.dependencies,
|
|
516
|
+
)
|
|
517
|
+
# parent and child maps will be rebuilt by write_manifest
|
|
518
|
+
|
|
519
|
+
if not self.skip_parsing or external_nodes_modified:
|
|
520
|
+
# write out the fully parsed manifest
|
|
521
|
+
self.write_manifest_for_partial_parse()
|
|
522
|
+
|
|
523
|
+
self.check_for_model_deprecations()
|
|
524
|
+
self.check_for_spaces_in_resource_names()
|
|
525
|
+
self.check_for_microbatch_deprecations()
|
|
526
|
+
self.check_forcing_batch_concurrency()
|
|
527
|
+
self.check_microbatch_model_has_a_filtered_input()
|
|
528
|
+
|
|
529
|
+
return self.manifest
|
|
530
|
+
|
|
531
|
+
def safe_update_project_parser_files_partially(self, project_parser_files: Dict) -> Dict:
|
|
532
|
+
if self.saved_manifest is None:
|
|
533
|
+
return project_parser_files
|
|
534
|
+
|
|
535
|
+
self.partial_parser = PartialParsing(self.saved_manifest, self.manifest.files) # type: ignore[arg-type]
|
|
536
|
+
self.skip_parsing = self.partial_parser.skip_parsing()
|
|
537
|
+
if self.skip_parsing:
|
|
538
|
+
# nothing changed, so we don't need to generate project_parser_files
|
|
539
|
+
self.manifest = self.saved_manifest # type: ignore[assignment]
|
|
540
|
+
else:
|
|
541
|
+
# create child_map and parent_map
|
|
542
|
+
self.saved_manifest.build_parent_and_child_maps() # type: ignore[union-attr]
|
|
543
|
+
# create group_map
|
|
544
|
+
self.saved_manifest.build_group_map() # type: ignore[union-attr]
|
|
545
|
+
# files are different, we need to create a new set of
|
|
546
|
+
# project_parser_files.
|
|
547
|
+
try:
|
|
548
|
+
project_parser_files = self.partial_parser.get_parsing_files()
|
|
549
|
+
self.partially_parsing = True
|
|
550
|
+
self.manifest = self.saved_manifest # type: ignore[assignment]
|
|
551
|
+
except Exception as exc:
|
|
552
|
+
# pp_files should still be the full set and manifest is new manifest,
|
|
553
|
+
# since get_parsing_files failed
|
|
554
|
+
fire_event(
|
|
555
|
+
UnableToPartialParse(reason="an error occurred. Switching to full reparse.")
|
|
556
|
+
)
|
|
557
|
+
|
|
558
|
+
# Get traceback info
|
|
559
|
+
tb_info = traceback.format_exc()
|
|
560
|
+
# index last stack frame in traceback (i.e. lastest exception and its context)
|
|
561
|
+
tb_last_frame = traceback.extract_tb(exc.__traceback__)[-1]
|
|
562
|
+
exc_info = {
|
|
563
|
+
"traceback": tb_info,
|
|
564
|
+
"exception": tb_info.splitlines()[-1],
|
|
565
|
+
"code": tb_last_frame.line, # if the source is not available, it is None
|
|
566
|
+
"location": f"line {tb_last_frame.lineno} in {tb_last_frame.name}",
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
# get file info for local logs
|
|
570
|
+
parse_file_type: str = ""
|
|
571
|
+
file_id = self.partial_parser.processing_file
|
|
572
|
+
if file_id:
|
|
573
|
+
source_file = None
|
|
574
|
+
if file_id in self.saved_manifest.files:
|
|
575
|
+
source_file = self.saved_manifest.files[file_id]
|
|
576
|
+
elif file_id in self.manifest.files:
|
|
577
|
+
source_file = self.manifest.files[file_id]
|
|
578
|
+
if source_file:
|
|
579
|
+
parse_file_type = source_file.parse_file_type
|
|
580
|
+
fire_event(PartialParsingErrorProcessingFile(file=file_id))
|
|
581
|
+
exc_info["parse_file_type"] = parse_file_type
|
|
582
|
+
fire_event(PartialParsingError(exc_info=exc_info))
|
|
583
|
+
# Send event
|
|
584
|
+
if dbt.tracking.active_user is not None:
|
|
585
|
+
exc_info["full_reparse_reason"] = ReparseReason.exception
|
|
586
|
+
dbt.tracking.track_partial_parser(exc_info)
|
|
587
|
+
|
|
588
|
+
if os.environ.get("DBT_PP_TEST"):
|
|
589
|
+
raise exc
|
|
590
|
+
|
|
591
|
+
return project_parser_files
|
|
592
|
+
|
|
593
|
+
def check_for_model_deprecations(self):
|
|
594
|
+
# build parent and child_maps
|
|
595
|
+
self.manifest.build_parent_and_child_maps()
|
|
596
|
+
for node in self.manifest.nodes.values():
|
|
597
|
+
if isinstance(node, ModelNode) and node.deprecation_date:
|
|
598
|
+
if node.is_past_deprecation_date:
|
|
599
|
+
warn_or_error(
|
|
600
|
+
DeprecatedModel(
|
|
601
|
+
model_name=node.name,
|
|
602
|
+
model_version=version_to_str(node.version),
|
|
603
|
+
deprecation_date=node.deprecation_date.isoformat(),
|
|
604
|
+
)
|
|
605
|
+
)
|
|
606
|
+
# At this point _process_refs should already have been called, and
|
|
607
|
+
# we just rebuilt the parent and child maps.
|
|
608
|
+
# Get the child_nodes and check for deprecations.
|
|
609
|
+
child_nodes = self.manifest.child_map[node.unique_id]
|
|
610
|
+
for child_unique_id in child_nodes:
|
|
611
|
+
child_node = self.manifest.nodes.get(child_unique_id)
|
|
612
|
+
if not isinstance(child_node, ModelNode):
|
|
613
|
+
continue
|
|
614
|
+
if node.is_past_deprecation_date:
|
|
615
|
+
event_cls = DeprecatedReference
|
|
616
|
+
else:
|
|
617
|
+
event_cls = UpcomingReferenceDeprecation
|
|
618
|
+
|
|
619
|
+
warn_or_error(
|
|
620
|
+
event_cls(
|
|
621
|
+
model_name=child_node.name,
|
|
622
|
+
ref_model_package=node.package_name,
|
|
623
|
+
ref_model_name=node.name,
|
|
624
|
+
ref_model_version=version_to_str(node.version),
|
|
625
|
+
ref_model_latest_version=str(node.latest_version),
|
|
626
|
+
ref_model_deprecation_date=node.deprecation_date.isoformat(),
|
|
627
|
+
)
|
|
628
|
+
)
|
|
629
|
+
|
|
630
|
+
def check_for_spaces_in_resource_names(self):
|
|
631
|
+
"""Validates that resource names do not contain spaces
|
|
632
|
+
|
|
633
|
+
If `DEBUG` flag is `False`, logs only first bad model name
|
|
634
|
+
If `DEBUG` flag is `True`, logs every bad model name
|
|
635
|
+
If `REQUIRE_RESOURCE_NAMES_WITHOUT_SPACES` is `True`, logs are `ERROR` level and an exception is raised if any names are bad
|
|
636
|
+
If `REQUIRE_RESOURCE_NAMES_WITHOUT_SPACES` is `False`, logs are `WARN` level
|
|
637
|
+
"""
|
|
638
|
+
improper_resource_names = 0
|
|
639
|
+
level = (
|
|
640
|
+
EventLevel.ERROR
|
|
641
|
+
if self.root_project.args.REQUIRE_RESOURCE_NAMES_WITHOUT_SPACES
|
|
642
|
+
else EventLevel.WARN
|
|
643
|
+
)
|
|
644
|
+
|
|
645
|
+
flags = get_flags()
|
|
646
|
+
|
|
647
|
+
for node in self.manifest.nodes.values():
|
|
648
|
+
if " " in node.name:
|
|
649
|
+
if improper_resource_names == 0 or flags.DEBUG:
|
|
650
|
+
fire_event(
|
|
651
|
+
SpacesInResourceNameDeprecation(
|
|
652
|
+
unique_id=node.unique_id,
|
|
653
|
+
level=level.value,
|
|
654
|
+
),
|
|
655
|
+
level=level,
|
|
656
|
+
)
|
|
657
|
+
improper_resource_names += 1
|
|
658
|
+
|
|
659
|
+
if improper_resource_names > 0:
|
|
660
|
+
if level == EventLevel.WARN:
|
|
661
|
+
dbt.deprecations.warn(
|
|
662
|
+
"resource-names-with-spaces",
|
|
663
|
+
count_invalid_names=improper_resource_names,
|
|
664
|
+
show_debug_hint=(not flags.DEBUG),
|
|
665
|
+
)
|
|
666
|
+
else: # ERROR level
|
|
667
|
+
raise DbtValidationError("Resource names cannot contain spaces")
|
|
668
|
+
|
|
669
|
+
def check_for_microbatch_deprecations(self) -> None:
|
|
670
|
+
if not get_flags().require_batched_execution_for_custom_microbatch_strategy:
|
|
671
|
+
has_microbatch_model = False
|
|
672
|
+
for _, node in self.manifest.nodes.items():
|
|
673
|
+
if (
|
|
674
|
+
isinstance(node, ModelNode)
|
|
675
|
+
and node.config.materialized == "incremental"
|
|
676
|
+
and node.config.incremental_strategy == "microbatch"
|
|
677
|
+
):
|
|
678
|
+
has_microbatch_model = True
|
|
679
|
+
break
|
|
680
|
+
|
|
681
|
+
if has_microbatch_model and not self.manifest._microbatch_macro_is_core(
|
|
682
|
+
self.root_project.project_name
|
|
683
|
+
):
|
|
684
|
+
dbt.deprecations.warn("microbatch-macro-outside-of-batches-deprecation")
|
|
685
|
+
|
|
686
|
+
def load_and_parse_macros(self, project_parser_files):
|
|
687
|
+
for project in self.all_projects.values():
|
|
688
|
+
if project.project_name not in project_parser_files:
|
|
689
|
+
continue
|
|
690
|
+
parser_files = project_parser_files[project.project_name]
|
|
691
|
+
if "MacroParser" in parser_files:
|
|
692
|
+
parser = MacroParser(project, self.manifest)
|
|
693
|
+
for file_id in parser_files["MacroParser"]:
|
|
694
|
+
block = FileBlock(self.manifest.files[file_id])
|
|
695
|
+
parser.parse_file(block)
|
|
696
|
+
# increment parsed path count for performance tracking
|
|
697
|
+
self._perf_info.parsed_path_count += 1
|
|
698
|
+
# generic tests hisotrically lived in the macros directoy but can now be nested
|
|
699
|
+
# in a /generic directory under /tests so we want to process them here as well
|
|
700
|
+
if "GenericTestParser" in parser_files:
|
|
701
|
+
parser = GenericTestParser(project, self.manifest)
|
|
702
|
+
for file_id in parser_files["GenericTestParser"]:
|
|
703
|
+
block = FileBlock(self.manifest.files[file_id])
|
|
704
|
+
parser.parse_file(block)
|
|
705
|
+
# increment parsed path count for performance tracking
|
|
706
|
+
self._perf_info.parsed_path_count += 1
|
|
707
|
+
|
|
708
|
+
self.build_macro_resolver()
|
|
709
|
+
# Look at changed macros and update the macro.depends_on.macros
|
|
710
|
+
self.macro_depends_on()
|
|
711
|
+
|
|
712
|
+
# Parse the files in the 'parser_files' dictionary, for parsers listed in
|
|
713
|
+
# 'parser_types'
|
|
714
|
+
def parse_project(
|
|
715
|
+
self,
|
|
716
|
+
project: RuntimeConfig,
|
|
717
|
+
parser_files,
|
|
718
|
+
parser_types: List[Type[Parser]],
|
|
719
|
+
) -> None:
|
|
720
|
+
|
|
721
|
+
project_loader_info = self._perf_info._project_index[project.project_name]
|
|
722
|
+
start_timer = time.perf_counter()
|
|
723
|
+
total_parsed_path_count = 0
|
|
724
|
+
|
|
725
|
+
# Loop through parsers with loaded files.
|
|
726
|
+
for parser_cls in parser_types:
|
|
727
|
+
parser_name = parser_cls.__name__
|
|
728
|
+
# No point in creating a parser if we don't have files for it
|
|
729
|
+
if parser_name not in parser_files or not parser_files[parser_name]:
|
|
730
|
+
continue
|
|
731
|
+
|
|
732
|
+
# Initialize timing info
|
|
733
|
+
project_parsed_path_count = 0
|
|
734
|
+
parser_start_timer = time.perf_counter()
|
|
735
|
+
|
|
736
|
+
# Parse the project files for this parser
|
|
737
|
+
parser: Parser = parser_cls(project, self.manifest, self.root_project)
|
|
738
|
+
for file_id in parser_files[parser_name]:
|
|
739
|
+
block = FileBlock(self.manifest.files[file_id])
|
|
740
|
+
if isinstance(parser, SchemaParser):
|
|
741
|
+
assert isinstance(block.file, SchemaSourceFile)
|
|
742
|
+
if self.partially_parsing:
|
|
743
|
+
dct = block.file.pp_dict
|
|
744
|
+
else:
|
|
745
|
+
dct = block.file.dict_from_yaml
|
|
746
|
+
# this is where the schema file gets parsed
|
|
747
|
+
parser.parse_file(block, dct=dct)
|
|
748
|
+
# Came out of here with UnpatchedSourceDefinition containing configs at the source level
|
|
749
|
+
# and not configs at the table level (as expected)
|
|
750
|
+
else:
|
|
751
|
+
parser.parse_file(block)
|
|
752
|
+
project_parsed_path_count += 1
|
|
753
|
+
|
|
754
|
+
# Save timing info
|
|
755
|
+
project_loader_info.parsers.append(
|
|
756
|
+
ParserInfo(
|
|
757
|
+
parser=parser.resource_type,
|
|
758
|
+
parsed_path_count=project_parsed_path_count,
|
|
759
|
+
elapsed=time.perf_counter() - parser_start_timer,
|
|
760
|
+
)
|
|
761
|
+
)
|
|
762
|
+
total_parsed_path_count += project_parsed_path_count
|
|
763
|
+
|
|
764
|
+
# HookParser doesn't run from loaded files, just dbt_project.yml,
|
|
765
|
+
# so do separately
|
|
766
|
+
# This shouldn't need to be parsed again if we're starting from
|
|
767
|
+
# a saved manifest, because that won't be allowed if dbt_project.yml
|
|
768
|
+
# changed, but leave for now.
|
|
769
|
+
if not self.partially_parsing and HookParser in parser_types:
|
|
770
|
+
hook_parser = HookParser(project, self.manifest, self.root_project)
|
|
771
|
+
path = hook_parser.get_path()
|
|
772
|
+
file = load_source_file(path, ParseFileType.Hook, project.project_name, {})
|
|
773
|
+
if file:
|
|
774
|
+
file_block = FileBlock(file)
|
|
775
|
+
hook_parser.parse_file(file_block)
|
|
776
|
+
|
|
777
|
+
# Store the performance info
|
|
778
|
+
elapsed = time.perf_counter() - start_timer
|
|
779
|
+
project_loader_info.parsed_path_count = (
|
|
780
|
+
project_loader_info.parsed_path_count + total_parsed_path_count
|
|
781
|
+
)
|
|
782
|
+
project_loader_info.elapsed += elapsed
|
|
783
|
+
self._perf_info.parsed_path_count = (
|
|
784
|
+
self._perf_info.parsed_path_count + total_parsed_path_count
|
|
785
|
+
)
|
|
786
|
+
|
|
787
|
+
# This should only be called after the macros have been loaded
|
|
788
|
+
def build_macro_resolver(self):
|
|
789
|
+
internal_package_names = get_adapter_package_names(self.root_project.credentials.type)
|
|
790
|
+
self.macro_resolver = MacroResolver(
|
|
791
|
+
self.manifest.macros, self.root_project.project_name, internal_package_names
|
|
792
|
+
)
|
|
793
|
+
|
|
794
|
+
# Loop through macros in the manifest and statically parse
|
|
795
|
+
# the 'macro_sql' to find depends_on.macros
|
|
796
|
+
def macro_depends_on(self):
|
|
797
|
+
macro_ctx = generate_macro_context(self.root_project)
|
|
798
|
+
macro_namespace = TestMacroNamespace(self.macro_resolver, {}, None, MacroStack(), [])
|
|
799
|
+
adapter = get_adapter(self.root_project)
|
|
800
|
+
db_wrapper = ParseProvider().DatabaseWrapper(adapter, macro_namespace)
|
|
801
|
+
for macro in self.manifest.macros.values():
|
|
802
|
+
if macro.created_at < self.started_at:
|
|
803
|
+
continue
|
|
804
|
+
possible_macro_calls = statically_extract_macro_calls(
|
|
805
|
+
macro.macro_sql, macro_ctx, db_wrapper
|
|
806
|
+
)
|
|
807
|
+
for macro_name in possible_macro_calls:
|
|
808
|
+
# adapter.dispatch calls can generate a call with the same name as the macro
|
|
809
|
+
# it ought to be an adapter prefix (postgres_) or default_
|
|
810
|
+
if macro_name == macro.name:
|
|
811
|
+
continue
|
|
812
|
+
package_name = macro.package_name
|
|
813
|
+
if "." in macro_name:
|
|
814
|
+
package_name, macro_name = macro_name.split(".")
|
|
815
|
+
dep_macro_id = self.macro_resolver.get_macro_id(package_name, macro_name)
|
|
816
|
+
if dep_macro_id:
|
|
817
|
+
macro.depends_on.add_macro(dep_macro_id) # will check for dupes
|
|
818
|
+
|
|
819
|
+
def write_manifest_for_partial_parse(self):
|
|
820
|
+
path = os.path.join(self.root_project.project_target_path, PARTIAL_PARSE_FILE_NAME)
|
|
821
|
+
try:
|
|
822
|
+
# This shouldn't be necessary, but we have gotten bug reports (#3757) of the
|
|
823
|
+
# saved manifest not matching the code version.
|
|
824
|
+
if self.manifest.metadata.dbt_version != __version__:
|
|
825
|
+
fire_event(
|
|
826
|
+
UnableToPartialParse(reason="saved manifest contained the wrong version")
|
|
827
|
+
)
|
|
828
|
+
self.manifest.metadata.dbt_version = __version__
|
|
829
|
+
manifest_msgpack = self.manifest.to_msgpack(extended_mashumaro_encoder)
|
|
830
|
+
make_directory(os.path.dirname(path))
|
|
831
|
+
with open(path, "wb") as fp:
|
|
832
|
+
fp.write(manifest_msgpack)
|
|
833
|
+
except Exception:
|
|
834
|
+
raise
|
|
835
|
+
|
|
836
|
+
def inject_external_nodes(self) -> bool:
|
|
837
|
+
# Remove previously existing external nodes since we are regenerating them
|
|
838
|
+
manifest_nodes_modified = False
|
|
839
|
+
# Remove all dependent nodes before removing referencing nodes
|
|
840
|
+
for unique_id in self.manifest.external_node_unique_ids:
|
|
841
|
+
remove_dependent_project_references(self.manifest, unique_id)
|
|
842
|
+
manifest_nodes_modified = True
|
|
843
|
+
for unique_id in self.manifest.external_node_unique_ids:
|
|
844
|
+
# remove external nodes from manifest only after dependent project references safely removed
|
|
845
|
+
self.manifest.nodes.pop(unique_id)
|
|
846
|
+
|
|
847
|
+
# Inject any newly-available external nodes
|
|
848
|
+
pm = plugins.get_plugin_manager(self.root_project.project_name)
|
|
849
|
+
plugin_model_nodes = pm.get_nodes().models
|
|
850
|
+
for node_arg in plugin_model_nodes.values():
|
|
851
|
+
node = ModelNode.from_args(node_arg)
|
|
852
|
+
# node may already exist from package or running project (even if it is disabled),
|
|
853
|
+
# in which case we should avoid clobbering it with an external node
|
|
854
|
+
if (
|
|
855
|
+
node.unique_id not in self.manifest.nodes
|
|
856
|
+
and node.unique_id not in self.manifest.disabled
|
|
857
|
+
):
|
|
858
|
+
self.manifest.add_node_nofile(node)
|
|
859
|
+
manifest_nodes_modified = True
|
|
860
|
+
|
|
861
|
+
return manifest_nodes_modified
|
|
862
|
+
|
|
863
|
+
def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]:
|
|
864
|
+
"""Compare the global hashes of the read-in parse results' values to
|
|
865
|
+
the known ones, and return if it is ok to re-use the results.
|
|
866
|
+
"""
|
|
867
|
+
valid = True
|
|
868
|
+
reparse_reason = None
|
|
869
|
+
|
|
870
|
+
if manifest.metadata.dbt_version != __version__:
|
|
871
|
+
# #3757 log both versions because of reports of invalid cases of mismatch.
|
|
872
|
+
fire_event(UnableToPartialParse(reason="of a version mismatch"))
|
|
873
|
+
# If the version is wrong, the other checks might not work
|
|
874
|
+
return False, ReparseReason.version_mismatch
|
|
875
|
+
if self.manifest.state_check.vars_hash != manifest.state_check.vars_hash:
|
|
876
|
+
fire_event(
|
|
877
|
+
UnableToPartialParse(
|
|
878
|
+
reason="config vars, config profile, or config target have changed"
|
|
879
|
+
)
|
|
880
|
+
)
|
|
881
|
+
fire_event(
|
|
882
|
+
Note(
|
|
883
|
+
msg=f"previous checksum: {self.manifest.state_check.vars_hash.checksum}, current checksum: {manifest.state_check.vars_hash.checksum}"
|
|
884
|
+
),
|
|
885
|
+
level=EventLevel.DEBUG,
|
|
886
|
+
)
|
|
887
|
+
valid = False
|
|
888
|
+
reparse_reason = ReparseReason.vars_changed
|
|
889
|
+
if self.manifest.state_check.profile_hash != manifest.state_check.profile_hash:
|
|
890
|
+
# Note: This should be made more granular. We shouldn't need to invalidate
|
|
891
|
+
# partial parsing if a non-used profile section has changed.
|
|
892
|
+
fire_event(UnableToPartialParse(reason="profile has changed"))
|
|
893
|
+
valid = False
|
|
894
|
+
reparse_reason = ReparseReason.profile_changed
|
|
895
|
+
if (
|
|
896
|
+
self.manifest.state_check.project_env_vars_hash
|
|
897
|
+
!= manifest.state_check.project_env_vars_hash
|
|
898
|
+
):
|
|
899
|
+
fire_event(
|
|
900
|
+
UnableToPartialParse(reason="env vars used in dbt_project.yml have changed")
|
|
901
|
+
)
|
|
902
|
+
valid = False
|
|
903
|
+
reparse_reason = ReparseReason.proj_env_vars_changed
|
|
904
|
+
|
|
905
|
+
missing_keys = {
|
|
906
|
+
k
|
|
907
|
+
for k in self.manifest.state_check.project_hashes
|
|
908
|
+
if k not in manifest.state_check.project_hashes
|
|
909
|
+
}
|
|
910
|
+
if missing_keys:
|
|
911
|
+
fire_event(UnableToPartialParse(reason="a project dependency has been added"))
|
|
912
|
+
valid = False
|
|
913
|
+
reparse_reason = ReparseReason.deps_changed
|
|
914
|
+
|
|
915
|
+
for key, new_value in self.manifest.state_check.project_hashes.items():
|
|
916
|
+
if key in manifest.state_check.project_hashes:
|
|
917
|
+
old_value = manifest.state_check.project_hashes[key]
|
|
918
|
+
if new_value != old_value:
|
|
919
|
+
fire_event(UnableToPartialParse(reason="a project config has changed"))
|
|
920
|
+
valid = False
|
|
921
|
+
reparse_reason = ReparseReason.project_config_changed
|
|
922
|
+
return valid, reparse_reason
|
|
923
|
+
|
|
924
|
+
def skip_partial_parsing_because_of_macros(self):
|
|
925
|
+
if not self.partial_parser:
|
|
926
|
+
return False
|
|
927
|
+
if self.partial_parser.deleted_special_override_macro:
|
|
928
|
+
return True
|
|
929
|
+
# Check for custom versions of these special macros
|
|
930
|
+
for macro_name in special_override_macros:
|
|
931
|
+
macro = self.macro_resolver.get_macro(None, macro_name)
|
|
932
|
+
if macro and macro.package_name != "dbt":
|
|
933
|
+
if (
|
|
934
|
+
macro.file_id in self.partial_parser.file_diff["changed"]
|
|
935
|
+
or macro.file_id in self.partial_parser.file_diff["added"]
|
|
936
|
+
):
|
|
937
|
+
# The file with the macro in it has changed
|
|
938
|
+
return True
|
|
939
|
+
return False
|
|
940
|
+
|
|
941
|
+
def read_manifest_for_partial_parse(self) -> Optional[Manifest]:
|
|
942
|
+
flags = get_flags()
|
|
943
|
+
if not flags.PARTIAL_PARSE:
|
|
944
|
+
fire_event(PartialParsingNotEnabled())
|
|
945
|
+
return None
|
|
946
|
+
path = flags.PARTIAL_PARSE_FILE_PATH or os.path.join(
|
|
947
|
+
self.root_project.project_target_path, PARTIAL_PARSE_FILE_NAME
|
|
948
|
+
)
|
|
949
|
+
|
|
950
|
+
reparse_reason = None
|
|
951
|
+
|
|
952
|
+
if os.path.exists(path):
|
|
953
|
+
try:
|
|
954
|
+
with open(path, "rb") as fp:
|
|
955
|
+
manifest_mp = fp.read()
|
|
956
|
+
manifest: Manifest = Manifest.from_msgpack(manifest_mp, decoder=extended_mashumuro_decoder) # type: ignore
|
|
957
|
+
# keep this check inside the try/except in case something about
|
|
958
|
+
# the file has changed in weird ways, perhaps due to being a
|
|
959
|
+
# different version of dbt
|
|
960
|
+
is_partial_parsable, reparse_reason = self.is_partial_parsable(manifest)
|
|
961
|
+
if is_partial_parsable:
|
|
962
|
+
# We don't want to have stale generated_at dates
|
|
963
|
+
manifest.metadata.generated_at = datetime.now(timezone.utc).replace(
|
|
964
|
+
tzinfo=None
|
|
965
|
+
)
|
|
966
|
+
# or invocation_ids
|
|
967
|
+
manifest.metadata.invocation_id = get_invocation_id()
|
|
968
|
+
return manifest
|
|
969
|
+
except Exception as exc:
|
|
970
|
+
fire_event(
|
|
971
|
+
ParsedFileLoadFailed(path=path, exc=str(exc), exc_info=traceback.format_exc())
|
|
972
|
+
)
|
|
973
|
+
reparse_reason = ReparseReason.load_file_failure
|
|
974
|
+
else:
|
|
975
|
+
fire_event(
|
|
976
|
+
UnableToPartialParse(reason="saved manifest not found. Starting full parse.")
|
|
977
|
+
)
|
|
978
|
+
reparse_reason = ReparseReason.file_not_found
|
|
979
|
+
|
|
980
|
+
# this event is only fired if a full reparse is needed
|
|
981
|
+
if dbt.tracking.active_user is not None: # no active_user if doing load_macros
|
|
982
|
+
dbt.tracking.track_partial_parser({"full_reparse_reason": reparse_reason})
|
|
983
|
+
|
|
984
|
+
return None
|
|
985
|
+
|
|
986
|
+
def build_perf_info(self):
|
|
987
|
+
flags = get_flags()
|
|
988
|
+
mli = ManifestLoaderInfo(
|
|
989
|
+
is_partial_parse_enabled=flags.PARTIAL_PARSE,
|
|
990
|
+
is_static_analysis_enabled=flags.STATIC_PARSER,
|
|
991
|
+
)
|
|
992
|
+
for project in self.all_projects.values():
|
|
993
|
+
project_info = ProjectLoaderInfo(
|
|
994
|
+
project_name=project.project_name,
|
|
995
|
+
elapsed=0,
|
|
996
|
+
)
|
|
997
|
+
mli.projects.append(project_info)
|
|
998
|
+
mli._project_index[project.project_name] = project_info
|
|
999
|
+
return mli
|
|
1000
|
+
|
|
1001
|
+
# TODO: handle --vars in the same way we handle env_var
|
|
1002
|
+
# https://github.com/dbt-labs/dbt-core/issues/6323
|
|
1003
|
+
    def build_manifest_state_check(self):
        config = self.root_project
        all_projects = self.all_projects
        # if any of these change, we need to reject the parser

        # Create a FileHash of vars string, profile name and target name
        # This does not capture vars in dbt_project, just the command line
        # arg vars, but since any changes to that file will cause state_check
        # to not pass, it doesn't matter. If we move to more granular checking
        # of env_vars, that would need to change.
        # We are using the parsed cli_vars instead of config.args.vars, in order
        # to sort them and avoid reparsing because of ordering issues.
        secret_vars = [
            v for k, v in config.cli_vars.items() if k.startswith(SECRET_ENV_PREFIX) and v.strip()
        ]
        stringified_cli_vars = pprint.pformat(config.cli_vars)
        vars_hash = FileHash.from_contents(
            "\x00".join(
                [
                    stringified_cli_vars,
                    getattr(config.args, "profile", "") or "",
                    getattr(config.args, "target", "") or "",
                    __version__,
                ]
            )
        )
        fire_event(
            StateCheckVarsHash(
                checksum=vars_hash.checksum,
                vars=scrub_secrets(stringified_cli_vars, secret_vars),
                profile=config.args.profile,
                target=config.args.target,
                version=__version__,
            )
        )

        # Create a FileHash of the env_vars in the project
        key_list = list(config.project_env_vars.keys())
        key_list.sort()
        env_var_str = ""
        for key in key_list:
            env_var_str += f"{key}:{config.project_env_vars[key]}|"
        project_env_vars_hash = FileHash.from_contents(env_var_str)

        # Create a hash of the connection_info, which the user has access to in
        # the jinja context. Thus attributes here may affect the parsing result.
        # Ideally we should not expose all of the connection info to jinja.

        # Renaming this variable means we would have to do a whole lot more work
        # to make sure the previous manifest can still be loaded correctly.
        # This is an example of why names should be chosen based on functionality
        # rather than implementation details.
        connection_keys = list(config.credentials.connection_info())
        # avoid reparsing because of ordering issues
        connection_keys.sort()
        profile_hash = FileHash.from_contents(pprint.pformat(connection_keys))

        # Create a FileHash of dbt_project.yml for every dependency
        project_hashes = {}
        for name, project in all_projects.items():
            path = os.path.join(project.project_root, "dbt_project.yml")
            with open(path) as fp:
                project_hashes[name] = FileHash.from_contents(fp.read())

        # Create the ManifestStateCheck object
        state_check = ManifestStateCheck(
            project_env_vars_hash=project_env_vars_hash,
            vars_hash=vars_hash,
            profile_hash=profile_hash,
            project_hashes=project_hashes,
        )
        return state_check

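The vars hash above relies on `pprint.pformat` producing a stable, key-sorted rendering of the CLI vars dict, so that `--vars` supplied in a different order do not invalidate partial parsing. A small illustrative sketch of that property (hashlib stands in here for `FileHash.from_contents`, which is not shown in this diff):

```python
import hashlib
import pprint

def vars_checksum(cli_vars: dict) -> str:
    # pprint.pformat sorts dict keys by default, so ordering differences vanish
    return hashlib.sha256(pprint.pformat(cli_vars).encode("utf-8")).hexdigest()

a = vars_checksum({"start_date": "2024-01-01", "env": "prod"})
b = vars_checksum({"env": "prod", "start_date": "2024-01-01"})
assert a == b  # same vars, different order, same checksum
```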
    def save_macros_to_adapter(self, adapter):
        adapter.set_macro_resolver(self.manifest)
        # This executes the callable macro_hook and sets the query headers
        query_header_context = generate_query_header_context(adapter.config, self.manifest)
        self.macro_hook(query_header_context)

    # This creates a MacroManifest which contains the macros in
    # the adapter. Only called by the load_macros call from the
    # adapter.
    def create_macro_manifest(self):
        for project in self.all_projects.values():
            # what is the manifest passed in actually used for?
            macro_parser = MacroParser(project, self.manifest)
            for path in macro_parser.get_paths():
                source_file = load_source_file(path, ParseFileType.Macro, project.project_name, {})
                block = FileBlock(source_file)
                # This does not add the file to the manifest.files,
                # but that shouldn't be necessary here.
                macro_parser.parse_file(block)
        macro_manifest = MacroManifest(self.manifest.macros)
        return macro_manifest

    # This is called by the adapter code only, to create the
    # MacroManifest that's stored in the adapter.
    # 'get_full_manifest' uses a persistent ManifestLoader while this
    # creates a temporary ManifestLoader and throws it away.
    # Not sure when this would actually get used except in tests.
    # The ManifestLoader loads macros with other files, then copies
    # into the adapter MacroManifest.
    @classmethod
    def load_macros(
        cls,
        root_config: RuntimeConfig,
        macro_hook: Callable[[Manifest], Any],
        base_macros_only=False,
    ) -> Manifest:
        # base_only/base_macros_only: for testing only,
        # allows loading macros without running 'dbt deps' first
        projects = root_config.load_dependencies(base_only=base_macros_only)

        # This creates a loader object, including result,
        # and then throws it away, returning only the
        # manifest
        loader = cls(root_config, projects, macro_hook)

        return loader.create_macro_manifest()

    # Create tracking event for saving performance info
    def track_project_load(self):
        invocation_id = get_invocation_id()
        dbt.tracking.track_project_load(
            {
                "invocation_id": invocation_id,
                "project_id": self.root_project.hashed_name(),
                "path_count": self._perf_info.path_count,
                "parsed_path_count": self._perf_info.parsed_path_count,
                "read_files_elapsed": self._perf_info.read_files_elapsed,
                "load_macros_elapsed": self._perf_info.load_macros_elapsed,
                "parse_project_elapsed": self._perf_info.parse_project_elapsed,
                "patch_sources_elapsed": self._perf_info.patch_sources_elapsed,
                "process_manifest_elapsed": (self._perf_info.process_manifest_elapsed),
                "load_all_elapsed": self._perf_info.load_all_elapsed,
                "is_partial_parse_enabled": (self._perf_info.is_partial_parse_enabled),
                "is_static_analysis_enabled": self._perf_info.is_static_analysis_enabled,
                "static_analysis_path_count": self._perf_info.static_analysis_path_count,
                "static_analysis_parsed_path_count": self._perf_info.static_analysis_parsed_path_count,  # noqa: E501
            }
        )

    # Takes references in 'refs' array of nodes and exposures, finds the target
    # node, and updates 'depends_on.nodes' with the unique id
    def process_refs(self, current_project: str, dependencies: Optional[Mapping[str, Project]]):
        for node in self.manifest.nodes.values():
            if node.created_at < self.started_at:
                continue
            _process_refs(self.manifest, current_project, node, dependencies)
        for exposure in self.manifest.exposures.values():
            if exposure.created_at < self.started_at:
                continue
            _process_refs(self.manifest, current_project, exposure, dependencies)
        for metric in self.manifest.metrics.values():
            if metric.created_at < self.started_at:
                continue
            _process_refs(self.manifest, current_project, metric, dependencies)
        for semantic_model in self.manifest.semantic_models.values():
            if semantic_model.created_at < self.started_at:
                continue
            _process_refs(self.manifest, current_project, semantic_model, dependencies)
            self.update_semantic_model(semantic_model)
        for function in self.manifest.functions.values():
            if function.created_at < self.started_at:
                continue
            _process_refs(self.manifest, current_project, function, dependencies)

    # Takes references in 'metrics' array of nodes and exposures, finds the target
    # node, and updates 'depends_on.nodes' with the unique id
    def process_metrics(self, config: RuntimeConfig):
        current_project = config.project_name
        for metric in self.manifest.metrics.values():
            if metric.created_at < self.started_at:
                continue
            _process_metric_node(self.manifest, current_project, metric)
            _process_metrics_for_node(self.manifest, current_project, metric)
        for node in self.manifest.nodes.values():
            if node.created_at < self.started_at:
                continue
            _process_metrics_for_node(self.manifest, current_project, node)
        for exposure in self.manifest.exposures.values():
            if exposure.created_at < self.started_at:
                continue
            _process_metrics_for_node(self.manifest, current_project, exposure)

    def process_saved_queries(self, config: RuntimeConfig):
        """Processes SavedQuery nodes to populate their `depends_on`."""
        # Note: This will also capture various nodes which have been re-parsed
        # because they refer to some other changed node, so there will be
        # false positives. Ideally we would compare actual changes.
        semantic_manifest_changed = False
        semantic_manifest_nodes: chain[SemanticManifestNode] = chain(
            self.manifest.saved_queries.values(),
            self.manifest.semantic_models.values(),
            self.manifest.metrics.values(),
        )
        for node in semantic_manifest_nodes:
            # Check if this node has been modified in this parsing run
            if node.created_at > self.started_at:
                semantic_manifest_changed = True
                break  # as soon as we run into one changed node we can stop
        if semantic_manifest_changed is False:
            return

        current_project = config.project_name
        for saved_query in self.manifest.saved_queries.values():
            # TODO:
            # 1. process `where` of SavedQuery for `depends_on`s
            # 2. process `group_by` of SavedQuery for `depends_on`
            _process_metrics_for_node(self.manifest, current_project, saved_query)

    def process_model_inferred_primary_keys(self):
        """Processes Model nodes to populate their `primary_key`."""
        model_to_generic_test_map: Dict[str, List[GenericTestNode]] = {}
        for node in self.manifest.nodes.values():
            if not isinstance(node, ModelNode):
                continue
            if node.created_at < self.started_at:
                continue
            if not model_to_generic_test_map:
                model_to_generic_test_map = self.build_model_to_generic_tests_map()
            generic_tests: List[GenericTestNode] = []
            if node.unique_id in model_to_generic_test_map:
                generic_tests = model_to_generic_test_map[node.unique_id]
            primary_key = node.infer_primary_key(generic_tests)
            node.primary_key = sorted(primary_key)

    def update_semantic_model(self, semantic_model) -> None:
        # This has to be done at the end of parsing because the referenced model
        # might have alias/schema/database fields that are updated by yaml config.
        if semantic_model.depends_on_nodes[0]:
            refd_node = self.manifest.nodes[semantic_model.depends_on_nodes[0]]
            semantic_model.node_relation = NodeRelation(
                relation_name=refd_node.relation_name,
                alias=refd_node.alias,
                schema_name=refd_node.schema,
                database=refd_node.database,
            )

    # nodes: node and column descriptions, version columns descriptions
    # sources: source and table descriptions, column descriptions
    # macros: macro argument descriptions
    # exposures: exposure descriptions
    # metrics: metric descriptions
    # semantic_models: semantic model descriptions
    def process_docs(self, config: RuntimeConfig):
        for node in self.manifest.nodes.values():
            if node.created_at < self.started_at:
                continue
            ctx = generate_runtime_docs_context(
                config,
                node,
                self.manifest,
                config.project_name,
            )
            _process_docs_for_node(ctx, node, self.manifest)
        for source in self.manifest.sources.values():
            if source.created_at < self.started_at:
                continue
            ctx = generate_runtime_docs_context(
                config,
                source,
                self.manifest,
                config.project_name,
            )
            _process_docs_for_source(ctx, source, self.manifest)
        for macro in self.manifest.macros.values():
            if macro.created_at < self.started_at:
                continue
            ctx = generate_runtime_docs_context(
                config,
                macro,
                self.manifest,
                config.project_name,
            )
            _process_docs_for_macro(ctx, macro)
        for exposure in self.manifest.exposures.values():
            if exposure.created_at < self.started_at:
                continue
            ctx = generate_runtime_docs_context(
                config,
                exposure,
                self.manifest,
                config.project_name,
            )
            _process_docs_for_exposure(ctx, exposure)
        for metric in self.manifest.metrics.values():
            if metric.created_at < self.started_at:
                continue
            ctx = generate_runtime_docs_context(
                config,
                metric,
                self.manifest,
                config.project_name,
            )
            _process_docs_for_metrics(ctx, metric)
        for semantic_model in self.manifest.semantic_models.values():
            if semantic_model.created_at < self.started_at:
                continue
            ctx = generate_runtime_docs_context(
                config,
                semantic_model,
                self.manifest,
                config.project_name,
            )
            _process_docs_for_semantic_model(ctx, semantic_model)
        for saved_query in self.manifest.saved_queries.values():
            if saved_query.created_at < self.started_at:
                continue
            ctx = generate_runtime_docs_context(
                config, saved_query, self.manifest, config.project_name
            )
            _process_docs_for_saved_query(ctx, saved_query)

    # Loops through all nodes and exposures, for each element in
    # 'sources' array finds the source node and updates the
    # 'depends_on.nodes' array with the unique id
    def process_sources(self, current_project: str):
        for node in self.manifest.nodes.values():
            if node.resource_type == NodeType.Source:
                continue
            assert not isinstance(node, SourceDefinition)
            if node.created_at < self.started_at:
                continue
            _process_sources_for_node(self.manifest, current_project, node)
        for exposure in self.manifest.exposures.values():
            if exposure.created_at < self.started_at:
                continue
            _process_sources_for_exposure(self.manifest, current_project, exposure)

    # Loops through all nodes, for each element in
    # 'unit_test' array finds the node and updates the
    # 'depends_on.nodes' array with the unique id
    def process_unit_tests(self, current_project: str):
        models_to_versions = None
        unit_test_unique_ids = list(self.manifest.unit_tests.keys())
        for unit_test_unique_id in unit_test_unique_ids:
            # This is because some unit tests will be removed when processing
            # and the list of unit_test_unique_ids won't have changed
            if unit_test_unique_id in self.manifest.unit_tests:
                unit_test = self.manifest.unit_tests[unit_test_unique_id]
            else:
                continue
            if unit_test.created_at < self.started_at:
                continue
            if not models_to_versions:
                models_to_versions = _build_model_names_to_versions(self.manifest)
            process_models_for_unit_test(
                self.manifest, current_project, unit_test, models_to_versions
            )

    # Loops through all nodes, for each element in
    # 'functions' array finds the node and updates the
    # 'depends_on.nodes' array with the unique id
    def process_functions(self, current_project: str):
        for node in self.manifest.nodes.values():
            if node.created_at < self.started_at:
                continue
            _process_functions_for_node(self.manifest, current_project, node)

        for function in self.manifest.functions.values():
            if function.created_at < self.started_at:
                continue
            _process_functions_for_node(self.manifest, current_project, function)

    def cleanup_disabled(self):
        # make sure the nodes are in the manifest.nodes or the disabled dict,
        # correctly now that the schema files are also parsed
        disabled_nodes = []
        for node in self.manifest.nodes.values():
            if not node.config.enabled:
                disabled_nodes.append(node.unique_id)
                self.manifest.add_disabled_nofile(node)
        for unique_id in disabled_nodes:
            self.manifest.nodes.pop(unique_id)

        disabled_copy = deepcopy(self.manifest.disabled)
        for disabled in disabled_copy.values():
            for node in disabled:
                if node.config.enabled:
                    for dis_index, dis_node in enumerate(disabled):
                        # Remove node from disabled and unique_id from disabled dict if necessary
                        del self.manifest.disabled[node.unique_id][dis_index]
                        if not self.manifest.disabled[node.unique_id]:
                            self.manifest.disabled.pop(node.unique_id)

                    self.manifest.add_node_nofile(node)

        self.manifest.rebuild_ref_lookup()

    def check_valid_group_config(self):
        manifest = self.manifest
        group_names = {group.name for group in manifest.groups.values()}

        for metric in manifest.metrics.values():
            self.check_valid_group_config_node(metric, group_names)

        for semantic_model in manifest.semantic_models.values():
            self.check_valid_group_config_node(semantic_model, group_names)

        for saved_query in manifest.saved_queries.values():
            self.check_valid_group_config_node(saved_query, group_names)

        for node in manifest.nodes.values():
            self.check_valid_group_config_node(node, group_names)

    def check_valid_group_config_node(
        self,
        groupable_node: Union[Metric, SavedQuery, SemanticModel, ManifestNode],
        valid_group_names: Set[str],
    ):
        groupable_node_group = groupable_node.group
        if groupable_node_group and groupable_node_group not in valid_group_names:
            raise dbt.exceptions.ParsingError(
                f"Invalid group '{groupable_node_group}', expected one of {sorted(list(valid_group_names))}",
                node=groupable_node,
            )

    def check_valid_access_property(self):
        for node in self.manifest.nodes.values():
            if (
                isinstance(node, ModelNode)
                and node.access == AccessType.Public
                and node.get_materialization() == "ephemeral"
            ):
                raise InvalidAccessTypeError(
                    unique_id=node.unique_id,
                    field_value=node.access,
                    materialization=node.get_materialization(),
                )

    def check_valid_snapshot_config(self):
        # Snapshot config can be set in either SQL files or yaml files,
        # so we need to validate afterward.
        for node in self.manifest.nodes.values():
            if node.resource_type != NodeType.Snapshot:
                continue
            if node.created_at < self.started_at:
                continue
            node.config.final_validate()

    def check_valid_microbatch_config(self):
        if self.manifest.use_microbatch_batches(project_name=self.root_project.project_name):
            for node in self.manifest.nodes.values():
                if (
                    node.config.materialized == "incremental"
                    and node.config.incremental_strategy == "microbatch"
                ):
                    # Required configs: event_time, batch_size, begin
                    event_time = node.config.event_time
                    if event_time is None:
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide an 'event_time' (string) config that indicates the name of the event time column."
                        )
                    if not isinstance(event_time, str):
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide an 'event_time' config of type string, but got: {type(event_time)}."
                        )

                    begin = node.config.begin
                    if begin is None:
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide a 'begin' (datetime) config that indicates the earliest timestamp the microbatch model should be built from."
                        )

                    # Try to cast begin to a datetime using same format as mashumaro for consistency with other yaml-provided datetimes
                    # Mashumaro default: https://github.com/Fatal1ty/mashumaro/blob/4ac16fd060a6c651053475597b58b48f958e8c5c/README.md?plain=1#L1186
                    if isinstance(begin, str):
                        try:
                            begin = datetime.fromisoformat(begin)
                            node.config.begin = begin
                        except Exception:
                            raise dbt.exceptions.ParsingError(
                                f"Microbatch model '{node.name}' must provide a 'begin' config of valid datetime (ISO format), but got: {begin}."
                            )

                    if not isinstance(begin, datetime):
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide a 'begin' config of type datetime, but got: {type(begin)}."
                        )

                    batch_size = node.config.batch_size
                    valid_batch_sizes = [size.value for size in BatchSize]
                    if batch_size not in valid_batch_sizes:
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide a 'batch_size' config that is one of {valid_batch_sizes}, but got: {batch_size}."
                        )

                    # Optional config: lookback (int)
                    lookback = node.config.lookback
                    if not isinstance(lookback, int) and lookback is not None:
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' must provide the optional 'lookback' config as type int, but got: {type(lookback)})."
                        )

                    # optional config: concurrent_batches (bool)
                    concurrent_batches = node.config.concurrent_batches
                    if not isinstance(concurrent_batches, bool) and concurrent_batches is not None:
                        raise dbt.exceptions.ParsingError(
                            f"Microbatch model '{node.name}' optional 'concurrent_batches' config must be of type `bool` if specified, but got: {type(concurrent_batches)})."
                        )

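The checks above enforce the microbatch contract at parse time: `event_time` must be a string, `begin` must be (or parse to) a datetime, `batch_size` must be one of the allowed granularities, and `lookback`/`concurrent_batches` are optional typed settings. A stand-alone sketch of the same validation over a plain config dict (the allowed batch sizes listed here are an assumption for illustration; the real set comes from the `BatchSize` enum):

```python
from datetime import datetime

ASSUMED_BATCH_SIZES = ["hour", "day", "month", "year"]  # illustrative only

def validate_microbatch_config(name: str, config: dict) -> None:
    event_time = config.get("event_time")
    if not isinstance(event_time, str):
        raise ValueError(f"'{name}': 'event_time' must be a string column name")

    begin = config.get("begin")
    if isinstance(begin, str):
        begin = datetime.fromisoformat(begin)  # same coercion the parser applies
    if not isinstance(begin, datetime):
        raise ValueError(f"'{name}': 'begin' must be an ISO datetime")

    if config.get("batch_size") not in ASSUMED_BATCH_SIZES:
        raise ValueError(f"'{name}': 'batch_size' must be one of {ASSUMED_BATCH_SIZES}")

    lookback = config.get("lookback")
    if lookback is not None and not isinstance(lookback, int):
        raise ValueError(f"'{name}': 'lookback' must be an int")

validate_microbatch_config(
    "events", {"event_time": "loaded_at", "begin": "2024-01-01", "batch_size": "day"}
)
```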
    def check_forcing_batch_concurrency(self) -> None:
        if self.manifest.use_microbatch_batches(project_name=self.root_project.project_name):
            adapter = get_adapter(self.root_project)

            if not adapter.supports(Capability.MicrobatchConcurrency):
                models_forcing_concurrent_batches = 0
                for node in self.manifest.nodes.values():
                    if (
                        hasattr(node.config, "concurrent_batches")
                        and node.config.concurrent_batches is True
                    ):
                        models_forcing_concurrent_batches += 1

                if models_forcing_concurrent_batches > 0:
                    warn_or_error(
                        InvalidConcurrentBatchesConfig(
                            num_models=models_forcing_concurrent_batches,
                            adapter_type=adapter.type(),
                        )
                    )

    def check_microbatch_model_has_a_filtered_input(self):
        if self.manifest.use_microbatch_batches(project_name=self.root_project.project_name):
            for node in self.manifest.nodes.values():
                if (
                    node.config.materialized == "incremental"
                    and node.config.incremental_strategy == "microbatch"
                ):
                    # Validate upstream node event_time (if configured)
                    has_input_with_event_time_config = False
                    for input_unique_id in node.depends_on.nodes:
                        input_node = self.manifest.expect(unique_id=input_unique_id)
                        input_event_time = input_node.config.event_time
                        if input_event_time:
                            if not isinstance(input_event_time, str):
                                raise dbt.exceptions.ParsingError(
                                    f"Microbatch model '{node.name}' depends on an input node '{input_node.name}' with an 'event_time' config of invalid (non-string) type: {type(input_event_time)}."
                                )
                            has_input_with_event_time_config = True

                    if not has_input_with_event_time_config:
                        fire_event(MicrobatchModelNoEventTimeInputs(model_name=node.name))

    def write_perf_info(self, target_path: str):
        path = os.path.join(target_path, PERF_INFO_FILE_NAME)
        write_file(path, json.dumps(self._perf_info, cls=dbt.utils.JSONEncoder, indent=4))
        fire_event(ParsePerfInfoPath(path=path))

    def build_model_to_generic_tests_map(self) -> Dict[str, List[GenericTestNode]]:
        """Return a list of generic tests that are attached to the given model, including disabled tests"""
        model_to_generic_tests_map: Dict[str, List[GenericTestNode]] = {}
        for _, node in self.manifest.nodes.items():
            if isinstance(node, GenericTestNode) and node.attached_node:
                if node.attached_node not in model_to_generic_tests_map:
                    model_to_generic_tests_map[node.attached_node] = []
                model_to_generic_tests_map[node.attached_node].append(node)
        for _, nodes in self.manifest.disabled.items():
            for disabled_node in nodes:
                if isinstance(disabled_node, GenericTestNode) and disabled_node.attached_node:
                    if disabled_node.attached_node not in model_to_generic_tests_map:
                        model_to_generic_tests_map[disabled_node.attached_node] = []
                    model_to_generic_tests_map[disabled_node.attached_node].append(disabled_node)
        return model_to_generic_tests_map


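`build_model_to_generic_tests_map` is an index-building pass: it groups every generic test (enabled or disabled) under the unique_id of the model it is attached to, so primary-key inference can look tests up per model. The same grouping pattern in a minimal, dependency-free form (the dict field names here are illustrative stand-ins for the real node objects):

```python
from collections import defaultdict
from typing import Dict, List

def group_tests_by_model(tests: List[dict]) -> Dict[str, List[dict]]:
    grouped: Dict[str, List[dict]] = defaultdict(list)
    for test in tests:
        attached = test.get("attached_node")  # unique_id of the tested model, if any
        if attached:
            grouped[attached].append(test)
    return dict(grouped)

tests = [
    {"name": "unique_orders_id", "attached_node": "model.jaffle.orders"},
    {"name": "not_null_orders_id", "attached_node": "model.jaffle.orders"},
]
assert len(group_tests_by_model(tests)["model.jaffle.orders"]) == 2
```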
def invalid_target_fail_unless_test(
    node,
    target_name: str,
    target_kind: str,
    target_package: Optional[str] = None,
    target_version: Optional[NodeVersion] = None,
    disabled: Optional[bool] = None,
    should_warn_if_disabled: bool = True,
):
    if node.resource_type == NodeType.Test:
        if disabled:
            event = InvalidDisabledTargetInTestNode(
                resource_type_title=node.resource_type.title(),
                unique_id=node.unique_id,
                original_file_path=node.original_file_path,
                target_kind=target_kind,
                target_name=target_name,
                target_package=target_package if target_package else "",
            )

            fire_event(event, EventLevel.WARN if should_warn_if_disabled else None)
        else:
            warn_or_error(
                NodeNotFoundOrDisabled(
                    original_file_path=node.original_file_path,
                    unique_id=node.unique_id,
                    resource_type_title=node.resource_type.title(),
                    target_name=target_name,
                    target_kind=target_kind,
                    target_package=target_package if target_package else "",
                    disabled=str(disabled),
                )
            )
    else:
        raise TargetNotFoundError(
            node=node,
            target_name=target_name,
            target_kind=target_kind,
            target_package=target_package,
            target_version=target_version,
            disabled=disabled,
        )


def _build_model_names_to_versions(manifest: Manifest) -> Dict[str, Dict]:
    model_names_to_versions: Dict[str, Dict] = {}
    for node in manifest.nodes.values():
        if node.resource_type != NodeType.Model:
            continue
        if not node.is_versioned:
            continue
        if node.package_name not in model_names_to_versions:
            model_names_to_versions[node.package_name] = {}
        if node.name not in model_names_to_versions[node.package_name]:
            model_names_to_versions[node.package_name][node.name] = []
        model_names_to_versions[node.package_name][node.name].append(node.unique_id)
    return model_names_to_versions


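`_build_model_names_to_versions` produces a two-level index, package name to model name to the list of versioned unique_ids, which unit-test processing uses to resolve refs against versioned models. A hedged sketch of the resulting shape (the identifiers below are invented for illustration):

```python
# package_name -> model_name -> [unique_id, ...] for versioned models only
models_to_versions = {
    "jaffle_shop": {
        "orders": [
            "model.jaffle_shop.orders.v1",
            "model.jaffle_shop.orders.v2",
        ],
    },
}
assert models_to_versions["jaffle_shop"]["orders"][-1].endswith(".v2")
```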
def _check_resource_uniqueness(
    manifest: Manifest,
    config: RuntimeConfig,
) -> None:
    alias_resources: Dict[str, ManifestNode] = {}
    name_resources: Dict[str, Dict] = {}

    for resource, node in manifest.nodes.items():
        if not node.is_relational:
            continue

        if node.package_name not in name_resources:
            name_resources[node.package_name] = {"ver": {}, "unver": {}}
        if node.is_versioned:
            name_resources[node.package_name]["ver"][node.name] = node
        else:
            name_resources[node.package_name]["unver"][node.name] = node

        # the full node name is really defined by the adapter's relation
        relation_cls = get_relation_class_by_name(config.credentials.type)
        relation = relation_cls.create_from(quoting=config, relation_config=node)  # type: ignore[arg-type]
        full_node_name = str(relation)

        existing_alias = alias_resources.get(full_node_name)
        if existing_alias is not None:
            raise AmbiguousAliasError(
                node_1=existing_alias, node_2=node, duped_name=full_node_name
            )

        alias_resources[full_node_name] = node

    for ver_unver_dict in name_resources.values():
        versioned_names = ver_unver_dict["ver"].keys()
        unversioned_names = ver_unver_dict["unver"].keys()
        intersection_versioned = set(versioned_names).intersection(set(unversioned_names))
        if intersection_versioned:
            for name in intersection_versioned:
                versioned_node = ver_unver_dict["ver"][name]
                unversioned_node = ver_unver_dict["unver"][name]
                raise dbt.exceptions.DuplicateVersionedUnversionedError(
                    versioned_node, unversioned_node
                )


def _warn_for_unused_resource_config_paths(manifest: Manifest, config: RuntimeConfig) -> None:
    resource_fqns: Mapping[str, PathSet] = manifest.get_resource_fqns()
    disabled_fqns: PathSet = frozenset(
        tuple(n.fqn) for n in list(chain.from_iterable(manifest.disabled.values()))
    )
    config.warn_for_unused_resource_config_paths(resource_fqns, disabled_fqns)


def _check_manifest(manifest: Manifest, config: RuntimeConfig) -> None:
    _check_resource_uniqueness(manifest, config)
    _warn_for_unused_resource_config_paths(manifest, config)


DocsContextCallback = Callable[[ResultNode], Dict[str, Any]]


def _get_doc_blocks(description: str, manifest: Manifest, node_package: str) -> List[str]:
    ast = parse(description)
    doc_blocks: List[str] = []

    if not hasattr(ast, "body"):
        return doc_blocks

    for statement in ast.body:
        for node in statement.nodes:
            if (
                isinstance(node, Call)
                and hasattr(node, "node")
                and hasattr(node, "args")
                and hasattr(node.node, "name")
                and node.node.name == "doc"
            ):
                doc_args = [arg.value for arg in node.args]

                if len(doc_args) == 1:
                    package, name = None, doc_args[0]
                elif len(doc_args) == 2:
                    package, name = doc_args
                else:
                    continue

                if not manifest.metadata.project_name:
                    continue

                resolved_doc = manifest.resolve_doc(
                    name, package, manifest.metadata.project_name, node_package
                )

                if resolved_doc:
                    doc_blocks.append(resolved_doc.unique_id)

    return doc_blocks


# node and column descriptions
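`_get_doc_blocks` walks the Jinja AST of a description string and records which `{{ doc(...) }}` blocks it references, so rendered descriptions keep a link back to their doc nodes. A minimal stand-alone sketch of the same AST walk using `jinja2` directly (resolution against a manifest is omitted):

```python
from jinja2 import Environment, nodes

def find_doc_calls(description: str) -> list:
    """Return the argument tuples of every {{ doc(...) }} call in a description."""
    ast = Environment().parse(description)
    calls = []
    for call in ast.find_all(nodes.Call):
        if isinstance(call.node, nodes.Name) and call.node.name == "doc":
            calls.append(tuple(arg.as_const() for arg in call.args))
    return calls

print(find_doc_calls('{{ doc("orders") }} and {{ doc("shop", "customers") }}'))
# [('orders',), ('shop', 'customers')]
```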
def _process_docs_for_node(
    context: Dict[str, Any],
    node: ManifestNode,
    manifest: Manifest,
):
    node.doc_blocks = _get_doc_blocks(node.description, manifest, node.package_name)
    node.description = get_rendered(node.description, context)

    for column_name, column in node.columns.items():
        column.doc_blocks = _get_doc_blocks(column.description, manifest, node.package_name)
        column.description = get_rendered(column.description, context)


# source and table descriptions, column descriptions
def _process_docs_for_source(
    context: Dict[str, Any],
    source: SourceDefinition,
    manifest: Manifest,
):
    source.doc_blocks = _get_doc_blocks(source.description, manifest, source.package_name)
    source.description = get_rendered(source.description, context)

    source.source_description = get_rendered(source.source_description, context)

    for column in source.columns.values():
        column.doc_blocks = _get_doc_blocks(column.description, manifest, source.package_name)
        column.description = get_rendered(column.description, context)


# macro argument descriptions
def _process_docs_for_macro(context: Dict[str, Any], macro: Macro) -> None:
    macro.description = get_rendered(macro.description, context)
    for arg in macro.arguments:
        arg.description = get_rendered(arg.description, context)


# exposure descriptions
def _process_docs_for_exposure(context: Dict[str, Any], exposure: Exposure) -> None:
    exposure.description = get_rendered(exposure.description, context)


def _process_docs_for_metrics(context: Dict[str, Any], metric: Metric) -> None:
    metric.description = get_rendered(metric.description, context)


def _process_docs_for_semantic_model(
    context: Dict[str, Any], semantic_model: SemanticModel
) -> None:
    if semantic_model.description:
        semantic_model.description = get_rendered(semantic_model.description, context)

    for dimension in semantic_model.dimensions:
        if dimension.description:
            dimension.description = get_rendered(dimension.description, context)

    for measure in semantic_model.measures:
        if measure.description:
            measure.description = get_rendered(measure.description, context)

    for entity in semantic_model.entities:
        if entity.description:
            entity.description = get_rendered(entity.description, context)


def _process_docs_for_saved_query(context: Dict[str, Any], saved_query: SavedQuery) -> None:
    if saved_query.description:
        saved_query.description = get_rendered(saved_query.description, context)


def _process_refs(
    manifest: Manifest, current_project: str, node, dependencies: Optional[Mapping[str, Project]]
) -> None:
    """Given a manifest and node in that manifest, process its refs"""

    dependencies = dependencies or {}

    if isinstance(node, SeedNode):
        return

    for ref in node.refs:
        target_model: Optional[Union[Disabled, ManifestNode]] = None
        target_model_name: str = ref.name
        target_model_package: Optional[str] = ref.package
        target_model_version: Optional[NodeVersion] = ref.version

        if len(ref.positional_args) < 1 or len(ref.positional_args) > 2:
            raise dbt.exceptions.DbtInternalError(
                f"Refs should always be 1 or 2 arguments - got {len(ref.positional_args)}"
            )

        target_model = manifest.resolve_ref(
            node,
            target_model_name,
            target_model_package,
            target_model_version,
            current_project,
            node.package_name,
        )

        if target_model is None or isinstance(target_model, Disabled):
            # This may raise. Even if it doesn't, we don't want to add
            # this exposure to the graph b/c there is no destination exposure
            node.config.enabled = False
            invalid_target_fail_unless_test(
                node=node,
                target_name=target_model_name,
                target_kind="node",
                target_package=target_model_package,
                target_version=target_model_version,
                disabled=(isinstance(target_model, Disabled)),
                should_warn_if_disabled=False,
            )

            continue
        elif manifest.is_invalid_private_ref(node, target_model, dependencies):
            raise dbt.exceptions.DbtReferenceError(
                unique_id=node.unique_id,
                ref_unique_id=target_model.unique_id,
                access=AccessType.Private,
                scope=dbt_common.utils.cast_to_str(target_model.group),
            )
        elif manifest.is_invalid_protected_ref(node, target_model, dependencies):
            raise dbt.exceptions.DbtReferenceError(
                unique_id=node.unique_id,
                ref_unique_id=target_model.unique_id,
                access=AccessType.Protected,
                scope=target_model.package_name,
            )

        target_model_id = target_model.unique_id
        node.depends_on.add_node(target_model_id)


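`_process_refs` turns every recorded `ref` on a node into a concrete graph edge: resolve the target by name (with optional package and version), disable the node and warn if the target is missing or disabled, enforce private/protected access, and finally append the target's unique_id to `depends_on`. A compact sketch of just the resolve-or-disable control flow (the registry and node dictionaries are invented for illustration):

```python
from typing import Dict, Optional

def resolve_ref_edges(node: dict, registry: Dict[str, dict]) -> Optional[list]:
    """Return the unique_ids a node depends on, or None if a ref could not be resolved."""
    edges = []
    for ref_name in node["refs"]:
        target = registry.get(ref_name)
        if target is None or not target.get("enabled", True):
            node["enabled"] = False  # mirrors node.config.enabled = False
            print(f"ref '{ref_name}' not found or disabled; disabling '{node['name']}'")
            return None
        edges.append(target["unique_id"])
    return edges

registry = {"stg_orders": {"unique_id": "model.shop.stg_orders", "enabled": True}}
print(resolve_ref_edges({"name": "orders", "refs": ["stg_orders"]}, registry))
```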
def _process_metric_depends_on(
    manifest: Manifest,
    current_project: str,
    metric: Metric,
) -> None:
    """For a given metric, set the `depends_on` property"""

    assert len(metric.type_params.input_measures) > 0
    for input_measure in metric.type_params.input_measures:
        target_semantic_model = manifest.resolve_semantic_model_for_measure(
            target_measure_name=input_measure.name,
            current_project=current_project,
            node_package=metric.package_name,
        )
        if target_semantic_model is None:
            raise dbt.exceptions.ParsingError(
                f"A semantic model having a measure `{input_measure.name}` does not exist but was referenced.",
                node=metric,
            )
        if target_semantic_model.config.enabled is False:
            raise dbt.exceptions.ParsingError(
                f"The measure `{input_measure.name}` is referenced on disabled semantic model `{target_semantic_model.name}`.",
                node=metric,
            )

        metric.depends_on.add_node(target_semantic_model.unique_id)


def _process_metric_node(
    manifest: Manifest,
    current_project: str,
    metric: Metric,
) -> None:
    """Sets a metric's `input_measures` and `depends_on` properties"""

    # This ensures that if this metrics input_measures have already been set
    # we skip the work. This could happen either due to recursion or if multiple
    # metrics derive from another given metric.
    # NOTE: This does not protect against infinite loops
    if len(metric.type_params.input_measures) > 0:
        return

    if metric.type is MetricType.SIMPLE or metric.type is MetricType.CUMULATIVE:
        assert (
            metric.type_params.measure is not None
        ), f"{metric} should have a measure defined, but it does not."
        metric.add_input_measure(metric.type_params.measure)
        _process_metric_depends_on(
            manifest=manifest, current_project=current_project, metric=metric
        )
    elif metric.type is MetricType.CONVERSION:
        conversion_type_params = metric.type_params.conversion_type_params
        assert (
            conversion_type_params
        ), f"{metric.name} is a conversion metric and must have conversion_type_params defined."
        metric.add_input_measure(conversion_type_params.base_measure)
        metric.add_input_measure(conversion_type_params.conversion_measure)
        _process_metric_depends_on(
            manifest=manifest, current_project=current_project, metric=metric
        )
    elif metric.type is MetricType.DERIVED or metric.type is MetricType.RATIO:
        input_metrics = metric.input_metrics
        if metric.type is MetricType.RATIO:
            if metric.type_params.numerator is None or metric.type_params.denominator is None:
                raise dbt.exceptions.ParsingError(
                    "Invalid ratio metric. Both a numerator and denominator must be specified",
                    node=metric,
                )
            input_metrics = [metric.type_params.numerator, metric.type_params.denominator]

        for input_metric in input_metrics:
            target_metric = manifest.resolve_metric(
                target_metric_name=input_metric.name,
                target_metric_package=None,
                current_project=current_project,
                node_package=metric.package_name,
            )

            if target_metric is None:
                raise dbt.exceptions.ParsingError(
                    f"The metric `{input_metric.name}` does not exist but was referenced.",
                    node=metric,
                )
            elif isinstance(target_metric, Disabled):
                raise dbt.exceptions.ParsingError(
                    f"The metric `{input_metric.name}` is disabled and thus cannot be referenced.",
                    node=metric,
                )

            _process_metric_node(
                manifest=manifest, current_project=current_project, metric=target_metric
            )
            for input_measure in target_metric.type_params.input_measures:
                metric.add_input_measure(input_measure)
            metric.depends_on.add_node(target_metric.unique_id)
    else:
        assert_values_exhausted(metric.type)


def _process_metrics_for_node(
    manifest: Manifest,
    current_project: str,
    node: Union[ManifestNode, Metric, Exposure, SavedQuery],
):
    """Given a manifest and a node in that manifest, process its metrics"""

    metrics: List[List[str]]
    if isinstance(node, SeedNode):
        return
    elif isinstance(node, SavedQuery):
        metrics = [[metric] for metric in node.metrics]
    else:
        metrics = node.metrics

    for metric in metrics:
        target_metric: Optional[Union[Disabled, Metric]] = None
        target_metric_name: str
        target_metric_package: Optional[str] = None

        if len(metric) == 1:
            target_metric_name = metric[0]
        elif len(metric) == 2:
            target_metric_package, target_metric_name = metric
        else:
            raise dbt.exceptions.DbtInternalError(
                f"Metric references should always be 1 or 2 arguments - got {len(metric)}"
            )

        target_metric = manifest.resolve_metric(
            target_metric_name,
            target_metric_package,
            current_project,
            node.package_name,
        )

        if target_metric is None or isinstance(target_metric, Disabled):
            # This may raise. Even if it doesn't, we don't want to add
            # this node to the graph b/c there is no destination node
            node.config.enabled = False
            invalid_target_fail_unless_test(
                node=node,
                target_name=target_metric_name,
                target_kind="metric",
                target_package=target_metric_package,
                disabled=(isinstance(target_metric, Disabled)),
            )
            continue

        target_metric_id = target_metric.unique_id

        node.depends_on.add_node(target_metric_id)


def remove_dependent_project_references(manifest, external_node_unique_id):
    for child_id in manifest.child_map[external_node_unique_id]:
        node = manifest.expect(child_id)
        # child node may have been modified and already recreated its depends_on.nodes list
        if external_node_unique_id in node.depends_on_nodes:
            node.depends_on_nodes.remove(external_node_unique_id)
        node.created_at = time.time()


def _process_sources_for_exposure(manifest: Manifest, current_project: str, exposure: Exposure):
    target_source: Optional[Union[Disabled, SourceDefinition]] = None
    for source_name, table_name in exposure.sources:
        target_source = manifest.resolve_source(
            source_name,
            table_name,
            current_project,
            exposure.package_name,
        )
        if target_source is None or isinstance(target_source, Disabled):
            exposure.config.enabled = False
            invalid_target_fail_unless_test(
                node=exposure,
                target_name=f"{source_name}.{table_name}",
                target_kind="source",
                disabled=(isinstance(target_source, Disabled)),
            )
            continue
        target_source_id = target_source.unique_id
        exposure.depends_on.add_node(target_source_id)


def _process_sources_for_metric(manifest: Manifest, current_project: str, metric: Metric):
    target_source: Optional[Union[Disabled, SourceDefinition]] = None
    for source_name, table_name in metric.sources:
        target_source = manifest.resolve_source(
            source_name,
            table_name,
            current_project,
            metric.package_name,
        )
        if target_source is None or isinstance(target_source, Disabled):
            metric.config.enabled = False
            invalid_target_fail_unless_test(
                node=metric,
                target_name=f"{source_name}.{table_name}",
                target_kind="source",
                disabled=(isinstance(target_source, Disabled)),
            )
            continue
        target_source_id = target_source.unique_id
        metric.depends_on.add_node(target_source_id)


def _process_sources_for_node(manifest: Manifest, current_project: str, node: ManifestNode):
    if isinstance(node, SeedNode):
        return

    target_source: Optional[Union[Disabled, SourceDefinition]] = None
    for source_name, table_name in node.sources:
        target_source = manifest.resolve_source(
            source_name,
            table_name,
            current_project,
            node.package_name,
        )

        if target_source is None or isinstance(target_source, Disabled):
            # this follows the same pattern as refs
            node.config.enabled = False
            invalid_target_fail_unless_test(
                node=node,
                target_name=f"{source_name}.{table_name}",
                target_kind="source",
                disabled=(isinstance(target_source, Disabled)),
            )
            continue
        target_source_id = target_source.unique_id
        node.depends_on.add_node(target_source_id)


def _process_functions_for_node(
    manifest: Manifest, current_project: str, node: ManifestNode
) -> None:
    """Given a manifest and node in that manifest, process its functions"""

    if isinstance(node, SeedNode):
        return

    for function_args in node.functions:
        target_function_name: str
        target_function_package: Optional[str] = None
        if len(function_args) == 1:
            target_function_name = function_args[0]
        elif len(function_args) == 2:
            target_function_package, target_function_name = function_args
        else:
            raise dbt.exceptions.DbtInternalError(
                f"Functions should always be 1 or 2 arguments - got {len(function_args)}"
            )

        target_function = manifest.resolve_function(
            target_function_name,
            target_function_package,
            current_project,
            node.package_name,
        )

        if target_function is None or isinstance(target_function, Disabled):
            node.config.enabled = False
            invalid_target_fail_unless_test(
                node=node,
                target_name=target_function_name,
                target_kind="function",
                target_package=target_function_package,
                disabled=(isinstance(target_function, Disabled)),
                should_warn_if_disabled=False,
            )

            continue

        node.depends_on.add_node(target_function.unique_id)


# This is called in task.rpc.sql_commands when a "dynamic" node is
# created in the manifest, in 'add_refs'
def process_macro(config: RuntimeConfig, manifest: Manifest, macro: Macro) -> None:
    ctx = generate_runtime_docs_context(
        config,
        macro,
        manifest,
        config.project_name,
    )
    _process_docs_for_macro(ctx, macro)


# This is called in task.rpc.sql_commands when a "dynamic" node is
# created in the manifest, in 'add_refs'
def process_node(config: RuntimeConfig, manifest: Manifest, node: ManifestNode):
    _process_sources_for_node(manifest, config.project_name, node)
    _process_refs(manifest, config.project_name, node, config.dependencies)
    ctx = generate_runtime_docs_context(config, node, manifest, config.project_name)
    _process_docs_for_node(ctx, node, manifest)


def write_semantic_manifest(manifest: Manifest, target_path: str) -> None:
    path = os.path.join(target_path, SEMANTIC_MANIFEST_FILE_NAME)
    semantic_manifest = SemanticManifest(manifest)
    semantic_manifest.write_json_to_file(path)


def write_manifest(manifest: Manifest, target_path: str, which: Optional[str] = None):
    file_name = MANIFEST_FILE_NAME
    path = os.path.join(target_path, file_name)
    manifest.write(path)
    add_artifact_produced(path)

    write_semantic_manifest(manifest=manifest, target_path=target_path)


def parse_manifest(
    runtime_config: RuntimeConfig,
    write_perf_info: bool,
    write: bool,
    write_json: bool,
    active_integrations: List[Optional[CatalogWriteIntegrationConfig]],
) -> Manifest:
    register_adapter(runtime_config, get_mp_context())
    adapter = get_adapter(runtime_config)
    adapter.set_macro_context_generator(generate_runtime_macro_context)
    for integration in active_integrations:
        adapter.add_catalog_integration(integration)
    manifest = ManifestLoader.get_full_manifest(
        runtime_config,
        write_perf_info=write_perf_info,
    )

    # If we should (over)write the manifest in the target path, do that now
    if write and write_json:
        write_manifest(manifest, runtime_config.project_target_path)
        pm = plugins.get_plugin_manager(runtime_config.project_name)
        plugin_artifacts = pm.get_manifest_artifacts(manifest)
        for path, plugin_artifact in plugin_artifacts.items():
            plugin_artifact.write(path)
            fire_event(
                ArtifactWritten(
                    artifact_type=plugin_artifact.__class__.__name__, artifact_path=path
                )
            )
    return manifest