dvt_core-1.11.0b4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dvt-core might be problematic.
Files changed (261)
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/parser/manifest.py ADDED
@@ -0,0 +1,2204 @@
1
+ import json
2
+ import os
3
+ import pprint
4
+ import time
5
+ import traceback
6
+ from copy import deepcopy
7
+ from dataclasses import dataclass, field
8
+ from datetime import date, datetime, timezone
9
+ from itertools import chain
10
+ from typing import Any, Callable, Dict, List, Mapping, Optional, Set, Tuple, Type, Union
11
+
12
+ import dvt.deprecations
13
+ import dvt.exceptions
14
+ import dvt.tracking
15
+ import dvt.utils
16
+ import msgpack
17
+ from dvt import plugins
18
+ from dvt.artifacts.resources import (
19
+ CatalogWriteIntegrationConfig,
20
+ FileHash,
21
+ NodeRelation,
22
+ NodeVersion,
23
+ )
24
+ from dvt.artifacts.resources.types import BatchSize
25
+ from dvt.artifacts.schemas.base import Writable
26
+ from dvt.clients.jinja import MacroStack, get_rendered
27
+ from dvt.clients.jinja_static import statically_extract_macro_calls
28
+ from dvt.config import Project, RuntimeConfig
29
+ from dvt.constants import (
30
+ MANIFEST_FILE_NAME,
31
+ PARTIAL_PARSE_FILE_NAME,
32
+ SEMANTIC_MANIFEST_FILE_NAME,
33
+ )
34
+ from dvt.context.configured import generate_macro_context
35
+ from dvt.context.docs import generate_runtime_docs_context
36
+ from dvt.context.macro_resolver import MacroResolver, TestMacroNamespace
37
+ from dvt.context.providers import ParseProvider, generate_runtime_macro_context
38
+ from dvt.context.query_header import generate_query_header_context
39
+ from dvt.contracts.files import ParseFileType, SchemaSourceFile
40
+ from dvt.contracts.graph.manifest import (
41
+ Disabled,
42
+ MacroManifest,
43
+ Manifest,
44
+ ManifestStateCheck,
45
+ ParsingInfo,
46
+ )
47
+ from dvt.contracts.graph.nodes import (
48
+ Exposure,
49
+ GenericTestNode,
50
+ Macro,
51
+ ManifestNode,
52
+ Metric,
53
+ ModelNode,
54
+ ResultNode,
55
+ SavedQuery,
56
+ SeedNode,
57
+ SemanticManifestNode,
58
+ SemanticModel,
59
+ SourceDefinition,
60
+ )
61
+ from dvt.contracts.graph.semantic_manifest import SemanticManifest
62
+ from dvt.events.types import (
63
+ ArtifactWritten,
64
+ DeprecatedModel,
65
+ DeprecatedReference,
66
+ InvalidConcurrentBatchesConfig,
67
+ InvalidDisabledTargetInTestNode,
68
+ MicrobatchModelNoEventTimeInputs,
69
+ NodeNotFoundOrDisabled,
70
+ ParsedFileLoadFailed,
71
+ ParsePerfInfoPath,
72
+ PartialParsingError,
73
+ PartialParsingErrorProcessingFile,
74
+ PartialParsingNotEnabled,
75
+ PartialParsingSkipParsing,
76
+ SpacesInResourceNameDeprecation,
77
+ StateCheckVarsHash,
78
+ UnableToPartialParse,
79
+ UpcomingReferenceDeprecation,
80
+ )
81
+ from dvt.exceptions import (
82
+ AmbiguousAliasError,
83
+ InvalidAccessTypeError,
84
+ TargetNotFoundError,
85
+ scrub_secrets,
86
+ )
87
+ from dvt.flags import get_flags
88
+ from dvt.mp_context import get_mp_context
89
+ from dvt.node_types import AccessType, NodeType
90
+ from dvt.parser.analysis import AnalysisParser
91
+ from dvt.parser.base import Parser
92
+ from dvt.parser.docs import DocumentationParser
93
+ from dvt.parser.fixtures import FixtureParser
94
+ from dvt.parser.functions import FunctionParser
95
+ from dvt.parser.generic_test import GenericTestParser
96
+ from dvt.parser.hooks import HookParser
97
+ from dvt.parser.macros import MacroParser
98
+ from dvt.parser.models import ModelParser
99
+ from dvt.parser.partial import PartialParsing, special_override_macros
100
+ from dvt.parser.read_files import (
101
+ FileDiff,
102
+ ReadFiles,
103
+ ReadFilesFromDiff,
104
+ ReadFilesFromFileSystem,
105
+ load_source_file,
106
+ )
107
+ from dvt.parser.schemas import SchemaParser
108
+ from dvt.parser.search import FileBlock
109
+ from dvt.parser.seeds import SeedParser
110
+ from dvt.parser.singular_test import SingularTestParser
111
+ from dvt.parser.snapshots import SnapshotParser
112
+ from dvt.parser.sources import SourcePatcher
113
+ from dvt.parser.unit_tests import process_models_for_unit_test
114
+ from dvt.utils.artifact_upload import add_artifact_produced
115
+ from dvt.version import __version__
116
+ from jinja2.nodes import Call
117
+
118
+ import dbt_common.utils
119
+ from dbt.adapters.capability import Capability
120
+ from dbt.adapters.factory import (
121
+ get_adapter,
122
+ get_adapter_package_names,
123
+ get_relation_class_by_name,
124
+ register_adapter,
125
+ )
126
+ from dbt_common.clients.jinja import parse
127
+ from dbt_common.clients.system import make_directory, path_exists, read_json, write_file
128
+ from dbt_common.constants import SECRET_ENV_PREFIX
129
+ from dbt_common.dataclass_schema import StrEnum, dbtClassMixin
130
+ from dbt_common.events.base_types import EventLevel
131
+ from dbt_common.events.functions import fire_event, get_invocation_id, warn_or_error
132
+ from dbt_common.events.types import Note
133
+ from dbt_common.exceptions.base import DbtValidationError
134
+ from dbt_common.helper_types import PathSet
135
+ from dbt_semantic_interfaces.enum_extension import assert_values_exhausted
136
+ from dbt_semantic_interfaces.type_enums import MetricType
137
+
138
+ PERF_INFO_FILE_NAME = "perf_info.json"
139
+
140
+
141
+ def extended_mashumaro_encoder(data):
142
+ return msgpack.packb(data, default=extended_msgpack_encoder, use_bin_type=True)
143
+
144
+
145
+ def extended_msgpack_encoder(obj):
146
+ if type(obj) is date:
147
+ date_bytes = msgpack.ExtType(1, obj.isoformat().encode())
148
+ return date_bytes
149
+ elif type(obj) is datetime:
150
+ datetime_bytes = msgpack.ExtType(2, obj.isoformat().encode())
151
+ return datetime_bytes
152
+
153
+ return obj
154
+
155
+
156
+ def extended_mashumuro_decoder(data):
157
+ return msgpack.unpackb(data, ext_hook=extended_msgpack_decoder, raw=False)
158
+
159
+
160
+ def extended_msgpack_decoder(code, data):
161
+ if code == 1:
162
+ d = date.fromisoformat(data.decode())
163
+ return d
164
+ elif code == 2:
165
+ dt = datetime.fromisoformat(data.decode())
166
+ return dt
167
+ else:
168
+ return msgpack.ExtType(code, data)
169
+
170
+
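The four helpers above let the cached manifest round-trip date and datetime values through msgpack by encoding them as extension types 1 and 2. A minimal round-trip sketch of how they fit together (the payload and field name are illustrative, not taken from the package):

from datetime import date

payload = {"as_of": date(2024, 1, 1)}             # hypothetical value; any date/datetime works
packed = extended_mashumaro_encoder(payload)      # date is stored as msgpack ExtType(1, ISO-format bytes)
restored = extended_mashumuro_decoder(packed)     # ext_hook turns ExtType(1, ...) back into datetime.date
assert restored == payload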
171
+ def version_to_str(version: Optional[Union[str, int]]) -> str:
172
+ if isinstance(version, int):
173
+ return str(version)
174
+ elif isinstance(version, str):
175
+ return version
176
+
177
+ return ""
178
+
179
+
180
+ class ReparseReason(StrEnum):
181
+ version_mismatch = "01_version_mismatch"
182
+ file_not_found = "02_file_not_found"
183
+ vars_changed = "03_vars_changed"
184
+ profile_changed = "04_profile_changed"
185
+ deps_changed = "05_deps_changed"
186
+ project_config_changed = "06_project_config_changed"
187
+ load_file_failure = "07_load_file_failure"
188
+ exception = "08_exception"
189
+ proj_env_vars_changed = "09_project_env_vars_changed"
190
+ prof_env_vars_changed = "10_profile_env_vars_changed"
191
+
192
+
193
+ # Part of saved performance info
194
+ @dataclass
195
+ class ParserInfo(dbtClassMixin):
196
+ parser: str
197
+ elapsed: float
198
+ parsed_path_count: int = 0
199
+
200
+
201
+ # Part of saved performance info
202
+ @dataclass
203
+ class ProjectLoaderInfo(dbtClassMixin):
204
+ project_name: str
205
+ elapsed: float
206
+ parsers: List[ParserInfo] = field(default_factory=list)
207
+ parsed_path_count: int = 0
208
+
209
+
210
+ # Part of saved performance info
211
+ @dataclass
212
+ class ManifestLoaderInfo(dbtClassMixin, Writable):
213
+ path_count: int = 0
214
+ parsed_path_count: int = 0
215
+ static_analysis_path_count: int = 0
216
+ static_analysis_parsed_path_count: int = 0
217
+ is_partial_parse_enabled: Optional[bool] = None
218
+ is_static_analysis_enabled: Optional[bool] = None
219
+ read_files_elapsed: Optional[float] = None
220
+ load_macros_elapsed: Optional[float] = None
221
+ parse_project_elapsed: Optional[float] = None
222
+ patch_sources_elapsed: Optional[float] = None
223
+ process_manifest_elapsed: Optional[float] = None
224
+ load_all_elapsed: Optional[float] = None
225
+ projects: List[ProjectLoaderInfo] = field(default_factory=list)
226
+ _project_index: Dict[str, ProjectLoaderInfo] = field(default_factory=dict)
227
+
228
+ def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
229
+ del dct["_project_index"]
230
+ return dct
231
+
232
+
233
+ # The ManifestLoader loads the manifest. The standard way to use the
234
+ # ManifestLoader is using the 'get_full_manifest' class method, but
235
+ # many tests use abbreviated processes.
236
+ class ManifestLoader:
237
+ def __init__(
238
+ self,
239
+ root_project: RuntimeConfig,
240
+ all_projects: Mapping[str, RuntimeConfig],
241
+ macro_hook: Optional[Callable[[Manifest], Any]] = None,
242
+ file_diff: Optional[FileDiff] = None,
243
+ ) -> None:
244
+ self.root_project: RuntimeConfig = root_project
245
+ self.all_projects: Mapping[str, RuntimeConfig] = all_projects
246
+ self.file_diff = file_diff
247
+ self.manifest: Manifest = Manifest()
248
+ self.new_manifest = self.manifest
249
+ self.manifest.metadata = root_project.get_metadata()
250
+ self.macro_resolver = None # built after macros are loaded
251
+ self.started_at = time.time()
252
+ # This is a MacroQueryStringSetter callable, which is called
253
+ # later after we set the MacroManifest in the adapter. It sets
254
+ # up the query headers.
255
+ self.macro_hook: Callable[[Manifest], Any]
256
+ if macro_hook is None:
257
+ self.macro_hook = lambda m: None
258
+ else:
259
+ self.macro_hook = macro_hook
260
+
261
+ self._perf_info = self.build_perf_info()
262
+
263
+ # State check determines whether the saved_manifest and the current
264
+ # manifest match well enough to do partial parsing
265
+ self.manifest.state_check = self.build_manifest_state_check()
266
+ # We need to know if we're actually partially parsing. It could
267
+ # have been enabled, but not happening because of some issue.
268
+ self.partially_parsing = False
269
+ self.partial_parser: Optional[PartialParsing] = None
270
+ self.skip_parsing = False
271
+
272
+ # This is a saved manifest from a previous run that's used for partial parsing
273
+ self.saved_manifest: Optional[Manifest] = self.read_manifest_for_partial_parse()
274
+
275
+ # This is the method that builds a complete manifest. We sometimes
276
+ # use an abbreviated process in tests.
277
+ @classmethod
278
+ def get_full_manifest(
279
+ cls,
280
+ config: RuntimeConfig,
281
+ *,
282
+ file_diff: Optional[FileDiff] = None,
283
+ reset: bool = False,
284
+ write_perf_info=False,
285
+ ) -> Manifest:
286
+ adapter = get_adapter(config) # type: ignore
287
+ # reset is set in a TaskManager load_manifest call, since
288
+ # the config and adapter may be persistent.
289
+ if reset:
290
+ config.clear_dependencies()
291
+ adapter.clear_macro_resolver()
292
+ macro_hook = adapter.connections.set_query_header
293
+
294
+ flags = get_flags()
295
+ if not flags.PARTIAL_PARSE_FILE_DIFF:
296
+ file_diff = FileDiff.from_dict(
297
+ {
298
+ "deleted": [],
299
+ "changed": [],
300
+ "added": [],
301
+ }
302
+ )
303
+ # Hack to test file_diffs
304
+ elif os.environ.get("DBT_PP_FILE_DIFF_TEST"):
305
+ file_diff_path = "file_diff.json"
306
+ if path_exists(file_diff_path):
307
+ file_diff_dct = read_json(file_diff_path)
308
+ file_diff = FileDiff.from_dict(file_diff_dct)
309
+
310
+ # Start performance counting
311
+ start_load_all = time.perf_counter()
312
+
313
+ projects = config.load_dependencies()
314
+ loader = cls(
315
+ config,
316
+ projects,
317
+ macro_hook=macro_hook,
318
+ file_diff=file_diff,
319
+ )
320
+
321
+ manifest = loader.load()
322
+
323
+ _check_manifest(manifest, config)
324
+ manifest.build_flat_graph()
325
+
326
+ # This needs to happen after loading from a partial parse,
327
+ # so that the adapter has the query headers from the macro_hook.
328
+ loader.save_macros_to_adapter(adapter)
329
+
330
+ # Save performance info
331
+ loader._perf_info.load_all_elapsed = time.perf_counter() - start_load_all
332
+ loader.track_project_load()
333
+
334
+ if write_perf_info:
335
+ loader.write_perf_info(config.project_target_path)
336
+
337
+ return manifest
338
+
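As the class comment notes, get_full_manifest is the standard entry point. A hedged usage sketch follows; the RuntimeConfig.from_parts construction is an assumption based on the imports above, and project, profile, and args are stand-in objects:

# config construction is project-specific; from_parts is assumed here for illustration
config = RuntimeConfig.from_parts(project, profile, args)
manifest = ManifestLoader.get_full_manifest(
    config,
    reset=True,            # clear cached dependencies/macros on a persistent TaskManager config
    write_perf_info=True,  # also dump perf_info.json into the project target path
)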
339
+ # This is where the main action happens
340
+ def load(self) -> Manifest:
341
+ start_read_files = time.perf_counter()
342
+
343
+ # This updates the "files" dictionary in self.manifest, and creates
344
+ # the partial_parser_files dictionary (see read_files.py),
345
+ # which is a dictionary of projects to a dictionary
346
+ # of parsers to lists of file strings. The file strings are
347
+ # used to get the SourceFiles from the manifest files.
348
+ saved_files = self.saved_manifest.files if self.saved_manifest else {}
349
+ file_reader: Optional[ReadFiles] = None
350
+ if self.file_diff:
351
+ # We're getting files from a file diff
352
+ file_reader = ReadFilesFromDiff(
353
+ all_projects=self.all_projects,
354
+ files=self.manifest.files,
355
+ saved_files=saved_files,
356
+ root_project_name=self.root_project.project_name,
357
+ file_diff=self.file_diff,
358
+ )
359
+ else:
360
+ # We're getting files from the file system
361
+ file_reader = ReadFilesFromFileSystem(
362
+ all_projects=self.all_projects,
363
+ files=self.manifest.files,
364
+ saved_files=saved_files,
365
+ )
366
+
367
+ # Set the files in the manifest and save the project_parser_files
368
+ file_reader.read_files()
369
+ self.manifest.files = file_reader.files
370
+ project_parser_files = orig_project_parser_files = file_reader.project_parser_files
371
+ self._perf_info.path_count = len(self.manifest.files)
372
+ self._perf_info.read_files_elapsed = time.perf_counter() - start_read_files
373
+
374
+ self.skip_parsing = False
375
+ project_parser_files = self.safe_update_project_parser_files_partially(
376
+ project_parser_files
377
+ )
378
+
379
+ if self.manifest._parsing_info is None:
380
+ self.manifest._parsing_info = ParsingInfo()
381
+
382
+ if self.skip_parsing:
383
+ fire_event(PartialParsingSkipParsing())
384
+ else:
385
+ # Load Macros and tests
386
+ # We need to parse the macros first, so they're resolvable when
387
+ # the other files are loaded. Also need to parse tests, specifically
388
+ # generic tests
389
+ start_load_macros = time.perf_counter()
390
+ self.load_and_parse_macros(project_parser_files)
391
+
392
+ # If we're partially parsing check that certain macros have not been changed
393
+ if self.partially_parsing and self.skip_partial_parsing_because_of_macros():
394
+ fire_event(
395
+ UnableToPartialParse(
396
+ reason="change detected to override macro. Starting full parse."
397
+ )
398
+ )
399
+
400
+ # Get new Manifest with original file records and move over the macros
401
+ self.manifest = self.new_manifest # contains newly read files
402
+ project_parser_files = orig_project_parser_files
403
+ self.partially_parsing = False
404
+ self.load_and_parse_macros(project_parser_files)
405
+
406
+ self._perf_info.load_macros_elapsed = time.perf_counter() - start_load_macros
407
+
408
+ # Now that the macros are parsed, parse the rest of the files.
409
+ # This is currently done on a per project basis.
410
+ start_parse_projects = time.perf_counter()
411
+
412
+ # Load the rest of the files except for schema yaml files
413
+ parser_types: List[Type[Parser]] = [
414
+ ModelParser,
415
+ SnapshotParser,
416
+ AnalysisParser,
417
+ SingularTestParser,
418
+ SeedParser,
419
+ DocumentationParser,
420
+ HookParser,
421
+ FixtureParser,
422
+ FunctionParser,
423
+ ]
424
+ for project in self.all_projects.values():
425
+ if project.project_name not in project_parser_files:
426
+ continue
427
+ self.parse_project(
428
+ project, project_parser_files[project.project_name], parser_types
429
+ )
430
+
431
+ # Now that we've loaded most of the nodes (except for schema tests, sources, metrics)
432
+ # load up the Lookup objects to resolve them by name, so the SourceFiles store
433
+ # the unique_id instead of the name. Sources are loaded from yaml files, so
434
+ # aren't in place yet
435
+ self.manifest.rebuild_ref_lookup()
436
+ self.manifest.rebuild_doc_lookup()
437
+ self.manifest.rebuild_disabled_lookup()
438
+
439
+ # Load yaml files
440
+ parser_types = [SchemaParser] # type: ignore
441
+ for project in self.all_projects.values():
442
+ if project.project_name not in project_parser_files:
443
+ continue
444
+ self.parse_project(
445
+ project, project_parser_files[project.project_name], parser_types
446
+ )
447
+
448
+ self.cleanup_disabled()
449
+
450
+ self._perf_info.parse_project_elapsed = time.perf_counter() - start_parse_projects
451
+
452
+ # patch_sources converts the UnparsedSourceDefinitions in the
453
+ # Manifest.sources to SourceDefinition via 'patch_source'
454
+ # in SourcePatcher
455
+ start_patch = time.perf_counter()
456
+ patcher = SourcePatcher(self.root_project, self.manifest)
457
+ patcher.construct_sources()
458
+ self.manifest.sources = patcher.sources
459
+ self._perf_info.patch_sources_elapsed = time.perf_counter() - start_patch
460
+
461
+ # We need to rebuild disabled in order to include disabled sources
462
+ self.manifest.rebuild_disabled_lookup()
463
+
464
+ # copy the selectors from the root_project to the manifest
465
+ self.manifest.selectors = self.root_project.manifest_selectors
466
+
467
+ # inject any available external nodes
468
+ self.manifest.build_parent_and_child_maps()
469
+ external_nodes_modified = self.inject_external_nodes()
470
+ if external_nodes_modified:
471
+ self.manifest.rebuild_ref_lookup()
472
+
473
+ # update the refs, sources, docs and metrics depends_on.nodes
474
+ # These check the created_at time on the nodes to
475
+ # determine whether they need processing.
476
+ start_process = time.perf_counter()
477
+ self.process_sources(self.root_project.project_name)
478
+ self.process_refs(self.root_project.project_name, self.root_project.dependencies)
479
+ self.process_unit_tests(self.root_project.project_name)
480
+ self.process_docs(self.root_project)
481
+ self.process_metrics(self.root_project)
482
+ self.process_saved_queries(self.root_project)
483
+ self.process_model_inferred_primary_keys()
484
+ self.process_functions(self.root_project.project_name)
485
+ self.check_valid_group_config()
486
+ self.check_valid_access_property()
487
+ self.check_valid_snapshot_config()
488
+ self.check_valid_microbatch_config()
489
+
490
+ semantic_manifest = SemanticManifest(self.manifest)
491
+ if not semantic_manifest.validate():
492
+ raise dbt.exceptions.ParsingError("Semantic Manifest validation failed.")
493
+
494
+ # update tracking data
495
+ self._perf_info.process_manifest_elapsed = time.perf_counter() - start_process
496
+ self._perf_info.static_analysis_parsed_path_count = (
497
+ self.manifest._parsing_info.static_analysis_parsed_path_count
498
+ )
499
+ self._perf_info.static_analysis_path_count = (
500
+ self.manifest._parsing_info.static_analysis_path_count
501
+ )
502
+
503
+ # Inject any available external nodes, reprocess refs if changes to the manifest were made.
504
+ external_nodes_modified = False
505
+ if self.skip_parsing:
506
+ # If we didn't skip parsing, this will have already run because it must run
507
+ # before process_refs. If we did skip parsing, then it's possible that only
508
+ # external nodes have changed and we need to run this to capture that.
509
+ self.manifest.build_parent_and_child_maps()
510
+ external_nodes_modified = self.inject_external_nodes()
511
+ if external_nodes_modified:
512
+ self.manifest.rebuild_ref_lookup()
513
+ self.process_refs(
514
+ self.root_project.project_name,
515
+ self.root_project.dependencies,
516
+ )
517
+ # parent and child maps will be rebuilt by write_manifest
518
+
519
+ if not self.skip_parsing or external_nodes_modified:
520
+ # write out the fully parsed manifest
521
+ self.write_manifest_for_partial_parse()
522
+
523
+ self.check_for_model_deprecations()
524
+ self.check_for_spaces_in_resource_names()
525
+ self.check_for_microbatch_deprecations()
526
+ self.check_forcing_batch_concurrency()
527
+ self.check_microbatch_model_has_a_filtered_input()
528
+
529
+ return self.manifest
530
+
531
+ def safe_update_project_parser_files_partially(self, project_parser_files: Dict) -> Dict:
532
+ if self.saved_manifest is None:
533
+ return project_parser_files
534
+
535
+ self.partial_parser = PartialParsing(self.saved_manifest, self.manifest.files) # type: ignore[arg-type]
536
+ self.skip_parsing = self.partial_parser.skip_parsing()
537
+ if self.skip_parsing:
538
+ # nothing changed, so we don't need to generate project_parser_files
539
+ self.manifest = self.saved_manifest # type: ignore[assignment]
540
+ else:
541
+ # create child_map and parent_map
542
+ self.saved_manifest.build_parent_and_child_maps() # type: ignore[union-attr]
543
+ # create group_map
544
+ self.saved_manifest.build_group_map() # type: ignore[union-attr]
545
+ # files are different, we need to create a new set of
546
+ # project_parser_files.
547
+ try:
548
+ project_parser_files = self.partial_parser.get_parsing_files()
549
+ self.partially_parsing = True
550
+ self.manifest = self.saved_manifest # type: ignore[assignment]
551
+ except Exception as exc:
552
+ # pp_files should still be the full set and manifest is new manifest,
553
+ # since get_parsing_files failed
554
+ fire_event(
555
+ UnableToPartialParse(reason="an error occurred. Switching to full reparse.")
556
+ )
557
+
558
+ # Get traceback info
559
+ tb_info = traceback.format_exc()
560
+ # index last stack frame in traceback (i.e. latest exception and its context)
561
+ tb_last_frame = traceback.extract_tb(exc.__traceback__)[-1]
562
+ exc_info = {
563
+ "traceback": tb_info,
564
+ "exception": tb_info.splitlines()[-1],
565
+ "code": tb_last_frame.line, # if the source is not available, it is None
566
+ "location": f"line {tb_last_frame.lineno} in {tb_last_frame.name}",
567
+ }
568
+
569
+ # get file info for local logs
570
+ parse_file_type: str = ""
571
+ file_id = self.partial_parser.processing_file
572
+ if file_id:
573
+ source_file = None
574
+ if file_id in self.saved_manifest.files:
575
+ source_file = self.saved_manifest.files[file_id]
576
+ elif file_id in self.manifest.files:
577
+ source_file = self.manifest.files[file_id]
578
+ if source_file:
579
+ parse_file_type = source_file.parse_file_type
580
+ fire_event(PartialParsingErrorProcessingFile(file=file_id))
581
+ exc_info["parse_file_type"] = parse_file_type
582
+ fire_event(PartialParsingError(exc_info=exc_info))
583
+ # Send event
584
+ if dbt.tracking.active_user is not None:
585
+ exc_info["full_reparse_reason"] = ReparseReason.exception
586
+ dbt.tracking.track_partial_parser(exc_info)
587
+
588
+ if os.environ.get("DBT_PP_TEST"):
589
+ raise exc
590
+
591
+ return project_parser_files
592
+
593
+ def check_for_model_deprecations(self):
594
+ # build parent and child_maps
595
+ self.manifest.build_parent_and_child_maps()
596
+ for node in self.manifest.nodes.values():
597
+ if isinstance(node, ModelNode) and node.deprecation_date:
598
+ if node.is_past_deprecation_date:
599
+ warn_or_error(
600
+ DeprecatedModel(
601
+ model_name=node.name,
602
+ model_version=version_to_str(node.version),
603
+ deprecation_date=node.deprecation_date.isoformat(),
604
+ )
605
+ )
606
+ # At this point _process_refs should already have been called, and
607
+ # we just rebuilt the parent and child maps.
608
+ # Get the child_nodes and check for deprecations.
609
+ child_nodes = self.manifest.child_map[node.unique_id]
610
+ for child_unique_id in child_nodes:
611
+ child_node = self.manifest.nodes.get(child_unique_id)
612
+ if not isinstance(child_node, ModelNode):
613
+ continue
614
+ if node.is_past_deprecation_date:
615
+ event_cls = DeprecatedReference
616
+ else:
617
+ event_cls = UpcomingReferenceDeprecation
618
+
619
+ warn_or_error(
620
+ event_cls(
621
+ model_name=child_node.name,
622
+ ref_model_package=node.package_name,
623
+ ref_model_name=node.name,
624
+ ref_model_version=version_to_str(node.version),
625
+ ref_model_latest_version=str(node.latest_version),
626
+ ref_model_deprecation_date=node.deprecation_date.isoformat(),
627
+ )
628
+ )
629
+
630
+ def check_for_spaces_in_resource_names(self):
631
+ """Validates that resource names do not contain spaces
632
+
633
+ If `DEBUG` flag is `False`, logs only first bad model name
634
+ If `DEBUG` flag is `True`, logs every bad model name
635
+ If `REQUIRE_RESOURCE_NAMES_WITHOUT_SPACES` is `True`, logs are `ERROR` level and an exception is raised if any names are bad
636
+ If `REQUIRE_RESOURCE_NAMES_WITHOUT_SPACES` is `False`, logs are `WARN` level
637
+ """
638
+ improper_resource_names = 0
639
+ level = (
640
+ EventLevel.ERROR
641
+ if self.root_project.args.REQUIRE_RESOURCE_NAMES_WITHOUT_SPACES
642
+ else EventLevel.WARN
643
+ )
644
+
645
+ flags = get_flags()
646
+
647
+ for node in self.manifest.nodes.values():
648
+ if " " in node.name:
649
+ if improper_resource_names == 0 or flags.DEBUG:
650
+ fire_event(
651
+ SpacesInResourceNameDeprecation(
652
+ unique_id=node.unique_id,
653
+ level=level.value,
654
+ ),
655
+ level=level,
656
+ )
657
+ improper_resource_names += 1
658
+
659
+ if improper_resource_names > 0:
660
+ if level == EventLevel.WARN:
661
+ dbt.deprecations.warn(
662
+ "resource-names-with-spaces",
663
+ count_invalid_names=improper_resource_names,
664
+ show_debug_hint=(not flags.DEBUG),
665
+ )
666
+ else: # ERROR level
667
+ raise DbtValidationError("Resource names cannot contain spaces")
668
+
669
+ def check_for_microbatch_deprecations(self) -> None:
670
+ if not get_flags().require_batched_execution_for_custom_microbatch_strategy:
671
+ has_microbatch_model = False
672
+ for _, node in self.manifest.nodes.items():
673
+ if (
674
+ isinstance(node, ModelNode)
675
+ and node.config.materialized == "incremental"
676
+ and node.config.incremental_strategy == "microbatch"
677
+ ):
678
+ has_microbatch_model = True
679
+ break
680
+
681
+ if has_microbatch_model and not self.manifest._microbatch_macro_is_core(
682
+ self.root_project.project_name
683
+ ):
684
+ dbt.deprecations.warn("microbatch-macro-outside-of-batches-deprecation")
685
+
686
+ def load_and_parse_macros(self, project_parser_files):
687
+ for project in self.all_projects.values():
688
+ if project.project_name not in project_parser_files:
689
+ continue
690
+ parser_files = project_parser_files[project.project_name]
691
+ if "MacroParser" in parser_files:
692
+ parser = MacroParser(project, self.manifest)
693
+ for file_id in parser_files["MacroParser"]:
694
+ block = FileBlock(self.manifest.files[file_id])
695
+ parser.parse_file(block)
696
+ # increment parsed path count for performance tracking
697
+ self._perf_info.parsed_path_count += 1
698
+ # generic tests historically lived in the macros directory but can now be nested
699
+ # in a /generic directory under /tests, so we want to process them here as well
700
+ if "GenericTestParser" in parser_files:
701
+ parser = GenericTestParser(project, self.manifest)
702
+ for file_id in parser_files["GenericTestParser"]:
703
+ block = FileBlock(self.manifest.files[file_id])
704
+ parser.parse_file(block)
705
+ # increment parsed path count for performance tracking
706
+ self._perf_info.parsed_path_count += 1
707
+
708
+ self.build_macro_resolver()
709
+ # Look at changed macros and update the macro.depends_on.macros
710
+ self.macro_depends_on()
711
+
712
+ # Parse the files in the 'parser_files' dictionary, for parsers listed in
713
+ # 'parser_types'
714
+ def parse_project(
715
+ self,
716
+ project: RuntimeConfig,
717
+ parser_files,
718
+ parser_types: List[Type[Parser]],
719
+ ) -> None:
720
+
721
+ project_loader_info = self._perf_info._project_index[project.project_name]
722
+ start_timer = time.perf_counter()
723
+ total_parsed_path_count = 0
724
+
725
+ # Loop through parsers with loaded files.
726
+ for parser_cls in parser_types:
727
+ parser_name = parser_cls.__name__
728
+ # No point in creating a parser if we don't have files for it
729
+ if parser_name not in parser_files or not parser_files[parser_name]:
730
+ continue
731
+
732
+ # Initialize timing info
733
+ project_parsed_path_count = 0
734
+ parser_start_timer = time.perf_counter()
735
+
736
+ # Parse the project files for this parser
737
+ parser: Parser = parser_cls(project, self.manifest, self.root_project)
738
+ for file_id in parser_files[parser_name]:
739
+ block = FileBlock(self.manifest.files[file_id])
740
+ if isinstance(parser, SchemaParser):
741
+ assert isinstance(block.file, SchemaSourceFile)
742
+ if self.partially_parsing:
743
+ dct = block.file.pp_dict
744
+ else:
745
+ dct = block.file.dict_from_yaml
746
+ # this is where the schema file gets parsed
747
+ parser.parse_file(block, dct=dct)
748
+ # Came out of here with UnpatchedSourceDefinition containing configs at the source level
749
+ # and not configs at the table level (as expected)
750
+ else:
751
+ parser.parse_file(block)
752
+ project_parsed_path_count += 1
753
+
754
+ # Save timing info
755
+ project_loader_info.parsers.append(
756
+ ParserInfo(
757
+ parser=parser.resource_type,
758
+ parsed_path_count=project_parsed_path_count,
759
+ elapsed=time.perf_counter() - parser_start_timer,
760
+ )
761
+ )
762
+ total_parsed_path_count += project_parsed_path_count
763
+
764
+ # HookParser doesn't run from loaded files, just dbt_project.yml,
765
+ # so do separately
766
+ # This shouldn't need to be parsed again if we're starting from
767
+ # a saved manifest, because that won't be allowed if dbt_project.yml
768
+ # changed, but leave for now.
769
+ if not self.partially_parsing and HookParser in parser_types:
770
+ hook_parser = HookParser(project, self.manifest, self.root_project)
771
+ path = hook_parser.get_path()
772
+ file = load_source_file(path, ParseFileType.Hook, project.project_name, {})
773
+ if file:
774
+ file_block = FileBlock(file)
775
+ hook_parser.parse_file(file_block)
776
+
777
+ # Store the performance info
778
+ elapsed = time.perf_counter() - start_timer
779
+ project_loader_info.parsed_path_count = (
780
+ project_loader_info.parsed_path_count + total_parsed_path_count
781
+ )
782
+ project_loader_info.elapsed += elapsed
783
+ self._perf_info.parsed_path_count = (
784
+ self._perf_info.parsed_path_count + total_parsed_path_count
785
+ )
786
+
787
+ # This should only be called after the macros have been loaded
788
+ def build_macro_resolver(self):
789
+ internal_package_names = get_adapter_package_names(self.root_project.credentials.type)
790
+ self.macro_resolver = MacroResolver(
791
+ self.manifest.macros, self.root_project.project_name, internal_package_names
792
+ )
793
+
794
+ # Loop through macros in the manifest and statically parse
795
+ # the 'macro_sql' to find depends_on.macros
796
+ def macro_depends_on(self):
797
+ macro_ctx = generate_macro_context(self.root_project)
798
+ macro_namespace = TestMacroNamespace(self.macro_resolver, {}, None, MacroStack(), [])
799
+ adapter = get_adapter(self.root_project)
800
+ db_wrapper = ParseProvider().DatabaseWrapper(adapter, macro_namespace)
801
+ for macro in self.manifest.macros.values():
802
+ if macro.created_at < self.started_at:
803
+ continue
804
+ possible_macro_calls = statically_extract_macro_calls(
805
+ macro.macro_sql, macro_ctx, db_wrapper
806
+ )
807
+ for macro_name in possible_macro_calls:
808
+ # adapter.dispatch calls can generate a call with the same name as the macro
809
+ # it ought to be an adapter prefix (postgres_) or default_
810
+ if macro_name == macro.name:
811
+ continue
812
+ package_name = macro.package_name
813
+ if "." in macro_name:
814
+ package_name, macro_name = macro_name.split(".")
815
+ dep_macro_id = self.macro_resolver.get_macro_id(package_name, macro_name)
816
+ if dep_macro_id:
817
+ macro.depends_on.add_macro(dep_macro_id) # will check for dupes
818
+
819
+ def write_manifest_for_partial_parse(self):
820
+ path = os.path.join(self.root_project.project_target_path, PARTIAL_PARSE_FILE_NAME)
821
+ try:
822
+ # This shouldn't be necessary, but we have gotten bug reports (#3757) of the
823
+ # saved manifest not matching the code version.
824
+ if self.manifest.metadata.dbt_version != __version__:
825
+ fire_event(
826
+ UnableToPartialParse(reason="saved manifest contained the wrong version")
827
+ )
828
+ self.manifest.metadata.dbt_version = __version__
829
+ manifest_msgpack = self.manifest.to_msgpack(extended_mashumaro_encoder)
830
+ make_directory(os.path.dirname(path))
831
+ with open(path, "wb") as fp:
832
+ fp.write(manifest_msgpack)
833
+ except Exception:
834
+ raise
835
+
836
+ def inject_external_nodes(self) -> bool:
837
+ # Remove previously existing external nodes since we are regenerating them
838
+ manifest_nodes_modified = False
839
+ # Remove all dependent nodes before removing referencing nodes
840
+ for unique_id in self.manifest.external_node_unique_ids:
841
+ remove_dependent_project_references(self.manifest, unique_id)
842
+ manifest_nodes_modified = True
843
+ for unique_id in self.manifest.external_node_unique_ids:
844
+ # remove external nodes from manifest only after dependent project references safely removed
845
+ self.manifest.nodes.pop(unique_id)
846
+
847
+ # Inject any newly-available external nodes
848
+ pm = plugins.get_plugin_manager(self.root_project.project_name)
849
+ plugin_model_nodes = pm.get_nodes().models
850
+ for node_arg in plugin_model_nodes.values():
851
+ node = ModelNode.from_args(node_arg)
852
+ # node may already exist from package or running project (even if it is disabled),
853
+ # in which case we should avoid clobbering it with an external node
854
+ if (
855
+ node.unique_id not in self.manifest.nodes
856
+ and node.unique_id not in self.manifest.disabled
857
+ ):
858
+ self.manifest.add_node_nofile(node)
859
+ manifest_nodes_modified = True
860
+
861
+ return manifest_nodes_modified
862
+
863
+ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]:
864
+ """Compare the global hashes of the read-in parse results' values to
865
+ the known ones, and return if it is ok to re-use the results.
866
+ """
867
+ valid = True
868
+ reparse_reason = None
869
+
870
+ if manifest.metadata.dbt_version != __version__:
871
+ # #3757 log both versions because of reports of invalid cases of mismatch.
872
+ fire_event(UnableToPartialParse(reason="of a version mismatch"))
873
+ # If the version is wrong, the other checks might not work
874
+ return False, ReparseReason.version_mismatch
875
+ if self.manifest.state_check.vars_hash != manifest.state_check.vars_hash:
876
+ fire_event(
877
+ UnableToPartialParse(
878
+ reason="config vars, config profile, or config target have changed"
879
+ )
880
+ )
881
+ fire_event(
882
+ Note(
883
+ msg=f"previous checksum: {self.manifest.state_check.vars_hash.checksum}, current checksum: {manifest.state_check.vars_hash.checksum}"
884
+ ),
885
+ level=EventLevel.DEBUG,
886
+ )
887
+ valid = False
888
+ reparse_reason = ReparseReason.vars_changed
889
+ if self.manifest.state_check.profile_hash != manifest.state_check.profile_hash:
890
+ # Note: This should be made more granular. We shouldn't need to invalidate
891
+ # partial parsing if a non-used profile section has changed.
892
+ fire_event(UnableToPartialParse(reason="profile has changed"))
893
+ valid = False
894
+ reparse_reason = ReparseReason.profile_changed
895
+ if (
896
+ self.manifest.state_check.project_env_vars_hash
897
+ != manifest.state_check.project_env_vars_hash
898
+ ):
899
+ fire_event(
900
+ UnableToPartialParse(reason="env vars used in dbt_project.yml have changed")
901
+ )
902
+ valid = False
903
+ reparse_reason = ReparseReason.proj_env_vars_changed
904
+
905
+ missing_keys = {
906
+ k
907
+ for k in self.manifest.state_check.project_hashes
908
+ if k not in manifest.state_check.project_hashes
909
+ }
910
+ if missing_keys:
911
+ fire_event(UnableToPartialParse(reason="a project dependency has been added"))
912
+ valid = False
913
+ reparse_reason = ReparseReason.deps_changed
914
+
915
+ for key, new_value in self.manifest.state_check.project_hashes.items():
916
+ if key in manifest.state_check.project_hashes:
917
+ old_value = manifest.state_check.project_hashes[key]
918
+ if new_value != old_value:
919
+ fire_event(UnableToPartialParse(reason="a project config has changed"))
920
+ valid = False
921
+ reparse_reason = ReparseReason.project_config_changed
922
+ return valid, reparse_reason
923
+
924
+ def skip_partial_parsing_because_of_macros(self):
925
+ if not self.partial_parser:
926
+ return False
927
+ if self.partial_parser.deleted_special_override_macro:
928
+ return True
929
+ # Check for custom versions of these special macros
930
+ for macro_name in special_override_macros:
931
+ macro = self.macro_resolver.get_macro(None, macro_name)
932
+ if macro and macro.package_name != "dbt":
933
+ if (
934
+ macro.file_id in self.partial_parser.file_diff["changed"]
935
+ or macro.file_id in self.partial_parser.file_diff["added"]
936
+ ):
937
+ # The file with the macro in it has changed
938
+ return True
939
+ return False
940
+
941
+ def read_manifest_for_partial_parse(self) -> Optional[Manifest]:
942
+ flags = get_flags()
943
+ if not flags.PARTIAL_PARSE:
944
+ fire_event(PartialParsingNotEnabled())
945
+ return None
946
+ path = flags.PARTIAL_PARSE_FILE_PATH or os.path.join(
947
+ self.root_project.project_target_path, PARTIAL_PARSE_FILE_NAME
948
+ )
949
+
950
+ reparse_reason = None
951
+
952
+ if os.path.exists(path):
953
+ try:
954
+ with open(path, "rb") as fp:
955
+ manifest_mp = fp.read()
956
+ manifest: Manifest = Manifest.from_msgpack(manifest_mp, decoder=extended_mashumuro_decoder) # type: ignore
957
+ # keep this check inside the try/except in case something about
958
+ # the file has changed in weird ways, perhaps due to being a
959
+ # different version of dbt
960
+ is_partial_parsable, reparse_reason = self.is_partial_parsable(manifest)
961
+ if is_partial_parsable:
962
+ # We don't want to have stale generated_at dates
963
+ manifest.metadata.generated_at = datetime.now(timezone.utc).replace(
964
+ tzinfo=None
965
+ )
966
+ # or invocation_ids
967
+ manifest.metadata.invocation_id = get_invocation_id()
968
+ return manifest
969
+ except Exception as exc:
970
+ fire_event(
971
+ ParsedFileLoadFailed(path=path, exc=str(exc), exc_info=traceback.format_exc())
972
+ )
973
+ reparse_reason = ReparseReason.load_file_failure
974
+ else:
975
+ fire_event(
976
+ UnableToPartialParse(reason="saved manifest not found. Starting full parse.")
977
+ )
978
+ reparse_reason = ReparseReason.file_not_found
979
+
980
+ # this event is only fired if a full reparse is needed
981
+ if dbt.tracking.active_user is not None: # no active_user if doing load_macros
982
+ dbt.tracking.track_partial_parser({"full_reparse_reason": reparse_reason})
983
+
984
+ return None
985
+
986
+ def build_perf_info(self):
987
+ flags = get_flags()
988
+ mli = ManifestLoaderInfo(
989
+ is_partial_parse_enabled=flags.PARTIAL_PARSE,
990
+ is_static_analysis_enabled=flags.STATIC_PARSER,
991
+ )
992
+ for project in self.all_projects.values():
993
+ project_info = ProjectLoaderInfo(
994
+ project_name=project.project_name,
995
+ elapsed=0,
996
+ )
997
+ mli.projects.append(project_info)
998
+ mli._project_index[project.project_name] = project_info
999
+ return mli
1000
+
1001
+ # TODO: handle --vars in the same way we handle env_var
1002
+ # https://github.com/dbt-labs/dbt-core/issues/6323
1003
+ def build_manifest_state_check(self):
1004
+ config = self.root_project
1005
+ all_projects = self.all_projects
1006
+ # if any of these change, we need to reject the parser
1007
+
1008
+ # Create a FileHash of vars string, profile name and target name
1009
+ # This does not capture vars in dbt_project, just the command line
1010
+ # arg vars, but since any changes to that file will cause state_check
1011
+ # to not pass, it doesn't matter. If we move to more granular checking
1012
+ # of env_vars, that would need to change.
1013
+ # We are using the parsed cli_vars instead of config.args.vars, in order
1014
+ # to sort them and avoid reparsing because of ordering issues.
1015
+ secret_vars = [
1016
+ v for k, v in config.cli_vars.items() if k.startswith(SECRET_ENV_PREFIX) and v.strip()
1017
+ ]
1018
+ stringified_cli_vars = pprint.pformat(config.cli_vars)
1019
+ vars_hash = FileHash.from_contents(
1020
+ "\x00".join(
1021
+ [
1022
+ stringified_cli_vars,
1023
+ getattr(config.args, "profile", "") or "",
1024
+ getattr(config.args, "target", "") or "",
1025
+ __version__,
1026
+ ]
1027
+ )
1028
+ )
1029
+ fire_event(
1030
+ StateCheckVarsHash(
1031
+ checksum=vars_hash.checksum,
1032
+ vars=scrub_secrets(stringified_cli_vars, secret_vars),
1033
+ profile=config.args.profile,
1034
+ target=config.args.target,
1035
+ version=__version__,
1036
+ )
1037
+ )
1038
+
1039
+ # Create a FileHash of the env_vars in the project
1040
+ key_list = list(config.project_env_vars.keys())
1041
+ key_list.sort()
1042
+ env_var_str = ""
1043
+ for key in key_list:
1044
+ env_var_str += f"{key}:{config.project_env_vars[key]}|"
1045
+ project_env_vars_hash = FileHash.from_contents(env_var_str)
1046
+
1047
+ # Create a hash of the connection_info, which the user has access to in the
1048
+ # jinja context. Attributes here may therefore affect the parsing result.
1049
+ # Ideally we should not expose all of the connection info to jinja.
1050
+
1051
+ # Renaming this variable means that we would have to do a whole lot more
1052
+ # work to make sure the previous manifest can still be loaded correctly.
1053
+ # This is an example of why naming should be chosen based on functionality
1054
+ # rather than implementation details.
1055
+ connection_keys = list(config.credentials.connection_info())
1056
+ # avoid reparsing because of ordering issues
1057
+ connection_keys.sort()
1058
+ profile_hash = FileHash.from_contents(pprint.pformat(connection_keys))
1059
+
1060
+ # Create a FileHashes for dbt_project for all dependencies
1061
+ project_hashes = {}
1062
+ for name, project in all_projects.items():
1063
+ path = os.path.join(project.project_root, "dbt_project.yml")
1064
+ with open(path) as fp:
1065
+ project_hashes[name] = FileHash.from_contents(fp.read())
1066
+
1067
+ # Create the ManifestStateCheck object
1068
+ state_check = ManifestStateCheck(
1069
+ project_env_vars_hash=project_env_vars_hash,
1070
+ vars_hash=vars_hash,
1071
+ profile_hash=profile_hash,
1072
+ project_hashes=project_hashes,
1073
+ )
1074
+ return state_check
1075
+
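build_manifest_state_check reduces the partial-parse validity question to a handful of hashes: sorted cli vars plus profile/target/version, project env vars, sorted connection info, and each project's dbt_project.yml. A small sketch of the vars-hash comparison that is_partial_parsable later performs (the vars, profile, and target values are made up):

old_vars_hash = FileHash.from_contents(
    "\x00".join([pprint.pformat({"region": "us"}), "jaffle_shop", "dev", __version__])
)
new_vars_hash = FileHash.from_contents(
    "\x00".join([pprint.pformat({"region": "eu"}), "jaffle_shop", "dev", __version__])
)
# any difference (a --vars value here) changes the checksum and forces ReparseReason.vars_changed
assert old_vars_hash.checksum != new_vars_hash.checksum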
1076
+ def save_macros_to_adapter(self, adapter):
1077
+ adapter.set_macro_resolver(self.manifest)
1078
+ # This executes the callable macro_hook and sets the
1079
+ # query headers
1081
+ query_header_context = generate_query_header_context(adapter.config, self.manifest)
1082
+ self.macro_hook(query_header_context)
1083
+
1084
+ # This creates a MacroManifest which contains the macros in
1085
+ # the adapter. Only called by the load_macros call from the
1086
+ # adapter.
1087
+ def create_macro_manifest(self):
1088
+ for project in self.all_projects.values():
1089
+ # what is the manifest passed in actually used for?
1090
+ macro_parser = MacroParser(project, self.manifest)
1091
+ for path in macro_parser.get_paths():
1092
+ source_file = load_source_file(path, ParseFileType.Macro, project.project_name, {})
1093
+ block = FileBlock(source_file)
1094
+ # This does not add the file to the manifest.files,
1095
+ # but that shouldn't be necessary here.
1096
+ macro_parser.parse_file(block)
1097
+ macro_manifest = MacroManifest(self.manifest.macros)
1098
+ return macro_manifest
1099
+
1100
+ # This is called by the adapter code only, to create the
1101
+ # MacroManifest that's stored in the adapter.
1102
+ # 'get_full_manifest' uses a persistent ManifestLoader while this
1103
+ # creates a temporary ManifestLoader and throws it away.
1104
+ # Not sure when this would actually get used except in tests.
1105
+ # The ManifestLoader loads macros with other files, then copies
1106
+ # into the adapter MacroManifest.
1107
+ @classmethod
1108
+ def load_macros(
1109
+ cls,
1110
+ root_config: RuntimeConfig,
1111
+ macro_hook: Callable[[Manifest], Any],
1112
+ base_macros_only=False,
1113
+ ) -> Manifest:
1114
+ # base_only/base_macros_only: for testing only,
1115
+ # allows loading macros without running 'dbt deps' first
1116
+ projects = root_config.load_dependencies(base_only=base_macros_only)
1117
+
1118
+ # This creates a loader object, including result,
1119
+ # and then throws it away, returning only the
1120
+ # manifest
1121
+ loader = cls(root_config, projects, macro_hook)
1122
+
1123
+ return loader.create_macro_manifest()
1124
+
1125
+ # Create tracking event for saving performance info
1126
+ def track_project_load(self):
1127
+ invocation_id = get_invocation_id()
1128
+ dbt.tracking.track_project_load(
1129
+ {
1130
+ "invocation_id": invocation_id,
1131
+ "project_id": self.root_project.hashed_name(),
1132
+ "path_count": self._perf_info.path_count,
1133
+ "parsed_path_count": self._perf_info.parsed_path_count,
1134
+ "read_files_elapsed": self._perf_info.read_files_elapsed,
1135
+ "load_macros_elapsed": self._perf_info.load_macros_elapsed,
1136
+ "parse_project_elapsed": self._perf_info.parse_project_elapsed,
1137
+ "patch_sources_elapsed": self._perf_info.patch_sources_elapsed,
1138
+ "process_manifest_elapsed": (self._perf_info.process_manifest_elapsed),
1139
+ "load_all_elapsed": self._perf_info.load_all_elapsed,
1140
+ "is_partial_parse_enabled": (self._perf_info.is_partial_parse_enabled),
1141
+ "is_static_analysis_enabled": self._perf_info.is_static_analysis_enabled,
1142
+ "static_analysis_path_count": self._perf_info.static_analysis_path_count,
1143
+ "static_analysis_parsed_path_count": self._perf_info.static_analysis_parsed_path_count, # noqa: E501
1144
+ }
1145
+ )
1146
+
1147
+ # Takes references in the 'refs' array of nodes, exposures, metrics, semantic models,
1148
+ # and functions, finds the target node, and updates 'depends_on.nodes' with the unique id
1149
+ def process_refs(self, current_project: str, dependencies: Optional[Mapping[str, Project]]):
1150
+ for node in self.manifest.nodes.values():
1151
+ if node.created_at < self.started_at:
1152
+ continue
1153
+ _process_refs(self.manifest, current_project, node, dependencies)
1154
+ for exposure in self.manifest.exposures.values():
1155
+ if exposure.created_at < self.started_at:
1156
+ continue
1157
+ _process_refs(self.manifest, current_project, exposure, dependencies)
1158
+ for metric in self.manifest.metrics.values():
1159
+ if metric.created_at < self.started_at:
1160
+ continue
1161
+ _process_refs(self.manifest, current_project, metric, dependencies)
1162
+ for semantic_model in self.manifest.semantic_models.values():
1163
+ if semantic_model.created_at < self.started_at:
1164
+ continue
1165
+ _process_refs(self.manifest, current_project, semantic_model, dependencies)
1166
+ self.update_semantic_model(semantic_model)
1167
+ for function in self.manifest.functions.values():
1168
+ if function.created_at < self.started_at:
1169
+ continue
1170
+ _process_refs(self.manifest, current_project, function, dependencies)
1171
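+ # Example (illustrative, not part of the original source): a model whose SQL
+ # contains {{ ref('orders') }} ends up with the resolved unique id, e.g.
+ # 'model.my_project.orders', appended to its depends_on.nodes by this pass.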
+
1172
+ # Takes references in the 'metrics' array of nodes and exposures, finds the target
1173
+ # metric, and updates 'depends_on.nodes' with its unique id
1174
+ def process_metrics(self, config: RuntimeConfig):
1175
+ current_project = config.project_name
1176
+ for metric in self.manifest.metrics.values():
1177
+ if metric.created_at < self.started_at:
1178
+ continue
1179
+ _process_metric_node(self.manifest, current_project, metric)
1180
+ _process_metrics_for_node(self.manifest, current_project, metric)
1181
+ for node in self.manifest.nodes.values():
1182
+ if node.created_at < self.started_at:
1183
+ continue
1184
+ _process_metrics_for_node(self.manifest, current_project, node)
1185
+ for exposure in self.manifest.exposures.values():
1186
+ if exposure.created_at < self.started_at:
1187
+ continue
1188
+ _process_metrics_for_node(self.manifest, current_project, exposure)
1189
+
1190
+ def process_saved_queries(self, config: RuntimeConfig):
1191
+ """Processes SavedQuery nodes to populate their `depends_on`."""
1192
+ # Note: This will also capture various nodes which have been re-parsed
1193
+ # because they refer to some other changed node, so there will be
1194
+ # false positives. Ideally we would compare actual changes.
1195
+ semantic_manifest_changed = False
1196
+ semantic_manifest_nodes: chain[SemanticManifestNode] = chain(
1197
+ self.manifest.saved_queries.values(),
1198
+ self.manifest.semantic_models.values(),
1199
+ self.manifest.metrics.values(),
1200
+ )
1201
+ for node in semantic_manifest_nodes:
1202
+ # Check if this node has been modified in this parsing run
1203
+ if node.created_at > self.started_at:
1204
+ semantic_manifest_changed = True
1205
+ break # as soon as we run into one changed node we can stop
1206
+ if semantic_manifest_changed is False:
1207
+ return
1208
+
1209
+ current_project = config.project_name
1210
+ for saved_query in self.manifest.saved_queries.values():
1211
+ # TODO:
1212
+ # 1. process `where` of SavedQuery for `depends_on`s
1213
+ # 2. process `group_by` of SavedQuery for `depends_on`s
1214
+ _process_metrics_for_node(self.manifest, current_project, saved_query)
1215
+
1216
+ def process_model_inferred_primary_keys(self):
1217
+ """Processes Model nodes to populate their `primary_key`."""
1218
+ model_to_generic_test_map: Dict[str, List[GenericTestNode]] = {}
1219
+ for node in self.manifest.nodes.values():
1220
+ if not isinstance(node, ModelNode):
1221
+ continue
1222
+ if node.created_at < self.started_at:
1223
+ continue
1224
+ if not model_to_generic_test_map:
1225
+ model_to_generic_test_map = self.build_model_to_generic_tests_map()
1226
+ generic_tests: List[GenericTestNode] = []
1227
+ if node.unique_id in model_to_generic_test_map:
1228
+ generic_tests = model_to_generic_test_map[node.unique_id]
1229
+ primary_key = node.infer_primary_key(generic_tests)
1230
+ node.primary_key = sorted(primary_key)
1231
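+ # Example (illustrative, not part of the original source): a model whose 'id'
+ # column carries both unique and not_null tests would typically get
+ # primary_key == ['id'] inferred here.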
+
1232
+ def update_semantic_model(self, semantic_model) -> None:
1233
+ # This has to be done at the end of parsing because the referenced model
1234
+ # might have alias/schema/database fields that are updated by yaml config.
1235
+ if semantic_model.depends_on_nodes[0]:
1236
+ refd_node = self.manifest.nodes[semantic_model.depends_on_nodes[0]]
1237
+ semantic_model.node_relation = NodeRelation(
1238
+ relation_name=refd_node.relation_name,
1239
+ alias=refd_node.alias,
1240
+ schema_name=refd_node.schema,
1241
+ database=refd_node.database,
1242
+ )
1243
+
1244
+ # nodes: node and column descriptions, version columns descriptions
1245
+ # sources: source and table descriptions, column descriptions
1246
+ # macros: macro argument descriptions
1247
+ # exposures: exposure descriptions
1248
+ # metrics: metric descriptions
1249
+ # semantic_models: semantic model descriptions
1250
+ def process_docs(self, config: RuntimeConfig):
1251
+ for node in self.manifest.nodes.values():
1252
+ if node.created_at < self.started_at:
1253
+ continue
1254
+ ctx = generate_runtime_docs_context(
1255
+ config,
1256
+ node,
1257
+ self.manifest,
1258
+ config.project_name,
1259
+ )
1260
+ _process_docs_for_node(ctx, node, self.manifest)
1261
+ for source in self.manifest.sources.values():
1262
+ if source.created_at < self.started_at:
1263
+ continue
1264
+ ctx = generate_runtime_docs_context(
1265
+ config,
1266
+ source,
1267
+ self.manifest,
1268
+ config.project_name,
1269
+ )
1270
+ _process_docs_for_source(ctx, source, self.manifest)
1271
+ for macro in self.manifest.macros.values():
1272
+ if macro.created_at < self.started_at:
1273
+ continue
1274
+ ctx = generate_runtime_docs_context(
1275
+ config,
1276
+ macro,
1277
+ self.manifest,
1278
+ config.project_name,
1279
+ )
1280
+ _process_docs_for_macro(ctx, macro)
1281
+ for exposure in self.manifest.exposures.values():
1282
+ if exposure.created_at < self.started_at:
1283
+ continue
1284
+ ctx = generate_runtime_docs_context(
1285
+ config,
1286
+ exposure,
1287
+ self.manifest,
1288
+ config.project_name,
1289
+ )
1290
+ _process_docs_for_exposure(ctx, exposure)
1291
+ for metric in self.manifest.metrics.values():
1292
+ if metric.created_at < self.started_at:
1293
+ continue
1294
+ ctx = generate_runtime_docs_context(
1295
+ config,
1296
+ metric,
1297
+ self.manifest,
1298
+ config.project_name,
1299
+ )
1300
+ _process_docs_for_metrics(ctx, metric)
1301
+ for semantic_model in self.manifest.semantic_models.values():
1302
+ if semantic_model.created_at < self.started_at:
1303
+ continue
1304
+ ctx = generate_runtime_docs_context(
1305
+ config,
1306
+ semantic_model,
1307
+ self.manifest,
1308
+ config.project_name,
1309
+ )
1310
+ _process_docs_for_semantic_model(ctx, semantic_model)
1311
+ for saved_query in self.manifest.saved_queries.values():
1312
+ if saved_query.created_at < self.started_at:
1313
+ continue
1314
+ ctx = generate_runtime_docs_context(
1315
+ config, saved_query, self.manifest, config.project_name
1316
+ )
1317
+ _process_docs_for_saved_query(ctx, saved_query)
1318
+
1319
+ # Loops through all nodes and exposures, for each element in
1320
+ # 'sources' array finds the source node and updates the
1321
+ # 'depends_on.nodes' array with the unique id
1322
+ def process_sources(self, current_project: str):
1323
+ for node in self.manifest.nodes.values():
1324
+ if node.resource_type == NodeType.Source:
1325
+ continue
1326
+ assert not isinstance(node, SourceDefinition)
1327
+ if node.created_at < self.started_at:
1328
+ continue
1329
+ _process_sources_for_node(self.manifest, current_project, node)
1330
+ for exposure in self.manifest.exposures.values():
1331
+ if exposure.created_at < self.started_at:
1332
+ continue
1333
+ _process_sources_for_exposure(self.manifest, current_project, exposure)
1334
+
1335
+ # Loops through all unit tests, finds the model
1336
+ # each unit test references, and updates the
1337
+ # 'depends_on.nodes' array with the unique id
1338
+ def process_unit_tests(self, current_project: str):
1339
+ models_to_versions = None
1340
+ unit_test_unique_ids = list(self.manifest.unit_tests.keys())
1341
+ for unit_test_unique_id in unit_test_unique_ids:
1342
+ # This is because some unit tests will be removed when processing
1343
+ # and the list of unit_test_unique_ids won't have changed
1344
+ if unit_test_unique_id in self.manifest.unit_tests:
1345
+ unit_test = self.manifest.unit_tests[unit_test_unique_id]
1346
+ else:
1347
+ continue
1348
+ if unit_test.created_at < self.started_at:
1349
+ continue
1350
+ if not models_to_versions:
1351
+ models_to_versions = _build_model_names_to_versions(self.manifest)
1352
+ process_models_for_unit_test(
1353
+ self.manifest, current_project, unit_test, models_to_versions
1354
+ )
1355
+
1356
+ # Loops through all nodes and functions, for each element in their
1357
+ # 'functions' array finds the target function and updates the
1358
+ # 'depends_on.nodes' array with the unique id
1359
+ def process_functions(self, current_project: str):
1360
+ for node in self.manifest.nodes.values():
1361
+ if node.created_at < self.started_at:
1362
+ continue
1363
+ _process_functions_for_node(self.manifest, current_project, node)
1364
+
1365
+ for function in self.manifest.functions.values():
1366
+ if function.created_at < self.started_at:
1367
+ continue
1368
+ _process_functions_for_node(self.manifest, current_project, function)
1369
+
1370
+ def cleanup_disabled(self):
1371
+ # make sure the nodes are in the manifest.nodes or the disabled dict,
1372
+ # correctly now that the schema files are also parsed
1373
+ disabled_nodes = []
1374
+ for node in self.manifest.nodes.values():
1375
+ if not node.config.enabled:
1376
+ disabled_nodes.append(node.unique_id)
1377
+ self.manifest.add_disabled_nofile(node)
1378
+ for unique_id in disabled_nodes:
1379
+ self.manifest.nodes.pop(unique_id)
1380
+
1381
+ disabled_copy = deepcopy(self.manifest.disabled)
1382
+ for disabled in disabled_copy.values():
1383
+ for node in disabled:
1384
+ if node.config.enabled:
1385
+ for dis_index, dis_node in enumerate(disabled):
1386
+ # Remove node from disabled and unique_id from disabled dict if necessary
1387
+ del self.manifest.disabled[node.unique_id][dis_index]
1388
+ if not self.manifest.disabled[node.unique_id]:
1389
+ self.manifest.disabled.pop(node.unique_id)
1390
+
1391
+ self.manifest.add_node_nofile(node)
1392
+
1393
+ self.manifest.rebuild_ref_lookup()
1394
+
1395
+ def check_valid_group_config(self):
1396
+ manifest = self.manifest
1397
+ group_names = {group.name for group in manifest.groups.values()}
1398
+
1399
+ for metric in manifest.metrics.values():
1400
+ self.check_valid_group_config_node(metric, group_names)
1401
+
1402
+ for semantic_model in manifest.semantic_models.values():
1403
+ self.check_valid_group_config_node(semantic_model, group_names)
1404
+
1405
+ for saved_query in manifest.saved_queries.values():
1406
+ self.check_valid_group_config_node(saved_query, group_names)
1407
+
1408
+ for node in manifest.nodes.values():
1409
+ self.check_valid_group_config_node(node, group_names)
1410
+
1411
+ def check_valid_group_config_node(
1412
+ self,
1413
+ groupable_node: Union[Metric, SavedQuery, SemanticModel, ManifestNode],
1414
+ valid_group_names: Set[str],
1415
+ ):
1416
+ groupable_node_group = groupable_node.group
1417
+ if groupable_node_group and groupable_node_group not in valid_group_names:
1418
+ raise dbt.exceptions.ParsingError(
1419
+ f"Invalid group '{groupable_node_group}', expected one of {sorted(list(valid_group_names))}",
1420
+ node=groupable_node,
1421
+ )
1422
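+ # Example (illustrative, not part of the original source): a model configured
+ # with group 'finance' when only a 'marketing' group is defined fails here
+ # with "Invalid group 'finance', expected one of ['marketing']".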
+
1423
+ def check_valid_access_property(self):
1424
+ for node in self.manifest.nodes.values():
1425
+ if (
1426
+ isinstance(node, ModelNode)
1427
+ and node.access == AccessType.Public
1428
+ and node.get_materialization() == "ephemeral"
1429
+ ):
1430
+ raise InvalidAccessTypeError(
1431
+ unique_id=node.unique_id,
1432
+ field_value=node.access,
1433
+ materialization=node.get_materialization(),
1434
+ )
1435
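+ # Example (illustrative, not part of the original source): a model with
+ # access: public and materialized: ephemeral is rejected here, since an
+ # ephemeral model never exists as a relation that other projects could read.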
+
1436
+ def check_valid_snapshot_config(self):
1437
+ # Snapshot config can be set in either SQL files or yaml files,
1438
+ # so we need to validate afterward.
1439
+ for node in self.manifest.nodes.values():
1440
+ if node.resource_type != NodeType.Snapshot:
1441
+ continue
1442
+ if node.created_at < self.started_at:
1443
+ continue
1444
+ node.config.final_validate()
1445
+
1446
+ def check_valid_microbatch_config(self):
1447
+ if self.manifest.use_microbatch_batches(project_name=self.root_project.project_name):
1448
+ for node in self.manifest.nodes.values():
1449
+ if (
1450
+ node.config.materialized == "incremental"
1451
+ and node.config.incremental_strategy == "microbatch"
1452
+ ):
1453
+ # Required configs: event_time, batch_size, begin
1454
+ event_time = node.config.event_time
1455
+ if event_time is None:
1456
+ raise dbt.exceptions.ParsingError(
1457
+ f"Microbatch model '{node.name}' must provide an 'event_time' (string) config that indicates the name of the event time column."
1458
+ )
1459
+ if not isinstance(event_time, str):
1460
+ raise dbt.exceptions.ParsingError(
1461
+ f"Microbatch model '{node.name}' must provide an 'event_time' config of type string, but got: {type(event_time)}."
1462
+ )
1463
+
1464
+ begin = node.config.begin
1465
+ if begin is None:
1466
+ raise dbt.exceptions.ParsingError(
1467
+ f"Microbatch model '{node.name}' must provide a 'begin' (datetime) config that indicates the earliest timestamp the microbatch model should be built from."
1468
+ )
1469
+
1470
+ # Try to cast begin to a datetime using the same format as mashumaro, for consistency with other yaml-provided datetimes
1471
+ # Mashumaro default: https://github.com/Fatal1ty/mashumaro/blob/4ac16fd060a6c651053475597b58b48f958e8c5c/README.md?plain=1#L1186
1472
+ if isinstance(begin, str):
1473
+ try:
1474
+ begin = datetime.fromisoformat(begin)
1475
+ node.config.begin = begin
1476
+ except Exception:
1477
+ raise dbt.exceptions.ParsingError(
1478
+ f"Microbatch model '{node.name}' must provide a 'begin' config of valid datetime (ISO format), but got: {begin}."
1479
+ )
1480
+
1481
+ if not isinstance(begin, datetime):
1482
+ raise dbt.exceptions.ParsingError(
1483
+ f"Microbatch model '{node.name}' must provide a 'begin' config of type datetime, but got: {type(begin)}."
1484
+ )
1485
+
1486
+ batch_size = node.config.batch_size
1487
+ valid_batch_sizes = [size.value for size in BatchSize]
1488
+ if batch_size not in valid_batch_sizes:
1489
+ raise dbt.exceptions.ParsingError(
1490
+ f"Microbatch model '{node.name}' must provide a 'batch_size' config that is one of {valid_batch_sizes}, but got: {batch_size}."
1491
+ )
1492
+
1493
+ # Optional config: lookback (int)
1494
+ lookback = node.config.lookback
1495
+ if not isinstance(lookback, int) and lookback is not None:
1496
+ raise dbt.exceptions.ParsingError(
1497
+ f"Microbatch model '{node.name}' must provide the optional 'lookback' config as type int, but got: {type(lookback)})."
1498
+ )
1499
+
1500
+ # Optional config: concurrent_batches (bool)
1501
+ concurrent_batches = node.config.concurrent_batches
1502
+ if not isinstance(concurrent_batches, bool) and concurrent_batches is not None:
1503
+ raise dbt.exceptions.ParsingError(
1504
+ f"Microbatch model '{node.name}' optional 'concurrent_batches' config must be of type `bool` if specified, but got: {type(concurrent_batches)})."
1505
+ )
1506
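+ # Example of a config that satisfies the checks above (illustrative sketch,
+ # not part of the original source):
+ #   {{ config(materialized='incremental', incremental_strategy='microbatch',
+ #             event_time='created_at', batch_size='day', begin='2024-01-01') }}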
+
1507
+ def check_forcing_batch_concurrency(self) -> None:
1508
+ if self.manifest.use_microbatch_batches(project_name=self.root_project.project_name):
1509
+ adapter = get_adapter(self.root_project)
1510
+
1511
+ if not adapter.supports(Capability.MicrobatchConcurrency):
1512
+ models_forcing_concurrent_batches = 0
1513
+ for node in self.manifest.nodes.values():
1514
+ if (
1515
+ hasattr(node.config, "concurrent_batches")
1516
+ and node.config.concurrent_batches is True
1517
+ ):
1518
+ models_forcing_concurrent_batches += 1
1519
+
1520
+ if models_forcing_concurrent_batches > 0:
1521
+ warn_or_error(
1522
+ InvalidConcurrentBatchesConfig(
1523
+ num_models=models_forcing_concurrent_batches,
1524
+ adapter_type=adapter.type(),
1525
+ )
1526
+ )
1527
+
1528
+ def check_microbatch_model_has_a_filtered_input(self):
1529
+ if self.manifest.use_microbatch_batches(project_name=self.root_project.project_name):
1530
+ for node in self.manifest.nodes.values():
1531
+ if (
1532
+ node.config.materialized == "incremental"
1533
+ and node.config.incremental_strategy == "microbatch"
1534
+ ):
1535
+ # Validate upstream node event_time (if configured)
1536
+ has_input_with_event_time_config = False
1537
+ for input_unique_id in node.depends_on.nodes:
1538
+ input_node = self.manifest.expect(unique_id=input_unique_id)
1539
+ input_event_time = input_node.config.event_time
1540
+ if input_event_time:
1541
+ if not isinstance(input_event_time, str):
1542
+ raise dbt.exceptions.ParsingError(
1543
+ f"Microbatch model '{node.name}' depends on an input node '{input_node.name}' with an 'event_time' config of invalid (non-string) type: {type(input_event_time)}."
1544
+ )
1545
+ has_input_with_event_time_config = True
1546
+
1547
+ if not has_input_with_event_time_config:
1548
+ fire_event(MicrobatchModelNoEventTimeInputs(model_name=node.name))
1549
+
1550
+ def write_perf_info(self, target_path: str):
1551
+ path = os.path.join(target_path, PERF_INFO_FILE_NAME)
1552
+ write_file(path, json.dumps(self._perf_info, cls=dbt.utils.JSONEncoder, indent=4))
1553
+ fire_event(ParsePerfInfoPath(path=path))
1554
+
1555
+ def build_model_to_generic_tests_map(self) -> Dict[str, List[GenericTestNode]]:
1556
+ """Return a list of generic tests that are attached to the given model, including disabled tests"""
1557
+ model_to_generic_tests_map: Dict[str, List[GenericTestNode]] = {}
1558
+ for _, node in self.manifest.nodes.items():
1559
+ if isinstance(node, GenericTestNode) and node.attached_node:
1560
+ if node.attached_node not in model_to_generic_tests_map:
1561
+ model_to_generic_tests_map[node.attached_node] = []
1562
+ model_to_generic_tests_map[node.attached_node].append(node)
1563
+ for _, nodes in self.manifest.disabled.items():
1564
+ for disabled_node in nodes:
1565
+ if isinstance(disabled_node, GenericTestNode) and disabled_node.attached_node:
1566
+ if disabled_node.attached_node not in model_to_generic_tests_map:
1567
+ model_to_generic_tests_map[disabled_node.attached_node] = []
1568
+ model_to_generic_tests_map[disabled_node.attached_node].append(disabled_node)
1569
+ return model_to_generic_tests_map
1570
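+ # Illustrative shape of the returned map (not part of the original source):
+ #   {'model.my_project.orders': [<unique test node>, <not_null test node>]}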
+
1571
+
1572
+ def invalid_target_fail_unless_test(
1573
+ node,
1574
+ target_name: str,
1575
+ target_kind: str,
1576
+ target_package: Optional[str] = None,
1577
+ target_version: Optional[NodeVersion] = None,
1578
+ disabled: Optional[bool] = None,
1579
+ should_warn_if_disabled: bool = True,
1580
+ ):
1581
+ if node.resource_type == NodeType.Test:
1582
+ if disabled:
1583
+ event = InvalidDisabledTargetInTestNode(
1584
+ resource_type_title=node.resource_type.title(),
1585
+ unique_id=node.unique_id,
1586
+ original_file_path=node.original_file_path,
1587
+ target_kind=target_kind,
1588
+ target_name=target_name,
1589
+ target_package=target_package if target_package else "",
1590
+ )
1591
+
1592
+ fire_event(event, EventLevel.WARN if should_warn_if_disabled else None)
1593
+ else:
1594
+ warn_or_error(
1595
+ NodeNotFoundOrDisabled(
1596
+ original_file_path=node.original_file_path,
1597
+ unique_id=node.unique_id,
1598
+ resource_type_title=node.resource_type.title(),
1599
+ target_name=target_name,
1600
+ target_kind=target_kind,
1601
+ target_package=target_package if target_package else "",
1602
+ disabled=str(disabled),
1603
+ )
1604
+ )
1605
+ else:
1606
+ raise TargetNotFoundError(
1607
+ node=node,
1608
+ target_name=target_name,
1609
+ target_kind=target_kind,
1610
+ target_package=target_package,
1611
+ target_version=target_version,
1612
+ disabled=disabled,
1613
+ )
1614
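+ # NOTE (illustrative, not part of the original source): for test nodes a
+ # missing or disabled target only produces a warning/event, so the test is
+ # skipped; for every other resource type a TargetNotFoundError is raised.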
+
1615
+
1616
+ def _build_model_names_to_versions(manifest: Manifest) -> Dict[str, Dict]:
1617
+ model_names_to_versions: Dict[str, Dict] = {}
1618
+ for node in manifest.nodes.values():
1619
+ if node.resource_type != NodeType.Model:
1620
+ continue
1621
+ if not node.is_versioned:
1622
+ continue
1623
+ if node.package_name not in model_names_to_versions:
1624
+ model_names_to_versions[node.package_name] = {}
1625
+ if node.name not in model_names_to_versions[node.package_name]:
1626
+ model_names_to_versions[node.package_name][node.name] = []
1627
+ model_names_to_versions[node.package_name][node.name].append(node.unique_id)
1628
+ return model_names_to_versions
1629
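+ # Illustrative shape of the returned map (not part of the original source):
+ #   {'my_project': {'orders': ['model.my_project.orders.v1',
+ #                              'model.my_project.orders.v2']}}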
+
1630
+
1631
+ def _check_resource_uniqueness(
1632
+ manifest: Manifest,
1633
+ config: RuntimeConfig,
1634
+ ) -> None:
1635
+ alias_resources: Dict[str, ManifestNode] = {}
1636
+ name_resources: Dict[str, Dict] = {}
1637
+
1638
+ for resource, node in manifest.nodes.items():
1639
+ if not node.is_relational:
1640
+ continue
1641
+
1642
+ if node.package_name not in name_resources:
1643
+ name_resources[node.package_name] = {"ver": {}, "unver": {}}
1644
+ if node.is_versioned:
1645
+ name_resources[node.package_name]["ver"][node.name] = node
1646
+ else:
1647
+ name_resources[node.package_name]["unver"][node.name] = node
1648
+
1649
+ # the full node name is really defined by the adapter's relation
1650
+ relation_cls = get_relation_class_by_name(config.credentials.type)
1651
+ relation = relation_cls.create_from(quoting=config, relation_config=node) # type: ignore[arg-type]
1652
+ full_node_name = str(relation)
1653
+
1654
+ existing_alias = alias_resources.get(full_node_name)
1655
+ if existing_alias is not None:
1656
+ raise AmbiguousAliasError(
1657
+ node_1=existing_alias, node_2=node, duped_name=full_node_name
1658
+ )
1659
+
1660
+ alias_resources[full_node_name] = node
1661
+
1662
+ for ver_unver_dict in name_resources.values():
1663
+ versioned_names = ver_unver_dict["ver"].keys()
1664
+ unversioned_names = ver_unver_dict["unver"].keys()
1665
+ intersection_versioned = set(versioned_names).intersection(set(unversioned_names))
1666
+ if intersection_versioned:
1667
+ for name in intersection_versioned:
1668
+ versioned_node = ver_unver_dict["ver"][name]
1669
+ unversioned_node = ver_unver_dict["unver"][name]
1670
+ raise dbt.exceptions.DuplicateVersionedUnversionedError(
1671
+ versioned_node, unversioned_node
1672
+ )
1673
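+ # NOTE (illustrative, not part of the original source): two enabled relational
+ # nodes rendering to the same database.schema.identifier raise
+ # AmbiguousAliasError, and a model present in both versioned and unversioned
+ # form under one name raises DuplicateVersionedUnversionedError.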
+
1674
+
1675
+ def _warn_for_unused_resource_config_paths(manifest: Manifest, config: RuntimeConfig) -> None:
1676
+ resource_fqns: Mapping[str, PathSet] = manifest.get_resource_fqns()
1677
+ disabled_fqns: PathSet = frozenset(
1678
+ tuple(n.fqn) for n in list(chain.from_iterable(manifest.disabled.values()))
1679
+ )
1680
+ config.warn_for_unused_resource_config_paths(resource_fqns, disabled_fqns)
1681
+
1682
+
1683
+ def _check_manifest(manifest: Manifest, config: RuntimeConfig) -> None:
1684
+ _check_resource_uniqueness(manifest, config)
1685
+ _warn_for_unused_resource_config_paths(manifest, config)
1686
+
1687
+
1688
+ DocsContextCallback = Callable[[ResultNode], Dict[str, Any]]
1689
+
1690
+
1691
+ def _get_doc_blocks(description: str, manifest: Manifest, node_package: str) -> List[str]:
1692
+ ast = parse(description)
1693
+ doc_blocks: List[str] = []
1694
+
1695
+ if not hasattr(ast, "body"):
1696
+ return doc_blocks
1697
+
1698
+ for statement in ast.body:
1699
+ for node in statement.nodes:
1700
+ if (
1701
+ isinstance(node, Call)
1702
+ and hasattr(node, "node")
1703
+ and hasattr(node, "args")
1704
+ and hasattr(node.node, "name")
1705
+ and node.node.name == "doc"
1706
+ ):
1707
+ doc_args = [arg.value for arg in node.args]
1708
+
1709
+ if len(doc_args) == 1:
1710
+ package, name = None, doc_args[0]
1711
+ elif len(doc_args) == 2:
1712
+ package, name = doc_args
1713
+ else:
1714
+ continue
1715
+
1716
+ if not manifest.metadata.project_name:
1717
+ continue
1718
+
1719
+ resolved_doc = manifest.resolve_doc(
1720
+ name, package, manifest.metadata.project_name, node_package
1721
+ )
1722
+
1723
+ if resolved_doc:
1724
+ doc_blocks.append(resolved_doc.unique_id)
1725
+
1726
+ return doc_blocks
1727
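+ # Example (illustrative, not part of the original source): a description such
+ # as "Orders table. {{ doc('orders_overview') }}" resolves to something like
+ # ['doc.my_project.orders_overview'] being returned here.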
+
1728
+
1729
+ # node and column descriptions
1730
+ def _process_docs_for_node(
1731
+ context: Dict[str, Any],
1732
+ node: ManifestNode,
1733
+ manifest: Manifest,
1734
+ ):
1735
+ node.doc_blocks = _get_doc_blocks(node.description, manifest, node.package_name)
1736
+ node.description = get_rendered(node.description, context)
1737
+
1738
+ for column_name, column in node.columns.items():
1739
+ column.doc_blocks = _get_doc_blocks(column.description, manifest, node.package_name)
1740
+ column.description = get_rendered(column.description, context)
1741
+
1742
+
1743
+ # source and table descriptions, column descriptions
1744
+ def _process_docs_for_source(
1745
+ context: Dict[str, Any],
1746
+ source: SourceDefinition,
1747
+ manifest: Manifest,
1748
+ ):
1749
+ source.doc_blocks = _get_doc_blocks(source.description, manifest, source.package_name)
1750
+ source.description = get_rendered(source.description, context)
1751
+
1752
+ source.source_description = get_rendered(source.source_description, context)
1753
+
1754
+ for column in source.columns.values():
1755
+ column.doc_blocks = _get_doc_blocks(column.description, manifest, source.package_name)
1756
+ column.description = get_rendered(column.description, context)
1757
+
1758
+
1759
+ # macro argument descriptions
1760
+ def _process_docs_for_macro(context: Dict[str, Any], macro: Macro) -> None:
1761
+ macro.description = get_rendered(macro.description, context)
1762
+ for arg in macro.arguments:
1763
+ arg.description = get_rendered(arg.description, context)
1764
+
1765
+
1766
+ # exposure descriptions
1767
+ def _process_docs_for_exposure(context: Dict[str, Any], exposure: Exposure) -> None:
1768
+ exposure.description = get_rendered(exposure.description, context)
1769
+
1770
+
1771
+ def _process_docs_for_metrics(context: Dict[str, Any], metric: Metric) -> None:
1772
+ metric.description = get_rendered(metric.description, context)
1773
+
1774
+
1775
+ def _process_docs_for_semantic_model(
1776
+ context: Dict[str, Any], semantic_model: SemanticModel
1777
+ ) -> None:
1778
+ if semantic_model.description:
1779
+ semantic_model.description = get_rendered(semantic_model.description, context)
1780
+
1781
+ for dimension in semantic_model.dimensions:
1782
+ if dimension.description:
1783
+ dimension.description = get_rendered(dimension.description, context)
1784
+
1785
+ for measure in semantic_model.measures:
1786
+ if measure.description:
1787
+ measure.description = get_rendered(measure.description, context)
1788
+
1789
+ for entity in semantic_model.entities:
1790
+ if entity.description:
1791
+ entity.description = get_rendered(entity.description, context)
1792
+
1793
+
1794
+ def _process_docs_for_saved_query(context: Dict[str, Any], saved_query: SavedQuery) -> None:
1795
+ if saved_query.description:
1796
+ saved_query.description = get_rendered(saved_query.description, context)
1797
+
1798
+
1799
+ def _process_refs(
1800
+ manifest: Manifest, current_project: str, node, dependencies: Optional[Mapping[str, Project]]
1801
+ ) -> None:
1802
+ """Given a manifest and node in that manifest, process its refs"""
1803
+
1804
+ dependencies = dependencies or {}
1805
+
1806
+ if isinstance(node, SeedNode):
1807
+ return
1808
+
1809
+ for ref in node.refs:
1810
+ target_model: Optional[Union[Disabled, ManifestNode]] = None
1811
+ target_model_name: str = ref.name
1812
+ target_model_package: Optional[str] = ref.package
1813
+ target_model_version: Optional[NodeVersion] = ref.version
1814
+
1815
+ if len(ref.positional_args) < 1 or len(ref.positional_args) > 2:
1816
+ raise dbt.exceptions.DbtInternalError(
1817
+ f"Refs should always be 1 or 2 arguments - got {len(ref.positional_args)}"
1818
+ )
1819
+
1820
+ target_model = manifest.resolve_ref(
1821
+ node,
1822
+ target_model_name,
1823
+ target_model_package,
1824
+ target_model_version,
1825
+ current_project,
1826
+ node.package_name,
1827
+ )
1828
+
1829
+ if target_model is None or isinstance(target_model, Disabled):
1830
+ # This may raise. Even if it doesn't, we don't want to add
1831
+ # this node to the graph b/c there is no destination node
1832
+ node.config.enabled = False
1833
+ invalid_target_fail_unless_test(
1834
+ node=node,
1835
+ target_name=target_model_name,
1836
+ target_kind="node",
1837
+ target_package=target_model_package,
1838
+ target_version=target_model_version,
1839
+ disabled=(isinstance(target_model, Disabled)),
1840
+ should_warn_if_disabled=False,
1841
+ )
1842
+
1843
+ continue
1844
+ elif manifest.is_invalid_private_ref(node, target_model, dependencies):
1845
+ raise dbt.exceptions.DbtReferenceError(
1846
+ unique_id=node.unique_id,
1847
+ ref_unique_id=target_model.unique_id,
1848
+ access=AccessType.Private,
1849
+ scope=dbt_common.utils.cast_to_str(target_model.group),
1850
+ )
1851
+ elif manifest.is_invalid_protected_ref(node, target_model, dependencies):
1852
+ raise dbt.exceptions.DbtReferenceError(
1853
+ unique_id=node.unique_id,
1854
+ ref_unique_id=target_model.unique_id,
1855
+ access=AccessType.Protected,
1856
+ scope=target_model.package_name,
1857
+ )
1858
+
1859
+ target_model_id = target_model.unique_id
1860
+ node.depends_on.add_node(target_model_id)
1861
+
1862
+
1863
+ def _process_metric_depends_on(
1864
+ manifest: Manifest,
1865
+ current_project: str,
1866
+ metric: Metric,
1867
+ ) -> None:
1868
+ """For a given metric, set the `depends_on` property"""
1869
+
1870
+ assert len(metric.type_params.input_measures) > 0
1871
+ for input_measure in metric.type_params.input_measures:
1872
+ target_semantic_model = manifest.resolve_semantic_model_for_measure(
1873
+ target_measure_name=input_measure.name,
1874
+ current_project=current_project,
1875
+ node_package=metric.package_name,
1876
+ )
1877
+ if target_semantic_model is None:
1878
+ raise dbt.exceptions.ParsingError(
1879
+ f"A semantic model having a measure `{input_measure.name}` does not exist but was referenced.",
1880
+ node=metric,
1881
+ )
1882
+ if target_semantic_model.config.enabled is False:
1883
+ raise dbt.exceptions.ParsingError(
1884
+ f"The measure `{input_measure.name}` is referenced on disabled semantic model `{target_semantic_model.name}`.",
1885
+ node=metric,
1886
+ )
1887
+
1888
+ metric.depends_on.add_node(target_semantic_model.unique_id)
1889
+
1890
+
1891
+ def _process_metric_node(
1892
+ manifest: Manifest,
1893
+ current_project: str,
1894
+ metric: Metric,
1895
+ ) -> None:
1896
+ """Sets a metric's `input_measures` and `depends_on` properties"""
1897
+
1898
+ # This ensures that if this metric's input_measures have already been set
1899
+ # we skip the work. This could happen either due to recursion or if multiple
1900
+ # metrics derive from another given metric.
1901
+ # NOTE: This does not protect against infinite loops
1902
+ if len(metric.type_params.input_measures) > 0:
1903
+ return
1904
+
1905
+ if metric.type is MetricType.SIMPLE or metric.type is MetricType.CUMULATIVE:
1906
+ assert (
1907
+ metric.type_params.measure is not None
1908
+ ), f"{metric} should have a measure defined, but it does not."
1909
+ metric.add_input_measure(metric.type_params.measure)
1910
+ _process_metric_depends_on(
1911
+ manifest=manifest, current_project=current_project, metric=metric
1912
+ )
1913
+ elif metric.type is MetricType.CONVERSION:
1914
+ conversion_type_params = metric.type_params.conversion_type_params
1915
+ assert (
1916
+ conversion_type_params
1917
+ ), f"{metric.name} is a conversion metric and must have conversion_type_params defined."
1918
+ metric.add_input_measure(conversion_type_params.base_measure)
1919
+ metric.add_input_measure(conversion_type_params.conversion_measure)
1920
+ _process_metric_depends_on(
1921
+ manifest=manifest, current_project=current_project, metric=metric
1922
+ )
1923
+ elif metric.type is MetricType.DERIVED or metric.type is MetricType.RATIO:
1924
+ input_metrics = metric.input_metrics
1925
+ if metric.type is MetricType.RATIO:
1926
+ if metric.type_params.numerator is None or metric.type_params.denominator is None:
1927
+ raise dbt.exceptions.ParsingError(
1928
+ "Invalid ratio metric. Both a numerator and denominator must be specified",
1929
+ node=metric,
1930
+ )
1931
+ input_metrics = [metric.type_params.numerator, metric.type_params.denominator]
1932
+
1933
+ for input_metric in input_metrics:
1934
+ target_metric = manifest.resolve_metric(
1935
+ target_metric_name=input_metric.name,
1936
+ target_metric_package=None,
1937
+ current_project=current_project,
1938
+ node_package=metric.package_name,
1939
+ )
1940
+
1941
+ if target_metric is None:
1942
+ raise dbt.exceptions.ParsingError(
1943
+ f"The metric `{input_metric.name}` does not exist but was referenced.",
1944
+ node=metric,
1945
+ )
1946
+ elif isinstance(target_metric, Disabled):
1947
+ raise dbt.exceptions.ParsingError(
1948
+ f"The metric `{input_metric.name}` is disabled and thus cannot be referenced.",
1949
+ node=metric,
1950
+ )
1951
+
1952
+ _process_metric_node(
1953
+ manifest=manifest, current_project=current_project, metric=target_metric
1954
+ )
1955
+ for input_measure in target_metric.type_params.input_measures:
1956
+ metric.add_input_measure(input_measure)
1957
+ metric.depends_on.add_node(target_metric.unique_id)
1958
+ else:
1959
+ assert_values_exhausted(metric.type)
1960
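+ # Example (illustrative, not part of the original source): a ratio metric with
+ # numerator 'orders' and denominator 'visits' recursively resolves both input
+ # metrics, inherits their input_measures, and adds each one to depends_on.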
+
1961
+
1962
+ def _process_metrics_for_node(
1963
+ manifest: Manifest,
1964
+ current_project: str,
1965
+ node: Union[ManifestNode, Metric, Exposure, SavedQuery],
1966
+ ):
1967
+ """Given a manifest and a node in that manifest, process its metrics"""
1968
+
1969
+ metrics: List[List[str]]
1970
+ if isinstance(node, SeedNode):
1971
+ return
1972
+ elif isinstance(node, SavedQuery):
1973
+ metrics = [[metric] for metric in node.metrics]
1974
+ else:
1975
+ metrics = node.metrics
1976
+
1977
+ for metric in metrics:
1978
+ target_metric: Optional[Union[Disabled, Metric]] = None
1979
+ target_metric_name: str
1980
+ target_metric_package: Optional[str] = None
1981
+
1982
+ if len(metric) == 1:
1983
+ target_metric_name = metric[0]
1984
+ elif len(metric) == 2:
1985
+ target_metric_package, target_metric_name = metric
1986
+ else:
1987
+ raise dbt.exceptions.DbtInternalError(
1988
+ f"Metric references should always be 1 or 2 arguments - got {len(metric)}"
1989
+ )
1990
+
1991
+ target_metric = manifest.resolve_metric(
1992
+ target_metric_name,
1993
+ target_metric_package,
1994
+ current_project,
1995
+ node.package_name,
1996
+ )
1997
+
1998
+ if target_metric is None or isinstance(target_metric, Disabled):
1999
+ # This may raise. Even if it doesn't, we don't want to add
2000
+ # this node to the graph b/c there is no destination node
2001
+ node.config.enabled = False
2002
+ invalid_target_fail_unless_test(
2003
+ node=node,
2004
+ target_name=target_metric_name,
2005
+ target_kind="metric",
2006
+ target_package=target_metric_package,
2007
+ disabled=(isinstance(target_metric, Disabled)),
2008
+ )
2009
+ continue
2010
+
2011
+ target_metric_id = target_metric.unique_id
2012
+
2013
+ node.depends_on.add_node(target_metric_id)
2014
+
2015
+
2016
+ def remove_dependent_project_references(manifest, external_node_unique_id):
2017
+ for child_id in manifest.child_map[external_node_unique_id]:
2018
+ node = manifest.expect(child_id)
2019
+ # child node may have been modified and already recreated its depends_on.nodes list
2020
+ if external_node_unique_id in node.depends_on_nodes:
2021
+ node.depends_on_nodes.remove(external_node_unique_id)
2022
+ node.created_at = time.time()
2023
+
2024
+
2025
+ def _process_sources_for_exposure(manifest: Manifest, current_project: str, exposure: Exposure):
2026
+ target_source: Optional[Union[Disabled, SourceDefinition]] = None
2027
+ for source_name, table_name in exposure.sources:
2028
+ target_source = manifest.resolve_source(
2029
+ source_name,
2030
+ table_name,
2031
+ current_project,
2032
+ exposure.package_name,
2033
+ )
2034
+ if target_source is None or isinstance(target_source, Disabled):
2035
+ exposure.config.enabled = False
2036
+ invalid_target_fail_unless_test(
2037
+ node=exposure,
2038
+ target_name=f"{source_name}.{table_name}",
2039
+ target_kind="source",
2040
+ disabled=(isinstance(target_source, Disabled)),
2041
+ )
2042
+ continue
2043
+ target_source_id = target_source.unique_id
2044
+ exposure.depends_on.add_node(target_source_id)
2045
+
2046
+
2047
+ def _process_sources_for_metric(manifest: Manifest, current_project: str, metric: Metric):
2048
+ target_source: Optional[Union[Disabled, SourceDefinition]] = None
2049
+ for source_name, table_name in metric.sources:
2050
+ target_source = manifest.resolve_source(
2051
+ source_name,
2052
+ table_name,
2053
+ current_project,
2054
+ metric.package_name,
2055
+ )
2056
+ if target_source is None or isinstance(target_source, Disabled):
2057
+ metric.config.enabled = False
2058
+ invalid_target_fail_unless_test(
2059
+ node=metric,
2060
+ target_name=f"{source_name}.{table_name}",
2061
+ target_kind="source",
2062
+ disabled=(isinstance(target_source, Disabled)),
2063
+ )
2064
+ continue
2065
+ target_source_id = target_source.unique_id
2066
+ metric.depends_on.add_node(target_source_id)
2067
+
2068
+
2069
+ def _process_sources_for_node(manifest: Manifest, current_project: str, node: ManifestNode):
2070
+ if isinstance(node, SeedNode):
2071
+ return
2072
+
2073
+ target_source: Optional[Union[Disabled, SourceDefinition]] = None
2074
+ for source_name, table_name in node.sources:
2075
+ target_source = manifest.resolve_source(
2076
+ source_name,
2077
+ table_name,
2078
+ current_project,
2079
+ node.package_name,
2080
+ )
2081
+
2082
+ if target_source is None or isinstance(target_source, Disabled):
2083
+ # this follows the same pattern as refs
2084
+ node.config.enabled = False
2085
+ invalid_target_fail_unless_test(
2086
+ node=node,
2087
+ target_name=f"{source_name}.{table_name}",
2088
+ target_kind="source",
2089
+ disabled=(isinstance(target_source, Disabled)),
2090
+ )
2091
+ continue
2092
+ target_source_id = target_source.unique_id
2093
+ node.depends_on.add_node(target_source_id)
2094
+
2095
+
2096
+ def _process_functions_for_node(
2097
+ manifest: Manifest, current_project: str, node: ManifestNode
2098
+ ) -> None:
2099
+ """Given a manifest and node in that manifest, process its functions"""
2100
+
2101
+ if isinstance(node, SeedNode):
2102
+ return
2103
+
2104
+ for function_args in node.functions:
2105
+ target_function_name: str
2106
+ target_function_package: Optional[str] = None
2107
+ if len(function_args) == 1:
2108
+ target_function_name = function_args[0]
2109
+ elif len(function_args) == 2:
2110
+ target_function_package, target_function_name = function_args
2111
+ else:
2112
+ raise dbt.exceptions.DbtInternalError(
2113
+ f"Functions should always be 1 or 2 arguments - got {len(function_args)}"
2114
+ )
2115
+
2116
+ target_function = manifest.resolve_function(
2117
+ target_function_name,
2118
+ target_function_package,
2119
+ current_project,
2120
+ node.package_name,
2121
+ )
2122
+
2123
+ if target_function is None or isinstance(target_function, Disabled):
2124
+ node.config.enabled = False
2125
+ invalid_target_fail_unless_test(
2126
+ node=node,
2127
+ target_name=target_function_name,
2128
+ target_kind="function",
2129
+ target_package=target_function_package,
2130
+ disabled=(isinstance(target_function, Disabled)),
2131
+ should_warn_if_disabled=False,
2132
+ )
2133
+
2134
+ continue
2135
+
2136
+ node.depends_on.add_node(target_function.unique_id)
2137
+
2138
+
2139
+ # This is called in task.rpc.sql_commands when a "dynamic" node is
2140
+ # created in the manifest, in 'add_refs'
2141
+ def process_macro(config: RuntimeConfig, manifest: Manifest, macro: Macro) -> None:
2142
+ ctx = generate_runtime_docs_context(
2143
+ config,
2144
+ macro,
2145
+ manifest,
2146
+ config.project_name,
2147
+ )
2148
+ _process_docs_for_macro(ctx, macro)
2149
+
2150
+
2151
+ # This is called in task.rpc.sql_commands when a "dynamic" node is
2152
+ # created in the manifest, in 'add_refs'
2153
+ def process_node(config: RuntimeConfig, manifest: Manifest, node: ManifestNode):
2154
+ _process_sources_for_node(manifest, config.project_name, node)
2155
+ _process_refs(manifest, config.project_name, node, config.dependencies)
2156
+ ctx = generate_runtime_docs_context(config, node, manifest, config.project_name)
2157
+ _process_docs_for_node(ctx, node, manifest)
2158
+
2159
+
2160
+ def write_semantic_manifest(manifest: Manifest, target_path: str) -> None:
2161
+ path = os.path.join(target_path, SEMANTIC_MANIFEST_FILE_NAME)
2162
+ semantic_manifest = SemanticManifest(manifest)
2163
+ semantic_manifest.write_json_to_file(path)
2164
+
2165
+
2166
+ def write_manifest(manifest: Manifest, target_path: str, which: Optional[str] = None):
2167
+ file_name = MANIFEST_FILE_NAME
2168
+ path = os.path.join(target_path, file_name)
2169
+ manifest.write(path)
2170
+ add_artifact_produced(path)
2171
+
2172
+ write_semantic_manifest(manifest=manifest, target_path=target_path)
2173
+
2174
+
2175
+ def parse_manifest(
2176
+ runtime_config: RuntimeConfig,
2177
+ write_perf_info: bool,
2178
+ write: bool,
2179
+ write_json: bool,
2180
+ active_integrations: List[Optional[CatalogWriteIntegrationConfig]],
2181
+ ) -> Manifest:
2182
+ register_adapter(runtime_config, get_mp_context())
2183
+ adapter = get_adapter(runtime_config)
2184
+ adapter.set_macro_context_generator(generate_runtime_macro_context)
2185
+ for integration in active_integrations:
2186
+ adapter.add_catalog_integration(integration)
2187
+ manifest = ManifestLoader.get_full_manifest(
2188
+ runtime_config,
2189
+ write_perf_info=write_perf_info,
2190
+ )
2191
+
2192
+ # If we should (over)write the manifest in the target path, do that now
2193
+ if write and write_json:
2194
+ write_manifest(manifest, runtime_config.project_target_path)
2195
+ pm = plugins.get_plugin_manager(runtime_config.project_name)
2196
+ plugin_artifacts = pm.get_manifest_artifacts(manifest)
2197
+ for path, plugin_artifact in plugin_artifacts.items():
2198
+ plugin_artifact.write(path)
2199
+ fire_event(
2200
+ ArtifactWritten(
2201
+ artifact_type=plugin_artifact.__class__.__name__, artifact_path=path
2202
+ )
2203
+ )
2204
+ return manifest
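+ # NOTE (illustrative, not part of the original source): this is the top-level
+ # entry point used by CLI tasks -- it registers the adapter, attaches catalog
+ # integrations, loads the full manifest, and (when write/write_json are set)
+ # writes manifest.json plus any plugin-provided artifacts.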