dvt-core 1.11.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dvt-core might be problematic.

Files changed (261)
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/parser/partial.py ADDED
@@ -0,0 +1,1179 @@
1
+ import os
2
+ from copy import deepcopy
3
+ from typing import Callable, Dict, List, MutableMapping, Union
4
+
5
+ from dvt.constants import DEFAULT_ENV_PLACEHOLDER
6
+ from dvt.contracts.files import (
7
+ AnySourceFile,
8
+ ParseFileType,
9
+ SchemaSourceFile,
10
+ SourceFile,
11
+ parse_file_type_to_parser,
12
+ )
13
+ from dvt.contracts.graph.manifest import Manifest
14
+ from dvt.contracts.graph.nodes import AnalysisNode, ModelNode, SeedNode, SnapshotNode
15
+ from dvt.events.types import PartialParsingEnabled, PartialParsingFile
16
+ from dvt.node_types import NodeType
17
+
18
+ from dbt_common.context import get_invocation_context
19
+ from dbt_common.events.base_types import EventLevel
20
+ from dbt_common.events.functions import fire_event
21
+
22
+ mssat_files = (
23
+ ParseFileType.Model,
24
+ ParseFileType.Seed,
25
+ ParseFileType.Snapshot,
26
+ ParseFileType.Analysis,
27
+ ParseFileType.SingularTest,
28
+ )
29
+
30
+ mg_files = (
31
+ ParseFileType.Macro,
32
+ ParseFileType.GenericTest,
33
+ )
34
+
35
+
36
+ key_to_prefix = {
37
+ "models": "model",
38
+ "seeds": "seed",
39
+ "snapshots": "snapshot",
40
+ "analyses": "analysis",
41
+ "sources": "source",
42
+ }
43
+
44
+
45
+ parse_file_type_to_key = {
46
+ ParseFileType.Model: "models",
47
+ ParseFileType.Seed: "seeds",
48
+ ParseFileType.Snapshot: "snapshots",
49
+ ParseFileType.Analysis: "analyses",
50
+ }
51
+
52
+
53
+ # These macro names have special treatment in the ManifestLoader and
54
+ # partial parsing. If they have changed we will skip partial parsing
55
+ special_override_macros = [
56
+ "ref",
57
+ "source",
58
+ "config",
59
+ "generate_schema_name",
60
+ "generate_database_name",
61
+ "generate_alias_name",
62
+ ]
63
+
64
+
65
+ # Partial parsing. Create a diff of files from saved manifest and current
66
+ # files and produce a project_parser_file dictionary to drive parsing of
67
+ # only the necessary changes.
68
+ # Will produce a 'skip_parsing' method, and a project_parser_file dictionary
69
+ # All file objects from the new manifest are deepcopied, because we need
70
+ # to preserve an unchanged file object in case we need to drop back to a
71
+ # a full parse (such as for certain macro changes)
72
+ class PartialParsing:
73
+ def __init__(
74
+ self, saved_manifest: Manifest, new_files: MutableMapping[str, AnySourceFile]
75
+ ) -> None:
76
+ self.saved_manifest = saved_manifest
77
+ self.new_files = new_files
78
+ self.project_parser_files: Dict = {}
79
+ self.saved_files = self.saved_manifest.files
80
+ self.project_parser_files = {}
81
+ self.macro_child_map: Dict[str, List[str]] = {}
82
+ (
83
+ self.env_vars_changed_source_files,
84
+ self.env_vars_changed_schema_files,
85
+ ) = self.build_env_vars_to_files()
86
+ self.build_file_diff()
87
+ self.processing_file = None
88
+ self.deleted_special_override_macro = False
89
+ self.disabled_by_file_id = self.saved_manifest.build_disabled_by_file_id()
90
+
91
+ def skip_parsing(self):
92
+ return (
93
+ not self.file_diff["deleted"]
94
+ and not self.file_diff["added"]
95
+ and not self.file_diff["changed"]
96
+ and not self.file_diff["changed_schema_files"]
97
+ and not self.file_diff["deleted_schema_files"]
98
+ )
99
+
100
+ # Compare the previously saved manifest files and the just-loaded manifest
101
+ # files to see if anything changed
102
+ def build_file_diff(self):
103
+ saved_file_ids = set(self.saved_files.keys())
104
+ new_file_ids = set(self.new_files.keys())
105
+ deleted_all_files = saved_file_ids.difference(new_file_ids)
106
+ added = new_file_ids.difference(saved_file_ids)
107
+ common = saved_file_ids.intersection(new_file_ids)
108
+ changed_or_deleted_macro_file = False
109
+
110
+ # separate out deleted schema files
111
+ deleted_schema_files = []
112
+ deleted = []
113
+ for file_id in deleted_all_files:
114
+ if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
115
+ deleted_schema_files.append(file_id)
116
+ else:
117
+ if self.saved_files[file_id].parse_file_type in mg_files:
118
+ changed_or_deleted_macro_file = True
119
+ deleted.append(file_id)
120
+
121
+ changed = []
122
+ changed_schema_files = []
123
+ unchanged = []
124
+ for file_id in common:
125
+ if self.saved_files[file_id].checksum == self.new_files[file_id].checksum:
126
+ unchanged.append(file_id)
127
+ else:
128
+ # separate out changed schema files
129
+ if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
130
+ sf = self.saved_files[file_id]
131
+ if type(sf).__name__ != "SchemaSourceFile":
132
+ raise Exception(f"Serialization failure for {file_id}")
133
+ changed_schema_files.append(file_id)
134
+ else:
135
+ if self.saved_files[file_id].parse_file_type in mg_files:
136
+ changed_or_deleted_macro_file = True
137
+ changed.append(file_id)
138
+
139
+ # handle changed env_vars for non-schema-files
140
+ for file_id in self.env_vars_changed_source_files:
141
+ if file_id in deleted or file_id in changed:
142
+ continue
143
+ changed.append(file_id)
144
+
145
+ # handle changed env_vars for schema files
146
+ for file_id in self.env_vars_changed_schema_files.keys():
147
+ if file_id in deleted_schema_files or file_id in changed_schema_files:
148
+ continue
149
+ changed_schema_files.append(file_id)
150
+
151
+ file_diff = {
152
+ "deleted": deleted,
153
+ "deleted_schema_files": deleted_schema_files,
154
+ "added": added,
155
+ "changed": changed,
156
+ "changed_schema_files": changed_schema_files,
157
+ "unchanged": unchanged,
158
+ }
159
+ if changed_or_deleted_macro_file:
160
+ self.macro_child_map = self.saved_manifest.build_macro_child_map()
161
+ deleted = len(deleted) + len(deleted_schema_files)
162
+ changed = len(changed) + len(changed_schema_files)
163
+ event = PartialParsingEnabled(deleted=deleted, added=len(added), changed=changed)
164
+
165
+ if get_invocation_context().env.get("DBT_PP_TEST"):
166
+ fire_event(event, level=EventLevel.INFO)
167
+ else:
168
+ fire_event(event)
169
+ self.file_diff = file_diff
170
+
171
+ # generate the list of files that need parsing
172
+ # uses self.manifest.files generated by 'read_files'
173
+ def get_parsing_files(self):
174
+ if self.skip_parsing():
175
+ return {}
176
+ # Need to add new files first, because changes in schema files
177
+ # might refer to them
178
+ for file_id in self.file_diff["added"]:
179
+ self.processing_file = file_id
180
+ self.add_to_saved(file_id)
181
+ # Need to process schema files next, because the dictionaries
182
+ # need to be in place for handling SQL file changes
183
+ # The reverse sort here is just to ensure that the schema file
184
+ # processing order test case works, because otherwise the order
185
+ # of processing the schema files is not guaranteed.
186
+ self.file_diff["changed_schema_files"].sort(reverse=True)
187
+ for file_id in self.file_diff["changed_schema_files"]:
188
+ self.processing_file = file_id
189
+ self.change_schema_file(file_id)
190
+ for file_id in self.file_diff["deleted_schema_files"]:
191
+ self.processing_file = file_id
192
+ self.delete_schema_file(file_id)
193
+ for file_id in self.file_diff["deleted"]:
194
+ self.processing_file = file_id
195
+ self.delete_from_saved(file_id)
196
+ for file_id in self.file_diff["changed"]:
197
+ self.processing_file = file_id
198
+ self.update_in_saved(file_id)
199
+ return self.project_parser_files
200
+
201
+ # Add the file to the project parser dictionaries to schedule parsing
202
+ def add_to_pp_files(self, source_file):
203
+ file_id = source_file.file_id
204
+ parser_name = parse_file_type_to_parser[source_file.parse_file_type]
205
+ project_name = source_file.project_name
206
+ if not parser_name or not project_name:
207
+ raise Exception(
208
+ f"Did not find parse_file_type or project_name "
209
+ f"in SourceFile for {source_file.file_id}"
210
+ )
211
+ if project_name not in self.project_parser_files:
212
+ self.project_parser_files[project_name] = {}
213
+ if parser_name not in self.project_parser_files[project_name]:
214
+ self.project_parser_files[project_name][parser_name] = []
215
+ if (
216
+ file_id not in self.project_parser_files[project_name][parser_name]
217
+ and file_id not in self.file_diff["deleted"]
218
+ and file_id not in self.file_diff["deleted_schema_files"]
219
+ ):
220
+ self.project_parser_files[project_name][parser_name].append(file_id)
221
+
222
+ def already_scheduled_for_parsing(self, source_file):
223
+ file_id = source_file.file_id
224
+ project_name = source_file.project_name
225
+ if project_name not in self.project_parser_files:
226
+ return False
227
+ parser_name = parse_file_type_to_parser[source_file.parse_file_type]
228
+ if parser_name not in self.project_parser_files[project_name]:
229
+ return False
230
+ if file_id not in self.project_parser_files[project_name][parser_name]:
231
+ return False
232
+ return True
233
+
234
+ # Add new files, including schema files
235
+ def add_to_saved(self, file_id):
236
+ # add file object to saved manifest.files
237
+ source_file = deepcopy(self.new_files[file_id])
238
+ if source_file.parse_file_type == ParseFileType.Schema:
239
+ self.handle_added_schema_file(source_file)
240
+ self.saved_files[file_id] = source_file
241
+ # update pp_files to parse
242
+ self.add_to_pp_files(source_file)
243
+ fire_event(PartialParsingFile(operation="added", file_id=file_id))
244
+
245
+ def handle_added_schema_file(self, source_file):
246
+ source_file.pp_dict = source_file.dict_from_yaml.copy()
247
+ if "sources" in source_file.pp_dict:
248
+ for source in source_file.pp_dict["sources"]:
249
+ # We need to remove the original source, so it can
250
+ # be properly patched
251
+ if "overrides" in source:
252
+ self.remove_source_override_target(source)
253
+ if "models" in source_file.pp_dict:
254
+ for model in source_file.pp_dict["models"]:
255
+ if "versions" in model:
256
+ self.versioned_model_delete_schema_mssa_links(source_file, "models", model)
257
+
258
+ def delete_disabled(self, unique_id, file_id):
259
+ # This node/metric/exposure is disabled. Find it and remove it from disabled dictionary.
260
+ for dis_index, dis_node in enumerate(self.saved_manifest.disabled[unique_id]):
261
+ if dis_node.file_id == file_id:
262
+ node = dis_node
263
+ index = dis_index
264
+ break
265
+ # Remove node from disabled
266
+ del self.saved_manifest.disabled[unique_id][index]
267
+ # if all nodes were removed for the unique id, delete the unique_id
268
+ # from the disabled dict
269
+ if not self.saved_manifest.disabled[unique_id]:
270
+ self.saved_manifest.disabled.pop(unique_id)
271
+
272
+ return node
273
+
274
+ # Deletes for all non-schema files
275
+ def delete_from_saved(self, file_id):
276
+ # Look at all things touched by file, remove those
277
+ # nodes, and update pp_files to parse unless the
278
+ # file creating those nodes has also been deleted
279
+ saved_source_file = self.saved_files[file_id]
280
+
281
+ # SQL file: models, seeds, snapshots, analyses, tests: SQL files, except
282
+ # macros/tests
283
+ if saved_source_file.parse_file_type in mssat_files:
284
+ self.remove_mssat_file(saved_source_file)
285
+ self.saved_manifest.files.pop(file_id)
286
+
287
+ # macros
288
+ if saved_source_file.parse_file_type in mg_files:
289
+ self.delete_macro_file(saved_source_file, follow_references=True)
290
+
291
+ # docs
292
+ if saved_source_file.parse_file_type == ParseFileType.Documentation:
293
+ self.delete_doc_node(saved_source_file)
294
+
295
+ # fixtures
296
+ if saved_source_file.parse_file_type == ParseFileType.Fixture:
297
+ self.delete_fixture_node(saved_source_file)
298
+
299
+ fire_event(PartialParsingFile(operation="deleted", file_id=file_id))
300
+
301
+ # Updates for non-schema files
302
+ def update_in_saved(self, file_id):
303
+ new_source_file = deepcopy(self.new_files[file_id])
304
+ old_source_file = self.saved_files[file_id]
305
+
306
+ if new_source_file.parse_file_type in mssat_files:
307
+ self.update_mssat_in_saved(new_source_file, old_source_file)
308
+ elif new_source_file.parse_file_type in mg_files:
309
+ self.update_macro_in_saved(new_source_file, old_source_file)
310
+ elif new_source_file.parse_file_type == ParseFileType.Documentation:
311
+ self.update_doc_in_saved(new_source_file, old_source_file)
312
+ elif new_source_file.parse_file_type == ParseFileType.Fixture:
313
+ self.update_fixture_in_saved(new_source_file, old_source_file)
314
+ else:
315
+ raise Exception(f"Invalid parse_file_type in source_file {file_id}")
316
+ fire_event(PartialParsingFile(operation="updated", file_id=file_id))
317
+
318
+ # Models, seeds, snapshots: patches and tests
319
+ # analyses: patches, no tests
320
+ # tests: not touched by schema files (no patches, no tests)
321
+ # Updated schema files should have been processed already.
322
+ def update_mssat_in_saved(self, new_source_file, old_source_file):
323
+
324
+ if self.already_scheduled_for_parsing(old_source_file):
325
+ return
326
+
327
+ # These files only have one node except for snapshots
328
+ unique_ids = []
329
+ if old_source_file.nodes:
330
+ unique_ids = old_source_file.nodes
331
+
332
+ # replace source_file in saved and add to parsing list
333
+ file_id = new_source_file.file_id
334
+ self.saved_files[file_id] = deepcopy(new_source_file)
335
+ self.add_to_pp_files(new_source_file)
336
+ for unique_id in unique_ids:
337
+ self.remove_node_in_saved(new_source_file, unique_id)
338
+
339
+ def remove_node_in_saved(self, source_file, unique_id):
340
+ if unique_id in self.saved_manifest.nodes:
341
+ # delete node in saved
342
+ node = self.saved_manifest.nodes.pop(unique_id)
343
+ elif (
344
+ source_file.file_id in self.disabled_by_file_id
345
+ and unique_id in self.saved_manifest.disabled
346
+ ):
347
+ # This node is disabled. Find the node and remove it from disabled dictionary.
348
+ node = self.delete_disabled(unique_id, source_file.file_id)
349
+ else:
350
+ # Has already been deleted by another action
351
+ return
352
+
353
+ # look at patch_path in model node to see if we need
354
+ # to reapply a patch from a schema_file.
355
+ if node.patch_path:
356
+ file_id = node.patch_path
357
+ # it might be changed... then what?
358
+ if (
359
+ file_id not in self.file_diff["deleted"]
360
+ and file_id in self.saved_files
361
+ and source_file.parse_file_type in parse_file_type_to_key
362
+ ):
363
+ # Schema files should already be updated if this comes from a node,
364
+ # but this code is also called when updating groups and exposures.
365
+ # This might save the old schema file element, so when the schema file
366
+ # is processed, it should overwrite it by passing True to "merge_patch"
367
+ schema_file = self.saved_files[file_id]
368
+ dict_key = parse_file_type_to_key[source_file.parse_file_type]
369
+ # look for a matching list dictionary
370
+ elem_patch = None
371
+ if dict_key in schema_file.dict_from_yaml:
372
+ for elem in schema_file.dict_from_yaml[dict_key]:
373
+ if elem["name"] == node.name:
374
+ elem_patch = elem
375
+ break
376
+ if elem_patch:
377
+ self.delete_schema_mssa_links(schema_file, dict_key, elem_patch)
378
+ self.merge_patch(schema_file, dict_key, elem_patch)
379
+ if unique_id in schema_file.node_patches:
380
+ schema_file.node_patches.remove(unique_id)
381
+ if unique_id in self.saved_manifest.disabled:
382
+ # We have a patch_path in disabled nodes with a patch so
383
+ # that we can connect the patch to the node
384
+ for node in self.saved_manifest.disabled[unique_id]:
385
+ node.patch_path = None
386
+
387
+ def update_macro_in_saved(self, new_source_file, old_source_file):
388
+ if self.already_scheduled_for_parsing(old_source_file):
389
+ return
390
+ self.handle_macro_file_links(old_source_file, follow_references=True)
391
+ file_id = new_source_file.file_id
392
+ self.saved_files[file_id] = deepcopy(new_source_file)
393
+ self.add_to_pp_files(new_source_file)
394
+
395
+ def update_doc_in_saved(self, new_source_file, old_source_file):
396
+ if self.already_scheduled_for_parsing(old_source_file):
397
+ return
398
+ self.delete_doc_node(old_source_file)
399
+ self.saved_files[new_source_file.file_id] = deepcopy(new_source_file)
400
+ self.add_to_pp_files(new_source_file)
401
+
402
+ def update_fixture_in_saved(self, new_source_file, old_source_file):
403
+ if self.already_scheduled_for_parsing(old_source_file):
404
+ return
405
+ self.delete_fixture_node(old_source_file)
406
+ self.saved_files[new_source_file.file_id] = deepcopy(new_source_file)
407
+ self.add_to_pp_files(new_source_file)
408
+
409
+ def remove_mssat_file(self, source_file: AnySourceFile):
410
+ # nodes [unique_ids] -- SQL files
411
+ # There should always be a node for a SQL file
412
+ if not isinstance(source_file, SourceFile) or not source_file.nodes:
413
+ return
414
+ # There is generally only 1 node for SQL files, except for macros and snapshots
415
+ for unique_id in source_file.nodes:
416
+ self.remove_node_in_saved(source_file, unique_id)
417
+ self.schedule_referencing_nodes_for_parsing(unique_id)
418
+
419
+ # We need to re-parse nodes that reference another removed node
420
+ def schedule_referencing_nodes_for_parsing(self, unique_id):
421
+ # Look at "children", i.e. nodes that reference this node
422
+ if unique_id in self.saved_manifest.child_map:
423
+ self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
424
+
425
+ def schedule_nodes_for_parsing(self, unique_ids):
426
+ for unique_id in unique_ids:
427
+ if unique_id in self.saved_manifest.nodes:
428
+ node = self.saved_manifest.nodes[unique_id]
429
+ if node.resource_type == NodeType.Test and node.test_node_type == "generic":
430
+ # test nodes are handled separately. Must be removed from schema file
431
+ continue
432
+ file_id = node.file_id
433
+ if file_id in self.saved_files and file_id not in self.file_diff["deleted"]:
434
+ source_file = self.saved_files[file_id]
435
+ self.remove_mssat_file(source_file)
436
+ # content of non-schema files is only in new files
437
+ self.saved_files[file_id] = deepcopy(self.new_files[file_id])
438
+ self.add_to_pp_files(self.saved_files[file_id])
439
+ elif unique_id in self.saved_manifest.sources:
440
+ source = self.saved_manifest.sources[unique_id]
441
+ self._schedule_for_parsing(
442
+ "sources", source, source.source_name, self.delete_schema_source
443
+ )
444
+ elif unique_id in self.saved_manifest.exposures:
445
+ exposure = self.saved_manifest.exposures[unique_id]
446
+ self._schedule_for_parsing(
447
+ "exposures", exposure, exposure.name, self.delete_schema_exposure
448
+ )
449
+ elif unique_id in self.saved_manifest.metrics:
450
+ metric = self.saved_manifest.metrics[unique_id]
451
+ self._schedule_for_parsing(
452
+ "metrics", metric, metric.name, self.delete_schema_metric
453
+ )
454
+ elif unique_id in self.saved_manifest.semantic_models:
455
+ semantic_model = self.saved_manifest.semantic_models[unique_id]
456
+ self._schedule_for_parsing(
457
+ "semantic_models",
458
+ semantic_model,
459
+ semantic_model.name,
460
+ self.delete_schema_semantic_model,
461
+ )
462
+ elif unique_id in self.saved_manifest.saved_queries:
463
+ saved_query = self.saved_manifest.saved_queries[unique_id]
464
+ self._schedule_for_parsing(
465
+ "saved_queries", saved_query, saved_query.name, self.delete_schema_saved_query
466
+ )
467
+ elif unique_id in self.saved_manifest.macros:
468
+ macro = self.saved_manifest.macros[unique_id]
469
+ file_id = macro.file_id
470
+ if file_id in self.saved_files and file_id not in self.file_diff["deleted"]:
471
+ source_file = self.saved_files[file_id]
472
+ self.delete_macro_file(source_file)
473
+ self.saved_files[file_id] = deepcopy(self.new_files[file_id])
474
+ self.add_to_pp_files(self.saved_files[file_id])
475
+ elif unique_id in self.saved_manifest.unit_tests:
476
+ unit_test = self.saved_manifest.unit_tests[unique_id]
477
+ self._schedule_for_parsing(
478
+ "unit_tests", unit_test, unit_test.name, self.delete_schema_unit_test
479
+ )
480
+
481
+ def _schedule_for_parsing(self, dict_key: str, element, name, delete: Callable) -> None:
482
+ file_id = element.file_id
483
+ if (
484
+ file_id in self.saved_files
485
+ and file_id not in self.file_diff["deleted"]
486
+ and file_id not in self.file_diff["deleted_schema_files"]
487
+ ):
488
+ schema_file = self.saved_files[file_id]
489
+ elements = []
490
+ assert isinstance(schema_file, SchemaSourceFile)
491
+ if dict_key in schema_file.dict_from_yaml:
492
+ elements = schema_file.dict_from_yaml[dict_key]
493
+ schema_element = self.get_schema_element(elements, name)
494
+ if schema_element:
495
+ delete(schema_file, schema_element)
496
+ self.merge_patch(schema_file, dict_key, schema_element)
497
+
498
+ def delete_macro_file(self, source_file, follow_references=False):
499
+ self.check_for_special_deleted_macros(source_file)
500
+ self.handle_macro_file_links(source_file, follow_references)
501
+ file_id = source_file.file_id
502
+ # It's not clear when this file_id would not exist in saved_files
503
+ if file_id in self.saved_files:
504
+ self.saved_files.pop(file_id)
505
+
506
+ def check_for_special_deleted_macros(self, source_file):
507
+ for unique_id in source_file.macros:
508
+ if unique_id in self.saved_manifest.macros:
509
+ package_name = unique_id.split(".")[1]
510
+ if package_name == "dbt":
511
+ continue
512
+ macro = self.saved_manifest.macros[unique_id]
513
+ if macro.name in special_override_macros:
514
+ self.deleted_special_override_macro = True
515
+
516
+ def recursively_gather_macro_references(self, macro_unique_id, referencing_nodes):
517
+ for unique_id in self.macro_child_map[macro_unique_id]:
518
+ if unique_id in referencing_nodes:
519
+ continue
520
+ referencing_nodes.append(unique_id)
521
+ if unique_id.startswith("macro."):
522
+ self.recursively_gather_macro_references(unique_id, referencing_nodes)
523
+
524
+ def handle_macro_file_links(self, source_file, follow_references=False):
525
+ # remove the macros in the 'macros' dictionary
526
+ macros = source_file.macros.copy()
527
+ for unique_id in macros:
528
+ if unique_id not in self.saved_manifest.macros:
529
+ # This happens when a macro has already been removed
530
+ if unique_id in source_file.macros:
531
+ source_file.macros.remove(unique_id)
532
+ continue
533
+
534
+ base_macro = self.saved_manifest.macros.pop(unique_id)
535
+
536
+ # Recursively check children of this macro
537
+ # The macro_child_map might not exist if a macro is removed by
538
+ # schedule_nodes_for parsing. We only want to follow
539
+ # references if the macro file itself has been updated or
540
+ # deleted, not if we're just updating referenced nodes.
541
+ if self.macro_child_map and follow_references:
542
+ referencing_nodes = []
543
+ self.recursively_gather_macro_references(unique_id, referencing_nodes)
544
+ self.schedule_macro_nodes_for_parsing(referencing_nodes)
545
+
546
+ if base_macro.patch_path:
547
+ file_id = base_macro.patch_path
548
+ if file_id in self.saved_files:
549
+ schema_file = self.saved_files[file_id]
550
+ macro_patches = []
551
+ if "macros" in schema_file.dict_from_yaml:
552
+ macro_patches = schema_file.dict_from_yaml["macros"]
553
+ macro_patch = self.get_schema_element(macro_patches, base_macro.name)
554
+ self.delete_schema_macro_patch(schema_file, macro_patch)
555
+ self.merge_patch(schema_file, "macros", macro_patch)
556
+ # The macro may have already been removed by handling macro children
557
+ if unique_id in source_file.macros:
558
+ source_file.macros.remove(unique_id)
559
+
560
+ # similar to schedule_nodes_for_parsing but doesn't do sources and exposures
561
+ # and handles schema tests
562
+ def schedule_macro_nodes_for_parsing(self, unique_ids):
563
+ for unique_id in unique_ids:
564
+ if unique_id in self.saved_manifest.nodes:
565
+ node = self.saved_manifest.nodes[unique_id]
566
+ # Both generic tests from yaml files and singular tests have NodeType.Test
567
+ # so check for generic test.
568
+ if node.resource_type == NodeType.Test and node.test_node_type == "generic":
569
+ schema_file_id = node.file_id
570
+ schema_file = self.saved_manifest.files[schema_file_id]
571
+ (key, name) = schema_file.get_key_and_name_for_test(node.unique_id)
572
+ if key and name:
573
+ patch_list = []
574
+ if key in schema_file.dict_from_yaml:
575
+ patch_list = schema_file.dict_from_yaml[key]
576
+ patch = self.get_schema_element(patch_list, name)
577
+ if patch:
578
+ if key in ["models", "seeds", "snapshots"]:
579
+ self.delete_schema_mssa_links(schema_file, key, patch)
580
+ self.merge_patch(schema_file, key, patch)
581
+ if unique_id in schema_file.node_patches:
582
+ schema_file.node_patches.remove(unique_id)
583
+ elif key == "sources":
584
+ # re-schedule source
585
+ if "overrides" in patch:
586
+ # This is a source patch; need to re-parse orig source
587
+ self.remove_source_override_target(patch)
588
+ self.delete_schema_source(schema_file, patch)
589
+ self.merge_patch(schema_file, "sources", patch)
590
+ else:
591
+ file_id = node.file_id
592
+ if file_id in self.saved_files and file_id not in self.file_diff["deleted"]:
593
+ source_file = self.saved_files[file_id]
594
+ self.remove_mssat_file(source_file)
595
+ # content of non-schema files is only in new files
596
+ self.saved_files[file_id] = deepcopy(self.new_files[file_id])
597
+ self.add_to_pp_files(self.saved_files[file_id])
598
+ elif unique_id in self.saved_manifest.macros:
599
+ macro = self.saved_manifest.macros[unique_id]
600
+ file_id = macro.file_id
601
+ if file_id in self.saved_files and file_id not in self.file_diff["deleted"]:
602
+ source_file = self.saved_files[file_id]
603
+ self.delete_macro_file(source_file)
604
+ self.saved_files[file_id] = deepcopy(self.new_files[file_id])
605
+ self.add_to_pp_files(self.saved_files[file_id])
606
+
607
+ def delete_doc_node(self, source_file):
608
+ # remove the nodes in the 'docs' dictionary
609
+ docs = source_file.docs.copy()
610
+ for unique_id in docs:
611
+ self.saved_manifest.docs.pop(unique_id)
612
+ source_file.docs.remove(unique_id)
613
+ # The unique_id of objects that contain a doc call are stored in the
614
+ # doc source_file.nodes
615
+ self.schedule_nodes_for_parsing(source_file.nodes)
616
+ source_file.nodes = []
617
+ # Remove the file object
618
+ self.saved_manifest.files.pop(source_file.file_id)
619
+
620
+ def delete_fixture_node(self, source_file):
621
+ # remove fixtures from the "fixtures" dictionary
622
+ fixture_unique_id = source_file.fixture
623
+ self.saved_manifest.fixtures.pop(fixture_unique_id)
624
+ unit_tests = source_file.unit_tests.copy()
625
+ for unique_id in unit_tests:
626
+ unit_test = self.saved_manifest.unit_tests.pop(unique_id)
627
+ # schedule unit_test for parsing
628
+ self._schedule_for_parsing(
629
+ "unit_tests", unit_test, unit_test.name, self.delete_schema_unit_test
630
+ )
631
+ source_file.unit_tests.remove(unique_id)
632
+ self.saved_manifest.files.pop(source_file.file_id)
633
+
634
+ # Schema files -----------------------
635
+ # Changed schema files
636
+ def change_schema_file(self, file_id):
637
+ saved_schema_file = self.saved_files[file_id]
638
+ new_schema_file = deepcopy(self.new_files[file_id])
639
+ saved_yaml_dict = saved_schema_file.dict_from_yaml
640
+ new_yaml_dict = new_schema_file.dict_from_yaml
641
+ if saved_schema_file.pp_dict is None:
642
+ saved_schema_file.pp_dict = {}
643
+ self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)
644
+
645
+ # copy from new schema_file to saved_schema_file to preserve references
646
+ # that weren't removed
647
+ saved_schema_file.contents = new_schema_file.contents
648
+ saved_schema_file.checksum = new_schema_file.checksum
649
+ saved_schema_file.dfy = new_schema_file.dfy
650
+ # schedule parsing
651
+ self.add_to_pp_files(saved_schema_file)
652
+ # schema_file pp_dict should have been generated already
653
+ fire_event(PartialParsingFile(operation="updated", file_id=file_id))
654
+
655
+ # Delete schema files -- a variation on change_schema_file
656
+ def delete_schema_file(self, file_id):
657
+ saved_schema_file = self.saved_files[file_id]
658
+ saved_yaml_dict = saved_schema_file.dict_from_yaml
659
+ new_yaml_dict = {}
660
+ self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)
661
+ self.saved_manifest.files.pop(file_id)
662
+
663
+ # For each key in a schema file dictionary, process the changed, deleted, and added
664
+ # elements for the key lists
665
+ def handle_schema_file_changes(self, schema_file, saved_yaml_dict, new_yaml_dict):
666
+ # loop through comparing previous dict_from_yaml with current dict_from_yaml
667
+ # Need to do the deleted/added/changed thing, just like the files lists
668
+
669
+ env_var_changes = {}
670
+ if schema_file.file_id in self.env_vars_changed_schema_files:
671
+ env_var_changes = self.env_vars_changed_schema_files[schema_file.file_id]
672
+
673
+ # models, seeds, snapshots, analyses
674
+ for dict_key in ["models", "seeds", "snapshots", "analyses"]:
675
+ key_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
676
+ if key_diff["changed"]:
677
+ for elem in key_diff["changed"]:
678
+ if dict_key == "snapshots" and "relation" in elem:
679
+ self.delete_yaml_snapshot(schema_file, elem)
680
+ self.delete_schema_mssa_links(schema_file, dict_key, elem)
681
+ self.merge_patch(schema_file, dict_key, elem, True)
682
+ if key_diff["deleted"]:
683
+ for elem in key_diff["deleted"]:
684
+ if dict_key == "snapshots" and "relation" in elem:
685
+ self.delete_yaml_snapshot(schema_file, elem)
686
+ self.delete_schema_mssa_links(schema_file, dict_key, elem)
687
+ if key_diff["added"]:
688
+ for elem in key_diff["added"]:
689
+ if dict_key == "models" and "versions" in elem:
690
+ self.versioned_model_delete_schema_mssa_links(schema_file, dict_key, elem)
691
+ self.merge_patch(schema_file, dict_key, elem, True)
692
+ # Handle schema file updates due to env_var changes
693
+ if dict_key in env_var_changes and dict_key in new_yaml_dict:
694
+ for name in env_var_changes[dict_key]:
695
+ if name in key_diff["changed_or_deleted_names"]:
696
+ continue
697
+ elem = self.get_schema_element(new_yaml_dict[dict_key], name)
698
+ if elem:
699
+ if dict_key == "snapshots" and "relation" in elem:
700
+ self.delete_yaml_snapshot(schema_file, elem)
701
+ self.delete_schema_mssa_links(schema_file, dict_key, elem)
702
+ self.merge_patch(schema_file, dict_key, elem, True)
703
+
704
+ # sources
705
+ dict_key = "sources"
706
+ source_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
707
+ if source_diff["changed"]:
708
+ for source in source_diff["changed"]:
709
+ if "overrides" in source: # This is a source patch; need to re-parse orig source
710
+ self.remove_source_override_target(source)
711
+ self.delete_schema_source(schema_file, source)
712
+ self.merge_patch(schema_file, dict_key, source, True)
713
+ if source_diff["deleted"]:
714
+ for source in source_diff["deleted"]:
715
+ if "overrides" in source: # This is a source patch; need to re-parse orig source
716
+ self.remove_source_override_target(source)
717
+ self.delete_schema_source(schema_file, source)
718
+ if source_diff["added"]:
719
+ for source in source_diff["added"]:
720
+ if "overrides" in source: # This is a source patch; need to re-parse orig source
721
+ self.remove_source_override_target(source)
722
+ self.merge_patch(schema_file, dict_key, source, True)
723
+ # Handle schema file updates due to env_var changes
724
+ if dict_key in env_var_changes and dict_key in new_yaml_dict:
725
+ for name in env_var_changes[dict_key]:
726
+ if name in source_diff["changed_or_deleted_names"]:
727
+ continue
728
+ source = self.get_schema_element(new_yaml_dict[dict_key], name)
729
+ if source:
730
+ if "overrides" in source:
731
+ self.remove_source_override_target(source)
732
+ self.delete_schema_source(schema_file, source)
733
+ self.merge_patch(schema_file, dict_key, source, True)
734
+
735
+ def handle_change(key: str, delete: Callable):
736
+ self._handle_element_change(
737
+ schema_file, saved_yaml_dict, new_yaml_dict, env_var_changes, key, delete
738
+ )
739
+
740
+ handle_change("macros", self.delete_schema_macro_patch)
741
+ handle_change("exposures", self.delete_schema_exposure)
742
+ handle_change("metrics", self.delete_schema_metric)
743
+ handle_change("groups", self.delete_schema_group)
744
+ handle_change("semantic_models", self.delete_schema_semantic_model)
745
+ handle_change("unit_tests", self.delete_schema_unit_test)
746
+ handle_change("saved_queries", self.delete_schema_saved_query)
747
+ handle_change("data_tests", self.delete_schema_data_test_patch)
748
+
749
+ def _handle_element_change(
750
+ self, schema_file, saved_yaml_dict, new_yaml_dict, env_var_changes, dict_key: str, delete
751
+ ):
752
+ element_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
753
+ if element_diff["changed"]:
754
+ for element in element_diff["changed"]:
755
+ delete(schema_file, element)
756
+ self.merge_patch(schema_file, dict_key, element, True)
757
+ if element_diff["deleted"]:
758
+ for element in element_diff["deleted"]:
759
+ delete(schema_file, element)
760
+ if element_diff["added"]:
761
+ for element in element_diff["added"]:
762
+ self.merge_patch(schema_file, dict_key, element, True)
763
+ # Handle schema file updates due to env_var changes
764
+ if dict_key in env_var_changes and dict_key in new_yaml_dict:
765
+ for name in env_var_changes[dict_key]:
766
+ if name in element_diff["changed_or_deleted_names"]:
767
+ continue
768
+ elem = self.get_schema_element(new_yaml_dict[dict_key], name)
769
+ if elem:
770
+ delete(schema_file, elem)
771
+ self.merge_patch(schema_file, dict_key, elem, True)
772
+
773
+ # Take a "section" of the schema file yaml dictionary from saved and new schema files
774
+ # and determine which parts have changed
775
+ def get_diff_for(self, key, saved_yaml_dict, new_yaml_dict):
776
+ if key in saved_yaml_dict or key in new_yaml_dict:
777
+ saved_elements = saved_yaml_dict[key] if key in saved_yaml_dict else []
778
+ new_elements = new_yaml_dict[key] if key in new_yaml_dict else []
779
+ else:
780
+ return {"deleted": [], "added": [], "changed": []}
781
+ # for each set of keys, need to create a dictionary of names pointing to entry
782
+ saved_elements_by_name = {}
783
+ new_elements_by_name = {}
784
+ # sources have two part names?
785
+ for element in saved_elements:
786
+ saved_elements_by_name[element["name"]] = element
787
+ for element in new_elements:
788
+ new_elements_by_name[element["name"]] = element
789
+
790
+ # now determine which elements, by name, are added, deleted or changed
791
+ saved_element_names = set(saved_elements_by_name.keys())
792
+ new_element_names = set(new_elements_by_name.keys())
793
+ deleted = saved_element_names.difference(new_element_names)
794
+ added = new_element_names.difference(saved_element_names)
795
+ common = saved_element_names.intersection(new_element_names)
796
+ changed = []
797
+ for element_name in common:
798
+ if saved_elements_by_name[element_name] != new_elements_by_name[element_name]:
799
+ changed.append(element_name)
800
+
801
+ # make lists of yaml elements to return as diffs
802
+ deleted_elements = [saved_elements_by_name[name].copy() for name in deleted]
803
+ added_elements = [new_elements_by_name[name].copy() for name in added]
804
+ changed_elements = [new_elements_by_name[name].copy() for name in changed]
805
+
806
+ diff = {
807
+ "deleted": deleted_elements,
808
+ "added": added_elements,
809
+ "changed": changed_elements,
810
+ "changed_or_deleted_names": list(changed) + list(deleted),
811
+ }
812
+ return diff
813
+
814
+ # Merge a patch file into the pp_dict in a schema file. The "new_patch"
815
+ # flag indicates that we're processing a schema file, so if a matching
816
+ # patch has already been scheduled, replace it.
817
+ def merge_patch(self, schema_file, key, patch, new_patch=False):
818
+ if schema_file.pp_dict is None:
819
+ schema_file.pp_dict = {}
820
+ pp_dict = schema_file.pp_dict
821
+ if key not in pp_dict:
822
+ pp_dict[key] = [patch]
823
+ else:
824
+ # check that this patch hasn't already been saved
825
+ found_elem = None
826
+ for elem in pp_dict[key]:
827
+ if elem["name"] == patch["name"]:
828
+ found_elem = elem
829
+ if not found_elem:
830
+ pp_dict[key].append(patch)
831
+ elif found_elem and new_patch:
832
+ # remove patch and replace with new one
833
+ pp_dict[key].remove(found_elem)
834
+ pp_dict[key].append(patch)
835
+
836
+ schema_file.delete_from_env_vars(key, patch["name"])
837
+ schema_file.delete_from_unrendered_configs(key, patch["name"])
838
+ self.add_to_pp_files(schema_file)
839
+
840
+ # For model, seed, snapshot, analysis schema dictionary keys,
841
+ # delete the patches and tests from the patch
842
+ def delete_schema_mssa_links(self, schema_file, dict_key, elem) -> None:
843
+ # find elem node unique_id in node_patches
844
+ prefix = key_to_prefix[dict_key]
845
+ elem_unique_ids = []
846
+ for unique_id in schema_file.node_patches:
847
+ if not unique_id.startswith(prefix):
848
+ continue
849
+ parts = unique_id.split(".")
850
+ elem_name = parts[2]
851
+ if elem_name == elem["name"]:
852
+ elem_unique_ids.append(unique_id)
853
+ self._delete_schema_mssa_links(schema_file, dict_key, elem, elem_unique_ids)
854
+
855
+ def versioned_model_delete_schema_mssa_links(self, schema_file, dict_key, elem) -> None:
856
+ elem_unique_ids = []
857
+ # We need to look up possible existing models that this new or modified patch applies to
858
+ unique_id = f"model.{schema_file.project_name}.{elem['name']}"
859
+ if unique_id in self.saved_manifest.nodes:
860
+ elem_unique_ids.append(unique_id)
861
+ if not elem_unique_ids:
862
+ return
863
+ self._delete_schema_mssa_links(schema_file, dict_key, elem, elem_unique_ids)
864
+
865
+ def _delete_schema_mssa_links(self, schema_file, dict_key, elem, elem_unique_ids):
866
+ # remove elem node and remove unique_id from node_patches
867
+ for elem_unique_id in elem_unique_ids:
868
+ # might have been already removed
869
+ # For all-yaml snapshots, we don't do this, since the node
870
+ # should have already been removed.
871
+ if (
872
+ elem_unique_id in self.saved_manifest.nodes
873
+ or elem_unique_id in self.saved_manifest.disabled
874
+ ):
875
+ nodes: List[Union[ModelNode, SeedNode, SnapshotNode, AnalysisNode]] = []
876
+ if elem_unique_id in self.saved_manifest.nodes:
877
+ nodes = [self.saved_manifest.nodes.pop(elem_unique_id)] # type: ignore[list-item]
878
+ else:
879
+ # The value of disabled items is a list of nodes
880
+ nodes = self.saved_manifest.disabled.pop(elem_unique_id) # type: ignore[assignment]
881
+ # need to add the node source_file to pp_files
882
+ for node in nodes:
883
+ file_id = node.file_id
884
+ # need to copy new file to saved files in order to get content
885
+ if file_id in self.new_files:
886
+ self.saved_files[file_id] = deepcopy(self.new_files[file_id])
887
+ if self.saved_files[file_id]:
888
+ source_file = self.saved_files[file_id]
889
+ self.add_to_pp_files(source_file)
890
+ # if the node's group has changed - need to reparse all referencing nodes to ensure valid ref access
891
+ if node.group != elem.get("group"):
892
+ self.schedule_referencing_nodes_for_parsing(node.unique_id)
893
+ # If the latest version has changed, a version has been removed, or a version has been added,
894
+ # we need to reparse referencing nodes.
895
+ if node.is_versioned or elem.get("versions"):
896
+ self.schedule_referencing_nodes_for_parsing(node.unique_id)
897
+ # remove from patches
898
+ # For versioned models, the schedule_referencing_nodes_for_parsing call above
899
+ # could have caused a recursive visit to this file.
900
+ if elem_unique_id in schema_file.node_patches:
901
+ schema_file.node_patches.remove(elem_unique_id)
902
+
903
+ # for models, seeds, snapshots (not analyses)
904
+ if dict_key in ["models", "seeds", "snapshots"]:
905
+ # find related tests and remove them
906
+ self.remove_tests(schema_file, dict_key, elem["name"])
907
+
908
+ def remove_tests(self, schema_file, dict_key, name):
909
+ tests = schema_file.get_tests(dict_key, name)
910
+ for test_unique_id in tests:
911
+ if test_unique_id in self.saved_manifest.nodes:
912
+ self.saved_manifest.nodes.pop(test_unique_id)
913
+ schema_file.remove_tests(dict_key, name)
914
+ # We also need to remove tests in other schema files that
915
+ # reference this node.
916
+ unique_id = f"{key_to_prefix[dict_key]}.{schema_file.project_name}.{name}"
917
+ if unique_id in self.saved_manifest.child_map:
918
+ for child_id in self.saved_manifest.child_map[unique_id]:
919
+ if child_id.startswith("test") and child_id in self.saved_manifest.nodes:
920
+ child_test = self.saved_manifest.nodes[child_id]
921
+ if child_test.attached_node:
922
+ if child_test.attached_node in self.saved_manifest.nodes:
923
+ attached_node = self.saved_manifest.nodes[child_test.attached_node]
924
+ self.update_in_saved(attached_node.file_id)
925
+
926
+ def delete_yaml_snapshot(self, schema_file, snapshot_dict):
927
+ snapshot_name = snapshot_dict["name"]
928
+ snapshots = schema_file.snapshots.copy()
929
+ for unique_id in snapshots:
930
+ if unique_id in self.saved_manifest.nodes:
931
+ snapshot = self.saved_manifest.nodes[unique_id]
932
+ if snapshot.name == snapshot_name:
933
+ self.saved_manifest.nodes.pop(unique_id)
934
+ schema_file.snapshots.remove(unique_id)
935
+ elif unique_id in self.saved_manifest.disabled:
936
+ self.delete_disabled(unique_id, schema_file.file_id)
937
+ schema_file.snapshots.remove(unique_id)
938
+
939
+ def delete_schema_source(self, schema_file, source_dict):
940
+ # both patches, tests, and source nodes
941
+ source_name = source_dict["name"]
942
+ # There may be multiple sources for each source dict, since
943
+ # there will be a separate source node for each table.
944
+ # SourceDefinition name = table name, dict name is source_name
945
+ sources = schema_file.sources.copy()
946
+ for unique_id in sources:
947
+ if unique_id in self.saved_manifest.sources:
948
+ source = self.saved_manifest.sources[unique_id]
949
+ if source.source_name == source_name:
950
+ source = self.saved_manifest.sources.pop(unique_id)
951
+ schema_file.sources.remove(unique_id)
952
+ self.schedule_referencing_nodes_for_parsing(unique_id)
953
+
954
+ self.remove_tests(schema_file, "sources", source_name)
955
+
956
+ def delete_schema_macro_patch(self, schema_file, macro):
957
+ # This is just macro patches that need to be reapplied
958
+ macro_unique_id = None
959
+ if macro["name"] in schema_file.macro_patches:
960
+ macro_unique_id = schema_file.macro_patches[macro["name"]]
961
+ del schema_file.macro_patches[macro["name"]]
962
+ # Need to delete all macros in the same file
963
+ # and then reapply all schema file updates for those macros
964
+ if macro_unique_id and macro_unique_id in self.saved_manifest.macros:
965
+ macro = self.saved_manifest.macros.pop(macro_unique_id)
966
+ macro_file_id = macro.file_id
967
+ if macro_file_id in self.new_files:
968
+ source_file = self.saved_files[macro_file_id]
969
+ self.delete_macro_file(source_file)
970
+ self.saved_files[macro_file_id] = deepcopy(self.new_files[macro_file_id])
971
+ self.add_to_pp_files(self.saved_files[macro_file_id])
972
+
973
+ def delete_schema_data_test_patch(self, schema_file, data_test):
974
+ data_test_unique_id = None
975
+ for unique_id in schema_file.node_patches:
976
+ if not unique_id.startswith("test"):
977
+ continue
978
+ parts = unique_id.split(".")
979
+ elem_name = parts[2]
980
+ if elem_name == data_test["name"]:
981
+ data_test_unique_id = unique_id
982
+ break
983
+ if data_test_unique_id and data_test_unique_id in self.saved_manifest.nodes:
984
+ singular_data_test = self.saved_manifest.nodes.pop(data_test_unique_id)
985
+ file_id = singular_data_test.file_id
986
+ if file_id in self.new_files:
987
+ self.saved_files[file_id] = deepcopy(self.new_files[file_id])
988
+ self.add_to_pp_files(self.saved_files[file_id])
989
+
990
+    # exposures are created only from schema files, so just delete
+    # the exposure or the disabled exposure.
+    def delete_schema_exposure(self, schema_file, exposure_dict):
+        exposure_name = exposure_dict["name"]
+        exposures = schema_file.exposures.copy()
+        for unique_id in exposures:
+            if unique_id in self.saved_manifest.exposures:
+                exposure = self.saved_manifest.exposures[unique_id]
+                if exposure.name == exposure_name:
+                    self.saved_manifest.exposures.pop(unique_id)
+                    schema_file.exposures.remove(unique_id)
+            elif unique_id in self.saved_manifest.disabled:
+                self.delete_disabled(unique_id, schema_file.file_id)
+
+    # groups are created only from schema files, so just delete the group
+    def delete_schema_group(self, schema_file, group_dict):
+        group_name = group_dict["name"]
+        groups = schema_file.groups.copy()
+        for unique_id in groups:
+            if unique_id in self.saved_manifest.groups:
+                group = self.saved_manifest.groups[unique_id]
+                if group.name == group_name:
+                    self.schedule_nodes_for_parsing(self.saved_manifest.group_map[group.name])
+                    self.saved_manifest.groups.pop(unique_id)
+                    schema_file.groups.remove(unique_id)
+
+    # metrics are created only from schema files, but can also be referred to by other nodes
+    def delete_schema_metric(self, schema_file, metric_dict):
+        metric_name = metric_dict["name"]
+        metrics = schema_file.metrics.copy()
+        for unique_id in metrics:
+            if unique_id in self.saved_manifest.metrics:
+                metric = self.saved_manifest.metrics[unique_id]
+                if metric.name == metric_name:
+                    # Need to find everything that referenced this metric and schedule for parsing
+                    if unique_id in self.saved_manifest.child_map:
+                        self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
+                    self.saved_manifest.metrics.pop(unique_id)
+                    schema_file.metrics.remove(unique_id)
+            elif unique_id in self.saved_manifest.disabled:
+                self.delete_disabled(unique_id, schema_file.file_id)
+
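delete_schema_metric (like the saved-query and semantic-model variants below) walks manifest.child_map to reschedule anything that referenced the deleted node. A minimal sketch, with hypothetical ids:

    # child_map: unique_id -> unique_ids of nodes that depend on it (hypothetical values).
    child_map = {
        "metric.my_project.revenue": [
            "metric.my_project.revenue_growth",
            "model.my_project.kpi_summary",
        ],
    }
    # Popping "metric.my_project.revenue" schedules both children for reparsing so
    # their references re-resolve (or error) on the next parse.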
+    def delete_schema_saved_query(self, schema_file, saved_query_dict):
+        saved_query_name = saved_query_dict["name"]
+        saved_queries = schema_file.saved_queries.copy()
+        for unique_id in saved_queries:
+            if unique_id in self.saved_manifest.saved_queries:
+                saved_query = self.saved_manifest.saved_queries[unique_id]
+                if saved_query.name == saved_query_name:
+                    # Need to find everything that referenced this saved_query and schedule for parsing
+                    if unique_id in self.saved_manifest.child_map:
+                        self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
+                    self.saved_manifest.saved_queries.pop(unique_id)
+            elif unique_id in self.saved_manifest.disabled:
+                self.delete_disabled(unique_id, schema_file.file_id)
+
+    def delete_schema_semantic_model(self, schema_file, semantic_model_dict):
+        semantic_model_name = semantic_model_dict["name"]
+        semantic_models = schema_file.semantic_models.copy()
+        for unique_id in semantic_models:
+            if unique_id in self.saved_manifest.semantic_models:
+                semantic_model = self.saved_manifest.semantic_models[unique_id]
+                if semantic_model.name == semantic_model_name:
+                    # Need to find everything that referenced this semantic model and schedule for parsing
+                    if unique_id in self.saved_manifest.child_map:
+                        self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
+                    self.saved_manifest.semantic_models.pop(unique_id)
+                    schema_file.semantic_models.remove(unique_id)
+            elif unique_id in self.saved_manifest.disabled:
+                self.delete_disabled(unique_id, schema_file.file_id)
+
+        if schema_file.generated_metrics:
+            # If this partial parse file has an old "generated_metrics" list,
+            # call code to fix it up before processing.
+            schema_file.fix_metrics_from_measures()
+        if semantic_model_name in schema_file.metrics_from_measures:
+            for unique_id in schema_file.metrics_from_measures[semantic_model_name]:
+                if unique_id in self.saved_manifest.metrics:
+                    self.saved_manifest.metrics.pop(unique_id)
+                elif unique_id in self.saved_manifest.disabled:
+                    self.delete_disabled(unique_id, schema_file.file_id)
+            del schema_file.metrics_from_measures[semantic_model_name]
+
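The tail of delete_schema_semantic_model also drops metrics generated from the model's measures; judging from the code, metrics_from_measures maps a semantic model name to the unique_ids of those metrics (values hypothetical):

    # Assumed shape: semantic model name -> generated metric unique_ids.
    metrics_from_measures = {
        "orders_sm": ["metric.my_project.order_count", "metric.my_project.order_total"],
    }
    # Deleting "orders_sm" pops both metric nodes, then removes the key itself.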
+    def delete_schema_unit_test(self, schema_file, unit_test_dict):
+        unit_test_name = unit_test_dict["name"]
+        unit_tests = schema_file.unit_tests.copy()
+        for unique_id in unit_tests:
+            if unique_id in self.saved_manifest.unit_tests:
+                unit_test = self.saved_manifest.unit_tests[unique_id]
+                if unit_test.name == unit_test_name:
+                    self.saved_manifest.unit_tests.pop(unique_id)
+                    schema_file.unit_tests.remove(unique_id)
+            # No disabled unit tests yet
+
+    def get_schema_element(self, elem_list, elem_name):
+        for element in elem_list:
+            if "name" in element and element["name"] == elem_name:
+                return element
+        return None
+
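get_schema_element is a plain linear scan over the dicts parsed from one yaml block. Usage sketch with hypothetical data:

    sources = [{"name": "raw_events"}, {"name": "raw_orders"}]
    self.get_schema_element(sources, "raw_orders")  # -> {"name": "raw_orders"}
    self.get_schema_element(sources, "raw_users")   # -> None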
+    def get_schema_file_for_source(self, package_name, source_name):
+        schema_file = None
+        for source in self.saved_manifest.sources.values():
+            if source.package_name == package_name and source.source_name == source_name:
+                file_id = source.file_id
+                if file_id in self.saved_files:
+                    schema_file = self.saved_files[file_id]
+                break
+        return schema_file
+
+    def get_source_override_file_and_dict(self, source):
+        package = source["overrides"]
+        source_name = source["name"]
+        orig_source_schema_file = self.get_schema_file_for_source(package, source_name)
+        orig_sources = orig_source_schema_file.dict_from_yaml["sources"]
+        orig_source = self.get_schema_element(orig_sources, source_name)
+        return (orig_source_schema_file, orig_source)
+
+    def remove_source_override_target(self, source_dict):
+        (orig_file, orig_source) = self.get_source_override_file_and_dict(source_dict)
+        if orig_source:
+            self.delete_schema_source(orig_file, orig_source)
+            self.merge_patch(orig_file, "sources", orig_source)
+            self.add_to_pp_files(orig_file)
+
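remove_source_override_target handles a source that overrides one of the same name in another package; the `overrides` key names that package. An illustrative input (values hypothetical):

    # A source dict overriding "raw_events" from another package.
    source_dict = {
        "name": "raw_events",
        "overrides": "upstream_package",  # package that defines the original source
    }
    # get_source_override_file_and_dict finds upstream_package's schema file plus the
    # matching "raw_events" entry so it can be deleted and merged back for reparsing.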
+    # This builds a list and a dictionary of files that need to be scheduled for
+    # parsing because an env var has changed.
+    # source_files
+    #    env_vars_changed_source_files: [file_id, file_id...]
+    # schema_files
+    #    env_vars_changed_schema_files: {file_id: {"yaml_key": [name, ..]}}
+    def build_env_vars_to_files(self):
+        unchanged_vars = []
+        changed_vars = []
+        delete_vars = []
+        # Check whether each env_var has changed and add it to
+        # an unchanged or changed list
+        for env_var in self.saved_manifest.env_vars:
+            prev_value = self.saved_manifest.env_vars[env_var]
+            current_value = os.getenv(env_var)
+            if current_value is None:
+                # This will be true when depending on the default value.
+                # We store env vars set by defaults as a static string so we can recognize
+                # that they have defaults. We depend on default changes triggering reparsing
+                # by file change. If the file has not changed we can assume the default has
+                # not changed.
+                if prev_value == DEFAULT_ENV_PLACEHOLDER:
+                    unchanged_vars.append(env_var)
+                    continue
+                # env_var no longer set, remove from manifest
+                delete_vars.append(env_var)
+            if prev_value == current_value:
+                unchanged_vars.append(env_var)
+            else:  # prev_value != current_value
+                changed_vars.append(env_var)
+        for env_var in delete_vars:
+            del self.saved_manifest.env_vars[env_var]
+
+        env_vars_changed_source_files = []
+        env_vars_changed_schema_files = {}
+        # The SourceFiles contain a list of env_vars that were used in the file.
+        # The SchemaSourceFiles contain a dictionary of yaml_key to schema entry names to
+        # a list of vars.
+        # Create a list of file_ids for source_files that need to be reparsed, and
+        # a dictionary of file_ids to yaml_keys to names.
+        for source_file in self.saved_files.values():
+            if source_file.parse_file_type == ParseFileType.Fixture:
+                continue
+            file_id = source_file.file_id
+            if not source_file.env_vars:
+                continue
+            if source_file.parse_file_type == ParseFileType.Schema:
+                for yaml_key in source_file.env_vars.keys():
+                    for name in source_file.env_vars[yaml_key].keys():
+                        for env_var in source_file.env_vars[yaml_key][name]:
+                            if env_var in changed_vars:
+                                if file_id not in env_vars_changed_schema_files:
+                                    env_vars_changed_schema_files[file_id] = {}
+                                if yaml_key not in env_vars_changed_schema_files[file_id]:
+                                    env_vars_changed_schema_files[file_id][yaml_key] = []
+                                if name not in env_vars_changed_schema_files[file_id][yaml_key]:
+                                    env_vars_changed_schema_files[file_id][yaml_key].append(name)
+                                break  # if one env_var is changed we can stop
+
+            else:
+                for env_var in source_file.env_vars:
+                    if env_var in changed_vars:
+                        env_vars_changed_source_files.append(file_id)
+                        break  # if one env_var is changed we can stop
+
+        return (env_vars_changed_source_files, env_vars_changed_schema_files)
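A sketch of the two return values for a run where only MY_VAR changed; the `package://path` file_id format and all names here are assumptions for illustration:

    env_vars_changed_source_files = [
        "my_project://models/events.sql",  # model file that called env_var("MY_VAR")
    ]
    env_vars_changed_schema_files = {
        "my_project://models/schema.yml": {
            "sources": ["raw_events"],  # yaml_key -> entry names to reparse
            "models": ["events"],
        },
    }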