dvt_core-0.52.2-cp310-cp310-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dvt-core might be problematic.

Files changed (275)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2039 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +804 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +624 -0
  74. dbt/compute/federated_executor.py +837 -0
  75. dbt/compute/filter_pushdown.cpython-310-darwin.so +0 -0
  76. dbt/compute/filter_pushdown.py +273 -0
  77. dbt/compute/jar_provisioning.cpython-310-darwin.so +0 -0
  78. dbt/compute/jar_provisioning.py +255 -0
  79. dbt/compute/java_compat.cpython-310-darwin.so +0 -0
  80. dbt/compute/java_compat.py +689 -0
  81. dbt/compute/jdbc_utils.cpython-310-darwin.so +0 -0
  82. dbt/compute/jdbc_utils.py +678 -0
  83. dbt/compute/smart_selector.cpython-310-darwin.so +0 -0
  84. dbt/compute/smart_selector.py +311 -0
  85. dbt/compute/strategies/__init__.py +54 -0
  86. dbt/compute/strategies/base.py +165 -0
  87. dbt/compute/strategies/dataproc.py +207 -0
  88. dbt/compute/strategies/emr.py +203 -0
  89. dbt/compute/strategies/local.py +364 -0
  90. dbt/compute/strategies/standalone.py +262 -0
  91. dbt/config/__init__.py +4 -0
  92. dbt/config/catalogs.py +94 -0
  93. dbt/config/compute.cpython-310-darwin.so +0 -0
  94. dbt/config/compute.py +547 -0
  95. dbt/config/dvt_profile.cpython-310-darwin.so +0 -0
  96. dbt/config/dvt_profile.py +342 -0
  97. dbt/config/profile.py +422 -0
  98. dbt/config/project.py +873 -0
  99. dbt/config/project_utils.py +28 -0
  100. dbt/config/renderer.py +231 -0
  101. dbt/config/runtime.py +553 -0
  102. dbt/config/selectors.py +208 -0
  103. dbt/config/utils.py +77 -0
  104. dbt/constants.py +28 -0
  105. dbt/context/__init__.py +0 -0
  106. dbt/context/base.py +745 -0
  107. dbt/context/configured.py +135 -0
  108. dbt/context/context_config.py +382 -0
  109. dbt/context/docs.py +82 -0
  110. dbt/context/exceptions_jinja.py +178 -0
  111. dbt/context/macro_resolver.py +195 -0
  112. dbt/context/macros.py +171 -0
  113. dbt/context/manifest.py +72 -0
  114. dbt/context/providers.py +2249 -0
  115. dbt/context/query_header.py +13 -0
  116. dbt/context/secret.py +58 -0
  117. dbt/context/target.py +74 -0
  118. dbt/contracts/__init__.py +0 -0
  119. dbt/contracts/files.py +413 -0
  120. dbt/contracts/graph/__init__.py +0 -0
  121. dbt/contracts/graph/manifest.py +1904 -0
  122. dbt/contracts/graph/metrics.py +97 -0
  123. dbt/contracts/graph/model_config.py +70 -0
  124. dbt/contracts/graph/node_args.py +42 -0
  125. dbt/contracts/graph/nodes.py +1806 -0
  126. dbt/contracts/graph/semantic_manifest.py +232 -0
  127. dbt/contracts/graph/unparsed.py +811 -0
  128. dbt/contracts/project.py +417 -0
  129. dbt/contracts/results.py +53 -0
  130. dbt/contracts/selection.py +23 -0
  131. dbt/contracts/sql.py +85 -0
  132. dbt/contracts/state.py +68 -0
  133. dbt/contracts/util.py +46 -0
  134. dbt/deprecations.py +346 -0
  135. dbt/deps/__init__.py +0 -0
  136. dbt/deps/base.py +152 -0
  137. dbt/deps/git.py +195 -0
  138. dbt/deps/local.py +79 -0
  139. dbt/deps/registry.py +130 -0
  140. dbt/deps/resolver.py +149 -0
  141. dbt/deps/tarball.py +120 -0
  142. dbt/docs/source/_ext/dbt_click.py +119 -0
  143. dbt/docs/source/conf.py +32 -0
  144. dbt/env_vars.py +64 -0
  145. dbt/event_time/event_time.py +40 -0
  146. dbt/event_time/sample_window.py +60 -0
  147. dbt/events/__init__.py +15 -0
  148. dbt/events/base_types.py +36 -0
  149. dbt/events/core_types_pb2.py +2 -0
  150. dbt/events/logging.py +108 -0
  151. dbt/events/types.py +2516 -0
  152. dbt/exceptions.py +1486 -0
  153. dbt/flags.py +89 -0
  154. dbt/graph/__init__.py +11 -0
  155. dbt/graph/cli.py +247 -0
  156. dbt/graph/graph.py +172 -0
  157. dbt/graph/queue.py +214 -0
  158. dbt/graph/selector.py +374 -0
  159. dbt/graph/selector_methods.py +975 -0
  160. dbt/graph/selector_spec.py +222 -0
  161. dbt/graph/thread_pool.py +18 -0
  162. dbt/hooks.py +21 -0
  163. dbt/include/README.md +49 -0
  164. dbt/include/__init__.py +3 -0
  165. dbt/include/starter_project/.gitignore +4 -0
  166. dbt/include/starter_project/README.md +15 -0
  167. dbt/include/starter_project/__init__.py +3 -0
  168. dbt/include/starter_project/analyses/.gitkeep +0 -0
  169. dbt/include/starter_project/dbt_project.yml +36 -0
  170. dbt/include/starter_project/macros/.gitkeep +0 -0
  171. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  172. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  173. dbt/include/starter_project/models/example/schema.yml +21 -0
  174. dbt/include/starter_project/seeds/.gitkeep +0 -0
  175. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  176. dbt/include/starter_project/tests/.gitkeep +0 -0
  177. dbt/internal_deprecations.py +26 -0
  178. dbt/jsonschemas/__init__.py +3 -0
  179. dbt/jsonschemas/jsonschemas.py +309 -0
  180. dbt/jsonschemas/project/0.0.110.json +4717 -0
  181. dbt/jsonschemas/project/0.0.85.json +2015 -0
  182. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  183. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  184. dbt/jsonschemas/resources/latest.json +6773 -0
  185. dbt/links.py +4 -0
  186. dbt/materializations/__init__.py +0 -0
  187. dbt/materializations/incremental/__init__.py +0 -0
  188. dbt/materializations/incremental/microbatch.py +236 -0
  189. dbt/mp_context.py +8 -0
  190. dbt/node_types.py +37 -0
  191. dbt/parser/__init__.py +23 -0
  192. dbt/parser/analysis.py +21 -0
  193. dbt/parser/base.py +548 -0
  194. dbt/parser/common.py +266 -0
  195. dbt/parser/docs.py +52 -0
  196. dbt/parser/fixtures.py +51 -0
  197. dbt/parser/functions.py +30 -0
  198. dbt/parser/generic_test.py +100 -0
  199. dbt/parser/generic_test_builders.py +333 -0
  200. dbt/parser/hooks.py +118 -0
  201. dbt/parser/macros.py +137 -0
  202. dbt/parser/manifest.py +2204 -0
  203. dbt/parser/models.py +573 -0
  204. dbt/parser/partial.py +1178 -0
  205. dbt/parser/read_files.py +445 -0
  206. dbt/parser/schema_generic_tests.py +422 -0
  207. dbt/parser/schema_renderer.py +111 -0
  208. dbt/parser/schema_yaml_readers.py +935 -0
  209. dbt/parser/schemas.py +1466 -0
  210. dbt/parser/search.py +149 -0
  211. dbt/parser/seeds.py +28 -0
  212. dbt/parser/singular_test.py +20 -0
  213. dbt/parser/snapshots.py +44 -0
  214. dbt/parser/sources.py +558 -0
  215. dbt/parser/sql.py +62 -0
  216. dbt/parser/unit_tests.py +621 -0
  217. dbt/plugins/__init__.py +20 -0
  218. dbt/plugins/contracts.py +9 -0
  219. dbt/plugins/exceptions.py +2 -0
  220. dbt/plugins/manager.py +163 -0
  221. dbt/plugins/manifest.py +21 -0
  222. dbt/profiler.py +20 -0
  223. dbt/py.typed +1 -0
  224. dbt/query_analyzer.cpython-310-darwin.so +0 -0
  225. dbt/query_analyzer.py +410 -0
  226. dbt/runners/__init__.py +2 -0
  227. dbt/runners/exposure_runner.py +7 -0
  228. dbt/runners/no_op_runner.py +45 -0
  229. dbt/runners/saved_query_runner.py +7 -0
  230. dbt/selected_resources.py +8 -0
  231. dbt/task/__init__.py +0 -0
  232. dbt/task/base.py +503 -0
  233. dbt/task/build.py +197 -0
  234. dbt/task/clean.py +56 -0
  235. dbt/task/clone.py +161 -0
  236. dbt/task/compile.py +150 -0
  237. dbt/task/compute.py +454 -0
  238. dbt/task/debug.py +505 -0
  239. dbt/task/deps.py +280 -0
  240. dbt/task/docs/__init__.py +3 -0
  241. dbt/task/docs/generate.py +660 -0
  242. dbt/task/docs/index.html +250 -0
  243. dbt/task/docs/serve.py +29 -0
  244. dbt/task/freshness.py +322 -0
  245. dbt/task/function.py +121 -0
  246. dbt/task/group_lookup.py +46 -0
  247. dbt/task/init.py +553 -0
  248. dbt/task/java.py +316 -0
  249. dbt/task/list.py +236 -0
  250. dbt/task/printer.py +175 -0
  251. dbt/task/retry.py +175 -0
  252. dbt/task/run.py +1306 -0
  253. dbt/task/run_operation.py +141 -0
  254. dbt/task/runnable.py +758 -0
  255. dbt/task/seed.py +103 -0
  256. dbt/task/show.py +149 -0
  257. dbt/task/snapshot.py +56 -0
  258. dbt/task/spark.py +414 -0
  259. dbt/task/sql.py +110 -0
  260. dbt/task/target_sync.py +759 -0
  261. dbt/task/test.py +464 -0
  262. dbt/tests/fixtures/__init__.py +1 -0
  263. dbt/tests/fixtures/project.py +620 -0
  264. dbt/tests/util.py +651 -0
  265. dbt/tracking.py +529 -0
  266. dbt/utils/__init__.py +3 -0
  267. dbt/utils/artifact_upload.py +151 -0
  268. dbt/utils/utils.py +408 -0
  269. dbt/version.py +268 -0
  270. dvt_cli/__init__.py +72 -0
  271. dvt_core-0.52.2.dist-info/METADATA +286 -0
  272. dvt_core-0.52.2.dist-info/RECORD +275 -0
  273. dvt_core-0.52.2.dist-info/WHEEL +5 -0
  274. dvt_core-0.52.2.dist-info/entry_points.txt +2 -0
  275. dvt_core-0.52.2.dist-info/top_level.txt +2 -0
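Of these, dbt/parser/partial.py is shown below. It implements partial parsing as a checksum diff: the file ids recorded in the previously saved manifest are compared against the freshly read files, and each file is bucketed as added, deleted, changed, or unchanged so that only affected files are re-parsed. As a minimal, hypothetical sketch of that bucketing (the diff_files helper and the plain file_id -> checksum mappings are illustrative only; the real build_file_diff below compares SourceFile checksums and additionally separates schema files and env-var-driven changes):

    def diff_files(saved: dict, new: dict) -> dict:
        # Bucket file ids by comparing checksums, mirroring the shape of
        # the file_diff dictionary built by PartialParsing.build_file_diff.
        saved_ids, new_ids = set(saved), set(new)
        common = saved_ids & new_ids
        return {
            "added": sorted(new_ids - saved_ids),
            "deleted": sorted(saved_ids - new_ids),
            "changed": sorted(f for f in common if saved[f] != new[f]),
            "unchanged": sorted(f for f in common if saved[f] == new[f]),
        }

    print(diff_files({"a.sql": "111", "b.sql": "222"}, {"b.sql": "333", "c.sql": "444"}))
    # {'added': ['c.sql'], 'deleted': ['a.sql'], 'changed': ['b.sql'], 'unchanged': []}

When every bucket except unchanged is empty, skip_parsing() short-circuits and the saved manifest is reused without re-parsing anything.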
dbt/parser/partial.py ADDED
@@ -0,0 +1,1178 @@
+import os
+from copy import deepcopy
+from typing import Callable, Dict, List, MutableMapping, Union
+
+from dbt.constants import DEFAULT_ENV_PLACEHOLDER
+from dbt.contracts.files import (
+    AnySourceFile,
+    ParseFileType,
+    SchemaSourceFile,
+    SourceFile,
+    parse_file_type_to_parser,
+)
+from dbt.contracts.graph.manifest import Manifest
+from dbt.contracts.graph.nodes import AnalysisNode, ModelNode, SeedNode, SnapshotNode
+from dbt.events.types import PartialParsingEnabled, PartialParsingFile
+from dbt.node_types import NodeType
+from dbt_common.context import get_invocation_context
+from dbt_common.events.base_types import EventLevel
+from dbt_common.events.functions import fire_event
+
+mssat_files = (
+    ParseFileType.Model,
+    ParseFileType.Seed,
+    ParseFileType.Snapshot,
+    ParseFileType.Analysis,
+    ParseFileType.SingularTest,
+)
+
+mg_files = (
+    ParseFileType.Macro,
+    ParseFileType.GenericTest,
+)
+
+
+key_to_prefix = {
+    "models": "model",
+    "seeds": "seed",
+    "snapshots": "snapshot",
+    "analyses": "analysis",
+    "sources": "source",
+}
+
+
+parse_file_type_to_key = {
+    ParseFileType.Model: "models",
+    ParseFileType.Seed: "seeds",
+    ParseFileType.Snapshot: "snapshots",
+    ParseFileType.Analysis: "analyses",
+}
+
+
+# These macro names have special treatment in the ManifestLoader and
+# partial parsing. If they have changed we will skip partial parsing
+special_override_macros = [
+    "ref",
+    "source",
+    "config",
+    "generate_schema_name",
+    "generate_database_name",
+    "generate_alias_name",
+]
+
+
+# Partial parsing. Create a diff of files from saved manifest and current
+# files and produce a project_parser_file dictionary to drive parsing of
+# only the necessary changes.
+# Will produce a 'skip_parsing' method, and a project_parser_file dictionary
+# All file objects from the new manifest are deepcopied, because we need
+# to preserve an unchanged file object in case we need to drop back to a
+# full parse (such as for certain macro changes)
+class PartialParsing:
+    def __init__(
+        self, saved_manifest: Manifest, new_files: MutableMapping[str, AnySourceFile]
+    ) -> None:
+        self.saved_manifest = saved_manifest
+        self.new_files = new_files
+        self.project_parser_files: Dict = {}
+        self.saved_files = self.saved_manifest.files
+        self.project_parser_files = {}
+        self.macro_child_map: Dict[str, List[str]] = {}
+        (
+            self.env_vars_changed_source_files,
+            self.env_vars_changed_schema_files,
+        ) = self.build_env_vars_to_files()
+        self.build_file_diff()
+        self.processing_file = None
+        self.deleted_special_override_macro = False
+        self.disabled_by_file_id = self.saved_manifest.build_disabled_by_file_id()
+
+    def skip_parsing(self):
+        return (
+            not self.file_diff["deleted"]
+            and not self.file_diff["added"]
+            and not self.file_diff["changed"]
+            and not self.file_diff["changed_schema_files"]
+            and not self.file_diff["deleted_schema_files"]
+        )
+
+    # Compare the previously saved manifest files and the just-loaded manifest
+    # files to see if anything changed
+    def build_file_diff(self):
+        saved_file_ids = set(self.saved_files.keys())
+        new_file_ids = set(self.new_files.keys())
+        deleted_all_files = saved_file_ids.difference(new_file_ids)
+        added = new_file_ids.difference(saved_file_ids)
+        common = saved_file_ids.intersection(new_file_ids)
+        changed_or_deleted_macro_file = False
+
+        # separate out deleted schema files
+        deleted_schema_files = []
+        deleted = []
+        for file_id in deleted_all_files:
+            if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
+                deleted_schema_files.append(file_id)
+            else:
+                if self.saved_files[file_id].parse_file_type in mg_files:
+                    changed_or_deleted_macro_file = True
+                deleted.append(file_id)
+
+        changed = []
+        changed_schema_files = []
+        unchanged = []
+        for file_id in common:
+            if self.saved_files[file_id].checksum == self.new_files[file_id].checksum:
+                unchanged.append(file_id)
+            else:
+                # separate out changed schema files
+                if self.saved_files[file_id].parse_file_type == ParseFileType.Schema:
+                    sf = self.saved_files[file_id]
+                    if type(sf).__name__ != "SchemaSourceFile":
+                        raise Exception(f"Serialization failure for {file_id}")
+                    changed_schema_files.append(file_id)
+                else:
+                    if self.saved_files[file_id].parse_file_type in mg_files:
+                        changed_or_deleted_macro_file = True
+                    changed.append(file_id)
+
+        # handle changed env_vars for non-schema-files
+        for file_id in self.env_vars_changed_source_files:
+            if file_id in deleted or file_id in changed:
+                continue
+            changed.append(file_id)
+
+        # handle changed env_vars for schema files
+        for file_id in self.env_vars_changed_schema_files.keys():
+            if file_id in deleted_schema_files or file_id in changed_schema_files:
+                continue
+            changed_schema_files.append(file_id)
+
+        file_diff = {
+            "deleted": deleted,
+            "deleted_schema_files": deleted_schema_files,
+            "added": added,
+            "changed": changed,
+            "changed_schema_files": changed_schema_files,
+            "unchanged": unchanged,
+        }
+        if changed_or_deleted_macro_file:
+            self.macro_child_map = self.saved_manifest.build_macro_child_map()
+        deleted = len(deleted) + len(deleted_schema_files)
+        changed = len(changed) + len(changed_schema_files)
+        event = PartialParsingEnabled(deleted=deleted, added=len(added), changed=changed)
+
+        if get_invocation_context().env.get("DBT_PP_TEST"):
+            fire_event(event, level=EventLevel.INFO)
+        else:
+            fire_event(event)
+        self.file_diff = file_diff
+
+    # generate the list of files that need parsing
+    # uses self.manifest.files generated by 'read_files'
+    def get_parsing_files(self):
+        if self.skip_parsing():
+            return {}
+        # Need to add new files first, because changes in schema files
+        # might refer to them
+        for file_id in self.file_diff["added"]:
+            self.processing_file = file_id
+            self.add_to_saved(file_id)
+        # Need to process schema files next, because the dictionaries
+        # need to be in place for handling SQL file changes
+        # The reverse sort here is just to ensure that the schema file
+        # processing order test case works, because otherwise the order
+        # of processing the schema files is not guaranteed.
+        self.file_diff["changed_schema_files"].sort(reverse=True)
+        for file_id in self.file_diff["changed_schema_files"]:
+            self.processing_file = file_id
+            self.change_schema_file(file_id)
+        for file_id in self.file_diff["deleted_schema_files"]:
+            self.processing_file = file_id
+            self.delete_schema_file(file_id)
+        for file_id in self.file_diff["deleted"]:
+            self.processing_file = file_id
+            self.delete_from_saved(file_id)
+        for file_id in self.file_diff["changed"]:
+            self.processing_file = file_id
+            self.update_in_saved(file_id)
+        return self.project_parser_files
+
+    # Add the file to the project parser dictionaries to schedule parsing
+    def add_to_pp_files(self, source_file):
+        file_id = source_file.file_id
+        parser_name = parse_file_type_to_parser[source_file.parse_file_type]
+        project_name = source_file.project_name
+        if not parser_name or not project_name:
+            raise Exception(
+                f"Did not find parse_file_type or project_name "
+                f"in SourceFile for {source_file.file_id}"
+            )
+        if project_name not in self.project_parser_files:
+            self.project_parser_files[project_name] = {}
+        if parser_name not in self.project_parser_files[project_name]:
+            self.project_parser_files[project_name][parser_name] = []
+        if (
+            file_id not in self.project_parser_files[project_name][parser_name]
+            and file_id not in self.file_diff["deleted"]
+            and file_id not in self.file_diff["deleted_schema_files"]
+        ):
+            self.project_parser_files[project_name][parser_name].append(file_id)
+
+    def already_scheduled_for_parsing(self, source_file):
+        file_id = source_file.file_id
+        project_name = source_file.project_name
+        if project_name not in self.project_parser_files:
+            return False
+        parser_name = parse_file_type_to_parser[source_file.parse_file_type]
+        if parser_name not in self.project_parser_files[project_name]:
+            return False
+        if file_id not in self.project_parser_files[project_name][parser_name]:
+            return False
+        return True
+
+    # Add new files, including schema files
+    def add_to_saved(self, file_id):
+        # add file object to saved manifest.files
+        source_file = deepcopy(self.new_files[file_id])
+        if source_file.parse_file_type == ParseFileType.Schema:
+            self.handle_added_schema_file(source_file)
+        self.saved_files[file_id] = source_file
+        # update pp_files to parse
+        self.add_to_pp_files(source_file)
+        fire_event(PartialParsingFile(operation="added", file_id=file_id))
+
+    def handle_added_schema_file(self, source_file):
+        source_file.pp_dict = source_file.dict_from_yaml.copy()
+        if "sources" in source_file.pp_dict:
+            for source in source_file.pp_dict["sources"]:
+                # We need to remove the original source, so it can
+                # be properly patched
+                if "overrides" in source:
+                    self.remove_source_override_target(source)
+        if "models" in source_file.pp_dict:
+            for model in source_file.pp_dict["models"]:
+                if "versions" in model:
+                    self.versioned_model_delete_schema_mssa_links(source_file, "models", model)
+
+    def delete_disabled(self, unique_id, file_id):
+        # This node/metric/exposure is disabled. Find it and remove it from disabled dictionary.
+        for dis_index, dis_node in enumerate(self.saved_manifest.disabled[unique_id]):
+            if dis_node.file_id == file_id:
+                node = dis_node
+                index = dis_index
+                break
+        # Remove node from disabled
+        del self.saved_manifest.disabled[unique_id][index]
+        # if all nodes were removed for the unique id, delete the unique_id
+        # from the disabled dict
+        if not self.saved_manifest.disabled[unique_id]:
+            self.saved_manifest.disabled.pop(unique_id)
+
+        return node
+
+    # Deletes for all non-schema files
+    def delete_from_saved(self, file_id):
+        # Look at all things touched by file, remove those
+        # nodes, and update pp_files to parse unless the
+        # file creating those nodes has also been deleted
+        saved_source_file = self.saved_files[file_id]
+
+        # SQL file: models, seeds, snapshots, analyses, tests: SQL files, except
+        # macros/tests
+        if saved_source_file.parse_file_type in mssat_files:
+            self.remove_mssat_file(saved_source_file)
+            self.saved_manifest.files.pop(file_id)
+
+        # macros
+        if saved_source_file.parse_file_type in mg_files:
+            self.delete_macro_file(saved_source_file, follow_references=True)
+
+        # docs
+        if saved_source_file.parse_file_type == ParseFileType.Documentation:
+            self.delete_doc_node(saved_source_file)
+
+        # fixtures
+        if saved_source_file.parse_file_type == ParseFileType.Fixture:
+            self.delete_fixture_node(saved_source_file)
+
+        fire_event(PartialParsingFile(operation="deleted", file_id=file_id))
+
+    # Updates for non-schema files
+    def update_in_saved(self, file_id):
+        new_source_file = deepcopy(self.new_files[file_id])
+        old_source_file = self.saved_files[file_id]
+
+        if new_source_file.parse_file_type in mssat_files:
+            self.update_mssat_in_saved(new_source_file, old_source_file)
+        elif new_source_file.parse_file_type in mg_files:
+            self.update_macro_in_saved(new_source_file, old_source_file)
+        elif new_source_file.parse_file_type == ParseFileType.Documentation:
+            self.update_doc_in_saved(new_source_file, old_source_file)
+        elif new_source_file.parse_file_type == ParseFileType.Fixture:
+            self.update_fixture_in_saved(new_source_file, old_source_file)
+        else:
+            raise Exception(f"Invalid parse_file_type in source_file {file_id}")
+        fire_event(PartialParsingFile(operation="updated", file_id=file_id))
+
+    # Models, seeds, snapshots: patches and tests
+    # analyses: patches, no tests
+    # tests: not touched by schema files (no patches, no tests)
+    # Updated schema files should have been processed already.
+    def update_mssat_in_saved(self, new_source_file, old_source_file):
+
+        if self.already_scheduled_for_parsing(old_source_file):
+            return
+
+        # These files only have one node except for snapshots
+        unique_ids = []
+        if old_source_file.nodes:
+            unique_ids = old_source_file.nodes
+
+        # replace source_file in saved and add to parsing list
+        file_id = new_source_file.file_id
+        self.saved_files[file_id] = deepcopy(new_source_file)
+        self.add_to_pp_files(new_source_file)
+        for unique_id in unique_ids:
+            self.remove_node_in_saved(new_source_file, unique_id)
+
+    def remove_node_in_saved(self, source_file, unique_id):
+        if unique_id in self.saved_manifest.nodes:
+            # delete node in saved
+            node = self.saved_manifest.nodes.pop(unique_id)
+        elif (
+            source_file.file_id in self.disabled_by_file_id
+            and unique_id in self.saved_manifest.disabled
+        ):
+            # This node is disabled. Find the node and remove it from disabled dictionary.
+            node = self.delete_disabled(unique_id, source_file.file_id)
+        else:
+            # Has already been deleted by another action
+            return
+
+        # look at patch_path in model node to see if we need
+        # to reapply a patch from a schema_file.
+        if node.patch_path:
+            file_id = node.patch_path
+            # it might be changed... then what?
+            if (
+                file_id not in self.file_diff["deleted"]
+                and file_id in self.saved_files
+                and source_file.parse_file_type in parse_file_type_to_key
+            ):
+                # Schema files should already be updated if this comes from a node,
+                # but this code is also called when updating groups and exposures.
+                # This might save the old schema file element, so when the schema file
+                # is processed, it should overwrite it by passing True to "merge_patch"
+                schema_file = self.saved_files[file_id]
+                dict_key = parse_file_type_to_key[source_file.parse_file_type]
+                # look for a matching list dictionary
+                elem_patch = None
+                if dict_key in schema_file.dict_from_yaml:
+                    for elem in schema_file.dict_from_yaml[dict_key]:
+                        if elem["name"] == node.name:
+                            elem_patch = elem
+                            break
+                if elem_patch:
+                    self.delete_schema_mssa_links(schema_file, dict_key, elem_patch)
+                    self.merge_patch(schema_file, dict_key, elem_patch)
+                    if unique_id in schema_file.node_patches:
+                        schema_file.node_patches.remove(unique_id)
+                    if unique_id in self.saved_manifest.disabled:
+                        # We have a patch_path in disabled nodes with a patch so
+                        # that we can connect the patch to the node
+                        for node in self.saved_manifest.disabled[unique_id]:
+                            node.patch_path = None
+
+    def update_macro_in_saved(self, new_source_file, old_source_file):
+        if self.already_scheduled_for_parsing(old_source_file):
+            return
+        self.handle_macro_file_links(old_source_file, follow_references=True)
+        file_id = new_source_file.file_id
+        self.saved_files[file_id] = deepcopy(new_source_file)
+        self.add_to_pp_files(new_source_file)
+
+    def update_doc_in_saved(self, new_source_file, old_source_file):
+        if self.already_scheduled_for_parsing(old_source_file):
+            return
+        self.delete_doc_node(old_source_file)
+        self.saved_files[new_source_file.file_id] = deepcopy(new_source_file)
+        self.add_to_pp_files(new_source_file)
+
+    def update_fixture_in_saved(self, new_source_file, old_source_file):
+        if self.already_scheduled_for_parsing(old_source_file):
+            return
+        self.delete_fixture_node(old_source_file)
+        self.saved_files[new_source_file.file_id] = deepcopy(new_source_file)
+        self.add_to_pp_files(new_source_file)
+
+    def remove_mssat_file(self, source_file: AnySourceFile):
+        # nodes [unique_ids] -- SQL files
+        # There should always be a node for a SQL file
+        if not isinstance(source_file, SourceFile) or not source_file.nodes:
+            return
+        # There is generally only 1 node for SQL files, except for macros and snapshots
+        for unique_id in source_file.nodes:
+            self.remove_node_in_saved(source_file, unique_id)
+            self.schedule_referencing_nodes_for_parsing(unique_id)
+
+    # We need to re-parse nodes that reference another removed node
+    def schedule_referencing_nodes_for_parsing(self, unique_id):
+        # Look at "children", i.e. nodes that reference this node
+        if unique_id in self.saved_manifest.child_map:
+            self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
+
+    def schedule_nodes_for_parsing(self, unique_ids):
+        for unique_id in unique_ids:
+            if unique_id in self.saved_manifest.nodes:
+                node = self.saved_manifest.nodes[unique_id]
+                if node.resource_type == NodeType.Test and node.test_node_type == "generic":
+                    # test nodes are handled separately. Must be removed from schema file
+                    continue
+                file_id = node.file_id
+                if file_id in self.saved_files and file_id not in self.file_diff["deleted"]:
+                    source_file = self.saved_files[file_id]
+                    self.remove_mssat_file(source_file)
+                    # content of non-schema files is only in new files
+                    self.saved_files[file_id] = deepcopy(self.new_files[file_id])
+                    self.add_to_pp_files(self.saved_files[file_id])
+            elif unique_id in self.saved_manifest.sources:
+                source = self.saved_manifest.sources[unique_id]
+                self._schedule_for_parsing(
+                    "sources", source, source.source_name, self.delete_schema_source
+                )
+            elif unique_id in self.saved_manifest.exposures:
+                exposure = self.saved_manifest.exposures[unique_id]
+                self._schedule_for_parsing(
+                    "exposures", exposure, exposure.name, self.delete_schema_exposure
+                )
+            elif unique_id in self.saved_manifest.metrics:
+                metric = self.saved_manifest.metrics[unique_id]
+                self._schedule_for_parsing(
+                    "metrics", metric, metric.name, self.delete_schema_metric
+                )
+            elif unique_id in self.saved_manifest.semantic_models:
+                semantic_model = self.saved_manifest.semantic_models[unique_id]
+                self._schedule_for_parsing(
+                    "semantic_models",
+                    semantic_model,
+                    semantic_model.name,
+                    self.delete_schema_semantic_model,
+                )
+            elif unique_id in self.saved_manifest.saved_queries:
+                saved_query = self.saved_manifest.saved_queries[unique_id]
+                self._schedule_for_parsing(
+                    "saved_queries", saved_query, saved_query.name, self.delete_schema_saved_query
+                )
+            elif unique_id in self.saved_manifest.macros:
+                macro = self.saved_manifest.macros[unique_id]
+                file_id = macro.file_id
+                if file_id in self.saved_files and file_id not in self.file_diff["deleted"]:
+                    source_file = self.saved_files[file_id]
+                    self.delete_macro_file(source_file)
+                    self.saved_files[file_id] = deepcopy(self.new_files[file_id])
+                    self.add_to_pp_files(self.saved_files[file_id])
+            elif unique_id in self.saved_manifest.unit_tests:
+                unit_test = self.saved_manifest.unit_tests[unique_id]
+                self._schedule_for_parsing(
+                    "unit_tests", unit_test, unit_test.name, self.delete_schema_unit_test
+                )
+
+    def _schedule_for_parsing(self, dict_key: str, element, name, delete: Callable) -> None:
+        file_id = element.file_id
+        if (
+            file_id in self.saved_files
+            and file_id not in self.file_diff["deleted"]
+            and file_id not in self.file_diff["deleted_schema_files"]
+        ):
+            schema_file = self.saved_files[file_id]
+            elements = []
+            assert isinstance(schema_file, SchemaSourceFile)
+            if dict_key in schema_file.dict_from_yaml:
+                elements = schema_file.dict_from_yaml[dict_key]
+            schema_element = self.get_schema_element(elements, name)
+            if schema_element:
+                delete(schema_file, schema_element)
+                self.merge_patch(schema_file, dict_key, schema_element)
+
+    def delete_macro_file(self, source_file, follow_references=False):
+        self.check_for_special_deleted_macros(source_file)
+        self.handle_macro_file_links(source_file, follow_references)
+        file_id = source_file.file_id
+        # It's not clear when this file_id would not exist in saved_files
+        if file_id in self.saved_files:
+            self.saved_files.pop(file_id)
+
+    def check_for_special_deleted_macros(self, source_file):
+        for unique_id in source_file.macros:
+            if unique_id in self.saved_manifest.macros:
+                package_name = unique_id.split(".")[1]
+                if package_name == "dbt":
+                    continue
+                macro = self.saved_manifest.macros[unique_id]
+                if macro.name in special_override_macros:
+                    self.deleted_special_override_macro = True
+
+    def recursively_gather_macro_references(self, macro_unique_id, referencing_nodes):
+        for unique_id in self.macro_child_map[macro_unique_id]:
+            if unique_id in referencing_nodes:
+                continue
+            referencing_nodes.append(unique_id)
+            if unique_id.startswith("macro."):
+                self.recursively_gather_macro_references(unique_id, referencing_nodes)
+
+    def handle_macro_file_links(self, source_file, follow_references=False):
+        # remove the macros in the 'macros' dictionary
+        macros = source_file.macros.copy()
+        for unique_id in macros:
+            if unique_id not in self.saved_manifest.macros:
+                # This happens when a macro has already been removed
+                if unique_id in source_file.macros:
+                    source_file.macros.remove(unique_id)
+                continue
+
+            base_macro = self.saved_manifest.macros.pop(unique_id)
+
+            # Recursively check children of this macro
+            # The macro_child_map might not exist if a macro is removed by
+            # schedule_nodes_for_parsing. We only want to follow
+            # references if the macro file itself has been updated or
+            # deleted, not if we're just updating referenced nodes.
+            if self.macro_child_map and follow_references:
+                referencing_nodes = []
+                self.recursively_gather_macro_references(unique_id, referencing_nodes)
+                self.schedule_macro_nodes_for_parsing(referencing_nodes)
+
+            if base_macro.patch_path:
+                file_id = base_macro.patch_path
+                if file_id in self.saved_files:
+                    schema_file = self.saved_files[file_id]
+                    macro_patches = []
+                    if "macros" in schema_file.dict_from_yaml:
+                        macro_patches = schema_file.dict_from_yaml["macros"]
+                    macro_patch = self.get_schema_element(macro_patches, base_macro.name)
+                    self.delete_schema_macro_patch(schema_file, macro_patch)
+                    self.merge_patch(schema_file, "macros", macro_patch)
+            # The macro may have already been removed by handling macro children
+            if unique_id in source_file.macros:
+                source_file.macros.remove(unique_id)
+
+    # similar to schedule_nodes_for_parsing but doesn't do sources and exposures
+    # and handles schema tests
+    def schedule_macro_nodes_for_parsing(self, unique_ids):
+        for unique_id in unique_ids:
+            if unique_id in self.saved_manifest.nodes:
+                node = self.saved_manifest.nodes[unique_id]
+                # Both generic tests from yaml files and singular tests have NodeType.Test
+                # so check for generic test.
+                if node.resource_type == NodeType.Test and node.test_node_type == "generic":
+                    schema_file_id = node.file_id
+                    schema_file = self.saved_manifest.files[schema_file_id]
+                    (key, name) = schema_file.get_key_and_name_for_test(node.unique_id)
+                    if key and name:
+                        patch_list = []
+                        if key in schema_file.dict_from_yaml:
+                            patch_list = schema_file.dict_from_yaml[key]
+                        patch = self.get_schema_element(patch_list, name)
+                        if patch:
+                            if key in ["models", "seeds", "snapshots"]:
+                                self.delete_schema_mssa_links(schema_file, key, patch)
+                                self.merge_patch(schema_file, key, patch)
+                                if unique_id in schema_file.node_patches:
+                                    schema_file.node_patches.remove(unique_id)
+                            elif key == "sources":
+                                # re-schedule source
+                                if "overrides" in patch:
+                                    # This is a source patch; need to re-parse orig source
+                                    self.remove_source_override_target(patch)
+                                self.delete_schema_source(schema_file, patch)
+                                self.merge_patch(schema_file, "sources", patch)
+                else:
+                    file_id = node.file_id
+                    if file_id in self.saved_files and file_id not in self.file_diff["deleted"]:
+                        source_file = self.saved_files[file_id]
+                        self.remove_mssat_file(source_file)
+                        # content of non-schema files is only in new files
+                        self.saved_files[file_id] = deepcopy(self.new_files[file_id])
+                        self.add_to_pp_files(self.saved_files[file_id])
+            elif unique_id in self.saved_manifest.macros:
+                macro = self.saved_manifest.macros[unique_id]
+                file_id = macro.file_id
+                if file_id in self.saved_files and file_id not in self.file_diff["deleted"]:
+                    source_file = self.saved_files[file_id]
+                    self.delete_macro_file(source_file)
+                    self.saved_files[file_id] = deepcopy(self.new_files[file_id])
+                    self.add_to_pp_files(self.saved_files[file_id])
+
+    def delete_doc_node(self, source_file):
+        # remove the nodes in the 'docs' dictionary
+        docs = source_file.docs.copy()
+        for unique_id in docs:
+            self.saved_manifest.docs.pop(unique_id)
+            source_file.docs.remove(unique_id)
+        # The unique_id of objects that contain a doc call are stored in the
+        # doc source_file.nodes
+        self.schedule_nodes_for_parsing(source_file.nodes)
+        source_file.nodes = []
+        # Remove the file object
+        self.saved_manifest.files.pop(source_file.file_id)
+
+    def delete_fixture_node(self, source_file):
+        # remove fixtures from the "fixtures" dictionary
+        fixture_unique_id = source_file.fixture
+        self.saved_manifest.fixtures.pop(fixture_unique_id)
+        unit_tests = source_file.unit_tests.copy()
+        for unique_id in unit_tests:
+            unit_test = self.saved_manifest.unit_tests.pop(unique_id)
+            # schedule unit_test for parsing
+            self._schedule_for_parsing(
+                "unit_tests", unit_test, unit_test.name, self.delete_schema_unit_test
+            )
+            source_file.unit_tests.remove(unique_id)
+        self.saved_manifest.files.pop(source_file.file_id)
+
+    # Schema files -----------------------
+    # Changed schema files
+    def change_schema_file(self, file_id):
+        saved_schema_file = self.saved_files[file_id]
+        new_schema_file = deepcopy(self.new_files[file_id])
+        saved_yaml_dict = saved_schema_file.dict_from_yaml
+        new_yaml_dict = new_schema_file.dict_from_yaml
+        if saved_schema_file.pp_dict is None:
+            saved_schema_file.pp_dict = {}
+        self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)
+
+        # copy from new schema_file to saved_schema_file to preserve references
+        # that weren't removed
+        saved_schema_file.contents = new_schema_file.contents
+        saved_schema_file.checksum = new_schema_file.checksum
+        saved_schema_file.dfy = new_schema_file.dfy
+        # schedule parsing
+        self.add_to_pp_files(saved_schema_file)
+        # schema_file pp_dict should have been generated already
+        fire_event(PartialParsingFile(operation="updated", file_id=file_id))
+
+    # Delete schema files -- a variation on change_schema_file
+    def delete_schema_file(self, file_id):
+        saved_schema_file = self.saved_files[file_id]
+        saved_yaml_dict = saved_schema_file.dict_from_yaml
+        new_yaml_dict = {}
+        self.handle_schema_file_changes(saved_schema_file, saved_yaml_dict, new_yaml_dict)
+        self.saved_manifest.files.pop(file_id)
+
+    # For each key in a schema file dictionary, process the changed, deleted, and added
+    # elements for the key lists
+    def handle_schema_file_changes(self, schema_file, saved_yaml_dict, new_yaml_dict):
+        # loop through comparing previous dict_from_yaml with current dict_from_yaml
+        # Need to do the deleted/added/changed thing, just like the files lists
+
+        env_var_changes = {}
+        if schema_file.file_id in self.env_vars_changed_schema_files:
+            env_var_changes = self.env_vars_changed_schema_files[schema_file.file_id]
+
+        # models, seeds, snapshots, analyses
+        for dict_key in ["models", "seeds", "snapshots", "analyses"]:
+            key_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
+            if key_diff["changed"]:
+                for elem in key_diff["changed"]:
+                    if dict_key == "snapshots" and "relation" in elem:
+                        self.delete_yaml_snapshot(schema_file, elem)
+                    self.delete_schema_mssa_links(schema_file, dict_key, elem)
+                    self.merge_patch(schema_file, dict_key, elem, True)
+            if key_diff["deleted"]:
+                for elem in key_diff["deleted"]:
+                    if dict_key == "snapshots" and "relation" in elem:
+                        self.delete_yaml_snapshot(schema_file, elem)
+                    self.delete_schema_mssa_links(schema_file, dict_key, elem)
+            if key_diff["added"]:
+                for elem in key_diff["added"]:
+                    if dict_key == "models" and "versions" in elem:
+                        self.versioned_model_delete_schema_mssa_links(schema_file, dict_key, elem)
+                    self.merge_patch(schema_file, dict_key, elem, True)
+            # Handle schema file updates due to env_var changes
+            if dict_key in env_var_changes and dict_key in new_yaml_dict:
+                for name in env_var_changes[dict_key]:
+                    if name in key_diff["changed_or_deleted_names"]:
+                        continue
+                    elem = self.get_schema_element(new_yaml_dict[dict_key], name)
+                    if elem:
+                        if dict_key == "snapshots" and "relation" in elem:
+                            self.delete_yaml_snapshot(schema_file, elem)
+                        self.delete_schema_mssa_links(schema_file, dict_key, elem)
+                        self.merge_patch(schema_file, dict_key, elem, True)
+
+        # sources
+        dict_key = "sources"
+        source_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
+        if source_diff["changed"]:
+            for source in source_diff["changed"]:
+                if "overrides" in source:  # This is a source patch; need to re-parse orig source
+                    self.remove_source_override_target(source)
+                self.delete_schema_source(schema_file, source)
+                self.merge_patch(schema_file, dict_key, source, True)
+        if source_diff["deleted"]:
+            for source in source_diff["deleted"]:
+                if "overrides" in source:  # This is a source patch; need to re-parse orig source
+                    self.remove_source_override_target(source)
+                self.delete_schema_source(schema_file, source)
+        if source_diff["added"]:
+            for source in source_diff["added"]:
+                if "overrides" in source:  # This is a source patch; need to re-parse orig source
+                    self.remove_source_override_target(source)
+                self.merge_patch(schema_file, dict_key, source, True)
+        # Handle schema file updates due to env_var changes
+        if dict_key in env_var_changes and dict_key in new_yaml_dict:
+            for name in env_var_changes[dict_key]:
+                if name in source_diff["changed_or_deleted_names"]:
+                    continue
+                source = self.get_schema_element(new_yaml_dict[dict_key], name)
+                if source:
+                    if "overrides" in source:
+                        self.remove_source_override_target(source)
+                    self.delete_schema_source(schema_file, source)
+                    self.merge_patch(schema_file, dict_key, source, True)
+
+        def handle_change(key: str, delete: Callable):
+            self._handle_element_change(
+                schema_file, saved_yaml_dict, new_yaml_dict, env_var_changes, key, delete
+            )
+
+        handle_change("macros", self.delete_schema_macro_patch)
+        handle_change("exposures", self.delete_schema_exposure)
+        handle_change("metrics", self.delete_schema_metric)
+        handle_change("groups", self.delete_schema_group)
+        handle_change("semantic_models", self.delete_schema_semantic_model)
+        handle_change("unit_tests", self.delete_schema_unit_test)
+        handle_change("saved_queries", self.delete_schema_saved_query)
+        handle_change("data_tests", self.delete_schema_data_test_patch)
+
748
+ def _handle_element_change(
749
+ self, schema_file, saved_yaml_dict, new_yaml_dict, env_var_changes, dict_key: str, delete
750
+ ):
751
+ element_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict)
752
+ if element_diff["changed"]:
753
+ for element in element_diff["changed"]:
754
+ delete(schema_file, element)
755
+ self.merge_patch(schema_file, dict_key, element, True)
756
+ if element_diff["deleted"]:
757
+ for element in element_diff["deleted"]:
758
+ delete(schema_file, element)
759
+ if element_diff["added"]:
760
+ for element in element_diff["added"]:
761
+ self.merge_patch(schema_file, dict_key, element, True)
762
+ # Handle schema file updates due to env_var changes
763
+ if dict_key in env_var_changes and dict_key in new_yaml_dict:
764
+ for name in env_var_changes[dict_key]:
765
+ if name in element_diff["changed_or_deleted_names"]:
766
+ continue
767
+ elem = self.get_schema_element(new_yaml_dict[dict_key], name)
768
+ if elem:
769
+ delete(schema_file, elem)
770
+ self.merge_patch(schema_file, dict_key, elem, True)
771
+
772
+ # Take a "section" of the schema file yaml dictionary from saved and new schema files
773
+ # and determine which parts have changed
774
+ def get_diff_for(self, key, saved_yaml_dict, new_yaml_dict):
775
+ if key in saved_yaml_dict or key in new_yaml_dict:
776
+ saved_elements = saved_yaml_dict[key] if key in saved_yaml_dict else []
777
+ new_elements = new_yaml_dict[key] if key in new_yaml_dict else []
778
+ else:
779
+ return {"deleted": [], "added": [], "changed": []}
780
+ # for each set of keys, need to create a dictionary of names pointing to entry
781
+ saved_elements_by_name = {}
782
+ new_elements_by_name = {}
783
+ # sources have two part names?
784
+ for element in saved_elements:
785
+ saved_elements_by_name[element["name"]] = element
786
+ for element in new_elements:
787
+ new_elements_by_name[element["name"]] = element
788
+
789
+ # now determine which elements, by name, are added, deleted or changed
790
+ saved_element_names = set(saved_elements_by_name.keys())
791
+ new_element_names = set(new_elements_by_name.keys())
792
+ deleted = saved_element_names.difference(new_element_names)
793
+ added = new_element_names.difference(saved_element_names)
794
+ common = saved_element_names.intersection(new_element_names)
795
+ changed = []
796
+ for element_name in common:
797
+ if saved_elements_by_name[element_name] != new_elements_by_name[element_name]:
798
+ changed.append(element_name)
799
+
800
+ # make lists of yaml elements to return as diffs
801
+ deleted_elements = [saved_elements_by_name[name].copy() for name in deleted]
802
+ added_elements = [new_elements_by_name[name].copy() for name in added]
803
+ changed_elements = [new_elements_by_name[name].copy() for name in changed]
804
+
805
+ diff = {
806
+ "deleted": deleted_elements,
807
+ "added": added_elements,
808
+ "changed": changed_elements,
809
+ "changed_or_deleted_names": list(changed) + list(deleted),
810
+ }
811
+ return diff
812
+
813
+ # Merge a patch file into the pp_dict in a schema file. The "new_patch"
814
+ # flag indicates that we're processing a schema file, so if a matching
815
+ # patch has already been scheduled, replace it.
816
+ def merge_patch(self, schema_file, key, patch, new_patch=False):
817
+ if schema_file.pp_dict is None:
818
+ schema_file.pp_dict = {}
819
+ pp_dict = schema_file.pp_dict
820
+ if key not in pp_dict:
821
+ pp_dict[key] = [patch]
822
+ else:
823
+ # check that this patch hasn't already been saved
824
+ found_elem = None
825
+ for elem in pp_dict[key]:
826
+ if elem["name"] == patch["name"]:
827
+ found_elem = elem
828
+ if not found_elem:
829
+ pp_dict[key].append(patch)
830
+ elif found_elem and new_patch:
831
+ # remove patch and replace with new one
832
+ pp_dict[key].remove(found_elem)
833
+ pp_dict[key].append(patch)
834
+
835
+ schema_file.delete_from_env_vars(key, patch["name"])
836
+ schema_file.delete_from_unrendered_configs(key, patch["name"])
837
+ self.add_to_pp_files(schema_file)
838
+
839
+ # For model, seed, snapshot, analysis schema dictionary keys,
840
+ # delete the patches and tests from the patch
841
+ def delete_schema_mssa_links(self, schema_file, dict_key, elem) -> None:
842
+ # find elem node unique_id in node_patches
843
+ prefix = key_to_prefix[dict_key]
844
+ elem_unique_ids = []
845
+ for unique_id in schema_file.node_patches:
846
+ if not unique_id.startswith(prefix):
847
+ continue
848
+ parts = unique_id.split(".")
849
+ elem_name = parts[2]
850
+ if elem_name == elem["name"]:
851
+ elem_unique_ids.append(unique_id)
852
+ self._delete_schema_mssa_links(schema_file, dict_key, elem, elem_unique_ids)
853
+
854
+ def versioned_model_delete_schema_mssa_links(self, schema_file, dict_key, elem) -> None:
855
+ elem_unique_ids = []
856
+ # We need to look up possible existing models that this new or modified patch applies to
857
+ unique_id = f"model.{schema_file.project_name}.{elem['name']}"
858
+ if unique_id in self.saved_manifest.nodes:
859
+ elem_unique_ids.append(unique_id)
860
+ if not elem_unique_ids:
861
+ return
862
+ self._delete_schema_mssa_links(schema_file, dict_key, elem, elem_unique_ids)
863
+
864
+ def _delete_schema_mssa_links(self, schema_file, dict_key, elem, elem_unique_ids):
865
+ # remove elem node and remove unique_id from node_patches
866
+ for elem_unique_id in elem_unique_ids:
867
+ # might have been already removed
868
+ # For all-yaml snapshots, we don't do this, since the node
869
+ # should have already been removed.
870
+ if (
871
+ elem_unique_id in self.saved_manifest.nodes
872
+ or elem_unique_id in self.saved_manifest.disabled
873
+ ):
874
+ nodes: List[Union[ModelNode, SeedNode, SnapshotNode, AnalysisNode]] = []
875
+ if elem_unique_id in self.saved_manifest.nodes:
876
+ nodes = [self.saved_manifest.nodes.pop(elem_unique_id)] # type: ignore[list-item]
877
+ else:
878
+ # The value of disabled items is a list of nodes
879
+ nodes = self.saved_manifest.disabled.pop(elem_unique_id) # type: ignore[assignment]
880
+ # need to add the node source_file to pp_files
881
+ for node in nodes:
882
+ file_id = node.file_id
883
+ # need to copy new file to saved files in order to get content
884
+ if file_id in self.new_files:
885
+ self.saved_files[file_id] = deepcopy(self.new_files[file_id])
886
+ if self.saved_files[file_id]:
887
+ source_file = self.saved_files[file_id]
888
+ self.add_to_pp_files(source_file)
889
+ # if the node's group has changed - need to reparse all referencing nodes to ensure valid ref access
890
+ if node.group != elem.get("group"):
891
+ self.schedule_referencing_nodes_for_parsing(node.unique_id)
892
+ # If the latest version has changed, a version has been removed, or a version has been added,
893
+ # we need to reparse referencing nodes.
894
+ if node.is_versioned or elem.get("versions"):
895
+ self.schedule_referencing_nodes_for_parsing(node.unique_id)
896
+ # remove from patches
897
+ # For versioned models, the schedule_referencing_nodes_for_parsing call above
898
+ # could have caused a recursive visit to this file.
899
+ if elem_unique_id in schema_file.node_patches:
900
+ schema_file.node_patches.remove(elem_unique_id)
901
+
902
+ # for models, seeds, snapshots (not analyses)
903
+ if dict_key in ["models", "seeds", "snapshots"]:
904
+ # find related tests and remove them
905
+ self.remove_tests(schema_file, dict_key, elem["name"])
906
+
907
+ def remove_tests(self, schema_file, dict_key, name):
908
+ tests = schema_file.get_tests(dict_key, name)
909
+ for test_unique_id in tests:
910
+ if test_unique_id in self.saved_manifest.nodes:
911
+ self.saved_manifest.nodes.pop(test_unique_id)
912
+ schema_file.remove_tests(dict_key, name)
913
+ # We also need to remove tests in other schema files that
914
+ # reference this node.
915
+ unique_id = f"{key_to_prefix[dict_key]}.{schema_file.project_name}.{name}"
916
+ if unique_id in self.saved_manifest.child_map:
917
+ for child_id in self.saved_manifest.child_map[unique_id]:
918
+ if child_id.startswith("test") and child_id in self.saved_manifest.nodes:
919
+ child_test = self.saved_manifest.nodes[child_id]
920
+ if child_test.attached_node:
921
+ if child_test.attached_node in self.saved_manifest.nodes:
922
+ attached_node = self.saved_manifest.nodes[child_test.attached_node]
923
+ self.update_in_saved(attached_node.file_id)
924
+
925
+ def delete_yaml_snapshot(self, schema_file, snapshot_dict):
926
+ snapshot_name = snapshot_dict["name"]
927
+ snapshots = schema_file.snapshots.copy()
928
+ for unique_id in snapshots:
929
+ if unique_id in self.saved_manifest.nodes:
930
+ snapshot = self.saved_manifest.nodes[unique_id]
931
+ if snapshot.name == snapshot_name:
932
+ self.saved_manifest.nodes.pop(unique_id)
933
+ schema_file.snapshots.remove(unique_id)
934
+ elif unique_id in self.saved_manifest.disabled:
935
+ self.delete_disabled(unique_id, schema_file.file_id)
936
+ schema_file.snapshots.remove(unique_id)
937
+
938
+ def delete_schema_source(self, schema_file, source_dict):
939
+ # both patches, tests, and source nodes
940
+ source_name = source_dict["name"]
941
+ # There may be multiple sources for each source dict, since
942
+ # there will be a separate source node for each table.
943
+ # SourceDefinition name = table name, dict name is source_name
944
+ sources = schema_file.sources.copy()
945
+ for unique_id in sources:
946
+ if unique_id in self.saved_manifest.sources:
947
+ source = self.saved_manifest.sources[unique_id]
948
+ if source.source_name == source_name:
949
+ source = self.saved_manifest.sources.pop(unique_id)
950
+ schema_file.sources.remove(unique_id)
951
+ self.schedule_referencing_nodes_for_parsing(unique_id)
952
+
953
+ self.remove_tests(schema_file, "sources", source_name)
954
+
955
+ def delete_schema_macro_patch(self, schema_file, macro):
956
+ # This is just macro patches that need to be reapplied
957
+ macro_unique_id = None
958
+ if macro["name"] in schema_file.macro_patches:
959
+ macro_unique_id = schema_file.macro_patches[macro["name"]]
960
+ del schema_file.macro_patches[macro["name"]]
961
+ # Need to delete all macros in the same file
962
+ # and then reapply all schema file updates for those macros
963
+ if macro_unique_id and macro_unique_id in self.saved_manifest.macros:
964
+ macro = self.saved_manifest.macros.pop(macro_unique_id)
965
+ macro_file_id = macro.file_id
966
+ if macro_file_id in self.new_files:
967
+ source_file = self.saved_files[macro_file_id]
968
+ self.delete_macro_file(source_file)
969
+ self.saved_files[macro_file_id] = deepcopy(self.new_files[macro_file_id])
970
+ self.add_to_pp_files(self.saved_files[macro_file_id])
971
+
972
+ def delete_schema_data_test_patch(self, schema_file, data_test):
973
+ data_test_unique_id = None
974
+ for unique_id in schema_file.node_patches:
975
+ if not unique_id.startswith("test"):
976
+ continue
977
+ parts = unique_id.split(".")
978
+ elem_name = parts[2]
979
+ if elem_name == data_test["name"]:
980
+ data_test_unique_id = unique_id
981
+ break
982
+ if data_test_unique_id and data_test_unique_id in self.saved_manifest.nodes:
983
+ singular_data_test = self.saved_manifest.nodes.pop(data_test_unique_id)
984
+ file_id = singular_data_test.file_id
985
+ if file_id in self.new_files:
986
+ self.saved_files[file_id] = deepcopy(self.new_files[file_id])
987
+ self.add_to_pp_files(self.saved_files[file_id])
988
+
989
+    # exposures are created only from schema files, so just delete
+    # the exposure or the disabled exposure.
+    def delete_schema_exposure(self, schema_file, exposure_dict):
+        exposure_name = exposure_dict["name"]
+        exposures = schema_file.exposures.copy()
+        for unique_id in exposures:
+            if unique_id in self.saved_manifest.exposures:
+                exposure = self.saved_manifest.exposures[unique_id]
+                if exposure.name == exposure_name:
+                    self.saved_manifest.exposures.pop(unique_id)
+                    schema_file.exposures.remove(unique_id)
+            elif unique_id in self.saved_manifest.disabled:
+                self.delete_disabled(unique_id, schema_file.file_id)
+
+    # groups are created only from schema files, so just delete the group
+    def delete_schema_group(self, schema_file, group_dict):
+        group_name = group_dict["name"]
+        groups = schema_file.groups.copy()
+        for unique_id in groups:
+            if unique_id in self.saved_manifest.groups:
+                group = self.saved_manifest.groups[unique_id]
+                if group.name == group_name:
+                    self.schedule_nodes_for_parsing(self.saved_manifest.group_map[group.name])
+                    self.saved_manifest.groups.pop(unique_id)
+                    schema_file.groups.remove(unique_id)
+
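+    # Example (hypothetical): group_map maps a group name to the unique_ids of
+    # its members, e.g. {"finance": ["model.my_project.revenue"]}; those
+    # members are rescheduled for parsing before the group itself is removed.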
+    # metrics are created only from schema files, but can also be referred to by other nodes
+    def delete_schema_metric(self, schema_file, metric_dict):
+        metric_name = metric_dict["name"]
+        metrics = schema_file.metrics.copy()
+        for unique_id in metrics:
+            if unique_id in self.saved_manifest.metrics:
+                metric = self.saved_manifest.metrics[unique_id]
+                if metric.name == metric_name:
+                    # Need to find everything that referenced this metric and schedule it for parsing
+                    if unique_id in self.saved_manifest.child_map:
+                        self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
+                    self.saved_manifest.metrics.pop(unique_id)
+                    schema_file.metrics.remove(unique_id)
+            elif unique_id in self.saved_manifest.disabled:
+                self.delete_disabled(unique_id, schema_file.file_id)
+
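+    # Example (hypothetical names): child_map maps a unique_id to the nodes
+    # that reference it, e.g.
+    #   {"metric.my_project.revenue": ["model.my_project.revenue_summary"]}
+    # so deleting the metric reschedules revenue_summary for parsing.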
+    def delete_schema_saved_query(self, schema_file, saved_query_dict):
+        saved_query_name = saved_query_dict["name"]
+        saved_queries = schema_file.saved_queries.copy()
+        for unique_id in saved_queries:
+            if unique_id in self.saved_manifest.saved_queries:
+                saved_query = self.saved_manifest.saved_queries[unique_id]
+                if saved_query.name == saved_query_name:
+                    # Need to find everything that referenced this saved_query and schedule it for parsing
+                    if unique_id in self.saved_manifest.child_map:
+                        self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
+                    self.saved_manifest.saved_queries.pop(unique_id)
+            elif unique_id in self.saved_manifest.disabled:
+                self.delete_disabled(unique_id, schema_file.file_id)
+
+    def delete_schema_semantic_model(self, schema_file, semantic_model_dict):
+        semantic_model_name = semantic_model_dict["name"]
+        semantic_models = schema_file.semantic_models.copy()
+        for unique_id in semantic_models:
+            if unique_id in self.saved_manifest.semantic_models:
+                semantic_model = self.saved_manifest.semantic_models[unique_id]
+                if semantic_model.name == semantic_model_name:
+                    # Need to find everything that referenced this semantic model and schedule it for parsing
+                    if unique_id in self.saved_manifest.child_map:
+                        self.schedule_nodes_for_parsing(self.saved_manifest.child_map[unique_id])
+                    self.saved_manifest.semantic_models.pop(unique_id)
+                    schema_file.semantic_models.remove(unique_id)
+            elif unique_id in self.saved_manifest.disabled:
+                self.delete_disabled(unique_id, schema_file.file_id)
+
+        if schema_file.generated_metrics:
+            # If this partial parse file has an old "generated_metrics" list,
+            # call code to fix it up before processing.
+            schema_file.fix_metrics_from_measures()
+        if semantic_model_name in schema_file.metrics_from_measures:
+            for unique_id in schema_file.metrics_from_measures[semantic_model_name]:
+                if unique_id in self.saved_manifest.metrics:
+                    self.saved_manifest.metrics.pop(unique_id)
+                elif unique_id in self.saved_manifest.disabled:
+                    self.delete_disabled(unique_id, schema_file.file_id)
+            del schema_file.metrics_from_measures[semantic_model_name]
+
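+    # Example (hypothetical names): metrics_from_measures maps a semantic model
+    # name to the metrics generated from its measures, e.g.
+    #   {"revenue": ["metric.my_project.total_revenue"]}
+    # so those generated metrics are dropped along with the semantic model.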
+    def delete_schema_unit_test(self, schema_file, unit_test_dict):
+        unit_test_name = unit_test_dict["name"]
+        unit_tests = schema_file.unit_tests.copy()
+        for unique_id in unit_tests:
+            if unique_id in self.saved_manifest.unit_tests:
+                unit_test = self.saved_manifest.unit_tests[unique_id]
+                if unit_test.name == unit_test_name:
+                    self.saved_manifest.unit_tests.pop(unique_id)
+                    schema_file.unit_tests.remove(unique_id)
+        # No disabled unit tests yet
+
+    def get_schema_element(self, elem_list, elem_name):
+        for element in elem_list:
+            if "name" in element and element["name"] == elem_name:
+                return element
+        return None
+
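+    # Example (hypothetical): given elem_list parsed from yaml as
+    #   [{"name": "orders"}, {"name": "customers"}]
+    # get_schema_element(elem_list, "orders") returns the first dict, and an
+    # unmatched name returns None.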
+    def get_schema_file_for_source(self, package_name, source_name):
+        schema_file = None
+        for source in self.saved_manifest.sources.values():
+            if source.package_name == package_name and source.source_name == source_name:
+                file_id = source.file_id
+                if file_id in self.saved_files:
+                    schema_file = self.saved_files[file_id]
+                break
+        return schema_file
+
+    def get_source_override_file_and_dict(self, source):
+        package = source["overrides"]
+        source_name = source["name"]
+        orig_source_schema_file = self.get_schema_file_for_source(package, source_name)
+        orig_sources = orig_source_schema_file.dict_from_yaml["sources"]
+        orig_source = self.get_schema_element(orig_sources, source_name)
+        return (orig_source_schema_file, orig_source)
+
+    def remove_source_override_target(self, source_dict):
+        (orig_file, orig_source) = self.get_source_override_file_and_dict(source_dict)
+        if orig_source:
+            self.delete_schema_source(orig_file, orig_source)
+            self.merge_patch(orig_file, "sources", orig_source)
+            self.add_to_pp_files(orig_file)
+
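+    # Example (hypothetical names): a source entry like
+    #   {"name": "raw", "overrides": "upstream_package"}
+    # deletes upstream_package's original "raw" source and re-merges its dict
+    # so the override is reapplied on the next parse.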
+    # This builds a dictionary of files that need to be scheduled for parsing
+    # because an env var has changed.
+    # source_files
+    #   env_vars_changed_source_files: [file_id, file_id, ...]
+    # schema_files
+    #   env_vars_changed_schema_files: {file_id: {yaml_key: [name, ...]}}
+    def build_env_vars_to_files(self):
+        unchanged_vars = []
+        changed_vars = []
+        delete_vars = []
+        # Check whether each env_var has changed and add it to
+        # the unchanged or changed list.
+        for env_var in self.saved_manifest.env_vars:
+            prev_value = self.saved_manifest.env_vars[env_var]
+            current_value = os.getenv(env_var)
+            if current_value is None:
+                # This will be true when depending on the default value.
+                # We store env vars set by defaults as a static placeholder string
+                # so we can recognize that they have defaults. We depend on default
+                # changes triggering reparsing via file change; if the file has not
+                # changed we can assume the default has not changed.
+                if prev_value == DEFAULT_ENV_PLACEHOLDER:
+                    unchanged_vars.append(env_var)
+                    continue
+                # env_var is no longer set, so remove it from the manifest;
+                # prev_value != None here, so it also lands in changed_vars below.
+                delete_vars.append(env_var)
+            if prev_value == current_value:
+                unchanged_vars.append(env_var)
+            else:  # prev_value != current_value
+                changed_vars.append(env_var)
+        for env_var in delete_vars:
+            del self.saved_manifest.env_vars[env_var]
+
+        env_vars_changed_source_files = []
+        env_vars_changed_schema_files = {}
+        # SourceFiles contain a list of the env_vars that were used in the file.
+        # SchemaSourceFiles contain a dictionary mapping yaml_key to schema entry
+        # names to a list of env_vars.
+        # Create a list of file_ids for source_files that need to be reparsed, and
+        # a dictionary of file_ids to yaml_keys to names.
+        for source_file in self.saved_files.values():
+            if source_file.parse_file_type == ParseFileType.Fixture:
+                continue
+            file_id = source_file.file_id
+            if not source_file.env_vars:
+                continue
+            if source_file.parse_file_type == ParseFileType.Schema:
+                for yaml_key in source_file.env_vars.keys():
+                    for name in source_file.env_vars[yaml_key].keys():
+                        for env_var in source_file.env_vars[yaml_key][name]:
+                            if env_var in changed_vars:
+                                if file_id not in env_vars_changed_schema_files:
+                                    env_vars_changed_schema_files[file_id] = {}
+                                if yaml_key not in env_vars_changed_schema_files[file_id]:
+                                    env_vars_changed_schema_files[file_id][yaml_key] = []
+                                if name not in env_vars_changed_schema_files[file_id][yaml_key]:
+                                    env_vars_changed_schema_files[file_id][yaml_key].append(name)
+                                break  # one changed env_var is enough for this entry
+
+            else:
+                for env_var in source_file.env_vars:
+                    if env_var in changed_vars:
+                        env_vars_changed_source_files.append(file_id)
+                        break  # one changed env_var is enough for this file
+
+        return (env_vars_changed_source_files, env_vars_changed_schema_files)
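+
+    # Example (hypothetical file_ids): the two return values have these shapes:
+    #   env_vars_changed_source_files = ["my_project://models/orders.sql"]
+    #   env_vars_changed_schema_files = {"my_project://models/schema.yml": {"models": ["orders"]}}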