dvt-core 1.11.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dvt-core might be problematic. Click here for more details.

Files changed (261) hide show
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/parser/sources.py ADDED
@@ -0,0 +1,557 @@
1
+ import itertools
2
+ from dataclasses import replace
3
+ from pathlib import Path
4
+ from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
5
+
6
+ from dvt.artifacts.resources import FreshnessThreshold, SourceConfig, Time
7
+ from dvt.config import RuntimeConfig
8
+ from dvt.context.context_config import (
9
+ BaseContextConfigGenerator,
10
+ ContextConfigGenerator,
11
+ UnrenderedConfigGenerator,
12
+ )
13
+ from dvt.contracts.graph.manifest import Manifest, SourceKey
14
+ from dvt.contracts.graph.nodes import (
15
+ GenericTestNode,
16
+ SourceDefinition,
17
+ UnpatchedSourceDefinition,
18
+ )
19
+ from dvt.contracts.graph.unparsed import (
20
+ SourcePatch,
21
+ SourceTablePatch,
22
+ UnparsedColumn,
23
+ UnparsedSourceDefinition,
24
+ UnparsedSourceTableDefinition,
25
+ )
26
+ from dvt.events.types import FreshnessConfigProblem, UnusedTables, ValidationWarning
27
+ from dvt.exceptions import ParsingError
28
+ from dvt.node_types import NodeType
29
+ from dvt.parser.common import ParserRef
30
+ from dvt.parser.schema_generic_tests import SchemaGenericTestParser
31
+
32
+ from dbt.adapters.capability import Capability
33
+ from dbt.adapters.factory import get_adapter
34
+ from dbt_common.events.functions import fire_event, warn_or_error
35
+ from dbt_common.exceptions import DbtInternalError
36
+
37
+
38
+ # An UnparsedSourceDefinition is taken directly from the yaml
39
+ # file. It can affect multiple tables, all of which will eventually
40
+ # have their own source node. An UnparsedSourceDefinition will
41
+ # generate multiple UnpatchedSourceDefinition nodes (one per
42
+ # table) in the SourceParser.add_source_definitions. The
43
+ # SourcePatcher takes an UnparsedSourceDefinition and the
44
+ # SourcePatch and produces a SourceDefinition. Each
45
+ # SourcePatch can be applied to multiple UnpatchedSourceDefinitions.
46
class SourcePatcher:
    """Turn the manifest's unpatched source entries into SourceDefinitions.

    For every entry in ``manifest.sources`` the patcher looks up a matching
    SourcePatch, applies it, parses the generic tests declared on the source
    and finally builds the SourceDefinition. Results are collected in
    ``self.sources``; disabled sources and tests are registered on the
    manifest as disabled nodes.
    """

    def __init__(
        self,
        root_project: RuntimeConfig,
        manifest: Manifest,
    ) -> None:
        """Store the root project and manifest and set up empty bookkeeping."""
        self.root_project = root_project
        self.manifest = manifest
        # One generic-test parser per package, created lazily.
        self.generic_test_parsers: Dict[str, SchemaGenericTestParser] = {}
        # Patch key -> names of the patched tables that were actually matched.
        self.patches_used: Dict[SourceKey, Set[str]] = {}
        # unique_id -> enabled, fully parsed source.
        self.sources: Dict[str, SourceDefinition] = {}
        self._deprecations: Set[Any] = set()

    def construct_sources(self) -> None:
        """Patch and parse every source in the manifest, then warn on unused patches.

        Entries that are already SourceDefinitions (retained by partial
        parsing) are copied to ``self.sources`` untouched.
        """
        for unique_id, unpatched in self.manifest.sources.items():
            schema_file = self.manifest.files[unpatched.file_id]
            if isinstance(unpatched, SourceDefinition):
                # In partial parsing, there will be SourceDefinitions
                # which must be retained.
                self.sources[unpatched.unique_id] = unpatched
                continue
            # returns None if there is no patch
            patch = self.get_patch_for(unpatched)

            # returns unpatched if there is no patch
            patched = self.patch_source(unpatched, patch)

            # now use the patched UnpatchedSourceDefinition to extract test data.
            for test in self.get_source_tests(patched):
                if test.config.enabled:
                    self.manifest.add_node_nofile(test)
                else:
                    self.manifest.add_disabled_nofile(test)
                # save the test unique_id in the schema_file, so we can
                # process in partial parsing
                test_from = {"key": "sources", "name": patched.source.name}
                schema_file.add_test(test.unique_id, test_from)

            # Convert UnpatchedSourceDefinition to a SourceDefinition
            parsed = self.parse_source(patched)
            if parsed.config.enabled:
                self.sources[unique_id] = parsed
            else:
                self.manifest.add_disabled_nofile(parsed)

        self.warn_unused()

    def patch_source(
        self,
        unpatched: UnpatchedSourceDefinition,
        patch: Optional[SourcePatch],
    ) -> UnpatchedSourceDefinition:
        """Return ``unpatched`` with ``patch`` applied to its source and table.

        When ``patch`` is None the input object is returned as-is. Otherwise
        a copy is returned whose ``source``, ``table`` and ``patch_path`` are
        replaced by the patched values.
        """
        # This skips patching if no patch exists because of the
        # performance overhead of converting to and from dicts
        if patch is None:
            return unpatched

        source_dct = unpatched.source.to_dict(omit_none=True)
        table_dct = unpatched.table.to_dict(omit_none=True)

        # `patch` is known to be non-None from here on, so no second check is needed.
        source_table_patch: Optional[SourceTablePatch] = patch.get_table_named(
            unpatched.table.name
        )
        source_dct.update(patch.to_patch_dict())
        patch_path: Optional[Path] = patch.path

        if source_table_patch is not None:
            table_dct.update(source_table_patch.to_patch_dict())

        source = UnparsedSourceDefinition.from_dict(source_dct)
        table = UnparsedSourceTableDefinition.from_dict(table_dct)
        return replace(unpatched, source=source, table=table, patch_path=patch_path)

    def parse_source(self, target: UnpatchedSourceDefinition) -> SourceDefinition:
        """Convert an UnpatchedSourceDefinition into a SourceDefinition.

        Raises:
            DbtInternalError: if the finalized config is not a SourceConfig.
        """
        source = target.source
        table = target.table
        refs = ParserRef.from_target(table)
        unique_id = target.unique_id
        description = table.description or ""
        source_description = source.description or ""

        quoting = source.quoting.merged(table.quoting)
        # Retain original source meta prior to merge with table meta
        # NOTE(review): unlike calculate_meta_from_raw_target, this merge does not
        # guard against a non-dict `config.meta` -- confirm that is intended.
        source_meta = {**source.meta, **source.config.get("meta", {})}

        config = self._generate_source_config(
            target=target,
            rendered=True,
        )

        config = config.finalize_and_validate()

        unrendered_config = self._generate_source_config(
            target=target,
            rendered=False,
        )

        if not isinstance(config, SourceConfig):
            raise DbtInternalError(
                f"Calculated a {type(config)} for a source, but expected a SourceConfig"
            )

        default_database = self.root_project.credentials.database

        parsed_source = SourceDefinition(
            package_name=target.package_name,
            database=(source.database or default_database),
            unrendered_database=source.unrendered_database,
            schema=(source.schema or source.name),
            unrendered_schema=source.unrendered_schema,
            identifier=(table.identifier or table.name),
            path=target.path,
            original_file_path=target.original_file_path,
            columns=refs.column_info,
            unique_id=unique_id,
            name=table.name,
            description=description,
            external=table.external,
            source_name=source.name,
            source_description=source_description,
            source_meta=source_meta,
            meta=config.meta,
            loader=source.loader,
            loaded_at_field=config.loaded_at_field,
            loaded_at_query=config.loaded_at_query,
            freshness=config.freshness,
            quoting=quoting,
            resource_type=NodeType.Source,
            fqn=target.fqn,
            tags=config.tags,
            config=config,
            unrendered_config=unrendered_config,
            profile=source.profile,  # DVT: propagate profile reference for multi-source support
        )

        if (
            parsed_source.freshness
            and not parsed_source.loaded_at_field
            and not get_adapter(self.root_project).supports(Capability.TableLastModifiedMetadata)
        ):
            # Metadata-based freshness is being used by default for this node,
            # but is not available through the configured adapter, so warn the
            # user that freshness info will not be collected for this node at
            # runtime.
            fire_event(
                FreshnessConfigProblem(
                    msg=f"The configured adapter does not support metadata-based freshness. A loaded_at_field must be specified for source '{source.name}.{table.name}'."
                )
            )

        # relation name is added after instantiation because the adapter does
        # not provide the relation name for a UnpatchedSourceDefinition object
        parsed_source.relation_name = self._get_relation_name(parsed_source)
        return parsed_source

    def get_generic_test_parser_for(self, package_name: str) -> "SchemaGenericTestParser":
        """Return the SchemaGenericTestParser for ``package_name``, creating it on first use."""
        if package_name in self.generic_test_parsers:
            generic_test_parser = self.generic_test_parsers[package_name]
        else:
            all_projects = self.root_project.load_dependencies()
            project = all_projects[package_name]
            generic_test_parser = SchemaGenericTestParser(
                project, self.manifest, self.root_project
            )
            self.generic_test_parsers[package_name] = generic_test_parser
        return generic_test_parser

    def get_source_tests(self, target: UnpatchedSourceDefinition) -> Iterable[GenericTestNode]:
        """Yield one parsed GenericTestNode per data test declared on ``target``.

        This is a generator: validation and parsing only run once the result
        is iterated.
        """
        is_root_project = self.root_project.project_name == target.package_name
        target.validate_data_tests(is_root_project)
        for data_test, column in target.get_tests():
            yield self.parse_source_test(
                target=target,
                data_test=data_test,
                column=column,
            )

    def get_patch_for(
        self,
        unpatched: UnpatchedSourceDefinition,
    ) -> Optional[SourcePatch]:
        """Look up the SourcePatch for ``unpatched`` and record that it was used.

        Returns None when ``unpatched`` is already a SourceDefinition or when
        the manifest holds no patch for its (package name, source name) key.
        """
        if isinstance(unpatched, SourceDefinition):
            return None
        key = (unpatched.package_name, unpatched.source.name)
        patch: Optional[SourcePatch] = self.manifest.source_patches.get(key)
        if patch is None:
            return None
        # mark the key as used, even if no table-level patch matches below
        used_tables = self.patches_used.setdefault(key, set())
        if patch.get_table_named(unpatched.table.name) is not None:
            used_tables.add(unpatched.table.name)
        return patch

    def parse_source_test(
        self,
        target: UnpatchedSourceDefinition,
        data_test: Dict[str, Any],
        column: Optional[UnparsedColumn],
    ) -> GenericTestNode:
        """Parse a single source data test via the package's SchemaGenericTestParser.

        The test's tags are the concatenation of the source tags, the table
        tags and, for column-level tests, the column tags plus any
        ``config.tags`` on the column.
        """
        column_name: Optional[str]
        if column is None:
            column_name = None
        else:
            column_name = column.name
            # An explicit `quote` on the column wins; otherwise fall back to the target.
            should_quote = column.quote or (column.quote is None and target.quote_columns)
            if should_quote:
                column_name = get_adapter(self.root_project).quote(column_name)

        tags_sources = [target.source.tags, target.table.tags]
        if column is not None:
            tags_sources.append(column.tags)
            if column_config_tags := column.config.get("tags", []):
                if isinstance(column_config_tags, list):
                    tags_sources.append(column_config_tags)
                elif isinstance(column_config_tags, str):
                    tags_sources.append([column_config_tags])
        tags = list(itertools.chain.from_iterable(tags_sources))

        generic_test_parser = self.get_generic_test_parser_for(target.package_name)
        node = generic_test_parser.parse_generic_test(
            target=target,
            data_test=data_test,
            tags=tags,
            column_name=column_name,
            schema_file_id=target.file_id,
            version=None,
        )
        return node

    def _generate_source_config(self, target: UnpatchedSourceDefinition, rendered: bool):
        """Calculate the node config for ``target``.

        Uses a ContextConfigGenerator when ``rendered`` is true and an
        UnrenderedConfigGenerator otherwise. Values declared in the schema
        file are passed to the generator as ``patch_config_dict``.
        """
        generator: BaseContextConfigGenerator
        if rendered:
            generator = ContextConfigGenerator(self.root_project)
        else:
            generator = UnrenderedConfigGenerator(self.root_project)

        # configs with precedence set
        precedence_configs = {}
        # first apply source configs
        precedence_configs.update(target.source.config)
        # then overwrite anything that is defined on source tables
        # this is not quite complex enough for configs that can be set as top-level node keys, but
        # it works while source configs can only include `enabled`.
        precedence_configs.update(target.table.config)

        precedence_freshness = self.calculate_freshness_from_raw_target(target)
        if precedence_freshness:
            precedence_configs["freshness"] = precedence_freshness.to_dict()
        elif precedence_freshness is None:
            precedence_configs["freshness"] = None
        else:
            # this means that the user did not set a freshness threshold in the source schema file, as such
            # there should be no freshness precedence
            precedence_configs.pop("freshness", None)

        precedence_loaded_at_field, precedence_loaded_at_query = (
            self.calculate_loaded_at_field_query_from_raw_target(target)
        )
        precedence_configs["loaded_at_field"] = precedence_loaded_at_field
        precedence_configs["loaded_at_query"] = precedence_loaded_at_query

        # Handle merges across source, table, and config for meta and tags
        precedence_meta = self.calculate_meta_from_raw_target(target)
        precedence_configs["meta"] = precedence_meta

        precedence_tags = self.calculate_tags_from_raw_target(target)
        precedence_configs["tags"] = precedence_tags

        # Because freshness is a "object" config, the freshness from the dbt_project.yml and the freshness
        # from the schema file _won't_ get merged by this process. The result will be that the freshness will
        # come from the schema file if provided, and if not, it'll fall back to the dbt_project.yml freshness.
        return generator.calculate_node_config(
            config_call_dict={},
            fqn=target.fqn,
            resource_type=NodeType.Source,
            project_name=target.package_name,
            base=False,
            patch_config_dict=precedence_configs,
        )

    def _get_relation_name(self, node: SourceDefinition):
        """Return the string form of the adapter Relation created from ``node``."""
        adapter = get_adapter(self.root_project)
        relation_cls = adapter.Relation
        return str(relation_cls.create_from(self.root_project, node))

    def warn_unused(self) -> None:
        """Report patches (or patched tables) that matched nothing, then clear the patches.

        A patch whose key was never used is reported as a whole; otherwise
        only its table patches that were never matched are reported. The
        report goes through ``warn_or_error`` and afterwards
        ``manifest.source_patches`` is reset to an empty dict.
        """
        unused_tables: Dict[SourceKey, Optional[Set[str]]] = {}
        for patch in self.manifest.source_patches.values():
            key = (patch.overrides, patch.name)
            if key not in self.patches_used:
                unused_tables[key] = None
            elif patch.tables is not None:
                table_patches = {t.name for t in patch.tables}
                unused = table_patches - self.patches_used[key]
                # only record the key when at least one table patch went unused
                if unused:
                    # because patches are required to be unique, we can safely
                    # write without looking
                    unused_tables[key] = unused

        if unused_tables:
            unused_tables_formatted = self.get_unused_msg(unused_tables)
            warn_or_error(UnusedTables(unused_tables=unused_tables_formatted))

        self.manifest.source_patches = {}

    def get_unused_msg(
        self,
        unused_tables: Dict[SourceKey, Optional[Set[str]]],
    ) -> List:
        """Format the unused patches as a list of message lines.

        A value of None means the whole source patch was unused; a set lists
        the unused table names, which are emitted in sorted order.
        """
        unused_tables_formatted = []
        for key, table_names in unused_tables.items():
            patch = self.manifest.source_patches[key]
            patch_name = f"{patch.overrides}.{patch.name}"
            if table_names is None:
                unused_tables_formatted.append(f" - Source {patch_name} (in {patch.path})")
            else:
                for table_name in sorted(table_names):
                    unused_tables_formatted.append(
                        f" - Source table {patch_name}.{table_name} " f"(in {patch.path})"
                    )
        return unused_tables_formatted

    def calculate_freshness_from_raw_target(
        self,
        target: UnpatchedSourceDefinition,
    ) -> Optional[FreshnessThreshold]:
        """Merge the four freshness declarations of ``target`` into one threshold.

        The inputs are merged in this order: source ``freshness``, source
        ``config.freshness``, table ``freshness``, table ``config.freshness``.
        """
        source: UnparsedSourceDefinition = target.source

        source_freshness = source.freshness

        source_config_freshness_raw: Optional[Dict] = source.config.get(
            "freshness", {}
        )  # Will only be None if the user explicitly set it to null
        source_config_freshness: Optional[FreshnessThreshold] = (
            FreshnessThreshold.from_dict(source_config_freshness_raw)
            if source_config_freshness_raw is not None
            else None
        )

        table: UnparsedSourceTableDefinition = target.table
        table_freshness = table.freshness

        table_config_freshness_raw: Optional[Dict] = table.config.get(
            "freshness", {}
        )  # Will only be None if the user explicitly set it to null
        table_config_freshness: Optional[FreshnessThreshold] = (
            FreshnessThreshold.from_dict(table_config_freshness_raw)
            if table_config_freshness_raw is not None
            else None
        )

        return merge_source_freshness(
            source_freshness,
            source_config_freshness,
            table_freshness,
            table_config_freshness,
        )

    def calculate_loaded_at_field_query_from_raw_target(
        self, target: UnpatchedSourceDefinition
    ) -> Tuple[Optional[str], Optional[str]]:
        """Resolve the effective ``(loaded_at_field, loaded_at_query)`` for ``target``.

        Table-level values take precedence over source-level values.

        Raises:
            ParsingError: if both loaded_at_field and loaded_at_query are
                given at the table level, or both at the source level.
        """
        # We need to be able to tell the difference between explicitly setting the loaded_at_field to None/null
        # and when it's simply not set. This allows a user to override the source level loaded_at_field so that
        # specific table can default to metadata-based freshness.

        # loaded_at_field and loaded_at_query are supported both at top-level (deprecated) and config-level (preferred) on sources and tables.
        if target.table.loaded_at_field_present and (
            target.table.loaded_at_query or target.table.config.get("loaded_at_query")
        ):
            raise ParsingError(
                "Cannot specify both loaded_at_field and loaded_at_query at table level."
            )
        if (target.source.loaded_at_field or target.source.config.get("loaded_at_field")) and (
            target.source.loaded_at_query or target.source.config.get("loaded_at_query")
        ):
            raise ParsingError(
                "Cannot specify both loaded_at_field and loaded_at_query at source level."
            )

        if (
            target.table.loaded_at_field_present
            or target.table.loaded_at_field is not None
            or target.table.config.get("loaded_at_field") is not None
        ):
            loaded_at_field = target.table.loaded_at_field or target.table.config.get(
                "loaded_at_field"
            )
        else:
            loaded_at_field = target.source.loaded_at_field or target.source.config.get(
                "loaded_at_field"
            )  # may be None, that's okay

        loaded_at_query: Optional[str]
        if (
            target.table.loaded_at_query is not None
            or target.table.config.get("loaded_at_query") is not None
        ):
            loaded_at_query = target.table.loaded_at_query or target.table.config.get(
                "loaded_at_query"
            )
        else:
            if target.table.loaded_at_field_present:
                # the table set loaded_at_field itself, so it must not inherit the source's query
                loaded_at_query = None
            else:
                loaded_at_query = target.source.loaded_at_query or target.source.config.get(
                    "loaded_at_query"
                )

        return loaded_at_field, loaded_at_query

    def calculate_meta_from_raw_target(self, target: UnpatchedSourceDefinition) -> Dict[str, Any]:
        """Merge meta from source, source config, table and table config.

        Later entries override earlier ones; a ``config.meta`` value that is
        not a dict is ignored.
        """
        source_meta = target.source.meta or {}
        source_config_meta = target.source.config.get("meta", {})
        source_config_meta = source_config_meta if isinstance(source_config_meta, dict) else {}

        table_meta = target.table.meta or {}
        table_config_meta = target.table.config.get("meta", {})
        table_config_meta = table_config_meta if isinstance(table_config_meta, dict) else {}

        return {**source_meta, **source_config_meta, **table_meta, **table_config_meta}

    def calculate_tags_from_raw_target(self, target: UnpatchedSourceDefinition) -> List[str]:
        """Return the sorted, de-duplicated union of all source and table tags."""
        source_tags = target.source.tags or []
        source_config_tags = self._get_config_tags(
            target.source.config.get("tags", []), target.source.name
        )

        table_tags = target.table.tags or []
        table_config_tags = self._get_config_tags(
            target.table.config.get("tags", []), target.table.name
        )

        return sorted(
            set(itertools.chain(source_tags, source_config_tags, table_tags, table_config_tags))
        )

    def _get_config_tags(self, tags: Any, source_name: str) -> List[str]:
        """Normalize a ``config.tags`` value to a list of strings.

        A non-list value is treated as a single tag. Every entry that is not
        a string is dropped and reported through ``warn_or_error``.
        """
        config_tags = tags if isinstance(tags, list) else [tags]

        config_tags_valid: List[str] = []
        for tag in config_tags:
            if not isinstance(tag, str):
                warn_or_error(
                    ValidationWarning(
                        field_name=f"`config.tags`: {tags}",
                        resource_type=NodeType.Source.value,
                        node_name=source_name,
                    )
                )
            else:
                config_tags_valid.append(tag)

        return config_tags_valid
510
+
511
+
512
def merge_freshness_time_thresholds(
    base: Optional[Time], update: Optional[Time]
) -> Optional[Time]:
    """Combine two freshness time thresholds, with ``update`` taking precedence.

    Returns None whenever ``update`` is None. When both values are truthy the
    result is ``base.merged(update)``; otherwise the truthy one of the two is
    returned, falling back to ``base``.
    """
    # An update of None always yields None, whatever the base is.
    if update is None:
        return None
    if base and update:
        return base.merged(update)
    return update or base
521
+
522
+
523
def merge_source_freshness(
    *thresholds: Optional[FreshnessThreshold],
) -> Optional[FreshnessThreshold]:
    """Fold any number of freshness thresholds into one, left to right.

    Each threshold is merged into the running result with these pairwise rules:

    * both present: ``base.merged(update)``, with ``error_after`` and
      ``warn_after`` additionally merged one level deeper;
    * running result is None and the update is truthy: the update is taken;
    * anything else (the update is None, or it is falsy while the running
      result is None): the running result becomes None.

    Returns None when called without arguments.
    """
    if not thresholds:
        return None

    # Start from the first threshold (which may itself be None) and fold in the rest.
    first, *rest = thresholds
    current_merged_value: Optional[FreshnessThreshold] = first

    for update in rest:
        base = current_merged_value

        if base is not None and update is not None:
            merged_freshness_obj = base.merged(update)
            # merge one level deeper the error_after and warn_after thresholds
            merged_freshness_obj.error_after = merge_freshness_time_thresholds(
                base.error_after, update.error_after
            )
            merged_freshness_obj.warn_after = merge_freshness_time_thresholds(
                base.warn_after, update.warn_after
            )
            current_merged_value = merged_freshness_obj
        elif base is None and update:
            # Nothing accumulated so far: the update becomes the new value.
            current_merged_value = update
        else:
            # A None update (or a falsy update onto a None base) resets the result.
            current_merged_value = None

    return current_merged_value
dvt/parser/sql.py ADDED
@@ -0,0 +1,63 @@
1
+ import os
2
+ from dataclasses import dataclass
3
+ from typing import Iterable
4
+
5
+ from dvt.contracts.graph.manifest import SourceFile
6
+ from dvt.contracts.graph.nodes import Macro, SqlNode
7
+ from dvt.contracts.graph.unparsed import UnparsedMacro
8
+ from dvt.node_types import NodeType
9
+ from dvt.parser.base import SimpleSQLParser
10
+ from dvt.parser.macros import MacroParser
11
+ from dvt.parser.search import FileBlock
12
+
13
+ from dbt_common.exceptions import DbtInternalError
14
+
15
+
16
@dataclass
class SqlBlock(FileBlock):
    """A FileBlock that carries an explicit block name."""

    # Name assigned to this block; returned by the `name` property.
    block_name: str

    @property
    def name(self):
        """Return the block name stored on this instance."""
        return self.block_name
23
+
24
+
25
class SqlBlockParser(SimpleSQLParser[SqlNode]):
    """Parser that produces SqlNode objects of resource type SqlOperation."""

    def parse_from_dict(self, dct, validate=True) -> SqlNode:
        """Build a SqlNode from ``dct``, validating it first unless ``validate`` is false."""
        if validate:
            SqlNode.validate(dct)
        return SqlNode.from_dict(dct)

    @property
    def resource_type(self) -> NodeType:
        """The node type handled by this parser."""
        return NodeType.SqlOperation

    @staticmethod
    def get_compiled_path(block: FileBlock):
        """Return ``sql/<block name>`` for an SqlBlock.

        Raises:
            DbtInternalError: if ``block`` is not an SqlBlock.
        """
        # The isinstance check also narrows the type for mypy.
        if isinstance(block, SqlBlock):
            return os.path.join("sql", block.name)

        raise DbtInternalError(
            "While parsing SQL operation, got an actual file block instead of "
            "an SQL block: {}".format(block)
        )

    def parse_remote(self, sql: str, name: str) -> SqlNode:
        """Wrap ``sql`` in a remote SourceFile and parse it as a block named ``name``."""
        remote_file = SourceFile.remote(sql, self.project.project_name, "sql")
        return self.parse_node(SqlBlock(block_name=name, file=remote_file))
50
+
51
+
52
class SqlMacroParser(MacroParser):
    """MacroParser variant for macro code that does not come from a project file."""

    def parse_remote(self, contents) -> Iterable[Macro]:
        """Yield the macros parsed from the raw SQL text ``contents``.

        The text is wrapped in an UnparsedMacro whose paths are the
        placeholder "from remote system".
        """
        placeholder_path = "from remote system"
        unparsed = UnparsedMacro(
            path=placeholder_path,
            original_file_path=placeholder_path,
            package_name=self.project.project_name,
            raw_code=contents,
            language="sql",
            resource_type=NodeType.Macro,
        )
        yield from self.parse_unparsed_macros(unparsed)