dvt-core 1.11.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dvt-core might be problematic. Click here for more details.

Files changed (261) hide show
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/graph/queue.py ADDED
@@ -0,0 +1,213 @@
1
+ import threading
2
+ from queue import PriorityQueue
3
+ from typing import Dict, Generator, List, Optional, Set
4
+
5
+ import networkx as nx # type: ignore
6
+ from dvt.contracts.graph.manifest import Manifest
7
+ from dvt.contracts.graph.nodes import (
8
+ Exposure,
9
+ GraphMemberNode,
10
+ Metric,
11
+ SourceDefinition,
12
+ )
13
+ from dvt.node_types import NodeType
14
+
15
+ from .graph import UniqueId
16
+
17
+
18
class GraphQueue:
    """Priority queue of graph nodes that hands a node out once it has no parents left.

    Note: the graph stored on the queue is mutated (finished nodes are removed
    from it). When ``preserve_edges`` is true that graph is the caller's own
    object.

    ``mark_done`` may be called from several threads. Separate threads must not
    call ``.empty()`` / ``__len__()`` and ``.get()`` at the same time: that
    combination is an unlocked race.
    """

    def __init__(
        self,
        graph: nx.DiGraph,
        manifest: Manifest,
        selected: Set[UniqueId],
        preserve_edges: bool = True,
    ) -> None:
        """Build the queue and enqueue every node that has no incoming edge.

        :param graph: Dependency graph to schedule.
        :param manifest: Manifest used to turn node IDs into node objects.
        :param selected: The selected unique IDs (exposed via `get_selected_nodes`).
        :param preserve_edges: If False, schedule over an edge-less copy of
            ``graph`` instead of ``graph`` itself.
        """
        if preserve_edges:
            self.graph = graph
        else:
            # 'create_empty_copy' keeps every node but drops all of the edges
            self.graph = nx.classes.function.create_empty_copy(graph)
        self.manifest = manifest
        self._selected = selected
        # (score, node id) entries waiting to be handed out
        self.inner: PriorityQueue = PriorityQueue()
        # node ids popped off the queue but not yet marked done
        self.in_progress: Set[UniqueId] = set()
        # node ids currently sitting in `inner`
        self.queued: Set[UniqueId] = set()
        # guards the bookkeeping sets and the graph
        self.lock = threading.Lock()
        # node id -> score; a lower score is handed out first
        self._scores = self._get_scores(self.graph)
        # seed the queue (needs `_scores`, so it must come after scoring)
        self._find_new_additions(list(self.graph.nodes()))
        # notified by `mark_done`; shares `self.lock`
        self.some_task_done = threading.Condition(self.lock)

    def get_selected_nodes(self) -> Set[UniqueId]:
        """Return a copy of the selected unique IDs given to the constructor."""
        return self._selected.copy()

    def _include_in_cost(self, node_id: UniqueId) -> bool:
        """Return True only for nodes that are models and are not ephemeral."""
        node = self.manifest.expect(node_id)
        if node.resource_type != NodeType.Model:
            return False
        # must be a Model - tell mypy this won't be a Source or Exposure or Metric
        assert not isinstance(node, (SourceDefinition, Exposure, Metric))
        return not node.is_ephemeral

    @staticmethod
    def _grouped_topological_sort(
        graph: nx.DiGraph,
    ) -> Generator[List[str], None, None]:
        """Topologically sort ``graph``, yielding tied nodes together.

        Adapted from `nx.topological_sort`: rather than ordering ties
        arbitrarily, every node that becomes free at the same step is yielded
        in one list.

        Args:
            graph: The graph to be sorted.

        Returns:
            A generator that yields lists of nodes, one list per graph depth level.
        """
        remaining: Dict[str, int] = {}
        frontier: List[str] = []
        for vertex, degree in graph.in_degree():
            if degree > 0:
                remaining[vertex] = degree
            elif degree == 0:
                frontier.append(vertex)

        while frontier:
            yield frontier
            released: List[str] = []
            for vertex in frontier:
                for _, successor in graph.edges(vertex):
                    remaining[successor] -= 1
                    if remaining[successor] == 0:
                        released.append(successor)
            frontier = released

    def _get_scores(self, graph: nx.DiGraph) -> Dict[str, int]:
        """Score nodes for processing order.

        A node's score is its depth level within its connected subgraph.
        Lowest score (0) should be processed first.

        Args:
            graph: The graph to be scored.

        Returns:
            A dictionary consisting of `node name`:`score` pairs.
        """
        scores: Dict[str, int] = {}
        # connected components are computed on an undirected view of the graph
        for component in nx.connected_components(nx.Graph(graph)):
            layers = self._grouped_topological_sort(graph.subgraph(component))
            for depth, layer in enumerate(layers):
                for member in layer:
                    scores[member] = depth
        return scores

    def get(self, block: bool = True, timeout: Optional[float] = None) -> GraphMemberNode:
        """Pop the next node off the inner priority queue. By default, this blocks.

        The lock is only held while the node is moved to 'in progress', not
        while waiting on the inner queue.

        :param block: If True, block until the inner queue has data
        :param timeout: If set, block for timeout seconds waiting for data.
        :return: The node as present in the manifest.

        See `queue.PriorityQueue` for more information on `get()` behavior and
        exceptions.
        """
        _score, unique_id = self.inner.get(block=block, timeout=timeout)
        with self.lock:
            self._mark_in_progress(unique_id)
        return self.manifest.expect(unique_id)

    def __len__(self) -> int:
        """Return the number of graph nodes that are not currently in progress.

        This takes the lock.
        """
        with self.lock:
            return len(self.graph) - len(self.in_progress)

    def empty(self) -> bool:
        """Return True when every node remaining in the graph is in progress.

        This takes the lock.
        """
        return not len(self)

    def _already_known(self, node: UniqueId) -> bool:
        """Tell whether a node was already handed out or is already queued.

        Callers must hold the lock.

        :param str node: The node ID to check
        :returns bool: If the node is in progress/queued.
        """
        return any(node in pool for pool in (self.in_progress, self.queued))

    def _find_new_additions(self, candidates) -> None:
        """Enqueue each candidate that has no incoming edges and is not
        already queued or in progress.
        """
        for candidate in candidates:
            if self.graph.in_degree(candidate) != 0:
                continue
            if self._already_known(candidate):
                continue
            self.inner.put((self._scores[candidate], candidate))
            self.queued.add(candidate)

    def mark_done(self, node_id: UniqueId) -> None:
        """Mark the node with the given unique ID as done.

        The node is removed from the graph and any of its successors that are
        now free get queued; waiters on `some_task_done` are notified.

        This method takes the lock.

        :param str node_id: The node ID to mark as complete.
        """
        with self.lock:
            self.in_progress.remove(node_id)
            # capture the successors first: removing the node drops its edges
            children = list(self.graph.successors(node_id))
            self.graph.remove_node(node_id)
            self._find_new_additions(children)
            self.inner.task_done()
            self.some_task_done.notify_all()

    def _mark_in_progress(self, node_id: UniqueId) -> None:
        """Move the node from 'queued' to 'in progress'.

        Callers must hold the lock.

        :param str node_id: The node ID to mark as in progress.
        """
        self.queued.remove(node_id)
        self.in_progress.add(node_id)

    def join(self) -> None:
        """Join the inner queue. Blocks until all tasks are marked as done.

        Make sure not to call this before the queue reports that it is empty.
        """
        self.inner.join()

    def wait_until_something_was_done(self) -> int:
        """Block until `mark_done` signals, then return the number of
        unfinished tasks of the inner queue.
        """
        with self.lock:
            self.some_task_done.wait()
            return self.inner.unfinished_tasks
dvt/graph/selector.py ADDED
@@ -0,0 +1,375 @@
1
+ from typing import List, Optional, Set, Tuple
2
+
3
+ from dvt import selected_resources
4
+ from dvt.contracts.graph.manifest import Manifest
5
+ from dvt.contracts.graph.nodes import GraphMemberNode
6
+ from dvt.contracts.state import PreviousState
7
+ from dvt.events.types import NoNodesForSelectionCriteria, SelectorReportInvalidSelector
8
+ from dvt.exceptions import DbtInternalError, InvalidSelectorError
9
+ from dvt.node_types import NodeType
10
+
11
+ from dbt_common.events.functions import fire_event, warn_or_error
12
+
13
+ from .graph import Graph, UniqueId
14
+ from .queue import GraphQueue
15
+ from .selector_methods import MethodManager
16
+ from .selector_spec import IndirectSelection, SelectionCriteria, SelectionSpec
17
+
18
+
19
def get_package_names(nodes):
    """Return the set of second dot-separated segments of the given node IDs."""
    return {unique_id.split(".")[1] for unique_id in nodes}
21
+
22
+
23
def can_select_indirectly(node):
    """Tell whether a node qualifies for indirect selection.

    A node that is not selected itself, but whose parent(s) are, may be
    selected indirectly. Only Test and Unit resource types qualify here.
    """
    return node.resource_type in (NodeType.Test, NodeType.Unit)
35
+
36
+
37
class NodeSelector(MethodManager):
    """The node selector is aware of the graph and manifest"""

    def __init__(
        self,
        graph: Graph,
        manifest: Manifest,
        previous_state: Optional[PreviousState] = None,
        include_empty_nodes: bool = False,
    ) -> None:
        """Store the full graph and derive the subgraph used for selection.

        :param graph: The full dependency graph.
        :param manifest: The manifest the graph's unique IDs refer to.
        :param previous_state: Forwarded to the base class.
        :param include_empty_nodes: If False, `filter_selection` drops nodes
            whose ``empty`` attribute is true.
        """
        super().__init__(manifest, previous_state)
        self.full_graph: Graph = graph
        self.include_empty_nodes: bool = include_empty_nodes

        # build a subgraph containing only the members accepted by
        # `_is_graph_member` (enabled resources; exposures always)
        graph_members = {
            unique_id for unique_id in self.full_graph.nodes() if self._is_graph_member(unique_id)
        }
        self.graph = self.full_graph.subgraph(graph_members)

    def select_included(
        self,
        included_nodes: Set[UniqueId],
        spec: SelectionCriteria,
    ) -> Set[UniqueId]:
        """Select the explicitly included nodes, using the given spec. Return
        the selected set of unique IDs.
        """
        method = self.get_method(spec.method, spec.method_arguments)
        return set(method.search(included_nodes, spec.value))

    def get_nodes_from_criteria(
        self, spec: SelectionCriteria
    ) -> Tuple[Set[UniqueId], Set[UniqueId]]:
        """Get all nodes specified by the single selection criteria.

        - collect the directly included nodes
        - find their specified relatives
        - perform any selector-specific expansion

        Returns a ``(direct, indirect)`` pair. An invalid selector method is
        reported through an event and yields two empty sets.
        """
        nodes = self.graph.nodes()
        try:
            collected = self.select_included(nodes, spec)
        except InvalidSelectorError:
            valid_selectors = ", ".join(self.SELECTOR_METHODS)
            fire_event(
                SelectorReportInvalidSelector(
                    valid_selectors=valid_selectors, spec_method=spec.method, raw_spec=spec.raw
                )
            )
            return set(), set()

        neighbors = self.collect_specified_neighbors(spec, collected)
        selected = collected | neighbors

        # if --indirect-selection EMPTY, do not expand to adjacent tests
        if spec.indirect_selection == IndirectSelection.Empty:
            return selected, set()

        return self.expand_selection(
            selected=selected, indirect_selection=spec.indirect_selection
        )

    def collect_specified_neighbors(
        self, spec: SelectionCriteria, selected: Set[UniqueId]
    ) -> Set[UniqueId]:
        """Given the set of models selected by the explicit part of the
        selector (like "tag:foo"), apply the modifiers on the spec ("+"/"@").
        Return the set of additional nodes that should be collected (which may
        overlap with the selected set).
        """
        additional: Set[UniqueId] = set()
        if spec.childrens_parents:
            additional.update(self.graph.select_childrens_parents(selected))

        if spec.parents:
            additional.update(self.graph.select_parents(selected, spec.parents_depth))

        if spec.children:
            additional.update(self.graph.select_children(selected, spec.children_depth))
        return additional

    def select_nodes_recursively(self, spec: SelectionSpec) -> Tuple[Set[UniqueId], Set[UniqueId]]:
        """If the spec is a composite spec (a union, difference, or intersection),
        recurse into its selections and combine them. If the spec is a concrete
        selection criteria, resolve that using the given graph.
        """
        if isinstance(spec, SelectionCriteria):
            direct_nodes, indirect_nodes = self.get_nodes_from_criteria(spec)
        else:
            bundles = [self.select_nodes_recursively(component) for component in spec]

            direct_sets = []
            indirect_sets = []

            for direct, indirect in bundles:
                direct_sets.append(direct)
                # the "indirect" side of each component carries its direct nodes too
                indirect_sets.append(direct | indirect)

            initial_direct = spec.combined(direct_sets)
            indirect_nodes = spec.combined(indirect_sets)

            direct_nodes = self.incorporate_indirect_nodes(
                initial_direct, indirect_nodes, spec.indirect_selection
            )

            if spec.expect_exists and not direct_nodes:
                warn_or_error(NoNodesForSelectionCriteria(spec_raw=str(spec.raw)))

        return direct_nodes, indirect_nodes

    def select_nodes(self, spec: SelectionSpec) -> Tuple[Set[UniqueId], Set[UniqueId]]:
        """Select the nodes in the graph according to the spec.

        This is the main point of entry for turning a spec into a set of nodes:
        - Recurse through spec, select by criteria, combine by set operation
        - Return final (unfiltered) selection set

        The second element holds only the indirect nodes that are not also
        direct nodes.
        """
        direct_nodes, indirect_nodes = self.select_nodes_recursively(spec)
        indirect_only = indirect_nodes.difference(direct_nodes)
        return direct_nodes, indirect_only

    def _is_graph_member(self, unique_id: UniqueId) -> bool:
        """Decide whether a unique ID belongs in the selection subgraph.

        Exposures are always members. Every other resource is a member when
        its ``config.enabled`` is true. IDs found in none of the dedicated
        manifest collections are looked up in ``manifest.nodes``.
        """
        if unique_id in self.manifest.sources:
            source = self.manifest.sources[unique_id]
            return source.config.enabled
        elif unique_id in self.manifest.exposures:
            # NOTE(review): exposures are accepted unconditionally. A later
            # branch that returned `exposure.config.enabled` was unreachable
            # behind this one and has been removed; confirm whether the
            # enabled flag was meant to be honoured here.
            return True
        elif unique_id in self.manifest.functions:
            function = self.manifest.functions[unique_id]
            return function.config.enabled
        elif unique_id in self.manifest.metrics:
            metric = self.manifest.metrics[unique_id]
            return metric.config.enabled
        elif unique_id in self.manifest.semantic_models:
            semantic_model = self.manifest.semantic_models[unique_id]
            return semantic_model.config.enabled
        elif unique_id in self.manifest.unit_tests:
            unit_test = self.manifest.unit_tests[unique_id]
            return unit_test.config.enabled
        elif unique_id in self.manifest.saved_queries:
            saved_query = self.manifest.saved_queries[unique_id]
            return saved_query.config.enabled
        else:
            node = self.manifest.nodes[unique_id]
            return node.config.enabled

    def _is_empty_node(self, unique_id: UniqueId) -> bool:
        """Return the ``empty`` flag of a ``manifest.nodes`` entry; False for
        IDs that are not in ``manifest.nodes``.
        """
        if unique_id in self.manifest.nodes:
            node = self.manifest.nodes[unique_id]
            return node.empty
        else:
            return False

    def node_is_match(self, node: GraphMemberNode) -> bool:
        """Determine if a node is a match for the selector. Non-match nodes
        will be excluded from results during filtering.

        This base implementation matches every node.
        """
        return True

    def _is_match(self, unique_id: UniqueId) -> bool:
        """Look the ID up in the manifest collections and apply `node_is_match`.

        :raises DbtInternalError: if the ID is in none of the collections.
        """
        node: GraphMemberNode
        if unique_id in self.manifest.nodes:
            node = self.manifest.nodes[unique_id]
        elif unique_id in self.manifest.sources:
            node = self.manifest.sources[unique_id]
        elif unique_id in self.manifest.exposures:
            node = self.manifest.exposures[unique_id]
        elif unique_id in self.manifest.functions:
            node = self.manifest.functions[unique_id]
        elif unique_id in self.manifest.metrics:
            node = self.manifest.metrics[unique_id]
        elif unique_id in self.manifest.semantic_models:
            node = self.manifest.semantic_models[unique_id]
        elif unique_id in self.manifest.unit_tests:
            node = self.manifest.unit_tests[unique_id]
        elif unique_id in self.manifest.saved_queries:
            node = self.manifest.saved_queries[unique_id]
        else:
            raise DbtInternalError(f"Node {unique_id} not found in the manifest!")
        return self.node_is_match(node)

    def filter_selection(self, selected: Set[UniqueId]) -> Set[UniqueId]:
        """Return the subset of selected nodes that is a match for this
        selector. Empty nodes are dropped unless ``include_empty_nodes`` is set.
        """
        return {
            unique_id
            for unique_id in selected
            if self._is_match(unique_id)
            and (self.include_empty_nodes or not self._is_empty_node(unique_id))
        }

    def expand_selection(
        self,
        selected: Set[UniqueId],
        indirect_selection: IndirectSelection = IndirectSelection.Eager,
    ) -> Tuple[Set[UniqueId], Set[UniqueId]]:
        """Expand a selection with the tests / unit tests attached to it.

        Returns ``(direct_nodes, indirect_nodes)``: ``direct_nodes`` is the
        input selection plus every test accepted by the mode, and
        ``indirect_nodes`` holds tests kept aside because some parent is
        missing.
        """
        # Test selection by default expands to include implicitly/indirectly selected tests.
        # `dbt test -m model_a` also includes tests that directly depend on `model_a`.
        # Expansion has four modes: EAGER, CAUTIOUS, BUILDABLE and EMPTY.
        #
        # EAGER mode: If ANY parent is selected, select the test.
        #
        # CAUTIOUS mode:
        #  - If ALL parents are selected, select the test.
        #  - If ANY parent is missing, return it separately. We'll keep it around
        #    for later and see if its other parents show up.
        #
        # BUILDABLE mode:
        #  - If ALL parents are selected, or the parents of the test are themselves
        #    parents of the selected (or sources), select the test.
        #  - If ANY parent is missing, return it separately. We'll keep it around
        #    for later and see if its other parents show up.
        #
        # EMPTY mode: Only select the given node and ignore attached nodes
        # (i.e. ignore tests attached to a model)
        #
        # Users can opt out of inclusive EAGER mode by passing --indirect-selection cautious
        # CLI argument or through `indirect_selection` in a yaml selector.

        direct_nodes = set(selected)
        indirect_nodes = set()
        # `selected` is never mutated below, so snapshot it once for the
        # subset checks instead of rebuilding a set per candidate
        selected_ids = frozenset(selected)
        selected_and_parents = set()
        if indirect_selection == IndirectSelection.Buildable:
            selected_and_parents = selected.union(self.graph.select_parents(selected)).union(
                self.manifest.sources
            )

        for unique_id in self.graph.select_successors(selected):
            if unique_id in self.manifest.nodes:
                node = self.manifest.nodes[unique_id]
            elif unique_id in self.manifest.unit_tests:
                node = self.manifest.unit_tests[unique_id]  # type: ignore
            else:
                continue

            # Only tests / unit tests that are not selected themselves, but
            # whose parents are selected, can be pulled in here.
            if not can_select_indirectly(node):
                continue

            # should we add it in directly?
            if (
                indirect_selection == IndirectSelection.Eager
                or set(node.depends_on_nodes) <= selected_ids
            ):
                direct_nodes.add(unique_id)
            elif (
                indirect_selection == IndirectSelection.Buildable
                and set(node.depends_on_nodes) <= selected_and_parents
            ):
                direct_nodes.add(unique_id)
            elif indirect_selection == IndirectSelection.Empty:
                pass
            else:
                indirect_nodes.add(unique_id)

        return direct_nodes, indirect_nodes

    def incorporate_indirect_nodes(
        self,
        direct_nodes: Set[UniqueId],
        indirect_nodes: Optional[Set[UniqueId]] = None,
        indirect_selection: IndirectSelection = IndirectSelection.Eager,
    ) -> Set[UniqueId]:
        """Promote indirectly selected nodes whose parents are now all present.

        :param direct_nodes: Nodes selected so far.
        :param indirect_nodes: Candidates kept aside earlier; ``None`` is
            treated as an empty set.
        :param indirect_selection: Only the Cautious and Buildable modes
            promote candidates; other modes return the direct nodes as a set.
        :return: The direct nodes plus every promoted candidate.
        """
        # a None sentinel avoids sharing one mutable default set across calls
        if indirect_nodes is None:
            indirect_nodes = set()

        # performance: if identical, skip the processing below
        if set(direct_nodes) == set(indirect_nodes):
            return direct_nodes

        selected = set(direct_nodes)

        if indirect_selection == IndirectSelection.Cautious:
            for unique_id in indirect_nodes:
                if unique_id in self.manifest.nodes:
                    node = self.manifest.nodes[unique_id]
                    # `selected` grows as candidates are promoted, so later
                    # candidates are checked against the updated set
                    if set(node.depends_on_nodes) <= selected:
                        selected.add(unique_id)
        elif indirect_selection == IndirectSelection.Buildable:
            # computed once, before any candidate is promoted
            selected_and_parents = selected.union(self.graph.select_parents(selected))
            for unique_id in indirect_nodes:
                if unique_id in self.manifest.nodes:
                    node = self.manifest.nodes[unique_id]
                    if set(node.depends_on_nodes) <= selected_and_parents:
                        selected.add(unique_id)

        return selected

    def get_selected(self, spec: SelectionSpec) -> Set[UniqueId]:
        """get_selected runs through the node selection process:

        - node selection. Based on the include/exclude sets, the set
          of matched unique IDs is returned
            - includes direct + indirect selection (for tests)
        - filtering:
            - selectors can filter the nodes after all of them have been
              selected
        """
        # the indirect-only set is not needed here
        selected_nodes, _ = self.select_nodes(spec)
        return self.filter_selection(selected_nodes)

    def get_graph_queue(self, spec: SelectionSpec, preserve_edges: bool = True) -> GraphQueue:
        """Returns a queue over nodes in the graph that tracks progress of
        dependencies.
        """
        # Filtering happens in get_selected
        selected_nodes = self.get_selected(spec)
        # Save to global variable
        selected_resources.set_selected_resources(selected_nodes)
        # Construct a new graph using the selected_nodes
        new_graph = self.full_graph.get_subset_graph(selected_nodes)
        # should we give a way here for consumers to mutate the graph?
        return GraphQueue(new_graph.graph, self.manifest, selected_nodes, preserve_edges)
355
+
356
+
357
class ResourceTypeSelector(NodeSelector):
    """Node selector that only matches nodes of the given resource types."""

    def __init__(
        self,
        graph: Graph,
        manifest: Manifest,
        previous_state: Optional[PreviousState],
        resource_types: List[NodeType],
        include_empty_nodes: bool = False,
    ) -> None:
        """Set up the base selector and remember the accepted resource types.

        :param resource_types: Resource types a node must have to match; stored
            as a set.
        """
        base_kwargs = dict(
            graph=graph,
            manifest=manifest,
            previous_state=previous_state,
            include_empty_nodes=include_empty_nodes,
        )
        super().__init__(**base_kwargs)
        accepted: Set[NodeType] = set(resource_types)
        self.resource_types: Set[NodeType] = accepted

    def node_is_match(self, node):
        """Return True when the node's resource type is one of the accepted types."""
        accepted_types = self.resource_types
        return node.resource_type in accepted_types