dvt-core 0.58.6__cp311-cp311-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2403 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.cpython-311-darwin.so +0 -0
  74. dbt/compute/engines/spark_engine.py +642 -0
  75. dbt/compute/federated_executor.cpython-311-darwin.so +0 -0
  76. dbt/compute/federated_executor.py +1080 -0
  77. dbt/compute/filter_pushdown.cpython-311-darwin.so +0 -0
  78. dbt/compute/filter_pushdown.py +273 -0
  79. dbt/compute/jar_provisioning.cpython-311-darwin.so +0 -0
  80. dbt/compute/jar_provisioning.py +255 -0
  81. dbt/compute/java_compat.cpython-311-darwin.so +0 -0
  82. dbt/compute/java_compat.py +689 -0
  83. dbt/compute/jdbc_utils.cpython-311-darwin.so +0 -0
  84. dbt/compute/jdbc_utils.py +678 -0
  85. dbt/compute/metadata/__init__.py +40 -0
  86. dbt/compute/metadata/adapters_registry.cpython-311-darwin.so +0 -0
  87. dbt/compute/metadata/adapters_registry.py +370 -0
  88. dbt/compute/metadata/registry.cpython-311-darwin.so +0 -0
  89. dbt/compute/metadata/registry.py +674 -0
  90. dbt/compute/metadata/store.cpython-311-darwin.so +0 -0
  91. dbt/compute/metadata/store.py +1499 -0
  92. dbt/compute/smart_selector.cpython-311-darwin.so +0 -0
  93. dbt/compute/smart_selector.py +377 -0
  94. dbt/compute/strategies/__init__.py +55 -0
  95. dbt/compute/strategies/base.cpython-311-darwin.so +0 -0
  96. dbt/compute/strategies/base.py +165 -0
  97. dbt/compute/strategies/dataproc.cpython-311-darwin.so +0 -0
  98. dbt/compute/strategies/dataproc.py +207 -0
  99. dbt/compute/strategies/emr.cpython-311-darwin.so +0 -0
  100. dbt/compute/strategies/emr.py +203 -0
  101. dbt/compute/strategies/local.cpython-311-darwin.so +0 -0
  102. dbt/compute/strategies/local.py +443 -0
  103. dbt/compute/strategies/standalone.cpython-311-darwin.so +0 -0
  104. dbt/compute/strategies/standalone.py +262 -0
  105. dbt/config/__init__.py +4 -0
  106. dbt/config/catalogs.py +94 -0
  107. dbt/config/compute.cpython-311-darwin.so +0 -0
  108. dbt/config/compute.py +513 -0
  109. dbt/config/dvt_profile.cpython-311-darwin.so +0 -0
  110. dbt/config/dvt_profile.py +342 -0
  111. dbt/config/profile.py +422 -0
  112. dbt/config/project.py +873 -0
  113. dbt/config/project_utils.py +28 -0
  114. dbt/config/renderer.py +231 -0
  115. dbt/config/runtime.py +553 -0
  116. dbt/config/selectors.py +208 -0
  117. dbt/config/utils.py +77 -0
  118. dbt/constants.py +28 -0
  119. dbt/context/__init__.py +0 -0
  120. dbt/context/base.py +745 -0
  121. dbt/context/configured.py +135 -0
  122. dbt/context/context_config.py +382 -0
  123. dbt/context/docs.py +82 -0
  124. dbt/context/exceptions_jinja.py +178 -0
  125. dbt/context/macro_resolver.py +195 -0
  126. dbt/context/macros.py +171 -0
  127. dbt/context/manifest.py +72 -0
  128. dbt/context/providers.py +2249 -0
  129. dbt/context/query_header.py +13 -0
  130. dbt/context/secret.py +58 -0
  131. dbt/context/target.py +74 -0
  132. dbt/contracts/__init__.py +0 -0
  133. dbt/contracts/files.py +413 -0
  134. dbt/contracts/graph/__init__.py +0 -0
  135. dbt/contracts/graph/manifest.py +1904 -0
  136. dbt/contracts/graph/metrics.py +97 -0
  137. dbt/contracts/graph/model_config.py +70 -0
  138. dbt/contracts/graph/node_args.py +42 -0
  139. dbt/contracts/graph/nodes.py +1806 -0
  140. dbt/contracts/graph/semantic_manifest.py +232 -0
  141. dbt/contracts/graph/unparsed.py +811 -0
  142. dbt/contracts/project.py +417 -0
  143. dbt/contracts/results.py +53 -0
  144. dbt/contracts/selection.py +23 -0
  145. dbt/contracts/sql.py +85 -0
  146. dbt/contracts/state.py +68 -0
  147. dbt/contracts/util.py +46 -0
  148. dbt/deprecations.py +348 -0
  149. dbt/deps/__init__.py +0 -0
  150. dbt/deps/base.py +152 -0
  151. dbt/deps/git.py +195 -0
  152. dbt/deps/local.py +79 -0
  153. dbt/deps/registry.py +130 -0
  154. dbt/deps/resolver.py +149 -0
  155. dbt/deps/tarball.py +120 -0
  156. dbt/docs/source/_ext/dbt_click.py +119 -0
  157. dbt/docs/source/conf.py +32 -0
  158. dbt/env_vars.py +64 -0
  159. dbt/event_time/event_time.py +40 -0
  160. dbt/event_time/sample_window.py +60 -0
  161. dbt/events/__init__.py +15 -0
  162. dbt/events/base_types.py +36 -0
  163. dbt/events/core_types_pb2.py +2 -0
  164. dbt/events/logging.py +108 -0
  165. dbt/events/types.py +2516 -0
  166. dbt/exceptions.py +1486 -0
  167. dbt/flags.py +89 -0
  168. dbt/graph/__init__.py +11 -0
  169. dbt/graph/cli.py +249 -0
  170. dbt/graph/graph.py +172 -0
  171. dbt/graph/queue.py +214 -0
  172. dbt/graph/selector.py +374 -0
  173. dbt/graph/selector_methods.py +975 -0
  174. dbt/graph/selector_spec.py +222 -0
  175. dbt/graph/thread_pool.py +18 -0
  176. dbt/hooks.py +21 -0
  177. dbt/include/README.md +49 -0
  178. dbt/include/__init__.py +3 -0
  179. dbt/include/data/adapters_registry.duckdb +0 -0
  180. dbt/include/data/build_registry.py +242 -0
  181. dbt/include/data/csv/adapter_queries.csv +33 -0
  182. dbt/include/data/csv/syntax_rules.csv +9 -0
  183. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  184. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  185. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  186. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  187. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  188. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  189. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  190. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  191. dbt/include/starter_project/.gitignore +4 -0
  192. dbt/include/starter_project/README.md +15 -0
  193. dbt/include/starter_project/__init__.py +3 -0
  194. dbt/include/starter_project/analyses/.gitkeep +0 -0
  195. dbt/include/starter_project/dbt_project.yml +36 -0
  196. dbt/include/starter_project/macros/.gitkeep +0 -0
  197. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  198. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  199. dbt/include/starter_project/models/example/schema.yml +21 -0
  200. dbt/include/starter_project/seeds/.gitkeep +0 -0
  201. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  202. dbt/include/starter_project/tests/.gitkeep +0 -0
  203. dbt/internal_deprecations.py +26 -0
  204. dbt/jsonschemas/__init__.py +3 -0
  205. dbt/jsonschemas/jsonschemas.py +309 -0
  206. dbt/jsonschemas/project/0.0.110.json +4717 -0
  207. dbt/jsonschemas/project/0.0.85.json +2015 -0
  208. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  209. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  210. dbt/jsonschemas/resources/latest.json +6773 -0
  211. dbt/links.py +4 -0
  212. dbt/materializations/__init__.py +0 -0
  213. dbt/materializations/incremental/__init__.py +0 -0
  214. dbt/materializations/incremental/microbatch.py +236 -0
  215. dbt/mp_context.py +8 -0
  216. dbt/node_types.py +37 -0
  217. dbt/parser/__init__.py +23 -0
  218. dbt/parser/analysis.py +21 -0
  219. dbt/parser/base.py +548 -0
  220. dbt/parser/common.py +266 -0
  221. dbt/parser/docs.py +52 -0
  222. dbt/parser/fixtures.py +51 -0
  223. dbt/parser/functions.py +30 -0
  224. dbt/parser/generic_test.py +100 -0
  225. dbt/parser/generic_test_builders.py +333 -0
  226. dbt/parser/hooks.py +118 -0
  227. dbt/parser/macros.py +137 -0
  228. dbt/parser/manifest.py +2204 -0
  229. dbt/parser/models.py +573 -0
  230. dbt/parser/partial.py +1178 -0
  231. dbt/parser/read_files.py +445 -0
  232. dbt/parser/schema_generic_tests.py +422 -0
  233. dbt/parser/schema_renderer.py +111 -0
  234. dbt/parser/schema_yaml_readers.py +935 -0
  235. dbt/parser/schemas.py +1466 -0
  236. dbt/parser/search.py +149 -0
  237. dbt/parser/seeds.py +28 -0
  238. dbt/parser/singular_test.py +20 -0
  239. dbt/parser/snapshots.py +44 -0
  240. dbt/parser/sources.py +558 -0
  241. dbt/parser/sql.py +62 -0
  242. dbt/parser/unit_tests.py +621 -0
  243. dbt/plugins/__init__.py +20 -0
  244. dbt/plugins/contracts.py +9 -0
  245. dbt/plugins/exceptions.py +2 -0
  246. dbt/plugins/manager.py +163 -0
  247. dbt/plugins/manifest.py +21 -0
  248. dbt/profiler.py +20 -0
  249. dbt/py.typed +1 -0
  250. dbt/query_analyzer.cpython-311-darwin.so +0 -0
  251. dbt/query_analyzer.py +410 -0
  252. dbt/runners/__init__.py +2 -0
  253. dbt/runners/exposure_runner.py +7 -0
  254. dbt/runners/no_op_runner.py +45 -0
  255. dbt/runners/saved_query_runner.py +7 -0
  256. dbt/selected_resources.py +8 -0
  257. dbt/task/__init__.py +0 -0
  258. dbt/task/base.py +503 -0
  259. dbt/task/build.py +197 -0
  260. dbt/task/clean.py +56 -0
  261. dbt/task/clone.py +161 -0
  262. dbt/task/compile.py +150 -0
  263. dbt/task/compute.cpython-311-darwin.so +0 -0
  264. dbt/task/compute.py +458 -0
  265. dbt/task/debug.py +505 -0
  266. dbt/task/deps.py +280 -0
  267. dbt/task/docs/__init__.py +3 -0
  268. dbt/task/docs/api/__init__.py +23 -0
  269. dbt/task/docs/api/catalog.cpython-311-darwin.so +0 -0
  270. dbt/task/docs/api/catalog.py +204 -0
  271. dbt/task/docs/api/lineage.cpython-311-darwin.so +0 -0
  272. dbt/task/docs/api/lineage.py +234 -0
  273. dbt/task/docs/api/profile.cpython-311-darwin.so +0 -0
  274. dbt/task/docs/api/profile.py +204 -0
  275. dbt/task/docs/api/spark.cpython-311-darwin.so +0 -0
  276. dbt/task/docs/api/spark.py +186 -0
  277. dbt/task/docs/generate.py +947 -0
  278. dbt/task/docs/index.html +250 -0
  279. dbt/task/docs/serve.cpython-311-darwin.so +0 -0
  280. dbt/task/docs/serve.py +174 -0
  281. dbt/task/dvt_output.py +362 -0
  282. dbt/task/dvt_run.py +204 -0
  283. dbt/task/freshness.py +322 -0
  284. dbt/task/function.py +121 -0
  285. dbt/task/group_lookup.py +46 -0
  286. dbt/task/init.cpython-311-darwin.so +0 -0
  287. dbt/task/init.py +604 -0
  288. dbt/task/java.cpython-311-darwin.so +0 -0
  289. dbt/task/java.py +316 -0
  290. dbt/task/list.py +236 -0
  291. dbt/task/metadata.cpython-311-darwin.so +0 -0
  292. dbt/task/metadata.py +804 -0
  293. dbt/task/printer.py +175 -0
  294. dbt/task/profile.cpython-311-darwin.so +0 -0
  295. dbt/task/profile.py +1307 -0
  296. dbt/task/profile_serve.py +615 -0
  297. dbt/task/retract.py +438 -0
  298. dbt/task/retry.py +175 -0
  299. dbt/task/run.py +1387 -0
  300. dbt/task/run_operation.py +141 -0
  301. dbt/task/runnable.py +758 -0
  302. dbt/task/seed.py +103 -0
  303. dbt/task/show.py +149 -0
  304. dbt/task/snapshot.py +56 -0
  305. dbt/task/spark.cpython-311-darwin.so +0 -0
  306. dbt/task/spark.py +414 -0
  307. dbt/task/sql.py +110 -0
  308. dbt/task/target_sync.cpython-311-darwin.so +0 -0
  309. dbt/task/target_sync.py +766 -0
  310. dbt/task/test.py +464 -0
  311. dbt/tests/fixtures/__init__.py +1 -0
  312. dbt/tests/fixtures/project.py +620 -0
  313. dbt/tests/util.py +651 -0
  314. dbt/tracking.py +529 -0
  315. dbt/utils/__init__.py +3 -0
  316. dbt/utils/artifact_upload.py +151 -0
  317. dbt/utils/utils.py +408 -0
  318. dbt/version.py +270 -0
  319. dvt_cli/__init__.py +72 -0
  320. dvt_core-0.58.6.dist-info/METADATA +288 -0
  321. dvt_core-0.58.6.dist-info/RECORD +324 -0
  322. dvt_core-0.58.6.dist-info/WHEEL +5 -0
  323. dvt_core-0.58.6.dist-info/entry_points.txt +2 -0
  324. dvt_core-0.58.6.dist-info/top_level.txt +2 -0
dbt/graph/queue.py ADDED
@@ -0,0 +1,214 @@
1
+ import threading
2
+ from queue import PriorityQueue
3
+ from typing import Dict, Generator, List, Optional, Set
4
+
5
+ import networkx as nx # type: ignore
6
+
7
+ from dbt.contracts.graph.manifest import Manifest
8
+ from dbt.contracts.graph.nodes import (
9
+ Exposure,
10
+ GraphMemberNode,
11
+ Metric,
12
+ SourceDefinition,
13
+ )
14
+ from dbt.node_types import NodeType
15
+
16
+ from .graph import UniqueId
17
+
18
+
19
class GraphQueue:
    """A priority queue of node IDs that is backed by the dependency graph.

    Note: this will mutate input! When `preserve_edges` is True the graph
    passed to the constructor is used directly, and `mark_done` removes
    finished nodes from it.

    This queue is thread-safe for `mark_done` calls, though you must ensure
    that separate threads do not call `.empty()` or `__len__()` and `.get()` at
    the same time, as there is an unlocked race!
    """

    def __init__(
        self,
        graph: nx.DiGraph,
        manifest: Manifest,
        selected: Set[UniqueId],
        preserve_edges: bool = True,
    ) -> None:
        """Build the queue and enqueue every node that has no incoming edges.

        :param graph: The dependency graph to draw nodes from.
        :param manifest: The manifest used to resolve node IDs into nodes.
        :param selected: The set of selected unique IDs; stored and returned
            (as a copy) by `get_selected_nodes`.
        :param preserve_edges: If False, the queue works on an edge-less copy
            of `graph`, so every node has an in-degree of zero.
        """
        # 'create_empty_copy' returns a copy of the graph G with all of the edges removed, and leaves nodes intact.
        self.graph = graph if preserve_edges else nx.classes.function.create_empty_copy(graph)
        self.manifest = manifest
        self._selected = selected
        # store the queue as a priority queue.
        self.inner: PriorityQueue = PriorityQueue()
        # things that have been popped off the queue but not finished
        # and worker thread reservations
        self.in_progress: Set[UniqueId] = set()
        # things that are in the queue
        self.queued: Set[UniqueId] = set()
        # this lock controls most things
        self.lock = threading.Lock()
        # store the 'score' of each node as a number. Lower is higher priority.
        self._scores = self._get_scores(self.graph)
        # populate the initial queue
        self._find_new_additions(list(self.graph.nodes()))
        # awaits after task end; shares `self.lock`, so waiting/notifying
        # must happen while that lock is held
        self.some_task_done = threading.Condition(self.lock)

    def get_selected_nodes(self) -> Set[UniqueId]:
        """Return a copy of the selected unique IDs given at construction."""
        return self._selected.copy()

    def _include_in_cost(self, node_id: UniqueId) -> bool:
        """Return True only if the node is a Model that is not ephemeral.

        :param node_id: The unique ID to look up in the manifest.
        """
        node = self.manifest.expect(node_id)
        if node.resource_type != NodeType.Model:
            return False
        # must be a Model - tell mypy this won't be a Source or Exposure or Metric
        assert not isinstance(node, (SourceDefinition, Exposure, Metric))
        if node.is_ephemeral:
            return False
        return True

    @staticmethod
    def _grouped_topological_sort(
        graph: nx.DiGraph,
    ) -> Generator[List[str], None, None]:
        """Topological sort of given graph that groups ties.

        Adapted from `nx.topological_sort`, this function returns a topo sort of a graph however
        instead of arbitrarily ordering ties in the sort order, ties are grouped together in
        lists.

        Args:
            graph: The graph to be sorted.

        Returns:
            A generator that yields lists of nodes, one list per graph depth level.
        """
        # remaining (non-zero) in-degree of every node that still has unprocessed parents
        indegree_map = {v: d for v, d in graph.in_degree() if d > 0}
        # the first level: nodes with no incoming edges at all
        zero_indegree = [v for v, d in graph.in_degree() if d == 0]

        while zero_indegree:
            yield zero_indegree
            new_zero_indegree = []
            for v in zero_indegree:
                for _, child in graph.edges(v):
                    # one parent of `child` has been emitted
                    indegree_map[child] -= 1
                    if not indegree_map[child]:
                        # all parents emitted: child belongs to the next level
                        new_zero_indegree.append(child)
            zero_indegree = new_zero_indegree

    def _get_scores(self, graph: nx.DiGraph) -> Dict[str, int]:
        """Scoring nodes for processing order.

        Scores are calculated by the graph depth level. Lowest score (0) should be processed first.

        Args:
            graph: The graph to be scored.

        Returns:
            A dictionary consisting of `node name`:`score` pairs.
        """
        # split graph by connected subgraphs
        subgraphs = (graph.subgraph(x) for x in nx.connected_components(nx.Graph(graph)))

        # score all nodes in all subgraphs
        scores = {}
        for subgraph in subgraphs:
            grouped_nodes = self._grouped_topological_sort(subgraph)
            # the level index within the subgraph's grouped sort is the score
            for level, group in enumerate(grouped_nodes):
                for node in group:
                    scores[node] = level

        return scores

    def get(self, block: bool = True, timeout: Optional[float] = None) -> GraphMemberNode:
        """Get a node off the inner priority queue. By default, this blocks.

        This takes the lock, but only for part of it: the (possibly blocking)
        read from the inner queue happens before the lock is acquired.

        :param block: If True, block until the inner queue has data
        :param timeout: If set, block for timeout seconds waiting for data.
        :return: The node as present in the manifest.

        See `queue.PriorityQueue` for more information on `get()` behavior and
        exceptions.
        """
        # queue entries are (score, node_id) tuples; the score is not needed here
        _, node_id = self.inner.get(block=block, timeout=timeout)
        with self.lock:
            self._mark_in_progress(node_id)
        return self.manifest.expect(node_id)

    def __len__(self) -> int:
        """The length of the queue is the number of tasks left for the queue to
        give out, regardless of where they are. Incomplete tasks are not part
        of the length.

        Computed as the number of nodes still in the graph minus the number of
        nodes currently in progress.

        This takes the lock.
        """
        with self.lock:
            return len(self.graph) - len(self.in_progress)

    def empty(self) -> bool:
        """The graph queue is 'empty' if all remaining nodes in the graph
        are in progress.

        This takes the lock.
        """
        return len(self) == 0

    def _already_known(self, node: UniqueId) -> bool:
        """Decide if a node is already known (either handed out as a task, or
        in the queue).

        Callers must hold the lock.

        :param str node: The node ID to check
        :returns bool: If the node is in progress/queued.
        """
        return node in self.in_progress or node in self.queued

    def _find_new_additions(self, candidates) -> None:
        """Find any nodes in the graph that need to be added to the internal
        queue and add them.

        A candidate is added when it has no incoming edges left in the graph
        and is neither queued nor in progress.

        :param candidates: An iterable of node IDs to consider.
        """
        for node in candidates:
            if self.graph.in_degree(node) == 0 and not self._already_known(node):
                # priority is the precomputed score; lower scores come out first
                self.inner.put((self._scores[node], node))
                self.queued.add(node)

    def mark_done(self, node_id: UniqueId) -> None:
        """Given a node's unique ID, mark it as done.

        The node is removed from the in-progress set and from the graph, any
        successors that became ready are queued, and waiters on
        `some_task_done` are notified.

        This method takes the lock.

        :param str node_id: The node ID to mark as complete.
        """
        with self.lock:
            self.in_progress.remove(node_id)
            # capture successors before the node (and its edges) is removed
            successors = list(self.graph.successors(node_id))
            self.graph.remove_node(node_id)
            self._find_new_additions(successors)
            self.inner.task_done()
            self.some_task_done.notify_all()

    def _mark_in_progress(self, node_id: UniqueId) -> None:
        """Mark the node as 'in progress'.

        Callers must hold the lock.

        :param str node_id: The node ID to mark as in progress.
        """
        self.queued.remove(node_id)
        self.in_progress.add(node_id)

    def join(self) -> None:
        """Join the queue. Blocks until all tasks are marked as done.

        Make sure not to call this before the queue reports that it is empty.
        """
        self.inner.join()

    def wait_until_something_was_done(self) -> int:
        """Block until a task is done, then return the number of unfinished
        tasks.

        Waits on the `some_task_done` condition, which `mark_done` notifies.
        """
        with self.lock:
            self.some_task_done.wait()
            return self.inner.unfinished_tasks
dbt/graph/selector.py ADDED
@@ -0,0 +1,374 @@
1
+ from typing import List, Optional, Set, Tuple
2
+
3
+ from dbt import selected_resources
4
+ from dbt.contracts.graph.manifest import Manifest
5
+ from dbt.contracts.graph.nodes import GraphMemberNode
6
+ from dbt.contracts.state import PreviousState
7
+ from dbt.events.types import NoNodesForSelectionCriteria, SelectorReportInvalidSelector
8
+ from dbt.exceptions import DbtInternalError, InvalidSelectorError
9
+ from dbt.node_types import NodeType
10
+ from dbt_common.events.functions import fire_event, warn_or_error
11
+
12
+ from .graph import Graph, UniqueId
13
+ from .queue import GraphQueue
14
+ from .selector_methods import MethodManager
15
+ from .selector_spec import IndirectSelection, SelectionCriteria, SelectionSpec
16
+
17
+
18
def get_package_names(nodes):
    """Return the set of package names found in the given unique IDs.

    Each ID is split on "." and its second component (index 1) is taken as
    the package name.

    :param nodes: An iterable of dotted unique ID strings.
    :return: The distinct second components of those IDs.
    :raises IndexError: If an ID contains no "." separator.
    """
    # Build the set directly instead of materializing an intermediate list.
    return {node.split(".")[1] for node in nodes}
20
+
21
+
22
def can_select_indirectly(node):
    """Tell whether a node qualifies for indirect selection.

    A node that was not selected itself may still be picked up when its
    parent(s) are selected. Currently this applies to nodes whose resource
    type is Test or Unit; every other resource type is rejected.
    """
    return node.resource_type in (NodeType.Test, NodeType.Unit)
34
+
35
+
36
+ class NodeSelector(MethodManager):
37
+ """The node selector is aware of the graph and manifest"""
38
+
39
def __init__(
    self,
    graph: Graph,
    manifest: Manifest,
    previous_state: Optional[PreviousState] = None,
    include_empty_nodes: bool = False,
) -> None:
    """Set up the selector with the full graph and a filtered working graph.

    :param graph: The complete graph; kept as `full_graph`.
    :param manifest: Passed through to the parent initializer.
    :param previous_state: Optional previous state, passed through to the
        parent initializer.
    :param include_empty_nodes: Stored on the instance as `include_empty_nodes`.
    """
    super().__init__(manifest, previous_state)
    self.full_graph: Graph = graph
    self.include_empty_nodes: bool = include_empty_nodes

    # The working graph is the subgraph of the unique IDs that
    # `_is_graph_member` accepts.
    member_ids = set()
    for unique_id in self.full_graph.nodes():
        if self._is_graph_member(unique_id):
            member_ids.add(unique_id)
    self.graph = self.full_graph.subgraph(member_ids)
56
+
57
def select_included(
    self,
    included_nodes: Set[UniqueId],
    spec: SelectionCriteria,
) -> Set[UniqueId]:
    """Resolve `spec` against `included_nodes` and return the matching IDs.

    The selector method is obtained from `get_method` using the spec's
    method and method arguments; whatever its `search` yields for
    `spec.value` is collected into a set.
    """
    selector_method = self.get_method(spec.method, spec.method_arguments)
    matches = selector_method.search(included_nodes, spec.value)
    return set(matches)
67
+
68
def get_nodes_from_criteria(
    self, spec: SelectionCriteria
) -> Tuple[Set[UniqueId], Set[UniqueId]]:
    """Resolve a single selection criteria into (direct, indirect) node sets.

    Steps:
    - match the criteria against the nodes of the working graph
    - add the relatives requested by the spec
    - unless indirect selection is EMPTY, expand the selection

    If the spec names an invalid selector method, an event is fired and two
    empty sets are returned.
    """
    candidate_ids = self.graph.nodes()
    try:
        matched = self.select_included(candidate_ids, spec)
    except InvalidSelectorError:
        known_methods = ", ".join(self.SELECTOR_METHODS)
        report = SelectorReportInvalidSelector(
            valid_selectors=known_methods, spec_method=spec.method, raw_spec=spec.raw
        )
        fire_event(report)
        return set(), set()

    relatives = self.collect_specified_neighbors(spec, matched)
    selected = matched | relatives

    # if --indirect-selection EMPTY, do not expand to adjacent tests
    if spec.indirect_selection == IndirectSelection.Empty:
        return selected, set()

    direct_nodes, indirect_nodes = self.expand_selection(
        selected=selected, indirect_selection=spec.indirect_selection
    )
    return direct_nodes, indirect_nodes
101
+
102
def collect_specified_neighbors(
    self, spec: SelectionCriteria, selected: Set[UniqueId]
) -> Set[UniqueId]:
    """Collect the relatives requested by the spec's graph modifiers.

    Starting from the explicitly selected nodes (for example the result of
    "tag:foo"), gather the nodes implied by the spec's "+"/"@" modifiers.
    The returned set may overlap with `selected`.
    """
    neighbors: Set[UniqueId] = set()

    if spec.childrens_parents:
        neighbors.update(self.graph.select_childrens_parents(selected))

    if spec.parents:
        neighbors.update(self.graph.select_parents(selected, spec.parents_depth))

    if spec.children:
        neighbors.update(self.graph.select_children(selected, spec.children_depth))

    return neighbors
122
+
123
def select_nodes_recursively(self, spec: SelectionSpec) -> Tuple[Set[UniqueId], Set[UniqueId]]:
    """If the spec is a composite spec (a union, difference, or intersection),
    recurse into its selections and combine them. If the spec is a concrete
    selection criteria, resolve that using the given graph.

    :param spec: Either a single `SelectionCriteria` or an iterable composite
        of nested specs.
    :return: A `(direct_nodes, indirect_nodes)` tuple of unique ID sets.
    """
    if isinstance(spec, SelectionCriteria):
        direct_nodes, indirect_nodes = self.get_nodes_from_criteria(spec)
    else:
        # resolve every component first, then combine the results
        bundles = [self.select_nodes_recursively(component) for component in spec]

        direct_sets = []
        indirect_sets = []

        for direct, indirect in bundles:
            direct_sets.append(direct)
            # each indirect candidate set also carries the component's direct nodes
            indirect_sets.append(direct | indirect)

        # `spec.combined` applies the composite's own combination to the sets
        initial_direct = spec.combined(direct_sets)
        indirect_nodes = spec.combined(indirect_sets)

        direct_nodes = self.incorporate_indirect_nodes(
            initial_direct, indirect_nodes, spec.indirect_selection
        )

        # NOTE(review): nesting reconstructed from a whitespace-stripped
        # source; this check is assumed to apply to composite specs only.
        if spec.expect_exists and len(direct_nodes) == 0:
            warn_or_error(NoNodesForSelectionCriteria(spec_raw=str(spec.raw)))

    return direct_nodes, indirect_nodes
151
+
152
def select_nodes(self, spec: SelectionSpec) -> Tuple[Set[UniqueId], Set[UniqueId]]:
    """Turn a selection spec into sets of selected nodes.

    Main entry point for spec resolution:
    - walk the spec recursively, selecting by criteria and combining the
      results with the spec's set operations
    - return the final (unfiltered) direct selection, together with the
      indirect nodes that are not already part of the direct selection
    """
    direct, indirect = self.select_nodes_recursively(spec)
    return direct, indirect.difference(direct)
162
+
163
def _is_graph_member(self, unique_id: UniqueId) -> bool:
    """Decide whether a unique ID belongs in the selector's working graph.

    Lookup order: sources, exposures, functions, metrics, semantic models,
    unit tests, saved queries, and finally regular nodes. Exposures found in
    the manifest are always members; every other resource is a member only
    if its `config.enabled` is set.

    :param unique_id: The unique ID to check.
    :return: Whether the resource is a graph member.
    :raises KeyError: If the ID is in none of the manifest collections
        (raised by the final `manifest.nodes` lookup).
    """
    if unique_id in self.manifest.sources:
        return self.manifest.sources[unique_id].config.enabled

    if unique_id in self.manifest.exposures:
        # A second, later exposures branch that read `config.enabled` was
        # unreachable behind this one and has been removed; behavior is
        # unchanged.
        return True

    # Remaining collections share the same "enabled" rule; order preserved.
    for collection_name in (
        "functions",
        "metrics",
        "semantic_models",
        "unit_tests",
        "saved_queries",
    ):
        collection = getattr(self.manifest, collection_name)
        if unique_id in collection:
            return collection[unique_id].config.enabled

    return self.manifest.nodes[unique_id].config.enabled
190
+
191
def _is_empty_node(self, unique_id: UniqueId) -> bool:
    """Return the node's `empty` attribute, or False if the ID is not in `manifest.nodes`."""
    if unique_id not in self.manifest.nodes:
        return False
    return self.manifest.nodes[unique_id].empty
197
+
198
def node_is_match(self, node: GraphMemberNode) -> bool:
    """Report whether `node` matches this selector.

    Nodes that do not match are dropped when a selection is filtered. This
    implementation accepts every node.
    """
    return True
203
+
204
def _is_match(self, unique_id: UniqueId) -> bool:
    """Look up `unique_id` in the manifest and ask `node_is_match` about it.

    The manifest collections are searched in a fixed order and the first
    one containing the ID supplies the node.

    :raises DbtInternalError: If no manifest collection contains the ID.
    """
    # Search order matters only if an ID appears in several collections.
    lookup_order = (
        "nodes",
        "sources",
        "exposures",
        "functions",
        "metrics",
        "semantic_models",
        "unit_tests",
        "saved_queries",
    )
    for collection_name in lookup_order:
        collection = getattr(self.manifest, collection_name)
        if unique_id in collection:
            found: GraphMemberNode = collection[unique_id]
            return self.node_is_match(found)
    raise DbtInternalError(f"Node {unique_id} not found in the manifest!")
225
+
226
def filter_selection(self, selected: Set[UniqueId]) -> Set[UniqueId]:
    """Keep only the selected IDs that match this selector.

    An ID is kept when `_is_match` accepts it and, unless
    `include_empty_nodes` is set, the node is not empty.
    """
    kept: Set[UniqueId] = set()
    for unique_id in selected:
        if not self._is_match(unique_id):
            continue
        if self.include_empty_nodes or not self._is_empty_node(unique_id):
            kept.add(unique_id)
    return kept
236
+
237
+ def expand_selection(
238
+ self,
239
+ selected: Set[UniqueId],
240
+ indirect_selection: IndirectSelection = IndirectSelection.Eager,
241
+ ) -> Tuple[Set[UniqueId], Set[UniqueId]]:
242
+ # Test selection by default expands to include implicitly/indirectly selected tests.
243
+ # `dbt test -m model_a` also includes tests that directly depend on `model_a`.
244
+ # Expansion has four modes: EAGER, CAUTIOUS, BUILDABLE and EMPTY.
245
+ #
246
+ # EAGER mode: If ANY parent is selected, select the test.
247
+ #
248
+ # CAUTIOUS mode:
249
+ # - If ALL parents are selected, select the test.
250
+ # - If ANY parent is missing, return it separately. We'll keep it around
251
+ # for later and see if its other parents show up.
252
+ #
253
+ # BUILDABLE mode:
254
+ # - If ALL parents are selected, or the parents of the test are themselves parents of the selected, select the test.
255
+ # - If ANY parent is missing, return it separately. We'll keep it around
256
+ # for later and see if its other parents show up.
257
+ #
258
+ # EMPTY mode: Only select the given node and ignore attached nodes (i.e. ignore tests attached to a model)
259
+ #
260
+ # Users can opt out of inclusive EAGER mode by passing --indirect-selection cautious
261
+ # CLI argument or by specifying `indirect_selection: true` in a yaml selector
262
+
263
+ direct_nodes = set(selected)
264
+ indirect_nodes = set()
265
+ selected_and_parents = set()
266
+ if indirect_selection == IndirectSelection.Buildable:
267
+ selected_and_parents = selected.union(self.graph.select_parents(selected)).union(
268
+ self.manifest.sources
269
+ )
270
+
271
+ for unique_id in self.graph.select_successors(selected):
272
+ if unique_id in self.manifest.nodes or unique_id in self.manifest.unit_tests:
273
+ if unique_id in self.manifest.nodes:
274
+ node = self.manifest.nodes[unique_id]
275
+ elif unique_id in self.manifest.unit_tests:
276
+ node = self.manifest.unit_tests[unique_id] # type: ignore
277
+ # Test nodes that are not selected themselves, but whose parents are selected.
278
+ # (Does not include unit tests because they can only have one parent.)
279
+ if can_select_indirectly(node):
280
+ # should we add it in directly?
281
+ if indirect_selection == IndirectSelection.Eager or set(
282
+ node.depends_on_nodes
283
+ ) <= set(selected):
284
+ direct_nodes.add(unique_id)
285
+ elif indirect_selection == IndirectSelection.Buildable and set(
286
+ node.depends_on_nodes
287
+ ) <= set(selected_and_parents):
288
+ direct_nodes.add(unique_id)
289
+ elif indirect_selection == IndirectSelection.Empty:
290
+ pass
291
+ else:
292
+ indirect_nodes.add(unique_id)
293
+
294
+ return direct_nodes, indirect_nodes
295
+
296
def incorporate_indirect_nodes(
    self,
    direct_nodes: Set[UniqueId],
    indirect_nodes: Optional[Set[UniqueId]] = None,
    indirect_selection: IndirectSelection = IndirectSelection.Eager,
) -> Set[UniqueId]:
    """Promote indirectly selected tests whose parents are now all present.

    Args:
        direct_nodes: ids selected so far.
        indirect_nodes: ids that were previously held back; omitted or
            ``None`` means there are none.
        indirect_selection: only the Cautious and Buildable modes promote
            anything; every other mode returns a copy of ``direct_nodes``.

    Returns:
        ``direct_nodes`` itself when both inputs hold the same ids,
        otherwise a new set containing ``direct_nodes`` plus the promoted ids.
    """
    # Check tests previously selected indirectly to see if ALL their
    # parents are now present.

    # A None default replaces the former shared ``set()`` default object.
    if indirect_nodes is None:
        indirect_nodes = set()

    # performance: if identical, skip the processing below
    if set(direct_nodes) == set(indirect_nodes):
        return direct_nodes

    selected = set(direct_nodes)

    if indirect_selection == IndirectSelection.Cautious:
        # Same object as `selected`: ids promoted earlier in the loop count
        # as present parents for the ids checked after them.
        parent_pool = selected
    elif indirect_selection == IndirectSelection.Buildable:
        # Computed once up front; promotions do not extend this pool.
        parent_pool = selected.union(self.graph.select_parents(selected))
    else:
        return selected

    for unique_id in indirect_nodes:
        if unique_id in self.manifest.nodes:
            node = self.manifest.nodes[unique_id]
            if set(node.depends_on_nodes) <= parent_pool:
                selected.add(unique_id)

    return selected
326
+
327
def get_selected(self, spec: SelectionSpec) -> Set[UniqueId]:
    """get_selected runs through the node selection process:

    - node selection. Based on the include/exclude sets, the set
      of matched unique IDs is returned
        - includes direct + indirect selection (for tests)
    - filtering:
        - selectors can filter the nodes after all of them have been
          selected
    """
    # select_nodes also returns a second set, which is not used here; only
    # the first set is filtered and returned.
    selected_nodes, _ = self.select_nodes(spec)
    return self.filter_selection(selected_nodes)
341
+
342
def get_graph_queue(self, spec: SelectionSpec, preserve_edges: bool = True) -> GraphQueue:
    """Build a GraphQueue over the nodes selected by ``spec``.

    The queue tracks the progress of dependencies. As a side effect, the
    selected ids are also recorded through
    ``selected_resources.set_selected_resources``.
    """
    # get_selected already applies the filtering step.
    chosen = self.get_selected(spec)
    # Publish the selection to the module-level selected_resources holder.
    selected_resources.set_selected_resources(chosen)
    # Restrict the full graph to the chosen ids.
    subgraph = self.full_graph.get_subset_graph(chosen)
    # should we give a way here for consumers to mutate the graph?
    return GraphQueue(subgraph.graph, self.manifest, chosen, preserve_edges)
354
+
355
+
356
class ResourceTypeSelector(NodeSelector):
    """NodeSelector that only matches nodes of the given resource types."""

    def __init__(
        self,
        graph: Graph,
        manifest: Manifest,
        previous_state: Optional[PreviousState],
        resource_types: List[NodeType],
        include_empty_nodes: bool = False,
    ) -> None:
        """Initialise the base selector and store the accepted resource types.

        ``resource_types`` is stored as a set, so duplicates collapse.
        """
        base_kwargs = dict(
            graph=graph,
            manifest=manifest,
            previous_state=previous_state,
            include_empty_nodes=include_empty_nodes,
        )
        super().__init__(**base_kwargs)
        self.resource_types: Set[NodeType] = set(resource_types)

    def node_is_match(self, node):
        """Return True when ``node.resource_type`` is one of the accepted types."""
        accepted = self.resource_types
        return node.resource_type in accepted