dvt-core 1.11.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dvt-core might be problematic. Click here for more details.
- dvt/__init__.py +7 -0
- dvt/_pydantic_shim.py +26 -0
- dvt/adapters/__init__.py +16 -0
- dvt/adapters/multi_adapter_manager.py +268 -0
- dvt/artifacts/__init__.py +0 -0
- dvt/artifacts/exceptions/__init__.py +1 -0
- dvt/artifacts/exceptions/schemas.py +31 -0
- dvt/artifacts/resources/__init__.py +116 -0
- dvt/artifacts/resources/base.py +68 -0
- dvt/artifacts/resources/types.py +93 -0
- dvt/artifacts/resources/v1/analysis.py +10 -0
- dvt/artifacts/resources/v1/catalog.py +23 -0
- dvt/artifacts/resources/v1/components.py +275 -0
- dvt/artifacts/resources/v1/config.py +282 -0
- dvt/artifacts/resources/v1/documentation.py +11 -0
- dvt/artifacts/resources/v1/exposure.py +52 -0
- dvt/artifacts/resources/v1/function.py +53 -0
- dvt/artifacts/resources/v1/generic_test.py +32 -0
- dvt/artifacts/resources/v1/group.py +22 -0
- dvt/artifacts/resources/v1/hook.py +11 -0
- dvt/artifacts/resources/v1/macro.py +30 -0
- dvt/artifacts/resources/v1/metric.py +173 -0
- dvt/artifacts/resources/v1/model.py +146 -0
- dvt/artifacts/resources/v1/owner.py +10 -0
- dvt/artifacts/resources/v1/saved_query.py +112 -0
- dvt/artifacts/resources/v1/seed.py +42 -0
- dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt/artifacts/resources/v1/semantic_model.py +315 -0
- dvt/artifacts/resources/v1/singular_test.py +14 -0
- dvt/artifacts/resources/v1/snapshot.py +92 -0
- dvt/artifacts/resources/v1/source_definition.py +85 -0
- dvt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
- dvt/artifacts/schemas/__init__.py +0 -0
- dvt/artifacts/schemas/base.py +191 -0
- dvt/artifacts/schemas/batch_results.py +24 -0
- dvt/artifacts/schemas/catalog/__init__.py +12 -0
- dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
- dvt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
- dvt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
- dvt/artifacts/schemas/results.py +148 -0
- dvt/artifacts/schemas/run/__init__.py +2 -0
- dvt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt/artifacts/schemas/run/v5/run.py +184 -0
- dvt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt/artifacts/utils/validation.py +153 -0
- dvt/cli/__init__.py +1 -0
- dvt/cli/context.py +16 -0
- dvt/cli/exceptions.py +56 -0
- dvt/cli/flags.py +558 -0
- dvt/cli/main.py +971 -0
- dvt/cli/option_types.py +121 -0
- dvt/cli/options.py +79 -0
- dvt/cli/params.py +803 -0
- dvt/cli/requires.py +478 -0
- dvt/cli/resolvers.py +32 -0
- dvt/cli/types.py +40 -0
- dvt/clients/__init__.py +0 -0
- dvt/clients/checked_load.py +82 -0
- dvt/clients/git.py +164 -0
- dvt/clients/jinja.py +206 -0
- dvt/clients/jinja_static.py +245 -0
- dvt/clients/registry.py +192 -0
- dvt/clients/yaml_helper.py +68 -0
- dvt/compilation.py +833 -0
- dvt/compute/__init__.py +26 -0
- dvt/compute/base.py +288 -0
- dvt/compute/engines/__init__.py +13 -0
- dvt/compute/engines/duckdb_engine.py +368 -0
- dvt/compute/engines/spark_engine.py +273 -0
- dvt/compute/query_analyzer.py +212 -0
- dvt/compute/router.py +483 -0
- dvt/config/__init__.py +4 -0
- dvt/config/catalogs.py +95 -0
- dvt/config/compute_config.py +406 -0
- dvt/config/profile.py +411 -0
- dvt/config/profiles_v2.py +464 -0
- dvt/config/project.py +893 -0
- dvt/config/renderer.py +232 -0
- dvt/config/runtime.py +491 -0
- dvt/config/selectors.py +209 -0
- dvt/config/utils.py +78 -0
- dvt/connectors/.gitignore +6 -0
- dvt/connectors/README.md +306 -0
- dvt/connectors/catalog.yml +217 -0
- dvt/connectors/download_connectors.py +300 -0
- dvt/constants.py +29 -0
- dvt/context/__init__.py +0 -0
- dvt/context/base.py +746 -0
- dvt/context/configured.py +136 -0
- dvt/context/context_config.py +350 -0
- dvt/context/docs.py +82 -0
- dvt/context/exceptions_jinja.py +179 -0
- dvt/context/macro_resolver.py +195 -0
- dvt/context/macros.py +171 -0
- dvt/context/manifest.py +73 -0
- dvt/context/providers.py +2198 -0
- dvt/context/query_header.py +14 -0
- dvt/context/secret.py +59 -0
- dvt/context/target.py +74 -0
- dvt/contracts/__init__.py +0 -0
- dvt/contracts/files.py +413 -0
- dvt/contracts/graph/__init__.py +0 -0
- dvt/contracts/graph/manifest.py +1904 -0
- dvt/contracts/graph/metrics.py +98 -0
- dvt/contracts/graph/model_config.py +71 -0
- dvt/contracts/graph/node_args.py +42 -0
- dvt/contracts/graph/nodes.py +1806 -0
- dvt/contracts/graph/semantic_manifest.py +233 -0
- dvt/contracts/graph/unparsed.py +812 -0
- dvt/contracts/project.py +417 -0
- dvt/contracts/results.py +53 -0
- dvt/contracts/selection.py +23 -0
- dvt/contracts/sql.py +86 -0
- dvt/contracts/state.py +69 -0
- dvt/contracts/util.py +46 -0
- dvt/deprecations.py +347 -0
- dvt/deps/__init__.py +0 -0
- dvt/deps/base.py +153 -0
- dvt/deps/git.py +196 -0
- dvt/deps/local.py +80 -0
- dvt/deps/registry.py +131 -0
- dvt/deps/resolver.py +149 -0
- dvt/deps/tarball.py +121 -0
- dvt/docs/source/_ext/dbt_click.py +118 -0
- dvt/docs/source/conf.py +32 -0
- dvt/env_vars.py +64 -0
- dvt/event_time/event_time.py +40 -0
- dvt/event_time/sample_window.py +60 -0
- dvt/events/__init__.py +16 -0
- dvt/events/base_types.py +37 -0
- dvt/events/core_types_pb2.py +2 -0
- dvt/events/logging.py +109 -0
- dvt/events/types.py +2534 -0
- dvt/exceptions.py +1487 -0
- dvt/flags.py +89 -0
- dvt/graph/__init__.py +11 -0
- dvt/graph/cli.py +248 -0
- dvt/graph/graph.py +172 -0
- dvt/graph/queue.py +213 -0
- dvt/graph/selector.py +375 -0
- dvt/graph/selector_methods.py +976 -0
- dvt/graph/selector_spec.py +223 -0
- dvt/graph/thread_pool.py +18 -0
- dvt/hooks.py +21 -0
- dvt/include/README.md +49 -0
- dvt/include/__init__.py +3 -0
- dvt/include/global_project.py +4 -0
- dvt/include/starter_project/.gitignore +4 -0
- dvt/include/starter_project/README.md +15 -0
- dvt/include/starter_project/__init__.py +3 -0
- dvt/include/starter_project/analyses/.gitkeep +0 -0
- dvt/include/starter_project/dvt_project.yml +36 -0
- dvt/include/starter_project/macros/.gitkeep +0 -0
- dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt/include/starter_project/models/example/schema.yml +21 -0
- dvt/include/starter_project/seeds/.gitkeep +0 -0
- dvt/include/starter_project/snapshots/.gitkeep +0 -0
- dvt/include/starter_project/tests/.gitkeep +0 -0
- dvt/internal_deprecations.py +27 -0
- dvt/jsonschemas/__init__.py +3 -0
- dvt/jsonschemas/jsonschemas.py +309 -0
- dvt/jsonschemas/project/0.0.110.json +4717 -0
- dvt/jsonschemas/project/0.0.85.json +2015 -0
- dvt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt/jsonschemas/resources/latest.json +6773 -0
- dvt/links.py +4 -0
- dvt/materializations/__init__.py +0 -0
- dvt/materializations/incremental/__init__.py +0 -0
- dvt/materializations/incremental/microbatch.py +235 -0
- dvt/mp_context.py +8 -0
- dvt/node_types.py +37 -0
- dvt/parser/__init__.py +23 -0
- dvt/parser/analysis.py +21 -0
- dvt/parser/base.py +549 -0
- dvt/parser/common.py +267 -0
- dvt/parser/docs.py +52 -0
- dvt/parser/fixtures.py +51 -0
- dvt/parser/functions.py +30 -0
- dvt/parser/generic_test.py +100 -0
- dvt/parser/generic_test_builders.py +334 -0
- dvt/parser/hooks.py +119 -0
- dvt/parser/macros.py +137 -0
- dvt/parser/manifest.py +2204 -0
- dvt/parser/models.py +574 -0
- dvt/parser/partial.py +1179 -0
- dvt/parser/read_files.py +445 -0
- dvt/parser/schema_generic_tests.py +423 -0
- dvt/parser/schema_renderer.py +111 -0
- dvt/parser/schema_yaml_readers.py +936 -0
- dvt/parser/schemas.py +1467 -0
- dvt/parser/search.py +149 -0
- dvt/parser/seeds.py +28 -0
- dvt/parser/singular_test.py +20 -0
- dvt/parser/snapshots.py +44 -0
- dvt/parser/sources.py +557 -0
- dvt/parser/sql.py +63 -0
- dvt/parser/unit_tests.py +622 -0
- dvt/plugins/__init__.py +20 -0
- dvt/plugins/contracts.py +10 -0
- dvt/plugins/exceptions.py +2 -0
- dvt/plugins/manager.py +164 -0
- dvt/plugins/manifest.py +21 -0
- dvt/profiler.py +20 -0
- dvt/py.typed +1 -0
- dvt/runners/__init__.py +2 -0
- dvt/runners/exposure_runner.py +7 -0
- dvt/runners/no_op_runner.py +46 -0
- dvt/runners/saved_query_runner.py +7 -0
- dvt/selected_resources.py +8 -0
- dvt/task/__init__.py +0 -0
- dvt/task/base.py +504 -0
- dvt/task/build.py +197 -0
- dvt/task/clean.py +57 -0
- dvt/task/clone.py +162 -0
- dvt/task/compile.py +151 -0
- dvt/task/compute.py +366 -0
- dvt/task/debug.py +650 -0
- dvt/task/deps.py +280 -0
- dvt/task/docs/__init__.py +3 -0
- dvt/task/docs/generate.py +408 -0
- dvt/task/docs/index.html +250 -0
- dvt/task/docs/serve.py +28 -0
- dvt/task/freshness.py +323 -0
- dvt/task/function.py +122 -0
- dvt/task/group_lookup.py +46 -0
- dvt/task/init.py +374 -0
- dvt/task/list.py +237 -0
- dvt/task/printer.py +176 -0
- dvt/task/profiles.py +256 -0
- dvt/task/retry.py +175 -0
- dvt/task/run.py +1146 -0
- dvt/task/run_operation.py +142 -0
- dvt/task/runnable.py +802 -0
- dvt/task/seed.py +104 -0
- dvt/task/show.py +150 -0
- dvt/task/snapshot.py +57 -0
- dvt/task/sql.py +111 -0
- dvt/task/test.py +464 -0
- dvt/tests/fixtures/__init__.py +1 -0
- dvt/tests/fixtures/project.py +620 -0
- dvt/tests/util.py +651 -0
- dvt/tracking.py +529 -0
- dvt/utils/__init__.py +3 -0
- dvt/utils/artifact_upload.py +151 -0
- dvt/utils/utils.py +408 -0
- dvt/version.py +249 -0
- dvt_core-1.11.0b4.dist-info/METADATA +252 -0
- dvt_core-1.11.0b4.dist-info/RECORD +261 -0
- dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
- dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
- dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/graph/queue.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
from queue import PriorityQueue
|
|
3
|
+
from typing import Dict, Generator, List, Optional, Set
|
|
4
|
+
|
|
5
|
+
import networkx as nx # type: ignore
|
|
6
|
+
from dvt.contracts.graph.manifest import Manifest
|
|
7
|
+
from dvt.contracts.graph.nodes import (
|
|
8
|
+
Exposure,
|
|
9
|
+
GraphMemberNode,
|
|
10
|
+
Metric,
|
|
11
|
+
SourceDefinition,
|
|
12
|
+
)
|
|
13
|
+
from dvt.node_types import NodeType
|
|
14
|
+
|
|
15
|
+
from .graph import UniqueId
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GraphQueue:
    """A priority queue of node IDs that is driven by a dependency graph.

    Note: the graph passed in is mutated (finished nodes are removed from
    it) unless ``preserve_edges`` is False, in which case an edge-free copy
    of the graph is used instead.

    `mark_done` takes the internal lock, so it may be called from several
    threads. You must still ensure that separate threads do not call
    `.empty()` or `__len__()` and `.get()` at the same time, as there is an
    unlocked race!
    """

    def __init__(
        self,
        graph: nx.DiGraph,
        manifest: Manifest,
        selected: Set[UniqueId],
        preserve_edges: bool = True,
    ) -> None:
        if preserve_edges:
            self.graph = graph
        else:
            # 'create_empty_copy' keeps every node but drops all of the edges.
            self.graph = nx.classes.function.create_empty_copy(graph)
        self.manifest = manifest
        self._selected = selected
        # nodes that are ready to be handed out, ordered by score
        self.inner: PriorityQueue = PriorityQueue()
        # node IDs popped off the queue by `get()` but not yet marked done
        self.in_progress: Set[UniqueId] = set()
        # node IDs that are currently sitting in `inner`
        self.queued: Set[UniqueId] = set()
        # this lock protects most of the mutable state
        self.lock = threading.Lock()
        # notified by `mark_done()` each time a node finishes
        self.some_task_done = threading.Condition(self.lock)
        # the 'score' of each node as a number; lower is higher priority
        self._scores = self._get_scores(self.graph)
        # seed the queue with every node that has no incoming edges
        self._find_new_additions(list(self.graph.nodes()))

    def get_selected_nodes(self) -> Set[UniqueId]:
        """Return a copy of the set of selected node IDs given at construction."""
        return self._selected.copy()

    def _include_in_cost(self, node_id: UniqueId) -> bool:
        """Return True only for nodes that are non-ephemeral models."""
        node = self.manifest.expect(node_id)
        if node.resource_type != NodeType.Model:
            return False
        # must be a Model - tell mypy this won't be a Source or Exposure or Metric
        assert not isinstance(node, (SourceDefinition, Exposure, Metric))
        return not node.is_ephemeral

    @staticmethod
    def _grouped_topological_sort(
        graph: nx.DiGraph,
    ) -> Generator[List[str], None, None]:
        """Topologically sort `graph`, yielding ties together.

        Adapted from `nx.topological_sort`. Rather than ordering ties
        arbitrarily, all nodes that become ready at the same step are
        yielded as a single list.

        Args:
            graph: The graph to be sorted.

        Returns:
            A generator that yields lists of nodes, one list per graph depth level.
        """
        pending_indegree = {}
        ready = []
        for vertex, degree in graph.in_degree():
            if degree > 0:
                pending_indegree[vertex] = degree
            elif degree == 0:
                ready.append(vertex)

        while ready:
            yield ready
            next_ready = []
            for vertex in ready:
                for _, successor in graph.edges(vertex):
                    pending_indegree[successor] -= 1
                    # the last outstanding parent was just consumed
                    if pending_indegree[successor] == 0:
                        next_ready.append(successor)
            ready = next_ready

    def _get_scores(self, graph: nx.DiGraph) -> Dict[str, int]:
        """Score every node by its depth level for processing order.

        Each connected subgraph is scored independently, so the first level
        of every subgraph gets score 0. Lowest score (0) should be processed
        first.

        Args:
            graph: The graph to be scored.

        Returns:
            A dictionary consisting of `node name`:`score` pairs.
        """
        # connectivity is computed on an undirected view of the graph
        undirected = nx.Graph(graph)

        scores = {}
        for component in nx.connected_components(undirected):
            levels = self._grouped_topological_sort(graph.subgraph(component))
            for depth, members in enumerate(levels):
                for member in members:
                    scores[member] = depth

        return scores

    def get(self, block: bool = True, timeout: Optional[float] = None) -> GraphMemberNode:
        """Pop a node off the inner priority queue. By default, this blocks.

        The lock is only held while the node is moved to 'in progress', not
        while waiting on the inner queue.

        :param block: If True, block until the inner queue has data
        :param timeout: If set, block for timeout seconds waiting for data.
        :return: The node as present in the manifest.

        See `queue.PriorityQueue` for more information on `get()` behavior and
        exceptions.
        """
        _score, node_id = self.inner.get(block=block, timeout=timeout)
        with self.lock:
            self._mark_in_progress(node_id)
        return self.manifest.expect(node_id)

    def __len__(self) -> int:
        """Return the number of nodes in the graph that are not in progress.

        This is the number of tasks the queue still has to give out,
        regardless of where they are.

        This takes the lock.
        """
        with self.lock:
            remaining = len(self.graph)
            handed_out = len(self.in_progress)
            return remaining - handed_out

    def empty(self) -> bool:
        """The graph queue is 'empty' if all remaining nodes in the graph
        are in progress.

        This takes the lock.
        """
        return not len(self)

    def _already_known(self, node: UniqueId) -> bool:
        """Tell whether a node was already handed out as a task or is queued.

        Callers must hold the lock.

        :param str node: The node ID to check
        :returns bool: If the node is in progress/queued.
        """
        if node in self.in_progress:
            return True
        return node in self.queued

    def _find_new_additions(self, candidates) -> None:
        """Queue every candidate that has no incoming edges left and is not
        already queued or in progress.
        """
        for candidate in candidates:
            if self.graph.in_degree(candidate) != 0:
                continue
            if self._already_known(candidate):
                continue
            self.inner.put((self._scores[candidate], candidate))
            self.queued.add(candidate)

    def mark_done(self, node_id: UniqueId) -> None:
        """Given a node's unique ID, mark it as done.

        The node is removed from the graph, any successor that became ready
        is queued, and threads waiting on `some_task_done` are notified.

        This method takes the lock.

        :param str node_id: The node ID to mark as complete.
        """
        with self.lock:
            self.in_progress.remove(node_id)
            # capture the successors before the node (and its edges) disappear
            children = list(self.graph.successors(node_id))
            self.graph.remove_node(node_id)
            self._find_new_additions(children)
            self.inner.task_done()
            self.some_task_done.notify_all()

    def _mark_in_progress(self, node_id: UniqueId) -> None:
        """Move the node from 'queued' to 'in progress'.

        Callers must hold the lock.

        :param str node_id: The node ID to mark as in progress.
        """
        self.queued.remove(node_id)
        self.in_progress.add(node_id)

    def join(self) -> None:
        """Join the inner queue. Blocks until all tasks are marked as done.

        Make sure not to call this before the queue reports that it is empty.
        """
        self.inner.join()

    def wait_until_something_was_done(self) -> int:
        """Block until a task is done, then return the number of unfinished
        tasks reported by the inner queue.
        """
        with self.lock:
            self.some_task_done.wait()
            return self.inner.unfinished_tasks
|
dvt/graph/selector.py
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
from typing import List, Optional, Set, Tuple
|
|
2
|
+
|
|
3
|
+
from dvt import selected_resources
|
|
4
|
+
from dvt.contracts.graph.manifest import Manifest
|
|
5
|
+
from dvt.contracts.graph.nodes import GraphMemberNode
|
|
6
|
+
from dvt.contracts.state import PreviousState
|
|
7
|
+
from dvt.events.types import NoNodesForSelectionCriteria, SelectorReportInvalidSelector
|
|
8
|
+
from dvt.exceptions import DbtInternalError, InvalidSelectorError
|
|
9
|
+
from dvt.node_types import NodeType
|
|
10
|
+
|
|
11
|
+
from dbt_common.events.functions import fire_event, warn_or_error
|
|
12
|
+
|
|
13
|
+
from .graph import Graph, UniqueId
|
|
14
|
+
from .queue import GraphQueue
|
|
15
|
+
from .selector_methods import MethodManager
|
|
16
|
+
from .selector_spec import IndirectSelection, SelectionCriteria, SelectionSpec
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_package_names(nodes):
    """Return the set of second dot-separated segments of the given node IDs."""
    packages = set()
    for unique_id in nodes:
        segments = unique_id.split(".")
        packages.add(segments[1])
    return packages
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def can_select_indirectly(node):
    """Tell whether a node qualifies for indirect selection.

    A node that is not selected itself, but whose parent(s) are, may be
    picked up indirectly. Today this is limited to nodes whose resource type
    is `NodeType.Test` or `NodeType.Unit`. In the future, other node types
    or invocation flags might qualify.
    """
    if node.resource_type == NodeType.Test:
        return True
    return bool(node.resource_type == NodeType.Unit)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class NodeSelector(MethodManager):
|
|
38
|
+
"""The node selector is aware of the graph and manifest"""
|
|
39
|
+
|
|
40
|
+
def __init__(
    self,
    graph: Graph,
    manifest: Manifest,
    previous_state: Optional[PreviousState] = None,
    include_empty_nodes: bool = False,
) -> None:
    """Store the full graph and derive the subgraph used for selection.

    :param graph: The complete dependency graph.
    :param manifest: The manifest the graph's unique IDs refer to.
    :param previous_state: Forwarded unchanged to the parent initializer.
    :param include_empty_nodes: When False, `filter_selection` drops nodes
        reported as empty by `_is_empty_node`.
    """
    super().__init__(manifest, previous_state)
    self.full_graph: Graph = graph
    self.include_empty_nodes: bool = include_empty_nodes

    # restrict the graph to the IDs accepted by `_is_graph_member`
    member_ids = set()
    for unique_id in self.full_graph.nodes():
        if self._is_graph_member(unique_id):
            member_ids.add(unique_id)
    self.graph = self.full_graph.subgraph(member_ids)
|
|
57
|
+
|
|
58
|
+
def select_included(
    self,
    included_nodes: Set[UniqueId],
    spec: SelectionCriteria,
) -> Set[UniqueId]:
    """Run the spec's selector method over `included_nodes` and return the
    matching unique IDs as a set.
    """
    selector_method = self.get_method(spec.method, spec.method_arguments)
    matches = selector_method.search(included_nodes, spec.value)
    return set(matches)
|
|
68
|
+
|
|
69
|
+
def get_nodes_from_criteria(
    self, spec: SelectionCriteria
) -> Tuple[Set[UniqueId], Set[UniqueId]]:
    """Resolve a single selection criteria into (direct, indirect) node sets.

    - collect the directly included nodes
    - find their specified relatives
    - perform any selector-specific expansion

    An invalid selector is reported through an event and yields two empty
    sets instead of raising.
    """
    candidates = self.graph.nodes()
    try:
        collected = self.select_included(candidates, spec)
    except InvalidSelectorError:
        report = SelectorReportInvalidSelector(
            valid_selectors=", ".join(self.SELECTOR_METHODS),
            spec_method=spec.method,
            raw_spec=spec.raw,
        )
        fire_event(report)
        return set(), set()

    selected = collected | self.collect_specified_neighbors(spec, collected)

    # if --indirect-selection EMPTY, do not expand to adjacent tests
    if spec.indirect_selection == IndirectSelection.Empty:
        return selected, set()

    direct, indirect = self.expand_selection(
        selected=selected, indirect_selection=spec.indirect_selection
    )
    return direct, indirect
|
|
102
|
+
|
|
103
|
+
def collect_specified_neighbors(
    self, spec: SelectionCriteria, selected: Set[UniqueId]
) -> Set[UniqueId]:
    """Apply the spec's graph modifiers ("+"/"@") to the explicitly
    selected nodes (e.g. the result of "tag:foo").

    Returns the additional nodes that should be collected; this set may
    overlap with `selected`.
    """
    extra: Set[UniqueId] = set()

    if spec.childrens_parents:
        relatives = self.graph.select_childrens_parents(selected)
        extra.update(relatives)

    if spec.parents:
        ancestors = self.graph.select_parents(selected, spec.parents_depth)
        extra.update(ancestors)

    if spec.children:
        descendants = self.graph.select_children(selected, spec.children_depth)
        extra.update(descendants)

    return extra
|
|
123
|
+
|
|
124
|
+
def select_nodes_recursively(self, spec: SelectionSpec) -> Tuple[Set[UniqueId], Set[UniqueId]]:
    """Resolve a selection spec into (direct, indirect) node sets.

    A concrete selection criteria is resolved against the graph. A composite
    spec (a union, difference, or intersection) is resolved by recursing
    into its components and combining their results with `spec.combined`.
    """
    if isinstance(spec, SelectionCriteria):
        direct_nodes, indirect_nodes = self.get_nodes_from_criteria(spec)
    else:
        direct_sets = []
        indirect_sets = []
        for component in spec:
            direct, indirect = self.select_nodes_recursively(component)
            direct_sets.append(direct)
            # the indirect side always carries the direct nodes as well
            indirect_sets.append(direct | indirect)

        initial_direct = spec.combined(direct_sets)
        indirect_nodes = spec.combined(indirect_sets)

        direct_nodes = self.incorporate_indirect_nodes(
            initial_direct, indirect_nodes, spec.indirect_selection
        )

    nothing_selected = len(direct_nodes) == 0
    if spec.expect_exists and nothing_selected:
        warn_or_error(NoNodesForSelectionCriteria(spec_raw=str(spec.raw)))

    return direct_nodes, indirect_nodes
|
|
152
|
+
|
|
153
|
+
def select_nodes(self, spec: SelectionSpec) -> Tuple[Set[UniqueId], Set[UniqueId]]:
    """Turn a spec into the sets of selected nodes in the graph.

    This is the main point of entry for turning a spec into a set of nodes:
    - Recurse through spec, select by criteria, combine by set operation
    - Return final (unfiltered) selection set

    The second element only holds nodes that were selected indirectly and
    are not part of the direct selection.
    """
    direct, indirect = self.select_nodes_recursively(spec)
    return direct, indirect.difference(direct)
|
|
163
|
+
|
|
164
|
+
def _is_graph_member(self, unique_id: UniqueId) -> bool:
    """Return the `config.enabled` flag of the manifest entry for `unique_id`.

    The manifest collections listed below are probed in order and the first
    one containing `unique_id` decides the answer. IDs found in none of them
    are looked up in `manifest.nodes`; a missing ID surfaces as whatever
    lookup error that collection raises.

    :param unique_id: The unique ID to check.
    :return: The entry's `config.enabled` value.
    """
    # Exposures are judged by `config.enabled` like every other resource
    # type; an unconditional `return True` branch used to shadow that check
    # and left it unreachable.
    resource_collections = (
        "sources",
        "exposures",
        "functions",
        "metrics",
        "semantic_models",
        "unit_tests",
        "saved_queries",
    )
    for attr_name in resource_collections:
        # resolved lazily so later collections are only touched when needed
        collection = getattr(self.manifest, attr_name)
        if unique_id in collection:
            return collection[unique_id].config.enabled

    node = self.manifest.nodes[unique_id]
    return node.config.enabled
|
|
191
|
+
|
|
192
|
+
def _is_empty_node(self, unique_id: UniqueId) -> bool:
    """Return the `empty` attribute of the matching entry in
    `manifest.nodes`, or False when the ID is not in that collection.
    """
    if unique_id not in self.manifest.nodes:
        return False
    return self.manifest.nodes[unique_id].empty
|
|
198
|
+
|
|
199
|
+
def node_is_match(self, node: GraphMemberNode) -> bool:
    """Decide whether `node` is a match for this selector.

    Nodes that are not a match are excluded from results during filtering.
    This implementation accepts every node.
    """
    return True
|
|
204
|
+
|
|
205
|
+
def _is_match(self, unique_id: UniqueId) -> bool:
    """Look `unique_id` up in the manifest and pass the entry to
    `node_is_match`.

    :raises DbtInternalError: If no listed manifest collection contains
        `unique_id`.
    """
    lookup_order = (
        "nodes",
        "sources",
        "exposures",
        "functions",
        "metrics",
        "semantic_models",
        "unit_tests",
        "saved_queries",
    )
    for attr_name in lookup_order:
        # resolved lazily so later collections are only touched when needed
        collection = getattr(self.manifest, attr_name)
        if unique_id in collection:
            node: GraphMemberNode = collection[unique_id]
            return self.node_is_match(node)
    raise DbtInternalError(f"Node {unique_id} not found in the manifest!")
|
|
226
|
+
|
|
227
|
+
def filter_selection(self, selected: Set[UniqueId]) -> Set[UniqueId]:
    """Return the subset of `selected` that matches this selector.

    Nodes reported as empty are dropped too, unless `include_empty_nodes`
    is set.
    """
    kept: Set[UniqueId] = set()
    for unique_id in selected:
        if not self._is_match(unique_id):
            continue
        if self.include_empty_nodes or not self._is_empty_node(unique_id):
            kept.add(unique_id)
    return kept
|
|
237
|
+
|
|
238
|
+
def expand_selection(
    self,
    selected: Set[UniqueId],
    indirect_selection: IndirectSelection = IndirectSelection.Eager,
) -> Tuple[Set[UniqueId], Set[UniqueId]]:
    """Expand `selected` with tests that are implicitly/indirectly selected.

    `dbt test -m model_a` also includes tests that directly depend on
    `model_a`. Expansion has four modes:

    - EAGER: if ANY parent is selected, select the test.
    - CAUTIOUS: if ALL parents are selected, select the test. If ANY parent
      is missing, return the test separately so it can be reconsidered
      later, once its other parents show up.
    - BUILDABLE: like CAUTIOUS, but a parent also counts as present when it
      is itself a parent of the selected nodes or a source in the manifest.
    - EMPTY: only keep the given nodes and ignore attached nodes (i.e.
      ignore tests attached to a model).

    Users can opt out of the inclusive EAGER mode by passing
    `--indirect-selection cautious` on the CLI or by setting
    `indirect_selection` in a yaml selector.

    :param selected: The unique IDs selected so far; not modified.
    :param indirect_selection: The expansion mode.
    :return: `(direct_nodes, indirect_nodes)` - the selection including the
        tests added directly, and the tests held back for later.
    """
    direct_nodes = set(selected)
    indirect_nodes: Set[UniqueId] = set()

    # Loop-invariant lookups, built once instead of on every iteration.
    selected_ids = frozenset(selected)
    selected_and_parents: Set[UniqueId] = set()
    if indirect_selection == IndirectSelection.Buildable:
        selected_and_parents = selected.union(self.graph.select_parents(selected)).union(
            self.manifest.sources
        )

    for unique_id in self.graph.select_successors(selected):
        if unique_id in self.manifest.nodes:
            node = self.manifest.nodes[unique_id]
        elif unique_id in self.manifest.unit_tests:
            node = self.manifest.unit_tests[unique_id]  # type: ignore
        else:
            continue

        # Only nodes accepted by `can_select_indirectly` (test and unit
        # test nodes) may be pulled in through their parents.
        if not can_select_indirectly(node):
            continue

        # should we add it in directly?
        if indirect_selection == IndirectSelection.Eager or (
            set(node.depends_on_nodes) <= selected_ids
        ):
            direct_nodes.add(unique_id)
        elif indirect_selection == IndirectSelection.Buildable and (
            set(node.depends_on_nodes) <= selected_and_parents
        ):
            direct_nodes.add(unique_id)
        elif indirect_selection == IndirectSelection.Empty:
            pass
        else:
            indirect_nodes.add(unique_id)

    return direct_nodes, indirect_nodes
|
|
296
|
+
|
|
297
|
+
def incorporate_indirect_nodes(
    self,
    direct_nodes: Set[UniqueId],
    indirect_nodes: Optional[Set[UniqueId]] = None,
    indirect_selection: IndirectSelection = IndirectSelection.Eager,
) -> Set[UniqueId]:
    """Promote indirectly selected nodes whose parents are now all present.

    Args:
        direct_nodes: Unique IDs that are already selected.
        indirect_nodes: Unique IDs that were previously held back. Omitting
            the argument (or passing ``None``) is treated as an empty set.
        indirect_selection: Mode controlling promotion. Only ``Cautious``
            and ``Buildable`` promote anything; every other mode leaves the
            selection as a plain copy of ``direct_nodes``.

    Returns:
        ``direct_nodes`` itself when it equals ``indirect_nodes``; otherwise
        a new set holding ``direct_nodes`` plus every promoted ID.
    """
    # Use a fresh set per call rather than a shared mutable default value.
    if indirect_nodes is None:
        indirect_nodes = set()

    # performance: if identical, skip the processing below
    if set(direct_nodes) == set(indirect_nodes):
        return direct_nodes

    selected = set(direct_nodes)
    manifest_nodes = self.manifest.nodes

    if indirect_selection == IndirectSelection.Cautious:
        # Parents must be in the selection itself. ``selected`` grows while
        # iterating, so an ID promoted earlier can satisfy a later one.
        for unique_id in indirect_nodes:
            if unique_id in manifest_nodes:
                node = manifest_nodes[unique_id]
                if set(node.depends_on_nodes) <= selected:
                    selected.add(unique_id)
    elif indirect_selection == IndirectSelection.Buildable:
        # Parents may be in the selection or among its graph parents. This
        # is a snapshot taken once; it does not grow during the loop.
        selected_and_parents = selected.union(self.graph.select_parents(selected))
        for unique_id in indirect_nodes:
            if unique_id in manifest_nodes:
                node = manifest_nodes[unique_id]
                if set(node.depends_on_nodes) <= selected_and_parents:
                    selected.add(unique_id)

    return selected
|
|
327
|
+
|
|
328
|
+
def get_selected(self, spec: SelectionSpec) -> Set[UniqueId]:
    """Resolve ``spec`` to the final, filtered set of unique IDs.

    The work happens in two stages:

    - selection: ``select_nodes`` turns the spec's include/exclude rules
      into matched unique IDs (direct + indirect selection, for tests)
    - filtering: ``filter_selection`` is applied once, after all nodes
      have been selected
    """
    # select_nodes returns a pair; only its first element is used here.
    chosen, _indirect_only = self.select_nodes(spec)
    return self.filter_selection(chosen)
|
|
342
|
+
|
|
343
|
+
def get_graph_queue(self, spec: SelectionSpec, preserve_edges: bool = True) -> GraphQueue:
    """Build a GraphQueue over the nodes selected by ``spec``.

    The queue is built from the subset of ``self.full_graph`` containing
    only the selected nodes; ``preserve_edges`` is passed through to
    ``GraphQueue`` unchanged.
    """
    # get_selected already applies filtering, so none is needed here.
    chosen = self.get_selected(spec)

    # Record the selection in the module-level selected_resources holder.
    selected_resources.set_selected_resources(chosen)

    # Restrict the full graph to the chosen nodes.
    subgraph = self.full_graph.get_subset_graph(chosen)

    # should we give a way here for consumers to mutate the graph?
    queue = GraphQueue(subgraph.graph, self.manifest, chosen, preserve_edges)
    return queue
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
class ResourceTypeSelector(NodeSelector):
    """NodeSelector that matches only nodes of the given resource types."""

    def __init__(
        self,
        graph: Graph,
        manifest: Manifest,
        previous_state: Optional[PreviousState],
        resource_types: List[NodeType],
        include_empty_nodes: bool = False,
    ) -> None:
        """Initialise the base selector and remember the allowed types.

        All arguments except ``resource_types`` are forwarded by keyword to
        ``NodeSelector.__init__``; ``resource_types`` is stored as a set.
        """
        super().__init__(
            graph=graph,
            manifest=manifest,
            previous_state=previous_state,
            include_empty_nodes=include_empty_nodes,
        )
        # Stored as a set so node_is_match is a plain membership test.
        self.resource_types: Set[NodeType] = {*resource_types}

    def node_is_match(self, node):
        """Return True when ``node.resource_type`` is an allowed type."""
        allowed_types = self.resource_types
        return node.resource_type in allowed_types
|