dvt_core-1.11.0b4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dvt-core might be problematic.
- dvt/__init__.py +7 -0
- dvt/_pydantic_shim.py +26 -0
- dvt/adapters/__init__.py +16 -0
- dvt/adapters/multi_adapter_manager.py +268 -0
- dvt/artifacts/__init__.py +0 -0
- dvt/artifacts/exceptions/__init__.py +1 -0
- dvt/artifacts/exceptions/schemas.py +31 -0
- dvt/artifacts/resources/__init__.py +116 -0
- dvt/artifacts/resources/base.py +68 -0
- dvt/artifacts/resources/types.py +93 -0
- dvt/artifacts/resources/v1/analysis.py +10 -0
- dvt/artifacts/resources/v1/catalog.py +23 -0
- dvt/artifacts/resources/v1/components.py +275 -0
- dvt/artifacts/resources/v1/config.py +282 -0
- dvt/artifacts/resources/v1/documentation.py +11 -0
- dvt/artifacts/resources/v1/exposure.py +52 -0
- dvt/artifacts/resources/v1/function.py +53 -0
- dvt/artifacts/resources/v1/generic_test.py +32 -0
- dvt/artifacts/resources/v1/group.py +22 -0
- dvt/artifacts/resources/v1/hook.py +11 -0
- dvt/artifacts/resources/v1/macro.py +30 -0
- dvt/artifacts/resources/v1/metric.py +173 -0
- dvt/artifacts/resources/v1/model.py +146 -0
- dvt/artifacts/resources/v1/owner.py +10 -0
- dvt/artifacts/resources/v1/saved_query.py +112 -0
- dvt/artifacts/resources/v1/seed.py +42 -0
- dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt/artifacts/resources/v1/semantic_model.py +315 -0
- dvt/artifacts/resources/v1/singular_test.py +14 -0
- dvt/artifacts/resources/v1/snapshot.py +92 -0
- dvt/artifacts/resources/v1/source_definition.py +85 -0
- dvt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
- dvt/artifacts/schemas/__init__.py +0 -0
- dvt/artifacts/schemas/base.py +191 -0
- dvt/artifacts/schemas/batch_results.py +24 -0
- dvt/artifacts/schemas/catalog/__init__.py +12 -0
- dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
- dvt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
- dvt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
- dvt/artifacts/schemas/results.py +148 -0
- dvt/artifacts/schemas/run/__init__.py +2 -0
- dvt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt/artifacts/schemas/run/v5/run.py +184 -0
- dvt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt/artifacts/utils/validation.py +153 -0
- dvt/cli/__init__.py +1 -0
- dvt/cli/context.py +16 -0
- dvt/cli/exceptions.py +56 -0
- dvt/cli/flags.py +558 -0
- dvt/cli/main.py +971 -0
- dvt/cli/option_types.py +121 -0
- dvt/cli/options.py +79 -0
- dvt/cli/params.py +803 -0
- dvt/cli/requires.py +478 -0
- dvt/cli/resolvers.py +32 -0
- dvt/cli/types.py +40 -0
- dvt/clients/__init__.py +0 -0
- dvt/clients/checked_load.py +82 -0
- dvt/clients/git.py +164 -0
- dvt/clients/jinja.py +206 -0
- dvt/clients/jinja_static.py +245 -0
- dvt/clients/registry.py +192 -0
- dvt/clients/yaml_helper.py +68 -0
- dvt/compilation.py +833 -0
- dvt/compute/__init__.py +26 -0
- dvt/compute/base.py +288 -0
- dvt/compute/engines/__init__.py +13 -0
- dvt/compute/engines/duckdb_engine.py +368 -0
- dvt/compute/engines/spark_engine.py +273 -0
- dvt/compute/query_analyzer.py +212 -0
- dvt/compute/router.py +483 -0
- dvt/config/__init__.py +4 -0
- dvt/config/catalogs.py +95 -0
- dvt/config/compute_config.py +406 -0
- dvt/config/profile.py +411 -0
- dvt/config/profiles_v2.py +464 -0
- dvt/config/project.py +893 -0
- dvt/config/renderer.py +232 -0
- dvt/config/runtime.py +491 -0
- dvt/config/selectors.py +209 -0
- dvt/config/utils.py +78 -0
- dvt/connectors/.gitignore +6 -0
- dvt/connectors/README.md +306 -0
- dvt/connectors/catalog.yml +217 -0
- dvt/connectors/download_connectors.py +300 -0
- dvt/constants.py +29 -0
- dvt/context/__init__.py +0 -0
- dvt/context/base.py +746 -0
- dvt/context/configured.py +136 -0
- dvt/context/context_config.py +350 -0
- dvt/context/docs.py +82 -0
- dvt/context/exceptions_jinja.py +179 -0
- dvt/context/macro_resolver.py +195 -0
- dvt/context/macros.py +171 -0
- dvt/context/manifest.py +73 -0
- dvt/context/providers.py +2198 -0
- dvt/context/query_header.py +14 -0
- dvt/context/secret.py +59 -0
- dvt/context/target.py +74 -0
- dvt/contracts/__init__.py +0 -0
- dvt/contracts/files.py +413 -0
- dvt/contracts/graph/__init__.py +0 -0
- dvt/contracts/graph/manifest.py +1904 -0
- dvt/contracts/graph/metrics.py +98 -0
- dvt/contracts/graph/model_config.py +71 -0
- dvt/contracts/graph/node_args.py +42 -0
- dvt/contracts/graph/nodes.py +1806 -0
- dvt/contracts/graph/semantic_manifest.py +233 -0
- dvt/contracts/graph/unparsed.py +812 -0
- dvt/contracts/project.py +417 -0
- dvt/contracts/results.py +53 -0
- dvt/contracts/selection.py +23 -0
- dvt/contracts/sql.py +86 -0
- dvt/contracts/state.py +69 -0
- dvt/contracts/util.py +46 -0
- dvt/deprecations.py +347 -0
- dvt/deps/__init__.py +0 -0
- dvt/deps/base.py +153 -0
- dvt/deps/git.py +196 -0
- dvt/deps/local.py +80 -0
- dvt/deps/registry.py +131 -0
- dvt/deps/resolver.py +149 -0
- dvt/deps/tarball.py +121 -0
- dvt/docs/source/_ext/dbt_click.py +118 -0
- dvt/docs/source/conf.py +32 -0
- dvt/env_vars.py +64 -0
- dvt/event_time/event_time.py +40 -0
- dvt/event_time/sample_window.py +60 -0
- dvt/events/__init__.py +16 -0
- dvt/events/base_types.py +37 -0
- dvt/events/core_types_pb2.py +2 -0
- dvt/events/logging.py +109 -0
- dvt/events/types.py +2534 -0
- dvt/exceptions.py +1487 -0
- dvt/flags.py +89 -0
- dvt/graph/__init__.py +11 -0
- dvt/graph/cli.py +248 -0
- dvt/graph/graph.py +172 -0
- dvt/graph/queue.py +213 -0
- dvt/graph/selector.py +375 -0
- dvt/graph/selector_methods.py +976 -0
- dvt/graph/selector_spec.py +223 -0
- dvt/graph/thread_pool.py +18 -0
- dvt/hooks.py +21 -0
- dvt/include/README.md +49 -0
- dvt/include/__init__.py +3 -0
- dvt/include/global_project.py +4 -0
- dvt/include/starter_project/.gitignore +4 -0
- dvt/include/starter_project/README.md +15 -0
- dvt/include/starter_project/__init__.py +3 -0
- dvt/include/starter_project/analyses/.gitkeep +0 -0
- dvt/include/starter_project/dvt_project.yml +36 -0
- dvt/include/starter_project/macros/.gitkeep +0 -0
- dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt/include/starter_project/models/example/schema.yml +21 -0
- dvt/include/starter_project/seeds/.gitkeep +0 -0
- dvt/include/starter_project/snapshots/.gitkeep +0 -0
- dvt/include/starter_project/tests/.gitkeep +0 -0
- dvt/internal_deprecations.py +27 -0
- dvt/jsonschemas/__init__.py +3 -0
- dvt/jsonschemas/jsonschemas.py +309 -0
- dvt/jsonschemas/project/0.0.110.json +4717 -0
- dvt/jsonschemas/project/0.0.85.json +2015 -0
- dvt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt/jsonschemas/resources/latest.json +6773 -0
- dvt/links.py +4 -0
- dvt/materializations/__init__.py +0 -0
- dvt/materializations/incremental/__init__.py +0 -0
- dvt/materializations/incremental/microbatch.py +235 -0
- dvt/mp_context.py +8 -0
- dvt/node_types.py +37 -0
- dvt/parser/__init__.py +23 -0
- dvt/parser/analysis.py +21 -0
- dvt/parser/base.py +549 -0
- dvt/parser/common.py +267 -0
- dvt/parser/docs.py +52 -0
- dvt/parser/fixtures.py +51 -0
- dvt/parser/functions.py +30 -0
- dvt/parser/generic_test.py +100 -0
- dvt/parser/generic_test_builders.py +334 -0
- dvt/parser/hooks.py +119 -0
- dvt/parser/macros.py +137 -0
- dvt/parser/manifest.py +2204 -0
- dvt/parser/models.py +574 -0
- dvt/parser/partial.py +1179 -0
- dvt/parser/read_files.py +445 -0
- dvt/parser/schema_generic_tests.py +423 -0
- dvt/parser/schema_renderer.py +111 -0
- dvt/parser/schema_yaml_readers.py +936 -0
- dvt/parser/schemas.py +1467 -0
- dvt/parser/search.py +149 -0
- dvt/parser/seeds.py +28 -0
- dvt/parser/singular_test.py +20 -0
- dvt/parser/snapshots.py +44 -0
- dvt/parser/sources.py +557 -0
- dvt/parser/sql.py +63 -0
- dvt/parser/unit_tests.py +622 -0
- dvt/plugins/__init__.py +20 -0
- dvt/plugins/contracts.py +10 -0
- dvt/plugins/exceptions.py +2 -0
- dvt/plugins/manager.py +164 -0
- dvt/plugins/manifest.py +21 -0
- dvt/profiler.py +20 -0
- dvt/py.typed +1 -0
- dvt/runners/__init__.py +2 -0
- dvt/runners/exposure_runner.py +7 -0
- dvt/runners/no_op_runner.py +46 -0
- dvt/runners/saved_query_runner.py +7 -0
- dvt/selected_resources.py +8 -0
- dvt/task/__init__.py +0 -0
- dvt/task/base.py +504 -0
- dvt/task/build.py +197 -0
- dvt/task/clean.py +57 -0
- dvt/task/clone.py +162 -0
- dvt/task/compile.py +151 -0
- dvt/task/compute.py +366 -0
- dvt/task/debug.py +650 -0
- dvt/task/deps.py +280 -0
- dvt/task/docs/__init__.py +3 -0
- dvt/task/docs/generate.py +408 -0
- dvt/task/docs/index.html +250 -0
- dvt/task/docs/serve.py +28 -0
- dvt/task/freshness.py +323 -0
- dvt/task/function.py +122 -0
- dvt/task/group_lookup.py +46 -0
- dvt/task/init.py +374 -0
- dvt/task/list.py +237 -0
- dvt/task/printer.py +176 -0
- dvt/task/profiles.py +256 -0
- dvt/task/retry.py +175 -0
- dvt/task/run.py +1146 -0
- dvt/task/run_operation.py +142 -0
- dvt/task/runnable.py +802 -0
- dvt/task/seed.py +104 -0
- dvt/task/show.py +150 -0
- dvt/task/snapshot.py +57 -0
- dvt/task/sql.py +111 -0
- dvt/task/test.py +464 -0
- dvt/tests/fixtures/__init__.py +1 -0
- dvt/tests/fixtures/project.py +620 -0
- dvt/tests/util.py +651 -0
- dvt/tracking.py +529 -0
- dvt/utils/__init__.py +3 -0
- dvt/utils/artifact_upload.py +151 -0
- dvt/utils/utils.py +408 -0
- dvt/version.py +249 -0
- dvt_core-1.11.0b4.dist-info/METADATA +252 -0
- dvt_core-1.11.0b4.dist-info/RECORD +261 -0
- dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
- dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
- dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/parser/models.py
ADDED
@@ -0,0 +1,574 @@
# New for Python models :p
import ast
import random
from copy import deepcopy
from functools import reduce
from itertools import chain
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union

import dvt.tracking as tracking
from dvt import utils
from dvt.artifacts.resources import RefArgs
from dvt.clients.jinja import get_rendered
from dvt.context.context_config import ContextConfig
from dvt.contracts.graph.nodes import ModelNode
from dvt.exceptions import (
    ModelConfigError,
    ParsingError,
    PythonLiteralEvalError,
    PythonParsingError,
)
from dvt.flags import get_flags
from dvt.node_types import ModelLanguage, NodeType
from dvt.parser.base import SimpleSQLParser
from dvt.parser.search import FileBlock

from dbt_common.contracts.config.base import merge_config_dicts
from dbt_common.dataclass_schema import ValidationError
from dbt_common.exceptions.macros import UndefinedMacroError
from dbt_extractor import ExtractionError, py_extract_from_source  # type: ignore

dbt_function_key_words = set(["ref", "source", "config", "get"])
dbt_function_full_names = set(["dbt.ref", "dbt.source", "dbt.config", "dbt.config.get"])


class PythonValidationVisitor(ast.NodeVisitor):
    def __init__(self) -> None:
        super().__init__()
        self.dbt_errors: List[str] = []
        self.num_model_def = 0

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        if node.name == "model":
            self.num_model_def += 1
            if node.args.args and not node.args.args[0].arg == "dbt":
                self.dbt_errors.append("'dbt' not provided for model as the first argument")
            if len(node.args.args) != 2:
                self.dbt_errors.append(
                    "model function should have two args, `dbt` and a session to current warehouse"
                )
            # check we have a return and only one
            if not isinstance(node.body[-1], ast.Return) or isinstance(
                node.body[-1].value, ast.Tuple
            ):
                self.dbt_errors.append(
                    "In current version, model function should return only one dataframe object"
                )

    def check_error(self, node):
        if self.num_model_def != 1:
            raise ParsingError(
                f"dbt allows exactly one model defined per python file, found {self.num_model_def}",
                node=node,
            )

        if len(self.dbt_errors) != 0:
            raise ParsingError("\n".join(self.dbt_errors), node=node)


class PythonParseVisitor(ast.NodeVisitor):
    def __init__(self, dbt_node):
        super().__init__()

        self.dbt_node = dbt_node
        self.dbt_function_calls = []
        self.packages = []

    @classmethod
    def _flatten_attr(cls, node):
        if isinstance(node, ast.Attribute):
            return str(cls._flatten_attr(node.value)) + "." + node.attr
        elif isinstance(node, ast.Name):
            return str(node.id)
        else:
            pass

    def _safe_eval(self, node):
        try:
            return ast.literal_eval(node)
        except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError) as exc:
            raise PythonLiteralEvalError(exc, node=self.dbt_node) from exc

    def _get_call_literals(self, node):
        # List of literals
        arg_literals = []
        kwarg_literals = {}

        # TODO : Make sure this throws (and that we catch it)
        # for non-literal inputs
        for arg in node.args:
            rendered = self._safe_eval(arg)
            arg_literals.append(rendered)

        for keyword in node.keywords:
            key = keyword.arg
            rendered = self._safe_eval(keyword.value)
            kwarg_literals[key] = rendered

        return arg_literals, kwarg_literals

    def visit_Call(self, node: ast.Call) -> None:
        # check whether the current call could be a dbt function call
        if isinstance(node.func, ast.Attribute) and node.func.attr in dbt_function_key_words:
            func_name = self._flatten_attr(node.func)
            # check whether the current call really is a dbt function call
            if func_name in dbt_function_full_names:
                # drop the dot-dbt prefix
                func_name = func_name.split(".")[-1]
                args, kwargs = self._get_call_literals(node)
                self.dbt_function_calls.append((func_name, args, kwargs))

        # no matter what happened above, we should keep visiting the rest of the tree
        # visit args and kwargs to see if there's a call in them
        for obj in node.args + [kwarg.value for kwarg in node.keywords]:
            if isinstance(obj, ast.Call):
                self.visit_Call(obj)
            # support dbt.ref in list args, kwargs
            elif isinstance(obj, ast.List) or isinstance(obj, ast.Tuple):
                for el in obj.elts:
                    if isinstance(el, ast.Call):
                        self.visit_Call(el)
            # support dbt.ref in dict args, kwargs
            elif isinstance(obj, ast.Dict):
                for value in obj.values:
                    if isinstance(value, ast.Call):
                        self.visit_Call(value)
            # support dbt function calls in f-strings
            elif isinstance(obj, ast.JoinedStr):
                for value in obj.values:
                    if isinstance(value, ast.FormattedValue) and isinstance(value.value, ast.Call):
                        self.visit_Call(value.value)

        # visit node.func.value if we are at a call attr
        if isinstance(node.func, ast.Attribute):
            self.attribute_helper(node.func)

    def attribute_helper(self, node: ast.Attribute) -> None:
        while isinstance(node, ast.Attribute):
            node = node.value  # type: ignore
        if isinstance(node, ast.Call):
            self.visit_Call(node)

    def visit_Import(self, node: ast.Import) -> None:
        for n in node.names:
            self.packages.append(n.name.split(".")[0])

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        if node.module:
            self.packages.append(node.module.split(".")[0])


def verify_python_model_code(node):
    # TODO: add a test for this
    try:
        rendered_python = get_rendered(
            node.raw_code,
            {},
            node,
        )
        if rendered_python != node.raw_code:
            raise ParsingError("")
    except (UndefinedMacroError, ParsingError):
        raise ParsingError("No jinja in python model code is allowed", node=node)


class ModelParser(SimpleSQLParser[ModelNode]):
    def parse_from_dict(self, dct, validate=True) -> ModelNode:
        if validate:
            ModelNode.validate(dct)
        return ModelNode.from_dict(dct)

    @property
    def resource_type(self) -> NodeType:
        return NodeType.Model

    @classmethod
    def get_compiled_path(cls, block: FileBlock):
        return block.path.relative_path

    def parse_python_model(self, node, config, context):
        config_keys_used = []
        config_keys_defaults = []

        try:
            tree = ast.parse(node.raw_code, filename=node.original_file_path)
        except SyntaxError as exc:
            raise PythonParsingError(exc, node=node) from exc

        # Only parse if AST tree has instructions in body
        if tree.body:
            # We are doing a validator and a parser because visit_FunctionDef in parser
            # would actually make the parser not do the visit_Calls any more
            dbt_validator = PythonValidationVisitor()
            dbt_validator.visit(tree)
            dbt_validator.check_error(node)

            dbt_parser = PythonParseVisitor(node)
            dbt_parser.visit(tree)

            for func, args, kwargs in dbt_parser.dbt_function_calls:
                if func == "get":
                    num_args = len(args)
                    if num_args == 0:
                        raise ParsingError(
                            "dbt.config.get() requires at least one argument",
                            node=node,
                        )
                    if num_args > 2:
                        raise ParsingError(
                            f"dbt.config.get() takes at most 2 arguments ({num_args} given)",
                            node=node,
                        )
                    key = args[0]
                    default_value = args[1] if num_args == 2 else None
                    config_keys_used.append(key)
                    config_keys_defaults.append(default_value)
                    continue

                context[func](*args, **kwargs)

        if config_keys_used:
            # this is being used in macro build_config_dict
            context["config"](
                config_keys_used=config_keys_used,
                config_keys_defaults=config_keys_defaults,
            )

    def render_update(
        self, node: ModelNode, config: ContextConfig, validate_config_call_dict: bool = False
    ) -> None:
        self.manifest._parsing_info.static_analysis_path_count += 1
        flags = get_flags()
        if node.language == ModelLanguage.python:
            try:
                verify_python_model_code(node)
                context = self._context_for(node, config)
                self.parse_python_model(node, config, context)
                self.update_parsed_node_config(
                    node, config, context=context, validate_config_call_dict=True
                )

            except ValidationError as exc:
                # we got a ValidationError - probably bad types in config()
                raise ModelConfigError(exc, node=node) from exc
            return

        elif not flags.STATIC_PARSER:
            # jinja rendering
            super().render_update(node, config)
            return

        # only sample for experimental parser correctness on normal runs,
        # not when the experimental parser flag is on.
        exp_sample: bool = False
        # sampling the stable static parser against jinja is significantly
        # more expensive and therefore done far less frequently.
        stable_sample: bool = False
        # there are two samples above, and it is perfectly fine if both happen
        # at the same time. If that happens, the experimental parser, stable
        # parser, and jinja rendering will run on the same model file and
        # send back codes for experimental v stable, and stable v jinja.
        if not flags.USE_EXPERIMENTAL_PARSER:
            # `True` roughly 1/5000 times this function is called
            # sample = random.randint(1, 5001) == 5000
            stable_sample = random.randint(1, 5001) == 5000
            # sampling the experimental parser is explicitly disabled here, but use the following
            # commented code to sample a fraction of the time when new
            # experimental features are added.
            # `True` roughly 1/100 times this function is called
            # exp_sample = random.randint(1, 101) == 100

        # top-level declaration of variables
        statically_parsed: Optional[Union[str, Dict[str, List[Any]]]] = None
        experimental_sample: Optional[Union[str, Dict[str, List[Any]]]] = None
        exp_sample_node: Optional[ModelNode] = None
        exp_sample_config: Optional[ContextConfig] = None
        jinja_sample_node: Optional[ModelNode] = None
        jinja_sample_config: Optional[ContextConfig] = None
        result: List[str] = []

        # sample the experimental parser only during a normal run
        if exp_sample and not flags.USE_EXPERIMENTAL_PARSER:
            experimental_sample = self.run_experimental_parser(node)
            # if the experimental parser succeeded, make a full copy of model parser
            # and populate _everything_ into it so it can be compared apples-to-apples
            # with a fully jinja-rendered project. This is necessary because the experimental
            # parser will likely add features that the existing static parser will fail on
            # so comparing those directly would give us bad results. The comparison will be
            # conducted after this model has been fully rendered either by the static parser
            # or by full jinja rendering
            if isinstance(experimental_sample, dict):
                model_parser_copy = self.partial_deepcopy()
                exp_sample_node = deepcopy(node)
                exp_sample_config = deepcopy(config)
                model_parser_copy.populate(exp_sample_node, exp_sample_config, experimental_sample)
        # use the experimental parser exclusively if the flag is on
        if flags.USE_EXPERIMENTAL_PARSER:
            statically_parsed = self.run_experimental_parser(node)
        # run the stable static parser unless it is explicitly turned off
        else:
            statically_parsed = self.run_static_parser(node)

        # if the static parser succeeded, extract some data in easy-to-compare formats
        if isinstance(statically_parsed, dict):
            # only sample jinja for the purpose of comparing with the stable static parser
            # if we know we don't need to fall back to jinja (i.e. - nothing to compare
            # with jinja v jinja).
            # This means we skip sampling for 40% of the 1/5000 samples. We could run the
            # sampling rng here, but the effect would be the same since we would only roll
            # it 40% of the time. So I've opted to keep all the rng code colocated above.
            if stable_sample and not flags.USE_EXPERIMENTAL_PARSER:
                # if this will _never_ mutate anything `self` we could avoid these deep copies,
                # but we can't really guarantee that going forward.
                model_parser_copy = self.partial_deepcopy()
                jinja_sample_node = deepcopy(node)
                jinja_sample_config = deepcopy(config)
                # rendering mutates the node and the config
                super(ModelParser, model_parser_copy).render_update(
                    jinja_sample_node, jinja_sample_config
                )

            # update the unrendered config with values from the static parser.
            # values from yaml files are in there already
            self.populate(node, config, statically_parsed)

            # if we took a jinja sample, compare now that the base node has been populated
            if jinja_sample_node is not None and jinja_sample_config is not None:
                result = _get_stable_sample_result(
                    jinja_sample_node, jinja_sample_config, node, config
                )

            # if we took an experimental sample, compare now that the base node has been populated
            if exp_sample_node is not None and exp_sample_config is not None:
                result = _get_exp_sample_result(
                    exp_sample_node,
                    exp_sample_config,
                    node,
                    config,
                )

            self.manifest._parsing_info.static_analysis_parsed_path_count += 1
        # if the static parser didn't succeed, fall back to jinja
        else:
            # jinja rendering
            super().render_update(node, config, validate_config_call_dict=True)

        # if sampling, add the correct messages for tracking
        if exp_sample and isinstance(experimental_sample, str):
            if experimental_sample == "cannot_parse":
                result += ["01_experimental_parser_cannot_parse"]
            elif experimental_sample == "has_banned_macro":
                result += ["08_has_banned_macro"]
        elif stable_sample and isinstance(statically_parsed, str):
            if statically_parsed == "cannot_parse":
                result += ["81_stable_parser_cannot_parse"]
            elif statically_parsed == "has_banned_macro":
                result += ["88_has_banned_macro"]

        # only send the tracking event if there is at least one result code
        if result:
            # fire a tracking event. this fires one event for every sample
            # so that we have data on a per file basis. Not only can we expect
            # no false positives or misses, we can expect the number of model
            # files parseable by the experimental parser to match our internal
            # testing.
            if tracking.active_user is not None:  # None in some tests
                tracking.track_experimental_parser_sample(
                    {
                        "project_id": self.root_project.hashed_name(),
                        "file_id": utils.get_hash(node),
                        "status": result,
                    }
                )

    def run_static_parser(self, node: ModelNode) -> Optional[Union[str, Dict[str, List[Any]]]]:
        # if any banned macros have been overridden by the user, we cannot use the static parser.
        if self._has_banned_macro(node):
            return "has_banned_macro"

        # run the stable static parser and return the results
        try:
            statically_parsed = py_extract_from_source(node.raw_code)
            return _shift_sources(statically_parsed)
        # if we want information on what features are barring the static
        # parser from reading model files, this is where we would add that
        # since that information is stored in the `ExtractionError`.
        except ExtractionError:
            return "cannot_parse"

    def run_experimental_parser(
        self, node: ModelNode
    ) -> Optional[Union[str, Dict[str, List[Any]]]]:
        # if any banned macros have been overridden by the user, we cannot use the static parser.
        if self._has_banned_macro(node):
            return "has_banned_macro"

        # run the experimental parser and return the results
        try:
            # for now, this line calls the stable static parser since there are no
            # experimental features. Change `py_extract_from_source` to the new
            # experimental call when we add additional features.
            experimentally_parsed = py_extract_from_source(node.raw_code)
            return _shift_sources(experimentally_parsed)
        # if we want information on what features are barring the experimental
        # parser from reading model files, this is where we would add that
        # since that information is stored in the `ExtractionError`.
        except ExtractionError:
            return "cannot_parse"

    # checks for banned macros
    def _has_banned_macro(self, node: ModelNode) -> bool:
        # first check if there is a banned macro defined in scope for this model file
        root_project_name = self.root_project.project_name
        project_name = node.package_name
        banned_macros = ["ref", "source", "config"]

        all_banned_macro_keys: Iterator[str] = chain.from_iterable(
            map(
                lambda name: [f"macro.{project_name}.{name}", f"macro.{root_project_name}.{name}"],
                banned_macros,
            )
        )

        return reduce(
            lambda z, key: z or (key in self.manifest.macros), all_banned_macro_keys, False
        )

    # this method updates the model node rendered and unrendered config as well
    # as the node object. Used to populate these values when circumventing jinja
    # rendering like the static parser.
    def populate(self, node: ModelNode, config: ContextConfig, statically_parsed: Dict[str, Any]):
        # manually fit configs in
        config._config_call_dict = _get_config_call_dict(statically_parsed)

        # if there are hooks present, this WILL render jinja. Will need to change
        # when the experimental parser supports hooks
        self.update_parsed_node_config(node, config, validate_config_call_dict=True)

        # update the unrendered config with values from the file.
        # values from yaml files are in there already
        node.unrendered_config.update(dict(statically_parsed["configs"]))

        # set refs and sources on the node object
        refs: List[RefArgs] = []
        for ref in statically_parsed["refs"]:
            name = ref.get("name")
            package = ref.get("package")
            version = ref.get("version")
            refs.append(RefArgs(name, package, version))

        node.refs += refs
        node.sources += statically_parsed["sources"]

        # configs don't need to be merged into the node because they
        # are read from config._config_call_dict

    # the manifest is often huge so this method avoids deepcopying it
    def partial_deepcopy(self):
        return ModelParser(deepcopy(self.project), self.manifest, deepcopy(self.root_project))


# pure function. safe to use elsewhere, but unlikely to be useful outside this file.
def _get_config_call_dict(static_parser_result: Dict[str, Any]) -> Dict[str, Any]:
    config_call_dict: Dict[str, Any] = {}

    for c in static_parser_result["configs"]:
        merge_config_dicts(config_call_dict, {c[0]: c[1]})

    return config_call_dict


# TODO if we format sources in the extractor to match this type, we won't need this function.
def _shift_sources(static_parser_result: Dict[str, List[Any]]) -> Dict[str, List[Any]]:
    shifted_result = deepcopy(static_parser_result)
    source_calls = []

    for s in static_parser_result["sources"]:
        source_calls.append([s[0], s[1]])
    shifted_result["sources"] = source_calls

    return shifted_result


# returns a list of string codes to be sent as a tracking event
def _get_exp_sample_result(
    sample_node: ModelNode,
    sample_config: ContextConfig,
    node: ModelNode,
    config: ContextConfig,
) -> List[str]:
    result: List[Tuple[int, str]] = _get_sample_result(sample_node, sample_config, node, config)

    def process(codemsg):
        code, msg = codemsg
        return f"0{code}_experimental_{msg}"

    return list(map(process, result))


# returns a list of string codes to be sent as a tracking event
def _get_stable_sample_result(
    sample_node: ModelNode,
    sample_config: ContextConfig,
    node: ModelNode,
    config: ContextConfig,
) -> List[str]:
    result: List[Tuple[int, str]] = _get_sample_result(sample_node, sample_config, node, config)

    def process(codemsg):
        code, msg = codemsg
        return f"8{code}_stable_{msg}"

    return list(map(process, result))


# returns a list of string codes that need a single digit prefix to be prepended
# before being sent as a tracking event
def _get_sample_result(
    sample_node: ModelNode,
    sample_config: ContextConfig,
    node: ModelNode,
    config: ContextConfig,
) -> List[Tuple[int, str]]:
    result: List[Tuple[int, str]] = []
    # look for false positive configs
    for k in sample_config._config_call_dict.keys():
        if k not in config._config_call_dict.keys():
            result += [(2, "false_positive_config_value")]
            break

    # look for missed configs
    for k in config._config_call_dict.keys():
        if k not in sample_config._config_call_dict.keys():
            result += [(3, "missed_config_value")]
            break

    # look for false positive sources
    for s in sample_node.sources:
        if s not in node.sources:
            result += [(4, "false_positive_source_value")]
            break

    # look for missed sources
    for s in node.sources:
        if s not in sample_node.sources:
            result += [(5, "missed_source_value")]
            break

    # look for false positive refs
    for r in sample_node.refs:
        if r not in node.refs:
            result += [(6, "false_positive_ref_value")]
            break

    # look for missed refs
    for r in node.refs:
        if r not in sample_node.refs:
            result += [(7, "missed_ref_value")]
            break

    # if there are no errors, return a success value
    if not result:
        result = [(0, "exact_match")]

    return result
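For orientation, below is a minimal sketch of the kind of Python model file this parser handles. The file name, model names, and config keys are illustrative and not taken from the package; the `model(dbt, session)` signature and the single `return` are what `PythonValidationVisitor` enforces, and the `dbt.config` / `dbt.config.get` / `dbt.source` / `dbt.ref` calls with literal arguments are what `PythonParseVisitor` collects for `parse_python_model`.

# my_python_model.py -- illustrative sketch only, not part of the dvt-core package
def model(dbt, session):
    # PythonValidationVisitor requires exactly one `model` function whose first
    # argument is named `dbt`, that takes exactly two arguments, and whose last
    # statement is a single non-tuple `return`.
    dbt.config(materialized="table")                 # collected by visit_Call as a config call
    batch_size = dbt.config.get("batch_size", 1000)  # recorded as a config key with a default
    orders = dbt.source("raw", "orders")             # recorded as a source; names are made up
    stg_orders = dbt.ref("stg_orders")               # recorded as a ref; name is made up

    df = stg_orders  # whatever dataframe work the model needs would happen here
    return df

For the SQL path, judging only from the fields that `populate`, `_get_config_call_dict`, and `_shift_sources` read, the dict returned by `py_extract_from_source` appears to be shaped roughly like this (a hedged sketch; the exact `dbt_extractor` output format is not shown in this diff):

statically_parsed = {
    "refs": [{"name": "stg_orders", "package": None, "version": None}],  # read via ref.get(...)
    "sources": [["raw", "orders"]],          # _shift_sources keeps the first two entries of each source call
    "configs": [("materialized", "table")],  # (key, value) pairs merged by _get_config_call_dict
}

When extraction fails, or when a project overrides the banned `ref`, `source`, or `config` macros, `run_static_parser` returns the strings "cannot_parse" or "has_banned_macro" instead of a dict, and `render_update` falls back to full jinja rendering.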