dvt-core 1.11.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dvt-core might be problematic.
- dvt/__init__.py +7 -0
- dvt/_pydantic_shim.py +26 -0
- dvt/adapters/__init__.py +16 -0
- dvt/adapters/multi_adapter_manager.py +268 -0
- dvt/artifacts/__init__.py +0 -0
- dvt/artifacts/exceptions/__init__.py +1 -0
- dvt/artifacts/exceptions/schemas.py +31 -0
- dvt/artifacts/resources/__init__.py +116 -0
- dvt/artifacts/resources/base.py +68 -0
- dvt/artifacts/resources/types.py +93 -0
- dvt/artifacts/resources/v1/analysis.py +10 -0
- dvt/artifacts/resources/v1/catalog.py +23 -0
- dvt/artifacts/resources/v1/components.py +275 -0
- dvt/artifacts/resources/v1/config.py +282 -0
- dvt/artifacts/resources/v1/documentation.py +11 -0
- dvt/artifacts/resources/v1/exposure.py +52 -0
- dvt/artifacts/resources/v1/function.py +53 -0
- dvt/artifacts/resources/v1/generic_test.py +32 -0
- dvt/artifacts/resources/v1/group.py +22 -0
- dvt/artifacts/resources/v1/hook.py +11 -0
- dvt/artifacts/resources/v1/macro.py +30 -0
- dvt/artifacts/resources/v1/metric.py +173 -0
- dvt/artifacts/resources/v1/model.py +146 -0
- dvt/artifacts/resources/v1/owner.py +10 -0
- dvt/artifacts/resources/v1/saved_query.py +112 -0
- dvt/artifacts/resources/v1/seed.py +42 -0
- dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt/artifacts/resources/v1/semantic_model.py +315 -0
- dvt/artifacts/resources/v1/singular_test.py +14 -0
- dvt/artifacts/resources/v1/snapshot.py +92 -0
- dvt/artifacts/resources/v1/source_definition.py +85 -0
- dvt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
- dvt/artifacts/schemas/__init__.py +0 -0
- dvt/artifacts/schemas/base.py +191 -0
- dvt/artifacts/schemas/batch_results.py +24 -0
- dvt/artifacts/schemas/catalog/__init__.py +12 -0
- dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
- dvt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
- dvt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
- dvt/artifacts/schemas/results.py +148 -0
- dvt/artifacts/schemas/run/__init__.py +2 -0
- dvt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt/artifacts/schemas/run/v5/run.py +184 -0
- dvt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt/artifacts/utils/validation.py +153 -0
- dvt/cli/__init__.py +1 -0
- dvt/cli/context.py +16 -0
- dvt/cli/exceptions.py +56 -0
- dvt/cli/flags.py +558 -0
- dvt/cli/main.py +971 -0
- dvt/cli/option_types.py +121 -0
- dvt/cli/options.py +79 -0
- dvt/cli/params.py +803 -0
- dvt/cli/requires.py +478 -0
- dvt/cli/resolvers.py +32 -0
- dvt/cli/types.py +40 -0
- dvt/clients/__init__.py +0 -0
- dvt/clients/checked_load.py +82 -0
- dvt/clients/git.py +164 -0
- dvt/clients/jinja.py +206 -0
- dvt/clients/jinja_static.py +245 -0
- dvt/clients/registry.py +192 -0
- dvt/clients/yaml_helper.py +68 -0
- dvt/compilation.py +833 -0
- dvt/compute/__init__.py +26 -0
- dvt/compute/base.py +288 -0
- dvt/compute/engines/__init__.py +13 -0
- dvt/compute/engines/duckdb_engine.py +368 -0
- dvt/compute/engines/spark_engine.py +273 -0
- dvt/compute/query_analyzer.py +212 -0
- dvt/compute/router.py +483 -0
- dvt/config/__init__.py +4 -0
- dvt/config/catalogs.py +95 -0
- dvt/config/compute_config.py +406 -0
- dvt/config/profile.py +411 -0
- dvt/config/profiles_v2.py +464 -0
- dvt/config/project.py +893 -0
- dvt/config/renderer.py +232 -0
- dvt/config/runtime.py +491 -0
- dvt/config/selectors.py +209 -0
- dvt/config/utils.py +78 -0
- dvt/connectors/.gitignore +6 -0
- dvt/connectors/README.md +306 -0
- dvt/connectors/catalog.yml +217 -0
- dvt/connectors/download_connectors.py +300 -0
- dvt/constants.py +29 -0
- dvt/context/__init__.py +0 -0
- dvt/context/base.py +746 -0
- dvt/context/configured.py +136 -0
- dvt/context/context_config.py +350 -0
- dvt/context/docs.py +82 -0
- dvt/context/exceptions_jinja.py +179 -0
- dvt/context/macro_resolver.py +195 -0
- dvt/context/macros.py +171 -0
- dvt/context/manifest.py +73 -0
- dvt/context/providers.py +2198 -0
- dvt/context/query_header.py +14 -0
- dvt/context/secret.py +59 -0
- dvt/context/target.py +74 -0
- dvt/contracts/__init__.py +0 -0
- dvt/contracts/files.py +413 -0
- dvt/contracts/graph/__init__.py +0 -0
- dvt/contracts/graph/manifest.py +1904 -0
- dvt/contracts/graph/metrics.py +98 -0
- dvt/contracts/graph/model_config.py +71 -0
- dvt/contracts/graph/node_args.py +42 -0
- dvt/contracts/graph/nodes.py +1806 -0
- dvt/contracts/graph/semantic_manifest.py +233 -0
- dvt/contracts/graph/unparsed.py +812 -0
- dvt/contracts/project.py +417 -0
- dvt/contracts/results.py +53 -0
- dvt/contracts/selection.py +23 -0
- dvt/contracts/sql.py +86 -0
- dvt/contracts/state.py +69 -0
- dvt/contracts/util.py +46 -0
- dvt/deprecations.py +347 -0
- dvt/deps/__init__.py +0 -0
- dvt/deps/base.py +153 -0
- dvt/deps/git.py +196 -0
- dvt/deps/local.py +80 -0
- dvt/deps/registry.py +131 -0
- dvt/deps/resolver.py +149 -0
- dvt/deps/tarball.py +121 -0
- dvt/docs/source/_ext/dbt_click.py +118 -0
- dvt/docs/source/conf.py +32 -0
- dvt/env_vars.py +64 -0
- dvt/event_time/event_time.py +40 -0
- dvt/event_time/sample_window.py +60 -0
- dvt/events/__init__.py +16 -0
- dvt/events/base_types.py +37 -0
- dvt/events/core_types_pb2.py +2 -0
- dvt/events/logging.py +109 -0
- dvt/events/types.py +2534 -0
- dvt/exceptions.py +1487 -0
- dvt/flags.py +89 -0
- dvt/graph/__init__.py +11 -0
- dvt/graph/cli.py +248 -0
- dvt/graph/graph.py +172 -0
- dvt/graph/queue.py +213 -0
- dvt/graph/selector.py +375 -0
- dvt/graph/selector_methods.py +976 -0
- dvt/graph/selector_spec.py +223 -0
- dvt/graph/thread_pool.py +18 -0
- dvt/hooks.py +21 -0
- dvt/include/README.md +49 -0
- dvt/include/__init__.py +3 -0
- dvt/include/global_project.py +4 -0
- dvt/include/starter_project/.gitignore +4 -0
- dvt/include/starter_project/README.md +15 -0
- dvt/include/starter_project/__init__.py +3 -0
- dvt/include/starter_project/analyses/.gitkeep +0 -0
- dvt/include/starter_project/dvt_project.yml +36 -0
- dvt/include/starter_project/macros/.gitkeep +0 -0
- dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt/include/starter_project/models/example/schema.yml +21 -0
- dvt/include/starter_project/seeds/.gitkeep +0 -0
- dvt/include/starter_project/snapshots/.gitkeep +0 -0
- dvt/include/starter_project/tests/.gitkeep +0 -0
- dvt/internal_deprecations.py +27 -0
- dvt/jsonschemas/__init__.py +3 -0
- dvt/jsonschemas/jsonschemas.py +309 -0
- dvt/jsonschemas/project/0.0.110.json +4717 -0
- dvt/jsonschemas/project/0.0.85.json +2015 -0
- dvt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt/jsonschemas/resources/latest.json +6773 -0
- dvt/links.py +4 -0
- dvt/materializations/__init__.py +0 -0
- dvt/materializations/incremental/__init__.py +0 -0
- dvt/materializations/incremental/microbatch.py +235 -0
- dvt/mp_context.py +8 -0
- dvt/node_types.py +37 -0
- dvt/parser/__init__.py +23 -0
- dvt/parser/analysis.py +21 -0
- dvt/parser/base.py +549 -0
- dvt/parser/common.py +267 -0
- dvt/parser/docs.py +52 -0
- dvt/parser/fixtures.py +51 -0
- dvt/parser/functions.py +30 -0
- dvt/parser/generic_test.py +100 -0
- dvt/parser/generic_test_builders.py +334 -0
- dvt/parser/hooks.py +119 -0
- dvt/parser/macros.py +137 -0
- dvt/parser/manifest.py +2204 -0
- dvt/parser/models.py +574 -0
- dvt/parser/partial.py +1179 -0
- dvt/parser/read_files.py +445 -0
- dvt/parser/schema_generic_tests.py +423 -0
- dvt/parser/schema_renderer.py +111 -0
- dvt/parser/schema_yaml_readers.py +936 -0
- dvt/parser/schemas.py +1467 -0
- dvt/parser/search.py +149 -0
- dvt/parser/seeds.py +28 -0
- dvt/parser/singular_test.py +20 -0
- dvt/parser/snapshots.py +44 -0
- dvt/parser/sources.py +557 -0
- dvt/parser/sql.py +63 -0
- dvt/parser/unit_tests.py +622 -0
- dvt/plugins/__init__.py +20 -0
- dvt/plugins/contracts.py +10 -0
- dvt/plugins/exceptions.py +2 -0
- dvt/plugins/manager.py +164 -0
- dvt/plugins/manifest.py +21 -0
- dvt/profiler.py +20 -0
- dvt/py.typed +1 -0
- dvt/runners/__init__.py +2 -0
- dvt/runners/exposure_runner.py +7 -0
- dvt/runners/no_op_runner.py +46 -0
- dvt/runners/saved_query_runner.py +7 -0
- dvt/selected_resources.py +8 -0
- dvt/task/__init__.py +0 -0
- dvt/task/base.py +504 -0
- dvt/task/build.py +197 -0
- dvt/task/clean.py +57 -0
- dvt/task/clone.py +162 -0
- dvt/task/compile.py +151 -0
- dvt/task/compute.py +366 -0
- dvt/task/debug.py +650 -0
- dvt/task/deps.py +280 -0
- dvt/task/docs/__init__.py +3 -0
- dvt/task/docs/generate.py +408 -0
- dvt/task/docs/index.html +250 -0
- dvt/task/docs/serve.py +28 -0
- dvt/task/freshness.py +323 -0
- dvt/task/function.py +122 -0
- dvt/task/group_lookup.py +46 -0
- dvt/task/init.py +374 -0
- dvt/task/list.py +237 -0
- dvt/task/printer.py +176 -0
- dvt/task/profiles.py +256 -0
- dvt/task/retry.py +175 -0
- dvt/task/run.py +1146 -0
- dvt/task/run_operation.py +142 -0
- dvt/task/runnable.py +802 -0
- dvt/task/seed.py +104 -0
- dvt/task/show.py +150 -0
- dvt/task/snapshot.py +57 -0
- dvt/task/sql.py +111 -0
- dvt/task/test.py +464 -0
- dvt/tests/fixtures/__init__.py +1 -0
- dvt/tests/fixtures/project.py +620 -0
- dvt/tests/util.py +651 -0
- dvt/tracking.py +529 -0
- dvt/utils/__init__.py +3 -0
- dvt/utils/artifact_upload.py +151 -0
- dvt/utils/utils.py +408 -0
- dvt/version.py +249 -0
- dvt_core-1.11.0b4.dist-info/METADATA +252 -0
- dvt_core-1.11.0b4.dist-info/RECORD +261 -0
- dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
- dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
- dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/parser/sources.py
ADDED
@@ -0,0 +1,557 @@
+import itertools
+from dataclasses import replace
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
+
+from dvt.artifacts.resources import FreshnessThreshold, SourceConfig, Time
+from dvt.config import RuntimeConfig
+from dvt.context.context_config import (
+    BaseContextConfigGenerator,
+    ContextConfigGenerator,
+    UnrenderedConfigGenerator,
+)
+from dvt.contracts.graph.manifest import Manifest, SourceKey
+from dvt.contracts.graph.nodes import (
+    GenericTestNode,
+    SourceDefinition,
+    UnpatchedSourceDefinition,
+)
+from dvt.contracts.graph.unparsed import (
+    SourcePatch,
+    SourceTablePatch,
+    UnparsedColumn,
+    UnparsedSourceDefinition,
+    UnparsedSourceTableDefinition,
+)
+from dvt.events.types import FreshnessConfigProblem, UnusedTables, ValidationWarning
+from dvt.exceptions import ParsingError
+from dvt.node_types import NodeType
+from dvt.parser.common import ParserRef
+from dvt.parser.schema_generic_tests import SchemaGenericTestParser
+
+from dbt.adapters.capability import Capability
+from dbt.adapters.factory import get_adapter
+from dbt_common.events.functions import fire_event, warn_or_error
+from dbt_common.exceptions import DbtInternalError
+
+
+# An UnparsedSourceDefinition is taken directly from the yaml
+# file. It can affect multiple tables, all of which will eventually
+# have their own source node. An UnparsedSourceDefinition will
+# generate multiple UnpatchedSourceDefinition nodes (one per
+# table) in the SourceParser.add_source_definitions. The
+# SourcePatcher takes an UnparsedSourceDefinition and the
+# SourcePatch and produces a SourceDefinition. Each
+# SourcePatch can be applied to multiple UnpatchedSourceDefinitions.
+class SourcePatcher:
+    def __init__(
+        self,
+        root_project: RuntimeConfig,
+        manifest: Manifest,
+    ) -> None:
+        self.root_project = root_project
+        self.manifest = manifest
+        self.generic_test_parsers: Dict[str, SchemaGenericTestParser] = {}
+        self.patches_used: Dict[SourceKey, Set[str]] = {}
+        self.sources: Dict[str, SourceDefinition] = {}
+        self._deprecations: Set[Any] = set()
+
+    # This method calls the 'parse_source' method which takes
+    # the UnpatchedSourceDefinitions in the manifest and combines them
+    # with SourcePatches to produce SourceDefinitions.
+    def construct_sources(self) -> None:
+        for unique_id, unpatched in self.manifest.sources.items():
+            schema_file = self.manifest.files[unpatched.file_id]
+            if isinstance(unpatched, SourceDefinition):
+                # In partial parsing, there will be SourceDefinitions
+                # which must be retained.
+                self.sources[unpatched.unique_id] = unpatched
+                continue
+            # returns None if there is no patch
+            patch = self.get_patch_for(unpatched)
+
+            # returns unpatched if there is no patch
+            patched = self.patch_source(unpatched, patch)
+
+            # now use the patched UnpatchedSourceDefinition to extract test data.
+            for test in self.get_source_tests(patched):
+                if test.config.enabled:
+                    self.manifest.add_node_nofile(test)
+                else:
+                    self.manifest.add_disabled_nofile(test)
+                # save the test unique_id in the schema_file, so we can
+                # process in partial parsing
+                test_from = {"key": "sources", "name": patched.source.name}
+                schema_file.add_test(test.unique_id, test_from)
+
+            # Convert UnpatchedSourceDefinition to a SourceDefinition
+            parsed = self.parse_source(patched)
+            if parsed.config.enabled:
+                self.sources[unique_id] = parsed
+            else:
+                self.manifest.add_disabled_nofile(parsed)
+
+        self.warn_unused()
+
+    def patch_source(
+        self,
+        unpatched: UnpatchedSourceDefinition,
+        patch: Optional[SourcePatch],
+    ) -> UnpatchedSourceDefinition:
+
+        # This skips patching if no patch exists because of the
+        # performance overhead of converting to and from dicts
+        if patch is None:
+            return unpatched
+
+        source_dct = unpatched.source.to_dict(omit_none=True)
+        table_dct = unpatched.table.to_dict(omit_none=True)
+        patch_path: Optional[Path] = None
+
+        source_table_patch: Optional[SourceTablePatch] = None
+
+        if patch is not None:
+            source_table_patch = patch.get_table_named(unpatched.table.name)
+            source_dct.update(patch.to_patch_dict())
+            patch_path = patch.path
+
+        if source_table_patch is not None:
+            table_dct.update(source_table_patch.to_patch_dict())
+
+        source = UnparsedSourceDefinition.from_dict(source_dct)
+        table = UnparsedSourceTableDefinition.from_dict(table_dct)
+        return replace(unpatched, source=source, table=table, patch_path=patch_path)
+
+    # This converts an UnpatchedSourceDefinition to a SourceDefinition
+    def parse_source(self, target: UnpatchedSourceDefinition) -> SourceDefinition:
+        source = target.source
+        table = target.table
+        refs = ParserRef.from_target(table)
+        unique_id = target.unique_id
+        description = table.description or ""
+        source_description = source.description or ""
+
+        quoting = source.quoting.merged(table.quoting)
+        # Retain original source meta prior to merge with table meta
+        source_meta = {**source.meta, **source.config.get("meta", {})}
+
+        config = self._generate_source_config(
+            target=target,
+            rendered=True,
+        )
+
+        config = config.finalize_and_validate()
+
+        unrendered_config = self._generate_source_config(
+            target=target,
+            rendered=False,
+        )
+
+        if not isinstance(config, SourceConfig):
+            raise DbtInternalError(
+                f"Calculated a {type(config)} for a source, but expected a SourceConfig"
+            )
+
+        default_database = self.root_project.credentials.database
+
+        parsed_source = SourceDefinition(
+            package_name=target.package_name,
+            database=(source.database or default_database),
+            unrendered_database=source.unrendered_database,
+            schema=(source.schema or source.name),
+            unrendered_schema=source.unrendered_schema,
+            identifier=(table.identifier or table.name),
+            path=target.path,
+            original_file_path=target.original_file_path,
+            columns=refs.column_info,
+            unique_id=unique_id,
+            name=table.name,
+            description=description,
+            external=table.external,
+            source_name=source.name,
+            source_description=source_description,
+            source_meta=source_meta,
+            meta=config.meta,
+            loader=source.loader,
+            loaded_at_field=config.loaded_at_field,
+            loaded_at_query=config.loaded_at_query,
+            freshness=config.freshness,
+            quoting=quoting,
+            resource_type=NodeType.Source,
+            fqn=target.fqn,
+            tags=config.tags,
+            config=config,
+            unrendered_config=unrendered_config,
+            profile=source.profile,  # DVT: propagate profile reference for multi-source support
+        )
+
+        if (
+            parsed_source.freshness
+            and not parsed_source.loaded_at_field
+            and not get_adapter(self.root_project).supports(Capability.TableLastModifiedMetadata)
+        ):
+            # Metadata-based freshness is being used by default for this node,
+            # but is not available through the configured adapter, so warn the
+            # user that freshness info will not be collected for this node at
+            # runtime.
+            fire_event(
+                FreshnessConfigProblem(
+                    msg=f"The configured adapter does not support metadata-based freshness. A loaded_at_field must be specified for source '{source.name}.{table.name}'."
+                )
+            )
+
+        # relation name is added after instantiation because the adapter does
+        # not provide the relation name for an UnpatchedSourceDefinition object
+        parsed_source.relation_name = self._get_relation_name(parsed_source)
+        return parsed_source
+
+    # Use the SchemaGenericTestParser to parse the source tests
+    def get_generic_test_parser_for(self, package_name: str) -> "SchemaGenericTestParser":
+        if package_name in self.generic_test_parsers:
+            generic_test_parser = self.generic_test_parsers[package_name]
+        else:
+            all_projects = self.root_project.load_dependencies()
+            project = all_projects[package_name]
+            generic_test_parser = SchemaGenericTestParser(
+                project, self.manifest, self.root_project
+            )
+            self.generic_test_parsers[package_name] = generic_test_parser
+        return generic_test_parser
+
+    def get_source_tests(self, target: UnpatchedSourceDefinition) -> Iterable[GenericTestNode]:
+        is_root_project = True if self.root_project.project_name == target.package_name else False
+        target.validate_data_tests(is_root_project)
+        for data_test, column in target.get_tests():
+            yield self.parse_source_test(
+                target=target,
+                data_test=data_test,
+                column=column,
+            )
+
+    def get_patch_for(
+        self,
+        unpatched: UnpatchedSourceDefinition,
+    ) -> Optional[SourcePatch]:
+        if isinstance(unpatched, SourceDefinition):
+            return None
+        key = (unpatched.package_name, unpatched.source.name)
+        patch: Optional[SourcePatch] = self.manifest.source_patches.get(key)
+        if patch is None:
+            return None
+        if key not in self.patches_used:
+            # mark the key as used
+            self.patches_used[key] = set()
+        if patch.get_table_named(unpatched.table.name) is not None:
+            self.patches_used[key].add(unpatched.table.name)
+        return patch
+
+    # This calls parse_generic_test in the SchemaGenericTestParser
+    def parse_source_test(
+        self,
+        target: UnpatchedSourceDefinition,
+        data_test: Dict[str, Any],
+        column: Optional[UnparsedColumn],
+    ) -> GenericTestNode:
+        column_name: Optional[str]
+        if column is None:
+            column_name = None
+        else:
+            column_name = column.name
+            should_quote = column.quote or (column.quote is None and target.quote_columns)
+            if should_quote:
+                column_name = get_adapter(self.root_project).quote(column_name)
+
+        tags_sources = [target.source.tags, target.table.tags]
+        if column is not None:
+            tags_sources.append(column.tags)
+            if column_config_tags := column.config.get("tags", []):
+                if isinstance(column_config_tags, list):
+                    tags_sources.append(column_config_tags)
+                elif isinstance(column_config_tags, str):
+                    tags_sources.append([column_config_tags])
+        tags = list(itertools.chain.from_iterable(tags_sources))
+
+        generic_test_parser = self.get_generic_test_parser_for(target.package_name)
+        node = generic_test_parser.parse_generic_test(
+            target=target,
+            data_test=data_test,
+            tags=tags,
+            column_name=column_name,
+            schema_file_id=target.file_id,
+            version=None,
+        )
+        return node
+
+    def _generate_source_config(self, target: UnpatchedSourceDefinition, rendered: bool):
+        generator: BaseContextConfigGenerator
+        if rendered:
+            generator = ContextConfigGenerator(self.root_project)
+        else:
+            generator = UnrenderedConfigGenerator(self.root_project)
+
+        # configs with precedence set
+        precedence_configs = dict()
+        # first apply source configs
+        precedence_configs.update(target.source.config)
+        # then overwrite anything that is defined on source tables
+        # this is not quite complex enough for configs that can be set as top-level node keys, but
+        # it works while source configs can only include `enabled`.
+        precedence_configs.update(target.table.config)
+
+        precedence_freshness = self.calculate_freshness_from_raw_target(target)
+        if precedence_freshness:
+            precedence_configs["freshness"] = precedence_freshness.to_dict()
+        elif precedence_freshness is None:
+            precedence_configs["freshness"] = None
+        else:
+            # this means that the user did not set a freshness threshold in the source schema file, as such
+            # there should be no freshness precedence
+            precedence_configs.pop("freshness", None)
+
+        precedence_loaded_at_field, precedence_loaded_at_query = (
+            self.calculate_loaded_at_field_query_from_raw_target(target)
+        )
+        precedence_configs["loaded_at_field"] = precedence_loaded_at_field
+        precedence_configs["loaded_at_query"] = precedence_loaded_at_query
+
+        # Handle merges across source, table, and config for meta and tags
+        precedence_meta = self.calculate_meta_from_raw_target(target)
+        precedence_configs["meta"] = precedence_meta
+
+        precedence_tags = self.calculate_tags_from_raw_target(target)
+        precedence_configs["tags"] = precedence_tags
+
+        # Because freshness is an "object" config, the freshness from the dbt_project.yml and the freshness
+        # from the schema file _won't_ get merged by this process. The result will be that the freshness will
+        # come from the schema file if provided, and if not, it'll fall back to the dbt_project.yml freshness.
+        return generator.calculate_node_config(
+            config_call_dict={},
+            fqn=target.fqn,
+            resource_type=NodeType.Source,
+            project_name=target.package_name,
+            base=False,
+            patch_config_dict=precedence_configs,
+        )
+
+    def _get_relation_name(self, node: SourceDefinition):
+        adapter = get_adapter(self.root_project)
+        relation_cls = adapter.Relation
+        return str(relation_cls.create_from(self.root_project, node))
+
+    def warn_unused(self) -> None:
+        unused_tables: Dict[SourceKey, Optional[Set[str]]] = {}
+        for patch in self.manifest.source_patches.values():
+            key = (patch.overrides, patch.name)
+            if key not in self.patches_used:
+                unused_tables[key] = None
+            elif patch.tables is not None:
+                table_patches = {t.name for t in patch.tables}
+                unused = table_patches - self.patches_used[key]
+                # don't add unused tables, the
+                if unused:
+                    # because patches are required to be unique, we can safely
+                    # write without looking
+                    unused_tables[key] = unused
+
+        if unused_tables:
+            unused_tables_formatted = self.get_unused_msg(unused_tables)
+            warn_or_error(UnusedTables(unused_tables=unused_tables_formatted))
+
+        self.manifest.source_patches = {}
+
+    def get_unused_msg(
+        self,
+        unused_tables: Dict[SourceKey, Optional[Set[str]]],
+    ) -> List:
+        unused_tables_formatted = []
+        for key, table_names in unused_tables.items():
+            patch = self.manifest.source_patches[key]
+            patch_name = f"{patch.overrides}.{patch.name}"
+            if table_names is None:
+                unused_tables_formatted.append(f"  - Source {patch_name} (in {patch.path})")
+            else:
+                for table_name in sorted(table_names):
+                    unused_tables_formatted.append(
+                        f"  - Source table {patch_name}.{table_name} " f"(in {patch.path})"
+                    )
+        return unused_tables_formatted
+
+    def calculate_freshness_from_raw_target(
+        self,
+        target: UnpatchedSourceDefinition,
+    ) -> Optional[FreshnessThreshold]:
+        source: UnparsedSourceDefinition = target.source
+
+        source_freshness = source.freshness
+
+        source_config_freshness_raw: Optional[Dict] = source.config.get(
+            "freshness", {}
+        )  # Will only be None if the user explicitly set it to null
+        source_config_freshness: Optional[FreshnessThreshold] = (
+            FreshnessThreshold.from_dict(source_config_freshness_raw)
+            if source_config_freshness_raw is not None
+            else None
+        )
+
+        table: UnparsedSourceTableDefinition = target.table
+        table_freshness = table.freshness
+
+        table_config_freshness_raw: Optional[Dict] = table.config.get(
+            "freshness", {}
+        )  # Will only be None if the user explicitly set it to null
+        table_config_freshness: Optional[FreshnessThreshold] = (
+            FreshnessThreshold.from_dict(table_config_freshness_raw)
+            if table_config_freshness_raw is not None
+            else None
+        )
+
+        return merge_source_freshness(
+            source_freshness,
+            source_config_freshness,
+            table_freshness,
+            table_config_freshness,
+        )
+
+    def calculate_loaded_at_field_query_from_raw_target(
+        self, target: UnpatchedSourceDefinition
+    ) -> Tuple[Optional[str], Optional[str]]:
+        # We need to be able to tell the difference between explicitly setting the loaded_at_field to None/null
+        # and when it's simply not set. This allows a user to override the source level loaded_at_field so that
+        # a specific table can default to metadata-based freshness.
+
+        # loaded_at_field and loaded_at_query are supported both at top-level (deprecated) and config-level (preferred) on sources and tables.
+        if target.table.loaded_at_field_present and (
+            target.table.loaded_at_query or target.table.config.get("loaded_at_query")
+        ):
+            raise ParsingError(
+                "Cannot specify both loaded_at_field and loaded_at_query at table level."
+            )
+        if (target.source.loaded_at_field or target.source.config.get("loaded_at_field")) and (
+            target.source.loaded_at_query or target.source.config.get("loaded_at_query")
+        ):
+            raise ParsingError(
+                "Cannot specify both loaded_at_field and loaded_at_query at source level."
+            )
+
+        if (
+            target.table.loaded_at_field_present
+            or target.table.loaded_at_field is not None
+            or target.table.config.get("loaded_at_field") is not None
+        ):
+            loaded_at_field = target.table.loaded_at_field or target.table.config.get(
+                "loaded_at_field"
+            )
+        else:
+            loaded_at_field = target.source.loaded_at_field or target.source.config.get(
+                "loaded_at_field"
+            )  # may be None, that's okay
+
+        loaded_at_query: Optional[str]
+        if (
+            target.table.loaded_at_query is not None
+            or target.table.config.get("loaded_at_query") is not None
+        ):
+            loaded_at_query = target.table.loaded_at_query or target.table.config.get(
+                "loaded_at_query"
+            )
+        else:
+            if target.table.loaded_at_field_present:
+                loaded_at_query = None
+            else:
+                loaded_at_query = target.source.loaded_at_query or target.source.config.get(
+                    "loaded_at_query"
+                )
+
+        return loaded_at_field, loaded_at_query
+
+    def calculate_meta_from_raw_target(self, target: UnpatchedSourceDefinition) -> Dict[str, Any]:
+        source_meta = target.source.meta or {}
+        source_config_meta = target.source.config.get("meta", {})
+        source_config_meta = source_config_meta if isinstance(source_config_meta, dict) else {}
+
+        table_meta = target.table.meta or {}
+        table_config_meta = target.table.config.get("meta", {})
+        table_config_meta = table_config_meta if isinstance(table_config_meta, dict) else {}
+
+        return {**source_meta, **source_config_meta, **table_meta, **table_config_meta}
+
+    def calculate_tags_from_raw_target(self, target: UnpatchedSourceDefinition) -> List[str]:
+        source_tags = target.source.tags or []
+        source_config_tags = self._get_config_tags(
+            target.source.config.get("tags", []), target.source.name
+        )
+
+        table_tags = target.table.tags or []
+        table_config_tags = self._get_config_tags(
+            target.table.config.get("tags", []), target.table.name
+        )
+
+        return sorted(
+            set(itertools.chain(source_tags, source_config_tags, table_tags, table_config_tags))
+        )
+
+    def _get_config_tags(self, tags: Any, source_name: str) -> List[str]:
+        config_tags = tags if isinstance(tags, list) else [tags]
+
+        config_tags_valid: List[str] = []
+        for tag in config_tags:
+            if not isinstance(tag, str):
+                warn_or_error(
+                    ValidationWarning(
+                        field_name=f"`config.tags`: {tags}",
+                        resource_type=NodeType.Source.value,
+                        node_name=source_name,
+                    )
+                )
+            else:
+                config_tags_valid.append(tag)
+
+        return config_tags_valid
+
+
+def merge_freshness_time_thresholds(
+    base: Optional[Time], update: Optional[Time]
+) -> Optional[Time]:
+    if base and update:
+        return base.merged(update)
+    elif update is None:
+        return None
+    else:
+        return update or base
+
+
+def merge_source_freshness(
+    *thresholds: Optional[FreshnessThreshold],
+) -> Optional[FreshnessThreshold]:
+    if not thresholds:
+        return None
+
+    # Initialize with the first threshold.
+    # If the first threshold is None, current_merged_value will be None,
+    # and subsequent merges will correctly follow the original logic.
+    current_merged_value: Optional[FreshnessThreshold] = thresholds[0]
+
+    # Iterate through the rest of the thresholds, applying the original pairwise logic
+    for i in range(1, len(thresholds)):
+        base = current_merged_value
+        update = thresholds[i]
+
+        if base is not None and update is not None:
+            merged_freshness_obj = base.merged(update)
+            # merge one level deeper the error_after and warn_after thresholds
+            merged_error_after = merge_freshness_time_thresholds(
+                base.error_after, update.error_after
+            )
+            merged_warn_after = merge_freshness_time_thresholds(base.warn_after, update.warn_after)
+
+            merged_freshness_obj.error_after = merged_error_after
+            merged_freshness_obj.warn_after = merged_warn_after
+            current_merged_value = merged_freshness_obj
+        elif base is None and bool(update):
+            # If current_merged_value (base) is None, the update becomes the new value
+            current_merged_value = update
+        else:  # This covers cases where 'update' is None, or both 'base' and 'update' are None.
+            # Following original logic, if 'update' is None, the result of the pair-merge is None.
+            current_merged_value = None
+
+    return current_merged_value
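The precedence merge in `_generate_source_config` above is ordinary dict layering: source-level config is applied first, then table-level config overwrites it, and the result is handed to `calculate_node_config` as `patch_config_dict`. A minimal standalone sketch with made-up values (illustrative only, not part of the package):

    from typing import Any, Dict

    # Hypothetical config values, standing in for target.source.config and target.table.config.
    source_config: Dict[str, Any] = {"enabled": True, "tags": ["raw"], "meta": {"owner": "data-eng"}}
    table_config: Dict[str, Any] = {"tags": ["raw", "events"]}

    precedence_configs: Dict[str, Any] = {}
    precedence_configs.update(source_config)  # source-level config applied first
    precedence_configs.update(table_config)   # table-level keys overwrite source-level keys

    print(precedence_configs)
    # {'enabled': True, 'tags': ['raw', 'events'], 'meta': {'owner': 'data-eng'}}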
dvt/parser/sql.py
ADDED
@@ -0,0 +1,63 @@
+import os
+from dataclasses import dataclass
+from typing import Iterable
+
+from dvt.contracts.graph.manifest import SourceFile
+from dvt.contracts.graph.nodes import Macro, SqlNode
+from dvt.contracts.graph.unparsed import UnparsedMacro
+from dvt.node_types import NodeType
+from dvt.parser.base import SimpleSQLParser
+from dvt.parser.macros import MacroParser
+from dvt.parser.search import FileBlock
+
+from dbt_common.exceptions import DbtInternalError
+
+
+@dataclass
+class SqlBlock(FileBlock):
+    block_name: str
+
+    @property
+    def name(self):
+        return self.block_name
+
+
+class SqlBlockParser(SimpleSQLParser[SqlNode]):
+    def parse_from_dict(self, dct, validate=True) -> SqlNode:
+        if validate:
+            SqlNode.validate(dct)
+        return SqlNode.from_dict(dct)
+
+    @property
+    def resource_type(self) -> NodeType:
+        return NodeType.SqlOperation
+
+    @staticmethod
+    def get_compiled_path(block: FileBlock):
+        # we do it this way to make mypy happy
+        if not isinstance(block, SqlBlock):
+            raise DbtInternalError(
+                "While parsing SQL operation, got an actual file block instead of "
+                "an SQL block: {}".format(block)
+            )
+
+        return os.path.join("sql", block.name)
+
+    def parse_remote(self, sql: str, name: str) -> SqlNode:
+        source_file = SourceFile.remote(sql, self.project.project_name, "sql")
+        contents = SqlBlock(block_name=name, file=source_file)
+        return self.parse_node(contents)
+
+
+class SqlMacroParser(MacroParser):
+    def parse_remote(self, contents) -> Iterable[Macro]:
+        base = UnparsedMacro(
+            path="from remote system",
+            original_file_path="from remote system",
+            package_name=self.project.project_name,
+            raw_code=contents,
+            language="sql",
+            resource_type=NodeType.Macro,
+        )
+        for node in self.parse_unparsed_macros(base):
+            yield node
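For orientation on `SqlBlockParser` above: a remote SQL block is named by its `block_name`, and its compiled output is placed under a `sql/` prefix. A minimal stand-in sketch of that naming behavior (illustrative only; `StubSqlBlock` is hypothetical and not the package's `SqlBlock`, which extends `FileBlock` and carries a `SourceFile`):

    import os
    from dataclasses import dataclass

    @dataclass
    class StubSqlBlock:
        block_name: str

        @property
        def name(self) -> str:
            return self.block_name

    def stub_get_compiled_path(block: StubSqlBlock) -> str:
        # mirrors the idea of SqlBlockParser.get_compiled_path: remote SQL compiles under sql/<name>
        return os.path.join("sql", block.name)

    print(stub_get_compiled_path(StubSqlBlock("adhoc_query")))  # sql/adhoc_query on POSIX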