dvt-core 1.11.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dvt-core might be problematic. Click here for more details.
- dvt/__init__.py +7 -0
- dvt/_pydantic_shim.py +26 -0
- dvt/adapters/__init__.py +16 -0
- dvt/adapters/multi_adapter_manager.py +268 -0
- dvt/artifacts/__init__.py +0 -0
- dvt/artifacts/exceptions/__init__.py +1 -0
- dvt/artifacts/exceptions/schemas.py +31 -0
- dvt/artifacts/resources/__init__.py +116 -0
- dvt/artifacts/resources/base.py +68 -0
- dvt/artifacts/resources/types.py +93 -0
- dvt/artifacts/resources/v1/analysis.py +10 -0
- dvt/artifacts/resources/v1/catalog.py +23 -0
- dvt/artifacts/resources/v1/components.py +275 -0
- dvt/artifacts/resources/v1/config.py +282 -0
- dvt/artifacts/resources/v1/documentation.py +11 -0
- dvt/artifacts/resources/v1/exposure.py +52 -0
- dvt/artifacts/resources/v1/function.py +53 -0
- dvt/artifacts/resources/v1/generic_test.py +32 -0
- dvt/artifacts/resources/v1/group.py +22 -0
- dvt/artifacts/resources/v1/hook.py +11 -0
- dvt/artifacts/resources/v1/macro.py +30 -0
- dvt/artifacts/resources/v1/metric.py +173 -0
- dvt/artifacts/resources/v1/model.py +146 -0
- dvt/artifacts/resources/v1/owner.py +10 -0
- dvt/artifacts/resources/v1/saved_query.py +112 -0
- dvt/artifacts/resources/v1/seed.py +42 -0
- dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt/artifacts/resources/v1/semantic_model.py +315 -0
- dvt/artifacts/resources/v1/singular_test.py +14 -0
- dvt/artifacts/resources/v1/snapshot.py +92 -0
- dvt/artifacts/resources/v1/source_definition.py +85 -0
- dvt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
- dvt/artifacts/schemas/__init__.py +0 -0
- dvt/artifacts/schemas/base.py +191 -0
- dvt/artifacts/schemas/batch_results.py +24 -0
- dvt/artifacts/schemas/catalog/__init__.py +12 -0
- dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
- dvt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
- dvt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
- dvt/artifacts/schemas/results.py +148 -0
- dvt/artifacts/schemas/run/__init__.py +2 -0
- dvt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt/artifacts/schemas/run/v5/run.py +184 -0
- dvt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt/artifacts/utils/validation.py +153 -0
- dvt/cli/__init__.py +1 -0
- dvt/cli/context.py +16 -0
- dvt/cli/exceptions.py +56 -0
- dvt/cli/flags.py +558 -0
- dvt/cli/main.py +971 -0
- dvt/cli/option_types.py +121 -0
- dvt/cli/options.py +79 -0
- dvt/cli/params.py +803 -0
- dvt/cli/requires.py +478 -0
- dvt/cli/resolvers.py +32 -0
- dvt/cli/types.py +40 -0
- dvt/clients/__init__.py +0 -0
- dvt/clients/checked_load.py +82 -0
- dvt/clients/git.py +164 -0
- dvt/clients/jinja.py +206 -0
- dvt/clients/jinja_static.py +245 -0
- dvt/clients/registry.py +192 -0
- dvt/clients/yaml_helper.py +68 -0
- dvt/compilation.py +833 -0
- dvt/compute/__init__.py +26 -0
- dvt/compute/base.py +288 -0
- dvt/compute/engines/__init__.py +13 -0
- dvt/compute/engines/duckdb_engine.py +368 -0
- dvt/compute/engines/spark_engine.py +273 -0
- dvt/compute/query_analyzer.py +212 -0
- dvt/compute/router.py +483 -0
- dvt/config/__init__.py +4 -0
- dvt/config/catalogs.py +95 -0
- dvt/config/compute_config.py +406 -0
- dvt/config/profile.py +411 -0
- dvt/config/profiles_v2.py +464 -0
- dvt/config/project.py +893 -0
- dvt/config/renderer.py +232 -0
- dvt/config/runtime.py +491 -0
- dvt/config/selectors.py +209 -0
- dvt/config/utils.py +78 -0
- dvt/connectors/.gitignore +6 -0
- dvt/connectors/README.md +306 -0
- dvt/connectors/catalog.yml +217 -0
- dvt/connectors/download_connectors.py +300 -0
- dvt/constants.py +29 -0
- dvt/context/__init__.py +0 -0
- dvt/context/base.py +746 -0
- dvt/context/configured.py +136 -0
- dvt/context/context_config.py +350 -0
- dvt/context/docs.py +82 -0
- dvt/context/exceptions_jinja.py +179 -0
- dvt/context/macro_resolver.py +195 -0
- dvt/context/macros.py +171 -0
- dvt/context/manifest.py +73 -0
- dvt/context/providers.py +2198 -0
- dvt/context/query_header.py +14 -0
- dvt/context/secret.py +59 -0
- dvt/context/target.py +74 -0
- dvt/contracts/__init__.py +0 -0
- dvt/contracts/files.py +413 -0
- dvt/contracts/graph/__init__.py +0 -0
- dvt/contracts/graph/manifest.py +1904 -0
- dvt/contracts/graph/metrics.py +98 -0
- dvt/contracts/graph/model_config.py +71 -0
- dvt/contracts/graph/node_args.py +42 -0
- dvt/contracts/graph/nodes.py +1806 -0
- dvt/contracts/graph/semantic_manifest.py +233 -0
- dvt/contracts/graph/unparsed.py +812 -0
- dvt/contracts/project.py +417 -0
- dvt/contracts/results.py +53 -0
- dvt/contracts/selection.py +23 -0
- dvt/contracts/sql.py +86 -0
- dvt/contracts/state.py +69 -0
- dvt/contracts/util.py +46 -0
- dvt/deprecations.py +347 -0
- dvt/deps/__init__.py +0 -0
- dvt/deps/base.py +153 -0
- dvt/deps/git.py +196 -0
- dvt/deps/local.py +80 -0
- dvt/deps/registry.py +131 -0
- dvt/deps/resolver.py +149 -0
- dvt/deps/tarball.py +121 -0
- dvt/docs/source/_ext/dbt_click.py +118 -0
- dvt/docs/source/conf.py +32 -0
- dvt/env_vars.py +64 -0
- dvt/event_time/event_time.py +40 -0
- dvt/event_time/sample_window.py +60 -0
- dvt/events/__init__.py +16 -0
- dvt/events/base_types.py +37 -0
- dvt/events/core_types_pb2.py +2 -0
- dvt/events/logging.py +109 -0
- dvt/events/types.py +2534 -0
- dvt/exceptions.py +1487 -0
- dvt/flags.py +89 -0
- dvt/graph/__init__.py +11 -0
- dvt/graph/cli.py +248 -0
- dvt/graph/graph.py +172 -0
- dvt/graph/queue.py +213 -0
- dvt/graph/selector.py +375 -0
- dvt/graph/selector_methods.py +976 -0
- dvt/graph/selector_spec.py +223 -0
- dvt/graph/thread_pool.py +18 -0
- dvt/hooks.py +21 -0
- dvt/include/README.md +49 -0
- dvt/include/__init__.py +3 -0
- dvt/include/global_project.py +4 -0
- dvt/include/starter_project/.gitignore +4 -0
- dvt/include/starter_project/README.md +15 -0
- dvt/include/starter_project/__init__.py +3 -0
- dvt/include/starter_project/analyses/.gitkeep +0 -0
- dvt/include/starter_project/dvt_project.yml +36 -0
- dvt/include/starter_project/macros/.gitkeep +0 -0
- dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt/include/starter_project/models/example/schema.yml +21 -0
- dvt/include/starter_project/seeds/.gitkeep +0 -0
- dvt/include/starter_project/snapshots/.gitkeep +0 -0
- dvt/include/starter_project/tests/.gitkeep +0 -0
- dvt/internal_deprecations.py +27 -0
- dvt/jsonschemas/__init__.py +3 -0
- dvt/jsonschemas/jsonschemas.py +309 -0
- dvt/jsonschemas/project/0.0.110.json +4717 -0
- dvt/jsonschemas/project/0.0.85.json +2015 -0
- dvt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt/jsonschemas/resources/latest.json +6773 -0
- dvt/links.py +4 -0
- dvt/materializations/__init__.py +0 -0
- dvt/materializations/incremental/__init__.py +0 -0
- dvt/materializations/incremental/microbatch.py +235 -0
- dvt/mp_context.py +8 -0
- dvt/node_types.py +37 -0
- dvt/parser/__init__.py +23 -0
- dvt/parser/analysis.py +21 -0
- dvt/parser/base.py +549 -0
- dvt/parser/common.py +267 -0
- dvt/parser/docs.py +52 -0
- dvt/parser/fixtures.py +51 -0
- dvt/parser/functions.py +30 -0
- dvt/parser/generic_test.py +100 -0
- dvt/parser/generic_test_builders.py +334 -0
- dvt/parser/hooks.py +119 -0
- dvt/parser/macros.py +137 -0
- dvt/parser/manifest.py +2204 -0
- dvt/parser/models.py +574 -0
- dvt/parser/partial.py +1179 -0
- dvt/parser/read_files.py +445 -0
- dvt/parser/schema_generic_tests.py +423 -0
- dvt/parser/schema_renderer.py +111 -0
- dvt/parser/schema_yaml_readers.py +936 -0
- dvt/parser/schemas.py +1467 -0
- dvt/parser/search.py +149 -0
- dvt/parser/seeds.py +28 -0
- dvt/parser/singular_test.py +20 -0
- dvt/parser/snapshots.py +44 -0
- dvt/parser/sources.py +557 -0
- dvt/parser/sql.py +63 -0
- dvt/parser/unit_tests.py +622 -0
- dvt/plugins/__init__.py +20 -0
- dvt/plugins/contracts.py +10 -0
- dvt/plugins/exceptions.py +2 -0
- dvt/plugins/manager.py +164 -0
- dvt/plugins/manifest.py +21 -0
- dvt/profiler.py +20 -0
- dvt/py.typed +1 -0
- dvt/runners/__init__.py +2 -0
- dvt/runners/exposure_runner.py +7 -0
- dvt/runners/no_op_runner.py +46 -0
- dvt/runners/saved_query_runner.py +7 -0
- dvt/selected_resources.py +8 -0
- dvt/task/__init__.py +0 -0
- dvt/task/base.py +504 -0
- dvt/task/build.py +197 -0
- dvt/task/clean.py +57 -0
- dvt/task/clone.py +162 -0
- dvt/task/compile.py +151 -0
- dvt/task/compute.py +366 -0
- dvt/task/debug.py +650 -0
- dvt/task/deps.py +280 -0
- dvt/task/docs/__init__.py +3 -0
- dvt/task/docs/generate.py +408 -0
- dvt/task/docs/index.html +250 -0
- dvt/task/docs/serve.py +28 -0
- dvt/task/freshness.py +323 -0
- dvt/task/function.py +122 -0
- dvt/task/group_lookup.py +46 -0
- dvt/task/init.py +374 -0
- dvt/task/list.py +237 -0
- dvt/task/printer.py +176 -0
- dvt/task/profiles.py +256 -0
- dvt/task/retry.py +175 -0
- dvt/task/run.py +1146 -0
- dvt/task/run_operation.py +142 -0
- dvt/task/runnable.py +802 -0
- dvt/task/seed.py +104 -0
- dvt/task/show.py +150 -0
- dvt/task/snapshot.py +57 -0
- dvt/task/sql.py +111 -0
- dvt/task/test.py +464 -0
- dvt/tests/fixtures/__init__.py +1 -0
- dvt/tests/fixtures/project.py +620 -0
- dvt/tests/util.py +651 -0
- dvt/tracking.py +529 -0
- dvt/utils/__init__.py +3 -0
- dvt/utils/artifact_upload.py +151 -0
- dvt/utils/utils.py +408 -0
- dvt/version.py +249 -0
- dvt_core-1.11.0b4.dist-info/METADATA +252 -0
- dvt_core-1.11.0b4.dist-info/RECORD +261 -0
- dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
- dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
- dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/tests/util.py
ADDED
|
@@ -0,0 +1,651 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import shutil
|
|
4
|
+
from contextlib import contextmanager
|
|
5
|
+
from contextvars import ContextVar, copy_context
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from io import StringIO
|
|
8
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
9
|
+
from unittest import mock
|
|
10
|
+
|
|
11
|
+
import pytz
|
|
12
|
+
import yaml
|
|
13
|
+
from dvt.cli.main import dbtRunner
|
|
14
|
+
from dvt.contracts.graph.manifest import Manifest
|
|
15
|
+
from dvt.materializations.incremental.microbatch import MicrobatchBuilder
|
|
16
|
+
|
|
17
|
+
from dbt.adapters.base.relation import BaseRelation
|
|
18
|
+
from dbt.adapters.factory import Adapter
|
|
19
|
+
from dbt_common.context import _INVOCATION_CONTEXT_VAR, InvocationContext
|
|
20
|
+
from dbt_common.events.base_types import EventLevel, EventMsg
|
|
21
|
+
from dbt_common.events.functions import (
|
|
22
|
+
capture_stdout_logs,
|
|
23
|
+
fire_event,
|
|
24
|
+
reset_metadata_vars,
|
|
25
|
+
stop_capture_stdout_logs,
|
|
26
|
+
)
|
|
27
|
+
from dbt_common.events.types import Note
|
|
28
|
+
|
|
29
|
+
# =============================================================================
|
|
30
|
+
# Test utilities
|
|
31
|
+
# run_dbt
|
|
32
|
+
# run_dbt_and_capture
|
|
33
|
+
# get_manifest
|
|
34
|
+
# copy_file
|
|
35
|
+
# rm_file
|
|
36
|
+
# write_file
|
|
37
|
+
# read_file
|
|
38
|
+
# mkdir
|
|
39
|
+
# rm_dir
|
|
40
|
+
# get_artifact
|
|
41
|
+
# update_config_file
|
|
42
|
+
# write_config_file
|
|
43
|
+
# get_unique_ids_in_results
|
|
44
|
+
# check_result_nodes_by_name
|
|
45
|
+
# check_result_nodes_by_unique_id
|
|
46
|
+
|
|
47
|
+
# SQL related utilities that use the adapter
|
|
48
|
+
# run_sql_with_adapter
|
|
49
|
+
# relation_from_name
|
|
50
|
+
# check_relation_types (table/view)
|
|
51
|
+
# check_relations_equal
|
|
52
|
+
# check_relation_has_expected_schema
|
|
53
|
+
# check_relations_equal_with_relations
|
|
54
|
+
# check_table_does_exist
|
|
55
|
+
# check_table_does_not_exist
|
|
56
|
+
# get_relation_columns
|
|
57
|
+
# update_rows
|
|
58
|
+
# generate_update_clause
|
|
59
|
+
#
|
|
60
|
+
# Classes for comparing fields in dictionaries
|
|
61
|
+
# AnyFloat
|
|
62
|
+
# AnyInteger
|
|
63
|
+
# AnyString
|
|
64
|
+
# AnyStringWith
|
|
65
|
+
# =============================================================================
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# 'run_dbt' is used in pytest tests to run dbt commands. It will return
|
|
69
|
+
# different objects depending on the command that is executed.
|
|
70
|
+
# For a run command (and most other commands) it will return a list
|
|
71
|
+
# of results. For the 'docs generate' command it returns a CatalogArtifact.
|
|
72
|
+
# The first parameter is a list of dbt command line arguments, such as
|
|
73
|
+
# run_dbt(["run", "--vars", "seed_name: base"])
|
|
74
|
+
# If the command is expected to fail, pass in "expect_pass=False":
|
|
75
|
+
# run_dbt(["test"], expect_pass=False)
|
|
76
|
+
def run_dbt(
    args: Optional[List[str]] = None,
    expect_pass: bool = True,
    callbacks: Optional[List[Callable[[EventMsg], None]]] = None,
):
    """Invoke a dbt command in-process and return its result.

    Args:
        args: Command line arguments, e.g. ``["run", "--vars", "seed_name: base"]``.
            Defaults to ``["run"]``. The list passed in is never modified.
        expect_pass: Expected value of the invocation's ``success`` flag. Pass
            ``None`` to skip the check.
        callbacks: Optional event callbacks handed to ``dbtRunner``.

    Returns:
        The ``result`` attribute of the object returned by ``dbtRunner.invoke``.

    Raises:
        Exception: Whatever exception the invocation recorded is re-raised as-is.
        AssertionError: If the success flag does not equal ``expect_pass``.
    """
    # reset global vars
    reset_metadata_vars()

    # Work on a private copy: the flags appended below must not leak into the
    # caller's list (a list reused across calls would otherwise keep growing).
    args = ["run"] if args is None else list(args)

    print("\n\nInvoking dbt with {}".format(args))
    from dvt.flags import get_flags

    flags = get_flags()
    project_dir = getattr(flags, "PROJECT_DIR", None)
    profiles_dir = getattr(flags, "PROFILES_DIR", None)
    if project_dir and "--project-dir" not in args:
        args.extend(["--project-dir", project_dir])
    if profiles_dir and "--profiles-dir" not in args:
        args.extend(["--profiles-dir", profiles_dir])
    dbt = dbtRunner(callbacks=callbacks)

    res = dbt.invoke(args)

    # the exception is immediately raised to be caught in tests
    # using a pattern like `with pytest.raises(SomeException):`
    if res.exception is not None:
        raise res.exception

    if expect_pass is not None:
        assert res.success == expect_pass, "dbt exit state did not match expected"

    return res.result
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# Use this if you need to capture the command logs in a test.
|
|
113
|
+
# If you want the logs that are normally written to a file, you must
|
|
114
|
+
# start with the "--debug" flag. The structured schema log CI test
|
|
115
|
+
# will turn the logs into json, so you have to be prepared for that.
|
|
116
|
+
def run_dbt_and_capture(
    args: Optional[List[str]] = None,
    expect_pass: bool = True,
):
    """Run a dbt command via ``run_dbt`` while capturing the stdout logs.

    Returns a ``(result, captured_text)`` tuple. Log capturing is always
    stopped, even when the command raises.
    """
    try:
        log_buffer = StringIO()
        capture_stdout_logs(log_buffer)
        outcome = run_dbt(args, expect_pass=expect_pass)
        captured = log_buffer.getvalue()

    finally:
        stop_capture_stdout_logs()

    return outcome, captured
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def get_logging_events(log_output, event_name):
    """Return the parsed JSON log lines whose ``info.name`` equals ``event_name``.

    ``log_output`` is split on newlines. Only lines that start with ``{`` and
    contain ``event_name`` are parsed as JSON.
    """
    matches = []
    for raw_line in log_output.split("\n"):
        # Empty lines and non-json lines (e.g. adapter logging) both fail this
        # check, so they are skipped without being parsed.
        if not raw_line.startswith("{"):
            continue
        if event_name not in raw_line:
            continue
        parsed = json.loads(raw_line)
        if parsed["info"]["name"] == event_name:
            matches.append(parsed)
    return matches
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# Used in test cases to get the manifest from the partial parsing file
|
|
149
|
+
# Note: this uses an internal version of the manifest, and in the future
|
|
150
|
+
# parts of it will not be supported for external use.
|
|
151
|
+
def get_manifest(project_root) -> Optional[Manifest]:
    """Load the manifest from ``<project_root>/target/partial_parse.msgpack``.

    Returns ``None`` when that file does not exist.
    """
    msgpack_path = os.path.join(project_root, "target", "partial_parse.msgpack")
    if not os.path.exists(msgpack_path):
        return None
    with open(msgpack_path, "rb") as handle:
        payload = handle.read()
    loaded: Manifest = Manifest.from_msgpack(payload)  # type: ignore[attr-defined]
    return loaded
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# Used in test cases to get the run_results.json file.
|
|
163
|
+
def get_run_results(project_root) -> Any:
    """Return the parsed ``<project_root>/target/run_results.json``.

    Returns ``None`` when the file does not exist.
    """
    results_path = os.path.join(project_root, "target", "run_results.json")
    if not os.path.exists(results_path):
        return None
    with open(results_path) as handle:
        return json.load(handle)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# Used in tests to copy a file, usually from a data directory to the project directory
|
|
173
|
+
def copy_file(src_path, src, dest_path, dest) -> None:
    """Copy ``src_path/src`` to ``dest_path/<dest parts...>``.

    ``dest`` is a list of path components, so nested directories such as
    ``["models", "model.sql"]`` can be given.
    """
    source_file = os.path.join(src_path, src)
    target_file = os.path.join(dest_path, *dest)
    shutil.copyfile(source_file, target_file)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
# Used in tests when you want to remove a file from the project directory
|
|
183
|
+
def rm_file(*paths) -> None:
    """Delete the file whose path is formed by joining ``paths``."""
    target = os.path.join(*paths)
    os.remove(target)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# Used in tests to write out the string contents of a file to a
|
|
189
|
+
# file in the project directory.
|
|
190
|
+
# We need to explicitly use encoding="utf-8" because otherwise on
|
|
191
|
+
# Windows we'll get codepage 1252 and things might break
|
|
192
|
+
def write_file(contents, *paths):
    """Write the string ``contents`` to the file at the joined ``paths``.

    The file is always written as UTF-8 rather than the platform default.
    """
    destination = os.path.join(*paths)
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(contents)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def file_exists(*paths):
    """Return whether anything exists at the path formed by joining ``paths``."""
    candidate = os.path.join(*paths)
    return os.path.exists(candidate)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# Used in test utilities
|
|
203
|
+
def read_file(*paths):
    """Return the text contents of the file at the joined ``paths``.

    The file is decoded as UTF-8 so that it round-trips with ``write_file``,
    which always writes UTF-8. Relying on the platform default encoding can
    mis-decode non-ASCII content (e.g. on Windows code pages).
    """
    with open(os.path.join(*paths), "r", encoding="utf-8") as fp:
        return fp.read()
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# To create a directory
|
|
211
|
+
def mkdir(directory_path):
    """Create ``directory_path``, including any missing parent directories.

    Raises ``FileExistsError`` with a short message if the directory already
    exists.
    """
    try:
        os.makedirs(directory_path)
    except FileExistsError:
        message = f"{directory_path} already exists."
        raise FileExistsError(message)
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
# To remove a directory
|
|
219
|
+
def rm_dir(directory_path):
    """Recursively delete ``directory_path``.

    Raises ``FileNotFoundError`` with a short message if it does not exist.
    """
    try:
        shutil.rmtree(directory_path)
    except FileNotFoundError:
        message = f"{directory_path} does not exist."
        raise FileNotFoundError(message)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def rename_dir(src_directory_path, dest_directory_path):
    """Rename ``src_directory_path`` to ``dest_directory_path`` via ``os.rename``."""
    source, destination = src_directory_path, dest_directory_path
    os.rename(source, destination)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# Get an artifact (usually from the target directory) such as
|
|
231
|
+
# manifest.json or catalog.json to use in a test
|
|
232
|
+
def get_artifact(*paths):
    """Read the file at the joined ``paths`` and return it parsed as JSON."""
    return json.loads(read_file(*paths))
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def write_artifact(dct, *paths):
    """Serialize ``dct`` to JSON and write it to the file at the joined ``paths``."""
    write_file(json.dumps(dct), *paths)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# For updating yaml config files
|
|
244
|
+
def update_config_file(updates, *paths):
    """Merge ``updates`` into the top level of a YAML config file, in place.

    The file at the joined ``paths`` is loaded, updated with ``dict.update``
    semantics (top-level keys are replaced, not deep-merged) and written back.

    Args:
        updates: Mapping of top-level keys to set in the config.
        *paths: Path components of the YAML file.
    """
    current_yaml = read_file(*paths)
    config = yaml.safe_load(current_yaml)
    # An empty (or comment-only) YAML document loads as None; treat it as an
    # empty mapping instead of failing with AttributeError on .update().
    if config is None:
        config = {}
    config.update(updates)
    new_yaml = yaml.safe_dump(config)
    write_file(new_yaml, *paths)
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
# Write new config file
|
|
253
|
+
def write_config_file(data, *paths):
    """Write ``data`` to the file at the joined ``paths``.

    A plain ``dict`` is dumped to YAML first; any other value is passed to
    ``write_file`` unchanged.
    """
    # Exact-type check on purpose: only plain dicts are serialized.
    payload = yaml.safe_dump(data) if type(data) is dict else data
    write_file(payload, *paths)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# Get the unique_ids in dbt command results
|
|
260
|
+
def get_unique_ids_in_results(results):
    """Return ``result.node.unique_id`` for every result, in order."""
    return [entry.node.unique_id for entry in results]
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
# Check the nodes in the results returned by a dbt run command
|
|
268
|
+
def check_result_nodes_by_name(results, names):
    """Assert that the set of ``result.node.name`` values equals ``set(names)``.

    Duplicates and ordering are ignored on both sides.
    """
    found_names = {entry.node.name for entry in results}
    assert set(names) == found_names
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
# Check the nodes in the results returned by a dbt run command
|
|
276
|
+
def check_result_nodes_by_unique_id(results, unique_ids):
    """Assert that the set of ``result.node.unique_id`` values equals ``set(unique_ids)``.

    Duplicates and ordering are ignored on both sides.
    """
    found_ids = {entry.node.unique_id for entry in results}
    assert set(unique_ids) == found_ids
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
# Check datetime is between start and end/now
|
|
284
|
+
def check_datetime_between(timestr, start, end=None):
    """Assert that ``timestr`` lies within ``[start, end]``.

    ``timestr`` must match ``%Y-%m-%dT%H:%M:%S.%fZ`` and is parsed into a naive
    datetime. When ``end`` is omitted, the current UTC time (made naive) is used.
    """
    upper = datetime.now(timezone.utc).replace(tzinfo=None) if end is None else end
    moment = datetime.strptime(timestr, "%Y-%m-%dT%H:%M:%S.%fZ")
    assert start <= moment
    assert upper >= moment
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class TestProcessingException(Exception):
    """Raised by the test utilities when their own input is invalid."""

    # The class name starts with "Test", so pytest would try to collect it
    # (and emit a collection warning) wherever it is imported into a test
    # module. Opt out explicitly.
    __test__ = False
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
# Testing utilities that use adapter code
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# Uses:
|
|
301
|
+
# adapter.config.credentials
|
|
302
|
+
# adapter.quote
|
|
303
|
+
# adapter.run_sql_for_tests
|
|
304
|
+
def run_sql_with_adapter(adapter, sql, fetch=None):
    """Run ``sql`` through ``adapter.run_sql_for_tests`` on a test connection.

    ``{schema}`` and ``{database}`` placeholders in ``sql`` are filled in from
    the adapter's credentials (the database is passed through
    ``adapter.quote``). Blank SQL is a no-op that returns ``None``.
    """
    if not sql.strip():
        return

    # substitute schema and database in sql
    credentials = adapter.config.credentials
    sql = sql.format(
        schema=credentials.schema,
        database=adapter.quote(credentials.database),
    )

    fire_event(
        Note(msg=f'test connection "__test" executing: {sql}'),
        level=EventLevel.DEBUG,
    )
    with get_connection(adapter) as connection:
        return adapter.run_sql_for_tests(sql, fetch, connection)
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
# Get a Relation object from the identifier (name of table/view).
|
|
322
|
+
# Uses the default database and schema. If you need a relation
|
|
323
|
+
# with a different schema, it should be constructed in the test.
|
|
324
|
+
# Uses:
|
|
325
|
+
# adapter.Relation
|
|
326
|
+
# adapter.config.credentials
|
|
327
|
+
# Relation.get_default_quote_policy
|
|
328
|
+
# Relation.get_default_include_policy
|
|
329
|
+
def relation_from_name(adapter, name: str):
    """Build a relation for ``name`` using the adapter's Relation class.

    ``name`` is split on ``.``. A bare identifier is prefixed with the
    credentials' schema, and a two-part name with the credentials' database,
    so that database, schema and identifier are always supplied.
    """
    # Different adapters have different Relation classes
    relation_cls = adapter.Relation
    creds = adapter.config.credentials
    quoting = relation_cls.get_default_quote_policy().to_dict()
    including = relation_cls.get_default_include_policy().to_dict()

    # Pad the name out to database/schema/identifier from the credentials.
    parts = name.split(".")
    if len(parts) == 1:
        parts.insert(0, creds.schema)
    if len(parts) == 2:
        parts.insert(0, creds.database)
    database, schema, identifier = parts[0], parts[1], parts[2]

    return relation_cls.create(
        include_policy=including,
        quote_policy=quoting,
        database=database,
        schema=schema,
        identifier=identifier,
    )
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
# Ensure that models with different materializations have the
|
|
362
|
+
# current table/view.
|
|
363
|
+
# Uses:
|
|
364
|
+
# adapter.list_relations_without_caching
|
|
365
|
+
def check_relation_types(adapter, relation_to_type):
    """Assert that listed relations have the expected type.

    ``relation_to_type`` maps a relation name to its expected type, e.g.::

        {
            "base": "table",
            "other": "view",
        }

    Every relation found in the involved schemas whose identifier equals a
    key must have the mapped type. Keys with no matching relation are not
    reported.
    """

    expected_by_relation = {}
    discovered = []
    schema_relations = set()

    for rel_name, rel_type in relation_to_type.items():
        rel = relation_from_name(adapter, rel_name)
        expected_by_relation[rel] = rel_type
        schema_relations.add(rel.without_identifier())

    with get_connection(adapter):
        for schema_relation in schema_relations:
            discovered.extend(adapter.list_relations_without_caching(schema_relation))

    for rel_name, rel_type in relation_to_type.items():
        for found in discovered:
            # this might be too broad
            if found.identifier != rel_name:
                continue
            assert found.type == rel_type, (
                f"Got an unexpected relation type of {found.type} "
                f"for relation {rel_name}, expected {rel_type}"
            )
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
# Replaces assertTablesEqual. assertManyTablesEqual can be replaced
|
|
398
|
+
# by doing a separate call for each set of tables/relations.
|
|
399
|
+
# Wraps check_relations_equal_with_relations by creating relations
|
|
400
|
+
# from the list of names passed in.
|
|
401
|
+
def check_relations_equal(adapter, relation_names: List, compare_snapshot_cols=False):
    """Compare the relations named in ``relation_names`` for equality.

    Each name is turned into a relation with ``relation_from_name`` and the
    comparison is delegated to ``check_relations_equal_with_relations``.
    Raises ``TestProcessingException`` when fewer than two names are given.
    """
    if len(relation_names) < 2:
        raise TestProcessingException(
            "Not enough relations to compare",
        )
    resolved = []
    for relation_name in relation_names:
        resolved.append(relation_from_name(adapter, relation_name))
    return check_relations_equal_with_relations(
        adapter, resolved, compare_snapshot_cols=compare_snapshot_cols
    )
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
# Used to check that a particular relation has an expected schema
|
|
413
|
+
# expected_schema should look like {"column_name": "expected datatype"}
|
|
414
|
+
def check_relation_has_expected_schema(adapter, relation_name, expected_schema: Dict):
    """Assert that a relation's columns match ``expected_schema``.

    ``expected_schema`` maps column name to expected data type and is compared
    against the ``name``/``data_type`` of each column the adapter reports.
    """
    target = relation_from_name(adapter, relation_name)
    with get_connection(adapter):
        actual_columns = {}
        for column in adapter.get_columns_in_relation(target):
            actual_columns[column.name] = column.data_type
    assert (
        actual_columns == expected_schema
    ), f"Actual schema did not match expected, actual: {json.dumps(actual_columns)}"
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
# This can be used when checking relations in different schemas, by supplying
|
|
424
|
+
# a list of relations. Called by 'check_relations_equal'.
|
|
425
|
+
# Uses:
|
|
426
|
+
# adapter.get_columns_in_relation
|
|
427
|
+
# adapter.get_rows_different_sql
|
|
428
|
+
# adapter.execute
|
|
429
|
+
def check_relations_equal_with_relations(
    adapter: Adapter, relations: List, compare_snapshot_cols=False
):
    """Assert that every relation has the same rows as the first one.

    The first entry of ``relations`` is the basis; each remaining relation is
    compared against it with the SQL from ``adapter.get_rows_different_sql``.
    That query must return exactly one row with two columns: the row count
    difference and the number of mismatched rows. Both must be 0.

    Args:
        adapter: Adapter used to list columns and execute the comparison SQL.
        relations: Relation objects to compare; the first is the basis.
        compare_snapshot_cols: When true, columns starting with ``dbt_`` are
            included in the comparison instead of being skipped.

    Raises:
        AssertionError: If the query result has an unexpected shape or the
            relations differ.
    """
    with get_connection(adapter):
        basis, compares = relations[0], relations[1:]
        # Skip columns starting with "dbt_" because we don't want to
        # compare those, since they are time sensitive
        # (unless comparing "dbt_" snapshot columns is explicitly enabled)
        column_names = [
            c.name
            for c in adapter.get_columns_in_relation(basis)  # type: ignore
            if not c.name.lower().startswith("dbt_") or compare_snapshot_cols
        ]

        for relation in compares:
            sql = adapter.get_rows_different_sql(basis, relation, column_names=column_names)  # type: ignore
            _, tbl = adapter.execute(sql, fetch=True)
            num_rows = len(tbl)
            assert (
                num_rows == 1
            ), f"Invalid sql query from get_rows_different_sql: incorrect number of rows ({num_rows})"
            num_cols = len(tbl[0])
            assert (
                num_cols == 2
            ), f"Invalid sql query from get_rows_different_sql: incorrect number of cols ({num_cols})"
            row_count_difference = tbl[0][0]
            assert (
                row_count_difference == 0
            ), f"Got {row_count_difference} difference in row count between {basis} and {relation}"
            rows_mismatched = tbl[0][1]
            assert (
                rows_mismatched == 0
            ), f"Got {rows_mismatched} different rows between {basis} and {relation}"
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
# Uses:
|
|
465
|
+
# adapter.update_column_sql
|
|
466
|
+
# adapter.execute
|
|
467
|
+
# adapter.commit_if_has_connection
|
|
468
|
+
def update_rows(adapter, update_rows_config):
    """Update a column of a relation as described by ``update_rows_config``.

    Required keys are ``name``, ``dst_col`` and ``clause``; ``where`` is
    optional. Example::

        {
            "name": "base",
            "dst_col": "some_date",
            "clause": {
                "type": "add_timestamp",
                "src_col": "some_date",
            },
            "where": "id > 10",
        }

    Raises ``TestProcessingException`` when a required key is missing.
    """
    for required in ["name", "dst_col", "clause"]:
        if required not in update_rows_config:
            raise TestProcessingException(f"Invalid update_rows: no {required}")

    set_clause = generate_update_clause(adapter, update_rows_config["clause"])

    where_clause = None
    if "where" in update_rows_config:
        where_clause = update_rows_config["where"]

    relation_name = update_rows_config["name"]
    target_column = update_rows_config["dst_col"]
    target_relation = relation_from_name(adapter, relation_name)

    with get_connection(adapter):
        statement = adapter.update_column_sql(
            dst_name=str(target_relation),
            dst_column=target_column,
            clause=set_clause,
            where_clause=where_clause,
        )
        adapter.execute(statement, auto_begin=True)
        adapter.commit_if_has_connection()
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
# This is called by the 'update_rows' function.
|
|
506
|
+
# Uses:
|
|
507
|
+
# adapter.timestamp_add_sql
|
|
508
|
+
# adapter.string_add_sql
|
|
509
|
+
def generate_update_clause(adapter, clause) -> str:
    """Turn an ``update_rows`` clause dictionary into SQL via the adapter.

    ``clause["type"]`` must be ``"add_timestamp"`` (requires ``src_col``;
    optional ``interval`` and ``number`` are forwarded) or ``"add_string"``
    (requires ``src_col`` and ``value``; ``location`` defaults to
    ``"append"``). Raises ``TestProcessingException`` on invalid input.
    """

    if "type" not in clause or clause["type"] not in ["add_timestamp", "add_string"]:
        raise TestProcessingException("invalid update_rows clause: type missing or incorrect")
    kind = clause["type"]

    if kind == "add_timestamp":
        if "src_col" not in clause:
            raise TestProcessingException("Invalid update_rows clause: no src_col")
        source_column = clause["src_col"]
        # Only these optional settings are forwarded to the adapter.
        extra = {k: v for k, v in clause.items() if k in ("interval", "number")}
        with get_connection(adapter):
            return adapter.timestamp_add_sql(add_to=source_column, **extra)

    if kind == "add_string":
        for required in ["src_col", "value"]:
            if required not in clause:
                raise TestProcessingException(f"Invalid update_rows clause: no {required}")
        source_column = clause["src_col"]
        addition = clause["value"]
        placement = clause.get("location", "append")
        with get_connection(adapter):
            return adapter.string_add_sql(source_column, addition, placement)

    return ""
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
@contextmanager
def get_connection(adapter, name="_test"):
    """Context manager yielding the adapter's thread connection.

    Enters adapter.connection_named(name) and, while inside it, yields
    whatever adapter.connections.get_thread_connection() returns.
    """
    named_connection = adapter.connection_named(name)
    with named_connection:
        yield adapter.connections.get_thread_connection()
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
# Uses:
|
|
546
|
+
# adapter.get_columns_in_relation
|
|
547
|
+
def get_relation_columns(adapter, name):
    """Return (name, dtype, char_size) tuples for a relation's columns.

    The relation is resolved with relation_from_name, its columns are read
    through adapter.get_columns_in_relation inside a test connection, and
    the resulting list is sorted by column name.
    """
    target = relation_from_name(adapter, name)
    with get_connection(adapter):
        described = [
            (col.name, col.dtype, col.char_size)
            for col in adapter.get_columns_in_relation(target)
        ]
        described.sort(key=lambda entry: entry[0])
        return described
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def check_table_does_not_exist(adapter, name):
    """Assert that get_relation_columns reports no columns for `name`."""
    found_columns = get_relation_columns(adapter, name)
    assert not len(found_columns)
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def check_table_does_exist(adapter, name):
    """Assert that get_relation_columns reports at least one column for `name`."""
    found_columns = get_relation_columns(adapter, name)
    assert 0 < len(found_columns)
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
# Utility classes for enabling comparison of dictionaries
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
class AnyFloat:
    """Placeholder that compares equal to any float. Use this in assert calls."""

    def __eq__(self, other):
        """Return True when `other` is a float instance."""
        return isinstance(other, float)

    def __repr__(self):
        # Readable output when an assertion comparing against this placeholder fails.
        return "AnyFloat()"
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
class AnyInteger:
    """Placeholder that compares equal to any int. Use this in assert calls.

    Because the check is isinstance(other, int), bool values also match
    (bool is a subclass of int).
    """

    def __eq__(self, other):
        """Return True when `other` is an int instance."""
        return isinstance(other, int)

    def __repr__(self):
        # Readable output when an assertion comparing against this placeholder fails.
        return "AnyInteger()"
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
class AnyString:
    """Placeholder that compares equal to any string. Use this in assert calls."""

    def __eq__(self, other):
        """Return True when `other` is a str instance."""
        return isinstance(other, str)

    def __repr__(self):
        # Readable output when an assertion comparing against this placeholder fails.
        return "AnyString()"
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
class AnyStringWith:
    """Placeholder equal to any string containing a given fragment.

    Example: AnyStringWith("AUTO") matches every str that contains "AUTO".
    With no fragment (contains=None) it matches every str.
    """

    def __init__(self, contains=None):
        self.contains = contains

    def __eq__(self, other):
        """Return True for a str that contains the fragment (or any str if none is set)."""
        if isinstance(other, str):
            fragment = self.contains
            return True if fragment is None else fragment in other
        return False

    def __repr__(self):
        return f"AnyStringWith<{self.contains!r}>"
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def assert_message_in_logs(message: str, logs: str, expected_pass: bool = True):
    """Assert that `message` is present in (or absent from) `logs`.

    When the DBT_LOG_FORMAT environment variable is "json", double quotes in
    `message` are backslash-escaped first so it matches the JSON-encoded log
    text. With expected_pass=True the message must appear in `logs`;
    otherwise it must not appear.
    """
    logs_are_json = os.environ.get("DBT_LOG_FORMAT", "") == "json"
    # JSON log lines carry escaped quotes, so escape them in the needle too.
    needle = message.replace(r'"', r"\"") if logs_are_json else message

    present = needle in logs
    if expected_pass:
        assert present
    else:
        assert not present
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def get_project_config(project):
    """Read dbt_project.yml from the project root and return it parsed with yaml.safe_load."""
    return yaml.safe_load(read_file(project.project_root, "dbt_project.yml"))
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
def set_project_config(project, config):
    """Dump `config` with yaml.safe_dump and write it to dbt_project.yml in the project root."""
    write_file(yaml.safe_dump(config), project.project_root, "dbt_project.yml")
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def get_model_file(project, relation: BaseRelation) -> str:
    """Return the contents of models/<relation.name>.sql under the project root."""
    model_filename = f"{relation.name}.sql"
    return read_file(project.project_root, "models", model_filename)
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
def set_model_file(project, relation: BaseRelation, model_sql: str):
    """Write `model_sql` to models/<relation.name>.sql under the project root."""
    model_filename = f"{relation.name}.sql"
    write_file(model_sql, project.project_root, "models", model_filename)
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
def safe_set_invocation_context():
    """Set the invocation context, locating its ContextVar by name.

    This works around a problem with how the pytest runner interacts with
    ContextVars: the module-level reference may have been loaded in a
    separate context, so the variable is looked up in the current context
    by name. If no variable with that name is found, the module-level
    reference is used. The chosen variable is then set to a new
    InvocationContext built from os.environ.
    """
    wanted_name = _INVOCATION_CONTEXT_VAR.name

    # Default to the module-level variable; prefer the first same-named
    # variable present in a copy of the current context.
    invocation_var = _INVOCATION_CONTEXT_VAR
    for candidate in copy_context():
        if candidate.name == wanted_name:
            invocation_var = candidate
            break

    invocation_var.set(InvocationContext(os.environ))
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
def patch_microbatch_end_time(dt_str: str):
    """Return a mock patcher that fixes MicrobatchBuilder.build_end_time.

    `dt_str` is parsed with the format "%Y-%m-%d %H:%M:%S" and tagged as UTC;
    the returned mock.patch.object patcher makes build_end_time return that
    datetime.
    """
    parsed = datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S")
    end_time_utc = parsed.replace(tzinfo=pytz.UTC)
    return mock.patch.object(MicrobatchBuilder, "build_end_time", return_value=end_time_utc)
|