dvt-core 1.11.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dvt-core might be problematic. Click here for more details.
- dvt/__init__.py +7 -0
- dvt/_pydantic_shim.py +26 -0
- dvt/adapters/__init__.py +16 -0
- dvt/adapters/multi_adapter_manager.py +268 -0
- dvt/artifacts/__init__.py +0 -0
- dvt/artifacts/exceptions/__init__.py +1 -0
- dvt/artifacts/exceptions/schemas.py +31 -0
- dvt/artifacts/resources/__init__.py +116 -0
- dvt/artifacts/resources/base.py +68 -0
- dvt/artifacts/resources/types.py +93 -0
- dvt/artifacts/resources/v1/analysis.py +10 -0
- dvt/artifacts/resources/v1/catalog.py +23 -0
- dvt/artifacts/resources/v1/components.py +275 -0
- dvt/artifacts/resources/v1/config.py +282 -0
- dvt/artifacts/resources/v1/documentation.py +11 -0
- dvt/artifacts/resources/v1/exposure.py +52 -0
- dvt/artifacts/resources/v1/function.py +53 -0
- dvt/artifacts/resources/v1/generic_test.py +32 -0
- dvt/artifacts/resources/v1/group.py +22 -0
- dvt/artifacts/resources/v1/hook.py +11 -0
- dvt/artifacts/resources/v1/macro.py +30 -0
- dvt/artifacts/resources/v1/metric.py +173 -0
- dvt/artifacts/resources/v1/model.py +146 -0
- dvt/artifacts/resources/v1/owner.py +10 -0
- dvt/artifacts/resources/v1/saved_query.py +112 -0
- dvt/artifacts/resources/v1/seed.py +42 -0
- dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt/artifacts/resources/v1/semantic_model.py +315 -0
- dvt/artifacts/resources/v1/singular_test.py +14 -0
- dvt/artifacts/resources/v1/snapshot.py +92 -0
- dvt/artifacts/resources/v1/source_definition.py +85 -0
- dvt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
- dvt/artifacts/schemas/__init__.py +0 -0
- dvt/artifacts/schemas/base.py +191 -0
- dvt/artifacts/schemas/batch_results.py +24 -0
- dvt/artifacts/schemas/catalog/__init__.py +12 -0
- dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
- dvt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
- dvt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
- dvt/artifacts/schemas/results.py +148 -0
- dvt/artifacts/schemas/run/__init__.py +2 -0
- dvt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt/artifacts/schemas/run/v5/run.py +184 -0
- dvt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt/artifacts/utils/validation.py +153 -0
- dvt/cli/__init__.py +1 -0
- dvt/cli/context.py +16 -0
- dvt/cli/exceptions.py +56 -0
- dvt/cli/flags.py +558 -0
- dvt/cli/main.py +971 -0
- dvt/cli/option_types.py +121 -0
- dvt/cli/options.py +79 -0
- dvt/cli/params.py +803 -0
- dvt/cli/requires.py +478 -0
- dvt/cli/resolvers.py +32 -0
- dvt/cli/types.py +40 -0
- dvt/clients/__init__.py +0 -0
- dvt/clients/checked_load.py +82 -0
- dvt/clients/git.py +164 -0
- dvt/clients/jinja.py +206 -0
- dvt/clients/jinja_static.py +245 -0
- dvt/clients/registry.py +192 -0
- dvt/clients/yaml_helper.py +68 -0
- dvt/compilation.py +833 -0
- dvt/compute/__init__.py +26 -0
- dvt/compute/base.py +288 -0
- dvt/compute/engines/__init__.py +13 -0
- dvt/compute/engines/duckdb_engine.py +368 -0
- dvt/compute/engines/spark_engine.py +273 -0
- dvt/compute/query_analyzer.py +212 -0
- dvt/compute/router.py +483 -0
- dvt/config/__init__.py +4 -0
- dvt/config/catalogs.py +95 -0
- dvt/config/compute_config.py +406 -0
- dvt/config/profile.py +411 -0
- dvt/config/profiles_v2.py +464 -0
- dvt/config/project.py +893 -0
- dvt/config/renderer.py +232 -0
- dvt/config/runtime.py +491 -0
- dvt/config/selectors.py +209 -0
- dvt/config/utils.py +78 -0
- dvt/connectors/.gitignore +6 -0
- dvt/connectors/README.md +306 -0
- dvt/connectors/catalog.yml +217 -0
- dvt/connectors/download_connectors.py +300 -0
- dvt/constants.py +29 -0
- dvt/context/__init__.py +0 -0
- dvt/context/base.py +746 -0
- dvt/context/configured.py +136 -0
- dvt/context/context_config.py +350 -0
- dvt/context/docs.py +82 -0
- dvt/context/exceptions_jinja.py +179 -0
- dvt/context/macro_resolver.py +195 -0
- dvt/context/macros.py +171 -0
- dvt/context/manifest.py +73 -0
- dvt/context/providers.py +2198 -0
- dvt/context/query_header.py +14 -0
- dvt/context/secret.py +59 -0
- dvt/context/target.py +74 -0
- dvt/contracts/__init__.py +0 -0
- dvt/contracts/files.py +413 -0
- dvt/contracts/graph/__init__.py +0 -0
- dvt/contracts/graph/manifest.py +1904 -0
- dvt/contracts/graph/metrics.py +98 -0
- dvt/contracts/graph/model_config.py +71 -0
- dvt/contracts/graph/node_args.py +42 -0
- dvt/contracts/graph/nodes.py +1806 -0
- dvt/contracts/graph/semantic_manifest.py +233 -0
- dvt/contracts/graph/unparsed.py +812 -0
- dvt/contracts/project.py +417 -0
- dvt/contracts/results.py +53 -0
- dvt/contracts/selection.py +23 -0
- dvt/contracts/sql.py +86 -0
- dvt/contracts/state.py +69 -0
- dvt/contracts/util.py +46 -0
- dvt/deprecations.py +347 -0
- dvt/deps/__init__.py +0 -0
- dvt/deps/base.py +153 -0
- dvt/deps/git.py +196 -0
- dvt/deps/local.py +80 -0
- dvt/deps/registry.py +131 -0
- dvt/deps/resolver.py +149 -0
- dvt/deps/tarball.py +121 -0
- dvt/docs/source/_ext/dbt_click.py +118 -0
- dvt/docs/source/conf.py +32 -0
- dvt/env_vars.py +64 -0
- dvt/event_time/event_time.py +40 -0
- dvt/event_time/sample_window.py +60 -0
- dvt/events/__init__.py +16 -0
- dvt/events/base_types.py +37 -0
- dvt/events/core_types_pb2.py +2 -0
- dvt/events/logging.py +109 -0
- dvt/events/types.py +2534 -0
- dvt/exceptions.py +1487 -0
- dvt/flags.py +89 -0
- dvt/graph/__init__.py +11 -0
- dvt/graph/cli.py +248 -0
- dvt/graph/graph.py +172 -0
- dvt/graph/queue.py +213 -0
- dvt/graph/selector.py +375 -0
- dvt/graph/selector_methods.py +976 -0
- dvt/graph/selector_spec.py +223 -0
- dvt/graph/thread_pool.py +18 -0
- dvt/hooks.py +21 -0
- dvt/include/README.md +49 -0
- dvt/include/__init__.py +3 -0
- dvt/include/global_project.py +4 -0
- dvt/include/starter_project/.gitignore +4 -0
- dvt/include/starter_project/README.md +15 -0
- dvt/include/starter_project/__init__.py +3 -0
- dvt/include/starter_project/analyses/.gitkeep +0 -0
- dvt/include/starter_project/dvt_project.yml +36 -0
- dvt/include/starter_project/macros/.gitkeep +0 -0
- dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt/include/starter_project/models/example/schema.yml +21 -0
- dvt/include/starter_project/seeds/.gitkeep +0 -0
- dvt/include/starter_project/snapshots/.gitkeep +0 -0
- dvt/include/starter_project/tests/.gitkeep +0 -0
- dvt/internal_deprecations.py +27 -0
- dvt/jsonschemas/__init__.py +3 -0
- dvt/jsonschemas/jsonschemas.py +309 -0
- dvt/jsonschemas/project/0.0.110.json +4717 -0
- dvt/jsonschemas/project/0.0.85.json +2015 -0
- dvt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt/jsonschemas/resources/latest.json +6773 -0
- dvt/links.py +4 -0
- dvt/materializations/__init__.py +0 -0
- dvt/materializations/incremental/__init__.py +0 -0
- dvt/materializations/incremental/microbatch.py +235 -0
- dvt/mp_context.py +8 -0
- dvt/node_types.py +37 -0
- dvt/parser/__init__.py +23 -0
- dvt/parser/analysis.py +21 -0
- dvt/parser/base.py +549 -0
- dvt/parser/common.py +267 -0
- dvt/parser/docs.py +52 -0
- dvt/parser/fixtures.py +51 -0
- dvt/parser/functions.py +30 -0
- dvt/parser/generic_test.py +100 -0
- dvt/parser/generic_test_builders.py +334 -0
- dvt/parser/hooks.py +119 -0
- dvt/parser/macros.py +137 -0
- dvt/parser/manifest.py +2204 -0
- dvt/parser/models.py +574 -0
- dvt/parser/partial.py +1179 -0
- dvt/parser/read_files.py +445 -0
- dvt/parser/schema_generic_tests.py +423 -0
- dvt/parser/schema_renderer.py +111 -0
- dvt/parser/schema_yaml_readers.py +936 -0
- dvt/parser/schemas.py +1467 -0
- dvt/parser/search.py +149 -0
- dvt/parser/seeds.py +28 -0
- dvt/parser/singular_test.py +20 -0
- dvt/parser/snapshots.py +44 -0
- dvt/parser/sources.py +557 -0
- dvt/parser/sql.py +63 -0
- dvt/parser/unit_tests.py +622 -0
- dvt/plugins/__init__.py +20 -0
- dvt/plugins/contracts.py +10 -0
- dvt/plugins/exceptions.py +2 -0
- dvt/plugins/manager.py +164 -0
- dvt/plugins/manifest.py +21 -0
- dvt/profiler.py +20 -0
- dvt/py.typed +1 -0
- dvt/runners/__init__.py +2 -0
- dvt/runners/exposure_runner.py +7 -0
- dvt/runners/no_op_runner.py +46 -0
- dvt/runners/saved_query_runner.py +7 -0
- dvt/selected_resources.py +8 -0
- dvt/task/__init__.py +0 -0
- dvt/task/base.py +504 -0
- dvt/task/build.py +197 -0
- dvt/task/clean.py +57 -0
- dvt/task/clone.py +162 -0
- dvt/task/compile.py +151 -0
- dvt/task/compute.py +366 -0
- dvt/task/debug.py +650 -0
- dvt/task/deps.py +280 -0
- dvt/task/docs/__init__.py +3 -0
- dvt/task/docs/generate.py +408 -0
- dvt/task/docs/index.html +250 -0
- dvt/task/docs/serve.py +28 -0
- dvt/task/freshness.py +323 -0
- dvt/task/function.py +122 -0
- dvt/task/group_lookup.py +46 -0
- dvt/task/init.py +374 -0
- dvt/task/list.py +237 -0
- dvt/task/printer.py +176 -0
- dvt/task/profiles.py +256 -0
- dvt/task/retry.py +175 -0
- dvt/task/run.py +1146 -0
- dvt/task/run_operation.py +142 -0
- dvt/task/runnable.py +802 -0
- dvt/task/seed.py +104 -0
- dvt/task/show.py +150 -0
- dvt/task/snapshot.py +57 -0
- dvt/task/sql.py +111 -0
- dvt/task/test.py +464 -0
- dvt/tests/fixtures/__init__.py +1 -0
- dvt/tests/fixtures/project.py +620 -0
- dvt/tests/util.py +651 -0
- dvt/tracking.py +529 -0
- dvt/utils/__init__.py +3 -0
- dvt/utils/artifact_upload.py +151 -0
- dvt/utils/utils.py +408 -0
- dvt/version.py +249 -0
- dvt_core-1.11.0b4.dist-info/METADATA +252 -0
- dvt_core-1.11.0b4.dist-info/RECORD +261 -0
- dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
- dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
- dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from dvt.context.manifest import ManifestContext
|
|
2
|
+
from dvt.contracts.graph.manifest import Manifest
|
|
3
|
+
|
|
4
|
+
from dbt.adapters.contracts.connection import AdapterRequiredConfig
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class QueryHeaderContext(ManifestContext):
    """A ManifestContext constructed with the config's own project name.

    NOTE(review): presumably the context used when rendering query headers
    (inferred from the class name only) -- confirm against callers.
    """

    def __init__(self, config: AdapterRequiredConfig, manifest: Manifest) -> None:
        # ManifestContext takes a third positional argument; here it is always
        # the project name carried by the config.
        project_name = config.project_name
        super().__init__(config, manifest, project_name)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def generate_query_header_context(config: AdapterRequiredConfig, manifest: Manifest):
    """Build a QueryHeaderContext for ``config``/``manifest`` and return its ``to_dict()`` result."""
    return QueryHeaderContext(config, manifest).to_dict()
|
dvt/context/secret.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from typing import Any, Dict, Optional
|
|
2
|
+
|
|
3
|
+
from dvt.constants import DEFAULT_ENV_PLACEHOLDER, SECRET_PLACEHOLDER
|
|
4
|
+
from dvt.exceptions import EnvVarMissingError
|
|
5
|
+
|
|
6
|
+
from dbt_common.constants import SECRET_ENV_PREFIX
|
|
7
|
+
from dbt_common.context import get_invocation_context
|
|
8
|
+
|
|
9
|
+
from .base import BaseContext, contextmember
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SecretContext(BaseContext):
    """Context for rendering profiles.yml and packages.yml.

    Unlike other contexts, its ``env_var`` accepts secret environment
    variables, which are not usable elsewhere.
    """

    @contextmember()
    def env_var(self, var: str, default: Optional[str] = None) -> str:
        """Look up the environment variable ``var``.

        Resolution order:

        * If ``var`` is set and its name starts with the secret prefix
          (DBT_ENV_SECRET_), return a placeholder built from the variable
          *name*. The real value is substituted later, in
          SecretRenderer.render_value(), so it is never exposed to Jinja.
        * If ``var`` is set, return its value.
        * Otherwise return ``default`` when one is given.

        Raises:
            EnvVarMissingError: ``var`` is not set and ``default`` is None.
        """
        env = get_invocation_context().env
        is_set = var in env

        # Secret values are never rendered here; only the wrapped name is
        # returned, to avoid any risk of Jinja manipulation.
        if is_set and var.startswith(SECRET_ENV_PREFIX):
            return SECRET_PLACEHOLDER.format(var)

        resolved = env[var] if is_set else default
        if resolved is None:
            raise EnvVarMissingError(var)

        # Record the variable so partial parsing can detect changes. A secret
        # should not reach this point, but guard anyway so one is never saved.
        if not var.startswith(SECRET_ENV_PREFIX):
            # A value that came from the default is recorded as a placeholder
            # so partial parsing can be skipped; if the default itself changes
            # the file has changed and will be reparsed regardless.
            self.env_vars[var] = resolved if is_set else DEFAULT_ENV_PLACEHOLDER
        return resolved
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def generate_secret_context(cli_vars: Dict[str, Any]) -> Dict[str, Any]:
    """Build a SecretContext from ``cli_vars`` and return its ``to_dict()`` result."""
    # NOTE: ``to_dict`` here is the context's own method, not a Mashumaro one.
    return SecretContext(cli_vars).to_dict()
|
dvt/context/target.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from typing import Any, Dict
|
|
2
|
+
|
|
3
|
+
from dvt.context.base import BaseContext, contextproperty
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TargetContext(BaseContext):
    """BaseContext that additionally exposes the ``target`` dictionary."""

    # subclass is ConfiguredContext
    def __init__(self, target_dict: Dict[str, Any], cli_vars: Dict[str, Any]):
        super().__init__(cli_vars=cli_vars)
        # Kept as given; ``target`` returns this same object.
        self.target_dict = target_dict

    @contextproperty()
    def target(self) -> Dict[str, Any]:
        """`target` contains information about your connection to the warehouse
        (specified in profiles.yml). Some configs are shared between all
        adapters, while others are adapter-specific.

        Common:

            |----------|-----------|------------------------------------------|
            | Variable | Example   | Description                              |
            |----------|-----------|------------------------------------------|
            | name     | dev       | Name of the active target                |
            |----------|-----------|------------------------------------------|
            | schema   | dbt_alice | Name of the dbt schema (or, dataset on   |
            |          |           | BigQuery)                                |
            |----------|-----------|------------------------------------------|
            | type     | postgres  | The active adapter being used.           |
            |----------|-----------|------------------------------------------|
            | threads  | 4         | The number of threads in use by dbt      |
            |----------|-----------|------------------------------------------|

        Snowflake:

            |----------|-----------|------------------------------------------|
            | Variable | Example   | Description                              |
            |----------|-----------|------------------------------------------|
            | database | RAW       | The active target's database.            |
            |----------|-----------|------------------------------------------|
            | warehouse| TRANSFORM | The active target's warehouse.           |
            |----------|-----------|------------------------------------------|
            | user     | USERNAME  | The active target's user                 |
            |----------|-----------|------------------------------------------|
            | role     | ROLENAME  | The active target's role                 |
            |----------|-----------|------------------------------------------|
            | account  | abc123    | The active target's account              |
            |----------|-----------|------------------------------------------|

        Postgres/Redshift:

            |----------|-------------------|----------------------------------|
            | Variable | Example           | Description                      |
            |----------|-------------------|----------------------------------|
            | dbname   | analytics         | The active target's database.    |
            |----------|-------------------|----------------------------------|
            | host     | abc123.us-west-2. | The active target's host.        |
            |          | redshift.amazonaws|                                  |
            |          | .com              |                                  |
            |----------|-------------------|----------------------------------|
            | user     | dbt_user          | The active target's user         |
            |----------|-------------------|----------------------------------|
            | port     | 5439              | The active target's port         |
            |----------|-------------------|----------------------------------|

        BigQuery:

            |----------|-----------|------------------------------------------|
            | Variable | Example   | Description                              |
            |----------|-----------|------------------------------------------|
            | project  | abc-123   | The active target's project.             |
            |----------|-----------|------------------------------------------|

        """
        return self.target_dict
|
|
File without changes
|
dvt/contracts/files.py
ADDED
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Any, Dict, List, Optional, Union
|
|
4
|
+
|
|
5
|
+
from dvt.artifacts.resources.base import FileHash
|
|
6
|
+
from dvt.constants import MAXIMUM_SEED_SIZE
|
|
7
|
+
from mashumaro.types import SerializableType
|
|
8
|
+
|
|
9
|
+
from dbt_common.dataclass_schema import StrEnum, dbtClassMixin
|
|
10
|
+
|
|
11
|
+
from .util import SourceKey
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ParseFileType(StrEnum):
    """Kinds of parseable input; each member is keyed to a parser name in
    ``parse_file_type_to_parser``."""

    Macro = "macro"
    Model = "model"
    Snapshot = "snapshot"
    Analysis = "analysis"
    SingularTest = "singular_test"
    GenericTest = "generic_test"
    Seed = "seed"
    Documentation = "docs"
    Schema = "schema"
    # not a real filetype, from dbt_project.yml
    Hook = "hook"
    Fixture = "fixture"
    Function = "function"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Name of the parser class associated with each ParseFileType. The values are
# plain strings, not class objects.
parse_file_type_to_parser: Dict[ParseFileType, str] = {
    ParseFileType.Macro: "MacroParser",
    ParseFileType.Model: "ModelParser",
    ParseFileType.Snapshot: "SnapshotParser",
    ParseFileType.Analysis: "AnalysisParser",
    ParseFileType.SingularTest: "SingularTestParser",
    ParseFileType.GenericTest: "GenericTestParser",
    ParseFileType.Seed: "SeedParser",
    ParseFileType.Documentation: "DocumentationParser",
    ParseFileType.Schema: "SchemaParser",
    ParseFileType.Hook: "HookParser",
    ParseFileType.Fixture: "FixtureParser",
    ParseFileType.Function: "FunctionParser",
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class FilePath(dbtClassMixin):
    """A file location split into project root, searched directory and the
    path relative to that directory."""

    searched_path: str
    relative_path: str
    modification_time: float
    project_root: str

    @property
    def search_key(self) -> str:
        """Key identifying this file; currently its absolute path."""
        # TODO: should this be project name + path relative to project root?
        return self.absolute_path

    @property
    def full_path(self) -> str:
        """``project_root``/``searched_path``/``relative_path``, joined but not normalised."""
        # useful for symlink preservation
        parts = (self.project_root, self.searched_path, self.relative_path)
        return os.path.join(*parts)

    @property
    def absolute_path(self) -> str:
        """``full_path`` passed through ``os.path.abspath``."""
        return os.path.abspath(self.full_path)

    @property
    def original_file_path(self) -> str:
        """``searched_path`` joined with ``relative_path`` (project root excluded)."""
        return os.path.join(self.searched_path, self.relative_path)

    def seed_too_large(self) -> bool:
        """Return whether the file this represents is over the seed size limit"""
        size_in_bytes = os.path.getsize(self.full_path)
        return size_in_bytes > MAXIMUM_SEED_SIZE
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass
class RemoteFile(dbtClassMixin):
    """Stand-in for FilePath when the contents do not come from a local file.

    Every path-like property returns the same synthetic string,
    ``"from remote system<suffix>"``.
    """

    def __init__(self, language) -> None:
        """Choose the suffix for ``language``.

        Raises:
            RuntimeError: ``language`` is neither "sql" nor "python".
        """
        suffix_by_language = (("sql", ".sql"), ("python", ".py"))
        for known_language, suffix in suffix_by_language:
            if language == known_language:
                self.path_end = suffix
                break
        else:
            raise RuntimeError(f"Invalid language for remote File {language}")
        self.path = f"from remote system{self.path_end}"

    @property
    def searched_path(self) -> str:
        return self.path

    @property
    def relative_path(self) -> str:
        return self.path

    @property
    def absolute_path(self) -> str:
        return self.path

    @property
    def original_file_path(self):
        return self.path

    @property
    def modification_time(self):
        # NOTE(review): this returns the synthetic path string, whereas
        # FilePath.modification_time is a float -- confirm callers never
        # compare the two numerically.
        return self.path
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@dataclass
class BaseSourceFile(dbtClassMixin, SerializableType):
    """Define a source file in dbt"""

    # the path information
    path: Union[FilePath, RemoteFile]
    checksum: FileHash
    # Seems like knowing which project the file came from would be useful
    project_name: Optional[str] = None
    # Parse file type: i.e. which parser will process this file
    parse_file_type: Optional[ParseFileType] = None
    # we don't want to serialize this
    contents: Optional[str] = None

    @property
    def file_id(self):
        """``"<project_name>://<original_file_path>"``, or None for a RemoteFile path."""
        if not isinstance(self.path, RemoteFile):
            return f"{self.project_name}://{self.path.original_file_path}"
        return None

    @property
    def original_file_path(self):
        """Shortcut for ``self.path.original_file_path``."""
        return self.path.original_file_path

    def _serialize(self):
        """Serialize via ``to_dict()``."""
        return self.to_dict()

    @classmethod
    def _deserialize(cls, dct: Dict[str, int]):
        """Rebuild the concrete subclass selected by ``dct["parse_file_type"]``.

        "schema" gives SchemaSourceFile, "fixture" gives FixtureSourceFile and
        anything else gives SourceFile.

        NOTE(review): the ``Dict[str, int]`` annotation looks inaccurate, since
        the value is compared with strings here -- left unchanged.
        """
        file_type = dct["parse_file_type"]
        if file_type == "schema":
            return SchemaSourceFile.from_dict(dct)
        if file_type == "fixture":
            return FixtureSourceFile.from_dict(dct)
        return SourceFile.from_dict(dct)

    def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
        """Trim the serialized dict: drop empty lists and the raw ``contents``."""
        dct = super().__post_serialize__(dct, context)
        # remove empty lists to save space
        empty_list_keys = [
            key for key, value in dct.items() if isinstance(value, list) and not value
        ]
        for key in empty_list_keys:
            del dct[key]
        # remove contents. Schema files will still have 'dict_from_yaml'
        # from the contents
        dct.pop("contents", None)
        return dct
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@dataclass
class SourceFile(BaseSourceFile):
    """Source file that tracks the nodes, docs, macros and env vars tied to it."""

    nodes: List[str] = field(default_factory=list)
    docs: List[str] = field(default_factory=list)
    macros: List[str] = field(default_factory=list)
    env_vars: List[str] = field(default_factory=list)

    @classmethod
    def big_seed(cls, path: FilePath) -> "SourceFile":
        """Parse seeds over the size limit with just the path"""
        # The checksum is derived from the path and contents are left empty.
        path_checksum = FileHash.path(path.original_file_path)
        seed_file = cls(path=path, checksum=path_checksum)
        seed_file.contents = ""
        return seed_file

    def add_node(self, value):
        """Append ``value`` to ``nodes`` unless it is already present."""
        if value in self.nodes:
            return
        self.nodes.append(value)

    # TODO: do this a different way. This remote file kludge isn't going
    # to work long term
    @classmethod
    def remote(cls, contents: str, project_name: str, language: str) -> "SourceFile":
        """Create a SourceFile for in-memory ``contents``, using a RemoteFile path."""
        return cls(
            path=RemoteFile(language),
            checksum=FileHash.from_contents(contents),
            project_name=project_name,
            contents=contents,
        )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@dataclass
class SchemaSourceFile(BaseSourceFile):
    """A schema (YAML) source file plus the bookkeeping that records which
    resources, patches, tests, env vars and unrendered values came from it."""

    dfy: Dict[str, Any] = field(default_factory=dict)
    # these are in the manifest.nodes dictionary
    data_tests: Dict[str, Any] = field(default_factory=dict)
    sources: List[str] = field(default_factory=list)
    exposures: List[str] = field(default_factory=list)
    functions: List[str] = field(default_factory=list)
    metrics: List[str] = field(default_factory=list)
    snapshots: List[str] = field(default_factory=list)
    # The following field will no longer be used. Leaving
    # here to avoid breaking existing projects. To be removed
    # later if possible.
    generated_metrics: List[str] = field(default_factory=list)
    # metrics generated from semantic_model measures. The key is
    # the name of the semantic_model, so that we can find it later.
    metrics_from_measures: Dict[str, Any] = field(default_factory=dict)
    groups: List[str] = field(default_factory=list)
    # node patches contain models, seeds, snapshots, analyses
    ndp: List[str] = field(default_factory=list)
    semantic_models: List[str] = field(default_factory=list)
    unit_tests: List[str] = field(default_factory=list)
    saved_queries: List[str] = field(default_factory=list)
    # any macro patches in this file by macro unique_id.
    mcp: Dict[str, str] = field(default_factory=dict)
    # any source patches in this file. The entries are package, name pairs
    # Patches are only against external sources. Sources can be
    # created too, but those are in 'sources'
    sop: List[SourceKey] = field(default_factory=list)
    env_vars: Dict[str, Any] = field(default_factory=dict)
    unrendered_configs: Dict[str, Any] = field(default_factory=dict)
    unrendered_databases: Dict[str, Any] = field(default_factory=dict)
    unrendered_schemas: Dict[str, Any] = field(default_factory=dict)
    pp_dict: Optional[Dict[str, Any]] = None
    pp_test_index: Optional[Dict[str, Any]] = None

    @property
    def dict_from_yaml(self):
        """Readable alias for ``dfy``."""
        return self.dfy

    @property
    def node_patches(self):
        """Readable alias for ``ndp``."""
        return self.ndp

    @property
    def macro_patches(self):
        """Readable alias for ``mcp``."""
        return self.mcp

    @property
    def source_patches(self):
        """Readable alias for ``sop``."""
        return self.sop

    def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
        """Apply the base trimming, then drop the partial-parsing-only keys."""
        dct = super().__post_serialize__(dct, context)
        # Remove partial parsing specific data
        for key in ("pp_test_index", "pp_dict"):
            dct.pop(key, None)
        return dct

    def append_patch(self, yaml_key, unique_id):
        """Record ``unique_id`` as a node patch (``yaml_key`` is not used)."""
        self.node_patches.append(unique_id)

    def add_test(self, node_unique_id, test_from):
        """Append ``node_unique_id`` to ``data_tests[test_from["key"]][test_from["name"]]``."""
        name = test_from["name"]
        key = test_from["key"]
        self.data_tests.setdefault(key, {}).setdefault(name, []).append(node_unique_id)

    # this is only used in tests/unit
    def remove_tests(self, yaml_key, name):
        """Remove the test list stored under ``yaml_key``/``name``, if any."""
        if yaml_key in self.data_tests:
            self.data_tests[yaml_key].pop(name, None)

    # this is only used in the tests directory (unit + functional)
    def get_tests(self, yaml_key, name):
        """Return the test ids stored under ``yaml_key``/``name``, or a new empty list."""
        if yaml_key in self.data_tests and name in self.data_tests[yaml_key]:
            return self.data_tests[yaml_key][name]
        return []

    def add_metrics_from_measures(self, semantic_model_name: str, metric_unique_id: str):
        """Append ``metric_unique_id`` to the list kept for ``semantic_model_name``."""
        if self.generated_metrics:
            # Probably not needed, but for safety sake, convert the
            # old generated_metrics to metrics_from_measures.
            self.fix_metrics_from_measures()
        self.metrics_from_measures.setdefault(semantic_model_name, []).append(metric_unique_id)

    def fix_metrics_from_measures(self):
        """Convert a legacy ``generated_metrics`` list into ``metrics_from_measures``."""
        # Temporary method to fix up existing projects with a partial parse file.
        # This should only be called if SchemaSourceFile in a msgpack
        # pack manifest has an existing "generated_metrics" list, to turn
        # it into a "metrics_from_measures" dictionary, so that we can
        # correctly partially parse.
        # This code can be removed when "generated_metrics" is removed.
        generated_metrics = self.generated_metrics
        # Cleared before the loop: add_metrics_from_measures() calls back into
        # this method whenever generated_metrics is non-empty.
        self.generated_metrics = []  # Should never be needed again
        # For each metric_unique_id we loop through the semantic models
        # looking for the name of the "measure" which generated the metric.
        # When it's found, add it to "metrics_from_measures", with a key
        # of the semantic_model name, and a list of metrics.
        for metric_unique_id in generated_metrics:
            # the metric name is the last dotted component of the unique id
            metric_name = metric_unique_id.split(".")[-1]
            if "semantic_models" not in self.dict_from_yaml:
                continue
            for sem_model in self.dict_from_yaml["semantic_models"]:
                if "measures" not in sem_model:
                    continue
                for measure in sem_model["measures"]:
                    if measure["name"] == metric_name:
                        self.add_metrics_from_measures(sem_model["name"], metric_unique_id)
                        # Stops scanning this semantic model only; the
                        # remaining semantic models are still examined.
                        break

    def get_key_and_name_for_test(self, test_unique_id):
        """Return ``(yaml_key, block_name)`` of the block holding ``test_unique_id``.

        Returns ``(None, None)`` when the id is not recorded in ``data_tests``.
        The search stops at the first hit. Previously a ``break`` left only
        the innermost loop, so every remaining entry was still scanned.
        """
        for key, tests_by_name in self.data_tests.items():
            for name, unique_ids in tests_by_name.items():
                if test_unique_id in unique_ids:
                    return (key, name)
        return (None, None)

    def get_all_test_ids(self):
        """Return every test id in ``data_tests`` as one flat list."""
        return [
            test_id
            for tests_by_name in self.data_tests.values()
            for unique_ids in tests_by_name.values()
            for test_id in unique_ids
        ]

    def add_unrendered_config(self, unrendered_config, yaml_key, name, version=None):
        """Store ``unrendered_config`` under ``yaml_key`` and the (versioned) name.

        An entry that already exists is left untouched.
        """
        versioned_name = f"{name}_v{version}" if version is not None else name
        self.unrendered_configs.setdefault(yaml_key, {}).setdefault(
            versioned_name, unrendered_config
        )

    def get_unrendered_config(self, yaml_key, name, version=None) -> Optional[Dict[str, Any]]:
        """Return the stored unrendered config, or None when there is none."""
        versioned_name = f"{name}_v{version}" if version is not None else name
        if yaml_key not in self.unrendered_configs:
            return None
        return self.unrendered_configs[yaml_key].get(versioned_name)

    def delete_from_unrendered_configs(self, yaml_key, name):
        """Delete the unrendered configs for ``yaml_key``/``name`` and its versions."""
        # We delete all unrendered_configs for this yaml_key/name because the
        # entry has been scheduled for reparsing.
        if self.get_unrendered_config(yaml_key, name):
            configs_for_key = self.unrendered_configs[yaml_key]
            del configs_for_key[name]
            # Delete all versioned keys associated with name
            # NOTE(review): this prefix match also hits any other entry whose
            # name merely starts with "<name>_v" -- confirm that is acceptable.
            versioned_prefix = f"{name}_v"
            version_names_to_delete = [
                stored_name
                for stored_name in configs_for_key
                if stored_name.startswith(versioned_prefix)
            ]
            for version_name in version_names_to_delete:
                del configs_for_key[version_name]

            if not configs_for_key:
                del self.unrendered_configs[yaml_key]

    def add_env_var(self, var, yaml_key, name):
        """Record that ``var`` was used by ``yaml_key``/``name`` (no duplicates)."""
        used_vars = self.env_vars.setdefault(yaml_key, {}).setdefault(name, [])
        if var not in used_vars:
            used_vars.append(var)

    def delete_from_env_vars(self, yaml_key, name):
        """Forget the env vars recorded for ``yaml_key``/``name``."""
        # We delete all vars for this yaml_key/name because the
        # entry has been scheduled for reparsing.
        if yaml_key in self.env_vars and name in self.env_vars[yaml_key]:
            del self.env_vars[yaml_key][name]
            if not self.env_vars[yaml_key]:
                del self.env_vars[yaml_key]

    def add_unrendered_database(self, yaml_key: str, name: str, unrendered_database: str) -> None:
        """Store (or overwrite) the unrendered database for ``yaml_key``/``name``."""
        self.unrendered_databases.setdefault(yaml_key, {})[name] = unrendered_database

    def get_unrendered_database(self, yaml_key: str, name: str) -> Optional[str]:
        """Return the stored unrendered database, or None when there is none."""
        if yaml_key not in self.unrendered_databases:
            return None
        return self.unrendered_databases[yaml_key].get(name)

    def add_unrendered_schema(self, yaml_key: str, name: str, unrendered_schema: str) -> None:
        """Store (or overwrite) the unrendered schema for ``yaml_key``/``name``."""
        self.unrendered_schemas.setdefault(yaml_key, {})[name] = unrendered_schema

    def get_unrendered_schema(self, yaml_key: str, name: str) -> Optional[str]:
        """Return the stored unrendered schema, or None when there is none."""
        if yaml_key not in self.unrendered_schemas:
            return None
        return self.unrendered_schemas[yaml_key].get(name)
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
@dataclass
class FixtureSourceFile(BaseSourceFile):
    """Source file for a fixture, with the unit tests associated with it."""

    fixture: Optional[str] = None
    unit_tests: List[str] = field(default_factory=list)

    def add_unit_test(self, value):
        """Append ``value`` to ``unit_tests`` unless it is already present."""
        if value in self.unit_tests:
            return
        self.unit_tests.append(value)
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
# Union of the three concrete source-file classes defined in this module.
AnySourceFile = Union[SchemaSourceFile, SourceFile, FixtureSourceFile]
|
|
File without changes
|