dvt-core 1.11.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dvt-core might be problematic. Click here for more details.
- dvt/__init__.py +7 -0
- dvt/_pydantic_shim.py +26 -0
- dvt/adapters/__init__.py +16 -0
- dvt/adapters/multi_adapter_manager.py +268 -0
- dvt/artifacts/__init__.py +0 -0
- dvt/artifacts/exceptions/__init__.py +1 -0
- dvt/artifacts/exceptions/schemas.py +31 -0
- dvt/artifacts/resources/__init__.py +116 -0
- dvt/artifacts/resources/base.py +68 -0
- dvt/artifacts/resources/types.py +93 -0
- dvt/artifacts/resources/v1/analysis.py +10 -0
- dvt/artifacts/resources/v1/catalog.py +23 -0
- dvt/artifacts/resources/v1/components.py +275 -0
- dvt/artifacts/resources/v1/config.py +282 -0
- dvt/artifacts/resources/v1/documentation.py +11 -0
- dvt/artifacts/resources/v1/exposure.py +52 -0
- dvt/artifacts/resources/v1/function.py +53 -0
- dvt/artifacts/resources/v1/generic_test.py +32 -0
- dvt/artifacts/resources/v1/group.py +22 -0
- dvt/artifacts/resources/v1/hook.py +11 -0
- dvt/artifacts/resources/v1/macro.py +30 -0
- dvt/artifacts/resources/v1/metric.py +173 -0
- dvt/artifacts/resources/v1/model.py +146 -0
- dvt/artifacts/resources/v1/owner.py +10 -0
- dvt/artifacts/resources/v1/saved_query.py +112 -0
- dvt/artifacts/resources/v1/seed.py +42 -0
- dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dvt/artifacts/resources/v1/semantic_model.py +315 -0
- dvt/artifacts/resources/v1/singular_test.py +14 -0
- dvt/artifacts/resources/v1/snapshot.py +92 -0
- dvt/artifacts/resources/v1/source_definition.py +85 -0
- dvt/artifacts/resources/v1/sql_operation.py +10 -0
- dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
- dvt/artifacts/schemas/__init__.py +0 -0
- dvt/artifacts/schemas/base.py +191 -0
- dvt/artifacts/schemas/batch_results.py +24 -0
- dvt/artifacts/schemas/catalog/__init__.py +12 -0
- dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
- dvt/artifacts/schemas/freshness/__init__.py +1 -0
- dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
- dvt/artifacts/schemas/manifest/__init__.py +2 -0
- dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
- dvt/artifacts/schemas/results.py +148 -0
- dvt/artifacts/schemas/run/__init__.py +2 -0
- dvt/artifacts/schemas/run/v5/__init__.py +0 -0
- dvt/artifacts/schemas/run/v5/run.py +184 -0
- dvt/artifacts/schemas/upgrades/__init__.py +4 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dvt/artifacts/utils/validation.py +153 -0
- dvt/cli/__init__.py +1 -0
- dvt/cli/context.py +16 -0
- dvt/cli/exceptions.py +56 -0
- dvt/cli/flags.py +558 -0
- dvt/cli/main.py +971 -0
- dvt/cli/option_types.py +121 -0
- dvt/cli/options.py +79 -0
- dvt/cli/params.py +803 -0
- dvt/cli/requires.py +478 -0
- dvt/cli/resolvers.py +32 -0
- dvt/cli/types.py +40 -0
- dvt/clients/__init__.py +0 -0
- dvt/clients/checked_load.py +82 -0
- dvt/clients/git.py +164 -0
- dvt/clients/jinja.py +206 -0
- dvt/clients/jinja_static.py +245 -0
- dvt/clients/registry.py +192 -0
- dvt/clients/yaml_helper.py +68 -0
- dvt/compilation.py +833 -0
- dvt/compute/__init__.py +26 -0
- dvt/compute/base.py +288 -0
- dvt/compute/engines/__init__.py +13 -0
- dvt/compute/engines/duckdb_engine.py +368 -0
- dvt/compute/engines/spark_engine.py +273 -0
- dvt/compute/query_analyzer.py +212 -0
- dvt/compute/router.py +483 -0
- dvt/config/__init__.py +4 -0
- dvt/config/catalogs.py +95 -0
- dvt/config/compute_config.py +406 -0
- dvt/config/profile.py +411 -0
- dvt/config/profiles_v2.py +464 -0
- dvt/config/project.py +893 -0
- dvt/config/renderer.py +232 -0
- dvt/config/runtime.py +491 -0
- dvt/config/selectors.py +209 -0
- dvt/config/utils.py +78 -0
- dvt/connectors/.gitignore +6 -0
- dvt/connectors/README.md +306 -0
- dvt/connectors/catalog.yml +217 -0
- dvt/connectors/download_connectors.py +300 -0
- dvt/constants.py +29 -0
- dvt/context/__init__.py +0 -0
- dvt/context/base.py +746 -0
- dvt/context/configured.py +136 -0
- dvt/context/context_config.py +350 -0
- dvt/context/docs.py +82 -0
- dvt/context/exceptions_jinja.py +179 -0
- dvt/context/macro_resolver.py +195 -0
- dvt/context/macros.py +171 -0
- dvt/context/manifest.py +73 -0
- dvt/context/providers.py +2198 -0
- dvt/context/query_header.py +14 -0
- dvt/context/secret.py +59 -0
- dvt/context/target.py +74 -0
- dvt/contracts/__init__.py +0 -0
- dvt/contracts/files.py +413 -0
- dvt/contracts/graph/__init__.py +0 -0
- dvt/contracts/graph/manifest.py +1904 -0
- dvt/contracts/graph/metrics.py +98 -0
- dvt/contracts/graph/model_config.py +71 -0
- dvt/contracts/graph/node_args.py +42 -0
- dvt/contracts/graph/nodes.py +1806 -0
- dvt/contracts/graph/semantic_manifest.py +233 -0
- dvt/contracts/graph/unparsed.py +812 -0
- dvt/contracts/project.py +417 -0
- dvt/contracts/results.py +53 -0
- dvt/contracts/selection.py +23 -0
- dvt/contracts/sql.py +86 -0
- dvt/contracts/state.py +69 -0
- dvt/contracts/util.py +46 -0
- dvt/deprecations.py +347 -0
- dvt/deps/__init__.py +0 -0
- dvt/deps/base.py +153 -0
- dvt/deps/git.py +196 -0
- dvt/deps/local.py +80 -0
- dvt/deps/registry.py +131 -0
- dvt/deps/resolver.py +149 -0
- dvt/deps/tarball.py +121 -0
- dvt/docs/source/_ext/dbt_click.py +118 -0
- dvt/docs/source/conf.py +32 -0
- dvt/env_vars.py +64 -0
- dvt/event_time/event_time.py +40 -0
- dvt/event_time/sample_window.py +60 -0
- dvt/events/__init__.py +16 -0
- dvt/events/base_types.py +37 -0
- dvt/events/core_types_pb2.py +2 -0
- dvt/events/logging.py +109 -0
- dvt/events/types.py +2534 -0
- dvt/exceptions.py +1487 -0
- dvt/flags.py +89 -0
- dvt/graph/__init__.py +11 -0
- dvt/graph/cli.py +248 -0
- dvt/graph/graph.py +172 -0
- dvt/graph/queue.py +213 -0
- dvt/graph/selector.py +375 -0
- dvt/graph/selector_methods.py +976 -0
- dvt/graph/selector_spec.py +223 -0
- dvt/graph/thread_pool.py +18 -0
- dvt/hooks.py +21 -0
- dvt/include/README.md +49 -0
- dvt/include/__init__.py +3 -0
- dvt/include/global_project.py +4 -0
- dvt/include/starter_project/.gitignore +4 -0
- dvt/include/starter_project/README.md +15 -0
- dvt/include/starter_project/__init__.py +3 -0
- dvt/include/starter_project/analyses/.gitkeep +0 -0
- dvt/include/starter_project/dvt_project.yml +36 -0
- dvt/include/starter_project/macros/.gitkeep +0 -0
- dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dvt/include/starter_project/models/example/schema.yml +21 -0
- dvt/include/starter_project/seeds/.gitkeep +0 -0
- dvt/include/starter_project/snapshots/.gitkeep +0 -0
- dvt/include/starter_project/tests/.gitkeep +0 -0
- dvt/internal_deprecations.py +27 -0
- dvt/jsonschemas/__init__.py +3 -0
- dvt/jsonschemas/jsonschemas.py +309 -0
- dvt/jsonschemas/project/0.0.110.json +4717 -0
- dvt/jsonschemas/project/0.0.85.json +2015 -0
- dvt/jsonschemas/resources/0.0.110.json +2636 -0
- dvt/jsonschemas/resources/0.0.85.json +2536 -0
- dvt/jsonschemas/resources/latest.json +6773 -0
- dvt/links.py +4 -0
- dvt/materializations/__init__.py +0 -0
- dvt/materializations/incremental/__init__.py +0 -0
- dvt/materializations/incremental/microbatch.py +235 -0
- dvt/mp_context.py +8 -0
- dvt/node_types.py +37 -0
- dvt/parser/__init__.py +23 -0
- dvt/parser/analysis.py +21 -0
- dvt/parser/base.py +549 -0
- dvt/parser/common.py +267 -0
- dvt/parser/docs.py +52 -0
- dvt/parser/fixtures.py +51 -0
- dvt/parser/functions.py +30 -0
- dvt/parser/generic_test.py +100 -0
- dvt/parser/generic_test_builders.py +334 -0
- dvt/parser/hooks.py +119 -0
- dvt/parser/macros.py +137 -0
- dvt/parser/manifest.py +2204 -0
- dvt/parser/models.py +574 -0
- dvt/parser/partial.py +1179 -0
- dvt/parser/read_files.py +445 -0
- dvt/parser/schema_generic_tests.py +423 -0
- dvt/parser/schema_renderer.py +111 -0
- dvt/parser/schema_yaml_readers.py +936 -0
- dvt/parser/schemas.py +1467 -0
- dvt/parser/search.py +149 -0
- dvt/parser/seeds.py +28 -0
- dvt/parser/singular_test.py +20 -0
- dvt/parser/snapshots.py +44 -0
- dvt/parser/sources.py +557 -0
- dvt/parser/sql.py +63 -0
- dvt/parser/unit_tests.py +622 -0
- dvt/plugins/__init__.py +20 -0
- dvt/plugins/contracts.py +10 -0
- dvt/plugins/exceptions.py +2 -0
- dvt/plugins/manager.py +164 -0
- dvt/plugins/manifest.py +21 -0
- dvt/profiler.py +20 -0
- dvt/py.typed +1 -0
- dvt/runners/__init__.py +2 -0
- dvt/runners/exposure_runner.py +7 -0
- dvt/runners/no_op_runner.py +46 -0
- dvt/runners/saved_query_runner.py +7 -0
- dvt/selected_resources.py +8 -0
- dvt/task/__init__.py +0 -0
- dvt/task/base.py +504 -0
- dvt/task/build.py +197 -0
- dvt/task/clean.py +57 -0
- dvt/task/clone.py +162 -0
- dvt/task/compile.py +151 -0
- dvt/task/compute.py +366 -0
- dvt/task/debug.py +650 -0
- dvt/task/deps.py +280 -0
- dvt/task/docs/__init__.py +3 -0
- dvt/task/docs/generate.py +408 -0
- dvt/task/docs/index.html +250 -0
- dvt/task/docs/serve.py +28 -0
- dvt/task/freshness.py +323 -0
- dvt/task/function.py +122 -0
- dvt/task/group_lookup.py +46 -0
- dvt/task/init.py +374 -0
- dvt/task/list.py +237 -0
- dvt/task/printer.py +176 -0
- dvt/task/profiles.py +256 -0
- dvt/task/retry.py +175 -0
- dvt/task/run.py +1146 -0
- dvt/task/run_operation.py +142 -0
- dvt/task/runnable.py +802 -0
- dvt/task/seed.py +104 -0
- dvt/task/show.py +150 -0
- dvt/task/snapshot.py +57 -0
- dvt/task/sql.py +111 -0
- dvt/task/test.py +464 -0
- dvt/tests/fixtures/__init__.py +1 -0
- dvt/tests/fixtures/project.py +620 -0
- dvt/tests/util.py +651 -0
- dvt/tracking.py +529 -0
- dvt/utils/__init__.py +3 -0
- dvt/utils/artifact_upload.py +151 -0
- dvt/utils/utils.py +408 -0
- dvt/version.py +249 -0
- dvt_core-1.11.0b4.dist-info/METADATA +252 -0
- dvt_core-1.11.0b4.dist-info/RECORD +261 -0
- dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
- dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
- dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/parser/read_files.py
ADDED
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pathlib
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Dict, List, Mapping, MutableMapping, Optional, Protocol
|
|
5
|
+
|
|
6
|
+
import pathspec # type: ignore
|
|
7
|
+
from dvt.config import Project
|
|
8
|
+
from dvt.contracts.files import (
|
|
9
|
+
AnySourceFile,
|
|
10
|
+
FileHash,
|
|
11
|
+
FilePath,
|
|
12
|
+
FixtureSourceFile,
|
|
13
|
+
ParseFileType,
|
|
14
|
+
SchemaSourceFile,
|
|
15
|
+
SourceFile,
|
|
16
|
+
)
|
|
17
|
+
from dvt.events.types import InputFileDiffError
|
|
18
|
+
from dvt.exceptions import ParsingError
|
|
19
|
+
from dvt.parser.common import schema_file_keys
|
|
20
|
+
from dvt.parser.schemas import yaml_from_file
|
|
21
|
+
from dvt.parser.search import filesystem_search
|
|
22
|
+
|
|
23
|
+
from dbt_common.clients.system import load_file_contents
|
|
24
|
+
from dbt_common.dataclass_schema import dbtClassMixin
|
|
25
|
+
from dbt_common.events.functions import fire_event
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class InputFile(dbtClassMixin):
    """A single file supplied via a FileDiff instead of being read from disk."""

    # project-relative path of the file
    path: str
    # full text contents of the file
    content: str
    # file mtime; 0.0 is treated as "not provided" by load_source_file
    modification_time: float = 0.0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class FileDiff(dbtClassMixin):
    """A set of file changes to apply on top of saved files, emulating a
    full filesystem read (consumed by ReadFilesFromDiff)."""

    # file paths (strings) removed since the saved state
    deleted: List[str]
    # Note: it would be possible to not distinguish between
    # added and changed files, but we would lose some error handling.
    changed: List[InputFile]
    added: List[InputFile]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# This loads the files contents and creates the SourceFile object
def load_source_file(
    path: FilePath,
    parse_file_type: ParseFileType,
    project_name: str,
    saved_files,
) -> Optional[AnySourceFile]:
    """Read one file from disk and wrap it in the SourceFile subclass
    appropriate for `parse_file_type`.

    For schema (yaml) files, if `saved_files` contains a prior version of
    the same file with an identical, non-zero modification time, the file
    is not re-read: the saved checksum and parsed-yaml dict (`dfy`) are
    reused instead.
    """

    # Pick the SourceFile subclass for this file type.
    if parse_file_type == ParseFileType.Schema:
        sf_cls = SchemaSourceFile
    elif parse_file_type == ParseFileType.Fixture:
        sf_cls = FixtureSourceFile  # type:ignore[assignment]
    else:
        sf_cls = SourceFile  # type:ignore[assignment]

    # Created with an empty checksum; filled in below (or copied from the
    # saved file when the mtime shortcut applies).
    source_file = sf_cls(
        path=path,
        checksum=FileHash.empty(),
        parse_file_type=parse_file_type,
        project_name=project_name,
    )

    skip_loading_schema_file = False
    if (
        parse_file_type == ParseFileType.Schema
        and saved_files
        and source_file.file_id in saved_files
    ):
        old_source_file = saved_files[source_file.file_id]
        # modification_time == 0.0 means "unknown", so only reuse saved
        # parse results when both sides have a real, matching mtime.
        if (
            source_file.path.modification_time != 0.0
            and old_source_file.path.modification_time == source_file.path.modification_time
        ):
            source_file.checksum = old_source_file.checksum
            source_file.dfy = old_source_file.dfy
            skip_loading_schema_file = True

    if not skip_loading_schema_file:
        # We strip the file_contents before generating the checksum because we want
        # the checksum to match the stored file contents
        file_contents = load_file_contents(path.absolute_path, strip=True)
        source_file.contents = file_contents
        source_file.checksum = FileHash.from_contents(file_contents)

    # For schema files freshly read above, parse the yaml into `dfy` now,
    # with minimal structural validation.
    if parse_file_type == ParseFileType.Schema and source_file.contents:
        dfy = yaml_from_file(source_file=source_file, validate=True)
        if dfy:
            validate_yaml(source_file.path.original_file_path, dfy)
            source_file.dfy = dfy
    return source_file
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# Do some minimal validation of the yaml in a schema file.
# Check version, that key values are lists and that each element in
# the lists has a 'name' key
def validate_yaml(file_path, dct):
    """Sanity-check the parsed yaml dict of a schema file.

    Every recognized top-level schema key present in `dct` must map to a
    list, and each list entry must be a dictionary with a 'name' key.
    Raises ParsingError on the first violation found.
    """
    for key in schema_file_keys:
        if key not in dct:
            continue
        entries = dct[key]
        if not isinstance(entries, list):
            raise ParsingError(
                f"The schema file at {file_path} is "
                f"invalid because the value of '{key}' is not a list"
            )
        for entry in entries:
            if not isinstance(entry, dict):
                raise ParsingError(
                    f"The schema file at {file_path} is "
                    f"invalid because a list element for '{key}' is not a dictionary"
                )
            if "name" not in entry:
                raise ParsingError(
                    f"The schema file at {file_path} is "
                    f"invalid because a list element for '{key}' does not have a "
                    "name attribute."
                )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# Special processing for big seed files
def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
    """Create the SourceFile for a seed.

    Oversized seeds skip content loading and hashing entirely (the path
    stands in for the hash); normal seeds are hashed but their contents
    are not retained on the SourceFile.
    """
    if match.seed_too_large():
        # We don't want to calculate a hash of this file. Use the path.
        seed_file = SourceFile.big_seed(match)
    else:
        file_contents = load_file_contents(match.absolute_path, strip=True)
        seed_file = SourceFile(path=match, checksum=FileHash.from_contents(file_contents))
        # Seed contents are never stored on the SourceFile.
        seed_file.contents = ""
    seed_file.parse_file_type = ParseFileType.Seed
    seed_file.project_name = project_name
    return seed_file
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# Use the FilesystemSearcher to get a bunch of FilePaths, then turn
# them into a bunch of FileSource objects
def get_source_files(project, paths, extension, parse_file_type, saved_files, ignore_spec):
    """Search `paths` in `project` for files matching `extension` and
    return the corresponding loaded SourceFile objects."""
    found_paths = filesystem_search(project, paths, extension, ignore_spec)
    source_files = []
    for found in found_paths:
        if parse_file_type == ParseFileType.Seed:
            source_files.append(load_seed_source_file(found, project.project_name))
            continue
        # singular tests live in /tests but only generic tests live
        # in /tests/generic and fixtures in /tests/fixture so we want to skip those
        if parse_file_type == ParseFileType.SingularTest:
            relative = pathlib.Path(found.relative_path)
            if relative.parts[0] in ["generic", "fixtures"]:
                continue
        loaded = load_source_file(found, parse_file_type, project.project_name, saved_files)
        # only append the list if it has contents. added to fix #3568
        if loaded:
            source_files.append(loaded)
    return source_files
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def read_files_for_parser(project, files, parse_ft, file_type_info, saved_files, ignore_spec):
    """Load every file of one file type for `project`, record each loaded
    file in `files` keyed by its file_id, and return the list of file_ids."""
    search_dirs = file_type_info["paths"]
    file_ids = []
    for extension in file_type_info["extensions"]:
        for source_file in get_source_files(
            project, search_dirs, extension, parse_ft, saved_files, ignore_spec
        ):
            files[source_file.file_id] = source_file
            file_ids.append(source_file.file_id)
    return file_ids
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def generate_dbt_ignore_spec(project_root):
    """Build a pathspec from the project's `.dbtignore` file, if present.

    Returns None when the project has no `.dbtignore`.
    """
    dbtignore_path = os.path.join(project_root, ".dbtignore")
    if not os.path.exists(dbtignore_path):
        return None
    with open(dbtignore_path) as ignore_file:
        # .dbtignore uses gitignore-style wildcard patterns.
        return pathspec.PathSpec.from_lines(
            pathspec.patterns.GitWildMatchPattern, ignore_file
        )
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
# Protocol for the ReadFiles... classes
class ReadFiles(Protocol):
    # file_id => SourceFile, for every file read
    files: MutableMapping[str, AnySourceFile]
    # project name => {parser name => [file_id, ...]}
    project_parser_files: Dict

    def read_files(self):
        """Populate `files` and `project_parser_files`."""
        pass
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
@dataclass
class ReadFilesFromFileSystem:
    """Read all files for all configured projects directly from disk."""

    all_projects: Mapping[str, Project]
    files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
    # saved_files is only used to compare schema files
    saved_files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
    # project_parser_files = {
    #   "my_project": {
    #     "ModelParser": ["my_project://models/my_model.sql"]
    #   }
    # }
    #
    project_parser_files: Dict = field(default_factory=dict)

    def read_files(self):
        """Read files for every project, one project at a time."""
        for project in self.all_projects.values():
            self.read_files_for_project(project, get_file_types_for_project(project))

    def read_files_for_project(self, project, file_types):
        """Read all configured file types for one project, recording the
        loaded file_ids per parser under the project's name."""
        ignore_spec = generate_dbt_ignore_spec(project.project_root)
        parser_file_ids: Dict = {}
        self.project_parser_files[project.project_name] = parser_file_ids

        for parse_ft, type_info in file_types.items():
            parser_file_ids[type_info["parser"]] = read_files_for_parser(
                project,
                self.files,
                parse_ft,
                type_info,
                self.saved_files,
                ignore_spec,
            )
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
@dataclass
class ReadFilesFromDiff:
    """Construct the `files` dictionary from saved manifest files plus a
    FileDiff, instead of re-reading the project from the filesystem.

    The saved files are first copied (without parse-derived state), then
    the diff's deletions, changes, and additions are applied on top to
    emulate a complete filesystem read.
    """

    root_project_name: str
    all_projects: Mapping[str, Project]
    file_diff: FileDiff
    files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
    # saved_files is used to construct a fresh copy of files, without
    # additional information from parsing
    saved_files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
    project_parser_files: Dict = field(default_factory=dict)
    # per-project cache of {"file_types": ..., "file_type_lookup": ...},
    # built lazily by get_project_file_types
    project_file_types: Dict = field(default_factory=dict)
    local_package_dirs: Optional[List[str]] = None

    def read_files(self):
        """Apply the file diff on top of a fresh copy of the saved files."""
        # Copy the base file information from the existing manifest.
        # We will do deletions, adds, changes from the file_diff to emulate
        # a complete read of the project file system.
        for file_id, source_file in self.saved_files.items():
            if isinstance(source_file, SchemaSourceFile):
                file_cls = SchemaSourceFile
            else:
                file_cls = SourceFile
            new_source_file = file_cls(
                path=source_file.path,
                checksum=source_file.checksum,
                project_name=source_file.project_name,
                parse_file_type=source_file.parse_file_type,
                contents=source_file.contents,
            )
            self.files[file_id] = new_source_file

        # Now that we have a copy of the files, remove deleted files
        # For now, we assume that all files are in the root_project, until
        # we've determined whether project name will be provided or deduced
        # from the directory.
        for input_file_path in self.file_diff.deleted:
            project_name = self.get_project_name(input_file_path)
            file_id = f"{project_name}://{input_file_path}"
            if file_id in self.files:
                self.files.pop(file_id)
            else:
                fire_event(InputFileDiffError(category="deleted file not found", file_id=file_id))

        # Now we do the changes
        for input_file in self.file_diff.changed:
            project_name = self.get_project_name(input_file.path)
            file_id = f"{project_name}://{input_file.path}"
            if file_id in self.files:
                # Get the existing source_file object and update the contents and mod time
                source_file = self.files[file_id]
                source_file.contents = input_file.content
                source_file.checksum = FileHash.from_contents(input_file.content)
                source_file.path.modification_time = input_file.modification_time
                # Handle creation of dictionary version of schema file content
                if isinstance(source_file, SchemaSourceFile) and source_file.contents:
                    dfy = yaml_from_file(source_file)
                    if dfy:
                        validate_yaml(source_file.path.original_file_path, dfy)
                        source_file.dfy = dfy
                # TODO: ensure we have a file object even for empty files, such as schema files

        # Now the new files
        for input_file in self.file_diff.added:
            project_name = self.get_project_name(input_file.path)
            # Bug fix: file_id was previously never assigned in this loop, so
            # the error events below reported a stale file_id from the loops
            # above — or raised NameError when there were no deletions or
            # changes. Compute it for this input_file before any use.
            file_id = f"{project_name}://{input_file.path}"
            # FilePath
            #   searched_path i.e. "models"
            #   relative_path i.e. the part after searched_path, or "model.sql"
            #   modification_time float, default 0.0...
            #   project_root
            # We use PurePath because there's no actual filesystem to look at
            input_file_path = pathlib.PurePath(input_file.path)
            extension = input_file_path.suffix
            searched_path = input_file_path.parts[0]
            # check what happens with generic tests... searched_path/relative_path

            relative_path_parts = input_file_path.parts[1:]
            relative_path = pathlib.PurePath("").joinpath(*relative_path_parts)
            # Create FilePath object
            input_file_path = FilePath(
                searched_path=searched_path,
                relative_path=str(relative_path),
                modification_time=input_file.modification_time,
                project_root=self.all_projects[project_name].project_root,
            )

            # Now use the extension and "searched_path" to determine which file_type
            (file_types, file_type_lookup) = self.get_project_file_types(project_name)
            parse_ft_for_extension = set()
            parse_ft_for_path = set()
            if extension in file_type_lookup["extensions"]:
                parse_ft_for_extension = file_type_lookup["extensions"][extension]
            if searched_path in file_type_lookup["paths"]:
                parse_ft_for_path = file_type_lookup["paths"][searched_path]
            if len(parse_ft_for_extension) == 0 or len(parse_ft_for_path) == 0:
                fire_event(InputFileDiffError(category="not a project file", file_id=file_id))
                continue
            parse_ft_set = parse_ft_for_extension.intersection(parse_ft_for_path)
            if (
                len(parse_ft_set) != 1
            ):  # There should only be one result for a path/extension combination
                fire_event(
                    InputFileDiffError(
                        category="unable to resolve diff file location", file_id=file_id
                    )
                )
                continue
            parse_ft = parse_ft_set.pop()
            source_file_cls = SourceFile
            if parse_ft == ParseFileType.Schema:
                source_file_cls = SchemaSourceFile
            source_file = source_file_cls(
                path=input_file_path,
                contents=input_file.content,
                checksum=FileHash.from_contents(input_file.content),
                project_name=project_name,
                parse_file_type=parse_ft,
            )
            if source_file_cls == SchemaSourceFile:
                dfy = yaml_from_file(source_file)
                if dfy:
                    validate_yaml(source_file.path.original_file_path, dfy)
                    source_file.dfy = dfy
                else:
                    # don't include in files because no content
                    continue
            self.files[source_file.file_id] = source_file

    def get_project_name(self, path):
        """Map a diff file path to the owning project's name.

        It's not currently possible to recognize any other project files,
        and it's an open issue how to handle deps.
        """
        return self.root_project_name

    def get_project_file_types(self, project_name):
        """Return (file_types, file_type_lookup) for a project, building
        and caching both on first use."""
        if project_name not in self.project_file_types:
            file_types = get_file_types_for_project(self.all_projects[project_name])
            file_type_lookup = self.get_file_type_lookup(file_types)
            self.project_file_types[project_name] = {
                "file_types": file_types,
                "file_type_lookup": file_type_lookup,
            }
        file_types = self.project_file_types[project_name]["file_types"]
        file_type_lookup = self.project_file_types[project_name]["file_type_lookup"]
        return (file_types, file_type_lookup)

    def get_file_type_lookup(self, file_types):
        """Invert file_types into {"paths": {path: {parse_ft, ...}},
        "extensions": {ext: {parse_ft, ...}}} for fast classification
        of a diff file by its directory and extension."""
        file_type_lookup = {"paths": {}, "extensions": {}}
        for parse_ft, file_type in file_types.items():
            for path in file_type["paths"]:
                if path not in file_type_lookup["paths"]:
                    file_type_lookup["paths"][path] = set()
                file_type_lookup["paths"][path].add(parse_ft)
            for extension in file_type["extensions"]:
                if extension not in file_type_lookup["extensions"]:
                    file_type_lookup["extensions"][extension] = set()
                file_type_lookup["extensions"][extension].add(parse_ft)
        return file_type_lookup
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def get_file_types_for_project(project):
    """For each ParseFileType, return the project directories searched for
    it, the file extensions matched, and the name of the parser class that
    handles it."""

    def entry(paths, extensions, parser):
        # One file-type record: where to look, what to match, who parses it.
        return {"paths": paths, "extensions": extensions, "parser": parser}

    return {
        ParseFileType.Macro: entry(project.macro_paths, [".sql"], "MacroParser"),
        ParseFileType.Model: entry(project.model_paths, [".sql", ".py"], "ModelParser"),
        ParseFileType.Snapshot: entry(project.snapshot_paths, [".sql"], "SnapshotParser"),
        ParseFileType.Analysis: entry(project.analysis_paths, [".sql"], "AnalysisParser"),
        ParseFileType.SingularTest: entry(
            project.test_paths, [".sql"], "SingularTestParser"
        ),
        ParseFileType.GenericTest: entry(
            project.generic_test_paths, [".sql"], "GenericTestParser"
        ),
        ParseFileType.Seed: entry(project.seed_paths, [".csv"], "SeedParser"),
        ParseFileType.Documentation: entry(
            project.docs_paths, [".md"], "DocumentationParser"
        ),
        ParseFileType.Schema: entry(
            project.all_source_paths, [".yml", ".yaml"], "SchemaParser"
        ),
        ParseFileType.Fixture: entry(
            project.fixture_paths, [".csv", ".sql"], "FixtureParser"
        ),
        ParseFileType.Function: entry(
            project.function_paths, [".sql", ".py"], "FunctionParser"
        ),
    }
|