dvt-core 1.11.0b4 (dvt_core-1.11.0b4-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dvt-core might be problematic.

Files changed (261)
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/parser/read_files.py
@@ -0,0 +1,445 @@
+import os
+import pathlib
+from dataclasses import dataclass, field
+from typing import Dict, List, Mapping, MutableMapping, Optional, Protocol
+
+import pathspec  # type: ignore
+from dvt.config import Project
+from dvt.contracts.files import (
+    AnySourceFile,
+    FileHash,
+    FilePath,
+    FixtureSourceFile,
+    ParseFileType,
+    SchemaSourceFile,
+    SourceFile,
+)
+from dvt.events.types import InputFileDiffError
+from dvt.exceptions import ParsingError
+from dvt.parser.common import schema_file_keys
+from dvt.parser.schemas import yaml_from_file
+from dvt.parser.search import filesystem_search
+
+from dbt_common.clients.system import load_file_contents
+from dbt_common.dataclass_schema import dbtClassMixin
+from dbt_common.events.functions import fire_event
+
+
+@dataclass
+class InputFile(dbtClassMixin):
+    path: str
+    content: str
+    modification_time: float = 0.0
+
+
+@dataclass
+class FileDiff(dbtClassMixin):
+    deleted: List[str]
+    # Note: it would be possible to not distinguish between
+    # added and changed files, but we would lose some error handling.
+    changed: List[InputFile]
+    added: List[InputFile]
+
+
+# This loads the files contents and creates the SourceFile object
+def load_source_file(
+    path: FilePath,
+    parse_file_type: ParseFileType,
+    project_name: str,
+    saved_files,
+) -> Optional[AnySourceFile]:
+
+    if parse_file_type == ParseFileType.Schema:
+        sf_cls = SchemaSourceFile
+    elif parse_file_type == ParseFileType.Fixture:
+        sf_cls = FixtureSourceFile  # type:ignore[assignment]
+    else:
+        sf_cls = SourceFile  # type:ignore[assignment]
+
+    source_file = sf_cls(
+        path=path,
+        checksum=FileHash.empty(),
+        parse_file_type=parse_file_type,
+        project_name=project_name,
+    )
+
+    skip_loading_schema_file = False
+    if (
+        parse_file_type == ParseFileType.Schema
+        and saved_files
+        and source_file.file_id in saved_files
+    ):
+        old_source_file = saved_files[source_file.file_id]
+        if (
+            source_file.path.modification_time != 0.0
+            and old_source_file.path.modification_time == source_file.path.modification_time
+        ):
+            source_file.checksum = old_source_file.checksum
+            source_file.dfy = old_source_file.dfy
+            skip_loading_schema_file = True
+
+    if not skip_loading_schema_file:
+        # We strip the file_contents before generating the checksum because we want
+        # the checksum to match the stored file contents
+        file_contents = load_file_contents(path.absolute_path, strip=True)
+        source_file.contents = file_contents
+        source_file.checksum = FileHash.from_contents(file_contents)
+
+    if parse_file_type == ParseFileType.Schema and source_file.contents:
+        dfy = yaml_from_file(source_file=source_file, validate=True)
+        if dfy:
+            validate_yaml(source_file.path.original_file_path, dfy)
+            source_file.dfy = dfy
+    return source_file
+
+
+# Do some minimal validation of the yaml in a schema file.
+# Check version, that key values are lists and that each element in
+# the lists has a 'name' key
+def validate_yaml(file_path, dct):
+    for key in schema_file_keys:
+        if key in dct:
+            if not isinstance(dct[key], list):
+                msg = (
+                    f"The schema file at {file_path} is "
+                    f"invalid because the value of '{key}' is not a list"
+                )
+                raise ParsingError(msg)
+            for element in dct[key]:
+                if not isinstance(element, dict):
+                    msg = (
+                        f"The schema file at {file_path} is "
+                        f"invalid because a list element for '{key}' is not a dictionary"
+                    )
+                    raise ParsingError(msg)
+                if "name" not in element:
+                    msg = (
+                        f"The schema file at {file_path} is "
+                        f"invalid because a list element for '{key}' does not have a "
+                        "name attribute."
+                    )
+                    raise ParsingError(msg)
+
+
+# Special processing for big seed files
+def load_seed_source_file(match: FilePath, project_name) -> SourceFile:
+    if match.seed_too_large():
+        # We don't want to calculate a hash of this file. Use the path.
+        source_file = SourceFile.big_seed(match)
+    else:
+        file_contents = load_file_contents(match.absolute_path, strip=True)
+        checksum = FileHash.from_contents(file_contents)
+        source_file = SourceFile(path=match, checksum=checksum)
+        source_file.contents = ""
+    source_file.parse_file_type = ParseFileType.Seed
+    source_file.project_name = project_name
+    return source_file
+
+
+# Use the FilesystemSearcher to get a bunch of FilePaths, then turn
+# them into a bunch of FileSource objects
+def get_source_files(project, paths, extension, parse_file_type, saved_files, ignore_spec):
+    # file path list
+    fp_list = filesystem_search(project, paths, extension, ignore_spec)
+    # file block list
+    fb_list = []
+    for fp in fp_list:
+        if parse_file_type == ParseFileType.Seed:
+            fb_list.append(load_seed_source_file(fp, project.project_name))
+        # singular tests live in /tests but only generic tests live
+        # in /tests/generic and fixtures in /tests/fixture so we want to skip those
+        else:
+            if parse_file_type == ParseFileType.SingularTest:
+                path = pathlib.Path(fp.relative_path)
+                if path.parts[0] in ["generic", "fixtures"]:
+                    continue
+            file = load_source_file(fp, parse_file_type, project.project_name, saved_files)
+            # only append the list if it has contents. added to fix #3568
+            if file:
+                fb_list.append(file)
+    return fb_list
+
+
+def read_files_for_parser(project, files, parse_ft, file_type_info, saved_files, ignore_spec):
+    dirs = file_type_info["paths"]
+    parser_files = []
+    for extension in file_type_info["extensions"]:
+        source_files = get_source_files(
+            project, dirs, extension, parse_ft, saved_files, ignore_spec
+        )
+        for sf in source_files:
+            files[sf.file_id] = sf
+            parser_files.append(sf.file_id)
+    return parser_files
+
+
+def generate_dbt_ignore_spec(project_root):
+    ignore_file_path = os.path.join(project_root, ".dbtignore")
+
+    ignore_spec = None
+    if os.path.exists(ignore_file_path):
+        with open(ignore_file_path) as f:
+            ignore_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, f)
+    return ignore_spec
+
+
+# Protocol for the ReadFiles... classes
+class ReadFiles(Protocol):
+    files: MutableMapping[str, AnySourceFile]
+    project_parser_files: Dict
+
+    def read_files(self):
+        pass
+
+
+@dataclass
+class ReadFilesFromFileSystem:
+    all_projects: Mapping[str, Project]
+    files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
+    # saved_files is only used to compare schema files
+    saved_files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
+    # project_parser_files = {
+    #   "my_project": {
+    #     "ModelParser": ["my_project://models/my_model.sql"]
+    #   }
+    # }
+    #
+    project_parser_files: Dict = field(default_factory=dict)
+
+    def read_files(self):
+        for project in self.all_projects.values():
+            file_types = get_file_types_for_project(project)
+            self.read_files_for_project(project, file_types)
+
+    def read_files_for_project(self, project, file_types):
+        dbt_ignore_spec = generate_dbt_ignore_spec(project.project_root)
+        project_files = self.project_parser_files[project.project_name] = {}
+
+        for parse_ft, file_type_info in file_types.items():
+            project_files[file_type_info["parser"]] = read_files_for_parser(
+                project,
+                self.files,
+                parse_ft,
+                file_type_info,
+                self.saved_files,
+                dbt_ignore_spec,
+            )
+
+
+@dataclass
+class ReadFilesFromDiff:
+    root_project_name: str
+    all_projects: Mapping[str, Project]
+    file_diff: FileDiff
+    files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
+    # saved_files is used to construct a fresh copy of files, without
+    # additional information from parsing
+    saved_files: MutableMapping[str, AnySourceFile] = field(default_factory=dict)
+    project_parser_files: Dict = field(default_factory=dict)
+    project_file_types: Dict = field(default_factory=dict)
+    local_package_dirs: Optional[List[str]] = None
+
+    def read_files(self):
+        # Copy the base file information from the existing manifest.
+        # We will do deletions, adds, changes from the file_diff to emulate
+        # a complete read of the project file system.
+        for file_id, source_file in self.saved_files.items():
+            if isinstance(source_file, SchemaSourceFile):
+                file_cls = SchemaSourceFile
+            else:
+                file_cls = SourceFile
+            new_source_file = file_cls(
+                path=source_file.path,
+                checksum=source_file.checksum,
+                project_name=source_file.project_name,
+                parse_file_type=source_file.parse_file_type,
+                contents=source_file.contents,
+            )
+            self.files[file_id] = new_source_file
+
+        # Now that we have a copy of the files, remove deleted files
+        # For now, we assume that all files are in the root_project, until
+        # we've determined whether project name will be provided or deduced
+        # from the directory.
+        for input_file_path in self.file_diff.deleted:
+            project_name = self.get_project_name(input_file_path)
+            file_id = f"{project_name}://{input_file_path}"
+            if file_id in self.files:
+                self.files.pop(file_id)
+            else:
+                fire_event(InputFileDiffError(category="deleted file not found", file_id=file_id))
+
+        # Now we do the changes
+        for input_file in self.file_diff.changed:
+            project_name = self.get_project_name(input_file.path)
+            file_id = f"{project_name}://{input_file.path}"
+            if file_id in self.files:
+                # Get the existing source_file object and update the contents and mod time
+                source_file = self.files[file_id]
+                source_file.contents = input_file.content
+                source_file.checksum = FileHash.from_contents(input_file.content)
+                source_file.path.modification_time = input_file.modification_time
+                # Handle creation of dictionary version of schema file content
+                if isinstance(source_file, SchemaSourceFile) and source_file.contents:
+                    dfy = yaml_from_file(source_file)
+                    if dfy:
+                        validate_yaml(source_file.path.original_file_path, dfy)
+                        source_file.dfy = dfy
+                    # TODO: ensure we have a file object even for empty files, such as schema files
+
+        # Now the new files
+        for input_file in self.file_diff.added:
+            project_name = self.get_project_name(input_file.path)
+            # FilePath
+            #   searched_path i.e. "models"
+            #   relative_path i.e. the part after searched_path, or "model.sql"
+            #   modification_time float, default 0.0...
+            #   project_root
+            # We use PurePath because there's no actual filesystem to look at
+            input_file_path = pathlib.PurePath(input_file.path)
+            extension = input_file_path.suffix
+            searched_path = input_file_path.parts[0]
+            # check what happens with generic tests... searched_path/relative_path
+
+            relative_path_parts = input_file_path.parts[1:]
+            relative_path = pathlib.PurePath("").joinpath(*relative_path_parts)
+            # Create FilePath object
+            input_file_path = FilePath(
+                searched_path=searched_path,
+                relative_path=str(relative_path),
+                modification_time=input_file.modification_time,
+                project_root=self.all_projects[project_name].project_root,
+            )
+
+            # Now use the extension and "searched_path" to determine which file_type
+            (file_types, file_type_lookup) = self.get_project_file_types(project_name)
+            parse_ft_for_extension = set()
+            parse_ft_for_path = set()
+            if extension in file_type_lookup["extensions"]:
+                parse_ft_for_extension = file_type_lookup["extensions"][extension]
+            if searched_path in file_type_lookup["paths"]:
+                parse_ft_for_path = file_type_lookup["paths"][searched_path]
+            if len(parse_ft_for_extension) == 0 or len(parse_ft_for_path) == 0:
+                fire_event(InputFileDiffError(category="not a project file", file_id=file_id))
+                continue
+            parse_ft_set = parse_ft_for_extension.intersection(parse_ft_for_path)
+            if (
+                len(parse_ft_set) != 1
+            ):  # There should only be one result for a path/extension combination
+                fire_event(
+                    InputFileDiffError(
+                        category="unable to resolve diff file location", file_id=file_id
+                    )
+                )
+                continue
+            parse_ft = parse_ft_set.pop()
+            source_file_cls = SourceFile
+            if parse_ft == ParseFileType.Schema:
+                source_file_cls = SchemaSourceFile
+            source_file = source_file_cls(
+                path=input_file_path,
+                contents=input_file.content,
+                checksum=FileHash.from_contents(input_file.content),
+                project_name=project_name,
+                parse_file_type=parse_ft,
+            )
+            if source_file_cls == SchemaSourceFile:
+                dfy = yaml_from_file(source_file)
+                if dfy:
+                    validate_yaml(source_file.path.original_file_path, dfy)
+                    source_file.dfy = dfy
+                else:
+                    # don't include in files because no content
+                    continue
+            self.files[source_file.file_id] = source_file
+
+    def get_project_name(self, path):
+        # It's not currently possible to recognize any other project files,
+        # and it's an open issue how to handle deps.
+        return self.root_project_name
+
+    def get_project_file_types(self, project_name):
+        if project_name not in self.project_file_types:
+            file_types = get_file_types_for_project(self.all_projects[project_name])
+            file_type_lookup = self.get_file_type_lookup(file_types)
+            self.project_file_types[project_name] = {
+                "file_types": file_types,
+                "file_type_lookup": file_type_lookup,
+            }
+        file_types = self.project_file_types[project_name]["file_types"]
+        file_type_lookup = self.project_file_types[project_name]["file_type_lookup"]
+        return (file_types, file_type_lookup)

+    def get_file_type_lookup(self, file_types):
+        file_type_lookup = {"paths": {}, "extensions": {}}
+        for parse_ft, file_type in file_types.items():
+            for path in file_type["paths"]:
+                if path not in file_type_lookup["paths"]:
+                    file_type_lookup["paths"][path] = set()
+                file_type_lookup["paths"][path].add(parse_ft)
+            for extension in file_type["extensions"]:
+                if extension not in file_type_lookup["extensions"]:
+                    file_type_lookup["extensions"][extension] = set()
+                file_type_lookup["extensions"][extension].add(parse_ft)
+        return file_type_lookup
+
+
+def get_file_types_for_project(project):
+    file_types = {
+        ParseFileType.Macro: {
+            "paths": project.macro_paths,
+            "extensions": [".sql"],
+            "parser": "MacroParser",
+        },
+        ParseFileType.Model: {
+            "paths": project.model_paths,
+            "extensions": [".sql", ".py"],
+            "parser": "ModelParser",
+        },
+        ParseFileType.Snapshot: {
+            "paths": project.snapshot_paths,
+            "extensions": [".sql"],
+            "parser": "SnapshotParser",
+        },
+        ParseFileType.Analysis: {
+            "paths": project.analysis_paths,
+            "extensions": [".sql"],
+            "parser": "AnalysisParser",
+        },
+        ParseFileType.SingularTest: {
+            "paths": project.test_paths,
+            "extensions": [".sql"],
+            "parser": "SingularTestParser",
+        },
+        ParseFileType.GenericTest: {
+            "paths": project.generic_test_paths,
+            "extensions": [".sql"],
+            "parser": "GenericTestParser",
+        },
+        ParseFileType.Seed: {
+            "paths": project.seed_paths,
+            "extensions": [".csv"],
+            "parser": "SeedParser",
+        },
+        ParseFileType.Documentation: {
+            "paths": project.docs_paths,
+            "extensions": [".md"],
+            "parser": "DocumentationParser",
+        },
+        ParseFileType.Schema: {
+            "paths": project.all_source_paths,
+            "extensions": [".yml", ".yaml"],
+            "parser": "SchemaParser",
+        },
+        ParseFileType.Fixture: {
+            "paths": project.fixture_paths,
+            "extensions": [".csv", ".sql"],
+            "parser": "FixtureParser",
+        },
+        ParseFileType.Function: {
+            "paths": project.function_paths,
+            "extensions": [".sql", ".py"],
+            "parser": "FunctionParser",
+        },
+    }
+    return file_types
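
For orientation, the sketch below mimics the path/extension intersection that ReadFilesFromDiff uses to classify an added file from a FileDiff. It is illustrative only and does not import dvt: FILE_TYPE_LOOKUP is a hand-written stand-in for what get_file_type_lookup() derives from get_file_types_for_project(), and plain strings replace the real ParseFileType members and Project path settings.

    import pathlib

    # Hypothetical stand-in for the lookup that ReadFilesFromDiff.get_file_type_lookup()
    # builds from get_file_types_for_project(). The real sets hold ParseFileType members
    # and the real directories come from the Project config; plain strings are used here
    # so the sketch runs without dvt installed.
    FILE_TYPE_LOOKUP = {
        "paths": {
            "models": {"Model", "Schema"},  # schema .yml files live alongside models
            "seeds": {"Seed", "Schema"},
            "macros": {"Macro"},
        },
        "extensions": {
            ".sql": {"Model", "Macro"},
            ".py": {"Model"},
            ".yml": {"Schema"},
            ".csv": {"Seed"},
        },
    }


    def resolve_parse_file_type(path: str):
        # Same shape as the "added files" branch above: intersect the candidates for
        # the first path component and for the extension; exactly one survivor is required.
        pure = pathlib.PurePath(path)
        for_path = FILE_TYPE_LOOKUP["paths"].get(pure.parts[0], set())
        for_ext = FILE_TYPE_LOOKUP["extensions"].get(pure.suffix, set())
        candidates = for_path & for_ext
        return candidates.pop() if len(candidates) == 1 else None


    print(resolve_parse_file_type("models/staging/stg_orders.sql"))  # -> "Model"
    print(resolve_parse_file_type("models/staging/schema.yml"))      # -> "Schema"
    print(resolve_parse_file_type("notes/readme.txt"))               # -> None

In the packaged code, an empty or ambiguous intersection does not raise; it fires an InputFileDiffError event ("not a project file" or "unable to resolve diff file location") and skips the file.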