dvt-core 1.11.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dvt-core might be problematic.

Files changed (261)
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
dvt/task/deps.py ADDED
@@ -0,0 +1,280 @@
+import json
+from hashlib import sha1
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import dvt.deprecations
+import dvt.exceptions
+import dvt.utils
+import yaml
+from dvt.config import Project
+from dvt.config.project import load_yml_dict, package_config_from_data
+from dvt.config.renderer import PackageRenderer
+from dvt.constants import PACKAGE_LOCK_FILE_NAME, PACKAGE_LOCK_HASH_KEY
+from dvt.contracts.project import PackageSpec
+from dvt.deps.base import downloads_directory
+from dvt.deps.registry import RegistryPinnedPackage
+from dvt.deps.resolver import resolve_lock_packages, resolve_packages
+from dvt.events.types import (
+    DepsAddPackage,
+    DepsFoundDuplicatePackage,
+    DepsInstallInfo,
+    DepsListSubdirectory,
+    DepsLockUpdating,
+    DepsNoPackagesFound,
+    DepsNotifyUpdatesAvailable,
+    DepsStartPackageInstall,
+    DepsUpdateAvailable,
+    DepsUpToDate,
+)
+from dvt.task.base import BaseTask, move_to_nearest_project_dir
+
+from dbt_common.clients import system
+from dbt_common.events.functions import fire_event
+from dbt_common.events.types import Formatting
+
+
+class dbtPackageDumper(yaml.Dumper):
+    def increase_indent(self, flow=False, indentless=False):
+        return super(dbtPackageDumper, self).increase_indent(flow, False)
+
+
+def _create_sha1_hash(packages: List[PackageSpec]) -> str:
+    """Create a SHA1 hash of the packages list,
+    this is used to determine if the packages for current execution matches
+    the previous lock.
+
+    Args:
+        list[Packages]: list of packages specified that are already rendered
+
+    Returns:
+        str: SHA1 hash of the packages list
+    """
+    package_strs = [json.dumps(package.to_dict(), sort_keys=True) for package in packages]
+    package_strs = sorted(package_strs)
+
+    return sha1("\n".join(package_strs).encode("utf-8")).hexdigest()
+
+
+def _create_packages_yml_entry(package: str, version: Optional[str], source: str) -> dict:
+    """Create a formatted entry to add to `packages.yml` or `package-lock.yml` file
+
+    Args:
+        package (str): Name of package to download
+        version (str): Version of package to download
+        source (str): Source of where to download package from
+
+    Returns:
+        dict: Formatted dict to write to `packages.yml` or `package-lock.yml` file
+    """
+    package_key = source
+    version_key = "version"
+
+    if source == "hub":
+        package_key = "package"
+
+    packages_yml_entry = {package_key: package}
+
+    if source == "git":
+        version_key = "revision"
+
+    if version:
+        if "," in version:
+            version = version.split(",")  # type: ignore
+
+        packages_yml_entry[version_key] = version
+
+    return packages_yml_entry
+
+
+class DepsTask(BaseTask):
+    def __init__(self, args: Any, project: Project) -> None:
+        super().__init__(args=args)
+        # N.B. This is a temporary fix for a bug when using relative paths via
+        # --project-dir with deps. A larger overhaul of our path handling methods
+        # is needed to fix this the "right" way.
+        # See GH-7615
+        project.project_root = str(Path(project.project_root).resolve())
+        self.project = project
+        self.cli_vars = args.vars
+
+    def track_package_install(
+        self, package_name: str, source_type: str, version: Optional[str]
+    ) -> None:
+        # Hub packages do not need to be hashed, as they are public
+        if source_type == "local":
+            package_name = dbt.utils.md5(package_name)
+            version = "local"
+        elif source_type == "tarball":
+            package_name = dbt.utils.md5(package_name)
+            version = "tarball"
+        elif source_type != "hub":
+            package_name = dbt.utils.md5(package_name)
+            version = dbt.utils.md5(version)
+
+        dbt.tracking.track_package_install(
+            "deps",
+            self.project.hashed_name(),
+            {"name": package_name, "source": source_type, "version": version},
+        )
+
+    def check_for_duplicate_packages(self, packages_yml):
+        """Loop through contents of `packages.yml` to ensure no duplicate package names + versions.
+
+        This duplicate check will take into consideration exact match of a package name, as well as
+        a check to see if a package name exists within a name (i.e. a package name inside a git URL).
+
+        Args:
+            packages_yml (dict): In-memory read of `packages.yml` contents
+
+        Returns:
+            dict: Updated or untouched packages_yml contents
+        """
+        for i, pkg_entry in enumerate(packages_yml["packages"]):
+            for val in pkg_entry.values():
+                if self.args.add_package["name"] in val:
+                    del packages_yml["packages"][i]
+
+                    fire_event(DepsFoundDuplicatePackage(removed_package=pkg_entry))
+
+        return packages_yml
+
+    def add(self):
+        packages_yml_filepath = (
+            f"{self.project.project_root}/{self.project.packages_specified_path}"
+        )
+        if not system.path_exists(packages_yml_filepath):
+            with open(packages_yml_filepath, "w") as package_yml:
+                yaml.safe_dump({"packages": []}, package_yml)
+            fire_event(Formatting("Created packages.yml"))
+
+        new_package_entry = _create_packages_yml_entry(
+            self.args.add_package["name"], self.args.add_package["version"], self.args.source
+        )
+
+        with open(packages_yml_filepath, "r") as user_yml_obj:
+            packages_yml = yaml.safe_load(user_yml_obj)
+            packages_yml = self.check_for_duplicate_packages(packages_yml)
+            packages_yml["packages"].append(new_package_entry)
+
+        self.project.packages.packages = package_config_from_data(packages_yml).packages
+
+        if packages_yml:
+            with open(packages_yml_filepath, "w") as pkg_obj:
+                pkg_obj.write(
+                    yaml.dump(packages_yml, Dumper=dbtPackageDumper, default_flow_style=False)
+                )
+
+            fire_event(
+                DepsAddPackage(
+                    package_name=self.args.add_package["name"],
+                    version=self.args.add_package["version"],
+                    packages_filepath=packages_yml_filepath,
+                )
+            )
+
+    def lock(self) -> None:
+        lock_filepath = f"{self.project.project_root}/{PACKAGE_LOCK_FILE_NAME}"
+
+        packages = self.project.packages.packages
+        packages_installed: Dict[str, Any] = {"packages": []}
+
+        if not packages:
+            fire_event(DepsNoPackagesFound())
+            return
+
+        with downloads_directory():
+            resolved_deps = resolve_packages(packages, self.project, self.cli_vars)
+
+        # this loop is to create the package-lock.yml in the same format as original packages.yml
+        # package-lock.yml includes both the stated packages in packages.yml along with dependent packages
+        renderer = PackageRenderer(self.cli_vars)
+        for package in resolved_deps:
+            package_dict = package.to_dict()
+            package_dict["name"] = package.get_project_name(self.project, renderer)
+            packages_installed["packages"].append(package_dict)
+
+        packages_installed[PACKAGE_LOCK_HASH_KEY] = _create_sha1_hash(
+            self.project.packages.packages
+        )
+
+        with open(lock_filepath, "w") as lock_obj:
+            yaml.dump(packages_installed, lock_obj, Dumper=dbtPackageDumper)
+
+        fire_event(DepsLockUpdating(lock_filepath=lock_filepath))
+
+    def run(self) -> None:
+        move_to_nearest_project_dir(self.args.project_dir)
+        if self.args.add_package:
+            self.add()
+
+        # Check lock file exist and generated by the same packages.yml
+        # or dependencies.yml.
+        lock_file_path = f"{self.project.project_root}/{PACKAGE_LOCK_FILE_NAME}"
+        if not system.path_exists(lock_file_path):
+            self.lock()
+        elif self.args.upgrade:
+            self.lock()
+        else:
+            # Check dependency definition is modified or not.
+            current_hash = _create_sha1_hash(self.project.packages.packages)
+            previous_hash = load_yml_dict(lock_file_path).get(PACKAGE_LOCK_HASH_KEY, None)
+            if previous_hash != current_hash:
+                self.lock()
+
+        # Early return when 'dbt deps --lock'
+        # Just resolve packages and write lock file, don't actually install packages
+        if self.args.lock:
+            return
+
+        if system.path_exists(self.project.packages_install_path):
+            system.rmtree(self.project.packages_install_path)
+
+        system.make_directory(self.project.packages_install_path)
+
+        packages_lock_dict = load_yml_dict(f"{self.project.project_root}/{PACKAGE_LOCK_FILE_NAME}")
+
+        renderer = PackageRenderer(self.cli_vars)
+        packages_lock_config = package_config_from_data(
+            renderer.render_data(packages_lock_dict), packages_lock_dict
+        ).packages
+
+        if not packages_lock_config:
+            fire_event(DepsNoPackagesFound())
+            return
+
+        with downloads_directory():
+            lock_defined_deps = resolve_lock_packages(packages_lock_config)
+            renderer = PackageRenderer(self.cli_vars)
+
+            packages_to_upgrade = []
+
+            for package in lock_defined_deps:
+                package_name = package.name
+                source_type = package.source_type()
+                version = package.get_version()
+
+                fire_event(DepsStartPackageInstall(package_name=package_name))
+                package.install(self.project, renderer)
+
+                fire_event(DepsInstallInfo(version_name=package.nice_version_name()))
+
+                if isinstance(package, RegistryPinnedPackage):
+                    version_latest = package.get_version_latest()
+
+                    if version_latest != version:
+                        packages_to_upgrade.append(package_name)
+                        fire_event(DepsUpdateAvailable(version_latest=version_latest))
+                    else:
+                        fire_event(DepsUpToDate())
+
+                if package.get_subdirectory():
+                    fire_event(DepsListSubdirectory(subdirectory=package.get_subdirectory()))
+
+                self.track_package_install(
+                    package_name=package_name, source_type=source_type, version=version
+                )
+
+            if packages_to_upgrade:
+                fire_event(Formatting(""))
+                fire_event(DepsNotifyUpdatesAvailable(packages=packages_to_upgrade))
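
Note on the lock-file hashing above: it is order-insensitive, since each package entry is JSON-serialized with sorted keys, the serialized strings are sorted, and the newline-joined result is SHA-1 hashed; `run()` only regenerates `package-lock.yml` when that hash no longer matches the one stored under PACKAGE_LOCK_HASH_KEY. A minimal standalone sketch of that behaviour, using plain dicts as hypothetical stand-ins for dvt's PackageSpec objects:

import json
from hashlib import sha1
from typing import Dict, List

def sha1_of_packages(packages: List[Dict]) -> str:
    # Mirror of _create_sha1_hash: serialize each entry with sorted keys,
    # sort the serialized strings, then hash the newline-joined result.
    package_strs = sorted(json.dumps(p, sort_keys=True) for p in packages)
    return sha1("\n".join(package_strs).encode("utf-8")).hexdigest()

a = [{"package": "dbt-labs/dbt_utils", "version": "1.1.1"},
     {"git": "https://github.com/org/repo", "revision": "main"}]
b = list(reversed(a))

# The same package set in a different order yields the same hash,
# so reordering packages.yml does not force a lock-file rewrite.
assert sha1_of_packages(a) == sha1_of_packages(b)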
dvt/task/docs/__init__.py ADDED
@@ -0,0 +1,3 @@
+import os
+
+DOCS_INDEX_FILE_PATH = os.path.normpath(os.path.join(os.path.dirname(__file__), "index.html"))
dvt/task/docs/generate.py ADDED
@@ -0,0 +1,408 @@
+import os
+import shutil
+from dataclasses import replace
+from datetime import datetime, timezone
+from itertools import chain
+from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
+
+import agate
+import dvt.compilation
+import dvt.exceptions
+import dvt.utils
+from dvt.artifacts.schemas.catalog import (
+    CatalogArtifact,
+    CatalogKey,
+    CatalogResults,
+    CatalogTable,
+    ColumnMetadata,
+    PrimitiveDict,
+    StatsDict,
+    StatsItem,
+    TableMetadata,
+)
+from dvt.artifacts.schemas.results import NodeStatus
+from dvt.constants import CATALOG_FILENAME, MANIFEST_FILE_NAME
+from dvt.contracts.graph.manifest import Manifest
+from dvt.contracts.graph.nodes import ResultNode
+from dvt.events.types import ArtifactWritten
+from dvt.exceptions import AmbiguousCatalogMatchError
+from dvt.graph import ResourceTypeSelector
+from dvt.graph.graph import UniqueId
+from dvt.node_types import EXECUTABLE_NODE_TYPES, NodeType
+from dvt.parser.manifest import write_manifest
+from dvt.task.compile import CompileTask
+from dvt.task.docs import DOCS_INDEX_FILE_PATH
+from dvt.utils.artifact_upload import add_artifact_produced
+
+import dbt_common.utils.formatting
+from dbt.adapters.events.types import (
+    BuildingCatalog,
+    CannotGenerateDocs,
+    CatalogWritten,
+    WriteCatalogFailure,
+)
+from dbt.adapters.factory import get_adapter
+from dbt_common.clients.system import load_file_contents
+from dbt_common.dataclass_schema import ValidationError
+from dbt_common.events.functions import fire_event
+from dbt_common.exceptions import DbtInternalError
+
+
+def get_stripped_prefix(source: Dict[str, Any], prefix: str) -> Dict[str, Any]:
+    """Go through the source, extracting every key/value pair where the key starts
+    with the given prefix.
+    """
+    cut = len(prefix)
+    return {k[cut:]: v for k, v in source.items() if k.startswith(prefix)}
+
+
+def build_catalog_table(data) -> CatalogTable:
+    # build the new table's metadata + stats
+    metadata = TableMetadata.from_dict(get_stripped_prefix(data, "table_"))
+    stats = format_stats(get_stripped_prefix(data, "stats:"))
+
+    return CatalogTable(
+        metadata=metadata,
+        stats=stats,
+        columns={},
+    )
+
+
+# keys are database name, schema name, table name
+class Catalog(Dict[CatalogKey, CatalogTable]):
+    def __init__(self, columns: List[PrimitiveDict]) -> None:
+        super().__init__()
+        for col in columns:
+            self.add_column(col)
+
+    def get_table(self, data: PrimitiveDict) -> CatalogTable:
+        database = data.get("table_database")
+        if database is None:
+            dkey: Optional[str] = None
+        else:
+            dkey = str(database)
+
+        try:
+            key = CatalogKey(
+                dkey,
+                str(data["table_schema"]),
+                str(data["table_name"]),
+            )
+        except KeyError as exc:
+            raise dbt_common.exceptions.CompilationError(
+                "Catalog information missing required key {} (got {})".format(exc, data)
+            )
+        table: CatalogTable
+        if key in self:
+            table = self[key]
+        else:
+            table = build_catalog_table(data)
+            self[key] = table
+        return table
+
+    def add_column(self, data: PrimitiveDict):
+        table = self.get_table(data)
+        column_data = get_stripped_prefix(data, "column_")
+        # the index should really never be that big so it's ok to end up
+        # serializing this to JSON (2^53 is the max safe value there)
+        column_data["index"] = int(column_data["index"])
+
+        column = ColumnMetadata.from_dict(column_data)
+        table.columns[column.name] = column
+
+    def make_unique_id_map(
+        self, manifest: Manifest, selected_node_ids: Optional[Set[UniqueId]] = None
+    ) -> Tuple[Dict[str, CatalogTable], Dict[str, CatalogTable]]:
+        """
+        Create mappings between CatalogKeys and CatalogTables for nodes and sources, filtered by selected_node_ids.
+
+        By default, selected_node_ids is None and all nodes and sources defined in the manifest are included in the mappings.
+        """
+        nodes: Dict[str, CatalogTable] = {}
+        sources: Dict[str, CatalogTable] = {}
+
+        node_map, source_map = get_unique_id_mapping(manifest)
+        table: CatalogTable
+        for table in self.values():
+            key = table.key()
+            if key in node_map:
+                unique_id = node_map[key]
+                if selected_node_ids is None or unique_id in selected_node_ids:
+                    nodes[unique_id] = replace(table, unique_id=unique_id)
+
+            unique_ids = source_map.get(table.key(), set())
+            for unique_id in unique_ids:
+                if unique_id in sources:
+                    raise AmbiguousCatalogMatchError(
+                        unique_id,
+                        sources[unique_id].to_dict(omit_none=True),
+                        table.to_dict(omit_none=True),
+                    )
+                elif selected_node_ids is None or unique_id in selected_node_ids:
+                    sources[unique_id] = replace(table, unique_id=unique_id)
+        return nodes, sources
+
+
+def format_stats(stats: PrimitiveDict) -> StatsDict:
+    """Given a dictionary following this layout:
+
+        {
+            'encoded:label': 'Encoded',
+            'encoded:value': 'Yes',
+            'encoded:description': 'Indicates if the column is encoded',
+            'encoded:include': True,
+
+            'size:label': 'Size',
+            'size:value': 128,
+            'size:description': 'Size of the table in MB',
+            'size:include': True,
+        }
+
+    format_stats will convert the dict into a StatsDict with keys of 'encoded'
+    and 'size'.
+    """
+    stats_collector: StatsDict = {}
+
+    base_keys = {k.split(":")[0] for k in stats}
+    for key in base_keys:
+        dct: PrimitiveDict = {"id": key}
+        for subkey in ("label", "value", "description", "include"):
+            dct[subkey] = stats["{}:{}".format(key, subkey)]
+
+        try:
+            stats_item = StatsItem.from_dict(dct)
+        except ValidationError:
+            continue
+        if stats_item.include:
+            stats_collector[key] = stats_item
+
+    # we always have a 'has_stats' field, it's never included
+    has_stats = StatsItem(
+        id="has_stats",
+        label="Has Stats?",
+        value=len(stats_collector) > 0,
+        description="Indicates whether there are statistics for this table",
+        include=False,
+    )
+    stats_collector["has_stats"] = has_stats
+    return stats_collector
+
+
+def mapping_key(node: ResultNode) -> CatalogKey:
+    dkey = dbt_common.utils.formatting.lowercase(node.database)
+    return CatalogKey(dkey, node.schema.lower(), node.identifier.lower())
+
+
+def get_unique_id_mapping(
+    manifest: Manifest,
+) -> Tuple[Dict[CatalogKey, str], Dict[CatalogKey, Set[str]]]:
+    # A single relation could have multiple unique IDs pointing to it if a
+    # source were also a node.
+    node_map: Dict[CatalogKey, str] = {}
+    source_map: Dict[CatalogKey, Set[str]] = {}
+    for unique_id, node in manifest.nodes.items():
+        key = mapping_key(node)
+        node_map[key] = unique_id
+
+    for unique_id, source in manifest.sources.items():
+        key = mapping_key(source)
+        if key not in source_map:
+            source_map[key] = set()
+        source_map[key].add(unique_id)
+    return node_map, source_map
+
+
+class GenerateTask(CompileTask):
+    def run(self) -> CatalogArtifact:
+        compile_results = None
+        if self.args.compile:
+            compile_results = CompileTask.run(self)
+            if any(r.status == NodeStatus.Error for r in compile_results):
+                fire_event(CannotGenerateDocs())
+                return CatalogArtifact.from_results(
+                    nodes={},
+                    sources={},
+                    generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
+                    errors=None,
+                    compile_results=compile_results,
+                )
+
+        shutil.copyfile(
+            DOCS_INDEX_FILE_PATH, os.path.join(self.config.project_target_path, "index.html")
+        )
+
+        for asset_path in self.config.asset_paths:
+            to_asset_path = os.path.join(self.config.project_target_path, asset_path)
+
+            if os.path.exists(to_asset_path):
+                shutil.rmtree(to_asset_path)
+
+            from_asset_path = os.path.join(self.config.project_root, asset_path)
+
+            if os.path.exists(from_asset_path):
+                shutil.copytree(from_asset_path, to_asset_path)
+
+        if self.manifest is None:
+            raise DbtInternalError("self.manifest was None in run!")
+
+        selected_node_ids: Optional[Set[UniqueId]] = None
+        if self.args.empty_catalog:
+            catalog_table: agate.Table = agate.Table([])
+            exceptions: List[Exception] = []
+            selected_node_ids = set()
+        else:
+            adapter = get_adapter(self.config)
+            with adapter.connection_named("generate_catalog"):
+                fire_event(BuildingCatalog())
+                # Get a list of relations we need from the catalog
+                relations = None
+                if self.job_queue is not None:
+                    selected_node_ids = self.job_queue.get_selected_nodes()
+                    selected_nodes = self._get_nodes_from_ids(self.manifest, selected_node_ids)
+
+                    # Source selection is handled separately from main job_queue selection because
+                    # SourceDefinition nodes cannot be safely compiled / run by the CompileRunner / CompileTask,
+                    # but should still be included in the catalog based on the selection spec
+                    selected_source_ids = self._get_selected_source_ids()
+                    selected_source_nodes = self._get_nodes_from_ids(
+                        self.manifest, selected_source_ids
+                    )
+                    selected_node_ids.update(selected_source_ids)
+                    selected_nodes.extend(selected_source_nodes)
+
+                    relations = {
+                        adapter.Relation.create_from(adapter.config, node)
+                        for node in selected_nodes
+                    }
+
+                # This generates the catalog as an agate.Table
+                catalogable_nodes = chain(
+                    [
+                        node
+                        for node in self.manifest.nodes.values()
+                        if (node.is_relational and not node.is_ephemeral_model)
+                    ],
+                    self.manifest.sources.values(),
+                )
+                used_schemas = self.manifest.get_used_schemas()
+                catalog_table, exceptions = adapter.get_filtered_catalog(
+                    catalogable_nodes, used_schemas, relations
+                )
+
+        catalog_data: List[PrimitiveDict] = [
+            dict(zip(catalog_table.column_names, map(dbt.utils._coerce_decimal, row)))
+            for row in catalog_table
+        ]
+
+        catalog = Catalog(catalog_data)
+
+        errors: Optional[List[str]] = None
+        if exceptions:
+            errors = [str(e) for e in exceptions]
+
+        nodes, sources = catalog.make_unique_id_map(self.manifest, selected_node_ids)
+        results = self.get_catalog_results(
+            nodes=nodes,
+            sources=sources,
+            generated_at=datetime.now(timezone.utc).replace(tzinfo=None),
+            compile_results=compile_results,
+            errors=errors,
+        )
+
+        catalog_path = os.path.join(self.config.project_target_path, CATALOG_FILENAME)
+        results.write(catalog_path)
+        add_artifact_produced(catalog_path)
+        fire_event(
+            ArtifactWritten(artifact_type=results.__class__.__name__, artifact_path=catalog_path)
+        )
+
+        if self.args.compile:
+            write_manifest(self.manifest, self.config.project_target_path)
+
+        if self.args.static:
+
+            # Read manifest.json and catalog.json
+            read_manifest_data = load_file_contents(
+                os.path.join(self.config.project_target_path, MANIFEST_FILE_NAME)
+            )
+            read_catalog_data = load_file_contents(catalog_path)
+
+            # Create new static index file contents
+            index_data = load_file_contents(DOCS_INDEX_FILE_PATH)
+            index_data = index_data.replace('"MANIFEST.JSON INLINE DATA"', read_manifest_data)
+            index_data = index_data.replace('"CATALOG.JSON INLINE DATA"', read_catalog_data)
+
+            # Write out the new index file
+            static_index_path = os.path.join(self.config.project_target_path, "static_index.html")
+            with open(static_index_path, "wb") as static_index_file:
+                static_index_file.write(bytes(index_data, "utf8"))
+
+        if exceptions:
+            fire_event(WriteCatalogFailure(num_exceptions=len(exceptions)))
+        fire_event(CatalogWritten(path=os.path.abspath(catalog_path)))
+        return results
+
+    def get_node_selector(self) -> ResourceTypeSelector:
+        if self.manifest is None or self.graph is None:
+            raise DbtInternalError("manifest and graph must be set to perform node selection")
+        return ResourceTypeSelector(
+            graph=self.graph,
+            manifest=self.manifest,
+            previous_state=self.previous_state,
+            resource_types=EXECUTABLE_NODE_TYPES,
+            include_empty_nodes=True,
+        )
+
+    def get_catalog_results(
+        self,
+        nodes: Dict[str, CatalogTable],
+        sources: Dict[str, CatalogTable],
+        generated_at: datetime,
+        compile_results: Optional[Any],
+        errors: Optional[List[str]],
+    ) -> CatalogArtifact:
+        return CatalogArtifact.from_results(
+            generated_at=generated_at,
+            nodes=nodes,
+            sources=sources,
+            compile_results=compile_results,
+            errors=errors,
+        )
+
+    @classmethod
+    def interpret_results(self, results: Optional[CatalogResults]) -> bool:
+        if results is None:
+            return False
+        if results.errors:
+            return False
+        compile_results = results._compile_results
+        if compile_results is None:
+            return True
+
+        return super().interpret_results(compile_results)
+
+    @staticmethod
+    def _get_nodes_from_ids(manifest: Manifest, node_ids: Iterable[str]) -> List[ResultNode]:
+        selected: List[ResultNode] = []
+        for unique_id in node_ids:
+            if unique_id in manifest.nodes:
+                node = manifest.nodes[unique_id]
+                if node.is_relational and not node.is_ephemeral_model:
+                    selected.append(node)
+            elif unique_id in manifest.sources:
+                source = manifest.sources[unique_id]
+                selected.append(source)
+        return selected
+
+    def _get_selected_source_ids(self) -> Set[UniqueId]:
+        if self.manifest is None or self.graph is None:
+            raise DbtInternalError("manifest and graph must be set to perform node selection")
+
+        source_selector = ResourceTypeSelector(
+            graph=self.graph,
+            manifest=self.manifest,
+            previous_state=self.previous_state,
+            resource_types=[NodeType.Source],
+        )
+
+        return source_selector.get_graph_queue(self.get_selection_spec()).get_selected_nodes()
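
Note on the catalog building above: each row returned by the adapter's catalog query is one flat dict whose keys carry a table_, column_, or stats: prefix; get_stripped_prefix slices those apart before Catalog groups them by (database, schema, identifier). A small self-contained illustration with made-up values (not real adapter output):

from typing import Any, Dict

def get_stripped_prefix(source: Dict[str, Any], prefix: str) -> Dict[str, Any]:
    # Same helper as in generate.py: keep only keys carrying the prefix, with it removed.
    cut = len(prefix)
    return {k[cut:]: v for k, v in source.items() if k.startswith(prefix)}

# A flattened catalog row, roughly as the adapter query would return it (illustrative values).
row = {
    "table_database": "analytics", "table_schema": "main", "table_name": "orders",
    "column_name": "order_id", "column_index": 1, "column_type": "integer",
    "stats:size:label": "Size", "stats:size:value": 128, "stats:size:include": True,
    "stats:size:description": "Size of the table in MB",
}

print(get_stripped_prefix(row, "table_"))   # {'database': 'analytics', 'schema': 'main', 'name': 'orders'}
print(get_stripped_prefix(row, "column_"))  # {'name': 'order_id', 'index': 1, 'type': 'integer'}
print(get_stripped_prefix(row, "stats:"))   # {'size:label': 'Size', 'size:value': 128, ...}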