dvt-core 1.11.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dvt-core might be problematic. Click here for more details.

Files changed (261) hide show
  1. dvt/__init__.py +7 -0
  2. dvt/_pydantic_shim.py +26 -0
  3. dvt/adapters/__init__.py +16 -0
  4. dvt/adapters/multi_adapter_manager.py +268 -0
  5. dvt/artifacts/__init__.py +0 -0
  6. dvt/artifacts/exceptions/__init__.py +1 -0
  7. dvt/artifacts/exceptions/schemas.py +31 -0
  8. dvt/artifacts/resources/__init__.py +116 -0
  9. dvt/artifacts/resources/base.py +68 -0
  10. dvt/artifacts/resources/types.py +93 -0
  11. dvt/artifacts/resources/v1/analysis.py +10 -0
  12. dvt/artifacts/resources/v1/catalog.py +23 -0
  13. dvt/artifacts/resources/v1/components.py +275 -0
  14. dvt/artifacts/resources/v1/config.py +282 -0
  15. dvt/artifacts/resources/v1/documentation.py +11 -0
  16. dvt/artifacts/resources/v1/exposure.py +52 -0
  17. dvt/artifacts/resources/v1/function.py +53 -0
  18. dvt/artifacts/resources/v1/generic_test.py +32 -0
  19. dvt/artifacts/resources/v1/group.py +22 -0
  20. dvt/artifacts/resources/v1/hook.py +11 -0
  21. dvt/artifacts/resources/v1/macro.py +30 -0
  22. dvt/artifacts/resources/v1/metric.py +173 -0
  23. dvt/artifacts/resources/v1/model.py +146 -0
  24. dvt/artifacts/resources/v1/owner.py +10 -0
  25. dvt/artifacts/resources/v1/saved_query.py +112 -0
  26. dvt/artifacts/resources/v1/seed.py +42 -0
  27. dvt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  28. dvt/artifacts/resources/v1/semantic_model.py +315 -0
  29. dvt/artifacts/resources/v1/singular_test.py +14 -0
  30. dvt/artifacts/resources/v1/snapshot.py +92 -0
  31. dvt/artifacts/resources/v1/source_definition.py +85 -0
  32. dvt/artifacts/resources/v1/sql_operation.py +10 -0
  33. dvt/artifacts/resources/v1/unit_test_definition.py +78 -0
  34. dvt/artifacts/schemas/__init__.py +0 -0
  35. dvt/artifacts/schemas/base.py +191 -0
  36. dvt/artifacts/schemas/batch_results.py +24 -0
  37. dvt/artifacts/schemas/catalog/__init__.py +12 -0
  38. dvt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  39. dvt/artifacts/schemas/catalog/v1/catalog.py +60 -0
  40. dvt/artifacts/schemas/freshness/__init__.py +1 -0
  41. dvt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  42. dvt/artifacts/schemas/freshness/v3/freshness.py +159 -0
  43. dvt/artifacts/schemas/manifest/__init__.py +2 -0
  44. dvt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  45. dvt/artifacts/schemas/manifest/v12/manifest.py +212 -0
  46. dvt/artifacts/schemas/results.py +148 -0
  47. dvt/artifacts/schemas/run/__init__.py +2 -0
  48. dvt/artifacts/schemas/run/v5/__init__.py +0 -0
  49. dvt/artifacts/schemas/run/v5/run.py +184 -0
  50. dvt/artifacts/schemas/upgrades/__init__.py +4 -0
  51. dvt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  52. dvt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  53. dvt/artifacts/utils/validation.py +153 -0
  54. dvt/cli/__init__.py +1 -0
  55. dvt/cli/context.py +16 -0
  56. dvt/cli/exceptions.py +56 -0
  57. dvt/cli/flags.py +558 -0
  58. dvt/cli/main.py +971 -0
  59. dvt/cli/option_types.py +121 -0
  60. dvt/cli/options.py +79 -0
  61. dvt/cli/params.py +803 -0
  62. dvt/cli/requires.py +478 -0
  63. dvt/cli/resolvers.py +32 -0
  64. dvt/cli/types.py +40 -0
  65. dvt/clients/__init__.py +0 -0
  66. dvt/clients/checked_load.py +82 -0
  67. dvt/clients/git.py +164 -0
  68. dvt/clients/jinja.py +206 -0
  69. dvt/clients/jinja_static.py +245 -0
  70. dvt/clients/registry.py +192 -0
  71. dvt/clients/yaml_helper.py +68 -0
  72. dvt/compilation.py +833 -0
  73. dvt/compute/__init__.py +26 -0
  74. dvt/compute/base.py +288 -0
  75. dvt/compute/engines/__init__.py +13 -0
  76. dvt/compute/engines/duckdb_engine.py +368 -0
  77. dvt/compute/engines/spark_engine.py +273 -0
  78. dvt/compute/query_analyzer.py +212 -0
  79. dvt/compute/router.py +483 -0
  80. dvt/config/__init__.py +4 -0
  81. dvt/config/catalogs.py +95 -0
  82. dvt/config/compute_config.py +406 -0
  83. dvt/config/profile.py +411 -0
  84. dvt/config/profiles_v2.py +464 -0
  85. dvt/config/project.py +893 -0
  86. dvt/config/renderer.py +232 -0
  87. dvt/config/runtime.py +491 -0
  88. dvt/config/selectors.py +209 -0
  89. dvt/config/utils.py +78 -0
  90. dvt/connectors/.gitignore +6 -0
  91. dvt/connectors/README.md +306 -0
  92. dvt/connectors/catalog.yml +217 -0
  93. dvt/connectors/download_connectors.py +300 -0
  94. dvt/constants.py +29 -0
  95. dvt/context/__init__.py +0 -0
  96. dvt/context/base.py +746 -0
  97. dvt/context/configured.py +136 -0
  98. dvt/context/context_config.py +350 -0
  99. dvt/context/docs.py +82 -0
  100. dvt/context/exceptions_jinja.py +179 -0
  101. dvt/context/macro_resolver.py +195 -0
  102. dvt/context/macros.py +171 -0
  103. dvt/context/manifest.py +73 -0
  104. dvt/context/providers.py +2198 -0
  105. dvt/context/query_header.py +14 -0
  106. dvt/context/secret.py +59 -0
  107. dvt/context/target.py +74 -0
  108. dvt/contracts/__init__.py +0 -0
  109. dvt/contracts/files.py +413 -0
  110. dvt/contracts/graph/__init__.py +0 -0
  111. dvt/contracts/graph/manifest.py +1904 -0
  112. dvt/contracts/graph/metrics.py +98 -0
  113. dvt/contracts/graph/model_config.py +71 -0
  114. dvt/contracts/graph/node_args.py +42 -0
  115. dvt/contracts/graph/nodes.py +1806 -0
  116. dvt/contracts/graph/semantic_manifest.py +233 -0
  117. dvt/contracts/graph/unparsed.py +812 -0
  118. dvt/contracts/project.py +417 -0
  119. dvt/contracts/results.py +53 -0
  120. dvt/contracts/selection.py +23 -0
  121. dvt/contracts/sql.py +86 -0
  122. dvt/contracts/state.py +69 -0
  123. dvt/contracts/util.py +46 -0
  124. dvt/deprecations.py +347 -0
  125. dvt/deps/__init__.py +0 -0
  126. dvt/deps/base.py +153 -0
  127. dvt/deps/git.py +196 -0
  128. dvt/deps/local.py +80 -0
  129. dvt/deps/registry.py +131 -0
  130. dvt/deps/resolver.py +149 -0
  131. dvt/deps/tarball.py +121 -0
  132. dvt/docs/source/_ext/dbt_click.py +118 -0
  133. dvt/docs/source/conf.py +32 -0
  134. dvt/env_vars.py +64 -0
  135. dvt/event_time/event_time.py +40 -0
  136. dvt/event_time/sample_window.py +60 -0
  137. dvt/events/__init__.py +16 -0
  138. dvt/events/base_types.py +37 -0
  139. dvt/events/core_types_pb2.py +2 -0
  140. dvt/events/logging.py +109 -0
  141. dvt/events/types.py +2534 -0
  142. dvt/exceptions.py +1487 -0
  143. dvt/flags.py +89 -0
  144. dvt/graph/__init__.py +11 -0
  145. dvt/graph/cli.py +248 -0
  146. dvt/graph/graph.py +172 -0
  147. dvt/graph/queue.py +213 -0
  148. dvt/graph/selector.py +375 -0
  149. dvt/graph/selector_methods.py +976 -0
  150. dvt/graph/selector_spec.py +223 -0
  151. dvt/graph/thread_pool.py +18 -0
  152. dvt/hooks.py +21 -0
  153. dvt/include/README.md +49 -0
  154. dvt/include/__init__.py +3 -0
  155. dvt/include/global_project.py +4 -0
  156. dvt/include/starter_project/.gitignore +4 -0
  157. dvt/include/starter_project/README.md +15 -0
  158. dvt/include/starter_project/__init__.py +3 -0
  159. dvt/include/starter_project/analyses/.gitkeep +0 -0
  160. dvt/include/starter_project/dvt_project.yml +36 -0
  161. dvt/include/starter_project/macros/.gitkeep +0 -0
  162. dvt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  163. dvt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  164. dvt/include/starter_project/models/example/schema.yml +21 -0
  165. dvt/include/starter_project/seeds/.gitkeep +0 -0
  166. dvt/include/starter_project/snapshots/.gitkeep +0 -0
  167. dvt/include/starter_project/tests/.gitkeep +0 -0
  168. dvt/internal_deprecations.py +27 -0
  169. dvt/jsonschemas/__init__.py +3 -0
  170. dvt/jsonschemas/jsonschemas.py +309 -0
  171. dvt/jsonschemas/project/0.0.110.json +4717 -0
  172. dvt/jsonschemas/project/0.0.85.json +2015 -0
  173. dvt/jsonschemas/resources/0.0.110.json +2636 -0
  174. dvt/jsonschemas/resources/0.0.85.json +2536 -0
  175. dvt/jsonschemas/resources/latest.json +6773 -0
  176. dvt/links.py +4 -0
  177. dvt/materializations/__init__.py +0 -0
  178. dvt/materializations/incremental/__init__.py +0 -0
  179. dvt/materializations/incremental/microbatch.py +235 -0
  180. dvt/mp_context.py +8 -0
  181. dvt/node_types.py +37 -0
  182. dvt/parser/__init__.py +23 -0
  183. dvt/parser/analysis.py +21 -0
  184. dvt/parser/base.py +549 -0
  185. dvt/parser/common.py +267 -0
  186. dvt/parser/docs.py +52 -0
  187. dvt/parser/fixtures.py +51 -0
  188. dvt/parser/functions.py +30 -0
  189. dvt/parser/generic_test.py +100 -0
  190. dvt/parser/generic_test_builders.py +334 -0
  191. dvt/parser/hooks.py +119 -0
  192. dvt/parser/macros.py +137 -0
  193. dvt/parser/manifest.py +2204 -0
  194. dvt/parser/models.py +574 -0
  195. dvt/parser/partial.py +1179 -0
  196. dvt/parser/read_files.py +445 -0
  197. dvt/parser/schema_generic_tests.py +423 -0
  198. dvt/parser/schema_renderer.py +111 -0
  199. dvt/parser/schema_yaml_readers.py +936 -0
  200. dvt/parser/schemas.py +1467 -0
  201. dvt/parser/search.py +149 -0
  202. dvt/parser/seeds.py +28 -0
  203. dvt/parser/singular_test.py +20 -0
  204. dvt/parser/snapshots.py +44 -0
  205. dvt/parser/sources.py +557 -0
  206. dvt/parser/sql.py +63 -0
  207. dvt/parser/unit_tests.py +622 -0
  208. dvt/plugins/__init__.py +20 -0
  209. dvt/plugins/contracts.py +10 -0
  210. dvt/plugins/exceptions.py +2 -0
  211. dvt/plugins/manager.py +164 -0
  212. dvt/plugins/manifest.py +21 -0
  213. dvt/profiler.py +20 -0
  214. dvt/py.typed +1 -0
  215. dvt/runners/__init__.py +2 -0
  216. dvt/runners/exposure_runner.py +7 -0
  217. dvt/runners/no_op_runner.py +46 -0
  218. dvt/runners/saved_query_runner.py +7 -0
  219. dvt/selected_resources.py +8 -0
  220. dvt/task/__init__.py +0 -0
  221. dvt/task/base.py +504 -0
  222. dvt/task/build.py +197 -0
  223. dvt/task/clean.py +57 -0
  224. dvt/task/clone.py +162 -0
  225. dvt/task/compile.py +151 -0
  226. dvt/task/compute.py +366 -0
  227. dvt/task/debug.py +650 -0
  228. dvt/task/deps.py +280 -0
  229. dvt/task/docs/__init__.py +3 -0
  230. dvt/task/docs/generate.py +408 -0
  231. dvt/task/docs/index.html +250 -0
  232. dvt/task/docs/serve.py +28 -0
  233. dvt/task/freshness.py +323 -0
  234. dvt/task/function.py +122 -0
  235. dvt/task/group_lookup.py +46 -0
  236. dvt/task/init.py +374 -0
  237. dvt/task/list.py +237 -0
  238. dvt/task/printer.py +176 -0
  239. dvt/task/profiles.py +256 -0
  240. dvt/task/retry.py +175 -0
  241. dvt/task/run.py +1146 -0
  242. dvt/task/run_operation.py +142 -0
  243. dvt/task/runnable.py +802 -0
  244. dvt/task/seed.py +104 -0
  245. dvt/task/show.py +150 -0
  246. dvt/task/snapshot.py +57 -0
  247. dvt/task/sql.py +111 -0
  248. dvt/task/test.py +464 -0
  249. dvt/tests/fixtures/__init__.py +1 -0
  250. dvt/tests/fixtures/project.py +620 -0
  251. dvt/tests/util.py +651 -0
  252. dvt/tracking.py +529 -0
  253. dvt/utils/__init__.py +3 -0
  254. dvt/utils/artifact_upload.py +151 -0
  255. dvt/utils/utils.py +408 -0
  256. dvt/version.py +249 -0
  257. dvt_core-1.11.0b4.dist-info/METADATA +252 -0
  258. dvt_core-1.11.0b4.dist-info/RECORD +261 -0
  259. dvt_core-1.11.0b4.dist-info/WHEEL +5 -0
  260. dvt_core-1.11.0b4.dist-info/entry_points.txt +2 -0
  261. dvt_core-1.11.0b4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,14 @@
1
+ from dvt.context.manifest import ManifestContext
2
+ from dvt.contracts.graph.manifest import Manifest
3
+
4
+ from dbt.adapters.contracts.connection import AdapterRequiredConfig
5
+
6
+
7
class QueryHeaderContext(ManifestContext):
    """Context class instantiated by ``generate_query_header_context``.

    It is a ``ManifestContext`` whose third initializer argument is the
    project name read from the supplied config.
    """

    def __init__(self, config: AdapterRequiredConfig, manifest: Manifest) -> None:
        # The parent initializer receives the config, the manifest and the
        # config's own project name, in that order.
        project_name = config.project_name
        super().__init__(config, manifest, project_name)
10
+
11
+
12
def generate_query_header_context(config: AdapterRequiredConfig, manifest: Manifest):
    """Create a ``QueryHeaderContext`` and return the result of its ``to_dict()``."""
    return QueryHeaderContext(config, manifest).to_dict()
dvt/context/secret.py ADDED
@@ -0,0 +1,59 @@
1
+ from typing import Any, Dict, Optional
2
+
3
+ from dvt.constants import DEFAULT_ENV_PLACEHOLDER, SECRET_PLACEHOLDER
4
+ from dvt.exceptions import EnvVarMissingError
5
+
6
+ from dbt_common.constants import SECRET_ENV_PREFIX
7
+ from dbt_common.context import get_invocation_context
8
+
9
+ from .base import BaseContext, contextmember
10
+
11
+
12
class SecretContext(BaseContext):
    """Context used for profiles.yml and packages.yml.

    Unlike other contexts, it is able to handle secret env vars.
    """

    @contextmember()
    def env_var(self, var: str, default: Optional[str] = None) -> str:
        """Return the value of the environment variable named ``var``.

        When the variable is not set, ``default`` is returned instead; when
        there is no default either, ``EnvVarMissingError`` is raised.

        Variables that are set and whose name starts with the secret prefix
        are never rendered here. For those, a placeholder that embeds the
        variable *name* is returned, and the real value is substituted later
        (in SecretRenderer.render_value), which keeps the secret out of reach
        of Jinja manipulation.
        """
        env = get_invocation_context().env
        is_set = var in env

        # Secret env var: hand back the placeholder, never the actual value.
        if is_set and var.startswith(SECRET_ENV_PREFIX):
            return SECRET_PLACEHOLDER.format(var)

        resolved = env[var] if is_set else default
        if resolved is None:
            raise EnvVarMissingError(var)

        # Record the env var on the context to power partial parsing. A set
        # secret can no longer reach this point, but never record one anyway.
        if not var.startswith(SECRET_ENV_PREFIX):
            # A value that came from the default is recorded as a marker
            # string rather than the value itself, so partial parsing can be
            # skipped for it. If the default changes, the file itself has
            # changed and is scheduled for reparsing regardless.
            self.env_vars[var] = resolved if is_set else DEFAULT_ENV_PLACEHOLDER
        return resolved
54
+
55
+
56
def generate_secret_context(cli_vars: Dict[str, Any]) -> Dict[str, Any]:
    """Return the dict form of a ``SecretContext`` built from ``cli_vars``."""
    # to_dict() here is the context's own method, not a Mashumaro to_dict call.
    return SecretContext(cli_vars).to_dict()
dvt/context/target.py ADDED
@@ -0,0 +1,74 @@
1
+ from typing import Any, Dict
2
+
3
+ from dvt.context.base import BaseContext, contextproperty
4
+
5
+
6
class TargetContext(BaseContext):
    """Context that exposes the ``target`` dictionary on top of ``BaseContext``."""

    # subclass is ConfiguredContext
    def __init__(self, target_dict: Dict[str, Any], cli_vars: Dict[str, Any]):
        super().__init__(cli_vars=cli_vars)
        # Returned unchanged by the ``target`` context property.
        self.target_dict: Dict[str, Any] = target_dict

    @contextproperty()
    def target(self) -> Dict[str, Any]:
        """`target` contains information about your connection to the warehouse
        (specified in profiles.yml). Some configs are shared between all
        adapters, while others are adapter-specific.

        Common:

            |----------|-----------|------------------------------------------|
            | Variable | Example   | Description                              |
            |----------|-----------|------------------------------------------|
            | name     | dev       | Name of the active target                |
            |----------|-----------|------------------------------------------|
            | schema   | dbt_alice | Name of the dbt schema (or, dataset on   |
            |          |           | BigQuery)                                |
            |----------|-----------|------------------------------------------|
            | type     | postgres  | The active adapter being used.           |
            |----------|-----------|------------------------------------------|
            | threads  | 4         | The number of threads in use by dbt      |
            |----------|-----------|------------------------------------------|

        Snowflake:

            |----------|-----------|------------------------------------------|
            | Variable | Example   | Description                              |
            |----------|-----------|------------------------------------------|
            | database | RAW       | The active target's database.            |
            |----------|-----------|------------------------------------------|
            | warehouse| TRANSFORM | The active target's warehouse.           |
            |----------|-----------|------------------------------------------|
            | user     | USERNAME  | The active target's user                 |
            |----------|-----------|------------------------------------------|
            | role     | ROLENAME  | The active target's role                 |
            |----------|-----------|------------------------------------------|
            | account  | abc123    | The active target's account              |
            |----------|-----------|------------------------------------------|

        Postgres/Redshift:

            |----------|-------------------|----------------------------------|
            | Variable | Example           | Description                      |
            |----------|-------------------|----------------------------------|
            | dbname   | analytics         | The active target's database.    |
            |----------|-------------------|----------------------------------|
            | host     | abc123.us-west-2. | The active target's host.        |
            |          | redshift.amazonaws|                                  |
            |          | .com              |                                  |
            |----------|-------------------|----------------------------------|
            | user     | dbt_user          | The active target's user         |
            |----------|-------------------|----------------------------------|
            | port     | 5439              | The active target's port         |
            |----------|-------------------|----------------------------------|

        BigQuery:

            |----------|-----------|------------------------------------------|
            | Variable | Example   | Description                              |
            |----------|-----------|------------------------------------------|
            | project  | abc-123   | The active target's project.             |
            |----------|-----------|------------------------------------------|

        """
        return self.target_dict
File without changes
dvt/contracts/files.py ADDED
@@ -0,0 +1,413 @@
1
+ import os
2
+ from dataclasses import dataclass, field
3
+ from typing import Any, Dict, List, Optional, Union
4
+
5
+ from dvt.artifacts.resources.base import FileHash
6
+ from dvt.constants import MAXIMUM_SEED_SIZE
7
+ from mashumaro.types import SerializableType
8
+
9
+ from dbt_common.dataclass_schema import StrEnum, dbtClassMixin
10
+
11
+ from .util import SourceKey
12
+
13
+
14
class ParseFileType(StrEnum):
    """Kinds of file distinguished during parsing.

    Each member is a key of ``parse_file_type_to_parser``, which names the
    parser associated with it.
    """

    Macro = "macro"
    Model = "model"
    Snapshot = "snapshot"
    Analysis = "analysis"
    SingularTest = "singular_test"
    GenericTest = "generic_test"
    Seed = "seed"
    Documentation = "docs"
    Schema = "schema"
    # Not a real file type: hooks come from dbt_project.yml.
    Hook = "hook"
    Fixture = "fixture"
    Function = "function"
27
+
28
+
29
# Name of the parser class associated with each parse file type. The values
# are plain strings, not class objects.
parse_file_type_to_parser: Dict[ParseFileType, str] = {
    ParseFileType.Macro: "MacroParser",
    ParseFileType.Model: "ModelParser",
    ParseFileType.Snapshot: "SnapshotParser",
    ParseFileType.Analysis: "AnalysisParser",
    ParseFileType.SingularTest: "SingularTestParser",
    ParseFileType.GenericTest: "GenericTestParser",
    ParseFileType.Seed: "SeedParser",
    ParseFileType.Documentation: "DocumentationParser",
    ParseFileType.Schema: "SchemaParser",
    ParseFileType.Hook: "HookParser",
    ParseFileType.Fixture: "FixtureParser",
    ParseFileType.Function: "FunctionParser",
}
43
+
44
+
45
@dataclass
class FilePath(dbtClassMixin):
    """Location of a file, split into project root, searched path and relative path."""

    searched_path: str
    relative_path: str
    modification_time: float
    project_root: str

    @property
    def search_key(self) -> str:
        """Key identifying this file; currently its absolute path."""
        # TODO: should this be project name + path relative to project root?
        return self.absolute_path

    @property
    def full_path(self) -> str:
        """Project root, searched path and relative path joined together."""
        # Joined without normalisation, which is useful for symlink preservation.
        parts = (self.project_root, self.searched_path, self.relative_path)
        return os.path.join(*parts)

    @property
    def absolute_path(self) -> str:
        """``full_path`` passed through ``os.path.abspath``."""
        return os.path.abspath(self.full_path)

    @property
    def original_file_path(self) -> str:
        """Searched path joined with the relative path (project root excluded)."""
        return os.path.join(self.searched_path, self.relative_path)

    def seed_too_large(self) -> bool:
        """Return whether the file this represents is over the seed size limit."""
        size_in_bytes = os.stat(self.full_path).st_size
        return size_in_bytes > MAXIMUM_SEED_SIZE
73
+
74
+
75
@dataclass
class RemoteFile(dbtClassMixin):
    """Stand-in path object for content that does not come from a local file.

    Every path-like property returns the same synthetic string,
    ``"from remote system"`` followed by the extension chosen for the language.
    """

    def __init__(self, language) -> None:
        """Pick the extension for ``language`` ("sql" or "python").

        Raises:
            RuntimeError: if ``language`` is neither "sql" nor "python".
        """
        if language == "sql":
            extension = ".sql"
        elif language == "python":
            extension = ".py"
        else:
            raise RuntimeError(f"Invalid language for remote File {language}")
        self.path_end = extension
        self.path = f"from remote system{extension}"

    @property
    def searched_path(self) -> str:
        return self.path

    @property
    def relative_path(self) -> str:
        return self.path

    @property
    def absolute_path(self) -> str:
        return self.path

    @property
    def original_file_path(self):
        return self.path

    @property
    def modification_time(self):
        # NOTE(review): this returns the path string, whereas
        # FilePath.modification_time is declared as a float - confirm that
        # callers do not rely on a numeric value here.
        return self.path
105
+
106
+
107
@dataclass
class BaseSourceFile(dbtClassMixin, SerializableType):
    """Define a source file in dbt"""

    # Path information for the file.
    path: Union[FilePath, RemoteFile]
    checksum: FileHash
    # Seems like knowing which project the file came from would be useful
    project_name: Optional[str] = None
    # Parse file type: i.e. which parser will process this file
    parse_file_type: Optional[ParseFileType] = None
    # Not serialized: stripped again in __post_serialize__.
    contents: Optional[str] = None

    @property
    def file_id(self):
        """``"<project_name>://<original_file_path>"``, or None for a remote file."""
        if not isinstance(self.path, RemoteFile):
            return f"{self.project_name}://{self.path.original_file_path}"
        return None

    @property
    def original_file_path(self):
        return self.path.original_file_path

    def _serialize(self):
        return self.to_dict()

    @classmethod
    def _deserialize(cls, dct: Dict[str, int]):
        """Rebuild the concrete source-file class selected by ``parse_file_type``.

        "schema" yields a ``SchemaSourceFile``, "fixture" a
        ``FixtureSourceFile`` and anything else a ``SourceFile``.
        """
        # NOTE(review): the ``Dict[str, int]`` annotation does not look right
        # for this payload (the key compared below holds a string) - confirm
        # before changing it.
        file_type = dct["parse_file_type"]
        if file_type == "schema":
            return SchemaSourceFile.from_dict(dct)
        if file_type == "fixture":
            return FixtureSourceFile.from_dict(dct)
        return SourceFile.from_dict(dct)

    def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
        """Trim the serialized dict: drop empty lists and the ``contents`` entry."""
        dct = super().__post_serialize__(dct, context)
        # remove empty lists to save space
        empty_list_keys = [
            key for key, value in dct.items() if isinstance(value, list) and not value
        ]
        for key in empty_list_keys:
            del dct[key]
        # remove contents. Schema files will still have 'dict_from_yaml'
        # from the contents
        dct.pop("contents", None)
        return dct
156
+
157
+
158
@dataclass
class SourceFile(BaseSourceFile):
    """Source file record carrying lists of nodes, docs, macros and env vars."""

    nodes: List[str] = field(default_factory=list)
    docs: List[str] = field(default_factory=list)
    macros: List[str] = field(default_factory=list)
    env_vars: List[str] = field(default_factory=list)

    @classmethod
    def big_seed(cls, path: FilePath) -> "SourceFile":
        """Parse seeds over the size limit with just the path"""
        # The checksum comes from FileHash.path on the original file path and
        # the contents are set to an empty string.
        seed_file = cls(path=path, checksum=FileHash.path(path.original_file_path))
        seed_file.contents = ""
        return seed_file

    def add_node(self, value):
        """Append ``value`` to ``nodes`` unless it is already present."""
        if value in self.nodes:
            return
        self.nodes.append(value)

    # TODO: do this a different way. This remote file kludge isn't going
    # to work long term
    @classmethod
    def remote(cls, contents: str, project_name: str, language: str) -> "SourceFile":
        """Build a record for remote ``contents`` using a ``RemoteFile`` path."""
        return cls(
            path=RemoteFile(language),
            checksum=FileHash.from_contents(contents),
            project_name=project_name,
            contents=contents,
        )
187
+
188
+
189
@dataclass
class SchemaSourceFile(BaseSourceFile):
    """Source file record for a schema file.

    Alongside the YAML dictionary (``dfy``) it keeps per-file bookkeeping:
    tests, sources, patches, env vars and the unrendered configs, databases
    and schemas recorded for entries in the file. The short field names
    ``dfy``, ``ndp``, ``mcp`` and ``sop`` are also readable through the
    ``dict_from_yaml``, ``node_patches``, ``macro_patches`` and
    ``source_patches`` properties.
    """

    # The YAML dictionary for this file; see ``dict_from_yaml``.
    dfy: Dict[str, Any] = field(default_factory=dict)
    # these are in the manifest.nodes dictionary
    # Shape used by the methods below: {yaml_key: {name: [test unique_id, ...]}}
    data_tests: Dict[str, Any] = field(default_factory=dict)
    sources: List[str] = field(default_factory=list)
    exposures: List[str] = field(default_factory=list)
    functions: List[str] = field(default_factory=list)
    metrics: List[str] = field(default_factory=list)
    snapshots: List[str] = field(default_factory=list)
    # The following field will no longer be used. Leaving
    # here to avoid breaking existing projects. To be removed
    # later if possible.
    generated_metrics: List[str] = field(default_factory=list)
    # metrics generated from semantic_model measures. The key is
    # the name of the semantic_model, so that we can find it later.
    metrics_from_measures: Dict[str, Any] = field(default_factory=dict)
    groups: List[str] = field(default_factory=list)
    # node patches contain models, seeds, snapshots, analyses
    ndp: List[str] = field(default_factory=list)
    semantic_models: List[str] = field(default_factory=list)
    unit_tests: List[str] = field(default_factory=list)
    saved_queries: List[str] = field(default_factory=list)
    # any macro patches in this file by macro unique_id.
    mcp: Dict[str, str] = field(default_factory=dict)
    # any source patches in this file. The entries are package, name pairs
    # Patches are only against external sources. Sources can be
    # created too, but those are in 'sources'
    sop: List[SourceKey] = field(default_factory=list)
    # Shape used by the methods below: {yaml_key: {name: [var, ...]}}
    env_vars: Dict[str, Any] = field(default_factory=dict)
    # Shape used by the methods below: {yaml_key: {name or name_v<version>: config}}
    unrendered_configs: Dict[str, Any] = field(default_factory=dict)
    # Shape used by the methods below: {yaml_key: {name: value}}
    unrendered_databases: Dict[str, Any] = field(default_factory=dict)
    unrendered_schemas: Dict[str, Any] = field(default_factory=dict)
    # Partial parsing specific data; removed again in __post_serialize__.
    pp_dict: Optional[Dict[str, Any]] = None
    pp_test_index: Optional[Dict[str, Any]] = None

    @property
    def dict_from_yaml(self):
        return self.dfy

    @property
    def node_patches(self):
        return self.ndp

    @property
    def macro_patches(self):
        return self.mcp

    @property
    def source_patches(self):
        return self.sop

    def __post_serialize__(self, dct: Dict, context: Optional[Dict] = None):
        """Apply the base-class trimming, then drop the partial parsing keys."""
        dct = super().__post_serialize__(dct, context)
        # Remove partial parsing specific data
        for key in ("pp_test_index", "pp_dict"):
            dct.pop(key, None)
        return dct

    def append_patch(self, yaml_key, unique_id):
        """Record ``unique_id`` as a node patch (``yaml_key`` is not used)."""
        self.node_patches.append(unique_id)

    def add_test(self, node_unique_id, test_from):
        """Append ``node_unique_id`` under ``test_from["key"]`` / ``test_from["name"]``."""
        name = test_from["name"]
        key = test_from["key"]
        self.data_tests.setdefault(key, {}).setdefault(name, []).append(node_unique_id)

    # this is only used in tests/unit
    def remove_tests(self, yaml_key, name):
        """Delete the test list stored for ``yaml_key`` / ``name``, if present."""
        if yaml_key in self.data_tests and name in self.data_tests[yaml_key]:
            del self.data_tests[yaml_key][name]

    # this is only used in the tests directory (unit + functional)
    def get_tests(self, yaml_key, name):
        """Return the test ids stored for ``yaml_key`` / ``name``, or ``[]``."""
        if yaml_key in self.data_tests and name in self.data_tests[yaml_key]:
            return self.data_tests[yaml_key][name]
        return []

    def add_metrics_from_measures(self, semantic_model_name: str, metric_unique_id: str):
        """Append ``metric_unique_id`` to the list kept for ``semantic_model_name``."""
        if self.generated_metrics:
            # Probably not needed, but for safety sake, convert the
            # old generated_metrics to metrics_from_measures.
            self.fix_metrics_from_measures()
        self.metrics_from_measures.setdefault(semantic_model_name, []).append(
            metric_unique_id
        )

    def fix_metrics_from_measures(self):
        """Convert a legacy ``generated_metrics`` list into ``metrics_from_measures``.

        Temporary method to fix up existing projects with a partial parse
        file. It should only be needed when a SchemaSourceFile loaded from a
        msgpack manifest still carries a "generated_metrics" list. This code
        can be removed when "generated_metrics" is removed.
        """
        generated_metrics = self.generated_metrics
        # Cleared before the loop so that add_metrics_from_measures does not
        # call back into this method. Should never be needed again.
        self.generated_metrics = []
        # For each metric_unique_id we loop through the semantic models
        # looking for the name of the "measure" which generated the metric.
        # When it's found, add it to "metrics_from_measures", with a key
        # of the semantic_model name, and a list of metrics.
        for metric_unique_id in generated_metrics:
            # The metric name is the last dot-separated part of the unique id.
            metric_name = metric_unique_id.split(".")[-1]
            if "semantic_models" not in self.dict_from_yaml:
                continue
            for sem_model in self.dict_from_yaml["semantic_models"]:
                if "measures" not in sem_model:
                    continue
                for measure in sem_model["measures"]:
                    if measure["name"] == metric_name:
                        self.add_metrics_from_measures(sem_model["name"], metric_unique_id)
                        # Only leaves the measures loop; remaining semantic
                        # models are still examined.
                        break

    def get_key_and_name_for_test(self, test_unique_id):
        """Return ``(yaml_key, name)`` under which ``test_unique_id`` is stored.

        Both elements are None when the id is not found.
        """
        yaml_key = None
        block_name = None
        for key, tests_by_name in self.data_tests.items():
            for name, unique_ids in tests_by_name.items():
                # Scanning continues after a hit, so the last matching
                # key/name pair is the one returned.
                if test_unique_id in unique_ids:
                    yaml_key = key
                    block_name = name
        return (yaml_key, block_name)

    def get_all_test_ids(self):
        """Return every test id stored in ``data_tests`` as one flat list."""
        return [
            test_id
            for tests_by_name in self.data_tests.values()
            for unique_ids in tests_by_name.values()
            for test_id in unique_ids
        ]

    def add_unrendered_config(self, unrendered_config, yaml_key, name, version=None):
        """Store ``unrendered_config`` for ``name`` unless an entry already exists.

        With a ``version`` the entry is keyed ``"{name}_v{version}"``,
        otherwise just ``name``.
        """
        versioned_name = f"{name}_v{version}" if version is not None else name
        self.unrendered_configs.setdefault(yaml_key, {}).setdefault(
            versioned_name, unrendered_config
        )

    def get_unrendered_config(self, yaml_key, name, version=None) -> Optional[Dict[str, Any]]:
        """Return the stored unrendered config, or None when there is none."""
        versioned_name = f"{name}_v{version}" if version is not None else name

        if yaml_key not in self.unrendered_configs:
            return None
        return self.unrendered_configs[yaml_key].get(versioned_name)

    def delete_from_unrendered_configs(self, yaml_key, name):
        """Remove every unrendered config stored for ``name`` under ``yaml_key``.

        We delete all unrendered_configs for this yaml_key/name because the
        entry has been scheduled for reparsing. Both the unversioned entry
        and every ``"{name}_v..."`` entry are removed, and the ``yaml_key``
        bucket is dropped once it is empty. Nothing happens when ``yaml_key``
        has no bucket.
        """
        configs_for_key = self.unrendered_configs.get(yaml_key)
        if configs_for_key is None:
            return

        # add_unrendered_config stores versioned entries only under
        # "{name}_v{version}", so the cleanup must not depend on an
        # unversioned (or truthy) entry being present; otherwise stale
        # configs survive, and add_unrendered_config never overwrites them.
        configs_for_key.pop(name, None)

        # Delete all versioned keys associated with name
        version_prefix = f"{name}_v"
        versioned_names = [key for key in configs_for_key if key.startswith(version_prefix)]
        for versioned_name in versioned_names:
            del configs_for_key[versioned_name]

        if not configs_for_key:
            del self.unrendered_configs[yaml_key]

    def add_env_var(self, var, yaml_key, name):
        """Record ``var`` for ``yaml_key`` / ``name`` without duplicating it."""
        recorded = self.env_vars.setdefault(yaml_key, {}).setdefault(name, [])
        if var not in recorded:
            recorded.append(var)

    def delete_from_env_vars(self, yaml_key, name):
        """Remove the env vars recorded for ``yaml_key`` / ``name``, if any."""
        # We delete all vars for this yaml_key/name because the
        # entry has been scheduled for reparsing.
        if yaml_key in self.env_vars and name in self.env_vars[yaml_key]:
            del self.env_vars[yaml_key][name]
            if not self.env_vars[yaml_key]:
                del self.env_vars[yaml_key]

    def add_unrendered_database(self, yaml_key: str, name: str, unrendered_database: str) -> None:
        """Store (or overwrite) the unrendered database for ``yaml_key`` / ``name``."""
        self.unrendered_databases.setdefault(yaml_key, {})[name] = unrendered_database

    def get_unrendered_database(self, yaml_key: str, name: str) -> Optional[str]:
        """Return the stored unrendered database, or None when there is none."""
        if yaml_key not in self.unrendered_databases:
            return None
        return self.unrendered_databases[yaml_key].get(name)

    def add_unrendered_schema(self, yaml_key: str, name: str, unrendered_schema: str) -> None:
        """Store (or overwrite) the unrendered schema for ``yaml_key`` / ``name``."""
        self.unrendered_schemas.setdefault(yaml_key, {})[name] = unrendered_schema

    def get_unrendered_schema(self, yaml_key: str, name: str) -> Optional[str]:
        """Return the stored unrendered schema, or None when there is none."""
        if yaml_key not in self.unrendered_schemas:
            return None
        return self.unrendered_schemas[yaml_key].get(name)
401
+
402
+
403
@dataclass
class FixtureSourceFile(BaseSourceFile):
    """Source file record for a fixture file, with the unit tests listed for it."""

    fixture: Optional[str] = None
    unit_tests: List[str] = field(default_factory=list)

    def add_unit_test(self, value):
        """Append ``value`` to ``unit_tests`` unless it is already present."""
        if value in self.unit_tests:
            return
        self.unit_tests.append(value)
411
+
412
+
413
# Union of the three concrete source-file classes defined in this module.
AnySourceFile = Union[SchemaSourceFile, SourceFile, FixtureSourceFile]
File without changes