dvt_core-0.58.6-cp311-cp311-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2403 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.cpython-311-darwin.so +0 -0
  74. dbt/compute/engines/spark_engine.py +642 -0
  75. dbt/compute/federated_executor.cpython-311-darwin.so +0 -0
  76. dbt/compute/federated_executor.py +1080 -0
  77. dbt/compute/filter_pushdown.cpython-311-darwin.so +0 -0
  78. dbt/compute/filter_pushdown.py +273 -0
  79. dbt/compute/jar_provisioning.cpython-311-darwin.so +0 -0
  80. dbt/compute/jar_provisioning.py +255 -0
  81. dbt/compute/java_compat.cpython-311-darwin.so +0 -0
  82. dbt/compute/java_compat.py +689 -0
  83. dbt/compute/jdbc_utils.cpython-311-darwin.so +0 -0
  84. dbt/compute/jdbc_utils.py +678 -0
  85. dbt/compute/metadata/__init__.py +40 -0
  86. dbt/compute/metadata/adapters_registry.cpython-311-darwin.so +0 -0
  87. dbt/compute/metadata/adapters_registry.py +370 -0
  88. dbt/compute/metadata/registry.cpython-311-darwin.so +0 -0
  89. dbt/compute/metadata/registry.py +674 -0
  90. dbt/compute/metadata/store.cpython-311-darwin.so +0 -0
  91. dbt/compute/metadata/store.py +1499 -0
  92. dbt/compute/smart_selector.cpython-311-darwin.so +0 -0
  93. dbt/compute/smart_selector.py +377 -0
  94. dbt/compute/strategies/__init__.py +55 -0
  95. dbt/compute/strategies/base.cpython-311-darwin.so +0 -0
  96. dbt/compute/strategies/base.py +165 -0
  97. dbt/compute/strategies/dataproc.cpython-311-darwin.so +0 -0
  98. dbt/compute/strategies/dataproc.py +207 -0
  99. dbt/compute/strategies/emr.cpython-311-darwin.so +0 -0
  100. dbt/compute/strategies/emr.py +203 -0
  101. dbt/compute/strategies/local.cpython-311-darwin.so +0 -0
  102. dbt/compute/strategies/local.py +443 -0
  103. dbt/compute/strategies/standalone.cpython-311-darwin.so +0 -0
  104. dbt/compute/strategies/standalone.py +262 -0
  105. dbt/config/__init__.py +4 -0
  106. dbt/config/catalogs.py +94 -0
  107. dbt/config/compute.cpython-311-darwin.so +0 -0
  108. dbt/config/compute.py +513 -0
  109. dbt/config/dvt_profile.cpython-311-darwin.so +0 -0
  110. dbt/config/dvt_profile.py +342 -0
  111. dbt/config/profile.py +422 -0
  112. dbt/config/project.py +873 -0
  113. dbt/config/project_utils.py +28 -0
  114. dbt/config/renderer.py +231 -0
  115. dbt/config/runtime.py +553 -0
  116. dbt/config/selectors.py +208 -0
  117. dbt/config/utils.py +77 -0
  118. dbt/constants.py +28 -0
  119. dbt/context/__init__.py +0 -0
  120. dbt/context/base.py +745 -0
  121. dbt/context/configured.py +135 -0
  122. dbt/context/context_config.py +382 -0
  123. dbt/context/docs.py +82 -0
  124. dbt/context/exceptions_jinja.py +178 -0
  125. dbt/context/macro_resolver.py +195 -0
  126. dbt/context/macros.py +171 -0
  127. dbt/context/manifest.py +72 -0
  128. dbt/context/providers.py +2249 -0
  129. dbt/context/query_header.py +13 -0
  130. dbt/context/secret.py +58 -0
  131. dbt/context/target.py +74 -0
  132. dbt/contracts/__init__.py +0 -0
  133. dbt/contracts/files.py +413 -0
  134. dbt/contracts/graph/__init__.py +0 -0
  135. dbt/contracts/graph/manifest.py +1904 -0
  136. dbt/contracts/graph/metrics.py +97 -0
  137. dbt/contracts/graph/model_config.py +70 -0
  138. dbt/contracts/graph/node_args.py +42 -0
  139. dbt/contracts/graph/nodes.py +1806 -0
  140. dbt/contracts/graph/semantic_manifest.py +232 -0
  141. dbt/contracts/graph/unparsed.py +811 -0
  142. dbt/contracts/project.py +417 -0
  143. dbt/contracts/results.py +53 -0
  144. dbt/contracts/selection.py +23 -0
  145. dbt/contracts/sql.py +85 -0
  146. dbt/contracts/state.py +68 -0
  147. dbt/contracts/util.py +46 -0
  148. dbt/deprecations.py +348 -0
  149. dbt/deps/__init__.py +0 -0
  150. dbt/deps/base.py +152 -0
  151. dbt/deps/git.py +195 -0
  152. dbt/deps/local.py +79 -0
  153. dbt/deps/registry.py +130 -0
  154. dbt/deps/resolver.py +149 -0
  155. dbt/deps/tarball.py +120 -0
  156. dbt/docs/source/_ext/dbt_click.py +119 -0
  157. dbt/docs/source/conf.py +32 -0
  158. dbt/env_vars.py +64 -0
  159. dbt/event_time/event_time.py +40 -0
  160. dbt/event_time/sample_window.py +60 -0
  161. dbt/events/__init__.py +15 -0
  162. dbt/events/base_types.py +36 -0
  163. dbt/events/core_types_pb2.py +2 -0
  164. dbt/events/logging.py +108 -0
  165. dbt/events/types.py +2516 -0
  166. dbt/exceptions.py +1486 -0
  167. dbt/flags.py +89 -0
  168. dbt/graph/__init__.py +11 -0
  169. dbt/graph/cli.py +249 -0
  170. dbt/graph/graph.py +172 -0
  171. dbt/graph/queue.py +214 -0
  172. dbt/graph/selector.py +374 -0
  173. dbt/graph/selector_methods.py +975 -0
  174. dbt/graph/selector_spec.py +222 -0
  175. dbt/graph/thread_pool.py +18 -0
  176. dbt/hooks.py +21 -0
  177. dbt/include/README.md +49 -0
  178. dbt/include/__init__.py +3 -0
  179. dbt/include/data/adapters_registry.duckdb +0 -0
  180. dbt/include/data/build_registry.py +242 -0
  181. dbt/include/data/csv/adapter_queries.csv +33 -0
  182. dbt/include/data/csv/syntax_rules.csv +9 -0
  183. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  184. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  185. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  186. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  187. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  188. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  189. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  190. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  191. dbt/include/starter_project/.gitignore +4 -0
  192. dbt/include/starter_project/README.md +15 -0
  193. dbt/include/starter_project/__init__.py +3 -0
  194. dbt/include/starter_project/analyses/.gitkeep +0 -0
  195. dbt/include/starter_project/dbt_project.yml +36 -0
  196. dbt/include/starter_project/macros/.gitkeep +0 -0
  197. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  198. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  199. dbt/include/starter_project/models/example/schema.yml +21 -0
  200. dbt/include/starter_project/seeds/.gitkeep +0 -0
  201. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  202. dbt/include/starter_project/tests/.gitkeep +0 -0
  203. dbt/internal_deprecations.py +26 -0
  204. dbt/jsonschemas/__init__.py +3 -0
  205. dbt/jsonschemas/jsonschemas.py +309 -0
  206. dbt/jsonschemas/project/0.0.110.json +4717 -0
  207. dbt/jsonschemas/project/0.0.85.json +2015 -0
  208. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  209. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  210. dbt/jsonschemas/resources/latest.json +6773 -0
  211. dbt/links.py +4 -0
  212. dbt/materializations/__init__.py +0 -0
  213. dbt/materializations/incremental/__init__.py +0 -0
  214. dbt/materializations/incremental/microbatch.py +236 -0
  215. dbt/mp_context.py +8 -0
  216. dbt/node_types.py +37 -0
  217. dbt/parser/__init__.py +23 -0
  218. dbt/parser/analysis.py +21 -0
  219. dbt/parser/base.py +548 -0
  220. dbt/parser/common.py +266 -0
  221. dbt/parser/docs.py +52 -0
  222. dbt/parser/fixtures.py +51 -0
  223. dbt/parser/functions.py +30 -0
  224. dbt/parser/generic_test.py +100 -0
  225. dbt/parser/generic_test_builders.py +333 -0
  226. dbt/parser/hooks.py +118 -0
  227. dbt/parser/macros.py +137 -0
  228. dbt/parser/manifest.py +2204 -0
  229. dbt/parser/models.py +573 -0
  230. dbt/parser/partial.py +1178 -0
  231. dbt/parser/read_files.py +445 -0
  232. dbt/parser/schema_generic_tests.py +422 -0
  233. dbt/parser/schema_renderer.py +111 -0
  234. dbt/parser/schema_yaml_readers.py +935 -0
  235. dbt/parser/schemas.py +1466 -0
  236. dbt/parser/search.py +149 -0
  237. dbt/parser/seeds.py +28 -0
  238. dbt/parser/singular_test.py +20 -0
  239. dbt/parser/snapshots.py +44 -0
  240. dbt/parser/sources.py +558 -0
  241. dbt/parser/sql.py +62 -0
  242. dbt/parser/unit_tests.py +621 -0
  243. dbt/plugins/__init__.py +20 -0
  244. dbt/plugins/contracts.py +9 -0
  245. dbt/plugins/exceptions.py +2 -0
  246. dbt/plugins/manager.py +163 -0
  247. dbt/plugins/manifest.py +21 -0
  248. dbt/profiler.py +20 -0
  249. dbt/py.typed +1 -0
  250. dbt/query_analyzer.cpython-311-darwin.so +0 -0
  251. dbt/query_analyzer.py +410 -0
  252. dbt/runners/__init__.py +2 -0
  253. dbt/runners/exposure_runner.py +7 -0
  254. dbt/runners/no_op_runner.py +45 -0
  255. dbt/runners/saved_query_runner.py +7 -0
  256. dbt/selected_resources.py +8 -0
  257. dbt/task/__init__.py +0 -0
  258. dbt/task/base.py +503 -0
  259. dbt/task/build.py +197 -0
  260. dbt/task/clean.py +56 -0
  261. dbt/task/clone.py +161 -0
  262. dbt/task/compile.py +150 -0
  263. dbt/task/compute.cpython-311-darwin.so +0 -0
  264. dbt/task/compute.py +458 -0
  265. dbt/task/debug.py +505 -0
  266. dbt/task/deps.py +280 -0
  267. dbt/task/docs/__init__.py +3 -0
  268. dbt/task/docs/api/__init__.py +23 -0
  269. dbt/task/docs/api/catalog.cpython-311-darwin.so +0 -0
  270. dbt/task/docs/api/catalog.py +204 -0
  271. dbt/task/docs/api/lineage.cpython-311-darwin.so +0 -0
  272. dbt/task/docs/api/lineage.py +234 -0
  273. dbt/task/docs/api/profile.cpython-311-darwin.so +0 -0
  274. dbt/task/docs/api/profile.py +204 -0
  275. dbt/task/docs/api/spark.cpython-311-darwin.so +0 -0
  276. dbt/task/docs/api/spark.py +186 -0
  277. dbt/task/docs/generate.py +947 -0
  278. dbt/task/docs/index.html +250 -0
  279. dbt/task/docs/serve.cpython-311-darwin.so +0 -0
  280. dbt/task/docs/serve.py +174 -0
  281. dbt/task/dvt_output.py +362 -0
  282. dbt/task/dvt_run.py +204 -0
  283. dbt/task/freshness.py +322 -0
  284. dbt/task/function.py +121 -0
  285. dbt/task/group_lookup.py +46 -0
  286. dbt/task/init.cpython-311-darwin.so +0 -0
  287. dbt/task/init.py +604 -0
  288. dbt/task/java.cpython-311-darwin.so +0 -0
  289. dbt/task/java.py +316 -0
  290. dbt/task/list.py +236 -0
  291. dbt/task/metadata.cpython-311-darwin.so +0 -0
  292. dbt/task/metadata.py +804 -0
  293. dbt/task/printer.py +175 -0
  294. dbt/task/profile.cpython-311-darwin.so +0 -0
  295. dbt/task/profile.py +1307 -0
  296. dbt/task/profile_serve.py +615 -0
  297. dbt/task/retract.py +438 -0
  298. dbt/task/retry.py +175 -0
  299. dbt/task/run.py +1387 -0
  300. dbt/task/run_operation.py +141 -0
  301. dbt/task/runnable.py +758 -0
  302. dbt/task/seed.py +103 -0
  303. dbt/task/show.py +149 -0
  304. dbt/task/snapshot.py +56 -0
  305. dbt/task/spark.cpython-311-darwin.so +0 -0
  306. dbt/task/spark.py +414 -0
  307. dbt/task/sql.py +110 -0
  308. dbt/task/target_sync.cpython-311-darwin.so +0 -0
  309. dbt/task/target_sync.py +766 -0
  310. dbt/task/test.py +464 -0
  311. dbt/tests/fixtures/__init__.py +1 -0
  312. dbt/tests/fixtures/project.py +620 -0
  313. dbt/tests/util.py +651 -0
  314. dbt/tracking.py +529 -0
  315. dbt/utils/__init__.py +3 -0
  316. dbt/utils/artifact_upload.py +151 -0
  317. dbt/utils/utils.py +408 -0
  318. dbt/version.py +270 -0
  319. dvt_cli/__init__.py +72 -0
  320. dvt_core-0.58.6.dist-info/METADATA +288 -0
  321. dvt_core-0.58.6.dist-info/RECORD +324 -0
  322. dvt_core-0.58.6.dist-info/WHEEL +5 -0
  323. dvt_core-0.58.6.dist-info/entry_points.txt +2 -0
  324. dvt_core-0.58.6.dist-info/top_level.txt +2 -0
dbt/graph/selector_spec.py ADDED
@@ -0,0 +1,222 @@
+ import os
+ import re
+ from abc import ABCMeta, abstractmethod
+ from dataclasses import dataclass
+ from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union
+
+ from dbt.exceptions import InvalidSelectorError
+ from dbt.flags import get_flags
+ from dbt_common.dataclass_schema import StrEnum, dbtClassMixin
+ from dbt_common.exceptions import DbtRuntimeError
+
+ from .graph import UniqueId
+ from .selector_methods import MethodName
+
+ RAW_SELECTOR_PATTERN = re.compile(
+     r"\A"
+     r"(?P<childrens_parents>(\@))?"
+     r"(?P<parents>((?P<parents_depth>(\d*))\+))?"
+     r"((?P<method>([\w.]+)):)?(?P<value>(.*?))"
+     r"(?P<children>(\+(?P<children_depth>(\d*))))?"
+     r"\Z"
+ )
+ SELECTOR_METHOD_SEPARATOR = "."
+
+
+ class IndirectSelection(StrEnum):
+     Eager = "eager"
+     Cautious = "cautious"
+     Buildable = "buildable"
+     Empty = "empty"
+
+
+ def _probably_path(value: str):
+     """Decide if the value is probably a path. Windows has two path separators, so
+     we should check both sep ('\\') and altsep ('/') there.
+     """
+     if os.path.sep in value:
+         return True
+     elif os.path.altsep is not None and os.path.altsep in value:
+         return True
+     else:
+         return False
+
+
+ def _match_to_int(match: Dict[str, str], key: str) -> Optional[int]:
+     raw = match.get(key)
+     # turn the empty string into None, too.
+     if not raw:
+         return None
+     try:
+         return int(raw)
+     except ValueError as exc:
+         raise DbtRuntimeError(f"Invalid node spec - could not handle parent depth {raw}") from exc
+
+
+ SelectionSpec = Union[
+     "SelectionCriteria",
+     "SelectionIntersection",
+     "SelectionDifference",
+     "SelectionUnion",
+ ]
+
+
+ @dataclass
+ class SelectionCriteria:
+     raw: Any
+     method: MethodName
+     method_arguments: List[str]
+     value: Any
+     childrens_parents: bool
+     parents: bool
+     parents_depth: Optional[int]
+     children: bool
+     children_depth: Optional[int]
+     indirect_selection: IndirectSelection = IndirectSelection.Eager
+
+     def __post_init__(self):
+         if self.children and self.childrens_parents:
+             raise DbtRuntimeError(
+                 f'Invalid node spec {self.raw} - "@" prefix and "+" suffix are incompatible'
+             )
+
+     @classmethod
+     def default_method(cls, value: str) -> MethodName:
+         if _probably_path(value):
+             return MethodName.Path
+         elif value.lower().endswith((".sql", ".py", ".csv")):
+             return MethodName.File
+         else:
+             return MethodName.FQN
+
+     @classmethod
+     def parse_method(cls, groupdict: Dict[str, Any]) -> Tuple[MethodName, List[str]]:
+         raw_method = groupdict.get("method")
+         if raw_method is None:
+             return cls.default_method(groupdict["value"]), []
+
+         method_parts: List[str] = raw_method.split(SELECTOR_METHOD_SEPARATOR)
+         try:
+             method_name = MethodName(method_parts[0])
+         except ValueError as exc:
+             raise InvalidSelectorError(f"'{method_parts[0]}' is not a valid method name") from exc
+
+         # Following is for cases like config.severity and config.materialized
+         method_arguments: List[str] = method_parts[1:]
+
+         return method_name, method_arguments
+
+     @classmethod
+     def selection_criteria_from_dict(
+         cls,
+         raw: Any,
+         dct: Dict[str, Any],
+     ) -> "SelectionCriteria":
+         if "value" not in dct:
+             raise DbtRuntimeError(f'Invalid node spec "{raw}" - no search value!')
+         method_name, method_arguments = cls.parse_method(dct)
+
+         parents_depth = _match_to_int(dct, "parents_depth")
+         children_depth = _match_to_int(dct, "children_depth")
+
+         # If defined field in selector, override CLI flag
+         indirect_selection = IndirectSelection(
+             dct.get("indirect_selection", getattr(get_flags(), "INDIRECT_SELECTION", "eager"))
+         )
+
+         return cls(
+             raw=raw,
+             method=method_name,
+             method_arguments=method_arguments,
+             value=dct["value"],
+             childrens_parents=bool(dct.get("childrens_parents")),
+             parents=bool(dct.get("parents")),
+             parents_depth=parents_depth,
+             children=bool(dct.get("children")),
+             children_depth=children_depth,
+             indirect_selection=indirect_selection,
+         )
+
+     @classmethod
+     def dict_from_single_spec(cls, raw: str):
+         result = RAW_SELECTOR_PATTERN.match(raw)
+         if result is None:
+             return {"error": "Invalid selector spec"}
+         dct: Dict[str, Any] = result.groupdict()
+         method_name, method_arguments = cls.parse_method(dct)
+         meth_name = str(method_name)
+         if method_arguments:
+             meth_name += "." + ".".join(method_arguments)
+         dct["method"] = meth_name
+         dct = {k: v for k, v in dct.items() if (v is not None and v != "")}
+         if "childrens_parents" in dct:
+             dct["childrens_parents"] = bool(dct.get("childrens_parents"))
+         if "parents" in dct:
+             dct["parents"] = bool(dct.get("parents"))
+         if "children" in dct:
+             dct["children"] = bool(dct.get("children"))
+         return dct
+
+     @classmethod
+     def from_single_spec(cls, raw: str) -> "SelectionCriteria":
+         result = RAW_SELECTOR_PATTERN.match(raw)
+         if result is None:
+             # bad spec!
+             raise DbtRuntimeError(f'Invalid selector spec "{raw}"')
+
+         return cls.selection_criteria_from_dict(raw, result.groupdict())
+
+
+ class BaseSelectionGroup(dbtClassMixin, Iterable[SelectionSpec], metaclass=ABCMeta):
+     def __init__(
+         self,
+         components: Iterable[SelectionSpec],
+         indirect_selection: IndirectSelection = IndirectSelection.Eager,
+         expect_exists: bool = False,
+         raw: Any = None,
+     ) -> None:
+         self.components: List[SelectionSpec] = list(components)
+         self.expect_exists = expect_exists
+         self.raw = raw
+         self.indirect_selection = indirect_selection
+
+     def __iter__(self) -> Iterator[SelectionSpec]:
+         for component in self.components:
+             yield component
+
+     @abstractmethod
+     def combine_selections(
+         self,
+         selections: List[Set[UniqueId]],
+     ) -> Set[UniqueId]:
+         raise NotImplementedError("combine_selections not implemented!")
+
+     def combined(self, selections: List[Set[UniqueId]]) -> Set[UniqueId]:
+         if not selections:
+             return set()
+
+         return self.combine_selections(selections)
+
+
+ class SelectionIntersection(BaseSelectionGroup):
+     def combine_selections(
+         self,
+         selections: List[Set[UniqueId]],
+     ) -> Set[UniqueId]:
+         return set.intersection(*selections)
+
+
+ class SelectionDifference(BaseSelectionGroup):
+     def combine_selections(
+         self,
+         selections: List[Set[UniqueId]],
+     ) -> Set[UniqueId]:
+         return set.difference(*selections)
+
+
+ class SelectionUnion(BaseSelectionGroup):
+     def combine_selections(
+         self,
+         selections: List[Set[UniqueId]],
+     ) -> Set[UniqueId]:
+         return set.union(*selections)
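
RAW_SELECTOR_PATTERN above encodes dbt's node-selection grammar: an optional `@` prefix, an optional `N+` parents prefix, an optional `method:` qualifier, a value, and an optional `+N` children suffix. A minimal standalone sketch of how raw selector strings decompose under this regex (example inputs are illustrative):

```python
import re

# Same pattern as in selector_spec.py above.
RAW_SELECTOR_PATTERN = re.compile(
    r"\A"
    r"(?P<childrens_parents>(\@))?"
    r"(?P<parents>((?P<parents_depth>(\d*))\+))?"
    r"((?P<method>([\w.]+)):)?(?P<value>(.*?))"
    r"(?P<children>(\+(?P<children_depth>(\d*))))?"
    r"\Z"
)

for raw in ["my_model", "2+config.materialized:view+3", "@source:raw.orders"]:
    groups = RAW_SELECTOR_PATTERN.match(raw).groupdict()
    # Drop empty/None groups to show only what each spec actually sets.
    print(raw, "->", {k: v for k, v in groups.items() if v})

# my_model -> {'value': 'my_model'}
# 2+config.materialized:view+3 -> {'parents': '2+', 'parents_depth': '2',
#     'method': 'config.materialized', 'value': 'view',
#     'children': '+3', 'children_depth': '3'}
# @source:raw.orders -> {'childrens_parents': '@', 'method': 'source',
#     'value': 'raw.orders'}
```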
dbt/graph/thread_pool.py ADDED
@@ -0,0 +1,18 @@
+ from __future__ import annotations
+
+ from multiprocessing.pool import ThreadPool
+
+
+ class DbtThreadPool(ThreadPool):
+     """A ThreadPool that tracks whether or not it's been closed"""
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.closed = False
+
+     def close(self):
+         self.closed = True
+         super().close()
+
+     def is_closed(self):
+         return self.closed
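
The stock `multiprocessing.pool.ThreadPool` has no public way to ask whether `close()` has been called; `is_closed()` is the one addition here. A quick usage sketch, assuming the installed package exposes the module at the path shown above:

```python
from dbt.graph.thread_pool import DbtThreadPool

pool = DbtThreadPool(2)             # two worker threads
print(pool.is_closed())             # False
print(pool.map(abs, [-1, -2, 3]))   # [1, 2, 3]
pool.close()
pool.join()
print(pool.is_closed())             # True: callers can now skip reusing it
```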
dbt/hooks.py ADDED
@@ -0,0 +1,21 @@
+ import json
+ from typing import Any, Dict, Union
+
+ from dbt_common.dataclass_schema import StrEnum
+
+
+ class ModelHookType(StrEnum):
+     PreHook = "pre-hook"
+     PostHook = "post-hook"
+
+
+ def get_hook_dict(source: Union[str, Dict[str, Any]]) -> Dict[str, Any]:
+     """From a source string-or-dict, get a dictionary that can be passed to
+     Hook.from_dict
+     """
+     if isinstance(source, dict):
+         return source
+     try:
+         return json.loads(source)
+     except ValueError:
+         return {"sql": source}
dbt/include/README.md ADDED
@@ -0,0 +1,49 @@
+ # Include Module
+
+ The Include module is responsible for the starter project scaffold.
+
+ # Directories
+
+ ## `starter_project`
+ Produces the default project after running the `dbt init` command for the CLI. `dbt-cloud` initializes the project by using [dbt-starter-project](https://github.com/dbt-labs/dbt-starter-project).
+
+ # adapter.dispatch
+ Packages (e.g. `include` directories of adapters, any [hub](https://hub.getdbt.com/)-hosted package) can be interpreted as namespaces of functions, a.k.a. macros. In `dbt`'s macrospace, we take advantage of the multiple dispatch programming language concept. In short, multiple dispatch supports dynamic searching for a function across several namespaces, usually in a manually specified order.
+
+ Adapters can have their own implementation of the same macro X. For example, a macro executed by `dbt-redshift` may need a specific implementation different from `dbt-snowflake`'s. We use multiple dispatch via `adapter.dispatch`, a Jinja function, which enables polymorphic macro invocations. The chosen implementation is selected according to what the `adapter` object is set to at runtime (it could be for redshift, postgres, and so on).
+
+ For more on this object, check out the dbt docs [here](https://docs.getdbt.com/reference/dbt-jinja-functions/adapter).
+
+ # dbt and database adapter python package interop
+
+ Let's say we have a fictional python app named `dbt-core` with this structure:
+
+ ```
+ dbt
+ ├── adapters
+ │   └── base.py
+ ├── cli.py
+ └── main.py
+ ```
+
+ `pip install dbt-core` will install this application in my python environment, maintaining the same structure. Note that `dbt.adapters` only contains a `base.py`; in this example, we can assume that `base.py` includes an abstract class for creating connections. Let's say we wanted to create a postgres adapter that this app could use and that can be installed independently. We can create a python package called `dbt-postgres` with the following structure:
+
+ ```
+ dbt
+ └── adapters
+     └── postgres
+         └── impl.py
+ ```
+
+ `pip install dbt-postgres` will install this package in the python environment, again maintaining the same structure. Let's say `impl.py` imports `dbt.adapters.base` and implements a concrete class inheriting from the abstract class in `base.py` from the `dbt-core` package. Since our top-level package is named the same in both packages, `pip` will put this in the same place. We end up with this installed in our python environment:
+
+ ```
+ dbt
+ ├── adapters
+ │   ├── base.py
+ │   └── postgres
+ │       └── impl.py
+ ├── cli.py
+ └── main.py
+ ```
+
+ `dbt.adapters` now has a postgres module that dbt can easily find and call directly. dbt and its adapters follow the same file-structure convention. This is the magic that allows you to import `dbt.*` in database adapters and, using a factory pattern in dbt-core, create instances of concrete classes defined in the database adapter packages (for creating connections, defining database configuration, defining credentials, etc.).
dbt/include/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from pkgutil import extend_path
+
+ __path__ = extend_path(__path__, __name__)
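
`extend_path` scans every `sys.path` entry for a subdirectory named after the package and appends each one found to the package's `__path__`, which is what lets separately installed distributions contribute modules to one shared namespace, as the README above describes. A standalone sketch of the effect (the `app` package and directory layout are illustrative, standing in for the shared `dbt/` tree):

```python
import sys
import tempfile
from pathlib import Path

# Two fake "distributions" that both ship a top-level package `app`,
# mirroring how dbt-core and dbt-postgres both ship `dbt/`.
site_a, site_b = Path(tempfile.mkdtemp()), Path(tempfile.mkdtemp())
for site, module in [(site_a, "base"), (site_b, "postgres")]:
    pkg = site / "app"
    pkg.mkdir()
    (pkg / "__init__.py").write_text(
        "from pkgutil import extend_path\n"
        "__path__ = extend_path(__path__, __name__)\n"
    )
    (pkg / f"{module}.py").write_text(f"NAME = {module!r}\n")

sys.path[:0] = [str(site_a), str(site_b)]

# Both submodules resolve, even though they live in different directories.
from app import base, postgres  # noqa: E402

print(base.NAME, postgres.NAME)  # base postgres
```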
dbt/include/data/build_registry.py ADDED
@@ -0,0 +1,242 @@
+ #!/usr/bin/env python3
+ """
+ Build Script for DVT Adapters Registry
+
+ This script reads CSV files from the csv/ directory and creates
+ adapters_registry.duckdb with pre-populated type mappings, syntax rules,
+ and adapter queries.
+
+ Usage:
+     python build_registry.py
+
+ The resulting adapters_registry.duckdb is shipped with the DVT package.
+ """
+
+ import csv
+ import os
+ from pathlib import Path
+
+ try:
+     import duckdb
+ except ImportError:
+     print("Error: duckdb is required. Install with: pip install duckdb")
+     exit(1)
+
+
+ def get_script_dir() -> Path:
+     """Get directory containing this script."""
+     return Path(__file__).parent
+
+
+ def create_schema(conn: duckdb.DuckDBPyConnection) -> None:
+     """Create the database schema."""
+
+     # Table: datatype_mappings
+     conn.execute("""
+         CREATE TABLE IF NOT EXISTS datatype_mappings (
+             adapter_name VARCHAR NOT NULL,
+             adapter_type VARCHAR NOT NULL,
+             spark_type VARCHAR NOT NULL,
+             spark_version VARCHAR DEFAULT 'all',
+             is_complex BOOLEAN DEFAULT FALSE,
+             cast_expression VARCHAR,
+             notes VARCHAR,
+             UNIQUE (adapter_name, adapter_type, spark_version)
+         )
+     """)
+     conn.execute("""
+         CREATE INDEX IF NOT EXISTS idx_datatype_lookup
+         ON datatype_mappings(adapter_name, adapter_type)
+     """)
+
+     # Table: syntax_registry
+     conn.execute("""
+         CREATE TABLE IF NOT EXISTS syntax_registry (
+             adapter_name VARCHAR NOT NULL PRIMARY KEY,
+             quote_start VARCHAR NOT NULL,
+             quote_end VARCHAR NOT NULL,
+             case_sensitivity VARCHAR NOT NULL,
+             reserved_keywords VARCHAR
+         )
+     """)
+
+     # Table: adapter_queries
+     conn.execute("""
+         CREATE TABLE IF NOT EXISTS adapter_queries (
+             adapter_name VARCHAR NOT NULL,
+             query_type VARCHAR NOT NULL,
+             query_template VARCHAR NOT NULL,
+             notes VARCHAR,
+             PRIMARY KEY (adapter_name, query_type)
+         )
+     """)
+
+     print("Schema created successfully")
+
+
+ def load_type_mappings(conn: duckdb.DuckDBPyConnection, csv_dir: Path) -> int:
+     """Load type mappings from CSV files."""
+     total_rows = 0
+
+     # Find all type_mappings_*.csv files
+     for csv_file in sorted(csv_dir.glob("type_mappings_*.csv")):
+         adapter_name = csv_file.stem.replace("type_mappings_", "")
+         rows_loaded = 0
+
+         with open(csv_file, 'r', encoding='utf-8') as f:
+             reader = csv.DictReader(f)
+             for row in reader:
+                 conn.execute("""
+                     INSERT OR REPLACE INTO datatype_mappings
+                     (adapter_name, adapter_type, spark_type, spark_version,
+                      is_complex, cast_expression, notes)
+                     VALUES (?, ?, ?, ?, ?, ?, ?)
+                 """, [
+                     adapter_name,
+                     row['adapter_type'].strip(),
+                     row['spark_type'].strip(),
+                     row.get('spark_version', 'all').strip() or 'all',
+                     row.get('is_complex', 'false').strip().lower() == 'true',
+                     row.get('cast_expression', '').strip() or None,
+                     row.get('notes', '').strip() or None,
+                 ])
+                 rows_loaded += 1
+
+         print(f"  Loaded {rows_loaded} type mappings for {adapter_name}")
+         total_rows += rows_loaded
+
+     return total_rows
+
+
+ def load_syntax_rules(conn: duckdb.DuckDBPyConnection, csv_dir: Path) -> int:
+     """Load syntax rules from CSV file."""
+     csv_file = csv_dir / "syntax_rules.csv"
+     if not csv_file.exists():
+         print("  Warning: syntax_rules.csv not found")
+         return 0
+
+     rows_loaded = 0
+     with open(csv_file, 'r', encoding='utf-8') as f:
+         reader = csv.DictReader(f)
+         for row in reader:
+             conn.execute("""
+                 INSERT OR REPLACE INTO syntax_registry
+                 (adapter_name, quote_start, quote_end, case_sensitivity, reserved_keywords)
+                 VALUES (?, ?, ?, ?, ?)
+             """, [
+                 row['adapter_name'].strip(),
+                 row['quote_start'].strip(),
+                 row['quote_end'].strip(),
+                 row['case_sensitivity'].strip(),
+                 row.get('reserved_keywords', '').strip() or None,
+             ])
+             rows_loaded += 1
+
+     print(f"  Loaded {rows_loaded} syntax rules")
+     return rows_loaded
+
+
+ def load_adapter_queries(conn: duckdb.DuckDBPyConnection, csv_dir: Path) -> int:
+     """Load adapter queries from CSV file."""
+     csv_file = csv_dir / "adapter_queries.csv"
+     if not csv_file.exists():
+         print("  Warning: adapter_queries.csv not found")
+         return 0
+
+     rows_loaded = 0
+     with open(csv_file, 'r', encoding='utf-8') as f:
+         reader = csv.DictReader(f)
+         for row in reader:
+             conn.execute("""
+                 INSERT OR REPLACE INTO adapter_queries
+                 (adapter_name, query_type, query_template, notes)
+                 VALUES (?, ?, ?, ?)
+             """, [
+                 row['adapter_name'].strip(),
+                 row['query_type'].strip(),
+                 row['query_template'].strip(),
+                 row.get('notes', '').strip() or None,
+             ])
+             rows_loaded += 1
+
+     print(f"  Loaded {rows_loaded} adapter queries")
+     return rows_loaded
+
+
+ def print_stats(conn: duckdb.DuckDBPyConnection) -> None:
+     """Print statistics about the loaded data."""
+     print("\n=== Registry Statistics ===")
+
+     # Type mappings by adapter
+     result = conn.execute("""
+         SELECT adapter_name, COUNT(*) as count
+         FROM datatype_mappings
+         GROUP BY adapter_name
+         ORDER BY adapter_name
+     """).fetchall()
+     print("\nType mappings per adapter:")
+     for row in result:
+         print(f"  {row[0]}: {row[1]}")
+
+     # Syntax rules
+     result = conn.execute("SELECT COUNT(*) FROM syntax_registry").fetchone()
+     print(f"\nSyntax rules: {result[0]} adapters")
+
+     # Adapter queries
+     result = conn.execute("""
+         SELECT adapter_name, COUNT(*) as count
+         FROM adapter_queries
+         GROUP BY adapter_name
+         ORDER BY adapter_name
+     """).fetchall()
+     print("\nAdapter queries:")
+     for row in result:
+         print(f"  {row[0]}: {row[1]} queries")
+
+
+ def main():
+     script_dir = get_script_dir()
+     csv_dir = script_dir / "csv"
+     db_path = script_dir / "adapters_registry.duckdb"
+
+     print("Building adapters_registry.duckdb")
+     print(f"CSV directory: {csv_dir}")
+     print(f"Output: {db_path}")
+     print()
+
+     # Remove existing database
+     if db_path.exists():
+         os.remove(db_path)
+         print("Removed existing database")
+
+     # Create new database
+     conn = duckdb.connect(str(db_path))
+
+     try:
+         # Create schema
+         print("\nCreating schema...")
+         create_schema(conn)
+
+         # Load data
+         print("\nLoading type mappings...")
+         type_count = load_type_mappings(conn, csv_dir)
+
+         print("\nLoading syntax rules...")
+         syntax_count = load_syntax_rules(conn, csv_dir)
+
+         print("\nLoading adapter queries...")
+         query_count = load_adapter_queries(conn, csv_dir)
+
+         # Print stats
+         print_stats(conn)
+
+         print("\n=== Build Complete ===")
+         print(f"Total: {type_count} type mappings, {syntax_count} syntax rules, {query_count} queries")
+         print(f"Database size: {db_path.stat().st_size / 1024:.1f} KB")
+
+     finally:
+         conn.close()
+
+
+ if __name__ == "__main__":
+     main()
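
Once built, the registry is an ordinary DuckDB file, so consumers can resolve a mapping with a plain parameterized query against the schema defined above. A minimal sketch (the path and lookup values are illustrative):

```python
import duckdb

# Open the registry produced by build_registry.py; read-only is enough here.
conn = duckdb.connect("adapters_registry.duckdb", read_only=True)

# Resolve how a BigQuery NUMERIC column should surface in Spark.
spark_type, cast_expression = conn.execute(
    """
    SELECT spark_type, cast_expression
    FROM datatype_mappings
    WHERE adapter_name = ? AND adapter_type = ?
    """,
    ["bigquery", "NUMERIC"],
).fetchone()

print(spark_type)       # DecimalType
print(cast_expression)  # None (empty cast_expression is stored as NULL)
conn.close()
```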
dbt/include/data/csv/adapter_queries.csv ADDED
@@ -0,0 +1,33 @@
+ adapter_name,query_type,query_template,notes
+ postgres,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
+ postgres,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
+ postgres,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
+ postgres,primary_key,"SELECT kcu.column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
+ snowflake,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
+ snowflake,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
+ snowflake,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
+ snowflake,primary_key,"SELECT column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
+ mysql,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
+ mysql,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
+ mysql,row_count,"SELECT COUNT(*) FROM `{schema}`.`{table}`",Count rows in table
+ mysql,primary_key,"SELECT column_name FROM information_schema.key_column_usage WHERE table_schema = '{schema}' AND table_name = '{table}' AND constraint_name = 'PRIMARY'",Get primary key columns
+ bigquery,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM `{project}`.`{schema}`.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '{table}' ORDER BY ordinal_position",Get column metadata
+ bigquery,tables,"SELECT table_name FROM `{project}`.`{schema}`.INFORMATION_SCHEMA.TABLES WHERE table_type = 'BASE TABLE'",List tables in dataset
+ bigquery,row_count,"SELECT COUNT(*) FROM `{project}`.`{schema}`.`{table}`",Count rows in table
+ bigquery,primary_key,"SELECT column_name FROM `{project}`.`{schema}`.INFORMATION_SCHEMA.KEY_COLUMN_USAGE WHERE table_name = '{table}' AND constraint_name LIKE 'pk_%'",Get primary key columns
+ redshift,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
+ redshift,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
+ redshift,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
+ redshift,primary_key,"SELECT kcu.column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
+ oracle,columns,"SELECT column_name, data_type, nullable as is_nullable, column_id as ordinal_position FROM all_tab_columns WHERE owner = UPPER('{schema}') AND table_name = UPPER('{table}') ORDER BY column_id",Get column metadata
+ oracle,tables,"SELECT table_name FROM all_tables WHERE owner = UPPER('{schema}')",List tables in schema
+ oracle,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
+ oracle,primary_key,"SELECT cols.column_name FROM all_constraints cons JOIN all_cons_columns cols ON cons.constraint_name = cols.constraint_name WHERE cons.owner = UPPER('{schema}') AND cons.table_name = UPPER('{table}') AND cons.constraint_type = 'P'",Get primary key columns
+ sqlserver,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
+ sqlserver,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
+ sqlserver,row_count,"SELECT COUNT(*) FROM [{schema}].[{table}]",Count rows in table
+ sqlserver,primary_key,"SELECT kcu.column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
+ databricks,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
+ databricks,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type IN ('BASE TABLE', 'MANAGED', 'EXTERNAL')",List tables in schema
+ databricks,row_count,"SELECT COUNT(*) FROM `{schema}`.`{table}`",Count rows in table
+ databricks,primary_key,"SELECT column_name FROM system.information_schema.table_constraints tc JOIN system.information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
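
The `query_template` column uses `{placeholder}` tokens, so a caller can presumably render one with `str.format`. A sketch using the postgres `row_count` row above (note the CSV escapes double quotes by doubling them; the identifiers are illustrative):

```python
# After CSV parsing, the postgres row_count template from the rows above is:
template = 'SELECT COUNT(*) FROM "{schema}"."{table}"'

sql = template.format(schema="analytics", table="orders")
print(sql)  # SELECT COUNT(*) FROM "analytics"."orders"
```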
dbt/include/data/csv/syntax_rules.csv ADDED
@@ -0,0 +1,9 @@
+ adapter_name,quote_start,quote_end,case_sensitivity,reserved_keywords
+ postgres,"""","""",lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF"
+ snowflake,"""","""",uppercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,ILIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,MINUS,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,FETCH,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IFF,QUALIFY,PIVOT,UNPIVOT,LATERAL,FLATTEN,VARIANT,OBJECT,ARRAY"
+ mysql,`,`,lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,CROSS,UNION,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IF,IFNULL,DIV,MOD,XOR,REGEXP,RLIKE"
+ bigquery,`,`,lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,VIEW,SCHEMA,DATASET,PROJECT,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IF,IFNULL,STRUCT,ARRAY,UNNEST,PARTITION,CLUSTER,TABLESAMPLE"
+ redshift,"""","""",lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,ILIKE,SIMILAR,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,NVL,NVL2,DECODE,DISTKEY,SORTKEY,DISTSTYLE"
+ oracle,"""","""",uppercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,USER,GRANT,REVOKE,AND,OR,NOT,NULL,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,MINUS,ORDER,BY,GROUP,HAVING,FETCH,FIRST,NEXT,ROWS,ONLY,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,NVL,NVL2,DECODE,CONNECT,START,WITH,PRIOR,LEVEL,ROWNUM,ROWID,SYSDATE,SYSTIMESTAMP,DUAL"
+ sqlserver,[,],case_insensitive,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,TOP,OFFSET,FETCH,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,ISNULL,IIF,CHOOSE,PIVOT,UNPIVOT,APPLY,CROSS,OUTER,GO,USE,EXEC,EXECUTE,PRINT,DECLARE,SET,BEGIN,COMMIT,ROLLBACK"
+ databricks,`,`,case_insensitive,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,CATALOG,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,ILIKE,RLIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,SEMI,ANTI,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IF,IFNULL,NVL,STRUCT,ARRAY,MAP,LATERAL,EXPLODE,POSEXPLODE,INLINE,STACK,TABLESAMPLE,PIVOT,UNPIVOT,DISTRIBUTE,CLUSTER,SORT"
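
Each row pairs an adapter with its quote characters and case convention, which is enough to write a generic identifier quoter. A sketch assuming rows parsed from this CSV (only two sample rows are inlined; the keyword lists are trimmed for brevity):

```python
import csv
import io

# Two rows in the same shape as syntax_rules.csv above.
SAMPLE = '''adapter_name,quote_start,quote_end,case_sensitivity,reserved_keywords
mysql,`,`,lowercase,"SELECT,FROM"
sqlserver,[,],case_insensitive,"SELECT,FROM"
'''

rules = {row["adapter_name"]: row for row in csv.DictReader(io.StringIO(SAMPLE))}


def quote_identifier(adapter: str, name: str) -> str:
    """Wrap name in the adapter's quote characters, folding case per its rule."""
    rule = rules[adapter]
    if rule["case_sensitivity"] == "lowercase":
        name = name.lower()
    elif rule["case_sensitivity"] == "uppercase":
        name = name.upper()
    return f'{rule["quote_start"]}{name}{rule["quote_end"]}'


print(quote_identifier("mysql", "Orders"))      # `orders`
print(quote_identifier("sqlserver", "Orders"))  # [Orders]
```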
dbt/include/data/csv/type_mappings_bigquery.csv ADDED
@@ -0,0 +1,28 @@
+ adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
+ STRING,StringType,all,false,,Variable-length Unicode string
+ BYTES,BinaryType,all,false,,Variable-length binary
+ INT64,LongType,all,false,,64-bit signed integer
+ INTEGER,LongType,all,false,,Alias for INT64
+ INT,LongType,all,false,,Alias for INT64
+ SMALLINT,LongType,all,false,,Alias for INT64
+ BIGINT,LongType,all,false,,Alias for INT64
+ TINYINT,LongType,all,false,,Alias for INT64
+ BYTEINT,LongType,all,false,,Alias for INT64
+ FLOAT64,DoubleType,all,false,,64-bit floating point
+ FLOAT,DoubleType,all,false,,Alias for FLOAT64
+ NUMERIC,DecimalType,all,false,,Exact numeric (precision 38 scale 9)
+ DECIMAL,DecimalType,all,false,,Alias for NUMERIC
+ BIGNUMERIC,DecimalType,all,false,,High precision numeric (76.76)
+ BIGDECIMAL,DecimalType,all,false,,Alias for BIGNUMERIC
+ BOOL,BooleanType,all,false,,Boolean true/false
+ BOOLEAN,BooleanType,all,false,,Alias for BOOL
+ DATE,DateType,all,false,,Calendar date
+ TIME,StringType,all,false,,Time of day
+ DATETIME,TimestampType,all,false,,Date and time without timezone
+ TIMESTAMP,TimestampType,all,false,,Date and time with timezone
+ INTERVAL,StringType,all,false,,Time interval
+ GEOGRAPHY,StringType,all,false,,Geographic data (GeoJSON)
+ JSON,StringType,all,true,TO_JSON_STRING({}),JSON document
+ STRUCT,StringType,all,true,TO_JSON_STRING({}),Structured record
+ RECORD,StringType,all,true,TO_JSON_STRING({}),Alias for STRUCT
+ ARRAY,ArrayType,all,true,,Array of values
dbt/include/data/csv/type_mappings_databricks.csv ADDED
@@ -0,0 +1,30 @@
+ adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
+ STRING,StringType,all,false,,Variable-length string
+ VARCHAR,StringType,all,false,,Variable-length string (alias)
+ CHAR,StringType,all,false,,Fixed-length string
+ BINARY,BinaryType,all,false,,Binary data
+ TINYINT,ByteType,all,false,,8-bit signed integer
+ BYTE,ByteType,all,false,,Alias for TINYINT
+ SMALLINT,ShortType,all,false,,16-bit signed integer
+ SHORT,ShortType,all,false,,Alias for SMALLINT
+ INT,IntegerType,all,false,,32-bit signed integer
+ INTEGER,IntegerType,all,false,,Alias for INT
+ BIGINT,LongType,all,false,,64-bit signed integer
+ LONG,LongType,all,false,,Alias for BIGINT
+ FLOAT,FloatType,all,false,,32-bit floating point
+ REAL,FloatType,all,false,,Alias for FLOAT
+ DOUBLE,DoubleType,all,false,,64-bit floating point
+ DECIMAL,DecimalType,all,false,,Exact numeric with precision
+ DEC,DecimalType,all,false,,Alias for DECIMAL
+ NUMERIC,DecimalType,all,false,,Alias for DECIMAL
+ BOOLEAN,BooleanType,all,false,,True/false value
+ DATE,DateType,all,false,,Calendar date
+ TIMESTAMP,TimestampType,all,false,,Timestamp without timezone
+ TIMESTAMP_NTZ,TimestampType,all,false,,Timestamp no timezone (explicit)
+ TIMESTAMP_LTZ,TimestampType,all,false,,Timestamp with local timezone
+ INTERVAL,StringType,all,false,,Time interval
+ ARRAY,ArrayType,all,true,,Array of elements
+ MAP,MapType,all,true,TO_JSON({}),Key-value pairs
+ STRUCT,StringType,all,true,TO_JSON({}),Structured record
+ VARIANT,StringType,all,true,TO_JSON({}),Semi-structured (Unity Catalog)
+ OBJECT,StringType,all,true,TO_JSON({}),Object type (Unity Catalog)
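
The `cast_expression` column is a template with a single `{}` slot for the column reference; complex types fall back to a JSON string through it. A sketch of applying two databricks rows from above (the column names are illustrative):

```python
# Two rows from the databricks table above, reduced to the fields used here.
MAPPINGS = {
    "BIGINT": {"spark_type": "LongType", "cast_expression": None},
    "STRUCT": {"spark_type": "StringType", "cast_expression": "TO_JSON({})"},
}


def select_expression(column: str, adapter_type: str) -> str:
    """Return the SELECT-list expression for a column, casting complex types."""
    cast = MAPPINGS[adapter_type]["cast_expression"]
    return cast.format(column) if cast else column


print(select_expression("order_id", "BIGINT"))  # order_id
print(select_expression("payload", "STRUCT"))   # TO_JSON(payload)
```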