dvt_core-0.58.6-cp311-cp311-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2403 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +50 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.cpython-311-darwin.so +0 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.cpython-311-darwin.so +0 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.cpython-311-darwin.so +0 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.cpython-311-darwin.so +0 -0
- dbt/compute/jar_provisioning.py +255 -0
- dbt/compute/java_compat.cpython-311-darwin.so +0 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.cpython-311-darwin.so +0 -0
- dbt/compute/jdbc_utils.py +678 -0
- dbt/compute/metadata/__init__.py +40 -0
- dbt/compute/metadata/adapters_registry.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/registry.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.cpython-311-darwin.so +0 -0
- dbt/compute/metadata/store.py +1499 -0
- dbt/compute/smart_selector.cpython-311-darwin.so +0 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/local.py +443 -0
- dbt/compute/strategies/standalone.cpython-311-darwin.so +0 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.cpython-311-darwin.so +0 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.cpython-311-darwin.so +0 -0
- dbt/config/dvt_profile.py +342 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +873 -0
- dbt/config/project_utils.py +28 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +553 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +417 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/starter_project/.gitignore +4 -0
- dbt/include/starter_project/README.md +15 -0
- dbt/include/starter_project/__init__.py +3 -0
- dbt/include/starter_project/analyses/.gitkeep +0 -0
- dbt/include/starter_project/dbt_project.yml +36 -0
- dbt/include/starter_project/macros/.gitkeep +0 -0
- dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/starter_project/models/example/schema.yml +21 -0
- dbt/include/starter_project/seeds/.gitkeep +0 -0
- dbt/include/starter_project/snapshots/.gitkeep +0 -0
- dbt/include/starter_project/tests/.gitkeep +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +118 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2204 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.cpython-311-darwin.so +0 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +503 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.cpython-311-darwin.so +0 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +505 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.cpython-311-darwin.so +0 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +947 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.cpython-311-darwin.so +0 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +362 -0
- dbt/task/dvt_run.py +204 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.cpython-311-darwin.so +0 -0
- dbt/task/init.py +604 -0
- dbt/task/java.cpython-311-darwin.so +0 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.cpython-311-darwin.so +0 -0
- dbt/task/metadata.py +804 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.cpython-311-darwin.so +0 -0
- dbt/task/profile.py +1307 -0
- dbt/task/profile_serve.py +615 -0
- dbt/task/retract.py +438 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1387 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.cpython-311-darwin.so +0 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.cpython-311-darwin.so +0 -0
- dbt/task/target_sync.py +766 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +270 -0
- dvt_cli/__init__.py +72 -0
- dvt_core-0.58.6.dist-info/METADATA +288 -0
- dvt_core-0.58.6.dist-info/RECORD +324 -0
- dvt_core-0.58.6.dist-info/WHEEL +5 -0
- dvt_core-0.58.6.dist-info/entry_points.txt +2 -0
- dvt_core-0.58.6.dist-info/top_level.txt +2 -0
dbt/graph/selector_spec.py
ADDED

@@ -0,0 +1,222 @@
import os
import re
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union

from dbt.exceptions import InvalidSelectorError
from dbt.flags import get_flags
from dbt_common.dataclass_schema import StrEnum, dbtClassMixin
from dbt_common.exceptions import DbtRuntimeError

from .graph import UniqueId
from .selector_methods import MethodName

RAW_SELECTOR_PATTERN = re.compile(
    r"\A"
    r"(?P<childrens_parents>(\@))?"
    r"(?P<parents>((?P<parents_depth>(\d*))\+))?"
    r"((?P<method>([\w.]+)):)?(?P<value>(.*?))"
    r"(?P<children>(\+(?P<children_depth>(\d*))))?"
    r"\Z"
)
SELECTOR_METHOD_SEPARATOR = "."


class IndirectSelection(StrEnum):
    Eager = "eager"
    Cautious = "cautious"
    Buildable = "buildable"
    Empty = "empty"


def _probably_path(value: str):
    """Decide if the value is probably a path. Windows has two path separators, so
    we should check both sep ('\\') and altsep ('/') there.
    """
    if os.path.sep in value:
        return True
    elif os.path.altsep is not None and os.path.altsep in value:
        return True
    else:
        return False


def _match_to_int(match: Dict[str, str], key: str) -> Optional[int]:
    raw = match.get(key)
    # turn the empty string into None, too.
    if not raw:
        return None
    try:
        return int(raw)
    except ValueError as exc:
        raise DbtRuntimeError(f"Invalid node spec - could not handle parent depth {raw}") from exc


SelectionSpec = Union[
    "SelectionCriteria",
    "SelectionIntersection",
    "SelectionDifference",
    "SelectionUnion",
]


@dataclass
class SelectionCriteria:
    raw: Any
    method: MethodName
    method_arguments: List[str]
    value: Any
    childrens_parents: bool
    parents: bool
    parents_depth: Optional[int]
    children: bool
    children_depth: Optional[int]
    indirect_selection: IndirectSelection = IndirectSelection.Eager

    def __post_init__(self):
        if self.children and self.childrens_parents:
            raise DbtRuntimeError(
                f'Invalid node spec {self.raw} - "@" prefix and "+" suffix ' "are incompatible"
            )

    @classmethod
    def default_method(cls, value: str) -> MethodName:
        if _probably_path(value):
            return MethodName.Path
        elif value.lower().endswith((".sql", ".py", ".csv")):
            return MethodName.File
        else:
            return MethodName.FQN

    @classmethod
    def parse_method(cls, groupdict: Dict[str, Any]) -> Tuple[MethodName, List[str]]:
        raw_method = groupdict.get("method")
        if raw_method is None:
            return cls.default_method(groupdict["value"]), []

        method_parts: List[str] = raw_method.split(SELECTOR_METHOD_SEPARATOR)
        try:
            method_name = MethodName(method_parts[0])
        except ValueError as exc:
            raise InvalidSelectorError(f"'{method_parts[0]}' is not a valid method name") from exc

        # Following is for cases like config.severity and config.materialized
        method_arguments: List[str] = method_parts[1:]

        return method_name, method_arguments

    @classmethod
    def selection_criteria_from_dict(
        cls,
        raw: Any,
        dct: Dict[str, Any],
    ) -> "SelectionCriteria":
        if "value" not in dct:
            raise DbtRuntimeError(f'Invalid node spec "{raw}" - no search value!')
        method_name, method_arguments = cls.parse_method(dct)

        parents_depth = _match_to_int(dct, "parents_depth")
        children_depth = _match_to_int(dct, "children_depth")

        # If defined field in selector, override CLI flag
        indirect_selection = IndirectSelection(
            dct.get("indirect_selection", getattr(get_flags(), "INDIRECT_SELECTION", "eager"))
        )

        return cls(
            raw=raw,
            method=method_name,
            method_arguments=method_arguments,
            value=dct["value"],
            childrens_parents=bool(dct.get("childrens_parents")),
            parents=bool(dct.get("parents")),
            parents_depth=parents_depth,
            children=bool(dct.get("children")),
            children_depth=children_depth,
            indirect_selection=indirect_selection,
        )

    @classmethod
    def dict_from_single_spec(cls, raw: str):
        result = RAW_SELECTOR_PATTERN.match(raw)
        if result is None:
            return {"error": "Invalid selector spec"}
        dct: Dict[str, Any] = result.groupdict()
        method_name, method_arguments = cls.parse_method(dct)
        meth_name = str(method_name)
        if method_arguments:
            meth_name += "." + ".".join(method_arguments)
        dct["method"] = meth_name
        dct = {k: v for k, v in dct.items() if (v is not None and v != "")}
        if "childrens_parents" in dct:
            dct["childrens_parents"] = bool(dct.get("childrens_parents"))
        if "parents" in dct:
            dct["parents"] = bool(dct.get("parents"))
        if "children" in dct:
            dct["children"] = bool(dct.get("children"))
        return dct

    @classmethod
    def from_single_spec(cls, raw: str) -> "SelectionCriteria":
        result = RAW_SELECTOR_PATTERN.match(raw)
        if result is None:
            # bad spec!
            raise DbtRuntimeError(f'Invalid selector spec "{raw}"')

        return cls.selection_criteria_from_dict(raw, result.groupdict())


class BaseSelectionGroup(dbtClassMixin, Iterable[SelectionSpec], metaclass=ABCMeta):
    def __init__(
        self,
        components: Iterable[SelectionSpec],
        indirect_selection: IndirectSelection = IndirectSelection.Eager,
        expect_exists: bool = False,
        raw: Any = None,
    ) -> None:
        self.components: List[SelectionSpec] = list(components)
        self.expect_exists = expect_exists
        self.raw = raw
        self.indirect_selection = indirect_selection

    def __iter__(self) -> Iterator[SelectionSpec]:
        for component in self.components:
            yield component

    @abstractmethod
    def combine_selections(
        self,
        selections: List[Set[UniqueId]],
    ) -> Set[UniqueId]:
        raise NotImplementedError("_combine_selections not implemented!")

    def combined(self, selections: List[Set[UniqueId]]) -> Set[UniqueId]:
        if not selections:
            return set()

        return self.combine_selections(selections)


class SelectionIntersection(BaseSelectionGroup):
    def combine_selections(
        self,
        selections: List[Set[UniqueId]],
    ) -> Set[UniqueId]:
        return set.intersection(*selections)


class SelectionDifference(BaseSelectionGroup):
    def combine_selections(
        self,
        selections: List[Set[UniqueId]],
    ) -> Set[UniqueId]:
        return set.difference(*selections)


class SelectionUnion(BaseSelectionGroup):
    def combine_selections(
        self,
        selections: List[Set[UniqueId]],
    ) -> Set[UniqueId]:
        return set.union(*selections)
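To see what `RAW_SELECTOR_PATTERN` captures for a typical node spec, here is a small self-contained sketch (the regex copied from above, runnable without dbt installed):

```python
# Matching a graph-operator node spec such as "2+tag:nightly+3":
# up to 2 parents, the "tag" method with value "nightly", up to 3 children.
import re

RAW_SELECTOR_PATTERN = re.compile(
    r"\A"
    r"(?P<childrens_parents>(\@))?"
    r"(?P<parents>((?P<parents_depth>(\d*))\+))?"
    r"((?P<method>([\w.]+)):)?(?P<value>(.*?))"
    r"(?P<children>(\+(?P<children_depth>(\d*))))?"
    r"\Z"
)

match = RAW_SELECTOR_PATTERN.match("2+tag:nightly+3")
print(match.groupdict())
# {'childrens_parents': None, 'parents': '2+', 'parents_depth': '2',
#  'method': 'tag', 'value': 'nightly', 'children': '+3', 'children_depth': '3'}
```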
dbt/graph/thread_pool.py
ADDED

@@ -0,0 +1,18 @@
from __future__ import annotations

from multiprocessing.pool import ThreadPool


class DbtThreadPool(ThreadPool):
    """A ThreadPool that tracks whether or not it's been closed"""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.closed = False

    def close(self):
        self.closed = True
        super().close()

    def is_closed(self):
        return self.closed
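A short usage sketch (assuming the class is importable as shown; apart from the extra closed flag, `DbtThreadPool` behaves exactly like `multiprocessing.pool.ThreadPool`):

```python
from dbt.graph.thread_pool import DbtThreadPool

pool = DbtThreadPool(processes=4)
squares = pool.map(lambda x: x * x, range(8))  # threads, so lambdas are fine
pool.close()
pool.join()
print(squares, pool.is_closed())  # [0, 1, 4, 9, 16, 25, 36, 49] True
```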
dbt/hooks.py
ADDED

@@ -0,0 +1,21 @@
import json
from typing import Any, Dict, Union

from dbt_common.dataclass_schema import StrEnum


class ModelHookType(StrEnum):
    PreHook = "pre-hook"
    PostHook = "post-hook"


def get_hook_dict(source: Union[str, Dict[str, Any]]) -> Dict[str, Any]:
    """From a source string-or-dict, get a dictionary that can be passed to
    Hook.from_dict
    """
    if isinstance(source, dict):
        return source
    try:
        return json.loads(source)
    except ValueError:
        return {"sql": source}
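The three input shapes `get_hook_dict` accepts, as a quick sketch (assuming the module is importable as `dbt.hooks`):

```python
from dbt.hooks import get_hook_dict

# A dict passes through untouched.
print(get_hook_dict({"sql": "grant select on my_table to reporter"}))
# A JSON string is parsed into a dict.
print(get_hook_dict('{"sql": "vacuum my_table", "transaction": false}'))
# Anything else is treated as raw SQL and wrapped.
print(get_hook_dict("grant select on my_table to reporter"))
# {'sql': 'grant select on my_table to reporter'}
```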
dbt/include/README.md
ADDED

@@ -0,0 +1,49 @@
# Include Module

The Include module is responsible for the starter project scaffold.

# Directories

## `starter_project`
Produces the default project after running the `dbt init` command for the CLI. `dbt-cloud` initializes the project by using [dbt-starter-project](https://github.com/dbt-labs/dbt-starter-project).

# adapter.dispatch
Packages (e.g. `include` directories of adapters, any [hub](https://hub.getdbt.com/)-hosted package) can be interpreted as namespaces of functions, a.k.a. macros. In `dbt`'s macrospace, we take advantage of the multiple dispatch programming-language concept. In short, multiple dispatch supports dynamic searching for a function across several namespaces, usually in a manually specified order.

Adapters can have their own implementation of the same macro X. For example, a macro executed by `dbt-redshift` may need a specific implementation that differs from `dbt-snowflake`'s. We use multiple dispatch via `adapter.dispatch`, a Jinja function, which enables polymorphic macro invocations. The chosen implementation is selected according to what the `adapter` object is set to at runtime (it could be for redshift, postgres, and so on).

For more on this object, check out the dbt docs [here](https://docs.getdbt.com/reference/dbt-jinja-functions/adapter).
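To make the lookup order concrete, here is a minimal sketch (not dbt's actual implementation) of how a dispatcher can resolve a macro name by searching for an adapter-prefixed implementation before falling back to a default:

```python
# Minimal sketch of adapter-style dispatch, assuming macros are plain
# callables stored in dicts keyed by "<prefix>__<name>".
def dispatch(namespaces, macro_name, adapter_type):
    # Prefer the adapter-specific implementation, then the default one.
    for candidate in (f"{adapter_type}__{macro_name}", f"default__{macro_name}"):
        for namespace in namespaces:
            if candidate in namespace:
                return namespace[candidate]
    raise KeyError(f"No implementation of {macro_name!r} for {adapter_type!r}")

project_macros = {"default__current_timestamp": lambda: "current_timestamp"}
redshift_macros = {"redshift__current_timestamp": lambda: "getdate()"}

macro = dispatch([redshift_macros, project_macros], "current_timestamp", "redshift")
print(macro())  # getdate()
```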
# dbt and database adapter python package interop

Let’s say we have a fictional python app named `dbt-core` with this structure

```
dbt
├── adapters
│   └── base.py
├── cli.py
└── main.py
```

`pip install dbt-core` will install this application in my python environment, maintaining the same structure. Note that `dbt.adapters` only contains a `base.py`. In this example, we can assume that base.py includes an abstract class for creating connections. Let’s say we wanted to create a postgres adapter that this app could use, and that can be installed independently. We can create a python package with the following structure called `dbt-postgres`

```
dbt
└── adapters
    └── postgres
        └── impl.py
```

`pip install dbt-postgres` will install this package in the python environment, maintaining the same structure again. Let’s say `impl.py` imports `dbt.adapters.base` and implements a concrete class inheriting from the abstract class in `base.py` from the `dbt-core` package. Since our top-level package is named the same in both packages, `pip` will put this in the same place. We end up with this installed in our python environment.

```
dbt
├── adapters
│   ├── base.py
│   └── postgres
│       └── impl.py
├── cli.py
└── main.py
```

`dbt.adapters` now has a postgres module that dbt can easily find and call directly. dbt and its adapters follow the same file-structure convention. This is the magic that allows you to import `dbt.*` in database adapters; using a factory pattern in dbt-core, we can create instances of concrete classes defined in the database adapter packages (for creating connections, defining database configuration, defining credentials, etc.)
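A hedged sketch of the factory lookup this layout enables (illustrative only; the real dbt-core factory is more involved): once both packages are installed into the shared `dbt` namespace, the core package can locate an adapter's module by name at runtime.

```python
# Illustrative sketch of a namespace-package factory, assuming each adapter
# ships a module importable as dbt.adapters.<adapter_type>.
import importlib

def load_adapter_module(adapter_type: str):
    # e.g. "postgres" resolves to the dbt.adapters.postgres package
    # contributed by the separately installed dbt-postgres distribution.
    return importlib.import_module(f"dbt.adapters.{adapter_type}")
```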
dbt/include/__init__.py
ADDED

Binary file

dbt/include/data/build_registry.py
ADDED

@@ -0,0 +1,242 @@
#!/usr/bin/env python3
"""
Build Script for DVT Adapters Registry

This script reads CSV files from the csv/ directory and creates
adapters_registry.duckdb with pre-populated type mappings, syntax rules,
and adapter queries.

Usage:
    python build_registry.py

The resulting adapters_registry.duckdb is shipped with the DVT package.
"""

import csv
import os
from pathlib import Path

try:
    import duckdb
except ImportError:
    print("Error: duckdb is required. Install with: pip install duckdb")
    exit(1)


def get_script_dir() -> Path:
    """Get directory containing this script."""
    return Path(__file__).parent


def create_schema(conn: duckdb.DuckDBPyConnection) -> None:
    """Create the database schema."""

    # Table: datatype_mappings
    conn.execute("""
        CREATE TABLE IF NOT EXISTS datatype_mappings (
            adapter_name VARCHAR NOT NULL,
            adapter_type VARCHAR NOT NULL,
            spark_type VARCHAR NOT NULL,
            spark_version VARCHAR DEFAULT 'all',
            is_complex BOOLEAN DEFAULT FALSE,
            cast_expression VARCHAR,
            notes VARCHAR,
            UNIQUE (adapter_name, adapter_type, spark_version)
        )
    """)
    conn.execute("""
        CREATE INDEX IF NOT EXISTS idx_datatype_lookup
        ON datatype_mappings(adapter_name, adapter_type)
    """)

    # Table: syntax_registry
    conn.execute("""
        CREATE TABLE IF NOT EXISTS syntax_registry (
            adapter_name VARCHAR NOT NULL PRIMARY KEY,
            quote_start VARCHAR NOT NULL,
            quote_end VARCHAR NOT NULL,
            case_sensitivity VARCHAR NOT NULL,
            reserved_keywords VARCHAR
        )
    """)

    # Table: adapter_queries
    conn.execute("""
        CREATE TABLE IF NOT EXISTS adapter_queries (
            adapter_name VARCHAR NOT NULL,
            query_type VARCHAR NOT NULL,
            query_template VARCHAR NOT NULL,
            notes VARCHAR,
            PRIMARY KEY (adapter_name, query_type)
        )
    """)

    print("Schema created successfully")


def load_type_mappings(conn: duckdb.DuckDBPyConnection, csv_dir: Path) -> int:
    """Load type mappings from CSV files."""
    total_rows = 0

    # Find all type_mappings_*.csv files
    for csv_file in sorted(csv_dir.glob("type_mappings_*.csv")):
        adapter_name = csv_file.stem.replace("type_mappings_", "")
        rows_loaded = 0

        with open(csv_file, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            for row in reader:
                conn.execute("""
                    INSERT OR REPLACE INTO datatype_mappings
                    (adapter_name, adapter_type, spark_type, spark_version,
                     is_complex, cast_expression, notes)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                """, [
                    adapter_name,
                    row['adapter_type'].strip(),
                    row['spark_type'].strip(),
                    row.get('spark_version', 'all').strip() or 'all',
                    row.get('is_complex', 'false').strip().lower() == 'true',
                    row.get('cast_expression', '').strip() or None,
                    row.get('notes', '').strip() or None,
                ])
                rows_loaded += 1

        print(f" Loaded {rows_loaded} type mappings for {adapter_name}")
        total_rows += rows_loaded

    return total_rows


def load_syntax_rules(conn: duckdb.DuckDBPyConnection, csv_dir: Path) -> int:
    """Load syntax rules from CSV file."""
    csv_file = csv_dir / "syntax_rules.csv"
    if not csv_file.exists():
        print(" Warning: syntax_rules.csv not found")
        return 0

    rows_loaded = 0
    with open(csv_file, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            conn.execute("""
                INSERT OR REPLACE INTO syntax_registry
                (adapter_name, quote_start, quote_end, case_sensitivity, reserved_keywords)
                VALUES (?, ?, ?, ?, ?)
            """, [
                row['adapter_name'].strip(),
                row['quote_start'].strip(),
                row['quote_end'].strip(),
                row['case_sensitivity'].strip(),
                row.get('reserved_keywords', '').strip() or None,
            ])
            rows_loaded += 1

    print(f" Loaded {rows_loaded} syntax rules")
    return rows_loaded


def load_adapter_queries(conn: duckdb.DuckDBPyConnection, csv_dir: Path) -> int:
    """Load adapter queries from CSV file."""
    csv_file = csv_dir / "adapter_queries.csv"
    if not csv_file.exists():
        print(" Warning: adapter_queries.csv not found")
        return 0

    rows_loaded = 0
    with open(csv_file, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            conn.execute("""
                INSERT OR REPLACE INTO adapter_queries
                (adapter_name, query_type, query_template, notes)
                VALUES (?, ?, ?, ?)
            """, [
                row['adapter_name'].strip(),
                row['query_type'].strip(),
                row['query_template'].strip(),
                row.get('notes', '').strip() or None,
            ])
            rows_loaded += 1

    print(f" Loaded {rows_loaded} adapter queries")
    return rows_loaded


def print_stats(conn: duckdb.DuckDBPyConnection) -> None:
    """Print statistics about the loaded data."""
    print("\n=== Registry Statistics ===")

    # Type mappings by adapter
    result = conn.execute("""
        SELECT adapter_name, COUNT(*) as count
        FROM datatype_mappings
        GROUP BY adapter_name
        ORDER BY adapter_name
    """).fetchall()
    print("\nType mappings per adapter:")
    for row in result:
        print(f"  {row[0]}: {row[1]}")

    # Syntax rules
    result = conn.execute("SELECT COUNT(*) FROM syntax_registry").fetchone()
    print(f"\nSyntax rules: {result[0]} adapters")

    # Adapter queries
    result = conn.execute("""
        SELECT adapter_name, COUNT(*) as count
        FROM adapter_queries
        GROUP BY adapter_name
        ORDER BY adapter_name
    """).fetchall()
    print("\nAdapter queries:")
    for row in result:
        print(f"  {row[0]}: {row[1]} queries")


def main():
    script_dir = get_script_dir()
    csv_dir = script_dir / "csv"
    db_path = script_dir / "adapters_registry.duckdb"

    print("Building adapters_registry.duckdb")
    print(f"CSV directory: {csv_dir}")
    print(f"Output: {db_path}")
    print()

    # Remove existing database
    if db_path.exists():
        os.remove(db_path)
        print("Removed existing database")

    # Create new database
    conn = duckdb.connect(str(db_path))

    try:
        # Create schema
        print("\nCreating schema...")
        create_schema(conn)

        # Load data
        print("\nLoading type mappings...")
        type_count = load_type_mappings(conn, csv_dir)

        print("\nLoading syntax rules...")
        syntax_count = load_syntax_rules(conn, csv_dir)

        print("\nLoading adapter queries...")
        query_count = load_adapter_queries(conn, csv_dir)

        # Print stats
        print_stats(conn)

        print("\n=== Build Complete ===")
        print(f"Total: {type_count} type mappings, {syntax_count} syntax rules, {query_count} queries")
        print(f"Database size: {db_path.stat().st_size / 1024:.1f} KB")

    finally:
        conn.close()


if __name__ == "__main__":
    main()
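As a quick sanity check of the built artifact, the registry can be queried directly with duckdb (a hypothetical example; table and column names follow the schema created above):

```python
# Hypothetical spot-check of the built registry, assuming the schema above
# and the type_mappings_databricks.csv data shipped in this package.
import duckdb

conn = duckdb.connect("adapters_registry.duckdb", read_only=True)
row = conn.execute(
    "SELECT spark_type, is_complex FROM datatype_mappings "
    "WHERE adapter_name = 'databricks' AND adapter_type = 'BIGINT'"
).fetchone()
print(row)  # ('LongType', False)
conn.close()
```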
dbt/include/data/csv/adapter_queries.csv
ADDED

@@ -0,0 +1,33 @@
adapter_name,query_type,query_template,notes
postgres,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
postgres,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
postgres,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
postgres,primary_key,"SELECT kcu.column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
snowflake,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
snowflake,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
snowflake,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
snowflake,primary_key,"SELECT column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
mysql,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
mysql,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
mysql,row_count,"SELECT COUNT(*) FROM `{schema}`.`{table}`",Count rows in table
mysql,primary_key,"SELECT column_name FROM information_schema.key_column_usage WHERE table_schema = '{schema}' AND table_name = '{table}' AND constraint_name = 'PRIMARY'",Get primary key columns
bigquery,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM `{project}`.`{schema}`.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '{table}' ORDER BY ordinal_position",Get column metadata
bigquery,tables,"SELECT table_name FROM `{project}`.`{schema}`.INFORMATION_SCHEMA.TABLES WHERE table_type = 'BASE TABLE'",List tables in dataset
bigquery,row_count,"SELECT COUNT(*) FROM `{project}`.`{schema}`.`{table}`",Count rows in table
bigquery,primary_key,"SELECT column_name FROM `{project}`.`{schema}`.INFORMATION_SCHEMA.KEY_COLUMN_USAGE WHERE table_name = '{table}' AND constraint_name LIKE 'pk_%'",Get primary key columns
redshift,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
redshift,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
redshift,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
redshift,primary_key,"SELECT kcu.column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
oracle,columns,"SELECT column_name, data_type, nullable as is_nullable, column_id as ordinal_position FROM all_tab_columns WHERE owner = UPPER('{schema}') AND table_name = UPPER('{table}') ORDER BY column_id",Get column metadata
oracle,tables,"SELECT table_name FROM all_tables WHERE owner = UPPER('{schema}')",List tables in schema
oracle,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
oracle,primary_key,"SELECT cols.column_name FROM all_constraints cons JOIN all_cons_columns cols ON cons.constraint_name = cols.constraint_name WHERE cons.owner = UPPER('{schema}') AND cons.table_name = UPPER('{table}') AND cons.constraint_type = 'P'",Get primary key columns
sqlserver,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
sqlserver,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
sqlserver,row_count,"SELECT COUNT(*) FROM [{schema}].[{table}]",Count rows in table
sqlserver,primary_key,"SELECT kcu.column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
databricks,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
databricks,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type IN ('BASE TABLE', 'MANAGED', 'EXTERNAL')",List tables in schema
databricks,row_count,"SELECT COUNT(*) FROM `{schema}`.`{table}`",Count rows in table
databricks,primary_key,"SELECT column_name FROM system.information_schema.table_constraints tc JOIN system.information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
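The `{schema}`/`{table}` placeholders suggest Python `str.format`-style substitution at query time; a hedged illustration:

```python
# Illustrative substitution into a query_template row, assuming the
# placeholders are filled with str.format (exact mechanism not shown here).
template = 'SELECT COUNT(*) FROM "{schema}"."{table}"'
print(template.format(schema="analytics", table="orders"))
# SELECT COUNT(*) FROM "analytics"."orders"
```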
dbt/include/data/csv/syntax_rules.csv
ADDED

@@ -0,0 +1,9 @@
adapter_name,quote_start,quote_end,case_sensitivity,reserved_keywords
postgres,"""","""",lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF"
snowflake,"""","""",uppercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,ILIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,MINUS,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,FETCH,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IFF,QUALIFY,PIVOT,UNPIVOT,LATERAL,FLATTEN,VARIANT,OBJECT,ARRAY"
mysql,`,`,lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,CROSS,UNION,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IF,IFNULL,DIV,MOD,XOR,REGEXP,RLIKE"
bigquery,`,`,lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,VIEW,SCHEMA,DATASET,PROJECT,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IF,IFNULL,STRUCT,ARRAY,UNNEST,PARTITION,CLUSTER,TABLESAMPLE"
redshift,"""","""",lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,ILIKE,SIMILAR,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,NVL,NVL2,DECODE,DISTKEY,SORTKEY,DISTSTYLE"
oracle,"""","""",uppercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,USER,GRANT,REVOKE,AND,OR,NOT,NULL,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,MINUS,ORDER,BY,GROUP,HAVING,FETCH,FIRST,NEXT,ROWS,ONLY,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,NVL,NVL2,DECODE,CONNECT,START,WITH,PRIOR,LEVEL,ROWNUM,ROWID,SYSDATE,SYSTIMESTAMP,DUAL"
sqlserver,[,],case_insensitive,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,TOP,OFFSET,FETCH,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,ISNULL,IIF,CHOOSE,PIVOT,UNPIVOT,APPLY,CROSS,OUTER,GO,USE,EXEC,EXECUTE,PRINT,DECLARE,SET,BEGIN,COMMIT,ROLLBACK"
databricks,`,`,case_insensitive,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,CATALOG,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,ILIKE,RLIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,SEMI,ANTI,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IF,IFNULL,NVL,STRUCT,ARRAY,MAP,LATERAL,EXPLODE,POSEXPLODE,INLINE,STACK,TABLESAMPLE,PIVOT,UNPIVOT,DISTRIBUTE,CLUSTER,SORT"
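How the quoting columns would be consumed, as a small hedged sketch (not DVT's actual code):

```python
# Illustrative identifier quoting driven by the quote_start/quote_end
# values from syntax_rules.csv.
QUOTES = {"postgres": ('"', '"'), "mysql": ("`", "`"), "sqlserver": ("[", "]")}

def quote_identifier(adapter_name: str, identifier: str) -> str:
    start, end = QUOTES[adapter_name]
    return f"{start}{identifier}{end}"

print(quote_identifier("sqlserver", "order details"))  # [order details]
```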
dbt/include/data/csv/type_mappings_bigquery.csv
ADDED

@@ -0,0 +1,28 @@
adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
STRING,StringType,all,false,,Variable-length Unicode string
BYTES,BinaryType,all,false,,Variable-length binary
INT64,LongType,all,false,,64-bit signed integer
INTEGER,LongType,all,false,,Alias for INT64
INT,LongType,all,false,,Alias for INT64
SMALLINT,LongType,all,false,,Alias for INT64
BIGINT,LongType,all,false,,Alias for INT64
TINYINT,LongType,all,false,,Alias for INT64
BYTEINT,LongType,all,false,,Alias for INT64
FLOAT64,DoubleType,all,false,,64-bit floating point
FLOAT,DoubleType,all,false,,Alias for FLOAT64
NUMERIC,DecimalType,all,false,,Exact numeric (precision 38 scale 9)
DECIMAL,DecimalType,all,false,,Alias for NUMERIC
BIGNUMERIC,DecimalType,all,false,,High precision numeric (76.76)
BIGDECIMAL,DecimalType,all,false,,Alias for BIGNUMERIC
BOOL,BooleanType,all,false,,Boolean true/false
BOOLEAN,BooleanType,all,false,,Alias for BOOL
DATE,DateType,all,false,,Calendar date
TIME,StringType,all,false,,Time of day
DATETIME,TimestampType,all,false,,Date and time without timezone
TIMESTAMP,TimestampType,all,false,,Date and time with timezone
INTERVAL,StringType,all,false,,Time interval
GEOGRAPHY,StringType,all,false,,Geographic data (GeoJSON)
JSON,StringType,all,true,TO_JSON_STRING({}),JSON document
STRUCT,StringType,all,true,TO_JSON_STRING({}),Structured record
RECORD,StringType,all,true,TO_JSON_STRING({}),Alias for STRUCT
ARRAY,ArrayType,all,true,,Array of values
dbt/include/data/csv/type_mappings_databricks.csv
ADDED

@@ -0,0 +1,30 @@
adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
STRING,StringType,all,false,,Variable-length string
VARCHAR,StringType,all,false,,Variable-length string (alias)
CHAR,StringType,all,false,,Fixed-length string
BINARY,BinaryType,all,false,,Binary data
TINYINT,ByteType,all,false,,8-bit signed integer
BYTE,ByteType,all,false,,Alias for TINYINT
SMALLINT,ShortType,all,false,,16-bit signed integer
SHORT,ShortType,all,false,,Alias for SMALLINT
INT,IntegerType,all,false,,32-bit signed integer
INTEGER,IntegerType,all,false,,Alias for INT
BIGINT,LongType,all,false,,64-bit signed integer
LONG,LongType,all,false,,Alias for BIGINT
FLOAT,FloatType,all,false,,32-bit floating point
REAL,FloatType,all,false,,Alias for FLOAT
DOUBLE,DoubleType,all,false,,64-bit floating point
DECIMAL,DecimalType,all,false,,Exact numeric with precision
DEC,DecimalType,all,false,,Alias for DECIMAL
NUMERIC,DecimalType,all,false,,Alias for DECIMAL
BOOLEAN,BooleanType,all,false,,True/false value
DATE,DateType,all,false,,Calendar date
TIMESTAMP,TimestampType,all,false,,Timestamp without timezone
TIMESTAMP_NTZ,TimestampType,all,false,,Timestamp no timezone (explicit)
TIMESTAMP_LTZ,TimestampType,all,false,,Timestamp with local timezone
INTERVAL,StringType,all,false,,Time interval
ARRAY,ArrayType,all,true,,Array of elements
MAP,MapType,all,true,TO_JSON({}),Key-value pairs
STRUCT,StringType,all,true,TO_JSON({}),Structured record
VARIANT,StringType,all,true,TO_JSON({}),Semi-structured (Unity Catalog)
OBJECT,StringType,all,true,TO_JSON({}),Object type (Unity Catalog)
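For complex types, the `cast_expression` column appears to carry a template in which `{}` stands for the column reference; a hedged example of applying it:

```python
# Illustrative application of a cast_expression template, assuming "{}"
# marks where the column reference is substituted.
cast_expression = "TO_JSON_STRING({})"  # e.g. the BigQuery STRUCT row above
print(cast_expression.format("payload"))  # TO_JSON_STRING(payload)
```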