dvt-core 0.59.0a51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2660 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +60 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.py +273 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.py +1252 -0
- dbt/compute/metadata/__init__.py +63 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/catalog_store.py +1036 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.py +1020 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/spark_logger.py +272 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.py +472 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.py +408 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +888 -0
- dbt/config/project_utils.py +48 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +564 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +419 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_comprehensive_registry.py +1254 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/dvt_starter_project/README.md +15 -0
- dbt/include/dvt_starter_project/__init__.py +3 -0
- dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/dvt_project.yml +39 -0
- dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
- dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +122 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2208 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +506 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +513 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +1002 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +509 -0
- dbt/task/dvt_run.py +282 -0
- dbt/task/dvt_seed.py +806 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.py +1022 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.py +804 -0
- dbt/task/migrate.py +714 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.py +1489 -0
- dbt/task/profile_serve.py +662 -0
- dbt/task/retract.py +441 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1647 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.py +814 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +271 -0
- dvt_cli/__init__.py +158 -0
- dvt_core-0.59.0a51.dist-info/METADATA +288 -0
- dvt_core-0.59.0a51.dist-info/RECORD +299 -0
- dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
- dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
- dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Build Script for DVT Adapters Registry
|
|
4
|
+
|
|
5
|
+
This script reads CSV files from the csv/ directory and creates
|
|
6
|
+
adapters_registry.duckdb with pre-populated type mappings, syntax rules,
|
|
7
|
+
and adapter queries.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python build_registry.py
|
|
11
|
+
|
|
12
|
+
The resulting adapters_registry.duckdb is shipped with the DVT package.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import csv
|
|
16
|
+
import os
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
try:
    import duckdb
except ImportError:
    # Raise SystemExit directly instead of calling the bare exit() helper:
    # exit() is injected by the site module and is not available under
    # `python -S` or in frozen/embedded interpreters. A string argument is
    # written to stderr and produces exit status 1.
    raise SystemExit("Error: duckdb is required. Install with: pip install duckdb")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def get_script_dir() -> Path:
    """Return the folder that holds this script file."""
    script_path = Path(__file__)
    return script_path.parent
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def create_schema(conn: duckdb.DuckDBPyConnection) -> None:
    """Create the registry tables and the datatype lookup index.

    Every statement uses IF NOT EXISTS, so running this against a database
    that already has the schema leaves the existing objects in place.

    Args:
        conn: Open connection to the registry database.
    """
    ddl_statements = (
        # Table: datatype_mappings
        """
        CREATE TABLE IF NOT EXISTS datatype_mappings (
            adapter_name VARCHAR NOT NULL,
            adapter_type VARCHAR NOT NULL,
            spark_type VARCHAR NOT NULL,
            spark_version VARCHAR DEFAULT 'all',
            is_complex BOOLEAN DEFAULT FALSE,
            cast_expression VARCHAR,
            notes VARCHAR,
            UNIQUE (adapter_name, adapter_type, spark_version)
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_datatype_lookup
        ON datatype_mappings(adapter_name, adapter_type)
        """,
        # Table: syntax_registry
        """
        CREATE TABLE IF NOT EXISTS syntax_registry (
            adapter_name VARCHAR NOT NULL PRIMARY KEY,
            quote_start VARCHAR NOT NULL,
            quote_end VARCHAR NOT NULL,
            case_sensitivity VARCHAR NOT NULL,
            reserved_keywords VARCHAR
        )
        """,
        # Table: adapter_queries
        """
        CREATE TABLE IF NOT EXISTS adapter_queries (
            adapter_name VARCHAR NOT NULL,
            query_type VARCHAR NOT NULL,
            query_template VARCHAR NOT NULL,
            notes VARCHAR,
            PRIMARY KEY (adapter_name, query_type)
        )
        """,
    )

    # Order matters: the index must be created after its table.
    for statement in ddl_statements:
        conn.execute(statement)

    print("Schema created successfully")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def load_type_mappings(conn: "duckdb.DuckDBPyConnection", csv_dir: Path) -> int:
    """Load type mappings from every ``type_mappings_<adapter>.csv`` file.

    The adapter name is taken from the file name (the part after the
    ``type_mappings_`` prefix). Each CSV row is upserted into the
    ``datatype_mappings`` table.

    Args:
        conn: Open connection whose database already has ``datatype_mappings``.
        csv_dir: Directory that is searched (non-recursively) for the files.

    Returns:
        Total number of rows inserted across all files.

    Raises:
        KeyError: If a file lacks the ``adapter_type`` or ``spark_type`` column.
    """
    prefix = "type_mappings_"
    insert_sql = """
        INSERT OR REPLACE INTO datatype_mappings
        (adapter_name, adapter_type, spark_type, spark_version,
        is_complex, cast_expression, notes)
        VALUES (?, ?, ?, ?, ?, ?, ?)
    """
    total_rows = 0

    # Sorted so the load order (and the log output) is deterministic.
    for csv_file in sorted(csv_dir.glob(f"{prefix}*.csv")):
        # Strip only the leading prefix; a replace() would also remove any
        # later occurrence of the prefix inside the adapter name.
        adapter_name = csv_file.stem[len(prefix):]
        rows_loaded = 0

        # newline='' lets the csv module handle line endings itself, as its
        # documentation requires for quoted fields with embedded newlines.
        with open(csv_file, 'r', encoding='utf-8', newline='') as f:
            for row in csv.DictReader(f):
                # DictReader fills cells missing from a short row with None,
                # so optional columns are normalised with `or ''` before
                # strip(); row.get(col, default) alone does not cover that.
                conn.execute(insert_sql, [
                    adapter_name,
                    row['adapter_type'].strip(),
                    row['spark_type'].strip(),
                    (row.get('spark_version') or '').strip() or 'all',
                    (row.get('is_complex') or '').strip().lower() == 'true',
                    (row.get('cast_expression') or '').strip() or None,
                    (row.get('notes') or '').strip() or None,
                ])
                rows_loaded += 1

        print(f" Loaded {rows_loaded} type mappings for {adapter_name}")
        total_rows += rows_loaded

    return total_rows
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def load_syntax_rules(conn: "duckdb.DuckDBPyConnection", csv_dir: Path) -> int:
    """Load identifier-quoting and keyword rules from ``syntax_rules.csv``.

    Each CSV row is upserted into the ``syntax_registry`` table. A missing
    file is not an error: a warning is printed and nothing is loaded.

    Args:
        conn: Open connection whose database already has ``syntax_registry``.
        csv_dir: Directory expected to contain ``syntax_rules.csv``.

    Returns:
        Number of rows inserted (0 when the file does not exist).

    Raises:
        KeyError: If the file lacks one of the required columns.
    """
    csv_file = csv_dir / "syntax_rules.csv"
    if not csv_file.exists():
        print(" Warning: syntax_rules.csv not found")
        return 0

    insert_sql = """
        INSERT OR REPLACE INTO syntax_registry
        (adapter_name, quote_start, quote_end, case_sensitivity, reserved_keywords)
        VALUES (?, ?, ?, ?, ?)
    """
    rows_loaded = 0

    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for quoted fields with embedded newlines.
    with open(csv_file, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            conn.execute(insert_sql, [
                row['adapter_name'].strip(),
                row['quote_start'].strip(),
                row['quote_end'].strip(),
                row['case_sensitivity'].strip(),
                # A short row yields None for this optional trailing column;
                # normalise before strip() so it is stored as NULL.
                (row.get('reserved_keywords') or '').strip() or None,
            ])
            rows_loaded += 1

    print(f" Loaded {rows_loaded} syntax rules")
    return rows_loaded
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def load_adapter_queries(conn: "duckdb.DuckDBPyConnection", csv_dir: Path) -> int:
    """Load per-adapter metadata query templates from ``adapter_queries.csv``.

    Each CSV row is upserted into the ``adapter_queries`` table. A missing
    file is not an error: a warning is printed and nothing is loaded.

    Args:
        conn: Open connection whose database already has ``adapter_queries``.
        csv_dir: Directory expected to contain ``adapter_queries.csv``.

    Returns:
        Number of rows inserted (0 when the file does not exist).

    Raises:
        KeyError: If the file lacks one of the required columns.
    """
    csv_file = csv_dir / "adapter_queries.csv"
    if not csv_file.exists():
        print(" Warning: adapter_queries.csv not found")
        return 0

    insert_sql = """
        INSERT OR REPLACE INTO adapter_queries
        (adapter_name, query_type, query_template, notes)
        VALUES (?, ?, ?, ?)
    """
    rows_loaded = 0

    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for quoted fields with embedded newlines.
    with open(csv_file, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            conn.execute(insert_sql, [
                row['adapter_name'].strip(),
                row['query_type'].strip(),
                row['query_template'].strip(),
                # A short row yields None for this optional trailing column;
                # normalise before strip() so it is stored as NULL.
                (row.get('notes') or '').strip() or None,
            ])
            rows_loaded += 1

    print(f" Loaded {rows_loaded} adapter queries")
    return rows_loaded
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def print_stats(conn: duckdb.DuckDBPyConnection) -> None:
    """Print a summary of what the registry tables currently contain.

    Reports the number of type mappings per adapter, the number of adapters
    with syntax rules, and the number of query templates per adapter.

    Args:
        conn: Open connection to a database that has the registry schema.
    """
    print("\n=== Registry Statistics ===")

    # Type mappings by adapter
    mapping_counts_sql = """
        SELECT adapter_name, COUNT(*) as count
        FROM datatype_mappings
        GROUP BY adapter_name
        ORDER BY adapter_name
    """
    mapping_counts = conn.execute(mapping_counts_sql).fetchall()
    print("\nType mappings per adapter:")
    for adapter, mapping_total in mapping_counts:
        print(f" {adapter}: {mapping_total}")

    # Syntax rules
    syntax_row = conn.execute("SELECT COUNT(*) FROM syntax_registry").fetchone()
    syntax_total = syntax_row[0]
    print(f"\nSyntax rules: {syntax_total} adapters")

    # Adapter queries
    query_counts_sql = """
        SELECT adapter_name, COUNT(*) as count
        FROM adapter_queries
        GROUP BY adapter_name
        ORDER BY adapter_name
    """
    query_counts = conn.execute(query_counts_sql).fetchall()
    print("\nAdapter queries:")
    for adapter, query_total in query_counts:
        print(f" {adapter}: {query_total} queries")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def main():
    """Rebuild adapters_registry.duckdb next to this script from csv/ files.

    Any existing database file is deleted and recreated from scratch: the
    schema is created, the three CSV-backed tables are loaded, and summary
    statistics are printed.

    Raises:
        SystemExit: If the csv/ directory does not exist. This is checked
            before the existing database is removed, so a misplaced script
            cannot replace a good registry with an empty one.
    """
    script_dir = get_script_dir()
    csv_dir = script_dir / "csv"
    db_path = script_dir / "adapters_registry.duckdb"

    print("Building adapters_registry.duckdb")
    print(f"CSV directory: {csv_dir}")
    print(f"Output: {db_path}")
    print()

    # Validate the input before touching the current database file.
    if not csv_dir.is_dir():
        raise SystemExit(f"Error: CSV directory not found: {csv_dir}")

    # Remove existing database
    if db_path.exists():
        os.remove(db_path)
        print("Removed existing database")

    # Create new database
    conn = duckdb.connect(str(db_path))

    try:
        # Create schema
        print("\nCreating schema...")
        create_schema(conn)

        # Load data
        print("\nLoading type mappings...")
        type_count = load_type_mappings(conn, csv_dir)

        print("\nLoading syntax rules...")
        syntax_count = load_syntax_rules(conn, csv_dir)

        print("\nLoading adapter queries...")
        query_count = load_adapter_queries(conn, csv_dir)

        # Print stats
        print_stats(conn)

    finally:
        conn.close()

    # Report only after the connection is closed: the on-disk size is
    # presumably not final while the database is still open (pending data
    # may not have been checkpointed into the main file yet).
    print("\n=== Build Complete ===")
    print(f"Total: {type_count} type mappings, {syntax_count} syntax rules, {query_count} queries")
    print(f"Database size: {db_path.stat().st_size / 1024:.1f} KB")
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
# Run the build only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
adapter_name,query_type,query_template,notes
|
|
2
|
+
postgres,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
|
|
3
|
+
postgres,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
|
|
4
|
+
postgres,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
|
|
5
|
+
postgres,primary_key,"SELECT kcu.column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
|
|
6
|
+
snowflake,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
|
|
7
|
+
snowflake,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
|
|
8
|
+
snowflake,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
|
|
9
|
+
snowflake,primary_key,"SELECT column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
|
|
10
|
+
mysql,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
|
|
11
|
+
mysql,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
|
|
12
|
+
mysql,row_count,"SELECT COUNT(*) FROM `{schema}`.`{table}`",Count rows in table
|
|
13
|
+
mysql,primary_key,"SELECT column_name FROM information_schema.key_column_usage WHERE table_schema = '{schema}' AND table_name = '{table}' AND constraint_name = 'PRIMARY'",Get primary key columns
|
|
14
|
+
bigquery,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM `{project}`.`{schema}`.INFORMATION_SCHEMA.COLUMNS WHERE table_name = '{table}' ORDER BY ordinal_position",Get column metadata
|
|
15
|
+
bigquery,tables,"SELECT table_name FROM `{project}`.`{schema}`.INFORMATION_SCHEMA.TABLES WHERE table_type = 'BASE TABLE'",List tables in dataset
|
|
16
|
+
bigquery,row_count,"SELECT COUNT(*) FROM `{project}`.`{schema}`.`{table}`",Count rows in table
|
|
17
|
+
bigquery,primary_key,"SELECT column_name FROM `{project}`.`{schema}`.INFORMATION_SCHEMA.KEY_COLUMN_USAGE WHERE table_name = '{table}' AND constraint_name LIKE 'pk_%'",Get primary key columns
|
|
18
|
+
redshift,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
|
|
19
|
+
redshift,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
|
|
20
|
+
redshift,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
|
|
21
|
+
redshift,primary_key,"SELECT kcu.column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
|
|
22
|
+
oracle,columns,"SELECT column_name, data_type, nullable as is_nullable, column_id as ordinal_position FROM all_tab_columns WHERE owner = UPPER('{schema}') AND table_name = UPPER('{table}') ORDER BY column_id",Get column metadata
|
|
23
|
+
oracle,tables,"SELECT table_name FROM all_tables WHERE owner = UPPER('{schema}')",List tables in schema
|
|
24
|
+
oracle,row_count,"SELECT COUNT(*) FROM ""{schema}"".""{table}""",Count rows in table
|
|
25
|
+
oracle,primary_key,"SELECT cols.column_name FROM all_constraints cons JOIN all_cons_columns cols ON cons.constraint_name = cols.constraint_name WHERE cons.owner = UPPER('{schema}') AND cons.table_name = UPPER('{table}') AND cons.constraint_type = 'P'",Get primary key columns
|
|
26
|
+
sqlserver,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
|
|
27
|
+
sqlserver,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type = 'BASE TABLE'",List tables in schema
|
|
28
|
+
sqlserver,row_count,"SELECT COUNT(*) FROM [{schema}].[{table}]",Count rows in table
|
|
29
|
+
sqlserver,primary_key,"SELECT kcu.column_name FROM information_schema.table_constraints tc JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
|
|
30
|
+
databricks,columns,"SELECT column_name, data_type, is_nullable, ordinal_position FROM information_schema.columns WHERE table_schema = '{schema}' AND table_name = '{table}' ORDER BY ordinal_position",Get column metadata
|
|
31
|
+
databricks,tables,"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema}' AND table_type IN ('BASE TABLE', 'MANAGED', 'EXTERNAL')",List tables in schema
|
|
32
|
+
databricks,row_count,"SELECT COUNT(*) FROM `{schema}`.`{table}`",Count rows in table
|
|
33
|
+
databricks,primary_key,"SELECT column_name FROM system.information_schema.table_constraints tc JOIN system.information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name WHERE tc.table_schema = '{schema}' AND tc.table_name = '{table}' AND tc.constraint_type = 'PRIMARY KEY'",Get primary key columns
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
adapter_name,quote_start,quote_end,case_sensitivity,reserved_keywords
|
|
2
|
+
postgres,"""","""",lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF"
|
|
3
|
+
snowflake,"""","""",uppercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,ILIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,MINUS,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,FETCH,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IFF,QUALIFY,PIVOT,UNPIVOT,LATERAL,FLATTEN,VARIANT,OBJECT,ARRAY"
|
|
4
|
+
mysql,`,`,lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,CROSS,UNION,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IF,IFNULL,DIV,MOD,XOR,REGEXP,RLIKE"
|
|
5
|
+
bigquery,`,`,lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,VIEW,SCHEMA,DATASET,PROJECT,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IF,IFNULL,STRUCT,ARRAY,UNNEST,PARTITION,CLUSTER,TABLESAMPLE"
|
|
6
|
+
redshift,"""","""",lowercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,ILIKE,SIMILAR,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,NVL,NVL2,DECODE,DISTKEY,SORTKEY,DISTSTYLE"
|
|
7
|
+
oracle,"""","""",uppercase,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,USER,GRANT,REVOKE,AND,OR,NOT,NULL,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,MINUS,ORDER,BY,GROUP,HAVING,FETCH,FIRST,NEXT,ROWS,ONLY,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,NVL,NVL2,DECODE,CONNECT,START,WITH,PRIOR,LEVEL,ROWNUM,ROWID,SYSDATE,SYSTIMESTAMP,DUAL"
|
|
8
|
+
sqlserver,[,],case_insensitive,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,USER,GRANT,REVOKE,AND,OR,NOT,NULL,IN,BETWEEN,LIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,TOP,OFFSET,FETCH,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,ISNULL,IIF,CHOOSE,PIVOT,UNPIVOT,APPLY,CROSS,OUTER,GO,USE,EXEC,EXECUTE,PRINT,DECLARE,SET,BEGIN,COMMIT,ROLLBACK"
|
|
9
|
+
databricks,`,`,case_insensitive,"SELECT,FROM,WHERE,INSERT,UPDATE,DELETE,CREATE,DROP,ALTER,TABLE,INDEX,VIEW,SCHEMA,DATABASE,CATALOG,AND,OR,NOT,NULL,TRUE,FALSE,IN,BETWEEN,LIKE,ILIKE,RLIKE,IS,AS,ON,JOIN,LEFT,RIGHT,INNER,OUTER,FULL,CROSS,SEMI,ANTI,UNION,EXCEPT,INTERSECT,ORDER,BY,GROUP,HAVING,LIMIT,OFFSET,DISTINCT,ALL,ANY,EXISTS,CASE,WHEN,THEN,ELSE,END,CAST,COALESCE,NULLIF,IF,IFNULL,NVL,STRUCT,ARRAY,MAP,LATERAL,EXPLODE,POSEXPLODE,INLINE,STACK,TABLESAMPLE,PIVOT,UNPIVOT,DISTRIBUTE,CLUSTER,SORT"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
|
|
2
|
+
STRING,StringType,all,false,,Variable-length Unicode string
|
|
3
|
+
BYTES,BinaryType,all,false,,Variable-length binary
|
|
4
|
+
INT64,LongType,all,false,,64-bit signed integer
|
|
5
|
+
INTEGER,LongType,all,false,,Alias for INT64
|
|
6
|
+
INT,LongType,all,false,,Alias for INT64
|
|
7
|
+
SMALLINT,LongType,all,false,,Alias for INT64
|
|
8
|
+
BIGINT,LongType,all,false,,Alias for INT64
|
|
9
|
+
TINYINT,LongType,all,false,,Alias for INT64
|
|
10
|
+
BYTEINT,LongType,all,false,,Alias for INT64
|
|
11
|
+
FLOAT64,DoubleType,all,false,,64-bit floating point
|
|
12
|
+
FLOAT,DoubleType,all,false,,Alias for FLOAT64
|
|
13
|
+
NUMERIC,DecimalType,all,false,,Exact numeric (precision 38 scale 9)
|
|
14
|
+
DECIMAL,DecimalType,all,false,,Alias for NUMERIC
|
|
15
|
+
BIGNUMERIC,DecimalType,all,false,,High precision numeric (76.76)
|
|
16
|
+
BIGDECIMAL,DecimalType,all,false,,Alias for BIGNUMERIC
|
|
17
|
+
BOOL,BooleanType,all,false,,Boolean true/false
|
|
18
|
+
BOOLEAN,BooleanType,all,false,,Alias for BOOL
|
|
19
|
+
DATE,DateType,all,false,,Calendar date
|
|
20
|
+
TIME,StringType,all,false,,Time of day
|
|
21
|
+
DATETIME,TimestampType,all,false,,Date and time without timezone
|
|
22
|
+
TIMESTAMP,TimestampType,all,false,,Date and time with timezone
|
|
23
|
+
INTERVAL,StringType,all,false,,Time interval
|
|
24
|
+
GEOGRAPHY,StringType,all,false,,Geographic data (GeoJSON)
|
|
25
|
+
JSON,StringType,all,true,TO_JSON_STRING({}),JSON document
|
|
26
|
+
STRUCT,StringType,all,true,TO_JSON_STRING({}),Structured record
|
|
27
|
+
RECORD,StringType,all,true,TO_JSON_STRING({}),Alias for STRUCT
|
|
28
|
+
ARRAY,ArrayType,all,true,,Array of values
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
|
|
2
|
+
STRING,StringType,all,false,,Variable-length string
|
|
3
|
+
VARCHAR,StringType,all,false,,Variable-length string (alias)
|
|
4
|
+
CHAR,StringType,all,false,,Fixed-length string
|
|
5
|
+
BINARY,BinaryType,all,false,,Binary data
|
|
6
|
+
TINYINT,ByteType,all,false,,8-bit signed integer
|
|
7
|
+
BYTE,ByteType,all,false,,Alias for TINYINT
|
|
8
|
+
SMALLINT,ShortType,all,false,,16-bit signed integer
|
|
9
|
+
SHORT,ShortType,all,false,,Alias for SMALLINT
|
|
10
|
+
INT,IntegerType,all,false,,32-bit signed integer
|
|
11
|
+
INTEGER,IntegerType,all,false,,Alias for INT
|
|
12
|
+
BIGINT,LongType,all,false,,64-bit signed integer
|
|
13
|
+
LONG,LongType,all,false,,Alias for BIGINT
|
|
14
|
+
FLOAT,FloatType,all,false,,32-bit floating point
|
|
15
|
+
REAL,FloatType,all,false,,Alias for FLOAT
|
|
16
|
+
DOUBLE,DoubleType,all,false,,64-bit floating point
|
|
17
|
+
DECIMAL,DecimalType,all,false,,Exact numeric with precision
|
|
18
|
+
DEC,DecimalType,all,false,,Alias for DECIMAL
|
|
19
|
+
NUMERIC,DecimalType,all,false,,Alias for DECIMAL
|
|
20
|
+
BOOLEAN,BooleanType,all,false,,True/false value
|
|
21
|
+
DATE,DateType,all,false,,Calendar date
|
|
22
|
+
TIMESTAMP,TimestampType,all,false,,Timestamp with session-local timezone
|
|
23
|
+
TIMESTAMP_NTZ,TimestampType,all,false,,Timestamp no timezone (explicit)
|
|
24
|
+
TIMESTAMP_LTZ,TimestampType,all,false,,Timestamp with local timezone
|
|
25
|
+
INTERVAL,StringType,all,false,,Time interval
|
|
26
|
+
ARRAY,ArrayType,all,true,,Array of elements
|
|
27
|
+
MAP,MapType,all,true,TO_JSON({}),Key-value pairs
|
|
28
|
+
STRUCT,StringType,all,true,TO_JSON({}),Structured record
|
|
29
|
+
VARIANT,StringType,all,true,TO_JSON({}),Semi-structured (Unity Catalog)
|
|
30
|
+
OBJECT,StringType,all,true,TO_JSON({}),Object type (Unity Catalog)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
|
|
2
|
+
VARCHAR,StringType,all,false,,Variable-length string
|
|
3
|
+
CHAR,StringType,all,false,,Fixed-length string
|
|
4
|
+
TEXT,StringType,all,false,,Variable unlimited length text
|
|
5
|
+
TINYTEXT,StringType,all,false,,Tiny text (255 chars)
|
|
6
|
+
MEDIUMTEXT,StringType,all,false,,Medium text (16MB)
|
|
7
|
+
LONGTEXT,StringType,all,false,,Long text (4GB)
|
|
8
|
+
TINYINT,ByteType,all,false,,8-bit signed integer
|
|
9
|
+
SMALLINT,ShortType,all,false,,16-bit signed integer
|
|
10
|
+
MEDIUMINT,IntegerType,all,false,,24-bit signed integer
|
|
11
|
+
INT,IntegerType,all,false,,32-bit signed integer
|
|
12
|
+
INTEGER,IntegerType,all,false,,Alias for INT
|
|
13
|
+
BIGINT,LongType,all,false,,64-bit signed integer
|
|
14
|
+
FLOAT,FloatType,all,false,,32-bit floating point
|
|
15
|
+
DOUBLE,DoubleType,all,false,,64-bit floating point
|
|
16
|
+
DOUBLE PRECISION,DoubleType,all,false,,Alias for DOUBLE
|
|
17
|
+
REAL,DoubleType,all,false,,Alias for DOUBLE
|
|
18
|
+
DECIMAL,DecimalType,all,false,,Exact numeric with precision
|
|
19
|
+
NUMERIC,DecimalType,all,false,,Alias for DECIMAL
|
|
20
|
+
BIT,BooleanType,all,false,,Bit value (1 bit = boolean)
|
|
21
|
+
BOOLEAN,BooleanType,all,false,,Alias for TINYINT(1)
|
|
22
|
+
BOOL,BooleanType,all,false,,Alias for BOOLEAN
|
|
23
|
+
DATE,DateType,all,false,,Calendar date
|
|
24
|
+
TIME,StringType,all,false,,Time of day
|
|
25
|
+
DATETIME,TimestampType,all,false,,Date and time
|
|
26
|
+
TIMESTAMP,TimestampType,all,false,,Timestamp with auto-update
|
|
27
|
+
YEAR,IntegerType,all,false,,Year in 4-digit format
|
|
28
|
+
BINARY,BinaryType,all,false,,Fixed-length binary
|
|
29
|
+
VARBINARY,BinaryType,all,false,,Variable-length binary
|
|
30
|
+
TINYBLOB,BinaryType,all,false,,Tiny binary (255 bytes)
|
|
31
|
+
BLOB,BinaryType,all,false,,Binary large object (64KB)
|
|
32
|
+
MEDIUMBLOB,BinaryType,all,false,,Medium blob (16MB)
|
|
33
|
+
LONGBLOB,BinaryType,all,false,,Long blob (4GB)
|
|
34
|
+
ENUM,StringType,all,false,,Enumeration type
|
|
35
|
+
SET,StringType,all,false,,Set of values
|
|
36
|
+
JSON,StringType,all,true,CAST({} AS STRING),JSON document
|
|
37
|
+
GEOMETRY,StringType,all,false,,Spatial geometry
|
|
38
|
+
POINT,StringType,all,false,,Spatial point
|
|
39
|
+
LINESTRING,StringType,all,false,,Spatial line
|
|
40
|
+
POLYGON,StringType,all,false,,Spatial polygon
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
|
|
2
|
+
VARCHAR2,StringType,all,false,,Variable-length string (max 4000/32767)
|
|
3
|
+
NVARCHAR2,StringType,all,false,,Unicode VARCHAR2
|
|
4
|
+
CHAR,StringType,all,false,,Fixed-length string
|
|
5
|
+
NCHAR,StringType,all,false,,Unicode fixed-length
|
|
6
|
+
CLOB,StringType,all,false,,Character large object
|
|
7
|
+
NCLOB,StringType,all,false,,Unicode CLOB
|
|
8
|
+
LONG,StringType,all,false,,Legacy long text (deprecated)
|
|
9
|
+
NUMBER,DecimalType,all,false,,Numeric with precision and scale
|
|
10
|
+
FLOAT,DoubleType,all,false,,Floating point (binary precision)
|
|
11
|
+
BINARY_FLOAT,FloatType,all,false,,32-bit IEEE floating point
|
|
12
|
+
BINARY_DOUBLE,DoubleType,all,false,,64-bit IEEE floating point
|
|
13
|
+
INTEGER,LongType,all,false,,Alias for NUMBER(38)
|
|
14
|
+
INT,LongType,all,false,,Alias for NUMBER(38)
|
|
15
|
+
SMALLINT,LongType,all,false,,Alias for NUMBER(38)
|
|
16
|
+
DATE,TimestampType,all,false,,Date and time (second precision)
|
|
17
|
+
TIMESTAMP,TimestampType,all,false,,Timestamp (fractional seconds)
|
|
18
|
+
TIMESTAMP WITH TIME ZONE,TimestampType,all,false,,Timestamp with timezone
|
|
19
|
+
TIMESTAMP WITH LOCAL TIME ZONE,TimestampType,all,false,,Timestamp with session timezone
|
|
20
|
+
INTERVAL YEAR TO MONTH,StringType,all,false,,Year-month interval
|
|
21
|
+
INTERVAL DAY TO SECOND,StringType,all,false,,Day-second interval
|
|
22
|
+
RAW,BinaryType,all,false,,Raw binary data
|
|
23
|
+
LONG RAW,BinaryType,all,false,,Legacy raw (deprecated)
|
|
24
|
+
BLOB,BinaryType,all,false,,Binary large object
|
|
25
|
+
BFILE,StringType,all,false,,External file pointer
|
|
26
|
+
ROWID,StringType,all,false,,Physical row address
|
|
27
|
+
UROWID,StringType,all,false,,Universal ROWID
|
|
28
|
+
XMLTYPE,StringType,all,true,XMLSERIALIZE(CONTENT {} AS CLOB),XML document
|
|
29
|
+
JSON,StringType,all,true,,JSON document (21c+)
|
|
30
|
+
SDO_GEOMETRY,StringType,all,false,,Spatial geometry
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
|
|
2
|
+
VARCHAR,StringType,all,false,,Variable-length character string
|
|
3
|
+
CHAR,StringType,all,false,,Fixed-length character string
|
|
4
|
+
TEXT,StringType,all,false,,Variable unlimited length text
|
|
5
|
+
BPCHAR,StringType,all,false,,Blank-padded character
|
|
6
|
+
NAME,StringType,all,false,,Internal type for identifiers
|
|
7
|
+
INTEGER,IntegerType,all,false,,32-bit signed integer
|
|
8
|
+
INT,IntegerType,all,false,,Alias for INTEGER
|
|
9
|
+
INT4,IntegerType,all,false,,Alias for INTEGER
|
|
10
|
+
SMALLINT,ShortType,all,false,,16-bit signed integer
|
|
11
|
+
INT2,ShortType,all,false,,Alias for SMALLINT
|
|
12
|
+
BIGINT,LongType,all,false,,64-bit signed integer
|
|
13
|
+
INT8,LongType,all,false,,Alias for BIGINT
|
|
14
|
+
SERIAL,IntegerType,all,false,,Auto-incrementing integer
|
|
15
|
+
BIGSERIAL,LongType,all,false,,Auto-incrementing bigint
|
|
16
|
+
SMALLSERIAL,ShortType,all,false,,Auto-incrementing smallint
|
|
17
|
+
REAL,FloatType,all,false,,32-bit floating point
|
|
18
|
+
FLOAT4,FloatType,all,false,,Alias for REAL
|
|
19
|
+
DOUBLE PRECISION,DoubleType,all,false,,64-bit floating point
|
|
20
|
+
FLOAT8,DoubleType,all,false,,Alias for DOUBLE PRECISION
|
|
21
|
+
FLOAT,DoubleType,all,false,,Floating point (defaults to double)
|
|
22
|
+
NUMERIC,DecimalType,all,false,,Exact numeric with precision
|
|
23
|
+
DECIMAL,DecimalType,all,false,,Alias for NUMERIC
|
|
24
|
+
MONEY,DecimalType,all,false,,Currency amount
|
|
25
|
+
BOOLEAN,BooleanType,all,false,,True/false value
|
|
26
|
+
BOOL,BooleanType,all,false,,Alias for BOOLEAN
|
|
27
|
+
DATE,DateType,all,false,,Calendar date
|
|
28
|
+
TIME,StringType,all,false,,Time of day (no timezone)
|
|
29
|
+
TIMETZ,StringType,all,false,,Time with timezone
|
|
30
|
+
TIMESTAMP,TimestampType,all,false,,Date and time (no timezone)
|
|
31
|
+
TIMESTAMPTZ,TimestampType,all,false,,Date and time with timezone
|
|
32
|
+
INTERVAL,StringType,all,false,,Time interval
|
|
33
|
+
BYTEA,BinaryType,all,false,,Binary data
|
|
34
|
+
UUID,StringType,all,false,,Universally unique identifier
|
|
35
|
+
JSON,StringType,all,true,CAST({} AS STRING),JSON data
|
|
36
|
+
JSONB,StringType,all,true,CAST({} AS STRING),Binary JSON data
|
|
37
|
+
XML,StringType,all,true,CAST({} AS STRING),XML data
|
|
38
|
+
ARRAY,ArrayType,all,true,,PostgreSQL array type
|
|
39
|
+
INET,StringType,all,false,,IPv4 or IPv6 host address
|
|
40
|
+
CIDR,StringType,all,false,,IPv4 or IPv6 network address
|
|
41
|
+
MACADDR,StringType,all,false,,MAC address
|
|
42
|
+
MACADDR8,StringType,all,false,,MAC address (EUI-64 format)
|
|
43
|
+
BIT,StringType,all,false,,Fixed-length bit string
|
|
44
|
+
VARBIT,StringType,all,false,,Variable-length bit string
|
|
45
|
+
POINT,StringType,all,false,,Geometric point
|
|
46
|
+
LINE,StringType,all,false,,Infinite line
|
|
47
|
+
LSEG,StringType,all,false,,Line segment
|
|
48
|
+
BOX,StringType,all,false,,Rectangular box
|
|
49
|
+
PATH,StringType,all,false,,Geometric path
|
|
50
|
+
POLYGON,StringType,all,false,,Closed geometric path
|
|
51
|
+
CIRCLE,StringType,all,false,,Circle
|
|
52
|
+
TSVECTOR,StringType,all,false,,Text search document
|
|
53
|
+
TSQUERY,StringType,all,false,,Text search query
|
|
54
|
+
OID,LongType,all,false,,Object identifier
|
|
55
|
+
REGCLASS,StringType,all,false,,Relation name
|
|
56
|
+
REGTYPE,StringType,all,false,,Data type name
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
|
|
2
|
+
VARCHAR,StringType,all,false,,Variable-length string (max 65535)
|
|
3
|
+
CHAR,StringType,all,false,,Fixed-length string
|
|
4
|
+
BPCHAR,StringType,all,false,,Blank-padded character
|
|
5
|
+
TEXT,StringType,all,false,,Alias for VARCHAR (stored as VARCHAR(256))
|
|
6
|
+
NVARCHAR,StringType,all,false,,Unicode VARCHAR
|
|
7
|
+
NCHAR,StringType,all,false,,Unicode CHAR
|
|
8
|
+
SMALLINT,ShortType,all,false,,16-bit signed integer
|
|
9
|
+
INT2,ShortType,all,false,,Alias for SMALLINT
|
|
10
|
+
INTEGER,IntegerType,all,false,,32-bit signed integer
|
|
11
|
+
INT,IntegerType,all,false,,Alias for INTEGER
|
|
12
|
+
INT4,IntegerType,all,false,,Alias for INTEGER
|
|
13
|
+
BIGINT,LongType,all,false,,64-bit signed integer
|
|
14
|
+
INT8,LongType,all,false,,Alias for BIGINT
|
|
15
|
+
REAL,FloatType,all,false,,32-bit floating point
|
|
16
|
+
FLOAT4,FloatType,all,false,,Alias for REAL
|
|
17
|
+
DOUBLE PRECISION,DoubleType,all,false,,64-bit floating point
|
|
18
|
+
FLOAT8,DoubleType,all,false,,Alias for DOUBLE PRECISION
|
|
19
|
+
FLOAT,DoubleType,all,false,,Alias for DOUBLE PRECISION
|
|
20
|
+
DECIMAL,DecimalType,all,false,,Exact numeric with precision
|
|
21
|
+
NUMERIC,DecimalType,all,false,,Alias for DECIMAL
|
|
22
|
+
BOOLEAN,BooleanType,all,false,,True/false value
|
|
23
|
+
BOOL,BooleanType,all,false,,Alias for BOOLEAN
|
|
24
|
+
DATE,DateType,all,false,,Calendar date
|
|
25
|
+
TIME,StringType,all,false,,Time of day
|
|
26
|
+
TIMETZ,StringType,all,false,,Time with timezone
|
|
27
|
+
TIMESTAMP,TimestampType,all,false,,Timestamp without timezone
|
|
28
|
+
TIMESTAMPTZ,TimestampType,all,false,,Timestamp with timezone
|
|
29
|
+
GEOMETRY,StringType,all,false,,Spatial geometry (EWKB)
|
|
30
|
+
GEOGRAPHY,StringType,all,false,,Geographic data
|
|
31
|
+
HLLSKETCH,StringType,all,false,,HyperLogLog sketch
|
|
32
|
+
SUPER,StringType,all,true,JSON_SERIALIZE({}),Semi-structured data
|
|
33
|
+
VARBYTE,BinaryType,all,false,,Variable-length binary
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
|
|
2
|
+
VARCHAR,StringType,all,false,,Variable-length string
|
|
3
|
+
STRING,StringType,all,false,,Alias for VARCHAR
|
|
4
|
+
TEXT,StringType,all,false,,Alias for VARCHAR
|
|
5
|
+
CHAR,StringType,all,false,,Fixed-length string
|
|
6
|
+
CHARACTER,StringType,all,false,,Alias for CHAR
|
|
7
|
+
NVARCHAR,StringType,all,false,,Unicode VARCHAR
|
|
8
|
+
NCHAR,StringType,all,false,,Unicode CHAR
|
|
9
|
+
NUMBER,DecimalType,all,false,,Numeric with precision and scale
|
|
10
|
+
DECIMAL,DecimalType,all,false,,Alias for NUMBER
|
|
11
|
+
NUMERIC,DecimalType,all,false,,Alias for NUMBER
|
|
12
|
+
INT,LongType,all,false,,Integer (38 digits precision)
|
|
13
|
+
INTEGER,LongType,all,false,,Alias for INT
|
|
14
|
+
BIGINT,LongType,all,false,,Alias for INT
|
|
15
|
+
SMALLINT,LongType,all,false,,Alias for INT
|
|
16
|
+
TINYINT,LongType,all,false,,Alias for INT
|
|
17
|
+
BYTEINT,LongType,all,false,,Alias for INT
|
|
18
|
+
FLOAT,DoubleType,all,false,,64-bit floating point
|
|
19
|
+
FLOAT4,FloatType,all,false,,32-bit floating point
|
|
20
|
+
FLOAT8,DoubleType,all,false,,64-bit floating point
|
|
21
|
+
DOUBLE,DoubleType,all,false,,Alias for FLOAT
|
|
22
|
+
DOUBLE PRECISION,DoubleType,all,false,,Alias for FLOAT
|
|
23
|
+
REAL,FloatType,all,false,,32-bit floating point
|
|
24
|
+
BOOLEAN,BooleanType,all,false,,True/false value
|
|
25
|
+
DATE,DateType,all,false,,Calendar date
|
|
26
|
+
TIME,StringType,all,false,,Time of day
|
|
27
|
+
DATETIME,TimestampType,all,false,,Alias for TIMESTAMP
|
|
28
|
+
TIMESTAMP,TimestampType,all,false,,Timestamp without timezone
|
|
29
|
+
TIMESTAMP_LTZ,TimestampType,all,false,,Timestamp with local timezone
|
|
30
|
+
TIMESTAMP_NTZ,TimestampType,all,false,,Timestamp no timezone
|
|
31
|
+
TIMESTAMP_TZ,TimestampType,all,false,,Timestamp with timezone
|
|
32
|
+
BINARY,BinaryType,all,false,,Binary data
|
|
33
|
+
VARBINARY,BinaryType,all,false,,Variable binary
|
|
34
|
+
VARIANT,StringType,all,true,TO_JSON({}),Semi-structured data
|
|
35
|
+
OBJECT,StringType,all,true,TO_JSON({}),Key-value pairs
|
|
36
|
+
ARRAY,StringType,all,true,TO_JSON({}),Ordered list of values
|
|
37
|
+
GEOGRAPHY,StringType,all,false,,Geographic data (WKT)
|
|
38
|
+
GEOMETRY,StringType,all,false,,Geometric data (WKT)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
adapter_type,spark_type,spark_version,is_complex,cast_expression,notes
|
|
2
|
+
VARCHAR,StringType,all,false,,Variable-length string
|
|
3
|
+
NVARCHAR,StringType,all,false,,Unicode variable-length
|
|
4
|
+
CHAR,StringType,all,false,,Fixed-length string
|
|
5
|
+
NCHAR,StringType,all,false,,Unicode fixed-length
|
|
6
|
+
TEXT,StringType,all,false,,Variable text (deprecated)
|
|
7
|
+
NTEXT,StringType,all,false,,Unicode text (deprecated)
|
|
8
|
+
TINYINT,ShortType,all,false,,8-bit unsigned (0-255)
|
|
9
|
+
SMALLINT,ShortType,all,false,,16-bit signed integer
|
|
10
|
+
INT,IntegerType,all,false,,32-bit signed integer
|
|
11
|
+
BIGINT,LongType,all,false,,64-bit signed integer
|
|
12
|
+
REAL,FloatType,all,false,,32-bit floating point
|
|
13
|
+
FLOAT,DoubleType,all,false,,64-bit floating point
|
|
14
|
+
DECIMAL,DecimalType,all,false,,Exact numeric with precision
|
|
15
|
+
NUMERIC,DecimalType,all,false,,Alias for DECIMAL
|
|
16
|
+
MONEY,DecimalType,all,false,,Currency (8 bytes)
|
|
17
|
+
SMALLMONEY,DecimalType,all,false,,Currency (4 bytes)
|
|
18
|
+
BIT,BooleanType,all,false,,Boolean (0 or 1)
|
|
19
|
+
DATE,DateType,all,false,,Calendar date
|
|
20
|
+
TIME,StringType,all,false,,Time of day
|
|
21
|
+
DATETIME,TimestampType,all,false,,Date and time (3.33ms precision)
|
|
22
|
+
DATETIME2,TimestampType,all,false,,Date and time (100ns precision)
|
|
23
|
+
SMALLDATETIME,TimestampType,all,false,,Date and time (minute precision)
|
|
24
|
+
DATETIMEOFFSET,TimestampType,all,false,,Date time with timezone
|
|
25
|
+
BINARY,BinaryType,all,false,,Fixed-length binary
|
|
26
|
+
VARBINARY,BinaryType,all,false,,Variable-length binary
|
|
27
|
+
IMAGE,BinaryType,all,false,,Binary data (deprecated)
|
|
28
|
+
UNIQUEIDENTIFIER,StringType,all,false,,GUID/UUID
|
|
29
|
+
XML,StringType,all,true,CAST({} AS NVARCHAR(MAX)),XML document
|
|
30
|
+
SQL_VARIANT,StringType,all,false,,Mixed data types
|
|
31
|
+
GEOGRAPHY,StringType,all,false,,Geographic data (CLR)
|
|
32
|
+
GEOMETRY,StringType,all,false,,Geometric data (CLR)
|
|
33
|
+
HIERARCHYID,StringType,all,false,,Hierarchy position
|
|
34
|
+
TIMESTAMP,BinaryType,all,false,,Row version (not datetime!)
|
|
35
|
+
ROWVERSION,BinaryType,all,false,,Alias for TIMESTAMP
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Welcome to your new dbt project!
|
|
2
|
+
|
|
3
|
+
### Using the starter project
|
|
4
|
+
|
|
5
|
+
Try running the following commands:
|
|
6
|
+
- dbt run
|
|
7
|
+
- dbt test
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
### Resources:
|
|
11
|
+
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
|
|
12
|
+
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
|
|
13
|
+
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
|
|
14
|
+
- Find [dbt events](https://events.getdbt.com) near you
|
|
15
|
+
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices
|
|
File without changes
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
|
|
2
|
+
# Name your project! Project names should contain only lowercase characters
|
|
3
|
+
# and underscores. A good package name should reflect your organization's
|
|
4
|
+
# name or the intended use of these models
|
|
5
|
+
name: '{project_name}'
|
|
6
|
+
version: '1.0.0'
|
|
7
|
+
|
|
8
|
+
# This setting configures which "profile" DVT uses for this project.
|
|
9
|
+
profile: '{profile_name}'
|
|
10
|
+
|
|
11
|
+
# These configurations specify where DVT should look for different types of files.
|
|
12
|
+
# The `model-paths` config, for example, states that models in this project can be
|
|
13
|
+
# found in the "models/" directory. You probably won't need to change these!
|
|
14
|
+
model-paths: ["models"]
|
|
15
|
+
analysis-paths: ["analyses"]
|
|
16
|
+
test-paths: ["tests"]
|
|
17
|
+
seed-paths: ["seeds"]
|
|
18
|
+
macro-paths: ["macros"]
|
|
19
|
+
snapshot-paths: ["snapshots"]
|
|
20
|
+
|
|
21
|
+
# DVT-specific: Path for flat files (CSV, Parquet, etc.) for local data ingestion
|
|
22
|
+
flatfile-paths: ["flatfiles"]
|
|
23
|
+
|
|
24
|
+
clean-targets: # directories to be removed by `dvt clean`
|
|
25
|
+
- "target"
|
|
26
|
+
- "dbt_packages"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Configuring models
|
|
30
|
+
# Full documentation: https://docs.getdbt.com/docs/configuring-models
|
|
31
|
+
|
|
32
|
+
# In this example config, we tell DVT to build all models in the example/
|
|
33
|
+
# directory as views. These settings can be overridden in the individual model
|
|
34
|
+
# files using the `{{{{ config(...) }}}}` macro.
|
|
35
|
+
models:
|
|
36
|
+
'{project_name}':
|
|
37
|
+
# Configs prefixed with + apply to all files under models/example/
|
|
38
|
+
example:
|
|
39
|
+
+materialized: view
|
|
File without changes
|