dvt-core 0.59.0a51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2660 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +60 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.py +273 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.py +1252 -0
- dbt/compute/metadata/__init__.py +63 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/catalog_store.py +1036 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.py +1020 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/spark_logger.py +272 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.py +472 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.py +408 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +888 -0
- dbt/config/project_utils.py +48 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +564 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +419 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_comprehensive_registry.py +1254 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/dvt_starter_project/README.md +15 -0
- dbt/include/dvt_starter_project/__init__.py +3 -0
- dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/dvt_project.yml +39 -0
- dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
- dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +122 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2208 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +506 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +513 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +1002 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +509 -0
- dbt/task/dvt_run.py +282 -0
- dbt/task/dvt_seed.py +806 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.py +1022 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.py +804 -0
- dbt/task/migrate.py +714 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.py +1489 -0
- dbt/task/profile_serve.py +662 -0
- dbt/task/retract.py +441 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1647 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.py +814 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +271 -0
- dvt_cli/__init__.py +158 -0
- dvt_core-0.59.0a51.dist-info/METADATA +288 -0
- dvt_core-0.59.0a51.dist-info/RECORD +299 -0
- dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
- dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
- dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,674 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# DVT Global Registries
|
|
3
|
+
# =============================================================================
|
|
4
|
+
# Centralized lookup tables for type mappings and syntax rules.
|
|
5
|
+
# These are shipped with DVT and loaded into the project metadata store.
|
|
6
|
+
#
|
|
7
|
+
# DVT v0.54.0: Initial implementation
|
|
8
|
+
# =============================================================================
|
|
9
|
+
|
|
10
|
+
from typing import Dict, List, Optional, Any
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class TypeMapping:
    """One row of the adapter-type to Spark-type lookup table.

    Attributes:
        adapter_name: Adapter the row belongs to (e.g. ``"postgres"``).
        adapter_native_type: Type name as the adapter spells it
            (e.g. ``"TEXT"``).
        spark_version: Spark version the row applies to; the shipped
            table uses ``"all"``.
        spark_native_type: Name of the matching Spark type
            (e.g. ``"StringType"``).
        is_complex: Flag set on rows such as JSON, ARRAY or VARIANT.
            Defaults to ``False``.
        cast_expression: Optional template containing a ``{}``
            placeholder (e.g. ``"CAST({} AS STRING)"``).
            NOTE(review): presumably filled with the column expression
            by the caller -- confirm against the consumer of this field.
    """

    # Required identification of the source type.
    adapter_name: str
    adapter_native_type: str

    # Required Spark-side target.
    spark_version: str
    spark_native_type: str

    # Optional details; plain scalar rows rely on these defaults.
    is_complex: bool = False
    cast_expression: Optional[str] = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class SyntaxRule:
    """Per-adapter SQL syntax settings.

    Attributes:
        adapter_name: Adapter these settings belong to.
        quote_start: Opening quote string.
        quote_end: Closing quote string.
            NOTE(review): presumably the identifier-quoting delimiters --
            confirm against the code that consumes them.
        case_sensitivity: One of ``'LOWER'``, ``'UPPER'`` or
            ``'PRESERVE'``.
        reserved_keywords: Reserved words for the adapter.
    """

    adapter_name: str

    # Quoting delimiters.
    quote_start: str
    quote_end: str

    # Expected values: 'LOWER', 'UPPER', 'PRESERVE'
    case_sensitivity: str

    # Required: there is no default, so callers must always supply the list.
    reserved_keywords: List[str]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class TypeRegistry:
|
|
36
|
+
"""
|
|
37
|
+
Global type registry for mapping adapter types to Spark types.
|
|
38
|
+
|
|
39
|
+
This registry is shipped with DVT and provides the definitive mapping
|
|
40
|
+
between every supported adapter's native types and Spark's Catalyst types.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
# ==========================================================================
|
|
44
|
+
# Type Mappings: flat list of records, one per (adapter_name, adapter_native_type, spark_version) -> spark_native_type
|
|
45
|
+
# ==========================================================================
|
|
46
|
+
|
|
47
|
+
TYPE_MAPPINGS: List[Dict[str, Any]] = [
|
|
48
|
+
# ======================================================================
|
|
49
|
+
# PostgreSQL
|
|
50
|
+
# ======================================================================
|
|
51
|
+
# String types
|
|
52
|
+
{"adapter_name": "postgres", "adapter_native_type": "TEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
53
|
+
{"adapter_name": "postgres", "adapter_native_type": "VARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
54
|
+
{"adapter_name": "postgres", "adapter_native_type": "CHARACTER VARYING", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
55
|
+
{"adapter_name": "postgres", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
56
|
+
{"adapter_name": "postgres", "adapter_native_type": "CHARACTER", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
57
|
+
{"adapter_name": "postgres", "adapter_native_type": "BPCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
58
|
+
{"adapter_name": "postgres", "adapter_native_type": "NAME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
59
|
+
|
|
60
|
+
# Integer types
|
|
61
|
+
{"adapter_name": "postgres", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
62
|
+
{"adapter_name": "postgres", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
63
|
+
{"adapter_name": "postgres", "adapter_native_type": "INT4", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
64
|
+
{"adapter_name": "postgres", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
|
|
65
|
+
{"adapter_name": "postgres", "adapter_native_type": "INT2", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
|
|
66
|
+
{"adapter_name": "postgres", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
67
|
+
{"adapter_name": "postgres", "adapter_native_type": "INT8", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
68
|
+
{"adapter_name": "postgres", "adapter_native_type": "SERIAL", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
69
|
+
{"adapter_name": "postgres", "adapter_native_type": "BIGSERIAL", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
70
|
+
|
|
71
|
+
# Floating point types
|
|
72
|
+
{"adapter_name": "postgres", "adapter_native_type": "REAL", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
|
|
73
|
+
{"adapter_name": "postgres", "adapter_native_type": "FLOAT4", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
|
|
74
|
+
{"adapter_name": "postgres", "adapter_native_type": "DOUBLE PRECISION", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
75
|
+
{"adapter_name": "postgres", "adapter_native_type": "FLOAT8", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
76
|
+
{"adapter_name": "postgres", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
77
|
+
|
|
78
|
+
# Numeric/Decimal types
|
|
79
|
+
{"adapter_name": "postgres", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
|
|
80
|
+
{"adapter_name": "postgres", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
|
|
81
|
+
{"adapter_name": "postgres", "adapter_native_type": "MONEY", "spark_version": "all", "spark_native_type": "DecimalType(19,2)", "is_complex": False},
|
|
82
|
+
|
|
83
|
+
# Boolean
|
|
84
|
+
{"adapter_name": "postgres", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
|
|
85
|
+
{"adapter_name": "postgres", "adapter_native_type": "BOOL", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
|
|
86
|
+
|
|
87
|
+
# Date/Time types
|
|
88
|
+
{"adapter_name": "postgres", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
|
|
89
|
+
{"adapter_name": "postgres", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False}, # Spark has no TimeType
|
|
90
|
+
{"adapter_name": "postgres", "adapter_native_type": "TIMETZ", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
91
|
+
{"adapter_name": "postgres", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
92
|
+
{"adapter_name": "postgres", "adapter_native_type": "TIMESTAMPTZ", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
93
|
+
{"adapter_name": "postgres", "adapter_native_type": "TIMESTAMP WITHOUT TIME ZONE", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
94
|
+
{"adapter_name": "postgres", "adapter_native_type": "TIMESTAMP WITH TIME ZONE", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
95
|
+
{"adapter_name": "postgres", "adapter_native_type": "INTERVAL", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
96
|
+
|
|
97
|
+
# Binary types
|
|
98
|
+
{"adapter_name": "postgres", "adapter_native_type": "BYTEA", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
|
|
99
|
+
|
|
100
|
+
# JSON types
|
|
101
|
+
{"adapter_name": "postgres", "adapter_native_type": "JSON", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "CAST({} AS STRING)"},
|
|
102
|
+
{"adapter_name": "postgres", "adapter_native_type": "JSONB", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "CAST({} AS STRING)"},
|
|
103
|
+
|
|
104
|
+
# UUID
|
|
105
|
+
{"adapter_name": "postgres", "adapter_native_type": "UUID", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
106
|
+
|
|
107
|
+
# Array types (complex)
|
|
108
|
+
{"adapter_name": "postgres", "adapter_native_type": "ARRAY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "CAST({} AS STRING)"},
|
|
109
|
+
|
|
110
|
+
# ======================================================================
|
|
111
|
+
# Snowflake
|
|
112
|
+
# ======================================================================
|
|
113
|
+
# String types
|
|
114
|
+
{"adapter_name": "snowflake", "adapter_native_type": "TEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
115
|
+
{"adapter_name": "snowflake", "adapter_native_type": "VARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
116
|
+
{"adapter_name": "snowflake", "adapter_native_type": "STRING", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
117
|
+
{"adapter_name": "snowflake", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
118
|
+
{"adapter_name": "snowflake", "adapter_native_type": "CHARACTER", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
119
|
+
|
|
120
|
+
# Numeric types
|
|
121
|
+
{"adapter_name": "snowflake", "adapter_native_type": "NUMBER", "spark_version": "all", "spark_native_type": "DecimalType(38,0)", "is_complex": False},
|
|
122
|
+
{"adapter_name": "snowflake", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
|
|
123
|
+
{"adapter_name": "snowflake", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
|
|
124
|
+
{"adapter_name": "snowflake", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
125
|
+
{"adapter_name": "snowflake", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
126
|
+
{"adapter_name": "snowflake", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
127
|
+
{"adapter_name": "snowflake", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
128
|
+
{"adapter_name": "snowflake", "adapter_native_type": "TINYINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
|
|
129
|
+
{"adapter_name": "snowflake", "adapter_native_type": "BYTEINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
|
|
130
|
+
|
|
131
|
+
# Floating point
|
|
132
|
+
{"adapter_name": "snowflake", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
133
|
+
{"adapter_name": "snowflake", "adapter_native_type": "FLOAT4", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
|
|
134
|
+
{"adapter_name": "snowflake", "adapter_native_type": "FLOAT8", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
135
|
+
{"adapter_name": "snowflake", "adapter_native_type": "DOUBLE", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
136
|
+
{"adapter_name": "snowflake", "adapter_native_type": "DOUBLE PRECISION", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
137
|
+
{"adapter_name": "snowflake", "adapter_native_type": "REAL", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
|
|
138
|
+
|
|
139
|
+
# Boolean
|
|
140
|
+
{"adapter_name": "snowflake", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
|
|
141
|
+
|
|
142
|
+
# Date/Time
|
|
143
|
+
{"adapter_name": "snowflake", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
|
|
144
|
+
{"adapter_name": "snowflake", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
145
|
+
{"adapter_name": "snowflake", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
146
|
+
{"adapter_name": "snowflake", "adapter_native_type": "TIMESTAMP_LTZ", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
147
|
+
{"adapter_name": "snowflake", "adapter_native_type": "TIMESTAMP_NTZ", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
148
|
+
{"adapter_name": "snowflake", "adapter_native_type": "TIMESTAMP_TZ", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
149
|
+
{"adapter_name": "snowflake", "adapter_native_type": "DATETIME", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
150
|
+
|
|
151
|
+
# Binary
|
|
152
|
+
{"adapter_name": "snowflake", "adapter_native_type": "BINARY", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
|
|
153
|
+
{"adapter_name": "snowflake", "adapter_native_type": "VARBINARY", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
|
|
154
|
+
|
|
155
|
+
# Semi-structured (complex)
|
|
156
|
+
{"adapter_name": "snowflake", "adapter_native_type": "VARIANT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "TO_VARCHAR({})"},
|
|
157
|
+
{"adapter_name": "snowflake", "adapter_native_type": "OBJECT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "TO_VARCHAR({})"},
|
|
158
|
+
{"adapter_name": "snowflake", "adapter_native_type": "ARRAY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "TO_VARCHAR({})"},
|
|
159
|
+
|
|
160
|
+
# ======================================================================
|
|
161
|
+
# Databricks / Delta Lake
|
|
162
|
+
# ======================================================================
|
|
163
|
+
{"adapter_name": "databricks", "adapter_native_type": "STRING", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
164
|
+
{"adapter_name": "databricks", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
165
|
+
{"adapter_name": "databricks", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
166
|
+
{"adapter_name": "databricks", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
167
|
+
{"adapter_name": "databricks", "adapter_native_type": "LONG", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
168
|
+
{"adapter_name": "databricks", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
|
|
169
|
+
{"adapter_name": "databricks", "adapter_native_type": "SHORT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
|
|
170
|
+
{"adapter_name": "databricks", "adapter_native_type": "TINYINT", "spark_version": "all", "spark_native_type": "ByteType", "is_complex": False},
|
|
171
|
+
{"adapter_name": "databricks", "adapter_native_type": "BYTE", "spark_version": "all", "spark_native_type": "ByteType", "is_complex": False},
|
|
172
|
+
{"adapter_name": "databricks", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
|
|
173
|
+
{"adapter_name": "databricks", "adapter_native_type": "DOUBLE", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
174
|
+
{"adapter_name": "databricks", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
|
|
175
|
+
{"adapter_name": "databricks", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
|
|
176
|
+
{"adapter_name": "databricks", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
|
|
177
|
+
{"adapter_name": "databricks", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
178
|
+
{"adapter_name": "databricks", "adapter_native_type": "TIMESTAMP_NTZ", "spark_version": "all", "spark_native_type": "TimestampNTZType", "is_complex": False},
|
|
179
|
+
{"adapter_name": "databricks", "adapter_native_type": "BINARY", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
|
|
180
|
+
{"adapter_name": "databricks", "adapter_native_type": "ARRAY", "spark_version": "all", "spark_native_type": "ArrayType", "is_complex": True},
|
|
181
|
+
{"adapter_name": "databricks", "adapter_native_type": "MAP", "spark_version": "all", "spark_native_type": "MapType", "is_complex": True},
|
|
182
|
+
{"adapter_name": "databricks", "adapter_native_type": "STRUCT", "spark_version": "all", "spark_native_type": "StructType", "is_complex": True},
|
|
183
|
+
|
|
184
|
+
# ======================================================================
|
|
185
|
+
# MySQL
|
|
186
|
+
# ======================================================================
|
|
187
|
+
{"adapter_name": "mysql", "adapter_native_type": "VARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
188
|
+
{"adapter_name": "mysql", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
189
|
+
{"adapter_name": "mysql", "adapter_native_type": "TEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
190
|
+
{"adapter_name": "mysql", "adapter_native_type": "TINYTEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
191
|
+
{"adapter_name": "mysql", "adapter_native_type": "MEDIUMTEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
192
|
+
{"adapter_name": "mysql", "adapter_native_type": "LONGTEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
193
|
+
{"adapter_name": "mysql", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
194
|
+
{"adapter_name": "mysql", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
195
|
+
{"adapter_name": "mysql", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
196
|
+
{"adapter_name": "mysql", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
|
|
197
|
+
{"adapter_name": "mysql", "adapter_native_type": "TINYINT", "spark_version": "all", "spark_native_type": "ByteType", "is_complex": False},
|
|
198
|
+
{"adapter_name": "mysql", "adapter_native_type": "MEDIUMINT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
199
|
+
{"adapter_name": "mysql", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
|
|
200
|
+
{"adapter_name": "mysql", "adapter_native_type": "DOUBLE", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
201
|
+
{"adapter_name": "mysql", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
|
|
202
|
+
{"adapter_name": "mysql", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
|
|
203
|
+
{"adapter_name": "mysql", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
|
|
204
|
+
{"adapter_name": "mysql", "adapter_native_type": "BOOL", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
|
|
205
|
+
{"adapter_name": "mysql", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
|
|
206
|
+
{"adapter_name": "mysql", "adapter_native_type": "DATETIME", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
207
|
+
{"adapter_name": "mysql", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
208
|
+
{"adapter_name": "mysql", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
209
|
+
{"adapter_name": "mysql", "adapter_native_type": "YEAR", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
210
|
+
{"adapter_name": "mysql", "adapter_native_type": "BLOB", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
|
|
211
|
+
{"adapter_name": "mysql", "adapter_native_type": "JSON", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
212
|
+
|
|
213
|
+
# ======================================================================
|
|
214
|
+
# BigQuery
|
|
215
|
+
# ======================================================================
|
|
216
|
+
{"adapter_name": "bigquery", "adapter_native_type": "STRING", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
217
|
+
{"adapter_name": "bigquery", "adapter_native_type": "INT64", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
218
|
+
{"adapter_name": "bigquery", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
219
|
+
{"adapter_name": "bigquery", "adapter_native_type": "FLOAT64", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
220
|
+
{"adapter_name": "bigquery", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
221
|
+
{"adapter_name": "bigquery", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,9)", "is_complex": False},
|
|
222
|
+
{"adapter_name": "bigquery", "adapter_native_type": "BIGNUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(76,38)", "is_complex": False},
|
|
223
|
+
{"adapter_name": "bigquery", "adapter_native_type": "BOOL", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
|
|
224
|
+
{"adapter_name": "bigquery", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
|
|
225
|
+
{"adapter_name": "bigquery", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
|
|
226
|
+
{"adapter_name": "bigquery", "adapter_native_type": "DATETIME", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
227
|
+
{"adapter_name": "bigquery", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
228
|
+
{"adapter_name": "bigquery", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
229
|
+
{"adapter_name": "bigquery", "adapter_native_type": "BYTES", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
|
|
230
|
+
{"adapter_name": "bigquery", "adapter_native_type": "GEOGRAPHY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
231
|
+
{"adapter_name": "bigquery", "adapter_native_type": "JSON", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
232
|
+
{"adapter_name": "bigquery", "adapter_native_type": "ARRAY", "spark_version": "all", "spark_native_type": "ArrayType", "is_complex": True},
|
|
233
|
+
{"adapter_name": "bigquery", "adapter_native_type": "STRUCT", "spark_version": "all", "spark_native_type": "StructType", "is_complex": True},
|
|
234
|
+
{"adapter_name": "bigquery", "adapter_native_type": "RECORD", "spark_version": "all", "spark_native_type": "StructType", "is_complex": True},
|
|
235
|
+
|
|
236
|
+
# ======================================================================
|
|
237
|
+
# Redshift
|
|
238
|
+
# ======================================================================
|
|
239
|
+
{"adapter_name": "redshift", "adapter_native_type": "VARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
240
|
+
{"adapter_name": "redshift", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
241
|
+
{"adapter_name": "redshift", "adapter_native_type": "BPCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
242
|
+
{"adapter_name": "redshift", "adapter_native_type": "TEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
243
|
+
{"adapter_name": "redshift", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
244
|
+
{"adapter_name": "redshift", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
245
|
+
{"adapter_name": "redshift", "adapter_native_type": "INT4", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
246
|
+
{"adapter_name": "redshift", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
247
|
+
{"adapter_name": "redshift", "adapter_native_type": "INT8", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
248
|
+
{"adapter_name": "redshift", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
|
|
249
|
+
{"adapter_name": "redshift", "adapter_native_type": "INT2", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
|
|
250
|
+
{"adapter_name": "redshift", "adapter_native_type": "REAL", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
|
|
251
|
+
{"adapter_name": "redshift", "adapter_native_type": "FLOAT4", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
|
|
252
|
+
{"adapter_name": "redshift", "adapter_native_type": "DOUBLE PRECISION", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
253
|
+
{"adapter_name": "redshift", "adapter_native_type": "FLOAT8", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
254
|
+
{"adapter_name": "redshift", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
255
|
+
{"adapter_name": "redshift", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
|
|
256
|
+
{"adapter_name": "redshift", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
|
|
257
|
+
{"adapter_name": "redshift", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
|
|
258
|
+
{"adapter_name": "redshift", "adapter_native_type": "BOOL", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
|
|
259
|
+
{"adapter_name": "redshift", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
|
|
260
|
+
{"adapter_name": "redshift", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
261
|
+
{"adapter_name": "redshift", "adapter_native_type": "TIMESTAMPTZ", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
262
|
+
{"adapter_name": "redshift", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
263
|
+
{"adapter_name": "redshift", "adapter_native_type": "TIMETZ", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
264
|
+
{"adapter_name": "redshift", "adapter_native_type": "SUPER", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
265
|
+
{"adapter_name": "redshift", "adapter_native_type": "GEOMETRY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
266
|
+
{"adapter_name": "redshift", "adapter_native_type": "GEOGRAPHY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
267
|
+
{"adapter_name": "redshift", "adapter_native_type": "HLLSKETCH", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": True},
|
|
268
|
+
|
|
269
|
+
# ======================================================================
|
|
270
|
+
# Oracle
|
|
271
|
+
# ======================================================================
|
|
272
|
+
{"adapter_name": "oracle", "adapter_native_type": "VARCHAR2", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
273
|
+
{"adapter_name": "oracle", "adapter_native_type": "NVARCHAR2", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
274
|
+
{"adapter_name": "oracle", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
275
|
+
{"adapter_name": "oracle", "adapter_native_type": "NCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
276
|
+
{"adapter_name": "oracle", "adapter_native_type": "CLOB", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
277
|
+
{"adapter_name": "oracle", "adapter_native_type": "NCLOB", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
278
|
+
{"adapter_name": "oracle", "adapter_native_type": "NUMBER", "spark_version": "all", "spark_native_type": "DecimalType(38,10)", "is_complex": False},
|
|
279
|
+
{"adapter_name": "oracle", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
280
|
+
{"adapter_name": "oracle", "adapter_native_type": "BINARY_FLOAT", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
|
|
281
|
+
{"adapter_name": "oracle", "adapter_native_type": "BINARY_DOUBLE", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
282
|
+
{"adapter_name": "oracle", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False}, # Oracle DATE has time component
|
|
283
|
+
{"adapter_name": "oracle", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
284
|
+
{"adapter_name": "oracle", "adapter_native_type": "TIMESTAMP WITH TIME ZONE", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
285
|
+
{"adapter_name": "oracle", "adapter_native_type": "TIMESTAMP WITH LOCAL TIME ZONE", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
286
|
+
{"adapter_name": "oracle", "adapter_native_type": "INTERVAL YEAR TO MONTH", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
287
|
+
{"adapter_name": "oracle", "adapter_native_type": "INTERVAL DAY TO SECOND", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
288
|
+
{"adapter_name": "oracle", "adapter_native_type": "RAW", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
|
|
289
|
+
{"adapter_name": "oracle", "adapter_native_type": "BLOB", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
|
|
290
|
+
{"adapter_name": "oracle", "adapter_native_type": "BFILE", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": True},
|
|
291
|
+
{"adapter_name": "oracle", "adapter_native_type": "ROWID", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
292
|
+
{"adapter_name": "oracle", "adapter_native_type": "UROWID", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
293
|
+
{"adapter_name": "oracle", "adapter_native_type": "JSON", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
294
|
+
{"adapter_name": "oracle", "adapter_native_type": "XMLTYPE", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
295
|
+
|
|
296
|
+
# ======================================================================
|
|
297
|
+
# SQL Server
|
|
298
|
+
# ======================================================================
|
|
299
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "VARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
300
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "NVARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
301
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
302
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "NCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
303
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "TEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
304
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "NTEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
305
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
|
|
306
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
|
|
307
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
|
|
308
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "TINYINT", "spark_version": "all", "spark_native_type": "ByteType", "is_complex": False},
|
|
309
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
|
|
310
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "REAL", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
|
|
311
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
|
|
312
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
|
|
313
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "MONEY", "spark_version": "all", "spark_native_type": "DecimalType(19,4)", "is_complex": False},
|
|
314
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "SMALLMONEY", "spark_version": "all", "spark_native_type": "DecimalType(10,4)", "is_complex": False},
|
|
315
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "BIT", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
|
|
316
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
|
|
317
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "DATETIME", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
318
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "DATETIME2", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
319
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "SMALLDATETIME", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
320
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "DATETIMEOFFSET", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
|
|
321
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
322
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "BINARY", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
|
|
323
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "VARBINARY", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
|
|
324
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "IMAGE", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
|
|
325
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "UNIQUEIDENTIFIER", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
|
|
326
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "XML", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
327
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "GEOGRAPHY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
328
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "GEOMETRY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
329
|
+
{"adapter_name": "sqlserver", "adapter_native_type": "HIERARCHYID", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
|
|
330
|
+
]
|
|
331
|
+
|
|
332
|
+
@classmethod
def get_spark_type(
    cls,
    adapter_name: str,
    adapter_type: str,
    spark_version: str = "4.0"
) -> Optional[Dict[str, Any]]:
    """
    Look up the Spark type for a given adapter type.

    The adapter name is lower-cased and the adapter type is upper-cased and
    stripped before matching. The type is then tried in progressively looser
    spellings; the first spelling that finds a mapping wins:

    1. size specifiers removed: ``VARCHAR(255)`` -> ``VARCHAR``
    2. whitespace runs collapsed: ``TIMESTAMP (6) WITH TIME ZONE`` ->
       ``TIMESTAMP WITH TIME ZONE``
    3. angle-bracket type parameters dropped: ``ARRAY<STRING>`` -> ``ARRAY``

    :param adapter_name: Source adapter (e.g., 'postgres', 'snowflake')
    :param adapter_type: Adapter's native type (e.g., 'INTEGER', 'VARCHAR')
    :param spark_version: Target Spark version (default '4.0')
    :returns: Dict with spark_native_type, is_complex, cast_expression or None
    """
    import re

    # Normalize inputs
    adapter_name = adapter_name.lower()
    adapter_type = adapter_type.upper().strip()

    def _strip_sizes(text: str) -> str:
        # Remove size specifiers: VARCHAR(255) -> VARCHAR
        return re.sub(r'\([^)]*\)', '', text).strip()

    # 1. The historical spelling, tried first so existing matches are unchanged.
    exact = _strip_sizes(adapter_type)
    # 2. Stripping "(n)" out of the middle of a multi-word type can leave a
    #    double space behind; collapse whitespace runs to single spaces.
    collapsed = " ".join(exact.split())
    # 3. Parameterised types such as ARRAY<...> / STRUCT<...> are registered
    #    under their bare name, so fall back to the text before the first "<".
    base = " ".join(_strip_sizes(adapter_type.split("<", 1)[0]).split())

    # De-duplicate while keeping the strict-to-loose order.
    candidates = list(dict.fromkeys((exact, collapsed, base)))

    for candidate in candidates:
        for mapping in cls.TYPE_MAPPINGS:
            if mapping["adapter_name"] != adapter_name:
                continue
            if mapping["adapter_native_type"] != candidate:
                continue
            # Check spark version match ("all" applies to every version)
            if mapping["spark_version"] == "all" or mapping["spark_version"] == spark_version:
                return {
                    "spark_native_type": mapping["spark_native_type"],
                    "is_complex": mapping.get("is_complex", False),
                    "cast_expression": mapping.get("cast_expression"),
                }

    return None
|
|
367
|
+
|
|
368
|
+
@classmethod
def get_all_mappings_for_adapter(cls, adapter_name: str) -> List[Dict[str, Any]]:
    """
    Return every type mapping registered for one adapter.

    :param adapter_name: Adapter to filter on; lower-cased before comparison
    :returns: The matching entries of TYPE_MAPPINGS, in registry order
        (empty list when the adapter has no entries)
    """
    wanted = adapter_name.lower()
    selected: List[Dict[str, Any]] = []
    for entry in cls.TYPE_MAPPINGS:
        if entry["adapter_name"] == wanted:
            selected.append(entry)
    return selected
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
class SyntaxRegistry:
|
|
376
|
+
"""
|
|
377
|
+
Syntax rules for each adapter.
|
|
378
|
+
|
|
379
|
+
Defines quoting characters, case sensitivity, and reserved keywords
|
|
380
|
+
to ensure correct SQL generation across different dialects.
|
|
381
|
+
"""
|
|
382
|
+
|
|
383
|
+
SYNTAX_RULES: Dict[str, Dict[str, Any]] = {
|
|
384
|
+
"postgres": {
|
|
385
|
+
"quote_start": '"',
|
|
386
|
+
"quote_end": '"',
|
|
387
|
+
"case_sensitivity": "LOWER", # Postgres folds to lowercase
|
|
388
|
+
"reserved_keywords": [
|
|
389
|
+
"ALL", "ANALYSE", "ANALYZE", "AND", "ANY", "ARRAY", "AS", "ASC",
|
|
390
|
+
"ASYMMETRIC", "BOTH", "CASE", "CAST", "CHECK", "COLLATE", "COLUMN",
|
|
391
|
+
"CONSTRAINT", "CREATE", "CURRENT_CATALOG", "CURRENT_DATE",
|
|
392
|
+
"CURRENT_ROLE", "CURRENT_TIME", "CURRENT_TIMESTAMP", "CURRENT_USER",
|
|
393
|
+
"DEFAULT", "DEFERRABLE", "DESC", "DISTINCT", "DO", "ELSE", "END",
|
|
394
|
+
"EXCEPT", "FALSE", "FETCH", "FOR", "FOREIGN", "FROM", "GRANT",
|
|
395
|
+
"GROUP", "HAVING", "IN", "INITIALLY", "INTERSECT", "INTO", "LATERAL",
|
|
396
|
+
"LEADING", "LIMIT", "LOCALTIME", "LOCALTIMESTAMP", "NOT", "NULL",
|
|
397
|
+
"OFFSET", "ON", "ONLY", "OR", "ORDER", "PLACING", "PRIMARY",
|
|
398
|
+
"REFERENCES", "RETURNING", "SELECT", "SESSION_USER", "SOME",
|
|
399
|
+
"SYMMETRIC", "TABLE", "THEN", "TO", "TRAILING", "TRUE", "UNION",
|
|
400
|
+
"UNIQUE", "USER", "USING", "VARIADIC", "WHEN", "WHERE", "WINDOW",
|
|
401
|
+
"WITH"
|
|
402
|
+
],
|
|
403
|
+
},
|
|
404
|
+
"snowflake": {
|
|
405
|
+
"quote_start": '"',
|
|
406
|
+
"quote_end": '"',
|
|
407
|
+
"case_sensitivity": "UPPER", # Snowflake folds to uppercase
|
|
408
|
+
"reserved_keywords": [
|
|
409
|
+
"ACCOUNT", "ALL", "ALTER", "AND", "ANY", "AS", "BETWEEN", "BY",
|
|
410
|
+
"CASE", "CAST", "CHECK", "CLUSTER", "COLUMN", "CONNECT", "CONNECTION",
|
|
411
|
+
"CONSTRAINT", "CREATE", "CROSS", "CURRENT", "CURRENT_DATE",
|
|
412
|
+
"CURRENT_TIME", "CURRENT_TIMESTAMP", "CURRENT_USER", "DATABASE",
|
|
413
|
+
"DELETE", "DISTINCT", "DROP", "ELSE", "EXISTS", "FALSE", "FOLLOWING",
|
|
414
|
+
"FOR", "FROM", "FULL", "GRANT", "GROUP", "GSCLUSTER", "HAVING",
|
|
415
|
+
"ILIKE", "IN", "INCREMENT", "INNER", "INSERT", "INTERSECT", "INTO",
|
|
416
|
+
"IS", "ISSUE", "JOIN", "LATERAL", "LEFT", "LIKE", "LOCALTIME",
|
|
417
|
+
"LOCALTIMESTAMP", "MINUS", "NATURAL", "NOT", "NULL", "OF", "ON",
|
|
418
|
+
"OR", "ORDER", "ORGANIZATION", "QUALIFY", "REGEXP", "REVOKE",
|
|
419
|
+
"RIGHT", "RLIKE", "ROW", "ROWS", "SAMPLE", "SCHEMA", "SELECT",
|
|
420
|
+
"SET", "SOME", "START", "TABLE", "TABLESAMPLE", "THEN", "TO",
|
|
421
|
+
"TRIGGER", "TRUE", "TRY_CAST", "UNION", "UNIQUE", "UPDATE",
|
|
422
|
+
"USING", "VALUES", "VIEW", "WHEN", "WHENEVER", "WHERE", "WITH"
|
|
423
|
+
],
|
|
424
|
+
},
|
|
425
|
+
"databricks": {
|
|
426
|
+
"quote_start": '`',
|
|
427
|
+
"quote_end": '`',
|
|
428
|
+
"case_sensitivity": "PRESERVE", # Databricks preserves case
|
|
429
|
+
"reserved_keywords": [
|
|
430
|
+
"ALL", "ALTER", "AND", "ANTI", "ANY", "ARCHIVE", "ARRAY", "AS",
|
|
431
|
+
"ASC", "AT", "AUTHORIZATION", "BETWEEN", "BOTH", "BUCKET", "BUCKETS",
|
|
432
|
+
"BY", "CACHE", "CASCADE", "CASE", "CAST", "CHANGE", "CHECK", "CLEAR",
|
|
433
|
+
"CLUSTER", "CLUSTERED", "CODEGEN", "COLLATE", "COLLECTION", "COLUMN",
|
|
434
|
+
"COLUMNS", "COMMENT", "COMMIT", "COMPACT", "COMPACTIONS", "COMPUTE",
|
|
435
|
+
"CONCATENATE", "CONSTRAINT", "COST", "CREATE", "CROSS", "CUBE",
|
|
436
|
+
"CURRENT", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
|
|
437
|
+
"CURRENT_USER", "DATA", "DATABASE", "DATABASES", "DAY", "DBPROPERTIES",
|
|
438
|
+
"DEFINED", "DELETE", "DELIMITED", "DESC", "DESCRIBE", "DFS", "DIRECTORIES",
|
|
439
|
+
"DIRECTORY", "DISTINCT", "DISTRIBUTE", "DROP", "ELSE", "END", "ESCAPE",
|
|
440
|
+
"ESCAPED", "EXCEPT", "EXCHANGE", "EXISTS", "EXPLAIN", "EXPORT", "EXTENDED",
|
|
441
|
+
"EXTERNAL", "EXTRACT", "FALSE", "FETCH", "FIELDS", "FILTER", "FILEFORMAT",
|
|
442
|
+
"FIRST", "FOLLOWING", "FOR", "FOREIGN", "FORMAT", "FORMATTED", "FROM",
|
|
443
|
+
"FULL", "FUNCTION", "FUNCTIONS", "GLOBAL", "GRANT", "GROUP", "GROUPING",
|
|
444
|
+
"HAVING", "HOUR", "IF", "IGNORE", "IMPORT", "IN", "INDEX", "INDEXES",
|
|
445
|
+
"INNER", "INPATH", "INPUTFORMAT", "INSERT", "INTERSECT", "INTERVAL",
|
|
446
|
+
"INTO", "IS", "ITEMS", "JOIN", "KEYS", "LAST", "LATERAL", "LAZY", "LEADING",
|
|
447
|
+
"LEFT", "LIKE", "LIMIT", "LINES", "LIST", "LOAD", "LOCAL", "LOCATION",
|
|
448
|
+
"LOCK", "LOCKS", "LOGICAL", "MACRO", "MAP", "MATCHED", "MERGE", "MINUTE",
|
|
449
|
+
"MONTH", "MSCK", "NAMESPACE", "NAMESPACES", "NATURAL", "NO", "NOT", "NULL",
|
|
450
|
+
"NULLS", "OF", "ON", "ONLY", "OPTION", "OPTIONS", "OR", "ORDER", "OUT",
|
|
451
|
+
"OUTER", "OUTPUTFORMAT", "OVER", "OVERLAPS", "OVERLAY", "OVERWRITE",
|
|
452
|
+
"PARTITION", "PARTITIONED", "PARTITIONS", "PERCENT", "PLACING", "POSITION",
|
|
453
|
+
"PRECEDING", "PRIMARY", "PRINCIPALS", "PROPERTIES", "PURGE", "QUERY",
|
|
454
|
+
"RANGE", "RECORDREADER", "RECORDWRITER", "RECOVER", "REDUCE", "REFERENCES",
|
|
455
|
+
"REFRESH", "RENAME", "REPAIR", "REPLACE", "RESET", "RESTRICT", "REVOKE",
|
|
456
|
+
"RIGHT", "RLIKE", "ROLE", "ROLES", "ROLLBACK", "ROLLUP", "ROW", "ROWS",
|
|
457
|
+
"SCHEMA", "SCHEMAS", "SECOND", "SELECT", "SEMI", "SEPARATED", "SERDE",
|
|
458
|
+
"SERDEPROPERTIES", "SESSION_USER", "SET", "SETS", "SHOW", "SKEWED", "SOME",
|
|
459
|
+
"SORT", "SORTED", "START", "STATISTICS", "STORED", "STRATIFY", "STRUCT",
|
|
460
|
+
"SUBSTR", "SUBSTRING", "TABLE", "TABLES", "TABLESAMPLE", "TBLPROPERTIES",
|
|
461
|
+
"TEMP", "TEMPORARY", "TERMINATED", "THEN", "TO", "TOUCH", "TRAILING",
|
|
462
|
+
"TRANSACTION", "TRANSACTIONS", "TRANSFORM", "TRIM", "TRUE", "TRUNCATE",
|
|
463
|
+
"TYPE", "UNARCHIVE", "UNBOUNDED", "UNCACHE", "UNION", "UNIQUE", "UNKNOWN",
|
|
464
|
+
"UNLOCK", "UNSET", "UPDATE", "USE", "USER", "USING", "VALUES", "VIEW",
|
|
465
|
+
"VIEWS", "WHEN", "WHERE", "WINDOW", "WITH", "YEAR"
|
|
466
|
+
],
|
|
467
|
+
},
|
|
468
|
+
"bigquery": {
|
|
469
|
+
"quote_start": '`',
|
|
470
|
+
"quote_end": '`',
|
|
471
|
+
"case_sensitivity": "PRESERVE", # BigQuery preserves case
|
|
472
|
+
"reserved_keywords": [
|
|
473
|
+
"ALL", "AND", "ANY", "ARRAY", "AS", "ASC", "ASSERT_ROWS_MODIFIED",
|
|
474
|
+
"AT", "BETWEEN", "BY", "CASE", "CAST", "COLLATE", "CONTAINS", "CREATE",
|
|
475
|
+
"CROSS", "CUBE", "CURRENT", "DEFAULT", "DEFINE", "DESC", "DISTINCT",
|
|
476
|
+
"ELSE", "END", "ENUM", "ESCAPE", "EXCEPT", "EXCLUDE", "EXISTS",
|
|
477
|
+
"EXTRACT", "FALSE", "FETCH", "FOLLOWING", "FOR", "FROM", "FULL",
|
|
478
|
+
"GROUP", "GROUPING", "GROUPS", "HASH", "HAVING", "IF", "IGNORE",
|
|
479
|
+
"IN", "INNER", "INTERSECT", "INTERVAL", "INTO", "IS", "JOIN",
|
|
480
|
+
"LATERAL", "LEFT", "LIKE", "LIMIT", "LOOKUP", "MERGE", "NATURAL",
|
|
481
|
+
"NEW", "NO", "NOT", "NULL", "NULLS", "OF", "ON", "OR", "ORDER",
|
|
482
|
+
"OUTER", "OVER", "PARTITION", "PRECEDING", "PROTO", "RANGE",
|
|
483
|
+
"RECURSIVE", "RESPECT", "RIGHT", "ROLLUP", "ROWS", "SELECT", "SET",
|
|
484
|
+
"SOME", "STRUCT", "TABLESAMPLE", "THEN", "TO", "TREAT", "TRUE",
|
|
485
|
+
"UNBOUNDED", "UNION", "UNNEST", "USING", "WHEN", "WHERE", "WINDOW",
|
|
486
|
+
"WITH", "WITHIN"
|
|
487
|
+
],
|
|
488
|
+
},
|
|
489
|
+
"mysql": {
|
|
490
|
+
"quote_start": '`',
|
|
491
|
+
"quote_end": '`',
|
|
492
|
+
"case_sensitivity": "PRESERVE", # Depends on collation, default preserve
|
|
493
|
+
"reserved_keywords": [
|
|
494
|
+
"ACCESSIBLE", "ADD", "ALL", "ALTER", "ANALYZE", "AND", "AS", "ASC",
|
|
495
|
+
"ASENSITIVE", "BEFORE", "BETWEEN", "BIGINT", "BINARY", "BLOB", "BOTH",
|
|
496
|
+
"BY", "CALL", "CASCADE", "CASE", "CHANGE", "CHAR", "CHARACTER", "CHECK",
|
|
497
|
+
"COLLATE", "COLUMN", "CONDITION", "CONSTRAINT", "CONTINUE", "CONVERT",
|
|
498
|
+
"CREATE", "CROSS", "CUBE", "CUME_DIST", "CURRENT_DATE", "CURRENT_TIME",
|
|
499
|
+
"CURRENT_TIMESTAMP", "CURRENT_USER", "CURSOR", "DATABASE", "DATABASES",
|
|
500
|
+
"DAY_HOUR", "DAY_MICROSECOND", "DAY_MINUTE", "DAY_SECOND", "DEC",
|
|
501
|
+
"DECIMAL", "DECLARE", "DEFAULT", "DELAYED", "DELETE", "DENSE_RANK",
|
|
502
|
+
"DESC", "DESCRIBE", "DETERMINISTIC", "DISTINCT", "DISTINCTROW", "DIV",
|
|
503
|
+
"DOUBLE", "DROP", "DUAL", "EACH", "ELSE", "ELSEIF", "EMPTY", "ENCLOSED",
|
|
504
|
+
"ESCAPED", "EXCEPT", "EXISTS", "EXIT", "EXPLAIN", "FALSE", "FETCH",
|
|
505
|
+
"FIRST_VALUE", "FLOAT", "FLOAT4", "FLOAT8", "FOR", "FORCE", "FOREIGN",
|
|
506
|
+
"FROM", "FULLTEXT", "FUNCTION", "GENERATED", "GET", "GRANT", "GROUP",
|
|
507
|
+
"GROUPING", "GROUPS", "HAVING", "HIGH_PRIORITY", "HOUR_MICROSECOND",
|
|
508
|
+
"HOUR_MINUTE", "HOUR_SECOND", "IF", "IGNORE", "IN", "INDEX", "INFILE",
|
|
509
|
+
"INNER", "INOUT", "INSENSITIVE", "INSERT", "INT", "INT1", "INT2", "INT3",
|
|
510
|
+
"INT4", "INT8", "INTEGER", "INTERVAL", "INTO", "IO_AFTER_GTIDS",
|
|
511
|
+
"IO_BEFORE_GTIDS", "IS", "ITERATE", "JOIN", "JSON_TABLE", "KEY", "KEYS",
|
|
512
|
+
"KILL", "LAG", "LAST_VALUE", "LATERAL", "LEAD", "LEADING", "LEAVE",
|
|
513
|
+
"LEFT", "LIKE", "LIMIT", "LINEAR", "LINES", "LOAD", "LOCALTIME",
|
|
514
|
+
"LOCALTIMESTAMP", "LOCK", "LONG", "LONGBLOB", "LONGTEXT", "LOOP",
|
|
515
|
+
"LOW_PRIORITY", "MASTER_BIND", "MASTER_SSL_VERIFY_SERVER_CERT", "MATCH",
|
|
516
|
+
"MAXVALUE", "MEDIUMBLOB", "MEDIUMINT", "MEDIUMTEXT", "MIDDLEINT",
|
|
517
|
+
"MINUTE_MICROSECOND", "MINUTE_SECOND", "MOD", "MODIFIES", "NATURAL",
|
|
518
|
+
"NOT", "NO_WRITE_TO_BINLOG", "NTH_VALUE", "NTILE", "NULL", "NUMERIC",
|
|
519
|
+
"OF", "ON", "OPTIMIZE", "OPTIMIZER_COSTS", "OPTION", "OPTIONALLY",
|
|
520
|
+
"OR", "ORDER", "OUT", "OUTER", "OUTFILE", "OVER", "PARTITION",
|
|
521
|
+
"PERCENT_RANK", "PRECISION", "PRIMARY", "PROCEDURE", "PURGE", "RANGE",
|
|
522
|
+
"RANK", "READ", "READS", "READ_WRITE", "REAL", "RECURSIVE", "REFERENCES",
|
|
523
|
+
"REGEXP", "RELEASE", "RENAME", "REPEAT", "REPLACE", "REQUIRE", "RESIGNAL",
|
|
524
|
+
"RESTRICT", "RETURN", "REVOKE", "RIGHT", "RLIKE", "ROW", "ROWS",
|
|
525
|
+
"ROW_NUMBER", "SCHEMA", "SCHEMAS", "SECOND_MICROSECOND", "SELECT",
|
|
526
|
+
"SENSITIVE", "SEPARATOR", "SET", "SHOW", "SIGNAL", "SMALLINT", "SPATIAL",
|
|
527
|
+
"SPECIFIC", "SQL", "SQLEXCEPTION", "SQLSTATE", "SQLWARNING",
|
|
528
|
+
"SQL_BIG_RESULT", "SQL_CALC_FOUND_ROWS", "SQL_SMALL_RESULT", "SSL",
|
|
529
|
+
"STARTING", "STORED", "STRAIGHT_JOIN", "SYSTEM", "TABLE", "TERMINATED",
|
|
530
|
+
"THEN", "TINYBLOB", "TINYINT", "TINYTEXT", "TO", "TRAILING", "TRIGGER",
|
|
531
|
+
"TRUE", "UNDO", "UNION", "UNIQUE", "UNLOCK", "UNSIGNED", "UPDATE",
|
|
532
|
+
"USAGE", "USE", "USING", "UTC_DATE", "UTC_TIME", "UTC_TIMESTAMP",
|
|
533
|
+
"VALUES", "VARBINARY", "VARCHAR", "VARCHARACTER", "VARYING", "VIRTUAL",
|
|
534
|
+
"WHEN", "WHERE", "WHILE", "WINDOW", "WITH", "WRITE", "XOR", "YEAR_MONTH",
|
|
535
|
+
"ZEROFILL"
|
|
536
|
+
],
|
|
537
|
+
},
|
|
538
|
+
"redshift": {
|
|
539
|
+
"quote_start": '"',
|
|
540
|
+
"quote_end": '"',
|
|
541
|
+
"case_sensitivity": "LOWER", # Redshift folds to lowercase
|
|
542
|
+
"reserved_keywords": [
|
|
543
|
+
"AES128", "AES256", "ALL", "ALLOWOVERWRITE", "ANALYSE", "ANALYZE",
|
|
544
|
+
"AND", "ANY", "ARRAY", "AS", "ASC", "AUTHORIZATION", "BACKUP",
|
|
545
|
+
"BETWEEN", "BINARY", "BLANKSASNULL", "BOTH", "BYTEDICT", "BZIP2",
|
|
546
|
+
"CASE", "CAST", "CHECK", "COLLATE", "COLUMN", "CONSTRAINT", "CREATE",
|
|
547
|
+
"CREDENTIALS", "CROSS", "CURRENT_DATE", "CURRENT_TIME",
|
|
548
|
+
"CURRENT_TIMESTAMP", "CURRENT_USER", "CURRENT_USER_ID", "DEFAULT",
|
|
549
|
+
"DEFERRABLE", "DEFLATE", "DEFRAG", "DELTA", "DELTA32K", "DESC",
|
|
550
|
+
"DISABLE", "DISTINCT", "DO", "ELSE", "EMPTYASNULL", "ENABLE", "ENCODE",
|
|
551
|
+
"ENCRYPT", "ENCRYPTION", "END", "EXCEPT", "EXPLICIT", "FALSE", "FOR",
|
|
552
|
+
"FOREIGN", "FREEZE", "FROM", "FULL", "GLOBALDICT256", "GLOBALDICT64K",
|
|
553
|
+
"GRANT", "GROUP", "GZIP", "HAVING", "IDENTITY", "IGNORE", "ILIKE",
|
|
554
|
+
"IN", "INITIALLY", "INNER", "INTERSECT", "INTO", "IS", "ISNULL",
|
|
555
|
+
"JOIN", "LANGUAGE", "LEADING", "LEFT", "LIKE", "LIMIT", "LOCALTIME",
|
|
556
|
+
"LOCALTIMESTAMP", "LUN", "LUNS", "LZO", "LZOP", "MINUS", "MOSTLY13",
|
|
557
|
+
"MOSTLY32", "MOSTLY8", "NATURAL", "NEW", "NOT", "NOTNULL", "NULL",
|
|
558
|
+
"NULLS", "OFF", "OFFLINE", "OFFSET", "OID", "OLD", "ON", "ONLY",
|
|
559
|
+
"OPEN", "OR", "ORDER", "OUTER", "OVERLAPS", "PARALLEL", "PARTITION",
|
|
560
|
+
"PERCENT", "PERMISSIONS", "PLACING", "PRIMARY", "RAW", "READRATIO",
|
|
561
|
+
"RECOVER", "REFERENCES", "RESPECT", "REJECTLOG", "RESORT", "RESTORE",
|
|
562
|
+
"RIGHT", "SELECT", "SESSION_USER", "SIMILAR", "SNAPSHOT", "SOME",
|
|
563
|
+
"SYSDATE", "SYSTEM", "TABLE", "TAG", "TDES", "TEXT255", "TEXT32K",
|
|
564
|
+
"THEN", "TIMESTAMP", "TO", "TOP", "TRAILING", "TRUE", "TRUNCATECOLUMNS",
|
|
565
|
+
"UNION", "UNIQUE", "USER", "USING", "VERBOSE", "WALLET", "WHEN",
|
|
566
|
+
"WHERE", "WITH", "WITHOUT"
|
|
567
|
+
],
|
|
568
|
+
},
|
|
569
|
+
"oracle": {
|
|
570
|
+
"quote_start": '"',
|
|
571
|
+
"quote_end": '"',
|
|
572
|
+
"case_sensitivity": "UPPER", # Oracle folds to uppercase
|
|
573
|
+
"reserved_keywords": [
|
|
574
|
+
"ACCESS", "ADD", "ALL", "ALTER", "AND", "ANY", "AS", "ASC", "AUDIT",
|
|
575
|
+
"BETWEEN", "BY", "CHAR", "CHECK", "CLUSTER", "COLUMN", "COLUMN_VALUE",
|
|
576
|
+
"COMMENT", "COMPRESS", "CONNECT", "CREATE", "CURRENT", "DATE",
|
|
577
|
+
"DECIMAL", "DEFAULT", "DELETE", "DESC", "DISTINCT", "DROP", "ELSE",
|
|
578
|
+
"EXCLUSIVE", "EXISTS", "FILE", "FLOAT", "FOR", "FROM", "GRANT",
|
|
579
|
+
"GROUP", "HAVING", "IDENTIFIED", "IMMEDIATE", "IN", "INCREMENT",
|
|
580
|
+
"INDEX", "INITIAL", "INSERT", "INTEGER", "INTERSECT", "INTO", "IS",
|
|
581
|
+
"LEVEL", "LIKE", "LOCK", "LONG", "MAXEXTENTS", "MINUS", "MLSLABEL",
|
|
582
|
+
"MODE", "MODIFY", "NESTED_TABLE_ID", "NOAUDIT", "NOCOMPRESS", "NOT",
|
|
583
|
+
"NOWAIT", "NULL", "NUMBER", "OF", "OFFLINE", "ON", "ONLINE", "OPTION",
|
|
584
|
+
"OR", "ORDER", "PCTFREE", "PRIOR", "PUBLIC", "RAW", "RENAME",
|
|
585
|
+
"RESOURCE", "REVOKE", "ROW", "ROWID", "ROWNUM", "ROWS", "SELECT",
|
|
586
|
+
"SESSION", "SET", "SHARE", "SIZE", "SMALLINT", "START", "SUCCESSFUL",
|
|
587
|
+
"SYNONYM", "SYSDATE", "TABLE", "THEN", "TO", "TRIGGER", "UID", "UNION",
|
|
588
|
+
"UNIQUE", "UPDATE", "USER", "VALIDATE", "VALUES", "VARCHAR", "VARCHAR2",
|
|
589
|
+
"VIEW", "WHENEVER", "WHERE", "WITH"
|
|
590
|
+
],
|
|
591
|
+
},
|
|
592
|
+
"sqlserver": {
|
|
593
|
+
"quote_start": '[',
|
|
594
|
+
"quote_end": ']',
|
|
595
|
+
"case_sensitivity": "PRESERVE", # Depends on collation
|
|
596
|
+
"reserved_keywords": [
|
|
597
|
+
"ADD", "ALL", "ALTER", "AND", "ANY", "AS", "ASC", "AUTHORIZATION",
|
|
598
|
+
"BACKUP", "BEGIN", "BETWEEN", "BREAK", "BROWSE", "BULK", "BY",
|
|
599
|
+
"CASCADE", "CASE", "CHECK", "CHECKPOINT", "CLOSE", "CLUSTERED",
|
|
600
|
+
"COALESCE", "COLLATE", "COLUMN", "COMMIT", "COMPUTE", "CONSTRAINT",
|
|
601
|
+
"CONTAINS", "CONTAINSTABLE", "CONTINUE", "CONVERT", "CREATE", "CROSS",
|
|
602
|
+
"CURRENT", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
|
|
603
|
+
"CURRENT_USER", "CURSOR", "DATABASE", "DBCC", "DEALLOCATE", "DECLARE",
|
|
604
|
+
"DEFAULT", "DELETE", "DENY", "DESC", "DISK", "DISTINCT", "DISTRIBUTED",
|
|
605
|
+
"DOUBLE", "DROP", "DUMP", "ELSE", "END", "ERRLVL", "ESCAPE", "EXCEPT",
|
|
606
|
+
"EXEC", "EXECUTE", "EXISTS", "EXIT", "EXTERNAL", "FETCH", "FILE",
|
|
607
|
+
"FILLFACTOR", "FOR", "FOREIGN", "FREETEXT", "FREETEXTTABLE", "FROM",
|
|
608
|
+
"FULL", "FUNCTION", "GOTO", "GRANT", "GROUP", "HAVING", "HOLDLOCK",
|
|
609
|
+
"IDENTITY", "IDENTITY_INSERT", "IDENTITYCOL", "IF", "IN", "INDEX",
|
|
610
|
+
"INNER", "INSERT", "INTERSECT", "INTO", "IS", "JOIN", "KEY", "KILL",
|
|
611
|
+
"LEFT", "LIKE", "LINENO", "LOAD", "MERGE", "NATIONAL", "NOCHECK",
|
|
612
|
+
"NONCLUSTERED", "NOT", "NULL", "NULLIF", "OF", "OFF", "OFFSETS", "ON",
|
|
613
|
+
"OPEN", "OPENDATASOURCE", "OPENQUERY", "OPENROWSET", "OPENXML",
|
|
614
|
+
"OPTION", "OR", "ORDER", "OUTER", "OVER", "PERCENT", "PIVOT", "PLAN",
|
|
615
|
+
"PRECISION", "PRIMARY", "PRINT", "PROC", "PROCEDURE", "PUBLIC",
|
|
616
|
+
"RAISERROR", "READ", "READTEXT", "RECONFIGURE", "REFERENCES",
|
|
617
|
+
"REPLICATION", "RESTORE", "RESTRICT", "RETURN", "REVERT", "REVOKE",
|
|
618
|
+
"RIGHT", "ROLLBACK", "ROWCOUNT", "ROWGUIDCOL", "RULE", "SAVE",
|
|
619
|
+
"SCHEMA", "SECURITYAUDIT", "SELECT", "SEMANTICKEYPHRASETABLE",
|
|
620
|
+
"SEMANTICSIMILARITYDETAILSTABLE", "SEMANTICSIMILARITYTABLE",
|
|
621
|
+
"SESSION_USER", "SET", "SETUSER", "SHUTDOWN", "SOME", "STATISTICS",
|
|
622
|
+
"SYSTEM_USER", "TABLE", "TABLESAMPLE", "TEXTSIZE", "THEN", "TO",
|
|
623
|
+
"TOP", "TRAN", "TRANSACTION", "TRIGGER", "TRUNCATE", "TRY_CONVERT",
|
|
624
|
+
"TSEQUAL", "UNION", "UNIQUE", "UNPIVOT", "UPDATE", "UPDATETEXT",
|
|
625
|
+
"USE", "USER", "VALUES", "VARYING", "VIEW", "WAITFOR", "WHEN",
|
|
626
|
+
"WHERE", "WHILE", "WITH", "WITHIN GROUP", "WRITETEXT"
|
|
627
|
+
],
|
|
628
|
+
},
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
@classmethod
|
|
632
|
+
def get_syntax_rule(cls, adapter_name: str) -> Optional[Dict[str, Any]]:
    """Look up the syntax rule entry registered for an adapter.

    The adapter name is lowercased before the lookup, so matching is
    case-insensitive with respect to the keys of ``SYNTAX_RULES``.

    Returns:
        The rule mapping for the adapter, or ``None`` when no entry exists.
    """
    adapter_key = adapter_name.lower()
    return cls.SYNTAX_RULES.get(adapter_key)
|
|
635
|
+
|
|
636
|
+
@classmethod
|
|
637
|
+
def quote_identifier(cls, adapter_name: str, identifier: str) -> str:
    """Quote an identifier using the adapter's quoting rules.

    The identifier is wrapped in the adapter's ``quote_start`` /
    ``quote_end`` characters. When the adapter has no syntax rule, double
    quotes are used as the default.

    Any occurrence of the closing quote character inside the identifier is
    doubled, so the identifier cannot terminate the quoted region early
    (e.g. ``a]b`` -> ``[a]]b]``, ``a"b`` -> ``"a""b"``). Identifiers that do
    not contain the closing quote character are emitted exactly as before.

    NOTE(review): doubling is the escape convention for ANSI double quotes,
    MySQL backticks and SQL Server brackets; an engine that escapes with a
    backslash instead would need its own handling -- confirm against the
    full adapter list.

    Args:
        adapter_name: Adapter whose quoting rules should be applied.
        identifier: Raw (unquoted) identifier.

    Returns:
        The quoted identifier.
    """
    rule = cls.get_syntax_rule(adapter_name)
    if rule:
        quote_start, quote_end = rule["quote_start"], rule["quote_end"]
    else:
        # Default to double quotes
        quote_start = quote_end = '"'

    # Escape embedded closing quotes by doubling them; without this an
    # identifier such as `a"b` would yield the malformed token "a"b".
    escaped = identifier.replace(quote_end, quote_end * 2)
    return f"{quote_start}{escaped}{quote_end}"
|
|
643
|
+
|
|
644
|
+
@classmethod
|
|
645
|
+
def needs_quoting(cls, adapter_name: str, identifier: str) -> bool:
    """Report whether an identifier must be quoted for the given adapter.

    An identifier needs quoting when its uppercased form appears in the
    adapter's ``reserved_keywords`` list, or when it is not a plain
    identifier (fails ``str.isidentifier()``, or contains a space or a
    hyphen). Adapters without a syntax rule never require quoting.

    Returns:
        ``True`` if the identifier should be quoted, ``False`` otherwise.
    """
    syntax = cls.get_syntax_rule(adapter_name)
    if not syntax:
        # Unknown adapter: no rules to check against.
        return False

    # Keyword lists are stored uppercase, so compare case-insensitively.
    reserved_words = syntax.get("reserved_keywords", [])
    if identifier.upper() in reserved_words:
        return True

    # Anything that is not a simple word (special characters, spaces,
    # hyphens) has to be quoted as well.
    is_plain = (
        identifier.isidentifier()
        and " " not in identifier
        and "-" not in identifier
    )
    return not is_plain
|
|
661
|
+
|
|
662
|
+
@classmethod
|
|
663
|
+
def normalize_identifier(cls, adapter_name: str, identifier: str) -> str:
    """Apply the adapter's case-folding rule to an identifier.

    The rule's ``case_sensitivity`` value selects the transformation:
    ``"UPPER"`` uppercases, ``"LOWER"`` lowercases, and anything else
    (including a missing value or an unknown adapter) leaves the identifier
    untouched.

    Returns:
        The identifier in the adapter's normalized case.
    """
    syntax = cls.get_syntax_rule(adapter_name)
    # No rule for this adapter behaves the same as an explicit PRESERVE.
    folding = syntax.get("case_sensitivity", "PRESERVE") if syntax else "PRESERVE"

    if folding == "UPPER":
        return identifier.upper()
    if folding == "LOWER":
        return identifier.lower()
    return identifier  # PRESERVE
|