dvt-core 0.59.0a51__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dbt/__init__.py +7 -0
- dbt/_pydantic_shim.py +26 -0
- dbt/artifacts/__init__.py +0 -0
- dbt/artifacts/exceptions/__init__.py +1 -0
- dbt/artifacts/exceptions/schemas.py +31 -0
- dbt/artifacts/resources/__init__.py +116 -0
- dbt/artifacts/resources/base.py +67 -0
- dbt/artifacts/resources/types.py +93 -0
- dbt/artifacts/resources/v1/analysis.py +10 -0
- dbt/artifacts/resources/v1/catalog.py +23 -0
- dbt/artifacts/resources/v1/components.py +274 -0
- dbt/artifacts/resources/v1/config.py +277 -0
- dbt/artifacts/resources/v1/documentation.py +11 -0
- dbt/artifacts/resources/v1/exposure.py +51 -0
- dbt/artifacts/resources/v1/function.py +52 -0
- dbt/artifacts/resources/v1/generic_test.py +31 -0
- dbt/artifacts/resources/v1/group.py +21 -0
- dbt/artifacts/resources/v1/hook.py +11 -0
- dbt/artifacts/resources/v1/macro.py +29 -0
- dbt/artifacts/resources/v1/metric.py +172 -0
- dbt/artifacts/resources/v1/model.py +145 -0
- dbt/artifacts/resources/v1/owner.py +10 -0
- dbt/artifacts/resources/v1/saved_query.py +111 -0
- dbt/artifacts/resources/v1/seed.py +41 -0
- dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
- dbt/artifacts/resources/v1/semantic_model.py +314 -0
- dbt/artifacts/resources/v1/singular_test.py +14 -0
- dbt/artifacts/resources/v1/snapshot.py +91 -0
- dbt/artifacts/resources/v1/source_definition.py +84 -0
- dbt/artifacts/resources/v1/sql_operation.py +10 -0
- dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
- dbt/artifacts/schemas/__init__.py +0 -0
- dbt/artifacts/schemas/base.py +191 -0
- dbt/artifacts/schemas/batch_results.py +24 -0
- dbt/artifacts/schemas/catalog/__init__.py +11 -0
- dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
- dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
- dbt/artifacts/schemas/freshness/__init__.py +1 -0
- dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
- dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
- dbt/artifacts/schemas/manifest/__init__.py +2 -0
- dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
- dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
- dbt/artifacts/schemas/results.py +147 -0
- dbt/artifacts/schemas/run/__init__.py +2 -0
- dbt/artifacts/schemas/run/v5/__init__.py +0 -0
- dbt/artifacts/schemas/run/v5/run.py +184 -0
- dbt/artifacts/schemas/upgrades/__init__.py +4 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
- dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
- dbt/artifacts/utils/validation.py +153 -0
- dbt/cli/__init__.py +1 -0
- dbt/cli/context.py +17 -0
- dbt/cli/exceptions.py +57 -0
- dbt/cli/flags.py +560 -0
- dbt/cli/main.py +2660 -0
- dbt/cli/option_types.py +121 -0
- dbt/cli/options.py +80 -0
- dbt/cli/params.py +844 -0
- dbt/cli/requires.py +490 -0
- dbt/cli/resolvers.py +60 -0
- dbt/cli/types.py +40 -0
- dbt/clients/__init__.py +0 -0
- dbt/clients/checked_load.py +83 -0
- dbt/clients/git.py +164 -0
- dbt/clients/jinja.py +206 -0
- dbt/clients/jinja_static.py +245 -0
- dbt/clients/registry.py +192 -0
- dbt/clients/yaml_helper.py +68 -0
- dbt/compilation.py +876 -0
- dbt/compute/__init__.py +14 -0
- dbt/compute/engines/__init__.py +12 -0
- dbt/compute/engines/spark_engine.py +642 -0
- dbt/compute/federated_executor.py +1080 -0
- dbt/compute/filter_pushdown.py +273 -0
- dbt/compute/jar_provisioning.py +273 -0
- dbt/compute/java_compat.py +689 -0
- dbt/compute/jdbc_utils.py +1252 -0
- dbt/compute/metadata/__init__.py +63 -0
- dbt/compute/metadata/adapters_registry.py +370 -0
- dbt/compute/metadata/catalog_store.py +1036 -0
- dbt/compute/metadata/registry.py +674 -0
- dbt/compute/metadata/store.py +1020 -0
- dbt/compute/smart_selector.py +377 -0
- dbt/compute/spark_logger.py +272 -0
- dbt/compute/strategies/__init__.py +55 -0
- dbt/compute/strategies/base.py +165 -0
- dbt/compute/strategies/dataproc.py +207 -0
- dbt/compute/strategies/emr.py +203 -0
- dbt/compute/strategies/local.py +472 -0
- dbt/compute/strategies/standalone.py +262 -0
- dbt/config/__init__.py +4 -0
- dbt/config/catalogs.py +94 -0
- dbt/config/compute.py +513 -0
- dbt/config/dvt_profile.py +408 -0
- dbt/config/profile.py +422 -0
- dbt/config/project.py +888 -0
- dbt/config/project_utils.py +48 -0
- dbt/config/renderer.py +231 -0
- dbt/config/runtime.py +564 -0
- dbt/config/selectors.py +208 -0
- dbt/config/utils.py +77 -0
- dbt/constants.py +28 -0
- dbt/context/__init__.py +0 -0
- dbt/context/base.py +745 -0
- dbt/context/configured.py +135 -0
- dbt/context/context_config.py +382 -0
- dbt/context/docs.py +82 -0
- dbt/context/exceptions_jinja.py +178 -0
- dbt/context/macro_resolver.py +195 -0
- dbt/context/macros.py +171 -0
- dbt/context/manifest.py +72 -0
- dbt/context/providers.py +2249 -0
- dbt/context/query_header.py +13 -0
- dbt/context/secret.py +58 -0
- dbt/context/target.py +74 -0
- dbt/contracts/__init__.py +0 -0
- dbt/contracts/files.py +413 -0
- dbt/contracts/graph/__init__.py +0 -0
- dbt/contracts/graph/manifest.py +1904 -0
- dbt/contracts/graph/metrics.py +97 -0
- dbt/contracts/graph/model_config.py +70 -0
- dbt/contracts/graph/node_args.py +42 -0
- dbt/contracts/graph/nodes.py +1806 -0
- dbt/contracts/graph/semantic_manifest.py +232 -0
- dbt/contracts/graph/unparsed.py +811 -0
- dbt/contracts/project.py +419 -0
- dbt/contracts/results.py +53 -0
- dbt/contracts/selection.py +23 -0
- dbt/contracts/sql.py +85 -0
- dbt/contracts/state.py +68 -0
- dbt/contracts/util.py +46 -0
- dbt/deprecations.py +348 -0
- dbt/deps/__init__.py +0 -0
- dbt/deps/base.py +152 -0
- dbt/deps/git.py +195 -0
- dbt/deps/local.py +79 -0
- dbt/deps/registry.py +130 -0
- dbt/deps/resolver.py +149 -0
- dbt/deps/tarball.py +120 -0
- dbt/docs/source/_ext/dbt_click.py +119 -0
- dbt/docs/source/conf.py +32 -0
- dbt/env_vars.py +64 -0
- dbt/event_time/event_time.py +40 -0
- dbt/event_time/sample_window.py +60 -0
- dbt/events/__init__.py +15 -0
- dbt/events/base_types.py +36 -0
- dbt/events/core_types_pb2.py +2 -0
- dbt/events/logging.py +108 -0
- dbt/events/types.py +2516 -0
- dbt/exceptions.py +1486 -0
- dbt/flags.py +89 -0
- dbt/graph/__init__.py +11 -0
- dbt/graph/cli.py +249 -0
- dbt/graph/graph.py +172 -0
- dbt/graph/queue.py +214 -0
- dbt/graph/selector.py +374 -0
- dbt/graph/selector_methods.py +975 -0
- dbt/graph/selector_spec.py +222 -0
- dbt/graph/thread_pool.py +18 -0
- dbt/hooks.py +21 -0
- dbt/include/README.md +49 -0
- dbt/include/__init__.py +3 -0
- dbt/include/data/adapters_registry.duckdb +0 -0
- dbt/include/data/build_comprehensive_registry.py +1254 -0
- dbt/include/data/build_registry.py +242 -0
- dbt/include/data/csv/adapter_queries.csv +33 -0
- dbt/include/data/csv/syntax_rules.csv +9 -0
- dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
- dbt/include/data/csv/type_mappings_databricks.csv +30 -0
- dbt/include/data/csv/type_mappings_mysql.csv +40 -0
- dbt/include/data/csv/type_mappings_oracle.csv +30 -0
- dbt/include/data/csv/type_mappings_postgres.csv +56 -0
- dbt/include/data/csv/type_mappings_redshift.csv +33 -0
- dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
- dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
- dbt/include/dvt_starter_project/README.md +15 -0
- dbt/include/dvt_starter_project/__init__.py +3 -0
- dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/dvt_project.yml +39 -0
- dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
- dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
- dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
- dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
- dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
- dbt/internal_deprecations.py +26 -0
- dbt/jsonschemas/__init__.py +3 -0
- dbt/jsonschemas/jsonschemas.py +309 -0
- dbt/jsonschemas/project/0.0.110.json +4717 -0
- dbt/jsonschemas/project/0.0.85.json +2015 -0
- dbt/jsonschemas/resources/0.0.110.json +2636 -0
- dbt/jsonschemas/resources/0.0.85.json +2536 -0
- dbt/jsonschemas/resources/latest.json +6773 -0
- dbt/links.py +4 -0
- dbt/materializations/__init__.py +0 -0
- dbt/materializations/incremental/__init__.py +0 -0
- dbt/materializations/incremental/microbatch.py +236 -0
- dbt/mp_context.py +8 -0
- dbt/node_types.py +37 -0
- dbt/parser/__init__.py +23 -0
- dbt/parser/analysis.py +21 -0
- dbt/parser/base.py +548 -0
- dbt/parser/common.py +266 -0
- dbt/parser/docs.py +52 -0
- dbt/parser/fixtures.py +51 -0
- dbt/parser/functions.py +30 -0
- dbt/parser/generic_test.py +100 -0
- dbt/parser/generic_test_builders.py +333 -0
- dbt/parser/hooks.py +122 -0
- dbt/parser/macros.py +137 -0
- dbt/parser/manifest.py +2208 -0
- dbt/parser/models.py +573 -0
- dbt/parser/partial.py +1178 -0
- dbt/parser/read_files.py +445 -0
- dbt/parser/schema_generic_tests.py +422 -0
- dbt/parser/schema_renderer.py +111 -0
- dbt/parser/schema_yaml_readers.py +935 -0
- dbt/parser/schemas.py +1466 -0
- dbt/parser/search.py +149 -0
- dbt/parser/seeds.py +28 -0
- dbt/parser/singular_test.py +20 -0
- dbt/parser/snapshots.py +44 -0
- dbt/parser/sources.py +558 -0
- dbt/parser/sql.py +62 -0
- dbt/parser/unit_tests.py +621 -0
- dbt/plugins/__init__.py +20 -0
- dbt/plugins/contracts.py +9 -0
- dbt/plugins/exceptions.py +2 -0
- dbt/plugins/manager.py +163 -0
- dbt/plugins/manifest.py +21 -0
- dbt/profiler.py +20 -0
- dbt/py.typed +1 -0
- dbt/query_analyzer.py +410 -0
- dbt/runners/__init__.py +2 -0
- dbt/runners/exposure_runner.py +7 -0
- dbt/runners/no_op_runner.py +45 -0
- dbt/runners/saved_query_runner.py +7 -0
- dbt/selected_resources.py +8 -0
- dbt/task/__init__.py +0 -0
- dbt/task/base.py +506 -0
- dbt/task/build.py +197 -0
- dbt/task/clean.py +56 -0
- dbt/task/clone.py +161 -0
- dbt/task/compile.py +150 -0
- dbt/task/compute.py +458 -0
- dbt/task/debug.py +513 -0
- dbt/task/deps.py +280 -0
- dbt/task/docs/__init__.py +3 -0
- dbt/task/docs/api/__init__.py +23 -0
- dbt/task/docs/api/catalog.py +204 -0
- dbt/task/docs/api/lineage.py +234 -0
- dbt/task/docs/api/profile.py +204 -0
- dbt/task/docs/api/spark.py +186 -0
- dbt/task/docs/generate.py +1002 -0
- dbt/task/docs/index.html +250 -0
- dbt/task/docs/serve.py +174 -0
- dbt/task/dvt_output.py +509 -0
- dbt/task/dvt_run.py +282 -0
- dbt/task/dvt_seed.py +806 -0
- dbt/task/freshness.py +322 -0
- dbt/task/function.py +121 -0
- dbt/task/group_lookup.py +46 -0
- dbt/task/init.py +1022 -0
- dbt/task/java.py +316 -0
- dbt/task/list.py +236 -0
- dbt/task/metadata.py +804 -0
- dbt/task/migrate.py +714 -0
- dbt/task/printer.py +175 -0
- dbt/task/profile.py +1489 -0
- dbt/task/profile_serve.py +662 -0
- dbt/task/retract.py +441 -0
- dbt/task/retry.py +175 -0
- dbt/task/run.py +1647 -0
- dbt/task/run_operation.py +141 -0
- dbt/task/runnable.py +758 -0
- dbt/task/seed.py +103 -0
- dbt/task/show.py +149 -0
- dbt/task/snapshot.py +56 -0
- dbt/task/spark.py +414 -0
- dbt/task/sql.py +110 -0
- dbt/task/target_sync.py +814 -0
- dbt/task/test.py +464 -0
- dbt/tests/fixtures/__init__.py +1 -0
- dbt/tests/fixtures/project.py +620 -0
- dbt/tests/util.py +651 -0
- dbt/tracking.py +529 -0
- dbt/utils/__init__.py +3 -0
- dbt/utils/artifact_upload.py +151 -0
- dbt/utils/utils.py +408 -0
- dbt/version.py +271 -0
- dvt_cli/__init__.py +158 -0
- dvt_core-0.59.0a51.dist-info/METADATA +288 -0
- dvt_core-0.59.0a51.dist-info/RECORD +299 -0
- dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
- dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
- dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,1254 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Build comprehensive datatype_mappings for DVT.
|
|
4
|
+
|
|
5
|
+
This script builds version-aware type mappings for all major dbt adapters.
|
|
6
|
+
Spark versions: 3.x (3.0-3.5), 4.x (4.0+)
|
|
7
|
+
|
|
8
|
+
Key changes in Spark 4.0:
|
|
9
|
+
- MySQL: SMALLINT -> ShortType (was IntegerType), FLOAT -> FloatType (was DoubleType)
|
|
10
|
+
- PostgreSQL: TIMESTAMP WITH TIME ZONE handling changed
|
|
11
|
+
- New VARIANT type for semi-structured data
|
|
12
|
+
- Spark 3.4+: TIMESTAMP_NTZ support
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from pathlib import Path
from typing import Optional

import duckdb
|
|
17
|
+
|
|
18
|
+
# Accumulator for every registered mapping. Each entry is the 7-tuple
# (adapter, adapter_type, spark_type, spark_version, is_complex, cast_expr, notes)
# in exactly the order the add() calls were made.
MAPPINGS: list = []


def add(adapter: str, adapter_type: str, spark_type: str,
        spark_version: str = "all", is_complex: bool = False,
        cast_expr: Optional[str] = None, notes: str = "") -> None:
    """Register one adapter-type -> Spark-type mapping in ``MAPPINGS``.

    The arguments are stored verbatim (no validation or normalisation) as a
    single tuple appended to the module-level ``MAPPINGS`` list.

    Args:
        adapter: Adapter name, e.g. ``"postgres"`` or ``"mysql"``.
        adapter_type: Type name as spelled by the adapter, e.g. ``"INT4"``.
        spark_type: Name of the corresponding Spark type, e.g. ``"IntegerType"``.
        spark_version: Spark version the row applies to. Callers in this file
            pass ``"all"``, ``"3.x"`` or ``"4.x"``.
        is_complex: Flag stored with the row; callers in this file set it for
            array/struct types.
        cast_expr: Optional cast expression stored with the row; ``None`` when
            the caller does not supply one.
        notes: Free-form human-readable description.
    """
    MAPPINGS.append(
        (adapter, adapter_type, spark_type, spark_version, is_complex, cast_expr, notes)
    )
|
|
26
|
+
|
|
27
|
+
# =============================================================================
|
|
28
|
+
# POSTGRES (and PostgreSQL-compatible: AlloyDB, Materialize, TimescaleDB, CrateDB)
|
|
29
|
+
# =============================================================================
|
|
30
|
+
def add_postgres_types():
    """Register every PostgreSQL type mapping through ``add``.

    The mappings are kept in a literal table and fed to ``add`` one row at a
    time, in table order, always with ``"postgres"`` as the adapter.
    """
    # Row layout: (adapter_type, spark_type, spark_version, is_complex, notes)
    pg_rows = (
        # Numeric types
        ("SMALLINT", "ShortType", "all", False, "16-bit signed integer"),
        ("INT2", "ShortType", "all", False, "Alias for SMALLINT"),
        ("INTEGER", "IntegerType", "all", False, "32-bit signed integer"),
        ("INT", "IntegerType", "all", False, "Alias for INTEGER"),
        ("INT4", "IntegerType", "all", False, "Alias for INTEGER"),
        ("BIGINT", "LongType", "all", False, "64-bit signed integer"),
        ("INT8", "LongType", "all", False, "Alias for BIGINT"),
        ("SERIAL", "IntegerType", "all", False, "Auto-incrementing integer"),
        ("BIGSERIAL", "LongType", "all", False, "Auto-incrementing bigint"),
        ("SMALLSERIAL", "ShortType", "all", False, "Auto-incrementing smallint"),
        # Decimal/Numeric
        ("DECIMAL", "DecimalType", "all", False, "Exact numeric with precision"),
        ("NUMERIC", "DecimalType", "all", False, "Alias for DECIMAL"),
        ("MONEY", "DecimalType", "all", False, "Currency amount"),
        # Floating point
        ("REAL", "FloatType", "all", False, "32-bit floating point"),
        ("FLOAT4", "FloatType", "all", False, "Alias for REAL"),
        ("DOUBLE PRECISION", "DoubleType", "all", False, "64-bit floating point"),
        ("FLOAT8", "DoubleType", "all", False, "Alias for DOUBLE PRECISION"),
        ("FLOAT", "DoubleType", "all", False, "Floating point (precision-dependent)"),
        # Character types
        ("VARCHAR", "StringType", "all", False, "Variable-length string"),
        ("CHARACTER VARYING", "StringType", "all", False, "Alias for VARCHAR"),
        ("CHAR", "StringType", "all", False, "Fixed-length string"),
        ("CHARACTER", "StringType", "all", False, "Alias for CHAR"),
        ("TEXT", "StringType", "all", False, "Unlimited length string"),
        ("BPCHAR", "StringType", "all", False, "Blank-padded character"),
        ("NAME", "StringType", "all", False, "Internal name type"),
        # Binary
        ("BYTEA", "BinaryType", "all", False, "Binary data"),
        # Boolean
        ("BOOLEAN", "BooleanType", "all", False, "Boolean true/false"),
        ("BOOL", "BooleanType", "all", False, "Alias for BOOLEAN"),
        # Date/Time
        ("DATE", "DateType", "all", False, "Calendar date"),
        ("TIME", "StringType", "all", False, "Time of day (no timezone)"),
        ("TIME WITHOUT TIME ZONE", "StringType", "all", False, "Time without timezone"),
        ("TIME WITH TIME ZONE", "StringType", "all", False, "Time with timezone"),
        ("TIMETZ", "StringType", "all", False, "Alias for TIME WITH TIME ZONE"),
        # Timestamps: separate rows per Spark major version
        ("TIMESTAMP", "TimestampType", "3.x", False,
         "Timestamp without timezone (Spark 3.x)"),
        ("TIMESTAMP", "TimestampNTZType", "4.x", False,
         "Timestamp without timezone (Spark 4.x)"),
        ("TIMESTAMP WITHOUT TIME ZONE", "TimestampType", "3.x", False,
         "Explicit no timezone (Spark 3.x)"),
        ("TIMESTAMP WITHOUT TIME ZONE", "TimestampNTZType", "4.x", False,
         "Explicit no timezone (Spark 4.x)"),
        ("TIMESTAMP WITH TIME ZONE", "TimestampType", "all", False,
         "Timestamp with timezone"),
        ("TIMESTAMPTZ", "TimestampType", "all", False,
         "Alias for TIMESTAMP WITH TIME ZONE"),
        # Interval
        ("INTERVAL", "StringType", "all", False, "Time interval"),
        # UUID
        ("UUID", "StringType", "all", False, "Universally unique identifier"),
        # JSON types: separate rows per Spark major version
        ("JSON", "StringType", "3.x", False, "JSON data (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", False, "JSON data (Spark 4.x with VARIANT)"),
        ("JSONB", "StringType", "3.x", False, "Binary JSON (Spark 3.x)"),
        ("JSONB", "VariantType", "4.x", False, "Binary JSON (Spark 4.x with VARIANT)"),
        # Array types (the only rows flagged is_complex here)
        ("ARRAY", "ArrayType", "all", True, "Array of any type"),
        ("_INT4", "ArrayType", "all", True, "Integer array"),
        ("_TEXT", "ArrayType", "all", True, "Text array"),
        ("_VARCHAR", "ArrayType", "all", True, "Varchar array"),
        # Geometric types (store as string)
        ("POINT", "StringType", "all", False, "Geometric point"),
        ("LINE", "StringType", "all", False, "Infinite line"),
        ("LSEG", "StringType", "all", False, "Line segment"),
        ("BOX", "StringType", "all", False, "Rectangular box"),
        ("PATH", "StringType", "all", False, "Geometric path"),
        ("POLYGON", "StringType", "all", False, "Polygon"),
        ("CIRCLE", "StringType", "all", False, "Circle"),
        # Network types
        ("INET", "StringType", "all", False, "IPv4 or IPv6 address"),
        ("CIDR", "StringType", "all", False, "IPv4 or IPv6 network"),
        ("MACADDR", "StringType", "all", False, "MAC address"),
        ("MACADDR8", "StringType", "all", False, "MAC address (EUI-64)"),
        # Bit string
        ("BIT", "BinaryType", "all", False, "Fixed-length bit string"),
        ("BIT VARYING", "BinaryType", "all", False, "Variable-length bit string"),
        ("VARBIT", "BinaryType", "all", False, "Alias for BIT VARYING"),
        # Text search
        ("TSVECTOR", "StringType", "all", False, "Text search vector"),
        ("TSQUERY", "StringType", "all", False, "Text search query"),
        # Range types
        ("INT4RANGE", "StringType", "all", False, "Integer range"),
        ("INT8RANGE", "StringType", "all", False, "Bigint range"),
        ("NUMRANGE", "StringType", "all", False, "Numeric range"),
        ("TSRANGE", "StringType", "all", False, "Timestamp range"),
        ("TSTZRANGE", "StringType", "all", False, "Timestamp with timezone range"),
        ("DATERANGE", "StringType", "all", False, "Date range"),
        # Other
        ("OID", "LongType", "all", False, "Object identifier"),
        ("REGCLASS", "StringType", "all", False, "Registered class"),
        ("XML", "StringType", "all", False, "XML data"),
    )
    for type_name, spark_name, version, complex_flag, description in pg_rows:
        add("postgres", type_name, spark_name, version, complex_flag, notes=description)
|
|
140
|
+
|
|
141
|
+
# =============================================================================
|
|
142
|
+
# MYSQL (with Spark 4.0 version-specific changes)
|
|
143
|
+
# =============================================================================
|
|
144
|
+
def add_mysql_types():
    """Register every MySQL type mapping through ``add``.

    Rows live in a literal table and are passed to ``add`` in table order with
    ``"mysql"`` as the adapter. Types whose Spark mapping differs between
    major versions appear twice, once tagged ``"3.x"`` and once ``"4.x"``.
    """
    # Row layout: (adapter_type, spark_type, spark_version, notes)
    mysql_rows = (
        # Integer types (SMALLINT / MEDIUMINT UNSIGNED are version-specific)
        ("TINYINT", "ByteType", "all", "8-bit signed integer"),
        ("TINYINT UNSIGNED", "ShortType", "all", "8-bit unsigned integer"),
        ("SMALLINT", "IntegerType", "3.x", "16-bit integer (Spark 3.x reads as INT)"),
        ("SMALLINT", "ShortType", "4.x", "16-bit integer (Spark 4.x reads as SHORT)"),
        ("SMALLINT UNSIGNED", "IntegerType", "all", "16-bit unsigned integer"),
        ("MEDIUMINT", "IntegerType", "all", "24-bit signed integer"),
        ("MEDIUMINT UNSIGNED", "LongType", "3.x", "24-bit unsigned (Spark 3.x)"),
        ("MEDIUMINT UNSIGNED", "IntegerType", "4.x", "24-bit unsigned (Spark 4.x)"),
        ("INT", "IntegerType", "all", "32-bit signed integer"),
        ("INTEGER", "IntegerType", "all", "Alias for INT"),
        ("INT UNSIGNED", "LongType", "all", "32-bit unsigned integer"),
        ("BIGINT", "LongType", "all", "64-bit signed integer"),
        ("BIGINT UNSIGNED", "DecimalType", "all", "64-bit unsigned (needs Decimal)"),
        # Floating point (FLOAT is version-specific)
        ("FLOAT", "DoubleType", "3.x", "32-bit float (Spark 3.x reads as DOUBLE)"),
        ("FLOAT", "FloatType", "4.x", "32-bit float (Spark 4.x reads as FLOAT)"),
        ("DOUBLE", "DoubleType", "all", "64-bit floating point"),
        ("DOUBLE PRECISION", "DoubleType", "all", "Alias for DOUBLE"),
        ("REAL", "DoubleType", "all", "Alias for DOUBLE"),
        # Decimal
        ("DECIMAL", "DecimalType", "all", "Exact numeric"),
        ("DEC", "DecimalType", "all", "Alias for DECIMAL"),
        ("NUMERIC", "DecimalType", "all", "Alias for DECIMAL"),
        ("FIXED", "DecimalType", "all", "Alias for DECIMAL"),
        # Bit (BIT(n) is version-specific)
        ("BIT", "BooleanType", "all", "BIT(1) as boolean"),
        ("BIT(1)", "BooleanType", "all", "Single bit as boolean"),
        ("BIT(n)", "LongType", "3.x", "Multi-bit as Long (Spark 3.x)"),
        ("BIT(n)", "BinaryType", "4.x", "Multi-bit as Binary (Spark 4.x)"),
        # String types
        ("CHAR", "StringType", "all", "Fixed-length string"),
        ("VARCHAR", "StringType", "all", "Variable-length string"),
        ("TINYTEXT", "StringType", "all", "255 byte text"),
        ("TEXT", "StringType", "all", "64KB text"),
        ("MEDIUMTEXT", "StringType", "all", "16MB text"),
        ("LONGTEXT", "StringType", "all", "4GB text"),
        # Binary types
        ("BINARY", "BinaryType", "all", "Fixed-length binary"),
        ("VARBINARY", "BinaryType", "all", "Variable-length binary"),
        ("TINYBLOB", "BinaryType", "all", "255 byte blob"),
        ("BLOB", "BinaryType", "all", "64KB blob"),
        ("MEDIUMBLOB", "BinaryType", "all", "16MB blob"),
        ("LONGBLOB", "BinaryType", "all", "4GB blob"),
        # Date/Time
        ("DATE", "DateType", "all", "Calendar date"),
        ("TIME", "StringType", "all", "Time of day"),
        ("DATETIME", "TimestampType", "all", "Date and time"),
        ("TIMESTAMP", "TimestampType", "all", "Timestamp"),
        ("YEAR", "IntegerType", "all", "Year value"),
        # JSON (version-specific)
        ("JSON", "StringType", "3.x", "JSON document (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON document (Spark 4.x)"),
        # Enum and Set
        ("ENUM", "StringType", "all", "Enumeration"),
        ("SET", "StringType", "all", "Set of values"),
        # Spatial types
        ("GEOMETRY", "BinaryType", "all", "Geometry type"),
        ("POINT", "BinaryType", "all", "Point geometry"),
        ("LINESTRING", "BinaryType", "all", "Line geometry"),
        ("POLYGON", "BinaryType", "all", "Polygon geometry"),
        ("GEOMETRYCOLLECTION", "BinaryType", "all", "Geometry collection"),
        ("MULTIPOINT", "BinaryType", "all", "Multiple points"),
        ("MULTILINESTRING", "BinaryType", "all", "Multiple lines"),
        ("MULTIPOLYGON", "BinaryType", "all", "Multiple polygons"),
    )
    for type_name, spark_name, version, description in mysql_rows:
        add("mysql", type_name, spark_name, version, notes=description)
|
|
219
|
+
|
|
220
|
+
# =============================================================================
|
|
221
|
+
# BIGQUERY
|
|
222
|
+
# =============================================================================
|
|
223
|
+
def add_bigquery_types():
    """Register every BigQuery type mapping through ``add``.

    Rows live in a literal table and are passed to ``add`` in table order with
    ``"bigquery"`` as the adapter.
    """
    # Row layout: (adapter_type, spark_type, spark_version, is_complex, notes)
    bq_rows = (
        # Numeric types
        ("INT64", "LongType", "all", False, "64-bit signed integer"),
        ("INTEGER", "LongType", "all", False, "Alias for INT64"),
        ("INT", "LongType", "all", False, "Alias for INT64"),
        ("SMALLINT", "LongType", "all", False, "Alias for INT64"),
        ("BIGINT", "LongType", "all", False, "Alias for INT64"),
        ("TINYINT", "LongType", "all", False, "Alias for INT64"),
        ("BYTEINT", "LongType", "all", False, "Alias for INT64"),
        # Floating point
        ("FLOAT64", "DoubleType", "all", False, "64-bit floating point"),
        ("FLOAT", "DoubleType", "all", False, "Alias for FLOAT64"),
        # Numeric/Decimal
        ("NUMERIC", "DecimalType", "all", False, "38 digits precision, 9 scale"),
        ("DECIMAL", "DecimalType", "all", False, "Alias for NUMERIC"),
        ("BIGNUMERIC", "DecimalType", "all", False, "76 digits precision, 38 scale"),
        ("BIGDECIMAL", "DecimalType", "all", False, "Alias for BIGNUMERIC"),
        # Boolean
        ("BOOL", "BooleanType", "all", False, "Boolean value"),
        ("BOOLEAN", "BooleanType", "all", False, "Alias for BOOL"),
        # String
        ("STRING", "StringType", "all", False, "Variable-length Unicode string"),
        # Binary
        ("BYTES", "BinaryType", "all", False, "Variable-length binary"),
        # Date/Time (DATETIME is version-specific)
        ("DATE", "DateType", "all", False, "Calendar date"),
        ("TIME", "StringType", "all", False, "Time of day"),
        ("DATETIME", "TimestampType", "3.x", False, "Date and time (Spark 3.x)"),
        ("DATETIME", "TimestampNTZType", "4.x", False,
         "Date and time without TZ (Spark 4.x)"),
        ("TIMESTAMP", "TimestampType", "all", False, "Timestamp with microseconds"),
        # Interval
        ("INTERVAL", "StringType", "all", False, "Duration of time"),
        # Complex types (the only rows flagged is_complex here)
        ("ARRAY", "ArrayType", "all", True, "Ordered list"),
        ("STRUCT", "StructType", "all", True, "Ordered fields"),
        ("RECORD", "StructType", "all", True, "Alias for STRUCT"),
        # JSON (version-specific)
        ("JSON", "StringType", "3.x", False, "JSON value (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", False, "JSON value (Spark 4.x)"),
        # Geography
        ("GEOGRAPHY", "StringType", "all", False, "Geographic data (WKT)"),
        # Range
        ("RANGE", "StringType", "all", False, "Range of values"),
    )
    for type_name, spark_name, version, complex_flag, description in bq_rows:
        add("bigquery", type_name, spark_name, version, complex_flag, notes=description)
|
|
277
|
+
|
|
278
|
+
# =============================================================================
|
|
279
|
+
# SNOWFLAKE
|
|
280
|
+
# =============================================================================
|
|
281
|
+
def add_snowflake_types():
    """Pass every Snowflake type row below to ``add`` under the "snowflake" adapter.

    Each row is ``(native_type, spark_type, spark_version, [extra positional], note)``.
    The last element is forwarded as ``notes=``; everything before it is
    forwarded positionally, in order.
    """
    # NOTE(review): the optional ``True`` before the note looks like a
    # "complex type" flag -- confirm against add()'s signature.
    rows = (
        # Numeric
        ("NUMBER", "DecimalType", "all", "Numeric with precision/scale"),
        ("DECIMAL", "DecimalType", "all", "Alias for NUMBER"),
        ("NUMERIC", "DecimalType", "all", "Alias for NUMBER"),
        ("INT", "LongType", "all", "38-digit integer"),
        ("INTEGER", "LongType", "all", "Alias for INT"),
        ("BIGINT", "LongType", "all", "Alias for INT"),
        ("SMALLINT", "LongType", "all", "Alias for INT"),
        ("TINYINT", "LongType", "all", "Alias for INT"),
        ("BYTEINT", "LongType", "all", "Alias for INT"),
        # Floating point
        ("FLOAT", "DoubleType", "all", "64-bit floating point"),
        ("FLOAT4", "DoubleType", "all", "Alias for FLOAT"),
        ("FLOAT8", "DoubleType", "all", "Alias for FLOAT"),
        ("DOUBLE", "DoubleType", "all", "Alias for FLOAT"),
        ("DOUBLE PRECISION", "DoubleType", "all", "Alias for FLOAT"),
        ("REAL", "DoubleType", "all", "Alias for FLOAT"),
        # String
        ("VARCHAR", "StringType", "all", "Variable-length string (16MB)"),
        ("CHAR", "StringType", "all", "Alias for VARCHAR"),
        ("CHARACTER", "StringType", "all", "Alias for VARCHAR"),
        ("STRING", "StringType", "all", "Alias for VARCHAR"),
        ("TEXT", "StringType", "all", "Alias for VARCHAR"),
        ("NCHAR", "StringType", "all", "Unicode character"),
        ("NVARCHAR", "StringType", "all", "Unicode varchar"),
        ("NVARCHAR2", "StringType", "all", "Unicode varchar (Oracle compat)"),
        # Binary
        ("BINARY", "BinaryType", "all", "Variable-length binary (8MB)"),
        ("VARBINARY", "BinaryType", "all", "Alias for BINARY"),
        # Boolean
        ("BOOLEAN", "BooleanType", "all", "Boolean value"),
        # Date/Time
        ("DATE", "DateType", "all", "Calendar date"),
        ("TIME", "StringType", "all", "Time of day"),
        ("DATETIME", "TimestampType", "all", "Alias for TIMESTAMP"),
        ("TIMESTAMP", "TimestampType", "all", "Timestamp without timezone"),
        ("TIMESTAMP_LTZ", "TimestampType", "all", "Timestamp with local timezone"),
        ("TIMESTAMP_NTZ", "TimestampType", "3.x", "Timestamp no timezone (Spark 3.x)"),
        ("TIMESTAMP_NTZ", "TimestampNTZType", "4.x", "Timestamp no timezone (Spark 4.x)"),
        ("TIMESTAMP_TZ", "TimestampType", "all", "Timestamp with timezone"),
        # Semi-structured
        ("VARIANT", "StringType", "3.x", "Semi-structured data (Spark 3.x)"),
        ("VARIANT", "VariantType", "4.x", "Semi-structured data (Spark 4.x)"),
        ("OBJECT", "MapType", "all", True, "Key-value pairs"),
        ("ARRAY", "ArrayType", "all", True, "Array of values"),
        # Geospatial
        ("GEOGRAPHY", "StringType", "all", "Geographic data"),
        ("GEOMETRY", "StringType", "all", "Planar geometry"),
    )
    for *positional, note in rows:
        add("snowflake", *positional, notes=note)
|
|
337
|
+
|
|
338
|
+
# =============================================================================
|
|
339
|
+
# REDSHIFT
|
|
340
|
+
# =============================================================================
|
|
341
|
+
def add_redshift_types():
    """Pass every Redshift type row below to ``add`` under the "redshift" adapter.

    Rows are ``(native_type, spark_type, spark_version, note)``; the note is
    forwarded as ``notes=`` and the other fields positionally, in table order.
    """
    table = (
        # Integer types
        ("SMALLINT", "ShortType", "all", "16-bit signed integer"),
        ("INT2", "ShortType", "all", "Alias for SMALLINT"),
        ("INTEGER", "IntegerType", "all", "32-bit signed integer"),
        ("INT", "IntegerType", "all", "Alias for INTEGER"),
        ("INT4", "IntegerType", "all", "Alias for INTEGER"),
        ("BIGINT", "LongType", "all", "64-bit signed integer"),
        ("INT8", "LongType", "all", "Alias for BIGINT"),
        # Decimal
        ("DECIMAL", "DecimalType", "all", "Exact numeric (38,37)"),
        ("NUMERIC", "DecimalType", "all", "Alias for DECIMAL"),
        # Floating point
        ("REAL", "FloatType", "all", "32-bit floating point"),
        ("FLOAT4", "FloatType", "all", "Alias for REAL"),
        ("DOUBLE PRECISION", "DoubleType", "all", "64-bit floating point"),
        ("FLOAT8", "DoubleType", "all", "Alias for DOUBLE PRECISION"),
        ("FLOAT", "DoubleType", "all", "Alias for DOUBLE PRECISION"),
        # Boolean
        ("BOOLEAN", "BooleanType", "all", "Boolean value"),
        ("BOOL", "BooleanType", "all", "Alias for BOOLEAN"),
        # Character types
        ("CHAR", "StringType", "all", "Fixed-length string (4096)"),
        ("CHARACTER", "StringType", "all", "Alias for CHAR"),
        ("NCHAR", "StringType", "all", "National character"),
        ("BPCHAR", "StringType", "all", "Blank-padded char"),
        ("VARCHAR", "StringType", "all", "Variable-length string (65535)"),
        ("CHARACTER VARYING", "StringType", "all", "Alias for VARCHAR"),
        ("NVARCHAR", "StringType", "all", "National varchar"),
        ("TEXT", "StringType", "all", "Alias for VARCHAR(256)"),
        # Binary
        ("VARBYTE", "BinaryType", "all", "Variable-length binary"),
        ("VARBINARY", "BinaryType", "all", "Alias for VARBYTE"),
        ("BINARY VARYING", "BinaryType", "all", "Alias for VARBYTE"),
        # Date/Time
        ("DATE", "DateType", "all", "Calendar date"),
        ("TIME", "StringType", "all", "Time without timezone"),
        ("TIMETZ", "StringType", "all", "Time with timezone"),
        ("TIME WITHOUT TIME ZONE", "StringType", "all", "Time no TZ"),
        ("TIME WITH TIME ZONE", "StringType", "all", "Time with TZ"),
        ("TIMESTAMP", "TimestampType", "all", "Timestamp without timezone"),
        ("TIMESTAMPTZ", "TimestampType", "all", "Timestamp with timezone"),
        ("TIMESTAMP WITHOUT TIME ZONE", "TimestampType", "3.x", "No TZ (Spark 3.x)"),
        ("TIMESTAMP WITHOUT TIME ZONE", "TimestampNTZType", "4.x", "No TZ (Spark 4.x)"),
        ("TIMESTAMP WITH TIME ZONE", "TimestampType", "all", "With timezone"),
        # Interval
        ("INTERVAL YEAR TO MONTH", "StringType", "all", "Year-month interval"),
        ("INTERVAL DAY TO SECOND", "StringType", "all", "Day-time interval"),
        # Semi-structured (SUPER type)
        ("SUPER", "StringType", "3.x", "Semi-structured (Spark 3.x)"),
        ("SUPER", "VariantType", "4.x", "Semi-structured (Spark 4.x)"),
        # Geometry
        ("GEOMETRY", "BinaryType", "all", "Geometry data"),
        ("GEOGRAPHY", "BinaryType", "all", "Geography data"),
        # HyperLogLog
        ("HLLSKETCH", "BinaryType", "all", "HyperLogLog sketch"),
    )
    for *positional, note in table:
        add("redshift", *positional, notes=note)
|
|
407
|
+
|
|
408
|
+
# =============================================================================
|
|
409
|
+
# DATABRICKS (Delta Lake)
|
|
410
|
+
# =============================================================================
|
|
411
|
+
def add_databricks_types():
    """Pass every Databricks type row below to ``add`` under the "databricks" adapter.

    Each row is ``(native_type, spark_type, spark_version, [extra positional], note)``.
    The trailing note is forwarded as ``notes=``; the preceding fields are
    forwarded positionally, in table order.
    """
    # NOTE(review): the optional ``True`` before the note looks like a
    # "complex type" flag -- confirm against add()'s signature.
    entries = (
        # All native Spark types
        ("TINYINT", "ByteType", "all", "8-bit signed integer"),
        ("BYTE", "ByteType", "all", "Alias for TINYINT"),
        ("SMALLINT", "ShortType", "all", "16-bit signed integer"),
        ("SHORT", "ShortType", "all", "Alias for SMALLINT"),
        ("INT", "IntegerType", "all", "32-bit signed integer"),
        ("INTEGER", "IntegerType", "all", "Alias for INT"),
        ("BIGINT", "LongType", "all", "64-bit signed integer"),
        ("LONG", "LongType", "all", "Alias for BIGINT"),
        # Floating point
        ("FLOAT", "FloatType", "all", "32-bit floating point"),
        ("REAL", "FloatType", "all", "Alias for FLOAT"),
        ("DOUBLE", "DoubleType", "all", "64-bit floating point"),
        ("DOUBLE PRECISION", "DoubleType", "all", "Alias for DOUBLE"),
        # Decimal
        ("DECIMAL", "DecimalType", "all", "Arbitrary precision decimal"),
        ("DEC", "DecimalType", "all", "Alias for DECIMAL"),
        ("NUMERIC", "DecimalType", "all", "Alias for DECIMAL"),
        # String
        ("STRING", "StringType", "all", "UTF-8 string"),
        ("VARCHAR", "StringType", "all", "Alias for STRING"),
        ("CHAR", "StringType", "all", "Alias for STRING"),
        # Binary
        ("BINARY", "BinaryType", "all", "Byte array"),
        # Boolean
        ("BOOLEAN", "BooleanType", "all", "Boolean value"),
        # Date/Time
        ("DATE", "DateType", "all", "Calendar date"),
        ("TIMESTAMP", "TimestampType", "all", "Timestamp with local TZ"),
        ("TIMESTAMP_LTZ", "TimestampType", "all", "Timestamp local TZ"),
        ("TIMESTAMP_NTZ", "TimestampType", "3.x", "No timezone (Spark 3.x)"),
        ("TIMESTAMP_NTZ", "TimestampNTZType", "4.x", "No timezone (Spark 4.x)"),
        # Interval
        ("INTERVAL", "StringType", "all", "Time interval"),
        ("INTERVAL YEAR", "YearMonthIntervalType", "all", "Year interval"),
        ("INTERVAL MONTH", "YearMonthIntervalType", "all", "Month interval"),
        ("INTERVAL DAY", "DayTimeIntervalType", "all", "Day interval"),
        ("INTERVAL HOUR", "DayTimeIntervalType", "all", "Hour interval"),
        ("INTERVAL MINUTE", "DayTimeIntervalType", "all", "Minute interval"),
        ("INTERVAL SECOND", "DayTimeIntervalType", "all", "Second interval"),
        # Complex types
        ("ARRAY", "ArrayType", "all", True, "Array of elements"),
        ("MAP", "MapType", "all", True, "Key-value map"),
        ("STRUCT", "StructType", "all", True, "Structured record"),
        # Variant (Spark 4.0)
        ("VARIANT", "StringType", "3.x", "Semi-structured (Spark 3.x)"),
        ("VARIANT", "VariantType", "4.x", "Semi-structured (Spark 4.x)"),
    )
    for *positional, note in entries:
        add("databricks", *positional, notes=note)
|
|
468
|
+
|
|
469
|
+
# =============================================================================
|
|
470
|
+
# ORACLE
|
|
471
|
+
# =============================================================================
|
|
472
|
+
def add_oracle_types():
    """Pass every Oracle type row below to ``add`` under the "oracle" adapter.

    Rows are ``(native_type, spark_type, spark_version, note)``; the note is
    forwarded as ``notes=`` and the other fields positionally, in table order.
    """
    catalog = (
        # Numeric
        ("NUMBER", "DecimalType", "all", "Numeric with precision/scale"),
        ("FLOAT", "DoubleType", "all", "Floating point (126 binary)"),
        ("BINARY_FLOAT", "FloatType", "all", "32-bit IEEE float"),
        ("BINARY_DOUBLE", "DoubleType", "all", "64-bit IEEE double"),
        # Integer (Oracle doesn't have true integers, uses NUMBER)
        ("INTEGER", "DecimalType", "all", "NUMBER(38)"),
        ("INT", "DecimalType", "all", "Alias for INTEGER"),
        ("SMALLINT", "DecimalType", "all", "NUMBER(38)"),
        # Character
        ("CHAR", "StringType", "all", "Fixed-length character (2000)"),
        ("NCHAR", "StringType", "all", "Fixed-length national char"),
        ("VARCHAR2", "StringType", "all", "Variable-length string (4000)"),
        ("NVARCHAR2", "StringType", "all", "Variable-length national"),
        ("VARCHAR", "StringType", "all", "Alias for VARCHAR2"),
        ("LONG", "StringType", "all", "Variable-length (deprecated)"),
        ("CLOB", "StringType", "all", "Character large object"),
        ("NCLOB", "StringType", "all", "National CLOB"),
        # Binary
        ("RAW", "BinaryType", "all", "Raw binary (2000)"),
        ("LONG RAW", "BinaryType", "all", "Long raw (deprecated)"),
        ("BLOB", "BinaryType", "all", "Binary large object"),
        ("BFILE", "StringType", "all", "External file reference"),
        # Date/Time
        ("DATE", "TimestampType", "all", "Date with time component"),
        ("TIMESTAMP", "TimestampType", "all", "Timestamp no timezone"),
        ("TIMESTAMP WITH TIME ZONE", "TimestampType", "all", "With timezone"),
        ("TIMESTAMP WITH LOCAL TIME ZONE", "TimestampType", "all", "Local TZ"),
        # Interval
        ("INTERVAL YEAR TO MONTH", "StringType", "all", "Year-month interval"),
        ("INTERVAL DAY TO SECOND", "StringType", "all", "Day-time interval"),
        # ROWID
        ("ROWID", "StringType", "all", "Row address"),
        ("UROWID", "StringType", "all", "Universal ROWID"),
        # JSON (Oracle 21c+)
        ("JSON", "StringType", "3.x", "JSON data (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON data (Spark 4.x)"),
        # XMLType
        ("XMLTYPE", "StringType", "all", "XML data"),
        ("SYS.XMLTYPE", "StringType", "all", "XML data (fully qualified)"),
        # SDO_GEOMETRY (spatial)
        ("SDO_GEOMETRY", "StringType", "all", "Spatial geometry"),
        ("MDSYS.SDO_GEOMETRY", "StringType", "all", "Spatial (qualified)"),
    )
    for *positional, note in catalog:
        add("oracle", *positional, notes=note)
|
|
525
|
+
|
|
526
|
+
# =============================================================================
|
|
527
|
+
# SQL SERVER (and Azure Synapse, Azure SQL)
|
|
528
|
+
# =============================================================================
|
|
529
|
+
def add_sqlserver_types():
    """Pass every SQL Server type row below to ``add`` under the "sqlserver" adapter.

    Rows are ``(native_type, spark_type, spark_version, note)``; the note is
    forwarded as ``notes=`` and the other fields positionally, in table order.
    """
    specs = (
        # Exact numerics
        ("BIT", "BooleanType", "all", "Boolean (0 or 1)"),
        ("TINYINT", "ShortType", "all", "0 to 255"),
        ("SMALLINT", "ShortType", "all", "16-bit signed"),
        ("INT", "IntegerType", "all", "32-bit signed"),
        ("INTEGER", "IntegerType", "all", "Alias for INT"),
        ("BIGINT", "LongType", "all", "64-bit signed"),
        ("DECIMAL", "DecimalType", "all", "Exact numeric (38,38)"),
        ("DEC", "DecimalType", "all", "Alias for DECIMAL"),
        ("NUMERIC", "DecimalType", "all", "Alias for DECIMAL"),
        ("MONEY", "DecimalType", "all", "Currency (-2^63 to 2^63)"),
        ("SMALLMONEY", "DecimalType", "all", "Small currency"),
        # Approximate numerics
        ("FLOAT", "DoubleType", "all", "64-bit floating point"),
        ("REAL", "FloatType", "all", "32-bit floating point"),
        # Character strings
        ("CHAR", "StringType", "all", "Fixed-length (8000)"),
        ("VARCHAR", "StringType", "all", "Variable-length (8000)"),
        ("VARCHAR(MAX)", "StringType", "all", "Variable-length (2GB)"),
        ("TEXT", "StringType", "all", "Variable-length (deprecated)"),
        # Unicode character strings
        ("NCHAR", "StringType", "all", "Fixed-length Unicode (4000)"),
        ("NVARCHAR", "StringType", "all", "Variable Unicode (4000)"),
        ("NVARCHAR(MAX)", "StringType", "all", "Variable Unicode (2GB)"),
        ("NTEXT", "StringType", "all", "Unicode text (deprecated)"),
        # Binary strings
        ("BINARY", "BinaryType", "all", "Fixed-length binary (8000)"),
        ("VARBINARY", "BinaryType", "all", "Variable binary (8000)"),
        ("VARBINARY(MAX)", "BinaryType", "all", "Variable binary (2GB)"),
        ("IMAGE", "BinaryType", "all", "Binary (deprecated)"),
        # Date and time
        ("DATE", "DateType", "all", "Date only"),
        ("TIME", "StringType", "all", "Time only"),
        ("DATETIME", "TimestampType", "all", "Date and time"),
        ("DATETIME2", "TimestampType", "all", "High precision datetime"),
        ("SMALLDATETIME", "TimestampType", "all", "Low precision datetime"),
        ("DATETIMEOFFSET", "TimestampType", "all", "Datetime with timezone"),
        # Other
        ("UNIQUEIDENTIFIER", "StringType", "all", "GUID/UUID"),
        ("SQL_VARIANT", "StringType", "all", "Variant type"),
        ("XML", "StringType", "all", "XML data"),
        ("GEOGRAPHY", "BinaryType", "all", "Geographic data"),
        ("GEOMETRY", "BinaryType", "all", "Geometric data"),
        ("HIERARCHYID", "BinaryType", "all", "Hierarchy position"),
        # JSON (SQL Server 2016+, stored as NVARCHAR)
        # Note: JSON is not a native type in SQL Server, but queries return it
        ("JSON", "StringType", "3.x", "JSON output (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON output (Spark 4.x)"),
    )
    for *positional, note in specs:
        add("sqlserver", *positional, notes=note)
|
|
585
|
+
|
|
586
|
+
# =============================================================================
|
|
587
|
+
# CLICKHOUSE
|
|
588
|
+
# =============================================================================
|
|
589
|
+
def add_clickhouse_types():
    """Pass every ClickHouse type row below to ``add`` under the "clickhouse" adapter.

    Each row is ``(native_type, spark_type, spark_version, [extra positional], note)``.
    The trailing note is forwarded as ``notes=``; the preceding fields are
    forwarded positionally, in table order. Type names keep ClickHouse's
    mixed-case spelling exactly as written here.
    """
    # NOTE(review): the optional ``True`` before the note looks like a
    # "complex type" flag -- confirm against add()'s signature.
    rows = (
        # Integer types
        ("Int8", "ByteType", "all", "8-bit signed"),
        ("Int16", "ShortType", "all", "16-bit signed"),
        ("Int32", "IntegerType", "all", "32-bit signed"),
        ("Int64", "LongType", "all", "64-bit signed"),
        ("Int128", "DecimalType", "all", "128-bit signed"),
        ("Int256", "DecimalType", "all", "256-bit signed"),
        ("UInt8", "ShortType", "all", "8-bit unsigned"),
        ("UInt16", "IntegerType", "all", "16-bit unsigned"),
        ("UInt32", "LongType", "all", "32-bit unsigned"),
        ("UInt64", "DecimalType", "all", "64-bit unsigned"),
        ("UInt128", "DecimalType", "all", "128-bit unsigned"),
        ("UInt256", "DecimalType", "all", "256-bit unsigned"),
        # Floating point
        ("Float32", "FloatType", "all", "32-bit IEEE float"),
        ("Float64", "DoubleType", "all", "64-bit IEEE double"),
        # Decimal
        ("Decimal", "DecimalType", "all", "Fixed-point decimal"),
        ("Decimal32", "DecimalType", "all", "Decimal(9, S)"),
        ("Decimal64", "DecimalType", "all", "Decimal(18, S)"),
        ("Decimal128", "DecimalType", "all", "Decimal(38, S)"),
        ("Decimal256", "DecimalType", "all", "Decimal(76, S)"),
        # Boolean
        ("Bool", "BooleanType", "all", "Boolean"),
        # String
        ("String", "StringType", "all", "Arbitrary length"),
        ("FixedString", "StringType", "all", "Fixed length"),
        # Date/Time
        ("Date", "DateType", "all", "Days since 1970"),
        ("Date32", "DateType", "all", "Extended date range"),
        ("DateTime", "TimestampType", "all", "Unix timestamp"),
        ("DateTime64", "TimestampType", "all", "High precision"),
        # UUID
        ("UUID", "StringType", "all", "UUID value"),
        # Enum
        ("Enum8", "StringType", "all", "Enum with 8-bit index"),
        ("Enum16", "StringType", "all", "Enum with 16-bit index"),
        # Array
        ("Array", "ArrayType", "all", True, "Array type"),
        # Tuple
        ("Tuple", "StructType", "all", True, "Named tuple"),
        # Map
        ("Map", "MapType", "all", True, "Key-value map"),
        # Nested
        ("Nested", "ArrayType", "all", True, "Nested structure"),
        # JSON
        ("JSON", "StringType", "3.x", "JSON object (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON object (Spark 4.x)"),
        # IP addresses
        ("IPv4", "StringType", "all", "IPv4 address"),
        ("IPv6", "StringType", "all", "IPv6 address"),
        # Geo
        ("Point", "ArrayType", "all", True, "X,Y coordinates"),
        ("Ring", "ArrayType", "all", True, "Polygon ring"),
        ("Polygon", "ArrayType", "all", True, "Polygon"),
        ("MultiPolygon", "ArrayType", "all", True, "Multi-polygon"),
        # Nullable wrapper (handled separately)
        ("Nullable", "NullType", "all", "Nullable wrapper"),
        # LowCardinality (handled separately)
        ("LowCardinality", "StringType", "all", "Dictionary encoded"),
    )
    for *positional, note in rows:
        add("clickhouse", *positional, notes=note)
|
|
666
|
+
|
|
667
|
+
# =============================================================================
|
|
668
|
+
# TRINO / PRESTO (Athena, Starburst)
|
|
669
|
+
# =============================================================================
|
|
670
|
+
def add_trino_types():
    """Pass every Trino type row below to ``add`` under the "trino" adapter.

    Each row is ``(native_type, spark_type, spark_version, [extra positional], note)``.
    The trailing note is forwarded as ``notes=``; the preceding fields are
    forwarded positionally, in table order.
    """
    # NOTE(review): the optional ``True`` before the note looks like a
    # "complex type" flag -- confirm against add()'s signature.
    table = (
        # Boolean
        ("BOOLEAN", "BooleanType", "all", "Boolean value"),
        # Integer types
        ("TINYINT", "ByteType", "all", "8-bit signed"),
        ("SMALLINT", "ShortType", "all", "16-bit signed"),
        ("INTEGER", "IntegerType", "all", "32-bit signed"),
        ("INT", "IntegerType", "all", "Alias for INTEGER"),
        ("BIGINT", "LongType", "all", "64-bit signed"),
        # Floating point
        ("REAL", "FloatType", "all", "32-bit IEEE float"),
        ("DOUBLE", "DoubleType", "all", "64-bit IEEE double"),
        # Decimal
        ("DECIMAL", "DecimalType", "all", "Fixed-point decimal"),
        # String
        ("VARCHAR", "StringType", "all", "Variable-length string"),
        ("CHAR", "StringType", "all", "Fixed-length string"),
        # Binary
        ("VARBINARY", "BinaryType", "all", "Variable-length binary"),
        # Date/Time
        ("DATE", "DateType", "all", "Calendar date"),
        ("TIME", "StringType", "all", "Time without timezone"),
        ("TIME WITH TIME ZONE", "StringType", "all", "Time with TZ"),
        ("TIMESTAMP", "TimestampType", "3.x", "Timestamp no TZ (Spark 3.x)"),
        ("TIMESTAMP", "TimestampNTZType", "4.x", "Timestamp no TZ (Spark 4.x)"),
        ("TIMESTAMP WITH TIME ZONE", "TimestampType", "all", "With timezone"),
        # Interval
        ("INTERVAL YEAR TO MONTH", "StringType", "all", "Year-month interval"),
        ("INTERVAL DAY TO SECOND", "StringType", "all", "Day-time interval"),
        # Complex types
        ("ARRAY", "ArrayType", "all", True, "Array of elements"),
        ("MAP", "MapType", "all", True, "Key-value map"),
        ("ROW", "StructType", "all", True, "Structured row"),
        # JSON
        ("JSON", "StringType", "3.x", "JSON value (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON value (Spark 4.x)"),
        # IP address
        ("IPADDRESS", "StringType", "all", "IP address"),
        # UUID
        ("UUID", "StringType", "all", "UUID value"),
        # HyperLogLog
        ("HYPERLOGLOG", "BinaryType", "all", "HLL sketch"),
        ("P4HYPERLOGLOG", "BinaryType", "all", "P4 HLL sketch"),
        # Set Digest
        ("SETDIGEST", "BinaryType", "all", "Set digest"),
        # QDigest
        ("QDIGEST", "BinaryType", "all", "Quantile digest"),
        ("TDIGEST", "BinaryType", "all", "T-Digest"),
        # Geometry
        ("GEOMETRY", "BinaryType", "all", "Geometry"),
        ("SPHERICALGEOGRAPHY", "BinaryType", "all", "Spherical geography"),
    )
    for *positional, note in table:
        add("trino", *positional, notes=note)
|
|
736
|
+
|
|
737
|
+
# Add aliases for Athena (Presto-based)
|
|
738
|
+
def add_athena_types():
    """Re-register every "trino" entry of MAPPINGS under the "athena" adapter.

    Fields 1 through 6 of each matching entry are forwarded to ``add``
    unchanged and in order; only the adapter name differs.
    """
    # Collect the trino rows into their own list before calling add().
    # NOTE(review): presumably add() appends to MAPPINGS, which is why the
    # rows are snapshotted first rather than iterated live -- confirm.
    trino_rows = [
        tuple(entry[i] for i in range(1, 7))
        for entry in MAPPINGS
        if entry[0] == "trino"
    ]
    for row in trino_rows:
        add("athena", *row)
|
|
745
|
+
|
|
746
|
+
# =============================================================================
|
|
747
|
+
# DUCKDB
|
|
748
|
+
# =============================================================================
|
|
749
|
+
def add_duckdb_types():
    """Register the DuckDB -> Spark type mappings through add().

    The mappings are kept in a table; each row holds the positional
    arguments that follow the adapter name (adapter type, Spark type,
    Spark version and, for nested types, a trailing True flag) plus the
    notes text. Rows are registered in table order.
    """
    rows = [
        # Boolean
        (("BOOLEAN", "BooleanType", "all"), "Boolean value"),
        (("BOOL", "BooleanType", "all"), "Alias for BOOLEAN"),
        # Integer types
        (("TINYINT", "ByteType", "all"), "8-bit signed"),
        (("INT1", "ByteType", "all"), "Alias for TINYINT"),
        (("SMALLINT", "ShortType", "all"), "16-bit signed"),
        (("INT2", "ShortType", "all"), "Alias for SMALLINT"),
        (("INTEGER", "IntegerType", "all"), "32-bit signed"),
        (("INT", "IntegerType", "all"), "Alias for INTEGER"),
        (("INT4", "IntegerType", "all"), "Alias for INTEGER"),
        (("BIGINT", "LongType", "all"), "64-bit signed"),
        (("INT8", "LongType", "all"), "Alias for BIGINT"),
        (("HUGEINT", "DecimalType", "all"), "128-bit signed"),
        (("UHUGEINT", "DecimalType", "all"), "128-bit unsigned"),
        # Unsigned types are widened to the next larger Spark type.
        (("UTINYINT", "ShortType", "all"), "8-bit unsigned"),
        (("USMALLINT", "IntegerType", "all"), "16-bit unsigned"),
        (("UINTEGER", "LongType", "all"), "32-bit unsigned"),
        (("UBIGINT", "DecimalType", "all"), "64-bit unsigned"),
        # Floating point
        (("REAL", "FloatType", "all"), "32-bit IEEE float"),
        (("FLOAT4", "FloatType", "all"), "Alias for REAL"),
        (("FLOAT", "FloatType", "all"), "Alias for REAL"),
        (("DOUBLE", "DoubleType", "all"), "64-bit IEEE double"),
        (("FLOAT8", "DoubleType", "all"), "Alias for DOUBLE"),
        # Decimal
        (("DECIMAL", "DecimalType", "all"), "Fixed-point decimal"),
        (("NUMERIC", "DecimalType", "all"), "Alias for DECIMAL"),
        # String
        (("VARCHAR", "StringType", "all"), "Variable-length string"),
        (("CHAR", "StringType", "all"), "Fixed-length string"),
        (("BPCHAR", "StringType", "all"), "Blank-padded char"),
        (("TEXT", "StringType", "all"), "Alias for VARCHAR"),
        (("STRING", "StringType", "all"), "Alias for VARCHAR"),
        # Binary
        (("BLOB", "BinaryType", "all"), "Binary data"),
        (("BYTEA", "BinaryType", "all"), "Alias for BLOB"),
        (("BINARY", "BinaryType", "all"), "Alias for BLOB"),
        (("VARBINARY", "BinaryType", "all"), "Alias for BLOB"),
        # Date/Time
        (("DATE", "DateType", "all"), "Calendar date"),
        (("TIME", "StringType", "all"), "Time of day"),
        (("TIMESTAMP", "TimestampType", "all"), "Timestamp"),
        (("TIMESTAMPTZ", "TimestampType", "all"), "With timezone"),
        (("TIMESTAMP WITH TIME ZONE", "TimestampType", "all"), "With TZ"),
        # Interval
        (("INTERVAL", "StringType", "all"), "Time interval"),
        # UUID
        (("UUID", "StringType", "all"), "UUID value"),
        # Complex types (trailing True is the fifth positional argument of add())
        (("LIST", "ArrayType", "all", True), "List/array type"),
        (("STRUCT", "StructType", "all", True), "Struct type"),
        (("MAP", "MapType", "all", True), "Map type"),
        (("UNION", "StructType", "all", True), "Union type"),
        # JSON: one row per Spark major version
        (("JSON", "StringType", "3.x"), "JSON (Spark 3.x)"),
        (("JSON", "VariantType", "4.x"), "JSON (Spark 4.x)"),
        # Enum
        (("ENUM", "StringType", "all"), "Enumeration"),
        # Bit
        (("BIT", "StringType", "all"), "Bit string"),
        (("BITSTRING", "StringType", "all"), "Alias for BIT"),
    ]
    for positional, note in rows:
        add("duckdb", *positional, notes=note)
|
|
824
|
+
|
|
825
|
+
# =============================================================================
|
|
826
|
+
# TERADATA
|
|
827
|
+
# =============================================================================
|
|
828
|
+
def add_teradata_types():
    """Register the Teradata -> Spark type mappings through add().

    Every mapping is registered for the "teradata" adapter. JSON is the only
    type with separate rows per Spark major version ("3.x" and "4.x"); all
    other rows use "all".
    """
    # Integer types
    add("teradata", "BYTEINT", "ByteType", "all", notes="8-bit signed")
    add("teradata", "SMALLINT", "ShortType", "all", notes="16-bit signed")
    add("teradata", "INTEGER", "IntegerType", "all", notes="32-bit signed")
    add("teradata", "INT", "IntegerType", "all", notes="Alias for INTEGER")
    add("teradata", "BIGINT", "LongType", "all", notes="64-bit signed")

    # Decimal
    add("teradata", "DECIMAL", "DecimalType", "all", notes="Fixed-point (18,0)")
    add("teradata", "NUMERIC", "DecimalType", "all", notes="Alias for DECIMAL")
    add("teradata", "NUMBER", "DecimalType", "all", notes="Variable precision")

    # Floating point
    # Teradata treats REAL, FLOAT and DOUBLE PRECISION as synonyms for the
    # same 8-byte float, so REAL must map to DoubleType; FloatType would
    # silently drop precision.
    add("teradata", "REAL", "DoubleType", "all", notes="Alias for FLOAT (64-bit IEEE)")
    add("teradata", "FLOAT", "DoubleType", "all", notes="64-bit IEEE")
    add("teradata", "DOUBLE PRECISION", "DoubleType", "all", notes="Alias for FLOAT")

    # Character
    add("teradata", "CHAR", "StringType", "all", notes="Fixed-length (64000)")
    add("teradata", "CHARACTER", "StringType", "all", notes="Alias for CHAR")
    add("teradata", "VARCHAR", "StringType", "all", notes="Variable-length (64000)")
    add("teradata", "CHARACTER VARYING", "StringType", "all", notes="Alias for VARCHAR")
    add("teradata", "LONG VARCHAR", "StringType", "all", notes="Extended varchar")
    add("teradata", "CLOB", "StringType", "all", notes="Character LOB (2GB)")

    # Binary
    add("teradata", "BYTE", "BinaryType", "all", notes="Fixed-length binary")
    add("teradata", "VARBYTE", "BinaryType", "all", notes="Variable binary")
    add("teradata", "BLOB", "BinaryType", "all", notes="Binary LOB (2GB)")

    # Date/Time (TIME variants are carried as strings)
    add("teradata", "DATE", "DateType", "all", notes="Calendar date")
    add("teradata", "TIME", "StringType", "all", notes="Time of day")
    add("teradata", "TIME WITH TIME ZONE", "StringType", "all", notes="Time with TZ")
    add("teradata", "TIMESTAMP", "TimestampType", "all", notes="Timestamp")
    add("teradata", "TIMESTAMP WITH TIME ZONE", "TimestampType", "all", notes="With TZ")

    # Interval (all carried as strings)
    add("teradata", "INTERVAL YEAR", "StringType", "all", notes="Year interval")
    add("teradata", "INTERVAL MONTH", "StringType", "all", notes="Month interval")
    add("teradata", "INTERVAL DAY", "StringType", "all", notes="Day interval")
    add("teradata", "INTERVAL HOUR", "StringType", "all", notes="Hour interval")
    add("teradata", "INTERVAL MINUTE", "StringType", "all", notes="Minute interval")
    add("teradata", "INTERVAL SECOND", "StringType", "all", notes="Second interval")
    add("teradata", "INTERVAL YEAR TO MONTH", "StringType", "all", notes="Year-month")
    add("teradata", "INTERVAL DAY TO HOUR", "StringType", "all", notes="Day-hour")
    add("teradata", "INTERVAL DAY TO MINUTE", "StringType", "all", notes="Day-minute")
    add("teradata", "INTERVAL DAY TO SECOND", "StringType", "all", notes="Day-second")
    add("teradata", "INTERVAL HOUR TO MINUTE", "StringType", "all", notes="Hour-minute")
    add("teradata", "INTERVAL HOUR TO SECOND", "StringType", "all", notes="Hour-second")
    add("teradata", "INTERVAL MINUTE TO SECOND", "StringType", "all", notes="Minute-second")

    # Period
    add("teradata", "PERIOD(DATE)", "StringType", "all", notes="Date period")
    add("teradata", "PERIOD(TIME)", "StringType", "all", notes="Time period")
    add("teradata", "PERIOD(TIMESTAMP)", "StringType", "all", notes="Timestamp period")

    # JSON: one row per Spark major version
    add("teradata", "JSON", "StringType", "3.x", notes="JSON (Spark 3.x)")
    add("teradata", "JSON", "VariantType", "4.x", notes="JSON (Spark 4.x)")

    # XML
    add("teradata", "XML", "StringType", "all", notes="XML document")

    # Geospatial
    add("teradata", "ST_GEOMETRY", "BinaryType", "all", notes="Geometry")
    add("teradata", "MBR", "BinaryType", "all", notes="Minimum bounding rectangle")
|
|
896
|
+
|
|
897
|
+
# =============================================================================
|
|
898
|
+
# VERTICA
|
|
899
|
+
# =============================================================================
|
|
900
|
+
def add_vertica_types():
    """Register the Vertica -> Spark type mappings through add().

    Every mapping is registered for the "vertica" adapter with Spark version
    "all". Nested types (ARRAY, SET, ROW, MAP) pass True as the fifth
    positional argument of add().
    """
    # Integer types
    # Vertica stores INTEGER, INT, BIGINT, INT8, SMALLINT and TINYINT as the
    # same 64-bit signed integer, so all of them map to LongType; narrower
    # Spark types could overflow on valid column values.
    add("vertica", "INTEGER", "LongType", "all", notes="64-bit signed")
    add("vertica", "INT", "LongType", "all", notes="Alias for INTEGER")
    add("vertica", "BIGINT", "LongType", "all", notes="64-bit signed")
    add("vertica", "INT8", "LongType", "all", notes="Alias for BIGINT")
    add("vertica", "SMALLINT", "LongType", "all", notes="Alias for INTEGER (64-bit)")
    add("vertica", "TINYINT", "LongType", "all", notes="Alias for INTEGER (64-bit)")

    # Decimal
    add("vertica", "NUMERIC", "DecimalType", "all", notes="Exact numeric")
    add("vertica", "DECIMAL", "DecimalType", "all", notes="Alias for NUMERIC")
    add("vertica", "NUMBER", "DecimalType", "all", notes="Alias for NUMERIC")
    add("vertica", "MONEY", "DecimalType", "all", notes="Currency type")

    # Floating point
    add("vertica", "DOUBLE PRECISION", "DoubleType", "all", notes="64-bit float")
    add("vertica", "FLOAT", "DoubleType", "all", notes="Alias for DOUBLE")
    add("vertica", "FLOAT8", "DoubleType", "all", notes="Alias for DOUBLE")
    add("vertica", "REAL", "DoubleType", "all", notes="Alias for DOUBLE")

    # Boolean
    add("vertica", "BOOLEAN", "BooleanType", "all", notes="Boolean value")

    # Character
    add("vertica", "CHAR", "StringType", "all", notes="Fixed-length (65000)")
    add("vertica", "VARCHAR", "StringType", "all", notes="Variable-length (65000)")
    add("vertica", "LONG VARCHAR", "StringType", "all", notes="Extended varchar")

    # Binary
    add("vertica", "BINARY", "BinaryType", "all", notes="Fixed-length binary")
    add("vertica", "VARBINARY", "BinaryType", "all", notes="Variable binary")
    add("vertica", "LONG VARBINARY", "BinaryType", "all", notes="Extended binary")
    add("vertica", "BYTEA", "BinaryType", "all", notes="Alias for VARBINARY")
    add("vertica", "RAW", "BinaryType", "all", notes="Alias for VARBINARY")

    # Date/Time (TIME variants are carried as strings)
    add("vertica", "DATE", "DateType", "all", notes="Calendar date")
    add("vertica", "TIME", "StringType", "all", notes="Time of day")
    add("vertica", "TIME WITH TIMEZONE", "StringType", "all", notes="Time with TZ")
    add("vertica", "TIMETZ", "StringType", "all", notes="Alias for TIME WITH TZ")
    add("vertica", "TIMESTAMP", "TimestampType", "all", notes="Timestamp")
    add("vertica", "TIMESTAMP WITH TIMEZONE", "TimestampType", "all", notes="With TZ")
    add("vertica", "TIMESTAMPTZ", "TimestampType", "all", notes="Alias for WITH TZ")
    add("vertica", "DATETIME", "TimestampType", "all", notes="Alias for TIMESTAMP")
    add("vertica", "SMALLDATETIME", "TimestampType", "all", notes="Minute precision")

    # Interval
    add("vertica", "INTERVAL", "StringType", "all", notes="Time interval")
    add("vertica", "INTERVAL DAY TO SECOND", "StringType", "all", notes="Day-time")
    add("vertica", "INTERVAL YEAR TO MONTH", "StringType", "all", notes="Year-month")

    # UUID
    add("vertica", "UUID", "StringType", "all", notes="UUID value")

    # Complex types
    add("vertica", "ARRAY", "ArrayType", "all", True, notes="Array type")
    add("vertica", "SET", "ArrayType", "all", True, notes="Set type")
    add("vertica", "ROW", "StructType", "all", True, notes="Row type")
    add("vertica", "MAP", "MapType", "all", True, notes="Map type")

    # Geospatial
    add("vertica", "GEOMETRY", "BinaryType", "all", notes="Geometry")
    add("vertica", "GEOGRAPHY", "BinaryType", "all", notes="Geography")
|
|
964
|
+
|
|
965
|
+
# =============================================================================
|
|
966
|
+
# HIVE
|
|
967
|
+
# =============================================================================
|
|
968
|
+
def add_hive_types():
    """Register the Hive -> Spark type mappings through add().

    Each table row holds the positional arguments that follow the adapter
    name plus the notes text; rows are registered in table order.
    """
    rows = [
        # Numeric types
        (("TINYINT", "ByteType", "all"), "8-bit signed"),
        (("SMALLINT", "ShortType", "all"), "16-bit signed"),
        (("INT", "IntegerType", "all"), "32-bit signed"),
        (("INTEGER", "IntegerType", "all"), "Alias for INT"),
        (("BIGINT", "LongType", "all"), "64-bit signed"),
        # Floating point
        (("FLOAT", "FloatType", "all"), "32-bit IEEE"),
        (("DOUBLE", "DoubleType", "all"), "64-bit IEEE"),
        (("DOUBLE PRECISION", "DoubleType", "all"), "Alias for DOUBLE"),
        # Decimal
        (("DECIMAL", "DecimalType", "all"), "Fixed-point decimal"),
        (("NUMERIC", "DecimalType", "all"), "Alias for DECIMAL"),
        # String
        (("STRING", "StringType", "all"), "Unbounded string"),
        (("VARCHAR", "StringType", "all"), "Variable-length (65535)"),
        (("CHAR", "StringType", "all"), "Fixed-length (255)"),
        # Binary
        (("BINARY", "BinaryType", "all"), "Binary data"),
        # Boolean
        (("BOOLEAN", "BooleanType", "all"), "Boolean value"),
        # Date/Time
        (("DATE", "DateType", "all"), "Calendar date"),
        (("TIMESTAMP", "TimestampType", "all"), "Timestamp"),
        (("INTERVAL", "StringType", "all"), "Time interval"),
        # Complex types (trailing True is the fifth positional argument of add())
        (("ARRAY", "ArrayType", "all", True), "Array type"),
        (("MAP", "MapType", "all", True), "Key-value map"),
        (("STRUCT", "StructType", "all", True), "Struct type"),
        (("UNIONTYPE", "StructType", "all", True), "Union type"),
    ]
    for positional, note in rows:
        add("hive", *positional, notes=note)
|
|
1006
|
+
|
|
1007
|
+
# =============================================================================
|
|
1008
|
+
# DB2
|
|
1009
|
+
# =============================================================================
|
|
1010
|
+
def add_db2_types():
    """Register the DB2 -> Spark type mappings through add().

    Each table row holds the positional arguments that follow the adapter
    name plus the notes text; rows are registered in table order. BOOLEAN
    is the only type with separate rows per Spark major version.
    """
    rows = [
        # Integer types
        (("SMALLINT", "ShortType", "all"), "16-bit signed"),
        (("INTEGER", "IntegerType", "all"), "32-bit signed"),
        (("INT", "IntegerType", "all"), "Alias for INTEGER"),
        (("BIGINT", "LongType", "all"), "64-bit signed"),
        # Decimal
        (("DECIMAL", "DecimalType", "all"), "Exact numeric (31,31)"),
        (("DEC", "DecimalType", "all"), "Alias for DECIMAL"),
        (("NUMERIC", "DecimalType", "all"), "Alias for DECIMAL"),
        (("NUM", "DecimalType", "all"), "Alias for DECIMAL"),
        # Floating point
        (("REAL", "FloatType", "all"), "32-bit IEEE"),
        (("DOUBLE", "DoubleType", "all"), "64-bit IEEE"),
        (("DOUBLE PRECISION", "DoubleType", "all"), "Alias for DOUBLE"),
        (("FLOAT", "DoubleType", "all"), "Alias for DOUBLE"),
        (("DECFLOAT", "DecimalType", "all"), "Decimal floating point"),
        # Character
        (("CHAR", "StringType", "all"), "Fixed-length (254)"),
        (("CHARACTER", "StringType", "all"), "Alias for CHAR"),
        (("VARCHAR", "StringType", "all"), "Variable-length (32672)"),
        (("CHARACTER VARYING", "StringType", "all"), "Alias for VARCHAR"),
        (("LONG VARCHAR", "StringType", "all"), "Long varchar (32700)"),
        (("CLOB", "StringType", "all"), "Character LOB (2GB)"),
        (("DBCLOB", "StringType", "all"), "Double-byte CLOB"),
        # Graphic (DBCS)
        (("GRAPHIC", "StringType", "all"), "Fixed DBCS (127)"),
        (("VARGRAPHIC", "StringType", "all"), "Variable DBCS (16336)"),
        (("LONG VARGRAPHIC", "StringType", "all"), "Long DBCS (16350)"),
        # Binary
        (("BINARY", "BinaryType", "all"), "Fixed-length (254)"),
        (("VARBINARY", "BinaryType", "all"), "Variable binary (32672)"),
        (("BLOB", "BinaryType", "all"), "Binary LOB (2GB)"),
        # Boolean - Spark 4.0 changed DB2 BOOLEAN mapping
        (("BOOLEAN", "StringType", "3.x"), "Boolean as CHAR(1) (Spark 3.x)"),
        (("BOOLEAN", "BooleanType", "4.x"), "Boolean (Spark 4.x)"),
        # Date/Time
        (("DATE", "DateType", "all"), "Calendar date"),
        (("TIME", "StringType", "all"), "Time of day"),
        (("TIMESTAMP", "TimestampType", "all"), "Timestamp"),
        # XML
        (("XML", "StringType", "all"), "XML document"),
        # Row ID
        (("ROWID", "BinaryType", "all"), "Row identifier"),
    ]
    for positional, note in rows:
        add("db2", *positional, notes=note)
|
|
1063
|
+
|
|
1064
|
+
# =============================================================================
|
|
1065
|
+
# SQLITE
|
|
1066
|
+
# =============================================================================
|
|
1067
|
+
def add_sqlite_types():
    """Register the SQLite -> Spark type mappings through add().

    SQLite has dynamic typing with 5 storage classes, so the declared type
    names below are grouped by the storage class / affinity they fall
    under. Each table row holds the positional arguments that follow the
    adapter name plus the notes text; rows are registered in table order.
    """
    rows = [
        # INTEGER
        (("INTEGER", "LongType", "all"), "64-bit signed integer"),
        (("INT", "LongType", "all"), "Alias for INTEGER"),
        (("TINYINT", "LongType", "all"), "Stored as INTEGER"),
        (("SMALLINT", "LongType", "all"), "Stored as INTEGER"),
        (("MEDIUMINT", "LongType", "all"), "Stored as INTEGER"),
        (("BIGINT", "LongType", "all"), "Stored as INTEGER"),
        (("UNSIGNED BIG INT", "LongType", "all"), "Stored as INTEGER"),
        (("INT2", "LongType", "all"), "Stored as INTEGER"),
        (("INT8", "LongType", "all"), "Stored as INTEGER"),
        # REAL
        (("REAL", "DoubleType", "all"), "64-bit IEEE float"),
        (("DOUBLE", "DoubleType", "all"), "Stored as REAL"),
        (("DOUBLE PRECISION", "DoubleType", "all"), "Stored as REAL"),
        (("FLOAT", "DoubleType", "all"), "Stored as REAL"),
        # TEXT
        (("TEXT", "StringType", "all"), "Variable-length string"),
        (("CHARACTER", "StringType", "all"), "Stored as TEXT"),
        (("VARCHAR", "StringType", "all"), "Stored as TEXT"),
        (("VARYING CHARACTER", "StringType", "all"), "Stored as TEXT"),
        (("NCHAR", "StringType", "all"), "Stored as TEXT"),
        (("NATIVE CHARACTER", "StringType", "all"), "Stored as TEXT"),
        (("NVARCHAR", "StringType", "all"), "Stored as TEXT"),
        (("CLOB", "StringType", "all"), "Stored as TEXT"),
        # BLOB
        (("BLOB", "BinaryType", "all"), "Binary data"),
        # NUMERIC (affinity)
        (("NUMERIC", "DecimalType", "all"), "Numeric affinity"),
        (("DECIMAL", "DecimalType", "all"), "Stored as NUMERIC"),
        (("BOOLEAN", "BooleanType", "all"), "Stored as NUMERIC (0/1)"),
        (("DATE", "DateType", "all"), "Stored as TEXT/REAL/INT"),
        (("DATETIME", "TimestampType", "all"), "Stored as TEXT/REAL/INT"),
        # JSON (stored as TEXT in SQLite, but parsed): one row per Spark major version
        (("JSON", "StringType", "3.x"), "JSON as TEXT (Spark 3.x)"),
        (("JSON", "VariantType", "4.x"), "JSON (Spark 4.x)"),
    ]
    for positional, note in rows:
        add("sqlite", *positional, notes=note)
|
|
1109
|
+
|
|
1110
|
+
# =============================================================================
|
|
1111
|
+
# SPARK (native types for completeness)
|
|
1112
|
+
# =============================================================================
|
|
1113
|
+
def add_spark_types():
    """Register native Spark types for Spark-to-Spark operations.

    Every row is an identity mapping: the same type name is passed to add()
    as both the adapter type and the Spark type. Each table row holds the
    type name, the remaining positional arguments (Spark version and, for
    nested types, a trailing True flag) and the notes text.
    """
    rows = [
        ("ByteType", ("all",), "8-bit signed"),
        ("ShortType", ("all",), "16-bit signed"),
        ("IntegerType", ("all",), "32-bit signed"),
        ("LongType", ("all",), "64-bit signed"),
        ("FloatType", ("all",), "32-bit float"),
        ("DoubleType", ("all",), "64-bit float"),
        ("DecimalType", ("all",), "Arbitrary precision"),
        ("StringType", ("all",), "UTF-8 string"),
        ("BinaryType", ("all",), "Byte array"),
        ("BooleanType", ("all",), "Boolean"),
        ("DateType", ("all",), "Date"),
        ("TimestampType", ("all",), "Timestamp"),
        # NOTE(review): registered for "4.x" although the note says
        # "Spark 3.4+" - confirm which version scope is intended.
        ("TimestampNTZType", ("4.x",), "No TZ (Spark 3.4+)"),
        ("ArrayType", ("all", True), "Array"),
        ("MapType", ("all", True), "Map"),
        ("StructType", ("all", True), "Struct"),
        ("VariantType", ("4.x",), "Variant (Spark 4.0+)"),
        ("YearMonthIntervalType", ("all",), "Year-month interval"),
        ("DayTimeIntervalType", ("all",), "Day-time interval"),
        ("NullType", ("all",), "Null type"),
        ("CalendarIntervalType", ("all",), "Calendar interval"),
    ]
    for type_name, trailing, note in rows:
        add("spark", type_name, type_name, *trailing, notes=note)
|
|
1136
|
+
|
|
1137
|
+
# =============================================================================
|
|
1138
|
+
# MAIN BUILD FUNCTION
|
|
1139
|
+
# =============================================================================
|
|
1140
|
+
def build_registry():
    """Build all type mappings.

    Runs every per-adapter registration function in a fixed order, printing
    after each one how many MAPPINGS entries carry that adapter's name, then
    prints the overall totals.

    Returns:
        The module-level MAPPINGS collection.
    """
    print("Building comprehensive datatype mappings...")

    # (adapter label, registration function) in execution order. trino must
    # come before athena because add_athena_types copies the trino entries.
    steps = (
        ("postgres", add_postgres_types),
        ("mysql", add_mysql_types),
        ("bigquery", add_bigquery_types),
        ("snowflake", add_snowflake_types),
        ("redshift", add_redshift_types),
        ("databricks", add_databricks_types),
        ("oracle", add_oracle_types),
        ("sqlserver", add_sqlserver_types),
        ("clickhouse", add_clickhouse_types),
        ("trino", add_trino_types),
        ("athena", add_athena_types),
        ("duckdb", add_duckdb_types),
        ("teradata", add_teradata_types),
        ("vertica", add_vertica_types),
        ("hive", add_hive_types),
        ("db2", add_db2_types),
        ("sqlite", add_sqlite_types),
        ("spark", add_spark_types),
    )
    for label, register in steps:
        register()
        registered = sum(1 for entry in MAPPINGS if entry[0] == label)
        print(f" + {label}: {registered} types")

    adapter_names = {entry[0] for entry in MAPPINGS}
    print(f"\nTotal: {len(MAPPINGS)} type mappings across {len(adapter_names)} adapters")

    return MAPPINGS
|
|
1202
|
+
|
|
1203
|
+
|
|
1204
|
+
def save_to_duckdb(db_path: str):
    """Save mappings to DuckDB.

    Builds the registry, drops and recreates the ``datatype_mappings``
    table, bulk-inserts every mapping, and prints summary statistics read
    back from the table.

    Args:
        db_path: Database path passed to ``duckdb.connect``.
    """
    mappings = build_registry()

    conn = duckdb.connect(db_path)
    # Close the connection even when a statement below raises, so a failed
    # run does not leave the database handle open.
    try:
        # Drop and recreate table
        conn.execute("DROP TABLE IF EXISTS datatype_mappings")
        conn.execute("""
            CREATE TABLE datatype_mappings (
                adapter_name VARCHAR,
                adapter_type VARCHAR,
                spark_type VARCHAR,
                spark_version VARCHAR,
                is_complex BOOLEAN,
                cast_expression VARCHAR,
                notes VARCHAR
            )
        """)

        # Insert all mappings (one 7-field record per row)
        conn.executemany(
            "INSERT INTO datatype_mappings VALUES (?, ?, ?, ?, ?, ?, ?)",
            mappings
        )

        # Verify by reading the counts back from the table
        count = conn.execute("SELECT COUNT(*) FROM datatype_mappings").fetchone()[0]
        adapters = conn.execute("SELECT DISTINCT adapter_name FROM datatype_mappings ORDER BY adapter_name").fetchall()

        print(f"\nSaved to {db_path}")
        print(f" - {count} total mappings")
        print(f" - {len(adapters)} adapters: {', '.join(a[0] for a in adapters)}")

        # Version-specific stats
        v3_count = conn.execute("SELECT COUNT(*) FROM datatype_mappings WHERE spark_version = '3.x'").fetchone()[0]
        v4_count = conn.execute("SELECT COUNT(*) FROM datatype_mappings WHERE spark_version = '4.x'").fetchone()[0]
        all_count = conn.execute("SELECT COUNT(*) FROM datatype_mappings WHERE spark_version = 'all'").fetchone()[0]
        print(f" - Version-specific: {v3_count} for Spark 3.x, {v4_count} for Spark 4.x, {all_count} for all versions")
    finally:
        conn.close()
|
|
1245
|
+
|
|
1246
|
+
|
|
1247
|
+
if __name__ == "__main__":
    import sys

    # The first command-line argument, when given, overrides the default
    # output database path.
    db_path = sys.argv[1] if len(sys.argv) > 1 else "adapters_registry.duckdb"

    save_to_duckdb(db_path)
|