dvt-core 0.59.0a51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2660 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +60 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +642 -0
  74. dbt/compute/federated_executor.py +1080 -0
  75. dbt/compute/filter_pushdown.py +273 -0
  76. dbt/compute/jar_provisioning.py +273 -0
  77. dbt/compute/java_compat.py +689 -0
  78. dbt/compute/jdbc_utils.py +1252 -0
  79. dbt/compute/metadata/__init__.py +63 -0
  80. dbt/compute/metadata/adapters_registry.py +370 -0
  81. dbt/compute/metadata/catalog_store.py +1036 -0
  82. dbt/compute/metadata/registry.py +674 -0
  83. dbt/compute/metadata/store.py +1020 -0
  84. dbt/compute/smart_selector.py +377 -0
  85. dbt/compute/spark_logger.py +272 -0
  86. dbt/compute/strategies/__init__.py +55 -0
  87. dbt/compute/strategies/base.py +165 -0
  88. dbt/compute/strategies/dataproc.py +207 -0
  89. dbt/compute/strategies/emr.py +203 -0
  90. dbt/compute/strategies/local.py +472 -0
  91. dbt/compute/strategies/standalone.py +262 -0
  92. dbt/config/__init__.py +4 -0
  93. dbt/config/catalogs.py +94 -0
  94. dbt/config/compute.py +513 -0
  95. dbt/config/dvt_profile.py +408 -0
  96. dbt/config/profile.py +422 -0
  97. dbt/config/project.py +888 -0
  98. dbt/config/project_utils.py +48 -0
  99. dbt/config/renderer.py +231 -0
  100. dbt/config/runtime.py +564 -0
  101. dbt/config/selectors.py +208 -0
  102. dbt/config/utils.py +77 -0
  103. dbt/constants.py +28 -0
  104. dbt/context/__init__.py +0 -0
  105. dbt/context/base.py +745 -0
  106. dbt/context/configured.py +135 -0
  107. dbt/context/context_config.py +382 -0
  108. dbt/context/docs.py +82 -0
  109. dbt/context/exceptions_jinja.py +178 -0
  110. dbt/context/macro_resolver.py +195 -0
  111. dbt/context/macros.py +171 -0
  112. dbt/context/manifest.py +72 -0
  113. dbt/context/providers.py +2249 -0
  114. dbt/context/query_header.py +13 -0
  115. dbt/context/secret.py +58 -0
  116. dbt/context/target.py +74 -0
  117. dbt/contracts/__init__.py +0 -0
  118. dbt/contracts/files.py +413 -0
  119. dbt/contracts/graph/__init__.py +0 -0
  120. dbt/contracts/graph/manifest.py +1904 -0
  121. dbt/contracts/graph/metrics.py +97 -0
  122. dbt/contracts/graph/model_config.py +70 -0
  123. dbt/contracts/graph/node_args.py +42 -0
  124. dbt/contracts/graph/nodes.py +1806 -0
  125. dbt/contracts/graph/semantic_manifest.py +232 -0
  126. dbt/contracts/graph/unparsed.py +811 -0
  127. dbt/contracts/project.py +419 -0
  128. dbt/contracts/results.py +53 -0
  129. dbt/contracts/selection.py +23 -0
  130. dbt/contracts/sql.py +85 -0
  131. dbt/contracts/state.py +68 -0
  132. dbt/contracts/util.py +46 -0
  133. dbt/deprecations.py +348 -0
  134. dbt/deps/__init__.py +0 -0
  135. dbt/deps/base.py +152 -0
  136. dbt/deps/git.py +195 -0
  137. dbt/deps/local.py +79 -0
  138. dbt/deps/registry.py +130 -0
  139. dbt/deps/resolver.py +149 -0
  140. dbt/deps/tarball.py +120 -0
  141. dbt/docs/source/_ext/dbt_click.py +119 -0
  142. dbt/docs/source/conf.py +32 -0
  143. dbt/env_vars.py +64 -0
  144. dbt/event_time/event_time.py +40 -0
  145. dbt/event_time/sample_window.py +60 -0
  146. dbt/events/__init__.py +15 -0
  147. dbt/events/base_types.py +36 -0
  148. dbt/events/core_types_pb2.py +2 -0
  149. dbt/events/logging.py +108 -0
  150. dbt/events/types.py +2516 -0
  151. dbt/exceptions.py +1486 -0
  152. dbt/flags.py +89 -0
  153. dbt/graph/__init__.py +11 -0
  154. dbt/graph/cli.py +249 -0
  155. dbt/graph/graph.py +172 -0
  156. dbt/graph/queue.py +214 -0
  157. dbt/graph/selector.py +374 -0
  158. dbt/graph/selector_methods.py +975 -0
  159. dbt/graph/selector_spec.py +222 -0
  160. dbt/graph/thread_pool.py +18 -0
  161. dbt/hooks.py +21 -0
  162. dbt/include/README.md +49 -0
  163. dbt/include/__init__.py +3 -0
  164. dbt/include/data/adapters_registry.duckdb +0 -0
  165. dbt/include/data/build_comprehensive_registry.py +1254 -0
  166. dbt/include/data/build_registry.py +242 -0
  167. dbt/include/data/csv/adapter_queries.csv +33 -0
  168. dbt/include/data/csv/syntax_rules.csv +9 -0
  169. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  170. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  171. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  172. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  173. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  174. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  175. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  176. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  177. dbt/include/dvt_starter_project/README.md +15 -0
  178. dbt/include/dvt_starter_project/__init__.py +3 -0
  179. dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
  180. dbt/include/dvt_starter_project/dvt_project.yml +39 -0
  181. dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
  182. dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
  183. dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
  184. dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
  185. dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
  186. dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
  187. dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
  188. dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
  189. dbt/internal_deprecations.py +26 -0
  190. dbt/jsonschemas/__init__.py +3 -0
  191. dbt/jsonschemas/jsonschemas.py +309 -0
  192. dbt/jsonschemas/project/0.0.110.json +4717 -0
  193. dbt/jsonschemas/project/0.0.85.json +2015 -0
  194. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  195. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  196. dbt/jsonschemas/resources/latest.json +6773 -0
  197. dbt/links.py +4 -0
  198. dbt/materializations/__init__.py +0 -0
  199. dbt/materializations/incremental/__init__.py +0 -0
  200. dbt/materializations/incremental/microbatch.py +236 -0
  201. dbt/mp_context.py +8 -0
  202. dbt/node_types.py +37 -0
  203. dbt/parser/__init__.py +23 -0
  204. dbt/parser/analysis.py +21 -0
  205. dbt/parser/base.py +548 -0
  206. dbt/parser/common.py +266 -0
  207. dbt/parser/docs.py +52 -0
  208. dbt/parser/fixtures.py +51 -0
  209. dbt/parser/functions.py +30 -0
  210. dbt/parser/generic_test.py +100 -0
  211. dbt/parser/generic_test_builders.py +333 -0
  212. dbt/parser/hooks.py +122 -0
  213. dbt/parser/macros.py +137 -0
  214. dbt/parser/manifest.py +2208 -0
  215. dbt/parser/models.py +573 -0
  216. dbt/parser/partial.py +1178 -0
  217. dbt/parser/read_files.py +445 -0
  218. dbt/parser/schema_generic_tests.py +422 -0
  219. dbt/parser/schema_renderer.py +111 -0
  220. dbt/parser/schema_yaml_readers.py +935 -0
  221. dbt/parser/schemas.py +1466 -0
  222. dbt/parser/search.py +149 -0
  223. dbt/parser/seeds.py +28 -0
  224. dbt/parser/singular_test.py +20 -0
  225. dbt/parser/snapshots.py +44 -0
  226. dbt/parser/sources.py +558 -0
  227. dbt/parser/sql.py +62 -0
  228. dbt/parser/unit_tests.py +621 -0
  229. dbt/plugins/__init__.py +20 -0
  230. dbt/plugins/contracts.py +9 -0
  231. dbt/plugins/exceptions.py +2 -0
  232. dbt/plugins/manager.py +163 -0
  233. dbt/plugins/manifest.py +21 -0
  234. dbt/profiler.py +20 -0
  235. dbt/py.typed +1 -0
  236. dbt/query_analyzer.py +410 -0
  237. dbt/runners/__init__.py +2 -0
  238. dbt/runners/exposure_runner.py +7 -0
  239. dbt/runners/no_op_runner.py +45 -0
  240. dbt/runners/saved_query_runner.py +7 -0
  241. dbt/selected_resources.py +8 -0
  242. dbt/task/__init__.py +0 -0
  243. dbt/task/base.py +506 -0
  244. dbt/task/build.py +197 -0
  245. dbt/task/clean.py +56 -0
  246. dbt/task/clone.py +161 -0
  247. dbt/task/compile.py +150 -0
  248. dbt/task/compute.py +458 -0
  249. dbt/task/debug.py +513 -0
  250. dbt/task/deps.py +280 -0
  251. dbt/task/docs/__init__.py +3 -0
  252. dbt/task/docs/api/__init__.py +23 -0
  253. dbt/task/docs/api/catalog.py +204 -0
  254. dbt/task/docs/api/lineage.py +234 -0
  255. dbt/task/docs/api/profile.py +204 -0
  256. dbt/task/docs/api/spark.py +186 -0
  257. dbt/task/docs/generate.py +1002 -0
  258. dbt/task/docs/index.html +250 -0
  259. dbt/task/docs/serve.py +174 -0
  260. dbt/task/dvt_output.py +509 -0
  261. dbt/task/dvt_run.py +282 -0
  262. dbt/task/dvt_seed.py +806 -0
  263. dbt/task/freshness.py +322 -0
  264. dbt/task/function.py +121 -0
  265. dbt/task/group_lookup.py +46 -0
  266. dbt/task/init.py +1022 -0
  267. dbt/task/java.py +316 -0
  268. dbt/task/list.py +236 -0
  269. dbt/task/metadata.py +804 -0
  270. dbt/task/migrate.py +714 -0
  271. dbt/task/printer.py +175 -0
  272. dbt/task/profile.py +1489 -0
  273. dbt/task/profile_serve.py +662 -0
  274. dbt/task/retract.py +441 -0
  275. dbt/task/retry.py +175 -0
  276. dbt/task/run.py +1647 -0
  277. dbt/task/run_operation.py +141 -0
  278. dbt/task/runnable.py +758 -0
  279. dbt/task/seed.py +103 -0
  280. dbt/task/show.py +149 -0
  281. dbt/task/snapshot.py +56 -0
  282. dbt/task/spark.py +414 -0
  283. dbt/task/sql.py +110 -0
  284. dbt/task/target_sync.py +814 -0
  285. dbt/task/test.py +464 -0
  286. dbt/tests/fixtures/__init__.py +1 -0
  287. dbt/tests/fixtures/project.py +620 -0
  288. dbt/tests/util.py +651 -0
  289. dbt/tracking.py +529 -0
  290. dbt/utils/__init__.py +3 -0
  291. dbt/utils/artifact_upload.py +151 -0
  292. dbt/utils/utils.py +408 -0
  293. dbt/version.py +271 -0
  294. dvt_cli/__init__.py +158 -0
  295. dvt_core-0.59.0a51.dist-info/METADATA +288 -0
  296. dvt_core-0.59.0a51.dist-info/RECORD +299 -0
  297. dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
  298. dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
  299. dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1254 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Build comprehensive datatype_mappings for DVT.
4
+
5
+ This script builds version-aware type mappings for all major dbt adapters.
6
+ Spark versions: 3.x (3.0-3.5), 4.x (4.0+)
7
+
8
+ Key changes in Spark 4.0:
9
+ - MySQL: SMALLINT -> ShortType (was IntegerType), FLOAT -> FloatType (was DoubleType)
10
+ - PostgreSQL: TIMESTAMP WITH TIME ZONE handling changed
11
+ - New VARIANT type for semi-structured data
12
+ - Spark 3.4+: TIMESTAMP_NTZ support
13
+ """
14
+
15
from pathlib import Path
from typing import Optional

import duckdb
17
+
18
# All mappings: (adapter, adapter_type, spark_type, spark_version, is_complex, cast_expr, notes)
MAPPINGS = []


def add(adapter: str, adapter_type: str, spark_type: str,
        spark_version: str = "all", is_complex: bool = False,
        cast_expr: Optional[str] = None, notes: str = "") -> None:
    """Append one type-mapping row to the module-level ``MAPPINGS`` list.

    The row is stored as a 7-tuple in the order
    ``(adapter, adapter_type, spark_type, spark_version, is_complex,
    cast_expr, notes)``. No validation or de-duplication is performed.

    Args:
        adapter: Adapter name the mapping belongs to (e.g. "postgres").
        adapter_type: Type name as spelled by the adapter (e.g. "SMALLINT").
        spark_type: Name of the Spark type it maps to (e.g. "ShortType").
        spark_version: Version tag for the row; the callers in this file
            pass "all", "3.x" or "4.x".
        is_complex: Flag stored with the row; callers set it for array,
            struct and map style types.
        cast_expr: Optional cast expression stored with the row, or ``None``.
        notes: Free-form description stored with the row.
    """
    # cast_expr is annotated Optional[str] because its default is None;
    # the previous "str = None" relied on implicit Optional (see PEP 484).
    MAPPINGS.append(
        (adapter, adapter_type, spark_type, spark_version, is_complex, cast_expr, notes)
    )
26
+
27
+ # =============================================================================
28
+ # POSTGRES (and PostgreSQL-compatible: AlloyDB, Materialize, TimescaleDB, CrateDB)
29
+ # =============================================================================
30
def add_postgres_types():
    """Register every PostgreSQL -> Spark type mapping via ``add``.

    Rows are ``(adapter_type, spark_type, spark_version, is_complex, notes)``
    and are registered in declaration order with the adapter fixed to
    "postgres". ``cast_expr`` is left at ``add``'s default.
    """
    rows = (
        # Integer types
        ("SMALLINT", "ShortType", "all", False, "16-bit signed integer"),
        ("INT2", "ShortType", "all", False, "Alias for SMALLINT"),
        ("INTEGER", "IntegerType", "all", False, "32-bit signed integer"),
        ("INT", "IntegerType", "all", False, "Alias for INTEGER"),
        ("INT4", "IntegerType", "all", False, "Alias for INTEGER"),
        ("BIGINT", "LongType", "all", False, "64-bit signed integer"),
        ("INT8", "LongType", "all", False, "Alias for BIGINT"),
        ("SERIAL", "IntegerType", "all", False, "Auto-incrementing integer"),
        ("BIGSERIAL", "LongType", "all", False, "Auto-incrementing bigint"),
        ("SMALLSERIAL", "ShortType", "all", False, "Auto-incrementing smallint"),
        # Decimal / numeric
        ("DECIMAL", "DecimalType", "all", False, "Exact numeric with precision"),
        ("NUMERIC", "DecimalType", "all", False, "Alias for DECIMAL"),
        ("MONEY", "DecimalType", "all", False, "Currency amount"),
        # Floating point
        ("REAL", "FloatType", "all", False, "32-bit floating point"),
        ("FLOAT4", "FloatType", "all", False, "Alias for REAL"),
        ("DOUBLE PRECISION", "DoubleType", "all", False, "64-bit floating point"),
        ("FLOAT8", "DoubleType", "all", False, "Alias for DOUBLE PRECISION"),
        ("FLOAT", "DoubleType", "all", False, "Floating point (precision-dependent)"),
        # Character types
        ("VARCHAR", "StringType", "all", False, "Variable-length string"),
        ("CHARACTER VARYING", "StringType", "all", False, "Alias for VARCHAR"),
        ("CHAR", "StringType", "all", False, "Fixed-length string"),
        ("CHARACTER", "StringType", "all", False, "Alias for CHAR"),
        ("TEXT", "StringType", "all", False, "Unlimited length string"),
        ("BPCHAR", "StringType", "all", False, "Blank-padded character"),
        ("NAME", "StringType", "all", False, "Internal name type"),
        # Binary
        ("BYTEA", "BinaryType", "all", False, "Binary data"),
        # Boolean
        ("BOOLEAN", "BooleanType", "all", False, "Boolean true/false"),
        ("BOOL", "BooleanType", "all", False, "Alias for BOOLEAN"),
        # Date and time-of-day types
        ("DATE", "DateType", "all", False, "Calendar date"),
        ("TIME", "StringType", "all", False, "Time of day (no timezone)"),
        ("TIME WITHOUT TIME ZONE", "StringType", "all", False, "Time without timezone"),
        ("TIME WITH TIME ZONE", "StringType", "all", False, "Time with timezone"),
        ("TIMETZ", "StringType", "all", False, "Alias for TIME WITH TIME ZONE"),
        # Timestamps: the zone-less variants have separate 3.x and 4.x rows
        ("TIMESTAMP", "TimestampType", "3.x", False,
         "Timestamp without timezone (Spark 3.x)"),
        ("TIMESTAMP", "TimestampNTZType", "4.x", False,
         "Timestamp without timezone (Spark 4.x)"),
        ("TIMESTAMP WITHOUT TIME ZONE", "TimestampType", "3.x", False,
         "Explicit no timezone (Spark 3.x)"),
        ("TIMESTAMP WITHOUT TIME ZONE", "TimestampNTZType", "4.x", False,
         "Explicit no timezone (Spark 4.x)"),
        ("TIMESTAMP WITH TIME ZONE", "TimestampType", "all", False,
         "Timestamp with timezone"),
        ("TIMESTAMPTZ", "TimestampType", "all", False,
         "Alias for TIMESTAMP WITH TIME ZONE"),
        # Interval
        ("INTERVAL", "StringType", "all", False, "Time interval"),
        # UUID
        ("UUID", "StringType", "all", False, "Universally unique identifier"),
        # JSON types (separate 3.x and 4.x rows)
        ("JSON", "StringType", "3.x", False, "JSON data (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", False, "JSON data (Spark 4.x with VARIANT)"),
        ("JSONB", "StringType", "3.x", False, "Binary JSON (Spark 3.x)"),
        ("JSONB", "VariantType", "4.x", False, "Binary JSON (Spark 4.x with VARIANT)"),
        # Array types (flagged complex)
        ("ARRAY", "ArrayType", "all", True, "Array of any type"),
        ("_INT4", "ArrayType", "all", True, "Integer array"),
        ("_TEXT", "ArrayType", "all", True, "Text array"),
        ("_VARCHAR", "ArrayType", "all", True, "Varchar array"),
        # Geometric types (stored as string)
        ("POINT", "StringType", "all", False, "Geometric point"),
        ("LINE", "StringType", "all", False, "Infinite line"),
        ("LSEG", "StringType", "all", False, "Line segment"),
        ("BOX", "StringType", "all", False, "Rectangular box"),
        ("PATH", "StringType", "all", False, "Geometric path"),
        ("POLYGON", "StringType", "all", False, "Polygon"),
        ("CIRCLE", "StringType", "all", False, "Circle"),
        # Network types
        ("INET", "StringType", "all", False, "IPv4 or IPv6 address"),
        ("CIDR", "StringType", "all", False, "IPv4 or IPv6 network"),
        ("MACADDR", "StringType", "all", False, "MAC address"),
        ("MACADDR8", "StringType", "all", False, "MAC address (EUI-64)"),
        # Bit strings
        ("BIT", "BinaryType", "all", False, "Fixed-length bit string"),
        ("BIT VARYING", "BinaryType", "all", False, "Variable-length bit string"),
        ("VARBIT", "BinaryType", "all", False, "Alias for BIT VARYING"),
        # Text search
        ("TSVECTOR", "StringType", "all", False, "Text search vector"),
        ("TSQUERY", "StringType", "all", False, "Text search query"),
        # Range types
        ("INT4RANGE", "StringType", "all", False, "Integer range"),
        ("INT8RANGE", "StringType", "all", False, "Bigint range"),
        ("NUMRANGE", "StringType", "all", False, "Numeric range"),
        ("TSRANGE", "StringType", "all", False, "Timestamp range"),
        ("TSTZRANGE", "StringType", "all", False, "Timestamp with timezone range"),
        ("DATERANGE", "StringType", "all", False, "Date range"),
        # Other
        ("OID", "LongType", "all", False, "Object identifier"),
        ("REGCLASS", "StringType", "all", False, "Registered class"),
        ("XML", "StringType", "all", False, "XML data"),
    )
    for pg_type, spark_name, version_tag, complex_flag, description in rows:
        add("postgres", pg_type, spark_name, version_tag, complex_flag,
            notes=description)
140
+
141
+ # =============================================================================
142
+ # MYSQL (with Spark 4.0 version-specific changes)
143
+ # =============================================================================
144
def add_mysql_types():
    """Register every MySQL -> Spark type mapping via ``add``.

    Rows are ``(adapter_type, spark_type, spark_version, notes)`` and are
    registered in declaration order with the adapter fixed to "mysql".
    ``is_complex`` and ``cast_expr`` are left at ``add``'s defaults.
    """
    rows = (
        # Integer types (SMALLINT and MEDIUMINT UNSIGNED have per-version rows)
        ("TINYINT", "ByteType", "all", "8-bit signed integer"),
        ("TINYINT UNSIGNED", "ShortType", "all", "8-bit unsigned integer"),
        ("SMALLINT", "IntegerType", "3.x", "16-bit integer (Spark 3.x reads as INT)"),
        ("SMALLINT", "ShortType", "4.x", "16-bit integer (Spark 4.x reads as SHORT)"),
        ("SMALLINT UNSIGNED", "IntegerType", "all", "16-bit unsigned integer"),
        ("MEDIUMINT", "IntegerType", "all", "24-bit signed integer"),
        ("MEDIUMINT UNSIGNED", "LongType", "3.x", "24-bit unsigned (Spark 3.x)"),
        ("MEDIUMINT UNSIGNED", "IntegerType", "4.x", "24-bit unsigned (Spark 4.x)"),
        ("INT", "IntegerType", "all", "32-bit signed integer"),
        ("INTEGER", "IntegerType", "all", "Alias for INT"),
        ("INT UNSIGNED", "LongType", "all", "32-bit unsigned integer"),
        ("BIGINT", "LongType", "all", "64-bit signed integer"),
        ("BIGINT UNSIGNED", "DecimalType", "all", "64-bit unsigned (needs Decimal)"),
        # Floating point (FLOAT has per-version rows)
        ("FLOAT", "DoubleType", "3.x", "32-bit float (Spark 3.x reads as DOUBLE)"),
        ("FLOAT", "FloatType", "4.x", "32-bit float (Spark 4.x reads as FLOAT)"),
        ("DOUBLE", "DoubleType", "all", "64-bit floating point"),
        ("DOUBLE PRECISION", "DoubleType", "all", "Alias for DOUBLE"),
        ("REAL", "DoubleType", "all", "Alias for DOUBLE"),
        # Decimal
        ("DECIMAL", "DecimalType", "all", "Exact numeric"),
        ("DEC", "DecimalType", "all", "Alias for DECIMAL"),
        ("NUMERIC", "DecimalType", "all", "Alias for DECIMAL"),
        ("FIXED", "DecimalType", "all", "Alias for DECIMAL"),
        # Bit (BIT(n) has per-version rows)
        ("BIT", "BooleanType", "all", "BIT(1) as boolean"),
        ("BIT(1)", "BooleanType", "all", "Single bit as boolean"),
        ("BIT(n)", "LongType", "3.x", "Multi-bit as Long (Spark 3.x)"),
        ("BIT(n)", "BinaryType", "4.x", "Multi-bit as Binary (Spark 4.x)"),
        # String types
        ("CHAR", "StringType", "all", "Fixed-length string"),
        ("VARCHAR", "StringType", "all", "Variable-length string"),
        ("TINYTEXT", "StringType", "all", "255 byte text"),
        ("TEXT", "StringType", "all", "64KB text"),
        ("MEDIUMTEXT", "StringType", "all", "16MB text"),
        ("LONGTEXT", "StringType", "all", "4GB text"),
        # Binary types
        ("BINARY", "BinaryType", "all", "Fixed-length binary"),
        ("VARBINARY", "BinaryType", "all", "Variable-length binary"),
        ("TINYBLOB", "BinaryType", "all", "255 byte blob"),
        ("BLOB", "BinaryType", "all", "64KB blob"),
        ("MEDIUMBLOB", "BinaryType", "all", "16MB blob"),
        ("LONGBLOB", "BinaryType", "all", "4GB blob"),
        # Date / time
        ("DATE", "DateType", "all", "Calendar date"),
        ("TIME", "StringType", "all", "Time of day"),
        ("DATETIME", "TimestampType", "all", "Date and time"),
        ("TIMESTAMP", "TimestampType", "all", "Timestamp"),
        ("YEAR", "IntegerType", "all", "Year value"),
        # JSON (per-version rows)
        ("JSON", "StringType", "3.x", "JSON document (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON document (Spark 4.x)"),
        # Enum and set
        ("ENUM", "StringType", "all", "Enumeration"),
        ("SET", "StringType", "all", "Set of values"),
        # Spatial types
        ("GEOMETRY", "BinaryType", "all", "Geometry type"),
        ("POINT", "BinaryType", "all", "Point geometry"),
        ("LINESTRING", "BinaryType", "all", "Line geometry"),
        ("POLYGON", "BinaryType", "all", "Polygon geometry"),
        ("GEOMETRYCOLLECTION", "BinaryType", "all", "Geometry collection"),
        ("MULTIPOINT", "BinaryType", "all", "Multiple points"),
        ("MULTILINESTRING", "BinaryType", "all", "Multiple lines"),
        ("MULTIPOLYGON", "BinaryType", "all", "Multiple polygons"),
    )
    for mysql_type, spark_name, version_tag, description in rows:
        add("mysql", mysql_type, spark_name, version_tag, notes=description)
219
+
220
+ # =============================================================================
221
+ # BIGQUERY
222
+ # =============================================================================
223
def add_bigquery_types():
    """Register every BigQuery -> Spark type mapping via ``add``.

    Rows are ``(adapter_type, spark_type, spark_version, is_complex, notes)``
    and are registered in declaration order with the adapter fixed to
    "bigquery". ``cast_expr`` is left at ``add``'s default.
    """
    rows = (
        # Integer types
        ("INT64", "LongType", "all", False, "64-bit signed integer"),
        ("INTEGER", "LongType", "all", False, "Alias for INT64"),
        ("INT", "LongType", "all", False, "Alias for INT64"),
        ("SMALLINT", "LongType", "all", False, "Alias for INT64"),
        ("BIGINT", "LongType", "all", False, "Alias for INT64"),
        ("TINYINT", "LongType", "all", False, "Alias for INT64"),
        ("BYTEINT", "LongType", "all", False, "Alias for INT64"),
        # Floating point
        ("FLOAT64", "DoubleType", "all", False, "64-bit floating point"),
        ("FLOAT", "DoubleType", "all", False, "Alias for FLOAT64"),
        # Numeric / decimal
        ("NUMERIC", "DecimalType", "all", False, "38 digits precision, 9 scale"),
        ("DECIMAL", "DecimalType", "all", False, "Alias for NUMERIC"),
        ("BIGNUMERIC", "DecimalType", "all", False, "76 digits precision, 38 scale"),
        ("BIGDECIMAL", "DecimalType", "all", False, "Alias for BIGNUMERIC"),
        # Boolean
        ("BOOL", "BooleanType", "all", False, "Boolean value"),
        ("BOOLEAN", "BooleanType", "all", False, "Alias for BOOL"),
        # String
        ("STRING", "StringType", "all", False, "Variable-length Unicode string"),
        # Binary
        ("BYTES", "BinaryType", "all", False, "Variable-length binary"),
        # Date / time (DATETIME has per-version rows)
        ("DATE", "DateType", "all", False, "Calendar date"),
        ("TIME", "StringType", "all", False, "Time of day"),
        ("DATETIME", "TimestampType", "3.x", False, "Date and time (Spark 3.x)"),
        ("DATETIME", "TimestampNTZType", "4.x", False,
         "Date and time without TZ (Spark 4.x)"),
        ("TIMESTAMP", "TimestampType", "all", False, "Timestamp with microseconds"),
        # Interval
        ("INTERVAL", "StringType", "all", False, "Duration of time"),
        # Complex types (flagged complex)
        ("ARRAY", "ArrayType", "all", True, "Ordered list"),
        ("STRUCT", "StructType", "all", True, "Ordered fields"),
        ("RECORD", "StructType", "all", True, "Alias for STRUCT"),
        # JSON (per-version rows)
        ("JSON", "StringType", "3.x", False, "JSON value (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", False, "JSON value (Spark 4.x)"),
        # Geography
        ("GEOGRAPHY", "StringType", "all", False, "Geographic data (WKT)"),
        # Range
        ("RANGE", "StringType", "all", False, "Range of values"),
    )
    for bq_type, spark_name, version_tag, complex_flag, description in rows:
        add("bigquery", bq_type, spark_name, version_tag, complex_flag,
            notes=description)
277
+
278
+ # =============================================================================
279
+ # SNOWFLAKE
280
+ # =============================================================================
281
def add_snowflake_types():
    """Register every Snowflake -> Spark type mapping via ``add``.

    Rows are ``(adapter_type, spark_type, spark_version, is_complex, notes)``
    and are registered in declaration order with the adapter fixed to
    "snowflake". ``cast_expr`` is left at ``add``'s default.
    """
    rows = (
        # Numeric
        ("NUMBER", "DecimalType", "all", False, "Numeric with precision/scale"),
        ("DECIMAL", "DecimalType", "all", False, "Alias for NUMBER"),
        ("NUMERIC", "DecimalType", "all", False, "Alias for NUMBER"),
        ("INT", "LongType", "all", False, "38-digit integer"),
        ("INTEGER", "LongType", "all", False, "Alias for INT"),
        ("BIGINT", "LongType", "all", False, "Alias for INT"),
        ("SMALLINT", "LongType", "all", False, "Alias for INT"),
        ("TINYINT", "LongType", "all", False, "Alias for INT"),
        ("BYTEINT", "LongType", "all", False, "Alias for INT"),
        # Floating point
        ("FLOAT", "DoubleType", "all", False, "64-bit floating point"),
        ("FLOAT4", "DoubleType", "all", False, "Alias for FLOAT"),
        ("FLOAT8", "DoubleType", "all", False, "Alias for FLOAT"),
        ("DOUBLE", "DoubleType", "all", False, "Alias for FLOAT"),
        ("DOUBLE PRECISION", "DoubleType", "all", False, "Alias for FLOAT"),
        ("REAL", "DoubleType", "all", False, "Alias for FLOAT"),
        # String
        ("VARCHAR", "StringType", "all", False, "Variable-length string (16MB)"),
        ("CHAR", "StringType", "all", False, "Alias for VARCHAR"),
        ("CHARACTER", "StringType", "all", False, "Alias for VARCHAR"),
        ("STRING", "StringType", "all", False, "Alias for VARCHAR"),
        ("TEXT", "StringType", "all", False, "Alias for VARCHAR"),
        ("NCHAR", "StringType", "all", False, "Unicode character"),
        ("NVARCHAR", "StringType", "all", False, "Unicode varchar"),
        ("NVARCHAR2", "StringType", "all", False, "Unicode varchar (Oracle compat)"),
        # Binary
        ("BINARY", "BinaryType", "all", False, "Variable-length binary (8MB)"),
        ("VARBINARY", "BinaryType", "all", False, "Alias for BINARY"),
        # Boolean
        ("BOOLEAN", "BooleanType", "all", False, "Boolean value"),
        # Date / time (TIMESTAMP_NTZ has per-version rows)
        ("DATE", "DateType", "all", False, "Calendar date"),
        ("TIME", "StringType", "all", False, "Time of day"),
        ("DATETIME", "TimestampType", "all", False, "Alias for TIMESTAMP"),
        ("TIMESTAMP", "TimestampType", "all", False, "Timestamp without timezone"),
        ("TIMESTAMP_LTZ", "TimestampType", "all", False,
         "Timestamp with local timezone"),
        ("TIMESTAMP_NTZ", "TimestampType", "3.x", False,
         "Timestamp no timezone (Spark 3.x)"),
        ("TIMESTAMP_NTZ", "TimestampNTZType", "4.x", False,
         "Timestamp no timezone (Spark 4.x)"),
        ("TIMESTAMP_TZ", "TimestampType", "all", False, "Timestamp with timezone"),
        # Semi-structured (VARIANT has per-version rows; OBJECT/ARRAY flagged complex)
        ("VARIANT", "StringType", "3.x", False, "Semi-structured data (Spark 3.x)"),
        ("VARIANT", "VariantType", "4.x", False, "Semi-structured data (Spark 4.x)"),
        ("OBJECT", "MapType", "all", True, "Key-value pairs"),
        ("ARRAY", "ArrayType", "all", True, "Array of values"),
        # Geospatial
        ("GEOGRAPHY", "StringType", "all", False, "Geographic data"),
        ("GEOMETRY", "StringType", "all", False, "Planar geometry"),
    )
    for sf_type, spark_name, version_tag, complex_flag, description in rows:
        add("snowflake", sf_type, spark_name, version_tag, complex_flag,
            notes=description)
337
+
338
+ # =============================================================================
339
+ # REDSHIFT
340
+ # =============================================================================
341
def add_redshift_types():
    """Register the Redshift -> Spark type mappings.

    Each row below is ``(source_type, spark_type, spark_version, note)`` and
    results in exactly one ``add("redshift", ...)`` call, issued in table
    order. ``spark_version`` is ``"all"`` unless the target Spark type differs
    between Spark 3.x and 4.x, in which case one row is listed per version.
    """
    table = (
        # Integer types
        ("SMALLINT", "ShortType", "all", "16-bit signed integer"),
        ("INT2", "ShortType", "all", "Alias for SMALLINT"),
        ("INTEGER", "IntegerType", "all", "32-bit signed integer"),
        ("INT", "IntegerType", "all", "Alias for INTEGER"),
        ("INT4", "IntegerType", "all", "Alias for INTEGER"),
        ("BIGINT", "LongType", "all", "64-bit signed integer"),
        ("INT8", "LongType", "all", "Alias for BIGINT"),
        # Decimal
        ("DECIMAL", "DecimalType", "all", "Exact numeric (38,37)"),
        ("NUMERIC", "DecimalType", "all", "Alias for DECIMAL"),
        # Floating point
        ("REAL", "FloatType", "all", "32-bit floating point"),
        ("FLOAT4", "FloatType", "all", "Alias for REAL"),
        ("DOUBLE PRECISION", "DoubleType", "all", "64-bit floating point"),
        ("FLOAT8", "DoubleType", "all", "Alias for DOUBLE PRECISION"),
        ("FLOAT", "DoubleType", "all", "Alias for DOUBLE PRECISION"),
        # Boolean
        ("BOOLEAN", "BooleanType", "all", "Boolean value"),
        ("BOOL", "BooleanType", "all", "Alias for BOOLEAN"),
        # Character types
        ("CHAR", "StringType", "all", "Fixed-length string (4096)"),
        ("CHARACTER", "StringType", "all", "Alias for CHAR"),
        ("NCHAR", "StringType", "all", "National character"),
        ("BPCHAR", "StringType", "all", "Blank-padded char"),
        ("VARCHAR", "StringType", "all", "Variable-length string (65535)"),
        ("CHARACTER VARYING", "StringType", "all", "Alias for VARCHAR"),
        ("NVARCHAR", "StringType", "all", "National varchar"),
        ("TEXT", "StringType", "all", "Alias for VARCHAR(256)"),
        # Binary
        ("VARBYTE", "BinaryType", "all", "Variable-length binary"),
        ("VARBINARY", "BinaryType", "all", "Alias for VARBYTE"),
        ("BINARY VARYING", "BinaryType", "all", "Alias for VARBYTE"),
        # Date/Time
        ("DATE", "DateType", "all", "Calendar date"),
        ("TIME", "StringType", "all", "Time without timezone"),
        ("TIMETZ", "StringType", "all", "Time with timezone"),
        ("TIME WITHOUT TIME ZONE", "StringType", "all", "Time no TZ"),
        ("TIME WITH TIME ZONE", "StringType", "all", "Time with TZ"),
        ("TIMESTAMP", "TimestampType", "all", "Timestamp without timezone"),
        ("TIMESTAMPTZ", "TimestampType", "all", "Timestamp with timezone"),
        ("TIMESTAMP WITHOUT TIME ZONE", "TimestampType", "3.x", "No TZ (Spark 3.x)"),
        ("TIMESTAMP WITHOUT TIME ZONE", "TimestampNTZType", "4.x", "No TZ (Spark 4.x)"),
        ("TIMESTAMP WITH TIME ZONE", "TimestampType", "all", "With timezone"),
        # Interval
        ("INTERVAL YEAR TO MONTH", "StringType", "all", "Year-month interval"),
        ("INTERVAL DAY TO SECOND", "StringType", "all", "Day-time interval"),
        # Semi-structured (SUPER type)
        ("SUPER", "StringType", "3.x", "Semi-structured (Spark 3.x)"),
        ("SUPER", "VariantType", "4.x", "Semi-structured (Spark 4.x)"),
        # Geometry
        ("GEOMETRY", "BinaryType", "all", "Geometry data"),
        ("GEOGRAPHY", "BinaryType", "all", "Geography data"),
        # HyperLogLog
        ("HLLSKETCH", "BinaryType", "all", "HyperLogLog sketch"),
    )
    for source_type, spark_type, spark_version, note in table:
        add("redshift", source_type, spark_type, spark_version, notes=note)
407
+
408
+ # =============================================================================
409
+ # DATABRICKS (Delta Lake)
410
+ # =============================================================================
411
def add_databricks_types():
    """Register the Databricks (Delta Lake) -> Spark type mappings.

    Rows are ``(source_type, spark_type, spark_version, note)``; rows for the
    container types carry one extra trailing ``True`` that is forwarded to
    ``add()`` as its fifth positional argument (presumably a complex-type
    flag -- confirm against ``add()``). One ``add("databricks", ...)`` call is
    made per row, in table order.
    """
    table = (
        # All native Spark types
        ("TINYINT", "ByteType", "all", "8-bit signed integer"),
        ("BYTE", "ByteType", "all", "Alias for TINYINT"),
        ("SMALLINT", "ShortType", "all", "16-bit signed integer"),
        ("SHORT", "ShortType", "all", "Alias for SMALLINT"),
        ("INT", "IntegerType", "all", "32-bit signed integer"),
        ("INTEGER", "IntegerType", "all", "Alias for INT"),
        ("BIGINT", "LongType", "all", "64-bit signed integer"),
        ("LONG", "LongType", "all", "Alias for BIGINT"),
        # Floating point
        ("FLOAT", "FloatType", "all", "32-bit floating point"),
        ("REAL", "FloatType", "all", "Alias for FLOAT"),
        ("DOUBLE", "DoubleType", "all", "64-bit floating point"),
        ("DOUBLE PRECISION", "DoubleType", "all", "Alias for DOUBLE"),
        # Decimal
        ("DECIMAL", "DecimalType", "all", "Arbitrary precision decimal"),
        ("DEC", "DecimalType", "all", "Alias for DECIMAL"),
        ("NUMERIC", "DecimalType", "all", "Alias for DECIMAL"),
        # String
        ("STRING", "StringType", "all", "UTF-8 string"),
        ("VARCHAR", "StringType", "all", "Alias for STRING"),
        ("CHAR", "StringType", "all", "Alias for STRING"),
        # Binary
        ("BINARY", "BinaryType", "all", "Byte array"),
        # Boolean
        ("BOOLEAN", "BooleanType", "all", "Boolean value"),
        # Date/Time
        ("DATE", "DateType", "all", "Calendar date"),
        ("TIMESTAMP", "TimestampType", "all", "Timestamp with local TZ"),
        ("TIMESTAMP_LTZ", "TimestampType", "all", "Timestamp local TZ"),
        ("TIMESTAMP_NTZ", "TimestampType", "3.x", "No timezone (Spark 3.x)"),
        ("TIMESTAMP_NTZ", "TimestampNTZType", "4.x", "No timezone (Spark 4.x)"),
        # Interval
        ("INTERVAL", "StringType", "all", "Time interval"),
        ("INTERVAL YEAR", "YearMonthIntervalType", "all", "Year interval"),
        ("INTERVAL MONTH", "YearMonthIntervalType", "all", "Month interval"),
        ("INTERVAL DAY", "DayTimeIntervalType", "all", "Day interval"),
        ("INTERVAL HOUR", "DayTimeIntervalType", "all", "Hour interval"),
        ("INTERVAL MINUTE", "DayTimeIntervalType", "all", "Minute interval"),
        ("INTERVAL SECOND", "DayTimeIntervalType", "all", "Second interval"),
        # Complex types
        ("ARRAY", "ArrayType", "all", "Array of elements", True),
        ("MAP", "MapType", "all", "Key-value map", True),
        ("STRUCT", "StructType", "all", "Structured record", True),
        # Variant (Spark 4.0)
        ("VARIANT", "StringType", "3.x", "Semi-structured (Spark 3.x)"),
        ("VARIANT", "VariantType", "4.x", "Semi-structured (Spark 4.x)"),
    )
    for source_type, spark_type, spark_version, note, *flag in table:
        # ``flag`` is empty for plain rows and ``[True]`` for container rows.
        add("databricks", source_type, spark_type, spark_version, *flag, notes=note)
468
+
469
+ # =============================================================================
470
+ # ORACLE
471
+ # =============================================================================
472
def add_oracle_types():
    """Register the Oracle -> Spark type mappings.

    Each row is ``(source_type, spark_type, spark_version, note)`` and yields
    one ``add("oracle", ...)`` call, issued in table order.
    """
    table = (
        # Numeric
        ("NUMBER", "DecimalType", "all", "Numeric with precision/scale"),
        ("FLOAT", "DoubleType", "all", "Floating point (126 binary)"),
        ("BINARY_FLOAT", "FloatType", "all", "32-bit IEEE float"),
        ("BINARY_DOUBLE", "DoubleType", "all", "64-bit IEEE double"),
        # Integer (Oracle doesn't have true integers, uses NUMBER)
        ("INTEGER", "DecimalType", "all", "NUMBER(38)"),
        ("INT", "DecimalType", "all", "Alias for INTEGER"),
        ("SMALLINT", "DecimalType", "all", "NUMBER(38)"),
        # Character
        ("CHAR", "StringType", "all", "Fixed-length character (2000)"),
        ("NCHAR", "StringType", "all", "Fixed-length national char"),
        ("VARCHAR2", "StringType", "all", "Variable-length string (4000)"),
        ("NVARCHAR2", "StringType", "all", "Variable-length national"),
        ("VARCHAR", "StringType", "all", "Alias for VARCHAR2"),
        ("LONG", "StringType", "all", "Variable-length (deprecated)"),
        ("CLOB", "StringType", "all", "Character large object"),
        ("NCLOB", "StringType", "all", "National CLOB"),
        # Binary
        ("RAW", "BinaryType", "all", "Raw binary (2000)"),
        ("LONG RAW", "BinaryType", "all", "Long raw (deprecated)"),
        ("BLOB", "BinaryType", "all", "Binary large object"),
        ("BFILE", "StringType", "all", "External file reference"),
        # Date/Time
        ("DATE", "TimestampType", "all", "Date with time component"),
        ("TIMESTAMP", "TimestampType", "all", "Timestamp no timezone"),
        ("TIMESTAMP WITH TIME ZONE", "TimestampType", "all", "With timezone"),
        ("TIMESTAMP WITH LOCAL TIME ZONE", "TimestampType", "all", "Local TZ"),
        # Interval
        ("INTERVAL YEAR TO MONTH", "StringType", "all", "Year-month interval"),
        ("INTERVAL DAY TO SECOND", "StringType", "all", "Day-time interval"),
        # ROWID
        ("ROWID", "StringType", "all", "Row address"),
        ("UROWID", "StringType", "all", "Universal ROWID"),
        # JSON (Oracle 21c+)
        ("JSON", "StringType", "3.x", "JSON data (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON data (Spark 4.x)"),
        # XMLType
        ("XMLTYPE", "StringType", "all", "XML data"),
        ("SYS.XMLTYPE", "StringType", "all", "XML data (fully qualified)"),
        # SDO_GEOMETRY (spatial)
        ("SDO_GEOMETRY", "StringType", "all", "Spatial geometry"),
        ("MDSYS.SDO_GEOMETRY", "StringType", "all", "Spatial (qualified)"),
    )
    for source_type, spark_type, spark_version, note in table:
        add("oracle", source_type, spark_type, spark_version, notes=note)
525
+
526
+ # =============================================================================
527
+ # SQL SERVER (and Azure Synapse, Azure SQL)
528
+ # =============================================================================
529
def add_sqlserver_types():
    """Register the SQL Server -> Spark type mappings.

    Each row is ``(source_type, spark_type, spark_version, note)`` and yields
    one ``add("sqlserver", ...)`` call, issued in table order.
    """
    table = (
        # Exact numerics
        ("BIT", "BooleanType", "all", "Boolean (0 or 1)"),
        ("TINYINT", "ShortType", "all", "0 to 255"),
        ("SMALLINT", "ShortType", "all", "16-bit signed"),
        ("INT", "IntegerType", "all", "32-bit signed"),
        ("INTEGER", "IntegerType", "all", "Alias for INT"),
        ("BIGINT", "LongType", "all", "64-bit signed"),
        ("DECIMAL", "DecimalType", "all", "Exact numeric (38,38)"),
        ("DEC", "DecimalType", "all", "Alias for DECIMAL"),
        ("NUMERIC", "DecimalType", "all", "Alias for DECIMAL"),
        ("MONEY", "DecimalType", "all", "Currency (-2^63 to 2^63)"),
        ("SMALLMONEY", "DecimalType", "all", "Small currency"),
        # Approximate numerics
        ("FLOAT", "DoubleType", "all", "64-bit floating point"),
        ("REAL", "FloatType", "all", "32-bit floating point"),
        # Character strings
        ("CHAR", "StringType", "all", "Fixed-length (8000)"),
        ("VARCHAR", "StringType", "all", "Variable-length (8000)"),
        ("VARCHAR(MAX)", "StringType", "all", "Variable-length (2GB)"),
        ("TEXT", "StringType", "all", "Variable-length (deprecated)"),
        # Unicode character strings
        ("NCHAR", "StringType", "all", "Fixed-length Unicode (4000)"),
        ("NVARCHAR", "StringType", "all", "Variable Unicode (4000)"),
        ("NVARCHAR(MAX)", "StringType", "all", "Variable Unicode (2GB)"),
        ("NTEXT", "StringType", "all", "Unicode text (deprecated)"),
        # Binary strings
        ("BINARY", "BinaryType", "all", "Fixed-length binary (8000)"),
        ("VARBINARY", "BinaryType", "all", "Variable binary (8000)"),
        ("VARBINARY(MAX)", "BinaryType", "all", "Variable binary (2GB)"),
        ("IMAGE", "BinaryType", "all", "Binary (deprecated)"),
        # Date and time
        ("DATE", "DateType", "all", "Date only"),
        ("TIME", "StringType", "all", "Time only"),
        ("DATETIME", "TimestampType", "all", "Date and time"),
        ("DATETIME2", "TimestampType", "all", "High precision datetime"),
        ("SMALLDATETIME", "TimestampType", "all", "Low precision datetime"),
        ("DATETIMEOFFSET", "TimestampType", "all", "Datetime with timezone"),
        # Other
        ("UNIQUEIDENTIFIER", "StringType", "all", "GUID/UUID"),
        ("SQL_VARIANT", "StringType", "all", "Variant type"),
        ("XML", "StringType", "all", "XML data"),
        ("GEOGRAPHY", "BinaryType", "all", "Geographic data"),
        ("GEOMETRY", "BinaryType", "all", "Geometric data"),
        ("HIERARCHYID", "BinaryType", "all", "Hierarchy position"),
        # JSON (SQL Server 2016+, stored as NVARCHAR)
        # Note: JSON is not a native type in SQL Server, but queries return it
        ("JSON", "StringType", "3.x", "JSON output (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON output (Spark 4.x)"),
    )
    for source_type, spark_type, spark_version, note in table:
        add("sqlserver", source_type, spark_type, spark_version, notes=note)
585
+
586
+ # =============================================================================
587
+ # CLICKHOUSE
588
+ # =============================================================================
589
def add_clickhouse_types():
    """Register the ClickHouse -> Spark type mappings.

    Rows are ``(source_type, spark_type, spark_version, note)``; rows for
    container and geo types carry one extra trailing ``True`` that is forwarded
    to ``add()`` as its fifth positional argument (presumably a complex-type
    flag -- confirm against ``add()``). One ``add("clickhouse", ...)`` call is
    made per row, in table order.
    """
    table = (
        # Integer types
        ("Int8", "ByteType", "all", "8-bit signed"),
        ("Int16", "ShortType", "all", "16-bit signed"),
        ("Int32", "IntegerType", "all", "32-bit signed"),
        ("Int64", "LongType", "all", "64-bit signed"),
        ("Int128", "DecimalType", "all", "128-bit signed"),
        ("Int256", "DecimalType", "all", "256-bit signed"),
        ("UInt8", "ShortType", "all", "8-bit unsigned"),
        ("UInt16", "IntegerType", "all", "16-bit unsigned"),
        ("UInt32", "LongType", "all", "32-bit unsigned"),
        ("UInt64", "DecimalType", "all", "64-bit unsigned"),
        ("UInt128", "DecimalType", "all", "128-bit unsigned"),
        ("UInt256", "DecimalType", "all", "256-bit unsigned"),
        # Floating point
        ("Float32", "FloatType", "all", "32-bit IEEE float"),
        ("Float64", "DoubleType", "all", "64-bit IEEE double"),
        # Decimal
        ("Decimal", "DecimalType", "all", "Fixed-point decimal"),
        ("Decimal32", "DecimalType", "all", "Decimal(9, S)"),
        ("Decimal64", "DecimalType", "all", "Decimal(18, S)"),
        ("Decimal128", "DecimalType", "all", "Decimal(38, S)"),
        ("Decimal256", "DecimalType", "all", "Decimal(76, S)"),
        # Boolean
        ("Bool", "BooleanType", "all", "Boolean"),
        # String
        ("String", "StringType", "all", "Arbitrary length"),
        ("FixedString", "StringType", "all", "Fixed length"),
        # Date/Time
        ("Date", "DateType", "all", "Days since 1970"),
        ("Date32", "DateType", "all", "Extended date range"),
        ("DateTime", "TimestampType", "all", "Unix timestamp"),
        ("DateTime64", "TimestampType", "all", "High precision"),
        # UUID
        ("UUID", "StringType", "all", "UUID value"),
        # Enum
        ("Enum8", "StringType", "all", "Enum with 8-bit index"),
        ("Enum16", "StringType", "all", "Enum with 16-bit index"),
        # Array
        ("Array", "ArrayType", "all", "Array type", True),
        # Tuple
        ("Tuple", "StructType", "all", "Named tuple", True),
        # Map
        ("Map", "MapType", "all", "Key-value map", True),
        # Nested
        ("Nested", "ArrayType", "all", "Nested structure", True),
        # JSON
        ("JSON", "StringType", "3.x", "JSON object (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON object (Spark 4.x)"),
        # IP addresses
        ("IPv4", "StringType", "all", "IPv4 address"),
        ("IPv6", "StringType", "all", "IPv6 address"),
        # Geo
        ("Point", "ArrayType", "all", "X,Y coordinates", True),
        ("Ring", "ArrayType", "all", "Polygon ring", True),
        ("Polygon", "ArrayType", "all", "Polygon", True),
        ("MultiPolygon", "ArrayType", "all", "Multi-polygon", True),
        # Nullable wrapper (handled separately)
        ("Nullable", "NullType", "all", "Nullable wrapper"),
        # LowCardinality (handled separately)
        ("LowCardinality", "StringType", "all", "Dictionary encoded"),
    )
    for source_type, spark_type, spark_version, note, *flag in table:
        # ``flag`` is empty for plain rows and ``[True]`` for container rows.
        add("clickhouse", source_type, spark_type, spark_version, *flag, notes=note)
666
+
667
+ # =============================================================================
668
+ # TRINO / PRESTO (Athena, Starburst)
669
+ # =============================================================================
670
def add_trino_types():
    """Register the Trino / Presto -> Spark type mappings.

    Rows are ``(source_type, spark_type, spark_version, note)``; rows for the
    container types carry one extra trailing ``True`` that is forwarded to
    ``add()`` as its fifth positional argument (presumably a complex-type
    flag -- confirm against ``add()``). One ``add("trino", ...)`` call is made
    per row, in table order.
    """
    table = (
        # Boolean
        ("BOOLEAN", "BooleanType", "all", "Boolean value"),
        # Integer types
        ("TINYINT", "ByteType", "all", "8-bit signed"),
        ("SMALLINT", "ShortType", "all", "16-bit signed"),
        ("INTEGER", "IntegerType", "all", "32-bit signed"),
        ("INT", "IntegerType", "all", "Alias for INTEGER"),
        ("BIGINT", "LongType", "all", "64-bit signed"),
        # Floating point
        ("REAL", "FloatType", "all", "32-bit IEEE float"),
        ("DOUBLE", "DoubleType", "all", "64-bit IEEE double"),
        # Decimal
        ("DECIMAL", "DecimalType", "all", "Fixed-point decimal"),
        # String
        ("VARCHAR", "StringType", "all", "Variable-length string"),
        ("CHAR", "StringType", "all", "Fixed-length string"),
        # Binary
        ("VARBINARY", "BinaryType", "all", "Variable-length binary"),
        # Date/Time
        ("DATE", "DateType", "all", "Calendar date"),
        ("TIME", "StringType", "all", "Time without timezone"),
        ("TIME WITH TIME ZONE", "StringType", "all", "Time with TZ"),
        ("TIMESTAMP", "TimestampType", "3.x", "Timestamp no TZ (Spark 3.x)"),
        ("TIMESTAMP", "TimestampNTZType", "4.x", "Timestamp no TZ (Spark 4.x)"),
        ("TIMESTAMP WITH TIME ZONE", "TimestampType", "all", "With timezone"),
        # Interval
        ("INTERVAL YEAR TO MONTH", "StringType", "all", "Year-month interval"),
        ("INTERVAL DAY TO SECOND", "StringType", "all", "Day-time interval"),
        # Complex types
        ("ARRAY", "ArrayType", "all", "Array of elements", True),
        ("MAP", "MapType", "all", "Key-value map", True),
        ("ROW", "StructType", "all", "Structured row", True),
        # JSON
        ("JSON", "StringType", "3.x", "JSON value (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON value (Spark 4.x)"),
        # IP address
        ("IPADDRESS", "StringType", "all", "IP address"),
        # UUID
        ("UUID", "StringType", "all", "UUID value"),
        # HyperLogLog
        ("HYPERLOGLOG", "BinaryType", "all", "HLL sketch"),
        ("P4HYPERLOGLOG", "BinaryType", "all", "P4 HLL sketch"),
        # Set Digest
        ("SETDIGEST", "BinaryType", "all", "Set digest"),
        # QDigest
        ("QDIGEST", "BinaryType", "all", "Quantile digest"),
        ("TDIGEST", "BinaryType", "all", "T-Digest"),
        # Geometry
        ("GEOMETRY", "BinaryType", "all", "Geometry"),
        ("SPHERICALGEOGRAPHY", "BinaryType", "all", "Spherical geography"),
    )
    for source_type, spark_type, spark_version, note, *flag in table:
        # ``flag`` is empty for plain rows and ``[True]`` for container rows.
        add("trino", source_type, spark_type, spark_version, *flag, notes=note)
736
+
737
+ # Add aliases for Athena (Presto-based)
738
def add_athena_types():
    """Re-register every ``"trino"`` entry of MAPPINGS under ``"athena"``.

    Athena uses Presto/Trino types, so the Trino rows are reused with only the
    adapter name (field 0) swapped; fields 1 through 6 are forwarded to
    ``add()`` positionally and unchanged.
    """
    # Materialise the matching rows before calling add(), so the loop below
    # walks a fixed snapshot rather than MAPPINGS itself.
    trino_rows = [
        (row[1], row[2], row[3], row[4], row[5], row[6])
        for row in MAPPINGS
        if row[0] == "trino"
    ]
    for fields in trino_rows:
        add("athena", *fields)
745
+
746
+ # =============================================================================
747
+ # DUCKDB
748
+ # =============================================================================
749
def add_duckdb_types():
    """Register the DuckDB -> Spark type mappings.

    Rows are ``(source_type, spark_type, spark_version, note)``; rows for the
    container types carry one extra trailing ``True`` that is forwarded to
    ``add()`` as its fifth positional argument (presumably a complex-type
    flag -- confirm against ``add()``). One ``add("duckdb", ...)`` call is made
    per row, in table order.
    """
    table = (
        # Boolean
        ("BOOLEAN", "BooleanType", "all", "Boolean value"),
        ("BOOL", "BooleanType", "all", "Alias for BOOLEAN"),
        # Integer types
        ("TINYINT", "ByteType", "all", "8-bit signed"),
        ("INT1", "ByteType", "all", "Alias for TINYINT"),
        ("SMALLINT", "ShortType", "all", "16-bit signed"),
        ("INT2", "ShortType", "all", "Alias for SMALLINT"),
        ("INTEGER", "IntegerType", "all", "32-bit signed"),
        ("INT", "IntegerType", "all", "Alias for INTEGER"),
        ("INT4", "IntegerType", "all", "Alias for INTEGER"),
        ("BIGINT", "LongType", "all", "64-bit signed"),
        ("INT8", "LongType", "all", "Alias for BIGINT"),
        ("HUGEINT", "DecimalType", "all", "128-bit signed"),
        ("UHUGEINT", "DecimalType", "all", "128-bit unsigned"),
        ("UTINYINT", "ShortType", "all", "8-bit unsigned"),
        ("USMALLINT", "IntegerType", "all", "16-bit unsigned"),
        ("UINTEGER", "LongType", "all", "32-bit unsigned"),
        ("UBIGINT", "DecimalType", "all", "64-bit unsigned"),
        # Floating point
        ("REAL", "FloatType", "all", "32-bit IEEE float"),
        ("FLOAT4", "FloatType", "all", "Alias for REAL"),
        ("FLOAT", "FloatType", "all", "Alias for REAL"),
        ("DOUBLE", "DoubleType", "all", "64-bit IEEE double"),
        ("FLOAT8", "DoubleType", "all", "Alias for DOUBLE"),
        # Decimal
        ("DECIMAL", "DecimalType", "all", "Fixed-point decimal"),
        ("NUMERIC", "DecimalType", "all", "Alias for DECIMAL"),
        # String
        ("VARCHAR", "StringType", "all", "Variable-length string"),
        ("CHAR", "StringType", "all", "Fixed-length string"),
        ("BPCHAR", "StringType", "all", "Blank-padded char"),
        ("TEXT", "StringType", "all", "Alias for VARCHAR"),
        ("STRING", "StringType", "all", "Alias for VARCHAR"),
        # Binary
        ("BLOB", "BinaryType", "all", "Binary data"),
        ("BYTEA", "BinaryType", "all", "Alias for BLOB"),
        ("BINARY", "BinaryType", "all", "Alias for BLOB"),
        ("VARBINARY", "BinaryType", "all", "Alias for BLOB"),
        # Date/Time
        ("DATE", "DateType", "all", "Calendar date"),
        ("TIME", "StringType", "all", "Time of day"),
        ("TIMESTAMP", "TimestampType", "all", "Timestamp"),
        ("TIMESTAMPTZ", "TimestampType", "all", "With timezone"),
        ("TIMESTAMP WITH TIME ZONE", "TimestampType", "all", "With TZ"),
        # Interval
        ("INTERVAL", "StringType", "all", "Time interval"),
        # UUID
        ("UUID", "StringType", "all", "UUID value"),
        # Complex types
        ("LIST", "ArrayType", "all", "List/array type", True),
        ("STRUCT", "StructType", "all", "Struct type", True),
        ("MAP", "MapType", "all", "Map type", True),
        ("UNION", "StructType", "all", "Union type", True),
        # JSON (stored as structured)
        ("JSON", "StringType", "3.x", "JSON (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON (Spark 4.x)"),
        # Enum
        ("ENUM", "StringType", "all", "Enumeration"),
        # Bit
        ("BIT", "StringType", "all", "Bit string"),
        ("BITSTRING", "StringType", "all", "Alias for BIT"),
    )
    for source_type, spark_type, spark_version, note, *flag in table:
        # ``flag`` is empty for plain rows and ``[True]`` for container rows.
        add("duckdb", source_type, spark_type, spark_version, *flag, notes=note)
824
+
825
+ # =============================================================================
826
+ # TERADATA
827
+ # =============================================================================
828
def add_teradata_types():
    """Register the Teradata -> Spark type mappings.

    Every call passes, in order: the adapter name, the Teradata type name, the
    Spark type name and the Spark version the row applies to (``"all"``,
    ``"3.x"`` or ``"4.x"``), plus a free-text ``notes`` keyword.
    """
    # Integer types
    add("teradata", "BYTEINT", "ByteType", "all", notes="8-bit signed")
    add("teradata", "SMALLINT", "ShortType", "all", notes="16-bit signed")
    add("teradata", "INTEGER", "IntegerType", "all", notes="32-bit signed")
    add("teradata", "INT", "IntegerType", "all", notes="Alias for INTEGER")
    add("teradata", "BIGINT", "LongType", "all", notes="64-bit signed")

    # Decimal
    add("teradata", "DECIMAL", "DecimalType", "all", notes="Fixed-point (18,0)")
    add("teradata", "NUMERIC", "DecimalType", "all", notes="Alias for DECIMAL")
    add("teradata", "NUMBER", "DecimalType", "all", notes="Variable precision")

    # Floating point
    # In Teradata, REAL and DOUBLE PRECISION are both synonyms for FLOAT, which
    # is stored as an 8-byte (64-bit) value. Mapping REAL to the 32-bit
    # FloatType would silently drop precision, so it maps to DoubleType like
    # its two synonyms.
    add("teradata", "REAL", "DoubleType", "all", notes="Alias for FLOAT (64-bit IEEE)")
    add("teradata", "FLOAT", "DoubleType", "all", notes="64-bit IEEE")
    add("teradata", "DOUBLE PRECISION", "DoubleType", "all", notes="Alias for FLOAT")

    # Character
    add("teradata", "CHAR", "StringType", "all", notes="Fixed-length (64000)")
    add("teradata", "CHARACTER", "StringType", "all", notes="Alias for CHAR")
    add("teradata", "VARCHAR", "StringType", "all", notes="Variable-length (64000)")
    add("teradata", "CHARACTER VARYING", "StringType", "all", notes="Alias for VARCHAR")
    add("teradata", "LONG VARCHAR", "StringType", "all", notes="Extended varchar")
    add("teradata", "CLOB", "StringType", "all", notes="Character LOB (2GB)")

    # Binary
    add("teradata", "BYTE", "BinaryType", "all", notes="Fixed-length binary")
    add("teradata", "VARBYTE", "BinaryType", "all", notes="Variable binary")
    add("teradata", "BLOB", "BinaryType", "all", notes="Binary LOB (2GB)")

    # Date/Time
    add("teradata", "DATE", "DateType", "all", notes="Calendar date")
    add("teradata", "TIME", "StringType", "all", notes="Time of day")
    add("teradata", "TIME WITH TIME ZONE", "StringType", "all", notes="Time with TZ")
    add("teradata", "TIMESTAMP", "TimestampType", "all", notes="Timestamp")
    add("teradata", "TIMESTAMP WITH TIME ZONE", "TimestampType", "all", notes="With TZ")

    # Interval
    add("teradata", "INTERVAL YEAR", "StringType", "all", notes="Year interval")
    add("teradata", "INTERVAL MONTH", "StringType", "all", notes="Month interval")
    add("teradata", "INTERVAL DAY", "StringType", "all", notes="Day interval")
    add("teradata", "INTERVAL HOUR", "StringType", "all", notes="Hour interval")
    add("teradata", "INTERVAL MINUTE", "StringType", "all", notes="Minute interval")
    add("teradata", "INTERVAL SECOND", "StringType", "all", notes="Second interval")
    add("teradata", "INTERVAL YEAR TO MONTH", "StringType", "all", notes="Year-month")
    add("teradata", "INTERVAL DAY TO HOUR", "StringType", "all", notes="Day-hour")
    add("teradata", "INTERVAL DAY TO MINUTE", "StringType", "all", notes="Day-minute")
    add("teradata", "INTERVAL DAY TO SECOND", "StringType", "all", notes="Day-second")
    add("teradata", "INTERVAL HOUR TO MINUTE", "StringType", "all", notes="Hour-minute")
    add("teradata", "INTERVAL HOUR TO SECOND", "StringType", "all", notes="Hour-second")
    add("teradata", "INTERVAL MINUTE TO SECOND", "StringType", "all", notes="Minute-second")

    # Period
    add("teradata", "PERIOD(DATE)", "StringType", "all", notes="Date period")
    add("teradata", "PERIOD(TIME)", "StringType", "all", notes="Time period")
    add("teradata", "PERIOD(TIMESTAMP)", "StringType", "all", notes="Timestamp period")

    # JSON: one row per Spark major version because the target type differs.
    add("teradata", "JSON", "StringType", "3.x", notes="JSON (Spark 3.x)")
    add("teradata", "JSON", "VariantType", "4.x", notes="JSON (Spark 4.x)")

    # XML
    add("teradata", "XML", "StringType", "all", notes="XML document")

    # Geospatial
    add("teradata", "ST_GEOMETRY", "BinaryType", "all", notes="Geometry")
    add("teradata", "MBR", "BinaryType", "all", notes="Minimum bounding rectangle")
896
+
897
+ # =============================================================================
898
+ # VERTICA
899
+ # =============================================================================
900
def add_vertica_types():
    """Register the Vertica -> Spark type mappings.

    Each row below becomes one ``add("vertica", ...)`` call, issued in table
    order. Every Vertica entry is registered for spark_version ``"all"``.
    A row may carry one trailing extra value, which is forwarded as the fifth
    positional argument of ``add`` (presumably the is_complex flag -- confirm
    against ``add``'s signature).
    """
    # Row layout: (adapter_type, spark_type, notes[, extra positional arg])
    rows = (
        # Integer types
        ("INTEGER", "IntegerType", "32-bit or 64-bit (precision)"),
        ("INT", "IntegerType", "Alias for INTEGER"),
        ("BIGINT", "LongType", "64-bit signed"),
        ("INT8", "LongType", "Alias for BIGINT"),
        ("SMALLINT", "ShortType", "16-bit signed"),
        ("TINYINT", "ByteType", "8-bit signed"),
        # Decimal
        ("NUMERIC", "DecimalType", "Exact numeric"),
        ("DECIMAL", "DecimalType", "Alias for NUMERIC"),
        ("NUMBER", "DecimalType", "Alias for NUMERIC"),
        ("MONEY", "DecimalType", "Currency type"),
        # Floating point
        ("DOUBLE PRECISION", "DoubleType", "64-bit float"),
        ("FLOAT", "DoubleType", "Alias for DOUBLE"),
        ("FLOAT8", "DoubleType", "Alias for DOUBLE"),
        ("REAL", "DoubleType", "Alias for DOUBLE"),
        # Boolean
        ("BOOLEAN", "BooleanType", "Boolean value"),
        # Character
        ("CHAR", "StringType", "Fixed-length (65000)"),
        ("VARCHAR", "StringType", "Variable-length (65000)"),
        ("LONG VARCHAR", "StringType", "Extended varchar"),
        # Binary
        ("BINARY", "BinaryType", "Fixed-length binary"),
        ("VARBINARY", "BinaryType", "Variable binary"),
        ("LONG VARBINARY", "BinaryType", "Extended binary"),
        ("BYTEA", "BinaryType", "Alias for VARBINARY"),
        ("RAW", "BinaryType", "Alias for VARBINARY"),
        # Date/Time
        ("DATE", "DateType", "Calendar date"),
        ("TIME", "StringType", "Time of day"),
        ("TIME WITH TIMEZONE", "StringType", "Time with TZ"),
        ("TIMETZ", "StringType", "Alias for TIME WITH TZ"),
        ("TIMESTAMP", "TimestampType", "Timestamp"),
        ("TIMESTAMP WITH TIMEZONE", "TimestampType", "With TZ"),
        ("TIMESTAMPTZ", "TimestampType", "Alias for WITH TZ"),
        ("DATETIME", "TimestampType", "Alias for TIMESTAMP"),
        ("SMALLDATETIME", "TimestampType", "Minute precision"),
        # Interval
        ("INTERVAL", "StringType", "Time interval"),
        ("INTERVAL DAY TO SECOND", "StringType", "Day-time"),
        ("INTERVAL YEAR TO MONTH", "StringType", "Year-month"),
        # UUID
        ("UUID", "StringType", "UUID value"),
        # Complex types
        ("ARRAY", "ArrayType", "Array type", True),
        ("SET", "ArrayType", "Set type", True),
        ("ROW", "StructType", "Row type", True),
        ("MAP", "MapType", "Map type", True),
        # Geospatial
        ("GEOMETRY", "BinaryType", "Geometry"),
        ("GEOGRAPHY", "BinaryType", "Geography"),
    )

    # *extra is empty for most rows, so those calls omit the fifth positional
    # argument entirely, exactly like a hand-written call without it.
    for adapter_type, spark_type, notes, *extra in rows:
        add("vertica", adapter_type, spark_type, "all", *extra, notes=notes)
964
+
965
+ # =============================================================================
966
+ # HIVE
967
+ # =============================================================================
968
def add_hive_types():
    """Register the Hive -> Spark type mappings.

    Each row below becomes one ``add("hive", ...)`` call, issued in table
    order. Every Hive entry is registered for spark_version ``"all"``.
    A row may carry one trailing extra value, which is forwarded as the fifth
    positional argument of ``add`` (presumably the is_complex flag -- confirm
    against ``add``'s signature).
    """
    # Row layout: (adapter_type, spark_type, notes[, extra positional arg])
    rows = (
        # Numeric types
        ("TINYINT", "ByteType", "8-bit signed"),
        ("SMALLINT", "ShortType", "16-bit signed"),
        ("INT", "IntegerType", "32-bit signed"),
        ("INTEGER", "IntegerType", "Alias for INT"),
        ("BIGINT", "LongType", "64-bit signed"),
        # Floating point
        ("FLOAT", "FloatType", "32-bit IEEE"),
        ("DOUBLE", "DoubleType", "64-bit IEEE"),
        ("DOUBLE PRECISION", "DoubleType", "Alias for DOUBLE"),
        # Decimal
        ("DECIMAL", "DecimalType", "Fixed-point decimal"),
        ("NUMERIC", "DecimalType", "Alias for DECIMAL"),
        # String
        ("STRING", "StringType", "Unbounded string"),
        ("VARCHAR", "StringType", "Variable-length (65535)"),
        ("CHAR", "StringType", "Fixed-length (255)"),
        # Binary
        ("BINARY", "BinaryType", "Binary data"),
        # Boolean
        ("BOOLEAN", "BooleanType", "Boolean value"),
        # Date/Time
        ("DATE", "DateType", "Calendar date"),
        ("TIMESTAMP", "TimestampType", "Timestamp"),
        ("INTERVAL", "StringType", "Time interval"),
        # Complex types
        ("ARRAY", "ArrayType", "Array type", True),
        ("MAP", "MapType", "Key-value map", True),
        ("STRUCT", "StructType", "Struct type", True),
        ("UNIONTYPE", "StructType", "Union type", True),
    )

    # *extra is empty for scalar rows, so those calls omit the fifth
    # positional argument entirely.
    for adapter_type, spark_type, notes, *extra in rows:
        add("hive", adapter_type, spark_type, "all", *extra, notes=notes)
1006
+
1007
+ # =============================================================================
1008
+ # DB2
1009
+ # =============================================================================
1010
def add_db2_types():
    """Register the DB2 -> Spark type mappings.

    Each row below becomes one ``add("db2", ...)`` call, issued in table
    order. BOOLEAN is the only type registered twice: once for spark_version
    ``"3.x"`` (as StringType) and once for ``"4.x"`` (as BooleanType).
    """
    # Row layout: (adapter_type, spark_type, spark_version, notes)
    rows = (
        # Integer types
        ("SMALLINT", "ShortType", "all", "16-bit signed"),
        ("INTEGER", "IntegerType", "all", "32-bit signed"),
        ("INT", "IntegerType", "all", "Alias for INTEGER"),
        ("BIGINT", "LongType", "all", "64-bit signed"),
        # Decimal
        ("DECIMAL", "DecimalType", "all", "Exact numeric (31,31)"),
        ("DEC", "DecimalType", "all", "Alias for DECIMAL"),
        ("NUMERIC", "DecimalType", "all", "Alias for DECIMAL"),
        ("NUM", "DecimalType", "all", "Alias for DECIMAL"),
        # Floating point
        ("REAL", "FloatType", "all", "32-bit IEEE"),
        ("DOUBLE", "DoubleType", "all", "64-bit IEEE"),
        ("DOUBLE PRECISION", "DoubleType", "all", "Alias for DOUBLE"),
        ("FLOAT", "DoubleType", "all", "Alias for DOUBLE"),
        ("DECFLOAT", "DecimalType", "all", "Decimal floating point"),
        # Character
        ("CHAR", "StringType", "all", "Fixed-length (254)"),
        ("CHARACTER", "StringType", "all", "Alias for CHAR"),
        ("VARCHAR", "StringType", "all", "Variable-length (32672)"),
        ("CHARACTER VARYING", "StringType", "all", "Alias for VARCHAR"),
        ("LONG VARCHAR", "StringType", "all", "Long varchar (32700)"),
        ("CLOB", "StringType", "all", "Character LOB (2GB)"),
        ("DBCLOB", "StringType", "all", "Double-byte CLOB"),
        # Graphic (DBCS)
        ("GRAPHIC", "StringType", "all", "Fixed DBCS (127)"),
        ("VARGRAPHIC", "StringType", "all", "Variable DBCS (16336)"),
        ("LONG VARGRAPHIC", "StringType", "all", "Long DBCS (16350)"),
        # Binary
        ("BINARY", "BinaryType", "all", "Fixed-length (254)"),
        ("VARBINARY", "BinaryType", "all", "Variable binary (32672)"),
        ("BLOB", "BinaryType", "all", "Binary LOB (2GB)"),
        # Boolean - Spark 4.0 changed DB2 BOOLEAN mapping
        ("BOOLEAN", "StringType", "3.x", "Boolean as CHAR(1) (Spark 3.x)"),
        ("BOOLEAN", "BooleanType", "4.x", "Boolean (Spark 4.x)"),
        # Date/Time
        ("DATE", "DateType", "all", "Calendar date"),
        ("TIME", "StringType", "all", "Time of day"),
        ("TIMESTAMP", "TimestampType", "all", "Timestamp"),
        # XML
        ("XML", "StringType", "all", "XML document"),
        # Row ID
        ("ROWID", "BinaryType", "all", "Row identifier"),
    )

    for adapter_type, spark_type, version, notes in rows:
        add("db2", adapter_type, spark_type, version, notes=notes)
1063
+
1064
+ # =============================================================================
1065
+ # SQLITE
1066
+ # =============================================================================
1067
def add_sqlite_types():
    """Register the SQLite -> Spark type mappings.

    Each row below becomes one ``add("sqlite", ...)`` call, issued in table
    order. JSON is the only type registered twice: once for spark_version
    ``"3.x"`` (as StringType) and once for ``"4.x"`` (as VariantType).
    """
    # SQLite has dynamic typing with 5 storage classes; declared type names
    # are grouped below by the storage class / affinity they fall under.
    # Row layout: (adapter_type, spark_type, spark_version, notes)
    rows = (
        # INTEGER
        ("INTEGER", "LongType", "all", "64-bit signed integer"),
        ("INT", "LongType", "all", "Alias for INTEGER"),
        ("TINYINT", "LongType", "all", "Stored as INTEGER"),
        ("SMALLINT", "LongType", "all", "Stored as INTEGER"),
        ("MEDIUMINT", "LongType", "all", "Stored as INTEGER"),
        ("BIGINT", "LongType", "all", "Stored as INTEGER"),
        ("UNSIGNED BIG INT", "LongType", "all", "Stored as INTEGER"),
        ("INT2", "LongType", "all", "Stored as INTEGER"),
        ("INT8", "LongType", "all", "Stored as INTEGER"),
        # REAL
        ("REAL", "DoubleType", "all", "64-bit IEEE float"),
        ("DOUBLE", "DoubleType", "all", "Stored as REAL"),
        ("DOUBLE PRECISION", "DoubleType", "all", "Stored as REAL"),
        ("FLOAT", "DoubleType", "all", "Stored as REAL"),
        # TEXT
        ("TEXT", "StringType", "all", "Variable-length string"),
        ("CHARACTER", "StringType", "all", "Stored as TEXT"),
        ("VARCHAR", "StringType", "all", "Stored as TEXT"),
        ("VARYING CHARACTER", "StringType", "all", "Stored as TEXT"),
        ("NCHAR", "StringType", "all", "Stored as TEXT"),
        ("NATIVE CHARACTER", "StringType", "all", "Stored as TEXT"),
        ("NVARCHAR", "StringType", "all", "Stored as TEXT"),
        ("CLOB", "StringType", "all", "Stored as TEXT"),
        # BLOB
        ("BLOB", "BinaryType", "all", "Binary data"),
        # NUMERIC (affinity)
        ("NUMERIC", "DecimalType", "all", "Numeric affinity"),
        ("DECIMAL", "DecimalType", "all", "Stored as NUMERIC"),
        ("BOOLEAN", "BooleanType", "all", "Stored as NUMERIC (0/1)"),
        ("DATE", "DateType", "all", "Stored as TEXT/REAL/INT"),
        ("DATETIME", "TimestampType", "all", "Stored as TEXT/REAL/INT"),
        # JSON (stored as TEXT in SQLite, but parsed)
        ("JSON", "StringType", "3.x", "JSON as TEXT (Spark 3.x)"),
        ("JSON", "VariantType", "4.x", "JSON (Spark 4.x)"),
    )

    for adapter_type, spark_type, version, notes in rows:
        add("sqlite", adapter_type, spark_type, version, notes=notes)
1109
+
1110
+ # =============================================================================
1111
+ # SPARK (native types for completeness)
1112
+ # =============================================================================
1113
def add_spark_types():
    """Native Spark types for Spark-to-Spark operations.

    Every entry is an identity mapping: the same name is passed to ``add`` as
    both the adapter type and the Spark type. A row may carry one trailing
    extra value, which is forwarded as the fifth positional argument of
    ``add`` (presumably the is_complex flag -- confirm against ``add``'s
    signature).
    """
    # Row layout: (type_name, spark_version, notes[, extra positional arg])
    rows = (
        ("ByteType", "all", "8-bit signed"),
        ("ShortType", "all", "16-bit signed"),
        ("IntegerType", "all", "32-bit signed"),
        ("LongType", "all", "64-bit signed"),
        ("FloatType", "all", "32-bit float"),
        ("DoubleType", "all", "64-bit float"),
        ("DecimalType", "all", "Arbitrary precision"),
        ("StringType", "all", "UTF-8 string"),
        ("BinaryType", "all", "Byte array"),
        ("BooleanType", "all", "Boolean"),
        ("DateType", "all", "Date"),
        ("TimestampType", "all", "Timestamp"),
        # NOTE(review): registered for "4.x" although the note text says
        # "Spark 3.4+" -- confirm which one is intended.
        ("TimestampNTZType", "4.x", "No TZ (Spark 3.4+)"),
        ("ArrayType", "all", "Array", True),
        ("MapType", "all", "Map", True),
        ("StructType", "all", "Struct", True),
        ("VariantType", "4.x", "Variant (Spark 4.0+)"),
        ("YearMonthIntervalType", "all", "Year-month interval"),
        ("DayTimeIntervalType", "all", "Day-time interval"),
        ("NullType", "all", "Null type"),
        ("CalendarIntervalType", "all", "Calendar interval"),
    )

    # *extra is empty for most rows, so those calls omit the fifth positional
    # argument entirely.
    for type_name, version, notes, *extra in rows:
        add("spark", type_name, type_name, version, *extra, notes=notes)
1136
+
1137
+ # =============================================================================
1138
+ # MAIN BUILD FUNCTION
1139
+ # =============================================================================
1140
def build_registry():
    """Build all type mappings.

    Runs every adapter's ``add_*_types`` loader in a fixed order, printing
    after each one how many rows in the module-level ``MAPPINGS`` collection
    belong to that adapter (rows are matched on their first element), then
    prints a grand total.

    Returns:
        The module-level ``MAPPINGS`` object itself (not a copy).

    NOTE(review): nothing here resets ``MAPPINGS`` before loading, so calling
    this function twice in one process presumably duplicates every row --
    confirm against how ``add`` stores entries.
    """
    print("Building comprehensive datatype mappings...")

    # (adapter label, loader) pairs in registration order. Keeping the label
    # next to its loader means the printed name cannot drift from the
    # function that was actually called.
    loaders = (
        ("postgres", add_postgres_types),
        ("mysql", add_mysql_types),
        ("bigquery", add_bigquery_types),
        ("snowflake", add_snowflake_types),
        ("redshift", add_redshift_types),
        ("databricks", add_databricks_types),
        ("oracle", add_oracle_types),
        ("sqlserver", add_sqlserver_types),
        ("clickhouse", add_clickhouse_types),
        ("trino", add_trino_types),
        ("athena", add_athena_types),
        ("duckdb", add_duckdb_types),
        ("teradata", add_teradata_types),
        ("vertica", add_vertica_types),
        ("hive", add_hive_types),
        ("db2", add_db2_types),
        ("sqlite", add_sqlite_types),
        ("spark", add_spark_types),
    )

    for adapter, loader in loaders:
        loader()
        # Count by adapter name rather than by delta so the figure matches
        # what is actually stored under that name.
        registered = sum(1 for m in MAPPINGS if m[0] == adapter)
        print(f" + {adapter}: {registered} types")

    adapter_names = {m[0] for m in MAPPINGS}
    print(f"\nTotal: {len(MAPPINGS)} type mappings across {len(adapter_names)} adapters")

    return MAPPINGS
1202
+
1203
+
1204
def save_to_duckdb(db_path: str):
    """Save mappings to DuckDB.

    Builds the registry via ``build_registry()``, then drops and recreates
    the ``datatype_mappings`` table in the database at ``db_path`` and
    bulk-inserts every mapping row. Afterwards it prints the row count, the
    distinct adapter names, and a per-``spark_version`` breakdown.

    Args:
        db_path: Path handed to ``duckdb.connect``.

    The connection is closed even if a statement fails part-way through, and
    any such error still propagates to the caller.
    """
    mappings = build_registry()

    conn = duckdb.connect(db_path)
    try:
        # Drop and recreate table so the file always reflects this build only.
        conn.execute("DROP TABLE IF EXISTS datatype_mappings")
        conn.execute("""
            CREATE TABLE datatype_mappings (
                adapter_name VARCHAR,
                adapter_type VARCHAR,
                spark_type VARCHAR,
                spark_version VARCHAR,
                is_complex BOOLEAN,
                cast_expression VARCHAR,
                notes VARCHAR
            )
        """)

        # Insert all mappings; each row must supply the seven column values
        # in table order.
        conn.executemany(
            "INSERT INTO datatype_mappings VALUES (?, ?, ?, ?, ?, ?, ?)",
            mappings
        )

        # Verify by reading back what was written.
        count = conn.execute("SELECT COUNT(*) FROM datatype_mappings").fetchone()[0]
        adapters = conn.execute(
            "SELECT DISTINCT adapter_name FROM datatype_mappings ORDER BY adapter_name"
        ).fetchall()

        print(f"\nSaved to {db_path}")
        print(f" - {count} total mappings")
        print(f" - {len(adapters)} adapters: {', '.join(a[0] for a in adapters)}")

        # Version-specific stats
        v3_count = conn.execute(
            "SELECT COUNT(*) FROM datatype_mappings WHERE spark_version = '3.x'"
        ).fetchone()[0]
        v4_count = conn.execute(
            "SELECT COUNT(*) FROM datatype_mappings WHERE spark_version = '4.x'"
        ).fetchone()[0]
        all_count = conn.execute(
            "SELECT COUNT(*) FROM datatype_mappings WHERE spark_version = 'all'"
        ).fetchone()[0]
        print(f" - Version-specific: {v3_count} for Spark 3.x, {v4_count} for Spark 4.x, {all_count} for all versions")
    finally:
        # Always release the database handle, including on failure.
        conn.close()
1245
+
1246
+
1247
if __name__ == "__main__":
    import sys

    # The first command-line argument, when given, overrides the default
    # output database file.
    db_path = sys.argv[1] if len(sys.argv) > 1 else "adapters_registry.duckdb"

    save_to_duckdb(db_path)