dvt-core 0.59.0a51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2660 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +60 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.py +642 -0
  74. dbt/compute/federated_executor.py +1080 -0
  75. dbt/compute/filter_pushdown.py +273 -0
  76. dbt/compute/jar_provisioning.py +273 -0
  77. dbt/compute/java_compat.py +689 -0
  78. dbt/compute/jdbc_utils.py +1252 -0
  79. dbt/compute/metadata/__init__.py +63 -0
  80. dbt/compute/metadata/adapters_registry.py +370 -0
  81. dbt/compute/metadata/catalog_store.py +1036 -0
  82. dbt/compute/metadata/registry.py +674 -0
  83. dbt/compute/metadata/store.py +1020 -0
  84. dbt/compute/smart_selector.py +377 -0
  85. dbt/compute/spark_logger.py +272 -0
  86. dbt/compute/strategies/__init__.py +55 -0
  87. dbt/compute/strategies/base.py +165 -0
  88. dbt/compute/strategies/dataproc.py +207 -0
  89. dbt/compute/strategies/emr.py +203 -0
  90. dbt/compute/strategies/local.py +472 -0
  91. dbt/compute/strategies/standalone.py +262 -0
  92. dbt/config/__init__.py +4 -0
  93. dbt/config/catalogs.py +94 -0
  94. dbt/config/compute.py +513 -0
  95. dbt/config/dvt_profile.py +408 -0
  96. dbt/config/profile.py +422 -0
  97. dbt/config/project.py +888 -0
  98. dbt/config/project_utils.py +48 -0
  99. dbt/config/renderer.py +231 -0
  100. dbt/config/runtime.py +564 -0
  101. dbt/config/selectors.py +208 -0
  102. dbt/config/utils.py +77 -0
  103. dbt/constants.py +28 -0
  104. dbt/context/__init__.py +0 -0
  105. dbt/context/base.py +745 -0
  106. dbt/context/configured.py +135 -0
  107. dbt/context/context_config.py +382 -0
  108. dbt/context/docs.py +82 -0
  109. dbt/context/exceptions_jinja.py +178 -0
  110. dbt/context/macro_resolver.py +195 -0
  111. dbt/context/macros.py +171 -0
  112. dbt/context/manifest.py +72 -0
  113. dbt/context/providers.py +2249 -0
  114. dbt/context/query_header.py +13 -0
  115. dbt/context/secret.py +58 -0
  116. dbt/context/target.py +74 -0
  117. dbt/contracts/__init__.py +0 -0
  118. dbt/contracts/files.py +413 -0
  119. dbt/contracts/graph/__init__.py +0 -0
  120. dbt/contracts/graph/manifest.py +1904 -0
  121. dbt/contracts/graph/metrics.py +97 -0
  122. dbt/contracts/graph/model_config.py +70 -0
  123. dbt/contracts/graph/node_args.py +42 -0
  124. dbt/contracts/graph/nodes.py +1806 -0
  125. dbt/contracts/graph/semantic_manifest.py +232 -0
  126. dbt/contracts/graph/unparsed.py +811 -0
  127. dbt/contracts/project.py +419 -0
  128. dbt/contracts/results.py +53 -0
  129. dbt/contracts/selection.py +23 -0
  130. dbt/contracts/sql.py +85 -0
  131. dbt/contracts/state.py +68 -0
  132. dbt/contracts/util.py +46 -0
  133. dbt/deprecations.py +348 -0
  134. dbt/deps/__init__.py +0 -0
  135. dbt/deps/base.py +152 -0
  136. dbt/deps/git.py +195 -0
  137. dbt/deps/local.py +79 -0
  138. dbt/deps/registry.py +130 -0
  139. dbt/deps/resolver.py +149 -0
  140. dbt/deps/tarball.py +120 -0
  141. dbt/docs/source/_ext/dbt_click.py +119 -0
  142. dbt/docs/source/conf.py +32 -0
  143. dbt/env_vars.py +64 -0
  144. dbt/event_time/event_time.py +40 -0
  145. dbt/event_time/sample_window.py +60 -0
  146. dbt/events/__init__.py +15 -0
  147. dbt/events/base_types.py +36 -0
  148. dbt/events/core_types_pb2.py +2 -0
  149. dbt/events/logging.py +108 -0
  150. dbt/events/types.py +2516 -0
  151. dbt/exceptions.py +1486 -0
  152. dbt/flags.py +89 -0
  153. dbt/graph/__init__.py +11 -0
  154. dbt/graph/cli.py +249 -0
  155. dbt/graph/graph.py +172 -0
  156. dbt/graph/queue.py +214 -0
  157. dbt/graph/selector.py +374 -0
  158. dbt/graph/selector_methods.py +975 -0
  159. dbt/graph/selector_spec.py +222 -0
  160. dbt/graph/thread_pool.py +18 -0
  161. dbt/hooks.py +21 -0
  162. dbt/include/README.md +49 -0
  163. dbt/include/__init__.py +3 -0
  164. dbt/include/data/adapters_registry.duckdb +0 -0
  165. dbt/include/data/build_comprehensive_registry.py +1254 -0
  166. dbt/include/data/build_registry.py +242 -0
  167. dbt/include/data/csv/adapter_queries.csv +33 -0
  168. dbt/include/data/csv/syntax_rules.csv +9 -0
  169. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  170. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  171. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  172. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  173. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  174. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  175. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  176. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  177. dbt/include/dvt_starter_project/README.md +15 -0
  178. dbt/include/dvt_starter_project/__init__.py +3 -0
  179. dbt/include/dvt_starter_project/analyses/PLACEHOLDER +0 -0
  180. dbt/include/dvt_starter_project/dvt_project.yml +39 -0
  181. dbt/include/dvt_starter_project/logs/PLACEHOLDER +0 -0
  182. dbt/include/dvt_starter_project/macros/PLACEHOLDER +0 -0
  183. dbt/include/dvt_starter_project/models/example/my_first_dbt_model.sql +27 -0
  184. dbt/include/dvt_starter_project/models/example/my_second_dbt_model.sql +6 -0
  185. dbt/include/dvt_starter_project/models/example/schema.yml +21 -0
  186. dbt/include/dvt_starter_project/seeds/PLACEHOLDER +0 -0
  187. dbt/include/dvt_starter_project/snapshots/PLACEHOLDER +0 -0
  188. dbt/include/dvt_starter_project/tests/PLACEHOLDER +0 -0
  189. dbt/internal_deprecations.py +26 -0
  190. dbt/jsonschemas/__init__.py +3 -0
  191. dbt/jsonschemas/jsonschemas.py +309 -0
  192. dbt/jsonschemas/project/0.0.110.json +4717 -0
  193. dbt/jsonschemas/project/0.0.85.json +2015 -0
  194. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  195. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  196. dbt/jsonschemas/resources/latest.json +6773 -0
  197. dbt/links.py +4 -0
  198. dbt/materializations/__init__.py +0 -0
  199. dbt/materializations/incremental/__init__.py +0 -0
  200. dbt/materializations/incremental/microbatch.py +236 -0
  201. dbt/mp_context.py +8 -0
  202. dbt/node_types.py +37 -0
  203. dbt/parser/__init__.py +23 -0
  204. dbt/parser/analysis.py +21 -0
  205. dbt/parser/base.py +548 -0
  206. dbt/parser/common.py +266 -0
  207. dbt/parser/docs.py +52 -0
  208. dbt/parser/fixtures.py +51 -0
  209. dbt/parser/functions.py +30 -0
  210. dbt/parser/generic_test.py +100 -0
  211. dbt/parser/generic_test_builders.py +333 -0
  212. dbt/parser/hooks.py +122 -0
  213. dbt/parser/macros.py +137 -0
  214. dbt/parser/manifest.py +2208 -0
  215. dbt/parser/models.py +573 -0
  216. dbt/parser/partial.py +1178 -0
  217. dbt/parser/read_files.py +445 -0
  218. dbt/parser/schema_generic_tests.py +422 -0
  219. dbt/parser/schema_renderer.py +111 -0
  220. dbt/parser/schema_yaml_readers.py +935 -0
  221. dbt/parser/schemas.py +1466 -0
  222. dbt/parser/search.py +149 -0
  223. dbt/parser/seeds.py +28 -0
  224. dbt/parser/singular_test.py +20 -0
  225. dbt/parser/snapshots.py +44 -0
  226. dbt/parser/sources.py +558 -0
  227. dbt/parser/sql.py +62 -0
  228. dbt/parser/unit_tests.py +621 -0
  229. dbt/plugins/__init__.py +20 -0
  230. dbt/plugins/contracts.py +9 -0
  231. dbt/plugins/exceptions.py +2 -0
  232. dbt/plugins/manager.py +163 -0
  233. dbt/plugins/manifest.py +21 -0
  234. dbt/profiler.py +20 -0
  235. dbt/py.typed +1 -0
  236. dbt/query_analyzer.py +410 -0
  237. dbt/runners/__init__.py +2 -0
  238. dbt/runners/exposure_runner.py +7 -0
  239. dbt/runners/no_op_runner.py +45 -0
  240. dbt/runners/saved_query_runner.py +7 -0
  241. dbt/selected_resources.py +8 -0
  242. dbt/task/__init__.py +0 -0
  243. dbt/task/base.py +506 -0
  244. dbt/task/build.py +197 -0
  245. dbt/task/clean.py +56 -0
  246. dbt/task/clone.py +161 -0
  247. dbt/task/compile.py +150 -0
  248. dbt/task/compute.py +458 -0
  249. dbt/task/debug.py +513 -0
  250. dbt/task/deps.py +280 -0
  251. dbt/task/docs/__init__.py +3 -0
  252. dbt/task/docs/api/__init__.py +23 -0
  253. dbt/task/docs/api/catalog.py +204 -0
  254. dbt/task/docs/api/lineage.py +234 -0
  255. dbt/task/docs/api/profile.py +204 -0
  256. dbt/task/docs/api/spark.py +186 -0
  257. dbt/task/docs/generate.py +1002 -0
  258. dbt/task/docs/index.html +250 -0
  259. dbt/task/docs/serve.py +174 -0
  260. dbt/task/dvt_output.py +509 -0
  261. dbt/task/dvt_run.py +282 -0
  262. dbt/task/dvt_seed.py +806 -0
  263. dbt/task/freshness.py +322 -0
  264. dbt/task/function.py +121 -0
  265. dbt/task/group_lookup.py +46 -0
  266. dbt/task/init.py +1022 -0
  267. dbt/task/java.py +316 -0
  268. dbt/task/list.py +236 -0
  269. dbt/task/metadata.py +804 -0
  270. dbt/task/migrate.py +714 -0
  271. dbt/task/printer.py +175 -0
  272. dbt/task/profile.py +1489 -0
  273. dbt/task/profile_serve.py +662 -0
  274. dbt/task/retract.py +441 -0
  275. dbt/task/retry.py +175 -0
  276. dbt/task/run.py +1647 -0
  277. dbt/task/run_operation.py +141 -0
  278. dbt/task/runnable.py +758 -0
  279. dbt/task/seed.py +103 -0
  280. dbt/task/show.py +149 -0
  281. dbt/task/snapshot.py +56 -0
  282. dbt/task/spark.py +414 -0
  283. dbt/task/sql.py +110 -0
  284. dbt/task/target_sync.py +814 -0
  285. dbt/task/test.py +464 -0
  286. dbt/tests/fixtures/__init__.py +1 -0
  287. dbt/tests/fixtures/project.py +620 -0
  288. dbt/tests/util.py +651 -0
  289. dbt/tracking.py +529 -0
  290. dbt/utils/__init__.py +3 -0
  291. dbt/utils/artifact_upload.py +151 -0
  292. dbt/utils/utils.py +408 -0
  293. dbt/version.py +271 -0
  294. dvt_cli/__init__.py +158 -0
  295. dvt_core-0.59.0a51.dist-info/METADATA +288 -0
  296. dvt_core-0.59.0a51.dist-info/RECORD +299 -0
  297. dvt_core-0.59.0a51.dist-info/WHEEL +5 -0
  298. dvt_core-0.59.0a51.dist-info/entry_points.txt +2 -0
  299. dvt_core-0.59.0a51.dist-info/top_level.txt +2 -0
dbt/compute/metadata/registry.py (new file, +674 lines per the listing above)
@@ -0,0 +1,674 @@
+ # =============================================================================
+ # DVT Global Registries
+ # =============================================================================
+ # Centralized lookup tables for type mappings and syntax rules.
+ # These are shipped with DVT and loaded into the project metadata store.
+ #
+ # DVT v0.54.0: Initial implementation
+ # =============================================================================
+
+ from typing import Dict, List, Optional, Any
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class TypeMapping:
+     """A single type mapping entry."""
+     adapter_name: str
+     adapter_native_type: str
+     spark_version: str
+     spark_native_type: str
+     is_complex: bool = False
+     cast_expression: Optional[str] = None
+
+
+ @dataclass
+ class SyntaxRule:
+     """Syntax rules for a specific adapter."""
+     adapter_name: str
+     quote_start: str
+     quote_end: str
+     case_sensitivity: str  # 'LOWER', 'UPPER', 'PRESERVE'
+     reserved_keywords: List[str]
+
+
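As an illustrative sketch (not part of the diff), a single registry row expressed with these dataclasses might look like the following; the values are taken from the PostgreSQL table further down:

    # Illustrative only: one PostgreSQL row as a TypeMapping instance.
    example = TypeMapping(
        adapter_name="postgres",
        adapter_native_type="NUMERIC",
        spark_version="all",
        spark_native_type="DecimalType(38,18)",
    )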
+ class TypeRegistry:
+     """
+     Global type registry for mapping adapter types to Spark types.
+
+     This registry is shipped with DVT and provides the definitive mapping
+     between every supported adapter's native types and Spark's Catalyst types.
+     """
+
+     # ==========================================================================
+     # Type Mappings: adapter_name -> adapter_type -> spark_version -> spark_type
+     # ==========================================================================
+
+     TYPE_MAPPINGS: List[Dict[str, Any]] = [
+         # ======================================================================
+         # PostgreSQL
+         # ======================================================================
+         # String types
+         {"adapter_name": "postgres", "adapter_native_type": "TEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "VARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "CHARACTER VARYING", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "CHARACTER", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "BPCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "NAME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+
+         # Integer types
+         {"adapter_name": "postgres", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "INT4", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "INT2", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "INT8", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "SERIAL", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "BIGSERIAL", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+
+         # Floating point types
+         {"adapter_name": "postgres", "adapter_native_type": "REAL", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "FLOAT4", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "DOUBLE PRECISION", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "FLOAT8", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+
+         # Numeric/Decimal types
+         {"adapter_name": "postgres", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "MONEY", "spark_version": "all", "spark_native_type": "DecimalType(19,2)", "is_complex": False},
+
+         # Boolean
+         {"adapter_name": "postgres", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "BOOL", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
+
+         # Date/Time types
+         {"adapter_name": "postgres", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},  # Spark has no TimeType
+         {"adapter_name": "postgres", "adapter_native_type": "TIMETZ", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "TIMESTAMPTZ", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "TIMESTAMP WITHOUT TIME ZONE", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "TIMESTAMP WITH TIME ZONE", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "postgres", "adapter_native_type": "INTERVAL", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+
+         # Binary types
+         {"adapter_name": "postgres", "adapter_native_type": "BYTEA", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
+
+         # JSON types
+         {"adapter_name": "postgres", "adapter_native_type": "JSON", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "CAST({} AS STRING)"},
+         {"adapter_name": "postgres", "adapter_native_type": "JSONB", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "CAST({} AS STRING)"},
+
+         # UUID
+         {"adapter_name": "postgres", "adapter_native_type": "UUID", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+
+         # Array types (complex)
+         {"adapter_name": "postgres", "adapter_native_type": "ARRAY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "CAST({} AS STRING)"},
+
+         # ======================================================================
+         # Snowflake
+         # ======================================================================
+         # String types
+         {"adapter_name": "snowflake", "adapter_native_type": "TEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "VARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "STRING", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "CHARACTER", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+
+         # Numeric types
+         {"adapter_name": "snowflake", "adapter_native_type": "NUMBER", "spark_version": "all", "spark_native_type": "DecimalType(38,0)", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "TINYINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "BYTEINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
+
+         # Floating point
+         {"adapter_name": "snowflake", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "FLOAT4", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "FLOAT8", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "DOUBLE", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "DOUBLE PRECISION", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "REAL", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
+
+         # Boolean
+         {"adapter_name": "snowflake", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
+
+         # Date/Time
+         {"adapter_name": "snowflake", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "TIMESTAMP_LTZ", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "TIMESTAMP_NTZ", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "TIMESTAMP_TZ", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "DATETIME", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+
+         # Binary
+         {"adapter_name": "snowflake", "adapter_native_type": "BINARY", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
+         {"adapter_name": "snowflake", "adapter_native_type": "VARBINARY", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
+
+         # Semi-structured (complex)
+         {"adapter_name": "snowflake", "adapter_native_type": "VARIANT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "TO_VARCHAR({})"},
+         {"adapter_name": "snowflake", "adapter_native_type": "OBJECT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "TO_VARCHAR({})"},
+         {"adapter_name": "snowflake", "adapter_native_type": "ARRAY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True, "cast_expression": "TO_VARCHAR({})"},
+
+         # ======================================================================
+         # Databricks / Delta Lake
+         # ======================================================================
+         {"adapter_name": "databricks", "adapter_native_type": "STRING", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "LONG", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "SHORT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "TINYINT", "spark_version": "all", "spark_native_type": "ByteType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "BYTE", "spark_version": "all", "spark_native_type": "ByteType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "DOUBLE", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "TIMESTAMP_NTZ", "spark_version": "all", "spark_native_type": "TimestampNTZType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "BINARY", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
+         {"adapter_name": "databricks", "adapter_native_type": "ARRAY", "spark_version": "all", "spark_native_type": "ArrayType", "is_complex": True},
+         {"adapter_name": "databricks", "adapter_native_type": "MAP", "spark_version": "all", "spark_native_type": "MapType", "is_complex": True},
+         {"adapter_name": "databricks", "adapter_native_type": "STRUCT", "spark_version": "all", "spark_native_type": "StructType", "is_complex": True},
+
+         # ======================================================================
+         # MySQL
+         # ======================================================================
+         {"adapter_name": "mysql", "adapter_native_type": "VARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "TEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "TINYTEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "MEDIUMTEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "LONGTEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "TINYINT", "spark_version": "all", "spark_native_type": "ByteType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "MEDIUMINT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "DOUBLE", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "BOOL", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "DATETIME", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "YEAR", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "BLOB", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
+         {"adapter_name": "mysql", "adapter_native_type": "JSON", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+
+         # ======================================================================
+         # BigQuery
+         # ======================================================================
+         {"adapter_name": "bigquery", "adapter_native_type": "STRING", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "INT64", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "FLOAT64", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,9)", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "BIGNUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(76,38)", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "BOOL", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "DATETIME", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "BYTES", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
+         {"adapter_name": "bigquery", "adapter_native_type": "GEOGRAPHY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+         {"adapter_name": "bigquery", "adapter_native_type": "JSON", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+         {"adapter_name": "bigquery", "adapter_native_type": "ARRAY", "spark_version": "all", "spark_native_type": "ArrayType", "is_complex": True},
+         {"adapter_name": "bigquery", "adapter_native_type": "STRUCT", "spark_version": "all", "spark_native_type": "StructType", "is_complex": True},
+         {"adapter_name": "bigquery", "adapter_native_type": "RECORD", "spark_version": "all", "spark_native_type": "StructType", "is_complex": True},
+
+         # ======================================================================
+         # Redshift
+         # ======================================================================
+         {"adapter_name": "redshift", "adapter_native_type": "VARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "BPCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "TEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "INTEGER", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "INT4", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "INT8", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "INT2", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "REAL", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "FLOAT4", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "DOUBLE PRECISION", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "FLOAT8", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "BOOLEAN", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "BOOL", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "TIMESTAMPTZ", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "TIMETZ", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "redshift", "adapter_native_type": "SUPER", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+         {"adapter_name": "redshift", "adapter_native_type": "GEOMETRY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+         {"adapter_name": "redshift", "adapter_native_type": "GEOGRAPHY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+         {"adapter_name": "redshift", "adapter_native_type": "HLLSKETCH", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": True},
+
+         # ======================================================================
+         # Oracle
+         # ======================================================================
+         {"adapter_name": "oracle", "adapter_native_type": "VARCHAR2", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "NVARCHAR2", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "NCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "CLOB", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "NCLOB", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "NUMBER", "spark_version": "all", "spark_native_type": "DecimalType(38,10)", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "BINARY_FLOAT", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "BINARY_DOUBLE", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},  # Oracle DATE has time component
+         {"adapter_name": "oracle", "adapter_native_type": "TIMESTAMP", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "TIMESTAMP WITH TIME ZONE", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "TIMESTAMP WITH LOCAL TIME ZONE", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "INTERVAL YEAR TO MONTH", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "INTERVAL DAY TO SECOND", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "RAW", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "BLOB", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "BFILE", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": True},
+         {"adapter_name": "oracle", "adapter_native_type": "ROWID", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "UROWID", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "oracle", "adapter_native_type": "JSON", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+         {"adapter_name": "oracle", "adapter_native_type": "XMLTYPE", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+
+         # ======================================================================
+         # SQL Server
+         # ======================================================================
+         {"adapter_name": "sqlserver", "adapter_native_type": "VARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "NVARCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "CHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "NCHAR", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "TEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "NTEXT", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "INT", "spark_version": "all", "spark_native_type": "IntegerType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "BIGINT", "spark_version": "all", "spark_native_type": "LongType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "SMALLINT", "spark_version": "all", "spark_native_type": "ShortType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "TINYINT", "spark_version": "all", "spark_native_type": "ByteType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "FLOAT", "spark_version": "all", "spark_native_type": "DoubleType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "REAL", "spark_version": "all", "spark_native_type": "FloatType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "DECIMAL", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "NUMERIC", "spark_version": "all", "spark_native_type": "DecimalType(38,18)", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "MONEY", "spark_version": "all", "spark_native_type": "DecimalType(19,4)", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "SMALLMONEY", "spark_version": "all", "spark_native_type": "DecimalType(10,4)", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "BIT", "spark_version": "all", "spark_native_type": "BooleanType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "DATE", "spark_version": "all", "spark_native_type": "DateType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "DATETIME", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "DATETIME2", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "SMALLDATETIME", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "DATETIMEOFFSET", "spark_version": "all", "spark_native_type": "TimestampType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "TIME", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "BINARY", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "VARBINARY", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "IMAGE", "spark_version": "all", "spark_native_type": "BinaryType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "UNIQUEIDENTIFIER", "spark_version": "all", "spark_native_type": "StringType", "is_complex": False},
+         {"adapter_name": "sqlserver", "adapter_native_type": "XML", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+         {"adapter_name": "sqlserver", "adapter_native_type": "GEOGRAPHY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+         {"adapter_name": "sqlserver", "adapter_native_type": "GEOMETRY", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+         {"adapter_name": "sqlserver", "adapter_native_type": "HIERARCHYID", "spark_version": "all", "spark_native_type": "StringType", "is_complex": True},
+     ]
+
+     @classmethod
+     def get_spark_type(
+         cls,
+         adapter_name: str,
+         adapter_type: str,
+         spark_version: str = "4.0"
+     ) -> Optional[Dict[str, Any]]:
+         """
+         Look up the Spark type for a given adapter type.
+
+         :param adapter_name: Source adapter (e.g., 'postgres', 'snowflake')
+         :param adapter_type: Adapter's native type (e.g., 'INTEGER', 'VARCHAR')
+         :param spark_version: Target Spark version (default '4.0')
+         :returns: Dict with spark_native_type, is_complex, cast_expression or None
+         """
+         # Normalize inputs
+         adapter_name = adapter_name.lower()
+         adapter_type = adapter_type.upper().strip()
+
+         # Remove size specifiers: VARCHAR(255) -> VARCHAR
+         import re
+         adapter_type_normalized = re.sub(r'\([^)]*\)', '', adapter_type).strip()
+
+         for mapping in cls.TYPE_MAPPINGS:
+             if (mapping["adapter_name"] == adapter_name and
+                     mapping["adapter_native_type"] == adapter_type_normalized):
+                 # Check spark version match
+                 if mapping["spark_version"] == "all" or mapping["spark_version"] == spark_version:
+                     return {
+                         "spark_native_type": mapping["spark_native_type"],
+                         "is_complex": mapping.get("is_complex", False),
+                         "cast_expression": mapping.get("cast_expression"),
+                     }
+
+         return None
+
+     @classmethod
+     def get_all_mappings_for_adapter(cls, adapter_name: str) -> List[Dict[str, Any]]:
+         """Get all type mappings for a specific adapter."""
+         adapter_name = adapter_name.lower()
+         return [m for m in cls.TYPE_MAPPINGS if m["adapter_name"] == adapter_name]
+
+
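As a minimal usage sketch (illustrative only, not part of the package), note that get_spark_type lowercases the adapter name, uppercases the type, and strips size specifiers before matching:

    # Hypothetical calls against TypeRegistry (illustrative only).
    mapping = TypeRegistry.get_spark_type("postgres", "VARCHAR(255)")
    # -> {"spark_native_type": "StringType", "is_complex": False, "cast_expression": None}
    variant = TypeRegistry.get_spark_type("snowflake", "VARIANT")
    # -> complex type; variant["cast_expression"].format("payload") yields "TO_VARCHAR(payload)"
    unknown = TypeRegistry.get_spark_type("postgres", "CIDR")  # no entry -> None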
+ class SyntaxRegistry:
+     """
+     Syntax rules for each adapter.
+
+     Defines quoting characters, case sensitivity, and reserved keywords
+     to ensure correct SQL generation across different dialects.
+     """
+
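A minimal sketch of how these rules might be applied when rendering identifiers (illustrative; quote_identifier is a hypothetical helper, not part of the diff):

    # Hypothetical helper built on SyntaxRegistry.SYNTAX_RULES (illustrative only).
    def quote_identifier(adapter_name: str, identifier: str) -> str:
        rules = SyntaxRegistry.SYNTAX_RULES[adapter_name.lower()]
        if identifier.upper() in rules["reserved_keywords"]:
            return f"{rules['quote_start']}{identifier}{rules['quote_end']}"
        return identifier

    quote_identifier("postgres", "order")    # -> '"order"' (reserved keyword)
    quote_identifier("databricks", "order")  # -> '`order`'
    quote_identifier("postgres", "amount")   # -> 'amount' (no quoting needed)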
+ SYNTAX_RULES: Dict[str, Dict[str, Any]] = {
384
+ "postgres": {
385
+ "quote_start": '"',
386
+ "quote_end": '"',
387
+ "case_sensitivity": "LOWER", # Postgres folds to lowercase
388
+ "reserved_keywords": [
389
+ "ALL", "ANALYSE", "ANALYZE", "AND", "ANY", "ARRAY", "AS", "ASC",
390
+ "ASYMMETRIC", "BOTH", "CASE", "CAST", "CHECK", "COLLATE", "COLUMN",
391
+ "CONSTRAINT", "CREATE", "CURRENT_CATALOG", "CURRENT_DATE",
392
+ "CURRENT_ROLE", "CURRENT_TIME", "CURRENT_TIMESTAMP", "CURRENT_USER",
393
+ "DEFAULT", "DEFERRABLE", "DESC", "DISTINCT", "DO", "ELSE", "END",
394
+ "EXCEPT", "FALSE", "FETCH", "FOR", "FOREIGN", "FROM", "GRANT",
395
+ "GROUP", "HAVING", "IN", "INITIALLY", "INTERSECT", "INTO", "LATERAL",
396
+ "LEADING", "LIMIT", "LOCALTIME", "LOCALTIMESTAMP", "NOT", "NULL",
397
+ "OFFSET", "ON", "ONLY", "OR", "ORDER", "PLACING", "PRIMARY",
398
+ "REFERENCES", "RETURNING", "SELECT", "SESSION_USER", "SOME",
399
+ "SYMMETRIC", "TABLE", "THEN", "TO", "TRAILING", "TRUE", "UNION",
400
+ "UNIQUE", "USER", "USING", "VARIADIC", "WHEN", "WHERE", "WINDOW",
401
+ "WITH"
402
+ ],
403
+ },
404
+ "snowflake": {
405
+ "quote_start": '"',
406
+ "quote_end": '"',
407
+ "case_sensitivity": "UPPER", # Snowflake folds to uppercase
408
+ "reserved_keywords": [
409
+ "ACCOUNT", "ALL", "ALTER", "AND", "ANY", "AS", "BETWEEN", "BY",
410
+ "CASE", "CAST", "CHECK", "CLUSTER", "COLUMN", "CONNECT", "CONNECTION",
411
+ "CONSTRAINT", "CREATE", "CROSS", "CURRENT", "CURRENT_DATE",
412
+ "CURRENT_TIME", "CURRENT_TIMESTAMP", "CURRENT_USER", "DATABASE",
413
+ "DELETE", "DISTINCT", "DROP", "ELSE", "EXISTS", "FALSE", "FOLLOWING",
414
+ "FOR", "FROM", "FULL", "GRANT", "GROUP", "GSCLUSTER", "HAVING",
415
+ "ILIKE", "IN", "INCREMENT", "INNER", "INSERT", "INTERSECT", "INTO",
416
+ "IS", "ISSUE", "JOIN", "LATERAL", "LEFT", "LIKE", "LOCALTIME",
417
+ "LOCALTIMESTAMP", "MINUS", "NATURAL", "NOT", "NULL", "OF", "ON",
418
+ "OR", "ORDER", "ORGANIZATION", "QUALIFY", "REGEXP", "REVOKE",
419
+ "RIGHT", "RLIKE", "ROW", "ROWS", "SAMPLE", "SCHEMA", "SELECT",
420
+ "SET", "SOME", "START", "TABLE", "TABLESAMPLE", "THEN", "TO",
421
+ "TRIGGER", "TRUE", "TRY_CAST", "UNION", "UNIQUE", "UPDATE",
422
+ "USING", "VALUES", "VIEW", "WHEN", "WHENEVER", "WHERE", "WITH"
423
+ ],
424
+ },
425
+ "databricks": {
426
+ "quote_start": '`',
427
+ "quote_end": '`',
428
+ "case_sensitivity": "PRESERVE", # Databricks preserves case
429
+ "reserved_keywords": [
430
+ "ALL", "ALTER", "AND", "ANTI", "ANY", "ARCHIVE", "ARRAY", "AS",
431
+ "ASC", "AT", "AUTHORIZATION", "BETWEEN", "BOTH", "BUCKET", "BUCKETS",
432
+ "BY", "CACHE", "CASCADE", "CASE", "CAST", "CHANGE", "CHECK", "CLEAR",
433
+ "CLUSTER", "CLUSTERED", "CODEGEN", "COLLATE", "COLLECTION", "COLUMN",
434
+ "COLUMNS", "COMMENT", "COMMIT", "COMPACT", "COMPACTIONS", "COMPUTE",
435
+ "CONCATENATE", "CONSTRAINT", "COST", "CREATE", "CROSS", "CUBE",
436
+ "CURRENT", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
437
+ "CURRENT_USER", "DATA", "DATABASE", "DATABASES", "DAY", "DBPROPERTIES",
438
+ "DEFINED", "DELETE", "DELIMITED", "DESC", "DESCRIBE", "DFS", "DIRECTORIES",
439
+ "DIRECTORY", "DISTINCT", "DISTRIBUTE", "DROP", "ELSE", "END", "ESCAPE",
440
+ "ESCAPED", "EXCEPT", "EXCHANGE", "EXISTS", "EXPLAIN", "EXPORT", "EXTENDED",
441
+ "EXTERNAL", "EXTRACT", "FALSE", "FETCH", "FIELDS", "FILTER", "FILEFORMAT",
442
+ "FIRST", "FOLLOWING", "FOR", "FOREIGN", "FORMAT", "FORMATTED", "FROM",
443
+ "FULL", "FUNCTION", "FUNCTIONS", "GLOBAL", "GRANT", "GROUP", "GROUPING",
444
+ "HAVING", "HOUR", "IF", "IGNORE", "IMPORT", "IN", "INDEX", "INDEXES",
445
+ "INNER", "INPATH", "INPUTFORMAT", "INSERT", "INTERSECT", "INTERVAL",
446
+ "INTO", "IS", "ITEMS", "JOIN", "KEYS", "LAST", "LATERAL", "LAZY", "LEADING",
447
+ "LEFT", "LIKE", "LIMIT", "LINES", "LIST", "LOAD", "LOCAL", "LOCATION",
448
+ "LOCK", "LOCKS", "LOGICAL", "MACRO", "MAP", "MATCHED", "MERGE", "MINUTE",
449
+ "MONTH", "MSCK", "NAMESPACE", "NAMESPACES", "NATURAL", "NO", "NOT", "NULL",
450
+ "NULLS", "OF", "ON", "ONLY", "OPTION", "OPTIONS", "OR", "ORDER", "OUT",
451
+ "OUTER", "OUTPUTFORMAT", "OVER", "OVERLAPS", "OVERLAY", "OVERWRITE",
452
+ "PARTITION", "PARTITIONED", "PARTITIONS", "PERCENT", "PLACING", "POSITION",
453
+ "PRECEDING", "PRIMARY", "PRINCIPALS", "PROPERTIES", "PURGE", "QUERY",
454
+ "RANGE", "RECORDREADER", "RECORDWRITER", "RECOVER", "REDUCE", "REFERENCES",
455
+ "REFRESH", "RENAME", "REPAIR", "REPLACE", "RESET", "RESTRICT", "REVOKE",
456
+ "RIGHT", "RLIKE", "ROLE", "ROLES", "ROLLBACK", "ROLLUP", "ROW", "ROWS",
457
+ "SCHEMA", "SCHEMAS", "SECOND", "SELECT", "SEMI", "SEPARATED", "SERDE",
458
+ "SERDEPROPERTIES", "SESSION_USER", "SET", "SETS", "SHOW", "SKEWED", "SOME",
459
+ "SORT", "SORTED", "START", "STATISTICS", "STORED", "STRATIFY", "STRUCT",
460
+ "SUBSTR", "SUBSTRING", "TABLE", "TABLES", "TABLESAMPLE", "TBLPROPERTIES",
461
+ "TEMP", "TEMPORARY", "TERMINATED", "THEN", "TO", "TOUCH", "TRAILING",
462
+ "TRANSACTION", "TRANSACTIONS", "TRANSFORM", "TRIM", "TRUE", "TRUNCATE",
463
+ "TYPE", "UNARCHIVE", "UNBOUNDED", "UNCACHE", "UNION", "UNIQUE", "UNKNOWN",
464
+ "UNLOCK", "UNSET", "UPDATE", "USE", "USER", "USING", "VALUES", "VIEW",
465
+ "VIEWS", "WHEN", "WHERE", "WINDOW", "WITH", "YEAR"
466
+ ],
467
+ },
468
+ "bigquery": {
469
+ "quote_start": '`',
470
+ "quote_end": '`',
471
+ "case_sensitivity": "PRESERVE", # BigQuery preserves case
472
+ "reserved_keywords": [
473
+ "ALL", "AND", "ANY", "ARRAY", "AS", "ASC", "ASSERT_ROWS_MODIFIED",
474
+ "AT", "BETWEEN", "BY", "CASE", "CAST", "COLLATE", "CONTAINS", "CREATE",
475
+ "CROSS", "CUBE", "CURRENT", "DEFAULT", "DEFINE", "DESC", "DISTINCT",
476
+ "ELSE", "END", "ENUM", "ESCAPE", "EXCEPT", "EXCLUDE", "EXISTS",
477
+ "EXTRACT", "FALSE", "FETCH", "FOLLOWING", "FOR", "FROM", "FULL",
478
+ "GROUP", "GROUPING", "GROUPS", "HASH", "HAVING", "IF", "IGNORE",
479
+ "IN", "INNER", "INTERSECT", "INTERVAL", "INTO", "IS", "JOIN",
480
+ "LATERAL", "LEFT", "LIKE", "LIMIT", "LOOKUP", "MERGE", "NATURAL",
481
+ "NEW", "NO", "NOT", "NULL", "NULLS", "OF", "ON", "OR", "ORDER",
482
+ "OUTER", "OVER", "PARTITION", "PRECEDING", "PROTO", "RANGE",
483
+ "RECURSIVE", "RESPECT", "RIGHT", "ROLLUP", "ROWS", "SELECT", "SET",
484
+ "SOME", "STRUCT", "TABLESAMPLE", "THEN", "TO", "TREAT", "TRUE",
485
+ "UNBOUNDED", "UNION", "UNNEST", "USING", "WHEN", "WHERE", "WINDOW",
486
+ "WITH", "WITHIN"
487
+ ],
488
+ },
489
+ "mysql": {
490
+ "quote_start": '`',
491
+ "quote_end": '`',
492
+ "case_sensitivity": "PRESERVE", # Depends on collation, default preserve
493
+ "reserved_keywords": [
494
+ "ACCESSIBLE", "ADD", "ALL", "ALTER", "ANALYZE", "AND", "AS", "ASC",
495
+ "ASENSITIVE", "BEFORE", "BETWEEN", "BIGINT", "BINARY", "BLOB", "BOTH",
496
+ "BY", "CALL", "CASCADE", "CASE", "CHANGE", "CHAR", "CHARACTER", "CHECK",
497
+ "COLLATE", "COLUMN", "CONDITION", "CONSTRAINT", "CONTINUE", "CONVERT",
498
+ "CREATE", "CROSS", "CUBE", "CUME_DIST", "CURRENT_DATE", "CURRENT_TIME",
499
+ "CURRENT_TIMESTAMP", "CURRENT_USER", "CURSOR", "DATABASE", "DATABASES",
500
+ "DAY_HOUR", "DAY_MICROSECOND", "DAY_MINUTE", "DAY_SECOND", "DEC",
501
+ "DECIMAL", "DECLARE", "DEFAULT", "DELAYED", "DELETE", "DENSE_RANK",
502
+ "DESC", "DESCRIBE", "DETERMINISTIC", "DISTINCT", "DISTINCTROW", "DIV",
503
+ "DOUBLE", "DROP", "DUAL", "EACH", "ELSE", "ELSEIF", "EMPTY", "ENCLOSED",
504
+ "ESCAPED", "EXCEPT", "EXISTS", "EXIT", "EXPLAIN", "FALSE", "FETCH",
505
+ "FIRST_VALUE", "FLOAT", "FLOAT4", "FLOAT8", "FOR", "FORCE", "FOREIGN",
506
+ "FROM", "FULLTEXT", "FUNCTION", "GENERATED", "GET", "GRANT", "GROUP",
507
+ "GROUPING", "GROUPS", "HAVING", "HIGH_PRIORITY", "HOUR_MICROSECOND",
508
+ "HOUR_MINUTE", "HOUR_SECOND", "IF", "IGNORE", "IN", "INDEX", "INFILE",
509
+ "INNER", "INOUT", "INSENSITIVE", "INSERT", "INT", "INT1", "INT2", "INT3",
510
+ "INT4", "INT8", "INTEGER", "INTERVAL", "INTO", "IO_AFTER_GTIDS",
511
+ "IO_BEFORE_GTIDS", "IS", "ITERATE", "JOIN", "JSON_TABLE", "KEY", "KEYS",
512
+ "KILL", "LAG", "LAST_VALUE", "LATERAL", "LEAD", "LEADING", "LEAVE",
513
+ "LEFT", "LIKE", "LIMIT", "LINEAR", "LINES", "LOAD", "LOCALTIME",
514
+ "LOCALTIMESTAMP", "LOCK", "LONG", "LONGBLOB", "LONGTEXT", "LOOP",
515
+ "LOW_PRIORITY", "MASTER_BIND", "MASTER_SSL_VERIFY_SERVER_CERT", "MATCH",
516
+ "MAXVALUE", "MEDIUMBLOB", "MEDIUMINT", "MEDIUMTEXT", "MIDDLEINT",
517
+ "MINUTE_MICROSECOND", "MINUTE_SECOND", "MOD", "MODIFIES", "NATURAL",
518
+ "NOT", "NO_WRITE_TO_BINLOG", "NTH_VALUE", "NTILE", "NULL", "NUMERIC",
519
+ "OF", "ON", "OPTIMIZE", "OPTIMIZER_COSTS", "OPTION", "OPTIONALLY",
520
+ "OR", "ORDER", "OUT", "OUTER", "OUTFILE", "OVER", "PARTITION",
521
+ "PERCENT_RANK", "PRECISION", "PRIMARY", "PROCEDURE", "PURGE", "RANGE",
522
+ "RANK", "READ", "READS", "READ_WRITE", "REAL", "RECURSIVE", "REFERENCES",
523
+ "REGEXP", "RELEASE", "RENAME", "REPEAT", "REPLACE", "REQUIRE", "RESIGNAL",
524
+ "RESTRICT", "RETURN", "REVOKE", "RIGHT", "RLIKE", "ROW", "ROWS",
525
+ "ROW_NUMBER", "SCHEMA", "SCHEMAS", "SECOND_MICROSECOND", "SELECT",
526
+ "SENSITIVE", "SEPARATOR", "SET", "SHOW", "SIGNAL", "SMALLINT", "SPATIAL",
527
+ "SPECIFIC", "SQL", "SQLEXCEPTION", "SQLSTATE", "SQLWARNING",
528
+ "SQL_BIG_RESULT", "SQL_CALC_FOUND_ROWS", "SQL_SMALL_RESULT", "SSL",
529
+ "STARTING", "STORED", "STRAIGHT_JOIN", "SYSTEM", "TABLE", "TERMINATED",
530
+ "THEN", "TINYBLOB", "TINYINT", "TINYTEXT", "TO", "TRAILING", "TRIGGER",
531
+ "TRUE", "UNDO", "UNION", "UNIQUE", "UNLOCK", "UNSIGNED", "UPDATE",
532
+ "USAGE", "USE", "USING", "UTC_DATE", "UTC_TIME", "UTC_TIMESTAMP",
533
+ "VALUES", "VARBINARY", "VARCHAR", "VARCHARACTER", "VARYING", "VIRTUAL",
534
+ "WHEN", "WHERE", "WHILE", "WINDOW", "WITH", "WRITE", "XOR", "YEAR_MONTH",
535
+ "ZEROFILL"
536
+ ],
537
+ },
538
+ "redshift": {
539
+ "quote_start": '"',
540
+ "quote_end": '"',
541
+ "case_sensitivity": "LOWER", # Redshift folds to lowercase
542
+ "reserved_keywords": [
543
+ "AES128", "AES256", "ALL", "ALLOWOVERWRITE", "ANALYSE", "ANALYZE",
544
+ "AND", "ANY", "ARRAY", "AS", "ASC", "AUTHORIZATION", "BACKUP",
545
+ "BETWEEN", "BINARY", "BLANKSASNULL", "BOTH", "BYTEDICT", "BZIP2",
546
+ "CASE", "CAST", "CHECK", "COLLATE", "COLUMN", "CONSTRAINT", "CREATE",
547
+ "CREDENTIALS", "CROSS", "CURRENT_DATE", "CURRENT_TIME",
548
+ "CURRENT_TIMESTAMP", "CURRENT_USER", "CURRENT_USER_ID", "DEFAULT",
549
+ "DEFERRABLE", "DEFLATE", "DEFRAG", "DELTA", "DELTA32K", "DESC",
550
+ "DISABLE", "DISTINCT", "DO", "ELSE", "EMPTYASNULL", "ENABLE", "ENCODE",
551
+ "ENCRYPT", "ENCRYPTION", "END", "EXCEPT", "EXPLICIT", "FALSE", "FOR",
552
+ "FOREIGN", "FREEZE", "FROM", "FULL", "GLOBALDICT256", "GLOBALDICT64K",
553
+ "GRANT", "GROUP", "GZIP", "HAVING", "IDENTITY", "IGNORE", "ILIKE",
554
+ "IN", "INITIALLY", "INNER", "INTERSECT", "INTO", "IS", "ISNULL",
555
+ "JOIN", "LANGUAGE", "LEADING", "LEFT", "LIKE", "LIMIT", "LOCALTIME",
556
+ "LOCALTIMESTAMP", "LUN", "LUNS", "LZO", "LZOP", "MINUS", "MOSTLY13",
557
+ "MOSTLY32", "MOSTLY8", "NATURAL", "NEW", "NOT", "NOTNULL", "NULL",
558
+ "NULLS", "OFF", "OFFLINE", "OFFSET", "OID", "OLD", "ON", "ONLY",
559
+ "OPEN", "OR", "ORDER", "OUTER", "OVERLAPS", "PARALLEL", "PARTITION",
560
+ "PERCENT", "PERMISSIONS", "PLACING", "PRIMARY", "RAW", "READRATIO",
561
+ "RECOVER", "REFERENCES", "RESPECT", "REJECTLOG", "RESORT", "RESTORE",
562
+ "RIGHT", "SELECT", "SESSION_USER", "SIMILAR", "SNAPSHOT", "SOME",
563
+ "SYSDATE", "SYSTEM", "TABLE", "TAG", "TDES", "TEXT255", "TEXT32K",
564
+ "THEN", "TIMESTAMP", "TO", "TOP", "TRAILING", "TRUE", "TRUNCATECOLUMNS",
565
+ "UNION", "UNIQUE", "USER", "USING", "VERBOSE", "WALLET", "WHEN",
566
+ "WHERE", "WITH", "WITHOUT"
567
+ ],
568
+ },
569
+ "oracle": {
570
+ "quote_start": '"',
571
+ "quote_end": '"',
572
+ "case_sensitivity": "UPPER", # Oracle folds to uppercase
573
+ "reserved_keywords": [
574
+ "ACCESS", "ADD", "ALL", "ALTER", "AND", "ANY", "AS", "ASC", "AUDIT",
575
+ "BETWEEN", "BY", "CHAR", "CHECK", "CLUSTER", "COLUMN", "COLUMN_VALUE",
576
+ "COMMENT", "COMPRESS", "CONNECT", "CREATE", "CURRENT", "DATE",
577
+ "DECIMAL", "DEFAULT", "DELETE", "DESC", "DISTINCT", "DROP", "ELSE",
578
+ "EXCLUSIVE", "EXISTS", "FILE", "FLOAT", "FOR", "FROM", "GRANT",
579
+ "GROUP", "HAVING", "IDENTIFIED", "IMMEDIATE", "IN", "INCREMENT",
580
+ "INDEX", "INITIAL", "INSERT", "INTEGER", "INTERSECT", "INTO", "IS",
581
+ "LEVEL", "LIKE", "LOCK", "LONG", "MAXEXTENTS", "MINUS", "MLSLABEL",
582
+ "MODE", "MODIFY", "NESTED_TABLE_ID", "NOAUDIT", "NOCOMPRESS", "NOT",
583
+ "NOWAIT", "NULL", "NUMBER", "OF", "OFFLINE", "ON", "ONLINE", "OPTION",
584
+ "OR", "ORDER", "PCTFREE", "PRIOR", "PUBLIC", "RAW", "RENAME",
585
+ "RESOURCE", "REVOKE", "ROW", "ROWID", "ROWNUM", "ROWS", "SELECT",
586
+ "SESSION", "SET", "SHARE", "SIZE", "SMALLINT", "START", "SUCCESSFUL",
587
+ "SYNONYM", "SYSDATE", "TABLE", "THEN", "TO", "TRIGGER", "UID", "UNION",
588
+ "UNIQUE", "UPDATE", "USER", "VALIDATE", "VALUES", "VARCHAR", "VARCHAR2",
589
+ "VIEW", "WHENEVER", "WHERE", "WITH"
590
+ ],
591
+ },
592
+ "sqlserver": {
593
+ "quote_start": '[',
594
+ "quote_end": ']',
595
+ "case_sensitivity": "PRESERVE", # Depends on collation
596
+ "reserved_keywords": [
597
+ "ADD", "ALL", "ALTER", "AND", "ANY", "AS", "ASC", "AUTHORIZATION",
598
+ "BACKUP", "BEGIN", "BETWEEN", "BREAK", "BROWSE", "BULK", "BY",
599
+ "CASCADE", "CASE", "CHECK", "CHECKPOINT", "CLOSE", "CLUSTERED",
600
+ "COALESCE", "COLLATE", "COLUMN", "COMMIT", "COMPUTE", "CONSTRAINT",
601
+ "CONTAINS", "CONTAINSTABLE", "CONTINUE", "CONVERT", "CREATE", "CROSS",
602
+ "CURRENT", "CURRENT_DATE", "CURRENT_TIME", "CURRENT_TIMESTAMP",
603
+ "CURRENT_USER", "CURSOR", "DATABASE", "DBCC", "DEALLOCATE", "DECLARE",
604
+ "DEFAULT", "DELETE", "DENY", "DESC", "DISK", "DISTINCT", "DISTRIBUTED",
605
+ "DOUBLE", "DROP", "DUMP", "ELSE", "END", "ERRLVL", "ESCAPE", "EXCEPT",
606
+ "EXEC", "EXECUTE", "EXISTS", "EXIT", "EXTERNAL", "FETCH", "FILE",
607
+ "FILLFACTOR", "FOR", "FOREIGN", "FREETEXT", "FREETEXTTABLE", "FROM",
608
+ "FULL", "FUNCTION", "GOTO", "GRANT", "GROUP", "HAVING", "HOLDLOCK",
609
+ "IDENTITY", "IDENTITY_INSERT", "IDENTITYCOL", "IF", "IN", "INDEX",
610
+ "INNER", "INSERT", "INTERSECT", "INTO", "IS", "JOIN", "KEY", "KILL",
611
+ "LEFT", "LIKE", "LINENO", "LOAD", "MERGE", "NATIONAL", "NOCHECK",
612
+ "NONCLUSTERED", "NOT", "NULL", "NULLIF", "OF", "OFF", "OFFSETS", "ON",
613
+ "OPEN", "OPENDATASOURCE", "OPENQUERY", "OPENROWSET", "OPENXML",
614
+ "OPTION", "OR", "ORDER", "OUTER", "OVER", "PERCENT", "PIVOT", "PLAN",
615
+ "PRECISION", "PRIMARY", "PRINT", "PROC", "PROCEDURE", "PUBLIC",
616
+ "RAISERROR", "READ", "READTEXT", "RECONFIGURE", "REFERENCES",
617
+ "REPLICATION", "RESTORE", "RESTRICT", "RETURN", "REVERT", "REVOKE",
618
+ "RIGHT", "ROLLBACK", "ROWCOUNT", "ROWGUIDCOL", "RULE", "SAVE",
619
+ "SCHEMA", "SECURITYAUDIT", "SELECT", "SEMANTICKEYPHRASETABLE",
620
+ "SEMANTICSIMILARITYDETAILSTABLE", "SEMANTICSIMILARITYTABLE",
621
+ "SESSION_USER", "SET", "SETUSER", "SHUTDOWN", "SOME", "STATISTICS",
622
+ "SYSTEM_USER", "TABLE", "TABLESAMPLE", "TEXTSIZE", "THEN", "TO",
623
+ "TOP", "TRAN", "TRANSACTION", "TRIGGER", "TRUNCATE", "TRY_CONVERT",
624
+ "TSEQUAL", "UNION", "UNIQUE", "UNPIVOT", "UPDATE", "UPDATETEXT",
625
+ "USE", "USER", "VALUES", "VARYING", "VIEW", "WAITFOR", "WHEN",
626
+ "WHERE", "WHILE", "WITH", "WITHIN GROUP", "WRITETEXT"
627
+ ],
628
+ },
629
+ }
630
+
631
+    @classmethod
+    def get_syntax_rule(cls, adapter_name: str) -> Optional[Dict[str, Any]]:
+        """Get syntax rules for a specific adapter."""
+        return cls.SYNTAX_RULES.get(adapter_name.lower())
+
+    @classmethod
+    def quote_identifier(cls, adapter_name: str, identifier: str) -> str:
+        """Quote an identifier using the adapter's quoting rules."""
+        rule = cls.get_syntax_rule(adapter_name)
+        if not rule:
+            return f'"{identifier}"'  # Default to double quotes
+        return f'{rule["quote_start"]}{identifier}{rule["quote_end"]}'
+
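# Editor's note (illustration only, not part of the package diff): given the quoting
# rules above, quote_identifier("redshift", "order") would yield "order" (double quotes),
# quote_identifier("bigquery", "order") would yield `order` (backticks), and
# quote_identifier("sqlserver", "order") would yield [order] (square brackets).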
+    @classmethod
+    def needs_quoting(cls, adapter_name: str, identifier: str) -> bool:
+        """Check if an identifier needs quoting (reserved keyword or special chars)."""
+        rule = cls.get_syntax_rule(adapter_name)
+        if not rule:
+            return False
+
+        # Check if it's a reserved keyword
+        upper_id = identifier.upper()
+        if upper_id in rule.get("reserved_keywords", []):
+            return True
+
+        # Check for special characters or spaces
+        if not identifier.isidentifier() or ' ' in identifier or '-' in identifier:
+            return True
+
+        return False
+
+    @classmethod
+    def normalize_identifier(cls, adapter_name: str, identifier: str) -> str:
+        """Normalize an identifier based on the adapter's case sensitivity rules."""
+        rule = cls.get_syntax_rule(adapter_name)
+        if not rule:
+            return identifier
+
+        case_rule = rule.get("case_sensitivity", "PRESERVE")
+        if case_rule == "UPPER":
+            return identifier.upper()
+        elif case_rule == "LOWER":
+            return identifier.lower()
+        return identifier  # PRESERVE
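# --- Usage sketch (editor's illustration; not part of the package diff) ---
# The name of the enclosing class is not visible in this hunk; `AdapterSyntaxRules`
# below is a hypothetical stand-in, used only to show how the classmethods above
# compose into a safe identifier-rendering helper.

def render_identifier(adapter: str, identifier: str) -> str:
    """Normalize an identifier for the target adapter, then quote it only when
    the adapter's rules require it (reserved keyword or special characters)."""
    normalized = AdapterSyntaxRules.normalize_identifier(adapter, identifier)
    if AdapterSyntaxRules.needs_quoting(adapter, identifier):
        return AdapterSyntaxRules.quote_identifier(adapter, normalized)
    return normalized

# Expected behavior under the rules defined above:
#   render_identifier("oracle", "select")      -> '"SELECT"'   (reserved word, upper-folded)
#   render_identifier("sqlserver", "order id") -> '[order id]' (contains a space)
#   render_identifier("bigquery", "customer")  -> 'customer'   (no quoting needed)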