dvt-core 0.58.6__cp311-cp311-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. dbt/__init__.py +7 -0
  2. dbt/_pydantic_shim.py +26 -0
  3. dbt/artifacts/__init__.py +0 -0
  4. dbt/artifacts/exceptions/__init__.py +1 -0
  5. dbt/artifacts/exceptions/schemas.py +31 -0
  6. dbt/artifacts/resources/__init__.py +116 -0
  7. dbt/artifacts/resources/base.py +67 -0
  8. dbt/artifacts/resources/types.py +93 -0
  9. dbt/artifacts/resources/v1/analysis.py +10 -0
  10. dbt/artifacts/resources/v1/catalog.py +23 -0
  11. dbt/artifacts/resources/v1/components.py +274 -0
  12. dbt/artifacts/resources/v1/config.py +277 -0
  13. dbt/artifacts/resources/v1/documentation.py +11 -0
  14. dbt/artifacts/resources/v1/exposure.py +51 -0
  15. dbt/artifacts/resources/v1/function.py +52 -0
  16. dbt/artifacts/resources/v1/generic_test.py +31 -0
  17. dbt/artifacts/resources/v1/group.py +21 -0
  18. dbt/artifacts/resources/v1/hook.py +11 -0
  19. dbt/artifacts/resources/v1/macro.py +29 -0
  20. dbt/artifacts/resources/v1/metric.py +172 -0
  21. dbt/artifacts/resources/v1/model.py +145 -0
  22. dbt/artifacts/resources/v1/owner.py +10 -0
  23. dbt/artifacts/resources/v1/saved_query.py +111 -0
  24. dbt/artifacts/resources/v1/seed.py +41 -0
  25. dbt/artifacts/resources/v1/semantic_layer_components.py +72 -0
  26. dbt/artifacts/resources/v1/semantic_model.py +314 -0
  27. dbt/artifacts/resources/v1/singular_test.py +14 -0
  28. dbt/artifacts/resources/v1/snapshot.py +91 -0
  29. dbt/artifacts/resources/v1/source_definition.py +84 -0
  30. dbt/artifacts/resources/v1/sql_operation.py +10 -0
  31. dbt/artifacts/resources/v1/unit_test_definition.py +77 -0
  32. dbt/artifacts/schemas/__init__.py +0 -0
  33. dbt/artifacts/schemas/base.py +191 -0
  34. dbt/artifacts/schemas/batch_results.py +24 -0
  35. dbt/artifacts/schemas/catalog/__init__.py +11 -0
  36. dbt/artifacts/schemas/catalog/v1/__init__.py +0 -0
  37. dbt/artifacts/schemas/catalog/v1/catalog.py +59 -0
  38. dbt/artifacts/schemas/freshness/__init__.py +1 -0
  39. dbt/artifacts/schemas/freshness/v3/__init__.py +0 -0
  40. dbt/artifacts/schemas/freshness/v3/freshness.py +158 -0
  41. dbt/artifacts/schemas/manifest/__init__.py +2 -0
  42. dbt/artifacts/schemas/manifest/v12/__init__.py +0 -0
  43. dbt/artifacts/schemas/manifest/v12/manifest.py +211 -0
  44. dbt/artifacts/schemas/results.py +147 -0
  45. dbt/artifacts/schemas/run/__init__.py +2 -0
  46. dbt/artifacts/schemas/run/v5/__init__.py +0 -0
  47. dbt/artifacts/schemas/run/v5/run.py +184 -0
  48. dbt/artifacts/schemas/upgrades/__init__.py +4 -0
  49. dbt/artifacts/schemas/upgrades/upgrade_manifest.py +174 -0
  50. dbt/artifacts/schemas/upgrades/upgrade_manifest_dbt_version.py +2 -0
  51. dbt/artifacts/utils/validation.py +153 -0
  52. dbt/cli/__init__.py +1 -0
  53. dbt/cli/context.py +17 -0
  54. dbt/cli/exceptions.py +57 -0
  55. dbt/cli/flags.py +560 -0
  56. dbt/cli/main.py +2403 -0
  57. dbt/cli/option_types.py +121 -0
  58. dbt/cli/options.py +80 -0
  59. dbt/cli/params.py +844 -0
  60. dbt/cli/requires.py +490 -0
  61. dbt/cli/resolvers.py +50 -0
  62. dbt/cli/types.py +40 -0
  63. dbt/clients/__init__.py +0 -0
  64. dbt/clients/checked_load.py +83 -0
  65. dbt/clients/git.py +164 -0
  66. dbt/clients/jinja.py +206 -0
  67. dbt/clients/jinja_static.py +245 -0
  68. dbt/clients/registry.py +192 -0
  69. dbt/clients/yaml_helper.py +68 -0
  70. dbt/compilation.py +876 -0
  71. dbt/compute/__init__.py +14 -0
  72. dbt/compute/engines/__init__.py +12 -0
  73. dbt/compute/engines/spark_engine.cpython-311-darwin.so +0 -0
  74. dbt/compute/engines/spark_engine.py +642 -0
  75. dbt/compute/federated_executor.cpython-311-darwin.so +0 -0
  76. dbt/compute/federated_executor.py +1080 -0
  77. dbt/compute/filter_pushdown.cpython-311-darwin.so +0 -0
  78. dbt/compute/filter_pushdown.py +273 -0
  79. dbt/compute/jar_provisioning.cpython-311-darwin.so +0 -0
  80. dbt/compute/jar_provisioning.py +255 -0
  81. dbt/compute/java_compat.cpython-311-darwin.so +0 -0
  82. dbt/compute/java_compat.py +689 -0
  83. dbt/compute/jdbc_utils.cpython-311-darwin.so +0 -0
  84. dbt/compute/jdbc_utils.py +678 -0
  85. dbt/compute/metadata/__init__.py +40 -0
  86. dbt/compute/metadata/adapters_registry.cpython-311-darwin.so +0 -0
  87. dbt/compute/metadata/adapters_registry.py +370 -0
  88. dbt/compute/metadata/registry.cpython-311-darwin.so +0 -0
  89. dbt/compute/metadata/registry.py +674 -0
  90. dbt/compute/metadata/store.cpython-311-darwin.so +0 -0
  91. dbt/compute/metadata/store.py +1499 -0
  92. dbt/compute/smart_selector.cpython-311-darwin.so +0 -0
  93. dbt/compute/smart_selector.py +377 -0
  94. dbt/compute/strategies/__init__.py +55 -0
  95. dbt/compute/strategies/base.cpython-311-darwin.so +0 -0
  96. dbt/compute/strategies/base.py +165 -0
  97. dbt/compute/strategies/dataproc.cpython-311-darwin.so +0 -0
  98. dbt/compute/strategies/dataproc.py +207 -0
  99. dbt/compute/strategies/emr.cpython-311-darwin.so +0 -0
  100. dbt/compute/strategies/emr.py +203 -0
  101. dbt/compute/strategies/local.cpython-311-darwin.so +0 -0
  102. dbt/compute/strategies/local.py +443 -0
  103. dbt/compute/strategies/standalone.cpython-311-darwin.so +0 -0
  104. dbt/compute/strategies/standalone.py +262 -0
  105. dbt/config/__init__.py +4 -0
  106. dbt/config/catalogs.py +94 -0
  107. dbt/config/compute.cpython-311-darwin.so +0 -0
  108. dbt/config/compute.py +513 -0
  109. dbt/config/dvt_profile.cpython-311-darwin.so +0 -0
  110. dbt/config/dvt_profile.py +342 -0
  111. dbt/config/profile.py +422 -0
  112. dbt/config/project.py +873 -0
  113. dbt/config/project_utils.py +28 -0
  114. dbt/config/renderer.py +231 -0
  115. dbt/config/runtime.py +553 -0
  116. dbt/config/selectors.py +208 -0
  117. dbt/config/utils.py +77 -0
  118. dbt/constants.py +28 -0
  119. dbt/context/__init__.py +0 -0
  120. dbt/context/base.py +745 -0
  121. dbt/context/configured.py +135 -0
  122. dbt/context/context_config.py +382 -0
  123. dbt/context/docs.py +82 -0
  124. dbt/context/exceptions_jinja.py +178 -0
  125. dbt/context/macro_resolver.py +195 -0
  126. dbt/context/macros.py +171 -0
  127. dbt/context/manifest.py +72 -0
  128. dbt/context/providers.py +2249 -0
  129. dbt/context/query_header.py +13 -0
  130. dbt/context/secret.py +58 -0
  131. dbt/context/target.py +74 -0
  132. dbt/contracts/__init__.py +0 -0
  133. dbt/contracts/files.py +413 -0
  134. dbt/contracts/graph/__init__.py +0 -0
  135. dbt/contracts/graph/manifest.py +1904 -0
  136. dbt/contracts/graph/metrics.py +97 -0
  137. dbt/contracts/graph/model_config.py +70 -0
  138. dbt/contracts/graph/node_args.py +42 -0
  139. dbt/contracts/graph/nodes.py +1806 -0
  140. dbt/contracts/graph/semantic_manifest.py +232 -0
  141. dbt/contracts/graph/unparsed.py +811 -0
  142. dbt/contracts/project.py +417 -0
  143. dbt/contracts/results.py +53 -0
  144. dbt/contracts/selection.py +23 -0
  145. dbt/contracts/sql.py +85 -0
  146. dbt/contracts/state.py +68 -0
  147. dbt/contracts/util.py +46 -0
  148. dbt/deprecations.py +348 -0
  149. dbt/deps/__init__.py +0 -0
  150. dbt/deps/base.py +152 -0
  151. dbt/deps/git.py +195 -0
  152. dbt/deps/local.py +79 -0
  153. dbt/deps/registry.py +130 -0
  154. dbt/deps/resolver.py +149 -0
  155. dbt/deps/tarball.py +120 -0
  156. dbt/docs/source/_ext/dbt_click.py +119 -0
  157. dbt/docs/source/conf.py +32 -0
  158. dbt/env_vars.py +64 -0
  159. dbt/event_time/event_time.py +40 -0
  160. dbt/event_time/sample_window.py +60 -0
  161. dbt/events/__init__.py +15 -0
  162. dbt/events/base_types.py +36 -0
  163. dbt/events/core_types_pb2.py +2 -0
  164. dbt/events/logging.py +108 -0
  165. dbt/events/types.py +2516 -0
  166. dbt/exceptions.py +1486 -0
  167. dbt/flags.py +89 -0
  168. dbt/graph/__init__.py +11 -0
  169. dbt/graph/cli.py +249 -0
  170. dbt/graph/graph.py +172 -0
  171. dbt/graph/queue.py +214 -0
  172. dbt/graph/selector.py +374 -0
  173. dbt/graph/selector_methods.py +975 -0
  174. dbt/graph/selector_spec.py +222 -0
  175. dbt/graph/thread_pool.py +18 -0
  176. dbt/hooks.py +21 -0
  177. dbt/include/README.md +49 -0
  178. dbt/include/__init__.py +3 -0
  179. dbt/include/data/adapters_registry.duckdb +0 -0
  180. dbt/include/data/build_registry.py +242 -0
  181. dbt/include/data/csv/adapter_queries.csv +33 -0
  182. dbt/include/data/csv/syntax_rules.csv +9 -0
  183. dbt/include/data/csv/type_mappings_bigquery.csv +28 -0
  184. dbt/include/data/csv/type_mappings_databricks.csv +30 -0
  185. dbt/include/data/csv/type_mappings_mysql.csv +40 -0
  186. dbt/include/data/csv/type_mappings_oracle.csv +30 -0
  187. dbt/include/data/csv/type_mappings_postgres.csv +56 -0
  188. dbt/include/data/csv/type_mappings_redshift.csv +33 -0
  189. dbt/include/data/csv/type_mappings_snowflake.csv +38 -0
  190. dbt/include/data/csv/type_mappings_sqlserver.csv +35 -0
  191. dbt/include/starter_project/.gitignore +4 -0
  192. dbt/include/starter_project/README.md +15 -0
  193. dbt/include/starter_project/__init__.py +3 -0
  194. dbt/include/starter_project/analyses/.gitkeep +0 -0
  195. dbt/include/starter_project/dbt_project.yml +36 -0
  196. dbt/include/starter_project/macros/.gitkeep +0 -0
  197. dbt/include/starter_project/models/example/my_first_dbt_model.sql +27 -0
  198. dbt/include/starter_project/models/example/my_second_dbt_model.sql +6 -0
  199. dbt/include/starter_project/models/example/schema.yml +21 -0
  200. dbt/include/starter_project/seeds/.gitkeep +0 -0
  201. dbt/include/starter_project/snapshots/.gitkeep +0 -0
  202. dbt/include/starter_project/tests/.gitkeep +0 -0
  203. dbt/internal_deprecations.py +26 -0
  204. dbt/jsonschemas/__init__.py +3 -0
  205. dbt/jsonschemas/jsonschemas.py +309 -0
  206. dbt/jsonschemas/project/0.0.110.json +4717 -0
  207. dbt/jsonschemas/project/0.0.85.json +2015 -0
  208. dbt/jsonschemas/resources/0.0.110.json +2636 -0
  209. dbt/jsonschemas/resources/0.0.85.json +2536 -0
  210. dbt/jsonschemas/resources/latest.json +6773 -0
  211. dbt/links.py +4 -0
  212. dbt/materializations/__init__.py +0 -0
  213. dbt/materializations/incremental/__init__.py +0 -0
  214. dbt/materializations/incremental/microbatch.py +236 -0
  215. dbt/mp_context.py +8 -0
  216. dbt/node_types.py +37 -0
  217. dbt/parser/__init__.py +23 -0
  218. dbt/parser/analysis.py +21 -0
  219. dbt/parser/base.py +548 -0
  220. dbt/parser/common.py +266 -0
  221. dbt/parser/docs.py +52 -0
  222. dbt/parser/fixtures.py +51 -0
  223. dbt/parser/functions.py +30 -0
  224. dbt/parser/generic_test.py +100 -0
  225. dbt/parser/generic_test_builders.py +333 -0
  226. dbt/parser/hooks.py +118 -0
  227. dbt/parser/macros.py +137 -0
  228. dbt/parser/manifest.py +2204 -0
  229. dbt/parser/models.py +573 -0
  230. dbt/parser/partial.py +1178 -0
  231. dbt/parser/read_files.py +445 -0
  232. dbt/parser/schema_generic_tests.py +422 -0
  233. dbt/parser/schema_renderer.py +111 -0
  234. dbt/parser/schema_yaml_readers.py +935 -0
  235. dbt/parser/schemas.py +1466 -0
  236. dbt/parser/search.py +149 -0
  237. dbt/parser/seeds.py +28 -0
  238. dbt/parser/singular_test.py +20 -0
  239. dbt/parser/snapshots.py +44 -0
  240. dbt/parser/sources.py +558 -0
  241. dbt/parser/sql.py +62 -0
  242. dbt/parser/unit_tests.py +621 -0
  243. dbt/plugins/__init__.py +20 -0
  244. dbt/plugins/contracts.py +9 -0
  245. dbt/plugins/exceptions.py +2 -0
  246. dbt/plugins/manager.py +163 -0
  247. dbt/plugins/manifest.py +21 -0
  248. dbt/profiler.py +20 -0
  249. dbt/py.typed +1 -0
  250. dbt/query_analyzer.cpython-311-darwin.so +0 -0
  251. dbt/query_analyzer.py +410 -0
  252. dbt/runners/__init__.py +2 -0
  253. dbt/runners/exposure_runner.py +7 -0
  254. dbt/runners/no_op_runner.py +45 -0
  255. dbt/runners/saved_query_runner.py +7 -0
  256. dbt/selected_resources.py +8 -0
  257. dbt/task/__init__.py +0 -0
  258. dbt/task/base.py +503 -0
  259. dbt/task/build.py +197 -0
  260. dbt/task/clean.py +56 -0
  261. dbt/task/clone.py +161 -0
  262. dbt/task/compile.py +150 -0
  263. dbt/task/compute.cpython-311-darwin.so +0 -0
  264. dbt/task/compute.py +458 -0
  265. dbt/task/debug.py +505 -0
  266. dbt/task/deps.py +280 -0
  267. dbt/task/docs/__init__.py +3 -0
  268. dbt/task/docs/api/__init__.py +23 -0
  269. dbt/task/docs/api/catalog.cpython-311-darwin.so +0 -0
  270. dbt/task/docs/api/catalog.py +204 -0
  271. dbt/task/docs/api/lineage.cpython-311-darwin.so +0 -0
  272. dbt/task/docs/api/lineage.py +234 -0
  273. dbt/task/docs/api/profile.cpython-311-darwin.so +0 -0
  274. dbt/task/docs/api/profile.py +204 -0
  275. dbt/task/docs/api/spark.cpython-311-darwin.so +0 -0
  276. dbt/task/docs/api/spark.py +186 -0
  277. dbt/task/docs/generate.py +947 -0
  278. dbt/task/docs/index.html +250 -0
  279. dbt/task/docs/serve.cpython-311-darwin.so +0 -0
  280. dbt/task/docs/serve.py +174 -0
  281. dbt/task/dvt_output.py +362 -0
  282. dbt/task/dvt_run.py +204 -0
  283. dbt/task/freshness.py +322 -0
  284. dbt/task/function.py +121 -0
  285. dbt/task/group_lookup.py +46 -0
  286. dbt/task/init.cpython-311-darwin.so +0 -0
  287. dbt/task/init.py +604 -0
  288. dbt/task/java.cpython-311-darwin.so +0 -0
  289. dbt/task/java.py +316 -0
  290. dbt/task/list.py +236 -0
  291. dbt/task/metadata.cpython-311-darwin.so +0 -0
  292. dbt/task/metadata.py +804 -0
  293. dbt/task/printer.py +175 -0
  294. dbt/task/profile.cpython-311-darwin.so +0 -0
  295. dbt/task/profile.py +1307 -0
  296. dbt/task/profile_serve.py +615 -0
  297. dbt/task/retract.py +438 -0
  298. dbt/task/retry.py +175 -0
  299. dbt/task/run.py +1387 -0
  300. dbt/task/run_operation.py +141 -0
  301. dbt/task/runnable.py +758 -0
  302. dbt/task/seed.py +103 -0
  303. dbt/task/show.py +149 -0
  304. dbt/task/snapshot.py +56 -0
  305. dbt/task/spark.cpython-311-darwin.so +0 -0
  306. dbt/task/spark.py +414 -0
  307. dbt/task/sql.py +110 -0
  308. dbt/task/target_sync.cpython-311-darwin.so +0 -0
  309. dbt/task/target_sync.py +766 -0
  310. dbt/task/test.py +464 -0
  311. dbt/tests/fixtures/__init__.py +1 -0
  312. dbt/tests/fixtures/project.py +620 -0
  313. dbt/tests/util.py +651 -0
  314. dbt/tracking.py +529 -0
  315. dbt/utils/__init__.py +3 -0
  316. dbt/utils/artifact_upload.py +151 -0
  317. dbt/utils/utils.py +408 -0
  318. dbt/version.py +270 -0
  319. dvt_cli/__init__.py +72 -0
  320. dvt_core-0.58.6.dist-info/METADATA +288 -0
  321. dvt_core-0.58.6.dist-info/RECORD +324 -0
  322. dvt_core-0.58.6.dist-info/WHEEL +5 -0
  323. dvt_core-0.58.6.dist-info/entry_points.txt +2 -0
  324. dvt_core-0.58.6.dist-info/top_level.txt +2 -0
@@ -0,0 +1,40 @@
1
+ # =============================================================================
2
+ # DVT Metadata Layer
3
+ # =============================================================================
4
+ # Project-level metadata store using DuckDB for:
5
+ # - Type registry (adapter types → Spark types)
6
+ # - Syntax registry (quoting, case sensitivity per adapter)
7
+ # - Metadata snapshot (cached table/column info)
8
+ # - Profile results (v0.56.0 - dvt profile)
9
+ # - Catalog nodes (v0.56.0 - dvt docs generate)
10
+ # - Lineage edges (v0.56.0 - dvt docs generate)
11
+ #
12
+ # DVT v0.54.0: Initial implementation
13
+ # DVT v0.55.0: Added AdaptersRegistry for shipped registry database
14
+ # DVT v0.56.0: Added profile_results, catalog_nodes, lineage_edges tables
15
+ # =============================================================================
16
+
17
+ from dbt.compute.metadata.store import (
18
+ ProjectMetadataStore,
19
+ ColumnMetadata,
20
+ TableMetadata,
21
+ RowCountInfo,
22
+ ColumnProfileResult,
23
+ CatalogNode,
24
+ LineageEdge,
25
+ )
26
+ from dbt.compute.metadata.registry import TypeRegistry, SyntaxRegistry
27
+ from dbt.compute.metadata.adapters_registry import AdaptersRegistry
28
+
29
+ __all__ = [
30
+ "ProjectMetadataStore",
31
+ "ColumnMetadata",
32
+ "TableMetadata",
33
+ "RowCountInfo",
34
+ "ColumnProfileResult",
35
+ "CatalogNode",
36
+ "LineageEdge",
37
+ "TypeRegistry",
38
+ "SyntaxRegistry",
39
+ "AdaptersRegistry",
40
+ ]
@@ -0,0 +1,370 @@
1
+ # =============================================================================
2
+ # DVT Adapters Registry
3
+ # =============================================================================
4
+ # Read-only access to the shipped adapters_registry.duckdb database containing:
5
+ # - Type mappings (adapter -> Spark types)
6
+ # - Syntax rules (quoting, case sensitivity)
7
+ # - Adapter metadata queries (SQL templates)
8
+ #
9
+ # DVT v0.54.0: DuckDB-backed registry
10
+ # =============================================================================
11
+
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+ from typing import Any, Dict, List, Optional
15
+ import re
16
+
17
+ try:
18
+ import duckdb
19
+ except ImportError:
20
+ duckdb = None # Will raise helpful error on first use
21
+
22
+
@dataclass
class TypeMapping:
    """
    One row of the registry's adapter-type -> Spark-type mapping table.

    Only the first three fields are required; the remaining ones default to
    "applies to every Spark version, not complex, no cast, no notes".
    """
    # Adapter the mapping belongs to (e.g. 'postgres', 'snowflake')
    adapter_name: str
    # Native type name as known to the adapter (e.g. 'INTEGER', 'VARCHAR')
    adapter_type: str
    # Spark type the native type maps to
    spark_type: str
    # Spark version the mapping applies to; "all" means version-independent
    spark_version: str = "all"
    # Flag stored alongside the mapping marking complex types
    is_complex: bool = False
    # Optional cast expression stored with the mapping
    cast_expression: Optional[str] = None
    # Optional free-form notes stored with the mapping
    notes: Optional[str] = None
33
+
34
+
@dataclass
class SyntaxRule:
    """
    SQL syntax conventions recorded in the registry for one adapter.

    All fields are required.
    """
    # Adapter these rules apply to
    adapter_name: str
    # Opening delimiter placed before a quoted identifier
    quote_start: str
    # Closing delimiter placed after a quoted identifier
    quote_end: str
    # One of 'lowercase', 'uppercase', 'case_insensitive'
    case_sensitivity: str
    # Reserved words for the adapter
    reserved_keywords: List[str]
43
+
44
+
@dataclass
class AdapterQuery:
    """
    A SQL template the registry stores for extracting adapter metadata.

    ``notes`` is optional; everything else is required.
    """
    # Adapter the template is written for
    adapter_name: str
    # Kind of metadata requested: 'columns', 'tables', 'row_count', 'primary_key'
    query_type: str
    # The SQL template text
    query_template: str
    # Optional free-form notes stored with the template
    notes: Optional[str] = None
52
+
53
+
class AdaptersRegistry:
    """
    Read-only access to the shipped adapters registry database.

    This registry is shipped with DVT and provides:
    - Type mappings between adapter native types and Spark types
    - Syntax rules for SQL generation (quoting, case sensitivity)
    - Query templates for metadata extraction

    The registry is stored as a DuckDB database in the package's include/data
    directory. The class is a singleton: every ``AdaptersRegistry()`` call
    returns the same instance, and that instance opens a single read-only
    connection lazily on first use.
    """

    _instance: Optional['AdaptersRegistry'] = None
    _registry_path: Optional[Path] = None

    def __new__(cls) -> 'AdaptersRegistry':
        """Return the shared instance, creating it on the first call."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # The connection is opened lazily by _get_connection().
            cls._instance._conn = None
        return cls._instance

    @classmethod
    def get_registry_path(cls) -> Path:
        """
        Return the path to the shipped adapters_registry.duckdb.

        The resolved path is cached on the class after the first successful
        lookup.

        :raises FileNotFoundError: if the database file is not present
        """
        if cls._registry_path is not None:
            return cls._registry_path

        # Find the package's include/data directory
        # This file is at: core/dbt/compute/metadata/adapters_registry.py
        # Registry is at:  core/dbt/include/data/adapters_registry.duckdb
        this_file = Path(__file__)
        package_root = this_file.parent.parent.parent  # -> core/dbt
        registry_path = package_root / "include" / "data" / "adapters_registry.duckdb"

        if not registry_path.exists():
            raise FileNotFoundError(
                f"Adapters registry not found at: {registry_path}\n"
                "This file should be shipped with the DVT package. "
                "Please reinstall DVT or rebuild the registry with build_registry.py"
            )

        cls._registry_path = registry_path
        return registry_path

    def _get_connection(self) -> 'duckdb.DuckDBPyConnection':
        """
        Get or create the read-only connection to the registry.

        :raises ImportError: if the optional ``duckdb`` dependency is missing
        :raises FileNotFoundError: if the registry database cannot be located
        """
        if duckdb is None:
            raise ImportError(
                "duckdb is required for the adapters registry. "
                "Install with: pip install duckdb"
            )

        if self._conn is None:
            registry_path = self.get_registry_path()
            self._conn = duckdb.connect(str(registry_path), read_only=True)

        return self._conn

    def close(self) -> None:
        """Close the registry connection; a later query reopens it."""
        if self._conn is not None:
            self._conn.close()
            self._conn = None

    # =========================================================================
    # Row conversion helpers
    # =========================================================================

    @staticmethod
    def _row_to_type_mapping(row: Any) -> 'TypeMapping':
        """Build a TypeMapping from a datatype_mappings row (SELECT order below)."""
        return TypeMapping(
            adapter_name=row[0],
            adapter_type=row[1],
            spark_type=row[2],
            spark_version=row[3],
            is_complex=row[4],
            cast_expression=row[5],
            notes=row[6],
        )

    @staticmethod
    def _row_to_adapter_query(row: Any) -> 'AdapterQuery':
        """Build an AdapterQuery from an adapter_queries row (SELECT order below)."""
        return AdapterQuery(
            adapter_name=row[0],
            query_type=row[1],
            query_template=row[2],
            notes=row[3],
        )

    @staticmethod
    def _normalize_adapter_type(adapter_type: str) -> str:
        """
        Normalize a native type name for lookup.

        Upper-cases the name, removes parenthesised size specifiers
        (``VARCHAR(255)`` -> ``VARCHAR``) and collapses whitespace runs, so
        ``decimal (10,2)  unsigned`` becomes ``DECIMAL UNSIGNED`` rather than
        keeping the doubled space left behind by the removed specifier.
        """
        without_size = re.sub(r'\([^)]*\)', '', adapter_type.upper())
        return ' '.join(without_size.split())

    # =========================================================================
    # Type Mappings
    # =========================================================================

    def get_spark_type(
        self,
        adapter_name: str,
        adapter_type: str,
        spark_version: str = "all"
    ) -> Optional['TypeMapping']:
        """
        Look up the Spark type mapping for a given adapter type.

        A mapping for the exact ``spark_version`` is preferred over the
        version-independent ('all') mapping when both exist.

        :param adapter_name: Source adapter (e.g., 'postgres', 'snowflake')
        :param adapter_type: Adapter's native type (e.g., 'INTEGER', 'VARCHAR')
        :param spark_version: Target Spark version (default 'all')
        :returns: TypeMapping or None if not found
        """
        conn = self._get_connection()

        # Normalize inputs
        adapter_name = adapter_name.lower()
        adapter_type_normalized = self._normalize_adapter_type(adapter_type)

        result = conn.execute("""
            SELECT adapter_name, adapter_type, spark_type, spark_version,
                   is_complex, cast_expression, notes
            FROM datatype_mappings
            WHERE adapter_name = ?
              AND adapter_type = ?
              AND (spark_version = 'all' OR spark_version = ?)
            ORDER BY
              CASE WHEN spark_version = ? THEN 0 ELSE 1 END
            LIMIT 1
        """, [adapter_name, adapter_type_normalized, spark_version, spark_version]).fetchone()

        if result:
            return self._row_to_type_mapping(result)
        return None

    def get_all_mappings_for_adapter(self, adapter_name: str) -> List['TypeMapping']:
        """Get all type mappings for a specific adapter, ordered by adapter type."""
        conn = self._get_connection()
        adapter_name = adapter_name.lower()

        results = conn.execute("""
            SELECT adapter_name, adapter_type, spark_type, spark_version,
                   is_complex, cast_expression, notes
            FROM datatype_mappings
            WHERE adapter_name = ?
            ORDER BY adapter_type
        """, [adapter_name]).fetchall()

        return [self._row_to_type_mapping(row) for row in results]

    def get_supported_adapters(self) -> List[str]:
        """Get the sorted list of adapter names that have type mappings."""
        conn = self._get_connection()
        results = conn.execute("""
            SELECT DISTINCT adapter_name FROM datatype_mappings ORDER BY adapter_name
        """).fetchall()
        return [row[0] for row in results]

    # =========================================================================
    # Syntax Rules
    # =========================================================================

    def get_syntax_rule(self, adapter_name: str) -> Optional['SyntaxRule']:
        """
        Get syntax rules for a specific adapter.

        :param adapter_name: Adapter name; matched case-insensitively
        :returns: SyntaxRule or None if the adapter has no entry
        """
        conn = self._get_connection()
        adapter_name = adapter_name.lower()

        result = conn.execute("""
            SELECT adapter_name, quote_start, quote_end, case_sensitivity, reserved_keywords
            FROM syntax_registry
            WHERE adapter_name = ?
        """, [adapter_name]).fetchone()

        if not result:
            return None

        # Reserved keywords are stored as a comma-separated string
        keywords: List[str] = []
        if result[4]:
            keywords = [kw.strip() for kw in result[4].split(',') if kw.strip()]

        return SyntaxRule(
            adapter_name=result[0],
            quote_start=result[1],
            quote_end=result[2],
            case_sensitivity=result[3],
            reserved_keywords=keywords,
        )

    def quote_identifier(self, adapter_name: str, identifier: str) -> str:
        """
        Quote an identifier using the adapter's quoting rules.

        An occurrence of the closing quote character inside the identifier is
        escaped by doubling it, so the result stays a single quoted token.
        Adapters without a registered rule fall back to double quotes.

        :param adapter_name: Adapter whose quoting rules apply
        :param identifier: Raw (unquoted) identifier
        :returns: The quoted identifier
        """
        rule = self.get_syntax_rule(adapter_name)
        if not rule:
            # Default to double quotes
            quote_start = quote_end = '"'
        else:
            quote_start, quote_end = rule.quote_start, rule.quote_end

        if quote_end:
            identifier = identifier.replace(quote_end, quote_end * 2)
        return f'{quote_start}{identifier}{quote_end}'

    def needs_quoting(self, adapter_name: str, identifier: str) -> bool:
        """
        Check if an identifier needs quoting.

        Quoting is required when the identifier is a reserved keyword
        (compared case-insensitively), is empty, contains anything other than
        letters, digits and underscores, or starts with a digit. Adapters
        without a registered rule always yield False.

        :param adapter_name: Adapter whose reserved keywords apply
        :param identifier: Raw (unquoted) identifier
        """
        rule = self.get_syntax_rule(adapter_name)
        if not rule:
            return False

        # Reserved keyword?
        upper_id = identifier.upper()
        if any(kw.upper() == upper_id for kw in rule.reserved_keywords):
            return True

        # Empty, or contains spaces / hyphens / other special characters
        if not identifier.replace('_', '').isalnum():
            return True

        # A leading digit is not a valid start for an unquoted identifier
        return identifier[0].isdigit()

    def normalize_identifier(self, adapter_name: str, identifier: str) -> str:
        """
        Normalize an identifier based on the adapter's case sensitivity rules.

        'uppercase' and 'lowercase' rules fold the identifier accordingly; any
        other value (including a missing rule or a NULL setting) returns the
        identifier unchanged.
        """
        rule = self.get_syntax_rule(adapter_name)
        if not rule:
            return identifier

        # case_sensitivity may be NULL in the database; treat it as "preserve"
        case_rule = (rule.case_sensitivity or '').lower()
        if case_rule == "uppercase":
            return identifier.upper()
        if case_rule == "lowercase":
            return identifier.lower()
        return identifier  # case_insensitive or preserve

    # =========================================================================
    # Adapter Queries
    # =========================================================================

    def get_metadata_query(
        self,
        adapter_name: str,
        query_type: str
    ) -> Optional['AdapterQuery']:
        """
        Get SQL template for metadata extraction.

        :param adapter_name: Source adapter (e.g., 'postgres', 'snowflake')
        :param query_type: Query type: 'columns', 'tables', 'row_count', 'primary_key'
        :returns: AdapterQuery or None if not found
        """
        conn = self._get_connection()
        adapter_name = adapter_name.lower()

        result = conn.execute("""
            SELECT adapter_name, query_type, query_template, notes
            FROM adapter_queries
            WHERE adapter_name = ? AND query_type = ?
        """, [adapter_name, query_type]).fetchone()

        if result:
            return self._row_to_adapter_query(result)
        return None

    def get_all_queries_for_adapter(self, adapter_name: str) -> List['AdapterQuery']:
        """Get all query templates for a specific adapter, ordered by query type."""
        conn = self._get_connection()
        adapter_name = adapter_name.lower()

        results = conn.execute("""
            SELECT adapter_name, query_type, query_template, notes
            FROM adapter_queries
            WHERE adapter_name = ?
            ORDER BY query_type
        """, [adapter_name]).fetchall()

        return [self._row_to_adapter_query(row) for row in results]
326
+
327
+
328
+ # =============================================================================
329
+ # Module-level convenience functions
330
+ # =============================================================================
331
+
def get_registry() -> AdaptersRegistry:
    """Return the shared AdaptersRegistry (the class hands out one instance)."""
    registry = AdaptersRegistry()
    return registry
335
+
336
+
def get_spark_type(
    adapter_name: str,
    adapter_type: str,
    spark_version: str = "all"
) -> Optional[TypeMapping]:
    """
    Module-level shortcut for ``get_registry().get_spark_type(...)``.

    :param adapter_name: Source adapter (e.g., 'postgres', 'snowflake')
    :param adapter_type: Adapter's native type (e.g., 'INTEGER', 'VARCHAR')
    :param spark_version: Target Spark version (default 'all')
    :returns: TypeMapping or None if not found
    """
    registry = get_registry()
    mapping = registry.get_spark_type(adapter_name, adapter_type, spark_version)
    return mapping
351
+
352
+
def get_syntax_rule(adapter_name: str) -> Optional[SyntaxRule]:
    """Module-level shortcut for ``get_registry().get_syntax_rule(adapter_name)``."""
    registry = get_registry()
    return registry.get_syntax_rule(adapter_name)
356
+
357
+
def get_metadata_query(adapter_name: str, query_type: str) -> Optional[AdapterQuery]:
    """Module-level shortcut for ``get_registry().get_metadata_query(...)``."""
    registry = get_registry()
    return registry.get_metadata_query(adapter_name, query_type)
361
+
362
+
def quote_identifier(adapter_name: str, identifier: str) -> str:
    """Module-level shortcut for ``get_registry().quote_identifier(...)``."""
    registry = get_registry()
    return registry.quote_identifier(adapter_name, identifier)
366
+
367
+
def normalize_identifier(adapter_name: str, identifier: str) -> str:
    """Module-level shortcut for ``get_registry().normalize_identifier(...)``."""
    registry = get_registry()
    return registry.normalize_identifier(adapter_name, identifier)