contractforge-databricks 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. contractforge_databricks/__init__.py +172 -0
  2. contractforge_databricks/adapter.py +69 -0
  3. contractforge_databricks/annotations/__init__.py +10 -0
  4. contractforge_databricks/annotations/application.py +52 -0
  5. contractforge_databricks/annotations/audit.py +49 -0
  6. contractforge_databricks/annotations/sql.py +142 -0
  7. contractforge_databricks/api.py +65 -0
  8. contractforge_databricks/bundles/__init__.py +9 -0
  9. contractforge_databricks/bundles/assets.py +47 -0
  10. contractforge_databricks/bundles/project.py +213 -0
  11. contractforge_databricks/bundles/project_config.py +133 -0
  12. contractforge_databricks/capabilities/__init__.py +17 -0
  13. contractforge_databricks/capabilities/builders.py +43 -0
  14. contractforge_databricks/capabilities/evaluate.py +162 -0
  15. contractforge_databricks/capabilities/mapping.py +36 -0
  16. contractforge_databricks/capabilities/models.py +44 -0
  17. contractforge_databricks/capabilities/runtime.py +111 -0
  18. contractforge_databricks/capabilities/uc.py +47 -0
  19. contractforge_databricks/cli.py +196 -0
  20. contractforge_databricks/cli_deploy.py +98 -0
  21. contractforge_databricks/cli_governance.py +142 -0
  22. contractforge_databricks/cli_io.py +91 -0
  23. contractforge_databricks/cli_maintenance.py +69 -0
  24. contractforge_databricks/coercion.py +31 -0
  25. contractforge_databricks/contract_extensions.py +70 -0
  26. contractforge_databricks/cost/__init__.py +11 -0
  27. contractforge_databricks/cost/model.py +22 -0
  28. contractforge_databricks/cost/report.py +65 -0
  29. contractforge_databricks/cost/sql.py +136 -0
  30. contractforge_databricks/dashboards/__init__.py +15 -0
  31. contractforge_databricks/dashboards/control_tables.py +150 -0
  32. contractforge_databricks/diagnostics/__init__.py +7 -0
  33. contractforge_databricks/diagnostics/explain.py +40 -0
  34. contractforge_databricks/environment.py +53 -0
  35. contractforge_databricks/evidence/__init__.py +98 -0
  36. contractforge_databricks/evidence/ddl.py +35 -0
  37. contractforge_databricks/evidence/governance_log.py +175 -0
  38. contractforge_databricks/evidence/helpers.py +29 -0
  39. contractforge_databricks/evidence/ops_log.py +210 -0
  40. contractforge_databricks/evidence/records.py +27 -0
  41. contractforge_databricks/evidence/run_log.py +74 -0
  42. contractforge_databricks/evidence/schemas.py +7 -0
  43. contractforge_databricks/evidence/sql.py +144 -0
  44. contractforge_databricks/evidence/tables.py +20 -0
  45. contractforge_databricks/evidence/writer.py +118 -0
  46. contractforge_databricks/execution/__init__.py +70 -0
  47. contractforge_databricks/execution/delta_basic.py +57 -0
  48. contractforge_databricks/execution/hash_diff.py +126 -0
  49. contractforge_databricks/execution/hash_diff_latest.py +142 -0
  50. contractforge_databricks/execution/replace_partitions.py +40 -0
  51. contractforge_databricks/execution/results.py +5 -0
  52. contractforge_databricks/execution/retry.py +36 -0
  53. contractforge_databricks/execution/scd2.py +213 -0
  54. contractforge_databricks/execution/scd2_deletes.py +65 -0
  55. contractforge_databricks/execution/scd2_late.py +30 -0
  56. contractforge_databricks/execution/snapshot.py +77 -0
  57. contractforge_databricks/execution/sql_merge.py +85 -0
  58. contractforge_databricks/execution/tables.py +98 -0
  59. contractforge_databricks/execution/windows.py +58 -0
  60. contractforge_databricks/governance/__init__.py +30 -0
  61. contractforge_databricks/governance/access.py +185 -0
  62. contractforge_databricks/governance/application.py +93 -0
  63. contractforge_databricks/governance/drift.py +49 -0
  64. contractforge_databricks/governance/runtime.py +60 -0
  65. contractforge_databricks/governance/sql.py +31 -0
  66. contractforge_databricks/governance/validation.py +135 -0
  67. contractforge_databricks/lakeflow/__init__.py +21 -0
  68. contractforge_databricks/lakeflow/compatibility.py +194 -0
  69. contractforge_databricks/lakeflow/rendering.py +175 -0
  70. contractforge_databricks/lineage/__init__.py +7 -0
  71. contractforge_databricks/lineage/openlineage.py +182 -0
  72. contractforge_databricks/maintenance/__init__.py +27 -0
  73. contractforge_databricks/maintenance/retention.py +90 -0
  74. contractforge_databricks/maintenance/sql.py +68 -0
  75. contractforge_databricks/metrics/__init__.py +19 -0
  76. contractforge_databricks/metrics/history.py +21 -0
  77. contractforge_databricks/metrics/write.py +63 -0
  78. contractforge_databricks/operations/__init__.py +4 -0
  79. contractforge_databricks/operations/application.py +38 -0
  80. contractforge_databricks/operations/sql.py +95 -0
  81. contractforge_databricks/parity/__init__.py +18 -0
  82. contractforge_databricks/parity/catalog.py +59 -0
  83. contractforge_databricks/parity/models.py +7 -0
  84. contractforge_databricks/parity/scenarios.py +111 -0
  85. contractforge_databricks/partitioning/__init__.py +3 -0
  86. contractforge_databricks/partitioning/predicates.py +28 -0
  87. contractforge_databricks/preparation/__init__.py +47 -0
  88. contractforge_databricks/preparation/deduplicate.py +87 -0
  89. contractforge_databricks/preparation/encoding.py +37 -0
  90. contractforge_databricks/preparation/hashing.py +18 -0
  91. contractforge_databricks/preparation/pyspark.py +178 -0
  92. contractforge_databricks/preparation/pyspark_staging.py +70 -0
  93. contractforge_databricks/preparation/shape.py +209 -0
  94. contractforge_databricks/preparation/shape_validation.py +94 -0
  95. contractforge_databricks/preparation/staging.py +17 -0
  96. contractforge_databricks/preparation/zip_arrays.py +51 -0
  97. contractforge_databricks/presets/__init__.py +3 -0
  98. contractforge_databricks/presets/base.py +24 -0
  99. contractforge_databricks/presets/bronze.py +57 -0
  100. contractforge_databricks/presets/catalog.py +22 -0
  101. contractforge_databricks/presets/core.py +134 -0
  102. contractforge_databricks/presets/gold.py +62 -0
  103. contractforge_databricks/presets/modifiers.py +51 -0
  104. contractforge_databricks/presets/runtime.py +22 -0
  105. contractforge_databricks/presets/silver.py +101 -0
  106. contractforge_databricks/presets/write_engine.py +57 -0
  107. contractforge_databricks/quality/__init__.py +41 -0
  108. contractforge_databricks/quality/evaluation.py +178 -0
  109. contractforge_databricks/quality/persistence.py +81 -0
  110. contractforge_databricks/quality/registry.py +134 -0
  111. contractforge_databricks/quality/results.py +17 -0
  112. contractforge_databricks/quality/sql.py +113 -0
  113. contractforge_databricks/rendering/__init__.py +11 -0
  114. contractforge_databricks/rendering/bundle.py +93 -0
  115. contractforge_databricks/rendering/markdown.py +50 -0
  116. contractforge_databricks/rendering/names.py +56 -0
  117. contractforge_databricks/results.py +15 -0
  118. contractforge_databricks/runtime/__init__.py +101 -0
  119. contractforge_databricks/runtime/available_now.py +147 -0
  120. contractforge_databricks/runtime/bundles.py +211 -0
  121. contractforge_databricks/runtime/cache.py +20 -0
  122. contractforge_databricks/runtime/control_tables.py +19 -0
  123. contractforge_databricks/runtime/deploy.py +197 -0
  124. contractforge_databricks/runtime/detection.py +114 -0
  125. contractforge_databricks/runtime/dry_run.py +46 -0
  126. contractforge_databricks/runtime/errors.py +54 -0
  127. contractforge_databricks/runtime/file_selection.py +109 -0
  128. contractforge_databricks/runtime/finalization.py +168 -0
  129. contractforge_databricks/runtime/governance.py +37 -0
  130. contractforge_databricks/runtime/hooks.py +45 -0
  131. contractforge_databricks/runtime/http_file.py +37 -0
  132. contractforge_databricks/runtime/http_retry.py +15 -0
  133. contractforge_databricks/runtime/http_safety.py +9 -0
  134. contractforge_databricks/runtime/json_materialization.py +97 -0
  135. contractforge_databricks/runtime/lineage.py +164 -0
  136. contractforge_databricks/runtime/maintenance.py +43 -0
  137. contractforge_databricks/runtime/merge_validation.py +98 -0
  138. contractforge_databricks/runtime/metadata.py +21 -0
  139. contractforge_databricks/runtime/metrics.py +34 -0
  140. contractforge_databricks/runtime/models.py +32 -0
  141. contractforge_databricks/runtime/options.py +33 -0
  142. contractforge_databricks/runtime/orchestration_context.py +185 -0
  143. contractforge_databricks/runtime/orchestrator.py +147 -0
  144. contractforge_databricks/runtime/partitioning.py +93 -0
  145. contractforge_databricks/runtime/quality_quarantine.py +92 -0
  146. contractforge_databricks/runtime/rest_api.py +46 -0
  147. contractforge_databricks/runtime/rest_auth.py +21 -0
  148. contractforge_databricks/runtime/rest_pagination.py +21 -0
  149. contractforge_databricks/runtime/run_payload.py +177 -0
  150. contractforge_databricks/runtime/schema.py +106 -0
  151. contractforge_databricks/runtime/source_metadata.py +30 -0
  152. contractforge_databricks/runtime/source_registry.py +43 -0
  153. contractforge_databricks/runtime/source_schema.py +24 -0
  154. contractforge_databricks/runtime/sources.py +208 -0
  155. contractforge_databricks/runtime/spark.py +183 -0
  156. contractforge_databricks/runtime/spark_defaults.py +35 -0
  157. contractforge_databricks/runtime/storage_auth.py +132 -0
  158. contractforge_databricks/runtime/streaming.py +131 -0
  159. contractforge_databricks/runtime/success.py +104 -0
  160. contractforge_databricks/runtime/utils.py +52 -0
  161. contractforge_databricks/runtime/watermark.py +71 -0
  162. contractforge_databricks/runtime/windows.py +184 -0
  163. contractforge_databricks/runtime/write.py +66 -0
  164. contractforge_databricks/runtime/write_flow.py +146 -0
  165. contractforge_databricks/runtime/write_strategy.py +40 -0
  166. contractforge_databricks/schema/__init__.py +21 -0
  167. contractforge_databricks/schema/diff.py +11 -0
  168. contractforge_databricks/schema/policy.py +33 -0
  169. contractforge_databricks/schema/sync.py +23 -0
  170. contractforge_databricks/security/__init__.py +21 -0
  171. contractforge_databricks/security/errors.py +5 -0
  172. contractforge_databricks/security/redaction.py +5 -0
  173. contractforge_databricks/security/secrets.py +114 -0
  174. contractforge_databricks/security/source_policy.py +17 -0
  175. contractforge_databricks/shapes/__init__.py +3 -0
  176. contractforge_databricks/shapes/sql.py +123 -0
  177. contractforge_databricks/sources/__init__.py +67 -0
  178. contractforge_databricks/sources/artifacts.py +100 -0
  179. contractforge_databricks/sources/autoloader.py +48 -0
  180. contractforge_databricks/sources/bounded_streams.py +44 -0
  181. contractforge_databricks/sources/classification.py +115 -0
  182. contractforge_databricks/sources/delta_share.py +21 -0
  183. contractforge_databricks/sources/files.py +48 -0
  184. contractforge_databricks/sources/http_file.py +46 -0
  185. contractforge_databricks/sources/interpret.py +76 -0
  186. contractforge_databricks/sources/jdbc.py +32 -0
  187. contractforge_databricks/sources/metadata.py +18 -0
  188. contractforge_databricks/sources/native_passthrough.py +33 -0
  189. contractforge_databricks/sources/rds_iam.py +15 -0
  190. contractforge_databricks/sources/rds_iam_runtime.py +191 -0
  191. contractforge_databricks/sources/rest_api.py +33 -0
  192. contractforge_databricks/sources/support.py +50 -0
  193. contractforge_databricks/sources/table_refs.py +65 -0
  194. contractforge_databricks/sql/__init__.py +4 -0
  195. contractforge_databricks/sql/identifiers.py +17 -0
  196. contractforge_databricks/sql/literals.py +36 -0
  197. contractforge_databricks/state/__init__.py +39 -0
  198. contractforge_databricks/state/ddl.py +24 -0
  199. contractforge_databricks/state/migrations.py +146 -0
  200. contractforge_databricks/state/queries.py +149 -0
  201. contractforge_databricks/state/sql.py +116 -0
  202. contractforge_databricks/state/tables.py +9 -0
  203. contractforge_databricks/state/writer.py +83 -0
  204. contractforge_databricks/templates/__init__.py +15 -0
  205. contractforge_databricks/templates/catalog.py +205 -0
  206. contractforge_databricks/templates/catalog_parity.py +85 -0
  207. contractforge_databricks/templates/core.py +83 -0
  208. contractforge_databricks/templates/enrichment.py +175 -0
  209. contractforge_databricks/transforms/__init__.py +3 -0
  210. contractforge_databricks/transforms/sql.py +118 -0
  211. contractforge_databricks/watermark/__init__.py +6 -0
  212. contractforge_databricks/watermark/sql.py +91 -0
  213. contractforge_databricks/write_modes/__init__.py +20 -0
  214. contractforge_databricks/write_modes/registry.py +44 -0
  215. contractforge_databricks/write_modes/sql.py +33 -0
  216. contractforge_databricks/write_modes/strategy.py +192 -0
  217. contractforge_databricks-0.1.0.dist-info/METADATA +34 -0
  218. contractforge_databricks-0.1.0.dist-info/RECORD +220 -0
  219. contractforge_databricks-0.1.0.dist-info/WHEEL +4 -0
  220. contractforge_databricks-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,213 @@
1
+ """Render Databricks Asset Bundles from ContractForge project metadata."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any, Mapping
7
+
8
+ from contractforge_databricks.bundles.project_config import (
9
+ databricks_deployment,
10
+ databricks_scheduling,
11
+ job_fields,
12
+ mapping,
13
+ required_text,
14
+ runtime_environment,
15
+ sequence,
16
+ slug,
17
+ text,
18
+ validation_job_name,
19
+ variable_parameter,
20
+ variables,
21
+ )
22
+ from contractforge_databricks.cli_io import yaml_dump
23
+
24
# Bundle target rendered when the caller does not name one.
_DEFAULT_TARGET = "dev"

# Notebook used for contract tasks when neither the task override nor the
# deployment settings provide a notebook path.
_DEFAULT_CONTRACT_NOTEBOOK = "./notebooks/run_contractforge.py"

# Environment key used when neither the task nor the deployment sets one.
_DEFAULT_ENVIRONMENT_KEY = "contractforge_runtime"

# Keys of an extra-task entry that are stripped when the entry is passed
# through as a raw task body.
_EXTRA_TASK_META_KEYS = {
    "task_key",
    "name",
    "depends_on",
    "environment_key",
    "notebook_path",
    "base_parameters",
}
35
+
36
+
37
def render_databricks_project_bundle(
    project: Mapping[str, Any],
    *,
    project_root: str | Path | None = None,
    target: str = _DEFAULT_TARGET,
) -> dict[str, Any]:
    """Build the Databricks Asset Bundle document for a ContractForge project.

    Args:
        project: Project-level metadata; ``name`` is required.
        project_root: Accepted for interface compatibility; not read here.
        target: Name of the single bundle target, marked as the default.

    Returns:
        A dict with ``bundle``, ``resources`` and ``targets`` sections, plus
        ``workspace`` and ``variables`` when they are configured.

    Raises:
        ValueError: If ``project.name`` is blank or a helper rejects the
            project metadata.
    """

    deployment = databricks_deployment(project)
    scheduling = databricks_scheduling(project)
    project_name = required_text(project.get("name"), "project.name")
    default_key = slug  # bundle and job keys both fall back to the slugged name

    bundle_name = text(deployment.get("bundle_name")) or default_key(project_name)
    job_key = text(deployment.get("job_key")) or default_key(project_name)
    job_name = text(deployment.get("job_name")) or validation_job_name(project) or project_name
    variables_payload = variables(deployment)

    # Key order is kept: name, job-level fields, environments, tasks.
    job: dict[str, Any] = {"name": job_name}
    job.update(job_fields(scheduling))
    job.update(runtime_environment(deployment, variables_payload))
    job["tasks"] = _tasks(project, deployment, scheduling, variables_payload)

    bundle: dict[str, Any] = {}
    bundle["bundle"] = {"name": bundle_name}
    bundle["resources"] = {"jobs": {job_key: job}}
    bundle["targets"] = {target: {"default": True}}

    workspace_root = text(deployment.get("workspace_root_path") or deployment.get("workspace_root"))
    if workspace_root:
        bundle["workspace"] = {"root_path": workspace_root}
    if variables_payload:
        bundle["variables"] = variables_payload
    return bundle
69
+
70
+
71
def render_databricks_project_bundle_yaml(
    project: Mapping[str, Any],
    *,
    project_root: str | Path | None = None,
    target: str = _DEFAULT_TARGET,
) -> str:
    """Render the project's Databricks Asset Bundle and dump it as YAML text.

    The arguments are forwarded unchanged to
    ``render_databricks_project_bundle``; the resulting document is passed to
    ``yaml_dump``.
    """

    document = render_databricks_project_bundle(
        project,
        project_root=project_root,
        target=target,
    )
    return yaml_dump(document)
80
+
81
+
82
def _tasks(
    project: Mapping[str, Any],
    deployment: Mapping[str, Any],
    scheduling: Mapping[str, Any],
    variables: Mapping[str, Any],
) -> list[dict[str, Any]]:
    """Return the job's task list.

    Ordering: extra tasks without dependencies, then one contract task per
    execution-order step, then extra tasks that declare ``depends_on``.
    """
    overrides = mapping(scheduling.get("tasks"))
    keys = _all_task_keys(project, scheduling, overrides)
    extras = _extra_tasks(scheduling, deployment, keys)

    leading = [entry for entry in extras if not entry.get("depends_on")]
    contract_tasks = [
        _contract_task(step, deployment, overrides, keys, variables)
        for step in _execution_order(project)
    ]
    trailing = [entry for entry in extras if entry.get("depends_on")]
    return leading + contract_tasks + trailing
99
+
100
+
101
def _extra_tasks(
    scheduling: Mapping[str, Any],
    deployment: Mapping[str, Any],
    task_keys: Mapping[str, str],
) -> list[dict[str, Any]]:
    """Render every mapping entry of ``scheduling["extra_tasks"]``; other entries are ignored."""
    rendered: list[dict[str, Any]] = []
    for entry in sequence(scheduling.get("extra_tasks")):
        if isinstance(entry, Mapping):
            rendered.append(_extra_task(entry, deployment, task_keys))
    return rendered
107
+
108
+
109
def _extra_task(task: Mapping[str, Any], deployment: Mapping[str, Any], task_keys: Mapping[str, str]) -> dict[str, Any]:
    """Render one extra task: key, environment, dependencies and task body.

    The task key is ``task_key`` or, failing that, ``name``. Entries whose
    value is ``{}`` or ``None`` are dropped from the result.

    Raises:
        ValueError: If neither ``task_key`` nor ``name`` yields non-blank text.
    """
    task_key = required_text(task.get("task_key") or task.get("name"), "scheduling.databricks.extra_tasks[].task_key")
    environment_key = text(task.get("environment_key") or deployment.get("environment_key")) or _DEFAULT_ENVIRONMENT_KEY

    assembled: dict[str, Any] = {"task_key": task_key, "environment_key": environment_key}
    assembled.update(_task_dependencies(task, task_keys))
    assembled.update(_extra_task_body(task))

    result: dict[str, Any] = {}
    for key, value in assembled.items():
        if value not in ({}, None):
            result[key] = value
    return result
119
+
120
+
121
def _extra_task_body(task: Mapping[str, Any]) -> dict[str, Any]:
    """Return the task-type portion of an extra task.

    Resolution order:
    1. an explicit ``notebook_task`` mapping is copied as-is;
    2. a non-blank ``notebook_path`` produces a notebook task with
       stringified ``base_parameters``;
    3. otherwise every key not in ``_EXTRA_TASK_META_KEYS`` is passed through.
    """
    explicit = task.get("notebook_task")
    if isinstance(explicit, Mapping):
        return {"notebook_task": dict(task["notebook_task"])}

    notebook_path = text(task.get("notebook_path"))
    if not notebook_path:
        passthrough: dict[str, Any] = {}
        for key, value in task.items():
            if key not in _EXTRA_TASK_META_KEYS:
                passthrough[str(key)] = value
        return passthrough

    parameters = {
        str(key): str(value)
        for key, value in mapping(task.get("base_parameters")).items()
    }
    return {"notebook_task": {"notebook_path": notebook_path, "base_parameters": parameters}}
134
+
135
+
136
def _contract_task(
    step: Mapping[str, Any],
    deployment: Mapping[str, Any],
    task_overrides: Mapping[str, Any],
    task_keys: Mapping[str, str],
    variables: Mapping[str, Any],
) -> dict[str, Any]:
    """Render the notebook task that runs one execution-order step's contract.

    Environment key and notebook path come from the step's override first,
    then the deployment settings, then the module defaults. Entries whose
    value is ``{}`` or ``None`` are dropped.

    Raises:
        ValueError: If the step has no name or no ``contracts.databricks``.
        KeyError: If the step name is missing from ``task_keys``.
    """
    name = required_text(step.get("name"), "execution_order[].name")
    override = mapping(task_overrides.get(name))
    contract = _databricks_contract(step)
    environment_key = text(override.get("environment_key") or deployment.get("environment_key")) or _DEFAULT_ENVIRONMENT_KEY
    notebook_path = text(override.get("notebook_path") or deployment.get("contract_notebook_path")) or _DEFAULT_CONTRACT_NOTEBOOK

    rendered: dict[str, Any] = {"task_key": task_keys[name], "environment_key": environment_key}
    rendered.update(_task_dependencies(step, task_keys))
    rendered["notebook_task"] = {
        "notebook_path": notebook_path,
        "base_parameters": _base_parameters(contract, override, variables),
    }

    kept: dict[str, Any] = {}
    for key, value in rendered.items():
        if value not in ({}, None):
            kept[key] = value
    return kept
158
+
159
+
160
def _base_parameters(contract: str, override: Mapping[str, Any], variables: Mapping[str, Any]) -> dict[str, str]:
    """Build notebook base parameters for a contract task.

    Starts with ``bundle_root`` (when declared), ``contract``, then
    ``evidence_catalog`` and ``evidence_schema`` (when declared); the
    override's ``base_parameters`` are stringified and applied on top.
    """
    parameters: dict[str, str] = {}
    parameters.update(variable_parameter("bundle_root", variables))
    parameters["contract"] = contract
    for variable_name in ("evidence_catalog", "evidence_schema"):
        parameters.update(variable_parameter(variable_name, variables))

    for key, value in mapping(override.get("base_parameters")).items():
        parameters[str(key)] = str(value)
    return parameters
169
+
170
+
171
def _task_dependencies(step: Mapping[str, Any], task_keys: Mapping[str, str]) -> dict[str, list[dict[str, str]]]:
    """Return ``{"depends_on": [...]}`` for the step's dependencies, or ``{}`` when it has none."""
    entries = [
        {"task_key": _dependency_task_key(name, task_keys)}
        for name in sequence(step.get("depends_on"))
    ]
    if not entries:
        return {}
    return {"depends_on": entries}
174
+
175
+
176
def _dependency_task_key(name: Any, task_keys: Mapping[str, str]) -> str:
    """Map a dependency name to its task key; unknown names are returned as written.

    Raises:
        ValueError: If ``name`` is blank.
    """
    declared = required_text(name, "execution_order[].depends_on[]")
    if declared in task_keys:
        return task_keys[declared]
    return declared
179
+
180
+
181
def _databricks_contract(step: Mapping[str, Any]) -> str:
    """Return the step's Databricks contract path in pathlib's POSIX rendering.

    Args:
        step: An execution-order entry; ``contracts.databricks`` must be set.

    Returns:
        The declared path rendered with ``Path.as_posix()``. Absolute and
        relative paths are treated the same way.

    Raises:
        ValueError: If ``contracts.databricks`` is missing or blank.
    """
    declared = text(mapping(step.get("contracts")).get("databricks"))
    if not declared:
        raise ValueError(f"execution_order entry {step.get('name')!r} must declare contracts.databricks")
    # as_posix() already returns str, so no absolute/relative branch or
    # str() wrapper is needed.
    return Path(declared).as_posix()
188
+
189
+
190
def _all_task_keys(project: Mapping[str, Any], scheduling: Mapping[str, Any], task_overrides: Mapping[str, Any]) -> dict[str, str]:
    """Map every task name (contract steps and named extra tasks) to its task key.

    Contract steps use the override's ``task_key``, then the step's own
    ``task_key``, then the slugged step name. Named extra tasks use their
    ``task_key`` and fall back to ``name``, matching how ``_extra_task``
    resolves the key it renders. Extra-task entries override contract
    entries with the same name.

    Raises:
        ValueError: If a step name or a named extra task's name is blank, or
            if the execution order is invalid.
    """
    contract_keys: dict[str, str] = {}
    for step in _execution_order(project):
        name = required_text(step.get("name"), "execution_order[].name")
        override = mapping(task_overrides.get(name))
        contract_keys[name] = text(override.get("task_key") or step.get("task_key")) or slug(name)

    extra_keys: dict[str, str] = {}
    for task in sequence(scheduling.get("extra_tasks")):
        if not isinstance(task, Mapping) or not task.get("name"):
            continue
        name = required_text(task.get("name"), "scheduling.databricks.extra_tasks[].name")
        # Same fallback as the rendered task key, so a name-only extra task
        # is accepted here instead of being rejected before rendering.
        extra_keys[name] = required_text(
            task.get("task_key") or task.get("name"),
            "scheduling.databricks.extra_tasks[].task_key",
        )
    return {**contract_keys, **extra_keys}
204
+
205
+
206
def _execution_order(project: Mapping[str, Any]) -> tuple[Mapping[str, Any], ...]:
    """Return ``project["execution_order"]`` as a tuple after validating it.

    Raises:
        ValueError: If the value is not a non-empty list, or if any entry is
            not a mapping.
    """
    steps = project.get("execution_order")
    if not (isinstance(steps, list) and steps):
        raise ValueError("project.execution_order must be a non-empty list")
    if not all(isinstance(step, Mapping) for step in steps):
        raise ValueError("project.execution_order entries must be objects")
    return tuple(steps)
@@ -0,0 +1,133 @@
1
+ """Configuration helpers for Databricks project bundle rendering."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import Any, Mapping
7
+
8
+ from contractforge_core.project import adapter_scheduling, quartz_cron_expression
9
+
10
def _passthrough_field(value):
    """Return a job field value unchanged."""
    return value


def _queue_field(value):
    """Expand a boolean queue setting to ``{"enabled": ...}``; leave other values as given."""
    if isinstance(value, bool):
        return {"enabled": value}
    return value


def _schedule_field(value):
    """Render a schedule mapping through ``databricks_schedule``."""
    return databricks_schedule(mapping(value))


# Job-level fields that may be rendered, each with the converter applied to
# its configured value.
_JOB_FIELD_BUILDERS = {
    "email_notifications": _passthrough_field,
    "max_concurrent_runs": int,
    "notification_settings": _passthrough_field,
    "queue": _queue_field,
    "schedule": _schedule_field,
    "tags": _passthrough_field,
    "timeout_seconds": int,
    "webhook_notifications": _passthrough_field,
}
20
+
21
+
22
def runtime_environment(deployment: Mapping[str, Any], variables: Mapping[str, Any]) -> dict[str, Any]:
    """Return the job's ``environments`` section, or ``{}`` when none applies.

    Nothing is rendered when ``runtime_environment.enabled`` is falsy or when
    no dependencies can be resolved.
    """
    environment = mapping(deployment.get("runtime_environment"))
    if not environment.get("enabled", True):
        return {}

    packages = dependencies(environment, variables)
    if not packages:
        return {}

    environment_key = text(deployment.get("environment_key")) or "contractforge_runtime"
    spec = {
        "environment_version": str(environment.get("environment_version") or "2"),
        "dependencies": packages,
    }
    return {"environments": [{"environment_key": environment_key, "spec": spec}]}
41
+
42
+
43
def dependencies(environment: Mapping[str, Any], variables: Mapping[str, Any]) -> list[str]:
    """Return the environment's dependency list.

    Explicitly configured ``dependencies`` win; otherwise bundle-variable
    references are emitted for whichever wheel-path variables are declared.
    """
    explicit = [str(item) for item in sequence(environment.get("dependencies"))]
    if explicit:
        return explicit
    return [
        f"${{var.{name}}}"
        for name in ("core_wheel_path", "databricks_wheel_path")
        if name in variables
    ]
47
+
48
+
49
def job_fields(scheduling: Mapping[str, Any]) -> dict[str, Any]:
    """Render job-level settings from the scheduling section.

    A few keys may sit directly on ``scheduling``; entries under
    ``scheduling["job"]`` take precedence over them. Only keys known to
    ``_JOB_FIELD_BUILDERS`` with a non-``None`` value are rendered.
    """
    configured: dict[str, Any] = {}
    for key in ("max_concurrent_runs", "queue", "schedule", "tags"):
        if key in scheduling:
            configured[key] = scheduling[key]
    configured.update(mapping(scheduling.get("job")))

    rendered: dict[str, Any] = {}
    for key, value in configured.items():
        if value is None or key not in _JOB_FIELD_BUILDERS:
            continue
        rendered[key] = _JOB_FIELD_BUILDERS[key](value)
    return rendered
58
+
59
+
60
def databricks_schedule(schedule: Mapping[str, Any]) -> dict[str, Any]:
    """Render a job ``schedule`` block from a schedule mapping.

    An explicit ``quartz_cron_expression`` is used when present; otherwise
    ``cron`` is converted. The timezone defaults to ``"UTC"`` and the pause
    status is derived from the mapping when not given explicitly.

    Raises:
        ValueError: If no cron expression can be determined.
    """
    cron = schedule.get("quartz_cron_expression")
    if not cron:
        cron = _quartz_from_standard(schedule.get("cron"))
    if not cron:
        raise ValueError("schedule.cron is required for Databricks project scheduling")

    timezone = schedule.get("timezone_id") or schedule.get("timezone") or "UTC"
    pause_status = schedule.get("pause_status") or _pause_status(schedule)
    return {
        "quartz_cron_expression": str(cron),
        "timezone_id": str(timezone),
        "pause_status": str(pause_status),
    }
69
+
70
+
71
def variables(deployment: Mapping[str, Any]) -> dict[str, Any]:
    """Collect bundle variable declarations from the deployment settings.

    Well-known keys set directly on ``deployment`` seed the result and
    entries under ``deployment["variables"]`` override them. Mapping values
    are kept as given; any other value becomes ``{"default": str(value)}``.
    """
    collected: dict[Any, Any] = {}
    for key in ("bundle_root", "core_wheel_path", "databricks_wheel_path", "evidence_catalog", "evidence_schema"):
        if key in deployment:
            collected[key] = deployment[key]
    collected.update(mapping(deployment.get("variables")))

    declared: dict[str, Any] = {}
    for key, value in collected.items():
        if isinstance(value, Mapping):
            declared[str(key)] = value
        else:
            declared[str(key)] = {"default": str(value)}
    return declared
80
+
81
+
82
def variable_parameter(name: str, variables: Mapping[str, Any]) -> dict[str, str]:
    """Return ``{name: "${var.<name>}"}`` when ``name`` is a declared variable, else ``{}``."""
    if name not in variables:
        return {}
    return {name: f"${{var.{name}}}"}
84
+
85
+
86
def databricks_deployment(project: Mapping[str, Any]) -> Mapping[str, Any]:
    """Merge the project's Databricks settings.

    ``validation.databricks`` provides the base values and
    ``deployment.databricks`` overrides them.
    """
    deployment_section = mapping(project.get("deployment"))
    validation_section = mapping(project.get("validation"))

    merged: dict[str, Any] = dict(mapping(validation_section.get("databricks")))
    merged.update(mapping(deployment_section.get("databricks")))
    return merged
90
+
91
+
92
def databricks_scheduling(project: Mapping[str, Any]) -> Mapping[str, Any]:
    """Return the project's scheduling settings for the ``"databricks"`` adapter."""
    adapter_name = "databricks"
    return adapter_scheduling(project, adapter_name)
94
+
95
+
96
def validation_job_name(project: Mapping[str, Any]) -> str | None:
    """Return ``validation.databricks.job_name`` as stripped text, or ``None`` when unset or blank."""
    databricks_section = mapping(mapping(project.get("validation")).get("databricks"))
    return text(databricks_section.get("job_name"))
100
+
101
+
102
def mapping(value: Any) -> Mapping[str, Any]:
    """Return ``value`` when it is a mapping; otherwise return an empty dict."""
    if isinstance(value, Mapping):
        return value
    return {}
104
+
105
+
106
def sequence(value: Any) -> tuple[Any, ...]:
    """Return a list's items as a tuple; any non-list value yields an empty tuple."""
    if not isinstance(value, list):
        return ()
    return tuple(value)
108
+
109
+
110
def required_text(value: Any, field_name: str) -> str:
    """Return ``value`` as stripped text.

    Raises:
        ValueError: If ``value`` is ``None`` or blank; the message names
            ``field_name``.
    """
    cleaned = text(value)
    if cleaned:
        return cleaned
    raise ValueError(f"{field_name} must not be empty")
115
+
116
+
117
def text(value: Any) -> str | None:
    """Return ``str(value)`` stripped of surrounding whitespace, or ``None`` if that is empty or ``value`` is ``None``."""
    if value is None:
        return None
    stripped = str(value).strip()
    return stripped if stripped else None
119
+
120
+
121
def slug(value: str) -> str:
    """Lower-case ``value`` into an identifier-style slug.

    Runs of characters other than letters, digits and underscores become a
    single underscore, and leading/trailing underscores are removed. An
    empty result falls back to ``"contractforge_project"``.
    """
    collapsed = re.sub(r"[^A-Za-z0-9_]+", "_", value.strip())
    trimmed = collapsed.strip("_").lower()
    if trimmed:
        return trimmed
    return "contractforge_project"
124
+
125
+
126
def _quartz_from_standard(value: Any) -> str:
    """Convert a cron value via ``quartz_cron_expression``; ``None`` or blank input yields ``""``."""
    if value is None or not str(value).strip():
        return ""
    return quartz_cron_expression(str(value))
128
+
129
+
130
def _pause_status(schedule: Mapping[str, Any]) -> str:
    """Return ``"PAUSED"`` when ``paused`` is truthy or ``enabled`` is exactly ``False``; otherwise ``"UNPAUSED"``."""
    explicitly_paused = bool(schedule.get("paused", False))
    explicitly_disabled = schedule.get("enabled") is False
    return "PAUSED" if explicitly_paused or explicitly_disabled else "UNPAUSED"
@@ -0,0 +1,17 @@
1
+ from contractforge_databricks.capabilities.evaluate import evaluate_databricks_capabilities
2
+ from contractforge_databricks.capabilities.mapping import to_core_capabilities
3
+ from contractforge_databricks.capabilities.models import (
4
+ CapabilityEvidence,
5
+ DatabricksCapabilities,
6
+ NativeCapability,
7
+ )
8
+ from contractforge_databricks.capabilities.uc import uc_capability_issues
9
+
10
+ __all__ = [
11
+ "CapabilityEvidence",
12
+ "DatabricksCapabilities",
13
+ "NativeCapability",
14
+ "evaluate_databricks_capabilities",
15
+ "to_core_capabilities",
16
+ "uc_capability_issues",
17
+ ]
@@ -0,0 +1,43 @@
1
+ """Capability factory helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_core.capabilities import NativeCapability, capability
6
+
7
+
8
def uc_sql_capability(name: str, *, is_uc_target: bool, is_databricks: bool) -> NativeCapability:
    """Build the capability record for a Unity Catalog SQL governance feature.

    Status is ``"unsupported"`` when the target is not a Unity Catalog
    target, ``"supported"`` when it is and the runtime is Databricks, and
    ``"unknown"`` when it is but the runtime is not Databricks.
    """
    if not is_uc_target:
        status = "unsupported"
        reason = "Unity Catalog governance capability requires a three-part target table name."
        requires = ("Unity Catalog", "catalog.schema.table")
    elif is_databricks:
        status = "supported"
        reason = "Unity Catalog SQL governance capability is eligible for this Databricks target."
        requires = ("Unity Catalog privileges",)
    else:
        status = "unknown"
        reason = "Target is Unity Catalog-shaped, but Databricks runtime evidence was not detected."
        requires = ("Unity Catalog privileges",)
    return capability(name, status, reason, requires=requires)
29
+
30
+
31
def workspace_capability(
    name: str,
    *,
    is_databricks: bool,
    is_uc_target: bool,
    reason: str,
    requires: tuple[str, ...],
) -> NativeCapability:
    """Build the capability record for a workspace-level feature.

    Status is ``"unsupported"`` when the runtime is not Databricks and
    ``"unknown"`` otherwise. The caller's ``reason`` is followed by a
    sentence explaining which evidence is missing.
    """
    if not is_databricks:
        status = "unsupported"
        detail = "Databricks runtime evidence was not detected."
    elif not is_uc_target:
        status = "unknown"
        detail = "Target/catalog context is incomplete."
    else:
        status = "unknown"
        detail = "Workspace configuration and permissions were not probed."
    return capability(name, status, f"{reason} {detail}", requires=requires)
@@ -0,0 +1,162 @@
1
+ """Non-destructive Databricks capability evaluation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_databricks.capabilities.builders import capability, uc_sql_capability, workspace_capability
6
+ from contractforge_databricks.capabilities.models import DatabricksCapabilities
7
+ from contractforge_databricks.capabilities.runtime import is_three_part_name, runtime_evidence, runtime_kind
8
+
9
+
10
def evaluate_databricks_capabilities(
    *,
    target_table: str | None = None,
    runtime_type: str | None = None,
    spark_version: str | None = None,
    spark_conf: dict[str, str] | None = None,
    environment: dict[str, str] | None = None,
) -> DatabricksCapabilities:
    """Evaluate Databricks-native capability eligibility from supplied evidence.

    Args:
        target_table: Target table name; checked with ``is_three_part_name``
            to decide whether it is Unity Catalog-shaped.
        runtime_type: Optional runtime hint forwarded to ``runtime_kind``.
        spark_version: Spark version string, recorded in the result and
            forwarded to ``runtime_evidence``.
        spark_conf: Spark configuration entries; keys and values are
            stringified before use. ``None`` is treated as empty.
        environment: Environment entries; keys and values are stringified
            before use. ``None`` is treated as empty.

    Returns:
        A ``DatabricksCapabilities`` carrying the detected runtime kind, the
        target table, the Spark version and one capability entry per feature.
    """
    conf = {str(option): str(setting) for option, setting in (spark_conf or {}).items()}
    env = {str(variable): str(setting) for variable, setting in (environment or {}).items()}
    kind = runtime_kind(runtime_type=runtime_type, spark_conf=conf, environment=env)
    on_databricks = kind in {"databricks_classic", "databricks_serverless"}
    uc_target = is_three_part_name(target_table)
    evidence = runtime_evidence(runtime_kind=kind, spark_version=spark_version, spark_conf=conf, environment=env)

    def flagged(name, flag, yes_reason, no_reason, *, fallback, **extra):
        # "supported" when the flag holds; otherwise the given fallback status.
        status, reason = ("supported", yes_reason) if flag else (fallback, no_reason)
        return capability(name, status, reason, **extra)

    def uc_sql(name):
        return uc_sql_capability(name, is_uc_target=uc_target, is_databricks=on_databricks)

    def workspace(name, reason, requires, *, target_scoped=False):
        # Only target-scoped features depend on the table name; the rest
        # always pass True for the target check.
        return workspace_capability(
            name,
            is_databricks=on_databricks,
            is_uc_target=uc_target if target_scoped else True,
            reason=reason,
            requires=requires,
        )

    # Insertion order is part of the output (it drives as_dict ordering).
    report = {
        "databricks_runtime": flagged(
            "databricks_runtime",
            on_databricks,
            "Databricks runtime evidence was detected.",
            "No Databricks runtime evidence was detected.",
            fallback="unsupported",
            evidence=evidence,
        ),
        "serverless_runtime": flagged(
            "serverless_runtime",
            kind == "databricks_serverless",
            "The current runtime is Databricks serverless.",
            "The current runtime is not classified as Databricks serverless.",
            fallback="unsupported",
            evidence=evidence,
        ),
        "delta_tables": flagged(
            "delta_tables",
            on_databricks,
            "Delta tables are native to Databricks runtimes.",
            "Delta support was not probed outside Databricks.",
            fallback="unknown",
            evidence=evidence,
            requires=("Delta Lake runtime",),
        ),
        "sql_merge": flagged(
            "sql_merge",
            on_databricks,
            "Databricks SQL MERGE is eligible in Databricks runtimes.",
            "SQL MERGE support was not probed outside Databricks.",
            fallback="unknown",
            evidence=evidence,
            requires=("Delta table", "MERGE privilege"),
        ),
        "unity_catalog_table": flagged(
            "unity_catalog_table",
            uc_target,
            "Target table is a three-part Unity Catalog name.",
            "Target table is not a three-part Unity Catalog name.",
            fallback="unsupported",
            requires=("catalog.schema.table",),
        ),
        **{
            feature: uc_sql(feature)
            for feature in (
                "uc_table_comments",
                "uc_column_comments",
                "uc_table_tags",
                "uc_column_tags",
                "uc_grants",
                "uc_row_filters",
                "uc_column_masks",
            )
        },
        "uc_abac_policies": workspace(
            "uc_abac_policies",
            "Unity Catalog ABAC policies require workspace/account feature support and permissions.",
            ("Unity Catalog", "policy privileges", "supported workspace feature"),
            target_scoped=True,
        ),
        "uc_external_locations": workspace(
            "uc_external_locations",
            "External Locations are Unity Catalog storage-governance objects.",
            ("Unity Catalog", "storage credential", "external location privileges"),
        ),
        "uc_volumes": workspace(
            "uc_volumes",
            "Volumes are Unity Catalog storage objects exposed through /Volumes paths.",
            ("Unity Catalog", "volume privileges"),
        ),
        "databricks_connections": workspace(
            "databricks_connections",
            "Databricks Connections are governed workspace objects and must be configured externally.",
            ("Databricks connection", "connection privileges"),
        ),
        "autoloader_cloudfiles": flagged(
            "autoloader_cloudfiles",
            on_databricks,
            "Auto Loader cloudFiles is a Databricks runtime capability.",
            "Auto Loader cloudFiles requires Databricks runtime support.",
            fallback="unsupported",
            evidence=evidence,
            requires=("spark.readStream.format('cloudFiles')",),
        ),
        "lakeflow_declarative_pipelines": workspace(
            "lakeflow_declarative_pipelines",
            "Lakeflow Declarative Pipelines are Databricks-native pipeline artifacts.",
            ("Databricks workspace pipeline support", "Unity Catalog for governed pipelines"),
        ),
        "lakeflow_auto_cdc": workspace(
            "lakeflow_auto_cdc",
            "Lakeflow AUTO CDC requires workspace support and CDC-compatible source semantics.",
            ("Lakeflow Declarative Pipelines", "keys", "sequence_by", "CDC source semantics"),
            target_scoped=True,
        ),
        "liquid_clustering": workspace(
            "liquid_clustering",
            "Liquid Clustering is a Databricks Delta table optimization feature.",
            ("Delta table", "supported Databricks runtime", "table alter privileges"),
        ),
        "delta_control_tables": flagged(
            "delta_control_tables",
            on_databricks,
            "Delta tables can implement ContractForge evidence stores on Databricks.",
            "Evidence storage was not probed outside Databricks.",
            fallback="unknown",
            evidence=evidence,
            requires=("Delta table create/write privileges",),
        ),
        "snapshot_soft_delete_merge": flagged(
            "snapshot_soft_delete_merge",
            on_databricks,
            "Databricks Delta MERGE supports NOT MATCHED BY SOURCE update semantics.",
            "Snapshot reconciliation was not probed outside Databricks.",
            fallback="unknown",
            evidence=evidence,
            requires=("Delta MERGE", "NOT MATCHED BY SOURCE"),
        ),
    }
    return DatabricksCapabilities(
        runtime_kind=kind,
        target_table=target_table,
        spark_version=spark_version,
        capabilities=report,
    )
@@ -0,0 +1,36 @@
1
+ """Map Databricks-native capabilities to ContractForge Core capabilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_core.capabilities import PlatformCapabilities
6
+ from contractforge_databricks.capabilities.models import DatabricksCapabilities
7
+ from contractforge_databricks.write_modes.registry import list_write_modes
8
+
9
+
10
def to_core_capabilities(capabilities: DatabricksCapabilities) -> PlatformCapabilities:
    """Translate a Databricks capability report into Core ``PlatformCapabilities``.

    Each Core flag is derived from ``capabilities.supports(...)`` for the
    matching native capability; the four quality-rule flags are always
    ``True``. ``lakeflow_auto_cdc`` is listed as review-required when its
    status is ``"unknown"``.

    Args:
        capabilities: The evaluated Databricks capability report.

    Returns:
        A ``PlatformCapabilities`` instance for the ``"databricks"`` platform.
    """
    supports = capabilities.supports
    has_delta = supports("delta_tables")
    has_merge = supports("sql_merge")
    has_runtime = supports("databricks_runtime")

    needs_review = ("lakeflow_auto_cdc",) if capabilities.status("lakeflow_auto_cdc") == "unknown" else ()
    stores = ("delta_control_tables",) if supports("delta_control_tables") else ()

    return PlatformCapabilities(
        platform="databricks",
        supports_append=has_delta,
        supports_overwrite=has_delta,
        supports_merge=has_merge,
        supports_hash_diff=has_merge,
        supports_scd2=has_merge,
        supports_snapshot_soft_delete=supports("snapshot_soft_delete_merge"),
        supports_schema_evolution=has_delta,
        supports_row_filters=supports("uc_row_filters"),
        supports_column_masks=supports("uc_column_masks"),
        supports_available_now_streaming=supports("autoloader_cloudfiles"),
        supports_required_columns_quality=True,
        supports_unique_key_quality=True,
        supports_max_null_ratio_quality=True,
        supports_expression_quality=True,
        supports_shape=has_runtime,
        supports_transform=has_runtime,
        evidence_stores=stores,
        review_required_semantics=needs_review,
        supported_custom_write_modes=list_write_modes(),
    )
@@ -0,0 +1,44 @@
1
+ """Databricks-native capability evidence models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any, Literal
7
+
8
+ from contractforge_core.capabilities import (
9
+ CapabilityEvidence as CapabilityEvidence,
10
+ CapabilityStatus,
11
+ NativeCapability,
12
+ )
13
+
14
+ RuntimeKind = Literal["databricks_serverless", "databricks_classic", "spark", "unknown"]
15
+
16
+
17
@dataclass(frozen=True)
class DatabricksCapabilities:
    """Frozen record of an evaluated capability set, keyed by capability name."""

    # Runtime classification; one of the RuntimeKind literals.
    runtime_kind: RuntimeKind
    # Target table name the evaluation was run for, if any.
    target_table: str | None
    # Spark version string recorded alongside the evaluation, if any.
    spark_version: str | None
    # Capability entries by name; defaults to a fresh empty dict per instance.
    capabilities: dict[str, NativeCapability] = field(default_factory=dict)

    def _with_status(self, wanted: str) -> list[NativeCapability]:
        """Return the entries whose ``status`` equals *wanted*, in insertion order."""
        return [entry for entry in self.capabilities.values() if entry.status == wanted]

    def supports(self, name: str) -> bool:
        """Return True only when *name* is present and its entry reports ``supported``."""
        entry = self.capabilities.get(name)
        if not entry:
            return False
        return bool(entry.supported)

    def status(self, name: str) -> CapabilityStatus:
        """Return the status recorded for *name*, or ``"unknown"`` when it is absent."""
        entry = self.capabilities.get(name)
        if entry is None:
            return "unknown"
        return entry.status

    def unsupported(self) -> list[NativeCapability]:
        """Return every entry whose status is ``"unsupported"``."""
        return self._with_status("unsupported")

    def unknown(self) -> list[NativeCapability]:
        """Return every entry whose status is ``"unknown"``."""
        return self._with_status("unknown")

    def as_dict(self) -> dict[str, Any]:
        """Return a plain-dict view, delegating each entry to its own ``as_dict``."""
        entries = {key: entry.as_dict() for key, entry in self.capabilities.items()}
        return {
            "runtime_kind": self.runtime_kind,
            "target_table": self.target_table,
            "spark_version": self.spark_version,
            "capabilities": entries,
        }