contractforge-databricks 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. contractforge_databricks/__init__.py +172 -0
  2. contractforge_databricks/adapter.py +69 -0
  3. contractforge_databricks/annotations/__init__.py +10 -0
  4. contractforge_databricks/annotations/application.py +52 -0
  5. contractforge_databricks/annotations/audit.py +49 -0
  6. contractforge_databricks/annotations/sql.py +142 -0
  7. contractforge_databricks/api.py +65 -0
  8. contractforge_databricks/bundles/__init__.py +9 -0
  9. contractforge_databricks/bundles/assets.py +47 -0
  10. contractforge_databricks/bundles/project.py +213 -0
  11. contractforge_databricks/bundles/project_config.py +133 -0
  12. contractforge_databricks/capabilities/__init__.py +17 -0
  13. contractforge_databricks/capabilities/builders.py +43 -0
  14. contractforge_databricks/capabilities/evaluate.py +162 -0
  15. contractforge_databricks/capabilities/mapping.py +36 -0
  16. contractforge_databricks/capabilities/models.py +44 -0
  17. contractforge_databricks/capabilities/runtime.py +111 -0
  18. contractforge_databricks/capabilities/uc.py +47 -0
  19. contractforge_databricks/cli.py +196 -0
  20. contractforge_databricks/cli_deploy.py +98 -0
  21. contractforge_databricks/cli_governance.py +142 -0
  22. contractforge_databricks/cli_io.py +91 -0
  23. contractforge_databricks/cli_maintenance.py +69 -0
  24. contractforge_databricks/coercion.py +31 -0
  25. contractforge_databricks/contract_extensions.py +70 -0
  26. contractforge_databricks/cost/__init__.py +11 -0
  27. contractforge_databricks/cost/model.py +22 -0
  28. contractforge_databricks/cost/report.py +65 -0
  29. contractforge_databricks/cost/sql.py +136 -0
  30. contractforge_databricks/dashboards/__init__.py +15 -0
  31. contractforge_databricks/dashboards/control_tables.py +150 -0
  32. contractforge_databricks/diagnostics/__init__.py +7 -0
  33. contractforge_databricks/diagnostics/explain.py +40 -0
  34. contractforge_databricks/environment.py +53 -0
  35. contractforge_databricks/evidence/__init__.py +98 -0
  36. contractforge_databricks/evidence/ddl.py +35 -0
  37. contractforge_databricks/evidence/governance_log.py +175 -0
  38. contractforge_databricks/evidence/helpers.py +29 -0
  39. contractforge_databricks/evidence/ops_log.py +210 -0
  40. contractforge_databricks/evidence/records.py +27 -0
  41. contractforge_databricks/evidence/run_log.py +74 -0
  42. contractforge_databricks/evidence/schemas.py +7 -0
  43. contractforge_databricks/evidence/sql.py +144 -0
  44. contractforge_databricks/evidence/tables.py +20 -0
  45. contractforge_databricks/evidence/writer.py +118 -0
  46. contractforge_databricks/execution/__init__.py +70 -0
  47. contractforge_databricks/execution/delta_basic.py +57 -0
  48. contractforge_databricks/execution/hash_diff.py +126 -0
  49. contractforge_databricks/execution/hash_diff_latest.py +142 -0
  50. contractforge_databricks/execution/replace_partitions.py +40 -0
  51. contractforge_databricks/execution/results.py +5 -0
  52. contractforge_databricks/execution/retry.py +36 -0
  53. contractforge_databricks/execution/scd2.py +213 -0
  54. contractforge_databricks/execution/scd2_deletes.py +65 -0
  55. contractforge_databricks/execution/scd2_late.py +30 -0
  56. contractforge_databricks/execution/snapshot.py +77 -0
  57. contractforge_databricks/execution/sql_merge.py +85 -0
  58. contractforge_databricks/execution/tables.py +98 -0
  59. contractforge_databricks/execution/windows.py +58 -0
  60. contractforge_databricks/governance/__init__.py +30 -0
  61. contractforge_databricks/governance/access.py +185 -0
  62. contractforge_databricks/governance/application.py +93 -0
  63. contractforge_databricks/governance/drift.py +49 -0
  64. contractforge_databricks/governance/runtime.py +60 -0
  65. contractforge_databricks/governance/sql.py +31 -0
  66. contractforge_databricks/governance/validation.py +135 -0
  67. contractforge_databricks/lakeflow/__init__.py +21 -0
  68. contractforge_databricks/lakeflow/compatibility.py +194 -0
  69. contractforge_databricks/lakeflow/rendering.py +175 -0
  70. contractforge_databricks/lineage/__init__.py +7 -0
  71. contractforge_databricks/lineage/openlineage.py +182 -0
  72. contractforge_databricks/maintenance/__init__.py +27 -0
  73. contractforge_databricks/maintenance/retention.py +90 -0
  74. contractforge_databricks/maintenance/sql.py +68 -0
  75. contractforge_databricks/metrics/__init__.py +19 -0
  76. contractforge_databricks/metrics/history.py +21 -0
  77. contractforge_databricks/metrics/write.py +63 -0
  78. contractforge_databricks/operations/__init__.py +4 -0
  79. contractforge_databricks/operations/application.py +38 -0
  80. contractforge_databricks/operations/sql.py +95 -0
  81. contractforge_databricks/parity/__init__.py +18 -0
  82. contractforge_databricks/parity/catalog.py +59 -0
  83. contractforge_databricks/parity/models.py +7 -0
  84. contractforge_databricks/parity/scenarios.py +111 -0
  85. contractforge_databricks/partitioning/__init__.py +3 -0
  86. contractforge_databricks/partitioning/predicates.py +28 -0
  87. contractforge_databricks/preparation/__init__.py +47 -0
  88. contractforge_databricks/preparation/deduplicate.py +87 -0
  89. contractforge_databricks/preparation/encoding.py +37 -0
  90. contractforge_databricks/preparation/hashing.py +18 -0
  91. contractforge_databricks/preparation/pyspark.py +178 -0
  92. contractforge_databricks/preparation/pyspark_staging.py +70 -0
  93. contractforge_databricks/preparation/shape.py +209 -0
  94. contractforge_databricks/preparation/shape_validation.py +94 -0
  95. contractforge_databricks/preparation/staging.py +17 -0
  96. contractforge_databricks/preparation/zip_arrays.py +51 -0
  97. contractforge_databricks/presets/__init__.py +3 -0
  98. contractforge_databricks/presets/base.py +24 -0
  99. contractforge_databricks/presets/bronze.py +57 -0
  100. contractforge_databricks/presets/catalog.py +22 -0
  101. contractforge_databricks/presets/core.py +134 -0
  102. contractforge_databricks/presets/gold.py +62 -0
  103. contractforge_databricks/presets/modifiers.py +51 -0
  104. contractforge_databricks/presets/runtime.py +22 -0
  105. contractforge_databricks/presets/silver.py +101 -0
  106. contractforge_databricks/presets/write_engine.py +57 -0
  107. contractforge_databricks/quality/__init__.py +41 -0
  108. contractforge_databricks/quality/evaluation.py +178 -0
  109. contractforge_databricks/quality/persistence.py +81 -0
  110. contractforge_databricks/quality/registry.py +134 -0
  111. contractforge_databricks/quality/results.py +17 -0
  112. contractforge_databricks/quality/sql.py +113 -0
  113. contractforge_databricks/rendering/__init__.py +11 -0
  114. contractforge_databricks/rendering/bundle.py +93 -0
  115. contractforge_databricks/rendering/markdown.py +50 -0
  116. contractforge_databricks/rendering/names.py +56 -0
  117. contractforge_databricks/results.py +15 -0
  118. contractforge_databricks/runtime/__init__.py +101 -0
  119. contractforge_databricks/runtime/available_now.py +147 -0
  120. contractforge_databricks/runtime/bundles.py +211 -0
  121. contractforge_databricks/runtime/cache.py +20 -0
  122. contractforge_databricks/runtime/control_tables.py +19 -0
  123. contractforge_databricks/runtime/deploy.py +197 -0
  124. contractforge_databricks/runtime/detection.py +114 -0
  125. contractforge_databricks/runtime/dry_run.py +46 -0
  126. contractforge_databricks/runtime/errors.py +54 -0
  127. contractforge_databricks/runtime/file_selection.py +109 -0
  128. contractforge_databricks/runtime/finalization.py +168 -0
  129. contractforge_databricks/runtime/governance.py +37 -0
  130. contractforge_databricks/runtime/hooks.py +45 -0
  131. contractforge_databricks/runtime/http_file.py +37 -0
  132. contractforge_databricks/runtime/http_retry.py +15 -0
  133. contractforge_databricks/runtime/http_safety.py +9 -0
  134. contractforge_databricks/runtime/json_materialization.py +97 -0
  135. contractforge_databricks/runtime/lineage.py +164 -0
  136. contractforge_databricks/runtime/maintenance.py +43 -0
  137. contractforge_databricks/runtime/merge_validation.py +98 -0
  138. contractforge_databricks/runtime/metadata.py +21 -0
  139. contractforge_databricks/runtime/metrics.py +34 -0
  140. contractforge_databricks/runtime/models.py +32 -0
  141. contractforge_databricks/runtime/options.py +33 -0
  142. contractforge_databricks/runtime/orchestration_context.py +185 -0
  143. contractforge_databricks/runtime/orchestrator.py +147 -0
  144. contractforge_databricks/runtime/partitioning.py +93 -0
  145. contractforge_databricks/runtime/quality_quarantine.py +92 -0
  146. contractforge_databricks/runtime/rest_api.py +46 -0
  147. contractforge_databricks/runtime/rest_auth.py +21 -0
  148. contractforge_databricks/runtime/rest_pagination.py +21 -0
  149. contractforge_databricks/runtime/run_payload.py +177 -0
  150. contractforge_databricks/runtime/schema.py +106 -0
  151. contractforge_databricks/runtime/source_metadata.py +30 -0
  152. contractforge_databricks/runtime/source_registry.py +43 -0
  153. contractforge_databricks/runtime/source_schema.py +24 -0
  154. contractforge_databricks/runtime/sources.py +208 -0
  155. contractforge_databricks/runtime/spark.py +183 -0
  156. contractforge_databricks/runtime/spark_defaults.py +35 -0
  157. contractforge_databricks/runtime/storage_auth.py +132 -0
  158. contractforge_databricks/runtime/streaming.py +131 -0
  159. contractforge_databricks/runtime/success.py +104 -0
  160. contractforge_databricks/runtime/utils.py +52 -0
  161. contractforge_databricks/runtime/watermark.py +71 -0
  162. contractforge_databricks/runtime/windows.py +184 -0
  163. contractforge_databricks/runtime/write.py +66 -0
  164. contractforge_databricks/runtime/write_flow.py +146 -0
  165. contractforge_databricks/runtime/write_strategy.py +40 -0
  166. contractforge_databricks/schema/__init__.py +21 -0
  167. contractforge_databricks/schema/diff.py +11 -0
  168. contractforge_databricks/schema/policy.py +33 -0
  169. contractforge_databricks/schema/sync.py +23 -0
  170. contractforge_databricks/security/__init__.py +21 -0
  171. contractforge_databricks/security/errors.py +5 -0
  172. contractforge_databricks/security/redaction.py +5 -0
  173. contractforge_databricks/security/secrets.py +114 -0
  174. contractforge_databricks/security/source_policy.py +17 -0
  175. contractforge_databricks/shapes/__init__.py +3 -0
  176. contractforge_databricks/shapes/sql.py +123 -0
  177. contractforge_databricks/sources/__init__.py +67 -0
  178. contractforge_databricks/sources/artifacts.py +100 -0
  179. contractforge_databricks/sources/autoloader.py +48 -0
  180. contractforge_databricks/sources/bounded_streams.py +44 -0
  181. contractforge_databricks/sources/classification.py +115 -0
  182. contractforge_databricks/sources/delta_share.py +21 -0
  183. contractforge_databricks/sources/files.py +48 -0
  184. contractforge_databricks/sources/http_file.py +46 -0
  185. contractforge_databricks/sources/interpret.py +76 -0
  186. contractforge_databricks/sources/jdbc.py +32 -0
  187. contractforge_databricks/sources/metadata.py +18 -0
  188. contractforge_databricks/sources/native_passthrough.py +33 -0
  189. contractforge_databricks/sources/rds_iam.py +15 -0
  190. contractforge_databricks/sources/rds_iam_runtime.py +191 -0
  191. contractforge_databricks/sources/rest_api.py +33 -0
  192. contractforge_databricks/sources/support.py +50 -0
  193. contractforge_databricks/sources/table_refs.py +65 -0
  194. contractforge_databricks/sql/__init__.py +4 -0
  195. contractforge_databricks/sql/identifiers.py +17 -0
  196. contractforge_databricks/sql/literals.py +36 -0
  197. contractforge_databricks/state/__init__.py +39 -0
  198. contractforge_databricks/state/ddl.py +24 -0
  199. contractforge_databricks/state/migrations.py +146 -0
  200. contractforge_databricks/state/queries.py +149 -0
  201. contractforge_databricks/state/sql.py +116 -0
  202. contractforge_databricks/state/tables.py +9 -0
  203. contractforge_databricks/state/writer.py +83 -0
  204. contractforge_databricks/templates/__init__.py +15 -0
  205. contractforge_databricks/templates/catalog.py +205 -0
  206. contractforge_databricks/templates/catalog_parity.py +85 -0
  207. contractforge_databricks/templates/core.py +83 -0
  208. contractforge_databricks/templates/enrichment.py +175 -0
  209. contractforge_databricks/transforms/__init__.py +3 -0
  210. contractforge_databricks/transforms/sql.py +118 -0
  211. contractforge_databricks/watermark/__init__.py +6 -0
  212. contractforge_databricks/watermark/sql.py +91 -0
  213. contractforge_databricks/write_modes/__init__.py +20 -0
  214. contractforge_databricks/write_modes/registry.py +44 -0
  215. contractforge_databricks/write_modes/sql.py +33 -0
  216. contractforge_databricks/write_modes/strategy.py +192 -0
  217. contractforge_databricks-0.1.0.dist-info/METADATA +34 -0
  218. contractforge_databricks-0.1.0.dist-info/RECORD +220 -0
  219. contractforge_databricks-0.1.0.dist-info/WHEEL +4 -0
  220. contractforge_databricks-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,43 @@
1
+ """Post-write Databricks maintenance hooks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_core.execution import ExecutionOutcome
6
+ from contractforge_core.semantic import SemanticContract
7
+ from contractforge_databricks.contract_extensions import databricks_extensions
8
+ from contractforge_databricks.execution import SqlRunner
9
+ from contractforge_databricks.maintenance import MaintenancePlan, execute_maintenance_plan
10
+
11
+
12
def run_post_write_maintenance(
    *,
    runner: SqlRunner,
    contract: SemanticContract,
    target_table: str,
    outcome: ExecutionOutcome | None,
    rows_written: int,
) -> tuple[str, ...]:
    """Execute the post-write maintenance plan when the contract asks for it.

    Nothing is executed (and ``()`` is returned) when the Databricks
    extension ``optimize_after_write`` is falsy, or when the effective row
    count is not positive.  The effective row count is taken from
    ``outcome.metrics["rows_written"]`` when an outcome is supplied, falling
    back to the ``rows_written`` argument when that key is absent; a falsy
    value counts as zero.

    Returns:
        Whatever ``execute_maintenance_plan`` returns for an optimize plan on
        ``target_table``, or an empty tuple when maintenance is skipped.
    """
    settings = databricks_extensions(contract)
    if not settings.get("optimize_after_write"):
        return ()

    reported_metrics = outcome.metrics if outcome else {}
    effective_rows = int(reported_metrics.get("rows_written", rows_written) or 0)
    if effective_rows <= 0:
        return ()

    plan = MaintenancePlan(
        target_table=target_table,
        optimize=True,
        zorder_columns=_tuple(settings.get("zorder_columns")),
    )
    return execute_maintenance_plan(runner, plan)
34
+
35
+
36
+ def _tuple(value: object) -> tuple[str, ...]:
37
+ if value is None:
38
+ return ()
39
+ if isinstance(value, str):
40
+ return (value,)
41
+ if isinstance(value, (list, tuple, set)):
42
+ return tuple(str(item) for item in value)
43
+ return (str(value),)
@@ -0,0 +1,98 @@
1
+ """Prepared-source safety checks for Databricks MERGE writes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from contractforge_core.quality import QualityRuleResult, quality_status
8
+ from contractforge_core.runtime import PreparedInput, QueryOne
9
+ from contractforge_core.semantic import SemanticContract
10
+ from contractforge_databricks.sql import quote_identifier, quote_table_name
11
+
12
MERGE_WRITE_MODES = {"scd1_upsert", "scd2_historical", "snapshot_soft_delete"}


def validate_merge_source_safety(
    *,
    contract: SemanticContract,
    prepared: PreparedInput,
    query_one: QueryOne | None,
    quality_results: tuple[QualityRuleResult, ...] = (),
) -> dict[str, Any]:
    """Check merge-key safety of the prepared source before a MERGE write.

    The check is skipped (a ``SKIPPED`` status dict with a ``reason`` is
    returned) when the write mode is not in ``MERGE_WRITE_MODES``, when no
    ``query_one`` callable is available, or when there are no merge keys or
    no rows were read.

    Otherwise two queries may be issued through ``query_one``:

    1. a count of rows whose merge keys are all NULL -- if that count equals
       ``prepared.rows_read`` a ``ValueError`` is raised;
    2. a duplicate-key summary -- skipped when ``_skip_duplicate_check``
       says so, otherwise any duplicate group raises a ``ValueError``.

    Returns:
        A ``PASSED`` status dict carrying ``all_null_key_rows`` and either
        ``duplicate_check`` (when skipped) or ``duplicate_key_groups``.

    Raises:
        ValueError: for missing key columns, fully null keys, or duplicate
            key groups.
    """

    def skipped(reason: str) -> dict[str, Any]:
        return {"status": "SKIPPED", "reason": reason}

    mode = contract.write.mode
    if mode not in MERGE_WRITE_MODES:
        return skipped("not_merge_mode")
    if query_one is None:
        return skipped("query_one_not_configured")

    merge_keys = contract.write.merge_keys
    if not merge_keys or prepared.rows_read <= 0:
        return skipped("no_merge_keys_or_rows")

    _validate_columns(merge_keys, prepared.source_columns)

    nulls_sql = render_merge_key_nulls_sql(prepared.source_view, merge_keys)
    all_null_count = _int_row_value(query_one(nulls_sql), "all_keys_null_rows")
    if all_null_count == prepared.rows_read:
        raise ValueError(
            f"mode={mode} received {prepared.rows_read} rows with fully null merge_keys. "
            f"keys={list(merge_keys)}. Fix the source or add quality_rules.not_null."
        )

    passed: dict[str, Any] = {"status": "PASSED", "all_null_key_rows": all_null_count}
    if _skip_duplicate_check(contract, quality_results):
        return {**passed, "duplicate_check": "SKIPPED"}

    duplicates_sql = render_merge_key_duplicates_sql(prepared.source_view, merge_keys)
    summary_row = query_one(duplicates_sql)
    duplicate_groups = _int_row_value(summary_row, "duplicate_key_groups")
    duplicate_rows = _int_row_value(summary_row, "duplicate_rows")
    if duplicate_groups:
        raise ValueError(
            f"mode={mode} received {duplicate_rows} duplicate source rows across "
            f"{duplicate_groups} merge_key groups. keys={list(merge_keys)}. "
            "Fix the composite key, declare quality_rules.unique_key, or apply transform.deduplicate."
        )
    return {**passed, "duplicate_key_groups": duplicate_groups}
52
+
53
+
54
def render_merge_key_nulls_sql(source_view: str, merge_keys: tuple[str, ...]) -> str:
    """Render a query counting source rows whose merge keys are all NULL.

    Args:
        source_view: Name of the prepared source view; quoted with
            ``quote_table_name``.
        merge_keys: Key column names; each is quoted with
            ``quote_identifier`` and combined with ``AND``.

    Returns:
        A ``SELECT count(*) AS all_keys_null_rows ...`` statement.

    Raises:
        ValueError: if ``merge_keys`` is empty, since the statement would
            otherwise end in a dangling ``WHERE`` and fail only at execution.
    """
    if not merge_keys:
        raise ValueError("merge_keys must not be empty")
    all_keys_null = " AND ".join(f"{quote_identifier(key)} IS NULL" for key in merge_keys)
    return (
        "SELECT count(*) AS all_keys_null_rows "
        f"FROM {quote_table_name(source_view)} WHERE {all_keys_null}"
    )
60
+
61
+
62
def render_merge_key_duplicates_sql(source_view: str, merge_keys: tuple[str, ...]) -> str:
    """Render a query summarizing duplicate merge-key groups in the source.

    The inner query groups the source by the quoted key columns and keeps
    groups with more than one row; the outer query reports the number of
    such groups (``duplicate_key_groups``) and the total rows they contain
    (``duplicate_rows``, 0 when there are none).

    Args:
        source_view: Name of the prepared source view; quoted with
            ``quote_table_name``.
        merge_keys: Key column names; each is quoted with
            ``quote_identifier``.

    Raises:
        ValueError: if ``merge_keys`` is empty, since the rendered
            ``SELECT``/``GROUP BY`` lists would be empty and the statement
            would fail only at execution.
    """
    if not merge_keys:
        raise ValueError("merge_keys must not be empty")
    key_list = ", ".join(quote_identifier(key) for key in merge_keys)
    return (
        "SELECT count(*) AS duplicate_key_groups, coalesce(sum(row_count), 0) AS duplicate_rows "
        f"FROM (SELECT {key_list}, count(*) AS row_count FROM {quote_table_name(source_view)} "
        f"GROUP BY {key_list} HAVING count(*) > 1)"
    )
69
+
70
+
71
+ def _validate_columns(keys: tuple[str, ...], source_columns: tuple[str, ...]) -> None:
72
+ if not source_columns:
73
+ return
74
+ missing = [key for key in keys if key not in source_columns]
75
+ if missing:
76
+ raise ValueError(f"merge_keys missing from prepared source columns: {missing}")
77
+
78
+
79
def _skip_duplicate_check(contract: SemanticContract, quality_results: tuple[QualityRuleResult, ...]) -> bool:
    """Decide whether the duplicate-key query can be skipped.

    Returns ``True`` only when the overall quality status of
    ``quality_results`` is ``"PASSED"`` and the contract declares a
    ``unique_key`` quality rule whose columns are the same set as the
    write merge keys (order-insensitive).
    """
    if quality_status(quality_results) != "PASSED":
        return False

    unique_key_rules = [rule for rule in contract.quality if rule.rule == "unique_key"]
    for rule in unique_key_rules:
        if set(rule.columns) == set(contract.write.merge_keys):
            return True
    return False
84
+
85
+
86
+ def _int_row_value(row: Any, key: str) -> int:
87
+ if row is None:
88
+ return 0
89
+ if isinstance(row, dict):
90
+ value = row.get(key)
91
+ elif hasattr(row, "asDict"):
92
+ value = row.asDict().get(key)
93
+ else:
94
+ value = getattr(row, key, None)
95
+ try:
96
+ return int(value or 0)
97
+ except (TypeError, ValueError):
98
+ return 0
@@ -0,0 +1,21 @@
1
+ """Runtime contract metadata payload helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from contractforge_core.semantic import SemanticContract
8
+
9
+
10
def contract_metadata(contract: SemanticContract, operations: dict[str, Any]) -> dict[str, Any]:
    """Assemble the metadata payload for a contract.

    Values come from two places: the ``operations`` mapping (looked up with
    ``.get``, so missing keys become ``None``) and attributes of
    ``contract`` (governance owner, target domain/namespace, operations
    metadata).  ``owner`` and ``operations`` are ``None`` when the contract
    has no governance / operations section.

    Returns:
        A dict with keys, in order: ``description``, ``owner``, ``domain``,
        ``tags``, ``sla``, ``runtime_parameters``, ``operations``,
        ``applied_presets``, ``target_schema``.
    """
    lookup = operations.get
    payload: dict[str, Any] = {}

    payload["description"] = lookup("description")
    if contract.governance:
        payload["owner"] = contract.governance.owner
    else:
        payload["owner"] = None
    payload["domain"] = contract.target.domain
    payload["tags"] = lookup("tags")
    payload["sla"] = lookup("sla")
    payload["runtime_parameters"] = lookup("runtime_parameters")
    if contract.operations:
        payload["operations"] = contract.operations.metadata
    else:
        payload["operations"] = None
    payload["applied_presets"] = lookup("applied_presets")
    payload["target_schema"] = contract.target.namespace
    return payload
@@ -0,0 +1,34 @@
1
+ """Runtime write metric collection for Databricks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from contractforge_core.metrics import normalize_rows_written
8
+ from contractforge_core.runtime import QueryOne
9
+ from contractforge_core.semantic import SemanticContract
10
+ from contractforge_databricks.metrics import (
11
+ latest_operation_metrics_from_history_row,
12
+ render_delta_history_query,
13
+ resolve_write_metrics,
14
+ )
15
+
16
+
17
def collect_write_metrics(
    *,
    contract: SemanticContract,
    target_table: str,
    rows_written: int,
    query_one: QueryOne | None,
) -> tuple[int, dict[str, Any]]:
    """Resolve row and operation metrics for a completed write.

    When ``query_one`` is provided, the rendered Delta history query for
    ``target_table`` is executed and its row is converted into operation
    metrics; otherwise an empty metrics dict is used.  The result of
    ``resolve_write_metrics`` is then augmented in place:

    * ``operation_metrics["metrics_source"]`` records the reported source;
    * ``row_metrics["rows_affected"]`` is set to the normalized row count;
    * ``operation_metrics["normalizedRowMetrics"]`` holds ``row_metrics``.

    Returns:
        ``(normalized_rows_written, operation_metrics)``.
    """
    if query_one is None:
        delta_metrics = {}
    else:
        history_sql = render_delta_history_query(target_table=target_table)
        history_row = query_one(history_sql)
        delta_metrics = latest_operation_metrics_from_history_row(history_row)

    resolved = resolve_write_metrics(contract, rows_written, delta_metrics)
    row_metrics, operation_metrics, metrics_source = resolved

    operation_metrics["metrics_source"] = metrics_source
    rows_affected = normalize_rows_written(rows_written, row_metrics)
    row_metrics["rows_affected"] = rows_affected
    operation_metrics["normalizedRowMetrics"] = row_metrics
    return rows_affected, operation_metrics
@@ -0,0 +1,32 @@
1
+ """Runtime input models for Databricks ingestion orchestration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Callable
7
+
8
+ from contractforge_core.runtime import PreparedInput
9
+ from contractforge_databricks.runtime.hooks import DatabricksIngestionHooks
10
+
11
+
12
+ PreparedViewInput = PreparedInput
13
+
14
+
15
@dataclass(frozen=True)
class DatabricksIngestOptions:
    """Immutable options bundle for one Databricks ingestion run.

    All fields have defaults, so ``DatabricksIngestOptions()`` is valid.
    Instances are frozen; derive a modified copy with
    ``dataclasses.replace``.  Field order is part of the positional
    constructor signature and must not change.
    """

    # Catalog / schema names handed to the evidence and state writers.
    catalog: str = "main"
    schema: str = "ops"

    # Execution switches.
    dry_run: bool = False
    ensure_table: bool = True

    # Locking.
    lock_enabled: bool = False
    lock_owner: str | None = None

    # Idempotency and quality handling.
    idempotency_key: str | None = None
    idempotency_policy: str = "always_run"
    quality_action: str = "fail"

    # Run identity: an explicit id, or a factory producing one.
    run_id: str | None = None
    run_id_factory: Callable[[], str] | None = None

    # Free-form metadata and optional target schema mapping.
    runtime_metadata: dict[str, Any] | None = None
    target_schema: dict[str, str] | None = None

    # Result handling and extension hooks.
    allow_review_required: bool = False
    raise_on_failure: bool = True
    hooks: DatabricksIngestionHooks | None = None
@@ -0,0 +1,33 @@
1
+ """Databricks runtime option resolution from core contract semantics."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import replace
6
+ from typing import Any
7
+
8
+ from contractforge_core.semantic import SemanticContract
9
+ from contractforge_databricks.contract_extensions import databricks_extensions
10
+ from contractforge_databricks.runtime.hooks import DatabricksIngestionHooks
11
+ from contractforge_databricks.runtime.models import DatabricksIngestOptions
12
+
13
+
14
def effective_ingest_options(contract: SemanticContract, options: DatabricksIngestOptions) -> DatabricksIngestOptions:
    """Fill still-default ingest options from the contract.

    An option is only overridden while it holds its default value:

    * ``idempotency_key`` (``None``), ``idempotency_policy``
      (``"always_run"``) and ``quality_action`` (``"fail"``) are taken from
      the operations metadata keys ``idempotency_key``,
      ``idempotency_policy`` and ``on_quality_fail`` when those are truthy;
    * ``hooks`` (``None``) is taken from the Databricks extension ``hooks``,
      which may be a ``DatabricksIngestionHooks`` or a dict of its
      constructor arguments;
    * ``lock_enabled`` (``False``) is switched on by a truthy
      ``lock_enabled`` extension.

    Returns:
        ``options`` itself when nothing changes, otherwise a replaced copy.

    Raises:
        ValueError: if the ``hooks`` extension is neither a dict nor a
            ``DatabricksIngestionHooks`` instance.
    """
    operations = contract.operations
    metadata = operations.metadata if operations and operations.metadata else {}
    extensions = databricks_extensions(contract)
    updates: dict[str, Any] = {}

    # (option field, metadata key, "option is still at its default")
    metadata_overrides = (
        ("idempotency_key", "idempotency_key", options.idempotency_key is None),
        ("idempotency_policy", "idempotency_policy", options.idempotency_policy == "always_run"),
        ("quality_action", "on_quality_fail", options.quality_action == "fail"),
    )
    for field_name, metadata_key, still_default in metadata_overrides:
        if still_default and metadata.get(metadata_key):
            updates[field_name] = str(metadata[metadata_key])

    if options.hooks is None:
        declared_hooks = extensions.get("hooks")
        if declared_hooks is not None:
            if isinstance(declared_hooks, dict):
                declared_hooks = DatabricksIngestionHooks(**declared_hooks)
            if not isinstance(declared_hooks, DatabricksIngestionHooks):
                raise ValueError("extensions.databricks.hooks must be DatabricksIngestionHooks")
            updates["hooks"] = declared_hooks

    if not options.lock_enabled and extensions.get("lock_enabled"):
        updates["lock_enabled"] = True

    if not updates:
        return options
    return replace(options, **updates)
@@ -0,0 +1,185 @@
1
+ """Private helpers for Databricks runtime orchestration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+ from typing import NamedTuple
7
+
8
+ from contractforge_core.contracts import semantic_contract_from_mapping
9
+ from contractforge_core.errors import raise_for_failure_result
10
+ from contractforge_core.quality import QualityRuleResult, quality_policy_status
11
+ from contractforge_core.runtime import PreparedInput, QueryOne
12
+ from contractforge_core.semantic import SemanticContract
13
+ from contractforge_databricks.contract_extensions import normalize_databricks_contract
14
+ from contractforge_databricks.evidence import EvidenceWriter
15
+ from contractforge_databricks.execution import SqlRunner
16
+ from contractforge_databricks.rendering.names import target_full_name
17
+ from contractforge_databricks.runtime.dry_run import finalize_dry_run
18
+ from contractforge_databricks.runtime.errors import error_log_payload
19
+ from contractforge_databricks.runtime.finalization import finalize_ingest
20
+ from contractforge_databricks.runtime.models import DatabricksIngestOptions
21
+ from contractforge_databricks.runtime.options import effective_ingest_options
22
+ from contractforge_databricks.runtime.success import finalize_success
23
+ from contractforge_databricks.runtime.utils import resolve_run_id, utc_now_str
24
+ from contractforge_databricks.runtime.write_flow import WriteFlowResult
25
+ from contractforge_databricks.security import exception_message
26
+ from contractforge_databricks.state import StateWriter
27
+
28
+
29
class RuntimeContext(NamedTuple):
    """Immutable per-run values shared by the orchestration helpers.

    Field order is part of the positional constructor and tuple layout.
    """

    # Contract and the options in effect for this run.
    semantic: SemanticContract
    opts: DatabricksIngestOptions

    # Target name, run identifier and start timestamp (all strings).
    target: str
    run_id: str
    started: str

    # Writers used for evidence and state records.
    evidence: EvidenceWriter
    state: StateWriter

    # Quality status string computed when the context is built.
    quality_status_value: str
38
+
39
+
40
class RuntimeProgress:
    """Mutable state accumulated while a run progresses.

    Holds the current prepared input plus two dicts, ``schema_changes`` and
    ``governance_results``, which both start out empty.  ``__slots__``
    restricts instances to exactly these three attributes.
    """

    __slots__ = ("prepared", "schema_changes", "governance_results")

    def __init__(self, prepared: PreparedInput) -> None:
        """Store ``prepared`` and start with fresh, independent result dicts."""
        self.prepared = prepared
        empty_schema_changes: dict[str, Any] = {}
        empty_governance_results: dict[str, Any] = {}
        self.schema_changes = empty_schema_changes
        self.governance_results = empty_governance_results
47
+
48
+
49
def build_runtime_context(
    contract: dict[str, Any] | SemanticContract,
    *,
    runner: SqlRunner,
    options: DatabricksIngestOptions | None,
    query_one: QueryOne | None,
    quality_results: tuple[QualityRuleResult, ...],
) -> RuntimeContext:
    """Build the ``RuntimeContext`` for one ingestion run.

    A mapping contract is first normalized with
    ``normalize_databricks_contract`` and converted to a
    ``SemanticContract``; an existing ``SemanticContract`` is used as is.
    Falsy ``options`` are replaced by default ``DatabricksIngestOptions``,
    and the result is passed through ``effective_ingest_options``.

    The evidence and state writers are created against the catalog and
    schema of the effective options, and the quality status is derived from
    ``quality_results`` using the effective ``quality_action``.
    """
    if options:
        base_opts = options
    else:
        base_opts = DatabricksIngestOptions()

    if isinstance(contract, SemanticContract):
        semantic = contract
    else:
        semantic = semantic_contract_from_mapping(normalize_databricks_contract(contract))

    opts = effective_ingest_options(semantic, base_opts)
    target = target_full_name(semantic)

    run_id = resolve_run_id(opts.run_id, opts.run_id_factory)
    started = utc_now_str()
    evidence = EvidenceWriter(runner, catalog=opts.catalog, schema=opts.schema)
    state = StateWriter(runner, catalog=opts.catalog, schema=opts.schema, query_one=query_one)
    status = quality_policy_status(quality_results, on_quality_fail=opts.quality_action)

    return RuntimeContext(
        semantic=semantic,
        opts=opts,
        target=target,
        run_id=run_id,
        started=started,
        evidence=evidence,
        state=state,
        quality_status_value=status,
    )
71
+
72
+
73
def complete_result(ctx: RuntimeContext, result: dict[str, Any]) -> dict[str, Any]:
    """Run the final hook and failure check, then hand back ``result``.

    If the options carry hooks with a truthy ``after_finalize``, it is
    called with the semantic contract and the result.  Afterwards, when
    ``raise_on_failure`` is set, ``raise_for_failure_result`` is invoked
    on the result.

    Returns:
        The same ``result`` object that was passed in.
    """
    options = ctx.opts
    hooks = options.hooks
    after_finalize = hooks.after_finalize if hooks else None
    if after_finalize:
        after_finalize(ctx.semantic, result)

    if options.raise_on_failure:
        raise_for_failure_result(result)
    return result
79
+
80
+
81
def finalize_skipped_result(
    ctx: RuntimeContext,
    progress: RuntimeProgress,
    *,
    quality_results: tuple[QualityRuleResult, ...],
    skipped_by_run_id: object,
) -> dict[str, Any]:
    """Finalize a run that is skipped because of its idempotency key.

    Delegates to ``finalize_ingest`` with status ``"SKIPPED"``, zero rows
    written, quality status ``"SKIPPED"`` and the skip reason
    ``"idempotency_key_already_succeeded"``.  ``skipped_by_run_id`` is
    passed on as a string, or as ``None`` when it is falsy.
    """
    positional = (
        ctx.evidence,
        ctx.state,
        ctx.semantic,
        progress.prepared,
        ctx.opts,
        ctx.run_id,
        ctx.target,
        "SKIPPED",
        ctx.started,
    )
    if skipped_by_run_id:
        skipped_by = str(skipped_by_run_id)
    else:
        skipped_by = None
    return finalize_ingest(
        *positional,
        rows_written=0,
        quality_status_value="SKIPPED",
        quality_results=quality_results,
        skip_reason="idempotency_key_already_succeeded",
        skipped_by_run_id=skipped_by,
    )
104
+
105
+
106
def finalize_dry_run_result(ctx: RuntimeContext, progress: RuntimeProgress) -> dict[str, Any]:
    """Finalize a dry run by delegating to ``finalize_dry_run``.

    All arguments are taken from the runtime context, except ``prepared``,
    which comes from the current progress object.
    """
    arguments = {
        "evidence": ctx.evidence,
        "state": ctx.state,
        "contract": ctx.semantic,
        "prepared": progress.prepared,
        "opts": ctx.opts,
        "run_id": ctx.run_id,
        "target": ctx.target,
        "started": ctx.started,
        "quality_status_value": ctx.quality_status_value,
    }
    return finalize_dry_run(**arguments)
118
+
119
+
120
def finalize_success_result(
    ctx: RuntimeContext,
    progress: RuntimeProgress,
    *,
    write_flow: WriteFlowResult,
    quality_results: tuple[QualityRuleResult, ...],
    query_one: QueryOne | None,
) -> dict[str, Any]:
    """Finalize a successful run by delegating to ``finalize_success``.

    Arguments are gathered from three sources: the runtime context
    (writers, contract, options, ids, timestamps, quality status), the
    progress object (prepared input, schema changes, governance results)
    and the write-flow result (outcome, logical row count, write
    timestamps, stage durations).
    """
    from_context = {
        "evidence": ctx.evidence,
        "state": ctx.state,
        "contract": ctx.semantic,
        "prepared": progress.prepared,
        "opts": ctx.opts,
        "run_id": ctx.run_id,
        "target": ctx.target,
        "started": ctx.started,
    }
    from_write_flow = {
        "outcome": write_flow.outcome,
        "logical_rows_written": write_flow.logical_rows_written,
    }
    quality = {
        "quality_status_value": ctx.quality_status_value,
        "quality_results": quality_results,
    }
    from_progress = {
        "schema_changes": progress.schema_changes,
        "governance_results": progress.governance_results,
    }
    timings = {
        "write_started_at": write_flow.write_started_at,
        "write_finished_at": write_flow.write_finished_at,
        "stage_durations": write_flow.stage_durations,
    }
    return finalize_success(
        **from_context,
        **from_write_flow,
        **quality,
        **from_progress,
        **timings,
        query_one=query_one,
    )
148
+
149
+
150
def finalize_failure_result(
    ctx: RuntimeContext,
    progress: RuntimeProgress,
    exc: Exception,
    *,
    quality_results: tuple[QualityRuleResult, ...],
) -> dict[str, Any]:
    """Record a failed run and return the finalized ``FAILED`` result.

    The error message is derived from ``exc`` with ``exception_message``.
    Unless the run is a dry run, an error-log payload is written through
    the evidence writer; its source table is the prepared input's
    ``source_name``, or ``source_view`` when the name is falsy.  Finally
    ``finalize_ingest`` is called with status ``"FAILED"`` and zero rows
    written.
    """
    error_message = exception_message(exc)
    prepared = progress.prepared

    if not ctx.opts.dry_run:
        payload = error_log_payload(
            exc,
            run_id=ctx.run_id,
            target=ctx.target,
            source_table=prepared.source_name or prepared.source_view,
            mode=ctx.semantic.write.mode,
            runtime_metadata=ctx.opts.runtime_metadata,
        )
        ctx.evidence.write_error_log(payload)

    positional = (
        ctx.evidence,
        ctx.state,
        ctx.semantic,
        progress.prepared,
        ctx.opts,
        ctx.run_id,
        ctx.target,
        "FAILED",
        ctx.started,
    )
    return finalize_ingest(
        *positional,
        rows_written=0,
        quality_status_value=ctx.quality_status_value,
        quality_results=quality_results,
        error_message=error_message,
        schema_changes=progress.schema_changes,
        governance_results=progress.governance_results,
    )
@@ -0,0 +1,147 @@
1
+ """Databricks runtime orchestration over prepared source views."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Any
5
+
6
+ from contractforge_core.quality import QualityRuleResult, quality_status
7
+ from contractforge_core.runtime import PreparedInput, QueryOne
8
+ from contractforge_core.semantic import SemanticContract
9
+ from contractforge_databricks.adapter import DatabricksAdapter
10
+ from contractforge_databricks.execution import SqlRunner
11
+ from contractforge_databricks.rendering.names import target_full_name
12
+ from contractforge_databricks.runtime.hooks import apply_prepared_hook
13
+ from contractforge_databricks.runtime.models import DatabricksIngestOptions
14
+ from contractforge_databricks.runtime.orchestration_context import (
15
+ RuntimeContext,
16
+ RuntimeProgress,
17
+ build_runtime_context,
18
+ complete_result,
19
+ finalize_dry_run_result,
20
+ finalize_failure_result,
21
+ finalize_skipped_result,
22
+ finalize_success_result,
23
+ )
24
+ from contractforge_databricks.runtime.write_flow import execute_runtime_write_flow
25
+ from contractforge_databricks.state import render_find_idempotent_run_sql
26
+
27
+
28
def ingest_databricks_contract(
    contract: dict[str, Any] | SemanticContract,
    *,
    runner: SqlRunner,
    prepared: PreparedInput,
    options: DatabricksIngestOptions | None = None,
    query_one: QueryOne | None = None,
    quality_results: tuple[QualityRuleResult, ...] = (),
) -> dict[str, Any]:
    """Execute one Databricks contract over an already prepared source view.

    Steps, in order:

    1. Build the runtime context and validate the plan for the contract.
    2. Apply the ``after_prepare`` hook to the prepared input.
    3. Look for a previous successful run with the same idempotency key and
       return a skipped result if one is found.
    4. Raise if quality gates failed and the configured action is ``fail``.
    5. Acquire the target lock and apply the ``before_write`` hook.
    6. In dry-run mode return the dry-run result; otherwise run the write
       flow, call the ``after_write`` hook and return the success result.

    Any ``Exception`` raised inside these steps is converted into a failure
    result rather than propagated. Errors from building the runtime context
    happen before the ``try`` block and do propagate.

    Args:
        contract: Contract as a raw mapping or an already parsed
            ``SemanticContract``.
        runner: SQL runner used by the context and the write flow.
        prepared: Prepared input describing the source view.
        options: Ingest options; ``None`` is passed through to
            ``build_runtime_context``.
        query_one: Optional single-row query callable used for the
            idempotency lookup and forwarded to the write flow.
        quality_results: Quality rule results evaluated before this call.

    Returns:
        The result mapping produced by ``complete_result``.
    """
    ctx = build_runtime_context(
        contract,
        runner=runner,
        options=options,
        query_one=query_one,
        quality_results=quality_results,
    )
    progress = RuntimeProgress(prepared=prepared)
    raw_quality_status = quality_status(quality_results)
    # Tracks whether the lock stage was reached, so the ``finally`` clause does
    # not issue a release for a lock this run never tried to take (planning
    # failure, idempotency skip, quality-gate failure).
    lock_attempted = False

    try:
        _validate_planning(ctx.semantic, ctx.opts)
        progress.prepared = _apply_after_prepare(ctx, progress.prepared)
        skipped = _idempotency_skip(ctx.target, ctx.opts, query_one)
        if skipped:
            return complete_result(
                ctx,
                finalize_skipped_result(
                    ctx,
                    progress,
                    quality_results=quality_results,
                    skipped_by_run_id=skipped.get("run_id"),
                ),
            )
        _raise_for_quality_failure(raw_quality_status, ctx.opts)
        # Set before the call so a failed or partial acquisition is still
        # followed by a release attempt, exactly as before.
        lock_attempted = True
        _acquire_lock(ctx)
        progress.prepared = _apply_before_write(ctx, progress.prepared)
        if ctx.opts.dry_run:
            return complete_result(ctx, finalize_dry_run_result(ctx, progress))
        write_flow = execute_runtime_write_flow(
            runner=runner,
            evidence=ctx.evidence,
            contract=ctx.semantic,
            prepared=progress.prepared,
            opts=ctx.opts,
            run_id=ctx.run_id,
            target=ctx.target,
            query_one=query_one,
            quality_results=quality_results,
        )
        # Record write-side progress so a later failure still reports it.
        progress.schema_changes = write_flow.schema_changes
        progress.governance_results = write_flow.governance_results
        if ctx.opts.hooks and ctx.opts.hooks.after_write:
            ctx.opts.hooks.after_write(ctx.semantic, progress.prepared, write_flow.outcome)
        return complete_result(
            ctx,
            finalize_success_result(
                ctx,
                progress,
                write_flow=write_flow,
                quality_results=quality_results,
                query_one=query_one,
            ),
        )
    except Exception as exc:
        return complete_result(ctx, finalize_failure_result(ctx, progress, exc, quality_results=quality_results))
    finally:
        if lock_attempted:
            _release_lock(ctx)
96
+
97
+
98
def _apply_after_prepare(ctx: RuntimeContext, prepared: PreparedInput) -> PreparedInput:
    """Run the ``after_prepare`` hook, if hooks are configured, over ``prepared``.

    ``None`` is passed as the hook when ``ctx.opts.hooks`` is falsy; the
    result of ``apply_prepared_hook`` is returned unchanged.
    """
    hooks = ctx.opts.hooks
    hook = hooks.after_prepare if hooks else None
    return apply_prepared_hook(hook, ctx.semantic, prepared)
100
+
101
+
102
def _apply_before_write(ctx: RuntimeContext, prepared: PreparedInput) -> PreparedInput:
    """Run the ``before_write`` hook, if hooks are configured, over ``prepared``.

    ``None`` is passed as the hook when ``ctx.opts.hooks`` is falsy; the
    result of ``apply_prepared_hook`` is returned unchanged.
    """
    hooks = ctx.opts.hooks
    hook = hooks.before_write if hooks else None
    return apply_prepared_hook(hook, ctx.semantic, prepared)
104
+
105
+
106
def _raise_for_quality_failure(raw_quality_status: str, opts: DatabricksIngestOptions) -> None:
    """Abort the run when quality gates failed and the configured action is ``fail``.

    Args:
        raw_quality_status: Aggregate quality status; only ``"FAILED"`` can
            trigger the error.
        opts: Ingest options; ``quality_action`` is consulted only when the
            status is ``"FAILED"``.

    Raises:
        ValueError: If the status is ``"FAILED"`` and ``opts.quality_action``
            equals ``"fail"``.
    """
    if raw_quality_status != "FAILED":
        return
    if opts.quality_action != "fail":
        return
    raise ValueError("Quality gates failed before Databricks write")
109
+
110
+
111
def _acquire_lock(ctx: RuntimeContext) -> None:
    """Acquire the target lock through ``ctx.state`` unless locking is off or this is a dry run."""
    opts = ctx.opts
    if not opts.lock_enabled or opts.dry_run:
        return
    ctx.state.acquire_lock(target_table=ctx.target, run_id=ctx.run_id, owner=opts.lock_owner)
114
+
115
+
116
def _release_lock(ctx: RuntimeContext) -> None:
    """Release the target lock through ``ctx.state`` unless locking is off or this is a dry run."""
    opts = ctx.opts
    if not opts.lock_enabled or opts.dry_run:
        return
    ctx.state.release_lock(target_table=ctx.target, run_id=ctx.run_id)
119
+
120
+
121
def _validate_planning(contract: SemanticContract, opts: DatabricksIngestOptions) -> None:
    """Plan the contract with a ``DatabricksAdapter`` and reject unusable plans.

    The adapter is built from the contract's full target name plus the
    ``runtime_type`` (defaulting to ``"serverless"``) and optional
    ``spark_version`` found in ``opts.runtime_metadata``.

    Raises:
        ValueError: If the plan status is ``"UNSUPPORTED"``, or is
            ``"REVIEW_REQUIRED"`` while ``opts.allow_review_required`` is
            falsy. The message lists the blocker messages joined by ``"; "``.
    """
    metadata = dict(opts.runtime_metadata or {})
    runtime_type = str(metadata.get("runtime_type") or "serverless")
    spark_version = str(metadata["spark_version"]) if metadata.get("spark_version") else None

    adapter = DatabricksAdapter.from_evidence(
        target_table=target_full_name(contract),
        runtime_type=runtime_type,
        spark_version=spark_version,
    )
    plan = adapter.plan(contract)

    unsupported = plan.status == "UNSUPPORTED"
    # The review opt-in is consulted only when the plan asks for review.
    review_blocked = plan.status == "REVIEW_REQUIRED" and not opts.allow_review_required
    if not (unsupported or review_blocked):
        return

    messages = [blocker.message for blocker in plan.blockers]
    blockers = "; ".join(messages)
    raise ValueError(f"Databricks planning status {plan.status}: {blockers}")
131
+
132
+
133
def _idempotency_skip(target: str, opts: DatabricksIngestOptions, query_one: QueryOne | None) -> dict[str, Any] | None:
    """Look up a previous successful run for the configured idempotency key.

    Args:
        target: Target table name used in the lookup statement.
        opts: Ingest options supplying the idempotency key and policy and the
            catalog/schema that hold ``ctrl_ingestion_runs``.
        query_one: Callable that executes the lookup statement; when ``None``
            no lookup is performed and no previous run is reported.

    Returns:
        The row returned by ``query_one`` when a previous successful run
        exists and the policy allows skipping, otherwise ``None``.

    Raises:
        ValueError: If a previous successful run exists and the policy is
            ``"fail_if_success"``.
    """
    key = opts.idempotency_key
    checked_policies = {"skip_if_success", "rerun_if_failed", "fail_if_success"}
    if not key or opts.idempotency_policy not in checked_policies:
        return None

    runs_table = f"{opts.catalog}.{opts.schema}.ctrl_ingestion_runs"
    statement = render_find_idempotent_run_sql(
        target_table=target,
        idempotency_key=key,
        status="SUCCESS",
        runs_table=runs_table,
    )

    previous = None
    if query_one:
        previous = query_one(statement)
    if not previous:
        return None

    if opts.idempotency_policy == "fail_if_success":
        raise ValueError(f"idempotency_key={key!r} already succeeded")
    return previous