contractforge-databricks 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. contractforge_databricks/__init__.py +172 -0
  2. contractforge_databricks/adapter.py +69 -0
  3. contractforge_databricks/annotations/__init__.py +10 -0
  4. contractforge_databricks/annotations/application.py +52 -0
  5. contractforge_databricks/annotations/audit.py +49 -0
  6. contractforge_databricks/annotations/sql.py +142 -0
  7. contractforge_databricks/api.py +65 -0
  8. contractforge_databricks/bundles/__init__.py +9 -0
  9. contractforge_databricks/bundles/assets.py +47 -0
  10. contractforge_databricks/bundles/project.py +213 -0
  11. contractforge_databricks/bundles/project_config.py +133 -0
  12. contractforge_databricks/capabilities/__init__.py +17 -0
  13. contractforge_databricks/capabilities/builders.py +43 -0
  14. contractforge_databricks/capabilities/evaluate.py +162 -0
  15. contractforge_databricks/capabilities/mapping.py +36 -0
  16. contractforge_databricks/capabilities/models.py +44 -0
  17. contractforge_databricks/capabilities/runtime.py +111 -0
  18. contractforge_databricks/capabilities/uc.py +47 -0
  19. contractforge_databricks/cli.py +196 -0
  20. contractforge_databricks/cli_deploy.py +98 -0
  21. contractforge_databricks/cli_governance.py +142 -0
  22. contractforge_databricks/cli_io.py +91 -0
  23. contractforge_databricks/cli_maintenance.py +69 -0
  24. contractforge_databricks/coercion.py +31 -0
  25. contractforge_databricks/contract_extensions.py +70 -0
  26. contractforge_databricks/cost/__init__.py +11 -0
  27. contractforge_databricks/cost/model.py +22 -0
  28. contractforge_databricks/cost/report.py +65 -0
  29. contractforge_databricks/cost/sql.py +136 -0
  30. contractforge_databricks/dashboards/__init__.py +15 -0
  31. contractforge_databricks/dashboards/control_tables.py +150 -0
  32. contractforge_databricks/diagnostics/__init__.py +7 -0
  33. contractforge_databricks/diagnostics/explain.py +40 -0
  34. contractforge_databricks/environment.py +53 -0
  35. contractforge_databricks/evidence/__init__.py +98 -0
  36. contractforge_databricks/evidence/ddl.py +35 -0
  37. contractforge_databricks/evidence/governance_log.py +175 -0
  38. contractforge_databricks/evidence/helpers.py +29 -0
  39. contractforge_databricks/evidence/ops_log.py +210 -0
  40. contractforge_databricks/evidence/records.py +27 -0
  41. contractforge_databricks/evidence/run_log.py +74 -0
  42. contractforge_databricks/evidence/schemas.py +7 -0
  43. contractforge_databricks/evidence/sql.py +144 -0
  44. contractforge_databricks/evidence/tables.py +20 -0
  45. contractforge_databricks/evidence/writer.py +118 -0
  46. contractforge_databricks/execution/__init__.py +70 -0
  47. contractforge_databricks/execution/delta_basic.py +57 -0
  48. contractforge_databricks/execution/hash_diff.py +126 -0
  49. contractforge_databricks/execution/hash_diff_latest.py +142 -0
  50. contractforge_databricks/execution/replace_partitions.py +40 -0
  51. contractforge_databricks/execution/results.py +5 -0
  52. contractforge_databricks/execution/retry.py +36 -0
  53. contractforge_databricks/execution/scd2.py +213 -0
  54. contractforge_databricks/execution/scd2_deletes.py +65 -0
  55. contractforge_databricks/execution/scd2_late.py +30 -0
  56. contractforge_databricks/execution/snapshot.py +77 -0
  57. contractforge_databricks/execution/sql_merge.py +85 -0
  58. contractforge_databricks/execution/tables.py +98 -0
  59. contractforge_databricks/execution/windows.py +58 -0
  60. contractforge_databricks/governance/__init__.py +30 -0
  61. contractforge_databricks/governance/access.py +185 -0
  62. contractforge_databricks/governance/application.py +93 -0
  63. contractforge_databricks/governance/drift.py +49 -0
  64. contractforge_databricks/governance/runtime.py +60 -0
  65. contractforge_databricks/governance/sql.py +31 -0
  66. contractforge_databricks/governance/validation.py +135 -0
  67. contractforge_databricks/lakeflow/__init__.py +21 -0
  68. contractforge_databricks/lakeflow/compatibility.py +194 -0
  69. contractforge_databricks/lakeflow/rendering.py +175 -0
  70. contractforge_databricks/lineage/__init__.py +7 -0
  71. contractforge_databricks/lineage/openlineage.py +182 -0
  72. contractforge_databricks/maintenance/__init__.py +27 -0
  73. contractforge_databricks/maintenance/retention.py +90 -0
  74. contractforge_databricks/maintenance/sql.py +68 -0
  75. contractforge_databricks/metrics/__init__.py +19 -0
  76. contractforge_databricks/metrics/history.py +21 -0
  77. contractforge_databricks/metrics/write.py +63 -0
  78. contractforge_databricks/operations/__init__.py +4 -0
  79. contractforge_databricks/operations/application.py +38 -0
  80. contractforge_databricks/operations/sql.py +95 -0
  81. contractforge_databricks/parity/__init__.py +18 -0
  82. contractforge_databricks/parity/catalog.py +59 -0
  83. contractforge_databricks/parity/models.py +7 -0
  84. contractforge_databricks/parity/scenarios.py +111 -0
  85. contractforge_databricks/partitioning/__init__.py +3 -0
  86. contractforge_databricks/partitioning/predicates.py +28 -0
  87. contractforge_databricks/preparation/__init__.py +47 -0
  88. contractforge_databricks/preparation/deduplicate.py +87 -0
  89. contractforge_databricks/preparation/encoding.py +37 -0
  90. contractforge_databricks/preparation/hashing.py +18 -0
  91. contractforge_databricks/preparation/pyspark.py +178 -0
  92. contractforge_databricks/preparation/pyspark_staging.py +70 -0
  93. contractforge_databricks/preparation/shape.py +209 -0
  94. contractforge_databricks/preparation/shape_validation.py +94 -0
  95. contractforge_databricks/preparation/staging.py +17 -0
  96. contractforge_databricks/preparation/zip_arrays.py +51 -0
  97. contractforge_databricks/presets/__init__.py +3 -0
  98. contractforge_databricks/presets/base.py +24 -0
  99. contractforge_databricks/presets/bronze.py +57 -0
  100. contractforge_databricks/presets/catalog.py +22 -0
  101. contractforge_databricks/presets/core.py +134 -0
  102. contractforge_databricks/presets/gold.py +62 -0
  103. contractforge_databricks/presets/modifiers.py +51 -0
  104. contractforge_databricks/presets/runtime.py +22 -0
  105. contractforge_databricks/presets/silver.py +101 -0
  106. contractforge_databricks/presets/write_engine.py +57 -0
  107. contractforge_databricks/quality/__init__.py +41 -0
  108. contractforge_databricks/quality/evaluation.py +178 -0
  109. contractforge_databricks/quality/persistence.py +81 -0
  110. contractforge_databricks/quality/registry.py +134 -0
  111. contractforge_databricks/quality/results.py +17 -0
  112. contractforge_databricks/quality/sql.py +113 -0
  113. contractforge_databricks/rendering/__init__.py +11 -0
  114. contractforge_databricks/rendering/bundle.py +93 -0
  115. contractforge_databricks/rendering/markdown.py +50 -0
  116. contractforge_databricks/rendering/names.py +56 -0
  117. contractforge_databricks/results.py +15 -0
  118. contractforge_databricks/runtime/__init__.py +101 -0
  119. contractforge_databricks/runtime/available_now.py +147 -0
  120. contractforge_databricks/runtime/bundles.py +211 -0
  121. contractforge_databricks/runtime/cache.py +20 -0
  122. contractforge_databricks/runtime/control_tables.py +19 -0
  123. contractforge_databricks/runtime/deploy.py +197 -0
  124. contractforge_databricks/runtime/detection.py +114 -0
  125. contractforge_databricks/runtime/dry_run.py +46 -0
  126. contractforge_databricks/runtime/errors.py +54 -0
  127. contractforge_databricks/runtime/file_selection.py +109 -0
  128. contractforge_databricks/runtime/finalization.py +168 -0
  129. contractforge_databricks/runtime/governance.py +37 -0
  130. contractforge_databricks/runtime/hooks.py +45 -0
  131. contractforge_databricks/runtime/http_file.py +37 -0
  132. contractforge_databricks/runtime/http_retry.py +15 -0
  133. contractforge_databricks/runtime/http_safety.py +9 -0
  134. contractforge_databricks/runtime/json_materialization.py +97 -0
  135. contractforge_databricks/runtime/lineage.py +164 -0
  136. contractforge_databricks/runtime/maintenance.py +43 -0
  137. contractforge_databricks/runtime/merge_validation.py +98 -0
  138. contractforge_databricks/runtime/metadata.py +21 -0
  139. contractforge_databricks/runtime/metrics.py +34 -0
  140. contractforge_databricks/runtime/models.py +32 -0
  141. contractforge_databricks/runtime/options.py +33 -0
  142. contractforge_databricks/runtime/orchestration_context.py +185 -0
  143. contractforge_databricks/runtime/orchestrator.py +147 -0
  144. contractforge_databricks/runtime/partitioning.py +93 -0
  145. contractforge_databricks/runtime/quality_quarantine.py +92 -0
  146. contractforge_databricks/runtime/rest_api.py +46 -0
  147. contractforge_databricks/runtime/rest_auth.py +21 -0
  148. contractforge_databricks/runtime/rest_pagination.py +21 -0
  149. contractforge_databricks/runtime/run_payload.py +177 -0
  150. contractforge_databricks/runtime/schema.py +106 -0
  151. contractforge_databricks/runtime/source_metadata.py +30 -0
  152. contractforge_databricks/runtime/source_registry.py +43 -0
  153. contractforge_databricks/runtime/source_schema.py +24 -0
  154. contractforge_databricks/runtime/sources.py +208 -0
  155. contractforge_databricks/runtime/spark.py +183 -0
  156. contractforge_databricks/runtime/spark_defaults.py +35 -0
  157. contractforge_databricks/runtime/storage_auth.py +132 -0
  158. contractforge_databricks/runtime/streaming.py +131 -0
  159. contractforge_databricks/runtime/success.py +104 -0
  160. contractforge_databricks/runtime/utils.py +52 -0
  161. contractforge_databricks/runtime/watermark.py +71 -0
  162. contractforge_databricks/runtime/windows.py +184 -0
  163. contractforge_databricks/runtime/write.py +66 -0
  164. contractforge_databricks/runtime/write_flow.py +146 -0
  165. contractforge_databricks/runtime/write_strategy.py +40 -0
  166. contractforge_databricks/schema/__init__.py +21 -0
  167. contractforge_databricks/schema/diff.py +11 -0
  168. contractforge_databricks/schema/policy.py +33 -0
  169. contractforge_databricks/schema/sync.py +23 -0
  170. contractforge_databricks/security/__init__.py +21 -0
  171. contractforge_databricks/security/errors.py +5 -0
  172. contractforge_databricks/security/redaction.py +5 -0
  173. contractforge_databricks/security/secrets.py +114 -0
  174. contractforge_databricks/security/source_policy.py +17 -0
  175. contractforge_databricks/shapes/__init__.py +3 -0
  176. contractforge_databricks/shapes/sql.py +123 -0
  177. contractforge_databricks/sources/__init__.py +67 -0
  178. contractforge_databricks/sources/artifacts.py +100 -0
  179. contractforge_databricks/sources/autoloader.py +48 -0
  180. contractforge_databricks/sources/bounded_streams.py +44 -0
  181. contractforge_databricks/sources/classification.py +115 -0
  182. contractforge_databricks/sources/delta_share.py +21 -0
  183. contractforge_databricks/sources/files.py +48 -0
  184. contractforge_databricks/sources/http_file.py +46 -0
  185. contractforge_databricks/sources/interpret.py +76 -0
  186. contractforge_databricks/sources/jdbc.py +32 -0
  187. contractforge_databricks/sources/metadata.py +18 -0
  188. contractforge_databricks/sources/native_passthrough.py +33 -0
  189. contractforge_databricks/sources/rds_iam.py +15 -0
  190. contractforge_databricks/sources/rds_iam_runtime.py +191 -0
  191. contractforge_databricks/sources/rest_api.py +33 -0
  192. contractforge_databricks/sources/support.py +50 -0
  193. contractforge_databricks/sources/table_refs.py +65 -0
  194. contractforge_databricks/sql/__init__.py +4 -0
  195. contractforge_databricks/sql/identifiers.py +17 -0
  196. contractforge_databricks/sql/literals.py +36 -0
  197. contractforge_databricks/state/__init__.py +39 -0
  198. contractforge_databricks/state/ddl.py +24 -0
  199. contractforge_databricks/state/migrations.py +146 -0
  200. contractforge_databricks/state/queries.py +149 -0
  201. contractforge_databricks/state/sql.py +116 -0
  202. contractforge_databricks/state/tables.py +9 -0
  203. contractforge_databricks/state/writer.py +83 -0
  204. contractforge_databricks/templates/__init__.py +15 -0
  205. contractforge_databricks/templates/catalog.py +205 -0
  206. contractforge_databricks/templates/catalog_parity.py +85 -0
  207. contractforge_databricks/templates/core.py +83 -0
  208. contractforge_databricks/templates/enrichment.py +175 -0
  209. contractforge_databricks/transforms/__init__.py +3 -0
  210. contractforge_databricks/transforms/sql.py +118 -0
  211. contractforge_databricks/watermark/__init__.py +6 -0
  212. contractforge_databricks/watermark/sql.py +91 -0
  213. contractforge_databricks/write_modes/__init__.py +20 -0
  214. contractforge_databricks/write_modes/registry.py +44 -0
  215. contractforge_databricks/write_modes/sql.py +33 -0
  216. contractforge_databricks/write_modes/strategy.py +192 -0
  217. contractforge_databricks-0.1.0.dist-info/METADATA +34 -0
  218. contractforge_databricks-0.1.0.dist-info/RECORD +220 -0
  219. contractforge_databricks-0.1.0.dist-info/WHEEL +4 -0
  220. contractforge_databricks-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,182 @@
1
+ """OpenLineage event rendering for Databricks execution evidence."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime
7
+ from typing import Any
8
+
9
+ from contractforge_core.semantic import SemanticContract
10
+ from contractforge_databricks.evidence import LineageEvidenceRecord, render_lineage_insert_sql
11
+ from contractforge_databricks.rendering.names import target_full_name
12
+ from contractforge_databricks.security import redact_value
13
+
14
# One schema column, expressed as a (field name, field type) pair of strings.
SchemaField = tuple[str, str]
15
+
16
+
17
def openlineage_namespace(contract: SemanticContract, *, namespace: str | None = None) -> str:
    """Return the OpenLineage namespace to use for *contract*.

    A non-empty ``namespace`` argument is returned unchanged. Otherwise the
    result is ``databricks://<catalog>``, where ``<catalog>`` is the portion of
    ``contract.target.namespace`` before its first dot, or
    ``contract.target.layer`` when the target namespace is empty.
    """
    if namespace:
        return namespace
    target_namespace = contract.target.namespace
    if target_namespace:
        # Only the leading segment (before the first ".") is used.
        catalog = target_namespace.partition(".")[0]
    else:
        catalog = contract.target.layer
    return f"databricks://{catalog}"
22
+
23
+
24
def build_openlineage_event(
    contract: SemanticContract,
    *,
    run_id: str,
    source_name: str,
    status: str,
    started_at_utc: datetime,
    finished_at_utc: datetime,
    rows_read: int = 0,
    rows_written: int = 0,
    input_schema: tuple[SchemaField, ...] = (),
    output_schema: tuple[SchemaField, ...] = (),
    delta_version_before: int | None = None,
    delta_version_after: int | None = None,
    operation_metrics: dict[str, Any] | None = None,
    namespace: str | None = None,
    producer: str = "contractforge-databricks",
    parent_run_id: str | None = None,
    spark_version: str | None = None,
    source_code_url: str | None = None,
) -> dict[str, Any]:
    """Build an OpenLineage-style event dictionary for one contract run.

    The event type is ``"COMPLETE"`` only when ``status == "SUCCESS"``; any
    other status yields ``"FAIL"``. ``finished_at_utc`` supplies the event
    time. The single input dataset is named ``source_name`` and the single
    output dataset is named by ``target_full_name(contract)``.

    Entries whose value is ``None`` (for example an absent parent run, Spark
    version or Delta version) are dropped via ``_clean_none``, and the final
    dictionary is passed through ``redact_value`` before being returned.
    """
    lineage_namespace = openlineage_namespace(contract, namespace=namespace)
    target = target_full_name(contract)
    finished_iso = finished_at_utc.isoformat()

    # Run-level facets; "parent" is None (and later removed) without a parent run.
    run_facets = {
        "parent": _parent_facet(parent_run_id, lineage_namespace, contract, producer, source_code_url),
        "processing_engine": {
            "_producer": producer,
            "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/ProcessingEngineRunFacet.json",
            "name": "spark",
            "version": spark_version,
        },
    }
    job_section = {
        "namespace": lineage_namespace,
        "name": f"{contract.target.layer}.{contract.target.name}.{contract.write.mode}",
        "facets": _clean_none({"sourceCodeLocation": _source_code_facet(source_code_url, producer)}),
    }
    input_datasets = [_dataset(lineage_namespace, source_name, input_schema, producer)]
    output_datasets = [_output_dataset(lineage_namespace, target, output_schema, rows_written, producer)]

    # Project-specific summary of the write that produced this event.
    contractforge_facet = {
        "_producer": producer,
        "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/RunFacet.json",
        "mode": contract.write.mode,
        "layer": contract.target.layer,
        "rowsRead": rows_read,
        "rowsWritten": rows_written,
        "deltaVersionBefore": delta_version_before,
        "deltaVersionAfter": delta_version_after,
        "operationMetrics": redact_value(operation_metrics or {}),
        "startedAt": started_at_utc.isoformat(),
        "finishedAt": finished_iso,
    }

    event = {
        "eventType": "FAIL" if status != "SUCCESS" else "COMPLETE",
        "eventTime": finished_iso,
        "producer": producer,
        "schemaURL": "https://openlineage.io/spec/1-0-5/OpenLineage.json",
        "run": {"runId": run_id, "facets": _clean_none(run_facets)},
        "job": job_section,
        "inputs": input_datasets,
        "outputs": output_datasets,
        "facets": {"contractforge": contractforge_facet},
    }
    return redact_value(_clean_none(event))
90
+
91
+
92
def render_openlineage_insert_sql(
    contract: SemanticContract,
    *,
    run_id: str,
    source_name: str,
    status: str,
    started_at_utc: datetime,
    finished_at_utc: datetime,
    catalog: str = "main",
    schema: str = "ops",
    **kwargs: Any,
) -> str:
    """Render the INSERT statement that stores one lineage event as evidence.

    The event is produced by ``build_openlineage_event``; any extra keyword
    arguments are forwarded to it unchanged. The event is wrapped in a
    ``LineageEvidenceRecord`` stamped with ``finished_at_utc`` and rendered by
    ``render_lineage_insert_sql`` for the given ``catalog`` and ``schema``.
    """
    lineage_event = build_openlineage_event(
        contract,
        run_id=run_id,
        source_name=source_name,
        status=status,
        started_at_utc=started_at_utc,
        finished_at_utc=finished_at_utc,
        **kwargs,
    )
    target_table = target_full_name(contract)
    # Round-trip through JSON with sorted keys so the stored event contains
    # only JSON-native values in a stable key order.
    normalized_event = json.loads(json.dumps(lineage_event, sort_keys=True))
    evidence = LineageEvidenceRecord(
        run_id=run_id,
        target_table=target_table,
        source_name=source_name,
        event=normalized_event,
        event_time_utc=finished_at_utc,
    )
    return render_lineage_insert_sql(evidence, catalog=catalog, schema=schema)
121
+
122
+
123
+ def _dataset(namespace: str, name: str, fields: tuple[SchemaField, ...], producer: str) -> dict[str, Any]:
124
+ return {
125
+ "namespace": namespace,
126
+ "name": name,
127
+ "facets": {
128
+ "schema": {
129
+ "_producer": producer,
130
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json",
131
+ "fields": [{"name": name, "type": dtype} for name, dtype in fields],
132
+ }
133
+ },
134
+ }
135
+
136
+
137
def _output_dataset(namespace: str, name: str, fields: tuple[SchemaField, ...], row_count: int, producer: str) -> dict[str, Any]:
    """Build an output dataset entry: the ``_dataset`` result plus a row count.

    ``row_count`` is reported under a ``dataQualityMetrics`` facet that sits
    next to the ``schema`` facet.
    """
    quality_facet = {
        "_producer": producer,
        "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/DataQualityMetricsOutputDatasetFacet.json",
        "rowCount": row_count,
    }
    output = _dataset(namespace, name, fields, producer)
    output["facets"]["dataQualityMetrics"] = quality_facet
    return output
145
+
146
+
147
+ def _parent_facet(
148
+ parent_run_id: str | None,
149
+ namespace: str,
150
+ contract: SemanticContract,
151
+ producer: str,
152
+ source_code_url: str | None,
153
+ ) -> dict[str, Any] | None:
154
+ if not parent_run_id:
155
+ return None
156
+ operations = dict(contract.operations.metadata or {}) if contract.operations and contract.operations.metadata else {}
157
+ job_name = source_code_url or operations.get("notebook_name") or contract.target.name
158
+ return {
159
+ "_producer": producer,
160
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/ParentRunFacet.json",
161
+ "job": {"namespace": namespace, "name": job_name},
162
+ "run": {"runId": parent_run_id},
163
+ }
164
+
165
+
166
+ def _source_code_facet(source_code_url: str | None, producer: str) -> dict[str, Any] | None:
167
+ if not source_code_url:
168
+ return None
169
+ return {
170
+ "_producer": producer,
171
+ "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SourceCodeLocationJobFacet.json",
172
+ "type": "notebook",
173
+ "url": source_code_url,
174
+ }
175
+
176
+
177
+ def _clean_none(value: Any) -> Any:
178
+ if isinstance(value, dict):
179
+ return {key: _clean_none(item) for key, item in value.items() if item is not None}
180
+ if isinstance(value, list):
181
+ return [_clean_none(item) for item in value if item is not None]
182
+ return value
@@ -0,0 +1,27 @@
1
+ from contractforge_databricks.maintenance.retention import (
2
+ CONTROL_RETENTION_TARGETS,
3
+ ControlRetentionTarget,
4
+ build_control_retention_plan,
5
+ execute_control_retention_plan,
6
+ )
7
+ from contractforge_databricks.maintenance.sql import (
8
+ MaintenancePlan,
9
+ execute_maintenance_plan,
10
+ render_alter_table_properties_sql,
11
+ render_analyze_sql,
12
+ render_optimize_sql,
13
+ render_vacuum_sql,
14
+ )
15
+
16
+ __all__ = [
17
+ "MaintenancePlan",
18
+ "CONTROL_RETENTION_TARGETS",
19
+ "ControlRetentionTarget",
20
+ "execute_control_retention_plan",
21
+ "execute_maintenance_plan",
22
+ "build_control_retention_plan",
23
+ "render_alter_table_properties_sql",
24
+ "render_analyze_sql",
25
+ "render_optimize_sql",
26
+ "render_vacuum_sql",
27
+ ]
@@ -0,0 +1,90 @@
1
+ """Control-table retention planning for Databricks evidence tables."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Iterable
7
+
8
+ from contractforge_databricks.execution.sql_merge import SqlRunner
9
+ from contractforge_databricks.sql import quote_identifier, quote_table_name
10
+
11
+
12
@dataclass(frozen=True)
class ControlRetentionTarget:
    """Describe one control table that retention planning can prune."""

    # Short selector used to request this target by name.
    key: str
    # Unqualified table name; planning prefixes it with catalog and schema.
    table: str
    # Column name or SQL expression compared against the retention cutoff.
    age_expression: str
    # Human-readable label copied into each plan entry.
    description: str
18
+
19
+
20
# Every control table known to retention planning, in the order plan entries
# are emitted. Rows are (key, table, age_expression, description).
CONTROL_RETENTION_TARGETS: tuple[ControlRetentionTarget, ...] = tuple(
    ControlRetentionTarget(key=key, table=table, age_expression=age_expression, description=description)
    for key, table, age_expression, description in (
        ("runs", "ctrl_ingestion_runs", "run_date", "Run history"),
        ("errors", "ctrl_ingestion_errors", "error_date", "Error stack traces"),
        ("quality", "ctrl_ingestion_quality", "checked_at_utc", "Quality results"),
        ("quarantine", "ctrl_ingestion_quarantine", "quarantined_at_utc", "Quarantine references"),
        (
            "locks",
            "ctrl_ingestion_locks",
            "COALESCE(released_at_utc, expires_at_utc, acquired_at_utc)",
            "Expired or released locks",
        ),
        ("explain", "ctrl_ingestion_explain", "captured_at_utc", "Explain plans"),
        ("lineage", "ctrl_ingestion_lineage", "event_time_utc", "Lineage events"),
        ("schema_changes", "ctrl_ingestion_schema_changes", "change_ts_utc", "Schema changes"),
        ("streams", "ctrl_ingestion_streams", "COALESCE(ended_at_utc, started_at_utc)", "Stream history"),
        ("annotations", "ctrl_ingestion_annotations", "annotation_date", "Annotation audit"),
        ("operations", "ctrl_ingestion_operations", "recorded_at_utc", "Operational audit"),
        ("access", "ctrl_ingestion_access", "access_date", "Access audit"),
        ("cost", "ctrl_ingestion_cost", "captured_at_utc", "Cost signals"),
    )
)
35
+
36
+
37
def build_control_retention_plan(
    *,
    catalog: str = "main",
    schema: str = "ops",
    retention_days: int,
    vacuum: bool = False,
    vacuum_retention_hours: int = 168,
    targets: Iterable[str] | None = None,
) -> tuple[dict[str, Any], ...]:
    """Plan the SQL that prunes old rows from the control tables.

    Each selected target yields one entry holding a ``DELETE`` statement for
    rows older than ``retention_days`` and, when ``vacuum`` is true, a
    ``VACUUM ... RETAIN <vacuum_retention_hours> HOURS`` statement.

    ``targets`` selects entries by key; ``None`` or an empty iterable selects
    every entry of ``CONTROL_RETENTION_TARGETS``.

    Raises:
        ValueError: if ``retention_days`` is below 1, if
            ``vacuum_retention_hours`` is negative, or if ``targets`` names a
            key that is not registered.
    """
    if retention_days < 1:
        raise ValueError("retention_days must be greater than or equal to 1")
    if vacuum_retention_hours < 0:
        raise ValueError("vacuum_retention_hours must be greater than or equal to 0")

    requested = {str(target) for target in (targets or [])}
    unknown = requested.difference(target.key for target in CONTROL_RETENTION_TARGETS)
    if unknown:
        raise ValueError(f"unknown ctrl retention targets: {sorted(unknown)}")

    # An empty request means "all targets".
    selected = [
        target for target in CONTROL_RETENTION_TARGETS if not requested or target.key in requested
    ]
    entries: list[dict[str, Any]] = []
    for target in selected:
        qualified_table = f"{catalog}.{schema}.{target.table}"
        quoted_table = quote_table_name(qualified_table)
        predicate = _cutoff_predicate(target.age_expression, retention_days)
        commands = [f"DELETE FROM {quoted_table} WHERE {predicate}"]
        if vacuum:
            commands.append(f"VACUUM {quoted_table} RETAIN {int(vacuum_retention_hours)} HOURS")
        entries.append(
            {
                "target": target.key,
                "table": qualified_table,
                "description": target.description,
                "retention_days": retention_days,
                "predicate": predicate,
                "commands": commands,
            }
        )
    return tuple(entries)
75
+
76
+
77
def execute_control_retention_plan(runner: SqlRunner, plan: Iterable[dict[str, Any]]) -> tuple[str, ...]:
    """Run every command of every plan entry through ``runner.sql``.

    Commands run in plan order, one at a time. Returns the statements that
    were executed, in execution order. An exception from ``runner.sql``
    propagates and stops the run.
    """
    executed: list[str] = []
    for entry in plan:
        for raw_command in entry["commands"]:
            statement = str(raw_command)
            runner.sql(statement)
            executed.append(statement)
    return tuple(executed)
84
+
85
+
86
+ def _cutoff_predicate(age_expression: str, retention_days: int) -> str:
87
+ expression = age_expression.strip()
88
+ if expression.endswith("_date") and "(" not in expression:
89
+ return f"{quote_identifier(expression)} < date_sub(current_date(), {int(retention_days)})"
90
+ return f"{expression} < current_timestamp() - INTERVAL {int(retention_days)} DAYS"
@@ -0,0 +1,68 @@
1
+ """Databricks table maintenance SQL helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from contractforge_databricks.execution.sql_merge import SqlRunner
8
+ from contractforge_databricks.sql import quote_identifier, quote_table_name
9
+
10
+
11
@dataclass(frozen=True)
class MaintenancePlan:
    """Select which maintenance statements to render for one table."""

    # Table name the statements act on; quoted when each statement is rendered.
    target_table: str
    # Emit an OPTIMIZE statement.
    optimize: bool = False
    # Columns for the ZORDER BY clause of OPTIMIZE; empty means no clause.
    zorder_columns: tuple[str, ...] = ()
    # Emit VACUUM with this retention in hours; None skips VACUUM.
    vacuum_retention_hours: int | None = None
    # Emit ANALYZE TABLE ... COMPUTE STATISTICS.
    analyze: bool = False
    # Table properties for ALTER TABLE ... SET TBLPROPERTIES; None or empty skips it.
    delta_properties: dict[str, str] | None = None
19
+
20
+
21
def render_optimize_sql(target_table: str, *, zorder_columns: tuple[str, ...] = ()) -> str:
    """Render ``OPTIMIZE <table>``, adding ``ZORDER BY (...)`` when columns are given.

    The table name and each Z-order column are quoted with the package's
    identifier helpers.
    """
    base_statement = f"OPTIMIZE {quote_table_name(target_table)}"
    if not zorder_columns:
        return base_statement
    quoted_columns = [quote_identifier(column) for column in zorder_columns]
    return f"{base_statement} ZORDER BY ({', '.join(quoted_columns)})"
27
+
28
+
29
def render_vacuum_sql(target_table: str, *, retention_hours: int) -> str:
    """Render ``VACUUM <table> RETAIN <retention_hours> HOURS``.

    Raises:
        ValueError: if ``retention_hours`` is negative.
    """
    if retention_hours < 0:
        raise ValueError("vacuum retention must be non-negative")
    quoted_table = quote_table_name(target_table)
    return f"VACUUM {quoted_table} RETAIN {retention_hours} HOURS"
33
+
34
+
35
def render_analyze_sql(target_table: str) -> str:
    """Render ``ANALYZE TABLE <table> COMPUTE STATISTICS`` for the quoted table."""
    quoted_table = quote_table_name(target_table)
    return f"ANALYZE TABLE {quoted_table} COMPUTE STATISTICS"
37
+
38
+
39
def render_alter_table_properties_sql(target_table: str, properties: dict[str, str]) -> str:
    """Render ``ALTER TABLE <table> SET TBLPROPERTIES (...)``.

    Properties are emitted as ``'key' = 'value'`` pairs in sorted item order,
    so the output does not depend on dict insertion order.

    Raises:
        ValueError: if ``properties`` is empty.
    """
    if not properties:
        raise ValueError("delta properties must not be empty")
    assignments = []
    for key, value in sorted(properties.items()):
        assignments.append(f"{_sql_string(key)} = {_sql_string(value)}")
    rendered_properties = ", ".join(assignments)
    return f"ALTER TABLE {quote_table_name(target_table)} SET TBLPROPERTIES ({rendered_properties})"
44
+
45
+
46
def render_maintenance_plan_sql(plan: MaintenancePlan) -> tuple[str, ...]:
    """Render the statements enabled by ``plan``, in a fixed order.

    The order is: table properties, OPTIMIZE, VACUUM, ANALYZE. Steps the plan
    does not enable are skipped, so the result may be empty.
    """

    def _enabled_statements():
        if plan.delta_properties:
            yield render_alter_table_properties_sql(plan.target_table, plan.delta_properties)
        if plan.optimize:
            yield render_optimize_sql(plan.target_table, zorder_columns=plan.zorder_columns)
        # 0 is a valid retention, so test against None rather than truthiness.
        if plan.vacuum_retention_hours is not None:
            yield render_vacuum_sql(plan.target_table, retention_hours=plan.vacuum_retention_hours)
        if plan.analyze:
            yield render_analyze_sql(plan.target_table)

    return tuple(_enabled_statements())
57
+
58
+
59
def execute_maintenance_plan(runner: SqlRunner, plan: MaintenancePlan) -> tuple[str, ...]:
    """Render ``plan`` and run each statement through ``runner.sql`` in order.

    Returns the rendered statements. An exception from ``runner.sql``
    propagates, so later statements are not attempted.
    """
    rendered = render_maintenance_plan_sql(plan)
    for sql_text in rendered:
        runner.sql(sql_text)
    return rendered
64
+
65
+
66
+ def _sql_string(value: object) -> str:
67
+ return "'" + str(value).replace("'", "''") + "'"
68
+
@@ -0,0 +1,19 @@
1
+ from contractforge_databricks.metrics.history import (
2
+ latest_operation_metrics_from_history_row,
3
+ render_delta_history_query,
4
+ )
5
+ from contractforge_databricks.metrics.write import (
6
+ extract_delta_row_metrics,
7
+ logical_row_metrics,
8
+ normalize_rows_written,
9
+ resolve_write_metrics,
10
+ )
11
+
12
+ __all__ = [
13
+ "extract_delta_row_metrics",
14
+ "latest_operation_metrics_from_history_row",
15
+ "logical_row_metrics",
16
+ "normalize_rows_written",
17
+ "render_delta_history_query",
18
+ "resolve_write_metrics",
19
+ ]
@@ -0,0 +1,21 @@
1
+ """Delta history metric helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from contractforge_databricks.sql import quote_table_name
8
+
9
+
10
def render_delta_history_query(*, target_table: str, limit: int = 1) -> str:
    """Render ``DESCRIBE HISTORY <table> LIMIT <limit>`` for the quoted table.

    ``limit`` is coerced with ``int()`` before being placed in the statement.
    """
    quoted_table = quote_table_name(target_table)
    return f"DESCRIBE HISTORY {quoted_table} LIMIT {int(limit)}"
12
+
13
+
14
def latest_operation_metrics_from_history_row(row: dict[str, Any] | None) -> dict[str, Any]:
    """Extract ``version``, ``operation`` and ``operationMetrics`` from a history row.

    Returns an empty dict for a missing or empty row. Missing ``version`` and
    ``operation`` keys become ``None``; a missing or falsy
    ``operationMetrics`` becomes ``{}``.
    """
    if not row:
        return {}
    summary: dict[str, Any] = {field: row.get(field) for field in ("version", "operation")}
    summary["operationMetrics"] = row.get("operationMetrics") or {}
    return summary
@@ -0,0 +1,63 @@
1
+ """Databricks Delta write metric normalization."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from contractforge_core.metrics import (
8
+ logical_row_metrics as core_logical_row_metrics,
9
+ normalize_rows_written as normalize_rows_written,
10
+ )
11
+ from contractforge_core.semantic import SemanticContract
12
+ from contractforge_databricks.contract_extensions import databricks_extensions
13
+
14
+
15
def extract_delta_row_metrics(metrics: dict[str, Any]) -> dict[str, int]:
    """Normalize Delta ``operationMetrics`` into integer row counters.

    Reads ``metrics["operationMetrics"]`` and returns ``rows_inserted``,
    ``rows_updated``, ``rows_deleted`` and ``rows_expired`` (always 0 here).
    ``rows_inserted`` uses ``numTargetRowsInserted`` and falls back to
    ``numOutputRows`` when the former is absent or ``None``.

    A counter is 0 when its metric is missing, ``None`` or not convertible
    with ``int()``. An ``operationMetrics`` value that is not a mapping (for
    example a serialized string) is treated as empty instead of raising.
    """
    from collections.abc import Mapping

    operation = metrics.get("operationMetrics") or {}
    if not isinstance(operation, Mapping):
        # Key lookups on a non-mapping would raise TypeError or match substrings.
        operation = {}

    def parse(*names: str) -> int:
        for name in names:
            raw = operation.get(name)
            if raw is None:
                continue
            try:
                return int(raw)
            except (TypeError, ValueError, OverflowError):
                # Present but unparsable: report 0 rather than trying the fallback.
                return 0
        return 0

    return {
        "rows_inserted": parse("numTargetRowsInserted", "numOutputRows"),
        "rows_updated": parse("numTargetRowsUpdated"),
        "rows_deleted": parse("numTargetRowsDeleted"),
        "rows_expired": 0,
    }
33
+
34
+
35
def resolve_write_metrics(
    contract: SemanticContract,
    rows_written: int,
    delta_metrics: dict[str, Any],
) -> tuple[dict[str, int], dict[str, Any], str]:
    """Combine logical row metrics with Delta operation metrics.

    Returns ``(row_metrics, operation_metrics, source)``.
    ``operation_metrics`` is a shallow copy of ``delta_metrics`` extended with
    ``logicalMetrics`` and ``normalizedRowMetrics``.

    When ``delta_metrics`` carries a truthy ``operationMetrics`` entry, the
    row metrics come from ``extract_delta_row_metrics`` and ``source`` is
    ``"mixed"``; otherwise the logical metrics are used and ``source`` is
    ``"logical"``.
    """
    logical = logical_row_metrics(contract, rows_written)
    operation_metrics = dict(delta_metrics or {})
    operation_metrics["logicalMetrics"] = logical

    if not operation_metrics.get("operationMetrics"):
        operation_metrics["normalizedRowMetrics"] = logical
        return logical, operation_metrics, "logical"

    row_metrics = extract_delta_row_metrics(operation_metrics)
    if contract.write.mode == "scd2_historical":
        # For this mode the updated-row count is reported as expired rows.
        row_metrics["rows_expired"] = row_metrics["rows_updated"]
    delta_rows_affected = sum(
        row_metrics[counter] for counter in ("rows_inserted", "rows_updated", "rows_deleted")
    )
    # Report whichever of the logical and Delta-derived totals is larger.
    row_metrics["rows_affected"] = max(logical["rows_affected"], delta_rows_affected)
    operation_metrics["normalizedRowMetrics"] = row_metrics
    return row_metrics, operation_metrics, "mixed"
53
+
54
+
55
def logical_row_metrics(contract: SemanticContract, rows_written: int) -> dict[str, int]:
    """Return the core logical row metrics with one Databricks adjustment.

    When rows were written, the write mode is ``scd1_upsert`` and the
    Databricks extension ``merge_strategy`` is ``replace_partitions``,
    ``rows_inserted`` is overwritten with ``rows_written``.
    """
    metrics = core_logical_row_metrics(contract, rows_written)
    if int(rows_written or 0) <= 0:
        return metrics
    if contract.write.mode != "scd1_upsert":
        return metrics
    if databricks_extensions(contract).get("merge_strategy") != "replace_partitions":
        return metrics
    metrics["rows_inserted"] = int(rows_written)
    return metrics
@@ -0,0 +1,4 @@
1
+ from contractforge_databricks.operations.application import record_operations_contract
2
+ from contractforge_databricks.operations.sql import render_operations_insert_sql, render_operations_json
3
+
4
+ __all__ = ["record_operations_contract", "render_operations_insert_sql", "render_operations_json"]
@@ -0,0 +1,38 @@
1
+ """Record Databricks operations metadata with an injected SQL runner."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+
7
+ from contractforge_core.results import OperationsRecordResult
8
+ from contractforge_core.semantic import SemanticContract
9
+ from contractforge_databricks.environment import DatabricksEnvironment
10
+ from contractforge_databricks.execution.sql_merge import SqlRunner
11
+ from contractforge_databricks.operations.sql import has_operations_metadata, render_operations_insert_sql
12
+ from contractforge_databricks.security import exception_message
13
+
14
+
15
def record_operations_contract(
    *,
    runner: SqlRunner,
    contract: SemanticContract,
    environment: DatabricksEnvironment | None = None,
    run_id: str = "${run_id}",
    recorded_at_utc: datetime | None = None,
) -> OperationsRecordResult:
    """Insert the contract's operations metadata through the injected SQL runner.

    Returns an ``OperationsRecordResult`` whose status is:

    * ``"NOT_CONFIGURED"`` when the contract has no operations metadata
      (no SQL is rendered or executed);
    * ``"RECORDED"`` when ``runner.sql`` completes, with the executed statement;
    * ``"FAILED"`` when ``runner.sql`` raises, with the statement and the
      message produced by ``exception_message``.
    """
    if not has_operations_metadata(contract):
        return OperationsRecordResult(status="NOT_CONFIGURED")

    # Fall back to a default environment when none (or a falsy one) is supplied.
    target_env = environment if environment else DatabricksEnvironment()
    insert_sql = render_operations_insert_sql(
        contract,
        run_id=run_id,
        status="RECORDED",
        recorded_at_utc=recorded_at_utc,
        catalog=target_env.evidence_catalog,
        schema=target_env.evidence_schema,
    )

    try:
        runner.sql(insert_sql)
    except Exception as error:
        # Failures are reported in the result rather than raised to the caller.
        return OperationsRecordResult(
            status="FAILED",
            sql=insert_sql,
            error=exception_message(error),
        )
    else:
        return OperationsRecordResult(status="RECORDED", sql=insert_sql)
@@ -0,0 +1,95 @@
1
+ """Render Databricks operations metadata evidence."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime
7
+ from typing import Any
8
+
9
+ from contractforge_core.semantic import SemanticContract
10
+ from contractforge_databricks.coercion import mapping, string_list
11
+ from contractforge_databricks.evidence.tables import evidence_table_names
12
+ from contractforge_databricks.rendering.names import target_full_name
13
+ from contractforge_databricks.sql import quote_table_name, sql_int, sql_string
14
+
15
# Column list for the operations evidence INSERT statement; it is joined into
# the statement's column clause in exactly this order.
_COLUMNS = [
    # Run and target identity.
    "run_id", "target_table",
    # Operational expectations.
    "criticality", "expected_frequency", "freshness_sla_minutes",
    # Alerting switches and runbook link.
    "alert_on_failure", "alert_on_quality_fail", "runbook_url",
    # JSON-encoded ownership and tagging details.
    "ownership_json", "owners_json", "groups_json", "tags_json",
    # Record status and timestamp.
    "status", "recorded_at_utc",
]
31
+
32
+
33
def render_operations_json(contract: SemanticContract) -> str:
    """Serialize the contract's operations payload as key-sorted JSON indented by two spaces."""
    payload = _operations_payload(contract)
    return json.dumps(payload, sort_keys=True, indent=2)
35
+
36
+
37
def has_operations_metadata(contract: SemanticContract) -> bool:
    """Report whether the contract has a truthy ``operations`` section with truthy ``metadata``."""
    operations = contract.operations
    if not operations:
        return False
    return bool(operations.metadata)
39
+
40
+
41
def render_operations_insert_sql(
    contract: SemanticContract,
    *,
    run_id: str = "${run_id}",
    status: str = "PLANNED",
    recorded_at_utc: datetime | None = None,
    catalog: str = "main",
    schema: str = "ops",
) -> str:
    """Render the INSERT statement that records operations metadata evidence.

    Args:
        contract: Contract whose operations metadata is rendered.
        run_id: Value written to the ``run_id`` column.
        status: Value written to the ``status`` column.
        recorded_at_utc: Timestamp for the ``recorded_at_utc`` column. Naive
            values are rendered as given; timezone-aware values are converted
            to UTC first. When omitted, 1970-01-01 00:00:00 is used.
        catalog: Catalog passed to ``evidence_table_names``.
        schema: Schema passed to ``evidence_table_names``.

    Returns:
        A single ``INSERT INTO ... VALUES (...)`` statement whose values are
        listed in the same order as ``_COLUMNS``.
    """
    # Local import: the module-level import only brings in ``datetime``.
    from datetime import timezone

    payload = _operations_payload(contract)
    table = evidence_table_names(catalog, schema)["operations"]

    recorded_at_utc = recorded_at_utc or datetime(1970, 1, 1, 0, 0, 0)
    if recorded_at_utc.utcoffset() is not None:
        # strftime formats the wall-clock time of the datetime's own zone, so an
        # aware non-UTC value would otherwise be stored with the wrong instant
        # in a column that is labelled UTC.
        recorded_at_utc = recorded_at_utc.astimezone(timezone.utc)
    timestamp_text = recorded_at_utc.strftime("%Y-%m-%d %H:%M:%S")

    # Order must stay aligned with ``_COLUMNS``.
    values = [
        sql_string(run_id),
        sql_string(target_full_name(contract)),
        sql_string(payload.get("criticality")),
        sql_string(payload.get("expected_frequency")),
        sql_int(payload.get("freshness_sla_minutes")),
        _sql_bool(payload.get("alert_on_failure")),
        _sql_bool(payload.get("alert_on_quality_fail")),
        sql_string(payload.get("runbook_url")),
        _json(payload.get("ownership")),
        _json(payload.get("owners")),
        _json(payload.get("groups")),
        _json(payload.get("tags")),
        sql_string(status),
        f"TIMESTAMP {sql_string(timestamp_text)}",
    ]
    return f"INSERT INTO {quote_table_name(table)} ({', '.join(_COLUMNS)}) VALUES ({', '.join(values)})"
70
+
71
+
72
def _operations_payload(contract: SemanticContract) -> dict[str, Any]:
    """Build the normalized operations payload from the contract's operations metadata.

    Fields are read from the metadata's ``"operations"`` entry when ``mapping``
    returns a truthy value for it, otherwise from the metadata itself.
    ``"ownership"`` is always read from the top-level metadata. Missing
    metadata yields a payload built from an empty dict.
    """
    section = contract.operations
    raw = section.metadata if section and section.metadata else {}
    ownership = mapping(raw.get("ownership"))
    source = mapping(raw.get("operations")) or raw
    lookup = source.get

    payload: dict[str, Any] = {}
    # Plain pass-through values.
    for key in ("criticality", "expected_frequency", "freshness_sla_minutes"):
        payload[key] = lookup(key)
    # Alert switches are coerced to bool and default to False.
    for flag in ("alert_on_failure", "alert_on_quality_fail"):
        payload[flag] = bool(lookup(flag, False))
    payload["runbook_url"] = lookup("runbook_url")
    payload["ownership"] = ownership
    # Owner and group values go through ``string_list`` with ``|`` as separator.
    for key in ("owners", "groups"):
        payload[key] = string_list(lookup(key), sep="|")
    payload["tags"] = mapping(lookup("tags"))
    return payload
88
+
89
+
90
def _json(value: object) -> str:
    """Encode ``value`` as compact, key-sorted JSON and pass it through ``sql_string``."""
    compact = json.dumps(value, separators=(",", ":"), sort_keys=True)
    return sql_string(compact)
92
+
93
+
94
def _sql_bool(value: object) -> str:
    """Map the truthiness of ``value`` to the literal ``"true"`` or ``"false"``."""
    if value:
        return "true"
    return "false"
@@ -0,0 +1,18 @@
1
+ from contractforge_databricks.parity.catalog import (
2
+ build_write_engine_parity_plan,
3
+ get_write_engine_parity_scenario,
4
+ list_write_engine_parity_scenarios,
5
+ scenarios_for_engine,
6
+ scenarios_for_mode,
7
+ )
8
+ from contractforge_core.parity import ParityMetricExpectation, WriteEngineParityScenario
9
+
10
+ __all__ = [
11
+ "ParityMetricExpectation",
12
+ "WriteEngineParityScenario",
13
+ "build_write_engine_parity_plan",
14
+ "get_write_engine_parity_scenario",
15
+ "list_write_engine_parity_scenarios",
16
+ "scenarios_for_engine",
17
+ "scenarios_for_mode",
18
+ ]