contractforge-databricks 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. contractforge_databricks/__init__.py +172 -0
  2. contractforge_databricks/adapter.py +69 -0
  3. contractforge_databricks/annotations/__init__.py +10 -0
  4. contractforge_databricks/annotations/application.py +52 -0
  5. contractforge_databricks/annotations/audit.py +49 -0
  6. contractforge_databricks/annotations/sql.py +142 -0
  7. contractforge_databricks/api.py +65 -0
  8. contractforge_databricks/bundles/__init__.py +9 -0
  9. contractforge_databricks/bundles/assets.py +47 -0
  10. contractforge_databricks/bundles/project.py +213 -0
  11. contractforge_databricks/bundles/project_config.py +133 -0
  12. contractforge_databricks/capabilities/__init__.py +17 -0
  13. contractforge_databricks/capabilities/builders.py +43 -0
  14. contractforge_databricks/capabilities/evaluate.py +162 -0
  15. contractforge_databricks/capabilities/mapping.py +36 -0
  16. contractforge_databricks/capabilities/models.py +44 -0
  17. contractforge_databricks/capabilities/runtime.py +111 -0
  18. contractforge_databricks/capabilities/uc.py +47 -0
  19. contractforge_databricks/cli.py +196 -0
  20. contractforge_databricks/cli_deploy.py +98 -0
  21. contractforge_databricks/cli_governance.py +142 -0
  22. contractforge_databricks/cli_io.py +91 -0
  23. contractforge_databricks/cli_maintenance.py +69 -0
  24. contractforge_databricks/coercion.py +31 -0
  25. contractforge_databricks/contract_extensions.py +70 -0
  26. contractforge_databricks/cost/__init__.py +11 -0
  27. contractforge_databricks/cost/model.py +22 -0
  28. contractforge_databricks/cost/report.py +65 -0
  29. contractforge_databricks/cost/sql.py +136 -0
  30. contractforge_databricks/dashboards/__init__.py +15 -0
  31. contractforge_databricks/dashboards/control_tables.py +150 -0
  32. contractforge_databricks/diagnostics/__init__.py +7 -0
  33. contractforge_databricks/diagnostics/explain.py +40 -0
  34. contractforge_databricks/environment.py +53 -0
  35. contractforge_databricks/evidence/__init__.py +98 -0
  36. contractforge_databricks/evidence/ddl.py +35 -0
  37. contractforge_databricks/evidence/governance_log.py +175 -0
  38. contractforge_databricks/evidence/helpers.py +29 -0
  39. contractforge_databricks/evidence/ops_log.py +210 -0
  40. contractforge_databricks/evidence/records.py +27 -0
  41. contractforge_databricks/evidence/run_log.py +74 -0
  42. contractforge_databricks/evidence/schemas.py +7 -0
  43. contractforge_databricks/evidence/sql.py +144 -0
  44. contractforge_databricks/evidence/tables.py +20 -0
  45. contractforge_databricks/evidence/writer.py +118 -0
  46. contractforge_databricks/execution/__init__.py +70 -0
  47. contractforge_databricks/execution/delta_basic.py +57 -0
  48. contractforge_databricks/execution/hash_diff.py +126 -0
  49. contractforge_databricks/execution/hash_diff_latest.py +142 -0
  50. contractforge_databricks/execution/replace_partitions.py +40 -0
  51. contractforge_databricks/execution/results.py +5 -0
  52. contractforge_databricks/execution/retry.py +36 -0
  53. contractforge_databricks/execution/scd2.py +213 -0
  54. contractforge_databricks/execution/scd2_deletes.py +65 -0
  55. contractforge_databricks/execution/scd2_late.py +30 -0
  56. contractforge_databricks/execution/snapshot.py +77 -0
  57. contractforge_databricks/execution/sql_merge.py +85 -0
  58. contractforge_databricks/execution/tables.py +98 -0
  59. contractforge_databricks/execution/windows.py +58 -0
  60. contractforge_databricks/governance/__init__.py +30 -0
  61. contractforge_databricks/governance/access.py +185 -0
  62. contractforge_databricks/governance/application.py +93 -0
  63. contractforge_databricks/governance/drift.py +49 -0
  64. contractforge_databricks/governance/runtime.py +60 -0
  65. contractforge_databricks/governance/sql.py +31 -0
  66. contractforge_databricks/governance/validation.py +135 -0
  67. contractforge_databricks/lakeflow/__init__.py +21 -0
  68. contractforge_databricks/lakeflow/compatibility.py +194 -0
  69. contractforge_databricks/lakeflow/rendering.py +175 -0
  70. contractforge_databricks/lineage/__init__.py +7 -0
  71. contractforge_databricks/lineage/openlineage.py +182 -0
  72. contractforge_databricks/maintenance/__init__.py +27 -0
  73. contractforge_databricks/maintenance/retention.py +90 -0
  74. contractforge_databricks/maintenance/sql.py +68 -0
  75. contractforge_databricks/metrics/__init__.py +19 -0
  76. contractforge_databricks/metrics/history.py +21 -0
  77. contractforge_databricks/metrics/write.py +63 -0
  78. contractforge_databricks/operations/__init__.py +4 -0
  79. contractforge_databricks/operations/application.py +38 -0
  80. contractforge_databricks/operations/sql.py +95 -0
  81. contractforge_databricks/parity/__init__.py +18 -0
  82. contractforge_databricks/parity/catalog.py +59 -0
  83. contractforge_databricks/parity/models.py +7 -0
  84. contractforge_databricks/parity/scenarios.py +111 -0
  85. contractforge_databricks/partitioning/__init__.py +3 -0
  86. contractforge_databricks/partitioning/predicates.py +28 -0
  87. contractforge_databricks/preparation/__init__.py +47 -0
  88. contractforge_databricks/preparation/deduplicate.py +87 -0
  89. contractforge_databricks/preparation/encoding.py +37 -0
  90. contractforge_databricks/preparation/hashing.py +18 -0
  91. contractforge_databricks/preparation/pyspark.py +178 -0
  92. contractforge_databricks/preparation/pyspark_staging.py +70 -0
  93. contractforge_databricks/preparation/shape.py +209 -0
  94. contractforge_databricks/preparation/shape_validation.py +94 -0
  95. contractforge_databricks/preparation/staging.py +17 -0
  96. contractforge_databricks/preparation/zip_arrays.py +51 -0
  97. contractforge_databricks/presets/__init__.py +3 -0
  98. contractforge_databricks/presets/base.py +24 -0
  99. contractforge_databricks/presets/bronze.py +57 -0
  100. contractforge_databricks/presets/catalog.py +22 -0
  101. contractforge_databricks/presets/core.py +134 -0
  102. contractforge_databricks/presets/gold.py +62 -0
  103. contractforge_databricks/presets/modifiers.py +51 -0
  104. contractforge_databricks/presets/runtime.py +22 -0
  105. contractforge_databricks/presets/silver.py +101 -0
  106. contractforge_databricks/presets/write_engine.py +57 -0
  107. contractforge_databricks/quality/__init__.py +41 -0
  108. contractforge_databricks/quality/evaluation.py +178 -0
  109. contractforge_databricks/quality/persistence.py +81 -0
  110. contractforge_databricks/quality/registry.py +134 -0
  111. contractforge_databricks/quality/results.py +17 -0
  112. contractforge_databricks/quality/sql.py +113 -0
  113. contractforge_databricks/rendering/__init__.py +11 -0
  114. contractforge_databricks/rendering/bundle.py +93 -0
  115. contractforge_databricks/rendering/markdown.py +50 -0
  116. contractforge_databricks/rendering/names.py +56 -0
  117. contractforge_databricks/results.py +15 -0
  118. contractforge_databricks/runtime/__init__.py +101 -0
  119. contractforge_databricks/runtime/available_now.py +147 -0
  120. contractforge_databricks/runtime/bundles.py +211 -0
  121. contractforge_databricks/runtime/cache.py +20 -0
  122. contractforge_databricks/runtime/control_tables.py +19 -0
  123. contractforge_databricks/runtime/deploy.py +197 -0
  124. contractforge_databricks/runtime/detection.py +114 -0
  125. contractforge_databricks/runtime/dry_run.py +46 -0
  126. contractforge_databricks/runtime/errors.py +54 -0
  127. contractforge_databricks/runtime/file_selection.py +109 -0
  128. contractforge_databricks/runtime/finalization.py +168 -0
  129. contractforge_databricks/runtime/governance.py +37 -0
  130. contractforge_databricks/runtime/hooks.py +45 -0
  131. contractforge_databricks/runtime/http_file.py +37 -0
  132. contractforge_databricks/runtime/http_retry.py +15 -0
  133. contractforge_databricks/runtime/http_safety.py +9 -0
  134. contractforge_databricks/runtime/json_materialization.py +97 -0
  135. contractforge_databricks/runtime/lineage.py +164 -0
  136. contractforge_databricks/runtime/maintenance.py +43 -0
  137. contractforge_databricks/runtime/merge_validation.py +98 -0
  138. contractforge_databricks/runtime/metadata.py +21 -0
  139. contractforge_databricks/runtime/metrics.py +34 -0
  140. contractforge_databricks/runtime/models.py +32 -0
  141. contractforge_databricks/runtime/options.py +33 -0
  142. contractforge_databricks/runtime/orchestration_context.py +185 -0
  143. contractforge_databricks/runtime/orchestrator.py +147 -0
  144. contractforge_databricks/runtime/partitioning.py +93 -0
  145. contractforge_databricks/runtime/quality_quarantine.py +92 -0
  146. contractforge_databricks/runtime/rest_api.py +46 -0
  147. contractforge_databricks/runtime/rest_auth.py +21 -0
  148. contractforge_databricks/runtime/rest_pagination.py +21 -0
  149. contractforge_databricks/runtime/run_payload.py +177 -0
  150. contractforge_databricks/runtime/schema.py +106 -0
  151. contractforge_databricks/runtime/source_metadata.py +30 -0
  152. contractforge_databricks/runtime/source_registry.py +43 -0
  153. contractforge_databricks/runtime/source_schema.py +24 -0
  154. contractforge_databricks/runtime/sources.py +208 -0
  155. contractforge_databricks/runtime/spark.py +183 -0
  156. contractforge_databricks/runtime/spark_defaults.py +35 -0
  157. contractforge_databricks/runtime/storage_auth.py +132 -0
  158. contractforge_databricks/runtime/streaming.py +131 -0
  159. contractforge_databricks/runtime/success.py +104 -0
  160. contractforge_databricks/runtime/utils.py +52 -0
  161. contractforge_databricks/runtime/watermark.py +71 -0
  162. contractforge_databricks/runtime/windows.py +184 -0
  163. contractforge_databricks/runtime/write.py +66 -0
  164. contractforge_databricks/runtime/write_flow.py +146 -0
  165. contractforge_databricks/runtime/write_strategy.py +40 -0
  166. contractforge_databricks/schema/__init__.py +21 -0
  167. contractforge_databricks/schema/diff.py +11 -0
  168. contractforge_databricks/schema/policy.py +33 -0
  169. contractforge_databricks/schema/sync.py +23 -0
  170. contractforge_databricks/security/__init__.py +21 -0
  171. contractforge_databricks/security/errors.py +5 -0
  172. contractforge_databricks/security/redaction.py +5 -0
  173. contractforge_databricks/security/secrets.py +114 -0
  174. contractforge_databricks/security/source_policy.py +17 -0
  175. contractforge_databricks/shapes/__init__.py +3 -0
  176. contractforge_databricks/shapes/sql.py +123 -0
  177. contractforge_databricks/sources/__init__.py +67 -0
  178. contractforge_databricks/sources/artifacts.py +100 -0
  179. contractforge_databricks/sources/autoloader.py +48 -0
  180. contractforge_databricks/sources/bounded_streams.py +44 -0
  181. contractforge_databricks/sources/classification.py +115 -0
  182. contractforge_databricks/sources/delta_share.py +21 -0
  183. contractforge_databricks/sources/files.py +48 -0
  184. contractforge_databricks/sources/http_file.py +46 -0
  185. contractforge_databricks/sources/interpret.py +76 -0
  186. contractforge_databricks/sources/jdbc.py +32 -0
  187. contractforge_databricks/sources/metadata.py +18 -0
  188. contractforge_databricks/sources/native_passthrough.py +33 -0
  189. contractforge_databricks/sources/rds_iam.py +15 -0
  190. contractforge_databricks/sources/rds_iam_runtime.py +191 -0
  191. contractforge_databricks/sources/rest_api.py +33 -0
  192. contractforge_databricks/sources/support.py +50 -0
  193. contractforge_databricks/sources/table_refs.py +65 -0
  194. contractforge_databricks/sql/__init__.py +4 -0
  195. contractforge_databricks/sql/identifiers.py +17 -0
  196. contractforge_databricks/sql/literals.py +36 -0
  197. contractforge_databricks/state/__init__.py +39 -0
  198. contractforge_databricks/state/ddl.py +24 -0
  199. contractforge_databricks/state/migrations.py +146 -0
  200. contractforge_databricks/state/queries.py +149 -0
  201. contractforge_databricks/state/sql.py +116 -0
  202. contractforge_databricks/state/tables.py +9 -0
  203. contractforge_databricks/state/writer.py +83 -0
  204. contractforge_databricks/templates/__init__.py +15 -0
  205. contractforge_databricks/templates/catalog.py +205 -0
  206. contractforge_databricks/templates/catalog_parity.py +85 -0
  207. contractforge_databricks/templates/core.py +83 -0
  208. contractforge_databricks/templates/enrichment.py +175 -0
  209. contractforge_databricks/transforms/__init__.py +3 -0
  210. contractforge_databricks/transforms/sql.py +118 -0
  211. contractforge_databricks/watermark/__init__.py +6 -0
  212. contractforge_databricks/watermark/sql.py +91 -0
  213. contractforge_databricks/write_modes/__init__.py +20 -0
  214. contractforge_databricks/write_modes/registry.py +44 -0
  215. contractforge_databricks/write_modes/sql.py +33 -0
  216. contractforge_databricks/write_modes/strategy.py +192 -0
  217. contractforge_databricks-0.1.0.dist-info/METADATA +34 -0
  218. contractforge_databricks-0.1.0.dist-info/RECORD +220 -0
  219. contractforge_databricks-0.1.0.dist-info/WHEEL +4 -0
  220. contractforge_databricks-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,31 @@
1
+ """Render reviewable Unity Catalog governance SQL."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_core.semantic import SemanticContract
6
+ from contractforge_databricks.annotations import render_annotations_sql
7
+ from contractforge_databricks.governance.access import render_access_sql
8
+ from contractforge_databricks.rendering.names import target_full_name
9
+
10
+
11
def render_governance_sql(contract: SemanticContract) -> str:
    """Render the reviewable governance SQL script for ``contract``.

    Returns a placeholder comment when the contract declares no governance
    section. Otherwise the script is a review header, an optional owner-intent
    comment and the rendered access SQL; when the annotations renderer produces
    real output, that output is placed in front of the access section.
    """
    governance = contract.governance
    if governance is None:
        return "-- No governance intent declared.\n"

    target = target_full_name(contract)
    header = [
        "-- Review before execution. Function names and privileges are contract-owned.",
        f"-- Target: {target}",
        "",
    ]
    owner_lines = [f"-- Owner intent: {governance.owner}"] if governance.owner else []
    access_body = render_access_sql(contract)
    body_lines = [access_body] if access_body else []
    access_sql = "\n".join([*header, *owner_lines, *body_lines]) + "\n"

    annotations_sql = render_annotations_sql(contract)
    # This marker comment is how "no annotations declared" is recognised here;
    # in that case only the access section is returned.
    if annotations_sql.startswith("-- No annotations intent declared."):
        return access_sql
    return f"{annotations_sql}\n{access_sql}"
@@ -0,0 +1,135 @@
1
+ """Databricks governance validation and access drift helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from contractforge_core.semantic import SemanticContract
8
+ from contractforge_databricks.coercion import mapping, mapping_list, string_list
9
+ from contractforge_databricks.rendering.names import target_full_name
10
+
11
+
12
def governance_referenced_columns(contract: SemanticContract) -> dict[str, list[str]]:
    """Collect the column names referenced by the contract's governance section.

    Returns a dict of sorted, de-duplicated column lists:

    * ``annotations``: keys of ``governance.annotations["columns"]``
    * ``row_filters``: every entry of ``columns`` on each access row filter
    * ``column_masks``: each mask's ``column`` plus its ``using_columns``
    * ``all``: the union of the three lists above

    A contract without a governance section yields empty lists.
    """
    governance = contract.governance
    annotations = mapping(governance.annotations if governance else None)
    access = mapping(governance.access if governance else None)

    # Coerce ``columns`` through mapping(), as validate_governance_contract does,
    # so an explicit ``columns: null`` behaves like an absent key instead of
    # failing on ``None.keys()``.
    annotation_columns = sorted(mapping(annotations.get("columns")).keys())
    row_filter_columns = sorted(
        {
            column
            for row_filter in mapping_list(access.get("row_filters"))
            for column in string_list(row_filter.get("columns"))
        }
    )
    mask_columns = sorted(
        {
            column
            for mask in mapping_list(access.get("column_masks"))
            for column in [str(mask.get("column")), *string_list(mask.get("using_columns"))]
            # str(None) == "None": this drops masks that declare no ``column``.
            if column and column != "None"
        }
    )
    all_columns = sorted(set(annotation_columns) | set(row_filter_columns) | set(mask_columns))
    return {
        "annotations": annotation_columns,
        "row_filters": row_filter_columns,
        "column_masks": mask_columns,
        "all": all_columns,
    }
38
+
39
+
40
def validate_governance_contract(
    contract: SemanticContract,
    *,
    existing_columns: set[str] | list[str] | tuple[str, ...],
    target_table: str | None = None,
) -> dict[str, Any]:
    """Validate governance annotations and column references of ``contract``.

    Checks performed:

    * a table tagged ``contains_pii=true`` must have at least one column with
      ``pii.enabled`` set (severity ``fail``);
    * every PII column should carry a ``description`` (severity ``warn``);
    * every column referenced by annotations, row filters or column masks must
      be present in ``existing_columns`` (severity ``fail``).

    Returns a dict with ``status`` (``"FAILED"`` when any issue has severity
    ``fail``, otherwise ``"SUCCESS"``), ``target_table`` (the argument, or the
    contract's full target name when omitted), ``references`` and ``issues``.
    """
    references = governance_referenced_columns(contract)
    known_columns = {str(name) for name in existing_columns}

    governance = contract.governance
    annotations = mapping(governance.annotations if governance else None)
    table_annotations = mapping(annotations.get("table"))
    column_annotations = mapping(annotations.get("columns"))
    issues = []

    table_tags = mapping(table_annotations.get("tags"))
    declares_pii = str(table_tags.get("contains_pii", "")).lower() == "true"
    pii_columns = sorted(
        name
        for name, config in column_annotations.items()
        if mapping(mapping(config).get("pii")).get("enabled", False)
    )
    if declares_pii and not pii_columns:
        issues.append(_issue("fail", "annotations", "table.tags.contains_pii", "contains_pii=true requires at least one column with pii.enabled=true"))
    issues.extend(
        _issue("warn", "annotations", name, f"PII column {name!r} should declare a description")
        for name in pii_columns
        if not mapping(column_annotations.get(name)).get("description")
    )

    for scope, referenced in references.items():
        # "all" is the union of the other scopes; checking it would duplicate issues.
        if scope == "all":
            continue
        unknown = sorted(set(referenced) - known_columns)
        issues.extend(
            _issue("fail", scope, name, f"Column {name!r} referenced by {scope} does not exist")
            for name in unknown
        )

    has_failure = any(issue["severity"] == "fail" for issue in issues)
    return {
        "status": "FAILED" if has_failure else "SUCCESS",
        "target_table": target_table or target_full_name(contract),
        "references": references,
        "issues": issues,
    }
75
+
76
+
77
def access_drift_report(
    contract: SemanticContract,
    *,
    current_grants: set[tuple[str, str]],
    target_table: str | None = None,
) -> dict[str, Any]:
    """Compare the grants declared by ``contract`` with ``current_grants``.

    Both sides are normalized to ``(str(principal), PRIVILEGE_UPPERCASE)``
    before comparison.

    Returns the payload built by ``_drift_payload`` with one of these statuses:

    * ``"NOT_CONFIGURED"``: the contract has no access section;
    * ``"DRIFTED"``: a declared grant is missing, or ``revoke_unmanaged`` is
      enabled and undeclared grants exist;
    * ``"IN_SYNC"``: otherwise.

    Issue severity comes from the ``on_drift`` policy (``fail`` or ``warn``).
    Unmanaged grants are always listed in the payload, but they only produce
    issues and drift when ``revoke_unmanaged`` is enabled.
    """
    resolved_target = target_table or target_full_name(contract)
    # Normalize before the early return so ``current_grants`` in the payload
    # has the same shape and casing regardless of status.
    normalized_current = {(str(principal), str(privilege).upper()) for principal, privilege in current_grants}

    access = mapping(contract.governance.access if contract.governance else None)
    if not access:
        return _drift_payload("NOT_CONFIGURED", resolved_target, set(), normalized_current, [], [])

    declared = {
        (str(grant.get("principal")), str(privilege).upper())
        for grant in mapping_list(access.get("grants"))
        for privilege in string_list(grant.get("privileges"))
    }
    missing = sorted(declared - normalized_current)
    unmanaged = sorted(normalized_current - declared)
    policy = _access_policy(access)
    # The nested ``access_policy.revoke_unmanaged`` wins over the top-level key.
    revoke_unmanaged = bool(mapping(access.get("access_policy")).get("revoke_unmanaged", access.get("revoke_unmanaged", False)))

    issues = [
        _issue(policy, "grant", f"{principal}:{privilege}", f"Declared grant is missing: {privilege} for {principal}")
        for principal, privilege in missing
    ]
    if revoke_unmanaged:
        issues.extend(
            _issue(policy, "grant", f"{principal}:{privilege}", f"Current unmanaged grant was detected: {privilege} from {principal}")
            for principal, privilege in unmanaged
        )
    status = "DRIFTED" if missing or (revoke_unmanaged and unmanaged) else "IN_SYNC"
    return _drift_payload(status, resolved_target, declared, normalized_current, missing, unmanaged, issues)
107
+
108
+
109
def _drift_payload(
    status: str,
    target_table: str,
    declared: set[tuple[str, str]],
    current: set[tuple[str, str]],
    missing: list[tuple[str, str]],
    unmanaged: list[tuple[str, str]],
    issues: list[dict[str, str]] | None = None,
) -> dict[str, Any]:
    """Assemble the access drift report dict.

    ``declared`` and ``current`` are emitted as sorted lists; ``missing`` and
    ``unmanaged`` are passed through as given. A missing or empty ``issues``
    argument becomes a fresh empty list.
    """
    payload: dict[str, Any] = {"status": status, "target_table": target_table}
    payload["declared_grants"] = sorted(declared)
    payload["current_grants"] = sorted(current)
    payload["missing_grants"] = missing
    payload["unmanaged_grants"] = unmanaged
    payload["issues"] = issues if issues else []
    return payload
127
+
128
+
129
def _access_policy(access: dict[str, Any]) -> str:
    """Return the issue severity for access drift: ``"fail"`` or ``"warn"``.

    ``access_policy.on_drift`` is consulted first, then the top-level
    ``on_drift`` key; any value other than ``"fail"`` resolves to ``"warn"``.
    """
    nested_policy = mapping(access.get("access_policy"))
    top_level = access.get("on_drift", "warn")
    on_drift = nested_policy.get("on_drift", top_level)
    if on_drift == "fail":
        return "fail"
    return "warn"
132
+
133
+
134
def _issue(severity: str, scope: str, obj: str, message: str) -> dict[str, str]:
    """Build one issue record with severity, scope, object and message keys."""
    record = {
        "severity": severity,
        "scope": scope,
        "object": obj,
        "message": message,
    }
    return record
@@ -0,0 +1,21 @@
1
+ from contractforge_databricks.lakeflow.compatibility import (
2
+ LakeflowCompatibility,
3
+ LakeflowSourceKind,
4
+ evaluate_lakeflow_compatibility,
5
+ render_lakeflow_review,
6
+ )
7
+ from contractforge_databricks.lakeflow.rendering import (
8
+ LakeflowAutoCdcArtifact,
9
+ render_lakeflow_auto_cdc_artifact,
10
+ render_lakeflow_auto_cdc_python,
11
+ )
12
+
13
+ __all__ = [
14
+ "LakeflowAutoCdcArtifact",
15
+ "LakeflowCompatibility",
16
+ "LakeflowSourceKind",
17
+ "evaluate_lakeflow_compatibility",
18
+ "render_lakeflow_auto_cdc_artifact",
19
+ "render_lakeflow_auto_cdc_python",
20
+ "render_lakeflow_review",
21
+ ]
@@ -0,0 +1,194 @@
1
+ """Lakeflow AUTO CDC compatibility checks for semantic contracts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Literal
7
+
8
+ from contractforge_core.semantic import SemanticContract
9
+ from contractforge_databricks.rendering.names import target_full_name
10
+
11
+ LakeflowStatus = Literal["compatible", "requires_translation", "unsupported"]
12
+ LakeflowSourceKind = Literal["change_feed", "snapshot"]
13
+
14
+
15
@dataclass(frozen=True)
class LakeflowCompatibility:
    """Immutable result of a Lakeflow AUTO CDC compatibility evaluation."""

    status: LakeflowStatus
    source_kind: LakeflowSourceKind
    scd_type: int | None
    target_table: str
    reasons: tuple[str, ...] = ()
    required_fields: tuple[str, ...] = ()
    mapped_fields: dict[str, Any] | None = None
    translation_required: tuple[str, ...] = ()
    unsupported_fields: tuple[str, ...] = ()
    warnings: tuple[str, ...] = ()

    @property
    def supported(self) -> bool:
        """Whether the status is anything other than ``"unsupported"``."""
        return not (self.status == "unsupported")

    def as_dict(self) -> dict[str, Any]:
        """Return a plain-dict view: tuples become lists, ``mapped_fields`` is copied."""
        mapped = dict(self.mapped_fields) if self.mapped_fields else {}
        payload: dict[str, Any] = {
            "status": self.status,
            "supported": self.supported,
            "source_kind": self.source_kind,
            "scd_type": self.scd_type,
            "target_table": self.target_table,
        }
        payload["reasons"] = list(self.reasons)
        payload["required_fields"] = list(self.required_fields)
        payload["mapped_fields"] = mapped
        payload["translation_required"] = list(self.translation_required)
        payload["unsupported_fields"] = list(self.unsupported_fields)
        payload["warnings"] = list(self.warnings)
        return payload
46
+
47
+
48
def evaluate_lakeflow_compatibility(
    contract: SemanticContract,
    *,
    source_kind: LakeflowSourceKind = "change_feed",
    source_name: str | None = None,
    keys: tuple[str, ...] = (),
    sequence_by: str | None = None,
    apply_as_truncates: str | None = None,
) -> LakeflowCompatibility:
    """Evaluate whether ``contract`` can be expressed as a Lakeflow AUTO CDC flow.

    Explicit ``keys`` and ``sequence_by`` take precedence over the contract's
    ``write.merge_keys`` and ``write.scd2_sequence_by``.

    The resulting status is:

    * ``"unsupported"`` when any field is unsupported or a required field is
      missing;
    * ``"requires_translation"`` when quality, shape, transform or selected
      operations metadata must be handled outside AUTO CDC;
    * ``"compatible"`` otherwise.

    ``mapped_fields`` of the result only contains entries whose value is not
    ``None``.
    """
    reasons: list[str] = []
    required: list[str] = []
    translation: list[str] = []
    unsupported: list[str] = []
    warnings: list[str] = []

    write = contract.write
    scd_type = _scd_type(write.mode)
    target_table = target_full_name(contract)
    # Fall back to an empty tuple so an unset (None) merge_keys is reported as
    # a missing required field below instead of failing in list().
    effective_keys = keys or write.merge_keys or ()
    effective_sequence_by = sequence_by or write.scd2_sequence_by
    mapped_fields: dict[str, Any] = {
        "target": target_table,
        "source": source_name,
        "keys": list(effective_keys),
        "sequence_by": effective_sequence_by,
        "stored_as_scd_type": scd_type,
        "apply_as_deletes": write.scd2_apply_as_deletes,
        "apply_as_truncates": apply_as_truncates,
    }

    if scd_type is None:
        unsupported.append("mode")
        reasons.append(f"{write.mode} does not map directly to Lakeflow AUTO CDC.")
    if not source_name:
        required.append("source_name")
        reasons.append("Lakeflow AUTO CDC requires a source table, view, or snapshot function.")
    if not effective_keys:
        required.append("keys")
        reasons.append("Lakeflow AUTO CDC requires stable keys.")
    if source_kind not in {"change_feed", "snapshot"}:
        unsupported.append("source_kind")
        reasons.append("Lakeflow source_kind must be 'change_feed' or 'snapshot'.")
    if source_kind == "change_feed" and scd_type == 2 and not effective_sequence_by:
        required.append("sequence_by")
        reasons.append("Lakeflow AUTO CDC SCD2 requires sequence_by.")
    if source_kind == "snapshot" and write.scd2_apply_as_deletes:
        unsupported.append("apply_as_deletes")
        reasons.append("AUTO CDC FROM SNAPSHOT derives deletes from snapshots and does not use CDC delete predicates.")
    if apply_as_truncates and scd_type == 2:
        unsupported.append("apply_as_truncates")
        reasons.append("Lakeflow apply_as_truncates is supported only for SCD type 1.")
    if scd_type == 2 and write.scd2_change_columns:
        mapped_fields["track_history_column_list"] = list(write.scd2_change_columns)
    elif scd_type == 2:
        warnings.append("SCD2 without scd2_change_columns maps to Lakeflow's default of tracking all output columns.")
    if contract.quality:
        translation.append("quality")
        reasons.append("Quality rules must be materialized upstream or enforced outside AUTO CDC.")

    # Operations metadata keys that AUTO CDC cannot express, checked in this order.
    metadata = contract.operations.metadata if contract.operations and contract.operations.metadata else {}
    metadata_translations = (
        (
            "select_columns",
            "Projection intent must be materialized upstream as the Lakeflow source table/view.",
        ),
        (
            "column_mapping",
            "Column mapping intent must be materialized upstream as the Lakeflow source table/view.",
        ),
        (
            "filter_expression",
            "Filter intent must be materialized upstream as the Lakeflow source table/view.",
        ),
        (
            "watermark_columns",
            "Watermark filtering/state remains ContractForge runtime behavior and must be resolved before AUTO CDC.",
        ),
    )
    for metadata_key, metadata_reason in metadata_translations:
        _translation_from_metadata(metadata, translation, reasons, metadata_key, metadata_reason)

    if contract.shape:
        translation.append("shape")
        reasons.append("Shape intent must be materialized upstream as the Lakeflow source table/view.")
    if contract.transform:
        translation.append("transform")
        reasons.append("Transform intent must be materialized upstream as the Lakeflow source table/view.")

    if unsupported or required:
        status: LakeflowStatus = "unsupported"
    elif translation:
        status = "requires_translation"
    else:
        status = "compatible"
        reasons.append("Contract can map to Lakeflow AUTO CDC arguments.")

    return LakeflowCompatibility(
        status=status,
        source_kind=source_kind,
        scd_type=scd_type,
        target_table=target_table,
        reasons=tuple(reasons),
        required_fields=tuple(required),
        mapped_fields={key: value for key, value in mapped_fields.items() if value is not None},
        translation_required=tuple(translation),
        unsupported_fields=tuple(unsupported),
        warnings=tuple(warnings),
    )
160
+
161
+
162
def render_lakeflow_review(compatibility: LakeflowCompatibility) -> str:
    """Render a compatibility result as a short Markdown review.

    The output is a heading, the status and SCD type, then one bullet per
    reason followed by one ``Warning:`` bullet per warning, ending in a newline.
    """
    header = [
        "# Lakeflow AUTO CDC Compatibility",
        "",
        f"- Status: `{compatibility.status}`",
        f"- SCD type: `{compatibility.scd_type}`",
        "",
    ]
    reason_bullets = [f"- {reason}" for reason in compatibility.reasons]
    warning_bullets = [f"- Warning: {warning}" for warning in compatibility.warnings]
    return "\n".join(header + reason_bullets + warning_bullets) + "\n"
175
+
176
+
177
def _scd_type(mode: str) -> int | None:
    """Map a write mode to its SCD type (1 or 2), or ``None`` for other modes."""
    known_modes = (("scd1_upsert", 1), ("scd2_historical", 2))
    for name, scd_type in known_modes:
        if mode == name:
            return scd_type
    return None
183
+
184
+
185
def _translation_from_metadata(
    metadata: dict[str, Any],
    translation: list[str],
    reasons: list[str],
    key: str,
    reason: str,
) -> None:
    """Record ``key`` as needing translation when ``metadata`` sets it.

    A value equal to ``None``, ``""``, ``[]`` or ``{}`` (or a missing key)
    counts as unset. Otherwise ``key`` is appended to ``translation`` and
    ``reason`` to ``reasons``; both lists are mutated in place.
    """
    value = metadata.get(key)
    if value in (None, "", [], {}):
        return
    translation.append(key)
    reasons.append(reason)
@@ -0,0 +1,175 @@
1
+ """Lakeflow AUTO CDC Python artifact rendering."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Literal
7
+
8
+ from contractforge_core.semantic import SemanticContract
9
+ from contractforge_databricks.lakeflow.compatibility import (
10
+ LakeflowCompatibility,
11
+ LakeflowSourceKind,
12
+ evaluate_lakeflow_compatibility,
13
+ )
14
+ from contractforge_databricks.rendering.names import target_full_name
15
+
16
+
17
@dataclass(frozen=True)
class LakeflowAutoCdcArtifact:
    """Rendered AUTO CDC code together with the compatibility result behind it."""

    language: Literal["python"]
    source_kind: LakeflowSourceKind
    code: str
    compatibility: LakeflowCompatibility

    def as_dict(self) -> dict[str, Any]:
        """Return a plain-dict view; ``compatibility`` is expanded via its ``as_dict()``."""
        payload: dict[str, Any] = {
            name: getattr(self, name) for name in ("language", "source_kind", "code")
        }
        payload["compatibility"] = self.compatibility.as_dict()
        return payload
31
+
32
+
33
def render_lakeflow_auto_cdc_python(
    contract: SemanticContract,
    *,
    source_kind: LakeflowSourceKind = "change_feed",
    source_name: str,
    keys: tuple[str, ...] = (),
    sequence_by: str | None = None,
    flow_name: str | None = None,
    apply_as_truncates: str | None = None,
    ignore_null_updates: bool = False,
    once: bool = False,
) -> str:
    """Return only the Python code of the rendered AUTO CDC artifact.

    All arguments are forwarded unchanged to
    ``render_lakeflow_auto_cdc_artifact``, so the same ``ValueError`` is raised
    for an unsupported contract.
    """
    options: dict[str, Any] = {
        "source_kind": source_kind,
        "source_name": source_name,
        "keys": keys,
        "sequence_by": sequence_by,
        "flow_name": flow_name,
        "apply_as_truncates": apply_as_truncates,
        "ignore_null_updates": ignore_null_updates,
        "once": once,
    }
    artifact = render_lakeflow_auto_cdc_artifact(contract, **options)
    return artifact.code
56
+
57
+
58
def render_lakeflow_auto_cdc_artifact(
    contract: SemanticContract,
    *,
    source_kind: LakeflowSourceKind = "change_feed",
    source_name: str,
    keys: tuple[str, ...] = (),
    sequence_by: str | None = None,
    flow_name: str | None = None,
    apply_as_truncates: str | None = None,
    ignore_null_updates: bool = False,
    once: bool = False,
) -> LakeflowAutoCdcArtifact:
    """Render a Lakeflow AUTO CDC Python artifact for ``contract``.

    The contract is first evaluated for compatibility. Explicit ``keys`` and
    ``sequence_by`` override the contract's ``write.merge_keys`` and
    ``write.scd2_sequence_by``. For ``source_kind="snapshot"`` only the source,
    keys, flow name and SCD type are passed to the snapshot renderer.

    Raises:
        ValueError: when the compatibility status is ``"unsupported"``; the
            message joins the compatibility reasons with ``"; "``.
    """
    compatibility = evaluate_lakeflow_compatibility(
        contract,
        source_kind=source_kind,
        source_name=source_name,
        keys=keys,
        sequence_by=sequence_by,
        apply_as_truncates=apply_as_truncates,
    )
    if compatibility.status == "unsupported":
        raise ValueError("; ".join(compatibility.reasons))

    resolved_keys = keys or contract.write.merge_keys
    if source_kind != "snapshot":
        code = _render_change_feed_flow(
            contract,
            source_name=source_name,
            keys=resolved_keys,
            sequence_by=sequence_by or contract.write.scd2_sequence_by,
            flow_name=flow_name,
            scd_type=compatibility.scd_type,
            apply_as_truncates=apply_as_truncates,
            ignore_null_updates=ignore_null_updates,
            once=once,
        )
    else:
        code = _render_snapshot_flow(
            contract,
            source_name=source_name,
            keys=resolved_keys,
            flow_name=flow_name,
            scd_type=compatibility.scd_type,
        )

    return LakeflowAutoCdcArtifact(
        language="python",
        source_kind=source_kind,
        code=code,
        compatibility=compatibility,
    )
107
+
108
+
109
def _render_change_feed_flow(
    contract: SemanticContract,
    *,
    source_name: str,
    keys: tuple[str, ...],
    sequence_by: str | None,
    flow_name: str | None,
    scd_type: int | None,
    apply_as_truncates: str | None,
    ignore_null_updates: bool,
    once: bool,
) -> str:
    """Render Python source that declares an AUTO CDC flow from a change feed.

    The generated code creates the streaming target table and calls
    ``dp.create_auto_cdc_flow``. ``target``, ``source``, ``keys``,
    ``stored_as_scd_type`` and ``ignore_null_updates`` are always emitted; the
    remaining arguments only appear when they have a truthy value. Values are
    embedded with ``repr()``.
    """
    target = target_full_name(contract)
    write = contract.write
    lines = [
        "from pyspark import pipelines as dp",
        "",
        f"dp.create_streaming_table(name={target!r})",
        "",
        "dp.create_auto_cdc_flow(",
        f" target={target!r},",
        f" source={source_name!r},",
        f" keys={list(keys)!r},",
        f" stored_as_scd_type={scd_type!r},",
        f" ignore_null_updates={ignore_null_updates!r},",
    ]
    if sequence_by:
        lines.append(f" sequence_by={sequence_by!r},")
    if write.scd2_apply_as_deletes:
        lines.append(f" apply_as_deletes={write.scd2_apply_as_deletes!r},")
    if apply_as_truncates:
        lines.append(f" apply_as_truncates={apply_as_truncates!r},")
    # History tracking columns are only mapped for SCD type 2 by the
    # compatibility evaluation; keep the rendered code in agreement with it.
    if scd_type == 2 and write.scd2_change_columns:
        lines.append(f" track_history_column_list={list(write.scd2_change_columns)!r},")
    if flow_name:
        lines.append(f" name={flow_name!r},")
    if once:
        lines.append(" once=True,")
    lines.append(")")
    return "\n".join(lines) + "\n"
148
+
149
+
150
def _render_snapshot_flow(
    contract: SemanticContract,
    *,
    source_name: str,
    keys: tuple[str, ...],
    flow_name: str | None,
    scd_type: int | None,
) -> str:
    """Render Python source that declares an AUTO CDC flow from snapshots.

    The generated code creates the streaming target table and calls
    ``dp.create_auto_cdc_from_snapshot_flow`` with ``target``, ``source``,
    ``keys`` and ``stored_as_scd_type``; ``track_history_column_list`` and
    ``name`` are added only when applicable. Values are embedded with
    ``repr()``.
    """
    target = target_full_name(contract)
    change_columns = contract.write.scd2_change_columns
    lines = [
        "from pyspark import pipelines as dp",
        "",
        f"dp.create_streaming_table(name={target!r})",
        "",
        "dp.create_auto_cdc_from_snapshot_flow(",
        f" target={target!r},",
        f" source={source_name!r},",
        f" keys={list(keys)!r},",
        f" stored_as_scd_type={scd_type!r},",
    ]
    # History tracking columns are only mapped for SCD type 2 by the
    # compatibility evaluation; keep the rendered code in agreement with it.
    if scd_type == 2 and change_columns:
        lines.append(f" track_history_column_list={list(change_columns)!r},")
    if flow_name:
        lines.append(f" name={flow_name!r},")
    lines.append(")")
    return "\n".join(lines) + "\n"
@@ -0,0 +1,7 @@
1
+ from contractforge_databricks.lineage.openlineage import (
2
+ build_openlineage_event,
3
+ openlineage_namespace,
4
+ render_openlineage_insert_sql,
5
+ )
6
+
7
+ __all__ = ["build_openlineage_event", "openlineage_namespace", "render_openlineage_insert_sql"]