contractforge-databricks 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. contractforge_databricks/__init__.py +172 -0
  2. contractforge_databricks/adapter.py +69 -0
  3. contractforge_databricks/annotations/__init__.py +10 -0
  4. contractforge_databricks/annotations/application.py +52 -0
  5. contractforge_databricks/annotations/audit.py +49 -0
  6. contractforge_databricks/annotations/sql.py +142 -0
  7. contractforge_databricks/api.py +65 -0
  8. contractforge_databricks/bundles/__init__.py +9 -0
  9. contractforge_databricks/bundles/assets.py +47 -0
  10. contractforge_databricks/bundles/project.py +213 -0
  11. contractforge_databricks/bundles/project_config.py +133 -0
  12. contractforge_databricks/capabilities/__init__.py +17 -0
  13. contractforge_databricks/capabilities/builders.py +43 -0
  14. contractforge_databricks/capabilities/evaluate.py +162 -0
  15. contractforge_databricks/capabilities/mapping.py +36 -0
  16. contractforge_databricks/capabilities/models.py +44 -0
  17. contractforge_databricks/capabilities/runtime.py +111 -0
  18. contractforge_databricks/capabilities/uc.py +47 -0
  19. contractforge_databricks/cli.py +196 -0
  20. contractforge_databricks/cli_deploy.py +98 -0
  21. contractforge_databricks/cli_governance.py +142 -0
  22. contractforge_databricks/cli_io.py +91 -0
  23. contractforge_databricks/cli_maintenance.py +69 -0
  24. contractforge_databricks/coercion.py +31 -0
  25. contractforge_databricks/contract_extensions.py +70 -0
  26. contractforge_databricks/cost/__init__.py +11 -0
  27. contractforge_databricks/cost/model.py +22 -0
  28. contractforge_databricks/cost/report.py +65 -0
  29. contractforge_databricks/cost/sql.py +136 -0
  30. contractforge_databricks/dashboards/__init__.py +15 -0
  31. contractforge_databricks/dashboards/control_tables.py +150 -0
  32. contractforge_databricks/diagnostics/__init__.py +7 -0
  33. contractforge_databricks/diagnostics/explain.py +40 -0
  34. contractforge_databricks/environment.py +53 -0
  35. contractforge_databricks/evidence/__init__.py +98 -0
  36. contractforge_databricks/evidence/ddl.py +35 -0
  37. contractforge_databricks/evidence/governance_log.py +175 -0
  38. contractforge_databricks/evidence/helpers.py +29 -0
  39. contractforge_databricks/evidence/ops_log.py +210 -0
  40. contractforge_databricks/evidence/records.py +27 -0
  41. contractforge_databricks/evidence/run_log.py +74 -0
  42. contractforge_databricks/evidence/schemas.py +7 -0
  43. contractforge_databricks/evidence/sql.py +144 -0
  44. contractforge_databricks/evidence/tables.py +20 -0
  45. contractforge_databricks/evidence/writer.py +118 -0
  46. contractforge_databricks/execution/__init__.py +70 -0
  47. contractforge_databricks/execution/delta_basic.py +57 -0
  48. contractforge_databricks/execution/hash_diff.py +126 -0
  49. contractforge_databricks/execution/hash_diff_latest.py +142 -0
  50. contractforge_databricks/execution/replace_partitions.py +40 -0
  51. contractforge_databricks/execution/results.py +5 -0
  52. contractforge_databricks/execution/retry.py +36 -0
  53. contractforge_databricks/execution/scd2.py +213 -0
  54. contractforge_databricks/execution/scd2_deletes.py +65 -0
  55. contractforge_databricks/execution/scd2_late.py +30 -0
  56. contractforge_databricks/execution/snapshot.py +77 -0
  57. contractforge_databricks/execution/sql_merge.py +85 -0
  58. contractforge_databricks/execution/tables.py +98 -0
  59. contractforge_databricks/execution/windows.py +58 -0
  60. contractforge_databricks/governance/__init__.py +30 -0
  61. contractforge_databricks/governance/access.py +185 -0
  62. contractforge_databricks/governance/application.py +93 -0
  63. contractforge_databricks/governance/drift.py +49 -0
  64. contractforge_databricks/governance/runtime.py +60 -0
  65. contractforge_databricks/governance/sql.py +31 -0
  66. contractforge_databricks/governance/validation.py +135 -0
  67. contractforge_databricks/lakeflow/__init__.py +21 -0
  68. contractforge_databricks/lakeflow/compatibility.py +194 -0
  69. contractforge_databricks/lakeflow/rendering.py +175 -0
  70. contractforge_databricks/lineage/__init__.py +7 -0
  71. contractforge_databricks/lineage/openlineage.py +182 -0
  72. contractforge_databricks/maintenance/__init__.py +27 -0
  73. contractforge_databricks/maintenance/retention.py +90 -0
  74. contractforge_databricks/maintenance/sql.py +68 -0
  75. contractforge_databricks/metrics/__init__.py +19 -0
  76. contractforge_databricks/metrics/history.py +21 -0
  77. contractforge_databricks/metrics/write.py +63 -0
  78. contractforge_databricks/operations/__init__.py +4 -0
  79. contractforge_databricks/operations/application.py +38 -0
  80. contractforge_databricks/operations/sql.py +95 -0
  81. contractforge_databricks/parity/__init__.py +18 -0
  82. contractforge_databricks/parity/catalog.py +59 -0
  83. contractforge_databricks/parity/models.py +7 -0
  84. contractforge_databricks/parity/scenarios.py +111 -0
  85. contractforge_databricks/partitioning/__init__.py +3 -0
  86. contractforge_databricks/partitioning/predicates.py +28 -0
  87. contractforge_databricks/preparation/__init__.py +47 -0
  88. contractforge_databricks/preparation/deduplicate.py +87 -0
  89. contractforge_databricks/preparation/encoding.py +37 -0
  90. contractforge_databricks/preparation/hashing.py +18 -0
  91. contractforge_databricks/preparation/pyspark.py +178 -0
  92. contractforge_databricks/preparation/pyspark_staging.py +70 -0
  93. contractforge_databricks/preparation/shape.py +209 -0
  94. contractforge_databricks/preparation/shape_validation.py +94 -0
  95. contractforge_databricks/preparation/staging.py +17 -0
  96. contractforge_databricks/preparation/zip_arrays.py +51 -0
  97. contractforge_databricks/presets/__init__.py +3 -0
  98. contractforge_databricks/presets/base.py +24 -0
  99. contractforge_databricks/presets/bronze.py +57 -0
  100. contractforge_databricks/presets/catalog.py +22 -0
  101. contractforge_databricks/presets/core.py +134 -0
  102. contractforge_databricks/presets/gold.py +62 -0
  103. contractforge_databricks/presets/modifiers.py +51 -0
  104. contractforge_databricks/presets/runtime.py +22 -0
  105. contractforge_databricks/presets/silver.py +101 -0
  106. contractforge_databricks/presets/write_engine.py +57 -0
  107. contractforge_databricks/quality/__init__.py +41 -0
  108. contractforge_databricks/quality/evaluation.py +178 -0
  109. contractforge_databricks/quality/persistence.py +81 -0
  110. contractforge_databricks/quality/registry.py +134 -0
  111. contractforge_databricks/quality/results.py +17 -0
  112. contractforge_databricks/quality/sql.py +113 -0
  113. contractforge_databricks/rendering/__init__.py +11 -0
  114. contractforge_databricks/rendering/bundle.py +93 -0
  115. contractforge_databricks/rendering/markdown.py +50 -0
  116. contractforge_databricks/rendering/names.py +56 -0
  117. contractforge_databricks/results.py +15 -0
  118. contractforge_databricks/runtime/__init__.py +101 -0
  119. contractforge_databricks/runtime/available_now.py +147 -0
  120. contractforge_databricks/runtime/bundles.py +211 -0
  121. contractforge_databricks/runtime/cache.py +20 -0
  122. contractforge_databricks/runtime/control_tables.py +19 -0
  123. contractforge_databricks/runtime/deploy.py +197 -0
  124. contractforge_databricks/runtime/detection.py +114 -0
  125. contractforge_databricks/runtime/dry_run.py +46 -0
  126. contractforge_databricks/runtime/errors.py +54 -0
  127. contractforge_databricks/runtime/file_selection.py +109 -0
  128. contractforge_databricks/runtime/finalization.py +168 -0
  129. contractforge_databricks/runtime/governance.py +37 -0
  130. contractforge_databricks/runtime/hooks.py +45 -0
  131. contractforge_databricks/runtime/http_file.py +37 -0
  132. contractforge_databricks/runtime/http_retry.py +15 -0
  133. contractforge_databricks/runtime/http_safety.py +9 -0
  134. contractforge_databricks/runtime/json_materialization.py +97 -0
  135. contractforge_databricks/runtime/lineage.py +164 -0
  136. contractforge_databricks/runtime/maintenance.py +43 -0
  137. contractforge_databricks/runtime/merge_validation.py +98 -0
  138. contractforge_databricks/runtime/metadata.py +21 -0
  139. contractforge_databricks/runtime/metrics.py +34 -0
  140. contractforge_databricks/runtime/models.py +32 -0
  141. contractforge_databricks/runtime/options.py +33 -0
  142. contractforge_databricks/runtime/orchestration_context.py +185 -0
  143. contractforge_databricks/runtime/orchestrator.py +147 -0
  144. contractforge_databricks/runtime/partitioning.py +93 -0
  145. contractforge_databricks/runtime/quality_quarantine.py +92 -0
  146. contractforge_databricks/runtime/rest_api.py +46 -0
  147. contractforge_databricks/runtime/rest_auth.py +21 -0
  148. contractforge_databricks/runtime/rest_pagination.py +21 -0
  149. contractforge_databricks/runtime/run_payload.py +177 -0
  150. contractforge_databricks/runtime/schema.py +106 -0
  151. contractforge_databricks/runtime/source_metadata.py +30 -0
  152. contractforge_databricks/runtime/source_registry.py +43 -0
  153. contractforge_databricks/runtime/source_schema.py +24 -0
  154. contractforge_databricks/runtime/sources.py +208 -0
  155. contractforge_databricks/runtime/spark.py +183 -0
  156. contractforge_databricks/runtime/spark_defaults.py +35 -0
  157. contractforge_databricks/runtime/storage_auth.py +132 -0
  158. contractforge_databricks/runtime/streaming.py +131 -0
  159. contractforge_databricks/runtime/success.py +104 -0
  160. contractforge_databricks/runtime/utils.py +52 -0
  161. contractforge_databricks/runtime/watermark.py +71 -0
  162. contractforge_databricks/runtime/windows.py +184 -0
  163. contractforge_databricks/runtime/write.py +66 -0
  164. contractforge_databricks/runtime/write_flow.py +146 -0
  165. contractforge_databricks/runtime/write_strategy.py +40 -0
  166. contractforge_databricks/schema/__init__.py +21 -0
  167. contractforge_databricks/schema/diff.py +11 -0
  168. contractforge_databricks/schema/policy.py +33 -0
  169. contractforge_databricks/schema/sync.py +23 -0
  170. contractforge_databricks/security/__init__.py +21 -0
  171. contractforge_databricks/security/errors.py +5 -0
  172. contractforge_databricks/security/redaction.py +5 -0
  173. contractforge_databricks/security/secrets.py +114 -0
  174. contractforge_databricks/security/source_policy.py +17 -0
  175. contractforge_databricks/shapes/__init__.py +3 -0
  176. contractforge_databricks/shapes/sql.py +123 -0
  177. contractforge_databricks/sources/__init__.py +67 -0
  178. contractforge_databricks/sources/artifacts.py +100 -0
  179. contractforge_databricks/sources/autoloader.py +48 -0
  180. contractforge_databricks/sources/bounded_streams.py +44 -0
  181. contractforge_databricks/sources/classification.py +115 -0
  182. contractforge_databricks/sources/delta_share.py +21 -0
  183. contractforge_databricks/sources/files.py +48 -0
  184. contractforge_databricks/sources/http_file.py +46 -0
  185. contractforge_databricks/sources/interpret.py +76 -0
  186. contractforge_databricks/sources/jdbc.py +32 -0
  187. contractforge_databricks/sources/metadata.py +18 -0
  188. contractforge_databricks/sources/native_passthrough.py +33 -0
  189. contractforge_databricks/sources/rds_iam.py +15 -0
  190. contractforge_databricks/sources/rds_iam_runtime.py +191 -0
  191. contractforge_databricks/sources/rest_api.py +33 -0
  192. contractforge_databricks/sources/support.py +50 -0
  193. contractforge_databricks/sources/table_refs.py +65 -0
  194. contractforge_databricks/sql/__init__.py +4 -0
  195. contractforge_databricks/sql/identifiers.py +17 -0
  196. contractforge_databricks/sql/literals.py +36 -0
  197. contractforge_databricks/state/__init__.py +39 -0
  198. contractforge_databricks/state/ddl.py +24 -0
  199. contractforge_databricks/state/migrations.py +146 -0
  200. contractforge_databricks/state/queries.py +149 -0
  201. contractforge_databricks/state/sql.py +116 -0
  202. contractforge_databricks/state/tables.py +9 -0
  203. contractforge_databricks/state/writer.py +83 -0
  204. contractforge_databricks/templates/__init__.py +15 -0
  205. contractforge_databricks/templates/catalog.py +205 -0
  206. contractforge_databricks/templates/catalog_parity.py +85 -0
  207. contractforge_databricks/templates/core.py +83 -0
  208. contractforge_databricks/templates/enrichment.py +175 -0
  209. contractforge_databricks/transforms/__init__.py +3 -0
  210. contractforge_databricks/transforms/sql.py +118 -0
  211. contractforge_databricks/watermark/__init__.py +6 -0
  212. contractforge_databricks/watermark/sql.py +91 -0
  213. contractforge_databricks/write_modes/__init__.py +20 -0
  214. contractforge_databricks/write_modes/registry.py +44 -0
  215. contractforge_databricks/write_modes/sql.py +33 -0
  216. contractforge_databricks/write_modes/strategy.py +192 -0
  217. contractforge_databricks-0.1.0.dist-info/METADATA +34 -0
  218. contractforge_databricks-0.1.0.dist-info/RECORD +220 -0
  219. contractforge_databricks-0.1.0.dist-info/WHEEL +4 -0
  220. contractforge_databricks-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,17 @@
1
+ """Compatibility exports for platform-neutral quality result models."""
2
+
3
+ from contractforge_core.quality import (
4
+ QualityRuleResult,
5
+ QualitySeverity,
6
+ QualityStatus,
7
+ quality_status,
8
+ quarantinable_results,
9
+ )
10
+
11
+ __all__ = [
12
+ "QualityRuleResult",
13
+ "QualitySeverity",
14
+ "QualityStatus",
15
+ "quality_status",
16
+ "quarantinable_results",
17
+ ]
@@ -0,0 +1,113 @@
1
+ """Databricks SQL quality check rendering."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_core.config import MAX_INLINE_ACCEPTED_VALUES
6
+ from contractforge_core.semantic import QualityIntent, SemanticContract
7
+ from contractforge_databricks.rendering.names import target_full_name
8
+ from contractforge_databricks.sql import quote_identifier, quote_table_name
9
+
10
+
11
def render_quality_check_sql(contract: SemanticContract, *, source_view: str | None = None) -> str:
    """Render one SQL check per quality rule declared on ``contract``.

    The checks run against ``source_view`` when it is given, otherwise against
    the contract's fully qualified target. Rendered checks are separated by a
    blank line and the text ends with a newline; a contract without quality
    rules yields a single explanatory SQL comment instead.
    """
    relation = quote_table_name(source_view or target_full_name(contract))
    rendered = [_render_quality_intent(rule, relation) for rule in contract.quality]
    if not rendered:
        return "-- No quality rules declared.\n"
    return "\n\n".join(rendered) + "\n"
17
+
18
+
19
def _render_quality_intent(quality: QualityIntent, table: str) -> str:
    """Render the SQL check for a single quality rule.

    Dispatches on ``quality.rule``. Every supported rule renders a
    ``-- quality: <name>`` comment line followed by one ``SELECT`` that exposes
    a ``failed_rows`` column (``required_columns`` is delegated to
    ``_render_required_columns``). An unrecognised rule renders only a SQL
    comment naming the rule.

    Args:
        quality: The rule to render; ``rule``, ``name``, ``columns`` and
            ``value`` are read depending on the rule kind.
        table: Table reference interpolated verbatim into the SQL, so it must
            already be quoted by the caller.

    Raises:
        ValueError: If a single-column rule does not declare exactly one
            column, or if ``accepted_values`` lists more than
            ``MAX_INLINE_ACCEPTED_VALUES`` values.
    """
    if quality.rule == "required_columns":
        return _render_required_columns(quality, table)
    if quality.rule == "not_null":
        column = _single_column(quality)
        return (
            f"-- quality: {quality.name}\n"
            f"SELECT count(*) AS failed_rows FROM {table} WHERE {quote_identifier(column)} IS NULL;"
        )
    if quality.rule == "unique_key":
        keys = ", ".join(quote_identifier(column) for column in quality.columns)
        # failed_rows counts duplicated key combinations, not duplicated rows.
        return (
            f"-- quality: {quality.name}\n"
            f"SELECT count(*) AS failed_rows FROM ("
            f"SELECT {keys}, count(*) AS row_count FROM {table} GROUP BY {keys} HAVING count(*) > 1"
            ") duplicates;"
        )
    if quality.rule == "accepted_values":
        column = _single_column(quality)
        # A scalar value is treated as a one-element list.
        values = quality.value if isinstance(quality.value, (list, tuple)) else [quality.value]
        if len(values) > MAX_INLINE_ACCEPTED_VALUES:
            raise ValueError(
                f"quality.accepted_values.{column} has {len(values)} values. "
                "Use a reference table or custom quality evaluator for large value sets."
            )
        accepted = ", ".join(_sql_literal(value) for value in values)
        # NULLs are excluded explicitly, so they never count as failures here.
        return (
            f"-- quality: {quality.name}\n"
            f"SELECT count(*) AS failed_rows FROM {table} "
            f"WHERE {quote_identifier(column)} IS NOT NULL AND {quote_identifier(column)} NOT IN ({accepted});"
        )
    if quality.rule == "row_count_minimum":
        # failed_rows is a 0/1 flag for this rule rather than a row count.
        return (
            f"-- quality: {quality.name}\n"
            f"SELECT CASE WHEN count(*) >= {int(quality.value)} THEN 0 ELSE 1 END AS failed_rows FROM {table};"
        )
    if quality.rule == "max_null_ratio":
        column = _single_column(quality)
        ratio = float(quality.value)
        # An empty table passes (0); otherwise failed_rows is a 0/1 flag that
        # is set when the NULL share is strictly greater than the ratio.
        return (
            f"-- quality: {quality.name}\n"
            "SELECT CASE WHEN count(*) = 0 THEN 0 "
            f"WHEN (sum(CASE WHEN {quote_identifier(column)} IS NULL THEN 1 ELSE 0 END) / count(*)) > {ratio} "
            "THEN 1 ELSE 0 END AS failed_rows "
            f"FROM {table};"
        )
    if quality.rule == "expression":
        # quality.value is interpolated verbatim as a SQL predicate; rows where
        # it evaluates to false or to NULL are counted as failures.
        return (
            f"-- quality: {quality.name}\n"
            f"SELECT count(*) AS failed_rows FROM {table} "
            f"WHERE NOT ({quality.value}) OR ({quality.value}) IS NULL;"
        )
    return f"-- Unsupported quality rule for Databricks SQL rendering: {quality.rule}\n"
72
+
73
+
74
+ def _single_column(quality: QualityIntent) -> str:
75
+ if len(quality.columns) != 1:
76
+ raise ValueError(f"quality rule {quality.name} requires exactly one column")
77
+ return quality.columns[0]
78
+
79
+
80
+ def _sql_literal(value: object) -> str:
81
+ if value is None:
82
+ return "NULL"
83
+ if isinstance(value, bool):
84
+ return "true" if value else "false"
85
+ if isinstance(value, (int, float)):
86
+ return str(value)
87
+ return "'" + str(value).replace("'", "''") + "'"
88
+
89
+
90
def _render_required_columns(quality: QualityIntent, table: str) -> str:
    """Render the check that every required column exists on ``table``.

    When ``table`` resolves to exactly three name parts (catalog, schema,
    table) the check anti-joins the expected column names against
    ``system.information_schema.columns`` and reports the number of missing
    columns as ``failed_rows``. For any other reference only SQL comments are
    rendered, listing the expected columns.
    """
    name_parts = _unquote_table_parts(table)
    expected_literals = ", ".join(map(_sql_literal, quality.columns))
    header = f"-- quality: {quality.name}\n"

    if len(name_parts) == 3:
        catalog, schema, table_name = name_parts
        actual_columns = (
            "SELECT column_name FROM system.information_schema.columns "
            f"WHERE table_catalog = {_sql_literal(catalog)} "
            f"AND table_schema = {_sql_literal(schema)} "
            f"AND table_name = {_sql_literal(table_name)}"
        )
        return (
            header
            + f"SELECT count(*) AS failed_rows FROM (SELECT explode(array({expected_literals})) AS expected_column) expected "
            + "LEFT ANTI JOIN ("
            + actual_columns
            + ") actual ON expected.expected_column = actual.column_name;"
        )

    # Temp views and partially qualified names cannot be looked up in the
    # information schema, so only a note is rendered.
    return (
        header
        + "-- Required column checks need runtime schema inspection for temp views or non-qualified tables.\n"
        + f"-- expected columns: {', '.join(quality.columns)}"
    )
110
+
111
+
112
+ def _unquote_table_parts(table: str) -> tuple[str, ...]:
113
+ return tuple(part.strip("`").replace("``", "`") for part in table.split(".") if part.strip("`"))
@@ -0,0 +1,11 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+
6
def render_databricks_artifacts(*args: Any, **kwargs: Any) -> Any:
    """Forward all arguments to ``rendering.bundle.render_databricks_artifacts``.

    The bundle module is imported inside the function, so importing this
    package does not import it until the first call.
    """
    from contractforge_databricks.rendering import bundle as _bundle

    return _bundle.render_databricks_artifacts(*args, **kwargs)
10
+
11
+ __all__ = ["render_databricks_artifacts"]
@@ -0,0 +1,93 @@
1
+ """Bundle Databricks adapter artifacts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime
7
+
8
+ from contractforge_core.adapters import RenderedArtifacts
9
+ from contractforge_core.planner import PlanningResult
10
+ from contractforge_core.semantic import SemanticContract
11
+ from contractforge_databricks.annotations import render_annotations_audit_insert_sql, render_annotations_sql
12
+ from contractforge_databricks.bundles import DatabricksJobSpec, render_databricks_asset_bundle
13
+ from contractforge_databricks.capabilities.models import DatabricksCapabilities
14
+ from contractforge_databricks.cost import render_operational_cost_query
15
+ from contractforge_databricks.diagnostics import render_create_explain_table_sql
16
+ from contractforge_databricks.environment import DatabricksEnvironment
17
+ from contractforge_databricks.evidence import render_create_evidence_tables_sql, render_evidence_table_notes
18
+ from contractforge_databricks.governance import render_access_audit_insert_sql, render_governance_sql
19
+ from contractforge_databricks.lakeflow import evaluate_lakeflow_compatibility, render_lakeflow_review
20
+ from contractforge_databricks.lineage import render_openlineage_insert_sql
21
+ from contractforge_databricks.operations import render_operations_insert_sql, render_operations_json
22
+ from contractforge_databricks.quality import render_quality_check_sql
23
+ from contractforge_databricks.rendering.markdown import render_review_markdown
24
+ from contractforge_databricks.rendering.names import artifact_prefix, bundle_name, job_name, task_key
25
+ from contractforge_databricks.schema import plan_schema_policy
26
+ from contractforge_databricks.shapes import render_shape_sql
27
+ from contractforge_databricks.sources import render_source_artifacts, render_source_metadata_json
28
+ from contractforge_databricks.state import render_control_table_migrations_sql, render_create_state_tables_sql
29
+ from contractforge_databricks.transforms import render_transform_sql
30
+ from contractforge_databricks.write_modes import choose_write_strategy, render_write_mode_sql_notes
31
+
32
+
33
def render_databricks_artifacts(
    contract: SemanticContract,
    planning: PlanningResult,
    capabilities: DatabricksCapabilities,
    *,
    environment: DatabricksEnvironment | None = None,
) -> RenderedArtifacts:
    """Render every Databricks artifact for ``contract`` into one bundle.

    Args:
        contract: Semantic contract the artifacts are rendered for.
        planning: Planning result; used here for the review markdown.
        capabilities: Detected Databricks capabilities; serialised as JSON and
            used to choose the write strategy and to decide whether a Lakeflow
            review is produced.
        environment: Deployment settings (evidence catalog/schema, workspace
            path, bundle target). A default ``DatabricksEnvironment()`` is
            used when omitted.

    Returns:
        A ``RenderedArtifacts`` whose ``artifacts`` mapping goes from artifact
        file name to rendered text. Most names start with the contract's
        artifact prefix; source artifacts are merged in under whatever names
        ``render_source_artifacts`` returns.
    """
    env = environment or DatabricksEnvironment()
    prefix = artifact_prefix(contract)
    # Evidence, state, control-table, operations, diagnostics and cost
    # artifacts all target the environment's evidence catalog/schema.
    artifacts = {
        f"{prefix}.review.md": render_review_markdown(contract, planning, capabilities),
        f"{prefix}.capabilities.json": json.dumps(capabilities.as_dict(), indent=2, sort_keys=True),
        f"{prefix}.write_mode.sql": render_write_mode_sql_notes(contract),
        f"{prefix}.shape.sql": render_shape_sql(contract),
        f"{prefix}.transform.sql": render_transform_sql(contract),
        f"{prefix}.annotations.sql": render_annotations_sql(contract),
        f"{prefix}.annotations_audit.sql": render_annotations_audit_insert_sql(contract, catalog=env.evidence_catalog, schema=env.evidence_schema),
        f"{prefix}.governance.sql": render_governance_sql(contract),
        f"{prefix}.access_audit.sql": render_access_audit_insert_sql(contract, catalog=env.evidence_catalog, schema=env.evidence_schema),
        f"{prefix}.quality.sql": render_quality_check_sql(contract),
        f"{prefix}.schema_policy.json": json.dumps(plan_schema_policy(contract).as_dict(), indent=2, sort_keys=True),
        f"{prefix}.source_metadata.json": render_source_metadata_json(contract),
        f"{prefix}.evidence.sql": render_evidence_table_notes(catalog=env.evidence_catalog, schema=env.evidence_schema),
        f"{prefix}.evidence_ddl.sql": render_create_evidence_tables_sql(catalog=env.evidence_catalog, schema=env.evidence_schema),
        f"{prefix}.state_ddl.sql": render_create_state_tables_sql(catalog=env.evidence_catalog, schema=env.evidence_schema),
        f"{prefix}.control_table_migrations.sql": render_control_table_migrations_sql(catalog=env.evidence_catalog, schema=env.evidence_schema),
        f"{prefix}.openlineage.sql": _render_openlineage_template(contract, env),
        f"{prefix}.operations.json": render_operations_json(contract),
        f"{prefix}.operations.sql": render_operations_insert_sql(contract, catalog=env.evidence_catalog, schema=env.evidence_schema),
        f"{prefix}.diagnostics_ddl.sql": render_create_explain_table_sql(catalog=env.evidence_catalog, schema=env.evidence_schema),
        f"{prefix}.cost.sql": render_operational_cost_query(catalog=env.evidence_catalog, schema=env.evidence_schema),
    }
    strategy = choose_write_strategy(contract, capabilities)
    artifacts[f"{prefix}.strategy.json"] = json.dumps(strategy.as_dict(), indent=2, sort_keys=True)
    # The Lakeflow review is skipped only when the capability is reported as
    # "unsupported"; any other status still produces it.
    if capabilities.status("lakeflow_auto_cdc") != "unsupported":
        compatibility = evaluate_lakeflow_compatibility(contract)
        artifacts[f"{prefix}.lakeflow.md"] = render_lakeflow_review(compatibility)
    # Source artifacts are merged after the fixed set, so a colliding name
    # would overwrite the entry rendered above.
    artifacts.update(render_source_artifacts(contract, environment=env))
    artifacts[f"{prefix}.databricks.yml"] = render_databricks_asset_bundle(
        DatabricksJobSpec(
            bundle_name=bundle_name(contract),
            job_name=job_name(contract),
            task_key=task_key(contract),
            notebook_path=f"{env.workspace_path}/{prefix}/run",
            target=env.bundle_target,
        )
    )
    return RenderedArtifacts(artifacts=artifacts)
81
+
82
+
83
def _render_openlineage_template(contract: SemanticContract, env: DatabricksEnvironment) -> str:
    """Render the OpenLineage insert statement with placeholder run values.

    The run id is the literal ``${run_id}`` token, the status is ``SUCCESS``
    and both timestamps are the Unix epoch.
    """
    epoch = datetime(1970, 1, 1, 0, 0, 0)
    placeholder_run = {
        "run_id": "${run_id}",
        "source_name": contract.source.name,
        "status": "SUCCESS",
        "started_at_utc": epoch,
        "finished_at_utc": epoch,
        "catalog": env.evidence_catalog,
        "schema": env.evidence_schema,
    }
    return render_openlineage_insert_sql(contract, **placeholder_run)
@@ -0,0 +1,50 @@
1
+ """Markdown review report renderer."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_core.planner import PlanningResult
6
+ from contractforge_core.security import redact_value
7
+ from contractforge_core.semantic import SemanticContract
8
+ from contractforge_databricks.capabilities.models import DatabricksCapabilities
9
+ from contractforge_databricks.contract_extensions import databricks_extensions
10
+ from contractforge_databricks.rendering.names import plan_title, target_full_name
11
+
12
+
13
def render_review_markdown(
    contract: SemanticContract,
    planning: PlanningResult,
    capabilities: DatabricksCapabilities,
) -> str:
    """Render the Markdown planning review for ``contract``.

    The report contains a title, a summary (status, target, write mode,
    runtime kind), the plan steps, optional blocker, warning and Databricks
    extension sections, and the capability evidence sorted by name. Extension
    values are passed through ``redact_value`` before being written.
    """
    plan = planning.plan
    title = plan_title(plan) if plan else "ContractForge Databricks planning review"
    report = [
        f"# {title}",
        "",
        f"- Status: `{planning.status}`",
        f"- Target: `{target_full_name(contract)}`",
        f"- Write mode: `{contract.write.mode}`",
        f"- Runtime kind: `{capabilities.runtime_kind}`",
        "",
        "## Plan Steps",
        "",
    ]

    if plan:
        for step in plan.steps:
            report.append(f"- `{step.name}`: {step.intent}")
    else:
        report.append("- No executable abstract plan was produced.")

    # Blockers and warnings share one layout and are emitted only when present.
    for heading, findings in (("Blockers", planning.blockers), ("Warnings", planning.warnings)):
        if findings:
            report += ["", f"## {heading}", ""]
            report += [f"- `{finding.code}`: {finding.message}" for finding in findings]

    extensions = databricks_extensions(contract)
    if extensions:
        report += ["", "## Databricks Extensions", ""]
        for key in sorted(extensions):
            report.append(f"- `{key}`: `{redact_value(extensions[key])}`")

    report += ["", "## Databricks Capability Evidence", ""]
    report += [
        f"- `{name}`: `{capability.status}` - {capability.reason}"
        for name, capability in sorted(capabilities.capabilities.items())
    ]
    return "\n".join(report) + "\n"
@@ -0,0 +1,56 @@
1
+ """Databricks artifact naming helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_core.planner import ExecutionPlan
6
+ from contractforge_core.semantic import SemanticContract
7
+ from contractforge_core.naming import derive_names, naming_config_from_mapping
8
+
9
+
10
def target_full_name(contract: SemanticContract) -> str:
    """Return the target name, prefixed with ``namespace.`` when one is set."""
    target = contract.target
    return f"{target.namespace}.{target.name}" if target.namespace else target.name
14
+
15
+
16
def artifact_prefix(contract: SemanticContract) -> str:
    """Return the file-name prefix shared by the contract's artifacts.

    With a naming configuration the prefix is the derived contract basename.
    Otherwise it is ``<namespace>_<name>`` with the namespace's dots replaced
    by underscores, falling back to the target layer when no namespace is set.
    """
    target = contract.target
    if not contract.naming:
        stem = target.namespace.replace(".", "_") if target.namespace else target.layer
        return f"{stem}_{target.name}"
    derived = derive_names(
        target_table=target.name,
        layer=target.layer,
        domain=target.domain,
        config=naming_config_from_mapping(contract.naming.raw),
    )
    return derived.contract_basename
26
+
27
+
28
def bundle_name(contract: SemanticContract) -> str:
    """Return the bundle name derived from the contract's target and naming config."""
    target = contract.target
    naming = contract.naming
    derived = derive_names(
        target_table=target.name,
        layer=target.layer,
        domain=target.domain,
        config=naming_config_from_mapping(naming.raw if naming else None),
    )
    return derived.bundle_name
35
+
36
+
37
def job_name(contract: SemanticContract) -> str:
    """Return the job name derived from the contract's target and naming config."""
    target = contract.target
    naming = contract.naming
    derived = derive_names(
        target_table=target.name,
        layer=target.layer,
        domain=target.domain,
        config=naming_config_from_mapping(naming.raw if naming else None),
    )
    return derived.job_name
44
+
45
+
46
def task_key(contract: SemanticContract) -> str:
    """Return the task key derived from the contract's target and naming config."""
    target = contract.target
    naming = contract.naming
    derived = derive_names(
        target_table=target.name,
        layer=target.layer,
        domain=target.domain,
        config=naming_config_from_mapping(naming.raw if naming else None),
    )
    return derived.task_key
53
+
54
+
55
def plan_title(plan: ExecutionPlan) -> str:
    """Return the review title naming the plan's platform."""
    platform = plan.platform
    return f"ContractForge Databricks plan for {platform}"
@@ -0,0 +1,15 @@
1
+ """Compatibility exports for platform-neutral result models."""
2
+
3
+ from contractforge_core.results import (
4
+ GovernanceApplyResult,
5
+ GovernanceApplyStatus,
6
+ OperationsRecordResult,
7
+ OperationsRecordStatus,
8
+ )
9
+
10
+ __all__ = [
11
+ "GovernanceApplyResult",
12
+ "GovernanceApplyStatus",
13
+ "OperationsRecordResult",
14
+ "OperationsRecordStatus",
15
+ ]
@@ -0,0 +1,101 @@
1
+ from contractforge_databricks.runtime.available_now import BatchIngestor, run_available_now_stream
2
+ from contractforge_databricks.runtime.bundles import (
3
+ apply_databricks_access_bundle,
4
+ apply_databricks_annotations_bundle,
5
+ apply_databricks_governance_bundle,
6
+ ingest_databricks_bundle,
7
+ )
8
+ from contractforge_databricks.runtime.detection import detect_databricks_capabilities
9
+ from contractforge_databricks.runtime.deploy import (
10
+ deploy_databricks_bundle,
11
+ deploy_databricks_project,
12
+ render_databricks_project_bundle_file,
13
+ )
14
+ from contractforge_databricks.runtime.hooks import DatabricksIngestionHooks
15
+ from contractforge_databricks.runtime.http_file import download_http_file, resolve_http_file_dataframe
16
+ from contractforge_databricks.runtime.models import DatabricksIngestOptions, PreparedViewInput
17
+ from contractforge_databricks.runtime.orchestrator import ingest_databricks_contract
18
+ from contractforge_databricks.runtime.rest_api import read_rest_api_records, resolve_rest_api_dataframe
19
+ from contractforge_databricks.runtime.source_registry import (
20
+ DatabricksSourceResolver,
21
+ get_source_resolver,
22
+ list_source_resolvers,
23
+ register_source_resolver,
24
+ unregister_source_resolver,
25
+ )
26
+ from contractforge_databricks.runtime.spark import (
27
+ fix_encoding,
28
+ runtime_info,
29
+ safe_cache,
30
+ safe_cache_table,
31
+ safe_unpersist,
32
+ safe_uncache_table,
33
+ schema_signature,
34
+ sync_delta_schema,
35
+ table_exists,
36
+ )
37
+ from contractforge_databricks.runtime.sources import prepare_contract_source_view, prepare_source_view, resolve_source_dataframe
38
+ from contractforge_databricks.runtime.streaming import (
39
+ prefer_child_stream_metrics,
40
+ stream_metrics_from_batches,
41
+ stream_result_payload,
42
+ stream_start_payload,
43
+ )
44
+ from contractforge_databricks.runtime.utils import (
45
+ as_list,
46
+ new_run_id,
47
+ safe_truncate,
48
+ today_str,
49
+ utc_now_str,
50
+ utc_now_ts,
51
+ validate_columns,
52
+ )
53
+
54
+ __all__ = [
55
+ "DatabricksIngestOptions",
56
+ "DatabricksIngestionHooks",
57
+ "BatchIngestor",
58
+ "DatabricksSourceResolver",
59
+ "PreparedViewInput",
60
+ "as_list",
61
+ "apply_databricks_access_bundle",
62
+ "apply_databricks_annotations_bundle",
63
+ "apply_databricks_governance_bundle",
64
+ "detect_databricks_capabilities",
65
+ "deploy_databricks_bundle",
66
+ "deploy_databricks_project",
67
+ "render_databricks_project_bundle_file",
68
+ "download_http_file",
69
+ "fix_encoding",
70
+ "get_source_resolver",
71
+ "ingest_databricks_bundle",
72
+ "ingest_databricks_contract",
73
+ "list_source_resolvers",
74
+ "prepare_source_view",
75
+ "prepare_contract_source_view",
76
+ "read_rest_api_records",
77
+ "register_source_resolver",
78
+ "new_run_id",
79
+ "resolve_http_file_dataframe",
80
+ "resolve_rest_api_dataframe",
81
+ "resolve_source_dataframe",
82
+ "run_available_now_stream",
83
+ "runtime_info",
84
+ "safe_cache",
85
+ "safe_cache_table",
86
+ "safe_truncate",
87
+ "safe_unpersist",
88
+ "safe_uncache_table",
89
+ "schema_signature",
90
+ "prefer_child_stream_metrics",
91
+ "sync_delta_schema",
92
+ "stream_metrics_from_batches",
93
+ "stream_result_payload",
94
+ "stream_start_payload",
95
+ "table_exists",
96
+ "today_str",
97
+ "utc_now_str",
98
+ "utc_now_ts",
99
+ "unregister_source_resolver",
100
+ "validate_columns",
101
+ ]
@@ -0,0 +1,147 @@
1
+ """Available-now stream orchestration helpers for Databricks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Callable
6
+ from datetime import datetime, timezone
7
+ from typing import Any
8
+
9
+ from contractforge_core.runtime import PreparedInput
10
+ from contractforge_core.runtime import QueryOne
11
+ from contractforge_core.semantic import SemanticContract
12
+ from contractforge_databricks.evidence import EvidenceWriter, render_stream_child_run_metrics_sql
13
+ from contractforge_databricks.runtime.sources import resolve_source_dataframe
14
+ from contractforge_databricks.sources.interpret import interpret_incremental_files_source, is_incremental_file_source
15
+ from contractforge_databricks.runtime.streaming import (
16
+ prefer_child_stream_metrics,
17
+ stream_metrics_from_batches,
18
+ stream_result_payload,
19
+ stream_start_payload,
20
+ )
21
+
22
# Signature of the injected per-micro-batch ingestor: it is called with the
# PreparedInput built for the batch and the integer batch id, and returns a
# result dict (the caller reads its "status" and "error_message" keys).
BatchIngestor = Callable[[PreparedInput, int], dict[str, Any]]
23
+
24
+
25
def run_available_now_stream(
    spark: Any,
    contract: SemanticContract,
    *,
    stream_run_id: str,
    batch_ingestor: BatchIngestor,
    source_view_prefix: str = "cf_stream_batch",
    evidence: EvidenceWriter | None = None,
    query_one: QueryOne | None = None,
    runtime_metadata: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Run an available-now stream, handing each micro-batch to ``batch_ingestor``.

    Args:
        spark: Session object forwarded to ``resolve_source_dataframe``.
        contract: Contract whose ``source.raw`` mapping describes the stream.
        stream_run_id: Identifier used in view names, payloads and evidence logs.
        batch_ingestor: Callable invoked once per micro-batch with the prepared
            input and the batch id; its result dict is collected.
        source_view_prefix: Prefix of the temp view registered for each batch.
        evidence: Optional writer that receives the start, finish and error logs.
        query_one: Optional single-row query callable used to read child-run
            metrics; only consulted when ``evidence`` is also given.
        runtime_metadata: Optional metadata passed through to the payload builders.

    Returns:
        The payload produced by ``stream_result_payload``.

    Raises:
        ValueError: If the (interpreted) source has neither ``progress_location``
            nor ``checkpoint_location``.

    Errors raised while the streaming query starts or runs are not re-raised:
    they are recorded as status ``"FAILED"`` with the exception text. Errors
    from resolving the source DataFrame or from the evidence writer propagate.
    """

    raw_source = contract.source.raw or {}
    if is_incremental_file_source(raw_source):
        effective_source = interpret_incremental_files_source(raw_source)
    else:
        effective_source = raw_source
    checkpoint_location = effective_source.get("progress_location") or effective_source.get("checkpoint_location")
    if not checkpoint_location:
        raise ValueError("available-now stream requires source.progress_location or source.checkpoint_location")

    started_at = datetime.now(timezone.utc)
    collected: list[dict[str, Any]] = []
    # The DataFrame is resolved from the raw source mapping, not the interpreted one.
    streaming_df = resolve_source_dataframe(spark, raw_source)

    if evidence is not None:
        start_payload = stream_start_payload(
            contract,
            stream_run_id=stream_run_id,
            status="RUNNING",
            started_at_utc=started_at,
            runtime_metadata=runtime_metadata,
        )
        evidence.write_stream_log(start_payload)

    def process_batch(batch_df: Any, batch_id: int) -> None:
        # Dashes are replaced so the run id can be embedded in the view name.
        view_name = f"{source_view_prefix}_{stream_run_id}_{batch_id}".replace("-", "_")
        batch_df.createOrReplaceTempView(view_name)
        column_names = tuple(str(name) for name in getattr(batch_df, "columns", ()) or ())
        row_count = int(batch_df.count()) if hasattr(batch_df, "count") else 0
        source_label = str(raw_source.get("path") or raw_source.get("table") or contract.source.name)
        prepared = PreparedInput(
            source_view=view_name,
            source_columns=column_names,
            rows_read=row_count,
            source_name=source_label,
            source_metadata={"stream_run_id": stream_run_id, "batch_id": batch_id},
        )
        outcome = batch_ingestor(prepared, batch_id)
        collected.append(outcome)
        # A failed batch is still recorded above before the stream is aborted.
        if outcome.get("status") == "FAILED":
            raise RuntimeError(f"Available-now stream batch {batch_id} failed: {outcome.get('error_message')}")

    status = "SUCCESS"
    error_message = None
    try:
        writer = streaming_df.writeStream.foreachBatch(process_batch)
        writer = writer.option("checkpointLocation", str(checkpoint_location))
        query = writer.trigger(availableNow=True).start()
        query.awaitTermination()
    except Exception as exc:
        status, error_message = "FAILED", str(exc)

    local_metrics = stream_metrics_from_batches(collected)
    child_metrics = _child_stream_metrics(evidence, query_one, stream_run_id)
    if child_metrics and prefer_child_stream_metrics(local_metrics, child_metrics):
        metrics = child_metrics
    else:
        metrics = local_metrics

    result = stream_result_payload(
        contract,
        stream_run_id=stream_run_id,
        status=status,
        started_at_utc=started_at,
        batch_results=collected,
        error_message=error_message,
        stream_metrics=metrics,
        runtime_metadata=runtime_metadata,
    )
    if evidence is None:
        return result

    evidence.finish_stream_log(stream_run_id=stream_run_id, payload=result)
    if status == "FAILED":
        evidence.write_error_log(_stream_error_payload(contract, stream_run_id, result, error_message))
    return result
104
+
105
+
106
def _child_stream_metrics(
    evidence: EvidenceWriter | None,
    query_one: QueryOne | None,
    stream_run_id: str,
) -> dict[str, int] | None:
    """Read aggregated child-run metrics for a stream run.

    Returns ``None`` when either ``evidence`` or ``query_one`` is missing, or
    when the query yields no row. Otherwise returns the four counters as
    integers, with missing or falsy values reported as 0.
    """
    if query_one is None or evidence is None:
        return None

    runs_table = f"{evidence.catalog}.{evidence.schema}.ctrl_ingestion_runs"
    metrics_sql = render_stream_child_run_metrics_sql(
        stream_run_id=stream_run_id,
        runs_table=runs_table,
    )
    row = query_one(metrics_sql)
    if not row:
        return None

    metric_keys = (
        "batches_processed",
        "total_rows_read",
        "total_rows_written",
        "total_rows_quarantined",
    )
    return {key: int(row.get(key) or 0) for key in metric_keys}
127
+
128
+
129
def _stream_error_payload(
    contract: SemanticContract,
    stream_run_id: str,
    result: dict[str, Any],
    error_message: str | None,
) -> dict[str, Any]:
    """Build the error-log record for a failed available-now stream run.

    Timestamps, target table and status are taken from ``result``; the source
    label falls back from the raw source's ``path`` to its ``table`` to the
    contract's source name. ``stack_trace`` carries the same text as
    ``error_message`` because only the message is passed in.
    """
    raw_source = contract.source.raw or {}
    ended_at = result.get("ended_at_utc")
    # Keep only the first ten characters of the end timestamp's string form.
    error_date = str(ended_at or "")[:10]
    target_table = result.get("target_table")
    source_label = raw_source.get("path") or raw_source.get("table") or contract.source.name

    payload: dict[str, Any] = {
        "run_id": stream_run_id,
        "error_ts_utc": ended_at,
        "error_date": error_date,
        "target_table": target_table,
        "source_table": source_label,
        "mode": contract.write.mode,
        "status": result.get("status"),
        "error_type": "AvailableNowStreamError",
        "error_message": error_message,
        "stack_trace": error_message,
    }
    return payload