contractforge-databricks 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. contractforge_databricks/__init__.py +172 -0
  2. contractforge_databricks/adapter.py +69 -0
  3. contractforge_databricks/annotations/__init__.py +10 -0
  4. contractforge_databricks/annotations/application.py +52 -0
  5. contractforge_databricks/annotations/audit.py +49 -0
  6. contractforge_databricks/annotations/sql.py +142 -0
  7. contractforge_databricks/api.py +65 -0
  8. contractforge_databricks/bundles/__init__.py +9 -0
  9. contractforge_databricks/bundles/assets.py +47 -0
  10. contractforge_databricks/bundles/project.py +213 -0
  11. contractforge_databricks/bundles/project_config.py +133 -0
  12. contractforge_databricks/capabilities/__init__.py +17 -0
  13. contractforge_databricks/capabilities/builders.py +43 -0
  14. contractforge_databricks/capabilities/evaluate.py +162 -0
  15. contractforge_databricks/capabilities/mapping.py +36 -0
  16. contractforge_databricks/capabilities/models.py +44 -0
  17. contractforge_databricks/capabilities/runtime.py +111 -0
  18. contractforge_databricks/capabilities/uc.py +47 -0
  19. contractforge_databricks/cli.py +196 -0
  20. contractforge_databricks/cli_deploy.py +98 -0
  21. contractforge_databricks/cli_governance.py +142 -0
  22. contractforge_databricks/cli_io.py +91 -0
  23. contractforge_databricks/cli_maintenance.py +69 -0
  24. contractforge_databricks/coercion.py +31 -0
  25. contractforge_databricks/contract_extensions.py +70 -0
  26. contractforge_databricks/cost/__init__.py +11 -0
  27. contractforge_databricks/cost/model.py +22 -0
  28. contractforge_databricks/cost/report.py +65 -0
  29. contractforge_databricks/cost/sql.py +136 -0
  30. contractforge_databricks/dashboards/__init__.py +15 -0
  31. contractforge_databricks/dashboards/control_tables.py +150 -0
  32. contractforge_databricks/diagnostics/__init__.py +7 -0
  33. contractforge_databricks/diagnostics/explain.py +40 -0
  34. contractforge_databricks/environment.py +53 -0
  35. contractforge_databricks/evidence/__init__.py +98 -0
  36. contractforge_databricks/evidence/ddl.py +35 -0
  37. contractforge_databricks/evidence/governance_log.py +175 -0
  38. contractforge_databricks/evidence/helpers.py +29 -0
  39. contractforge_databricks/evidence/ops_log.py +210 -0
  40. contractforge_databricks/evidence/records.py +27 -0
  41. contractforge_databricks/evidence/run_log.py +74 -0
  42. contractforge_databricks/evidence/schemas.py +7 -0
  43. contractforge_databricks/evidence/sql.py +144 -0
  44. contractforge_databricks/evidence/tables.py +20 -0
  45. contractforge_databricks/evidence/writer.py +118 -0
  46. contractforge_databricks/execution/__init__.py +70 -0
  47. contractforge_databricks/execution/delta_basic.py +57 -0
  48. contractforge_databricks/execution/hash_diff.py +126 -0
  49. contractforge_databricks/execution/hash_diff_latest.py +142 -0
  50. contractforge_databricks/execution/replace_partitions.py +40 -0
  51. contractforge_databricks/execution/results.py +5 -0
  52. contractforge_databricks/execution/retry.py +36 -0
  53. contractforge_databricks/execution/scd2.py +213 -0
  54. contractforge_databricks/execution/scd2_deletes.py +65 -0
  55. contractforge_databricks/execution/scd2_late.py +30 -0
  56. contractforge_databricks/execution/snapshot.py +77 -0
  57. contractforge_databricks/execution/sql_merge.py +85 -0
  58. contractforge_databricks/execution/tables.py +98 -0
  59. contractforge_databricks/execution/windows.py +58 -0
  60. contractforge_databricks/governance/__init__.py +30 -0
  61. contractforge_databricks/governance/access.py +185 -0
  62. contractforge_databricks/governance/application.py +93 -0
  63. contractforge_databricks/governance/drift.py +49 -0
  64. contractforge_databricks/governance/runtime.py +60 -0
  65. contractforge_databricks/governance/sql.py +31 -0
  66. contractforge_databricks/governance/validation.py +135 -0
  67. contractforge_databricks/lakeflow/__init__.py +21 -0
  68. contractforge_databricks/lakeflow/compatibility.py +194 -0
  69. contractforge_databricks/lakeflow/rendering.py +175 -0
  70. contractforge_databricks/lineage/__init__.py +7 -0
  71. contractforge_databricks/lineage/openlineage.py +182 -0
  72. contractforge_databricks/maintenance/__init__.py +27 -0
  73. contractforge_databricks/maintenance/retention.py +90 -0
  74. contractforge_databricks/maintenance/sql.py +68 -0
  75. contractforge_databricks/metrics/__init__.py +19 -0
  76. contractforge_databricks/metrics/history.py +21 -0
  77. contractforge_databricks/metrics/write.py +63 -0
  78. contractforge_databricks/operations/__init__.py +4 -0
  79. contractforge_databricks/operations/application.py +38 -0
  80. contractforge_databricks/operations/sql.py +95 -0
  81. contractforge_databricks/parity/__init__.py +18 -0
  82. contractforge_databricks/parity/catalog.py +59 -0
  83. contractforge_databricks/parity/models.py +7 -0
  84. contractforge_databricks/parity/scenarios.py +111 -0
  85. contractforge_databricks/partitioning/__init__.py +3 -0
  86. contractforge_databricks/partitioning/predicates.py +28 -0
  87. contractforge_databricks/preparation/__init__.py +47 -0
  88. contractforge_databricks/preparation/deduplicate.py +87 -0
  89. contractforge_databricks/preparation/encoding.py +37 -0
  90. contractforge_databricks/preparation/hashing.py +18 -0
  91. contractforge_databricks/preparation/pyspark.py +178 -0
  92. contractforge_databricks/preparation/pyspark_staging.py +70 -0
  93. contractforge_databricks/preparation/shape.py +209 -0
  94. contractforge_databricks/preparation/shape_validation.py +94 -0
  95. contractforge_databricks/preparation/staging.py +17 -0
  96. contractforge_databricks/preparation/zip_arrays.py +51 -0
  97. contractforge_databricks/presets/__init__.py +3 -0
  98. contractforge_databricks/presets/base.py +24 -0
  99. contractforge_databricks/presets/bronze.py +57 -0
  100. contractforge_databricks/presets/catalog.py +22 -0
  101. contractforge_databricks/presets/core.py +134 -0
  102. contractforge_databricks/presets/gold.py +62 -0
  103. contractforge_databricks/presets/modifiers.py +51 -0
  104. contractforge_databricks/presets/runtime.py +22 -0
  105. contractforge_databricks/presets/silver.py +101 -0
  106. contractforge_databricks/presets/write_engine.py +57 -0
  107. contractforge_databricks/quality/__init__.py +41 -0
  108. contractforge_databricks/quality/evaluation.py +178 -0
  109. contractforge_databricks/quality/persistence.py +81 -0
  110. contractforge_databricks/quality/registry.py +134 -0
  111. contractforge_databricks/quality/results.py +17 -0
  112. contractforge_databricks/quality/sql.py +113 -0
  113. contractforge_databricks/rendering/__init__.py +11 -0
  114. contractforge_databricks/rendering/bundle.py +93 -0
  115. contractforge_databricks/rendering/markdown.py +50 -0
  116. contractforge_databricks/rendering/names.py +56 -0
  117. contractforge_databricks/results.py +15 -0
  118. contractforge_databricks/runtime/__init__.py +101 -0
  119. contractforge_databricks/runtime/available_now.py +147 -0
  120. contractforge_databricks/runtime/bundles.py +211 -0
  121. contractforge_databricks/runtime/cache.py +20 -0
  122. contractforge_databricks/runtime/control_tables.py +19 -0
  123. contractforge_databricks/runtime/deploy.py +197 -0
  124. contractforge_databricks/runtime/detection.py +114 -0
  125. contractforge_databricks/runtime/dry_run.py +46 -0
  126. contractforge_databricks/runtime/errors.py +54 -0
  127. contractforge_databricks/runtime/file_selection.py +109 -0
  128. contractforge_databricks/runtime/finalization.py +168 -0
  129. contractforge_databricks/runtime/governance.py +37 -0
  130. contractforge_databricks/runtime/hooks.py +45 -0
  131. contractforge_databricks/runtime/http_file.py +37 -0
  132. contractforge_databricks/runtime/http_retry.py +15 -0
  133. contractforge_databricks/runtime/http_safety.py +9 -0
  134. contractforge_databricks/runtime/json_materialization.py +97 -0
  135. contractforge_databricks/runtime/lineage.py +164 -0
  136. contractforge_databricks/runtime/maintenance.py +43 -0
  137. contractforge_databricks/runtime/merge_validation.py +98 -0
  138. contractforge_databricks/runtime/metadata.py +21 -0
  139. contractforge_databricks/runtime/metrics.py +34 -0
  140. contractforge_databricks/runtime/models.py +32 -0
  141. contractforge_databricks/runtime/options.py +33 -0
  142. contractforge_databricks/runtime/orchestration_context.py +185 -0
  143. contractforge_databricks/runtime/orchestrator.py +147 -0
  144. contractforge_databricks/runtime/partitioning.py +93 -0
  145. contractforge_databricks/runtime/quality_quarantine.py +92 -0
  146. contractforge_databricks/runtime/rest_api.py +46 -0
  147. contractforge_databricks/runtime/rest_auth.py +21 -0
  148. contractforge_databricks/runtime/rest_pagination.py +21 -0
  149. contractforge_databricks/runtime/run_payload.py +177 -0
  150. contractforge_databricks/runtime/schema.py +106 -0
  151. contractforge_databricks/runtime/source_metadata.py +30 -0
  152. contractforge_databricks/runtime/source_registry.py +43 -0
  153. contractforge_databricks/runtime/source_schema.py +24 -0
  154. contractforge_databricks/runtime/sources.py +208 -0
  155. contractforge_databricks/runtime/spark.py +183 -0
  156. contractforge_databricks/runtime/spark_defaults.py +35 -0
  157. contractforge_databricks/runtime/storage_auth.py +132 -0
  158. contractforge_databricks/runtime/streaming.py +131 -0
  159. contractforge_databricks/runtime/success.py +104 -0
  160. contractforge_databricks/runtime/utils.py +52 -0
  161. contractforge_databricks/runtime/watermark.py +71 -0
  162. contractforge_databricks/runtime/windows.py +184 -0
  163. contractforge_databricks/runtime/write.py +66 -0
  164. contractforge_databricks/runtime/write_flow.py +146 -0
  165. contractforge_databricks/runtime/write_strategy.py +40 -0
  166. contractforge_databricks/schema/__init__.py +21 -0
  167. contractforge_databricks/schema/diff.py +11 -0
  168. contractforge_databricks/schema/policy.py +33 -0
  169. contractforge_databricks/schema/sync.py +23 -0
  170. contractforge_databricks/security/__init__.py +21 -0
  171. contractforge_databricks/security/errors.py +5 -0
  172. contractforge_databricks/security/redaction.py +5 -0
  173. contractforge_databricks/security/secrets.py +114 -0
  174. contractforge_databricks/security/source_policy.py +17 -0
  175. contractforge_databricks/shapes/__init__.py +3 -0
  176. contractforge_databricks/shapes/sql.py +123 -0
  177. contractforge_databricks/sources/__init__.py +67 -0
  178. contractforge_databricks/sources/artifacts.py +100 -0
  179. contractforge_databricks/sources/autoloader.py +48 -0
  180. contractforge_databricks/sources/bounded_streams.py +44 -0
  181. contractforge_databricks/sources/classification.py +115 -0
  182. contractforge_databricks/sources/delta_share.py +21 -0
  183. contractforge_databricks/sources/files.py +48 -0
  184. contractforge_databricks/sources/http_file.py +46 -0
  185. contractforge_databricks/sources/interpret.py +76 -0
  186. contractforge_databricks/sources/jdbc.py +32 -0
  187. contractforge_databricks/sources/metadata.py +18 -0
  188. contractforge_databricks/sources/native_passthrough.py +33 -0
  189. contractforge_databricks/sources/rds_iam.py +15 -0
  190. contractforge_databricks/sources/rds_iam_runtime.py +191 -0
  191. contractforge_databricks/sources/rest_api.py +33 -0
  192. contractforge_databricks/sources/support.py +50 -0
  193. contractforge_databricks/sources/table_refs.py +65 -0
  194. contractforge_databricks/sql/__init__.py +4 -0
  195. contractforge_databricks/sql/identifiers.py +17 -0
  196. contractforge_databricks/sql/literals.py +36 -0
  197. contractforge_databricks/state/__init__.py +39 -0
  198. contractforge_databricks/state/ddl.py +24 -0
  199. contractforge_databricks/state/migrations.py +146 -0
  200. contractforge_databricks/state/queries.py +149 -0
  201. contractforge_databricks/state/sql.py +116 -0
  202. contractforge_databricks/state/tables.py +9 -0
  203. contractforge_databricks/state/writer.py +83 -0
  204. contractforge_databricks/templates/__init__.py +15 -0
  205. contractforge_databricks/templates/catalog.py +205 -0
  206. contractforge_databricks/templates/catalog_parity.py +85 -0
  207. contractforge_databricks/templates/core.py +83 -0
  208. contractforge_databricks/templates/enrichment.py +175 -0
  209. contractforge_databricks/transforms/__init__.py +3 -0
  210. contractforge_databricks/transforms/sql.py +118 -0
  211. contractforge_databricks/watermark/__init__.py +6 -0
  212. contractforge_databricks/watermark/sql.py +91 -0
  213. contractforge_databricks/write_modes/__init__.py +20 -0
  214. contractforge_databricks/write_modes/registry.py +44 -0
  215. contractforge_databricks/write_modes/sql.py +33 -0
  216. contractforge_databricks/write_modes/strategy.py +192 -0
  217. contractforge_databricks-0.1.0.dist-info/METADATA +34 -0
  218. contractforge_databricks-0.1.0.dist-info/RECORD +220 -0
  219. contractforge_databricks-0.1.0.dist-info/WHEEL +4 -0
  220. contractforge_databricks-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,149 @@
1
+ """Databricks SQL lookup queries for control state."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_databricks.sql import quote_table_name, sql_int, sql_string
6
+
7
+
8
def render_lock_status_sql(*, target_table: str, locks_table: str = "main.ops.ctrl_ingestion_locks") -> str:
    """Render the SELECT that reads the lock row stored for ``target_table``.

    Args:
        target_table: Value compared against the ``target_table`` column;
            rendered through ``sql_string``.
        locks_table: Name of the locks table; rendered through
            ``quote_table_name``.

    Returns:
        A four-line SQL statement selecting the lock columns, limited to one
        row. No ORDER BY is emitted.
    """
    from_clause = f"FROM {quote_table_name(locks_table)}"
    where_clause = f"WHERE target_table = {sql_string(target_table)}"
    return (
        "SELECT run_id, owner, status, acquired_at_utc, expires_at_utc, ttl_minutes\n"
        f"{from_clause}\n"
        f"{where_clause}\n"
        "LIMIT 1"
    )
17
+
18
+
19
def render_find_idempotent_run_sql(
    *,
    target_table: str,
    idempotency_key: str,
    status: str | None = None,
    runs_table: str = "main.ops.ctrl_ingestion_runs",
) -> str:
    """Render the lookup of the latest run row for an idempotency key.

    Args:
        target_table: Value matched against the ``target_table`` column.
        idempotency_key: Value matched against the ``idempotency_key`` column.
        status: Optional extra equality filter on ``status``; skipped when
            falsy (``None`` or an empty string).
        runs_table: Name of the runs table; rendered through
            ``quote_table_name``.

    Returns:
        SQL selecting ``run_id`` and ``status`` of one row, ordered by
        ``run_ts_utc`` descending with NULLs last.
    """
    predicates: tuple[str, ...] = (
        f"target_table = {sql_string(target_table)}",
        f"idempotency_key = {sql_string(idempotency_key)}",
    )
    if status:
        predicates += (f"status = {sql_string(status)}",)
    where_clause = " AND ".join(predicates)
    statement = (
        "SELECT run_id, status",
        f"FROM {quote_table_name(runs_table)}",
        f"WHERE {where_clause}",
        "ORDER BY run_ts_utc DESC NULLS LAST",
        "LIMIT 1",
    )
    return "\n".join(statement)
41
+
42
+
43
def render_find_idempotent_stream_sql(
    *,
    target_table: str,
    idempotency_key: str,
    status: str | None = None,
    streams_table: str = "main.ops.ctrl_ingestion_streams",
) -> str:
    """Render the lookup of the latest stream row for an idempotency key.

    Args:
        target_table: Value matched against the ``target_table`` column.
        idempotency_key: Value matched against the ``idempotency_key`` column.
        status: Optional extra equality filter on ``status``; skipped when
            falsy (``None`` or an empty string).
        streams_table: Name of the streams table; rendered through
            ``quote_table_name``.

    Returns:
        SQL selecting ``stream_run_id`` and ``status`` of one row, ordered by
        ``started_at_utc`` descending with NULLs last.
    """
    conditions = [
        f"target_table = {sql_string(target_table)}",
        f"idempotency_key = {sql_string(idempotency_key)}",
    ]
    # The status predicate is only rendered when a non-empty status is given.
    conditions.extend([f"status = {sql_string(status)}"] if status else [])
    source = quote_table_name(streams_table)
    combined = " AND ".join(conditions)
    return (
        "SELECT stream_run_id, status\n"
        f"FROM {source}\n"
        f"WHERE {combined}\n"
        "ORDER BY started_at_utc DESC NULLS LAST\n"
        "LIMIT 1"
    )
65
+
66
+
67
def render_has_successful_run_sql(
    *,
    target_table: str,
    idempotency_key: str,
    runs_table: str = "main.ops.ctrl_ingestion_runs",
) -> str:
    """Render a boolean probe for a ``SUCCESS`` run with this idempotency key.

    Args:
        target_table: Value matched against the ``target_table`` column.
        idempotency_key: Value matched against the ``idempotency_key`` column.
        runs_table: Name of the runs table; rendered through
            ``quote_table_name``.

    Returns:
        SQL yielding a single ``has_successful_run`` column computed as
        ``count(1) > 0`` over the matching rows.
    """
    source = quote_table_name(runs_table)
    target_literal = sql_string(target_table)
    key_literal = sql_string(idempotency_key)
    where_clause = (
        f"WHERE target_table = {target_literal} "
        f"AND idempotency_key = {key_literal} "
        "AND status = 'SUCCESS'"
    )
    return "\n".join(
        (
            "SELECT count(1) > 0 AS has_successful_run",
            f"FROM {source}",
            where_clause,
        )
    )
83
+
84
+
85
def render_select_previous_watermark_sql(
    *,
    target_table: str,
    state_table: str = "main.ops.ctrl_ingestion_state",
) -> str:
    """Render the SELECT that reads ``watermark_value`` for ``target_table``.

    Args:
        target_table: Value matched against the ``target_table`` column.
        state_table: Name of the state table; rendered through
            ``quote_table_name``.

    Returns:
        SQL selecting ``watermark_value`` from one matching row (no ORDER BY).
    """
    source = quote_table_name(state_table)
    target_literal = sql_string(target_table)
    statement = (
        "SELECT watermark_value",
        f"FROM {source}",
        f"WHERE target_table = {target_literal}",
        "LIMIT 1",
    )
    return "\n".join(statement)
98
+
99
+
100
def render_control_metadata_current_sql(
    *,
    framework_version: str,
    ctrl_schema_version: int,
    metadata_table: str = "main.ops.ctrl_ingestion_metadata",
) -> str:
    """Render a probe for a metadata row matching the given versions.

    Args:
        framework_version: Value matched against ``framework_version``.
        ctrl_schema_version: Value matched against ``ctrl_schema_version``;
            rendered through ``sql_int``.
        metadata_table: Name of the metadata table; rendered through
            ``quote_table_name``.

    Returns:
        SQL that selects ``1`` from at most one row whose ``component`` is
        ``'contractforge'`` and whose two version columns equal the arguments.
    """
    source = quote_table_name(metadata_table)
    version_literal = sql_string(framework_version)
    schema_literal = sql_int(ctrl_schema_version)
    return (
        "SELECT 1\n"
        f"FROM {source}\n"
        "WHERE component = 'contractforge'\n"
        f" AND framework_version = {version_literal}\n"
        f" AND ctrl_schema_version = {schema_literal}\n"
        "LIMIT 1"
    )
116
+
117
+
118
def render_record_control_metadata_sql(
    *,
    framework_version: str,
    ctrl_schema_version: int,
    metadata_table: str = "main.ops.ctrl_ingestion_metadata",
) -> str:
    """Render the MERGE that upserts the ``'contractforge'`` metadata row.

    Args:
        framework_version: Stored in ``framework_version``.
        ctrl_schema_version: Stored in ``ctrl_schema_version``; rendered
            through ``sql_int``.
        metadata_table: Name of the metadata table; rendered through
            ``quote_table_name``.

    Returns:
        A MERGE statement keyed on ``component`` that updates the two version
        columns and ``updated_at_utc`` when matched and inserts a new row
        otherwise.
    """
    columns = ("component", "framework_version", "ctrl_schema_version", "updated_at_utc")
    statement = [
        f"MERGE INTO {quote_table_name(metadata_table)} t",
        "USING (",
        "SELECT",
        "'contractforge' AS component,",
        f"{sql_string(framework_version)} AS framework_version,",
        f"{sql_int(ctrl_schema_version)} AS ctrl_schema_version,",
        "current_timestamp() AS updated_at_utc",
        ") s",
        "ON t.component = s.component",
        "WHEN MATCHED THEN UPDATE SET",
        # The merge key (component) is not reassigned on update.
        ",\n".join(f"t.{column} = s.{column}" for column in columns[1:]),
        "WHEN NOT MATCHED THEN INSERT (",
        ",\n".join(columns),
        ") VALUES (",
        ",\n".join(f"s.{column}" for column in columns),
        ")",
    ]
    return "\n".join(statement).strip()
@@ -0,0 +1,116 @@
1
+ """Databricks SQL for locks, state and idempotency lookups."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_databricks.sql import quote_table_name, sql_int, sql_string
6
+ from contractforge_databricks.state.tables import state_table_names
7
+
8
+
9
def render_acquire_lock_sql(
    *,
    target_table: str,
    run_id: str,
    owner: str | None = None,
    ttl_minutes: int = 60,
    catalog: str = "main",
    schema: str = "ops",
) -> str:
    """Render the MERGE that tries to take the lock row for ``target_table``.

    The statement inserts a new ``ACTIVE`` row when none exists and overwrites
    an existing row only when that row is not ``ACTIVE`` or its
    ``expires_at_utc`` is already in the past.

    Args:
        target_table: Lock key; also the MERGE join column.
        run_id: Identifier written to ``run_id``.
        owner: Written to ``owner``; passed to ``sql_string`` unchanged, also
            when ``None``.
        ttl_minutes: Used both for the ``expires_at_utc`` interval (via
            ``int``) and the ``ttl_minutes`` column (via ``sql_int``).
        catalog: Catalog handed to ``state_table_names``.
        schema: Schema handed to ``state_table_names``.

    Returns:
        The MERGE statement as a single string.
    """
    locks_table = quote_table_name(state_table_names(catalog, schema)["locks"])
    projections = (
        f"{sql_string(target_table)} AS target_table",
        f"{sql_string(run_id)} AS run_id",
        f"{sql_string(owner)} AS owner",
        "current_timestamp() AS acquired_at_utc",
        f"current_timestamp() + INTERVAL {int(ttl_minutes)} MINUTES AS expires_at_utc",
        f"{sql_int(ttl_minutes)} AS ttl_minutes",
        "CAST(NULL AS TIMESTAMP) AS released_at_utc",
        "'ACTIVE' AS status",
    )
    statement = (
        f"MERGE INTO {locks_table} t",
        "USING (",
        "SELECT",
        ",\n".join(projections),
        ") s",
        "ON t.target_table = s.target_table",
        "WHEN MATCHED AND (t.status <> 'ACTIVE' OR t.expires_at_utc < current_timestamp()) THEN UPDATE SET *",
        "WHEN NOT MATCHED THEN INSERT *",
    )
    return "\n".join(statement).strip()
36
+
37
+
38
def render_release_lock_sql(
    *, target_table: str, run_id: str, catalog: str = "main", schema: str = "ops"
) -> str:
    """Render the UPDATE that marks a lock row as ``RELEASED``.

    Args:
        target_table: Value matched against ``target_table``.
        run_id: Value matched against ``run_id``; rows held under a different
            ``run_id`` are not touched by the WHERE clause.
        catalog: Catalog handed to ``state_table_names``.
        schema: Schema handed to ``state_table_names``.

    Returns:
        An UPDATE statement setting ``status`` and ``released_at_utc``.
    """
    locks_table = quote_table_name(state_table_names(catalog, schema)["locks"])
    target_literal = sql_string(target_table)
    run_literal = sql_string(run_id)
    statement = (
        f"UPDATE {locks_table}",
        "SET status = 'RELEASED',",
        "released_at_utc = current_timestamp()",
        f"WHERE target_table = {target_literal} AND run_id = {run_literal}",
    )
    return "\n".join(statement).strip()
48
+
49
+
50
def render_upsert_state_sql(
    *,
    target_table: str,
    run_id: str,
    status: str,
    rows_written: int,
    watermark_column: str | None = None,
    watermark_value: str | None = None,
    success_at_utc: str | None = None,
    error_message: str | None = None,
    table_version: str | None = None,
    write_completed_at_utc: str | None = None,
    watermark_candidate: str | None = None,
    parent_run_id: str | None = None,
    run_group_id: str | None = None,
    master_job_id: str | None = None,
    master_run_id: str | None = None,
    catalog: str = "main",
    schema: str = "ops",
) -> str:
    """Render the MERGE that upserts the state row for ``target_table``.

    Every argument is rendered into a one-row source SELECT. On a match all
    non-key columns of the target row are overwritten with the source values,
    including values that were passed as ``None``; otherwise the source row is
    inserted with ``INSERT *``.

    Args:
        target_table: State key; also the MERGE join column.
        run_id: Written to ``last_run_id``.
        status: Written to ``last_status``.
        rows_written: Written to ``last_rows_written`` via ``sql_int``.
        watermark_column: Written to ``watermark_column``.
        watermark_value: Written to ``watermark_value``.
        success_at_utc: Cast to TIMESTAMP and written to
            ``last_success_at_utc``.
        error_message: Shortened by ``_truncate`` and written to
            ``last_error_message``.
        table_version: Written to ``last_table_version``.
        write_completed_at_utc: Cast to TIMESTAMP and written to
            ``last_write_completed_at_utc``.
        watermark_candidate: Written to ``last_watermark_candidate``.
        parent_run_id: Written to ``parent_run_id``.
        run_group_id: Written to ``run_group_id``.
        master_job_id: Written to ``master_job_id``.
        master_run_id: Written to ``master_run_id``.
        catalog: Catalog handed to ``state_table_names``.
        schema: Schema handed to ``state_table_names``.

    Returns:
        The MERGE statement as a single string.
    """
    state_table = quote_table_name(state_table_names(catalog, schema)["state"])
    # (expression, column) pairs in the order they appear in the source SELECT.
    projections = [
        (sql_string(target_table), "target_table"),
        (sql_string(watermark_column), "watermark_column"),
        (sql_string(watermark_value), "watermark_value"),
        (f"CAST({sql_string(success_at_utc)} AS TIMESTAMP)", "last_success_at_utc"),
        (sql_string(run_id), "last_run_id"),
        (sql_string(status), "last_status"),
        (sql_int(rows_written), "last_rows_written"),
        (sql_string(_truncate(error_message)), "last_error_message"),
        (sql_string(parent_run_id), "parent_run_id"),
        (sql_string(run_group_id), "run_group_id"),
        (sql_string(master_job_id), "master_job_id"),
        (sql_string(master_run_id), "master_run_id"),
        (sql_string(table_version), "last_table_version"),
        (f"CAST({sql_string(write_completed_at_utc)} AS TIMESTAMP)", "last_write_completed_at_utc"),
        (sql_string(watermark_candidate), "last_watermark_candidate"),
        ("current_timestamp()", "last_updated_at_utc"),
    ]
    select_list = ",\n".join(f"{expression} AS {column}" for expression, column in projections)
    # The update touches every projected column except the merge key, in the same order.
    assignments = ",\n".join(f"t.{column} = s.{column}" for _, column in projections[1:])
    statement = (
        f"MERGE INTO {state_table} t",
        "USING (",
        "SELECT",
        select_list,
        ") s",
        "ON t.target_table = s.target_table",
        "WHEN MATCHED THEN UPDATE SET",
        assignments,
        "WHEN NOT MATCHED THEN INSERT *",
    )
    return "\n".join(statement).strip()
111
+
112
+
113
+ def _truncate(value: str | None, limit: int = 4000) -> str | None:
114
+ if value is None or len(value) <= limit:
115
+ return value
116
+ return value[:limit]
@@ -0,0 +1,9 @@
1
+ """Databricks operational state table names."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_core.evidence import STATE_TABLES
6
+
7
+
8
def state_table_names(catalog: str, schema: str) -> dict[str, str]:
    """Map each ``STATE_TABLES`` key to ``<catalog>.<schema>.<table>``."""
    qualified: dict[str, str] = {}
    for key, table in STATE_TABLES.items():
        qualified[key] = f"{catalog}.{schema}.{table}"
    return qualified
@@ -0,0 +1,83 @@
1
+ """State writer using an injected SQL runner."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from collections.abc import Callable
7
+ from typing import Any
8
+
9
+ from contractforge_databricks.execution.sql_merge import SqlRunner
10
+ from contractforge_databricks.state.queries import render_lock_status_sql, render_record_control_metadata_sql
11
+ from contractforge_databricks.state.sql import (
12
+ render_acquire_lock_sql,
13
+ render_release_lock_sql,
14
+ render_upsert_state_sql,
15
+ )
16
+
17
+
18
+ logger = logging.getLogger("contractforge_databricks")
19
+
20
+
21
class StateWriter:
    """Execute lock, state and metadata SQL through an injected runner.

    The runner only needs a ``sql(statement)`` method. ``query_one`` is an
    optional callable that runs a query and returns one row as a dict (or
    ``None``); when it is supplied, ``acquire_lock`` uses it to confirm that
    the lock row belongs to the calling run.
    """

    def __init__(
        self,
        runner: SqlRunner,
        *,
        catalog: str = "main",
        schema: str = "ops",
        query_one: Callable[[str], dict[str, Any] | None] | None = None,
    ) -> None:
        """Store the runner, the control-table location and the optional reader."""
        self.runner = runner
        self.catalog = catalog
        self.schema = schema
        self.query_one = query_one

    def acquire_lock(self, *, target_table: str, run_id: str, owner: str | None = None, ttl_minutes: int = 60) -> None:
        """Run the acquire-lock MERGE and, when possible, verify ownership.

        Raises:
            RuntimeError: If ``query_one`` is configured and the lock row read
                back is missing, carries another ``run_id``, or is not
                ``ACTIVE``.
        """
        acquire_sql = render_acquire_lock_sql(
            target_table=target_table,
            run_id=run_id,
            owner=owner,
            ttl_minutes=ttl_minutes,
            catalog=self.catalog,
            schema=self.schema,
        )
        self.runner.sql(acquire_sql)

        read_row = self.query_one
        if read_row is None:
            # Without a reader the outcome of the MERGE cannot be checked.
            return

        status_sql = render_lock_status_sql(
            target_table=target_table,
            locks_table=f"{self.catalog}.{self.schema}.ctrl_ingestion_locks",
        )
        row = read_row(status_sql)
        if row and row.get("run_id") == run_id and row.get("status") == "ACTIVE":
            return
        raise RuntimeError(
            f"Lock is busy for {target_table}. This run_id={run_id} did not acquire the lock. "
            f"Current lock: {row}"
        )

    def release_lock(self, *, target_table: str, run_id: str) -> None:
        """Run the release-lock UPDATE; failures are logged, not raised."""
        try:
            release_sql = render_release_lock_sql(
                target_table=target_table,
                run_id=run_id,
                catalog=self.catalog,
                schema=self.schema,
            )
            self.runner.sql(release_sql)
        except Exception as exc:
            # Best effort: a failed release is reported as a warning only.
            logger.warning("Failed to release lock for %s: %s", target_table, exc)

    def upsert_state(self, **kwargs: object) -> None:
        """Render and run the state MERGE, forwarding ``kwargs`` to the renderer."""
        statement = render_upsert_state_sql(catalog=self.catalog, schema=self.schema, **kwargs)
        self.runner.sql(statement)

    def record_control_metadata(self, *, framework_version: str, ctrl_schema_version: int) -> None:
        """Render and run the MERGE that records the framework and schema versions."""
        metadata_table = f"{self.catalog}.{self.schema}.ctrl_ingestion_metadata"
        statement = render_record_control_metadata_sql(
            framework_version=framework_version,
            ctrl_schema_version=ctrl_schema_version,
            metadata_table=metadata_table,
        )
        self.runner.sql(statement)
@@ -0,0 +1,15 @@
1
+ from contractforge_databricks.templates.core import (
2
+ contract_template_details,
3
+ contract_template_files,
4
+ get_contract_template,
5
+ list_contract_templates,
6
+ recommend_contract_templates,
7
+ )
8
+
9
+ __all__ = [
10
+ "contract_template_details",
11
+ "contract_template_files",
12
+ "get_contract_template",
13
+ "list_contract_templates",
14
+ "recommend_contract_templates",
15
+ ]
@@ -0,0 +1,205 @@
1
+ """Databricks template catalog for split ContractForge contracts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ ContractTemplate = dict[str, Any]
8
+ TEMPLATE_META_KEY = "_template"
9
+
10
+
11
def _template(
    name: str,
    category: str,
    description: str,
    ingestion: dict[str, Any],
    *,
    annotations: dict[str, Any] | None = None,
    operations: dict[str, Any] | None = None,
    access: dict[str, Any] | None = None,
    priority: int = 50,
) -> ContractTemplate:
    """Assemble one template dict from its metadata and contract sections.

    Args:
        name: Stored under the metadata key as ``name``.
        category: Stored under the metadata key as ``category``.
        description: Stored under the metadata key as ``description``.
        ingestion: Stored as the ``ingestion`` section (same object, not copied).
        annotations: Optional ``annotations`` section; omitted when falsy.
        operations: Optional ``operations`` section; omitted when falsy.
        access: Optional ``access`` section; omitted when falsy.
        priority: Stored under the metadata key as ``recommendation_priority``.

    Returns:
        A dict with ``TEMPLATE_META_KEY``, ``ingestion`` and any non-empty
        optional sections.
    """
    metadata = {
        "name": name,
        "category": category,
        "description": description,
        "recommendation_priority": priority,
    }
    template: ContractTemplate = {TEMPLATE_META_KEY: metadata, "ingestion": ingestion}
    optional_sections = (
        ("annotations", annotations),
        ("operations", operations),
        ("access", access),
    )
    for section, payload in optional_sections:
        if payload:
            template[section] = payload
    return template
38
+
39
+
40
def _target(schema: str, table: str) -> dict[str, str]:
    """Return a target reference in the ``main`` catalog for ``schema.table``."""
    return dict(catalog="main", schema=schema, table=table)
42
+
43
+
44
def _ops(domain: str) -> dict[str, Any]:
    """Return the default operations section for ``domain``.

    Owner, criticality and expected frequency are fixed values; the runbook
    URL is the example wiki path with ``domain`` appended.
    """
    runbook_url = f"https://wiki.example.com/runbooks/{domain}"
    operations: dict[str, Any] = {"owner": "data-platform", "domain": domain}
    operations["criticality"] = "medium"
    operations["expected_frequency"] = "daily"
    operations["runbook_url"] = runbook_url
    return operations
52
+
53
+
54
def _ann(description: str) -> dict[str, Any]:
    """Return an annotations section using ``description`` as the table comment."""
    table_annotations = {
        "comment": description,
        "tags": {"contractforge": "databricks"},
    }
    return {"policy": "warn", "table": table_annotations}
56
+
57
+
58
def _access(group: str) -> dict[str, Any]:
    """Return an access section granting ``SELECT`` to ``group``.

    The access policy is fixed to ``validate_only`` mode with ``warn`` on drift.
    """
    policy = {"mode": "validate_only", "on_drift": "warn"}
    select_grant = {"principal": group, "privileges": ["SELECT"]}
    return {"access_policy": policy, "grants": [select_grant]}
60
+
61
+
62
+ from contractforge_databricks.templates.catalog_parity import PARITY_CONTRACT_TEMPLATES # noqa: E402
63
+ from contractforge_databricks.templates.enrichment import enrich_contractforge_parity # noqa: E402
64
+
65
+
66
+ BUILTIN_CONTRACT_TEMPLATES: dict[str, ContractTemplate] = {
67
+ **PARITY_CONTRACT_TEMPLATES,
68
+ "bronze_rest_api_incremental": _template(
69
+ "bronze_rest_api_incremental",
70
+ "bronze",
71
+ "REST API landing through reviewed native passthrough or bounded file fetch.",
72
+ {
73
+ "preset": "bronze_file_append",
74
+ "source": {"type": "native_passthrough", "system": "rest_api", "object": "orders"},
75
+ "target": _target("raw", "b_orders_api"),
76
+ },
77
+ annotations=_ann("Raw REST API order events."),
78
+ operations=_ops("b_orders_api"),
79
+ ),
80
+ "bronze_http_file_csv_snapshot": _template(
81
+ "bronze_http_file_csv_snapshot",
82
+ "bronze",
83
+ "HTTP CSV snapshot landing.",
84
+ {"preset": "bronze_full_overwrite", "source": {"type": "http_csv", "url": "https://example.com/orders.csv"}, "target": _target("raw", "b_orders_http")},
85
+ annotations=_ann("Raw HTTP CSV orders."),
86
+ operations=_ops("b_orders_http"),
87
+ ),
88
+ "bronze_autoloader_json": _template(
89
+ "bronze_autoloader_json",
90
+ "bronze",
91
+ "Portable incremental files rendered as Databricks Auto Loader.",
92
+ {"preset": "bronze_autoloader_append", "source": {"type": "incremental_files", "format": "json", "path": "s3://bucket/landing/orders/"}, "target": _target("raw", "b_orders_json")},
93
+ annotations=_ann("Raw incremental JSON orders."),
94
+ operations=_ops("b_orders_json"),
95
+ ),
96
+ "bronze_autoloader_available_now_json": _template(
97
+ "bronze_autoloader_available_now_json",
98
+ "bronze",
99
+ "Available-now incremental JSON ingestion.",
100
+ {"preset": "bronze_autoloader_append", "source": {"type": "incremental_files", "format": "json", "trigger": "available_now", "path": "s3://bucket/landing/orders/"}, "target": _target("raw", "b_orders_available_now")},
101
+ annotations=_ann("Available-now incremental JSON orders."),
102
+ operations=_ops("b_orders_available_now"),
103
+ ),
104
+ "bronze_autoloader_governed_delta": _template(
105
+ "bronze_autoloader_governed_delta",
106
+ "bronze",
107
+ "Governed Auto Loader landing with Delta optimization preview.",
108
+ {"preset": ["bronze_autoloader_append", "delta_optimized_writes", "governance_uc_basic"], "source": {"type": "incremental_files", "format": "json", "path": "s3://bucket/landing/governed/"}, "target": _target("raw", "b_governed_delta")},
109
+ annotations=_ann("Governed raw landing table."),
110
+ operations=_ops("b_governed_delta"),
111
+ access=_access("data-engineers"),
112
+ ),
113
+ "bronze_object_storage_nested_json_shape": _template(
114
+ "bronze_object_storage_nested_json_shape",
115
+ "bronze",
116
+ "Object-storage nested JSON with shape intent.",
117
+ {"preset": "bronze_file_append", "source": {"type": "json", "path": "s3://bucket/events/"}, "shape": {"parse_json": [{"column": "payload", "schema": "STRUCT<id: STRING>", "alias": "payload_obj"}]}, "target": _target("raw", "b_nested_events")},
118
+ annotations=_ann("Nested JSON event landing."),
119
+ operations=_ops("b_nested_events"),
120
+ ),
121
+ "bronze_object_storage_small_files": _template(
122
+ "bronze_object_storage_small_files",
123
+ "bronze",
124
+ "Object-storage small files batch append.",
125
+ {"preset": "bronze_file_append", "source": {"type": "parquet", "path": "s3://bucket/small-files/"}, "target": _target("raw", "b_small_files")},
126
+ annotations=_ann("Small-file batch landing."),
127
+ operations=_ops("b_small_files"),
128
+ ),
129
+ "silver_jdbc_scd1_upsert": _template(
130
+ "silver_jdbc_scd1_upsert",
131
+ "silver",
132
+ "JDBC SCD1 current-state upsert.",
133
+ {"preset": ["silver_incremental_watermark_upsert", "quality_quarantine", "delta_optimized_writes"], "source": {"type": "jdbc", "table": "public.orders"}, "target": _target("curated", "s_orders"), "merge_keys": ["order_id"], "watermark_columns": ["updated_at"]},
134
+ annotations=_ann("Current-state orders from JDBC."),
135
+ operations=_ops("s_orders"),
136
+ access=_access("sales-analytics"),
137
+ ),
138
+ "silver_jdbc_rds_iam_hash_diff": _template(
139
+ "silver_jdbc_rds_iam_hash_diff",
140
+ "silver",
141
+ "JDBC RDS IAM hash-diff append.",
142
+ {"preset": ["silver_hash_diff_append", "quality_quarantine"], "source": {"type": "postgres", "table": "public.orders", "auth": {"type": "rds_iam"}}, "target": _target("curated", "s_orders_hash_diff"), "hash_keys": ["order_id"]},
143
+ annotations=_ann("Hash-diff order changes from JDBC."),
144
+ operations=_ops("s_orders_hash_diff"),
145
+ ),
146
+ "silver_lakeflow_auto_cdc_scd1_preview": _template(
147
+ "silver_lakeflow_auto_cdc_scd1_preview",
148
+ "silver",
149
+ "Lakeflow AUTO CDC SCD1 review artifact with Delta fallback semantics.",
150
+ {"preset": ["silver_scd1_upsert", "delta_liquid_clustering"], "source": {"type": "table", "table": "main.raw.customer_cdc"}, "target": _target("curated", "s_customers_current"), "merge_keys": ["customer_id"], "extensions": {"databricks": {"cluster_columns": ["customer_id"], "write_engine": {"requested": "lakeflow_auto_cdc", "fallback_policy": "preview_only"}}}},
151
+ annotations=_ann("Current customers with Lakeflow review evidence."),
152
+ operations=_ops("s_customers_current"),
153
+ ),
154
+ "silver_lakeflow_auto_cdc_scd2_preview": _template(
155
+ "silver_lakeflow_auto_cdc_scd2_preview",
156
+ "silver",
157
+ "Lakeflow AUTO CDC SCD2 review artifact with Delta baseline.",
158
+ {"preset": ["silver_scd2_historical", "delta_liquid_clustering"], "source": {"type": "table", "table": "main.raw.product_cdc"}, "target": _target("curated", "s_products_history"), "merge_keys": ["product_id"], "extensions": {"databricks": {"cluster_columns": ["product_id"], "write_engine": {"requested": "lakeflow_auto_cdc", "fallback_policy": "preview_only"}}}},
159
+ annotations=_ann("Product SCD2 history with Lakeflow review evidence."),
160
+ operations=_ops("s_products_history"),
161
+ ),
162
+ "silver_raw_json_payload_shape": _template(
163
+ "silver_raw_json_payload_shape",
164
+ "silver",
165
+ "Raw JSON payload parsing into a curated table.",
166
+ {"preset": "silver_scd1_upsert", "source": {"type": "table", "table": "main.raw.b_events"}, "shape": {"parse_json": [{"column": "payload", "schema": "STRUCT<event_id: STRING>", "alias": "payload_obj"}], "columns": {"payload_obj.event_id": {"alias": "event_id", "cast": "STRING"}}}, "target": _target("curated", "s_events"), "merge_keys": ["event_id"]},
167
+ annotations=_ann("Curated event payloads."),
168
+ operations=_ops("s_events"),
169
+ ),
170
+ "silver_parallel_arrays_shape": _template(
171
+ "silver_parallel_arrays_shape",
172
+ "silver",
173
+ "Parallel array normalization review template.",
174
+ {"preset": "silver_scd1_upsert", "source": {"type": "table", "table": "main.raw.b_forecast"}, "shape": {"zip_arrays": [{"alias": "hour", "columns": {"times": "time", "values": "value"}}], "arrays": [{"path": "hour", "mode": "explode_outer", "alias": "hour"}]}, "target": _target("curated", "s_hourly_forecast"), "merge_keys": ["forecast_id"]},
175
+ annotations=_ann("Forecast rows derived from parallel arrays."),
176
+ operations=_ops("s_hourly_forecast"),
177
+ ),
178
+ "silver_snapshot_soft_delete": _template(
179
+ "silver_snapshot_soft_delete",
180
+ "silver",
181
+ "Current-state snapshot with soft delete for missing rows.",
182
+ {"preset": "silver_snapshot_soft_delete", "source": {"type": "table", "table": "main.raw.b_devices_snapshot"}, "target": _target("curated", "s_devices"), "merge_keys": ["device_id"]},
183
+ annotations=_ann("Device snapshot with soft delete semantics."),
184
+ operations=_ops("s_devices"),
185
+ ),
186
+ "silver_scd2_history": _template(
187
+ "silver_scd2_history",
188
+ "silver",
189
+ "SCD2 historical table.",
190
+ {"preset": "silver_scd2_historical", "source": {"type": "table", "table": "main.raw.b_customers"}, "target": _target("curated", "s_customers_history"), "merge_keys": ["customer_id"]},
191
+ annotations=_ann("Customer SCD2 history."),
192
+ operations=_ops("s_customers_history"),
193
+ ),
194
+ "gold_full_refresh_kpi": _template(
195
+ "gold_full_refresh_kpi",
196
+ "gold",
197
+ "Gold KPI table recalculated by full refresh.",
198
+ {"preset": "gold_full_refresh", "source": {"type": "sql", "query": "SELECT order_date, count(*) AS orders FROM main.curated.s_orders GROUP BY order_date"}, "target": _target("analytics", "g_daily_orders")},
199
+ annotations=_ann("Daily order KPI table."),
200
+ operations=_ops("g_daily_orders"),
201
+ access=_access("executive-dashboards"),
202
+ ),
203
+ }
204
+
205
+ enrich_contractforge_parity(BUILTIN_CONTRACT_TEMPLATES)  # return value is discarded; NOTE(review): presumably extends BUILTIN_CONTRACT_TEMPLATES in place at import time — confirm
@@ -0,0 +1,85 @@
1
+ """Additional templates ported from the original ContractForge catalog."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from contractforge_databricks.templates.catalog import _access, _ann, _ops, _target, _template
8
+
9
+ ContractTemplate = dict[str, Any]
10
+
11
+ PARITY_CONTRACT_TEMPLATES: dict[str, ContractTemplate] = {  # keyed by template name; each key repeats the first argument given to _template()
12
+ "bronze_blob_partitioned_files": _template(  # bronze entry: parquet files read from an s3:// path (preset bronze_file_append, mode scd0_append)
13
+ "bronze_blob_partitioned_files",
14
+ "bronze",
15
+ "Bronze batch ingestion for partitioned files in object storage.",
16
+ {
17
+ "preset": "bronze_file_append",
18
+ "source": {
19
+ "type": "s3",
20
+ "format": "parquet",
21
+ "path": "s3://company-landing/orders/",
22
+ "options": {"recursiveFileLookup": True, "pathGlobFilter": "*.parquet"},
23
+ "read": {
24
+ "source_complete": True,
25
+ "schema": "order_id STRING, order_date DATE, customer_id STRING, amount DOUBLE",
26
+ "file_regex": r"^year=2026/month=05/.*/orders_\d+\.parquet$",  # example pattern hard-codes the year=2026/month=05 partition; NOTE(review): presumably a placeholder to adapt per deployment — confirm
27
+ "file_regex_scope": "relative_path",
28
+ "file_regex_max_listed": 50000,
29
+ },
30
+ },
31
+ "target": _target("raw", "b_orders_files"),
32
+ "layer": "bronze",
33
+ "mode": "scd0_append",
34
+ "schema_policy": "additive_only",
35
+ "quality_rules": {
36
+ "not_null": ["order_id"],
37
+ "expressions": [
38
+ {
39
+ "name": "valid_amount",
40
+ "expression": "amount IS NULL OR amount >= 0",  # expression is true for NULL or non-negative amounts
41
+ "severity": "warn",
42
+ "message": "Negative amount in raw file.",
43
+ }
44
+ ],
45
+ },
46
+ },
47
+ annotations=_ann("Partitioned order files in object storage."),
48
+ operations=_ops("b_orders_files"),
49
+ ),
50
+ "silver_scd1_hash_diff": _template(  # silver entry: reads table main.raw.b_products (preset silver_hash_diff_append, mode scd1_hash_diff)
51
+ "silver_scd1_hash_diff",
52
+ "silver",
53
+ "Silver append-only hash diff retaining changed versions.",
54
+ {
55
+ "preset": "silver_hash_diff_append",
56
+ "source": {"type": "table", "table": "main.raw.b_products"},
57
+ "target": _target("curated", "s_products_hash_diff"),
58
+ "layer": "silver",
59
+ "mode": "scd1_hash_diff",
60
+ "hash_keys": ["product_id"],
61
+ "hash_exclude_columns": ["updated_at"],  # NOTE(review): presumably keeps updated_at-only changes from producing a new hash — confirm
62
+ "transform": {
63
+ "deduplicate": {
64
+ "keys": ["product_id"],
65
+ "order_by": "updated_at DESC NULLS LAST",
66
+ }
67
+ },
68
+ "quality_rules": {
69
+ "not_null": ["product_id"],
70
+ "expressions": [
71
+ {
72
+ "name": "valid_product_status",
73
+ "expression": "status IS NULL OR status IN ('active', 'inactive', 'discontinued')",
74
+ "severity": "quarantine",
75
+ "message": "Invalid product status.",
76
+ }
77
+ ],
78
+ },
79
+ },
80
+ annotations=_ann("Changed product versions detected by hash diff."),
81
+ operations=_ops("s_products_hash_diff"),
82
+ access=_access("catalog-analytics"),
83
+ priority=10,  # NOTE(review): priority semantics are defined by _template(), which is not visible here — confirm
84
+ ),
85
+ }