contractforge-databricks 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. contractforge_databricks/__init__.py +172 -0
  2. contractforge_databricks/adapter.py +69 -0
  3. contractforge_databricks/annotations/__init__.py +10 -0
  4. contractforge_databricks/annotations/application.py +52 -0
  5. contractforge_databricks/annotations/audit.py +49 -0
  6. contractforge_databricks/annotations/sql.py +142 -0
  7. contractforge_databricks/api.py +65 -0
  8. contractforge_databricks/bundles/__init__.py +9 -0
  9. contractforge_databricks/bundles/assets.py +47 -0
  10. contractforge_databricks/bundles/project.py +213 -0
  11. contractforge_databricks/bundles/project_config.py +133 -0
  12. contractforge_databricks/capabilities/__init__.py +17 -0
  13. contractforge_databricks/capabilities/builders.py +43 -0
  14. contractforge_databricks/capabilities/evaluate.py +162 -0
  15. contractforge_databricks/capabilities/mapping.py +36 -0
  16. contractforge_databricks/capabilities/models.py +44 -0
  17. contractforge_databricks/capabilities/runtime.py +111 -0
  18. contractforge_databricks/capabilities/uc.py +47 -0
  19. contractforge_databricks/cli.py +196 -0
  20. contractforge_databricks/cli_deploy.py +98 -0
  21. contractforge_databricks/cli_governance.py +142 -0
  22. contractforge_databricks/cli_io.py +91 -0
  23. contractforge_databricks/cli_maintenance.py +69 -0
  24. contractforge_databricks/coercion.py +31 -0
  25. contractforge_databricks/contract_extensions.py +70 -0
  26. contractforge_databricks/cost/__init__.py +11 -0
  27. contractforge_databricks/cost/model.py +22 -0
  28. contractforge_databricks/cost/report.py +65 -0
  29. contractforge_databricks/cost/sql.py +136 -0
  30. contractforge_databricks/dashboards/__init__.py +15 -0
  31. contractforge_databricks/dashboards/control_tables.py +150 -0
  32. contractforge_databricks/diagnostics/__init__.py +7 -0
  33. contractforge_databricks/diagnostics/explain.py +40 -0
  34. contractforge_databricks/environment.py +53 -0
  35. contractforge_databricks/evidence/__init__.py +98 -0
  36. contractforge_databricks/evidence/ddl.py +35 -0
  37. contractforge_databricks/evidence/governance_log.py +175 -0
  38. contractforge_databricks/evidence/helpers.py +29 -0
  39. contractforge_databricks/evidence/ops_log.py +210 -0
  40. contractforge_databricks/evidence/records.py +27 -0
  41. contractforge_databricks/evidence/run_log.py +74 -0
  42. contractforge_databricks/evidence/schemas.py +7 -0
  43. contractforge_databricks/evidence/sql.py +144 -0
  44. contractforge_databricks/evidence/tables.py +20 -0
  45. contractforge_databricks/evidence/writer.py +118 -0
  46. contractforge_databricks/execution/__init__.py +70 -0
  47. contractforge_databricks/execution/delta_basic.py +57 -0
  48. contractforge_databricks/execution/hash_diff.py +126 -0
  49. contractforge_databricks/execution/hash_diff_latest.py +142 -0
  50. contractforge_databricks/execution/replace_partitions.py +40 -0
  51. contractforge_databricks/execution/results.py +5 -0
  52. contractforge_databricks/execution/retry.py +36 -0
  53. contractforge_databricks/execution/scd2.py +213 -0
  54. contractforge_databricks/execution/scd2_deletes.py +65 -0
  55. contractforge_databricks/execution/scd2_late.py +30 -0
  56. contractforge_databricks/execution/snapshot.py +77 -0
  57. contractforge_databricks/execution/sql_merge.py +85 -0
  58. contractforge_databricks/execution/tables.py +98 -0
  59. contractforge_databricks/execution/windows.py +58 -0
  60. contractforge_databricks/governance/__init__.py +30 -0
  61. contractforge_databricks/governance/access.py +185 -0
  62. contractforge_databricks/governance/application.py +93 -0
  63. contractforge_databricks/governance/drift.py +49 -0
  64. contractforge_databricks/governance/runtime.py +60 -0
  65. contractforge_databricks/governance/sql.py +31 -0
  66. contractforge_databricks/governance/validation.py +135 -0
  67. contractforge_databricks/lakeflow/__init__.py +21 -0
  68. contractforge_databricks/lakeflow/compatibility.py +194 -0
  69. contractforge_databricks/lakeflow/rendering.py +175 -0
  70. contractforge_databricks/lineage/__init__.py +7 -0
  71. contractforge_databricks/lineage/openlineage.py +182 -0
  72. contractforge_databricks/maintenance/__init__.py +27 -0
  73. contractforge_databricks/maintenance/retention.py +90 -0
  74. contractforge_databricks/maintenance/sql.py +68 -0
  75. contractforge_databricks/metrics/__init__.py +19 -0
  76. contractforge_databricks/metrics/history.py +21 -0
  77. contractforge_databricks/metrics/write.py +63 -0
  78. contractforge_databricks/operations/__init__.py +4 -0
  79. contractforge_databricks/operations/application.py +38 -0
  80. contractforge_databricks/operations/sql.py +95 -0
  81. contractforge_databricks/parity/__init__.py +18 -0
  82. contractforge_databricks/parity/catalog.py +59 -0
  83. contractforge_databricks/parity/models.py +7 -0
  84. contractforge_databricks/parity/scenarios.py +111 -0
  85. contractforge_databricks/partitioning/__init__.py +3 -0
  86. contractforge_databricks/partitioning/predicates.py +28 -0
  87. contractforge_databricks/preparation/__init__.py +47 -0
  88. contractforge_databricks/preparation/deduplicate.py +87 -0
  89. contractforge_databricks/preparation/encoding.py +37 -0
  90. contractforge_databricks/preparation/hashing.py +18 -0
  91. contractforge_databricks/preparation/pyspark.py +178 -0
  92. contractforge_databricks/preparation/pyspark_staging.py +70 -0
  93. contractforge_databricks/preparation/shape.py +209 -0
  94. contractforge_databricks/preparation/shape_validation.py +94 -0
  95. contractforge_databricks/preparation/staging.py +17 -0
  96. contractforge_databricks/preparation/zip_arrays.py +51 -0
  97. contractforge_databricks/presets/__init__.py +3 -0
  98. contractforge_databricks/presets/base.py +24 -0
  99. contractforge_databricks/presets/bronze.py +57 -0
  100. contractforge_databricks/presets/catalog.py +22 -0
  101. contractforge_databricks/presets/core.py +134 -0
  102. contractforge_databricks/presets/gold.py +62 -0
  103. contractforge_databricks/presets/modifiers.py +51 -0
  104. contractforge_databricks/presets/runtime.py +22 -0
  105. contractforge_databricks/presets/silver.py +101 -0
  106. contractforge_databricks/presets/write_engine.py +57 -0
  107. contractforge_databricks/quality/__init__.py +41 -0
  108. contractforge_databricks/quality/evaluation.py +178 -0
  109. contractforge_databricks/quality/persistence.py +81 -0
  110. contractforge_databricks/quality/registry.py +134 -0
  111. contractforge_databricks/quality/results.py +17 -0
  112. contractforge_databricks/quality/sql.py +113 -0
  113. contractforge_databricks/rendering/__init__.py +11 -0
  114. contractforge_databricks/rendering/bundle.py +93 -0
  115. contractforge_databricks/rendering/markdown.py +50 -0
  116. contractforge_databricks/rendering/names.py +56 -0
  117. contractforge_databricks/results.py +15 -0
  118. contractforge_databricks/runtime/__init__.py +101 -0
  119. contractforge_databricks/runtime/available_now.py +147 -0
  120. contractforge_databricks/runtime/bundles.py +211 -0
  121. contractforge_databricks/runtime/cache.py +20 -0
  122. contractforge_databricks/runtime/control_tables.py +19 -0
  123. contractforge_databricks/runtime/deploy.py +197 -0
  124. contractforge_databricks/runtime/detection.py +114 -0
  125. contractforge_databricks/runtime/dry_run.py +46 -0
  126. contractforge_databricks/runtime/errors.py +54 -0
  127. contractforge_databricks/runtime/file_selection.py +109 -0
  128. contractforge_databricks/runtime/finalization.py +168 -0
  129. contractforge_databricks/runtime/governance.py +37 -0
  130. contractforge_databricks/runtime/hooks.py +45 -0
  131. contractforge_databricks/runtime/http_file.py +37 -0
  132. contractforge_databricks/runtime/http_retry.py +15 -0
  133. contractforge_databricks/runtime/http_safety.py +9 -0
  134. contractforge_databricks/runtime/json_materialization.py +97 -0
  135. contractforge_databricks/runtime/lineage.py +164 -0
  136. contractforge_databricks/runtime/maintenance.py +43 -0
  137. contractforge_databricks/runtime/merge_validation.py +98 -0
  138. contractforge_databricks/runtime/metadata.py +21 -0
  139. contractforge_databricks/runtime/metrics.py +34 -0
  140. contractforge_databricks/runtime/models.py +32 -0
  141. contractforge_databricks/runtime/options.py +33 -0
  142. contractforge_databricks/runtime/orchestration_context.py +185 -0
  143. contractforge_databricks/runtime/orchestrator.py +147 -0
  144. contractforge_databricks/runtime/partitioning.py +93 -0
  145. contractforge_databricks/runtime/quality_quarantine.py +92 -0
  146. contractforge_databricks/runtime/rest_api.py +46 -0
  147. contractforge_databricks/runtime/rest_auth.py +21 -0
  148. contractforge_databricks/runtime/rest_pagination.py +21 -0
  149. contractforge_databricks/runtime/run_payload.py +177 -0
  150. contractforge_databricks/runtime/schema.py +106 -0
  151. contractforge_databricks/runtime/source_metadata.py +30 -0
  152. contractforge_databricks/runtime/source_registry.py +43 -0
  153. contractforge_databricks/runtime/source_schema.py +24 -0
  154. contractforge_databricks/runtime/sources.py +208 -0
  155. contractforge_databricks/runtime/spark.py +183 -0
  156. contractforge_databricks/runtime/spark_defaults.py +35 -0
  157. contractforge_databricks/runtime/storage_auth.py +132 -0
  158. contractforge_databricks/runtime/streaming.py +131 -0
  159. contractforge_databricks/runtime/success.py +104 -0
  160. contractforge_databricks/runtime/utils.py +52 -0
  161. contractforge_databricks/runtime/watermark.py +71 -0
  162. contractforge_databricks/runtime/windows.py +184 -0
  163. contractforge_databricks/runtime/write.py +66 -0
  164. contractforge_databricks/runtime/write_flow.py +146 -0
  165. contractforge_databricks/runtime/write_strategy.py +40 -0
  166. contractforge_databricks/schema/__init__.py +21 -0
  167. contractforge_databricks/schema/diff.py +11 -0
  168. contractforge_databricks/schema/policy.py +33 -0
  169. contractforge_databricks/schema/sync.py +23 -0
  170. contractforge_databricks/security/__init__.py +21 -0
  171. contractforge_databricks/security/errors.py +5 -0
  172. contractforge_databricks/security/redaction.py +5 -0
  173. contractforge_databricks/security/secrets.py +114 -0
  174. contractforge_databricks/security/source_policy.py +17 -0
  175. contractforge_databricks/shapes/__init__.py +3 -0
  176. contractforge_databricks/shapes/sql.py +123 -0
  177. contractforge_databricks/sources/__init__.py +67 -0
  178. contractforge_databricks/sources/artifacts.py +100 -0
  179. contractforge_databricks/sources/autoloader.py +48 -0
  180. contractforge_databricks/sources/bounded_streams.py +44 -0
  181. contractforge_databricks/sources/classification.py +115 -0
  182. contractforge_databricks/sources/delta_share.py +21 -0
  183. contractforge_databricks/sources/files.py +48 -0
  184. contractforge_databricks/sources/http_file.py +46 -0
  185. contractforge_databricks/sources/interpret.py +76 -0
  186. contractforge_databricks/sources/jdbc.py +32 -0
  187. contractforge_databricks/sources/metadata.py +18 -0
  188. contractforge_databricks/sources/native_passthrough.py +33 -0
  189. contractforge_databricks/sources/rds_iam.py +15 -0
  190. contractforge_databricks/sources/rds_iam_runtime.py +191 -0
  191. contractforge_databricks/sources/rest_api.py +33 -0
  192. contractforge_databricks/sources/support.py +50 -0
  193. contractforge_databricks/sources/table_refs.py +65 -0
  194. contractforge_databricks/sql/__init__.py +4 -0
  195. contractforge_databricks/sql/identifiers.py +17 -0
  196. contractforge_databricks/sql/literals.py +36 -0
  197. contractforge_databricks/state/__init__.py +39 -0
  198. contractforge_databricks/state/ddl.py +24 -0
  199. contractforge_databricks/state/migrations.py +146 -0
  200. contractforge_databricks/state/queries.py +149 -0
  201. contractforge_databricks/state/sql.py +116 -0
  202. contractforge_databricks/state/tables.py +9 -0
  203. contractforge_databricks/state/writer.py +83 -0
  204. contractforge_databricks/templates/__init__.py +15 -0
  205. contractforge_databricks/templates/catalog.py +205 -0
  206. contractforge_databricks/templates/catalog_parity.py +85 -0
  207. contractforge_databricks/templates/core.py +83 -0
  208. contractforge_databricks/templates/enrichment.py +175 -0
  209. contractforge_databricks/transforms/__init__.py +3 -0
  210. contractforge_databricks/transforms/sql.py +118 -0
  211. contractforge_databricks/watermark/__init__.py +6 -0
  212. contractforge_databricks/watermark/sql.py +91 -0
  213. contractforge_databricks/write_modes/__init__.py +20 -0
  214. contractforge_databricks/write_modes/registry.py +44 -0
  215. contractforge_databricks/write_modes/sql.py +33 -0
  216. contractforge_databricks/write_modes/strategy.py +192 -0
  217. contractforge_databricks-0.1.0.dist-info/METADATA +34 -0
  218. contractforge_databricks-0.1.0.dist-info/RECORD +220 -0
  219. contractforge_databricks-0.1.0.dist-info/WHEEL +4 -0
  220. contractforge_databricks-0.1.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,146 @@
1
+ """Prepared write flow for Databricks runtime ingestion."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from time import perf_counter
6
+ from typing import Any
7
+ from typing import NamedTuple
8
+
9
+ from contractforge_core.execution import ExecutionOutcome
10
+ from contractforge_core.quality import QualityRuleResult
11
+ from contractforge_core.runtime import PreparedInput, QueryOne, rows_written_from_outcome
12
+ from contractforge_core.semantic import SemanticContract
13
+ from contractforge_databricks.evidence import EvidenceWriter
14
+ from contractforge_databricks.execution import SqlRunner, with_delta_retry
15
+ from contractforge_databricks.runtime.governance import apply_runtime_governance
16
+ from contractforge_databricks.runtime.maintenance import run_post_write_maintenance
17
+ from contractforge_databricks.runtime.merge_validation import validate_merge_source_safety
18
+ from contractforge_databricks.runtime.models import DatabricksIngestOptions
19
+ from contractforge_databricks.runtime.partitioning import replace_partition_predicate, target_partition_predicate
20
+ from contractforge_databricks.runtime.schema import setup_and_sync_schema
21
+ from contractforge_databricks.runtime.utils import utc_now_str
22
+ from contractforge_databricks.runtime.write import execute_prepared_write
23
+
24
+
25
class WriteFlowResult(NamedTuple):
    """Immutable record returned by ``execute_runtime_write_flow``."""

    # Outcome object returned by the (retried) prepared write.
    outcome: ExecutionOutcome
    # Row count derived from the outcome via ``rows_written_from_outcome``.
    logical_rows_written: int
    # Value returned by ``setup_and_sync_schema``.
    schema_changes: dict[str, Any]
    # Value returned by ``apply_runtime_governance``.
    governance_results: dict[str, Any]
    # Timestamp strings captured immediately before and after the write stage.
    write_started_at: str
    write_finished_at: str
    # Elapsed seconds per stage, keyed by stage name.
    stage_durations: dict[str, float]
33
+
34
+
35
def execute_runtime_write_flow(
    *,
    runner: SqlRunner,
    evidence: EvidenceWriter,
    contract: SemanticContract,
    prepared: PreparedInput,
    opts: DatabricksIngestOptions,
    run_id: str,
    target: str,
    query_one: QueryOne | None = None,
    quality_results: tuple[QualityRuleResult, ...] = (),
) -> WriteFlowResult:
    """Run the prepared write end to end and report what happened.

    Stages run in this fixed order, each timed into ``stage_durations``:
    ``schema`` (table setup / schema sync), ``preflight`` (merge-source safety
    validation), ``write`` (the prepared write, wrapped in the Delta retry
    helper), ``maintenance`` (post-write maintenance) and ``governance``.

    Retry attempts and backoff are read from the contract's operations
    metadata; jitter is fixed at ``0.0``.

    Returns:
        A ``WriteFlowResult`` carrying the write outcome, the logical row
        count, schema changes, governance results, the write start/finish
        timestamps and the per-stage durations.
    """
    timings: dict[str, float] = {}

    # --- schema -----------------------------------------------------------
    stage_began = perf_counter()
    schema_changes = setup_and_sync_schema(
        runner=runner,
        evidence=evidence,
        contract=contract,
        prepared=prepared,
        run_id=run_id,
        ensure_table=opts.ensure_table,
        target_schema=opts.target_schema,
    )
    timings["schema"] = _elapsed(stage_began)

    # --- preflight --------------------------------------------------------
    stage_began = perf_counter()
    validate_merge_source_safety(
        contract=contract,
        prepared=prepared,
        query_one=query_one,
        quality_results=quality_results,
    )
    timings["preflight"] = _elapsed(stage_began)

    def _attempt_write() -> ExecutionOutcome:
        # Both predicates are rebuilt on every attempt, not once up front.
        replace_predicate = replace_partition_predicate(
            contract=contract,
            prepared=prepared,
            query_one=query_one,
        )
        return execute_prepared_write(
            runner=runner,
            contract=contract,
            prepared=prepared,
            replace_partition_predicate=replace_predicate,
            target_schema=opts.target_schema,
            query_one=query_one,
            target_partition_predicate=target_partition_predicate(
                contract=contract,
                prepared=prepared,
                query_one=query_one,
            ),
        )

    # --- write ------------------------------------------------------------
    write_started_at = _utc_now()
    stage_began = perf_counter()
    outcome = with_delta_retry(
        _attempt_write,
        attempts=_retry_attempts(contract),
        backoff_seconds=_retry_backoff_seconds(contract),
        jitter=lambda: 0.0,
    )
    # The finish timestamp is taken before the duration is recorded.
    write_finished_at = _utc_now()
    timings["write"] = _elapsed(stage_began)
    logical_rows_written = rows_written_from_outcome(prepared, outcome)

    # --- maintenance ------------------------------------------------------
    stage_began = perf_counter()
    run_post_write_maintenance(
        runner=runner,
        contract=contract,
        target_table=target,
        outcome=outcome,
        rows_written=logical_rows_written,
    )
    timings["maintenance"] = _elapsed(stage_began)

    # --- governance -------------------------------------------------------
    stage_began = perf_counter()
    governance_results = apply_runtime_governance(
        runner=runner,
        contract=contract,
        run_id=run_id,
        evidence_catalog=opts.catalog,
        evidence_schema=opts.schema,
    )
    timings["governance"] = _elapsed(stage_began)

    return WriteFlowResult(
        outcome=outcome,
        logical_rows_written=logical_rows_written,
        schema_changes=schema_changes,
        governance_results=governance_results,
        write_started_at=write_started_at,
        write_finished_at=write_finished_at,
        stage_durations=timings,
    )
121
+
122
+
123
def _retry_attempts(contract: SemanticContract) -> int:
    """Return the number of write attempts configured on the contract.

    Reads ``retry_attempts`` from ``contract.operations.metadata``. Missing
    operations, missing/empty metadata, or a value that cannot be converted to
    an integer all yield ``1``. The result is never below ``1``.
    """
    operations = contract.operations
    metadata = operations.metadata if operations and operations.metadata else {}
    try:
        return max(1, int(metadata.get("retry_attempts", 1)))
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")), e.g. a YAML ``.inf`` value;
        # treat it like any other unusable setting instead of crashing.
        return 1
130
+
131
+
132
def _retry_backoff_seconds(contract: SemanticContract) -> float:
    """Return the retry backoff, in seconds, configured on the contract.

    Reads ``retry_backoff_seconds`` from ``contract.operations.metadata``.
    Missing operations, missing/empty metadata, a value that cannot be
    converted to a float, or a non-finite value (``inf`` / ``nan``) all yield
    the default ``1.0``. Negative values are clamped to ``0.0``.
    """
    from math import isfinite

    operations = contract.operations
    metadata = operations.metadata if operations and operations.metadata else {}
    try:
        seconds = float(metadata.get("retry_backoff_seconds", 1.0))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() of an integer too large for a double.
        return 1.0
    if not isfinite(seconds):
        # A non-finite delay is not a usable backoff; use the default instead.
        return 1.0
    return max(0.0, seconds)
139
+
140
+
141
def _utc_now() -> str:
    """Return the timestamp string produced by ``utc_now_str``."""
    timestamp = utc_now_str()
    return timestamp
143
+
144
+
145
def _elapsed(start: float) -> float:
    """Return seconds since ``start`` (a ``perf_counter`` reading), rounded to 6 decimals."""
    delta = perf_counter() - start
    return round(delta, 6)
@@ -0,0 +1,40 @@
1
+ """Runtime write-engine evidence for Databricks ingestion."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from contractforge_core.semantic import SemanticContract
8
+ from contractforge_databricks.capabilities import evaluate_databricks_capabilities
9
+ from contractforge_databricks.contract_extensions import databricks_extensions
10
+ from contractforge_databricks.write_modes import choose_write_strategy
11
+
12
+
13
def write_strategy_evidence(contract: SemanticContract, target: str, runtime: dict[str, Any]) -> dict[str, str]:
    """Build the write-engine evidence fields for one ingestion run.

    Evaluates Databricks capabilities for ``target`` using the runtime type
    (``"serverless"`` when absent or falsy) and Spark version (``None`` when
    absent or falsy) found in ``runtime``, chooses a write strategy for the
    contract, and returns the requested/selected engine, strategy kind,
    reason and fallback policy as a flat dictionary.
    """
    runtime_type = str(runtime.get("runtime_type") or "serverless")
    raw_version = runtime.get("spark_version")
    spark_version = str(raw_version) if raw_version else None

    capabilities = evaluate_databricks_capabilities(
        target_table=target,
        runtime_type=runtime_type,
        spark_version=spark_version,
    )
    strategy = choose_write_strategy(contract, capabilities)

    fields: dict[str, str] = {}
    fields["write_engine_requested"] = _requested_engine(contract)
    fields["write_engine_selected"] = strategy.engine
    fields["write_engine_status"] = strategy.kind
    fields["write_engine_reason"] = strategy.reason
    fields["write_engine_fallback_policy"] = _fallback_policy(contract)
    return fields
27
+
28
+
29
def _requested_engine(contract: SemanticContract) -> str:
    """Return the write engine the contract asks for, defaulting to ``"auto"``.

    The ``write_engine`` extension may be a mapping (``requested`` is preferred
    over ``engine``) or a bare value; falsy values fall back to ``"auto"``.
    """
    configured = databricks_extensions(contract).get("write_engine")
    if not isinstance(configured, dict):
        return str(configured or "auto")
    choice = configured.get("requested") or configured.get("engine") or "auto"
    return str(choice)
34
+
35
+
36
def _fallback_policy(contract: SemanticContract) -> str:
    """Return the write-engine fallback policy, defaulting to ``"fail"``.

    Only a mapping-style ``write_engine`` extension can carry a
    ``fallback_policy``; any other shape, or a falsy policy, yields ``"fail"``.
    """
    configured = databricks_extensions(contract).get("write_engine")
    policy = configured.get("fallback_policy") if isinstance(configured, dict) else None
    return str(policy or "fail")
@@ -0,0 +1,21 @@
1
+ from contractforge_core.schema import (
2
+ SchemaDiff,
3
+ TypeChange,
4
+ compare_schema,
5
+ is_type_widening,
6
+ validate_schema_diff,
7
+ )
8
+ from contractforge_databricks.schema.policy import DatabricksSchemaPolicyPlan, plan_schema_policy
9
+ from contractforge_databricks.schema.sync import render_add_columns_sql, render_type_widening_sql
10
+
11
+ __all__ = [
12
+ "DatabricksSchemaPolicyPlan",
13
+ "SchemaDiff",
14
+ "TypeChange",
15
+ "compare_schema",
16
+ "is_type_widening",
17
+ "plan_schema_policy",
18
+ "render_add_columns_sql",
19
+ "render_type_widening_sql",
20
+ "validate_schema_diff",
21
+ ]
@@ -0,0 +1,11 @@
1
+ """Compatibility exports for platform-neutral schema diff helpers."""
2
+
3
+ from contractforge_core.schema import SchemaDiff, TypeChange, compare_schema, is_type_widening, validate_schema_diff
4
+
5
+ __all__ = [
6
+ "SchemaDiff",
7
+ "TypeChange",
8
+ "compare_schema",
9
+ "is_type_widening",
10
+ "validate_schema_diff",
11
+ ]
@@ -0,0 +1,33 @@
1
+ """Databricks schema policy planning."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_core.schema import SchemaPolicyPlan
6
+ from contractforge_core.semantic import SemanticContract
7
+
8
# Alias of the core plan type under a Databricks-specific name.
DatabricksSchemaPolicyPlan = SchemaPolicyPlan


def plan_schema_policy(contract: SemanticContract) -> DatabricksSchemaPolicyPlan:
    """Translate the contract's schema policy into a schema policy plan.

    ``strict`` uses no writer options; ``additive_only`` and every other
    policy value enable ``mergeSchema``. All plans require preflight; only the
    fallback (non-strict, non-additive) plan carries a warning about type
    widening evidence.
    """
    policy = contract.write.schema_policy
    details = {"policy": policy, "preflight_required": True}

    if policy == "strict":
        details.update(
            writer_options={},
            reason="Strict schema requires adapter preflight comparison before Delta write.",
        )
    elif policy == "additive_only":
        details.update(
            writer_options={"mergeSchema": "true"},
            reason="Additive-only schema allows new nullable columns after preflight validation.",
        )
    else:
        details.update(
            writer_options={"mergeSchema": "true"},
            reason="Permissive schema can use Delta schema merge, but type widening still requires evidence.",
            warnings=("type widening must be recorded as schema-change evidence",),
        )

    return DatabricksSchemaPolicyPlan(**details)
@@ -0,0 +1,23 @@
1
+ """Render Databricks SQL for validated schema changes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from contractforge_core.schema import SchemaDiff
6
+ from contractforge_databricks.sql import quote_identifier, quote_table_name
7
+
8
+
9
def render_add_columns_sql(*, target_table: str, source_schema: dict[str, str], diff: SchemaDiff) -> str:
    """Render an ``ALTER TABLE ... ADD COLUMNS`` statement for added columns.

    Only columns listed in ``diff.added_columns`` that also appear in
    ``source_schema`` are included, each with the type string taken from
    ``source_schema``. When none qualify, a SQL comment line is returned.
    """
    definitions = []
    for name in diff.added_columns:
        if name in source_schema:
            definitions.append(f"{quote_identifier(name)} {source_schema[name]}")

    if not definitions:
        return "-- No additive schema changes to apply.\n"

    column_list = ", ".join(definitions)
    return f"ALTER TABLE {quote_table_name(target_table)} ADD COLUMNS ({column_list})"
15
+
16
+
17
def render_type_widening_sql(*, target_table: str, diff: SchemaDiff) -> str:
    """Render one ``ALTER COLUMN ... TYPE`` statement per allowed type change.

    Statements are ``;``-terminated and newline-separated. When
    ``diff.type_changes`` has no allowed entries, a SQL comment line is
    returned.
    """
    statements = []
    for change in diff.type_changes:
        if not change.allowed:
            continue
        # The table name is quoted per statement, so it is never touched when
        # there is nothing to render.
        statements.append(
            f"ALTER TABLE {quote_table_name(target_table)} ALTER COLUMN {quote_identifier(change.column)} TYPE {change.source_type}"
        )

    if not statements:
        return "-- No type widening changes to apply.\n"
    return ";\n".join(statements) + ";\n"
@@ -0,0 +1,21 @@
1
+ from contractforge_core.errors import exception_message, short_error_message
2
+ from contractforge_core.security import redact_text, redact_value
3
+ from contractforge_databricks.security.secrets import (
4
+ assert_no_inline_jdbc_secrets,
5
+ contains_secret_placeholder,
6
+ resolve_databricks_secret_placeholders,
7
+ secret_placeholder_refs,
8
+ )
9
+ from contractforge_databricks.security.source_policy import validate_source_security
10
+
11
+ __all__ = [
12
+ "exception_message",
13
+ "assert_no_inline_jdbc_secrets",
14
+ "contains_secret_placeholder",
15
+ "redact_text",
16
+ "redact_value",
17
+ "resolve_databricks_secret_placeholders",
18
+ "secret_placeholder_refs",
19
+ "short_error_message",
20
+ "validate_source_security",
21
+ ]
@@ -0,0 +1,5 @@
1
+ """Compatibility exports for platform-neutral operational error normalization."""
2
+
3
+ from contractforge_core.errors import exception_message, short_error_message
4
+
5
+ __all__ = ["exception_message", "short_error_message"]
@@ -0,0 +1,5 @@
1
+ """Compatibility exports for platform-neutral redaction helpers."""
2
+
3
+ from contractforge_core.security import REDACTED, redact_text, redact_value
4
+
5
+ __all__ = ["REDACTED", "redact_text", "redact_value"]
@@ -0,0 +1,114 @@
1
+ """Databricks-owned secret placeholder resolution."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import re
7
+ from collections.abc import Mapping
8
+ from typing import Any
9
+
10
# Matches a ``{{ secret:... }}`` placeholder; case-insensitive, whitespace allowed after ``{{``.
SECRET_PLACEHOLDER_RE = re.compile(r"\{\{\s*secret:[^}]+\}\}", re.IGNORECASE)
# Name of the environment variable that opts in to CONTRACTFORGE_SECRET_* overrides.
ENV_OVERRIDE_FLAG = "CONTRACTFORGE_ALLOW_SECRET_ENV_OVERRIDE"
# Lower-cased flag values that count as "enabled".
_TRUE_VALUES = {"1", "true", "yes", "on"}
# JDBC option keys whose values must not be literal credentials.
_SENSITIVE_JDBC_OPTION_KEYS = ("password", "sfpassword")
# Matches ``://user:password@`` style credentials embedded in a URL.
_URL_INLINE_CREDENTIALS_RE = re.compile(r"://[^/@\s]+:[^/@\s]+@")
15
+
16
+
17
def contains_secret_placeholder(value: Any) -> bool:
    """Report whether ``value`` holds a secret placeholder anywhere inside it.

    Mapping values and list/tuple items are searched recursively; strings are
    matched against ``SECRET_PLACEHOLDER_RE``. Any other type yields ``False``.
    """
    if isinstance(value, Mapping):
        children = value.values()
    elif isinstance(value, (list, tuple)):
        children = value
    elif isinstance(value, str):
        return bool(SECRET_PLACEHOLDER_RE.search(value))
    else:
        return False

    for child in children:
        if contains_secret_placeholder(child):
            return True
    return False
27
+
28
+
29
def secret_placeholder_refs(value: str) -> tuple[tuple[str, str], ...]:
    """Collect the ``(scope, key)`` pair of every secret placeholder in ``value``.

    Pairs are returned in the order the placeholders occur. A malformed
    placeholder raises ``ValueError`` from ``_parse_secret_ref``.
    """
    refs = []
    for match in SECRET_PLACEHOLDER_RE.finditer(value):
        # Drop the surrounding ``{{`` / ``}}`` and any padding inside them.
        inner = match.group(0)[2:-2].strip()
        refs.append(_parse_secret_ref(inner))
    return tuple(refs)
33
+
34
+
35
def assert_no_inline_jdbc_secrets(options: Mapping[str, Any]) -> None:
    """Refuse JDBC options that declare raw credentials in the contract.

    Databricks resolves ``{{ secret:scope/key }}`` placeholders at runtime via
    dbutils. Accepting a literal JDBC password or URL credential would put the
    secret in versioned contract files and review artifacts before the adapter
    ever has a chance to redact it.

    Option names are compared case-insensitively: Spark treats data source
    option keys as case-insensitive, so ``Password`` or ``URL`` must be held to
    the same rule as ``password`` / ``url``.

    Raises:
        ValueError: if a sensitive option holds anything other than a secret
            placeholder or the literal ``{{rds_iam_token}}`` marker, or if the
            URL embeds ``user:password@`` credentials without a placeholder.
    """
    # Outer loop over the sensitive keys keeps the reporting order stable
    # ("password" before "sfpassword") regardless of the options' own order.
    for sensitive_key in _SENSITIVE_JDBC_OPTION_KEYS:
        for option_name, option_value in options.items():
            if str(option_name).lower() != sensitive_key:
                continue
            if contains_secret_placeholder(option_value) or str(option_value) == "{{rds_iam_token}}":
                continue
            raise ValueError(
                f"JDBC '{sensitive_key}' must be provided via a {{{{ secret:scope/key }}}} placeholder "
                "or adapter-owned runtime authentication; inline credentials are not accepted."
            )

    for option_name, option_value in options.items():
        if str(option_name).lower() != "url":
            continue
        url = str(option_value or "")
        if _URL_INLINE_CREDENTIALS_RE.search(url) and not contains_secret_placeholder(url):
            raise ValueError(
                "JDBC url embeds inline credentials; move them to auth using {{ secret:scope/key }} placeholders."
            )
55
+
56
+
57
def resolve_databricks_secret_placeholders(value: Any) -> Any:
    """Return ``value`` with every ``{{ secret:scope/key }}`` placeholder resolved.

    Mappings become plain dicts, lists stay lists and tuples stay tuples, each
    resolved recursively. Inside strings every placeholder is replaced by its
    resolved secret. Any other value is returned unchanged.
    """
    if isinstance(value, Mapping):
        return {name: resolve_databricks_secret_placeholders(item) for name, item in value.items()}

    if isinstance(value, (list, tuple)):
        resolved = [resolve_databricks_secret_placeholders(item) for item in value]
        return resolved if isinstance(value, list) else tuple(resolved)

    if isinstance(value, str):

        def _substitute(match: re.Match[str]) -> str:
            # Strip the ``{{`` / ``}}`` delimiters before resolving the token.
            return _resolve_secret_token(match.group(0)[2:-2].strip())

        return SECRET_PLACEHOLDER_RE.sub(_substitute, value)

    return value
69
+
70
+
71
def _env_override_enabled() -> bool:
    """Tell whether CONTRACTFORGE_ALLOW_SECRET_ENV_OVERRIDE is explicitly switched on.

    The override is opt-in on purpose. If CONTRACTFORGE_SECRET_* variables
    were honored by default, anyone able to set cluster environment variables
    (init scripts, cluster policies) could shadow a secret served by
    dbutils.secrets without leaving an audit trail. A single explicit flag
    keeps the override usable for runtime token injection (e.g. RDS IAM
    tokens) while leaving the resolver safe-by-default in shared workspaces.
    """
    flag = os.environ.get(ENV_OVERRIDE_FLAG, "")
    normalized = flag.strip().lower()
    return normalized in _TRUE_VALUES
84
+
85
+
86
def _resolve_secret_token(token: str) -> str:
    """Resolve one placeholder token to its secret value.

    When the environment override is enabled and a matching
    ``CONTRACTFORGE_SECRET_<SCOPE>_<KEY>`` variable exists (upper-cased, with
    ``-`` and ``.`` turned into ``_``), that value wins. Otherwise the secret
    is fetched through ``dbutils.secrets.get``.
    """
    scope, key = _parse_secret_ref(token)

    if _env_override_enabled():
        raw_name = f"CONTRACTFORGE_SECRET_{scope}_{key}".upper()
        env_name = raw_name.replace("-", "_").replace(".", "_")
        try:
            return os.environ[env_name]
        except KeyError:
            # No override set for this secret; use the secret store below.
            pass

    secret = _dbutils().secrets.get(scope=scope, key=key)
    return str(secret)
93
+
94
+
95
def _parse_secret_ref(token: str) -> tuple[str, str]:
    """Split a placeholder token into its ``(scope, key)`` pair.

    A leading ``secret:`` prefix (any letter case) is optional and removed.
    The remainder is split on the first ``/``; both parts are stripped of
    surrounding whitespace.

    Raises:
        ValueError: if there is no ``/`` or either part is empty.
    """
    prefix = "secret:"
    body = token[len(prefix) :] if token.lower().startswith(prefix) else token
    ref = body.strip()

    scope, separator, key = ref.partition("/")
    if not separator:
        raise ValueError("Secret placeholder must use format {{ secret:scope/key }}")

    scope, key = scope.strip(), key.strip()
    if not (scope and key):
        raise ValueError("Secret placeholder requires non-empty scope and key")
    return scope, key
103
+
104
+
105
def _dbutils() -> Any:
    """Return the ``dbutils`` object from the active IPython shell's namespace.

    Raises:
        RuntimeError: if IPython cannot be imported, no shell is active, the
            shell namespace has no ``dbutils`` entry, or the lookup fails.
    """
    not_found = object()
    handle = not_found
    try:
        from IPython import get_ipython  # type: ignore

        shell = get_ipython()
        if shell and "dbutils" in shell.user_ns:
            handle = shell.user_ns["dbutils"]
    except Exception:
        # Deliberately broad: any failure here just means dbutils is not
        # reachable, which is reported uniformly below.
        handle = not_found

    if handle is not_found:
        raise RuntimeError("Could not resolve dbutils to access Databricks Secrets")
    return handle
@@ -0,0 +1,17 @@
1
+ """Databricks source-level security policy checks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from contractforge_core.connectors import JDBC_CONNECTORS, jdbc_common_options
8
+ from contractforge_databricks.security.secrets import assert_no_inline_jdbc_secrets
9
+
10
+
11
def validate_source_security(source: dict[str, Any]) -> None:
    """Run adapter security checks on a source before placeholders are resolved.

    A source is treated as JDBC when its ``type`` is ``"jdbc"`` or its
    ``connector`` is one of ``JDBC_CONNECTORS``; such sources have their
    common JDBC options checked for inline credentials. Other sources pass
    through unchecked.
    """
    is_jdbc = source.get("type") == "jdbc" or source.get("connector") in JDBC_CONNECTORS
    if not is_jdbc:
        return
    assert_no_inline_jdbc_secrets(jdbc_common_options(source))
@@ -0,0 +1,3 @@
1
+ from contractforge_databricks.shapes.sql import render_shape_sql
2
+
3
+ __all__ = ["render_shape_sql"]
@@ -0,0 +1,123 @@
1
+ """Databricks SQL review rendering for core shape intent."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from contractforge_core.semantic import SemanticContract
8
+ from contractforge_databricks.sql import quote_identifier, quote_table_name
9
+
10
+
11
def render_shape_sql(
    contract: SemanticContract,
    *,
    source_view: str = "${source_view}",
    output_view: str = "${shaped_view}",
) -> str:
    """Render the contract's shape section as a reviewable SQL script.

    Returns a one-line "no shape" comment when the contract declares no
    shape. Otherwise returns a ``CREATE OR REPLACE TEMP VIEW`` statement that
    selects the shaped columns from ``source_view`` into ``output_view``,
    followed by any review notes for the shape.
    """
    if not contract.shape:
        return "-- No shape declared.\n"

    shape = contract.shape.raw
    create_line = f"CREATE OR REPLACE TEMP VIEW {quote_table_name(output_view)} AS"
    select_list = ",\n".join(f" {item}" for item in _select_items(shape))
    from_line = f"FROM {_from_clause(shape, source_view)}"
    statement = "\n".join(
        (
            "-- Shape SQL review artifact.",
            "-- Databricks runtime may execute equivalent PySpark preparation for complex nested schemas.",
            create_line,
            "SELECT",
            select_list,
            from_line,
        )
    )
    return f"{statement};\n{_review_notes(shape)}"
29
+
30
+
31
def _select_items(shape: dict[str, Any]) -> list[str]:
    """Build the SELECT-list entries for a shape definition.

    Starts from ``*`` and appends one entry per ``parse_json`` and
    ``zip_arrays`` config, plus one per ``arrays`` config that renders to an
    expression. When the shape declares a non-empty ``columns`` mapping, that
    explicit projection is returned instead of the accumulated entries.
    """
    selected: list[str] = ["*"]
    selected.extend(_parse_json_item(spec) for spec in shape.get("parse_json", ()))
    selected.extend(_zip_arrays_item(spec) for spec in shape.get("zip_arrays", ()))
    rendered_arrays = (_array_item(spec) for spec in shape.get("arrays", ()))
    selected.extend(item for item in rendered_arrays if item)

    explicit_columns = shape.get("columns")
    if explicit_columns:
        return _projection_items(explicit_columns)
    return selected


def _parse_json_item(spec: dict[str, Any]) -> str:
    """Render one ``parse_json`` config as a ``from_json(...) AS alias`` entry."""
    column = str(spec["column"])
    # Fall back to a ${schema:<schema_ref>} placeholder when no inline schema is given.
    schema = spec.get("schema") or f"${{schema:{spec.get('schema_ref')}}}"
    alias = str(spec.get("alias") or column)
    wants_string_cast = str(spec.get("cast_input") or "").strip().upper() == "STRING"
    source_expr = _path_expr(column)
    if wants_string_cast:
        source_expr = f"CAST({source_expr} AS STRING)"
    return f"from_json({source_expr}, '{schema}') AS {quote_identifier(alias)}"


def _zip_arrays_item(spec: dict[str, Any]) -> str:
    """Render one ``zip_arrays`` config as an ``arrays_zip(...) AS alias`` entry."""
    zipped = ", ".join(_path_expr(path) for path in spec.get("columns", {}))
    return f"arrays_zip({zipped}) AS {quote_identifier(str(spec['alias']))}"
52
+
53
+
54
def _projection_items(columns: dict[str, Any]) -> list[str]:
    """Render an explicit column projection as ``<expr> AS <alias>`` entries.

    Each key of ``columns`` is a dotted source path. A string value is used
    directly as the alias. Any other value is treated as a config mapping
    that may supply ``alias``, ``expression`` (used instead of the path) and
    ``cast`` (wraps the expression in ``CAST(... AS <type>)``).
    """
    rendered: list[str] = []
    for path, spec in columns.items():
        if isinstance(spec, str):
            alias, expr = spec, _path_expr(path)
        else:
            alias = str(spec.get("alias") or _default_alias(path))
            custom_expression = spec.get("expression")
            expr = str(custom_expression) if custom_expression else _path_expr(path)
            cast_type = spec.get("cast")
            if cast_type:
                expr = f"CAST({expr} AS {cast_type})"
        rendered.append(f"{expr} AS {quote_identifier(alias)}")
    return rendered
67
+
68
+
69
def _array_item(config: dict[str, Any]) -> str | None:
    """Render one ``arrays`` config as a SELECT-list entry.

    Returns ``None`` for mode ``keep`` (the default) and for any mode that is
    not recognised. The alias defaults to the path with dots replaced by
    underscores.
    """
    path = str(config["path"])
    alias = str(config.get("alias") or _default_alias(path))
    mode = str(config.get("mode", "keep"))
    expr = _path_expr(path)

    # SQL call for each mode that produces an expression.
    calls_by_mode = {
        "to_json": f"to_json({expr})",
        "size": f"size({expr})",
        "first": f"element_at({expr}, 1)",
        "explode": f"explode({expr})",
        "explode_outer": f"explode_outer({expr})",
    }
    call = calls_by_mode.get(mode)
    if call is None:
        return None
    return f"{call} AS {quote_identifier(alias)}"
87
+
88
+
89
def _from_clause(shape: dict[str, Any], source_view: str) -> str:
    """Return the quoted source relation for the FROM clause.

    When flatten is enabled, a reviewer note is appended to the relation.
    The note is a bracketed ``/* ... */`` comment rather than a ``--`` line
    comment: a line comment would also comment out anything a caller appends
    on the same line, such as the statement-terminating ``;``.
    """
    if _flatten_enabled(shape):
        return f"{quote_table_name(source_view)} /* flatten requires schema-aware expansion */"
    return quote_table_name(source_view)
93
+
94
+
95
def _review_notes(shape: dict[str, Any]) -> str:
    """Return trailing SQL comment lines that flag shape settings for review.

    Emits a note when flatten is enabled (including the separator in use) and
    a note when any ``arrays`` config uses ``explode``/``explode_outer``
    unless ``allow_cardinality_change_on_bronze`` is set. Each note ends with
    a newline; the result is an empty string when there is nothing to flag.
    """
    notes: list[str] = []

    if _flatten_enabled(shape):
        flatten = shape.get("flatten")
        separator = "_"
        if isinstance(flatten, dict):
            separator = flatten.get("separator", "_")
        notes.append(f"-- flatten: enabled with separator {separator!r}; runtime must expand struct leaves.")

    row_changing_modes = {"explode", "explode_outer"}
    exploded_paths = [
        config["path"]
        for config in shape.get("arrays", ())
        if config.get("mode") in row_changing_modes
    ]
    if exploded_paths:
        if not shape.get("allow_cardinality_change_on_bronze", False):
            notes.append("-- cardinality review: explode/explode_outer changes row counts and may require layer policy review.")

    return "".join(f"{note}\n" for note in notes)
109
+
110
+
111
def _flatten_enabled(shape: dict[str, Any]) -> bool:
    """Report whether the shape turns flattening on.

    ``flatten`` may be a plain boolean or a mapping with an ``enabled`` entry;
    any other value (including a missing key) counts as disabled.
    """
    setting = shape.get("flatten")
    if isinstance(setting, dict):
        return bool(setting.get("enabled"))
    # Only the literal booleans remain meaningful here; everything else is off.
    return setting is True
116
+
117
+
118
def _path_expr(path: str) -> str:
    """Quote each dot-separated segment of ``path`` and rejoin them with dots."""
    segments = str(path).split(".")
    quoted_segments = [quote_identifier(segment) for segment in segments]
    return ".".join(quoted_segments)
120
+
121
+
122
def _default_alias(path: str) -> str:
    """Derive a column alias from a dotted path by turning dots into underscores."""
    segments = str(path).split(".")
    return "_".join(segments)
@@ -0,0 +1,67 @@
1
+ from contractforge_databricks.sources.autoloader import render_autoloader_python
2
+ from contractforge_databricks.sources.artifacts import render_source_artifacts
3
+ from contractforge_databricks.sources.bounded_streams import (
4
+ eventhubs_bounded_options,
5
+ is_bounded_stream_source,
6
+ kafka_bounded_options,
7
+ render_bounded_stream_python,
8
+ render_eventhubs_bounded_python,
9
+ render_kafka_bounded_python,
10
+ )
11
+ from contractforge_databricks.sources.delta_share import (
12
+ delta_share_options,
13
+ is_delta_share_source,
14
+ render_delta_share_python,
15
+ )
16
+ from contractforge_databricks.sources.files import (
17
+ is_catalog_source,
18
+ is_file_source,
19
+ render_catalog_source_python,
20
+ render_file_source_python,
21
+ )
22
+ from contractforge_databricks.sources.http_file import is_http_file_source, render_http_file_python
23
+ from contractforge_databricks.sources.interpret import interpret_incremental_files_source, is_incremental_file_source
24
+ from contractforge_databricks.sources.jdbc import jdbc_options, render_jdbc_python
25
+ from contractforge_databricks.sources.metadata import render_source_metadata_json, source_metadata_from_contract
26
+ from contractforge_databricks.sources.native_passthrough import render_native_passthrough_plan
27
+ from contractforge_databricks.sources.rds_iam import (
28
+ generate_rds_iam_auth_token,
29
+ infer_aws_region_from_rds_host,
30
+ parse_jdbc_host_port,
31
+ )
32
+ from contractforge_databricks.sources.rest_api import is_rest_api_connector, render_rest_api_review_plan
33
+ from contractforge_databricks.sources.support import databricks_source_support, list_databricks_source_support
34
+
35
+ __all__ = [
36
+ "is_catalog_source",
37
+ "is_bounded_stream_source",
38
+ "is_delta_share_source",
39
+ "is_file_source",
40
+ "is_http_file_source",
41
+ "is_incremental_file_source",
42
+ "is_rest_api_connector",
43
+ "interpret_incremental_files_source",
44
+ "eventhubs_bounded_options",
45
+ "delta_share_options",
46
+ "generate_rds_iam_auth_token",
47
+ "jdbc_options",
48
+ "infer_aws_region_from_rds_host",
49
+ "kafka_bounded_options",
50
+ "list_databricks_source_support",
51
+ "parse_jdbc_host_port",
52
+ "render_autoloader_python",
53
+ "render_bounded_stream_python",
54
+ "render_catalog_source_python",
55
+ "render_delta_share_python",
56
+ "render_eventhubs_bounded_python",
57
+ "render_file_source_python",
58
+ "render_http_file_python",
59
+ "render_jdbc_python",
60
+ "render_kafka_bounded_python",
61
+ "render_native_passthrough_plan",
62
+ "render_rest_api_review_plan",
63
+ "render_source_artifacts",
64
+ "render_source_metadata_json",
65
+ "source_metadata_from_contract",
66
+ "databricks_source_support",
67
+ ]