contractforge-databricks 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contractforge_databricks/__init__.py +172 -0
- contractforge_databricks/adapter.py +69 -0
- contractforge_databricks/annotations/__init__.py +10 -0
- contractforge_databricks/annotations/application.py +52 -0
- contractforge_databricks/annotations/audit.py +49 -0
- contractforge_databricks/annotations/sql.py +142 -0
- contractforge_databricks/api.py +65 -0
- contractforge_databricks/bundles/__init__.py +9 -0
- contractforge_databricks/bundles/assets.py +47 -0
- contractforge_databricks/bundles/project.py +213 -0
- contractforge_databricks/bundles/project_config.py +133 -0
- contractforge_databricks/capabilities/__init__.py +17 -0
- contractforge_databricks/capabilities/builders.py +43 -0
- contractforge_databricks/capabilities/evaluate.py +162 -0
- contractforge_databricks/capabilities/mapping.py +36 -0
- contractforge_databricks/capabilities/models.py +44 -0
- contractforge_databricks/capabilities/runtime.py +111 -0
- contractforge_databricks/capabilities/uc.py +47 -0
- contractforge_databricks/cli.py +196 -0
- contractforge_databricks/cli_deploy.py +98 -0
- contractforge_databricks/cli_governance.py +142 -0
- contractforge_databricks/cli_io.py +91 -0
- contractforge_databricks/cli_maintenance.py +69 -0
- contractforge_databricks/coercion.py +31 -0
- contractforge_databricks/contract_extensions.py +70 -0
- contractforge_databricks/cost/__init__.py +11 -0
- contractforge_databricks/cost/model.py +22 -0
- contractforge_databricks/cost/report.py +65 -0
- contractforge_databricks/cost/sql.py +136 -0
- contractforge_databricks/dashboards/__init__.py +15 -0
- contractforge_databricks/dashboards/control_tables.py +150 -0
- contractforge_databricks/diagnostics/__init__.py +7 -0
- contractforge_databricks/diagnostics/explain.py +40 -0
- contractforge_databricks/environment.py +53 -0
- contractforge_databricks/evidence/__init__.py +98 -0
- contractforge_databricks/evidence/ddl.py +35 -0
- contractforge_databricks/evidence/governance_log.py +175 -0
- contractforge_databricks/evidence/helpers.py +29 -0
- contractforge_databricks/evidence/ops_log.py +210 -0
- contractforge_databricks/evidence/records.py +27 -0
- contractforge_databricks/evidence/run_log.py +74 -0
- contractforge_databricks/evidence/schemas.py +7 -0
- contractforge_databricks/evidence/sql.py +144 -0
- contractforge_databricks/evidence/tables.py +20 -0
- contractforge_databricks/evidence/writer.py +118 -0
- contractforge_databricks/execution/__init__.py +70 -0
- contractforge_databricks/execution/delta_basic.py +57 -0
- contractforge_databricks/execution/hash_diff.py +126 -0
- contractforge_databricks/execution/hash_diff_latest.py +142 -0
- contractforge_databricks/execution/replace_partitions.py +40 -0
- contractforge_databricks/execution/results.py +5 -0
- contractforge_databricks/execution/retry.py +36 -0
- contractforge_databricks/execution/scd2.py +213 -0
- contractforge_databricks/execution/scd2_deletes.py +65 -0
- contractforge_databricks/execution/scd2_late.py +30 -0
- contractforge_databricks/execution/snapshot.py +77 -0
- contractforge_databricks/execution/sql_merge.py +85 -0
- contractforge_databricks/execution/tables.py +98 -0
- contractforge_databricks/execution/windows.py +58 -0
- contractforge_databricks/governance/__init__.py +30 -0
- contractforge_databricks/governance/access.py +185 -0
- contractforge_databricks/governance/application.py +93 -0
- contractforge_databricks/governance/drift.py +49 -0
- contractforge_databricks/governance/runtime.py +60 -0
- contractforge_databricks/governance/sql.py +31 -0
- contractforge_databricks/governance/validation.py +135 -0
- contractforge_databricks/lakeflow/__init__.py +21 -0
- contractforge_databricks/lakeflow/compatibility.py +194 -0
- contractforge_databricks/lakeflow/rendering.py +175 -0
- contractforge_databricks/lineage/__init__.py +7 -0
- contractforge_databricks/lineage/openlineage.py +182 -0
- contractforge_databricks/maintenance/__init__.py +27 -0
- contractforge_databricks/maintenance/retention.py +90 -0
- contractforge_databricks/maintenance/sql.py +68 -0
- contractforge_databricks/metrics/__init__.py +19 -0
- contractforge_databricks/metrics/history.py +21 -0
- contractforge_databricks/metrics/write.py +63 -0
- contractforge_databricks/operations/__init__.py +4 -0
- contractforge_databricks/operations/application.py +38 -0
- contractforge_databricks/operations/sql.py +95 -0
- contractforge_databricks/parity/__init__.py +18 -0
- contractforge_databricks/parity/catalog.py +59 -0
- contractforge_databricks/parity/models.py +7 -0
- contractforge_databricks/parity/scenarios.py +111 -0
- contractforge_databricks/partitioning/__init__.py +3 -0
- contractforge_databricks/partitioning/predicates.py +28 -0
- contractforge_databricks/preparation/__init__.py +47 -0
- contractforge_databricks/preparation/deduplicate.py +87 -0
- contractforge_databricks/preparation/encoding.py +37 -0
- contractforge_databricks/preparation/hashing.py +18 -0
- contractforge_databricks/preparation/pyspark.py +178 -0
- contractforge_databricks/preparation/pyspark_staging.py +70 -0
- contractforge_databricks/preparation/shape.py +209 -0
- contractforge_databricks/preparation/shape_validation.py +94 -0
- contractforge_databricks/preparation/staging.py +17 -0
- contractforge_databricks/preparation/zip_arrays.py +51 -0
- contractforge_databricks/presets/__init__.py +3 -0
- contractforge_databricks/presets/base.py +24 -0
- contractforge_databricks/presets/bronze.py +57 -0
- contractforge_databricks/presets/catalog.py +22 -0
- contractforge_databricks/presets/core.py +134 -0
- contractforge_databricks/presets/gold.py +62 -0
- contractforge_databricks/presets/modifiers.py +51 -0
- contractforge_databricks/presets/runtime.py +22 -0
- contractforge_databricks/presets/silver.py +101 -0
- contractforge_databricks/presets/write_engine.py +57 -0
- contractforge_databricks/quality/__init__.py +41 -0
- contractforge_databricks/quality/evaluation.py +178 -0
- contractforge_databricks/quality/persistence.py +81 -0
- contractforge_databricks/quality/registry.py +134 -0
- contractforge_databricks/quality/results.py +17 -0
- contractforge_databricks/quality/sql.py +113 -0
- contractforge_databricks/rendering/__init__.py +11 -0
- contractforge_databricks/rendering/bundle.py +93 -0
- contractforge_databricks/rendering/markdown.py +50 -0
- contractforge_databricks/rendering/names.py +56 -0
- contractforge_databricks/results.py +15 -0
- contractforge_databricks/runtime/__init__.py +101 -0
- contractforge_databricks/runtime/available_now.py +147 -0
- contractforge_databricks/runtime/bundles.py +211 -0
- contractforge_databricks/runtime/cache.py +20 -0
- contractforge_databricks/runtime/control_tables.py +19 -0
- contractforge_databricks/runtime/deploy.py +197 -0
- contractforge_databricks/runtime/detection.py +114 -0
- contractforge_databricks/runtime/dry_run.py +46 -0
- contractforge_databricks/runtime/errors.py +54 -0
- contractforge_databricks/runtime/file_selection.py +109 -0
- contractforge_databricks/runtime/finalization.py +168 -0
- contractforge_databricks/runtime/governance.py +37 -0
- contractforge_databricks/runtime/hooks.py +45 -0
- contractforge_databricks/runtime/http_file.py +37 -0
- contractforge_databricks/runtime/http_retry.py +15 -0
- contractforge_databricks/runtime/http_safety.py +9 -0
- contractforge_databricks/runtime/json_materialization.py +97 -0
- contractforge_databricks/runtime/lineage.py +164 -0
- contractforge_databricks/runtime/maintenance.py +43 -0
- contractforge_databricks/runtime/merge_validation.py +98 -0
- contractforge_databricks/runtime/metadata.py +21 -0
- contractforge_databricks/runtime/metrics.py +34 -0
- contractforge_databricks/runtime/models.py +32 -0
- contractforge_databricks/runtime/options.py +33 -0
- contractforge_databricks/runtime/orchestration_context.py +185 -0
- contractforge_databricks/runtime/orchestrator.py +147 -0
- contractforge_databricks/runtime/partitioning.py +93 -0
- contractforge_databricks/runtime/quality_quarantine.py +92 -0
- contractforge_databricks/runtime/rest_api.py +46 -0
- contractforge_databricks/runtime/rest_auth.py +21 -0
- contractforge_databricks/runtime/rest_pagination.py +21 -0
- contractforge_databricks/runtime/run_payload.py +177 -0
- contractforge_databricks/runtime/schema.py +106 -0
- contractforge_databricks/runtime/source_metadata.py +30 -0
- contractforge_databricks/runtime/source_registry.py +43 -0
- contractforge_databricks/runtime/source_schema.py +24 -0
- contractforge_databricks/runtime/sources.py +208 -0
- contractforge_databricks/runtime/spark.py +183 -0
- contractforge_databricks/runtime/spark_defaults.py +35 -0
- contractforge_databricks/runtime/storage_auth.py +132 -0
- contractforge_databricks/runtime/streaming.py +131 -0
- contractforge_databricks/runtime/success.py +104 -0
- contractforge_databricks/runtime/utils.py +52 -0
- contractforge_databricks/runtime/watermark.py +71 -0
- contractforge_databricks/runtime/windows.py +184 -0
- contractforge_databricks/runtime/write.py +66 -0
- contractforge_databricks/runtime/write_flow.py +146 -0
- contractforge_databricks/runtime/write_strategy.py +40 -0
- contractforge_databricks/schema/__init__.py +21 -0
- contractforge_databricks/schema/diff.py +11 -0
- contractforge_databricks/schema/policy.py +33 -0
- contractforge_databricks/schema/sync.py +23 -0
- contractforge_databricks/security/__init__.py +21 -0
- contractforge_databricks/security/errors.py +5 -0
- contractforge_databricks/security/redaction.py +5 -0
- contractforge_databricks/security/secrets.py +114 -0
- contractforge_databricks/security/source_policy.py +17 -0
- contractforge_databricks/shapes/__init__.py +3 -0
- contractforge_databricks/shapes/sql.py +123 -0
- contractforge_databricks/sources/__init__.py +67 -0
- contractforge_databricks/sources/artifacts.py +100 -0
- contractforge_databricks/sources/autoloader.py +48 -0
- contractforge_databricks/sources/bounded_streams.py +44 -0
- contractforge_databricks/sources/classification.py +115 -0
- contractforge_databricks/sources/delta_share.py +21 -0
- contractforge_databricks/sources/files.py +48 -0
- contractforge_databricks/sources/http_file.py +46 -0
- contractforge_databricks/sources/interpret.py +76 -0
- contractforge_databricks/sources/jdbc.py +32 -0
- contractforge_databricks/sources/metadata.py +18 -0
- contractforge_databricks/sources/native_passthrough.py +33 -0
- contractforge_databricks/sources/rds_iam.py +15 -0
- contractforge_databricks/sources/rds_iam_runtime.py +191 -0
- contractforge_databricks/sources/rest_api.py +33 -0
- contractforge_databricks/sources/support.py +50 -0
- contractforge_databricks/sources/table_refs.py +65 -0
- contractforge_databricks/sql/__init__.py +4 -0
- contractforge_databricks/sql/identifiers.py +17 -0
- contractforge_databricks/sql/literals.py +36 -0
- contractforge_databricks/state/__init__.py +39 -0
- contractforge_databricks/state/ddl.py +24 -0
- contractforge_databricks/state/migrations.py +146 -0
- contractforge_databricks/state/queries.py +149 -0
- contractforge_databricks/state/sql.py +116 -0
- contractforge_databricks/state/tables.py +9 -0
- contractforge_databricks/state/writer.py +83 -0
- contractforge_databricks/templates/__init__.py +15 -0
- contractforge_databricks/templates/catalog.py +205 -0
- contractforge_databricks/templates/catalog_parity.py +85 -0
- contractforge_databricks/templates/core.py +83 -0
- contractforge_databricks/templates/enrichment.py +175 -0
- contractforge_databricks/transforms/__init__.py +3 -0
- contractforge_databricks/transforms/sql.py +118 -0
- contractforge_databricks/watermark/__init__.py +6 -0
- contractforge_databricks/watermark/sql.py +91 -0
- contractforge_databricks/write_modes/__init__.py +20 -0
- contractforge_databricks/write_modes/registry.py +44 -0
- contractforge_databricks/write_modes/sql.py +33 -0
- contractforge_databricks/write_modes/strategy.py +192 -0
- contractforge_databricks-0.1.0.dist-info/METADATA +34 -0
- contractforge_databricks-0.1.0.dist-info/RECORD +220 -0
- contractforge_databricks-0.1.0.dist-info/WHEEL +4 -0
- contractforge_databricks-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Post-write Databricks maintenance hooks."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contractforge_core.execution import ExecutionOutcome
|
|
6
|
+
from contractforge_core.semantic import SemanticContract
|
|
7
|
+
from contractforge_databricks.contract_extensions import databricks_extensions
|
|
8
|
+
from contractforge_databricks.execution import SqlRunner
|
|
9
|
+
from contractforge_databricks.maintenance import MaintenancePlan, execute_maintenance_plan
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def run_post_write_maintenance(
    *,
    runner: SqlRunner,
    contract: SemanticContract,
    target_table: str,
    outcome: ExecutionOutcome | None,
    rows_written: int,
) -> tuple[str, ...]:
    """Run the optional post-write OPTIMIZE step for ``target_table``.

    Nothing is executed (and ``()`` is returned) when the contract's Databricks
    extensions do not enable ``optimize_after_write`` or when no rows were
    written. The row count is taken from ``outcome.metrics["rows_written"]``
    when an outcome is supplied and carries that key; otherwise the
    ``rows_written`` argument is used.

    Returns:
        Whatever ``execute_maintenance_plan`` returns for the built plan.
    """
    settings = databricks_extensions(contract)
    if not settings.get("optimize_after_write"):
        return ()

    metrics = outcome.metrics if outcome else {}
    # A falsy metric value (None, 0) collapses to 0 rather than falling back
    # to the caller-supplied count.
    effective_rows = int(metrics.get("rows_written", rows_written) or 0)
    if effective_rows <= 0:
        return ()

    plan = MaintenancePlan(
        target_table=target_table,
        optimize=True,
        zorder_columns=_tuple(settings.get("zorder_columns")),
    )
    return execute_maintenance_plan(runner, plan)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _tuple(value: object) -> tuple[str, ...]:
|
|
37
|
+
if value is None:
|
|
38
|
+
return ()
|
|
39
|
+
if isinstance(value, str):
|
|
40
|
+
return (value,)
|
|
41
|
+
if isinstance(value, (list, tuple, set)):
|
|
42
|
+
return tuple(str(item) for item in value)
|
|
43
|
+
return (str(value),)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Prepared-source safety checks for Databricks MERGE writes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_core.quality import QualityRuleResult, quality_status
|
|
8
|
+
from contractforge_core.runtime import PreparedInput, QueryOne
|
|
9
|
+
from contractforge_core.semantic import SemanticContract
|
|
10
|
+
from contractforge_databricks.sql import quote_identifier, quote_table_name
|
|
11
|
+
|
|
12
|
+
# Write modes treated as MERGE patterns; validate_merge_source_safety returns
# a SKIPPED result for any contract whose write mode is not in this set.
MERGE_WRITE_MODES = {"scd1_upsert", "scd2_historical", "snapshot_soft_delete"}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def validate_merge_source_safety(
    *,
    contract: SemanticContract,
    prepared: PreparedInput,
    query_one: QueryOne | None,
    quality_results: tuple[QualityRuleResult, ...] = (),
) -> dict[str, Any]:
    """Check merge-key safety of the prepared source before a MERGE write.

    The check is skipped (a ``{"status": "SKIPPED", ...}`` dict is returned)
    when the write mode is not a MERGE mode, when no ``query_one`` callable is
    available, or when there are no merge keys or no rows to inspect.

    Otherwise two queries run against ``prepared.source_view``: one counting
    rows where every merge key is NULL, and (unless a passing ``unique_key``
    rule already covers the merge keys) one counting duplicate key groups.

    Returns:
        A status dict with ``"status"`` set to ``"SKIPPED"`` or ``"PASSED"``.

    Raises:
        ValueError: if a merge key is missing from the known source columns,
            if every row read has fully null merge keys, or if duplicate
            merge-key groups exist.
    """
    write = contract.write
    if write.mode not in MERGE_WRITE_MODES:
        return {"status": "SKIPPED", "reason": "not_merge_mode"}
    if query_one is None:
        return {"status": "SKIPPED", "reason": "query_one_not_configured"}
    if not write.merge_keys or prepared.rows_read <= 0:
        return {"status": "SKIPPED", "reason": "no_merge_keys_or_rows"}

    merge_keys = write.merge_keys
    source_view = prepared.source_view

    _validate_columns(merge_keys, prepared.source_columns)

    # Step 1: reject input where every row has all merge keys NULL.
    nulls_sql = render_merge_key_nulls_sql(source_view, merge_keys)
    all_null_count = _int_row_value(query_one(nulls_sql), "all_keys_null_rows")
    if all_null_count == prepared.rows_read:
        raise ValueError(
            f"mode={write.mode} received {prepared.rows_read} rows with fully null merge_keys. "
            f"keys={list(merge_keys)}. Fix the source or add quality_rules.not_null."
        )

    # Step 2: duplicates are already covered when a passing unique_key rule
    # matches the merge keys, so the extra query is not issued.
    if _skip_duplicate_check(contract, quality_results):
        return {"status": "PASSED", "all_null_key_rows": all_null_count, "duplicate_check": "SKIPPED"}

    duplicates_sql = render_merge_key_duplicates_sql(source_view, merge_keys)
    duplicate_row = query_one(duplicates_sql)
    duplicate_groups = _int_row_value(duplicate_row, "duplicate_key_groups")
    duplicate_rows = _int_row_value(duplicate_row, "duplicate_rows")
    if not duplicate_groups:
        return {"status": "PASSED", "all_null_key_rows": all_null_count, "duplicate_key_groups": duplicate_groups}

    raise ValueError(
        f"mode={write.mode} received {duplicate_rows} duplicate source rows across "
        f"{duplicate_groups} merge_key groups. keys={list(merge_keys)}. "
        "Fix the composite key, declare quality_rules.unique_key, or apply transform.deduplicate."
    )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def render_merge_key_nulls_sql(source_view: str, merge_keys: tuple[str, ...]) -> str:
    """Render a query counting rows in which every merge key is NULL.

    The single result column is named ``all_keys_null_rows``. Identifiers and
    the view name are quoted through the package SQL helpers.
    """
    null_checks = [f"{quote_identifier(key)} IS NULL" for key in merge_keys]
    predicate = " AND ".join(null_checks)
    view = quote_table_name(source_view)
    return f"SELECT count(*) AS all_keys_null_rows FROM {view} WHERE {predicate}"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def render_merge_key_duplicates_sql(source_view: str, merge_keys: tuple[str, ...]) -> str:
    """Render a query summarising duplicated merge-key combinations.

    The result has two columns: ``duplicate_key_groups`` (number of key
    combinations occurring more than once) and ``duplicate_rows`` (total rows
    in those groups, 0 when there are none).
    """
    grouped_columns = ", ".join(map(quote_identifier, merge_keys))
    view = quote_table_name(source_view)
    # Inner query: one row per key combination that appears more than once.
    duplicated_groups = (
        f"SELECT {grouped_columns}, count(*) AS row_count FROM {view} "
        f"GROUP BY {grouped_columns} HAVING count(*) > 1"
    )
    return (
        "SELECT count(*) AS duplicate_key_groups, coalesce(sum(row_count), 0) AS duplicate_rows "
        f"FROM ({duplicated_groups})"
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _validate_columns(keys: tuple[str, ...], source_columns: tuple[str, ...]) -> None:
|
|
72
|
+
if not source_columns:
|
|
73
|
+
return
|
|
74
|
+
missing = [key for key in keys if key not in source_columns]
|
|
75
|
+
if missing:
|
|
76
|
+
raise ValueError(f"merge_keys missing from prepared source columns: {missing}")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _skip_duplicate_check(contract: SemanticContract, quality_results: tuple[QualityRuleResult, ...]) -> bool:
    """Tell whether the duplicate-key query can be skipped.

    Returns ``True`` only when the overall quality status is ``"PASSED"`` and
    the contract declares a ``unique_key`` rule whose columns are the same set
    as the write merge keys (order-insensitive).
    """
    if quality_status(quality_results) == "PASSED":
        unique_rules = [rule for rule in contract.quality if rule.rule == "unique_key"]
        for rule in unique_rules:
            if set(rule.columns) == set(contract.write.merge_keys):
                return True
    return False
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _int_row_value(row: Any, key: str) -> int:
|
|
87
|
+
if row is None:
|
|
88
|
+
return 0
|
|
89
|
+
if isinstance(row, dict):
|
|
90
|
+
value = row.get(key)
|
|
91
|
+
elif hasattr(row, "asDict"):
|
|
92
|
+
value = row.asDict().get(key)
|
|
93
|
+
else:
|
|
94
|
+
value = getattr(row, key, None)
|
|
95
|
+
try:
|
|
96
|
+
return int(value or 0)
|
|
97
|
+
except (TypeError, ValueError):
|
|
98
|
+
return 0
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Runtime contract metadata payload helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_core.semantic import SemanticContract
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def contract_metadata(contract: SemanticContract, operations: dict[str, Any]) -> dict[str, Any]:
    """Assemble the contract metadata payload.

    Values come partly from the ``operations`` mapping (``description``,
    ``tags``, ``sla``, ``runtime_parameters``, ``applied_presets``; missing
    keys become ``None``) and partly from the contract itself (governance
    owner, target domain and namespace, operations metadata). ``owner`` and
    ``operations`` are ``None`` when the contract has no governance or
    operations section respectively.
    """
    governance = contract.governance
    contract_operations = contract.operations
    target = contract.target

    payload: dict[str, Any] = {}
    payload["description"] = operations.get("description")
    payload["owner"] = governance.owner if governance else None
    payload["domain"] = target.domain
    payload["tags"] = operations.get("tags")
    payload["sla"] = operations.get("sla")
    payload["runtime_parameters"] = operations.get("runtime_parameters")
    payload["operations"] = contract_operations.metadata if contract_operations else None
    payload["applied_presets"] = operations.get("applied_presets")
    # The payload key is "target_schema" but the value is the target namespace.
    payload["target_schema"] = target.namespace
    return payload
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Runtime write metric collection for Databricks."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_core.metrics import normalize_rows_written
|
|
8
|
+
from contractforge_core.runtime import QueryOne
|
|
9
|
+
from contractforge_core.semantic import SemanticContract
|
|
10
|
+
from contractforge_databricks.metrics import (
|
|
11
|
+
latest_operation_metrics_from_history_row,
|
|
12
|
+
render_delta_history_query,
|
|
13
|
+
resolve_write_metrics,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def collect_write_metrics(
    *,
    contract: SemanticContract,
    target_table: str,
    rows_written: int,
    query_one: QueryOne | None,
) -> tuple[int, dict[str, Any]]:
    """Resolve write metrics for ``target_table`` after a write.

    When ``query_one`` is available, the Delta history query for the target
    table is executed and its row converted to operation metrics; otherwise an
    empty metrics mapping is used. The metrics are then resolved against the
    contract and the caller-reported ``rows_written``.

    Returns:
        ``(normalized_rows_written, operation_metrics)`` where
        ``operation_metrics`` additionally carries ``"metrics_source"`` and
        ``"normalizedRowMetrics"`` (the row metrics including
        ``"rows_affected"``).
    """
    if query_one is None:
        delta_metrics = {}
    else:
        history_row = query_one(render_delta_history_query(target_table=target_table))
        delta_metrics = latest_operation_metrics_from_history_row(history_row)

    row_metrics, operation_metrics, metrics_source = resolve_write_metrics(
        contract,
        rows_written,
        delta_metrics,
    )
    operation_metrics["metrics_source"] = metrics_source

    rows_affected = normalize_rows_written(rows_written, row_metrics)
    row_metrics["rows_affected"] = rows_affected
    operation_metrics["normalizedRowMetrics"] = row_metrics
    return rows_affected, operation_metrics
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Runtime input models for Databricks ingestion orchestration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any, Callable
|
|
7
|
+
|
|
8
|
+
from contractforge_core.runtime import PreparedInput
|
|
9
|
+
from contractforge_databricks.runtime.hooks import DatabricksIngestionHooks
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Alias of the core PreparedInput type under a Databricks-specific name.
# NOTE(review): presumably kept for backward compatibility — confirm.
PreparedViewInput = PreparedInput
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class DatabricksIngestOptions:
    """Immutable options controlling a Databricks ingestion run.

    Every field has a default, so ``DatabricksIngestOptions()`` is valid.
    Field order is part of the positional constructor signature.
    """

    # Catalog and schema handed to the evidence and state writers.
    catalog: str = "main"
    schema: str = "ops"

    # Run-mode switches.
    dry_run: bool = False
    ensure_table: bool = True

    # Locking. NOTE(review): lock semantics are implemented elsewhere — confirm.
    lock_enabled: bool = False
    lock_owner: str | None = None

    # Idempotency settings.
    idempotency_key: str | None = None
    idempotency_policy: str = "always_run"

    # Passed to the quality policy as the on-quality-fail action.
    quality_action: str = "fail"

    # Explicit run id, or a factory used to resolve one.
    run_id: str | None = None
    run_id_factory: Callable[[], str] | None = None

    # Free-form metadata attached to runtime payloads such as error logs.
    runtime_metadata: dict[str, Any] | None = None
    target_schema: dict[str, str] | None = None
    allow_review_required: bool = False

    # When true, a failure result is turned into a raised error on completion.
    raise_on_failure: bool = True

    # Optional lifecycle hooks (for example ``after_finalize``).
    hooks: DatabricksIngestionHooks | None = None
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Databricks runtime option resolution from core contract semantics."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import replace
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from contractforge_core.semantic import SemanticContract
|
|
9
|
+
from contractforge_databricks.contract_extensions import databricks_extensions
|
|
10
|
+
from contractforge_databricks.runtime.hooks import DatabricksIngestionHooks
|
|
11
|
+
from contractforge_databricks.runtime.models import DatabricksIngestOptions
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def effective_ingest_options(contract: SemanticContract, options: DatabricksIngestOptions) -> DatabricksIngestOptions:
    """Fill still-default ingest options from values declared in the contract.

    An option is only overridden while it still holds its default value
    (``None``, ``"always_run"``, ``"fail"`` or ``False``) and the contract
    supplies a truthy replacement. Idempotency and quality settings come from
    the contract's operations metadata; ``hooks`` and ``lock_enabled`` come
    from the Databricks extensions.

    Returns:
        ``options`` itself when nothing changes, otherwise a copy with the
        overrides applied.

    Raises:
        ValueError: if the declared hooks are neither a mapping of hook
            arguments nor a ``DatabricksIngestionHooks`` instance.
    """
    operations = contract.operations
    metadata = operations.metadata if operations and operations.metadata else {}
    extensions = databricks_extensions(contract)
    overrides: dict[str, Any] = {}

    if options.idempotency_key is None and metadata.get("idempotency_key"):
        overrides["idempotency_key"] = str(metadata["idempotency_key"])

    if options.idempotency_policy == "always_run" and metadata.get("idempotency_policy"):
        overrides["idempotency_policy"] = str(metadata["idempotency_policy"])

    if options.quality_action == "fail" and metadata.get("on_quality_fail"):
        overrides["quality_action"] = str(metadata["on_quality_fail"])

    if options.hooks is None:
        declared_hooks = extensions.get("hooks")
        if declared_hooks is not None:
            # A plain mapping is treated as constructor keyword arguments.
            if isinstance(declared_hooks, dict):
                declared_hooks = DatabricksIngestionHooks(**declared_hooks)
            if not isinstance(declared_hooks, DatabricksIngestionHooks):
                raise ValueError("extensions.databricks.hooks must be DatabricksIngestionHooks")
            overrides["hooks"] = declared_hooks

    if not options.lock_enabled and extensions.get("lock_enabled"):
        overrides["lock_enabled"] = True

    if not overrides:
        return options
    return replace(options, **overrides)
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Private helpers for Databricks runtime orchestration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
from typing import NamedTuple
|
|
7
|
+
|
|
8
|
+
from contractforge_core.contracts import semantic_contract_from_mapping
|
|
9
|
+
from contractforge_core.errors import raise_for_failure_result
|
|
10
|
+
from contractforge_core.quality import QualityRuleResult, quality_policy_status
|
|
11
|
+
from contractforge_core.runtime import PreparedInput, QueryOne
|
|
12
|
+
from contractforge_core.semantic import SemanticContract
|
|
13
|
+
from contractforge_databricks.contract_extensions import normalize_databricks_contract
|
|
14
|
+
from contractforge_databricks.evidence import EvidenceWriter
|
|
15
|
+
from contractforge_databricks.execution import SqlRunner
|
|
16
|
+
from contractforge_databricks.rendering.names import target_full_name
|
|
17
|
+
from contractforge_databricks.runtime.dry_run import finalize_dry_run
|
|
18
|
+
from contractforge_databricks.runtime.errors import error_log_payload
|
|
19
|
+
from contractforge_databricks.runtime.finalization import finalize_ingest
|
|
20
|
+
from contractforge_databricks.runtime.models import DatabricksIngestOptions
|
|
21
|
+
from contractforge_databricks.runtime.options import effective_ingest_options
|
|
22
|
+
from contractforge_databricks.runtime.success import finalize_success
|
|
23
|
+
from contractforge_databricks.runtime.utils import resolve_run_id, utc_now_str
|
|
24
|
+
from contractforge_databricks.runtime.write_flow import WriteFlowResult
|
|
25
|
+
from contractforge_databricks.security import exception_message
|
|
26
|
+
from contractforge_databricks.state import StateWriter
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class RuntimeContext(NamedTuple):
    """Immutable per-run values shared by the orchestration helpers."""

    # Normalized contract and the options after contract-derived overrides.
    semantic: SemanticContract
    opts: DatabricksIngestOptions
    # Fully qualified target name rendered from the contract.
    target: str
    # Run identifier and start timestamp string for this run.
    run_id: str
    started: str
    # Writers for evidence and state records.
    evidence: EvidenceWriter
    state: StateWriter
    # Quality status after applying the configured quality action.
    quality_status_value: str
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RuntimeProgress:
    """Mutable state accumulated while a run advances.

    Holds the prepared input plus two dictionaries, both starting empty, for
    schema changes and governance results.
    """

    __slots__ = ("prepared", "schema_changes", "governance_results")

    def __init__(self, prepared: PreparedInput) -> None:
        """Store ``prepared`` and start with empty result dictionaries."""
        self.prepared = prepared
        # Each instance gets its own fresh dictionaries.
        self.schema_changes: dict[str, Any] = {}
        self.governance_results: dict[str, Any] = {}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def build_runtime_context(
    contract: dict[str, Any] | SemanticContract,
    *,
    runner: SqlRunner,
    options: DatabricksIngestOptions | None,
    query_one: QueryOne | None,
    quality_results: tuple[QualityRuleResult, ...],
) -> RuntimeContext:
    """Create the ``RuntimeContext`` for one ingestion run.

    A mapping contract is normalized and converted to a ``SemanticContract``;
    an existing ``SemanticContract`` is used unchanged. Missing ``options``
    fall back to ``DatabricksIngestOptions()`` before contract-derived
    overrides are applied. The evidence and state writers are bound to the
    effective catalog and schema.
    """
    if isinstance(contract, SemanticContract):
        semantic = contract
    else:
        semantic = semantic_contract_from_mapping(normalize_databricks_contract(contract))

    opts = effective_ingest_options(semantic, options or DatabricksIngestOptions())
    target = target_full_name(semantic)

    run_id = resolve_run_id(opts.run_id, opts.run_id_factory)
    started = utc_now_str()
    evidence = EvidenceWriter(runner, catalog=opts.catalog, schema=opts.schema)
    state = StateWriter(runner, catalog=opts.catalog, schema=opts.schema, query_one=query_one)
    quality_status_value = quality_policy_status(quality_results, on_quality_fail=opts.quality_action)

    return RuntimeContext(
        semantic=semantic,
        opts=opts,
        target=target,
        run_id=run_id,
        started=started,
        evidence=evidence,
        state=state,
        quality_status_value=quality_status_value,
    )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def complete_result(ctx: RuntimeContext, result: dict[str, Any]) -> dict[str, Any]:
    """Run the completion steps for a finalized result and return it.

    The ``after_finalize`` hook, when configured, is called with the semantic
    contract and the result. Afterwards, if ``raise_on_failure`` is enabled,
    the result is passed to ``raise_for_failure_result``. The same ``result``
    object is returned.
    """
    options = ctx.opts
    hooks = options.hooks
    # The hook runs before any failure is raised.
    if hooks and hooks.after_finalize:
        hooks.after_finalize(ctx.semantic, result)
    if options.raise_on_failure:
        raise_for_failure_result(result)
    return result
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def finalize_skipped_result(
    ctx: RuntimeContext,
    progress: RuntimeProgress,
    *,
    quality_results: tuple[QualityRuleResult, ...],
    skipped_by_run_id: object,
) -> dict[str, Any]:
    """Finalize a run that was skipped by the idempotency check.

    Delegates to ``finalize_ingest`` with status ``"SKIPPED"``, zero rows
    written, quality status ``"SKIPPED"`` and skip reason
    ``"idempotency_key_already_succeeded"``. A truthy ``skipped_by_run_id`` is
    passed on as a string; a falsy one becomes ``None``.
    """
    prior_run_id = str(skipped_by_run_id) if skipped_by_run_id else None
    return finalize_ingest(
        ctx.evidence,
        ctx.state,
        ctx.semantic,
        progress.prepared,
        ctx.opts,
        ctx.run_id,
        ctx.target,
        "SKIPPED",
        ctx.started,
        rows_written=0,
        quality_status_value="SKIPPED",
        quality_results=quality_results,
        skip_reason="idempotency_key_already_succeeded",
        skipped_by_run_id=prior_run_id,
    )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def finalize_dry_run_result(ctx: RuntimeContext, progress: RuntimeProgress) -> dict[str, Any]:
    """Finalize a dry run by delegating to ``finalize_dry_run``.

    All arguments are taken from the runtime context, except the prepared
    input, which comes from ``progress``.
    """
    context_arguments = {
        "evidence": ctx.evidence,
        "state": ctx.state,
        "contract": ctx.semantic,
        "opts": ctx.opts,
        "run_id": ctx.run_id,
        "target": ctx.target,
        "started": ctx.started,
        "quality_status_value": ctx.quality_status_value,
    }
    return finalize_dry_run(prepared=progress.prepared, **context_arguments)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def finalize_success_result(
    ctx: RuntimeContext,
    progress: RuntimeProgress,
    *,
    write_flow: WriteFlowResult,
    quality_results: tuple[QualityRuleResult, ...],
    query_one: QueryOne | None,
) -> dict[str, Any]:
    """Finalize a successful write by delegating to ``finalize_success``.

    Arguments are gathered from three places: the runtime context (writers,
    contract, options, identifiers), the accumulated progress (prepared
    input, schema changes, governance results) and the write flow result
    (outcome, row count, timestamps, stage durations).
    """
    context_arguments = {
        "evidence": ctx.evidence,
        "state": ctx.state,
        "contract": ctx.semantic,
        "opts": ctx.opts,
        "run_id": ctx.run_id,
        "target": ctx.target,
        "started": ctx.started,
        "quality_status_value": ctx.quality_status_value,
    }
    progress_arguments = {
        "prepared": progress.prepared,
        "schema_changes": progress.schema_changes,
        "governance_results": progress.governance_results,
    }
    write_arguments = {
        "outcome": write_flow.outcome,
        "logical_rows_written": write_flow.logical_rows_written,
        "write_started_at": write_flow.write_started_at,
        "write_finished_at": write_flow.write_finished_at,
        "stage_durations": write_flow.stage_durations,
    }
    return finalize_success(
        quality_results=quality_results,
        query_one=query_one,
        **context_arguments,
        **progress_arguments,
        **write_arguments,
    )
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def finalize_failure_result(
    ctx: RuntimeContext,
    progress: RuntimeProgress,
    exc: Exception,
    *,
    quality_results: tuple[QualityRuleResult, ...],
) -> dict[str, Any]:
    """Record a failed run and build its result.

    Outside of dry runs an error-log entry is written through
    ``ctx.evidence`` first; the run is then finalized through
    ``finalize_ingest`` with status ``"FAILED"`` and zero rows written.

    Args:
        ctx: Runtime context supplying collaborators, contract, options, run
            id, target, start marker and quality status value.
        progress: Run progress supplying the prepared input, schema changes
            and governance results gathered before the failure.
        exc: The exception that aborted the run.
        quality_results: Quality rule results forwarded unchanged.

    Returns:
        Whatever ``finalize_ingest`` returns for the failed run.
    """
    message = exception_message(exc)

    if not ctx.opts.dry_run:
        source = progress.prepared
        payload = error_log_payload(
            exc,
            run_id=ctx.run_id,
            target=ctx.target,
            # Prefer the source name; fall back to the source view when it is falsy.
            source_table=source.source_name or source.source_view,
            mode=ctx.semantic.write.mode,
            runtime_metadata=ctx.opts.runtime_metadata,
        )
        ctx.evidence.write_error_log(payload)

    failure_details: dict[str, Any] = {
        "rows_written": 0,
        "quality_status_value": ctx.quality_status_value,
        "quality_results": quality_results,
        "error_message": message,
        "schema_changes": progress.schema_changes,
        "governance_results": progress.governance_results,
    }
    return finalize_ingest(
        ctx.evidence,
        ctx.state,
        ctx.semantic,
        progress.prepared,
        ctx.opts,
        ctx.run_id,
        ctx.target,
        "FAILED",
        ctx.started,
        **failure_details,
    )
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Databricks runtime orchestration over prepared source views."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from contractforge_core.quality import QualityRuleResult, quality_status
|
|
7
|
+
from contractforge_core.runtime import PreparedInput, QueryOne
|
|
8
|
+
from contractforge_core.semantic import SemanticContract
|
|
9
|
+
from contractforge_databricks.adapter import DatabricksAdapter
|
|
10
|
+
from contractforge_databricks.execution import SqlRunner
|
|
11
|
+
from contractforge_databricks.rendering.names import target_full_name
|
|
12
|
+
from contractforge_databricks.runtime.hooks import apply_prepared_hook
|
|
13
|
+
from contractforge_databricks.runtime.models import DatabricksIngestOptions
|
|
14
|
+
from contractforge_databricks.runtime.orchestration_context import (
|
|
15
|
+
RuntimeContext,
|
|
16
|
+
RuntimeProgress,
|
|
17
|
+
build_runtime_context,
|
|
18
|
+
complete_result,
|
|
19
|
+
finalize_dry_run_result,
|
|
20
|
+
finalize_failure_result,
|
|
21
|
+
finalize_skipped_result,
|
|
22
|
+
finalize_success_result,
|
|
23
|
+
)
|
|
24
|
+
from contractforge_databricks.runtime.write_flow import execute_runtime_write_flow
|
|
25
|
+
from contractforge_databricks.state import render_find_idempotent_run_sql
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def ingest_databricks_contract(
    contract: dict[str, Any] | SemanticContract,
    *,
    runner: SqlRunner,
    prepared: PreparedInput,
    options: DatabricksIngestOptions | None = None,
    query_one: QueryOne | None = None,
    quality_results: tuple[QualityRuleResult, ...] = (),
) -> dict[str, Any]:
    """Run a single Databricks contract against an already prepared source view.

    Steps, in order: validate planning, apply the ``after_prepare`` hook,
    check for an idempotent skip, enforce the quality gate, acquire the lock,
    apply the ``before_write`` hook, then either finish as a dry run or
    execute the write flow and the ``after_write`` hook.

    Any ``Exception`` raised inside those steps is converted into a failure
    result instead of propagating. ``_release_lock`` is invoked on every exit
    path, including paths that return before ``_acquire_lock`` was reached.

    Args:
        contract: Contract mapping or semantic contract to execute.
        runner: SQL runner handed to the context builder and the write flow.
        prepared: Prepared input the run starts from; hooks may replace it.
        options: Optional ingest options passed to the context builder.
        query_one: Optional single-row query callable used for the
            idempotency lookup and forwarded to the write flow/finalizers.
        quality_results: Pre-computed quality rule results.

    Returns:
        The value produced by ``complete_result`` for the skipped, dry-run,
        successful or failed outcome.
    """
    ctx = build_runtime_context(
        contract,
        runner=runner,
        options=options,
        query_one=query_one,
        quality_results=quality_results,
    )
    progress = RuntimeProgress(prepared=prepared)
    # Computed outside the try block, so an error here propagates to the caller.
    gate_status = quality_status(quality_results)

    try:
        _validate_planning(ctx.semantic, ctx.opts)
        progress.prepared = _apply_after_prepare(ctx, progress.prepared)

        previous_run = _idempotency_skip(ctx.target, ctx.opts, query_one)
        if previous_run:
            skipped_result = finalize_skipped_result(
                ctx,
                progress,
                quality_results=quality_results,
                skipped_by_run_id=previous_run.get("run_id"),
            )
            return complete_result(ctx, skipped_result)

        _raise_for_quality_failure(gate_status, ctx.opts)
        _acquire_lock(ctx)
        progress.prepared = _apply_before_write(ctx, progress.prepared)

        if ctx.opts.dry_run:
            dry_run_result = finalize_dry_run_result(ctx, progress)
            return complete_result(ctx, dry_run_result)

        flow = execute_runtime_write_flow(
            runner=runner,
            evidence=ctx.evidence,
            contract=ctx.semantic,
            prepared=progress.prepared,
            opts=ctx.opts,
            run_id=ctx.run_id,
            target=ctx.target,
            query_one=query_one,
            quality_results=quality_results,
        )
        # Keep these on ``progress`` so a later failure can still report them.
        progress.schema_changes = flow.schema_changes
        progress.governance_results = flow.governance_results

        hooks = ctx.opts.hooks
        if hooks and hooks.after_write:
            hooks.after_write(ctx.semantic, progress.prepared, flow.outcome)

        success_result = finalize_success_result(
            ctx,
            progress,
            write_flow=flow,
            quality_results=quality_results,
            query_one=query_one,
        )
        return complete_result(ctx, success_result)
    except Exception as exc:
        failure_result = finalize_failure_result(ctx, progress, exc, quality_results=quality_results)
        return complete_result(ctx, failure_result)
    finally:
        _release_lock(ctx)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _apply_after_prepare(ctx: RuntimeContext, prepared: PreparedInput) -> PreparedInput:
    """Pass ``prepared`` through the ``after_prepare`` hook via ``apply_prepared_hook``.

    ``None`` is handed over as the hook when no hooks object is configured.
    """
    hooks = ctx.opts.hooks
    hook = hooks.after_prepare if hooks else None
    return apply_prepared_hook(hook, ctx.semantic, prepared)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _apply_before_write(ctx: RuntimeContext, prepared: PreparedInput) -> PreparedInput:
    """Pass ``prepared`` through the ``before_write`` hook via ``apply_prepared_hook``.

    ``None`` is handed over as the hook when no hooks object is configured.
    """
    hooks = ctx.opts.hooks
    hook = hooks.before_write if hooks else None
    return apply_prepared_hook(hook, ctx.semantic, prepared)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _raise_for_quality_failure(raw_quality_status: str, opts: DatabricksIngestOptions) -> None:
    """Abort the run when quality gates failed and the configured action is ``"fail"``.

    Args:
        raw_quality_status: Aggregated quality status string.
        opts: Ingest options; ``quality_action`` is read only when the status
            is ``"FAILED"``.

    Raises:
        ValueError: If the status is ``"FAILED"`` and ``opts.quality_action``
            equals ``"fail"``.
    """
    if raw_quality_status != "FAILED":
        return
    if opts.quality_action != "fail":
        return
    raise ValueError("Quality gates failed before Databricks write")
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _acquire_lock(ctx: RuntimeContext) -> None:
    """Acquire the target lock through ``ctx.state`` when locking applies.

    Nothing happens when locking is disabled or the run is a dry run.
    """
    opts = ctx.opts
    if not opts.lock_enabled or opts.dry_run:
        return
    ctx.state.acquire_lock(
        target_table=ctx.target,
        run_id=ctx.run_id,
        owner=opts.lock_owner,
    )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _release_lock(ctx: RuntimeContext) -> None:
    """Release the target lock through ``ctx.state`` when locking applies.

    Nothing happens when locking is disabled or the run is a dry run.
    """
    opts = ctx.opts
    if not opts.lock_enabled or opts.dry_run:
        return
    ctx.state.release_lock(
        target_table=ctx.target,
        run_id=ctx.run_id,
    )
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _validate_planning(contract: SemanticContract, opts: DatabricksIngestOptions) -> None:
    """Plan the contract with a ``DatabricksAdapter`` and reject blocked plans.

    The adapter is built from the contract's full target name plus the
    ``runtime_type`` (defaulting to ``"serverless"``) and optional
    ``spark_version`` found in ``opts.runtime_metadata``.

    Raises:
        ValueError: If the plan status is ``"UNSUPPORTED"``, or is
            ``"REVIEW_REQUIRED"`` while ``opts.allow_review_required`` is
            falsy. The message lists the blocker messages joined by ``"; "``.
    """
    runtime = dict(opts.runtime_metadata or {})
    spark_version = runtime.get("spark_version")

    adapter = DatabricksAdapter.from_evidence(
        target_table=target_full_name(contract),
        runtime_type=str(runtime.get("runtime_type") or "serverless"),
        spark_version=str(spark_version) if spark_version else None,
    )
    result = adapter.plan(contract)

    status = result.status
    if status == "UNSUPPORTED":
        blocked = True
    else:
        blocked = status == "REVIEW_REQUIRED" and not opts.allow_review_required
    if not blocked:
        return

    messages = [blocker.message for blocker in result.blockers]
    blockers = "; ".join(messages)
    raise ValueError(f"Databricks planning status {status}: {blockers}")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _idempotency_skip(target: str, opts: DatabricksIngestOptions, query_one: QueryOne | None) -> dict[str, Any] | None:
    """Look up a previous successful run for the configured idempotency key.

    Args:
        target: Target table name the lookup is scoped to.
        opts: Ingest options; ``idempotency_key``, ``idempotency_policy``,
            ``catalog`` and ``schema`` are read.
        query_one: Callable executing a statement and returning one row, or
            ``None`` when no executor is available.

    Returns:
        The previous run row when one with status ``SUCCESS`` exists and the
        policy asks to skip; otherwise ``None``.

    Raises:
        ValueError: If a previous successful run exists and the policy is
            ``"fail_if_success"``.
    """
    if not opts.idempotency_key or opts.idempotency_policy not in {"skip_if_success", "rerun_if_failed", "fail_if_success"}:
        return None
    # Without an executor the lookup cannot run, so skip rendering the SQL.
    # NOTE(review): this also means "fail_if_success" is not enforced when no
    # ``query_one`` is supplied - confirm that is intended by callers.
    if not query_one:
        return None
    statement = render_find_idempotent_run_sql(
        target_table=target,
        idempotency_key=opts.idempotency_key,
        status="SUCCESS",
        runs_table=f"{opts.catalog}.{opts.schema}.ctrl_ingestion_runs",
    )
    previous = query_one(statement)
    if not previous:
        return None
    if opts.idempotency_policy == "fail_if_success":
        raise ValueError(f"idempotency_key={opts.idempotency_key!r} already succeeded")
    return previous
|