contractforge-databricks 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contractforge_databricks/__init__.py +172 -0
- contractforge_databricks/adapter.py +69 -0
- contractforge_databricks/annotations/__init__.py +10 -0
- contractforge_databricks/annotations/application.py +52 -0
- contractforge_databricks/annotations/audit.py +49 -0
- contractforge_databricks/annotations/sql.py +142 -0
- contractforge_databricks/api.py +65 -0
- contractforge_databricks/bundles/__init__.py +9 -0
- contractforge_databricks/bundles/assets.py +47 -0
- contractforge_databricks/bundles/project.py +213 -0
- contractforge_databricks/bundles/project_config.py +133 -0
- contractforge_databricks/capabilities/__init__.py +17 -0
- contractforge_databricks/capabilities/builders.py +43 -0
- contractforge_databricks/capabilities/evaluate.py +162 -0
- contractforge_databricks/capabilities/mapping.py +36 -0
- contractforge_databricks/capabilities/models.py +44 -0
- contractforge_databricks/capabilities/runtime.py +111 -0
- contractforge_databricks/capabilities/uc.py +47 -0
- contractforge_databricks/cli.py +196 -0
- contractforge_databricks/cli_deploy.py +98 -0
- contractforge_databricks/cli_governance.py +142 -0
- contractforge_databricks/cli_io.py +91 -0
- contractforge_databricks/cli_maintenance.py +69 -0
- contractforge_databricks/coercion.py +31 -0
- contractforge_databricks/contract_extensions.py +70 -0
- contractforge_databricks/cost/__init__.py +11 -0
- contractforge_databricks/cost/model.py +22 -0
- contractforge_databricks/cost/report.py +65 -0
- contractforge_databricks/cost/sql.py +136 -0
- contractforge_databricks/dashboards/__init__.py +15 -0
- contractforge_databricks/dashboards/control_tables.py +150 -0
- contractforge_databricks/diagnostics/__init__.py +7 -0
- contractforge_databricks/diagnostics/explain.py +40 -0
- contractforge_databricks/environment.py +53 -0
- contractforge_databricks/evidence/__init__.py +98 -0
- contractforge_databricks/evidence/ddl.py +35 -0
- contractforge_databricks/evidence/governance_log.py +175 -0
- contractforge_databricks/evidence/helpers.py +29 -0
- contractforge_databricks/evidence/ops_log.py +210 -0
- contractforge_databricks/evidence/records.py +27 -0
- contractforge_databricks/evidence/run_log.py +74 -0
- contractforge_databricks/evidence/schemas.py +7 -0
- contractforge_databricks/evidence/sql.py +144 -0
- contractforge_databricks/evidence/tables.py +20 -0
- contractforge_databricks/evidence/writer.py +118 -0
- contractforge_databricks/execution/__init__.py +70 -0
- contractforge_databricks/execution/delta_basic.py +57 -0
- contractforge_databricks/execution/hash_diff.py +126 -0
- contractforge_databricks/execution/hash_diff_latest.py +142 -0
- contractforge_databricks/execution/replace_partitions.py +40 -0
- contractforge_databricks/execution/results.py +5 -0
- contractforge_databricks/execution/retry.py +36 -0
- contractforge_databricks/execution/scd2.py +213 -0
- contractforge_databricks/execution/scd2_deletes.py +65 -0
- contractforge_databricks/execution/scd2_late.py +30 -0
- contractforge_databricks/execution/snapshot.py +77 -0
- contractforge_databricks/execution/sql_merge.py +85 -0
- contractforge_databricks/execution/tables.py +98 -0
- contractforge_databricks/execution/windows.py +58 -0
- contractforge_databricks/governance/__init__.py +30 -0
- contractforge_databricks/governance/access.py +185 -0
- contractforge_databricks/governance/application.py +93 -0
- contractforge_databricks/governance/drift.py +49 -0
- contractforge_databricks/governance/runtime.py +60 -0
- contractforge_databricks/governance/sql.py +31 -0
- contractforge_databricks/governance/validation.py +135 -0
- contractforge_databricks/lakeflow/__init__.py +21 -0
- contractforge_databricks/lakeflow/compatibility.py +194 -0
- contractforge_databricks/lakeflow/rendering.py +175 -0
- contractforge_databricks/lineage/__init__.py +7 -0
- contractforge_databricks/lineage/openlineage.py +182 -0
- contractforge_databricks/maintenance/__init__.py +27 -0
- contractforge_databricks/maintenance/retention.py +90 -0
- contractforge_databricks/maintenance/sql.py +68 -0
- contractforge_databricks/metrics/__init__.py +19 -0
- contractforge_databricks/metrics/history.py +21 -0
- contractforge_databricks/metrics/write.py +63 -0
- contractforge_databricks/operations/__init__.py +4 -0
- contractforge_databricks/operations/application.py +38 -0
- contractforge_databricks/operations/sql.py +95 -0
- contractforge_databricks/parity/__init__.py +18 -0
- contractforge_databricks/parity/catalog.py +59 -0
- contractforge_databricks/parity/models.py +7 -0
- contractforge_databricks/parity/scenarios.py +111 -0
- contractforge_databricks/partitioning/__init__.py +3 -0
- contractforge_databricks/partitioning/predicates.py +28 -0
- contractforge_databricks/preparation/__init__.py +47 -0
- contractforge_databricks/preparation/deduplicate.py +87 -0
- contractforge_databricks/preparation/encoding.py +37 -0
- contractforge_databricks/preparation/hashing.py +18 -0
- contractforge_databricks/preparation/pyspark.py +178 -0
- contractforge_databricks/preparation/pyspark_staging.py +70 -0
- contractforge_databricks/preparation/shape.py +209 -0
- contractforge_databricks/preparation/shape_validation.py +94 -0
- contractforge_databricks/preparation/staging.py +17 -0
- contractforge_databricks/preparation/zip_arrays.py +51 -0
- contractforge_databricks/presets/__init__.py +3 -0
- contractforge_databricks/presets/base.py +24 -0
- contractforge_databricks/presets/bronze.py +57 -0
- contractforge_databricks/presets/catalog.py +22 -0
- contractforge_databricks/presets/core.py +134 -0
- contractforge_databricks/presets/gold.py +62 -0
- contractforge_databricks/presets/modifiers.py +51 -0
- contractforge_databricks/presets/runtime.py +22 -0
- contractforge_databricks/presets/silver.py +101 -0
- contractforge_databricks/presets/write_engine.py +57 -0
- contractforge_databricks/quality/__init__.py +41 -0
- contractforge_databricks/quality/evaluation.py +178 -0
- contractforge_databricks/quality/persistence.py +81 -0
- contractforge_databricks/quality/registry.py +134 -0
- contractforge_databricks/quality/results.py +17 -0
- contractforge_databricks/quality/sql.py +113 -0
- contractforge_databricks/rendering/__init__.py +11 -0
- contractforge_databricks/rendering/bundle.py +93 -0
- contractforge_databricks/rendering/markdown.py +50 -0
- contractforge_databricks/rendering/names.py +56 -0
- contractforge_databricks/results.py +15 -0
- contractforge_databricks/runtime/__init__.py +101 -0
- contractforge_databricks/runtime/available_now.py +147 -0
- contractforge_databricks/runtime/bundles.py +211 -0
- contractforge_databricks/runtime/cache.py +20 -0
- contractforge_databricks/runtime/control_tables.py +19 -0
- contractforge_databricks/runtime/deploy.py +197 -0
- contractforge_databricks/runtime/detection.py +114 -0
- contractforge_databricks/runtime/dry_run.py +46 -0
- contractforge_databricks/runtime/errors.py +54 -0
- contractforge_databricks/runtime/file_selection.py +109 -0
- contractforge_databricks/runtime/finalization.py +168 -0
- contractforge_databricks/runtime/governance.py +37 -0
- contractforge_databricks/runtime/hooks.py +45 -0
- contractforge_databricks/runtime/http_file.py +37 -0
- contractforge_databricks/runtime/http_retry.py +15 -0
- contractforge_databricks/runtime/http_safety.py +9 -0
- contractforge_databricks/runtime/json_materialization.py +97 -0
- contractforge_databricks/runtime/lineage.py +164 -0
- contractforge_databricks/runtime/maintenance.py +43 -0
- contractforge_databricks/runtime/merge_validation.py +98 -0
- contractforge_databricks/runtime/metadata.py +21 -0
- contractforge_databricks/runtime/metrics.py +34 -0
- contractforge_databricks/runtime/models.py +32 -0
- contractforge_databricks/runtime/options.py +33 -0
- contractforge_databricks/runtime/orchestration_context.py +185 -0
- contractforge_databricks/runtime/orchestrator.py +147 -0
- contractforge_databricks/runtime/partitioning.py +93 -0
- contractforge_databricks/runtime/quality_quarantine.py +92 -0
- contractforge_databricks/runtime/rest_api.py +46 -0
- contractforge_databricks/runtime/rest_auth.py +21 -0
- contractforge_databricks/runtime/rest_pagination.py +21 -0
- contractforge_databricks/runtime/run_payload.py +177 -0
- contractforge_databricks/runtime/schema.py +106 -0
- contractforge_databricks/runtime/source_metadata.py +30 -0
- contractforge_databricks/runtime/source_registry.py +43 -0
- contractforge_databricks/runtime/source_schema.py +24 -0
- contractforge_databricks/runtime/sources.py +208 -0
- contractforge_databricks/runtime/spark.py +183 -0
- contractforge_databricks/runtime/spark_defaults.py +35 -0
- contractforge_databricks/runtime/storage_auth.py +132 -0
- contractforge_databricks/runtime/streaming.py +131 -0
- contractforge_databricks/runtime/success.py +104 -0
- contractforge_databricks/runtime/utils.py +52 -0
- contractforge_databricks/runtime/watermark.py +71 -0
- contractforge_databricks/runtime/windows.py +184 -0
- contractforge_databricks/runtime/write.py +66 -0
- contractforge_databricks/runtime/write_flow.py +146 -0
- contractforge_databricks/runtime/write_strategy.py +40 -0
- contractforge_databricks/schema/__init__.py +21 -0
- contractforge_databricks/schema/diff.py +11 -0
- contractforge_databricks/schema/policy.py +33 -0
- contractforge_databricks/schema/sync.py +23 -0
- contractforge_databricks/security/__init__.py +21 -0
- contractforge_databricks/security/errors.py +5 -0
- contractforge_databricks/security/redaction.py +5 -0
- contractforge_databricks/security/secrets.py +114 -0
- contractforge_databricks/security/source_policy.py +17 -0
- contractforge_databricks/shapes/__init__.py +3 -0
- contractforge_databricks/shapes/sql.py +123 -0
- contractforge_databricks/sources/__init__.py +67 -0
- contractforge_databricks/sources/artifacts.py +100 -0
- contractforge_databricks/sources/autoloader.py +48 -0
- contractforge_databricks/sources/bounded_streams.py +44 -0
- contractforge_databricks/sources/classification.py +115 -0
- contractforge_databricks/sources/delta_share.py +21 -0
- contractforge_databricks/sources/files.py +48 -0
- contractforge_databricks/sources/http_file.py +46 -0
- contractforge_databricks/sources/interpret.py +76 -0
- contractforge_databricks/sources/jdbc.py +32 -0
- contractforge_databricks/sources/metadata.py +18 -0
- contractforge_databricks/sources/native_passthrough.py +33 -0
- contractforge_databricks/sources/rds_iam.py +15 -0
- contractforge_databricks/sources/rds_iam_runtime.py +191 -0
- contractforge_databricks/sources/rest_api.py +33 -0
- contractforge_databricks/sources/support.py +50 -0
- contractforge_databricks/sources/table_refs.py +65 -0
- contractforge_databricks/sql/__init__.py +4 -0
- contractforge_databricks/sql/identifiers.py +17 -0
- contractforge_databricks/sql/literals.py +36 -0
- contractforge_databricks/state/__init__.py +39 -0
- contractforge_databricks/state/ddl.py +24 -0
- contractforge_databricks/state/migrations.py +146 -0
- contractforge_databricks/state/queries.py +149 -0
- contractforge_databricks/state/sql.py +116 -0
- contractforge_databricks/state/tables.py +9 -0
- contractforge_databricks/state/writer.py +83 -0
- contractforge_databricks/templates/__init__.py +15 -0
- contractforge_databricks/templates/catalog.py +205 -0
- contractforge_databricks/templates/catalog_parity.py +85 -0
- contractforge_databricks/templates/core.py +83 -0
- contractforge_databricks/templates/enrichment.py +175 -0
- contractforge_databricks/transforms/__init__.py +3 -0
- contractforge_databricks/transforms/sql.py +118 -0
- contractforge_databricks/watermark/__init__.py +6 -0
- contractforge_databricks/watermark/sql.py +91 -0
- contractforge_databricks/write_modes/__init__.py +20 -0
- contractforge_databricks/write_modes/registry.py +44 -0
- contractforge_databricks/write_modes/sql.py +33 -0
- contractforge_databricks/write_modes/strategy.py +192 -0
- contractforge_databricks-0.1.0.dist-info/METADATA +34 -0
- contractforge_databricks-0.1.0.dist-info/RECORD +220 -0
- contractforge_databricks-0.1.0.dist-info/WHEEL +4 -0
- contractforge_databricks-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Target-latest selection helpers for SCD1 hash-diff writes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from contractforge_core.runtime import QueryOne
|
|
9
|
+
from contractforge_core.semantic import SemanticContract
|
|
10
|
+
from contractforge_databricks.sql import quote_identifier, quote_table_name
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class HashDiffLatestSelection:
    """Immutable description of how to order target rows to find the latest one.

    The defaults represent the case where no ordering is available.
    """

    # ORDER BY clause text, or None when no ordering is available.
    order_by: str | None = None
    # Short tag identifying the origin of ``order_by``; defaults to "none".
    reason: str = "none"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def resolve_hash_diff_latest_selection(
    contract: SemanticContract,
    target_schema: dict[str, str] | None = None,
) -> HashDiffLatestSelection:
    """Choose the ordering used to pick the latest target row per key.

    Precedence, first match wins:

    1. an explicit ordering resolved from the contract (reason ``"contract"``);
    2. an ``ingestion_sequence`` column in ``target_schema``;
    3. an ``ingestion_ts_utc`` column, with ``__run_id`` appended as a second
       sort key when that column is also present;
    4. a ``source_loaded_at_utc`` column.

    When nothing matches, a default ``HashDiffLatestSelection`` is returned
    (no ordering, reason ``"none"``).
    """
    contract_ordering = _explicit_order_by(contract)
    if contract_ordering:
        return HashDiffLatestSelection(order_by=contract_ordering, reason="contract")

    available = frozenset(target_schema or ())

    if "ingestion_sequence" in available:
        return HashDiffLatestSelection(
            order_by="ingestion_sequence DESC NULLS LAST",
            reason="ingestion_sequence",
        )

    if "ingestion_ts_utc" in available:
        ordering = "ingestion_ts_utc DESC NULLS LAST"
        if "__run_id" in available:
            ordering += ", __run_id DESC NULLS LAST"
        return HashDiffLatestSelection(order_by=ordering, reason="ingestion_ts_utc")

    if "source_loaded_at_utc" in available:
        return HashDiffLatestSelection(
            order_by="source_loaded_at_utc DESC NULLS LAST",
            reason="source_loaded_at_utc",
        )

    return HashDiffLatestSelection()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def validate_hash_diff_target_latest(
    *,
    query_one: QueryOne | None,
    target_table: str,
    merge_keys: tuple[str, ...],
    selection: HashDiffLatestSelection,
) -> None:
    """Fail fast when the target holds key duplicates that cannot be ordered.

    Nothing is checked when ``query_one`` is ``None``.

    Raises:
        ValueError: when the selection is based on ``ingestion_ts_utc`` and
            some key has several rows whose ``ingestion_ts_utc`` is NULL for
            all of them, or when the selection carries no ordering and some
            key has several rows.
    """
    if query_one is None:
        return

    if selection.reason == "ingestion_ts_utc":
        null_ts_sql = render_null_ingestion_ts_ambiguity_sql(target_table, merge_keys)
        if _count(query_one(null_ts_sql)):
            raise ValueError(
                "scd1_hash_diff found multiple target versions per key with null ingestion_ts_utc. "
                "Provide transform.deduplicate.order_by for history migration or rewrite the target with "
                "ingestion_ts_utc/ingestion_sequence."
            )

    # An ordering exists, so duplicate keys can be resolved deterministically.
    if selection.order_by is not None:
        return

    duplicate_sql = render_hash_diff_duplicate_target_keys_sql(target_table, merge_keys)
    if _count(query_one(duplicate_sql)):
        raise ValueError(
            "scd1_hash_diff found multiple target versions per key, but no deterministic ordering exists "
            "to select the latest state. Provide transform.deduplicate.order_by or rewrite the target with "
            "ingestion_ts_utc/ingestion_sequence."
        )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def render_null_ingestion_ts_ambiguity_sql(target_table: str, hash_keys: tuple[str, ...]) -> str:
    """Render SQL counting keys whose duplicates all lack ``ingestion_ts_utc``.

    The query groups the target by ``hash_keys`` and counts the groups that
    have more than one row and whose ``MAX(ingestion_ts_utc)`` is NULL. The
    result is a single row with the column ``ambiguous_key_count``.

    Args:
        target_table: Name of the target table; quoted via ``quote_table_name``.
        hash_keys: Key columns to group by; each is quoted via ``quote_identifier``.

    Raises:
        ValueError: if ``hash_keys`` is empty, because the SELECT list and
            GROUP BY clause would otherwise be rendered empty.
    """
    if not hash_keys:
        raise ValueError("scd1_hash_diff null ingestion_ts_utc check requires at least one key column")
    keys = ", ".join(quote_identifier(key) for key in hash_keys)
    return "\n".join(
        [
            "SELECT COUNT(*) AS ambiguous_key_count",
            "FROM (",
            f" SELECT {keys}, COUNT(*) AS __cnt, MAX({quote_identifier('ingestion_ts_utc')}) AS __max_ingestion_ts_utc",
            f" FROM {quote_table_name(target_table)}",
            f" GROUP BY {keys}",
            ") target_versions",
            "WHERE __cnt > 1 AND __max_ingestion_ts_utc IS NULL",
        ]
    )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def render_hash_diff_duplicate_target_keys_sql(target_table: str, hash_keys: tuple[str, ...]) -> str:
    """Render SQL counting keys that occur more than once in the target.

    The query groups the target by ``hash_keys`` and counts the groups with
    more than one row. The result is a single row with the column
    ``duplicate_key_count``.

    Args:
        target_table: Name of the target table; quoted via ``quote_table_name``.
        hash_keys: Key columns to group by; each is quoted via ``quote_identifier``.

    Raises:
        ValueError: if ``hash_keys`` is empty, because the SELECT list and
            GROUP BY clause would otherwise be rendered empty.
    """
    if not hash_keys:
        raise ValueError("scd1_hash_diff duplicate key check requires at least one key column")
    keys = ", ".join(quote_identifier(key) for key in hash_keys)
    return "\n".join(
        [
            "SELECT COUNT(*) AS duplicate_key_count",
            "FROM (",
            f" SELECT {keys}, COUNT(*) AS __cnt",
            f" FROM {quote_table_name(target_table)}",
            f" GROUP BY {keys}",
            ") target_versions",
            "WHERE __cnt > 1",
        ]
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _explicit_order_by(contract: SemanticContract) -> str | None:
    """Return the ORDER BY text configured under ``transform.deduplicate``.

    ``None`` is returned when the contract has no transform, when the
    ``deduplicate`` entry is not a dict, when its ``keys`` are given but differ
    from the write keys (``merge_keys``, falling back to ``hash_keys``), or
    when no usable ``order_by`` is present.
    """
    dedup_spec = contract.transform.raw.get("deduplicate") if contract.transform else None
    if not isinstance(dedup_spec, dict):
        return None

    write_keys = contract.write.merge_keys or contract.write.hash_keys
    dedup_keys = _as_tuple(dedup_spec.get("keys"))
    # A deduplicate block declared over other keys does not describe this write.
    keys_disagree = bool(dedup_keys) and dedup_keys != write_keys
    if keys_disagree:
        return None
    return _render_order_by(dedup_spec.get("order_by"))
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _render_order_by(value: Any) -> str | None:
|
|
110
|
+
if isinstance(value, str) and value.strip():
|
|
111
|
+
return value.strip()
|
|
112
|
+
if not isinstance(value, list):
|
|
113
|
+
return None
|
|
114
|
+
parts: list[str] = []
|
|
115
|
+
for item in value:
|
|
116
|
+
if not isinstance(item, dict) or not item.get("column"):
|
|
117
|
+
continue
|
|
118
|
+
clause = quote_identifier(str(item["column"]))
|
|
119
|
+
clause += f" {str(item.get('direction', 'desc')).upper()}"
|
|
120
|
+
if item.get("nulls"):
|
|
121
|
+
clause += f" NULLS {str(item['nulls']).upper()}"
|
|
122
|
+
parts.append(clause)
|
|
123
|
+
return ", ".join(parts) or None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _as_tuple(value: Any) -> tuple[str, ...]:
|
|
127
|
+
if isinstance(value, str):
|
|
128
|
+
return (value,)
|
|
129
|
+
if isinstance(value, (list, tuple)):
|
|
130
|
+
return tuple(str(item) for item in value)
|
|
131
|
+
return ()
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _count(row: dict[str, Any] | None) -> int:
|
|
135
|
+
if not row:
|
|
136
|
+
return 0
|
|
137
|
+
for value in row.values():
|
|
138
|
+
try:
|
|
139
|
+
return int(value)
|
|
140
|
+
except (TypeError, ValueError):
|
|
141
|
+
continue
|
|
142
|
+
return 0
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Databricks selective partition replacement helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contractforge_core.execution import ExecutionOutcome
|
|
6
|
+
from contractforge_core.semantic import SemanticContract
|
|
7
|
+
from contractforge_databricks.execution.sql_merge import SqlRunner
|
|
8
|
+
from contractforge_databricks.rendering.names import target_full_name
|
|
9
|
+
from contractforge_databricks.sql import quote_table_name
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def render_replace_partitions_sql(*, target_table: str, source_view: str, predicate: str) -> str:
    """Render an ``INSERT INTO ... BY NAME REPLACE WHERE`` statement.

    Args:
        target_table: Table to write to; quoted via ``quote_table_name``.
        source_view: View to select all columns from; quoted via ``quote_table_name``.
        predicate: SQL predicate placed verbatim after ``REPLACE WHERE``.

    Raises:
        ValueError: if ``predicate`` is empty or contains only whitespace,
            since either would render an empty ``REPLACE WHERE`` clause.
    """
    if not predicate or not predicate.strip():
        raise ValueError("replace_partitions requires a non-empty partition predicate")
    return (
        f"INSERT INTO TABLE {quote_table_name(target_table)} BY NAME\n"
        f"REPLACE WHERE {predicate}\n"
        f"SELECT * FROM {quote_table_name(source_view)}"
    )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def execute_replace_partitions(
    *,
    runner: SqlRunner,
    contract: SemanticContract,
    source_view: str,
    predicate: str | None,
) -> ExecutionOutcome:
    """Run a selective partition replacement for the contract's target.

    The contract's write mode must be ``"scd1_upsert"``; any other mode raises
    ``ValueError``. A missing ``predicate`` is forwarded to the renderer as an
    empty string. The rendered statement is executed through ``runner.sql``
    and echoed back in the returned outcome.
    """
    mode = contract.write.mode
    if mode != "scd1_upsert":
        raise ValueError(f"execute_replace_partitions only supports scd1_upsert, got {mode}")

    target = target_full_name(contract)
    replace_sql = render_replace_partitions_sql(
        target_table=target,
        source_view=source_view,
        predicate=predicate if predicate else "",
    )
    runner.sql(replace_sql)

    return ExecutionOutcome(
        status="SUCCESS",
        operation="delta_replace_partitions",
        target=target,
        metrics={"replace_predicate": predicate},
        sql=replace_sql,
    )
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Retry helpers for Databricks Delta concurrency operations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import random
|
|
6
|
+
import time
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
from typing import TypeVar
|
|
9
|
+
|
|
10
|
+
T = TypeVar("T")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def is_retryable_delta_concurrency_error(exc: Exception) -> bool:
    """Tell whether the exception text contains a retry-worthy marker.

    The check is a case-insensitive substring search of ``str(exc)`` for
    ``CONCURRENT``, ``CONFLICT``, ``RETRY`` or ``DELTA_CONCURRENT``.
    """
    message = str(exc).upper()
    for marker in ("CONCURRENT", "CONFLICT", "RETRY", "DELTA_CONCURRENT"):
        if marker in message:
            return True
    return False
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def with_delta_retry(
    fn: Callable[[], T],
    *,
    attempts: int = 3,
    backoff_seconds: float = 1.0,
    jitter: Callable[[], float] | None = None,
    sleep: Callable[[float], None] = time.sleep,
) -> T:
    """Call ``fn`` and retry it when it fails with a retryable concurrency error.

    Args:
        fn: Zero-argument callable to run.
        attempts: Maximum number of calls to ``fn``; must be at least 1.
        backoff_seconds: Base delay; the wait after the n-th failed attempt is
            ``backoff_seconds * n`` plus the jitter value.
        jitter: Callable returning extra seconds to add to each wait;
            ``random.random`` is used when omitted.
        sleep: Callable used to wait; injectable so callers can avoid real sleeps.

    Returns:
        Whatever ``fn`` returns on its first successful call.

    Raises:
        ValueError: if ``attempts`` is smaller than 1.
        Exception: the error raised by ``fn`` when it is not retryable or when
            the last attempt fails; it is re-raised unchanged.
    """
    if attempts < 1:
        # Without this guard the loop never runs and there is no exception to re-raise.
        raise ValueError(f"attempts must be at least 1, got {attempts}")

    jitter_fn = jitter or random.random
    for attempt in range(1, attempts + 1):
        try:
            return fn()
        except Exception as exc:
            if not is_retryable_delta_concurrency_error(exc) or attempt == attempts:
                raise
            sleep(backoff_seconds * attempt + jitter_fn())
    # The final iteration always returns or re-raises, so this line is never reached.
    raise AssertionError("with_delta_retry finished without returning or raising")
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""ContractForge-compatible SCD2 Delta MERGE SQL."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from uuid import uuid4
|
|
6
|
+
|
|
7
|
+
from contractforge_core.semantic import SemanticContract
|
|
8
|
+
from contractforge_core.execution import ExecutionOutcome
|
|
9
|
+
from contractforge_databricks.execution.scd2_deletes import render_scd2_delete_merge_sql
|
|
10
|
+
from contractforge_databricks.execution.scd2_late import (
|
|
11
|
+
joined_sequence_select,
|
|
12
|
+
late_arriving_condition,
|
|
13
|
+
late_arriving_filter,
|
|
14
|
+
reject_guard_join,
|
|
15
|
+
target_sequence_select,
|
|
16
|
+
)
|
|
17
|
+
from contractforge_databricks.execution.sql_merge import SqlRunner
|
|
18
|
+
from contractforge_databricks.rendering.names import target_full_name
|
|
19
|
+
from contractforge_databricks.sql import quote_identifier, quote_table_name
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def render_scd2_merge_sql(
    *,
    target_table: str,
    source_view: str,
    merge_keys: tuple[str, ...],
    insert_columns: tuple[str, ...],
) -> str:
    """Build the SCD2 MERGE statement that reads from a prepared staging view.

    ``insert_columns`` must name the ContractForge-managed SCD2 columns
    (`valid_from`, `valid_to`, `is_current`, `row_hash`, `changed_columns`) and
    one `__merge_key_<key>` staging column per merge key. The staging columns
    are only used in the ON clause and are left out of the INSERT list.

    Raises:
        ValueError: if ``merge_keys`` is empty or a required column is missing.
    """
    if not merge_keys:
        raise ValueError("scd2_historical requires merge_keys")
    staging_key_columns = tuple(f"__merge_key_{key}" for key in merge_keys)
    _require_columns(insert_columns, staging_key_columns, "merge key staging columns")
    _require_columns(insert_columns, ("valid_from", "valid_to", "is_current", "row_hash", "changed_columns"))

    target_columns = _target_insert_columns(insert_columns, merge_keys)

    # NOTE(review): `<=>` is null-safe, so a NULL `__merge_key_*` value matches
    # a target row whose key is NULL - confirm this is intended.
    key_predicates = [
        f"t.{quote_identifier(key)} <=> s.{quote_identifier('__merge_key_' + key)}" for key in merge_keys
    ]
    on_clause = " AND ".join(key_predicates)
    column_list = ", ".join(map(quote_identifier, target_columns))
    value_list = ", ".join(f"s.{quote_identifier(name)}" for name in target_columns)
    changed_columns_sql = _changed_columns_expr(target_columns, merge_keys)

    statement_lines = (
        f"MERGE INTO {quote_table_name(target_table)} t",
        f"USING {quote_table_name(source_view)} s",
        f"ON {on_clause} AND t.`is_current` = true",
        "WHEN MATCHED AND t.`row_hash` <> s.`row_hash` THEN UPDATE SET",
        " t.`valid_to` = current_timestamp(),",
        " t.`is_current` = false,",
        f" t.`changed_columns` = {changed_columns_sql}",
        f"WHEN NOT MATCHED THEN INSERT ({column_list}) VALUES ({value_list})",
    )
    return "\n".join(statement_lines)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def execute_scd2_merge(
    *,
    runner: SqlRunner,
    contract: SemanticContract,
    source_view: str,
    insert_columns: tuple[str, ...],
) -> ExecutionOutcome:
    """Execute the SCD2 merge for a contract through a uniquely named stage view.

    Both the stage-view statement and the MERGE statement are rendered before
    any SQL is executed. Execution order is: the delete-handling merge (only
    when ``scd2_apply_as_deletes`` is set), the stage view creation, then the
    MERGE. The stage view is dropped after the MERGE whether or not it failed.

    Raises:
        ValueError: if the contract's write mode is not ``"scd2_historical"``.
    """
    if contract.write.mode != "scd2_historical":
        raise ValueError(f"execute_scd2_merge only supports scd2_historical, got {contract.write.mode}")

    write = contract.write
    target = target_full_name(contract)
    stage_view = f"__cf_scd2_stage_{uuid4().hex}"

    create_stage_sql = render_scd2_stage_sql(
        target_table=target,
        source_view=source_view,
        stage_view=stage_view,
        merge_keys=write.merge_keys,
        source_columns=insert_columns,
        sequence_by=write.scd2_sequence_by,
        late_arriving_policy=write.scd2_late_arriving_policy,
        apply_as_deletes=write.scd2_apply_as_deletes,
    )
    merge_sql = render_scd2_merge_sql(
        target_table=target,
        source_view=stage_view,
        merge_keys=write.merge_keys,
        insert_columns=insert_columns,
    )

    if write.scd2_apply_as_deletes:
        delete_sql = render_scd2_delete_merge_sql(
            target_table=target,
            source_view=source_view,
            merge_keys=write.merge_keys,
            apply_as_deletes=write.scd2_apply_as_deletes,
            sequence_by=write.scd2_sequence_by,
            late_arriving_policy=write.scd2_late_arriving_policy,
        )
        runner.sql(delete_sql)

    runner.sql(create_stage_sql)
    try:
        runner.sql(merge_sql)
    finally:
        # Always remove the temporary stage view, even if the MERGE raised.
        runner.sql(f"DROP VIEW IF EXISTS {quote_table_name(stage_view)}")

    inserted_column_count = len(_target_insert_columns(insert_columns, write.merge_keys))
    return ExecutionOutcome(
        status="SUCCESS",
        operation="core_managed_scd2_delta_merge",
        target=target,
        metrics={
            "insert_columns": inserted_column_count,
            "merge_keys": len(write.merge_keys),
            "stage_view": stage_view,
        },
        sql=merge_sql,
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def render_scd2_stage_sql(
    *,
    target_table: str,
    source_view: str,
    stage_view: str,
    merge_keys: tuple[str, ...],
    source_columns: tuple[str, ...],
    sequence_by: str | None = None,
    late_arriving_policy: str = "apply",
    apply_as_deletes: str | None = None,
) -> str:
    """Render the ``CREATE OR REPLACE TEMP VIEW`` statement for the SCD2 stage.

    The view is built from a chain of CTEs: current target rows, the source
    left-joined to them, the rows whose hash differs from the current target
    row (or that have no current row), and finally two projections of those
    rows that are combined with ``UNION ALL``:

    * ``insert_stage`` - every changed row, with NULL ``__merge_key_<key>`` columns;
    * ``update_stage`` - changed rows that have a current target row, with
      ``__merge_key_<key>`` set to the key value.

    Raises:
        ValueError: if ``merge_keys`` is empty, a managed SCD2 column is
            missing from ``source_columns``, or ``sequence_by`` is given but
            is not one of the data columns.
    """
    if not merge_keys:
        raise ValueError("scd2_historical requires merge_keys")
    _require_columns(source_columns, ("valid_from", "valid_to", "is_current", "row_hash", "changed_columns"))
    # Data columns are the source columns without any `__merge_key_*` staging columns.
    data_columns = _target_insert_columns(source_columns, merge_keys)
    if sequence_by and sequence_by not in data_columns:
        raise ValueError(f"prepared SCD2 source is missing scd2_sequence_by: {sequence_by}")
    key_join = " AND ".join(f"t.{quote_identifier(key)} <=> s.{quote_identifier(key)}" for key in merge_keys)
    target_keys = ", ".join(quote_identifier(key) for key in merge_keys)
    select_data = ", ".join(f"s.{quote_identifier(column)}" for column in data_columns)
    # NOTE(review): NULL staging keys look intended to keep insert rows from
    # matching in the later MERGE - confirm against the MERGE's ON clause.
    null_merge_keys = ", ".join(f"NULL AS {quote_identifier('__merge_key_' + key)}" for key in merge_keys)
    update_merge_keys = ", ".join(f"{quote_identifier(key)} AS {quote_identifier('__merge_key_' + key)}" for key in merge_keys)
    # Final projection: data columns followed by one staging key column per merge key.
    stage_columns = ", ".join(quote_identifier(column) for column in (*data_columns, *(f"__merge_key_{key}" for key in merge_keys)))

    lines = [
        f"CREATE OR REPLACE TEMP VIEW {quote_table_name(stage_view)} AS",
        "WITH target_current AS (",
        f" SELECT {target_keys}, `row_hash` AS `__tgt_row_hash`{target_sequence_select(sequence_by)}",
        f" FROM {quote_table_name(target_table)}",
        " WHERE `is_current` = true",
        "), joined AS (",
        f" SELECT {select_data}, t.`__tgt_row_hash`{joined_sequence_select(sequence_by)}",
        f" FROM {quote_table_name(source_view)} s",
        f" LEFT JOIN target_current t ON {key_join}",
        f" WHERE {_non_delete_filter(apply_as_deletes)}",
        ")",
    ]
    # The late-arrival guard CTEs are only emitted for the "reject" policy with a sequence column.
    if late_arriving_policy == "reject" and sequence_by:
        lines.extend(
            [
                ", late_arriving AS (",
                " SELECT count(*) AS late_count FROM joined",
                f" WHERE {late_arriving_condition(sequence_by)}",
                "), reject_late_arriving AS (",
                # NOTE(review): `1 / 0` presumably makes the query fail when late rows
                # exist; that depends on division by zero raising - confirm ANSI mode.
                " SELECT CASE WHEN late_count > 0 THEN 1 / 0 ELSE 0 END AS __late_guard FROM late_arriving",
                ")",
            ]
        )
    lines.extend(
        [
            ", changed AS (",
            f" SELECT * FROM joined{reject_guard_join(sequence_by, late_arriving_policy)}",
            f" WHERE {late_arriving_filter(sequence_by, late_arriving_policy)}",
            # Keep rows with no current target row or with a different row hash.
            " AND (`__tgt_row_hash` IS NULL OR NOT (`row_hash` <=> `__tgt_row_hash`))",
            "), insert_stage AS (",
            f" SELECT {', '.join(quote_identifier(column) for column in data_columns)}, {null_merge_keys}",
            " FROM changed",
            "), update_stage AS (",
            f" SELECT {', '.join(quote_identifier(column) for column in data_columns)}, {update_merge_keys}",
            " FROM changed WHERE `__tgt_row_hash` IS NOT NULL",
            ")",
            f"SELECT {stage_columns} FROM insert_stage",
            "UNION ALL",
            f"SELECT {stage_columns} FROM update_stage",
        ]
    )
    return "\n".join(lines)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _require_columns(columns: tuple[str, ...], required: tuple[str, ...], context: str = "required columns") -> None:
|
|
187
|
+
missing = [column for column in required if column not in columns]
|
|
188
|
+
if missing:
|
|
189
|
+
raise ValueError(f"prepared SCD2 source is missing {context}: {missing}")
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _target_insert_columns(columns: tuple[str, ...], merge_keys: tuple[str, ...]) -> tuple[str, ...]:
|
|
193
|
+
staging_columns = {f"__merge_key_{key}" for key in merge_keys}
|
|
194
|
+
return tuple(column for column in columns if column not in staging_columns)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _changed_columns_expr(columns: tuple[str, ...], merge_keys: tuple[str, ...]) -> str:
|
|
198
|
+
excluded = {*merge_keys, "valid_from", "valid_to", "is_current", "row_hash", "changed_columns"}
|
|
199
|
+
candidates = tuple(column for column in columns if column not in excluded)
|
|
200
|
+
if not candidates:
|
|
201
|
+
return "s.`changed_columns`"
|
|
202
|
+
parts = ", ".join(
|
|
203
|
+
f"CASE WHEN NOT (t.{quote_identifier(column)} <=> s.{quote_identifier(column)}) "
|
|
204
|
+
f"THEN '{column}' ELSE NULL END"
|
|
205
|
+
for column in candidates
|
|
206
|
+
)
|
|
207
|
+
return f"concat_ws(',', {parts})"
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _non_delete_filter(apply_as_deletes: str | None) -> str:
|
|
211
|
+
if not apply_as_deletes:
|
|
212
|
+
return "true"
|
|
213
|
+
return f"NOT coalesce(CAST(({apply_as_deletes}) AS BOOLEAN), false)"
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Databricks SCD2 delete-expression merge helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contractforge_databricks.execution.scd2_late import late_arriving_condition, late_arriving_filter
|
|
6
|
+
from contractforge_databricks.sql import quote_identifier, quote_table_name
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def render_scd2_delete_merge_sql(
    *,
    target_table: str,
    source_view: str,
    merge_keys: tuple[str, ...],
    apply_as_deletes: str,
    sequence_by: str | None = None,
    late_arriving_policy: str = "apply",
) -> str:
    """Render the MERGE statement that closes current SCD2 rows flagged as deleted.

    The MERGE source selects the merge keys of source rows for which
    ``apply_as_deletes`` evaluates to true, joins them to the target's current
    rows, and applies the late-arriving filter. Every matched current target
    row gets ``valid_to = current_timestamp()``, ``is_current = false`` and
    ``changed_columns = 'DELETE'``.

    Args:
        target_table: Name of the SCD2 target table.
        source_view: Name of the prepared source view.
        merge_keys: Business-key column names; compared null-safely (``<=>``).
        apply_as_deletes: SQL boolean expression marking delete rows. It is
            interpolated verbatim, so it must come from trusted configuration.
        sequence_by: Optional ordering column; when set, the target's value is
            exposed as ``__tgt_sequence`` for the late-arriving checks.
        late_arriving_policy: Policy name forwarded to ``late_arriving_filter``
            and ``_reject_cte``.

    Returns:
        The MERGE statement as a newline-joined string.

    Raises:
        ValueError: If ``merge_keys`` is empty or ``apply_as_deletes`` is blank.
            ``late_arriving_filter`` may raise it as well for an unknown policy.
    """
    if not merge_keys:
        raise ValueError("SCD2 delete handling requires merge_keys")
    if not apply_as_deletes or not apply_as_deletes.strip():
        raise ValueError("SCD2 delete handling requires scd2_apply_as_deletes")
    key_list = ", ".join(quote_identifier(key) for key in merge_keys)
    # One predicate serves both the inner LEFT JOIN and the MERGE ON clause:
    # in both places the target is aliased `t` and the delete rows `d`.
    key_join = " AND ".join(f"t.{quote_identifier(key)} <=> d.{quote_identifier(key)}" for key in merge_keys)
    source_sequence = f", {quote_identifier(sequence_by)}" if sequence_by else ""
    target_sequence = f", {quote_identifier(sequence_by)} AS `__tgt_sequence`" if sequence_by else ""
    joined_sequence = ", t.`__tgt_sequence`" if sequence_by else ""
    reject_cte = _reject_cte(sequence_by, late_arriving_policy)
    reject_join = " CROSS JOIN reject_late_arriving" if sequence_by and late_arriving_policy == "reject" else ""
    late_filter = late_arriving_filter(sequence_by, late_arriving_policy)
    return "\n".join(
        [
            f"MERGE INTO {quote_table_name(target_table)} t",
            "USING (",
            " WITH delete_candidates AS (",
            f" SELECT DISTINCT {key_list}{source_sequence}",
            f" FROM {quote_table_name(source_view)}",
            f" WHERE coalesce(CAST(({apply_as_deletes}) AS BOOLEAN), false)",
            " ), target_current AS (",
            f" SELECT {key_list}{target_sequence}",
            f" FROM {quote_table_name(target_table)}",
            " WHERE `is_current` = true",
            " ), joined AS (",
            f" SELECT d.*{joined_sequence} FROM delete_candidates d",
            f" LEFT JOIN target_current t ON {key_join}",
            f" ){reject_cte}",
            # delete_candidates is distinct on (keys, sequence), so one key can
            # appear several times when it has several delete events. DISTINCT
            # keeps each key once so no target row is matched by more than one
            # source row in the UPDATE clause.
            f" SELECT DISTINCT {key_list} FROM joined{reject_join}",
            f" WHERE {late_filter}",
            ") d",
            f"ON {key_join} AND t.`is_current` = true",
            "WHEN MATCHED THEN UPDATE SET",
            " t.`valid_to` = current_timestamp(),",
            " t.`is_current` = false,",
            " t.`changed_columns` = 'DELETE'",
        ]
    )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _reject_cte(sequence_by: str | None, policy: str) -> str:
|
|
59
|
+
if not sequence_by or policy != "reject":
|
|
60
|
+
return ""
|
|
61
|
+
return (
|
|
62
|
+
"\n , late_arriving AS (\n SELECT count(*) AS late_count FROM joined\n WHERE "
|
|
63
|
+
+ late_arriving_condition(sequence_by)
|
|
64
|
+
+ "\n ), reject_late_arriving AS (\n SELECT CASE WHEN late_count > 0 THEN 1 / 0 ELSE 0 END AS __late_guard FROM late_arriving\n )"
|
|
65
|
+
)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Databricks SQL fragments for SCD2 late-arriving policies."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contractforge_databricks.sql import quote_identifier
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def target_sequence_select(sequence_by: str | None) -> str:
    """Return the select-list fragment aliasing the sequence column as ``__tgt_sequence``.

    The fragment starts with a comma so it can be appended to an existing
    select list; it is empty when no sequence column is configured.
    """
    if not sequence_by:
        return ""
    return f", {quote_identifier(sequence_by)} AS `__tgt_sequence`"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def joined_sequence_select(sequence_by: str | None) -> str:
    """Return the select-list fragment that carries ``t.__tgt_sequence`` through a join.

    Empty when no sequence column is configured.
    """
    if sequence_by:
        return ", t.`__tgt_sequence`"
    return ""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def reject_guard_join(sequence_by: str | None, policy: str) -> str:
    """Return the ``CROSS JOIN`` onto the ``reject_late_arriving`` CTE.

    The join is emitted only when a sequence column is configured and
    ``policy`` is ``"reject"``; otherwise the result is an empty string.
    """
    if policy != "reject" or not sequence_by:
        return ""
    return " CROSS JOIN reject_late_arriving"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def late_arriving_condition(sequence_by: str) -> str:
    """Return the SQL predicate identifying a late-arriving row.

    A row is late when ``__tgt_sequence`` is not NULL and the row's own
    sequence value is NULL or less than or equal to ``__tgt_sequence``.
    """
    quoted = quote_identifier(sequence_by)
    template = "`__tgt_sequence` IS NOT NULL AND ({0} IS NULL OR {0} <= `__tgt_sequence`)"
    return template.format(quoted)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def late_arriving_filter(sequence_by: str | None, policy: str) -> str:
    """Return the WHERE predicate that drops late-arriving rows.

    Returns ``"true"`` (no filtering) when no sequence column is configured or
    the policy is ``"apply"``. For ``"ignore"`` and ``"reject"`` the negated
    late-arriving condition is returned.

    Raises:
        ValueError: If a sequence column is set and ``policy`` is not one of
            ``apply``, ``ignore`` or ``reject``.
    """
    filtering_requested = bool(sequence_by) and policy != "apply"
    if not filtering_requested:
        return "true"
    if policy in {"ignore", "reject"}:
        return "NOT ({})".format(late_arriving_condition(sequence_by))
    raise ValueError("scd2_late_arriving_policy must be one of apply, ignore, reject")
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""ContractForge-compatible snapshot soft delete Delta MERGE SQL."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contractforge_core.semantic import SemanticContract
|
|
6
|
+
from contractforge_core.execution import ExecutionOutcome
|
|
7
|
+
from contractforge_databricks.execution.sql_merge import SqlRunner
|
|
8
|
+
from contractforge_databricks.rendering.names import target_full_name
|
|
9
|
+
from contractforge_databricks.sql import quote_identifier, quote_table_name
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def render_snapshot_soft_delete_sql(
    *,
    target_table: str,
    source_view: str,
    merge_keys: tuple[str, ...],
    source_columns: tuple[str, ...],
) -> str:
    """Render the MERGE statement for a snapshot load with soft deletes.

    The statement has three branches:

    * matched rows whose ``row_hash`` differs, or whose target ``is_active`` is
      false, have every non-key column overwritten from the source;
    * unmatched source rows are inserted with all source columns;
    * active target rows missing from the source get ``is_active = false`` and
      ``deleted_at = current_timestamp()``.

    Args:
        target_table: Name of the target table.
        source_view: Name of the prepared snapshot view.
        merge_keys: Key column names; compared null-safely (``<=>``).
        source_columns: All column names of the prepared source.

    Raises:
        ValueError: If ``merge_keys`` is empty, or if ``source_columns`` lacks a
            merge key or one of ``is_active``, ``deleted_at``, ``row_hash``.
    """
    if not merge_keys:
        raise ValueError("snapshot_soft_delete requires merge_keys")
    _require_columns(source_columns, merge_keys, "merge_keys")
    _require_columns(source_columns, ("is_active", "deleted_at", "row_hash"))

    match_terms = []
    for key in merge_keys:
        quoted_key = quote_identifier(key)
        match_terms.append(f"t.{quoted_key} <=> s.{quoted_key}")
    on_clause = " AND ".join(match_terms)

    assignments = []
    target_names = []
    source_refs = []
    for column in source_columns:
        quoted_column = quote_identifier(column)
        target_names.append(quoted_column)
        source_refs.append(f"s.{quoted_column}")
        # Key columns are inserted but never rewritten on update.
        if column not in merge_keys:
            assignments.append(f"t.{quoted_column} = s.{quoted_column}")
    set_clause = ", ".join(assignments)
    insert_names = ", ".join(target_names)
    insert_refs = ", ".join(source_refs)

    statement = (
        f"MERGE INTO {quote_table_name(target_table)} t",
        f"USING {quote_table_name(source_view)} s",
        f"ON {on_clause}",
        f"WHEN MATCHED AND (NOT (t.`row_hash` <=> s.`row_hash`) OR t.`is_active` = false) THEN UPDATE SET {set_clause}",
        f"WHEN NOT MATCHED THEN INSERT ({insert_names}) VALUES ({insert_refs})",
        "WHEN NOT MATCHED BY SOURCE AND t.`is_active` = true THEN UPDATE SET",
        " t.`is_active` = false,",
        " t.`deleted_at` = current_timestamp()",
    )
    return "\n".join(statement)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def execute_snapshot_soft_delete(
    *,
    runner: SqlRunner,
    contract: SemanticContract,
    source_view: str,
    source_columns: tuple[str, ...],
) -> ExecutionOutcome:
    """Render and run the snapshot soft-delete MERGE for ``contract``.

    Args:
        runner: Object whose ``sql`` method is called with the rendered statement.
        contract: Contract supplying the write mode, merge keys and target name.
        source_view: Name of the prepared snapshot view.
        source_columns: All column names of the prepared source.

    Returns:
        An ``ExecutionOutcome`` with status ``SUCCESS`` carrying the target
        name, the executed SQL, and the counts of source columns and merge keys.

    Raises:
        ValueError: If the contract's write mode is not ``snapshot_soft_delete``.
    """
    write_spec = contract.write
    if write_spec.mode != "snapshot_soft_delete":
        raise ValueError(f"execute_snapshot_soft_delete only supports snapshot_soft_delete, got {write_spec.mode}")

    target = target_full_name(contract)
    merge_sql = render_snapshot_soft_delete_sql(
        target_table=target,
        source_view=source_view,
        merge_keys=write_spec.merge_keys,
        source_columns=source_columns,
    )
    runner.sql(merge_sql)

    counts = {"source_columns": len(source_columns), "merge_keys": len(write_spec.merge_keys)}
    return ExecutionOutcome(
        status="SUCCESS",
        operation="core_managed_snapshot_soft_delete_delta_merge",
        target=target,
        metrics=counts,
        sql=merge_sql,
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _require_columns(columns: tuple[str, ...], required: tuple[str, ...], context: str = "required columns") -> None:
|
|
75
|
+
missing = [column for column in required if column not in columns]
|
|
76
|
+
if missing:
|
|
77
|
+
raise ValueError(f"prepared snapshot source is missing {context}: {missing}")
|