contractforge-databricks 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contractforge_databricks/__init__.py +172 -0
- contractforge_databricks/adapter.py +69 -0
- contractforge_databricks/annotations/__init__.py +10 -0
- contractforge_databricks/annotations/application.py +52 -0
- contractforge_databricks/annotations/audit.py +49 -0
- contractforge_databricks/annotations/sql.py +142 -0
- contractforge_databricks/api.py +65 -0
- contractforge_databricks/bundles/__init__.py +9 -0
- contractforge_databricks/bundles/assets.py +47 -0
- contractforge_databricks/bundles/project.py +213 -0
- contractforge_databricks/bundles/project_config.py +133 -0
- contractforge_databricks/capabilities/__init__.py +17 -0
- contractforge_databricks/capabilities/builders.py +43 -0
- contractforge_databricks/capabilities/evaluate.py +162 -0
- contractforge_databricks/capabilities/mapping.py +36 -0
- contractforge_databricks/capabilities/models.py +44 -0
- contractforge_databricks/capabilities/runtime.py +111 -0
- contractforge_databricks/capabilities/uc.py +47 -0
- contractforge_databricks/cli.py +196 -0
- contractforge_databricks/cli_deploy.py +98 -0
- contractforge_databricks/cli_governance.py +142 -0
- contractforge_databricks/cli_io.py +91 -0
- contractforge_databricks/cli_maintenance.py +69 -0
- contractforge_databricks/coercion.py +31 -0
- contractforge_databricks/contract_extensions.py +70 -0
- contractforge_databricks/cost/__init__.py +11 -0
- contractforge_databricks/cost/model.py +22 -0
- contractforge_databricks/cost/report.py +65 -0
- contractforge_databricks/cost/sql.py +136 -0
- contractforge_databricks/dashboards/__init__.py +15 -0
- contractforge_databricks/dashboards/control_tables.py +150 -0
- contractforge_databricks/diagnostics/__init__.py +7 -0
- contractforge_databricks/diagnostics/explain.py +40 -0
- contractforge_databricks/environment.py +53 -0
- contractforge_databricks/evidence/__init__.py +98 -0
- contractforge_databricks/evidence/ddl.py +35 -0
- contractforge_databricks/evidence/governance_log.py +175 -0
- contractforge_databricks/evidence/helpers.py +29 -0
- contractforge_databricks/evidence/ops_log.py +210 -0
- contractforge_databricks/evidence/records.py +27 -0
- contractforge_databricks/evidence/run_log.py +74 -0
- contractforge_databricks/evidence/schemas.py +7 -0
- contractforge_databricks/evidence/sql.py +144 -0
- contractforge_databricks/evidence/tables.py +20 -0
- contractforge_databricks/evidence/writer.py +118 -0
- contractforge_databricks/execution/__init__.py +70 -0
- contractforge_databricks/execution/delta_basic.py +57 -0
- contractforge_databricks/execution/hash_diff.py +126 -0
- contractforge_databricks/execution/hash_diff_latest.py +142 -0
- contractforge_databricks/execution/replace_partitions.py +40 -0
- contractforge_databricks/execution/results.py +5 -0
- contractforge_databricks/execution/retry.py +36 -0
- contractforge_databricks/execution/scd2.py +213 -0
- contractforge_databricks/execution/scd2_deletes.py +65 -0
- contractforge_databricks/execution/scd2_late.py +30 -0
- contractforge_databricks/execution/snapshot.py +77 -0
- contractforge_databricks/execution/sql_merge.py +85 -0
- contractforge_databricks/execution/tables.py +98 -0
- contractforge_databricks/execution/windows.py +58 -0
- contractforge_databricks/governance/__init__.py +30 -0
- contractforge_databricks/governance/access.py +185 -0
- contractforge_databricks/governance/application.py +93 -0
- contractforge_databricks/governance/drift.py +49 -0
- contractforge_databricks/governance/runtime.py +60 -0
- contractforge_databricks/governance/sql.py +31 -0
- contractforge_databricks/governance/validation.py +135 -0
- contractforge_databricks/lakeflow/__init__.py +21 -0
- contractforge_databricks/lakeflow/compatibility.py +194 -0
- contractforge_databricks/lakeflow/rendering.py +175 -0
- contractforge_databricks/lineage/__init__.py +7 -0
- contractforge_databricks/lineage/openlineage.py +182 -0
- contractforge_databricks/maintenance/__init__.py +27 -0
- contractforge_databricks/maintenance/retention.py +90 -0
- contractforge_databricks/maintenance/sql.py +68 -0
- contractforge_databricks/metrics/__init__.py +19 -0
- contractforge_databricks/metrics/history.py +21 -0
- contractforge_databricks/metrics/write.py +63 -0
- contractforge_databricks/operations/__init__.py +4 -0
- contractforge_databricks/operations/application.py +38 -0
- contractforge_databricks/operations/sql.py +95 -0
- contractforge_databricks/parity/__init__.py +18 -0
- contractforge_databricks/parity/catalog.py +59 -0
- contractforge_databricks/parity/models.py +7 -0
- contractforge_databricks/parity/scenarios.py +111 -0
- contractforge_databricks/partitioning/__init__.py +3 -0
- contractforge_databricks/partitioning/predicates.py +28 -0
- contractforge_databricks/preparation/__init__.py +47 -0
- contractforge_databricks/preparation/deduplicate.py +87 -0
- contractforge_databricks/preparation/encoding.py +37 -0
- contractforge_databricks/preparation/hashing.py +18 -0
- contractforge_databricks/preparation/pyspark.py +178 -0
- contractforge_databricks/preparation/pyspark_staging.py +70 -0
- contractforge_databricks/preparation/shape.py +209 -0
- contractforge_databricks/preparation/shape_validation.py +94 -0
- contractforge_databricks/preparation/staging.py +17 -0
- contractforge_databricks/preparation/zip_arrays.py +51 -0
- contractforge_databricks/presets/__init__.py +3 -0
- contractforge_databricks/presets/base.py +24 -0
- contractforge_databricks/presets/bronze.py +57 -0
- contractforge_databricks/presets/catalog.py +22 -0
- contractforge_databricks/presets/core.py +134 -0
- contractforge_databricks/presets/gold.py +62 -0
- contractforge_databricks/presets/modifiers.py +51 -0
- contractforge_databricks/presets/runtime.py +22 -0
- contractforge_databricks/presets/silver.py +101 -0
- contractforge_databricks/presets/write_engine.py +57 -0
- contractforge_databricks/quality/__init__.py +41 -0
- contractforge_databricks/quality/evaluation.py +178 -0
- contractforge_databricks/quality/persistence.py +81 -0
- contractforge_databricks/quality/registry.py +134 -0
- contractforge_databricks/quality/results.py +17 -0
- contractforge_databricks/quality/sql.py +113 -0
- contractforge_databricks/rendering/__init__.py +11 -0
- contractforge_databricks/rendering/bundle.py +93 -0
- contractforge_databricks/rendering/markdown.py +50 -0
- contractforge_databricks/rendering/names.py +56 -0
- contractforge_databricks/results.py +15 -0
- contractforge_databricks/runtime/__init__.py +101 -0
- contractforge_databricks/runtime/available_now.py +147 -0
- contractforge_databricks/runtime/bundles.py +211 -0
- contractforge_databricks/runtime/cache.py +20 -0
- contractforge_databricks/runtime/control_tables.py +19 -0
- contractforge_databricks/runtime/deploy.py +197 -0
- contractforge_databricks/runtime/detection.py +114 -0
- contractforge_databricks/runtime/dry_run.py +46 -0
- contractforge_databricks/runtime/errors.py +54 -0
- contractforge_databricks/runtime/file_selection.py +109 -0
- contractforge_databricks/runtime/finalization.py +168 -0
- contractforge_databricks/runtime/governance.py +37 -0
- contractforge_databricks/runtime/hooks.py +45 -0
- contractforge_databricks/runtime/http_file.py +37 -0
- contractforge_databricks/runtime/http_retry.py +15 -0
- contractforge_databricks/runtime/http_safety.py +9 -0
- contractforge_databricks/runtime/json_materialization.py +97 -0
- contractforge_databricks/runtime/lineage.py +164 -0
- contractforge_databricks/runtime/maintenance.py +43 -0
- contractforge_databricks/runtime/merge_validation.py +98 -0
- contractforge_databricks/runtime/metadata.py +21 -0
- contractforge_databricks/runtime/metrics.py +34 -0
- contractforge_databricks/runtime/models.py +32 -0
- contractforge_databricks/runtime/options.py +33 -0
- contractforge_databricks/runtime/orchestration_context.py +185 -0
- contractforge_databricks/runtime/orchestrator.py +147 -0
- contractforge_databricks/runtime/partitioning.py +93 -0
- contractforge_databricks/runtime/quality_quarantine.py +92 -0
- contractforge_databricks/runtime/rest_api.py +46 -0
- contractforge_databricks/runtime/rest_auth.py +21 -0
- contractforge_databricks/runtime/rest_pagination.py +21 -0
- contractforge_databricks/runtime/run_payload.py +177 -0
- contractforge_databricks/runtime/schema.py +106 -0
- contractforge_databricks/runtime/source_metadata.py +30 -0
- contractforge_databricks/runtime/source_registry.py +43 -0
- contractforge_databricks/runtime/source_schema.py +24 -0
- contractforge_databricks/runtime/sources.py +208 -0
- contractforge_databricks/runtime/spark.py +183 -0
- contractforge_databricks/runtime/spark_defaults.py +35 -0
- contractforge_databricks/runtime/storage_auth.py +132 -0
- contractforge_databricks/runtime/streaming.py +131 -0
- contractforge_databricks/runtime/success.py +104 -0
- contractforge_databricks/runtime/utils.py +52 -0
- contractforge_databricks/runtime/watermark.py +71 -0
- contractforge_databricks/runtime/windows.py +184 -0
- contractforge_databricks/runtime/write.py +66 -0
- contractforge_databricks/runtime/write_flow.py +146 -0
- contractforge_databricks/runtime/write_strategy.py +40 -0
- contractforge_databricks/schema/__init__.py +21 -0
- contractforge_databricks/schema/diff.py +11 -0
- contractforge_databricks/schema/policy.py +33 -0
- contractforge_databricks/schema/sync.py +23 -0
- contractforge_databricks/security/__init__.py +21 -0
- contractforge_databricks/security/errors.py +5 -0
- contractforge_databricks/security/redaction.py +5 -0
- contractforge_databricks/security/secrets.py +114 -0
- contractforge_databricks/security/source_policy.py +17 -0
- contractforge_databricks/shapes/__init__.py +3 -0
- contractforge_databricks/shapes/sql.py +123 -0
- contractforge_databricks/sources/__init__.py +67 -0
- contractforge_databricks/sources/artifacts.py +100 -0
- contractforge_databricks/sources/autoloader.py +48 -0
- contractforge_databricks/sources/bounded_streams.py +44 -0
- contractforge_databricks/sources/classification.py +115 -0
- contractforge_databricks/sources/delta_share.py +21 -0
- contractforge_databricks/sources/files.py +48 -0
- contractforge_databricks/sources/http_file.py +46 -0
- contractforge_databricks/sources/interpret.py +76 -0
- contractforge_databricks/sources/jdbc.py +32 -0
- contractforge_databricks/sources/metadata.py +18 -0
- contractforge_databricks/sources/native_passthrough.py +33 -0
- contractforge_databricks/sources/rds_iam.py +15 -0
- contractforge_databricks/sources/rds_iam_runtime.py +191 -0
- contractforge_databricks/sources/rest_api.py +33 -0
- contractforge_databricks/sources/support.py +50 -0
- contractforge_databricks/sources/table_refs.py +65 -0
- contractforge_databricks/sql/__init__.py +4 -0
- contractforge_databricks/sql/identifiers.py +17 -0
- contractforge_databricks/sql/literals.py +36 -0
- contractforge_databricks/state/__init__.py +39 -0
- contractforge_databricks/state/ddl.py +24 -0
- contractforge_databricks/state/migrations.py +146 -0
- contractforge_databricks/state/queries.py +149 -0
- contractforge_databricks/state/sql.py +116 -0
- contractforge_databricks/state/tables.py +9 -0
- contractforge_databricks/state/writer.py +83 -0
- contractforge_databricks/templates/__init__.py +15 -0
- contractforge_databricks/templates/catalog.py +205 -0
- contractforge_databricks/templates/catalog_parity.py +85 -0
- contractforge_databricks/templates/core.py +83 -0
- contractforge_databricks/templates/enrichment.py +175 -0
- contractforge_databricks/transforms/__init__.py +3 -0
- contractforge_databricks/transforms/sql.py +118 -0
- contractforge_databricks/watermark/__init__.py +6 -0
- contractforge_databricks/watermark/sql.py +91 -0
- contractforge_databricks/write_modes/__init__.py +20 -0
- contractforge_databricks/write_modes/registry.py +44 -0
- contractforge_databricks/write_modes/sql.py +33 -0
- contractforge_databricks/write_modes/strategy.py +192 -0
- contractforge_databricks-0.1.0.dist-info/METADATA +34 -0
- contractforge_databricks-0.1.0.dist-info/RECORD +220 -0
- contractforge_databricks-0.1.0.dist-info/WHEEL +4 -0
- contractforge_databricks-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""Materialize RDS IAM JDBC tokens at adapter runtime.
|
|
2
|
+
|
|
3
|
+
The core's ``rds_iam_review_options`` only writes a placeholder password
|
|
4
|
+
(``{{rds_iam_token}}``) plus three ``contractforge.rdsIamHost / Port /
|
|
5
|
+
Region`` metadata options. At runtime the Databricks adapter is the
|
|
6
|
+
layer that has the AWS credentials and the network reach to actually
|
|
7
|
+
mint the IAM auth token, so the materialization belongs here.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import hashlib
|
|
13
|
+
import time
|
|
14
|
+
from typing import Any, Mapping
|
|
15
|
+
|
|
16
|
+
from contractforge_core.connectors import generate_rds_iam_auth_token
|
|
17
|
+
|
|
18
|
+
RDS_IAM_TOKEN_PLACEHOLDER = "{{rds_iam_token}}"
|
|
19
|
+
_HOST_OPTION = "contractforge.rdsIamHost"
|
|
20
|
+
_PORT_OPTION = "contractforge.rdsIamPort"
|
|
21
|
+
_REGION_OPTION = "contractforge.rdsIamRegion"
|
|
22
|
+
_METADATA_OPTIONS = (_HOST_OPTION, _PORT_OPTION, _REGION_OPTION)
|
|
23
|
+
_TOKEN_TTL_SECONDS = 14 * 60
|
|
24
|
+
_TOKEN_CACHE: dict[str, tuple[float, str]] = {}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def materialize_rds_iam_options(
    options: Mapping[str, str],
    *,
    auth: Mapping[str, Any] | None,
) -> dict[str, str]:
    """Replace the RDS IAM placeholder password with an IAM auth token.

    The input mapping is never mutated; a new dict is always returned and
    the ``contractforge.rdsIam*`` metadata keys are always removed from it.

    * If ``password`` is not the ``{{rds_iam_token}}`` placeholder, the
      remaining options are returned otherwise unchanged, so non-IAM JDBC
      sources are unaffected.
    * Otherwise the token is taken from the module-level cache when a fresh
      entry exists, or minted: with the explicit keys from ``auth`` when both
      ``access_key_id`` and ``secret_access_key`` are present, else through
      boto3.

    Args:
        options: JDBC options as produced for the source.
        auth: Optional ``source.auth`` mapping; may supply ``username``,
            ``access_key_id``, ``secret_access_key`` and ``session_token``.

    Returns:
        A new options dict whose ``password`` holds the IAM auth token.

    Raises:
        ValueError: If host/port/region metadata is missing, the port is not
            an integer, or no user name can be determined.
    """
    options_dict = dict(options)
    if options_dict.get("password") != RDS_IAM_TOKEN_PLACEHOLDER:
        for key in _METADATA_OPTIONS:
            options_dict.pop(key, None)
        return options_dict

    host = options_dict.pop(_HOST_OPTION, None)
    port = options_dict.pop(_PORT_OPTION, None)
    region = options_dict.pop(_REGION_OPTION, None)
    if not host or not port or not region:
        raise ValueError(
            "RDS IAM materialization requires contractforge.rdsIamHost,"
            " contractforge.rdsIamPort and contractforge.rdsIamRegion"
            " (produced by the core for source.auth.type='rds_iam')"
        )

    auth_dict = dict(auth or {})
    username = str(options_dict.get("user") or auth_dict.get("username") or "")
    if not username:
        raise ValueError("RDS IAM auth requires a JDBC user or source.auth.username")

    try:
        port_int = int(port)
    except (TypeError, ValueError) as exc:
        # Name the offending option instead of surfacing a bare int() error.
        raise ValueError(
            f"contractforge.rdsIamPort must be an integer, got {port!r}"
        ) from exc

    access_key = auth_dict.get("access_key_id")
    secret_key = auth_dict.get("secret_access_key")
    session_token = auth_dict.get("session_token")

    # Connection identity shared by every helper call below.
    identity = {
        "host": str(host),
        "port": port_int,
        "region": str(region),
        "username": username,
    }
    # The cache is keyed on identity plus credentials (missing ones become "").
    cache_fields = {
        **identity,
        "access_key": str(access_key or ""),
        "secret_key": str(secret_key or ""),
        "session_token": str(session_token or ""),
    }

    token = _cached_token(**cache_fields)
    if not token:
        if access_key and secret_key:
            token = generate_rds_iam_auth_token(
                **identity,
                access_key=str(access_key),
                secret_key=str(secret_key),
                session_token=str(session_token) if session_token else None,
            )
        else:
            # No explicit keys declared: let boto3 resolve the credentials.
            token = _boto3_rds_iam_token(**identity)
        _store_token(**cache_fields, token=token)

    options_dict["password"] = token
    return options_dict
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _cache_key(
    *,
    host: str,
    port: int,
    region: str,
    username: str,
    access_key: str,
    secret_key: str,
    session_token: str,
) -> str:
    """Return a SHA-256 hex digest identifying one connection/credential set.

    Only the digest is used as the cache key, so the raw credential strings
    are not stored as dictionary keys.
    """
    fields = (host, str(port), region, username, access_key, secret_key, session_token)
    # Each field is terminated by a unit-separator byte so that adjacent
    # fields cannot run together (("ab", "c") must differ from ("a", "bc")).
    payload = b"".join(field.encode("utf-8") + b"\x1f" for field in fields)
    return hashlib.sha256(payload).hexdigest()
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _cached_token(
    *,
    host: str,
    port: int,
    region: str,
    username: str,
    access_key: str,
    secret_key: str,
    session_token: str,
) -> str | None:
    """Return the cached token for this identity, or ``None`` if unusable.

    An entry counts as expired once its age reaches ``_TOKEN_TTL_SECONDS``;
    expired entries are evicted on lookup.
    """
    lookup_key = _cache_key(
        host=host,
        port=port,
        region=region,
        username=username,
        access_key=access_key,
        secret_key=secret_key,
        session_token=session_token,
    )
    entry = _TOKEN_CACHE.get(lookup_key)
    if entry:
        minted_at, cached_value = entry
        if time.time() - minted_at < _TOKEN_TTL_SECONDS:
            return cached_value
        # Too old to hand out again: drop it so a fresh token gets minted.
        _TOKEN_CACHE.pop(lookup_key, None)
    return None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _store_token(
    *,
    host: str,
    port: int,
    region: str,
    username: str,
    access_key: str,
    secret_key: str,
    session_token: str,
    token: str,
) -> None:
    """Record ``token`` in the module cache together with its creation time.

    The entry is stored under the digest produced by ``_cache_key`` for the
    given connection identity and credentials, replacing any previous entry.
    """
    identity = {
        "host": host,
        "port": port,
        "region": region,
        "username": username,
        "access_key": access_key,
        "secret_key": secret_key,
        "session_token": session_token,
    }
    slot = _cache_key(**identity)
    stored_at = time.time()
    _TOKEN_CACHE[slot] = (stored_at, token)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _boto3_rds_iam_token(*, host: str, port: int, region: str, username: str) -> str:
    """Mint an RDS IAM auth token through boto3.

    No explicit credentials are passed to boto3, so it resolves them itself.
    boto3 is imported lazily because it may be absent from the runtime image.

    Raises:
        ValueError: If boto3 cannot be imported, or if creating the client or
            generating the token fails for any reason.
    """
    try:
        import boto3  # type: ignore[import-not-found]
    except Exception as exc:  # pragma: no cover - depends on runtime image
        raise ValueError(
            "RDS IAM auth requires either source.auth.access_key_id/source.auth.secret_access_key "
            "or Databricks cluster AWS credentials with boto3 available"
        ) from exc

    try:
        session = boto3.Session(region_name=region)
        rds_client = session.client("rds", region_name=region)
        minted = rds_client.generate_db_auth_token(
            DBHostname=host,
            Port=port,
            DBUsername=username,
            Region=region,
        )
        return str(minted)
    except Exception as exc:
        raise ValueError(
            "RDS IAM auth could not generate a token from Databricks cluster AWS credentials; "
            "configure an instance profile/default AWS credential chain or declare "
            "source.auth.access_key_id/source.auth.secret_access_key as secret placeholders"
        ) from exc
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Databricks review planning for generic REST API connector contracts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from contractforge_core.connectors import is_rest_api_connector as is_rest_api_connector, rest_api_descriptor
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def render_rest_api_review_plan(source: dict[str, Any]) -> str:
    """Render a JSON review plan for a generic REST API source.

    The plan is the core's REST descriptor (requested with the
    ``***REDACTED***`` redaction marker) tagged with a ``kind``, plus the
    recommended Databricks targets and a fixed list of reviewer notes. The
    result is pretty-printed with sorted keys and ends with a newline.
    """
    descriptor = rest_api_descriptor(source, redaction="***REDACTED***")
    reviewer_notes = [
        "Generic paginated REST API ingestion is not rendered as a custom Databricks Python API client by default.",
        "For bounded public files, prefer portable http_file/http_json/http_csv source types.",
        "For proprietary SaaS APIs, prefer native_passthrough so the adapter can target Lakeflow Connect or Databricks Connections.",
        "If no native connector exists, execute a reviewed landing step outside the semantic core and ingest landed files with incremental_files.",
    ]
    # Key order matters on collisions: descriptor entries override "kind",
    # while the two keys after the spread override descriptor entries.
    plan = {
        "kind": "databricks_rest_api_review_plan",
        **descriptor,
        "recommended_databricks_targets": _recommended_targets(source, descriptor["pagination"]),
        "notes": reviewer_notes,
    }
    rendered = json.dumps(plan, indent=2, sort_keys=True)
    return f"{rendered}\n"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _recommended_targets(source: dict[str, Any], pagination: dict[str, Any]) -> list[str]:
    """Suggest Databricks ingestion targets for a REST API source.

    Sources without pagination (missing config, or type ``None``/``"none"``)
    get the bounded-file/connection targets. Paginated sources whose
    ``provider`` (or, failing that, ``name``) is salesforce, workday or
    servicenow get Lakeflow Connect first; all others get the passthrough or
    land-then-ingest targets.
    """
    pagination_type = pagination.get("type") if pagination else None
    if pagination_type in {None, "none"}:
        return ["http_file_if_bounded", "databricks_connection"]

    system_name = source.get("provider") or source.get("name") or ""
    if str(system_name).lower() in {"salesforce", "workday", "servicenow"}:
        return ["lakeflow_connect", "native_passthrough"]

    return ["native_passthrough", "land_to_object_storage_then_incremental_files"]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Databricks source connector support declarations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_databricks.sources.classification import (
|
|
8
|
+
DatabricksSourceClassification,
|
|
9
|
+
classify_databricks_source,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def databricks_source_support(source: dict[str, Any] | str) -> dict[str, Any]:
    """Describe how the Databricks adapter supports a source connector.

    The source (a source dict or a source-type name) is classified and the
    classification is flattened into a plain dict. This is adapter-owned
    documentation data: it does not affect core planning and does not make
    Databricks names portable.
    """
    classification = classify_databricks_source(source)
    return _entry(classification)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def list_databricks_source_support() -> tuple[dict[str, Any], ...]:
    """Return support entries for a fixed list of source types, in order."""
    source_types = (
        "table",
        "sql",
        "csv",
        "s3",
        "incremental_files",
        "http_json",
        "rest_api",
        "jdbc",
        "kafka_bounded",
        "eventhubs_bounded",
        "delta_share",
        "native_passthrough",
    )
    return tuple(map(databricks_source_support, source_types))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _entry(classification: DatabricksSourceClassification) -> dict[str, Any]:
    """Flatten a source classification into a plain support-entry dict.

    ``native_mapping`` is included only when the classification has a truthy
    value for it.
    """
    base: dict[str, Any] = {
        "adapter": "databricks",
        "source_type": classification.source_type,
        "status": classification.status,
        "note": classification.note,
    }
    mapping = classification.native_mapping
    if not mapping:
        return base
    return {**base, "native_mapping": mapping}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Databricks resolution for core logical table references."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import replace
|
|
6
|
+
|
|
7
|
+
from contractforge_core.connectors import catalog_source_query, source_logical_table_reference
|
|
8
|
+
from contractforge_core.connectors import LogicalTableReference
|
|
9
|
+
from contractforge_core.semantic import SemanticContract
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def databricks_table_ref_resolver(contract: SemanticContract):
    """Build a resolver that maps logical table refs to Databricks names.

    The returned callable resolves a ref as follows:

    * ref has a schema: ``catalog.schema.table``, using the ref's own catalog
      or else the catalog taken from the contract's target namespace; just
      ``schema.table`` when neither is available.
    * ref has no schema: the table is placed in the namespace derived from
      the contract's target namespace and the ref's layer, or returned bare
      when the contract has no namespace.
    """

    def _resolve(ref: LogicalTableReference) -> str:
        if not ref.schema:
            layer_namespace = _namespace_for_layer(contract, ref.layer)
            if layer_namespace:
                return f"{layer_namespace}.{ref.table}"
            return ref.table

        # An explicit catalog on the ref wins over the contract's catalog.
        catalog = ref.catalog or _target_catalog(contract)
        if catalog:
            return f"{catalog}.{ref.schema}.{ref.table}"
        return f"{ref.schema}.{ref.table}"

    return _resolve
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def resolve_databricks_source_table_refs(contract: SemanticContract) -> dict | None:
    """Return a copy of ``source.raw`` with logical refs resolved.

    Returns ``None`` when the contract has no raw source. Otherwise a shallow
    copy is returned in which a logical table reference, if present, is
    replaced by a concrete ``table`` name (dropping the ``ref``/``table_ref``
    keys) and a truthy ``query`` is re-rendered through the core with the
    same resolver.
    """
    raw_source = contract.source.raw
    if not raw_source:
        return None

    resolved = dict(raw_source)
    resolver = databricks_table_ref_resolver(contract)

    logical_ref = source_logical_table_reference(resolved)
    if logical_ref is not None:
        resolved["table"] = resolver(logical_ref)
        for stale_key in ("ref", "table_ref"):
            resolved.pop(stale_key, None)

    if resolved.get("query"):
        resolved["query"] = catalog_source_query(resolved, table_ref_resolver=resolver)

    return resolved
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def contract_with_databricks_source_refs(contract: SemanticContract) -> SemanticContract:
    """Return the contract with its raw source refs resolved for Databricks.

    The input contract is returned as-is when there is no raw source to
    resolve; otherwise a copy is built with ``dataclasses.replace``.
    """
    resolved_source = resolve_databricks_source_table_refs(contract)
    if resolved_source is not None:
        updated_source = replace(contract.source, raw=resolved_source)
        return replace(contract, source=updated_source)
    return contract
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _namespace_for_layer(contract: SemanticContract, layer: str) -> str | None:
    """Derive the namespace for ``layer`` from the contract's target namespace.

    When the target namespace ends with ``_<current layer>``, that suffix is
    swapped for ``_<layer>`` (``main.sales_silver`` + ``bronze`` gives
    ``main.sales_bronze``). In every other case the target namespace is
    returned unchanged.

    Args:
        contract: Contract whose ``target.namespace`` and ``target.layer``
            are read.
        layer: Layer of the referenced table. An empty or ``None`` layer
            resolves to the target namespace itself.

    Returns:
        The derived namespace, or ``None`` when the contract has no target
        namespace.
    """
    namespace = contract.target.namespace
    if not namespace:
        return None
    if not layer:
        # A ref without a layer must not turn into "<prefix>_" or
        # "<prefix>_None"; keep it in the contract's own namespace.
        return namespace

    # Building the suffix as text first also avoids len(None) when the
    # contract has no current layer.
    layer_suffix = f"_{contract.target.layer}"
    if namespace.endswith(layer_suffix):
        return f"{namespace[: -len(layer_suffix)]}_{layer}"
    return namespace
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _target_catalog(contract: SemanticContract) -> str | None:
    """Return the text before the first dot of the target namespace.

    Returns ``None`` when the namespace is empty or contains no dot.
    """
    namespace = contract.target.namespace
    if namespace and "." in namespace:
        catalog, _, _ = namespace.partition(".")
        return catalog
    return None
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
from contractforge_databricks.sql.identifiers import quote_identifier, quote_table_name
|
|
2
|
+
from contractforge_databricks.sql.literals import sql_int, sql_json, sql_literal, sql_string
|
|
3
|
+
|
|
4
|
+
__all__ = ["quote_identifier", "quote_table_name", "sql_int", "sql_json", "sql_literal", "sql_string"]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Databricks SQL identifier helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def quote_identifier(identifier: str) -> str:
    """Wrap ``identifier`` in backticks, doubling any embedded backtick.

    The identifier is quoted exactly as given (surrounding whitespace is
    kept); only empty or whitespace-only input is rejected.

    Raises:
        ValueError: If ``identifier`` is empty or whitespace-only.
    """
    if not (identifier and identifier.strip()):
        raise ValueError("identifier must not be empty")
    escaped = identifier.replace("`", "``")
    return "`" + escaped + "`"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def quote_table_name(table_name: str) -> str:
    """Quote each dot-separated part of ``table_name`` with backticks.

    The name is split on every ``.``; parts are stripped of surrounding
    whitespace and empty parts are dropped before quoting.

    Raises:
        ValueError: If no non-empty part remains.
    """
    segments = []
    for raw_part in table_name.split("."):
        cleaned = raw_part.strip()
        if cleaned:
            segments.append(cleaned)
    if not segments:
        raise ValueError("table name must not be empty")
    return ".".join(map(quote_identifier, segments))
|
|
17
|
+
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Databricks SQL literal helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def sql_string(value: object) -> str:
    """Render ``value`` as a single-quoted SQL string literal.

    ``None`` becomes the bare keyword ``NULL``. Any other value is converted
    with ``str()``; backslashes are doubled and single quotes are doubled.

    Backslashes must be escaped because, under the default parser settings,
    Databricks SQL treats ``\\`` as an escape character inside string
    literals. Left raw, a value ending in a backslash (or containing ``\\'``)
    could terminate the literal early, and JSON escapes such as ``\\n`` or
    ``\\"`` would be decoded by the SQL parser and corrupt the stored text.
    """
    if value is None:
        return "NULL"
    # Escape backslashes first so the doubled quotes added next stay intact.
    text = str(value).replace("\\", "\\\\").replace("'", "''")
    return "'" + text + "'"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def sql_literal(value: Any) -> str:
    """Render a Python scalar as a SQL literal.

    * ``None`` -> ``NULL``
    * ``bool`` -> ``true`` / ``false``
    * ``int`` -> its decimal text
    * finite ``float`` -> ``str(value)``
    * NaN / infinite ``float`` -> ``CAST('NaN' AS DOUBLE)`` and friends,
      because ``str()`` would yield the bare tokens ``nan`` / ``inf``, which
      are not SQL literals
    * anything else -> quoted through ``sql_string``
    """
    import math  # local import: only needed for the special-float checks

    if value is None:
        return "NULL"
    # bool is tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, int):
        return str(value)
    if isinstance(value, float):
        if math.isnan(value):
            return "CAST('NaN' AS DOUBLE)"
        if math.isinf(value):
            return "CAST('Infinity' AS DOUBLE)" if value > 0 else "CAST('-Infinity' AS DOUBLE)"
        return str(value)
    return sql_string(value)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def sql_int(value: int | None) -> str:
    """Render an optional integer as SQL: ``NULL`` or its ``int()`` text."""
    if value is None:
        return "NULL"
    return str(int(value))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def sql_json(value: Any) -> str:
    """Serialize ``value`` to JSON and render it as a SQL string literal.

    Serialization uses sorted keys, keeps non-ASCII characters as-is and
    falls back to ``str()`` for objects JSON cannot encode. If serialization
    still fails, the JSON encoding of ``str(value)`` is used instead, so this
    function is best-effort by design.
    """
    try:
        encoded = json.dumps(value, default=str, ensure_ascii=False, sort_keys=True)
    except Exception:
        # Deliberate catch-all: fall back to the value's text form.
        encoded = json.dumps(str(value), ensure_ascii=False)
    return sql_string(encoded)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from contractforge_databricks.state.ddl import render_create_state_tables_sql
|
|
2
|
+
from contractforge_databricks.state.migrations import (
|
|
3
|
+
control_table_additive_migrations,
|
|
4
|
+
render_control_table_migrations_sql,
|
|
5
|
+
)
|
|
6
|
+
from contractforge_databricks.state.queries import (
|
|
7
|
+
render_control_metadata_current_sql,
|
|
8
|
+
render_find_idempotent_run_sql,
|
|
9
|
+
render_find_idempotent_stream_sql,
|
|
10
|
+
render_has_successful_run_sql,
|
|
11
|
+
render_lock_status_sql,
|
|
12
|
+
render_record_control_metadata_sql,
|
|
13
|
+
render_select_previous_watermark_sql,
|
|
14
|
+
)
|
|
15
|
+
from contractforge_databricks.state.sql import (
|
|
16
|
+
render_acquire_lock_sql,
|
|
17
|
+
render_release_lock_sql,
|
|
18
|
+
render_upsert_state_sql,
|
|
19
|
+
)
|
|
20
|
+
from contractforge_databricks.state.tables import state_table_names
|
|
21
|
+
from contractforge_databricks.state.writer import StateWriter
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"StateWriter",
|
|
25
|
+
"control_table_additive_migrations",
|
|
26
|
+
"render_acquire_lock_sql",
|
|
27
|
+
"render_control_metadata_current_sql",
|
|
28
|
+
"render_control_table_migrations_sql",
|
|
29
|
+
"render_create_state_tables_sql",
|
|
30
|
+
"render_find_idempotent_run_sql",
|
|
31
|
+
"render_find_idempotent_stream_sql",
|
|
32
|
+
"render_has_successful_run_sql",
|
|
33
|
+
"render_lock_status_sql",
|
|
34
|
+
"render_record_control_metadata_sql",
|
|
35
|
+
"render_release_lock_sql",
|
|
36
|
+
"render_select_previous_watermark_sql",
|
|
37
|
+
"render_upsert_state_sql",
|
|
38
|
+
"state_table_names",
|
|
39
|
+
]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Databricks Delta DDL for operational state tables."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contractforge_core.evidence import STATE_TABLE_SCHEMAS
|
|
6
|
+
from contractforge_databricks.sql import quote_table_name
|
|
7
|
+
from contractforge_databricks.state.tables import state_table_names
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def render_create_state_tables_sql(*, catalog: str = "main", schema: str = "ops") -> str:
    """Render the DDL script that creates the schema and state tables.

    The script starts with ``CREATE SCHEMA IF NOT EXISTS`` for
    ``catalog.schema``, followed by one ``CREATE TABLE IF NOT EXISTS ...
    USING DELTA`` statement per entry returned by ``state_table_names``, with
    the column list taken from ``STATE_TABLE_SCHEMAS``. Statements are
    separated by blank lines and the script ends with a newline.
    """
    table_names = state_table_names(catalog, schema)
    schema_name = quote_table_name(f"{catalog}.{schema}")
    script = [f"CREATE SCHEMA IF NOT EXISTS {schema_name};"]
    for logical_name, qualified_name in table_names.items():
        ddl_lines = [
            f"CREATE TABLE IF NOT EXISTS {quote_table_name(qualified_name)} (",
            f" {STATE_TABLE_SCHEMAS[logical_name]}",
            ")",
            "USING DELTA;",
        ]
        script.append("\n".join(ddl_lines))
    return "\n\n".join(script) + "\n"
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""Additive Databricks control-table migration planning."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from contractforge_databricks.evidence.tables import evidence_table_names
|
|
6
|
+
from contractforge_databricks.sql import quote_identifier, quote_table_name
|
|
7
|
+
from contractforge_databricks.state.tables import state_table_names
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Additive columns for the evidence tables.
# Outer key: logical table key (resolved to a qualified table name by
# control_table_additive_migrations). Inner mapping: column name -> SQL type.
# Insertion order is significant: it is the order in which the columns are
# rendered into the generated ALTER TABLE statements.
EVIDENCE_ADDITIVE_COLUMNS: dict[str, dict[str, str]] = {
    "runs": {
        "idempotency_key": "STRING",
        "idempotency_policy": "STRING",
        "skip_reason": "STRING",
        "skipped_by_run_id": "STRING",
        "metrics_source": "STRING",
        "framework_version": "STRING",
        "ctrl_schema_version": "BIGINT",
        "runtime_type": "STRING",
        "engine_version": "STRING",
        "python_version": "STRING",
        "stage_durations_json": "STRING",
        # contract_* columns
        "contract_description": "STRING",
        "contract_owner": "STRING",
        "contract_domain": "STRING",
        "contract_tags_json": "STRING",
        "contract_sla": "STRING",
        "runtime_parameters_json": "STRING",
        "annotations_status": "STRING",
        "annotations_result_json": "STRING",
        "ownership_json": "STRING",
        "operations_json": "STRING",
        # source_* columns
        "source_type": "STRING",
        "source_connector": "STRING",
        "source_name": "STRING",
        "source_provider": "STRING",
        "source_format": "STRING",
        "source_path": "STRING",
        "source_options_json": "STRING",
        "source_read_json": "STRING",
        "source_request_json": "STRING",
        "source_auth_json": "STRING",
        "source_pagination_json": "STRING",
        "source_response_json": "STRING",
        "source_incremental_json": "STRING",
        "source_limits_json": "STRING",
        "source_capabilities_json": "STRING",
        "source_metrics_json": "STRING",
        "source_system": "STRING",
        # write_* / table_version_* columns
        "write_engine_requested": "STRING",
        "write_engine_selected": "STRING",
        "write_engine_status": "STRING",
        "write_engine_reason": "STRING",
        "write_engine_fallback_policy": "STRING",
        "write_started_at_utc": "TIMESTAMP",
        "write_finished_at_utc": "TIMESTAMP",
        "table_version_before": "STRING",
        "table_version_after": "STRING",
        "write_committed": "BOOLEAN",
        # run identifier columns (same names also appear in the "state" table)
        "parent_run_id": "STRING",
        "run_group_id": "STRING",
        "master_job_id": "STRING",
        "master_run_id": "STRING",
        "rows_expired": "BIGINT",
        "metrics_json": "STRING",
    },
    "errors": {
        "error_class": "STRING",
        "occurred_at_utc": "TIMESTAMP",
    },
    "quality": {
        "severity": "STRING",
        "observed_value": "STRING",
        "message": "STRING",
    },
    "quarantine": {
        "record_ref": "STRING",
        "reason": "STRING",
    },
    "schema_changes": {
        "payload_json": "STRING",
        "changed_at_utc": "TIMESTAMP",
    },
    "lineage": {
        "source_name": "STRING",
    },
    "metadata": {
        "run_id": "STRING",
        "target_table": "STRING",
        "source_metadata_json": "STRING",
        "captured_at_utc": "TIMESTAMP",
    },
    "streams": {
        "run_id": "STRING",
        "batch_id": "STRING",
        "batch_metrics_json": "STRING",
        "captured_at_utc": "TIMESTAMP",
    },
    "annotations": {
        "previous_value": "STRING",
        "annotation_date": "DATE",
    },
    "operations": {
        "ownership_json": "STRING",
    },
    "access": {
        "access_run_id": "STRING",
        "action": "STRING",
        "column_name": "STRING",
        "function_name": "STRING",
        "new_value": "STRING",
        "mode": "STRING",
        "drift_policy": "STRING",
        "revoke_unmanaged": "BOOLEAN",
        "access_date": "DATE",
        "payload_json": "STRING",
        "applied_at_utc": "TIMESTAMP",
    },
}
|
|
100
|
+
|
|
101
|
+
# Additive columns for the state tables, in the same shape as the evidence
# mapping: logical table key -> {column name -> SQL type}. Insertion order is
# the order in which columns are rendered into the generated SQL.
STATE_ADDITIVE_COLUMNS: dict[str, dict[str, str]] = {
    "state": {
        # run identifier columns
        "parent_run_id": "STRING",
        "run_group_id": "STRING",
        "master_job_id": "STRING",
        "master_run_id": "STRING",
        # last_* columns
        "last_table_version": "STRING",
        "last_write_completed_at_utc": "TIMESTAMP",
        "last_watermark_candidate": "STRING",
    },
    "locks": {
        "owner": "STRING",
        "ttl_minutes": "BIGINT",
        "released_at_utc": "TIMESTAMP",
    },
}
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def control_table_additive_migrations(*, catalog: str = "main", schema: str = "ops") -> dict[str, dict[str, str]]:
    """Map each control table name to the additive columns planned for it.

    Logical table keys from ``EVIDENCE_ADDITIVE_COLUMNS`` and
    ``STATE_ADDITIVE_COLUMNS`` are translated to table names through
    ``evidence_table_names`` and ``state_table_names``. When both sources
    define the same key, the state entry wins.

    Args:
        catalog: Catalog passed to the table-name helpers.
        schema: Schema passed to the table-name helpers.

    Returns:
        A new ``{table name: {column name: SQL type}}`` mapping. The inner
        dicts are copies, so callers may mutate them without touching the
        module-level constants.

    Raises:
        KeyError: If a logical key has no entry in the resolved table names.
    """
    # Evidence names first, state names second: later entries override.
    qualified_by_key = dict(evidence_table_names(catalog, schema))
    qualified_by_key.update(state_table_names(catalog, schema))

    planned_by_key = dict(EVIDENCE_ADDITIVE_COLUMNS)
    planned_by_key.update(STATE_ADDITIVE_COLUMNS)

    return {
        qualified_by_key[key]: dict(additions)
        for key, additions in planned_by_key.items()
    }
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def render_control_table_migrations_sql(*, catalog: str = "main", schema: str = "ops") -> str:
    """Render the additive control-table migrations as a SQL script.

    The script opens with a two-line comment header. For every table returned
    by ``control_table_additive_migrations`` it then emits a ``-- <table>``
    comment and one ``ALTER TABLE ... ADD COLUMNS (...)`` statement listing all
    planned columns. The script does not check which columns already exist;
    the header tells the operator to review that before running it.

    Args:
        catalog: Catalog forwarded to the migration planner.
        schema: Schema forwarded to the migration planner.

    Returns:
        The script text with trailing whitespace stripped and a single final
        newline.
    """
    script = [
        "-- Databricks control-table additive migrations.",
        "-- Review existing schemas before execution; apply only columns that are missing.",
        "",
    ]
    plan = control_table_additive_migrations(catalog=catalog, schema=schema)
    for qualified_name, additions in plan.items():
        # Column names are quoted; the SQL types come from the constants as-is.
        column_defs = [
            f"{quote_identifier(column_name)} {sql_type}"
            for column_name, sql_type in additions.items()
        ]
        script.append(f"-- {qualified_name}")
        script.append(f"ALTER TABLE {quote_table_name(qualified_name)} ADD COLUMNS (")
        script.append(" " + ",\n ".join(column_defs))
        script.append(");")
        script.append("")  # blank separator line between tables
    return "\n".join(script).rstrip() + "\n"
|