contractforge-databricks 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contractforge_databricks/__init__.py +172 -0
- contractforge_databricks/adapter.py +69 -0
- contractforge_databricks/annotations/__init__.py +10 -0
- contractforge_databricks/annotations/application.py +52 -0
- contractforge_databricks/annotations/audit.py +49 -0
- contractforge_databricks/annotations/sql.py +142 -0
- contractforge_databricks/api.py +65 -0
- contractforge_databricks/bundles/__init__.py +9 -0
- contractforge_databricks/bundles/assets.py +47 -0
- contractforge_databricks/bundles/project.py +213 -0
- contractforge_databricks/bundles/project_config.py +133 -0
- contractforge_databricks/capabilities/__init__.py +17 -0
- contractforge_databricks/capabilities/builders.py +43 -0
- contractforge_databricks/capabilities/evaluate.py +162 -0
- contractforge_databricks/capabilities/mapping.py +36 -0
- contractforge_databricks/capabilities/models.py +44 -0
- contractforge_databricks/capabilities/runtime.py +111 -0
- contractforge_databricks/capabilities/uc.py +47 -0
- contractforge_databricks/cli.py +196 -0
- contractforge_databricks/cli_deploy.py +98 -0
- contractforge_databricks/cli_governance.py +142 -0
- contractforge_databricks/cli_io.py +91 -0
- contractforge_databricks/cli_maintenance.py +69 -0
- contractforge_databricks/coercion.py +31 -0
- contractforge_databricks/contract_extensions.py +70 -0
- contractforge_databricks/cost/__init__.py +11 -0
- contractforge_databricks/cost/model.py +22 -0
- contractforge_databricks/cost/report.py +65 -0
- contractforge_databricks/cost/sql.py +136 -0
- contractforge_databricks/dashboards/__init__.py +15 -0
- contractforge_databricks/dashboards/control_tables.py +150 -0
- contractforge_databricks/diagnostics/__init__.py +7 -0
- contractforge_databricks/diagnostics/explain.py +40 -0
- contractforge_databricks/environment.py +53 -0
- contractforge_databricks/evidence/__init__.py +98 -0
- contractforge_databricks/evidence/ddl.py +35 -0
- contractforge_databricks/evidence/governance_log.py +175 -0
- contractforge_databricks/evidence/helpers.py +29 -0
- contractforge_databricks/evidence/ops_log.py +210 -0
- contractforge_databricks/evidence/records.py +27 -0
- contractforge_databricks/evidence/run_log.py +74 -0
- contractforge_databricks/evidence/schemas.py +7 -0
- contractforge_databricks/evidence/sql.py +144 -0
- contractforge_databricks/evidence/tables.py +20 -0
- contractforge_databricks/evidence/writer.py +118 -0
- contractforge_databricks/execution/__init__.py +70 -0
- contractforge_databricks/execution/delta_basic.py +57 -0
- contractforge_databricks/execution/hash_diff.py +126 -0
- contractforge_databricks/execution/hash_diff_latest.py +142 -0
- contractforge_databricks/execution/replace_partitions.py +40 -0
- contractforge_databricks/execution/results.py +5 -0
- contractforge_databricks/execution/retry.py +36 -0
- contractforge_databricks/execution/scd2.py +213 -0
- contractforge_databricks/execution/scd2_deletes.py +65 -0
- contractforge_databricks/execution/scd2_late.py +30 -0
- contractforge_databricks/execution/snapshot.py +77 -0
- contractforge_databricks/execution/sql_merge.py +85 -0
- contractforge_databricks/execution/tables.py +98 -0
- contractforge_databricks/execution/windows.py +58 -0
- contractforge_databricks/governance/__init__.py +30 -0
- contractforge_databricks/governance/access.py +185 -0
- contractforge_databricks/governance/application.py +93 -0
- contractforge_databricks/governance/drift.py +49 -0
- contractforge_databricks/governance/runtime.py +60 -0
- contractforge_databricks/governance/sql.py +31 -0
- contractforge_databricks/governance/validation.py +135 -0
- contractforge_databricks/lakeflow/__init__.py +21 -0
- contractforge_databricks/lakeflow/compatibility.py +194 -0
- contractforge_databricks/lakeflow/rendering.py +175 -0
- contractforge_databricks/lineage/__init__.py +7 -0
- contractforge_databricks/lineage/openlineage.py +182 -0
- contractforge_databricks/maintenance/__init__.py +27 -0
- contractforge_databricks/maintenance/retention.py +90 -0
- contractforge_databricks/maintenance/sql.py +68 -0
- contractforge_databricks/metrics/__init__.py +19 -0
- contractforge_databricks/metrics/history.py +21 -0
- contractforge_databricks/metrics/write.py +63 -0
- contractforge_databricks/operations/__init__.py +4 -0
- contractforge_databricks/operations/application.py +38 -0
- contractforge_databricks/operations/sql.py +95 -0
- contractforge_databricks/parity/__init__.py +18 -0
- contractforge_databricks/parity/catalog.py +59 -0
- contractforge_databricks/parity/models.py +7 -0
- contractforge_databricks/parity/scenarios.py +111 -0
- contractforge_databricks/partitioning/__init__.py +3 -0
- contractforge_databricks/partitioning/predicates.py +28 -0
- contractforge_databricks/preparation/__init__.py +47 -0
- contractforge_databricks/preparation/deduplicate.py +87 -0
- contractforge_databricks/preparation/encoding.py +37 -0
- contractforge_databricks/preparation/hashing.py +18 -0
- contractforge_databricks/preparation/pyspark.py +178 -0
- contractforge_databricks/preparation/pyspark_staging.py +70 -0
- contractforge_databricks/preparation/shape.py +209 -0
- contractforge_databricks/preparation/shape_validation.py +94 -0
- contractforge_databricks/preparation/staging.py +17 -0
- contractforge_databricks/preparation/zip_arrays.py +51 -0
- contractforge_databricks/presets/__init__.py +3 -0
- contractforge_databricks/presets/base.py +24 -0
- contractforge_databricks/presets/bronze.py +57 -0
- contractforge_databricks/presets/catalog.py +22 -0
- contractforge_databricks/presets/core.py +134 -0
- contractforge_databricks/presets/gold.py +62 -0
- contractforge_databricks/presets/modifiers.py +51 -0
- contractforge_databricks/presets/runtime.py +22 -0
- contractforge_databricks/presets/silver.py +101 -0
- contractforge_databricks/presets/write_engine.py +57 -0
- contractforge_databricks/quality/__init__.py +41 -0
- contractforge_databricks/quality/evaluation.py +178 -0
- contractforge_databricks/quality/persistence.py +81 -0
- contractforge_databricks/quality/registry.py +134 -0
- contractforge_databricks/quality/results.py +17 -0
- contractforge_databricks/quality/sql.py +113 -0
- contractforge_databricks/rendering/__init__.py +11 -0
- contractforge_databricks/rendering/bundle.py +93 -0
- contractforge_databricks/rendering/markdown.py +50 -0
- contractforge_databricks/rendering/names.py +56 -0
- contractforge_databricks/results.py +15 -0
- contractforge_databricks/runtime/__init__.py +101 -0
- contractforge_databricks/runtime/available_now.py +147 -0
- contractforge_databricks/runtime/bundles.py +211 -0
- contractforge_databricks/runtime/cache.py +20 -0
- contractforge_databricks/runtime/control_tables.py +19 -0
- contractforge_databricks/runtime/deploy.py +197 -0
- contractforge_databricks/runtime/detection.py +114 -0
- contractforge_databricks/runtime/dry_run.py +46 -0
- contractforge_databricks/runtime/errors.py +54 -0
- contractforge_databricks/runtime/file_selection.py +109 -0
- contractforge_databricks/runtime/finalization.py +168 -0
- contractforge_databricks/runtime/governance.py +37 -0
- contractforge_databricks/runtime/hooks.py +45 -0
- contractforge_databricks/runtime/http_file.py +37 -0
- contractforge_databricks/runtime/http_retry.py +15 -0
- contractforge_databricks/runtime/http_safety.py +9 -0
- contractforge_databricks/runtime/json_materialization.py +97 -0
- contractforge_databricks/runtime/lineage.py +164 -0
- contractforge_databricks/runtime/maintenance.py +43 -0
- contractforge_databricks/runtime/merge_validation.py +98 -0
- contractforge_databricks/runtime/metadata.py +21 -0
- contractforge_databricks/runtime/metrics.py +34 -0
- contractforge_databricks/runtime/models.py +32 -0
- contractforge_databricks/runtime/options.py +33 -0
- contractforge_databricks/runtime/orchestration_context.py +185 -0
- contractforge_databricks/runtime/orchestrator.py +147 -0
- contractforge_databricks/runtime/partitioning.py +93 -0
- contractforge_databricks/runtime/quality_quarantine.py +92 -0
- contractforge_databricks/runtime/rest_api.py +46 -0
- contractforge_databricks/runtime/rest_auth.py +21 -0
- contractforge_databricks/runtime/rest_pagination.py +21 -0
- contractforge_databricks/runtime/run_payload.py +177 -0
- contractforge_databricks/runtime/schema.py +106 -0
- contractforge_databricks/runtime/source_metadata.py +30 -0
- contractforge_databricks/runtime/source_registry.py +43 -0
- contractforge_databricks/runtime/source_schema.py +24 -0
- contractforge_databricks/runtime/sources.py +208 -0
- contractforge_databricks/runtime/spark.py +183 -0
- contractforge_databricks/runtime/spark_defaults.py +35 -0
- contractforge_databricks/runtime/storage_auth.py +132 -0
- contractforge_databricks/runtime/streaming.py +131 -0
- contractforge_databricks/runtime/success.py +104 -0
- contractforge_databricks/runtime/utils.py +52 -0
- contractforge_databricks/runtime/watermark.py +71 -0
- contractforge_databricks/runtime/windows.py +184 -0
- contractforge_databricks/runtime/write.py +66 -0
- contractforge_databricks/runtime/write_flow.py +146 -0
- contractforge_databricks/runtime/write_strategy.py +40 -0
- contractforge_databricks/schema/__init__.py +21 -0
- contractforge_databricks/schema/diff.py +11 -0
- contractforge_databricks/schema/policy.py +33 -0
- contractforge_databricks/schema/sync.py +23 -0
- contractforge_databricks/security/__init__.py +21 -0
- contractforge_databricks/security/errors.py +5 -0
- contractforge_databricks/security/redaction.py +5 -0
- contractforge_databricks/security/secrets.py +114 -0
- contractforge_databricks/security/source_policy.py +17 -0
- contractforge_databricks/shapes/__init__.py +3 -0
- contractforge_databricks/shapes/sql.py +123 -0
- contractforge_databricks/sources/__init__.py +67 -0
- contractforge_databricks/sources/artifacts.py +100 -0
- contractforge_databricks/sources/autoloader.py +48 -0
- contractforge_databricks/sources/bounded_streams.py +44 -0
- contractforge_databricks/sources/classification.py +115 -0
- contractforge_databricks/sources/delta_share.py +21 -0
- contractforge_databricks/sources/files.py +48 -0
- contractforge_databricks/sources/http_file.py +46 -0
- contractforge_databricks/sources/interpret.py +76 -0
- contractforge_databricks/sources/jdbc.py +32 -0
- contractforge_databricks/sources/metadata.py +18 -0
- contractforge_databricks/sources/native_passthrough.py +33 -0
- contractforge_databricks/sources/rds_iam.py +15 -0
- contractforge_databricks/sources/rds_iam_runtime.py +191 -0
- contractforge_databricks/sources/rest_api.py +33 -0
- contractforge_databricks/sources/support.py +50 -0
- contractforge_databricks/sources/table_refs.py +65 -0
- contractforge_databricks/sql/__init__.py +4 -0
- contractforge_databricks/sql/identifiers.py +17 -0
- contractforge_databricks/sql/literals.py +36 -0
- contractforge_databricks/state/__init__.py +39 -0
- contractforge_databricks/state/ddl.py +24 -0
- contractforge_databricks/state/migrations.py +146 -0
- contractforge_databricks/state/queries.py +149 -0
- contractforge_databricks/state/sql.py +116 -0
- contractforge_databricks/state/tables.py +9 -0
- contractforge_databricks/state/writer.py +83 -0
- contractforge_databricks/templates/__init__.py +15 -0
- contractforge_databricks/templates/catalog.py +205 -0
- contractforge_databricks/templates/catalog_parity.py +85 -0
- contractforge_databricks/templates/core.py +83 -0
- contractforge_databricks/templates/enrichment.py +175 -0
- contractforge_databricks/transforms/__init__.py +3 -0
- contractforge_databricks/transforms/sql.py +118 -0
- contractforge_databricks/watermark/__init__.py +6 -0
- contractforge_databricks/watermark/sql.py +91 -0
- contractforge_databricks/write_modes/__init__.py +20 -0
- contractforge_databricks/write_modes/registry.py +44 -0
- contractforge_databricks/write_modes/sql.py +33 -0
- contractforge_databricks/write_modes/strategy.py +192 -0
- contractforge_databricks-0.1.0.dist-info/METADATA +34 -0
- contractforge_databricks-0.1.0.dist-info/RECORD +220 -0
- contractforge_databricks-0.1.0.dist-info/WHEEL +4 -0
- contractforge_databricks-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Source artifact routing for Databricks bundles."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_core.connectors import JDBC_CONNECTORS
|
|
8
|
+
from contractforge_core.semantic import SemanticContract
|
|
9
|
+
from contractforge_databricks.environment import DatabricksEnvironment
|
|
10
|
+
from contractforge_databricks.rendering.names import artifact_prefix
|
|
11
|
+
from contractforge_databricks.sources.autoloader import render_autoloader_python
|
|
12
|
+
from contractforge_databricks.sources.bounded_streams import is_bounded_stream_source, render_bounded_stream_python
|
|
13
|
+
from contractforge_databricks.sources.delta_share import is_delta_share_source, render_delta_share_python
|
|
14
|
+
from contractforge_databricks.sources.files import (
|
|
15
|
+
is_catalog_source,
|
|
16
|
+
is_file_source,
|
|
17
|
+
render_catalog_source_python,
|
|
18
|
+
render_file_source_python,
|
|
19
|
+
)
|
|
20
|
+
from contractforge_databricks.sources.http_file import is_http_file_source, render_http_file_python
|
|
21
|
+
from contractforge_databricks.sources.interpret import interpret_incremental_files_source, is_incremental_file_source
|
|
22
|
+
from contractforge_databricks.sources.jdbc import render_jdbc_python
|
|
23
|
+
from contractforge_databricks.sources.native_passthrough import render_native_passthrough_plan
|
|
24
|
+
from contractforge_databricks.sources.rest_api import is_rest_api_connector, render_rest_api_review_plan
|
|
25
|
+
from contractforge_databricks.sources.table_refs import contract_with_databricks_source_refs
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def render_source_artifacts(
    contract: SemanticContract,
    *,
    environment: DatabricksEnvironment | None = None,
) -> dict[str, str]:
    """Render every source artifact that applies to the contract's raw source.

    Args:
        contract: Semantic contract whose ``source.raw`` mapping is inspected.
        environment: Optional Databricks environment, forwarded only to the
            incremental-files interpretation step.

    Returns:
        Mapping of artifact file name (``<prefix>.<suffix>``) to rendered
        content. Empty when the contract has no raw source.
    """
    if not contract.source.raw:
        return {}

    # Predicates and renderers run against the source with Databricks table
    # refs applied; the file-name prefix is derived from the original contract.
    source = contract_with_databricks_source_refs(contract).source.raw or {}
    prefix = artifact_prefix(contract)

    def _autoloader(raw: dict[str, Any]) -> str:
        interpreted = interpret_incremental_files_source(raw, environment=environment)
        return render_autoloader_python(interpreted)

    def _is_native_passthrough(raw: dict[str, Any]) -> bool:
        return raw.get("type") == "native_passthrough"

    # (file suffix, applicability check, renderer). Order fixes the insertion
    # order of the returned dict; several entries may apply to one source.
    routes = (
        ("source_autoloader.py", is_incremental_file_source, _autoloader),
        ("source_jdbc.py", _is_jdbc_source, render_jdbc_python),
        ("source_files.py", _can_render_file_source, render_file_source_python),
        ("source_catalog.py", _can_render_catalog_source, render_catalog_source_python),
        ("source_http_file.py", _can_render_http_file_source, render_http_file_python),
        ("source_bounded_stream.py", is_bounded_stream_source, render_bounded_stream_python),
        ("source_delta_share.py", _can_render_delta_share_source, render_delta_share_python),
        ("native_passthrough.json", _is_native_passthrough, render_native_passthrough_plan),
        ("source_rest_api_review.json", is_rest_api_connector, render_rest_api_review_plan),
    )

    artifacts: dict[str, str] = {}
    for suffix, applies, render in routes:
        if applies(source):
            artifacts[f"{prefix}.{suffix}"] = render(source)
    return artifacts
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _is_jdbc_source(source: dict[str, Any]) -> bool:
    """Return whether the source is JDBC and carries a URL to render.

    The source counts as JDBC when ``type`` is ``"jdbc"`` or ``connector`` is
    in ``JDBC_CONNECTORS``. The URL may sit at the top level or under
    ``options``; a non-dict ``options`` value is treated as empty.
    """
    options = source.get("options")
    if not isinstance(options, dict):
        options = {}
    has_url = bool(source.get("url") or options.get("url"))
    is_jdbc = source.get("type") == "jdbc" or source.get("connector") in JDBC_CONNECTORS
    return is_jdbc and has_url
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _can_render_file_source(source: dict[str, Any]) -> bool:
    """Return whether a plain (non-incremental) file reader can be rendered.

    Requires a file source that is not an incremental file source and that
    has a non-empty ``path``.
    """
    return (
        is_file_source(source)
        and not is_incremental_file_source(source)
        and bool(source.get("path"))
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _can_render_catalog_source(source: dict[str, Any]) -> bool:
    """Return whether a catalog source names something that can be read.

    A catalog source is renderable when any of ``table``, ``path``, ``ref``,
    ``table_ref`` or ``query`` is set on the source, or ``query`` is set under
    ``options`` (a non-dict ``options`` value is treated as empty).
    """
    if not is_catalog_source(source):
        return False
    options = source.get("options")
    if not isinstance(options, dict):
        options = {}
    locator_keys = ("table", "path", "ref", "table_ref", "query")
    return any(source.get(key) for key in locator_keys) or bool(options.get("query"))
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _can_render_http_file_source(source: dict[str, Any]) -> bool:
    """Return whether an HTTP file source has a URL to render.

    The URL may be given as ``url`` or as ``request.url``; a non-dict
    ``request`` value is treated as empty.
    """
    request = source.get("request")
    if not isinstance(request, dict):
        request = {}
    has_url = bool(source.get("url") or request.get("url"))
    return is_http_file_source(source) and has_url
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _can_render_delta_share_source(source: dict[str, Any]) -> bool:
    """Return whether a Delta Sharing source has both a profile and a table.

    The profile is read from ``profile_file`` or ``options.profileFile``; the
    table from ``table`` or ``options.table``. A non-dict ``options`` value is
    treated as empty.
    """
    options = source.get("options")
    if not isinstance(options, dict):
        options = {}
    has_profile = bool(source.get("profile_file") or options.get("profileFile"))
    has_table = bool(source.get("table") or options.get("table"))
    return is_delta_share_source(source) and has_profile and has_table
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Databricks Auto Loader rendering for incremental file sources."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def render_autoloader_python(source: dict[str, Any], *, dataframe_name: str = "df") -> str:
    """Render Python source for an Auto Loader (``cloudFiles``) streaming read.

    Args:
        source: Source mapping. ``type`` must be ``"incremental_files"`` and
            ``path`` must be non-empty. ``format`` defaults to ``"json"``.
            Entries of ``options`` are stringified and may override
            ``cloudFiles.format``. ``schema_tracking_location`` and
            ``schema_hints`` are applied afterwards and win over same-named
            options. ``progress_location``, when set, is emitted as a trailing
            ``checkpoint_location`` assignment.
        dataframe_name: Variable name the rendered reader is assigned to.

    Returns:
        The rendered snippet, terminated by a newline.

    Raises:
        ValueError: If ``type`` is not ``"incremental_files"`` or ``path`` is
            missing or empty.
    """
    if source.get("type") != "incremental_files":
        raise ValueError("Auto Loader rendering requires source.type incremental_files")
    path = source.get("path")
    if not path:
        raise ValueError("Auto Loader source requires path")

    # ``options`` may be absent, explicitly None, or malformed; treat anything
    # that is not a mapping as "no options" instead of failing on ``.items()``.
    raw_options = source.get("options")
    if not isinstance(raw_options, dict):
        raw_options = {}

    options = {
        "cloudFiles.format": source.get("format") or "json",
        **{str(key): str(value) for key, value in raw_options.items()},
    }
    schema_location = source.get("schema_tracking_location")
    if schema_location:
        options["cloudFiles.schemaLocation"] = schema_location
    if source.get("schema_hints"):
        options["cloudFiles.schemaHints"] = source["schema_hints"]

    lines = [
        f"{dataframe_name} = (",
        " spark.readStream",
        " .format('cloudFiles')",
    ]
    # Sorted so the rendered artifact is stable across runs.
    lines.extend(f" .option({key!r}, {value!r})" for key, value in sorted(options.items()))
    lines.extend([f" .load({path!r})", ")"])

    checkpoint = source.get("progress_location")
    if checkpoint:
        lines.extend(
            [
                "",
                "# Use this checkpoint when writing the available-now stream.",
                f"checkpoint_location = {checkpoint!r}",
            ]
        )
    return "\n".join(lines) + "\n"
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Databricks bounded stream source rendering."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_core.connectors import (
|
|
8
|
+
eventhubs_bounded_options,
|
|
9
|
+
is_bounded_stream_source as is_bounded_stream_source,
|
|
10
|
+
kafka_bounded_options,
|
|
11
|
+
)
|
|
12
|
+
from contractforge_databricks.security import redact_value
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def render_bounded_stream_python(source: dict[str, Any], *, dataframe_name: str = "df") -> str:
    """Render a bounded stream read, dispatching on ``source["type"]``.

    Args:
        source: Source mapping with ``type`` ``"kafka_bounded"`` or
            ``"eventhubs_bounded"``.
        dataframe_name: Variable name the rendered reader is assigned to.

    Raises:
        ValueError: If ``type`` is neither of the two supported values.
    """
    kind = source.get("type")
    if kind not in ("kafka_bounded", "eventhubs_bounded"):
        raise ValueError("bounded stream rendering requires kafka_bounded or eventhubs_bounded")
    if kind == "kafka_bounded":
        return render_kafka_bounded_python(source, dataframe_name=dataframe_name)
    return render_eventhubs_bounded_python(source, dataframe_name=dataframe_name)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def render_kafka_bounded_python(source: dict[str, Any], *, dataframe_name: str = "df") -> str:
    """Render a bounded ``kafka`` read using options from ``kafka_bounded_options``."""
    options = kafka_bounded_options(source)
    return _render_reader("kafka", options, dataframe_name, "kafka_bounded_options_review")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def render_eventhubs_bounded_python(source: dict[str, Any], *, dataframe_name: str = "df") -> str:
    """Render a bounded ``eventhubs`` read using options from ``eventhubs_bounded_options``."""
    options = eventhubs_bounded_options(source)
    return _render_reader("eventhubs", options, dataframe_name, "eventhubs_bounded_options_review")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _render_reader(source_format: str, options: dict[str, str], dataframe_name: str, review_name: str) -> str:
    """Render a batch ``spark.read`` chain followed by a redacted options review.

    Args:
        source_format: Value passed to ``.format(...)`` in the rendered code.
        options: Reader options; emitted as ``.option`` calls in sorted key order.
        dataframe_name: Variable name the rendered reader is assigned to.
        review_name: Variable name that receives ``redact_value(options)``.

    Returns:
        The rendered snippet, terminated by a newline.
    """
    # NOTE(review): option values are written into the ``.option`` calls
    # as-is; only the trailing review variable goes through ``redact_value``.
    # Presumably options carry secret references, not secret values — confirm.
    header = [
        "# Bounded replay/catch-up read. This is not a continuous streaming artifact.",
        f"{dataframe_name} = (",
        " spark.read",
        f" .format({source_format!r})",
    ]
    option_calls = [f" .option({key!r}, {value!r})" for key, value in sorted(options.items())]
    footer = [" .load()", ")", "", f"{review_name} = {redact_value(options)!r}"]
    return "\n".join(header + option_calls + footer) + "\n"
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Databricks source connector classification.
|
|
2
|
+
|
|
3
|
+
This module is adapter-owned: it maps portable core connector semantics to the
|
|
4
|
+
Databricks rendering/runtime surface without making Databricks names portable.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from contractforge_core.connectors import (
|
|
13
|
+
JDBC_CONNECTORS,
|
|
14
|
+
is_bounded_stream_source,
|
|
15
|
+
is_catalog_source,
|
|
16
|
+
is_delta_share_source,
|
|
17
|
+
is_file_source,
|
|
18
|
+
is_http_file_source,
|
|
19
|
+
is_native_passthrough_source,
|
|
20
|
+
is_rest_api_connector,
|
|
21
|
+
)
|
|
22
|
+
from contractforge_databricks.sources.interpret import is_incremental_file_source
|
|
23
|
+
|
|
24
|
+
# Status values carried by DatabricksSourceClassification.status.
SUPPORTED = "SUPPORTED"
REVIEW_REQUIRED = "REVIEW_REQUIRED"
UNSUPPORTED = "UNSUPPORTED"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
|
|
30
|
+
class DatabricksSourceClassification:
|
|
31
|
+
source_type: str
|
|
32
|
+
status: str
|
|
33
|
+
native_mapping: str | None
|
|
34
|
+
note: str
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def classify_databricks_source(source: dict[str, Any] | str) -> DatabricksSourceClassification:
    """Classify a source connector against Databricks support semantics.

    Args:
        source: Either a source mapping or a bare type name; a string is
            treated as ``{"type": source}``.

    Returns:
        The classification of the first matching rule, or an ``UNSUPPORTED``
        classification when no rule matches. ``source_type`` on the result is
        the stripped, lower-cased ``connector`` (falling back to ``type``).
    """
    payload = dict(source) if not isinstance(source, str) else {"type": source}
    label = payload.get("connector") or payload.get("type") or ""
    source_type = str(label).strip().lower()

    # Ordered rules: (predicate, status, native mapping, note). The first
    # predicate that accepts the payload decides the result, so order matters.
    rules = (
        (
            is_incremental_file_source,
            SUPPORTED,
            "Auto Loader cloudFiles",
            "Uses core incremental_files/file_stream intent.",
        ),
        (
            _is_jdbc_source,
            SUPPORTED,
            "Spark JDBC reader",
            "Core builds JDBC options; Databricks resolves secrets.",
        ),
        (
            is_catalog_source,
            SUPPORTED,
            "spark.table / spark.sql",
            "Catalog resolution is runtime-owned.",
        ),
        (
            is_http_file_source,
            SUPPORTED,
            "Core HTTP file fetch + Spark reader",
            "Fetch algorithm lives in core.",
        ),
        (
            is_rest_api_connector,
            SUPPORTED,
            "Core REST client + Spark JSON materialization",
            "Secrets resolve in Databricks.",
        ),
        (
            is_bounded_stream_source,
            SUPPORTED,
            "Spark bounded kafka/eventhubs reader",
            "Not continuous streaming.",
        ),
        (
            is_delta_share_source,
            SUPPORTED,
            "Delta Sharing Spark connector",
            "Runtime must provide connector support.",
        ),
        (
            is_file_source,
            SUPPORTED,
            "Spark file reader",
            "Format and reader options are core-normalized.",
        ),
        (
            is_native_passthrough_source,
            REVIEW_REQUIRED,
            "Databricks native connector handoff",
            "Adapter-owned design review.",
        ),
    )
    for matches, status, native_mapping, note in rules:
        if matches(payload):
            return DatabricksSourceClassification(
                source_type=source_type,
                status=status,
                native_mapping=native_mapping,
                note=note,
            )

    return DatabricksSourceClassification(
        source_type=source_type,
        status=UNSUPPORTED,
        native_mapping=None,
        note="No Databricks source renderer is declared for this connector.",
    )
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _is_jdbc_source(source: dict[str, Any]) -> bool:
    """Return whether the source's connector (or type) denotes JDBC.

    ``connector`` falls back to ``type`` when unset; the source is JDBC when
    that value is in ``JDBC_CONNECTORS`` or ``type`` is literally ``"jdbc"``.
    """
    declared_type = source.get("type")
    connector = source.get("connector") or declared_type
    if connector in JDBC_CONNECTORS:
        return True
    return declared_type == "jdbc"
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Databricks Delta Sharing source rendering."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_core.connectors import delta_share_options, is_delta_share_source as is_delta_share_source
|
|
8
|
+
from contractforge_databricks.security import redact_value
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def render_delta_share_python(source: dict[str, Any], *, dataframe_name: str = "df") -> str:
    """Render a ``deltaSharing`` reader chain followed by a redacted options review.

    Args:
        source: Source mapping passed to ``delta_share_options``.
        dataframe_name: Variable name the rendered reader is assigned to.

    Returns:
        The rendered snippet, terminated by a newline.
    """
    options = delta_share_options(source)
    # NOTE(review): the rendered chain ends after the ``.option`` calls with no
    # ``.load(...)``, so the assigned variable holds a reader rather than a
    # DataFrame. Confirm whether a load call is expected here.
    # NOTE(review): option values are rendered as-is; only the review variable
    # goes through ``redact_value``.
    opening = [
        f"{dataframe_name} = (",
        " spark.read",
        " .format('deltaSharing')",
    ]
    option_calls = [f" .option({key!r}, {value!r})" for key, value in sorted(options.items())]
    closing = [")", "", f"delta_share_options_review = {redact_value(options)!r}"]
    return "\n".join(opening + option_calls + closing) + "\n"
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Databricks Spark file and catalog source rendering."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_core.connectors import (
|
|
8
|
+
TableRefResolver,
|
|
9
|
+
catalog_source_query,
|
|
10
|
+
catalog_source_table_or_path,
|
|
11
|
+
file_reader_options,
|
|
12
|
+
file_source_format,
|
|
13
|
+
is_catalog_source as is_catalog_source,
|
|
14
|
+
is_file_source as is_file_source,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def render_file_source_python(source: dict[str, Any], *, dataframe_name: str = "df") -> str:
    """Render a batch ``spark.read`` for a file source.

    Args:
        source: Source mapping; ``path`` is required. Format and reader
            options come from ``file_source_format`` and ``file_reader_options``.
        dataframe_name: Variable name the rendered reader is assigned to.

    Returns:
        The rendered snippet, terminated by a newline.

    Raises:
        ValueError: If ``path`` is missing or empty.
    """
    file_format = file_source_format(source)
    path = source.get("path")
    if not path:
        raise ValueError("file source requires path")
    reader_options = file_reader_options(source)

    rendered = [
        f"{dataframe_name} = (",
        " spark.read",
        f" .format({file_format!r})",
    ]
    # Sorted so the rendered artifact is stable across runs.
    rendered += [f" .option({key!r}, {value!r})" for key, value in sorted(reader_options.items())]
    rendered += [f" .load({path!r})", ")"]
    return "\n".join(rendered) + "\n"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def render_catalog_source_python(
    source: dict[str, Any],
    *,
    dataframe_name: str = "df",
    table_ref_resolver: TableRefResolver | None = None,
) -> str:
    """Render a one-line read for a catalog source.

    Three shapes are produced:
    ``spark.sql(...)`` when ``type`` is ``"sql"``;
    ``spark.read.format(...).load(...)`` when ``path`` is set and ``table`` is not;
    ``spark.table(...)`` otherwise.

    Args:
        source: Catalog source mapping.
        dataframe_name: Variable name the rendered reader is assigned to.
        table_ref_resolver: Optional resolver forwarded to the core helpers.

    Returns:
        The rendered statement, terminated by a newline.
    """
    source_type = source.get("type")
    if source_type == "sql":
        query = catalog_source_query(source, table_ref_resolver=table_ref_resolver)
        return f"{dataframe_name} = spark.sql({query!r})\n"

    target = str(catalog_source_table_or_path(source, table_ref_resolver=table_ref_resolver))
    path_only = source.get("path") and not source.get("table")
    if not path_only:
        return f"{dataframe_name} = spark.table({target!r})\n"

    # Path-based read: the reader format is the type without its "_table" suffix.
    if source_type == "delta_table":
        file_format = "delta"
    else:
        file_format = source_type.replace("_table", "")
    return f"{dataframe_name} = spark.read.format({file_format!r}).load({target!r})\n"
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Databricks HTTP file source rendering."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_core.connectors import (
|
|
8
|
+
http_file_format,
|
|
9
|
+
http_file_headers,
|
|
10
|
+
http_file_reader_options,
|
|
11
|
+
is_http_file_source as is_http_file_source,
|
|
12
|
+
)
|
|
13
|
+
from contractforge_databricks.security import redact_value
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def render_http_file_python(source: dict[str, Any], *, dataframe_name: str = "df") -> str:
    """Render Python that downloads an HTTP file and reads it with Spark.

    Args:
        source: HTTP file source mapping. The URL is taken from ``url`` or
            ``request.url``; ``request.method`` defaults to ``GET``.
        dataframe_name: Variable name the loaded reader result is assigned to.

    Returns:
        The rendered snippet, terminated by a newline. It embeds ``source``
        verbatim for the download call and a ``redact_value`` copy for review.

    Raises:
        ValueError: If the source is not an HTTP file source, has no URL, or
            requests a method other than ``GET``.
    """
    if not is_http_file_source(source):
        raise ValueError("HTTP file rendering requires source.type http_file/http_csv/http_json/http_text")

    # ``request`` may be absent, explicitly None, or malformed. Treat anything
    # that is not a mapping as empty so a top-level ``url`` still renders
    # instead of failing on ``request.get``.
    request = source.get("request")
    if not isinstance(request, dict):
        request = {}

    url = source.get("url") or request.get("url")
    if not url:
        raise ValueError("HTTP file source requires url or request.url")
    method = str(request.get("method") or "GET").upper()
    if method != "GET":
        raise ValueError("HTTP file source supports only GET")

    file_format = http_file_format(source)
    # Return value is discarded; presumably called to validate headers
    # before rendering — TODO confirm against the core helper.
    http_file_headers(source)
    options = http_file_reader_options(source)

    # NOTE(review): ``source`` is embedded unredacted in the rendered code;
    # only ``http_source_review`` is redacted. Confirm that the source holds
    # secret references rather than secret values.
    lines = [
        "from contractforge_core.connectors import download_http_file",
        "",
        f"source = {source!r}",
        "local_path = download_http_file(source)",
        f"reader = spark.read.format({file_format!r})",
    ]
    lines.extend(f"reader = reader.option({key!r}, {value!r})" for key, value in sorted(options.items()))
    lines.extend(
        [
            f"{dataframe_name} = reader.load(local_path)",
            "",
            "# Rendered HTTP source with sensitive values redacted for review:",
            f"http_source_review = {redact_value(source)!r}",
        ]
    )
    return "\n".join(lines) + "\n"
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Interpret core source contracts for Databricks renderers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_databricks.environment import DatabricksEnvironment
|
|
8
|
+
|
|
9
|
+
# Source ``type`` values that count as incremental file sources when the
# source also declares ``intent: file_stream``.
_FILE_STREAM_SOURCE_TYPES = {
    "adls", "avro", "azure_blob", "blob", "csv",
    "delta", "gcs", "json", "object_storage", "orc",
    "parquet", "s3", "text", "xml",
}


def is_incremental_file_source(source: dict[str, Any]) -> bool:
    """Return whether ``source`` should be handled as an incremental file source.

    A source qualifies when its ``type`` is ``"incremental_files"``, or when
    its ``intent`` is ``"file_stream"`` and its ``type`` is one of
    ``_FILE_STREAM_SOURCE_TYPES``. A missing or falsy ``type`` is treated as
    the empty string.
    """
    declared_type = str(source.get("type") or "")
    if declared_type == "incremental_files":
        return True
    is_file_stream = source.get("intent") == "file_stream"
    if not is_file_stream:
        return False
    return declared_type in _FILE_STREAM_SOURCE_TYPES
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def interpret_incremental_files_source(
    source: dict[str, Any],
    *,
    environment: DatabricksEnvironment | None = None,
) -> dict[str, Any]:
    """Translate a core file-stream source into its ``incremental_files`` form.

    The result is a shallow copy of ``source`` with ``type`` forced to
    ``"incremental_files"`` and ``options`` replaced by a string-valued
    mapping. When no ``progress_location`` is set and ``state.location`` is
    an ``object_storage`` location with a path, that path becomes the
    ``progress_location``.

    ``cloudFiles.inferColumnTypes`` and ``cloudFiles.maxFilesPerTrigger`` are
    filled from the source first and then from the environment parameters,
    so a non-``None`` environment parameter wins over the source value.

    Args:
        source: Source contract mapping; it is not mutated.
        environment: Optional environment whose ``parameters`` may supply
            ``incremental_files.infer_column_types`` and
            ``incremental_files.max_files_per_trigger``.

    Returns:
        The interpreted source mapping.

    Raises:
        ValueError: If ``source`` is not an incremental file source, or has
            no ``path``.
    """
    if not is_incremental_file_source(source):
        raise ValueError("incremental file interpretation requires source.type incremental_files or source.intent file_stream")
    rendered = dict(source)
    rendered["type"] = "incremental_files"
    if not rendered.get("path"):
        raise ValueError("Databricks file_stream source requires source.path")

    # ``options`` may be present but null or otherwise not a mapping;
    # ``_options`` already tolerates that, so the raw lookup must as well
    # instead of failing with AttributeError on ``.get``.
    raw_options = source.get("options")
    if not isinstance(raw_options, dict):
        raw_options = {}

    options = _options(source)
    params = (environment.parameters if environment else {}) or {}
    state = source.get("state") if isinstance(source.get("state"), dict) else {}
    state_location = state.get("location") if isinstance(state.get("location"), dict) else {}

    # An explicit progress_location always wins over the state location.
    if not rendered.get("progress_location") and state_location.get("type") == "object_storage" and state_location.get("path"):
        rendered["progress_location"] = state_location["path"]

    # Source-level values are applied first; environment parameters are
    # applied second so they override when they are set.
    _set_bool_option(options, "cloudFiles.inferColumnTypes", raw_options.get("infer_column_types"))
    _set_bool_option(options, "cloudFiles.inferColumnTypes", params.get("incremental_files.infer_column_types"))
    _set_if_present(options, "cloudFiles.maxFilesPerTrigger", source.get("max_files_per_trigger"))
    _set_if_present(options, "cloudFiles.maxFilesPerTrigger", params.get("incremental_files.max_files_per_trigger"))

    rendered["options"] = options
    return rendered
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _options(source: dict[str, Any]) -> dict[str, str]:
    """Return ``source["options"]`` with keys and values converted to ``str``.

    A missing or non-dict ``options`` value yields an empty mapping. The
    ``infer_column_types`` key is left out of the result.
    """
    declared = source.get("options")
    if not isinstance(declared, dict):
        return {}
    skipped = {"infer_column_types"}
    stringified: dict[str, str] = {}
    for name, setting in declared.items():
        if name in skipped:
            continue
        stringified[str(name)] = str(setting)
    return stringified
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _set_bool_option(options: dict[str, str], key: str, value: object) -> None:
    """Store ``value`` under ``key`` unless it is ``None``.

    Real booleans are written as lowercase ``"true"`` / ``"false"``; any
    other value is stored as ``str(value)`` unchanged.
    """
    if value is None:
        return
    if isinstance(value, bool):
        options[key] = "true" if value else "false"
    else:
        options[key] = str(value)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _set_if_present(options: dict[str, str], key: str, value: object) -> None:
    """Store ``str(value)`` under ``key``; do nothing when ``value`` is ``None``."""
    if value is None:
        return
    options[key] = str(value)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Databricks JDBC source rendering."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_core.connectors import JDBC_CONNECTORS, jdbc_common_options
|
|
8
|
+
from contractforge_databricks.security import assert_no_inline_jdbc_secrets, redact_value
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def render_jdbc_python(source: dict[str, Any], *, dataframe_name: str = "df") -> str:
    """Render a Python snippet that reads a JDBC source with ``spark.read``.

    The snippet assigns a parenthesised ``spark.read.format('jdbc')`` chain
    to ``dataframe_name``, with one ``.option(...)`` call per JDBC option in
    sorted key order, followed by a ``jdbc_options_review`` assignment that
    holds the ``redact_value`` form of those options.

    Raises:
        ValueError: If the connector (``connector``, falling back to
            ``type``) is not in ``JDBC_CONNECTORS`` and ``type`` is not
            ``"jdbc"``.
    """
    source_type = source.get("type")
    connector = source.get("connector") or source_type
    if not (connector in JDBC_CONNECTORS or source_type == "jdbc"):
        raise ValueError("JDBC rendering requires a JDBC source connector")

    resolved = jdbc_options(source)
    # Sorted so the rendered output is stable for a given set of options.
    option_calls = [
        f" .option({name!r}, {setting!r})"
        for name, setting in sorted(resolved.items())
    ]
    rendered = [
        f"{dataframe_name} = (",
        " spark.read",
        " .format('jdbc')",
        *option_calls,
        ")",
        "",
        "# Rendered JDBC options with sensitive values redacted for review:",
        f"jdbc_options_review = {redact_value(resolved)!r}",
    ]
    return "\n".join(rendered) + "\n"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def jdbc_options(source: dict[str, Any]) -> dict[str, str]:
    """Return the common JDBC options for ``source`` after the inline-secret check.

    The options come from ``jdbc_common_options`` and are passed through
    ``assert_no_inline_jdbc_secrets`` before being returned unchanged.
    """
    resolved = jdbc_common_options(source)
    # Presumably raises when a credential is written inline - confirm against
    # contractforge_databricks.security.
    assert_no_inline_jdbc_secrets(resolved)
    return resolved
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Databricks source metadata evidence helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from contractforge_core.connectors import source_metadata_from_contract as core_source_metadata_from_contract
|
|
9
|
+
from contractforge_core.semantic import SemanticContract
|
|
10
|
+
from contractforge_databricks.rendering.names import target_full_name
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def source_metadata_from_contract(contract: SemanticContract) -> dict[str, Any]:
    """Build source metadata for ``contract`` using its full target name.

    Delegates to the core ``source_metadata_from_contract`` helper, passing
    ``target_full_name(contract)`` as ``target_table``.
    """
    qualified_target = target_full_name(contract)
    return core_source_metadata_from_contract(contract, target_table=qualified_target)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def render_source_metadata_json(contract: SemanticContract) -> str:
    """Serialize the contract's source metadata as JSON.

    The output uses a two-space indent and sorted keys, and ends with a
    newline.
    """
    metadata = source_metadata_from_contract(contract)
    serialized = json.dumps(metadata, indent=2, sort_keys=True)
    return f"{serialized}\n"
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Databricks native passthrough planning artifacts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from contractforge_core.connectors import native_passthrough_descriptor
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def render_native_passthrough_plan(source: dict[str, Any]) -> str:
    """Render a native passthrough plan for ``source`` as JSON.

    The plan starts from ``kind: databricks_native_passthrough_plan``, is
    merged with the descriptor from ``native_passthrough_descriptor``, and
    then gains ``recommended_databricks_targets`` (derived from the
    descriptor's ``system``) and a fixed list of ``notes``. The output uses
    a two-space indent and sorted keys, and ends with a newline.
    """
    descriptor = native_passthrough_descriptor(source)

    plan: dict[str, Any] = {"kind": "databricks_native_passthrough_plan"}
    # Descriptor entries are merged after ``kind``, so a descriptor that
    # carries its own ``kind`` replaces the value above.
    plan.update(descriptor)
    plan["recommended_databricks_targets"] = _recommended_targets(str(descriptor["system"]))
    plan["notes"] = [
        "Use Databricks-native ingestion where available, such as Lakeflow Connect or Databricks Connections.",
        "Do not implement proprietary SaaS API clients inside contractforge_databricks unless no native path exists.",
        "Adapter execution must remain platform-owned and auditable.",
    ]
    return json.dumps(plan, indent=2, sort_keys=True) + "\n"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _recommended_targets(system: str) -> list[str]:
    """Return the recommended target names for a source system.

    Matching is case-insensitive. Systems that match neither group get the
    generic connection recommendation.
    """
    wanted = system.lower()
    # Built on every call so each caller receives its own list object.
    routes = (
        (
            {"salesforce", "workday", "servicenow", "google_analytics", "google_ads"},
            ["lakeflow_connect"],
        ),
        (
            {"sftp", "ftp"},
            ["databricks_connection", "autoloader"],
        ),
    )
    for systems, targets in routes:
        if wanted in systems:
            return targets
    return ["databricks_connection", "lakeflow_connect_if_available"]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Compatibility re-exports for Databricks JDBC RDS IAM helpers."""
|
|
2
|
+
|
|
3
|
+
from contractforge_core.connectors.databases import (
|
|
4
|
+
generate_rds_iam_auth_token,
|
|
5
|
+
infer_aws_region_from_rds_host,
|
|
6
|
+
parse_jdbc_host_port,
|
|
7
|
+
rds_iam_review_options,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
# Names re-exported from contractforge_core.connectors.databases.
__all__ = [
    "generate_rds_iam_auth_token",
    "infer_aws_region_from_rds_host",
    "parse_jdbc_host_port",
    "rds_iam_review_options",
]
|