contractforge-aws 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contractforge_aws/__init__.py +126 -0
- contractforge_aws/adapter.py +62 -0
- contractforge_aws/annotations/__init__.py +13 -0
- contractforge_aws/annotations/api.py +54 -0
- contractforge_aws/annotations/rendering.py +209 -0
- contractforge_aws/annotations/runtime.py +168 -0
- contractforge_aws/api.py +214 -0
- contractforge_aws/capabilities/__init__.py +5 -0
- contractforge_aws/capabilities/glue_iceberg.py +45 -0
- contractforge_aws/cli/__init__.py +62 -0
- contractforge_aws/cli/apply.py +91 -0
- contractforge_aws/cli/deploy.py +53 -0
- contractforge_aws/cli/glue.py +167 -0
- contractforge_aws/cli/performance.py +65 -0
- contractforge_aws/cli/plan.py +85 -0
- contractforge_aws/cli/project.py +194 -0
- contractforge_aws/cli/project_cleanup.py +176 -0
- contractforge_aws/cli/project_cost.py +139 -0
- contractforge_aws/cli/project_orchestration.py +208 -0
- contractforge_aws/cli/project_orchestration_cost.py +75 -0
- contractforge_aws/cli/project_run.py +108 -0
- contractforge_aws/cli/project_step.py +62 -0
- contractforge_aws/cli/project_support.py +135 -0
- contractforge_aws/cli/project_validation.py +49 -0
- contractforge_aws/cli/runtime.py +120 -0
- contractforge_aws/cli/smoke.py +46 -0
- contractforge_aws/cli/stabilization.py +129 -0
- contractforge_aws/cli/support.py +124 -0
- contractforge_aws/contract_extensions.py +100 -0
- contractforge_aws/cost/__init__.py +6 -0
- contractforge_aws/cost/model.py +19 -0
- contractforge_aws/cost/sql.py +132 -0
- contractforge_aws/deployment_api.py +84 -0
- contractforge_aws/diagnostics/__init__.py +5 -0
- contractforge_aws/diagnostics/portability.py +153 -0
- contractforge_aws/environment.py +53 -0
- contractforge_aws/evidence/__init__.py +70 -0
- contractforge_aws/evidence/athena_ddl.py +127 -0
- contractforge_aws/evidence/database.py +10 -0
- contractforge_aws/evidence/ddl.py +139 -0
- contractforge_aws/evidence/error_runtime.py +102 -0
- contractforge_aws/evidence/failure_runtime.py +82 -0
- contractforge_aws/evidence/glue.py +128 -0
- contractforge_aws/evidence/metadata_runtime.py +88 -0
- contractforge_aws/evidence/run_context_runtime.py +51 -0
- contractforge_aws/evidence/run_helper_runtime.py +47 -0
- contractforge_aws/evidence/run_metadata.py +42 -0
- contractforge_aws/evidence/run_success_runtime.py +85 -0
- contractforge_aws/evidence/runtime.py +31 -0
- contractforge_aws/evidence/source.py +41 -0
- contractforge_aws/evidence/sql.py +99 -0
- contractforge_aws/evidence/stream_runtime.py +136 -0
- contractforge_aws/glue_job_definition.py +117 -0
- contractforge_aws/governance/__init__.py +15 -0
- contractforge_aws/governance/evidence.py +190 -0
- contractforge_aws/governance/lakeformation.py +199 -0
- contractforge_aws/governance/runtime.py +114 -0
- contractforge_aws/lineage/__init__.py +16 -0
- contractforge_aws/lineage/openlineage.py +178 -0
- contractforge_aws/lineage/runtime.py +110 -0
- contractforge_aws/operations/__init__.py +9 -0
- contractforge_aws/operations/sql.py +128 -0
- contractforge_aws/orchestration/__init__.py +13 -0
- contractforge_aws/orchestration/execution_name.py +15 -0
- contractforge_aws/orchestration/project_graph.py +55 -0
- contractforge_aws/orchestration/scheduler.py +99 -0
- contractforge_aws/orchestration/stepfunctions.py +145 -0
- contractforge_aws/performance/__init__.py +6 -0
- contractforge_aws/performance/profile.py +97 -0
- contractforge_aws/performance/sql.py +95 -0
- contractforge_aws/preparation/__init__.py +19 -0
- contractforge_aws/preparation/arrays.py +67 -0
- contractforge_aws/preparation/flatten.py +95 -0
- contractforge_aws/preparation/metadata.py +77 -0
- contractforge_aws/preparation/rendering.py +81 -0
- contractforge_aws/preparation/shape.py +175 -0
- contractforge_aws/preparation/transform.py +198 -0
- contractforge_aws/preparation/utils.py +17 -0
- contractforge_aws/quality/__init__.py +27 -0
- contractforge_aws/quality/dqdl.py +178 -0
- contractforge_aws/quality/enforcement.py +46 -0
- contractforge_aws/quality/expression.py +114 -0
- contractforge_aws/quality/quarantine.py +56 -0
- contractforge_aws/quality/runtime.py +173 -0
- contractforge_aws/rendering/__init__.py +51 -0
- contractforge_aws/rendering/artifact_registry.py +167 -0
- contractforge_aws/rendering/cloudformation.py +112 -0
- contractforge_aws/rendering/deployment.py +209 -0
- contractforge_aws/rendering/error_handler.py +38 -0
- contractforge_aws/rendering/glue_job.py +186 -0
- contractforge_aws/rendering/glue_job_common.py +158 -0
- contractforge_aws/rendering/glue_job_outline.py +111 -0
- contractforge_aws/rendering/iam.py +172 -0
- contractforge_aws/rendering/iam_s3.py +86 -0
- contractforge_aws/rendering/iceberg_config.py +64 -0
- contractforge_aws/rendering/library_runner.py +26 -0
- contractforge_aws/rendering/manifest.py +214 -0
- contractforge_aws/rendering/manifest_boundaries.py +84 -0
- contractforge_aws/rendering/manifest_size.py +28 -0
- contractforge_aws/rendering/names.py +34 -0
- contractforge_aws/rendering/review.py +161 -0
- contractforge_aws/rendering/streaming_job.py +210 -0
- contractforge_aws/rendering/terraform.py +82 -0
- contractforge_aws/rendering/write_mode_review.py +84 -0
- contractforge_aws/runtime/__init__.py +122 -0
- contractforge_aws/runtime/api.py +194 -0
- contractforge_aws/runtime/athena.py +136 -0
- contractforge_aws/runtime/audit.py +92 -0
- contractforge_aws/runtime/dependencies.py +16 -0
- contractforge_aws/runtime/deploy.py +94 -0
- contractforge_aws/runtime/evidence.py +96 -0
- contractforge_aws/runtime/evidence_api.py +25 -0
- contractforge_aws/runtime/glue_job_payload.py +58 -0
- contractforge_aws/runtime/glue_jobs.py +168 -0
- contractforge_aws/runtime/glue_wait.py +34 -0
- contractforge_aws/runtime/lakeformation_api.py +53 -0
- contractforge_aws/runtime/library_runner.py +208 -0
- contractforge_aws/runtime/operations.py +33 -0
- contractforge_aws/runtime/operations_api.py +33 -0
- contractforge_aws/runtime/orchestration.py +189 -0
- contractforge_aws/runtime/publishable.py +60 -0
- contractforge_aws/runtime/s3_artifacts.py +151 -0
- contractforge_aws/runtime_args.py +7 -0
- contractforge_aws/schema/__init__.py +9 -0
- contractforge_aws/schema/runtime.py +105 -0
- contractforge_aws/schema_columns.py +7 -0
- contractforge_aws/security/__init__.py +32 -0
- contractforge_aws/security/http_safety.py +9 -0
- contractforge_aws/security/secrets.py +141 -0
- contractforge_aws/security/source_policy.py +105 -0
- contractforge_aws/smoke/__init__.py +6 -0
- contractforge_aws/smoke/environment.py +139 -0
- contractforge_aws/smoke/minimal.py +45 -0
- contractforge_aws/smoke/models.py +64 -0
- contractforge_aws/smoke/runner.py +84 -0
- contractforge_aws/sources/__init__.py +40 -0
- contractforge_aws/sources/classification.py +215 -0
- contractforge_aws/sources/http_file.py +184 -0
- contractforge_aws/sources/interpret.py +105 -0
- contractforge_aws/sources/jdbc.py +122 -0
- contractforge_aws/sources/native_passthrough.py +165 -0
- contractforge_aws/sources/native_passthrough_candidates.py +125 -0
- contractforge_aws/sources/rds_iam.py +74 -0
- contractforge_aws/sources/rest_api.py +60 -0
- contractforge_aws/sources/spark.py +197 -0
- contractforge_aws/sources/streams.py +61 -0
- contractforge_aws/sources/support.py +33 -0
- contractforge_aws/sources/table_refs.py +77 -0
- contractforge_aws/state/__init__.py +5 -0
- contractforge_aws/state/runtime.py +112 -0
- contractforge_aws/subtargets.py +27 -0
- contractforge_aws/validation.py +10 -0
- contractforge_aws/write_modes/__init__.py +5 -0
- contractforge_aws/write_modes/hash_diff.py +134 -0
- contractforge_aws/write_modes/iceberg.py +151 -0
- contractforge_aws/write_modes/writer.py +53 -0
- contractforge_aws-0.1.0.dist-info/METADATA +241 -0
- contractforge_aws-0.1.0.dist-info/RECORD +160 -0
- contractforge_aws-0.1.0.dist-info/WHEEL +4 -0
- contractforge_aws-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""Public API for the ContractForge AWS adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from importlib.metadata import PackageNotFoundError
|
|
6
|
+
from importlib.metadata import version as _version
|
|
7
|
+
|
|
8
|
+
from contractforge_aws.adapter import AWSAdapter
|
|
9
|
+
from contractforge_aws.api import (
|
|
10
|
+
plan_aws_contract,
|
|
11
|
+
render_aws_annotations_evidence_sql,
|
|
12
|
+
render_aws_annotations_plan,
|
|
13
|
+
render_aws_contract,
|
|
14
|
+
render_aws_deployment_manifest,
|
|
15
|
+
render_aws_glue_job_cloudformation,
|
|
16
|
+
render_aws_glue_job_definition,
|
|
17
|
+
render_aws_glue_job_iam_policy,
|
|
18
|
+
render_aws_glue_job_terraform,
|
|
19
|
+
render_aws_lake_formation_evidence_sql,
|
|
20
|
+
render_aws_lake_formation_plan,
|
|
21
|
+
render_aws_native_passthrough_plan,
|
|
22
|
+
render_aws_operations_evidence_sql,
|
|
23
|
+
render_aws_operations_json,
|
|
24
|
+
render_aws_operational_cost_query,
|
|
25
|
+
render_aws_quality_dqdl,
|
|
26
|
+
)
|
|
27
|
+
from contractforge_aws.runtime import (
|
|
28
|
+
AthenaQueryResult,
|
|
29
|
+
AthenaSqlRunner,
|
|
30
|
+
audit_evidence_tables,
|
|
31
|
+
apply_aws_annotations_contract,
|
|
32
|
+
apply_aws_annotations_plan,
|
|
33
|
+
apply_aws_lake_formation_contract,
|
|
34
|
+
apply_aws_lake_formation_plan,
|
|
35
|
+
create_or_update_schedule_payload,
|
|
36
|
+
create_or_update_state_machine_payload,
|
|
37
|
+
ensure_aws_evidence_tables,
|
|
38
|
+
deploy_aws_contract_to_glue,
|
|
39
|
+
get_aws_glue_job_run_status,
|
|
40
|
+
get_state_machine_execution_status,
|
|
41
|
+
publish_aws_contract_artifacts_to_s3,
|
|
42
|
+
record_aws_operations_contract,
|
|
43
|
+
reconcile_aws_glue_job_run_evidence,
|
|
44
|
+
register_aws_glue_job,
|
|
45
|
+
register_aws_glue_job_definition_payload,
|
|
46
|
+
render_aws_glue_job_run_evidence_sql,
|
|
47
|
+
start_aws_glue_job_run,
|
|
48
|
+
start_state_machine_execution,
|
|
49
|
+
wait_aws_glue_job_run,
|
|
50
|
+
wait_state_machine_execution,
|
|
51
|
+
)
|
|
52
|
+
from contractforge_aws.orchestration import (
|
|
53
|
+
render_eventbridge_scheduler_payload,
|
|
54
|
+
render_stepfunctions_state_machine_definition,
|
|
55
|
+
render_stepfunctions_state_machine_payload,
|
|
56
|
+
)
|
|
57
|
+
from contractforge_aws.capabilities import AWS_SUBTARGET_GLUE_ICEBERG, glue_iceberg_capabilities
|
|
58
|
+
from contractforge_aws.cost import CostModel, render_operational_cost_query
|
|
59
|
+
from contractforge_aws.environment import AWSEnvironment
|
|
60
|
+
from contractforge_aws.lineage import (
|
|
61
|
+
build_openlineage_event,
|
|
62
|
+
openlineage_namespace,
|
|
63
|
+
render_openlineage_insert_sql,
|
|
64
|
+
)
|
|
65
|
+
from contractforge_aws.subtargets import list_aws_subtargets
|
|
66
|
+
|
|
67
|
+
# Resolve the package version from the installed distribution metadata.
try:
    __version__ = _version("contractforge-aws")
except PackageNotFoundError:  # pragma: no cover - editable/source tree without installed metadata
    # No installed metadata was found; fall back to a hard-coded version string.
    __version__ = "0.1.0"
|
|
71
|
+
|
|
72
|
+
__all__ = [
|
|
73
|
+
"AWSAdapter",
|
|
74
|
+
"AWSEnvironment",
|
|
75
|
+
"AWS_SUBTARGET_GLUE_ICEBERG",
|
|
76
|
+
"AthenaQueryResult",
|
|
77
|
+
"AthenaSqlRunner",
|
|
78
|
+
"CostModel",
|
|
79
|
+
"__version__",
|
|
80
|
+
"apply_aws_annotations_contract",
|
|
81
|
+
"apply_aws_annotations_plan",
|
|
82
|
+
"audit_evidence_tables",
|
|
83
|
+
"create_or_update_schedule_payload",
|
|
84
|
+
"create_or_update_state_machine_payload",
|
|
85
|
+
"ensure_aws_evidence_tables",
|
|
86
|
+
"deploy_aws_contract_to_glue",
|
|
87
|
+
"get_aws_glue_job_run_status",
|
|
88
|
+
"get_state_machine_execution_status",
|
|
89
|
+
"glue_iceberg_capabilities",
|
|
90
|
+
"build_openlineage_event",
|
|
91
|
+
"apply_aws_lake_formation_contract",
|
|
92
|
+
"apply_aws_lake_formation_plan",
|
|
93
|
+
"list_aws_subtargets",
|
|
94
|
+
"openlineage_namespace",
|
|
95
|
+
"plan_aws_contract",
|
|
96
|
+
"publish_aws_contract_artifacts_to_s3",
|
|
97
|
+
"record_aws_operations_contract",
|
|
98
|
+
"reconcile_aws_glue_job_run_evidence",
|
|
99
|
+
"register_aws_glue_job",
|
|
100
|
+
"register_aws_glue_job_definition_payload",
|
|
101
|
+
"render_aws_annotations_evidence_sql",
|
|
102
|
+
"render_aws_annotations_plan",
|
|
103
|
+
"render_aws_contract",
|
|
104
|
+
"render_aws_deployment_manifest",
|
|
105
|
+
"render_aws_glue_job_cloudformation",
|
|
106
|
+
"render_aws_glue_job_definition",
|
|
107
|
+
"render_aws_glue_job_iam_policy",
|
|
108
|
+
"render_aws_glue_job_terraform",
|
|
109
|
+
"render_aws_glue_job_run_evidence_sql",
|
|
110
|
+
"render_aws_lake_formation_evidence_sql",
|
|
111
|
+
"render_aws_lake_formation_plan",
|
|
112
|
+
"render_aws_native_passthrough_plan",
|
|
113
|
+
"render_aws_operations_evidence_sql",
|
|
114
|
+
"render_aws_operations_json",
|
|
115
|
+
"render_aws_operational_cost_query",
|
|
116
|
+
"render_aws_quality_dqdl",
|
|
117
|
+
"render_operational_cost_query",
|
|
118
|
+
"render_openlineage_insert_sql",
|
|
119
|
+
"render_eventbridge_scheduler_payload",
|
|
120
|
+
"render_stepfunctions_state_machine_definition",
|
|
121
|
+
"render_stepfunctions_state_machine_payload",
|
|
122
|
+
"start_aws_glue_job_run",
|
|
123
|
+
"start_state_machine_execution",
|
|
124
|
+
"wait_aws_glue_job_run",
|
|
125
|
+
"wait_state_machine_execution",
|
|
126
|
+
]
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""ContractForge adapter implementation for AWS targets."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from contractforge_core.adapters import RenderedArtifacts
|
|
9
|
+
from contractforge_core.capabilities import PlatformCapabilities
|
|
10
|
+
from contractforge_core.planner import ExecutionPlan, PlanningResult, plan_contract
|
|
11
|
+
from contractforge_core.semantic import SemanticContract
|
|
12
|
+
from contractforge_aws.capabilities import AWS_SUBTARGET_GLUE_ICEBERG, glue_iceberg_capabilities
|
|
13
|
+
from contractforge_aws.contract_extensions import aws_extension_warnings
|
|
14
|
+
from contractforge_aws.diagnostics import aws_planning_warnings, unsupported_source_blockers
|
|
15
|
+
from contractforge_aws.environment import AWSEnvironment
|
|
16
|
+
from contractforge_aws.rendering import render_aws_review_artifacts
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True)
class AWSAdapter:
    """Plan and render ContractForge contracts for the AWS Glue/Iceberg target.

    Planning delegates to the core planner and layers AWS-specific blockers and
    warnings on top; rendering delegates to ``render_aws_review_artifacts``.
    """

    declared_capabilities: PlatformCapabilities
    environment: AWSEnvironment = AWSEnvironment()
    name: str = AWS_SUBTARGET_GLUE_ICEBERG

    @classmethod
    def glue_iceberg(cls, environment: dict[str, Any] | None = None) -> "AWSAdapter":
        """Build an adapter with Glue/Iceberg capabilities and the given environment mapping."""
        capabilities = glue_iceberg_capabilities()
        aws_environment = AWSEnvironment.from_contract(environment)
        return cls(capabilities, environment=aws_environment)

    def capabilities(self) -> PlatformCapabilities:
        """Return the capabilities this adapter was constructed with."""
        return self.declared_capabilities

    def plan(self, contract: SemanticContract) -> PlanningResult:
        """Plan ``contract`` and merge AWS-specific diagnostics into the result.

        Unsupported sources short-circuit to an ``UNSUPPORTED`` result. Otherwise
        the core planning result is returned, with AWS warnings appended and a
        ``SUPPORTED`` status downgraded to ``SUPPORTED_WITH_WARNINGS`` when any
        AWS warning exists.
        """
        blockers = unsupported_source_blockers(contract)
        if blockers:
            return PlanningResult(status="UNSUPPORTED", plan=None, blockers=blockers)

        base = plan_contract(contract, self.capabilities())
        extra_warnings = aws_planning_warnings(contract) + aws_extension_warnings(contract)
        if extra_warnings:
            merged = base.warnings + extra_warnings
            if base.status != "SUPPORTED":
                return PlanningResult(
                    status=base.status,
                    plan=base.plan,
                    blockers=base.blockers,
                    warnings=merged,
                )
            return PlanningResult(status="SUPPORTED_WITH_WARNINGS", plan=base.plan, warnings=merged)
        return base

    def render(self, plan: ExecutionPlan) -> RenderedArtifacts:
        """Render review artifacts for an existing execution plan (no planning result attached)."""
        return render_aws_review_artifacts(plan=plan, planning=None, environment=self.environment)

    def render_contract(self, contract: SemanticContract) -> RenderedArtifacts:
        """Plan ``contract`` and render review artifacts from the planning outcome."""
        outcome = self.plan(contract)
        return render_aws_review_artifacts(
            plan=outcome.plan,
            planning=outcome,
            contract=contract,
            environment=self.environment,
        )
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""AWS Glue Catalog annotation planning."""
|
|
2
|
+
|
|
3
|
+
from contractforge_aws.annotations.rendering import (
|
|
4
|
+
annotations_plan,
|
|
5
|
+
render_annotations_evidence_sql,
|
|
6
|
+
render_annotations_plan,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"annotations_plan",
|
|
11
|
+
"render_annotations_evidence_sql",
|
|
12
|
+
"render_annotations_plan",
|
|
13
|
+
]
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Runtime-facing AWS annotation helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from contractforge_core.contracts import semantic_contract_from_mapping
|
|
8
|
+
from contractforge_aws.capabilities import AWS_SUBTARGET_GLUE_ICEBERG
|
|
9
|
+
from contractforge_aws.annotations.rendering import render_annotations_plan
|
|
10
|
+
from contractforge_aws.annotations.runtime import (
|
|
11
|
+
GlueCatalogAnnotationApplyResult,
|
|
12
|
+
apply_glue_catalog_annotations_plan,
|
|
13
|
+
)
|
|
14
|
+
from contractforge_aws.subtargets import validate_aws_subtarget
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def apply_aws_annotations_plan(
    plan: str | dict[str, Any],
    *,
    glue_client: Any | None = None,
    catalog_id: str | None = None,
    skip_archive: bool = True,
) -> GlueCatalogAnnotationApplyResult:
    """Apply an already rendered annotation plan.

    Thin public wrapper: every argument is forwarded unchanged to
    ``apply_glue_catalog_annotations_plan``.
    """
    forwarded = {
        "glue_client": glue_client,
        "catalog_id": catalog_id,
        "skip_archive": skip_archive,
    }
    return apply_glue_catalog_annotations_plan(plan, **forwarded)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def apply_aws_annotations_contract(
    contract: dict[str, Any],
    *,
    subtarget: str = AWS_SUBTARGET_GLUE_ICEBERG,
    glue_client: Any | None = None,
    catalog_id: str | None = None,
    skip_archive: bool = True,
) -> GlueCatalogAnnotationApplyResult:
    """Render the annotation plan for a contract mapping and apply it.

    The subtarget is validated first. When the rendered plan is empty, a
    ``NOOP`` result is returned for the contract target (namespace falling back
    to ``"default"``) without calling the Glue apply helper.
    """
    validate_aws_subtarget(subtarget)
    semantic = semantic_contract_from_mapping(contract)
    rendered = render_annotations_plan(semantic)
    if rendered:
        return apply_glue_catalog_annotations_plan(
            rendered,
            glue_client=glue_client,
            catalog_id=catalog_id,
            skip_archive=skip_archive,
        )
    target = semantic.target
    database = target.namespace or "default"
    return GlueCatalogAnnotationApplyResult(database, target.name, "NOOP")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
__all__ = ["apply_aws_annotations_contract", "apply_aws_annotations_plan"]
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""Render AWS Glue Catalog annotation plans and evidence."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from datetime import date, datetime
|
|
8
|
+
from functools import singledispatch
|
|
9
|
+
from typing import Any, Callable
|
|
10
|
+
|
|
11
|
+
from contractforge_core.security import redact_value
|
|
12
|
+
from contractforge_core.semantic import SemanticContract
|
|
13
|
+
from contractforge_aws.evidence.ddl import evidence_table_names
|
|
14
|
+
from contractforge_aws.rendering.names import glue_database_name, glue_table_name, iceberg_table_name
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True)
class _TagExtractor:
    """Pairs a tag-key prefix with a callable that derives tags from an annotation mapping."""

    # Prefix that _tags() prepends to every key produced by ``extract``.
    prefix: str
    # Callable taking the annotation mapping and returning string key/value tags.
    extract: Callable[[dict[str, Any]], dict[str, str]]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Extractors used for table-level annotations. _tags() prepends each prefix to the
# keys its extractor returns: "tags" entries keep their own keys, "aliases" become
# alias_<n>, and "deprecated" settings become deprecated_<field>.
_TABLE_TAG_EXTRACTORS: tuple[_TagExtractor, ...] = (
    _TagExtractor("", lambda data: _str_map(data.get("tags"))),
    _TagExtractor("alias_", lambda data: _indexed_tags(data.get("aliases"))),
    _TagExtractor("deprecated_", lambda data: _deprecated_tags(data.get("deprecated"))),
)
# Column-level annotations reuse the table extractors and additionally emit
# pii_<field> tags from the "pii" mapping.
_COLUMN_TAG_EXTRACTORS: tuple[_TagExtractor, ...] = (
    *_TABLE_TAG_EXTRACTORS,
    _TagExtractor("pii_", lambda data: _pii_tags(data.get("pii"))),
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def render_annotations_plan(contract: SemanticContract) -> str:
    """Return the annotation plan as indented, key-sorted JSON, or ``""`` when it has no changes."""
    payload = annotations_plan(contract)
    return json.dumps(payload, indent=2, sort_keys=True) if payload["changes"] else ""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def annotations_plan(contract: SemanticContract) -> dict[str, Any]:
    """Build the annotation plan mapping for ``contract``.

    Annotation intent is read from ``contract.governance.annotations``; anything
    that is not a dict is treated as "no annotations". The plan status is
    ``"PLANNED"`` when at least one change exists and ``"NOOP"`` otherwise.
    """
    governance = contract.governance
    declared = governance.annotations if governance else None
    if not isinstance(declared, dict):
        declared = {}
    changes = _annotation_changes(declared)

    plan: dict[str, Any] = {}
    plan["target"] = iceberg_table_name(contract)
    plan["resource"] = {"DatabaseName": glue_database_name(contract), "Name": glue_table_name(contract)}
    plan["status"] = "PLANNED" if changes else "NOOP"
    plan["apply_operation"] = "glue:UpdateTable"
    plan["note"] = "Application requires reading the current Glue table definition and submitting a full TableInput."
    plan["changes"] = changes
    return plan
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def render_annotations_evidence_sql(
    contract: SemanticContract,
    *,
    database: str,
    run_id: str = "${run_id}",
    captured_at_utc: datetime | None = None,
) -> str:
    """Render one INSERT statement per planned annotation change.

    Returns a SQL comment line when the contract declares no annotation changes;
    otherwise the statements are newline-joined with a trailing newline and
    target the ``"annotations"`` table reported by ``evidence_table_names``.
    """
    evidence_rows = _annotation_evidence_rows(contract, run_id=run_id, captured_at_utc=captured_at_utc)
    if not evidence_rows:
        return "-- No annotation intent declared.\n"
    evidence_table = evidence_table_names(database)["annotations"]
    statements = [_insert(evidence_table, row) for row in evidence_rows]
    return "\n".join(statements) + "\n"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _annotation_changes(annotations: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the table-level changes followed by the column-level changes."""
    table_changes = _table_changes(_mapping(annotations.get("table")))
    column_changes = _column_changes(_mapping(annotations.get("columns")))
    return [*table_changes, *column_changes]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _table_changes(table: dict[str, Any]) -> list[dict[str, Any]]:
    """Return change records for a table's description (if truthy) and its derived tags."""
    description = table.get("description")
    changes = (
        [_change("table", "description", None, "Description", str(description))]
        if description
        else []
    )
    changes.extend(
        _change("table", "parameter", None, "Parameters." + tag_key, tag_value, key=tag_key)
        for tag_key, tag_value in _tags(table, _TABLE_TAG_EXTRACTORS).items()
    )
    return changes
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _column_changes(columns: dict[str, Any]) -> list[dict[str, Any]]:
    """Return change records for every column: description first (if truthy), then derived tags."""
    changes: list[dict[str, Any]] = []
    for column, config in columns.items():
        column_name = str(column)
        settings = _mapping(config)
        description = settings.get("description")
        if description:
            changes.append(
                _change(
                    "column",
                    "description",
                    column_name,
                    "StorageDescriptor.Columns[].Comment",
                    str(description),
                )
            )
        changes.extend(
            _change(
                "column",
                "parameter",
                column_name,
                f"StorageDescriptor.Columns[].Parameters.{tag_key}",
                tag_value,
                key=tag_key,
            )
            for tag_key, tag_value in _tags(settings, _COLUMN_TAG_EXTRACTORS).items()
        )
    return changes
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _change(scope: str, kind: str, column: str | None, glue_path: str, value: str, *, key: str | None = None) -> dict[str, Any]:
|
|
94
|
+
return {
|
|
95
|
+
"annotation_scope": scope,
|
|
96
|
+
"annotation_type": kind,
|
|
97
|
+
"column_name": column,
|
|
98
|
+
"key": key or kind,
|
|
99
|
+
"value": value,
|
|
100
|
+
"glue_path": glue_path,
|
|
101
|
+
"status": "PLANNED",
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _annotation_evidence_rows(
    contract: SemanticContract,
    *,
    run_id: str,
    captured_at_utc: datetime | None,
) -> list[dict[str, Any]]:
    """Build one evidence row per planned annotation change.

    When no capture timestamp is supplied, 1970-01-01 00:00:00 is used. The key
    order of each row is significant: it becomes the column order of the
    rendered INSERT statement.
    """
    captured = captured_at_utc if captured_at_utc else datetime(1970, 1, 1, 0, 0, 0)
    target = iceberg_table_name(contract)
    copied_fields = ("annotation_scope", "annotation_type", "column_name", "key", "value", "status")

    rows: list[dict[str, Any]] = []
    for change in annotations_plan(contract)["changes"]:
        row: dict[str, Any] = {"run_id": run_id, "target_table": target}
        for field_name in copied_fields:
            row[field_name] = change[field_name]
        row["applied_sql"] = "glue:UpdateTable"
        row["annotation_ts_utc"] = captured
        row["annotation_date"] = captured.date()
        row["framework_version"] = "contractforge-aws"
        row["ctrl_schema_version"] = 1
        rows.append(row)
    return rows
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _tags(data: dict[str, Any], extractors: tuple[_TagExtractor, ...]) -> dict[str, str]:
|
|
129
|
+
tags: dict[str, str] = {}
|
|
130
|
+
for extractor in extractors:
|
|
131
|
+
tags.update({f"{extractor.prefix}{key}": value for key, value in extractor.extract(data).items()})
|
|
132
|
+
return tags
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _mapping(value: object) -> dict[str, Any]:
|
|
136
|
+
return dict(value) if isinstance(value, dict) else {}
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _str_map(value: object) -> dict[str, str]:
    """Stringify the keys and values of a mapping; non-dict input yields an empty dict."""
    converted: dict[str, str] = {}
    for raw_key, raw_item in _mapping(value).items():
        converted[str(raw_key)] = _tag_value(raw_item)
    return converted
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _indexed_tags(value: object) -> dict[str, str]:
    """Map each listed item to its 1-based position, rendered as a string key."""
    indexed: dict[str, str] = {}
    for position, item in enumerate(_as_list(value), start=1):
        indexed[str(position)] = item
    return indexed
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _deprecated_tags(value: object) -> dict[str, str]:
    """Derive deprecation tags from a non-empty mapping.

    The result always carries ``enabled="true"`` plus whichever of ``since``,
    ``replacement`` and ``removal_date`` hold a truthy value. Empty or non-dict
    input yields no tags.
    """
    deprecated = _mapping(value)
    if not deprecated:
        return {}
    tags = {"enabled": "true"}
    for field_name in ("since", "replacement", "removal_date"):
        if deprecated.get(field_name):
            tags[field_name] = str(deprecated[field_name])
    return tags
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _pii_tags(value: object) -> dict[str, str]:
    """Derive PII tags from a non-empty mapping.

    Missing entries default to ``enabled=True``, ``type="unknown"`` and
    ``sensitivity="internal"``. Empty or non-dict input yields no tags.
    """
    pii = _mapping(value)
    if not pii:
        return {}
    enabled = _tag_value(pii.get("enabled", True))
    pii_type = str(pii.get("type", "unknown"))
    sensitivity = str(pii.get("sensitivity", "internal"))
    return dict(enabled=enabled, type=pii_type, sensitivity=sensitivity)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _as_list(value: object) -> list[str]:
|
|
166
|
+
if value is None:
|
|
167
|
+
return []
|
|
168
|
+
if isinstance(value, str):
|
|
169
|
+
return [item.strip() for item in value.split("|") if item.strip()]
|
|
170
|
+
return [str(item).strip() for item in value if str(item).strip()] # type: ignore[union-attr]
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _tag_value(value: object) -> str:
|
|
174
|
+
return str(value).lower() if isinstance(value, bool) else str(value)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _insert(table: str, columns: dict[str, Any]) -> str:
    """Render a single-row INSERT statement, omitting columns whose value is ``None``."""
    present = [(name, value) for name, value in columns.items() if value is not None]
    column_sql = ", ".join(_quote_identifier(name) for name, _ in present)
    value_sql = ", ".join(_literal(value) for _, value in present)
    return f"INSERT INTO {table} ({column_sql}) VALUES ({value_sql});"
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@singledispatch
|
|
185
|
+
def _literal(value: Any) -> str:
|
|
186
|
+
return _string(str(redact_value(value)))
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@_literal.register(int)
|
|
190
|
+
def _literal_int(value: int) -> str:
|
|
191
|
+
return str(value)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@_literal.register(datetime)
|
|
195
|
+
def _literal_datetime(value: datetime) -> str:
|
|
196
|
+
return f"TIMESTAMP {_string(value.strftime('%Y-%m-%d %H:%M:%S'))}"
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
@_literal.register(date)
|
|
200
|
+
def _literal_date(value: date) -> str:
|
|
201
|
+
return f"DATE {_string(value.strftime('%Y-%m-%d'))}"
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _string(value: str) -> str:
|
|
205
|
+
return "'" + value.replace("'", "''") + "'"
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _quote_identifier(value: str) -> str:
|
|
209
|
+
return f"`{str(value).replace('`', '``')}`"
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""Optional AWS Glue Catalog annotation apply helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any, Callable
|
|
8
|
+
|
|
9
|
+
from contractforge_aws.runtime.dependencies import require_boto3
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class GlueCatalogAnnotationApplyResult:
    """Immutable outcome of applying an annotation plan to one Glue table."""

    # Glue database name taken from the plan's resource.
    database: str
    # Glue table name taken from the plan's resource.
    table: str
    # Outcome label; the apply helpers in this module use "NOOP" and "SUCCESS".
    status: str
    # Number of changes applied to the table input (defaults to 0).
    applied: int = 0
    # Presumably the number of plan changes that were not applied (defaults to 0).
    skipped: int = 0
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True)
class AnnotationChangeHandler:
    """Binds an (annotation scope, annotation type) pair to the callable applying such a change.

    ``apply`` receives the mutable table input and the change record and mutates
    the table input in place.
    """

    scope: str
    kind: str
    apply: Callable[[dict[str, Any], dict[str, Any]], None]

    @property
    def key(self) -> tuple[str, str]:
        """Return the ``(scope, kind)`` pair used as the dispatch-table key."""
        return (self.scope, self.kind)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def apply_glue_catalog_annotations_plan(
    plan: str | dict[str, Any],
    *,
    glue_client: Any | None = None,
    catalog_id: str | None = None,
    skip_archive: bool = True,
) -> GlueCatalogAnnotationApplyResult:
    """Apply an annotation plan to a Glue table via ``get_table`` + ``update_table``.

    Args:
        plan: The plan as a JSON string or an already decoded mapping.
        glue_client: Client exposing ``get_table`` and ``update_table``. When
            falsy, a boto3 ``glue`` client is created via ``require_boto3()``.
        catalog_id: Optional catalog id, added as ``CatalogId`` to both calls.
        skip_archive: Forwarded as ``SkipArchive`` to ``update_table``.

    Returns:
        A ``NOOP`` result when the plan carries no changes (no client is created
        and no API call is made). Otherwise a ``SUCCESS`` result whose
        ``applied`` counts the changes that had a registered handler and whose
        ``skipped`` counts the remaining changes, so unrecognized changes are
        no longer dropped silently.

    Raises:
        ValueError: If the plan JSON does not decode to an object, the plan has
            no usable ``resource``, or a column change targets a column that the
            table definition does not contain.
    """
    payload = _plan_payload(plan)
    resource = _resource(payload)
    changes = _changes(payload)
    database, table_name = resource["DatabaseName"], resource["Name"]
    if not changes:
        return GlueCatalogAnnotationApplyResult(database, table_name, "NOOP")

    client = glue_client or require_boto3().client("glue")
    get_args = _catalog_args(catalog_id, DatabaseName=database, Name=table_name)
    table = client.get_table(**get_args)["Table"]

    # UpdateTable replaces the definition, so start from the current table and
    # only overlay the planned annotation changes.
    table_input = _table_input(table)
    applied = _apply_changes(table_input, changes)

    # NOTE(review): ``Name`` is sent as a top-level UpdateTable argument in addition
    # to ``TableInput["Name"]`` - confirm the installed botocore model accepts it.
    update_args = _catalog_args(
        catalog_id,
        DatabaseName=database,
        Name=table_name,
        TableInput=table_input,
        SkipArchive=skip_archive,
    )
    client.update_table(**update_args)
    return GlueCatalogAnnotationApplyResult(
        database,
        table_name,
        "SUCCESS",
        applied=applied,
        # Changes without a registered (scope, type) handler were not applied.
        skipped=len(changes) - applied,
    )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _plan_payload(plan: str | dict[str, Any]) -> dict[str, Any]:
|
|
62
|
+
if isinstance(plan, str):
|
|
63
|
+
loaded = json.loads(plan)
|
|
64
|
+
if not isinstance(loaded, dict):
|
|
65
|
+
raise ValueError("Glue Catalog annotation plan JSON must decode to an object")
|
|
66
|
+
return loaded
|
|
67
|
+
return dict(plan)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _resource(plan: dict[str, Any]) -> dict[str, str]:
|
|
71
|
+
resource = plan.get("resource")
|
|
72
|
+
if not isinstance(resource, dict):
|
|
73
|
+
raise ValueError("Glue Catalog annotation plan requires resource")
|
|
74
|
+
database = str(resource.get("DatabaseName") or "").strip()
|
|
75
|
+
table = str(resource.get("Name") or "").strip()
|
|
76
|
+
if not database or not table:
|
|
77
|
+
raise ValueError("Glue Catalog annotation plan requires resource.DatabaseName and resource.Name")
|
|
78
|
+
return {"DatabaseName": database, "Name": table}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _changes(plan: dict[str, Any]) -> list[dict[str, Any]]:
|
|
82
|
+
changes = plan.get("changes")
|
|
83
|
+
if not isinstance(changes, list):
|
|
84
|
+
return []
|
|
85
|
+
return [dict(item) for item in changes if isinstance(item, dict)]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _table_input(table: dict[str, Any]) -> dict[str, Any]:
    """Copy the allow-listed, non-``None`` keys of a Glue table description into a new dict."""
    table_input: dict[str, Any] = {}
    for key in _TABLE_INPUT_KEYS:
        current = table.get(key)
        if current is not None:
            table_input[key] = current
    return table_input
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _apply_changes(table_input: dict[str, Any], changes: list[dict[str, Any]]) -> int:
    """Apply every change that has a registered handler and return how many were applied.

    Changes whose ``(annotation_scope, annotation_type)`` pair has no handler
    are ignored.
    """
    count = 0
    for change in changes:
        lookup = (str(change.get("annotation_scope")), str(change.get("annotation_type")))
        handler = _CHANGE_HANDLERS.get(lookup)
        if handler is None:
            continue
        handler.apply(table_input, change)
        count += 1
    return count
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _apply_table_description(table_input: dict[str, Any], change: dict[str, Any]) -> None:
|
|
103
|
+
table_input["Description"] = str(change.get("value") or "")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _apply_table_parameter(table_input: dict[str, Any], change: dict[str, Any]) -> None:
    """Store the change value under ``change["key"]`` in the table ``Parameters``."""
    # The value is computed before the Parameters lookup, matching the
    # evaluation order of a single subscript assignment.
    text = str(change.get("value") or "")
    parameters = _parameters(table_input)
    parameters[str(change["key"])] = text
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _apply_column_description(table_input: dict[str, Any], change: dict[str, Any]) -> None:
    """Set the ``Comment`` of the column named by ``change["column_name"]``."""
    text = str(change.get("value") or "")
    column = _column(table_input, str(change["column_name"]))
    column["Comment"] = text
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _apply_column_parameter(table_input: dict[str, Any], change: dict[str, Any]) -> None:
    """Store the change value under ``change["key"]`` in the target column's ``Parameters``."""
    column = _column(table_input, str(change["column_name"]))
    text = str(change.get("value") or "")
    parameters = column.setdefault("Parameters", {})
    parameters[str(change["key"])] = text
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _parameters(table_input: dict[str, Any]) -> dict[str, str]:
|
|
120
|
+
value = table_input.setdefault("Parameters", {})
|
|
121
|
+
if not isinstance(value, dict):
|
|
122
|
+
raise ValueError("Glue table Parameters must be an object")
|
|
123
|
+
return value
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _column(table_input: dict[str, Any], name: str) -> dict[str, Any]:
|
|
127
|
+
columns = table_input.get("StorageDescriptor", {}).get("Columns")
|
|
128
|
+
if not isinstance(columns, list):
|
|
129
|
+
raise ValueError("Glue table StorageDescriptor.Columns is required for column annotations")
|
|
130
|
+
for column in columns:
|
|
131
|
+
if isinstance(column, dict) and column.get("Name") == name:
|
|
132
|
+
return column
|
|
133
|
+
raise ValueError(f"Glue table does not contain column {name!r}")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _catalog_args(catalog_id: str | None, **kwargs: Any) -> dict[str, Any]:
|
|
137
|
+
payload = dict(kwargs)
|
|
138
|
+
if catalog_id:
|
|
139
|
+
payload["CatalogId"] = catalog_id
|
|
140
|
+
return payload
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# Keys that _table_input() copies from a Glue table description into the
# TableInput payload; every other key of the description is dropped.
_TABLE_INPUT_KEYS = (
    "Name",
    "Description",
    "Owner",
    "LastAccessTime",
    "LastAnalyzedTime",
    "Retention",
    "StorageDescriptor",
    "PartitionKeys",
    "ViewOriginalText",
    "ViewExpandedText",
    "TableType",
    "Parameters",
    "TargetTable",
)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
# Dispatch table keyed by (annotation_scope, annotation_type). _apply_changes()
# looks handlers up here and ignores changes whose key is not registered.
_CHANGE_HANDLERS = {
    handler.key: handler
    for handler in (
        AnnotationChangeHandler("table", "description", _apply_table_description),
        AnnotationChangeHandler("table", "parameter", _apply_table_parameter),
        AnnotationChangeHandler("column", "description", _apply_column_description),
        AnnotationChangeHandler("column", "parameter", _apply_column_parameter),
    )
}
|