serverless-data-mesh 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. serverless_data_mesh/__init__.py +93 -0
  2. serverless_data_mesh/catalog/__init__.py +6 -0
  3. serverless_data_mesh/catalog/glue_connector.py +17 -0
  4. serverless_data_mesh/catalog/glue_rest.py +134 -0
  5. serverless_data_mesh/cli.py +165 -0
  6. serverless_data_mesh/config.py +42 -0
  7. serverless_data_mesh/dashboard/__init__.py +5 -0
  8. serverless_data_mesh/dashboard/cloudwatch.py +80 -0
  9. serverless_data_mesh/dashboard/trust.py +162 -0
  10. serverless_data_mesh/exceptions.py +23 -0
  11. serverless_data_mesh/governance/__init__.py +9 -0
  12. serverless_data_mesh/governance/consumer_sla.py +109 -0
  13. serverless_data_mesh/lineage/__init__.py +5 -0
  14. serverless_data_mesh/lineage/openlineage.py +96 -0
  15. serverless_data_mesh/local/__init__.py +5 -0
  16. serverless_data_mesh/local/runtime.py +380 -0
  17. serverless_data_mesh/metrics/__init__.py +5 -0
  18. serverless_data_mesh/metrics/mesh_trust.py +56 -0
  19. serverless_data_mesh/orchestration/__init__.py +28 -0
  20. serverless_data_mesh/orchestration/canary.py +127 -0
  21. serverless_data_mesh/orchestration/coordinator.py +265 -0
  22. serverless_data_mesh/orchestration/durable_steps.py +74 -0
  23. serverless_data_mesh/orchestration/reprocess.py +143 -0
  24. serverless_data_mesh/orchestration/state.py +16 -0
  25. serverless_data_mesh/py.typed +0 -0
  26. serverless_data_mesh/rules/__init__.py +8 -0
  27. serverless_data_mesh/rules/sparkrules_connector.py +193 -0
  28. serverless_data_mesh/scaffold/__init__.py +5 -0
  29. serverless_data_mesh/scaffold/init_domain.py +210 -0
  30. serverless_data_mesh/types/__init__.py +21 -0
  31. serverless_data_mesh/types/workload.py +123 -0
  32. serverless_data_mesh/verification/__init__.py +21 -0
  33. serverless_data_mesh/verification/backend.py +41 -0
  34. serverless_data_mesh/verification/fallback.py +200 -0
  35. serverless_data_mesh/verification/vrp.py +202 -0
  36. serverless_data_mesh-0.2.0.dist-info/METADATA +143 -0
  37. serverless_data_mesh-0.2.0.dist-info/RECORD +40 -0
  38. serverless_data_mesh-0.2.0.dist-info/WHEEL +4 -0
  39. serverless_data_mesh-0.2.0.dist-info/entry_points.txt +2 -0
  40. serverless_data_mesh-0.2.0.dist-info/licenses/LICENSE +17 -0
@@ -0,0 +1,162 @@
1
+ """Mesh trust dashboard: real-time VRP status per domain."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from serverless_data_mesh.dashboard.cloudwatch import fetch_cloudwatch_trust_rows
11
+
12
+
13
def _demo_rows() -> list[dict[str, Any]]:
    """Return the four hard-coded sample rows shown when no real data is used.

    Three domains are reported as ``PASS`` and stamped with the current UTC
    time (12-hour ``HH:MM AM/PM``); ``inventory`` is a fixed ``FAIL`` example.
    """
    stamp = datetime.now(timezone.utc).strftime("%I:%M %p")

    def _passing(domain: str, row_count: str) -> dict[str, Any]:
        # Every passing sample shares the same timestamp and an empty detail.
        return {
            "domain": domain,
            "last_vrp": stamp,
            "status": "PASS",
            "rows": row_count,
            "detail": "",
        }

    failing_inventory: dict[str, Any] = {
        "domain": "inventory",
        "last_vrp": "09:45 AM",
        "status": "FAIL",
        "rows": "0",
        "detail": "3 drops detected",
    }
    return [
        _passing("orders", "5.2M"),
        _passing("payments", "1.1M"),
        failing_inventory,
        _passing("shipping", "800K"),
    ]
27
+
28
+
29
def _scan_proofs(proofs_dir: Path) -> list[dict[str, Any]]:
    """Build one dashboard row per readable ``*.vrp.json`` file under *proofs_dir*.

    The directory is searched recursively and files are visited in sorted path
    order. Files that cannot be read, are not valid UTF-8 JSON, or whose top
    level is not a JSON object are skipped so that a single bad proof cannot
    abort the whole dashboard.

    Args:
        proofs_dir: Root directory to search for proof files.

    Returns:
        A list of row dicts with the keys ``domain``, ``last_vrp``, ``status``,
        ``rows`` and ``detail``. When no usable proof is found the demo rows
        are returned instead.
    """
    rows: list[dict[str, Any]] = []
    for proof_file in sorted(proofs_dir.rglob("*.vrp.json")):
        try:
            data = json.loads(proof_file.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            # ValueError covers both json.JSONDecodeError and the
            # UnicodeDecodeError raised for files that are not UTF-8.
            continue
        if not isinstance(data, dict):
            # Valid JSON, but not an object (e.g. a list): nothing to report.
            continue

        recon = data.get("reconciliation")
        if not isinstance(recon, dict):
            recon = {}
        verdict = recon.get("verdict", "UNKNOWN")

        # The domain is the path component two levels above the file name.
        domain = proof_file.parts[-3] if len(proof_file.parts) >= 3 else "unknown"

        # Trim an ISO timestamp to "YYYY-MM-DD HH:MM"; tolerate null/non-string.
        created = str(data.get("created_at") or "")[:16].replace("T", " ")

        rows.append(
            {
                "domain": domain,
                "last_vrp": created or "unknown",
                "status": verdict,
                "rows": str(recon.get("sink_count", "?")),
                "detail": _fail_detail(recon) if verdict == "FAIL" else "",
            }
        )
    # NOTE(review): falling back to demo rows means an empty proofs directory
    # renders sample data; the caller cannot tell the two cases apart.
    return rows or _demo_rows()
50
+
51
+
52
def _fail_detail(recon: dict[str, Any]) -> str:
    """Summarise why a reconciliation failed as a short comma-separated string.

    Counts the entries under ``missing``, ``mutated`` and ``duplicated`` and
    reports each non-zero count; falls back to ``"reconciliation failed"``
    when all three are empty or absent.
    """
    labelled_keys = (
        ("missing", "drops"),
        ("mutated", "mutations"),
        ("duplicated", "duplicates"),
    )
    fragments = []
    for key, label in labelled_keys:
        count = len(recon.get(key, []))
        if count:
            fragments.append(f"{count} {label}")
    return ", ".join(fragments) or "reconciliation failed"
64
+
65
+
66
def render_trust_dashboard(
    *,
    proofs_dir: str | None = None,
    output: str = "mesh-trust-dashboard.html",
    demo: bool = False,
    cloudwatch: bool = False,
    cloudwatch_region: str | None = None,
) -> Path:
    """Write the trust dashboard HTML file and return its resolved path.

    Row source, in priority order:

    * ``cloudwatch=True``: rows from ``fetch_cloudwatch_trust_rows``; if that
      yields nothing, demo rows are used and the mode is reported as
      ``cloudwatch-fallback-demo``.
    * ``demo=True`` or no ``proofs_dir``: demo rows.
    * otherwise: rows scanned from ``proofs_dir``.

    Args:
        proofs_dir: Directory containing ``*.vrp.json`` proof files.
        output: Path of the HTML file to write (UTF-8).
        demo: Force demo rows when CloudWatch is not requested.
        cloudwatch: Read rows from CloudWatch instead of local proofs.
        cloudwatch_region: Region forwarded to the CloudWatch fetch.

    Returns:
        The resolved path of the written file.
    """
    if cloudwatch:
        fetched = fetch_cloudwatch_trust_rows(region=cloudwatch_region)
        if fetched:
            rows, mode = fetched, "cloudwatch"
        else:
            rows, mode = _demo_rows(), "cloudwatch-fallback-demo"
    elif proofs_dir and not demo:
        rows, mode = _scan_proofs(Path(proofs_dir)), "live-proofs"
    else:
        rows, mode = _demo_rows(), "demo"

    generated_at = datetime.now(timezone.utc).isoformat()
    table_body = _render_rows(rows)
    passed = sum(1 for entry in rows if entry["status"] == "PASS")
    failed = sum(1 for entry in rows if entry["status"] == "FAIL")

    page = HTML_TEMPLATE.format(
        generated_at=generated_at,
        mode=mode,
        rows=table_body,
        pass_count=passed,
        fail_count=failed,
        total=len(rows),
    )

    destination = Path(output)
    destination.write_text(page, encoding="utf-8")
    return destination.resolve()
99
+
100
+
101
def _render_rows(rows: list[dict[str, Any]]) -> str:
    """Render dashboard rows as newline-joined HTML ``<tr>`` elements.

    ``PASS`` and ``FAIL`` statuses get their own badge; any other status is
    shown as ``PENDING``. All row values are HTML-escaped before interpolation
    because they originate from proof files or CloudWatch and must not be able
    to inject markup into the generated page.

    Args:
        rows: Row dicts with ``domain``, ``last_vrp``, ``status`` and ``rows``
            keys, plus an optional ``detail``.

    Returns:
        The ``<tr>`` elements joined by ``"\\n"`` (empty string for no rows).
    """
    # Local import keeps the block self-contained without touching file imports.
    from html import escape

    lines = []
    for row in rows:
        status = row["status"]
        if status == "PASS":
            icon, cls = "PASS", "pass"
        elif status == "FAIL":
            icon, cls = "FAIL", "fail"
        else:
            icon, cls = "PENDING", "pending"

        # ``icon`` and ``cls`` are fixed literals; only row data needs escaping.
        domain = escape(str(row["domain"]))
        last_vrp = escape(str(row["last_vrp"]))
        row_count = escape(str(row["rows"]))
        detail = f" ({escape(str(row['detail']))})" if row.get("detail") else ""

        lines.append(
            f"<tr class='{cls}'><td>{domain}</td>"
            f"<td>{last_vrp}</td>"
            f"<td><span class='badge {cls}'>{icon}</span></td>"
            f"<td>{row_count}{detail}</td></tr>"
        )
    return "\n".join(lines)
121
+
122
+
123
+ HTML_TEMPLATE = """<!DOCTYPE html>
124
+ <html lang="en">
125
+ <head>
126
+ <meta charset="utf-8"/>
127
+ <title>Mesh Trust Dashboard</title>
128
+ <style>
129
+ body {{ font-family: system-ui, sans-serif; background: #0f172a; color: #e2e8f0; margin: 2rem; }}
130
+ h1 {{ color: #38bdf8; }}
131
+ .summary {{ display: flex; gap: 1.5rem; margin: 1.5rem 0; }}
132
+ .card {{ background: #1e293b; padding: 1rem 1.5rem; border-radius: 8px; }}
133
+ table {{ width: 100%; border-collapse: collapse; margin-top: 1rem; }}
134
+ th, td {{ padding: 0.75rem 1rem; text-align: left; border-bottom: 1px solid #334155; }}
135
+ th {{ color: #94a3b8; }}
136
+ tr.pass td {{ }}
137
+ tr.fail {{ background: #450a0a33; }}
138
+ .badge {{ padding: 0.2rem 0.6rem; border-radius: 4px; font-weight: 600; font-size: 0.85rem; }}
139
+ .badge.pass {{ background: #14532d; color: #86efac; }}
140
+ .badge.fail {{ background: #7f1d1d; color: #fca5a5; }}
141
+ .badge.pending {{ background: #713f12; color: #fde68a; }}
142
+ .meta {{ color: #64748b; font-size: 0.9rem; }}
143
+ </style>
144
+ </head>
145
+ <body>
146
+ <h1>Mesh Trust Dashboard</h1>
147
+ <p class="meta">Vaquar Pattern (PVDM) · mode: {mode} · generated {generated_at}</p>
148
+ <div class="summary">
149
+ <div class="card"><strong>{total}</strong> domains</div>
150
+ <div class="card"><strong>{pass_count}</strong> PASS</div>
151
+ <div class="card"><strong>{fail_count}</strong> FAIL</div>
152
+ </div>
153
+ <table>
154
+ <thead><tr><th>Domain</th><th>Last VRP</th><th>Status</th><th>Rows / Detail</th></tr></thead>
155
+ <tbody>
156
+ {rows}
157
+ </tbody>
158
+ </table>
159
+ <p class="meta">Invariant: commit_metadata implies VRP = PASS</p>
160
+ </body>
161
+ </html>
162
+ """
@@ -0,0 +1,23 @@
1
+ """Framework-specific exceptions for transaction boundary enforcement."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
class ServerlessDataMeshError(Exception):
    """Root of the serverless-data-mesh exception hierarchy.

    Catch this type to handle any failure raised by the framework.
    """


class VerificationRejectedError(ServerlessDataMeshError):
    """A chunk was blocked from committing because VRP validation rejected it."""


class CatalogCommitError(ServerlessDataMeshError):
    """The metadata commit through the Glue REST catalog did not succeed."""


class WorkloadConfigurationError(ServerlessDataMeshError):
    """A domain workload or its boundary contract is not valid."""


class RuleEvaluationError(ServerlessDataMeshError):
    """A SparkRules quality gate or policy evaluation failed."""
@@ -0,0 +1,9 @@
1
+ """Federated governance: consumer SLA and Lake Formation enforcement."""
2
+
3
+ from serverless_data_mesh.governance.consumer_sla import (
4
+ ConsumerAccessDecision,
5
+ enforce_consumer_sla,
6
+ grant_read_if_sla_met,
7
+ )
8
+
9
+ __all__ = ["ConsumerAccessDecision", "enforce_consumer_sla", "grant_read_if_sla_met"]
@@ -0,0 +1,109 @@
1
+ """Consumer SLA contracts backed by VRP proofs before read access."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from dataclasses import dataclass
7
+ from datetime import datetime, timezone
8
+ from typing import Any
9
+
10
+ from serverless_data_mesh.types.workload import ConsumerSLAContract
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ @dataclass(frozen=True, slots=True)
16
+ class ConsumerAccessDecision:
17
+ """Whether a consumer may read a table given proof + SLA."""
18
+
19
+ granted: bool
20
+ consumer_id: str
21
+ target_table: str
22
+ reason: str
23
+ checks: dict[str, bool]
24
+
25
+
26
def enforce_consumer_sla(
    contract: ConsumerSLAContract,
    *,
    proof: dict[str, Any],
    snapshot_committed_at: datetime | None = None,
) -> ConsumerAccessDecision:
    """Decide whether a producer's VRP proof satisfies a consumer's SLA.

    Checks are evaluated in this order and recorded in ``checks``:

    * ``vrp_pass``: the proof's reconciliation verdict is ``"PASS"``. A
      non-PASS verdict denies immediately without running the other checks.
    * ``completeness``: ``sink_count / source_count`` as a percentage is at
      least ``contract.min_completeness_pct`` (0% when ``source_count`` is 0).
    * ``required_columns``: every column in ``contract.required_columns``
      appears in the proof's ``content_fields``.
    * ``freshness``: the commit time is no older than
      ``contract.max_freshness_minutes``.

    Args:
        contract: The consumer's SLA contract.
        proof: VRP proof document (parsed JSON).
        snapshot_committed_at: Commit time to use for the freshness check. When
            omitted, the proof's ``created_at`` ISO timestamp is used. A
            timezone-naive value from either source is interpreted as UTC.

    Returns:
        A ``ConsumerAccessDecision`` whose ``granted`` flag is true only when
        every recorded check passed.

    Raises:
        ValueError: If ``created_at`` is present but not an ISO-8601 timestamp.
    """
    checks: dict[str, bool] = {}
    recon = proof.get("reconciliation", {})
    verdict = recon.get("verdict", "FAIL")

    checks["vrp_pass"] = verdict == "PASS"
    if not checks["vrp_pass"]:
        return ConsumerAccessDecision(
            granted=False,
            consumer_id=contract.consumer_id,
            target_table=contract.target_table,
            reason="VRP verdict is not PASS",
            checks=checks,
        )

    source_count = int(recon.get("source_count", 0))
    sink_count = int(recon.get("sink_count", 0))
    if source_count > 0:
        completeness = (sink_count / source_count) * 100.0
    else:
        completeness = 0.0
    checks["completeness"] = completeness >= contract.min_completeness_pct

    content_fields = set(proof.get("content_fields", []))
    checks["required_columns"] = all(col in content_fields for col in contract.required_columns)

    committed_at = snapshot_committed_at
    if committed_at is None and proof.get("created_at"):
        # Older Python versions do not accept a trailing "Z" in fromisoformat.
        committed_at = datetime.fromisoformat(proof["created_at"].replace("Z", "+00:00"))

    # NOTE(review): with no timestamp at all the freshness check passes
    # (fail-open); confirm that is intended for an access-control decision.
    freshness_ok = True
    if committed_at is not None:
        if committed_at.utcoffset() is None:
            # Subtracting a naive datetime from an aware "now" raises
            # TypeError, so treat naive timestamps as UTC.
            committed_at = committed_at.replace(tzinfo=timezone.utc)
        age_min = (datetime.now(timezone.utc) - committed_at).total_seconds() / 60.0
        freshness_ok = age_min <= contract.max_freshness_minutes
    checks["freshness"] = freshness_ok

    granted = all(checks.values())
    failed = [k for k, v in checks.items() if not v]
    reason = "All SLA checks passed" if granted else f"SLA failed: {', '.join(failed)}"

    logger.info(
        "Consumer SLA %s for %s: granted=%s checks=%s",
        contract.consumer_id,
        contract.target_table,
        granted,
        checks,
    )

    return ConsumerAccessDecision(
        granted=granted,
        consumer_id=contract.consumer_id,
        target_table=contract.target_table,
        reason=reason,
        checks=checks,
    )
86
+
87
+
88
def grant_read_if_sla_met(
    contract: ConsumerSLAContract,
    *,
    proof: dict[str, Any],
    snapshot_committed_at: datetime | None = None,
) -> dict[str, Any]:
    """Evaluate the consumer SLA and package the outcome as a grant/deny payload.

    Runs ``enforce_consumer_sla`` with the same arguments and flattens its
    decision into a plain dict. ``lf_action`` is ``"GrantPermissions"`` when
    access is granted and ``"Deny"`` otherwise; ``proof_id`` is copied from the
    proof (``None`` when absent).
    """
    outcome = enforce_consumer_sla(
        contract,
        proof=proof,
        snapshot_committed_at=snapshot_committed_at,
    )

    if outcome.granted:
        lf_action = "GrantPermissions"
    else:
        lf_action = "Deny"

    payload: dict[str, Any] = {
        "consumer_id": outcome.consumer_id,
        "target_table": outcome.target_table,
        "grant_read": outcome.granted,
        "enforcement": contract.enforcement,
        "reason": outcome.reason,
        "checks": outcome.checks,
        "lf_action": lf_action,
        "proof_id": proof.get("proof_id"),
    }
    return payload
@@ -0,0 +1,5 @@
1
+ """OpenLineage integration for mesh data product discovery."""
2
+
3
+ from serverless_data_mesh.lineage.openlineage import emit_openlineage_event
4
+
5
+ __all__ = ["emit_openlineage_event"]
@@ -0,0 +1,96 @@
1
+ """Emit OpenLineage RunEvent after successful Vaquar Pattern commits."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import os
8
+ import urllib.error
9
+ import urllib.request
10
+ import uuid
11
+ from datetime import datetime, timezone
12
+ from typing import Any
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ OPENLINEAGE_PRODUCER = "https://github.com/vaquarkhan/aws-serverless-datamesh-framework"
17
+
18
+
19
def _dataset(name: str, namespace: str = "s3") -> dict[str, str]:
    """Return the ``namespace``/``name`` mapping describing one dataset."""
    return dict(namespace=namespace, name=name)
21
+
22
+
23
def emit_openlineage_event(
    *,
    job_name: str,
    run_id: str | None = None,
    inputs: list[str],
    outputs: list[str],
    facets: dict[str, Any] | None = None,
    event_type: str = "COMPLETE",
    endpoint: str | None = None,
) -> dict[str, Any]:
    """Build and optionally POST an OpenLineage RunEvent.

    Set ``OPENLINEAGE_URL`` (or pass ``endpoint``) to POST the event as JSON to
    ``<url>/api/v1/lineage``. Without an endpoint the event is only returned,
    for logging or local persistence.

    Delivery is best-effort: any transport failure (unreachable host, HTTP
    error status, timeout, malformed URL, broken HTTP response) is logged as a
    warning and never raised, so a lineage outage cannot fail the caller.

    Args:
        job_name: Job name recorded under the ``serverless-data-mesh`` namespace.
        run_id: Run identifier; a random UUID4 string is generated when falsy.
        inputs: Input dataset names.
        outputs: Output dataset names.
        facets: Extra facets attached to the event (empty dict when omitted).
        event_type: OpenLineage event type, ``"COMPLETE"`` by default.
        endpoint: Base URL of the lineage backend; overrides ``OPENLINEAGE_URL``.

    Returns:
        The event dict, whether or not it was posted successfully.
    """
    # Local import: needed only to recognise malformed-response errors below.
    import http.client

    run_id = run_id or str(uuid.uuid4())
    now = datetime.now(timezone.utc).replace(microsecond=0).isoformat()

    event: dict[str, Any] = {
        "eventType": event_type,
        "eventTime": now,
        "producer": OPENLINEAGE_PRODUCER,
        "schemaURL": "https://openlineage.io/spec/1-0-5/OpenLineage.json",
        "run": {"runId": run_id},
        "job": {"namespace": "serverless-data-mesh", "name": job_name},
        "inputs": [_dataset(name) for name in inputs],
        "outputs": [_dataset(name) for name in outputs],
        "facets": facets or {},
    }

    url = endpoint or os.environ.get("OPENLINEAGE_URL")
    if url:
        # Serialisation stays outside the try: a non-serialisable facet is a
        # caller bug and should surface, unlike a transport failure.
        body = json.dumps(event).encode("utf-8")
        try:
            request = urllib.request.Request(
                url.rstrip("/") + "/api/v1/lineage",
                data=body,
                headers={"Content-Type": "application/json"},
                method="POST",
            )
            with urllib.request.urlopen(request, timeout=10) as response:
                logger.info("OpenLineage event posted (%s)", response.status)
        except (OSError, ValueError, http.client.HTTPException) as exc:
            # OSError covers URLError/HTTPError and socket timeouts; ValueError
            # is raised for a URL without a usable scheme.
            logger.warning("OpenLineage POST failed: %s", exc)

    return event
69
+
70
+
71
def emit_from_commit_result(
    *,
    domain_id: str,
    target_table: str,
    source_namespace: str,
    commit_result: dict[str, Any],
    proof_id: str | None = None,
    checkpoint_path: str | None = None,
) -> dict[str, Any]:
    """Emit a lineage event for a commit result, but only if it was committed.

    Returns an empty dict without emitting anything unless
    ``commit_result["outcome"]`` equals ``"committed"``. Otherwise the event is
    built from the commit result (job ``"<domain_id>.<target_table>"``, run id
    taken from ``workload_id``) and handed to ``emit_openlineage_event``, whose
    return value is passed back.
    """
    if commit_result.get("outcome") != "committed":
        return {}

    job = f"{domain_id}.{target_table}"
    workload_run_id = commit_result.get("workload_id")
    commit_facets = {
        # An explicit proof id wins over the tail of the commit's proof chain.
        "vrp_proof_id": proof_id or commit_result.get("proof_chain_tail"),
        "iceguard_checkpoint": checkpoint_path,
        "row_count": commit_result.get("records_written"),
        "snapshot_id": commit_result.get("snapshot_id"),
        "vaquar_pattern": "PVDM",
    }
    return emit_openlineage_event(
        job_name=job,
        run_id=workload_run_id,
        inputs=[source_namespace],
        outputs=[target_table],
        facets=commit_facets,
    )
@@ -0,0 +1,5 @@
1
+ """Local-first PVDM runtime (no AWS credentials required)."""
2
+
3
+ from serverless_data_mesh.local.runtime import LocalPVDMRuntime, LocalWriteResult
4
+
5
+ __all__ = ["LocalPVDMRuntime", "LocalWriteResult"]