serverless-data-mesh 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- serverless_data_mesh/__init__.py +93 -0
- serverless_data_mesh/catalog/__init__.py +6 -0
- serverless_data_mesh/catalog/glue_connector.py +17 -0
- serverless_data_mesh/catalog/glue_rest.py +134 -0
- serverless_data_mesh/cli.py +165 -0
- serverless_data_mesh/config.py +42 -0
- serverless_data_mesh/dashboard/__init__.py +5 -0
- serverless_data_mesh/dashboard/cloudwatch.py +80 -0
- serverless_data_mesh/dashboard/trust.py +162 -0
- serverless_data_mesh/exceptions.py +23 -0
- serverless_data_mesh/governance/__init__.py +9 -0
- serverless_data_mesh/governance/consumer_sla.py +109 -0
- serverless_data_mesh/lineage/__init__.py +5 -0
- serverless_data_mesh/lineage/openlineage.py +96 -0
- serverless_data_mesh/local/__init__.py +5 -0
- serverless_data_mesh/local/runtime.py +380 -0
- serverless_data_mesh/metrics/__init__.py +5 -0
- serverless_data_mesh/metrics/mesh_trust.py +56 -0
- serverless_data_mesh/orchestration/__init__.py +28 -0
- serverless_data_mesh/orchestration/canary.py +127 -0
- serverless_data_mesh/orchestration/coordinator.py +265 -0
- serverless_data_mesh/orchestration/durable_steps.py +74 -0
- serverless_data_mesh/orchestration/reprocess.py +143 -0
- serverless_data_mesh/orchestration/state.py +16 -0
- serverless_data_mesh/py.typed +0 -0
- serverless_data_mesh/rules/__init__.py +8 -0
- serverless_data_mesh/rules/sparkrules_connector.py +193 -0
- serverless_data_mesh/scaffold/__init__.py +5 -0
- serverless_data_mesh/scaffold/init_domain.py +210 -0
- serverless_data_mesh/types/__init__.py +21 -0
- serverless_data_mesh/types/workload.py +123 -0
- serverless_data_mesh/verification/__init__.py +21 -0
- serverless_data_mesh/verification/backend.py +41 -0
- serverless_data_mesh/verification/fallback.py +200 -0
- serverless_data_mesh/verification/vrp.py +202 -0
- serverless_data_mesh-0.2.0.dist-info/METADATA +143 -0
- serverless_data_mesh-0.2.0.dist-info/RECORD +40 -0
- serverless_data_mesh-0.2.0.dist-info/WHEEL +4 -0
- serverless_data_mesh-0.2.0.dist-info/entry_points.txt +2 -0
- serverless_data_mesh-0.2.0.dist-info/licenses/LICENSE +17 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Serverless Data Mesh: cross-domain lakehouse write coordination on AWS Lambda."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from serverless_data_mesh.config import MeshSettings
|
|
9
|
+
from serverless_data_mesh.exceptions import (
|
|
10
|
+
CatalogCommitError,
|
|
11
|
+
RuleEvaluationError,
|
|
12
|
+
ServerlessDataMeshError,
|
|
13
|
+
VerificationRejectedError,
|
|
14
|
+
WorkloadConfigurationError,
|
|
15
|
+
)
|
|
16
|
+
from serverless_data_mesh.types import (
|
|
17
|
+
ChunkWriteResult,
|
|
18
|
+
DataProductContract,
|
|
19
|
+
DataWriteWorkload,
|
|
20
|
+
DomainTransactionBoundary,
|
|
21
|
+
WriteOutcome,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
def _read_version() -> str:
    """Return the package version string.

    Resolution order:

    1. Installed distribution metadata for ``serverless-data-mesh``.
    2. A ``VERSION`` file located two directories above this module.
    3. The placeholder ``"0.0.0"`` when neither source can be read, so that
       importing the package never fails merely because version metadata is
       missing.

    Returns:
        The resolved version, stripped of surrounding whitespace when it
        comes from the ``VERSION`` file.
    """
    try:
        from importlib.metadata import version

        return version("serverless-data-mesh")
    except Exception:
        # Deliberately broad: any metadata problem falls through to the
        # file-based lookup instead of breaking ``import serverless_data_mesh``.
        try:
            root = Path(__file__).resolve().parents[2]
            return (root / "VERSION").read_text(encoding="utf-8").strip()
        except (OSError, IndexError, UnicodeDecodeError):
            # No readable VERSION file (or the path is too shallow).
            return "0.0.0"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
__version__ = _read_version()
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"CatalogCommitError",
|
|
38
|
+
"ChunkWriteResult",
|
|
39
|
+
"DataProductContract",
|
|
40
|
+
"DataWriteWorkload",
|
|
41
|
+
"DomainTransactionBoundary",
|
|
42
|
+
"GlueCatalogConnector",
|
|
43
|
+
"GlueRestCatalogAdapter",
|
|
44
|
+
"IceGuardDurableCoordinator",
|
|
45
|
+
"MeshSettings",
|
|
46
|
+
"OrchestrationState",
|
|
47
|
+
"RuleEvaluationError",
|
|
48
|
+
"RuleFireSummary",
|
|
49
|
+
"SparkRulesConnector",
|
|
50
|
+
"ServerlessDataMeshError",
|
|
51
|
+
"VRPProofGenerator",
|
|
52
|
+
"VerificationRejectedError",
|
|
53
|
+
"WorkloadConfigurationError",
|
|
54
|
+
"WriteOutcome",
|
|
55
|
+
"validate_then_commit",
|
|
56
|
+
"__version__",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def __getattr__(name: str) -> Any:
    """Resolve lazily exported public names on first attribute access.

    Each group of names is imported from its subpackage only when one of the
    names is requested, so importing the top-level package does not import
    the catalog, rules, orchestration or verification subpackages.

    Args:
        name: Attribute being looked up on the package module.

    Returns:
        The object exported under ``name`` by the owning subpackage.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
    """
    if name == "GlueCatalogConnector" or name == "GlueRestCatalogAdapter":
        from serverless_data_mesh.catalog import GlueCatalogConnector, GlueRestCatalogAdapter

        if name == "GlueCatalogConnector":
            return GlueCatalogConnector
        return GlueRestCatalogAdapter

    if name == "SparkRulesConnector" or name == "RuleFireSummary":
        from serverless_data_mesh.rules import RuleFireSummary, SparkRulesConnector

        if name == "SparkRulesConnector":
            return SparkRulesConnector
        return RuleFireSummary

    if name == "IceGuardDurableCoordinator" or name == "OrchestrationState":
        from serverless_data_mesh.orchestration import (
            IceGuardDurableCoordinator,
            OrchestrationState,
        )

        if name == "IceGuardDurableCoordinator":
            return IceGuardDurableCoordinator
        return OrchestrationState

    if name == "VRPProofGenerator" or name == "validate_then_commit":
        from serverless_data_mesh.verification import VRPProofGenerator, validate_then_commit

        if name == "VRPProofGenerator":
            return VRPProofGenerator
        return validate_then_commit

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
"""Iceberg catalog adapters for serverless metadata commits."""
|
|
2
|
+
|
|
3
|
+
from serverless_data_mesh.catalog.glue_connector import GlueCatalogConnector
|
|
4
|
+
from serverless_data_mesh.catalog.glue_rest import GlueRestCatalogAdapter
|
|
5
|
+
|
|
6
|
+
__all__ = ["GlueCatalogConnector", "GlueRestCatalogAdapter"]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Glue Catalog Connector: metadata-only integration for Lambda domain writers.
|
|
2
|
+
|
|
3
|
+
AWS Glue **ETL jobs** (managed Spark runners) do not run inside Lambda containers.
|
|
4
|
+
Domain writers execute **physical** transforms on Lambda (PySpark-on-Lambda, Polars,
|
|
5
|
+
PyArrow, DuckDB, etc.) and use this connector for **metadata** commits against the
|
|
6
|
+
Glue Data Catalog Iceberg REST endpoint.
|
|
7
|
+
|
|
8
|
+
The connector is a thin SigV4 HTTPS client via PyIceberg: no Glue job runtime, no JVM
|
|
9
|
+
Spark catalog session, no Glue Studio dependency.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from serverless_data_mesh.catalog.glue_rest import GlueRestCatalogAdapter
|
|
13
|
+
|
|
14
|
+
# Public alias: "Glue connector" in docs maps to this class.
|
|
15
|
+
GlueCatalogConnector = GlueRestCatalogAdapter
|
|
16
|
+
|
|
17
|
+
__all__ = ["GlueCatalogConnector", "GlueRestCatalogAdapter"]
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""Zero-config AWS Glue Iceberg REST catalog adapter for serverless 2PC commits."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import boto3
|
|
11
|
+
from pyiceberg.catalog import load_catalog
|
|
12
|
+
from pyiceberg.table import Table
|
|
13
|
+
|
|
14
|
+
from serverless_data_mesh.exceptions import CatalogCommitError
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(slots=True)
|
|
20
|
+
class GlueRestCatalogAdapter:
|
|
21
|
+
"""Native Python catalog commit adapter using Glue's Iceberg REST endpoint.
|
|
22
|
+
|
|
23
|
+
Replaces heavy PySpark JVM catalog operations with lightweight HTTPS REST
|
|
24
|
+
calls authenticated via AWS SigV4. IceGuard performs physical file writes;
|
|
25
|
+
this adapter executes the final metadata commit in the two-phase commit (2PC)
|
|
26
|
+
sequence purely over REST.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
catalog_name: str
|
|
30
|
+
namespace: str
|
|
31
|
+
table_name: str
|
|
32
|
+
region: str
|
|
33
|
+
warehouse: str | None = None
|
|
34
|
+
catalog: Any = field(default=None, repr=False)
|
|
35
|
+
_prepared_files: list[str] = field(default_factory=list, init=False, repr=False)
|
|
36
|
+
|
|
37
|
+
@classmethod
|
|
38
|
+
def from_environment(
|
|
39
|
+
cls,
|
|
40
|
+
*,
|
|
41
|
+
namespace: str,
|
|
42
|
+
table_name: str,
|
|
43
|
+
catalog_name: str = "glue_rest",
|
|
44
|
+
aws_account_id: str | None = None,
|
|
45
|
+
warehouse: str | None = None,
|
|
46
|
+
) -> GlueRestCatalogAdapter:
|
|
47
|
+
"""Construct adapter from standard Lambda environment variables."""
|
|
48
|
+
region = os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION", "us-east-1"))
|
|
49
|
+
account_id = aws_account_id or os.environ.get("AWS_ACCOUNT_ID")
|
|
50
|
+
if not account_id:
|
|
51
|
+
account_id = boto3.client("sts").get_caller_identity()["Account"]
|
|
52
|
+
resolved_warehouse = warehouse or os.environ.get(
|
|
53
|
+
"ICEBERG_WAREHOUSE",
|
|
54
|
+
f"{account_id}:s3tablescatalog/{os.environ.get('ICEBERG_TABLE_BUCKET', 'default')}",
|
|
55
|
+
)
|
|
56
|
+
return cls(
|
|
57
|
+
catalog_name=catalog_name,
|
|
58
|
+
namespace=namespace,
|
|
59
|
+
table_name=table_name,
|
|
60
|
+
region=region,
|
|
61
|
+
warehouse=resolved_warehouse,
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
def _rest_properties(self) -> dict[str, str]:
|
|
65
|
+
"""Build pyiceberg REST catalog properties with SigV4 signing."""
|
|
66
|
+
if not self.warehouse:
|
|
67
|
+
raise ValueError("warehouse is required for Glue REST catalog access")
|
|
68
|
+
return {
|
|
69
|
+
"type": "rest",
|
|
70
|
+
"uri": f"https://glue.{self.region}.amazonaws.com/iceberg",
|
|
71
|
+
"warehouse": self.warehouse,
|
|
72
|
+
"rest.sigv4-enabled": "true",
|
|
73
|
+
"rest.signing-name": "glue",
|
|
74
|
+
"rest.signing-region": self.region,
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
def connect(self) -> Table:
|
|
78
|
+
"""Authenticate via SigV4 and load the target Iceberg table handle."""
|
|
79
|
+
if self.catalog is None:
|
|
80
|
+
self.catalog = load_catalog(self.catalog_name, **self._rest_properties())
|
|
81
|
+
identifier = f"{self.namespace}.{self.table_name}"
|
|
82
|
+
table = self.catalog.load_table(identifier)
|
|
83
|
+
logger.info("Connected to Iceberg table %s via Glue REST", identifier)
|
|
84
|
+
return table
|
|
85
|
+
|
|
86
|
+
def prepare_commit(self, parquet_paths: list[str]) -> None:
|
|
87
|
+
"""Phase-1 prepare: stage file paths for the pending metadata transaction."""
|
|
88
|
+
if not parquet_paths:
|
|
89
|
+
raise ValueError("prepare_commit requires at least one parquet path")
|
|
90
|
+
self._prepared_files = list(dict.fromkeys(parquet_paths))
|
|
91
|
+
logger.info("Prepared %d data files for REST metadata commit", len(self._prepared_files))
|
|
92
|
+
|
|
93
|
+
def commit(self, *, snapshot_properties: dict[str, str] | None = None) -> int:
|
|
94
|
+
"""Phase-2 commit: publish a new Iceberg snapshot via HTTPS REST."""
|
|
95
|
+
if not self._prepared_files:
|
|
96
|
+
raise CatalogCommitError("commit called before prepare_commit")
|
|
97
|
+
|
|
98
|
+
table = self.connect()
|
|
99
|
+
props = snapshot_properties or {
|
|
100
|
+
"write.format.default": "parquet",
|
|
101
|
+
"app-id": "serverless-data-mesh",
|
|
102
|
+
}
|
|
103
|
+
try:
|
|
104
|
+
with table.transaction() as tx:
|
|
105
|
+
tx.add_files(self._prepared_files, snapshot_properties=props)
|
|
106
|
+
except Exception as exc:
|
|
107
|
+
raise CatalogCommitError(f"Glue REST commit failed: {exc}") from exc
|
|
108
|
+
|
|
109
|
+
snapshot_id = table.metadata.current_snapshot_id()
|
|
110
|
+
logger.info(
|
|
111
|
+
"Committed snapshot %s with %d files to %s.%s",
|
|
112
|
+
snapshot_id,
|
|
113
|
+
len(self._prepared_files),
|
|
114
|
+
self.namespace,
|
|
115
|
+
self.table_name,
|
|
116
|
+
)
|
|
117
|
+
self._prepared_files.clear()
|
|
118
|
+
return int(snapshot_id or 0)
|
|
119
|
+
|
|
120
|
+
def abort(self) -> None:
|
|
121
|
+
"""Abort the in-flight metadata transaction without catalog side effects."""
|
|
122
|
+
self._prepared_files.clear()
|
|
123
|
+
logger.info("Aborted pending REST catalog commit for %s.%s", self.namespace, self.table_name)
|
|
124
|
+
|
|
125
|
+
def rollback_to_snapshot(self, snapshot_id: int) -> None:
|
|
126
|
+
"""Rollback table metadata to a prior snapshot (IceGuard timeout recovery)."""
|
|
127
|
+
table = self.connect()
|
|
128
|
+
table.manage_snapshots().rollback_to_snapshot(snapshot_id).commit()
|
|
129
|
+
logger.warning(
|
|
130
|
+
"Rolled back %s.%s to snapshot %s",
|
|
131
|
+
self.namespace,
|
|
132
|
+
self.table_name,
|
|
133
|
+
snapshot_id,
|
|
134
|
+
)
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""CLI entry points for Serverless Data Mesh."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import json
|
|
8
|
+
import sys
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _cmd_demo(args: argparse.Namespace) -> int:
    """Run the local demo sequence and report the result.

    With ``args.json`` set, the raw result is printed as indented JSON;
    otherwise a human-readable summary is printed.

    Returns:
        ``0`` when the result reports that the gate blocked bad data,
        ``1`` otherwise.
    """
    from serverless_data_mesh.local.runtime import LocalPVDMRuntime
    from serverless_data_mesh.verification.backend import veridata_available

    result = LocalPVDMRuntime().run_demo_sequence()

    if args.json:
        print(json.dumps(result, indent=2))
        return 0 if result["consumer"]["gate_blocked_bad_data"] else 1

    backend = result.get("verifier_backend", "unknown")
    # Extra note shown only when the pure-Python verifier was used.
    fallback_note = (
        " (pure-Python verifier - no Rust wheel needed)\n"
        if backend == "pure-python-fallback"
        else ""
    )
    print("\n Serverless Data Mesh - local PVDM demo (no AWS)\n")
    print(f" Verifier: {backend}{fallback_note}")
    print(f" Workspace: {result['root']}")
    print(f" Elapsed: {result['elapsed_ms']} ms\n")

    phases = result["phases"]
    clean = phases["clean_write"]
    corrupt = phases["corrupt_write"]
    consumer = result["consumer"]
    print(f" Phase 1 - clean write: {clean['outcome']} ({clean['records_written']} rows)")
    print(f" Phase 2 - corrupt write: {corrupt['outcome']} (VRP {corrupt['proof_verdict']})")
    print(f" Consumer visible rows: {consumer['visible_row_count']}")
    print(f" Gate blocked bad data: {consumer['gate_blocked_bad_data']}\n")
    print(f" {result['summary']}\n")
    print(" Vaquar Pattern (PVDM): Physical -> Verify -> Durable -> Metadata")
    if not veridata_available():
        print(" Tip: pip install veridata-recon on Linux for cryptographic VRP proofs.")
    print(" Docs: docs/vaquar-pattern.md\n")

    return 0 if result["consumer"]["gate_blocked_bad_data"] else 1
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _cmd_init(args: argparse.Namespace) -> int:
    """Scaffold a new domain and print the follow-up steps.

    Forwards ``args.domain``, ``args.table``, ``args.account`` and
    ``args.output`` to ``scaffold_domain`` and prints the returned root.

    Returns:
        Always ``0``.
    """
    from serverless_data_mesh.scaffold.init_domain import scaffold_domain

    scaffold_options = {
        "domain": args.domain,
        "table": args.table,
        "account_id": args.account,
        "output_dir": args.output,
    }
    root = scaffold_domain(**scaffold_options)

    print(f"\n Domain scaffold created: {root}\n")
    print(" Next steps:")
    print(f" 1. Edit {root}/handler.py")
    print(f" 2. Review {root}/contract.yaml")
    print(f" 3. Deploy {root}/terraform/ (copy tfvars.example -> tfvars)")
    print(f" 4. Run tests: pytest {root}/tests/\n")
    return 0
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _cmd_dashboard(args: argparse.Namespace) -> int:
    """Render the trust dashboard and optionally open it in a browser.

    Demo mode is requested when neither a proofs directory nor CloudWatch was
    selected on the command line.

    Returns:
        Always ``0``.
    """
    from serverless_data_mesh.dashboard.trust import render_trust_dashboard

    path = render_trust_dashboard(
        proofs_dir=args.proofs_dir,
        output=args.output,
        demo=not args.proofs_dir and not args.cloudwatch,
        cloudwatch=args.cloudwatch,
        cloudwatch_region=args.region,
    )
    print(f"Trust dashboard written: {path}")
    if args.open_browser:
        import webbrowser
        from pathlib import Path

        # "file://" + a relative path (the default output is relative) is not
        # a valid URL: the file name is parsed as the host. Resolve to an
        # absolute path and let pathlib build the file URI.
        webbrowser.open(Path(path).resolve().as_uri())
    return 0
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _cmd_canary(args: argparse.Namespace) -> int:
    """Run the canary comparison and report whether to promote.

    With ``args.json`` set, the result is printed as indented JSON; otherwise
    a short text report is printed.

    Returns:
        ``0`` when the result's ``promote`` value is truthy, ``1`` otherwise.
    """
    from serverless_data_mesh.orchestration.canary import run_canary

    canary_options = {
        "record_count": args.records,
        "inject_canary_drift": args.drift,
        "max_divergence_pct": args.max_divergence,
    }
    result = run_canary(**canary_options)

    if args.json:
        print(json.dumps(result, indent=2))
        return 0 if result["promote"] else 1

    print(f"\n Canary outcome: {result['outcome']}")
    print(f" Production VRP: {result['production_verdict']}")
    print(f" Canary VRP: {result['canary_verdict']}")
    print(f" Divergence: {result['divergence_pct']}%")
    print(f" Promote: {result['promote']}\n")
    print(f" {result['message']}\n")
    return 0 if result["promote"] else 1
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _cmd_reprocess_demo(args: argparse.Namespace) -> int:
    """Run the local write-with-auto-repair demo and report the outcome.

    With ``args.json`` set, the result is printed as indented JSON (values
    that are not JSON-serializable are stringified); otherwise a short text
    report is printed.

    Returns:
        ``0`` when the outcome is ``"repaired_and_committed"``, ``1`` otherwise.
    """
    from serverless_data_mesh.local.runtime import LocalPVDMRuntime

    result = LocalPVDMRuntime().run_write_with_auto_repair(
        record_count=args.records,
        drop_count=args.drop,
    )

    if args.json:
        print(json.dumps(result, indent=2, default=str))
    else:
        print("\n Auto VRP reprocessing demo\n")
        print(f" Outcome: {result['outcome']}")
        repair = result.get("repair", {})
        print(f" Missing before: {repair.get('missing_before', '?')}")
        print(f" Repair attempts: {repair.get('attempts', '?')}")
        print(f" Consumer rows: {result.get('consumer_row_count', 0)}\n")

    if result.get("outcome") == "repaired_and_committed":
        return 0
    return 1
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and dispatch to the chosen subcommand.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The integer exit code returned by the subcommand handler.
    """
    parser = argparse.ArgumentParser(
        prog="serverless-data-mesh",
        description="Serverless Data Mesh - federated lakehouse writes on AWS Lambda",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    # demo
    demo_cmd = commands.add_parser("demo", help="Run local PVDM demo in <60s without AWS")
    demo_cmd.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
    demo_cmd.set_defaults(func=_cmd_demo)

    # init
    init_cmd = commands.add_parser("init", help="Scaffold a new proof-gated domain writer")
    init_cmd.add_argument("--domain", required=True, help="Domain id (e.g. payments)")
    init_cmd.add_argument("--table", required=True, help="Target Iceberg table")
    init_cmd.add_argument("--account", required=True, help="Producer AWS account ID")
    init_cmd.add_argument("--output", default="domains", help="Output parent directory")
    init_cmd.set_defaults(func=_cmd_init)

    # dashboard
    dashboard_cmd = commands.add_parser("dashboard", help="Generate mesh trust dashboard HTML")
    dashboard_cmd.add_argument("--proofs-dir", help="Steward proofs directory (local or mounted S3)")
    dashboard_cmd.add_argument(
        "--cloudwatch", action="store_true", help="Pull live metrics from CloudWatch"
    )
    dashboard_cmd.add_argument("--region", help="AWS region for CloudWatch")
    dashboard_cmd.add_argument("--output", default="mesh-trust-dashboard.html")
    dashboard_cmd.add_argument("--open", dest="open_browser", action="store_true")
    dashboard_cmd.set_defaults(func=_cmd_dashboard)

    # canary
    canary_cmd = commands.add_parser("canary", help="Run VRP canary comparison before promotion")
    canary_cmd.add_argument("--records", type=int, default=1000)
    canary_cmd.add_argument("--drift", action="store_true", help="Inject canary row-count drift")
    canary_cmd.add_argument("--max-divergence", type=float, default=1.0)
    canary_cmd.add_argument("--json", action="store_true")
    canary_cmd.set_defaults(func=_cmd_canary)

    # reprocess-demo
    reprocess_cmd = commands.add_parser(
        "reprocess-demo", help="Demo auto VRP repair after dropped records"
    )
    reprocess_cmd.add_argument("--records", type=int, default=100)
    reprocess_cmd.add_argument("--drop", type=int, default=5)
    reprocess_cmd.add_argument("--json", action="store_true")
    reprocess_cmd.set_defaults(func=_cmd_reprocess_demo)

    parsed = parser.parse_args(argv)
    handler = parsed.func
    return int(handler(parsed))
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
if __name__ == "__main__":
|
|
165
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Runtime configuration loaded from environment variables."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True, slots=True)
|
|
10
|
+
class MeshSettings:
|
|
11
|
+
"""Operational settings for Lambda domain writers."""
|
|
12
|
+
|
|
13
|
+
checkpoint_bucket: str
|
|
14
|
+
proof_bucket: str
|
|
15
|
+
iceberg_table_bucket: str
|
|
16
|
+
aws_region: str
|
|
17
|
+
checkpoint_interval: int = 5000
|
|
18
|
+
rollback_threshold_ms: int = 30_000
|
|
19
|
+
lambda_timeout_seconds: int = 900
|
|
20
|
+
iceberg_warehouse: str | None = None
|
|
21
|
+
aws_account_id: str | None = None
|
|
22
|
+
|
|
23
|
+
@classmethod
|
|
24
|
+
def from_environment(cls) -> MeshSettings:
|
|
25
|
+
checkpoint = os.environ.get("ICEGUARD_CHECKPOINT_BUCKET")
|
|
26
|
+
if not checkpoint:
|
|
27
|
+
raise ValueError("ICEGUARD_CHECKPOINT_BUCKET is required")
|
|
28
|
+
|
|
29
|
+
region = os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION", "us-east-1"))
|
|
30
|
+
table_bucket = os.environ.get("ICEBERG_TABLE_BUCKET", "default")
|
|
31
|
+
|
|
32
|
+
return cls(
|
|
33
|
+
checkpoint_bucket=checkpoint,
|
|
34
|
+
proof_bucket=os.environ.get("VRP_PROOF_BUCKET", checkpoint),
|
|
35
|
+
iceberg_table_bucket=table_bucket,
|
|
36
|
+
aws_region=region,
|
|
37
|
+
checkpoint_interval=int(os.environ.get("ICEGUARD_CHECKPOINT_INTERVAL", "5000")),
|
|
38
|
+
rollback_threshold_ms=int(os.environ.get("ICEGUARD_ROLLBACK_THRESHOLD_MS", "30000")),
|
|
39
|
+
lambda_timeout_seconds=int(os.environ.get("LAMBDA_TIMEOUT_SECONDS", "900")),
|
|
40
|
+
iceberg_warehouse=os.environ.get("ICEBERG_WAREHOUSE"),
|
|
41
|
+
aws_account_id=os.environ.get("AWS_ACCOUNT_ID"),
|
|
42
|
+
)
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Fetch live VRP trust rows from CloudWatch metrics."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from datetime import datetime, timedelta, timezone
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
NAMESPACE = "ServerlessDataMesh/Trust"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def fetch_cloudwatch_trust_rows(
    *,
    hours: int = 24,
    region: str | None = None,
    cloudwatch_client: Any | None = None,
) -> list[dict[str, Any]]:
    """Build one dashboard row per domain from CloudWatch trust metrics.

    Domains are discovered from the ``Domain`` dimension of the
    ``VRPTrustScore`` metric, following ``NextToken`` so that every page of
    ``list_metrics`` results is read. For each domain the most recent hourly
    ``Maximum`` datapoint of ``VRPTrustScore`` and ``VRPRowCount`` within the
    look-back window is used.

    Args:
        hours: Size of the look-back window, in hours, ending now (UTC).
        region: Region for the boto3 client; unused when a client is supplied.
        cloudwatch_client: Pre-built CloudWatch client. boto3 is imported only
            when this is ``None``.

    Returns:
        Rows sorted by domain with keys ``domain``, ``last_vrp``, ``status``,
        ``rows`` and ``detail``. An empty list when no client was supplied and
        boto3 is not installed.
    """
    client = cloudwatch_client
    if client is None:
        try:
            import boto3
        except ImportError:
            logger.debug("boto3 unavailable for CloudWatch dashboard")
            return []
        client = boto3.client("cloudwatch", region_name=region)

    end = datetime.now(timezone.utc)
    start = end - timedelta(hours=hours)

    # list_metrics is paginated; keep requesting until no NextToken is returned.
    domain_names: set[str] = set()
    list_request: dict[str, Any] = {"Namespace": NAMESPACE, "MetricName": "VRPTrustScore"}
    while True:
        response = client.list_metrics(**list_request)
        for metric in response.get("Metrics", []):
            for dim in metric.get("Dimensions", []):
                if dim.get("Name") == "Domain":
                    domain_names.add(dim["Value"])
        next_token = response.get("NextToken")
        if not next_token:
            break
        list_request["NextToken"] = next_token

    def _latest_datapoint(metric_name: str, domain: str) -> dict[str, Any] | None:
        """Return the newest hourly Maximum datapoint for one metric/domain, if any."""
        stats = client.get_metric_statistics(
            Namespace=NAMESPACE,
            MetricName=metric_name,
            Dimensions=[{"Name": "Domain", "Value": domain}],
            StartTime=start,
            EndTime=end,
            Period=3600,
            Statistics=["Maximum"],
        )
        points = stats.get("Datapoints", [])
        return max(points, key=lambda p: p["Timestamp"]) if points else None

    rows: list[dict[str, Any]] = []
    for domain in sorted(domain_names):
        latest_trust = _latest_datapoint("VRPTrustScore", domain)
        latest_count = _latest_datapoint("VRPRowCount", domain)

        # A domain with no trust datapoint in the window scores 0.0 and is
        # therefore reported as FAIL.
        score = latest_trust["Maximum"] if latest_trust else 0.0
        passed = score >= 1.0
        rows.append(
            {
                "domain": domain,
                "last_vrp": (
                    latest_trust["Timestamp"].strftime("%I:%M %p")
                    if latest_trust
                    else "no data"
                ),
                "status": "PASS" if passed else "FAIL",
                "rows": str(int(latest_count["Maximum"])) if latest_count else "?",
                "detail": "" if passed else "VRP trust score below 1.0",
            }
        )
    return rows
|