serverless-data-mesh 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. serverless_data_mesh/__init__.py +93 -0
  2. serverless_data_mesh/catalog/__init__.py +6 -0
  3. serverless_data_mesh/catalog/glue_connector.py +17 -0
  4. serverless_data_mesh/catalog/glue_rest.py +134 -0
  5. serverless_data_mesh/cli.py +165 -0
  6. serverless_data_mesh/config.py +42 -0
  7. serverless_data_mesh/dashboard/__init__.py +5 -0
  8. serverless_data_mesh/dashboard/cloudwatch.py +80 -0
  9. serverless_data_mesh/dashboard/trust.py +162 -0
  10. serverless_data_mesh/exceptions.py +23 -0
  11. serverless_data_mesh/governance/__init__.py +9 -0
  12. serverless_data_mesh/governance/consumer_sla.py +109 -0
  13. serverless_data_mesh/lineage/__init__.py +5 -0
  14. serverless_data_mesh/lineage/openlineage.py +96 -0
  15. serverless_data_mesh/local/__init__.py +5 -0
  16. serverless_data_mesh/local/runtime.py +380 -0
  17. serverless_data_mesh/metrics/__init__.py +5 -0
  18. serverless_data_mesh/metrics/mesh_trust.py +56 -0
  19. serverless_data_mesh/orchestration/__init__.py +28 -0
  20. serverless_data_mesh/orchestration/canary.py +127 -0
  21. serverless_data_mesh/orchestration/coordinator.py +265 -0
  22. serverless_data_mesh/orchestration/durable_steps.py +74 -0
  23. serverless_data_mesh/orchestration/reprocess.py +143 -0
  24. serverless_data_mesh/orchestration/state.py +16 -0
  25. serverless_data_mesh/py.typed +0 -0
  26. serverless_data_mesh/rules/__init__.py +8 -0
  27. serverless_data_mesh/rules/sparkrules_connector.py +193 -0
  28. serverless_data_mesh/scaffold/__init__.py +5 -0
  29. serverless_data_mesh/scaffold/init_domain.py +210 -0
  30. serverless_data_mesh/types/__init__.py +21 -0
  31. serverless_data_mesh/types/workload.py +123 -0
  32. serverless_data_mesh/verification/__init__.py +21 -0
  33. serverless_data_mesh/verification/backend.py +41 -0
  34. serverless_data_mesh/verification/fallback.py +200 -0
  35. serverless_data_mesh/verification/vrp.py +202 -0
  36. serverless_data_mesh-0.2.0.dist-info/METADATA +143 -0
  37. serverless_data_mesh-0.2.0.dist-info/RECORD +40 -0
  38. serverless_data_mesh-0.2.0.dist-info/WHEEL +4 -0
  39. serverless_data_mesh-0.2.0.dist-info/entry_points.txt +2 -0
  40. serverless_data_mesh-0.2.0.dist-info/licenses/LICENSE +17 -0
@@ -0,0 +1,93 @@
1
+ """Serverless Data Mesh: cross-domain lakehouse write coordination on AWS Lambda."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from serverless_data_mesh.config import MeshSettings
9
+ from serverless_data_mesh.exceptions import (
10
+ CatalogCommitError,
11
+ RuleEvaluationError,
12
+ ServerlessDataMeshError,
13
+ VerificationRejectedError,
14
+ WorkloadConfigurationError,
15
+ )
16
+ from serverless_data_mesh.types import (
17
+ ChunkWriteResult,
18
+ DataProductContract,
19
+ DataWriteWorkload,
20
+ DomainTransactionBoundary,
21
+ WriteOutcome,
22
+ )
23
+
24
+ def _read_version() -> str:
25
+ try:
26
+ from importlib.metadata import version
27
+
28
+ return version("serverless-data-mesh")
29
+ except Exception:
30
+ root = Path(__file__).resolve().parents[2]
31
+ return (root / "VERSION").read_text(encoding="utf-8").strip()
32
+
33
+
34
+ __version__ = _read_version()
35
+
36
+ __all__ = [
37
+ "CatalogCommitError",
38
+ "ChunkWriteResult",
39
+ "DataProductContract",
40
+ "DataWriteWorkload",
41
+ "DomainTransactionBoundary",
42
+ "GlueCatalogConnector",
43
+ "GlueRestCatalogAdapter",
44
+ "IceGuardDurableCoordinator",
45
+ "MeshSettings",
46
+ "OrchestrationState",
47
+ "RuleEvaluationError",
48
+ "RuleFireSummary",
49
+ "SparkRulesConnector",
50
+ "ServerlessDataMeshError",
51
+ "VRPProofGenerator",
52
+ "VerificationRejectedError",
53
+ "WorkloadConfigurationError",
54
+ "WriteOutcome",
55
+ "validate_then_commit",
56
+ "__version__",
57
+ ]
58
+
59
+
60
+ def __getattr__(name: str) -> Any:
61
+ """Lazy-load heavy integrations (IceGuard, veridata-recon, PyIceberg)."""
62
+ if name in ("GlueCatalogConnector", "GlueRestCatalogAdapter"):
63
+ from serverless_data_mesh.catalog import GlueCatalogConnector, GlueRestCatalogAdapter
64
+
65
+ return {
66
+ "GlueCatalogConnector": GlueCatalogConnector,
67
+ "GlueRestCatalogAdapter": GlueRestCatalogAdapter,
68
+ }[name]
69
+ if name in ("SparkRulesConnector", "RuleFireSummary"):
70
+ from serverless_data_mesh.rules import RuleFireSummary, SparkRulesConnector
71
+
72
+ return {
73
+ "SparkRulesConnector": SparkRulesConnector,
74
+ "RuleFireSummary": RuleFireSummary,
75
+ }[name]
76
+ if name in ("IceGuardDurableCoordinator", "OrchestrationState"):
77
+ from serverless_data_mesh.orchestration import (
78
+ IceGuardDurableCoordinator,
79
+ OrchestrationState,
80
+ )
81
+
82
+ return {
83
+ "IceGuardDurableCoordinator": IceGuardDurableCoordinator,
84
+ "OrchestrationState": OrchestrationState,
85
+ }[name]
86
+ if name in ("VRPProofGenerator", "validate_then_commit"):
87
+ from serverless_data_mesh.verification import VRPProofGenerator, validate_then_commit
88
+
89
+ return {
90
+ "VRPProofGenerator": VRPProofGenerator,
91
+ "validate_then_commit": validate_then_commit,
92
+ }[name]
93
+ raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -0,0 +1,6 @@
1
+ """Iceberg catalog adapters for serverless metadata commits."""
2
+
3
+ from serverless_data_mesh.catalog.glue_connector import GlueCatalogConnector
4
+ from serverless_data_mesh.catalog.glue_rest import GlueRestCatalogAdapter
5
+
6
+ __all__ = ["GlueCatalogConnector", "GlueRestCatalogAdapter"]
@@ -0,0 +1,17 @@
1
+ """Glue Catalog Connector: metadata-only integration for Lambda domain writers.
2
+
3
+ AWS Glue **ETL jobs** (managed Spark runners) do not run inside Lambda containers.
4
+ Domain writers execute **physical** transforms on Lambda (PySpark-on-Lambda, Polars,
5
+ PyArrow, DuckDB, etc.) and use this connector for **metadata** commits against the
6
+ Glue Data Catalog Iceberg REST endpoint.
7
+
8
+ The connector is a thin SigV4 HTTPS client via PyIceberg: no Glue job runtime, no JVM
9
+ Spark catalog session, no Glue Studio dependency.
10
+ """
11
+
12
+ from serverless_data_mesh.catalog.glue_rest import GlueRestCatalogAdapter
13
+
14
+ # Public alias: "Glue connector" in docs maps to this class.
15
+ GlueCatalogConnector = GlueRestCatalogAdapter
16
+
17
+ __all__ = ["GlueCatalogConnector", "GlueRestCatalogAdapter"]
@@ -0,0 +1,134 @@
1
+ """Zero-config AWS Glue Iceberg REST catalog adapter for serverless 2PC commits."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ from dataclasses import dataclass, field
8
+ from typing import Any
9
+
10
+ import boto3
11
+ from pyiceberg.catalog import load_catalog
12
+ from pyiceberg.table import Table
13
+
14
+ from serverless_data_mesh.exceptions import CatalogCommitError
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ @dataclass(slots=True)
20
+ class GlueRestCatalogAdapter:
21
+ """Native Python catalog commit adapter using Glue's Iceberg REST endpoint.
22
+
23
+ Replaces heavy PySpark JVM catalog operations with lightweight HTTPS REST
24
+ calls authenticated via AWS SigV4. IceGuard performs physical file writes;
25
+ this adapter executes the final metadata commit in the two-phase commit (2PC)
26
+ sequence purely over REST.
27
+ """
28
+
29
+ catalog_name: str
30
+ namespace: str
31
+ table_name: str
32
+ region: str
33
+ warehouse: str | None = None
34
+ catalog: Any = field(default=None, repr=False)
35
+ _prepared_files: list[str] = field(default_factory=list, init=False, repr=False)
36
+
37
+ @classmethod
38
+ def from_environment(
39
+ cls,
40
+ *,
41
+ namespace: str,
42
+ table_name: str,
43
+ catalog_name: str = "glue_rest",
44
+ aws_account_id: str | None = None,
45
+ warehouse: str | None = None,
46
+ ) -> GlueRestCatalogAdapter:
47
+ """Construct adapter from standard Lambda environment variables."""
48
+ region = os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION", "us-east-1"))
49
+ account_id = aws_account_id or os.environ.get("AWS_ACCOUNT_ID")
50
+ if not account_id:
51
+ account_id = boto3.client("sts").get_caller_identity()["Account"]
52
+ resolved_warehouse = warehouse or os.environ.get(
53
+ "ICEBERG_WAREHOUSE",
54
+ f"{account_id}:s3tablescatalog/{os.environ.get('ICEBERG_TABLE_BUCKET', 'default')}",
55
+ )
56
+ return cls(
57
+ catalog_name=catalog_name,
58
+ namespace=namespace,
59
+ table_name=table_name,
60
+ region=region,
61
+ warehouse=resolved_warehouse,
62
+ )
63
+
64
+ def _rest_properties(self) -> dict[str, str]:
65
+ """Build pyiceberg REST catalog properties with SigV4 signing."""
66
+ if not self.warehouse:
67
+ raise ValueError("warehouse is required for Glue REST catalog access")
68
+ return {
69
+ "type": "rest",
70
+ "uri": f"https://glue.{self.region}.amazonaws.com/iceberg",
71
+ "warehouse": self.warehouse,
72
+ "rest.sigv4-enabled": "true",
73
+ "rest.signing-name": "glue",
74
+ "rest.signing-region": self.region,
75
+ }
76
+
77
+ def connect(self) -> Table:
78
+ """Authenticate via SigV4 and load the target Iceberg table handle."""
79
+ if self.catalog is None:
80
+ self.catalog = load_catalog(self.catalog_name, **self._rest_properties())
81
+ identifier = f"{self.namespace}.{self.table_name}"
82
+ table = self.catalog.load_table(identifier)
83
+ logger.info("Connected to Iceberg table %s via Glue REST", identifier)
84
+ return table
85
+
86
+ def prepare_commit(self, parquet_paths: list[str]) -> None:
87
+ """Phase-1 prepare: stage file paths for the pending metadata transaction."""
88
+ if not parquet_paths:
89
+ raise ValueError("prepare_commit requires at least one parquet path")
90
+ self._prepared_files = list(dict.fromkeys(parquet_paths))
91
+ logger.info("Prepared %d data files for REST metadata commit", len(self._prepared_files))
92
+
93
+ def commit(self, *, snapshot_properties: dict[str, str] | None = None) -> int:
94
+ """Phase-2 commit: publish a new Iceberg snapshot via HTTPS REST."""
95
+ if not self._prepared_files:
96
+ raise CatalogCommitError("commit called before prepare_commit")
97
+
98
+ table = self.connect()
99
+ props = snapshot_properties or {
100
+ "write.format.default": "parquet",
101
+ "app-id": "serverless-data-mesh",
102
+ }
103
+ try:
104
+ with table.transaction() as tx:
105
+ tx.add_files(self._prepared_files, snapshot_properties=props)
106
+ except Exception as exc:
107
+ raise CatalogCommitError(f"Glue REST commit failed: {exc}") from exc
108
+
109
+ snapshot_id = table.metadata.current_snapshot_id()
110
+ logger.info(
111
+ "Committed snapshot %s with %d files to %s.%s",
112
+ snapshot_id,
113
+ len(self._prepared_files),
114
+ self.namespace,
115
+ self.table_name,
116
+ )
117
+ self._prepared_files.clear()
118
+ return int(snapshot_id or 0)
119
+
120
+ def abort(self) -> None:
121
+ """Abort the in-flight metadata transaction without catalog side effects."""
122
+ self._prepared_files.clear()
123
+ logger.info("Aborted pending REST catalog commit for %s.%s", self.namespace, self.table_name)
124
+
125
+ def rollback_to_snapshot(self, snapshot_id: int) -> None:
126
+ """Rollback table metadata to a prior snapshot (IceGuard timeout recovery)."""
127
+ table = self.connect()
128
+ table.manage_snapshots().rollback_to_snapshot(snapshot_id).commit()
129
+ logger.warning(
130
+ "Rolled back %s.%s to snapshot %s",
131
+ self.namespace,
132
+ self.table_name,
133
+ snapshot_id,
134
+ )
@@ -0,0 +1,165 @@
1
+ #!/usr/bin/env python3
2
+ """CLI entry points for Serverless Data Mesh."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import argparse
7
+ import json
8
+ import sys
9
+
10
+
11
def _cmd_demo(args: argparse.Namespace) -> int:
    """Run the local demo sequence and report it as JSON or as a text summary.

    Returns 0 when the demo result says the gate blocked the bad data,
    otherwise 1.
    """
    from serverless_data_mesh.local.runtime import LocalPVDMRuntime
    from serverless_data_mesh.verification.backend import veridata_available

    report = LocalPVDMRuntime().run_demo_sequence()

    if not args.json:
        backend = report.get("verifier_backend", "unknown")
        # Extra hint only when the pure-Python verifier was used.
        fallback_note = (
            " (pure-Python verifier - no Rust wheel needed)\n"
            if backend == "pure-python-fallback"
            else ""
        )
        print("\n Serverless Data Mesh - local PVDM demo (no AWS)\n")
        print(f" Verifier: {backend}{fallback_note}")
        print(f" Workspace: {report['root']}")
        print(f" Elapsed: {report['elapsed_ms']} ms\n")

        phases = report["phases"]
        clean_phase = phases["clean_write"]
        corrupt_phase = phases["corrupt_write"]
        consumer_view = report["consumer"]
        print(
            f" Phase 1 - clean write: {clean_phase['outcome']} "
            f"({clean_phase['records_written']} rows)"
        )
        print(
            f" Phase 2 - corrupt write: {corrupt_phase['outcome']} "
            f"(VRP {corrupt_phase['proof_verdict']})"
        )
        print(f" Consumer visible rows: {consumer_view['visible_row_count']}")
        print(f" Gate blocked bad data: {consumer_view['gate_blocked_bad_data']}\n")
        print(f" {report['summary']}\n")
        print(" Vaquar Pattern (PVDM): Physical -> Verify -> Durable -> Metadata")
        if not veridata_available():
            print(" Tip: pip install veridata-recon on Linux for cryptographic VRP proofs.")
        print(" Docs: docs/vaquar-pattern.md\n")
    else:
        print(json.dumps(report, indent=2))

    blocked = report["consumer"]["gate_blocked_bad_data"]
    return 0 if blocked else 1
43
+
44
+
45
def _cmd_init(args: argparse.Namespace) -> int:
    """Scaffold a domain writer from the CLI arguments and print next steps.

    Always returns 0.
    """
    from serverless_data_mesh.scaffold.init_domain import scaffold_domain

    scaffold_root = scaffold_domain(
        domain=args.domain,
        table=args.table,
        account_id=args.account,
        output_dir=args.output,
    )
    messages = (
        f"\n Domain scaffold created: {scaffold_root}\n",
        " Next steps:",
        f" 1. Edit {scaffold_root}/handler.py",
        f" 2. Review {scaffold_root}/contract.yaml",
        f" 3. Deploy {scaffold_root}/terraform/ (copy tfvars.example -> tfvars)",
        f" 4. Run tests: pytest {scaffold_root}/tests/\n",
    )
    for message in messages:
        print(message)
    return 0
61
+
62
+
63
def _cmd_dashboard(args: argparse.Namespace) -> int:
    """Render the trust dashboard and optionally open it in a browser.

    Demo mode is used when neither ``--proofs-dir`` nor ``--cloudwatch`` is
    given. Always returns 0.
    """
    from serverless_data_mesh.dashboard.trust import render_trust_dashboard

    path = render_trust_dashboard(
        proofs_dir=args.proofs_dir,
        output=args.output,
        demo=not args.proofs_dir and not args.cloudwatch,
        cloudwatch=args.cloudwatch,
        cloudwatch_region=args.region,
    )
    print(f"Trust dashboard written: {path}")
    if args.open_browser:
        import webbrowser
        from pathlib import Path

        # A hand-built "file://<path>" is wrong for relative paths (the first
        # segment is parsed as a host), Windows drive paths, and paths that
        # need percent-encoding. as_uri() requires an absolute path, hence
        # resolve().
        webbrowser.open(Path(path).resolve().as_uri())
    return 0
79
+
80
+
81
def _cmd_canary(args: argparse.Namespace) -> int:
    """Run the canary comparison and report it as JSON or as a text summary.

    Returns 0 when the result's ``promote`` flag is truthy, otherwise 1.
    """
    from serverless_data_mesh.orchestration.canary import run_canary

    verdict = run_canary(
        record_count=args.records,
        inject_canary_drift=args.drift,
        max_divergence_pct=args.max_divergence,
    )
    if not args.json:
        print(f"\n Canary outcome: {verdict['outcome']}")
        print(f" Production VRP: {verdict['production_verdict']}")
        print(f" Canary VRP: {verdict['canary_verdict']}")
        print(f" Divergence: {verdict['divergence_pct']}%")
        print(f" Promote: {verdict['promote']}\n")
        print(f" {verdict['message']}\n")
    else:
        print(json.dumps(verdict, indent=2))

    if verdict["promote"]:
        return 0
    return 1
99
+
100
+
101
def _cmd_reprocess_demo(args: argparse.Namespace) -> int:
    """Run the write-with-auto-repair demo and report the result.

    Returns 0 when the outcome is ``"repaired_and_committed"``, otherwise 1.
    """
    from serverless_data_mesh.local.runtime import LocalPVDMRuntime

    summary = LocalPVDMRuntime().run_write_with_auto_repair(
        record_count=args.records,
        drop_count=args.drop,
    )
    if not args.json:
        print("\n Auto VRP reprocessing demo\n")
        print(f" Outcome: {summary['outcome']}")
        repair_info = summary.get("repair", {})
        print(f" Missing before: {repair_info.get('missing_before', '?')}")
        print(f" Repair attempts: {repair_info.get('attempts', '?')}")
        print(f" Consumer rows: {summary.get('consumer_row_count', 0)}\n")
    else:
        # default=str stringifies any value json cannot serialise natively.
        print(json.dumps(summary, indent=2, default=str))

    succeeded = summary.get("outcome") == "repaired_and_committed"
    return 0 if succeeded else 1
119
+
120
+
121
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and dispatch to the selected subcommand handler.

    Args:
        argv: argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The handler's return value converted to ``int``.
    """
    root_parser = argparse.ArgumentParser(
        prog="serverless-data-mesh",
        description="Serverless Data Mesh - federated lakehouse writes on AWS Lambda",
    )
    commands = root_parser.add_subparsers(dest="command", required=True)

    # demo
    demo_parser = commands.add_parser("demo", help="Run local PVDM demo in <60s without AWS")
    demo_parser.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
    demo_parser.set_defaults(func=_cmd_demo)

    # init
    init_parser = commands.add_parser("init", help="Scaffold a new proof-gated domain writer")
    init_parser.add_argument("--domain", required=True, help="Domain id (e.g. payments)")
    init_parser.add_argument("--table", required=True, help="Target Iceberg table")
    init_parser.add_argument("--account", required=True, help="Producer AWS account ID")
    init_parser.add_argument("--output", default="domains", help="Output parent directory")
    init_parser.set_defaults(func=_cmd_init)

    # dashboard
    dashboard_parser = commands.add_parser("dashboard", help="Generate mesh trust dashboard HTML")
    dashboard_parser.add_argument(
        "--proofs-dir", help="Steward proofs directory (local or mounted S3)"
    )
    dashboard_parser.add_argument(
        "--cloudwatch", action="store_true", help="Pull live metrics from CloudWatch"
    )
    dashboard_parser.add_argument("--region", help="AWS region for CloudWatch")
    dashboard_parser.add_argument("--output", default="mesh-trust-dashboard.html")
    dashboard_parser.add_argument("--open", dest="open_browser", action="store_true")
    dashboard_parser.set_defaults(func=_cmd_dashboard)

    # canary
    canary_parser = commands.add_parser(
        "canary", help="Run VRP canary comparison before promotion"
    )
    canary_parser.add_argument("--records", type=int, default=1000)
    canary_parser.add_argument(
        "--drift", action="store_true", help="Inject canary row-count drift"
    )
    canary_parser.add_argument("--max-divergence", type=float, default=1.0)
    canary_parser.add_argument("--json", action="store_true")
    canary_parser.set_defaults(func=_cmd_canary)

    # reprocess-demo
    reprocess_parser = commands.add_parser(
        "reprocess-demo", help="Demo auto VRP repair after dropped records"
    )
    reprocess_parser.add_argument("--records", type=int, default=100)
    reprocess_parser.add_argument("--drop", type=int, default=5)
    reprocess_parser.add_argument("--json", action="store_true")
    reprocess_parser.set_defaults(func=_cmd_reprocess_demo)

    parsed = root_parser.parse_args(argv)
    handler = parsed.func
    return int(handler(parsed))
162
+
163
+
164
+ if __name__ == "__main__":
165
+ raise SystemExit(main())
@@ -0,0 +1,42 @@
1
+ """Runtime configuration loaded from environment variables."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass
7
+
8
+
9
+ @dataclass(frozen=True, slots=True)
10
+ class MeshSettings:
11
+ """Operational settings for Lambda domain writers."""
12
+
13
+ checkpoint_bucket: str
14
+ proof_bucket: str
15
+ iceberg_table_bucket: str
16
+ aws_region: str
17
+ checkpoint_interval: int = 5000
18
+ rollback_threshold_ms: int = 30_000
19
+ lambda_timeout_seconds: int = 900
20
+ iceberg_warehouse: str | None = None
21
+ aws_account_id: str | None = None
22
+
23
+ @classmethod
24
+ def from_environment(cls) -> MeshSettings:
25
+ checkpoint = os.environ.get("ICEGUARD_CHECKPOINT_BUCKET")
26
+ if not checkpoint:
27
+ raise ValueError("ICEGUARD_CHECKPOINT_BUCKET is required")
28
+
29
+ region = os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION", "us-east-1"))
30
+ table_bucket = os.environ.get("ICEBERG_TABLE_BUCKET", "default")
31
+
32
+ return cls(
33
+ checkpoint_bucket=checkpoint,
34
+ proof_bucket=os.environ.get("VRP_PROOF_BUCKET", checkpoint),
35
+ iceberg_table_bucket=table_bucket,
36
+ aws_region=region,
37
+ checkpoint_interval=int(os.environ.get("ICEGUARD_CHECKPOINT_INTERVAL", "5000")),
38
+ rollback_threshold_ms=int(os.environ.get("ICEGUARD_ROLLBACK_THRESHOLD_MS", "30000")),
39
+ lambda_timeout_seconds=int(os.environ.get("LAMBDA_TIMEOUT_SECONDS", "900")),
40
+ iceberg_warehouse=os.environ.get("ICEBERG_WAREHOUSE"),
41
+ aws_account_id=os.environ.get("AWS_ACCOUNT_ID"),
42
+ )
@@ -0,0 +1,5 @@
1
+ """Mesh trust dashboard."""
2
+
3
+ from serverless_data_mesh.dashboard.trust import render_trust_dashboard
4
+
5
+ __all__ = ["render_trust_dashboard"]
@@ -0,0 +1,80 @@
1
+ """Fetch live VRP trust rows from CloudWatch metrics."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from datetime import datetime, timedelta, timezone
7
+ from typing import Any
8
+
9
logger = logging.getLogger(__name__)

NAMESPACE = "ServerlessDataMesh/Trust"


def _list_trust_domains(client: Any) -> list[str]:
    """Return the sorted ``Domain`` dimension values of ``VRPTrustScore``.

    Follows ``NextToken`` so results beyond the first page are included.
    """
    domains: set[str] = set()
    request: dict[str, Any] = {"Namespace": NAMESPACE, "MetricName": "VRPTrustScore"}
    while True:
        page = client.list_metrics(**request)
        domains.update(
            dim["Value"]
            for metric in page.get("Metrics", [])
            for dim in metric.get("Dimensions", [])
            if dim.get("Name") == "Domain"
        )
        token = page.get("NextToken")
        if not token:
            return sorted(domains)
        request["NextToken"] = token


def _latest_datapoint(
    client: Any,
    metric_name: str,
    domain: str,
    start: datetime,
    end: datetime,
) -> dict[str, Any] | None:
    """Return the newest hourly ``Maximum`` datapoint, or ``None`` if none exist."""
    stats = client.get_metric_statistics(
        Namespace=NAMESPACE,
        MetricName=metric_name,
        Dimensions=[{"Name": "Domain", "Value": domain}],
        StartTime=start,
        EndTime=end,
        Period=3600,
        Statistics=["Maximum"],
    )
    points = stats.get("Datapoints", [])
    return max(points, key=lambda p: p["Timestamp"]) if points else None


def fetch_cloudwatch_trust_rows(
    *,
    hours: int = 24,
    region: str | None = None,
    cloudwatch_client: Any | None = None,
) -> list[dict[str, Any]]:
    """Build one dashboard row per domain from CloudWatch trust metrics.

    Args:
        hours: size of the look-back window ending now (UTC).
        region: region for the boto3 client created when none is injected.
        cloudwatch_client: pre-built client; when given, boto3 is not needed.

    Returns:
        Rows with keys ``domain``, ``last_vrp``, ``status``, ``rows`` and
        ``detail``, sorted by domain. A domain with no trust datapoint in the
        window is reported with score 0.0, i.e. ``FAIL``. Returns an empty
        list when no client is injected and boto3 cannot be imported.
    """
    client = cloudwatch_client
    if client is None:
        # boto3 is only required when we must create the client ourselves.
        try:
            import boto3
        except ImportError:
            logger.debug("boto3 unavailable for CloudWatch dashboard")
            return []
        client = boto3.client("cloudwatch", region_name=region)

    end = datetime.now(timezone.utc)
    start = end - timedelta(hours=hours)

    rows: list[dict[str, Any]] = []
    for domain in _list_trust_domains(client):
        latest_trust = _latest_datapoint(client, "VRPTrustScore", domain, start, end)
        latest_count = _latest_datapoint(client, "VRPRowCount", domain, start, end)

        score = latest_trust["Maximum"] if latest_trust else 0.0
        passed = score >= 1.0
        rows.append(
            {
                "domain": domain,
                "last_vrp": (
                    latest_trust["Timestamp"].strftime("%I:%M %p")
                    if latest_trust
                    else "no data"
                ),
                "status": "PASS" if passed else "FAIL",
                "rows": str(int(latest_count["Maximum"])) if latest_count else "?",
                "detail": "" if passed else "VRP trust score below 1.0",
            }
        )
    return rows