serverless-data-mesh 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- serverless_data_mesh/__init__.py +93 -0
- serverless_data_mesh/catalog/__init__.py +6 -0
- serverless_data_mesh/catalog/glue_connector.py +17 -0
- serverless_data_mesh/catalog/glue_rest.py +134 -0
- serverless_data_mesh/cli.py +165 -0
- serverless_data_mesh/config.py +42 -0
- serverless_data_mesh/dashboard/__init__.py +5 -0
- serverless_data_mesh/dashboard/cloudwatch.py +80 -0
- serverless_data_mesh/dashboard/trust.py +162 -0
- serverless_data_mesh/exceptions.py +23 -0
- serverless_data_mesh/governance/__init__.py +9 -0
- serverless_data_mesh/governance/consumer_sla.py +109 -0
- serverless_data_mesh/lineage/__init__.py +5 -0
- serverless_data_mesh/lineage/openlineage.py +96 -0
- serverless_data_mesh/local/__init__.py +5 -0
- serverless_data_mesh/local/runtime.py +380 -0
- serverless_data_mesh/metrics/__init__.py +5 -0
- serverless_data_mesh/metrics/mesh_trust.py +56 -0
- serverless_data_mesh/orchestration/__init__.py +28 -0
- serverless_data_mesh/orchestration/canary.py +127 -0
- serverless_data_mesh/orchestration/coordinator.py +265 -0
- serverless_data_mesh/orchestration/durable_steps.py +74 -0
- serverless_data_mesh/orchestration/reprocess.py +143 -0
- serverless_data_mesh/orchestration/state.py +16 -0
- serverless_data_mesh/py.typed +0 -0
- serverless_data_mesh/rules/__init__.py +8 -0
- serverless_data_mesh/rules/sparkrules_connector.py +193 -0
- serverless_data_mesh/scaffold/__init__.py +5 -0
- serverless_data_mesh/scaffold/init_domain.py +210 -0
- serverless_data_mesh/types/__init__.py +21 -0
- serverless_data_mesh/types/workload.py +123 -0
- serverless_data_mesh/verification/__init__.py +21 -0
- serverless_data_mesh/verification/backend.py +41 -0
- serverless_data_mesh/verification/fallback.py +200 -0
- serverless_data_mesh/verification/vrp.py +202 -0
- serverless_data_mesh-0.2.0.dist-info/METADATA +143 -0
- serverless_data_mesh-0.2.0.dist-info/RECORD +40 -0
- serverless_data_mesh-0.2.0.dist-info/WHEEL +4 -0
- serverless_data_mesh-0.2.0.dist-info/entry_points.txt +2 -0
- serverless_data_mesh-0.2.0.dist-info/licenses/LICENSE +17 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
"""Run the Vaquar Pattern (PVDM) lifecycle on local disk without AWS."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import tempfile
|
|
7
|
+
import time
|
|
8
|
+
from dataclasses import asdict, dataclass
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from serverless_data_mesh.types.workload import (
|
|
14
|
+
DataWriteWorkload,
|
|
15
|
+
DomainTransactionBoundary,
|
|
16
|
+
WriteOutcome,
|
|
17
|
+
)
|
|
18
|
+
from serverless_data_mesh.metrics.mesh_trust import publish_vrp_metric
|
|
19
|
+
from serverless_data_mesh.orchestration.reprocess import attempt_vrp_repair
|
|
20
|
+
from serverless_data_mesh.verification.backend import create_proof_generator
|
|
21
|
+
from serverless_data_mesh.verification.vrp import validate_then_commit
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True, slots=True)
|
|
25
|
+
class LocalWriteResult:
|
|
26
|
+
"""Outcome of a local PVDM chunk write."""
|
|
27
|
+
|
|
28
|
+
outcome: str
|
|
29
|
+
workload_id: str
|
|
30
|
+
records_written: int
|
|
31
|
+
proof_verdict: str
|
|
32
|
+
snapshot_id: str | None
|
|
33
|
+
proof_path: str | None
|
|
34
|
+
consumer_row_count: int
|
|
35
|
+
message: str | None = None
|
|
36
|
+
|
|
37
|
+
def to_dict(self) -> dict[str, Any]:
|
|
38
|
+
return asdict(self)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _default_boundary() -> DomainTransactionBoundary:
    """Build the fixed demo transaction boundary used by the local runtime.

    Returns:
        A ``DomainTransactionBoundary`` for the ``orders-domain`` demo domain,
        reading from ``raw_orders`` and targeting the ``orders_curated`` table
        partition ``dt=2026-06-14``.
    """
    demo_settings = {
        "domain_id": "orders-domain",
        "source_namespace": "raw_orders",
        "target_table": "orders_curated",
        "partition_spec": {"dt": "2026-06-14"},
    }
    return DomainTransactionBoundary(**demo_settings)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _default_workload(root: Path, *, workload_id: str, total_records: int) -> DataWriteWorkload:
    """Describe a demo write workload whose storage locations live under *root*.

    Args:
        root: Directory that holds the local source, lakehouse, checkpoint and
            proof folders.
        workload_id: Identifier assigned to the workload.
        total_records: Number of records the workload is expected to write.

    Returns:
        A ``DataWriteWorkload`` bound to the default demo boundary.
    """
    demo_boundary = _default_boundary()
    # Source/target are expressed as file:// URIs; the buckets are plain paths.
    uri_prefix = f"file://{root}"
    checkpoint_dir = root / "checkpoints"
    proof_dir = root / "proofs"
    return DataWriteWorkload(
        workload_id=workload_id,
        boundary=demo_boundary,
        source_uri=uri_prefix + "/source/",
        target_uri=uri_prefix + "/lakehouse/orders_curated/",
        total_records=total_records,
        checkpoint_bucket=str(checkpoint_dir),
        proof_bucket=str(proof_dir),
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _records(n: int, *, corrupt_last: bool = False) -> list[dict[str, str]]:
|
|
64
|
+
rows = [{"id": str(i), "payload_hash": f"h{i}"} for i in range(n)]
|
|
65
|
+
if corrupt_last and rows:
|
|
66
|
+
rows[-1] = {"id": rows[-1]["id"], "payload_hash": "CORRUPT"}
|
|
67
|
+
return rows
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class LocalPVDMRuntime:
    """Simulate Physical → Verify → Durable → Metadata on a laptop.

    Uses veridata-recon when available; pure-Python fallback on Windows/Mac without wheels.
    Checkpoints, proofs, and catalog snapshots are stored on local disk.

    Directory layout under ``root``:

    * ``checkpoints/`` – one ``<workload_id>.json`` per committed workload
    * ``proofs/`` – persisted proof documents, grouped by domain and workload
    * ``lakehouse/orders_curated/dt=2026-06-14/`` – physical ``.jsonl`` part files
    * ``catalog/snapshots.json`` – committed snapshots (what consumers see)
    * ``catalog/pending.json`` – deferred writes awaiting a mesh leader commit
    """

    def __init__(self, root: Path | str | None = None) -> None:
        """Create the on-disk layout.

        Args:
            root: Directory to work in (``Path`` or string). When omitted or
                empty, a fresh temporary directory is created.
        """
        # Coerce to Path so string roots work with the "/" joins below.
        self.root = Path(root) if root else Path(tempfile.mkdtemp(prefix="sdm-demo-"))
        self.checkpoints = self.root / "checkpoints"
        self.proofs = self.root / "proofs"
        self.lakehouse = self.root / "lakehouse" / "orders_curated" / "dt=2026-06-14"
        self.catalog = self.root / "catalog"
        for path in (self.checkpoints, self.proofs, self.lakehouse, self.catalog):
            path.mkdir(parents=True, exist_ok=True)
        self._snapshot_file = self.catalog / "snapshots.json"
        if not self._snapshot_file.exists():
            self._snapshot_file.write_text("[]", encoding="utf-8")
        # Backend label of the most recently used proof generator; None until
        # a write has run, so reading it early no longer raises AttributeError.
        self._last_backend: Any | None = None

    @property
    def consumer_row_count(self) -> int:
        """Rows visible to consumers from the latest committed snapshot."""
        snapshots = json.loads(self._snapshot_file.read_text(encoding="utf-8"))
        if not snapshots:
            return 0
        return int(snapshots[-1]["row_count"])

    def _resolve_generator(self, proof_generator: Any | None) -> tuple[Any, Any]:
        """Return ``(generator, backend_label)`` and remember the label.

        A caller-supplied generator is used as-is and labelled with its
        ``producer`` attribute (``"custom"`` when absent); otherwise the
        default generator from ``create_proof_generator`` is used.
        """
        if proof_generator is None:
            gen, backend = create_proof_generator()
        else:
            gen = proof_generator
            backend = getattr(gen, "producer", "custom")
        self._last_backend = backend
        return gen, backend

    def _persist_proof(
        self,
        proof: dict[str, Any],
        *,
        workload: DataWriteWorkload,
        chunk_index: int,
    ) -> Path:
        """Write *proof* as sorted, indented JSON and return its path.

        The file lands at
        ``proofs/<domain_id>/<workload_id>/chunk-<index:06d>.vrp.json``.
        """
        rel = f"{workload.boundary.domain_id}/{workload.workload_id}"
        dest_dir = self.proofs / rel
        dest_dir.mkdir(parents=True, exist_ok=True)
        path = dest_dir / f"chunk-{chunk_index:06d}.vrp.json"
        path.write_text(json.dumps(proof, indent=2, sort_keys=True), encoding="utf-8")
        return path

    def _write_physical(self, records: list[dict[str, str]], *, part_name: str) -> Path:
        """Write *records* as JSON Lines to ``<part_name>.jsonl`` in the lakehouse."""
        part = self.lakehouse / f"{part_name}.jsonl"
        with part.open("w", encoding="utf-8") as handle:
            handle.writelines(json.dumps(row) + "\n" for row in records)
        return part

    def _commit_metadata(self, *, workload: DataWriteWorkload, row_count: int, proof_id: str) -> str:
        """Append a snapshot to the catalog and write the workload checkpoint.

        Returns:
            The new snapshot id, ``snap-<n:06d>``, numbered from the count of
            existing snapshots.
        """
        snapshots = json.loads(self._snapshot_file.read_text(encoding="utf-8"))
        snapshot_id = f"snap-{len(snapshots) + 1:06d}"
        snapshots.append(
            {
                "snapshot_id": snapshot_id,
                "table": workload.boundary.target_table,
                "partition": workload.boundary.partition_spec,
                "row_count": row_count,
                "proof_id": proof_id,
                "committed_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(),
            }
        )
        self._snapshot_file.write_text(json.dumps(snapshots, indent=2), encoding="utf-8")
        checkpoint = self.checkpoints / f"{workload.workload_id}.json"
        checkpoint.write_text(
            json.dumps({"workload_id": workload.workload_id, "snapshot_id": snapshot_id}),
            encoding="utf-8",
        )
        return snapshot_id

    def run_write(
        self,
        *,
        workload_id: str = "local-demo-001",
        record_count: int = 1000,
        corrupt_sink: bool = False,
        proof_generator: Any | None = None,
        defer_snapshot: bool = False,
    ) -> LocalWriteResult:
        """Execute one PVDM write cycle on local disk.

        Args:
            workload_id: Identifier for this write; also names the part file.
            record_count: Number of synthetic records to generate and write.
            corrupt_sink: Corrupt the last sink record to exercise the gate.
            proof_generator: Optional generator exposing ``build_proof``; the
                default backend is created when omitted.
            defer_snapshot: Record the write in ``pending.json`` instead of
                committing a snapshot, for a later
                :meth:`finalize_mesh_transaction`.

        Returns:
            A ``LocalWriteResult``; ``snapshot_id`` is ``None`` when
            verification failed or the snapshot was deferred.
        """
        workload = _default_workload(self.root, workload_id=workload_id, total_records=record_count)
        gen, _ = self._resolve_generator(proof_generator)

        source = _records(record_count)
        sink = _records(record_count, corrupt_last=corrupt_sink)

        # Physical: data lands on disk before verification; visibility is
        # controlled solely by the catalog snapshot written further down.
        self._write_physical(sink, part_name=f"{workload_id}-part-00000")

        # Verify: build the proof, validate it, and persist it either way.
        proof = gen.build_proof(
            source_records=source,
            sink_records=sink,
            workload=workload,
            chunk_start=0,
            chunk_end=record_count,
        )
        verification = validate_then_commit(proof)
        proof_path = self._persist_proof(proof, workload=workload, chunk_index=0)
        verdict = proof["reconciliation"]["verdict"]
        publish_vrp_metric(
            domain_id=workload.boundary.domain_id,
            verdict=verdict,
            row_count=record_count,
            workload_id=workload_id,
        )

        if verification.outcome != "PASS":
            return LocalWriteResult(
                outcome=WriteOutcome.VERIFICATION_FAILED.value,
                workload_id=workload_id,
                records_written=0,
                proof_verdict=verdict,
                snapshot_id=None,
                proof_path=str(proof_path),
                consumer_row_count=self.consumer_row_count,
                message=verification.reason,
            )

        if defer_snapshot:
            # Queue the write; the mesh leader commits all pending rows at once.
            pending = self.catalog / "pending.json"
            pending_rows = []
            if pending.exists():
                pending_rows = json.loads(pending.read_text(encoding="utf-8"))
            pending_rows.append(
                {
                    "workload_id": workload_id,
                    "row_count": record_count,
                    "proof_id": proof["proof_id"],
                }
            )
            pending.write_text(json.dumps(pending_rows, indent=2), encoding="utf-8")
            return LocalWriteResult(
                outcome=WriteOutcome.COMMITTED.value,
                workload_id=workload_id,
                records_written=record_count,
                proof_verdict=verdict,
                snapshot_id=None,
                proof_path=str(proof_path),
                consumer_row_count=self.consumer_row_count,
                message="VRP PASS; snapshot deferred for mesh leader commit",
            )

        # Durable + Metadata: publish the snapshot consumers will read.
        snapshot_id = self._commit_metadata(
            workload=workload,
            row_count=record_count,
            proof_id=proof["proof_id"],
        )
        return LocalWriteResult(
            outcome=WriteOutcome.COMMITTED.value,
            workload_id=workload_id,
            records_written=record_count,
            proof_verdict=verdict,
            snapshot_id=snapshot_id,
            proof_path=str(proof_path),
            consumer_row_count=self.consumer_row_count,
        )

    def finalize_mesh_transaction(self, domain_results: list[LocalWriteResult]) -> dict[str, Any]:
        """Leader commit: all domains must VRP PASS or no consumer snapshot.

        Args:
            domain_results: Results of the deferred per-domain writes.

        Returns:
            A dictionary with ``mesh_outcome`` and ``consumer_row_count``; on
            success also ``snapshot_id`` and ``domains_committed``, otherwise
            a ``message`` explaining why nothing was committed.
        """
        pending_file = self.catalog / "pending.json"
        if any(r.outcome != WriteOutcome.COMMITTED.value for r in domain_results):
            if pending_file.exists():
                pending_file.unlink()
            return {
                "mesh_outcome": WriteOutcome.VERIFICATION_FAILED.value,
                "consumer_row_count": self.consumer_row_count,
                "message": "At least one domain failed VRP; pending snapshots discarded",
            }

        if not pending_file.exists():
            return {
                "mesh_outcome": WriteOutcome.VERIFICATION_FAILED.value,
                "consumer_row_count": self.consumer_row_count,
                "message": "No pending domain writes",
            }

        pending_rows = json.loads(pending_file.read_text(encoding="utf-8"))
        if not pending_rows:
            # An empty queue has no proof to reference; treat it like a missing
            # file instead of failing with IndexError on pending_rows[-1].
            pending_file.unlink()
            return {
                "mesh_outcome": WriteOutcome.VERIFICATION_FAILED.value,
                "consumer_row_count": self.consumer_row_count,
                "message": "No pending domain writes",
            }

        total_rows = sum(int(row["row_count"]) for row in pending_rows)
        # The combined snapshot references the proof of the last queued write.
        proof_id = pending_rows[-1]["proof_id"]
        workload = _default_workload(self.root, workload_id="mesh-txn", total_records=total_rows)
        snapshot_id = self._commit_metadata(
            workload=workload,
            row_count=total_rows,
            proof_id=proof_id,
        )
        pending_file.unlink()
        return {
            "mesh_outcome": WriteOutcome.COMMITTED.value,
            "snapshot_id": snapshot_id,
            "consumer_row_count": self.consumer_row_count,
            "domains_committed": len(domain_results),
        }

    def run_write_with_auto_repair(
        self,
        *,
        workload_id: str = "auto-repair-demo",
        record_count: int = 100,
        drop_count: int = 5,
        proof_generator: Any | None = None,
    ) -> dict[str, Any]:
        """Simulate dropped records, auto-repair via VRP reprocessing, then commit.

        Args:
            workload_id: Identifier for this write.
            record_count: Number of source records.
            drop_count: Number of trailing records missing from the initial sink.
            proof_generator: Optional generator exposing ``build_proof``.

        Returns:
            A dictionary with ``outcome``, ``backend``, a ``repair`` summary and
            ``consumer_row_count``; ``snapshot_id`` and ``proof_path`` are
            included only when the repair passed and was committed.
        """
        workload = _default_workload(self.root, workload_id=workload_id, total_records=record_count)
        gen, backend = self._resolve_generator(proof_generator)

        source = _records(record_count)
        sink = _records(record_count - drop_count)
        repaired_sink = list(sink)

        def _merge_missing(missing: list[dict[str, str]]) -> list[dict[str, str]]:
            # Repair callback: append the missing rows and hand back the new sink.
            nonlocal repaired_sink
            repaired_sink = repaired_sink + missing
            return repaired_sink

        repair = attempt_vrp_repair(
            source_records=source,
            sink_records=sink,
            workload=workload,
            chunk_start=0,
            chunk_end=record_count,
            proof_generator=gen,
            write_repair_fn=_merge_missing,
        )
        repair_summary = {
            "outcome": repair.outcome,
            "attempts": repair.attempts,
            "missing_before": repair.missing_before,
            "missing_after": repair.missing_after,
            "message": repair.message,
        }

        if repair.outcome != "repaired_pass" or repair.proof is None:
            return {
                "outcome": repair.outcome,
                "backend": backend,
                "repair": repair_summary,
                "consumer_row_count": self.consumer_row_count,
            }

        self._write_physical(repaired_sink, part_name=f"{workload_id}-repaired")
        proof_path = self._persist_proof(repair.proof, workload=workload, chunk_index=0)
        publish_vrp_metric(
            domain_id=workload.boundary.domain_id,
            verdict="PASS",
            row_count=record_count,
            workload_id=workload_id,
        )
        snapshot_id = self._commit_metadata(
            workload=workload,
            row_count=record_count,
            proof_id=repair.proof["proof_id"],
        )
        return {
            "outcome": "repaired_and_committed",
            "backend": backend,
            "repair": repair_summary,
            "snapshot_id": snapshot_id,
            "proof_path": str(proof_path),
            "consumer_row_count": self.consumer_row_count,
        }

    def run_demo_sequence(self) -> dict[str, Any]:
        """Run clean write, corrupt write, and consumer visibility check.

        Returns:
            A report with the verifier backend, elapsed milliseconds, both
            write results, and what consumers can see afterwards.
        """
        gen, backend = create_proof_generator()

        started = time.perf_counter()
        clean = self.run_write(
            workload_id="demo-clean",
            record_count=1000,
            corrupt_sink=False,
            proof_generator=gen,
        )
        corrupt = self.run_write(
            workload_id="demo-corrupt",
            record_count=1000,
            corrupt_sink=True,
            proof_generator=gen,
        )
        elapsed_ms = round((time.perf_counter() - started) * 1000, 1)

        gate_blocked = corrupt.outcome == WriteOutcome.VERIFICATION_FAILED.value
        return {
            "mode": "local-pvdm",
            "verifier_backend": backend,
            "root": str(self.root),
            "elapsed_ms": elapsed_ms,
            "phases": {
                "clean_write": clean.to_dict(),
                "corrupt_write": corrupt.to_dict(),
            },
            "consumer": {
                "visible_row_count": self.consumer_row_count,
                "corrupt_data_visible": corrupt.outcome == WriteOutcome.COMMITTED.value,
                "gate_blocked_bad_data": gate_blocked,
            },
            "summary": (
                "VRP gate blocked corrupt write; consumers see only clean snapshot."
                if gate_blocked
                else "Unexpected: corrupt write committed."
            ),
        }
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Publish VRP trust metrics to CloudWatch for live dashboards."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)

# CloudWatch namespace under which both VRP metrics are published.
NAMESPACE = "ServerlessDataMesh/Trust"


def publish_vrp_metric(
    *,
    domain_id: str,
    verdict: str,
    row_count: int = 0,
    workload_id: str | None = None,
    cloudwatch_client: Any | None = None,
) -> None:
    """Emit VRP PASS/FAIL metric for CloudWatch / Grafana dashboards.

    Two data points are sent in one ``put_metric_data`` call:
    ``VRPTrustScore`` (1.0 when *verdict* is ``"PASS"``, otherwise 0.0) and
    ``VRPRowCount`` (*row_count* as a float).

    Publishing is skipped when the ``SDM_DISABLE_METRICS`` environment
    variable is ``1``, ``true`` or ``yes`` (case-insensitive), or when no
    client is supplied and ``boto3`` cannot be imported.

    Args:
        domain_id: Value of the ``Domain`` dimension.
        verdict: Proof verdict; only the exact string ``"PASS"`` scores 1.0.
        row_count: Row count reported as ``VRPRowCount``.
        workload_id: Optional value for an extra ``WorkloadId`` dimension.
        cloudwatch_client: Optional object exposing ``put_metric_data``; a
            default boto3 CloudWatch client is created when omitted.

    Errors raised while creating the client or by ``put_metric_data`` are not
    caught here.
    """
    if os.environ.get("SDM_DISABLE_METRICS", "").lower() in ("1", "true", "yes"):
        return

    client = cloudwatch_client
    if client is None:
        # boto3 is needed only to build the default client, so an injected
        # client keeps working in environments where boto3 is not installed.
        try:
            import boto3
        except ImportError:
            logger.debug("boto3 unavailable; skip CloudWatch metric")
            return
        client = boto3.client("cloudwatch")

    value = 1.0 if verdict == "PASS" else 0.0

    dimensions = [{"Name": "Domain", "Value": domain_id}]
    if workload_id:
        dimensions.append({"Name": "WorkloadId", "Value": workload_id})

    client.put_metric_data(
        Namespace=NAMESPACE,
        MetricData=[
            {
                "MetricName": "VRPTrustScore",
                "Dimensions": dimensions,
                "Value": value,
                "Unit": "None",
            },
            {
                "MetricName": "VRPRowCount",
                "Dimensions": dimensions,
                "Value": float(row_count),
                "Unit": "Count",
            },
        ],
    )
    logger.info("Published VRP metric domain=%s verdict=%s", domain_id, verdict)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Durable orchestration bridging IceGuard and AWS Lambda Durable Execution."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"IceGuardDurableCoordinator",
|
|
9
|
+
"OrchestrationState",
|
|
10
|
+
"durable_commit_metadata",
|
|
11
|
+
"durable_write_chunk",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def __getattr__(name: str) -> Any:
    """Resolve the package's public names lazily on first attribute access.

    Each supported name triggers the import of the submodule that defines it,
    so importing the package itself does not load those submodules.

    Raises:
        AttributeError: If *name* is not one of the lazily exported names.
    """
    if name in ("durable_commit_metadata", "durable_write_chunk"):
        from serverless_data_mesh.orchestration import durable_steps

        resolved = getattr(durable_steps, name)
    elif name == "OrchestrationState":
        from serverless_data_mesh.orchestration.state import OrchestrationState as resolved
    elif name == "IceGuardDurableCoordinator":
        from serverless_data_mesh.orchestration.coordinator import (
            IceGuardDurableCoordinator as resolved,
        )
    else:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return resolved
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Canary write comparison using VRP proof divergence (roadmap 11/10)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any, Callable
|
|
7
|
+
|
|
8
|
+
from serverless_data_mesh.verification.backend import create_proof_generator
|
|
9
|
+
from serverless_data_mesh.verification.vrp import validate_then_commit
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True, slots=True)
|
|
13
|
+
class CanaryResult:
|
|
14
|
+
outcome: str # canary_approved | canary_diverged
|
|
15
|
+
production_verdict: str
|
|
16
|
+
canary_verdict: str
|
|
17
|
+
divergence_pct: float
|
|
18
|
+
message: str
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def run_canary_comparison(
    *,
    production_source: list[dict[str, Any]],
    production_sink: list[dict[str, Any]],
    canary_sink: list[dict[str, Any]],
    workload_factory: Callable[[], Any],
    max_divergence_pct: float = 1.0,
    proof_generator: Any | None = None,
) -> CanaryResult:
    """Compare production vs canary VRP proofs before promoting a schema/logic change.

    Both sinks are proven against the same *production_source*. The canary is
    approved only when both proofs validate as ``"PASS"`` and the sink row
    counts differ by at most *max_divergence_pct* percent of production.

    Args:
        production_source: Source records both sinks are reconciled against.
        production_sink: Records written by the current production logic.
        canary_sink: Records written by the candidate (canary) logic.
        workload_factory: Zero-argument callable returning the workload passed
            to the proof generator.
        max_divergence_pct: Largest tolerated row-count divergence, in percent.
        proof_generator: Optional generator exposing ``build_proof``; the
            default backend from ``create_proof_generator`` is used when
            omitted.

    Returns:
        A ``CanaryResult`` with outcome ``"canary_approved"`` or
        ``"canary_diverged"``.
    """
    gen = proof_generator
    if gen is None:
        gen, _ = create_proof_generator()
    workload = workload_factory()
    n = len(production_source)

    def _prove(sink: list[dict[str, Any]]) -> Any:
        # Build a proof for *sink* covering the full source range [0, n).
        return gen.build_proof(
            source_records=production_source,
            sink_records=sink,
            workload=workload,
            chunk_start=0,
            chunk_end=n,
        )

    prod_proof = _prove(production_sink)
    canary_proof = _prove(canary_sink)
    prod_v = validate_then_commit(prod_proof).outcome
    canary_v = validate_then_commit(canary_proof).outcome

    def _result(outcome: str, divergence_pct: float, message: str) -> CanaryResult:
        # Every result carries the same pair of verdicts.
        return CanaryResult(
            outcome=outcome,
            production_verdict=prod_v,
            canary_verdict=canary_v,
            divergence_pct=divergence_pct,
            message=message,
        )

    if prod_v != "PASS" or canary_v != "PASS":
        # A failed proof is reported as full divergence without comparing counts.
        return _result("canary_diverged", 100.0, "One or both proofs failed VRP")

    # Fall back to the source length when a proof omits its sink count.
    prod_count = prod_proof["reconciliation"].get("sink_count", n)
    canary_count = canary_proof["reconciliation"].get("sink_count", n)
    # max(..., 1) avoids division by zero for an empty production sink.
    divergence = abs(canary_count - prod_count) / max(prod_count, 1) * 100

    if divergence > max_divergence_pct:
        return _result(
            "canary_diverged",
            round(divergence, 2),
            f"Row count divergence {divergence:.2f}% exceeds {max_divergence_pct}%",
        )

    return _result("canary_approved", round(divergence, 2), "Canary within tolerance")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def run_canary(
    *,
    record_count: int = 1000,
    inject_canary_drift: bool = False,
    max_divergence_pct: float = 1.0,
) -> dict[str, object]:
    """End-to-end canary promotion check with sample production vs canary sinks.

    Args:
        record_count: Number of synthetic source records to generate.
        inject_canary_drift: When true, the canary sink keeps only the first
            ``int(record_count * 0.98)`` records.
        max_divergence_pct: Tolerance forwarded to ``run_canary_comparison``.

    Returns:
        The comparison result's fields as a dictionary, plus ``promote``,
        which is true only when the outcome is ``"canary_approved"``.
    """
    from serverless_data_mesh.types.workload import DataWriteWorkload, DomainTransactionBoundary

    source = []
    for idx in range(record_count):
        source.append({"id": str(idx), "payload_hash": f"h{idx}"})

    production_sink = source.copy()
    # A limit of None keeps every record; drift trims the tail of the canary sink.
    keep = int(record_count * 0.98) if inject_canary_drift else None
    canary_sink = source[:keep]

    workload = DataWriteWorkload(
        workload_id="canary-001",
        boundary=DomainTransactionBoundary(
            domain_id="canary-demo",
            source_namespace="raw_canary",
            target_table="orders_curated",
            partition_spec={"dt": "2026-06-14"},
        ),
        source_uri="s3://publisher/source/",
        target_uri="s3://publisher/lakehouse/orders_curated/",
        total_records=record_count,
        checkpoint_bucket="s3://steward/checkpoints",
        proof_bucket="s3://steward/proofs",
    )

    def _make_workload() -> DataWriteWorkload:
        return workload

    result = run_canary_comparison(
        production_source=source,
        production_sink=production_sink,
        canary_sink=canary_sink,
        workload_factory=_make_workload,
        max_divergence_pct=max_divergence_pct,
    )

    report: dict[str, object] = {
        "outcome": result.outcome,
        "production_verdict": result.production_verdict,
        "canary_verdict": result.canary_verdict,
        "divergence_pct": result.divergence_pct,
        "message": result.message,
    }
    report["promote"] = result.outcome == "canary_approved"
    return report
|