serverless-data-mesh 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. serverless_data_mesh/__init__.py +93 -0
  2. serverless_data_mesh/catalog/__init__.py +6 -0
  3. serverless_data_mesh/catalog/glue_connector.py +17 -0
  4. serverless_data_mesh/catalog/glue_rest.py +134 -0
  5. serverless_data_mesh/cli.py +165 -0
  6. serverless_data_mesh/config.py +42 -0
  7. serverless_data_mesh/dashboard/__init__.py +5 -0
  8. serverless_data_mesh/dashboard/cloudwatch.py +80 -0
  9. serverless_data_mesh/dashboard/trust.py +162 -0
  10. serverless_data_mesh/exceptions.py +23 -0
  11. serverless_data_mesh/governance/__init__.py +9 -0
  12. serverless_data_mesh/governance/consumer_sla.py +109 -0
  13. serverless_data_mesh/lineage/__init__.py +5 -0
  14. serverless_data_mesh/lineage/openlineage.py +96 -0
  15. serverless_data_mesh/local/__init__.py +5 -0
  16. serverless_data_mesh/local/runtime.py +380 -0
  17. serverless_data_mesh/metrics/__init__.py +5 -0
  18. serverless_data_mesh/metrics/mesh_trust.py +56 -0
  19. serverless_data_mesh/orchestration/__init__.py +28 -0
  20. serverless_data_mesh/orchestration/canary.py +127 -0
  21. serverless_data_mesh/orchestration/coordinator.py +265 -0
  22. serverless_data_mesh/orchestration/durable_steps.py +74 -0
  23. serverless_data_mesh/orchestration/reprocess.py +143 -0
  24. serverless_data_mesh/orchestration/state.py +16 -0
  25. serverless_data_mesh/py.typed +0 -0
  26. serverless_data_mesh/rules/__init__.py +8 -0
  27. serverless_data_mesh/rules/sparkrules_connector.py +193 -0
  28. serverless_data_mesh/scaffold/__init__.py +5 -0
  29. serverless_data_mesh/scaffold/init_domain.py +210 -0
  30. serverless_data_mesh/types/__init__.py +21 -0
  31. serverless_data_mesh/types/workload.py +123 -0
  32. serverless_data_mesh/verification/__init__.py +21 -0
  33. serverless_data_mesh/verification/backend.py +41 -0
  34. serverless_data_mesh/verification/fallback.py +200 -0
  35. serverless_data_mesh/verification/vrp.py +202 -0
  36. serverless_data_mesh-0.2.0.dist-info/METADATA +143 -0
  37. serverless_data_mesh-0.2.0.dist-info/RECORD +40 -0
  38. serverless_data_mesh-0.2.0.dist-info/WHEEL +4 -0
  39. serverless_data_mesh-0.2.0.dist-info/entry_points.txt +2 -0
  40. serverless_data_mesh-0.2.0.dist-info/licenses/LICENSE +17 -0
@@ -0,0 +1,380 @@
1
+ """Run the Vaquar Pattern (PVDM) lifecycle on local disk without AWS."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import tempfile
7
+ import time
8
+ from dataclasses import asdict, dataclass
9
+ from datetime import datetime, timezone
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ from serverless_data_mesh.types.workload import (
14
+ DataWriteWorkload,
15
+ DomainTransactionBoundary,
16
+ WriteOutcome,
17
+ )
18
+ from serverless_data_mesh.metrics.mesh_trust import publish_vrp_metric
19
+ from serverless_data_mesh.orchestration.reprocess import attempt_vrp_repair
20
+ from serverless_data_mesh.verification.backend import create_proof_generator
21
+ from serverless_data_mesh.verification.vrp import validate_then_commit
22
+
23
+
24
+ @dataclass(frozen=True, slots=True)
25
+ class LocalWriteResult:
26
+ """Outcome of a local PVDM chunk write."""
27
+
28
+ outcome: str
29
+ workload_id: str
30
+ records_written: int
31
+ proof_verdict: str
32
+ snapshot_id: str | None
33
+ proof_path: str | None
34
+ consumer_row_count: int
35
+ message: str | None = None
36
+
37
+ def to_dict(self) -> dict[str, Any]:
38
+ return asdict(self)
39
+
40
+
41
def _default_boundary() -> DomainTransactionBoundary:
    """Build the fixed demo transaction boundary used by the local runtime."""
    boundary_fields = {
        "domain_id": "orders-domain",
        "source_namespace": "raw_orders",
        "target_table": "orders_curated",
        "partition_spec": {"dt": "2026-06-14"},
    }
    return DomainTransactionBoundary(**boundary_fields)
48
+
49
+
50
def _default_workload(root: Path, *, workload_id: str, total_records: int) -> DataWriteWorkload:
    """Describe a demo write workload whose storage locations all live under *root*.

    The source and target are expressed as ``file://`` URIs, while the
    checkpoint and proof "buckets" are plain directory paths below *root*.
    """
    source_uri = f"file://{root}/source/"
    target_uri = f"file://{root}/lakehouse/orders_curated/"
    checkpoint_dir = root / "checkpoints"
    proof_dir = root / "proofs"
    return DataWriteWorkload(
        workload_id=workload_id,
        boundary=_default_boundary(),
        source_uri=source_uri,
        target_uri=target_uri,
        total_records=total_records,
        checkpoint_bucket=str(checkpoint_dir),
        proof_bucket=str(proof_dir),
    )
61
+
62
+
63
+ def _records(n: int, *, corrupt_last: bool = False) -> list[dict[str, str]]:
64
+ rows = [{"id": str(i), "payload_hash": f"h{i}"} for i in range(n)]
65
+ if corrupt_last and rows:
66
+ rows[-1] = {"id": rows[-1]["id"], "payload_hash": "CORRUPT"}
67
+ return rows
68
+
69
+
70
class LocalPVDMRuntime:
    """Simulate Physical → Verify → Durable → Metadata on a laptop.

    Uses veridata-recon when available; pure-Python fallback on Windows/Mac without wheels.
    Checkpoints, proofs, and catalog snapshots are stored on local disk.

    Layout created under ``root``: ``checkpoints/``, ``proofs/``,
    ``lakehouse/orders_curated/dt=2026-06-14/`` and ``catalog/``. The catalog
    directory holds ``snapshots.json`` (committed snapshots, oldest first) and,
    while deferred writes are outstanding, ``pending.json``.
    """

    def __init__(self, root: Path | None = None) -> None:
        """Create the directory layout under *root*.

        Args:
            root: Base directory. When omitted (or falsy) a fresh temporary
                directory with the prefix ``sdm-demo-`` is created.
        """
        self.root = root or Path(tempfile.mkdtemp(prefix="sdm-demo-"))
        self.checkpoints = self.root / "checkpoints"
        self.proofs = self.root / "proofs"
        self.lakehouse = self.root / "lakehouse" / "orders_curated" / "dt=2026-06-14"
        self.catalog = self.root / "catalog"
        for path in (self.checkpoints, self.proofs, self.lakehouse, self.catalog):
            path.mkdir(parents=True, exist_ok=True)
        self._snapshot_file = self.catalog / "snapshots.json"
        if not self._snapshot_file.exists():
            self._snapshot_file.write_text("[]", encoding="utf-8")
        # Backend label of the most recent write; None until a write has run,
        # so reading it early no longer raises AttributeError.
        self._last_backend: Any | None = None

    @staticmethod
    def _resolve_generator(proof_generator: Any | None) -> tuple[Any, Any]:
        """Return ``(generator, backend_label)`` for an optional injected generator.

        Without an injected generator the pair comes from
        ``create_proof_generator()``; otherwise the label is the generator's
        ``producer`` attribute, or ``"custom"`` when it has none.
        """
        if proof_generator is None:
            gen, backend = create_proof_generator()
            return gen, backend
        return proof_generator, getattr(proof_generator, "producer", "custom")

    @property
    def consumer_row_count(self) -> int:
        """Rows visible to consumers from the latest committed snapshot."""
        snapshots = json.loads(self._snapshot_file.read_text(encoding="utf-8"))
        if not snapshots:
            return 0
        return int(snapshots[-1]["row_count"])

    def _persist_proof(
        self,
        proof: dict[str, Any],
        *,
        workload: DataWriteWorkload,
        chunk_index: int,
    ) -> Path:
        """Write *proof* as sorted, indented JSON and return the file path.

        The file is stored at
        ``proofs/<domain_id>/<workload_id>/chunk-<index:06d>.vrp.json``.
        """
        rel = f"{workload.boundary.domain_id}/{workload.workload_id}"
        dest_dir = self.proofs / rel
        dest_dir.mkdir(parents=True, exist_ok=True)
        path = dest_dir / f"chunk-{chunk_index:06d}.vrp.json"
        path.write_text(json.dumps(proof, indent=2, sort_keys=True), encoding="utf-8")
        return path

    def _write_physical(self, records: list[dict[str, str]], *, part_name: str) -> Path:
        """Write *records* as JSON Lines to ``<part_name>.jsonl`` in the lakehouse partition."""
        part = self.lakehouse / f"{part_name}.jsonl"
        with part.open("w", encoding="utf-8") as handle:
            handle.writelines(json.dumps(row) + "\n" for row in records)
        return part

    def _commit_metadata(self, *, workload: DataWriteWorkload, row_count: int, proof_id: str) -> str:
        """Append a snapshot to the catalog and write the workload checkpoint.

        Snapshot ids are sequential (``snap-000001``, ...) based on the number
        of snapshots already recorded.

        Returns:
            The id of the newly appended snapshot.
        """
        snapshots = json.loads(self._snapshot_file.read_text(encoding="utf-8"))
        snapshot_id = f"snap-{len(snapshots) + 1:06d}"
        snapshots.append(
            {
                "snapshot_id": snapshot_id,
                "table": workload.boundary.target_table,
                "partition": workload.boundary.partition_spec,
                "row_count": row_count,
                "proof_id": proof_id,
                "committed_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(),
            }
        )
        self._snapshot_file.write_text(json.dumps(snapshots, indent=2), encoding="utf-8")
        checkpoint = self.checkpoints / f"{workload.workload_id}.json"
        checkpoint.write_text(
            json.dumps({"workload_id": workload.workload_id, "snapshot_id": snapshot_id}),
            encoding="utf-8",
        )
        return snapshot_id

    def run_write(
        self,
        *,
        workload_id: str = "local-demo-001",
        record_count: int = 1000,
        corrupt_sink: bool = False,
        proof_generator: Any | None = None,
        defer_snapshot: bool = False,
    ) -> LocalWriteResult:
        """Execute one PVDM write cycle on local disk.

        Args:
            workload_id: Identifier used for the part file, proof and checkpoint.
            record_count: Number of synthetic records to generate.
            corrupt_sink: When true, the last sink record gets a corrupt hash.
            proof_generator: Optional generator exposing ``build_proof``; a
                default one is created when omitted.
            defer_snapshot: When true, a passing write is queued in
                ``catalog/pending.json`` instead of creating a snapshot.

        Returns:
            A ``LocalWriteResult``. ``snapshot_id`` is ``None`` both when
            verification fails and when the snapshot is deferred.
        """
        workload = _default_workload(self.root, workload_id=workload_id, total_records=record_count)
        gen, self._last_backend = self._resolve_generator(proof_generator)

        source = _records(record_count)
        sink = _records(record_count, corrupt_last=corrupt_sink)

        # Physical write happens before verification; only a snapshot makes
        # the rows count towards ``consumer_row_count``.
        self._write_physical(sink, part_name=f"{workload_id}-part-00000")

        proof = gen.build_proof(
            source_records=source,
            sink_records=sink,
            workload=workload,
            chunk_start=0,
            chunk_end=record_count,
        )
        verification = validate_then_commit(proof)
        proof_path = self._persist_proof(proof, workload=workload, chunk_index=0)
        verdict = proof["reconciliation"]["verdict"]
        publish_vrp_metric(
            domain_id=workload.boundary.domain_id,
            verdict=verdict,
            row_count=record_count,
            workload_id=workload_id,
        )

        if verification.outcome != "PASS":
            return LocalWriteResult(
                outcome=WriteOutcome.VERIFICATION_FAILED.value,
                workload_id=workload_id,
                records_written=0,
                proof_verdict=verdict,
                snapshot_id=None,
                proof_path=str(proof_path),
                consumer_row_count=self.consumer_row_count,
                message=verification.reason,
            )

        if defer_snapshot:
            pending = self.catalog / "pending.json"
            pending_rows = []
            if pending.exists():
                pending_rows = json.loads(pending.read_text(encoding="utf-8"))
            pending_rows.append(
                {
                    "workload_id": workload_id,
                    "row_count": record_count,
                    "proof_id": proof["proof_id"],
                }
            )
            pending.write_text(json.dumps(pending_rows, indent=2), encoding="utf-8")
            return LocalWriteResult(
                outcome=WriteOutcome.COMMITTED.value,
                workload_id=workload_id,
                records_written=record_count,
                proof_verdict=verdict,
                snapshot_id=None,
                proof_path=str(proof_path),
                consumer_row_count=self.consumer_row_count,
                message="VRP PASS; snapshot deferred for mesh leader commit",
            )

        snapshot_id = self._commit_metadata(
            workload=workload,
            row_count=record_count,
            proof_id=proof["proof_id"],
        )
        return LocalWriteResult(
            outcome=WriteOutcome.COMMITTED.value,
            workload_id=workload_id,
            records_written=record_count,
            proof_verdict=verdict,
            snapshot_id=snapshot_id,
            proof_path=str(proof_path),
            consumer_row_count=self.consumer_row_count,
        )

    def finalize_mesh_transaction(self, domain_results: list[LocalWriteResult]) -> dict[str, Any]:
        """Leader commit: all domains must VRP PASS or no consumer snapshot.

        If any result is not committed, ``pending.json`` is deleted and a
        failure summary is returned. Otherwise the pending rows are summed
        into one snapshot, attributed to the last pending proof id, and the
        pending file is removed.

        Returns:
            A summary dict with ``mesh_outcome`` and ``consumer_row_count``,
            plus ``message`` on failure or ``snapshot_id`` and
            ``domains_committed`` on success.
        """
        pending_file = self.catalog / "pending.json"
        if any(r.outcome != WriteOutcome.COMMITTED.value for r in domain_results):
            if pending_file.exists():
                pending_file.unlink()
            return {
                "mesh_outcome": WriteOutcome.VERIFICATION_FAILED.value,
                "consumer_row_count": self.consumer_row_count,
                "message": "At least one domain failed VRP; pending snapshots discarded",
            }

        pending_rows: list[dict[str, Any]] = []
        if pending_file.exists():
            pending_rows = json.loads(pending_file.read_text(encoding="utf-8"))
        # An empty pending list is treated like a missing file; indexing
        # ``pending_rows[-1]`` below would otherwise raise IndexError.
        if not pending_rows:
            return {
                "mesh_outcome": WriteOutcome.VERIFICATION_FAILED.value,
                "consumer_row_count": self.consumer_row_count,
                "message": "No pending domain writes",
            }

        total_rows = sum(int(row["row_count"]) for row in pending_rows)
        proof_id = pending_rows[-1]["proof_id"]
        workload = _default_workload(self.root, workload_id="mesh-txn", total_records=total_rows)
        snapshot_id = self._commit_metadata(
            workload=workload,
            row_count=total_rows,
            proof_id=proof_id,
        )
        pending_file.unlink()
        return {
            "mesh_outcome": WriteOutcome.COMMITTED.value,
            "snapshot_id": snapshot_id,
            "consumer_row_count": self.consumer_row_count,
            "domains_committed": len(domain_results),
        }

    def run_write_with_auto_repair(
        self,
        *,
        workload_id: str = "auto-repair-demo",
        record_count: int = 100,
        drop_count: int = 5,
        proof_generator: Any | None = None,
    ) -> dict[str, Any]:
        """Simulate dropped records, auto-repair via VRP reprocessing, then commit.

        The sink starts with the last *drop_count* records missing. Repair is
        delegated to ``attempt_vrp_repair``, which receives a callback that
        appends the rows it is given to the sink. Data is written and a
        snapshot committed only when the repair outcome is ``"repaired_pass"``
        and a proof is returned.

        Returns:
            A dict with ``outcome``, ``backend``, ``repair`` details and
            ``consumer_row_count``; on success also ``snapshot_id`` and
            ``proof_path``.
        """
        workload = _default_workload(self.root, workload_id=workload_id, total_records=record_count)
        gen, backend = self._resolve_generator(proof_generator)
        # Record the backend here too, matching what ``run_write`` does.
        self._last_backend = backend

        source = _records(record_count)
        sink = _records(record_count - drop_count)
        repaired_sink = list(sink)

        def _merge_missing(missing: list[dict[str, str]]) -> list[dict[str, str]]:
            nonlocal repaired_sink
            repaired_sink = repaired_sink + missing
            return repaired_sink

        repair = attempt_vrp_repair(
            source_records=source,
            sink_records=sink,
            workload=workload,
            chunk_start=0,
            chunk_end=record_count,
            proof_generator=gen,
            write_repair_fn=_merge_missing,
        )
        repair_summary = {
            "outcome": repair.outcome,
            "attempts": repair.attempts,
            "missing_before": repair.missing_before,
            "missing_after": repair.missing_after,
            "message": repair.message,
        }

        if repair.outcome != "repaired_pass" or repair.proof is None:
            return {
                "outcome": repair.outcome,
                "backend": backend,
                "repair": repair_summary,
                "consumer_row_count": self.consumer_row_count,
            }

        self._write_physical(repaired_sink, part_name=f"{workload_id}-repaired")
        proof_path = self._persist_proof(repair.proof, workload=workload, chunk_index=0)
        publish_vrp_metric(
            domain_id=workload.boundary.domain_id,
            verdict="PASS",
            row_count=record_count,
            workload_id=workload_id,
        )
        snapshot_id = self._commit_metadata(
            workload=workload,
            row_count=record_count,
            proof_id=repair.proof["proof_id"],
        )
        return {
            "outcome": "repaired_and_committed",
            "backend": backend,
            "repair": repair_summary,
            "snapshot_id": snapshot_id,
            "proof_path": str(proof_path),
            "consumer_row_count": self.consumer_row_count,
        }

    def run_demo_sequence(self) -> dict[str, Any]:
        """Run clean write, corrupt write, and consumer visibility check.

        Both writes share one proof generator and use 1000 records each.

        Returns:
            A report dict with the backend name, root directory, elapsed
            milliseconds for the two writes, both write results, the consumer
            view, and a one-line summary.
        """
        gen, backend = create_proof_generator()

        started = time.perf_counter()
        clean = self.run_write(
            workload_id="demo-clean",
            record_count=1000,
            corrupt_sink=False,
            proof_generator=gen,
        )
        corrupt = self.run_write(
            workload_id="demo-corrupt",
            record_count=1000,
            corrupt_sink=True,
            proof_generator=gen,
        )
        elapsed_ms = round((time.perf_counter() - started) * 1000, 1)

        gate_blocked = corrupt.outcome == WriteOutcome.VERIFICATION_FAILED.value
        return {
            "mode": "local-pvdm",
            "verifier_backend": backend,
            "root": str(self.root),
            "elapsed_ms": elapsed_ms,
            "phases": {
                "clean_write": clean.to_dict(),
                "corrupt_write": corrupt.to_dict(),
            },
            "consumer": {
                "visible_row_count": self.consumer_row_count,
                "corrupt_data_visible": corrupt.outcome == WriteOutcome.COMMITTED.value,
                "gate_blocked_bad_data": gate_blocked,
            },
            "summary": (
                "VRP gate blocked corrupt write; consumers see only clean snapshot."
                if gate_blocked
                else "Unexpected: corrupt write committed."
            ),
        }
@@ -0,0 +1,5 @@
1
+ """Mesh trust metrics for CloudWatch and Grafana."""
2
+
3
+ from serverless_data_mesh.metrics.mesh_trust import publish_vrp_metric
4
+
5
+ __all__ = ["publish_vrp_metric"]
@@ -0,0 +1,56 @@
1
+ """Publish VRP trust metrics to CloudWatch for live dashboards."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ from typing import Any
8
+
9
logger = logging.getLogger(__name__)

NAMESPACE = "ServerlessDataMesh/Trust"


def publish_vrp_metric(
    *,
    domain_id: str,
    verdict: str,
    row_count: int = 0,
    workload_id: str | None = None,
    cloudwatch_client: Any | None = None,
) -> None:
    """Emit VRP PASS/FAIL metric for CloudWatch / Grafana dashboards.

    Two data points are sent under ``NAMESPACE``: ``VRPTrustScore`` (1.0 when
    *verdict* is ``"PASS"``, otherwise 0.0) and ``VRPRowCount``.

    Args:
        domain_id: Value of the ``Domain`` dimension.
        verdict: Verification verdict; only ``"PASS"`` scores 1.0.
        row_count: Value reported as ``VRPRowCount``.
        workload_id: When truthy, added as a ``WorkloadId`` dimension.
        cloudwatch_client: Object exposing ``put_metric_data``. When supplied
            it is used directly and boto3 is not required; errors it raises
            propagate to the caller.

    Publishing is skipped when the ``SDM_DISABLE_METRICS`` environment
    variable is ``1``/``true``/``yes`` (case-insensitive), or when no client
    is supplied and boto3 cannot be imported. When this function creates the
    client itself, botocore errors (for example a missing region or
    credentials) are logged as a warning instead of being raised, so a
    machine without AWS configuration can still complete a write.
    """
    if os.environ.get("SDM_DISABLE_METRICS", "").lower() in ("1", "true", "yes"):
        return

    client = cloudwatch_client
    # Exceptions treated as non-fatal. Stays empty for an injected client so
    # that its failures keep propagating exactly as before.
    recoverable: tuple[type[Exception], ...] = ()
    if client is None:
        try:
            import boto3
            from botocore.exceptions import BotoCoreError, ClientError
        except ImportError:
            logger.debug("boto3 unavailable; skip CloudWatch metric")
            return
        recoverable = (BotoCoreError, ClientError)
        try:
            client = boto3.client("cloudwatch")
        except recoverable:
            logger.warning(
                "CloudWatch client unavailable; skip VRP metric domain=%s",
                domain_id,
                exc_info=True,
            )
            return

    value = 1.0 if verdict == "PASS" else 0.0

    dimensions = [{"Name": "Domain", "Value": domain_id}]
    if workload_id:
        dimensions.append({"Name": "WorkloadId", "Value": workload_id})

    try:
        client.put_metric_data(
            Namespace=NAMESPACE,
            MetricData=[
                {
                    "MetricName": "VRPTrustScore",
                    "Dimensions": dimensions,
                    "Value": value,
                    "Unit": "None",
                },
                {
                    "MetricName": "VRPRowCount",
                    "Dimensions": dimensions,
                    "Value": float(row_count),
                    "Unit": "Count",
                },
            ],
        )
    except recoverable:
        logger.warning(
            "Failed to publish VRP metric domain=%s verdict=%s",
            domain_id,
            verdict,
            exc_info=True,
        )
        return
    logger.info("Published VRP metric domain=%s verdict=%s", domain_id, verdict)
@@ -0,0 +1,28 @@
1
+ """Durable orchestration bridging IceGuard and AWS Lambda Durable Execution."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ __all__ = [
8
+ "IceGuardDurableCoordinator",
9
+ "OrchestrationState",
10
+ "durable_commit_metadata",
11
+ "durable_write_chunk",
12
+ ]
13
+
14
+
15
def __getattr__(name: str) -> Any:
    """Resolve this package's public names on first access.

    Each supported name triggers the import of the submodule that defines it
    only when that name is requested. Any other name raises
    ``AttributeError``.
    """
    if name in ("durable_commit_metadata", "durable_write_chunk"):
        from serverless_data_mesh.orchestration import durable_steps

        return getattr(durable_steps, name)

    if name == "OrchestrationState":
        from serverless_data_mesh.orchestration.state import OrchestrationState as resolved

        return resolved

    if name == "IceGuardDurableCoordinator":
        from serverless_data_mesh.orchestration.coordinator import (
            IceGuardDurableCoordinator as resolved,
        )

        return resolved

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -0,0 +1,127 @@
1
+ """Canary write comparison using VRP proof divergence (roadmap 11/10)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Callable
7
+
8
+ from serverless_data_mesh.verification.backend import create_proof_generator
9
+ from serverless_data_mesh.verification.vrp import validate_then_commit
10
+
11
+
12
+ @dataclass(frozen=True, slots=True)
13
+ class CanaryResult:
14
+ outcome: str # canary_approved | canary_diverged
15
+ production_verdict: str
16
+ canary_verdict: str
17
+ divergence_pct: float
18
+ message: str
19
+
20
+
21
def run_canary_comparison(
    *,
    production_source: list[dict[str, Any]],
    production_sink: list[dict[str, Any]],
    canary_sink: list[dict[str, Any]],
    workload_factory: Callable[[], Any],
    max_divergence_pct: float = 1.0,
) -> CanaryResult:
    """Compare production vs canary VRP proofs before promoting a schema/logic change.

    Both sinks are proven against the same *production_source*. If either
    proof fails verification the canary is rejected with a divergence of
    100%. Otherwise the sinks' row counts are compared and the canary is
    rejected when the relative difference exceeds *max_divergence_pct*.

    Args:
        production_source: Source records shared by both proofs.
        production_sink: Records written by the production path.
        canary_sink: Records written by the canary path.
        workload_factory: Zero-argument callable returning the workload
            passed to the proof generator.
        max_divergence_pct: Largest tolerated row-count divergence, in percent.

    Returns:
        A ``CanaryResult`` whose ``outcome`` is ``"canary_approved"`` or
        ``"canary_diverged"``.
    """
    gen, _ = create_proof_generator()
    workload = workload_factory()
    n = len(production_source)

    prod_proof = gen.build_proof(
        source_records=production_source,
        sink_records=production_sink,
        workload=workload,
        chunk_start=0,
        chunk_end=n,
    )
    canary_proof = gen.build_proof(
        source_records=production_source,
        sink_records=canary_sink,
        workload=workload,
        chunk_start=0,
        chunk_end=n,
    )
    prod_v = validate_then_commit(prod_proof).outcome
    canary_v = validate_then_commit(canary_proof).outcome

    if prod_v != "PASS" or canary_v != "PASS":
        return CanaryResult(
            outcome="canary_diverged",
            production_verdict=prod_v,
            canary_verdict=canary_v,
            divergence_pct=100.0,
            message="One or both proofs failed VRP",
        )

    # When a proof carries no "sink_count", fall back to the size of that
    # proof's own sink. Falling back to the source size for both sides would
    # make the two counts equal and hide any divergence.
    prod_count = prod_proof["reconciliation"].get("sink_count", len(production_sink))
    canary_count = canary_proof["reconciliation"].get("sink_count", len(canary_sink))
    # max(..., 1) guards against division by zero for an empty production sink.
    divergence = abs(canary_count - prod_count) / max(prod_count, 1) * 100

    if divergence > max_divergence_pct:
        return CanaryResult(
            outcome="canary_diverged",
            production_verdict=prod_v,
            canary_verdict=canary_v,
            divergence_pct=round(divergence, 2),
            message=f"Row count divergence {divergence:.2f}% exceeds {max_divergence_pct}%",
        )

    return CanaryResult(
        outcome="canary_approved",
        production_verdict=prod_v,
        canary_verdict=canary_v,
        divergence_pct=round(divergence, 2),
        message="Canary within tolerance",
    )
80
+
81
+
82
def run_canary(
    *,
    record_count: int = 1000,
    inject_canary_drift: bool = False,
    max_divergence_pct: float = 1.0,
) -> dict[str, object]:
    """End-to-end canary promotion check with sample production vs canary sinks.

    Synthetic records are generated as the source. The production sink is a
    copy of the source; the canary sink is also a copy unless
    *inject_canary_drift* is set, in which case it is cut to the first
    ``int(record_count * 0.98)`` records.

    Returns:
        The comparison result's fields as a dict, plus ``promote``, which is
        true only when the outcome is ``"canary_approved"``.
    """
    from serverless_data_mesh.types.workload import DataWriteWorkload, DomainTransactionBoundary

    source = []
    for index in range(record_count):
        source.append({"id": str(index), "payload_hash": f"h{index}"})

    production_sink = list(source)
    if inject_canary_drift:
        keep = int(record_count * 0.98)
        canary_sink = source[:keep]
    else:
        canary_sink = list(source)

    workload = DataWriteWorkload(
        workload_id="canary-001",
        boundary=DomainTransactionBoundary(
            domain_id="canary-demo",
            source_namespace="raw_canary",
            target_table="orders_curated",
            partition_spec={"dt": "2026-06-14"},
        ),
        source_uri="s3://publisher/source/",
        target_uri="s3://publisher/lakehouse/orders_curated/",
        total_records=record_count,
        checkpoint_bucket="s3://steward/checkpoints",
        proof_bucket="s3://steward/proofs",
    )

    def _fixed_workload() -> DataWriteWorkload:
        return workload

    comparison = run_canary_comparison(
        production_source=source,
        production_sink=production_sink,
        canary_sink=canary_sink,
        workload_factory=_fixed_workload,
        max_divergence_pct=max_divergence_pct,
    )
    report: dict[str, object] = {
        "outcome": comparison.outcome,
        "production_verdict": comparison.production_verdict,
        "canary_verdict": comparison.canary_verdict,
        "divergence_pct": comparison.divergence_pct,
        "message": comparison.message,
    }
    report["promote"] = comparison.outcome == "canary_approved"
    return report