serverless-data-mesh 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- serverless_data_mesh/__init__.py +93 -0
- serverless_data_mesh/catalog/__init__.py +6 -0
- serverless_data_mesh/catalog/glue_connector.py +17 -0
- serverless_data_mesh/catalog/glue_rest.py +134 -0
- serverless_data_mesh/cli.py +165 -0
- serverless_data_mesh/config.py +42 -0
- serverless_data_mesh/dashboard/__init__.py +5 -0
- serverless_data_mesh/dashboard/cloudwatch.py +80 -0
- serverless_data_mesh/dashboard/trust.py +162 -0
- serverless_data_mesh/exceptions.py +23 -0
- serverless_data_mesh/governance/__init__.py +9 -0
- serverless_data_mesh/governance/consumer_sla.py +109 -0
- serverless_data_mesh/lineage/__init__.py +5 -0
- serverless_data_mesh/lineage/openlineage.py +96 -0
- serverless_data_mesh/local/__init__.py +5 -0
- serverless_data_mesh/local/runtime.py +380 -0
- serverless_data_mesh/metrics/__init__.py +5 -0
- serverless_data_mesh/metrics/mesh_trust.py +56 -0
- serverless_data_mesh/orchestration/__init__.py +28 -0
- serverless_data_mesh/orchestration/canary.py +127 -0
- serverless_data_mesh/orchestration/coordinator.py +265 -0
- serverless_data_mesh/orchestration/durable_steps.py +74 -0
- serverless_data_mesh/orchestration/reprocess.py +143 -0
- serverless_data_mesh/orchestration/state.py +16 -0
- serverless_data_mesh/py.typed +0 -0
- serverless_data_mesh/rules/__init__.py +8 -0
- serverless_data_mesh/rules/sparkrules_connector.py +193 -0
- serverless_data_mesh/scaffold/__init__.py +5 -0
- serverless_data_mesh/scaffold/init_domain.py +210 -0
- serverless_data_mesh/types/__init__.py +21 -0
- serverless_data_mesh/types/workload.py +123 -0
- serverless_data_mesh/verification/__init__.py +21 -0
- serverless_data_mesh/verification/backend.py +41 -0
- serverless_data_mesh/verification/fallback.py +200 -0
- serverless_data_mesh/verification/vrp.py +202 -0
- serverless_data_mesh-0.2.0.dist-info/METADATA +143 -0
- serverless_data_mesh-0.2.0.dist-info/RECORD +40 -0
- serverless_data_mesh-0.2.0.dist-info/WHEEL +4 -0
- serverless_data_mesh-0.2.0.dist-info/entry_points.txt +2 -0
- serverless_data_mesh-0.2.0.dist-info/licenses/LICENSE +17 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""SparkRules engine connector for Lambda domain writers.
|
|
2
|
+
|
|
3
|
+
SparkRules runs on Lambda in two modes:
|
|
4
|
+
|
|
5
|
+
1. **Pure Python (default)**: ``LocalRuleExecutor`` evaluates DRL per chunk without
|
|
6
|
+
a Spark cluster or Glue ETL job. Ideal for enrichment and quality gates before VRP.
|
|
7
|
+
|
|
8
|
+
2. **Spark-on-Lambda (optional)**: ``apply_drl`` when PySpark is bundled in the
|
|
9
|
+
Lambda layer/container.
|
|
10
|
+
|
|
11
|
+
Install: ``pip install serverless-data-mesh[rules]`` or ``[spark]`` for PySpark path.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import logging
|
|
18
|
+
import os
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from serverless_data_mesh.exceptions import RuleEvaluationError
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _require_sparkrules() -> Any:
    """Import the optional ``sparkrules`` package and return the module.

    Raises:
        ImportError: If ``sparkrules`` cannot be imported. The message names the
            ``serverless-data-mesh[rules]`` extra and the original import error
            is chained as the cause.
    """
    try:
        import sparkrules as module  # noqa: F401
    except ImportError as import_error:
        message = (
            "sparkrules is required for SparkRulesConnector. "
            "Install with: pip install serverless-data-mesh[rules]"
        )
        raise ImportError(message) from import_error
    else:
        return module
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True, slots=True)
|
|
39
|
+
class RuleFireSummary:
|
|
40
|
+
"""Audit-friendly summary of one rule evaluation on a fact."""
|
|
41
|
+
|
|
42
|
+
fact_index: int
|
|
43
|
+
rule_name: str
|
|
44
|
+
fired: bool
|
|
45
|
+
reason_codes: tuple[str, ...]
|
|
46
|
+
action_output: dict[str, Any]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass(slots=True)
|
|
50
|
+
class SparkRulesConnector:
|
|
51
|
+
"""Lambda-native business rules connector using SparkRules ``LocalRuleExecutor``.
|
|
52
|
+
|
|
53
|
+
Use between ``source_reader`` and ``batch_writer`` to enrich or filter records
|
|
54
|
+
with Drools-style DRL before VRP verification and Iceberg commit.
|
|
55
|
+
"""
|
|
56
|
+
|
|
57
|
+
drl: str
|
|
58
|
+
policy_id: str = "mesh-default"
|
|
59
|
+
_executor: Any = field(default=None, repr=False)
|
|
60
|
+
|
|
61
|
+
@classmethod
|
|
62
|
+
def from_drl(cls, drl: str, *, policy_id: str = "mesh-default") -> SparkRulesConnector:
|
|
63
|
+
"""Compile DRL and build a pure-Python executor (no JVM, no Glue ETL)."""
|
|
64
|
+
_require_sparkrules()
|
|
65
|
+
from sparkrules.executor.local_executor import LocalRuleExecutor
|
|
66
|
+
|
|
67
|
+
connector = cls(drl=drl, policy_id=policy_id)
|
|
68
|
+
connector._executor = LocalRuleExecutor.from_drl(drl)
|
|
69
|
+
return connector
|
|
70
|
+
|
|
71
|
+
@classmethod
|
|
72
|
+
def from_environment(cls) -> SparkRulesConnector:
|
|
73
|
+
"""Load DRL from ``SPARKRULES_DRL`` (inline) or ``SPARKRULES_DRL_S3_URI``."""
|
|
74
|
+
inline = os.environ.get("SPARKRULES_DRL")
|
|
75
|
+
if inline:
|
|
76
|
+
return cls.from_drl(inline, policy_id=os.environ.get("SPARKRULES_POLICY_ID", "mesh-default"))
|
|
77
|
+
|
|
78
|
+
s3_uri = os.environ.get("SPARKRULES_DRL_S3_URI")
|
|
79
|
+
if s3_uri:
|
|
80
|
+
return cls.from_s3(s3_uri, policy_id=os.environ.get("SPARKRULES_POLICY_ID", "mesh-default"))
|
|
81
|
+
|
|
82
|
+
raise ValueError("Set SPARKRULES_DRL or SPARKRULES_DRL_S3_URI for SparkRulesConnector")
|
|
83
|
+
|
|
84
|
+
@classmethod
|
|
85
|
+
def from_s3(cls, s3_uri: str, *, policy_id: str = "mesh-default") -> SparkRulesConnector:
|
|
86
|
+
"""Load rule pack DRL from S3 (Steward governance bucket pattern)."""
|
|
87
|
+
import boto3
|
|
88
|
+
|
|
89
|
+
if not s3_uri.startswith("s3://"):
|
|
90
|
+
raise ValueError(f"Expected s3:// URI, got {s3_uri!r}")
|
|
91
|
+
path = s3_uri[5:]
|
|
92
|
+
bucket, _, key = path.partition("/")
|
|
93
|
+
body = boto3.client("s3").get_object(Bucket=bucket, Key=key)["Body"].read()
|
|
94
|
+
return cls.from_drl(body.decode("utf-8"), policy_id=policy_id)
|
|
95
|
+
|
|
96
|
+
def _ensure_executor(self) -> Any:
|
|
97
|
+
if self._executor is None:
|
|
98
|
+
_require_sparkrules()
|
|
99
|
+
from sparkrules.executor.local_executor import LocalRuleExecutor
|
|
100
|
+
|
|
101
|
+
self._executor = LocalRuleExecutor.from_drl(self.drl)
|
|
102
|
+
return self._executor
|
|
103
|
+
|
|
104
|
+
def apply_chunk(
|
|
105
|
+
self,
|
|
106
|
+
records: list[dict[str, Any]],
|
|
107
|
+
*,
|
|
108
|
+
fact_binding: str = "row",
|
|
109
|
+
) -> tuple[list[dict[str, Any]], list[RuleFireSummary]]:
|
|
110
|
+
"""Evaluate DRL against each record; merge rule actions into enriched rows.
|
|
111
|
+
|
|
112
|
+
Each record is wrapped as ``{fact_binding: record}`` for DRL binding, e.g.
|
|
113
|
+
``$row : Row ( $row.amount > 0 )``.
|
|
114
|
+
"""
|
|
115
|
+
executor = self._ensure_executor()
|
|
116
|
+
enriched: list[dict[str, Any]] = []
|
|
117
|
+
audit: list[RuleFireSummary] = []
|
|
118
|
+
|
|
119
|
+
for idx, record in enumerate(records):
|
|
120
|
+
fact: dict[str, Any] = {fact_binding: dict(record)}
|
|
121
|
+
result = executor.score(fact)
|
|
122
|
+
merged = dict(record)
|
|
123
|
+
merged.update(result.merged_actions)
|
|
124
|
+
merged["_sparkrules_fired"] = result.fired_any
|
|
125
|
+
enriched.append(merged)
|
|
126
|
+
|
|
127
|
+
for fire in result.fires:
|
|
128
|
+
if fire.fired:
|
|
129
|
+
audit.append(
|
|
130
|
+
RuleFireSummary(
|
|
131
|
+
fact_index=idx,
|
|
132
|
+
rule_name=fire.rule_name,
|
|
133
|
+
fired=True,
|
|
134
|
+
reason_codes=fire.reason_codes,
|
|
135
|
+
action_output=dict(fire.action_output),
|
|
136
|
+
)
|
|
137
|
+
)
|
|
138
|
+
return enriched, audit
|
|
139
|
+
|
|
140
|
+
def quality_gate(
|
|
141
|
+
self,
|
|
142
|
+
records: list[dict[str, Any]],
|
|
143
|
+
*,
|
|
144
|
+
require_any_rule_fired: bool = False,
|
|
145
|
+
reject_field: str = "_mesh_reject",
|
|
146
|
+
) -> list[dict[str, Any]]:
|
|
147
|
+
"""Filter or mark records that fail the rules policy before physical write."""
|
|
148
|
+
enriched, audit = self.apply_chunk(records)
|
|
149
|
+
if not require_any_rule_fired:
|
|
150
|
+
return enriched
|
|
151
|
+
|
|
152
|
+
fired_indices = {a.fact_index for a in audit if a.fired}
|
|
153
|
+
passed: list[dict[str, Any]] = []
|
|
154
|
+
for idx, row in enumerate(enriched):
|
|
155
|
+
if idx in fired_indices:
|
|
156
|
+
passed.append(row)
|
|
157
|
+
else:
|
|
158
|
+
logger.warning("SparkRules quality gate rejected record index=%d", idx)
|
|
159
|
+
if not passed:
|
|
160
|
+
raise RuleEvaluationError(
|
|
161
|
+
f"No records passed SparkRules policy {self.policy_id!r} "
|
|
162
|
+
f"(require_any_rule_fired=True, chunk_size={len(records)})"
|
|
163
|
+
)
|
|
164
|
+
return passed
|
|
165
|
+
|
|
166
|
+
def audit_json(self, audit: list[RuleFireSummary]) -> str:
|
|
167
|
+
"""Serialize rule fires for Steward proof / lineage buckets."""
|
|
168
|
+
payload = [
|
|
169
|
+
{
|
|
170
|
+
"fact_index": a.fact_index,
|
|
171
|
+
"rule_name": a.rule_name,
|
|
172
|
+
"fired": a.fired,
|
|
173
|
+
"reason_codes": list(a.reason_codes),
|
|
174
|
+
"action_output": a.action_output,
|
|
175
|
+
"policy_id": self.policy_id,
|
|
176
|
+
}
|
|
177
|
+
for a in audit
|
|
178
|
+
]
|
|
179
|
+
return json.dumps(payload, default=str)
|
|
180
|
+
|
|
181
|
+
@staticmethod
|
|
182
|
+
def apply_drl_spark(
|
|
183
|
+
spark: Any,
|
|
184
|
+
dataframe: Any,
|
|
185
|
+
drl: str,
|
|
186
|
+
*,
|
|
187
|
+
fact_id_field: str = "id",
|
|
188
|
+
) -> Any:
|
|
189
|
+
"""Spark-on-Lambda path: distributed ``apply_drl`` over a DataFrame."""
|
|
190
|
+
_require_sparkrules()
|
|
191
|
+
from sparkrules.spark.dataframe import apply_drl
|
|
192
|
+
|
|
193
|
+
return apply_drl(dataframe, drl, fact_id_field=fact_id_field)
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Scaffold new domain writers for the Vaquar Pattern."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def scaffold_domain(
    *,
    domain: str,
    table: str,
    account_id: str,
    output_dir: str = "domains",
) -> Path:
    """Generate the starter file tree for a new domain writer.

    Creates ``<output_dir>/<domain>`` with ``tests`` and ``terraform``
    sub-directories, then renders each module-level template with
    ``str.format`` and writes it as UTF-8. Existing directories are reused and
    existing files are overwritten.

    Args:
        domain: Domain identifier; also the directory name and the suffix of
            the generated test module.
        table: Target table name substituted into the templates.
        account_id: Producer account id substituted into the contract and
            Terraform templates.
        output_dir: Parent directory under which the domain folder is created.

    Returns:
        Path of the domain directory.
    """
    root = Path(output_dir) / domain
    root.mkdir(parents=True, exist_ok=True)
    for subdir in ("tests", "terraform"):
        (root / subdir).mkdir(exist_ok=True)

    # Each template receives exactly the fields it was given originally.
    domain_table = {"domain": domain, "table": table}
    domain_account = {"domain": domain, "account_id": account_id}
    everything = {"domain": domain, "table": table, "account_id": account_id}

    # (path parts relative to root, template, format fields), in write order.
    plan = (
        (("handler.py",), HANDLER_TEMPLATE, domain_table),
        (("contract.yaml",), CONTRACT_TEMPLATE, everything),
        (("terraform", "main.tf"), TERRAFORM_TEMPLATE, domain_account),
        (("terraform", "terraform.tfvars.example"), TFVARS_TEMPLATE, domain_account),
        (("tests", f"test_{domain}.py"), TEST_TEMPLATE, domain_table),
        (("consumer_sla.yaml",), CONSUMER_SLA_TEMPLATE, domain_table),
        (("step_function.asl.json",), STEP_FUNCTION_TEMPLATE, domain_table),
        (("README.md",), README_TEMPLATE, domain_table),
    )
    for parts, template, fields in plan:
        root.joinpath(*parts).write_text(template.format(**fields), encoding="utf-8")

    return root
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
HANDLER_TEMPLATE = '''"""Domain writer: {domain} -> {table} (Vaquar Pattern PVDM)."""
|
|
67
|
+
|
|
68
|
+
from __future__ import annotations
|
|
69
|
+
|
|
70
|
+
from typing import Any
|
|
71
|
+
|
|
72
|
+
from serverless_data_mesh.governance.consumer_sla import enforce_consumer_sla
|
|
73
|
+
from serverless_data_mesh.metrics.mesh_trust import publish_vrp_metric
|
|
74
|
+
from serverless_data_mesh.orchestration.reprocess import attempt_vrp_repair
|
|
75
|
+
from serverless_data_mesh.types.workload import ConsumerSLAContract, DataWriteWorkload
|
|
76
|
+
from serverless_data_mesh.verification.backend import create_proof_generator
|
|
77
|
+
from serverless_data_mesh.verification.vrp import validate_then_commit
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def source_reader(start: int, end: int) -> list[dict[str, Any]]:
|
|
81
|
+
return [{{"id": str(i), "payload_hash": f"h{{i}}"}} for i in range(start, end)]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def sink_reader(start: int, end: int) -> list[dict[str, Any]]:
|
|
85
|
+
"""Read physical sink for VRP; replace with Parquet reader in production."""
|
|
86
|
+
return source_reader(start, end)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def batch_writer(start: int, end: int) -> list[str]:
|
|
90
|
+
base = "s3://publisher-lakehouse/{table}/dt=PARTITION"
|
|
91
|
+
return [f"{{base}}/part-{{i:08d}}.parquet" for i in range(start, end)]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def lambda_handler(event: dict[str, Any], context: Any) -> dict[str, Any]:
|
|
95
|
+
"""Wire IceGuardDurableCoordinator with auto-repair and consumer SLA gate."""
|
|
96
|
+
raise NotImplementedError("Copy wiring from examples/domain_writer/handler.py")
|
|
97
|
+
'''
|
|
98
|
+
|
|
99
|
+
CONTRACT_TEMPLATE = """# Data product contract: {domain}
|
|
100
|
+
product_id: {domain}-{table}
|
|
101
|
+
owner_team: {domain}-platform
|
|
102
|
+
domain_id: {domain}
|
|
103
|
+
target_table: {table}
|
|
104
|
+
source_namespace: raw_{domain}
|
|
105
|
+
producer_account_id: \"{account_id}\"
|
|
106
|
+
sla_freshness_hours: 2
|
|
107
|
+
schema_version: \"1.0.0\"
|
|
108
|
+
quality_policy_id: strict-zero-drop
|
|
109
|
+
vaquar_pattern: PVDM
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
TERRAFORM_TEMPLATE = """# Producer Terraform stub for domain: {domain}
|
|
113
|
+
variable "producer_account_id" {{
|
|
114
|
+
default = "{account_id}"
|
|
115
|
+
}}
|
|
116
|
+
|
|
117
|
+
variable "domain_id" {{
|
|
118
|
+
default = "{domain}"
|
|
119
|
+
}}
|
|
120
|
+
|
|
121
|
+
# Copy modules from infrastructure/terraform/environments/multi-account/producer/
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
TFVARS_TEMPLATE = """aws_region = \"us-east-2\"
|
|
125
|
+
name_prefix = \"sdm-{domain}\"
|
|
126
|
+
producer_account_id = \"{account_id}\"
|
|
127
|
+
# steward_account_id = \"STEWARD_ACCOUNT\"
|
|
128
|
+
# publisher_account_id = \"PUBLISHER_ACCOUNT\"
|
|
129
|
+
"""
|
|
130
|
+
|
|
131
|
+
TEST_TEMPLATE = '''"""Tests for {domain} domain writer."""
|
|
132
|
+
|
|
133
|
+
from __future__ import annotations
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def test_boundary_declared() -> None:
|
|
137
|
+
from serverless_data_mesh import DomainTransactionBoundary
|
|
138
|
+
|
|
139
|
+
boundary = DomainTransactionBoundary(
|
|
140
|
+
domain_id="{domain}",
|
|
141
|
+
source_namespace="raw_{domain}",
|
|
142
|
+
target_table="{table}",
|
|
143
|
+
partition_spec={{"dt": "2026-06-14"}},
|
|
144
|
+
)
|
|
145
|
+
assert boundary.domain_id == "{domain}"
|
|
146
|
+
'''
|
|
147
|
+
|
|
148
|
+
README_TEMPLATE = """# {domain} domain writer
|
|
149
|
+
|
|
150
|
+
Target table: `{table}`
|
|
151
|
+
|
|
152
|
+
## Scaffolded by
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
serverless-data-mesh init --domain {domain} --table {table} --account YOUR_ACCOUNT_ID
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Next steps
|
|
159
|
+
|
|
160
|
+
1. Implement `handler.py` (copy from `examples/domain_writer/handler.py`)
|
|
161
|
+
2. Review `consumer_sla.yaml` for Lake Formation read gates
|
|
162
|
+
3. Deploy `step_function.asl.json` durable workflow
|
|
163
|
+
4. Deploy Terraform in `terraform/`
|
|
164
|
+
5. Run `make demo` locally to verify PVDM gate
|
|
165
|
+
"""
|
|
166
|
+
|
|
167
|
+
CONSUMER_SLA_TEMPLATE = """# Consumer SLA for {table} (VRP-backed Lake Formation gate)
|
|
168
|
+
consumer_id: analytics-team
|
|
169
|
+
target_table: {table}
|
|
170
|
+
max_freshness_minutes: 60
|
|
171
|
+
min_completeness_pct: 99.9
|
|
172
|
+
required_columns:
|
|
173
|
+
- id
|
|
174
|
+
- payload_hash
|
|
175
|
+
enforcement: vrp_backed
|
|
176
|
+
"""
|
|
177
|
+
|
|
178
|
+
STEP_FUNCTION_TEMPLATE = """{{
|
|
179
|
+
"Comment": "PVDM durable write for {domain} -> {table}",
|
|
180
|
+
"StartAt": "WriteChunk",
|
|
181
|
+
"States": {{
|
|
182
|
+
"WriteChunk": {{
|
|
183
|
+
"Type": "Task",
|
|
184
|
+
"Resource": "arn:aws:states:::lambda:invoke",
|
|
185
|
+
"Parameters": {{
|
|
186
|
+
"FunctionName": "${{DomainWriterArn}}",
|
|
187
|
+
"Payload.$": "$"
|
|
188
|
+
}},
|
|
189
|
+
"Retry": [
|
|
190
|
+
{{
|
|
191
|
+
"ErrorEquals": ["VerificationRejectedError"],
|
|
192
|
+
"IntervalSeconds": 30,
|
|
193
|
+
"MaxAttempts": 2,
|
|
194
|
+
"BackoffRate": 2.0
|
|
195
|
+
}}
|
|
196
|
+
],
|
|
197
|
+
"Next": "CommitMetadata"
|
|
198
|
+
}},
|
|
199
|
+
"CommitMetadata": {{
|
|
200
|
+
"Type": "Task",
|
|
201
|
+
"Resource": "arn:aws:states:::lambda:invoke",
|
|
202
|
+
"Parameters": {{
|
|
203
|
+
"FunctionName": "${{CatalogCommitArn}}",
|
|
204
|
+
"Payload.$": "$"
|
|
205
|
+
}},
|
|
206
|
+
"End": true
|
|
207
|
+
}}
|
|
208
|
+
}}
|
|
209
|
+
}}
|
|
210
|
+
"""
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Domain contracts and workload types."""
|
|
2
|
+
|
|
3
|
+
# Re-export the public names defined in ``types.workload`` so callers can
# import them from ``serverless_data_mesh.types`` directly.
from serverless_data_mesh.types.workload import (
    BatchWriterFn,
    ChunkWriteResult,
    ConsumerSLAContract,
    DataProductContract,
    DataWriteWorkload,
    DomainTransactionBoundary,
    SourceReaderFn,
    WriteOutcome,
)

__all__ = [
    "BatchWriterFn",
    "ChunkWriteResult",
    "ConsumerSLAContract",
    "DataProductContract",
    "DataWriteWorkload",
    "DomainTransactionBoundary",
    "SourceReaderFn",
    "WriteOutcome",
]
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""Shared types for multi-domain write coordination and contract enforcement."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Any, Callable
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class WriteOutcome(str, Enum):
    """End states a domain write transaction can report.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value.
    """

    COMMITTED = "committed"
    ROLLED_BACK = "rolled_back"
    RESUMED = "resumed"
    VERIFICATION_FAILED = "verification_failed"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(frozen=True, slots=True)
|
|
20
|
+
class DomainTransactionBoundary:
|
|
21
|
+
"""Declarative contract for a single domain team's write scope.
|
|
22
|
+
|
|
23
|
+
In a federated data mesh, each domain owns its tables and publishes
|
|
24
|
+
consumption contracts. This boundary encodes the partition scope and
|
|
25
|
+
quality gates that the central coordinator enforces before metadata commit.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
domain_id: str
|
|
29
|
+
source_namespace: str
|
|
30
|
+
target_table: str
|
|
31
|
+
partition_spec: dict[str, str]
|
|
32
|
+
quality_policy_id: str = "default:strict"
|
|
33
|
+
max_chunk_records: int = 5000
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(frozen=True, slots=True)
|
|
37
|
+
class DataWriteWorkload:
|
|
38
|
+
"""Description of a large backfill or cross-account copy job.
|
|
39
|
+
|
|
40
|
+
The durable orchestrator shards ``total_records`` into checkpoint-sized
|
|
41
|
+
chunks. Each chunk is an independently resumable unit guarded by IceGuard.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
workload_id: str
|
|
45
|
+
boundary: DomainTransactionBoundary
|
|
46
|
+
source_uri: str
|
|
47
|
+
target_uri: str
|
|
48
|
+
total_records: int
|
|
49
|
+
checkpoint_bucket: str
|
|
50
|
+
proof_bucket: str
|
|
51
|
+
content_fields: tuple[str, ...] = ("id", "payload_hash")
|
|
52
|
+
identity_fields: tuple[str, ...] = ("id",)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(frozen=True, slots=True)
|
|
56
|
+
class DataProductContract:
|
|
57
|
+
"""Published data product contract for the federated mesh registry.
|
|
58
|
+
|
|
59
|
+
Extends ``DomainTransactionBoundary`` with ownership and SLA metadata that
|
|
60
|
+
Steward governance and Publisher consumers rely on for discovery and audit.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
product_id: str
|
|
64
|
+
owner_team: str
|
|
65
|
+
boundary: DomainTransactionBoundary
|
|
66
|
+
sla_freshness_hours: int = 24
|
|
67
|
+
schema_version: str = "1.0"
|
|
68
|
+
description: str = ""
|
|
69
|
+
|
|
70
|
+
def to_registry_entry(self) -> dict[str, str | int | dict[str, str]]:
|
|
71
|
+
"""Serialize for mesh catalog / Backstage / internal registry APIs."""
|
|
72
|
+
return {
|
|
73
|
+
"product_id": self.product_id,
|
|
74
|
+
"owner_team": self.owner_team,
|
|
75
|
+
"domain_id": self.boundary.domain_id,
|
|
76
|
+
"target_table": self.boundary.target_table,
|
|
77
|
+
"quality_policy_id": self.boundary.quality_policy_id,
|
|
78
|
+
"sla_freshness_hours": self.sla_freshness_hours,
|
|
79
|
+
"schema_version": self.schema_version,
|
|
80
|
+
"description": self.description,
|
|
81
|
+
"partition_spec": dict(self.boundary.partition_spec),
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass(frozen=True, slots=True)
|
|
86
|
+
class ConsumerSLAContract:
|
|
87
|
+
"""Consumer-declared SLA enforced via VRP-backed proofs before read access."""
|
|
88
|
+
|
|
89
|
+
consumer_id: str
|
|
90
|
+
target_table: str
|
|
91
|
+
max_freshness_minutes: int = 60
|
|
92
|
+
min_completeness_pct: float = 99.9
|
|
93
|
+
required_columns: tuple[str, ...] = ()
|
|
94
|
+
enforcement: str = "vrp_backed"
|
|
95
|
+
|
|
96
|
+
def to_dict(self) -> dict[str, str | int | float | tuple[str, ...]]:
|
|
97
|
+
return {
|
|
98
|
+
"consumer_id": self.consumer_id,
|
|
99
|
+
"target_table": self.target_table,
|
|
100
|
+
"max_freshness_minutes": self.max_freshness_minutes,
|
|
101
|
+
"min_completeness_pct": self.min_completeness_pct,
|
|
102
|
+
"required_columns": self.required_columns,
|
|
103
|
+
"enforcement": self.enforcement,
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
@dataclass(slots=True)
|
|
108
|
+
class ChunkWriteResult:
|
|
109
|
+
"""Outcome of one durable chunk step."""
|
|
110
|
+
|
|
111
|
+
chunk_index: int
|
|
112
|
+
record_offset: int
|
|
113
|
+
record_end: int
|
|
114
|
+
parquet_paths: list[str]
|
|
115
|
+
proof_s3_uri: str | None = None
|
|
116
|
+
verification_passed: bool = False
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
# Callable alias: takes two ints (start, end) and returns a list of strings.
BatchWriterFn = Callable[[int, int], list[str]]
"""Write records ``[start, end)`` and return newly created Parquet S3 URIs."""

# Callable alias: takes two ints (start, end) and returns a list of record dicts.
SourceReaderFn = Callable[[int, int], list[dict[str, Any]]]
"""Read source records ``[start, end)`` for VRP fingerprinting."""
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Cryptographic verification and reconciliation proofs."""
|
|
2
|
+
|
|
3
|
+
from serverless_data_mesh.verification.vrp import (
|
|
4
|
+
ValidateResult,
|
|
5
|
+
VRPProofGenerator,
|
|
6
|
+
VerifyResult,
|
|
7
|
+
validate_then_commit,
|
|
8
|
+
)
|
|
9
|
+
from serverless_data_mesh.verification.backend import create_proof_generator, veridata_available
|
|
10
|
+
from serverless_data_mesh.verification.fallback import FallbackProofGenerator, reconcile_multiset
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"VRPProofGenerator",
|
|
14
|
+
"FallbackProofGenerator",
|
|
15
|
+
"ValidateResult",
|
|
16
|
+
"VerifyResult",
|
|
17
|
+
"validate_then_commit",
|
|
18
|
+
"create_proof_generator",
|
|
19
|
+
"veridata_available",
|
|
20
|
+
"reconcile_multiset",
|
|
21
|
+
]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Select veridata-recon or pure-Python fallback proof backend."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Protocol
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ProofGeneratorProtocol(Protocol):
    """Structural interface a proof backend must provide.

    Both methods take keyword arguments only; the accepted keywords are not
    constrained by this protocol.
    """

    def build_proof(self, **kwargs: Any) -> dict[str, Any]:
        """Build a proof and return it as a dictionary."""
        ...

    def persist_proof(self, **kwargs: Any) -> str:
        """Persist a proof and return a string reference to it."""
        ...
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def veridata_available() -> bool:
    """Report whether the optional ``veridata_recon`` package can be imported."""
    try:
        import veridata_recon  # noqa: F401
    except ImportError:
        available = False
    else:
        available = True
    return available
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def create_proof_generator() -> tuple[Any, str]:
    """Build a proof generator and report which backend produced it.

    Returns:
        ``(generator, backend_name)``. When ``veridata_recon`` is importable the
        generator is a ``VRPProofGenerator`` and the name is
        ``"veridata-recon"``; otherwise it is a ``FallbackProofGenerator`` and
        the name is ``"pure-python-fallback"``.

    Note:
        On the veridata path a new keypair and a new salt are generated on
        every call.
    """
    if not veridata_available():
        from serverless_data_mesh.verification.fallback import FallbackProofGenerator

        return FallbackProofGenerator(), "pure-python-fallback"

    import veridata_recon as recon

    from serverless_data_mesh.verification.vrp import VRPProofGenerator

    keypair = recon.generate_keypair()
    generator = VRPProofGenerator(
        private_key_b64=keypair["private_key"],
        public_key_b64=keypair["public_key"],
        salt_hex=recon.generate_salt(),
    )
    return generator, "veridata-recon"
|