serverless-data-mesh 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. serverless_data_mesh/__init__.py +93 -0
  2. serverless_data_mesh/catalog/__init__.py +6 -0
  3. serverless_data_mesh/catalog/glue_connector.py +17 -0
  4. serverless_data_mesh/catalog/glue_rest.py +134 -0
  5. serverless_data_mesh/cli.py +165 -0
  6. serverless_data_mesh/config.py +42 -0
  7. serverless_data_mesh/dashboard/__init__.py +5 -0
  8. serverless_data_mesh/dashboard/cloudwatch.py +80 -0
  9. serverless_data_mesh/dashboard/trust.py +162 -0
  10. serverless_data_mesh/exceptions.py +23 -0
  11. serverless_data_mesh/governance/__init__.py +9 -0
  12. serverless_data_mesh/governance/consumer_sla.py +109 -0
  13. serverless_data_mesh/lineage/__init__.py +5 -0
  14. serverless_data_mesh/lineage/openlineage.py +96 -0
  15. serverless_data_mesh/local/__init__.py +5 -0
  16. serverless_data_mesh/local/runtime.py +380 -0
  17. serverless_data_mesh/metrics/__init__.py +5 -0
  18. serverless_data_mesh/metrics/mesh_trust.py +56 -0
  19. serverless_data_mesh/orchestration/__init__.py +28 -0
  20. serverless_data_mesh/orchestration/canary.py +127 -0
  21. serverless_data_mesh/orchestration/coordinator.py +265 -0
  22. serverless_data_mesh/orchestration/durable_steps.py +74 -0
  23. serverless_data_mesh/orchestration/reprocess.py +143 -0
  24. serverless_data_mesh/orchestration/state.py +16 -0
  25. serverless_data_mesh/py.typed +0 -0
  26. serverless_data_mesh/rules/__init__.py +8 -0
  27. serverless_data_mesh/rules/sparkrules_connector.py +193 -0
  28. serverless_data_mesh/scaffold/__init__.py +5 -0
  29. serverless_data_mesh/scaffold/init_domain.py +210 -0
  30. serverless_data_mesh/types/__init__.py +21 -0
  31. serverless_data_mesh/types/workload.py +123 -0
  32. serverless_data_mesh/verification/__init__.py +21 -0
  33. serverless_data_mesh/verification/backend.py +41 -0
  34. serverless_data_mesh/verification/fallback.py +200 -0
  35. serverless_data_mesh/verification/vrp.py +202 -0
  36. serverless_data_mesh-0.2.0.dist-info/METADATA +143 -0
  37. serverless_data_mesh-0.2.0.dist-info/RECORD +40 -0
  38. serverless_data_mesh-0.2.0.dist-info/WHEEL +4 -0
  39. serverless_data_mesh-0.2.0.dist-info/entry_points.txt +2 -0
  40. serverless_data_mesh-0.2.0.dist-info/licenses/LICENSE +17 -0
@@ -0,0 +1,193 @@
1
+ """SparkRules engine connector for Lambda domain writers.
2
+
3
+ SparkRules runs on Lambda in two modes:
4
+
5
+ 1. **Pure Python (default)**: ``LocalRuleExecutor`` evaluates DRL per chunk without
6
+ a Spark cluster or Glue ETL job. Ideal for enrichment and quality gates before VRP.
7
+
8
+ 2. **Spark-on-Lambda (optional)**: ``apply_drl`` when PySpark is bundled in the
9
+ Lambda layer/container.
10
+
11
+ Install: ``pip install serverless-data-mesh[rules]`` or ``[spark]`` for PySpark path.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import logging
18
+ import os
19
+ from dataclasses import dataclass, field
20
+ from typing import Any
21
+
22
+ from serverless_data_mesh.exceptions import RuleEvaluationError
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ def _require_sparkrules() -> Any:
28
+ try:
29
+ import sparkrules # noqa: F401
30
+ except ImportError as exc:
31
+ raise ImportError(
32
+ "sparkrules is required for SparkRulesConnector. "
33
+ "Install with: pip install serverless-data-mesh[rules]"
34
+ ) from exc
35
+ return sparkrules
36
+
37
+
38
+ @dataclass(frozen=True, slots=True)
39
+ class RuleFireSummary:
40
+ """Audit-friendly summary of one rule evaluation on a fact."""
41
+
42
+ fact_index: int
43
+ rule_name: str
44
+ fired: bool
45
+ reason_codes: tuple[str, ...]
46
+ action_output: dict[str, Any]
47
+
48
+
49
+ @dataclass(slots=True)
50
+ class SparkRulesConnector:
51
+ """Lambda-native business rules connector using SparkRules ``LocalRuleExecutor``.
52
+
53
+ Use between ``source_reader`` and ``batch_writer`` to enrich or filter records
54
+ with Drools-style DRL before VRP verification and Iceberg commit.
55
+ """
56
+
57
+ drl: str
58
+ policy_id: str = "mesh-default"
59
+ _executor: Any = field(default=None, repr=False)
60
+
61
+ @classmethod
62
+ def from_drl(cls, drl: str, *, policy_id: str = "mesh-default") -> SparkRulesConnector:
63
+ """Compile DRL and build a pure-Python executor (no JVM, no Glue ETL)."""
64
+ _require_sparkrules()
65
+ from sparkrules.executor.local_executor import LocalRuleExecutor
66
+
67
+ connector = cls(drl=drl, policy_id=policy_id)
68
+ connector._executor = LocalRuleExecutor.from_drl(drl)
69
+ return connector
70
+
71
+ @classmethod
72
+ def from_environment(cls) -> SparkRulesConnector:
73
+ """Load DRL from ``SPARKRULES_DRL`` (inline) or ``SPARKRULES_DRL_S3_URI``."""
74
+ inline = os.environ.get("SPARKRULES_DRL")
75
+ if inline:
76
+ return cls.from_drl(inline, policy_id=os.environ.get("SPARKRULES_POLICY_ID", "mesh-default"))
77
+
78
+ s3_uri = os.environ.get("SPARKRULES_DRL_S3_URI")
79
+ if s3_uri:
80
+ return cls.from_s3(s3_uri, policy_id=os.environ.get("SPARKRULES_POLICY_ID", "mesh-default"))
81
+
82
+ raise ValueError("Set SPARKRULES_DRL or SPARKRULES_DRL_S3_URI for SparkRulesConnector")
83
+
84
+ @classmethod
85
+ def from_s3(cls, s3_uri: str, *, policy_id: str = "mesh-default") -> SparkRulesConnector:
86
+ """Load rule pack DRL from S3 (Steward governance bucket pattern)."""
87
+ import boto3
88
+
89
+ if not s3_uri.startswith("s3://"):
90
+ raise ValueError(f"Expected s3:// URI, got {s3_uri!r}")
91
+ path = s3_uri[5:]
92
+ bucket, _, key = path.partition("/")
93
+ body = boto3.client("s3").get_object(Bucket=bucket, Key=key)["Body"].read()
94
+ return cls.from_drl(body.decode("utf-8"), policy_id=policy_id)
95
+
96
+ def _ensure_executor(self) -> Any:
97
+ if self._executor is None:
98
+ _require_sparkrules()
99
+ from sparkrules.executor.local_executor import LocalRuleExecutor
100
+
101
+ self._executor = LocalRuleExecutor.from_drl(self.drl)
102
+ return self._executor
103
+
104
+ def apply_chunk(
105
+ self,
106
+ records: list[dict[str, Any]],
107
+ *,
108
+ fact_binding: str = "row",
109
+ ) -> tuple[list[dict[str, Any]], list[RuleFireSummary]]:
110
+ """Evaluate DRL against each record; merge rule actions into enriched rows.
111
+
112
+ Each record is wrapped as ``{fact_binding: record}`` for DRL binding, e.g.
113
+ ``$row : Row ( $row.amount > 0 )``.
114
+ """
115
+ executor = self._ensure_executor()
116
+ enriched: list[dict[str, Any]] = []
117
+ audit: list[RuleFireSummary] = []
118
+
119
+ for idx, record in enumerate(records):
120
+ fact: dict[str, Any] = {fact_binding: dict(record)}
121
+ result = executor.score(fact)
122
+ merged = dict(record)
123
+ merged.update(result.merged_actions)
124
+ merged["_sparkrules_fired"] = result.fired_any
125
+ enriched.append(merged)
126
+
127
+ for fire in result.fires:
128
+ if fire.fired:
129
+ audit.append(
130
+ RuleFireSummary(
131
+ fact_index=idx,
132
+ rule_name=fire.rule_name,
133
+ fired=True,
134
+ reason_codes=fire.reason_codes,
135
+ action_output=dict(fire.action_output),
136
+ )
137
+ )
138
+ return enriched, audit
139
+
140
+ def quality_gate(
141
+ self,
142
+ records: list[dict[str, Any]],
143
+ *,
144
+ require_any_rule_fired: bool = False,
145
+ reject_field: str = "_mesh_reject",
146
+ ) -> list[dict[str, Any]]:
147
+ """Filter or mark records that fail the rules policy before physical write."""
148
+ enriched, audit = self.apply_chunk(records)
149
+ if not require_any_rule_fired:
150
+ return enriched
151
+
152
+ fired_indices = {a.fact_index for a in audit if a.fired}
153
+ passed: list[dict[str, Any]] = []
154
+ for idx, row in enumerate(enriched):
155
+ if idx in fired_indices:
156
+ passed.append(row)
157
+ else:
158
+ logger.warning("SparkRules quality gate rejected record index=%d", idx)
159
+ if not passed:
160
+ raise RuleEvaluationError(
161
+ f"No records passed SparkRules policy {self.policy_id!r} "
162
+ f"(require_any_rule_fired=True, chunk_size={len(records)})"
163
+ )
164
+ return passed
165
+
166
+ def audit_json(self, audit: list[RuleFireSummary]) -> str:
167
+ """Serialize rule fires for Steward proof / lineage buckets."""
168
+ payload = [
169
+ {
170
+ "fact_index": a.fact_index,
171
+ "rule_name": a.rule_name,
172
+ "fired": a.fired,
173
+ "reason_codes": list(a.reason_codes),
174
+ "action_output": a.action_output,
175
+ "policy_id": self.policy_id,
176
+ }
177
+ for a in audit
178
+ ]
179
+ return json.dumps(payload, default=str)
180
+
181
+ @staticmethod
182
+ def apply_drl_spark(
183
+ spark: Any,
184
+ dataframe: Any,
185
+ drl: str,
186
+ *,
187
+ fact_id_field: str = "id",
188
+ ) -> Any:
189
+ """Spark-on-Lambda path: distributed ``apply_drl`` over a DataFrame."""
190
+ _require_sparkrules()
191
+ from sparkrules.spark.dataframe import apply_drl
192
+
193
+ return apply_drl(dataframe, drl, fact_id_field=fact_id_field)
@@ -0,0 +1,5 @@
1
+ """Scaffold new Vaquar Pattern domain writers."""
2
+
3
+ from serverless_data_mesh.scaffold.init_domain import scaffold_domain
4
+
5
+ __all__ = ["scaffold_domain"]
@@ -0,0 +1,210 @@
1
+ """Scaffold new domain writers for the Vaquar Pattern."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+
8
def scaffold_domain(
    *,
    domain: str,
    table: str,
    account_id: str,
    output_dir: str = "domains",
) -> Path:
    """Create handler, contract, terraform stub, and tests for a new domain.

    Args:
        domain: Domain identifier; used as the directory name under
            ``output_dir`` and substituted into the templates.
        table: Target table name substituted into the templates.
        account_id: Producer account id substituted into the contract and
            Terraform files.
        output_dir: Parent directory for the scaffold; created if missing.

    Returns:
        The domain root directory (``output_dir / domain``).

    Raises:
        ValueError: If ``domain`` is empty or is not a single path component.
            Rejecting such names keeps every generated file inside
            ``output_dir / domain`` and avoids a half-written scaffold.
    """
    is_single_component = (
        bool(domain)
        and domain not in {".", ".."}
        and "/" not in domain
        and "\\" not in domain
        and Path(domain).name == domain
    )
    if not is_single_component:
        raise ValueError(f"domain must be a single path component, got {domain!r}")

    root = Path(output_dir) / domain
    root.mkdir(parents=True, exist_ok=True)
    (root / "tests").mkdir(exist_ok=True)
    (root / "terraform").mkdir(exist_ok=True)

    # Target path -> rendered content, in the order the files are written.
    rendered: dict[Path, str] = {
        root / "handler.py": HANDLER_TEMPLATE.format(domain=domain, table=table),
        root / "contract.yaml": CONTRACT_TEMPLATE.format(
            domain=domain, table=table, account_id=account_id
        ),
        root / "terraform" / "main.tf": TERRAFORM_TEMPLATE.format(
            domain=domain, account_id=account_id
        ),
        root / "terraform" / "terraform.tfvars.example": TFVARS_TEMPLATE.format(
            domain=domain, account_id=account_id
        ),
        root / "tests" / f"test_{domain}.py": TEST_TEMPLATE.format(domain=domain, table=table),
        root / "consumer_sla.yaml": CONSUMER_SLA_TEMPLATE.format(domain=domain, table=table),
        root / "step_function.asl.json": STEP_FUNCTION_TEMPLATE.format(
            domain=domain, table=table
        ),
        root / "README.md": README_TEMPLATE.format(domain=domain, table=table),
    }
    for target, content in rendered.items():
        target.write_text(content, encoding="utf-8")

    return root
64
+
65
+
66
# Source text for the generated ``handler.py``. Rendered with ``str.format``
# using ``domain`` and ``table``; doubled braces become literal braces in the
# generated file.
HANDLER_TEMPLATE = '''"""Domain writer: {domain} -> {table} (Vaquar Pattern PVDM)."""

from __future__ import annotations

from typing import Any

from serverless_data_mesh.governance.consumer_sla import enforce_consumer_sla
from serverless_data_mesh.metrics.mesh_trust import publish_vrp_metric
from serverless_data_mesh.orchestration.reprocess import attempt_vrp_repair
from serverless_data_mesh.types.workload import ConsumerSLAContract, DataWriteWorkload
from serverless_data_mesh.verification.backend import create_proof_generator
from serverless_data_mesh.verification.vrp import validate_then_commit


def source_reader(start: int, end: int) -> list[dict[str, Any]]:
    return [{{"id": str(i), "payload_hash": f"h{{i}}"}} for i in range(start, end)]


def sink_reader(start: int, end: int) -> list[dict[str, Any]]:
    """Read physical sink for VRP; replace with Parquet reader in production."""
    return source_reader(start, end)


def batch_writer(start: int, end: int) -> list[str]:
    base = "s3://publisher-lakehouse/{table}/dt=PARTITION"
    return [f"{{base}}/part-{{i:08d}}.parquet" for i in range(start, end)]


def lambda_handler(event: dict[str, Any], context: Any) -> dict[str, Any]:
    """Wire IceGuardDurableCoordinator with auto-repair and consumer SLA gate."""
    raise NotImplementedError("Copy wiring from examples/domain_writer/handler.py")
'''
98
+
99
# Source text for the generated ``contract.yaml``. Rendered with ``str.format``
# using ``domain``, ``table`` and ``account_id``.
CONTRACT_TEMPLATE = """# Data product contract: {domain}
product_id: {domain}-{table}
owner_team: {domain}-platform
domain_id: {domain}
target_table: {table}
source_namespace: raw_{domain}
producer_account_id: \"{account_id}\"
sla_freshness_hours: 2
schema_version: \"1.0.0\"
quality_policy_id: strict-zero-drop
vaquar_pattern: PVDM
"""
111
+
112
# Source text for the generated ``terraform/main.tf``. Rendered with
# ``str.format`` using ``domain`` and ``account_id``; doubled braces become the
# literal braces of the Terraform blocks.
TERRAFORM_TEMPLATE = """# Producer Terraform stub for domain: {domain}
variable "producer_account_id" {{
  default = "{account_id}"
}}

variable "domain_id" {{
  default = "{domain}"
}}

# Copy modules from infrastructure/terraform/environments/multi-account/producer/
"""
123
+
124
# Source text for the generated ``terraform/terraform.tfvars.example``.
# Rendered with ``str.format`` using ``domain`` and ``account_id``; the region
# value is fixed in the template.
TFVARS_TEMPLATE = """aws_region = \"us-east-2\"
name_prefix = \"sdm-{domain}\"
producer_account_id = \"{account_id}\"
# steward_account_id = \"STEWARD_ACCOUNT\"
# publisher_account_id = \"PUBLISHER_ACCOUNT\"
"""
130
+
131
# Source text for the generated ``tests/test_<domain>.py``. Rendered with
# ``str.format`` using ``domain`` and ``table``; doubled braces become the
# literal braces of the ``partition_spec`` dict.
TEST_TEMPLATE = '''"""Tests for {domain} domain writer."""

from __future__ import annotations


def test_boundary_declared() -> None:
    from serverless_data_mesh import DomainTransactionBoundary

    boundary = DomainTransactionBoundary(
        domain_id="{domain}",
        source_namespace="raw_{domain}",
        target_table="{table}",
        partition_spec={{"dt": "2026-06-14"}},
    )
    assert boundary.domain_id == "{domain}"
'''
147
+
148
# Source text for the generated ``README.md``. Rendered with ``str.format``
# using ``domain`` and ``table``.
README_TEMPLATE = """# {domain} domain writer

Target table: `{table}`

## Scaffolded by

```bash
serverless-data-mesh init --domain {domain} --table {table} --account YOUR_ACCOUNT_ID
```

## Next steps

1. Implement `handler.py` (copy from `examples/domain_writer/handler.py`)
2. Review `consumer_sla.yaml` for Lake Formation read gates
3. Deploy `step_function.asl.json` durable workflow
4. Deploy Terraform in `terraform/`
5. Run `make demo` locally to verify PVDM gate
"""
166
+
167
# Source text for the generated ``consumer_sla.yaml``. ``scaffold_domain``
# passes ``domain`` and ``table`` to ``str.format``; only ``table`` appears in
# the template.
CONSUMER_SLA_TEMPLATE = """# Consumer SLA for {table} (VRP-backed Lake Formation gate)
consumer_id: analytics-team
target_table: {table}
max_freshness_minutes: 60
min_completeness_pct: 99.9
required_columns:
  - id
  - payload_hash
enforcement: vrp_backed
"""
177
+
178
# Source text for the generated ``step_function.asl.json``. Rendered with
# ``str.format`` using ``domain`` and ``table``; doubled braces become literal
# JSON braces, so ``${{DomainWriterArn}}`` is emitted as ``${DomainWriterArn}``.
STEP_FUNCTION_TEMPLATE = """{{
  "Comment": "PVDM durable write for {domain} -> {table}",
  "StartAt": "WriteChunk",
  "States": {{
    "WriteChunk": {{
      "Type": "Task",
      "Resource": "arn:aws:states:::lambda:invoke",
      "Parameters": {{
        "FunctionName": "${{DomainWriterArn}}",
        "Payload.$": "$"
      }},
      "Retry": [
        {{
          "ErrorEquals": ["VerificationRejectedError"],
          "IntervalSeconds": 30,
          "MaxAttempts": 2,
          "BackoffRate": 2.0
        }}
      ],
      "Next": "CommitMetadata"
    }},
    "CommitMetadata": {{
      "Type": "Task",
      "Resource": "arn:aws:states:::lambda:invoke",
      "Parameters": {{
        "FunctionName": "${{CatalogCommitArn}}",
        "Payload.$": "$"
      }},
      "End": true
    }}
  }}
}}
"""
@@ -0,0 +1,21 @@
1
+ """Domain contracts and workload types."""
2
+
3
+ from serverless_data_mesh.types.workload import (
4
+ BatchWriterFn,
5
+ ChunkWriteResult,
6
+ DataProductContract,
7
+ DataWriteWorkload,
8
+ DomainTransactionBoundary,
9
+ SourceReaderFn,
10
+ WriteOutcome,
11
+ )
12
+
13
+ __all__ = [
14
+ "BatchWriterFn",
15
+ "ChunkWriteResult",
16
+ "DataProductContract",
17
+ "DataWriteWorkload",
18
+ "DomainTransactionBoundary",
19
+ "SourceReaderFn",
20
+ "WriteOutcome",
21
+ ]
@@ -0,0 +1,123 @@
1
+ """Shared types for multi-domain write coordination and contract enforcement."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+ from typing import Any, Callable
8
+
9
+
10
class WriteOutcome(str, Enum):
    """String-valued terminal states of a domain write transaction.

    Members are ``str`` instances, so they compare equal to their raw values.
    """

    COMMITTED = "committed"
    ROLLED_BACK = "rolled_back"
    RESUMED = "resumed"
    VERIFICATION_FAILED = "verification_failed"
17
+
18
+
19
+ @dataclass(frozen=True, slots=True)
20
+ class DomainTransactionBoundary:
21
+ """Declarative contract for a single domain team's write scope.
22
+
23
+ In a federated data mesh, each domain owns its tables and publishes
24
+ consumption contracts. This boundary encodes the partition scope and
25
+ quality gates that the central coordinator enforces before metadata commit.
26
+ """
27
+
28
+ domain_id: str
29
+ source_namespace: str
30
+ target_table: str
31
+ partition_spec: dict[str, str]
32
+ quality_policy_id: str = "default:strict"
33
+ max_chunk_records: int = 5000
34
+
35
+
36
+ @dataclass(frozen=True, slots=True)
37
+ class DataWriteWorkload:
38
+ """Description of a large backfill or cross-account copy job.
39
+
40
+ The durable orchestrator shards ``total_records`` into checkpoint-sized
41
+ chunks. Each chunk is an independently resumable unit guarded by IceGuard.
42
+ """
43
+
44
+ workload_id: str
45
+ boundary: DomainTransactionBoundary
46
+ source_uri: str
47
+ target_uri: str
48
+ total_records: int
49
+ checkpoint_bucket: str
50
+ proof_bucket: str
51
+ content_fields: tuple[str, ...] = ("id", "payload_hash")
52
+ identity_fields: tuple[str, ...] = ("id",)
53
+
54
+
55
+ @dataclass(frozen=True, slots=True)
56
+ class DataProductContract:
57
+ """Published data product contract for the federated mesh registry.
58
+
59
+ Extends ``DomainTransactionBoundary`` with ownership and SLA metadata that
60
+ Steward governance and Publisher consumers rely on for discovery and audit.
61
+ """
62
+
63
+ product_id: str
64
+ owner_team: str
65
+ boundary: DomainTransactionBoundary
66
+ sla_freshness_hours: int = 24
67
+ schema_version: str = "1.0"
68
+ description: str = ""
69
+
70
+ def to_registry_entry(self) -> dict[str, str | int | dict[str, str]]:
71
+ """Serialize for mesh catalog / Backstage / internal registry APIs."""
72
+ return {
73
+ "product_id": self.product_id,
74
+ "owner_team": self.owner_team,
75
+ "domain_id": self.boundary.domain_id,
76
+ "target_table": self.boundary.target_table,
77
+ "quality_policy_id": self.boundary.quality_policy_id,
78
+ "sla_freshness_hours": self.sla_freshness_hours,
79
+ "schema_version": self.schema_version,
80
+ "description": self.description,
81
+ "partition_spec": dict(self.boundary.partition_spec),
82
+ }
83
+
84
+
85
+ @dataclass(frozen=True, slots=True)
86
+ class ConsumerSLAContract:
87
+ """Consumer-declared SLA enforced via VRP-backed proofs before read access."""
88
+
89
+ consumer_id: str
90
+ target_table: str
91
+ max_freshness_minutes: int = 60
92
+ min_completeness_pct: float = 99.9
93
+ required_columns: tuple[str, ...] = ()
94
+ enforcement: str = "vrp_backed"
95
+
96
+ def to_dict(self) -> dict[str, str | int | float | tuple[str, ...]]:
97
+ return {
98
+ "consumer_id": self.consumer_id,
99
+ "target_table": self.target_table,
100
+ "max_freshness_minutes": self.max_freshness_minutes,
101
+ "min_completeness_pct": self.min_completeness_pct,
102
+ "required_columns": self.required_columns,
103
+ "enforcement": self.enforcement,
104
+ }
105
+
106
+
107
+ @dataclass(slots=True)
108
+ class ChunkWriteResult:
109
+ """Outcome of one durable chunk step."""
110
+
111
+ chunk_index: int
112
+ record_offset: int
113
+ record_end: int
114
+ parquet_paths: list[str]
115
+ proof_s3_uri: str | None = None
116
+ verification_passed: bool = False
117
+
118
+
119
# Callable alias: takes two ints (start, end) and returns a list of strings.
BatchWriterFn = Callable[[int, int], list[str]]
"""Write records ``[start, end)`` and return newly created Parquet S3 URIs."""

# Callable alias: takes two ints (start, end) and returns a list of record dicts.
SourceReaderFn = Callable[[int, int], list[dict[str, Any]]]
"""Read source records ``[start, end)`` for VRP fingerprinting."""
@@ -0,0 +1,21 @@
1
+ """Cryptographic verification and reconciliation proofs."""
2
+
3
+ from serverless_data_mesh.verification.vrp import (
4
+ ValidateResult,
5
+ VRPProofGenerator,
6
+ VerifyResult,
7
+ validate_then_commit,
8
+ )
9
+ from serverless_data_mesh.verification.backend import create_proof_generator, veridata_available
10
+ from serverless_data_mesh.verification.fallback import FallbackProofGenerator, reconcile_multiset
11
+
12
+ __all__ = [
13
+ "VRPProofGenerator",
14
+ "FallbackProofGenerator",
15
+ "ValidateResult",
16
+ "VerifyResult",
17
+ "validate_then_commit",
18
+ "create_proof_generator",
19
+ "veridata_available",
20
+ "reconcile_multiset",
21
+ ]
@@ -0,0 +1,41 @@
1
+ """Select veridata-recon or pure-Python fallback proof backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Protocol
6
+
7
+
8
class ProofGeneratorProtocol(Protocol):
    """Structural interface shared by the proof generator backends."""

    def build_proof(self, **kwargs: Any) -> dict[str, Any]:
        """Build a proof from keyword arguments and return it as a dict."""
        ...

    def persist_proof(self, **kwargs: Any) -> str:
        """Persist a proof from keyword arguments and return a string."""
        ...
11
+
12
+
13
def veridata_available() -> bool:
    """Report whether the optional ``veridata_recon`` package can be imported."""
    try:
        import veridata_recon  # noqa: F401
    except ImportError:
        return False
    return True
20
+
21
+
22
def create_proof_generator() -> tuple[Any, str]:
    """Return ``(generator, backend_name)`` for the best available proof backend.

    When ``veridata_recon`` is importable, a ``VRPProofGenerator`` is built from
    a keypair and salt generated on this call and the backend name is
    ``"veridata-recon"``. Otherwise a ``FallbackProofGenerator`` is returned
    with the name ``"pure-python-fallback"``.
    """
    if not veridata_available():
        from serverless_data_mesh.verification.fallback import FallbackProofGenerator

        return FallbackProofGenerator(), "pure-python-fallback"

    import veridata_recon as vr

    from serverless_data_mesh.verification.vrp import VRPProofGenerator

    # New key material is generated on every call; nothing here persists it.
    keypair = vr.generate_keypair()
    generator = VRPProofGenerator(
        private_key_b64=keypair["private_key"],
        public_key_b64=keypair["public_key"],
        salt_hex=vr.generate_salt(),
    )
    return generator, "veridata-recon"