cfa-kernel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. cfa/__init__.py +39 -0
  2. cfa/_lazy.py +39 -0
  3. cfa/adapters/__init__.py +104 -0
  4. cfa/adapters/autogen.py +19 -0
  5. cfa/adapters/crewai.py +19 -0
  6. cfa/adapters/dspy.py +19 -0
  7. cfa/adapters/langgraph.py +19 -0
  8. cfa/adapters/openai_agents.py +19 -0
  9. cfa/audit/__init__.py +15 -0
  10. cfa/audit/context.py +205 -0
  11. cfa/audit/hashing.py +41 -0
  12. cfa/audit/trail.py +194 -0
  13. cfa/backends/__init__.py +132 -0
  14. cfa/backends/dbt.py +338 -0
  15. cfa/backends/pyspark.py +240 -0
  16. cfa/backends/sql.py +270 -0
  17. cfa/behavior/__init__.py +49 -0
  18. cfa/behavior/llm.py +244 -0
  19. cfa/behavior/spec.py +235 -0
  20. cfa/behavior/systematizer.py +222 -0
  21. cfa/cli/__init__.py +296 -0
  22. cfa/cli/__main__.py +6 -0
  23. cfa/cli/_helpers.py +109 -0
  24. cfa/cli/core/__init__.py +0 -0
  25. cfa/cli/core/evaluate.py +72 -0
  26. cfa/cli/core/validate.py +29 -0
  27. cfa/cli/formatters.py +280 -0
  28. cfa/cli/governance/__init__.py +0 -0
  29. cfa/cli/governance/audit.py +65 -0
  30. cfa/cli/governance/catalog.py +28 -0
  31. cfa/cli/governance/policy.py +119 -0
  32. cfa/cli/governance/rules.py +42 -0
  33. cfa/cli/governance/signature.py +31 -0
  34. cfa/cli/infrastructure/__init__.py +0 -0
  35. cfa/cli/infrastructure/backend_list.py +24 -0
  36. cfa/cli/infrastructure/storage.py +87 -0
  37. cfa/cli/project/__init__.py +0 -0
  38. cfa/cli/project/init.py +73 -0
  39. cfa/cli/project/lifecycle.py +92 -0
  40. cfa/cli/project/status.py +75 -0
  41. cfa/cli/project/taxonomy.py +38 -0
  42. cfa/cli/reporting/__init__.py +0 -0
  43. cfa/cli/reporting/report.py +109 -0
  44. cfa/cli/reporting/serve.py +43 -0
  45. cfa/config.py +103 -0
  46. cfa/core/__init__.py +19 -0
  47. cfa/core/codegen.py +65 -0
  48. cfa/core/conditions.py +129 -0
  49. cfa/core/kernel.py +224 -0
  50. cfa/core/phases/__init__.py +0 -0
  51. cfa/core/phases/runner.py +477 -0
  52. cfa/core/planner.py +290 -0
  53. cfa/execution/__init__.py +12 -0
  54. cfa/execution/partial.py +339 -0
  55. cfa/execution/state_projection.py +216 -0
  56. cfa/governance/__init__.py +76 -0
  57. cfa/lifecycle/__init__.py +51 -0
  58. cfa/mcp/__init__.py +347 -0
  59. cfa/mcp/__main__.py +4 -0
  60. cfa/normalizer/__init__.py +15 -0
  61. cfa/normalizer/base.py +441 -0
  62. cfa/normalizer/llm.py +426 -0
  63. cfa/observability/__init__.py +14 -0
  64. cfa/observability/indices.py +177 -0
  65. cfa/observability/metrics.py +91 -0
  66. cfa/observability/notify.py +79 -0
  67. cfa/observability/otel.py +81 -0
  68. cfa/observability/promotion.py +367 -0
  69. cfa/policy/__init__.py +12 -0
  70. cfa/policy/bundle.py +317 -0
  71. cfa/policy/catalog.py +117 -0
  72. cfa/policy/engine.py +306 -0
  73. cfa/reporting/__init__.py +42 -0
  74. cfa/reporting/charts.py +223 -0
  75. cfa/reporting/engine.py +456 -0
  76. cfa/resolution/__init__.py +62 -0
  77. cfa/runtime/__init__.py +13 -0
  78. cfa/runtime/gate.py +287 -0
  79. cfa/sandbox/__init__.py +189 -0
  80. cfa/sandbox/executor.py +92 -0
  81. cfa/sandbox/mock.py +89 -0
  82. cfa/sandbox/panic.py +52 -0
  83. cfa/storage/__init__.py +591 -0
  84. cfa/testing/__init__.py +60 -0
  85. cfa/testing/asserts.py +77 -0
  86. cfa/testing/evaluate.py +168 -0
  87. cfa/testing/fixtures.py +89 -0
  88. cfa/testing/markers.py +36 -0
  89. cfa/types.py +489 -0
  90. cfa/validation/__init__.py +14 -0
  91. cfa/validation/runtime.py +285 -0
  92. cfa/validation/signature.py +146 -0
  93. cfa/validation/static.py +252 -0
  94. cfa_kernel-0.1.0.dist-info/METADATA +32 -0
  95. cfa_kernel-0.1.0.dist-info/RECORD +98 -0
  96. cfa_kernel-0.1.0.dist-info/WHEEL +4 -0
  97. cfa_kernel-0.1.0.dist-info/entry_points.txt +3 -0
  98. cfa_kernel-0.1.0.dist-info/licenses/LICENSE +21 -0
cfa/core/planner.py ADDED
@@ -0,0 +1,290 @@
1
+ """
2
+ CFA Execution Planner
3
+ =====================
4
+ Generates a governed execution DAG from an approved State Signature.
5
+
6
+ The Planner is NOT free — it fills templates, follows the plan approved
7
+ by the Policy Engine, and respects all constraints declared in the Signature.
8
+
9
+ Key properties:
10
+ - Every plan is idempotent (merge with deterministic key, partition overwrite)
11
+ - Supports Composite Intent decomposition
12
+ - Consistency unit selection follows whitepaper enum (partition | dataset | dag_branch | time_window)
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass, field
18
+ from enum import StrEnum
19
+ from typing import Any
20
+
21
+ from cfa.types import StateSignature, TargetLayer
22
+
23
+ # ── Consistency Unit ─────────────────────────────────────────────────────────
24
+
25
+
26
+ class ConsistencyUnit(StrEnum):
27
+ PARTITION = "partition"
28
+ DATASET = "dataset"
29
+ DAG_BRANCH = "dag_branch"
30
+ TIME_WINDOW = "time_window"
31
+
32
+
33
+ # ── Execution Steps ──────────────────────────────────────────────────────────
34
+
35
+
36
+ class StepType(StrEnum):
37
+ EXTRACT = "extract"
38
+ ANONYMIZE = "anonymize"
39
+ JOIN = "join"
40
+ TRANSFORM = "transform"
41
+ LOAD = "load"
42
+ FILTER = "filter"
43
+ AGGREGATE = "aggregate"
44
+
45
+
46
@dataclass(frozen=True)
class ExecutionStep:
    """One node of the execution DAG.

    The instance is frozen, so fields cannot be reassigned after creation.
    ``config`` is still an ordinary dict and can be mutated in place.
    """

    id: str
    step_type: StepType
    source: str | None = None
    target: str | None = None
    config: dict[str, Any] = field(default_factory=dict)
    # Ids of the steps that must be resolved before this one.
    depends_on: tuple[str, ...] = ()

    @property
    def description(self) -> str:
        """Return ``"<type> | source=<source> | target=<target>"``.

        The ``source`` and ``target`` segments are left out when the
        corresponding field is falsy (``None`` or an empty string).
        """
        labelled = (("source", self.source), ("target", self.target))
        extras = [f"{label}={value}" for label, value in labelled if value]
        return " | ".join([f"{self.step_type.value}", *extras])
65
+
66
+
67
+ # ── Execution Plan ───────────────────────────────────────────────────────────
68
+
69
+
70
+ class WriteMode(StrEnum):
71
+ MERGE = "merge"
72
+ OVERWRITE_PARTITION = "overwrite_partition"
73
+ APPEND = "append" # only allowed in Bronze
74
+
75
+
76
@dataclass
class ExecutionPlan:
    """
    Governed execution DAG generated from an approved Signature.

    Intended to be immutable once finalized — any change requires a new plan.
    The dataclass itself is not frozen, so this is a convention rather than
    something the class enforces.
    """

    signature_hash: str
    intent_id: str
    steps: list[ExecutionStep]
    consistency_unit: ConsistencyUnit
    write_mode: WriteMode
    idempotent: bool = True
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def step_ids(self) -> list[str]:
        """Ids of all steps, in declaration order."""
        return [s.id for s in self.steps]

    @property
    def step_count(self) -> int:
        """Number of steps in the plan."""
        return len(self.steps)

    def get_step(self, step_id: str) -> ExecutionStep | None:
        """Return the first step whose id equals *step_id*, or ``None``."""
        return next((s for s in self.steps if s.id == step_id), None)

    def execution_order(self) -> list[ExecutionStep]:
        """Topological sort of steps respecting ``depends_on``.

        Steps are resolved in repeated passes over the still-pending steps,
        each pass keeping declaration order, so the result is deterministic.

        Raises:
            ValueError: if a step depends on an id that no step in the plan
                has, or if the dependencies form a cycle.
        """
        known_ids = {s.id for s in self.steps}
        for step in self.steps:
            for dep in step.depends_on:
                if dep not in known_ids:
                    # A missing dependency can never be satisfied; report it
                    # as such rather than as a cycle.
                    raise ValueError(
                        f"Unknown dependency in execution plan: step "
                        f"{step.id!r} depends on missing step {dep!r}"
                    )

        resolved: list[ExecutionStep] = []
        resolved_ids: set[str] = set()
        pending = list(self.steps)

        while pending:
            still_pending: list[ExecutionStep] = []
            for step in pending:
                if all(dep in resolved_ids for dep in step.depends_on):
                    resolved.append(step)
                    resolved_ids.add(step.id)
                else:
                    still_pending.append(step)

            # A pass that resolves nothing can never make progress later:
            # the remaining steps are waiting on each other.
            if len(still_pending) == len(pending):
                raise ValueError("Cyclic dependency detected in execution plan")
            pending = still_pending

        return resolved

    def to_dict(self) -> dict[str, Any]:
        """Serialize the plan to plain dicts and lists.

        ``metadata`` is not included. Each step's ``config`` is shallow-copied
        so that editing the returned structure does not alter the plan.
        """
        return {
            "signature_hash": self.signature_hash,
            "intent_id": self.intent_id,
            "consistency_unit": self.consistency_unit.value,
            "write_mode": self.write_mode.value,
            "idempotent": self.idempotent,
            "steps": [
                {
                    "id": s.id,
                    "type": s.step_type.value,
                    "source": s.source,
                    "target": s.target,
                    "config": dict(s.config),
                    "depends_on": list(s.depends_on),
                }
                for s in self.steps
            ],
        }
142
+
143
+
144
+ # ── Execution Planner ────────────────────────────────────────────────────────
145
+
146
+
147
class ExecutionPlanner:
    """
    Generates an ExecutionPlan from an approved StateSignature.

    The planner does NOT generate arbitrary code — it assembles governed steps
    based on the Signature's intent, datasets, constraints and target layer.
    """

    def plan(self, signature: StateSignature) -> ExecutionPlan:
        """Assemble steps, consistency unit and write mode into a plan.

        The plan's metadata records the signature's ``domain`` and the value
        of its ``target_layer``.
        """
        steps = self._build_steps(signature)
        consistency_unit = self._select_consistency_unit(signature)
        write_mode = self._select_write_mode(signature)

        return ExecutionPlan(
            signature_hash=signature.signature_hash,
            intent_id=signature.intent_id,
            steps=steps,
            consistency_unit=consistency_unit,
            write_mode=write_mode,
            metadata={
                "domain": signature.domain,
                "target_layer": signature.target_layer.value,
            },
        )

    def _build_steps(self, sig: StateSignature) -> list[ExecutionStep]:
        """Build the ordered step list: extract, anonymize, join, aggregate, load.

        Join and aggregate steps are only emitted when the intent text asks
        for them; the load step is always emitted last.
        """
        steps: list[ExecutionStep] = []
        # For each dataset, the id of the last step that produced it
        # (its anonymize step if one was added, otherwise its extract step).
        post_extract_ids: list[str] = []

        # Step 1: Extract each dataset (with partition filter if required)
        for ds in sig.datasets:
            config: dict[str, Any] = {}
            if sig.constraints.partition_by:
                config["filter"] = {
                    "column": sig.constraints.partition_by[0],
                    "predicate": ">=",
                    "required_by": "FINOPS",
                }
            steps.append(ExecutionStep(
                id=f"extract_{ds.name}",
                step_type=StepType.EXTRACT,
                source=ds.name,
                config=config,
            ))

        # Step 2: Anonymize datasets with PII
        for ds in sig.datasets:
            extract_id = f"extract_{ds.name}"
            if not (ds.contains_pii and sig.constraints.no_pii_raw):
                post_extract_ids.append(extract_id)
                continue
            anon_id = f"anonymize_{ds.name}"
            steps.append(ExecutionStep(
                id=anon_id,
                step_type=StepType.ANONYMIZE,
                source=ds.name,
                config={
                    "pii_columns": list(ds.pii_columns),
                    "strategy": "sha256",
                },
                depends_on=(extract_id,),
            ))
            post_extract_ids.append(anon_id)

        # Step 3: Join if multiple datasets and intent is reconciliation
        if len(sig.datasets) > 1 and "reconcil" in sig.intent:
            join_id = "join_datasets"
            steps.append(ExecutionStep(
                id=join_id,
                step_type=StepType.JOIN,
                config={
                    "type": "broadcast" if self._needs_broadcast(sig) else "sort_merge",
                    "datasets": [d.name for d in sig.datasets],
                    "merge_keys": self._primary_merge_keys(sig),
                },
                depends_on=tuple(post_extract_ids),
            ))
            load_depends: tuple[str, ...] = (join_id,)
        else:
            load_depends = tuple(post_extract_ids)

        # Step 4: Aggregate if intent calls for it
        if "aggregate" in sig.intent:
            agg_id = "aggregate"
            steps.append(ExecutionStep(
                id=agg_id,
                step_type=StepType.AGGREGATE,
                config={"group_by": list(sig.constraints.partition_by)},
                depends_on=load_depends,
            ))
            load_depends = (agg_id,)

        # Step 5: Load to target
        load_config: dict[str, Any] = {
            "write_mode": self._select_write_mode(sig).value,
        }
        if sig.constraints.merge_key_required:
            load_config["merge_key"] = True
            load_config["merge_keys"] = self._primary_merge_keys(sig)
        if sig.constraints.partition_by:
            load_config["partition_by"] = list(sig.constraints.partition_by)

        steps.append(ExecutionStep(
            id="load_target",
            step_type=StepType.LOAD,
            target=self._derive_target_name(sig),
            config=load_config,
            depends_on=load_depends,
        ))

        return steps

    def _primary_merge_keys(self, sig: StateSignature) -> list[str]:
        """Return the first dataset's merge keys, or ``["id"]`` when it has none."""
        if sig.datasets and sig.datasets[0].merge_keys:
            return list(sig.datasets[0].merge_keys)
        return ["id"]

    def _needs_broadcast(self, sig: StateSignature) -> bool:
        """Use broadcast join when one dataset is much smaller than the other.

        True only for exactly two datasets where the smaller is under 1.0 and
        the larger is over 100.0 (``size_gb``).
        """
        if len(sig.datasets) != 2:
            return False
        smaller, larger = sorted(d.size_gb for d in sig.datasets)
        return smaller < 1.0 and larger > 100.0

    def _select_consistency_unit(self, sig: StateSignature) -> ConsistencyUnit:
        """Per whitepaper: selection based on execution context.

        A partitioning constraint wins; otherwise more than two datasets
        selects DAG_BRANCH, and everything else DATASET.
        """
        if sig.constraints.partition_by:
            return ConsistencyUnit.PARTITION
        if len(sig.datasets) > 2:
            return ConsistencyUnit.DAG_BRANCH
        return ConsistencyUnit.DATASET

    def _select_write_mode(self, sig: StateSignature) -> WriteMode:
        """Per whitepaper: append only in Bronze, merge in Silver/Gold.

        In Bronze a partitioning constraint switches the mode to
        OVERWRITE_PARTITION instead of APPEND.
        """
        if sig.target_layer == TargetLayer.BRONZE:
            return WriteMode.OVERWRITE_PARTITION if sig.constraints.partition_by else WriteMode.APPEND
        return WriteMode.MERGE

    def _derive_target_name(self, sig: StateSignature) -> str:
        """Return the target dataset name declared by the signature."""
        return sig.target_dataset_name
@@ -0,0 +1,12 @@
1
+ """CFA Execution — partial execution and state projection."""
2
+ from cfa._lazy import LazyLoader
3
+
4
# Module-level ``__getattr__`` hook. Each public name maps to the
# (module path, attribute name) pair handed to LazyLoader.
# NOTE(review): presumably the target module is imported on first access;
# confirm against cfa._lazy.LazyLoader.
__getattr__ = LazyLoader(
    {
        # Partial execution: manager, state and policies.
        "PartialExecutionManager": ("cfa.execution.partial", "PartialExecutionManager"),
        "PartialExecutionState": ("cfa.execution.partial", "PartialExecutionState"),
        "PublishState": ("cfa.execution.partial", "PublishState"),
        "FailurePolicy": ("cfa.execution.partial", "FailurePolicy"),
        "RetryPolicy": ("cfa.execution.partial", "RetryPolicy"),
        # State projection.
        "StateProjectionProtocol": ("cfa.execution.state_projection", "StateProjectionProtocol"),
        "ProjectionResult": ("cfa.execution.state_projection", "ProjectionResult"),
    }
)
@@ -0,0 +1,339 @@
1
+ """
2
+ CFA Partial Execution State
3
+ ============================
4
+ Manages partial failures, retry policies, and publish semantics.
5
+
6
+ When a plan partially fails, CFA does NOT silently succeed or blindly fail.
7
+ Instead, it applies a FailurePolicy to determine next action:
8
+ - FULL_ROLLBACK: discard everything, mark as rolled_back
9
+ - SELECTIVE_QUARANTINE: quarantine failed consistency units, commit the rest
10
+ - PARTIAL_COMMIT_NO_PUBLISH: commit all succeeded, but do not publish
11
+ - DEGRADED_PUBLISH: commit and publish with degradation flag
12
+
13
+ Retry policy: max 3 attempts, failed consistency units only.
14
+ Publish semantics: committed_not_published -> published | degraded.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass, field
20
+ from enum import StrEnum
21
+ from typing import Any
22
+
23
+ from cfa.core.codegen import GeneratedCode
24
+ from cfa.core.planner import ExecutionPlan
25
+ from cfa.sandbox import (
26
+ ExecutionMetrics,
27
+ SandboxOutcome,
28
+ SandboxResult,
29
+ StepOutcome,
30
+ )
31
+ from cfa.sandbox.executor import SandboxExecutor
32
+ from cfa.types import (
33
+ Fault,
34
+ FaultFamily,
35
+ FaultSeverity,
36
+ PolicyAction,
37
+ StateSignature,
38
+ )
39
+ from cfa.validation.runtime import RuntimeValidationResult, RuntimeValidator
40
+
41
+ # ── Enums ───────────────────────────────────────────────────────────────────
42
+
43
+
44
+ class FailurePolicy(StrEnum):
45
+ FULL_ROLLBACK = "full_rollback"
46
+ SELECTIVE_QUARANTINE = "selective_quarantine"
47
+ PARTIAL_COMMIT_NO_PUBLISH = "partial_commit_no_publish"
48
+ DEGRADED_PUBLISH = "degraded_publish"
49
+
50
+
51
+ class PublishState(StrEnum):
52
+ NOT_STARTED = "not_started"
53
+ COMMITTED_NOT_PUBLISHED = "committed_not_published"
54
+ PUBLISHED = "published"
55
+ DEGRADED = "degraded"
56
+ ROLLED_BACK = "rolled_back"
57
+ QUARANTINED = "quarantined"
58
+
59
+
60
+ # ── Retry Policy ────────────────────────────────────────────────────────────
61
+
62
+
63
@dataclass(frozen=True)
class RetryPolicy:
    """Retry settings for failed steps.

    ``max_attempts`` includes the first execution: the manager retries at
    most ``max_attempts - 1`` times and does not retry when it is 1 or less.
    """

    max_attempts: int = 3
    # When True, later retries re-run only the steps that are still failing;
    # when False, they re-run every step of the plan.
    retry_failed_only: bool = True
69
+
70
+
71
+ # ── Execution State ─────────────────────────────────────────────────────────
72
+
73
+
74
@dataclass
class PartialExecutionState:
    """
    Mutable record of how far a plan's execution got.

    Holds the sandbox and runtime-validation results, the retry count, which
    steps were committed or quarantined, the collected faults, and the
    resulting publish state.
    """

    plan_signature_hash: str
    publish_state: PublishState = PublishState.NOT_STARTED
    sandbox_result: SandboxResult | None = None
    runtime_validation: RuntimeValidationResult | None = None
    # Number of retry attempts made so far (0 when no retry ran).
    retry_count: int = 0
    quarantined_steps: list[str] = field(default_factory=list)
    committed_steps: list[str] = field(default_factory=list)
    faults: list[Fault] = field(default_factory=list)
    # Stays None when no failure policy had to be applied.
    failure_policy_applied: FailurePolicy | None = None

    @property
    def is_fully_committed(self) -> bool:
        """True when every step succeeded and the state is committed or published."""
        result = self.sandbox_result
        if result is None:
            return False
        committed_states = (
            PublishState.COMMITTED_NOT_PUBLISHED,
            PublishState.PUBLISHED,
        )
        return result.all_succeeded and self.publish_state in committed_states

    @property
    def has_quarantined(self) -> bool:
        """True when at least one step id has been quarantined."""
        return bool(self.quarantined_steps)
102
+
103
+
104
+ # ── Partial Execution Manager ───────────────────────────────────────────────
105
+
106
+
107
class PartialExecutionManager:
    """
    Runs a plan in the sandbox and decides its publish state.

    Flow:
    1. Execute plan in sandbox
    2. Validate runtime metrics
    3. On partial failure, retry the failed steps if the retry policy allows
    4. If failures remain, apply the failure policy
    5. Record the resulting publish state
    """

    def __init__(
        self,
        sandbox: SandboxExecutor,
        runtime_validator: RuntimeValidator | None = None,
        failure_policy: FailurePolicy = FailurePolicy.SELECTIVE_QUARANTINE,
        retry_policy: RetryPolicy | None = None,
    ) -> None:
        """Store collaborators, creating default validator and retry policy if omitted."""
        self.sandbox = sandbox
        self.failure_policy = failure_policy
        self.runtime_validator = runtime_validator or RuntimeValidator()
        self.retry_policy = retry_policy or RetryPolicy()

    def execute(
        self,
        plan: ExecutionPlan,
        code: GeneratedCode,
        signature: StateSignature,
        schema_contract: dict[str, Any] | None = None,
    ) -> PartialExecutionState:
        """Execute *plan* once, then validate, retry and apply policy as needed.

        Returns the PartialExecutionState describing the outcome; faults from
        the sandbox and from runtime validation are accumulated on it.
        """
        state = PartialExecutionState(plan_signature_hash=plan.signature_hash)

        # ── Execute in sandbox ──────────────────────────────────────────
        first_run = self.sandbox.execute(plan, code, signature)
        state.sandbox_result = first_run
        state.faults.extend(first_run.faults)

        # ── Handle panic (environmental fault) ──────────────────────────
        # A panic skips runtime validation and always rolls back, whatever
        # failure policy is configured.
        if first_run.outcome == SandboxOutcome.PANIC:
            state.publish_state = PublishState.ROLLED_BACK
            state.failure_policy_applied = FailurePolicy.FULL_ROLLBACK
            return state

        # ── Runtime validation ──────────────────────────────────────────
        validation = self.runtime_validator.validate(first_run, signature, schema_contract)
        state.runtime_validation = validation
        state.faults.extend(validation.faults)

        # ── Complete execution: publish, or apply policy if validation failed
        if first_run.outcome == SandboxOutcome.COMPLETED:
            if not validation.passed:
                return self._apply_failure_policy_for_validation(state)
            state.publish_state = PublishState.PUBLISHED
            state.committed_steps = [r.step_id for r in first_run.step_results]
            return state

        # ── Partial failure: some steps failed ──────────────────────────
        if first_run.outcome in (SandboxOutcome.PARTIAL, SandboxOutcome.FAILED):
            after_retry = self._retry_failed_steps(state, plan, code, signature, schema_contract)
            if after_retry is None:
                return self._apply_failure_policy(state)
            return after_retry

        return state

    def _retry_failed_steps(
        self,
        state: PartialExecutionState,
        plan: ExecutionPlan,
        code: GeneratedCode,
        signature: StateSignature,
        schema_contract: dict[str, Any] | None,
    ) -> PartialExecutionState | None:
        """Retry failed consistency units before terminal policy application.

        Returns the final state when retries cleared every failure (published,
        or handled by the validation failure policy). Returns ``None`` when
        there was nothing to retry or failures remain, so the caller applies
        the failure policy.
        """
        assert state.sandbox_result is not None
        initially_failed = [r.step_id for r in state.sandbox_result.failed_steps]
        if not initially_failed or self.retry_policy.max_attempts <= 1:
            return None

        merged = state.sandbox_result
        to_retry = list(initially_failed)

        # The first execution counts as attempt 0, so this loop makes at most
        # max_attempts - 1 retries.
        for attempt in range(1, self.retry_policy.max_attempts):
            rerun = self.sandbox.execute(plan, code, signature, step_ids=to_retry)
            state.retry_count = attempt
            state.faults.extend(rerun.faults)

            merged = self._merge_retry_result(merged, rerun)
            still_failing = [r.step_id for r in merged.failed_steps]

            if still_failing:
                # Choose what the next attempt re-runs.
                if self.retry_policy.retry_failed_only:
                    to_retry = still_failing
                else:
                    to_retry = [s.id for s in plan.execution_order()]
                continue

            # Every step now succeeds: re-validate the merged result.
            state.sandbox_result = merged
            validation = self.runtime_validator.validate(merged, signature, schema_contract)
            state.runtime_validation = validation
            state.faults.extend(validation.faults)
            if not validation.passed:
                return self._apply_failure_policy_for_validation(state)
            state.publish_state = PublishState.PUBLISHED
            state.committed_steps = [r.step_id for r in merged.step_results]
            state.quarantined_steps = []
            return state

        # Retries exhausted with failures left; expose the merged result.
        state.sandbox_result = merged
        return None

    def _merge_retry_result(
        self,
        base: SandboxResult,
        retry: SandboxResult,
    ) -> SandboxResult:
        """Overlay *retry* on *base*, keeping the latest result per retried step.

        Only steps already present in *base* appear in the merged result, in
        *base* order. Metrics are rebuilt from the successful merged steps.
        """
        latest_by_id = {r.step_id: r for r in retry.step_results}
        merged_steps = [latest_by_id.get(prev.step_id, prev) for prev in base.step_results]

        totals = ExecutionMetrics()
        collected_faults: list[Fault] = []
        for result in merged_steps:
            if result.faults:
                collected_faults.extend(result.faults)
            if result.outcome != StepOutcome.SUCCESS:
                continue
            metrics = result.metrics
            # rows_output and output_schema are overwritten, not summed, so
            # they end up holding the last successful step's values.
            totals.rows_output = metrics.rows_output
            totals.shuffle_bytes += metrics.shuffle_bytes
            totals.duration_seconds += metrics.duration_seconds
            totals.cost_dbu += metrics.cost_dbu
            totals.output_schema = metrics.output_schema
            for column, nulls in metrics.null_counts.items():
                totals.null_counts[column] = totals.null_counts.get(column, 0) + nulls

        failed_count = sum(1 for r in merged_steps if r.outcome == StepOutcome.FAILED)
        if failed_count == 0:
            merged_outcome = SandboxOutcome.COMPLETED
        elif failed_count < len(merged_steps):
            merged_outcome = SandboxOutcome.PARTIAL
        else:
            merged_outcome = SandboxOutcome.FAILED

        return SandboxResult(
            outcome=merged_outcome,
            step_results=merged_steps,
            aggregate_metrics=totals,
            faults=collected_faults,
            panic_reason=retry.panic_reason or base.panic_reason,
        )

    def _apply_failure_policy(self, state: PartialExecutionState) -> PartialExecutionState:
        """Apply failure policy when sandbox has partial/full failure.

        Sets the committed and quarantined step ids and the publish state on
        *state* according to ``self.failure_policy``, and returns *state*.
        """
        assert state.sandbox_result is not None
        result = state.sandbox_result

        policy = self.failure_policy
        state.failure_policy_applied = policy
        ok_steps = result.successful_steps
        bad_steps = result.failed_steps

        if policy == FailurePolicy.FULL_ROLLBACK:
            # Nothing is committed; every step is listed as quarantined.
            state.publish_state = PublishState.ROLLED_BACK
            state.quarantined_steps = [r.step_id for r in result.step_results]
            return state

        splitting_policies = (
            FailurePolicy.SELECTIVE_QUARANTINE,
            FailurePolicy.PARTIAL_COMMIT_NO_PUBLISH,
            FailurePolicy.DEGRADED_PUBLISH,
        )
        if policy not in splitting_policies:
            return state

        # The remaining policies all commit the successes and quarantine the
        # failures; they differ only in the publish state.
        state.committed_steps = [r.step_id for r in ok_steps]
        state.quarantined_steps = [r.step_id for r in bad_steps]

        if policy == FailurePolicy.PARTIAL_COMMIT_NO_PUBLISH:
            state.publish_state = PublishState.COMMITTED_NOT_PUBLISHED
        elif not ok_steps:
            # With no successful step there is nothing to keep.
            state.publish_state = PublishState.ROLLED_BACK
        elif policy == FailurePolicy.SELECTIVE_QUARANTINE:
            state.publish_state = PublishState.QUARANTINED
        else:
            state.publish_state = PublishState.DEGRADED
            state.faults.append(Fault(
                code="PARTIAL_DEGRADED_PUBLISH",
                family=FaultFamily.RUNTIME,
                severity=FaultSeverity.WARNING,
                stage="partial_execution",
                message=(
                    f"Degraded publish: {len(bad_steps)} of "
                    f"{len(result.step_results)} steps failed."
                ),
                mandatory_action=PolicyAction.APPROVE,
                detected_before_execution=False,
            ))

        return state

    def _apply_failure_policy_for_validation(
        self, state: PartialExecutionState
    ) -> PartialExecutionState:
        """Apply failure policy when runtime validation fails on complete execution.

        All steps succeeded here, so the whole batch is handled together:
        quarantined under SELECTIVE_QUARANTINE, committed under the two
        commit policies, and left untouched under FULL_ROLLBACK.
        """
        state.failure_policy_applied = self.failure_policy

        # policy -> (publish state, state attribute that receives all step ids)
        handling = {
            FailurePolicy.FULL_ROLLBACK: (PublishState.ROLLED_BACK, None),
            FailurePolicy.SELECTIVE_QUARANTINE: (PublishState.QUARANTINED, "quarantined_steps"),
            FailurePolicy.PARTIAL_COMMIT_NO_PUBLISH: (
                PublishState.COMMITTED_NOT_PUBLISHED,
                "committed_steps",
            ),
            FailurePolicy.DEGRADED_PUBLISH: (PublishState.DEGRADED, "committed_steps"),
        }
        chosen = handling.get(self.failure_policy)
        if chosen is None:
            return state

        publish_state, step_attr = chosen
        state.publish_state = publish_state
        if step_attr is not None and state.sandbox_result:
            all_ids = [r.step_id for r in state.sandbox_result.step_results]
            setattr(state, step_attr, all_ids)

        return state