cfa-kernel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cfa/__init__.py +39 -0
- cfa/_lazy.py +39 -0
- cfa/adapters/__init__.py +104 -0
- cfa/adapters/autogen.py +19 -0
- cfa/adapters/crewai.py +19 -0
- cfa/adapters/dspy.py +19 -0
- cfa/adapters/langgraph.py +19 -0
- cfa/adapters/openai_agents.py +19 -0
- cfa/audit/__init__.py +15 -0
- cfa/audit/context.py +205 -0
- cfa/audit/hashing.py +41 -0
- cfa/audit/trail.py +194 -0
- cfa/backends/__init__.py +132 -0
- cfa/backends/dbt.py +338 -0
- cfa/backends/pyspark.py +240 -0
- cfa/backends/sql.py +270 -0
- cfa/behavior/__init__.py +49 -0
- cfa/behavior/llm.py +244 -0
- cfa/behavior/spec.py +235 -0
- cfa/behavior/systematizer.py +222 -0
- cfa/cli/__init__.py +296 -0
- cfa/cli/__main__.py +6 -0
- cfa/cli/_helpers.py +109 -0
- cfa/cli/core/__init__.py +0 -0
- cfa/cli/core/evaluate.py +72 -0
- cfa/cli/core/validate.py +29 -0
- cfa/cli/formatters.py +280 -0
- cfa/cli/governance/__init__.py +0 -0
- cfa/cli/governance/audit.py +65 -0
- cfa/cli/governance/catalog.py +28 -0
- cfa/cli/governance/policy.py +119 -0
- cfa/cli/governance/rules.py +42 -0
- cfa/cli/governance/signature.py +31 -0
- cfa/cli/infrastructure/__init__.py +0 -0
- cfa/cli/infrastructure/backend_list.py +24 -0
- cfa/cli/infrastructure/storage.py +87 -0
- cfa/cli/project/__init__.py +0 -0
- cfa/cli/project/init.py +73 -0
- cfa/cli/project/lifecycle.py +92 -0
- cfa/cli/project/status.py +75 -0
- cfa/cli/project/taxonomy.py +38 -0
- cfa/cli/reporting/__init__.py +0 -0
- cfa/cli/reporting/report.py +109 -0
- cfa/cli/reporting/serve.py +43 -0
- cfa/config.py +103 -0
- cfa/core/__init__.py +19 -0
- cfa/core/codegen.py +65 -0
- cfa/core/conditions.py +129 -0
- cfa/core/kernel.py +224 -0
- cfa/core/phases/__init__.py +0 -0
- cfa/core/phases/runner.py +477 -0
- cfa/core/planner.py +290 -0
- cfa/execution/__init__.py +12 -0
- cfa/execution/partial.py +339 -0
- cfa/execution/state_projection.py +216 -0
- cfa/governance/__init__.py +76 -0
- cfa/lifecycle/__init__.py +51 -0
- cfa/mcp/__init__.py +347 -0
- cfa/mcp/__main__.py +4 -0
- cfa/normalizer/__init__.py +15 -0
- cfa/normalizer/base.py +441 -0
- cfa/normalizer/llm.py +426 -0
- cfa/observability/__init__.py +14 -0
- cfa/observability/indices.py +177 -0
- cfa/observability/metrics.py +91 -0
- cfa/observability/notify.py +79 -0
- cfa/observability/otel.py +81 -0
- cfa/observability/promotion.py +367 -0
- cfa/policy/__init__.py +12 -0
- cfa/policy/bundle.py +317 -0
- cfa/policy/catalog.py +117 -0
- cfa/policy/engine.py +306 -0
- cfa/reporting/__init__.py +42 -0
- cfa/reporting/charts.py +223 -0
- cfa/reporting/engine.py +456 -0
- cfa/resolution/__init__.py +62 -0
- cfa/runtime/__init__.py +13 -0
- cfa/runtime/gate.py +287 -0
- cfa/sandbox/__init__.py +189 -0
- cfa/sandbox/executor.py +92 -0
- cfa/sandbox/mock.py +89 -0
- cfa/sandbox/panic.py +52 -0
- cfa/storage/__init__.py +591 -0
- cfa/testing/__init__.py +60 -0
- cfa/testing/asserts.py +77 -0
- cfa/testing/evaluate.py +168 -0
- cfa/testing/fixtures.py +89 -0
- cfa/testing/markers.py +36 -0
- cfa/types.py +489 -0
- cfa/validation/__init__.py +14 -0
- cfa/validation/runtime.py +285 -0
- cfa/validation/signature.py +146 -0
- cfa/validation/static.py +252 -0
- cfa_kernel-0.1.0.dist-info/METADATA +32 -0
- cfa_kernel-0.1.0.dist-info/RECORD +98 -0
- cfa_kernel-0.1.0.dist-info/WHEEL +4 -0
- cfa_kernel-0.1.0.dist-info/entry_points.txt +3 -0
- cfa_kernel-0.1.0.dist-info/licenses/LICENSE +21 -0
cfa/core/planner.py
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFA Execution Planner
|
|
3
|
+
=====================
|
|
4
|
+
Generates a governed execution DAG from an approved State Signature.
|
|
5
|
+
|
|
6
|
+
The Planner is NOT free — it fills templates, follows the plan approved
|
|
7
|
+
by the Policy Engine, and respects all constraints declared in the Signature.
|
|
8
|
+
|
|
9
|
+
Key properties:
|
|
10
|
+
- Every plan is idempotent (merge with deterministic key, partition overwrite)
|
|
11
|
+
- Supports Composite Intent decomposition
|
|
12
|
+
- Consistency unit selection follows whitepaper enum (partition | dataset | dag_branch | time_window)
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from enum import StrEnum
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
from cfa.types import StateSignature, TargetLayer
|
|
22
|
+
|
|
23
|
+
# ── Consistency Unit ─────────────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ConsistencyUnit(StrEnum):
|
|
27
|
+
PARTITION = "partition"
|
|
28
|
+
DATASET = "dataset"
|
|
29
|
+
DAG_BRANCH = "dag_branch"
|
|
30
|
+
TIME_WINDOW = "time_window"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ── Execution Steps ──────────────────────────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class StepType(StrEnum):
|
|
37
|
+
EXTRACT = "extract"
|
|
38
|
+
ANONYMIZE = "anonymize"
|
|
39
|
+
JOIN = "join"
|
|
40
|
+
TRANSFORM = "transform"
|
|
41
|
+
LOAD = "load"
|
|
42
|
+
FILTER = "filter"
|
|
43
|
+
AGGREGATE = "aggregate"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass(frozen=True)
|
|
47
|
+
class ExecutionStep:
|
|
48
|
+
"""Single node in the execution DAG."""
|
|
49
|
+
|
|
50
|
+
id: str
|
|
51
|
+
step_type: StepType
|
|
52
|
+
source: str | None = None
|
|
53
|
+
target: str | None = None
|
|
54
|
+
config: dict[str, Any] = field(default_factory=dict)
|
|
55
|
+
depends_on: tuple[str, ...] = ()
|
|
56
|
+
|
|
57
|
+
@property
|
|
58
|
+
def description(self) -> str:
|
|
59
|
+
parts = [f"{self.step_type.value}"]
|
|
60
|
+
if self.source:
|
|
61
|
+
parts.append(f"source={self.source}")
|
|
62
|
+
if self.target:
|
|
63
|
+
parts.append(f"target={self.target}")
|
|
64
|
+
return " | ".join(parts)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ── Execution Plan ───────────────────────────────────────────────────────────
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class WriteMode(StrEnum):
|
|
71
|
+
MERGE = "merge"
|
|
72
|
+
OVERWRITE_PARTITION = "overwrite_partition"
|
|
73
|
+
APPEND = "append" # only allowed in Bronze
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass
class ExecutionPlan:
    """
    Governed execution DAG generated from an approved Signature.

    Meant to be treated as immutable once finalized — any change requires a
    new plan.  The dataclass is not frozen, so this is a convention rather
    than something the class enforces.
    """

    signature_hash: str
    intent_id: str
    steps: list[ExecutionStep]
    consistency_unit: ConsistencyUnit
    write_mode: WriteMode
    idempotent: bool = True
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def step_ids(self) -> list[str]:
        """Ids of all steps, in declaration order."""
        return [s.id for s in self.steps]

    @property
    def step_count(self) -> int:
        """Number of steps in the plan."""
        return len(self.steps)

    def get_step(self, step_id: str) -> ExecutionStep | None:
        """Return the first step whose id equals ``step_id``, or ``None``."""
        return next((s for s in self.steps if s.id == step_id), None)

    def execution_order(self) -> list[ExecutionStep]:
        """Topological sort of steps respecting depends_on.

        Steps are emitted in repeated passes over the not-yet-scheduled steps
        (kept in declaration order); a step is emitted as soon as every id in
        its ``depends_on`` has been emitted, including earlier in the same
        pass.

        Raises:
            ValueError: if the remaining steps can never be scheduled, either
                because one depends on an id that is not in the plan or
                because the dependencies form a cycle.
        """
        ordered: list[ExecutionStep] = []
        resolved_ids: set[str] = set()
        pending = list(self.steps)

        while pending:
            still_pending: list[ExecutionStep] = []
            for step in pending:
                if all(dep in resolved_ids for dep in step.depends_on):
                    ordered.append(step)
                    resolved_ids.add(step.id)
                else:
                    still_pending.append(step)

            # A full pass that schedules nothing can never make progress
            # later, so fail right away instead of spinning up to n*n passes.
            if len(still_pending) == len(pending):
                known_ids = {s.id for s in self.steps}
                for step in still_pending:
                    unknown = [dep for dep in step.depends_on if dep not in known_ids]
                    if unknown:
                        raise ValueError(
                            f"Unknown dependency in execution plan: step {step.id!r} "
                            f"depends on {unknown!r}"
                        )
                raise ValueError("Cyclic dependency detected in execution plan")

            pending = still_pending

        return ordered

    def to_dict(self) -> dict[str, Any]:
        """Serialize the plan to plain Python values.

        Enum fields are emitted as their ``.value`` and ``depends_on`` as a
        list.  ``metadata`` is not included.  Each step's ``config`` dict is
        referenced, not copied.
        """
        return {
            "signature_hash": self.signature_hash,
            "intent_id": self.intent_id,
            "consistency_unit": self.consistency_unit.value,
            "write_mode": self.write_mode.value,
            "idempotent": self.idempotent,
            "steps": [
                {
                    "id": s.id,
                    "type": s.step_type.value,
                    "source": s.source,
                    "target": s.target,
                    "config": s.config,
                    "depends_on": list(s.depends_on),
                }
                for s in self.steps
            ],
        }
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# ── Execution Planner ────────────────────────────────────────────────────────
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class ExecutionPlanner:
    """
    Generates an ExecutionPlan from an approved StateSignature.

    The planner does NOT generate arbitrary code — it assembles governed steps
    based on the Signature's intent, datasets, constraints and target layer.
    """

    def plan(self, signature: StateSignature) -> ExecutionPlan:
        """Build the ExecutionPlan for ``signature``.

        The plan carries the signature's hash and intent id, the generated
        steps, the selected consistency unit and write mode, and the
        signature's ``domain`` and ``target_layer`` value as metadata.
        """
        steps = self._build_steps(signature)
        consistency_unit = self._select_consistency_unit(signature)
        write_mode = self._select_write_mode(signature)

        return ExecutionPlan(
            signature_hash=signature.signature_hash,
            intent_id=signature.intent_id,
            steps=steps,
            consistency_unit=consistency_unit,
            write_mode=write_mode,
            metadata={
                "domain": signature.domain,
                "target_layer": signature.target_layer.value,
            },
        )

    def _build_steps(self, sig: StateSignature) -> list[ExecutionStep]:
        """Assemble the step list: extract, anonymize, join, aggregate, load.

        Steps are appended in that stage order.  ``load_depends`` tracks the
        id(s) the next stage must wait for and is narrowed to a single id
        whenever a join or aggregate step is inserted.
        """
        steps = self._extract_steps(sig)
        anonymize_steps, upstream_ids = self._anonymize_steps(sig)
        steps.extend(anonymize_steps)

        # Without a join, the next stage waits for every per-dataset branch.
        load_depends: tuple[str, ...] = tuple(upstream_ids)

        # Join only when there are several datasets and the intent text
        # contains "reconcil" (matches e.g. "reconcile"/"reconciliation").
        if len(sig.datasets) > 1 and "reconcil" in sig.intent:
            join_step = self._join_step(sig, load_depends)
            steps.append(join_step)
            load_depends = (join_step.id,)

        if "aggregate" in sig.intent:
            aggregate_step = ExecutionStep(
                id="aggregate",
                step_type=StepType.AGGREGATE,
                config={"group_by": list(sig.constraints.partition_by)},
                depends_on=load_depends,
            )
            steps.append(aggregate_step)
            load_depends = (aggregate_step.id,)

        steps.append(self._load_step(sig, load_depends))
        return steps

    def _extract_steps(self, sig: StateSignature) -> list[ExecutionStep]:
        """Return one EXTRACT step per dataset, filtered on the first partition column if any."""
        steps: list[ExecutionStep] = []
        for ds in sig.datasets:
            config: dict[str, Any] = {}
            if sig.constraints.partition_by:
                # A fresh dict per step so steps never share config objects.
                config["filter"] = {
                    "column": sig.constraints.partition_by[0],
                    "predicate": ">=",
                    "required_by": "FINOPS",
                }
            steps.append(ExecutionStep(
                id=f"extract_{ds.name}",
                step_type=StepType.EXTRACT,
                source=ds.name,
                config=config,
            ))
        return steps

    def _anonymize_steps(self, sig: StateSignature) -> tuple[list[ExecutionStep], list[str]]:
        """Return ANONYMIZE steps plus, per dataset, the id downstream stages depend on.

        A dataset gets an anonymize step only when it contains PII and the
        signature's ``no_pii_raw`` constraint is set; otherwise its extract
        step id is passed through unchanged.
        """
        steps: list[ExecutionStep] = []
        upstream_ids: list[str] = []
        for ds in sig.datasets:
            extract_id = f"extract_{ds.name}"
            if not (ds.contains_pii and sig.constraints.no_pii_raw):
                upstream_ids.append(extract_id)
                continue
            anon_id = f"anonymize_{ds.name}"
            steps.append(ExecutionStep(
                id=anon_id,
                step_type=StepType.ANONYMIZE,
                source=ds.name,
                config={
                    "pii_columns": list(ds.pii_columns),
                    "strategy": "sha256",
                },
                depends_on=(extract_id,),
            ))
            upstream_ids.append(anon_id)
        return steps, upstream_ids

    def _join_step(self, sig: StateSignature, depends_on: tuple[str, ...]) -> ExecutionStep:
        """Return the single JOIN step combining all datasets on the primary merge keys."""
        return ExecutionStep(
            id="join_datasets",
            step_type=StepType.JOIN,
            config={
                "type": "broadcast" if self._needs_broadcast(sig) else "sort_merge",
                "datasets": [d.name for d in sig.datasets],
                "merge_keys": self._primary_merge_keys(sig),
            },
            depends_on=depends_on,
        )

    def _load_step(self, sig: StateSignature, depends_on: tuple[str, ...]) -> ExecutionStep:
        """Return the final LOAD step writing to the signature's target dataset."""
        target_name = self._derive_target_name(sig)
        load_config: dict[str, Any] = {
            "write_mode": self._select_write_mode(sig).value,
        }
        if sig.constraints.merge_key_required:
            load_config["merge_key"] = True
            load_config["merge_keys"] = self._primary_merge_keys(sig)
        if sig.constraints.partition_by:
            load_config["partition_by"] = list(sig.constraints.partition_by)

        return ExecutionStep(
            id="load_target",
            step_type=StepType.LOAD,
            target=target_name,
            config=load_config,
            depends_on=depends_on,
        )

    def _primary_merge_keys(self, sig: StateSignature) -> list[str]:
        """Return a new list of the first dataset's merge keys, or ``["id"]`` if it has none."""
        if sig.datasets and sig.datasets[0].merge_keys:
            return list(sig.datasets[0].merge_keys)
        return ["id"]

    def _needs_broadcast(self, sig: StateSignature) -> bool:
        """Use broadcast join when one dataset is much smaller than the other.

        True only for exactly two datasets where the smaller ``size_gb`` is
        below 1.0 and the larger is above 100.0.
        """
        if len(sig.datasets) != 2:
            return False
        sizes = sorted(d.size_gb for d in sig.datasets)
        return sizes[0] < 1.0 and sizes[1] > 100.0

    def _select_consistency_unit(self, sig: StateSignature) -> ConsistencyUnit:
        """Per whitepaper: selection based on execution context.

        PARTITION when ``partition_by`` is set, else DAG_BRANCH for more than
        two datasets, else DATASET.
        """
        if sig.constraints.partition_by:
            return ConsistencyUnit.PARTITION
        if len(sig.datasets) > 2:
            return ConsistencyUnit.DAG_BRANCH
        return ConsistencyUnit.DATASET

    def _select_write_mode(self, sig: StateSignature) -> WriteMode:
        """Per whitepaper: append only in Bronze, merge in Silver/Gold.

        A Bronze target with ``partition_by`` set overwrites the partition
        instead of appending.
        """
        if sig.target_layer == TargetLayer.BRONZE:
            return WriteMode.OVERWRITE_PARTITION if sig.constraints.partition_by else WriteMode.APPEND
        return WriteMode.MERGE

    def _derive_target_name(self, sig: StateSignature) -> str:
        """Return the target dataset name declared on the signature."""
        return sig.target_dataset_name
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""CFA Execution — partial execution and state projection."""
|
|
2
|
+
from cfa._lazy import LazyLoader
|
|
3
|
+
|
|
4
|
+
# Module-level ``__getattr__`` (PEP 562): Python calls it for attribute names
# that normal lookup does not find on this module.  Each key below is a name
# exported by this package; each value is the ``(module path, attribute name)``
# pair handed to LazyLoader for that name.
# NOTE(review): presumably LazyLoader imports the target module on first
# access — confirm against cfa/_lazy.py.
__getattr__ = LazyLoader({
    "PartialExecutionManager": ("cfa.execution.partial", "PartialExecutionManager"),
    "PartialExecutionState": ("cfa.execution.partial", "PartialExecutionState"),
    "PublishState": ("cfa.execution.partial", "PublishState"),
    "FailurePolicy": ("cfa.execution.partial", "FailurePolicy"),
    "RetryPolicy": ("cfa.execution.partial", "RetryPolicy"),
    "StateProjectionProtocol": ("cfa.execution.state_projection", "StateProjectionProtocol"),
    "ProjectionResult": ("cfa.execution.state_projection", "ProjectionResult"),
})
|
cfa/execution/partial.py
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFA Partial Execution State
|
|
3
|
+
============================
|
|
4
|
+
Manages partial failures, retry policies, and publish semantics.
|
|
5
|
+
|
|
6
|
+
When a plan partially fails, CFA does NOT silently succeed or blindly fail.
|
|
7
|
+
Instead, it applies a FailurePolicy to determine next action:
|
|
8
|
+
- FULL_ROLLBACK: discard everything, mark as rolled_back
|
|
9
|
+
- SELECTIVE_QUARANTINE: quarantine failed consistency units, commit the rest
|
|
10
|
+
- PARTIAL_COMMIT_NO_PUBLISH: commit all succeeded, but do not publish
|
|
11
|
+
- DEGRADED_PUBLISH: commit and publish with degradation flag
|
|
12
|
+
|
|
13
|
+
Retry policy: max 3 attempts, failed consistency units only.
|
|
14
|
+
Publish semantics: committed_not_published -> published | degraded.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from enum import StrEnum
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from cfa.core.codegen import GeneratedCode
|
|
24
|
+
from cfa.core.planner import ExecutionPlan
|
|
25
|
+
from cfa.sandbox import (
|
|
26
|
+
ExecutionMetrics,
|
|
27
|
+
SandboxOutcome,
|
|
28
|
+
SandboxResult,
|
|
29
|
+
StepOutcome,
|
|
30
|
+
)
|
|
31
|
+
from cfa.sandbox.executor import SandboxExecutor
|
|
32
|
+
from cfa.types import (
|
|
33
|
+
Fault,
|
|
34
|
+
FaultFamily,
|
|
35
|
+
FaultSeverity,
|
|
36
|
+
PolicyAction,
|
|
37
|
+
StateSignature,
|
|
38
|
+
)
|
|
39
|
+
from cfa.validation.runtime import RuntimeValidationResult, RuntimeValidator
|
|
40
|
+
|
|
41
|
+
# ── Enums ───────────────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class FailurePolicy(StrEnum):
|
|
45
|
+
FULL_ROLLBACK = "full_rollback"
|
|
46
|
+
SELECTIVE_QUARANTINE = "selective_quarantine"
|
|
47
|
+
PARTIAL_COMMIT_NO_PUBLISH = "partial_commit_no_publish"
|
|
48
|
+
DEGRADED_PUBLISH = "degraded_publish"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class PublishState(StrEnum):
|
|
52
|
+
NOT_STARTED = "not_started"
|
|
53
|
+
COMMITTED_NOT_PUBLISHED = "committed_not_published"
|
|
54
|
+
PUBLISHED = "published"
|
|
55
|
+
DEGRADED = "degraded"
|
|
56
|
+
ROLLED_BACK = "rolled_back"
|
|
57
|
+
QUARANTINED = "quarantined"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# ── Retry Policy ────────────────────────────────────────────────────────────
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass(frozen=True)
class RetryPolicy:
    """Retry settings for steps that failed during execution."""

    # Upper bound on attempts.
    max_attempts: int = 3
    # When true, only the failed consistency units are retried.
    retry_failed_only: bool = True
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ── Execution State ─────────────────────────────────────────────────────────
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class PartialExecutionState:
    """
    Mutable record of how far a plan got: sandbox result, runtime validation,
    retries performed, which steps were committed or quarantined, the faults
    collected along the way, and the resulting publish state.
    """

    plan_signature_hash: str
    publish_state: PublishState = PublishState.NOT_STARTED
    sandbox_result: SandboxResult | None = None
    runtime_validation: RuntimeValidationResult | None = None
    retry_count: int = 0
    quarantined_steps: list[str] = field(default_factory=list)
    committed_steps: list[str] = field(default_factory=list)
    faults: list[Fault] = field(default_factory=list)
    failure_policy_applied: FailurePolicy | None = None

    @property
    def is_fully_committed(self) -> bool:
        """True when every sandbox step succeeded and the state is committed or published."""
        result = self.sandbox_result
        if result is None:
            return False
        committed_states = (PublishState.COMMITTED_NOT_PUBLISHED, PublishState.PUBLISHED)
        return result.all_succeeded and self.publish_state in committed_states

    @property
    def has_quarantined(self) -> bool:
        """True when at least one step id has been quarantined."""
        return bool(self.quarantined_steps)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# ── Partial Execution Manager ───────────────────────────────────────────────
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class PartialExecutionManager:
|
|
108
|
+
"""
|
|
109
|
+
Orchestrates sandbox execution with failure policy, retry, and publish semantics.
|
|
110
|
+
|
|
111
|
+
Flow:
|
|
112
|
+
1. Execute plan in sandbox
|
|
113
|
+
2. Validate runtime metrics
|
|
114
|
+
3. On partial failure, apply failure policy
|
|
115
|
+
4. Retry if policy allows
|
|
116
|
+
5. Determine publish state
|
|
117
|
+
"""
|
|
118
|
+
|
|
119
|
+
def __init__(
|
|
120
|
+
self,
|
|
121
|
+
sandbox: SandboxExecutor,
|
|
122
|
+
runtime_validator: RuntimeValidator | None = None,
|
|
123
|
+
failure_policy: FailurePolicy = FailurePolicy.SELECTIVE_QUARANTINE,
|
|
124
|
+
retry_policy: RetryPolicy | None = None,
|
|
125
|
+
) -> None:
|
|
126
|
+
self.sandbox = sandbox
|
|
127
|
+
self.runtime_validator = runtime_validator or RuntimeValidator()
|
|
128
|
+
self.failure_policy = failure_policy
|
|
129
|
+
self.retry_policy = retry_policy or RetryPolicy()
|
|
130
|
+
|
|
131
|
+
def execute(
|
|
132
|
+
self,
|
|
133
|
+
plan: ExecutionPlan,
|
|
134
|
+
code: GeneratedCode,
|
|
135
|
+
signature: StateSignature,
|
|
136
|
+
schema_contract: dict[str, Any] | None = None,
|
|
137
|
+
) -> PartialExecutionState:
|
|
138
|
+
state = PartialExecutionState(plan_signature_hash=plan.signature_hash)
|
|
139
|
+
|
|
140
|
+
# ── Execute in sandbox ──────────────────────────────────────────
|
|
141
|
+
sandbox_result = self.sandbox.execute(plan, code, signature)
|
|
142
|
+
state.sandbox_result = sandbox_result
|
|
143
|
+
state.faults.extend(sandbox_result.faults)
|
|
144
|
+
|
|
145
|
+
# ── Handle panic (environmental fault) ──────────────────────────
|
|
146
|
+
if sandbox_result.outcome == SandboxOutcome.PANIC:
|
|
147
|
+
state.publish_state = PublishState.ROLLED_BACK
|
|
148
|
+
state.failure_policy_applied = FailurePolicy.FULL_ROLLBACK
|
|
149
|
+
return state
|
|
150
|
+
|
|
151
|
+
# ── Runtime validation ──────────────────────────────────────────
|
|
152
|
+
rv_result = self.runtime_validator.validate(sandbox_result, signature, schema_contract)
|
|
153
|
+
state.runtime_validation = rv_result
|
|
154
|
+
state.faults.extend(rv_result.faults)
|
|
155
|
+
|
|
156
|
+
# ── All succeeded + validation passed ───────────────────────────
|
|
157
|
+
if sandbox_result.outcome == SandboxOutcome.COMPLETED and rv_result.passed:
|
|
158
|
+
state.publish_state = PublishState.PUBLISHED
|
|
159
|
+
state.committed_steps = [r.step_id for r in sandbox_result.step_results]
|
|
160
|
+
return state
|
|
161
|
+
|
|
162
|
+
# ── Runtime validation failed on complete execution ─────────────
|
|
163
|
+
if sandbox_result.outcome == SandboxOutcome.COMPLETED and not rv_result.passed:
|
|
164
|
+
return self._apply_failure_policy_for_validation(state)
|
|
165
|
+
|
|
166
|
+
# ── Partial failure: some steps failed ──────────────────────────
|
|
167
|
+
if sandbox_result.outcome in (SandboxOutcome.PARTIAL, SandboxOutcome.FAILED):
|
|
168
|
+
retried = self._retry_failed_steps(state, plan, code, signature, schema_contract)
|
|
169
|
+
if retried is not None:
|
|
170
|
+
return retried
|
|
171
|
+
return self._apply_failure_policy(state)
|
|
172
|
+
|
|
173
|
+
return state
|
|
174
|
+
|
|
175
|
+
def _retry_failed_steps(
|
|
176
|
+
self,
|
|
177
|
+
state: PartialExecutionState,
|
|
178
|
+
plan: ExecutionPlan,
|
|
179
|
+
code: GeneratedCode,
|
|
180
|
+
signature: StateSignature,
|
|
181
|
+
schema_contract: dict[str, Any] | None,
|
|
182
|
+
) -> PartialExecutionState | None:
|
|
183
|
+
"""Retry failed consistency units before terminal policy application."""
|
|
184
|
+
assert state.sandbox_result is not None
|
|
185
|
+
failed_step_ids = [r.step_id for r in state.sandbox_result.failed_steps]
|
|
186
|
+
if not failed_step_ids or self.retry_policy.max_attempts <= 1:
|
|
187
|
+
return None
|
|
188
|
+
|
|
189
|
+
latest_result = state.sandbox_result
|
|
190
|
+
retryable_ids = list(failed_step_ids)
|
|
191
|
+
|
|
192
|
+
for attempt in range(1, self.retry_policy.max_attempts):
|
|
193
|
+
retry_result = self.sandbox.execute(plan, code, signature, step_ids=retryable_ids)
|
|
194
|
+
state.retry_count = attempt
|
|
195
|
+
state.faults.extend(retry_result.faults)
|
|
196
|
+
|
|
197
|
+
latest_result = self._merge_retry_result(latest_result, retry_result)
|
|
198
|
+
remaining_failed = [r.step_id for r in latest_result.failed_steps]
|
|
199
|
+
|
|
200
|
+
if not remaining_failed:
|
|
201
|
+
state.sandbox_result = latest_result
|
|
202
|
+
rv_result = self.runtime_validator.validate(latest_result, signature, schema_contract)
|
|
203
|
+
state.runtime_validation = rv_result
|
|
204
|
+
state.faults.extend(rv_result.faults)
|
|
205
|
+
if rv_result.passed:
|
|
206
|
+
state.publish_state = PublishState.PUBLISHED
|
|
207
|
+
state.committed_steps = [r.step_id for r in latest_result.step_results]
|
|
208
|
+
state.quarantined_steps = []
|
|
209
|
+
return state
|
|
210
|
+
return self._apply_failure_policy_for_validation(state)
|
|
211
|
+
|
|
212
|
+
if not self.retry_policy.retry_failed_only:
|
|
213
|
+
retryable_ids = [s.id for s in plan.execution_order()]
|
|
214
|
+
else:
|
|
215
|
+
retryable_ids = remaining_failed
|
|
216
|
+
|
|
217
|
+
state.sandbox_result = latest_result
|
|
218
|
+
return None
|
|
219
|
+
|
|
220
|
+
def _merge_retry_result(
|
|
221
|
+
self,
|
|
222
|
+
base: SandboxResult,
|
|
223
|
+
retry: SandboxResult,
|
|
224
|
+
) -> SandboxResult:
|
|
225
|
+
"""Merge retry outcomes over the original attempt, keeping latest result per retried step."""
|
|
226
|
+
replacement = {r.step_id: r for r in retry.step_results}
|
|
227
|
+
merged_steps = [replacement.get(step.step_id, step) for step in base.step_results]
|
|
228
|
+
|
|
229
|
+
aggregate = ExecutionMetrics()
|
|
230
|
+
all_faults: list[Fault] = []
|
|
231
|
+
for step in merged_steps:
|
|
232
|
+
if step.faults:
|
|
233
|
+
all_faults.extend(step.faults)
|
|
234
|
+
if step.outcome == StepOutcome.SUCCESS:
|
|
235
|
+
aggregate.rows_output = step.metrics.rows_output
|
|
236
|
+
aggregate.shuffle_bytes += step.metrics.shuffle_bytes
|
|
237
|
+
aggregate.duration_seconds += step.metrics.duration_seconds
|
|
238
|
+
aggregate.cost_dbu += step.metrics.cost_dbu
|
|
239
|
+
aggregate.output_schema = step.metrics.output_schema
|
|
240
|
+
for col, cnt in step.metrics.null_counts.items():
|
|
241
|
+
aggregate.null_counts[col] = aggregate.null_counts.get(col, 0) + cnt
|
|
242
|
+
|
|
243
|
+
failed = [r for r in merged_steps if r.outcome == StepOutcome.FAILED]
|
|
244
|
+
if not failed:
|
|
245
|
+
outcome = SandboxOutcome.COMPLETED
|
|
246
|
+
elif len(failed) < len(merged_steps):
|
|
247
|
+
outcome = SandboxOutcome.PARTIAL
|
|
248
|
+
else:
|
|
249
|
+
outcome = SandboxOutcome.FAILED
|
|
250
|
+
|
|
251
|
+
return SandboxResult(
|
|
252
|
+
outcome=outcome,
|
|
253
|
+
step_results=merged_steps,
|
|
254
|
+
aggregate_metrics=aggregate,
|
|
255
|
+
faults=all_faults,
|
|
256
|
+
panic_reason=retry.panic_reason or base.panic_reason,
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
def _apply_failure_policy(self, state: PartialExecutionState) -> PartialExecutionState:
|
|
260
|
+
"""Apply failure policy when sandbox has partial/full failure."""
|
|
261
|
+
assert state.sandbox_result is not None
|
|
262
|
+
|
|
263
|
+
state.failure_policy_applied = self.failure_policy
|
|
264
|
+
succeeded = state.sandbox_result.successful_steps
|
|
265
|
+
failed = state.sandbox_result.failed_steps
|
|
266
|
+
|
|
267
|
+
match self.failure_policy:
|
|
268
|
+
case FailurePolicy.FULL_ROLLBACK:
|
|
269
|
+
state.publish_state = PublishState.ROLLED_BACK
|
|
270
|
+
state.quarantined_steps = [r.step_id for r in state.sandbox_result.step_results]
|
|
271
|
+
|
|
272
|
+
case FailurePolicy.SELECTIVE_QUARANTINE:
|
|
273
|
+
state.committed_steps = [r.step_id for r in succeeded]
|
|
274
|
+
state.quarantined_steps = [r.step_id for r in failed]
|
|
275
|
+
if succeeded:
|
|
276
|
+
state.publish_state = PublishState.QUARANTINED
|
|
277
|
+
else:
|
|
278
|
+
state.publish_state = PublishState.ROLLED_BACK
|
|
279
|
+
|
|
280
|
+
case FailurePolicy.PARTIAL_COMMIT_NO_PUBLISH:
|
|
281
|
+
state.committed_steps = [r.step_id for r in succeeded]
|
|
282
|
+
state.quarantined_steps = [r.step_id for r in failed]
|
|
283
|
+
state.publish_state = PublishState.COMMITTED_NOT_PUBLISHED
|
|
284
|
+
|
|
285
|
+
case FailurePolicy.DEGRADED_PUBLISH:
|
|
286
|
+
state.committed_steps = [r.step_id for r in succeeded]
|
|
287
|
+
state.quarantined_steps = [r.step_id for r in failed]
|
|
288
|
+
if succeeded:
|
|
289
|
+
state.publish_state = PublishState.DEGRADED
|
|
290
|
+
state.faults.append(Fault(
|
|
291
|
+
code="PARTIAL_DEGRADED_PUBLISH",
|
|
292
|
+
family=FaultFamily.RUNTIME,
|
|
293
|
+
severity=FaultSeverity.WARNING,
|
|
294
|
+
stage="partial_execution",
|
|
295
|
+
message=(
|
|
296
|
+
f"Degraded publish: {len(failed)} of "
|
|
297
|
+
f"{len(state.sandbox_result.step_results)} steps failed."
|
|
298
|
+
),
|
|
299
|
+
mandatory_action=PolicyAction.APPROVE,
|
|
300
|
+
detected_before_execution=False,
|
|
301
|
+
))
|
|
302
|
+
else:
|
|
303
|
+
state.publish_state = PublishState.ROLLED_BACK
|
|
304
|
+
|
|
305
|
+
return state
|
|
306
|
+
|
|
307
|
+
def _apply_failure_policy_for_validation(
|
|
308
|
+
self, state: PartialExecutionState
|
|
309
|
+
) -> PartialExecutionState:
|
|
310
|
+
"""Apply failure policy when runtime validation fails on complete execution."""
|
|
311
|
+
state.failure_policy_applied = self.failure_policy
|
|
312
|
+
|
|
313
|
+
match self.failure_policy:
|
|
314
|
+
case FailurePolicy.FULL_ROLLBACK:
|
|
315
|
+
state.publish_state = PublishState.ROLLED_BACK
|
|
316
|
+
|
|
317
|
+
case FailurePolicy.SELECTIVE_QUARANTINE:
|
|
318
|
+
# All steps succeeded but validation failed — quarantine the whole batch
|
|
319
|
+
state.publish_state = PublishState.QUARANTINED
|
|
320
|
+
if state.sandbox_result:
|
|
321
|
+
state.quarantined_steps = [
|
|
322
|
+
r.step_id for r in state.sandbox_result.step_results
|
|
323
|
+
]
|
|
324
|
+
|
|
325
|
+
case FailurePolicy.PARTIAL_COMMIT_NO_PUBLISH:
|
|
326
|
+
state.publish_state = PublishState.COMMITTED_NOT_PUBLISHED
|
|
327
|
+
if state.sandbox_result:
|
|
328
|
+
state.committed_steps = [
|
|
329
|
+
r.step_id for r in state.sandbox_result.step_results
|
|
330
|
+
]
|
|
331
|
+
|
|
332
|
+
case FailurePolicy.DEGRADED_PUBLISH:
|
|
333
|
+
state.publish_state = PublishState.DEGRADED
|
|
334
|
+
if state.sandbox_result:
|
|
335
|
+
state.committed_steps = [
|
|
336
|
+
r.step_id for r in state.sandbox_result.step_results
|
|
337
|
+
]
|
|
338
|
+
|
|
339
|
+
return state
|