cfa-kernel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. cfa/__init__.py +39 -0
  2. cfa/_lazy.py +39 -0
  3. cfa/adapters/__init__.py +104 -0
  4. cfa/adapters/autogen.py +19 -0
  5. cfa/adapters/crewai.py +19 -0
  6. cfa/adapters/dspy.py +19 -0
  7. cfa/adapters/langgraph.py +19 -0
  8. cfa/adapters/openai_agents.py +19 -0
  9. cfa/audit/__init__.py +15 -0
  10. cfa/audit/context.py +205 -0
  11. cfa/audit/hashing.py +41 -0
  12. cfa/audit/trail.py +194 -0
  13. cfa/backends/__init__.py +132 -0
  14. cfa/backends/dbt.py +338 -0
  15. cfa/backends/pyspark.py +240 -0
  16. cfa/backends/sql.py +270 -0
  17. cfa/behavior/__init__.py +49 -0
  18. cfa/behavior/llm.py +244 -0
  19. cfa/behavior/spec.py +235 -0
  20. cfa/behavior/systematizer.py +222 -0
  21. cfa/cli/__init__.py +296 -0
  22. cfa/cli/__main__.py +6 -0
  23. cfa/cli/_helpers.py +109 -0
  24. cfa/cli/core/__init__.py +0 -0
  25. cfa/cli/core/evaluate.py +72 -0
  26. cfa/cli/core/validate.py +29 -0
  27. cfa/cli/formatters.py +280 -0
  28. cfa/cli/governance/__init__.py +0 -0
  29. cfa/cli/governance/audit.py +65 -0
  30. cfa/cli/governance/catalog.py +28 -0
  31. cfa/cli/governance/policy.py +119 -0
  32. cfa/cli/governance/rules.py +42 -0
  33. cfa/cli/governance/signature.py +31 -0
  34. cfa/cli/infrastructure/__init__.py +0 -0
  35. cfa/cli/infrastructure/backend_list.py +24 -0
  36. cfa/cli/infrastructure/storage.py +87 -0
  37. cfa/cli/project/__init__.py +0 -0
  38. cfa/cli/project/init.py +73 -0
  39. cfa/cli/project/lifecycle.py +92 -0
  40. cfa/cli/project/status.py +75 -0
  41. cfa/cli/project/taxonomy.py +38 -0
  42. cfa/cli/reporting/__init__.py +0 -0
  43. cfa/cli/reporting/report.py +109 -0
  44. cfa/cli/reporting/serve.py +43 -0
  45. cfa/config.py +103 -0
  46. cfa/core/__init__.py +19 -0
  47. cfa/core/codegen.py +65 -0
  48. cfa/core/conditions.py +129 -0
  49. cfa/core/kernel.py +224 -0
  50. cfa/core/phases/__init__.py +0 -0
  51. cfa/core/phases/runner.py +477 -0
  52. cfa/core/planner.py +290 -0
  53. cfa/execution/__init__.py +12 -0
  54. cfa/execution/partial.py +339 -0
  55. cfa/execution/state_projection.py +216 -0
  56. cfa/governance/__init__.py +76 -0
  57. cfa/lifecycle/__init__.py +51 -0
  58. cfa/mcp/__init__.py +347 -0
  59. cfa/mcp/__main__.py +4 -0
  60. cfa/normalizer/__init__.py +15 -0
  61. cfa/normalizer/base.py +441 -0
  62. cfa/normalizer/llm.py +426 -0
  63. cfa/observability/__init__.py +14 -0
  64. cfa/observability/indices.py +177 -0
  65. cfa/observability/metrics.py +91 -0
  66. cfa/observability/notify.py +79 -0
  67. cfa/observability/otel.py +81 -0
  68. cfa/observability/promotion.py +367 -0
  69. cfa/policy/__init__.py +12 -0
  70. cfa/policy/bundle.py +317 -0
  71. cfa/policy/catalog.py +117 -0
  72. cfa/policy/engine.py +306 -0
  73. cfa/reporting/__init__.py +42 -0
  74. cfa/reporting/charts.py +223 -0
  75. cfa/reporting/engine.py +456 -0
  76. cfa/resolution/__init__.py +62 -0
  77. cfa/runtime/__init__.py +13 -0
  78. cfa/runtime/gate.py +287 -0
  79. cfa/sandbox/__init__.py +189 -0
  80. cfa/sandbox/executor.py +92 -0
  81. cfa/sandbox/mock.py +89 -0
  82. cfa/sandbox/panic.py +52 -0
  83. cfa/storage/__init__.py +591 -0
  84. cfa/testing/__init__.py +60 -0
  85. cfa/testing/asserts.py +77 -0
  86. cfa/testing/evaluate.py +168 -0
  87. cfa/testing/fixtures.py +89 -0
  88. cfa/testing/markers.py +36 -0
  89. cfa/types.py +489 -0
  90. cfa/validation/__init__.py +14 -0
  91. cfa/validation/runtime.py +285 -0
  92. cfa/validation/signature.py +146 -0
  93. cfa/validation/static.py +252 -0
  94. cfa_kernel-0.1.0.dist-info/METADATA +32 -0
  95. cfa_kernel-0.1.0.dist-info/RECORD +98 -0
  96. cfa_kernel-0.1.0.dist-info/WHEEL +4 -0
  97. cfa_kernel-0.1.0.dist-info/entry_points.txt +3 -0
  98. cfa_kernel-0.1.0.dist-info/licenses/LICENSE +21 -0
cfa/runtime/gate.py ADDED
@@ -0,0 +1,287 @@
1
+ """
2
+ CFA Runtime Gate
3
+ ================
4
+ Production-grade governance gate for wrapping pipeline execution.
5
+
6
+ Two surfaces, one core:
7
+ - cfa.testing → pytest-native for CI/CD
8
+ - cfa.runtime → production decorator/context-manager for live pipelines
9
+
10
+ Usage:
11
+ from cfa.runtime import RuntimeGate, GateConfig
12
+
13
+ gate = RuntimeGate(
14
+ config=GateConfig(policy_bundle="prod_v4.2"),
15
+ catalog=PROD_CATALOG,
16
+ )
17
+
18
+ # Pre-execution validation
19
+ gate.validate("agregar vendas mensais com PII anonimizado")
20
+
21
+ # Scoped execution with metrics
22
+ with gate.scope("monthly_aggregation"):
23
+ df = run_pipeline()
24
+ gate.record_metrics(rows=1000000, shuffle_mb=450, cost_dbu=12.0)
25
+
26
+ # Decorator for simple functions
27
+ @gate.guard("agregar vendas")
28
+ def my_pipeline():
29
+ ...
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ import uuid
35
+ from collections.abc import Callable
36
+ from contextlib import contextmanager
37
+ from dataclasses import dataclass, field
38
+ from typing import Any
39
+
40
+ from cfa.audit.context import ContextRegistry
41
+ from cfa.audit.trail import AuditTrail
42
+ from cfa.core.kernel import KernelConfig, KernelOrchestrator
43
+ from cfa.policy.engine import PolicyRule
44
+ from cfa.types import DecisionState
45
+
46
+
47
class GateViolation(str):
    """Names of the strategies for reacting to a governance violation.

    The members are plain string constants grouped under a ``str`` subclass,
    so each one compares equal to its literal value.
    """

    # Raise an exception and stop execution.
    BLOCK = "block"

    # Log a warning and continue execution.
    WARN = "warn"

    # Record in the audit trail and always continue.
    AUDIT_ONLY = "audit_only"
53
+
54
+
55
@dataclass
class GateConfig:
    """Settings that drive a Runtime Gate in production.

    ``on_violation``, ``sandbox`` and ``execute`` are gate-level settings;
    they are not forwarded by :meth:`to_kernel_config`.
    """

    policy_bundle: str = "prod_v1.0"
    on_violation: str = GateViolation.BLOCK  # one of the GateViolation values
    backend: str = "pyspark"
    sandbox: str = ""  # name in SandboxRegistry, empty = use kernel default
    execute: bool = False  # run sandbox execution phase (Phase 4)
    max_replan_attempts: int = 3
    warnings_are_blocking: bool = True
    enable_planning: bool = True
    enable_codegen: bool = True
    enable_static_validation: bool = True
    enable_sandbox: bool = False  # disabled by default — gate is pre-execution
    enable_promotion: bool = True

    def to_kernel_config(self) -> KernelConfig:
        """Translate this gate configuration into a ``KernelConfig``.

        Returns:
            A new ``KernelConfig`` built from the policy bundle, the backend,
            the replan/warning settings and the phase toggles.
        """
        kernel_options = {
            "policy_bundle_version": self.policy_bundle,
            "backend": self.backend,
            "max_replan_attempts": self.max_replan_attempts,
            "warnings_are_blocking": self.warnings_are_blocking,
            "enable_planning": self.enable_planning,
            "enable_codegen": self.enable_codegen,
            "enable_static_validation": self.enable_static_validation,
            "enable_sandbox": self.enable_sandbox,
            "enable_promotion": self.enable_promotion,
        }
        return KernelConfig(**kernel_options)
84
+
85
+
86
class GovernanceViolation(Exception):
    """Error signalling that the runtime gate refused an intent.

    Attributes:
        gate_id: Identifier of the gate that blocked the intent.
        intent: Full text of the rejected intent.
        reason: Explanation given for the rejection.
        faults: Fault codes attached to the rejection.
    """

    def __init__(self, gate_id: str, intent: str, reason: str, faults: list[str]) -> None:
        # Only the first 80 characters of the intent go into the message;
        # the complete text stays available on ``self.intent``.
        preview = intent[:80]
        message = f"[gate={gate_id}] Governance violation for '{preview}': {reason}"
        super().__init__(message)
        self.gate_id = gate_id
        self.intent = intent
        self.reason = reason
        self.faults = faults
97
+
98
+
99
@dataclass
class GateResult:
    """Outcome of a single validation performed by a runtime gate."""

    gate_id: str  # identifier of the gate that produced this result
    intent: str  # intent text that was validated
    passed: bool  # True when the intent was accepted
    state: DecisionState  # decision state attached to the validation
    blocked_reason: str = ""  # empty unless a reason was supplied
    faults: list[str] = field(default_factory=list)  # fault codes; fresh list per instance
    signature_hash: str = ""
    replay_count: int = 0
    execution_id: str = ""
112
+
113
+
114
class RuntimeGate:
    """Production governance gate for live pipelines.

    Wraps KernelOrchestrator with production defaults, metrics recording,
    and configurable violation handling.

    Usage modes:
        - validate(intent) → GateResult for pre-execution checks
        - scope(name) → context manager for scoped execution
        - guard(intent) → decorator for function wrapping
    """

    def __init__(
        self,
        config: GateConfig | None = None,
        catalog: dict[str, Any] | None = None,
        policy_rules: list[PolicyRule] | None = None,
        context_registry: ContextRegistry | None = None,
        audit_trail: AuditTrail | None = None,
    ) -> None:
        """Build the gate and the kernel it delegates to.

        Args:
            config: Gate settings; a default ``GateConfig`` is used when omitted.
            catalog: Passed through to ``KernelOrchestrator``.
            policy_rules: Passed through to ``KernelOrchestrator``.
            context_registry: Passed through to ``KernelOrchestrator``.
            audit_trail: Passed through to ``KernelOrchestrator``.

        Raises:
            KeyError: If ``config.sandbox`` names a sandbox that is not
                registered in ``SandboxRegistry``.
        """
        self.config = config or GateConfig()
        # Short identifier: the first 8 characters of a random UUID.
        self._gate_id = str(uuid.uuid4())[:8]

        sandbox_backend = None
        if self.config.sandbox:
            # Imported lazily so the sandbox package is only loaded when a
            # sandbox was actually requested.
            from cfa.sandbox import SandboxRegistry

            registry = SandboxRegistry.singleton()
            # The registry stores factories; call the factory to get a backend.
            sandbox_backend = registry.get(self.config.sandbox)()

        self._kernel = KernelOrchestrator(
            catalog=catalog,
            config=self.config.to_kernel_config(),
            policy_rules=policy_rules,
            context_registry=context_registry,
            audit_trail=audit_trail,
            sandbox_backend=sandbox_backend,
        )
        self._last_result: GateResult | None = None

    @property
    def gate_id(self) -> str:
        """Short identifier generated for this gate instance."""
        return self._gate_id

    # ── Pre-execution validation ──────────────────────────────────────────

    def validate(self, intent: str) -> GateResult:
        """Validate an intent before execution. Does NOT execute user code.

        The result is also remembered as the gate's last result, which
        ``record_metrics`` uses to key its audit events.

        Args:
            intent: The intent text handed to the kernel.

        Returns:
            The ``GateResult`` describing the kernel's decision (whenever no
            exception is raised).

        Raises:
            GovernanceViolation: If the intent is not executable and
                ``config.on_violation`` is ``GateViolation.BLOCK``.
        """
        kresult = self._kernel.process(intent)
        passed = kresult.is_executable

        faults: list[str] = []
        if kresult.policy_result:
            faults = [f.code for f in kresult.policy_result.faults]

        result = GateResult(
            gate_id=self._gate_id,
            intent=intent,
            passed=passed,
            state=kresult.state,
            blocked_reason=kresult.blocked_reason,
            faults=faults,
            signature_hash=kresult.signature.signature_hash if kresult.signature else "",
            replay_count=len(kresult.replan_history),
            execution_id=kresult.intent_id,
        )
        # Stored before any exception is raised so the result stays
        # inspectable even when the gate blocks.
        self._last_result = result

        if not passed:
            if self.config.on_violation == GateViolation.BLOCK:
                raise GovernanceViolation(
                    gate_id=self._gate_id,
                    intent=intent,
                    reason=kresult.blocked_reason,
                    faults=faults,
                )
            if self.config.on_violation == GateViolation.WARN:
                # WARN mode promises a logged warning before continuing.
                import logging

                logging.getLogger(__name__).warning(
                    "[gate=%s] Governance violation for '%s': %s (faults=%s)",
                    self._gate_id,
                    intent[:80],
                    kresult.blocked_reason,
                    faults,
                )

        return result

    # ── Scoped execution ──────────────────────────────────────────────────

    @contextmanager
    def scope(self, name: str) -> Any:
        """Context manager for governed execution scope.

        Yields a freshly generated UUID string. If the body raises an
        ``Exception``, a ``scope_error`` event keyed by that UUID is written
        to the kernel's audit trail and the exception is re-raised. Nothing
        is recorded when the body completes normally.

        Usage:
            with gate.scope("monthly_aggregation"):
                df = process(...)
                gate.record_metrics(rows=1000, shuffle_mb=5)
        """
        execution_id = str(uuid.uuid4())
        try:
            yield execution_id
        except Exception:
            self._kernel.audit_trail.record(
                intent_id=execution_id,
                stage="runtime_gate",
                event_type="scope_error",
                outcome="error",
                scope_name=name,
                policy_bundle_version=self.config.policy_bundle,
            )
            raise

    def record_metrics(
        self,
        rows: int = 0,
        shuffle_mb: float = 0.0,
        cost_dbu: float = 0.0,
        duration_seconds: float = 0.0,
        **extra: Any,
    ) -> None:
        """Record execution metrics in the kernel's audit trail.

        The event is keyed by the ``execution_id`` of the most recent
        ``validate()`` result — not by the id yielded from ``scope()``.
        If ``validate()`` has not produced a result yet, nothing is recorded.

        Args:
            rows: Number of rows to report.
            shuffle_mb: Shuffle volume to report.
            cost_dbu: Cost to report.
            duration_seconds: Duration to report.
            **extra: Additional keyword fields forwarded to the audit record.
        """
        if self._last_result:
            self._kernel.audit_trail.record(
                intent_id=self._last_result.execution_id,
                stage="runtime_gate",
                event_type="execution_metrics",
                outcome="recorded",
                rows=rows,
                shuffle_mb=shuffle_mb,
                cost_dbu=cost_dbu,
                duration_seconds=duration_seconds,
                policy_bundle_version=self.config.policy_bundle,
                **extra,
            )

    # ── Decorator ─────────────────────────────────────────────────────────

    def guard(self, intent: str) -> Callable[[Callable], Callable]:
        """Decorator that guards a function with governance validation.

        Every call of the decorated function first runs ``validate(intent)``;
        the function body only runs if validation does not raise. The
        wrapper keeps the wrapped function's name, docstring and other
        metadata.

        Usage:
            @gate.guard("agregar vendas")
            def my_pipeline():
                ...
        """
        import functools

        def decorator(fn: Callable) -> Callable:
            @functools.wraps(fn)
            def wrapper(*args: Any, **kwargs: Any) -> Any:
                self.validate(intent)
                return fn(*args, **kwargs)

            return wrapper

        return decorator
262
+
263
+
264
def runtime_gate(
    intent: str,
    *,
    policy_bundle: str = "prod_v1.0",
    on_violation: str = GateViolation.BLOCK,
    catalog: dict[str, Any] | None = None,
    **gate_kwargs: Any,
) -> Callable[[Callable], Callable]:
    """Standalone decorator for quick runtime governance.

    A dedicated RuntimeGate is created at the moment the decorator is built
    and is used only for the function it decorates. To share one gate among
    several functions, instantiate RuntimeGate directly and use its
    ``guard`` method.

    Args:
        intent: Intent text validated before each call of the function.
        policy_bundle: Policy bundle stored in the gate's GateConfig.
        on_violation: Violation handling mode stored in the GateConfig.
        catalog: Passed to the RuntimeGate constructor.
        **gate_kwargs: Further keyword arguments for the RuntimeGate
            constructor.

    Usage:
        @runtime_gate("agregar vendas", policy_bundle="prod_v2.0")
        def my_pipeline():
            ...
    """
    settings = GateConfig(policy_bundle=policy_bundle, on_violation=on_violation)
    dedicated_gate = RuntimeGate(config=settings, catalog=catalog, **gate_kwargs)
    return dedicated_gate.guard(intent)
@@ -0,0 +1,189 @@
1
+ """
2
+ CFA Sandbox Registry
3
+ ====================
4
+ Pluggable execution backends for governed code.
5
+
6
+ Each sandbox backend declares its capabilities (execution mode,
7
+ rollback support, metrics accuracy) via ``SandboxCapabilities``.
8
+
9
+ The registry follows the same pattern as ``BackendRegistry``.
10
+ """
11
+
12
from __future__ import annotations

import threading
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any

from cfa.core.codegen import GeneratedCode  # noqa: F401
from cfa.core.planner import ExecutionStep
from cfa.types import Fault
21
+
22
+ # ── Execution Metrics ────────────────────────────────────────────────────────
23
+
24
+
25
@dataclass
class ExecutionMetrics:
    """Numeric measurements collected for an execution."""

    rows_output: int = 0
    shuffle_bytes: int = 0
    duration_seconds: float = 0.0
    cost_dbu: float = 0.0
    null_counts: dict[str, int] = field(default_factory=dict)  # column -> null count
    output_schema: list[str] = field(default_factory=list)
    peak_memory_mb: float = 0.0

    @property
    def shuffle_mb(self) -> float:
        """Shuffle volume in mebibytes (``shuffle_bytes`` / 1024²)."""
        return self.shuffle_bytes / (1024 * 1024)

    def null_ratio(self, column: str, total_rows: int | None = None) -> float:
        """Return the fraction of null values recorded for *column*.

        Args:
            column: Column name looked up in ``null_counts``; a column with
                no entry counts as zero nulls.
            total_rows: Denominator to use. ``None`` (the default) means
                "use ``rows_output``". An explicit ``0`` is honoured and
                yields ``0.0`` rather than silently falling back to
                ``rows_output``.

        Returns:
            ``null_counts[column] / rows``, or ``0.0`` when the row count is 0.
        """
        # Compare against None explicitly: ``total_rows or ...`` would treat
        # a legitimate 0 as "not provided".
        rows = self.rows_output if total_rows is None else total_rows
        if rows == 0:
            return 0.0
        return self.null_counts.get(column, 0) / rows
44
+
45
+
46
+ # ── Step Result ──────────────────────────────────────────────────────────────
47
+
48
+
49
class StepOutcome(str):
    """String constants naming the possible outcomes of a single step."""

    SUCCESS = "success"

    FAILED = "failed"

    SKIPPED = "skipped"

    INTERRUPTED = "interrupted"
54
+
55
+
56
@dataclass
class StepResult:
    """Result reported for one executed step."""

    step_id: str  # identifier of the step this result belongs to
    outcome: str  # StepOutcome value
    metrics: ExecutionMetrics = field(default_factory=ExecutionMetrics)  # fresh, all-default metrics when omitted
    error: str = ""
    faults: list[Fault] = field(default_factory=list)  # fresh list per instance
    retry_count: int = 0
64
+
65
+
66
+ # ── Sandbox Result ───────────────────────────────────────────────────────────
67
+
68
+
69
class SandboxOutcome(str):
    """String constants naming the possible outcomes of a sandbox run."""

    COMPLETED = "completed"

    PARTIAL = "partial"

    FAILED = "failed"

    PANIC = "panic"
74
+
75
+
76
@dataclass
class SandboxResult:
    """Overall result of running steps in a sandbox."""

    outcome: str  # SandboxOutcome value
    step_results: list[StepResult] = field(default_factory=list)
    aggregate_metrics: ExecutionMetrics = field(default_factory=ExecutionMetrics)
    faults: list[Fault] = field(default_factory=list)
    panic_reason: str = ""

    def _steps_with_outcome(self, wanted: str) -> list[StepResult]:
        """Return the step results whose outcome equals *wanted*, in order."""
        return [entry for entry in self.step_results if entry.outcome == wanted]

    @property
    def successful_steps(self) -> list[StepResult]:
        """Step results whose outcome is ``StepOutcome.SUCCESS``."""
        return self._steps_with_outcome(StepOutcome.SUCCESS)

    @property
    def failed_steps(self) -> list[StepResult]:
        """Step results whose outcome is ``StepOutcome.FAILED``."""
        return self._steps_with_outcome(StepOutcome.FAILED)

    @property
    def all_succeeded(self) -> bool:
        """True when no recorded step has an outcome other than SUCCESS.

        An empty ``step_results`` list therefore also yields True.
        """
        for entry in self.step_results:
            if entry.outcome == StepOutcome.SUCCESS:
                continue
            return False
        return True
95
+
96
+
97
+ # ── Capabilities ─────────────────────────────────────────────────────────────
98
+
99
+
100
@dataclass
class SandboxCapabilities:
    """Self-description published by a sandbox backend."""

    # Identification of the backend.
    backend_name: str = ""
    backend_version: str = ""

    # Feature flags and limits.
    execution_mode: str = "simulation"  # "simulation" | "local" | "cluster"
    supports_rollback: bool = False
    supports_metrics: bool = True
    supports_environment_check: bool = False
    max_parallel_steps: int = 1

    # Free-form, backend-specific extras; a fresh dict per instance.
    custom: dict[str, Any] = field(default_factory=dict)
112
+
113
+
114
+ # ── Sandbox Backend ──────────────────────────────────────────────────────────
115
+
116
+
117
class SandboxBackend(ABC):
    """Extension point: pluggable execution backend.

    Concrete backends must implement ``execute_step`` and
    ``check_environment``; ``get_capabilities`` has a default.
    """

    @abstractmethod
    def execute_step(
        self, step: ExecutionStep, code: str, context: dict[str, Any]
    ) -> StepResult:
        """Execute one step with the given code and context; return its result."""

    @abstractmethod
    def check_environment(self) -> list[Fault]:
        """Return the faults found in the execution environment."""

    def get_capabilities(self) -> SandboxCapabilities:
        """Return an all-default ``SandboxCapabilities`` unless overridden."""
        default_capabilities = SandboxCapabilities()
        return default_capabilities
130
+
131
+
132
+ # ── Sandbox Registry ─────────────────────────────────────────────────────────
133
+
134
+
135
SandboxFactory = Any  # Callable[[], SandboxBackend]


class SandboxRegistry:
    """Global registry of available sandbox backend factories.

    A registry maps a name to a zero-argument factory that builds a backend.
    ``singleton()`` returns the shared, bootstrapped instance; registries
    constructed directly start out empty.
    """

    _instance: SandboxRegistry | None = None
    # Created eagerly at class-definition time: creating the lock lazily
    # inside ``singleton()`` would itself be a race between threads.
    _lock: Any = threading.Lock()

    def __init__(self) -> None:
        self._backends: dict[str, SandboxFactory] = {}

    @classmethod
    def singleton(cls) -> SandboxRegistry:
        """Return the shared registry, creating and bootstrapping it once."""
        with cls._lock:
            if cls._instance is None:
                # Publish the instance only after bootstrap succeeded, so a
                # failing bootstrap cannot leave an empty registry behind.
                instance = cls()
                instance._bootstrap()
                cls._instance = instance
            return cls._instance

    def _bootstrap(self) -> None:
        """Register the built-in ``mock`` and ``panic`` backends.

        Does nothing when the registry already contains entries.
        """
        if self._backends:
            return
        # Imported here rather than at module top because these modules
        # import from this package.
        from .mock import MockSandboxBackend  # noqa: F811
        from .panic import PanicSandboxBackend  # noqa: F811

        self.register("mock", lambda: MockSandboxBackend())
        self.register("panic", lambda: PanicSandboxBackend(panic_on_step="extract_nfe"))

    def register(self, name: str, factory: SandboxFactory) -> None:
        """Store *factory* under *name*, replacing any previous entry."""
        self._backends[name] = factory

    def get(self, name: str) -> SandboxFactory:
        """Return the factory registered under *name*.

        Raises:
            KeyError: If *name* is not registered; the message lists the
                registered names.
        """
        if name not in self._backends:
            available = ", ".join(sorted(self._backends))
            raise KeyError(f"Unknown sandbox '{name}'. Registered: {available or '(none)'}")
        return self._backends[name]

    def list(self) -> list[str]:
        """Return the registered names in sorted order."""
        return sorted(self._backends)

    def __contains__(self, name: str) -> bool:
        """Support ``name in registry``."""
        return name in self._backends
181
+
182
+
183
+ # ── Backward-compatible re-exports ──────────────────────────────────────────
184
+
185
+ __all__ = ["SandboxExecutor", "MockSandboxBackend", "PanicSandboxBackend"]
186
+
187
+ from .executor import SandboxExecutor # noqa: E402
188
+ from .mock import MockSandboxBackend # noqa: E402
189
+ from .panic import PanicSandboxBackend # noqa: E402
@@ -0,0 +1,92 @@
1
+ """Sandbox executor — orchestrates plan execution through a sandbox backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from cfa.core.codegen import GeneratedCode
8
+ from cfa.core.planner import ExecutionPlan
9
+ from cfa.types import StateSignature
10
+
11
+ from . import (
12
+ ExecutionMetrics,
13
+ SandboxBackend,
14
+ SandboxOutcome,
15
+ SandboxResult,
16
+ StepOutcome,
17
+ StepResult,
18
+ )
19
+
20
+
21
class SandboxExecutor:
    """Drives a plan's steps, one after another, through a sandbox backend."""

    def __init__(self, backend: SandboxBackend) -> None:
        self.backend = backend

    def execute(
        self,
        plan: ExecutionPlan,
        code: GeneratedCode,
        signature: StateSignature,
        schema_contract: dict[str, Any] | None = None,
        step_ids: list[str] | None = None,
    ) -> SandboxResult:
        """Run the plan's steps in execution order and summarise the outcome.

        Args:
            plan: Supplies ``steps`` and ``execution_order()``.
            code: Supplies per-step code via ``step_code_map``; ``code.code``
                is used for steps without an entry.
            signature: Accepted but not used by this method.
            schema_contract: Accepted but not used by this method.
            step_ids: When non-empty, only steps with these ids are run.
                NOTE(review): an empty list is treated like ``None`` (every
                step runs) — confirm that is intended.

        Returns:
            * PANIC — the environment check reported faults before a step, or
              a step came back INTERRUPTED.
            * PARTIAL — a step came back FAILED; later steps are not run.
            * COMPLETED — every selected step ran; aggregate metrics hold the
              summed rows, shuffle bytes, duration and cost.
        """
        finished: list[StepResult] = []
        collected_faults: list = []

        context: dict[str, Any] = {"steps": [planned.id for planned in plan.steps]}
        wanted_ids = set(step_ids) if step_ids else None

        for step in plan.execution_order():
            if wanted_ids is not None and step.id not in wanted_ids:
                continue

            # The environment is re-checked before every step.
            environment_faults = self.backend.check_environment()
            if environment_faults:
                return SandboxResult(
                    outcome=SandboxOutcome.PANIC,
                    step_results=finished,
                    faults=environment_faults,
                    panic_reason="environment_check_failed",
                )

            source_for_step = code.step_code_map.get(step.id, code.code)
            outcome = self.backend.execute_step(step, source_for_step, context)

            if outcome.outcome == StepOutcome.INTERRUPTED:
                # Only the interrupted step's own faults are reported here.
                return SandboxResult(
                    outcome=SandboxOutcome.PANIC,
                    step_results=finished + [outcome],
                    faults=outcome.faults,
                    panic_reason=outcome.error,
                )

            finished.append(outcome)
            collected_faults.extend(outcome.faults)

            if outcome.outcome == StepOutcome.FAILED:
                return SandboxResult(
                    outcome=SandboxOutcome.PARTIAL,
                    step_results=finished,
                    faults=collected_faults,
                )

        # Every selected step ran without failing or being interrupted:
        # add up the per-step metrics.
        rows = 0
        shuffle = 0
        duration = 0
        cost = 0
        for done in finished:
            rows += done.metrics.rows_output
            shuffle += done.metrics.shuffle_bytes
            duration += done.metrics.duration_seconds
            cost += done.metrics.cost_dbu

        return SandboxResult(
            outcome=SandboxOutcome.COMPLETED,
            step_results=finished,
            aggregate_metrics=ExecutionMetrics(
                rows_output=rows,
                shuffle_bytes=shuffle,
                duration_seconds=duration,
                cost_dbu=cost,
            ),
            faults=collected_faults,
        )
cfa/sandbox/mock.py ADDED
@@ -0,0 +1,89 @@
1
+ """Mock sandbox backend — deterministic simulation for testing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from cfa.core.planner import ExecutionStep
8
+ from cfa.types import Fault, FaultFamily, FaultSeverity, PolicyAction
9
+
10
+ from . import (
11
+ ExecutionMetrics,
12
+ SandboxBackend,
13
+ SandboxCapabilities,
14
+ StepOutcome,
15
+ StepResult,
16
+ )
17
+
18
+
19
class MockSandboxBackend(SandboxBackend):
    """Deterministic sandbox for testing and local simulation.

    No code is actually run: every step either returns fixed, configurable
    metrics or — for ids listed in ``fail_steps`` — a simulated failure.
    """

    def __init__(
        self,
        default_rows: int = 100_000,
        default_shuffle_mb: float = 50.0,
        default_cost_dbu: float = 5.0,
        fail_steps: set[str] | None = None,
        null_ratio: float = 0.01,
        output_schema: list[str] | None = None,
    ) -> None:
        """Store the simulation parameters.

        Args:
            default_rows: Row count reported for every successful step.
            default_shuffle_mb: Shuffle volume (MiB) reported per successful step.
            default_cost_dbu: Cost that is divided by the number of steps
                listed in the execution context.
            fail_steps: Ids of steps that should fail; empty/None means none.
            null_ratio: Fraction of ``default_rows`` reported as nulls for
                each output column.
            output_schema: Column names to report; empty/None selects the
                built-in three-column default.
        """
        self.default_rows = default_rows
        self.default_shuffle_mb = default_shuffle_mb
        self.default_cost_dbu = default_cost_dbu
        self.fail_steps = fail_steps if fail_steps else set()
        self.null_ratio = null_ratio
        if output_schema:
            self.output_schema = output_schema
        else:
            self.output_schema = ["nfe_id", "cpf_hash", "processing_date"]

    def get_capabilities(self) -> SandboxCapabilities:
        """Describe this backend as a metrics-capable simulation."""
        return SandboxCapabilities(
            backend_name="mock",
            backend_version="sim-1.0",
            execution_mode="simulation",
            supports_rollback=False,
            supports_metrics=True,
            supports_environment_check=False,
        )

    def execute_step(
        self, step: ExecutionStep, code: str, context: dict[str, Any]
    ) -> StepResult:
        """Simulate one step; *code* is ignored.

        Returns a FAILED result carrying a ``RUNTIME_STEP_FAILED`` fault when
        the step id is in ``fail_steps``; otherwise a SUCCESS result with the
        configured metrics.
        """
        if step.id in self.fail_steps:
            simulated_fault = Fault(
                code="RUNTIME_STEP_FAILED",
                family=FaultFamily.RUNTIME,
                severity=FaultSeverity.HIGH,
                stage="sandbox",
                message=f"Step {step.id} failed during execution.",
                mandatory_action=PolicyAction.BLOCK,
                detected_before_execution=False,
            )
            return StepResult(
                step_id=step.id,
                outcome=StepOutcome.FAILED,
                error=f"Simulated failure on step {step.id}",
                faults=[simulated_fault],
            )

        # Same null count for every column of the reported schema.
        nulls_by_column = {
            column: int(self.default_rows * self.null_ratio)
            for column in self.output_schema
        }

        simulated_metrics = ExecutionMetrics(
            rows_output=self.default_rows,
            shuffle_bytes=int(self.default_shuffle_mb * 1024 * 1024),
            duration_seconds=2.5,
            # Spread the cost across the steps named in the context; the
            # max(..., 1) guard avoids dividing by zero for an empty list.
            cost_dbu=self.default_cost_dbu / max(len(context.get("steps", [1])), 1),
            null_counts=nulls_by_column,
            output_schema=list(self.output_schema),
        )
        return StepResult(
            step_id=step.id,
            outcome=StepOutcome.SUCCESS,
            metrics=simulated_metrics,
        )

    def check_environment(self) -> list[Fault]:
        """Report no environment faults."""
        return []