cfa-kernel 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. cfa/__init__.py +39 -0
  2. cfa/_lazy.py +39 -0
  3. cfa/adapters/__init__.py +104 -0
  4. cfa/adapters/autogen.py +19 -0
  5. cfa/adapters/crewai.py +19 -0
  6. cfa/adapters/dspy.py +19 -0
  7. cfa/adapters/langgraph.py +19 -0
  8. cfa/adapters/openai_agents.py +19 -0
  9. cfa/audit/__init__.py +15 -0
  10. cfa/audit/context.py +205 -0
  11. cfa/audit/hashing.py +41 -0
  12. cfa/audit/trail.py +194 -0
  13. cfa/backends/__init__.py +132 -0
  14. cfa/backends/dbt.py +338 -0
  15. cfa/backends/pyspark.py +240 -0
  16. cfa/backends/sql.py +270 -0
  17. cfa/behavior/__init__.py +49 -0
  18. cfa/behavior/llm.py +244 -0
  19. cfa/behavior/spec.py +235 -0
  20. cfa/behavior/systematizer.py +222 -0
  21. cfa/cli/__init__.py +296 -0
  22. cfa/cli/__main__.py +6 -0
  23. cfa/cli/_helpers.py +109 -0
  24. cfa/cli/core/__init__.py +0 -0
  25. cfa/cli/core/evaluate.py +72 -0
  26. cfa/cli/core/validate.py +29 -0
  27. cfa/cli/formatters.py +280 -0
  28. cfa/cli/governance/__init__.py +0 -0
  29. cfa/cli/governance/audit.py +65 -0
  30. cfa/cli/governance/catalog.py +28 -0
  31. cfa/cli/governance/policy.py +119 -0
  32. cfa/cli/governance/rules.py +42 -0
  33. cfa/cli/governance/signature.py +31 -0
  34. cfa/cli/infrastructure/__init__.py +0 -0
  35. cfa/cli/infrastructure/backend_list.py +24 -0
  36. cfa/cli/infrastructure/storage.py +87 -0
  37. cfa/cli/project/__init__.py +0 -0
  38. cfa/cli/project/init.py +73 -0
  39. cfa/cli/project/lifecycle.py +92 -0
  40. cfa/cli/project/status.py +75 -0
  41. cfa/cli/project/taxonomy.py +38 -0
  42. cfa/cli/reporting/__init__.py +0 -0
  43. cfa/cli/reporting/report.py +109 -0
  44. cfa/cli/reporting/serve.py +43 -0
  45. cfa/config.py +103 -0
  46. cfa/core/__init__.py +19 -0
  47. cfa/core/codegen.py +65 -0
  48. cfa/core/conditions.py +129 -0
  49. cfa/core/kernel.py +224 -0
  50. cfa/core/phases/__init__.py +0 -0
  51. cfa/core/phases/runner.py +477 -0
  52. cfa/core/planner.py +290 -0
  53. cfa/execution/__init__.py +12 -0
  54. cfa/execution/partial.py +339 -0
  55. cfa/execution/state_projection.py +216 -0
  56. cfa/governance/__init__.py +76 -0
  57. cfa/lifecycle/__init__.py +51 -0
  58. cfa/mcp/__init__.py +347 -0
  59. cfa/mcp/__main__.py +4 -0
  60. cfa/normalizer/__init__.py +15 -0
  61. cfa/normalizer/base.py +441 -0
  62. cfa/normalizer/llm.py +426 -0
  63. cfa/observability/__init__.py +14 -0
  64. cfa/observability/indices.py +177 -0
  65. cfa/observability/metrics.py +91 -0
  66. cfa/observability/notify.py +79 -0
  67. cfa/observability/otel.py +81 -0
  68. cfa/observability/promotion.py +367 -0
  69. cfa/policy/__init__.py +12 -0
  70. cfa/policy/bundle.py +317 -0
  71. cfa/policy/catalog.py +117 -0
  72. cfa/policy/engine.py +306 -0
  73. cfa/reporting/__init__.py +42 -0
  74. cfa/reporting/charts.py +223 -0
  75. cfa/reporting/engine.py +456 -0
  76. cfa/resolution/__init__.py +62 -0
  77. cfa/runtime/__init__.py +13 -0
  78. cfa/runtime/gate.py +287 -0
  79. cfa/sandbox/__init__.py +189 -0
  80. cfa/sandbox/executor.py +92 -0
  81. cfa/sandbox/mock.py +89 -0
  82. cfa/sandbox/panic.py +52 -0
  83. cfa/storage/__init__.py +591 -0
  84. cfa/testing/__init__.py +60 -0
  85. cfa/testing/asserts.py +77 -0
  86. cfa/testing/evaluate.py +168 -0
  87. cfa/testing/fixtures.py +89 -0
  88. cfa/testing/markers.py +36 -0
  89. cfa/types.py +489 -0
  90. cfa/validation/__init__.py +14 -0
  91. cfa/validation/runtime.py +285 -0
  92. cfa/validation/signature.py +146 -0
  93. cfa/validation/static.py +252 -0
  94. cfa_kernel-0.1.0.dist-info/METADATA +32 -0
  95. cfa_kernel-0.1.0.dist-info/RECORD +98 -0
  96. cfa_kernel-0.1.0.dist-info/WHEEL +4 -0
  97. cfa_kernel-0.1.0.dist-info/entry_points.txt +3 -0
  98. cfa_kernel-0.1.0.dist-info/licenses/LICENSE +21 -0
cfa/core/codegen.py ADDED
@@ -0,0 +1,65 @@
1
+ """
2
+ CFA Code Generator
3
+ ==================
4
+ Generates deterministic, governed code from an ExecutionPlan.
5
+
6
+ The code generator is NOT creative — it fills templates governed by the plan.
7
+
8
+ Key properties:
9
+ - Output is deterministic (same plan = same code)
10
+ - All PII handling is explicit (sha256/drop)
11
+ - Partition filters are always present when required
12
+ - Write operations use merge (never raw append in Silver/Gold)
13
+
14
+ Backend-specific implementations live in cfa.backends.*.
15
+ This module provides the core ABC, the GeneratedCode artifact,
16
+ and backward-compatible re-exports.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from abc import ABC, abstractmethod
22
+ from dataclasses import dataclass, field
23
+ from typing import Any
24
+
25
+ from cfa.core.planner import ExecutionPlan
26
+
27
+ # ── Generated Code ───────────────────────────────────────────────────────────
28
+
29
+
30
@dataclass
class GeneratedCode:
    """Code artifact produced by a backend for one execution plan.

    Plain data holder; the only derived value is ``line_count``.
    """

    # Hash tying this artifact to the plan it was generated from
    # (presumably the plan's signature hash — confirm against the planner).
    plan_signature_hash: str
    intent_id: str
    # Language of the text stored in ``code``.
    language: str
    # The generated source text itself.
    code: str
    # Optional per-step code fragments; a fresh dict per instance.
    step_code_map: dict[str, str] = field(default_factory=dict)
    # Free-form extra information; a fresh dict per instance.
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def line_count(self) -> int:
        """Return the number of lines in ``code``, ignoring leading and
        trailing whitespace (an empty or blank ``code`` counts as 0)."""
        trimmed = self.code.strip()
        lines = trimmed.splitlines()
        return len(lines)
44
+
45
+
46
+ # ── Code Generator Backend ───────────────────────────────────────────────────
47
+
48
+
49
class CodeGenBackend(ABC):
    """Extension point for code generation targets.

    Subclasses must implement ``generate``; the base class cannot be
    instantiated because ``generate`` is abstract.
    """

    @abstractmethod
    def generate(self, plan: ExecutionPlan) -> GeneratedCode:
        """Build the ``GeneratedCode`` artifact for ``plan``."""
54
+
55
+
56
+ # ── Backward-compatible re-exports ───────────────────────────────────────────
57
+ # PySparkGenerator now lives in cfa.backends.pyspark as PySparkBackend.
58
+ # Lazy import to avoid circular dependency with backends.__init__.
59
+
60
+
61
def __getattr__(name):
    """Module-level attribute hook for backward-compatible names.

    Only ``PySparkGenerator`` is served: it resolves to
    ``cfa.backends.pyspark.PySparkBackend``. Any other name raises
    ``AttributeError``, as a normal missing module attribute would.
    """
    if name != "PySparkGenerator":
        raise AttributeError(f"module 'cfa.core.codegen' has no attribute {name!r}")

    # Imported on demand: a top-level import would create a circular
    # dependency with the backends package.
    from cfa.backends.pyspark import PySparkBackend

    return PySparkBackend
cfa/core/conditions.py ADDED
@@ -0,0 +1,129 @@
1
+ """
2
+ CFA Condition Registry
3
+ ======================
4
+ Central registry mapping condition strings to callable checks.
5
+
6
+ Used by:
7
+ - PolicyBundle YAML/JSON loader (maps "pii_in_protected_layer" → lambda)
8
+ - BehaviorSpec Systematizer (maps ConditionType → lambda)
9
+ - Programmatic PolicyRule creation
10
+
11
+ Single source of truth for all built-in governance conditions.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import Callable
17
+ from typing import Any
18
+
19
+ from cfa.types import DatasetClassification, StateSignature
20
+
21
+ ConditionFn = Callable[[StateSignature], bool]
22
+
23
+
24
def _pii_in_protected_layer(meta: dict[str, Any]) -> ConditionFn:
    """Build a check that fires when unmasked PII reaches a protected layer.

    Args:
        meta: Rule metadata. ``target_layer`` (default ``""``) marks the
            write as protected when it is ``"silver"`` or ``"gold"``.

    Returns:
        A condition that is truthy when the write is protected (by metadata
        or by ``sig.writes_to_protected_layer``), the signature contains PII,
        and ``sig.constraints.no_pii_raw`` is not set.
    """
    layer_is_protected = meta.get("target_layer", "") in ("silver", "gold")

    def violates(sig: StateSignature) -> bool:
        # The signature flag is only consulted when metadata did not
        # already mark the layer as protected.
        touches_protected = layer_is_protected or sig.writes_to_protected_layer
        if not touches_protected:
            return touches_protected
        has_pii = sig.contains_pii
        if not has_pii:
            return has_pii
        return not sig.constraints.no_pii_raw

    return violates
30
+
31
+
32
def _missing_merge_key(meta: dict[str, Any]) -> ConditionFn:
    """Build a check for protected-layer writes that do not require a merge key.

    Args:
        meta: Rule metadata; not used by this condition.

    Returns:
        A condition that is truthy when ``sig.writes_to_protected_layer`` is
        set and ``sig.constraints.merge_key_required`` is not.
    """

    def violates(sig: StateSignature) -> bool:
        writes_protected = sig.writes_to_protected_layer
        if not writes_protected:
            return writes_protected
        return not sig.constraints.merge_key_required

    return violates
36
+
37
+
38
def _missing_partition(meta: dict[str, Any]) -> ConditionFn:
    """Build a check for large or sensitive inputs that lack partitioning.

    Args:
        meta: Rule metadata. ``min_size_gb`` (default ``1.0``) is the size at
            or above which a dataset counts as large.

    Returns:
        A condition that is true when at least one dataset is classified
        HIGH_VOLUME or SENSITIVE, or meets the size threshold, and
        ``sig.constraints.partition_by`` is empty.
    """
    size_threshold_gb = meta.get("min_size_gb", 1.0)

    def violates(sig: StateSignature) -> bool:
        for dataset in sig.datasets:
            is_flagged = dataset.classification in (
                DatasetClassification.HIGH_VOLUME,
                DatasetClassification.SENSITIVE,
            )
            if is_flagged or dataset.size_gb >= size_threshold_gb:
                break
        else:
            # No dataset qualified (including the empty-datasets case).
            return False
        return len(sig.constraints.partition_by) == 0

    return violates
48
+
49
+
50
def _enforce_types_disabled(meta: dict[str, Any]) -> ConditionFn:
    """Build a check for protected-layer writes with type enforcement off.

    Args:
        meta: Rule metadata; not used by this condition.

    Returns:
        A condition that is truthy when ``sig.writes_to_protected_layer`` is
        set and ``sig.constraints.enforce_types`` is not.
    """

    def violates(sig: StateSignature) -> bool:
        writes_protected = sig.writes_to_protected_layer
        if not writes_protected:
            return writes_protected
        return not sig.constraints.enforce_types

    return violates
54
+
55
+
56
def _pii_without_policy(meta: dict[str, Any]) -> ConditionFn:
    """Build a check for PII that is not covered by the no-raw-PII constraint.

    Unlike ``_pii_in_protected_layer`` this ignores the target layer.

    Args:
        meta: Rule metadata; not used by this condition.

    Returns:
        A condition that is truthy when ``sig.contains_pii`` is set and
        ``sig.constraints.no_pii_raw`` is not.
    """

    def violates(sig: StateSignature) -> bool:
        has_pii = sig.contains_pii
        if not has_pii:
            return has_pii
        return not sig.constraints.no_pii_raw

    return violates
60
+
61
+
62
def _sensitive_without_partition(meta: dict[str, Any]) -> ConditionFn:
    """Build a check for SENSITIVE datasets processed without partitioning.

    Args:
        meta: Rule metadata; not used by this condition.

    Returns:
        A condition that is true when any dataset is classified SENSITIVE
        and ``sig.constraints.partition_by`` is empty.
    """

    def violates(sig: StateSignature) -> bool:
        for dataset in sig.datasets:
            if dataset.classification == DatasetClassification.SENSITIVE:
                # One sensitive dataset is enough; the outcome now depends
                # only on whether any partition column is declared.
                return len(sig.constraints.partition_by) == 0
        return False

    return violates
69
+
70
+
71
def _cost_budget_exceeded(meta: dict[str, Any]) -> ConditionFn:
    """Build a check on the cost budget declared in the signature.

    Args:
        meta: Rule metadata. ``max_dbu`` (default ``0.0``) is the policy
            ceiling; a value of 0 or less means no ceiling is configured.

    Returns:
        A condition that is false when ``sig.constraints.max_cost_dbu`` is
        ``None``. Otherwise, with a ceiling configured it is true when the
        declared budget is above the ceiling; without one it is true when
        the declared budget is zero or negative.
    """
    ceiling_dbu = meta.get("max_dbu", 0.0)

    def violates(sig: StateSignature) -> bool:
        # Evaluate the ceiling first, as the original expression does.
        ceiling_active = ceiling_dbu > 0
        declared = sig.constraints.max_cost_dbu
        if declared is None:
            return False
        if ceiling_active:
            return declared > ceiling_dbu
        return declared <= 0

    return violates
78
+
79
+
80
# Built-in condition builders, keyed by the condition name used when
# building rules. Each builder takes a metadata dict and returns a ConditionFn.
CONDITION_REGISTRY: dict[str, Callable[[dict[str, Any]], ConditionFn]] = dict(
    pii_in_protected_layer=_pii_in_protected_layer,
    missing_merge_key=_missing_merge_key,
    missing_partition=_missing_partition,
    enforce_types_disabled=_enforce_types_disabled,
    pii_without_policy=_pii_without_policy,
    sensitive_without_partition=_sensitive_without_partition,
    cost_budget_exceeded=_cost_budget_exceeded,
)

# Aliases: these names reuse an existing builder rather than having a
# dedicated check.
# NOTE(review): "schema_mismatch" and "shuffle_budget_exceeded" only run the
# missing-partition check, and "unauthorized_gold_write" only fires when PII
# is involved — confirm this is the intended semantics for those rule names.
CONDITION_REGISTRY.update(
    schema_mismatch=_missing_partition,
    shuffle_budget_exceeded=_missing_partition,
    unauthorized_gold_write=_pii_in_protected_layer,
)
92
+
93
+
94
def register_condition(name: str, builder: Callable[[dict[str, Any]], ConditionFn]) -> None:
    """Add a custom condition builder to the registry, or replace one.

    An existing entry under the same name is overwritten without warning.

    Args:
        name: Condition name used in YAML/JSON (e.g. "my_custom_check").
        builder: Callable that receives the metadata dict and returns the
            ConditionFn to evaluate.
    """
    CONDITION_REGISTRY.update({name: builder})
102
+
103
+
104
def build_condition(name: str, metadata: dict[str, Any] | None = None) -> ConditionFn:
    """Look up a registered builder and instantiate its condition.

    Args:
        name: Registered condition name (e.g. "pii_in_protected_layer").
        metadata: Optional builder parameters; ``None`` or an empty dict
            results in the builder receiving an empty dict.

    Returns:
        The callable produced by the builder; it takes a StateSignature and
        returns bool.

    Raises:
        KeyError: If ``name`` is not registered. The message lists all
            registered names in sorted order.
    """
    builder = CONDITION_REGISTRY.get(name)
    if builder is not None:
        return builder(metadata or {})

    known_names = ", ".join(sorted(CONDITION_REGISTRY))
    raise KeyError(
        f"Unknown condition '{name}'. "
        f"Registered conditions: {known_names}"
    )
125
+
126
+
127
def list_conditions() -> list[str]:
    """Return the names of all registered conditions in sorted order."""
    names = list(CONDITION_REGISTRY)
    names.sort()
    return names
cfa/core/kernel.py ADDED
@@ -0,0 +1,224 @@
1
+ """
2
+ CFA Kernel Orchestrator
3
+ =======================
4
+ Single entry point for the CFA Kernel. Delegates the 5-phase pipeline
5
+ to KernelPhases (core/phases/runner.py).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+ from enum import StrEnum
12
+ from typing import Any
13
+
14
+ from cfa.audit.context import ContextRegistry
15
+ from cfa.audit.trail import AuditTrail
16
+ from cfa.backends import BackendRegistry
17
+ from cfa.core.codegen import CodeGenBackend
18
+ from cfa.core.planner import ExecutionPlanner
19
+ from cfa.execution.partial import (
20
+ FailurePolicy,
21
+ PartialExecutionManager,
22
+ RetryPolicy,
23
+ )
24
+ from cfa.execution.state_projection import StateProjectionProtocol
25
+ from cfa.normalizer.base import (
26
+ AutoApproveHandler,
27
+ ConfirmationHandler,
28
+ ConfirmationOrchestrator,
29
+ IntentNormalizer,
30
+ MockNormalizerBackend,
31
+ NormalizerBackend,
32
+ RuleBasedNormalizerBackend,
33
+ )
34
+ from cfa.observability.promotion import PromotionEngine
35
+ from cfa.policy.engine import PolicyEngine, PolicyRule
36
+ from cfa.sandbox import SandboxBackend
37
+ from cfa.sandbox.executor import SandboxExecutor
38
+ from cfa.sandbox.mock import MockSandboxBackend
39
+ from cfa.types import KernelResult
40
+ from cfa.validation.runtime import RuntimeValidator
41
+ from cfa.validation.static import StaticValidator
42
+
43
+ # ── Pipeline Phase ────────────────────────────────────────────────────────────
44
+
45
+
46
+ class PipelinePhase(StrEnum):
47
+ FORMALIZE = "formalize"
48
+ GOVERN = "govern"
49
+ GENERATE = "generate"
50
+ EXECUTE = "execute"
51
+ VALIDATE = "validate"
52
+
53
+
54
+ # ── Kernel Config ────────────────────────────────────────────────────────────
55
+
56
+
57
@dataclass
class KernelConfig:
    """Settings consumed by KernelOrchestrator and passed on to the phase runner.

    Field order is part of the positional constructor signature and must
    not be changed.
    """

    # Version labels: the bundle version goes to the normalizer and the
    # policy engine, the catalog snapshot version to the normalizer.
    policy_bundle_version: str = "v1.0"
    catalog_snapshot_version: str = "catalog_default"
    # Passed to the policy engine.
    max_replan_attempts: int = 3
    # Passed to the confirmation orchestrator.
    confirmation_timeout_seconds: int = 300
    warnings_are_blocking: bool = False
    # Name of the code generation backend looked up in the backend registry
    # when no backend is injected.
    backend: str = "pyspark"

    # Per-phase switches, reported by KernelOrchestrator.pipeline_config().
    phase_formalize: bool = True
    phase_govern: bool = True
    phase_generate: bool = True
    phase_execute: bool = True
    phase_validate: bool = True

    # Feature switches; they are not read in this module
    # (presumably consumed by the phase runner — confirm).
    enable_planning: bool = True
    enable_codegen: bool = True
    enable_static_validation: bool = True
    enable_sandbox: bool = True
    # Passed to the partial execution manager.
    failure_policy: FailurePolicy = FailurePolicy.SELECTIVE_QUARANTINE
    enable_promotion: bool = True
    # "mock" selects the mock normalizer backend; any other value selects
    # the rule-based backend.
    normalizer: str = "rule_based"
    # Both are passed to the rule-based normalizer backend.
    strict_normalization: bool = False
    min_normalizer_confidence: float = 0.65
81
+
82
+
83
+ # ── Kernel Orchestrator ──────────────────────────────────────────────────────
84
+
85
+
86
class KernelOrchestrator:
    """Single entry point for the CFA Kernel.

    Wires all collaborators once at construction time. Each call to
    ``process`` builds a KernelPhases runner from them and delegates the
    five-phase pipeline: Formalize → Govern → Generate → Execute → Validate.

    Collaborators passed to the constructor are used as given whenever they
    are not ``None``; defaults are built only for omitted arguments. This
    matters for objects that may be falsy while still valid (for example a
    registry or trail that defines ``__len__`` and is still empty).
    """

    def __init__(
        self,
        normalizer_backend: NormalizerBackend | None = None,
        confirmation_handler: ConfirmationHandler | None = None,
        policy_rules: list[PolicyRule] | None = None,
        context_registry: ContextRegistry | None = None,
        audit_trail: AuditTrail | None = None,
        catalog: dict[str, Any] | None = None,
        config: KernelConfig | None = None,
        planner: ExecutionPlanner | None = None,
        codegen_backend: CodeGenBackend | str | None = None,
        static_validator: StaticValidator | None = None,
        sandbox_backend: SandboxBackend | None = None,
        runtime_validator: RuntimeValidator | None = None,
        retry_policy: RetryPolicy | None = None,
        promotion_engine: PromotionEngine | None = None,
        schema_contract: dict[str, Any] | None = None,
    ) -> None:
        """Create the orchestrator and its collaborators.

        Every argument is optional. Omitted collaborators get these
        defaults: ``KernelConfig()``, ``ContextRegistry()``,
        ``AuditTrail()``, a normalizer backend chosen from
        ``config.normalizer``, ``AutoApproveHandler()``,
        ``ExecutionPlanner()``, ``StaticValidator()``,
        ``MockSandboxBackend()``, ``RuntimeValidator()``, ``RetryPolicy()``
        and ``PromotionEngine(system_version="cfa_v0.1.0")``.

        Args:
            policy_rules: Passed to the policy engine unchanged, including
                ``None``.
            catalog: Dataset catalog. ``None`` or an empty dict is replaced
                by ``{"datasets": {}}``.
            codegen_backend: A backend instance, a backend name to look up
                in the backend registry, or ``None`` to use
                ``config.backend``.
            schema_contract: Stored and passed to the phase runner unchanged.
        """
        pick = self._injected_or

        self.config = pick(config, KernelConfig)
        self._context_registry = pick(context_registry, ContextRegistry)
        self._audit_trail = pick(audit_trail, AuditTrail)
        # Deliberately truthiness-based: an empty catalog is normalised so
        # that the "datasets" key is always present.
        self._catalog = catalog or {"datasets": {}}
        self._schema_contract = schema_contract

        self._normalizer = IntentNormalizer(
            # The default backend is resolved only when none was injected.
            backend=pick(normalizer_backend, self._resolve_normalizer_backend),
            policy_bundle_version=self.config.policy_bundle_version,
            catalog_snapshot_version=self.config.catalog_snapshot_version,
        )
        self._confirmation = ConfirmationOrchestrator(
            handler=pick(confirmation_handler, AutoApproveHandler),
            timeout_seconds=self.config.confirmation_timeout_seconds,
        )
        self._policy = PolicyEngine(
            rules=policy_rules,
            policy_bundle_version=self.config.policy_bundle_version,
            max_replan_attempts=self.config.max_replan_attempts,
        )
        self._planner = pick(planner, ExecutionPlanner)
        self._codegen = self._resolve_codegen_backend(codegen_backend)
        self._static_validator = pick(static_validator, StaticValidator)
        self._sandbox_executor = SandboxExecutor(
            backend=pick(sandbox_backend, MockSandboxBackend)
        )
        self._runtime_validator = pick(runtime_validator, RuntimeValidator)
        self._retry_policy = pick(retry_policy, RetryPolicy)
        self._promotion_engine = pick(
            promotion_engine,
            lambda: PromotionEngine(system_version="cfa_v0.1.0"),
        )

    # Backward-compatible public aliases for internal components
    @property
    def audit_trail(self) -> AuditTrail:
        """The audit trail used by this orchestrator."""
        return self._audit_trail

    @property
    def context_registry(self) -> ContextRegistry:
        """The context registry used by this orchestrator."""
        return self._context_registry

    @property
    def catalog(self) -> dict[str, Any]:
        """The dataset catalog used by this orchestrator."""
        return self._catalog

    @property
    def promotion_engine(self) -> PromotionEngine:
        """The promotion engine used by this orchestrator."""
        return self._promotion_engine

    def process(self, raw_intent: str) -> KernelResult:
        """Run the pipeline for one raw intent and return its result.

        A new KernelPhases runner and a new PartialExecutionManager are
        built on every call; all other collaborators are the ones created
        in ``__init__``.
        """
        # Imported here rather than at module top (presumably to avoid an
        # import cycle with the phase runner — confirm).
        from cfa.core.phases.runner import KernelPhases

        phases = KernelPhases(
            config=self.config,
            context_registry=self._context_registry,
            audit_trail=self._audit_trail,
            catalog=self._catalog,
            schema_contract=self._schema_contract,
            normalizer=self._normalizer,
            confirmation=self._confirmation,
            policy=self._policy,
            planner=self._planner,
            codegen=self._codegen,
            static_validator=self._static_validator,
            sandbox_executor=self._sandbox_executor,
            runtime_validator=self._runtime_validator,
            partial_execution_manager=PartialExecutionManager(
                sandbox=self._sandbox_executor,
                runtime_validator=self._runtime_validator,
                failure_policy=self.config.failure_policy,
                retry_policy=self._retry_policy,
            ),
            state_projection=StateProjectionProtocol(self._context_registry),
            promotion_engine=self._promotion_engine,
        )
        return phases.process(raw_intent)

    def describe(self) -> dict[str, Any]:
        """Return a summary of configuration and current state.

        The summary holds selected config values, the per-phase switches,
        the context registry version, catalog dataset names, the number of
        policy rules and the audit event count.
        """
        return {
            "config": {
                "policy_bundle_version": self.config.policy_bundle_version,
                "catalog_snapshot_version": self.config.catalog_snapshot_version,
                "max_replan_attempts": self.config.max_replan_attempts,
                "backend": self.config.backend,
            },
            "pipeline_phases": self.pipeline_config(),
            "context_registry_version": self._context_registry.version_id,
            "catalog_datasets": list(self._catalog.get("datasets", {}).keys()),
            "policy_rules": len(self._policy.rules),
            "audit_events": self._audit_trail.event_count,
        }

    def pipeline_config(self) -> dict[str, bool]:
        """Map each pipeline phase to its enabled flag from the config."""
        return {
            PipelinePhase.FORMALIZE: self.config.phase_formalize,
            PipelinePhase.GOVERN: self.config.phase_govern,
            PipelinePhase.GENERATE: self.config.phase_generate,
            PipelinePhase.EXECUTE: self.config.phase_execute,
            PipelinePhase.VALIDATE: self.config.phase_validate,
        }

    # ── Internal helpers ──────────────────────────────────────────────────

    @staticmethod
    def _injected_or(value: Any, factory: Any) -> Any:
        """Return ``value`` unless it is ``None``; otherwise call ``factory``.

        An identity check is used instead of truthiness so that a supplied
        object that evaluates as false is never swapped for a default.
        """
        return factory() if value is None else value

    def _resolve_codegen_backend(
        self, backend: CodeGenBackend | str | None
    ) -> CodeGenBackend:
        """Turn a backend instance, backend name or ``None`` into a backend.

        An instance is returned as-is. A string is used as the registry
        name; anything else falls back to ``config.backend``. The factory
        obtained from the backend registry is called without arguments.
        """
        if isinstance(backend, CodeGenBackend):
            return backend
        name: str = backend if isinstance(backend, str) else self.config.backend
        factory = BackendRegistry.singleton().get(name)
        return factory()

    def _resolve_normalizer_backend(self) -> NormalizerBackend:
        """Build the default normalizer backend from the config.

        ``config.normalizer == "mock"`` selects the mock backend. Every
        other value, including unrecognised ones, selects the rule-based
        backend.
        """
        if self.config.normalizer == "mock":
            return MockNormalizerBackend()
        return RuleBasedNormalizerBackend(
            strict=self.config.strict_normalization,
            min_confidence=self.config.min_normalizer_confidence,
        )
File without changes