cfa-kernel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cfa/__init__.py +39 -0
- cfa/_lazy.py +39 -0
- cfa/adapters/__init__.py +104 -0
- cfa/adapters/autogen.py +19 -0
- cfa/adapters/crewai.py +19 -0
- cfa/adapters/dspy.py +19 -0
- cfa/adapters/langgraph.py +19 -0
- cfa/adapters/openai_agents.py +19 -0
- cfa/audit/__init__.py +15 -0
- cfa/audit/context.py +205 -0
- cfa/audit/hashing.py +41 -0
- cfa/audit/trail.py +194 -0
- cfa/backends/__init__.py +132 -0
- cfa/backends/dbt.py +338 -0
- cfa/backends/pyspark.py +240 -0
- cfa/backends/sql.py +270 -0
- cfa/behavior/__init__.py +49 -0
- cfa/behavior/llm.py +244 -0
- cfa/behavior/spec.py +235 -0
- cfa/behavior/systematizer.py +222 -0
- cfa/cli/__init__.py +296 -0
- cfa/cli/__main__.py +6 -0
- cfa/cli/_helpers.py +109 -0
- cfa/cli/core/__init__.py +0 -0
- cfa/cli/core/evaluate.py +72 -0
- cfa/cli/core/validate.py +29 -0
- cfa/cli/formatters.py +280 -0
- cfa/cli/governance/__init__.py +0 -0
- cfa/cli/governance/audit.py +65 -0
- cfa/cli/governance/catalog.py +28 -0
- cfa/cli/governance/policy.py +119 -0
- cfa/cli/governance/rules.py +42 -0
- cfa/cli/governance/signature.py +31 -0
- cfa/cli/infrastructure/__init__.py +0 -0
- cfa/cli/infrastructure/backend_list.py +24 -0
- cfa/cli/infrastructure/storage.py +87 -0
- cfa/cli/project/__init__.py +0 -0
- cfa/cli/project/init.py +73 -0
- cfa/cli/project/lifecycle.py +92 -0
- cfa/cli/project/status.py +75 -0
- cfa/cli/project/taxonomy.py +38 -0
- cfa/cli/reporting/__init__.py +0 -0
- cfa/cli/reporting/report.py +109 -0
- cfa/cli/reporting/serve.py +43 -0
- cfa/config.py +103 -0
- cfa/core/__init__.py +19 -0
- cfa/core/codegen.py +65 -0
- cfa/core/conditions.py +129 -0
- cfa/core/kernel.py +224 -0
- cfa/core/phases/__init__.py +0 -0
- cfa/core/phases/runner.py +477 -0
- cfa/core/planner.py +290 -0
- cfa/execution/__init__.py +12 -0
- cfa/execution/partial.py +339 -0
- cfa/execution/state_projection.py +216 -0
- cfa/governance/__init__.py +76 -0
- cfa/lifecycle/__init__.py +51 -0
- cfa/mcp/__init__.py +347 -0
- cfa/mcp/__main__.py +4 -0
- cfa/normalizer/__init__.py +15 -0
- cfa/normalizer/base.py +441 -0
- cfa/normalizer/llm.py +426 -0
- cfa/observability/__init__.py +14 -0
- cfa/observability/indices.py +177 -0
- cfa/observability/metrics.py +91 -0
- cfa/observability/notify.py +79 -0
- cfa/observability/otel.py +81 -0
- cfa/observability/promotion.py +367 -0
- cfa/policy/__init__.py +12 -0
- cfa/policy/bundle.py +317 -0
- cfa/policy/catalog.py +117 -0
- cfa/policy/engine.py +306 -0
- cfa/reporting/__init__.py +42 -0
- cfa/reporting/charts.py +223 -0
- cfa/reporting/engine.py +456 -0
- cfa/resolution/__init__.py +62 -0
- cfa/runtime/__init__.py +13 -0
- cfa/runtime/gate.py +287 -0
- cfa/sandbox/__init__.py +189 -0
- cfa/sandbox/executor.py +92 -0
- cfa/sandbox/mock.py +89 -0
- cfa/sandbox/panic.py +52 -0
- cfa/storage/__init__.py +591 -0
- cfa/testing/__init__.py +60 -0
- cfa/testing/asserts.py +77 -0
- cfa/testing/evaluate.py +168 -0
- cfa/testing/fixtures.py +89 -0
- cfa/testing/markers.py +36 -0
- cfa/types.py +489 -0
- cfa/validation/__init__.py +14 -0
- cfa/validation/runtime.py +285 -0
- cfa/validation/signature.py +146 -0
- cfa/validation/static.py +252 -0
- cfa_kernel-0.1.0.dist-info/METADATA +32 -0
- cfa_kernel-0.1.0.dist-info/RECORD +98 -0
- cfa_kernel-0.1.0.dist-info/WHEEL +4 -0
- cfa_kernel-0.1.0.dist-info/entry_points.txt +3 -0
- cfa_kernel-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFA State Projection Protocol
|
|
3
|
+
==============================
|
|
4
|
+
Projects execution outcomes into the Context Registry.
|
|
5
|
+
|
|
6
|
+
After every execution (successful, partial, or failed), the State Projection
|
|
7
|
+
Protocol updates the Context Registry to reflect "what state is the data in now".
|
|
8
|
+
|
|
9
|
+
This is Invariant I4: Mandatory Projection — the Context Registry MUST be updated
|
|
10
|
+
after every execution. Invariant I6 (Safe Execution) takes precedence: if execution
|
|
11
|
+
was rolled back, the projection reflects that.
|
|
12
|
+
|
|
13
|
+
The protocol:
|
|
14
|
+
1. Reads execution outcome (PartialExecutionState or SandboxResult)
|
|
15
|
+
2. Projects dataset states (committed, quarantined, rolled_back, degraded)
|
|
16
|
+
3. Updates Context Registry with new dataset states
|
|
17
|
+
4. Takes a snapshot for reproducibility (Invariant I8)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from dataclasses import dataclass
|
|
23
|
+
|
|
24
|
+
from cfa.audit.context import ContextRegistry
|
|
25
|
+
from cfa.execution.partial import PartialExecutionState, PublishState
|
|
26
|
+
from cfa.types import StateSignature, _utcnow
|
|
27
|
+
|
|
28
|
+
# ── Projection Result ───────────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class ProjectionResult:
    """Outcome of one attempt to project an execution into the Context Registry.

    ``dataset_states_updated`` is declared with a ``None`` default that
    ``__post_init__`` swaps for a new empty list, so instances created
    without that argument never share a list object.
    """

    # Whether dataset states were written (set by StateProjectionProtocol).
    projected: bool
    # Identifier returned by the registry snapshot; "" when none was taken.
    snapshot_version: str = ""
    # Names of the datasets whose state was written.
    dataset_states_updated: list[str] = None  # type: ignore[assignment]
    # Values produced in this module: "full", "degraded", "partial",
    # "quarantine", "rollback", "unknown".
    projection_type: str = ""
    # True when the outcome is only reported and nothing was written.
    audit_only: bool = False

    def __post_init__(self) -> None:
        """Replace a missing ``dataset_states_updated`` with a new empty list."""
        if self.dataset_states_updated is not None:
            return
        self.dataset_states_updated = []
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ── State Projection Protocol ───────────────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class StateProjectionProtocol:
    """
    Writes the outcome of an execution into the Context Registry (Invariant I4).

    ``project`` picks a handler from ``execution_state.publish_state``. The
    published, degraded, committed-not-published and quarantined handlers
    store a state record for each target dataset and then take a registry
    snapshot. The rolled-back handler and the fallback for unrecognised
    publish states leave the registry untouched and take no snapshot.
    """

    def __init__(self, context_registry: ContextRegistry) -> None:
        """Keep a reference to the registry that receives projected states."""
        self.registry = context_registry

    def project(
        self,
        signature: StateSignature,
        execution_state: PartialExecutionState,
    ) -> ProjectionResult:
        """Project *execution_state* for *signature* into the Context Registry.

        Returns:
            The selected handler's ProjectionResult, or a result with
            ``projected=False`` and ``projection_type="unknown"`` when the
            publish state equals none of the handled values.
        """
        stamp = _utcnow().isoformat()
        scope = [signature.target_dataset_name]
        outcome = execution_state.publish_state

        if outcome == PublishState.PUBLISHED:
            handler = self._project_published
        elif outcome == PublishState.DEGRADED:
            handler = self._project_degraded
        elif outcome == PublishState.COMMITTED_NOT_PUBLISHED:
            handler = self._project_committed_not_published
        elif outcome == PublishState.QUARANTINED:
            handler = self._project_quarantined
        elif outcome == PublishState.ROLLED_BACK:
            handler = self._project_rolled_back
        else:
            return ProjectionResult(projected=False, projection_type="unknown")

        return handler(signature, execution_state, scope, stamp)

    def _project_published(
        self,
        signature: StateSignature,
        execution_state: PartialExecutionState,
        datasets: list[str],
        timestamp: str,
    ) -> ProjectionResult:
        """Record each dataset as "published", with sandbox metrics if present."""
        sandbox = execution_state.sandbox_result
        if sandbox:
            totals = sandbox.aggregate_metrics
            metrics = {
                "rows_output": totals.rows_output,
                "cost_dbu": totals.cost_dbu,
                "duration_seconds": totals.duration_seconds,
            }
        else:
            # No sandbox result: the record still carries an empty metrics dict.
            metrics = {}

        for name in datasets:
            record = {
                "state": "published",
                "signature_hash": signature.signature_hash,
                "target_layer": signature.target_layer.value,
                "last_updated": timestamp,
                "metrics": metrics,
            }
            self.registry.set_dataset_state(name, record)

        return ProjectionResult(
            projected=True,
            snapshot_version=self.registry.snapshot(),
            dataset_states_updated=datasets,
            projection_type="full",
        )

    def _project_degraded(
        self,
        signature: StateSignature,
        execution_state: PartialExecutionState,
        datasets: list[str],
        timestamp: str,
    ) -> ProjectionResult:
        """Record each dataset as "degraded" with its quarantined/committed steps."""
        written = []
        for name in datasets:
            record = {
                "state": "degraded",
                "signature_hash": signature.signature_hash,
                "target_layer": signature.target_layer.value,
                "last_updated": timestamp,
                "quarantined_steps": execution_state.quarantined_steps,
                "committed_steps": execution_state.committed_steps,
            }
            self.registry.set_dataset_state(name, record)
            written.append(name)

        return ProjectionResult(
            projected=True,
            snapshot_version=self.registry.snapshot(),
            dataset_states_updated=written,
            projection_type="degraded",
        )

    def _project_committed_not_published(
        self,
        signature: StateSignature,
        execution_state: PartialExecutionState,
        datasets: list[str],
        timestamp: str,
    ) -> ProjectionResult:
        """Record each dataset as "committed_not_published" (a "partial" projection)."""
        written = []
        for name in datasets:
            record = {
                "state": "committed_not_published",
                "signature_hash": signature.signature_hash,
                "target_layer": signature.target_layer.value,
                "last_updated": timestamp,
                "committed_steps": execution_state.committed_steps,
                "quarantined_steps": execution_state.quarantined_steps,
            }
            self.registry.set_dataset_state(name, record)
            written.append(name)

        return ProjectionResult(
            projected=True,
            snapshot_version=self.registry.snapshot(),
            dataset_states_updated=written,
            projection_type="partial",
        )

    def _project_quarantined(
        self,
        signature: StateSignature,
        execution_state: PartialExecutionState,
        datasets: list[str],
        timestamp: str,
    ) -> ProjectionResult:
        """Record each dataset as "quarantined" with its quarantined steps."""
        written = []
        for name in datasets:
            record = {
                "state": "quarantined",
                "signature_hash": signature.signature_hash,
                "target_layer": signature.target_layer.value,
                "last_updated": timestamp,
                "quarantined_steps": execution_state.quarantined_steps,
            }
            self.registry.set_dataset_state(name, record)
            written.append(name)

        return ProjectionResult(
            projected=True,
            snapshot_version=self.registry.snapshot(),
            dataset_states_updated=written,
            projection_type="quarantine",
        )

    def _project_rolled_back(
        self,
        signature: StateSignature,
        execution_state: PartialExecutionState,
        datasets: list[str],
        timestamp: str,
    ) -> ProjectionResult:
        """Report a rollback without writing to the registry or snapshotting."""
        return ProjectionResult(
            projected=False,
            snapshot_version="",
            dataset_states_updated=[],
            projection_type="rollback",
            audit_only=True,
        )
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cfa.governance -- Standalone governance
|
|
3
|
+
========================================
|
|
4
|
+
Validates data operations against governance rules WITHOUT needing an LLM,
|
|
5
|
+
WITHOUT executing code, WITHOUT infrastructure.
|
|
6
|
+
|
|
7
|
+
Works on top of any existing pipeline (Airflow, Dagster, scripts).
|
|
8
|
+
You build the StateSignature by hand and validate it.
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
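from cfa.governance import (
    DatasetClassification, DatasetRef, ExecutionContext, PolicyAction,
    PolicyEngine, SignatureConstraints, StateSignature, StaticValidator,
    TargetLayer,
)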
|
|
12
|
+
|
|
13
|
+
# Build the signature for what you want to do
|
|
14
|
+
sig = StateSignature(
|
|
15
|
+
domain="fiscal",
|
|
16
|
+
intent="reconciliation",
|
|
17
|
+
target_layer=TargetLayer.SILVER,
|
|
18
|
+
datasets=(DatasetRef("nfe", DatasetClassification.HIGH_VOLUME),),
|
|
19
|
+
constraints=SignatureConstraints(partition_by=("processing_date",)),
|
|
20
|
+
execution_context=ExecutionContext("v1", "c1", "r1"),
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Validate against governance rules
|
|
24
|
+
engine = PolicyEngine()
|
|
25
|
+
result = engine.evaluate(sig)
|
|
26
|
+
if result.action == PolicyAction.BLOCK:
|
|
27
|
+
raise Exception(f"Blocked: {result.reasoning}")
|
|
28
|
+
|
|
29
|
+
# Validate generated code (optional)
|
|
30
|
+
validator = StaticValidator()
|
|
31
|
+
sv = validator.validate(code, sig)
|
|
32
|
+
if not sv.passed:
|
|
33
|
+
raise Exception(f"Static validation failed: {sv.fault_codes}")
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
from cfa.policy.engine import PolicyEngine, PolicyRule, build_default_ruleset
|
|
37
|
+
from cfa.types import (
|
|
38
|
+
DatasetClassification,
|
|
39
|
+
DatasetRef,
|
|
40
|
+
ExecutionContext,
|
|
41
|
+
Fault,
|
|
42
|
+
FaultFamily,
|
|
43
|
+
FaultSeverity,
|
|
44
|
+
PolicyAction,
|
|
45
|
+
PolicyResult,
|
|
46
|
+
SignatureConstraints,
|
|
47
|
+
StateSignature,
|
|
48
|
+
TargetLayer,
|
|
49
|
+
)
|
|
50
|
+
from cfa.validation.runtime import RuntimeThresholds, RuntimeValidationResult, RuntimeValidator
|
|
51
|
+
from cfa.validation.static import StaticValidationResult, StaticValidator
|
|
52
|
+
|
|
53
|
+
# Public API of cfa.governance: every name below is imported above and
# re-exported, grouped by the kind of object it is.
__all__ = [
    # Types
    "DatasetClassification",
    "DatasetRef",
    "ExecutionContext",
    "Fault",
    "FaultFamily",
    "FaultSeverity",
    "PolicyAction",
    "PolicyResult",
    "SignatureConstraints",
    "StateSignature",
    "TargetLayer",
    # Policy
    "PolicyEngine",
    "PolicyRule",
    "build_default_ruleset",
    # Validation
    "StaticValidator",
    "StaticValidationResult",
    "RuntimeValidator",
    "RuntimeThresholds",
    "RuntimeValidationResult",
]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cfa.lifecycle -- Intent lifecycle
|
|
3
|
+
========================================
|
|
4
|
+
Indices (IFo, IFs, IFg, IDI) and the Promotion/Demotion Engine.
|
|
5
|
+
Turns repetitive intents into industrialized skills.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from datetime import datetime, timezone

from cfa.lifecycle import (
|
|
9
|
+
PromotionEngine, PromotionPolicy,
|
|
10
|
+
IndexCalculator, ExecutionRecord,
|
|
11
|
+
SkillState,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
engine = PromotionEngine(policy=PromotionPolicy(min_executions=5))
|
|
15
|
+
|
|
16
|
+
# Record executions
|
|
17
|
+
engine.record_execution(ExecutionRecord(
|
|
18
|
+
signature_hash="abc123",
|
|
19
|
+
timestamp=datetime.now(timezone.utc),
|
|
20
|
+
success=True,
|
|
21
|
+
cost_dbu=5.0,
|
|
22
|
+
duration_seconds=30.0,
|
|
23
|
+
))
|
|
24
|
+
|
|
25
|
+
# Evaluate promotion
|
|
26
|
+
skill, scores = engine.evaluate("abc123")
|
|
27
|
+
print(f"State: {skill.state.value}")
|
|
28
|
+
print(f"IFo={scores.ifo:.2f} IFs={scores.ifs:.2f} IFg={scores.ifg} IDI={scores.idi:.2f}")
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from cfa.observability.indices import ExecutionRecord, IndexCalculator, IndexScores
|
|
32
|
+
from cfa.observability.promotion import (
|
|
33
|
+
PromotionEngine,
|
|
34
|
+
PromotionPolicy,
|
|
35
|
+
SkillGenerationMetadata,
|
|
36
|
+
SkillRecord,
|
|
37
|
+
SkillState,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
# Public API of cfa.lifecycle: names re-exported from cfa.observability.indices
# and cfa.observability.promotion.
__all__ = [
    # Indices
    "ExecutionRecord",
    "IndexCalculator",
    "IndexScores",
    # Promotion
    "PromotionEngine",
    "PromotionPolicy",
    "SkillGenerationMetadata",
    "SkillRecord",
    "SkillState",
]
|
cfa/mcp/__init__.py
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFA MCP Server
|
|
3
|
+
==============
|
|
4
|
+
Model Context Protocol server exposing CFA governance tools to AI agents.
|
|
5
|
+
|
|
6
|
+
Tools exposed:
|
|
7
|
+
- cfa_evaluate_signature — Evaluate a StateSignature against policy
|
|
8
|
+
- cfa_describe_rules — List all active policy rules
|
|
9
|
+
- cfa_explain_fault — Explain a fault code with remediation
|
|
10
|
+
- cfa_audit_check — Verify audit chain integrity
|
|
11
|
+
- cfa_list_backends — List registered codegen backends
|
|
12
|
+
|
|
13
|
+
Zero external dependencies — pure stdlib JSON-RPC over stdio.
|
|
14
|
+
Compatible with Claude Desktop, Cursor, Windsurf, Copilot, and any MCP client.
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
python -m cfa.mcp # run as stdio server
|
|
18
|
+
cfa-mcp # via console script (pip install)
|
|
19
|
+
|
|
20
|
+
Config (claude_desktop_config.json):
|
|
21
|
+
{
|
|
22
|
+
"mcpServers": {
|
|
23
|
+
"cfa": {
|
|
24
|
+
"command": "python", "args": ["-m", "cfa.mcp"]
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from __future__ import annotations
|
|
31
|
+
|
|
32
|
+
import json
|
|
33
|
+
import sys
|
|
34
|
+
from typing import Any
|
|
35
|
+
|
|
36
|
+
from cfa.policy.bundle import PolicyBundle
|
|
37
|
+
from cfa.policy.engine import PolicyEngine
|
|
38
|
+
from cfa.types import StateSignature
|
|
39
|
+
|
|
40
|
+
from ..backends import BackendRegistry
|
|
41
|
+
|
|
42
|
+
# Identity reported to clients as "serverInfo" in the "initialize" response.
SERVER_NAME = "cfa-mcp"
SERVER_VERSION = "1.0.0"
|
|
44
|
+
|
|
45
|
+
# ── Tool implementations ─────────────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _signature_from_mapping(sig_data: dict[str, Any]) -> "StateSignature":
    """Build a StateSignature from its JSON-style mapping.

    Optional keys that are absent fall back to the defaults written out
    below. Exceptions are not handled here; the caller reports them.
    """
    from cfa.types import (
        DatasetClassification,
        DatasetRef,
        ExecutionContext,
        SignatureConstraints,
        TargetLayer,
    )

    # NOTE(review): both lookups are case-sensitive and an unrecognised value
    # silently becomes INTERNAL / SILVER - confirm this is intended.
    layers = {
        "bronze": TargetLayer.BRONZE,
        "silver": TargetLayer.SILVER,
        "gold": TargetLayer.GOLD,
    }
    classifications = {
        "public": DatasetClassification.PUBLIC,
        "internal": DatasetClassification.INTERNAL,
        "sensitive": DatasetClassification.SENSITIVE,
        "high_volume": DatasetClassification.HIGH_VOLUME,
    }

    dataset_refs = []
    for entry in sig_data.get("datasets", []):
        dataset_refs.append(
            DatasetRef(
                name=entry["name"],
                classification=classifications.get(
                    entry.get("classification", "internal"),
                    DatasetClassification.INTERNAL,
                ),
                size_gb=entry.get("size_gb", 0.0),
                pii_columns=tuple(entry.get("pii_columns", [])),
                partition_column=entry.get("partition_column"),
            )
        )
    datasets = tuple(dataset_refs)

    raw_constraints = sig_data.get("constraints", {})
    constraints = SignatureConstraints(
        no_pii_raw=raw_constraints.get("no_pii_raw", True),
        merge_key_required=raw_constraints.get("merge_key_required", True),
        enforce_types=raw_constraints.get("enforce_types", True),
        partition_by=tuple(raw_constraints.get("partition_by", [])),
        max_cost_dbu=raw_constraints.get("max_cost_dbu"),
    )

    raw_context = sig_data.get("execution_context", {})
    execution_context = ExecutionContext(
        policy_bundle_version=raw_context.get("policy_bundle_version", "mcp"),
        catalog_snapshot_version=raw_context.get("catalog_snapshot_version", "mcp"),
        context_registry_version_id=raw_context.get("context_registry_version_id", "mcp"),
    )

    return StateSignature(
        domain=sig_data.get("domain", ""),
        intent=sig_data.get("intent", ""),
        target_layer=layers.get(sig_data.get("target_layer", "silver"), TargetLayer.SILVER),
        datasets=datasets,
        constraints=constraints,
        execution_context=execution_context,
    )


def tool_evaluate_signature(args: dict[str, Any]) -> dict[str, Any]:
    """Evaluate the ``signature`` argument against a policy engine.

    Args:
        args: Tool arguments. ``signature`` (required) is the StateSignature
            mapping; ``policy_bundle`` (optional) is a path to a bundle file,
            loaded as YAML when it ends in ``.yaml``/``.yml`` and as JSON
            otherwise. Without it a default ``PolicyEngine()`` is used.

    Returns:
        A dict with ``action``, ``passed``, ``faults``, ``reasoning`` and
        ``replan_count``, or ``{"error": ...}`` when the signature is
        missing or invalid or the bundle cannot be loaded.
    """
    sig_data = args.get("signature")
    if not sig_data:
        return {"error": "Missing required argument: signature"}

    try:
        signature = _signature_from_mapping(sig_data)
    except Exception as e:
        return {"error": f"Invalid signature: {e}"}

    bundle_path = args.get("policy_bundle", "")
    if not bundle_path:
        engine = PolicyEngine()
    else:
        try:
            if bundle_path.endswith((".yaml", ".yml")):
                bundle = PolicyBundle.from_yaml(bundle_path)
            else:
                bundle = PolicyBundle.from_json(bundle_path)
            engine = PolicyEngine(rules=bundle.rules, policy_bundle_version=bundle.version)
        except Exception as e:
            return {"error": f"Failed to load policy bundle: {e}"}

    result = engine.evaluate(signature)

    report: dict[str, Any] = {}
    report["action"] = result.action.value
    report["passed"] = result.action.value == "approve"
    report["faults"] = [
        {
            "code": fault.code,
            "severity": fault.severity.value,
            "message": fault.message,
            "remediation": list(fault.remediation),
        }
        for fault in result.faults
    ]
    report["reasoning"] = result.reasoning
    report["replan_count"] = result.replan_count
    return report
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def tool_describe_rules(args: dict[str, Any]) -> dict[str, Any]:
    """Describe the rules of the default engine or of a given policy bundle.

    Args:
        args: Tool arguments. ``policy_bundle`` (optional) is a path to a
            bundle file, loaded as YAML when it ends in ``.yaml``/``.yml``
            and as JSON otherwise.

    Returns:
        A dict with ``policy_bundle_version``, ``rule_count`` and ``rules``,
        or ``{"error": ...}`` when the bundle cannot be loaded.
    """
    bundle_path = args.get("policy_bundle", "")

    if not bundle_path:
        engine = PolicyEngine()
    else:
        try:
            if bundle_path.endswith((".yaml", ".yml")):
                bundle = PolicyBundle.from_yaml(bundle_path)
            else:
                bundle = PolicyBundle.from_json(bundle_path)
            engine = PolicyEngine(rules=bundle.rules, policy_bundle_version=bundle.version)
        except Exception as e:
            return {"error": f"Failed to load policy bundle: {e}"}

    summary: dict[str, Any] = {}
    summary["policy_bundle_version"] = engine.policy_bundle_version
    summary["rule_count"] = len(engine.rules)
    summary["rules"] = engine.describe_rules()
    return summary
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def tool_explain_fault(args: dict[str, Any]) -> dict[str, Any]:
    """Look up a fault code among the default engine's rules.

    Args:
        args: Tool arguments; ``fault_code`` is required.

    Returns:
        Details of the first rule whose ``fault_code`` matches, or
        ``{"error": ...}`` when the argument is missing or no rule matches.
    """
    code = args.get("fault_code", "")
    if not code:
        return {"error": "Missing required argument: fault_code"}

    # Only the rules of a default PolicyEngine() are searched.
    engine = PolicyEngine()
    rule = next((r for r in engine.rules if r.fault_code == code), None)
    if rule is None:
        return {"error": f"Unknown fault code: {code}"}

    return {
        "fault_code": rule.fault_code,
        "rule_name": rule.name,
        "action": rule.action.value,
        "severity": rule.severity.value,
        "family": rule.fault_family.value,
        "message": rule.message,
        "remediation": list(rule.remediation),
    }
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def tool_audit_check(args: dict[str, Any]) -> dict[str, Any]:
    """Report whether the audit trail's chain verifies.

    Args:
        args: Tool arguments. With a non-empty ``intent_id`` the reply
            counts the events for that intent; otherwise it reports the
            trail's total event count.

    Returns:
        ``{"intent_id", "event_count", "chain_intact"}`` for an intent, or
        ``{"total_events", "chain_intact"}`` for the whole trail.
    """
    from cfa.audit.trail import AuditTrail

    # NOTE(review): a new AuditTrail() is built on every call; whether it
    # loads previously recorded events is not visible here - confirm.
    trail = AuditTrail()
    intent_id = args.get("intent_id", "")

    if not intent_id:
        intact = trail.verify_chain()
        return {"total_events": trail.event_count, "chain_intact": intact}

    matching = trail.get_events_for_intent(intent_id)
    intact = trail.verify_chain()
    return {
        "intent_id": intent_id,
        "event_count": len(matching),
        "chain_intact": intact,
    }
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def tool_list_backends(args: dict[str, Any]) -> dict[str, Any]:
    """List every backend in the registry singleton with its capability flags.

    Each registered factory is called to create a backend. A backend without
    a ``get_capabilities`` method, or whose capabilities object is falsy, is
    reported with every flag ``False`` and no supported languages.

    Args:
        args: Tool arguments; not used.

    Returns:
        ``{"backends": [...]}`` with one entry per registered name.
    """
    registry = BackendRegistry.singleton()
    entries: list[dict[str, Any]] = []

    for name in registry.list():
        backend = registry.get(name)()
        if hasattr(backend, "get_capabilities"):
            caps = backend.get_capabilities()
        else:
            caps = None

        if caps:
            entry = {
                "name": name,
                "supports_merge": caps.supports_merge,
                "supports_anonymization": caps.supports_anonymization,
                "supports_partition_overwrite": caps.supports_partition_overwrite,
                "cost_model_available": caps.cost_model_available,
                "supported_languages": caps.supported_languages,
            }
        else:
            entry = {
                "name": name,
                "supports_merge": False,
                "supports_anonymization": False,
                "supports_partition_overwrite": False,
                "cost_model_available": False,
                "supported_languages": [],
            }
        entries.append(entry)

    return {"backends": entries}
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# ── Tool registry ────────────────────────────────────────────────────────────
|
|
216
|
+
|
|
217
|
+
def _object_schema(properties: dict[str, Any], required: list[str] | None = None) -> dict[str, Any]:
    """Return a JSON-Schema "object" description; ``required`` is added only when given."""
    schema: dict[str, Any] = {"type": "object", "properties": properties}
    if required is not None:
        schema["required"] = required
    return schema


# Tool name -> description, input schema and handler. "tools/list" publishes
# the description and schema; "tools/call" invokes the handler.
TOOLS = {
    "cfa_evaluate_signature": {
        "description": "Evaluate a StateSignature JSON against the active CFA policy bundle. Returns APPROVE, REPLAN, or BLOCK with faults and remediation.",
        "inputSchema": _object_schema(
            {
                "signature": {"type": "object", "description": "StateSignature JSON with domain, intent, target_layer, datasets, constraints"},
                "policy_bundle": {"type": "string", "description": "Optional path to YAML/JSON policy bundle file"},
            },
            ["signature"],
        ),
        "handler": tool_evaluate_signature,
    },
    "cfa_describe_rules": {
        "description": "List all active CFA policy rules with descriptions and severities.",
        "inputSchema": _object_schema(
            {
                "policy_bundle": {"type": "string", "description": "Optional path to YAML/JSON policy bundle file"},
            },
        ),
        "handler": tool_describe_rules,
    },
    "cfa_explain_fault": {
        "description": "Explain a CFA fault code: what it means, why it occurs, and how to fix it.",
        "inputSchema": _object_schema(
            {
                "fault_code": {"type": "string", "description": "Fault code to explain, e.g. GOVERNANCE_RAW_PII_IN_PROTECTED_LAYER"},
            },
            ["fault_code"],
        ),
        "handler": tool_explain_fault,
    },
    "cfa_audit_check": {
        "description": "Verify the integrity of the CFA audit trail hash chain.",
        "inputSchema": _object_schema(
            {
                "intent_id": {"type": "string", "description": "Optional: check audit trail for a specific intent ID"},
            },
        ),
        "handler": tool_audit_check,
    },
    "cfa_list_backends": {
        "description": "List all registered CFA codegen backends with their capabilities.",
        "inputSchema": _object_schema({}),
        "handler": tool_list_backends,
    },
}
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
# ── JSON-RPC Server ──────────────────────────────────────────────────────────
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _rpc_error(id: Any, code: int, message: str) -> dict[str, Any]:
|
|
276
|
+
return {"jsonrpc": "2.0", "id": id, "error": {"code": code, "message": message}}
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _rpc_response(id: Any, result: Any) -> dict[str, Any]:
|
|
280
|
+
return {"jsonrpc": "2.0", "id": id, "result": result}
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _handle_request(req: dict[str, Any]) -> dict[str, Any] | None:
    """Dispatch one decoded JSON-RPC message and build its reply.

    Handled methods are ``initialize``, ``tools/list``, ``tools/call`` and
    ``ping``. Any ``notifications/*`` message, and any unknown method sent
    without an ``id``, is a notification and gets no reply.

    Args:
        req: The decoded JSON-RPC message. A value that is not a dict is
            answered with an Invalid Request error instead of raising.

    Returns:
        The response envelope, or ``None`` when no reply must be sent.
    """
    if not isinstance(req, dict):
        # E.g. a JSON array or scalar: reply instead of failing on .get().
        return _rpc_error(None, -32600, "Invalid Request")

    method = req.get("method", "")
    req_id = req.get("id")

    if method == "initialize":
        return _rpc_response(req_id, {
            "protocolVersion": "2024-11-05",
            "serverInfo": {"name": SERVER_NAME, "version": SERVER_VERSION},
            "capabilities": {"tools": {}},
        })

    if isinstance(method, str) and method.startswith("notifications/"):
        return None  # No response for notifications

    if method == "tools/list":
        tools_list = [
            {
                "name": name,
                "description": info["description"],
                "inputSchema": info["inputSchema"],
            }
            for name, info in TOOLS.items()
        ]
        return _rpc_response(req_id, {"tools": tools_list})

    if method == "tools/call":
        # "params" and "arguments" may be absent or null; treat both as empty.
        params = req.get("params") or {}
        if not isinstance(params, dict):
            return _rpc_error(req_id, -32602, "Invalid params")
        tool_name = params.get("name", "")
        tool_args = params.get("arguments") or {}

        # A non-string name cannot match a tool and may not even be hashable.
        tool = TOOLS.get(tool_name) if isinstance(tool_name, str) else None
        if not tool:
            return _rpc_error(req_id, -32601, f"Unknown tool: {tool_name}")

        try:
            result = tool["handler"](tool_args)
            return _rpc_response(req_id, {
                "content": [{"type": "text", "text": json.dumps(result, indent=2, default=str)}]
            })
        except Exception as e:
            return _rpc_error(req_id, -32603, f"Tool error: {e}")

    if method == "ping":
        return _rpc_response(req_id, {})

    if "id" not in req:
        # An unknown method without an id is a notification: no reply.
        return None

    return _rpc_error(req_id, -32601, f"Method not found: {method}")
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def serve() -> None:
    """Run the MCP server on stdio: one JSON-RPC message per input line.

    Blank lines are skipped. A line that is not valid JSON is answered with
    a Parse error (-32700) and a null id. An exception escaping
    ``_handle_request`` is answered with an Internal error (-32603), unless
    the message was a notification (no ``id``), so one bad message does not
    stop the server. Each reply is written to stdout as a single line and
    flushed immediately.
    """
    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue

        try:
            req = json.loads(line)
        except json.JSONDecodeError:
            resp = _rpc_error(None, -32700, "Parse error")
        else:
            try:
                resp = _handle_request(req)
            except Exception as e:
                if isinstance(req, dict) and "id" not in req:
                    # Notifications never get a reply, even on failure.
                    resp = None
                else:
                    req_id = req.get("id") if isinstance(req, dict) else None
                    resp = _rpc_error(req_id, -32603, f"Internal error: {e}")

        if resp is not None:
            sys.stdout.write(json.dumps(resp) + "\n")
            sys.stdout.flush()
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
# Start the stdio server when this file is executed as a script.
if __name__ == "__main__":
    serve()
|
cfa/mcp/__main__.py
ADDED