cfa-kernel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cfa/__init__.py +39 -0
- cfa/_lazy.py +39 -0
- cfa/adapters/__init__.py +104 -0
- cfa/adapters/autogen.py +19 -0
- cfa/adapters/crewai.py +19 -0
- cfa/adapters/dspy.py +19 -0
- cfa/adapters/langgraph.py +19 -0
- cfa/adapters/openai_agents.py +19 -0
- cfa/audit/__init__.py +15 -0
- cfa/audit/context.py +205 -0
- cfa/audit/hashing.py +41 -0
- cfa/audit/trail.py +194 -0
- cfa/backends/__init__.py +132 -0
- cfa/backends/dbt.py +338 -0
- cfa/backends/pyspark.py +240 -0
- cfa/backends/sql.py +270 -0
- cfa/behavior/__init__.py +49 -0
- cfa/behavior/llm.py +244 -0
- cfa/behavior/spec.py +235 -0
- cfa/behavior/systematizer.py +222 -0
- cfa/cli/__init__.py +296 -0
- cfa/cli/__main__.py +6 -0
- cfa/cli/_helpers.py +109 -0
- cfa/cli/core/__init__.py +0 -0
- cfa/cli/core/evaluate.py +72 -0
- cfa/cli/core/validate.py +29 -0
- cfa/cli/formatters.py +280 -0
- cfa/cli/governance/__init__.py +0 -0
- cfa/cli/governance/audit.py +65 -0
- cfa/cli/governance/catalog.py +28 -0
- cfa/cli/governance/policy.py +119 -0
- cfa/cli/governance/rules.py +42 -0
- cfa/cli/governance/signature.py +31 -0
- cfa/cli/infrastructure/__init__.py +0 -0
- cfa/cli/infrastructure/backend_list.py +24 -0
- cfa/cli/infrastructure/storage.py +87 -0
- cfa/cli/project/__init__.py +0 -0
- cfa/cli/project/init.py +73 -0
- cfa/cli/project/lifecycle.py +92 -0
- cfa/cli/project/status.py +75 -0
- cfa/cli/project/taxonomy.py +38 -0
- cfa/cli/reporting/__init__.py +0 -0
- cfa/cli/reporting/report.py +109 -0
- cfa/cli/reporting/serve.py +43 -0
- cfa/config.py +103 -0
- cfa/core/__init__.py +19 -0
- cfa/core/codegen.py +65 -0
- cfa/core/conditions.py +129 -0
- cfa/core/kernel.py +224 -0
- cfa/core/phases/__init__.py +0 -0
- cfa/core/phases/runner.py +477 -0
- cfa/core/planner.py +290 -0
- cfa/execution/__init__.py +12 -0
- cfa/execution/partial.py +339 -0
- cfa/execution/state_projection.py +216 -0
- cfa/governance/__init__.py +76 -0
- cfa/lifecycle/__init__.py +51 -0
- cfa/mcp/__init__.py +347 -0
- cfa/mcp/__main__.py +4 -0
- cfa/normalizer/__init__.py +15 -0
- cfa/normalizer/base.py +441 -0
- cfa/normalizer/llm.py +426 -0
- cfa/observability/__init__.py +14 -0
- cfa/observability/indices.py +177 -0
- cfa/observability/metrics.py +91 -0
- cfa/observability/notify.py +79 -0
- cfa/observability/otel.py +81 -0
- cfa/observability/promotion.py +367 -0
- cfa/policy/__init__.py +12 -0
- cfa/policy/bundle.py +317 -0
- cfa/policy/catalog.py +117 -0
- cfa/policy/engine.py +306 -0
- cfa/reporting/__init__.py +42 -0
- cfa/reporting/charts.py +223 -0
- cfa/reporting/engine.py +456 -0
- cfa/resolution/__init__.py +62 -0
- cfa/runtime/__init__.py +13 -0
- cfa/runtime/gate.py +287 -0
- cfa/sandbox/__init__.py +189 -0
- cfa/sandbox/executor.py +92 -0
- cfa/sandbox/mock.py +89 -0
- cfa/sandbox/panic.py +52 -0
- cfa/storage/__init__.py +591 -0
- cfa/testing/__init__.py +60 -0
- cfa/testing/asserts.py +77 -0
- cfa/testing/evaluate.py +168 -0
- cfa/testing/fixtures.py +89 -0
- cfa/testing/markers.py +36 -0
- cfa/types.py +489 -0
- cfa/validation/__init__.py +14 -0
- cfa/validation/runtime.py +285 -0
- cfa/validation/signature.py +146 -0
- cfa/validation/static.py +252 -0
- cfa_kernel-0.1.0.dist-info/METADATA +32 -0
- cfa_kernel-0.1.0.dist-info/RECORD +98 -0
- cfa_kernel-0.1.0.dist-info/WHEEL +4 -0
- cfa_kernel-0.1.0.dist-info/entry_points.txt +3 -0
- cfa_kernel-0.1.0.dist-info/licenses/LICENSE +21 -0
cfa/normalizer/llm.py
ADDED
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFA LLM Normalizer Backend
|
|
3
|
+
===========================
|
|
4
|
+
Replaces keyword matching with real LLM semantic resolution.
|
|
5
|
+
|
|
6
|
+
The Normalizer is the most critical pipeline component — an error here
|
|
7
|
+
contaminates the entire system. This backend uses an LLM to understand
|
|
8
|
+
natural language intents and map them to typed StateSignatures using
|
|
9
|
+
the data catalog as ground truth.
|
|
10
|
+
|
|
11
|
+
Architecture:
|
|
12
|
+
NL intent + catalog → LLM prompt → JSON response → NormalizerOutput → StateSignature
|
|
13
|
+
|
|
14
|
+
Strict mode (default for production):
|
|
15
|
+
- LLM response is validated against catalog
|
|
16
|
+
- Datasets returned by LLM MUST exist in catalog
|
|
17
|
+
- Classifications MUST match catalog metadata
|
|
18
|
+
- No silent fallback — failure raises explicitly
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import hashlib
|
|
24
|
+
import json
|
|
25
|
+
from abc import ABC, abstractmethod
|
|
26
|
+
from dataclasses import dataclass
|
|
27
|
+
from typing import Any
|
|
28
|
+
|
|
29
|
+
from cfa.normalizer.base import NormalizerBackend, NormalizerInput, NormalizerOutput
|
|
30
|
+
|
|
31
|
+
_SYSTEM_PROMPT = """\
|
|
32
|
+
You are a data governance resolution engine. Your task is to analyze a natural \
|
|
33
|
+
language intent and map it to a structured JSON output using the provided data \
|
|
34
|
+
catalog as ground truth.
|
|
35
|
+
|
|
36
|
+
## Rules
|
|
37
|
+
|
|
38
|
+
1. **domain**: Classify the business domain. Choose from: fiscal_data_processing, \
|
|
39
|
+
customer_data, financial_data, inventory, sales, general.
|
|
40
|
+
Use "general" only if no specific domain matches.
|
|
41
|
+
|
|
42
|
+
2. **intent**: Classify the operation type. Choose from:
|
|
43
|
+
- reconciliation_and_persist (join/merge/reconcile datasets and write)
|
|
44
|
+
- aggregate_and_persist (group by, summarize, aggregate and write)
|
|
45
|
+
- ingest (load/import/ingest raw data)
|
|
46
|
+
- transform_and_persist (apply transformations and write)
|
|
47
|
+
- query (read-only, no write)
|
|
48
|
+
|
|
49
|
+
3. **target_layer**: Determine the target data layer. Choose from: bronze, silver, gold.
|
|
50
|
+
- bronze: raw ingestion, landing zone
|
|
51
|
+
- silver: refined, joined, cleaned, trusted
|
|
52
|
+
- gold: aggregated, curated, final, master
|
|
53
|
+
|
|
54
|
+
4. **datasets**: List ONLY datasets that appear in the provided catalog. \
|
|
55
|
+
For each, include:
|
|
56
|
+
- name: exact catalog name
|
|
57
|
+
- classification: from catalog metadata
|
|
58
|
+
- pii_columns: from catalog metadata (empty list if none)
|
|
59
|
+
- size_gb: from catalog metadata
|
|
60
|
+
- partition_column: from catalog metadata (null if none)
|
|
61
|
+
|
|
62
|
+
5. **constraints**: Derive governance constraints from the intent:
|
|
63
|
+
- no_pii_raw: true if the intent implies PII will be protected/anonymized/masked.
|
|
64
|
+
Set to false ONLY if the user EXPLICITLY mentions leaving PII raw, unprotected,
|
|
65
|
+
or writing without anonymization. Keywords: "raw", "without anonymization",
|
|
66
|
+
"unprotected", "as-is", "direct", "without masking".
|
|
67
|
+
- merge_key_required: true if writing to silver or gold (safe default)
|
|
68
|
+
- enforce_types: true (safe default)
|
|
69
|
+
- partition_by: list partition columns from datasets involved
|
|
70
|
+
- max_cost_dbu: null (no limit unless specified in intent)
|
|
71
|
+
|
|
72
|
+
6. **confidence_score**: 0.0 to 1.0.
|
|
73
|
+
- 0.90-1.00: all datasets matched in catalog, clear intent
|
|
74
|
+
- 0.70-0.89: most datasets matched, intent is clear
|
|
75
|
+
- 0.50-0.69: partial match or ambiguous intent
|
|
76
|
+
- 0.00-0.49: no catalog match, highly ambiguous
|
|
77
|
+
|
|
78
|
+
7. **ambiguity_level**: low, medium, or high based on confidence and competing interpretations.
|
|
79
|
+
|
|
80
|
+
8. **reasoning**: One sentence explaining your classification in plain English.
|
|
81
|
+
|
|
82
|
+
## Critical: PII Awareness
|
|
83
|
+
|
|
84
|
+
IMPORTANT: Your job is to faithfully capture the user's expressed intent, NOT to protect
|
|
85
|
+
them. The PolicyEngine downstream will BLOCK dangerous operations. Set no_pii_raw: false
|
|
86
|
+
only when the user explicitly asks for raw/unprotected PII in their own words.
|
|
87
|
+
|
|
88
|
+
If any matched dataset has pii_columns in the catalog:
|
|
89
|
+
- Set no_pii_raw: true
|
|
90
|
+
- Increase merge_key_required if writing to silver/gold
|
|
91
|
+
- Note: the PolicyEngine will BLOCK intents that expose raw PII to protected layers
|
|
92
|
+
|
|
93
|
+
## Output Format
|
|
94
|
+
|
|
95
|
+
Return ONLY valid JSON. No markdown fences, no explanation outside the JSON.
|
|
96
|
+
|
|
97
|
+
{
|
|
98
|
+
"domain": "<domain>",
|
|
99
|
+
"intent": "<intent>",
|
|
100
|
+
"target_layer": "<bronze|silver|gold>",
|
|
101
|
+
"datasets": [
|
|
102
|
+
{
|
|
103
|
+
"name": "<exact_catalog_name>",
|
|
104
|
+
"classification": "<from_catalog>",
|
|
105
|
+
"pii_columns": ["<col1>", "<col2>"],
|
|
106
|
+
"size_gb": <number>,
|
|
107
|
+
"partition_column": "<col or null>"
|
|
108
|
+
}
|
|
109
|
+
],
|
|
110
|
+
"constraints": {
|
|
111
|
+
"no_pii_raw": <true|false>,
|
|
112
|
+
"merge_key_required": <true|false>,
|
|
113
|
+
"enforce_types": <true|false>,
|
|
114
|
+
"partition_by": ["<col1>"],
|
|
115
|
+
"max_cost_dbu": <number or null>
|
|
116
|
+
},
|
|
117
|
+
"confidence_score": <0.0-1.0>,
|
|
118
|
+
"ambiguity_level": "<low|medium|high>",
|
|
119
|
+
"competing_interpretations": [],
|
|
120
|
+
"environment_constraints_injected": [],
|
|
121
|
+
"reasoning": "<one sentence>"
|
|
122
|
+
}
|
|
123
|
+
"""
|
|
124
|
+
|
|
125
|
+
_RAW_PII_KEYWORDS = [
    "raw pii", "raw data", "unprotected", "without anonymization",
    "without masking", "without protection", "as-is", "without treatment",
    "sem anonimizacao", "sem mascara", "dados brutos", "pii cru",
    "direct to gold", "direct to silver", "write raw", "raw write",
]

_PII_COLUMN_PATTERNS = [
    "nome", "cpf", "email", "documento", "telefone", "endereco",
    "rg", "passport", "ssn", "credit card", "birth", "nascimento",
]

_ALLOWED_CLASSIFICATIONS = {"public", "internal", "sensitive", "high_volume"}
_ALLOWED_LAYERS = {"bronze", "silver", "gold"}


def _user_wants_raw_pii(intent: str) -> bool:
    """Detect if the user explicitly requested raw/unprotected PII.

    The intent is lower-cased and accent-folded before matching, because the
    Portuguese entries in ``_RAW_PII_KEYWORDS`` / ``_PII_COLUMN_PATTERNS`` are
    stored without diacritics ("sem anonimizacao", "endereco") while users
    normally type them with accents ("sem anonimização", "endereço").

    Args:
        intent: Raw natural-language intent as typed by the user.

    Returns:
        True when any of these substring heuristics matches:
        a keyword from ``_RAW_PII_KEYWORDS``; a PII column pattern directly
        preceded or followed by the word "raw"; or the phrase " with raw ".
    """
    import unicodedata

    # NFKD splits "ç" into "c" + combining cedilla; dropping the combining
    # marks leaves plain ASCII letters. ASCII input is unchanged, so every
    # intent that matched before still matches.
    decomposed = unicodedata.normalize("NFKD", intent.lower())
    folded = "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    if any(keyword in folded for keyword in _RAW_PII_KEYWORDS):
        return True
    if any(
        f"raw {col}" in folded or f"{col} raw" in folded
        for col in _PII_COLUMN_PATTERNS
    ):
        return True
    return " with raw " in folded
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _build_user_message(inp: NormalizerInput) -> str:
    """Render the user-side prompt sent to the LLM.

    The message contains, in order: the raw intent, the data catalog as
    pretty-printed JSON, the environment state as pretty-printed JSON, and the
    three version identifiers carried by ``inp``.

    Args:
        inp: Normalizer input; this function reads ``raw_intent``, ``catalog``,
            ``environment_state``, ``policy_bundle_version``,
            ``catalog_snapshot_version`` and ``context_registry_version_id``.

    Returns:
        The Markdown-formatted user message.
    """
    # default=str mirrors the catalog hashing done in LLMNormalizerBackend.resolve:
    # values json cannot encode natively (dates, Decimals, ...) are stringified
    # instead of raising TypeError before the LLM is ever called.
    catalog_json = json.dumps(inp.catalog, indent=2, ensure_ascii=False, default=str)
    env_json = json.dumps(
        inp.environment_state, indent=2, ensure_ascii=False, default=str
    )
    return f"""## User Intent

{inp.raw_intent}

## Data Catalog (ground truth — use ONLY datasets listed here)

```json
{catalog_json}
```

## Environment State

```json
{env_json}
```

## Metadata

- policy_bundle_version: {inp.policy_bundle_version}
- catalog_snapshot_version: {inp.catalog_snapshot_version}
- context_registry_version_id: {inp.context_registry_version_id}

Analyze the intent against the catalog and return the structured JSON output."""
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class LLMProvider(ABC):
    """Smallest contract a chat model must satisfy to back the normalizer.

    Concrete providers subclass this and implement :meth:`complete`.
    """

    @abstractmethod
    def complete(self, system_prompt: str, user_message: str) -> str:
        """Send one system prompt plus one user message; return the reply text."""
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class OpenAILMProvider(LLMProvider):
    """OpenAI-compatible LLM provider.

    Args:
        model: Model name (default: gpt-4o-mini).
        temperature: Sampling temperature (default: 0.0).
        api_key: OpenAI API key. Reads from OPENAI_API_KEY env var if None.
        base_url: Custom API base URL (Azure, local, etc.).
    """

    def __init__(
        self,
        model: str = "gpt-4o-mini",
        temperature: float = 0.0,
        api_key: str | None = None,
        base_url: str | None = None,
    ) -> None:
        self.model = model
        self.temperature = temperature
        self.api_key = api_key
        self.base_url = base_url

    def complete(self, system_prompt: str, user_message: str) -> str:
        """Run one chat completion and return the first choice's text.

        Args:
            system_prompt: Content of the ``system`` message.
            user_message: Content of the ``user`` message.

        Returns:
            The text of the first choice, or ``""`` when the API returns no
            choices or a choice without content.

        Raises:
            ImportError: If the optional ``openai`` package is not installed.
        """
        # Imported lazily so the package stays importable without openai.
        try:
            from openai import OpenAI
        except ImportError as err:
            raise ImportError(
                "openai package is required for OpenAILMProvider. "
                "Install it with: pip install openai"
            ) from err

        # Only pass what was configured; the SDK applies its own defaults
        # for anything omitted.
        client_kwargs: dict[str, Any] = {}
        if self.api_key:
            client_kwargs["api_key"] = self.api_key
        if self.base_url:
            client_kwargs["base_url"] = self.base_url

        client = OpenAI(**client_kwargs)
        response = client.chat.completions.create(
            model=self.model,
            temperature=self.temperature,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message},
            ],
        )
        # An empty choices list would raise IndexError, which callers of
        # LLMProvider.complete do not treat as an LLM failure; an empty
        # string is the documented "no answer" signal instead.
        if not response.choices:
            return ""
        return response.choices[0].message.content or ""
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
@dataclass
class LLMAuditRecord:
    """Audit entry for one LLM call made by ``LLMNormalizerBackend.resolve``."""

    # Provider's ``model`` attribute, or "unknown" when the provider has none.
    model: str
    # SHA-256 hex digest of the system prompt concatenated with the user message.
    prompt_hash: str
    # SHA-256 hex digest of the raw response text.
    response_hash: str
    # SHA-256 hex digest of the catalog serialized with sorted keys.
    catalog_hash: str
    # Response text exactly as returned by the provider.
    raw_response: str
    # JSON parsed from the response.
    parsed_json: dict[str, Any] | None
    # Messages from catalog validation; stays empty in non-strict mode.
    catalog_validation_errors: list[str]
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
class LLMNormalizerBackend(NormalizerBackend):
    """Normalizer backend powered by an LLM — replaces keyword matching.

    In strict mode (``strict=True``, the default), the LLM response is validated
    against the catalog and any mismatch is raised as an error. In non-strict
    mode (``strict=False``), fallback to rule-based is used on failure.

    Every LLM call is audited: prompt hash, response hash, and catalog validation
    are recorded in ``_audit_records`` for traceability. Failed calls are
    recorded too; their ``parsed_json`` is ``None`` when no JSON object could
    be parsed from the response.

    Usage:
        from cfa.normalizer import IntentNormalizer
        from cfa.normalizer.llm import OpenAILMProvider, LLMNormalizerBackend

        provider = OpenAILMProvider(model="gpt-4o-mini")
        backend = LLMNormalizerBackend(provider=provider)
        normalizer = IntentNormalizer(backend=backend)
        resolution = normalizer.normalize(intent, env_state, catalog)
    """

    def __init__(
        self,
        provider: LLMProvider,
        *,
        strict: bool = True,
    ) -> None:
        self.provider = provider
        self.strict = strict
        self._audit_records: list[LLMAuditRecord] = []

    @property
    def audit_records(self) -> list[LLMAuditRecord]:
        """Return a shallow copy of the audit trail, oldest record first."""
        return list(self._audit_records)

    def resolve(self, inp: NormalizerInput) -> NormalizerOutput:
        """Resolve a natural-language intent through the LLM.

        Args:
            inp: Intent, catalog, environment state and version metadata.

        Returns:
            The normalizer output built from the LLM's JSON answer, or, in
            non-strict mode after a failure, the rule-based backend's output.

        Raises:
            ValueError: In strict mode, when the response is empty, is not a
                JSON object, or fails catalog validation.
            ImportError, ConnectionError, TimeoutError, OSError: In strict
                mode, when the provider raises them.
        """
        user_msg = _build_user_message(inp)
        prompt_hash = hashlib.sha256(
            (_SYSTEM_PROMPT + user_msg).encode("utf-8")
        ).hexdigest()

        catalog_json = json.dumps(inp.catalog, sort_keys=True, default=str)
        catalog_hash = hashlib.sha256(catalog_json.encode("utf-8")).hexdigest()

        raw = ""
        data: dict[str, Any] | None = None
        audited = False
        try:
            raw = self.provider.complete(_SYSTEM_PROMPT, user_msg) or ""
            if not raw.strip():
                raise ValueError("LLM returned empty response")

            data = self._parse_json(raw)
            errors = (
                self._validate_against_catalog(data, inp.catalog)
                if self.strict
                else []
            )
            self._append_audit(prompt_hash, catalog_hash, raw, data, errors)
            audited = True
            if errors:
                err_msg = "; ".join(errors)
                raise ValueError(f"LLM response failed catalog validation: {err_msg}")

            return self._build_output(data, inp)

        except (ValueError, ImportError, ConnectionError, TimeoutError, json.JSONDecodeError, OSError):
            # Failures before the success-path audit (provider error, empty
            # or unparseable response) must still leave a trace.
            if not audited:
                self._append_audit(prompt_hash, catalog_hash, raw, data, [])
            if self.strict:
                raise
            from cfa.normalizer.base import RuleBasedNormalizerBackend
            return RuleBasedNormalizerBackend().resolve(inp)

    def _append_audit(
        self,
        prompt_hash: str,
        catalog_hash: str,
        raw: str,
        data: dict[str, Any] | None,
        errors: list[str],
    ) -> None:
        """Append one audit record for the current LLM call."""
        self._audit_records.append(
            LLMAuditRecord(
                model=getattr(self.provider, "model", "unknown"),
                prompt_hash=prompt_hash,
                response_hash=hashlib.sha256(raw.encode("utf-8")).hexdigest(),
                catalog_hash=catalog_hash,
                raw_response=raw,
                parsed_json=data,
                catalog_validation_errors=errors,
            )
        )

    # ── Output builder ────────────────────────────────────────────────────

    def _build_output(
        self, data: dict[str, Any], inp: NormalizerInput
    ) -> NormalizerOutput:
        """Convert the parsed LLM JSON into a ``NormalizerOutput``.

        Missing keys fall back to conservative defaults. If the user's own
        wording asks for raw PII (see ``_user_wants_raw_pii``), ``no_pii_raw``
        is forced to ``False`` regardless of what the LLM answered, and a
        marker is appended to ``reasoning``.

        Raises:
            ValueError: If ``confidence_score`` cannot be converted to float.
        """
        constraints = data.get("constraints")
        if isinstance(constraints, dict):
            # Copy: the same dict is referenced by the audit record's
            # parsed_json, which must keep what the LLM actually returned.
            constraints = dict(constraints)
        else:
            constraints = {
                "no_pii_raw": True,
                "merge_key_required": True,
                "enforce_types": True,
                "partition_by": [],
            }

        raw_score = data.get("confidence_score")
        if raw_score is None:
            confidence = 0.5
        else:
            try:
                confidence = float(raw_score)
            except TypeError as err:
                # Surface as ValueError so resolve() handles it like any
                # other malformed LLM answer.
                raise ValueError(
                    f"confidence_score is not numeric: {raw_score!r}"
                ) from err

        output = NormalizerOutput(
            domain=data.get("domain", "general"),
            intent=data.get("intent", "transform_and_persist"),
            target_layer=data.get("target_layer", "silver"),
            datasets=data.get("datasets") or [],
            constraints=constraints,
            confidence_score=confidence,
            ambiguity_level=data.get("ambiguity_level", "medium"),
            competing_interpretations=data.get("competing_interpretations") or [],
            environment_constraints_injected=data.get("environment_constraints_injected") or [],
            reasoning=data.get("reasoning", ""),
        )

        if _user_wants_raw_pii(inp.raw_intent):
            output.constraints["no_pii_raw"] = False
            if not output.reasoning:
                output.reasoning = ""
            output.reasoning += " [RAW PII REQUESTED — set no_pii_raw=False]"

        return output

    # ── Catalog validation (strict mode) ──────────────────────────────────

    def _validate_against_catalog(
        self, data: dict[str, Any], catalog: dict[str, Any]
    ) -> list[str]:
        """Check the LLM answer against the catalog and the allowed values.

        Args:
            data: Parsed LLM JSON.
            catalog: Data catalog; ``catalog["datasets"]`` is indexed by
                dataset name (assumed to be a mapping of name -> metadata dict).

        Returns:
            Human-readable error messages; an empty list means valid.
        """
        errors: list[str] = []
        catalog_datasets = catalog.get("datasets") or {}

        raw_datasets = data.get("datasets") or []
        if not isinstance(raw_datasets, list):
            errors.append("'datasets' in LLM response must be a list")
            raw_datasets = []
        datasets: list[dict[str, Any]] = []
        for entry in raw_datasets:
            if isinstance(entry, dict):
                datasets.append(entry)
            else:
                errors.append("dataset entry in LLM response is not an object")

        for ds in datasets:
            name = ds.get("name", "")
            if not name or not isinstance(name, str):
                errors.append("dataset name is missing in LLM response")
                continue
            if name not in catalog_datasets:
                errors.append(
                    f"dataset '{name}' returned by LLM does not exist in catalog. "
                    f"Available: {sorted(catalog_datasets.keys())}"
                )
                continue

            cat_entry = catalog_datasets[name]
            llm_classification = ds.get("classification", "")
            cat_classification = cat_entry.get("classification", "internal")
            if llm_classification and llm_classification != cat_classification:
                errors.append(
                    f"dataset '{name}': LLM said classification='{llm_classification}' "
                    f"but catalog says '{cat_classification}'"
                )

            # Only omissions are errors: the LLM may list extra columns, but
            # it must not drop any PII column the catalog declares.
            llm_pii = set(ds.get("pii_columns") or [])
            cat_pii = set(cat_entry.get("pii_columns") or [])
            missed = cat_pii - llm_pii
            if missed:
                errors.append(
                    f"dataset '{name}': LLM missed PII columns declared in catalog: "
                    f"{sorted(missed)}"
                )

        target_layer = data.get("target_layer", "")
        if target_layer and (
            not isinstance(target_layer, str) or target_layer not in _ALLOWED_LAYERS
        ):
            errors.append(f"target_layer '{target_layer}' is not valid. Use: {sorted(_ALLOWED_LAYERS)}")

        for ds in datasets:
            classification = ds.get("classification", "")
            if classification and (
                not isinstance(classification, str)
                or classification not in _ALLOWED_CLASSIFICATIONS
            ):
                errors.append(
                    f"classification '{classification}' for dataset '{ds.get('name', '?')}' "
                    f"is not valid. Use: {sorted(_ALLOWED_CLASSIFICATIONS)}"
                )

        return errors

    # ── JSON parser ───────────────────────────────────────────────────────

    def _parse_json(self, raw: str) -> dict[str, Any]:
        """Extract the JSON object from an LLM reply.

        Tolerates a surrounding Markdown code fence and, failing a direct
        parse, falls back to the text between the first ``{`` and last ``}``.

        Raises:
            ValueError: If no JSON can be parsed (``json.JSONDecodeError`` is
                a ``ValueError`` subclass) or the parsed value is not an object.
        """
        text = raw.strip()
        if text.startswith("```"):
            # Drop the opening fence line (it may carry a language tag) and
            # the closing fence if it sits on its own line.
            lines = text.split("\n")[1:]
            if lines and lines[-1].strip() == "```":
                lines = lines[:-1]
            text = "\n".join(lines)
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError as err:
            start = text.find("{")
            end = text.rfind("}")
            if start < 0 or end <= start:
                raise ValueError("LLM response does not contain valid JSON") from err
            parsed = json.loads(text[start : end + 1])
        if not isinstance(parsed, dict):
            # Callers use .get() on the result; a list or scalar would fail
            # later with an exception resolve() does not handle.
            raise ValueError("LLM response JSON must be an object")
        return parsed
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""CFA Observability — metrics, otel, notify, indices, promotion."""
|
|
2
|
+
from cfa._lazy import LazyLoader

# Assigning a callable to the module-level ``__getattr__`` makes Python call it
# for attribute names not otherwise found on this package (PEP 562).
# Each entry maps a public name to a (module path, attribute name) pair.
# NOTE(review): presumably LazyLoader imports the target module on first
# access — confirm against cfa/_lazy.py.
__getattr__ = LazyLoader({
    "get_metrics_text": ("cfa.observability.metrics", "get_metrics_text"),
    "inc_counter": ("cfa.observability.metrics", "inc_counter"),
    "IndexCalculator": ("cfa.observability.indices", "IndexCalculator"),
    "IndexScores": ("cfa.observability.indices", "IndexScores"),
    "ExecutionRecord": ("cfa.observability.indices", "ExecutionRecord"),
    "PromotionEngine": ("cfa.observability.promotion", "PromotionEngine"),
    "PromotionPolicy": ("cfa.observability.promotion", "PromotionPolicy"),
    "SkillState": ("cfa.observability.promotion", "SkillState"),
    "SkillRecord": ("cfa.observability.promotion", "SkillRecord"),
})
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CFA Intent Indices
|
|
3
|
+
==================
|
|
4
|
+
Quantitative signals for intent lifecycle management.
|
|
5
|
+
|
|
6
|
+
Four indices track the health and maturity of each intent_signature_hash:
|
|
7
|
+
|
|
8
|
+
- IFo (Índice de Fluidez Operacional): operational fluidity
|
|
9
|
+
IFo = (1 - normalized_latency) × (1 - normalized_cost) × execution_stability
|
|
10
|
+
|
|
11
|
+
- IFs (Índice de Fidelidade Semântica): semantic fidelity
|
|
12
|
+
IFs = output_contract_adherence × absence_of_semantic_drift × invariant_preservation
|
|
13
|
+
|
|
14
|
+
- IFg (Índice de Governança): governance compliance — BINARY by design
|
|
15
|
+
IFg = policy_compliance × absence_of_pii_exposure × layer_adherence
|
|
16
|
+
IFg = 1 is the ONLY acceptable value. IFg < 1 means systemic failure.
|
|
17
|
+
|
|
18
|
+
- IDI (Intent Drift Index): drift detection
|
|
19
|
+
IDI = 1 - (replanned_executions / total_executions) over last 30 days
|
|
20
|
+
IDI near 1.0 = stable; IDI < 0.75 = watchlist; IDI < 0.50 = immediate demotion
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
from dataclasses import dataclass, field
|
|
26
|
+
from datetime import datetime, timedelta
|
|
27
|
+
|
|
28
|
+
from cfa.types import _utcnow
|
|
29
|
+
|
|
30
|
+
# ── Execution Record ────────────────────────────────────────────────────────
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class ExecutionRecord:
    """Single execution record for index computation."""

    # Intent signature the execution belongs to; IndexCalculator.compute filters on it.
    signature_hash: str
    # Compared against the window cutoff in IndexCalculator.compute.
    timestamp: datetime
    # Counts toward the success rate used as IFo execution stability.
    success: bool
    # Replanned runs lower IDI and the drift factor of IFs.
    replanned: bool = False
    # Divided by max_expected_cost to get the IFo cost ratio.
    cost_dbu: float = 0.0
    # Divided by max_expected_duration to get the IFo latency ratio.
    duration_seconds: float = 0.0
    # Any entry makes the run count as not fault-free in IFs.
    faults: list[str] = field(default_factory=list)
    # Feeds the schema-adherence factor of IFs.
    schema_match: bool = True
    # The next three feed IFg: one violating record sets IFg to 0.0.
    pii_exposure: bool = False
    policy_compliant: bool = True
    layer_adherent: bool = True

    # Normalization baselines (configurable per domain)
    max_expected_duration: float = 300.0  # 5 minutes baseline
    max_expected_cost: float = 50.0  # 50 DBU baseline
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# ── Index Results ───────────────────────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass(frozen=True)
class IndexScores:
    """Immutable snapshot of the four indices for one signature_hash."""

    signature_hash: str
    ifo: float  # Operational Fluidity [0, 1]
    ifs: float  # Semantic Fidelity [0, 1]
    ifg: float  # Governance — binary: 0 or 1
    idi: float  # Drift Index [0, 1]
    execution_count: int
    window_days: int
    computed_at: datetime = field(default_factory=_utcnow)

    @property
    def promotion_eligible(self) -> bool:
        """Whether the scores clear the default promotion thresholds.

        Requires IFg exactly 1.0, IFo of at least 0.75 and IFs of at least 0.90.
        """
        if self.ifg != 1.0:
            return False
        fluid_enough = self.ifo >= 0.75
        faithful_enough = self.ifs >= 0.90
        return fluid_enough and faithful_enough

    @property
    def drift_detected(self) -> bool:
        """True when IDI is below the 0.75 watchlist threshold."""
        watchlist_threshold = 0.75
        return self.idi < watchlist_threshold

    @property
    def severe_drift(self) -> bool:
        """True when IDI is below the 0.50 threshold."""
        severe_threshold = 0.50
        return self.idi < severe_threshold
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# ── Index Calculator ────────────────────────────────────────────────────────
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class IndexCalculator:
    """
    Computes IFo, IFs, IFg, IDI from execution records.

    Operates on a time window (default 30 days).
    All indices are per signature_hash.
    """

    def __init__(self, window_days: int = 30) -> None:
        self.window_days = window_days

    def compute(
        self, signature_hash: str, records: list[ExecutionRecord]
    ) -> IndexScores:
        """Compute all four indices for one signature over the time window.

        Args:
            signature_hash: Signature whose records are scored.
            records: Execution records; entries for other signatures or older
                than ``window_days`` are ignored.

        Returns:
            The scores. With no record in the window, IFo and IFs are 0.0,
            IFg and IDI are 1.0, and ``execution_count`` is 0.
        """
        cutoff = _utcnow() - timedelta(days=self.window_days)
        windowed = [
            r
            for r in records
            if r.signature_hash == signature_hash and r.timestamp >= cutoff
        ]

        if not windowed:
            return IndexScores(
                signature_hash=signature_hash,
                ifo=0.0, ifs=0.0, ifg=1.0, idi=1.0,
                execution_count=0,
                window_days=self.window_days,
            )

        return IndexScores(
            signature_hash=signature_hash,
            ifo=self._compute_ifo(windowed),
            ifs=self._compute_ifs(windowed),
            ifg=self._compute_ifg(windowed),
            idi=self._compute_idi(windowed),
            execution_count=len(windowed),
            window_days=self.window_days,
        )

    @staticmethod
    def _unit_ratio(value: float, baseline: float) -> float:
        """Return ``value / baseline`` clamped to [0, 1].

        The baseline is floored at 0.01 to avoid division by zero. The lower
        clamp keeps a negative measurement from pushing an index above 1.
        """
        ratio = value / max(baseline, 0.01)
        return min(max(ratio, 0.0), 1.0)

    def _compute_ifo(self, records: list[ExecutionRecord]) -> float:
        """IFo = (1 - norm_latency) × (1 - norm_cost) × execution_stability"""
        if not records:
            return 0.0
        total = len(records)

        # Normalized latency: avg(duration / max_expected_duration), clamped to [0, 1]
        norm_latency = sum(
            self._unit_ratio(r.duration_seconds, r.max_expected_duration)
            for r in records
        ) / total

        # Normalized cost: avg(cost / max_expected_cost), clamped to [0, 1]
        norm_cost = sum(
            self._unit_ratio(r.cost_dbu, r.max_expected_cost) for r in records
        ) / total

        # Execution stability: success_rate
        success_rate = sum(1 for r in records if r.success) / total

        return (1 - norm_latency) * (1 - norm_cost) * success_rate

    def _compute_ifs(self, records: list[ExecutionRecord]) -> float:
        """IFs = schema_adherence × absence_of_drift × invariant_preservation"""
        if not records:
            return 0.0
        total = len(records)

        # Schema adherence: fraction of executions with matching schema
        schema_match_rate = sum(1 for r in records if r.schema_match) / total

        # Absence of drift: 1 - (replanned / total)
        absence_drift = 1 - sum(1 for r in records if r.replanned) / total

        # Invariant preservation: fraction without faults
        fault_free_rate = sum(1 for r in records if not r.faults) / total

        return schema_match_rate * absence_drift * fault_free_rate

    def _compute_ifg(self, records: list[ExecutionRecord]) -> float:
        """IFg = binary. 1.0 if ALL executions are governance-compliant, else 0.0."""
        violated = any(
            r.pii_exposure or not r.policy_compliant or not r.layer_adherent
            for r in records
        )
        return 0.0 if violated else 1.0

    def _compute_idi(self, records: list[ExecutionRecord]) -> float:
        """IDI = 1 - (replanned_executions / total_executions)"""
        if not records:
            return 1.0
        replanned = sum(1 for r in records if r.replanned)
        return 1 - (replanned / len(records))
|