cfa-kernel 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cfa/__init__.py +39 -0
- cfa/_lazy.py +39 -0
- cfa/adapters/__init__.py +104 -0
- cfa/adapters/autogen.py +19 -0
- cfa/adapters/crewai.py +19 -0
- cfa/adapters/dspy.py +19 -0
- cfa/adapters/langgraph.py +19 -0
- cfa/adapters/openai_agents.py +19 -0
- cfa/audit/__init__.py +15 -0
- cfa/audit/context.py +205 -0
- cfa/audit/hashing.py +41 -0
- cfa/audit/trail.py +194 -0
- cfa/backends/__init__.py +132 -0
- cfa/backends/dbt.py +338 -0
- cfa/backends/pyspark.py +240 -0
- cfa/backends/sql.py +270 -0
- cfa/behavior/__init__.py +49 -0
- cfa/behavior/llm.py +244 -0
- cfa/behavior/spec.py +235 -0
- cfa/behavior/systematizer.py +222 -0
- cfa/cli/__init__.py +296 -0
- cfa/cli/__main__.py +6 -0
- cfa/cli/_helpers.py +109 -0
- cfa/cli/core/__init__.py +0 -0
- cfa/cli/core/evaluate.py +72 -0
- cfa/cli/core/validate.py +29 -0
- cfa/cli/formatters.py +280 -0
- cfa/cli/governance/__init__.py +0 -0
- cfa/cli/governance/audit.py +65 -0
- cfa/cli/governance/catalog.py +28 -0
- cfa/cli/governance/policy.py +119 -0
- cfa/cli/governance/rules.py +42 -0
- cfa/cli/governance/signature.py +31 -0
- cfa/cli/infrastructure/__init__.py +0 -0
- cfa/cli/infrastructure/backend_list.py +24 -0
- cfa/cli/infrastructure/storage.py +87 -0
- cfa/cli/project/__init__.py +0 -0
- cfa/cli/project/init.py +73 -0
- cfa/cli/project/lifecycle.py +92 -0
- cfa/cli/project/status.py +75 -0
- cfa/cli/project/taxonomy.py +38 -0
- cfa/cli/reporting/__init__.py +0 -0
- cfa/cli/reporting/report.py +109 -0
- cfa/cli/reporting/serve.py +43 -0
- cfa/config.py +103 -0
- cfa/core/__init__.py +19 -0
- cfa/core/codegen.py +65 -0
- cfa/core/conditions.py +129 -0
- cfa/core/kernel.py +224 -0
- cfa/core/phases/__init__.py +0 -0
- cfa/core/phases/runner.py +477 -0
- cfa/core/planner.py +290 -0
- cfa/execution/__init__.py +12 -0
- cfa/execution/partial.py +339 -0
- cfa/execution/state_projection.py +216 -0
- cfa/governance/__init__.py +76 -0
- cfa/lifecycle/__init__.py +51 -0
- cfa/mcp/__init__.py +347 -0
- cfa/mcp/__main__.py +4 -0
- cfa/normalizer/__init__.py +15 -0
- cfa/normalizer/base.py +441 -0
- cfa/normalizer/llm.py +426 -0
- cfa/observability/__init__.py +14 -0
- cfa/observability/indices.py +177 -0
- cfa/observability/metrics.py +91 -0
- cfa/observability/notify.py +79 -0
- cfa/observability/otel.py +81 -0
- cfa/observability/promotion.py +367 -0
- cfa/policy/__init__.py +12 -0
- cfa/policy/bundle.py +317 -0
- cfa/policy/catalog.py +117 -0
- cfa/policy/engine.py +306 -0
- cfa/reporting/__init__.py +42 -0
- cfa/reporting/charts.py +223 -0
- cfa/reporting/engine.py +456 -0
- cfa/resolution/__init__.py +62 -0
- cfa/runtime/__init__.py +13 -0
- cfa/runtime/gate.py +287 -0
- cfa/sandbox/__init__.py +189 -0
- cfa/sandbox/executor.py +92 -0
- cfa/sandbox/mock.py +89 -0
- cfa/sandbox/panic.py +52 -0
- cfa/storage/__init__.py +591 -0
- cfa/testing/__init__.py +60 -0
- cfa/testing/asserts.py +77 -0
- cfa/testing/evaluate.py +168 -0
- cfa/testing/fixtures.py +89 -0
- cfa/testing/markers.py +36 -0
- cfa/types.py +489 -0
- cfa/validation/__init__.py +14 -0
- cfa/validation/runtime.py +285 -0
- cfa/validation/signature.py +146 -0
- cfa/validation/static.py +252 -0
- cfa_kernel-0.1.0.dist-info/METADATA +32 -0
- cfa_kernel-0.1.0.dist-info/RECORD +98 -0
- cfa_kernel-0.1.0.dist-info/WHEEL +4 -0
- cfa_kernel-0.1.0.dist-info/entry_points.txt +3 -0
- cfa_kernel-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""cfa storage — manage CFA storage (stats, cleanup, vacuum)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from datetime import UTC
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def cmd_storage_stats(args) -> int:
    """Print record counts and size information for a CFA store.

    The store is selected by ``args.db`` (SQLite) or ``args.dir`` (JSON
    lines); ``--db`` wins when both are given.  With ``args.format == "json"``
    the stats dictionary is printed as JSON, otherwise a text summary.

    Returns:
        0 on success, 1 when neither ``--db`` nor ``--dir`` was supplied.
    """
    # Report the usage error before touching the storage layer, so it does
    # not depend on the storage modules being importable.
    if not (args.db or args.dir):
        print("Error: --db or --dir required", file=sys.stderr)
        return 1

    from cfa.cli.formatters import format_json
    from cfa.storage import JsonLinesStorage, SqliteStorage, _sqlite_storage_stats

    if args.db:
        store = SqliteStorage(args.db)
        try:
            store.ensure_schema()
            stats = _sqlite_storage_stats(store)
        finally:
            # Close the store even when schema setup or the query raises.
            store.close()
    else:
        stats = JsonLinesStorage(args.dir).stats()

    if args.format == "json":
        print(format_json(stats.to_dict()))
        return 0

    print(f"CFA Storage: {stats.backend} @ {stats.path}")
    print(f" file size: {stats.file_size_bytes:,} bytes")
    print(f" audit events: {stats.audit_events_count}")
    print(f" execution records:{stats.execution_records_count}")
    print(f" skill records: {stats.skill_records_count}")
    print(f" metrics: {stats.metrics_count}")
    # Timestamps are cut to 19 characters (YYYY-MM-DDTHH:MM:SS for ISO strings).
    if stats.oldest_record:
        print(f" oldest record: {stats.oldest_record[:19]}")
    if stats.newest_record:
        print(f" newest record: {stats.newest_record[:19]}")
    return 0
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def cmd_storage_cleanup(args) -> int:
|
|
42
|
+
from datetime import datetime, timedelta
|
|
43
|
+
|
|
44
|
+
from cfa.config import CfaConfig
|
|
45
|
+
from cfa.storage import JsonLinesStorage, SqliteStorage, _sqlite_storage_cleanup
|
|
46
|
+
|
|
47
|
+
if args.retention_days:
|
|
48
|
+
before = (datetime.now(UTC) - timedelta(days=args.retention_days)).isoformat()
|
|
49
|
+
elif args.before:
|
|
50
|
+
before = args.before
|
|
51
|
+
else:
|
|
52
|
+
config = CfaConfig.discover()
|
|
53
|
+
if config and config.storage.retention_days:
|
|
54
|
+
before = (datetime.now(UTC) - timedelta(days=config.storage.retention_days)).isoformat()
|
|
55
|
+
else:
|
|
56
|
+
print("Error: --retention or --before required (or set retention_days in cfa.yaml)", file=sys.stderr)
|
|
57
|
+
return 1
|
|
58
|
+
|
|
59
|
+
if args.db:
|
|
60
|
+
store = SqliteStorage(args.db)
|
|
61
|
+
store.ensure_schema()
|
|
62
|
+
deleted = _sqlite_storage_cleanup(store, before)
|
|
63
|
+
store.close()
|
|
64
|
+
elif args.dir:
|
|
65
|
+
store = JsonLinesStorage(args.dir)
|
|
66
|
+
deleted = store.cleanup(before)
|
|
67
|
+
else:
|
|
68
|
+
print("Error: --db or --dir required", file=sys.stderr)
|
|
69
|
+
return 1
|
|
70
|
+
|
|
71
|
+
print(f"Cleaned up {deleted} records before {before[:19]}")
|
|
72
|
+
return 0
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def cmd_storage_vacuum(args) -> int:
    """Run ``vacuum()`` on the SQLite store at ``args.db``.

    Returns:
        0 on success, 1 when ``--db`` was not supplied.
    """
    if not args.db:
        print("Error: --db required for vacuum", file=sys.stderr)
        return 1

    from cfa.storage import SqliteStorage

    store = SqliteStorage(args.db)
    try:
        store.ensure_schema()
        store.vacuum()
    finally:
        # Close the store even when the vacuum fails.
        store.close()

    print(f"Vacuumed {args.db}")
    return 0
|
|
File without changes
|
cfa/cli/project/init.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""cfa init — bootstrap CFA project."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def cmd_init(args) -> int:
    """Create a CFA project skeleton in ``args.dir`` (default ``.cfa``).

    Writes four files, overwriting any that already exist:

    * ``catalog.json`` -- an example data catalog;
    * ``policies/prod-v1.yaml`` -- an example policy bundle, serialised as
      JSON text;
    * ``config.yaml`` -- storage settings and default paths;
    * ``.gitignore`` -- ignores everything in the directory except itself.

    Returns:
        Always 0.
    """
    cfa_dir = Path(args.dir or ".cfa")
    policies_dir = cfa_dir / "policies"
    # parents=True lets --dir point at a nested path whose parent directories
    # do not exist yet; creating the policies directory creates cfa_dir too.
    policies_dir.mkdir(parents=True, exist_ok=True)

    example_catalog = {
        "datasets": {
            "nfe": {"classification": "high_volume", "size_gb": 4000, "pii_columns": [],
                    "partition_column": "processing_date", "merge_keys": ["nfe_id"]},
            "clientes": {"classification": "sensitive", "size_gb": 0.5,
                         "pii_columns": ["cpf", "email"], "partition_column": "processing_date",
                         "merge_keys": ["cliente_id"]},
        }
    }
    catalog_path = cfa_dir / "catalog.json"
    catalog_path.write_text(json.dumps(example_catalog, indent=2), encoding="utf-8")

    prod_policy = {
        "policy_bundle": {
            "version": "prod-v1.0",
            "description": "Production governance rules",
            "rules": [
                {"name": "forbid_raw_pii", "condition": "pii_in_protected_layer",
                 "action": "block", "fault_code": "GOVERNANCE_RAW_PII", "severity": "critical",
                 "family": "semantic", "message": "PII in protected layer without anonymization.",
                 "remediation": ["Apply sha256 on PII columns"]},
                {"name": "require_partition", "condition": "missing_partition",
                 "action": "replan", "fault_code": "FINOPS_MISSING_PARTITION", "severity": "high",
                 "family": "semantic", "message": "High-volume dataset without partition.",
                 "remediation": ["Add partition_by column"]},
                {"name": "require_merge_key", "condition": "missing_merge_key",
                 "action": "block", "fault_code": "CONTRACT_MISSING_MERGE_KEY", "severity": "critical",
                 "family": "semantic", "message": "Silver/Gold write without merge key.",
                 "remediation": ["Set merge_key_required=True"]},
            ],
        }
    }
    policy_path = policies_dir / "prod-v1.yaml"
    # The bundle is written as JSON text under a .yaml name.
    policy_path.write_text(json.dumps(prod_policy, indent=2), encoding="utf-8")

    config = (
        "# CFA Configuration\n"
        "version: \"1.0\"\n"
        "\n"
        "storage:\n"
        " backend: sqlite\n"
        " path: cfa.db\n"
        " retention_days: 90\n"
        "\n"
        "defaults:\n"
        f" catalog: {cfa_dir / 'catalog.json'}\n"
        f" policy_bundle: {cfa_dir / 'policies' / 'prod-v1.yaml'}\n"
        " backend: pyspark\n"
    )
    (cfa_dir / "config.yaml").write_text(config, encoding="utf-8")

    (cfa_dir / ".gitignore").write_text("*\n!.gitignore\n", encoding="utf-8")

    print(f"CFA initialized in {cfa_dir}/")
    print(" config.yaml — CFA configuration")
    print(" catalog.json — example data catalog")
    print(" policies/prod-v1.yaml — production policy bundle")
    print(f"\nNext: cfa evaluate \"your intent\" --config {cfa_dir / 'config.yaml'}")
    return 0
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""cfa lifecycle — lifecycle management commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def cmd_lifecycle_evaluate(args) -> int:
    """Evaluate every skill known to the promotion engine and print its scores.

    ``args.db`` optionally names a SQLite store handed to the engine;
    ``args.window`` is passed as the policy's ``evaluation_window_days`` and
    ``args.policy_bundle`` as ``policy_bundle_version`` of each evaluation.
    Output is JSON when ``args.format == "json"``, otherwise one text line
    per skill.

    Returns:
        Always 0.
    """
    from cfa.cli.formatters import format_json
    from cfa.observability.promotion import PromotionEngine, PromotionPolicy
    from cfa.storage import SqliteStorage

    store = SqliteStorage(args.db) if args.db else None
    try:
        if store is not None:
            store.ensure_schema()

        engine = PromotionEngine(
            policy=PromotionPolicy(evaluation_window_days=args.window),
            storage=store,
        )

        results = []
        for skill in engine.list_skills():
            # evaluate() returns a pair; only the scores half is reported.
            _, scores = engine.evaluate(
                skill.signature_hash,
                policy_bundle_version=args.policy_bundle,
            )
            results.append({
                "signature_hash": skill.signature_hash,
                "state": skill.state.value,
                "ifo": round(scores.ifo, 3),
                "ifs": round(scores.ifs, 3),
                "ifg": round(scores.ifg, 3),
                "idi": round(scores.idi, 3),
                "executions": scores.execution_count,
                "promotion_eligible": scores.promotion_eligible,
                "drift_detected": scores.drift_detected,
            })

        if args.format == "json":
            print(format_json(results))
        else:
            print(f"Lifecycle evaluation ({len(results)} skills, {args.window}d window):")
            for r in results:
                flags = []
                if r["promotion_eligible"]:
                    flags.append("PROMOTE")
                if r["drift_detected"]:
                    flags.append("DRIFT")
                flag_str = f" [{', '.join(flags)}]" if flags else ""
                print(f" {r['signature_hash'][:12]} state={r['state']:12} "
                      f"IFo={r['ifo']:.2f} IFs={r['ifs']:.2f} IFg={r['ifg']:.2f} IDI={r['idi']:.2f} "
                      f"execs={r['executions']}{flag_str}")
    finally:
        # Close the store on every exit path, including a failing evaluation.
        if store is not None:
            store.close()
    return 0
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def cmd_lifecycle_list(args) -> int:
    """List every skill known to the promotion engine with its state.

    ``args.db`` optionally names a SQLite store handed to the engine.  Output
    is JSON when ``args.format == "json"``, otherwise one text line per skill.

    Returns:
        Always 0.
    """
    from cfa.cli.formatters import format_json
    from cfa.observability.promotion import PromotionEngine
    from cfa.storage import SqliteStorage

    store = SqliteStorage(args.db) if args.db else None
    try:
        if store is not None:
            store.ensure_schema()

        engine = PromotionEngine(storage=store)
        skills = engine.list_skills(state=None)

        results = [
            {
                "signature_hash": s.signature_hash,
                "state": s.state.value,
                "demotion_reason": s.demotion_reason,
                "history_count": len(s.history),
            }
            for s in skills
        ]

        if args.format == "json":
            print(format_json(results))
        else:
            print(f"Lifecycle skills ({len(results)}):")
            for r in results:
                line = (f" {r['signature_hash'][:12]} state={r['state']:12} "
                        f"transitions={r['history_count']}")
                # Only show a reason when there is one, instead of printing
                # the literal text "None".
                if r["demotion_reason"]:
                    line += f" {r['demotion_reason']}"
                print(line)
    finally:
        # Close the store on every exit path.
        if store is not None:
            store.close()
    return 0
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""cfa status — overall CFA health and state."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import UTC
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def cmd_status(args) -> int:
    """Report the discovered CFA configuration and, for SQLite, storage stats.

    The config comes from ``args.config`` (loaded with ``CfaConfig.from_yaml``)
    or, when that is ``None``, from ``CfaConfig.discover()``.  Output is JSON
    when ``args.format == "json"``, otherwise a text summary.

    Failures while reading storage stats are recorded in the output rather
    than raised, so the command still reports the configuration.

    Returns:
        Always 0.
    """
    from cfa.cli.formatters import format_json
    from cfa.config import CfaConfig
    from cfa.storage import SqliteStorage, _sqlite_storage_stats

    config = CfaConfig.discover() if args.config is None else CfaConfig.from_yaml(args.config)

    output = {
        "config_found": config is not None,
    }

    if config is not None:
        output["config"] = config.to_dict()
        output["storage"] = {"backend": config.storage.backend, "path": config.storage.path, "retention_days": config.storage.retention_days}

        if config.storage.backend == "sqlite":
            # Best effort: any failure here is reported as a status entry.
            try:
                from pathlib import Path
                db_path = Path(config.storage.path)
                if db_path.exists():
                    store = SqliteStorage(db_path)
                    try:
                        store.ensure_schema()
                        stats = _sqlite_storage_stats(store)
                        output["storage_stats"] = stats.to_dict()
                    finally:
                        # Close the store even when reading stats fails.
                        store.close()
                else:
                    output["storage_stats"] = {"status": "not_found", "path": str(db_path)}
            except Exception as e:
                output["storage_stats"] = {"status": "error", "error": str(e)}
    else:
        output["config"] = {"status": "not_found", "hint": "Run 'cfa init' to create a new project"}

    if args.format == "json":
        print(format_json(output))
        return 0

    if config is None:
        print("CFA Status: no config found")
        print(" Run 'cfa init' to create a new CFA project.")
        return 0

    print("CFA Status")
    print(f" config: found (v{config.version})")
    print(f" storage: {config.storage.backend} @ {config.storage.path}")
    print(f" retention: {config.storage.retention_days} days")
    print(f" catalog: {config.defaults.catalog}")
    print(f" policy: {config.defaults.policy_bundle}")

    stats = output.get("storage_stats", {})
    if stats.get("backend") == "sqlite":
        print(f" file size: {stats.get('file_size_bytes', 0):,} bytes")
        print(f" audit events: {stats.get('audit_events_count', 0)}")
        print(f" executions: {stats.get('execution_records_count', 0)}")
        print(f" skills: {stats.get('skill_records_count', 0)}")
        if stats.get("newest_record"):
            print(f" last record: {stats['newest_record'][:19]}")
        # The retention check needs both an oldest record and a retention
        # window (the original tested oldest_record twice).
        if stats.get("oldest_record") and config.storage.retention_days:
            from datetime import datetime, timedelta
            try:
                oldest = datetime.fromisoformat(stats["oldest_record"])
                if oldest.tzinfo is None:
                    # Naive and aware datetimes cannot be compared; the
                    # resulting TypeError used to hide the warning.
                    # NOTE(review): assumes naive timestamps are UTC -- confirm
                    # against how the storage layer writes them.
                    oldest = oldest.replace(tzinfo=UTC)
                retention = timedelta(days=config.storage.retention_days)
                overdue = oldest < datetime.now(UTC) - retention
            except (ValueError, TypeError):
                # Unparseable timestamp or non-numeric retention: skip the hint.
                overdue = False
            if overdue:
                print(f" ⚠ retention: records older than {config.storage.retention_days}d exist. Run 'cfa storage cleanup'")
    elif stats.get("status") == "not_found":
        print(" storage: not created yet (will be auto-created on first use)")
    elif stats.get("status") == "error":
        # Surface the failure in text mode too; it was only visible in JSON.
        print(f" storage: could not read stats ({stats.get('error')})")
    return 0
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""cfa taxonomy — behavior taxonomy operations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def cmd_taxonomy_generate(args) -> int:
    """Build a taxonomy and rule list from the behavior spec at ``args.spec``.

    The result is rendered with ``format_json`` and either written to
    ``args.output`` (followed by a short summary) or printed to stdout.

    Returns:
        0 on success, 1 when the spec cannot be loaded.
    """
    import sys

    from cfa.behavior import BehaviorSpec, Systematizer
    from cfa.cli.formatters import format_json

    try:
        spec = BehaviorSpec.from_yaml(args.spec)
    except Exception as e:
        print(f"Error loading spec: {e}", file=sys.stderr)
        return 1

    taxonomy, rules = Systematizer().systematize(spec)

    taxonomy_payload = taxonomy.to_dict()
    rule_payloads = []
    for rule in rules:
        rule_payloads.append({
            "name": rule.name,
            "action": rule.action.value,
            "fault_code": rule.fault_code,
            "severity": rule.severity.value,
            "message": rule.message,
            "remediation": list(rule.remediation),
        })
    out_text = format_json({"taxonomy": taxonomy_payload, "rules": rule_payloads})

    if not args.output:
        print(out_text)
        return 0

    Path(args.output).write_text(out_text, encoding="utf-8")
    print(f"Taxonomy saved to {args.output}")
    print(f" Categories: {taxonomy.category_count}")
    print(f" Rules: {len(rules)}")
    return 0
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def cmd_taxonomy_test_intents(args) -> int:
    """Generate ``args.count`` test intents from the behavior spec at ``args.spec``.

    The intents are written newline-joined to ``args.output`` when given,
    otherwise printed one per line.

    Returns:
        0 on success, 1 when the spec cannot be loaded.
    """
    import sys

    from cfa.behavior import BehaviorSpec, Systematizer

    try:
        spec = BehaviorSpec.from_yaml(args.spec)
    except Exception as e:
        print(f"Error loading spec: {e}", file=sys.stderr)
        return 1

    intents = Systematizer().generate_test_intents(spec, count=args.count)
    out_text = "\n".join(intents)

    if args.output:
        destination = Path(args.output)
        destination.write_text(out_text, encoding="utf-8")
        print(f"{len(intents)} test intents saved to {args.output}")
        return 0

    for line in intents:
        print(line)
    return 0
|
|
File without changes
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""cfa report — generate governance reports from real data."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _read_audit_for_report(audit_file: str | None) -> tuple[list[Any], int, int, int]:
    """Load audit events from a JSON-lines file and tally their outcomes.

    Returns:
        ``(events, approved, replanned, blocked)``, where the three counts are
        the number of events whose ``outcome`` equals ``"approve"``,
        ``"replan"`` and ``"block"``.  A falsy ``audit_file`` yields
        ``([], 0, 0, 0)`` without reading anything.
    """
    from cfa.audit.trail import AuditTrail, JsonLinesAuditStorage

    if not audit_file:
        return ([], 0, 0, 0)

    loaded = AuditTrail(storage=JsonLinesAuditStorage(audit_file)).get_all_events()

    # Each outcome is compared with == (not used as a dict key), exactly as
    # the three separate counting passes would do.
    tally = {"approve": 0, "replan": 0, "block": 0}
    for label in tally:
        for entry in loaded:
            if entry.outcome == label:
                tally[label] += 1

    return (loaded, tally["approve"], tally["replan"], tally["block"])
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def cmd_report_execution(args) -> int:
    """Run ``args.intent`` through a default kernel and write an execution report.

    Policy faults and aggregate sandbox metrics from the kernel result are
    flattened into plain dictionaries and passed to ``generate_report``
    together with ``args.output`` and ``args.policy_bundle``.

    Returns:
        Always 0.
    """
    from cfa.core.kernel import KernelOrchestrator
    from cfa.reporting import generate_report

    outcome = KernelOrchestrator().process(args.intent)

    faults: list[dict[str, Any]] = []
    if outcome.policy_result:
        faults = [
            {
                "code": fault.code,
                "severity": fault.severity.value,
                "family": fault.family.value,
                "message": fault.message,
                "remediation": list(fault.remediation),
            }
            for fault in outcome.policy_result.faults
        ]

    sandbox = None
    if outcome.sandbox_result:
        metrics = outcome.sandbox_result.aggregate_metrics
        sandbox = {
            "rows_output": metrics.rows_output,
            "shuffle_mb": metrics.shuffle_mb,
            "duration_seconds": metrics.duration_seconds,
            "cost_dbu": metrics.cost_dbu,
        }

    path = generate_report(
        "execution",
        args.output,
        intent=args.intent,
        intent_id=outcome.intent_id,
        state=outcome.state.value,
        # An absent signature is reported as an empty hash.
        signature_hash=outcome.signature.signature_hash if outcome.signature else "",
        policy_bundle=args.policy_bundle,
        replan_count=len(outcome.replan_history),
        events=outcome.audit_events,
        faults=faults,
        sandbox_metrics=sandbox,
    )
    print(f"Execution report saved to {path}")
    return 0
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def cmd_report_audit(args) -> int:
    """Write an audit report for the events recorded under ``args.intent_id``.

    Each event is reduced to a dictionary of timestamp, phase, event type,
    outcome and hash; attributes an event does not have become ``""``.

    Returns:
        Always 0.
    """
    from cfa.audit.trail import AuditTrail
    from cfa.reporting import generate_report

    # NOTE(review): the trail is built without a storage argument, unlike the
    # other report commands -- confirm where its events come from.
    trail = AuditTrail()
    events_raw = trail.get_events_for_intent(args.intent_id)
    chain_ok = trail.verify_chain()

    events = []
    for record in events_raw:
        if hasattr(record, "timestamp"):
            stamp = record.timestamp.isoformat()
        else:
            stamp = ""

        # Prefer "phase", fall back to "stage", else leave it blank.
        if hasattr(record, "phase"):
            phase = record.phase
        elif hasattr(record, "stage"):
            phase = record.stage
        else:
            phase = ""

        events.append({
            "timestamp": stamp,
            "phase": phase,
            "event_type": record.event_type,
            "outcome": record.outcome,
            "event_hash": getattr(record, "event_hash", ""),
        })

    path = generate_report(
        "audit",
        args.output,
        intent_id=args.intent_id,
        events=events,
        chain_intact=chain_ok,
        policy_bundle=args.policy_bundle,
    )
    print(f"Audit report saved to {path}")
    return 0
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def cmd_report_lifecycle(args) -> int:
    """Write the lifecycle dashboard report.

    Decision counts and the list of skills come from the audit file at
    ``args.audit_file``; up to 20 distinct ``signature_hash`` values found in
    event details are listed, each with fixed placeholder scores.  The trend
    and cost series are constant placeholders spanning ``args.period`` days.

    Returns:
        Always 0.
    """
    from cfa.reporting import generate_report

    events, approved, replanned, blocked = _read_audit_for_report(args.audit_file)

    # Reuse the events already loaded above instead of opening and parsing
    # the audit file a second time.  dict.fromkeys keeps first-seen order
    # while dropping duplicates; empty hashes are skipped.
    skill_hashes: list[str] = list(dict.fromkeys(
        sig_hash
        for sig_hash in (e.details.get("signature_hash", "") for e in events)
        if sig_hash
    ))

    skills = [{"hash": h[:12], "ifo": 0.0, "ifs": 0.0, "ifg": 1.0, "idi": 1.0, "state": "candidate"} for h in skill_hashes[:20]]
    days = args.period
    # About ten evenly spaced labels across the period (step is at least 1).
    dates = [f"Day -{days - i}" for i in range(0, days, max(1, days // 10))]
    zeros = [0.0 for _ in dates]
    ones = [1.0 for _ in dates]

    path = generate_report(
        "lifecycle",
        args.output,
        period_days=days,
        skills=skills,
        trend_dates=dates,
        ifo_vals=zeros,
        ifs_vals=zeros,
        idi_vals=ones,
        ifg_vals=ones,
        cost_dates=dates,
        cost_vals=zeros,
        decisions={"approved": approved, "replanned": replanned, "blocked": blocked},
    )
    print(f"Lifecycle dashboard saved to {path}")
    if not args.audit_file:
        print("Note: No --audit-file provided. Report generated with zero values.", file=sys.stderr)
    return 0
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def cmd_report_compliance(args) -> int:
    """Write the compliance report for ``args.policy_bundle``.

    Decision counts come from the audit file at ``args.audit_file``.  An
    event counts as a prevented PII incident when its outcome is ``"block"``
    and the text ``PII`` appears in the string form of its ``faults`` detail.

    Returns:
        Always 0.
    """
    from cfa.policy.engine import PolicyEngine
    from cfa.reporting import generate_report

    events, approved, replanned, blocked = _read_audit_for_report(args.audit_file)
    engine = PolicyEngine(policy_bundle_version=args.policy_bundle)
    pii_prevented = sum(1 for e in events if "PII" in str(e.details.get("faults", [])) and e.outcome == "block")
    total = len(events)

    # Report the verified state of the audit chain rather than a constant.
    # With no audit file there is nothing to verify, so the value stays True.
    # NOTE(review): assumes verify_chain() checks the events held by the
    # storage-backed trail -- confirm against AuditTrail.
    chain_intact = True
    if args.audit_file:
        from cfa.audit.trail import AuditTrail, JsonLinesAuditStorage

        trail = AuditTrail(storage=JsonLinesAuditStorage(args.audit_file))
        chain_intact = trail.verify_chain()

    path = generate_report(
        "compliance",
        args.output,
        policy_bundle=args.policy_bundle,
        total_evaluations=total,
        approved=approved,
        replanned=replanned,
        blocked=blocked,
        rules=engine.describe_rules(),
        pii_incidents_prevented=pii_prevented,
        audit_events_total=total,
        chain_intact=chain_intact,
    )
    print(f"Compliance report saved to {path}")
    if not args.audit_file:
        print("Note: No --audit-file provided. Report generated with zero values.", file=sys.stderr)
    return 0
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def cmd_report_dashboard(args) -> int:
    """Write the dashboard report.

    Only the decision counts come from the audit file at ``args.audit_file``;
    the skills list, fault summary and trend values passed to the report are
    empty or zero placeholders spanning ``args.period`` days.

    Returns:
        Always 0.
    """
    from cfa.reporting import generate_report

    _, approved, replanned, blocked = _read_audit_for_report(args.audit_file)

    days = args.period
    step = max(1, days // 10)
    dates = []
    for offset in range(0, days, step):
        dates.append(f"Day -{days - offset}")
    zeros = [0.0] * len(dates)

    decisions = {"approved": approved, "replanned": replanned, "blocked": blocked}
    path = generate_report(
        "dashboard",
        args.output,
        period_days=days,
        skills=[],
        trend_dates=dates,
        ifo_vals=zeros,
        faults_summary={},
        decisions=decisions,
    )
    print(f"Dashboard saved to {path}")

    if not args.audit_file:
        print("Note: No --audit-file provided. Report generated with zero values.", file=sys.stderr)
    return 0
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""cfa serve — live metrics/health server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def cmd_serve(args) -> int:
    """Serve ``/health`` and ``/metrics`` over HTTP until interrupted.

    The main server listens on ``args.port`` (default 8765) on all
    interfaces.  When ``args.metrics_port`` is set, a second server exposing
    the same two endpoints runs on that port in a daemon thread.

    Returns:
        0 after the main server is stopped with Ctrl-C.
    """
    import threading
    from http.server import BaseHTTPRequestHandler, HTTPServer

    from cfa.observability.metrics import get_metrics_text

    port = args.port or 8765

    class _TextHandler(BaseHTTPRequestHandler):
        """Shared helper for sending a complete plain-text response."""

        def _send_text(self, body: bytes, status: int = 200) -> None:
            self.send_response(status)
            self.send_header("Content-Type", "text/plain")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

    if args.metrics_port:
        class MetricsHandler(_TextHandler):
            def do_GET(self):
                if self.path == "/metrics":
                    self._send_text(get_metrics_text().encode())
                elif self.path == "/health":
                    self._send_text(b"OK\n")
                else:
                    # Always answer: an unknown path used to get no response.
                    self._send_text(b"Not found\n", status=404)

        # Bind in the calling thread so a port conflict raises here instead
        # of being lost inside the background thread.
        metrics_server = HTTPServer(("", args.metrics_port), MetricsHandler)
        threading.Thread(target=metrics_server.serve_forever, daemon=True).start()
        print(f"Metrics endpoint at http://localhost:{args.metrics_port}/metrics")
        print(f"Health endpoint at http://localhost:{args.metrics_port}/health")

    print(f"Serving at http://localhost:{port}/")
    print("Note: Dashboard uses live metrics only. No synthetic data is generated.")
    print("Use cfa report dashboard --audit-file <file> for an HTML dashboard report.")

    class PingHandler(_TextHandler):
        def do_GET(self):
            if self.path == "/health":
                self._send_text(b"OK\n")
            elif self.path == "/metrics":
                self._send_text(get_metrics_text().encode())
            else:
                self._send_text(b"CFA v0.1.0 -- See /health and /metrics\n")

    server = HTTPServer(("", port), PingHandler)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is how this command is stopped; treat it as a clean exit.
        pass
    finally:
        server.server_close()
    return 0
|
cfa/config.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""CFA Configuration — centralized settings for all CFA commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class StorageConfig:
    """Settings of the ``storage:`` section of a CFA config."""

    backend: str = "sqlite"  # sqlite | jsonl
    path: str = "cfa.db"
    retention_days: int = 90


@dataclass
class DefaultsConfig:
    """Settings of the ``defaults:`` section of a CFA config."""

    catalog: str = "catalog.yaml"
    policy_bundle: str = "policies/prod-v1.yaml"
    backend: str = "pyspark"


@dataclass
class CfaConfig:
    """Top-level CFA configuration: a version plus the two sections above."""

    version: str = "1.0"
    storage: StorageConfig = field(default_factory=StorageConfig)
    defaults: DefaultsConfig = field(default_factory=DefaultsConfig)

    @classmethod
    def from_yaml(cls, path: str | Path) -> CfaConfig:
        """Load a config from a YAML file; an empty file yields the defaults.

        Raises:
            ImportError: PyYAML is not installed.
            ValueError: the document or one of its sections is not a mapping.
        """
        raw = Path(path).read_text(encoding="utf-8")
        try:
            import yaml
        except ImportError as err:
            raise ImportError("PyYAML required for YAML config. Install: pip install pyyaml") from err
        data = yaml.safe_load(raw)
        return cls._from_dict(data or {}, str(path))

    @classmethod
    def from_json(cls, path: str | Path) -> CfaConfig:
        """Load a config from a JSON file; a ``null`` document yields the defaults.

        Raises:
            ValueError: the document or one of its sections is not an object.
        """
        raw = Path(path).read_text(encoding="utf-8")
        data = json.loads(raw)
        # Same empty-document handling as from_yaml.
        return cls._from_dict(data or {}, str(path))

    @classmethod
    def discover(cls, start: str | Path | None = None) -> CfaConfig | None:
        """Load the first config file found directly inside ``start``.

        Candidates, in order: ``cfa.yaml``, ``cfa.yml``, ``.cfa/config.yaml``,
        then ``cfa.json``.  ``start`` defaults to the current working
        directory; parent directories are not searched.

        Returns:
            The loaded config, or ``None`` when no candidate exists.
        """
        current = Path(start) if start else Path.cwd()
        for candidate in [current / "cfa.yaml", current / "cfa.yml", current / ".cfa" / "config.yaml"]:
            if candidate.exists():
                return cls.from_yaml(candidate)
        if (current / "cfa.json").exists():
            return cls.from_json(current / "cfa.json")
        return None

    @staticmethod
    def _section(data: dict[str, Any], key: str, source: str) -> dict[str, Any]:
        """Return section ``key`` of ``data``; a missing or null section is ``{}``."""
        section = data.get(key)
        if section is None:
            # "storage:" with nothing under it parses as None, not as {}.
            return {}
        if not isinstance(section, dict):
            where = f" in {source}" if source else ""
            raise ValueError(
                f"Invalid CFA config{where}: '{key}' must be a mapping, got {type(section).__name__}"
            )
        return section

    @classmethod
    def _from_dict(cls, data: dict[str, Any], source: str = "") -> CfaConfig:
        """Build a config from parsed data, filling absent keys with defaults.

        ``source`` is only used to name the file in error messages.

        Raises:
            ValueError: ``data`` or one of its sections is not a mapping.
        """
        if not isinstance(data, dict):
            where = f" in {source}" if source else ""
            raise ValueError(
                f"Invalid CFA config{where}: expected a mapping at top level, got {type(data).__name__}"
            )
        storage_data = cls._section(data, "storage", source)
        storage = StorageConfig(
            backend=storage_data.get("backend", "sqlite"),
            path=storage_data.get("path", "cfa.db"),
            retention_days=storage_data.get("retention_days", 90),
        )
        defaults_data = cls._section(data, "defaults", source)
        defaults = DefaultsConfig(
            catalog=defaults_data.get("catalog", "catalog.yaml"),
            policy_bundle=defaults_data.get("policy_bundle", "policies/prod-v1.yaml"),
            backend=defaults_data.get("backend", "pyspark"),
        )
        return cls(version=data.get("version", "1.0"), storage=storage, defaults=defaults)

    def to_yaml(self) -> str:
        """Render the config as YAML text; values are inserted unescaped."""
        lines = [
            "# CFA Configuration",
            f"version: \"{self.version}\"",
            "",
            "storage:",
            f" backend: {self.storage.backend}",
            f" path: {self.storage.path}",
            f" retention_days: {self.storage.retention_days}",
            "",
            "defaults:",
            f" catalog: {self.defaults.catalog}",
            f" policy_bundle: {self.defaults.policy_bundle}",
            f" backend: {self.defaults.backend}",
        ]
        return "\n".join(lines) + "\n"

    def to_dict(self) -> dict[str, Any]:
        """Return the config as nested plain dictionaries."""
        return {
            "version": self.version,
            "storage": {
                "backend": self.storage.backend,
                "path": self.storage.path,
                "retention_days": self.storage.retention_days,
            },
            "defaults": {
                "catalog": self.defaults.catalog,
                "policy_bundle": self.defaults.policy_bundle,
                "backend": self.defaults.backend,
            },
        }
|
cfa/core/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""CFA Core — governance engine."""
|
|
2
|
+
from cfa._lazy import LazyLoader
|
|
3
|
+
|
|
4
|
+
# Module-level __getattr__ built from a mapping of exported name to
# (module path, attribute name).  Every export keeps its own name as the
# attribute, so the table is listed per source module.
# NOTE(review): presumably LazyLoader imports the module on first attribute
# access -- confirm in cfa._lazy.
__getattr__ = LazyLoader({
    exported: (module_path, exported)
    for module_path, exported_names in (
        ("cfa.core.kernel", (
            "KernelOrchestrator",
            "KernelConfig",
            "PipelinePhase",
        )),
        ("cfa.core.planner", (
            "ExecutionPlanner",
            "ExecutionPlan",
            "ExecutionStep",
            "StepType",
            "WriteMode",
            "ConsistencyUnit",
        )),
        ("cfa.core.codegen", (
            "CodeGenBackend",
            "GeneratedCode",
        )),
        ("cfa.core.conditions", (
            "build_condition",
            "list_conditions",
            "register_condition",
        )),
    )
    for exported in exported_names
})
|