npm - sage-governance - Versions diffs - 1.0.0 - Mend

sage-governance 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/AGENTS.MD +481 -0
package/LICENSE +21 -0
package/README.md +319 -0
package/bin/sage.js +55 -0
package/claude.json +16 -0
package/codex.json +22 -0
package/cursor.json +27 -0
package/docs/architecture.md +38 -0
package/opencode.json +24 -0
package/package.json +58 -0
package/requirements.txt +7 -0
package/rules/general/EU_AI_Act_Annex_III.md +29 -0
package/rules/general/OECD_Principles.md +20 -0
package/rules/general/UNESCO_AI_Ethics.md +237 -0
package/rules/general/UN_Human_Rights.md +183 -0
package/rules/index.json +145 -0
package/sage/mcp_server.py +459 -0
package/sage/report_gen.py +408 -0
package/sage/sage_agent.py +710 -0
package/sage/security_agent.py +455 -0
package/sage/startup.py +311 -0

package/sage/report_gen.py ADDED Viewed

@@ -0,0 +1,408 @@
+"""
+report_gen.py — SAGE Governance Report Generator
+═════════════════════════════════════════════════
+Reads audit-trail/decisions.jsonl and produces a human-readable
+model card in Markdown, aligned with:
+  • Google Model Cards for Model Reporting (Mitchell et al., 2019)
+  • EU AI Act Article 13 (transparency obligations)
+  • UNESCO AI Ethics Recommendation (2021)
+IMPORTANT LIMITATION
+─────────────────────────────────────────────
+The SHA-256 hash chain in decisions.jsonl proves sequential integrity
+WITHIN a session — entries were not reordered or modified in place.
+It does NOT prevent local file deletion. This is a session-level
+tamper-detection mechanism, not an immutable external audit log.
+Author: SAGE Team / Team SAGE (Hackathon)
+License: MIT
+"""
+from __future__ import annotations
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional
+from startup import AUDIT_FILE, REPORTS_DIR
+# ══════════════════════════════════════════════════════════════════════════════
+# AUDIT READER
+# ══════════════════════════════════════════════════════════════════════════════
+def load_audit_entries(session_id: Optional[str] = None) -> list[dict]:
+    """Load all audit entries, optionally filtered by session_id."""
+    if not AUDIT_FILE.exists():
+        return []
+    entries: list[dict] = []
+    for raw_line in AUDIT_FILE.read_text(encoding="utf-8").splitlines():
+        line = raw_line.strip()
+        if not line:
+            continue
+        try:
+            entry = json.loads(line)
+            if session_id is None or entry.get("session_id") == session_id:
+                entries.append(entry)
+        except json.JSONDecodeError:
+            continue
+    return entries
+# ══════════════════════════════════════════════════════════════════════════════
+# REPORT BUILDERS
+# ══════════════════════════════════════════════════════════════════════════════
+def generate_model_card(session_id: Optional[str] = None) -> str:
+    """
+    Produces a full Markdown model card from the audit trail.
+    Sections follow Mitchell et al. (2019) structure.
+    """
+    entries = load_audit_entries(session_id)
+    now     = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
+    if not entries:
+        return (
+            f"# SAGE Governance Report\n\n"
+            f"_No audit entries found for session `{session_id or 'all'}`. "
+            f"Generated: {now}_\n"
+        )
+    # ── Aggregate ────────────────────────────────────────────────────────────
+    domains      = list({e.get("domain", "unknown") for e in entries})
+    risk_levels  = [e.get("risk_level", "UNKNOWN") for e in entries if e.get("risk_level")]
+    _severity_rank = {"LOW": 0, "MEDIUM": 1, "HIGH": 2, "CRITICAL": 3, "UNKNOWN": -1}
+    highest_risk = (
+        max(risk_levels, key=lambda r: _severity_rank.get(r, -1))
+        if risk_levels else "N/A"
+    )
+    eval_entries       = [e for e in entries if e.get("event_type") == "sage_evaluate"]
+    intercept_entries  = [e for e in entries if e.get("event_type") == "file_write_intercepted"]
+    decision_entries   = [e for e in entries if e.get("developer_choice")]
+    auto_approved      = sum(1 for e in entries if e.get("decision") == "auto_approved")
+    model_trained_entries = [e for e in entries if e.get("event_type") == "model_trained"]
+    all_flags      = [f for e in entries for f in e.get("compliance_flags", [])]
+    all_protected  = list({a for e in entries for a in e.get("protected_attributes", [])})
+    all_proxy      = list({a for e in entries for a in e.get("proxy_attributes", [])})
+    all_regs       = list({r for e in entries for r in e.get("regulations", [])})
+    all_udhr       = list({a for e in entries for a in e.get("udhr_articles", [])})
+    # ── Render ───────────────────────────────────────────────────────────────
+    r = f"""# SAGE Governance Report — Model Card
+> **Generated by** SAGE (Supervisory Agentic Governance Engine)
+> **Report date:** {now}
+> **Session:** `{session_id or 'all'}`
+> **Audit entries:** {len(entries)}
+---
+## 1. Model / System Overview
+| Field | Value |
+|-------|-------|
+| Detected domain(s) | {', '.join(f'`{d}`' for d in domains)} |
+| Highest risk level | **{highest_risk}** |
+| Total audit entries | {len(entries)} |
+| Prompts evaluated | {len(eval_entries)} |
+| File writes intercepted | {len(intercept_entries)} |
+| Auto-approved (low risk) | {auto_approved} |
+| Developer decisions recorded | {len(decision_entries)} |
+---
+## 2. Intended Use
+"""
+    for entry in eval_entries:
+        r += f"### Prompt\n> {entry.get('intent_summary', 'N/A')[:250]}\n\n"
+        r += f"| Field | Value |\n|-------|-------|\n"
+        r += f"| Domain | `{entry.get('domain', 'N/A')}` |\n"
+        r += f"| Risk level | **{entry.get('risk_level', 'N/A')}** |\n"
+        r += f"| EU AI Act | `{entry.get('eu_ai_act_annex') or 'Not classified as high-risk'}` |\n"
+        r += f"| Timestamp | {entry.get('timestamp', 'N/A')} |\n"
+        r += f"| Audit hash | `{entry.get('entry_hash', 'N/A')[:16]}...` |\n\n"
+    r += """---
+## 3. Regulatory Classification
+"""
+    if all_regs:
+        for reg in all_regs:
+            r += f"- {reg}\n"
+    else:
+        r += "_No high-risk regulatory classification triggered in this session._\n"
+    if all_udhr:
+        r += "\n**UDHR Articles implicated:**\n"
+        for art in all_udhr:
+            r += f"- {art}\n"
+    r += """
+---
+## 4. Fairness Analysis
+### 4.1 Protected Attributes
+"""
+    if all_protected:
+        r += "_The following protected attributes were detected in developer prompts or code:_\n\n"
+        for attr in all_protected:
+            r += f"- `{attr}`\n"
+    else:
+        r += "_No protected attributes detected directly in this session._\n"
+    r += "\n### 4.2 Proxy Attributes\n\n"
+    if all_proxy:
+        r += "_The following features are documented proxies for protected characteristics:_\n\n"
+        for attr in all_proxy:
+            r += f"- `{attr}`\n"
+    else:
+        r += "_No proxy attributes detected in this session._\n"
+    r += "\n### 4.3 Developer Fairness Decisions\n\n"
+    if decision_entries:
+        for entry in decision_entries:
+            r += f"**Choice:** `{entry.get('developer_choice', 'N/A')}`\n\n"
+            r += f"- Event type: `{entry.get('event_type', 'N/A')}`\n"
+            r += f"- Reasoning: {entry.get('choice_reasoning') or '_Not provided_'}\n"
+            r += f"- Timestamp: {entry.get('timestamp', 'N/A')}\n"
+            r += f"- Audit hash: `{entry.get('entry_hash', 'N/A')[:16]}...`\n\n"
+    else:
+        r += "_No fairness option selections recorded in this session._\n"
+    r += "\n### 4.4 Fairness Impossibility Note\n\n"
+    any_impossible = any(e.get("fairness_impossibility") for e in entries)
+    if any_impossible:
+        r += (
+            "> ⚠️ **Fairness Impossibility Theorem applies to this session.** "
+            "When base rates differ across groups, Demographic Parity, Equalized Odds, "
+            "and Predictive Parity cannot all be satisfied simultaneously. "
+            "The developer's fairness choice above reflects a values judgment, "
+            "not a technical oversight. "
+            "_(Barocas, Hardt & Narayanan, 2023; Chouldechova, 2016)_\n"
+        )
+    else:
+        r += "_Fairness impossibility theorem not triggered in this session._\n"
+    r += "\n"
+    # Section 5: Training Data
+    r += "---\n\n## 5. Training Data\n\n"
+    if model_trained_entries:
+        for idx, entry in enumerate(model_trained_entries, 1):
+            dataset = entry.get("dataset_info", {})
+            r += f"### Model training instance {idx}\n"
+            r += "| Field | Value |\n|---|---|\n"
+            r += f"| Dataset Name | `{dataset.get('name', 'N/A')}` |\n"
+            r += f"| Sample Count | {dataset.get('samples', 'N/A')} |\n"
+            r += f"| Features | {', '.join(f'`{f}`' for f in dataset.get('features', [])) or 'N/A'} |\n"
+            r += f"| Sensitive features | {', '.join(f'`{f}`' for f in dataset.get('sensitive_features', [])) or 'None'} |\n"
+            r += f"| Timestamp | {entry.get('timestamp', 'N/A')} |\n\n"
+    else:
+        r += "_No model training or dataset profiling recorded in this session._\n\n"
+    # Section 6: Performance & Test Metrics
+    r += "---\n\n## 6. Performance & Test Metrics\n\n"
+    if model_trained_entries:
+        for idx, entry in enumerate(model_trained_entries, 1):
+            metrics = entry.get("metrics", {})
+            r += f"### Model evaluation metrics {idx}\n"
+            r += "| Metric | Value |\n|---|---|\n"
+            for k, v in metrics.items():
+                if isinstance(v, float):
+                    r += f"| {k} | {v:.4f} |\n"
+                else:
+                    r += f"| {k} | {v} |\n"
+            r += "\n"
+    else:
+        r += "_No model performance metrics or evaluation test results recorded in this session._\n\n"
+    # Section 7: Child Safety & Safeguarding
+    r += "---\n\n## 7. Child Safety & Safeguarding\n\n"
+    safeguarding_choices = [
+        e for e in entries
+        if e.get("event_type") == "fairness_option_selected"
+        and (
+            e.get("developer_choice") in ("human_in_loop_escalation", "metadata_only_retention", "recall_first_detection")
+            or "human" in str(e.get("developer_choice")).lower()
+            or "metadata" in str(e.get("developer_choice")).lower()
+            or "recall" in str(e.get("developer_choice")).lower()
+        )
+    ]
+    safeguarding_violations = [
+        e for e in entries
+        if e.get("event_type") == "file_write_intercepted"
+        and e.get("highest_risk_finding", {}).get("category") == "SAFEGUARDING_VIOLATION"
+    ]
+    r += "### 7.1 Safeguarding Decisions\n\n"
+    if safeguarding_choices:
+        for choice in safeguarding_choices:
+            r += f"- **Decision:** `{choice.get('developer_choice')}`\n"
+            r += f"  - Reasoning: {choice.get('choice_reasoning') or '_Not provided_'}\n"
+            r += f"  - Timestamp: {choice.get('timestamp')}\n"
+    else:
+        r += "_No child safeguarding decisions recorded in this session._\n\n"
+    r += "### 7.2 Safeguarding Scanner Warnings & Violations\n\n"
+    if safeguarding_violations:
+        r += "| File | Violation Details | Action taken | Timestamp |\n|---|---|---|---|\n"
+        for v in safeguarding_violations:
+            finding = v.get("highest_risk_finding", {})
+            r += f"| `{v.get('filepath')}` | {finding.get('description')} (Severity: {finding.get('severity')}) | `{v.get('decision')}` | {v.get('timestamp')} |\n"
+        r += "\n"
+    else:
+        r += "_No safeguarding violations or blocked auto-escalation/chat logging patterns encountered._\n\n"
+    r += """---
+## 8. Security Findings
+"""
+    if intercept_entries:
+        for entry in intercept_entries:
+            r += f"### `{entry.get('filepath', 'Code scan')}` — {entry.get('timestamp', '')}\n\n"
+            r += f"- **SAGE decision:** `{entry.get('decision', 'pending')}`\n"
+            r += f"- **Total findings:** {entry.get('total_findings', 0)}\n"
+            finding = entry.get("highest_risk_finding")
+            if finding:
+                r += f"- **Highest severity:** `{finding.get('severity', 'N/A')}`\n"
+                r += f"- **Category:** {finding.get('category', 'N/A')}\n"
+                r += f"- **Description:** {finding.get('description', 'N/A')}\n"
+            dev_decision = entry.get("developer_decision")
+            if dev_decision:
+                r += f"- **Developer action:** `{dev_decision}`\n"
+            r += "\n"
+    else:
+        r += "_No file write interceptions recorded in this session._\n"
+    r += """
+---
+## 9. Compliance Flags
+"""
+    if all_flags:
+        unique_flags = list(dict.fromkeys(all_flags))  # preserve order, deduplicate
+        for flag in unique_flags:
+            r += f"⚠️ {flag}\n\n"
+    else:
+        r += "_No compliance flags raised in this session._\n"
+    r += f"""
+---
+## 10. Audit Trail Integrity
+| Property | Value |
+|----------|-------|
+| Audit file | `audit-trail/decisions.jsonl` |
+| Entry count | {len(entries)} |
+| Hash algorithm | SHA-256 |
+| Chain type | Sequential (prev_hash → entry_hash) |
+| Storage | Local filesystem |
+> **Limitation (be honest):** The SHA-256 chain proves entries were not modified
+> or reordered within the session. It does NOT prevent local file deletion.
+> For an immutable external audit log, forward entries to a remote append-only
+> service (e.g. AWS CloudTrail, Azure Monitor, or a Git commit per entry).
+---
+## 11. Human Oversight
+"""
+    hitl = [e for e in entries if e.get("requires_human_review")]
+    if hitl:
+        r += f"**{len(hitl)} item(s) flagged for mandatory human review:**\n\n"
+        for entry in hitl:
+            r += (
+                f"- {entry.get('intent_summary', 'N/A')[:120]} "
+                f"_(Risk: {entry.get('risk_level')})_\n"
+            )
+    else:
+        r += "_No items required human review escalation in this session._\n"
+    r += """
+---
+## 12. Known Limitations
+- **Policy retrieval is keyword-based** — obfuscated variable names or indirect
+  descriptions of protected categories may evade detection.
+- **SHA-256 chain is session-scoped** — not an immutable external audit record.
+- **Fairness impossibility** — no single configuration satisfies all fairness
+  metrics simultaneously when base rates differ across groups.
+- **LLM reasoning is probabilistic** — the sage_reasoning field is enriched by
+  an LLM and may vary across runs. Deterministic fields (risk_level, protected_attributes,
+  eu_ai_act_annex) are rule-based and stable.
+---
+## 13. References
+- EU AI Act — Regulation (EU) 2024/1689
+- UNESCO Recommendation on the Ethics of AI (2021)
+- OECD AI Principles (2019, updated 2024)
+- Universal Declaration of Human Rights (1948)
+- UN Convention on the Rights of the Child (1989)
+- Mitchell et al. (2019). "Model Cards for Model Reporting." *FAccT 2019.*
+- Chouldechova, A. (2016). "Fair Prediction with Disparate Impact."
+- Barocas, Hardt & Narayanan (2023). *Fairness and Machine Learning.*
+- ProPublica (2016). "Machine Bias." COMPAS analysis.
+- Ali et al. (2019). "Discrimination through Optimization." Facebook ad delivery audit.
+---
+_Report generated by SAGE · Beunec Technologies, Inc. · MIT License_
+_Open source: github.com/[your-org]/sage-governance_
+"""
+    return r
+def generate_terminal_summary(session_id: Optional[str] = None) -> str:
+    """Short summary for CLI display after a session completes."""
+    entries = load_audit_entries(session_id)
+    if not entries:
+        return "No audit events recorded in this session."
+    risk_counts: dict[str, int] = {}
+    for e in entries:
+        r = e.get("risk_level")
+        if r:
+            risk_counts[r] = risk_counts.get(r, 0) + 1
+    choices  = [e for e in entries if e.get("developer_choice")]
+    intercepts = [e for e in entries if e.get("event_type") == "file_write_intercepted"]
+    lines = [
+        "┌─────────────────────────────────────────────┐",
+        "│         SAGE Session Summary                │",
+        "└─────────────────────────────────────────────┘",
+        f"  Total audit events  : {len(entries)}",
+    ]
+    order = ["CRITICAL", "HIGH", "MEDIUM", "LOW"]
+    for level in order:
+        if level in risk_counts:
+            lines.append(f"  {level:<10} risk(s) : {risk_counts[level]}")
+    lines.append(f"  File writes blocked : {len(intercepts)}")
+    lines.append(f"  Developer choices   : {len(choices)}")
+    lines.append(f"  Audit file          : audit-trail/decisions.jsonl")
+    lines.append(f"  Full report         : sage report  (generates governance_report_*.md)")
+    return "\n".join(lines)
+def save_report(content: str, session_id: str) -> Path:
+    """Write report to reports/ directory and return the path."""
+    filename = f"governance_report_{session_id}.md"
+    output_path = REPORTS_DIR / filename
+    output_path.write_text(content, encoding="utf-8")
+    return output_path