sage-governance 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.MD +481 -0
- package/LICENSE +21 -0
- package/README.md +319 -0
- package/bin/sage.js +55 -0
- package/claude.json +16 -0
- package/codex.json +22 -0
- package/cursor.json +27 -0
- package/docs/architecture.md +38 -0
- package/opencode.json +24 -0
- package/package.json +58 -0
- package/requirements.txt +7 -0
- package/rules/general/EU_AI_Act_Annex_III.md +29 -0
- package/rules/general/OECD_Principles.md +20 -0
- package/rules/general/UNESCO_AI_Ethics.md +237 -0
- package/rules/general/UN_Human_Rights.md +183 -0
- package/rules/index.json +145 -0
- package/sage/mcp_server.py +459 -0
- package/sage/report_gen.py +408 -0
- package/sage/sage_agent.py +710 -0
- package/sage/security_agent.py +455 -0
- package/sage/startup.py +311 -0
package/sage/startup.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
"""
|
|
2
|
+
startup.py — SAGE Pre-Loader
|
|
3
|
+
════════════════════════════
|
|
4
|
+
Imported ONCE when mcp_server.py starts. Loads every heavy dependency,
|
|
5
|
+
policy document, and lookup table into module-level globals.
|
|
6
|
+
|
|
7
|
+
WHY THIS FILE EXISTS
|
|
8
|
+
────────────────────
|
|
9
|
+
MCP stdio servers run as a persistent Python process — they are NOT
|
|
10
|
+
respawned per tool call. However, if heavy imports live inside tool
|
|
11
|
+
functions they still add latency on the FIRST call of each session.
|
|
12
|
+
Preloading here keeps every tool call fast regardless of import order.
|
|
13
|
+
|
|
14
|
+
Author: SAGE Team / Team SAGE (Hackathon)
|
|
15
|
+
License: MIT
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
import pathlib
|
|
23
|
+
import sys
|
|
24
|
+
from datetime import datetime, timezone
|
|
25
|
+
from typing import Any
|
|
26
|
+
|
|
27
|
+
# ── Resolve project root regardless of CWD ────────────────────────────────────
|
|
28
|
+
# Absolute location of this module; its grandparent directory is treated as
# the project root, so none of the paths below depend on the process CWD.
_THIS_FILE = pathlib.Path(__file__).resolve()
PROJECT_ROOT = _THIS_FILE.parent.parent

# Well-known locations, all derived from the project root.
RULES_DIR = PROJECT_ROOT.joinpath("rules")
AUDIT_FILE = PROJECT_ROOT.joinpath("audit-trail", "decisions.jsonl")
LOGS_FILE = PROJECT_ROOT.joinpath("LOGS.md")
LOCAL_MEMORY = PROJECT_ROOT.joinpath("local_memory.md")
REPORTS_DIR = PROJECT_ROOT.joinpath("reports")

# ── Ensure required dirs & files exist ───────────────────────────────────────
# Import-time side effect: directories are created first so the files that
# live inside them can be touched afterwards. Existing content is untouched.
for _required_dir in (AUDIT_FILE.parent, REPORTS_DIR):
    _required_dir.mkdir(parents=True, exist_ok=True)
for _required_file in (AUDIT_FILE, LOGS_FILE, LOCAL_MEMORY):
    _required_file.touch(exist_ok=True)
|
|
42
|
+
|
|
43
|
+
import hashlib
|
|
44
|
+
|
|
45
|
+
def _read_last_audit_record(path) -> dict:
    """Return the last JSON object stored in *path*, or ``{}`` if unavailable.

    The file is streamed line by line so only one line is held in memory, and
    blank lines are skipped so trailing whitespace does not hide the most
    recent record. A missing file is treated as an empty trail. Any other
    read or parse problem is reported on stderr and also yields ``{}``.
    """
    last_line = ""
    try:
        with open(path, "r", encoding="utf-8") as fh:
            for raw_line in fh:
                if raw_line.strip():
                    last_line = raw_line
        record = json.loads(last_line) if last_line else {}
        if not isinstance(record, dict):
            raise ValueError("last audit line is not a JSON object")
    except FileNotFoundError:
        return {}
    except (OSError, ValueError) as exc:
        # Best effort: the new entry is still written, but the broken link is
        # made visible instead of being swallowed silently.
        print(
            f"[SAGE audit] WARNING: could not read previous audit entry ({exc}); "
            "chain continues with an empty prev_hash.",
            file=sys.stderr,
        )
        return {}
    return record


def write_audit_entry(entry: dict) -> str:
    """
    Append one JSON line to decisions.jsonl with a SHA-256 chain link.

    The last non-blank line of the audit file supplies ``prev_hash`` (its
    ``entry_hash``), which keeps the chain intact across separate
    runs/processes. If ``entry`` carries no ``session_id``, the one from the
    last record is reused; failing that, a UTC ``%Y%m%d_%H%M%S`` stamp is
    generated.

    The hash covers ``json.dumps(entry, sort_keys=True, default=str)`` followed
    by ``prev_hash``, computed before ``entry_hash``/``prev_hash`` are added,
    so it can be recomputed from the stored record minus those two fields.

    Args:
        entry: Record to persist. It is mutated in place: ``session_id``,
            ``timestamp``, ``entry_hash`` and ``prev_hash`` are set.

    Returns:
        The hex SHA-256 digest stored as ``entry_hash``.

    Raises:
        OSError: If the audit file cannot be opened for appending.
    """
    last_record = _read_last_audit_record(AUDIT_FILE)

    prev_hash = last_record.get("entry_hash")
    if not isinstance(prev_hash, str):
        # Missing, null or malformed link: fall back to the chain-start value
        # rather than failing on the string concatenation below.
        prev_hash = ""

    session_id = entry.get("session_id") or last_record.get("session_id")
    if not session_id:
        session_id = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")

    # A dict that was already written once still carries its old chain fields.
    # Drop them so they are not hashed and then overwritten, which would make
    # the stored hash impossible to reproduce from the stored record.
    entry.pop("entry_hash", None)
    entry.pop("prev_hash", None)

    entry["session_id"] = session_id
    entry["timestamp"] = datetime.now(timezone.utc).isoformat()

    payload = json.dumps(entry, sort_keys=True, default=str) + prev_hash
    current_hash = hashlib.sha256(payload.encode()).hexdigest()

    entry["entry_hash"] = current_hash
    entry["prev_hash"] = prev_hash

    with open(AUDIT_FILE, "a", encoding="utf-8") as fh:
        fh.write(json.dumps(entry, default=str) + "\n")

    return current_hash
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
87
|
+
# POLICY DOCUMENTS
|
|
88
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
89
|
+
|
|
90
|
+
# Policy texts keyed by file stem (e.g. "OECD_Principles"), read from
# rules/general/*.md at import time.
POLICY_DOCS: dict[str, str] = {}
_general_dir = RULES_DIR / "general"
if _general_dir.is_dir():
    # sorted(): directory listing order depends on the filesystem; sorting
    # keeps the key order (and the startup report) stable across machines.
    for _policy_file in sorted(_general_dir.glob("*.md")):
        try:
            POLICY_DOCS[_policy_file.stem] = _policy_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as _policy_err:
            # One unreadable document should not stop the server from starting;
            # report it and keep loading the rest.
            print(
                f"[SAGE startup] WARNING: could not read policy file "
                f"{_policy_file.name}: {_policy_err}",
                file=sys.stderr,
            )

# Parsed contents of rules/index.json; stays empty when the file is missing,
# unreadable, malformed, or does not hold a JSON object.
POLICY_INDEX: dict[str, Any] = {}
_index_path = RULES_DIR / "index.json"
if _index_path.is_file():
    try:
        _loaded_index = json.loads(_index_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as _index_err:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(
            f"[SAGE startup] WARNING: could not load {_index_path.name}: {_index_err}",
            file=sys.stderr,
        )
    else:
        if isinstance(_loaded_index, dict):
            POLICY_INDEX = _loaded_index
        else:
            print(
                f"[SAGE startup] WARNING: {_index_path.name} does not contain a "
                "JSON object; ignoring it.",
                file=sys.stderr,
            )
|
|
103
|
+
|
|
104
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
105
|
+
# LLM CLIENT (OpenAI — used only for human-readable reasoning enrichment)
|
|
106
|
+
# Override model with SAGE_LLM_MODEL env var. Defaults to gpt-4o-mini.
|
|
107
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
108
|
+
|
|
109
|
+
# LLM state shared with the rest of the package. LLM_AVAILABLE is True only
# when LLM_CLIENT holds a constructed client.
LLM_AVAILABLE = False
LLM_CLIENT: Any = None
# An unset, empty or whitespace-only SAGE_LLM_MODEL falls back to the default.
LLM_MODEL = (os.environ.get("SAGE_LLM_MODEL") or "").strip() or "gpt-4o-mini"

# The try body holds only the import, so "package not found" is reported only
# when the package really is missing.
try:
    import openai as _openai
except ImportError:
    print(
        "[SAGE startup] WARNING: 'openai' package not found. "
        "Run: pip install openai",
        file=sys.stderr,
    )
else:
    _api_key = os.environ.get("OPENAI_API_KEY", "")
    if not _api_key:
        print(
            "[SAGE startup] WARNING: OPENAI_API_KEY not set. "
            "Deterministic-only mode active (no LLM enrichment).",
            file=sys.stderr,
        )
    else:
        try:
            LLM_CLIENT = _openai.OpenAI(api_key=_api_key)
        except Exception as _llm_err:  # startup boundary: degrade, don't abort
            # The LLM is optional enrichment, so a client that cannot be built
            # (for example an SDK without an `OpenAI` class) must not stop the
            # server from starting.
            LLM_CLIENT = None
            print(
                f"[SAGE startup] WARNING: could not initialise OpenAI client "
                f"({_llm_err}). Deterministic-only mode active (no LLM enrichment).",
                file=sys.stderr,
            )
        else:
            LLM_AVAILABLE = True
|
|
131
|
+
|
|
132
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
133
|
+
# OPTIONAL HEAVY DEPS (fail gracefully if not installed)
|
|
134
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
135
|
+
|
|
136
|
+
# Each optional package is imported once here; the matching *_AVAILABLE flag
# records whether the import succeeded so callers can branch on it.
try:
    from fairlearn.metrics import (  # noqa: F401
        demographic_parity_difference,
        equalized_odds_difference,
        MetricFrame,
    )
except ImportError:
    FAIRLEARN_AVAILABLE = False
else:
    FAIRLEARN_AVAILABLE = True

try:
    import diffprivlib  # noqa: F401
except ImportError:
    DIFFPRIVLIB_AVAILABLE = False
else:
    DIFFPRIVLIB_AVAILABLE = True
|
|
153
|
+
|
|
154
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
155
|
+
# PROTECTED ATTRIBUTES (direct usage triggers P1 finding)
|
|
156
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
157
|
+
|
|
158
|
+
# Attribute names treated as protected characteristics. Order is preserved
# because consumers may iterate the list.
PROTECTED_ATTRIBUTES: list[str] = [
    "race",
    "ethnicity",
    "color",
    "sex",
    "gender",
    "age",
    "religion",
    "national_origin",
    "nationality",
    "disability",
    "pregnancy",
    "marital_status",
    "sexual_orientation",
    "gender_identity",
    "skin_color",
]
|
|
164
|
+
|
|
165
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
166
|
+
# PROXY ATTRIBUTE MAP
|
|
167
|
+
# Semantic — not grep. Maps protected characteristics → known proxy variables.
|
|
168
|
+
# Source: ProPublica COMPAS analysis, Ali et al. 2019, Lambrecht & Tucker 2019.
|
|
169
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
170
|
+
|
|
171
|
+
# Characteristic → feature names listed as proxies for it. A name may appear
# under several keys (e.g. "zip_code"); lists keep their original order.
PROXY_ATTRIBUTE_MAP: dict[str, list[str]] = {
    "race": [
        # spellings of postal / location fields
        "zip_code",
        "zip code",
        "zipcode",
        "postal_code",
        "postal code",
        "neighborhood",
        "census_tract",
        "school_district",
        "ward",
        # name fields
        "surname",
        "last_name",
        "family_name",
        "name",
        # remaining entries
        "bank_branch",
        "grocery_store_distance",
        "church_attendance",
        "prior_arrests",
        "arrest_history",
    ],
    "gender": [
        "browsing_history",
        "page_likes",
        "purchase_history",
        "maternity",
        "paternity",
        "childcare",
        "toy_preferences",
        "cosmetics",
        "sports_interest",
        "car_type",
        "clothing_category",
        "grooming_products",
    ],
    "age": [
        "graduation_year",
        "years_experience",
        "first_job_year",
        "account_age",
        "profile_creation_date",
        "class_year",
    ],
    "socioeconomic_status": [
        "education_level",
        "school_name",
        "employment_gap",
        "credit_history_length",
        "bank_type",
        "car_ownership",
        "zip_code",
        "neighborhood",
        "income_bracket",
    ],
    "criminal_history_as_proxy_for_race": [
        "prior_arrests",
        "priors_count",
        "juv_fel_count",
        "family_history",
        "neighborhood_crime_rate",
        "zip_code",
        "school_district",
    ],
}
|
|
200
|
+
|
|
201
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
202
|
+
# EU AI ACT ANNEX III — HIGH-RISK CATEGORIES
|
|
203
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
204
|
+
|
|
205
|
+
# Domain key → {"annex": reference label, "description": summary text,
# "keywords": lowercase terms associated with the domain}.
EU_AI_ACT_ANNEX_III: dict[str, dict[str, Any]] = {
    "criminal_justice": {
        "annex": "Annex III.6.d",
        "description": "AI systems for law enforcement assessing risk of offending or re-offending (e.g. COMPAS, recidivism prediction)",
        "keywords": [
            "recidivism",
            "criminal",
            "parole",
            "sentencing",
            "compas",
            "reoffend",
            "bail",
            "arrest",
            "two_year_recid",
            "risk score",
            "reoffending",
            "criminal justice",
        ],
    },
    "employment": {
        "annex": "Annex III.4.a",
        "description": "AI systems for recruitment/selection, including targeted job advertisements, CV filtering, candidate evaluation",
        "keywords": [
            "job",
            "recruitment",
            "hiring",
            "resume",
            "cv",
            "employment",
            "advertisement",
            "candidate",
            "fairjob",
            "job ad",
            "click",
            "click-through",
            "ctr",
        ],
    },
    "credit_scoring": {
        "annex": "Annex III.5.b",
        "description": "AI systems for creditworthiness evaluation or credit score determination",
        "keywords": [
            "credit",
            "loan",
            "default",
            "creditworthiness",
            "mortgage",
            "financial risk",
            "lending",
            "apple card",
            "credit card",
            "credit limit",
            "credit score",
        ],
    },
    "education": {
        "annex": "Annex III.3.a",
        "description": "AI systems determining access to educational institutions or evaluating students",
        "keywords": [
            "education",
            "school",
            "admission",
            "student",
            "grade",
            "academic",
            "university",
            "college",
            "exam",
        ],
    },
    "essential_services": {
        "annex": "Annex III.5.a",
        "description": "AI systems for essential private and public services (healthcare, insurance, housing, social security)",
        "keywords": [
            "healthcare",
            "insurance",
            "benefit",
            "welfare",
            "housing",
            "social security",
            "medical",
        ],
    },
    "children_safety": {
        "annex": "Annex III (context-dependent)",
        "description": "AI systems impacting children's safety, privacy, or wellbeing; UNICEF 10 Principles apply",
        "keywords": [
            "child",
            "minor",
            "children",
            "safeguarding",
            "grooming",
            "bullying",
            "moderation",
            "self-harm",
            "abuse",
            "distress",
            "escalation",
            "chat safety",
        ],
    },
}
|
|
277
|
+
|
|
278
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
279
|
+
# UDHR ARTICLE MAP (domain → relevant UDHR articles; children_safety lists UN CRC articles)
|
|
280
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
281
|
+
|
|
282
|
+
# Domain key → human-rights article labels cited for that domain. The
# "children_safety" entries are UN CRC labels; "general" is a catch-all key.
UDHR_ARTICLE_MAP: dict[str, list[str]] = {
    "criminal_justice": [
        "Article 7",
        "Article 10",
        "Article 11",
    ],
    "employment": [
        "Article 23",
    ],
    "credit_scoring": [
        "Article 22",
    ],
    "education": [
        "Article 26",
    ],
    "essential_services": [
        "Article 22",
        "Article 25",
    ],
    "children_safety": [
        "UN CRC Article 3 (best interests)",
        "UN CRC Article 12 (right to be heard)",
        "UN CRC Article 16 (privacy)",
        "UN CRC Article 19 (protection from abuse)",
        "UN CRC Article 34 (sexual exploitation)",
    ],
    "general": [
        "Article 2",
        "Article 7",
    ],
}
|
|
297
|
+
|
|
298
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
299
|
+
# STARTUP REPORT
|
|
300
|
+
# ══════════════════════════════════════════════════════════════════════════════
|
|
301
|
+
|
|
302
|
+
# One-time summary of what was loaded, written to stderr line by line.
_policies_loaded = list(POLICY_DOCS) or 'none (add to rules/general/)'
_llm_status = 'available (' + LLM_MODEL + ')' if LLM_AVAILABLE else 'deterministic-only mode'

for _report_line in (
    f"[SAGE startup] ✅ Policies loaded: {_policies_loaded}",
    f"[SAGE startup] ✅ LLM: {_llm_status}",
    f"[SAGE startup] ✅ Fairlearn: {FAIRLEARN_AVAILABLE} | diffprivlib: {DIFFPRIVLIB_AVAILABLE}",
    f"[SAGE startup] ✅ Session start: {datetime.now(timezone.utc).isoformat()}",
):
    print(_report_line, file=sys.stderr)
|