agentic-team-templates 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +280 -0
- package/bin/cli.js +5 -0
- package/package.json +47 -0
- package/src/index.js +521 -0
- package/templates/_shared/code-quality.md +162 -0
- package/templates/_shared/communication.md +114 -0
- package/templates/_shared/core-principles.md +62 -0
- package/templates/_shared/git-workflow.md +165 -0
- package/templates/_shared/security-fundamentals.md +173 -0
- package/templates/blockchain/.cursorrules/defi-patterns.md +520 -0
- package/templates/blockchain/.cursorrules/gas-optimization.md +339 -0
- package/templates/blockchain/.cursorrules/overview.md +130 -0
- package/templates/blockchain/.cursorrules/security.md +318 -0
- package/templates/blockchain/.cursorrules/smart-contracts.md +364 -0
- package/templates/blockchain/.cursorrules/testing.md +415 -0
- package/templates/blockchain/.cursorrules/web3-integration.md +538 -0
- package/templates/blockchain/CLAUDE.md +389 -0
- package/templates/cli-tools/.cursorrules/architecture.md +412 -0
- package/templates/cli-tools/.cursorrules/arguments.md +406 -0
- package/templates/cli-tools/.cursorrules/distribution.md +546 -0
- package/templates/cli-tools/.cursorrules/error-handling.md +455 -0
- package/templates/cli-tools/.cursorrules/overview.md +136 -0
- package/templates/cli-tools/.cursorrules/testing.md +537 -0
- package/templates/cli-tools/.cursorrules/user-experience.md +545 -0
- package/templates/cli-tools/CLAUDE.md +356 -0
- package/templates/data-engineering/.cursorrules/data-modeling.md +367 -0
- package/templates/data-engineering/.cursorrules/data-quality.md +455 -0
- package/templates/data-engineering/.cursorrules/overview.md +85 -0
- package/templates/data-engineering/.cursorrules/performance.md +339 -0
- package/templates/data-engineering/.cursorrules/pipeline-design.md +280 -0
- package/templates/data-engineering/.cursorrules/security.md +460 -0
- package/templates/data-engineering/.cursorrules/testing.md +452 -0
- package/templates/data-engineering/CLAUDE.md +974 -0
- package/templates/devops-sre/.cursorrules/capacity-planning.md +653 -0
- package/templates/devops-sre/.cursorrules/change-management.md +584 -0
- package/templates/devops-sre/.cursorrules/chaos-engineering.md +651 -0
- package/templates/devops-sre/.cursorrules/disaster-recovery.md +641 -0
- package/templates/devops-sre/.cursorrules/incident-management.md +565 -0
- package/templates/devops-sre/.cursorrules/observability.md +714 -0
- package/templates/devops-sre/.cursorrules/overview.md +230 -0
- package/templates/devops-sre/.cursorrules/postmortems.md +588 -0
- package/templates/devops-sre/.cursorrules/runbooks.md +760 -0
- package/templates/devops-sre/.cursorrules/slo-sli.md +617 -0
- package/templates/devops-sre/.cursorrules/toil-reduction.md +567 -0
- package/templates/devops-sre/CLAUDE.md +1007 -0
- package/templates/documentation/.cursorrules/adr.md +277 -0
- package/templates/documentation/.cursorrules/api-documentation.md +411 -0
- package/templates/documentation/.cursorrules/code-comments.md +253 -0
- package/templates/documentation/.cursorrules/maintenance.md +260 -0
- package/templates/documentation/.cursorrules/overview.md +82 -0
- package/templates/documentation/.cursorrules/readme-standards.md +306 -0
- package/templates/documentation/CLAUDE.md +120 -0
- package/templates/fullstack/.cursorrules/api-contracts.md +331 -0
- package/templates/fullstack/.cursorrules/architecture.md +298 -0
- package/templates/fullstack/.cursorrules/overview.md +109 -0
- package/templates/fullstack/.cursorrules/shared-types.md +348 -0
- package/templates/fullstack/.cursorrules/testing.md +386 -0
- package/templates/fullstack/CLAUDE.md +349 -0
- package/templates/ml-ai/.cursorrules/data-engineering.md +483 -0
- package/templates/ml-ai/.cursorrules/deployment.md +601 -0
- package/templates/ml-ai/.cursorrules/model-development.md +538 -0
- package/templates/ml-ai/.cursorrules/monitoring.md +658 -0
- package/templates/ml-ai/.cursorrules/overview.md +131 -0
- package/templates/ml-ai/.cursorrules/security.md +637 -0
- package/templates/ml-ai/.cursorrules/testing.md +678 -0
- package/templates/ml-ai/CLAUDE.md +1136 -0
- package/templates/mobile/.cursorrules/navigation.md +246 -0
- package/templates/mobile/.cursorrules/offline-first.md +302 -0
- package/templates/mobile/.cursorrules/overview.md +71 -0
- package/templates/mobile/.cursorrules/performance.md +345 -0
- package/templates/mobile/.cursorrules/testing.md +339 -0
- package/templates/mobile/CLAUDE.md +233 -0
- package/templates/platform-engineering/.cursorrules/ci-cd.md +778 -0
- package/templates/platform-engineering/.cursorrules/developer-experience.md +632 -0
- package/templates/platform-engineering/.cursorrules/infrastructure-as-code.md +600 -0
- package/templates/platform-engineering/.cursorrules/kubernetes.md +710 -0
- package/templates/platform-engineering/.cursorrules/observability.md +747 -0
- package/templates/platform-engineering/.cursorrules/overview.md +215 -0
- package/templates/platform-engineering/.cursorrules/security.md +855 -0
- package/templates/platform-engineering/.cursorrules/testing.md +878 -0
- package/templates/platform-engineering/CLAUDE.md +850 -0
- package/templates/utility-agent/.cursorrules/action-control.md +284 -0
- package/templates/utility-agent/.cursorrules/context-management.md +186 -0
- package/templates/utility-agent/.cursorrules/hallucination-prevention.md +253 -0
- package/templates/utility-agent/.cursorrules/overview.md +78 -0
- package/templates/utility-agent/.cursorrules/token-optimization.md +369 -0
- package/templates/utility-agent/CLAUDE.md +513 -0
- package/templates/web-backend/.cursorrules/api-design.md +255 -0
- package/templates/web-backend/.cursorrules/authentication.md +309 -0
- package/templates/web-backend/.cursorrules/database-patterns.md +298 -0
- package/templates/web-backend/.cursorrules/error-handling.md +366 -0
- package/templates/web-backend/.cursorrules/overview.md +69 -0
- package/templates/web-backend/.cursorrules/security.md +358 -0
- package/templates/web-backend/.cursorrules/testing.md +395 -0
- package/templates/web-backend/CLAUDE.md +366 -0
- package/templates/web-frontend/.cursorrules/accessibility.md +296 -0
- package/templates/web-frontend/.cursorrules/component-patterns.md +204 -0
- package/templates/web-frontend/.cursorrules/overview.md +72 -0
- package/templates/web-frontend/.cursorrules/performance.md +325 -0
- package/templates/web-frontend/.cursorrules/state-management.md +227 -0
- package/templates/web-frontend/.cursorrules/styling.md +271 -0
- package/templates/web-frontend/.cursorrules/testing.md +311 -0
- package/templates/web-frontend/CLAUDE.md +399 -0
package/templates/ml-ai/.cursorrules/security.md

@@ -0,0 +1,637 @@

# ML Security & Responsible AI

Guidelines for securing machine learning systems, covering adversarial robustness, fairness assessment, and responsible AI practices.

## Security Threats

### ML-Specific Attack Vectors

| Attack Type | Description | Impact |
|-------------|-------------|--------|
| Data Poisoning | Corrupting training data | Model learns wrong patterns |
| Model Extraction | Querying to steal model | Intellectual property theft |
| Adversarial Examples | Crafted inputs to fool model | Incorrect predictions |
| Membership Inference | Detecting if data was in training set | Privacy breach |
| Model Inversion | Reconstructing training data | Data leakage |
| Backdoor Attacks | Hidden triggers in model | Targeted misclassification |

### NIST AI Risk Framework Categories

1. **Evasion Attacks** - Manipulating inputs to cause misclassification
2. **Poisoning Attacks** - Corrupting training data or process
3. **Privacy Attacks** - Extracting sensitive information
4. **Abuse Attacks** - Using model for unintended harmful purposes

## Input Validation

### Schema Validation

```python
import math
import uuid

import numpy as np
from pydantic import BaseModel, Field, validator

class PredictionRequest(BaseModel):
    """Validated and sanitized prediction request."""

    features: dict[str, float] = Field(..., min_items=1)
    request_id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    @validator("features")
    def validate_features(cls, v):
        """Validate feature completeness and types."""
        required = {"feature_1", "feature_2", "feature_3"}
        missing = required - set(v.keys())
        if missing:
            raise ValueError(f"Missing required features: {missing}")

        for name, value in v.items():
            if not isinstance(value, (int, float)):
                raise ValueError(f"Feature {name} must be numeric")
            if math.isnan(value) or math.isinf(value):
                raise ValueError(f"Feature {name} contains invalid value (NaN/Inf)")

        return v

    @validator("features")
    def validate_ranges(cls, v):
        """Validate feature values are within expected ranges."""
        ranges = {
            "feature_1": (0.0, 1.0),
            "feature_2": (-100.0, 100.0),
            "feature_3": (0.0, 10000.0),
        }

        for name, (min_val, max_val) in ranges.items():
            if name in v:
                if not (min_val <= v[name] <= max_val):
                    raise ValueError(
                        f"Feature {name} value {v[name]} out of range [{min_val}, {max_val}]"
                    )

        return v

    @validator("features")
    def sanitize_features(cls, v):
        """Sanitize feature values."""
        sanitized = {}
        for name, value in v.items():
            # Clip to reasonable ranges
            sanitized[name] = float(np.clip(value, -1e6, 1e6))
        return sanitized
```
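
For illustration, a quick sketch of how a validation failure surfaces at the call site (feature names follow the hypothetical schema above):

```python
from pydantic import ValidationError

try:
    request = PredictionRequest(features={"feature_1": 0.5, "feature_2": 12.0})
except ValidationError as exc:
    # feature_3 is missing, so pydantic wraps the ValueError from the validator
    print(exc)
```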

### Rate Limiting

```python
from fastapi import FastAPI, Request
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

limiter = Limiter(key_func=get_remote_address)
app = FastAPI()
# slowapi requires the limiter on app.state plus a handler that returns 429s
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

@app.post("/predict")
@limiter.limit("100/minute")
async def predict(request: Request, payload: PredictionRequest):
    """Rate-limited prediction endpoint."""
    # ... prediction logic
```
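
IP-based keys can unfairly throttle clients behind a shared NAT. Continuing the snippet above, one option (a sketch, assuming an `X-API-Key` header) is to key limits on the API key when present:

```python
def api_key_or_ip(request: Request) -> str:
    # Fall back to the remote address for unauthenticated callers.
    return request.headers.get("X-API-Key") or get_remote_address(request)

limiter = Limiter(key_func=api_key_or_ip)
```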

### Anomaly Detection on Inputs

```python
import pandas as pd
from sklearn.ensemble import IsolationForest

class InputAnomalyDetector:
    """Detect anomalous inputs before prediction."""

    def __init__(self, contamination: float = 0.01):
        self.detector = IsolationForest(
            contamination=contamination,
            random_state=42,
        )
        self._fitted = False

    def fit(self, reference_data: pd.DataFrame) -> None:
        """Fit on reference (training) data."""
        self.detector.fit(reference_data)
        self._fitted = True

    def check(self, features: pd.DataFrame) -> dict:
        """Check if a single-row input is anomalous."""
        if not self._fitted:
            raise ValueError("Detector not fitted")

        scores = self.detector.decision_function(features)
        predictions = self.detector.predict(features)

        is_anomaly = predictions[0] == -1

        return {
            "is_anomaly": bool(is_anomaly),
            "anomaly_score": float(scores[0]),
            "action": "reject" if is_anomaly else "accept",
        }
```
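
A minimal usage sketch with synthetic stand-in data:

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
X_train = pd.DataFrame(rng.normal(size=(1000, 3)), columns=["f1", "f2", "f3"])

detector = InputAnomalyDetector(contamination=0.01)
detector.fit(X_train)

# Check one incoming request (single-row DataFrame with the same columns)
row = pd.DataFrame([[0.2, -0.5, 8.0]], columns=["f1", "f2", "f3"])
print(detector.check(row))
```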

## Adversarial Robustness

### Adversarial Testing

```python
import foolbox as fb
import numpy as np
import torch

def test_adversarial_robustness(
    model,
    test_data: np.ndarray,
    test_labels: np.ndarray,
    epsilon: float = 0.1,
) -> dict:
    """Test model robustness against adversarial examples."""

    # Wrap model for Foolbox (expects an eval-mode model and torch tensors)
    model.eval()
    fmodel = fb.PyTorchModel(model, bounds=(0, 1))
    images = torch.as_tensor(test_data, dtype=torch.float32)
    labels = torch.as_tensor(test_labels)

    # Test multiple attack types
    attacks = [
        fb.attacks.FGSM(),
        fb.attacks.LinfPGD(),
        fb.attacks.LinfDeepFoolAttack(),
    ]

    results = {}

    for attack in attacks:
        attack_name = type(attack).__name__

        # Generate adversarial examples
        _, advs, success = attack(
            fmodel,
            images,
            labels,
            epsilons=[epsilon],
        )

        # Compute success rate
        success_rate = success.float().mean().item()

        # Compute perturbation magnitude
        perturbations = advs[0] - images
        avg_perturbation = perturbations.abs().mean().item()

        results[attack_name] = {
            "success_rate": success_rate,
            "avg_perturbation": avg_perturbation,
            "robust_accuracy": 1 - success_rate,
        }

    return results
```

### Adversarial Training

```python
import torch
import torch.nn.functional as F

def adversarial_training_step(
    model,
    optimizer,
    inputs: torch.Tensor,
    labels: torch.Tensor,
    epsilon: float = 0.1,
    alpha: float = 0.5,
) -> float:
    """Training step with adversarial examples."""

    model.train()

    # Generate adversarial examples using FGSM
    inputs.requires_grad = True
    outputs = model(inputs)
    loss = F.cross_entropy(outputs, labels)
    loss.backward()

    # Perturb inputs
    perturbation = epsilon * inputs.grad.sign()
    adv_inputs = inputs + perturbation
    adv_inputs = torch.clamp(adv_inputs, 0, 1)

    # Train on both clean and adversarial
    optimizer.zero_grad()

    clean_outputs = model(inputs.detach())
    clean_loss = F.cross_entropy(clean_outputs, labels)

    adv_outputs = model(adv_inputs.detach())
    adv_loss = F.cross_entropy(adv_outputs, labels)

    # Combined loss
    total_loss = (1 - alpha) * clean_loss + alpha * adv_loss
    total_loss.backward()
    optimizer.step()

    return total_loss.item()
```
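
A sketch of how this step slots into an epoch loop; `model`, `optimizer`, and `train_loader` are assumed to exist:

```python
for epoch in range(10):
    epoch_loss = 0.0
    for inputs, labels in train_loader:
        epoch_loss += adversarial_training_step(
            model, optimizer, inputs, labels, epsilon=0.1, alpha=0.5
        )
    print(f"epoch {epoch}: avg loss {epoch_loss / len(train_loader):.4f}")
```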

## Fairness Assessment

### Fairness Metrics

```python
import numpy as np
import pandas as pd
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric

def assess_fairness(
    data: pd.DataFrame,
    predictions: np.ndarray,
    protected_attribute: str,
    label_column: str = "label",
) -> dict:
    """Comprehensive fairness assessment."""

    # Define groups
    privileged_groups = [{protected_attribute: 1}]
    unprivileged_groups = [{protected_attribute: 0}]

    # Create AIF360 datasets
    dataset = BinaryLabelDataset(
        df=data,
        label_names=[label_column],
        protected_attribute_names=[protected_attribute],
    )

    classified_dataset = dataset.copy()
    classified_dataset.labels = predictions.reshape(-1, 1)

    # Compute metrics
    metric = ClassificationMetric(
        dataset,
        classified_dataset,
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )

    return {
        # Group fairness metrics
        "statistical_parity_difference": metric.statistical_parity_difference(),
        "disparate_impact": metric.disparate_impact(),

        # Equality of opportunity
        "equal_opportunity_difference": metric.equal_opportunity_difference(),
        "average_odds_difference": metric.average_odds_difference(),

        # Calibration
        "calibration_difference": compute_calibration_difference(
            data, predictions, protected_attribute
        ),

        # Individual fairness (computed separately)
        "consistency": compute_consistency(data, predictions),
    }

def check_fairness_thresholds(metrics: dict) -> list[dict]:
    """Check if fairness metrics meet acceptable thresholds."""

    thresholds = {
        "statistical_parity_difference": (-0.1, 0.1),
        "disparate_impact": (0.8, 1.25),  # 80% rule
        "equal_opportunity_difference": (-0.1, 0.1),
        "average_odds_difference": (-0.1, 0.1),
    }

    violations = []

    for metric, (lower, upper) in thresholds.items():
        if metric not in metrics:
            continue

        value = metrics[metric]

        if not (lower <= value <= upper):
            violations.append({
                "metric": metric,
                "value": value,
                "threshold": f"[{lower}, {upper}]",
                "severity": "high" if abs(value) > 0.2 else "medium",
            })

    return violations
```
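
`compute_calibration_difference` and `compute_consistency` are referenced above but not defined in this file. Continuing from the block above, a minimal sketch of the former, reading "calibration" as the gap in precision P(y=1 | ŷ=1) between groups (an assumption, since only hard predictions are available here):

```python
def compute_calibration_difference(
    data: pd.DataFrame,
    predictions: np.ndarray,
    protected_attribute: str,
    label_column: str = "label",
) -> float:
    rates = []
    for group in (0, 1):
        # Rows in this group that received a positive prediction
        mask = (data[protected_attribute] == group).to_numpy() & (predictions == 1)
        if mask.sum() == 0:
            return float("nan")  # no positive predictions for this group
        rates.append(data.loc[mask, label_column].mean())
    return float(rates[1] - rates[0])
```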

### Bias Mitigation

```python
from aif360.algorithms.preprocessing import Reweighing
from aif360.algorithms.inprocessing import AdversarialDebiasing  # in-processing alternative (not used below)

def mitigate_bias_preprocessing(
    train_data: pd.DataFrame,
    protected_attribute: str,
) -> tuple[pd.DataFrame, np.ndarray]:
    """Apply preprocessing bias mitigation."""

    dataset = BinaryLabelDataset(
        df=train_data,
        label_names=["label"],
        protected_attribute_names=[protected_attribute],
    )

    # Reweighing
    reweigher = Reweighing(
        unprivileged_groups=[{protected_attribute: 0}],
        privileged_groups=[{protected_attribute: 1}],
    )

    reweighed_dataset = reweigher.fit_transform(dataset)

    return reweighed_dataset.convert_to_dataframe()[0], reweighed_dataset.instance_weights

def mitigate_bias_postprocessing(
    predictions: np.ndarray,
    probabilities: np.ndarray,
    data: pd.DataFrame,
    protected_attribute: str,
    target_metric: str = "statistical_parity",
) -> np.ndarray:
    """Apply threshold adjustment for fairness."""

    # Find optimal thresholds per group
    thresholds = {}

    for group in data[protected_attribute].unique():
        mask = data[protected_attribute] == group
        group_probs = probabilities[mask]

        # Threshold that achieves the target rate (sketched below)
        thresholds[group] = find_fair_threshold(
            group_probs,
            target_rate=0.5,  # Example: equal positive rate
        )

    # Apply group-specific thresholds
    adjusted_predictions = np.zeros_like(predictions)

    for group, threshold in thresholds.items():
        mask = data[protected_attribute] == group
        adjusted_predictions[mask] = (probabilities[mask] >= threshold).astype(int)

    return adjusted_predictions
```
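
`find_fair_threshold` is also undefined here; a quantile-based sketch (rather than a literal binary search) that yields roughly the target positive rate per group:

```python
def find_fair_threshold(group_probs: np.ndarray, target_rate: float = 0.5) -> float:
    # Scores above the (1 - target_rate) quantile are predicted positive,
    # so about target_rate of this group receives a positive prediction.
    return float(np.quantile(group_probs, 1.0 - target_rate))
```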

## Model Explainability

### SHAP Explanations

```python
import numpy as np
import pandas as pd
import shap

def explain_prediction(
    model,
    instance: pd.DataFrame,
    background_data: pd.DataFrame,
    max_display: int = 10,
) -> dict:
    """Generate SHAP explanation for a single prediction."""

    # Create explainer
    explainer = shap.TreeExplainer(model, data=background_data)

    # Compute SHAP values (a list of per-class arrays for some classifiers)
    shap_values = explainer.shap_values(instance)

    # Build explanation
    feature_contributions = pd.DataFrame({
        "feature": instance.columns,
        "value": instance.values[0],
        "shap_value": shap_values[1][0] if isinstance(shap_values, list) else shap_values[0],
    })

    feature_contributions["abs_contribution"] = feature_contributions["shap_value"].abs()
    feature_contributions = feature_contributions.sort_values(
        "abs_contribution", ascending=False
    )

    return {
        "base_value": float(np.atleast_1d(explainer.expected_value)[-1]),
        "prediction": float(model.predict(instance)[0]),
        "top_features": feature_contributions.head(max_display).to_dict(orient="records"),
        "feature_contributions": feature_contributions.to_dict(orient="records"),
    }

def generate_global_explanations(
    model,
    data: pd.DataFrame,
    sample_size: int = 1000,
) -> dict:
    """Generate global model explanations."""

    # Sample data for efficiency
    sample = data.sample(min(sample_size, len(data)), random_state=42)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(sample)
    values = shap_values[1] if isinstance(shap_values, list) else shap_values

    # Feature importance
    feature_importance = pd.DataFrame({
        "feature": sample.columns,
        "importance": np.abs(values).mean(axis=0),
    }).sort_values("importance", ascending=False)

    return {
        "feature_importance": feature_importance.to_dict(orient="records"),
        "shap_values": values,
        "base_value": float(np.atleast_1d(explainer.expected_value)[-1]),
    }
```

### Decision Audit Trail

```python
import json
import os
from dataclasses import asdict, dataclass
from datetime import datetime
from typing import Optional

import pandas as pd

@dataclass
class PredictionAuditLog:
    """Complete audit trail for a prediction."""

    request_id: str
    timestamp: datetime
    model_name: str
    model_version: str

    # Input
    raw_input: dict
    validated_input: dict

    # Processing
    feature_values: dict
    preprocessing_steps: list[str]

    # Output
    prediction: int
    probability: float
    confidence: float

    # Explanation
    top_contributing_features: list[dict]
    shap_base_value: float

    # Metadata
    latency_ms: float
    anomaly_score: Optional[float] = None

    def to_dict(self) -> dict:
        return asdict(self)

class AuditLogger:
    """Log predictions for compliance and debugging."""

    def __init__(self, storage_path: str):
        self.storage_path = storage_path

    def log(self, audit: PredictionAuditLog) -> None:
        """Write audit log."""

        # Partition by date for efficient querying
        date = audit.timestamp.strftime("%Y-%m-%d")
        path = f"{self.storage_path}/date={date}/audit.jsonl"
        os.makedirs(os.path.dirname(path), exist_ok=True)

        with open(path, "a") as f:
            # default=str serializes the datetime field
            f.write(json.dumps(audit.to_dict(), default=str) + "\n")

    def query(
        self,
        start_date: str,
        end_date: str,
        filters: Optional[dict] = None,
    ) -> list[dict]:
        """Query audit logs for analysis."""

        logs = []

        for date in pd.date_range(start_date, end_date):
            path = f"{self.storage_path}/date={date.strftime('%Y-%m-%d')}/audit.jsonl"

            if not os.path.exists(path):
                continue

            with open(path) as f:
                for line in f:
                    log = json.loads(line)

                    if filters and not self._matches_filters(log, filters):
                        continue

                    logs.append(log)

        return logs

    @staticmethod
    def _matches_filters(log: dict, filters: dict) -> bool:
        """True if every filter key equals the corresponding log value."""
        return all(log.get(key) == value for key, value in filters.items())
```
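
A usage sketch with hypothetical values, showing the write/query round trip:

```python
logger = AuditLogger("/var/log/ml-audit")

logger.log(PredictionAuditLog(
    request_id="req-123",
    timestamp=datetime.utcnow(),
    model_name="churn-model",
    model_version="1.4.2",
    raw_input={"feature_1": 0.3},
    validated_input={"feature_1": 0.3},
    feature_values={"feature_1": 0.3},
    preprocessing_steps=["clip"],
    prediction=1,
    probability=0.87,
    confidence=0.74,
    top_contributing_features=[{"feature": "feature_1", "shap_value": 0.4}],
    shap_base_value=0.12,
    latency_ms=12.5,
))

positives = logger.query("2024-01-01", "2024-01-31", filters={"prediction": 1})
```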

## Privacy Protection

### Differential Privacy

```python
import torch
from torch import nn
from torch.utils.data import DataLoader

from opacus import PrivacyEngine

def train_with_differential_privacy(
    model: nn.Module,
    train_loader: DataLoader,
    epochs: int,
    target_epsilon: float = 1.0,
    target_delta: float = 1e-5,
    max_grad_norm: float = 1.0,
) -> tuple[nn.Module, float]:
    """Train model with differential privacy guarantees."""

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    # Attach privacy engine
    privacy_engine = PrivacyEngine()

    model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
        module=model,
        optimizer=optimizer,
        data_loader=train_loader,
        epochs=epochs,
        target_epsilon=target_epsilon,
        target_delta=target_delta,
        max_grad_norm=max_grad_norm,
    )

    # Training loop
    for epoch in range(epochs):
        for batch in train_loader:
            optimizer.zero_grad()
            loss = compute_loss(model, batch)  # task-specific loss, not shown here
            loss.backward()
            optimizer.step()

    # Get actual epsilon spent
    epsilon = privacy_engine.get_epsilon(target_delta)

    return model, epsilon
```
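
`compute_loss` is left task-specific above; for a loader yielding `(inputs, labels)` classification batches, a minimal stand-in could be:

```python
import torch.nn.functional as F

def compute_loss(model: nn.Module, batch) -> torch.Tensor:
    # Assumes (inputs, labels) tuples; swap in the appropriate loss for other tasks.
    inputs, labels = batch
    return F.cross_entropy(model(inputs), labels)
```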

### Data Anonymization

```python
import pandas as pd

def anonymize_training_data(
    data: pd.DataFrame,
    quasi_identifiers: list[str],
    sensitive_columns: list[str],
    k: int = 5,
) -> pd.DataFrame:
    """Apply k-anonymity to training data."""

    anonymized = data.copy()

    # Generalize quasi-identifiers
    for col in quasi_identifiers:
        if anonymized[col].dtype in ["int64", "float64"]:
            # Bin numeric values
            anonymized[col] = pd.cut(anonymized[col], bins=10, labels=False)
        else:
            # Generalize categorical (keep top N, replace rest with "Other")
            top_values = anonymized[col].value_counts().head(10).index
            anonymized[col] = anonymized[col].apply(
                lambda x: x if x in top_values else "Other"
            )

    # Remove sensitive columns from training
    anonymized = anonymized.drop(columns=sensitive_columns, errors="ignore")

    # Verify k-anonymity
    group_sizes = anonymized.groupby(quasi_identifiers).size()
    if group_sizes.min() < k:
        raise ValueError(f"k-anonymity not achieved: min group size = {group_sizes.min()}")

    return anonymized
```

## Security Checklist

### Before Deployment

- [ ] Input validation implemented
- [ ] Rate limiting configured
- [ ] Adversarial robustness tested
- [ ] Fairness assessed across protected groups
- [ ] Model explainability available
- [ ] Audit logging enabled
- [ ] Access controls in place
- [ ] Secrets not in code or logs

### Ongoing Monitoring

- [ ] Monitor for unusual query patterns (model extraction; see the sketch after this list)
- [ ] Track fairness metrics over time
- [ ] Alert on prediction distribution shifts
- [ ] Regular adversarial testing
- [ ] Periodic fairness audits
- [ ] Review access logs
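
As referenced in the first item above, a lightweight sketch of per-client query-volume tracking for extraction detection (the window and threshold are assumptions to tune against real traffic):

```python
import time
from collections import defaultdict, deque

WINDOW_SECONDS = 3600
MAX_QUERIES_PER_WINDOW = 5000

_history: dict[str, deque] = defaultdict(deque)

def record_query(client_id: str) -> bool:
    """Record one query; True means this client's volume looks like extraction probing."""
    now = time.time()
    q = _history[client_id]
    q.append(now)
    # Drop timestamps outside the sliding window.
    while q and now - q[0] > WINDOW_SECONDS:
        q.popleft()
    return len(q) > MAX_QUERIES_PER_WINDOW
```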

### Incident Response

| Incident | Immediate Action | Follow-up |
|----------|------------------|-----------|
| Data breach | Rotate credentials, notify stakeholders | Audit access, review controls |
| Adversarial attack | Rate limit, add input filters | Adversarial training |
| Fairness violation | Review decisions, consider rollback | Bias mitigation |
| Model extraction | Rate limit, monitor queries | Add watermarking |