aibt_fl-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aibt/__init__.py +77 -0
- aibt/aggregation.py +68 -0
- aibt/client.py +259 -0
- aibt/core.py +287 -0
- aibt/metrics.py +383 -0
- aibt/models.py +520 -0
- aibt/py.typed +2 -0
- aibt/utils.py +162 -0
- aibt_fl-1.0.0.dist-info/METADATA +247 -0
- aibt_fl-1.0.0.dist-info/RECORD +13 -0
- aibt_fl-1.0.0.dist-info/WHEEL +5 -0
- aibt_fl-1.0.0.dist-info/licenses/LICENSE +21 -0
- aibt_fl-1.0.0.dist-info/top_level.txt +1 -0
aibt/metrics.py
ADDED
@@ -0,0 +1,383 @@
"""
Privacy Attack Metrics for AIBT Framework

Implements Membership Inference and Attribute Inference attacks for
evaluating privacy protection of federated learning models.
"""

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from typing import Dict, Tuple, Optional, List
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score, precision_recall_fscore_support
import warnings
warnings.filterwarnings('ignore')


# ============================================================================
# MEMBERSHIP INFERENCE ATTACK
# ============================================================================

class MembershipInferenceAttack:
    """
    Membership Inference Attack (MIA) for evaluating privacy leakage.

    Reference: Shokri et al., "Membership Inference Attacks Against Machine Learning Models"

    The attack attempts to determine whether a given sample was part of the
    training data, using model confidence and prediction patterns.
    """

    def __init__(self, target_model: nn.Module, device: str = "cpu"):
        self.target_model = target_model
        self.device = device
        self.attack_model = None

    def get_prediction_features(
        self,
        model: nn.Module,
        X: np.ndarray,
        y: np.ndarray
    ) -> np.ndarray:
        """
        Extract features from model predictions for the attack model.

        Features (one column each, in order):
        - Prediction confidence (max probability)
        - Whether the prediction is correct
        - Prediction entropy
        - Per-sample loss value
        """
        model.eval()
        X_tensor = torch.FloatTensor(X).to(self.device)
        y_tensor = torch.LongTensor(y).to(self.device)

        with torch.no_grad():
            output = model(X_tensor)
            if isinstance(output, tuple):
                logits = output[0]
            else:
                logits = output

            probs = F.softmax(logits, dim=1)

            # Features
            confidence = probs.max(dim=1)[0].cpu().numpy()
            correct = (probs.argmax(dim=1) == y_tensor).float().cpu().numpy()
            entropy = -(probs * torch.log(probs + 1e-10)).sum(dim=1).cpu().numpy()

            # Per-sample loss
            loss_per_sample = F.cross_entropy(logits, y_tensor, reduction='none')
            loss = loss_per_sample.cpu().numpy()

        # Combine features
        features = np.column_stack([confidence, correct, entropy, loss])
        return features

    def train_attack_model(
        self,
        train_data: Tuple[np.ndarray, np.ndarray],
        test_data: Tuple[np.ndarray, np.ndarray],
        shadow_model: Optional[nn.Module] = None
    ) -> float:
        """
        Train the attack model using the shadow-model technique.

        Args:
            train_data: (X, y) used for training the target model (members)
            test_data: (X, y) not used for training (non-members)
            shadow_model: Optional shadow model (the target model is used if not provided)

        Returns:
            AUC score of the attack model
        """
        model = shadow_model if shadow_model else self.target_model

        # Get features for members and non-members
        X_train, y_train = train_data
        X_test, y_test = test_data

        member_features = self.get_prediction_features(model, X_train, y_train)
        non_member_features = self.get_prediction_features(model, X_test, y_test)

        # Create the attack dataset: members labeled 1, non-members 0
        X_attack = np.vstack([member_features, non_member_features])
        y_attack = np.concatenate([
            np.ones(len(member_features)),
            np.zeros(len(non_member_features))
        ])

        # Train attack model
        X_attack_train, X_attack_test, y_attack_train, y_attack_test = train_test_split(
            X_attack, y_attack, test_size=0.3, random_state=42, stratify=y_attack
        )

        self.attack_model = LogisticRegression(max_iter=1000, random_state=42)
        self.attack_model.fit(X_attack_train, y_attack_train)

        # Evaluate
        y_pred_proba = self.attack_model.predict_proba(X_attack_test)[:, 1]
        auc = roc_auc_score(y_attack_test, y_pred_proba)

        return auc

    def attack(
        self,
        X: np.ndarray,
        y: np.ndarray
    ) -> Tuple[np.ndarray, float]:
        """
        Perform membership inference attack.

        Returns:
            membership_probs: Probability of each sample being a member
            auc: Fixed placeholder of 0.5, since a true AUC requires
                ground-truth membership labels
        """
        if self.attack_model is None:
            raise ValueError("Attack model not trained. Call train_attack_model first.")

        features = self.get_prediction_features(self.target_model, X, y)
        membership_probs = self.attack_model.predict_proba(features)[:, 1]

        return membership_probs, 0.5  # AUC requires ground truth


def evaluate_membership_inference(
    model: nn.Module,
    train_data: Tuple[np.ndarray, np.ndarray],
    test_data: Tuple[np.ndarray, np.ndarray],
    device: str = "cpu"
) -> Dict[str, float]:
    """
    Complete membership inference evaluation.

    Returns:
        Dictionary with AUC and a derived privacy score
    """
    attack = MembershipInferenceAttack(model, device)
    auc = attack.train_attack_model(train_data, test_data)

    return {
        "membership_auc": auc,
        "privacy_score": 1.0 - abs(auc - 0.5) * 2  # closer to 0.5 = better privacy
    }
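A minimal usage sketch for the membership inference section above. The two-layer classifier and the random arrays are hypothetical stand-ins; in practice you would pass the audited model together with the splits that were actually used to train it.

import numpy as np
import torch.nn as nn
from aibt.metrics import evaluate_membership_inference

# Hypothetical target model: 20 input features, 2 output classes
model = nn.Sequential(nn.Linear(20, 64), nn.ReLU(), nn.Linear(64, 2))

# Stand-in data; real audits use the model's actual train/test splits
rng = np.random.default_rng(0)
X_train, y_train = rng.normal(size=(500, 20)), rng.integers(0, 2, 500)
X_test, y_test = rng.normal(size=(200, 20)), rng.integers(0, 2, 200)

metrics = evaluate_membership_inference(model, (X_train, y_train), (X_test, y_test))
print(metrics)  # {'membership_auc': ..., 'privacy_score': ...}

An attack AUC near 0.5 means members and non-members are indistinguishable; values approaching 1.0 indicate membership leakage.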
# ============================================================================
# ATTRIBUTE INFERENCE ATTACK
# ============================================================================

class AttributeInferenceAttack:
    """
    Attribute Inference Attack for evaluating sensitive attribute leakage.

    Attempts to predict sensitive attributes from latent representations.
    """

    def __init__(self, target_model: nn.Module, device: str = "cpu"):
        self.target_model = target_model
        self.device = device
        self.attack_model = None

    def get_latent_representations(
        self,
        X: np.ndarray
    ) -> np.ndarray:
        """Extract latent representations from the model"""
        self.target_model.eval()
        X_tensor = torch.FloatTensor(X).to(self.device)

        with torch.no_grad():
            if hasattr(self.target_model, 'get_latent'):
                z = self.target_model.get_latent(X_tensor)
            elif hasattr(self.target_model, 'encoder'):
                encoder_out = self.target_model.encoder(X_tensor)
                if isinstance(encoder_out, tuple):
                    z = encoder_out[0]
                else:
                    z = encoder_out
            else:
                # Use output directly
                output = self.target_model(X_tensor)
                if isinstance(output, tuple):
                    z = output[0]
                else:
                    z = output

        return z.cpu().numpy()

    def train_attack_model(
        self,
        X: np.ndarray,
        sensitive_attrs: np.ndarray
    ) -> float:
        """
        Train attack model to predict sensitive attributes from latent codes.

        Args:
            X: Input features
            sensitive_attrs: Sensitive attribute labels

        Returns:
            AUC score of attribute inference
        """
        # Get latent representations
        latent = self.get_latent_representations(X)

        # Train-test split
        z_train, z_test, s_train, s_test = train_test_split(
            latent, sensitive_attrs, test_size=0.3, random_state=42,
            stratify=sensitive_attrs
        )

        # Train attack model
        self.attack_model = LogisticRegression(max_iter=1000, random_state=42)
        self.attack_model.fit(z_train, s_train)

        # Evaluate
        s_pred_proba = self.attack_model.predict_proba(z_test)

        # Handle binary and multi-class
        if len(np.unique(sensitive_attrs)) == 2:
            auc = roc_auc_score(s_test, s_pred_proba[:, 1])
        else:
            auc = roc_auc_score(s_test, s_pred_proba, multi_class='ovr', average='weighted')

        return auc

    def attack(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """
        Perform attribute inference attack.

        Returns:
            predictions: Predicted sensitive attributes
            probabilities: Prediction probabilities
        """
        if self.attack_model is None:
            raise ValueError("Attack model not trained. Call train_attack_model first.")

        latent = self.get_latent_representations(X)
        predictions = self.attack_model.predict(latent)
        probabilities = self.attack_model.predict_proba(latent)

        return predictions, probabilities


def evaluate_attribute_inference(
    model: nn.Module,
    X: np.ndarray,
    sensitive_attrs: np.ndarray,
    device: str = "cpu"
) -> Dict[str, float]:
    """
    Complete attribute inference evaluation.

    Returns:
        Dictionary with AUC and accuracy
    """
    attack = AttributeInferenceAttack(model, device)
    auc = attack.train_attack_model(X, sensitive_attrs)

    predictions, _ = attack.attack(X)
    accuracy = accuracy_score(sensitive_attrs, predictions)

    return {
        "attribute_auc": auc,
        "attribute_accuracy": accuracy,
        "privacy_score": 1.0 - abs(auc - 0.5) * 2
    }
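A sketch of wiring up the attribute attack. The model below is hypothetical; note from get_latent_representations that any module exposing a get_latent method or an encoder attribute is probed at its latent layer, and the raw output is used otherwise.

import numpy as np
import torch.nn as nn
from aibt.metrics import evaluate_attribute_inference

class TinyModel(nn.Module):
    """Hypothetical encoder/classifier; the attack probes its latent space."""
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(20, 8), nn.ReLU())
        self.head = nn.Linear(8, 2)

    def forward(self, x):
        return self.head(self.encoder(x))

rng = np.random.default_rng(0)
X = rng.normal(size=(600, 20))
s = rng.integers(0, 2, 600)  # stand-in binary sensitive attribute

print(evaluate_attribute_inference(TinyModel(), X, s))

If the latent codes carry no information about the attribute, the attribute AUC stays near 0.5 and the privacy score near 1.0.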
# ============================================================================
# COMPREHENSIVE PRIVACY EVALUATION
# ============================================================================

def evaluate_privacy(
    model: nn.Module,
    train_data: Tuple[np.ndarray, np.ndarray],
    test_data: Tuple[np.ndarray, np.ndarray],
    sensitive_attrs_train: Optional[np.ndarray] = None,
    sensitive_attrs_test: Optional[np.ndarray] = None,
    device: str = "cpu"
) -> Dict[str, float]:
    """
    Comprehensive privacy evaluation.

    Performs:
    1. Membership Inference Attack
    2. Attribute Inference Attack (if sensitive attrs provided)

    Returns:
        Dictionary with all privacy metrics
    """
    results = {}

    # Membership Inference Attack
    mia_metrics = evaluate_membership_inference(model, train_data, test_data, device)
    results.update({
        "membership_auc": mia_metrics["membership_auc"],
        "membership_privacy_score": mia_metrics["privacy_score"]
    })

    # Attribute Inference Attack
    if sensitive_attrs_train is not None:
        X_train, _ = train_data
        aia_metrics = evaluate_attribute_inference(model, X_train, sensitive_attrs_train, device)
        results.update({
            "attribute_auc": aia_metrics["attribute_auc"],
            "attribute_accuracy": aia_metrics["attribute_accuracy"],
            "attribute_privacy_score": aia_metrics["privacy_score"]
        })

    # Overall privacy score
    privacy_scores = [v for k, v in results.items() if 'privacy_score' in k]
    results["overall_privacy_score"] = np.mean(privacy_scores) if privacy_scores else 0.0

    return results
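The privacy score used throughout is a linear rescaling, privacy_score = 1 - 2 * |AUC - 0.5|: an attack AUC of 0.5 (random guessing) scores 1.0, an AUC of 0.75 scores 0.5, and an AUC of 1.0 (or, symmetrically, 0.0) scores 0.0. An end-to-end sketch under the same hypothetical model and data assumptions as the examples above:

import numpy as np
import torch.nn as nn
from aibt.metrics import evaluate_privacy

model = nn.Sequential(nn.Linear(20, 64), nn.ReLU(), nn.Linear(64, 2))  # hypothetical
rng = np.random.default_rng(0)
train = (rng.normal(size=(500, 20)), rng.integers(0, 2, 500))
test = (rng.normal(size=(200, 20)), rng.integers(0, 2, 200))
s_train = rng.integers(0, 2, 500)  # stand-in sensitive attribute

results = evaluate_privacy(model, train, test, sensitive_attrs_train=s_train)
print(results["overall_privacy_score"])  # mean of the per-attack privacy scores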
# ============================================================================
# PERFORMANCE METRICS
# ============================================================================

def evaluate_performance(
    model: nn.Module,
    test_data: Tuple[np.ndarray, np.ndarray],
    device: str = "cpu"
) -> Dict[str, float]:
    """
    Evaluate model performance metrics.

    Returns:
        Dictionary with accuracy, precision, recall, f1
    """
    model.eval()
    X, y = test_data
    X_tensor = torch.FloatTensor(X).to(device)
    y_tensor = torch.LongTensor(y).to(device)

    with torch.no_grad():
        output = model(X_tensor)
        if isinstance(output, tuple):
            logits = output[0]
        else:
            logits = output

        predictions = logits.argmax(dim=1).cpu().numpy()

    accuracy = accuracy_score(y, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(
        y, predictions, average='weighted', zero_division=0
    )

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1
    }
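A matching sketch for the utility side, so privacy and performance can be reported together; the model and data are again hypothetical stand-ins.

import numpy as np
import torch.nn as nn
from aibt.metrics import evaluate_performance

model = nn.Sequential(nn.Linear(20, 64), nn.ReLU(), nn.Linear(64, 2))  # hypothetical
rng = np.random.default_rng(0)
X_test, y_test = rng.normal(size=(200, 20)), rng.integers(0, 2, 200)

print(evaluate_performance(model, (X_test, y_test)))
# {'accuracy': ..., 'precision': ..., 'recall': ..., 'f1_score': ...}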