arena_score-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arena_score/__init__.py +46 -0
- arena_score/aggregator.py +163 -0
- arena_score/client.py +221 -0
- arena_score/score.py +153 -0
- arena_score/server.py +268 -0
- arena_score/utils.py +207 -0
- arena_score-1.0.0.dist-info/METADATA +210 -0
- arena_score-1.0.0.dist-info/RECORD +10 -0
- arena_score-1.0.0.dist-info/WHEEL +4 -0
- arena_score-1.0.0.dist-info/licenses/LICENSE +21 -0
arena_score/__init__.py
ADDED

```python
"""
ARENA Score - Adaptive Review and Evaluation using Novel Aggregation Score

A novel client evaluation and weighted aggregation algorithm for Federated Learning.

Key Features:
- Adaptive client evaluation using ΔAcc, CS, and SN metrics
- Robust aggregation with anomaly detection
- Gradient recycling for missing clients
- Model agnostic design
"""

from arena_score.score import compute_arena_score, compute_adaptive_weights
from arena_score.client import ARENAScoreClient
from arena_score.server import ARENAScoreServer
from arena_score.aggregator import run_arena_score
from arena_score.utils import (
    compute_cosine_similarity,
    compute_spectral_norm,
    compute_kl_divergence,
    flatten_weights,
    unflatten_weights
)

__version__ = "1.0.0"
__author__ = "Ronit Mehta"
__email__ = "ronit26mehta@gmail.com"

__all__ = [
    # Core classes
    "ARENAScoreClient",
    "ARENAScoreServer",
    # Main function
    "run_arena_score",
    # Score computation
    "compute_arena_score",
    "compute_adaptive_weights",
    # Utilities
    "compute_cosine_similarity",
    "compute_spectral_norm",
    "compute_kl_divergence",
    "flatten_weights",
    "unflatten_weights",
    # Version
    "__version__",
]
```
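
For orientation, the public surface re-exported above can be smoke-tested directly; a minimal check (the printed values assume this 1.0.0 release):

```python
# Quick check of the re-exported API in arena_score/__init__.py.
import arena_score

print(arena_score.__version__)                    # "1.0.0"
print("run_arena_score" in arena_score.__all__)   # True
```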
arena_score/aggregator.py
ADDED

```python
"""
ARENA Score Aggregator

Main federated learning loop using ARENA Score aggregation.
"""

import numpy as np
from typing import Dict, List, Tuple, Optional, Any, Protocol

from arena_score.client import ARENAScoreClient
from arena_score.server import ARENAScoreServer


class ModelProtocol(Protocol):
    """Protocol defining the required model interface."""

    def get_weights(self) -> Dict[str, np.ndarray]: ...
    def set_weights(self, weights: Dict[str, np.ndarray]) -> None: ...
    def predict(self, X: np.ndarray) -> np.ndarray: ...
    def predict_proba(self, X: np.ndarray) -> np.ndarray: ...
    def copy(self) -> 'ModelProtocol': ...


def run_arena_score(
    global_model: ModelProtocol,
    clients: List[ARENAScoreClient],
    n_rounds: int = 10,
    eval_data: Optional[Tuple[np.ndarray, np.ndarray]] = None,
    server_config: Optional[Dict[str, Any]] = None,
    verbose: bool = True
) -> Dict[str, Any]:
    """
    Run the ARENA Score federated learning algorithm.

    This is the main entry point for running federated learning with
    ARENA Score aggregation.

    Args:
        global_model: Global model to train (must implement ModelProtocol)
        clients: List of ARENAScoreClient instances
        n_rounds: Number of communication rounds (default: 10)
        eval_data: Optional (X_test, y_test) tuple for evaluation
        server_config: Optional server configuration dictionary with keys:
            - alpha_0, alpha_min: Accuracy weight parameters
            - gamma_0, gamma_min: Cosine similarity weight parameters
            - eta: Spectral norm coefficient
            - lambda_decay: Decay rate
            - s_min: Minimum score threshold
            - anomaly_threshold: CS anomaly threshold
            - kl_threshold: KL divergence threshold
            - enable_gradient_recycling: Enable gradient recycling
        verbose: Print progress (default: True)

    Returns:
        Training history dictionary with keys:
            - round: List of round numbers
            - train_loss: Average training loss per round
            - train_accuracy: Average training accuracy per round
            - test_accuracy: Test accuracy per round (if eval_data provided)
            - test_loss: Test loss per round (if eval_data provided)
            - arena_scores: ARENA scores per round
            - alpha_t, gamma_t: Adaptive weights per round
            - mean_delta_acc, mean_cosine_sim, mean_spectral_norm: Mean metrics
            - valid_clients_ratio: Ratio of non-anomalous clients

    Example:
        >>> from arena_score import ARENAScoreClient, run_arena_score
        >>>
        >>> # Create clients
        >>> clients = [
        ...     ARENAScoreClient(i, model.copy(), X[i], y[i])
        ...     for i in range(n_clients)
        ... ]
        >>>
        >>> # Run ARENA Score FL
        >>> history = run_arena_score(
        ...     global_model=model,
        ...     clients=clients,
        ...     n_rounds=10,
        ...     eval_data=(X_test, y_test)
        ... )
        >>>
        >>> print(f"Final accuracy: {history['test_accuracy'][-1]:.4f}")
    """
    # Initialize server
    config = server_config or {}
    server = ARENAScoreServer(global_model, **config)

    # Initialize history
    history: Dict[str, List[Any]] = {
        'round': [],
        'train_loss': [],
        'train_accuracy': [],
        'test_accuracy': [],
        'test_loss': [],
        'arena_scores': [],
        'alpha_t': [],
        'gamma_t': [],
        'mean_delta_acc': [],
        'mean_cosine_sim': [],
        'mean_spectral_norm': [],
        'valid_clients_ratio': []
    }

    for round_num in range(n_rounds):
        global_weights = server.get_global_weights()

        # Collect client updates
        client_weights: List[Dict[str, np.ndarray]] = []
        client_metrics: List[Dict[str, Any]] = []
        client_samples: List[int] = []
        client_ids: List[int] = []

        for client in clients:
            weights, metrics, n_samples = client.local_train(global_weights)
            client_weights.append(weights)
            client_metrics.append(metrics)
            client_samples.append(n_samples)
            client_ids.append(client.client_id)

        # ARENA Score aggregation
        _, agg_info = server.aggregate(
            client_weights, client_metrics, client_samples, client_ids
        )

        # Record metrics
        avg_loss = float(np.mean([m['avg_loss'] for m in client_metrics]))
        avg_acc = float(np.mean([m['accuracy'] for m in client_metrics]))

        history['round'].append(round_num + 1)
        history['train_loss'].append(avg_loss)
        history['train_accuracy'].append(avg_acc)
        history['arena_scores'].append(agg_info['scores'])
        history['alpha_t'].append(agg_info['alpha_t'])
        history['gamma_t'].append(agg_info['gamma_t'])
        history['mean_delta_acc'].append(float(np.mean([m['delta_acc'] for m in client_metrics])))
        history['mean_cosine_sim'].append(float(np.mean([m['cosine_sim'] for m in client_metrics])))
        history['mean_spectral_norm'].append(float(np.mean([m['spectral_norm'] for m in client_metrics])))
        history['valid_clients_ratio'].append(agg_info['valid_clients'] / agg_info['total_clients'])

        # Evaluate on test data
        if eval_data is not None:
            X_test, y_test = eval_data
            test_pred = global_model.predict(X_test)
            test_proba = global_model.predict_proba(X_test)
            test_acc = float(np.mean(test_pred == y_test))
            test_loss = float(-np.mean(
                y_test * np.log(np.clip(test_proba, 1e-7, 1 - 1e-7)) +
                (1 - y_test) * np.log(np.clip(1 - test_proba, 1e-7, 1 - 1e-7))
            ))
            history['test_accuracy'].append(test_acc)
            history['test_loss'].append(test_loss)

            if verbose:
                print(f"Round {round_num + 1}/{n_rounds}: "
                      f"Train Acc={avg_acc:.4f}, Test Acc={test_acc:.4f}, "
                      f"Valid Clients={agg_info['valid_clients']}/{agg_info['total_clients']}")
        elif verbose:
            print(f"Round {round_num + 1}/{n_rounds}: "
                  f"Train Acc={avg_acc:.4f}, "
                  f"Valid Clients={agg_info['valid_clients']}/{agg_info['total_clients']}")

    return history
```
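
`run_arena_score` touches models only through the `ModelProtocol` surface above. As one illustration, a minimal NumPy logistic-regression model satisfying that protocol might look like the sketch below; the `TinyLogReg` name, the weight keys `'W'`/`'b'`, and the learning rate are invented for this example and are not part of the package:

```python
import numpy as np
from typing import Dict

class TinyLogReg:
    """Hypothetical logistic-regression model satisfying ModelProtocol."""

    def __init__(self, n_features: int, lr: float = 0.1):
        self.lr = lr
        self.weights = {'W': np.zeros((n_features, 1)), 'b': np.zeros(1)}

    def get_weights(self) -> Dict[str, np.ndarray]:
        return {k: v.copy() for k, v in self.weights.items()}

    def set_weights(self, weights: Dict[str, np.ndarray]) -> None:
        self.weights = {k: v.copy() for k, v in weights.items()}

    def forward(self, X: np.ndarray) -> np.ndarray:
        # Sigmoid of the affine map; returns per-sample P(y=1).
        z = X @ self.weights['W'] + self.weights['b']
        return 1.0 / (1.0 + np.exp(-z.ravel()))

    def backward(self, X, y, y_pred) -> Dict[str, np.ndarray]:
        # Gradient of mean binary cross-entropy w.r.t. W and b.
        err = (y_pred - y) / len(y)
        return {'W': X.T @ err[:, None], 'b': np.array([err.sum()])}

    def update_weights(self, gradients: Dict[str, np.ndarray]) -> None:
        for k, g in gradients.items():
            self.weights[k] -= self.lr * g

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        return self.forward(X)

    def predict(self, X: np.ndarray) -> np.ndarray:
        return (self.forward(X) >= 0.5).astype(int)

    def copy(self) -> 'TinyLogReg':
        clone = TinyLogReg(self.weights['W'].shape[0], self.lr)
        clone.set_weights(self.weights)
        return clone
```

Any framework-backed model can be adapted the same way, as long as `get_weights`/`set_weights` round-trip a dict of arrays and `backward` returns gradients under the same keys.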
arena_score/client.py
ADDED

```python
"""
ARENA Score Client

Client implementation for ARENA Score federated learning.
Performs local training and computes ARENA metrics for server evaluation.
"""

import numpy as np
from typing import Dict, Tuple, Any, Optional, Protocol


class ModelProtocol(Protocol):
    """Protocol defining the required model interface."""

    def get_weights(self) -> Dict[str, np.ndarray]: ...
    def set_weights(self, weights: Dict[str, np.ndarray]) -> None: ...
    def forward(self, X: np.ndarray) -> np.ndarray: ...
    def backward(self, X: np.ndarray, y: np.ndarray, y_pred: np.ndarray) -> Dict[str, np.ndarray]: ...
    def update_weights(self, gradients: Dict[str, np.ndarray]) -> None: ...
    def predict(self, X: np.ndarray) -> np.ndarray: ...
    def predict_proba(self, X: np.ndarray) -> np.ndarray: ...
    def copy(self) -> 'ModelProtocol': ...


class ARENAScoreClient:
    """
    ARENA Score Client with adaptive evaluation metrics.

    This client performs local training and computes the metrics needed
    for ARENA Score evaluation:
    - ΔAcc (delta accuracy): Accuracy improvement after local training
    - CS (cosine similarity): Alignment with global update direction
    - SN (spectral norm): Weight matrix regularization
    - KL divergence: Distribution shift detection

    Args:
        client_id: Unique identifier for this client
        model: Model implementing the ModelProtocol interface
        X: Training data features
        y: Training data labels
        local_epochs: Number of local training epochs (default: 1)
        batch_size: Batch size for training (default: 32)
        eta: Spectral norm coefficient (default: 0.5)

    Example:
        >>> client = ARENAScoreClient(
        ...     client_id=0,
        ...     model=my_model,
        ...     X=X_train,
        ...     y=y_train,
        ...     local_epochs=5
        ... )
        >>> weights, metrics, n_samples = client.local_train(global_weights)
    """

    def __init__(
        self,
        client_id: int,
        model: ModelProtocol,
        X: np.ndarray,
        y: np.ndarray,
        local_epochs: int = 1,
        batch_size: int = 32,
        eta: float = 0.5
    ):
        self.client_id = client_id
        self.model = model
        self.X = X
        self.y = y
        self.local_epochs = local_epochs
        self.batch_size = batch_size
        self.eta = eta
        self.n_samples = len(X)

        # Track previous state for ΔAcc and CS computation
        self.prev_accuracy: Optional[float] = None
        self.prev_weights: Optional[Dict[str, np.ndarray]] = None

    def local_train(
        self,
        global_weights: Dict[str, np.ndarray]
    ) -> Tuple[Dict[str, np.ndarray], Dict[str, Any], int]:
        """
        Perform local training and compute ARENA metrics.

        Args:
            global_weights: Current global model weights

        Returns:
            Tuple of:
                - updated_weights: New model weights after local training
                - metrics: Dict containing accuracy, delta_acc, cosine_sim,
                  spectral_norm, kl_divergence, f1, etc.
                - n_samples: Number of local training samples

        Example:
            >>> weights, metrics, n = client.local_train(server.get_global_weights())
            >>> print(f"Delta Acc: {metrics['delta_acc']:.4f}")
            >>> print(f"Cosine Sim: {metrics['cosine_sim']:.4f}")
        """
        from arena_score.utils import (
            compute_spectral_norm,
            flatten_weights,
            compute_kl_divergence
        )

        # Store previous weights for CS computation
        if self.prev_weights is None:
            self.prev_weights = {k: v.copy() for k, v in global_weights.items()}

        # Set model to global weights and compute initial metrics
        self.model.set_weights(global_weights)
        initial_preds = self.model.predict_proba(self.X)
        initial_accuracy = self._compute_accuracy(self.model.predict(self.X), self.y)

        # Local training
        total_loss = 0.0
        n_batches = 0
        accumulated_grads: Optional[Dict[str, np.ndarray]] = None

        for epoch in range(self.local_epochs):
            indices = np.random.permutation(len(self.X))

            for start_idx in range(0, len(self.X), self.batch_size):
                end_idx = min(start_idx + self.batch_size, len(self.X))
                batch_indices = indices[start_idx:end_idx]

                X_batch = self.X[batch_indices]
                y_batch = self.y[batch_indices]

                # Forward pass
                y_pred = self.model.forward(X_batch)
                loss = self._compute_loss(y_batch, y_pred)
                total_loss += loss
                n_batches += 1

                # Backward pass
                gradients = self.model.backward(X_batch, y_batch, y_pred)

                # Accumulate gradients for analysis
                if accumulated_grads is None:
                    accumulated_grads = {k: v.copy() for k, v in gradients.items()}
                else:
                    for k in gradients:
                        accumulated_grads[k] += gradients[k]

                # Update weights
                self.model.update_weights(gradients)

        # Get final weights and predictions
        final_weights = self.model.get_weights()
        final_preds = self.model.predict_proba(self.X)
        final_predictions = self.model.predict(self.X)
        final_accuracy = self._compute_accuracy(final_predictions, self.y)

        # ===== Compute ARENA Metrics =====

        # 1. Delta Accuracy (ΔAcc)
        delta_acc = final_accuracy - initial_accuracy

        # 2. Cosine Similarity (CS) - between update and global gradient direction
        update_flat = flatten_weights(final_weights) - flatten_weights(global_weights)
        global_flat = flatten_weights(global_weights) - flatten_weights(self.prev_weights)

        if np.linalg.norm(global_flat) > 1e-8:
            cosine_sim = float(np.dot(update_flat, global_flat) /
                               (np.linalg.norm(update_flat) * np.linalg.norm(global_flat) + 1e-8))
        else:
            cosine_sim = 1.0  # First round or no previous update

        # 3. Spectral Norm (SN)
        spectral_norm = compute_spectral_norm(final_weights)

        # 4. KL Divergence - detect distribution drift
        kl_divergence = compute_kl_divergence(final_preds, initial_preds)

        # 5. Flag potentially corrupted clients
        is_potentially_corrupted = final_accuracy < 0.5 or delta_acc < -0.1

        # Compute F1 score
        f1 = self._compute_f1(final_predictions, self.y)

        # Store current state for next round
        self.prev_accuracy = final_accuracy
        self.prev_weights = {k: v.copy() for k, v in final_weights.items()}

        metrics = {
            'accuracy': final_accuracy,
            'loss': self._compute_loss(self.y, final_preds),
            'avg_loss': total_loss / n_batches if n_batches > 0 else 0,
            'delta_acc': delta_acc,
            'cosine_sim': cosine_sim,
            'spectral_norm': spectral_norm,
            'kl_divergence': kl_divergence,
            'is_potentially_corrupted': is_potentially_corrupted,
            'f1': f1,
            'accumulated_grads': accumulated_grads
        }

        return final_weights, metrics, self.n_samples

    def _compute_accuracy(self, y_pred: np.ndarray, y_true: np.ndarray) -> float:
        """Compute accuracy."""
        return float(np.mean(y_pred == y_true))

    def _compute_loss(self, y_true: np.ndarray, y_pred: np.ndarray, eps: float = 1e-7) -> float:
        """Compute binary cross-entropy loss."""
        y_pred = np.clip(y_pred, eps, 1 - eps)
        return float(-np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)))

    def _compute_f1(self, y_pred: np.ndarray, y_true: np.ndarray) -> float:
        """Compute F1 score."""
        tp = np.sum((y_pred == 1) & (y_true == 1))
        fp = np.sum((y_pred == 1) & (y_true == 0))
        fn = np.sum((y_pred == 0) & (y_true == 1))

        precision = tp / (tp + fp + 1e-8)
        recall = tp / (tp + fn + 1e-8)
        f1 = 2 * precision * recall / (precision + recall + 1e-8)

        return float(f1)
```
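
Continuing with the hypothetical `TinyLogReg` sketch above, one `local_train` call and the metrics it reports can be exercised on synthetic data (shapes, seed, and labels are arbitrary):

```python
import numpy as np
from arena_score import ARENAScoreClient

rng = np.random.default_rng(0)
X = rng.normal(size=(64, 4))
y = (X[:, 0] > 0).astype(int)  # linearly separable toy labels

client = ARENAScoreClient(client_id=0, model=TinyLogReg(4), X=X, y=y,
                          local_epochs=2, batch_size=16)
weights, metrics, n = client.local_train(client.model.get_weights())

print(n)                               # 64
print(metrics['cosine_sim'])           # 1.0 on the first round (no prior global update)
print(round(metrics['delta_acc'], 4))  # accuracy gained by local training
```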
arena_score/score.py
ADDED

```python
"""
ARENA Score Computation Module

Implements the core ARENA Score formula from the research paper:
    S_j = α(t) × max(0, ΔAcc_j) + γ(t) × CS_j + η × SN_j/(1 + SN_j)

Where:
- ΔAcc: Local accuracy improvement
- CS: Cosine similarity between client update and global direction
- SN: Spectral norm regularization = log(1 + σ_max(W_j))
- α(t), γ(t): Adaptive time-varying weights
"""

import numpy as np
from typing import Tuple


def compute_adaptive_weights(
    t: int,
    alpha_0: float = 0.7,
    alpha_min: float = 0.3,
    gamma_0: float = 0.3,
    gamma_min: float = 0.5,
    lambda_decay: float = 0.1
) -> Tuple[float, float]:
    """
    Compute time-varying adaptive weights α(t) and γ(t).

    The weights follow exponential schedules:
    - α(t) = α_min + (α_0 - α_min) × exp(-λt)
    - γ(t) = γ_0 + (γ_min - γ_0) × (1 - exp(-λt))

    With the defaults, α(t) decays from α_0 toward α_min while γ(t) rises
    from γ_0 toward γ_min, shifting emphasis from raw accuracy gains to
    directional agreement as training progresses.

    Args:
        t: Current communication round (0-indexed)
        alpha_0: Initial accuracy weight (default: 0.7)
        alpha_min: Minimum accuracy weight (default: 0.3)
        gamma_0: Initial cosine similarity weight (default: 0.3)
        gamma_min: Final (asymptotic) cosine similarity weight (default: 0.5)
        lambda_decay: Decay rate (default: 0.1)

    Returns:
        Tuple of (alpha_t, gamma_t)

    Example:
        >>> alpha_t, gamma_t = compute_adaptive_weights(t=5)
        >>> print(f"Round 5: α={alpha_t:.3f}, γ={gamma_t:.3f}")
    """
    decay_factor = np.exp(-lambda_decay * t)

    alpha_t = alpha_min + (alpha_0 - alpha_min) * decay_factor
    # γ starts at γ_0 for t=0 and approaches γ_min as t grows, so with the
    # defaults the cosine-similarity weight increases over rounds.
    gamma_t = gamma_0 + (gamma_min - gamma_0) * (1 - decay_factor)

    return float(alpha_t), float(gamma_t)


def compute_arena_score(
    delta_acc: float,
    cosine_sim: float,
    spectral_norm: float,
    alpha_t: float,
    gamma_t: float,
    eta: float = 0.5,
    is_potentially_corrupted: bool = False
) -> float:
    """
    Compute ARENA Score for a client update.

    Formula:
        S_j = α(t) × max(0, ΔAcc_j) + γ(t) × CS_j + η × SN_j/(1 + SN_j)

    Key improvements:
    - Heavily penalizes negative delta_acc (indicates corrupted training)
    - Normalizes CS to [0, 1] range
    - Applies 90% penalty for potentially corrupted clients

    Args:
        delta_acc: Accuracy improvement (final - initial accuracy)
        cosine_sim: Cosine similarity between update and global direction [-1, 1]
        spectral_norm: Spectral norm value log(1 + σ_max)
        alpha_t: Current adaptive weight for accuracy
        gamma_t: Current adaptive weight for cosine similarity
        eta: Spectral norm coefficient (default: 0.5)
        is_potentially_corrupted: Flag for suspected corrupted client

    Returns:
        ARENA score (non-negative float)

    Example:
        >>> score = compute_arena_score(
        ...     delta_acc=0.05,
        ...     cosine_sim=0.8,
        ...     spectral_norm=2.5,
        ...     alpha_t=0.5,
        ...     gamma_t=0.4
        ... )
        >>> print(f"ARENA Score: {score:.2f}")
    """
    # Accuracy contribution - PENALIZE negative delta_acc heavily
    if delta_acc >= 0:
        acc_term = alpha_t * delta_acc
    else:
        # Negative improvement = likely corrupted, apply strong penalty
        acc_term = alpha_t * delta_acc * 2.0  # Double the penalty

    # Cosine similarity contribution (normalized to [0, 1] from [-1, 1])
    cs_normalized = (cosine_sim + 1) / 2  # Map [-1, 1] to [0, 1]
    cs_term = gamma_t * cs_normalized

    # Spectral norm contribution (normalized)
    sn_term = eta * (spectral_norm / (1 + spectral_norm))

    # Total score (scaled to a reasonable range)
    score = 100 * (acc_term + cs_term + sn_term)

    # Apply additional penalty for potentially corrupted clients
    if is_potentially_corrupted:
        score *= 0.1  # Reduce score by 90%

    return max(0.0, float(score))  # Ensure non-negative score


def detect_anomaly(
    cosine_sim: float,
    kl_divergence: float = 0.0,
    is_potentially_corrupted: bool = False,
    anomaly_threshold: float = -0.5,
    kl_threshold: float = 0.5
) -> bool:
    """
    Detect anomalous client update using multiple criteria.

    From the paper (lines 17-18):
    - KL divergence thresholds
    - CS < -0.5 threshold

    Args:
        cosine_sim: Cosine similarity value
        kl_divergence: KL divergence between predictions (default: 0.0)
        is_potentially_corrupted: Pre-computed corruption flag
        anomaly_threshold: CS threshold (default: -0.5)
        kl_threshold: KL divergence threshold (default: 0.5)

    Returns:
        True if anomaly detected, False otherwise

    Example:
        >>> is_anomaly = detect_anomaly(cosine_sim=-0.7, kl_divergence=0.3)
        >>> print(f"Anomaly: {is_anomaly}")  # True (CS < -0.5)
    """
    cs_anomaly = cosine_sim < anomaly_threshold
    kl_anomaly = kl_divergence > kl_threshold

    return cs_anomaly or kl_anomaly or is_potentially_corrupted
```
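
Working the docstring example through the implemented formula: acc_term = 0.5 × 0.05 = 0.025, cs_term = 0.4 × (0.8 + 1)/2 = 0.36, and sn_term = 0.5 × 2.5/3.5 ≈ 0.357, giving S ≈ 100 × 0.742 ≈ 74.21. A one-line check:

```python
from arena_score import compute_arena_score

s = compute_arena_score(delta_acc=0.05, cosine_sim=0.8, spectral_norm=2.5,
                        alpha_t=0.5, gamma_t=0.4)
print(f"{s:.2f}")  # 74.21, i.e. 100 * (0.025 + 0.36 + 0.5 * 2.5/3.5)
```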
arena_score/server.py
ADDED

```python
"""
ARENA Score Server

Server implementation for ARENA Score federated learning.
Performs adaptive weighted aggregation with anomaly detection.
"""

import numpy as np
from typing import Dict, List, Tuple, Any, Optional, Protocol


class ModelProtocol(Protocol):
    """Protocol defining the required model interface."""

    def get_weights(self) -> Dict[str, np.ndarray]: ...
    def set_weights(self, weights: Dict[str, np.ndarray]) -> None: ...
    def copy(self) -> 'ModelProtocol': ...


class ARENAScoreServer:
    """
    ARENA Score Server with adaptive weighted aggregation.

    Implements the ARENA Score aggregation formula:
        S_j = α(t) × max(0, ΔAcc_j) + γ(t) × CS_j + η × SN_j/(1 + SN_j)

    With time-varying weights:
    - α(t) = α_min + (α_0 - α_min) × exp(-λt)
    - γ(t) = γ_0 + (γ_min - γ_0) × (1 - exp(-λt))

    Args:
        global_model: The global model to aggregate updates into
        alpha_0: Initial accuracy weight (default: 0.7)
        alpha_min: Minimum accuracy weight (default: 0.3)
        gamma_0: Initial cosine similarity weight (default: 0.3)
        gamma_min: Final (asymptotic) cosine similarity weight (default: 0.5)
        eta: Spectral norm coefficient (default: 0.5)
        lambda_decay: Decay rate for adaptive weights (default: 0.1)
        s_min: Minimum score threshold (default: 0)
        anomaly_threshold: CS threshold for anomaly detection (default: -0.5)
        kl_threshold: KL divergence threshold (default: 0.5)
        enable_gradient_recycling: Enable gradient recycling (default: True)

    Example:
        >>> server = ARENAScoreServer(
        ...     global_model=my_model,
        ...     alpha_0=0.7,
        ...     eta=0.5
        ... )
        >>> aggregated, info = server.aggregate(client_weights, client_metrics, ...)
    """

    def __init__(
        self,
        global_model: ModelProtocol,
        alpha_0: float = 0.7,
        alpha_min: float = 0.3,
        gamma_0: float = 0.3,
        gamma_min: float = 0.5,
        eta: float = 0.5,
        lambda_decay: float = 0.1,
        s_min: float = 0,
        anomaly_threshold: float = -0.5,
        kl_threshold: float = 0.5,
        enable_gradient_recycling: bool = True
    ):
        self.global_model = global_model
        self.alpha_0 = alpha_0
        self.alpha_min = alpha_min
        self.gamma_0 = gamma_0
        self.gamma_min = gamma_min
        self.eta = eta
        self.lambda_decay = lambda_decay
        self.s_min = s_min
        self.anomaly_threshold = anomaly_threshold
        self.kl_threshold = kl_threshold
        self.enable_gradient_recycling = enable_gradient_recycling

        # Gradient recycling storage
        self.gradient_cache: Dict[int, Dict[str, np.ndarray]] = {}
        self.current_round = 0

    def _compute_adaptive_weights(self, t: int) -> Tuple[float, float]:
        """
        Compute time-varying α(t) and γ(t).

        Returns:
            Tuple of (alpha_t, gamma_t)
        """
        from arena_score.score import compute_adaptive_weights
        return compute_adaptive_weights(
            t,
            self.alpha_0, self.alpha_min,
            self.gamma_0, self.gamma_min,
            self.lambda_decay
        )

    def _compute_arena_score(
        self,
        delta_acc: float,
        cosine_sim: float,
        spectral_norm: float,
        alpha_t: float,
        gamma_t: float,
        is_potentially_corrupted: bool = False
    ) -> float:
        """Compute ARENA score for a client update."""
        from arena_score.score import compute_arena_score
        return compute_arena_score(
            delta_acc, cosine_sim, spectral_norm,
            alpha_t, gamma_t, self.eta,
            is_potentially_corrupted
        )

    def _detect_anomaly(
        self,
        cosine_sim: float,
        kl_divergence: float = 0.0,
        is_potentially_corrupted: bool = False
    ) -> bool:
        """
        Detect anomalous update using multiple criteria.

        From the paper:
        - CS < -0.5 threshold
        - KL divergence thresholds
        """
        from arena_score.score import detect_anomaly
        return detect_anomaly(
            cosine_sim, kl_divergence,
            is_potentially_corrupted,
            self.anomaly_threshold, self.kl_threshold
        )

    def aggregate(
        self,
        client_weights: List[Dict[str, np.ndarray]],
        client_metrics: List[Dict[str, Any]],
        client_samples: List[int],
        client_ids: List[int],
        participating_clients: Optional[List[int]] = None
    ) -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
        """
        Aggregate client updates using ARENA scoring.

        Args:
            client_weights: List of weight dictionaries from clients
            client_metrics: List of metric dictionaries from clients
            client_samples: Number of samples per client
            client_ids: Client identifiers
            participating_clients: Optional list of participating client IDs

        Returns:
            Tuple of:
                - aggregated_weights: New global model weights
                - aggregation_info: Dict with scores, selected clients, etc.

        Example:
            >>> aggregated, info = server.aggregate(
            ...     client_weights, client_metrics, client_samples, client_ids
            ... )
            >>> print(f"Mean score: {info['mean_score']:.2f}")
            >>> print(f"Valid clients: {info['valid_clients']}/{info['total_clients']}")
        """
        self.current_round += 1
        alpha_t, gamma_t = self._compute_adaptive_weights(self.current_round)

        # Compute ARENA scores for each client
        scores = []
        is_anomaly = []

        for metrics in client_metrics:
            kl_div = metrics.get('kl_divergence', 0.0)
            is_corrupted = metrics.get('is_potentially_corrupted', False)

            score = self._compute_arena_score(
                metrics['delta_acc'],
                metrics['cosine_sim'],
                metrics['spectral_norm'],
                alpha_t, gamma_t,
                is_potentially_corrupted=is_corrupted
            )

            # Check for anomaly
            anomaly = self._detect_anomaly(
                metrics['cosine_sim'],
                kl_divergence=kl_div,
                is_potentially_corrupted=is_corrupted
            )

            if anomaly:
                score = 0.0  # Set score to 0 for anomalous updates

            scores.append(score)
            is_anomaly.append(anomaly)

        scores = np.array(scores)

        # Filter clients by minimum score threshold
        valid_mask = (scores >= self.s_min) & (~np.array(is_anomaly))

        # Fallback: if no clients pass, use all non-anomalous
        if not np.any(valid_mask):
            valid_mask = ~np.array(is_anomaly)

        # Final fallback: use all clients
        if not np.any(valid_mask):
            valid_mask = np.ones(len(scores), dtype=bool)

        # Normalize scores for weighting
        valid_scores = np.where(valid_mask, scores, 0)
        total_score = np.sum(valid_scores)

        if total_score > 0:
            weights_normalized = valid_scores / total_score
        else:
            # Fall back to sample-based weighting
            total_samples = sum(n for n, v in zip(client_samples, valid_mask) if v)
            weights_normalized = np.array([
                n / total_samples if v else 0
                for n, v in zip(client_samples, valid_mask)
            ])

        # Aggregate weights
        aggregated: Dict[str, np.ndarray] = {}
        for key in client_weights[0]:
            aggregated[key] = np.zeros_like(client_weights[0][key])

        for w_client, weight in zip(client_weights, weights_normalized):
            for key in w_client:
                aggregated[key] += w_client[key] * weight

        # Gradient recycling: cache gradients for clients
        if self.enable_gradient_recycling:
            for client_id, metrics in zip(client_ids, client_metrics):
                if 'accumulated_grads' in metrics and metrics['accumulated_grads'] is not None:
                    self.gradient_cache[client_id] = metrics['accumulated_grads']

        # Update global model
        self.global_model.set_weights(aggregated)

        aggregation_info = {
            'round': self.current_round,
            'alpha_t': alpha_t,
            'gamma_t': gamma_t,
            'scores': scores.tolist(),
            'weights_normalized': weights_normalized.tolist(),
            'is_anomaly': is_anomaly,
            'valid_clients': int(np.sum(valid_mask)),
            'total_clients': len(scores),
            'mean_score': float(np.mean(scores)),
            'std_score': float(np.std(scores))
        }

        return aggregated, aggregation_info

    def get_global_weights(self) -> Dict[str, np.ndarray]:
        """Get current global model weights."""
        return self.global_model.get_weights()

    def get_recycled_gradient(self, client_id: int) -> Optional[Dict[str, np.ndarray]]:
        """Get cached gradient for a missing client."""
        return self.gradient_cache.get(client_id)

    def reset(self) -> None:
        """Reset server state for a new training run."""
        self.current_round = 0
        self.gradient_cache.clear()
```
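
For illustration, one aggregation round can also be driven by hand with synthetic metrics, again borrowing the hypothetical `TinyLogReg` sketch from earlier; the metric values below are made up so that a benign client outweighs an anomalous one:

```python
import numpy as np
from arena_score import ARENAScoreServer

server = ARENAScoreServer(global_model=TinyLogReg(4))

base = server.get_global_weights()
client_weights = [
    {k: v + 0.01 for k, v in base.items()},  # benign, small update
    {k: v - 5.0 for k, v in base.items()},   # wild update
]
client_metrics = [
    {'delta_acc': 0.04, 'cosine_sim': 0.9, 'spectral_norm': 1.2,
     'kl_divergence': 0.05, 'is_potentially_corrupted': False},
    {'delta_acc': -0.30, 'cosine_sim': -0.8, 'spectral_norm': 4.0,
     'kl_divergence': 0.9, 'is_potentially_corrupted': True},
]

aggregated, info = server.aggregate(client_weights, client_metrics,
                                    client_samples=[100, 100],
                                    client_ids=[0, 1])
print(info['is_anomaly'])          # [False, True]
print(info['weights_normalized'])  # [1.0, 0.0]: all weight on the benign client
```

Because the second client trips the CS threshold (-0.8 < -0.5), its score is zeroed and the benign client receives the entire aggregation weight.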
arena_score/utils.py
ADDED

```python
"""
ARENA Score Utility Functions

Helper functions for weight manipulation, similarity computation,
and anomaly detection metrics.
"""

import numpy as np
from typing import Dict

# Try Numba for acceleration
try:
    from numba import jit
    HAS_NUMBA = True
except ImportError:
    HAS_NUMBA = False

    def jit(*args, **kwargs):
        """Fallback decorator when Numba is not available."""
        def decorator(func):
            return func
        return decorator


@jit(nopython=True, cache=True)
def _compute_cosine_similarity_numba(a: np.ndarray, b: np.ndarray) -> float:
    """Numba-optimized cosine similarity computation."""
    dot = np.dot(a, b)
    norm_a = np.sqrt(np.dot(a, a))
    norm_b = np.sqrt(np.dot(b, b))
    if norm_a < 1e-8 or norm_b < 1e-8:
        return 0.0
    return dot / (norm_a * norm_b)


def compute_cosine_similarity(
    weights_a: Dict[str, np.ndarray],
    weights_b: Dict[str, np.ndarray]
) -> float:
    """
    Compute cosine similarity between two weight dictionaries.

    Args:
        weights_a: First weight dictionary
        weights_b: Second weight dictionary

    Returns:
        Cosine similarity in range [-1, 1]

    Example:
        >>> cs = compute_cosine_similarity(model_a.get_weights(), model_b.get_weights())
        >>> print(f"Similarity: {cs:.4f}")
    """
    flat_a = flatten_weights(weights_a)
    flat_b = flatten_weights(weights_b)

    if HAS_NUMBA:
        return float(_compute_cosine_similarity_numba(flat_a, flat_b))

    # Fallback pure numpy implementation
    dot = np.dot(flat_a, flat_b)
    norm_a = np.linalg.norm(flat_a)
    norm_b = np.linalg.norm(flat_b)

    if norm_a < 1e-8 or norm_b < 1e-8:
        return 0.0

    return float(dot / (norm_a * norm_b))


def compute_spectral_norm(weights: Dict[str, np.ndarray]) -> float:
    """
    Compute spectral norm regularization term.

    Formula: SN_j = log(1 + σ_max(W_j))

    Where σ_max is the maximum singular value of the weight matrix.

    Args:
        weights: Dictionary of weight arrays

    Returns:
        Spectral norm value

    Example:
        >>> sn = compute_spectral_norm(model.get_weights())
        >>> print(f"Spectral Norm: {sn:.4f}")
    """
    max_singular = 0.0

    for key, w in weights.items():
        if len(w.shape) == 2:
            # Compute largest singular value for 2D matrices
            try:
                s = np.linalg.svd(w, compute_uv=False)
                max_singular = max(max_singular, s[0])
            except np.linalg.LinAlgError:
                max_singular = max(max_singular, np.max(np.abs(w)))
        else:
            max_singular = max(max_singular, np.max(np.abs(w)))

    return float(np.log(1 + max_singular))


def compute_kl_divergence(
    p: np.ndarray,
    q: np.ndarray,
    eps: float = 1e-8
) -> float:
    """
    Compute KL divergence between two probability distributions.

    Used for anomaly detection as per the paper (lines 17-18).

    Args:
        p: Prediction distribution after local training
        q: Prediction distribution before training (from global model)
        eps: Small epsilon for numerical stability

    Returns:
        KL divergence value (higher = more divergent = potentially corrupted)

    Example:
        >>> kl = compute_kl_divergence(new_preds, old_preds)
        >>> if kl > 0.5:
        ...     print("Potential anomaly detected")
    """
    p = np.clip(p, eps, 1 - eps)
    q = np.clip(q, eps, 1 - eps)

    # For binary classification, compute the mean forward KL divergence
    # KL(p || q) over the per-sample Bernoulli prediction distributions
    kl_pq = np.mean(p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q)))

    return float(kl_pq)


def flatten_weights(weights: Dict[str, np.ndarray]) -> np.ndarray:
    """
    Flatten all weights into a single vector.

    Args:
        weights: Dictionary of weight arrays

    Returns:
        1D numpy array containing all flattened weights

    Example:
        >>> flat = flatten_weights(model.get_weights())
        >>> print(f"Total parameters: {len(flat)}")
    """
    return np.concatenate([w.flatten() for w in weights.values()])


def unflatten_weights(
    flat: np.ndarray,
    template: Dict[str, np.ndarray]
) -> Dict[str, np.ndarray]:
    """
    Unflatten vector back to weight dictionary.

    Args:
        flat: 1D array of flattened weights
        template: Template dictionary with correct shapes

    Returns:
        Weight dictionary with original shapes

    Example:
        >>> flat = flatten_weights(weights)
        >>> restored = unflatten_weights(flat, weights)
    """
    result = {}
    idx = 0

    for key, w in template.items():
        size = w.size
        result[key] = flat[idx:idx + size].reshape(w.shape)
        idx += size

    return result


def compute_weight_distance(
    weights_a: Dict[str, np.ndarray],
    weights_b: Dict[str, np.ndarray]
) -> float:
    """
    Compute L2 distance between two weight dictionaries.

    Args:
        weights_a: First weight dictionary
        weights_b: Second weight dictionary

    Returns:
        L2 distance (Euclidean norm of difference)

    Example:
        >>> dist = compute_weight_distance(w1, w2)
        >>> print(f"Weight distance: {dist:.4f}")
    """
    total_dist = 0.0

    for key in weights_a:
        if key in weights_b:
            diff = weights_a[key].flatten() - weights_b[key].flatten()
            total_dist += np.sum(diff ** 2)

    return float(np.sqrt(total_dist))
```
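
The weight helpers are easy to sanity-check in isolation, for instance with a flatten/unflatten round-trip and a self-similarity of 1.0 (the arrays below are arbitrary):

```python
import numpy as np
from arena_score import (compute_cosine_similarity, flatten_weights,
                         unflatten_weights)

w = {'W': np.arange(6, dtype=float).reshape(2, 3), 'b': np.array([1.0, 2.0])}

flat = flatten_weights(w)                # shape (8,)
restored = unflatten_weights(flat, w)
assert all(np.array_equal(w[k], restored[k]) for k in w)

print(compute_cosine_similarity(w, w))   # 1.0 (up to float rounding)
```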
arena_score-1.0.0.dist-info/METADATA
ADDED

````text
Metadata-Version: 2.4
Name: arena-score
Version: 1.0.0
Summary: ARENA Score - Adaptive Review and Evaluation using Novel Aggregation Score for Federated Learning
Project-URL: Homepage, https://github.com/Ronit26Mehta/Arena_exp
Project-URL: Documentation, https://github.com/Ronit26Mehta/Arena_exp#readme
Project-URL: Repository, https://github.com/Ronit26Mehta/Arena_exp
Project-URL: Issues, https://github.com/Ronit26Mehta/Arena_exp/issues
Author-email: Ronit Mehta <mehtaronit702@gmail.com>
Maintainer-email: Ronit Mehta <mehtaronit702@gmail.com>
License: MIT
License-File: LICENSE
Keywords: adaptive-aggregation,aggregation,anomaly-detection,arena-score,client-evaluation,deep-learning,distributed-systems,federated-learning,machine-learning,robust-aggregation
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: >=3.8
Requires-Dist: numpy>=1.21.0
Provides-Extra: accelerate
Requires-Dist: numba>=0.56.0; extra == 'accelerate'
Provides-Extra: dev
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
Requires-Dist: pytest>=7.0.0; extra == 'dev'
Description-Content-Type: text/markdown

# ARENA Score

[](https://badge.fury.io/py/arena-score)
[](https://www.python.org/downloads/)
[](https://opensource.org/licenses/MIT)

**ARENA Score** (Adaptive Review and Evaluation using Novel Aggregation Score) is a novel client evaluation and weighted aggregation algorithm for Federated Learning.

## 🎯 Key Features

- **Adaptive Client Evaluation**: Dynamically assesses client update quality using multiple metrics
- **Robust Aggregation**: Filters out unreliable/malicious client contributions
- **Anomaly Detection**: Detects Byzantine clients using KL divergence and cosine similarity
- **Gradient Recycling**: Maintains momentum by reusing successful updates from missing clients
- **Model Agnostic**: Works with any model that implements the simple weight interface

## 📊 ARENA Score Formula

```
S_j = α(t) × max(0, ΔAcc_j) + γ(t) × CS_j + η × SN_j/(1 + SN_j)
```

Where:
- **ΔAcc** - Local accuracy improvement
- **CS** - Cosine similarity between client update and global direction
- **SN** - Spectral norm: `log(1 + σ_max(W_j))`
- **α(t), γ(t)** - Adaptive time-varying weights

## 🚀 Installation

```bash
pip install arena-score
```

For Numba acceleration (optional):
```bash
pip install arena-score[accelerate]
```

## 📝 Quick Start

```python
from arena_score import ARENAScoreClient, ARENAScoreServer, run_arena_score

# Create clients with your model
clients = [
    ARENAScoreClient(
        client_id=i,
        model=your_model.copy(),  # Model with get_weights/set_weights
        X=X_train[i],
        y=y_train[i],
        local_epochs=5,
        batch_size=32
    )
    for i in range(n_clients)
]

# Run ARENA Score federated learning
history = run_arena_score(
    global_model=your_model,
    clients=clients,
    n_rounds=10,
    eval_data=(X_test, y_test)
)

print(f"Final accuracy: {history['test_accuracy'][-1]:.4f}")
```

## 🔧 Advanced Usage

### Custom Server Configuration

```python
from arena_score import ARENAScoreServer

server = ARENAScoreServer(
    global_model=model,
    alpha_0=0.7,              # Initial accuracy weight
    alpha_min=0.3,            # Minimum accuracy weight
    gamma_0=0.3,              # Initial cosine similarity weight
    gamma_min=0.5,            # Final cosine similarity weight
    eta=0.5,                  # Spectral norm coefficient
    lambda_decay=0.1,         # Decay rate for adaptive weights
    anomaly_threshold=-0.5,   # CS threshold for anomaly detection
    kl_threshold=0.5,         # KL divergence threshold
    enable_gradient_recycling=True
)
```

### Computing ARENA Score Directly

```python
from arena_score import compute_arena_score

score = compute_arena_score(
    delta_acc=0.05,      # Accuracy improvement
    cosine_sim=0.8,      # Cosine similarity
    spectral_norm=2.5,   # Spectral norm
    alpha_t=0.5,         # Current alpha weight
    gamma_t=0.4          # Current gamma weight
)
```

### Utility Functions

```python
from arena_score import (
    compute_cosine_similarity,
    compute_spectral_norm,
    compute_kl_divergence,
    flatten_weights,
    unflatten_weights
)

# Compute cosine similarity between weight vectors
cs = compute_cosine_similarity(weights_a, weights_b)

# Compute spectral norm of weights
sn = compute_spectral_norm(weights_dict)
```

## 📈 Model Interface

Your model must implement these methods:

```python
class YourModel:
    def get_weights(self) -> Dict[str, np.ndarray]:
        """Return model weights as a dictionary."""
        pass

    def set_weights(self, weights: Dict[str, np.ndarray]):
        """Set model weights from a dictionary."""
        pass

    def forward(self, X: np.ndarray) -> np.ndarray:
        """Forward pass, returns predictions."""
        pass

    def backward(self, X, y, y_pred) -> Dict[str, np.ndarray]:
        """Backward pass, returns gradients."""
        pass

    def update_weights(self, gradients: Dict[str, np.ndarray]):
        """Update weights using gradients."""
        pass

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Make predictions (0/1 for classification)."""
        pass

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """Return probability predictions."""
        pass

    def copy(self) -> 'YourModel':
        """Return a deep copy of the model."""
        pass
```

<!-- ## 📄 Citation

If you use ARENA Score in your research, please cite:

```bibtex
@article{arena_score2026,
  title={ARENA Score: Adaptive Review and Evaluation using Novel Aggregation Score for Federated Learning},
  author={Mehta, Ronit},
  year={2026}
}
``` -->

## 📜 License

MIT License - see [LICENSE](LICENSE) for details.
````
arena_score-1.0.0.dist-info/RECORD
ADDED

```text
arena_score/__init__.py,sha256=Cyk7ffH8TPvT0qDSPd7VJfAcNEWy7iQrxe72DsRlokA,1269
arena_score/aggregator.py,sha256=Umrfx72Fi0QnmuiEQc5KGoB2qA3A9pbEnupBrQ8qNX4,6601
arena_score/client.py,sha256=fbBkOvpTrr-rYsO4nbpgawpTKS8ba7p_QfO0E94upJ4,8801
arena_score/score.py,sha256=PbHNhSjFfyrpACTGqvQu3JYLnVyUt4aC834YnQGLAAI,5115
arena_score/server.py,sha256=wg_lehQasXxAcO-6basXTANwnEcIRlGCnP60sid73iA,9876
arena_score/utils.py,sha256=44My0OioHsyGDI_JzDQWFTamrnav5uEBVPR2b8xh1BE,5766
arena_score-1.0.0.dist-info/METADATA,sha256=Eq1gkaq6nlQ7A8EM3Q8ZpJJgEEMY-fUY9qF1LDE0WrA,6609
arena_score-1.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
arena_score-1.0.0.dist-info/licenses/LICENSE,sha256=vxLqz6mA4Swb4VY17lg4XEpWmiwkUqeXTmlcIqdF92Q,1089
arena_score-1.0.0.dist-info/RECORD,,
```
arena_score-1.0.0.dist-info/licenses/LICENSE
ADDED

```text
MIT License

Copyright (c) 2026 Ronit Mehta

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```