adv-lib 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adv_lib/__init__.py +1 -0
- adv_lib/attacks/__init__.py +13 -0
- adv_lib/attacks/augmented_lagrangian.py +243 -0
- adv_lib/attacks/auto_pgd.py +523 -0
- adv_lib/attacks/boundary_projection_tf.py +170 -0
- adv_lib/attacks/carlini_wagner/__init__.py +2 -0
- adv_lib/attacks/carlini_wagner/l2.py +151 -0
- adv_lib/attacks/carlini_wagner/linf.py +158 -0
- adv_lib/attacks/decoupled_direction_norm.py +113 -0
- adv_lib/attacks/fast_adaptive_boundary/__init__.py +1 -0
- adv_lib/attacks/fast_adaptive_boundary/fast_adaptive_boundary.py +215 -0
- adv_lib/attacks/fast_adaptive_boundary/projections.py +164 -0
- adv_lib/attacks/fast_minimum_norm.py +218 -0
- adv_lib/attacks/perceptual_color_attacks/__init__.py +1 -0
- adv_lib/attacks/perceptual_color_attacks/differential_color_functions.py +181 -0
- adv_lib/attacks/perceptual_color_attacks/perceptual_color_distance_al.py +128 -0
- adv_lib/attacks/primal_dual_gradient_descent.py +379 -0
- adv_lib/attacks/projected_gradient_descent.py +109 -0
- adv_lib/attacks/segmentation/__init__.py +4 -0
- adv_lib/attacks/segmentation/alma_prox.py +283 -0
- adv_lib/attacks/segmentation/asma.py +92 -0
- adv_lib/attacks/segmentation/dense_adversary.py +83 -0
- adv_lib/attacks/segmentation/primal_dual_gradient_descent.py +349 -0
- adv_lib/attacks/self_adaptive_norm_update.py +127 -0
- adv_lib/attacks/sigma_zero.py +119 -0
- adv_lib/attacks/stochastic_sparse_attacks.py +237 -0
- adv_lib/attacks/structured_adversarial_attack.py +289 -0
- adv_lib/attacks/trust_region.py +153 -0
- adv_lib/distances/__init__.py +0 -0
- adv_lib/distances/color_difference.py +212 -0
- adv_lib/distances/lp_norms.py +18 -0
- adv_lib/distances/lpips.py +99 -0
- adv_lib/distances/structural_similarity.py +147 -0
- adv_lib/utils/__init__.py +1 -0
- adv_lib/utils/attack_utils.py +226 -0
- adv_lib/utils/color_conversions.py +71 -0
- adv_lib/utils/image_selection.py +27 -0
- adv_lib/utils/lagrangian_penalties/__init__.py +1 -0
- adv_lib/utils/lagrangian_penalties/all_penalties.py +67 -0
- adv_lib/utils/lagrangian_penalties/penalty_functions.py +79 -0
- adv_lib/utils/lagrangian_penalties/scripts/plot_penalties.py +42 -0
- adv_lib/utils/lagrangian_penalties/scripts/plot_univariates.py +32 -0
- adv_lib/utils/lagrangian_penalties/univariate_functions.py +299 -0
- adv_lib/utils/losses.py +29 -0
- adv_lib/utils/projections.py +100 -0
- adv_lib/utils/utils.py +58 -0
- adv_lib/utils/visdom_logger.py +109 -0
- adv_lib-0.2.2.dist-info/LICENSE +29 -0
- adv_lib-0.2.2.dist-info/METADATA +170 -0
- adv_lib-0.2.2.dist-info/RECORD +52 -0
- adv_lib-0.2.2.dist-info/WHEEL +5 -0
- adv_lib-0.2.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from functools import partial
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import torch
|
|
6
|
+
from torch import Tensor, nn
|
|
7
|
+
from torch.autograd import grad
|
|
8
|
+
from torch.nn import functional as F
|
|
9
|
+
|
|
10
|
+
from adv_lib.attacks.primal_dual_gradient_descent import l0_proximal_, l1_proximal, l23_proximal, l2_proximal_, \
|
|
11
|
+
linf_proximal_
|
|
12
|
+
from adv_lib.distances.lp_norms import l0_distances, l1_distances, l2_distances, linf_distances
|
|
13
|
+
from adv_lib.utils.losses import difference_of_logits
|
|
14
|
+
from adv_lib.utils.visdom_logger import VisdomLogger
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def softmax_plus_one(tensor: torch.Tensor) -> torch.Tensor:
    """Per-sample softmax computed with one extra, implicit zero logit.

    Every sample (first dimension) is flattened, a single 0 is prepended, a softmax is taken over the padded row and
    the entry belonging to the prepended 0 is discarded. The result has the shape of `tensor`; the values of one
    sample sum to one minus the probability that was assigned to the extra entry.
    """
    flat = tensor.flatten(start_dim=1)
    # prepend the additional zero logit in front of each flattened sample
    padded = F.pad(flat, pad=(1, 0), mode='constant', value=0)
    probabilities = torch.softmax(padded, dim=1)
    # drop the column of the extra logit and restore the original layout
    return probabilities.narrow(1, 1, flat.size(1)).view_as(tensor)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def pdgd(model: nn.Module,
         inputs: Tensor,
         labels: Tensor,
         masks: Optional[Tensor] = None,
         targeted: bool = False,
         adv_threshold: float = 0.99,
         num_steps: int = 500,
         random_init: float = 0,
         primal_lr: float = 0.1,
         primal_lr_decrease: float = 0.01,
         dual_ratio_init: float = 1,
         dual_lr: float = 0.1,
         dual_lr_decrease: float = 0.1,
         dual_ema: float = 0.9,
         dual_min_ratio: float = 1e-6,
         constraint_masking: bool = False,
         mask_decay: bool = False,
         callback: Optional[VisdomLogger] = None) -> Tensor:
    """Primal-Dual Gradient Descent (PDGD) attack from https://arxiv.org/abs/2106.01538 adapted to semantic
    segmentation. This version is only suitable for the L2-norm.

    Parameters
    ----------
    model : nn.Module
        Model to attack. Its output is used as per-pixel logits with the classes on dimension 1.
    inputs : Tensor
        Inputs to attack. The perturbed inputs are kept in [0, 1] by clamping.
    labels : Tensor
        Per-pixel labels if untargeted, else per-pixel target labels. Assumed to be of shape (batch, H, W), since the
        per-sample initialization of the dual variables is broadcast with a (-1, 1, 1) view.
    masks : Tensor, optional
        Boolean mask of the pixels to attack. If None, `labels < num_classes` is used, where `num_classes` is the size
        of dimension 1 of the logits.
    targeted : bool
        Whether to perform a targeted attack or not.
    adv_threshold : float
        Fraction of the masked pixels that must be adversarial for a sample to count as adversarial.
    num_steps : int
        Number of optimization steps (one forward and one backward pass each).
    random_init : float
        If non-zero, the perturbation is initialized uniformly in [-random_init, random_init] before being clamped
        to the feasible set.
    primal_lr : float
        Initial step size of the Adam update on the perturbation.
    primal_lr_decrease : float
        The primal step size at step i is `primal_lr * primal_lr_decrease ** (i / num_steps)`.
    dual_ratio_init : float
        The dual variables of the masked pixels are initialized to `-log(dual_ratio_init * number of masked pixels)`.
    dual_lr : float
        Initial step size of the sign-gradient ascent on the dual variables.
    dual_lr_decrease : float
        The dual step size decays linearly from `dual_lr` to `dual_lr * dual_lr_decrease` over the steps.
    dual_ema : float
        Coefficient of the exponential moving average applied to the normalized dual variables.
    dual_min_ratio : float
        The dual variables are clamped to [log(dual_min_ratio), -log(dual_min_ratio)].
    constraint_masking : bool
        If True, the constraints whose logit difference is above the k-th largest one are left out of the loss, with
        k = floor((1 - adv_threshold) * number of masked pixels).
    mask_decay : bool
        If True (and `constraint_masking` is True), k grows linearly from 0 at the first step to its full value at
        the last step.
    callback : Optional
        Logger receiving the tracked quantities through `accumulate_line` and `update_lines`.

    Returns
    -------
    best_adv : Tensor
        For each sample, the perturbed inputs with the smallest L2 norm among those reaching `adv_threshold`; if the
        threshold was never reached, the last perturbed inputs that did not decrease the adversarial percentage.

    """
    attack_name = 'PDGD L2'
    device = inputs.device
    batch_size = len(inputs)
    batch_view = lambda tensor: tensor.view(batch_size, *[1] * (inputs.ndim - 1))
    multiplier = -1 if targeted else 1
    log_min_dual_ratio = math.log(dual_min_ratio)
    # Schedules divide by (num_steps - 1) and the logger by (num_steps // 20): guard both against being 0 so that
    # short runs (num_steps == 1 or num_steps < 20) do not raise ZeroDivisionError.
    last_step = max(num_steps - 1, 1)
    log_interval = max(num_steps // 20, 1)

    # Setup variables
    r = torch.zeros_like(inputs, requires_grad=True)
    if random_init:
        nn.init.uniform_(r, -random_init, random_init)
        r.data.add_(inputs).clamp_(min=0, max=1).sub_(inputs)

    # Adam variables
    exp_avg = torch.zeros_like(inputs)
    exp_avg_sq = torch.zeros_like(inputs)
    β_1, β_2 = 0.9, 0.999

    # dual variables
    λ = torch.zeros_like(labels, dtype=torch.double)

    # Init trackers
    best_l2 = torch.full((batch_size,), float('inf'), device=device)
    best_adv = inputs.clone()
    adv_found = torch.zeros_like(best_l2, dtype=torch.bool)
    best_adv_percent = torch.zeros_like(best_l2)

    for i in range(num_steps):

        adv_inputs = inputs + r
        logits = model(adv_inputs)
        l2 = r.flatten(1).norm(p=2, dim=1)

        if i == 0:
            # everything depending on the number of classes is set up after the first forward pass
            num_classes = logits.size(1)
            if masks is None:
                masks = labels < num_classes
            masks_sum = masks.flatten(1).sum(dim=1)
            labels_ = labels.clone()
            labels_[~masks] = 0  # give ignored pixels a valid class index for scatter

            masks_inf = torch.zeros_like(masks, dtype=torch.float).masked_fill_(~masks, float('inf'))
            labels_infhot = torch.zeros_like(logits.detach()).scatter(1, labels_.unsqueeze(1), float('inf'))
            dl_func = partial(difference_of_logits, labels=labels_, labels_infhot=labels_infhot)

            # init dual variables with masks
            λ.add_(masks_sum.float().mul_(dual_ratio_init).log_().neg_().view(-1, 1, 1))
            λ[~masks] = -float('inf')
            λ_ema = softmax_plus_one(λ)

            # init constraint masking
            k = ((1 - adv_threshold) * masks_sum).long()  # number of constraints that can be violated
            constraint_mask = masks
            constraint_inf_mask = torch.zeros_like(constraint_mask, dtype=torch.float)
            constraint_inf_mask.masked_fill_(~constraint_mask, float('inf'))

        # track progress
        pred = logits.argmax(dim=1)
        pixel_is_adv = (pred == labels) if targeted else (pred != labels)
        adv_percent = (pixel_is_adv & masks).flatten(1).sum(dim=1) / masks_sum
        is_adv = adv_percent >= adv_threshold
        is_smaller = l2 <= best_l2
        improves_constraints = adv_percent >= best_adv_percent.clamp_max(adv_threshold)
        is_better_adv = (is_smaller & is_adv) | (~adv_found & improves_constraints)
        adv_found.logical_or_(is_adv)
        best_l2 = torch.where(is_better_adv, l2.detach(), best_l2)
        best_adv_percent = torch.where(is_better_adv, adv_percent, best_adv_percent)
        best_adv = torch.where(batch_view(is_better_adv), adv_inputs.detach(), best_adv)

        m_y = multiplier * dl_func(logits)

        if constraint_masking:
            if mask_decay:
                k = ((1 - adv_threshold) * masks_sum).mul_(i / last_step).long()
            if k.any():
                top_constraints = m_y.detach().sub(masks_inf).flatten(1).topk(k=k.max()).values
                # samples with k == 0 would gather at index -1, which is out of bounds: clamp to index 0 (the largest
                # value) so that none of their constraints is masked
                ξ = top_constraints.gather(1, (k.unsqueeze(1) - 1).clamp_(min=0)).squeeze(1)
                constraint_mask = masks & (m_y <= ξ.view(-1, 1, 1))
                constraint_inf_mask.fill_(0).masked_fill_(~constraint_mask, float('inf'))

        if i:
            λ_ema.lerp_(softmax_plus_one(λ - constraint_inf_mask), weight=1 - dual_ema)
        λ_ema_masked = λ_ema * constraint_mask
        λ_1 = 1 - λ_ema_masked.flatten(1).sum(dim=1)

        # weighted sum of the perturbation norm and of the per-pixel misclassification terms
        L_r = λ_1 * l2 + F.softplus(m_y).mul(λ_ema_masked).flatten(1).sum(dim=1)

        grad_r = grad(L_r.sum(), inputs=r, only_inputs=True)[0]
        grad_λ = m_y.detach().sign().mul_(masks)

        # Adam algorithm
        exp_avg.lerp_(grad_r, weight=1 - β_1)
        exp_avg_sq.mul_(β_2).addcmul_(grad_r, grad_r, value=1 - β_2)
        bias_correction1 = 1 - β_1 ** (i + 1)
        bias_correction2 = 1 - β_2 ** (i + 1)
        denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(1e-8)
        # primal step size exponential decay
        step_size = primal_lr * primal_lr_decrease ** (i / num_steps)
        # gradient descent on primal variables
        r.data.addcdiv_(exp_avg, denom, value=-step_size / bias_correction1)

        # projection on feasible set
        r.data.add_(inputs).clamp_(min=0, max=1).sub_(inputs)

        # gradient ascent on dual variables and exponential moving average
        θ_λ = dual_lr * ((last_step - i) / last_step * (1 - dual_lr_decrease) + dual_lr_decrease)
        λ.add_(grad_λ, alpha=θ_λ).clamp_(min=log_min_dual_ratio, max=-log_min_dual_ratio)
        λ[~masks] = -float('inf')

        if callback is not None:
            callback.accumulate_line('m_y', i, m_y.mean(), title=f'{attack_name} - Logit difference')
            callback.accumulate_line('1 - sum(λ)', i, λ_1.mean(), title=f'{attack_name} - Dual variables')
            callback.accumulate_line(['θ_r', 'θ_λ'], i, [step_size, θ_λ], title=f'{attack_name} - Learning rates')
            callback.accumulate_line(['l2', 'best_l2'], i, [l2.mean(), best_l2.mean()],
                                     title=f'{attack_name} - L2 norms')
            callback.accumulate_line(['adv%', 'best_adv%'], i, [adv_percent.mean(), best_adv_percent.mean()],
                                     title=f'{attack_name} - APSRs')

            if (i + 1) % log_interval == 0 or (i + 1) == num_steps:
                callback.update_lines()

    return best_adv
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def pdpgd(model: nn.Module,
          inputs: Tensor,
          labels: Tensor,
          norm: float,
          masks: Optional[Tensor] = None,
          targeted: bool = False,
          adv_threshold: float = 0.99,
          num_steps: int = 500,
          random_init: float = 0,
          proximal_operator: Optional[float] = None,
          primal_lr: float = 0.1,
          primal_lr_decrease: float = 0.01,
          dual_ratio_init: float = 1,
          dual_lr: float = 0.1,
          dual_lr_decrease: float = 0.1,
          dual_ema: float = 0.9,
          dual_min_ratio: float = 1e-12,
          proximal_steps: int = 5,
          ε_threshold: float = 1e-2,
          constraint_masking: bool = False,
          mask_decay: bool = False,
          callback: Optional[VisdomLogger] = None) -> Tensor:
    """Primal-Dual Proximal Gradient Descent (PDPGD) attacks from https://arxiv.org/abs/2106.01538 adapted to semantic
    segmentation.

    Parameters
    ----------
    model : nn.Module
        Model to attack. Its output is used as per-pixel logits with the classes on dimension 1.
    inputs : Tensor
        Inputs to attack. The perturbed inputs are kept in [0, 1] by clamping.
    labels : Tensor
        Per-pixel labels if untargeted, else per-pixel target labels. Assumed to be of shape (batch, H, W), since the
        per-sample initialization of the dual variables is broadcast with a (-1, 1, 1) view.
    norm : float
        Norm to minimize; must be one of 0, 1, 2 or float('inf') (keys of the distance table).
    masks : Tensor, optional
        Boolean mask of the pixels to attack. If None, `labels < num_classes` is used, where `num_classes` is the size
        of dimension 1 of the logits.
    targeted : bool
        Whether to perform a targeted attack or not.
    adv_threshold : float
        Fraction of the masked pixels that must be adversarial for a sample to count as adversarial.
    num_steps : int
        Number of optimization steps (one forward and one backward pass each).
    random_init : float
        If non-zero, the perturbation is initialized uniformly in [-random_init, random_init] before being clamped
        to the feasible set.
    proximal_operator : float, optional
        Key of the proximal operator to use: 0, 1, 2, float('inf') or 23. If None, the one matching `norm` is used.
    primal_lr : float
        Initial step size of the Adam update on the perturbation.
    primal_lr_decrease : float
        The primal step size at step i is `primal_lr * primal_lr_decrease ** (i / num_steps)`.
    dual_ratio_init : float
        The dual variables of the masked pixels are initialized to `-log(dual_ratio_init * number of masked pixels)`.
    dual_lr : float
        Initial step size of the sign-gradient ascent on the dual variables.
    dual_lr_decrease : float
        The dual step size decays linearly from `dual_lr` to `dual_lr * dual_lr_decrease` over the steps.
    dual_ema : float
        Coefficient of the exponential moving average applied to the normalized dual variables.
    dual_min_ratio : float
        The dual variables are clamped to [log(dual_min_ratio), -log(dual_min_ratio)].
    proximal_steps : int
        Maximum number of proximal sub-iterations performed at each step.
    ε_threshold : float
        A sample stops being updated in the proximal sub-iterations once the relative L2 change of its iterate is
        not above this value; the sub-iterations stop early when all samples are below it.
    constraint_masking : bool
        If True, the constraints whose logit difference is above the k-th largest one are left out of the loss, with
        k = floor((1 - adv_threshold) * number of masked pixels).
    mask_decay : bool
        If True (and `constraint_masking` is True), k grows linearly from 0 at the first step to its full value at
        the last step.
    callback : Optional
        Logger receiving the tracked quantities through `accumulate_line` and `update_lines`.

    Returns
    -------
    best_adv : Tensor
        For each sample, the perturbed inputs with the smallest distance among those reaching `adv_threshold`; if the
        threshold was never reached, the last perturbed inputs that did not decrease the adversarial percentage.

    """
    attack_name = f'PDPGD L{norm}'
    _distance = {
        0: l0_distances,
        1: l1_distances,
        2: l2_distances,
        float('inf'): linf_distances,
    }
    _proximal_operator = {
        0: l0_proximal_,
        1: l1_proximal,
        2: l2_proximal_,
        float('inf'): linf_proximal_,
        23: l23_proximal,
    }
    device = inputs.device
    batch_size = len(inputs)
    batch_view = lambda tensor: tensor.view(batch_size, *[1] * (inputs.ndim - 1))
    multiplier = -1 if targeted else 1
    distance = _distance[norm]
    proximity_operator = _proximal_operator[norm if proximal_operator is None else proximal_operator]
    log_min_dual_ratio = math.log(dual_min_ratio)
    # Schedules divide by (num_steps - 1) and the logger by (num_steps // 20): guard both against being 0 so that
    # short runs (num_steps == 1 or num_steps < 20) do not raise ZeroDivisionError.
    last_step = max(num_steps - 1, 1)
    log_interval = max(num_steps // 20, 1)

    # Setup variables
    r = torch.zeros_like(inputs, requires_grad=True)
    if random_init:
        nn.init.uniform_(r, -random_init, random_init)
        r.data.add_(inputs).clamp_(min=0, max=1).sub_(inputs)

    # Adam variables
    exp_avg = torch.zeros_like(inputs)
    exp_avg_sq = torch.zeros_like(inputs)
    β_1, β_2 = 0.9, 0.999

    # dual variables
    λ = torch.zeros_like(labels, dtype=torch.double)

    # Init trackers
    best_dist = torch.full((batch_size,), float('inf'), device=device)
    best_adv = inputs.clone()
    adv_found = torch.zeros_like(best_dist, dtype=torch.bool)
    best_adv_percent = torch.zeros_like(best_dist)

    for i in range(num_steps):

        adv_inputs = inputs + r
        logits = model(adv_inputs)
        dist = distance(adv_inputs.detach(), inputs)

        if i == 0:
            # everything depending on the number of classes is set up after the first forward pass
            num_classes = logits.size(1)
            if masks is None:
                masks = labels < num_classes
            masks_sum = masks.flatten(1).sum(dim=1)
            labels_ = labels.clone()
            labels_[~masks] = 0  # give ignored pixels a valid class index for scatter

            masks_inf = torch.zeros_like(masks, dtype=torch.float).masked_fill_(~masks, float('inf'))
            labels_infhot = torch.zeros_like(logits.detach()).scatter(1, labels_.unsqueeze(1), float('inf'))
            dl_func = partial(difference_of_logits, labels=labels_, labels_infhot=labels_infhot)

            # init dual variables with masks
            λ.add_(masks_sum.float().mul_(dual_ratio_init).log_().neg_().view(-1, 1, 1))
            λ[~masks] = -float('inf')
            λ_ema = softmax_plus_one(λ)

            # init constraint masking
            k = ((1 - adv_threshold) * masks_sum).long()  # number of constraints that can be violated
            constraint_mask = masks
            constraint_inf_mask = torch.zeros_like(constraint_mask, dtype=torch.float)
            constraint_inf_mask.masked_fill_(~constraint_mask, float('inf'))

        # track progress
        pred = logits.argmax(dim=1)
        pixel_is_adv = (pred == labels) if targeted else (pred != labels)
        adv_percent = (pixel_is_adv & masks).flatten(1).sum(dim=1) / masks_sum
        is_adv = adv_percent >= adv_threshold
        is_smaller = dist <= best_dist
        improves_constraints = adv_percent >= best_adv_percent.clamp_max(adv_threshold)
        is_better_adv = (is_smaller & is_adv) | (~adv_found & improves_constraints)
        adv_found.logical_or_(is_adv)
        best_dist = torch.where(is_better_adv, dist.detach(), best_dist)
        best_adv_percent = torch.where(is_better_adv, adv_percent, best_adv_percent)
        best_adv = torch.where(batch_view(is_better_adv), adv_inputs.detach(), best_adv)

        m_y = multiplier * dl_func(logits)

        if constraint_masking:
            if mask_decay:
                k = ((1 - adv_threshold) * masks_sum).mul_(i / last_step).long()
            if k.any():
                top_constraints = m_y.detach().sub(masks_inf).flatten(1).topk(k=k.max()).values
                # samples with k == 0 would gather at index -1, which is out of bounds: clamp to index 0 (the largest
                # value) so that none of their constraints is masked
                ξ = top_constraints.gather(1, (k.unsqueeze(1) - 1).clamp_(min=0)).squeeze(1)
                constraint_mask = masks & (m_y <= ξ.view(-1, 1, 1))
                constraint_inf_mask.fill_(0).masked_fill_(~constraint_mask, float('inf'))

        if i:
            λ_ema.lerp_(softmax_plus_one(λ - constraint_inf_mask), weight=1 - dual_ema)
        λ_ema_masked = λ_ema * constraint_mask

        # only the misclassification term is differentiated; the norm is handled by the proximal operator
        cls_loss = F.softplus(m_y).mul(λ_ema_masked).flatten(1).sum(dim=1)

        grad_r = grad(cls_loss.sum(), inputs=r, only_inputs=True)[0]
        grad_λ = m_y.detach().sign().mul_(constraint_mask)

        # Adam algorithm
        exp_avg.lerp_(grad_r, weight=1 - β_1)
        exp_avg_sq.mul_(β_2).addcmul_(grad_r, grad_r, value=1 - β_2)
        bias_correction1 = 1 - β_1 ** (i + 1)
        bias_correction2 = 1 - β_2 ** (i + 1)
        denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(1e-8)
        # primal step size exponential decay
        step_size = primal_lr * primal_lr_decrease ** (i / num_steps)
        # gradient descent on primal variables
        r.data.addcdiv_(exp_avg, denom, value=-step_size / bias_correction1)

        # projection on feasible set
        r.data.add_(inputs).clamp_(min=0, max=1).sub_(inputs)

        # proximal adam https://arxiv.org/abs/1910.10094
        ψ_max = denom.flatten(1).amax(dim=1)
        effective_lr = step_size / ψ_max

        # proximal sub-iterations variables
        z_curr = r.detach()
        ε = torch.ones_like(best_dist)
        λ_sum = λ_ema_masked.flatten(1).sum(dim=1)
        μ = ((1 - λ_sum) / λ_sum).to(dtype=torch.float).mul_(effective_lr)
        H_div = denom / batch_view(ψ_max)
        for _ in range(proximal_steps):
            z_prev = z_curr

            z_new = proximity_operator(z_curr.addcmul(H_div, z_curr - r.detach(), value=-1), batch_view(μ))
            z_new.add_(inputs).clamp_(min=0, max=1).sub_(inputs)

            # samples whose previous relative change is not above the threshold keep their iterate
            z_curr = torch.where(batch_view(ε > ε_threshold), z_new, z_prev)
            ε = torch.norm((z_curr - z_prev).flatten(1), p=2, dim=1, out=ε).div_(z_curr.flatten(1).norm(p=2, dim=1))

            if (ε < ε_threshold).all():
                break

        r.data = z_curr

        # gradient ascent on dual variables and exponential moving average
        θ_λ = dual_lr * ((last_step - i) / last_step * (1 - dual_lr_decrease) + dual_lr_decrease)
        λ.add_(grad_λ, alpha=θ_λ).clamp_(min=log_min_dual_ratio, max=-log_min_dual_ratio)
        λ[~masks] = -float('inf')

        if callback is not None:
            callback.accumulate_line('m_y', i, m_y.mean(), title=f'{attack_name} - Logit difference')
            callback.accumulate_line('1 - sum(λ)', i, (1 - λ_sum).mean(), title=f'{attack_name} - Dual variables')
            callback.accumulate_line(['θ_r', 'θ_λ'], i, [step_size, θ_λ], title=f'{attack_name} - Learning rates')
            callback.accumulate_line([f'l{norm}', f'best_l{norm}'], i, [dist.mean(), best_dist.mean()],
                                     title=f'{attack_name} - L{norm} norms')
            callback.accumulate_line(['adv%', 'best_adv%'], i, [adv_percent.mean(), best_adv_percent.mean()],
                                     title=f'{attack_name} - APSRs')

            if (i + 1) % log_interval == 0 or (i + 1) == num_steps:
                callback.update_lines()

    return best_adv
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
import torch
|
|
5
|
+
from torch import nn, Tensor
|
|
6
|
+
from torch.autograd import grad
|
|
7
|
+
from torch.nn import functional as F
|
|
8
|
+
|
|
9
|
+
from adv_lib.utils.visdom_logger import VisdomLogger
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def anu(model: nn.Module,
        inputs: Tensor,
        labels: Tensor,
        targeted: bool = False,
        steps: int = 100,
        γ_init: float = 0.05,
        α_γ: float = 0.05,
        init_norm: float = 1.,
        levels: Optional[int] = 256,
        callback: Optional[VisdomLogger] = None) -> Tensor:
    """
    Self Adaptive Norm Update (ANU) attack from https://www.scitepress.org/PublishedPapers/2021/101861/101861.pdf.

    Parameters
    ----------
    model : nn.Module
        Model to attack.
    inputs : Tensor
        Inputs to attack. Should be in [0, 1].
    labels : Tensor
        Labels corresponding to the inputs if untargeted, else target labels.
    targeted : bool
        Whether to perform a targeted attack or not.
    steps : int
        Number of optimization steps.
    γ_init : float
        Initial value of the factor γ by which the norm is modified at each step: new_norm = norm * (1 + or - γ).
    α_γ : float
        Factor by which γ is modified at each step: new_γ = γ * (1 + α_γ) if the adversarial status of the sample is
        the same as at the previous step, else new_γ = γ * (1 - α_γ). An additional clipping of γ to [0, 1] has been
        added (not mentioned in the original paper) so that 1 - γ never becomes negative.
    init_norm : float
        Initial value for the norm of the attack.
    levels : int
        If not None, the perturbed inputs are quantized to the specified number of levels at every step.
    callback : Optional
        Logger receiving the tracked quantities through `accumulate_line` and `update_lines`.

    Returns
    -------
    adv_inputs : Tensor
        Modified inputs to be adversarial to the model.

    Raises
    ------
    ValueError
        If the values of `inputs` are not in the [0, 1] range.

    """
    if inputs.min() < 0 or inputs.max() > 1:
        raise ValueError('Input values should be in the [0, 1] range.')
    device = inputs.device
    batch_size = len(inputs)
    batch_view = lambda tensor: tensor.view(batch_size, *[1] * (inputs.ndim - 1))
    # steps // 20 is 0 for fewer than 20 steps, which made the logging condition raise ZeroDivisionError
    log_interval = max(steps // 20, 1)

    # Init variables
    multiplier = -1 if targeted else 1
    δ = torch.zeros_like(inputs, requires_grad=True)
    γ = torch.full((batch_size,), γ_init, device=device, dtype=torch.float)
    ε = torch.full_like(γ, init_norm)
    # largest L2 norm of a perturbation that keeps the inputs in [0, 1]
    worst_norm = torch.max(inputs, 1 - inputs).flatten(1).norm(p=2, dim=1)

    # Init trackers
    best_l2 = worst_norm.clone()
    best_δ = torch.zeros_like(inputs)
    adv_found = torch.zeros(batch_size, dtype=torch.bool, device=device)
    is_adv_prev = adv_found.clone()

    for i in range(steps):
        # cosine annealing of the step size from 1 down to 0.01
        α = torch.tensor(0.01 + (1 - 0.01) * (1 + math.cos(math.pi * i / steps)) / 2, device=device)

        l2 = δ.data.flatten(1).norm(p=2, dim=1)
        adv_inputs = inputs + δ
        logits = model(adv_inputs)
        pred_labels = logits.argmax(1)
        ce_loss = F.cross_entropy(logits, labels, reduction='none')
        loss = multiplier * ce_loss

        is_adv = (pred_labels == labels) if targeted else (pred_labels != labels)
        is_smaller = l2 < best_l2
        is_both = is_adv & is_smaller
        adv_found.logical_or_(is_adv)
        best_l2 = torch.where(is_both, l2, best_l2)
        best_δ = torch.where(batch_view(is_both), δ.detach(), best_δ)

        δ_grad = grad(loss.sum(), δ, only_inputs=True)[0]
        # renorming gradient
        grad_norms = δ_grad.flatten(1).norm(p=2, dim=1)
        δ_grad.div_(batch_view(grad_norms))
        # avoid nan or inf if gradient is 0
        if (zero_grad := (grad_norms < 1e-12)).any():
            δ_grad[zero_grad] = torch.randn_like(δ_grad[zero_grad])

        if callback is not None:
            cosine = F.cosine_similarity(δ_grad.flatten(1), δ.data.flatten(1), dim=1).mean()
            callback.accumulate_line('ce', i, ce_loss.mean())
            callback_best = best_l2.masked_select(adv_found).mean()
            callback.accumulate_line(['ε', 'γ', 'l2', 'best_l2'], i, [ε.mean(), γ.mean(), l2.mean(), callback_best])
            callback.accumulate_line(['cosine', 'α', 'success'], i, [cosine, α, adv_found.float().mean()])

            if (i + 1) % log_interval == 0 or (i + 1) == steps:
                callback.update_lines()

        # gradient step
        δ.data.add_(δ_grad, alpha=α)

        # grow γ while the adversarial status is stable, shrink it when the status flips
        γ = torch.where(is_adv == is_adv_prev, (1 + α_γ) * γ, (1 - α_γ) * γ).clamp_(min=0, max=1)
        # shrink the norm for adversarial samples, grow it otherwise
        ε = torch.where(is_adv, (1 - γ) * ε, (1 + γ) * ε)
        ε = torch.minimum(ε, worst_norm)

        # rescale the perturbation to norm ε, then clamp (and quantize) in input space
        δ.data.mul_(batch_view(ε / δ.data.flatten(1).norm(p=2, dim=1)))
        δ.data.add_(inputs).clamp_(0, 1)
        if levels is not None:
            δ.data.mul_(levels - 1).round_().div_(levels - 1)
        δ.data.sub_(inputs)

        is_adv_prev = is_adv

    return inputs + best_δ
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Adapted from https://github.com/Cinofix/sigma-zero-adversarial-attack
|
|
2
|
+
import math
|
|
3
|
+
import warnings
|
|
4
|
+
|
|
5
|
+
import torch
|
|
6
|
+
from torch import Tensor, nn
|
|
7
|
+
from torch.autograd import grad
|
|
8
|
+
|
|
9
|
+
from adv_lib.utils.losses import difference_of_logits
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def sigma_zero(model: nn.Module,
               inputs: Tensor,
               labels: Tensor,
               num_steps: int = 1000,
               η_0: float = 1.0,
               σ: float = 0.001,
               τ_0: float = 0.3,
               τ_factor: float = 0.01,
               grad_norm: float = float('inf'),
               targeted: bool = False) -> Tensor:
    r"""
    σ-zero attack from https://arxiv.org/abs/2402.01879.

    Parameters
    ----------
    model : nn.Module
        Model to attack.
    inputs : Tensor
        Inputs to attack. Should be in [0, 1].
    labels : Tensor
        Labels corresponding to the inputs if untargeted, else target labels.
    num_steps : int
        Number of optimization steps. Corresponds to the number of forward and backward propagations.
    η_0 : float
        Initial step size.
    σ : float
        \ell_0 approximation parameter: smaller values produce sharper approximations while larger values produce a
        smoother approximation.
    τ_0 : float
        Initial sparsity threshold.
    τ_factor : float
        Threshold adjustment factor w.r.t. step size η.
    grad_norm: float
        Norm to use for gradient normalization.
    targeted : bool
        Attack is untargeted only: will raise a warning and return inputs if targeted is True.

    Returns
    -------
    best_adv : Tensor
        Perturbed inputs (inputs + perturbation) that are adversarial and have smallest distance with the original
        inputs.

    """
    if targeted:
        warnings.warn('σ-zero attack is untargeted only. Returning inputs.')
        return inputs

    batch_size, numel = len(inputs), inputs[0].numel()
    batch_view = lambda tensor: tensor.view(batch_size, *[1] * (inputs.ndim - 1))

    δ = torch.zeros_like(inputs, requires_grad=True)
    # Adam variables
    exp_avg = torch.zeros_like(inputs)
    exp_avg_sq = torch.zeros_like(inputs)
    β_1, β_2 = 0.9, 0.999

    # a perturbation is only kept if it modifies strictly fewer than all the components
    best_l0 = inputs.new_full((batch_size,), numel)
    best_adv = inputs.clone()
    τ = torch.full_like(best_l0, τ_0)

    η = η_0
    for i in range(num_steps):
        adv_inputs = inputs + δ

        # compute loss
        logits = model(adv_inputs)
        dl_loss = difference_of_logits(logits, labels).clamp_(min=0)
        δ_square = δ.square()
        l0_approx_normalized = (δ_square / (δ_square + σ)).flatten(1).mean(dim=1)

        # keep best solutions
        predicted_classes = logits.argmax(dim=1)
        l0_norm = δ.data.flatten(1).norm(p=0, dim=1)
        # targeted is always False here because of the early return above
        is_adv = predicted_classes != labels
        is_smaller = l0_norm < best_l0
        is_both = is_adv & is_smaller
        best_l0 = torch.where(is_both, l0_norm, best_l0)
        best_adv = torch.where(batch_view(is_both), adv_inputs.detach(), best_adv)

        # compute loss and gradient
        adv_loss = (dl_loss + l0_approx_normalized).sum()
        δ_grad = grad(adv_loss, inputs=δ, only_inputs=True)[0]

        # normalize gradient based on grad_norm type
        δ_inf_norm = δ_grad.flatten(1).norm(p=grad_norm, dim=1).clamp_(min=1e-12)
        δ_grad.div_(batch_view(δ_inf_norm))

        # adam computations
        exp_avg.lerp_(δ_grad, weight=1 - β_1)
        exp_avg_sq.mul_(β_2).addcmul_(δ_grad, δ_grad, value=1 - β_2)
        bias_correction1 = 1 - β_1 ** (i + 1)
        bias_correction2 = 1 - β_2 ** (i + 1)
        denom = exp_avg_sq.sqrt().div_(bias_correction2 ** 0.5).add_(1e-8)

        # step and clamp
        δ.data.addcdiv_(exp_avg, denom, value=-η / bias_correction1)
        δ.data.add_(inputs).clamp_(min=0, max=1).sub_(inputs)

        # update step size with cosine annealing
        η = 0.1 * η_0 + 0.9 * η_0 * (1 + math.cos(math.pi * i / num_steps)) / 2
        # dynamic thresholding: raise the threshold for adversarial samples, lower it otherwise
        τ.add_(torch.where(is_adv, τ_factor * η, -τ_factor * η)).clamp_(min=0, max=1)

        # filter components
        δ.data[δ.data.abs() < batch_view(τ)] = 0

    return best_adv
|