dl-backtrace 0.0.18__py3-none-any.whl → 0.0.20.dev36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dl-backtrace might be problematic. Click here for more details.
- dl_backtrace/pytorch_backtrace/backtrace/backtrace.py +194 -70
- dl_backtrace/pytorch_backtrace/backtrace/utils/contrast.py +607 -156
- dl_backtrace/pytorch_backtrace/backtrace/utils/prop.py +892 -265
- dl_backtrace/tf_backtrace/backtrace/backtrace.py +11 -7
- dl_backtrace/tf_backtrace/backtrace/utils/utils_prop.py +249 -47
- dl_backtrace/version.py +2 -2
- {dl_backtrace-0.0.18.dist-info → dl_backtrace-0.0.20.dev36.dist-info}/METADATA +1 -1
- {dl_backtrace-0.0.18.dist-info → dl_backtrace-0.0.20.dev36.dist-info}/RECORD +11 -11
- {dl_backtrace-0.0.18.dist-info → dl_backtrace-0.0.20.dev36.dist-info}/WHEEL +1 -1
- {dl_backtrace-0.0.18.dist-info → dl_backtrace-0.0.20.dev36.dist-info}/LICENSE +0 -0
- {dl_backtrace-0.0.18.dist-info → dl_backtrace-0.0.20.dev36.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
import torch
|
|
3
3
|
import torch.nn as nn
|
|
4
|
+
from tqdm import tqdm
|
|
4
5
|
from dl_backtrace.pytorch_backtrace.backtrace.utils import contrast as UC
|
|
5
6
|
from dl_backtrace.pytorch_backtrace.backtrace.utils import prop as UP
|
|
6
7
|
from dl_backtrace.pytorch_backtrace.backtrace.config import activation_master
|
|
@@ -18,47 +19,37 @@ class Backtrace(object):
|
|
|
18
19
|
if model_type == 'encoder':
|
|
19
20
|
self.model = model
|
|
20
21
|
self.model_type = model_type
|
|
21
|
-
|
|
22
22
|
# create a tree-like structure for encoder model
|
|
23
23
|
self.model_resource = EN.build_encoder_tree(model)
|
|
24
|
-
|
|
25
24
|
# create a layer stack for encoder model
|
|
26
25
|
self.create_layer_stack()
|
|
27
|
-
|
|
28
26
|
# extract the encoder model weights
|
|
29
27
|
self.model_weights = EN.extract_encoder_weights(model)
|
|
30
|
-
|
|
31
28
|
# # calculate the output of each submodule of the encoder model
|
|
32
29
|
# self.all_out_model = EN.create_encoder_output(model)
|
|
30
|
+
self.activation_dict = None
|
|
33
31
|
|
|
34
32
|
elif model_type == 'encoder_decoder':
|
|
35
33
|
self.model = model
|
|
36
34
|
self.model_type = model_type
|
|
37
|
-
|
|
38
35
|
# create a tree-like structure and layer_stack for encoder-decoder model
|
|
39
36
|
self.model_resource, self.layer_stack = ED.build_enc_dec_tree(model)
|
|
40
|
-
|
|
41
37
|
# extract the encoder-decoder model weights
|
|
42
|
-
self.model_weights = ED.extract_encoder_decoder_weights(model)
|
|
43
|
-
|
|
38
|
+
self.model_weights = ED.extract_encoder_decoder_weights(model)
|
|
44
39
|
# # calculate the output of each submodule of the encoder-decoder model
|
|
45
40
|
# self.all_out_model = ED.calculate_encoder_decoder_output(model)
|
|
46
|
-
|
|
41
|
+
self.activation_dict = None
|
|
47
42
|
|
|
48
43
|
else:
|
|
49
44
|
self.model_type = model_type
|
|
50
45
|
# create a tree-like structure that represents the layers of the neural network model
|
|
51
46
|
self.create_tree(model)
|
|
52
|
-
|
|
53
47
|
# create a new model (an instance of tf.keras.Model) that produces the output of each layer in the neural network.
|
|
54
48
|
self.create_model_output(model)
|
|
55
|
-
|
|
56
49
|
# create a new model (an instance of tf.keras.Model) that produces the output of each layer in the neural network.
|
|
57
50
|
self.create_every_model_output(model)
|
|
58
|
-
|
|
59
51
|
# create a layer stack that defines the order in which layers should be processed during backpropagation.
|
|
60
52
|
self.create_layer_stack()
|
|
61
|
-
|
|
62
53
|
# checks if the model is sequential or not. If it's sequential, it adds the input layer to the layer stack.
|
|
63
54
|
# identity
|
|
64
55
|
|
|
@@ -72,7 +63,6 @@ class Backtrace(object):
|
|
|
72
63
|
self.model_resource[3].append(inp_name)
|
|
73
64
|
self.sequential = True
|
|
74
65
|
try:
|
|
75
|
-
|
|
76
66
|
# calls the build_activation_dict method to build a dictionary that maps layer names to activation functions.
|
|
77
67
|
# If that fails, it creates a temporary dictionary with default activation functions.
|
|
78
68
|
if len(activation_dict) == 0:
|
|
@@ -92,10 +82,8 @@ class Backtrace(object):
|
|
|
92
82
|
layer_list = list(model_resource[0].keys())
|
|
93
83
|
activation_dict = {}
|
|
94
84
|
activation_functions = ['relu', 'sigmoid', 'tanh', 'softmax'] # You can add more activation functions
|
|
95
|
-
|
|
96
85
|
for l in layer_list:
|
|
97
86
|
activation_found = False
|
|
98
|
-
|
|
99
87
|
try: # could be activation for that layer
|
|
100
88
|
for activation in activation_functions:
|
|
101
89
|
if activation in l.split('/')[1]:
|
|
@@ -103,7 +91,6 @@ class Backtrace(object):
|
|
|
103
91
|
activation_found = True
|
|
104
92
|
except:
|
|
105
93
|
activation_dict[l] = 'None'
|
|
106
|
-
|
|
107
94
|
# activation_master :
|
|
108
95
|
for key, value in activation_dict.items():
|
|
109
96
|
activation_dict[key] = activation_master.get(value)
|
|
@@ -112,10 +99,8 @@ class Backtrace(object):
|
|
|
112
99
|
def create_tree(self, model):
|
|
113
100
|
# create new layers same as tf version
|
|
114
101
|
layers = list(model.named_children())
|
|
115
|
-
|
|
116
102
|
activation_functions = ['relu', 'sigmoid', 'tanh', 'softmax']
|
|
117
103
|
layer_sequence = []
|
|
118
|
-
|
|
119
104
|
for i in range(len(layers) - 1):
|
|
120
105
|
current_layer, current_layer_obj = layers[i]
|
|
121
106
|
next_layer, next_layer_obj = layers[i + 1]
|
|
@@ -129,7 +114,6 @@ class Backtrace(object):
|
|
|
129
114
|
else:
|
|
130
115
|
if any(af in current_layer_name for af in activation_functions) is False:
|
|
131
116
|
layer_sequence.append((current_layer_name, current_layer_obj))
|
|
132
|
-
|
|
133
117
|
# creating model_resource variable
|
|
134
118
|
layer_sequence
|
|
135
119
|
ltree = {}
|
|
@@ -137,37 +121,28 @@ class Backtrace(object):
|
|
|
137
121
|
inputs = []
|
|
138
122
|
outputs = []
|
|
139
123
|
intermediates = []
|
|
140
|
-
|
|
141
124
|
prev_layer_id = None
|
|
142
|
-
|
|
143
125
|
num_layers = len(layer_sequence)
|
|
144
|
-
|
|
145
126
|
for i, (layer_name, layer) in enumerate(layer_sequence):
|
|
146
127
|
layer_id = layer_name
|
|
147
128
|
ltree[layer_id] = {}
|
|
148
129
|
layer_tree[layer_id] = layer
|
|
149
|
-
|
|
150
130
|
layer_type = layer.__class__.__name__
|
|
151
131
|
ltree[layer_id]["name"] = layer_id.split("/")[0]
|
|
152
132
|
ltree[layer_id]["class"] = layer_type
|
|
153
|
-
|
|
154
133
|
if i < num_layers - 1:
|
|
155
134
|
ltree[layer_id]["type"] = "intermediate"
|
|
156
135
|
intermediates.append(layer_id)
|
|
157
136
|
else:
|
|
158
137
|
ltree[layer_id]["type"] = "output"
|
|
159
138
|
outputs.append(layer_id)
|
|
160
|
-
|
|
161
139
|
if prev_layer_id is not None:
|
|
162
140
|
ltree[layer_id]["child"] = [prev_layer_id]
|
|
163
141
|
ltree[prev_layer_id]["parent"] = [layer_id]
|
|
164
|
-
|
|
165
142
|
prev_layer_id = layer_id
|
|
166
|
-
|
|
167
143
|
# Set child of the last layer as an empty list
|
|
168
144
|
if prev_layer_id is not None:
|
|
169
145
|
ltree[prev_layer_id]["parent"] = []
|
|
170
|
-
|
|
171
146
|
layer_tree.pop('identity')
|
|
172
147
|
ltree.pop('identity')
|
|
173
148
|
self.model_resource = (layer_tree, ltree, outputs, inputs)
|
|
@@ -198,7 +173,6 @@ class Backtrace(object):
|
|
|
198
173
|
def __init__(self, base_model):
|
|
199
174
|
super(ModelWithEveryOutputs, self).__init__()
|
|
200
175
|
self.base_model = base_model
|
|
201
|
-
|
|
202
176
|
def forward(self, x):
|
|
203
177
|
outputs = []
|
|
204
178
|
for layer_name, layer in self.base_model._modules.items():
|
|
@@ -212,7 +186,6 @@ class Backtrace(object):
|
|
|
212
186
|
x = layer(x)
|
|
213
187
|
outputs.append((layer_name, x))
|
|
214
188
|
return outputs
|
|
215
|
-
|
|
216
189
|
self.every_out_model = ModelWithEveryOutputs(model)
|
|
217
190
|
|
|
218
191
|
def create_model_output(self, model):
|
|
@@ -248,16 +221,12 @@ class Backtrace(object):
|
|
|
248
221
|
every_out = self.every_out_model(inputs)
|
|
249
222
|
activation_functions = ['relu', 'sigmoid', 'tanh', 'softmax']
|
|
250
223
|
every_temp_out = {}
|
|
251
|
-
|
|
252
224
|
for i in range(len(every_out)):
|
|
253
|
-
|
|
254
225
|
current_layer, current_layer_obj = every_out[i]
|
|
255
226
|
try:
|
|
256
227
|
next_layer, next_layer_obj = every_out[i + 1]
|
|
257
|
-
|
|
258
228
|
current_layer_name = current_layer
|
|
259
229
|
next_layer_name = next_layer
|
|
260
|
-
|
|
261
230
|
next_layer_type = next_layer_name.lower()
|
|
262
231
|
if any(af in next_layer_type for af in activation_functions):
|
|
263
232
|
if isinstance(next_layer_obj, tuple):
|
|
@@ -265,12 +234,10 @@ class Backtrace(object):
|
|
|
265
234
|
next_layer_tensor = next_layer_obj[0]
|
|
266
235
|
else:
|
|
267
236
|
next_layer_tensor = next_layer_obj
|
|
268
|
-
|
|
269
237
|
every_temp_out[
|
|
270
238
|
f"{current_layer_name}/{next_layer_name}"] = next_layer_tensor.detach().numpy().astype(
|
|
271
239
|
np.float32)
|
|
272
240
|
i += 1
|
|
273
|
-
|
|
274
241
|
else:
|
|
275
242
|
if any(af in current_layer_name for af in activation_functions) is False:
|
|
276
243
|
if isinstance(current_layer_obj, tuple):
|
|
@@ -278,12 +245,10 @@ class Backtrace(object):
|
|
|
278
245
|
current_layer_tensor = current_layer_obj[0]
|
|
279
246
|
else:
|
|
280
247
|
current_layer_tensor = current_layer_obj
|
|
281
|
-
|
|
282
248
|
every_temp_out[current_layer_name] = current_layer_tensor.detach().numpy().astype(np.float32)
|
|
283
249
|
except:
|
|
284
250
|
if any(af in next_layer_type for af in activation_functions):
|
|
285
251
|
pass
|
|
286
|
-
|
|
287
252
|
else:
|
|
288
253
|
if any(af in current_layer for af in activation_functions) is False:
|
|
289
254
|
if isinstance(current_layer_obj, tuple):
|
|
@@ -291,7 +256,6 @@ class Backtrace(object):
|
|
|
291
256
|
current_layer_tensor = current_layer_obj[0]
|
|
292
257
|
else:
|
|
293
258
|
current_layer_tensor = current_layer_obj
|
|
294
|
-
|
|
295
259
|
every_temp_out[current_layer] = current_layer_tensor.detach().cpu().numpy().astype(np.float32)
|
|
296
260
|
return every_temp_out
|
|
297
261
|
|
|
@@ -299,16 +263,12 @@ class Backtrace(object):
|
|
|
299
263
|
all_out = self.all_out_model(inputs)
|
|
300
264
|
activation_functions = ['relu', 'sigmoid', 'tanh', 'softmax']
|
|
301
265
|
temp_out = {}
|
|
302
|
-
|
|
303
266
|
for i in range(len(all_out)):
|
|
304
|
-
|
|
305
267
|
current_layer, current_layer_obj = all_out[i]
|
|
306
268
|
try:
|
|
307
269
|
next_layer, next_layer_obj = all_out[i + 1]
|
|
308
|
-
|
|
309
270
|
current_layer_name = current_layer
|
|
310
271
|
next_layer_name = next_layer
|
|
311
|
-
|
|
312
272
|
next_layer_type = next_layer_name.lower()
|
|
313
273
|
if any(af in next_layer_type for af in activation_functions):
|
|
314
274
|
if isinstance(next_layer_obj, tuple):
|
|
@@ -316,12 +276,10 @@ class Backtrace(object):
|
|
|
316
276
|
next_layer_tensor = next_layer_obj[0]
|
|
317
277
|
else:
|
|
318
278
|
next_layer_tensor = next_layer_obj
|
|
319
|
-
|
|
320
279
|
temp_out[
|
|
321
280
|
f"{current_layer_name}/{next_layer_name}"] = next_layer_tensor.detach().cpu().numpy().astype(
|
|
322
281
|
np.float32)
|
|
323
282
|
i += 1
|
|
324
|
-
|
|
325
283
|
else:
|
|
326
284
|
if any(af in current_layer_name for af in activation_functions) is False:
|
|
327
285
|
if isinstance(current_layer_obj, tuple):
|
|
@@ -329,12 +287,10 @@ class Backtrace(object):
|
|
|
329
287
|
current_layer_tensor = current_layer_obj[0]
|
|
330
288
|
else:
|
|
331
289
|
current_layer_tensor = current_layer_obj
|
|
332
|
-
|
|
333
290
|
temp_out[current_layer_name] = current_layer_tensor.detach().numpy().astype(np.float32)
|
|
334
291
|
except:
|
|
335
292
|
if any(af in next_layer_type for af in activation_functions):
|
|
336
293
|
pass
|
|
337
|
-
|
|
338
294
|
else:
|
|
339
295
|
if any(af in current_layer for af in activation_functions) is False:
|
|
340
296
|
if isinstance(current_layer_obj, tuple):
|
|
@@ -342,20 +298,20 @@ class Backtrace(object):
|
|
|
342
298
|
current_layer_tensor = current_layer_obj[0]
|
|
343
299
|
else:
|
|
344
300
|
current_layer_tensor = current_layer_obj
|
|
345
|
-
|
|
346
301
|
temp_out[current_layer] = current_layer_tensor.detach().cpu().numpy().astype(np.float32)
|
|
347
|
-
|
|
348
302
|
return temp_out
|
|
349
303
|
|
|
350
304
|
def eval(
|
|
351
305
|
self,
|
|
352
306
|
all_out,
|
|
353
|
-
mode,
|
|
307
|
+
mode="default",
|
|
354
308
|
start_wt=[],
|
|
355
309
|
multiplier=100.0,
|
|
356
310
|
scaler=0,
|
|
357
311
|
max_unit=0,
|
|
358
312
|
predicted_token=None,
|
|
313
|
+
thresholding=0.5,
|
|
314
|
+
task="binary-classification",
|
|
359
315
|
):
|
|
360
316
|
# This method is used for evaluating layer-wise relevance based on different modes.
|
|
361
317
|
if mode == "default":
|
|
@@ -363,13 +319,21 @@ class Backtrace(object):
|
|
|
363
319
|
all_out=all_out,
|
|
364
320
|
start_wt=start_wt,
|
|
365
321
|
multiplier=multiplier,
|
|
366
|
-
scaler=
|
|
322
|
+
scaler=scaler,
|
|
367
323
|
max_unit=0,
|
|
368
324
|
predicted_token=predicted_token,
|
|
325
|
+
thresholding=thresholding,
|
|
326
|
+
task="binary-classification",
|
|
369
327
|
)
|
|
370
328
|
return output
|
|
371
329
|
elif mode == "contrast":
|
|
372
|
-
temp_output = self.contrast_eval(
|
|
330
|
+
temp_output = self.contrast_eval(
|
|
331
|
+
all_out=all_out,
|
|
332
|
+
multiplier=multiplier,
|
|
333
|
+
scaler=0,
|
|
334
|
+
thresholding=0.5,
|
|
335
|
+
task="binary-classification",
|
|
336
|
+
)
|
|
373
337
|
output = {}
|
|
374
338
|
for k in temp_output[0].keys():
|
|
375
339
|
output[k] = {}
|
|
@@ -378,7 +342,9 @@ class Backtrace(object):
|
|
|
378
342
|
return output
|
|
379
343
|
|
|
380
344
|
def proportional_eval(
|
|
381
|
-
self, all_out, start_wt=[], multiplier=100.0,
|
|
345
|
+
self, all_out, start_wt=[], multiplier=100.0,
|
|
346
|
+
scaler=0, max_unit=0, predicted_token=None,
|
|
347
|
+
thresholding=0.5, task="binary-classification",
|
|
382
348
|
):
|
|
383
349
|
model_resource = self.model_resource
|
|
384
350
|
activation_dict = self.activation_dict
|
|
@@ -387,21 +353,21 @@ class Backtrace(object):
|
|
|
387
353
|
all_wt = {}
|
|
388
354
|
if len(start_wt) == 0:
|
|
389
355
|
if self.model_type == 'encoder':
|
|
390
|
-
start_wt = UP.calculate_start_wt(all_out[out_layer].detach().numpy())
|
|
356
|
+
start_wt = UP.calculate_start_wt(all_out[out_layer].detach().numpy(), scaler=scaler)
|
|
391
357
|
all_wt[out_layer] = start_wt * multiplier
|
|
392
|
-
layer_stack = self.layer_stack
|
|
393
|
-
all_wts = self.model_weights
|
|
394
|
-
|
|
358
|
+
layer_stack = self.layer_stack
|
|
359
|
+
all_wts = self.model_weights
|
|
360
|
+
elif self.model_type == 'encoder_decoder':
|
|
395
361
|
start_wt = UP.calculate_enc_dec_start_wt(all_out[out_layer][0].detach().numpy(), predicted_token)
|
|
396
362
|
all_wt[out_layer] = start_wt * multiplier
|
|
397
363
|
layer_stack = self.layer_stack
|
|
398
364
|
all_wts = self.model_weights
|
|
399
365
|
else:
|
|
400
|
-
start_wt = UP.calculate_start_wt(all_out[out_layer])
|
|
366
|
+
start_wt = UP.calculate_start_wt(all_out[out_layer],scaler,thresholding,task=task)
|
|
401
367
|
all_wt[out_layer] = start_wt * multiplier
|
|
402
368
|
layer_stack = self.layer_stack
|
|
403
369
|
|
|
404
|
-
for start_layer in layer_stack:
|
|
370
|
+
for start_layer in tqdm(layer_stack):
|
|
405
371
|
if model_resource[1][start_layer]["child"]:
|
|
406
372
|
child_nodes = model_resource[1][start_layer]["child"]
|
|
407
373
|
for ch in child_nodes:
|
|
@@ -427,11 +393,65 @@ class Backtrace(object):
|
|
|
427
393
|
l1 = model_resource[0][start_layer]
|
|
428
394
|
w1 = l1.state_dict()['weight']
|
|
429
395
|
b1 = l1.state_dict()['bias']
|
|
396
|
+
pad1 = l1.padding
|
|
397
|
+
strides1 = l1.stride
|
|
430
398
|
temp_wt = UP.calculate_wt_conv(
|
|
431
399
|
all_wt[start_layer],
|
|
432
400
|
all_out[child_nodes[0]][0],
|
|
433
401
|
w1,
|
|
434
402
|
b1,
|
|
403
|
+
pad1,
|
|
404
|
+
strides1,
|
|
405
|
+
activation_dict[model_resource[1][start_layer]["name"]],
|
|
406
|
+
)
|
|
407
|
+
all_wt[child_nodes[0]] += temp_wt.T
|
|
408
|
+
elif model_resource[1][start_layer]["class"] == "ConvTranspose2d":
|
|
409
|
+
l1 = model_resource[0][start_layer]
|
|
410
|
+
w1 = l1.state_dict()['weight']
|
|
411
|
+
b1 = l1.state_dict()['bias']
|
|
412
|
+
pad1 = l1.padding
|
|
413
|
+
strides1 = l1.stride
|
|
414
|
+
temp_wt = UP.calculate_wt_conv2d_transpose(
|
|
415
|
+
all_wt[start_layer],
|
|
416
|
+
all_out[child_nodes[0]][0],
|
|
417
|
+
w1,
|
|
418
|
+
b1,
|
|
419
|
+
pad1,
|
|
420
|
+
strides1,
|
|
421
|
+
activation_dict[model_resource[1][start_layer]["name"]],
|
|
422
|
+
)
|
|
423
|
+
all_wt[child_nodes[0]] += temp_wt.T
|
|
424
|
+
elif model_resource[1][start_layer]["class"] == "Conv1d":
|
|
425
|
+
l1 = model_resource[0][start_layer]
|
|
426
|
+
w1 = l1.state_dict()['weight']
|
|
427
|
+
b1 = l1.state_dict()['bias']
|
|
428
|
+
pad1 = l1.padding[0]
|
|
429
|
+
strides1 = l1.stride[0]
|
|
430
|
+
temp_wt = UP.calculate_wt_conv_1d(
|
|
431
|
+
all_wt[start_layer],
|
|
432
|
+
all_out[child_nodes[0]][0],
|
|
433
|
+
w1,
|
|
434
|
+
b1,
|
|
435
|
+
pad1,
|
|
436
|
+
strides1,
|
|
437
|
+
activation_dict[model_resource[1][start_layer]["name"]],
|
|
438
|
+
)
|
|
439
|
+
all_wt[child_nodes[0]] += temp_wt.T
|
|
440
|
+
elif model_resource[1][start_layer]["class"] == "ConvTranspose1d":
|
|
441
|
+
l1 = model_resource[0][start_layer]
|
|
442
|
+
w1 = l1.state_dict()['weight']
|
|
443
|
+
b1 = l1.state_dict()['bias']
|
|
444
|
+
pad1 = l1.padding[0]
|
|
445
|
+
strides1 = l1.stride[0]
|
|
446
|
+
dilation1= l1.dilation[0]
|
|
447
|
+
temp_wt = UP.calculate_wt_conv1d_transpose(
|
|
448
|
+
all_wt[start_layer],
|
|
449
|
+
all_out[child_nodes[0]][0],
|
|
450
|
+
w1,
|
|
451
|
+
b1,
|
|
452
|
+
pad1,
|
|
453
|
+
strides1,
|
|
454
|
+
dilation1,
|
|
435
455
|
activation_dict[model_resource[1][start_layer]["name"]],
|
|
436
456
|
)
|
|
437
457
|
all_wt[child_nodes[0]] += temp_wt.T
|
|
@@ -464,6 +484,22 @@ class Backtrace(object):
|
|
|
464
484
|
all_wt[start_layer], all_out[child_nodes[0]][0], (l1.kernel_size, l1.kernel_size)
|
|
465
485
|
)
|
|
466
486
|
all_wt[child_nodes[0]] += temp_wt.T
|
|
487
|
+
elif model_resource[1][start_layer]["class"] == "MaxPool1d":
|
|
488
|
+
l1 = model_resource[0][start_layer]
|
|
489
|
+
pad1 = l1.padding
|
|
490
|
+
strides1 = l1.stride
|
|
491
|
+
temp_wt = UP.calculate_wt_maxpool_1d(
|
|
492
|
+
all_wt[start_layer], all_out[child_nodes[0]][0], l1.kernel_size,pad1,strides1
|
|
493
|
+
)
|
|
494
|
+
all_wt[child_nodes[0]] += temp_wt.T
|
|
495
|
+
elif model_resource[1][start_layer]["class"] == "AvgPool1d":
|
|
496
|
+
l1 = model_resource[0][start_layer]
|
|
497
|
+
pad1 = l1.padding
|
|
498
|
+
strides1 = l1.stride
|
|
499
|
+
temp_wt = UP.calculate_wt_avgpool_1d(
|
|
500
|
+
all_wt[start_layer], all_out[child_nodes[0]][0], l1.kernel_size,pad1,strides1
|
|
501
|
+
)
|
|
502
|
+
all_wt[child_nodes[0]] += temp_wt.T
|
|
467
503
|
elif model_resource[1][start_layer]["class"] == "Concatenate":
|
|
468
504
|
temp_wt = UP.calculate_wt_concat(
|
|
469
505
|
all_wt[start_layer],
|
|
@@ -504,27 +540,28 @@ class Backtrace(object):
|
|
|
504
540
|
elif model_resource[1][start_layer]["class"] == "Self_Attention":
|
|
505
541
|
weights = all_wts[start_layer]
|
|
506
542
|
self_attention_weights = HP.rename_self_attention_keys(weights)
|
|
543
|
+
config = self.model.config
|
|
507
544
|
|
|
508
545
|
temp_wt = UP.calculate_wt_self_attention(
|
|
509
546
|
all_wt[start_layer],
|
|
510
547
|
all_out[child_nodes[0]][0].detach().numpy(),
|
|
511
548
|
self_attention_weights,
|
|
549
|
+
config
|
|
512
550
|
)
|
|
513
551
|
all_wt[child_nodes[0]] += temp_wt
|
|
514
|
-
|
|
552
|
+
|
|
515
553
|
elif model_resource[1][start_layer]["class"] == 'Residual':
|
|
516
|
-
temp_wt = UP.
|
|
554
|
+
temp_wt = UP.calculate_wt_residual(
|
|
517
555
|
all_wt[start_layer],
|
|
518
556
|
[all_out[ch].detach().numpy() for ch in child_nodes],
|
|
519
557
|
)
|
|
520
558
|
|
|
521
559
|
for ind, ch in enumerate(child_nodes):
|
|
522
560
|
all_wt[ch] += temp_wt[ind]
|
|
523
|
-
|
|
561
|
+
|
|
524
562
|
elif model_resource[1][start_layer]["class"] == 'Feed_Forward':
|
|
525
563
|
weights = all_wts[start_layer]
|
|
526
564
|
feed_forward_weights = HP.rename_feed_forward_keys(weights)
|
|
527
|
-
|
|
528
565
|
temp_wt = UP.calculate_wt_feed_forward(
|
|
529
566
|
all_wt[start_layer],
|
|
530
567
|
all_out[child_nodes[0]][0].detach().numpy(),
|
|
@@ -535,7 +572,6 @@ class Backtrace(object):
|
|
|
535
572
|
elif model_resource[1][start_layer]["class"] == "Pooler":
|
|
536
573
|
weights = all_wts[start_layer]
|
|
537
574
|
pooler_weights = HP.rename_pooler_keys(weights)
|
|
538
|
-
|
|
539
575
|
temp_wt = UP.calculate_wt_pooler(
|
|
540
576
|
all_wt[start_layer],
|
|
541
577
|
all_out[child_nodes[0]][0].detach().numpy(),
|
|
@@ -546,7 +582,6 @@ class Backtrace(object):
|
|
|
546
582
|
elif model_resource[1][start_layer]["class"] == "Classifier":
|
|
547
583
|
weights = all_wts[start_layer]
|
|
548
584
|
classifier_weights = HP.rename_classifier_keys(weights)
|
|
549
|
-
|
|
550
585
|
temp_wt = UP.calculate_wt_classifier(
|
|
551
586
|
all_wt[start_layer],
|
|
552
587
|
all_out[child_nodes[0]][0].detach().numpy(),
|
|
@@ -557,7 +592,6 @@ class Backtrace(object):
|
|
|
557
592
|
elif model_resource[1][start_layer]["class"] == "LM_Head":
|
|
558
593
|
weights = all_wts[start_layer]
|
|
559
594
|
lm_head_weights = HP.rename_decoder_lm_head(weights)
|
|
560
|
-
|
|
561
595
|
temp_wt = UP.calculate_wt_lm_head(
|
|
562
596
|
all_wt[start_layer],
|
|
563
597
|
all_out[child_nodes[0]][0].detach().numpy(),
|
|
@@ -572,7 +606,6 @@ class Backtrace(object):
|
|
|
572
606
|
elif model_resource[1][start_layer]["class"] == 'Cross_Attention':
|
|
573
607
|
weights = all_wts[start_layer]
|
|
574
608
|
cross_attention_weights = HP.rename_cross_attention_keys(weights)
|
|
575
|
-
|
|
576
609
|
temp_wt = UP.calculate_wt_cross_attention(
|
|
577
610
|
all_wt[start_layer],
|
|
578
611
|
[all_out[ch][0].detach().numpy() for ch in child_nodes],
|
|
@@ -582,6 +615,10 @@ class Backtrace(object):
|
|
|
582
615
|
for ind, ch in enumerate(child_nodes):
|
|
583
616
|
all_wt[ch] += temp_wt[ind]
|
|
584
617
|
|
|
618
|
+
elif model_resource[1][start_layer]["class"] == "Embedding":
|
|
619
|
+
temp_wt = all_wt[start_layer]
|
|
620
|
+
temp_wt = np.mean(temp_wt,axis=1)
|
|
621
|
+
all_wt[child_nodes[0]] = all_wt[child_nodes[0]] + temp_wt
|
|
585
622
|
else:
|
|
586
623
|
temp_wt = all_wt[start_layer]
|
|
587
624
|
all_wt[child_nodes[0]] += temp_wt
|
|
@@ -598,14 +635,16 @@ class Backtrace(object):
|
|
|
598
635
|
|
|
599
636
|
return all_wt
|
|
600
637
|
|
|
601
|
-
def contrast_eval(self, all_out, multiplier=100.0
|
|
638
|
+
def contrast_eval(self, all_out, multiplier=100.0,
|
|
639
|
+
scaler=None,thresholding=0.5,
|
|
640
|
+
task="binary-classification"):
|
|
602
641
|
model_resource = self.model_resource
|
|
603
642
|
activation_dict = self.activation_dict
|
|
604
643
|
inputcheck = False
|
|
605
644
|
out_layer = model_resource[2][0]
|
|
606
645
|
all_wt_pos = {}
|
|
607
646
|
all_wt_neg = {}
|
|
608
|
-
start_wt_pos, start_wt_neg = UC.calculate_start_wt(all_out[out_layer])
|
|
647
|
+
start_wt_pos, start_wt_neg = UC.calculate_start_wt(all_out[out_layer],scaler,thresholding,task)
|
|
609
648
|
all_wt_pos[out_layer] = start_wt_pos * multiplier
|
|
610
649
|
all_wt_neg[out_layer] = start_wt_neg * multiplier
|
|
611
650
|
layer_stack = [out_layer]
|
|
@@ -636,16 +675,64 @@ class Backtrace(object):
|
|
|
636
675
|
l1 = model_resource[0][start_layer]
|
|
637
676
|
w1 = l1.state_dict()['weight']
|
|
638
677
|
b1 = l1.state_dict()['bias']
|
|
678
|
+
pad1 = l1.padding
|
|
679
|
+
strides1 = l1.stride
|
|
639
680
|
temp_wt_pos, temp_wt_neg = UC.calculate_wt_conv(
|
|
640
681
|
all_wt_pos[start_layer],
|
|
641
682
|
all_wt_neg[start_layer],
|
|
642
683
|
all_out[child_nodes[0]][0],
|
|
643
684
|
w1,
|
|
644
685
|
b1,
|
|
686
|
+
pad1,
|
|
687
|
+
strides1,
|
|
645
688
|
activation_dict[model_resource[1][start_layer]["name"]],
|
|
646
689
|
)
|
|
647
690
|
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
648
691
|
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
692
|
+
elif model_resource[1][start_layer]["class"] == "ConvTranspose2d":
|
|
693
|
+
l1 = model_resource[0][start_layer]
|
|
694
|
+
w1 = l1.state_dict()['weight']
|
|
695
|
+
b1 = l1.state_dict()['bias']
|
|
696
|
+
pad1 = l1.padding
|
|
697
|
+
strides1 = l1.stride
|
|
698
|
+
temp_wt_pos,temp_wt_neg = UC.calculate_wt_conv2d_transpose(
|
|
699
|
+
all_wt_pos[start_layer],
|
|
700
|
+
all_wt_neg[start_layer],
|
|
701
|
+
all_out[child_nodes[0]][0],
|
|
702
|
+
w1,
|
|
703
|
+
b1,
|
|
704
|
+
pad1,
|
|
705
|
+
strides1,
|
|
706
|
+
activation_dict[model_resource[1][start_layer]["name"]],
|
|
707
|
+
)
|
|
708
|
+
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
709
|
+
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
710
|
+
elif model_resource[1][start_layer]["class"] == 'Conv1d':
|
|
711
|
+
l1 = model_resource[0][start_layer]
|
|
712
|
+
w1 = l1.state_dict()['weight']
|
|
713
|
+
b1 = l1.state_dict()['bias']
|
|
714
|
+
pad1 = l1.padding[0]
|
|
715
|
+
strides1 = l1.stride[0]
|
|
716
|
+
temp_wt_pos,temp_wt_neg = UC.calculate_wt_conv_1d(all_wt_pos[start_layer],
|
|
717
|
+
all_wt_neg[start_layer],
|
|
718
|
+
all_out[child_nodes[0]][0],
|
|
719
|
+
w1,b1, pad1, strides1,
|
|
720
|
+
activation_dict[model_resource[1][start_layer]['name']])
|
|
721
|
+
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
722
|
+
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
723
|
+
elif model_resource[1][start_layer]["class"] == "ConvTranspose1d":
|
|
724
|
+
l1 = model_resource[0][start_layer]
|
|
725
|
+
w1 = l1.state_dict()['weight']
|
|
726
|
+
b1 = l1.state_dict()['bias']
|
|
727
|
+
pad1 = l1.padding[0]
|
|
728
|
+
strides1 = l1.stride[0]
|
|
729
|
+
temp_wt_pos,temp_wt_neg = UC.calculate_wt_conv1d_transpose(all_wt_pos[start_layer],
|
|
730
|
+
all_wt_neg[start_layer],
|
|
731
|
+
all_out[child_nodes[0]][0],
|
|
732
|
+
w1,b1, pad1, strides1,
|
|
733
|
+
activation_dict[model_resource[1][start_layer]['name']])
|
|
734
|
+
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
735
|
+
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
649
736
|
elif model_resource[1][start_layer]["class"] == "Reshape":
|
|
650
737
|
temp_wt_pos = UC.calculate_wt_rshp(
|
|
651
738
|
all_wt_pos[start_layer], all_out[child_nodes[0]][0]
|
|
@@ -698,6 +785,22 @@ class Backtrace(object):
|
|
|
698
785
|
(l1.kernel_size, l1.kernel_size),
|
|
699
786
|
)
|
|
700
787
|
all_wt_neg[child_nodes[0]] += temp_wt.T
|
|
788
|
+
elif model_resource[1][start_layer]["class"] == "MaxPool1d":
|
|
789
|
+
l1 = model_resource[0][start_layer]
|
|
790
|
+
pad1 = l1.padding
|
|
791
|
+
strides1 = l1.stride
|
|
792
|
+
temp_wt = UC.calculate_wt_maxpool_1d(
|
|
793
|
+
all_wt_pos[start_layer],
|
|
794
|
+
all_out[child_nodes[0]][0],
|
|
795
|
+
l1.kernel_size, pad1, strides1
|
|
796
|
+
)
|
|
797
|
+
all_wt_pos[child_nodes[0]] += temp_wt.T
|
|
798
|
+
temp_wt = UC.calculate_wt_maxpool_1d(
|
|
799
|
+
all_wt_neg[start_layer],
|
|
800
|
+
all_out[child_nodes[0]][0],
|
|
801
|
+
l1.kernel_size, pad1, strides1
|
|
802
|
+
)
|
|
803
|
+
all_wt_neg[child_nodes[0]] += temp_wt.T
|
|
701
804
|
elif model_resource[1][start_layer]["class"] == "AvgPool2d":
|
|
702
805
|
l1 = model_resource[0][start_layer]
|
|
703
806
|
temp_wt_pos, temp_wt_neg = UC.calculate_wt_avgpool(
|
|
@@ -708,6 +811,18 @@ class Backtrace(object):
|
|
|
708
811
|
)
|
|
709
812
|
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
710
813
|
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
814
|
+
elif model_resource[1][start_layer]["class"] == "AvgPool1d":
|
|
815
|
+
l1 = model_resource[0][start_layer]
|
|
816
|
+
pad1 = l1.padding
|
|
817
|
+
strides1 = l1.stride
|
|
818
|
+
temp_wt_pos, temp_wt_neg = UC.calculate_wt_avgpool_1d(
|
|
819
|
+
all_wt_pos[start_layer],
|
|
820
|
+
all_wt_neg[start_layer],
|
|
821
|
+
all_out[child_nodes[0]][0],
|
|
822
|
+
l1.kernel_size, pad1, strides1
|
|
823
|
+
)
|
|
824
|
+
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
825
|
+
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
711
826
|
elif model_resource[1][start_layer]["class"] == "Concatenate":
|
|
712
827
|
temp_wt = UC.calculate_wt_concat(
|
|
713
828
|
all_wt_pos[start_layer],
|
|
@@ -757,6 +872,15 @@ class Backtrace(object):
|
|
|
757
872
|
)
|
|
758
873
|
all_wt_pos[child_nodes[0]] = temp_wt_pos
|
|
759
874
|
all_wt_neg[child_nodes[0]] = temp_wt_neg
|
|
875
|
+
elif model_resource[1][start_layer]["class"] == "Embedding":
|
|
876
|
+
temp_wt_pos = all_wt_pos[start_layer]
|
|
877
|
+
temp_wt_neg = all_wt_neg[start_layer]
|
|
878
|
+
|
|
879
|
+
temp_wt_pos = np.mean(temp_wt_pos,axis=1)
|
|
880
|
+
temp_wt_neg = np.mean(temp_wt_neg,axis=1)
|
|
881
|
+
|
|
882
|
+
all_wt_pos[child_nodes[0]] = all_wt_pos[child_nodes[0]] + temp_wt_pos
|
|
883
|
+
all_wt_neg[child_nodes[0]] = all_wt_neg[child_nodes[0]] + temp_wt_neg
|
|
760
884
|
else:
|
|
761
885
|
temp_wt_pos = all_wt_pos[start_layer]
|
|
762
886
|
temp_wt_neg = all_wt_neg[start_layer]
|