dl-backtrace 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dl-backtrace might be problematic. Click here for more details.
- dl_backtrace/pytorch_backtrace/backtrace/backtrace.py +180 -62
- dl_backtrace/pytorch_backtrace/backtrace/utils/contrast.py +607 -156
- dl_backtrace/pytorch_backtrace/backtrace/utils/prop.py +658 -228
- dl_backtrace/version.py +2 -2
- {dl_backtrace-0.0.17.dist-info → dl_backtrace-0.0.19.dist-info}/METADATA +10 -1
- {dl_backtrace-0.0.17.dist-info → dl_backtrace-0.0.19.dist-info}/RECORD +9 -9
- {dl_backtrace-0.0.17.dist-info → dl_backtrace-0.0.19.dist-info}/WHEEL +1 -1
- {dl_backtrace-0.0.17.dist-info → dl_backtrace-0.0.19.dist-info}/LICENSE +0 -0
- {dl_backtrace-0.0.17.dist-info → dl_backtrace-0.0.19.dist-info}/top_level.txt +0 -0
|
@@ -18,29 +18,22 @@ class Backtrace(object):
|
|
|
18
18
|
if model_type == 'encoder':
|
|
19
19
|
self.model = model
|
|
20
20
|
self.model_type = model_type
|
|
21
|
-
|
|
22
21
|
# create a tree-like structure for encoder model
|
|
23
22
|
self.model_resource = EN.build_encoder_tree(model)
|
|
24
|
-
|
|
25
23
|
# create a layer stack for encoder model
|
|
26
24
|
self.create_layer_stack()
|
|
27
|
-
|
|
28
25
|
# extract the encoder model weights
|
|
29
26
|
self.model_weights = EN.extract_encoder_weights(model)
|
|
30
|
-
|
|
31
27
|
# # calculate the output of each submodule of the encoder model
|
|
32
28
|
# self.all_out_model = EN.create_encoder_output(model)
|
|
33
29
|
|
|
34
30
|
elif model_type == 'encoder_decoder':
|
|
35
31
|
self.model = model
|
|
36
32
|
self.model_type = model_type
|
|
37
|
-
|
|
38
33
|
# create a tree-like structure and layer_stack for encoder-decoder model
|
|
39
34
|
self.model_resource, self.layer_stack = ED.build_enc_dec_tree(model)
|
|
40
|
-
|
|
41
35
|
# extract the encoder-decoder model weights
|
|
42
|
-
self.model_weights = ED.extract_encoder_decoder_weights(model)
|
|
43
|
-
|
|
36
|
+
self.model_weights = ED.extract_encoder_decoder_weights(model)
|
|
44
37
|
# # calculate the output of each submodule of the encoder-decoder model
|
|
45
38
|
# self.all_out_model = ED.calculate_encoder_decoder_output(model)
|
|
46
39
|
|
|
@@ -49,16 +42,12 @@ class Backtrace(object):
|
|
|
49
42
|
self.model_type = model_type
|
|
50
43
|
# create a tree-like structure that represents the layers of the neural network model
|
|
51
44
|
self.create_tree(model)
|
|
52
|
-
|
|
53
45
|
# create a new model that produces the output of each layer in the neural network.
|
|
54
46
|
self.create_model_output(model)
|
|
55
|
-
|
|
56
47
|
# create a new model that produces the output of every layer in the neural network.
|
|
57
48
|
self.create_every_model_output(model)
|
|
58
|
-
|
|
59
49
|
# create a layer stack that defines the order in which layers should be processed during backpropagation.
|
|
60
50
|
self.create_layer_stack()
|
|
61
|
-
|
|
62
51
|
# checks if the model is sequential or not. If it's sequential, it adds the input layer to the layer stack.
|
|
63
52
|
# identity
|
|
64
53
|
|
|
@@ -72,7 +61,6 @@ class Backtrace(object):
|
|
|
72
61
|
self.model_resource[3].append(inp_name)
|
|
73
62
|
self.sequential = True
|
|
74
63
|
try:
|
|
75
|
-
|
|
76
64
|
# calls the build_activation_dict method to build a dictionary that maps layer names to activation functions.
|
|
77
65
|
# If that fails, it creates a temporary dictionary with default activation functions.
|
|
78
66
|
if len(activation_dict) == 0:
|
|
@@ -92,10 +80,8 @@ class Backtrace(object):
|
|
|
92
80
|
layer_list = list(model_resource[0].keys())
|
|
93
81
|
activation_dict = {}
|
|
94
82
|
activation_functions = ['relu', 'sigmoid', 'tanh', 'softmax'] # You can add more activation functions
|
|
95
|
-
|
|
96
83
|
for l in layer_list:
|
|
97
84
|
activation_found = False
|
|
98
|
-
|
|
99
85
|
try: # could be activation for that layer
|
|
100
86
|
for activation in activation_functions:
|
|
101
87
|
if activation in l.split('/')[1]:
|
|
@@ -103,7 +89,6 @@ class Backtrace(object):
|
|
|
103
89
|
activation_found = True
|
|
104
90
|
except:
|
|
105
91
|
activation_dict[l] = 'None'
|
|
106
|
-
|
|
107
92
|
# activation_master :
|
|
108
93
|
for key, value in activation_dict.items():
|
|
109
94
|
activation_dict[key] = activation_master.get(value)
|
|
@@ -112,10 +97,8 @@ class Backtrace(object):
|
|
|
112
97
|
def create_tree(self, model):
|
|
113
98
|
# create new layers same as tf version
|
|
114
99
|
layers = list(model.named_children())
|
|
115
|
-
|
|
116
100
|
activation_functions = ['relu', 'sigmoid', 'tanh', 'softmax']
|
|
117
101
|
layer_sequence = []
|
|
118
|
-
|
|
119
102
|
for i in range(len(layers) - 1):
|
|
120
103
|
current_layer, current_layer_obj = layers[i]
|
|
121
104
|
next_layer, next_layer_obj = layers[i + 1]
|
|
@@ -129,7 +112,6 @@ class Backtrace(object):
|
|
|
129
112
|
else:
|
|
130
113
|
if any(af in current_layer_name for af in activation_functions) is False:
|
|
131
114
|
layer_sequence.append((current_layer_name, current_layer_obj))
|
|
132
|
-
|
|
133
115
|
# creating model_resource variable
|
|
134
116
|
layer_sequence
|
|
135
117
|
ltree = {}
|
|
@@ -137,37 +119,28 @@ class Backtrace(object):
|
|
|
137
119
|
inputs = []
|
|
138
120
|
outputs = []
|
|
139
121
|
intermediates = []
|
|
140
|
-
|
|
141
122
|
prev_layer_id = None
|
|
142
|
-
|
|
143
123
|
num_layers = len(layer_sequence)
|
|
144
|
-
|
|
145
124
|
for i, (layer_name, layer) in enumerate(layer_sequence):
|
|
146
125
|
layer_id = layer_name
|
|
147
126
|
ltree[layer_id] = {}
|
|
148
127
|
layer_tree[layer_id] = layer
|
|
149
|
-
|
|
150
128
|
layer_type = layer.__class__.__name__
|
|
151
129
|
ltree[layer_id]["name"] = layer_id.split("/")[0]
|
|
152
130
|
ltree[layer_id]["class"] = layer_type
|
|
153
|
-
|
|
154
131
|
if i < num_layers - 1:
|
|
155
132
|
ltree[layer_id]["type"] = "intermediate"
|
|
156
133
|
intermediates.append(layer_id)
|
|
157
134
|
else:
|
|
158
135
|
ltree[layer_id]["type"] = "output"
|
|
159
136
|
outputs.append(layer_id)
|
|
160
|
-
|
|
161
137
|
if prev_layer_id is not None:
|
|
162
138
|
ltree[layer_id]["child"] = [prev_layer_id]
|
|
163
139
|
ltree[prev_layer_id]["parent"] = [layer_id]
|
|
164
|
-
|
|
165
140
|
prev_layer_id = layer_id
|
|
166
|
-
|
|
167
141
|
# The last layer in the sequence has no parent, so set its parent list to empty
|
|
168
142
|
if prev_layer_id is not None:
|
|
169
143
|
ltree[prev_layer_id]["parent"] = []
|
|
170
|
-
|
|
171
144
|
layer_tree.pop('identity')
|
|
172
145
|
ltree.pop('identity')
|
|
173
146
|
self.model_resource = (layer_tree, ltree, outputs, inputs)
|
|
@@ -198,7 +171,6 @@ class Backtrace(object):
|
|
|
198
171
|
def __init__(self, base_model):
|
|
199
172
|
super(ModelWithEveryOutputs, self).__init__()
|
|
200
173
|
self.base_model = base_model
|
|
201
|
-
|
|
202
174
|
def forward(self, x):
|
|
203
175
|
outputs = []
|
|
204
176
|
for layer_name, layer in self.base_model._modules.items():
|
|
@@ -212,7 +184,6 @@ class Backtrace(object):
|
|
|
212
184
|
x = layer(x)
|
|
213
185
|
outputs.append((layer_name, x))
|
|
214
186
|
return outputs
|
|
215
|
-
|
|
216
187
|
self.every_out_model = ModelWithEveryOutputs(model)
|
|
217
188
|
|
|
218
189
|
def create_model_output(self, model):
|
|
@@ -248,16 +219,12 @@ class Backtrace(object):
|
|
|
248
219
|
every_out = self.every_out_model(inputs)
|
|
249
220
|
activation_functions = ['relu', 'sigmoid', 'tanh', 'softmax']
|
|
250
221
|
every_temp_out = {}
|
|
251
|
-
|
|
252
222
|
for i in range(len(every_out)):
|
|
253
|
-
|
|
254
223
|
current_layer, current_layer_obj = every_out[i]
|
|
255
224
|
try:
|
|
256
225
|
next_layer, next_layer_obj = every_out[i + 1]
|
|
257
|
-
|
|
258
226
|
current_layer_name = current_layer
|
|
259
227
|
next_layer_name = next_layer
|
|
260
|
-
|
|
261
228
|
next_layer_type = next_layer_name.lower()
|
|
262
229
|
if any(af in next_layer_type for af in activation_functions):
|
|
263
230
|
if isinstance(next_layer_obj, tuple):
|
|
@@ -265,12 +232,10 @@ class Backtrace(object):
|
|
|
265
232
|
next_layer_tensor = next_layer_obj[0]
|
|
266
233
|
else:
|
|
267
234
|
next_layer_tensor = next_layer_obj
|
|
268
|
-
|
|
269
235
|
every_temp_out[
|
|
270
236
|
f"{current_layer_name}/{next_layer_name}"] = next_layer_tensor.detach().numpy().astype(
|
|
271
237
|
np.float32)
|
|
272
238
|
i += 1
|
|
273
|
-
|
|
274
239
|
else:
|
|
275
240
|
if any(af in current_layer_name for af in activation_functions) is False:
|
|
276
241
|
if isinstance(current_layer_obj, tuple):
|
|
@@ -278,12 +243,10 @@ class Backtrace(object):
|
|
|
278
243
|
current_layer_tensor = current_layer_obj[0]
|
|
279
244
|
else:
|
|
280
245
|
current_layer_tensor = current_layer_obj
|
|
281
|
-
|
|
282
246
|
every_temp_out[current_layer_name] = current_layer_tensor.detach().numpy().astype(np.float32)
|
|
283
247
|
except:
|
|
284
248
|
if any(af in next_layer_type for af in activation_functions):
|
|
285
249
|
pass
|
|
286
|
-
|
|
287
250
|
else:
|
|
288
251
|
if any(af in current_layer for af in activation_functions) is False:
|
|
289
252
|
if isinstance(current_layer_obj, tuple):
|
|
@@ -291,7 +254,6 @@ class Backtrace(object):
|
|
|
291
254
|
current_layer_tensor = current_layer_obj[0]
|
|
292
255
|
else:
|
|
293
256
|
current_layer_tensor = current_layer_obj
|
|
294
|
-
|
|
295
257
|
every_temp_out[current_layer] = current_layer_tensor.detach().cpu().numpy().astype(np.float32)
|
|
296
258
|
return every_temp_out
|
|
297
259
|
|
|
@@ -299,16 +261,12 @@ class Backtrace(object):
|
|
|
299
261
|
all_out = self.all_out_model(inputs)
|
|
300
262
|
activation_functions = ['relu', 'sigmoid', 'tanh', 'softmax']
|
|
301
263
|
temp_out = {}
|
|
302
|
-
|
|
303
264
|
for i in range(len(all_out)):
|
|
304
|
-
|
|
305
265
|
current_layer, current_layer_obj = all_out[i]
|
|
306
266
|
try:
|
|
307
267
|
next_layer, next_layer_obj = all_out[i + 1]
|
|
308
|
-
|
|
309
268
|
current_layer_name = current_layer
|
|
310
269
|
next_layer_name = next_layer
|
|
311
|
-
|
|
312
270
|
next_layer_type = next_layer_name.lower()
|
|
313
271
|
if any(af in next_layer_type for af in activation_functions):
|
|
314
272
|
if isinstance(next_layer_obj, tuple):
|
|
@@ -316,12 +274,10 @@ class Backtrace(object):
|
|
|
316
274
|
next_layer_tensor = next_layer_obj[0]
|
|
317
275
|
else:
|
|
318
276
|
next_layer_tensor = next_layer_obj
|
|
319
|
-
|
|
320
277
|
temp_out[
|
|
321
278
|
f"{current_layer_name}/{next_layer_name}"] = next_layer_tensor.detach().cpu().numpy().astype(
|
|
322
279
|
np.float32)
|
|
323
280
|
i += 1
|
|
324
|
-
|
|
325
281
|
else:
|
|
326
282
|
if any(af in current_layer_name for af in activation_functions) is False:
|
|
327
283
|
if isinstance(current_layer_obj, tuple):
|
|
@@ -329,12 +285,10 @@ class Backtrace(object):
|
|
|
329
285
|
current_layer_tensor = current_layer_obj[0]
|
|
330
286
|
else:
|
|
331
287
|
current_layer_tensor = current_layer_obj
|
|
332
|
-
|
|
333
288
|
temp_out[current_layer_name] = current_layer_tensor.detach().numpy().astype(np.float32)
|
|
334
289
|
except:
|
|
335
290
|
if any(af in next_layer_type for af in activation_functions):
|
|
336
291
|
pass
|
|
337
|
-
|
|
338
292
|
else:
|
|
339
293
|
if any(af in current_layer for af in activation_functions) is False:
|
|
340
294
|
if isinstance(current_layer_obj, tuple):
|
|
@@ -342,20 +296,20 @@ class Backtrace(object):
|
|
|
342
296
|
current_layer_tensor = current_layer_obj[0]
|
|
343
297
|
else:
|
|
344
298
|
current_layer_tensor = current_layer_obj
|
|
345
|
-
|
|
346
299
|
temp_out[current_layer] = current_layer_tensor.detach().cpu().numpy().astype(np.float32)
|
|
347
|
-
|
|
348
300
|
return temp_out
|
|
349
301
|
|
|
350
302
|
def eval(
|
|
351
303
|
self,
|
|
352
304
|
all_out,
|
|
353
|
-
mode,
|
|
305
|
+
mode="default",
|
|
354
306
|
start_wt=[],
|
|
355
307
|
multiplier=100.0,
|
|
356
308
|
scaler=0,
|
|
357
309
|
max_unit=0,
|
|
358
310
|
predicted_token=None,
|
|
311
|
+
thresholding=0.5,
|
|
312
|
+
task="binary-classification",
|
|
359
313
|
):
|
|
360
314
|
# This method is used for evaluating layer-wise relevance based on different modes.
|
|
361
315
|
if mode == "default":
|
|
@@ -366,10 +320,18 @@ class Backtrace(object):
|
|
|
366
320
|
scaler=0,
|
|
367
321
|
max_unit=0,
|
|
368
322
|
predicted_token=predicted_token,
|
|
323
|
+
thresholding=0.5,
|
|
324
|
+
task="binary-classification",
|
|
369
325
|
)
|
|
370
326
|
return output
|
|
371
327
|
elif mode == "contrast":
|
|
372
|
-
temp_output = self.contrast_eval(
|
|
328
|
+
temp_output = self.contrast_eval(
|
|
329
|
+
all_out=all_out,
|
|
330
|
+
multiplier=multiplier,
|
|
331
|
+
scaler=0,
|
|
332
|
+
thresholding=0.5,
|
|
333
|
+
task="binary-classification",
|
|
334
|
+
)
|
|
373
335
|
output = {}
|
|
374
336
|
for k in temp_output[0].keys():
|
|
375
337
|
output[k] = {}
|
|
@@ -378,7 +340,9 @@ class Backtrace(object):
|
|
|
378
340
|
return output
|
|
379
341
|
|
|
380
342
|
def proportional_eval(
|
|
381
|
-
self, all_out, start_wt=[], multiplier=100.0,
|
|
343
|
+
self, all_out, start_wt=[], multiplier=100.0,
|
|
344
|
+
scaler=0, max_unit=0, predicted_token=None,
|
|
345
|
+
thresholding=0.5, task="binary-classification",
|
|
382
346
|
):
|
|
383
347
|
model_resource = self.model_resource
|
|
384
348
|
activation_dict = self.activation_dict
|
|
@@ -397,7 +361,7 @@ class Backtrace(object):
|
|
|
397
361
|
layer_stack = self.layer_stack
|
|
398
362
|
all_wts = self.model_weights
|
|
399
363
|
else:
|
|
400
|
-
start_wt = UP.calculate_start_wt(all_out[out_layer])
|
|
364
|
+
start_wt = UP.calculate_start_wt(all_out[out_layer],scaler,thresholding,task=task)
|
|
401
365
|
all_wt[out_layer] = start_wt * multiplier
|
|
402
366
|
layer_stack = self.layer_stack
|
|
403
367
|
|
|
@@ -427,11 +391,65 @@ class Backtrace(object):
|
|
|
427
391
|
l1 = model_resource[0][start_layer]
|
|
428
392
|
w1 = l1.state_dict()['weight']
|
|
429
393
|
b1 = l1.state_dict()['bias']
|
|
394
|
+
pad1 = l1.padding
|
|
395
|
+
strides1 = l1.stride
|
|
430
396
|
temp_wt = UP.calculate_wt_conv(
|
|
431
397
|
all_wt[start_layer],
|
|
432
398
|
all_out[child_nodes[0]][0],
|
|
433
399
|
w1,
|
|
434
400
|
b1,
|
|
401
|
+
pad1,
|
|
402
|
+
strides1,
|
|
403
|
+
activation_dict[model_resource[1][start_layer]["name"]],
|
|
404
|
+
)
|
|
405
|
+
all_wt[child_nodes[0]] += temp_wt.T
|
|
406
|
+
elif model_resource[1][start_layer]["class"] == "ConvTranspose2d":
|
|
407
|
+
l1 = model_resource[0][start_layer]
|
|
408
|
+
w1 = l1.state_dict()['weight']
|
|
409
|
+
b1 = l1.state_dict()['bias']
|
|
410
|
+
pad1 = l1.padding
|
|
411
|
+
strides1 = l1.stride
|
|
412
|
+
temp_wt = UP.calculate_wt_conv2d_transpose(
|
|
413
|
+
all_wt[start_layer],
|
|
414
|
+
all_out[child_nodes[0]][0],
|
|
415
|
+
w1,
|
|
416
|
+
b1,
|
|
417
|
+
pad1,
|
|
418
|
+
strides1,
|
|
419
|
+
activation_dict[model_resource[1][start_layer]["name"]],
|
|
420
|
+
)
|
|
421
|
+
all_wt[child_nodes[0]] += temp_wt.T
|
|
422
|
+
elif model_resource[1][start_layer]["class"] == "Conv1d":
|
|
423
|
+
l1 = model_resource[0][start_layer]
|
|
424
|
+
w1 = l1.state_dict()['weight']
|
|
425
|
+
b1 = l1.state_dict()['bias']
|
|
426
|
+
pad1 = l1.padding[0]
|
|
427
|
+
strides1 = l1.stride[0]
|
|
428
|
+
temp_wt = UP.calculate_wt_conv_1d(
|
|
429
|
+
all_wt[start_layer],
|
|
430
|
+
all_out[child_nodes[0]][0],
|
|
431
|
+
w1,
|
|
432
|
+
b1,
|
|
433
|
+
pad1,
|
|
434
|
+
strides1,
|
|
435
|
+
activation_dict[model_resource[1][start_layer]["name"]],
|
|
436
|
+
)
|
|
437
|
+
all_wt[child_nodes[0]] += temp_wt.T
|
|
438
|
+
elif model_resource[1][start_layer]["class"] == "ConvTranspose1d":
|
|
439
|
+
l1 = model_resource[0][start_layer]
|
|
440
|
+
w1 = l1.state_dict()['weight']
|
|
441
|
+
b1 = l1.state_dict()['bias']
|
|
442
|
+
pad1 = l1.padding[0]
|
|
443
|
+
strides1 = l1.stride[0]
|
|
444
|
+
dilation1= l1.dilation[0]
|
|
445
|
+
temp_wt = UP.calculate_wt_conv1d_transpose(
|
|
446
|
+
all_wt[start_layer],
|
|
447
|
+
all_out[child_nodes[0]][0],
|
|
448
|
+
w1,
|
|
449
|
+
b1,
|
|
450
|
+
pad1,
|
|
451
|
+
strides1,
|
|
452
|
+
dilation1,
|
|
435
453
|
activation_dict[model_resource[1][start_layer]["name"]],
|
|
436
454
|
)
|
|
437
455
|
all_wt[child_nodes[0]] += temp_wt.T
|
|
@@ -464,6 +482,22 @@ class Backtrace(object):
|
|
|
464
482
|
all_wt[start_layer], all_out[child_nodes[0]][0], (l1.kernel_size, l1.kernel_size)
|
|
465
483
|
)
|
|
466
484
|
all_wt[child_nodes[0]] += temp_wt.T
|
|
485
|
+
elif model_resource[1][start_layer]["class"] == "MaxPool1d":
|
|
486
|
+
l1 = model_resource[0][start_layer]
|
|
487
|
+
pad1 = l1.padding
|
|
488
|
+
strides1 = l1.stride
|
|
489
|
+
temp_wt = UP.calculate_wt_maxpool_1d(
|
|
490
|
+
all_wt[start_layer], all_out[child_nodes[0]][0], l1.kernel_size,pad1,strides1
|
|
491
|
+
)
|
|
492
|
+
all_wt[child_nodes[0]] += temp_wt.T
|
|
493
|
+
elif model_resource[1][start_layer]["class"] == "AvgPool1d":
|
|
494
|
+
l1 = model_resource[0][start_layer]
|
|
495
|
+
pad1 = l1.padding
|
|
496
|
+
strides1 = l1.stride
|
|
497
|
+
temp_wt = UP.calculate_wt_avgpool_1d(
|
|
498
|
+
all_wt[start_layer], all_out[child_nodes[0]][0], l1.kernel_size,pad1,strides1
|
|
499
|
+
)
|
|
500
|
+
all_wt[child_nodes[0]] += temp_wt.T
|
|
467
501
|
elif model_resource[1][start_layer]["class"] == "Concatenate":
|
|
468
502
|
temp_wt = UP.calculate_wt_concat(
|
|
469
503
|
all_wt[start_layer],
|
|
@@ -511,7 +545,6 @@ class Backtrace(object):
|
|
|
511
545
|
self_attention_weights,
|
|
512
546
|
)
|
|
513
547
|
all_wt[child_nodes[0]] += temp_wt
|
|
514
|
-
|
|
515
548
|
elif model_resource[1][start_layer]["class"] == 'Residual':
|
|
516
549
|
temp_wt = UP.calculate_wt_add(
|
|
517
550
|
all_wt[start_layer],
|
|
@@ -520,11 +553,9 @@ class Backtrace(object):
|
|
|
520
553
|
|
|
521
554
|
for ind, ch in enumerate(child_nodes):
|
|
522
555
|
all_wt[ch] += temp_wt[ind]
|
|
523
|
-
|
|
524
556
|
elif model_resource[1][start_layer]["class"] == 'Feed_Forward':
|
|
525
557
|
weights = all_wts[start_layer]
|
|
526
558
|
feed_forward_weights = HP.rename_feed_forward_keys(weights)
|
|
527
|
-
|
|
528
559
|
temp_wt = UP.calculate_wt_feed_forward(
|
|
529
560
|
all_wt[start_layer],
|
|
530
561
|
all_out[child_nodes[0]][0].detach().numpy(),
|
|
@@ -535,7 +566,6 @@ class Backtrace(object):
|
|
|
535
566
|
elif model_resource[1][start_layer]["class"] == "Pooler":
|
|
536
567
|
weights = all_wts[start_layer]
|
|
537
568
|
pooler_weights = HP.rename_pooler_keys(weights)
|
|
538
|
-
|
|
539
569
|
temp_wt = UP.calculate_wt_pooler(
|
|
540
570
|
all_wt[start_layer],
|
|
541
571
|
all_out[child_nodes[0]][0].detach().numpy(),
|
|
@@ -546,7 +576,6 @@ class Backtrace(object):
|
|
|
546
576
|
elif model_resource[1][start_layer]["class"] == "Classifier":
|
|
547
577
|
weights = all_wts[start_layer]
|
|
548
578
|
classifier_weights = HP.rename_classifier_keys(weights)
|
|
549
|
-
|
|
550
579
|
temp_wt = UP.calculate_wt_classifier(
|
|
551
580
|
all_wt[start_layer],
|
|
552
581
|
all_out[child_nodes[0]][0].detach().numpy(),
|
|
@@ -557,7 +586,6 @@ class Backtrace(object):
|
|
|
557
586
|
elif model_resource[1][start_layer]["class"] == "LM_Head":
|
|
558
587
|
weights = all_wts[start_layer]
|
|
559
588
|
lm_head_weights = HP.rename_decoder_lm_head(weights)
|
|
560
|
-
|
|
561
589
|
temp_wt = UP.calculate_wt_lm_head(
|
|
562
590
|
all_wt[start_layer],
|
|
563
591
|
all_out[child_nodes[0]][0].detach().numpy(),
|
|
@@ -572,7 +600,6 @@ class Backtrace(object):
|
|
|
572
600
|
elif model_resource[1][start_layer]["class"] == 'Cross_Attention':
|
|
573
601
|
weights = all_wts[start_layer]
|
|
574
602
|
cross_attention_weights = HP.rename_cross_attention_keys(weights)
|
|
575
|
-
|
|
576
603
|
temp_wt = UP.calculate_wt_cross_attention(
|
|
577
604
|
all_wt[start_layer],
|
|
578
605
|
[all_out[ch][0].detach().numpy() for ch in child_nodes],
|
|
@@ -582,6 +609,10 @@ class Backtrace(object):
|
|
|
582
609
|
for ind, ch in enumerate(child_nodes):
|
|
583
610
|
all_wt[ch] += temp_wt[ind]
|
|
584
611
|
|
|
612
|
+
elif model_resource[1][start_layer]["class"] == "Embedding":
|
|
613
|
+
temp_wt = all_wt[start_layer]
|
|
614
|
+
temp_wt = np.mean(temp_wt,axis=1)
|
|
615
|
+
all_wt[child_nodes[0]] = all_wt[child_nodes[0]] + temp_wt
|
|
585
616
|
else:
|
|
586
617
|
temp_wt = all_wt[start_layer]
|
|
587
618
|
all_wt[child_nodes[0]] += temp_wt
|
|
@@ -598,14 +629,16 @@ class Backtrace(object):
|
|
|
598
629
|
|
|
599
630
|
return all_wt
|
|
600
631
|
|
|
601
|
-
def contrast_eval(self, all_out, multiplier=100.0
|
|
632
|
+
def contrast_eval(self, all_out, multiplier=100.0,
|
|
633
|
+
scaler=None,thresholding=0.5,
|
|
634
|
+
task="binary-classification"):
|
|
602
635
|
model_resource = self.model_resource
|
|
603
636
|
activation_dict = self.activation_dict
|
|
604
637
|
inputcheck = False
|
|
605
638
|
out_layer = model_resource[2][0]
|
|
606
639
|
all_wt_pos = {}
|
|
607
640
|
all_wt_neg = {}
|
|
608
|
-
start_wt_pos, start_wt_neg = UC.calculate_start_wt(all_out[out_layer])
|
|
641
|
+
start_wt_pos, start_wt_neg = UC.calculate_start_wt(all_out[out_layer],scaler,thresholding,task)
|
|
609
642
|
all_wt_pos[out_layer] = start_wt_pos * multiplier
|
|
610
643
|
all_wt_neg[out_layer] = start_wt_neg * multiplier
|
|
611
644
|
layer_stack = [out_layer]
|
|
@@ -636,16 +669,64 @@ class Backtrace(object):
|
|
|
636
669
|
l1 = model_resource[0][start_layer]
|
|
637
670
|
w1 = l1.state_dict()['weight']
|
|
638
671
|
b1 = l1.state_dict()['bias']
|
|
672
|
+
pad1 = l1.padding
|
|
673
|
+
strides1 = l1.stride
|
|
639
674
|
temp_wt_pos, temp_wt_neg = UC.calculate_wt_conv(
|
|
640
675
|
all_wt_pos[start_layer],
|
|
641
676
|
all_wt_neg[start_layer],
|
|
642
677
|
all_out[child_nodes[0]][0],
|
|
643
678
|
w1,
|
|
644
679
|
b1,
|
|
680
|
+
pad1,
|
|
681
|
+
strides1,
|
|
682
|
+
activation_dict[model_resource[1][start_layer]["name"]],
|
|
683
|
+
)
|
|
684
|
+
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
685
|
+
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
686
|
+
elif model_resource[1][start_layer]["class"] == "ConvTranspose2d":
|
|
687
|
+
l1 = model_resource[0][start_layer]
|
|
688
|
+
w1 = l1.state_dict()['weight']
|
|
689
|
+
b1 = l1.state_dict()['bias']
|
|
690
|
+
pad1 = l1.padding
|
|
691
|
+
strides1 = l1.stride
|
|
692
|
+
temp_wt_pos,temp_wt_neg = UC.calculate_wt_conv2d_transpose(
|
|
693
|
+
all_wt_pos[start_layer],
|
|
694
|
+
all_wt_neg[start_layer],
|
|
695
|
+
all_out[child_nodes[0]][0],
|
|
696
|
+
w1,
|
|
697
|
+
b1,
|
|
698
|
+
pad1,
|
|
699
|
+
strides1,
|
|
645
700
|
activation_dict[model_resource[1][start_layer]["name"]],
|
|
646
701
|
)
|
|
647
702
|
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
648
703
|
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
704
|
+
elif model_resource[1][start_layer]["class"] == 'Conv1d':
|
|
705
|
+
l1 = model_resource[0][start_layer]
|
|
706
|
+
w1 = l1.state_dict()['weight']
|
|
707
|
+
b1 = l1.state_dict()['bias']
|
|
708
|
+
pad1 = l1.padding[0]
|
|
709
|
+
strides1 = l1.stride[0]
|
|
710
|
+
temp_wt_pos,temp_wt_neg = UC.calculate_wt_conv_1d(all_wt_pos[start_layer],
|
|
711
|
+
all_wt_neg[start_layer],
|
|
712
|
+
all_out[child_nodes[0]][0],
|
|
713
|
+
w1,b1, pad1, strides1,
|
|
714
|
+
activation_dict[model_resource[1][start_layer]['name']])
|
|
715
|
+
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
716
|
+
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
717
|
+
elif model_resource[1][start_layer]["class"] == "ConvTranspose1d":
|
|
718
|
+
l1 = model_resource[0][start_layer]
|
|
719
|
+
w1 = l1.state_dict()['weight']
|
|
720
|
+
b1 = l1.state_dict()['bias']
|
|
721
|
+
pad1 = l1.padding[0]
|
|
722
|
+
strides1 = l1.stride[0]
|
|
723
|
+
temp_wt_pos,temp_wt_neg = UC.calculate_wt_conv1d_transpose(all_wt_pos[start_layer],
|
|
724
|
+
all_wt_neg[start_layer],
|
|
725
|
+
all_out[child_nodes[0]][0],
|
|
726
|
+
w1,b1, pad1, strides1,
|
|
727
|
+
activation_dict[model_resource[1][start_layer]['name']])
|
|
728
|
+
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
729
|
+
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
649
730
|
elif model_resource[1][start_layer]["class"] == "Reshape":
|
|
650
731
|
temp_wt_pos = UC.calculate_wt_rshp(
|
|
651
732
|
all_wt_pos[start_layer], all_out[child_nodes[0]][0]
|
|
@@ -698,6 +779,22 @@ class Backtrace(object):
|
|
|
698
779
|
(l1.kernel_size, l1.kernel_size),
|
|
699
780
|
)
|
|
700
781
|
all_wt_neg[child_nodes[0]] += temp_wt.T
|
|
782
|
+
elif model_resource[1][start_layer]["class"] == "MaxPool1d":
|
|
783
|
+
l1 = model_resource[0][start_layer]
|
|
784
|
+
pad1 = l1.padding
|
|
785
|
+
strides1 = l1.stride
|
|
786
|
+
temp_wt = UC.calculate_wt_maxpool_1d(
|
|
787
|
+
all_wt_pos[start_layer],
|
|
788
|
+
all_out[child_nodes[0]][0],
|
|
789
|
+
l1.kernel_size, pad1, strides1
|
|
790
|
+
)
|
|
791
|
+
all_wt_pos[child_nodes[0]] += temp_wt.T
|
|
792
|
+
temp_wt = UC.calculate_wt_maxpool_1d(
|
|
793
|
+
all_wt_neg[start_layer],
|
|
794
|
+
all_out[child_nodes[0]][0],
|
|
795
|
+
l1.kernel_size, pad1, strides1
|
|
796
|
+
)
|
|
797
|
+
all_wt_neg[child_nodes[0]] += temp_wt.T
|
|
701
798
|
elif model_resource[1][start_layer]["class"] == "AvgPool2d":
|
|
702
799
|
l1 = model_resource[0][start_layer]
|
|
703
800
|
temp_wt_pos, temp_wt_neg = UC.calculate_wt_avgpool(
|
|
@@ -708,6 +805,18 @@ class Backtrace(object):
|
|
|
708
805
|
)
|
|
709
806
|
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
710
807
|
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
808
|
+
elif model_resource[1][start_layer]["class"] == "AvgPool1d":
|
|
809
|
+
l1 = model_resource[0][start_layer]
|
|
810
|
+
pad1 = l1.padding
|
|
811
|
+
strides1 = l1.stride
|
|
812
|
+
temp_wt_pos, temp_wt_neg = UC.calculate_wt_avgpool_1d(
|
|
813
|
+
all_wt_pos[start_layer],
|
|
814
|
+
all_wt_neg[start_layer],
|
|
815
|
+
all_out[child_nodes[0]][0],
|
|
816
|
+
l1.kernel_size, pad1, strides1
|
|
817
|
+
)
|
|
818
|
+
all_wt_pos[child_nodes[0]] += temp_wt_pos.T
|
|
819
|
+
all_wt_neg[child_nodes[0]] += temp_wt_neg.T
|
|
711
820
|
elif model_resource[1][start_layer]["class"] == "Concatenate":
|
|
712
821
|
temp_wt = UC.calculate_wt_concat(
|
|
713
822
|
all_wt_pos[start_layer],
|
|
@@ -757,6 +866,15 @@ class Backtrace(object):
|
|
|
757
866
|
)
|
|
758
867
|
all_wt_pos[child_nodes[0]] = temp_wt_pos
|
|
759
868
|
all_wt_neg[child_nodes[0]] = temp_wt_neg
|
|
869
|
+
elif model_resource[1][start_layer]["class"] == "Embedding":
|
|
870
|
+
temp_wt_pos = all_wt_pos[start_layer]
|
|
871
|
+
temp_wt_neg = all_wt_neg[start_layer]
|
|
872
|
+
|
|
873
|
+
temp_wt_pos = np.mean(temp_wt_pos,axis=1)
|
|
874
|
+
temp_wt_neg = np.mean(temp_wt_neg,axis=1)
|
|
875
|
+
|
|
876
|
+
all_wt_pos[child_nodes[0]] = all_wt_pos[child_nodes[0]] + temp_wt_pos
|
|
877
|
+
all_wt_neg[child_nodes[0]] = all_wt_neg[child_nodes[0]] + temp_wt_neg
|
|
760
878
|
else:
|
|
761
879
|
temp_wt_pos = all_wt_pos[start_layer]
|
|
762
880
|
temp_wt_neg = all_wt_neg[start_layer]
|