dl-backtrace 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dl-backtrace might be problematic.
- dl_backtrace/pytorch_backtrace/backtrace/backtrace.py +173 -44
- dl_backtrace/pytorch_backtrace/backtrace/utils/__init__.py +3 -0
- dl_backtrace/pytorch_backtrace/backtrace/utils/encoder.py +183 -0
- dl_backtrace/pytorch_backtrace/backtrace/utils/encoder_decoder.py +489 -0
- dl_backtrace/pytorch_backtrace/backtrace/utils/helper.py +95 -0
- dl_backtrace/pytorch_backtrace/backtrace/utils/prop.py +481 -0
- dl_backtrace/tf_backtrace/backtrace/__init__.py +1 -2
- dl_backtrace/tf_backtrace/backtrace/activation_info.py +33 -0
- dl_backtrace/tf_backtrace/backtrace/backtrace.py +506 -279
- dl_backtrace/tf_backtrace/backtrace/models.py +25 -0
- dl_backtrace/tf_backtrace/backtrace/server.py +27 -0
- dl_backtrace/tf_backtrace/backtrace/utils/__init__.py +5 -2
- dl_backtrace/tf_backtrace/backtrace/utils/encoder.py +206 -0
- dl_backtrace/tf_backtrace/backtrace/utils/encoder_decoder.py +501 -0
- dl_backtrace/tf_backtrace/backtrace/utils/helper.py +99 -0
- dl_backtrace/tf_backtrace/backtrace/utils/utils_contrast.py +1132 -0
- dl_backtrace/tf_backtrace/backtrace/utils/utils_prop.py +1582 -0
- dl_backtrace/version.py +2 -2
- {dl_backtrace-0.0.14.dist-info → dl_backtrace-0.0.16.dist-info}/METADATA +2 -2
- dl_backtrace-0.0.16.dist-info/RECORD +29 -0
- {dl_backtrace-0.0.14.dist-info → dl_backtrace-0.0.16.dist-info}/WHEEL +1 -1
- dl_backtrace/tf_backtrace/backtrace/config.py +0 -41
- dl_backtrace/tf_backtrace/backtrace/utils/contrast.py +0 -834
- dl_backtrace/tf_backtrace/backtrace/utils/prop.py +0 -725
- dl_backtrace-0.0.14.dist-info/RECORD +0 -21
- {dl_backtrace-0.0.14.dist-info → dl_backtrace-0.0.16.dist-info}/LICENSE +0 -0
- {dl_backtrace-0.0.14.dist-info → dl_backtrace-0.0.16.dist-info}/top_level.txt +0 -0
@@ -744,3 +744,484 @@ def calculate_wt_gavgpool(wts, inp):
             temp_wt = temp_wt + ((n_mat / n_sum) * wt * n_agg_wt * -1.0)
         wt_mat[..., c] = temp_wt
     return wt_mat
+
+
+####################################################################
+################### Encoder Model ####################
+####################################################################
+def stabilize(matrix, epsilon=1e-6):
+    return matrix + epsilon * np.sign(matrix)
+
+
+def calculate_relevance_V(wts, value_output):
+    # Initialize wt_mat with zeros
+    wt_mat_V = np.zeros((wts.shape[0], wts.shape[1], *value_output.shape))
+
+    for i in range(wts.shape[0]):
+        for j in range(wts.shape[1]):
+            l1_ind1 = value_output
+            wt_ind1 = wt_mat_V[i, j]
+            wt = wts[i, j]
+
+            p_ind = l1_ind1 > 0
+            n_ind = l1_ind1 < 0
+            p_sum = np.sum(l1_ind1[p_ind])
+            n_sum = np.sum(l1_ind1[n_ind]) * -1
+
+            if p_sum > 0:
+                p_agg_wt = p_sum / (p_sum + n_sum)
+            else:
+                p_agg_wt = 0
+            if n_sum > 0:
+                n_agg_wt = n_sum / (p_sum + n_sum)
+            else:
+                n_agg_wt = 0
+
+            if p_sum == 0:
+                p_sum = 1
+            if n_sum == 0:
+                n_sum = 1
+
+            wt_ind1[p_ind] = (l1_ind1[p_ind] / p_sum) * wt * p_agg_wt
+            wt_ind1[n_ind] = (l1_ind1[n_ind] / n_sum) * wt * n_agg_wt * -1.0
+
+    wt_mat_V = np.sum(wt_mat_V, axis=(0,1))
+    return wt_mat_V
+
+
+def calculate_relevance_QK(wts, QK_output):
+    # Initialize wt_mat with zeros
+    wt_mat_QK = np.zeros((wts.shape[0], wts.shape[1], *QK_output.shape))
+
+    for i in range(wts.shape[0]):
+        for j in range(wts.shape[1]):
+            l1_ind1 = QK_output
+            wt_ind1 = wt_mat_QK[i, j]
+            wt = wts[i, j]
+
+            p_ind = l1_ind1 > 0
+            n_ind = l1_ind1 < 0
+            p_sum = np.sum(l1_ind1[p_ind])
+            n_sum = np.sum(l1_ind1[n_ind]) * -1
+
+            t_sum = p_sum - n_sum
+
+            # This layer has a softmax activation function
+            act = {
+                "name": "softmax",
+                "range": {"l": -1, "u": 2},
+                "type": "mono",
+                "func": None,
+            }
+
+            if act["type"] == "mono":
+                if act["range"]["l"]:
+                    if t_sum < act["range"]["l"]:
+                        p_sum = 0
+                if act["range"]["u"]:
+                    if t_sum > act["range"]["u"]:
+                        n_sum = 0
+
+            if p_sum > 0:
+                p_agg_wt = p_sum / (p_sum + n_sum)
+            else:
+                p_agg_wt = 0
+
+            if n_sum > 0:
+                n_agg_wt = n_sum / (p_sum + n_sum)
+            else:
+                n_agg_wt = 0
+
+            if p_sum == 0:
+                p_sum = 1
+            if n_sum == 0:
+                n_sum = 1
+
+            wt_ind1[p_ind] = (l1_ind1[p_ind] / p_sum) * wt * p_agg_wt
+            wt_ind1[n_ind] = (l1_ind1[n_ind] / n_sum) * wt * n_agg_wt * -1.0
+
+    wt_mat_QK = np.sum(wt_mat_QK, axis=(0, 1))
+    return wt_mat_QK
+
+
+def calculate_wt_self_attention(wts, inp, w):
+    '''
+    Input:
+        wts: relevance score of the layer
+        inp: input to the layer
+        w: weights of the layer- ['W_q', 'W_k', 'W_v', 'W_o']
+
+    Outputs:
+        Step-1: outputs = torch.matmul(input_a, input_b)
+        Step-2: outputs = F.softmax(inputs, dim=dim, dtype=dtype)
+        Step-3: outputs = input_a * input_b
+    '''
+    query_output = np.einsum('ij,kj->ik', inp, w['W_q'])
+    key_output = np.einsum('ij,kj->ik', inp, w['W_k'])
+    value_output = np.einsum('ij,kj->ik', inp, w['W_v'])
+
+    # --------------- Relevance Calculation for Step-3 -----------------------
+    relevance_V = wts / 2
+    relevance_QK = wts / 2
+
+    # --------------- Relevance Calculation for V --------------------------------
+    wt_mat_V = calculate_relevance_V(relevance_V, value_output)
+
+    # --------------- Transformed Relevance QK ----------------------------------
+    QK_output = np.einsum('ij,kj->ik', query_output, key_output)
+    wt_mat_QK = calculate_relevance_QK(relevance_QK, QK_output)
+
+    # --------------- Relevance Calculation for K and Q --------------------------------
+    stabilized_QK_output = stabilize(QK_output * 2)
+    norm_wt_mat_QK = wt_mat_QK / stabilized_QK_output
+    wt_mat_Q = np.einsum('ij,jk->ik', norm_wt_mat_QK, key_output) * query_output
+    wt_mat_K = np.einsum('ij,ik->kj', query_output, norm_wt_mat_QK) * key_output
+
+    wt_mat = wt_mat_V + wt_mat_K + wt_mat_Q
+    return wt_mat
+
+
+def calculate_wt_feed_forward(wts, inp, w):
+    intermediate_output = np.einsum('ij,jk->ik', inp, w['W_int'].T)
+    feed_forward_output = np.einsum('ij,jk->ik', intermediate_output, w['W_out'].T)
+
+    relevance_input = np.zeros(inp.shape)
+    relevance_out = np.zeros(intermediate_output.shape)
+
+    # Relevance propagation for 2nd layer
+    for i in range(wts.shape[0]):
+        R2 = wts[i]
+        contribution_matrix2 = np.einsum('ij,j->ij', w['W_out'], intermediate_output[i])
+        wt_mat2 = np.zeros(contribution_matrix2.shape)
+
+        for j in range(contribution_matrix2.shape[0]):
+            l1_ind1 = contribution_matrix2[j]
+            wt_ind1 = wt_mat2[j]
+            wt = R2[j]
+
+            p_ind = l1_ind1 > 0
+            n_ind = l1_ind1 < 0
+            p_sum = np.sum(l1_ind1[p_ind])
+            n_sum = np.sum(l1_ind1[n_ind]) * -1
+
+            if p_sum > 0:
+                p_agg_wt = p_sum / (p_sum + n_sum)
+            else:
+                p_agg_wt = 0
+
+            if n_sum > 0:
+                n_agg_wt = n_sum / (p_sum + n_sum)
+            else:
+                n_agg_wt = 0
+
+            if p_sum == 0:
+                p_sum = 1
+            if n_sum == 0:
+                n_sum = 1
+
+            wt_ind1[p_ind] = (l1_ind1[p_ind] / p_sum) * wt * p_agg_wt
+            wt_ind1[n_ind] = (l1_ind1[n_ind] / n_sum) * wt * n_agg_wt * -1.0
+
+        relevance_out[i] = wt_mat2.sum(axis=0)
+
+    # Relevance propagation for 1st layer
+    for i in range(relevance_out.shape[0]):
+        R1 = relevance_out[i]
+        contribution_matrix1 = np.einsum('ij,j->ij', w['W_int'], inp[i])
+        wt_mat1 = np.zeros(contribution_matrix1.shape)
+
+        for j in range(contribution_matrix1.shape[0]):
+            l1_ind1 = contribution_matrix1[j]
+            wt_ind1 = wt_mat1[j]
+            wt = R1[j]
+
+            p_ind = l1_ind1 > 0
+            n_ind = l1_ind1 < 0
+            p_sum = np.sum(l1_ind1[p_ind])
+            n_sum = np.sum(l1_ind1[n_ind]) * -1
+
+            t_sum = p_sum - n_sum
+
+            # This layer has a ReLU activation function
+            act = {
+                "name": "relu",
+                "range": {"l": 0, "u": None},
+                "type": "mono",
+                "func": None,
+            }
+
+            if act["type"] == "mono":
+                if act["range"]["l"]:
+                    if t_sum < act["range"]["l"]:
+                        p_sum = 0
+                if act["range"]["u"]:
+                    if t_sum > act["range"]["u"]:
+                        n_sum = 0
+
+            if p_sum > 0:
+                p_agg_wt = p_sum / (p_sum + n_sum)
+            else:
+                p_agg_wt = 0
+
+            if n_sum > 0:
+                n_agg_wt = n_sum / (p_sum + n_sum)
+            else:
+                n_agg_wt = 0
+
+            if p_sum == 0:
+                p_sum = 1
+            if n_sum == 0:
+                n_sum = 1
+
+            wt_ind1[p_ind] = (l1_ind1[p_ind] / p_sum) * wt * p_agg_wt
+            wt_ind1[n_ind] = (l1_ind1[n_ind] / n_sum) * wt * n_agg_wt * -1.0
+
+        relevance_input[i] = wt_mat1.sum(axis=0)
+
+    return relevance_input
+
+
+def calculate_wt_classifier(wts, inp, w):
+    '''
+    Input:
+        wts: relevance score of the layer
+        inp: input to the layer
+        w: weights of the layer- ['W_cls', 'b_cls']
+    '''
+    mul_mat = np.einsum("ij, i->ij", w['W_cls'].T, inp).T
+    wt_mat = np.zeros(mul_mat.shape)
+
+    for i in range(mul_mat.shape[0]):
+        l1_ind1 = mul_mat[i]
+        wt_ind1 = wt_mat[i]
+        wt = wts[i]
+
+        p_ind = l1_ind1 > 0
+        n_ind = l1_ind1 < 0
+        p_sum = np.sum(l1_ind1[p_ind])
+        n_sum = np.sum(l1_ind1[n_ind]) * -1
+
+        if w['b_cls'][i] > 0:
+            pbias = w['b_cls'][i]
+            nbias = 0
+        else:
+            pbias = 0
+            nbias = w['b_cls'][i]
+
+        t_sum = p_sum + pbias - n_sum - nbias
+
+        # This layer has a softmax activation function
+        act = {
+            "name": "softmax",
+            "range": {"l": -1, "u": 2},
+            "type": "mono",
+            "func": None,
+        }
+
+        if act["type"] == "mono":
+            if act["range"]["l"]:
+                if t_sum < act["range"]["l"]:
+                    p_sum = 0
+            if act["range"]["u"]:
+                if t_sum > act["range"]["u"]:
+                    n_sum = 0
+
+        if p_sum > 0:
+            p_agg_wt = (p_sum + pbias) / (p_sum + n_sum + pbias + nbias)
+            p_agg_wt = p_agg_wt * (p_sum / (p_sum + pbias))
+        else:
+            p_agg_wt = 0
+        if n_sum > 0:
+            n_agg_wt = (n_sum + nbias) / (p_sum + n_sum + pbias + nbias)
+            n_agg_wt = n_agg_wt * (n_sum / (n_sum + nbias))
+        else:
+            n_agg_wt = 0
+
+        if p_sum == 0:
+            p_sum = 1
+        if n_sum == 0:
+            n_sum = 1
+
+        wt_ind1[p_ind] = (l1_ind1[p_ind] / p_sum) * wt * p_agg_wt
+        wt_ind1[n_ind] = (l1_ind1[n_ind] / n_sum) * wt * n_agg_wt * -1.0
+
+    wt_mat = wt_mat.sum(axis=0)
+    return wt_mat
+
+
+def calculate_wt_pooler(wts, inp, w):
+    '''
+    Input:
+        wts: relevance score of the layer
+        inp: input to the layer
+        w: weights of the layer- ['W_p', 'b_p']
+    '''
+    relevance_inp = np.zeros(inp.shape)
+
+    for i in range(inp.shape[0]):
+        # Compute contribution matrix
+        contribution_matrix = np.einsum('ij,j->ij', w['W_p'], inp[i])
+        wt_mat = np.zeros(contribution_matrix.shape)
+
+        # Iterate over each unit
+        for j in range(contribution_matrix.shape[0]):
+            l1_ind1 = contribution_matrix[j]
+            wt_ind1 = wt_mat[j]
+            wt = wts[j]
+
+            p_ind = l1_ind1 > 0
+            n_ind = l1_ind1 < 0
+            p_sum = np.sum(l1_ind1[p_ind])
+            n_sum = np.sum(l1_ind1[n_ind]) * -1
+
+            # Calculate biases
+            pbias = max(w['b_p'][j], 0)
+            nbias = min(w['b_p'][j], 0) * -1
+
+            t_sum = p_sum + pbias - n_sum - nbias
+
+            # This layer has a tanh activation function
+            act = {
+                "name": "tanh",
+                "range": {"l": -2, "u": 2},
+                "type": "mono",
+                "func": None
+            }
+
+            # Apply activation function constraints
+            if act["type"] == "mono":
+                if act["range"]["l"]:
+                    if t_sum < act["range"]["l"]:
+                        p_sum = 0
+                if act["range"]["u"]:
+                    if t_sum > act["range"]["u"]:
+                        n_sum = 0
+
+            # Aggregate weights based on positive and negative contributions
+            p_agg_wt = 0
+            n_agg_wt = 0
+            if p_sum > 0:
+                p_agg_wt = (p_sum + pbias) / (p_sum + n_sum + pbias + nbias)
+                p_agg_wt *= (p_sum / (p_sum + pbias))
+
+            if n_sum > 0:
+                n_agg_wt = (n_sum + nbias) / (p_sum + n_sum + pbias + nbias)
+                n_agg_wt *= (n_sum / (n_sum + nbias))
+
+            # Prevent division by zero
+            if p_sum == 0:
+                p_sum = 1
+            if n_sum == 0:
+                n_sum = 1
+
+            # Update weight matrix
+            wt_ind1[p_ind] = (l1_ind1[p_ind] / p_sum) * wt * p_agg_wt
+            wt_ind1[n_ind] = (l1_ind1[n_ind] / n_sum) * wt * n_agg_wt * -1.0
+
+        # Calculate relevance for each token
+        relevance_inp[i] = wt_mat.sum(axis=0)
+
+    relevance_inp *= (100 / np.sum(relevance_inp))
+    return relevance_inp
+
+
+####################################################################
+################### Encoder-Decoder Model ####################
+####################################################################
+
+def calculate_enc_dec_start_wt(arg, indices):
+    y = np.zeros(arg.shape, dtype=np.float64)
+    value = 1 / arg.shape[0]
+
+    for i in range(arg.shape[0]):
+        y[i][indices[i]] = value
+
+    return y
+
+
+def calculate_wt_lm_head(wts, inp, w):
+    '''
+    Input:
+        wts: relevance score of the layer
+        inp: input to the layer
+        w: weights of the layer- ['W_lm_head']
+    '''
+    relevance_input = np.zeros(inp.shape)
+
+    for i in range(wts.shape[0]):
+        R = wts[i]
+        contribution_matrix = np.einsum('ij,j->ij', w['W_lm_head'], inp[i])
+        wt_mat = np.zeros(contribution_matrix.shape)
+
+        for j in range(contribution_matrix.shape[0]):
+            l1_ind1 = contribution_matrix[j]
+            wt_ind1 = wt_mat[j]
+            wt = R[j]
+
+            p_ind = l1_ind1 > 0
+            n_ind = l1_ind1 < 0
+
+            p_sum = np.sum(l1_ind1[p_ind])
+            n_sum = np.sum(l1_ind1[n_ind]) * -1
+
+            if p_sum > 0:
+                p_agg_wt = p_sum / (p_sum + n_sum)
+            else:
+                p_agg_wt = 0
+
+            if n_sum > 0:
+                n_agg_wt = n_sum / (p_sum + n_sum)
+            else:
+                n_agg_wt = 0
+
+            if p_sum == 0:
+                p_sum = 1
+            if n_sum == 0:
+                n_sum = 1
+
+            wt_ind1[p_ind] = (l1_ind1[p_ind] / p_sum) * wt * p_agg_wt
+            wt_ind1[n_ind] = (l1_ind1[n_ind] / n_sum) * wt * n_agg_wt * -1.0
+
+        relevance_input[i] = wt_mat.sum(axis=0)
+
+    return relevance_input
+
+
+def calculate_wt_cross_attention(wts, inp, w):
+    '''
+    Input:
+        wts: relevance score of the layer
+        inp: input to the layer
+        w: weights of the layer- ['W_q', 'W_k', 'W_v', 'W_o']
+        inputs: dict_keys(['query', 'key', 'value'])
+
+    Outputs:
+        Step-1: outputs = torch.matmul(input_a, input_b)
+        Step-2: outputs = F.softmax(inputs, dim=dim, dtype=dtype)
+        Step-3: outputs = input_a * input_b
+    '''
+    k_v_inp, q_inp = inp
+    query_output = np.einsum('ij,kj->ik', q_inp, w['W_q'])
+    key_output = np.einsum('ij,kj->ik', k_v_inp, w['W_k'])
+    value_output = np.einsum('ij,kj->ik', k_v_inp, w['W_v'])
+
+    # --------------- Relevance Calculation for Step-3 -----------------------
+    relevance_V = wts / 2
+    relevance_QK = wts / 2
+
+    # --------------- Relevance Calculation for V --------------------------------
+    wt_mat_V = calculate_relevance_V(relevance_V, value_output)
+
+    # --------------- Transformed Relevance QK ----------------------------------
+    QK_output = np.einsum('ij,kj->ik', query_output, key_output)
+    wt_mat_QK = calculate_relevance_QK(relevance_QK, QK_output)
+
+    # --------------- Relevance Calculation for K and Q --------------------------------
+    stabilized_QK_output = stabilize(QK_output * 2)
+    norm_wt_mat_QK = wt_mat_QK / stabilized_QK_output
+    wt_mat_Q = np.einsum('ij,jk->ik', norm_wt_mat_QK, key_output) * query_output
+    wt_mat_K = np.einsum('ij,ik->kj', query_output, norm_wt_mat_QK) * key_output
+
+    wt_mat_KV = wt_mat_V + wt_mat_K
+    wt_mat = [wt_mat_KV, wt_mat_Q]
+    return wt_mat
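For orientation, here is a minimal sketch (not part of the wheel) that exercises the newly added self-attention relevance helpers on random data. It assumes the functions from the hunk above (stabilize, calculate_relevance_V, calculate_relevance_QK, calculate_wt_self_attention) are in scope; the import path in the comment is an assumption inferred from the file list.

import numpy as np

# Hypothetical import path, inferred from the +481-line utils/prop.py entry above;
# adjust if the helpers live elsewhere in the package:
# from dl_backtrace.pytorch_backtrace.backtrace.utils.prop import calculate_wt_self_attention

rng = np.random.default_rng(0)
seq_len, d_model = 4, 8

inp = rng.standard_normal((seq_len, d_model))        # layer input, one row per token
w = {name: rng.standard_normal((d_model, d_model))   # square projection weights
     for name in ('W_q', 'W_k', 'W_v', 'W_o')}
wts = rng.standard_normal((seq_len, d_model))        # relevance arriving at the attention output

# calculate_wt_self_attention splits wts 50/50 between the V path and the QK path,
# redistributes each half with calculate_relevance_V / calculate_relevance_QK, then
# maps the QK share back onto Q and K through the stabilized QK product.
relevance = calculate_wt_self_attention(wts, inp, w)
print(relevance.shape)                               # (4, 8): same shape as inp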
@@ -0,0 +1,33 @@
+import numpy as np
+def np_swish(x, beta=0.75):
+    z = 1 / (1 + np.exp(-(beta * x)))
+    return x * z
+
+activation_master = {'None': {'name': None,
+                              'range': {'l': None, 'u': None},
+                              'type': 'null',
+                              'func': None},
+                     'linear': {'name': None,
+                                'range': {'l': None, 'u': None},
+                                'type': 'mono',
+                                'func': None},
+                     'tanh': {'name': 'tanh',
+                              'range': {'l': -2, 'u': 2},
+                              'type': 'mono',
+                              'func': None},
+                     'sigmoid': {'name': 'sigmoid',
+                                 'range': {'l': -4, 'u': 4},
+                                 'type': 'mono',
+                                 'func': None},
+                     'relu': {'name': 'relu',
+                              'range': {'l': 0, 'u': None},
+                              'type': 'mono',
+                              'func': None},
+                     'swish': {'name': 'swish',
+                               'range': {'l': -6, 'u': None},
+                               'type': 'non_mono',
+                               'func': np_swish},
+                     'softmax': {'name': 'softmax',
+                                 'range': {'l': -1, 'u': 2},
+                                 'type': 'mono',
+                                 'func': None}}
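A quick illustrative look-up against the activation_master table added in this hunk (not part of the package; it assumes activation_master and np_swish from the lines above are in scope, for example imported from the new activation_info.py module):

import numpy as np

act = activation_master['swish']   # {'name': 'swish', 'range': {'l': -6, 'u': None}, 'type': 'non_mono', 'func': np_swish}

x = np.linspace(-3.0, 3.0, 5)
if act['type'] == 'non_mono' and act['func'] is not None:
    # Only the non-monotonic entry carries a callable; np_swish(x) = x * sigmoid(0.75 * x).
    print(act['func'](x))

# The 'l'/'u' bounds are the same saturation limits the propagation rules in the
# previous hunk compare t_sum against before zeroing p_sum / n_sum.
print(act['range']['l'], act['range']['u'])   # -6 None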