joonmyung 1.5.14.tar.gz → 1.5.16.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- joonmyung-1.5.16/PKG-INFO +20 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/analysis/__init__.py +0 -1
- joonmyung-1.5.16/joonmyung/analysis/analysis.py +145 -0
- joonmyung-1.5.14/joonmyung/analysis/analysis.py → joonmyung-1.5.16/joonmyung/analysis/analysis_bak.py +44 -100
- joonmyung-1.5.16/joonmyung/analysis/analysis_ㅠㅏ.py +218 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/analysis/dataset.py +13 -19
- joonmyung-1.5.16/joonmyung/analysis/evaluate.py +39 -0
- joonmyung-1.5.16/joonmyung/analysis/model.py +109 -0
- joonmyung-1.5.16/joonmyung/clip/__init__.py +1 -0
- joonmyung-1.5.16/joonmyung/clip/clip.py +221 -0
- joonmyung-1.5.16/joonmyung/clip/model.py +445 -0
- joonmyung-1.5.16/joonmyung/clip/simple_tokenizer.py +131 -0
- joonmyung-1.5.16/joonmyung/compression/apply.py +139 -0
- joonmyung-1.5.16/joonmyung/compression/compression.py +202 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/draw.py +72 -6
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/metric.py +3 -1
- joonmyung-1.5.16/joonmyung/model/__init__.py +0 -0
- joonmyung-1.5.16/joonmyung/model/compression.py +202 -0
- joonmyung-1.5.16/joonmyung/model.py +0 -0
- joonmyung-1.5.16/joonmyung/models/__init__.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/script.py +21 -14
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/utils.py +16 -1
- joonmyung-1.5.16/joonmyung.egg-info/PKG-INFO +20 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung.egg-info/SOURCES.txt +14 -1
- joonmyung-1.5.16/joonmyung.egg-info/requires.txt +11 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/setup.py +6 -1
- joonmyung-1.5.14/PKG-INFO +0 -9
- joonmyung-1.5.14/joonmyung/analysis/metric.py +0 -35
- joonmyung-1.5.14/joonmyung/analysis/model.py +0 -55
- joonmyung-1.5.14/joonmyung.egg-info/PKG-INFO +0 -9
- {joonmyung-1.5.14 → joonmyung-1.5.16}/LICENSE.txt +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/README.md +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/__init__.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/analysis/hook.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/analysis/utils.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/app.py +0 -0
- {joonmyung-1.5.14/joonmyung/models → joonmyung-1.5.16/joonmyung/compression}/__init__.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/data.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/dummy.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/file.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/gradcam.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/log.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/meta_data/__init__.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/meta_data/label.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/meta_data/utils.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/models/tome.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung/status.py +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung.egg-info/dependency_links.txt +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung.egg-info/not-zip-safe +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/joonmyung.egg-info/top_level.txt +0 -0
- {joonmyung-1.5.14 → joonmyung-1.5.16}/setup.cfg +0 -0
@@ -0,0 +1,20 @@
+Metadata-Version: 2.1
+Name: joonmyung
+Version: 1.5.16
+Summary: JoonMyung's Library
+Home-page: https://github.com/pizard/JoonMyung.git
+Author: JoonMyung Choi
+Author-email: pizard@korea.ac.kr
+License: MIT
+License-File: LICENSE.txt
+Requires-Dist: fvcore
+Requires-Dist: timm
+Requires-Dist: torchprofile
+Requires-Dist: thop
+Requires-Dist: wandb
+Requires-Dist: scipy
+Requires-Dist: matplotlib
+Requires-Dist: seaborn
+Requires-Dist: opencv-python
+Requires-Dist: ftfy
+Requires-Dist: regex
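
The eleven Requires-Dist entries above line up with the new joonmyung.egg-info/requires.txt (+11) and the setup.py change (+6 -1) in the file list. The actual setup.py contents are not shown in this view, but a minimal sketch of the kind of setuptools call that emits this metadata looks like:

    # Hypothetical sketch only; the real joonmyung-1.5.16 setup.py is not included in this diff.
    from setuptools import setup, find_packages

    setup(
        name="joonmyung",
        version="1.5.16",
        packages=find_packages(),
        install_requires=[          # each entry becomes a Requires-Dist line in PKG-INFO
            "fvcore", "timm", "torchprofile", "thop", "wandb", "scipy",
            "matplotlib", "seaborn", "opencv-python", "ftfy", "regex",
        ],
    )
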
@@ -0,0 +1,145 @@
+from joonmyung.draw import saliency, overlay, drawImgPlot, unNormalize, drawHeatmap
+from joonmyung.analysis.model import JModel, ZeroShotInference
+from timm.models.vision_transformer import Attention
+from joonmyung.metric import targetPred, accuracy
+from joonmyung.analysis.dataset import JDataset
+from joonmyung.utils import read_classnames
+from joonmyung.meta_data import data2path
+from joonmyung.log import AverageMeter
+import torch.nn.functional as F
+from tqdm import tqdm
+import numpy as np
+import torch
+import cv2
+
+def anaModel(transformer_class):
+    class VisionTransformer(transformer_class):
+        info_key = []
+        def resetInfo(self):
+            self.info = {n: [] for n in self.info_key}
+
+        def createHook(self, hooks):
+            [self.info_key.append(hook[3]) for hook in hooks]
+            for name, module in self.named_modules():
+                for idx, hook in enumerate(hooks):
+                    if hook[1] in name and hook[2] not in name:
+                        if hook[0] == "f":
+                            module.register_forward_hook(lambda mod, inp, out, hook_info=hook:
+                                                         self.forward_hook(hook_info, mod, inp, out))
+                        else:
+                            module.register_backward_hook(lambda mod, inp, out, hook_info=hook:
+                                                          self.backward_hook(hook_info, mod, inp, out))
+        def forward_hook(self, hook_info, module, input, output):
+            self.info[hook_info[3]].append(output.detach())
+
+        def backward_hook(self, hook_info, module, input, output):
+            self.info[hook_info[3]].append(input[0].detach())
+
+        def forward(self, *args, **kwdargs):
+            self.resetInfo()
+            return super().forward(*args, **kwdargs)
+        def encode_image(self, *args, **kwdargs):
+            self.resetInfo()
+            return super().encode_image(*args, **kwdargs)
+
+    return VisionTransformer
+
+def Analysis(model, hook_info= [["f", "attn_drop", "decoder", "attn"]]):
+    model.__class__ = anaModel(model.__class__)
+    model.createHook(hook_info)
+    return model
+
+if __name__ == '__main__':
+    dataset_name, device, debug = "imagenet", 'cuda', True
+    data_path, num_classes, _, _ = data2path(dataset_name)
+    analysis = [0] # [0] : INPUT TYPE, [0 : SAMPLE + POS, 1 : SAMPLE, 2 : POS]
+
+    dataset = JDataset(data_path, dataset_name, device=device)
+    data_idxs = [[c, i] for i in range(1000) for c in range(50)]
+
+    modelMaker = JModel(num_classes, device=device)
+    model = modelMaker.getModel(2, "ViT-B/16")
+    classnames = read_classnames("/hub_data1/joonmyung/data/imagenet/classnames.txt")
+    model = ZeroShotInference(model, classnames, prompt="a photo of a {}.", device=device)
+    hook_info = [["b", "attn_drop", "decoder", "grad"],
+                 ["f", "attn_drop", "decoder", "attn"],
+                 ["f", "ln_pre", "decoder", "feat_1"],
+                 ["f", "ln_1", "decoder", "feat_2"],
+                 ["f", "ln_2", "decoder", "feat_3"],
+                 ["f", "ln_post", "decoder", "feat_4"]]
+    model.model = Analysis(model.model, hook_info)
+    view = [False, False, True, True, True, True] # [IMG, SALIENCY:ATTN, SALIENCY:OPENCV, SALIENCY:GRAD, ATTN. MOVEMENT]
+    for idx, data_idx in enumerate(data_idxs):
+        print(f"------------------------- [{data_idx[0]}]/[{data_idx[1]}] -------------------------")
+        sample, target, label_name = dataset[data_idx[0], data_idx[1]]
+        sample.requires_grad = True
+
+        if view[0]:
+            drawImgPlot(unNormalize(sample, "imagenet"))
+
+        output = model(sample)
+        index = torch.eye(num_classes, device=device)[target]
+        (output * index).sum().backward(retain_graph=True)
+
+        attns = model.model.info["attn"]
+        grads = model.model.info["grad"]
+        if view[1]:
+            col, discard_ratios, v_ratio, head_fusion, data_from = 12, [0.0], 0.0, "mean", "patch"
+            results = saliency(attns, False, head_fusion=head_fusion, discard_ratios=discard_ratios, data_from=data_from, reshape=True, device=device)
+
+            data_roll = overlay(sample, results["rollout"], dataset_name)
+            drawImgPlot(data_roll, col=col)
+
+            data_attn = overlay(sample, results["attentive"], dataset_name)
+            drawImgPlot(data_attn, col=col)
+
+            data_vidTLDR = overlay(sample, results["vidTLDR"], dataset_name)
+            drawImgPlot(data_vidTLDR, col=col)
+
+            discard_ratios, v_ratio, head_fusion, data_from = [0.0], 0.1, "mean", "cls"
+            results = saliency(attns, grads, head_fusion=head_fusion, discard_ratios=discard_ratios, data_from=data_from, reshape=True, device=device)
+
+            data_roll = overlay(sample, results["rollout"], dataset_name)
+            drawImgPlot(data_roll, col=col)
+
+            data_attn = overlay(sample, results["attentive"], dataset_name)
+            drawImgPlot(data_attn, col=col)
+
+            data_vidTLDR = overlay(sample, results["vidTLDR"], dataset_name)
+            drawImgPlot(data_vidTLDR, col=col)
+
+        if view[2]: # SALIENCY W/ DATA
+            img = (dataset.unNormalize(sample)[0].permute(1, 2, 0).detach().cpu().numpy() * 255)
+            img_saliency = cv2.saliency.StaticSaliencySpectralResidual_create()
+            (success, saliencyMap) = img_saliency.computeSaliency(img)
+            saliencyMap = (saliencyMap * 255).astype("uint8")
+
+            img_saliency = cv2.saliency.StaticSaliencyFineGrained_create()
+            (success, saliencyFineMap) = img_saliency.computeSaliency(img)
+            threshMap = cv2.threshold((saliencyFineMap * 255).astype("uint8"), 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
+
+        if view[3]: # SALIENCY FOR INPUT
+            output = model(sample)
+            attn = torch.stack(attns, dim=1).mean(dim=[2, 3])[0, -2]
+            a = torch.autograd.grad(output[:, 3], sample, retain_graph=True)[0].sum(dim=1)
+            b = F.interpolate(a.unsqueeze(0), scale_factor=1.0, mode='nearest')[0]
+
+        if view[4]: # ATTENTION MOVEMENT (FROM / TO)
+            attn = torch.stack(attns).mean(dim=2).transpose(0,1) # (8 (B), 12 (L), 197(T_Q), 197(T_K))
+
+            cls2cls = attn[:, :, :1, 0].mean(dim=2) # (8(B), 12(L))
+            patch2cls = attn[:, :, :1, 1:].mean(dim=2).sum(dim=-1) # (8(B), 12(L))
+            cls2patch = attn[:, :, 1:, 0].mean(dim=2)
+            patch2patch = attn[:, :, 1:, 1:].mean(dim=2).sum(dim=-1)
+            # to_np(torch.stack([cls2cls.mean(dim=0), patch2cls.mean(dim=0), cls2patch.mean(dim=0), patch2patch.mean(dim=0)]))
+        if view[5]:
+            feats = {k: v for k, v in model.model.info if "feat" in k}
+            for name, feat in feats.items():
+                print(f"Feature Position : {name}")
+                image_feat = (torch.stack(feat)[:, :, 1:] @ model.model.visual.proj) # (1, 1, 196, 512)
+                L = image_feat.shape[0]
+                image_feat = image_feat / image_feat.norm(dim=-1, keepdim=True)
+
+                text_feat = model.text_features[1][None].t()
+                sim = (image_feat @ text_feat).reshape(L, 14, 14)
+                drawHeatmap(sim, col = L)
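
For orientation, the anaModel/Analysis pattern above swaps a model's __class__ at runtime and registers forward/backward hooks by substring-matching module names. A minimal, self-contained sketch of the same idea against a stock timm ViT (the model name, hook target, and helper below are illustrative assumptions, not code from this package):

    import timm
    import torch

    def attach_attn_hooks(model, target="attn_drop", exclude="decoder"):
        model.info = {"attn": []}

        def save_attn(module, inputs, output):
            # attn_drop sees the post-softmax attention map, shaped (B, heads, tokens, tokens)
            model.info["attn"].append(output.detach())

        for name, module in model.named_modules():
            if target in name and exclude not in name:
                module.register_forward_hook(save_attn)
        return model

    model = attach_attn_hooks(timm.create_model("deit_tiny_patch16_224", pretrained=False))
    with torch.no_grad():
        _ = model(torch.randn(1, 3, 224, 224))
    # Expect one captured map per transformer block; note that recent timm releases may
    # take a fused-attention path in which attn_drop is never called and nothing is captured.
    print(len(model.info["attn"]))
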
@@ -1,16 +1,17 @@
-from joonmyung.analysis.dataset import JDataset
-from joonmyung.analysis.model import JModel
 from joonmyung.draw import saliency, overlay, drawImgPlot, unNormalize
-from joonmyung.meta_data import data2path
 from joonmyung.metric import targetPred, accuracy
+from joonmyung.analysis.dataset import JDataset
+from joonmyung.analysis.model import JModel, ZeroShotInference
+from joonmyung.meta_data import data2path
 from joonmyung.log import AverageMeter
-from tqdm import tqdm
-from contextlib import suppress
 import torch.nn.functional as F
+from tqdm import tqdm
 import numpy as np
 import torch
 import cv2

+from joonmyung.utils import read_classnames
+

 def anaModel(transformer_class):
     class VisionTransformer(transformer_class):
@@ -44,78 +45,46 @@ def anaModel(transformer_class):
     return VisionTransformer

 class Analysis:
-    def __init__(self, model, analysis = [0], activate = [True, False, False, False],
-
-                 , amp_autocast=suppress, device="cuda"):
-        # Section A. Model
-        self.num_classes = num_classes
-        self.key_name = key_name
-        if wrapping:
+    def __init__(self, model, analysis = [0], activate = [True, False, False, False], num_classes = 1000, device="cuda"):
+        if sum(analysis):
             model_ = anaModel(model.__class__)
             model.__class__ = model_
             model.analysis = analysis

-        self.
-        self.
-
-        # Section B. Attention
-        self.kwargs_roll = {"cls_start" : cls_start, "cls_end" : cls_end,
-                            "patch_start" : patch_start, "patch_end" : patch_end}
-
-        # Section C. Setting
+        self.num_classes = num_classes
+        self.model = model.to(device)
         hooks = [{"name_i": 'attn_drop', "name_o": 'decoder', "fn_f": self.attn_forward, "fn_b": self.attn_backward},
-                 {"name_i": 'qkv', "name_o": 'decoder', "fn_f": self.qkv_forward, "fn_b":
-                 {"name_i": 'head', "name_o": 'decoder', "fn_f": self.head_forward, "fn_b":
-                 {"name_i": 'patch_embed.norm', "name_o": 'decoder', "fn_f": self.input_forward, "fn_b":
-        self.activate = activate
-
-        self.amp_autocast = amp_autocast
-        self.device = device
+                 {"name_i": 'qkv', "name_o": 'decoder', "fn_f": self.qkv_forward, "fn_b": None},
+                 {"name_i": 'head', "name_o": 'decoder', "fn_f": self.head_forward, "fn_b": None},
+                 {"name_i": 'patch_embed.norm', "name_o": 'decoder', "fn_f": self.input_forward, "fn_b": None}]

         for name, module in self.model.named_modules():
-            for hook in hooks:
-                if hook["name_i"] in name and hook["name_o"] not in name:
-                    module.register_forward_hook(hook["fn_f"])
-                    module.register_backward_hook(hook["fn_b"])
-        self.resetInfo()
+            for idx, hook in enumerate(hooks):
+                if hook["name_i"] in name and hook["name_o"] not in name and activate[idx]:
+                    if hook["fn_f"]: module.register_forward_hook(hook["fn_f"])
+                    if hook["fn_b"]: module.register_backward_hook(hook["fn_b"])

-    def attn_forward(self, module, input, output):
-
-        if self.activate[0]: self.info["attn"]["f"].append(output.detach())
+    def attn_forward(self, module, input, output): # input/output : 1 * (8, 3, 197, 197) / (8, 3, 197, 197)
+        self.info["attn"]["f"].append(output.detach())

-    def attn_backward(self, module, grad_input, grad_output):
-
-        if self.activate[0]: self.info["attn"]["b"].append(grad_input[0].detach())
+    def attn_backward(self, module, grad_input, grad_output): # # input/output : 1 * (8, 3, 197, 192) / (8, 3, 197, 576)
+        self.info["attn"]["b"].append(grad_input[0].detach())

-    def qkv_forward(self, module, input, output):
-
-        if self.activate[1]: self.info["qkv"]["f"].append(output.detach())
+    def qkv_forward(self, module, input, output): # # input/output : 1 * (8, 197, 192) / (8, 197, 576)
+        self.info["qkv"]["f"].append(output.detach())

-    def
-
-
+    def head_forward(self, module, input, output): # input : 1 * (8(B), 192(D)), output : (8(B), 1000(C))
+        B = output.shape[0]
+        pred = targetPred(output, self.targets, topk=5)
+        self.info["head"]["TF"] += (pred[:, 0] == pred[:, 1])

-
-
-
-        B = output.shape[0]
-        pred = targetPred(output, self.targets, topk=5)
-        self.info["head"]["TF"] += (pred[:, 0] == pred[:, 1])
-
-        acc1, acc5 = accuracy(output, self.targets, topk=(1,5))
-        self.info["head"]["acc1"].update(acc1.item(), n=B)
-        self.info["head"]["acc5"].update(acc5.item(), n=B)
-
-    def head_backward(self, module, grad_input, grad_output):
-        pass
+        acc1, acc5 = accuracy(output, self.targets, topk=(1,5))
+        self.info["head"]["acc1"].update(acc1.item(), n=B)
+        self.info["head"]["acc5"].update(acc5.item(), n=B)

     def input_forward(self, module, input, output):
-
-
-        self.info["input"]["sim"] += (norm @ norm.transpose(-1, -2)).mean(dim=(-1, -2))
-
-    def input_backward(self, module, grad_input, grad_output):
-        pass
+        norm = F.normalize(output, dim=-1)
+        self.info["input"]["sim"] += (norm @ norm.transpose(-1, -2)).mean(dim=(-1, -2))

     def resetInfo(self):
         self.info = {"attn" : {"f": [], "b": []},
@@ -151,7 +120,7 @@ class Analysis:
             self.info["attn"]["b"] = []
             self.model.zero_grad()
             if index == None: index = output.max(dim=1)[1]
-            index = torch.eye(self.num_classes, device=
+            index = torch.eye(self.num_classes, device=device)[index]
             loss = (output * index).sum()
             loss.backward(retain_graph=True)
             grad = self.info["attn"]["b"]
@@ -162,31 +131,27 @@ class Analysis:


 if __name__ == '__main__':
-
-    dataset_name, device, amp_autocast, debug = "imagenet", 'cuda', torch.cuda.amp.autocast, True
+    dataset_name, device, debug = "imagenet", 'cuda', True
     data_path, num_classes, _, _ = data2path(dataset_name)
-
-    # VIEW : IMG, SALIENCY:ATTN, SALIENCY:OPENCV, SALIENCY:GRAD, ATTN. MOVEMENT
-    # ACTIVATE : ATTN, QKV, HEAD
+    activate = [True, False, False, False] # [ATTN, QKV, HEAD]
     analysis = [0] # [0] : INPUT TYPE, [0 : SAMPLE + POS, 1 : SAMPLE, 2 : POS]

     dataset = JDataset(data_path, dataset_name, device=device)
-
-    data_idxs = [[1, 0]]
+    data_idxs = [[c, i] for i in range(1000) for c in range(50)]

-    # Section B. Model
-    model_number, model_name = 0, "deit_tiny_patch16_224" # deit, vit | tiny, small, base
-    # model_number, model_name = 1, "deit_tiny_patch16_224"
-
-    # Section C. Setting
     modelMaker = JModel(num_classes, device=device)
-    model = modelMaker.getModel(
+    model = modelMaker.getModel(2, "ViT-B/16")
+
+    classnames = read_classnames("/hub_data1/joonmyung/data/imagenet/classnames.txt")
+    model = ZeroShotInference(model, classnames, prompt="a photo of a {}.", device=device)
+
     model = Analysis(model, analysis = analysis, activate = activate, device=device)
+
+    view = [False, True, False, False, True] # [IMG, SALIENCY:ATTN, SALIENCY:OPENCV, SALIENCY:GRAD, ATTN. MOVEMENT]
     for idx, data_idx in enumerate(data_idxs):
         print(f"------------------------- [{data_idx[0]}]/[{data_idx[1]}] -------------------------")

         sample, target, label_name = dataset[data_idx[0], data_idx[1]]
-        # sample, _, img, _ = dataset.getItemPath('/hub_data1/joonmyung/data/imagenet/train/n01440764/n01440764_39.JPEG')
         output = model(sample)
         if view[0]:
             drawImgPlot(unNormalize(sample, "imagenet"))
@@ -219,22 +184,6 @@ if __name__ == '__main__':
             data_vidTLDR = overlay(sample, results["vidTLDR"], dataset_name)
             drawImgPlot(data_vidTLDR, col=col)

-            print(1)
-
-            # roll = F.normalize(results["rollout"].reshape(12, 196), dim=-1)
-
-            # datas_rollout = overlay(sample, rollout, dataset_name)
-            # drawImgPlot(datas_rollout, col=col)
-
-            # datas_attn = overlay(sample, attentive, dataset_name)
-            # drawImgPlot(datas_attn, col=col)
-
-            # a = attentive[5]
-            # b = torch.stack([a.clamp(max=a.quantile(1 - v_ratio)) for v_ratio in [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55]])
-            # datas_attn = overlay(sample, b, dataset_name)
-            # drawImgPlot(datas_attn, col=col)
-            # print(1)
-
         if view[2]: # SALIENCY W/ DATA
             img = np.array(dataset[data_idx[0], data_idx[1], 2][0])

@@ -253,12 +202,8 @@ if __name__ == '__main__':
             output = model(sample)
             attn = torch.stack(model.info["attn"]["f"], dim=1).mean(dim=[2,3])[0,-2]
             topK = attn[1:].topk(k, -1, True)[1]
-            # a = torch.autograd.grad(attn.sum(), samples, retain_graph=True)[0].sum(dim=1)
             a = torch.autograd.grad(output[:,3], sample, retain_graph=True)[0].sum(dim=1)
             b = F.interpolate(a.unsqueeze(0), scale_factor=0.05, mode='nearest')[0]
-            # drawHeatmap(b)
-            print(1)
-            # to_np(torch.stack([attn[:, :, 0], attn[:, :, 1:].sum(dim=-1)], -1)[0])

         if view[4]: # ATTENTION MOVEMENT (FROM / TO)
             attn = torch.stack(model.info["attn"]["f"]).mean(dim=2).transpose(0,1) # (8 (B), 12 (L), 197(T_Q), 197(T_K))
@@ -270,5 +215,4 @@ if __name__ == '__main__':
             # how much each patch attends to other tokens
             cls2patch = attn[:, :, 1:, 0].mean(dim=2)
             patch2patch = attn[:, :, 1:, 1:].mean(dim=2).sum(dim=-1)
-            # to_np(torch.stack([cls2cls.mean(dim=0), patch2cls.mean(dim=0), cls2patch.mean(dim=0), patch2patch.mean(dim=0)]))
-            print(1)
+            # to_np(torch.stack([cls2cls.mean(dim=0), patch2cls.mean(dim=0), cls2patch.mean(dim=0), patch2patch.mean(dim=0)]))
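
The results["rollout"] and results["attentive"] keys above come from joonmyung.draw.saliency, whose source is not included in this diff. As a rough reference for what a rollout-style aggregation of the hooked attention maps typically looks like (an assumption about what "rollout" denotes here, not the package's implementation):

    import torch

    def attention_rollout(attns, head_fusion="mean", discard_ratio=0.0):
        # attns: list of per-layer attention maps, each shaped (B, heads, T, T)
        B, _, T, _ = attns[0].shape
        result = torch.eye(T, device=attns[0].device).repeat(B, 1, 1)
        for attn in attns:
            a = attn.mean(dim=1) if head_fusion == "mean" else attn.max(dim=1).values
            if discard_ratio > 0:
                flat = a.flatten(1)
                k = int(flat.shape[1] * discard_ratio)
                weakest = flat.topk(k, dim=1, largest=False).indices
                flat.scatter_(1, weakest, 0.0)      # crude variant: drop the weakest links
            a = a + torch.eye(T, device=a.device)   # account for the residual connection
            a = a / a.sum(dim=-1, keepdim=True)     # re-normalize each row
            result = torch.bmm(a, result)           # accumulate attention across layers
        return result[:, 0, 1:]                     # CLS-to-patch relevance, (B, T - 1)

    # e.g. attention_rollout(captured_attns).reshape(-1, 14, 14) for a 224x224 input with patch size 16
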
@@ -0,0 +1,218 @@
+from joonmyung.draw import saliency, overlay, drawImgPlot, unNormalize
+from joonmyung.metric import targetPred, accuracy
+from joonmyung.analysis.dataset import JDataset
+from joonmyung.analysis.model import JModel, ZeroShotInference
+from joonmyung.meta_data import data2path
+from joonmyung.log import AverageMeter
+import torch.nn.functional as F
+from tqdm import tqdm
+import numpy as np
+import torch
+import cv2
+
+from joonmyung.utils import read_classnames
+
+
+def anaModel(transformer_class):
+    class VisionTransformer(transformer_class):
+        def forward_features(self, x):
+            x = self.patch_embed(x)
+            if hasattr(self, "cls_token"):
+                cls_token = self.cls_token.expand(x.shape[0], -1, -1) # stole cls_tokens impl from Phil Wang, thanks
+                x = torch.cat((cls_token, x), dim=1)
+
+            if self.analysis[0] == 1: # PATCH
+                x = x # (8, 197, 192)
+            elif self.analysis[0] == 2: # POS
+                x = self.pos_embed # (1, 197, 192)
+            elif self.analysis[0] == 3: # PATCH (RANDOM I) + POS
+                x = torch.rand_like(self.pos_embed, device=x.device) + self.pos_embed
+            elif self.analysis[0] == 4: # PATCH (RANDOM II) + POS
+                x = torch.rand_like(self.cls_token, device=x.device).repeat(1, x.shape[1], 1) + self.pos_embed
+            else: # PATCH + POS
+                x = x + self.pos_embed
+            x = self.pos_drop(x)
+
+            x = self.blocks(x)
+            x = self.norm(x)
+            if hasattr(self, "cls_token") and hasattr(self, "cls_token"):
+                return x[:, 0], x[:, 1]
+            elif hasattr(self, "cls_token"):
+                return self.pre_logits(x[:, 0])
+            else:
+                return self.pre_logits(x.mean(dim=1))
+
+    return VisionTransformer
+
+class Analysis:
+    def __init__(self, model, analysis = [0], activate = [True, False, False, False], num_classes = 1000, device="cuda"):
+        if sum(analysis):
+            model_ = anaModel(model.__class__)
+            model.__class__ = model_
+            model.analysis = analysis
+
+        self.num_classes = num_classes
+        self.model = model.to(device)
+        hooks = [{"name_i": 'attn_drop', "name_o": 'decoder', "fn_f": self.attn_forward, "fn_b": self.attn_backward},
+                 {"name_i": 'qkv', "name_o": 'decoder', "fn_f": self.qkv_forward, "fn_b": None},
+                 {"name_i": 'head', "name_o": 'decoder', "fn_f": self.head_forward, "fn_b": None},
+                 {"name_i": 'patch_embed.norm', "name_o": 'decoder', "fn_f": self.input_forward, "fn_b": None}]
+
+        for name, module in self.model.named_modules():
+            for idx, hook in enumerate(hooks):
+                if hook["name_i"] in name and hook["name_o"] not in name and activate[idx]:
+                    if hook["fn_f"]: module.register_forward_hook(hook["fn_f"])
+                    if hook["fn_b"]: module.register_backward_hook(hook["fn_b"])
+
+    def attn_forward(self, module, input, output): # input/output : 1 * (8, 3, 197, 197) / (8, 3, 197, 197)
+        self.info["attn"]["f"].append(output.detach())
+
+    def attn_backward(self, module, grad_input, grad_output): # # input/output : 1 * (8, 3, 197, 192) / (8, 3, 197, 576)
+        self.info["attn"]["b"].append(grad_input[0].detach())
+
+    def qkv_forward(self, module, input, output): # # input/output : 1 * (8, 197, 192) / (8, 197, 576)
+        self.info["qkv"]["f"].append(output.detach())
+
+    def head_forward(self, module, input, output): # input : 1 * (8(B), 192(D)), output : (8(B), 1000(C))
+        B = output.shape[0]
+        pred = targetPred(output, self.targets, topk=5)
+        self.info["head"]["TF"] += (pred[:, 0] == pred[:, 1])
+
+        acc1, acc5 = accuracy(output, self.targets, topk=(1,5))
+        self.info["head"]["acc1"].update(acc1.item(), n=B)
+        self.info["head"]["acc5"].update(acc5.item(), n=B)
+
+    def input_forward(self, module, input, output):
+        norm = F.normalize(output, dim=-1)
+        self.info["input"]["sim"] += (norm @ norm.transpose(-1, -2)).mean(dim=(-1, -2))
+
+    def resetInfo(self):
+        self.info = {"attn" : {"f": [], "b": []},
+                     "qkv" : {"f": [], "b": []},
+                     "head" : {"acc1" : AverageMeter(),
+                               "acc5" : AverageMeter(),
+                               "TF" : [], "pred" : []},
+                     "input": {"sim" : []}
+                     }
+
+    def __call__(self, samples, targets = None, **kwargs):
+        self.resetInfo()
+        self.model.zero_grad()
+        self.model.eval()
+
+        if type(samples) == torch.Tensor:
+            self.targets = targets
+            outputs = self.model(samples, **kwargs)
+            return outputs
+        else:
+            for sample, targets in tqdm(samples):
+                self.targets = targets
+                _ = self.model(sample)
+            return False
+
+    def anaSaliency(self, attn=True, grad=False, output=None, index=None,
+                    head_fusion="mean", discard_ratios=[0.], data_from="cls",
+                    reshape=False, activate= [True, True, False], device="cuda"):
+
+        if attn:
+            attn = self.info["attn"]["f"]
+        if grad:
+            self.info["attn"]["b"] = []
+            self.model.zero_grad()
+            if index == None: index = output.max(dim=1)[1]
+            index = torch.eye(self.num_classes, device=device)[index]
+            loss = (output * index).sum()
+            loss.backward(retain_graph=True)
+            grad = self.info["attn"]["b"]
+
+        return saliency(attn, grad, activate=activate,
+                        head_fusion=head_fusion, discard_ratios=discard_ratios, data_from=data_from,
+                        reshape=reshape, device=device)
+
+
+if __name__ == '__main__':
+    dataset_name, device, debug = "imagenet", 'cuda', True
+    data_path, num_classes, _, _ = data2path(dataset_name)
+    activate = [True, False, False, False] # [ATTN, QKV, HEAD]
+    analysis = [0] # [0] : INPUT TYPE, [0 : SAMPLE + POS, 1 : SAMPLE, 2 : POS]
+
+    dataset = JDataset(data_path, dataset_name, device=device)
+    data_idxs = [[c, i] for i in range(1000) for c in range(50)]
+
+    modelMaker = JModel(num_classes, device=device)
+    model = modelMaker.getModel(2, "ViT-B/16")
+
+    classnames = read_classnames("/hub_data1/joonmyung/data/imagenet/classnames.txt")
+    model = ZeroShotInference(model, classnames, prompt="a photo of a {}.", device=device)
+
+    model = Analysis(model, analysis = analysis, activate = activate, device=device)
+
+    view = [False, True, False, False, True] # [IMG, SALIENCY:ATTN, SALIENCY:OPENCV, SALIENCY:GRAD, ATTN. MOVEMENT]
+    for idx, data_idx in enumerate(data_idxs):
+        print(f"------------------------- [{data_idx[0]}]/[{data_idx[1]}] -------------------------")
+
+        sample, target, label_name = dataset[data_idx[0], data_idx[1]]
+        output = model(sample)
+        if view[0]:
+            drawImgPlot(unNormalize(sample, "imagenet"))
+
+        if view[1]: # SALIENCY W/ MODEL
+            col, discard_ratios, v_ratio, head_fusion, data_from = 12, [0.0], 0.0, "mean", "patch" # Attention, Gradient
+            results = model.anaSaliency(True, False, output, discard_ratios=discard_ratios,
+                                        head_fusion = head_fusion, index=target, data_from=data_from,
+                                        reshape = True, activate=[True, True, True]) # (12(L), 8(B), 14(H), 14(W))
+            data_roll = overlay(sample, results["rollout"], dataset_name)
+            drawImgPlot(data_roll, col=col)
+
+            data_attn = overlay(sample, results["attentive"], dataset_name)
+            drawImgPlot(data_attn, col=col)
+
+            data_vidTLDR = overlay(sample, results["vidTLDR"], dataset_name)
+            drawImgPlot(data_vidTLDR, col=col)
+
+            discard_ratios, v_ratio, head_fusion, data_from = [0.0], 0.1, "mean", "cls"
+            results = model.anaSaliency(True, False, output, discard_ratios=discard_ratios,
+                                        head_fusion=head_fusion, index=target, data_from=data_from,
+                                        reshape=True, activate=[True, True, True]) # (12(L), 8(B), 14(H), 14(W))
+
+            data_roll = overlay(sample, results["rollout"], dataset_name)
+            drawImgPlot(data_roll, col=col)
+
+            data_attn = overlay(sample, results["attentive"], dataset_name)
+            drawImgPlot(data_attn, col=col)
+
+            data_vidTLDR = overlay(sample, results["vidTLDR"], dataset_name)
+            drawImgPlot(data_vidTLDR, col=col)
+
+        if view[2]: # SALIENCY W/ DATA
+            img = np.array(dataset[data_idx[0], data_idx[1], 2][0])
+
+            saliency = cv2.saliency.StaticSaliencySpectralResidual_create()
+            (success, saliencyMap) = saliency.computeSaliency(img)
+            saliencyMap = (saliencyMap * 255).astype("uint8")
+
+            saliency = cv2.saliency.StaticSaliencyFineGrained_create()
+            (success, saliencyFineMap) = saliency.computeSaliency(img)
+            threshMap = cv2.threshold((saliencyFineMap * 255).astype("uint8"), 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
+            # plt.imshow(threshMap)
+            # plt.show()
+
+        if view[3]: # SALIENCY FOR INPUT
+            sample.requires_grad, model.detach, k = True, False, 3
+            output = model(sample)
+            attn = torch.stack(model.info["attn"]["f"], dim=1).mean(dim=[2,3])[0,-2]
+            topK = attn[1:].topk(k, -1, True)[1]
+            a = torch.autograd.grad(output[:,3], sample, retain_graph=True)[0].sum(dim=1)
+            b = F.interpolate(a.unsqueeze(0), scale_factor=0.05, mode='nearest')[0]
+
+        if view[4]: # ATTENTION MOVEMENT (FROM / TO)
+            attn = torch.stack(model.info["attn"]["f"]).mean(dim=2).transpose(0,1) # (8 (B), 12 (L), 197(T_Q), 197(T_K))
+
+            # how much the CLS token attends to other tokens
+            cls2cls = attn[:, :, :1, 0].mean(dim=2) # (8(B), 12(L))
+            patch2cls = attn[:, :, :1, 1:].mean(dim=2).sum(dim=-1) # (8(B), 12(L))
+
+            # how much each patch attends to other tokens
+            cls2patch = attn[:, :, 1:, 0].mean(dim=2)
+            patch2patch = attn[:, :, 1:, 1:].mean(dim=2).sum(dim=-1)
+            # to_np(torch.stack([cls2cls.mean(dim=0), patch2cls.mean(dim=0), cls2patch.mean(dim=0), patch2patch.mean(dim=0)]))