hjxdl-0.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hdl/__init__.py +0 -0
- hdl/_version.py +16 -0
- hdl/args/__init__.py +0 -0
- hdl/args/loss_args.py +5 -0
- hdl/controllers/__init__.py +0 -0
- hdl/controllers/al/__init__.py +0 -0
- hdl/controllers/al/al.py +0 -0
- hdl/controllers/al/dispatcher.py +0 -0
- hdl/controllers/al/feedback.py +0 -0
- hdl/controllers/explain/__init__.py +0 -0
- hdl/controllers/explain/shapley.py +293 -0
- hdl/controllers/explain/subgraphx.py +865 -0
- hdl/controllers/train/__init__.py +0 -0
- hdl/controllers/train/rxn_train.py +219 -0
- hdl/controllers/train/train.py +50 -0
- hdl/controllers/train/train_ginet.py +316 -0
- hdl/controllers/train/trainer_base.py +155 -0
- hdl/controllers/train/trainer_iterative.py +389 -0
- hdl/data/__init__.py +0 -0
- hdl/data/dataset/__init__.py +0 -0
- hdl/data/dataset/base_dataset.py +98 -0
- hdl/data/dataset/fp/__init__.py +0 -0
- hdl/data/dataset/fp/fp_dataset.py +122 -0
- hdl/data/dataset/graph/__init__.py +0 -0
- hdl/data/dataset/graph/chiral.py +62 -0
- hdl/data/dataset/graph/gin.py +255 -0
- hdl/data/dataset/graph/molnet.py +362 -0
- hdl/data/dataset/loaders/__init__.py +0 -0
- hdl/data/dataset/loaders/chiral_graph.py +71 -0
- hdl/data/dataset/loaders/collate_funcs/__init__.py +0 -0
- hdl/data/dataset/loaders/collate_funcs/fp.py +56 -0
- hdl/data/dataset/loaders/collate_funcs/rxn.py +40 -0
- hdl/data/dataset/loaders/general.py +23 -0
- hdl/data/dataset/loaders/spliter.py +86 -0
- hdl/data/dataset/samplers/__init__.py +0 -0
- hdl/data/dataset/samplers/chiral.py +19 -0
- hdl/data/dataset/seq/__init__.py +0 -0
- hdl/data/dataset/seq/rxn_dataset.py +61 -0
- hdl/data/dataset/utils.py +31 -0
- hdl/data/to_mols.py +0 -0
- hdl/features/__init__.py +0 -0
- hdl/features/fp/__init__.py +0 -0
- hdl/features/fp/features_generators.py +235 -0
- hdl/features/graph/__init__.py +0 -0
- hdl/features/graph/featurization.py +297 -0
- hdl/features/utils/__init__.py +0 -0
- hdl/features/utils/utils.py +111 -0
- hdl/layers/__init__.py +0 -0
- hdl/layers/general/__init__.py +0 -0
- hdl/layers/general/gp.py +14 -0
- hdl/layers/general/linear.py +641 -0
- hdl/layers/graph/__init__.py +0 -0
- hdl/layers/graph/chiral_graph.py +230 -0
- hdl/layers/graph/gcn.py +16 -0
- hdl/layers/graph/gin.py +45 -0
- hdl/layers/graph/tetra.py +158 -0
- hdl/layers/graph/transformer.py +188 -0
- hdl/layers/sequential/__init__.py +0 -0
- hdl/metric_loss/__init__.py +0 -0
- hdl/metric_loss/loss.py +79 -0
- hdl/metric_loss/metric.py +178 -0
- hdl/metric_loss/multi_label.py +42 -0
- hdl/metric_loss/nt_xent.py +65 -0
- hdl/models/__init__.py +0 -0
- hdl/models/chiral_gnn.py +176 -0
- hdl/models/fast_transformer.py +234 -0
- hdl/models/ginet.py +189 -0
- hdl/models/linear.py +137 -0
- hdl/models/model_dict.py +18 -0
- hdl/models/norm_flows.py +33 -0
- hdl/models/optim_dict.py +16 -0
- hdl/models/rxn.py +63 -0
- hdl/models/utils.py +83 -0
- hdl/ops/__init__.py +0 -0
- hdl/ops/utils.py +42 -0
- hdl/optims/__init__.py +0 -0
- hdl/optims/nadam.py +86 -0
- hdl/utils/__init__.py +0 -0
- hdl/utils/chemical_tools/__init__.py +2 -0
- hdl/utils/chemical_tools/query_info.py +149 -0
- hdl/utils/chemical_tools/sdf.py +20 -0
- hdl/utils/database_tools/__init__.py +0 -0
- hdl/utils/database_tools/connect.py +28 -0
- hdl/utils/general/__init__.py +0 -0
- hdl/utils/general/glob.py +21 -0
- hdl/utils/schedulers/__init__.py +0 -0
- hdl/utils/schedulers/norm_lr.py +108 -0
- hjxdl-0.0.1.dist-info/METADATA +19 -0
- hjxdl-0.0.1.dist-info/RECORD +91 -0
- hjxdl-0.0.1.dist-info/WHEEL +5 -0
- hjxdl-0.0.1.dist-info/top_level.txt +1 -0
hdl/layers/graph/chiral_graph.py
ADDED
@@ -0,0 +1,230 @@
+from torch_geometric.nn import MessagePassing
+from torch_geometric.utils import degree
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .tetra import (
+    # get_tetra_update,
+    TETRA_UPDATE_DICT
+)
+
+
+class GCNConv(MessagePassing):
+    def __init__(
+        self,
+        # args,
+        hidden_size,
+        tetra,
+        message
+    ):
+        super(GCNConv, self).__init__(aggr='add')
+        self.linear = nn.Linear(hidden_size, hidden_size)
+        self.batch_norm = nn.BatchNorm1d(hidden_size)
+        self.tetra = tetra  # bool
+        if self.tetra:
+            # self.tetra_update = get_tetra_update(args)
+            self.tetra_update = TETRA_UPDATE_DICT[message](hidden_size)
+
+    def forward(
+        self,
+        x,
+        edge_index,
+        edge_attr,
+        parity_atoms
+    ):
+
+        # no edge updates
+        x = self.linear(x)
+
+        # Compute normalization
+        row, col = edge_index
+        deg = degree(col, x.size(0), dtype=x.dtype) + 1
+        deg_inv_sqrt = deg.pow(-0.5)
+        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
+        x_new = self.propagate(edge_index, x=x, edge_attr=edge_attr, norm=norm)
+
+        if self.tetra:
+            tetra_ids = parity_atoms.nonzero().squeeze(1)
+            if tetra_ids.nelement() != 0:
+                x_new[tetra_ids] = self.tetra_message(x, edge_index, edge_attr, tetra_ids, parity_atoms)
+        x = x_new + F.relu(x)
+
+        return self.batch_norm(x), edge_attr
+
+    def message(self, x_j, edge_attr, norm):
+        return norm.view(-1, 1) * F.relu(x_j + edge_attr)
+
+    def tetra_message(self, x, edge_index, edge_attr, tetra_ids, parity_atoms):
+
+        row, col = edge_index
+        tetra_nei_ids = torch.cat([row[col == i].unsqueeze(0) for i in range(x.size(0)) if i in tetra_ids])
+
+        # calculate pseudo tetra degree aligned with GCN method
+        deg = degree(col, x.size(0), dtype=x.dtype)
+        t_deg = deg[tetra_nei_ids]
+        t_deg_inv_sqrt = t_deg.pow(-0.5)
+        t_norm = 0.5 * t_deg_inv_sqrt.mean(dim=1)
+
+        # switch entries for -1 rdkit labels
+        ccw_mask = parity_atoms[tetra_ids] == -1
+        tetra_nei_ids[ccw_mask] = tetra_nei_ids.clone()[ccw_mask][:, [1, 0, 2, 3]]
+
+        # calculate reps
+        edge_ids = torch.cat([tetra_nei_ids.view(1, -1), tetra_ids.repeat_interleave(4).unsqueeze(0)], dim=0)
+        # dense_edge_attr = to_dense_adj(edge_index, batch=None, edge_attr=edge_attr).squeeze(0)
+        # edge_reps = dense_edge_attr[edge_ids[0], edge_ids[1], :].view(tetra_nei_ids.size(0), 4, -1)
+        attr_ids = [torch.where((a == edge_index.t()).all(dim=1))[0] for a in edge_ids.t()]
+        edge_reps = edge_attr[attr_ids, :].view(tetra_nei_ids.size(0), 4, -1)
+        reps = x[tetra_nei_ids] + edge_reps
+
+        return t_norm.unsqueeze(-1) * self.tetra_update(reps)
+
+
+class GINEConv(MessagePassing):
+    def __init__(
+        self,
+        # args,
+        hidden_size,
+        tetra,
+        message
+    ):
+        super(GINEConv, self).__init__(aggr="add")
+        self.eps = nn.Parameter(torch.Tensor([0]))
+        self.mlp = nn.Sequential(nn.Linear(hidden_size, 2 * hidden_size),
+                                 nn.BatchNorm1d(2 * hidden_size),
+                                 nn.ReLU(),
+                                 nn.Linear(2 * hidden_size, hidden_size))
+        self.batch_norm = nn.BatchNorm1d(hidden_size)
+        self.tetra = tetra
+        if self.tetra:
+            # self.tetra_update = get_tetra_update(args)
+            self.tetra_update = TETRA_UPDATE_DICT[message](hidden_size)
+
+    def forward(self, x, edge_index, edge_attr, parity_atoms):
+        # no edge updates
+        x_new = self.propagate(edge_index, x=x, edge_attr=edge_attr)
+
+        if self.tetra:
+            tetra_ids = parity_atoms.nonzero().squeeze(1)
+            if tetra_ids.nelement() != 0:
+                x_new[tetra_ids] = self.tetra_message(x, edge_index, edge_attr, tetra_ids, parity_atoms)
+
+        x = self.mlp((1 + self.eps) * x + x_new)
+        return self.batch_norm(x), edge_attr
+
+    def message(self, x_j, edge_attr):
+        return F.relu(x_j + edge_attr)
+
+    def tetra_message(self, x, edge_index, edge_attr, tetra_ids, parity_atoms):
+
+        row, col = edge_index
+        tetra_nei_ids = torch.cat([row[col == i].unsqueeze(0) for i in range(x.size(0)) if i in tetra_ids])
+
+        # switch entries for -1 rdkit labels
+        ccw_mask = parity_atoms[tetra_ids] == -1
+        tetra_nei_ids[ccw_mask] = tetra_nei_ids.clone()[ccw_mask][:, [1, 0, 2, 3]]
+
+        # calculate reps
+        edge_ids = torch.cat([tetra_nei_ids.view(1, -1), tetra_ids.repeat_interleave(4).unsqueeze(0)], dim=0)
+        # dense_edge_attr = to_dense_adj(edge_index, batch=None, edge_attr=edge_attr).squeeze(0)
+        # edge_reps = dense_edge_attr[edge_ids[0], edge_ids[1], :].view(tetra_nei_ids.size(0), 4, -1)
+        attr_ids = [torch.where((a == edge_index.t()).all(dim=1))[0] for a in edge_ids.t()]
+        edge_reps = edge_attr[attr_ids, :].view(tetra_nei_ids.size(0), 4, -1)
+        reps = x[tetra_nei_ids] + edge_reps
+
+        return self.tetra_update(reps)
+
+
+class DMPNNConv(MessagePassing):
+    def __init__(
+        self,
+        # args,
+        hidden_size,
+        tetra,
+        message
+    ):
+        super(DMPNNConv, self).__init__(aggr='add')
+        self.lin = nn.Linear(hidden_size, hidden_size)
+        self.mlp = nn.Sequential(nn.Linear(hidden_size, hidden_size),
+                                 nn.BatchNorm1d(hidden_size),
+                                 nn.ReLU())
+        self.tetra = tetra
+        if self.tetra:
+            # self.tetra_update = get_tetra_update(args)
+            self.tetra_update = TETRA_UPDATE_DICT[message](hidden_size)
+
+    def forward(self, x, edge_index, edge_attr, parity_atoms, parity_bond_index):
+        row, col = edge_index
+        a_message = self.propagate(edge_index, x=None, edge_attr=edge_attr)
+
+        if self.tetra:
+            tetra_ids = parity_atoms.nonzero().squeeze(1)
+            if tetra_ids.nelement() != 0:
+                a_message[tetra_ids] = self.tetra_message(x, edge_index, edge_attr, tetra_ids, parity_atoms, parity_bond_index)
+
+        rev_message = torch.flip(edge_attr.view(edge_attr.size(0) // 2, 2, -1), dims=[1]).view(edge_attr.size(0), -1)
+        return a_message, self.mlp(a_message[row] - rev_message)
+
+    def message(self, x_j, edge_attr):
+        return F.relu(self.lin(edge_attr))
+
+    def tetra_message(self, x, edge_index, edge_attr, tetra_ids, parity_atoms, parity_bond_index):
+        edge_reps = edge_attr[parity_bond_index, :].view(parity_bond_index.size(0)//4, 4, -1)
+
+        return self.tetra_update(edge_reps)
+        # print('1')
+        row, col = edge_index
+
+        col_ids = torch.cat(
+            [(col == i).nonzero() for i in tetra_ids]
+        ).squeeze().unsqueeze(0)
+        tetra_nei_ids = row[col_ids].reshape(-1, 4)
+
+        # tetra_nei_ids = torch.cat([
+        #     row[col == i].unsqueeze(0)
+        #     for i in tetra_ids
+        # ])
+
+        # print('2')
+        # switch entries for -1 rdkit labels
+        ccw_mask = parity_atoms[tetra_ids] == -1
+        tetra_nei_ids[ccw_mask] = tetra_nei_ids.clone()[ccw_mask][:, [1, 0, 2, 3]]
+
+        # calculate reps
+        edge_ids = torch.cat([tetra_nei_ids.view(1, -1), tetra_ids.repeat_interleave(4).unsqueeze(0)], dim=0)
+        # dense_edge_attr = to_dense_adj(edge_index, batch=None, edge_attr=edge_attr).squeeze(0)
+        # edge_reps = dense_edge_attr[edge_ids[0], edge_ids[1], :].view(tetra_nei_ids.size(0), 4, -1)
+        # edge_index_T = edge_index.t()
+        # edge_ids_T = edge_ids.t()
+
+        # attr_ids = [
+        #     torch.where(
+        #         (a == edge_index_T).all(dim=1)
+        #     )[0]
+        #     for a in edge_ids_T
+        # ]
+        # attr_ids = torch.cat([(edge_index_T == i).nonzero() for i in edge_ids_T])[:, 0].unique()
+
+        edge_index_T = edge_index.t()
+        edge_ids_T = edge_ids.t()
+
+        c0 = torch.cartesian_prod(
+            edge_index_T[:, 0], edge_ids_T[:, 0]
+        )
+        c1 = torch.cartesian_prod(
+            edge_index_T[:, 1], edge_ids_T[:, 1]
+        )
+        diff = torch.abs(c0[:, 0] - c0[:, 1]) \
+            + torch.abs(c1[:, 0] - c1[:, 1])
+
+        attr_ids = torch.div(
+            (diff == 0).nonzero(as_tuple=True)[0],
+            edge_ids.size(1),
+            rounding_mode='floor'
+        )
+
+        edge_reps = edge_attr[attr_ids, :].view(tetra_nei_ids.size(0), 4, -1)
+
+        return self.tetra_update(edge_reps)
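The snippet below is a usage sketch, not part of the released wheel: it runs the chirality-aware GCNConv from chiral_graph.py on a toy molecular graph. The constructor and forward signatures follow the diff above; the toy graph, the feature width, and the choice of the 'tetra_permute_concat' update are illustrative assumptions.

import torch
from hdl.layers.graph.chiral_graph import GCNConv

hidden_size = 8
conv = GCNConv(hidden_size, tetra=True, message='tetra_permute_concat')

# Toy graph: 5 atoms, edges listed in both directions, no tetrahedral centres
# (parity_atoms is all zeros, so the tetra branch is skipped).
x = torch.randn(5, hidden_size)
edge_index = torch.tensor([[0, 1, 1, 2, 2, 3, 3, 4],
                           [1, 0, 2, 1, 3, 2, 4, 3]])
edge_attr = torch.randn(edge_index.size(1), hidden_size)
parity_atoms = torch.zeros(5, dtype=torch.long)

x_out, edge_attr_out = conv(x, edge_index, edge_attr, parity_atoms)
print(x_out.shape)  # torch.Size([5, 8])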
hdl/layers/graph/gcn.py
ADDED
@@ -0,0 +1,16 @@
+from torch import nn
+from torch_geometric.nn import GCNConv
+
+
+class GraphConv(nn.Module):
+    def __init__(self, num_features, num_out_features):
+        # Init parent
+        super(GraphConv, self).__init__()
+
+        # GCN layers
+        self.conv = GCNConv(num_features, num_out_features)
+
+    def forward(self, x, edge_index):
+
+        hidden = self.conv(x, edge_index)
+        return hidden
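A minimal sketch of the GraphConv wrapper above, which simply delegates to torch_geometric's GCNConv; the sizes and the toy 4-node cycle are illustrative, not shipped with the package.

import torch
from hdl.layers.graph.gcn import GraphConv

layer = GraphConv(num_features=16, num_out_features=32)
x = torch.randn(4, 16)                     # 4 nodes, 16 input features each
edge_index = torch.tensor([[0, 1, 2, 3],
                           [1, 2, 3, 0]])  # a directed 4-node cycle
hidden = layer(x, edge_index)
print(hidden.shape)  # torch.Size([4, 32])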
hdl/layers/graph/gin.py
ADDED
@@ -0,0 +1,45 @@
+import torch
+from torch import nn
+
+from torch_geometric.nn import MessagePassing
+from torch_geometric.utils import add_self_loops
+
+num_atom_type = 119  # including the extra mask tokens
+num_chirality_tag = 3
+
+num_bond_type = 5  # including aromatic and self-loop edge
+num_bond_direction = 3
+
+
+class GINEConv(MessagePassing):
+    def __init__(self, emb_dim):
+        super(GINEConv, self).__init__()
+        self.mlp = nn.Sequential(
+            nn.Linear(emb_dim, 2 * emb_dim),
+            nn.ReLU(),
+            nn.Linear(2 * emb_dim, emb_dim)
+        )
+        self.edge_embedding1 = nn.Embedding(num_bond_type, emb_dim)
+        self.edge_embedding2 = nn.Embedding(num_bond_direction, emb_dim)
+        nn.init.xavier_uniform_(self.edge_embedding1.weight.data)
+        nn.init.xavier_uniform_(self.edge_embedding2.weight.data)
+
+    def forward(self, x, edge_index, edge_attr):
+        # add self loops in the edge space
+        edge_index = add_self_loops(edge_index, num_nodes=x.size(0))[0]
+
+        # add features corresponding to self-loop edges.
+        self_loop_attr = torch.zeros(x.size(0), 2)
+        self_loop_attr[:, 0] = 4  # bond type for self-loop edge
+        self_loop_attr = self_loop_attr.to(edge_attr.device).to(edge_attr.dtype)
+        edge_attr = torch.cat((edge_attr, self_loop_attr), dim=0)
+
+        edge_embeddings = self.edge_embedding1(edge_attr[:, 0]) + self.edge_embedding2(edge_attr[:, 1])
+
+        return self.propagate(edge_index, x=x, edge_attr=edge_embeddings)
+
+    def message(self, x_j, edge_attr):
+        return x_j + edge_attr
+
+    def update(self, aggr_out):
+        return self.mlp(aggr_out)
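A minimal sketch of this file's GINEConv, not part of the package itself. The layer looks edge attributes up in nn.Embedding tables, so it expects integer [bond type, bond direction] pairs with indices below num_bond_type and num_bond_direction; the toy indices here are illustrative.

import torch
from hdl.layers.graph.gin import GINEConv

conv = GINEConv(emb_dim=64)
x = torch.randn(3, 64)                                      # 3 atoms, 64-dim embeddings
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]])
edge_attr = torch.tensor([[0, 0], [0, 0], [1, 2], [1, 2]])  # [bond type, bond direction] per edge
out = conv(x, edge_index, edge_attr)                        # self-loops are added internally
print(out.shape)  # torch.Size([3, 64])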
hdl/layers/graph/tetra.py
ADDED
@@ -0,0 +1,158 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import copy
+
+
+class TetraPermuter(nn.Module):
+
+    def __init__(
+        self,
+        hidden,
+        # device
+    ):
+        super(TetraPermuter, self).__init__()
+
+        self.W_bs = nn.ModuleList([copy.deepcopy(nn.Linear(hidden, hidden)) for _ in range(4)])
+        # self.device = device
+        self.drop = nn.Dropout(p=0.2)
+        self.reset_parameters()
+        self.mlp_out = nn.Sequential(nn.Linear(hidden, hidden),
+                                     nn.BatchNorm1d(hidden),
+                                     nn.ReLU(),
+                                     nn.Linear(hidden, hidden))
+
+        self.tetra_perms = torch.tensor([[0, 1, 2, 3],
+                                         [0, 2, 3, 1],
+                                         [0, 3, 1, 2],
+                                         [1, 0, 3, 2],
+                                         [1, 2, 0, 3],
+                                         [1, 3, 2, 0],
+                                         [2, 0, 1, 3],
+                                         [2, 1, 3, 0],
+                                         [2, 3, 0, 1],
+                                         [3, 0, 2, 1],
+                                         [3, 1, 0, 2],
+                                         [3, 2, 1, 0]])
+
+    def reset_parameters(self):
+        gain = 0.5
+        for W_b in self.W_bs:
+            nn.init.xavier_uniform_(W_b.weight, gain=gain)
+            gain += 0.5
+
+    def forward(self, x):
+
+        nei_messages_list = [self.drop(F.tanh(l(t))) for l, t in zip(self.W_bs, torch.split(x[:, self.tetra_perms, :], 1, dim=-2))]
+        nei_messages = torch.sum(self.drop(F.relu(torch.cat(nei_messages_list, dim=-2).sum(dim=-2))), dim=-2)
+
+        return self.mlp_out(nei_messages / 3.)
+
+
+class ConcatTetraPermuter(nn.Module):
+
+    def __init__(
+        self,
+        hidden,
+        # device
+    ):
+        super(ConcatTetraPermuter, self).__init__()
+
+        self.W_bs = nn.Linear(hidden * 4, hidden)
+        torch.nn.init.xavier_normal_(self.W_bs.weight, gain=1.0)
+        self.hidden = hidden
+        # self.device = device
+        self.drop = nn.Dropout(p=0.2)
+        self.mlp_out = nn.Sequential(nn.Linear(hidden, hidden),
+                                     nn.BatchNorm1d(hidden),
+                                     nn.ReLU(),
+                                     nn.Linear(hidden, hidden))
+
+        tetra_perms = torch.tensor([
+            [0, 1, 2, 3],
+            [0, 2, 3, 1],
+            [0, 3, 1, 2],
+            [1, 0, 3, 2],
+            [1, 2, 0, 3],
+            [1, 3, 2, 0],
+            [2, 0, 1, 3],
+            [2, 1, 3, 0],
+            [2, 3, 0, 1],
+            [3, 0, 2, 1],
+            [3, 1, 0, 2],
+            [3, 2, 1, 0]
+        ])
+        self.register_buffer('tetra_perms', tetra_perms)
+
+    def forward(self, x):
+
+        nei_messages = self.drop(
+            F.relu(
+                self.W_bs(
+                    x[
+                        :,
+                        self.tetra_perms,
+                        :
+                    ].flatten(start_dim=2)
+                )
+            )
+        )
+        nei_messages_sum = nei_messages.sum(dim=-2) / 3.
+        if nei_messages_sum.size(0) == 1:
+            nei_messages_sum_repeat = torch.repeat_interleave(nei_messages_sum, 2, dim=0)
+            return self.mlp_out(nei_messages_sum_repeat)[:1, ...]
+        return self.mlp_out(nei_messages_sum)
+
+
+class TetraDifferencesProduct(nn.Module):
+
+    def __init__(
+        self,
+        hidden
+    ):
+        super(TetraDifferencesProduct, self).__init__()
+
+        self.mlp_out = nn.Sequential(nn.Linear(hidden, hidden),
+                                     nn.BatchNorm1d(hidden),
+                                     nn.ReLU(),
+                                     nn.Linear(hidden, hidden))
+        self.register_buffer('indices', torch.arange(4))
+
+    def forward(self, x):
+
+        # indices = torch.arange(4).to(x.device)
+        message_tetra_nbs = [
+            x.index_select(dim=1, index=i).squeeze(1)
+            for i in self.indices
+        ]
+        message_tetra = torch.ones_like(message_tetra_nbs[0])
+
+        # note: this will zero out reps for chiral centers with multiple carbon neighbors on first pass
+        for i in range(4):
+            for j in range(i + 1, 4):
+                message_tetra = torch.mul(message_tetra, (message_tetra_nbs[i] - message_tetra_nbs[j]))
+        message_tetra = torch.sign(message_tetra) * torch.pow(torch.abs(message_tetra) + 1e-6, 1 / 6)
+        return self.mlp_out(message_tetra)
+
+
+# def get_tetra_update(
+#     hidden_size,
+#     device,
+#     message,
+# ):
+
+#     if message == 'tetra_permute':
+#         return TetraPermuter(hidden_size, device)
+#     elif message == 'tetra_permute_concat':
+#         return ConcatTetraPermuter(hidden_size, device)
+#     elif message == 'tetra_pd':
+#         return TetraDifferencesProduct(hidden_size)
+#     else:
+#         raise ValueError("Invalid message type.")
+
+
+TETRA_UPDATE_DICT = {
+    'tetra_permute': TetraPermuter,
+    'tetra_permute_concat': ConcatTetraPermuter,
+    'tetra_pd': TetraDifferencesProduct
+}
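A minimal sketch of the tetrahedral update modules above, selected through TETRA_UPDATE_DICT; it is not part of the released code. Each chiral centre contributes a (4, hidden) block of ordered neighbour representations, matching the reps assembled in chiral_graph.py; the random tensor and sizes are illustrative.

import torch
from hdl.layers.graph.tetra import TETRA_UPDATE_DICT

hidden = 8
update = TETRA_UPDATE_DICT['tetra_permute_concat'](hidden)

reps = torch.randn(6, 4, hidden)   # 6 chiral centres x 4 ordered neighbours x hidden features
out = update(reps)                 # permutation-pooled update, one vector per centre
print(out.shape)  # torch.Size([6, 8])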
hdl/layers/graph/transformer.py
ADDED
@@ -0,0 +1,188 @@
+import math
+from typing import Union, Tuple, Optional
+from torch_geometric.typing import PairTensor, Adj, OptTensor
+
+import torch
+from torch import Tensor
+import torch.nn.functional as F
+from torch.nn import Linear
+from torch_geometric.nn.conv import MessagePassing
+from torch_geometric.utils import softmax
+
+
+class TransformerConv(MessagePassing):
+    r"""The graph transformer operator from the `"Masked Label Prediction:
+    Unified Message Passing Model for Semi-Supervised Classification"
+    <https://arxiv.org/abs/2009.03509>`_ paper
+    .. math::
+        \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i +
+        \sum_{j \in \mathcal{N}(i)} \alpha_{i,j} \mathbf{W}_2 \mathbf{x}_{j},
+    where the attention coefficients :math:`\alpha_{i,j}` are computed via
+    multi-head dot product attention:
+    .. math::
+        \alpha_{i,j} = \textrm{softmax} \left(
+        \frac{(\mathbf{W}_3\mathbf{x}_i)^{\top} (\mathbf{W}_4\mathbf{x}_j)}
+        {\sqrt{d}} \right)
+    Args:
+        in_channels (int or tuple): Size of each input sample. A tuple
+            corresponds to the sizes of source and target dimensionalities.
+        out_channels (int): Size of each output sample.
+        heads (int, optional): Number of multi-head-attentions.
+            (default: :obj:`1`)
+        concat (bool, optional): If set to :obj:`False`, the multi-head
+            attentions are averaged instead of concatenated.
+            (default: :obj:`True`)
+        beta (bool, optional): If set, will combine aggregation and
+            skip information via
+            .. math::
+                \mathbf{x}^{\prime}_i = \beta_i \mathbf{W}_1 \mathbf{x}_i +
+                (1 - \beta_i) \underbrace{\left(\sum_{j \in \mathcal{N}(i)}
+                \alpha_{i,j} \mathbf{W}_2 \vec{x}_j \right)}_{=\mathbf{m}_i}
+            with :math:`\beta_i = \textrm{sigmoid}(\mathbf{w}_5^{\top}
+            [ \mathbf{x}_i, \mathbf{m}_i, \mathbf{x}_i - \mathbf{m}_i ])`
+            (default: :obj:`False`)
+        dropout (float, optional): Dropout probability of the normalized
+            attention coefficients which exposes each node to a stochastically
+            sampled neighborhood during training. (default: :obj:`0`)
+        edge_dim (int, optional): Edge feature dimensionality (in case
+            there are any). Edge features are added to the keys after
+            linear transformation, that is, prior to computing the
+            attention dot product. They are also added to final values
+            after the same linear transformation. The model is:
+            .. math::
+                \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i +
+                \sum_{j \in \mathcal{N}(i)} \alpha_{i,j} \left(
+                \mathbf{W}_2 \mathbf{x}_{j} + \mathbf{W}_6 \mathbf{e}_{ij}
+                \right),
+            where the attention coefficients :math:`\alpha_{i,j}` are now
+            computed via:
+            .. math::
+                \alpha_{i,j} = \textrm{softmax} \left(
+                \frac{(\mathbf{W}_3\mathbf{x}_i)^{\top}
+                (\mathbf{W}_4\mathbf{x}_j + \mathbf{W}_6 \mathbf{e}_{ij})}
+                {\sqrt{d}} \right)
+            (default :obj:`None`)
+        bias (bool, optional): If set to :obj:`False`, the layer will not learn
+            an additive bias. (default: :obj:`True`)
+        root_weight (bool, optional): If set to :obj:`False`, the layer will
+            not add the transformed root node features to the output and the
+            option :attr:`beta` is set to :obj:`False`. (default: :obj:`True`)
+        **kwargs (optional): Additional arguments of
+            :class:`torch_geometric.nn.conv.MessagePassing`.
+    """
+    _alpha: OptTensor
+
+    def __init__(self, in_channels: Union[int, Tuple[int,
+                 int]], out_channels: int,
+                 heads: int = 1, concat: bool = True, beta: bool = False,
+                 dropout: float = 0., edge_dim: Optional[int] = None,
+                 bias: bool = True, root_weight: bool = True, **kwargs):
+        kwargs.setdefault('aggr', 'add')
+        super(TransformerConv, self).__init__(node_dim=0, **kwargs)
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.heads = heads
+        self.beta = beta and root_weight
+        self.root_weight = root_weight
+        self.concat = concat
+        self.dropout = dropout
+        self.edge_dim = edge_dim
+
+        if isinstance(in_channels, int):
+            in_channels = (in_channels, in_channels)
+
+        self.lin_key = Linear(in_channels[0], heads * out_channels)
+        self.lin_query = Linear(in_channels[1], heads * out_channels)
+        self.lin_value = Linear(in_channels[0], heads * out_channels)
+        if edge_dim is not None:
+            self.lin_edge = Linear(edge_dim, heads * out_channels, bias=False)
+        else:
+            self.lin_edge = self.register_parameter('lin_edge', None)
+
+        if concat:
+            self.lin_skip = Linear(in_channels[1], heads * out_channels,
+                                   bias=bias)
+            if self.beta:
+                self.lin_beta = Linear(3 * heads * out_channels, 1, bias=False)
+            else:
+                self.lin_beta = self.register_parameter('lin_beta', None)
+        else:
+            self.lin_skip = Linear(in_channels[1], out_channels, bias=bias)
+            if self.beta:
+                self.lin_beta = Linear(3 * out_channels, 1, bias=False)
+            else:
+                self.lin_beta = self.register_parameter('lin_beta', None)
+
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        self.lin_key.reset_parameters()
+        self.lin_query.reset_parameters()
+        self.lin_value.reset_parameters()
+        if self.edge_dim:
+            self.lin_edge.reset_parameters()
+        self.lin_skip.reset_parameters()
+        if self.beta:
+            self.lin_beta.reset_parameters()
+
+    def forward(self, x: Union[Tensor, PairTensor], edge_index: Adj,
+                edge_attr: OptTensor = None):
+        """"""
+
+        if isinstance(x, Tensor):
+            x: PairTensor = (x, x)
+
+        # propagate_type: (x: PairTensor, edge_attr: OptTensor)
+        out = self.propagate(edge_index, x=x, edge_attr=edge_attr, size=None)
+
+        if self.concat:
+            out = out.view(
+                -1,
+                self.heads * self.out_channels
+            )
+        else:
+            out = out.mean(dim=1)
+
+        if self.root_weight:
+            x_r = self.lin_skip(x[1])
+            if self.lin_beta is not None:
+                beta = self.lin_beta(torch.cat([out, x_r, out - x_r], dim=-1))
+                beta = beta.sigmoid()
+                out = beta * x_r + (1 - beta) * out
+            else:
+                out += x_r
+
+        return out
+
+    def message(self, x_i: Tensor, x_j: Tensor, edge_attr: OptTensor,
+                index: Tensor, ptr: OptTensor,
+                size_i: Optional[int]) -> Tensor:
+
+        query = self.lin_query(x_i).view(-1, self.heads, self.out_channels)
+        key = self.lin_key(x_j).view(-1, self.heads, self.out_channels)
+
+        if self.lin_edge is not None:
+            assert edge_attr is not None
+            edge_attr = self.lin_edge(edge_attr).view(-1, self.heads,
+                                                      self.out_channels)
+            key += edge_attr
+
+        alpha = (query * key).sum(dim=-1) / math.sqrt(self.out_channels)
+        alpha = softmax(alpha, index, ptr, size_i)
+        alpha = F.dropout(alpha, p=self.dropout, training=self.training)
+
+        out = self.lin_value(x_j).view(-1, self.heads, self.out_channels)
+        if edge_attr is not None:
+            out += edge_attr
+
+        out *= alpha.view(-1, self.heads, 1)
+        return out
+
+    def __repr__(self):
+        return '{}({}, {}, heads={})'.format(
+            self.__class__.__name__,
+            self.in_channels,
+            self.out_channels,
+            self.heads
+        )
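A minimal sketch of the vendored TransformerConv above on a toy graph with edge features; channel sizes, head count, and tensor values are illustrative and not part of the package. With concat=True (the default) the output width is heads * out_channels.

import torch
from hdl.layers.graph.transformer import TransformerConv

conv = TransformerConv(in_channels=16, out_channels=8, heads=4, edge_dim=6)
x = torch.randn(5, 16)
edge_index = torch.tensor([[0, 1, 2, 3, 4],
                           [1, 2, 3, 4, 0]])
edge_attr = torch.randn(5, 6)         # one 6-dim feature vector per edge
out = conv(x, edge_index, edge_attr)
print(out.shape)  # torch.Size([5, 32]), i.e. heads * out_channels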