glam4cm 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glam4cm/__init__.py +9 -0
- glam4cm/data_loading/__init__.py +0 -0
- glam4cm/data_loading/data.py +631 -0
- glam4cm/data_loading/encoding.py +76 -0
- glam4cm/data_loading/graph_dataset.py +940 -0
- glam4cm/data_loading/metadata.py +84 -0
- glam4cm/data_loading/models_dataset.py +361 -0
- glam4cm/data_loading/utils.py +20 -0
- glam4cm/downstream_tasks/__init__.py +0 -0
- glam4cm/downstream_tasks/bert_edge_classification.py +144 -0
- glam4cm/downstream_tasks/bert_graph_classification.py +137 -0
- glam4cm/downstream_tasks/bert_graph_classification_comp.py +156 -0
- glam4cm/downstream_tasks/bert_link_prediction.py +145 -0
- glam4cm/downstream_tasks/bert_node_classification.py +164 -0
- glam4cm/downstream_tasks/cm_gpt_edge_classification.py +73 -0
- glam4cm/downstream_tasks/cm_gpt_node_classification.py +76 -0
- glam4cm/downstream_tasks/cm_gpt_pretraining.py +64 -0
- glam4cm/downstream_tasks/common_args.py +160 -0
- glam4cm/downstream_tasks/create_dataset.py +51 -0
- glam4cm/downstream_tasks/gnn_edge_classification.py +106 -0
- glam4cm/downstream_tasks/gnn_graph_cls.py +101 -0
- glam4cm/downstream_tasks/gnn_link_prediction.py +109 -0
- glam4cm/downstream_tasks/gnn_node_classification.py +103 -0
- glam4cm/downstream_tasks/tf_idf_text_classification.py +22 -0
- glam4cm/downstream_tasks/utils.py +35 -0
- glam4cm/downstream_tasks/word2vec_text_classification.py +108 -0
- glam4cm/embeddings/__init__.py +0 -0
- glam4cm/embeddings/bert.py +72 -0
- glam4cm/embeddings/common.py +43 -0
- glam4cm/embeddings/fasttext.py +0 -0
- glam4cm/embeddings/tfidf.py +25 -0
- glam4cm/embeddings/w2v.py +41 -0
- glam4cm/encoding/__init__.py +0 -0
- glam4cm/encoding/common.py +0 -0
- glam4cm/encoding/encoders.py +100 -0
- glam4cm/graph2str/__init__.py +0 -0
- glam4cm/graph2str/common.py +34 -0
- glam4cm/graph2str/constants.py +15 -0
- glam4cm/graph2str/ontouml.py +141 -0
- glam4cm/graph2str/uml.py +0 -0
- glam4cm/lang2graph/__init__.py +0 -0
- glam4cm/lang2graph/archimate.py +31 -0
- glam4cm/lang2graph/bpmn.py +0 -0
- glam4cm/lang2graph/common.py +416 -0
- glam4cm/lang2graph/ecore.py +221 -0
- glam4cm/lang2graph/ontouml.py +169 -0
- glam4cm/lang2graph/utils.py +80 -0
- glam4cm/models/cmgpt.py +352 -0
- glam4cm/models/gnn_layers.py +273 -0
- glam4cm/models/hf.py +10 -0
- glam4cm/run.py +99 -0
- glam4cm/run_configs.py +126 -0
- glam4cm/settings.py +54 -0
- glam4cm/tokenization/__init__.py +0 -0
- glam4cm/tokenization/special_tokens.py +4 -0
- glam4cm/tokenization/utils.py +37 -0
- glam4cm/trainers/__init__.py +0 -0
- glam4cm/trainers/bert_classifier.py +105 -0
- glam4cm/trainers/cm_gpt_trainer.py +153 -0
- glam4cm/trainers/gnn_edge_classifier.py +126 -0
- glam4cm/trainers/gnn_graph_classifier.py +123 -0
- glam4cm/trainers/gnn_link_predictor.py +144 -0
- glam4cm/trainers/gnn_node_classifier.py +135 -0
- glam4cm/trainers/gnn_trainer.py +129 -0
- glam4cm/trainers/metrics.py +55 -0
- glam4cm/utils.py +194 -0
- glam4cm-0.1.0.dist-info/LICENSE +21 -0
- glam4cm-0.1.0.dist-info/METADATA +86 -0
- glam4cm-0.1.0.dist-info/RECORD +72 -0
- glam4cm-0.1.0.dist-info/WHEEL +5 -0
- glam4cm-0.1.0.dist-info/entry_points.txt +2 -0
- glam4cm-0.1.0.dist-info/top_level.txt +1 -0
glam4cm/models/cmgpt.py
ADDED
@@ -0,0 +1,352 @@
import torch
import torch.nn as nn
from glam4cm.settings import device


def weights_init(model):
    """
    Initialize the weights of the model.
    xavier_uniform is used for linear layers and embeddings,
    zeros is used for biases.
    xavier_uniform initializes the weights with a uniform distribution;
    this is done to avoid the exploding gradient problem.
    """
    if isinstance(model, nn.Linear):
        nn.init.xavier_uniform_(model.weight.data)
        if model.bias is not None:
            nn.init.zeros_(model.bias.data)
    elif isinstance(model, nn.Embedding):
        nn.init.xavier_uniform_(model.weight.data)
    elif isinstance(model, nn.LayerNorm):
        nn.init.ones_(model.weight.data)
        nn.init.zeros_(model.bias.data)


class Head(nn.Module):
    """ One head of self-attention """

    def __init__(self, embed_dim, head_size, dropout=0.1):
        super().__init__()
        self.key = nn.Linear(embed_dim, head_size, bias=False)
        self.query = nn.Linear(embed_dim, head_size, bias=False)
        self.value = nn.Linear(embed_dim, head_size, bias=False)
        # Causal mask buffer; not used in forward, which masks with attention_mask instead
        self.register_buffer('tril', torch.tril(torch.ones(head_size, head_size)))
        self.softmax = nn.Softmax(dim=-1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, attention_mask):
        """
        x: [batch_size, seq_len, embed_dim]
        attention_mask: [batch_size, seq_len]

        Computes the attention scores between all pairs of tokens in the sequence.
        """
        _, _, C = x.shape
        k = self.key(x)
        q = self.query(x)

        # Compute attention scores ("affinities") only where the mask is non-zero
        wei = q @ k.transpose(-2, -1) * C**-0.5
        wei = wei.masked_fill(attention_mask.unsqueeze(1) == 0, float('-inf'))
        wei = self.softmax(wei)
        wei = self.dropout(wei)

        # Perform the weighted aggregation of the values
        v = self.value(x)
        out = wei @ v
        return out


class MultiHeadAttention(nn.Module):
    """
    Multiple heads of self-attention in parallel.
    The embedding dimension is first split into multiple heads,
    each head computes the attention scores between all pairs of tokens in the sequence,
    and the outputs of all heads are concatenated and projected back to the original embedding dimension.
    """

    def __init__(self, embed_dim, num_heads, dropout=0.1):
        super().__init__()
        head_size = embed_dim // num_heads
        self.heads = nn.ModuleList([Head(embed_dim, head_size) for _ in range(num_heads)])
        self.proj = nn.Linear(embed_dim, embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, attn_mask):
        """
        x: [batch_size, seq_len, embed_dim]
        """
        out = torch.cat([h(x, attn_mask) for h in self.heads], dim=-1)
        out = self.dropout(self.proj(out))
        return out


class FeedFoward(nn.Module):
    """
    A simple linear layer followed by a non-linearity.
    """

    def __init__(self, input_dim, embed_dim=None, num_classes=None, dropout=0.1):
        super().__init__()

        if num_classes is None:
            num_classes = input_dim if embed_dim is None else embed_dim

        if embed_dim is None:
            embed_dim = input_dim

        self.net = nn.Sequential(
            nn.Linear(input_dim, 4 * embed_dim),
            nn.ReLU(),
            nn.Linear(4 * embed_dim, num_classes),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)


class Block(nn.Module):
    """ Transformer block: communication followed by computation """

    def __init__(self, embed_dim, n_head):
        # embed_dim: embedding dimension, n_head: the number of heads we'd like
        super().__init__()
        self.sa = MultiHeadAttention(embed_dim, n_head)
        self.ffwd = FeedFoward(embed_dim)
        self.ln1 = nn.LayerNorm(embed_dim)
        self.ln2 = nn.LayerNorm(embed_dim)

    def forward(self, x, attn_mask):
        x = x + self.sa(self.ln1(x), attn_mask)
        x = x + self.ffwd(self.ln2(x))
        return x


class CMGPT(nn.Module):
    """
    UML-GPT model

    vocab_size: the size of the vocabulary
    embed_dim: the embedding dimension
    block_size: the maximum sequence length
    n_layer: the number of transformer blocks
    n_head: the number of heads in each transformer block
    load_pretrained_from: the path to a pretrained state dict

    This class uses the string representation of a node as input.
    The string representation is tokenized with the tokenizer,
    the tokenized sequence is passed through the transformer blocks,
    and the logits for the next token are computed with a linear layer.
    """

    def __init__(
        self,
        vocab_size,
        embed_dim,
        block_size,
        n_layer,
        n_head,
        load_pretrained_from=None
    ):
        super().__init__()
        # Each token reads off the logits for the next token from a lookup table
        self.token_embedding_table = nn.Embedding(vocab_size, embed_dim)
        self.position_embedding_table = nn.Embedding(block_size, embed_dim)
        self.blocks = nn.Sequential(*[Block(embed_dim, n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(embed_dim)  # final layer norm
        self.lm_head = nn.Linear(embed_dim, vocab_size)

        # Build the sub-modules first, then either load pretrained weights or initialize them
        if load_pretrained_from is not None:
            self.load_state_dict(torch.load(load_pretrained_from))
        else:
            self.apply(weights_init)

    def forward(self, x, attention_mask, labels=None):
        """
        x: [batch_size, seq_len]
        attention_mask: [batch_size, seq_len]

        Computes the logits for the next token.
        """
        embeddings = self.get_embedding(x, attention_mask)
        logits = self.lm_head(embeddings)
        if labels is not None:
            loss = self.get_loss(logits, labels)
            return logits, loss
        return logits

    def get_loss(self, logits, labels, ignore_index=-100):
        """
        logits: [batch_size, seq_len, vocab_size]
        labels: [batch_size, seq_len]

        Computes the loss for the next-token prediction task
        by shifting the labels one position and applying cross-entropy.
        """
        block_size = self.position_embedding_table.weight.shape[0]
        labels = labels[..., :block_size]
        loss = None
        if labels is not None:
            # Shift so that tokens < n predict n
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            # Flatten the tokens
            loss_fct = nn.CrossEntropyLoss(ignore_index=ignore_index)
            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))

        return loss

    def get_embedding(self, x, attention_mask):
        """
        x: [batch_size, seq_len]
        attention_mask: [batch_size, seq_len]
        """
        block_size = self.position_embedding_table.weight.shape[0]
        vocab_size = self.token_embedding_table.weight.shape[0]

        x = x[..., :block_size]
        attention_mask = attention_mask[..., :block_size]

        assert x.shape[-1] <= block_size, f"Sequence length {x.shape[-1]} is greater than block size {block_size}"
        assert torch.min(x) >= 0, f"Min token id {torch.min(x)} is negative"
        assert torch.max(x) < vocab_size, f"Max token id {torch.max(x)} exceeds vocab size {vocab_size}"

        token_embeddings = self.token_embedding_table(x)

        position_ids = torch.arange(x.size(1), dtype=torch.long, device=x.device)
        position_ids = position_ids.unsqueeze(0).expand_as(x)

        assert torch.max(position_ids) < block_size, f"Max position id {torch.max(position_ids)} exceeds block size {block_size}"
        position_embeddings = self.position_embedding_table(position_ids)

        embeddings = token_embeddings + position_embeddings

        for block in self.blocks:
            embeddings = block(embeddings, attention_mask)

        embeddings = self.ln_f(embeddings)
        return embeddings

    def get_model_size(self):
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

    def __repr__(self):
        return super().__repr__() + f'\nNumber of parameters: {self.get_model_size() / 1000000:.3f}M'

    @property
    def __name__(self):
        return 'CMGPT'

    @property
    def name_or_path(self):
        return 'CMGPT'

    @staticmethod
    def from_pretrained(state_dict_pth):
        # Recover the hyperparameters from the shapes and names of the saved parameters
        state_dict = torch.load(state_dict_pth, map_location=device)
        vocab_size, embed_dim = [p.shape for name, p in state_dict.items() if 'token_embedding_table' in name][0]
        num_heads = max(int(name.split('.sa.heads.')[1].split('.')[0]) for name in state_dict if '.sa.heads.' in name) + 1
        block_size = [p.shape[0] for name, p in state_dict.items() if 'position_embedding_table' in name][0]
        num_layers = max(int(name.split('blocks.')[1].split('.')[0]) for name in state_dict if 'blocks.' in name) + 1
        model = CMGPT(vocab_size, embed_dim, block_size, num_layers, num_heads)
        model.load_state_dict(state_dict)
        return model


class CMGPTClassifier(nn.Module):
    """
    UML-GPT model for classification

    model: the UML-GPT model
    num_classes: the number of classes
    """

    def __init__(
        self,
        model: CMGPT,
        num_classes: int
    ):
        super().__init__()

        self.model = model
        _, embed_dim = self.model.lm_head.weight.data.shape
        self.classifier = FeedFoward(input_dim=embed_dim, num_classes=num_classes)
        self.apply(weights_init)

    def forward(self, x, attention_mask, labels=None, pool=None):
        # x: [batch_size, seq_len]
        # attention_mask: [batch_size, seq_len]
        lm_logits = self.model.get_embedding(x, attention_mask)
        if pool:
            # Pool the embeddings across the sequence dimension
            lm_logits = torch.mean(lm_logits, dim=1)
        else:
            # Use the embedding at the last position
            lm_logits = lm_logits[:, -1, :]

        logits = self.classifier(lm_logits)

        if labels is not None:
            loss = self.get_loss(logits, labels)
            return logits, loss
        return logits

    def get_loss(self, logits, labels):
        logits = logits.to(device)
        labels = labels.to(device)

        if len(labels.shape) == 1:
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(logits, labels)
        else:
            loss_fct = torch.nn.BCEWithLogitsLoss()
            loss = loss_fct(logits.float(), labels.float())
        return loss

    def get_embedding(self, x, attention_mask):
        return self.model.get_embedding(x, attention_mask)

    def get_model_size(self):
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

    def __repr__(self):
        return super().__repr__() + f'\nNumber of parameters: {self.get_model_size() / 1000000:.3f}M'

    @staticmethod
    def from_pretrained(state_dict_path, num_classes, init_classifier=True):
        if init_classifier:
            print("Initializing classifier from pretrained model with num classes: ", num_classes)
            model = CMGPTClassifier(CMGPT.from_pretrained(state_dict_path), num_classes)
        else:
            # Recover the hyperparameters (including num_classes) from the saved state dict
            state_dict = torch.load(state_dict_path, map_location=device)
            vocab_size, embed_dim = [p.shape for name, p in state_dict.items() if 'token_embedding_table' in name][0]
            num_heads = max(int(name.split('.sa.heads.')[1].split('.')[0]) for name in state_dict if '.sa.heads.' in name) + 1
            block_size = [p.shape[0] for name, p in state_dict.items() if 'position_embedding_table' in name][0]
            num_layers = max(int(name.split('blocks.')[1].split('.')[0]) for name in state_dict if 'blocks.' in name) + 1
            num_classes = state_dict['classifier.net.2.weight'].shape[0]
            uml_gpt = CMGPT(vocab_size, embed_dim, block_size, num_layers, num_heads)

            model = CMGPTClassifier(uml_gpt, num_classes)
            model.load_state_dict(state_dict)

        return model
glam4cm/models/gnn_layers.py
ADDED
@@ -0,0 +1,273 @@
import torch
from torch.nn import functional as F
from torch_geometric.nn import aggr
from torch_geometric.nn import (
    global_add_pool,
    global_max_pool,
    global_mean_pool,
)
import torch_geometric
import torch.nn as nn


aggregation_methods = {
    'mean': aggr.MeanAggregation(),
    'sum': aggr.SumAggregation(),
    'max': aggr.MaxAggregation(),
    'mul': aggr.MulAggregation(),
}

supported_conv_models = {
    'GCNConv': False,  # True if the model requires num_heads, False otherwise
    'GraphConv': False,
    'GATConv': True,
    'SAGEConv': False,
    'GINConv': False,
    'GATv2Conv': True,
}

global_pooling_methods = {
    'sum': global_add_pool,
    'mean': global_mean_pool,
    'max': global_max_pool,
}


class GNNConv(torch.nn.Module):
    """
    A general GNN model created using the PyTorch Geometric library

    model_name: the name of the GNN model
    input_dim: the input dimension
    hidden_dim: the hidden dimension
    out_dim: the output dimension
    num_layers: the number of GNN layers
    num_heads: the number of heads in the GNN layer
    residual: whether to use residual connections
    l_norm: whether to use layer normalization
    dropout: the dropout probability
    """

    def __init__(
        self,
        model_name,
        input_dim,
        hidden_dim,
        out_dim=None,
        num_layers=2,
        num_heads=None,
        residual=False,
        l_norm=False,
        dropout=0.1,
        aggregation='mean',
        edge_dim=None
    ):
        super(GNNConv, self).__init__()

        assert model_name in supported_conv_models, f"Model {model_name} not supported. Choose from {supported_conv_models.keys()}"
        heads_supported = supported_conv_models[model_name]
        if heads_supported and num_heads is None:
            raise ValueError(f"Model {model_name} requires num_heads to be set to an integer")

        if not heads_supported and num_heads is not None:
            num_heads = None

        assert aggregation in aggregation_methods, f"Aggregation method {aggregation} not supported. Choose from {aggregation_methods.keys()}"
        aggregation = aggregation_methods[aggregation]

        self.input_dim = input_dim
        self.embed_dim = hidden_dim
        self.out_dim = out_dim if out_dim is not None else hidden_dim
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.aggregation = aggregation
        self.edge_dim = edge_dim

        gnn_model = getattr(torch_geometric.nn, model_name)
        self.conv_layers = nn.ModuleList()

        for i in range(num_layers):
            if num_heads is None:
                conv = gnn_model(
                    input_dim,
                    hidden_dim if i != num_layers - 1 else self.out_dim,
                    aggr=aggregation
                )
            else:
                conv = gnn_model(
                    input_dim if i == 0 else num_heads * input_dim,
                    hidden_dim if i != num_layers - 1 else self.out_dim,
                    heads=num_heads,
                    aggr=aggregation,
                    edge_dim=edge_dim
                )
            self.conv_layers.append(conv)
            input_dim = hidden_dim

        self.activation = nn.ReLU()
        self.layer_norm = nn.LayerNorm(hidden_dim if num_heads is None else num_heads * hidden_dim) if l_norm else None
        self.residual = residual
        self.dropout = nn.Dropout(dropout) if dropout > 0 else None

    def forward(self, in_feat, edge_index, edge_attr=None):

        def activate(h):
            h = self.activation(h)
            if self.layer_norm is not None:
                h = self.layer_norm(h)
            if self.dropout is not None:
                h = self.dropout(h)
            return h

        h = in_feat
        h = self.conv_layers[0](h, edge_index, edge_attr) if isinstance(edge_attr, torch.Tensor) else self.conv_layers[0](h, edge_index)
        h = activate(h)

        for conv in self.conv_layers[1:-1]:
            nh = conv(h, edge_index, edge_attr) if isinstance(edge_attr, torch.Tensor) else conv(h, edge_index)
            h = nh if not self.residual else nh + h
            h = activate(h)

        h = self.conv_layers[-1](h, edge_index)
        h = activate(h)
        return h


class EdgeClassifer(nn.Module):
    """
    An MLP predictor for edge classification

    input_dim: the input (node embedding) dimension
    num_classes: the number of classes
    num_layers: the number of layers in the MLP

    This class concatenates the node embeddings of the two nodes in the edge
    (plus the edge attributes, if given) and passes the result through an MLP.
    """

    def __init__(
        self,
        input_dim,
        hidden_dim,
        num_classes,
        num_layers=2,
        dropout=0.3,
        edge_dim=None,
        bias=False
    ):
        super().__init__()
        self.layers = nn.ModuleList()
        self.input_dim = input_dim
        self.embed_dim = hidden_dim
        self.num_layers = num_layers
        self.num_classes = num_classes

        in_feats = input_dim * 2
        if edge_dim is not None:
            in_feats += edge_dim

        for _ in range(num_layers):
            self.layers.append(nn.Linear(in_feats, hidden_dim, bias=bias))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(dropout))
            in_feats = hidden_dim

        self.layers.append(nn.Linear(hidden_dim, num_classes, bias=bias))

    def forward(self, x, edge_index, edge_attr=None):
        h = torch.cat([x[edge_index[0]], x[edge_index[1]]], dim=-1)
        if edge_attr is not None:
            h = torch.cat([h, edge_attr], dim=-1)

        for layer in self.layers:
            h = layer(h)

        return h


class NodeClassifier(nn.Module):
    """
    An MLP predictor for node classification

    input_dim: the input (node embedding) dimension
    num_classes: the number of classes
    num_layers: the number of layers in the MLP

    The node embeddings are passed through an MLP to produce per-node logits.
    """

    def __init__(
        self,
        input_dim,
        hidden_dim,
        num_classes,
        num_layers=2,
        dropout=0.3,
        bias=True
    ):
        super().__init__()
        self.layers = nn.ModuleList()
        self.embed_dim = hidden_dim
        self.num_layers = num_layers
        self.num_classes = num_classes

        for _ in range(num_layers - 1):
            self.layers.append(nn.Linear(input_dim, hidden_dim, bias=bias))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(dropout))
            input_dim = hidden_dim

        self.layers.append(nn.Linear(hidden_dim, num_classes, bias=bias))

    def forward(self, x):
        h = x
        for layer in self.layers:
            h = layer(h)

        return h


class GraphClassifer(nn.Module):
    """
    A predictor for graph classification

    input_dim: the input (node embedding) dimension
    num_classes: the number of classes

    The node embeddings are pooled into one embedding per graph with the chosen
    global pooling method and then passed through a linear classifier.
    """

    def __init__(
        self,
        input_dim,
        num_classes,
        global_pool='mean',
        bias=False
    ):
        super().__init__()
        self.layers = nn.ModuleList()
        self.input_dim = input_dim
        self.num_classes = num_classes

        self.layers.append(nn.Linear(input_dim, num_classes, bias=bias))
        self.global_pool = global_pooling_methods[global_pool]

    def forward(self, x, batch):
        h = self.global_pool(x, batch)
        for layer in self.layers:
            h = layer(h)

        return h
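As with the model above, a short hedged sketch of how these layers might be wired together on a toy graph. The SAGEConv choice, feature sizes, and random data are placeholders (and assume a recent PyTorch Geometric version); they are not values used by the package's trainers.

# Illustrative sketch only: graph sizes, dims, and the conv type are made up.
import torch
from glam4cm.models.gnn_layers import GNNConv, EdgeClassifer

x = torch.randn(20, 32)                     # 20 nodes with 32-dim features
edge_index = torch.randint(0, 20, (2, 50))  # 50 random directed edges

gnn = GNNConv('SAGEConv', input_dim=32, hidden_dim=64, num_layers=3)
h = gnn(x, edge_index)                      # [20, 64] node embeddings

edge_clf = EdgeClassifer(input_dim=64, hidden_dim=128, num_classes=3)
edge_logits = edge_clf(h, edge_index)       # [50, 3] per-edge class logits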
glam4cm/models/hf.py
ADDED
@@ -0,0 +1,10 @@
from transformers import AutoModelForSequenceClassification

def get_model(model_name, num_labels, len_tokenizer=None) -> AutoModelForSequenceClassification:
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
    if len_tokenizer:
        model.resize_token_embeddings(len_tokenizer)
        assert model.config.vocab_size == len_tokenizer, \
            f"Tokenizer size {len_tokenizer} does not match model size {model.config.vocab_size}"

    return model
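A quick, hedged example of calling get_model; 'bert-base-uncased' and num_labels=4 are placeholder values, not values prescribed by the package, and the call downloads weights from the Hugging Face Hub.

# Illustrative sketch only: the checkpoint name and label count are placeholders.
from glam4cm.models.hf import get_model

model = get_model('bert-base-uncased', num_labels=4)
print(model.config.num_labels)  # 4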