PyPI - libmultilabel - Versions diffs - 0.7.4__tar.gz → 0.8.1__tar.gz - Mend

libmultilabel 0.7.4 (tar.gz) → 0.8.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

{libmultilabel-0.7.4 → libmultilabel-0.8.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: libmultilabel
-Version: 0.7.4
+Version: 0.8.1
 Summary: A library for multi-class and multi-label classification
 Home-page: https://github.com/ASUS-AICS/LibMultiLabel
 Author: LibMultiLabel Team
@@ -8,7 +8,7 @@ License: MIT License
 Project-URL: Bug Tracker, https://github.com/ASUS-AICS/LibMultiLabel/issues
 Project-URL: Documentation, https://www.csie.ntu.edu.tw/~cjlin/libmultilabel
 Project-URL: Source Code, https://github.com/ASUS-AICS/LibMultiLabel/
-Classifier: Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.1
+Classifier: Environment :: GPU :: NVIDIA CUDA :: 12
 Classifier: Environment :: GPU :: NVIDIA CUDA :: 11.8
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Education
@@ -16,23 +16,27 @@ Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
-Requires-Python: >=3.8
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Python: >=3.10
 License-File: LICENSE
 Requires-Dist: liblinear-multicore>=2.49.0
 Requires-Dist: numba
 Requires-Dist: pandas>1.3.0
 Requires-Dist: PyYAML
 Requires-Dist: scikit-learn
-Requires-Dist: scipy<1.14.0
+Requires-Dist: scipy
 Requires-Dist: tqdm
 Requires-Dist: psutil
+Requires-Dist: sparsekmeans
 Provides-Extra: nn
-Requires-Dist: lightning==2.0.9; extra == "nn"
+Requires-Dist: lightning; extra == "nn"
 Requires-Dist: nltk; extra == "nn"
-Requires-Dist: torch<=2.3; extra == "nn"
+Requires-Dist: torch; extra == "nn"
 Requires-Dist: torchmetrics==0.10.3; extra == "nn"
-Requires-Dist: torchtext; extra == "nn"
 Requires-Dist: transformers; extra == "nn"
+Dynamic: license-file
 See documentation here: https://www.csie.ntu.edu.tw/~cjlin/libmultilabel

{libmultilabel-0.7.4 → libmultilabel-0.8.1}/README.md RENAMED Viewed

@@ -9,9 +9,9 @@ LibMultiLabel is a library for binary, multi-class, and multi-label classificati
 This is an on-going development so many improvements are still being made. Comments are very welcome.
 ## Environments
-- Python: 3.8+
-- CUDA: 11.8, 12.1 (if training neural networks by GPU)
-- Pytorch: 2.0.1+
+- Python: 3.10+
+- CUDA: 11.8, 12.1, 12.6 (if training neural networks by GPU)
+- Pytorch: 2.3.0+
 If you have a different version of CUDA, follow the installation instructions for PyTorch LTS at their [website](https://pytorch.org/).

{libmultilabel-0.7.4 → libmultilabel-0.8.1}/libmultilabel/linear/linear.py RENAMED Viewed

@@ -27,7 +27,7 @@ class FlatModel:
     def __init__(
         self,
         name: str,
-        weights: np.matrix,
+        weights: np.matrix | sparse.csr_matrix,
         bias: float,
         thresholds: float | np.ndarray,
         multiclass: bool,
@@ -69,7 +69,21 @@ class FlatModel:
                 "csr",
             )
-        return (x * self.weights).A + self.thresholds
+        return self._to_dense_array(x * self.weights) + self.thresholds
+    def _to_dense_array(self, matrix: np.matrix | sparse.csr_matrix) -> np.ndarray:
+        """Convert a numpy or scipy matrix to a dense ndarray.
+        Args:
+            matrix (np.matrix | sparse.csr_matrix): A numpy or scipy sparse matrix.
+        Returns:
+            np.ndarray: A dense ndarray of `matrix`.
+        """
+        if sparse.issparse(matrix):
+            return matrix.toarray()
+        elif isinstance(matrix, np.matrix):
+            return np.asarray(matrix)
 def train_1vsrest(
@@ -458,7 +472,7 @@ def _cost_sensitive_one_label(y: np.ndarray, x: sparse.csr_matrix, options: str)
     param_space = [1, 1.33, 1.8, 2.5, 3.67, 6, 13]
-    bestScore = -np.Inf
+    bestScore = -np.inf
     for a in param_space:
         cv_options = f"{options} -w1 {a}"
         pred = _cross_validate(y, x, cv_options, perm)
@@ -532,7 +546,7 @@ def train_cost_sensitive_micro(
     l = y.shape[0]
     perm = np.random.permutation(l)
     param_space = [1, 1.33, 1.8, 2.5, 3.67, 6, 13]
-    bestScore = -np.Inf
+    bestScore = -np.inf
     if verbose:
         logging.info(f"Training cost-sensitive model for Micro-F1 on {num_class} labels")

{libmultilabel-0.7.4 → libmultilabel-0.8.1}/libmultilabel/linear/tree.py RENAMED Viewed

@@ -4,7 +4,7 @@ from typing import Callable
 import numpy as np
 import scipy.sparse as sparse
-import sklearn.cluster
+from sparsekmeans import LloydKmeans, ElkanKmeans
 import sklearn.preprocessing
 from tqdm import tqdm
 import psutil
@@ -101,7 +101,7 @@ class TreeModel:
         self.subtree_models = []
         for i in range(len(self.root.children)):
             subtree_weights_start = self.node_ptr[self.root.children[i].index]
-            subtree_weights_end = self.node_ptr[self.root.children[i+1].index] if i+1 < len(self.root.children) else -1
+            subtree_weights_end = self.node_ptr[self.root.children[i+1].index] if i+1 < len(self.root.children) else self.node_ptr[-1]
             slice = np.s_[:, subtree_weights_start:subtree_weights_end]
             subtree_flatmodel = linear.FlatModel(
                 name="subtree-flattened-tree",
@@ -274,28 +274,29 @@ def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray,
     Returns:
         Node: Root of the (sub)tree built from label_representation.
     """
-    if d >= dmax or label_representation.shape[0] <= K:
-        return Node(label_map=label_map, children=[])
-    metalabels = (
-        sklearn.cluster.KMeans(
-            K,
-            random_state=np.random.randint(2**31 - 1),
-            n_init=1,
-            max_iter=300,
-            tol=0.0001,
-            algorithm="elkan",
+    children = []
+    if d < dmax and label_representation.shape[0] > K:
+        if label_representation.shape[0] > 10000:
+            kmeans_algo = ElkanKmeans
+        else:
+            kmeans_algo = LloydKmeans
+        kmeans = kmeans_algo(
+            n_clusters=K, max_iter=300, tol=0.0001, random_state=np.random.randint(2**31 - 1), verbose=True
         )
-        .fit(label_representation)
-        .labels_
-    )
+        metalabels = kmeans.fit(label_representation)
-    children = []
-    for i in range(K):
-        child_representation = label_representation[metalabels == i]
-        child_map = label_map[metalabels == i]
-        child = _build_tree(child_representation, child_map, d + 1, K, dmax)
-        children.append(child)
+        unique_labels = np.unique(metalabels)
+        if len(unique_labels) == K:
+            create_child_node = lambda i: _build_tree(
+                label_representation[metalabels == i], label_map[metalabels == i], d + 1, K, dmax
+            )
+        else:
+            create_child_node = lambda i: Node(label_map=label_map[metalabels == i], children=[])
+        for i in range(K):
+            child = create_child_node(i)
+            children.append(child)
     return Node(label_map=label_map, children=children)

{libmultilabel-0.7.4 → libmultilabel-0.8.1}/libmultilabel/nn/attentionxml.py RENAMED Viewed

@@ -489,7 +489,7 @@ class PLTTrainer:
         # Convert words to numbers according to their indices in word_dict. Then pad each instance to a certain length.
         encoded_text = list(
             map(
-                lambda text: torch.tensor([self.word_dict[word] for word in text], dtype=torch.int64)
+                lambda text: torch.tensor([self.word_dict.get(word, self.word_dict[UNK]) for word in text], dtype=torch.int64)
                 if text
                 else torch.tensor([self.word_dict[UNK]], dtype=torch.int64),
                 [instance["text"][: self.max_seq_length] for instance in dataset],

{libmultilabel-0.7.4 → libmultilabel-0.8.1}/libmultilabel/nn/data_utils.py RENAMED Viewed

@@ -1,7 +1,12 @@
 import csv
 import gc
 import logging
+import os
+import re
 import warnings
+import zipfile
+from urllib.request import urlretrieve
+from collections import Counter, OrderedDict
 import pandas as pd
 import torch
@@ -11,7 +16,6 @@ from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import MultiLabelBinarizer
 from torch.nn.utils.rnn import pad_sequence
 from torch.utils.data import Dataset
-from torchtext.vocab import build_vocab_from_iterator, pretrained_aliases, Vocab
 from tqdm import tqdm
 transformers.logging.set_verbosity_error()
@@ -19,6 +23,14 @@ warnings.simplefilter(action="ignore", category=FutureWarning)
 UNK = "<unk>"
 PAD = "<pad>"
+GLOVE_WORD_EMBEDDING = {
+    "glove.42B.300d",
+    "glove.840B.300d",
+    "glove.6B.50d",
+    "glove.6B.100d",
+    "glove.6B.200d",
+    "glove.6B.300d",
+}
 class TextDataset(Dataset):
@@ -31,8 +43,7 @@ class TextDataset(Dataset):
         add_special_tokens (bool, optional): Whether to add the special tokens. Defaults to True.
         tokenizer (transformers.PreTrainedTokenizerBase, optional): HuggingFace's tokenizer of
             the transformer-based pretrained language model. Defaults to None.
-        word_dict (torchtext.vocab.Vocab, optional): A vocab object for word tokenizer to
-            map tokens to indices. Defaults to None.
+        word_dict (dict, optional): A dictionary for mapping tokens to indices. Defaults to None.
     """
     def __init__(
@@ -55,7 +66,7 @@ class TextDataset(Dataset):
         self.num_classes = len(self.classes)
         self.label_binarizer = MultiLabelBinarizer().fit([classes])
-        if not isinstance(self.word_dict, Vocab) ^ isinstance(self.tokenizer, transformers.PreTrainedTokenizerBase):
+        if not isinstance(self.word_dict, dict) ^ isinstance(self.tokenizer, transformers.PreTrainedTokenizerBase):
             raise ValueError("Please specify exactly one of word_dict or tokenizer")
     def __len__(self):
@@ -71,7 +82,7 @@ class TextDataset(Dataset):
             else:
                 input_ids = self.tokenizer.encode(data["text"], add_special_tokens=False)
         else:
-            input_ids = [self.word_dict[word] for word in data["text"]]
+            input_ids = [self.word_dict.get(word, self.word_dict[UNK]) for word in data["text"]]
         return {
             "text": torch.LongTensor(input_ids[: self.max_seq_length]),
             "label": torch.IntTensor(self.label_binarizer.transform([data["label"]])[0]),
@@ -128,8 +139,7 @@ def get_dataset_loader(
         add_special_tokens (bool, optional): Whether to add the special tokens. Defaults to True.
         tokenizer (transformers.PreTrainedTokenizerBase, optional): HuggingFace's tokenizer of
             the transformer-based pretrained language model. Defaults to None.
-        word_dict (torchtext.vocab.Vocab, optional): A vocab object for word tokenizer to
-            map tokens to indices. Defaults to None.
+        word_dict (dict, optional): A dictionary for mapping tokens to indices. Defaults to None.
     Returns:
         torch.utils.data.DataLoader: A pytorch DataLoader.
@@ -154,6 +164,7 @@ def _load_raw_data(data, is_test=False, tokenize_text=True, remove_no_label_data
     Args:
         data (Union[str, pandas.DataFrame]): Training, test, or validation data in file or dataframe.
         is_test (bool, optional): Whether the data is for test or not. Defaults to False.
+        tokenize_text (bool, optional): Whether to tokenize text. Defaults to True.
         remove_no_label_data (bool, optional): Whether to remove training/validation instances that have no labels.
             This is effective only when is_test=False. Defaults to False.
@@ -265,20 +276,19 @@ def load_or_build_text_dict(
 ):
     """Build or load the vocabulary from the training dataset or the predefined `vocab_file`.
     The pretrained embedding can be either from a self-defined `embed_file` or from one of
-    the vectors defined in torchtext.vocab.pretrained_aliases
-    (https://github.com/pytorch/text/blob/main/torchtext/vocab/vectors.py).
+    the vectors: `glove.6B.50d`, `glove.6B.100d`, `glove.6B.200d`, `glove.6B.300d`, `glove.42B.300d`, or `glove.840B.300d`.
     Args:
         dataset (list): List of training instances with index, label, and tokenized text.
         vocab_file (str, optional): Path to a file holding vocabularies. Defaults to None.
         min_vocab_freq (int, optional): The minimum frequency needed to include a token in the vocabulary. Defaults to 1.
-        embed_file (str): Path to a file holding pre-trained embeddings.
+        embed_file (str): Path to a file holding pre-trained embeddings or the name of the pretrained GloVe embedding. Defaults to None.
         embed_cache_dir (str, optional): Path to a directory for storing cached embeddings. Defaults to None.
         silent (bool, optional): Enable silent mode. Defaults to False.
         normalize_embed (bool, optional): Whether the embedding of each word is normalized to a unit vector. Defaults to False.
     Returns:
-        tuple[torchtext.vocab.Vocab, torch.Tensor]: A vocab object which maps tokens to indices and the pre-trained word vectors of shape (vocab_size, embed_dim).
+        tuple[dict, torch.Tensor]: A dictionary which maps tokens to indices and the pre-trained word vectors of shape (vocab_size, embed_dim).
     """
     if vocab_file:
         logging.info(f"Load vocab from {vocab_file}")
@@ -286,14 +296,14 @@ def load_or_build_text_dict(
             vocab_list = [[vocab.strip() for vocab in fp.readlines()]]
         # Keep PAD index 0 to align `padding_idx` of
         # class Embedding in libmultilabel.nn.networks.modules.
-        vocabs = build_vocab_from_iterator(vocab_list, min_freq=1, specials=[PAD, UNK])
+        word_dict = _build_word_dict(vocab_list, min_vocab_freq=1, specials=[PAD, UNK])
     else:
         vocab_list = [set(data["text"]) for data in dataset]
-        vocabs = build_vocab_from_iterator(vocab_list, min_freq=min_vocab_freq, specials=[PAD, UNK])
-    vocabs.set_default_index(vocabs[UNK])
-    logging.info(f"Read {len(vocabs)} vocabularies.")
+        word_dict = _build_word_dict(vocab_list, min_vocab_freq=min_vocab_freq, specials=[PAD, UNK])
+    logging.info(f"Read {len(word_dict)} vocabularies.")
-    embedding_weights = get_embedding_weights_from_file(vocabs, embed_file, silent, embed_cache_dir)
+    embedding_weights = get_embedding_weights_from_file(word_dict, embed_file, silent, embed_cache_dir)
     if normalize_embed:
         # To have better precision for calculating the normalization, we convert the original
@@ -306,7 +316,41 @@ def load_or_build_text_dict(
             embedding_weights[i] = vector / float(torch.linalg.norm(vector) + 1e-6)
         embedding_weights = embedding_weights.float()
-    return vocabs, embedding_weights
+    return word_dict, embedding_weights
+def _build_word_dict(vocab_list, min_vocab_freq=1, specials=None):
+    r"""Build word dictionary, modified from `torchtext.vocab.build_vocab_from_iterator`
+    (https://docs.pytorch.org/text/stable/vocab.html#build-vocab-from-iterator)
+    Args:
+        vocab_list (list): List of token collections (e.g., one list or set of words per instance).
+        min_vocab_freq (int, optional): The minimum frequency needed to include a token in the vocabulary. Defaults to 1.
+        specials: Special tokens (e.g., <unk>, <pad>) to add. Defaults to None.
+    Returns:
+        dict: A dictionary which maps tokens to indices.
+    """
+    counter = Counter()
+    for tokens in vocab_list:
+        counter.update(tokens)
+    # sort by descending frequency, then lexicographically
+    sorted_by_freq_tuples = sorted(counter.items(), key=lambda x: (-x[1], x[0]))
+    ordered_dict = OrderedDict(sorted_by_freq_tuples)
+    # add special tokens at the beginning
+    tokens = specials or []
+    for token, freq in ordered_dict.items():
+        if freq >= min_vocab_freq:
+            tokens.append(token)
+    # build token to indices dict
+    word_dict = dict()
+    for idx, token in enumerate(tokens):
+        word_dict[token] = idx
+    return word_dict
 def load_or_build_label(datasets, label_file=None, include_test_labels=False):
@@ -344,70 +388,84 @@ def load_or_build_label(datasets, label_file=None, include_test_labels=False):
     return classes
-def get_embedding_weights_from_file(word_dict, embed_file, silent=False, cache=None):
-    """If the word exists in the embedding file, load the pretrained word embedding.
-    Otherwise, assign a zero vector to that word.
+def get_embedding_weights_from_file(word_dict, embed_file, silent=False, cache_dir=None):
+    """Obtain the word embeddings from file. If the word exists in the embedding file,
+    load the pretrained word embedding. Otherwise, assign a zero vector to that word.
+    If the given `embed_file` is the name of a pretrained GloVe embedding, the function
+    will first download the corresponding file.
     Args:
-        word_dict (torchtext.vocab.Vocab): A vocab object which maps tokens to indices.
-        embed_file (str): Path to a file holding pre-trained embeddings.
+        word_dict (dict): A dictionary for mapping tokens to indices.
+        embed_file (str): Path to a file holding pre-trained embeddings or the name of the pretrained GloVe embedding.
         silent (bool, optional): Enable silent mode. Defaults to False.
-        cache (str, optional): Path to a directory for storing cached embeddings. Defaults to None.
+        cache_dir (str, optional): Path to a directory for storing cached embeddings. Defaults to None.
     Returns:
         torch.Tensor: Embedding weights (vocab_size, embed_size).
     """
-    # Load pretrained word embedding
-    load_embedding_from_file = embed_file not in pretrained_aliases
-    if load_embedding_from_file:
-        logging.info(f"Load pretrained embedding from file: {embed_file}.")
-        with open(embed_file) as f:
-            word_vectors = f.readlines()
-        embed_size = len(word_vectors[0].split()) - 1
-        vector_dict = {}
-        for word_vector in tqdm(word_vectors, disable=silent):
-            word, vector = word_vector.rstrip().split(" ", 1)
-            vector = torch.Tensor(list(map(float, vector.split())))
-            vector_dict[word] = vector
-    else:
-        logging.info(f"Load pretrained embedding from torchtext.")
-        # Adapted from https://pytorch.org/text/0.9.0/_modules/torchtext/vocab.html#Vocab.load_vectors.
-        if embed_file not in pretrained_aliases:
-            raise ValueError(
-                "Got embed_file {}, but allowed pretrained "
-                "vectors are {}".format(embed_file, list(pretrained_aliases.keys()))
-            )
-        # Hotfix: Glove URLs are outdated in Torchtext
-        # (https://github.com/pytorch/text/blob/main/torchtext/vocab/vectors.py#L213-L217)
-        pretrained_cls = pretrained_aliases[embed_file]
-        if embed_file.startswith("glove"):
-            for name, url in pretrained_cls.func.url.items():
-                file_name = url.split("/")[-1]
-                pretrained_cls.func.url[name] = f"https://huggingface.co/stanfordnlp/glove/resolve/main/{file_name}"
-        vector_dict = pretrained_cls(cache=cache)
-        embed_size = vector_dict.dim
-    embedding_weights = torch.zeros(len(word_dict), embed_size)
+    if embed_file in GLOVE_WORD_EMBEDDING:
+        embed_file = _download_glove_embedding(embed_file, cache_dir=cache_dir)
+    elif not os.path.isfile(embed_file):
+        raise ValueError(
+            "Got embed_file {}, but allowed pretrained " "embeddings are {}".format(embed_file, GLOVE_WORD_EMBEDDING)
+        )
+    logging.info(f"Load pretrained embedding from {embed_file}.")
+    with open(embed_file) as f:
+        word_vectors = f.readlines()
+    embed_size = len(word_vectors[0].split()) - 1
-    if load_embedding_from_file:
-        # Add UNK embedding
-        # AttentionXML: np.random.uniform(-1.0, 1.0, embed_size)
-        # CAML: np.random.randn(embed_size)
-        unk_vector = torch.randn(embed_size)
-        embedding_weights[word_dict[UNK]] = unk_vector
+    vector_dict = {}
+    for word_vector in tqdm(word_vectors, disable=silent):
+        word, vector = word_vector.rstrip().split(" ", 1)
+        vector = torch.Tensor(list(map(float, vector.split())))
+        vector_dict[word] = vector
+    embedding_weights = torch.zeros(len(word_dict), embed_size)
+    # Add UNK embedding
+    #   AttentionXML: np.random.uniform(-1.0, 1.0, embed_size)
+    #   CAML: np.random.randn(embed_size)
+    unk_vector = torch.randn(embed_size)
+    embedding_weights[word_dict[UNK]] = unk_vector
     # Store pretrained word embedding
     vec_counts = 0
-    for word in word_dict.get_itos():
-        # The condition can be used to process the word that does not in the embedding file.
-        # Note that torchtext vector object has already dealt with this,
-        # so we can directly make a query without addtional handling.
-        if (load_embedding_from_file and word in vector_dict) or not load_embedding_from_file:
+    for word in word_dict.keys():
+        if word in vector_dict:
             embedding_weights[word_dict[word]] = vector_dict[word]
             vec_counts += 1
-    logging.info(f"loaded {vec_counts}/{len(word_dict)} word embeddings")
+    logging.info(f"Loaded {vec_counts}/{len(word_dict)} word embeddings")
     return embedding_weights
+def _download_glove_embedding(embed_name, cache_dir=None):
+    """Download pretrained glove embedding from https://huggingface.co/stanfordnlp/glove/tree/main.
+    Args:
+        embed_name (str): The name of the pretrained GloVe embedding.
+        cache_dir (str, optional): Path to a directory for storing cached embeddings. Defaults to None.
+    Returns:
+        str: Path to the file that contains the cached embeddings.
+    """
+    cache_dir = ".vector_cache" if cache_dir is None else cache_dir
+    cached_embed_file = f"{cache_dir}/{embed_name}.txt"
+    if os.path.isfile(cached_embed_file):
+        return cached_embed_file
+    os.makedirs(cache_dir, exist_ok=True)
+    remote_embed_file = re.sub(r"6B.*", "6B", embed_name) + ".zip"
+    url = f"https://huggingface.co/stanfordnlp/glove/resolve/main/{remote_embed_file}"
+    logging.info(f"Downloading pretrained embeddings from {url}.")
+    try:
+        zip_file, _ = urlretrieve(url, f"{cache_dir}/{remote_embed_file}")
+        with zipfile.ZipFile(zip_file, "r") as zf:
+            zf.extractall(cache_dir)
+    except Exception as e:
+        os.remove(zip_file)
+        raise e
+    logging.info(f"Downloaded pretrained embeddings {embed_name} to {cached_embed_file}.")
+    return cached_embed_file

{libmultilabel-0.7.4 → libmultilabel-0.8.1}/libmultilabel/nn/model.py RENAMED Viewed

@@ -181,7 +181,7 @@ class Model(MultiLabelModel):
     Args:
         classes (list): List of class names.
-        word_dict (torchtext.vocab.Vocab): A vocab object which maps tokens to indices.
+        word_dict (dict): A dictionary for mapping tokens to indices.
         network (nn.Module): Network (i.e., CAML, KimCNN, or XMLCNN).
         loss_function (str, optional): Loss function name (i.e., binary_cross_entropy_with_logits,
             cross_entropy). Defaults to 'binary_cross_entropy_with_logits'.

{libmultilabel-0.7.4 → libmultilabel-0.8.1}/libmultilabel/nn/nn_utils.py RENAMED Viewed

@@ -61,8 +61,7 @@ def init_model(
         model_name (str): Model to be used such as KimCNN.
         network_config (dict): Configuration for defining the network.
         classes (list): List of class names.
-        word_dict (torchtext.vocab.Vocab, optional): A vocab object for word tokenizer to
-            map tokens to indices. Defaults to None.
+        word_dict (dict, optional): A dictionary for mapping tokens to indices. Defaults to None.
         embed_vecs (torch.Tensor, optional): The pre-trained word vectors of shape
             (vocab_size, embed_dim). Defaults to None.
         init_weight (str): Weight initialization method from `torch.nn.init`.

{libmultilabel-0.7.4 → libmultilabel-0.8.1}/libmultilabel.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.4
 Name: libmultilabel
-Version: 0.7.4
+Version: 0.8.1
 Summary: A library for multi-class and multi-label classification
 Home-page: https://github.com/ASUS-AICS/LibMultiLabel
 Author: LibMultiLabel Team
@@ -8,7 +8,7 @@ License: MIT License
 Project-URL: Bug Tracker, https://github.com/ASUS-AICS/LibMultiLabel/issues
 Project-URL: Documentation, https://www.csie.ntu.edu.tw/~cjlin/libmultilabel
 Project-URL: Source Code, https://github.com/ASUS-AICS/LibMultiLabel/
-Classifier: Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.1
+Classifier: Environment :: GPU :: NVIDIA CUDA :: 12
 Classifier: Environment :: GPU :: NVIDIA CUDA :: 11.8
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Education
@@ -16,23 +16,27 @@ Classifier: Intended Audience :: Science/Research
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
-Requires-Python: >=3.8
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Python: >=3.10
 License-File: LICENSE
 Requires-Dist: liblinear-multicore>=2.49.0
 Requires-Dist: numba
 Requires-Dist: pandas>1.3.0
 Requires-Dist: PyYAML
 Requires-Dist: scikit-learn
-Requires-Dist: scipy<1.14.0
+Requires-Dist: scipy
 Requires-Dist: tqdm
 Requires-Dist: psutil
+Requires-Dist: sparsekmeans
 Provides-Extra: nn
-Requires-Dist: lightning==2.0.9; extra == "nn"
+Requires-Dist: lightning; extra == "nn"
 Requires-Dist: nltk; extra == "nn"
-Requires-Dist: torch<=2.3; extra == "nn"
+Requires-Dist: torch; extra == "nn"
 Requires-Dist: torchmetrics==0.10.3; extra == "nn"
-Requires-Dist: torchtext; extra == "nn"
 Requires-Dist: transformers; extra == "nn"
+Dynamic: license-file
 See documentation here: https://www.csie.ntu.edu.tw/~cjlin/libmultilabel

{libmultilabel-0.7.4 → libmultilabel-0.8.1}/libmultilabel.egg-info/requires.txt RENAMED Viewed

@@ -3,14 +3,14 @@ numba
 pandas>1.3.0
 PyYAML
 scikit-learn
-scipy<1.14.0
+scipy
 tqdm
 psutil
+sparsekmeans
 [nn]
-lightning==2.0.9
+lightning
 nltk
-torch<=2.3
+torch
 torchmetrics==0.10.3
-torchtext
 transformers

{libmultilabel-0.7.4 → libmultilabel-0.8.1}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = libmultilabel
-version = 0.7.4
+version = 0.8.1
 author = LibMultiLabel Team
 license = MIT License
 license_file = LICENSE
@@ -12,7 +12,7 @@ project_urls =
 	Documentation = https://www.csie.ntu.edu.tw/~cjlin/libmultilabel
 	Source Code = https://github.com/ASUS-AICS/LibMultiLabel/
 classifiers =
-	Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.1
+	Environment :: GPU :: NVIDIA CUDA :: 12
 	Environment :: GPU :: NVIDIA CUDA :: 11.8
 	Intended Audience :: Developers
 	Intended Audience :: Education
@@ -20,7 +20,10 @@ classifiers =
 	License :: OSI Approved :: MIT License
 	Operating System :: OS Independent
 	Programming Language :: Python :: 3
-	Programming Language :: Python :: 3.8
+	Programming Language :: Python :: 3.10
+	Programming Language :: Python :: 3.11
+	Programming Language :: Python :: 3.12
+	Programming Language :: Python :: 3.13
 [options]
 packages = find:
@@ -30,18 +33,18 @@ install_requires =
 	pandas>1.3.0
 	PyYAML
 	scikit-learn
-	scipy<1.14.0
+	scipy
 	tqdm
 	psutil
-python_requires = >=3.8
+	sparsekmeans
+python_requires = >=3.10
 [options.extras_require]
 nn =
-	lightning==2.0.9
+	lightning
 	nltk
-	torch<=2.3
+	torch
 	torchmetrics==0.10.3
-	torchtext
 	transformers
 [options.packages.find]