libmultilabel 0.7.2__tar.gz → 0.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/PKG-INFO +2 -2
  2. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/common_utils.py +9 -8
  3. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/linear.py +29 -21
  4. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/metrics.py +6 -6
  5. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/tree.py +6 -6
  6. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/utils.py +2 -2
  7. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/attentionxml.py +23 -23
  8. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/data_utils.py +1 -1
  9. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/metrics.py +3 -3
  10. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/model.py +4 -4
  11. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/bert.py +1 -1
  12. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/bert_attention.py +1 -1
  13. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/caml.py +2 -2
  14. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/kim_cnn.py +1 -1
  15. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/labelwise_attention_networks.py +6 -6
  16. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/modules.py +10 -10
  17. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/xml_cnn.py +1 -1
  18. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel.egg-info/PKG-INFO +2 -2
  19. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel.egg-info/requires.txt +1 -1
  20. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/setup.cfg +2 -2
  21. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/LICENSE +0 -0
  22. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/README.md +0 -0
  23. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/__init__.py +0 -0
  24. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/__init__.py +0 -0
  25. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/data_utils.py +0 -0
  26. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/preprocessor.py +0 -0
  27. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/logging.py +0 -0
  28. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/__init__.py +0 -0
  29. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/__init__.py +0 -0
  30. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/nn_utils.py +0 -0
  31. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel.egg-info/SOURCES.txt +0 -0
  32. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel.egg-info/dependency_links.txt +0 -0
  33. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel.egg-info/top_level.txt +0 -0
  34. {libmultilabel-0.7.2 → libmultilabel-0.7.3}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: libmultilabel
3
- Version: 0.7.2
3
+ Version: 0.7.3
4
4
  Summary: A library for multi-class and multi-label classification
5
5
  Home-page: https://github.com/ASUS-AICS/LibMultiLabel
6
6
  Author: LibMultiLabel Team
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3
19
19
  Classifier: Programming Language :: Python :: 3.8
20
20
  Requires-Python: >=3.8
21
21
  License-File: LICENSE
22
- Requires-Dist: liblinear-multicore
22
+ Requires-Dist: liblinear-multicore>=2.49.0
23
23
  Requires-Dist: numba
24
24
  Requires-Dist: pandas>1.3.0
25
25
  Requires-Dist: PyYAML
@@ -34,10 +34,10 @@ class AttributeDict(dict):
34
34
  self._used.discard(key)
35
35
 
36
36
  def used_items(self) -> dict:
37
- """Returns the items that have been used at least once after being set.
37
+ """Return the items that have been used at least once after being set.
38
38
 
39
39
  Returns:
40
- dict: the used items.
40
+ dict: The used items.
41
41
  """
42
42
  return {k: self[k] for k in self._used}
43
43
 
@@ -46,10 +46,10 @@ def dump_log(log_path, metrics=None, split=None, config=None):
46
46
  """Write log including the used items of config and the evaluation scores.
47
47
 
48
48
  Args:
49
- log_path(str): path to log path
50
- metrics (dict): metric and scores in dictionary format, defaults to None
51
- split (str): val or test, defaults to None
52
- config (dict): config to save, defaults to None
49
+ log_path (str): Path to the log file.
50
+ metrics (dict): Metric and scores in dictionary format, defaults to None.
51
+ split (str): One of `val` or `test`, defaults to None.
52
+ config (dict): Config to save, defaults to None.
53
53
  """
54
54
  os.makedirs(os.path.dirname(log_path), exist_ok=True)
55
55
  if os.path.isfile(log_path):
@@ -82,7 +82,8 @@ def argsort_top_k(vals, k, axis=-1):
82
82
  k: Consider only the top k elements for each query
83
83
  axis: Axis along which to sort. The default is -1 (the last axis).
84
84
 
85
- Returns: Array of indices that sort vals along the specified axis.
85
+ Returns:
86
+ Array of indices that sort vals along the specified axis.
86
87
  """
87
88
  unsorted_top_k_idx = np.argpartition(vals, -k, axis=axis)[:, -k:]
88
89
  unsorted_top_k_scores = np.take_along_axis(vals, unsorted_top_k_idx, axis=axis)
@@ -130,7 +131,7 @@ def is_multiclass_dataset(dataset, label="label"):
130
131
 
131
132
 
132
133
  def timer(func):
133
- """Log info-level wall time"""
134
+ """Log info-level wall time."""
134
135
 
135
136
  @wraps(func)
136
137
  def wrapper(*args, **kwargs):
@@ -5,7 +5,7 @@ import os
5
5
 
6
6
  import numpy as np
7
7
  import scipy.sparse as sparse
8
- from liblinear.liblinearutil import train
8
+ from liblinear.liblinearutil import train, problem, parameter, solver_names
9
9
  from tqdm import tqdm
10
10
 
11
11
  __all__ = [
@@ -39,7 +39,7 @@ class FlatModel:
39
39
  self.multiclass = multiclass
40
40
 
41
41
  def predict_values(self, x: sparse.csr_matrix) -> np.ndarray:
42
- """Calculates the decision values associated with x.
42
+ """Calculate the decision values associated with x.
43
43
 
44
44
  Args:
45
45
  x (sparse.csr_matrix): A matrix with dimension number of instances * number of features.
@@ -79,7 +79,7 @@ def train_1vsrest(
79
79
  options: str = "",
80
80
  verbose: bool = True,
81
81
  ) -> FlatModel:
82
- """Trains a linear model for multi-label data using a one-vs-rest strategy.
82
+ """Train a linear model for multi-label data using a one-vs-rest strategy.
83
83
 
84
84
  Args:
85
85
  y (sparse.csr_matrix): A 0/1 matrix with dimensions number of instances * number of classes.
@@ -169,9 +169,9 @@ def train_thresholding(
169
169
  options: str = "",
170
170
  verbose: bool = True,
171
171
  ) -> FlatModel:
172
- """Trains a linear model for multi-label data using a one-vs-rest strategy
172
+ """Train a linear model for multi-label data using a one-vs-rest strategy
173
173
  and cross-validation to pick decision thresholds optimizing the sum of Macro-F1 and Micro-F1.
174
- Outperforms train_1vsrest in most aspects at the cost of higher time complexity
174
+ Outperforms train_1vsrest in most aspects at the cost of higher time complexity
175
175
  due to an internal cross-validation.
176
176
 
177
177
  This method is the micromacro-freq approach from this CIKM 2023 paper:
@@ -235,7 +235,7 @@ def _micromacro_one_label(
235
235
  negatives, and the number of labels processed.
236
236
 
237
237
  Returns:
238
- tuple[np.ndarray, float, dict]: the weights, threshold, and the updated stats for calculating
238
+ tuple[np.ndarray, float, dict]: The weights, threshold, and the updated stats for calculating
239
239
  Micro-F1.
240
240
  """
241
241
 
@@ -319,7 +319,7 @@ def _micromacro_one_label(
319
319
 
320
320
 
321
321
  def _do_train(y: np.ndarray, x: sparse.csr_matrix, options: str) -> np.matrix:
322
- """Wrapper around liblinear.liblinearutil.train.
322
+ """Wrap around liblinear.liblinearutil.train.
323
323
  Forcibly suppresses all IO regardless of options.
324
324
 
325
325
  Args:
@@ -328,13 +328,17 @@ def _do_train(y: np.ndarray, x: sparse.csr_matrix, options: str) -> np.matrix:
328
328
  options (str): The option string passed to liblinear.
329
329
 
330
330
  Returns:
331
- np.matrix: the weights.
331
+ np.matrix: The weights.
332
332
  """
333
333
  if y.shape[0] == 0:
334
334
  return np.matrix(np.zeros((x.shape[1], 1)))
335
335
 
336
+ prob = problem(y, x)
337
+ param = parameter(options)
338
+ if param.solver_type in [solver_names.L2R_L1LOSS_SVC_DUAL, solver_names.L2R_L2LOSS_SVC_DUAL]:
339
+ param.w_recalc = True # only works for solving L1/L2-SVM dual
336
340
  with silent_stderr():
337
- model = train(y, x, options)
341
+ model = train(prob, param)
338
342
 
339
343
  w = np.ctypeslib.as_array(model.w, (x.shape[1], 1))
340
344
  w = np.asmatrix(w)
@@ -373,11 +377,11 @@ def _fmeasure(y_true: np.ndarray, y_pred: np.ndarray) -> float:
373
377
  """Calculate F1 score.
374
378
 
375
379
  Args:
376
- y_true (np.ndarray): array of +1/-1.
377
- y_pred (np.ndarray): array of +1/-1.
380
+ y_true (np.ndarray): Array of +1/-1.
381
+ y_pred (np.ndarray): Array of +1/-1.
378
382
 
379
383
  Returns:
380
- float: the F1 score.
384
+ float: The F1 score.
381
385
  """
382
386
  tp = np.sum(np.logical_and(y_true == 1, y_pred == 1))
383
387
  fn = np.sum(np.logical_and(y_true == 1, y_pred == -1))
@@ -396,10 +400,10 @@ def train_cost_sensitive(
396
400
  options: str = "",
397
401
  verbose: bool = True,
398
402
  ) -> FlatModel:
399
- """Trains a linear model for multi-label data using a one-vs-rest strategy
403
+ """Train a linear model for multi-label data using a one-vs-rest strategy
400
404
  and cross-validation to pick an optimal asymmetric misclassification cost
401
405
  for Macro-F1.
402
- Outperforms train_1vsrest in most aspects at the cost of higher
406
+ Outperforms train_1vsrest in most aspects at the cost of higher
403
407
  time complexity.
404
408
  See user guide for more details.
405
409
 
@@ -413,7 +417,7 @@ def train_cost_sensitive(
413
417
  Returns:
414
418
  A model which can be used in predict_values.
415
419
  """
416
- # Follows the MATLAB implementation at https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/multilabel/
420
+ # Follow the MATLAB implementation at https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/multilabel/
417
421
  x, options, bias = _prepare_options(x, options)
418
422
 
419
423
  y = y.tocsc()
@@ -446,7 +450,7 @@ def _cost_sensitive_one_label(y: np.ndarray, x: sparse.csr_matrix, options: str)
446
450
  options (str): The option string passed to liblinear.
447
451
 
448
452
  Returns:
449
- np.ndarray: the weights.
453
+ np.ndarray: The weights.
450
454
  """
451
455
 
452
456
  l = y.shape[0]
@@ -500,10 +504,10 @@ def train_cost_sensitive_micro(
500
504
  options: str = "",
501
505
  verbose: bool = True,
502
506
  ) -> FlatModel:
503
- """Trains a linear model for multi-label data using a one-vs-rest strategy
507
+ """Train a linear model for multi-label data using a one-vs-rest strategy
504
508
  and cross-validation to pick an optimal asymmetric misclassification cost
505
509
  for Micro-F1.
506
- Outperforms train_1vsrest in most aspects at the cost of higher
510
+ Outperforms train_1vsrest in most aspects at the cost of higher
507
511
  time complexity.
508
512
  See user guide for more details.
509
513
 
@@ -571,7 +575,7 @@ def train_binary_and_multiclass(
571
575
  options: str = "",
572
576
  verbose: bool = True,
573
577
  ) -> FlatModel:
574
- """Trains a linear model for binary and multi-class data.
578
+ """Train a linear model for binary and multi-class data.
575
579
 
576
580
  Args:
577
581
  y (sparse.csr_matrix): A 0/1 matrix with dimensions number of instances * number of classes.
@@ -592,8 +596,12 @@ def train_binary_and_multiclass(
592
596
  Invalid dataset. Only multi-class dataset is allowed."""
593
597
  y = np.squeeze(nonzero_label_ids)
594
598
 
599
+ prob = problem(y, x)
600
+ param = parameter(options)
601
+ if param.solver_type in [solver_names.L2R_L1LOSS_SVC_DUAL, solver_names.L2R_L2LOSS_SVC_DUAL]:
602
+ param.w_recalc = True
595
603
  with silent_stderr():
596
- model = train(y, x, options)
604
+ model = train(prob, param)
597
605
 
598
606
  # Labels appeared in training set; length may be smaller than num_labels
599
607
  train_labels = np.array(model.get_labels(), dtype="int")
@@ -622,7 +630,7 @@ def train_binary_and_multiclass(
622
630
 
623
631
 
624
632
  def predict_values(model, x: sparse.csr_matrix) -> np.ndarray:
625
- """Calculates the decision values associated with x, equivalent to model.predict_values(x).
633
+ """Calculate the decision values associated with x, equivalent to model.predict_values(x).
626
634
 
627
635
  Args:
628
636
  model: A model returned from a training function.
@@ -8,7 +8,7 @@ __all__ = ["get_metrics", "compute_metrics", "tabulate_metrics", "MetricCollecti
8
8
 
9
9
 
10
10
  def _argsort_top_k(preds: np.ndarray, top_k: int) -> np.ndarray:
11
- """Sorts the top k indices in O(n + k log k) time.
11
+ """Sort the top k indices in O(n + k log k) time.
12
12
  The sorting order is ascending to be consistent with np.sort.
13
13
  This means the last element is the largest, the first element is the kth largest.
14
14
  """
@@ -18,7 +18,7 @@ def _argsort_top_k(preds: np.ndarray, top_k: int) -> np.ndarray:
18
18
 
19
19
 
20
20
  def _dcg_argsort(argsort_preds: np.ndarray, target: np.ndarray, top_k: int) -> np.ndarray:
21
- """Computes DCG@k with a sorted preds array and a target array."""
21
+ """Compute DCG@k with a sorted preds array and a target array."""
22
22
  top_k_idx = argsort_preds[:, -top_k:][:, ::-1]
23
23
  gains = np.take_along_axis(target, top_k_idx, axis=-1)
24
24
  discount = 1 / (np.log2(np.arange(top_k) + 2))
@@ -28,7 +28,7 @@ def _dcg_argsort(argsort_preds: np.ndarray, target: np.ndarray, top_k: int) -> n
28
28
 
29
29
 
30
30
  def _idcg(target: np.ndarray, top_k: int) -> np.ndarray:
31
- """Computes IDCG@k for a 0/1 target array. A 0/1 target is a special case that
31
+ """Compute IDCG@k for a 0/1 target array. A 0/1 target is a special case that
32
32
  doesn't require sorting. If IDCG is computed with DCG,
33
33
  then target will need to be sorted, which incurs a large overhead.
34
34
  """
@@ -247,7 +247,7 @@ class MetricCollection(dict):
247
247
  self.max_k = max(getattr(metric, "top_k", 0) for metric in self.metrics.values())
248
248
 
249
249
  def update(self, preds: np.ndarray, target: np.ndarray):
250
- """Adds a batch of decision values and labels.
250
+ """Add a batch of decision values and labels.
251
251
 
252
252
  Args:
253
253
  preds (np.ndarray): A matrix of decision values with dimensions number of instances * number of classes.
@@ -268,7 +268,7 @@ class MetricCollection(dict):
268
268
  metric.update(preds, target)
269
269
 
270
270
  def compute(self) -> dict[str, float]:
271
- """Computes the metrics from the accumulated batches of decision values and labels.
271
+ """Compute the metrics from the accumulated batches of decision values and labels.
272
272
 
273
273
  Returns:
274
274
  dict[str, float]: A dictionary of metric values.
@@ -279,7 +279,7 @@ class MetricCollection(dict):
279
279
  return ret
280
280
 
281
281
  def reset(self):
282
- """Clears the accumulated batches of decision values and labels."""
282
+ """Clear the accumulated batches of decision values and labels."""
283
283
  for metric in self.metrics.values():
284
284
  metric.reset()
285
285
 
@@ -59,7 +59,7 @@ class TreeModel:
59
59
  x: sparse.csr_matrix,
60
60
  beam_width: int = 10,
61
61
  ) -> np.ndarray:
62
- """Calculates the probability estimates associated with x.
62
+ """Calculate the probability estimates associated with x.
63
63
 
64
64
  Args:
65
65
  x (sparse.csr_matrix): A matrix with dimension number of instances * number of features.
@@ -118,7 +118,7 @@ def train_tree(
118
118
  dmax=10,
119
119
  verbose: bool = True,
120
120
  ) -> TreeModel:
121
- """Trains a linear model for multi-label data using a divide-and-conquer strategy.
121
+ """Train a linear model for multi-label data using a divide-and-conquer strategy.
122
122
  The algorithm used is based on https://github.com/xmc-aalto/bonsai.
123
123
 
124
124
  Args:
@@ -178,7 +178,7 @@ def train_tree(
178
178
 
179
179
 
180
180
  def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray, d: int, K: int, dmax: int) -> Node:
181
- """Builds the tree recursively by kmeans clustering.
181
+ """Build the tree recursively by kmeans clustering.
182
182
 
183
183
  Args:
184
184
  label_representation (sparse.csr_matrix): A matrix with dimensions number of classes under this node * number of features.
@@ -235,7 +235,7 @@ def get_estimated_model_size(root):
235
235
 
236
236
 
237
237
  def _train_node(y: sparse.csr_matrix, x: sparse.csr_matrix, options: str, node: Node):
238
- """If node is internal, computes the metalabels representing each child and trains
238
+ """If node is internal, compute the metalabels representing each child and train
239
239
  on the metalabels. Otherwise, train on y.
240
240
 
241
241
  Args:
@@ -258,7 +258,7 @@ def _train_node(y: sparse.csr_matrix, x: sparse.csr_matrix, options: str, node:
258
258
 
259
259
 
260
260
  def _flatten_model(root: Node) -> tuple[linear.FlatModel, np.ndarray]:
261
- """Flattens tree weight matrices into a single weight matrix. The flattened weight
261
+ """Flatten tree weight matrices into a single weight matrix. The flattened weight
262
262
  matrix is used to predict all possible values, which is cached for beam search.
263
263
  This pessimizes complexity but is faster in practice.
264
264
  Consecutive values of the returned map denotes the start and end indices of the
@@ -289,7 +289,7 @@ def _flatten_model(root: Node) -> tuple[linear.FlatModel, np.ndarray]:
289
289
 
290
290
  model = linear.FlatModel(
291
291
  name="flattened-tree",
292
- weights=sparse.hstack(weights, "csr"),
292
+ weights=sparse.hstack(weights, "csc"),
293
293
  bias=bias,
294
294
  thresholds=0,
295
295
  multiclass=False,
@@ -31,7 +31,7 @@ LINEAR_TECHNIQUES = {
31
31
 
32
32
 
33
33
  def save_pipeline(checkpoint_dir: str, preprocessor: Preprocessor, model):
34
- """Saves preprocessor and model to checkpoint_dir/linear_pipline.pickle.
34
+ """Save preprocessor and model to checkpoint_dir/linear_pipline.pickle.
35
35
 
36
36
  Args:
37
37
  checkpoint_dir (str): The directory to save to.
@@ -53,7 +53,7 @@ def save_pipeline(checkpoint_dir: str, preprocessor: Preprocessor, model):
53
53
 
54
54
 
55
55
  def load_pipeline(checkpoint_path: str) -> tuple[Preprocessor, Any]:
56
- """Loads preprocessor and model from checkpoint_path.
56
+ """Load preprocessor and model from checkpoint_path.
57
57
 
58
58
  Args:
59
59
  checkpoint_path (str): The path to a previously saved pipeline.
@@ -47,7 +47,7 @@ class PLTTrainer:
47
47
  raise ValueError(
48
48
  "The label space of multi-class datasets is usually not large, so PLT training is unnecessary."
49
49
  "Please consider other methods."
50
- "If you have a multi-class set with numerous labels, please let us know"
50
+ "If you have a multi-class set with numerous labels, please let us know."
51
51
  )
52
52
 
53
53
  # cluster
@@ -137,11 +137,11 @@ class PLTTrainer:
137
137
  Given the ground-truth labels, [0, 1, 4], the resulting clusters are [0, 2].
138
138
 
139
139
  Args:
140
- cluster_mapping (np.ndarray): mapping from clusters generated by build_label_tree to labels .
141
- *labels (csr_matrix): labels in CSR sparse format.
140
+ cluster_mapping (np.ndarray): Mapping from clusters generated by build_label_tree to labels.
141
+ *labels (csr_matrix): Labels in CSR sparse format.
142
142
 
143
143
  Returns:
144
- Generator[csr_matrix]: resulting clusters converted from labels in CSR sparse format
144
+ Generator[csr_matrix]: Resulting clusters converted from labels in CSR sparse format.
145
145
  """
146
146
  mapping = np.empty(self.num_classes, dtype=np.uint32)
147
147
  for idx, clusters in enumerate(cluster_mapping):
@@ -169,12 +169,12 @@ class PLTTrainer:
169
169
  Also notice that this function deals with DENSE matrix.
170
170
 
171
171
  Args:
172
- cluster_mapping (np.ndarray): mapping from clusters generated by build_label_tree to labels .
173
- clusters (np.ndarray): predicted clusters from model 0.
174
- cluster_scores (Optional: np.ndarray): predicted scores of each cluster from model 0.
172
+ cluster_mapping (np.ndarray): Mapping from clusters generated by build_label_tree to labels.
173
+ clusters (np.ndarray): Predicted clusters from model 0.
174
+ cluster_scores (Optional: np.ndarray): Predicted scores of each cluster from model 0.
175
175
 
176
176
  Returns:
177
- Generator[np.ndarray]: resulting labels expanded from clusters
177
+ Generator[np.ndarray]: Resulting labels expanded from clusters.
178
178
  """
179
179
 
180
180
  labels_selected = []
@@ -192,10 +192,10 @@ class PLTTrainer:
192
192
  return labels_selected
193
193
 
194
194
  def fit(self, datasets):
195
- """fit model to the training dataset
195
+ """Fit model to the training dataset.
196
196
 
197
197
  Args:
198
- datasets: dict containing training, validation, and/or test datasets
198
+ datasets: Dict containing training, validation, and/or test datasets.
199
199
  """
200
200
  if self.get_best_model_path(level=1).exists():
201
201
  return
@@ -596,8 +596,8 @@ class PlainDataset(Dataset):
596
596
  this while generating clusters. There is no need to do multilabel binarization again.
597
597
 
598
598
  Args:
599
- x (list | ndarray | Tensor): texts.
600
- y (Optional: csr_matrix | ndarray | Tensor): labels.
599
+ x (list | ndarray | Tensor): Texts.
600
+ y (Optional: csr_matrix | ndarray | Tensor): Labels.
601
601
  """
602
602
 
603
603
  def __init__(self, x, y=None):
@@ -633,12 +633,12 @@ class PLTDataset(PlainDataset):
633
633
  """Dataset for model_1 of AttentionXML.
634
634
 
635
635
  Args:
636
- x: texts.
637
- y: labels.
636
+ x: Texts.
637
+ y: Labels.
638
638
  num_classes: number of classes.
639
- num_labels_selected: the number of selected labels.
640
- labels_selected: sampled predicted labels from model_0. Shape: (len(x), predict_top_k).
641
- label_scores: scores for each label. Shape: (len(x), predict_top_k).
639
+ num_labels_selected: The number of selected labels.
640
+ labels_selected: Sampled predicted labels from model_0. Shape: (len(x), predict_top_k).
641
+ label_scores: Scores for each label. Shape: (len(x), predict_top_k).
642
642
  """
643
643
 
644
644
  def __init__(
@@ -709,10 +709,10 @@ def build_label_tree(sparse_x: csr_matrix, sparse_y: csr_matrix, cluster_size: i
709
709
  ((0, 2), (1, 3), (4, 5)).
710
710
 
711
711
  Args:
712
- sparse_x: features extracted from texts in CSR sparse format
713
- sparse_y: binarized labels in CSR sparse format
714
- cluster_size: the maximum number of labels within each cluster
715
- output_dir: directory to store the clustering file
712
+ sparse_x: Features extracted from texts in CSR sparse format.
713
+ sparse_y: Binarized labels in CSR sparse format.
714
+ cluster_size: The maximum number of labels within each cluster.
715
+ output_dir: Directory to store the clustering file.
716
716
  """
717
717
  # skip constructing label tree if the output file already exists
718
718
  output_dir = output_dir if isinstance(output_dir, Path) else Path(output_dir)
@@ -755,8 +755,8 @@ def _split_cluster(cluster: ndarray, label_repr: csr_matrix) -> tuple[ndarray, n
755
755
  2. the end-of-loop criterion is the difference between the new and old average in-cluster distances to centroids.
756
756
 
757
757
  Args:
758
- cluster: a subset of labels
759
- label_repr: the normalized representations of the relationship between labels and texts of the given cluster
758
+ cluster: A subset of labels.
759
+ label_repr: The normalized representations of the relationship between labels and texts of the given cluster.
760
760
  """
761
761
  # Randomly choose two points as initial centroids and obtain their label representations
762
762
  centroids = label_repr[np.random.choice(len(cluster), size=2, replace=False)].toarray()
@@ -355,7 +355,7 @@ def get_embedding_weights_from_file(word_dict, embed_file, silent=False, cache=N
355
355
  cache (str, optional): Path to a directory for storing cached embeddings. Defaults to None.
356
356
 
357
357
  Returns:
358
- torch.Tensor: Embedding weights (vocab_size, embed_size)
358
+ torch.Tensor: Embedding weights (vocab_size, embed_size).
359
359
  """
360
360
  # Load pretrained word embedding
361
361
  load_embedding_from_file = embed_file not in pretrained_aliases
@@ -13,7 +13,7 @@ class _PrecisonRecallWrapperMetric(Metric):
13
13
  """Encapsulate common functions of RPrecision, PrecisionAtK, and RecallAtK.
14
14
 
15
15
  Args:
16
- top_k (int): the top k relevant labels to evaluate.
16
+ top_k (int): The top k relevant labels to evaluate.
17
17
  """
18
18
 
19
19
  # If the metric state of one batch is independent of the state of other batches,
@@ -133,7 +133,7 @@ class NDCGAtK(Metric):
133
133
  As a result, we implement our own batch-wise NDCG.
134
134
 
135
135
  Args:
136
- top_k (int): the top k relevant labels to evaluate.
136
+ top_k (int): The top k relevant labels to evaluate.
137
137
  """
138
138
 
139
139
  # If the metric state of one batch is independent of the state of other batches,
@@ -170,7 +170,7 @@ class NDCGAtK(Metric):
170
170
  return (gains * discount).sum(dim=1)
171
171
 
172
172
  def _idcg(self, target, discount):
173
- """Computes IDCG@k for a 0/1 target tensor.
173
+ """Compute IDCG@k for a 0/1 target tensor.
174
174
  A 0/1 target is a special case that doesn't require sorting.
175
175
  """
176
176
  cum_discount = discount.cumsum(dim=0)
@@ -11,7 +11,7 @@ from ..nn.metrics import get_metrics, tabulate_metrics
11
11
 
12
12
 
13
13
  class MultiLabelModel(L.LightningModule):
14
- """Abstract class handling Pytorch Lightning training flow
14
+ """Abstract class handling Pytorch Lightning training flow.
15
15
 
16
16
  Args:
17
17
  num_classes (int): Total number of classes.
@@ -70,7 +70,7 @@ class MultiLabelModel(L.LightningModule):
70
70
 
71
71
  @abstractmethod
72
72
  def shared_step(self, batch):
73
- """Return loss and predicted logits"""
73
+ """Return loss and predicted logits."""
74
74
  return NotImplemented
75
75
 
76
76
  def configure_optimizers(self):
@@ -164,11 +164,11 @@ class MultiLabelModel(L.LightningModule):
164
164
  return {"top_k_pred": top_k_idx, "top_k_pred_scores": top_k_scores}
165
165
 
166
166
  def forward(self, batch):
167
- """compute predicted logits"""
167
+ """Compute predicted logits."""
168
168
  return self.network(batch)["logits"]
169
169
 
170
170
  def print(self, *args, **kwargs):
171
- """Prints only from process 0 and not in silent mode. Use this in any
171
+ """Print only from process 0 and not in silent mode. Use this in any
172
172
  distributed mode to log only once."""
173
173
 
174
174
  if not self.silent:
@@ -3,7 +3,7 @@ from transformers import AutoModelForSequenceClassification
3
3
 
4
4
 
5
5
  class BERT(nn.Module):
6
- """BERT
6
+ """BERT.
7
7
 
8
8
  Args:
9
9
  num_classes (int): Total number of classes.
@@ -6,7 +6,7 @@ from .modules import LabelwiseAttention, LabelwiseLinearOutput, LabelwiseMultiHe
6
6
 
7
7
 
8
8
  class BERTAttention(nn.Module):
9
- """BERT + Label-wise Document Attention or Multi-Head Attention
9
+ """BERT + Label-wise Document Attention or Multi-Head Attention.
10
10
 
11
11
  Args:
12
12
  num_classes (int): Total number of classes.
@@ -7,8 +7,8 @@ from torch.nn.init import xavier_uniform_
7
7
 
8
8
  class CAML(nn.Module):
9
9
  """CAML (Convolutional Attention for Multi-Label classification)
10
- Follows the work of Mullenbach et al. [https://aclanthology.org/N18-1100.pdf]
11
- This class is for reproducing the results in the paper.
10
+ Following Mullenbach et al. [https://aclanthology.org/N18-1100.pdf],
11
+ this class is for reproducing the results in the paper.
12
12
  Use CNNLWAN instead for better modularization.
13
13
 
14
14
  Args:
@@ -5,7 +5,7 @@ from .modules import Embedding, CNNEncoder
5
5
 
6
6
 
7
7
  class KimCNN(nn.Module):
8
- """KimCNN
8
+ """KimCNN.
9
9
 
10
10
  Args:
11
11
  embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -16,7 +16,7 @@ from .modules import (
16
16
 
17
17
 
18
18
  class LabelwiseAttentionNetwork(ABC, nn.Module):
19
- """Base class for Labelwise Attention Network
19
+ """Base class for Labelwise Attention Network.
20
20
 
21
21
  Args:
22
22
  embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -48,7 +48,7 @@ class LabelwiseAttentionNetwork(ABC, nn.Module):
48
48
 
49
49
 
50
50
  class RNNLWAN(LabelwiseAttentionNetwork):
51
- """Base class for RNN Labelwise Attention Network"""
51
+ """Base class for RNN Labelwise Attention Network."""
52
52
 
53
53
  def forward(self, input):
54
54
  # (batch_size, sequence_length, embed_dim)
@@ -61,7 +61,7 @@ class RNNLWAN(LabelwiseAttentionNetwork):
61
61
 
62
62
 
63
63
  class BiGRULWAN(RNNLWAN):
64
- """BiGRU Labelwise Attention Network
64
+ """BiGRU Labelwise Attention Network.
65
65
 
66
66
  Args:
67
67
  embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -111,7 +111,7 @@ class BiGRULWAN(RNNLWAN):
111
111
 
112
112
 
113
113
  class BiLSTMLWAN(RNNLWAN):
114
- """BiLSTM Labelwise Attention Network
114
+ """BiLSTM Labelwise Attention Network.
115
115
 
116
116
  Args:
117
117
  embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -155,7 +155,7 @@ class BiLSTMLWAN(RNNLWAN):
155
155
 
156
156
 
157
157
  class BiLSTMLWMHAN(LabelwiseAttentionNetwork):
158
- """BiLSTM Labelwise Multihead Attention Network
158
+ """BiLSTM Labelwise Multihead Attention Network.
159
159
 
160
160
  Args:
161
161
  embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -217,7 +217,7 @@ class BiLSTMLWMHAN(LabelwiseAttentionNetwork):
217
217
 
218
218
 
219
219
  class CNNLWAN(LabelwiseAttentionNetwork):
220
- """CNN Labelwise Attention Network
220
+ """CNN Labelwise Attention Network.
221
221
 
222
222
  Args:
223
223
  embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -7,7 +7,7 @@ from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
7
7
 
8
8
 
9
9
  class Embedding(nn.Module):
10
- """Embedding layer with dropout
10
+ """Embedding layer with dropout.
11
11
 
12
12
  Args:
13
13
  embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -26,7 +26,7 @@ class Embedding(nn.Module):
26
26
 
27
27
 
28
28
  class RNNEncoder(ABC, nn.Module):
29
- """Base class of RNN encoder with dropout
29
+ """Base class of RNN encoder with dropout.
30
30
 
31
31
  Args:
32
32
  input_size (int): The number of expected features in the input.
@@ -55,7 +55,7 @@ class RNNEncoder(ABC, nn.Module):
55
55
 
56
56
 
57
57
  class GRUEncoder(RNNEncoder):
58
- """Bi-directional GRU encoder with dropout
58
+ """Bi-directional GRU encoder with dropout.
59
59
 
60
60
  Args:
61
61
  input_size (int): The number of expected features in the input.
@@ -73,7 +73,7 @@ class GRUEncoder(RNNEncoder):
73
73
 
74
74
 
75
75
  class LSTMEncoder(RNNEncoder):
76
- """Bi-directional LSTM encoder with dropout
76
+ """Bi-directional LSTM encoder with dropout.
77
77
 
78
78
  Args:
79
79
  input_size (int): The number of expected features in the input.
@@ -91,7 +91,7 @@ class LSTMEncoder(RNNEncoder):
91
91
 
92
92
 
93
93
  class CNNEncoder(nn.Module):
94
- """Multi-filter-size CNN encoder for text classification with max-pooling
94
+ """Multi-filter-size CNN encoder for text classification with max-pooling.
95
95
 
96
96
  Args:
97
97
  input_size (int): The number of expected features in the input.
@@ -103,7 +103,7 @@ class CNNEncoder(nn.Module):
103
103
  If num_pool = 0, do nothing.
104
104
  If num_pool = 1, do typical max-pooling.
105
105
  If num_pool > 1, do adaptive max-pooling.
106
- channel_last (bool): Whether to transpose the dimension from (batch_size, num_channel, length) to (batch_size, length, num_channel)
106
+ channel_last (bool): Whether to transpose the dimension from (batch_size, num_channel, length) to (batch_size, length, num_channel).
107
107
  """
108
108
 
109
109
  def __init__(
@@ -149,8 +149,8 @@ class CNNEncoder(nn.Module):
149
149
 
150
150
 
151
151
  class LabelwiseAttention(nn.Module):
152
- """Applies attention technique to summarize the sequence for each label
153
- See `Explainable Prediction of Medical Codes from Clinical Text <https://aclanthology.org/N18-1100.pdf>`_
152
+ """Apply attention technique to summarize the sequence for each label.
153
+ See `Explainable Prediction of Medical Codes from Clinical Text <https://aclanthology.org/N18-1100.pdf>`_.
154
154
 
155
155
  Args:
156
156
  input_size (int): The number of expected features in the input.
@@ -171,7 +171,7 @@ class LabelwiseAttention(nn.Module):
171
171
 
172
172
 
173
173
  class LabelwiseMultiHeadAttention(nn.Module):
174
- """Labelwise multi-head attention
174
+ """Labelwise multi-head attention.
175
175
 
176
176
  Args:
177
177
  input_size (int): The number of expected features in the input.
@@ -197,7 +197,7 @@ class LabelwiseMultiHeadAttention(nn.Module):
197
197
 
198
198
 
199
199
  class LabelwiseLinearOutput(nn.Module):
200
- """Applies a linear transformation to the incoming data for each label
200
+ """Apply a linear transformation to the incoming data for each label.
201
201
 
202
202
  Args:
203
203
  input_size (int): The number of expected features in the input.
@@ -6,7 +6,7 @@ from .modules import Embedding, CNNEncoder
6
6
 
7
7
 
8
8
  class XMLCNN(nn.Module):
9
- """XML-CNN
9
+ """XML-CNN.
10
10
 
11
11
  Args:
12
12
  embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: libmultilabel
3
- Version: 0.7.2
3
+ Version: 0.7.3
4
4
  Summary: A library for multi-class and multi-label classification
5
5
  Home-page: https://github.com/ASUS-AICS/LibMultiLabel
6
6
  Author: LibMultiLabel Team
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3
19
19
  Classifier: Programming Language :: Python :: 3.8
20
20
  Requires-Python: >=3.8
21
21
  License-File: LICENSE
22
- Requires-Dist: liblinear-multicore
22
+ Requires-Dist: liblinear-multicore>=2.49.0
23
23
  Requires-Dist: numba
24
24
  Requires-Dist: pandas>1.3.0
25
25
  Requires-Dist: PyYAML
@@ -1,4 +1,4 @@
1
- liblinear-multicore
1
+ liblinear-multicore>=2.49.0
2
2
  numba
3
3
  pandas>1.3.0
4
4
  PyYAML
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = libmultilabel
3
- version = 0.7.2
3
+ version = 0.7.3
4
4
  author = LibMultiLabel Team
5
5
  license = MIT License
6
6
  license_file = LICENSE
@@ -25,7 +25,7 @@ classifiers =
25
25
  [options]
26
26
  packages = find:
27
27
  install_requires =
28
- liblinear-multicore
28
+ liblinear-multicore>=2.49.0
29
29
  numba
30
30
  pandas>1.3.0
31
31
  PyYAML
File without changes
File without changes