libmultilabel 0.7.2__tar.gz → 0.7.3__tar.gz
This diff shows the content of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/PKG-INFO +2 -2
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/common_utils.py +9 -8
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/linear.py +29 -21
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/metrics.py +6 -6
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/tree.py +6 -6
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/utils.py +2 -2
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/attentionxml.py +23 -23
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/data_utils.py +1 -1
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/metrics.py +3 -3
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/model.py +4 -4
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/bert.py +1 -1
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/bert_attention.py +1 -1
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/caml.py +2 -2
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/kim_cnn.py +1 -1
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/labelwise_attention_networks.py +6 -6
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/modules.py +10 -10
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/xml_cnn.py +1 -1
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel.egg-info/PKG-INFO +2 -2
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel.egg-info/requires.txt +1 -1
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/setup.cfg +2 -2
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/LICENSE +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/README.md +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/__init__.py +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/__init__.py +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/data_utils.py +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/preprocessor.py +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/logging.py +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/__init__.py +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/__init__.py +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/nn_utils.py +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel.egg-info/SOURCES.txt +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel.egg-info/dependency_links.txt +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel.egg-info/top_level.txt +0 -0
- {libmultilabel-0.7.2 → libmultilabel-0.7.3}/pyproject.toml +0 -0
{libmultilabel-0.7.2 → libmultilabel-0.7.3}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: libmultilabel
-Version: 0.7.2
+Version: 0.7.3
 Summary: A library for multi-class and multi-label classification
 Home-page: https://github.com/ASUS-AICS/LibMultiLabel
 Author: LibMultiLabel Team
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.8
 Requires-Python: >=3.8
 License-File: LICENSE
-Requires-Dist: liblinear-multicore
+Requires-Dist: liblinear-multicore>=2.49.0
 Requires-Dist: numba
 Requires-Dist: pandas>1.3.0
 Requires-Dist: PyYAML

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/common_utils.py

@@ -34,10 +34,10 @@ class AttributeDict(dict):
         self._used.discard(key)
 
     def used_items(self) -> dict:
-        """
+        """Return the items that have been used at least once after being set.
 
         Returns:
-            dict:
+            dict: The used items.
         """
         return {k: self[k] for k in self._used}
 
@@ -46,10 +46,10 @@ def dump_log(log_path, metrics=None, split=None, config=None):
     """Write log including the used items of config and the evaluation scores.
 
     Args:
-        log_path(str):
-        metrics (dict):
-        split (str): val or test
-        config (dict):
+        log_path(str): Path to log path.
+        metrics (dict): Metric and scores in dictionary format, defaults to None.
+        split (str): One of `val` or `test`, defaults to None.
+        config (dict): Config to save, defaults to None.
     """
     os.makedirs(os.path.dirname(log_path), exist_ok=True)
     if os.path.isfile(log_path):
@@ -82,7 +82,8 @@ def argsort_top_k(vals, k, axis=-1):
         k: Consider only the top k elements for each query
         axis: Axis along which to sort. The default is -1 (the last axis).
 
-    Returns:
+    Returns:
+        Array of indices that sort vals along the specified axis.
     """
     unsorted_top_k_idx = np.argpartition(vals, -k, axis=axis)[:, -k:]
     unsorted_top_k_scores = np.take_along_axis(vals, unsorted_top_k_idx, axis=axis)
@@ -130,7 +131,7 @@ def is_multiclass_dataset(dataset, label="label"):
 
 
 def timer(func):
-    """Log info-level wall time"""
+    """Log info-level wall time."""
 
     @wraps(func)
     def wrapper(*args, **kwargs):
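
A note on the `argsort_top_k` hunk: the implementation context shown above also reveals where the speed comes from. `np.argpartition` gathers the k largest entries of each row into the last k slots in O(n), and only those k entries are then fully sorted. A self-contained sketch of the same idea (a hypothetical helper, returning indices in descending score order for illustration; not the library's exact code):

import numpy as np

def top_k_indices(vals: np.ndarray, k: int) -> np.ndarray:
    # O(n) partition: the k largest entries of each row land in the last k slots.
    top_k_idx = np.argpartition(vals, -k, axis=-1)[:, -k:]
    # O(k log k): fully sort just those k entries, largest first.
    top_k_scores = np.take_along_axis(vals, top_k_idx, axis=-1)
    order = np.argsort(-top_k_scores, axis=-1)
    return np.take_along_axis(top_k_idx, order, axis=-1)

print(top_k_indices(np.array([[0.1, 0.9, 0.4, 0.7]]), k=2))  # [[1 3]]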

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/linear.py

@@ -5,7 +5,7 @@ import os
 
 import numpy as np
 import scipy.sparse as sparse
-from liblinear.liblinearutil import train
+from liblinear.liblinearutil import train, problem, parameter, solver_names
 from tqdm import tqdm
 
 __all__ = [
@@ -39,7 +39,7 @@ class FlatModel:
         self.multiclass = multiclass
 
     def predict_values(self, x: sparse.csr_matrix) -> np.ndarray:
-        """
+        """Calculate the decision values associated with x.
 
         Args:
             x (sparse.csr_matrix): A matrix with dimension number of instances * number of features.
@@ -79,7 +79,7 @@ def train_1vsrest(
     options: str = "",
     verbose: bool = True,
 ) -> FlatModel:
-    """
+    """Train a linear model for multi-label data using a one-vs-rest strategy.
 
     Args:
         y (sparse.csr_matrix): A 0/1 matrix with dimensions number of instances * number of classes.
@@ -169,9 +169,9 @@ def train_thresholding(
     options: str = "",
     verbose: bool = True,
 ) -> FlatModel:
-    """
+    """Train a linear model for multi-label data using a one-vs-rest strategy
     and cross-validation to pick decision thresholds optimizing the sum of Macro-F1 and Micro-F1.
-
+    Outperform train_1vsrest in most aspects at the cost of higher time complexity
     due to an internal cross-validation.
 
     This method is the micromacro-freq approach from this CIKM 2023 paper:
@@ -235,7 +235,7 @@ def _micromacro_one_label(
     negatives, and the number of labels processed.
 
     Returns:
-        tuple[np.ndarray, float, dict]:
+        tuple[np.ndarray, float, dict]: The weights, threshold, and the updated stats for calculating
         Micro-F1.
     """
 
@@ -319,7 +319,7 @@ def _micromacro_one_label(
 
 
 def _do_train(y: np.ndarray, x: sparse.csr_matrix, options: str) -> np.matrix:
-    """
+    """Wrap around liblinear.liblinearutil.train.
     Forcibly suppresses all IO regardless of options.
 
     Args:
@@ -328,13 +328,17 @@ def _do_train(y: np.ndarray, x: sparse.csr_matrix, options: str) -> np.matrix:
         options (str): The option string passed to liblinear.
 
     Returns:
-        np.matrix:
+        np.matrix: The weights.
     """
     if y.shape[0] == 0:
         return np.matrix(np.zeros((x.shape[1], 1)))
 
+    prob = problem(y, x)
+    param = parameter(options)
+    if param.solver_type in [solver_names.L2R_L1LOSS_SVC_DUAL, solver_names.L2R_L2LOSS_SVC_DUAL]:
+        param.w_recalc = True  # only works for solving L1/L2-SVM dual
     with silent_stderr():
-        model = train(y, x, options)
+        model = train(prob, param)
 
     w = np.ctypeslib.as_array(model.w, (x.shape[1], 1))
     w = np.asmatrix(w)
@@ -373,11 +377,11 @@ def _fmeasure(y_true: np.ndarray, y_pred: np.ndarray) -> float:
     """Calculate F1 score.
 
     Args:
-        y_true (np.ndarray):
-        y_pred (np.ndarray):
+        y_true (np.ndarray): Array of +1/-1.
+        y_pred (np.ndarray): Array of +1/-1.
 
     Returns:
-        float:
+        float: The F1 score.
     """
     tp = np.sum(np.logical_and(y_true == 1, y_pred == 1))
     fn = np.sum(np.logical_and(y_true == 1, y_pred == -1))
@@ -396,10 +400,10 @@ def train_cost_sensitive(
     options: str = "",
     verbose: bool = True,
 ) -> FlatModel:
-    """
+    """Train a linear model for multi-label data using a one-vs-rest strategy
     and cross-validation to pick an optimal asymmetric misclassification cost
     for Macro-F1.
-
+    Outperform train_1vsrest in most aspects at the cost of higher
     time complexity.
     See user guide for more details.
 
@@ -413,7 +417,7 @@ def train_cost_sensitive(
     Returns:
         A model which can be used in predict_values.
     """
-    #
+    # Follow the MATLAB implementation at https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/multilabel/
    x, options, bias = _prepare_options(x, options)
 
    y = y.tocsc()
@@ -446,7 +450,7 @@ def _cost_sensitive_one_label(y: np.ndarray, x: sparse.csr_matrix, options: str)
         options (str): The option string passed to liblinear.
 
     Returns:
-        np.ndarray:
+        np.ndarray: The weights.
     """
 
     l = y.shape[0]
@@ -500,10 +504,10 @@ def train_cost_sensitive_micro(
     options: str = "",
     verbose: bool = True,
 ) -> FlatModel:
-    """
+    """Train a linear model for multi-label data using a one-vs-rest strategy
     and cross-validation to pick an optimal asymmetric misclassification cost
     for Micro-F1.
-
+    Outperform train_1vsrest in most aspects at the cost of higher
     time complexity.
     See user guide for more details.
 
@@ -571,7 +575,7 @@ def train_binary_and_multiclass(
     options: str = "",
     verbose: bool = True,
 ) -> FlatModel:
-    """
+    """Train a linear model for binary and multi-class data.
 
     Args:
         y (sparse.csr_matrix): A 0/1 matrix with dimensions number of instances * number of classes.
@@ -592,8 +596,12 @@ def train_binary_and_multiclass(
         Invalid dataset. Only multi-class dataset is allowed."""
     y = np.squeeze(nonzero_label_ids)
 
+    prob = problem(y, x)
+    param = parameter(options)
+    if param.solver_type in [solver_names.L2R_L1LOSS_SVC_DUAL, solver_names.L2R_L2LOSS_SVC_DUAL]:
+        param.w_recalc = True
     with silent_stderr():
-        model = train(y, x, options)
+        model = train(prob, param)
 
     # Labels appeared in training set; length may be smaller than num_labels
     train_labels = np.array(model.get_labels(), dtype="int")
@@ -622,7 +630,7 @@ def train_binary_and_multiclass(
 
 
 def predict_values(model, x: sparse.csr_matrix) -> np.ndarray:
-    """
+    """Calculate the decision values associated with x, equivalent to model.predict_values(x).
 
     Args:
         model: A model returned from a training function.
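
The one behavioral change in linear.py is how liblinear is driven: instead of passing `y`, `x`, and an option string straight to `train`, the code now builds explicit `problem`/`parameter` objects so it can enable `w_recalc` when the solver is the L1- or L2-loss SVM dual, which is why the dependency floor moves to `liblinear-multicore>=2.49.0`. A minimal sketch of the same call pattern on a toy problem (the data and the `-s 1` option are illustrative; `solver_names` and `w_recalc` are taken from the diff above and require liblinear-multicore 2.49+):

from liblinear.liblinearutil import train, problem, parameter, solver_names

# Toy binary problem: y in {+1, -1}, x as index->value feature dicts.
y = [1, -1, 1, -1]
x = [{1: 0.5, 2: 1.0}, {1: -0.4, 2: 0.1}, {1: 0.9, 2: 0.8}, {1: -0.2, 2: -0.7}]

prob = problem(y, x)
param = parameter("-s 1 -q")  # -s 1: L2-regularized L2-loss SVC (dual)
# w_recalc recomputes the weight vector after the dual solve; it only
# applies to the L1/L2-SVM dual solvers, hence the guard mirroring the diff.
if param.solver_type in [solver_names.L2R_L1LOSS_SVC_DUAL, solver_names.L2R_L2LOSS_SVC_DUAL]:
    param.w_recalc = True
model = train(prob, param)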

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/metrics.py

@@ -8,7 +8,7 @@ __all__ = ["get_metrics", "compute_metrics", "tabulate_metrics", "MetricCollecti
 
 
 def _argsort_top_k(preds: np.ndarray, top_k: int) -> np.ndarray:
-    """
+    """Sort the top k indices in O(n + k log k) time.
     The sorting order is ascending to be consistent with np.sort.
     This means the last element is the largest, the first element is the kth largest.
     """
@@ -18,7 +18,7 @@ def _argsort_top_k(preds: np.ndarray, top_k: int) -> np.ndarray:
 
 
 def _dcg_argsort(argsort_preds: np.ndarray, target: np.ndarray, top_k: int) -> np.ndarray:
-    """
+    """Compute DCG@k with a sorted preds array and a target array."""
     top_k_idx = argsort_preds[:, -top_k:][:, ::-1]
     gains = np.take_along_axis(target, top_k_idx, axis=-1)
     discount = 1 / (np.log2(np.arange(top_k) + 2))
@@ -28,7 +28,7 @@ def _dcg_argsort(argsort_preds: np.ndarray, target: np.ndarray, top_k: int) -> n
 
 
 def _idcg(target: np.ndarray, top_k: int) -> np.ndarray:
-    """
+    """Compute IDCG@k for a 0/1 target array. A 0/1 target is a special case that
     doesn't require sorting. If IDCG is computed with DCG,
     then target will need to be sorted, which incurs a large overhead.
     """
@@ -247,7 +247,7 @@ class MetricCollection(dict):
         self.max_k = max(getattr(metric, "top_k", 0) for metric in self.metrics.values())
 
     def update(self, preds: np.ndarray, target: np.ndarray):
-        """
+        """Add a batch of decision values and labels.
 
         Args:
             preds (np.ndarray): A matrix of decision values with dimensions number of instances * number of classes.
@@ -268,7 +268,7 @@ class MetricCollection(dict):
             metric.update(preds, target)
 
     def compute(self) -> dict[str, float]:
-        """
+        """Compute the metrics from the accumulated batches of decision values and labels.
 
         Returns:
             dict[str, float]: A dictionary of metric values.
@@ -279,7 +279,7 @@ class MetricCollection(dict):
         return ret
 
     def reset(self):
-        """
+        """Clear the accumulated batches of decision values and labels."""
         for metric in self.metrics.values():
             metric.reset()
 
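
The reworded docstrings describe a two-part shortcut: DCG@k only needs the gains at the argsorted top-k positions, and for 0/1 targets IDCG@k collapses to a prefix sum of the discount vector, so the target never has to be sorted. A worked single-instance example of that reduction (illustrative only, not the library's batched code):

import numpy as np

target = np.array([0, 1, 0, 1, 1])           # 0/1 relevance
preds = np.array([0.2, 0.9, 0.1, 0.5, 0.4])  # decision values
top_k = 3

# DCG@k: gains at the predicted top-k positions, discounted by rank.
top_k_idx = np.argsort(preds)[-top_k:][::-1]
discount = 1 / np.log2(np.arange(top_k) + 2)
dcg = (target[top_k_idx] * discount).sum()

# IDCG@k for a 0/1 target: the ideal ranking puts all relevant labels
# first, so it is just the sum of the first min(#relevant, k) discounts.
idcg = discount[: min(int(target.sum()), top_k)].sum()

print(dcg / idcg)  # NDCG@3 = 1.0 here, since the top-3 are all relevant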

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/tree.py

@@ -59,7 +59,7 @@ class TreeModel:
         x: sparse.csr_matrix,
         beam_width: int = 10,
     ) -> np.ndarray:
-        """
+        """Calculate the probability estimates associated with x.
 
         Args:
             x (sparse.csr_matrix): A matrix with dimension number of instances * number of features.
@@ -118,7 +118,7 @@ def train_tree(
     dmax=10,
     verbose: bool = True,
 ) -> TreeModel:
-    """
+    """Train a linear model for multi-label data using a divide-and-conquer strategy.
     The algorithm used is based on https://github.com/xmc-aalto/bonsai.
 
     Args:
@@ -178,7 +178,7 @@ def train_tree(
 
 
 def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray, d: int, K: int, dmax: int) -> Node:
-    """
+    """Build the tree recursively by kmeans clustering.
 
     Args:
         label_representation (sparse.csr_matrix): A matrix with dimensions number of classes under this node * number of features.
@@ -235,7 +235,7 @@ def get_estimated_model_size(root):
 
 
 def _train_node(y: sparse.csr_matrix, x: sparse.csr_matrix, options: str, node: Node):
-    """If node is internal,
+    """If node is internal, compute the metalabels representing each child and train
     on the metalabels. Otherwise, train on y.
 
     Args:
@@ -258,7 +258,7 @@ def _train_node(y: sparse.csr_matrix, x: sparse.csr_matrix, options: str, node:
 
 
 def _flatten_model(root: Node) -> tuple[linear.FlatModel, np.ndarray]:
-    """
+    """Flatten tree weight matrices into a single weight matrix. The flattened weight
     matrix is used to predict all possible values, which is cached for beam search.
     This pessimizes complexity but is faster in practice.
     Consecutive values of the returned map denotes the start and end indices of the
@@ -289,7 +289,7 @@ def _flatten_model(root: Node) -> tuple[linear.FlatModel, np.ndarray]:
 
     model = linear.FlatModel(
         name="flattened-tree",
-        weights=sparse.hstack(weights, "csr"),
+        weights=sparse.hstack(weights, "csc"),
         bias=bias,
         thresholds=0,
         multiclass=False,
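
Putting the tree.py docstrings together, training and prediction follow the same shape as the flat API. A usage sketch, assuming `train_tree` is re-exported under `libmultilabel.linear` and using tiny random data in place of the library's preprocessing output:

import numpy as np
import scipy.sparse as sparse
import libmultilabel.linear as linear

# y: 0/1 csr_matrix (instances x classes); x: csr_matrix (instances x features).
y = sparse.csr_matrix(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 1]]))
x = sparse.csr_matrix(np.random.rand(4, 5))

model = linear.train_tree(y, x)                 # divide-and-conquer training
probs = model.predict_values(x, beam_width=10)  # probability estimates per class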

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/linear/utils.py

@@ -31,7 +31,7 @@ LINEAR_TECHNIQUES = {
 
 
 def save_pipeline(checkpoint_dir: str, preprocessor: Preprocessor, model):
-    """
+    """Save preprocessor and model to checkpoint_dir/linear_pipline.pickle.
 
     Args:
         checkpoint_dir (str): The directory to save to.
@@ -53,7 +53,7 @@ def save_pipeline(checkpoint_dir: str, preprocessor: Preprocessor, model):
 
 
 def load_pipeline(checkpoint_path: str) -> tuple[Preprocessor, Any]:
-    """
+    """Load preprocessor and model from checkpoint_path.
 
     Args:
         checkpoint_path (str): The path to a previously saved pipeline.
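
Given the two signatures above, a round trip looks roughly as follows (a sketch: the import path and checkpoint directory are assumptions, and `preprocessor`/`model` stand in for objects produced by the library's preprocessing and training steps):

import libmultilabel.linear as linear

# Persist both halves of the pipeline; per the docstring above, this
# writes <checkpoint_dir>/linear_pipline.pickle.
linear.save_pipeline("runs/example", preprocessor, model)

# Restore them later for inference on new data.
preprocessor, model = linear.load_pipeline("runs/example/linear_pipline.pickle")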

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/attentionxml.py

@@ -47,7 +47,7 @@ class PLTTrainer:
            raise ValueError(
                "The label space of multi-class datasets is usually not large, so PLT training is unnecessary."
                "Please consider other methods."
-                "If you have a multi-class set with numerous labels, please let us know"
+                "If you have a multi-class set with numerous labels, please let us know."
            )
 
        # cluster
@@ -137,11 +137,11 @@ class PLTTrainer:
         Given the ground-truth labels, [0, 1, 4], the resulting clusters are [0, 2].
 
         Args:
-            cluster_mapping (np.ndarray):
-            *labels (csr_matrix):
+            cluster_mapping (np.ndarray): Mapping from clusters generated by build_label_tree to labels.
+            *labels (csr_matrix): Labels in CSR sparse format.
 
         Returns:
-            Generator[csr_matrix]:
+            Generator[csr_matrix]: Resulting clusters converted from labels in CSR sparse format.
         """
         mapping = np.empty(self.num_classes, dtype=np.uint32)
         for idx, clusters in enumerate(cluster_mapping):
@@ -169,12 +169,12 @@ class PLTTrainer:
         Also notice that this function deals with DENSE matrix.
 
         Args:
-            cluster_mapping (np.ndarray):
-            clusters (np.ndarray):
-            cluster_scores (Optional: np.ndarray):
+            cluster_mapping (np.ndarray): Mapping from clusters generated by build_label_tree to labels.
+            clusters (np.ndarray): Predicted clusters from model 0.
+            cluster_scores (Optional: np.ndarray): Predicted scores of each cluster from model 0.
 
         Returns:
-            Generator[np.ndarray]:
+            Generator[np.ndarray]: Resulting labels expanded from clusters.
         """
 
         labels_selected = []
@@ -192,10 +192,10 @@ class PLTTrainer:
         return labels_selected
 
     def fit(self, datasets):
-        """
+        """Fit model to the training dataset.
 
         Args:
-            datasets:
+            datasets: Dict containing training, validation, and/or test datasets.
         """
         if self.get_best_model_path(level=1).exists():
             return
@@ -596,8 +596,8 @@ class PlainDataset(Dataset):
     this while generating clusters. There is no need to do multilabel binarization again.
 
     Args:
-        x (list | ndarray | Tensor):
-        y (Optional: csr_matrix | ndarray | Tensor):
+        x (list | ndarray | Tensor): Texts.
+        y (Optional: csr_matrix | ndarray | Tensor): Labels.
     """
 
     def __init__(self, x, y=None):
@@ -633,12 +633,12 @@ class PLTDataset(PlainDataset):
    """Dataset for model_1 of AttentionXML.
 
    Args:
-        x:
-        y:
+        x: Texts.
+        y: Labels.
        num_classes: number of classes.
-        num_labels_selected:
-        labels_selected:
-        label_scores:
+        num_labels_selected: The number of selected labels.
+        labels_selected: Sampled predicted labels from model_0. Shape: (len(x), predict_top_k).
+        label_scores: Scores for each label. Shape: (len(x), predict_top_k).
    """
 
    def __init__(
@@ -709,10 +709,10 @@ def build_label_tree(sparse_x: csr_matrix, sparse_y: csr_matrix, cluster_size: i
     ((0, 2), (1, 3), (4, 5)).
 
     Args:
-        sparse_x:
-        sparse_y:
-        cluster_size:
-        output_dir:
+        sparse_x: Features extracted from texts in CSR sparse format.
+        sparse_y: Binarized labels in CSR sparse format.
+        cluster_size: The maximum number of labels within each cluster.
+        output_dir: Directory to store the clustering file.
     """
     # skip constructing label tree if the output file already exists
     output_dir = output_dir if isinstance(output_dir, Path) else Path(output_dir)
@@ -755,8 +755,8 @@ def _split_cluster(cluster: ndarray, label_repr: csr_matrix) -> tuple[ndarray, n
     2. the end-of-loop criterion is the difference between the new and old average in-cluster distances to centroids.
 
     Args:
-        cluster:
-        label_repr:
+        cluster: A subset of labels.
+        label_repr: The normalized representations of the relationship between labels and texts of the given cluster.
     """
     # Randomly choose two points as initial centroids and obtain their label representations
     centroids = label_repr[np.random.choice(len(cluster), size=2, replace=False)].toarray()
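
The cluster-mapping docstring now has enough detail to check its own example: with clusters {0, 1}, {2, 3}, {4, 5}, inverting the mapping from label to cluster (as the `mapping = np.empty(...)` loop above does) sends ground-truth labels [0, 1, 4] to clusters [0, 2]. A self-contained, hypothetical version:

import numpy as np

# Hypothetical output of build_label_tree: cluster i holds these labels.
cluster_mapping = [np.array([0, 1]), np.array([2, 3]), np.array([4, 5])]

num_classes = 6
mapping = np.empty(num_classes, dtype=np.uint32)
for idx, labels in enumerate(cluster_mapping):
    mapping[labels] = idx  # invert: label -> cluster

ground_truth = np.array([0, 1, 4])
print(np.unique(mapping[ground_truth]))  # [0 2]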

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/data_utils.py

@@ -355,7 +355,7 @@ def get_embedding_weights_from_file(word_dict, embed_file, silent=False, cache=N
         cache (str, optional): Path to a directory for storing cached embeddings. Defaults to None.
 
     Returns:
-        torch.Tensor: Embedding weights (vocab_size, embed_size)
+        torch.Tensor: Embedding weights (vocab_size, embed_size).
     """
     # Load pretrained word embedding
     load_embedding_from_file = embed_file not in pretrained_aliases

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/metrics.py

@@ -13,7 +13,7 @@ class _PrecisonRecallWrapperMetric(Metric):
     """Encapsulate common functions of RPrecision, PrecisionAtK, and RecallAtK.
 
     Args:
-        top_k (int):
+        top_k (int): The top k relevant labels to evaluate.
     """
 
     # If the metric state of one batch is independent of the state of other batches,
@@ -133,7 +133,7 @@ class NDCGAtK(Metric):
     As a result, we implement our own batch-wise NDCG.
 
     Args:
-        top_k (int):
+        top_k (int): The top k relevant labels to evaluate.
     """
 
     # If the metric state of one batch is independent of the state of other batches,
@@ -170,7 +170,7 @@ class NDCGAtK(Metric):
         return (gains * discount).sum(dim=1)
 
     def _idcg(self, target, discount):
-        """
+        """Compute IDCG@k for a 0/1 target tensor.
         A 0/1 target is a special case that doesn't require sorting.
         """
         cum_discount = discount.cumsum(dim=0)

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/model.py

@@ -11,7 +11,7 @@ from ..nn.metrics import get_metrics, tabulate_metrics
 
 
 class MultiLabelModel(L.LightningModule):
-    """Abstract class handling Pytorch Lightning training flow
+    """Abstract class handling Pytorch Lightning training flow.
 
     Args:
         num_classes (int): Total number of classes.
@@ -70,7 +70,7 @@ class MultiLabelModel(L.LightningModule):
 
     @abstractmethod
     def shared_step(self, batch):
-        """Return loss and predicted logits"""
+        """Return loss and predicted logits."""
         return NotImplemented
 
     def configure_optimizers(self):
@@ -164,11 +164,11 @@ class MultiLabelModel(L.LightningModule):
         return {"top_k_pred": top_k_idx, "top_k_pred_scores": top_k_scores}
 
     def forward(self, batch):
-        """
+        """Compute predicted logits."""
         return self.network(batch)["logits"]
 
     def print(self, *args, **kwargs):
-        """
+        """Print only from process 0 and not in silent mode. Use this in any
         distributed mode to log only once."""
 
         if not self.silent:

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/bert_attention.py

@@ -6,7 +6,7 @@ from .modules import LabelwiseAttention, LabelwiseLinearOutput, LabelwiseMultiHe
 
 
 class BERTAttention(nn.Module):
-    """BERT + Label-wise Document Attention or Multi-Head Attention
+    """BERT + Label-wise Document Attention or Multi-Head Attention.
 
     Args:
         num_classes (int): Total number of classes.

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/caml.py

@@ -7,8 +7,8 @@ from torch.nn.init import xavier_uniform_
 
 class CAML(nn.Module):
     """CAML (Convolutional Attention for Multi-Label classification)
-
-
+    Following Mullenbach et al. [https://aclanthology.org/N18-1100.pdf],
+    this class is for reproducing the results in the paper.
     Use CNNLWAN instead for better modularization.
 
     Args:

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/labelwise_attention_networks.py

@@ -16,7 +16,7 @@ from .modules import (
 
 
 class LabelwiseAttentionNetwork(ABC, nn.Module):
-    """Base class for Labelwise Attention Network
+    """Base class for Labelwise Attention Network.
 
     Args:
         embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -48,7 +48,7 @@ class LabelwiseAttentionNetwork(ABC, nn.Module):
 
 
 class RNNLWAN(LabelwiseAttentionNetwork):
-    """Base class for RNN Labelwise Attention Network"""
+    """Base class for RNN Labelwise Attention Network."""
 
     def forward(self, input):
         # (batch_size, sequence_length, embed_dim)
@@ -61,7 +61,7 @@ class RNNLWAN(LabelwiseAttentionNetwork):
 
 
 class BiGRULWAN(RNNLWAN):
-    """BiGRU Labelwise Attention Network
+    """BiGRU Labelwise Attention Network.
 
     Args:
         embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -111,7 +111,7 @@ class BiGRULWAN(RNNLWAN):
 
 
 class BiLSTMLWAN(RNNLWAN):
-    """BiLSTM Labelwise Attention Network
+    """BiLSTM Labelwise Attention Network.
 
     Args:
         embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -155,7 +155,7 @@ class BiLSTMLWAN(RNNLWAN):
 
 
 class BiLSTMLWMHAN(LabelwiseAttentionNetwork):
-    """BiLSTM Labelwise Multihead Attention Network
+    """BiLSTM Labelwise Multihead Attention Network.
 
     Args:
         embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -217,7 +217,7 @@ class BiLSTMLWMHAN(LabelwiseAttentionNetwork):
 
 
 class CNNLWAN(LabelwiseAttentionNetwork):
-    """CNN Labelwise Attention Network
+    """CNN Labelwise Attention Network.
 
     Args:
         embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel/nn/networks/modules.py

@@ -7,7 +7,7 @@ from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
 
 
 class Embedding(nn.Module):
-    """Embedding layer with dropout
+    """Embedding layer with dropout.
 
     Args:
         embed_vecs (torch.Tensor): The pre-trained word vectors of shape (vocab_size, embed_dim).
@@ -26,7 +26,7 @@ class Embedding(nn.Module):
 
 
 class RNNEncoder(ABC, nn.Module):
-    """Base class of RNN encoder with dropout
+    """Base class of RNN encoder with dropout.
 
     Args:
         input_size (int): The number of expected features in the input.
@@ -55,7 +55,7 @@ class RNNEncoder(ABC, nn.Module):
 
 
 class GRUEncoder(RNNEncoder):
-    """Bi-directional GRU encoder with dropout
+    """Bi-directional GRU encoder with dropout.
 
     Args:
         input_size (int): The number of expected features in the input.
@@ -73,7 +73,7 @@ class GRUEncoder(RNNEncoder):
 
 
 class LSTMEncoder(RNNEncoder):
-    """Bi-directional LSTM encoder with dropout
+    """Bi-directional LSTM encoder with dropout.
 
     Args:
         input_size (int): The number of expected features in the input.
@@ -91,7 +91,7 @@ class LSTMEncoder(RNNEncoder):
 
 
 class CNNEncoder(nn.Module):
-    """Multi-filter-size CNN encoder for text classification with max-pooling
+    """Multi-filter-size CNN encoder for text classification with max-pooling.
 
     Args:
         input_size (int): The number of expected features in the input.
@@ -103,7 +103,7 @@ class CNNEncoder(nn.Module):
            If num_pool = 0, do nothing.
            If num_pool = 1, do typical max-pooling.
            If num_pool > 1, do adaptive max-pooling.
-        channel_last (bool): Whether to transpose the dimension from (batch_size, num_channel, length) to (batch_size, length, num_channel)
+        channel_last (bool): Whether to transpose the dimension from (batch_size, num_channel, length) to (batch_size, length, num_channel).
     """
 
     def __init__(
@@ -149,8 +149,8 @@ class CNNEncoder(nn.Module):
 
 
 class LabelwiseAttention(nn.Module):
-    """
-    See `Explainable Prediction of Medical Codes from Clinical Text <https://aclanthology.org/N18-1100.pdf>`_
+    """Apply attention technique to summarize the sequence for each label.
+    See `Explainable Prediction of Medical Codes from Clinical Text <https://aclanthology.org/N18-1100.pdf>`_.
 
     Args:
         input_size (int): The number of expected features in the input.
@@ -171,7 +171,7 @@ class LabelwiseAttention(nn.Module):
 
 
 class LabelwiseMultiHeadAttention(nn.Module):
-    """Labelwise multi-head attention
+    """Labelwise multi-head attention.
 
     Args:
         input_size (int): The number of expected features in the input.
@@ -197,7 +197,7 @@ class LabelwiseMultiHeadAttention(nn.Module):
 
 
 class LabelwiseLinearOutput(nn.Module):
-    """
+    """Apply a linear transformation to the incoming data for each label.
 
     Args:
         input_size (int): The number of expected features in the input.

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/libmultilabel.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: libmultilabel
-Version: 0.7.2
+Version: 0.7.3
 Summary: A library for multi-class and multi-label classification
 Home-page: https://github.com/ASUS-AICS/LibMultiLabel
 Author: LibMultiLabel Team
@@ -19,7 +19,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.8
 Requires-Python: >=3.8
 License-File: LICENSE
-Requires-Dist: liblinear-multicore
+Requires-Dist: liblinear-multicore>=2.49.0
 Requires-Dist: numba
 Requires-Dist: pandas>1.3.0
 Requires-Dist: PyYAML

{libmultilabel-0.7.2 → libmultilabel-0.7.3}/setup.cfg

@@ -1,6 +1,6 @@
 [metadata]
 name = libmultilabel
-version = 0.7.2
+version = 0.7.3
 author = LibMultiLabel Team
 license = MIT License
 license_file = LICENSE
@@ -25,7 +25,7 @@ classifiers =
 [options]
 packages = find:
 install_requires =
-    liblinear-multicore
+    liblinear-multicore>=2.49.0
     numba
     pandas>1.3.0
     PyYAML