libmultilabel 0.7.3__tar.gz → 0.7.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/PKG-INFO +2 -1
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/tree.py +101 -18
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel.egg-info/PKG-INFO +2 -1
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel.egg-info/requires.txt +1 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/setup.cfg +2 -1
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/LICENSE +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/README.md +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/__init__.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/common_utils.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/__init__.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/data_utils.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/linear.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/metrics.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/preprocessor.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/utils.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/logging.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/__init__.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/attentionxml.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/data_utils.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/metrics.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/model.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/__init__.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/bert.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/bert_attention.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/caml.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/kim_cnn.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/labelwise_attention_networks.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/modules.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/xml_cnn.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/nn_utils.py +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel.egg-info/SOURCES.txt +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel.egg-info/dependency_links.txt +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel.egg-info/top_level.txt +0 -0
- {libmultilabel-0.7.3 → libmultilabel-0.7.4}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: libmultilabel
|
|
3
|
-
Version: 0.7.3
|
|
3
|
+
Version: 0.7.4
|
|
4
4
|
Summary: A library for multi-class and multi-label classification
|
|
5
5
|
Home-page: https://github.com/ASUS-AICS/LibMultiLabel
|
|
6
6
|
Author: LibMultiLabel Team
|
|
@@ -26,6 +26,7 @@ Requires-Dist: PyYAML
|
|
|
26
26
|
Requires-Dist: scikit-learn
|
|
27
27
|
Requires-Dist: scipy<1.14.0
|
|
28
28
|
Requires-Dist: tqdm
|
|
29
|
+
Requires-Dist: psutil
|
|
29
30
|
Provides-Extra: nn
|
|
30
31
|
Requires-Dist: lightning==2.0.9; extra == "nn"
|
|
31
32
|
Requires-Dist: nltk; extra == "nn"
|
|
@@ -46,13 +46,14 @@ class TreeModel:
|
|
|
46
46
|
self,
|
|
47
47
|
root: Node,
|
|
48
48
|
flat_model: linear.FlatModel,
|
|
49
|
-
|
|
49
|
+
node_ptr: np.ndarray,
|
|
50
50
|
):
|
|
51
51
|
self.name = "tree"
|
|
52
52
|
self.root = root
|
|
53
53
|
self.flat_model = flat_model
|
|
54
|
-
self.weight_map = weight_map
|
|
54
|
+
self.node_ptr = node_ptr
|
|
55
55
|
self.multiclass = False
|
|
56
|
+
self._model_separated = False # Indicates whether the model has been separated for pruning tree.
|
|
56
57
|
|
|
57
58
|
def predict_values(
|
|
58
59
|
self,
|
|
@@ -68,10 +69,93 @@ class TreeModel:
|
|
|
68
69
|
Returns:
|
|
69
70
|
np.ndarray: A matrix with dimension number of instances * number of classes.
|
|
70
71
|
"""
|
|
71
|
-
|
|
72
|
-
|
|
72
|
+
if beam_width >= len(self.root.children):
|
|
73
|
+
# Beam_width is sufficiently large; pruning not applied.
|
|
74
|
+
# Calculates decision values for all nodes.
|
|
75
|
+
all_preds = linear.predict_values(self.flat_model, x) # number of instances * (number of labels + total number of metalabels)
|
|
76
|
+
else:
|
|
77
|
+
# Beam_width is small; pruning applied to reduce computation.
|
|
78
|
+
if not self._model_separated:
|
|
79
|
+
self._separate_model_for_pruning_tree()
|
|
80
|
+
self._model_separated = True
|
|
81
|
+
all_preds = self._prune_tree_and_predict_values(x, beam_width) # number of instances * (number of labels + total number of metalabels)
|
|
73
82
|
return np.vstack([self._beam_search(all_preds[i], beam_width) for i in range(all_preds.shape[0])])
|
|
74
83
|
|
|
84
|
+
def _separate_model_for_pruning_tree(self):
|
|
85
|
+
"""
|
|
86
|
+
This function separates the weights for the root node and its children into (K+1) FlatModel
|
|
87
|
+
for efficient beam search traversal in Python.
|
|
88
|
+
"""
|
|
89
|
+
tree_flat_model_params = {
|
|
90
|
+
'bias': self.root.model.bias,
|
|
91
|
+
'thresholds': 0,
|
|
92
|
+
'multiclass': False
|
|
93
|
+
}
|
|
94
|
+
slice = np.s_[:, self.node_ptr[self.root.index] : self.node_ptr[self.root.index + 1]]
|
|
95
|
+
self.root_model = linear.FlatModel(
|
|
96
|
+
name="root-flattened-tree",
|
|
97
|
+
weights=self.flat_model.weights[slice].tocsr(),
|
|
98
|
+
**tree_flat_model_params
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
self.subtree_models = []
|
|
102
|
+
for i in range(len(self.root.children)):
|
|
103
|
+
subtree_weights_start = self.node_ptr[self.root.children[i].index]
|
|
104
|
+
subtree_weights_end = self.node_ptr[self.root.children[i+1].index] if i+1 < len(self.root.children) else -1
|
|
105
|
+
slice = np.s_[:, subtree_weights_start:subtree_weights_end]
|
|
106
|
+
subtree_flatmodel = linear.FlatModel(
|
|
107
|
+
name="subtree-flattened-tree",
|
|
108
|
+
weights=self.flat_model.weights[slice].tocsr(),
|
|
109
|
+
**tree_flat_model_params
|
|
110
|
+
)
|
|
111
|
+
self.subtree_models.append(subtree_flatmodel)
|
|
112
|
+
|
|
113
|
+
def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int) -> np.ndarray:
|
|
114
|
+
"""Calculates the selective decision values associated with instances x by evaluating only the most relevant subtrees.
|
|
115
|
+
|
|
116
|
+
Only subtrees corresponding to the top beam_width candidates from the root are evaluated,
|
|
117
|
+
skipping the rest to avoid unnecessary computation.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
x (sparse.csr_matrix): A matrix with dimension number of instances * number of features.
|
|
121
|
+
beam_width (int): Number of top candidate branches considered for prediction.
|
|
122
|
+
|
|
123
|
+
Returns:
|
|
124
|
+
np.ndarray: A matrix with dimension number of instances * (number of labels + total number of metalabels).
|
|
125
|
+
"""
|
|
126
|
+
# Initialize space for all predictions with negative infinity
|
|
127
|
+
num_instances, num_labels = x.shape[0], self.node_ptr[-1]
|
|
128
|
+
all_preds = np.full((num_instances, num_labels), -np.inf)
|
|
129
|
+
|
|
130
|
+
# Calculate root decision values and scores
|
|
131
|
+
root_preds = linear.predict_values(self.root_model, x)
|
|
132
|
+
children_scores = 0.0 - np.square(np.maximum(0, 1 - root_preds))
|
|
133
|
+
|
|
134
|
+
slice = np.s_[:, self.node_ptr[self.root.index] : self.node_ptr[self.root.index + 1]]
|
|
135
|
+
all_preds[slice] = root_preds
|
|
136
|
+
|
|
137
|
+
# Select indices of the top beam_width subtrees for each instance
|
|
138
|
+
top_beam_width_indices = np.argsort(-children_scores, axis=1, kind="stable")[:, :beam_width]
|
|
139
|
+
|
|
140
|
+
# Build a mask where mask[i, j] is True if the j-th subtree is among the top beam_width subtrees for the i-th instance
|
|
141
|
+
mask = np.zeros_like(children_scores, dtype=np.bool_)
|
|
142
|
+
np.put_along_axis(mask, top_beam_width_indices, True, axis=1)
|
|
143
|
+
|
|
144
|
+
# Calculate predictions for each subtree with its corresponding instances
|
|
145
|
+
for subtree_idx in range(len(self.root.children)):
|
|
146
|
+
subtree_model = self.subtree_models[subtree_idx]
|
|
147
|
+
instances_mask = mask[:, subtree_idx]
|
|
148
|
+
reduced_instances = x[np.s_[instances_mask], :]
|
|
149
|
+
|
|
150
|
+
# Locate the position of the subtree root in the weight mapping of all nodes
|
|
151
|
+
subtree_weights_start = self.node_ptr[self.root.children[subtree_idx].index]
|
|
152
|
+
subtree_weights_end = subtree_weights_start + subtree_model.weights.shape[1]
|
|
153
|
+
|
|
154
|
+
slice = np.s_[instances_mask, subtree_weights_start:subtree_weights_end]
|
|
155
|
+
all_preds[slice] = linear.predict_values(subtree_model, reduced_instances)
|
|
156
|
+
|
|
157
|
+
return all_preds
|
|
158
|
+
|
|
75
159
|
def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarray:
|
|
76
160
|
"""Predict with beam search using cached probability estimates for a single instance.
|
|
77
161
|
|
|
@@ -93,7 +177,7 @@ class TreeModel:
|
|
|
93
177
|
if node.isLeaf():
|
|
94
178
|
next_level.append((node, score))
|
|
95
179
|
continue
|
|
96
|
-
slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]]
|
|
180
|
+
slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]]
|
|
97
181
|
pred = instance_preds[slice]
|
|
98
182
|
children_score = score - np.square(np.maximum(0, 1 - pred))
|
|
99
183
|
next_level.extend(zip(node.children, children_score.tolist()))
|
|
@@ -102,9 +186,9 @@ class TreeModel:
|
|
|
102
186
|
next_level = []
|
|
103
187
|
|
|
104
188
|
num_labels = len(self.root.label_map)
|
|
105
|
-
scores = np.
|
|
189
|
+
scores = np.zeros(num_labels)
|
|
106
190
|
for node, score in cur_level:
|
|
107
|
-
slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]]
|
|
191
|
+
slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]]
|
|
108
192
|
pred = instance_preds[slice]
|
|
109
193
|
scores[node.label_map] = np.exp(score - np.square(np.maximum(0, 1 - pred)))
|
|
110
194
|
return scores
|
|
@@ -130,7 +214,7 @@ def train_tree(
|
|
|
130
214
|
verbose (bool, optional): Output extra progress information. Defaults to True.
|
|
131
215
|
|
|
132
216
|
Returns:
|
|
133
|
-
A model which can be used in predict_values.
|
|
217
|
+
TreeModel: A model which can be used in predict_values.
|
|
134
218
|
"""
|
|
135
219
|
label_representation = (y.T * x).tocsr()
|
|
136
220
|
label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
|
|
@@ -173,8 +257,8 @@ def train_tree(
|
|
|
173
257
|
root.dfs(visit)
|
|
174
258
|
pbar.close()
|
|
175
259
|
|
|
176
|
-
flat_model, weight_map = _flatten_model(root)
|
|
177
|
-
return TreeModel(root, flat_model, weight_map)
|
|
260
|
+
flat_model, node_ptr = _flatten_model(root)
|
|
261
|
+
return TreeModel(root, flat_model, node_ptr)
|
|
178
262
|
|
|
179
263
|
|
|
180
264
|
def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray, d: int, K: int, dmax: int) -> Node:
|
|
@@ -188,7 +272,7 @@ def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray,
|
|
|
188
272
|
dmax (int): Maximum depth of the tree.
|
|
189
273
|
|
|
190
274
|
Returns:
|
|
191
|
-
Node:
|
|
275
|
+
Node: Root of the (sub)tree built from label_representation.
|
|
192
276
|
"""
|
|
193
277
|
if d >= dmax or label_representation.shape[0] <= K:
|
|
194
278
|
return Node(label_map=label_map, children=[])
|
|
@@ -261,11 +345,10 @@ def _flatten_model(root: Node) -> tuple[linear.FlatModel, np.ndarray]:
|
|
|
261
345
|
"""Flatten tree weight matrices into a single weight matrix. The flattened weight
|
|
262
346
|
matrix is used to predict all possible values, which is cached for beam search.
|
|
263
347
|
This pessimizes complexity but is faster in practice.
|
|
264
|
-
Consecutive values of the returned
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
weight_map[node.index+1]]
|
|
348
|
+
Consecutive values of the returned array denote the start and end indices of each node in the tree.
|
|
349
|
+
To extract a node's classifiers:
|
|
350
|
+
slice = np.s_[node_ptr[node.index]:
|
|
351
|
+
node_ptr[node.index+1]]
|
|
269
352
|
node.model.weights == flat_model.weights[:, slice]
|
|
270
353
|
|
|
271
354
|
Args:
|
|
@@ -296,6 +379,6 @@ def _flatten_model(root: Node) -> tuple[linear.FlatModel, np.ndarray]:
|
|
|
296
379
|
)
|
|
297
380
|
|
|
298
381
|
# w.shape[1] is the number of labels/metalabels of each node
|
|
299
|
-
|
|
382
|
+
node_ptr = np.cumsum([0] + list(map(lambda w: w.shape[1], weights)))
|
|
300
383
|
|
|
301
|
-
return model, weight_map
|
|
384
|
+
return model, node_ptr
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: libmultilabel
|
|
3
|
-
Version: 0.7.3
|
|
3
|
+
Version: 0.7.4
|
|
4
4
|
Summary: A library for multi-class and multi-label classification
|
|
5
5
|
Home-page: https://github.com/ASUS-AICS/LibMultiLabel
|
|
6
6
|
Author: LibMultiLabel Team
|
|
@@ -26,6 +26,7 @@ Requires-Dist: PyYAML
|
|
|
26
26
|
Requires-Dist: scikit-learn
|
|
27
27
|
Requires-Dist: scipy<1.14.0
|
|
28
28
|
Requires-Dist: tqdm
|
|
29
|
+
Requires-Dist: psutil
|
|
29
30
|
Provides-Extra: nn
|
|
30
31
|
Requires-Dist: lightning==2.0.9; extra == "nn"
|
|
31
32
|
Requires-Dist: nltk; extra == "nn"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = libmultilabel
|
|
3
|
-
version = 0.7.3
|
|
3
|
+
version = 0.7.4
|
|
4
4
|
author = LibMultiLabel Team
|
|
5
5
|
license = MIT License
|
|
6
6
|
license_file = LICENSE
|
|
@@ -32,6 +32,7 @@ install_requires =
|
|
|
32
32
|
scikit-learn
|
|
33
33
|
scipy<1.14.0
|
|
34
34
|
tqdm
|
|
35
|
+
psutil
|
|
35
36
|
python_requires = >=3.8
|
|
36
37
|
|
|
37
38
|
[options.extras_require]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|