libmultilabel 0.7.3__tar.gz → 0.7.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/PKG-INFO +2 -1
  2. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/tree.py +101 -18
  3. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel.egg-info/PKG-INFO +2 -1
  4. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel.egg-info/requires.txt +1 -0
  5. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/setup.cfg +2 -1
  6. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/LICENSE +0 -0
  7. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/README.md +0 -0
  8. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/__init__.py +0 -0
  9. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/common_utils.py +0 -0
  10. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/__init__.py +0 -0
  11. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/data_utils.py +0 -0
  12. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/linear.py +0 -0
  13. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/metrics.py +0 -0
  14. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/preprocessor.py +0 -0
  15. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/linear/utils.py +0 -0
  16. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/logging.py +0 -0
  17. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/__init__.py +0 -0
  18. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/attentionxml.py +0 -0
  19. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/data_utils.py +0 -0
  20. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/metrics.py +0 -0
  21. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/model.py +0 -0
  22. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/__init__.py +0 -0
  23. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/bert.py +0 -0
  24. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/bert_attention.py +0 -0
  25. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/caml.py +0 -0
  26. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/kim_cnn.py +0 -0
  27. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/labelwise_attention_networks.py +0 -0
  28. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/modules.py +0 -0
  29. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/networks/xml_cnn.py +0 -0
  30. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel/nn/nn_utils.py +0 -0
  31. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel.egg-info/SOURCES.txt +0 -0
  32. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel.egg-info/dependency_links.txt +0 -0
  33. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/libmultilabel.egg-info/top_level.txt +0 -0
  34. {libmultilabel-0.7.3 → libmultilabel-0.7.4}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: libmultilabel
3
- Version: 0.7.3
3
+ Version: 0.7.4
4
4
  Summary: A library for multi-class and multi-label classification
5
5
  Home-page: https://github.com/ASUS-AICS/LibMultiLabel
6
6
  Author: LibMultiLabel Team
@@ -26,6 +26,7 @@ Requires-Dist: PyYAML
26
26
  Requires-Dist: scikit-learn
27
27
  Requires-Dist: scipy<1.14.0
28
28
  Requires-Dist: tqdm
29
+ Requires-Dist: psutil
29
30
  Provides-Extra: nn
30
31
  Requires-Dist: lightning==2.0.9; extra == "nn"
31
32
  Requires-Dist: nltk; extra == "nn"
@@ -46,13 +46,14 @@ class TreeModel:
46
46
  self,
47
47
  root: Node,
48
48
  flat_model: linear.FlatModel,
49
- weight_map: np.ndarray,
49
+ node_ptr: np.ndarray,
50
50
  ):
51
51
  self.name = "tree"
52
52
  self.root = root
53
53
  self.flat_model = flat_model
54
- self.weight_map = weight_map
54
+ self.node_ptr = node_ptr
55
55
  self.multiclass = False
56
+ self._model_separated = False # Indicates whether the model has been separated for pruning tree.
56
57
 
57
58
  def predict_values(
58
59
  self,
@@ -68,10 +69,93 @@ class TreeModel:
68
69
  Returns:
69
70
  np.ndarray: A matrix with dimension number of instances * number of classes.
70
71
  """
71
- # number of instances * number of labels + total number of metalabels
72
- all_preds = linear.predict_values(self.flat_model, x)
72
+ if beam_width >= len(self.root.children):
73
+ # Beam_width is sufficiently large; pruning not applied.
74
+ # Calculates decision values for all nodes.
75
+ all_preds = linear.predict_values(self.flat_model, x) # number of instances * (number of labels + total number of metalabels)
76
+ else:
77
+ # Beam_width is small; pruning applied to reduce computation.
78
+ if not self._model_separated:
79
+ self._separate_model_for_pruning_tree()
80
+ self._model_separated = True
81
+ all_preds = self._prune_tree_and_predict_values(x, beam_width) # number of instances * (number of labels + total number of metalabels)
73
82
  return np.vstack([self._beam_search(all_preds[i], beam_width) for i in range(all_preds.shape[0])])
74
83
 
84
+ def _separate_model_for_pruning_tree(self):
85
+ """
86
+ This function separates the weights for the root node and its children into (K+1) FlatModel
87
+ for efficient beam search traversal in Python.
88
+ """
89
+ tree_flat_model_params = {
90
+ 'bias': self.root.model.bias,
91
+ 'thresholds': 0,
92
+ 'multiclass': False
93
+ }
94
+ slice = np.s_[:, self.node_ptr[self.root.index] : self.node_ptr[self.root.index + 1]]
95
+ self.root_model = linear.FlatModel(
96
+ name="root-flattened-tree",
97
+ weights=self.flat_model.weights[slice].tocsr(),
98
+ **tree_flat_model_params
99
+ )
100
+
101
+ self.subtree_models = []
102
+ for i in range(len(self.root.children)):
103
+ subtree_weights_start = self.node_ptr[self.root.children[i].index]
104
+ subtree_weights_end = self.node_ptr[self.root.children[i+1].index] if i+1 < len(self.root.children) else -1
105
+ slice = np.s_[:, subtree_weights_start:subtree_weights_end]
106
+ subtree_flatmodel = linear.FlatModel(
107
+ name="subtree-flattened-tree",
108
+ weights=self.flat_model.weights[slice].tocsr(),
109
+ **tree_flat_model_params
110
+ )
111
+ self.subtree_models.append(subtree_flatmodel)
112
+
113
+ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int) -> np.ndarray:
114
+ """Calculates the selective decision values associated with instances x by evaluating only the most relevant subtrees.
115
+
116
+ Only subtrees corresponding to the top beam_width candidates from the root are evaluated,
117
+ skipping the rest to avoid unnecessary computation.
118
+
119
+ Args:
120
+ x (sparse.csr_matrix): A matrix with dimension number of instances * number of features.
121
+ beam_width (int): Number of top candidate branches considered for prediction.
122
+
123
+ Returns:
124
+ np.ndarray: A matrix with dimension number of instances * (number of labels + total number of metalabels).
125
+ """
126
+ # Initialize space for all predictions with negative infinity
127
+ num_instances, num_labels = x.shape[0], self.node_ptr[-1]
128
+ all_preds = np.full((num_instances, num_labels), -np.inf)
129
+
130
+ # Calculate root decision values and scores
131
+ root_preds = linear.predict_values(self.root_model, x)
132
+ children_scores = 0.0 - np.square(np.maximum(0, 1 - root_preds))
133
+
134
+ slice = np.s_[:, self.node_ptr[self.root.index] : self.node_ptr[self.root.index + 1]]
135
+ all_preds[slice] = root_preds
136
+
137
+ # Select indices of the top beam_width subtrees for each instance
138
+ top_beam_width_indices = np.argsort(-children_scores, axis=1, kind="stable")[:, :beam_width]
139
+
140
+ # Build a mask where mask[i, j] is True if the j-th subtree is among the top beam_width subtrees for the i-th instance
141
+ mask = np.zeros_like(children_scores, dtype=np.bool_)
142
+ np.put_along_axis(mask, top_beam_width_indices, True, axis=1)
143
+
144
+ # Calculate predictions for each subtree with its corresponding instances
145
+ for subtree_idx in range(len(self.root.children)):
146
+ subtree_model = self.subtree_models[subtree_idx]
147
+ instances_mask = mask[:, subtree_idx]
148
+ reduced_instances = x[np.s_[instances_mask], :]
149
+
150
+ # Locate the position of the subtree root in the weight mapping of all nodes
151
+ subtree_weights_start = self.node_ptr[self.root.children[subtree_idx].index]
152
+ subtree_weights_end = subtree_weights_start + subtree_model.weights.shape[1]
153
+
154
+ slice = np.s_[instances_mask, subtree_weights_start:subtree_weights_end]
155
+ all_preds[slice] = linear.predict_values(subtree_model, reduced_instances)
156
+
157
+ return all_preds
158
+
75
159
  def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarray:
76
160
  """Predict with beam search using cached probability estimates for a single instance.
77
161
 
@@ -93,7 +177,7 @@ class TreeModel:
93
177
  if node.isLeaf():
94
178
  next_level.append((node, score))
95
179
  continue
96
- slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]]
180
+ slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]]
97
181
  pred = instance_preds[slice]
98
182
  children_score = score - np.square(np.maximum(0, 1 - pred))
99
183
  next_level.extend(zip(node.children, children_score.tolist()))
@@ -102,9 +186,9 @@ class TreeModel:
102
186
  next_level = []
103
187
 
104
188
  num_labels = len(self.root.label_map)
105
- scores = np.full(num_labels, 0.0)
189
+ scores = np.zeros(num_labels)
106
190
  for node, score in cur_level:
107
- slice = np.s_[self.weight_map[node.index] : self.weight_map[node.index + 1]]
191
+ slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]]
108
192
  pred = instance_preds[slice]
109
193
  scores[node.label_map] = np.exp(score - np.square(np.maximum(0, 1 - pred)))
110
194
  return scores
@@ -130,7 +214,7 @@ def train_tree(
130
214
  verbose (bool, optional): Output extra progress information. Defaults to True.
131
215
 
132
216
  Returns:
133
- A model which can be used in predict_values.
217
+ TreeModel: A model which can be used in predict_values.
134
218
  """
135
219
  label_representation = (y.T * x).tocsr()
136
220
  label_representation = sklearn.preprocessing.normalize(label_representation, norm="l2", axis=1)
@@ -173,8 +257,8 @@ def train_tree(
173
257
  root.dfs(visit)
174
258
  pbar.close()
175
259
 
176
- flat_model, weight_map = _flatten_model(root)
177
- return TreeModel(root, flat_model, weight_map)
260
+ flat_model, node_ptr = _flatten_model(root)
261
+ return TreeModel(root, flat_model, node_ptr)
178
262
 
179
263
 
180
264
  def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray, d: int, K: int, dmax: int) -> Node:
@@ -188,7 +272,7 @@ def _build_tree(label_representation: sparse.csr_matrix, label_map: np.ndarray,
188
272
  dmax (int): Maximum depth of the tree.
189
273
 
190
274
  Returns:
191
- Node: root of the (sub)tree built from label_representation.
275
+ Node: Root of the (sub)tree built from label_representation.
192
276
  """
193
277
  if d >= dmax or label_representation.shape[0] <= K:
194
278
  return Node(label_map=label_map, children=[])
@@ -261,11 +345,10 @@ def _flatten_model(root: Node) -> tuple[linear.FlatModel, np.ndarray]:
261
345
  """Flatten tree weight matrices into a single weight matrix. The flattened weight
262
346
  matrix is used to predict all possible values, which is cached for beam search.
263
347
  This pessimizes complexity but is faster in practice.
264
- Consecutive values of the returned map denotes the start and end indices of the
265
- weights of each node. Conceptually, given root and node:
266
- flat_model, weight_map = _flatten_model(root)
267
- slice = np.s_[weight_map[node.index]:
268
- weight_map[node.index+1]]
348
+ Consecutive values of the returned array denote the start and end indices of each node in the tree.
349
+ To extract a node's classifiers:
350
+ slice = np.s_[node_ptr[node.index]:
351
+ node_ptr[node.index+1]]
269
352
  node.model.weights == flat_model.weights[:, slice]
270
353
 
271
354
  Args:
@@ -296,6 +379,6 @@ def _flatten_model(root: Node) -> tuple[linear.FlatModel, np.ndarray]:
296
379
  )
297
380
 
298
381
  # w.shape[1] is the number of labels/metalabels of each node
299
- weight_map = np.cumsum([0] + list(map(lambda w: w.shape[1], weights)))
382
+ node_ptr = np.cumsum([0] + list(map(lambda w: w.shape[1], weights)))
300
383
 
301
- return model, weight_map
384
+ return model, node_ptr
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: libmultilabel
3
- Version: 0.7.3
3
+ Version: 0.7.4
4
4
  Summary: A library for multi-class and multi-label classification
5
5
  Home-page: https://github.com/ASUS-AICS/LibMultiLabel
6
6
  Author: LibMultiLabel Team
@@ -26,6 +26,7 @@ Requires-Dist: PyYAML
26
26
  Requires-Dist: scikit-learn
27
27
  Requires-Dist: scipy<1.14.0
28
28
  Requires-Dist: tqdm
29
+ Requires-Dist: psutil
29
30
  Provides-Extra: nn
30
31
  Requires-Dist: lightning==2.0.9; extra == "nn"
31
32
  Requires-Dist: nltk; extra == "nn"
@@ -5,6 +5,7 @@ PyYAML
5
5
  scikit-learn
6
6
  scipy<1.14.0
7
7
  tqdm
8
+ psutil
8
9
 
9
10
  [nn]
10
11
  lightning==2.0.9
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = libmultilabel
3
- version = 0.7.3
3
+ version = 0.7.4
4
4
  author = LibMultiLabel Team
5
5
  license = MIT License
6
6
  license_file = LICENSE
@@ -32,6 +32,7 @@ install_requires =
32
32
  scikit-learn
33
33
  scipy<1.14.0
34
34
  tqdm
35
+ psutil
35
36
  python_requires = >=3.8
36
37
 
37
38
  [options.extras_require]
File without changes
File without changes