path-boost 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
1
+ import logging
2
+
3
+ import networkx as nx
4
+ import numpy as np
5
+ from sklearn.model_selection import GridSearchCV, train_test_split
6
+
7
+ from path_boost._path_boost import PathBoost
8
+
9
+ logger = logging.getLogger("path_boost")
10
+
11
+
12
def independent_cross_validation_on_each_anchor_node(
    X: list[nx.Graph],
    y,
    param_grid: dict = None,
    list_anchor_nodes_labels: list = None,
    anchor_nodes_label_name: str = "feature_atomic_number",
    test_size: float = 0.25,
    random_state=None,
):
    """Grid-search PathBoost hyper-parameters and score the winner on a hold-out set.

    The data is split into a training and a test part; a 3-fold
    ``GridSearchCV`` (scored with negative mean squared error) is fitted on
    the training part, and the best estimator is then scored on the test part.
    The best parameters, the best cross-validation score and the test score
    are reported through the ``path_boost`` logger.

    Args:
        X: Graphs used as samples.
        y: Targets aligned with ``X``.
        param_grid: Grid handed to ``GridSearchCV``. When ``None`` a small
            built-in grid over ``learning_rate``, ``max_path_length`` and
            ``kwargs_for_base_learner`` is used.
        list_anchor_nodes_labels: Anchor labels forwarded to ``PathBoost.fit``.
            Defaults to ``[25, 47, 48, 80]``.
        anchor_nodes_label_name: Node attribute name forwarded to
            ``PathBoost.fit``.
        test_size: Share of the samples kept aside for the final score.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        The fitted ``GridSearchCV`` instance.
    """
    # TODO remove this hard coded param_grid
    if param_grid is None:
        param_grid = {
            "learning_rate": [0.01, 0.02, 0.05],
            "max_path_length": [3, 5, 7],
            "kwargs_for_base_learner": [{"max_depth": 3}, {"max_depth": 4}],
        }
    if list_anchor_nodes_labels is None:
        list_anchor_nodes_labels = [25, 47, 48, 80]

    # The hold-out split was missing before: the body referenced X_train /
    # X_test without ever defining them, so every call ended in a NameError.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    path_boost = PathBoost(n_iter=10, n_of_cores=10, verbose=False)
    grid_search = GridSearchCV(
        estimator=path_boost,
        param_grid=param_grid,
        cv=3,
        scoring="neg_mean_squared_error",
    )

    # Extra keyword arguments are routed by GridSearchCV to PathBoost.fit.
    grid_search.fit(
        X_train,
        y_train,
        list_anchor_nodes_labels=list_anchor_nodes_labels,
        anchor_nodes_label_name=anchor_nodes_label_name,
    )

    logger.info("Best parameters found: %s", grid_search.best_params_)
    # The scorer is a negated MSE; flip the sign to report a plain MSE.
    logger.info("Best cross-validation score: %s", -grid_search.best_score_)

    best_model = grid_search.best_estimator_
    test_score = best_model.score(X_test, y_test)
    logger.info("Test set score: %s", test_score)

    return grid_search
@@ -0,0 +1,76 @@
1
+ import networkx as nx
2
+
3
+ from .classes.extended_boosting_matrix import ExtendedBoostingMatrix
4
+ from .classes.sequential_path_boost import SequentialPathBoost
5
+
6
+
7
+ def split_dataset_by_metal_centers(
8
+ graphs_list: list[nx.Graph], anchor_nodes_label_name: str, anchor_nodes: list
9
+ ) -> list[list[int]]:
10
+ """
11
+ Splits a list of graphs into subgroups based on anchor node labels.
12
+
13
+ This static method takes a list of graphs, an anchor nodes label name,
14
+ and a list of anchor nodes. It iterates through each graph and identifies
15
+ nodes labeled with anchor node labels. It then organizes the indices of
16
+ graphs where such anchor nodes are found into corresponding subgroups.
17
+
18
+ Args:
19
+ graphs_list (list[nx.Graph]): A list of networkx Graph objects to be processed.
20
+ anchor_nodes_label_name (str): The name of the attribute used to identify anchor nodes in the graphs.
21
+ anchor_nodes (list): A list of anchor nodes to be used as a reference for grouping.
22
+
23
+ Returns:
24
+ list[list[int]]: A list containing sublists of indices corresponding to the grouping
25
+ of graphs based on the presence of the anchor nodes.
26
+ """
27
+
28
+ indices_list = [[] for _ in range(len(anchor_nodes))]
29
+ for index_in_anchor_nodes, anchor_node_label in enumerate(anchor_nodes):
30
+ for i, graph in enumerate(graphs_list):
31
+ path_found = ExtendedBoostingMatrix.find_labelled_path_in_nx_graph(
32
+ graph=graph,
33
+ path_labels=anchor_node_label,
34
+ main_label_name=anchor_nodes_label_name,
35
+ )
36
+ if len(path_found) > 0:
37
+ indices_list[index_in_anchor_nodes].append(i)
38
+ return indices_list
39
+
40
+
41
+ def train_pattern_boosting(
42
+ input_from_parallelization: tuple,
43
+ ) -> SequentialPathBoost | None:
44
+ model: SequentialPathBoost = input_from_parallelization[0]
45
+ if model is None:
46
+ return None
47
+ X = input_from_parallelization[1]
48
+ y = input_from_parallelization[2]
49
+ list_anchor_nodes_labels: tuple = input_from_parallelization[3]
50
+ name_of_label_attribute = input_from_parallelization[4]
51
+ model.fit(
52
+ X=X,
53
+ y=y,
54
+ eval_set=None,
55
+ list_anchor_nodes_labels=[list_anchor_nodes_labels],
56
+ anchor_nodes_label_name=name_of_label_attribute,
57
+ )
58
+ return model
59
+
60
+
61
+ def parallel_predict(input_from_parallelization: tuple):
62
+ model: SequentialPathBoost = input_from_parallelization[0]
63
+ X = input_from_parallelization[1]
64
+ if model is None or len(X) == 0:
65
+ return None
66
+
67
+ return model.predict(X)
68
+
69
+
70
+ def parallel_predict_step_by_step(input_from_parallelization: tuple):
71
+ model: SequentialPathBoost = input_from_parallelization[0]
72
+ X = input_from_parallelization[1]
73
+ if model is None or len(X) == 0:
74
+ return None
75
+
76
+ return model.predict_step_by_step(X)
@@ -0,0 +1,2 @@
1
+ # Authors: scikit-learn-contrib developers
2
+ # License: BSD 3 clause
@@ -0,0 +1,304 @@
1
+ import numpy as np
2
+ import networkx as nx
3
+ from sklearn.model_selection import train_test_split, GridSearchCV
4
+ from path_boost._path_boost import PathBoost
5
+
6
+
7
+ def _find_path_instances(graph, path_definition, type_attribute_name="feature_0"):
8
+ """
9
+ Finds all instances of a given path definition in a graph.
10
+ A path definition is a sequence of node types.
11
+ An instance is a sequence of connected node IDs whose types match the definition.
12
+ """
13
+ instances = []
14
+ k = len(path_definition)
15
+ if k == 0:
16
+ return []
17
+
18
+ for start_node in graph.nodes:
19
+ if graph.nodes[start_node].get(type_attribute_name) == path_definition[0]:
20
+ # Stack stores (current_node_sequence, current_definition_index)
21
+ dfs_stack = [([start_node], 0)]
22
+ while dfs_stack:
23
+ current_nodes_in_path, def_idx = dfs_stack.pop()
24
+
25
+ if def_idx == k - 1: # Path complete
26
+ instances.append(list(current_nodes_in_path))
27
+ continue
28
+
29
+ last_node_in_current_path = current_nodes_in_path[-1]
30
+ next_def_idx = def_idx + 1
31
+ expected_next_type = path_definition[next_def_idx]
32
+
33
+ for neighbor in graph.neighbors(last_node_in_current_path):
34
+ if (
35
+ neighbor not in current_nodes_in_path
36
+ and graph.nodes[neighbor].get(type_attribute_name)
37
+ == expected_next_type
38
+ ):
39
+ new_path_nodes = list(current_nodes_in_path) # Make a copy
40
+ new_path_nodes.append(neighbor)
41
+ dfs_stack.append((new_path_nodes, next_def_idx))
42
+ return instances
43
+
44
+
45
+ def generate_synthetic_graph_dataset(
46
+ n_graphs=100,
47
+ avg_n_nodes=15,
48
+ std_n_nodes=5,
49
+ graph_density=0.3,
50
+ n_node_types=5,
51
+ anchor_node_types: list | None = None, # New parameter
52
+ n_numerical_node_features=2, # e.g., feature_1, feature_2
53
+ n_edge_features=1,
54
+ n_true_paths=3,
55
+ avg_true_path_length=3,
56
+ std_true_path_length=1,
57
+ numerical_feature_idx_for_label=1, # 0 for feature_0 (type), 1 for feature_1 etc.
58
+ noise_std=0.5,
59
+ random_state=42,
60
+ ):
61
+ """
62
+ Generates a synthetic dataset of graphs.
63
+ Labels 'y' are derived from predefined "true paths" found in the graphs.
64
+ Node types are stored in 'feature_0'. Numerical features are 'feature_1', 'feature_2', ...
65
+ """
66
+ rng = np.random.RandomState(random_state)
67
+
68
+ possible_node_types = list(range(n_node_types))
69
+
70
+ # Determine which types can start a true path
71
+ valid_starting_types_for_true_paths = possible_node_types
72
+ if anchor_node_types is not None and len(anchor_node_types) > 0:
73
+ # Ensure anchor_node_types are valid w.r.t. n_node_types
74
+ if not all(t in possible_node_types for t in anchor_node_types):
75
+ raise ValueError(
76
+ f"All anchor_node_types must be within the range [0, {n_node_types-1}]"
77
+ )
78
+ valid_starting_types_for_true_paths = anchor_node_types
79
+
80
+ # 1. Define True Paths and their weights
81
+ true_paths_definitions = []
82
+ for _ in range(n_true_paths):
83
+ path_len = int(max(1, rng.normal(avg_true_path_length, std_true_path_length)))
84
+ if path_len == 0:
85
+ continue
86
+
87
+ # First element of the path must be from valid_starting_types_for_true_paths
88
+ first_node_type = rng.choice(valid_starting_types_for_true_paths)
89
+
90
+ if path_len == 1:
91
+ path_def = tuple([int(first_node_type)])
92
+ else:
93
+ remaining_path_types = rng.choice(possible_node_types, size=path_len - 1)
94
+ path_def = tuple(
95
+ [int(first_node_type)] + [int(x) for x in remaining_path_types]
96
+ )
97
+
98
+ # Add all prefixes of the path that start with an anchor type
99
+ for i in range(len(path_def)):
100
+ prefix = path_def[: i + 1]
101
+ if (
102
+ prefix[0] in valid_starting_types_for_true_paths
103
+ ): # Ensure prefix starts with a valid anchor
104
+ true_paths_definitions.append(prefix)
105
+
106
+ true_paths_definitions = list(set(true_paths_definitions)) # Remove duplicates
107
+
108
+ # Adjust number of weights if true_paths_definitions changed size due to prefix addition and set conversion
109
+ actual_n_true_paths = len(true_paths_definitions)
110
+ true_path_weights = rng.uniform(-2, 2, size=actual_n_true_paths)
111
+
112
+ graphs = []
113
+ y_labels = []
114
+
115
+ # Determine the name of the numerical feature to use for label calculation
116
+ # feature_0 is type, feature_1 is the first numerical, etc.
117
+ label_feature_name = f"feature_{numerical_feature_idx_for_label}"
118
+
119
+ for i_graph in range(n_graphs):
120
+ # 2. Generate a random graph
121
+ num_nodes = int(max(2, rng.normal(avg_n_nodes, std_n_nodes)))
122
+ G = nx.erdos_renyi_graph(num_nodes, graph_density, seed=random_state + i_graph)
123
+ if not nx.is_connected(
124
+ G
125
+ ): # Ensure graph is connected for more interesting paths
126
+ G = nx.erdos_renyi_graph(
127
+ num_nodes, graph_density, seed=random_state + i_graph + n_graphs
128
+ )
129
+ if not nx.is_connected(G): # If still not connected, take largest component
130
+ if G.number_of_nodes() > 0 and G.number_of_edges() > 0:
131
+ largest_cc = max(nx.connected_components(G), key=len)
132
+ G = G.subgraph(largest_cc).copy()
133
+ if G.number_of_nodes() < 2: # if too small, regenerate a simple one
134
+ G = nx.path_graph(max(2, num_nodes // 2), create_using=nx.Graph())
135
+
136
+ # 3. Assign node features
137
+ for node_idx in G.nodes:
138
+ G.nodes[node_idx]["feature_0"] = rng.choice(
139
+ possible_node_types
140
+ ) # Node type
141
+ for f_idx in range(n_numerical_node_features):
142
+ G.nodes[node_idx][f"feature_{f_idx + 1}"] = rng.uniform(-1, 1)
143
+
144
+ # 4. Assign edge features
145
+ for u, v in G.edges:
146
+ for ef_idx in range(n_edge_features):
147
+ G.edges[u, v][f"edge_feature_{ef_idx}"] = rng.uniform(-1, 1)
148
+
149
+ # 5. Calculate label y for the graph
150
+ current_y = 0.0
151
+ for path_idx, path_def in enumerate(true_paths_definitions):
152
+ path_weight = true_path_weights[path_idx]
153
+ path_instances = _find_path_instances(
154
+ G, path_def, type_attribute_name="feature_0"
155
+ )
156
+
157
+ for instance_nodes in path_instances:
158
+ feature_sum_on_instance = 0.0
159
+ if label_feature_name == "feature_0": # if using the type itself
160
+ feature_sum_on_instance = sum(
161
+ G.nodes[node][label_feature_name]
162
+ for node in instance_nodes
163
+ if label_feature_name in G.nodes[node]
164
+ )
165
+ else: # if using numerical features
166
+ feature_sum_on_instance = sum(
167
+ G.nodes[node].get(label_feature_name, 0)
168
+ for node in instance_nodes
169
+ )
170
+ current_y += path_weight * feature_sum_on_instance
171
+
172
+ current_y += rng.normal(0, noise_std)
173
+
174
+ graphs.append(G)
175
+ y_labels.append(current_y)
176
+
177
+ return graphs, np.array(y_labels), true_paths_definitions, true_path_weights
178
+
179
+
180
+ if __name__ == "__main__":
181
+ N_NODE_TYPES = 5 # Total distinct types of nodes, e.g., 0, 1, 2, 3, 4
182
+ # Define which node types will be considered as anchors for true path generation AND for PathBoost
183
+ anchor_types_for_generation_and_boosting = [
184
+ 0,
185
+ 1,
186
+ 2,
187
+ ] # Example: types 0, 1, and 2 are anchors
188
+
189
+ # Generate synthetic dataset
190
+ nx_graphs, y, true_paths, true_weights = generate_synthetic_graph_dataset(
191
+ n_graphs=100,
192
+ avg_n_nodes=12,
193
+ std_n_nodes=3,
194
+ graph_density=0.4,
195
+ n_node_types=N_NODE_TYPES,
196
+ anchor_node_types=anchor_types_for_generation_and_boosting, # Pass the anchor types here
197
+ n_numerical_node_features=2, # feature_1, feature_2
198
+ n_edge_features=1,
199
+ n_true_paths=4, # This will be the number of "base" true paths, prefixes will be added
200
+ avg_true_path_length=3,
201
+ std_true_path_length=1,
202
+ numerical_feature_idx_for_label=1, # Use feature_1 for label calculation
203
+ noise_std=0.1,
204
+ random_state=42,
205
+ )
206
+
207
+ print(f"Generated {len(nx_graphs)} graphs.")
208
+ print(f"Example y values: {y[:5]}")
209
+ print(f"True paths definitions (may include prefixes): {true_paths}")
210
+ print(f"True path weights: {true_weights}")
211
+
212
+ list_anchor_nodes_labels = (
213
+ anchor_types_for_generation_and_boosting # Use the same for PathBoost
214
+ )
215
+ anchor_nodes_label_name_for_fitting = "feature_0" # Node types are in 'feature_0'
216
+
217
+ parameters_variable_importance: dict = {
218
+ "criterion": "absolute",
219
+ "error_used": "mse",
220
+ "use_correlation": False,
221
+ "normalize": True,
222
+ }
223
+
224
+ X_train, X_test, y_train, y_test = train_test_split(
225
+ nx_graphs, y, test_size=0.25, random_state=42
226
+ )
227
+
228
+ eval_set = [(X_test, y_test)]
229
+
230
+ # --- GridSearchCV for hyperparameter tuning ---
231
+ print("\nStarting GridSearchCV for hyperparameter tuning...")
232
+
233
+ # Define the parameter grid
234
+ param_grid = {
235
+ "learning_rate": [0.01, 0.1, 0.8],
236
+ "max_path_length": [3, 4],
237
+ "kwargs_for_base_learner": [{"max_depth": 3}, {"max_depth": 5}],
238
+ }
239
+
240
+ # Initialize a base PathBoost model for GridSearchCV
241
+ # n_iter is set to a smaller value for faster CV.
242
+ # Other parameters like n_of_cores, verbose are set for CV.
243
+ base_pb_for_cv = PathBoost(
244
+ n_iter=20, # smaller n_iter for quicker CV
245
+ n_of_cores=1,
246
+ verbose=False,
247
+ parameters_variable_importance=None, # Disable var importance during CV
248
+ )
249
+
250
+ grid_search = GridSearchCV(
251
+ estimator=base_pb_for_cv,
252
+ param_grid=param_grid,
253
+ scoring="neg_mean_squared_error",
254
+ cv=3,
255
+ verbose=1,
256
+ n_jobs=1,
257
+ )
258
+
259
+ # Fit GridSearchCV
260
+ # Pass anchor_nodes_label_name and list_anchor_nodes_labels as they are needed by PathBoost.fit
261
+ grid_search.fit(
262
+ X_train,
263
+ y_train,
264
+ anchor_nodes_label_name=anchor_nodes_label_name_for_fitting,
265
+ list_anchor_nodes_labels=list_anchor_nodes_labels,
266
+ )
267
+
268
+ print("\nGridSearchCV finished.")
269
+ print(f"Best parameters found: {grid_search.best_params_}")
270
+ print(f"Best cross-validation score (Negative MSE): {grid_search.best_score_}")
271
+
272
+ # --- Fit final model with best parameters ---
273
+ print("\nFitting final PathBoost model with best parameters...")
274
+ best_params_from_cv = grid_search.best_params_
275
+
276
+ path_boost_final = PathBoost(
277
+ max_path_length=best_params_from_cv["max_path_length"],
278
+ learning_rate=best_params_from_cv["learning_rate"],
279
+ kwargs_for_base_learner=best_params_from_cv["kwargs_for_base_learner"],
280
+ verbose=True,
281
+ parameters_variable_importance=parameters_variable_importance,
282
+ )
283
+
284
+ # Fit the final model
285
+ path_boost_final.fit(
286
+ X=X_train,
287
+ y=y_train,
288
+ eval_set=eval_set,
289
+ list_anchor_nodes_labels=list_anchor_nodes_labels,
290
+ anchor_nodes_label_name=anchor_nodes_label_name_for_fitting,
291
+ )
292
+
293
+ print("\nPlotting results for the final tuned model...")
294
+ path_boost_final.plot_training_and_eval_errors(
295
+ skip_first_n_iterations=0, plot_eval_sets_error=True
296
+ )
297
+ if path_boost_final.parameters_variable_importance is not None and hasattr(
298
+ path_boost_final, "variable_importance_"
299
+ ):
300
+ path_boost_final.plot_variable_importance(top_n_features=10)
301
+ else:
302
+ print("Variable importance not computed or available for the final model.")
303
+
304
+ print("\nExample run with GridSearchCV finished.")
@@ -0,0 +1,217 @@
1
+ """
2
+ The :mod:`path_boost.utils.discovery` module includes utilities to discover
3
+ objects (i.e. estimators, displays, functions) from the `path_boost` package.
4
+ """
5
+
6
+ # Adapted from scikit-learn
7
+ # Authors: scikit-learn-contrib developers
8
+ # License: BSD 3 clause
9
+
10
+ import inspect
11
+ import pkgutil
12
+ from importlib import import_module
13
+ from operator import itemgetter
14
+ from pathlib import Path
15
+
16
+ from sklearn.base import (
17
+ BaseEstimator,
18
+ ClassifierMixin,
19
+ ClusterMixin,
20
+ RegressorMixin,
21
+ TransformerMixin,
22
+ )
23
+ from sklearn.utils._testing import ignore_warnings
24
+
25
+ _MODULE_TO_IGNORE = {"tests"}
26
+
27
+
28
def all_estimators(type_filter=None):
    """Collect every concrete estimator class exposed by `path_boost`.

    All modules of the package are imported and scanned for public classes
    deriving from `BaseEstimator`. Modules with a ``tests`` component in their
    dotted name are skipped, and abstract classes are left out.

    Parameters
    ----------
    type_filter : {"classifier", "regressor", "cluster", "transformer"} or \
list of such str, default=None
        Restrict the result to estimators of the given kind(s). With a list,
        an estimator is kept when it matches at least one of the kinds. With
        None every estimator is returned.

    Returns
    -------
    estimators : list of tuples
        ``(name, class)`` pairs sorted by name, where ``name`` is the class
        name as string and ``class`` is the class object.

    Raises
    ------
    ValueError
        If `type_filter` holds a value that is not one of the known kinds.

    Examples
    --------
    >>> from path_boost.utils.discovery import all_estimators
    >>> estimators = all_estimators()
    >>> type(estimators)
    <class 'list'>
    """

    def is_concrete(cls):
        # Classes without abstract methods either lack the attribute or
        # expose an empty collection.
        return not getattr(cls, "__abstractmethods__", ())

    package_root = str(Path(__file__).parent.parent)  # path_boost package
    discovered = set()
    # Silence FutureWarnings raised while importing / walking the package.
    with ignore_warnings(category=FutureWarning):
        for _, dotted_name, _ in pkgutil.walk_packages(
            path=[package_root], prefix="path_boost."
        ):
            if not _MODULE_TO_IGNORE.isdisjoint(dotted_name.split(".")):
                continue
            members = inspect.getmembers(import_module(dotted_name), inspect.isclass)
            discovered.update(
                (label, klass) for label, klass in members if not label.startswith("_")
            )

    estimators = [
        (label, klass)
        for label, klass in discovered
        if issubclass(klass, BaseEstimator)
        and label != "BaseEstimator"
        and is_concrete(klass)
    ]

    if type_filter is not None:
        # Work on a private list: matched kinds are removed from it so that
        # whatever is left over is known to be invalid.
        requested = (
            list(type_filter) if isinstance(type_filter, list) else [type_filter]
        )
        mixin_by_kind = {
            "classifier": ClassifierMixin,
            "regressor": RegressorMixin,
            "transformer": TransformerMixin,
            "cluster": ClusterMixin,
        }
        selected = []
        for kind, mixin in mixin_by_kind.items():
            if kind not in requested:
                continue
            requested.remove(kind)
            selected.extend(entry for entry in estimators if issubclass(entry[1], mixin))
        estimators = selected
        if requested:
            raise ValueError(
                "Parameter type_filter must be 'classifier', "
                "'regressor', 'transformer', 'cluster' or "
                "None, got"
                f" {repr(requested)}."
            )

    # De-duplicate, then sort on the name only so classes are never compared.
    return sorted(set(estimators), key=itemgetter(0))
126
+
127
+
128
def all_displays():
    """Collect every display class exposed by `path_boost`.

    A display is any public class whose name ends with ``Display``. Modules
    with a ``tests`` component in their dotted name are skipped.

    Returns
    -------
    displays : list of tuples
        ``(name, class)`` pairs sorted by name, where ``name`` is the display
        class name as string and ``class`` is the class object.

    Examples
    --------
    >>> from path_boost.utils.discovery import all_displays
    >>> displays = all_displays()
    """
    package_root = str(Path(__file__).parent.parent)  # path_boost package
    discovered = set()
    # Silence FutureWarnings raised while importing / walking the package.
    with ignore_warnings(category=FutureWarning):
        for _, dotted_name, _ in pkgutil.walk_packages(
            path=[package_root], prefix="path_boost."
        ):
            if not _MODULE_TO_IGNORE.isdisjoint(dotted_name.split(".")):
                continue
            members = inspect.getmembers(import_module(dotted_name), inspect.isclass)
            discovered.update(
                (label, klass)
                for label, klass in members
                if not label.startswith("_") and label.endswith("Display")
            )

    return sorted(discovered, key=itemgetter(0))
163
+
164
+
165
def _is_checked_function(item):
    """Tell whether `item` is a public function defined inside `path_boost`.

    True only for plain Python functions whose name has no leading underscore
    and whose module lives under ``path_boost.`` without ending in
    ``estimator_checks``.
    """
    if not inspect.isfunction(item) or item.__name__.startswith("_"):
        return False

    owner = item.__module__
    return owner.startswith("path_boost.") and not owner.endswith("estimator_checks")
177
+
178
+
179
def all_functions():
    """Collect every public function exposed by `path_boost`.

    Modules with a ``tests`` component in their dotted name are skipped;
    functions are selected with `_is_checked_function`.

    Returns
    -------
    functions : list of tuples
        ``(name, function)`` pairs sorted by name, where ``name`` is the
        function's own ``__name__`` and ``function`` is the function object.

    Examples
    --------
    >>> from path_boost.utils.discovery import all_functions
    >>> functions = all_functions()
    """
    package_root = str(Path(__file__).parent.parent)  # path_boost package
    discovered = set()
    # Silence FutureWarnings raised while importing / walking the package.
    with ignore_warnings(category=FutureWarning):
        for _, dotted_name, _ in pkgutil.walk_packages(
            path=[package_root], prefix="path_boost."
        ):
            if not _MODULE_TO_IGNORE.isdisjoint(dotted_name.split(".")):
                continue

            members = inspect.getmembers(
                import_module(dotted_name), _is_checked_function
            )
            # Key on the function's real name; the attribute name it was
            # found under only serves to drop private aliases.
            discovered.update(
                (func.__name__, func)
                for alias, func in members
                if not alias.startswith("_")
            )

    # Sort on the name only so function objects are never compared.
    return sorted(discovered, key=itemgetter(0))