path-boost 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- path_boost-2.1.0/.Rhistory +0 -0
- path_boost-2.1.0/.coveragerc +6 -0
- path_boost-2.1.0/.github/dependabot.yml +18 -0
- path_boost-2.1.0/.github/workflows/deploy-gh-pages.yml +34 -0
- path_boost-2.1.0/.github/workflows/lint.yml +23 -0
- path_boost-2.1.0/.github/workflows/python-app.yml +33 -0
- path_boost-2.1.0/.gitignore +78 -0
- path_boost-2.1.0/.pre-commit-config.yaml +16 -0
- path_boost-2.1.0/LICENSE +21 -0
- path_boost-2.1.0/PKG-INFO +174 -0
- path_boost-2.1.0/README.md +141 -0
- path_boost-2.1.0/doc/Makefile +184 -0
- path_boost-2.1.0/doc/_static/css/project-template.css +59 -0
- path_boost-2.1.0/doc/_static/img/index_api.svg +97 -0
- path_boost-2.1.0/doc/_static/img/index_examples.svg +76 -0
- path_boost-2.1.0/doc/_static/img/index_getting_started.svg +66 -0
- path_boost-2.1.0/doc/_static/img/index_user_guide.svg +67 -0
- path_boost-2.1.0/doc/_static/img/logo.png +0 -0
- path_boost-2.1.0/doc/_static/js/copybutton.js +63 -0
- path_boost-2.1.0/doc/_templates/class.rst +26 -0
- path_boost-2.1.0/doc/_templates/function.rst +12 -0
- path_boost-2.1.0/doc/_templates/numpydoc_docstring.py +16 -0
- path_boost-2.1.0/doc/_templates/sidebar-search-bs.html +14 -0
- path_boost-2.1.0/doc/api.rst +48 -0
- path_boost-2.1.0/doc/conf.py +114 -0
- path_boost-2.1.0/doc/index.rst +108 -0
- path_boost-2.1.0/doc/make.bat +242 -0
- path_boost-2.1.0/doc/quick_start.rst +181 -0
- path_boost-2.1.0/doc/user_guide.rst +180 -0
- path_boost-2.1.0/examples/README.txt +6 -0
- path_boost-2.1.0/examples/example_cross_validation_path_boost.py +103 -0
- path_boost-2.1.0/examples/example_on_TUD_dataset.py +57 -0
- path_boost-2.1.0/examples/example_run_EPB_on_uNatQ_nx_graphs.py +127 -0
- path_boost-2.1.0/examples/example_run_extended_path_boost_on_subsampled_dataset.py +54 -0
- path_boost-2.1.0/examples/examples_utils.py +24 -0
- path_boost-2.1.0/examples/plot_classifier.py +41 -0
- path_boost-2.1.0/examples/plot_template.py +18 -0
- path_boost-2.1.0/examples/plot_transformer.py +27 -0
- path_boost-2.1.0/path_boost/__init__.py +18 -0
- path_boost-2.1.0/path_boost/_path_boost.py +1096 -0
- path_boost-2.1.0/path_boost/_version.py +24 -0
- path_boost-2.1.0/path_boost/utils/__init__.py +2 -0
- path_boost-2.1.0/path_boost/utils/classes/__init__.py +0 -0
- path_boost-2.1.0/path_boost/utils/classes/additive_model_wrapper.py +301 -0
- path_boost-2.1.0/path_boost/utils/classes/additive_model_wrapper_classifier.py +394 -0
- path_boost-2.1.0/path_boost/utils/classes/extended_boosting_matrix.py +596 -0
- path_boost-2.1.0/path_boost/utils/classes/interfaces/__init__.py +0 -0
- path_boost-2.1.0/path_boost/utils/classes/interfaces/interface_base_learner.py +30 -0
- path_boost-2.1.0/path_boost/utils/classes/interfaces/interface_selector.py +27 -0
- path_boost-2.1.0/path_boost/utils/classes/sequential_path_boost.py +1023 -0
- path_boost-2.1.0/path_boost/utils/classes/sequential_path_boost_classifier.py +840 -0
- path_boost-2.1.0/path_boost/utils/cross_validation.py +49 -0
- path_boost-2.1.0/path_boost/utils/cyclic_path_boost_utils.py +76 -0
- path_boost-2.1.0/path_boost/utils/datasets_for_examples/__init__.py +2 -0
- path_boost-2.1.0/path_boost/utils/datasets_for_examples/generate_example_dataset.py +304 -0
- path_boost-2.1.0/path_boost/utils/discovery.py +217 -0
- path_boost-2.1.0/path_boost/utils/plots_functions.py +153 -0
- path_boost-2.1.0/path_boost/utils/validate_data.py +223 -0
- path_boost-2.1.0/path_boost/utils/variable_importance_according_to_path_boost.py +341 -0
- path_boost-2.1.0/path_boost.egg-info/PKG-INFO +174 -0
- path_boost-2.1.0/path_boost.egg-info/SOURCES.txt +94 -0
- path_boost-2.1.0/path_boost.egg-info/dependency_links.txt +1 -0
- path_boost-2.1.0/path_boost.egg-info/requires.txt +13 -0
- path_boost-2.1.0/path_boost.egg-info/top_level.txt +1 -0
- path_boost-2.1.0/pixi.lock +11605 -0
- path_boost-2.1.0/pyproject.toml +148 -0
- path_boost-2.1.0/requirements.txt +23 -0
- path_boost-2.1.0/setup.cfg +4 -0
- path_boost-2.1.0/tests/__init__.py +2 -0
- path_boost-2.1.0/tests/datasets_used_for_tests/__init__.py +2 -0
- path_boost-2.1.0/tests/datasets_used_for_tests/load_test_dataset.py +16 -0
- path_boost-2.1.0/tests/test_algorithm_correctness/__init__.py +0 -0
- path_boost-2.1.0/tests/test_algorithm_correctness/test_algorithm_matches_specification.py +308 -0
- path_boost-2.1.0/tests/test_discovery.py +31 -0
- path_boost-2.1.0/tests/test_extended_path_boost/__init__.py +2 -0
- path_boost-2.1.0/tests/test_extended_path_boost/test_extended_path_boost.py +36 -0
- path_boost-2.1.0/tests/test_extended_path_boost/test_extended_path_boost_base_learners_and_selector_classes.py +88 -0
- path_boost-2.1.0/tests/test_extended_path_boost/test_extended_path_boost_fit.py +89 -0
- path_boost-2.1.0/tests/test_extended_path_boost/test_extended_path_boost_predict.py +173 -0
- path_boost-2.1.0/tests/test_extended_path_boost/test_split_extended_path_boost.py +84 -0
- path_boost-2.1.0/tests/test_sequential_path_boost/__init__.py +0 -0
- path_boost-2.1.0/tests/test_sequential_path_boost/test_fit_single_metal_center_path_boost.py +371 -0
- path_boost-2.1.0/tests/test_sequential_path_boost/test_single_metal_center_path_boost.py +62 -0
- path_boost-2.1.0/tests/test_sequential_path_boost/test_variable_importance.py +55 -0
- path_boost-2.1.0/tests/tests_extended_boosting_matrix/__init__.py +0 -0
- path_boost-2.1.0/tests/tests_extended_boosting_matrix/test_extended_boosting_matrix_finding_attributes.py +95 -0
- path_boost-2.1.0/tests/tests_extended_boosting_matrix/test_extended_boosting_matrix_get_frequency_matrix.py +60 -0
- path_boost-2.1.0/tests/tests_extended_boosting_matrix/test_extended_boosting_matrix_label_path_finding_in_single_graph.py +100 -0
- path_boost-2.1.0/tests/tests_extended_boosting_matrix/test_extended_boosting_matrix_label_path_finding_in_single_graph_with_starting_node.py +123 -0
- path_boost-2.1.0/tests/tests_extended_boosting_matrix/test_extended_boosting_matrix_path_finding_in_dataset.py +200 -0
- path_boost-2.1.0/tests/tests_extended_boosting_matrix/test_initialization_extended_boosting_matrix.py +263 -0
- path_boost-2.1.0/tests/tests_extended_boosting_matrix/test_new_columns_extend_extended_boosting_matrix.py +136 -0
- path_boost-2.1.0/tests/tests_for_cyclyc_path_boost.py +63 -0
- path_boost-2.1.0/tests/tests_sequential_path_boost_classifier/__init__.py +0 -0
- path_boost-2.1.0/tests/tests_sequential_path_boost_classifier/test_fit_single_metal_center_path_boost_classifier.py +195 -0
- path_boost-2.1.0/tests/tests_sequential_path_boost_classifier/test_variable_importance_classifier.py +58 -0
|
File without changes
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
# Maintain dependencies for GitHub Actions as recommended in SPEC8:
|
|
4
|
+
# https://github.com/scientific-python/specs/pull/325
|
|
5
|
+
# At the time of writing, release critical workflows such as
|
|
6
|
+
# pypa/gh-action-pypi-publish should use hash-based versioning for security
|
|
7
|
+
# reasons. This strategy may be generalized to all other github actions
|
|
8
|
+
# in the future.
|
|
9
|
+
- package-ecosystem: "github-actions"
|
|
10
|
+
directory: "/"
|
|
11
|
+
schedule:
|
|
12
|
+
interval: "weekly"
|
|
13
|
+
groups:
|
|
14
|
+
actions:
|
|
15
|
+
patterns:
|
|
16
|
+
- "*"
|
|
17
|
+
reviewers:
|
|
18
|
+
- "glemaitre"
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
name: Documentation
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
pull_request:
|
|
8
|
+
branches:
|
|
9
|
+
- main
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
deploy-gh-pages:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
with:
|
|
18
|
+
fetch-depth: 0
|
|
19
|
+
- uses: prefix-dev/setup-pixi@v0.8.1
|
|
20
|
+
with:
|
|
21
|
+
pixi-version: v0.23.0
|
|
22
|
+
environments: doc
|
|
23
|
+
frozen: true
|
|
24
|
+
|
|
25
|
+
- name: Build documentation
|
|
26
|
+
run: pixi run -e doc build-doc
|
|
27
|
+
|
|
28
|
+
- name: Update the main gh-page website
|
|
29
|
+
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
|
|
30
|
+
uses: peaceiris/actions-gh-pages@v4.0.0
|
|
31
|
+
with:
|
|
32
|
+
github_token: ${{ secrets.GITHUB_TOKEN }}
|
|
33
|
+
publish_dir: ./doc/_build/html
|
|
34
|
+
commit_message: "[ci skip] ${{ github.event.head_commit.message }}"
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
name: Linter
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
pull_request:
|
|
8
|
+
branches:
|
|
9
|
+
- main
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
build:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
- uses: prefix-dev/setup-pixi@v0.8.1
|
|
17
|
+
with:
|
|
18
|
+
pixi-version: v0.23.0
|
|
19
|
+
environments: lint
|
|
20
|
+
frozen: true
|
|
21
|
+
|
|
22
|
+
- name: Run linter
|
|
23
|
+
run: pixi run -e lint lint
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: Unit Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
pull_request:
|
|
8
|
+
branches:
|
|
9
|
+
- main
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
build:
|
|
13
|
+
strategy:
|
|
14
|
+
matrix:
|
|
15
|
+
os: [windows-latest, ubuntu-latest, macos-latest, macos-12]
|
|
16
|
+
environment: [test]
|
|
17
|
+
runs-on: ${{ matrix.os }}
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v4
|
|
20
|
+
- uses: prefix-dev/setup-pixi@v0.8.1
|
|
21
|
+
with:
|
|
22
|
+
pixi-version: v0.23.0
|
|
23
|
+
environments: ${{ matrix.environment }}
|
|
24
|
+
frozen: true
|
|
25
|
+
|
|
26
|
+
- name: Run tests
|
|
27
|
+
run: pixi run -e ${{ matrix.environment }} test
|
|
28
|
+
|
|
29
|
+
- name: Upload coverage reports to Codecov
|
|
30
|
+
uses: codecov/codecov-action@v4.6.0
|
|
31
|
+
with:
|
|
32
|
+
token: ${{ secrets.CODECOV_TOKEN }}
|
|
33
|
+
slug: scikit-learn-contrib/path_boost
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# scikit-learn specific
|
|
10
|
+
doc/_build/
|
|
11
|
+
doc/auto_examples/
|
|
12
|
+
doc/modules/generated/
|
|
13
|
+
doc/datasets/generated/
|
|
14
|
+
|
|
15
|
+
# Distribution / packaging
|
|
16
|
+
|
|
17
|
+
.Python
|
|
18
|
+
env/
|
|
19
|
+
build/
|
|
20
|
+
develop-eggs/
|
|
21
|
+
dist/
|
|
22
|
+
downloads/
|
|
23
|
+
eggs/
|
|
24
|
+
.eggs/
|
|
25
|
+
lib/
|
|
26
|
+
lib64/
|
|
27
|
+
parts/
|
|
28
|
+
sdist/
|
|
29
|
+
var/
|
|
30
|
+
*.egg-info/
|
|
31
|
+
.installed.cfg
|
|
32
|
+
*.egg
|
|
33
|
+
|
|
34
|
+
# PyInstaller
|
|
35
|
+
# Usually these files are written by a python script from a template
|
|
36
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
37
|
+
*.manifest
|
|
38
|
+
*.spec
|
|
39
|
+
|
|
40
|
+
# Installer logs
|
|
41
|
+
pip-log.txt
|
|
42
|
+
pip-delete-this-directory.txt
|
|
43
|
+
|
|
44
|
+
# Unit test / coverage reports
|
|
45
|
+
htmlcov/
|
|
46
|
+
.tox/
|
|
47
|
+
.coverage
|
|
48
|
+
.coverage.*
|
|
49
|
+
.cache
|
|
50
|
+
nosetests.xml
|
|
51
|
+
coverage.xml
|
|
52
|
+
*,cover
|
|
53
|
+
.hypothesis/
|
|
54
|
+
|
|
55
|
+
# Translations
|
|
56
|
+
*.mo
|
|
57
|
+
*.pot
|
|
58
|
+
|
|
59
|
+
# Django stuff:
|
|
60
|
+
*.log
|
|
61
|
+
|
|
62
|
+
# Sphinx documentation
|
|
63
|
+
doc/_build/
|
|
64
|
+
doc/generated/
|
|
65
|
+
doc/sg_execution_times.rst
|
|
66
|
+
|
|
67
|
+
# PyBuilder
|
|
68
|
+
target/
|
|
69
|
+
|
|
70
|
+
.pixi
|
|
71
|
+
|
|
72
|
+
# General
|
|
73
|
+
.DS_Store
|
|
74
|
+
.AppleDouble
|
|
75
|
+
.LSOverride
|
|
76
|
+
|
|
77
|
+
# auto-generated files
|
|
78
|
+
path_boost/_version.py
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
+
rev: v4.3.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: check-yaml
|
|
6
|
+
- id: end-of-file-fixer
|
|
7
|
+
- id: trailing-whitespace
|
|
8
|
+
- repo: https://github.com/psf/black
|
|
9
|
+
rev: 23.3.0
|
|
10
|
+
hooks:
|
|
11
|
+
- id: black
|
|
12
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
13
|
+
rev: v0.0.272
|
|
14
|
+
hooks:
|
|
15
|
+
- id: ruff
|
|
16
|
+
args: ["--fix", "--show-source"]
|
path_boost-2.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Claudio Meggio
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: path_boost
|
|
3
|
+
Version: 2.1.0
|
|
4
|
+
Summary: Interpretable machine learning on graph-structured data using path-based boosting.
|
|
5
|
+
Author-email: Vighnesh Birodkar <vighneshbirodkar@nyu.edu>, Guillaume Lemaitre <g.lemaitre58@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Claudio-Me/extended_path_boost
|
|
8
|
+
Project-URL: Issues, https://github.com/Claudio-Me/extended_path_boost/issues
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Operating System :: POSIX
|
|
15
|
+
Classifier: Operating System :: Unix
|
|
16
|
+
Classifier: Operating System :: MacOS
|
|
17
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
18
|
+
Requires-Python: >=3.9
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: scikit-learn>=1.4.2
|
|
22
|
+
Requires-Dist: networkx>=3.0
|
|
23
|
+
Requires-Dist: pandas>=2.0
|
|
24
|
+
Requires-Dist: numpy>=1.24
|
|
25
|
+
Requires-Dist: matplotlib>=3.5
|
|
26
|
+
Requires-Dist: joblib>=1.2
|
|
27
|
+
Requires-Dist: scipy>=1.10
|
|
28
|
+
Provides-Extra: progress
|
|
29
|
+
Requires-Dist: tqdm>=4.64; extra == "progress"
|
|
30
|
+
Provides-Extra: all
|
|
31
|
+
Requires-Dist: tqdm>=4.64; extra == "all"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# Path Boost
|
|
35
|
+
|
|
36
|
+
Path Boost is a Python library for interpretable machine learning on graph-structured data. It implements the PathBoost and SequentialPathBoost algorithms, which iteratively construct features based on paths in graphs and use boosting to build predictive models. The library is designed for tasks where input data consists of collections of graphs (e.g., molecules, social networks) and supports variable importance analysis for interpretability.
|
|
37
|
+
|
|
38
|
+
## Features
|
|
39
|
+
|
|
40
|
+
- **PathBoost**: Ensemble learning over graph paths, partitioned by anchor nodes.
|
|
41
|
+
- **SequentialPathBoost**: Boosting with path-based features, iteratively expanding the feature space.
|
|
42
|
+
- **Variable Importance**: Quantifies the importance of paths/features in prediction.
|
|
43
|
+
- **Parallel Training**: Supports multi-core training for large datasets.
|
|
44
|
+
- **Evaluation and Visualization**: Built-in tools for error tracking and variable importance plotting.
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
Install from PyPI:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install path_boost
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Usage Example
|
|
55
|
+
|
|
56
|
+
Below is a minimal example using the `PathBoost` model:
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
import numpy as np
|
|
60
|
+
import networkx as nx
|
|
61
|
+
from sklearn.model_selection import train_test_split
|
|
62
|
+
from path_boost import PathBoost
|
|
63
|
+
from path_boost.utils.datasets_for_examples.generate_example_dataset import generate_synthetic_graph_dataset
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
if __name__ == "__main__":
|
|
69
|
+
|
|
70
|
+
# Generate synthetic dataset
|
|
71
|
+
nx_graphs, y, true_paths, true_weights = generate_synthetic_graph_dataset()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
list_anchor_nodes_labels = [0, 1, 2]
|
|
75
|
+
|
|
76
|
+
parameters_variable_importance: dict = {
|
|
77
|
+
'criterion': 'absolute',
|
|
78
|
+
'error_used': 'mse',
|
|
79
|
+
'use_correlation': False,
|
|
80
|
+
'normalize': True,
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
X_train, X_test, y_train, y_test = train_test_split(nx_graphs, y, test_size=0.25, random_state=42)
|
|
84
|
+
|
|
85
|
+
eval_set = [(X_test, y_test)]
|
|
86
|
+
|
|
87
|
+
path_boost = PathBoost(
|
|
88
|
+
n_iter=50, # Reduced for quicker example run
|
|
89
|
+
max_path_length=5,
|
|
90
|
+
learning_rate=0.1,
|
|
91
|
+
n_of_cores=1, # Set to >1 for parallel processing if desired
|
|
92
|
+
verbose=True,
|
|
93
|
+
parameters_variable_importance=parameters_variable_importance
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# Fit the model
|
|
97
|
+
# anchor_nodes_label_name must correspond to the feature storing node types ('feature_0')
|
|
98
|
+
path_boost.fit(
|
|
99
|
+
X=X_train,
|
|
100
|
+
y=y_train,
|
|
101
|
+
eval_set=eval_set,
|
|
102
|
+
list_anchor_nodes_labels=list_anchor_nodes_labels,
|
|
103
|
+
anchor_nodes_label_name="feature_0" # Node types are in 'feature_0'
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
print(f"Generated {len(nx_graphs)} graphs.")
|
|
107
|
+
print(f"Example y values: {y[:5]}")
|
|
108
|
+
print(f"True paths definitions: {true_paths}")
|
|
109
|
+
print(f"True path weights: {true_weights}")
|
|
110
|
+
|
|
111
|
+
path_boost.plot_training_and_eval_errors(skip_first_n_iterations=0, plot_eval_sets_error=True)
|
|
112
|
+
if path_boost.parameters_variable_importance is not None and hasattr(path_boost, 'variable_importance_'):
|
|
113
|
+
path_boost.plot_variable_importance(top_n_features=10)
|
|
114
|
+
else:
|
|
115
|
+
print("Variable importance not computed or available.")
|
|
116
|
+
|
|
117
|
+
print("Example run finished.")
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## API Overview
|
|
122
|
+
|
|
123
|
+
### PathBoost
|
|
124
|
+
|
|
125
|
+
- `fit(X, y, anchor_nodes_label_name, list_anchor_nodes_labels, eval_set=None)`
|
|
126
|
+
- `predict(X)`
|
|
127
|
+
- `predict_step_by_step(X)`
|
|
128
|
+
- `evaluate(X, y)`
|
|
129
|
+
- `plot_training_and_eval_errors(skip_first_n_iterations=True)`
|
|
130
|
+
- `plot_variable_importance()`
|
|
131
|
+
- **Attributes:**
|
|
132
|
+
- `train_mse_`: Training error (MSE) at each iteration
|
|
133
|
+
- `mse_eval_set_`: Evaluation set error (MSE) at each iteration (if `eval_set` is provided)
|
|
134
|
+
- `variable_importance_`: Variable/path importance scores (if enabled)
|
|
135
|
+
- `is_fitted_`: Whether the model is fitted
|
|
136
|
+
- `models_list_`: List of fitted SequentialPathBoost models (one per anchor node)
|
|
137
|
+
- (Each SequentialPathBoost in `models_list_` exposes the attributes below)
|
|
138
|
+
|
|
139
|
+
### SequentialPathBoost
|
|
140
|
+
|
|
141
|
+
- `fit(X, y, list_anchor_nodes_labels, name_of_label_attribute, eval_set=None)`
|
|
142
|
+
- `predict(X)`
|
|
143
|
+
- `predict_step_by_step(X)`
|
|
144
|
+
- `evaluate(X, y)`
|
|
145
|
+
- `plot_training_and_eval_errors(skip_first_n_iterations=True)`
|
|
146
|
+
- `plot_variable_importance()`
|
|
147
|
+
- **Attributes:**
|
|
148
|
+
- `train_mse_`: Training error (MSE) at each iteration
|
|
149
|
+
- `train_mae_`: Training MAE at each iteration
|
|
150
|
+
- `eval_sets_mse_`: Evaluation set error (MSE) at each iteration (if `eval_set` is provided)
|
|
151
|
+
- `eval_sets_mae_`: Evaluation set MAE at each iteration (if `eval_set` is provided)
|
|
152
|
+
- `variable_importance_`: Variable/path importance scores (if enabled)
|
|
153
|
+
- `paths_selected_by_epb_`: Set of selected paths during boosting
|
|
154
|
+
- `columns_names_`: Names of EBM columns/features used
|
|
155
|
+
- `is_fitted_`: Whether the model is fitted
|
|
156
|
+
|
|
157
|
+
## Requirements
|
|
158
|
+
|
|
159
|
+
- Python 3.9+
|
|
160
|
+
- numpy
|
|
161
|
+
- pandas
|
|
162
|
+
- scikit-learn
|
|
163
|
+
- networkx
|
|
164
|
+
- matplotlib
|
|
165
|
+
|
|
166
|
+
(See `requirements.txt` for the full list.)
|
|
167
|
+
|
|
168
|
+
## Citation
|
|
169
|
+
|
|
170
|
+
If you use this library in your research, please cite the corresponding paper (add citation here).
|
|
171
|
+
|
|
172
|
+
## License
|
|
173
|
+
|
|
174
|
+
MIT License
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# Path Boost
|
|
2
|
+
|
|
3
|
+
Path Boost is a Python library for interpretable machine learning on graph-structured data. It implements the PathBoost and SequentialPathBoost algorithms, which iteratively construct features based on paths in graphs and use boosting to build predictive models. The library is designed for tasks where input data consists of collections of graphs (e.g., molecules, social networks) and supports variable importance analysis for interpretability.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **PathBoost**: Ensemble learning over graph paths, partitioned by anchor nodes.
|
|
8
|
+
- **SequentialPathBoost**: Boosting with path-based features, iteratively expanding the feature space.
|
|
9
|
+
- **Variable Importance**: Quantifies the importance of paths/features in prediction.
|
|
10
|
+
- **Parallel Training**: Supports multi-core training for large datasets.
|
|
11
|
+
- **Evaluation and Visualization**: Built-in tools for error tracking and variable importance plotting.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
Install from PyPI:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install path_boost
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Usage Example
|
|
22
|
+
|
|
23
|
+
Below is a minimal example using the `PathBoost` model:
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
import numpy as np
|
|
27
|
+
import networkx as nx
|
|
28
|
+
from sklearn.model_selection import train_test_split
|
|
29
|
+
from path_boost import PathBoost
|
|
30
|
+
from path_boost.utils.datasets_for_examples.generate_example_dataset import generate_synthetic_graph_dataset
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
if __name__ == "__main__":
|
|
36
|
+
|
|
37
|
+
# Generate synthetic dataset
|
|
38
|
+
nx_graphs, y, true_paths, true_weights = generate_synthetic_graph_dataset()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
list_anchor_nodes_labels = [0, 1, 2]
|
|
42
|
+
|
|
43
|
+
parameters_variable_importance: dict = {
|
|
44
|
+
'criterion': 'absolute',
|
|
45
|
+
'error_used': 'mse',
|
|
46
|
+
'use_correlation': False,
|
|
47
|
+
'normalize': True,
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
X_train, X_test, y_train, y_test = train_test_split(nx_graphs, y, test_size=0.25, random_state=42)
|
|
51
|
+
|
|
52
|
+
eval_set = [(X_test, y_test)]
|
|
53
|
+
|
|
54
|
+
path_boost = PathBoost(
|
|
55
|
+
n_iter=50, # Reduced for quicker example run
|
|
56
|
+
max_path_length=5,
|
|
57
|
+
learning_rate=0.1,
|
|
58
|
+
n_of_cores=1, # Set to >1 for parallel processing if desired
|
|
59
|
+
verbose=True,
|
|
60
|
+
parameters_variable_importance=parameters_variable_importance
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# Fit the model
|
|
64
|
+
# anchor_nodes_label_name must correspond to the feature storing node types ('feature_0')
|
|
65
|
+
path_boost.fit(
|
|
66
|
+
X=X_train,
|
|
67
|
+
y=y_train,
|
|
68
|
+
eval_set=eval_set,
|
|
69
|
+
list_anchor_nodes_labels=list_anchor_nodes_labels,
|
|
70
|
+
anchor_nodes_label_name="feature_0" # Node types are in 'feature_0'
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
print(f"Generated {len(nx_graphs)} graphs.")
|
|
74
|
+
print(f"Example y values: {y[:5]}")
|
|
75
|
+
print(f"True paths definitions: {true_paths}")
|
|
76
|
+
print(f"True path weights: {true_weights}")
|
|
77
|
+
|
|
78
|
+
path_boost.plot_training_and_eval_errors(skip_first_n_iterations=0, plot_eval_sets_error=True)
|
|
79
|
+
if path_boost.parameters_variable_importance is not None and hasattr(path_boost, 'variable_importance_'):
|
|
80
|
+
path_boost.plot_variable_importance(top_n_features=10)
|
|
81
|
+
else:
|
|
82
|
+
print("Variable importance not computed or available.")
|
|
83
|
+
|
|
84
|
+
print("Example run finished.")
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## API Overview
|
|
89
|
+
|
|
90
|
+
### PathBoost
|
|
91
|
+
|
|
92
|
+
- `fit(X, y, anchor_nodes_label_name, list_anchor_nodes_labels, eval_set=None)`
|
|
93
|
+
- `predict(X)`
|
|
94
|
+
- `predict_step_by_step(X)`
|
|
95
|
+
- `evaluate(X, y)`
|
|
96
|
+
- `plot_training_and_eval_errors(skip_first_n_iterations=True)`
|
|
97
|
+
- `plot_variable_importance()`
|
|
98
|
+
- **Attributes:**
|
|
99
|
+
- `train_mse_`: Training error (MSE) at each iteration
|
|
100
|
+
- `mse_eval_set_`: Evaluation set error (MSE) at each iteration (if `eval_set` is provided)
|
|
101
|
+
- `variable_importance_`: Variable/path importance scores (if enabled)
|
|
102
|
+
- `is_fitted_`: Whether the model is fitted
|
|
103
|
+
- `models_list_`: List of fitted SequentialPathBoost models (one per anchor node)
|
|
104
|
+
- (Each SequentialPathBoost in `models_list_` exposes the attributes below)
|
|
105
|
+
|
|
106
|
+
### SequentialPathBoost
|
|
107
|
+
|
|
108
|
+
- `fit(X, y, list_anchor_nodes_labels, name_of_label_attribute, eval_set=None)`
|
|
109
|
+
- `predict(X)`
|
|
110
|
+
- `predict_step_by_step(X)`
|
|
111
|
+
- `evaluate(X, y)`
|
|
112
|
+
- `plot_training_and_eval_errors(skip_first_n_iterations=True)`
|
|
113
|
+
- `plot_variable_importance()`
|
|
114
|
+
- **Attributes:**
|
|
115
|
+
- `train_mse_`: Training error (MSE) at each iteration
|
|
116
|
+
- `train_mae_`: Training MAE at each iteration
|
|
117
|
+
- `eval_sets_mse_`: Evaluation set error (MSE) at each iteration (if `eval_set` is provided)
|
|
118
|
+
- `eval_sets_mae_`: Evaluation set MAE at each iteration (if `eval_set` is provided)
|
|
119
|
+
- `variable_importance_`: Variable/path importance scores (if enabled)
|
|
120
|
+
- `paths_selected_by_epb_`: Set of selected paths during boosting
|
|
121
|
+
- `columns_names_`: Names of EBM columns/features used
|
|
122
|
+
- `is_fitted_`: Whether the model is fitted
|
|
123
|
+
|
|
124
|
+
## Requirements
|
|
125
|
+
|
|
126
|
+
- Python 3.9+
|
|
127
|
+
- numpy
|
|
128
|
+
- pandas
|
|
129
|
+
- scikit-learn
|
|
130
|
+
- networkx
|
|
131
|
+
- matplotlib
|
|
132
|
+
|
|
133
|
+
(See `requirements.txt` for the full list.)
|
|
134
|
+
|
|
135
|
+
## Citation
|
|
136
|
+
|
|
137
|
+
If you use this library in your research, please cite the corresponding paper (add citation here).
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
MIT License
|