pyshapr 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyshapr-0.5.0/LICENSE +2 -0
- pyshapr-0.5.0/LICENSE.md +21 -0
- pyshapr-0.5.0/MANIFEST.in +3 -0
- pyshapr-0.5.0/PKG-INFO +233 -0
- pyshapr-0.5.0/README.md +169 -0
- pyshapr-0.5.0/pyproject.toml +138 -0
- pyshapr-0.5.0/setup.cfg +4 -0
- pyshapr-0.5.0/src/pyshapr/__init__.py +70 -0
- pyshapr-0.5.0/src/pyshapr/_explain.py +680 -0
- pyshapr-0.5.0/src/pyshapr/_rutils.py +58 -0
- pyshapr-0.5.0/src/pyshapr/datasets.py +35 -0
- pyshapr-0.5.0/src/pyshapr/explanation.py +311 -0
- pyshapr-0.5.0/src/pyshapr/utils.py +133 -0
- pyshapr-0.5.0/src/pyshapr.egg-info/PKG-INFO +233 -0
- pyshapr-0.5.0/src/pyshapr.egg-info/SOURCES.txt +16 -0
- pyshapr-0.5.0/src/pyshapr.egg-info/dependency_links.txt +1 -0
- pyshapr-0.5.0/src/pyshapr.egg-info/requires.txt +18 -0
- pyshapr-0.5.0/src/pyshapr.egg-info/top_level.txt +1 -0
pyshapr-0.5.0/LICENSE
ADDED
pyshapr-0.5.0/LICENSE.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2019 Norsk Regnesentral
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pyshapr-0.5.0/PKG-INFO
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyshapr
|
|
3
|
+
Version: 0.5.0
|
|
4
|
+
Summary: Python wrapper for the R package shapr (via rpy2)
|
|
5
|
+
Author: Martin Jullum, Lars Henry Berge Olsen, Didrik Nielsen
|
|
6
|
+
License: # MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2019 Norsk Regnesentral
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/NorskRegnesentral/shapr
|
|
29
|
+
Project-URL: Documentation, https://norskregnesentral.github.io/shapr/pyshapr.html
|
|
30
|
+
Project-URL: Issues, https://github.com/NorskRegnesentral/shapr/issues
|
|
31
|
+
Project-URL: Changelog, https://github.com/NorskRegnesentral/shapr/blob/master/python/CHANGELOG.md
|
|
32
|
+
Keywords: explainable-ai,shapley-values,machine-learning,model-interpretability
|
|
33
|
+
Classifier: Programming Language :: Python :: 3
|
|
34
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
39
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
40
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
41
|
+
Classifier: Intended Audience :: Science/Research
|
|
42
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
43
|
+
Requires-Python: >=3.11
|
|
44
|
+
Description-Content-Type: text/markdown
|
|
45
|
+
License-File: LICENSE
|
|
46
|
+
License-File: LICENSE.md
|
|
47
|
+
Requires-Dist: rpy2>=3.5.1
|
|
48
|
+
Requires-Dist: numpy>=1.22.3
|
|
49
|
+
Requires-Dist: pandas>=1.4.2
|
|
50
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
51
|
+
Requires-Dist: tabulate>=0.8.10
|
|
52
|
+
Requires-Dist: shap>=0.40.0
|
|
53
|
+
Requires-Dist: matplotlib>=3.5.0
|
|
54
|
+
Provides-Extra: test
|
|
55
|
+
Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
56
|
+
Requires-Dist: syrupy>=4.0.0; extra == "test"
|
|
57
|
+
Requires-Dist: xgboost>=1.5.0; extra == "test"
|
|
58
|
+
Provides-Extra: dev
|
|
59
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
60
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
61
|
+
Requires-Dist: syrupy>=4.0.0; extra == "dev"
|
|
62
|
+
Requires-Dist: xgboost>=1.5.0; extra == "dev"
|
|
63
|
+
Dynamic: license-file
|
|
64
|
+
|
|
65
|
+
# pyshapr
|
|
66
|
+
|
|
67
|
+
`pyshapr` is a Python wrapper for the R package [shapr](https://github.com/NorskRegnesentral/shapr),
|
|
68
|
+
using the [`rpy2`](https://rpy2.github.io/) Python library to access R from within Python.
|
|
69
|
+
|
|
70
|
+
> **Renamed:** This package was previously published as `shaprpy`. It has been renamed to `pyshapr`.
|
|
71
|
+
> The old `shaprpy` package remains available on PyPI for a transition period and simply forwards to
|
|
72
|
+
> `pyshapr`. Please switch to `pip install pyshapr` and `import pyshapr`.
|
|
73
|
+
|
|
74
|
+
> **Note:** This wrapper is **not** as comprehensively tested as the R package.
|
|
75
|
+
> `rpy2` has limited support on Windows, and the same therefore applies to `pyshapr`.
|
|
76
|
+
> `pyshapr` has only been tested on Linux (and WSL - Windows Subsystem for Linux), and the instructions below assume a Linux environment.
|
|
77
|
+
>
|
|
78
|
+
> **Requirement:** Python 3.11 or later is required to use `pyshapr`.
|
|
79
|
+
|
|
80
|
+
## Changelog
|
|
81
|
+
|
|
82
|
+
For a list of changes and updates to the `pyshapr` package, see the [pyshapr CHANGELOG](https://norskregnesentral.github.io/shapr/py_changelog.html).
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Installation
|
|
87
|
+
|
|
88
|
+
These instructions assume you already have **pip** and **R** installed and available to the Python environment in which you want to run `pyshapr`.
|
|
89
|
+
|
|
90
|
+
- Official instructions for installing `pip` can be found [here](https://pip.pypa.io/en/stable/installation/).
|
|
91
|
+
- Official instructions for installing R can be found [here](https://cran.r-project.org/).
|
|
92
|
+
|
|
93
|
+
On Debian/Ubuntu-based systems, R can also be installed via:
|
|
94
|
+
```bash
|
|
95
|
+
sudo apt update
|
|
96
|
+
sudo apt install r-base r-base-dev -y
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### 1. Install the R package `shapr`
|
|
100
|
+
|
|
101
|
+
`pyshapr` requires the R package `shapr` (version 1.0.5 or newer).
|
|
102
|
+
In your R environment, install the latest version from CRAN using:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
Rscript -e 'install.packages("shapr", repos="https://cran.rstudio.com")'
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### 2. Ensure R is discoverable (R_HOME and PATH)
|
|
109
|
+
|
|
110
|
+
Sometimes `rpy2` (which `pyshapr` relies on) cannot automatically locate your R installation. To ensure proper detection, verify that:
|
|
111
|
+
|
|
112
|
+
- R is available in your system `PATH`, **or**
|
|
113
|
+
- The `R_HOME` environment variable is set to your R installation directory.
|
|
114
|
+
|
|
115
|
+
Example:
|
|
116
|
+
```bash
|
|
117
|
+
export R_HOME=$(R RHOME)
|
|
118
|
+
export PATH=$PATH:$(R RHOME)/bin
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### 3. Install the Python wrapper
|
|
122
|
+
|
|
123
|
+
Install directly from PyPI with:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
pip install pyshapr
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
#### Local development install (for contributors)
|
|
130
|
+
If you have cloned the repository and want to install in development mode for local changes, navigate to the `./python` directory and run:
|
|
131
|
+
```bash
|
|
132
|
+
pip install -e .
|
|
133
|
+
```
|
|
134
|
+
The `-e` flag installs in editable mode, allowing local code changes to be reflected immediately.
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Quick Demo
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from sklearn.ensemble import RandomForestRegressor
|
|
142
|
+
from pyshapr import explain
|
|
143
|
+
from pyshapr.datasets import load_california_housing
|
|
144
|
+
|
|
145
|
+
# Load example data
|
|
146
|
+
dfx_train, dfx_explain, dfy_train, dfy_explain = load_california_housing()
|
|
147
|
+
|
|
148
|
+
# Fit a model
|
|
149
|
+
model = RandomForestRegressor()
|
|
150
|
+
model.fit(dfx_train, dfy_train.values.flatten())
|
|
151
|
+
|
|
152
|
+
# Explain predictions
|
|
153
|
+
explanation = explain(
|
|
154
|
+
model=model,
|
|
155
|
+
x_train=dfx_train,
|
|
156
|
+
x_explain=dfx_explain,
|
|
157
|
+
approach="gaussian",
|
|
158
|
+
phi0=dfy_train.mean().item(),
|
|
159
|
+
seed=1
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
explanation.print() # Print the Shapley values
|
|
163
|
+
|
|
164
|
+
# Get a summary object with computation details
|
|
165
|
+
summary = explanation.summary()
|
|
166
|
+
print(summary) # Displays a formatted summary (also available directly via explanation.summary())
|
|
167
|
+
|
|
168
|
+
# Access specific summary attributes (available with tab-completion in Jupyter)
|
|
169
|
+
summary['approach'] # Approach used
|
|
170
|
+
summary['timing_summary']['total_time_secs'] # Total computation time
|
|
171
|
+
|
|
172
|
+
# Extract one or more specific result objects directly
|
|
173
|
+
explanation.get_results("proglang") # Programming language used (Python/R)
|
|
174
|
+
explanation.get_results("approach") # Approach used
|
|
175
|
+
explanation.get_results().keys() # All available result objects
|
|
176
|
+
|
|
177
|
+
# Plotting (requires the 'shap' library)
|
|
178
|
+
# Convert to a SHAP Explanation object
|
|
179
|
+
shap_exp = explanation.to_shap()
|
|
180
|
+
|
|
181
|
+
import shap
|
|
182
|
+
shap.plots.waterfall(shap_exp[0]) # Plot the first observation
|
|
183
|
+
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Supported Models
|
|
189
|
+
|
|
190
|
+
`pyshapr` can explain predictions from models built with:
|
|
191
|
+
|
|
192
|
+
- [`scikit-learn`](https://scikit-learn.org/)
|
|
193
|
+
- [`keras`](https://keras.io/) (Sequential API)
|
|
194
|
+
- [`xgboost`](https://xgboost.readthedocs.io/)
|
|
195
|
+
|
|
196
|
+
For other model types, you can supply:
|
|
197
|
+
|
|
198
|
+
- A custom `predict_model` function
|
|
199
|
+
- (Optionally) a custom `get_model_specs` function
|
|
200
|
+
to `pyshapr.explain`.
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Supported Approaches
|
|
205
|
+
|
|
206
|
+
`pyshapr` forwards all approach-specific arguments to `shapr::explain()`. Commonly used approaches include:
|
|
207
|
+
|
|
208
|
+
- `"arf"`, `"categorical"`, `"copula"`, `"ctree"`, `"empirical"`, `"gaussian"`,
|
|
209
|
+
`"regression_separate"`, `"regression_surrogate"`, `"vaeac"`
|
|
210
|
+
- `"independence"` (not recommended)
|
|
211
|
+
|
|
212
|
+
`"arf"`, `"ctree"`, `"regression_separate"`, `"regression_surrogate"` and `"vaeac"` support mixed
|
|
213
|
+
numerical/categorical feature sets, `"categorical"` supports categorical features only,
|
|
214
|
+
while `"copula"`, `"empirical"`, `"gaussian"` and `"independence"` support numerical features only.
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
## Examples
|
|
219
|
+
|
|
220
|
+
See the [examples folder](https://github.com/NorskRegnesentral/shapr/tree/master/python/examples) on GitHub for runnable examples, including:
|
|
221
|
+
|
|
222
|
+
- Basic usage with `scikit-learn` models
|
|
223
|
+
- Usage with `xgboost` models
|
|
224
|
+
- Usage with `keras` models
|
|
225
|
+
- A custom PyTorch model
|
|
226
|
+
- Usage of the `Shapr` class and associated `ShaprSummary` class for exploration and extraction of explanation results.
|
|
227
|
+
- Plotting functionality for the Shapley values through the `shap` package
|
|
228
|
+
- ARF and VAEAC examples for both numerical and mixed categorical feature sets
|
|
229
|
+
- The **regression paradigm** described in [Olsen et al. (2024)](https://link.springer.com/article/10.1007/s10618-024-01016-z),
|
|
230
|
+
which shows:
|
|
231
|
+
- How to specify the regression model
|
|
232
|
+
- How to enable automatic cross-validation of hyperparameters
|
|
233
|
+
- How to apply pre-processing steps before fitting regression models
|
pyshapr-0.5.0/README.md
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# pyshapr
|
|
2
|
+
|
|
3
|
+
`pyshapr` is a Python wrapper for the R package [shapr](https://github.com/NorskRegnesentral/shapr),
|
|
4
|
+
using the [`rpy2`](https://rpy2.github.io/) Python library to access R from within Python.
|
|
5
|
+
|
|
6
|
+
> **Renamed:** This package was previously published as `shaprpy`. It has been renamed to `pyshapr`.
|
|
7
|
+
> The old `shaprpy` package remains available on PyPI for a transition period and simply forwards to
|
|
8
|
+
> `pyshapr`. Please switch to `pip install pyshapr` and `import pyshapr`.
|
|
9
|
+
|
|
10
|
+
> **Note:** This wrapper is **not** as comprehensively tested as the R package.
|
|
11
|
+
> `rpy2` has limited support on Windows, and the same therefore applies to `pyshapr`.
|
|
12
|
+
> `pyshapr` has only been tested on Linux (and WSL - Windows Subsystem for Linux), and the instructions below assume a Linux environment.
|
|
13
|
+
>
|
|
14
|
+
> **Requirement:** Python 3.11 or later is required to use `pyshapr`.
|
|
15
|
+
|
|
16
|
+
## Changelog
|
|
17
|
+
|
|
18
|
+
For a list of changes and updates to the `pyshapr` package, see the [pyshapr CHANGELOG](https://norskregnesentral.github.io/shapr/py_changelog.html).
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
These instructions assume you already have **pip** and **R** installed and available to the Python environment in which you want to run `pyshapr`.
|
|
25
|
+
|
|
26
|
+
- Official instructions for installing `pip` can be found [here](https://pip.pypa.io/en/stable/installation/).
|
|
27
|
+
- Official instructions for installing R can be found [here](https://cran.r-project.org/).
|
|
28
|
+
|
|
29
|
+
On Debian/Ubuntu-based systems, R can also be installed via:
|
|
30
|
+
```bash
|
|
31
|
+
sudo apt update
|
|
32
|
+
sudo apt install r-base r-base-dev -y
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### 1. Install the R package `shapr`
|
|
36
|
+
|
|
37
|
+
`pyshapr` requires the R package `shapr` (version 1.0.5 or newer).
|
|
38
|
+
In your R environment, install the latest version from CRAN using:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
Rscript -e 'install.packages("shapr", repos="https://cran.rstudio.com")'
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### 2. Ensure R is discoverable (R_HOME and PATH)
|
|
45
|
+
|
|
46
|
+
Sometimes `rpy2` (which `pyshapr` relies on) cannot automatically locate your R installation. To ensure proper detection, verify that:
|
|
47
|
+
|
|
48
|
+
- R is available in your system `PATH`, **or**
|
|
49
|
+
- The `R_HOME` environment variable is set to your R installation directory.
|
|
50
|
+
|
|
51
|
+
Example:
|
|
52
|
+
```bash
|
|
53
|
+
export R_HOME=$(R RHOME)
|
|
54
|
+
export PATH=$PATH:$(R RHOME)/bin
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### 3. Install the Python wrapper
|
|
58
|
+
|
|
59
|
+
Install directly from PyPI with:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pip install pyshapr
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
#### Local development install (for contributors)
|
|
66
|
+
If you have cloned the repository and want to install in development mode for local changes, navigate to the `./python` directory and run:
|
|
67
|
+
```bash
|
|
68
|
+
pip install -e .
|
|
69
|
+
```
|
|
70
|
+
The `-e` flag installs in editable mode, allowing local code changes to be reflected immediately.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Quick Demo
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
from sklearn.ensemble import RandomForestRegressor
|
|
78
|
+
from pyshapr import explain
|
|
79
|
+
from pyshapr.datasets import load_california_housing
|
|
80
|
+
|
|
81
|
+
# Load example data
|
|
82
|
+
dfx_train, dfx_explain, dfy_train, dfy_explain = load_california_housing()
|
|
83
|
+
|
|
84
|
+
# Fit a model
|
|
85
|
+
model = RandomForestRegressor()
|
|
86
|
+
model.fit(dfx_train, dfy_train.values.flatten())
|
|
87
|
+
|
|
88
|
+
# Explain predictions
|
|
89
|
+
explanation = explain(
|
|
90
|
+
model=model,
|
|
91
|
+
x_train=dfx_train,
|
|
92
|
+
x_explain=dfx_explain,
|
|
93
|
+
approach="gaussian",
|
|
94
|
+
phi0=dfy_train.mean().item(),
|
|
95
|
+
seed=1
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
explanation.print() # Print the Shapley values
|
|
99
|
+
|
|
100
|
+
# Get a summary object with computation details
|
|
101
|
+
summary = explanation.summary()
|
|
102
|
+
print(summary) # Displays a formatted summary (also available directly via explanation.summary())
|
|
103
|
+
|
|
104
|
+
# Access specific summary attributes (available with tab-completion in Jupyter)
|
|
105
|
+
summary['approach'] # Approach used
|
|
106
|
+
summary['timing_summary']['total_time_secs'] # Total computation time
|
|
107
|
+
|
|
108
|
+
# Extract one or more specific result objects directly
|
|
109
|
+
explanation.get_results("proglang") # Programming language used (Python/R)
|
|
110
|
+
explanation.get_results("approach") # Approach used
|
|
111
|
+
explanation.get_results().keys() # All available result objects
|
|
112
|
+
|
|
113
|
+
# Plotting (requires the 'shap' library)
|
|
114
|
+
# Convert to a SHAP Explanation object
|
|
115
|
+
shap_exp = explanation.to_shap()
|
|
116
|
+
|
|
117
|
+
import shap
|
|
118
|
+
shap.plots.waterfall(shap_exp[0]) # Plot the first observation
|
|
119
|
+
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Supported Models
|
|
125
|
+
|
|
126
|
+
`pyshapr` can explain predictions from models built with:
|
|
127
|
+
|
|
128
|
+
- [`scikit-learn`](https://scikit-learn.org/)
|
|
129
|
+
- [`keras`](https://keras.io/) (Sequential API)
|
|
130
|
+
- [`xgboost`](https://xgboost.readthedocs.io/)
|
|
131
|
+
|
|
132
|
+
For other model types, you can supply:
|
|
133
|
+
|
|
134
|
+
- A custom `predict_model` function
|
|
135
|
+
- (Optionally) a custom `get_model_specs` function
|
|
136
|
+
to `pyshapr.explain`.
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Supported Approaches
|
|
141
|
+
|
|
142
|
+
`pyshapr` forwards all approach-specific arguments to `shapr::explain()`. Commonly used approaches include:
|
|
143
|
+
|
|
144
|
+
- `"arf"`, `"categorical"`, `"copula"`, `"ctree"`, `"empirical"`, `"gaussian"`,
|
|
145
|
+
`"regression_separate"`, `"regression_surrogate"`, `"vaeac"`
|
|
146
|
+
- `"independence"` (not recommended)
|
|
147
|
+
|
|
148
|
+
`"arf"`, `"ctree"`, `"regression_separate"`, `"regression_surrogate"` and `"vaeac"` support mixed
|
|
149
|
+
numerical/categorical feature sets, `"categorical"` supports categorical features only,
|
|
150
|
+
while `"copula"`, `"empirical"`, `"gaussian"` and `"independence"` support numerical features only.
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## Examples
|
|
155
|
+
|
|
156
|
+
See the [examples folder](https://github.com/NorskRegnesentral/shapr/tree/master/python/examples) on GitHub for runnable examples, including:
|
|
157
|
+
|
|
158
|
+
- Basic usage with `scikit-learn` models
|
|
159
|
+
- Usage with `xgboost` models
|
|
160
|
+
- Usage with `keras` models
|
|
161
|
+
- A custom PyTorch model
|
|
162
|
+
- Usage of the `Shapr` class and associated `ShaprSummary` class for exploration and extraction of explanation results.
|
|
163
|
+
- Plotting functionality for the Shapley values through the `shap` package
|
|
164
|
+
- ARF and VAEAC examples for both numerical and mixed categorical feature sets
|
|
165
|
+
- The **regression paradigm** described in [Olsen et al. (2024)](https://link.springer.com/article/10.1007/s10618-024-01016-z),
|
|
166
|
+
which shows:
|
|
167
|
+
- How to specify the regression model
|
|
168
|
+
- How to enable automatic cross-validation of hyperparameters
|
|
169
|
+
- How to apply pre-processing steps before fitting regression models
|
pyshapr-0.5.0/pyproject.toml
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=69", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "pyshapr"
|
|
7
|
+
version = "0.5.0"
|
|
8
|
+
description = "Python wrapper for the R package shapr (via rpy2)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE.md" }
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Martin Jullum" },
|
|
13
|
+
{ name = "Lars Henry Berge Olsen" },
|
|
14
|
+
{ name = "Didrik Nielsen" }
|
|
15
|
+
]
|
|
16
|
+
requires-python = ">=3.11"
|
|
17
|
+
keywords = ["explainable-ai", "shapley-values", "machine-learning", "model-interpretability"]
|
|
18
|
+
classifiers = [
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Programming Language :: Python :: 3.13",
|
|
24
|
+
"Programming Language :: Python :: 3.14",
|
|
25
|
+
"License :: OSI Approved :: MIT License",
|
|
26
|
+
"Operating System :: POSIX :: Linux",
|
|
27
|
+
"Intended Audience :: Science/Research",
|
|
28
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence"
|
|
29
|
+
]
|
|
30
|
+
dependencies = [
|
|
31
|
+
"rpy2>=3.5.1",
|
|
32
|
+
"numpy>=1.22.3",
|
|
33
|
+
"pandas>=1.4.2",
|
|
34
|
+
"scikit-learn>=1.0.0",
|
|
35
|
+
"tabulate>=0.8.10",
|
|
36
|
+
"shap>=0.40.0",
|
|
37
|
+
"matplotlib>=3.5.0",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
test = [
|
|
42
|
+
"pytest>=7.0.0",
|
|
43
|
+
"syrupy>=4.0.0",
|
|
44
|
+
"xgboost>=1.5.0"
|
|
45
|
+
]
|
|
46
|
+
dev = [
|
|
47
|
+
"ruff>=0.1.0",
|
|
48
|
+
"pytest>=7.0.0",
|
|
49
|
+
"syrupy>=4.0.0",
|
|
50
|
+
"xgboost>=1.5.0"
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[project.urls]
|
|
54
|
+
Homepage = "https://github.com/NorskRegnesentral/shapr"
|
|
55
|
+
Documentation = "https://norskregnesentral.github.io/shapr/pyshapr.html"
|
|
56
|
+
Issues = "https://github.com/NorskRegnesentral/shapr/issues"
|
|
57
|
+
Changelog = "https://github.com/NorskRegnesentral/shapr/blob/master/python/CHANGELOG.md"
|
|
58
|
+
|
|
59
|
+
[tool.setuptools]
|
|
60
|
+
package-dir = { "" = "src" }
|
|
61
|
+
|
|
62
|
+
[tool.setuptools.packages.find]
|
|
63
|
+
where = ["src"]
|
|
64
|
+
include = ["pyshapr*"]
|
|
65
|
+
|
|
66
|
+
[tool.setuptools.dynamic]
|
|
67
|
+
# (none needed now)
|
|
68
|
+
|
|
69
|
+
[tool.pytest.ini_options]
|
|
70
|
+
testpaths = ["tests"]
|
|
71
|
+
python_files = ["test_*.py"]
|
|
72
|
+
python_classes = ["Test*"]
|
|
73
|
+
python_functions = ["test_*"]
|
|
74
|
+
addopts = [
|
|
75
|
+
"--strict-markers",
|
|
76
|
+
"--strict-config",
|
|
77
|
+
"-ra",
|
|
78
|
+
]
|
|
79
|
+
markers = [
|
|
80
|
+
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
|
81
|
+
"localonly: marks tests that require local dependencies like vaeac (deselect with '-m \"not localonly\"')",
|
|
82
|
+
"snapshot: marks tests as snapshot/output tests",
|
|
83
|
+
]
|
|
84
|
+
|
|
85
|
+
[tool.ruff]
|
|
86
|
+
line-length = 100
|
|
87
|
+
target-version = "py311"
|
|
88
|
+
exclude = [
|
|
89
|
+
".git",
|
|
90
|
+
"__pycache__",
|
|
91
|
+
"dist",
|
|
92
|
+
"build",
|
|
93
|
+
"*.egg-info",
|
|
94
|
+
]
|
|
95
|
+
|
|
96
|
+
[tool.ruff.lint]
|
|
97
|
+
select = [
|
|
98
|
+
"E", # pycodestyle errors
|
|
99
|
+
"W", # pycodestyle warnings
|
|
100
|
+
"F", # pyflakes
|
|
101
|
+
"I", # isort
|
|
102
|
+
"N", # pep8-naming
|
|
103
|
+
"UP", # pyupgrade
|
|
104
|
+
"B", # flake8-bugbear
|
|
105
|
+
"C4", # flake8-comprehensions
|
|
106
|
+
"SIM", # flake8-simplify
|
|
107
|
+
"RUF", # Ruff-specific rules
|
|
108
|
+
]
|
|
109
|
+
ignore = [
|
|
110
|
+
"E501", # Line too long (handled by formatter)
|
|
111
|
+
"B008", # Do not perform function calls in argument defaults
|
|
112
|
+
"B905", # zip() without an explicit strict= parameter
|
|
113
|
+
]
|
|
114
|
+
|
|
115
|
+
[tool.ruff.lint.per-file-ignores]
|
|
116
|
+
"src/pyshapr/_explain.py" = [
|
|
117
|
+
"N802", # Function name should be lowercase (mirrors R API names, e.g. compute_vS)
|
|
118
|
+
"N803", # Argument name should be lowercase (mirrors R API names, e.g. n_MC_samples)
|
|
119
|
+
"N806", # Variable should be lowercase (mirrors R objects, e.g. S_batch, vS_list, MSEv)
|
|
120
|
+
]
|
|
121
|
+
"tests/**/*.py" = [
|
|
122
|
+
"N802", # Function name should be lowercase (test names can be descriptive)
|
|
123
|
+
"N803", # Argument name should be lowercase (fixtures mirror R-style names)
|
|
124
|
+
"N806", # Variable in function should be lowercase
|
|
125
|
+
"RUF059", # Unused unpacked variable (destructuring shared fixtures is intentional)
|
|
126
|
+
]
|
|
127
|
+
"examples/**/*.py" = [
|
|
128
|
+
"B007", # Unused loop control variable (demo loops)
|
|
129
|
+
"B018", # Useless expression (examples show attribute access for illustration)
|
|
130
|
+
"E402", # Module import not at top of file (narrative ordering in example scripts)
|
|
131
|
+
"N812", # Lowercase imported as non-lowercase (e.g. torch.nn.functional as F)
|
|
132
|
+
"N816", # Mixed-case variable in global scope (illustrative example variables)
|
|
133
|
+
]
|
|
134
|
+
|
|
135
|
+
[tool.ruff.format]
|
|
136
|
+
quote-style = "double"
|
|
137
|
+
indent-style = "space"
|
|
138
|
+
line-ending = "auto"
|
pyshapr-0.5.0/setup.cfg
ADDED
pyshapr-0.5.0/src/pyshapr/__init__.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import contextlib
|
|
2
|
+
from importlib import import_module
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
4
|
+
|
|
5
|
+
# Lightweight public re-export (no R dependency)
|
|
6
|
+
from . import datasets
|
|
7
|
+
from ._rutils import get_package_lib_loc
|
|
8
|
+
|
|
9
|
+
# Public names of the package.
__all__ = ["Shapr", "datasets", "ensure_r_ready", "explain"]

# Start from the local placeholder and overwrite it only when distribution
# metadata for "pyshapr" is available.
__version__ = "0.0.0+local"
with contextlib.suppress(PackageNotFoundError):
    __version__ = version("pyshapr")

# Lazy-initialisation state; both values are updated by ensure_r_ready().
_explain_impl = None
_r_ready = False
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def ensure_r_ready() -> bool:
    """Check that rpy2 and the R package 'shapr' load, then bind the real explain().

    The work is done at most once: as soon as a call has succeeded, later
    calls skip straight to the return statement.

    Returns:
        True once the R side has been initialised.

    Raises:
        ImportError: If importing rpy2 fails, or if locating/loading the R
            package 'shapr' fails. The original exception is chained as the
            cause in both cases.
    """
    global _r_ready, _explain_impl

    if not _r_ready:
        # Step 1: rpy2. Any exception raised here is re-raised as ImportError.
        try:
            import rpy2.robjects as _ro
            from rpy2.robjects.packages import importr
        except Exception as rpy2_error:
            raise ImportError(
                "pyshapr requires rpy2 and a working R installation.\n"
                "Install R and rpy2, and ensure R is on PATH/R_HOME. See README."
            ) from rpy2_error

        # Step 2: the R package. Pass lib_loc only when a truthy location was found.
        try:
            shapr_lib_loc = get_package_lib_loc(_ro, "shapr")
            importr_kwargs = {"lib_loc": shapr_lib_loc} if shapr_lib_loc else {}
            importr("shapr", **importr_kwargs)
        except Exception as shapr_error:
            raise ImportError(
                "The R package 'shapr' is not installed or not found.\n"
                "In an R session, run: install.packages('shapr')"
            ) from shapr_error

        # Step 3: load the implementation from the private module (avoids a
        # name collision with the public explain() wrapper) and mark as ready.
        _explain_impl = import_module(__name__ + "._explain").explain
        _r_ready = True

    return True
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def explain(*args, **kwargs):
    """Initialise R/shapr on first use, then delegate to the real explain().

    All positional and keyword arguments are passed through unchanged, and
    the delegate's return value is returned as-is.
    """
    ensure_r_ready()
    # Read the implementation only after initialisation has bound it.
    real_explain = _explain_impl
    return real_explain(*args, **kwargs)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# Import the Shapr class (lazy import to avoid R dependency issues)
|
|
61
|
+
def _import_shapr():
    """Import the ``Shapr`` class from the ``explanation`` submodule and return it."""
    # The import is kept inside the function so it runs only when called.
    from .explanation import Shapr as shapr_class

    return shapr_class
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Make Shapr available when the module is imported
|
|
68
|
+
try:
    Shapr = _import_shapr()
except ImportError:
    # The class could not be imported; expose a None placeholder instead.
    Shapr = None
|