irapy-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
irapy/__init__.py ADDED
@@ -0,0 +1,10 @@
+ """
+ Impact Range Assessment (IRA)
+
+ A model-based sensitivity interpretability measure for regression modelling.
+ """
+
+ from .irapy import single_ira, repeated_ira
+
+ __all__ = ["single_ira", "repeated_ira"]
+ __version__ = "0.1.0"
irapy/irapy.py ADDED
@@ -0,0 +1,178 @@
+ import pandas as pd
+ import numpy as np
+ from joblib import Parallel, delayed
+
+
+ def single_ira(input_data, model, scaler=None, num_interpol=100, num_background_samples=200, random_state=42, sorted_output=False):
+     """
+     Single-execution IRA
+
+     Parameters:
+     - input_data: predictor pandas DataFrame (i.e., the training predictor matrix, often denoted as X_train). When using a scaler, this must be the original predictors used to fit the scaler. All predictors must be continuous numeric.
+     - model: A model object with a .predict() method, trained on data with the same structure as input_data.
+       Alternatively, a callable function can be used, such as:
+           def func(a, b):
+               y = 3 * a + 1 * b
+               return y
+     - scaler: Optional fitted scaler (e.g., StandardScaler or MinMaxScaler). Must be trained on a DataFrame with the same structure as input_data. If None, no scaling is applied.
+     - num_interpol: Number of interpolation points between min and max of the focus predictor (default: 100).
+     - num_background_samples: Number of background observations randomly drawn from the dataset (default: 200).
+     - random_state: Seed for reproducibility of the random background sampling. Integer or None (default: 42).
+     - sorted_output: Whether to sort the result by IRA values in descending order (default: False).
+
+     Returns:
+     - A DataFrame with predictor names and their corresponding Impact Range Assessment (IRA) values.
+     """
+
+     if not isinstance(num_interpol, int) or num_interpol < 2:
+         raise ValueError("num_interpol must be an integer ≥ 2.")
+
+     if not isinstance(num_background_samples, int) or num_background_samples <= 0:
+         raise ValueError("num_background_samples must be a positive integer.")
+
+     if random_state is not None and not isinstance(random_state, int):
+         raise ValueError("random_state must be an integer or None.")
+
+     # Calculate descriptive statistics for the training input data
+     predictor_summary = input_data.describe()
+
+     # Create an empty list to store results
+     output_range_list = []
+
+     # Iterate over all predictors
+     for focus_predictor in predictor_summary.columns:
+
+         if predictor_summary.loc['min', focus_predictor] == predictor_summary.loc['max', focus_predictor]:
+             output_range_list.append([focus_predictor, 0.0])
+             continue
+
+         else:
+
+             # Create a number of values for the focus predictor between its minimum and maximum
+             created_predictor_values = np.linspace(
+                 predictor_summary.loc['min', focus_predictor],
+                 predictor_summary.loc['max', focus_predictor],
+                 num_interpol)
+
+             # Randomly sample background observations, with or without a seed
+             if isinstance(random_state, int):
+                 background_samples = input_data.sample(n=num_background_samples, replace=True, random_state=random_state)
+             elif random_state is None:
+                 background_samples = input_data.sample(n=num_background_samples, replace=True)
+             else:
+                 raise ValueError("random_state must be an integer or None.")
+
+             # Reset the index of the sampled data
+             background_samples = background_samples.reset_index(drop=True)
+
+             # Add an index column for grouping the predictions later
+             background_samples['Index'] = np.arange(num_background_samples)
+
+             # Create replicates for each observation in the sampled data
+             # The number of replicates equals the number of interpolation points for the focus predictor
+             background_samples_replicates = background_samples.loc[
+                 background_samples.index.repeat(num_interpol)].reset_index(drop=True)
+
+             # Create the dataset combining the non-focus predictors with the interpolated focus predictor values
+             observations = background_samples_replicates.assign(
+                 **{focus_predictor: np.tile(created_predictor_values, num_background_samples)})
+
+             # Exclude the added 'Index' column for prediction in the next step
+             x_input = observations.drop(columns=['Index'])
+
+             # Use the scaler and model to predict the output, an array of length num_interpol * num_background_samples
+             if hasattr(model, 'predict'):
+                 if scaler is not None:
+                     observations['Prediction'] = model.predict(pd.DataFrame(scaler.transform(x_input),
+                                                                             columns=x_input.columns))
+                 else:
+                     observations['Prediction'] = model.predict(x_input)
+             elif callable(model):
+                 observations['Prediction'] = [model(*row) for row in x_input.itertuples(index=False, name=None)]
+             else:
+                 raise TypeError("Unsupported model type.")
+
+             # Find the maximum and minimum prediction for each sampled observation
+             grouped = observations.loc[:, ['Index', 'Prediction']].copy().groupby('Index')['Prediction']
+             max_per_group = grouped.max()
+             min_per_group = grouped.min()
+
+             # Calculate the prediction range per observation and average the ranges to obtain the IRA value
+             ira_values = (max_per_group - min_per_group).mean()
+
+             # Save the result for the predictor
+             output_range_list.append([focus_predictor, ira_values])
+
+     result = pd.DataFrame(output_range_list, columns=['predictor', 'IRA value'])
+
+     if sorted_output:
+         # Sort the result in descending order based on IRA values
+         result = result.sort_values(by='IRA value', ascending=False).reset_index(drop=True)
+
+     # Return the final result DataFrame
+     return result
+
+
+ def repeated_ira(input_data, model, scaler=None, num_interpol=100, num_background_samples=200, random_state=42,
+                  n_repeats=50, n_jobs=1, sorted_output=False):
+     """
+     Repeated IRA with confidence intervals
+
+     Parameters:
+     - input_data: predictor pandas DataFrame (i.e., the training predictor matrix, often denoted as X_train). When using a scaler, this must be the original predictors used to fit the scaler. All predictors must be continuous numeric.
+     - model: A model object with a .predict() method, trained on data with the same structure as input_data.
+       Alternatively, a callable function can be used, such as:
+           def func(a, b):
+               y = 3 * a + 1 * b
+               return y
+     - scaler: Optional fitted scaler (e.g., StandardScaler or MinMaxScaler). Must be trained on a DataFrame with the same structure as input_data. If None, no scaling is applied.
+     - num_interpol: Number of interpolation points between min and max of the focus predictor (default: 100).
+     - num_background_samples: Number of background observations randomly drawn from the dataset (default: 200).
+     - random_state: Seed used to generate the per-repetition sampling seeds (default: 42).
+     - n_repeats: Number of repetitions (default: 50).
+     - n_jobs: Number of parallel jobs to run (default: 1).
+     - sorted_output: If True, sorts the output by mean IRA values in descending order (default: False).
+
+     Returns:
+     - A DataFrame with predictor names, mean IRA, and 95% CI bounds (lower, upper).
+     """
+
+     if not isinstance(n_repeats, int) or n_repeats <= 0:
+         raise ValueError("n_repeats must be a positive integer.")
+
+     if not isinstance(n_jobs, int) or n_jobs == 0:
+         raise ValueError("n_jobs must be a non-zero integer.")
+
+     # Create a list of seeds for reproducible random sampling per iteration
+     seeds = [int(s) for s in np.random.RandomState(random_state).randint(0, 1_000_000, size=n_repeats)]
+
+     # Run IRA in parallel using joblib
+     results = Parallel(n_jobs=n_jobs)(
+         delayed(single_ira)(
+             input_data=input_data,
+             model=model,
+             scaler=scaler,
+             num_interpol=num_interpol,
+             num_background_samples=num_background_samples,
+             random_state=seed,
+             sorted_output=False
+         ) for seed in seeds
+     )
+
+     # Extract the IRA values and calculate the 95% confidence interval across repetitions
+     ira_array = np.array([r['IRA value'].values for r in results])
+     mean = ira_array.mean(axis=0)
+     ci_lower = np.percentile(ira_array, 2.5, axis=0)
+     ci_upper = np.percentile(ira_array, 97.5, axis=0)
+
+     result = pd.DataFrame({
+         'predictor': input_data.columns,
+         'mean': mean,
+         'ci_lower': ci_lower,
+         'ci_upper': ci_upper
+     })
+
+     if sorted_output:
+         result = result.sort_values(by='mean', ascending=False).reset_index(drop=True)
+
+     return result
irapy-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,72 @@
+ Metadata-Version: 2.4
+ Name: irapy
+ Version: 0.1.0
+ Summary: Impact Range Assessment (IRA): An Interpretable Sensitivity Measure for Regression Modeling
+ Author: Jihao You, Dan Tulpan, Jiaojiao Diao, Jennifer L. Ellis
+ License-Expression: MIT
+ Project-URL: Homepage, https://github.com/jyou2025/impact-range-assessment
+ Keywords: regression,model interpretability,sensitivity analysis,machine learning,explainable ai
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: numpy<2.3,>=1.26
+ Requires-Dist: pandas<3.0,>=2.0
+ Requires-Dist: joblib>=1.2.0
+ Dynamic: license-file
+
+ # irapy
+
+ **irapy** is a Python library for implementing single-execution Impact Range Assessment (IRA) and repeated IRA analyses.
+
+ ## Installation
+ ```bash
+ pip install irapy
+ ```
+
+ ## Requirements
+ ### irapy Requirements
+ irapy depends on the following Python libraries:
+ - NumPy
+ - pandas
+ - joblib
+
+ These dependencies are installed automatically when using `pip install irapy`.
+
+ **irapy was developed using Python 3.8 (NumPy 1.24, pandas 1.5) and has been tested on Python environments including Python 3.9 (NumPy 1.26, pandas 2.0) and Python 3.12 (NumPy 2.2, pandas 2.3). Compatibility issues may occur when NumPy and pandas are installed in binary-incompatible combinations.**
+
+ ## Usage
+ ### Run irapy
+ You can apply either a single-execution or a repeated IRA to a trained regression model together with the predictor data used to train it.
+ ```python
+ from irapy import single_ira, repeated_ira
+
+ # single-execution IRA
+ single_ira_result = single_ira(input_data=X, model=trained_model)
+ print(single_ira_result)
+
+ # repeated IRA
+ repeated_ira_result = repeated_ira(input_data=X, model=trained_model, n_repeats=50)
+ print(repeated_ira_result)
+ ```
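+
+ When a scaler is used, fit both the scaler and the model first, then pass the original unscaled predictors together with the fitted scaler. A minimal sketch, assuming a scikit-learn `StandardScaler` and `LinearRegression` (neither is required by irapy; any fitted scaler with `.transform()` and any model with `.predict()` trained on the scaled predictors should behave the same way):
+ ```python
+ import numpy as np
+ import pandas as pd
+ from sklearn.linear_model import LinearRegression
+ from sklearn.preprocessing import StandardScaler
+
+ from irapy import single_ira
+
+ # Toy data: two continuous predictors and a noisy linear response
+ rng = np.random.default_rng(0)
+ X = pd.DataFrame({"a": rng.normal(size=200), "b": rng.uniform(size=200)})
+ y = 3 * X["a"] + 1 * X["b"] + rng.normal(scale=0.1, size=200)
+
+ # Fit the scaler on the unscaled predictors, then train the model on the scaled predictors
+ scaler = StandardScaler().fit(X)
+ X_scaled = pd.DataFrame(scaler.transform(X), columns=X.columns)
+ model = LinearRegression().fit(X_scaled, y)
+
+ # Pass the original (unscaled) X plus the fitted scaler; scaling is applied internally before .predict()
+ result = single_ira(input_data=X, model=model, scaler=scaler)
+ print(result)
+ ```
+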
+ ### Arguments
+ - `input_data`: predictor pandas DataFrame (i.e., the training predictor matrix, often denoted as X_train; **use the original, unscaled predictors when a scaler is applied**); all predictors must be continuous numeric variables.
+ - `model`: trained model (an object with `.predict()`, tested with `scikit-learn` models) or a callable function (see the sketch after this list)
+ - `scaler`: fitted scaler (**optional**, e.g., `StandardScaler` / `MinMaxScaler`)
+ - `num_interpol`: number of interpolated points (default: 100)
+ - `num_background_samples`: number of background observations (default: 200)
+ - `random_state`: seed for reproducibility (default: 42)
+ - `sorted_output`: whether to sort results by IRA value in descending order (default: False)
+ - `n_repeats` (repeated IRA): number of repetitions (default: 50)
+ - `n_jobs` (repeated IRA): number of parallel jobs to run (default: 1)
+
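+ A callable function can stand in for a trained model, which is handy for checking IRA values against a known response function. A minimal sketch with a hypothetical two-predictor function `func` (the DataFrame columns are passed to the callable positionally, so their order must match the function signature):
+ ```python
+ import numpy as np
+ import pandas as pd
+
+ from irapy import repeated_ira
+
+ def func(a, b):
+     return 3 * a + 1 * b
+
+ rng = np.random.default_rng(0)
+ X = pd.DataFrame({"a": rng.normal(size=200), "b": rng.uniform(size=200)})
+
+ # Repeated IRA over 20 resampling repetitions; result columns: predictor, mean, ci_lower, ci_upper
+ result = repeated_ira(input_data=X, model=func, n_repeats=20, sorted_output=True)
+ print(result)
+ ```
+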
+ ## Project Homepage
+ https://github.com/jyou2025/impact-range-assessment
irapy-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+ irapy/__init__.py,sha256=H08_qALsjIuCI-l-r9jMBK7pDVvIKJj8kaBbSujeiSU,225
+ irapy/irapy.py,sha256=f93i4YQjqoJeZfTXlg5CbDy_W-Z0GwOQZWNw8r60doM,8539
+ irapy-0.1.0.dist-info/licenses/LICENSE,sha256=Ye_UPCYDEggq8otPT7lfV3lFFdVVt11udfEg83t0954,1066
+ irapy-0.1.0.dist-info/METADATA,sha256=AuBdV6bNS-ezkFOt5Ygsqv87PBxKp379KLuzjPJ-SN8,3218
+ irapy-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ irapy-0.1.0.dist-info/top_level.txt,sha256=jdXCzR0aSy01VMO_NKTYml9GTz_yJUC8tNFg_h4vaBk,6
+ irapy-0.1.0.dist-info/RECORD,,
irapy-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.10.2)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
irapy-0.1.0.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026 Jihao You
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
irapy-0.1.0.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ irapy