easeai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
easeai-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ria
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
easeai-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,138 @@
1
+ Metadata-Version: 2.4
2
+ Name: easeai
3
+ Version: 0.1.0
4
+ Summary: Reusable XGBoost + SHAP workflow for tabular regression explainability
5
+ Author: Ria A. Martins
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/martinsria7/easeai
8
+ Project-URL: Repository, https://github.com/martinsria7/easeai
9
+ Project-URL: Issues, https://github.com/martinsria7/easeai/issues
10
+ Keywords: xgboost,shap,tabular,xai,regression
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: >=3.9
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: numpy>=1.24
23
+ Requires-Dist: pandas>=2.0
24
+ Requires-Dist: scikit-learn>=1.3
25
+ Requires-Dist: xgboost>=2.0
26
+ Requires-Dist: shap>=0.45
27
+ Requires-Dist: matplotlib>=3.8
28
+ Requires-Dist: Pillow>=10.0
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest>=8.0; extra == "dev"
31
+ Dynamic: license-file
32
+
33
+ # EASEai
34
+
35
+ <p align="center">
36
+ <a href="https://www.python.org/"><img alt="Python" src="https://img.shields.io/badge/python-3.9%2B-blue.svg"></a>
37
+ <a href="./LICENSE"><img alt="License: MIT" src="https://img.shields.io/badge/license-MIT-green.svg"></a>
38
+ <img alt="Status" src="https://img.shields.io/badge/status-alpha-orange.svg">
39
+ <img alt="XGBoost" src="https://img.shields.io/badge/model-XGBoost-5C8A3D.svg">
40
+ <img alt="SHAP" src="https://img.shields.io/badge/explainability-SHAP-7A3E9D.svg">
41
+ </p>
42
+
43
+ <p align="center"><strong>Explainable AI for epidemiology and public health.</strong></p>
44
+
45
+ `easeai` is a lightweight Python package for **tabular regression explainability** built around a practical research workflow:
46
+
47
+ - XGBoost hyperparameter tuning
48
+ - iterative feature elimination based on XGBoost importance
49
+ - cross-validated RMSE and R²
50
+ - SHAP feature ranking
51
+ - partial dependence plots
52
+ - row-level dominant driver extraction
53
+
54
+ It was extracted from a county-level environmental health workflow, but the package is intentionally general so others can use it on any tabular regression dataset.
55
+
56
+ ## Why use EASEai?
57
+
58
+ Many research notebooks mix together preprocessing, model tuning, feature selection, evaluation, and explainability in one place. `easeai` turns that into a reusable package for researchers who want:
59
+
60
+ - a quick XGBoost + SHAP baseline
61
+ - interpretable feature ranking
62
+ - reproducible artifact export
63
+ - a cleaner starting point for GitHub or publication-oriented workflows
64
+
65
+ ## Installation
66
+
67
+ ```bash
68
+ pip install -e .
69
+ ```
70
+
71
+ Or after publication:
72
+
73
+ ```bash
74
+ pip install easeai
75
+ ```
76
+
77
+ ## Minimal example
78
+
79
+ ```python
80
+ import pandas as pd
81
+ import easeai as ea
82
+
83
+
84
+ df = pd.read_csv("Alzheimer_merged1.csv", encoding="ISO-8859-1")
85
+
86
+ workflow = ea.TabularXAIRegressor(
87
+ target="AD_PREV_MEAN",
88
+ drop_columns=["Counties", "FIPS"],
89
+ target_n_features=15,
90
+ )
91
+
92
+ workflow.fit(df)
93
+ results = workflow.summarize(id_column="FIPS", name_column="Counties")
94
+
95
+ print(results.selected_features)
96
+ print(results.metrics)
97
+ print(results.shap_importance.head())
98
+
99
+ results.top_drivers[["id", "name", "top_driver"]].to_csv("county_top_drivers.csv", index=False)
100
+ workflow.export_artifacts("artifacts")
101
+ ```
102
+
103
+ ## Package structure
104
+
105
+ ```text
106
+ easeai/
107
+ data.py # preprocessing helpers
108
+ model.py # tuning, CV, recursive elimination
109
+ explain.py # SHAP summaries and top-driver extraction
110
+ plotting.py # SHAP and PDP export helpers
111
+ workflow.py # end-to-end workflow class
112
+ ```
113
+
114
+ ## Suggested GitHub topics
115
+
116
+ `xgboost`, `shap`, `explainable-ai`, `tabular-data`, `epidemiology`, `public-health`, `machine-learning`, `python`
117
+
118
+ ## Roadmap
119
+
120
+ - add classification support
121
+ - add permutation importance
122
+ - add bootstrap confidence intervals
123
+ - add optional map-ready exports
124
+ - publish on PyPI
125
+
126
+ ## Development
127
+
128
+ ```bash
129
+ pytest
130
+ ```
131
+
132
+ ## License
133
+
134
+ MIT
135
+
136
+ ## Author
137
+
138
+ Ria A. Martins
easeai-0.1.0/README.md ADDED
@@ -0,0 +1,106 @@
1
+ # EASEai
2
+
3
+ <p align="center">
4
+ <a href="https://www.python.org/"><img alt="Python" src="https://img.shields.io/badge/python-3.9%2B-blue.svg"></a>
5
+ <a href="./LICENSE"><img alt="License: MIT" src="https://img.shields.io/badge/license-MIT-green.svg"></a>
6
+ <img alt="Status" src="https://img.shields.io/badge/status-alpha-orange.svg">
7
+ <img alt="XGBoost" src="https://img.shields.io/badge/model-XGBoost-5C8A3D.svg">
8
+ <img alt="SHAP" src="https://img.shields.io/badge/explainability-SHAP-7A3E9D.svg">
9
+ </p>
10
+
11
+ <p align="center"><strong>Explainable AI for epidemiology and public health.</strong></p>
12
+
13
+ `easeai` is a lightweight Python package for **tabular regression explainability** built around a practical research workflow:
14
+
15
+ - XGBoost hyperparameter tuning
16
+ - iterative feature elimination based on XGBoost importance
17
+ - cross-validated RMSE and R²
18
+ - SHAP feature ranking
19
+ - partial dependence plots
20
+ - row-level dominant driver extraction
21
+
22
+ It was extracted from a county-level environmental health workflow, but the package is intentionally general so others can use it on any tabular regression dataset.
23
+
24
+ ## Why use EASEai?
25
+
26
+ Many research notebooks mix together preprocessing, model tuning, feature selection, evaluation, and explainability in one place. `easeai` turns that into a reusable package for researchers who want:
27
+
28
+ - a quick XGBoost + SHAP baseline
29
+ - interpretable feature ranking
30
+ - reproducible artifact export
31
+ - a cleaner starting point for GitHub or publication-oriented workflows
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ pip install -e .
37
+ ```
38
+
39
+ Or after publication:
40
+
41
+ ```bash
42
+ pip install easeai
43
+ ```
44
+
45
+ ## Minimal example
46
+
47
+ ```python
48
+ import pandas as pd
49
+ import easeai as ea
50
+
51
+
52
+ df = pd.read_csv("Alzheimer_merged1.csv", encoding="ISO-8859-1")
53
+
54
+ workflow = ea.TabularXAIRegressor(
55
+ target="AD_PREV_MEAN",
56
+ drop_columns=["Counties", "FIPS"],
57
+ target_n_features=15,
58
+ )
59
+
60
+ workflow.fit(df)
61
+ results = workflow.summarize(id_column="FIPS", name_column="Counties")
62
+
63
+ print(results.selected_features)
64
+ print(results.metrics)
65
+ print(results.shap_importance.head())
66
+
67
+ results.top_drivers[["id", "name", "top_driver"]].to_csv("county_top_drivers.csv", index=False)
68
+ workflow.export_artifacts("artifacts")
69
+ ```
70
+
71
+ ## Package structure
72
+
73
+ ```text
74
+ easeai/
75
+ data.py # preprocessing helpers
76
+ model.py # tuning, CV, recursive elimination
77
+ explain.py # SHAP summaries and top-driver extraction
78
+ plotting.py # SHAP and PDP export helpers
79
+ workflow.py # end-to-end workflow class
80
+ ```
81
+
82
+ ## Suggested GitHub topics
83
+
84
+ `xgboost`, `shap`, `explainable-ai`, `tabular-data`, `epidemiology`, `public-health`, `machine-learning`, `python`
85
+
86
+ ## Roadmap
87
+
88
+ - add classification support
89
+ - add permutation importance
90
+ - add bootstrap confidence intervals
91
+ - add optional map-ready exports
92
+ - publish on PyPI
93
+
94
+ ## Development
95
+
96
+ ```bash
97
+ pytest
98
+ ```
99
+
100
+ ## License
101
+
102
+ MIT
103
+
104
+ ## Author
105
+
106
+ Ria A. Martins
@@ -0,0 +1,4 @@
1
"""easeai: reusable XGBoost + SHAP workflow for tabular regression explainability.

The public API is the single workflow class re-exported here.
"""

from .workflow import TabularXAIRegressor

__all__ = ["TabularXAIRegressor"]
__version__ = "0.1.0"
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable, Optional, Tuple
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+
9
def preprocess_frame(
    df: pd.DataFrame,
    target: str,
    drop_columns: Optional[Iterable[str]] = None,
    fill_strategy: str = "median",
    coerce_numeric: bool = True,
    clean_column_names: bool = True,
) -> Tuple[pd.DataFrame, pd.Series]:
    """Split *df* into a model-ready feature matrix ``X`` and numeric target ``y``.

    Parameters
    ----------
    df:
        Input dataframe.
    target:
        Name of the target column; must exist in ``df``.
    drop_columns:
        Identifier or metadata columns excluded from the feature matrix.
    fill_strategy:
        Missing-value policy: one of {"median", "mean", "zero", "drop"}.
    coerce_numeric:
        If True, force every feature column to numeric; unparseable values
        become NaN and are then handled by ``fill_strategy``.
    clean_column_names:
        If True, collapse runs of non-alphanumeric characters to "_" and trim
        leading/trailing underscores (XGBoost-friendly names).

    Returns
    -------
    Tuple of (X, y).

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``df``.
    ValueError
        If ``fill_strategy`` is not a recognized option.
    """
    if target not in df.columns:
        raise KeyError(f"Target column '{target}' not found in dataframe.")

    y = pd.to_numeric(df[target], errors="raise")
    extra_drops = [c for c in (drop_columns or []) if c in df.columns]
    X = df.drop(columns=[target] + extra_drops).copy()

    if coerce_numeric:
        X = X.apply(pd.to_numeric, errors="coerce")

    # Infinities are treated exactly like missing data.
    X = X.replace([np.inf, -np.inf], np.nan)

    if fill_strategy in ("median", "mean"):
        # Impute each column with its own median/mean.
        column_stats = getattr(X, fill_strategy)(numeric_only=True)
        X = X.fillna(column_stats)
    elif fill_strategy == "zero":
        X = X.fillna(0)
    elif fill_strategy == "drop":
        # Remove incomplete rows from X and keep y aligned.
        keep_mask = X.notna().all(axis=1)
        X = X.loc[keep_mask].copy()
        y = y.loc[keep_mask].copy()
    else:
        raise ValueError("fill_strategy must be one of: median, mean, zero, drop")

    if clean_column_names:
        sanitized = X.columns.str.replace(r"[^A-Za-z0-9_]+", "_", regex=True)
        X.columns = sanitized.str.strip("_")

    return X, y
@@ -0,0 +1,43 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ import shap
8
+
9
+
10
def compute_shap_values(model, X: pd.DataFrame):
    """Build a SHAP TreeExplainer for *model* and score every row of *X*.

    Returns the ``(explainer, shap_values)`` pair so callers can reuse the
    explainer for additional plots.
    """
    explainer = shap.TreeExplainer(model)
    return explainer, explainer.shap_values(X)
14
+
15
+
16
+
17
def shap_importance_table(shap_values, X: pd.DataFrame) -> pd.DataFrame:
    """Rank features by mean absolute SHAP value, largest first.

    Returns a two-column frame (``feature``, ``mean_abs_shap``) with a fresh
    0..n-1 index.
    """
    importance = np.abs(shap_values).mean(axis=0)
    table = pd.DataFrame({"feature": X.columns, "mean_abs_shap": importance})
    table = table.sort_values("mean_abs_shap", ascending=False)
    return table.reset_index(drop=True)
28
+
29
+
30
+
31
def county_top_drivers(
    shap_values,
    X: pd.DataFrame,
    id_col: Optional[pd.Series] = None,
    name_col: Optional[pd.Series] = None,
) -> pd.DataFrame:
    """Per-row SHAP table plus the single most influential feature per row.

    Optional ``id_col``/``name_col`` series are copied in as "id"/"name"
    columns; "top_driver" holds the feature whose absolute SHAP value is
    largest for each row.
    """
    out = pd.DataFrame(shap_values, columns=X.columns)
    for label, series in (("id", id_col), ("name", name_col)):
        if series is not None:
            out[label] = list(series)
    out["top_driver"] = out[X.columns].abs().idxmax(axis=1)
    return out
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict, Iterable, List, Optional, Tuple
4
+
5
+ import pandas as pd
6
+ from sklearn.model_selection import KFold, RandomizedSearchCV, cross_val_score
7
+ from xgboost import XGBRegressor
8
+
9
+
10
# Default search space for RandomizedSearchCV over XGBRegressor.
# Discrete grids: the search samples `n_iter` random combinations from these.
DEFAULT_PARAM_DIST: Dict[str, Iterable] = {
    "n_estimators": [300, 500, 700, 900],  # number of boosting rounds
    "max_depth": [3, 4, 5, 6, 7],  # per-tree depth limit
    "learning_rate": [0.01, 0.03, 0.05, 0.1, 0.2],  # shrinkage per round
    "subsample": [0.5, 0.7, 0.85, 1.0],  # row sampling per tree
    "colsample_bytree": [0.7, 0.8, 0.9, 1.0],  # column sampling per tree
    "min_child_weight": [1, 3, 5, 7, 10],  # min hessian sum per leaf
    "gamma": [0, 1, 2, 3, 5],  # min loss reduction to split
    "reg_alpha": [0, 0.1, 0.5, 1.0],  # L1 regularization
    "reg_lambda": [1, 3, 5, 10],  # L2 regularization
}
21
+
22
+
23
def make_cv(n_splits: int = 5, shuffle: bool = True, random_state: int = 42) -> KFold:
    """Construct the shuffled K-fold splitter used throughout the package."""
    return KFold(
        n_splits=n_splits,
        shuffle=shuffle,
        random_state=random_state,
    )
25
+
26
+
27
def tune_xgb_regressor(
    X: pd.DataFrame,
    y: pd.Series,
    param_distributions: Optional[Dict[str, Iterable]] = None,
    n_iter: int = 30,
    cv: Optional[KFold] = None,
    scoring: str = "neg_root_mean_squared_error",
    random_state: int = 42,
    n_jobs: int = -1,
    tree_method: str = "hist",
    verbose: int = 0,
) -> Tuple[XGBRegressor, RandomizedSearchCV]:
    """Randomized hyperparameter search for an XGBRegressor.

    Samples ``n_iter`` combinations from ``param_distributions`` (falling back
    to ``DEFAULT_PARAM_DIST``) and refits the best configuration on all data.

    Returns
    -------
    The refit best estimator together with the fitted search object, so
    callers can inspect ``best_params_`` / ``cv_results_``.
    """
    # Falsy cv (None) gets the package-default K-fold splitter.
    cv = cv or make_cv(random_state=random_state)

    seed_estimator = XGBRegressor(
        objective="reg:squarederror",
        random_state=random_state,
        n_jobs=n_jobs,
        tree_method=tree_method,
    )
    searcher = RandomizedSearchCV(
        estimator=seed_estimator,
        param_distributions=param_distributions or DEFAULT_PARAM_DIST,
        n_iter=n_iter,
        scoring=scoring,
        cv=cv,
        random_state=random_state,
        n_jobs=n_jobs,
        verbose=verbose,
        refit=True,
    )
    searcher.fit(X, y)
    return searcher.best_estimator_, searcher
61
+
62
+
63
def rfe_xgb(
    model: XGBRegressor,
    X: pd.DataFrame,
    y: pd.Series,
    target_n: int = 15,
    drop_frac: float = 0.10,
) -> List[str]:
    """Recursive feature elimination driven by XGBoost importances.

    Repeatedly refits *model* on the surviving features and removes the least
    important ``drop_frac`` fraction (at least one per round) until exactly
    ``target_n`` features remain.

    Parameters
    ----------
    model:
        Estimator exposing ``fit`` and ``feature_importances_`` (an
        XGBRegressor in practice).
    X, y:
        Feature matrix and target.
    target_n:
        Number of features to keep (>= 1, <= number of columns of X).
    drop_frac:
        Fraction of surviving features dropped per iteration, 0 < drop_frac < 1.

    Returns
    -------
    The surviving feature names, in their original column order.

    Raises
    ------
    ValueError
        If ``target_n`` or ``drop_frac`` is out of range.
    """
    if target_n < 1:
        raise ValueError("target_n must be >= 1")
    if not 0 < drop_frac < 1:
        raise ValueError("drop_frac must be between 0 and 1")
    if target_n > X.shape[1]:
        raise ValueError("target_n cannot exceed number of features")

    features = list(X.columns)
    while len(features) > target_n:
        model.fit(X[features], y)
        imp = pd.Series(model.feature_importances_, index=features).sort_values()
        # Drop at least one feature per round, but never overshoot below
        # target_n: the original fractional step could skip past it and
        # return fewer than target_n features.
        drop_n = max(1, int(len(features) * drop_frac))
        drop_n = min(drop_n, len(features) - target_n)
        drop_feats = set(imp.index[:drop_n])
        features = [f for f in features if f not in drop_feats]
    return features
86
+
87
+
88
def evaluate_regression_cv(
    model: XGBRegressor,
    X: pd.DataFrame,
    y: pd.Series,
    cv: Optional[KFold] = None,
    n_jobs: int = -1,
) -> Dict[str, float]:
    """Cross-validated RMSE and R^2 for *model* on (X, y).

    Returns a dict with the mean and standard deviation of each metric
    across folds: ``rmse_mean``, ``rmse_sd``, ``r2_mean``, ``r2_sd``.
    """
    splitter = cv or make_cv()

    def _fold_scores(metric: str):
        # One cross_val_score call per metric, sharing the same splitter.
        return cross_val_score(model, X, y, scoring=metric, cv=splitter, n_jobs=n_jobs)

    # sklearn reports negated RMSE; flip the sign back.
    rmse = -_fold_scores("neg_root_mean_squared_error")
    r2 = _fold_scores("r2")
    return {
        "rmse_mean": float(rmse.mean()),
        "rmse_sd": float(rmse.std()),
        "r2_mean": float(r2.mean()),
        "r2_sd": float(r2.std()),
    }
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Iterable, Optional
5
+
6
+ import matplotlib.pyplot as plt
7
+ import pandas as pd
8
+ import shap
9
+ from sklearn.inspection import PartialDependenceDisplay
10
+ from PIL import Image
11
+
12
+
13
+
14
def save_shap_summary(X: pd.DataFrame, shap_values, outpath: str, plot_type: Optional[str] = None, dpi: int = 300):
    """Render a SHAP summary plot and write it to *outpath* as an image.

    Parameters
    ----------
    X:
        Feature matrix the SHAP values were computed for.
    shap_values:
        SHAP value array aligned with ``X``.
    outpath:
        Destination image path (format inferred from extension).
    plot_type:
        Passed through to ``shap.summary_plot`` (None gives the beeswarm;
        "bar" gives the bar chart).
    dpi:
        Output resolution.
    """
    plt.figure()
    try:
        shap.summary_plot(shap_values, X, plot_type=plot_type, show=False)
        plt.tight_layout()
        plt.savefig(outpath, dpi=dpi, bbox_inches="tight")
    finally:
        # Always close the figure, even if shap/matplotlib raises mid-plot;
        # the original leaked a figure per failed call.
        plt.close()
20
+
21
+
22
+
23
def save_pdp_plots(model, X: pd.DataFrame, features: Iterable[str], outdir: str, grid_resolution: int = 60):
    """Write one partial-dependence PNG per feature into *outdir*.

    File names are derived from the first 40 characters of the feature name
    (spaces replaced by underscores). Returns the list of written paths.
    """
    target_dir = Path(outdir)
    target_dir.mkdir(parents=True, exist_ok=True)

    written = []
    for feat in features:
        fig, axis = plt.subplots(figsize=(5.5, 4.0))
        PartialDependenceDisplay.from_estimator(
            model,
            X,
            features=[feat],
            kind="average",
            grid_resolution=grid_resolution,
            percentiles=(0.01, 0.99),
            ax=axis,
        )
        axis.set_title(f"PDP: {feat}")
        png_path = target_dir / f"pdp_{str(feat)[:40].replace(' ', '_')}.png"
        plt.tight_layout()
        plt.savefig(png_path, dpi=300, bbox_inches="tight")
        plt.close()
        written.append(str(png_path))
    return written
45
+
46
+
47
+
48
def save_dependence_panel(X: pd.DataFrame, shap_values, top_vars: Iterable[str], outpath: str):
    """Save a grid of SHAP dependence plots, one panel per variable.

    The grid is sized to the number of variables (3 columns wide) and unused
    panels are hidden, so any number of ``top_vars`` works. The original
    hard-coded 2x3 grid raised IndexError for more than six variables and
    left blank axes for fewer.

    Parameters
    ----------
    X:
        Feature matrix the SHAP values were computed for.
    shap_values:
        SHAP value array aligned with ``X``.
    top_vars:
        Feature names to plot, one panel each (must be non-empty).
    outpath:
        Destination image path.
    """
    top_vars = list(top_vars)
    if not top_vars:
        raise ValueError("top_vars must contain at least one variable")

    ncols = 3
    nrows = (len(top_vars) + ncols - 1) // ncols  # ceil division
    # squeeze=False keeps a 2-D axes array even for a single row.
    # 4 x 4.5 inches per panel reproduces the original 12x9 figure at 6 vars.
    fig, axes = plt.subplots(nrows, ncols, figsize=(4 * ncols, 4.5 * nrows), squeeze=False)
    panels = axes.ravel()

    for i, var in enumerate(top_vars):
        plt.sca(panels[i])
        shap.dependence_plot(
            ind=var,
            shap_values=shap_values,
            features=X,
            interaction_index="auto",
            show=False,
            ax=panels[i],
        )
        panels[i].set_title(str(var))

    # Hide any leftover panels in the final row.
    for spare in panels[len(top_vars):]:
        spare.set_visible(False)

    plt.tight_layout()
    plt.savefig(outpath, dpi=300, bbox_inches="tight")
    plt.close()
65
+
66
+
67
+
68
def combine_images_side_by_side(left_path: str, right_path: str, outpath: str):
    """Stitch two images horizontally on a white background and save the result.

    Both inputs are scaled (LANCZOS) to the taller image's height, preserving
    aspect ratio. Source images are opened with context managers so the
    underlying file handles are released promptly; the original left them
    open for the interpreter to collect.
    """
    with Image.open(left_path) as left_src, Image.open(right_path) as right_src:
        target_h = max(left_src.height, right_src.height)
        left = left_src.resize(
            (int(left_src.width * target_h / left_src.height), target_h),
            Image.Resampling.LANCZOS,
        )
        right = right_src.resize(
            (int(right_src.width * target_h / right_src.height), target_h),
            Image.Resampling.LANCZOS,
        )
    combined = Image.new("RGB", (left.width + right.width, target_h), (255, 255, 255))
    combined.paste(left, (0, 0))
    combined.paste(right, (left.width, 0))
    combined.save(outpath)
@@ -0,0 +1,108 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Iterable, List, Optional
6
+
7
+ import pandas as pd
8
+ from xgboost import XGBRegressor
9
+
10
+ from .data import preprocess_frame
11
+ from .explain import compute_shap_values, county_top_drivers, shap_importance_table
12
+ from .model import evaluate_regression_cv, make_cv, rfe_xgb, tune_xgb_regressor
13
+ from .plotting import save_pdp_plots, save_shap_summary
14
+
15
+
16
@dataclass
class WorkflowResults:
    """Bundle of outputs returned by ``TabularXAIRegressor.summarize()``."""

    # Feature names surviving recursive elimination, in original column order.
    selected_features: List[str]
    # Cross-validated metrics: rmse_mean, rmse_sd, r2_mean, r2_sd.
    metrics: dict
    # Features ranked by mean |SHAP| (columns: feature, mean_abs_shap).
    shap_importance: pd.DataFrame
    # Per-row SHAP values plus optional "id"/"name" and a "top_driver" column.
    top_drivers: pd.DataFrame
22
+
23
+
24
class TabularXAIRegressor:
    """End-to-end workflow for tabular regression with XGBoost + SHAP.

    Pipeline (see ``fit``): preprocess -> randomized hyperparameter search on
    all features -> recursive feature elimination -> refit a fresh model on
    the selected features -> SHAP explanation of that final model.
    """

    def __init__(
        self,
        target: str,
        drop_columns: Optional[Iterable[str]] = None,
        target_n_features: int = 15,
        random_state: int = 42,
        n_iter_search: int = 15,
    ):
        """Configure the workflow.

        Parameters
        ----------
        target:
            Name of the target column in the dataframe passed to ``fit``.
        drop_columns:
            Identifier/metadata columns excluded from the feature matrix.
        target_n_features:
            Number of features kept by recursive elimination.
        random_state:
            Seed shared by CV splitting, the randomized search, and the models.
        n_iter_search:
            Parameter combinations sampled by RandomizedSearchCV.
        """
        self.target = target
        self.drop_columns = list(drop_columns or [])
        self.target_n_features = target_n_features
        self.random_state = random_state
        self.n_iter_search = n_iter_search
        # Fitted state, populated by fit(); the trailing underscore follows
        # the sklearn "set after fitting" convention.
        self.search_ = None
        self.best_model_: Optional[XGBRegressor] = None
        self.final_model_: Optional[XGBRegressor] = None
        self.selected_features_: Optional[List[str]] = None
        self.explainer_ = None
        self.shap_values_ = None
        self.X_selected_: Optional[pd.DataFrame] = None

    def fit(self, df: pd.DataFrame) -> "TabularXAIRegressor":
        """Run the full pipeline on *df* and return ``self``.

        Uses ``preprocess_frame`` defaults (median imputation), so row order
        and count stay aligned with the input dataframe.
        """
        X, y = preprocess_frame(
            df,
            target=self.target,
            drop_columns=self.drop_columns,
        )
        cv = make_cv(random_state=self.random_state)
        # Tune on the full feature set first; RFE then uses the tuned model.
        self.best_model_, self.search_ = tune_xgb_regressor(
            X,
            y,
            cv=cv,
            random_state=self.random_state,
            n_iter=self.n_iter_search,
        )
        self.selected_features_ = rfe_xgb(
            self.best_model_,
            X,
            y,
            target_n=self.target_n_features,
        )
        # Refit a fresh estimator with the best params on the selected
        # subset only (best_model_ was last fit inside the RFE loop).
        self.final_model_ = XGBRegressor(
            **self.search_.best_params_,
            objective="reg:squarederror",
            random_state=self.random_state,
            n_jobs=-1,
            tree_method="hist",
        )
        self.X_selected_ = X[self.selected_features_].copy()
        self.final_model_.fit(self.X_selected_, y)
        self.explainer_, self.shap_values_ = compute_shap_values(self.final_model_, self.X_selected_)
        self.y_ = y
        # Kept so summarize() can pull id/name columns back out.
        self.original_df_ = df.copy()
        return self

    def summarize(self, id_column: Optional[str] = None, name_column: Optional[str] = None) -> WorkflowResults:
        """Compute CV metrics, SHAP ranking, and per-row top drivers.

        ``id_column``/``name_column`` name columns of the ORIGINAL dataframe
        (e.g. untouched identifier columns) to copy into the top-driver table;
        they are silently skipped when absent.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.final_model_ is None or self.X_selected_ is None or self.selected_features_ is None:
            raise RuntimeError("Call fit() before summarize().")
        metrics = evaluate_regression_cv(self.final_model_, self.X_selected_, self.y_)
        shap_rank = shap_importance_table(self.shap_values_, self.X_selected_)
        # NOTE(review): id/name alignment relies on preprocess_frame keeping
        # row order/count (true for the default median fill) — confirm if a
        # row-dropping fill strategy is ever wired through here.
        top_drivers = county_top_drivers(
            self.shap_values_,
            self.X_selected_,
            id_col=self.original_df_[id_column] if id_column and id_column in self.original_df_.columns else None,
            name_col=self.original_df_[name_column] if name_column and name_column in self.original_df_.columns else None,
        )
        return WorkflowResults(
            selected_features=self.selected_features_,
            metrics=metrics,
            shap_importance=shap_rank,
            top_drivers=top_drivers,
        )

    def export_artifacts(self, outdir: str, top_k_pdp: int = 6) -> None:
        """Write SHAP summary images and PDP plots under *outdir*.

        Produces ``shap_beeswarm.png``, ``shap_bar.png``, and one PDP per
        feature in ``pdp_plots/``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.X_selected_ is None:
            raise RuntimeError("Call fit() before export_artifacts().")
        outdir = Path(outdir)
        outdir.mkdir(parents=True, exist_ok=True)
        save_shap_summary(self.X_selected_, self.shap_values_, str(outdir / "shap_beeswarm.png"))
        save_shap_summary(self.X_selected_, self.shap_values_, str(outdir / "shap_bar.png"), plot_type="bar")
        # NOTE(review): "top" vars here are just the FIRST top_k_pdp selected
        # columns, not the SHAP-ranked top k — confirm whether ranking by
        # shap_importance_table was intended.
        top_vars = list(self.X_selected_.columns[:top_k_pdp])
        save_pdp_plots(self.final_model_, self.X_selected_, top_vars, str(outdir / "pdp_plots"))
@@ -0,0 +1,138 @@
1
+ Metadata-Version: 2.4
2
+ Name: easeai
3
+ Version: 0.1.0
4
+ Summary: Reusable XGBoost + SHAP workflow for tabular regression explainability
5
+ Author: Ria A. Martins
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/martinsria7/easeai
8
+ Project-URL: Repository, https://github.com/martinsria7/easeai
9
+ Project-URL: Issues, https://github.com/martinsria7/easeai/issues
10
+ Keywords: xgboost,shap,tabular,xai,regression
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: >=3.9
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: numpy>=1.24
23
+ Requires-Dist: pandas>=2.0
24
+ Requires-Dist: scikit-learn>=1.3
25
+ Requires-Dist: xgboost>=2.0
26
+ Requires-Dist: shap>=0.45
27
+ Requires-Dist: matplotlib>=3.8
28
+ Requires-Dist: Pillow>=10.0
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest>=8.0; extra == "dev"
31
+ Dynamic: license-file
32
+
33
+ # EASEai
34
+
35
+ <p align="center">
36
+ <a href="https://www.python.org/"><img alt="Python" src="https://img.shields.io/badge/python-3.9%2B-blue.svg"></a>
37
+ <a href="./LICENSE"><img alt="License: MIT" src="https://img.shields.io/badge/license-MIT-green.svg"></a>
38
+ <img alt="Status" src="https://img.shields.io/badge/status-alpha-orange.svg">
39
+ <img alt="XGBoost" src="https://img.shields.io/badge/model-XGBoost-5C8A3D.svg">
40
+ <img alt="SHAP" src="https://img.shields.io/badge/explainability-SHAP-7A3E9D.svg">
41
+ </p>
42
+
43
+ <p align="center"><strong>Explainable AI for epidemiology and public health.</strong></p>
44
+
45
+ `easeai` is a lightweight Python package for **tabular regression explainability** built around a practical research workflow:
46
+
47
+ - XGBoost hyperparameter tuning
48
+ - iterative feature elimination based on XGBoost importance
49
+ - cross-validated RMSE and R²
50
+ - SHAP feature ranking
51
+ - partial dependence plots
52
+ - row-level dominant driver extraction
53
+
54
+ It was extracted from a county-level environmental health workflow, but the package is intentionally general so others can use it on any tabular regression dataset.
55
+
56
+ ## Why use EASEai?
57
+
58
+ Many research notebooks mix together preprocessing, model tuning, feature selection, evaluation, and explainability in one place. `easeai` turns that into a reusable package for researchers who want:
59
+
60
+ - a quick XGBoost + SHAP baseline
61
+ - interpretable feature ranking
62
+ - reproducible artifact export
63
+ - a cleaner starting point for GitHub or publication-oriented workflows
64
+
65
+ ## Installation
66
+
67
+ ```bash
68
+ pip install -e .
69
+ ```
70
+
71
+ Or after publication:
72
+
73
+ ```bash
74
+ pip install easeai
75
+ ```
76
+
77
+ ## Minimal example
78
+
79
+ ```python
80
+ import pandas as pd
81
+ import easeai as ea
82
+
83
+
84
+ df = pd.read_csv("Alzheimer_merged1.csv", encoding="ISO-8859-1")
85
+
86
+ workflow = ea.TabularXAIRegressor(
87
+ target="AD_PREV_MEAN",
88
+ drop_columns=["Counties", "FIPS"],
89
+ target_n_features=15,
90
+ )
91
+
92
+ workflow.fit(df)
93
+ results = workflow.summarize(id_column="FIPS", name_column="Counties")
94
+
95
+ print(results.selected_features)
96
+ print(results.metrics)
97
+ print(results.shap_importance.head())
98
+
99
+ results.top_drivers[["id", "name", "top_driver"]].to_csv("county_top_drivers.csv", index=False)
100
+ workflow.export_artifacts("artifacts")
101
+ ```
102
+
103
+ ## Package structure
104
+
105
+ ```text
106
+ easeai/
107
+ data.py # preprocessing helpers
108
+ model.py # tuning, CV, recursive elimination
109
+ explain.py # SHAP summaries and top-driver extraction
110
+ plotting.py # SHAP and PDP export helpers
111
+ workflow.py # end-to-end workflow class
112
+ ```
113
+
114
+ ## Suggested GitHub topics
115
+
116
+ `xgboost`, `shap`, `explainable-ai`, `tabular-data`, `epidemiology`, `public-health`, `machine-learning`, `python`
117
+
118
+ ## Roadmap
119
+
120
+ - add classification support
121
+ - add permutation importance
122
+ - add bootstrap confidence intervals
123
+ - add optional map-ready exports
124
+ - publish on PyPI
125
+
126
+ ## Development
127
+
128
+ ```bash
129
+ pytest
130
+ ```
131
+
132
+ ## License
133
+
134
+ MIT
135
+
136
+ ## Author
137
+
138
+ Ria A. Martins
@@ -0,0 +1,16 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ easeai/__init__.py
5
+ easeai/data.py
6
+ easeai/explain.py
7
+ easeai/model.py
8
+ easeai/plotting.py
9
+ easeai/workflow.py
10
+ easeai.egg-info/PKG-INFO
11
+ easeai.egg-info/SOURCES.txt
12
+ easeai.egg-info/dependency_links.txt
13
+ easeai.egg-info/requires.txt
14
+ easeai.egg-info/top_level.txt
15
+ tests/test_basic.py
16
+ tests/test_import.py
@@ -0,0 +1,10 @@
1
+ numpy>=1.24
2
+ pandas>=2.0
3
+ scikit-learn>=1.3
4
+ xgboost>=2.0
5
+ shap>=0.45
6
+ matplotlib>=3.8
7
+ Pillow>=10.0
8
+
9
+ [dev]
10
+ pytest>=8.0
@@ -0,0 +1 @@
1
+ easeai
@@ -0,0 +1,49 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "easeai"
7
+ version = "0.1.0"
8
+ description = "Reusable XGBoost + SHAP workflow for tabular regression explainability"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Ria A. Martins"}
14
+ ]
15
+ dependencies = [
16
+ "numpy>=1.24",
17
+ "pandas>=2.0",
18
+ "scikit-learn>=1.3",
19
+ "xgboost>=2.0",
20
+ "shap>=0.45",
21
+ "matplotlib>=3.8",
22
+ "Pillow>=10.0",
23
+ ]
24
+ keywords = ["xgboost", "shap", "tabular", "xai", "regression"]
25
+ classifiers = [
26
+ "Programming Language :: Python :: 3",
27
+ "Programming Language :: Python :: 3.9",
28
+ "Programming Language :: Python :: 3.10",
29
+ "Programming Language :: Python :: 3.11",
30
+ "License :: OSI Approved :: MIT License",
31
+ "Operating System :: OS Independent",
32
+ "Intended Audience :: Science/Research",
33
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
34
+ ]
35
+ [project.urls]
36
+ Homepage = "https://github.com/martinsria7/easeai"
37
+ Repository = "https://github.com/martinsria7/easeai"
38
+ Issues = "https://github.com/martinsria7/easeai/issues"
39
+
40
+ [project.optional-dependencies]
41
+ dev = [
42
+ "pytest>=8.0",
43
+ ]
44
+
45
+ [tool.setuptools]
46
+ include-package-data = true
47
+
48
+ [tool.setuptools.packages.find]
49
+ include = ["easeai*"]
easeai-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,26 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import easeai as ea
4
+
5
+
6
def test_basic_fit():
    """Smoke-test the full workflow on a tiny synthetic regression problem."""
    rng = np.random.default_rng(42)
    n_rows = 100
    frame = pd.DataFrame({
        "x1": rng.normal(size=n_rows),
        "x2": rng.normal(size=n_rows),
        "FIPS": range(n_rows),
        "Counties": [f"c{i}" for i in range(n_rows)],
    })
    frame["y"] = 2 * frame["x1"] - 0.5 * frame["x2"] + rng.normal(scale=0.1, size=n_rows)

    workflow = ea.TabularXAIRegressor(
        target="y",
        drop_columns=["FIPS", "Counties"],
        target_n_features=2,
    )
    summary = workflow.fit(frame).summarize(id_column="FIPS", name_column="Counties")

    assert len(summary.selected_features) > 0
    assert summary.metrics is not None
@@ -0,0 +1,5 @@
1
+ import easeai
2
+
3
+
4
def test_import():
    """The package must expose the workflow class at top level."""
    assert getattr(easeai, "TabularXAIRegressor", None) is not None