ilovetools 0.1.7__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ilovetools-0.1.7/ilovetools.egg-info → ilovetools-0.1.8}/PKG-INFO +1 -1
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/__init__.py +1 -1
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/ml/__init__.py +55 -0
- ilovetools-0.1.8/ilovetools/ml/interpretation.py +915 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8/ilovetools.egg-info}/PKG-INFO +1 -1
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools.egg-info/SOURCES.txt +1 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/pyproject.toml +1 -1
- {ilovetools-0.1.7 → ilovetools-0.1.8}/setup.py +1 -1
- {ilovetools-0.1.7 → ilovetools-0.1.8}/LICENSE +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/MANIFEST.in +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/README.md +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/ai/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/ai/embeddings.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/ai/inference.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/ai/llm_helpers.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/audio/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/automation/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/conversion/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/data/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/data/feature_engineering.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/data/preprocessing.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/database/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/datetime/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/files/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/image/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/ml/cross_validation.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/ml/ensemble.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/ml/feature_selection.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/ml/metrics.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/ml/tuning.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/security/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/text/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/utils/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/validation/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/web/__init__.py +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools.egg-info/dependency_links.txt +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools.egg-info/top_level.txt +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/requirements.txt +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/setup.cfg +0 -0
- {ilovetools-0.1.7 → ilovetools-0.1.8}/tests/__init__.py +0 -0
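
The substantive change in 0.1.8 is the new `ilovetools/ml/interpretation.py` module (915 lines), re-exported through `ilovetools.ml`; the remaining diffs are version-number bookkeeping. Every interpretation function is exported under two names, a full descriptive name plus a short alias bound to the same function object. As a quick orientation before the hunks, a doctest-style sketch of the dual-name scheme (assuming the 0.1.8 release is installed; the aliases are plain assignments in the source below, so the identity checks should hold):

>>> from ilovetools.ml import partial_dependence, pdp
>>> pdp is partial_dependence
True
>>> from ilovetools.ml import permutation_importance, perm_importance
>>> perm_importance is permutation_importance
True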
{ilovetools-0.1.7/ilovetools.egg-info → ilovetools-0.1.8}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ilovetools
-Version: 0.1.7
+Version: 0.1.8
 Summary: A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs
 Home-page: https://github.com/AliMehdi512/ilovetools
 Author: Ali Mehdi
{ilovetools-0.1.7 → ilovetools-0.1.8}/ilovetools/ml/__init__.py

@@ -125,6 +125,35 @@ from .feature_selection import (
     remove_corr,
 )
 
+from .interpretation import (
+    # Full names
+    feature_importance_scores,
+    permutation_importance,
+    partial_dependence,
+    shap_values_approximation,
+    lime_explanation,
+    decision_path_explanation,
+    model_coefficients_interpretation,
+    prediction_breakdown,
+    feature_contribution_analysis,
+    global_feature_importance,
+    local_feature_importance,
+    model_summary_statistics,
+    # Abbreviated aliases
+    feat_importance_scores,
+    perm_importance,
+    pdp,
+    shap_approx,
+    lime_explain,
+    decision_path,
+    coef_interpret,
+    pred_breakdown,
+    feat_contrib,
+    global_importance,
+    local_importance,
+    model_summary,
+)
+
 __all__ = [
     # Metrics (full names)
     'accuracy_score',
@@ -234,4 +263,30 @@ __all__ = [
     'univariate_select',
     'select_k_best',
     'remove_corr',
+    # Interpretation (full names)
+    'feature_importance_scores',
+    'permutation_importance',
+    'partial_dependence',
+    'shap_values_approximation',
+    'lime_explanation',
+    'decision_path_explanation',
+    'model_coefficients_interpretation',
+    'prediction_breakdown',
+    'feature_contribution_analysis',
+    'global_feature_importance',
+    'local_feature_importance',
+    'model_summary_statistics',
+    # Interpretation (aliases)
+    'feat_importance_scores',
+    'perm_importance',
+    'pdp',
+    'shap_approx',
+    'lime_explain',
+    'decision_path',
+    'coef_interpret',
+    'pred_breakdown',
+    'feat_contrib',
+    'global_importance',
+    'local_importance',
+    'model_summary',
 ]
ilovetools-0.1.8/ilovetools/ml/interpretation.py

@@ -0,0 +1,915 @@
+"""
+Model interpretation utilities for ML workflows
+Each function has TWO names: full descriptive name + abbreviated alias
+"""
+
+from typing import List, Dict, Any, Callable, Optional, Tuple
+import random
+
+__all__ = [
+    # Full names
+    'feature_importance_scores',
+    'permutation_importance',
+    'partial_dependence',
+    'shap_values_approximation',
+    'lime_explanation',
+    'decision_path_explanation',
+    'model_coefficients_interpretation',
+    'prediction_breakdown',
+    'feature_contribution_analysis',
+    'global_feature_importance',
+    'local_feature_importance',
+    'model_summary_statistics',
+    # Abbreviated aliases
+    'feat_importance_scores',
+    'perm_importance',
+    'pdp',
+    'shap_approx',
+    'lime_explain',
+    'decision_path',
+    'coef_interpret',
+    'pred_breakdown',
+    'feat_contrib',
+    'global_importance',
+    'local_importance',
+    'model_summary',
+]
+
+
+def feature_importance_scores(
+    importances: List[float],
+    feature_names: Optional[List[str]] = None,
+    normalize: bool = True
+) -> Dict[str, float]:
+    """
+    Calculate and format feature importance scores.
+
+    Alias: feat_importance_scores()
+
+    Args:
+        importances: Raw importance scores
+        feature_names: Optional feature names
+        normalize: Normalize to sum to 1.0
+
+    Returns:
+        dict: Feature name to importance mapping
+
+    Examples:
+        >>> from ilovetools.ml import feat_importance_scores  # Short alias
+
+        >>> importances = [0.5, 0.3, 0.2]
+        >>> feature_names = ['age', 'income', 'debt']
+        >>>
+        >>> scores = feat_importance_scores(importances, feature_names)
+        >>> print(scores)
+        {'age': 0.5, 'income': 0.3, 'debt': 0.2}
+
+        >>> # Normalized
+        >>> scores = feat_importance_scores([10, 20, 30], feature_names, normalize=True)
+        >>> print(scores)
+        {'age': 0.167, 'income': 0.333, 'debt': 0.5}
+
+        >>> from ilovetools.ml import feature_importance_scores  # Full name
+        >>> scores = feature_importance_scores(importances, feature_names)
+
+    Notes:
+        - Works with any importance scores
+        - Random Forest, XGBoost, etc.
+        - Normalize for percentages
+        - Easy to visualize
+    """
+    n_features = len(importances)
+
+    if feature_names is None:
+        feature_names = [f"feature_{i}" for i in range(n_features)]
+
+    if normalize:
+        total = sum(importances)
+        if total > 0:
+            importances = [imp / total for imp in importances]
+
+    return {name: imp for name, imp in zip(feature_names, importances)}
+
+
+# Create alias
+feat_importance_scores = feature_importance_scores
+
+
+def permutation_importance(
+    X: List[List[float]],
+    y: List,
+    model_func: Callable,
+    metric_func: Callable,
+    feature_names: Optional[List[str]] = None,
+    n_repeats: int = 10,
+    random_state: Optional[int] = None
+) -> Tuple[Dict[str, float], Dict[str, float]]:
+    """
+    Calculate permutation importance for features.
+
+    Alias: perm_importance()
+
+    Args:
+        X: Feature matrix [n_samples, n_features]
+        y: Target values
+        model_func: Function(X) -> predictions
+        metric_func: Function(y_true, y_pred) -> score (higher is better)
+        feature_names: Optional feature names
+        n_repeats: Number of permutation repeats
+        random_state: Random seed
+
+    Returns:
+        tuple: (mean_importances, std_importances)
+
+    Examples:
+        >>> from ilovetools.ml import perm_importance  # Short alias
+
+        >>> X = [[1, 2], [2, 4], [3, 6], [4, 8]]
+        >>> y = [1, 2, 3, 4]
+        >>>
+        >>> def model(X_test):
+        ...     return [sum(row) / len(row) for row in X_test]
+        >>>
+        >>> def metric(y_true, y_pred):
+        ...     return -sum(abs(y_true[i] - y_pred[i]) for i in range(len(y_true)))
+        >>>
+        >>> mean_imp, std_imp = perm_importance(X, y, model, metric, n_repeats=5)
+        >>> print(f"Importances: {mean_imp}")
+
+        >>> from ilovetools.ml import permutation_importance  # Full name
+        >>> mean_imp, std_imp = permutation_importance(X, y, model, metric)
+
+    Notes:
+        - Model-agnostic method
+        - Measures true importance
+        - Shuffle feature, measure drop
+        - Higher drop = more important
+    """
+    if random_state is not None:
+        random.seed(random_state)
+
+    n_features = len(X[0])
+
+    if feature_names is None:
+        feature_names = [f"feature_{i}" for i in range(n_features)]
+
+    # Baseline score
+    baseline_preds = model_func(X)
+    baseline_score = metric_func(y, baseline_preds)
+
+    # Calculate importance for each feature
+    importances = {name: [] for name in feature_names}
+
+    for _ in range(n_repeats):
+        for i, name in enumerate(feature_names):
+            # Copy X and shuffle feature i
+            X_permuted = [row[:] for row in X]
+            col_values = [row[i] for row in X_permuted]
+            random.shuffle(col_values)
+            for j, row in enumerate(X_permuted):
+                row[i] = col_values[j]
+
+            # Calculate score with permuted feature
+            permuted_preds = model_func(X_permuted)
+            permuted_score = metric_func(y, permuted_preds)
+
+            # Importance = drop in score
+            importance = baseline_score - permuted_score
+            importances[name].append(importance)
+
+    # Calculate mean and std
+    mean_importances = {name: sum(vals) / len(vals) for name, vals in importances.items()}
+    std_importances = {
+        name: (sum((x - mean_importances[name]) ** 2 for x in vals) / len(vals)) ** 0.5
+        for name, vals in importances.items()
+    }
+
+    return mean_importances, std_importances
+
+
+# Create alias
+perm_importance = permutation_importance
+
+
+def partial_dependence(
+    X: List[List[float]],
+    model_func: Callable,
+    feature_index: int,
+    grid_resolution: int = 20
+) -> Tuple[List[float], List[float]]:
+    """
+    Calculate partial dependence for a feature.
+
+    Alias: pdp()
+
+    Args:
+        X: Feature matrix [n_samples, n_features]
+        model_func: Function(X) -> predictions
+        feature_index: Index of feature to analyze
+        grid_resolution: Number of grid points
+
+    Returns:
+        tuple: (grid_values, pd_values)
+
+    Examples:
+        >>> from ilovetools.ml import pdp  # Short alias
+
+        >>> X = [[1, 10], [2, 20], [3, 30], [4, 40]]
+        >>>
+        >>> def model(X_test):
+        ...     return [row[0] * 2 + row[1] * 0.5 for row in X_test]
+        >>>
+        >>> grid, pd_vals = pdp(X, model, feature_index=0, grid_resolution=5)
+        >>> print(f"Grid: {grid}")
+        >>> print(f"PD values: {pd_vals}")
+
+        >>> from ilovetools.ml import partial_dependence  # Full name
+        >>> grid, pd_vals = partial_dependence(X, model, feature_index=0)
+
+    Notes:
+        - Shows feature effect on prediction
+        - Marginal effect
+        - Model-agnostic
+        - Good for visualization
+    """
+    # Get feature values
+    feature_values = [row[feature_index] for row in X]
+    min_val = min(feature_values)
+    max_val = max(feature_values)
+
+    # Create grid
+    step = (max_val - min_val) / (grid_resolution - 1) if grid_resolution > 1 else 0
+    grid_values = [min_val + i * step for i in range(grid_resolution)]
+
+    # Calculate PD for each grid point
+    pd_values = []
+
+    for grid_val in grid_values:
+        # Create modified X with feature set to grid_val
+        X_modified = [row[:] for row in X]
+        for row in X_modified:
+            row[feature_index] = grid_val
+
+        # Get predictions and average
+        predictions = model_func(X_modified)
+        avg_prediction = sum(predictions) / len(predictions)
+        pd_values.append(avg_prediction)
+
+    return grid_values, pd_values
+
+
+# Create alias
+pdp = partial_dependence
+
+
+def shap_values_approximation(
+    X: List[List[float]],
+    model_func: Callable,
+    instance_index: int,
+    feature_names: Optional[List[str]] = None,
+    n_samples: int = 100
+) -> Dict[str, float]:
+    """
+    Approximate SHAP values for an instance.
+
+    Alias: shap_approx()
+
+    Args:
+        X: Feature matrix [n_samples, n_features]
+        model_func: Function(X) -> predictions
+        instance_index: Index of instance to explain
+        feature_names: Optional feature names
+        n_samples: Number of samples for approximation
+
+    Returns:
+        dict: Feature name to SHAP value mapping
+
+    Examples:
+        >>> from ilovetools.ml import shap_approx  # Short alias
+
+        >>> X = [[1, 2], [2, 4], [3, 6], [4, 8]]
+        >>>
+        >>> def model(X_test):
+        ...     return [row[0] * 2 + row[1] * 0.5 for row in X_test]
+        >>>
+        >>> shap_vals = shap_approx(X, model, instance_index=0, n_samples=50)
+        >>> print(f"SHAP values: {shap_vals}")
+
+        >>> from ilovetools.ml import shap_values_approximation  # Full name
+        >>> shap_vals = shap_values_approximation(X, model, instance_index=0)
+
+    Notes:
+        - Simplified SHAP approximation
+        - Fair feature attribution
+        - Game theory based
+        - Explains individual predictions
+    """
+    n_features = len(X[0])
+
+    if feature_names is None:
+        feature_names = [f"feature_{i}" for i in range(n_features)]
+
+    instance = X[instance_index]
+
+    # Base prediction (average of all predictions)
+    all_preds = model_func(X)
+    base_value = sum(all_preds) / len(all_preds)
+
+    # Instance prediction
+    instance_pred = model_func([instance])[0]
+
+    # Approximate SHAP values using marginal contributions
+    shap_values = {}
+
+    for i, name in enumerate(feature_names):
+        # Calculate contribution by comparing with and without feature
+        contributions = []
+
+        for _ in range(min(n_samples, len(X))):
+            # Random background instance
+            bg_idx = random.randint(0, len(X) - 1)
+            background = X[bg_idx][:]
+
+            # With feature
+            with_feature = background[:]
+            with_feature[i] = instance[i]
+            pred_with = model_func([with_feature])[0]
+
+            # Without feature (background value)
+            pred_without = model_func([background])[0]
+
+            contribution = pred_with - pred_without
+            contributions.append(contribution)
+
+        # Average contribution
+        shap_values[name] = sum(contributions) / len(contributions)
+
+    return shap_values
+
+
+# Create alias
+shap_approx = shap_values_approximation
+
+
+def lime_explanation(
+    X: List[List[float]],
+    model_func: Callable,
+    instance_index: int,
+    feature_names: Optional[List[str]] = None,
+    n_samples: int = 100,
+    random_state: Optional[int] = None
+) -> Dict[str, float]:
+    """
+    LIME local explanation for an instance.
+
+    Alias: lime_explain()
+
+    Args:
+        X: Feature matrix [n_samples, n_features]
+        model_func: Function(X) -> predictions
+        instance_index: Index of instance to explain
+        feature_names: Optional feature names
+        n_samples: Number of perturbed samples
+        random_state: Random seed
+
+    Returns:
+        dict: Feature name to coefficient mapping
+
+    Examples:
+        >>> from ilovetools.ml import lime_explain  # Short alias
+
+        >>> X = [[1, 2], [2, 4], [3, 6], [4, 8]]
+        >>>
+        >>> def model(X_test):
+        ...     return [row[0] * 2 + row[1] * 0.5 for row in X_test]
+        >>>
+        >>> explanation = lime_explain(X, model, instance_index=0, n_samples=50)
+        >>> print(f"LIME coefficients: {explanation}")
+
+        >>> from ilovetools.ml import lime_explanation  # Full name
+        >>> explanation = lime_explanation(X, model, instance_index=0)
+
+    Notes:
+        - Local linear approximation
+        - Model-agnostic
+        - Easy to understand
+        - Perturbs features locally
+    """
+    if random_state is not None:
+        random.seed(random_state)
+
+    n_features = len(X[0])
+
+    if feature_names is None:
+        feature_names = [f"feature_{i}" for i in range(n_features)]
+
+    instance = X[instance_index]
+
+    # Generate perturbed samples around instance
+    perturbed_X = []
+    for _ in range(n_samples):
+        perturbed = []
+        for i in range(n_features):
+            # Add random noise
+            noise = random.gauss(0, 0.1 * abs(instance[i]) if instance[i] != 0 else 0.1)
+            perturbed.append(instance[i] + noise)
+        perturbed_X.append(perturbed)
+
+    # Get predictions for perturbed samples
+    perturbed_preds = model_func(perturbed_X)
+
+    # Fit simple linear model (simplified)
+    # Calculate correlation-based coefficients
+    coefficients = {}
+
+    for i, name in enumerate(feature_names):
+        feature_values = [row[i] for row in perturbed_X]
+
+        # Calculate correlation with predictions
+        mean_feat = sum(feature_values) / len(feature_values)
+        mean_pred = sum(perturbed_preds) / len(perturbed_preds)
+
+        numerator = sum((feature_values[j] - mean_feat) * (perturbed_preds[j] - mean_pred)
+                        for j in range(len(feature_values)))
+
+        denominator = sum((f - mean_feat) ** 2 for f in feature_values)
+
+        if denominator > 0:
+            coef = numerator / denominator
+        else:
+            coef = 0.0
+
+        coefficients[name] = coef
+
+    return coefficients
+
+
+# Create alias
+lime_explain = lime_explanation
+
+
+def decision_path_explanation(
+    tree_structure: List[Dict],
+    instance: List[float],
+    feature_names: Optional[List[str]] = None
+) -> List[str]:
+    """
+    Explain decision path through a tree.
+
+    Alias: decision_path()
+
+    Args:
+        tree_structure: List of decision nodes
+        instance: Feature values for instance
+        feature_names: Optional feature names
+
+    Returns:
+        list: Human-readable decision path
+
+    Examples:
+        >>> from ilovetools.ml import decision_path  # Short alias
+
+        >>> tree = [
+        ...     {'feature': 0, 'threshold': 2.5, 'left': 1, 'right': 2},
+        ...     {'value': 'Class A'},
+        ...     {'value': 'Class B'}
+        ... ]
+        >>> instance = [3.0, 10.0]
+        >>> feature_names = ['age', 'income']
+        >>>
+        >>> path = decision_path(tree, instance, feature_names)
+        >>> for step in path:
+        ...     print(step)
+
+        >>> from ilovetools.ml import decision_path_explanation  # Full name
+        >>> path = decision_path_explanation(tree, instance, feature_names)
+
+    Notes:
+        - For decision trees
+        - Shows exact reasoning
+        - Naturally interpretable
+        - Follow the path
+    """
+    n_features = len(instance)
+
+    if feature_names is None:
+        feature_names = [f"feature_{i}" for i in range(n_features)]
+
+    path = []
+    node_idx = 0
+
+    while node_idx < len(tree_structure):
+        node = tree_structure[node_idx]
+
+        if 'value' in node:
+            # Leaf node
+            path.append(f"Prediction: {node['value']}")
+            break
+
+        # Decision node
+        feature_idx = node['feature']
+        threshold = node['threshold']
+        feature_name = feature_names[feature_idx]
+        feature_value = instance[feature_idx]
+
+        if feature_value <= threshold:
+            path.append(f"{feature_name} ({feature_value:.2f}) <= {threshold:.2f}")
+            node_idx = node['left']
+        else:
+            path.append(f"{feature_name} ({feature_value:.2f}) > {threshold:.2f}")
+            node_idx = node['right']
+
+    return path
+
+
+# Create alias
+decision_path = decision_path_explanation
+
+
+def model_coefficients_interpretation(
+    coefficients: List[float],
+    feature_names: Optional[List[str]] = None,
+    intercept: float = 0.0
+) -> Dict[str, Any]:
+    """
+    Interpret linear model coefficients.
+
+    Alias: coef_interpret()
+
+    Args:
+        coefficients: Model coefficients
+        feature_names: Optional feature names
+        intercept: Model intercept
+
+    Returns:
+        dict: Interpretation details
+
+    Examples:
+        >>> from ilovetools.ml import coef_interpret  # Short alias
+
+        >>> coefficients = [2.5, -1.3, 0.8]
+        >>> feature_names = ['age', 'debt', 'income']
+        >>> intercept = 10.0
+        >>>
+        >>> interpretation = coef_interpret(coefficients, feature_names, intercept)
+        >>> print(interpretation['positive_features'])
+        >>> print(interpretation['negative_features'])
+
+        >>> from ilovetools.ml import model_coefficients_interpretation  # Full name
+        >>> interpretation = model_coefficients_interpretation(coefficients, feature_names)
+
+    Notes:
+        - For linear models
+        - Shows feature effects
+        - Positive/negative impact
+        - Magnitude matters
+    """
+    n_features = len(coefficients)
+
+    if feature_names is None:
+        feature_names = [f"feature_{i}" for i in range(n_features)]
+
+    # Separate positive and negative
+    positive_features = []
+    negative_features = []
+
+    for name, coef in zip(feature_names, coefficients):
+        if coef > 0:
+            positive_features.append((name, coef))
+        elif coef < 0:
+            negative_features.append((name, abs(coef)))
+
+    # Sort by magnitude
+    positive_features.sort(key=lambda x: x[1], reverse=True)
+    negative_features.sort(key=lambda x: x[1], reverse=True)
+
+    return {
+        'intercept': intercept,
+        'positive_features': positive_features,
+        'negative_features': negative_features,
+        'strongest_positive': positive_features[0] if positive_features else None,
+        'strongest_negative': negative_features[0] if negative_features else None,
+    }
+
+
+# Create alias
+coef_interpret = model_coefficients_interpretation
+
+
+def prediction_breakdown(
+    instance: List[float],
+    coefficients: List[float],
+    feature_names: Optional[List[str]] = None,
+    intercept: float = 0.0
+) -> Dict[str, Any]:
+    """
+    Break down prediction into feature contributions.
+
+    Alias: pred_breakdown()
+
+    Args:
+        instance: Feature values
+        coefficients: Model coefficients
+        feature_names: Optional feature names
+        intercept: Model intercept
+
+    Returns:
+        dict: Prediction breakdown
+
+    Examples:
+        >>> from ilovetools.ml import pred_breakdown  # Short alias
+
+        >>> instance = [30, 50000, 10000]
+        >>> coefficients = [0.5, 0.0001, -0.0002]
+        >>> feature_names = ['age', 'income', 'debt']
+        >>> intercept = 10.0
+        >>>
+        >>> breakdown = pred_breakdown(instance, coefficients, feature_names, intercept)
+        >>> print(f"Base: {breakdown['base']}")
+        >>> print(f"Contributions: {breakdown['contributions']}")
+        >>> print(f"Total: {breakdown['prediction']}")
+
+        >>> from ilovetools.ml import prediction_breakdown  # Full name
+        >>> breakdown = prediction_breakdown(instance, coefficients, feature_names)
+
+    Notes:
+        - Shows exact calculation
+        - Feature-by-feature contribution
+        - Transparent prediction
+        - Easy to verify
+    """
+    n_features = len(instance)
+
+    if feature_names is None:
+        feature_names = [f"feature_{i}" for i in range(n_features)]
+
+    contributions = {}
+    total = intercept
+
+    for name, value, coef in zip(feature_names, instance, coefficients):
+        contribution = value * coef
+        contributions[name] = contribution
+        total += contribution
+
+    return {
+        'base': intercept,
+        'contributions': contributions,
+        'prediction': total,
+        'feature_values': {name: val for name, val in zip(feature_names, instance)},
+    }
+
+
+# Create alias
+pred_breakdown = prediction_breakdown
+
+
+def feature_contribution_analysis(
+    X: List[List[float]],
+    coefficients: List[float],
+    feature_names: Optional[List[str]] = None
+) -> Dict[str, Dict[str, float]]:
+    """
+    Analyze feature contributions across dataset.
+
+    Alias: feat_contrib()
+
+    Args:
+        X: Feature matrix [n_samples, n_features]
+        coefficients: Model coefficients
+        feature_names: Optional feature names
+
+    Returns:
+        dict: Contribution statistics per feature
+
+    Examples:
+        >>> from ilovetools.ml import feat_contrib  # Short alias
+
+        >>> X = [[1, 2], [2, 4], [3, 6]]
+        >>> coefficients = [2.0, 0.5]
+        >>> feature_names = ['age', 'income']
+        >>>
+        >>> analysis = feat_contrib(X, coefficients, feature_names)
+        >>> print(analysis['age'])
+        {'mean': ..., 'min': ..., 'max': ...}
+
+        >>> from ilovetools.ml import feature_contribution_analysis  # Full name
+        >>> analysis = feature_contribution_analysis(X, coefficients, feature_names)
+
+    Notes:
+        - Global contribution view
+        - Mean, min, max contributions
+        - Understand feature impact
+        - Across all predictions
+    """
+    n_features = len(X[0])
+
+    if feature_names is None:
+        feature_names = [f"feature_{i}" for i in range(n_features)]
+
+    # Calculate contributions for each instance
+    contributions = {name: [] for name in feature_names}
+
+    for instance in X:
+        for i, (name, value, coef) in enumerate(zip(feature_names, instance, coefficients)):
+            contribution = value * coef
+            contributions[name].append(contribution)
+
+    # Calculate statistics
+    analysis = {}
+
+    for name, contribs in contributions.items():
+        analysis[name] = {
+            'mean': sum(contribs) / len(contribs),
+            'min': min(contribs),
+            'max': max(contribs),
+            'std': (sum((c - sum(contribs) / len(contribs)) ** 2 for c in contribs) / len(contribs)) ** 0.5,
+        }
+
+    return analysis
+
+
+# Create alias
+feat_contrib = feature_contribution_analysis
+
+
+def global_feature_importance(
+    importances: List[float],
+    feature_names: Optional[List[str]] = None,
+    top_k: Optional[int] = None
+) -> List[Tuple[str, float]]:
+    """
+    Get global feature importance ranking.
+
+    Alias: global_importance()
+
+    Args:
+        importances: Feature importance scores
+        feature_names: Optional feature names
+        top_k: Return only top k features
+
+    Returns:
+        list: Sorted list of (feature_name, importance) tuples
+
+    Examples:
+        >>> from ilovetools.ml import global_importance  # Short alias
+
+        >>> importances = [0.5, 0.3, 0.15, 0.05]
+        >>> feature_names = ['age', 'income', 'debt', 'credit']
+        >>>
+        >>> ranking = global_importance(importances, feature_names, top_k=3)
+        >>> for name, score in ranking:
+        ...     print(f"{name}: {score:.2f}")
+
+        >>> from ilovetools.ml import global_feature_importance  # Full name
+        >>> ranking = global_feature_importance(importances, feature_names)
+
+    Notes:
+        - Overall model behavior
+        - Ranked by importance
+        - Top features first
+        - Global view
+    """
+    n_features = len(importances)
+
+    if feature_names is None:
+        feature_names = [f"feature_{i}" for i in range(n_features)]
+
+    # Create and sort pairs
+    pairs = list(zip(feature_names, importances))
+    pairs.sort(key=lambda x: x[1], reverse=True)
+
+    if top_k is not None:
+        pairs = pairs[:top_k]
+
+    return pairs
+
+
+# Create alias
+global_importance = global_feature_importance
+
+
+def local_feature_importance(
+    instance: List[float],
+    shap_values: Dict[str, float],
+    base_value: float
+) -> Dict[str, Any]:
+    """
+    Get local feature importance for an instance.
+
+    Alias: local_importance()
+
+    Args:
+        instance: Feature values
+        shap_values: SHAP values for features
+        base_value: Base prediction value
+
+    Returns:
+        dict: Local importance details
+
+    Examples:
+        >>> from ilovetools.ml import local_importance  # Short alias
+
+        >>> instance = [30, 50000]
+        >>> shap_values = {'age': 0.2, 'income': 0.15}
+        >>> base_value = 0.5
+        >>>
+        >>> local_imp = local_importance(instance, shap_values, base_value)
+        >>> print(f"Prediction: {local_imp['prediction']}")
+        >>> print(f"Top contributor: {local_imp['top_contributor']}")
+
+        >>> from ilovetools.ml import local_feature_importance  # Full name
+        >>> local_imp = local_feature_importance(instance, shap_values, base_value)
+
+    Notes:
+        - Individual prediction explanation
+        - Feature contributions
+        - Local view
+        - Instance-specific
+    """
+    # Sort by absolute SHAP value
+    sorted_features = sorted(shap_values.items(), key=lambda x: abs(x[1]), reverse=True)
+
+    # Calculate prediction
+    prediction = base_value + sum(shap_values.values())
+
+    # Separate positive and negative
+    positive_contrib = [(k, v) for k, v in sorted_features if v > 0]
+    negative_contrib = [(k, abs(v)) for k, v in sorted_features if v < 0]
+
+    return {
+        'base_value': base_value,
+        'prediction': prediction,
+        'shap_values': shap_values,
+        'top_contributor': sorted_features[0] if sorted_features else None,
+        'positive_contributors': positive_contrib,
+        'negative_contributors': negative_contrib,
+    }
+
+
+# Create alias
+local_importance = local_feature_importance
+
+
+def model_summary_statistics(
+    predictions: List[float],
+    actuals: Optional[List[float]] = None,
+    feature_importances: Optional[Dict[str, float]] = None
+) -> Dict[str, Any]:
+    """
+    Generate model summary statistics.
+
+    Alias: model_summary()
+
+    Args:
+        predictions: Model predictions
+        actuals: Optional actual values
+        feature_importances: Optional feature importance scores
+
+    Returns:
+        dict: Summary statistics
+
+    Examples:
+        >>> from ilovetools.ml import model_summary  # Short alias
+
+        >>> predictions = [1.2, 2.1, 2.9, 4.1]
+        >>> actuals = [1.0, 2.0, 3.0, 4.0]
+        >>> importances = {'age': 0.5, 'income': 0.3, 'debt': 0.2}
+        >>>
+        >>> summary = model_summary(predictions, actuals, importances)
+        >>> print(f"Mean prediction: {summary['mean_prediction']}")
+        >>> print(f"MAE: {summary['mae']}")
+
+        >>> from ilovetools.ml import model_summary_statistics  # Full name
+        >>> summary = model_summary_statistics(predictions, actuals, importances)
+
+    Notes:
+        - Overall model performance
+        - Prediction statistics
+        - Error metrics
+        - Feature importance summary
+    """
+    summary = {
+        'n_predictions': len(predictions),
+        'mean_prediction': sum(predictions) / len(predictions),
+        'min_prediction': min(predictions),
+        'max_prediction': max(predictions),
+        'std_prediction': (sum((p - sum(predictions) / len(predictions)) ** 2
+                           for p in predictions) / len(predictions)) ** 0.5,
+    }
+
+    if actuals is not None:
+        errors = [abs(predictions[i] - actuals[i]) for i in range(len(predictions))]
+        summary['mae'] = sum(errors) / len(errors)
+        summary['mse'] = sum(e ** 2 for e in errors) / len(errors)
+        summary['rmse'] = summary['mse'] ** 0.5
+
+    if feature_importances is not None:
+        sorted_features = sorted(feature_importances.items(), key=lambda x: x[1], reverse=True)
+        summary['top_features'] = sorted_features[:5]
+        summary['n_features'] = len(feature_importances)
+
+    return summary
+
+
+# Create alias
+model_summary = model_summary_statistics
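
The module above is pure standard library (only `typing` and `random`), so it runs without numpy or scikit-learn. A short end-to-end sketch of the intended workflow, assuming the functions behave exactly as the source above defines them; the toy model, data, and feature names are illustrative and not part of the package:

>>> from ilovetools.ml import perm_importance, pred_breakdown
>>> X = [[1.0, 5.0], [2.0, 3.0], [3.0, 8.0], [4.0, 1.0]]
>>> y = [2.0, 4.0, 6.0, 8.0]  # depends only on the first feature
>>> model = lambda rows: [2.0 * row[0] for row in rows]
>>> metric = lambda yt, yp: -sum(abs(a - b) for a, b in zip(yt, yp))  # higher is better
>>> mean_imp, std_imp = perm_importance(X, y, model, metric,
...                                     feature_names=['x0', 'x1'],
...                                     n_repeats=10, random_state=0)
>>> mean_imp['x1']  # shuffling an ignored feature leaves the score unchanged
0.0
>>> pred_breakdown([3.0, 8.0], [2.0, 0.0], feature_names=['x0', 'x1'])['prediction']
6.0

Note that `metric_func` must return a score where higher is better (hence the negated absolute error above): `permutation_importance` reports the drop `baseline_score - permuted_score`, so informative features come out positive and ignored features come out at zero.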

{ilovetools-0.1.7 → ilovetools-0.1.8/ilovetools.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ilovetools
-Version: 0.1.7
+Version: 0.1.8
 Summary: A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs
 Home-page: https://github.com/AliMehdi512/ilovetools
 Author: Ali Mehdi
{ilovetools-0.1.7 → ilovetools-0.1.8}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ilovetools"
-version = "0.1.7"
+version = "0.1.8"
 description = "A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs"
 readme = "README.md"
 requires-python = ">=3.8"
{ilovetools-0.1.7 → ilovetools-0.1.8}/setup.py

@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="ilovetools",
-    version="0.1.
+    version="0.1.7",
     author="Ali Mehdi",
     author_email="ali.mehdi.dev579@gmail.com",
     description="A comprehensive Python utility library with modular tools for AI/ML, data processing, and daily programming needs",
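
One wrinkle in the two hunks above: setup.py now pins version="0.1.7" while pyproject.toml declares 0.1.8. The generated PKG-INFO in this diff reports 0.1.8, so the PEP 621 `[project]` value is the one the build actually used, and the installed distribution should identify as 0.1.8 regardless of the stale setup.py value. A quick check, assuming the release installs cleanly:

>>> from importlib.metadata import version
>>> version("ilovetools")
'0.1.8'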