ins-pricing 0.4.5-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (84)
  1. ins_pricing/README.md +48 -22
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +52 -50
  4. ins_pricing/cli/BayesOpt_incremental.py +39 -105
  5. ins_pricing/cli/Explain_Run.py +31 -23
  6. ins_pricing/cli/Explain_entry.py +532 -579
  7. ins_pricing/cli/Pricing_Run.py +31 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +11 -9
  9. ins_pricing/cli/utils/cli_common.py +256 -256
  10. ins_pricing/cli/utils/cli_config.py +375 -375
  11. ins_pricing/cli/utils/import_resolver.py +382 -365
  12. ins_pricing/cli/utils/notebook_utils.py +340 -340
  13. ins_pricing/cli/watchdog_run.py +209 -201
  14. ins_pricing/frontend/__init__.py +10 -10
  15. ins_pricing/frontend/example_workflows.py +1 -1
  16. ins_pricing/governance/__init__.py +20 -20
  17. ins_pricing/governance/release.py +159 -159
  18. ins_pricing/modelling/__init__.py +147 -92
  19. ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +2 -2
  20. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  21. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -562
  22. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -964
  23. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  24. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
  29. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
  32. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
  36. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  37. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  38. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  39. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  40. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  41. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
  42. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  43. ins_pricing/modelling/explain/__init__.py +55 -55
  44. ins_pricing/modelling/explain/metrics.py +27 -174
  45. ins_pricing/modelling/explain/permutation.py +237 -237
  46. ins_pricing/modelling/plotting/__init__.py +40 -36
  47. ins_pricing/modelling/plotting/compat.py +228 -0
  48. ins_pricing/modelling/plotting/curves.py +572 -572
  49. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  50. ins_pricing/modelling/plotting/geo.py +362 -362
  51. ins_pricing/modelling/plotting/importance.py +121 -121
  52. ins_pricing/pricing/__init__.py +27 -27
  53. ins_pricing/production/__init__.py +35 -25
  54. ins_pricing/production/{predict.py → inference.py} +140 -57
  55. ins_pricing/production/monitoring.py +8 -21
  56. ins_pricing/reporting/__init__.py +11 -11
  57. ins_pricing/setup.py +1 -1
  58. ins_pricing/tests/production/test_inference.py +90 -0
  59. ins_pricing/utils/__init__.py +116 -83
  60. ins_pricing/utils/device.py +255 -255
  61. ins_pricing/utils/features.py +53 -0
  62. ins_pricing/utils/io.py +72 -0
  63. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  64. ins_pricing/utils/metrics.py +158 -24
  65. ins_pricing/utils/numerics.py +76 -0
  66. ins_pricing/utils/paths.py +9 -1
  67. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +182 -182
  68. ins_pricing-0.5.0.dist-info/RECORD +131 -0
  69. ins_pricing/modelling/core/BayesOpt.py +0 -146
  70. ins_pricing/modelling/core/__init__.py +0 -1
  71. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  72. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  73. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  74. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  75. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  76. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  77. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  78. ins_pricing/tests/production/test_predict.py +0 -233
  79. ins_pricing-0.4.5.dist-info/RECORD +0 -130
  80. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
  81. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
  82. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
  83. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
  84. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
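The bulk of this release is a layout migration: `modelling/core/bayesopt` is promoted to `modelling/bayesopt`, the legacy `modelling/core` modules (`BayesOpt.py`, `bayesopt/utils.py`, `bayesopt/utils_backup.py`) are deleted outright, shared helpers move up into `ins_pricing/utils/` (`losses.py`, `features.py`, `io.py`, `numerics.py`), and `production/predict.py` becomes `production/inference.py` with its test module renamed to match. Downstream imports of the old paths will break on upgrade. Below is a minimal version-tolerant import sketch, assuming only the module paths listed above; `_resolve_bayesopt` is a hypothetical helper, and the symbols each module exports are not visible in this diff:

```python
import importlib

def _resolve_bayesopt():
    """Return the bayesopt package under either the 0.5.0 or 0.4.5 layout."""
    for path in (
        "ins_pricing.modelling.bayesopt",       # 0.5.0 layout
        "ins_pricing.modelling.core.bayesopt",  # 0.4.5 layout
    ):
        try:
            return importlib.import_module(path)
        except ModuleNotFoundError:
            continue
    raise ImportError("ins_pricing bayesopt package not found under either layout")

bayesopt = _resolve_bayesopt()
```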
ins_pricing/modelling/explain/permutation.py
@@ -1,237 +1,237 @@
-from __future__ import annotations
-
-from typing import Callable, Optional, Sequence
-
-import numpy as np
-import pandas as pd
-from joblib import Parallel, delayed
-
-from .metrics import resolve_metric
-
-
-def _compute_feature_importance(
-    feat, X_data, y_arr, w_arr, predict_fn, metric_fn,
-    baseline_score, higher_is_better, n_repeats, random_state, metric_name,
-    return_scores, is_dataframe=True, feat_idx=None
-):
-    """Helper function to compute importance for a single feature (parallelizable)."""
-    rng = np.random.default_rng(random_state)
-
-    if is_dataframe:
-        # Work on a copy for thread safety in parallel execution
-        X_work = X_data.copy()
-        orig_values = X_work[feat].to_numpy(copy=False).copy()
-        scores = []
-        for _ in range(n_repeats):
-            X_work[feat] = rng.permutation(orig_values)
-            pred = predict_fn(X_work)
-            score = metric_fn(y_arr, pred, w_arr)
-            scores.append(float(score))
-    else:
-        X_work = X_data.copy()
-        orig_col = X_data[:, feat_idx].copy()
-        scores = []
-        for _ in range(n_repeats):
-            X_work[:, feat_idx] = rng.permutation(orig_col)
-            pred = predict_fn(X_work)
-            score = metric_fn(y_arr, pred, w_arr)
-            scores.append(float(score))
-
-    scores_arr = np.asarray(scores, dtype=float)
-    if higher_is_better:
-        delta = baseline_score - scores_arr
-    else:
-        delta = scores_arr - baseline_score
-
-    entry = {
-        "feature": feat,
-        "importance_mean": float(np.mean(delta)),
-        "importance_std": float(np.std(delta)),
-        "baseline_score": float(baseline_score),
-        "permutation_score_mean": float(np.mean(scores_arr)),
-        "metric": metric_name,
-    }
-    if return_scores:
-        entry["permutation_scores"] = scores
-    return entry
-
-
-def _prepare_data(X, y, sample_weight, max_rows, rng):
-    y_arr = np.asarray(y)
-    if y_arr.ndim != 1:
-        y_arr = y_arr.reshape(-1)
-
-    w_arr = None
-    if sample_weight is not None:
-        w_arr = np.asarray(sample_weight).reshape(-1)
-        if w_arr.shape[0] != y_arr.shape[0]:
-            raise ValueError("sample_weight length must match y.")
-
-    if isinstance(X, pd.DataFrame):
-        X_data = X
-        if len(X_data) != len(y_arr):
-            raise ValueError("X and y must have the same length.")
-        if max_rows and len(X_data) > max_rows:
-            idx = rng.choice(len(X_data), size=int(max_rows), replace=False)
-            X_data = X_data.iloc[idx].copy()
-            y_arr = y_arr[idx]
-            if w_arr is not None:
-                w_arr = w_arr[idx]
-        return X_data, y_arr, w_arr
-
-    X_np = np.asarray(X)
-    if X_np.ndim != 2:
-        raise ValueError("X must be 2d when not a DataFrame.")
-    if X_np.shape[0] != y_arr.shape[0]:
-        raise ValueError("X and y must have the same length.")
-    if max_rows and X_np.shape[0] > max_rows:
-        idx = rng.choice(X_np.shape[0], size=int(max_rows), replace=False)
-        X_np = X_np[idx]
-        y_arr = y_arr[idx]
-        if w_arr is not None:
-            w_arr = w_arr[idx]
-    return X_np, y_arr, w_arr
-
-
-def permutation_importance(
-    predict_fn: Callable,
-    X,
-    y,
-    *,
-    sample_weight=None,
-    metric: str | Callable = "auto",
-    task_type: Optional[str] = None,
-    higher_is_better: Optional[bool] = None,
-    n_repeats: int = 5,
-    random_state: Optional[int] = None,
-    max_rows: Optional[int] = 5000,
-    features: Optional[Sequence[str]] = None,
-    return_scores: bool = False,
-    safe_copy: bool = False,
-    n_jobs: Optional[int] = None,
-) -> pd.DataFrame:
-    """Permutation importance on tabular data.
-
-    predict_fn should accept the same type as X (DataFrame or ndarray).
-    Set safe_copy=True if predict_fn mutates its input.
-    Set n_jobs to enable parallel processing across features (default: None = sequential).
-    """
-    rng = np.random.default_rng(random_state)
-    n_repeats = max(1, int(n_repeats))
-
-    X_data, y_arr, w_arr = _prepare_data(X, y, sample_weight, max_rows, rng)
-    metric_fn, higher_is_better, metric_name = resolve_metric(
-        metric, task_type=task_type, higher_is_better=higher_is_better
-    )
-
-    baseline_pred = predict_fn(X_data)
-    baseline_score = metric_fn(y_arr, baseline_pred, w_arr)
-
-    if isinstance(X_data, pd.DataFrame):
-        feature_names = list(X_data.columns)
-        if features is not None:
-            feature_names = [f for f in features if f in X_data.columns]
-
-        # Use parallel processing if n_jobs is specified
-        if n_jobs is not None and n_jobs != 1:
-            # Generate different random seeds for each feature to ensure reproducibility
-            seeds = [random_state + i if random_state is not None else None
-                     for i in range(len(feature_names))]
-            results = Parallel(n_jobs=n_jobs, prefer="threads")(
-                delayed(_compute_feature_importance)(
-                    feat, X_data, y_arr, w_arr, predict_fn, metric_fn,
-                    baseline_score, higher_is_better, n_repeats, seed,
-                    metric_name, return_scores, is_dataframe=True
-                )
-                for feat, seed in zip(feature_names, seeds)
-            )
-        else:
-            # Sequential processing (original optimized version)
-            X_perm = X_data if not safe_copy else X_data.copy()
-            results = []
-            for feat in feature_names:
-                # Store original values directly without extra copy
-                orig_values = X_perm[feat].to_numpy(copy=False)
-                orig_copy = orig_values.copy()  # Only copy the column, not the entire DataFrame
-                scores = []
-                for _ in range(n_repeats):
-                    X_perm[feat] = rng.permutation(orig_copy)
-                    pred = predict_fn(X_perm)
-                    score = metric_fn(y_arr, pred, w_arr)
-                    scores.append(float(score))
-                # Restore original column values
-                X_perm[feat] = orig_copy
-
-                scores_arr = np.asarray(scores, dtype=float)
-                if higher_is_better:
-                    delta = baseline_score - scores_arr
-                else:
-                    delta = scores_arr - baseline_score
-                entry = {
-                    "feature": feat,
-                    "importance_mean": float(np.mean(delta)),
-                    "importance_std": float(np.std(delta)),
-                    "baseline_score": float(baseline_score),
-                    "permutation_score_mean": float(np.mean(scores_arr)),
-                    "metric": metric_name,
-                }
-                if return_scores:
-                    entry["permutation_scores"] = scores
-                results.append(entry)
-    else:
-        if features is not None:
-            if len(features) != X_data.shape[1]:
-                raise ValueError("features length must match X columns for ndarray input.")
-            feature_names = list(features)
-        else:
-            feature_names = [f"x{i}" for i in range(X_data.shape[1])]
-
-        X_base = np.asarray(X_data)
-
-        # Use parallel processing if n_jobs is specified
-        if n_jobs is not None and n_jobs != 1:
-            seeds = [random_state + i if random_state is not None else None
-                     for i in range(len(feature_names))]
-            results = Parallel(n_jobs=n_jobs, prefer="threads")(
-                delayed(_compute_feature_importance)(
-                    feat, X_base, y_arr, w_arr, predict_fn, metric_fn,
-                    baseline_score, higher_is_better, n_repeats, seed,
-                    metric_name, return_scores, is_dataframe=False, feat_idx=idx
-                )
-                for idx, (feat, seed) in enumerate(zip(feature_names, seeds))
-            )
-        else:
-            # Sequential processing
-            X_perm = X_base.copy()
-            results = []
-            for idx, feat in enumerate(feature_names):
-                orig_col = X_base[:, idx].copy()
-                scores = []
-                for _ in range(n_repeats):
-                    X_perm[:, idx] = rng.permutation(orig_col)
-                    pred_input = X_perm.copy() if safe_copy else X_perm
-                    pred = predict_fn(pred_input)
-                    score = metric_fn(y_arr, pred, w_arr)
-                    scores.append(float(score))
-                X_perm[:, idx] = orig_col
-
-                scores_arr = np.asarray(scores, dtype=float)
-                if higher_is_better:
-                    delta = baseline_score - scores_arr
-                else:
-                    delta = scores_arr - baseline_score
-                entry = {
-                    "feature": feat,
-                    "importance_mean": float(np.mean(delta)),
-                    "importance_std": float(np.std(delta)),
-                    "baseline_score": float(baseline_score),
-                    "permutation_score_mean": float(np.mean(scores_arr)),
-                    "metric": metric_name,
-                }
-                if return_scores:
-                    entry["permutation_scores"] = scores
-                results.append(entry)
-
-    df = pd.DataFrame(results)
-    df = df.sort_values(by="importance_mean", ascending=False).reset_index(drop=True)
-    return df
+from __future__ import annotations
+
+from typing import Callable, Optional, Sequence
+
+import numpy as np
+import pandas as pd
+from joblib import Parallel, delayed
+
+from ins_pricing.modelling.explain.metrics import resolve_metric
+
+
+def _compute_feature_importance(
+    feat, X_data, y_arr, w_arr, predict_fn, metric_fn,
+    baseline_score, higher_is_better, n_repeats, random_state, metric_name,
+    return_scores, is_dataframe=True, feat_idx=None
+):
+    """Helper function to compute importance for a single feature (parallelizable)."""
+    rng = np.random.default_rng(random_state)
+
+    if is_dataframe:
+        # Work on a copy for thread safety in parallel execution
+        X_work = X_data.copy()
+        orig_values = X_work[feat].to_numpy(copy=False).copy()
+        scores = []
+        for _ in range(n_repeats):
+            X_work[feat] = rng.permutation(orig_values)
+            pred = predict_fn(X_work)
+            score = metric_fn(y_arr, pred, w_arr)
+            scores.append(float(score))
+    else:
+        X_work = X_data.copy()
+        orig_col = X_data[:, feat_idx].copy()
+        scores = []
+        for _ in range(n_repeats):
+            X_work[:, feat_idx] = rng.permutation(orig_col)
+            pred = predict_fn(X_work)
+            score = metric_fn(y_arr, pred, w_arr)
+            scores.append(float(score))
+
+    scores_arr = np.asarray(scores, dtype=float)
+    if higher_is_better:
+        delta = baseline_score - scores_arr
+    else:
+        delta = scores_arr - baseline_score
+
+    entry = {
+        "feature": feat,
+        "importance_mean": float(np.mean(delta)),
+        "importance_std": float(np.std(delta)),
+        "baseline_score": float(baseline_score),
+        "permutation_score_mean": float(np.mean(scores_arr)),
+        "metric": metric_name,
+    }
+    if return_scores:
+        entry["permutation_scores"] = scores
+    return entry
+
+
+def _prepare_data(X, y, sample_weight, max_rows, rng):
+    y_arr = np.asarray(y)
+    if y_arr.ndim != 1:
+        y_arr = y_arr.reshape(-1)
+
+    w_arr = None
+    if sample_weight is not None:
+        w_arr = np.asarray(sample_weight).reshape(-1)
+        if w_arr.shape[0] != y_arr.shape[0]:
+            raise ValueError("sample_weight length must match y.")
+
+    if isinstance(X, pd.DataFrame):
+        X_data = X
+        if len(X_data) != len(y_arr):
+            raise ValueError("X and y must have the same length.")
+        if max_rows and len(X_data) > max_rows:
+            idx = rng.choice(len(X_data), size=int(max_rows), replace=False)
+            X_data = X_data.iloc[idx].copy()
+            y_arr = y_arr[idx]
+            if w_arr is not None:
+                w_arr = w_arr[idx]
+        return X_data, y_arr, w_arr
+
+    X_np = np.asarray(X)
+    if X_np.ndim != 2:
+        raise ValueError("X must be 2d when not a DataFrame.")
+    if X_np.shape[0] != y_arr.shape[0]:
+        raise ValueError("X and y must have the same length.")
+    if max_rows and X_np.shape[0] > max_rows:
+        idx = rng.choice(X_np.shape[0], size=int(max_rows), replace=False)
+        X_np = X_np[idx]
+        y_arr = y_arr[idx]
+        if w_arr is not None:
+            w_arr = w_arr[idx]
+    return X_np, y_arr, w_arr
+
+
+def permutation_importance(
+    predict_fn: Callable,
+    X,
+    y,
+    *,
+    sample_weight=None,
+    metric: str | Callable = "auto",
+    task_type: Optional[str] = None,
+    higher_is_better: Optional[bool] = None,
+    n_repeats: int = 5,
+    random_state: Optional[int] = None,
+    max_rows: Optional[int] = 5000,
+    features: Optional[Sequence[str]] = None,
+    return_scores: bool = False,
+    safe_copy: bool = False,
+    n_jobs: Optional[int] = None,
+) -> pd.DataFrame:
+    """Permutation importance on tabular data.
+
+    predict_fn should accept the same type as X (DataFrame or ndarray).
+    Set safe_copy=True if predict_fn mutates its input.
+    Set n_jobs to enable parallel processing across features (default: None = sequential).
+    """
+    rng = np.random.default_rng(random_state)
+    n_repeats = max(1, int(n_repeats))
+
+    X_data, y_arr, w_arr = _prepare_data(X, y, sample_weight, max_rows, rng)
+    metric_fn, higher_is_better, metric_name = resolve_metric(
+        metric, task_type=task_type, higher_is_better=higher_is_better
+    )
+
+    baseline_pred = predict_fn(X_data)
+    baseline_score = metric_fn(y_arr, baseline_pred, w_arr)
+
+    if isinstance(X_data, pd.DataFrame):
+        feature_names = list(X_data.columns)
+        if features is not None:
+            feature_names = [f for f in features if f in X_data.columns]
+
+        # Use parallel processing if n_jobs is specified
+        if n_jobs is not None and n_jobs != 1:
+            # Generate different random seeds for each feature to ensure reproducibility
+            seeds = [random_state + i if random_state is not None else None
+                     for i in range(len(feature_names))]
+            results = Parallel(n_jobs=n_jobs, prefer="threads")(
+                delayed(_compute_feature_importance)(
+                    feat, X_data, y_arr, w_arr, predict_fn, metric_fn,
+                    baseline_score, higher_is_better, n_repeats, seed,
+                    metric_name, return_scores, is_dataframe=True
+                )
+                for feat, seed in zip(feature_names, seeds)
+            )
+        else:
+            # Sequential processing (original optimized version)
+            X_perm = X_data if not safe_copy else X_data.copy()
+            results = []
+            for feat in feature_names:
+                # Store original values directly without extra copy
+                orig_values = X_perm[feat].to_numpy(copy=False)
+                orig_copy = orig_values.copy()  # Only copy the column, not the entire DataFrame
+                scores = []
+                for _ in range(n_repeats):
+                    X_perm[feat] = rng.permutation(orig_copy)
+                    pred = predict_fn(X_perm)
+                    score = metric_fn(y_arr, pred, w_arr)
+                    scores.append(float(score))
+                # Restore original column values
+                X_perm[feat] = orig_copy
+
+                scores_arr = np.asarray(scores, dtype=float)
+                if higher_is_better:
+                    delta = baseline_score - scores_arr
+                else:
+                    delta = scores_arr - baseline_score
+                entry = {
+                    "feature": feat,
+                    "importance_mean": float(np.mean(delta)),
+                    "importance_std": float(np.std(delta)),
+                    "baseline_score": float(baseline_score),
+                    "permutation_score_mean": float(np.mean(scores_arr)),
+                    "metric": metric_name,
+                }
+                if return_scores:
+                    entry["permutation_scores"] = scores
+                results.append(entry)
+    else:
+        if features is not None:
+            if len(features) != X_data.shape[1]:
+                raise ValueError("features length must match X columns for ndarray input.")
+            feature_names = list(features)
+        else:
+            feature_names = [f"x{i}" for i in range(X_data.shape[1])]
+
+        X_base = np.asarray(X_data)
+
+        # Use parallel processing if n_jobs is specified
+        if n_jobs is not None and n_jobs != 1:
+            seeds = [random_state + i if random_state is not None else None
+                     for i in range(len(feature_names))]
+            results = Parallel(n_jobs=n_jobs, prefer="threads")(
+                delayed(_compute_feature_importance)(
+                    feat, X_base, y_arr, w_arr, predict_fn, metric_fn,
+                    baseline_score, higher_is_better, n_repeats, seed,
+                    metric_name, return_scores, is_dataframe=False, feat_idx=idx
+                )
+                for idx, (feat, seed) in enumerate(zip(feature_names, seeds))
+            )
+        else:
+            # Sequential processing
+            X_perm = X_base.copy()
+            results = []
+            for idx, feat in enumerate(feature_names):
+                orig_col = X_base[:, idx].copy()
+                scores = []
+                for _ in range(n_repeats):
+                    X_perm[:, idx] = rng.permutation(orig_col)
+                    pred_input = X_perm.copy() if safe_copy else X_perm
+                    pred = predict_fn(pred_input)
+                    score = metric_fn(y_arr, pred, w_arr)
+                    scores.append(float(score))
+                X_perm[:, idx] = orig_col
+
+                scores_arr = np.asarray(scores, dtype=float)
+                if higher_is_better:
+                    delta = baseline_score - scores_arr
+                else:
+                    delta = scores_arr - baseline_score
+                entry = {
+                    "feature": feat,
+                    "importance_mean": float(np.mean(delta)),
+                    "importance_std": float(np.std(delta)),
+                    "baseline_score": float(baseline_score),
+                    "permutation_score_mean": float(np.mean(scores_arr)),
+                    "metric": metric_name,
+                }
+                if return_scores:
+                    entry["permutation_scores"] = scores
+                results.append(entry)
+
+    df = pd.DataFrame(results)
+    df = df.sort_values(by="importance_mean", ascending=False).reset_index(drop=True)
+    return df
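Both sides of this hunk render identically except line 9, where the relative import `from .metrics import resolve_metric` becomes the absolute `from ins_pricing.modelling.explain.metrics import resolve_metric`; the public signature of `permutation_importance` is unchanged. A minimal usage sketch against that signature follows. The synthetic data, the stand-in `predict_fn`, and the `neg_mse` metric are illustrative assumptions; passing a callable metric with an explicit `higher_is_better` sidesteps the `"auto"` resolution in `resolve_metric`, whose body is not part of this hunk:

```python
import numpy as np
import pandas as pd

from ins_pricing.modelling.explain.permutation import permutation_importance

rng = np.random.default_rng(0)
X = pd.DataFrame({"a": rng.normal(size=500), "b": rng.normal(size=500)})
y = 3.0 * X["a"].to_numpy() + rng.normal(scale=0.1, size=500)  # signal lives in "a"

def predict_fn(df: pd.DataFrame) -> np.ndarray:
    # Stand-in for a fitted model's predict method.
    return 3.0 * df["a"].to_numpy()

def neg_mse(y_true, y_pred, w=None):
    # Negated weighted MSE, so that higher scores are better.
    err = (np.asarray(y_true) - np.asarray(y_pred)) ** 2
    return -float(np.average(err, weights=w))

imp = permutation_importance(
    predict_fn,
    X,
    y,
    metric=neg_mse,
    higher_is_better=True,
    n_repeats=5,
    random_state=42,
    n_jobs=2,  # exercises the Parallel(prefer="threads") branch
)
print(imp[["feature", "importance_mean", "importance_std"]])
# Expect "a" to rank far above "b": permuting it destroys the only signal.
```

With `higher_is_better=True`, `importance_mean` is the baseline score minus the mean permuted score, so the features whose shuffling hurts the model most sort to the top; in the parallel branch each feature permutes with its own derived seed (`random_state + i`), keeping runs reproducible.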
ins_pricing/modelling/plotting/__init__.py
@@ -1,45 +1,49 @@
-from __future__ import annotations
-
-from .common import EPS, PlotStyle
-from .curves import (
+from __future__ import annotations
+
+from ins_pricing.modelling.plotting.common import EPS, PlotStyle
+from ins_pricing.modelling.plotting.curves import (
     double_lift_table,
     lift_table,
-    plot_calibration_curve,
-    plot_conversion_lift,
-    plot_double_lift_curve,
-    plot_ks_curve,
-    plot_lift_curve,
+    plot_calibration_curve,
+    plot_conversion_lift,
+    plot_double_lift_curve,
+    plot_ks_curve,
+    plot_lift_curve,
     plot_pr_curves,
     plot_roc_curves,
 )
-from .diagnostics import plot_loss_curve, plot_oneway
-from .geo import (
-    plot_geo_contour,
-    plot_geo_contour_on_map,
-    plot_geo_heatmap,
-    plot_geo_heatmap_on_map,
-)
-from .importance import plot_feature_importance, plot_shap_importance, shap_importance
-
-__all__ = [
-    "EPS",
-    "PlotStyle",
-    "double_lift_table",
-    "lift_table",
-    "plot_calibration_curve",
-    "plot_conversion_lift",
-    "plot_double_lift_curve",
-    "plot_feature_importance",
-    "plot_geo_contour",
-    "plot_geo_contour_on_map",
-    "plot_geo_heatmap",
-    "plot_geo_heatmap_on_map",
-    "plot_ks_curve",
-    "plot_lift_curve",
-    "plot_loss_curve",
-    "plot_oneway",
-    "plot_pr_curves",
+from ins_pricing.modelling.plotting.compat import PlotUtils, plot_dlift_list, plot_lift_list
+from ins_pricing.modelling.plotting.diagnostics import plot_loss_curve, plot_oneway
+from ins_pricing.modelling.plotting.geo import (
+    plot_geo_contour,
+    plot_geo_contour_on_map,
+    plot_geo_heatmap,
+    plot_geo_heatmap_on_map,
+)
+from ins_pricing.modelling.plotting.importance import plot_feature_importance, plot_shap_importance, shap_importance
+
+__all__ = [
+    "EPS",
+    "PlotStyle",
+    "double_lift_table",
+    "lift_table",
+    "plot_calibration_curve",
+    "plot_conversion_lift",
+    "plot_double_lift_curve",
+    "plot_feature_importance",
+    "plot_geo_contour",
+    "plot_geo_contour_on_map",
+    "plot_geo_heatmap",
+    "plot_geo_heatmap_on_map",
+    "plot_ks_curve",
+    "plot_lift_curve",
+    "plot_loss_curve",
+    "plot_oneway",
+    "plot_pr_curves",
     "plot_roc_curves",
     "plot_shap_importance",
     "shap_importance",
+    "PlotUtils",
+    "plot_lift_list",
+    "plot_dlift_list",
 ]
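Entry 47 above introduces `plotting/compat.py` (228 added lines), and this `__init__` now re-exports its legacy `PlotUtils` facade and the `plot_lift_list` / `plot_dlift_list` helpers alongside the canonical plotting API. A small smoke-test sketch, assuming only the `__all__` list shown in this hunk (no helper is actually invoked, since their signatures are not part of the diff):

```python
import importlib

# Check that every name advertised by __all__ resolves on the package,
# including the legacy compat re-exports added in 0.5.0.
plotting = importlib.import_module("ins_pricing.modelling.plotting")
missing = [name for name in plotting.__all__ if not hasattr(plotting, name)]
assert not missing, f"__all__ advertises unresolved names: {missing}"
print(f"{len(plotting.__all__)} plotting exports resolve")
```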