ins-pricing 0.4.5-py3-none-any.whl → 0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. ins_pricing/README.md +48 -22
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +58 -46
  4. ins_pricing/cli/BayesOpt_incremental.py +77 -110
  5. ins_pricing/cli/Explain_Run.py +42 -23
  6. ins_pricing/cli/Explain_entry.py +551 -577
  7. ins_pricing/cli/Pricing_Run.py +42 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +51 -16
  9. ins_pricing/cli/utils/bootstrap.py +23 -0
  10. ins_pricing/cli/utils/cli_common.py +256 -256
  11. ins_pricing/cli/utils/cli_config.py +379 -360
  12. ins_pricing/cli/utils/import_resolver.py +375 -358
  13. ins_pricing/cli/utils/notebook_utils.py +256 -242
  14. ins_pricing/cli/watchdog_run.py +216 -198
  15. ins_pricing/frontend/__init__.py +10 -10
  16. ins_pricing/frontend/app.py +132 -61
  17. ins_pricing/frontend/config_builder.py +33 -0
  18. ins_pricing/frontend/example_config.json +11 -0
  19. ins_pricing/frontend/example_workflows.py +1 -1
  20. ins_pricing/frontend/runner.py +340 -388
  21. ins_pricing/governance/__init__.py +20 -20
  22. ins_pricing/governance/release.py +159 -159
  23. ins_pricing/modelling/README.md +1 -1
  24. ins_pricing/modelling/__init__.py +147 -92
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +31 -13
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +12 -0
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +589 -552
  29. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +987 -958
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +488 -548
  32. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +349 -342
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +921 -913
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +794 -785
  36. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +454 -446
  37. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  38. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1294 -1282
  39. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +64 -56
  40. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +203 -198
  41. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +333 -325
  42. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +279 -267
  43. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +515 -313
  44. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  45. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  46. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +193 -186
  47. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  48. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  49. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  50. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +636 -623
  51. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  52. ins_pricing/modelling/explain/__init__.py +55 -55
  53. ins_pricing/modelling/explain/metrics.py +27 -174
  54. ins_pricing/modelling/explain/permutation.py +237 -237
  55. ins_pricing/modelling/plotting/__init__.py +40 -36
  56. ins_pricing/modelling/plotting/compat.py +228 -0
  57. ins_pricing/modelling/plotting/curves.py +572 -572
  58. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  59. ins_pricing/modelling/plotting/geo.py +362 -362
  60. ins_pricing/modelling/plotting/importance.py +121 -121
  61. ins_pricing/pricing/__init__.py +27 -27
  62. ins_pricing/pricing/factors.py +67 -56
  63. ins_pricing/production/__init__.py +35 -25
  64. ins_pricing/production/{predict.py → inference.py} +140 -57
  65. ins_pricing/production/monitoring.py +8 -21
  66. ins_pricing/reporting/__init__.py +11 -11
  67. ins_pricing/setup.py +1 -1
  68. ins_pricing/tests/production/test_inference.py +90 -0
  69. ins_pricing/utils/__init__.py +112 -78
  70. ins_pricing/utils/device.py +258 -237
  71. ins_pricing/utils/features.py +53 -0
  72. ins_pricing/utils/io.py +72 -0
  73. ins_pricing/utils/logging.py +34 -1
  74. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  75. ins_pricing/utils/metrics.py +158 -24
  76. ins_pricing/utils/numerics.py +76 -0
  77. ins_pricing/utils/paths.py +9 -1
  78. ins_pricing/utils/profiling.py +8 -4
  79. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/METADATA +1 -1
  80. ins_pricing-0.5.1.dist-info/RECORD +132 -0
  81. ins_pricing/modelling/core/BayesOpt.py +0 -146
  82. ins_pricing/modelling/core/__init__.py +0 -1
  83. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  84. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  85. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  86. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  87. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  88. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  89. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  90. ins_pricing/tests/production/test_predict.py +0 -233
  91. ins_pricing-0.4.5.dist-info/RECORD +0 -130
  92. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/WHEEL +0 -0
  93. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/top_level.txt +0 -0
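Most of the churn above is a layout migration: `modelling/core/bayesopt` is promoted to `modelling/bayesopt`, `modelling/core/evaluation.py` moves up to `modelling/evaluation.py`, and `production/predict.py` is renamed to `production/inference.py` (with `test_predict.py` replaced by `test_inference.py`). A hedged sketch of the corresponding import changes, with paths inferred only from the renames listed above:

# Hedged migration sketch; new import paths are inferred from the renames above,
# not from package documentation.
# ins-pricing 0.4.5 (old layout):
#   from ins_pricing.modelling.core.bayesopt import core
#   from ins_pricing.production import predict
# ins-pricing 0.5.1 (new layout):
from ins_pricing.modelling.bayesopt import core    # bayesopt promoted out of modelling/core
from ins_pricing.production import inference       # predict.py renamed to inference.py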
ins_pricing/modelling/explain/permutation.py
@@ -1,237 +1,237 @@
-from __future__ import annotations
-
-from typing import Callable, Optional, Sequence
-
-import numpy as np
-import pandas as pd
-from joblib import Parallel, delayed
-
-from .metrics import resolve_metric
-
-
-def _compute_feature_importance(
-    feat, X_data, y_arr, w_arr, predict_fn, metric_fn,
-    baseline_score, higher_is_better, n_repeats, random_state, metric_name,
-    return_scores, is_dataframe=True, feat_idx=None
-):
-    """Helper function to compute importance for a single feature (parallelizable)."""
-    rng = np.random.default_rng(random_state)
-
-    if is_dataframe:
-        # Work on a copy for thread safety in parallel execution
-        X_work = X_data.copy()
-        orig_values = X_work[feat].to_numpy(copy=False).copy()
-        scores = []
-        for _ in range(n_repeats):
-            X_work[feat] = rng.permutation(orig_values)
-            pred = predict_fn(X_work)
-            score = metric_fn(y_arr, pred, w_arr)
-            scores.append(float(score))
-    else:
-        X_work = X_data.copy()
-        orig_col = X_data[:, feat_idx].copy()
-        scores = []
-        for _ in range(n_repeats):
-            X_work[:, feat_idx] = rng.permutation(orig_col)
-            pred = predict_fn(X_work)
-            score = metric_fn(y_arr, pred, w_arr)
-            scores.append(float(score))
-
-    scores_arr = np.asarray(scores, dtype=float)
-    if higher_is_better:
-        delta = baseline_score - scores_arr
-    else:
-        delta = scores_arr - baseline_score
-
-    entry = {
-        "feature": feat,
-        "importance_mean": float(np.mean(delta)),
-        "importance_std": float(np.std(delta)),
-        "baseline_score": float(baseline_score),
-        "permutation_score_mean": float(np.mean(scores_arr)),
-        "metric": metric_name,
-    }
-    if return_scores:
-        entry["permutation_scores"] = scores
-    return entry
-
-
-def _prepare_data(X, y, sample_weight, max_rows, rng):
-    y_arr = np.asarray(y)
-    if y_arr.ndim != 1:
-        y_arr = y_arr.reshape(-1)
-
-    w_arr = None
-    if sample_weight is not None:
-        w_arr = np.asarray(sample_weight).reshape(-1)
-        if w_arr.shape[0] != y_arr.shape[0]:
-            raise ValueError("sample_weight length must match y.")
-
-    if isinstance(X, pd.DataFrame):
-        X_data = X
-        if len(X_data) != len(y_arr):
-            raise ValueError("X and y must have the same length.")
-        if max_rows and len(X_data) > max_rows:
-            idx = rng.choice(len(X_data), size=int(max_rows), replace=False)
-            X_data = X_data.iloc[idx].copy()
-            y_arr = y_arr[idx]
-            if w_arr is not None:
-                w_arr = w_arr[idx]
-        return X_data, y_arr, w_arr
-
-    X_np = np.asarray(X)
-    if X_np.ndim != 2:
-        raise ValueError("X must be 2d when not a DataFrame.")
-    if X_np.shape[0] != y_arr.shape[0]:
-        raise ValueError("X and y must have the same length.")
-    if max_rows and X_np.shape[0] > max_rows:
-        idx = rng.choice(X_np.shape[0], size=int(max_rows), replace=False)
-        X_np = X_np[idx]
-        y_arr = y_arr[idx]
-        if w_arr is not None:
-            w_arr = w_arr[idx]
-    return X_np, y_arr, w_arr
-
-
-def permutation_importance(
-    predict_fn: Callable,
-    X,
-    y,
-    *,
-    sample_weight=None,
-    metric: str | Callable = "auto",
-    task_type: Optional[str] = None,
-    higher_is_better: Optional[bool] = None,
-    n_repeats: int = 5,
-    random_state: Optional[int] = None,
-    max_rows: Optional[int] = 5000,
-    features: Optional[Sequence[str]] = None,
-    return_scores: bool = False,
-    safe_copy: bool = False,
-    n_jobs: Optional[int] = None,
-) -> pd.DataFrame:
-    """Permutation importance on tabular data.
-
-    predict_fn should accept the same type as X (DataFrame or ndarray).
-    Set safe_copy=True if predict_fn mutates its input.
-    Set n_jobs to enable parallel processing across features (default: None = sequential).
-    """
-    rng = np.random.default_rng(random_state)
-    n_repeats = max(1, int(n_repeats))
-
-    X_data, y_arr, w_arr = _prepare_data(X, y, sample_weight, max_rows, rng)
-    metric_fn, higher_is_better, metric_name = resolve_metric(
-        metric, task_type=task_type, higher_is_better=higher_is_better
-    )
-
-    baseline_pred = predict_fn(X_data)
-    baseline_score = metric_fn(y_arr, baseline_pred, w_arr)
-
-    if isinstance(X_data, pd.DataFrame):
-        feature_names = list(X_data.columns)
-        if features is not None:
-            feature_names = [f for f in features if f in X_data.columns]
-
-        # Use parallel processing if n_jobs is specified
-        if n_jobs is not None and n_jobs != 1:
-            # Generate different random seeds for each feature to ensure reproducibility
-            seeds = [random_state + i if random_state is not None else None
-                     for i in range(len(feature_names))]
-            results = Parallel(n_jobs=n_jobs, prefer="threads")(
-                delayed(_compute_feature_importance)(
-                    feat, X_data, y_arr, w_arr, predict_fn, metric_fn,
-                    baseline_score, higher_is_better, n_repeats, seed,
-                    metric_name, return_scores, is_dataframe=True
-                )
-                for feat, seed in zip(feature_names, seeds)
-            )
-        else:
-            # Sequential processing (original optimized version)
-            X_perm = X_data if not safe_copy else X_data.copy()
-            results = []
-            for feat in feature_names:
-                # Store original values directly without extra copy
-                orig_values = X_perm[feat].to_numpy(copy=False)
-                orig_copy = orig_values.copy()  # Only copy the column, not the entire DataFrame
-                scores = []
-                for _ in range(n_repeats):
-                    X_perm[feat] = rng.permutation(orig_copy)
-                    pred = predict_fn(X_perm)
-                    score = metric_fn(y_arr, pred, w_arr)
-                    scores.append(float(score))
-                # Restore original column values
-                X_perm[feat] = orig_copy
-
-                scores_arr = np.asarray(scores, dtype=float)
-                if higher_is_better:
-                    delta = baseline_score - scores_arr
-                else:
-                    delta = scores_arr - baseline_score
-                entry = {
-                    "feature": feat,
-                    "importance_mean": float(np.mean(delta)),
-                    "importance_std": float(np.std(delta)),
-                    "baseline_score": float(baseline_score),
-                    "permutation_score_mean": float(np.mean(scores_arr)),
-                    "metric": metric_name,
-                }
-                if return_scores:
-                    entry["permutation_scores"] = scores
-                results.append(entry)
-    else:
-        if features is not None:
-            if len(features) != X_data.shape[1]:
-                raise ValueError("features length must match X columns for ndarray input.")
-            feature_names = list(features)
-        else:
-            feature_names = [f"x{i}" for i in range(X_data.shape[1])]
-
-        X_base = np.asarray(X_data)
-
-        # Use parallel processing if n_jobs is specified
-        if n_jobs is not None and n_jobs != 1:
-            seeds = [random_state + i if random_state is not None else None
-                     for i in range(len(feature_names))]
-            results = Parallel(n_jobs=n_jobs, prefer="threads")(
-                delayed(_compute_feature_importance)(
-                    feat, X_base, y_arr, w_arr, predict_fn, metric_fn,
-                    baseline_score, higher_is_better, n_repeats, seed,
-                    metric_name, return_scores, is_dataframe=False, feat_idx=idx
-                )
-                for idx, (feat, seed) in enumerate(zip(feature_names, seeds))
-            )
-        else:
-            # Sequential processing
-            X_perm = X_base.copy()
-            results = []
-            for idx, feat in enumerate(feature_names):
-                orig_col = X_base[:, idx].copy()
-                scores = []
-                for _ in range(n_repeats):
-                    X_perm[:, idx] = rng.permutation(orig_col)
-                    pred_input = X_perm.copy() if safe_copy else X_perm
-                    pred = predict_fn(pred_input)
-                    score = metric_fn(y_arr, pred, w_arr)
-                    scores.append(float(score))
-                X_perm[:, idx] = orig_col
-
-                scores_arr = np.asarray(scores, dtype=float)
-                if higher_is_better:
-                    delta = baseline_score - scores_arr
-                else:
-                    delta = scores_arr - baseline_score
-                entry = {
-                    "feature": feat,
-                    "importance_mean": float(np.mean(delta)),
-                    "importance_std": float(np.std(delta)),
-                    "baseline_score": float(baseline_score),
-                    "permutation_score_mean": float(np.mean(scores_arr)),
-                    "metric": metric_name,
-                }
-                if return_scores:
-                    entry["permutation_scores"] = scores
-                results.append(entry)
-
-    df = pd.DataFrame(results)
-    df = df.sort_values(by="importance_mean", ascending=False).reset_index(drop=True)
-    return df
+from __future__ import annotations
+
+from typing import Callable, Optional, Sequence
+
+import numpy as np
+import pandas as pd
+from joblib import Parallel, delayed
+
+from ins_pricing.modelling.explain.metrics import resolve_metric
+
+
+def _compute_feature_importance(
+    feat, X_data, y_arr, w_arr, predict_fn, metric_fn,
+    baseline_score, higher_is_better, n_repeats, random_state, metric_name,
+    return_scores, is_dataframe=True, feat_idx=None
+):
+    """Helper function to compute importance for a single feature (parallelizable)."""
+    rng = np.random.default_rng(random_state)
+
+    if is_dataframe:
+        # Work on a copy for thread safety in parallel execution
+        X_work = X_data.copy()
+        orig_values = X_work[feat].to_numpy(copy=False).copy()
+        scores = []
+        for _ in range(n_repeats):
+            X_work[feat] = rng.permutation(orig_values)
+            pred = predict_fn(X_work)
+            score = metric_fn(y_arr, pred, w_arr)
+            scores.append(float(score))
+    else:
+        X_work = X_data.copy()
+        orig_col = X_data[:, feat_idx].copy()
+        scores = []
+        for _ in range(n_repeats):
+            X_work[:, feat_idx] = rng.permutation(orig_col)
+            pred = predict_fn(X_work)
+            score = metric_fn(y_arr, pred, w_arr)
+            scores.append(float(score))
+
+    scores_arr = np.asarray(scores, dtype=float)
+    if higher_is_better:
+        delta = baseline_score - scores_arr
+    else:
+        delta = scores_arr - baseline_score
+
+    entry = {
+        "feature": feat,
+        "importance_mean": float(np.mean(delta)),
+        "importance_std": float(np.std(delta)),
+        "baseline_score": float(baseline_score),
+        "permutation_score_mean": float(np.mean(scores_arr)),
+        "metric": metric_name,
+    }
+    if return_scores:
+        entry["permutation_scores"] = scores
+    return entry
+
+
+def _prepare_data(X, y, sample_weight, max_rows, rng):
+    y_arr = np.asarray(y)
+    if y_arr.ndim != 1:
+        y_arr = y_arr.reshape(-1)
+
+    w_arr = None
+    if sample_weight is not None:
+        w_arr = np.asarray(sample_weight).reshape(-1)
+        if w_arr.shape[0] != y_arr.shape[0]:
+            raise ValueError("sample_weight length must match y.")
+
+    if isinstance(X, pd.DataFrame):
+        X_data = X
+        if len(X_data) != len(y_arr):
+            raise ValueError("X and y must have the same length.")
+        if max_rows and len(X_data) > max_rows:
+            idx = rng.choice(len(X_data), size=int(max_rows), replace=False)
+            X_data = X_data.iloc[idx].copy()
+            y_arr = y_arr[idx]
+            if w_arr is not None:
+                w_arr = w_arr[idx]
+        return X_data, y_arr, w_arr
+
+    X_np = np.asarray(X)
+    if X_np.ndim != 2:
+        raise ValueError("X must be 2d when not a DataFrame.")
+    if X_np.shape[0] != y_arr.shape[0]:
+        raise ValueError("X and y must have the same length.")
+    if max_rows and X_np.shape[0] > max_rows:
+        idx = rng.choice(X_np.shape[0], size=int(max_rows), replace=False)
+        X_np = X_np[idx]
+        y_arr = y_arr[idx]
+        if w_arr is not None:
+            w_arr = w_arr[idx]
+    return X_np, y_arr, w_arr
+
+
+def permutation_importance(
+    predict_fn: Callable,
+    X,
+    y,
+    *,
+    sample_weight=None,
+    metric: str | Callable = "auto",
+    task_type: Optional[str] = None,
+    higher_is_better: Optional[bool] = None,
+    n_repeats: int = 5,
+    random_state: Optional[int] = None,
+    max_rows: Optional[int] = 5000,
+    features: Optional[Sequence[str]] = None,
+    return_scores: bool = False,
+    safe_copy: bool = False,
+    n_jobs: Optional[int] = None,
+) -> pd.DataFrame:
+    """Permutation importance on tabular data.
+
+    predict_fn should accept the same type as X (DataFrame or ndarray).
+    Set safe_copy=True if predict_fn mutates its input.
+    Set n_jobs to enable parallel processing across features (default: None = sequential).
+    """
+    rng = np.random.default_rng(random_state)
+    n_repeats = max(1, int(n_repeats))
+
+    X_data, y_arr, w_arr = _prepare_data(X, y, sample_weight, max_rows, rng)
+    metric_fn, higher_is_better, metric_name = resolve_metric(
+        metric, task_type=task_type, higher_is_better=higher_is_better
+    )
+
+    baseline_pred = predict_fn(X_data)
+    baseline_score = metric_fn(y_arr, baseline_pred, w_arr)
+
+    if isinstance(X_data, pd.DataFrame):
+        feature_names = list(X_data.columns)
+        if features is not None:
+            feature_names = [f for f in features if f in X_data.columns]
+
+        # Use parallel processing if n_jobs is specified
+        if n_jobs is not None and n_jobs != 1:
+            # Generate different random seeds for each feature to ensure reproducibility
+            seeds = [random_state + i if random_state is not None else None
+                     for i in range(len(feature_names))]
+            results = Parallel(n_jobs=n_jobs, prefer="threads")(
+                delayed(_compute_feature_importance)(
+                    feat, X_data, y_arr, w_arr, predict_fn, metric_fn,
+                    baseline_score, higher_is_better, n_repeats, seed,
+                    metric_name, return_scores, is_dataframe=True
+                )
+                for feat, seed in zip(feature_names, seeds)
+            )
+        else:
+            # Sequential processing (original optimized version)
+            X_perm = X_data if not safe_copy else X_data.copy()
+            results = []
+            for feat in feature_names:
+                # Store original values directly without extra copy
+                orig_values = X_perm[feat].to_numpy(copy=False)
+                orig_copy = orig_values.copy()  # Only copy the column, not the entire DataFrame
+                scores = []
+                for _ in range(n_repeats):
+                    X_perm[feat] = rng.permutation(orig_copy)
+                    pred = predict_fn(X_perm)
+                    score = metric_fn(y_arr, pred, w_arr)
+                    scores.append(float(score))
+                # Restore original column values
+                X_perm[feat] = orig_copy
+
+                scores_arr = np.asarray(scores, dtype=float)
+                if higher_is_better:
+                    delta = baseline_score - scores_arr
+                else:
+                    delta = scores_arr - baseline_score
+                entry = {
+                    "feature": feat,
+                    "importance_mean": float(np.mean(delta)),
+                    "importance_std": float(np.std(delta)),
+                    "baseline_score": float(baseline_score),
+                    "permutation_score_mean": float(np.mean(scores_arr)),
+                    "metric": metric_name,
+                }
+                if return_scores:
+                    entry["permutation_scores"] = scores
+                results.append(entry)
+    else:
+        if features is not None:
+            if len(features) != X_data.shape[1]:
+                raise ValueError("features length must match X columns for ndarray input.")
+            feature_names = list(features)
+        else:
+            feature_names = [f"x{i}" for i in range(X_data.shape[1])]
+
+        X_base = np.asarray(X_data)
+
+        # Use parallel processing if n_jobs is specified
+        if n_jobs is not None and n_jobs != 1:
+            seeds = [random_state + i if random_state is not None else None
+                     for i in range(len(feature_names))]
+            results = Parallel(n_jobs=n_jobs, prefer="threads")(
+                delayed(_compute_feature_importance)(
+                    feat, X_base, y_arr, w_arr, predict_fn, metric_fn,
+                    baseline_score, higher_is_better, n_repeats, seed,
+                    metric_name, return_scores, is_dataframe=False, feat_idx=idx
+                )
+                for idx, (feat, seed) in enumerate(zip(feature_names, seeds))
+            )
+        else:
+            # Sequential processing
+            X_perm = X_base.copy()
+            results = []
+            for idx, feat in enumerate(feature_names):
+                orig_col = X_base[:, idx].copy()
+                scores = []
+                for _ in range(n_repeats):
+                    X_perm[:, idx] = rng.permutation(orig_col)
+                    pred_input = X_perm.copy() if safe_copy else X_perm
+                    pred = predict_fn(pred_input)
+                    score = metric_fn(y_arr, pred, w_arr)
+                    scores.append(float(score))
+                X_perm[:, idx] = orig_col
+
+                scores_arr = np.asarray(scores, dtype=float)
+                if higher_is_better:
+                    delta = baseline_score - scores_arr
+                else:
+                    delta = scores_arr - baseline_score
+                entry = {
+                    "feature": feat,
+                    "importance_mean": float(np.mean(delta)),
+                    "importance_std": float(np.std(delta)),
+                    "baseline_score": float(baseline_score),
+                    "permutation_score_mean": float(np.mean(scores_arr)),
+                    "metric": metric_name,
+                }
+                if return_scores:
+                    entry["permutation_scores"] = scores
+                results.append(entry)
+
+    df = pd.DataFrame(results)
+    df = df.sort_values(by="importance_mean", ascending=False).reset_index(drop=True)
+    return df
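The only textual change in this file is the import on line 9: the relative `from .metrics import resolve_metric` becomes the absolute `ins_pricing.modelling.explain.metrics` path (the diff re-emits the whole file, so the remaining +/- pairs reflect byte-level, not logical, changes). Since the full signature of `permutation_importance` is visible above, here is a minimal usage sketch; the toy data, `predict_fn`, and `rmse` callable are illustrative, and it assumes `resolve_metric` passes a callable metric through unchanged:

# Usage sketch for the relocated module; toy data and metric are illustrative.
import numpy as np
import pandas as pd

from ins_pricing.modelling.explain.permutation import permutation_importance

gen = np.random.default_rng(0)
X = pd.DataFrame({
    "age": gen.uniform(18, 80, size=500),
    "vehicle_value": gen.lognormal(mean=9.0, sigma=0.5, size=500),
})
y = 100.0 + 5.0 * X["age"].to_numpy() + gen.normal(0.0, 10.0, size=500)

def predict_fn(df: pd.DataFrame) -> np.ndarray:
    # Stand-in for a fitted model's predict; only "age" carries signal here.
    return 100.0 + 5.0 * df["age"].to_numpy()

def rmse(y_true, y_pred, w):
    # Weighted RMSE; w is None when sample_weight is not supplied.
    return float(np.sqrt(np.average((np.asarray(y_true) - np.asarray(y_pred)) ** 2, weights=w)))

imp = permutation_importance(
    predict_fn, X, y,
    metric=rmse,             # explicit callable avoids guessing the "auto" string registry
    higher_is_better=False,  # lower RMSE is better, so importance = permuted - baseline
    n_repeats=5,
    random_state=42,         # per-feature seeds derive from this when n_jobs != 1
    n_jobs=2,                # joblib threads across features; None keeps the sequential path
)
print(imp[["feature", "importance_mean", "importance_std", "metric"]])

Features whose shuffling degrades RMSE the most rank first, so `age` should dominate `vehicle_value` on this toy data.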
ins_pricing/modelling/plotting/__init__.py
@@ -1,45 +1,49 @@
-from __future__ import annotations
-
-from .common import EPS, PlotStyle
-from .curves import (
+from __future__ import annotations
+
+from ins_pricing.modelling.plotting.common import EPS, PlotStyle
+from ins_pricing.modelling.plotting.curves import (
     double_lift_table,
     lift_table,
-    plot_calibration_curve,
-    plot_conversion_lift,
-    plot_double_lift_curve,
-    plot_ks_curve,
-    plot_lift_curve,
+    plot_calibration_curve,
+    plot_conversion_lift,
+    plot_double_lift_curve,
+    plot_ks_curve,
+    plot_lift_curve,
     plot_pr_curves,
     plot_roc_curves,
 )
-from .diagnostics import plot_loss_curve, plot_oneway
-from .geo import (
-    plot_geo_contour,
-    plot_geo_contour_on_map,
-    plot_geo_heatmap,
-    plot_geo_heatmap_on_map,
-)
-from .importance import plot_feature_importance, plot_shap_importance, shap_importance
-
-__all__ = [
-    "EPS",
-    "PlotStyle",
-    "double_lift_table",
-    "lift_table",
-    "plot_calibration_curve",
-    "plot_conversion_lift",
-    "plot_double_lift_curve",
-    "plot_feature_importance",
-    "plot_geo_contour",
-    "plot_geo_contour_on_map",
-    "plot_geo_heatmap",
-    "plot_geo_heatmap_on_map",
-    "plot_ks_curve",
-    "plot_lift_curve",
-    "plot_loss_curve",
-    "plot_oneway",
-    "plot_pr_curves",
+from ins_pricing.modelling.plotting.compat import PlotUtils, plot_dlift_list, plot_lift_list
+from ins_pricing.modelling.plotting.diagnostics import plot_loss_curve, plot_oneway
+from ins_pricing.modelling.plotting.geo import (
+    plot_geo_contour,
+    plot_geo_contour_on_map,
+    plot_geo_heatmap,
+    plot_geo_heatmap_on_map,
+)
+from ins_pricing.modelling.plotting.importance import plot_feature_importance, plot_shap_importance, shap_importance
+
+__all__ = [
+    "EPS",
+    "PlotStyle",
+    "double_lift_table",
+    "lift_table",
+    "plot_calibration_curve",
+    "plot_conversion_lift",
+    "plot_double_lift_curve",
+    "plot_feature_importance",
+    "plot_geo_contour",
+    "plot_geo_contour_on_map",
+    "plot_geo_heatmap",
+    "plot_geo_heatmap_on_map",
+    "plot_ks_curve",
+    "plot_lift_curve",
+    "plot_loss_curve",
+    "plot_oneway",
+    "plot_pr_curves",
     "plot_roc_curves",
     "plot_shap_importance",
     "shap_importance",
+    "PlotUtils",
+    "plot_lift_list",
+    "plot_dlift_list",
 ]
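Besides the same relative-to-absolute import rewrite, the plotting package gains a compat module (`plotting/compat.py`, +228 lines in the listing above) whose `PlotUtils`, `plot_lift_list`, and `plot_dlift_list` are now re-exported. Their signatures do not appear in this diff, so the sketch below covers only the new import surface; reading `PlotUtils` as a legacy facade is an assumption based on the module name:

# Import-surface sketch only; nothing is called because the compat helpers'
# signatures are not shown in this diff.
from ins_pricing.modelling.plotting import (
    PlotUtils,        # new in 0.5.1 via compat.py (assumed legacy facade)
    plot_dlift_list,  # new compat re-export
    plot_lift_curve,  # pre-existing export, unchanged
    plot_lift_list,   # new compat re-export
)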