ins-pricing 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. ins_pricing/README.md +60 -0
  2. ins_pricing/__init__.py +102 -0
  3. ins_pricing/governance/README.md +18 -0
  4. ins_pricing/governance/__init__.py +20 -0
  5. ins_pricing/governance/approval.py +93 -0
  6. ins_pricing/governance/audit.py +37 -0
  7. ins_pricing/governance/registry.py +99 -0
  8. ins_pricing/governance/release.py +159 -0
  9. ins_pricing/modelling/BayesOpt.py +146 -0
  10. ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
  11. ins_pricing/modelling/BayesOpt_entry.py +575 -0
  12. ins_pricing/modelling/BayesOpt_incremental.py +731 -0
  13. ins_pricing/modelling/Explain_Run.py +36 -0
  14. ins_pricing/modelling/Explain_entry.py +539 -0
  15. ins_pricing/modelling/Pricing_Run.py +36 -0
  16. ins_pricing/modelling/README.md +33 -0
  17. ins_pricing/modelling/__init__.py +44 -0
  18. ins_pricing/modelling/bayesopt/__init__.py +98 -0
  19. ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
  20. ins_pricing/modelling/bayesopt/core.py +1476 -0
  21. ins_pricing/modelling/bayesopt/models.py +2196 -0
  22. ins_pricing/modelling/bayesopt/trainers.py +2446 -0
  23. ins_pricing/modelling/bayesopt/utils.py +1021 -0
  24. ins_pricing/modelling/cli_common.py +136 -0
  25. ins_pricing/modelling/explain/__init__.py +55 -0
  26. ins_pricing/modelling/explain/gradients.py +334 -0
  27. ins_pricing/modelling/explain/metrics.py +176 -0
  28. ins_pricing/modelling/explain/permutation.py +155 -0
  29. ins_pricing/modelling/explain/shap_utils.py +146 -0
  30. ins_pricing/modelling/notebook_utils.py +284 -0
  31. ins_pricing/modelling/plotting/__init__.py +45 -0
  32. ins_pricing/modelling/plotting/common.py +63 -0
  33. ins_pricing/modelling/plotting/curves.py +572 -0
  34. ins_pricing/modelling/plotting/diagnostics.py +139 -0
  35. ins_pricing/modelling/plotting/geo.py +362 -0
  36. ins_pricing/modelling/plotting/importance.py +121 -0
  37. ins_pricing/modelling/run_logging.py +133 -0
  38. ins_pricing/modelling/tests/conftest.py +8 -0
  39. ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
  40. ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
  41. ins_pricing/modelling/tests/test_explain.py +56 -0
  42. ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
  43. ins_pricing/modelling/tests/test_graph_cache.py +33 -0
  44. ins_pricing/modelling/tests/test_plotting.py +63 -0
  45. ins_pricing/modelling/tests/test_plotting_library.py +150 -0
  46. ins_pricing/modelling/tests/test_preprocessor.py +48 -0
  47. ins_pricing/modelling/watchdog_run.py +211 -0
  48. ins_pricing/pricing/README.md +44 -0
  49. ins_pricing/pricing/__init__.py +27 -0
  50. ins_pricing/pricing/calibration.py +39 -0
  51. ins_pricing/pricing/data_quality.py +117 -0
  52. ins_pricing/pricing/exposure.py +85 -0
  53. ins_pricing/pricing/factors.py +91 -0
  54. ins_pricing/pricing/monitoring.py +99 -0
  55. ins_pricing/pricing/rate_table.py +78 -0
  56. ins_pricing/production/__init__.py +21 -0
  57. ins_pricing/production/drift.py +30 -0
  58. ins_pricing/production/monitoring.py +143 -0
  59. ins_pricing/production/scoring.py +40 -0
  60. ins_pricing/reporting/README.md +20 -0
  61. ins_pricing/reporting/__init__.py +11 -0
  62. ins_pricing/reporting/report_builder.py +72 -0
  63. ins_pricing/reporting/scheduler.py +45 -0
  64. ins_pricing/setup.py +41 -0
  65. ins_pricing v2/__init__.py +23 -0
  66. ins_pricing v2/governance/__init__.py +20 -0
  67. ins_pricing v2/governance/approval.py +93 -0
  68. ins_pricing v2/governance/audit.py +37 -0
  69. ins_pricing v2/governance/registry.py +99 -0
  70. ins_pricing v2/governance/release.py +159 -0
  71. ins_pricing v2/modelling/Explain_Run.py +36 -0
  72. ins_pricing v2/modelling/Pricing_Run.py +36 -0
  73. ins_pricing v2/modelling/__init__.py +151 -0
  74. ins_pricing v2/modelling/cli_common.py +141 -0
  75. ins_pricing v2/modelling/config.py +249 -0
  76. ins_pricing v2/modelling/config_preprocess.py +254 -0
  77. ins_pricing v2/modelling/core.py +741 -0
  78. ins_pricing v2/modelling/data_container.py +42 -0
  79. ins_pricing v2/modelling/explain/__init__.py +55 -0
  80. ins_pricing v2/modelling/explain/gradients.py +334 -0
  81. ins_pricing v2/modelling/explain/metrics.py +176 -0
  82. ins_pricing v2/modelling/explain/permutation.py +155 -0
  83. ins_pricing v2/modelling/explain/shap_utils.py +146 -0
  84. ins_pricing v2/modelling/features.py +215 -0
  85. ins_pricing v2/modelling/model_manager.py +148 -0
  86. ins_pricing v2/modelling/model_plotting.py +463 -0
  87. ins_pricing v2/modelling/models.py +2203 -0
  88. ins_pricing v2/modelling/notebook_utils.py +294 -0
  89. ins_pricing v2/modelling/plotting/__init__.py +45 -0
  90. ins_pricing v2/modelling/plotting/common.py +63 -0
  91. ins_pricing v2/modelling/plotting/curves.py +572 -0
  92. ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
  93. ins_pricing v2/modelling/plotting/geo.py +362 -0
  94. ins_pricing v2/modelling/plotting/importance.py +121 -0
  95. ins_pricing v2/modelling/run_logging.py +133 -0
  96. ins_pricing v2/modelling/tests/conftest.py +8 -0
  97. ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
  98. ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
  99. ins_pricing v2/modelling/tests/test_explain.py +56 -0
  100. ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
  101. ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
  102. ins_pricing v2/modelling/tests/test_plotting.py +63 -0
  103. ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
  104. ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
  105. ins_pricing v2/modelling/trainers.py +2447 -0
  106. ins_pricing v2/modelling/utils.py +1020 -0
  107. ins_pricing v2/modelling/watchdog_run.py +211 -0
  108. ins_pricing v2/pricing/__init__.py +27 -0
  109. ins_pricing v2/pricing/calibration.py +39 -0
  110. ins_pricing v2/pricing/data_quality.py +117 -0
  111. ins_pricing v2/pricing/exposure.py +85 -0
  112. ins_pricing v2/pricing/factors.py +91 -0
  113. ins_pricing v2/pricing/monitoring.py +99 -0
  114. ins_pricing v2/pricing/rate_table.py +78 -0
  115. ins_pricing v2/production/__init__.py +21 -0
  116. ins_pricing v2/production/drift.py +30 -0
  117. ins_pricing v2/production/monitoring.py +143 -0
  118. ins_pricing v2/production/scoring.py +40 -0
  119. ins_pricing v2/reporting/__init__.py +11 -0
  120. ins_pricing v2/reporting/report_builder.py +72 -0
  121. ins_pricing v2/reporting/scheduler.py +45 -0
  122. ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
  123. ins_pricing v2/scripts/Explain_entry.py +545 -0
  124. ins_pricing v2/scripts/__init__.py +1 -0
  125. ins_pricing v2/scripts/train.py +568 -0
  126. ins_pricing v2/setup.py +55 -0
  127. ins_pricing v2/smoke_test.py +28 -0
  128. ins_pricing-0.1.6.dist-info/METADATA +78 -0
  129. ins_pricing-0.1.6.dist-info/RECORD +169 -0
  130. ins_pricing-0.1.6.dist-info/WHEEL +5 -0
  131. ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
  132. user_packages/__init__.py +105 -0
  133. user_packages legacy/BayesOpt.py +5659 -0
  134. user_packages legacy/BayesOpt_entry.py +513 -0
  135. user_packages legacy/BayesOpt_incremental.py +685 -0
  136. user_packages legacy/Pricing_Run.py +36 -0
  137. user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
  138. user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
  139. user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
  140. user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
  141. user_packages legacy/Try/BayesOpt legacy.py +3280 -0
  142. user_packages legacy/Try/BayesOpt.py +838 -0
  143. user_packages legacy/Try/BayesOptAll.py +1569 -0
  144. user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
  145. user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
  146. user_packages legacy/Try/BayesOptSearch.py +830 -0
  147. user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
  148. user_packages legacy/Try/BayesOptV1.py +1911 -0
  149. user_packages legacy/Try/BayesOptV10.py +2973 -0
  150. user_packages legacy/Try/BayesOptV11.py +3001 -0
  151. user_packages legacy/Try/BayesOptV12.py +3001 -0
  152. user_packages legacy/Try/BayesOptV2.py +2065 -0
  153. user_packages legacy/Try/BayesOptV3.py +2209 -0
  154. user_packages legacy/Try/BayesOptV4.py +2342 -0
  155. user_packages legacy/Try/BayesOptV5.py +2372 -0
  156. user_packages legacy/Try/BayesOptV6.py +2759 -0
  157. user_packages legacy/Try/BayesOptV7.py +2832 -0
  158. user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
  159. user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
  160. user_packages legacy/Try/BayesOptV9.py +2927 -0
  161. user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
  162. user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
  163. user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
  164. user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
  165. user_packages legacy/Try/xgbbayesopt.py +523 -0
  166. user_packages legacy/__init__.py +19 -0
  167. user_packages legacy/cli_common.py +124 -0
  168. user_packages legacy/notebook_utils.py +228 -0
  169. user_packages legacy/watchdog_run.py +202 -0
@@ -0,0 +1,362 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, Sequence, Tuple
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ import matplotlib.tri as mtri
8
+
9
+ from .common import EPS, PlotStyle, finalize_figure, plt
10
+
11
+ try: # optional map basemap support
12
+ import contextily as cx
13
+ except Exception: # pragma: no cover - optional dependency
14
+ cx = None
15
+
16
+
17
+ _MERCATOR_MAX_LAT = 85.05112878
18
+ _MERCATOR_FACTOR = 20037508.34
19
+
20
+
21
def _require_contextily(func_name: str) -> None:
    """Raise ``RuntimeError`` when the optional contextily dependency is absent."""
    if cx is not None:
        return
    raise RuntimeError(
        f"{func_name} requires contextily. Install it via 'pip install contextily'."
    )
26
+
27
+
28
def _lonlat_to_mercator(lon: np.ndarray, lat: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Project WGS84 lon/lat degrees to Web Mercator (EPSG:3857) metres.

    Latitudes are clipped to the Mercator validity range so the projection
    never produces infinities at the poles.
    """
    lon_deg = np.asarray(lon, dtype=float)
    lat_deg = np.asarray(lat, dtype=float)
    lat_deg = np.clip(lat_deg, -_MERCATOR_MAX_LAT, _MERCATOR_MAX_LAT)
    merc_x = lon_deg * _MERCATOR_FACTOR / 180.0
    merc_y = np.log(np.tan((90.0 + lat_deg) * np.pi / 360.0)) * _MERCATOR_FACTOR / np.pi
    return merc_x, merc_y
35
+
36
+
37
+ def _apply_bounds(ax: plt.Axes, x: np.ndarray, y: np.ndarray, padding: float) -> None:
38
+ x_min, x_max = float(np.min(x)), float(np.max(x))
39
+ y_min, y_max = float(np.min(y)), float(np.max(y))
40
+ pad_x = (x_max - x_min) * padding
41
+ pad_y = (y_max - y_min) * padding
42
+ if pad_x == 0:
43
+ pad_x = 1.0
44
+ if pad_y == 0:
45
+ pad_y = 1.0
46
+ ax.set_xlim(x_min - pad_x, x_max + pad_x)
47
+ ax.set_ylim(y_min - pad_y, y_max + pad_y)
48
+
49
+
50
def _resolve_basemap(source):
    """Resolve a dotted provider path (e.g. ``"CartoDB.Positron"``) to a
    contextily provider object; non-string sources pass through unchanged."""
    if cx is None or source is None:
        return source
    if not isinstance(source, str):
        return source
    node = cx.providers
    for segment in source.split("."):
        # Provider trees mix dict-style and attribute-style access.
        node = node[segment] if isinstance(node, dict) else getattr(node, segment)
    return node
62
+
63
+
64
+ def _sanitize_geo(
65
+ df: pd.DataFrame,
66
+ x_col: str,
67
+ y_col: str,
68
+ value_col: str,
69
+ weight_col: Optional[str] = None,
70
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]]:
71
+ x = pd.to_numeric(df[x_col], errors="coerce").to_numpy(dtype=float)
72
+ y = pd.to_numeric(df[y_col], errors="coerce").to_numpy(dtype=float)
73
+ z = pd.to_numeric(df[value_col], errors="coerce").to_numpy(dtype=float)
74
+ w = None
75
+ if weight_col:
76
+ w = pd.to_numeric(df[weight_col], errors="coerce").to_numpy(dtype=float)
77
+
78
+ if w is None:
79
+ mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(z)
80
+ else:
81
+ mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(z) & np.isfinite(w)
82
+ w = w[mask]
83
+ return x[mask], y[mask], z[mask], w
84
+
85
+
86
+ def _downsample_points(
87
+ x: np.ndarray,
88
+ y: np.ndarray,
89
+ z: np.ndarray,
90
+ w: Optional[np.ndarray],
91
+ max_points: Optional[int],
92
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]]:
93
+ if max_points is None:
94
+ return x, y, z, w
95
+ max_points = int(max_points)
96
+ if max_points <= 0 or len(x) <= max_points:
97
+ return x, y, z, w
98
+ rng = np.random.default_rng(13)
99
+ idx = rng.choice(len(x), size=max_points, replace=False)
100
+ if w is None:
101
+ return x[idx], y[idx], z[idx], None
102
+ return x[idx], y[idx], z[idx], w[idx]
103
+
104
+
105
def plot_geo_heatmap(
    df: pd.DataFrame,
    *,
    x_col: str,
    y_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    bins: int | Tuple[int, int] = 50,
    agg: str = "mean",
    cmap: str = "YlOrRd",
    title: str = "Geo Heatmap",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Render a 2D binned heatmap of *value_col* over x/y coordinates.

    Values are aggregated per bin as a sum, a plain mean, or a weighted mean
    when *weight_col* is provided.

    Raises:
        ValueError: if *agg* is not 'mean' or 'sum'.
    """
    style = style or PlotStyle()
    if agg not in {"mean", "sum"}:
        raise ValueError("agg must be 'mean' or 'sum'.")
    xs, ys, vals, wts = _sanitize_geo(df, x_col, y_col, value_col, weight_col)

    if isinstance(bins, int):
        bins = (bins, bins)

    # Aggregate onto a regular grid via 2D histograms.
    if wts is None:
        value_sum, x_edges, y_edges = np.histogram2d(xs, ys, bins=bins, weights=vals)
        if agg == "sum":
            grid = value_sum
        else:
            counts, _, _ = np.histogram2d(xs, ys, bins=bins)
            grid = value_sum / np.maximum(counts, 1.0)
    else:
        weight_sum, x_edges, y_edges = np.histogram2d(xs, ys, bins=bins, weights=wts)
        weighted_sum, _, _ = np.histogram2d(xs, ys, bins=bins, weights=vals * wts)
        grid = weighted_sum / np.maximum(weight_sum, EPS)

    if ax is None:
        created_fig = True
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        created_fig = False
        fig = ax.figure

    # histogram2d returns x-major data, so transpose for image display.
    image = ax.imshow(
        grid.T,
        origin="lower",
        extent=[x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]],
        aspect="auto",
        cmap=cmap,
    )
    cbar = fig.colorbar(image, ax=ax)
    cbar.set_label(value_col, fontsize=style.label_size)
    cbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_xlabel(x_col, fontsize=style.label_size)
    ax.set_ylabel(y_col, fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
167
+
168
+
169
def plot_geo_contour(
    df: pd.DataFrame,
    *,
    x_col: str,
    y_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    max_points: Optional[int] = None,
    levels: int | Sequence[float] = 10,
    cmap: str = "viridis",
    title: str = "Geo Contour",
    ax: Optional[plt.Axes] = None,
    show_points: bool = False,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Draw filled contours of *value_col* on a triangulation of the x/y points.

    When *weight_col* is given the plotted value is ``value * weight``. Points
    may be randomly downsampled to *max_points* before triangulating.
    """
    style = style or PlotStyle()
    xs, ys, vals, wts = _sanitize_geo(df, x_col, y_col, value_col, weight_col)
    xs, ys, vals, wts = _downsample_points(xs, ys, vals, wts, max_points)

    if wts is not None:
        vals = vals * wts

    mesh = mtri.Triangulation(xs, ys)

    if ax is None:
        created_fig = True
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        created_fig = False
        fig = ax.figure

    filled = ax.tricontourf(mesh, vals, levels=levels, cmap=cmap)
    if show_points:
        ax.scatter(xs, ys, s=6, c="k", alpha=0.2)
    cbar = fig.colorbar(filled, ax=ax)
    cbar.set_label(value_col, fontsize=style.label_size)
    cbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_xlabel(x_col, fontsize=style.label_size)
    ax.set_ylabel(y_col, fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
217
+
218
+
219
def plot_geo_heatmap_on_map(
    df: pd.DataFrame,
    *,
    lon_col: str,
    lat_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    bins: int | Tuple[int, int] = 100,
    agg: str = "mean",
    cmap: str = "YlOrRd",
    alpha: float = 0.6,
    basemap: Optional[object] = "CartoDB.Positron",
    zoom: Optional[int] = None,
    padding: float = 0.05,
    title: str = "Geo Heatmap (Map)",
    ax: Optional[plt.Axes] = None,
    show_points: bool = False,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Render a binned heatmap of *value_col* over a web-tile basemap.

    Coordinates are WGS84 lon/lat degrees, projected to Web Mercator
    (EPSG:3857) before binning. 'mean' is a weighted mean when *weight_col*
    is given; 'sum' accumulates raw values.

    Raises:
        RuntimeError: if the optional contextily dependency is missing.
        ValueError: if *agg* is not 'mean' or 'sum'.
    """
    _require_contextily("plot_geo_heatmap_on_map")
    style = style or PlotStyle()
    if agg not in {"mean", "sum"}:
        raise ValueError("agg must be 'mean' or 'sum'.")
    lon, lat, z, w = _sanitize_geo(df, lon_col, lat_col, value_col, weight_col)
    x, y = _lonlat_to_mercator(lon, lat)

    if isinstance(bins, int):
        bins = (bins, bins)

    if w is None:
        sum_z, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=z)
        if agg == "sum":
            grid = sum_z
        else:
            count, _, _ = np.histogram2d(x, y, bins=bins)
            grid = sum_z / np.maximum(count, 1.0)
    else:
        sum_w, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=w)
        sum_zw, _, _ = np.histogram2d(x, y, bins=bins, weights=z * w)
        grid = sum_zw / np.maximum(sum_w, EPS)

    created_fig = ax is None
    if created_fig:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    # Fix the view before fetching tiles: add_basemap reads the axes extent.
    _apply_bounds(ax, x, y, padding)
    ax.set_aspect("equal", adjustable="box")

    source = _resolve_basemap(basemap)
    if source is not None:
        if zoom is None:
            cx.add_basemap(ax, source=source, crs="EPSG:3857")
        else:
            cx.add_basemap(ax, source=source, crs="EPSG:3857", zoom=zoom)

    im = ax.imshow(
        grid.T,
        origin="lower",
        extent=[x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]],
        # BUG FIX: aspect was "auto", which silently overrode the explicit
        # set_aspect("equal") above (imshow re-sets the axes aspect) and
        # stretched the Mercator basemap tiles. Keep the map's equal aspect.
        aspect="equal",
        cmap=cmap,
        alpha=alpha,
    )
    if show_points:
        ax.scatter(x, y, s=6, c="k", alpha=0.25)

    cbar = fig.colorbar(im, ax=ax)
    cbar.set_label(value_col, fontsize=style.label_size)
    cbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
300
+
301
+
302
def plot_geo_contour_on_map(
    df: pd.DataFrame,
    *,
    lon_col: str,
    lat_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    max_points: Optional[int] = None,
    levels: int | Sequence[float] = 10,
    cmap: str = "viridis",
    alpha: float = 0.6,
    basemap: Optional[object] = "CartoDB.Positron",
    zoom: Optional[int] = None,
    padding: float = 0.05,
    title: str = "Geo Contour (Map)",
    ax: Optional[plt.Axes] = None,
    show_points: bool = False,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Draw filled contours of *value_col* over a web-tile basemap.

    Lon/lat degrees are projected to Web Mercator; when *weight_col* is given
    the plotted value is ``value * weight``.

    Raises:
        RuntimeError: if the optional contextily dependency is missing.
    """
    _require_contextily("plot_geo_contour_on_map")
    style = style or PlotStyle()
    lon, lat, vals, wts = _sanitize_geo(df, lon_col, lat_col, value_col, weight_col)
    lon, lat, vals, wts = _downsample_points(lon, lat, vals, wts, max_points)
    merc_x, merc_y = _lonlat_to_mercator(lon, lat)
    if wts is not None:
        vals = vals * wts

    if ax is None:
        created_fig = True
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        created_fig = False
        fig = ax.figure

    # Fix the view before fetching tiles: add_basemap reads the axes extent.
    _apply_bounds(ax, merc_x, merc_y, padding)
    ax.set_aspect("equal", adjustable="box")

    source = _resolve_basemap(basemap)
    if source is not None:
        if zoom is None:
            cx.add_basemap(ax, source=source, crs="EPSG:3857")
        else:
            cx.add_basemap(ax, source=source, crs="EPSG:3857", zoom=zoom)

    mesh = mtri.Triangulation(merc_x, merc_y)
    filled = ax.tricontourf(mesh, vals, levels=levels, cmap=cmap, alpha=alpha)
    if show_points:
        ax.scatter(merc_x, merc_y, s=6, c="k", alpha=0.25)

    cbar = fig.colorbar(filled, ax=ax)
    cbar.set_label(value_col, fontsize=style.label_size)
    cbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
@@ -0,0 +1,121 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Mapping, Optional, Sequence, Tuple
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ from .common import PlotStyle, finalize_figure, plt
9
+
10
+
11
+ def _to_series(
12
+ importance: Mapping[str, float]
13
+ | Sequence[Tuple[str, float]]
14
+ | pd.Series
15
+ | np.ndarray,
16
+ feature_names: Optional[Sequence[str]] = None,
17
+ ) -> pd.Series:
18
+ if isinstance(importance, pd.Series):
19
+ return importance.copy()
20
+ if isinstance(importance, Mapping):
21
+ return pd.Series(dict(importance))
22
+ if isinstance(importance, np.ndarray):
23
+ if feature_names is None:
24
+ raise ValueError("feature_names is required when importance is an array.")
25
+ return pd.Series(importance, index=list(feature_names))
26
+ return pd.Series(dict(importance))
27
+
28
+
29
def shap_importance(
    shap_values: np.ndarray,
    feature_names: Sequence[str],
) -> pd.Series:
    """Return the mean absolute SHAP value per feature as a Series.

    A 3-d input is reduced by taking its first slice along axis 0 (presumably
    the first output/class — confirm against the caller) before averaging.

    Raises:
        ValueError: when the (possibly reduced) values are not 2-d.
    """
    values = shap_values[0] if shap_values.ndim == 3 else shap_values
    if values.ndim != 2:
        raise ValueError("shap_values should be 2d (n_samples, n_features).")
    return pd.Series(np.abs(values).mean(axis=0), index=list(feature_names))
39
+
40
+
41
def plot_feature_importance(
    importance: Mapping[str, float]
    | Sequence[Tuple[str, float]]
    | pd.Series
    | np.ndarray,
    *,
    feature_names: Optional[Sequence[str]] = None,
    top_n: int = 30,
    title: str = "Feature Importance",
    sort_by: str = "abs",
    descending: bool = True,
    show_values: bool = False,
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Horizontal bar chart of feature importances.

    Importances are sorted by absolute or signed value, truncated to *top_n*
    entries (non-positive keeps all), and drawn top-down from most important.

    Raises:
        ValueError: for an unknown *sort_by* or an array without names.
    """
    style = style or PlotStyle()
    scores = _to_series(importance, feature_names=feature_names)
    scores = scores.replace([np.inf, -np.inf], np.nan).dropna()

    if sort_by not in {"abs", "value"}:
        raise ValueError("sort_by must be 'abs' or 'value'.")
    ranking = scores.abs() if sort_by == "abs" else scores
    scores = scores.loc[ranking.sort_values(ascending=not descending).index]

    if top_n > 0:
        scores = scores.head(int(top_n))

    created_fig = ax is None
    if created_fig:
        # Grow the figure with the bar count so labels stay readable.
        height = max(3.0, 0.3 * len(scores))
        fig, ax = plt.subplots(figsize=(style.figsize[0], height))
    else:
        fig = ax.figure

    positions = np.arange(len(scores))
    ax.barh(positions, scores.values, color=style.palette[0])
    ax.set_yticks(positions)
    ax.set_yticklabels(scores.index, fontsize=style.tick_size)
    ax.invert_yaxis()
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="x", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, axis="x", linestyle=style.grid_style, alpha=style.grid_alpha)

    if show_values:
        for row, value in enumerate(scores.values):
            ax.text(value, row, f" {value:.3f}", va="center", fontsize=style.tick_size)

    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
95
+
96
+
97
def plot_shap_importance(
    shap_values: np.ndarray,
    feature_names: Sequence[str],
    *,
    top_n: int = 30,
    title: str = "SHAP Importance",
    show_values: bool = False,
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Bar-plot mean |SHAP| per feature by delegating to plot_feature_importance."""
    scores = shap_importance(shap_values, feature_names)
    return plot_feature_importance(
        scores,
        top_n=top_n,
        title=title,
        sort_by="abs",
        descending=True,
        show_values=show_values,
        ax=ax,
        show=show,
        save_path=save_path,
        style=style,
    )
@@ -0,0 +1,133 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import sys
6
+ import tempfile
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Optional, TextIO
10
+
11
+ _LOGGING_CONFIGURED = False
12
+ _LOG_PATH: Optional[Path] = None
13
+ _LOG_FILE: Optional[TextIO] = None
14
+
15
+ _TRUTHY = {"1", "true", "yes", "y", "on"}
16
+
17
+
18
+ class _TeeStream:
19
+ def __init__(self, primary: TextIO, secondary: TextIO) -> None:
20
+ self._primary = primary
21
+ self._secondary = secondary
22
+
23
+ def write(self, data: str) -> int:
24
+ if not data:
25
+ return 0
26
+ try:
27
+ self._primary.write(data)
28
+ except Exception:
29
+ pass
30
+ try:
31
+ self._secondary.write(data)
32
+ except Exception:
33
+ pass
34
+ return len(data)
35
+
36
+ def flush(self) -> None:
37
+ for stream in (self._primary, self._secondary):
38
+ try:
39
+ stream.flush()
40
+ except Exception:
41
+ pass
42
+
43
+ def isatty(self) -> bool:
44
+ return bool(getattr(self._primary, "isatty", lambda: False)())
45
+
46
+ def fileno(self) -> int:
47
+ return self._primary.fileno()
48
+
49
+ def __getattr__(self, name: str):
50
+ return getattr(self._primary, name)
51
+
52
+
53
def _is_truthy(value: Optional[str]) -> bool:
    """Interpret a string-ish env value as a boolean flag (None is falsy)."""
    return str(value).strip().lower() in _TRUTHY
55
+
56
+
57
+ def _resolve_log_dir(log_dir: Optional[str | Path]) -> Optional[Path]:
58
+ candidates: list[Path] = []
59
+ if log_dir:
60
+ candidates.append(Path(log_dir).expanduser())
61
+ env_dir = os.environ.get("INS_PRICING_LOG_DIR")
62
+ if env_dir:
63
+ candidates.append(Path(env_dir).expanduser())
64
+ candidates.append(Path.cwd() / "logs")
65
+ candidates.append(Path.home() / ".ins_pricing" / "logs")
66
+ candidates.append(Path(tempfile.gettempdir()) / "ins_pricing_logs")
67
+
68
+ for cand in candidates:
69
+ try:
70
+ cand.mkdir(parents=True, exist_ok=True)
71
+ return cand
72
+ except Exception:
73
+ continue
74
+ return None
75
+
76
+
77
+ def _build_log_filename(prefix: str) -> str:
78
+ ts = datetime.now().strftime("%Y%m%d_%H%M%S")
79
+ rank = (
80
+ os.environ.get("RANK")
81
+ or os.environ.get("LOCAL_RANK")
82
+ or os.environ.get("SLURM_PROCID")
83
+ )
84
+ suffix = f"r{rank}" if rank is not None else f"pid{os.getpid()}"
85
+ safe_prefix = "".join(
86
+ ch if ch.isalnum() or ch in "-_." else "_" for ch in prefix)
87
+ return f"{safe_prefix}_{ts}_{suffix}.log"
88
+
89
+
90
def configure_run_logging(
    *,
    prefix: str = "ins_pricing",
    log_dir: Optional[str | Path] = None,
    level: int = logging.INFO,
    announce: bool = True,
) -> Optional[Path]:
    """Tee stdout/stderr into a timestamped log file and set up root logging.

    Idempotent: subsequent calls return the path chosen by the first
    successful call. Returns None when logging is disabled via
    INS_PRICING_LOG_DISABLE, no writable directory exists, or the log file
    cannot be opened.
    """
    global _LOGGING_CONFIGURED, _LOG_PATH, _LOG_FILE

    if _LOGGING_CONFIGURED:
        return _LOG_PATH
    if _is_truthy(os.environ.get("INS_PRICING_LOG_DISABLE")):
        return None

    target_dir = _resolve_log_dir(log_dir)
    if target_dir is None:
        return None

    path = target_dir / _build_log_filename(prefix)
    try:
        handle = path.open("a", encoding="utf-8")
    except Exception:
        return None

    # Mirror all interpreter output into the log file from here on.
    sys.stdout = _TeeStream(sys.stdout, handle)  # type: ignore[assignment]
    sys.stderr = _TeeStream(sys.stderr, handle)  # type: ignore[assignment]
    _LOG_FILE = handle
    _LOG_PATH = path
    _LOGGING_CONFIGURED = True

    root = logging.getLogger()
    if root.handlers:
        # Respect an existing handler setup; only adjust the level.
        root.setLevel(level)
    else:
        logging.basicConfig(
            level=level,
            format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
            handlers=[logging.StreamHandler(sys.stdout)],
        )

    if announce:
        print(f"[ins_pricing] log saved to {path}", flush=True)

    return path
@@ -0,0 +1,8 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ ROOT = Path(__file__).resolve().parents[1]
7
+ if str(ROOT) not in sys.path:
8
+ sys.path.insert(0, str(ROOT))
@@ -0,0 +1,66 @@
1
+ import types
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ import pytest
6
+
7
+ pytest.importorskip("torch")
8
+ pytest.importorskip("optuna")
9
+ pytest.importorskip("statsmodels")
10
+ pytest.importorskip("xgboost")
11
+
12
+ from ins_pricing.bayesopt.trainers import TrainerBase
13
+
14
+
15
class DummyTrainer(TrainerBase):
    """Minimal concrete TrainerBase used only to exercise cross_val_generic."""

    def __init__(self):
        context = types.SimpleNamespace(prop_test=0.2, rand_seed=123)
        super().__init__(context, "Dummy", "Dummy")

    def train(self) -> None:  # pragma: no cover - not used
        raise NotImplementedError
22
+
23
+
24
def test_cross_val_generic_iterates_all_splits():
    """cross_val_generic must visit every provided split and average metrics."""
    trainer = DummyTrainer()

    features = pd.DataFrame({"x": np.arange(12, dtype=float)})
    target = pd.Series(np.arange(12, dtype=float))
    weights = pd.Series(np.ones(12, dtype=float))

    class ZeroModel:
        """Stub estimator: fitting is a no-op, predictions are all zeros."""

        def fit(self, X_train, y_train, sample_weight=None):
            return self

        def predict(self, X_val):
            return np.zeros(len(X_val))

    fold_sizes = []

    def metric_fn(y_true, y_pred, weight):
        fold_sizes.append(len(y_true))
        return float(np.mean(y_pred))

    splits = [
        (np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), np.array([10, 11])),
        (np.array([2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), np.array([0, 1])),
        (np.array([0, 2, 4, 6, 8, 10]), np.array([1, 3, 5, 7, 9, 11])),
    ]

    result = trainer.cross_val_generic(
        trial=object(),
        hyperparameter_space={"p": lambda _t: 1.0},
        data_provider=lambda: (features, target, weights),
        model_builder=lambda _params: ZeroModel(),
        metric_fn=metric_fn,
        splitter=splits,
    )

    # Zero predictions give a zero metric, and every split must be scored.
    assert result == 0.0
    assert len(fold_sizes) == len(splits)
@@ -0,0 +1,18 @@
1
+ import pytest
2
+
3
+ pytest.importorskip("torch")
4
+
5
+ from ins_pricing.bayesopt.utils import DistributedUtils
6
+
7
+
8
+ def test_setup_ddp_without_env(monkeypatch):
9
+ monkeypatch.delenv("RANK", raising=False)
10
+ monkeypatch.delenv("WORLD_SIZE", raising=False)
11
+ monkeypatch.delenv("LOCAL_RANK", raising=False)
12
+
13
+ ok, local_rank, rank, world_size = DistributedUtils.setup_ddp()
14
+
15
+ assert ok is False
16
+ assert local_rank == 0
17
+ assert rank == 0
18
+ assert world_size == 1