ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. ins_pricing/README.md +9 -6
  2. ins_pricing/__init__.py +3 -11
  3. ins_pricing/cli/BayesOpt_entry.py +24 -0
  4. ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
  5. ins_pricing/cli/Explain_Run.py +25 -0
  6. ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
  7. ins_pricing/cli/Pricing_Run.py +25 -0
  8. ins_pricing/cli/__init__.py +1 -0
  9. ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
  10. ins_pricing/cli/utils/__init__.py +1 -0
  11. ins_pricing/cli/utils/cli_common.py +320 -0
  12. ins_pricing/cli/utils/cli_config.py +375 -0
  13. ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
  14. {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
  15. ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
  16. ins_pricing/docs/modelling/README.md +34 -0
  17. ins_pricing/modelling/__init__.py +57 -6
  18. ins_pricing/modelling/core/__init__.py +1 -0
  19. ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
  20. ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
  21. ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
  22. ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
  23. ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
  24. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
  25. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
  26. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
  27. ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
  28. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
  29. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
  30. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
  31. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
  32. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
  33. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
  34. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
  35. ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
  36. ins_pricing/modelling/core/evaluation.py +115 -0
  37. ins_pricing/production/__init__.py +4 -0
  38. ins_pricing/production/preprocess.py +71 -0
  39. ins_pricing/setup.py +10 -5
  40. {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
  41. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
  42. ins_pricing-0.2.0.dist-info/RECORD +125 -0
  43. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
  44. ins_pricing/modelling/BayesOpt_entry.py +0 -633
  45. ins_pricing/modelling/Explain_Run.py +0 -36
  46. ins_pricing/modelling/Pricing_Run.py +0 -36
  47. ins_pricing/modelling/README.md +0 -33
  48. ins_pricing/modelling/bayesopt/models.py +0 -2196
  49. ins_pricing/modelling/bayesopt/trainers.py +0 -2446
  50. ins_pricing/modelling/cli_common.py +0 -136
  51. ins_pricing/modelling/tests/test_plotting.py +0 -63
  52. ins_pricing/modelling/watchdog_run.py +0 -211
  53. ins_pricing-0.1.11.dist-info/RECORD +0 -169
  54. ins_pricing_gemini/__init__.py +0 -23
  55. ins_pricing_gemini/governance/__init__.py +0 -20
  56. ins_pricing_gemini/governance/approval.py +0 -93
  57. ins_pricing_gemini/governance/audit.py +0 -37
  58. ins_pricing_gemini/governance/registry.py +0 -99
  59. ins_pricing_gemini/governance/release.py +0 -159
  60. ins_pricing_gemini/modelling/Explain_Run.py +0 -36
  61. ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
  62. ins_pricing_gemini/modelling/__init__.py +0 -151
  63. ins_pricing_gemini/modelling/cli_common.py +0 -141
  64. ins_pricing_gemini/modelling/config.py +0 -249
  65. ins_pricing_gemini/modelling/config_preprocess.py +0 -254
  66. ins_pricing_gemini/modelling/core.py +0 -741
  67. ins_pricing_gemini/modelling/data_container.py +0 -42
  68. ins_pricing_gemini/modelling/explain/__init__.py +0 -55
  69. ins_pricing_gemini/modelling/explain/gradients.py +0 -334
  70. ins_pricing_gemini/modelling/explain/metrics.py +0 -176
  71. ins_pricing_gemini/modelling/explain/permutation.py +0 -155
  72. ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
  73. ins_pricing_gemini/modelling/features.py +0 -215
  74. ins_pricing_gemini/modelling/model_manager.py +0 -148
  75. ins_pricing_gemini/modelling/model_plotting.py +0 -463
  76. ins_pricing_gemini/modelling/models.py +0 -2203
  77. ins_pricing_gemini/modelling/notebook_utils.py +0 -294
  78. ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
  79. ins_pricing_gemini/modelling/plotting/common.py +0 -63
  80. ins_pricing_gemini/modelling/plotting/curves.py +0 -572
  81. ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
  82. ins_pricing_gemini/modelling/plotting/geo.py +0 -362
  83. ins_pricing_gemini/modelling/plotting/importance.py +0 -121
  84. ins_pricing_gemini/modelling/run_logging.py +0 -133
  85. ins_pricing_gemini/modelling/tests/conftest.py +0 -8
  86. ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
  87. ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
  88. ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
  89. ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
  90. ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
  91. ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
  92. ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
  93. ins_pricing_gemini/modelling/trainers.py +0 -2447
  94. ins_pricing_gemini/modelling/utils.py +0 -1020
  95. ins_pricing_gemini/pricing/__init__.py +0 -27
  96. ins_pricing_gemini/pricing/calibration.py +0 -39
  97. ins_pricing_gemini/pricing/data_quality.py +0 -117
  98. ins_pricing_gemini/pricing/exposure.py +0 -85
  99. ins_pricing_gemini/pricing/factors.py +0 -91
  100. ins_pricing_gemini/pricing/monitoring.py +0 -99
  101. ins_pricing_gemini/pricing/rate_table.py +0 -78
  102. ins_pricing_gemini/production/__init__.py +0 -21
  103. ins_pricing_gemini/production/drift.py +0 -30
  104. ins_pricing_gemini/production/monitoring.py +0 -143
  105. ins_pricing_gemini/production/scoring.py +0 -40
  106. ins_pricing_gemini/reporting/__init__.py +0 -11
  107. ins_pricing_gemini/reporting/report_builder.py +0 -72
  108. ins_pricing_gemini/reporting/scheduler.py +0 -45
  109. ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
  110. ins_pricing_gemini/scripts/Explain_entry.py +0 -545
  111. ins_pricing_gemini/scripts/__init__.py +0 -1
  112. ins_pricing_gemini/scripts/train.py +0 -568
  113. ins_pricing_gemini/setup.py +0 -55
  114. ins_pricing_gemini/smoke_test.py +0 -28
  115. /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
  116. /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
  117. /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
  118. /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
  119. /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
  120. /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
  121. /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
  122. /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
  123. /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
  124. /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
  125. /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
  126. {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
ins_pricing_gemini/modelling/plotting/geo.py (deleted; inferred from the -362 line count in entry 82 and the geo-plotting content below)
@@ -1,362 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Optional, Sequence, Tuple
4
-
5
- import numpy as np
6
- import pandas as pd
7
- import matplotlib.tri as mtri
8
-
9
- from .common import EPS, PlotStyle, finalize_figure, plt
10
-
11
- try: # optional map basemap support
12
- import contextily as cx
13
- except Exception: # pragma: no cover - optional dependency
14
- cx = None
15
-
16
-
17
- _MERCATOR_MAX_LAT = 85.05112878
18
- _MERCATOR_FACTOR = 20037508.34
19
-
20
-
21
- def _require_contextily(func_name: str) -> None:
22
- if cx is None:
23
- raise RuntimeError(
24
- f"{func_name} requires contextily. Install it via 'pip install contextily'."
25
- )
26
-
27
-
28
- def _lonlat_to_mercator(lon: np.ndarray, lat: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
29
- lon = np.asarray(lon, dtype=float)
30
- lat = np.asarray(lat, dtype=float)
31
- lat = np.clip(lat, -_MERCATOR_MAX_LAT, _MERCATOR_MAX_LAT)
32
- x = lon * _MERCATOR_FACTOR / 180.0
33
- y = np.log(np.tan((90.0 + lat) * np.pi / 360.0)) * _MERCATOR_FACTOR / np.pi
34
- return x, y
35
-
36
-
37
- def _apply_bounds(ax: plt.Axes, x: np.ndarray, y: np.ndarray, padding: float) -> None:
38
- x_min, x_max = float(np.min(x)), float(np.max(x))
39
- y_min, y_max = float(np.min(y)), float(np.max(y))
40
- pad_x = (x_max - x_min) * padding
41
- pad_y = (y_max - y_min) * padding
42
- if pad_x == 0:
43
- pad_x = 1.0
44
- if pad_y == 0:
45
- pad_y = 1.0
46
- ax.set_xlim(x_min - pad_x, x_max + pad_x)
47
- ax.set_ylim(y_min - pad_y, y_max + pad_y)
48
-
49
-
50
- def _resolve_basemap(source):
51
- if cx is None or source is None:
52
- return source
53
- if isinstance(source, str):
54
- provider = cx.providers
55
- for part in source.split("."):
56
- if isinstance(provider, dict):
57
- provider = provider[part]
58
- else:
59
- provider = getattr(provider, part)
60
- return provider
61
- return source
62
-
63
-
64
- def _sanitize_geo(
65
- df: pd.DataFrame,
66
- x_col: str,
67
- y_col: str,
68
- value_col: str,
69
- weight_col: Optional[str] = None,
70
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]]:
71
- x = pd.to_numeric(df[x_col], errors="coerce").to_numpy(dtype=float)
72
- y = pd.to_numeric(df[y_col], errors="coerce").to_numpy(dtype=float)
73
- z = pd.to_numeric(df[value_col], errors="coerce").to_numpy(dtype=float)
74
- w = None
75
- if weight_col:
76
- w = pd.to_numeric(df[weight_col], errors="coerce").to_numpy(dtype=float)
77
-
78
- if w is None:
79
- mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(z)
80
- else:
81
- mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(z) & np.isfinite(w)
82
- w = w[mask]
83
- return x[mask], y[mask], z[mask], w
84
-
85
-
86
- def _downsample_points(
87
- x: np.ndarray,
88
- y: np.ndarray,
89
- z: np.ndarray,
90
- w: Optional[np.ndarray],
91
- max_points: Optional[int],
92
- ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]]:
93
- if max_points is None:
94
- return x, y, z, w
95
- max_points = int(max_points)
96
- if max_points <= 0 or len(x) <= max_points:
97
- return x, y, z, w
98
- rng = np.random.default_rng(13)
99
- idx = rng.choice(len(x), size=max_points, replace=False)
100
- if w is None:
101
- return x[idx], y[idx], z[idx], None
102
- return x[idx], y[idx], z[idx], w[idx]
103
-
104
-
105
- def plot_geo_heatmap(
106
- df: pd.DataFrame,
107
- *,
108
- x_col: str,
109
- y_col: str,
110
- value_col: str,
111
- weight_col: Optional[str] = None,
112
- bins: int | Tuple[int, int] = 50,
113
- agg: str = "mean",
114
- cmap: str = "YlOrRd",
115
- title: str = "Geo Heatmap",
116
- ax: Optional[plt.Axes] = None,
117
- show: bool = False,
118
- save_path: Optional[str] = None,
119
- style: Optional[PlotStyle] = None,
120
- ) -> plt.Figure:
121
- style = style or PlotStyle()
122
- if agg not in {"mean", "sum"}:
123
- raise ValueError("agg must be 'mean' or 'sum'.")
124
- x, y, z, w = _sanitize_geo(df, x_col, y_col, value_col, weight_col)
125
-
126
- if isinstance(bins, int):
127
- bins = (bins, bins)
128
-
129
- if w is None:
130
- sum_z, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=z)
131
- if agg == "sum":
132
- grid = sum_z
133
- else:
134
- count, _, _ = np.histogram2d(x, y, bins=bins)
135
- grid = sum_z / np.maximum(count, 1.0)
136
- else:
137
- sum_w, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=w)
138
- sum_zw, _, _ = np.histogram2d(x, y, bins=bins, weights=z * w)
139
- grid = sum_zw / np.maximum(sum_w, EPS)
140
-
141
- created_fig = ax is None
142
- if created_fig:
143
- fig, ax = plt.subplots(figsize=style.figsize)
144
- else:
145
- fig = ax.figure
146
-
147
- im = ax.imshow(
148
- grid.T,
149
- origin="lower",
150
- extent=[x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]],
151
- aspect="auto",
152
- cmap=cmap,
153
- )
154
- cbar = fig.colorbar(im, ax=ax)
155
- cbar.set_label(value_col, fontsize=style.label_size)
156
- cbar.ax.tick_params(labelsize=style.tick_size)
157
-
158
- ax.set_xlabel(x_col, fontsize=style.label_size)
159
- ax.set_ylabel(y_col, fontsize=style.label_size)
160
- ax.set_title(title, fontsize=style.title_size)
161
- ax.tick_params(axis="both", labelsize=style.tick_size)
162
-
163
- if created_fig:
164
- finalize_figure(fig, save_path=save_path, show=show, style=style)
165
-
166
- return fig
167
-
168
-
169
- def plot_geo_contour(
170
- df: pd.DataFrame,
171
- *,
172
- x_col: str,
173
- y_col: str,
174
- value_col: str,
175
- weight_col: Optional[str] = None,
176
- max_points: Optional[int] = None,
177
- levels: int | Sequence[float] = 10,
178
- cmap: str = "viridis",
179
- title: str = "Geo Contour",
180
- ax: Optional[plt.Axes] = None,
181
- show_points: bool = False,
182
- show: bool = False,
183
- save_path: Optional[str] = None,
184
- style: Optional[PlotStyle] = None,
185
- ) -> plt.Figure:
186
- style = style or PlotStyle()
187
- x, y, z, w = _sanitize_geo(df, x_col, y_col, value_col, weight_col)
188
- x, y, z, w = _downsample_points(x, y, z, w, max_points)
189
-
190
- if w is not None:
191
- z = z * w
192
-
193
- triang = mtri.Triangulation(x, y)
194
-
195
- created_fig = ax is None
196
- if created_fig:
197
- fig, ax = plt.subplots(figsize=style.figsize)
198
- else:
199
- fig = ax.figure
200
-
201
- contour = ax.tricontourf(triang, z, levels=levels, cmap=cmap)
202
- if show_points:
203
- ax.scatter(x, y, s=6, c="k", alpha=0.2)
204
- cbar = fig.colorbar(contour, ax=ax)
205
- cbar.set_label(value_col, fontsize=style.label_size)
206
- cbar.ax.tick_params(labelsize=style.tick_size)
207
-
208
- ax.set_xlabel(x_col, fontsize=style.label_size)
209
- ax.set_ylabel(y_col, fontsize=style.label_size)
210
- ax.set_title(title, fontsize=style.title_size)
211
- ax.tick_params(axis="both", labelsize=style.tick_size)
212
-
213
- if created_fig:
214
- finalize_figure(fig, save_path=save_path, show=show, style=style)
215
-
216
- return fig
217
-
218
-
219
- def plot_geo_heatmap_on_map(
220
- df: pd.DataFrame,
221
- *,
222
- lon_col: str,
223
- lat_col: str,
224
- value_col: str,
225
- weight_col: Optional[str] = None,
226
- bins: int | Tuple[int, int] = 100,
227
- agg: str = "mean",
228
- cmap: str = "YlOrRd",
229
- alpha: float = 0.6,
230
- basemap: Optional[object] = "CartoDB.Positron",
231
- zoom: Optional[int] = None,
232
- padding: float = 0.05,
233
- title: str = "Geo Heatmap (Map)",
234
- ax: Optional[plt.Axes] = None,
235
- show_points: bool = False,
236
- show: bool = False,
237
- save_path: Optional[str] = None,
238
- style: Optional[PlotStyle] = None,
239
- ) -> plt.Figure:
240
- _require_contextily("plot_geo_heatmap_on_map")
241
- style = style or PlotStyle()
242
- if agg not in {"mean", "sum"}:
243
- raise ValueError("agg must be 'mean' or 'sum'.")
244
- lon, lat, z, w = _sanitize_geo(df, lon_col, lat_col, value_col, weight_col)
245
- x, y = _lonlat_to_mercator(lon, lat)
246
-
247
- if isinstance(bins, int):
248
- bins = (bins, bins)
249
-
250
- if w is None:
251
- sum_z, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=z)
252
- if agg == "sum":
253
- grid = sum_z
254
- else:
255
- count, _, _ = np.histogram2d(x, y, bins=bins)
256
- grid = sum_z / np.maximum(count, 1.0)
257
- else:
258
- sum_w, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=w)
259
- sum_zw, _, _ = np.histogram2d(x, y, bins=bins, weights=z * w)
260
- grid = sum_zw / np.maximum(sum_w, EPS)
261
-
262
- created_fig = ax is None
263
- if created_fig:
264
- fig, ax = plt.subplots(figsize=style.figsize)
265
- else:
266
- fig = ax.figure
267
-
268
- _apply_bounds(ax, x, y, padding)
269
- ax.set_aspect("equal", adjustable="box")
270
-
271
- source = _resolve_basemap(basemap)
272
- if source is not None:
273
- if zoom is None:
274
- cx.add_basemap(ax, source=source, crs="EPSG:3857")
275
- else:
276
- cx.add_basemap(ax, source=source, crs="EPSG:3857", zoom=zoom)
277
-
278
- im = ax.imshow(
279
- grid.T,
280
- origin="lower",
281
- extent=[x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]],
282
- aspect="auto",
283
- cmap=cmap,
284
- alpha=alpha,
285
- )
286
- if show_points:
287
- ax.scatter(x, y, s=6, c="k", alpha=0.25)
288
-
289
- cbar = fig.colorbar(im, ax=ax)
290
- cbar.set_label(value_col, fontsize=style.label_size)
291
- cbar.ax.tick_params(labelsize=style.tick_size)
292
-
293
- ax.set_title(title, fontsize=style.title_size)
294
- ax.tick_params(axis="both", labelsize=style.tick_size)
295
-
296
- if created_fig:
297
- finalize_figure(fig, save_path=save_path, show=show, style=style)
298
-
299
- return fig
300
-
301
-
302
- def plot_geo_contour_on_map(
303
- df: pd.DataFrame,
304
- *,
305
- lon_col: str,
306
- lat_col: str,
307
- value_col: str,
308
- weight_col: Optional[str] = None,
309
- max_points: Optional[int] = None,
310
- levels: int | Sequence[float] = 10,
311
- cmap: str = "viridis",
312
- alpha: float = 0.6,
313
- basemap: Optional[object] = "CartoDB.Positron",
314
- zoom: Optional[int] = None,
315
- padding: float = 0.05,
316
- title: str = "Geo Contour (Map)",
317
- ax: Optional[plt.Axes] = None,
318
- show_points: bool = False,
319
- show: bool = False,
320
- save_path: Optional[str] = None,
321
- style: Optional[PlotStyle] = None,
322
- ) -> plt.Figure:
323
- _require_contextily("plot_geo_contour_on_map")
324
- style = style or PlotStyle()
325
- lon, lat, z, w = _sanitize_geo(df, lon_col, lat_col, value_col, weight_col)
326
- lon, lat, z, w = _downsample_points(lon, lat, z, w, max_points)
327
- x, y = _lonlat_to_mercator(lon, lat)
328
- if w is not None:
329
- z = z * w
330
-
331
- created_fig = ax is None
332
- if created_fig:
333
- fig, ax = plt.subplots(figsize=style.figsize)
334
- else:
335
- fig = ax.figure
336
-
337
- _apply_bounds(ax, x, y, padding)
338
- ax.set_aspect("equal", adjustable="box")
339
-
340
- source = _resolve_basemap(basemap)
341
- if source is not None:
342
- if zoom is None:
343
- cx.add_basemap(ax, source=source, crs="EPSG:3857")
344
- else:
345
- cx.add_basemap(ax, source=source, crs="EPSG:3857", zoom=zoom)
346
-
347
- triang = mtri.Triangulation(x, y)
348
- contour = ax.tricontourf(triang, z, levels=levels, cmap=cmap, alpha=alpha)
349
- if show_points:
350
- ax.scatter(x, y, s=6, c="k", alpha=0.25)
351
-
352
- cbar = fig.colorbar(contour, ax=ax)
353
- cbar.set_label(value_col, fontsize=style.label_size)
354
- cbar.ax.tick_params(labelsize=style.tick_size)
355
-
356
- ax.set_title(title, fontsize=style.title_size)
357
- ax.tick_params(axis="both", labelsize=style.tick_size)
358
-
359
- if created_fig:
360
- finalize_figure(fig, save_path=save_path, show=show, style=style)
361
-
362
- return fig
ins_pricing_gemini/modelling/plotting/importance.py (deleted; inferred from the -121 line count in entry 83 and the feature-importance content below)
@@ -1,121 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Mapping, Optional, Sequence, Tuple
4
-
5
- import numpy as np
6
- import pandas as pd
7
-
8
- from .common import PlotStyle, finalize_figure, plt
9
-
10
-
11
- def _to_series(
12
- importance: Mapping[str, float]
13
- | Sequence[Tuple[str, float]]
14
- | pd.Series
15
- | np.ndarray,
16
- feature_names: Optional[Sequence[str]] = None,
17
- ) -> pd.Series:
18
- if isinstance(importance, pd.Series):
19
- return importance.copy()
20
- if isinstance(importance, Mapping):
21
- return pd.Series(dict(importance))
22
- if isinstance(importance, np.ndarray):
23
- if feature_names is None:
24
- raise ValueError("feature_names is required when importance is an array.")
25
- return pd.Series(importance, index=list(feature_names))
26
- return pd.Series(dict(importance))
27
-
28
-
29
- def shap_importance(
30
- shap_values: np.ndarray,
31
- feature_names: Sequence[str],
32
- ) -> pd.Series:
33
- if shap_values.ndim == 3:
34
- shap_values = shap_values[0]
35
- if shap_values.ndim != 2:
36
- raise ValueError("shap_values should be 2d (n_samples, n_features).")
37
- scores = np.abs(shap_values).mean(axis=0)
38
- return pd.Series(scores, index=list(feature_names))
39
-
40
-
41
- def plot_feature_importance(
42
- importance: Mapping[str, float]
43
- | Sequence[Tuple[str, float]]
44
- | pd.Series
45
- | np.ndarray,
46
- *,
47
- feature_names: Optional[Sequence[str]] = None,
48
- top_n: int = 30,
49
- title: str = "Feature Importance",
50
- sort_by: str = "abs",
51
- descending: bool = True,
52
- show_values: bool = False,
53
- ax: Optional[plt.Axes] = None,
54
- show: bool = False,
55
- save_path: Optional[str] = None,
56
- style: Optional[PlotStyle] = None,
57
- ) -> plt.Figure:
58
- style = style or PlotStyle()
59
- series = _to_series(importance, feature_names=feature_names)
60
- series = series.replace([np.inf, -np.inf], np.nan).dropna()
61
-
62
- if sort_by not in {"abs", "value"}:
63
- raise ValueError("sort_by must be 'abs' or 'value'.")
64
- sort_key = series.abs() if sort_by == "abs" else series
65
- series = series.loc[sort_key.sort_values(ascending=not descending).index]
66
-
67
- if top_n > 0:
68
- series = series.head(int(top_n))
69
-
70
- created_fig = ax is None
71
- if created_fig:
72
- height = max(3.0, 0.3 * len(series))
73
- fig, ax = plt.subplots(figsize=(style.figsize[0], height))
74
- else:
75
- fig = ax.figure
76
-
77
- y_pos = np.arange(len(series))
78
- ax.barh(y_pos, series.values, color=style.palette[0])
79
- ax.set_yticks(y_pos)
80
- ax.set_yticklabels(series.index, fontsize=style.tick_size)
81
- ax.invert_yaxis()
82
- ax.set_title(title, fontsize=style.title_size)
83
- ax.tick_params(axis="x", labelsize=style.tick_size)
84
- if style.grid:
85
- ax.grid(True, axis="x", linestyle=style.grid_style, alpha=style.grid_alpha)
86
-
87
- if show_values:
88
- for idx, val in enumerate(series.values):
89
- ax.text(val, idx, f" {val:.3f}", va="center", fontsize=style.tick_size)
90
-
91
- if created_fig:
92
- finalize_figure(fig, save_path=save_path, show=show, style=style)
93
-
94
- return fig
95
-
96
-
97
- def plot_shap_importance(
98
- shap_values: np.ndarray,
99
- feature_names: Sequence[str],
100
- *,
101
- top_n: int = 30,
102
- title: str = "SHAP Importance",
103
- show_values: bool = False,
104
- ax: Optional[plt.Axes] = None,
105
- show: bool = False,
106
- save_path: Optional[str] = None,
107
- style: Optional[PlotStyle] = None,
108
- ) -> plt.Figure:
109
- series = shap_importance(shap_values, feature_names)
110
- return plot_feature_importance(
111
- series,
112
- top_n=top_n,
113
- title=title,
114
- sort_by="abs",
115
- descending=True,
116
- show_values=show_values,
117
- ax=ax,
118
- show=show,
119
- save_path=save_path,
120
- style=style,
121
- )
ins_pricing_gemini/modelling/run_logging.py (deleted; inferred from the -133 line count in entry 84 and the logging-configuration content below)
@@ -1,133 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import logging
4
- import os
5
- import sys
6
- import tempfile
7
- from datetime import datetime
8
- from pathlib import Path
9
- from typing import Optional, TextIO
10
-
11
- _LOGGING_CONFIGURED = False
12
- _LOG_PATH: Optional[Path] = None
13
- _LOG_FILE: Optional[TextIO] = None
14
-
15
- _TRUTHY = {"1", "true", "yes", "y", "on"}
16
-
17
-
18
- class _TeeStream:
19
- def __init__(self, primary: TextIO, secondary: TextIO) -> None:
20
- self._primary = primary
21
- self._secondary = secondary
22
-
23
- def write(self, data: str) -> int:
24
- if not data:
25
- return 0
26
- try:
27
- self._primary.write(data)
28
- except Exception:
29
- pass
30
- try:
31
- self._secondary.write(data)
32
- except Exception:
33
- pass
34
- return len(data)
35
-
36
- def flush(self) -> None:
37
- for stream in (self._primary, self._secondary):
38
- try:
39
- stream.flush()
40
- except Exception:
41
- pass
42
-
43
- def isatty(self) -> bool:
44
- return bool(getattr(self._primary, "isatty", lambda: False)())
45
-
46
- def fileno(self) -> int:
47
- return self._primary.fileno()
48
-
49
- def __getattr__(self, name: str):
50
- return getattr(self._primary, name)
51
-
52
-
53
- def _is_truthy(value: Optional[str]) -> bool:
54
- return str(value).strip().lower() in _TRUTHY
55
-
56
-
57
- def _resolve_log_dir(log_dir: Optional[str | Path]) -> Optional[Path]:
58
- candidates: list[Path] = []
59
- if log_dir:
60
- candidates.append(Path(log_dir).expanduser())
61
- env_dir = os.environ.get("INS_PRICING_LOG_DIR")
62
- if env_dir:
63
- candidates.append(Path(env_dir).expanduser())
64
- candidates.append(Path.cwd() / "logs")
65
- candidates.append(Path.home() / ".ins_pricing" / "logs")
66
- candidates.append(Path(tempfile.gettempdir()) / "ins_pricing_logs")
67
-
68
- for cand in candidates:
69
- try:
70
- cand.mkdir(parents=True, exist_ok=True)
71
- return cand
72
- except Exception:
73
- continue
74
- return None
75
-
76
-
77
- def _build_log_filename(prefix: str) -> str:
78
- ts = datetime.now().strftime("%Y%m%d_%H%M%S")
79
- rank = (
80
- os.environ.get("RANK")
81
- or os.environ.get("LOCAL_RANK")
82
- or os.environ.get("SLURM_PROCID")
83
- )
84
- suffix = f"r{rank}" if rank is not None else f"pid{os.getpid()}"
85
- safe_prefix = "".join(
86
- ch if ch.isalnum() or ch in "-_." else "_" for ch in prefix)
87
- return f"{safe_prefix}_{ts}_{suffix}.log"
88
-
89
-
90
- def configure_run_logging(
91
- *,
92
- prefix: str = "ins_pricing",
93
- log_dir: Optional[str | Path] = None,
94
- level: int = logging.INFO,
95
- announce: bool = True,
96
- ) -> Optional[Path]:
97
- global _LOGGING_CONFIGURED, _LOG_PATH, _LOG_FILE
98
-
99
- if _LOGGING_CONFIGURED:
100
- return _LOG_PATH
101
- if _is_truthy(os.environ.get("INS_PRICING_LOG_DISABLE")):
102
- return None
103
-
104
- resolved_dir = _resolve_log_dir(log_dir)
105
- if resolved_dir is None:
106
- return None
107
-
108
- log_path = resolved_dir / _build_log_filename(prefix)
109
- try:
110
- log_file = log_path.open("a", encoding="utf-8")
111
- except Exception:
112
- return None
113
-
114
- sys.stdout = _TeeStream(sys.stdout, log_file) # type: ignore[assignment]
115
- sys.stderr = _TeeStream(sys.stderr, log_file) # type: ignore[assignment]
116
- _LOG_FILE = log_file
117
- _LOG_PATH = log_path
118
- _LOGGING_CONFIGURED = True
119
-
120
- root = logging.getLogger()
121
- if not root.handlers:
122
- logging.basicConfig(
123
- level=level,
124
- format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
125
- handlers=[logging.StreamHandler(sys.stdout)],
126
- )
127
- else:
128
- root.setLevel(level)
129
-
130
- if announce:
131
- print(f"[ins_pricing] log saved to {log_path}", flush=True)
132
-
133
- return log_path
ins_pricing_gemini/modelling/tests/conftest.py (deleted; inferred from the -8 line count in entry 85 and the sys.path setup content below)
@@ -1,8 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import sys
4
- from pathlib import Path
5
-
6
- ROOT = Path(__file__).resolve().parents[1]
7
- if str(ROOT) not in sys.path:
8
- sys.path.insert(0, str(ROOT))
ins_pricing_gemini/modelling/tests/test_cross_val_generic.py (deleted; inferred from the -66 line count in entry 86 and the cross-validation test content below)
@@ -1,66 +0,0 @@
1
- import types
2
-
3
- import numpy as np
4
- import pandas as pd
5
- import pytest
6
-
7
- pytest.importorskip("torch")
8
- pytest.importorskip("optuna")
9
- pytest.importorskip("statsmodels")
10
- pytest.importorskip("xgboost")
11
-
12
- from ins_pricing.bayesopt.trainers import TrainerBase
13
-
14
-
15
- class DummyTrainer(TrainerBase):
16
- def __init__(self):
17
- ctx = types.SimpleNamespace(prop_test=0.2, rand_seed=123)
18
- super().__init__(ctx, "Dummy", "Dummy")
19
-
20
- def train(self) -> None: # pragma: no cover - not used
21
- raise NotImplementedError
22
-
23
-
24
- def test_cross_val_generic_iterates_all_splits():
25
- trainer = DummyTrainer()
26
-
27
- X = pd.DataFrame({"x": np.arange(12, dtype=float)})
28
- y = pd.Series(np.arange(12, dtype=float))
29
- w = pd.Series(np.ones(12, dtype=float))
30
-
31
- def data_provider():
32
- return X, y, w
33
-
34
- class DummyModel:
35
- def fit(self, X_train, y_train, sample_weight=None):
36
- return self
37
-
38
- def predict(self, X_val):
39
- return np.zeros(len(X_val))
40
-
41
- def model_builder(_params):
42
- return DummyModel()
43
-
44
- calls = []
45
-
46
- def metric_fn(y_true, y_pred, weight):
47
- calls.append(len(y_true))
48
- return float(np.mean(y_pred))
49
-
50
- splits = [
51
- (np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), np.array([10, 11])),
52
- (np.array([2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), np.array([0, 1])),
53
- (np.array([0, 2, 4, 6, 8, 10]), np.array([1, 3, 5, 7, 9, 11])),
54
- ]
55
-
56
- result = trainer.cross_val_generic(
57
- trial=object(),
58
- hyperparameter_space={"p": lambda _t: 1.0},
59
- data_provider=data_provider,
60
- model_builder=model_builder,
61
- metric_fn=metric_fn,
62
- splitter=splits,
63
- )
64
-
65
- assert result == 0.0
66
- assert len(calls) == len(splits)
ins_pricing_gemini/modelling/tests/test_distributed_utils.py (deleted; inferred from the -18 line count in entry 87 and the DDP setup test content below)
@@ -1,18 +0,0 @@
1
- import pytest
2
-
3
- pytest.importorskip("torch")
4
-
5
- from ins_pricing.bayesopt.utils import DistributedUtils
6
-
7
-
8
- def test_setup_ddp_without_env(monkeypatch):
9
- monkeypatch.delenv("RANK", raising=False)
10
- monkeypatch.delenv("WORLD_SIZE", raising=False)
11
- monkeypatch.delenv("LOCAL_RANK", raising=False)
12
-
13
- ok, local_rank, rank, world_size = DistributedUtils.setup_ddp()
14
-
15
- assert ok is False
16
- assert local_rank == 0
17
- assert rank == 0
18
- assert world_size == 1