ins-pricing 0.1.11__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +9 -6
- ins_pricing/__init__.py +3 -11
- ins_pricing/cli/BayesOpt_entry.py +24 -0
- ins_pricing/{modelling → cli}/BayesOpt_incremental.py +197 -64
- ins_pricing/cli/Explain_Run.py +25 -0
- ins_pricing/{modelling → cli}/Explain_entry.py +169 -124
- ins_pricing/cli/Pricing_Run.py +25 -0
- ins_pricing/cli/__init__.py +1 -0
- ins_pricing/cli/bayesopt_entry_runner.py +1312 -0
- ins_pricing/cli/utils/__init__.py +1 -0
- ins_pricing/cli/utils/cli_common.py +320 -0
- ins_pricing/cli/utils/cli_config.py +375 -0
- ins_pricing/{modelling → cli/utils}/notebook_utils.py +74 -19
- {ins_pricing_gemini/modelling → ins_pricing/cli}/watchdog_run.py +2 -2
- ins_pricing/{modelling → docs/modelling}/BayesOpt_USAGE.md +69 -49
- ins_pricing/docs/modelling/README.md +34 -0
- ins_pricing/modelling/__init__.py +57 -6
- ins_pricing/modelling/core/__init__.py +1 -0
- ins_pricing/modelling/{bayesopt → core/bayesopt}/config_preprocess.py +64 -1
- ins_pricing/modelling/{bayesopt → core/bayesopt}/core.py +150 -810
- ins_pricing/modelling/core/bayesopt/model_explain_mixin.py +296 -0
- ins_pricing/modelling/core/bayesopt/model_plotting_mixin.py +548 -0
- ins_pricing/modelling/core/bayesopt/models/__init__.py +27 -0
- ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +316 -0
- ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +808 -0
- ins_pricing/modelling/core/bayesopt/models/model_gnn.py +675 -0
- ins_pricing/modelling/core/bayesopt/models/model_resn.py +435 -0
- ins_pricing/modelling/core/bayesopt/trainers/__init__.py +19 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +1020 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +787 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +195 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +312 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +261 -0
- ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +348 -0
- ins_pricing/modelling/{bayesopt → core/bayesopt}/utils.py +2 -2
- ins_pricing/modelling/core/evaluation.py +115 -0
- ins_pricing/production/__init__.py +4 -0
- ins_pricing/production/preprocess.py +71 -0
- ins_pricing/setup.py +10 -5
- {ins_pricing_gemini/modelling/tests → ins_pricing/tests/modelling}/test_plotting.py +2 -2
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/METADATA +4 -4
- ins_pricing-0.2.0.dist-info/RECORD +125 -0
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/top_level.txt +0 -1
- ins_pricing/modelling/BayesOpt_entry.py +0 -633
- ins_pricing/modelling/Explain_Run.py +0 -36
- ins_pricing/modelling/Pricing_Run.py +0 -36
- ins_pricing/modelling/README.md +0 -33
- ins_pricing/modelling/bayesopt/models.py +0 -2196
- ins_pricing/modelling/bayesopt/trainers.py +0 -2446
- ins_pricing/modelling/cli_common.py +0 -136
- ins_pricing/modelling/tests/test_plotting.py +0 -63
- ins_pricing/modelling/watchdog_run.py +0 -211
- ins_pricing-0.1.11.dist-info/RECORD +0 -169
- ins_pricing_gemini/__init__.py +0 -23
- ins_pricing_gemini/governance/__init__.py +0 -20
- ins_pricing_gemini/governance/approval.py +0 -93
- ins_pricing_gemini/governance/audit.py +0 -37
- ins_pricing_gemini/governance/registry.py +0 -99
- ins_pricing_gemini/governance/release.py +0 -159
- ins_pricing_gemini/modelling/Explain_Run.py +0 -36
- ins_pricing_gemini/modelling/Pricing_Run.py +0 -36
- ins_pricing_gemini/modelling/__init__.py +0 -151
- ins_pricing_gemini/modelling/cli_common.py +0 -141
- ins_pricing_gemini/modelling/config.py +0 -249
- ins_pricing_gemini/modelling/config_preprocess.py +0 -254
- ins_pricing_gemini/modelling/core.py +0 -741
- ins_pricing_gemini/modelling/data_container.py +0 -42
- ins_pricing_gemini/modelling/explain/__init__.py +0 -55
- ins_pricing_gemini/modelling/explain/gradients.py +0 -334
- ins_pricing_gemini/modelling/explain/metrics.py +0 -176
- ins_pricing_gemini/modelling/explain/permutation.py +0 -155
- ins_pricing_gemini/modelling/explain/shap_utils.py +0 -146
- ins_pricing_gemini/modelling/features.py +0 -215
- ins_pricing_gemini/modelling/model_manager.py +0 -148
- ins_pricing_gemini/modelling/model_plotting.py +0 -463
- ins_pricing_gemini/modelling/models.py +0 -2203
- ins_pricing_gemini/modelling/notebook_utils.py +0 -294
- ins_pricing_gemini/modelling/plotting/__init__.py +0 -45
- ins_pricing_gemini/modelling/plotting/common.py +0 -63
- ins_pricing_gemini/modelling/plotting/curves.py +0 -572
- ins_pricing_gemini/modelling/plotting/diagnostics.py +0 -139
- ins_pricing_gemini/modelling/plotting/geo.py +0 -362
- ins_pricing_gemini/modelling/plotting/importance.py +0 -121
- ins_pricing_gemini/modelling/run_logging.py +0 -133
- ins_pricing_gemini/modelling/tests/conftest.py +0 -8
- ins_pricing_gemini/modelling/tests/test_cross_val_generic.py +0 -66
- ins_pricing_gemini/modelling/tests/test_distributed_utils.py +0 -18
- ins_pricing_gemini/modelling/tests/test_explain.py +0 -56
- ins_pricing_gemini/modelling/tests/test_geo_tokens_split.py +0 -49
- ins_pricing_gemini/modelling/tests/test_graph_cache.py +0 -33
- ins_pricing_gemini/modelling/tests/test_plotting_library.py +0 -150
- ins_pricing_gemini/modelling/tests/test_preprocessor.py +0 -48
- ins_pricing_gemini/modelling/trainers.py +0 -2447
- ins_pricing_gemini/modelling/utils.py +0 -1020
- ins_pricing_gemini/pricing/__init__.py +0 -27
- ins_pricing_gemini/pricing/calibration.py +0 -39
- ins_pricing_gemini/pricing/data_quality.py +0 -117
- ins_pricing_gemini/pricing/exposure.py +0 -85
- ins_pricing_gemini/pricing/factors.py +0 -91
- ins_pricing_gemini/pricing/monitoring.py +0 -99
- ins_pricing_gemini/pricing/rate_table.py +0 -78
- ins_pricing_gemini/production/__init__.py +0 -21
- ins_pricing_gemini/production/drift.py +0 -30
- ins_pricing_gemini/production/monitoring.py +0 -143
- ins_pricing_gemini/production/scoring.py +0 -40
- ins_pricing_gemini/reporting/__init__.py +0 -11
- ins_pricing_gemini/reporting/report_builder.py +0 -72
- ins_pricing_gemini/reporting/scheduler.py +0 -45
- ins_pricing_gemini/scripts/BayesOpt_incremental.py +0 -722
- ins_pricing_gemini/scripts/Explain_entry.py +0 -545
- ins_pricing_gemini/scripts/__init__.py +0 -1
- ins_pricing_gemini/scripts/train.py +0 -568
- ins_pricing_gemini/setup.py +0 -55
- ins_pricing_gemini/smoke_test.py +0 -28
- /ins_pricing/{modelling → cli/utils}/run_logging.py +0 -0
- /ins_pricing/modelling/{BayesOpt.py → core/BayesOpt.py} +0 -0
- /ins_pricing/modelling/{bayesopt → core/bayesopt}/__init__.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/conftest.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_cross_val_generic.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_distributed_utils.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_explain.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_geo_tokens_split.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_graph_cache.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_plotting_library.py +0 -0
- /ins_pricing/{modelling/tests → tests/modelling}/test_preprocessor.py +0 -0
- {ins_pricing-0.1.11.dist-info → ins_pricing-0.2.0.dist-info}/WHEEL +0 -0
|
@@ -1,362 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import Optional, Sequence, Tuple
|
|
4
|
-
|
|
5
|
-
import numpy as np
|
|
6
|
-
import pandas as pd
|
|
7
|
-
import matplotlib.tri as mtri
|
|
8
|
-
|
|
9
|
-
from .common import EPS, PlotStyle, finalize_figure, plt
|
|
10
|
-
|
|
11
|
-
try: # optional map basemap support
|
|
12
|
-
import contextily as cx
|
|
13
|
-
except Exception: # pragma: no cover - optional dependency
|
|
14
|
-
cx = None
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
_MERCATOR_MAX_LAT = 85.05112878
|
|
18
|
-
_MERCATOR_FACTOR = 20037508.34
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def _require_contextily(func_name: str) -> None:
    """Fail fast when the optional ``contextily`` dependency is unavailable.

    Args:
        func_name: Name of the calling function; only used in the error text.

    Raises:
        RuntimeError: If the module-level ``cx`` handle is ``None``, i.e. the
            guarded ``import contextily`` at the top of the module failed.
    """
    if cx is not None:
        return
    message = f"{func_name} requires contextily. Install it via 'pip install contextily'."
    raise RuntimeError(message)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def _lonlat_to_mercator(lon: np.ndarray, lat: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Project longitude/latitude values onto Mercator x/y coordinates.

    Inputs are converted to float arrays. Latitudes are clipped to
    ``+/- _MERCATOR_MAX_LAT`` before projecting, because the logarithm of the
    tangent diverges towards the poles.

    NOTE(review): inputs are presumably degrees and outputs Web Mercator
    (EPSG:3857) metres, matching the CRS handed to the basemap calls in this
    module - confirm.

    Args:
        lon: Longitudes (array-like).
        lat: Latitudes (array-like).

    Returns:
        ``(x, y)`` arrays with the projected coordinates.
    """
    lon_values = np.asarray(lon, dtype=float)
    lat_values = np.clip(
        np.asarray(lat, dtype=float), -_MERCATOR_MAX_LAT, _MERCATOR_MAX_LAT
    )
    easting = lon_values * _MERCATOR_FACTOR / 180.0
    tangent = np.tan((90.0 + lat_values) * np.pi / 360.0)
    northing = np.log(tangent) * _MERCATOR_FACTOR / np.pi
    return easting, northing
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def _apply_bounds(ax: plt.Axes, x: np.ndarray, y: np.ndarray, padding: float) -> None:
|
|
38
|
-
x_min, x_max = float(np.min(x)), float(np.max(x))
|
|
39
|
-
y_min, y_max = float(np.min(y)), float(np.max(y))
|
|
40
|
-
pad_x = (x_max - x_min) * padding
|
|
41
|
-
pad_y = (y_max - y_min) * padding
|
|
42
|
-
if pad_x == 0:
|
|
43
|
-
pad_x = 1.0
|
|
44
|
-
if pad_y == 0:
|
|
45
|
-
pad_y = 1.0
|
|
46
|
-
ax.set_xlim(x_min - pad_x, x_max + pad_x)
|
|
47
|
-
ax.set_ylim(y_min - pad_y, y_max + pad_y)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def _resolve_basemap(source):
    """Resolve a dotted provider name into an object under ``cx.providers``.

    A string such as ``"CartoDB.Positron"`` is split on ``"."`` and walked one
    component at a time: dict-like nodes are indexed by key, anything else is
    accessed by attribute. Non-string sources (and everything when contextily
    is missing or ``source`` is ``None``) are returned unchanged.

    Args:
        source: Provider name, provider object, or ``None``.

    Returns:
        The resolved provider object, or ``source`` itself when no lookup
        is performed.
    """
    if cx is None or source is None or not isinstance(source, str):
        return source
    node = cx.providers
    for key in source.split("."):
        node = node[key] if isinstance(node, dict) else getattr(node, key)
    return node
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def _sanitize_geo(
|
|
65
|
-
df: pd.DataFrame,
|
|
66
|
-
x_col: str,
|
|
67
|
-
y_col: str,
|
|
68
|
-
value_col: str,
|
|
69
|
-
weight_col: Optional[str] = None,
|
|
70
|
-
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]]:
|
|
71
|
-
x = pd.to_numeric(df[x_col], errors="coerce").to_numpy(dtype=float)
|
|
72
|
-
y = pd.to_numeric(df[y_col], errors="coerce").to_numpy(dtype=float)
|
|
73
|
-
z = pd.to_numeric(df[value_col], errors="coerce").to_numpy(dtype=float)
|
|
74
|
-
w = None
|
|
75
|
-
if weight_col:
|
|
76
|
-
w = pd.to_numeric(df[weight_col], errors="coerce").to_numpy(dtype=float)
|
|
77
|
-
|
|
78
|
-
if w is None:
|
|
79
|
-
mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(z)
|
|
80
|
-
else:
|
|
81
|
-
mask = np.isfinite(x) & np.isfinite(y) & np.isfinite(z) & np.isfinite(w)
|
|
82
|
-
w = w[mask]
|
|
83
|
-
return x[mask], y[mask], z[mask], w
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def _downsample_points(
|
|
87
|
-
x: np.ndarray,
|
|
88
|
-
y: np.ndarray,
|
|
89
|
-
z: np.ndarray,
|
|
90
|
-
w: Optional[np.ndarray],
|
|
91
|
-
max_points: Optional[int],
|
|
92
|
-
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]]:
|
|
93
|
-
if max_points is None:
|
|
94
|
-
return x, y, z, w
|
|
95
|
-
max_points = int(max_points)
|
|
96
|
-
if max_points <= 0 or len(x) <= max_points:
|
|
97
|
-
return x, y, z, w
|
|
98
|
-
rng = np.random.default_rng(13)
|
|
99
|
-
idx = rng.choice(len(x), size=max_points, replace=False)
|
|
100
|
-
if w is None:
|
|
101
|
-
return x[idx], y[idx], z[idx], None
|
|
102
|
-
return x[idx], y[idx], z[idx], w[idx]
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def plot_geo_heatmap(
    df: pd.DataFrame,
    *,
    x_col: str,
    y_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    bins: int | Tuple[int, int] = 50,
    agg: str = "mean",
    cmap: str = "YlOrRd",
    title: str = "Geo Heatmap",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Draw a binned 2-D heatmap of ``value_col`` over ``x_col`` / ``y_col``.

    Rows with a non-finite coordinate, value or weight are dropped by
    ``_sanitize_geo`` before binning. Per cell the result is:

    * ``agg="mean"`` - the plain mean, or the weight-weighted mean when
      ``weight_col`` is given;
    * ``agg="sum"`` - the plain sum, or the sum of ``value * weight`` when
      ``weight_col`` is given.

    Cells without any point show as 0 because the denominators are floored
    (at 1.0 for counts, at ``EPS`` for weights).

    Args:
        df: Source frame.
        x_col: Column with the x coordinate.
        y_col: Column with the y coordinate.
        value_col: Column with the value to aggregate; also the colorbar label.
        weight_col: Optional weight column.
        bins: Number of bins, either one int for both axes or ``(nx, ny)``.
        agg: ``"mean"`` or ``"sum"``.
        cmap: Matplotlib colormap name.
        title: Axes title.
        ax: Existing axes to draw on; a new figure is created when omitted.
        show: Forwarded to ``finalize_figure`` (only for a figure created here).
        save_path: Forwarded to ``finalize_figure`` (only for a figure created here).
        style: Plot styling; defaults to ``PlotStyle()``.

    Returns:
        The figure that holds the heatmap.

    Raises:
        ValueError: If ``agg`` is neither ``"mean"`` nor ``"sum"``.
    """
    style = style or PlotStyle()
    if agg not in {"mean", "sum"}:
        raise ValueError("agg must be 'mean' or 'sum'.")
    x, y, z, w = _sanitize_geo(df, x_col, y_col, value_col, weight_col)

    if isinstance(bins, int):
        bins = (bins, bins)

    if w is None:
        sum_z, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=z)
        if agg == "sum":
            grid = sum_z
        else:
            count, _, _ = np.histogram2d(x, y, bins=bins)
            grid = sum_z / np.maximum(count, 1.0)
    else:
        sum_zw, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=z * w)
        if agg == "sum":
            # Previously `agg` was ignored whenever weights were supplied and a
            # weighted mean was always returned; honour the requested sum.
            grid = sum_zw
        else:
            sum_w, _, _ = np.histogram2d(x, y, bins=bins, weights=w)
            grid = sum_zw / np.maximum(sum_w, EPS)

    created_fig = ax is None
    if created_fig:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    # histogram2d indexes the grid as [x_bin, y_bin]; imshow expects
    # [row (y), column (x)], hence the transpose.
    im = ax.imshow(
        grid.T,
        origin="lower",
        extent=[x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]],
        aspect="auto",
        cmap=cmap,
    )
    cbar = fig.colorbar(im, ax=ax)
    cbar.set_label(value_col, fontsize=style.label_size)
    cbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_xlabel(x_col, fontsize=style.label_size)
    ax.set_ylabel(y_col, fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    # A caller-supplied axes is left for the caller to save/show.
    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
def plot_geo_contour(
    df: pd.DataFrame,
    *,
    x_col: str,
    y_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    max_points: Optional[int] = None,
    levels: int | Sequence[float] = 10,
    cmap: str = "viridis",
    title: str = "Geo Contour",
    ax: Optional[plt.Axes] = None,
    show_points: bool = False,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Draw a filled contour of ``value_col`` over a triangulation of the points.

    Points are cleaned with ``_sanitize_geo`` and optionally thinned with
    ``_downsample_points``. When ``weight_col`` is given the contoured
    quantity is ``value * weight``.

    Args:
        df: Source frame.
        x_col: Column with the x coordinate.
        y_col: Column with the y coordinate.
        value_col: Column with the contoured value; also the colorbar label.
        weight_col: Optional weight column multiplied into the value.
        max_points: Optional cap on the number of points triangulated.
        levels: Contour level count or explicit level values.
        cmap: Matplotlib colormap name.
        title: Axes title.
        ax: Existing axes to draw on; a new figure is created when omitted.
        show_points: Overlay the sample points as a faint scatter.
        show: Forwarded to ``finalize_figure`` (only for a figure created here).
        save_path: Forwarded to ``finalize_figure`` (only for a figure created here).
        style: Plot styling; defaults to ``PlotStyle()``.

    Returns:
        The figure that holds the contour plot.
    """
    style = style or PlotStyle()
    cleaned = _sanitize_geo(df, x_col, y_col, value_col, weight_col)
    x, y, z, w = _downsample_points(*cleaned, max_points)
    surface = z if w is None else z * w

    mesh = mtri.Triangulation(x, y)

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    filled = ax.tricontourf(mesh, surface, levels=levels, cmap=cmap)
    if show_points:
        ax.scatter(x, y, s=6, c="k", alpha=0.2)

    colorbar = fig.colorbar(filled, ax=ax)
    colorbar.set_label(value_col, fontsize=style.label_size)
    colorbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_xlabel(x_col, fontsize=style.label_size)
    ax.set_ylabel(y_col, fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    # Only figures created here are finalized; callers own their own axes.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)
    return fig
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
def plot_geo_heatmap_on_map(
    df: pd.DataFrame,
    *,
    lon_col: str,
    lat_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    bins: int | Tuple[int, int] = 100,
    agg: str = "mean",
    cmap: str = "YlOrRd",
    alpha: float = 0.6,
    basemap: Optional[object] = "CartoDB.Positron",
    zoom: Optional[int] = None,
    padding: float = 0.05,
    title: str = "Geo Heatmap (Map)",
    ax: Optional[plt.Axes] = None,
    show_points: bool = False,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Draw a binned heatmap of ``value_col`` on top of a contextily basemap.

    Longitude/latitude are cleaned with ``_sanitize_geo``, projected with
    ``_lonlat_to_mercator`` and binned in projected coordinates. Per cell the
    result is:

    * ``agg="mean"`` - the plain mean, or the weight-weighted mean when
      ``weight_col`` is given;
    * ``agg="sum"`` - the plain sum, or the sum of ``value * weight`` when
      ``weight_col`` is given.

    Cells without any point show as 0 because the denominators are floored
    (at 1.0 for counts, at ``EPS`` for weights).

    Args:
        df: Source frame.
        lon_col: Column with the longitude.
        lat_col: Column with the latitude.
        value_col: Column with the value to aggregate; also the colorbar label.
        weight_col: Optional weight column.
        bins: Number of bins, either one int for both axes or ``(nx, ny)``.
        agg: ``"mean"`` or ``"sum"``.
        cmap: Matplotlib colormap name.
        alpha: Opacity of the heatmap overlay.
        basemap: Dotted provider name, provider object, or ``None`` for no tiles.
        zoom: Optional zoom passed to ``cx.add_basemap``; omitted when ``None``.
        padding: Relative margin around the data extent.
        title: Axes title.
        ax: Existing axes to draw on; a new figure is created when omitted.
        show_points: Overlay the sample points as a faint scatter.
        show: Forwarded to ``finalize_figure`` (only for a figure created here).
        save_path: Forwarded to ``finalize_figure`` (only for a figure created here).
        style: Plot styling; defaults to ``PlotStyle()``.

    Returns:
        The figure that holds the map.

    Raises:
        RuntimeError: If contextily is not installed.
        ValueError: If ``agg`` is neither ``"mean"`` nor ``"sum"``.
    """
    _require_contextily("plot_geo_heatmap_on_map")
    style = style or PlotStyle()
    if agg not in {"mean", "sum"}:
        raise ValueError("agg must be 'mean' or 'sum'.")
    lon, lat, z, w = _sanitize_geo(df, lon_col, lat_col, value_col, weight_col)
    x, y = _lonlat_to_mercator(lon, lat)

    if isinstance(bins, int):
        bins = (bins, bins)

    if w is None:
        sum_z, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=z)
        if agg == "sum":
            grid = sum_z
        else:
            count, _, _ = np.histogram2d(x, y, bins=bins)
            grid = sum_z / np.maximum(count, 1.0)
    else:
        sum_zw, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=z * w)
        if agg == "sum":
            # Previously `agg` was ignored whenever weights were supplied and a
            # weighted mean was always returned; honour the requested sum.
            grid = sum_zw
        else:
            sum_w, _, _ = np.histogram2d(x, y, bins=bins, weights=w)
            grid = sum_zw / np.maximum(sum_w, EPS)

    created_fig = ax is None
    if created_fig:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    _apply_bounds(ax, x, y, padding)
    ax.set_aspect("equal", adjustable="box")

    source = _resolve_basemap(basemap)
    if source is not None:
        basemap_kwargs = {} if zoom is None else {"zoom": zoom}
        cx.add_basemap(ax, source=source, crs="EPSG:3857", **basemap_kwargs)

    # imshow applies its `aspect` argument to the axes. The former "auto"
    # silently undid the equal aspect set above and stretched the basemap,
    # so the overlay now asks for "equal" as well.
    im = ax.imshow(
        grid.T,  # histogram2d is [x_bin, y_bin]; imshow wants [row (y), col (x)]
        origin="lower",
        extent=[x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]],
        aspect="equal",
        cmap=cmap,
        alpha=alpha,
    )
    if show_points:
        ax.scatter(x, y, s=6, c="k", alpha=0.25)

    cbar = fig.colorbar(im, ax=ax)
    cbar.set_label(value_col, fontsize=style.label_size)
    cbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    # A caller-supplied axes is left for the caller to save/show.
    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
def plot_geo_contour_on_map(
    df: pd.DataFrame,
    *,
    lon_col: str,
    lat_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    max_points: Optional[int] = None,
    levels: int | Sequence[float] = 10,
    cmap: str = "viridis",
    alpha: float = 0.6,
    basemap: Optional[object] = "CartoDB.Positron",
    zoom: Optional[int] = None,
    padding: float = 0.05,
    title: str = "Geo Contour (Map)",
    ax: Optional[plt.Axes] = None,
    show_points: bool = False,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Draw a filled contour of ``value_col`` on top of a contextily basemap.

    Longitude/latitude are cleaned with ``_sanitize_geo``, optionally thinned
    with ``_downsample_points`` and projected with ``_lonlat_to_mercator``.
    When ``weight_col`` is given the contoured quantity is ``value * weight``.

    Args:
        df: Source frame.
        lon_col: Column with the longitude.
        lat_col: Column with the latitude.
        value_col: Column with the contoured value; also the colorbar label.
        weight_col: Optional weight column multiplied into the value.
        max_points: Optional cap on the number of points triangulated.
        levels: Contour level count or explicit level values.
        cmap: Matplotlib colormap name.
        alpha: Opacity of the contour overlay.
        basemap: Dotted provider name, provider object, or ``None`` for no tiles.
        zoom: Optional zoom passed to ``cx.add_basemap``; omitted when ``None``.
        padding: Relative margin around the data extent.
        title: Axes title.
        ax: Existing axes to draw on; a new figure is created when omitted.
        show_points: Overlay the sample points as a faint scatter.
        show: Forwarded to ``finalize_figure`` (only for a figure created here).
        save_path: Forwarded to ``finalize_figure`` (only for a figure created here).
        style: Plot styling; defaults to ``PlotStyle()``.

    Returns:
        The figure that holds the map.

    Raises:
        RuntimeError: If contextily is not installed.
    """
    _require_contextily("plot_geo_contour_on_map")
    style = style or PlotStyle()

    cleaned = _sanitize_geo(df, lon_col, lat_col, value_col, weight_col)
    lon, lat, z, w = _downsample_points(*cleaned, max_points)
    x, y = _lonlat_to_mercator(lon, lat)
    surface = z if w is None else z * w

    owns_figure = ax is None
    if owns_figure:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    _apply_bounds(ax, x, y, padding)
    ax.set_aspect("equal", adjustable="box")

    tiles = _resolve_basemap(basemap)
    if tiles is not None:
        # `zoom` is only forwarded when the caller picked one.
        zoom_kwargs = {} if zoom is None else {"zoom": zoom}
        cx.add_basemap(ax, source=tiles, crs="EPSG:3857", **zoom_kwargs)

    mesh = mtri.Triangulation(x, y)
    filled = ax.tricontourf(mesh, surface, levels=levels, cmap=cmap, alpha=alpha)
    if show_points:
        ax.scatter(x, y, s=6, c="k", alpha=0.25)

    colorbar = fig.colorbar(filled, ax=ax)
    colorbar.set_label(value_col, fontsize=style.label_size)
    colorbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    # Only figures created here are finalized; callers own their own axes.
    if owns_figure:
        finalize_figure(fig, save_path=save_path, show=show, style=style)
    return fig
|
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import Mapping, Optional, Sequence, Tuple
|
|
4
|
-
|
|
5
|
-
import numpy as np
|
|
6
|
-
import pandas as pd
|
|
7
|
-
|
|
8
|
-
from .common import PlotStyle, finalize_figure, plt
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def _to_series(
|
|
12
|
-
importance: Mapping[str, float]
|
|
13
|
-
| Sequence[Tuple[str, float]]
|
|
14
|
-
| pd.Series
|
|
15
|
-
| np.ndarray,
|
|
16
|
-
feature_names: Optional[Sequence[str]] = None,
|
|
17
|
-
) -> pd.Series:
|
|
18
|
-
if isinstance(importance, pd.Series):
|
|
19
|
-
return importance.copy()
|
|
20
|
-
if isinstance(importance, Mapping):
|
|
21
|
-
return pd.Series(dict(importance))
|
|
22
|
-
if isinstance(importance, np.ndarray):
|
|
23
|
-
if feature_names is None:
|
|
24
|
-
raise ValueError("feature_names is required when importance is an array.")
|
|
25
|
-
return pd.Series(importance, index=list(feature_names))
|
|
26
|
-
return pd.Series(dict(importance))
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def shap_importance(
    shap_values: np.ndarray,
    feature_names: Sequence[str],
) -> pd.Series:
    """Collapse SHAP values into one mean-absolute score per feature.

    Args:
        shap_values: Array-like of shape ``(n_samples, n_features)``. Lists
            and other array-likes are accepted and converted with
            ``np.asarray``. For 3-D input only the first slice along axis 0
            is used.
            NOTE(review): the 3-D case presumably targets a stacked
            per-output layout - confirm against the explainer in use.
        feature_names: Labels for the feature axis.

    Returns:
        Series of ``mean(|shap|)`` over samples, indexed by ``feature_names``.

    Raises:
        ValueError: If the (possibly sliced) input is not 2-D.
    """
    # Coerce first so list input does not fail on `.ndim`.
    shap_values = np.asarray(shap_values)
    if shap_values.ndim == 3:
        shap_values = shap_values[0]
    if shap_values.ndim != 2:
        raise ValueError("shap_values should be 2d (n_samples, n_features).")
    scores = np.abs(shap_values).mean(axis=0)
    return pd.Series(scores, index=list(feature_names))
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def plot_feature_importance(
    importance: Mapping[str, float]
    | Sequence[Tuple[str, float]]
    | pd.Series
    | np.ndarray,
    *,
    feature_names: Optional[Sequence[str]] = None,
    top_n: int = 30,
    title: str = "Feature Importance",
    sort_by: str = "abs",
    descending: bool = True,
    show_values: bool = False,
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Draw a horizontal bar chart of feature importances.

    The input is normalized with ``_to_series``, infinite/NaN scores are
    dropped, the remainder is sorted and the first ``top_n`` entries are
    plotted with the first entry at the top.

    Args:
        importance: Scores in any container accepted by ``_to_series``.
        feature_names: Labels, required when ``importance`` is an array.
        top_n: Number of bars to keep; values ``<= 0`` keep everything.
        title: Axes title.
        sort_by: ``"abs"`` to rank by magnitude, ``"value"`` by signed value.
        descending: Largest first when true.
        show_values: Annotate every bar with its value (3 decimals).
        ax: Existing axes to draw on; a new figure is created when omitted.
        show: Forwarded to ``finalize_figure`` (only for a figure created here).
        save_path: Forwarded to ``finalize_figure`` (only for a figure created here).
        style: Plot styling; defaults to ``PlotStyle()``.

    Returns:
        The figure that holds the bar chart.

    Raises:
        ValueError: If ``sort_by`` is neither ``"abs"`` nor ``"value"``.
    """
    style = style or PlotStyle()
    series = _to_series(importance, feature_names=feature_names)
    series = series.replace([np.inf, -np.inf], np.nan).dropna()

    if sort_by not in {"abs", "value"}:
        raise ValueError("sort_by must be 'abs' or 'value'.")
    sort_key = series.abs() if sort_by == "abs" else series
    # Reorder by position rather than by label: label-based `.loc` returns
    # every matching row for each repeated label, which duplicated bars when
    # the index held the same feature name more than once.
    order = (
        sort_key.reset_index(drop=True)
        .sort_values(ascending=not descending)
        .index.to_numpy()
    )
    series = series.iloc[order]

    if top_n > 0:
        series = series.head(int(top_n))

    created_fig = ax is None
    if created_fig:
        # Grow the figure with the number of bars, but never below 3 units.
        height = max(3.0, 0.3 * len(series))
        fig, ax = plt.subplots(figsize=(style.figsize[0], height))
    else:
        fig = ax.figure

    y_pos = np.arange(len(series))
    ax.barh(y_pos, series.values, color=style.palette[0])
    ax.set_yticks(y_pos)
    ax.set_yticklabels(series.index, fontsize=style.tick_size)
    ax.invert_yaxis()  # first (top-ranked) entry at the top of the chart
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="x", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, axis="x", linestyle=style.grid_style, alpha=style.grid_alpha)

    if show_values:
        for idx, val in enumerate(series.values):
            ax.text(val, idx, f" {val:.3f}", va="center", fontsize=style.tick_size)

    # A caller-supplied axes is left for the caller to save/show.
    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def plot_shap_importance(
    shap_values: np.ndarray,
    feature_names: Sequence[str],
    *,
    top_n: int = 30,
    title: str = "SHAP Importance",
    show_values: bool = False,
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Plot mean-absolute SHAP scores as a ranked horizontal bar chart.

    The scores come from ``shap_importance`` and are drawn by
    ``plot_feature_importance``, always ranked by magnitude, largest first.

    Args:
        shap_values: SHAP values passed to ``shap_importance``.
        feature_names: Labels for the feature axis.
        top_n: Number of bars to keep.
        title: Axes title.
        show_values: Annotate every bar with its value.
        ax: Existing axes to draw on.
        show: Forwarded to ``plot_feature_importance``.
        save_path: Forwarded to ``plot_feature_importance``.
        style: Forwarded to ``plot_feature_importance``.

    Returns:
        The figure returned by ``plot_feature_importance``.
    """
    ranked = shap_importance(shap_values, feature_names)
    forwarded = dict(
        top_n=top_n,
        title=title,
        sort_by="abs",
        descending=True,
        show_values=show_values,
        ax=ax,
        show=show,
        save_path=save_path,
        style=style,
    )
    return plot_feature_importance(ranked, **forwarded)
|
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
import os
|
|
5
|
-
import sys
|
|
6
|
-
import tempfile
|
|
7
|
-
from datetime import datetime
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
from typing import Optional, TextIO
|
|
10
|
-
|
|
11
|
-
_LOGGING_CONFIGURED = False
|
|
12
|
-
_LOG_PATH: Optional[Path] = None
|
|
13
|
-
_LOG_FILE: Optional[TextIO] = None
|
|
14
|
-
|
|
15
|
-
_TRUTHY = {"1", "true", "yes", "y", "on"}
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class _TeeStream:
|
|
19
|
-
def __init__(self, primary: TextIO, secondary: TextIO) -> None:
|
|
20
|
-
self._primary = primary
|
|
21
|
-
self._secondary = secondary
|
|
22
|
-
|
|
23
|
-
def write(self, data: str) -> int:
|
|
24
|
-
if not data:
|
|
25
|
-
return 0
|
|
26
|
-
try:
|
|
27
|
-
self._primary.write(data)
|
|
28
|
-
except Exception:
|
|
29
|
-
pass
|
|
30
|
-
try:
|
|
31
|
-
self._secondary.write(data)
|
|
32
|
-
except Exception:
|
|
33
|
-
pass
|
|
34
|
-
return len(data)
|
|
35
|
-
|
|
36
|
-
def flush(self) -> None:
|
|
37
|
-
for stream in (self._primary, self._secondary):
|
|
38
|
-
try:
|
|
39
|
-
stream.flush()
|
|
40
|
-
except Exception:
|
|
41
|
-
pass
|
|
42
|
-
|
|
43
|
-
def isatty(self) -> bool:
|
|
44
|
-
return bool(getattr(self._primary, "isatty", lambda: False)())
|
|
45
|
-
|
|
46
|
-
def fileno(self) -> int:
|
|
47
|
-
return self._primary.fileno()
|
|
48
|
-
|
|
49
|
-
def __getattr__(self, name: str):
|
|
50
|
-
return getattr(self._primary, name)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def _is_truthy(value: Optional[str]) -> bool:
    """Return True when ``value`` spells one of the accepted "on" words.

    The value is stringified, stripped and lower-cased before the lookup in
    ``_TRUTHY``, so ``None`` (which becomes ``"none"``) is falsy.
    """
    normalized = str(value).strip().lower()
    return normalized in _TRUTHY
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def _resolve_log_dir(log_dir: Optional[str | Path]) -> Optional[Path]:
|
|
58
|
-
candidates: list[Path] = []
|
|
59
|
-
if log_dir:
|
|
60
|
-
candidates.append(Path(log_dir).expanduser())
|
|
61
|
-
env_dir = os.environ.get("INS_PRICING_LOG_DIR")
|
|
62
|
-
if env_dir:
|
|
63
|
-
candidates.append(Path(env_dir).expanduser())
|
|
64
|
-
candidates.append(Path.cwd() / "logs")
|
|
65
|
-
candidates.append(Path.home() / ".ins_pricing" / "logs")
|
|
66
|
-
candidates.append(Path(tempfile.gettempdir()) / "ins_pricing_logs")
|
|
67
|
-
|
|
68
|
-
for cand in candidates:
|
|
69
|
-
try:
|
|
70
|
-
cand.mkdir(parents=True, exist_ok=True)
|
|
71
|
-
return cand
|
|
72
|
-
except Exception:
|
|
73
|
-
continue
|
|
74
|
-
return None
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
def _build_log_filename(prefix: str) -> str:
|
|
78
|
-
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
79
|
-
rank = (
|
|
80
|
-
os.environ.get("RANK")
|
|
81
|
-
or os.environ.get("LOCAL_RANK")
|
|
82
|
-
or os.environ.get("SLURM_PROCID")
|
|
83
|
-
)
|
|
84
|
-
suffix = f"r{rank}" if rank is not None else f"pid{os.getpid()}"
|
|
85
|
-
safe_prefix = "".join(
|
|
86
|
-
ch if ch.isalnum() or ch in "-_." else "_" for ch in prefix)
|
|
87
|
-
return f"{safe_prefix}_{ts}_{suffix}.log"
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def configure_run_logging(
    *,
    prefix: str = "ins_pricing",
    log_dir: Optional[str | Path] = None,
    level: int = logging.INFO,
    announce: bool = True,
) -> Optional[Path]:
    """Mirror stdout/stderr into a log file and set up root logging, once.

    The first successful call opens a log file (append mode, UTF-8), replaces
    ``sys.stdout`` and ``sys.stderr`` with ``_TeeStream`` wrappers that also
    write to that file, and records the state in module globals. Later calls
    return the recorded path without doing anything else.

    Args:
        prefix: Leading part of the log file name.
        log_dir: Preferred log directory (see ``_resolve_log_dir``).
        level: Level applied to the root logger.
        announce: Print the log location once logging is set up.

    Returns:
        The log file path, or ``None`` when logging is disabled through
        ``INS_PRICING_LOG_DISABLE``, no directory could be created, or the
        file could not be opened.
    """
    global _LOGGING_CONFIGURED, _LOG_PATH, _LOG_FILE

    if _LOGGING_CONFIGURED:
        return _LOG_PATH

    disabled = _is_truthy(os.environ.get("INS_PRICING_LOG_DISABLE"))
    target_dir = None if disabled else _resolve_log_dir(log_dir)
    if target_dir is None:
        return None

    destination = target_dir / _build_log_filename(prefix)
    try:
        handle = destination.open("a", encoding="utf-8")
    except Exception:
        return None

    # The handle is kept in a module global for the rest of the process.
    sys.stdout = _TeeStream(sys.stdout, handle)  # type: ignore[assignment]
    sys.stderr = _TeeStream(sys.stderr, handle)  # type: ignore[assignment]
    _LOG_FILE, _LOG_PATH, _LOGGING_CONFIGURED = handle, destination, True

    root_logger = logging.getLogger()
    if root_logger.handlers:
        # Someone else already configured logging: only adjust the level.
        root_logger.setLevel(level)
    else:
        logging.basicConfig(
            level=level,
            format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
            handlers=[logging.StreamHandler(sys.stdout)],
        )

    if announce:
        print(f"[ins_pricing] log saved to {destination}", flush=True)

    return destination
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
import types
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
import pandas as pd
|
|
5
|
-
import pytest
|
|
6
|
-
|
|
7
|
-
pytest.importorskip("torch")
|
|
8
|
-
pytest.importorskip("optuna")
|
|
9
|
-
pytest.importorskip("statsmodels")
|
|
10
|
-
pytest.importorskip("xgboost")
|
|
11
|
-
|
|
12
|
-
from ins_pricing.bayesopt.trainers import TrainerBase
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class DummyTrainer(TrainerBase):
    """Smallest concrete ``TrainerBase`` needed to exercise its shared helpers."""

    def __init__(self):
        # Stand-in context exposing only `prop_test` and `rand_seed`.
        context = types.SimpleNamespace(prop_test=0.2, rand_seed=123)
        super().__init__(context, "Dummy", "Dummy")

    def train(self) -> None:  # pragma: no cover - not used
        raise NotImplementedError
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def test_cross_val_generic_iterates_all_splits():
    """``cross_val_generic`` must score every split of an explicit splitter."""
    trainer = DummyTrainer()

    n_rows = 12
    X = pd.DataFrame({"x": np.arange(n_rows, dtype=float)})
    y = pd.Series(np.arange(n_rows, dtype=float))
    w = pd.Series(np.ones(n_rows, dtype=float))

    class DummyModel:
        """Model stub: fitting is a no-op and every prediction is zero."""

        def fit(self, X_train, y_train, sample_weight=None):
            return self

        def predict(self, X_val):
            return np.zeros(len(X_val))

    seen_sizes = []

    def metric_fn(y_true, y_pred, weight):
        # Record one entry per scored fold; the score is the mean prediction.
        seen_sizes.append(len(y_true))
        return float(np.mean(y_pred))

    # Three hand-written (train, validation) index pairs.
    splits = [
        (np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), np.array([10, 11])),
        (np.array([2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), np.array([0, 1])),
        (np.array([0, 2, 4, 6, 8, 10]), np.array([1, 3, 5, 7, 9, 11])),
    ]

    result = trainer.cross_val_generic(
        trial=object(),
        hyperparameter_space={"p": lambda _t: 1.0},
        data_provider=lambda: (X, y, w),
        model_builder=lambda _params: DummyModel(),
        metric_fn=metric_fn,
        splitter=splits,
    )

    assert result == 0.0
    assert len(seen_sizes) == len(splits)
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
pytest.importorskip("torch")
|
|
4
|
-
|
|
5
|
-
from ins_pricing.bayesopt.utils import DistributedUtils
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def test_setup_ddp_without_env(monkeypatch):
    """Without the DDP environment variables, ``setup_ddp`` reports a single process."""
    for env_name in ("RANK", "WORLD_SIZE", "LOCAL_RANK"):
        monkeypatch.delenv(env_name, raising=False)

    ok, local_rank, rank, world_size = DistributedUtils.setup_ddp()

    assert ok is False
    assert (local_rank, rank, world_size) == (0, 0, 1)
|