ins-pricing 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/README.md +60 -0
- ins_pricing/__init__.py +102 -0
- ins_pricing/governance/README.md +18 -0
- ins_pricing/governance/__init__.py +20 -0
- ins_pricing/governance/approval.py +93 -0
- ins_pricing/governance/audit.py +37 -0
- ins_pricing/governance/registry.py +99 -0
- ins_pricing/governance/release.py +159 -0
- ins_pricing/modelling/BayesOpt.py +146 -0
- ins_pricing/modelling/BayesOpt_USAGE.md +925 -0
- ins_pricing/modelling/BayesOpt_entry.py +575 -0
- ins_pricing/modelling/BayesOpt_incremental.py +731 -0
- ins_pricing/modelling/Explain_Run.py +36 -0
- ins_pricing/modelling/Explain_entry.py +539 -0
- ins_pricing/modelling/Pricing_Run.py +36 -0
- ins_pricing/modelling/README.md +33 -0
- ins_pricing/modelling/__init__.py +44 -0
- ins_pricing/modelling/bayesopt/__init__.py +98 -0
- ins_pricing/modelling/bayesopt/config_preprocess.py +303 -0
- ins_pricing/modelling/bayesopt/core.py +1476 -0
- ins_pricing/modelling/bayesopt/models.py +2196 -0
- ins_pricing/modelling/bayesopt/trainers.py +2446 -0
- ins_pricing/modelling/bayesopt/utils.py +1021 -0
- ins_pricing/modelling/cli_common.py +136 -0
- ins_pricing/modelling/explain/__init__.py +55 -0
- ins_pricing/modelling/explain/gradients.py +334 -0
- ins_pricing/modelling/explain/metrics.py +176 -0
- ins_pricing/modelling/explain/permutation.py +155 -0
- ins_pricing/modelling/explain/shap_utils.py +146 -0
- ins_pricing/modelling/notebook_utils.py +284 -0
- ins_pricing/modelling/plotting/__init__.py +45 -0
- ins_pricing/modelling/plotting/common.py +63 -0
- ins_pricing/modelling/plotting/curves.py +572 -0
- ins_pricing/modelling/plotting/diagnostics.py +139 -0
- ins_pricing/modelling/plotting/geo.py +362 -0
- ins_pricing/modelling/plotting/importance.py +121 -0
- ins_pricing/modelling/run_logging.py +133 -0
- ins_pricing/modelling/tests/conftest.py +8 -0
- ins_pricing/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing/modelling/tests/test_explain.py +56 -0
- ins_pricing/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing/modelling/tests/test_plotting.py +63 -0
- ins_pricing/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing/modelling/watchdog_run.py +211 -0
- ins_pricing/pricing/README.md +44 -0
- ins_pricing/pricing/__init__.py +27 -0
- ins_pricing/pricing/calibration.py +39 -0
- ins_pricing/pricing/data_quality.py +117 -0
- ins_pricing/pricing/exposure.py +85 -0
- ins_pricing/pricing/factors.py +91 -0
- ins_pricing/pricing/monitoring.py +99 -0
- ins_pricing/pricing/rate_table.py +78 -0
- ins_pricing/production/__init__.py +21 -0
- ins_pricing/production/drift.py +30 -0
- ins_pricing/production/monitoring.py +143 -0
- ins_pricing/production/scoring.py +40 -0
- ins_pricing/reporting/README.md +20 -0
- ins_pricing/reporting/__init__.py +11 -0
- ins_pricing/reporting/report_builder.py +72 -0
- ins_pricing/reporting/scheduler.py +45 -0
- ins_pricing/setup.py +41 -0
- ins_pricing v2/__init__.py +23 -0
- ins_pricing v2/governance/__init__.py +20 -0
- ins_pricing v2/governance/approval.py +93 -0
- ins_pricing v2/governance/audit.py +37 -0
- ins_pricing v2/governance/registry.py +99 -0
- ins_pricing v2/governance/release.py +159 -0
- ins_pricing v2/modelling/Explain_Run.py +36 -0
- ins_pricing v2/modelling/Pricing_Run.py +36 -0
- ins_pricing v2/modelling/__init__.py +151 -0
- ins_pricing v2/modelling/cli_common.py +141 -0
- ins_pricing v2/modelling/config.py +249 -0
- ins_pricing v2/modelling/config_preprocess.py +254 -0
- ins_pricing v2/modelling/core.py +741 -0
- ins_pricing v2/modelling/data_container.py +42 -0
- ins_pricing v2/modelling/explain/__init__.py +55 -0
- ins_pricing v2/modelling/explain/gradients.py +334 -0
- ins_pricing v2/modelling/explain/metrics.py +176 -0
- ins_pricing v2/modelling/explain/permutation.py +155 -0
- ins_pricing v2/modelling/explain/shap_utils.py +146 -0
- ins_pricing v2/modelling/features.py +215 -0
- ins_pricing v2/modelling/model_manager.py +148 -0
- ins_pricing v2/modelling/model_plotting.py +463 -0
- ins_pricing v2/modelling/models.py +2203 -0
- ins_pricing v2/modelling/notebook_utils.py +294 -0
- ins_pricing v2/modelling/plotting/__init__.py +45 -0
- ins_pricing v2/modelling/plotting/common.py +63 -0
- ins_pricing v2/modelling/plotting/curves.py +572 -0
- ins_pricing v2/modelling/plotting/diagnostics.py +139 -0
- ins_pricing v2/modelling/plotting/geo.py +362 -0
- ins_pricing v2/modelling/plotting/importance.py +121 -0
- ins_pricing v2/modelling/run_logging.py +133 -0
- ins_pricing v2/modelling/tests/conftest.py +8 -0
- ins_pricing v2/modelling/tests/test_cross_val_generic.py +66 -0
- ins_pricing v2/modelling/tests/test_distributed_utils.py +18 -0
- ins_pricing v2/modelling/tests/test_explain.py +56 -0
- ins_pricing v2/modelling/tests/test_geo_tokens_split.py +49 -0
- ins_pricing v2/modelling/tests/test_graph_cache.py +33 -0
- ins_pricing v2/modelling/tests/test_plotting.py +63 -0
- ins_pricing v2/modelling/tests/test_plotting_library.py +150 -0
- ins_pricing v2/modelling/tests/test_preprocessor.py +48 -0
- ins_pricing v2/modelling/trainers.py +2447 -0
- ins_pricing v2/modelling/utils.py +1020 -0
- ins_pricing v2/modelling/watchdog_run.py +211 -0
- ins_pricing v2/pricing/__init__.py +27 -0
- ins_pricing v2/pricing/calibration.py +39 -0
- ins_pricing v2/pricing/data_quality.py +117 -0
- ins_pricing v2/pricing/exposure.py +85 -0
- ins_pricing v2/pricing/factors.py +91 -0
- ins_pricing v2/pricing/monitoring.py +99 -0
- ins_pricing v2/pricing/rate_table.py +78 -0
- ins_pricing v2/production/__init__.py +21 -0
- ins_pricing v2/production/drift.py +30 -0
- ins_pricing v2/production/monitoring.py +143 -0
- ins_pricing v2/production/scoring.py +40 -0
- ins_pricing v2/reporting/__init__.py +11 -0
- ins_pricing v2/reporting/report_builder.py +72 -0
- ins_pricing v2/reporting/scheduler.py +45 -0
- ins_pricing v2/scripts/BayesOpt_incremental.py +722 -0
- ins_pricing v2/scripts/Explain_entry.py +545 -0
- ins_pricing v2/scripts/__init__.py +1 -0
- ins_pricing v2/scripts/train.py +568 -0
- ins_pricing v2/setup.py +55 -0
- ins_pricing v2/smoke_test.py +28 -0
- ins_pricing-0.1.6.dist-info/METADATA +78 -0
- ins_pricing-0.1.6.dist-info/RECORD +169 -0
- ins_pricing-0.1.6.dist-info/WHEEL +5 -0
- ins_pricing-0.1.6.dist-info/top_level.txt +4 -0
- user_packages/__init__.py +105 -0
- user_packages legacy/BayesOpt.py +5659 -0
- user_packages legacy/BayesOpt_entry.py +513 -0
- user_packages legacy/BayesOpt_incremental.py +685 -0
- user_packages legacy/Pricing_Run.py +36 -0
- user_packages legacy/Try/BayesOpt Legacy251213.py +3719 -0
- user_packages legacy/Try/BayesOpt Legacy251215.py +3758 -0
- user_packages legacy/Try/BayesOpt lagecy251201.py +3506 -0
- user_packages legacy/Try/BayesOpt lagecy251218.py +3992 -0
- user_packages legacy/Try/BayesOpt legacy.py +3280 -0
- user_packages legacy/Try/BayesOpt.py +838 -0
- user_packages legacy/Try/BayesOptAll.py +1569 -0
- user_packages legacy/Try/BayesOptAllPlatform.py +909 -0
- user_packages legacy/Try/BayesOptCPUGPU.py +1877 -0
- user_packages legacy/Try/BayesOptSearch.py +830 -0
- user_packages legacy/Try/BayesOptSearchOrigin.py +829 -0
- user_packages legacy/Try/BayesOptV1.py +1911 -0
- user_packages legacy/Try/BayesOptV10.py +2973 -0
- user_packages legacy/Try/BayesOptV11.py +3001 -0
- user_packages legacy/Try/BayesOptV12.py +3001 -0
- user_packages legacy/Try/BayesOptV2.py +2065 -0
- user_packages legacy/Try/BayesOptV3.py +2209 -0
- user_packages legacy/Try/BayesOptV4.py +2342 -0
- user_packages legacy/Try/BayesOptV5.py +2372 -0
- user_packages legacy/Try/BayesOptV6.py +2759 -0
- user_packages legacy/Try/BayesOptV7.py +2832 -0
- user_packages legacy/Try/BayesOptV8Codex.py +2731 -0
- user_packages legacy/Try/BayesOptV8Gemini.py +2614 -0
- user_packages legacy/Try/BayesOptV9.py +2927 -0
- user_packages legacy/Try/BayesOpt_entry legacy.py +313 -0
- user_packages legacy/Try/ModelBayesOptSearch.py +359 -0
- user_packages legacy/Try/ResNetBayesOptSearch.py +249 -0
- user_packages legacy/Try/XgbBayesOptSearch.py +121 -0
- user_packages legacy/Try/xgbbayesopt.py +523 -0
- user_packages legacy/__init__.py +19 -0
- user_packages legacy/cli_common.py +124 -0
- user_packages legacy/notebook_utils.py +228 -0
- user_packages legacy/watchdog_run.py +202 -0
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional, Sequence, Tuple
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import matplotlib.tri as mtri
|
|
8
|
+
|
|
9
|
+
from .common import EPS, PlotStyle, finalize_figure, plt
|
|
10
|
+
|
|
11
|
+
try: # optional map basemap support
|
|
12
|
+
import contextily as cx
|
|
13
|
+
except Exception: # pragma: no cover - optional dependency
|
|
14
|
+
cx = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
_MERCATOR_MAX_LAT = 85.05112878
|
|
18
|
+
_MERCATOR_FACTOR = 20037508.34
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _require_contextily(func_name: str) -> None:
    """Fail fast with an actionable error when contextily is unavailable.

    The module-level ``cx`` is None when the optional import failed.
    """
    if cx is not None:
        return
    message = f"{func_name} requires contextily. Install it via 'pip install contextily'."
    raise RuntimeError(message)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _lonlat_to_mercator(lon: np.ndarray, lat: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Project lon/lat degrees to Web-Mercator (EPSG:3857) metres.

    Latitude is clipped to the Mercator validity band (+/-85.05...) before
    projecting, matching what tile basemaps can display.
    """
    lon_arr = np.asarray(lon, dtype=float)
    lat_arr = np.clip(np.asarray(lat, dtype=float), -_MERCATOR_MAX_LAT, _MERCATOR_MAX_LAT)
    merc_x = lon_arr * _MERCATOR_FACTOR / 180.0
    merc_y = np.log(np.tan((90.0 + lat_arr) * np.pi / 360.0)) * _MERCATOR_FACTOR / np.pi
    return merc_x, merc_y
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _apply_bounds(ax: plt.Axes, x: np.ndarray, y: np.ndarray, padding: float) -> None:
    """Set axis limits to the data extent expanded by a relative padding.

    A degenerate (zero-width) extent gets an absolute pad of 1.0 so the
    limits never collapse onto a single point.
    """
    # x-limits first, then y-limits, mirroring the original call order.
    for set_limits, values in ((ax.set_xlim, x), (ax.set_ylim, y)):
        lo = float(np.min(values))
        hi = float(np.max(values))
        pad = (hi - lo) * padding
        if pad == 0:
            pad = 1.0
        set_limits(lo - pad, hi + pad)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _resolve_basemap(source):
    """Resolve a dotted provider name (e.g. 'CartoDB.Positron') to a
    contextily provider object.

    Non-string sources (already-resolved providers, None) and any source
    when contextily is missing are passed through unchanged.
    """
    if cx is None or source is None:
        return source
    if not isinstance(source, str):
        return source
    node = cx.providers
    for segment in source.split("."):
        # Provider trees may be nested dicts or attribute namespaces.
        node = node[segment] if isinstance(node, dict) else getattr(node, segment)
    return node
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _sanitize_geo(
    df: pd.DataFrame,
    x_col: str,
    y_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]]:
    """Coerce coordinate, value and optional weight columns to float arrays,
    dropping every row where any required column is non-finite.

    Returns (x, y, z, w) with w being None when no weight column is given.
    """
    def as_float_array(name: str) -> np.ndarray:
        # errors="coerce" turns unparseable entries into NaN, removed below.
        return pd.to_numeric(df[name], errors="coerce").to_numpy(dtype=float)

    x = as_float_array(x_col)
    y = as_float_array(y_col)
    z = as_float_array(value_col)
    w = as_float_array(weight_col) if weight_col else None

    keep = np.isfinite(x) & np.isfinite(y) & np.isfinite(z)
    if w is not None:
        keep &= np.isfinite(w)
        w = w[keep]
    return x[keep], y[keep], z[keep], w
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _downsample_points(
    x: np.ndarray,
    y: np.ndarray,
    z: np.ndarray,
    w: Optional[np.ndarray],
    max_points: Optional[int],
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Optional[np.ndarray]]:
    """Keep at most ``max_points`` rows, sampled without replacement.

    Sampling uses a fixed seed (13) so repeated plots of the same data are
    reproducible. ``None`` or non-positive limits disable downsampling.
    """
    if max_points is None:
        return x, y, z, w
    limit = int(max_points)
    if limit <= 0 or len(x) <= limit:
        return x, y, z, w
    idx = np.random.default_rng(13).choice(len(x), size=limit, replace=False)
    if w is not None:
        return x[idx], y[idx], z[idx], w[idx]
    return x[idx], y[idx], z[idx], None
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def plot_geo_heatmap(
    df: pd.DataFrame,
    *,
    x_col: str,
    y_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    bins: int | Tuple[int, int] = 50,
    agg: str = "mean",
    cmap: str = "YlOrRd",
    title: str = "Geo Heatmap",
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Render a 2D-binned heatmap of ``value_col`` over x/y coordinates.

    Values are aggregated per bin as a plain sum or mean; when
    ``weight_col`` is given, the bin value is the weighted mean
    (sum(z*w) / sum(w)) regardless of ``agg``. Rows with non-finite
    entries are dropped by ``_sanitize_geo``.

    Returns the matplotlib Figure. When ``ax`` is supplied the caller owns
    figure finalization; otherwise a new figure is created and finalized
    via ``finalize_figure`` (save/show per arguments).
    """
    style = style or PlotStyle()
    if agg not in {"mean", "sum"}:
        raise ValueError("agg must be 'mean' or 'sum'.")
    x, y, z, w = _sanitize_geo(df, x_col, y_col, value_col, weight_col)

    # A single int means the same bin count on both axes.
    if isinstance(bins, int):
        bins = (bins, bins)

    if w is None:
        sum_z, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=z)
        if agg == "sum":
            grid = sum_z
        else:
            # Mean per bin; max(count, 1) avoids division by zero for
            # empty bins (their sum is 0, so the mean stays 0).
            count, _, _ = np.histogram2d(x, y, bins=bins)
            grid = sum_z / np.maximum(count, 1.0)
    else:
        # Weighted mean per bin: sum(z*w) / sum(w), EPS guards empty bins.
        sum_w, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=w)
        sum_zw, _, _ = np.histogram2d(x, y, bins=bins, weights=z * w)
        grid = sum_zw / np.maximum(sum_w, EPS)

    created_fig = ax is None
    if created_fig:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    # histogram2d returns x-by-y; transpose so rows map to the y axis as
    # imshow expects, and anchor the image to the real bin edges.
    im = ax.imshow(
        grid.T,
        origin="lower",
        extent=[x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]],
        aspect="auto",
        cmap=cmap,
    )
    cbar = fig.colorbar(im, ax=ax)
    cbar.set_label(value_col, fontsize=style.label_size)
    cbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_xlabel(x_col, fontsize=style.label_size)
    ax.set_ylabel(y_col, fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    # Only finalize (save/show/layout) figures we created ourselves.
    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def plot_geo_contour(
    df: pd.DataFrame,
    *,
    x_col: str,
    y_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    max_points: Optional[int] = None,
    levels: int | Sequence[float] = 10,
    cmap: str = "viridis",
    title: str = "Geo Contour",
    ax: Optional[plt.Axes] = None,
    show_points: bool = False,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Render a filled contour plot of ``value_col`` over scattered x/y points.

    Points are Delaunay-triangulated (matplotlib Triangulation) and filled
    with ``tricontourf``. ``max_points`` optionally downsamples first.

    NOTE(review): when ``weight_col`` is given the plotted value is z*w
    (a product, not a weighted average) — confirm this is the intended
    weighting semantics.
    """
    style = style or PlotStyle()
    x, y, z, w = _sanitize_geo(df, x_col, y_col, value_col, weight_col)
    x, y, z, w = _downsample_points(x, y, z, w, max_points)

    if w is not None:
        z = z * w

    # Delaunay triangulation over the (possibly downsampled) point cloud.
    triang = mtri.Triangulation(x, y)

    created_fig = ax is None
    if created_fig:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    contour = ax.tricontourf(triang, z, levels=levels, cmap=cmap)
    if show_points:
        # Faint overlay of the raw sample locations.
        ax.scatter(x, y, s=6, c="k", alpha=0.2)
    cbar = fig.colorbar(contour, ax=ax)
    cbar.set_label(value_col, fontsize=style.label_size)
    cbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_xlabel(x_col, fontsize=style.label_size)
    ax.set_ylabel(y_col, fontsize=style.label_size)
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    # Only finalize (save/show/layout) figures we created ourselves.
    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def plot_geo_heatmap_on_map(
    df: pd.DataFrame,
    *,
    lon_col: str,
    lat_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    bins: int | Tuple[int, int] = 100,
    agg: str = "mean",
    cmap: str = "YlOrRd",
    alpha: float = 0.6,
    basemap: Optional[object] = "CartoDB.Positron",
    zoom: Optional[int] = None,
    padding: float = 0.05,
    title: str = "Geo Heatmap (Map)",
    ax: Optional[plt.Axes] = None,
    show_points: bool = False,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Binned heatmap of ``value_col`` drawn over a contextily web-map.

    Coordinates are lon/lat degrees, projected to Web-Mercator before
    binning so the raster aligns with the basemap tiles (EPSG:3857).
    Aggregation mirrors ``plot_geo_heatmap``: sum or mean, or the weighted
    mean when ``weight_col`` is provided.

    Raises RuntimeError when contextily is not installed.
    """
    _require_contextily("plot_geo_heatmap_on_map")
    style = style or PlotStyle()
    if agg not in {"mean", "sum"}:
        raise ValueError("agg must be 'mean' or 'sum'.")
    lon, lat, z, w = _sanitize_geo(df, lon_col, lat_col, value_col, weight_col)
    x, y = _lonlat_to_mercator(lon, lat)

    # A single int means the same bin count on both axes.
    if isinstance(bins, int):
        bins = (bins, bins)

    if w is None:
        sum_z, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=z)
        if agg == "sum":
            grid = sum_z
        else:
            # max(count, 1) keeps empty bins at 0 instead of NaN.
            count, _, _ = np.histogram2d(x, y, bins=bins)
            grid = sum_z / np.maximum(count, 1.0)
    else:
        # Weighted mean per bin; EPS guards empty bins.
        sum_w, x_edges, y_edges = np.histogram2d(x, y, bins=bins, weights=w)
        sum_zw, _, _ = np.histogram2d(x, y, bins=bins, weights=z * w)
        grid = sum_zw / np.maximum(sum_w, EPS)

    created_fig = ax is None
    if created_fig:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    # Limits must be set BEFORE add_basemap: contextily fetches tiles for
    # the current axis extent. Equal aspect keeps Mercator geometry square.
    _apply_bounds(ax, x, y, padding)
    ax.set_aspect("equal", adjustable="box")

    source = _resolve_basemap(basemap)
    if source is not None:
        if zoom is None:
            cx.add_basemap(ax, source=source, crs="EPSG:3857")
        else:
            cx.add_basemap(ax, source=source, crs="EPSG:3857", zoom=zoom)

    # Raster drawn after the basemap so it overlays the tiles; alpha keeps
    # the map visible underneath.
    im = ax.imshow(
        grid.T,
        origin="lower",
        extent=[x_edges[0], x_edges[-1], y_edges[0], y_edges[-1]],
        aspect="auto",
        cmap=cmap,
        alpha=alpha,
    )
    if show_points:
        ax.scatter(x, y, s=6, c="k", alpha=0.25)

    cbar = fig.colorbar(im, ax=ax)
    cbar.set_label(value_col, fontsize=style.label_size)
    cbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    # Only finalize (save/show/layout) figures we created ourselves.
    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def plot_geo_contour_on_map(
    df: pd.DataFrame,
    *,
    lon_col: str,
    lat_col: str,
    value_col: str,
    weight_col: Optional[str] = None,
    max_points: Optional[int] = None,
    levels: int | Sequence[float] = 10,
    cmap: str = "viridis",
    alpha: float = 0.6,
    basemap: Optional[object] = "CartoDB.Positron",
    zoom: Optional[int] = None,
    padding: float = 0.05,
    title: str = "Geo Contour (Map)",
    ax: Optional[plt.Axes] = None,
    show_points: bool = False,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Filled contour plot of ``value_col`` drawn over a contextily web-map.

    Lon/lat are projected to Web-Mercator (EPSG:3857) so the triangulated
    contours align with the basemap tiles. ``max_points`` optionally
    downsamples before triangulation.

    NOTE(review): as in ``plot_geo_contour``, a weight column multiplies
    the plotted value (z*w) rather than forming a weighted average —
    confirm intent.

    Raises RuntimeError when contextily is not installed.
    """
    _require_contextily("plot_geo_contour_on_map")
    style = style or PlotStyle()
    lon, lat, z, w = _sanitize_geo(df, lon_col, lat_col, value_col, weight_col)
    lon, lat, z, w = _downsample_points(lon, lat, z, w, max_points)
    x, y = _lonlat_to_mercator(lon, lat)
    if w is not None:
        z = z * w

    created_fig = ax is None
    if created_fig:
        fig, ax = plt.subplots(figsize=style.figsize)
    else:
        fig = ax.figure

    # Limits must be set BEFORE add_basemap: contextily fetches tiles for
    # the current axis extent. Equal aspect keeps Mercator geometry square.
    _apply_bounds(ax, x, y, padding)
    ax.set_aspect("equal", adjustable="box")

    source = _resolve_basemap(basemap)
    if source is not None:
        if zoom is None:
            cx.add_basemap(ax, source=source, crs="EPSG:3857")
        else:
            cx.add_basemap(ax, source=source, crs="EPSG:3857", zoom=zoom)

    # Contours drawn after the basemap so they overlay the tiles.
    triang = mtri.Triangulation(x, y)
    contour = ax.tricontourf(triang, z, levels=levels, cmap=cmap, alpha=alpha)
    if show_points:
        ax.scatter(x, y, s=6, c="k", alpha=0.25)

    cbar = fig.colorbar(contour, ax=ax)
    cbar.set_label(value_col, fontsize=style.label_size)
    cbar.ax.tick_params(labelsize=style.tick_size)

    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="both", labelsize=style.tick_size)

    # Only finalize (save/show/layout) figures we created ourselves.
    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Mapping, Optional, Sequence, Tuple
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
from .common import PlotStyle, finalize_figure, plt
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _to_series(
    importance: Mapping[str, float]
    | Sequence[Tuple[str, float]]
    | pd.Series
    | np.ndarray,
    feature_names: Optional[Sequence[str]] = None,
) -> pd.Series:
    """Normalize any supported importance container into a pandas Series.

    Accepts a Series (copied), a mapping, a bare ndarray (requires
    ``feature_names``), or a sequence of (name, score) pairs.
    """
    if isinstance(importance, pd.Series):
        # Copy so downstream sorting/truncation never mutates caller data.
        return importance.copy()
    if isinstance(importance, np.ndarray):
        if feature_names is None:
            raise ValueError("feature_names is required when importance is an array.")
        return pd.Series(importance, index=list(feature_names))
    # Mappings and (name, score) pair sequences both go through dict().
    return pd.Series(dict(importance))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def shap_importance(
    shap_values: np.ndarray,
    feature_names: Sequence[str],
) -> pd.Series:
    """Collapse per-sample SHAP values into mean-|value| feature scores.

    A 3D input is reduced to its first slice (``shap_values[0]``) — e.g.
    the first output of a multi-output explainer — before averaging.
    Raises ValueError when the result is not 2D (n_samples, n_features).
    """
    values = shap_values[0] if shap_values.ndim == 3 else shap_values
    if values.ndim != 2:
        raise ValueError("shap_values should be 2d (n_samples, n_features).")
    mean_abs = np.abs(values).mean(axis=0)
    return pd.Series(mean_abs, index=list(feature_names))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def plot_feature_importance(
    importance: Mapping[str, float]
    | Sequence[Tuple[str, float]]
    | pd.Series
    | np.ndarray,
    *,
    feature_names: Optional[Sequence[str]] = None,
    top_n: int = 30,
    title: str = "Feature Importance",
    sort_by: str = "abs",
    descending: bool = True,
    show_values: bool = False,
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Horizontal bar chart of feature importances.

    Scores are normalized via ``_to_series`` (so mappings, pair sequences,
    Series and arrays are all accepted), cleaned of inf/NaN, sorted by
    absolute value or raw value, and truncated to ``top_n`` (``top_n <= 0``
    keeps everything). Returns the matplotlib Figure; when ``ax`` is
    supplied, finalization (save/show) is left to the caller.
    """
    style = style or PlotStyle()
    series = _to_series(importance, feature_names=feature_names)
    # Drop non-finite scores so they cannot dominate sorting or bar extents.
    series = series.replace([np.inf, -np.inf], np.nan).dropna()

    if sort_by not in {"abs", "value"}:
        raise ValueError("sort_by must be 'abs' or 'value'.")
    # Sort by the chosen key but keep the original (signed) values for bars.
    sort_key = series.abs() if sort_by == "abs" else series
    series = series.loc[sort_key.sort_values(ascending=not descending).index]

    if top_n > 0:
        series = series.head(int(top_n))

    created_fig = ax is None
    if created_fig:
        # Scale figure height with the number of bars, with a sane minimum.
        height = max(3.0, 0.3 * len(series))
        fig, ax = plt.subplots(figsize=(style.figsize[0], height))
    else:
        fig = ax.figure

    y_pos = np.arange(len(series))
    ax.barh(y_pos, series.values, color=style.palette[0])
    ax.set_yticks(y_pos)
    ax.set_yticklabels(series.index, fontsize=style.tick_size)
    # Highest-ranked feature at the top of the chart.
    ax.invert_yaxis()
    ax.set_title(title, fontsize=style.title_size)
    ax.tick_params(axis="x", labelsize=style.tick_size)
    if style.grid:
        ax.grid(True, axis="x", linestyle=style.grid_style, alpha=style.grid_alpha)

    if show_values:
        # Annotate each bar with its numeric score.
        for idx, val in enumerate(series.values):
            ax.text(val, idx, f" {val:.3f}", va="center", fontsize=style.tick_size)

    # Only finalize (save/show/layout) figures we created ourselves.
    if created_fig:
        finalize_figure(fig, save_path=save_path, show=show, style=style)

    return fig
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def plot_shap_importance(
    shap_values: np.ndarray,
    feature_names: Sequence[str],
    *,
    top_n: int = 30,
    title: str = "SHAP Importance",
    show_values: bool = False,
    ax: Optional[plt.Axes] = None,
    show: bool = False,
    save_path: Optional[str] = None,
    style: Optional[PlotStyle] = None,
) -> plt.Figure:
    """Convenience wrapper: reduce SHAP values to mean-|value| scores via
    ``shap_importance`` and render them with ``plot_feature_importance``
    (always sorted by absolute value, descending)."""
    scores = shap_importance(shap_values, feature_names)
    render_kwargs = dict(
        top_n=top_n,
        title=title,
        sort_by="abs",
        descending=True,
        show_values=show_values,
        ax=ax,
        show=show,
        save_path=save_path,
        style=style,
    )
    return plot_feature_importance(scores, **render_kwargs)
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import tempfile
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional, TextIO
|
|
10
|
+
|
|
11
|
+
_LOGGING_CONFIGURED = False
|
|
12
|
+
_LOG_PATH: Optional[Path] = None
|
|
13
|
+
_LOG_FILE: Optional[TextIO] = None
|
|
14
|
+
|
|
15
|
+
_TRUTHY = {"1", "true", "yes", "y", "on"}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class _TeeStream:
    """File-like proxy that mirrors every write to two underlying streams.

    Failures while writing or flushing either stream are swallowed so the
    tee can never crash the program it is logging for; ``write`` always
    reports the full data length. Unknown attributes are delegated to the
    primary stream.
    """

    def __init__(self, primary: TextIO, secondary: TextIO) -> None:
        self._primary = primary
        self._secondary = secondary

    def write(self, data: str) -> int:
        if not data:
            return 0
        # Primary first, then secondary; each is best-effort.
        for stream in (self._primary, self._secondary):
            try:
                stream.write(data)
            except Exception:
                pass
        return len(data)

    def flush(self) -> None:
        for stream in (self._primary, self._secondary):
            try:
                stream.flush()
            except Exception:
                pass

    def isatty(self) -> bool:
        probe = getattr(self._primary, "isatty", lambda: False)
        return bool(probe())

    def fileno(self) -> int:
        # Expose the primary's descriptor (e.g. for libraries that dup it).
        return self._primary.fileno()

    def __getattr__(self, name: str):
        # Fallback delegation keeps the tee drop-in compatible with the
        # stream it wraps.
        return getattr(self._primary, name)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _is_truthy(value: Optional[str]) -> bool:
    """Interpret common environment-variable spellings of "enabled".

    None stringifies to "none", which is not in the truthy set, so a
    missing variable reads as False.
    """
    normalized = str(value).strip().lower()
    return normalized in _TRUTHY
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _resolve_log_dir(log_dir: Optional[str | Path]) -> Optional[Path]:
    """Return the first log directory that can actually be created.

    Preference order: explicit argument, the INS_PRICING_LOG_DIR
    environment variable, ./logs, ~/.ins_pricing/logs, then a temp-dir
    fallback. Returns None when every candidate fails (e.g. read-only
    filesystems).
    """
    env_dir = os.environ.get("INS_PRICING_LOG_DIR")
    candidates = [
        Path(log_dir).expanduser() if log_dir else None,
        Path(env_dir).expanduser() if env_dir else None,
        Path.cwd() / "logs",
        Path.home() / ".ins_pricing" / "logs",
        Path(tempfile.gettempdir()) / "ins_pricing_logs",
    ]
    for candidate in candidates:
        if candidate is None:
            continue
        try:
            candidate.mkdir(parents=True, exist_ok=True)
        except Exception:
            # Unwritable/invalid location — fall through to the next one.
            continue
        return candidate
    return None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _build_log_filename(prefix: str) -> str:
    """Build a collision-resistant log name: <prefix>_<timestamp>_<rank|pid>.log.

    The suffix uses the distributed rank when one of RANK / LOCAL_RANK /
    SLURM_PROCID is set (so each worker gets its own file), otherwise the
    process id. Unsafe characters in the prefix are replaced with '_'.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    rank = (
        os.environ.get("RANK")
        or os.environ.get("LOCAL_RANK")
        or os.environ.get("SLURM_PROCID")
    )
    suffix = f"r{rank}" if rank is not None else f"pid{os.getpid()}"
    safe_chars = [ch if ch.isalnum() or ch in "-_." else "_" for ch in prefix]
    safe_prefix = "".join(safe_chars)
    return f"{safe_prefix}_{timestamp}_{suffix}.log"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def configure_run_logging(
    *,
    prefix: str = "ins_pricing",
    log_dir: Optional[str | Path] = None,
    level: int = logging.INFO,
    announce: bool = True,
) -> Optional[Path]:
    """Tee stdout/stderr into a timestamped log file and configure logging.

    Idempotent: the first successful call installs the tee and records the
    path; later calls return that same path. Returns None (without side
    effects) when logging is disabled via INS_PRICING_LOG_DISABLE, no log
    directory can be created, or the log file cannot be opened.
    """
    global _LOGGING_CONFIGURED, _LOG_PATH, _LOG_FILE

    if _LOGGING_CONFIGURED:
        return _LOG_PATH
    if _is_truthy(os.environ.get("INS_PRICING_LOG_DISABLE")):
        return None

    resolved_dir = _resolve_log_dir(log_dir)
    if resolved_dir is None:
        return None

    log_path = resolved_dir / _build_log_filename(prefix)
    try:
        # Append mode so a reused path never clobbers earlier output.
        log_file = log_path.open("a", encoding="utf-8")
    except Exception:
        return None

    # Replace the process streams with tees; everything printed from here
    # on also lands in the log file.
    sys.stdout = _TeeStream(sys.stdout, log_file)  # type: ignore[assignment]
    sys.stderr = _TeeStream(sys.stderr, log_file)  # type: ignore[assignment]
    # Keep a module-level reference so the file object is not GC-closed.
    _LOG_FILE = log_file
    _LOG_PATH = log_path
    _LOGGING_CONFIGURED = True

    root = logging.getLogger()
    if not root.handlers:
        # Fresh process: route logging through the (now teed) stdout.
        logging.basicConfig(
            level=level,
            format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
            handlers=[logging.StreamHandler(sys.stdout)],
        )
    else:
        # Respect existing handlers; only adjust the level.
        root.setLevel(level)

    if announce:
        print(f"[ins_pricing] log saved to {log_path}", flush=True)

    return log_path
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import types
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
pytest.importorskip("torch")
|
|
8
|
+
pytest.importorskip("optuna")
|
|
9
|
+
pytest.importorskip("statsmodels")
|
|
10
|
+
pytest.importorskip("xgboost")
|
|
11
|
+
|
|
12
|
+
from ins_pricing.bayesopt.trainers import TrainerBase
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DummyTrainer(TrainerBase):
    """Minimal concrete TrainerBase: provides just enough context for the
    base-class helpers (e.g. cross_val_generic) to run in tests."""

    def __init__(self):
        # Only the attributes the base class reads here are supplied.
        ctx = types.SimpleNamespace(prop_test=0.2, rand_seed=123)
        super().__init__(ctx, "Dummy", "Dummy")

    def train(self) -> None:  # pragma: no cover - not used
        raise NotImplementedError
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_cross_val_generic_iterates_all_splits():
    """cross_val_generic should fit/score once per provided split and
    aggregate the per-split metric (all-zero predictions => 0.0)."""
    trainer = DummyTrainer()

    X = pd.DataFrame({"x": np.arange(12, dtype=float)})
    y = pd.Series(np.arange(12, dtype=float))
    w = pd.Series(np.ones(12, dtype=float))

    def data_provider():
        return X, y, w

    # Stub model: always predicts zeros, so the mean metric is 0.0.
    class DummyModel:
        def fit(self, X_train, y_train, sample_weight=None):
            return self

        def predict(self, X_val):
            return np.zeros(len(X_val))

    def model_builder(_params):
        return DummyModel()

    # Record the validation-fold size for every metric evaluation.
    calls = []

    def metric_fn(y_true, y_pred, weight):
        calls.append(len(y_true))
        return float(np.mean(y_pred))

    # Explicit (train_idx, val_idx) pairs instead of a sklearn splitter.
    splits = [
        (np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), np.array([10, 11])),
        (np.array([2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), np.array([0, 1])),
        (np.array([0, 2, 4, 6, 8, 10]), np.array([1, 3, 5, 7, 9, 11])),
    ]

    result = trainer.cross_val_generic(
        trial=object(),
        hyperparameter_space={"p": lambda _t: 1.0},
        data_provider=data_provider,
        model_builder=model_builder,
        metric_fn=metric_fn,
        splitter=splits,
    )

    assert result == 0.0
    # The metric must have been evaluated exactly once per split.
    assert len(calls) == len(splits)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
pytest.importorskip("torch")
|
|
4
|
+
|
|
5
|
+
from ins_pricing.bayesopt.utils import DistributedUtils
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_setup_ddp_without_env(monkeypatch):
    """Without RANK/WORLD_SIZE/LOCAL_RANK in the environment, setup_ddp
    should report no DDP context and fall back to single-process defaults
    (local_rank 0, rank 0, world size 1)."""
    monkeypatch.delenv("RANK", raising=False)
    monkeypatch.delenv("WORLD_SIZE", raising=False)
    monkeypatch.delenv("LOCAL_RANK", raising=False)

    ok, local_rank, rank, world_size = DistributedUtils.setup_ddp()

    assert ok is False
    assert local_rank == 0
    assert rank == 0
    assert world_size == 1
|