ins-pricing 0.4.5-py3-none-any.whl → 0.5.1-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- ins_pricing/README.md +48 -22
- ins_pricing/__init__.py +142 -90
- ins_pricing/cli/BayesOpt_entry.py +58 -46
- ins_pricing/cli/BayesOpt_incremental.py +77 -110
- ins_pricing/cli/Explain_Run.py +42 -23
- ins_pricing/cli/Explain_entry.py +551 -577
- ins_pricing/cli/Pricing_Run.py +42 -23
- ins_pricing/cli/bayesopt_entry_runner.py +51 -16
- ins_pricing/cli/utils/bootstrap.py +23 -0
- ins_pricing/cli/utils/cli_common.py +256 -256
- ins_pricing/cli/utils/cli_config.py +379 -360
- ins_pricing/cli/utils/import_resolver.py +375 -358
- ins_pricing/cli/utils/notebook_utils.py +256 -242
- ins_pricing/cli/watchdog_run.py +216 -198
- ins_pricing/frontend/__init__.py +10 -10
- ins_pricing/frontend/app.py +132 -61
- ins_pricing/frontend/config_builder.py +33 -0
- ins_pricing/frontend/example_config.json +11 -0
- ins_pricing/frontend/example_workflows.py +1 -1
- ins_pricing/frontend/runner.py +340 -388
- ins_pricing/governance/__init__.py +20 -20
- ins_pricing/governance/release.py +159 -159
- ins_pricing/modelling/README.md +1 -1
- ins_pricing/modelling/__init__.py +147 -92
- ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +31 -13
- ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
- ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +12 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +589 -552
- ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +987 -958
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
- ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +488 -548
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +349 -342
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +921 -913
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +794 -785
- ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +454 -446
- ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1294 -1282
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +64 -56
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +203 -198
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +333 -325
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +279 -267
- ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +515 -313
- ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
- ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +193 -186
- ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
- ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
- ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
- ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +636 -623
- ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
- ins_pricing/modelling/explain/__init__.py +55 -55
- ins_pricing/modelling/explain/metrics.py +27 -174
- ins_pricing/modelling/explain/permutation.py +237 -237
- ins_pricing/modelling/plotting/__init__.py +40 -36
- ins_pricing/modelling/plotting/compat.py +228 -0
- ins_pricing/modelling/plotting/curves.py +572 -572
- ins_pricing/modelling/plotting/diagnostics.py +163 -163
- ins_pricing/modelling/plotting/geo.py +362 -362
- ins_pricing/modelling/plotting/importance.py +121 -121
- ins_pricing/pricing/__init__.py +27 -27
- ins_pricing/pricing/factors.py +67 -56
- ins_pricing/production/__init__.py +35 -25
- ins_pricing/production/{predict.py → inference.py} +140 -57
- ins_pricing/production/monitoring.py +8 -21
- ins_pricing/reporting/__init__.py +11 -11
- ins_pricing/setup.py +1 -1
- ins_pricing/tests/production/test_inference.py +90 -0
- ins_pricing/utils/__init__.py +112 -78
- ins_pricing/utils/device.py +258 -237
- ins_pricing/utils/features.py +53 -0
- ins_pricing/utils/io.py +72 -0
- ins_pricing/utils/logging.py +34 -1
- ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
- ins_pricing/utils/metrics.py +158 -24
- ins_pricing/utils/numerics.py +76 -0
- ins_pricing/utils/paths.py +9 -1
- ins_pricing/utils/profiling.py +8 -4
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/METADATA +1 -1
- ins_pricing-0.5.1.dist-info/RECORD +132 -0
- ins_pricing/modelling/core/BayesOpt.py +0 -146
- ins_pricing/modelling/core/__init__.py +0 -1
- ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
- ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
- ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
- ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
- ins_pricing/modelling/core/bayesopt/utils.py +0 -105
- ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
- ins_pricing/tests/production/test_predict.py +0 -233
- ins_pricing-0.4.5.dist-info/RECORD +0 -130
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/WHEEL +0 -0
- {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/top_level.txt +0 -0
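The renames above imply import-path changes for downstream code. Below is a minimal before/after sketch, assuming each moved module keeps its public surface; only the paths shown in this listing are confirmed by the diff.

```python
# 0.4.5 module paths removed in this release:
#   ins_pricing.modelling.core.bayesopt.core
#   ins_pricing.modelling.core.bayesopt.utils.losses
#   ins_pricing.production.predict

# 0.5.1 equivalents implied by the renames in the listing:
import ins_pricing.modelling.bayesopt.core   # modelling/{core/bayesopt -> bayesopt}
import ins_pricing.utils.losses              # {modelling/core/bayesopt/utils -> utils}/losses.py
import ins_pricing.production.inference      # production/{predict.py -> inference.py}
```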
ins_pricing/utils/device.py
CHANGED

@@ -1,76 +1,91 @@

```python
"""Device management utilities for PyTorch models.

This module consolidates GPU/CPU device management logic from:
- modelling/bayesopt/utils.py
- modelling/bayesopt/trainers/trainer_base.py
- production/inference.py

Example:
    >>> from ins_pricing.utils import DeviceManager, GPUMemoryManager
    >>> device = DeviceManager.get_best_device()
    >>> DeviceManager.move_to_device(model, device)
    >>> with GPUMemoryManager.cleanup_context():
    ...     model.train()
"""

from __future__ import annotations

import gc
import os
from contextlib import contextmanager
from typing import Any, Dict, Optional

try:
    import torch
    import torch.nn as nn
    from torch.nn.parallel import DistributedDataParallel as DDP

    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    torch = None
    nn = None
    DDP = None

from ins_pricing.utils.logging import get_logger


# =============================================================================
# GPU Memory Manager
# =============================================================================


class GPUMemoryManager:
    """Context manager for GPU memory management and cleanup.

    This class consolidates GPU memory cleanup logic that was previously
    scattered across multiple trainer files.

    Example:
        >>> with GPUMemoryManager.cleanup_context():
        ...     model.train()
        ...     # Memory cleaned up after exiting context

        >>> # Or use directly:
        >>> GPUMemoryManager.clean()
    """

    _logger = get_logger("ins_pricing.gpu")

    @classmethod
    def clean(
        cls,
        verbose: bool = False,
        *,
        synchronize: bool = True,
        empty_cache: bool = True,
    ) -> None:
        """Clean up GPU memory.

        Args:
            verbose: If True, log cleanup details
            synchronize: If True, synchronize CUDA device after cleanup
            empty_cache: If True, clear CUDA cache
        """
        gc.collect()

        if TORCH_AVAILABLE and torch.cuda.is_available():
            if empty_cache:
                torch.cuda.empty_cache()
            if synchronize:
                torch.cuda.synchronize()
            if verbose:
                if empty_cache and synchronize:
                    cls._logger.debug("CUDA cache cleared and synchronized")
                elif empty_cache:
                    cls._logger.debug("CUDA cache cleared")
                elif synchronize:
                    cls._logger.debug("CUDA synchronized")

        # Optional: Force IPC collect for multi-process scenarios
        if os.environ.get("BAYESOPT_CUDA_IPC_COLLECT", "0") == "1":
```

@@ -80,177 +95,183 @@ class GPUMemoryManager:

```python
                cls._logger.debug("CUDA IPC collect performed")
            except Exception:
                pass

    @classmethod
    @contextmanager
    def cleanup_context(
        cls,
        verbose: bool = False,
        *,
        synchronize: bool = True,
        empty_cache: bool = True,
    ):
        """Context manager that cleans GPU memory on exit.

        Args:
            verbose: If True, log cleanup details

        Yields:
            None
        """
        try:
            yield
        finally:
            cls.clean(verbose=verbose, synchronize=synchronize, empty_cache=empty_cache)

    @classmethod
    def move_model_to_cpu(cls, model: Any) -> Any:
        """Move a model to CPU and clean GPU memory.

        Args:
            model: PyTorch model to move

        Returns:
            Model on CPU
        """
        if model is not None and hasattr(model, "to"):
            model.to("cpu")
        cls.clean()
        return model

    @classmethod
    def get_memory_info(cls) -> Dict[str, Any]:
        """Get current GPU memory usage information.

        Returns:
            Dictionary with memory info (allocated, reserved, free)
        """
        if not TORCH_AVAILABLE or not torch.cuda.is_available():
            return {"available": False}

        try:
            allocated = torch.cuda.memory_allocated()
            reserved = torch.cuda.memory_reserved()
            free, total = torch.cuda.mem_get_info()
            return {
                "available": True,
                "allocated_mb": allocated // (1024 * 1024),
                "reserved_mb": reserved // (1024 * 1024),
                "free_mb": free // (1024 * 1024),
                "total_mb": total // (1024 * 1024),
            }
        except Exception:
            return {"available": False}


# =============================================================================
# Device Manager
# =============================================================================


class DeviceManager:
    """Unified device management for model and tensor placement.

    This class consolidates device detection and model movement logic
    that was previously duplicated across trainer_base.py and predict.py.

    Example:
        >>> device = DeviceManager.get_best_device()
        >>> DeviceManager.move_to_device(model)
    """

    _logger = get_logger("ins_pricing.device")
    _cached_device: Optional[Any] = None  # torch.device when available

    @classmethod
    def get_best_device(cls, prefer_cuda: bool = True) -> Any:
        """Get the best available device.

        Args:
            prefer_cuda: If True, prefer CUDA over MPS

        Returns:
            Best available torch.device
        """
        if not TORCH_AVAILABLE:
            return None

        if cls._cached_device is not None:
            return cls._cached_device

        if prefer_cuda and torch.cuda.is_available():
            cls._cached_device = torch.device("cuda")
        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            cls._cached_device = torch.device("mps")
        else:
            cls._cached_device = torch.device("cpu")

        cls._logger.debug(f"Selected device: {cls._cached_device}")
        return cls._cached_device

    @classmethod
    def move_to_device(cls, model_obj: Any, device: Optional[Any] = None) -> None:
        """Move a model object to the specified device.

        Handles sklearn-style wrappers that have .ft, .resnet, or .gnn attributes.

        Args:
            model_obj: Model object to move (may be sklearn wrapper)
            device: Target device (defaults to best available)
        """
        if model_obj is None:
            return

        device = device or cls.get_best_device()
        if device is None:
            return

        # Update device attribute if present
        if hasattr(model_obj, "device"):
            model_obj.device = device

        # Move the main model
        if hasattr(model_obj, "to"):
            model_obj.to(device)

        # Move nested submodules (sklearn wrappers)
        for attr_name in ("ft", "resnet", "gnn"):
            submodule = getattr(model_obj, attr_name, None)
            if submodule is not None and hasattr(submodule, "to"):
                submodule.to(device)

    @classmethod
    def unwrap_module(cls, module: Any) -> Any:
        """Unwrap DDP or DataParallel wrapper to get the base module.

        Args:
            module: Potentially wrapped PyTorch module

        Returns:
            Unwrapped base module
        """
        if not TORCH_AVAILABLE:
            return module

        if isinstance(module, (DDP, nn.DataParallel)):
            return module.module
        return module

    @classmethod
    def reset_cache(cls) -> None:
        """Reset cached device selection."""
        cls._cached_device = None

    @classmethod
    def is_cuda_available(cls) -> bool:
        """Check if CUDA is available.

        Returns:
            True if CUDA is available
        """
        return TORCH_AVAILABLE and torch.cuda.is_available()

    @classmethod
    def is_mps_available(cls) -> bool:
        """Check if MPS (Apple Silicon) is available.

        Returns:
            True if MPS is available
        """
        if not TORCH_AVAILABLE:
            return False
        return hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
```
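A short usage sketch of the consolidated helpers, using the import path from the module docstring's example; the tiny model and input here are placeholders:

```python
import torch
import torch.nn as nn

from ins_pricing.utils import DeviceManager, GPUMemoryManager

model = nn.Linear(8, 1)  # placeholder for a trainer's network

# Pick cuda > mps > cpu (result is cached across calls) and move the model.
device = DeviceManager.get_best_device()
DeviceManager.move_to_device(model, device)

# GPU memory is cleaned up when the context exits, even on error.
with GPUMemoryManager.cleanup_context(verbose=True):
    model(torch.randn(4, 8).to(device))

print(GPUMemoryManager.get_memory_info())  # {'available': False} on CPU-only hosts
```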
ins_pricing/utils/features.py
ADDED

@@ -0,0 +1,53 @@

```python
"""Feature inference helpers shared across training and production."""

from __future__ import annotations

from typing import List, Optional, Tuple

import pandas as pd


def infer_factor_and_cate_list(
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
    resp_nme: str,
    weight_nme: str,
    *,
    binary_resp_nme: Optional[str] = None,
    factor_nmes: Optional[List[str]] = None,
    cate_list: Optional[List[str]] = None,
    infer_categorical_max_unique: int = 50,
    infer_categorical_max_ratio: float = 0.05,
) -> Tuple[List[str], List[str]]:
    """Infer factor_nmes/cate_list when feature names are not provided."""
    excluded = {resp_nme, weight_nme}
    if binary_resp_nme:
        excluded.add(binary_resp_nme)

    common_cols = [c for c in train_df.columns if c in test_df.columns]
    if factor_nmes is None:
        factors = [c for c in common_cols if c not in excluded]
    else:
        factors = [c for c in factor_nmes if c in common_cols and c not in excluded]

    if cate_list is not None:
        cats = [c for c in cate_list if c in factors]
        return factors, cats

    n_rows = max(1, len(train_df))
    cats: List[str] = []
    for col in factors:
        s = train_df[col]
        if (
            pd.api.types.is_bool_dtype(s)
            or pd.api.types.is_object_dtype(s)
            or isinstance(s.dtype, pd.CategoricalDtype)
        ):
            cats.append(col)
            continue
        if pd.api.types.is_integer_dtype(s):
            nunique = int(s.nunique(dropna=True))
            if nunique <= infer_categorical_max_unique or (nunique / n_rows) <= infer_categorical_max_ratio:
                cats.append(col)

    return factors, cats
```
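A quick sketch of the new helper on hypothetical data (the column names are invented for illustration; the thresholds are the defaults above):

```python
import pandas as pd

from ins_pricing.utils.features import infer_factor_and_cate_list

# Hypothetical frames: 'region' is object-typed, 'veh_age' a low-cardinality integer.
train = pd.DataFrame({
    "loss": [100.0, 0.0, 250.0],
    "exposure": [1.0, 0.5, 1.0],
    "region": ["N", "S", "N"],
    "veh_age": [3, 10, 3],
})
test = train.copy()

factors, cats = infer_factor_and_cate_list(
    train, test, resp_nme="loss", weight_nme="exposure"
)
print(factors)  # ['region', 'veh_age']
print(cats)     # ['region', 'veh_age']: by dtype, and by nunique (2) <= 50
```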
ins_pricing/utils/io.py
ADDED

@@ -0,0 +1,72 @@

```python
"""File and path helpers shared across ins_pricing."""

from __future__ import annotations

import csv
import json
from pathlib import Path
from typing import Any, Dict, List

import pandas as pd


def ensure_parent_dir(file_path: str) -> None:
    """Create parent directories when missing."""
    directory = Path(file_path).parent
    if directory and not directory.exists():
        directory.mkdir(parents=True, exist_ok=True)


class IOUtils:
    """File and path utilities for model parameters and configs."""

    @staticmethod
    def csv_to_dict(file_path: str) -> List[Dict[str, Any]]:
        """Load CSV file as list of dictionaries."""
        with open(file_path, mode="r", encoding="utf-8") as file:
            reader = csv.DictReader(file)
            return [dict(filter(lambda item: item[0] != "", row.items())) for row in reader]

    @staticmethod
    def ensure_parent_dir(file_path: str) -> None:
        """Create parent directories when missing."""
        ensure_parent_dir(file_path)

    @staticmethod
    def _sanitize_params_dict(params: Dict[str, Any]) -> Dict[str, Any]:
        """Filter index-like columns such as "Unnamed: 0" from pandas I/O."""
        return {
            k: v
            for k, v in (params or {}).items()
            if k and not str(k).startswith("Unnamed")
        }

    @staticmethod
    def load_params_file(path: str) -> Dict[str, Any]:
        """Load parameter dict from JSON/CSV/TSV files."""
        file_path = Path(path).expanduser().resolve()
        if not file_path.exists():
            raise FileNotFoundError(f"params file not found: {file_path}")

        suffix = file_path.suffix.lower()
        if suffix == ".json":
            payload = json.loads(file_path.read_text(encoding="utf-8", errors="replace"))
            if isinstance(payload, dict) and "best_params" in payload:
                payload = payload.get("best_params") or {}
            if not isinstance(payload, dict):
                raise ValueError(f"Invalid JSON params file (expect dict): {file_path}")
            return IOUtils._sanitize_params_dict(dict(payload))

        if suffix in (".csv", ".tsv"):
            df = pd.read_csv(file_path, sep="\t" if suffix == ".tsv" else ",")
            if df.empty:
                raise ValueError(f"Empty params file: {file_path}")
            params = df.iloc[0].to_dict()
            return IOUtils._sanitize_params_dict(params)

        raise ValueError(f"Unsupported params file type '{suffix}': {file_path}")


def csv_to_dict(file_path: str) -> List[Dict[str, Any]]:
    """Load CSV file as list of dictionaries (legacy function)."""
    return IOUtils.csv_to_dict(file_path)
```
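A sketch of the params-loader round trip (the file path and contents are invented; note that a JSON payload may wrap the dict under "best_params"):

```python
import json
from pathlib import Path

from ins_pricing.utils.io import IOUtils, ensure_parent_dir

path = "artifacts/xgb_best_params.json"  # hypothetical tuning artifact
ensure_parent_dir(path)
Path(path).write_text(json.dumps({"best_params": {"max_depth": 6, "eta": 0.1}}))

params = IOUtils.load_params_file(path)
print(params)  # {'max_depth': 6, 'eta': 0.1} -- the 'best_params' wrapper is unwrapped,
               # and index-like keys such as 'Unnamed: 0' are filtered out
```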