ins-pricing 0.4.5-py3-none-any.whl → 0.5.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84). The recurring {core/bayesopt → bayesopt} and predict.py → inference.py renames are sketched as an import migration after this list.
  1. ins_pricing/README.md +48 -22
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +52 -50
  4. ins_pricing/cli/BayesOpt_incremental.py +39 -105
  5. ins_pricing/cli/Explain_Run.py +31 -23
  6. ins_pricing/cli/Explain_entry.py +532 -579
  7. ins_pricing/cli/Pricing_Run.py +31 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +11 -9
  9. ins_pricing/cli/utils/cli_common.py +256 -256
  10. ins_pricing/cli/utils/cli_config.py +375 -375
  11. ins_pricing/cli/utils/import_resolver.py +382 -365
  12. ins_pricing/cli/utils/notebook_utils.py +340 -340
  13. ins_pricing/cli/watchdog_run.py +209 -201
  14. ins_pricing/frontend/__init__.py +10 -10
  15. ins_pricing/frontend/example_workflows.py +1 -1
  16. ins_pricing/governance/__init__.py +20 -20
  17. ins_pricing/governance/release.py +159 -159
  18. ins_pricing/modelling/__init__.py +147 -92
  19. ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +2 -2
  20. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  21. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +562 -562
  22. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +965 -964
  23. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  24. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +482 -548
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +915 -913
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +788 -785
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +448 -446
  29. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1308 -1308
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +3 -3
  32. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +197 -198
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +344 -344
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +283 -283
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +346 -347
  36. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  37. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  38. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  39. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  40. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  41. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +623 -623
  42. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  43. ins_pricing/modelling/explain/__init__.py +55 -55
  44. ins_pricing/modelling/explain/metrics.py +27 -174
  45. ins_pricing/modelling/explain/permutation.py +237 -237
  46. ins_pricing/modelling/plotting/__init__.py +40 -36
  47. ins_pricing/modelling/plotting/compat.py +228 -0
  48. ins_pricing/modelling/plotting/curves.py +572 -572
  49. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  50. ins_pricing/modelling/plotting/geo.py +362 -362
  51. ins_pricing/modelling/plotting/importance.py +121 -121
  52. ins_pricing/pricing/__init__.py +27 -27
  53. ins_pricing/production/__init__.py +35 -25
  54. ins_pricing/production/{predict.py → inference.py} +140 -57
  55. ins_pricing/production/monitoring.py +8 -21
  56. ins_pricing/reporting/__init__.py +11 -11
  57. ins_pricing/setup.py +1 -1
  58. ins_pricing/tests/production/test_inference.py +90 -0
  59. ins_pricing/utils/__init__.py +116 -83
  60. ins_pricing/utils/device.py +255 -255
  61. ins_pricing/utils/features.py +53 -0
  62. ins_pricing/utils/io.py +72 -0
  63. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  64. ins_pricing/utils/metrics.py +158 -24
  65. ins_pricing/utils/numerics.py +76 -0
  66. ins_pricing/utils/paths.py +9 -1
  67. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/METADATA +182 -182
  68. ins_pricing-0.5.0.dist-info/RECORD +131 -0
  69. ins_pricing/modelling/core/BayesOpt.py +0 -146
  70. ins_pricing/modelling/core/__init__.py +0 -1
  71. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  72. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  73. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  74. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  75. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  76. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  77. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  78. ins_pricing/tests/production/test_predict.py +0 -233
  79. ins_pricing-0.4.5.dist-info/RECORD +0 -130
  80. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +0 -0
  81. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +0 -0
  82. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +0 -0
  83. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/WHEEL +0 -0
  84. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.0.dist-info}/top_level.txt +0 -0
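
Not part of the diff itself: a sketch of the import-path migration the renames above imply. The list shows the old modelling/core paths deleted outright (items 69-77), so callers must move to the new locations; only the module paths are taken from the file list, and any re-exports at the old paths are an assumption this diff does not confirm.

# Import-path migration sketch (hypothetical caller code; paths from the file list).

# 0.4.5 layout (removed in this release):
#   from ins_pricing.modelling.core.bayesopt import core
#   from ins_pricing.production import predict

# 0.5.0 layout:
from ins_pricing.modelling.bayesopt import core     # modelling/{core/bayesopt → bayesopt}/core.py
from ins_pricing.production import inference        # production/{predict.py → inference.py}
from ins_pricing.utils import features, io, losses  # helpers promoted into ins_pricing/utils/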
ins_pricing/utils/device.py
@@ -1,256 +1,256 @@
- """Device management utilities for PyTorch models.
-
+ """Device management utilities for PyTorch models.
+
  This module consolidates GPU/CPU device management logic from:
- - modelling/core/bayesopt/utils.py
- - modelling/core/bayesopt/trainers/trainer_base.py
- - production/predict.py
-
- Example:
-     >>> from ins_pricing.utils import DeviceManager, GPUMemoryManager
-     >>> device = DeviceManager.get_best_device()
-     >>> DeviceManager.move_to_device(model, device)
-     >>> with GPUMemoryManager.cleanup_context():
-     ...     model.train()
- """
-
- from __future__ import annotations
-
- import gc
- import os
- from contextlib import contextmanager
- from typing import Any, Dict, Optional
-
- try:
-     import torch
-     import torch.nn as nn
-     from torch.nn.parallel import DistributedDataParallel as DDP
-
-     TORCH_AVAILABLE = True
- except ImportError:
-     TORCH_AVAILABLE = False
-     torch = None
-     nn = None
-     DDP = None
-
- from .logging import get_logger
-
-
- # =============================================================================
- # GPU Memory Manager
- # =============================================================================
-
-
- class GPUMemoryManager:
-     """Context manager for GPU memory management and cleanup.
-
-     This class consolidates GPU memory cleanup logic that was previously
-     scattered across multiple trainer files.
-
-     Example:
-         >>> with GPUMemoryManager.cleanup_context():
-         ...     model.train()
-         ...     # Memory cleaned up after exiting context
-
-         >>> # Or use directly:
-         >>> GPUMemoryManager.clean()
-     """
-
-     _logger = get_logger("ins_pricing.gpu")
-
-     @classmethod
-     def clean(cls, verbose: bool = False) -> None:
-         """Clean up GPU memory.
-
-         Args:
-             verbose: If True, log cleanup details
-         """
-         gc.collect()
-
-         if TORCH_AVAILABLE and torch.cuda.is_available():
-             torch.cuda.empty_cache()
-             torch.cuda.synchronize()
-             if verbose:
-                 cls._logger.debug("CUDA cache cleared and synchronized")
-
-             # Optional: Force IPC collect for multi-process scenarios
-             if os.environ.get("BAYESOPT_CUDA_IPC_COLLECT", "0") == "1":
-                 try:
-                     torch.cuda.ipc_collect()
-                     if verbose:
-                         cls._logger.debug("CUDA IPC collect performed")
-                 except Exception:
-                     pass
-
-     @classmethod
-     @contextmanager
-     def cleanup_context(cls, verbose: bool = False):
-         """Context manager that cleans GPU memory on exit.
-
-         Args:
-             verbose: If True, log cleanup details
-
-         Yields:
-             None
-         """
-         try:
-             yield
-         finally:
-             cls.clean(verbose=verbose)
-
-     @classmethod
-     def move_model_to_cpu(cls, model: Any) -> Any:
-         """Move a model to CPU and clean GPU memory.
-
-         Args:
-             model: PyTorch model to move
-
-         Returns:
-             Model on CPU
-         """
-         if model is not None and hasattr(model, "to"):
-             model.to("cpu")
-         cls.clean()
-         return model
-
-     @classmethod
-     def get_memory_info(cls) -> Dict[str, Any]:
-         """Get current GPU memory usage information.
-
-         Returns:
-             Dictionary with memory info (allocated, reserved, free)
-         """
-         if not TORCH_AVAILABLE or not torch.cuda.is_available():
-             return {"available": False}
-
-         try:
-             allocated = torch.cuda.memory_allocated()
-             reserved = torch.cuda.memory_reserved()
-             free, total = torch.cuda.mem_get_info()
-             return {
-                 "available": True,
-                 "allocated_mb": allocated // (1024 * 1024),
-                 "reserved_mb": reserved // (1024 * 1024),
-                 "free_mb": free // (1024 * 1024),
-                 "total_mb": total // (1024 * 1024),
-             }
-         except Exception:
-             return {"available": False}
-
-
- # =============================================================================
- # Device Manager
- # =============================================================================
-
-
- class DeviceManager:
-     """Unified device management for model and tensor placement.
-
-     This class consolidates device detection and model movement logic
-     that was previously duplicated across trainer_base.py and predict.py.
-
-     Example:
-         >>> device = DeviceManager.get_best_device()
-         >>> model = DeviceManager.move_to_device(model)
-     """
-
-     _logger = get_logger("ins_pricing.device")
-     _cached_device: Optional[Any] = None  # torch.device when available
-
-     @classmethod
-     def get_best_device(cls, prefer_cuda: bool = True) -> Any:
-         """Get the best available device.
-
-         Args:
-             prefer_cuda: If True, prefer CUDA over MPS
-
-         Returns:
-             Best available torch.device
-         """
-         if not TORCH_AVAILABLE:
-             return None
-
-         if cls._cached_device is not None:
-             return cls._cached_device
-
-         if prefer_cuda and torch.cuda.is_available():
-             cls._cached_device = torch.device("cuda")
-         elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-             cls._cached_device = torch.device("mps")
-         else:
-             cls._cached_device = torch.device("cpu")
-
-         cls._logger.debug(f"Selected device: {cls._cached_device}")
-         return cls._cached_device
-
-     @classmethod
-     def move_to_device(cls, model_obj: Any, device: Optional[Any] = None) -> None:
-         """Move a model object to the specified device.
-
-         Handles sklearn-style wrappers that have .ft, .resnet, or .gnn attributes.
-
-         Args:
-             model_obj: Model object to move (may be sklearn wrapper)
-             device: Target device (defaults to best available)
-         """
-         if model_obj is None:
-             return
-
-         device = device or cls.get_best_device()
-         if device is None:
-             return
-
-         # Update device attribute if present
-         if hasattr(model_obj, "device"):
-             model_obj.device = device
-
-         # Move the main model
-         if hasattr(model_obj, "to"):
-             model_obj.to(device)
-
-         # Move nested submodules (sklearn wrappers)
-         for attr_name in ("ft", "resnet", "gnn"):
-             submodule = getattr(model_obj, attr_name, None)
-             if submodule is not None and hasattr(submodule, "to"):
-                 submodule.to(device)
-
-     @classmethod
-     def unwrap_module(cls, module: Any) -> Any:
-         """Unwrap DDP or DataParallel wrapper to get the base module.
-
-         Args:
-             module: Potentially wrapped PyTorch module
-
-         Returns:
-             Unwrapped base module
-         """
-         if not TORCH_AVAILABLE:
-             return module
-
-         if isinstance(module, (DDP, nn.DataParallel)):
-             return module.module
-         return module
-
-     @classmethod
-     def reset_cache(cls) -> None:
-         """Reset cached device selection."""
-         cls._cached_device = None
-
-     @classmethod
-     def is_cuda_available(cls) -> bool:
-         """Check if CUDA is available.
-
-         Returns:
-             True if CUDA is available
-         """
-         return TORCH_AVAILABLE and torch.cuda.is_available()
-
-     @classmethod
-     def is_mps_available(cls) -> bool:
-         """Check if MPS (Apple Silicon) is available.
-
-         Returns:
-             True if MPS is available
-         """
-         if not TORCH_AVAILABLE:
-             return False
-         return hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+ - modelling/bayesopt/utils.py
+ - modelling/bayesopt/trainers/trainer_base.py
+ - production/inference.py
+
+ Example:
+     >>> from ins_pricing.utils import DeviceManager, GPUMemoryManager
+     >>> device = DeviceManager.get_best_device()
+     >>> DeviceManager.move_to_device(model, device)
+     >>> with GPUMemoryManager.cleanup_context():
+     ...     model.train()
+ """
+
+ from __future__ import annotations
+
+ import gc
+ import os
+ from contextlib import contextmanager
+ from typing import Any, Dict, Optional
+
+ try:
+     import torch
+     import torch.nn as nn
+     from torch.nn.parallel import DistributedDataParallel as DDP
+
+     TORCH_AVAILABLE = True
+ except ImportError:
+     TORCH_AVAILABLE = False
+     torch = None
+     nn = None
+     DDP = None
+
+ from ins_pricing.utils.logging import get_logger
+
+
+ # =============================================================================
+ # GPU Memory Manager
+ # =============================================================================
+
+
+ class GPUMemoryManager:
+     """Context manager for GPU memory management and cleanup.
+
+     This class consolidates GPU memory cleanup logic that was previously
+     scattered across multiple trainer files.
+
+     Example:
+         >>> with GPUMemoryManager.cleanup_context():
+         ...     model.train()
+         ...     # Memory cleaned up after exiting context
+
+         >>> # Or use directly:
+         >>> GPUMemoryManager.clean()
+     """
+
+     _logger = get_logger("ins_pricing.gpu")
+
+     @classmethod
+     def clean(cls, verbose: bool = False) -> None:
+         """Clean up GPU memory.
+
+         Args:
+             verbose: If True, log cleanup details
+         """
+         gc.collect()
+
+         if TORCH_AVAILABLE and torch.cuda.is_available():
+             torch.cuda.empty_cache()
+             torch.cuda.synchronize()
+             if verbose:
+                 cls._logger.debug("CUDA cache cleared and synchronized")
+
+             # Optional: Force IPC collect for multi-process scenarios
+             if os.environ.get("BAYESOPT_CUDA_IPC_COLLECT", "0") == "1":
+                 try:
+                     torch.cuda.ipc_collect()
+                     if verbose:
+                         cls._logger.debug("CUDA IPC collect performed")
+                 except Exception:
+                     pass
+
+     @classmethod
+     @contextmanager
+     def cleanup_context(cls, verbose: bool = False):
+         """Context manager that cleans GPU memory on exit.
+
+         Args:
+             verbose: If True, log cleanup details
+
+         Yields:
+             None
+         """
+         try:
+             yield
+         finally:
+             cls.clean(verbose=verbose)
+
+     @classmethod
+     def move_model_to_cpu(cls, model: Any) -> Any:
+         """Move a model to CPU and clean GPU memory.
+
+         Args:
+             model: PyTorch model to move
+
+         Returns:
+             Model on CPU
+         """
+         if model is not None and hasattr(model, "to"):
+             model.to("cpu")
+         cls.clean()
+         return model
+
+     @classmethod
+     def get_memory_info(cls) -> Dict[str, Any]:
+         """Get current GPU memory usage information.
+
+         Returns:
+             Dictionary with memory info (allocated, reserved, free)
+         """
+         if not TORCH_AVAILABLE or not torch.cuda.is_available():
+             return {"available": False}
+
+         try:
+             allocated = torch.cuda.memory_allocated()
+             reserved = torch.cuda.memory_reserved()
+             free, total = torch.cuda.mem_get_info()
+             return {
+                 "available": True,
+                 "allocated_mb": allocated // (1024 * 1024),
+                 "reserved_mb": reserved // (1024 * 1024),
+                 "free_mb": free // (1024 * 1024),
+                 "total_mb": total // (1024 * 1024),
+             }
+         except Exception:
+             return {"available": False}
+
+
+ # =============================================================================
+ # Device Manager
+ # =============================================================================
+
+
+ class DeviceManager:
+     """Unified device management for model and tensor placement.
+
+     This class consolidates device detection and model movement logic
+     that was previously duplicated across trainer_base.py and predict.py.
+
+     Example:
+         >>> device = DeviceManager.get_best_device()
+         >>> model = DeviceManager.move_to_device(model)
+     """
+
+     _logger = get_logger("ins_pricing.device")
+     _cached_device: Optional[Any] = None  # torch.device when available
+
+     @classmethod
+     def get_best_device(cls, prefer_cuda: bool = True) -> Any:
+         """Get the best available device.
+
+         Args:
+             prefer_cuda: If True, prefer CUDA over MPS
+
+         Returns:
+             Best available torch.device
+         """
+         if not TORCH_AVAILABLE:
+             return None
+
+         if cls._cached_device is not None:
+             return cls._cached_device
+
+         if prefer_cuda and torch.cuda.is_available():
+             cls._cached_device = torch.device("cuda")
+         elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+             cls._cached_device = torch.device("mps")
+         else:
+             cls._cached_device = torch.device("cpu")
+
+         cls._logger.debug(f"Selected device: {cls._cached_device}")
+         return cls._cached_device
+
+     @classmethod
+     def move_to_device(cls, model_obj: Any, device: Optional[Any] = None) -> None:
+         """Move a model object to the specified device.
+
+         Handles sklearn-style wrappers that have .ft, .resnet, or .gnn attributes.
+
+         Args:
+             model_obj: Model object to move (may be sklearn wrapper)
+             device: Target device (defaults to best available)
+         """
+         if model_obj is None:
+             return
+
+         device = device or cls.get_best_device()
+         if device is None:
+             return
+
+         # Update device attribute if present
+         if hasattr(model_obj, "device"):
+             model_obj.device = device
+
+         # Move the main model
+         if hasattr(model_obj, "to"):
+             model_obj.to(device)
+
+         # Move nested submodules (sklearn wrappers)
+         for attr_name in ("ft", "resnet", "gnn"):
+             submodule = getattr(model_obj, attr_name, None)
+             if submodule is not None and hasattr(submodule, "to"):
+                 submodule.to(device)
+
+     @classmethod
+     def unwrap_module(cls, module: Any) -> Any:
+         """Unwrap DDP or DataParallel wrapper to get the base module.
+
+         Args:
+             module: Potentially wrapped PyTorch module
+
+         Returns:
+             Unwrapped base module
+         """
+         if not TORCH_AVAILABLE:
+             return module
+
+         if isinstance(module, (DDP, nn.DataParallel)):
+             return module.module
+         return module
+
+     @classmethod
+     def reset_cache(cls) -> None:
+         """Reset cached device selection."""
+         cls._cached_device = None
+
+     @classmethod
+     def is_cuda_available(cls) -> bool:
+         """Check if CUDA is available.
+
+         Returns:
+             True if CUDA is available
+         """
+         return TORCH_AVAILABLE and torch.cuda.is_available()
+
+     @classmethod
+     def is_mps_available(cls) -> bool:
+         """Check if MPS (Apple Silicon) is available.
+
+         Returns:
+             True if MPS is available
+         """
+         if not TORCH_AVAILABLE:
+             return False
+         return hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
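
Not part of the diff: a minimal usage sketch of the two classes above, based on their docstrings. The nn.Linear model is a placeholder, not an ins_pricing model; a CUDA-capable torch install is assumed for the memory figures to be meaningful.

import torch
import torch.nn as nn

from ins_pricing.utils import DeviceManager, GPUMemoryManager

model = nn.Linear(8, 1)  # placeholder network standing in for a trainer's net

device = DeviceManager.get_best_device()     # cuda > mps > cpu; result is cached
DeviceManager.move_to_device(model, device)  # also visits .ft/.resnet/.gnn wrappers

with GPUMemoryManager.cleanup_context(verbose=True):
    with torch.no_grad():
        _ = model(torch.randn(4, 8, device=device))
# On exit: gc.collect(), plus torch.cuda.empty_cache()/synchronize() when CUDA is up.

print(GPUMemoryManager.get_memory_info())    # {"available": False} on CPU-only hosts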
ins_pricing/utils/features.py (new file)
@@ -0,0 +1,53 @@
+ """Feature inference helpers shared across training and production."""
+
+ from __future__ import annotations
+
+ from typing import List, Optional, Tuple
+
+ import pandas as pd
+
+
+ def infer_factor_and_cate_list(
+     train_df: pd.DataFrame,
+     test_df: pd.DataFrame,
+     resp_nme: str,
+     weight_nme: str,
+     *,
+     binary_resp_nme: Optional[str] = None,
+     factor_nmes: Optional[List[str]] = None,
+     cate_list: Optional[List[str]] = None,
+     infer_categorical_max_unique: int = 50,
+     infer_categorical_max_ratio: float = 0.05,
+ ) -> Tuple[List[str], List[str]]:
+     """Infer factor_nmes/cate_list when feature names are not provided."""
+     excluded = {resp_nme, weight_nme}
+     if binary_resp_nme:
+         excluded.add(binary_resp_nme)
+
+     common_cols = [c for c in train_df.columns if c in test_df.columns]
+     if factor_nmes is None:
+         factors = [c for c in common_cols if c not in excluded]
+     else:
+         factors = [c for c in factor_nmes if c in common_cols and c not in excluded]
+
+     if cate_list is not None:
+         cats = [c for c in cate_list if c in factors]
+         return factors, cats
+
+     n_rows = max(1, len(train_df))
+     cats: List[str] = []
+     for col in factors:
+         s = train_df[col]
+         if (
+             pd.api.types.is_bool_dtype(s)
+             or pd.api.types.is_object_dtype(s)
+             or isinstance(s.dtype, pd.CategoricalDtype)
+         ):
+             cats.append(col)
+             continue
+         if pd.api.types.is_integer_dtype(s):
+             nunique = int(s.nunique(dropna=True))
+             if nunique <= infer_categorical_max_unique or (nunique / n_rows) <= infer_categorical_max_ratio:
+                 cats.append(col)
+
+     return factors, cats
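
A toy call to the new helper, illustrating the rules it encodes: bool/object/categorical dtypes are always treated as categorical, and integer columns are categorical when their cardinality is small in absolute or relative terms. The frame and column names here are invented.

import pandas as pd

from ins_pricing.utils.features import infer_factor_and_cate_list

train = pd.DataFrame({
    "premium": [100.0, 150.0, 90.0, 120.0],  # response, excluded from factors
    "exposure": [1.0, 1.0, 0.5, 1.0],        # weight, excluded from factors
    "region": ["N", "S", "S", "E"],          # object dtype -> categorical
    "veh_age": [1, 2, 3, 1],                 # low-cardinality int -> categorical
    "sum_insured": [10.5, 20.1, 7.9, 13.3],  # float -> numeric factor
})
test = train.copy()

factors, cats = infer_factor_and_cate_list(
    train, test, resp_nme="premium", weight_nme="exposure"
)
print(factors)  # ['region', 'veh_age', 'sum_insured']
print(cats)     # ['region', 'veh_age']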
ins_pricing/utils/io.py (new file)
@@ -0,0 +1,72 @@
+ """File and path helpers shared across ins_pricing."""
+
+ from __future__ import annotations
+
+ import csv
+ import json
+ from pathlib import Path
+ from typing import Any, Dict, List
+
+ import pandas as pd
+
+
+ def ensure_parent_dir(file_path: str) -> None:
+     """Create parent directories when missing."""
+     directory = Path(file_path).parent
+     if directory and not directory.exists():
+         directory.mkdir(parents=True, exist_ok=True)
+
+
+ class IOUtils:
+     """File and path utilities for model parameters and configs."""
+
+     @staticmethod
+     def csv_to_dict(file_path: str) -> List[Dict[str, Any]]:
+         """Load CSV file as list of dictionaries."""
+         with open(file_path, mode="r", encoding="utf-8") as file:
+             reader = csv.DictReader(file)
+             return [dict(filter(lambda item: item[0] != "", row.items())) for row in reader]
+
+     @staticmethod
+     def ensure_parent_dir(file_path: str) -> None:
+         """Create parent directories when missing."""
+         ensure_parent_dir(file_path)
+
+     @staticmethod
+     def _sanitize_params_dict(params: Dict[str, Any]) -> Dict[str, Any]:
+         """Filter index-like columns such as "Unnamed: 0" from pandas I/O."""
+         return {
+             k: v
+             for k, v in (params or {}).items()
+             if k and not str(k).startswith("Unnamed")
+         }
+
+     @staticmethod
+     def load_params_file(path: str) -> Dict[str, Any]:
+         """Load parameter dict from JSON/CSV/TSV files."""
+         file_path = Path(path).expanduser().resolve()
+         if not file_path.exists():
+             raise FileNotFoundError(f"params file not found: {file_path}")
+
+         suffix = file_path.suffix.lower()
+         if suffix == ".json":
+             payload = json.loads(file_path.read_text(encoding="utf-8", errors="replace"))
+             if isinstance(payload, dict) and "best_params" in payload:
+                 payload = payload.get("best_params") or {}
+             if not isinstance(payload, dict):
+                 raise ValueError(f"Invalid JSON params file (expect dict): {file_path}")
+             return IOUtils._sanitize_params_dict(dict(payload))
+
+         if suffix in (".csv", ".tsv"):
+             df = pd.read_csv(file_path, sep="\t" if suffix == ".tsv" else ",")
+             if df.empty:
+                 raise ValueError(f"Empty params file: {file_path}")
+             params = df.iloc[0].to_dict()
+             return IOUtils._sanitize_params_dict(params)
+
+         raise ValueError(f"Unsupported params file type '{suffix}': {file_path}")
+
+
+ def csv_to_dict(file_path: str) -> List[Dict[str, Any]]:
+     """Load CSV file as list of dictionaries (legacy function)."""
+     return IOUtils.csv_to_dict(file_path)
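
A round-trip sketch for the new IOUtils, using a hypothetical artifacts/best_params.json; it shows the "best_params" envelope unwrapping and the "Unnamed: 0" index-column scrubbing that load_params_file performs.

import json
from pathlib import Path

from ins_pricing.utils.io import IOUtils, ensure_parent_dir

path = "artifacts/best_params.json"  # illustrative location, not an ins_pricing convention
ensure_parent_dir(path)              # creates artifacts/ if missing

# A tuning run might persist {"best_params": {...}} plus a stray pandas index column.
Path(path).write_text(json.dumps({
    "best_params": {"max_depth": 6, "learning_rate": 0.05, "Unnamed: 0": 0}
}), encoding="utf-8")

params = IOUtils.load_params_file(path)
print(params)  # {'max_depth': 6, 'learning_rate': 0.05}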