ins-pricing 0.2.8__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/CHANGELOG.md +93 -0
- ins_pricing/README.md +11 -0
- ins_pricing/cli/bayesopt_entry_runner.py +626 -499
- ins_pricing/cli/utils/evaluation_context.py +320 -0
- ins_pricing/cli/utils/import_resolver.py +350 -0
- ins_pricing/modelling/core/bayesopt/PHASE2_REFACTORING_SUMMARY.md +449 -0
- ins_pricing/modelling/core/bayesopt/PHASE3_REFACTORING_SUMMARY.md +406 -0
- ins_pricing/modelling/core/bayesopt/REFACTORING_SUMMARY.md +247 -0
- ins_pricing/modelling/core/bayesopt/config_components.py +351 -0
- ins_pricing/modelling/core/bayesopt/config_preprocess.py +3 -4
- ins_pricing/modelling/core/bayesopt/core.py +153 -94
- ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +118 -31
- ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +294 -139
- ins_pricing/modelling/core/bayesopt/utils/__init__.py +86 -0
- ins_pricing/modelling/core/bayesopt/utils/constants.py +183 -0
- ins_pricing/modelling/core/bayesopt/utils/distributed_utils.py +186 -0
- ins_pricing/modelling/core/bayesopt/utils/io_utils.py +126 -0
- ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +540 -0
- ins_pricing/modelling/core/bayesopt/utils/torch_trainer_mixin.py +587 -0
- ins_pricing/modelling/core/bayesopt/utils.py +98 -1495
- ins_pricing/modelling/core/bayesopt/utils_backup.py +1503 -0
- ins_pricing/setup.py +1 -1
- ins_pricing-0.3.0.dist-info/METADATA +162 -0
- {ins_pricing-0.2.8.dist-info → ins_pricing-0.3.0.dist-info}/RECORD +26 -13
- ins_pricing-0.2.8.dist-info/METADATA +0 -51
- {ins_pricing-0.2.8.dist-info → ins_pricing-0.3.0.dist-info}/WHEEL +0 -0
- {ins_pricing-0.2.8.dist-info → ins_pricing-0.3.0.dist-info}/top_level.txt +0 -0
ins_pricing/modelling/core/bayesopt/utils/__init__.py
@@ -0,0 +1,86 @@
+"""Backward compatibility re-exports from refactored utils modules.
+
+This module ensures all existing imports continue to work:
+    from ins_pricing.modelling.core.bayesopt.utils import EPS, IOUtils, ...
+
+The utils.py file has been split into focused modules for better maintainability:
+- constants.py: EPS, set_global_seed, etc.
+- io_utils.py: IOUtils for file I/O
+- distributed_utils.py: DistributedUtils, TrainingUtils for DDP
+- torch_trainer_mixin.py: TorchTrainerMixin for PyTorch training
+- metrics_and_devices.py: Metrics, GPU/device management, CV strategies, plotting
+"""
+
+from __future__ import annotations
+
+# Constants and simple utilities
+from .constants import (
+    EPS,
+    set_global_seed,
+    ensure_parent_dir,
+    compute_batch_size,
+    tweedie_loss,
+    infer_factor_and_cate_list,
+)
+
+# I/O utilities
+from .io_utils import (
+    IOUtils,
+    csv_to_dict,
+)
+
+# Distributed training
+from .distributed_utils import (
+    DistributedUtils,
+    TrainingUtils,
+    free_cuda,
+)
+
+# PyTorch training mixin
+from .torch_trainer_mixin import (
+    TorchTrainerMixin,
+)
+
+# Metrics, devices, CV, and plotting
+from .metrics_and_devices import (
+    get_logger,
+    MetricFactory,
+    GPUMemoryManager,
+    DeviceManager,
+    CVStrategyResolver,
+    PlotUtils,
+    split_data,
+    plot_lift_list,
+    plot_dlift_list,
+    _OrderedSplitter,
+)
+
+__all__ = [
+    # Constants
+    'EPS',
+    'set_global_seed',
+    'ensure_parent_dir',
+    'compute_batch_size',
+    'tweedie_loss',
+    'infer_factor_and_cate_list',
+    # I/O
+    'IOUtils',
+    'csv_to_dict',
+    # Distributed
+    'DistributedUtils',
+    'TrainingUtils',
+    'free_cuda',
+    # PyTorch
+    'TorchTrainerMixin',
+    # Utilities
+    'get_logger',
+    'MetricFactory',
+    'GPUMemoryManager',
+    'DeviceManager',
+    'CVStrategyResolver',
+    'PlotUtils',
+    'split_data',
+    'plot_lift_list',
+    'plot_dlift_list',
+    '_OrderedSplitter',
+]
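The shim above means downstream code never has to change its import paths. A minimal sketch of the guarantee it provides (not part of the diff; assumes the 0.3.0 wheel is installed):

```python
# The legacy flat path and the new submodule path resolve to the same objects.
from ins_pricing.modelling.core.bayesopt.utils import EPS, tweedie_loss
from ins_pricing.modelling.core.bayesopt.utils.constants import (
    EPS as EPS_direct,
    tweedie_loss as tweedie_loss_direct,
)

assert EPS == EPS_direct
assert tweedie_loss is tweedie_loss_direct  # re-export: same function object
```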
ins_pricing/modelling/core/bayesopt/utils/constants.py
@@ -0,0 +1,183 @@
+"""Core constants and simple utility functions.
+
+This module contains:
+- EPS constant for numerical stability
+- set_global_seed() for reproducibility
+- ensure_parent_dir() for file operations
+- compute_batch_size() for adaptive batching
+- tweedie_loss() for regression loss
+- infer_factor_and_cate_list() for automatic feature detection
+"""
+
+from __future__ import annotations
+
+import random
+from pathlib import Path
+from typing import List, Optional, Tuple
+
+import numpy as np
+import pandas as pd
+import torch
+
+# Constants
+# =============================================================================
+EPS = 1e-8
+"""Small epsilon value for numerical stability."""
+
+
+# Simple utility functions
+# =============================================================================
+
+def set_global_seed(seed: int) -> None:
+    """Set random seed for reproducibility across all libraries.
+
+    Args:
+        seed: Random seed value
+    """
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+
+
+def ensure_parent_dir(file_path: str) -> None:
+    """Create parent directories when missing.
+
+    Args:
+        file_path: Path to file whose parent directory should be created
+    """
+    directory = Path(file_path).parent
+    if directory and not directory.exists():
+        directory.mkdir(parents=True, exist_ok=True)
+
+
+def compute_batch_size(data_size: int, learning_rate: float,
+                       batch_num: int, minimum: int) -> int:
+    """Compute adaptive batch size based on data size and learning rate.
+
+    Args:
+        data_size: Total number of samples
+        learning_rate: Learning rate value
+        batch_num: Target number of batches
+        minimum: Minimum batch size
+
+    Returns:
+        Computed batch size, clipped to [1, data_size]
+    """
+    estimated = int((learning_rate / 1e-4) ** 0.5 *
+                    (data_size / max(batch_num, 1)))
+    return max(1, min(data_size, max(minimum, estimated)))
+
+
+def tweedie_loss(pred, target, p=1.5, eps=1e-6, max_clip=1e6):
+    """Compute Tweedie deviance loss for PyTorch.
+
+    Reference: https://scikit-learn.org/stable/modules/model_evaluation.html
+
+    Args:
+        pred: Predicted values (tensor)
+        target: True values (tensor)
+        p: Tweedie power parameter (1.0-2.0)
+        eps: Small epsilon for numerical stability
+        max_clip: Maximum value for clipping
+
+    Returns:
+        Per-element Tweedie deviance (tensor)
+    """
+    # Clamp predictions to positive values for stability
+    pred_clamped = torch.clamp(pred, min=eps)
+
+    if p == 1:
+        # Poisson deviance: 2 * (y * log(y / mu) - y + mu)
+        term1 = target * torch.log(target / pred_clamped + eps)
+        term2 = target - pred_clamped
+        term3 = 0
+    elif p == 0:
+        # Gaussian deviance: 2 * term1 = (y - mu) ** 2
+        term1 = 0.5 * torch.pow(target - pred_clamped, 2)
+        term2 = 0
+        term3 = 0
+    elif p == 2:
+        # Gamma deviance: 2 * (log(mu / y) + y / mu - 1)
+        term1 = torch.log(pred_clamped / target + eps)
+        term2 = -target / pred_clamped + 1
+        term3 = 0
+    else:
+        # General Tweedie deviance
+        term1 = torch.pow(target, 2 - p) / ((1 - p) * (2 - p))
+        term2 = target * torch.pow(pred_clamped, 1 - p) / (1 - p)
+        term3 = torch.pow(pred_clamped, 2 - p) / (2 - p)
+
+    return torch.nan_to_num(
+        2 * (term1 - term2 + term3),
+        nan=eps,
+        posinf=max_clip,
+        neginf=-max_clip
+    )
+
+
+def infer_factor_and_cate_list(
+    train_df: pd.DataFrame,
+    test_df: pd.DataFrame,
+    resp_nme: str,
+    weight_nme: str,
+    binary_resp_nme: Optional[str] = None,
+    factor_nmes: Optional[List[str]] = None,
+    cate_list: Optional[List[str]] = None,
+    infer_categorical_max_unique: int = 50,
+    infer_categorical_max_ratio: float = 0.05
+) -> Tuple[List[str], List[str]]:
+    """Infer factor_nmes/cate_list when feature names are not provided.
+
+    Rules:
+    - factor_nmes: start from shared train/test columns, exclude target/weight/(optional binary target).
+    - cate_list: object/category/bool plus low-cardinality integer columns.
+    - Always intersect with shared train/test columns to avoid mismatches.
+
+    Args:
+        train_df: Training DataFrame
+        test_df: Test DataFrame
+        resp_nme: Response/target column name
+        weight_nme: Sample weight column name
+        binary_resp_nme: Optional binary response column name
+        factor_nmes: Optional list of feature column names
+        cate_list: Optional list of categorical feature names
+        infer_categorical_max_unique: Max unique values for categorical inference
+        infer_categorical_max_ratio: Max ratio of unique/total for categorical inference
+
+    Returns:
+        Tuple of (factor_nmes, cate_list)
+    """
+    excluded = {resp_nme, weight_nme}
+    if binary_resp_nme:
+        excluded.add(binary_resp_nme)
+
+    common_cols = [c for c in train_df.columns if c in test_df.columns]
+    if factor_nmes is None:
+        factors = [c for c in common_cols if c not in excluded]
+    else:
+        factors = [
+            c for c in factor_nmes if c in common_cols and c not in excluded
+        ]
+
+    if cate_list is not None:
+        cats = [c for c in cate_list if c in factors]
+        return factors, cats
+
+    n_rows = max(1, len(train_df))
+    cats: List[str] = []
+    for col in factors:
+        s = train_df[col]
+        if (pd.api.types.is_bool_dtype(s) or
+                pd.api.types.is_object_dtype(s) or
+                isinstance(s.dtype, pd.CategoricalDtype)):
+            cats.append(col)
+            continue
+        if pd.api.types.is_integer_dtype(s):
+            nunique = int(s.nunique(dropna=True))
+            if (nunique <= infer_categorical_max_unique or
+                    (nunique / n_rows) <= infer_categorical_max_ratio):
+                cats.append(col)
+
+    return factors, cats
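For orientation, a hypothetical usage sketch of the new constants module (toy data; the column names `loss_amt`, `exposure`, `region`, `veh_age` are invented for illustration):

```python
import pandas as pd
import torch

from ins_pricing.modelling.core.bayesopt.utils.constants import (
    compute_batch_size, infer_factor_and_cate_list, tweedie_loss,
)

# Per-element Tweedie deviance, reduced to a scalar loss.
pred = torch.tensor([0.8, 1.2, 2.0])
target = torch.tensor([1.0, 1.0, 2.5])
loss = tweedie_loss(pred, target, p=1.5).mean()

# Feature inference: target/weight columns are excluded; "region" is
# object dtype and "veh_age" a low-cardinality integer, so both land
# in the categorical list.
train = pd.DataFrame({"loss_amt": [0.0, 120.5], "exposure": [1.0, 0.5],
                      "region": ["N", "S"], "veh_age": [3, 12]})
test = train.copy()
factors, cates = infer_factor_and_cate_list(
    train, test, resp_nme="loss_amt", weight_nme="exposure")
assert factors == ["region", "veh_age"] and cates == factors

# Adaptive batch size scales with sqrt(learning_rate) and data volume.
batch = compute_batch_size(data_size=100_000, learning_rate=1e-3,
                           batch_num=100, minimum=256)
```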
ins_pricing/modelling/core/bayesopt/utils/distributed_utils.py
@@ -0,0 +1,186 @@
+"""Distributed training utilities for PyTorch DDP.
+
+This module contains:
+- DistributedUtils for DDP setup and process coordination
+- TrainingUtils for CUDA memory management
+- free_cuda() for legacy compatibility
+"""
+
+from __future__ import annotations
+
+import gc
+import os
+from datetime import timedelta
+from typing import Optional
+
+import torch
+import torch.distributed as dist
+
+
+def _select_ddp_backend() -> str:
+    """Select the appropriate DDP backend based on system capabilities.
+
+    Returns:
+        "nccl" if CUDA is available and NCCL is supported (non-Windows),
+        otherwise "gloo"
+    """
+    if not torch.cuda.is_available():
+        return "gloo"
+
+    if os.name == "nt":  # Windows doesn't support NCCL
+        return "gloo"
+
+    try:
+        nccl_available = getattr(dist, "is_nccl_available", lambda: False)()
+        return "nccl" if nccl_available else "gloo"
+    except Exception:
+        return "gloo"
+
+
+def _get_ddp_timeout() -> timedelta:
+    """Get the DDP timeout from environment variable.
+
+    Returns:
+        timedelta for DDP timeout (default: 1800 seconds)
+    """
+    timeout_seconds = int(os.environ.get("BAYESOPT_DDP_TIMEOUT_SECONDS", "1800"))
+    return timedelta(seconds=max(1, timeout_seconds))
+
+
+def _cache_ddp_state(local_rank: int, rank: int, world_size: int) -> tuple:
+    """Cache and return DDP state tuple."""
+    state = (True, local_rank, rank, world_size)
+    DistributedUtils._cached_state = state
+    return state
+
+
+class DistributedUtils:
+    """Utilities for distributed data parallel training.
+
+    This class provides methods for:
+    - Initializing DDP process groups
+    - Checking process rank and world size
+    - Cleanup after distributed training
+    """
+
+    _cached_state: Optional[tuple] = None
+
+    @staticmethod
+    def setup_ddp():
+        """Initialize the DDP process group for distributed training.
+
+        Returns:
+            Tuple of (success, local_rank, rank, world_size)
+        """
+        # Return cached state if already initialized
+        if dist.is_initialized():
+            if DistributedUtils._cached_state is None:
+                DistributedUtils._cached_state = _cache_ddp_state(
+                    int(os.environ.get("LOCAL_RANK", 0)),
+                    dist.get_rank(),
+                    dist.get_world_size(),
+                )
+            return DistributedUtils._cached_state
+
+        # Check for required environment variables
+        if 'RANK' not in os.environ or 'WORLD_SIZE' not in os.environ:
+            print(
+                f">>> DDP Setup Failed: RANK or WORLD_SIZE not found in env. "
+                f"Keys found: {list(os.environ.keys())}"
+            )
+            print(">>> Hint: launch with torchrun --nproc_per_node=<N> <script.py>")
+            return False, 0, 0, 1
+
+        rank = int(os.environ["RANK"])
+        world_size = int(os.environ["WORLD_SIZE"])
+        local_rank = int(os.environ.get("LOCAL_RANK", 0))
+
+        # Windows CUDA DDP is not supported
+        if os.name == "nt" and torch.cuda.is_available() and world_size > 1:
+            print(
+                ">>> DDP Setup Disabled: Windows CUDA DDP is not supported. "
+                "Falling back to single process."
+            )
+            return False, 0, 0, 1
+
+        # Set CUDA device for this process
+        if torch.cuda.is_available():
+            torch.cuda.set_device(local_rank)
+
+        # Initialize process group
+        backend = _select_ddp_backend()
+        timeout = _get_ddp_timeout()
+
+        dist.init_process_group(backend=backend, init_method="env://", timeout=timeout)
+        print(
+            f">>> DDP Initialized ({backend}, timeout={timeout.total_seconds():.0f}s): "
+            f"Rank {rank}/{world_size}, Local Rank {local_rank}"
+        )
+
+        return _cache_ddp_state(local_rank, rank, world_size)
+
+    @staticmethod
+    def cleanup_ddp():
+        """Destroy the DDP process group and clear cached state."""
+        if dist.is_initialized():
+            dist.destroy_process_group()
+        DistributedUtils._cached_state = None
+
+    @staticmethod
+    def is_main_process():
+        """Check if current process is rank 0 (main process).
+
+        Returns:
+            True if main process or DDP not initialized
+        """
+        return not dist.is_initialized() or dist.get_rank() == 0
+
+    @staticmethod
+    def world_size() -> int:
+        """Get the total number of processes in the distributed group.
+
+        Returns:
+            World size (1 if DDP not initialized)
+        """
+        return dist.get_world_size() if dist.is_initialized() else 1
+
+
+class TrainingUtils:
+    """General training utilities including CUDA management."""
+
+    @staticmethod
+    def free_cuda() -> None:
+        """Release CUDA memory and clear cache.
+
+        This performs aggressive cleanup:
+        1. Move all PyTorch models to CPU
+        2. Run garbage collection
+        3. Clear CUDA cache
+        """
+        print(">>> Moving all models to CPU...")
+        for obj in gc.get_objects():
+            try:
+                if hasattr(obj, "to") and callable(obj.to):
+                    obj.to("cpu")
+            except Exception:
+                pass
+
+        print(">>> Releasing tensor/optimizer/DataLoader references...")
+        gc.collect()
+
+        print(">>> Clearing CUDA cache...")
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            torch.cuda.synchronize()
+            print(">>> CUDA memory released.")
+        else:
+            print(">>> CUDA not available; cleanup skipped.")
+
+
+# Backward compatibility function wrapper
+def free_cuda():
+    """Legacy function wrapper for CUDA memory cleanup.
+
+    This function calls TrainingUtils.free_cuda() for backward compatibility.
+    """
+    TrainingUtils.free_cuda()
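The intended call pattern, as far as it can be read from the code above, is roughly the following sketch (run under `torchrun --nproc_per_node=<N>` for multi-process training; `train(...)` is a placeholder, not a function from this package):

```python
import torch

from ins_pricing.modelling.core.bayesopt.utils.distributed_utils import (
    DistributedUtils,
    TrainingUtils,
)

# setup_ddp() degrades gracefully: without torchrun env vars it returns
# (False, 0, 0, 1) and the script proceeds single-process.
ok, local_rank, rank, world_size = DistributedUtils.setup_ddp()
use_cuda = ok and torch.cuda.is_available()
device = torch.device(f"cuda:{local_rank}" if use_cuda else "cpu")

try:
    # train(model.to(device), ...)  # placeholder for the caller's loop
    if DistributedUtils.is_main_process():
        print(f"done on {DistributedUtils.world_size()} process(es)")
finally:
    if ok:
        DistributedUtils.cleanup_ddp()
    TrainingUtils.free_cuda()
```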
ins_pricing/modelling/core/bayesopt/utils/io_utils.py
@@ -0,0 +1,126 @@
+"""File I/O and parameter loading utilities.
+
+This module contains:
+- IOUtils class for loading parameters from JSON/CSV/TSV files
+- csv_to_dict() for CSV file handling
+- File path sanitization utilities
+"""
+
+from __future__ import annotations
+
+import csv
+import json
+from pathlib import Path
+from typing import Any, Dict, List
+
+import pandas as pd
+
+
+class IOUtils:
+    """File and path utilities for model parameters and configs."""
+
+    @staticmethod
+    def csv_to_dict(file_path: str) -> List[Dict[str, Any]]:
+        """Load CSV file as list of dictionaries.
+
+        Args:
+            file_path: Path to CSV file
+
+        Returns:
+            List of dictionaries, one per row
+        """
+        with open(file_path, mode='r', encoding='utf-8') as file:
+            reader = csv.DictReader(file)
+            return [
+                dict(filter(lambda item: item[0] != '', row.items()))
+                for row in reader
+            ]
+
+    @staticmethod
+    def ensure_parent_dir(file_path: str) -> None:
+        """Create parent directories when missing.
+
+        Args:
+            file_path: Path to file whose parent directory should be created
+        """
+        directory = Path(file_path).parent
+        if directory and not directory.exists():
+            directory.mkdir(parents=True, exist_ok=True)
+
+    @staticmethod
+    def _sanitize_params_dict(params: Dict[str, Any]) -> Dict[str, Any]:
+        """Filter index-like columns such as "Unnamed: 0" from pandas I/O.
+
+        Args:
+            params: Parameter dictionary
+
+        Returns:
+            Sanitized parameter dictionary
+        """
+        return {
+            k: v
+            for k, v in (params or {}).items()
+            if k and not str(k).startswith("Unnamed")
+        }
+
+    @staticmethod
+    def load_params_file(path: str) -> Dict[str, Any]:
+        """Load parameter dict from JSON/CSV/TSV files.
+
+        Supported formats:
+        - JSON: accept dict or {"best_params": {...}} wrapper
+        - CSV/TSV: read the first row as params
+
+        Args:
+            path: Path to parameter file
+
+        Returns:
+            Parameter dictionary
+
+        Raises:
+            FileNotFoundError: If file doesn't exist
+            ValueError: If file format is unsupported or invalid
+        """
+        file_path = Path(path).expanduser().resolve()
+        if not file_path.exists():
+            raise FileNotFoundError(f"params file not found: {file_path}")
+
+        suffix = file_path.suffix.lower()
+
+        if suffix == ".json":
+            payload = json.loads(file_path.read_text(
+                encoding="utf-8", errors="replace"))
+            if isinstance(payload, dict) and "best_params" in payload:
+                payload = payload.get("best_params") or {}
+            if not isinstance(payload, dict):
+                raise ValueError(
+                    f"Invalid JSON params file (expect dict): {file_path}")
+            return IOUtils._sanitize_params_dict(dict(payload))
+
+        if suffix in (".csv", ".tsv"):
+            df = pd.read_csv(file_path, sep="\t" if suffix == ".tsv" else ",")
+            if df.empty:
+                raise ValueError(f"Empty params file: {file_path}")
+            params = df.iloc[0].to_dict()
+            return IOUtils._sanitize_params_dict(params)
+
+        raise ValueError(
+            f"Unsupported params file type '{suffix}': {file_path}")
+
+
+# Backward compatibility function wrapper
+def csv_to_dict(file_path: str) -> List[Dict[str, Any]]:
+    """Load CSV file as list of dictionaries (legacy function).
+
+    Args:
+        file_path: Path to CSV file
+
+    Returns:
+        List of dictionaries, one per row
+    """
+    return IOUtils.csv_to_dict(file_path)
+
+
+def ensure_parent_dir(file_path: str) -> None:
+    """Create parent directories when missing (legacy function)."""
+    IOUtils.ensure_parent_dir(file_path)
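A small round-trip sketch of `load_params_file` as defined above (the file name and parameter keys are made up for illustration):

```python
import json
from pathlib import Path

from ins_pricing.modelling.core.bayesopt.utils.io_utils import IOUtils

# Write a params file using the {"best_params": {...}} wrapper form.
path = Path("best_params.json")
path.write_text(json.dumps({"best_params": {"lr": 0.001, "depth": 6}}))

params = IOUtils.load_params_file(str(path))
assert params == {"lr": 0.001, "depth": 6}  # wrapper unwrapped, keys sanitized
path.unlink()
```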