ins-pricing 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. ins_pricing/cli/BayesOpt_entry.py +32 -0
  2. ins_pricing/cli/utils/import_resolver.py +29 -3
  3. ins_pricing/cli/utils/notebook_utils.py +3 -2
  4. ins_pricing/docs/modelling/BayesOpt_USAGE.md +3 -3
  5. ins_pricing/modelling/core/bayesopt/__init__.py +4 -0
  6. ins_pricing/modelling/core/bayesopt/config_preprocess.py +12 -0
  7. ins_pricing/modelling/core/bayesopt/core.py +21 -8
  8. ins_pricing/modelling/core/bayesopt/models/model_ft_components.py +38 -12
  9. ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py +16 -6
  10. ins_pricing/modelling/core/bayesopt/models/model_gnn.py +16 -6
  11. ins_pricing/modelling/core/bayesopt/models/model_resn.py +16 -7
  12. ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py +2 -0
  13. ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py +25 -8
  14. ins_pricing/modelling/core/bayesopt/trainers/trainer_glm.py +14 -11
  15. ins_pricing/modelling/core/bayesopt/trainers/trainer_gnn.py +29 -10
  16. ins_pricing/modelling/core/bayesopt/trainers/trainer_resn.py +28 -12
  17. ins_pricing/modelling/core/bayesopt/trainers/trainer_xgb.py +13 -14
  18. ins_pricing/modelling/core/bayesopt/utils/losses.py +129 -0
  19. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +18 -3
  20. ins_pricing/modelling/core/bayesopt/utils/torch_trainer_mixin.py +24 -3
  21. ins_pricing/production/predict.py +38 -9
  22. ins_pricing/setup.py +1 -1
  23. ins_pricing/utils/metrics.py +27 -3
  24. ins_pricing/utils/torch_compat.py +40 -0
  25. {ins_pricing-0.3.2.dist-info → ins_pricing-0.3.4.dist-info}/METADATA +162 -162
  26. {ins_pricing-0.3.2.dist-info → ins_pricing-0.3.4.dist-info}/RECORD +28 -27
  27. {ins_pricing-0.3.2.dist-info → ins_pricing-0.3.4.dist-info}/WHEEL +0 -0
  28. {ins_pricing-0.3.2.dist-info → ins_pricing-0.3.4.dist-info}/top_level.txt +0 -0
ins_pricing/cli/BayesOpt_entry.py

@@ -6,6 +6,8 @@ The main implementation lives in bayesopt_entry_runner.py.
 from __future__ import annotations
 
 from pathlib import Path
+import json
+import os
 import sys
 
 if __package__ in {None, ""}:
@@ -13,6 +15,36 @@ if __package__ in {None, ""}:
     if str(repo_root) not in sys.path:
         sys.path.insert(0, str(repo_root))
 
+def _apply_env_from_config(argv: list[str]) -> None:
+    if "--config-json" not in argv:
+        return
+    idx = argv.index("--config-json")
+    if idx + 1 >= len(argv):
+        return
+    raw_path = argv[idx + 1]
+    try:
+        cfg_path = Path(raw_path).expanduser()
+        if not cfg_path.is_absolute():
+            cfg_path = cfg_path.resolve()
+        if not cfg_path.exists():
+            script_dir = Path(__file__).resolve().parents[1]
+            candidate = (script_dir / raw_path).resolve()
+            if candidate.exists():
+                cfg_path = candidate
+        if not cfg_path.exists():
+            return
+        cfg = json.loads(cfg_path.read_text(encoding="utf-8", errors="replace"))
+        env = cfg.get("env", {})
+        if isinstance(env, dict):
+            for key, value in env.items():
+                if key is None:
+                    continue
+                os.environ.setdefault(str(key), str(value))
+    except Exception:
+        return
+
+_apply_env_from_config(sys.argv)
+
 try:
     from .bayesopt_entry_runner import main
 except Exception:  # pragma: no cover
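The new `_apply_env_from_config` hook applies an `env` block from the `--config-json` file before the runner (and its heavy imports) is loaded, using `os.environ.setdefault` so values already present in the environment win. A minimal sketch of a config that exercises it (the file name and env keys below are illustrative, not taken from the package):

```python
import json
import os
from pathlib import Path

# Hypothetical config passed as `--config-json cfg.json`; the entry script reads
# the "env" block and applies it with os.environ.setdefault before importing the runner.
cfg = {
    "env": {"BAYESOPT_DEBUG_IMPORTS": "1", "CUDA_VISIBLE_DEVICES": "0"},
    "runner": {"mode": "entry"},
}
Path("cfg.json").write_text(json.dumps(cfg, indent=2), encoding="utf-8")

# A value already set in the caller's environment is kept:
# setdefault only fills in keys that are missing.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
```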
ins_pricing/cli/utils/import_resolver.py

@@ -13,6 +13,7 @@ Usage:
 from __future__ import annotations
 
 import importlib
+import os
 import sys
 from dataclasses import dataclass, field
 from pathlib import Path
@@ -70,14 +71,39 @@ class ResolvedImports:
     plot_loss_curve: Optional[Callable] = None
 
 
+def _debug_imports_enabled() -> bool:
+    value = os.environ.get("BAYESOPT_DEBUG_IMPORTS")
+    if value is None:
+        return False
+    return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}
+
+
 def _try_import(module_path: str, attr_name: Optional[str] = None) -> Optional[Any]:
     """Attempt to import a module or attribute, returning None on failure."""
     try:
         module = importlib.import_module(module_path)
         if attr_name:
-            return getattr(module, attr_name, None)
-        return module
-    except Exception:
+            result = getattr(module, attr_name, None)
+        else:
+            result = module
+        if _debug_imports_enabled():
+            origin = getattr(module, "__file__", None)
+            origin = origin or getattr(module, "__path__", None)
+            print(
+                f"[BAYESOPT_DEBUG_IMPORTS] imported {module_path}"
+                f"{'::' + attr_name if attr_name else ''} from {origin}",
+                file=sys.stderr,
+                flush=True,
+            )
+        return result
+    except Exception as exc:
+        if _debug_imports_enabled():
+            print(
+                f"[BAYESOPT_DEBUG_IMPORTS] failed import {module_path}"
+                f"{'::' + attr_name if attr_name else ''}: {exc.__class__.__name__}: {exc}",
+                file=sys.stderr,
+                flush=True,
+            )
         return None
 
 
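With this change the import resolver can report where each optional dependency was loaded from, or why it failed, whenever `BAYESOPT_DEBUG_IMPORTS` is set to a truthy value. A small sketch of exercising the flag directly (the module path is inferred from the file layout, and calling the private helper is for illustration only):

```python
import os

# Truthy spellings accepted by _debug_imports_enabled: 1 / true / yes / y / on.
os.environ["BAYESOPT_DEBUG_IMPORTS"] = "1"

from ins_pricing.cli.utils.import_resolver import _try_import  # noqa: E402

# Prints "[BAYESOPT_DEBUG_IMPORTS] imported pandas from ..." to stderr on success.
pd = _try_import("pandas")

# Returns None and logs a "failed import ...: ModuleNotFoundError: ..." line.
missing = _try_import("not_a_real_module")
```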
ins_pricing/cli/utils/notebook_utils.py

@@ -8,9 +8,9 @@ from pathlib import Path
 from typing import Iterable, List, Optional, Sequence, cast
 
 try:
-    from .cli_config import add_config_json_arg  # type: ignore
+    from .cli_config import add_config_json_arg, set_env  # type: ignore
 except Exception:  # pragma: no cover
-    from cli_config import add_config_json_arg  # type: ignore
+    from cli_config import add_config_json_arg, set_env  # type: ignore
 
 
 def _find_ins_pricing_dir(cwd: Optional[Path] = None) -> Path:
@@ -261,6 +261,7 @@ def run_from_config(config_json: str | Path) -> subprocess.CompletedProcess:
     if not config_path.is_absolute():
         config_path = (pkg_dir / config_path).resolve() if (pkg_dir / config_path).exists() else config_path.resolve()
     raw = json.loads(config_path.read_text(encoding="utf-8", errors="replace"))
+    set_env(raw.get("env", {}))
     runner = cast(dict, raw.get("runner") or {})
 
     mode = str(runner.get("mode") or "entry").strip().lower()
ins_pricing/docs/modelling/BayesOpt_USAGE.md

@@ -75,13 +75,13 @@ Under `ins_pricing/modelling/core/bayesopt/`:
 
 1) **Tools and utilities**
 
-   - `IOUtils / TrainingUtils / PlotUtils`: I/O, training utilities (batch size, tweedie loss, free_cuda), plotting helpers
+   - `IOUtils / TrainingUtils / PlotUtils`: I/O, training utilities (batch size, loss functions, free_cuda), plotting helpers
    - `DistributedUtils`: DDP init, rank/world_size helpers
 
 2) **TorchTrainerMixin (common components for torch tabular training)**
 
    - DataLoader: `_build_dataloader()` / `_build_val_dataloader()` (prints batch/accum/workers)
-   - Loss: `_compute_losses()` / `_compute_weighted_loss()` (regression uses tweedie; classification uses BCEWithLogits)
+   - Loss: `_compute_losses()` / `_compute_weighted_loss()` (regression supports tweedie/poisson/gamma/mse/mae; classification uses BCEWithLogits)
    - Early stop: `_early_stop_update()`
 
 3) **Sklearn-style model classes (core training objects)**
@@ -292,7 +292,7 @@ FT role is controlled by `ft_role` (from config or CLI `--ft-role`):
 ### 4.1 Supervised models (GLM/XGB/ResNet/FT-as-model)
 
 - `TrainerBase.tune()` calls each trainer's `cross_val()` and minimizes validation metric (default direction `minimize`)
-- Regression typically uses Tweedie deviance or related loss; classification uses logloss
+- Regression loss is configurable (tweedie/poisson/gamma/mse/mae); classification uses logloss
 
 ### 4.2 FT self-supervised (`unsupervised_embedding`)
 
ins_pricing/modelling/core/bayesopt/__init__.py

@@ -4,6 +4,10 @@ from __future__ import annotations
 
 import torch
 
+from ins_pricing.utils.torch_compat import disable_torch_dynamo_if_requested
+
+disable_torch_dynamo_if_requested()
+
 from .config_preprocess import (
     BayesOptConfig,
     DatasetPreprocessor,
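`disable_torch_dynamo_if_requested` comes from the new `ins_pricing/utils/torch_compat.py` (+40 lines, not shown in this excerpt), so its exact behaviour is not visible here. A plausible sketch of an opt-out helper of this shape, with a hypothetical flag name, might look like:

```python
import os


def disable_torch_dynamo_if_requested(flag: str = "INS_PRICING_DISABLE_DYNAMO") -> None:
    """Sketch only: skip torch.compile/dynamo tracing when an env flag is truthy."""
    if os.environ.get(flag, "").strip().lower() not in {"1", "true", "yes", "on"}:
        return
    try:
        import torch._dynamo as dynamo

        dynamo.config.disable = True  # globally disable dynamo compilation
    except Exception:
        # Older torch builds without dynamo: nothing to disable.
        pass
```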
ins_pricing/modelling/core/bayesopt/config_preprocess.py

@@ -12,6 +12,7 @@ import pandas as pd
 from sklearn.preprocessing import StandardScaler
 
 from .utils import IOUtils
+from .utils.losses import normalize_loss_name
 from ....exceptions import ConfigurationError, DataValidationError
 
 # NOTE: Some CSV exports may contain invisible BOM characters or leading/trailing
@@ -81,6 +82,7 @@ class BayesOptConfig:
         task_type: Either 'regression' or 'classification'
         binary_resp_nme: Column name for binary response (optional)
         cate_list: List of categorical feature column names
+        loss_name: Regression loss ('auto', 'tweedie', 'poisson', 'gamma', 'mse', 'mae')
         prop_test: Proportion of data for validation (0.0-1.0)
         rand_seed: Random seed for reproducibility
         epochs: Number of training epochs
@@ -117,6 +119,7 @@ class BayesOptConfig:
     task_type: str = 'regression'
     binary_resp_nme: Optional[str] = None
     cate_list: Optional[List[str]] = None
+    loss_name: str = "auto"
 
     # Training configuration
     prop_test: float = 0.25
@@ -207,6 +210,15 @@ class BayesOptConfig:
             errors.append(
                 f"task_type must be one of {valid_task_types}, got '{self.task_type}'"
            )
+        # Validate loss_name
+        try:
+            normalized_loss = normalize_loss_name(self.loss_name, self.task_type)
+            if self.task_type == "classification" and normalized_loss not in {"auto", "logloss", "bce"}:
+                errors.append(
+                    "loss_name must be 'auto', 'logloss', or 'bce' for classification tasks."
+                )
+        except ValueError as exc:
+            errors.append(str(exc))
 
         # Validate prop_test
         if not 0.0 < self.prop_test < 1.0:
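The docstring above pins down the accepted values; `auto` keeps the previous behaviour of inferring the loss from the model name. An illustrative config fragment (field values are hypothetical):

```python
# Accepted values documented above:
#   regression:     "auto", "tweedie", "poisson", "gamma", "mse", "mae"
#   classification: "auto", "logloss", "bce"
# "auto" keeps the old behaviour of inferring the loss from model_nme.
config_fragment = {
    "model_nme": "sev_gbm",        # hypothetical model name
    "task_type": "regression",
    "loss_name": "gamma",          # explicit choice overrides name-based inference
}
# validate() collects an error rather than raising, e.g. for loss_name="gamma"
# combined with task_type="classification".
```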
ins_pricing/modelling/core/bayesopt/core.py

@@ -17,6 +17,12 @@ from .model_plotting_mixin import BayesOptPlottingMixin
 from .models import GraphNeuralNetSklearn
 from .trainers import FTTrainer, GLMTrainer, GNNTrainer, ResNetTrainer, XGBTrainer
 from .utils import EPS, infer_factor_and_cate_list, set_global_seed
+from .utils.losses import (
+    infer_loss_name_from_model_name,
+    normalize_loss_name,
+    resolve_tweedie_power,
+    resolve_xgb_objective,
+)
 
 
 class _CVSplitter:
@@ -293,6 +299,14 @@ class BayesOptModel(BayesOptPlottingMixin, BayesOptExplainMixin):
         self.config = cfg
         self.model_nme = cfg.model_nme
         self.task_type = cfg.task_type
+        normalized_loss = normalize_loss_name(getattr(cfg, "loss_name", None), self.task_type)
+        if self.task_type == "classification":
+            self.loss_name = "logloss" if normalized_loss == "auto" else normalized_loss
+        else:
+            if normalized_loss == "auto":
+                self.loss_name = infer_loss_name_from_model_name(self.model_nme)
+            else:
+                self.loss_name = normalized_loss
         self.resp_nme = cfg.resp_nme
         self.weight_nme = cfg.weight_nme
         self.factor_nmes = cfg.factor_nmes
@@ -339,14 +353,7 @@ class BayesOptModel(BayesOptPlottingMixin, BayesOptExplainMixin):
         if self.task_type == 'classification':
             self.obj = 'binary:logistic'
         else:  # regression task
-            if 'f' in self.model_nme:
-                self.obj = 'count:poisson'
-            elif 's' in self.model_nme:
-                self.obj = 'reg:gamma'
-            elif 'bc' in self.model_nme:
-                self.obj = 'reg:tweedie'
-            else:
-                self.obj = 'reg:tweedie'
+            self.obj = resolve_xgb_objective(self.loss_name)
         self.fit_params = {
             'sample_weight': self.train_data[self.weight_nme].values
         }
@@ -426,6 +433,11 @@ class BayesOptModel(BayesOptPlottingMixin, BayesOptExplainMixin):
     def default_tweedie_power(self, obj: Optional[str] = None) -> Optional[float]:
         if self.task_type == 'classification':
             return None
+        loss_name = getattr(self, "loss_name", None)
+        if loss_name:
+            resolved = resolve_tweedie_power(str(loss_name), default=1.5)
+            if resolved is not None:
+                return resolved
         objective = obj or getattr(self, "obj", None)
         if objective == 'count:poisson':
             return 1.0
@@ -503,6 +515,7 @@ class BayesOptModel(BayesOptPlottingMixin, BayesOptExplainMixin):
             patience=5,
             task_type=self.task_type,
             tweedie_power=tw_power,
+            loss_name=self.loss_name,
             use_data_parallel=False,
             use_ddp=False,
             use_approx_knn=self.config.gnn_use_approx_knn,
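`utils/losses.py` itself (+129 lines) is outside this excerpt, but the call sites above fix its contract: `normalize_loss_name` canonicalizes and validates the configured name, `infer_loss_name_from_model_name` reproduces the old substring heuristic, `resolve_tweedie_power` maps a loss to a Tweedie variance power, and `resolve_xgb_objective` maps it to an XGBoost objective. A rough sketch consistent with the branches removed above (the shipped implementation may differ):

```python
def infer_loss_name_from_model_name(model_nme: str) -> str:
    # Mirrors the removed heuristic: 'f' -> frequency/Poisson, 's' -> severity/Gamma,
    # anything else (including 'bc' burning cost) -> Tweedie.
    if 'f' in model_nme:
        return "poisson"
    if 's' in model_nme:
        return "gamma"
    return "tweedie"


def resolve_xgb_objective(loss_name: str) -> str:
    # Consistent with the objectives the old branches assigned; mse/mae mappings are guesses.
    return {
        "poisson": "count:poisson",
        "gamma": "reg:gamma",
        "tweedie": "reg:tweedie",
        "mse": "reg:squarederror",
        "mae": "reg:absoluteerror",
    }.get(loss_name, "reg:tweedie")


def resolve_tweedie_power(loss_name: str, default: float = 1.5):
    # Poisson and Gamma sit at the power-1 and power-2 ends of the Tweedie family;
    # non-deviance losses (mse/mae) have no power, hence None.
    return {"poisson": 1.0, "gamma": 2.0, "tweedie": default}.get(loss_name)
```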
ins_pricing/modelling/core/bayesopt/models/model_ft_components.py

@@ -106,31 +106,58 @@ class ScaledTransformerEncoderLayer(nn.Module):
         self.res_scale_attn = residual_scale_attn
         self.res_scale_ffn = residual_scale_ffn
 
-    def forward(self, src, src_mask=None, src_key_padding_mask=None):
+    def forward(self, src, src_mask=None, src_key_padding_mask=None, is_causal: Optional[bool] = None, **_kwargs):
         # Input tensor shape: (batch, seq_len, d_model).
         x = src
 
         if self.norm_first:
             # Pre-norm before attention.
-            x = x + self._sa_block(self.norm1(x), src_mask,
-                                   src_key_padding_mask)
+            x = x + self._sa_block(
+                self.norm1(x),
+                src_mask,
+                src_key_padding_mask,
+                is_causal=is_causal,
+            )
             x = x + self._ff_block(self.norm2(x))
         else:
             # Post-norm (usually disabled).
             x = self.norm1(
-                x + self._sa_block(x, src_mask, src_key_padding_mask))
+                x + self._sa_block(
+                    x,
+                    src_mask,
+                    src_key_padding_mask,
+                    is_causal=is_causal,
+                )
+            )
             x = self.norm2(x + self._ff_block(x))
 
         return x
 
-    def _sa_block(self, x, attn_mask, key_padding_mask):
+    def _sa_block(self, x, attn_mask, key_padding_mask, *, is_causal: Optional[bool] = None):
         # Self-attention with residual scaling.
-        attn_out, _ = self.self_attn(
-            x, x, x,
-            attn_mask=attn_mask,
-            key_padding_mask=key_padding_mask,
-            need_weights=False
-        )
+        if is_causal is None:
+            attn_out, _ = self.self_attn(
+                x, x, x,
+                attn_mask=attn_mask,
+                key_padding_mask=key_padding_mask,
+                need_weights=False,
+            )
+        else:
+            try:
+                attn_out, _ = self.self_attn(
+                    x, x, x,
+                    attn_mask=attn_mask,
+                    key_padding_mask=key_padding_mask,
+                    need_weights=False,
+                    is_causal=is_causal,
+                )
+            except TypeError:
+                attn_out, _ = self.self_attn(
+                    x, x, x,
+                    attn_mask=attn_mask,
+                    key_padding_mask=key_padding_mask,
+                    need_weights=False,
+                )
         return self.res_scale_attn * self.dropout1(attn_out)
 
     def _ff_block(self, x):
@@ -313,4 +340,3 @@ class MaskedTabularDataset(Dataset):
             None if self.X_cat_true is None else self.X_cat_true[idx],
             None if self.cat_mask is None else self.cat_mask[idx],
         )
-
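`forward`/`_sa_block` gain `is_causal` and `**_kwargs` because newer `torch.nn.TransformerEncoder` versions forward `is_causal` to custom encoder layers, while older `MultiheadAttention.forward` signatures reject the keyword; the `try`/`except TypeError` retry keeps both working. The same call-then-fall-back idiom in isolation (a generic sketch, not package code):

```python
from typing import Any, Callable


def call_with_optional_kwarg(fn: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
    """Try a call that passes newer keyword arguments; retry without the ones
    the installed library version does not recognise."""
    try:
        return fn(*args, **kwargs)
    except TypeError:
        kwargs.pop("is_causal", None)  # drop the kwarg unknown to older torch
        return fn(*args, **kwargs)
```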
ins_pricing/modelling/core/bayesopt/models/model_ft_trainer.py

@@ -16,6 +16,11 @@ from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.nn.utils import clip_grad_norm_
 
 from ..utils import DistributedUtils, EPS, TorchTrainerMixin
+from ..utils.losses import (
+    infer_loss_name_from_model_name,
+    normalize_loss_name,
+    resolve_tweedie_power,
+)
 from .model_ft_components import FTTransformerCore, MaskedTabularDataset, TabularDataset
 
 
@@ -159,7 +164,8 @@ class FTTransformerSklearn(TorchTrainerMixin, nn.Module):
                  weight_decay: float = 0.0,
                  use_data_parallel: bool = True,
                  use_ddp: bool = False,
-                 num_numeric_tokens: Optional[int] = None
+                 num_numeric_tokens: Optional[int] = None,
+                 loss_name: Optional[str] = None
                  ):
         super().__init__()
 
@@ -187,14 +193,18 @@ class FTTransformerSklearn(TorchTrainerMixin, nn.Module):
         self.weight_decay = weight_decay
         self.task_type = task_type
         self.patience = patience
+        resolved_loss = normalize_loss_name(loss_name, self.task_type)
         if self.task_type == 'classification':
+            self.loss_name = "logloss"
             self.tw_power = None  # No Tweedie power for classification.
-        elif 'f' in self.model_nme:
-            self.tw_power = 1.0
-        elif 's' in self.model_nme:
-            self.tw_power = 2.0
         else:
-            self.tw_power = tweedie_power
+            if resolved_loss == "auto":
+                resolved_loss = infer_loss_name_from_model_name(self.model_nme)
+            self.loss_name = resolved_loss
+            if self.loss_name == "tweedie":
+                self.tw_power = float(tweedie_power) if tweedie_power is not None else 1.5
+            else:
+                self.tw_power = resolve_tweedie_power(self.loss_name, default=1.5)
 
         if self.is_ddp_enabled:
             self.device = torch.device(f"cuda:{self.local_rank}")
ins_pricing/modelling/core/bayesopt/models/model_gnn.py

@@ -17,6 +17,11 @@ from torch.nn.parallel import DistributedDataParallel as DDP
 from torch.nn.utils import clip_grad_norm_
 
 from ..utils import DistributedUtils, EPS, IOUtils, TorchTrainerMixin
+from ..utils.losses import (
+    infer_loss_name_from_model_name,
+    normalize_loss_name,
+    resolve_tweedie_power,
+)
 
 try:
     from torch_geometric.nn import knn_graph
@@ -109,7 +114,8 @@ class GraphNeuralNetSklearn(TorchTrainerMixin, nn.Module):
                  max_gpu_knn_nodes: Optional[int] = None,
                  knn_gpu_mem_ratio: float = 0.9,
                  knn_gpu_mem_overhead: float = 2.0,
-                 knn_cpu_jobs: Optional[int] = -1) -> None:
+                 knn_cpu_jobs: Optional[int] = -1,
+                 loss_name: Optional[str] = None) -> None:
         super().__init__()
         self.model_nme = model_nme
         self.input_dim = input_dim
@@ -139,14 +145,18 @@ class GraphNeuralNetSklearn(TorchTrainerMixin, nn.Module):
         self._adj_cache_key: Optional[Tuple[Any, ...]] = None
         self._adj_cache_tensor: Optional[torch.Tensor] = None
 
+        resolved_loss = normalize_loss_name(loss_name, self.task_type)
         if self.task_type == 'classification':
+            self.loss_name = "logloss"
             self.tw_power = None
-        elif 'f' in self.model_nme:
-            self.tw_power = 1.0
-        elif 's' in self.model_nme:
-            self.tw_power = 2.0
         else:
-            self.tw_power = tweedie_power
+            if resolved_loss == "auto":
+                resolved_loss = infer_loss_name_from_model_name(self.model_nme)
+            self.loss_name = resolved_loss
+            if self.loss_name == "tweedie":
+                self.tw_power = float(tweedie_power) if tweedie_power is not None else 1.5
+            else:
+                self.tw_power = resolve_tweedie_power(self.loss_name, default=1.5)
 
         self.ddp_enabled = False
         self.local_rank = int(os.environ.get("LOCAL_RANK", 0))
ins_pricing/modelling/core/bayesopt/models/model_resn.py

@@ -12,6 +12,11 @@ from torch.nn.utils import clip_grad_norm_
 from torch.utils.data import TensorDataset
 
 from ..utils import DistributedUtils, EPS, TorchTrainerMixin
+from ..utils.losses import (
+    infer_loss_name_from_model_name,
+    normalize_loss_name,
+    resolve_tweedie_power,
+)
 
 
 # =============================================================================
@@ -140,7 +145,8 @@ class ResNetSklearn(TorchTrainerMixin, nn.Module):
                  stochastic_depth: float = 0.0,
                  weight_decay: float = 1e-4,
                  use_data_parallel: bool = True,
-                 use_ddp: bool = False):
+                 use_ddp: bool = False,
+                 loss_name: Optional[str] = None):
         super(ResNetSklearn, self).__init__()
 
         self.use_ddp = use_ddp
@@ -179,15 +185,18 @@ class ResNetSklearn(TorchTrainerMixin, nn.Module):
         else:
             self.device = torch.device('cpu')
 
-        # Tweedie power (unused for classification)
+        resolved_loss = normalize_loss_name(loss_name, self.task_type)
         if self.task_type == 'classification':
+            self.loss_name = "logloss"
             self.tw_power = None
-        elif 'f' in self.model_nme:
-            self.tw_power = 1
-        elif 's' in self.model_nme:
-            self.tw_power = 2
         else:
-            self.tw_power = tweedie_power
+            if resolved_loss == "auto":
+                resolved_loss = infer_loss_name_from_model_name(self.model_nme)
+            self.loss_name = resolved_loss
+            if self.loss_name == "tweedie":
+                self.tw_power = float(tweedie_power) if tweedie_power is not None else 1.5
+            else:
+                self.tw_power = resolve_tweedie_power(self.loss_name, default=1.5)
 
         # Build network (construct on CPU first)
         core = ResNetSequential(
ins_pricing/modelling/core/bayesopt/trainers/trainer_base.py

@@ -578,6 +578,7 @@ class TrainerBase:
             "n_layers": getattr(self.model, "n_layers", 4),
             "dropout": getattr(self.model, "dropout", 0.1),
             "task_type": getattr(self.model, "task_type", "regression"),
+            "loss_name": getattr(self.model, "loss_name", None),
             "tw_power": getattr(self.model, "tw_power", 1.5),
             "num_geo": getattr(self.model, "num_geo", 0),
             "num_numeric_tokens": getattr(self.model, "num_numeric_tokens", None),
@@ -638,6 +639,7 @@ class TrainerBase:
             n_layers=model_config.get("n_layers", 4),
             dropout=model_config.get("dropout", 0.1),
             task_type=model_config.get("task_type", "regression"),
+            loss_name=model_config.get("loss_name", None),
             tweedie_power=model_config.get("tw_power", 1.5),
             num_numeric_tokens=model_config.get("num_numeric_tokens"),
             use_data_parallel=False,
ins_pricing/modelling/core/bayesopt/trainers/trainer_ft.py

@@ -5,11 +5,12 @@ from typing import Any, Dict, List, Optional, Tuple
 import numpy as np
 import optuna
 import pandas as pd
-from sklearn.metrics import log_loss, mean_tweedie_deviance
+from sklearn.metrics import log_loss
 from sklearn.model_selection import GroupKFold, TimeSeriesSplit
 
 from .trainer_base import TrainerBase
 from ..models import FTTransformerSklearn
+from ..utils.losses import regression_loss
 
 class FTTrainer(TrainerBase):
     def __init__(self, context: "BayesOptModel") -> None:
@@ -67,6 +68,7 @@ class FTTrainer(TrainerBase):
 
     def cross_val_unsupervised(self, trial: Optional[optuna.trial.Trial]) -> float:
         """Optuna objective A: minimize validation loss for masked reconstruction."""
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
        param_space: Dict[str, Callable[[optuna.trial.Trial], Any]] = {
             "learning_rate": lambda t: t.suggest_float('learning_rate', 1e-5, 5e-3, log=True),
             "d_model": lambda t: t.suggest_int('d_model', 16, 128, step=16),
@@ -159,6 +161,7 @@ class FTTrainer(TrainerBase):
             use_data_parallel=self.ctx.config.use_ft_data_parallel,
             use_ddp=self.ctx.config.use_ft_ddp,
             num_numeric_tokens=num_numeric_tokens,
+            loss_name=loss_name,
         )
         model.set_params(model_params)
         try:
@@ -191,7 +194,8 @@ class FTTrainer(TrainerBase):
             "dropout": lambda t: t.suggest_float('dropout', 0.0, 0.2),
             "weight_decay": lambda t: t.suggest_float('weight_decay', 1e-6, 1e-2, log=True),
         }
-        if self.ctx.task_type == 'regression' and self.ctx.obj == 'reg:tweedie':
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
+        if self.ctx.task_type == 'regression' and loss_name == 'tweedie':
             param_space["tw_power"] = lambda t: t.suggest_float(
                 'tw_power', 1.0, 2.0)
         geo_enabled = bool(
@@ -231,10 +235,12 @@ class FTTrainer(TrainerBase):
         tw_power = params.get("tw_power")
         if self.ctx.task_type == 'regression':
             base_tw = self.ctx.default_tweedie_power()
-            if self.ctx.obj in ('count:poisson', 'reg:gamma'):
-                tw_power = base_tw
-            elif tw_power is None:
+            if loss_name == "tweedie":
+                tw_power = base_tw if tw_power is None else tw_power
+            elif loss_name in ("poisson", "gamma"):
                 tw_power = base_tw
+            else:
+                tw_power = None
         metric_ctx["tw_power"] = tw_power
 
         adaptive_heads, _ = self._resolve_adaptive_heads(
@@ -259,6 +265,7 @@ class FTTrainer(TrainerBase):
             use_data_parallel=self.ctx.config.use_ft_data_parallel,
             use_ddp=self.ctx.config.use_ft_ddp,
             num_numeric_tokens=num_numeric_tokens,
+            loss_name=loss_name,
         ).set_params({"_geo_params": geo_params_local} if geo_enabled else {})
 
         def fit_predict(model, X_train, y_train, w_train, X_val, y_val, w_val, trial_obj):
@@ -286,11 +293,12 @@ class FTTrainer(TrainerBase):
 
         def metric_fn(y_true, y_pred, weight):
             if self.ctx.task_type == 'regression':
-                return mean_tweedie_deviance(
+                return regression_loss(
                     y_true,
                     y_pred,
-                    sample_weight=weight,
-                    power=metric_ctx.get("tw_power", 1.5)
+                    weight,
+                    loss_name=loss_name,
+                    tweedie_power=metric_ctx.get("tw_power", 1.5),
                 )
             return log_loss(y_true, y_pred, sample_weight=weight)
 
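`regression_loss` replaces the hard-coded `mean_tweedie_deviance` call so the cross-validation metric follows the configured loss. Its body is in the new `utils/losses.py` (not shown); judging only from this call site, a compatible sketch built on sklearn metrics could be:

```python
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    mean_tweedie_deviance,
)


def regression_loss(y_true, y_pred, sample_weight=None, *, loss_name="tweedie", tweedie_power=1.5):
    """Sketch only: dispatch the validation metric on the configured loss name."""
    if loss_name in ("tweedie", "poisson", "gamma"):
        # Poisson/Gamma are fixed powers; Tweedie uses the tuned/default power.
        power = {"poisson": 1.0, "gamma": 2.0}.get(loss_name, tweedie_power or 1.5)
        return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=power)
    if loss_name == "mae":
        return mean_absolute_error(y_true, y_pred, sample_weight=sample_weight)
    return mean_squared_error(y_true, y_pred, sample_weight=sample_weight)
```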
@@ -313,6 +321,7 @@ class FTTrainer(TrainerBase):
     def train(self) -> None:
         if not self.best_params:
             raise RuntimeError("Run tune() first to obtain best FT-Transformer parameters.")
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
         resolved_params = dict(self.best_params)
         d_model_value = resolved_params.get("d_model", 64)
         adaptive_heads, heads_adjusted = self._resolve_adaptive_heads(
@@ -342,6 +351,7 @@ class FTTrainer(TrainerBase):
             use_ddp=self.ctx.config.use_ft_ddp,
             num_numeric_tokens=self._resolve_numeric_tokens(),
             weight_decay=float(resolved_params.get("weight_decay", 0.0)),
+            loss_name=loss_name,
         )
         tmp_model.set_params(resolved_params)
         geo_train_full = self.ctx.train_geo_tokens
@@ -375,6 +385,7 @@ class FTTrainer(TrainerBase):
             use_ddp=self.ctx.config.use_ft_ddp,
             num_numeric_tokens=self._resolve_numeric_tokens(),
             weight_decay=float(resolved_params.get("weight_decay", 0.0)),
+            loss_name=loss_name,
         )
         if refit_epochs is not None:
             self.model.epochs = int(refit_epochs)
@@ -408,6 +419,7 @@ class FTTrainer(TrainerBase):
     def ensemble_predict(self, k: int) -> None:
         if not self.best_params:
             raise RuntimeError("Run tune() first to obtain best FT-Transformer parameters.")
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
         k = max(2, int(k))
         X_all = self.ctx.train_data[self.ctx.factor_nmes]
         y_all = self.ctx.train_data[self.ctx.resp_nme]
@@ -446,6 +458,7 @@ class FTTrainer(TrainerBase):
             use_ddp=self.ctx.config.use_ft_ddp,
             num_numeric_tokens=self._resolve_numeric_tokens(),
             weight_decay=float(resolved_params.get("weight_decay", 0.0)),
+            loss_name=loss_name,
         )
         model.set_params(resolved_params)
 
@@ -541,6 +554,7 @@ class FTTrainer(TrainerBase):
         return splitter, None, oof_folds
 
     def _build_ft_feature_model(self, resolved_params: Dict[str, Any]) -> FTTransformerSklearn:
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
         model = FTTransformerSklearn(
             model_nme=self.ctx.model_nme,
             num_cols=self.ctx.num_features,
@@ -549,6 +563,7 @@ class FTTrainer(TrainerBase):
             use_data_parallel=self.ctx.config.use_ft_data_parallel,
             use_ddp=self.ctx.config.use_ft_ddp,
             num_numeric_tokens=self._resolve_numeric_tokens(),
+            loss_name=loss_name,
         )
         adaptive_heads, heads_adjusted = self._resolve_adaptive_heads(
             d_model=resolved_params.get("d_model", model.d_model),
@@ -702,6 +717,7 @@ class FTTrainer(TrainerBase):
            num_loss_weight: float = 1.0,
            cat_loss_weight: float = 1.0) -> None:
         """Self-supervised pretraining (masked reconstruction) and cache embeddings."""
+        loss_name = getattr(self.ctx, "loss_name", "tweedie")
         self.model = FTTransformerSklearn(
             model_nme=self.ctx.model_nme,
             num_cols=self.ctx.num_features,
@@ -710,6 +726,7 @@ class FTTrainer(TrainerBase):
             use_data_parallel=self.ctx.config.use_ft_data_parallel,
             use_ddp=self.ctx.config.use_ft_ddp,
             num_numeric_tokens=self._resolve_numeric_tokens(),
+            loss_name=loss_name,
         )
         resolved_params = dict(params or {})
         # Reuse supervised tuning structure params unless explicitly overridden.