ins-pricing 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ins_pricing/cli/BayesOpt_entry.py +15 -5
- ins_pricing/cli/BayesOpt_incremental.py +43 -10
- ins_pricing/cli/Explain_Run.py +16 -5
- ins_pricing/cli/Explain_entry.py +29 -8
- ins_pricing/cli/Pricing_Run.py +16 -5
- ins_pricing/cli/bayesopt_entry_runner.py +45 -12
- ins_pricing/cli/utils/bootstrap.py +23 -0
- ins_pricing/cli/utils/cli_config.py +34 -15
- ins_pricing/cli/utils/import_resolver.py +14 -14
- ins_pricing/cli/utils/notebook_utils.py +120 -106
- ins_pricing/cli/watchdog_run.py +15 -5
- ins_pricing/frontend/app.py +132 -61
- ins_pricing/frontend/config_builder.py +33 -0
- ins_pricing/frontend/example_config.json +11 -0
- ins_pricing/frontend/runner.py +340 -388
- ins_pricing/modelling/README.md +1 -1
- ins_pricing/modelling/bayesopt/README.md +29 -11
- ins_pricing/modelling/bayesopt/config_components.py +12 -0
- ins_pricing/modelling/bayesopt/config_preprocess.py +50 -13
- ins_pricing/modelling/bayesopt/core.py +47 -19
- ins_pricing/modelling/bayesopt/model_plotting_mixin.py +20 -14
- ins_pricing/modelling/bayesopt/models/model_ft_components.py +349 -342
- ins_pricing/modelling/bayesopt/models/model_ft_trainer.py +11 -5
- ins_pricing/modelling/bayesopt/models/model_gnn.py +20 -14
- ins_pricing/modelling/bayesopt/models/model_resn.py +9 -3
- ins_pricing/modelling/bayesopt/trainers/trainer_base.py +62 -50
- ins_pricing/modelling/bayesopt/trainers/trainer_ft.py +61 -53
- ins_pricing/modelling/bayesopt/trainers/trainer_glm.py +9 -3
- ins_pricing/modelling/bayesopt/trainers/trainer_gnn.py +40 -32
- ins_pricing/modelling/bayesopt/trainers/trainer_resn.py +36 -24
- ins_pricing/modelling/bayesopt/trainers/trainer_xgb.py +240 -37
- ins_pricing/modelling/bayesopt/utils/distributed_utils.py +193 -186
- ins_pricing/modelling/bayesopt/utils/torch_trainer_mixin.py +23 -10
- ins_pricing/pricing/factors.py +67 -56
- ins_pricing/setup.py +1 -1
- ins_pricing/utils/__init__.py +7 -6
- ins_pricing/utils/device.py +45 -24
- ins_pricing/utils/logging.py +34 -1
- ins_pricing/utils/profiling.py +8 -4
- {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.1.dist-info}/METADATA +182 -182
- {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.1.dist-info}/RECORD +43 -42
- {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.1.dist-info}/WHEEL +0 -0
- {ins_pricing-0.5.0.dist-info → ins_pricing-0.5.1.dist-info}/top_level.txt +0 -0
ins_pricing/modelling/bayesopt/models/model_ft_trainer.py

@@ -17,7 +17,7 @@ from torch.nn.utils import clip_grad_norm_
 
 from ins_pricing.modelling.bayesopt.utils.distributed_utils import DistributedUtils
 from ins_pricing.modelling.bayesopt.utils.torch_trainer_mixin import TorchTrainerMixin
-from ins_pricing.utils import EPS
+from ins_pricing.utils import EPS, get_logger, log_print
 from ins_pricing.utils.losses import (
     infer_loss_name_from_model_name,
     normalize_loss_name,
@@ -25,6 +25,12 @@ from ins_pricing.utils.losses import (
 )
 from ins_pricing.modelling.bayesopt.models.model_ft_components import FTTransformerCore, MaskedTabularDataset, TabularDataset
 
+_logger = get_logger("ins_pricing.modelling.bayesopt.models.model_ft_trainer")
+
+
+def _log(*args, **kwargs) -> None:
+    log_print(_logger, *args, **kwargs)
+
 
 # --- Helper functions for reconstruction loss computation ---
 
@@ -281,7 +287,7 @@ class FTTransformerSklearn(TorchTrainerMixin, nn.Module):
             self.use_data_parallel = False
         elif use_dp:
             if self.use_ddp and not self.is_ddp_enabled:
-                print(
+                _log(
                     ">>> DDP requested but not initialized; falling back to DataParallel.")
                 core = nn.DataParallel(core, device_ids=list(
                     range(torch.cuda.device_count())))
@@ -699,15 +705,15 @@ class FTTransformerSklearn(TorchTrainerMixin, nn.Module):
                 should_log = (not dist.is_initialized()
                               or DistributedUtils.is_main_process())
                 if should_log:
-                    print(msg, flush=True)
-                    print(
+                    _log(msg, flush=True)
+                    _log(
                         f" X_num: finite={bool(torch.isfinite(X_num_b).all())} "
                         f"min={float(X_num_b.min().detach().cpu()) if X_num_b.numel() else 0.0:.3g} "
                         f"max={float(X_num_b.max().detach().cpu()) if X_num_b.numel() else 0.0:.3g}",
                         flush=True,
                     )
                     if X_geo_b is not None:
-                        print(
+                        _log(
                             f" X_geo: finite={bool(torch.isfinite(X_geo_b).all())} "
                             f"min={float(X_geo_b.min().detach().cpu()) if X_geo_b.numel() else 0.0:.3g} "
                             f"max={float(X_geo_b.max().detach().cpu()) if X_geo_b.numel() else 0.0:.3g}",
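The change repeated across model_ft_trainer.py, model_gnn.py, model_resn.py and the trainers is the same: bare print(...) calls are replaced by a module-level _log helper that forwards to the package logger via get_logger and log_print. Both utilities are imported from ins_pricing.utils in the hunks above, but their bodies are not part of this diff, so the following is only a sketch of one plausible shape, assuming log_print mirrors print()'s signature so the existing call sites (including flush=True) keep working unchanged.

    # Sketch only: get_logger/log_print exist in ins_pricing.utils per the import
    # changes above; their internals are assumed here, not taken from the package.
    import logging

    def get_logger(name: str) -> logging.Logger:
        logger = logging.getLogger(name)
        if not logger.handlers:  # avoid attaching duplicate handlers on re-import
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter("%(asctime)s %(name)s %(message)s"))
            logger.addHandler(handler)
            logger.setLevel(logging.INFO)
        return logger

    def log_print(logger: logging.Logger, *args, sep: str = " ", flush: bool = False, **_ignored) -> None:
        # Join positional args the way print() would; flush and other print-style
        # kwargs are accepted but unused because the handler manages its stream.
        logger.info(sep.join(str(a) for a in args))

    # Per-module pattern added throughout 0.5.1:
    _logger = get_logger("ins_pricing.modelling.bayesopt.models.model_ft_trainer")

    def _log(*args, **kwargs) -> None:
        log_print(_logger, *args, **kwargs)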
ins_pricing/modelling/bayesopt/models/model_gnn.py

@@ -18,7 +18,7 @@ from torch.nn.utils import clip_grad_norm_
 
 from ins_pricing.modelling.bayesopt.utils.distributed_utils import DistributedUtils
 from ins_pricing.modelling.bayesopt.utils.torch_trainer_mixin import TorchTrainerMixin
-from ins_pricing.utils import EPS
+from ins_pricing.utils import EPS, get_logger, log_print
 from ins_pricing.utils.io import IOUtils
 from ins_pricing.utils.losses import (
     infer_loss_name_from_model_name,
@@ -45,6 +45,12 @@ except Exception:
 
 _GNN_MPS_WARNED = False
 
+_logger = get_logger("ins_pricing.modelling.bayesopt.models.model_gnn")
+
+
+def _log(*args, **kwargs) -> None:
+    log_print(_logger, *args, **kwargs)
+
 
 # =============================================================================
 # Simplified GNN implementation.
@@ -169,7 +175,7 @@ class GraphNeuralNetSklearn(TorchTrainerMixin, nn.Module):
         if use_ddp:
             world_size = int(os.environ.get("WORLD_SIZE", "1"))
             if world_size > 1:
-                print(
+                _log(
                     "[GNN] DDP training is not supported; falling back to single process.",
                     flush=True,
                 )
@@ -194,7 +200,7 @@ class GraphNeuralNetSklearn(TorchTrainerMixin, nn.Module):
             self.device = torch.device('mps')
             global _GNN_MPS_WARNED
             if not _GNN_MPS_WARNED:
-                print(
+                _log(
                     "[GNN] Using MPS backend; will fall back to CPU on unsupported ops.",
                     flush=True,
                 )
@@ -271,7 +277,7 @@ class GraphNeuralNetSklearn(TorchTrainerMixin, nn.Module):
        if self.device.type != "mps" or self._mps_fallback_triggered:
            return
        self._mps_fallback_triggered = True
-       print(f"[GNN] MPS op unsupported ({reason}); falling back to CPU.", flush=True)
+       _log(f"[GNN] MPS op unsupported ({reason}); falling back to CPU.", flush=True)
        self.device = torch.device("cpu")
        self.use_pyg_knn = False
        self.data_parallel_enabled = False
@@ -347,7 +353,7 @@ class GraphNeuralNetSklearn(TorchTrainerMixin, nn.Module):
         try:
             payload = torch.load(self.graph_cache_path, map_location="cpu")
         except Exception as exc:
-            print(
+            _log(
                 f"[GNN] Failed to load cached graph from {self.graph_cache_path}: {exc}")
             return None
         if isinstance(payload, dict) and "adj" in payload:
@@ -355,19 +361,19 @@ class GraphNeuralNetSklearn(TorchTrainerMixin, nn.Module):
             if meta_cached == meta_expected:
                 adj = payload["adj"]
                 if self.device.type == "mps" and getattr(adj, "is_sparse", False):
-                    print(
+                    _log(
                         f"[GNN] Cached sparse graph incompatible with MPS; rebuilding: {self.graph_cache_path}"
                     )
                     return None
                 return adj.to(self.device)
-            print(
+            _log(
                 f"[GNN] Cached graph metadata mismatch; rebuilding: {self.graph_cache_path}")
             return None
         if isinstance(payload, torch.Tensor):
-            print(
+            _log(
                 f"[GNN] Cached graph missing metadata; rebuilding: {self.graph_cache_path}")
             return None
-        print(
+        _log(
             f"[GNN] Invalid cached graph format; rebuilding: {self.graph_cache_path}")
         return None
 
@@ -387,7 +393,7 @@ class GraphNeuralNetSklearn(TorchTrainerMixin, nn.Module):
                 )
                 indices, _ = nn_index.neighbor_graph
             except Exception as exc:
-                print(
+                _log(
                     f"[GNN] Approximate kNN failed ({exc}); falling back to exact search.")
                 use_approx = False
 
@@ -440,7 +446,7 @@ class GraphNeuralNetSklearn(TorchTrainerMixin, nn.Module):
         if self._knn_warning_emitted:
             return
         if (not self.ddp_enabled) or self.local_rank == 0:
-            print(f"[GNN] Falling back to CPU kNN builder: {reason}")
+            _log(f"[GNN] Falling back to CPU kNN builder: {reason}")
         self._knn_warning_emitted = True
 
     def _should_use_gpu_knn(self, n_samples: int, X_tensor: torch.Tensor) -> bool:
@@ -592,7 +598,7 @@ class GraphNeuralNetSklearn(TorchTrainerMixin, nn.Module):
             IOUtils.ensure_parent_dir(str(self.graph_cache_path))
             torch.save({"adj": adj_norm.cpu(), "meta": meta_expected}, self.graph_cache_path)
         except Exception as exc:
-            print(
+            _log(
                 f"[GNN] Failed to cache graph to {self.graph_cache_path}: {exc}")
         self._adj_cache_meta = meta_expected
         self._adj_cache_key = None
@@ -712,12 +718,12 @@ class GraphNeuralNetSklearn(TorchTrainerMixin, nn.Module):
             if should_log:
                 elapsed = int(time.time() - epoch_start_ts)
                 if val_loss is None:
-                    print(
+                    _log(
                         f"[GNN] Epoch {epoch}/{self.epochs} loss={float(loss):.6f} elapsed={elapsed}s",
                         flush=True,
                     )
                 else:
-                    print(
+                    _log(
                         f"[GNN] Epoch {epoch}/{self.epochs} loss={float(loss):.6f} "
                         f"val_loss={float(val_loss):.6f} elapsed={elapsed}s",
                         flush=True,
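Several of the model_gnn.py call sites are one-shot warnings: the module-level _GNN_MPS_WARNED flag and the per-instance _knn_warning_emitted flag ensure the MPS and CPU-kNN fallback messages are logged once per process or per model rather than on every batch. A minimal, self-contained illustration of that guard pattern follows; the names below are stand-ins, not ins_pricing APIs.

    # Illustration of the warn-once pattern used by the GNN trainer.
    import logging

    _logger = logging.getLogger("example.gnn")
    _MPS_WARNED = False  # module-level: warn at most once per process

    def warn_mps_once() -> None:
        global _MPS_WARNED
        if not _MPS_WARNED:
            _logger.warning("[GNN] Using MPS backend; will fall back to CPU on unsupported ops.")
            _MPS_WARNED = True

    class KnnBuilder:
        def __init__(self) -> None:
            self._knn_warning_emitted = False  # instance-level: warn once per model

        def warn_cpu_fallback(self, reason: str) -> None:
            if not self._knn_warning_emitted:
                _logger.warning("[GNN] Falling back to CPU kNN builder: %s", reason)
                self._knn_warning_emitted = True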
ins_pricing/modelling/bayesopt/models/model_resn.py

@@ -13,13 +13,19 @@ from torch.utils.data import TensorDataset
 
 from ins_pricing.modelling.bayesopt.utils.distributed_utils import DistributedUtils
 from ins_pricing.modelling.bayesopt.utils.torch_trainer_mixin import TorchTrainerMixin
-from ins_pricing.utils import EPS
+from ins_pricing.utils import EPS, get_logger, log_print
 from ins_pricing.utils.losses import (
     infer_loss_name_from_model_name,
     normalize_loss_name,
     resolve_tweedie_power,
 )
 
+_logger = get_logger("ins_pricing.modelling.bayesopt.models.model_resn")
+
+
+def _log(*args, **kwargs) -> None:
+    log_print(_logger, *args, **kwargs)
+
 
 # =============================================================================
 # ResNet model and sklearn-style wrapper
@@ -130,7 +136,7 @@ class ResNetSequential(nn.Module):
 
     def forward(self, x):
        if self.training and not hasattr(self, '_printed_device'):
-           print(f">>> ResNetSequential executing on device: {x.device}")
+           _log(f">>> ResNetSequential executing on device: {x.device}")
            self._printed_device = True
        return self.net(x)
 
@@ -220,7 +226,7 @@ class ResNetSklearn(TorchTrainerMixin, nn.Module):
             self.use_data_parallel = False
         elif use_data_parallel and (self.device.type == 'cuda') and (torch.cuda.device_count() > 1):
             if self.use_ddp and not self.is_ddp_enabled:
-                print(
+                _log(
                     ">>> DDP requested but not initialized; falling back to DataParallel.")
                 core = nn.DataParallel(core, device_ids=list(
                     range(torch.cuda.device_count())))
ins_pricing/modelling/bayesopt/trainers/trainer_base.py

@@ -26,12 +26,16 @@ from sklearn.preprocessing import StandardScaler
 
 from ins_pricing.modelling.bayesopt.config_preprocess import BayesOptConfig, OutputManager
 from ins_pricing.modelling.bayesopt.utils.distributed_utils import DistributedUtils
-from ins_pricing.utils import EPS, ensure_parent_dir, get_logger, GPUMemoryManager, DeviceManager
+from ins_pricing.utils import EPS, ensure_parent_dir, get_logger, GPUMemoryManager, DeviceManager, log_print
 from ins_pricing.utils.torch_compat import torch_load
 
 # Module-level logger
 _logger = get_logger("ins_pricing.trainer")
 
+
+def _log(*args, **kwargs) -> None:
+    log_print(_logger, *args, **kwargs)
+
 class _OrderSplitter:
     def __init__(self, splitter, order: np.ndarray) -> None:
         self._splitter = splitter
@@ -364,7 +368,7 @@ class TrainerBase:
             try:
                 rank = dist.get_rank()
                 world = dist.get_world_size()
-                print(f"[DDP][{self.label}] entering barrier({reason}) rank={rank}/{world}", flush=True)
+                _log(f"[DDP][{self.label}] entering barrier({reason}) rank={rank}/{world}", flush=True)
             except Exception:
                 debug_barrier = False
         try:
@@ -399,9 +403,9 @@ class TrainerBase:
             else:
                 dist.barrier()
             if debug_barrier:
-                print(f"[DDP][{self.label}] exit barrier({reason}) rank={rank}/{world}", flush=True)
+                _log(f"[DDP][{self.label}] exit barrier({reason}) rank={rank}/{world}", flush=True)
         except Exception as exc:
-            print(
+            _log(
                 f"[DDP][{self.label}] barrier failed during {reason}: {exc}",
                 flush=True,
             )
@@ -433,12 +437,15 @@ class TrainerBase:
         ensure_parent_dir(str(path))
         return f"sqlite:///{path.as_posix()}"
 
-    def _resolve_optuna_study_name(self) -> str:
-        prefix = getattr(self.config, "optuna_study_prefix",
-                         None) or "bayesopt"
-        raw = f"{prefix}_{self.ctx.model_nme}_{self.model_name_prefix}"
-        safe = "".join([c if c.isalnum() or c in "._-" else "_" for c in raw])
-        return safe.lower()
+    def _resolve_optuna_study_name(self) -> str:
+        prefix = getattr(self.config, "optuna_study_prefix",
+                         None) or "bayesopt"
+        raw = f"{prefix}_{self.ctx.model_nme}_{self.model_name_prefix}"
+        safe = "".join([c if c.isalnum() or c in "._-" else "_" for c in raw])
+        return safe.lower()
+
+    def _optuna_cleanup_sync(self) -> bool:
+        return bool(getattr(self.config, "optuna_cleanup_synchronize", False))
 
     def tune(self, max_evals: int, objective_fn=None) -> None:
         # Generic Optuna tuning loop.
@@ -457,27 +464,27 @@ class TrainerBase:
             should_log = DistributedUtils.is_main_process()
             if should_log:
                 current_idx = progress_counter["count"] + 1
-                print(
+                _log(
                     f"[Optuna][{self.label}] Trial {current_idx}/{total_trials} started "
                     f"(trial_id={trial.number})."
                 )
             try:
                 result = objective_fn(trial)
-            except RuntimeError as exc:
-                if "out of memory" in str(exc).lower():
-                    print(
-                        f"[Optuna][{self.label}] OOM detected. Pruning trial and clearing CUDA cache."
-                    )
-                    self._clean_gpu()
-                    raise optuna.TrialPruned() from exc
-                raise
-            finally:
-                self._clean_gpu()
+            except RuntimeError as exc:
+                if "out of memory" in str(exc).lower():
+                    _log(
+                        f"[Optuna][{self.label}] OOM detected. Pruning trial and clearing CUDA cache."
+                    )
+                    self._clean_gpu(synchronize=True)
+                    raise optuna.TrialPruned() from exc
+                raise
+            finally:
+                self._clean_gpu(synchronize=self._optuna_cleanup_sync())
             if should_log:
                 progress_counter["count"] = progress_counter["count"] + 1
                 trial_state = getattr(trial, "state", None)
                 state_repr = getattr(trial_state, "name", "OK")
-                print(
+                _log(
                     f"[Optuna][{self.label}] Trial {progress_counter['count']}/{total_trials} finished "
                     f"(status={state_repr})."
                 )
@@ -552,7 +559,7 @@ class TrainerBase:
 
     def save(self) -> None:
         if self.model is None:
-            print(f"[save] Warning: No model to save for {self.label}")
+            _log(f"[save] Warning: No model to save for {self.label}")
             return
 
         path = self.output.model_path(self._get_model_filename())
@@ -615,7 +622,7 @@ class TrainerBase:
     def load(self) -> None:
         path = self.output.model_path(self._get_model_filename())
         if not os.path.exists(path):
-            print(f"[load] Warning: Model file not found: {path}")
+            _log(f"[load] Warning: Model file not found: {path}")
             return
 
         if self.label in ['Xgboost', 'GLM']:
@@ -695,7 +702,7 @@ class TrainerBase:
                     self.model = loaded_model
                 else:
                     # Unknown format
-                    print(f"[load] Warning: Unknown model format in {path}")
+                    _log(f"[load] Warning: Unknown model format in {path}")
             else:
                 # Very old format: direct model object
                 if loaded is not None:
@@ -749,14 +756,14 @@ class TrainerBase:
 
     def _distributed_worker_loop(self, objective_fn: Callable[[Optional[optuna.trial.Trial]], float]) -> None:
         if dist is None:
-            print(
+            _log(
                 f"[Optuna][Worker][{self.label}] torch.distributed unavailable. Worker exit.",
                 flush=True,
             )
             return
         DistributedUtils.setup_ddp()
         if not dist.is_initialized():
-            print(
+            _log(
                 f"[Optuna][Worker][{self.label}] DDP init failed. Worker exit.",
                 flush=True,
             )
@@ -783,16 +790,16 @@ class TrainerBase:
             except optuna.TrialPruned:
                 pass
             except Exception as exc:
-                print(
+                _log(
                     f"[Optuna][Worker][{self.label}] Exception: {exc}", flush=True)
-            finally:
-                self._clean_gpu()
-                # STEP 2 (DDP/Optuna): align worker with rank0 after objective_fn returns/raises.
-                self._dist_barrier("worker_end")
+            finally:
+                self._clean_gpu(synchronize=self._optuna_cleanup_sync())
+                # STEP 2 (DDP/Optuna): align worker with rank0 after objective_fn returns/raises.
+                self._dist_barrier("worker_end")
 
     def _distributed_tune(self, max_evals: int, objective_fn: Callable[[optuna.trial.Trial], float]) -> None:
         if dist is None:
-            print(
+            _log(
                 f"[Optuna][{self.label}] torch.distributed unavailable. Fallback to single-process.",
                 flush=True,
             )
@@ -807,12 +814,12 @@ class TrainerBase:
         if not dist.is_initialized():
             rank_env = os.environ.get("RANK", "0")
             if str(rank_env) != "0":
-                print(
+                _log(
                     f"[Optuna][{self.label}] DDP init failed on worker. Skip.",
                     flush=True,
                 )
                 return
-            print(
+            _log(
                 f"[Optuna][{self.label}] DDP init failed. Fallback to single-process.",
                 flush=True,
             )
@@ -834,27 +841,27 @@ class TrainerBase:
             should_log = True
             if should_log:
                 current_idx = progress_counter["count"] + 1
-                print(
+                _log(
                     f"[Optuna][{self.label}] Trial {current_idx}/{total_trials} started "
                     f"(trial_id={trial.number})."
                 )
             try:
                 result = objective_fn(trial)
-            except RuntimeError as exc:
-                if "out of memory" in str(exc).lower():
-                    print(
-                        f"[Optuna][{self.label}] OOM detected. Pruning trial and clearing CUDA cache."
-                    )
-                    self._clean_gpu()
-                    raise optuna.TrialPruned() from exc
-                raise
-            finally:
-                self._clean_gpu()
+            except RuntimeError as exc:
+                if "out of memory" in str(exc).lower():
+                    _log(
+                        f"[Optuna][{self.label}] OOM detected. Pruning trial and clearing CUDA cache."
+                    )
+                    self._clean_gpu(synchronize=True)
+                    raise optuna.TrialPruned() from exc
+                raise
+            finally:
+                self._clean_gpu(synchronize=self._optuna_cleanup_sync())
            if should_log:
                progress_counter["count"] = progress_counter["count"] + 1
                trial_state = getattr(trial, "state", None)
                state_repr = getattr(trial_state, "name", "OK")
-               print(
+               _log(
                    f"[Optuna][{self.label}] Trial {progress_counter['count']}/{total_trials} finished "
                    f"(status={state_repr})."
                )
@@ -919,9 +926,14 @@ class TrainerBase:
            self._distributed_send_command(
                {"type": "STOP", "best_params": self.best_params})
 
-    def _clean_gpu(
-            self) -> None:
-        GPUMemoryManager.clean()
+    def _clean_gpu(
+        self,
+        *,
+        synchronize: bool = True,
+        empty_cache: bool = True,
+    ) -> None:
+        """Clean up GPU memory using shared GPUMemoryManager."""
+        GPUMemoryManager.clean(synchronize=synchronize, empty_cache=empty_cache)
 
    def _standardize_fold(self,
                          X_train: pd.DataFrame,
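_clean_gpu now takes keyword-only synchronize and empty_cache flags and delegates to the shared GPUMemoryManager: the OOM handler calls _clean_gpu(synchronize=True), while the routine per-trial cleanup only synchronizes when the new optuna_cleanup_synchronize config attribute is set (it defaults to False via getattr). GPUMemoryManager.clean itself is not shown in this diff; the sketch below is an assumption about what such a helper typically does, not the package's actual implementation.

    # Sketch under assumptions: GPUMemoryManager is imported from ins_pricing.utils,
    # but its clean() body is not part of this diff. A typical implementation pairs
    # an optional torch.cuda.synchronize() with gc.collect() and empty_cache().
    import gc
    import torch

    class GPUMemoryManager:
        @staticmethod
        def clean(*, synchronize: bool = True, empty_cache: bool = True) -> None:
            if not torch.cuda.is_available():
                return
            if synchronize:
                torch.cuda.synchronize()   # wait for queued kernels before freeing
            gc.collect()                   # drop Python references holding tensors alive
            if empty_cache:
                torch.cuda.empty_cache()   # return cached blocks to the driver

Skipping the synchronize on routine cleanup avoids stalling the device after every Optuna trial, which is presumably why the new flag is opt-in while the OOM path still forces a full synchronize.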
ins_pricing/modelling/bayesopt/trainers/trainer_ft.py

@@ -11,17 +11,33 @@ from sklearn.model_selection import GroupKFold, TimeSeriesSplit
 from ins_pricing.modelling.bayesopt.trainers.trainer_base import TrainerBase
 from ins_pricing.modelling.bayesopt.models import FTTransformerSklearn
 from ins_pricing.utils.losses import regression_loss
-
-
-class FTTrainer(TrainerBase):
-    def __init__(self, context: "BayesOptModel") -> None:
-        if context.task_type == 'classification':
-            super().__init__(context, 'FTTransformerClassifier', 'FTTransformer')
-        else:
-            super().__init__(context, 'FTTransformer', 'FTTransformer')
-        self.model: Optional[FTTransformerSklearn] = None
-        self.enable_distributed_optuna = bool(context.config.use_ft_ddp)
-        self._cv_geo_warned = False
+from ins_pricing.utils import get_logger, log_print
+
+_logger = get_logger("ins_pricing.trainer.ft")
+
+
+def _log(*args, **kwargs) -> None:
+    log_print(_logger, *args, **kwargs)
+
+
+class FTTrainer(TrainerBase):
+    def __init__(self, context: "BayesOptModel") -> None:
+        if context.task_type == 'classification':
+            super().__init__(context, 'FTTransformerClassifier', 'FTTransformer')
+        else:
+            super().__init__(context, 'FTTransformer', 'FTTransformer')
+        self.model: Optional[FTTransformerSklearn] = None
+        self.enable_distributed_optuna = bool(context.config.use_ft_ddp)
+        self._cv_geo_warned = False
+
+    def _maybe_cleanup_gpu(self, model: Optional[FTTransformerSklearn]) -> None:
+        if not bool(getattr(self.ctx.config, "ft_cleanup_per_fold", False)):
+            return
+        if model is not None:
+            getattr(getattr(model, "ft", None), "to",
+                    lambda *_args, **_kwargs: None)("cpu")
+        synchronize = bool(getattr(self.ctx.config, "ft_cleanup_synchronize", False))
+        self._clean_gpu(synchronize=synchronize)
 
     def _resolve_numeric_tokens(self) -> int:
         requested = getattr(self.ctx.config, "ft_num_numeric_tokens", None)
@@ -121,7 +137,7 @@ class FTTrainer(TrainerBase):
         if built is not None:
             geo_train, geo_val, _, _ = built
         elif not self._cv_geo_warned:
-            print(
+            _log(
                 "[FTTrainer] Geo tokens unavailable for CV split; continue without geo tokens.",
                 flush=True,
             )
@@ -168,22 +184,20 @@ class FTTrainer(TrainerBase):
         )
         model = self._apply_dataloader_overrides(model)
         model.set_params(model_params)
-        try:
-            return float(model.fit_unsupervised(
-                X_train,
-                X_val=X_val,
-                trial=trial,
-                geo_train=geo_train,
-                geo_val=geo_val,
-                mask_prob_num=mask_prob_num,
-                mask_prob_cat=mask_prob_cat,
-                num_loss_weight=num_loss_weight,
-                cat_loss_weight=cat_loss_weight
-            ))
-        finally:
-            getattr(getattr(model, "ft", None), "to",
-                    lambda *_args, **_kwargs: None)("cpu")
-            self._clean_gpu()
+        try:
+            return float(model.fit_unsupervised(
+                X_train,
+                X_val=X_val,
+                trial=trial,
+                geo_train=geo_train,
+                geo_val=geo_val,
+                mask_prob_num=mask_prob_num,
+                mask_prob_cat=mask_prob_cat,
+                num_loss_weight=num_loss_weight,
+                cat_loss_weight=cat_loss_weight
+            ))
+        finally:
+            self._maybe_cleanup_gpu(model)
 
     def cross_val(self, trial: optuna.trial.Trial) -> float:
         # FT-Transformer CV also focuses on memory control:
@@ -229,7 +243,7 @@ class FTTrainer(TrainerBase):
                 token_count += 1
         approx_units = d_model * n_layers * max(1, token_count)
         if approx_units > 12_000_000:
-            print(
+            _log(
                 f"[FTTrainer] Trial pruned early: d_model={d_model}, n_layers={n_layers} -> approx_units={approx_units}")
             raise optuna.TrialPruned(
                 "config exceeds safe memory budget; prune before training")
@@ -285,7 +299,7 @@ class FTTrainer(TrainerBase):
             if built is not None:
                 geo_train, geo_val, _, _ = built
             elif not self._cv_geo_warned:
-                print(
+                _log(
                     "[FTTrainer] Geo tokens unavailable for CV split; continue without geo tokens.",
                     flush=True,
                 )
@@ -338,7 +352,7 @@ class FTTrainer(TrainerBase):
                 requested_heads=resolved_params.get("n_heads")
             )
             if heads_adjusted:
-                print(f"[FTTrainer] Auto-adjusted n_heads from "
+                _log(f"[FTTrainer] Auto-adjusted n_heads from "
                      f"{resolved_params.get('n_heads')} to {adaptive_heads} "
                      f"(d_model={d_model_value}).")
             resolved_params["n_heads"] = adaptive_heads
@@ -378,13 +392,11 @@ class FTTrainer(TrainerBase):
            geo_train=geo_train,
            geo_val=geo_val,
        )
-       refit_epochs = self._resolve_best_epoch(
-           getattr(tmp_model, "training_history", None),
-           default_epochs=int(self.ctx.epochs),
-       )
-       getattr(getattr(tmp_model, "ft", None), "to",
-               lambda *_args, **_kwargs: None)("cpu")
-       self._clean_gpu()
+       refit_epochs = self._resolve_best_epoch(
+           getattr(tmp_model, "training_history", None),
+           default_epochs=int(self.ctx.epochs),
+       )
+       self._maybe_cleanup_gpu(tmp_model)
 
        self.model = FTTransformerSklearn(
            model_nme=self.ctx.model_nme,
@@ -451,7 +463,7 @@ class FTTrainer(TrainerBase):
 
        split_iter, _ = self._resolve_ensemble_splits(X_all, k=k)
        if split_iter is None:
-           print(
+           _log(
                f"[FT Ensemble] unable to build CV split (n_samples={n_samples}); skip ensemble.",
                flush=True,
            )
@@ -494,15 +506,13 @@ class FTTrainer(TrainerBase):
 
            pred_train = model.predict(X_all, geo_tokens=geo_train_full)
            pred_test = model.predict(X_test, geo_tokens=geo_test_full)
-           preds_train_sum += np.asarray(pred_train, dtype=np.float64)
-           preds_test_sum += np.asarray(pred_test, dtype=np.float64)
-           getattr(getattr(model, "ft", None), "to",
-                   lambda *_args, **_kwargs: None)("cpu")
-           self._clean_gpu()
-           split_count += 1
+           preds_train_sum += np.asarray(pred_train, dtype=np.float64)
+           preds_test_sum += np.asarray(pred_test, dtype=np.float64)
+           self._maybe_cleanup_gpu(model)
+           split_count += 1
 
        if split_count < 1:
-           print(
+           _log(
                f"[FT Ensemble] no CV splits generated; skip ensemble.",
                flush=True,
            )
@@ -591,7 +601,7 @@ class FTTrainer(TrainerBase):
                requested_heads=resolved_params.get("n_heads"),
            )
            if heads_adjusted:
-               print(
+               _log(
                    f"[FTTrainer] Auto-adjusted n_heads from "
                    f"{resolved_params.get('n_heads')} to {adaptive_heads} "
                    f"(d_model={resolved_params.get('d_model', model.d_model)})."
@@ -652,11 +662,9 @@ class FTTrainer(TrainerBase):
            if preds_train is None:
                preds_train = np.empty(
                    (len(X_all),) + fold_pred.shape[1:], dtype=fold_pred.dtype)
-           preds_train[val_idx] = fold_pred
-
-           getattr(getattr(model, "ft", None), "to",
-                   lambda *_a, **_k: None)("cpu")
-           self._clean_gpu()
+           preds_train[val_idx] = fold_pred
+
+           self._maybe_cleanup_gpu(model)
 
        if preds_train is None:
            return None
@@ -773,7 +781,7 @@ class FTTrainer(TrainerBase):
                requested_heads=resolved_params.get("n_heads")
            )
            if heads_adjusted:
-               print(f"[FTTrainer] Auto-adjusted n_heads from "
+               _log(f"[FTTrainer] Auto-adjusted n_heads from "
                     f"{resolved_params.get('n_heads')} to {adaptive_heads} "
                     f"(d_model={resolved_params.get('d_model', self.model.d_model)}).")
            resolved_params["n_heads"] = adaptive_heads