ins-pricing 0.4.5-py3-none-any.whl → 0.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. ins_pricing/README.md +48 -22
  2. ins_pricing/__init__.py +142 -90
  3. ins_pricing/cli/BayesOpt_entry.py +58 -46
  4. ins_pricing/cli/BayesOpt_incremental.py +77 -110
  5. ins_pricing/cli/Explain_Run.py +42 -23
  6. ins_pricing/cli/Explain_entry.py +551 -577
  7. ins_pricing/cli/Pricing_Run.py +42 -23
  8. ins_pricing/cli/bayesopt_entry_runner.py +51 -16
  9. ins_pricing/cli/utils/bootstrap.py +23 -0
  10. ins_pricing/cli/utils/cli_common.py +256 -256
  11. ins_pricing/cli/utils/cli_config.py +379 -360
  12. ins_pricing/cli/utils/import_resolver.py +375 -358
  13. ins_pricing/cli/utils/notebook_utils.py +256 -242
  14. ins_pricing/cli/watchdog_run.py +216 -198
  15. ins_pricing/frontend/__init__.py +10 -10
  16. ins_pricing/frontend/app.py +132 -61
  17. ins_pricing/frontend/config_builder.py +33 -0
  18. ins_pricing/frontend/example_config.json +11 -0
  19. ins_pricing/frontend/example_workflows.py +1 -1
  20. ins_pricing/frontend/runner.py +340 -388
  21. ins_pricing/governance/__init__.py +20 -20
  22. ins_pricing/governance/release.py +159 -159
  23. ins_pricing/modelling/README.md +1 -1
  24. ins_pricing/modelling/__init__.py +147 -92
  25. ins_pricing/modelling/{core/bayesopt → bayesopt}/README.md +31 -13
  26. ins_pricing/modelling/{core/bayesopt → bayesopt}/__init__.py +64 -102
  27. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_components.py +12 -0
  28. ins_pricing/modelling/{core/bayesopt → bayesopt}/config_preprocess.py +589 -552
  29. ins_pricing/modelling/{core/bayesopt → bayesopt}/core.py +987 -958
  30. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_explain_mixin.py +296 -296
  31. ins_pricing/modelling/{core/bayesopt → bayesopt}/model_plotting_mixin.py +488 -548
  32. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/__init__.py +27 -27
  33. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py +349 -342
  34. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_trainer.py +921 -913
  35. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_gnn.py +794 -785
  36. ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_resn.py +454 -446
  37. ins_pricing/modelling/bayesopt/trainers/__init__.py +19 -0
  38. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_base.py +1294 -1282
  39. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_ft.py +64 -56
  40. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_glm.py +203 -198
  41. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_gnn.py +333 -325
  42. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_resn.py +279 -267
  43. ins_pricing/modelling/{core/bayesopt → bayesopt}/trainers/trainer_xgb.py +515 -313
  44. ins_pricing/modelling/bayesopt/utils/__init__.py +67 -0
  45. ins_pricing/modelling/bayesopt/utils/constants.py +21 -0
  46. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/distributed_utils.py +193 -186
  47. ins_pricing/modelling/bayesopt/utils/io_utils.py +7 -0
  48. ins_pricing/modelling/bayesopt/utils/losses.py +27 -0
  49. ins_pricing/modelling/bayesopt/utils/metrics_and_devices.py +17 -0
  50. ins_pricing/modelling/{core/bayesopt → bayesopt}/utils/torch_trainer_mixin.py +636 -623
  51. ins_pricing/modelling/{core/evaluation.py → evaluation.py} +113 -104
  52. ins_pricing/modelling/explain/__init__.py +55 -55
  53. ins_pricing/modelling/explain/metrics.py +27 -174
  54. ins_pricing/modelling/explain/permutation.py +237 -237
  55. ins_pricing/modelling/plotting/__init__.py +40 -36
  56. ins_pricing/modelling/plotting/compat.py +228 -0
  57. ins_pricing/modelling/plotting/curves.py +572 -572
  58. ins_pricing/modelling/plotting/diagnostics.py +163 -163
  59. ins_pricing/modelling/plotting/geo.py +362 -362
  60. ins_pricing/modelling/plotting/importance.py +121 -121
  61. ins_pricing/pricing/__init__.py +27 -27
  62. ins_pricing/pricing/factors.py +67 -56
  63. ins_pricing/production/__init__.py +35 -25
  64. ins_pricing/production/{predict.py → inference.py} +140 -57
  65. ins_pricing/production/monitoring.py +8 -21
  66. ins_pricing/reporting/__init__.py +11 -11
  67. ins_pricing/setup.py +1 -1
  68. ins_pricing/tests/production/test_inference.py +90 -0
  69. ins_pricing/utils/__init__.py +112 -78
  70. ins_pricing/utils/device.py +258 -237
  71. ins_pricing/utils/features.py +53 -0
  72. ins_pricing/utils/io.py +72 -0
  73. ins_pricing/utils/logging.py +34 -1
  74. ins_pricing/{modelling/core/bayesopt/utils → utils}/losses.py +125 -129
  75. ins_pricing/utils/metrics.py +158 -24
  76. ins_pricing/utils/numerics.py +76 -0
  77. ins_pricing/utils/paths.py +9 -1
  78. ins_pricing/utils/profiling.py +8 -4
  79. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/METADATA +1 -1
  80. ins_pricing-0.5.1.dist-info/RECORD +132 -0
  81. ins_pricing/modelling/core/BayesOpt.py +0 -146
  82. ins_pricing/modelling/core/__init__.py +0 -1
  83. ins_pricing/modelling/core/bayesopt/trainers/__init__.py +0 -19
  84. ins_pricing/modelling/core/bayesopt/utils/__init__.py +0 -86
  85. ins_pricing/modelling/core/bayesopt/utils/constants.py +0 -183
  86. ins_pricing/modelling/core/bayesopt/utils/io_utils.py +0 -126
  87. ins_pricing/modelling/core/bayesopt/utils/metrics_and_devices.py +0 -555
  88. ins_pricing/modelling/core/bayesopt/utils.py +0 -105
  89. ins_pricing/modelling/core/bayesopt/utils_backup.py +0 -1503
  90. ins_pricing/tests/production/test_predict.py +0 -233
  91. ins_pricing-0.4.5.dist-info/RECORD +0 -130
  92. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/WHEEL +0 -0
  93. {ins_pricing-0.4.5.dist-info → ins_pricing-0.5.1.dist-info}/top_level.txt +0 -0
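The renames above suggest that the bayesopt package moved from ins_pricing.modelling.core.bayesopt to ins_pricing.modelling.bayesopt, and that production/predict.py became production/inference.py. A hypothetical migration sketch for downstream imports follows; it is inferred only from the file list above and has not been verified against the 0.5.1 package:

    # Hypothetical import migration implied by the renames (assumption, not stated in the diff).
    # 0.4.5 layout:
    #   from ins_pricing.modelling.core.bayesopt import core
    #   from ins_pricing.production import predict
    # 0.5.1 layout:
    from ins_pricing.modelling.bayesopt import core
    from ins_pricing.production import inference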
ins_pricing/modelling/{core/bayesopt → bayesopt}/models/model_ft_components.py
@@ -1,342 +1,349 @@
- from __future__ import annotations
-
- import math
- from typing import List, Optional, Tuple
-
- import torch
- import torch.nn as nn
- from torch.utils.data import Dataset
-
-
- # =============================================================================
- # FT-Transformer model and sklearn-style wrapper.
- # =============================================================================
- # Define FT-Transformer model structure.
-
-
- class FeatureTokenizer(nn.Module):
-     """Map numeric/categorical/geo tokens into transformer input tokens."""
-
-     def __init__(
-         self,
-         num_numeric: int,
-         cat_cardinalities,
-         d_model: int,
-         num_geo: int = 0,
-         num_numeric_tokens: int = 1,
-     ):
-         super().__init__()
-
-         self.num_numeric = num_numeric
-         self.num_geo = num_geo
-         self.has_geo = num_geo > 0
-
-         if num_numeric > 0:
-             if int(num_numeric_tokens) <= 0:
-                 raise ValueError("num_numeric_tokens must be >= 1 when numeric features exist.")
-             self.num_numeric_tokens = int(num_numeric_tokens)
-             self.has_numeric = True
-             self.num_linear = nn.Linear(num_numeric, d_model * self.num_numeric_tokens)
-         else:
-             self.num_numeric_tokens = 0
-             self.has_numeric = False
-
-         self.embeddings = nn.ModuleList([
-             nn.Embedding(card, d_model) for card in cat_cardinalities
-         ])
-
-         if self.has_geo:
-             # Map geo tokens with a linear layer to avoid one-hot on raw strings; upstream is encoded/normalized.
-             self.geo_linear = nn.Linear(num_geo, d_model)
-
-     def forward(self, X_num, X_cat, X_geo=None):
-         tokens = []
-
-         if self.has_numeric:
-             batch_size = X_num.shape[0]
-             num_token = self.num_linear(X_num)
-             num_token = num_token.view(batch_size, self.num_numeric_tokens, -1)
-             tokens.append(num_token)
-
-         for i, emb in enumerate(self.embeddings):
-             tok = emb(X_cat[:, i])
-             tokens.append(tok.unsqueeze(1))
-
-         if self.has_geo:
-             if X_geo is None:
-                 raise RuntimeError("Geo tokens are enabled but X_geo was not provided.")
-             geo_token = self.geo_linear(X_geo)
-             tokens.append(geo_token.unsqueeze(1))
-
-         x = torch.cat(tokens, dim=1)
-         return x
-
- # Encoder layer with residual scaling.
-
-
- class ScaledTransformerEncoderLayer(nn.Module):
-     def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048,
-                  dropout: float = 0.1, residual_scale_attn: float = 1.0,
-                  residual_scale_ffn: float = 1.0, norm_first: bool = True,
-                  ):
-         super().__init__()
-         self.self_attn = nn.MultiheadAttention(
-             embed_dim=d_model,
-             num_heads=nhead,
-             dropout=dropout,
-             batch_first=True
-         )
-
-         # Feed-forward network.
-         self.linear1 = nn.Linear(d_model, dim_feedforward)
-         self.dropout = nn.Dropout(dropout)
-         self.linear2 = nn.Linear(dim_feedforward, d_model)
-
-         # Normalization and dropout.
-         self.norm1 = nn.LayerNorm(d_model)
-         self.norm2 = nn.LayerNorm(d_model)
-         self.dropout1 = nn.Dropout(dropout)
-         self.dropout2 = nn.Dropout(dropout)
-
-         self.activation = nn.GELU()
-         # If you prefer ReLU, set: self.activation = nn.ReLU()
-         self.norm_first = norm_first
-
-         # Residual scaling coefficients.
-         self.res_scale_attn = residual_scale_attn
-         self.res_scale_ffn = residual_scale_ffn
-
-     def forward(self, src, src_mask=None, src_key_padding_mask=None, is_causal: Optional[bool] = None, **_kwargs):
-         # Input tensor shape: (batch, seq_len, d_model).
-         x = src
-
-         if self.norm_first:
-             # Pre-norm before attention.
-             x = x + self._sa_block(
-                 self.norm1(x),
-                 src_mask,
-                 src_key_padding_mask,
-                 is_causal=is_causal,
-             )
-             x = x + self._ff_block(self.norm2(x))
-         else:
-             # Post-norm (usually disabled).
-             x = self.norm1(
-                 x + self._sa_block(
-                     x,
-                     src_mask,
-                     src_key_padding_mask,
-                     is_causal=is_causal,
-                 )
-             )
-             x = self.norm2(x + self._ff_block(x))
-
-         return x
-
-     def _sa_block(self, x, attn_mask, key_padding_mask, *, is_causal: Optional[bool] = None):
-         # Self-attention with residual scaling.
-         if is_causal is None:
-             attn_out, _ = self.self_attn(
-                 x, x, x,
-                 attn_mask=attn_mask,
-                 key_padding_mask=key_padding_mask,
-                 need_weights=False,
-             )
-         else:
-             try:
-                 attn_out, _ = self.self_attn(
-                     x, x, x,
-                     attn_mask=attn_mask,
-                     key_padding_mask=key_padding_mask,
-                     need_weights=False,
-                     is_causal=is_causal,
-                 )
-             except TypeError:
-                 attn_out, _ = self.self_attn(
-                     x, x, x,
-                     attn_mask=attn_mask,
-                     key_padding_mask=key_padding_mask,
-                     need_weights=False,
-                 )
-         return self.res_scale_attn * self.dropout1(attn_out)
-
-     def _ff_block(self, x):
-         # Feed-forward block with residual scaling.
-         x2 = self.linear2(self.dropout(self.activation(self.linear1(x))))
-         return self.res_scale_ffn * self.dropout2(x2)
-
- # FT-Transformer core model.
-
-
- class FTTransformerCore(nn.Module):
-     # Minimal FT-Transformer built from:
-     # 1) FeatureTokenizer: convert numeric/categorical features to tokens;
-     # 2) TransformerEncoder: model feature interactions;
-     # 3) Pooling + MLP + Softplus: positive outputs for Tweedie/Gamma tasks.
-
-     def __init__(self, num_numeric: int, cat_cardinalities, d_model: int = 64,
-                  n_heads: int = 8, n_layers: int = 4, dropout: float = 0.1,
-                  task_type: str = 'regression', num_geo: int = 0,
-                  num_numeric_tokens: int = 1
-                  ):
-         super().__init__()
-
-         self.num_numeric = int(num_numeric)
-         self.cat_cardinalities = list(cat_cardinalities or [])
-
-         self.tokenizer = FeatureTokenizer(
-             num_numeric=num_numeric,
-             cat_cardinalities=cat_cardinalities,
-             d_model=d_model,
-             num_geo=num_geo,
-             num_numeric_tokens=num_numeric_tokens
-         )
-         scale = 1.0 / math.sqrt(n_layers)  # Recommended default.
-         encoder_layer = ScaledTransformerEncoderLayer(
-             d_model=d_model,
-             nhead=n_heads,
-             dim_feedforward=d_model * 4,
-             dropout=dropout,
-             residual_scale_attn=scale,
-             residual_scale_ffn=scale,
-             norm_first=True,
-         )
-         self.encoder = nn.TransformerEncoder(
-             encoder_layer,
-             num_layers=n_layers
-         )
-         self.n_layers = n_layers
-
-         layers = [
-             # If you need a deeper head, enable the sample layers below:
-             # nn.LayerNorm(d_model), # Extra normalization
-             # nn.Linear(d_model, d_model), # Extra fully connected layer
-             # nn.GELU(), # Activation
-             nn.Linear(d_model, 1),
-         ]
-
-         if task_type == 'classification':
-             # Classification outputs logits for BCEWithLogitsLoss.
-             layers.append(nn.Identity())
-         else:
-             # Regression keeps positive outputs for Tweedie/Gamma.
-             layers.append(nn.Softplus())
-
-         self.head = nn.Sequential(*layers)
-
-         # ---- Self-supervised reconstruction head (masked modeling) ----
-         self.num_recon_head = nn.Linear(
-             d_model, self.num_numeric) if self.num_numeric > 0 else None
-         self.cat_recon_heads = nn.ModuleList([
-             nn.Linear(d_model, int(card)) for card in self.cat_cardinalities
-         ])
-
-     def forward(
-             self,
-             X_num,
-             X_cat,
-             X_geo=None,
-             return_embedding: bool = False,
-             return_reconstruction: bool = False):
-
-         # Inputs:
-         # X_num -> float32 tensor with shape (batch, num_numeric_features)
-         # X_cat -> long tensor with shape (batch, num_categorical_features)
-         # X_geo -> float32 tensor with shape (batch, geo_token_dim)
-
-         if self.training and not hasattr(self, '_printed_device'):
-             print(f">>> FTTransformerCore executing on device: {X_num.device}")
-             self._printed_device = True
-
-         # => tensor shape (batch, token_num, d_model)
-         tokens = self.tokenizer(X_num, X_cat, X_geo)
-         # => tensor shape (batch, token_num, d_model)
-         x = self.encoder(tokens)
-
-         # Mean-pool tokens, then send to the head.
-         x = x.mean(dim=1)  # => tensor shape (batch, d_model)
-
-         if return_reconstruction:
-             num_pred, cat_logits = self.reconstruct(x)
-             cat_logits_out = tuple(
-                 cat_logits) if cat_logits is not None else tuple()
-             if return_embedding:
-                 return x, num_pred, cat_logits_out
-             return num_pred, cat_logits_out
-
-         if return_embedding:
-             return x
-
-         # => tensor shape (batch, 1); Softplus keeps it positive.
-         out = self.head(x)
-         return out
-
-     def reconstruct(self, embedding: torch.Tensor) -> Tuple[Optional[torch.Tensor], List[torch.Tensor]]:
-         """Reconstruct numeric/categorical inputs from pooled embedding (batch, d_model)."""
-         num_pred = self.num_recon_head(
-             embedding) if self.num_recon_head is not None else None
-         cat_logits = [head(embedding) for head in self.cat_recon_heads]
-         return num_pred, cat_logits
-
- # TabularDataset.
-
-
- class TabularDataset(Dataset):
-     def __init__(self, X_num, X_cat, X_geo, y, w):
-
-         # Input tensors:
-         # X_num: torch.float32, shape=(N, num_numeric_features)
-         # X_cat: torch.long, shape=(N, num_categorical_features)
-         # X_geo: torch.float32, shape=(N, geo_token_dim), can be empty
-         # y: torch.float32, shape=(N, 1)
-         # w: torch.float32, shape=(N, 1)
-
-         self.X_num = X_num
-         self.X_cat = X_cat
-         self.X_geo = X_geo
-         self.y = y
-         self.w = w
-
-     def __len__(self):
-         return self.y.shape[0]
-
-     def __getitem__(self, idx):
-         return (
-             self.X_num[idx],
-             self.X_cat[idx],
-             self.X_geo[idx],
-             self.y[idx],
-             self.w[idx],
-         )
-
-
- class MaskedTabularDataset(Dataset):
-     def __init__(self,
-                  X_num_masked: torch.Tensor,
-                  X_cat_masked: torch.Tensor,
-                  X_geo: torch.Tensor,
-                  X_num_true: Optional[torch.Tensor],
-                  num_mask: Optional[torch.Tensor],
-                  X_cat_true: Optional[torch.Tensor],
-                  cat_mask: Optional[torch.Tensor]):
-         self.X_num_masked = X_num_masked
-         self.X_cat_masked = X_cat_masked
-         self.X_geo = X_geo
-         self.X_num_true = X_num_true
-         self.num_mask = num_mask
-         self.X_cat_true = X_cat_true
-         self.cat_mask = cat_mask
-
-     def __len__(self):
-         return self.X_num_masked.shape[0]
-
-     def __getitem__(self, idx):
-         return (
-             self.X_num_masked[idx],
-             self.X_cat_masked[idx],
-             self.X_geo[idx],
-             None if self.X_num_true is None else self.X_num_true[idx],
-             None if self.num_mask is None else self.num_mask[idx],
-             None if self.X_cat_true is None else self.X_cat_true[idx],
-             None if self.cat_mask is None else self.cat_mask[idx],
-         )
+ from __future__ import annotations
+
+ import math
+ from typing import List, Optional, Tuple
+
+ import torch
+ import torch.nn as nn
+ from torch.utils.data import Dataset
+ from ins_pricing.utils import get_logger, log_print
+
+ _logger = get_logger("ins_pricing.modelling.bayesopt.models.model_ft_components")
+
+
+ def _log(*args, **kwargs) -> None:
+     log_print(_logger, *args, **kwargs)
+
+
+ # =============================================================================
+ # FT-Transformer model and sklearn-style wrapper.
+ # =============================================================================
+ # Define FT-Transformer model structure.
+
+
+ class FeatureTokenizer(nn.Module):
+     """Map numeric/categorical/geo tokens into transformer input tokens."""
+
+     def __init__(
+         self,
+         num_numeric: int,
+         cat_cardinalities,
+         d_model: int,
+         num_geo: int = 0,
+         num_numeric_tokens: int = 1,
+     ):
+         super().__init__()
+
+         self.num_numeric = num_numeric
+         self.num_geo = num_geo
+         self.has_geo = num_geo > 0
+
+         if num_numeric > 0:
+             if int(num_numeric_tokens) <= 0:
+                 raise ValueError("num_numeric_tokens must be >= 1 when numeric features exist.")
+             self.num_numeric_tokens = int(num_numeric_tokens)
+             self.has_numeric = True
+             self.num_linear = nn.Linear(num_numeric, d_model * self.num_numeric_tokens)
+         else:
+             self.num_numeric_tokens = 0
+             self.has_numeric = False
+
+         self.embeddings = nn.ModuleList([
+             nn.Embedding(card, d_model) for card in cat_cardinalities
+         ])
+
+         if self.has_geo:
+             # Map geo tokens with a linear layer to avoid one-hot on raw strings; upstream is encoded/normalized.
+             self.geo_linear = nn.Linear(num_geo, d_model)
+
+     def forward(self, X_num, X_cat, X_geo=None):
+         tokens = []
+
+         if self.has_numeric:
+             batch_size = X_num.shape[0]
+             num_token = self.num_linear(X_num)
+             num_token = num_token.view(batch_size, self.num_numeric_tokens, -1)
+             tokens.append(num_token)
+
+         for i, emb in enumerate(self.embeddings):
+             tok = emb(X_cat[:, i])
+             tokens.append(tok.unsqueeze(1))
+
+         if self.has_geo:
+             if X_geo is None:
+                 raise RuntimeError("Geo tokens are enabled but X_geo was not provided.")
+             geo_token = self.geo_linear(X_geo)
+             tokens.append(geo_token.unsqueeze(1))
+
+         x = torch.cat(tokens, dim=1)
+         return x
+
+ # Encoder layer with residual scaling.
+
+
+ class ScaledTransformerEncoderLayer(nn.Module):
+     def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048,
+                  dropout: float = 0.1, residual_scale_attn: float = 1.0,
+                  residual_scale_ffn: float = 1.0, norm_first: bool = True,
+                  ):
+         super().__init__()
+         self.self_attn = nn.MultiheadAttention(
+             embed_dim=d_model,
+             num_heads=nhead,
+             dropout=dropout,
+             batch_first=True
+         )
+
+         # Feed-forward network.
+         self.linear1 = nn.Linear(d_model, dim_feedforward)
+         self.dropout = nn.Dropout(dropout)
+         self.linear2 = nn.Linear(dim_feedforward, d_model)
+
+         # Normalization and dropout.
+         self.norm1 = nn.LayerNorm(d_model)
+         self.norm2 = nn.LayerNorm(d_model)
+         self.dropout1 = nn.Dropout(dropout)
+         self.dropout2 = nn.Dropout(dropout)
+
+         self.activation = nn.GELU()
+         # If you prefer ReLU, set: self.activation = nn.ReLU()
+         self.norm_first = norm_first
+
+         # Residual scaling coefficients.
+         self.res_scale_attn = residual_scale_attn
+         self.res_scale_ffn = residual_scale_ffn
+
+     def forward(self, src, src_mask=None, src_key_padding_mask=None, is_causal: Optional[bool] = None, **_kwargs):
+         # Input tensor shape: (batch, seq_len, d_model).
+         x = src
+
+         if self.norm_first:
+             # Pre-norm before attention.
+             x = x + self._sa_block(
+                 self.norm1(x),
+                 src_mask,
+                 src_key_padding_mask,
+                 is_causal=is_causal,
+             )
+             x = x + self._ff_block(self.norm2(x))
+         else:
+             # Post-norm (usually disabled).
+             x = self.norm1(
+                 x + self._sa_block(
+                     x,
+                     src_mask,
+                     src_key_padding_mask,
+                     is_causal=is_causal,
+                 )
+             )
+             x = self.norm2(x + self._ff_block(x))
+
+         return x
+
+     def _sa_block(self, x, attn_mask, key_padding_mask, *, is_causal: Optional[bool] = None):
+         # Self-attention with residual scaling.
+         if is_causal is None:
+             attn_out, _ = self.self_attn(
+                 x, x, x,
+                 attn_mask=attn_mask,
+                 key_padding_mask=key_padding_mask,
+                 need_weights=False,
+             )
+         else:
+             try:
+                 attn_out, _ = self.self_attn(
+                     x, x, x,
+                     attn_mask=attn_mask,
+                     key_padding_mask=key_padding_mask,
+                     need_weights=False,
+                     is_causal=is_causal,
+                 )
+             except TypeError:
+                 attn_out, _ = self.self_attn(
+                     x, x, x,
+                     attn_mask=attn_mask,
+                     key_padding_mask=key_padding_mask,
+                     need_weights=False,
+                 )
+         return self.res_scale_attn * self.dropout1(attn_out)
+
+     def _ff_block(self, x):
+         # Feed-forward block with residual scaling.
+         x2 = self.linear2(self.dropout(self.activation(self.linear1(x))))
+         return self.res_scale_ffn * self.dropout2(x2)
+
+ # FT-Transformer core model.
+
+
+ class FTTransformerCore(nn.Module):
+     # Minimal FT-Transformer built from:
+     # 1) FeatureTokenizer: convert numeric/categorical features to tokens;
+     # 2) TransformerEncoder: model feature interactions;
+     # 3) Pooling + MLP + Softplus: positive outputs for Tweedie/Gamma tasks.
+
+     def __init__(self, num_numeric: int, cat_cardinalities, d_model: int = 64,
+                  n_heads: int = 8, n_layers: int = 4, dropout: float = 0.1,
+                  task_type: str = 'regression', num_geo: int = 0,
+                  num_numeric_tokens: int = 1
+                  ):
+         super().__init__()
+
+         self.num_numeric = int(num_numeric)
+         self.cat_cardinalities = list(cat_cardinalities or [])
+
+         self.tokenizer = FeatureTokenizer(
+             num_numeric=num_numeric,
+             cat_cardinalities=cat_cardinalities,
+             d_model=d_model,
+             num_geo=num_geo,
+             num_numeric_tokens=num_numeric_tokens
+         )
+         scale = 1.0 / math.sqrt(n_layers)  # Recommended default.
+         encoder_layer = ScaledTransformerEncoderLayer(
+             d_model=d_model,
+             nhead=n_heads,
+             dim_feedforward=d_model * 4,
+             dropout=dropout,
+             residual_scale_attn=scale,
+             residual_scale_ffn=scale,
+             norm_first=True,
+         )
+         self.encoder = nn.TransformerEncoder(
+             encoder_layer,
+             num_layers=n_layers
+         )
+         self.n_layers = n_layers
+
+         layers = [
+             # If you need a deeper head, enable the sample layers below:
+             # nn.LayerNorm(d_model), # Extra normalization
+             # nn.Linear(d_model, d_model), # Extra fully connected layer
+             # nn.GELU(), # Activation
+             nn.Linear(d_model, 1),
+         ]
+
+         if task_type == 'classification':
+             # Classification outputs logits for BCEWithLogitsLoss.
+             layers.append(nn.Identity())
+         else:
+             # Regression keeps positive outputs for Tweedie/Gamma.
+             layers.append(nn.Softplus())
+
+         self.head = nn.Sequential(*layers)
+
+         # ---- Self-supervised reconstruction head (masked modeling) ----
+         self.num_recon_head = nn.Linear(
+             d_model, self.num_numeric) if self.num_numeric > 0 else None
+         self.cat_recon_heads = nn.ModuleList([
+             nn.Linear(d_model, int(card)) for card in self.cat_cardinalities
+         ])
+
+     def forward(
+             self,
+             X_num,
+             X_cat,
+             X_geo=None,
+             return_embedding: bool = False,
+             return_reconstruction: bool = False):
+
+         # Inputs:
+         # X_num -> float32 tensor with shape (batch, num_numeric_features)
+         # X_cat -> long tensor with shape (batch, num_categorical_features)
+         # X_geo -> float32 tensor with shape (batch, geo_token_dim)
+
+         if self.training and not hasattr(self, '_printed_device'):
+             _log(f">>> FTTransformerCore executing on device: {X_num.device}")
+             self._printed_device = True
+
+         # => tensor shape (batch, token_num, d_model)
+         tokens = self.tokenizer(X_num, X_cat, X_geo)
+         # => tensor shape (batch, token_num, d_model)
+         x = self.encoder(tokens)
+
+         # Mean-pool tokens, then send to the head.
+         x = x.mean(dim=1)  # => tensor shape (batch, d_model)
+
+         if return_reconstruction:
+             num_pred, cat_logits = self.reconstruct(x)
+             cat_logits_out = tuple(
+                 cat_logits) if cat_logits is not None else tuple()
+             if return_embedding:
+                 return x, num_pred, cat_logits_out
+             return num_pred, cat_logits_out
+
+         if return_embedding:
+             return x
+
+         # => tensor shape (batch, 1); Softplus keeps it positive.
+         out = self.head(x)
+         return out
+
+     def reconstruct(self, embedding: torch.Tensor) -> Tuple[Optional[torch.Tensor], List[torch.Tensor]]:
+         """Reconstruct numeric/categorical inputs from pooled embedding (batch, d_model)."""
+         num_pred = self.num_recon_head(
+             embedding) if self.num_recon_head is not None else None
+         cat_logits = [head(embedding) for head in self.cat_recon_heads]
+         return num_pred, cat_logits
+
+ # TabularDataset.
+
+
+ class TabularDataset(Dataset):
+     def __init__(self, X_num, X_cat, X_geo, y, w):
+
+         # Input tensors:
+         # X_num: torch.float32, shape=(N, num_numeric_features)
+         # X_cat: torch.long, shape=(N, num_categorical_features)
+         # X_geo: torch.float32, shape=(N, geo_token_dim), can be empty
+         # y: torch.float32, shape=(N, 1)
+         # w: torch.float32, shape=(N, 1)
+
+         self.X_num = X_num
+         self.X_cat = X_cat
+         self.X_geo = X_geo
+         self.y = y
+         self.w = w
+
+     def __len__(self):
+         return self.y.shape[0]
+
+     def __getitem__(self, idx):
+         return (
+             self.X_num[idx],
+             self.X_cat[idx],
+             self.X_geo[idx],
+             self.y[idx],
+             self.w[idx],
+         )
+
+
+ class MaskedTabularDataset(Dataset):
+     def __init__(self,
+                  X_num_masked: torch.Tensor,
+                  X_cat_masked: torch.Tensor,
+                  X_geo: torch.Tensor,
+                  X_num_true: Optional[torch.Tensor],
+                  num_mask: Optional[torch.Tensor],
+                  X_cat_true: Optional[torch.Tensor],
+                  cat_mask: Optional[torch.Tensor]):
+         self.X_num_masked = X_num_masked
+         self.X_cat_masked = X_cat_masked
+         self.X_geo = X_geo
+         self.X_num_true = X_num_true
+         self.num_mask = num_mask
+         self.X_cat_true = X_cat_true
+         self.cat_mask = cat_mask
+
+     def __len__(self):
+         return self.X_num_masked.shape[0]
+
+     def __getitem__(self, idx):
+         return (
+             self.X_num_masked[idx],
+             self.X_cat_masked[idx],
+             self.X_geo[idx],
+             None if self.X_num_true is None else self.X_num_true[idx],
+             None if self.num_mask is None else self.num_mask[idx],
+             None if self.X_cat_true is None else self.X_cat_true[idx],
+             None if self.cat_mask is None else self.cat_mask[idx],
+         )
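For orientation, a minimal usage sketch of the FTTransformerCore class shown in this diff, assuming only the constructor and forward signatures visible above; the import path follows the renamed 0.5.1 layout from the file list, and the toy dimensions are made up:

    import torch
    from ins_pricing.modelling.bayesopt.models.model_ft_components import FTTransformerCore

    # Hypothetical toy setup: 3 numeric features, 2 categorical columns with
    # cardinalities 5 and 7, regression head (Softplus keeps outputs positive).
    model = FTTransformerCore(num_numeric=3, cat_cardinalities=[5, 7], d_model=64,
                              n_heads=8, n_layers=2, task_type='regression')
    X_num = torch.randn(16, 3)                 # float32, shape (batch, num_numeric)
    X_cat = torch.randint(0, 5, (16, 2))       # long, shape (batch, num_categorical)
    out = model(X_num, X_cat)                  # shape (batch, 1)
    emb = model(X_num, X_cat, return_embedding=True)  # pooled embedding, shape (batch, d_model)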