nextrec 0.4.25-py3-none-any.whl → 0.4.28-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. nextrec/__version__.py +1 -1
  2. nextrec/basic/asserts.py +72 -0
  3. nextrec/basic/loggers.py +18 -1
  4. nextrec/basic/model.py +54 -51
  5. nextrec/data/batch_utils.py +23 -3
  6. nextrec/data/dataloader.py +3 -8
  7. nextrec/models/multi_task/[pre]aitm.py +173 -0
  8. nextrec/models/multi_task/[pre]snr_trans.py +232 -0
  9. nextrec/models/multi_task/[pre]star.py +192 -0
  10. nextrec/models/multi_task/apg.py +330 -0
  11. nextrec/models/multi_task/cross_stitch.py +229 -0
  12. nextrec/models/multi_task/escm.py +290 -0
  13. nextrec/models/multi_task/esmm.py +8 -21
  14. nextrec/models/multi_task/hmoe.py +203 -0
  15. nextrec/models/multi_task/mmoe.py +20 -28
  16. nextrec/models/multi_task/pepnet.py +81 -76
  17. nextrec/models/multi_task/ple.py +30 -44
  18. nextrec/models/multi_task/poso.py +13 -22
  19. nextrec/models/multi_task/share_bottom.py +14 -25
  20. nextrec/models/ranking/afm.py +2 -2
  21. nextrec/models/ranking/autoint.py +2 -4
  22. nextrec/models/ranking/dcn.py +2 -3
  23. nextrec/models/ranking/dcn_v2.py +2 -3
  24. nextrec/models/ranking/deepfm.py +2 -3
  25. nextrec/models/ranking/dien.py +7 -9
  26. nextrec/models/ranking/din.py +8 -10
  27. nextrec/models/ranking/eulernet.py +1 -2
  28. nextrec/models/ranking/ffm.py +1 -2
  29. nextrec/models/ranking/fibinet.py +2 -3
  30. nextrec/models/ranking/fm.py +1 -1
  31. nextrec/models/ranking/lr.py +1 -1
  32. nextrec/models/ranking/masknet.py +1 -2
  33. nextrec/models/ranking/pnn.py +1 -2
  34. nextrec/models/ranking/widedeep.py +2 -3
  35. nextrec/models/ranking/xdeepfm.py +2 -4
  36. nextrec/models/representation/rqvae.py +4 -4
  37. nextrec/models/retrieval/dssm.py +18 -26
  38. nextrec/models/retrieval/dssm_v2.py +15 -22
  39. nextrec/models/retrieval/mind.py +9 -15
  40. nextrec/models/retrieval/sdm.py +36 -33
  41. nextrec/models/retrieval/youtube_dnn.py +16 -24
  42. nextrec/models/sequential/hstu.py +2 -2
  43. nextrec/utils/__init__.py +5 -1
  44. nextrec/utils/model.py +9 -14
  45. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/METADATA +72 -62
  46. nextrec-0.4.28.dist-info/RECORD +90 -0
  47. nextrec/models/multi_task/aitm.py +0 -0
  48. nextrec/models/multi_task/snr_trans.py +0 -0
  49. nextrec-0.4.25.dist-info/RECORD +0 -86
  50. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/WHEEL +0 -0
  51. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/entry_points.txt +0 -0
  52. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/licenses/LICENSE +0 -0
nextrec/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "0.4.25"
+ __version__ = "0.4.28"
nextrec/basic/asserts.py ADDED
@@ -0,0 +1,72 @@
+ """
+ Assert function definitions for NextRec models.
+
+ Date: create on 01/01/2026
+ Checkpoint: edit on 01/01/2026
+ Author: Yang Zhou, zyaztec@gmail.com
+ """
+
+ from __future__ import annotations
+
+ from nextrec.utils.types import TaskTypeName, TrainingModeName
+
+
+ def assert_task(
+     task: list[TaskTypeName] | TaskTypeName | None,
+     nums_task: int,
+     *,
+     model_name: str,
+ ) -> None:
+     if task is None:
+         raise ValueError(f"{model_name} requires task to be specified.")
+
+     # case 1: task is str
+     if isinstance(task, str):
+         if nums_task != 1:
+             raise ValueError(
+                 f"{model_name} received task='{task}' but nums_task={nums_task}. "
+                 "String task is only allowed for single-task models."
+             )
+         return  # single-task, valid
+
+     # case 2: task is list
+     if not isinstance(task, list):
+         raise TypeError(
+             f"{model_name} requires task to be a string or a list of strings."
+         )
+
+     # list but length == 1
+     if len(task) == 1:
+         if nums_task != 1:
+             raise ValueError(
+                 f"{model_name} received task list of length 1 but nums_task={nums_task}. "
+                 "Length-1 task list is only allowed for single-task models."
+             )
+         return  # single-task, valid
+
+     # multi-task: length must match nums_task
+     if len(task) != nums_task:
+         raise ValueError(
+             f"{model_name} requires task length {nums_task}, got {len(task)}."
+         )
+
+
+ def assert_training_mode(
+     training_mode: TrainingModeName | list[TrainingModeName],
+     nums_task: int,
+     *,
+     model_name: str,
+ ) -> None:
+     valid_modes = {"pointwise", "pairwise", "listwise"}
+     if not isinstance(training_mode, list):
+         raise TypeError(
+             f"[{model_name}-init Error] training_mode must be a list with length {nums_task}."
+         )
+     if len(training_mode) != nums_task:
+         raise ValueError(
+             f"[{model_name}-init Error] training_mode list length must match number of tasks."
+         )
+     if any(mode not in valid_modes for mode in training_mode):
+         raise ValueError(
+             f"[{model_name}-init Error] training_mode must be one of {'pointwise', 'pairwise', 'listwise'}."
+         )
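For a quick sense of how these new validators behave, here is a minimal sketch; the model name and task values are illustrative, assuming nextrec 0.4.28 is installed:

```python
# Sketch of calling the new validators; "DemoModel" and the task values
# below are illustrative placeholders, not values from the package.
from nextrec.basic.asserts import assert_task, assert_training_mode

assert_task("binary", nums_task=1, model_name="DemoModel")                   # ok
assert_task(["binary", "regression"], nums_task=2, model_name="DemoModel")  # ok

try:
    assert_task(["binary"], nums_task=2, model_name="DemoModel")
except ValueError as err:
    print(err)  # length-1 task list is only allowed for single-task models

assert_training_mode(["pointwise", "pairwise"], nums_task=2, model_name="DemoModel")
```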
nextrec/basic/loggers.py CHANGED
@@ -2,7 +2,7 @@
  NextRec Basic Loggers

  Date: create on 27/10/2025
- Checkpoint: edit on 27/12/2025
+ Checkpoint: edit on 01/01/2026
  Author: Yang Zhou, zyaztec@gmail.com
  """

@@ -190,6 +190,19 @@ class BasicLogger:
      def close(self) -> None:
          for backend in self.backends:
              backend.close()
+         for backend in self.backends:
+             if isinstance(backend, SwanLabLogger):
+                 swanlab = backend.swanlab
+                 if not backend.enabled or swanlab is None:
+                     continue
+                 finish_fn = getattr(swanlab, "finish", None)
+                 if finish_fn is None:
+                     continue
+                 try:
+                     finish_fn()
+                 except TypeError:
+                     finish_fn()
+                 break


  class TensorBoardLogger(MetricsLoggerBackend):
@@ -369,10 +382,14 @@ class TrainingLogger(BasicLogger):
          wandb_kwargs = dict(wandb_kwargs or {})
          wandb_kwargs.setdefault("config", {})
          wandb_kwargs["config"].update(config)
+         if "notes" in wandb_kwargs:
+             wandb_kwargs["config"].pop("note", None)

          swanlab_kwargs = dict(swanlab_kwargs or {})
          swanlab_kwargs.setdefault("config", {})
          swanlab_kwargs["config"].update(config)
+         if "description" in swanlab_kwargs:
+             swanlab_kwargs["config"].pop("note", None)

          self.wandb_logger = None
          if use_wandb:
nextrec/basic/model.py CHANGED
@@ -2,7 +2,7 @@
  Base Model & Base Match Model Class

  Date: create on 27/10/2025
- Checkpoint: edit on 31/12/2025
+ Checkpoint: edit on 01/01/2026
  Author: Yang Zhou,zyaztec@gmail.com
  """

@@ -36,6 +36,7 @@ from torch.utils.data import DataLoader
  from torch.utils.data.distributed import DistributedSampler

  from nextrec import __version__
+ from nextrec.basic.asserts import assert_task
  from nextrec.basic.callback import (
      CallbackList,
      CheckpointSaver,
@@ -101,6 +102,7 @@ from nextrec.utils.types import (

  from nextrec.utils.data import FILE_FORMAT_CONFIG

+
  class BaseModel(SummarySet, FeatureSet, nn.Module):
      @property
      def model_name(self) -> str:
@@ -110,30 +112,6 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
      def default_task(self) -> TaskTypeName | list[TaskTypeName]:
          raise NotImplementedError

-     @property
-     def training_mode(self) -> TrainingModeName | list[TrainingModeName]:
-         if self.nums_task > 1:
-             return self.training_modes
-         return self.training_modes[0] if self.training_modes else "pointwise"
-
-
-     @training_mode.setter
-     def training_mode(self, training_mode: TrainingModeName | list[TrainingModeName]):
-         valid_modes = {"pointwise", "pairwise", "listwise"}
-         if isinstance(training_mode, list):
-             training_modes = list(training_mode)
-             if len(training_modes) != self.nums_task:
-                 raise ValueError(
-                     "[BaseModel-init Error] training_mode list length must match number of tasks."
-                 )
-         else:
-             training_modes = [training_mode] * self.nums_task
-         if any(mode not in valid_modes for mode in training_modes):
-             raise ValueError(
-                 "[BaseModel-init Error] training_mode must be one of {'pointwise', 'pairwise', 'listwise'}."
-             )
-         self.training_modes = list(training_modes)
-
      def __init__(
          self,
          dense_features: list[DenseFeature] | None = None,
@@ -142,7 +120,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
          target: list[str] | str | None = None,
          id_columns: list[str] | str | None = None,
          task: TaskTypeName | list[TaskTypeName] | None = None,
-         training_mode: TrainingModeName | list[TrainingModeName] = "pointwise",
+         training_mode: TrainingModeName | list[TrainingModeName] | None = None,
          embedding_l1_reg: float = 0.0,
          dense_l1_reg: float = 0.0,
          embedding_l2_reg: float = 0.0,
@@ -162,10 +140,10 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
              dense_features: DenseFeature definitions.
              sparse_features: SparseFeature definitions.
              sequence_features: SequenceFeature definitions.
-             target: Target column name. e.g., 'label' or ['label1', 'label2'].
+             target: Target column name. e.g., 'label_ctr' or ['label_ctr', 'label_cvr'].
              id_columns: Identifier column name, only need to specify if GAUC is required. e.g., 'user_id'.
              task: Task types, e.g., 'binary', 'regression', or ['binary', 'regression']. If None, falls back to self.default_task.
-             training_mode: Training mode for ranking tasks; a single mode or a list per task.
+             training_mode: Training mode for different tasks. e.g., 'pointwise', ['pointwise', 'pairwise'].

              embedding_l1_reg: L1 regularization strength for embedding params. e.g., 1e-6.
              dense_l1_reg: L1 regularization strength for dense params. e.g., 1e-5.
@@ -218,7 +196,11 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
          self.task = task or self.default_task
          self.nums_task = len(self.task) if isinstance(self.task, list) else 1

-         self.training_mode = training_mode
+         training_mode = training_mode or "pointwise"
+         if isinstance(training_mode, list):
+             self.training_modes = list(training_mode)
+         else:
+             self.training_modes = [training_mode] * self.nums_task

          self.embedding_l1_reg = embedding_l1_reg
          self.dense_l1_reg = dense_l1_reg
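The removed property/setter pair above had mixed validation into attribute access; `__init__` now only normalizes the argument, while validation lives in `assert_training_mode`. A standalone sketch of the normalization rule:

```python
# Standalone rendition of the normalization now done in BaseModel.__init__.
def normalize_training_modes(training_mode, nums_task):
    training_mode = training_mode or "pointwise"  # None falls back to "pointwise"
    if isinstance(training_mode, list):
        return list(training_mode)                # one mode per task, as given
    return [training_mode] * nums_task            # broadcast a single mode

print(normalize_training_modes(None, 2))                       # ['pointwise', 'pointwise']
print(normalize_training_modes(["pointwise", "pairwise"], 2))  # ['pointwise', 'pairwise']
```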
@@ -328,13 +310,13 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
      def get_input(self, input_data: dict, require_labels: bool = True):
          """
          Prepare unified input features and labels from the given input data.
-
+

          Args:
              input_data: Input data dictionary containing 'features' and optionally 'labels', e.g., {'features': {'feat1': [...], 'feat2': [...]}, 'labels': {'label': [...]}}.
              require_labels: Whether labels are required in the input data. Default is True: for training and evaluation with labels.
-
-         Note:
+
+         Note:
              target tensor shape will always be (batch_size, num_targets)
          """
          feature_source = input_data.get("features", {})
@@ -491,9 +473,9 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
              ignore_label: Label value to ignore when computing loss. Use this to skip gradients for unknown labels.
          """
          self.ignore_label = ignore_label
-         loss_list = get_loss_list(
-             loss, self.training_modes, self.nums_task
-         )
+
+         # get loss list
+         loss_list = get_loss_list(loss, self.training_modes, self.nums_task)

          self.loss_params = {} if loss_params is None else loss_params
          self.optimizer_params = optimizer_params or {}
@@ -546,7 +528,9 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
              raise ValueError(
                  "[BaseModel-compile Error] GradNorm requires multi-task setup."
              )
-         grad_norm_params = dict(loss_weights) if isinstance(loss_weights, dict) else {}
+         grad_norm_params = (
+             dict(loss_weights) if isinstance(loss_weights, dict) else {}
+         )
          grad_norm_params.pop("method", None)
          self.grad_norm = GradNormLossWeighting(
              nums_task=self.nums_task, device=self.device, **grad_norm_params
@@ -594,7 +578,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
          y_true = y_true.view(-1, 1)

          loss_fn = self.loss_fn[0]
-
+
          # mask ignored labels
          # we don't suggest using ignore_label for single task training
          if self.ignore_label is not None:
@@ -685,6 +669,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
          batch_size: int = 32,
          shuffle: bool = True,
          num_workers: int = 0,
+         prefetch_factor: int | None = None,
          sampler=None,
          return_dataset: bool = False,
      ):
@@ -696,6 +681,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
              batch_size: Batch size.
              shuffle: Whether to shuffle the data (ignored when a sampler is provided).
              num_workers: Number of DataLoader workers.
+             prefetch_factor: Number of batches loaded in advance by each worker.
              sampler: Optional sampler for DataLoader.
              return_dataset: Whether to return the tensor dataset along with the DataLoader, used for valid data
          Returns:
@@ -715,6 +701,9 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
                  "[BaseModel-prepare_data_loader Error] No data available to create DataLoader."
              )
          dataset = TensorDictDataset(tensors)
+         loader_kwargs = {}
+         if num_workers > 0 and prefetch_factor is not None:
+             loader_kwargs["prefetch_factor"] = prefetch_factor
          loader = DataLoader(
              dataset,
              batch_size=batch_size,
@@ -724,6 +713,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
              num_workers=num_workers,
              pin_memory=self.device.type == "cuda",
              persistent_workers=num_workers > 0,
+             **loader_kwargs,
          )
          return (loader, dataset) if return_dataset else loader
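The guard above exists because PyTorch's `DataLoader` only accepts `prefetch_factor` when `num_workers > 0`; passing it for in-process loading raises an error. A minimal sketch of the same pattern outside nextrec:

```python
# Same guarded-kwargs pattern with a plain PyTorch DataLoader.
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.arange(8, dtype=torch.float32))
num_workers, prefetch_factor = 2, 4

loader_kwargs = {}
if num_workers > 0 and prefetch_factor is not None:
    loader_kwargs["prefetch_factor"] = prefetch_factor  # each worker preloads 4 batches

loader = DataLoader(dataset, batch_size=2, num_workers=num_workers, **loader_kwargs)
```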
 
@@ -798,6 +788,8 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
          )
          self.to(self.device)

+         assert_task(self.task, len(self.target_columns), model_name=self.model_name)
+
          if not self.compiled:
              self.compile(
                  optimizer="adam",
@@ -902,6 +894,14 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
          else:
              swanlab.login(api_key=swanlab_api)

+         if use_wandb and self.note:
+             wandb_kwargs = dict(wandb_kwargs or {})
+             wandb_kwargs.setdefault("notes", self.note)
+
+         if use_swanlab and self.note:
+             swanlab_kwargs = dict(swanlab_kwargs or {})
+             swanlab_kwargs.setdefault("description", self.note)
+
          self.training_logger = (
              TrainingLogger(
                  session=self.session,
@@ -1649,7 +1649,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
              stream_chunk_size: Number of rows per chunk when using streaming mode for large datasets.
              num_workers: DataLoader worker count.

-         Note:
+         Note:
              predict does not support distributed mode currently, consider it as a single-process operation.
          """
          self.eval()
@@ -1837,7 +1837,7 @@ class BaseModel(SummarySet, FeatureSet, nn.Module):
      ):
          """
          Make predictions on the given data using streaming mode for large datasets.
-
+
          Args:
              data: Input data for prediction (file path, dict, DataFrame, or DataLoader).
              batch_size: Batch size for prediction.
@@ -2279,9 +2279,10 @@ class BaseMatchModel(BaseModel):
          self.num_negative_samples = num_negative_samples
          self.temperature = temperature
          self.similarity_metric = similarity_metric
-         if self.training_mode not in self.support_training_modes:
+         primary_mode = self.training_modes[0] if self.training_modes else "pointwise"
+         if primary_mode not in self.support_training_modes:
              raise ValueError(
-                 f"{self.model_name.upper()} does not support training_mode='{self.training_mode}'. Supported modes: {self.support_training_modes}"
+                 f"{self.model_name.upper()} does not support training_mode='{primary_mode}'. Supported modes: {self.support_training_modes}"
              )
          self.user_features_all = (
              self.user_dense_features
@@ -2298,7 +2299,7 @@ class BaseMatchModel(BaseModel):
          self.head = RetrievalHead(
              similarity_metric=self.similarity_metric,
              temperature=self.temperature,
-             training_mode=self.training_mode,
+             training_mode=primary_mode,
              apply_sigmoid=True,
          )

@@ -2338,26 +2339,27 @@ class BaseMatchModel(BaseModel):
          }

          effective_loss = loss
+         primary_mode = self.training_modes[0] if self.training_modes else "pointwise"
          if effective_loss is None:
-             effective_loss = default_loss_by_mode[self.training_mode]
+             effective_loss = default_loss_by_mode[primary_mode]
          elif isinstance(effective_loss, str):
-             if self.training_mode in {"pairwise", "listwise"} and effective_loss in {
+             if primary_mode in {"pairwise", "listwise"} and effective_loss in {
                  "bce",
                  "binary_crossentropy",
              }:
-                 effective_loss = default_loss_by_mode[self.training_mode]
+                 effective_loss = default_loss_by_mode[primary_mode]
          elif isinstance(effective_loss, list):
              if not effective_loss:
-                 effective_loss = [default_loss_by_mode[self.training_mode]]
+                 effective_loss = [default_loss_by_mode[primary_mode]]
              else:
                  first = effective_loss[0]
                  if (
-                     self.training_mode in {"pairwise", "listwise"}
+                     primary_mode in {"pairwise", "listwise"}
                      and isinstance(first, str)
                      and first in {"bce", "binary_crossentropy"}
                  ):
                      effective_loss = [
-                         default_loss_by_mode[self.training_mode],
+                         default_loss_by_mode[primary_mode],
                          *effective_loss[1:],
                      ]
          return super().compile(
@@ -2435,11 +2437,12 @@ class BaseMatchModel(BaseModel):
          return self.head(user_emb, item_emb, similarity_fn=self.compute_similarity)

      def compute_loss(self, y_pred, y_true):
-         if self.training_mode == "pointwise":
+         primary_mode = self.training_modes[0] if self.training_modes else "pointwise"
+         if primary_mode == "pointwise":
              return super().compute_loss(y_pred, y_true)

          # pairwise / listwise using inbatch neg
-         elif self.training_mode in ["pairwise", "listwise"]:
+         elif primary_mode in ["pairwise", "listwise"]:
              if not isinstance(y_pred, (tuple, list)) or len(y_pred) != 2:
                  raise ValueError(
                      "For pairwise/listwise training, forward should return (user_emb, item_emb). Please check BaseMatchModel.forward implementation."
@@ -2482,7 +2485,7 @@ class BaseMatchModel(BaseModel):
                  loss *= float(self.loss_weights[0])
              return loss
          else:
-             raise ValueError(f"Unknown training mode: {self.training_mode}")
+             raise ValueError(f"Unknown training mode: {primary_mode}")

      def prepare_feature_data(
          self,
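Across these `BaseMatchModel` hunks the pattern is the same: the removed `training_mode` property is replaced by reading `self.training_modes[0]` as the primary mode. The loss fallback in `compile` can be sketched as below; the real `default_loss_by_mode` dict is defined outside the hunks shown here, so its values are placeholders:

```python
# Sketch of the mode-dependent loss fallback. The package's actual
# default_loss_by_mode values are not shown in this diff; these are placeholders.
default_loss_by_mode = {
    "pointwise": "bce",
    "pairwise": "pairwise_default",   # placeholder
    "listwise": "listwise_default",   # placeholder
}

training_modes = ["pairwise"]
primary_mode = training_modes[0] if training_modes else "pointwise"

effective_loss = "bce"
if primary_mode in {"pairwise", "listwise"} and effective_loss in {"bce", "binary_crossentropy"}:
    effective_loss = default_loss_by_mode[primary_mode]  # BCE is upgraded for pair/listwise

print(effective_loss)  # 'pairwise_default'
```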
nextrec/data/batch_utils.py CHANGED
@@ -5,13 +5,27 @@ Date: create on 03/12/2025
  Author: Yang Zhou, zyaztec@gmail.com
  """

- from typing import Any, Mapping
+ from typing import Any, Mapping, Literal

  import numpy as np
  import torch


- def stack_section(batch: list[dict], section: str):
+ def stack_section(batch: list[dict], section: Literal["features", "labels", "ids"]):
+     """
+     input example:
+     batch = [
+         {"features": {"f1": tensor1, "f2": tensor2}, "labels": {"label": tensor3}},
+         {"features": {"f1": tensor4, "f2": tensor5}, "labels": {"label": tensor6}},
+         ...
+     ]
+     output example:
+     {
+         "f1": torch.stack([tensor1, tensor4], dim=0),
+         "f2": torch.stack([tensor2, tensor5], dim=0),
+     }
+
+     """
      entries = [item.get(section) for item in batch if item.get(section) is not None]
      if not entries:
          return None
@@ -22,7 +36,13 @@ def stack_section(batch: list[dict], section: str):
              for item in batch
              if item.get(section) is not None and name in item[section]
          ]
-         merged[name] = torch.stack(tensors, dim=0)
+         tensor_sample = tensors[0]
+         if isinstance(tensor_sample, torch.Tensor):
+             merged[name] = torch.stack(tensors, dim=0)
+         elif isinstance(tensor_sample, np.ndarray):
+             merged[name] = np.stack(tensors, axis=0)
+         else:
+             merged[name] = tensors
      return merged

nextrec/data/dataloader.py CHANGED
@@ -2,7 +2,7 @@
  Dataloader definitions

  Date: create on 27/10/2025
- Checkpoint: edit on 24/12/2025
+ Checkpoint: edit on 01/01/2026
  Author: Yang Zhou,zyaztec@gmail.com
  """

@@ -523,13 +523,8 @@ def build_tensors_from_data(
              raise KeyError(
                  f"[RecDataLoader Error] ID column '{id_col}' not found in provided data."
              )
-         try:
-             id_arr = np.asarray(column, dtype=np.int64)
-         except Exception as exc:
-             raise TypeError(
-                 f"[RecDataLoader Error] ID column '{id_col}' must contain numeric values. Received dtype={np.asarray(column).dtype}, error: {exc}"
-             ) from exc
-         id_tensors[id_col] = to_tensor(id_arr, dtype=torch.long)
+         # Normalize all id columns to strings for consistent downstream handling.
+         id_tensors[id_col] = np.asarray(column, dtype=str)
      if not feature_tensors:
          return None
      return {"features": feature_tensors, "labels": label_tensors, "ids": id_tensors}
nextrec/models/multi_task/[pre]aitm.py ADDED
@@ -0,0 +1,173 @@
+ """
+ Date: create on 01/01/2026 - prerelease version: need to overwrite compute_loss later
+ Checkpoint: edit on 01/01/2026
+ Author: Yang Zhou, zyaztec@gmail.com
+ Reference:
+ - [1] Xi D, Chen Z, Yan P, Zhang Y, Zhu Y, Zhuang F, Chen Y. Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning in Targeted Display Advertising. Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery & Data Mining (KDD ’21), 2021, pp. 3745–3755.
+ URL: https://arxiv.org/abs/2105.08489
+ - [2] MMLRec-A-Unified-Multi-Task-and-Multi-Scenario-Learning-Benchmark-for-Recommendation: https://github.com/alipay/MMLRec-A-Unified-Multi-Task-and-Multi-Scenario-Learning-Benchmark-for-Recommendation/
+
+ """
+
+ from __future__ import annotations
+
+ import math
+ import torch
+ import torch.nn as nn
+
+ from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
+ from nextrec.basic.layers import MLP, EmbeddingLayer
+ from nextrec.basic.heads import TaskHead
+ from nextrec.basic.model import BaseModel
+ from nextrec.utils.model import get_mlp_output_dim
+ from nextrec.utils.types import TaskTypeName
+
+
+ class AITMTransfer(nn.Module):
+     """Attentive information transfer from previous task to current task."""
+
+     def __init__(self, input_dim: int):
+         super().__init__()
+         self.input_dim = input_dim
+         self.prev_proj = nn.Linear(input_dim, input_dim)
+         self.value = nn.Linear(input_dim, input_dim)
+         self.key = nn.Linear(input_dim, input_dim)
+         self.query = nn.Linear(input_dim, input_dim)
+
+     def forward(self, prev_feat: torch.Tensor, curr_feat: torch.Tensor) -> torch.Tensor:
+         prev = self.prev_proj(prev_feat).unsqueeze(1)
+         curr = curr_feat.unsqueeze(1)
+         stacked = torch.cat([prev, curr], dim=1)
+         value = self.value(stacked)
+         key = self.key(stacked)
+         query = self.query(stacked)
+         attn_scores = torch.sum(key * query, dim=2, keepdim=True) / math.sqrt(
+             self.input_dim
+         )
+         attn = torch.softmax(attn_scores, dim=1)
+         return torch.sum(attn * value, dim=1)
+
+
+ class AITM(BaseModel):
+     """
+     Attentive Information Transfer Multi-Task model.
+
+     AITM learns task-specific representations and transfers information from
+     task i-1 to task i via attention, enabling sequential task dependency modeling.
+     """
+
+     @property
+     def model_name(self):
+         return "AITM"
+
+     @property
+     def default_task(self):
+         nums_task = getattr(self, "nums_task", None)
+         if nums_task is not None and nums_task > 0:
+             return ["binary"] * nums_task
+         return ["binary"]
+
+     def __init__(
+         self,
+         dense_features: list[DenseFeature] | None = None,
+         sparse_features: list[SparseFeature] | None = None,
+         sequence_features: list[SequenceFeature] | None = None,
+         bottom_mlp_params: dict | list[dict] | None = None,
+         tower_mlp_params_list: list[dict] | None = None,
+         calibrator_alpha: float = 0.1,
+         target: list[str] | str | None = None,
+         task: list[TaskTypeName] | None = None,
+         **kwargs,
+     ):
+         dense_features = dense_features or []
+         sparse_features = sparse_features or []
+         sequence_features = sequence_features or []
+         bottom_mlp_params = bottom_mlp_params or {}
+         tower_mlp_params_list = tower_mlp_params_list or []
+         self.calibrator_alpha = calibrator_alpha
+
+         if target is None:
+             raise ValueError("AITM requires target names for all tasks.")
+         if isinstance(target, str):
+             target = [target]
+
+         self.nums_task = len(target)
+         if self.nums_task < 2:
+             raise ValueError("AITM requires at least 2 tasks.")
+
+         super(AITM, self).__init__(
+             dense_features=dense_features,
+             sparse_features=sparse_features,
+             sequence_features=sequence_features,
+             target=target,
+             task=task,
+             **kwargs,
+         )
+
+         if len(tower_mlp_params_list) != self.nums_task:
+             raise ValueError(
+                 "Number of tower mlp params "
+                 f"({len(tower_mlp_params_list)}) must match number of tasks ({self.nums_task})."
+             )
+
+         bottom_mlp_params_list: list[dict]
+         if isinstance(bottom_mlp_params, list):
+             if len(bottom_mlp_params) != self.nums_task:
+                 raise ValueError(
+                     "Number of bottom mlp params "
+                     f"({len(bottom_mlp_params)}) must match number of tasks ({self.nums_task})."
+                 )
+             bottom_mlp_params_list = [params.copy() for params in bottom_mlp_params]
+         else:
+             bottom_mlp_params_list = [
+                 bottom_mlp_params.copy() for _ in range(self.nums_task)
+             ]
+
+         self.embedding = EmbeddingLayer(features=self.all_features)
+         input_dim = self.embedding.input_dim
+
+         self.bottoms = nn.ModuleList(
+             [
+                 MLP(input_dim=input_dim, output_dim=None, **params)
+                 for params in bottom_mlp_params_list
+             ]
+         )
+         bottom_dims = [
+             get_mlp_output_dim(params, input_dim) for params in bottom_mlp_params_list
+         ]
+         if len(set(bottom_dims)) != 1:
+             raise ValueError(f"All bottom output dims must match, got {bottom_dims}.")
+         bottom_output_dim = bottom_dims[0]
+
+         self.transfers = nn.ModuleList(
+             [AITMTransfer(bottom_output_dim) for _ in range(self.nums_task - 1)]
+         )
+         self.grad_norm_shared_modules = ["embedding", "transfers"]
+
+         self.towers = nn.ModuleList(
+             [
+                 MLP(input_dim=bottom_output_dim, output_dim=1, **params)
+                 for params in tower_mlp_params_list
+             ]
+         )
+         self.prediction_layer = TaskHead(
+             task_type=self.task, task_dims=[1] * self.nums_task
+         )
+
+         self.register_regularization_weights(
+             embedding_attr="embedding",
+             include_modules=["bottoms", "transfers", "towers"],
+         )
+
+     def forward(self, x: dict[str, torch.Tensor]) -> torch.Tensor:
+         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
+         task_feats = [bottom(input_flat) for bottom in self.bottoms]
+
+         for idx in range(1, self.nums_task):
+             task_feats[idx] = self.transfers[idx - 1](
+                 task_feats[idx - 1], task_feats[idx]
+             )
+
+         task_outputs = [tower(task_feats[idx]) for idx, tower in enumerate(self.towers)]
+         logits = torch.cat(task_outputs, dim=1)
+         return self.prediction_layer(logits)
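Since the `[pre]aitm.py` file name is not an importable module path, here is a self-contained sketch of the sequential transfer wiring from `AITM.forward`, with toy dimensions:

```python
import math
import torch
import torch.nn as nn

# Re-statement of AITMTransfer from the diff above, with toy dimensions.
class Transfer(nn.Module):
    def __init__(self, dim: int):
        super().__init__()
        self.dim = dim
        self.prev_proj = nn.Linear(dim, dim)
        self.value = nn.Linear(dim, dim)
        self.key = nn.Linear(dim, dim)
        self.query = nn.Linear(dim, dim)

    def forward(self, prev_feat, curr_feat):
        stacked = torch.cat(
            [self.prev_proj(prev_feat).unsqueeze(1), curr_feat.unsqueeze(1)], dim=1
        )
        v, k, q = self.value(stacked), self.key(stacked), self.query(stacked)
        attn = torch.softmax((k * q).sum(dim=2, keepdim=True) / math.sqrt(self.dim), dim=1)
        return (attn * v).sum(dim=1)

# Information flows task 1 -> task 2 -> task 3, as in AITM.forward.
feats = [torch.randn(4, 8) for _ in range(3)]
transfers = nn.ModuleList([Transfer(8) for _ in range(2)])
for i in range(1, 3):
    feats[i] = transfers[i - 1](feats[i - 1], feats[i])
print([tuple(f.shape) for f in feats])  # [(4, 8), (4, 8), (4, 8)]
```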