nextrec 0.3.6-py3-none-any.whl → 0.4.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +1 -1
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -5
- nextrec/basic/callback.py +1 -0
- nextrec/basic/features.py +30 -22
- nextrec/basic/layers.py +244 -113
- nextrec/basic/loggers.py +62 -43
- nextrec/basic/metrics.py +268 -119
- nextrec/basic/model.py +1373 -443
- nextrec/basic/session.py +10 -3
- nextrec/cli.py +498 -0
- nextrec/data/__init__.py +19 -25
- nextrec/data/batch_utils.py +11 -3
- nextrec/data/data_processing.py +42 -24
- nextrec/data/data_utils.py +26 -15
- nextrec/data/dataloader.py +303 -96
- nextrec/data/preprocessor.py +320 -199
- nextrec/loss/listwise.py +17 -9
- nextrec/loss/loss_utils.py +7 -8
- nextrec/loss/pairwise.py +2 -0
- nextrec/loss/pointwise.py +30 -12
- nextrec/models/generative/hstu.py +106 -40
- nextrec/models/match/dssm.py +82 -69
- nextrec/models/match/dssm_v2.py +72 -58
- nextrec/models/match/mind.py +175 -108
- nextrec/models/match/sdm.py +104 -88
- nextrec/models/match/youtube_dnn.py +73 -60
- nextrec/models/multi_task/esmm.py +53 -39
- nextrec/models/multi_task/mmoe.py +70 -47
- nextrec/models/multi_task/ple.py +107 -50
- nextrec/models/multi_task/poso.py +121 -41
- nextrec/models/multi_task/share_bottom.py +54 -38
- nextrec/models/ranking/afm.py +172 -45
- nextrec/models/ranking/autoint.py +84 -61
- nextrec/models/ranking/dcn.py +59 -42
- nextrec/models/ranking/dcn_v2.py +64 -23
- nextrec/models/ranking/deepfm.py +36 -26
- nextrec/models/ranking/dien.py +158 -102
- nextrec/models/ranking/din.py +88 -60
- nextrec/models/ranking/fibinet.py +55 -35
- nextrec/models/ranking/fm.py +32 -26
- nextrec/models/ranking/masknet.py +95 -34
- nextrec/models/ranking/pnn.py +34 -31
- nextrec/models/ranking/widedeep.py +37 -29
- nextrec/models/ranking/xdeepfm.py +63 -41
- nextrec/utils/__init__.py +61 -32
- nextrec/utils/config.py +490 -0
- nextrec/utils/device.py +52 -12
- nextrec/utils/distributed.py +141 -0
- nextrec/utils/embedding.py +1 -0
- nextrec/utils/feature.py +1 -0
- nextrec/utils/file.py +32 -11
- nextrec/utils/initializer.py +61 -16
- nextrec/utils/optimizer.py +25 -9
- nextrec/utils/synthetic_data.py +531 -0
- nextrec/utils/tensor.py +24 -13
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/METADATA +15 -5
- nextrec-0.4.2.dist-info/RECORD +69 -0
- nextrec-0.4.2.dist-info/entry_points.txt +2 -0
- nextrec-0.3.6.dist-info/RECORD +0 -64
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/afm.py
CHANGED

```diff
@@ -1,17 +1,46 @@
 """
 Date: create on 09/11/2025
+Checkpoint: edit on 06/12/2025
+Author: Yang Zhou, zyaztec@gmail.com
 Reference:
+[1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of
+feature interactions via attention networks[C]//IJCAI. 2017: 3119-3125.
+
+Attentional Factorization Machine (AFM) builds on FM by learning an importance
+weight for every second-order interaction instead of treating all pairs equally.
+It retains FM's linear (first-order) component for sparsity-friendly modeling,
+while using an attention network to reweight the element-wise product of field
+embeddings before aggregation.
+
+In each forward pass:
+(1) Embed each field and compute pairwise element-wise products v_i ⊙ v_j
+(2) Pass interactions through an attention MLP (ReLU + projection) to score them
+(3) Softmax-normalize scores to obtain interaction weights
+(4) Weighted sum of interactions -> linear projection -> add FM first-order term
+
+Key Advantages:
+- Learns which feature pairs contribute most via attention weights
+- Keeps FM efficiency and interpretability by preserving first-order terms
+- Softmax-normalized reweighting reduces noise from uninformative interactions
+
+AFM adds attention on top of FM's second-order interactions, learning an
+importance weight for every feature pair while keeping FM's first-order term,
+so it stays sparsity-friendly and interpretable. The workflow:
+(1) Embed each field and compute the element-wise product v_i ⊙ v_j of every feature pair
+(2) Score each interaction with an attention MLP (ReLU + linear projection)
+(3) Softmax-normalize the scores into interaction weights
+(4) Sum the weighted interactions, project linearly, and add the first-order term
+
+Key advantages:
+- Attention makes explicit which feature pairs matter most
+- Retains FM's efficiency and interpretability
+- Softmax normalization dampens the influence of noisy interactions
 """
 
 import torch
 import torch.nn as nn
 
 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import EmbeddingLayer,
+from nextrec.basic.layers import EmbeddingLayer, PredictionLayer, InputMask
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
 
@@ -21,69 +50,113 @@ class AFM(BaseModel):
         return "AFM"
 
     @property
-    def
+    def default_task(self):
         return "binary"
-
-    def __init__(
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | list = [],
+        sparse_features: list[SparseFeature] | list = [],
+        sequence_features: list[SequenceFeature] | list = [],
+        attention_dim: int = 32,
+        attention_dropout: float = 0.0,
+        target: list[str] | list = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(AFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
             target=target,
-            task=self.
+            task=task or self.default_task,
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
+        if target is None:
+            target = []
+        if optimizer_params is None:
+            optimizer_params = {}
+        if loss is None:
+            loss = "bce"
+
         self.fm_features = sparse_features + sequence_features
         if len(self.fm_features) < 2:
-            raise ValueError(
+            raise ValueError(
+                "AFM requires at least two sparse/sequence features to build pairwise interactions."
+            )
 
-        #
+        # make sure all embedding dimensions are the same for FM features
        self.embedding_dim = self.fm_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.fm_features):
-            raise ValueError(
+            raise ValueError(
+                "All FM features must share the same embedding_dim for AFM."
+            )
+
+        self.embedding = EmbeddingLayer(
+            features=self.fm_features
+        )  # [Batch, Field, Dim]
+
+        # First-order terms: dense linear + one-hot embeddings
+        self.dense_features = list(dense_features)
+        dense_input_dim = sum([f.input_dim for f in self.dense_features])
+        self.linear_dense = (
+            nn.Linear(dense_input_dim, 1, bias=True) if dense_input_dim > 0 else None
+        )
 
+        # First-order term: sparse/sequence features one-hot
+        # **INFO**: the source paper does not use sequence features in its experiments,
+        # but we implement them here for completeness. If you want to follow the paper
+        # strictly, remove sequence features from fm_features.
+        self.first_order_embeddings = nn.ModuleDict()
+        for feature in self.fm_features:
+            if (
+                feature.embedding_name in self.first_order_embeddings
+            ):  # shared embedding
+                continue
+            emb = nn.Embedding(
+                num_embeddings=feature.vocab_size,
+                embedding_dim=1,
+                padding_idx=feature.padding_idx,
+            )  # equal to one-hot encoding weight
+            # nn.init.zeros_(emb.weight)
+            self.first_order_embeddings[feature.embedding_name] = emb
 
         self.attention_linear = nn.Linear(self.embedding_dim, attention_dim)
         self.attention_p = nn.Linear(attention_dim, 1, bias=False)
         self.attention_dropout = nn.Dropout(attention_dropout)
         self.output_projection = nn.Linear(self.embedding_dim, 1, bias=False)
-        self.prediction_layer = PredictionLayer(task_type=self.
+        self.prediction_layer = PredictionLayer(task_type=self.default_task)
+        self.input_mask = InputMask()
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr=
-            include_modules=[
+            embedding_attr="embedding",
+            include_modules=[
+                "linear_dense",
+                "attention_linear",
+                "attention_p",
+                "output_projection",
+            ],
+        )
+        # add first-order embeddings to the embedding regularization list
+        self.embedding_params.extend(
+            emb.weight for emb in self.first_order_embeddings.values()
         )
 
         self.compile(
@@ -94,11 +167,65 @@ class AFM(BaseModel):
         )
 
     def forward(self, x):
-        field_emb = self.embedding(
+        field_emb = self.embedding(
+            x=x, features=self.fm_features, squeeze_dim=False
+        )  # [B, F, D]
+        batch_size = field_emb.size(0)
+        y_linear = torch.zeros(batch_size, 1, device=field_emb.device)
+
+        # First-order dense part
+        if self.linear_dense is not None:
+            dense_inputs = [
+                x[f.name].float().view(batch_size, -1) for f in self.dense_features
+            ]
+            dense_stack = torch.cat(dense_inputs, dim=1) if dense_inputs else None
+            if dense_stack is not None:
+                y_linear = y_linear + self.linear_dense(dense_stack)
+
+        # First-order sparse/sequence part
+        first_order_terms = []
+        for feature in self.fm_features:
+            emb = self.first_order_embeddings[feature.embedding_name]
+            if isinstance(feature, SparseFeature):
+                term = emb(x[feature.name].long())  # [B, 1]
+            else:  # SequenceFeature
+                seq_input = x[feature.name].long()  # [B, L]
+                if feature.max_len is not None and seq_input.size(1) > feature.max_len:
+                    seq_input = seq_input[:, -feature.max_len :]
+                mask = self.input_mask(x, feature, seq_input).squeeze(1)  # [B, L]
+                seq_weight = emb(seq_input).squeeze(-1)  # [B, L]
+                term = (seq_weight * mask).sum(dim=1, keepdim=True)  # [B, 1]
+            first_order_terms.append(term)
+        if first_order_terms:
+            y_linear = y_linear + torch.sum(
+                torch.cat(first_order_terms, dim=1), dim=1, keepdim=True
+            )
 
         interactions = []
+        feature_values = []
+        for feature in self.fm_features:
+            value = x.get(f"{feature.name}_value")
+            if value is not None:
+                value = value.float()
+                if value.dim() == 1:
+                    value = value.unsqueeze(-1)
+            else:
+                if isinstance(feature, SequenceFeature):
+                    seq_input = x[feature.name].long()
+                    if (
+                        feature.max_len is not None
+                        and seq_input.size(1) > feature.max_len
+                    ):
+                        seq_input = seq_input[:, -feature.max_len :]
+                    value = self.input_mask(x, feature, seq_input).sum(dim=2)  # [B, 1]
+                else:
+                    value = torch.ones(batch_size, 1, device=field_emb.device)
+            feature_values.append(value)
+        feature_values_tensor = torch.cat(feature_values, dim=1).unsqueeze(
+            -1
+        )  # [B, F, 1]
+        field_emb = field_emb * feature_values_tensor
+
         num_fields = field_emb.shape[1]
         for i in range(num_fields - 1):
             vi = field_emb[:, i, :]
@@ -107,7 +234,7 @@ class AFM(BaseModel):
             interactions.append(vi * vj)
 
         pair_tensor = torch.stack(interactions, dim=1)  # [B, num_pairs, D]
-        attention_scores = torch.
+        attention_scores = torch.relu(self.attention_linear(pair_tensor))
         attention_scores = self.attention_p(attention_scores)  # [B, num_pairs, 1]
         attention_weights = torch.softmax(attention_scores, dim=1)
 
```
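Taken together, the new second-order path is: pairwise element-wise products → attention MLP (ReLU + projection) → softmax over pairs → weighted sum → output projection. A minimal standalone sketch of that path, with fresh `nn.Linear` modules standing in for the model's `attention_linear`, `attention_p`, and `output_projection`; the `triu_indices` vectorization replaces the explicit `i < j` loop above, and the dropout placement is an assumption, since the diff cuts off before the weighted sum:

```python
import torch
import torch.nn as nn

def afm_pairwise_attention(
    field_emb: torch.Tensor,        # [B, F, D], one embedding row per field
    attention_linear: nn.Linear,    # D -> attention_dim
    attention_p: nn.Linear,         # attention_dim -> 1
    attention_dropout: nn.Dropout,
    output_projection: nn.Linear,   # D -> 1
) -> torch.Tensor:
    """Attention-weighted sum of all pairwise products (the AFM second-order term)."""
    B, F, D = field_emb.shape
    rows, cols = torch.triu_indices(F, F, offset=1)              # all i < j index pairs
    pair_tensor = field_emb[:, rows, :] * field_emb[:, cols, :]  # [B, P, D], P = F*(F-1)/2
    scores = attention_p(torch.relu(attention_linear(pair_tensor)))  # [B, P, 1]
    weights = torch.softmax(scores, dim=1)                       # normalize over pairs
    # Dropout placement is an assumption; the tail of forward() is not shown in the diff.
    pooled = attention_dropout((weights * pair_tensor).sum(dim=1))   # [B, D]
    return output_projection(pooled)                             # [B, 1] second-order logit

# Shape check: 4 fields, 8-dim embeddings, attention_dim=32
emb = torch.randn(2, 4, 8)
logit = afm_pairwise_attention(
    emb, nn.Linear(8, 32), nn.Linear(32, 1, bias=False),
    nn.Dropout(0.0), nn.Linear(8, 1, bias=False),
)
print(logit.shape)  # torch.Size([2, 1])
```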
nextrec/models/ranking/autoint.py
CHANGED

```diff
@@ -3,8 +3,8 @@ Date: create on 09/11/2025
 Checkpoint: edit on 24/11/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-[1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
-self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
+[1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
+self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
 on information and knowledge management. 2019: 1161-1170.
 (https://arxiv.org/abs/1810.11921)
 
@@ -68,91 +68,106 @@ class AutoInt(BaseModel):
         return "AutoInt"
 
     @property
-    def
+    def default_task(self):
         return "binary"
-
-    def __init__(
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        att_layer_num: int = 3,
+        att_embedding_dim: int = 8,
+        att_head_num: int = 2,
+        att_dropout: float = 0.0,
+        att_use_residual: bool = True,
+        target: list[str] | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(AutoInt, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
             target=target,
-            task=self.
+            task=task or self.default_task,
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
+        if target is None:
+            target = []
+        if optimizer_params is None:
+            optimizer_params = {}
+        if loss is None:
+            loss = "bce"
+
         self.att_layer_num = att_layer_num
         self.att_embedding_dim = att_embedding_dim
+
         # Use sparse and sequence features for interaction
+        # **INFO**: this differs from the original paper: we also include dense features.
+        # If you want to follow the paper strictly, set dense_features=[]
+        # or modify the code accordingly.
+        self.interaction_features = dense_features + sparse_features + sequence_features
+
         # All features for embedding
         self.all_features = dense_features + sparse_features + sequence_features
 
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)
+
         # Project embeddings to attention embedding dimension
         num_fields = len(self.interaction_features)
+
         # If embeddings have different dimensions, project them to att_embedding_dim
-        self.need_projection = not all(
+        self.need_projection = not all(
+            f.embedding_dim == att_embedding_dim for f in self.interaction_features
+        )
         self.projection_layers = None
         if self.need_projection:
-            self.projection_layers = nn.ModuleList(
+            self.projection_layers = nn.ModuleList(
+                [
+                    nn.Linear(f.embedding_dim, att_embedding_dim, bias=False)
+                    for f in self.interaction_features
+                ]
+            )
+
         # Multi-head self-attention layers
-        self.attention_layers = nn.ModuleList(
+        self.attention_layers = nn.ModuleList(
+            [
+                MultiHeadSelfAttention(
+                    embedding_dim=att_embedding_dim,
+                    num_heads=att_head_num,
+                    dropout=att_dropout,
+                    use_residual=att_use_residual,
+                )
+                for _ in range(att_layer_num)
+            ]
+        )
+
         # Final prediction layer
         self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
-        self.prediction_layer = PredictionLayer(task_type=self.
+        self.prediction_layer = PredictionLayer(task_type=self.default_task)
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr=
-            include_modules=[
+            embedding_attr="embedding",
+            include_modules=["projection_layers", "attention_layers", "fc"],
         )
 
         self.compile(
@@ -166,21 +181,29 @@ class AutoInt(BaseModel):
         # Get embeddings field-by-field so mixed dimensions can be projected safely
         field_embeddings = []
         if len(self.interaction_features) == 0:
-            raise ValueError(
+            raise ValueError(
+                "AutoInt requires at least one sparse or sequence feature for interactions."
+            )
         for idx, feature in enumerate(self.interaction_features):
             feature_emb = self.embedding(x=x, features=[feature], squeeze_dim=False)
             feature_emb = feature_emb.squeeze(1)  # [B, embedding_dim]
             if self.need_projection and self.projection_layers is not None:
                 feature_emb = self.projection_layers[idx](feature_emb)
-            field_embeddings.append(
+            field_embeddings.append(
+                feature_emb.unsqueeze(1)
+            )  # [B, 1, att_embedding_dim or original_dim]
         embeddings = torch.cat(field_embeddings, dim=1)
 
         # Apply multi-head self-attention layers
         attention_output = embeddings
         for att_layer in self.attention_layers:
-            attention_output = att_layer(
+            attention_output = att_layer(
+                attention_output
+            )  # [B, num_fields, att_embedding_dim]
+
         # Flatten and predict
-        attention_output_flat = attention_output.flatten(
+        attention_output_flat = attention_output.flatten(
+            start_dim=1
+        )  # [B, num_fields * att_embedding_dim]
         y = self.fc(attention_output_flat)  # [B, 1]
         return self.prediction_layer(y)
```
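The constructor stacks att_layer_num copies of MultiHeadSelfAttention from nextrec.basic.layers, whose implementation is not shown in this diff. A hedged stand-in built on torch's nn.MultiheadAttention illustrates the interaction stack being wired up; the residual-then-ReLU placement follows the AutoInt paper and may differ from the library's own layer:

```python
import torch
import torch.nn as nn

class FieldSelfAttention(nn.Module):
    """One AutoInt-style interaction layer: self-attention across feature fields.

    Stand-in sketch for nextrec's MultiHeadSelfAttention (not shown in the diff);
    residual connection + ReLU placement is assumed from the AutoInt paper.
    """

    def __init__(self, embedding_dim: int, num_heads: int,
                 dropout: float = 0.0, use_residual: bool = True):
        super().__init__()
        self.attn = nn.MultiheadAttention(
            embedding_dim, num_heads, dropout=dropout, batch_first=True
        )
        self.use_residual = use_residual

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: [B, num_fields, embedding_dim]; every field attends to every other field
        out, _ = self.attn(x, x, x, need_weights=False)
        if self.use_residual:
            out = out + x
        return torch.relu(out)  # [B, num_fields, embedding_dim]

# Three stacked layers, then flatten and a single-logit head,
# mirroring fc = nn.Linear(num_fields * att_embedding_dim, 1) in the diff:
layers = nn.Sequential(*[FieldSelfAttention(8, 2) for _ in range(3)])
fields = torch.randn(2, 5, 8)   # 5 fields already projected to att_embedding_dim=8
logit = nn.Linear(5 * 8, 1)(layers(fields).flatten(start_dim=1))
print(logit.shape)  # torch.Size([2, 1])
```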
nextrec/models/ranking/dcn.py
CHANGED

```diff
@@ -15,24 +15,26 @@ from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+
 class CrossNetwork(nn.Module):
     """Stacked Cross Layers from DCN (Wang et al., 2017)."""
 
     def __init__(self, input_dim, num_layers):
         super().__init__()
         self.num_layers = num_layers
-        self.w = torch.nn.ModuleList(
+        self.w = torch.nn.ModuleList(
+            [torch.nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers)]
+        )
+        self.b = torch.nn.ParameterList(
+            [torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)]
+        )
 
     def forward(self, x):
-        """
-        :param x: Float tensor of size ``(batch_size, num_fields, embed_dim)``
-        """
         x0 = x
         for i in range(self.num_layers):
             xw = self.w[i](x)
             x = x0 * xw + self.b[i] + x
-        return x
+        return x  # [batch_size, input_dim]
 
 
 class DCN(BaseModel):
@@ -41,46 +43,48 @@ class DCN(BaseModel):
         return "DCN"
 
     @property
-    def
+    def default_task(self):
         return "binary"
-
-    def __init__(
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        cross_num: int = 3,
+        mlp_params: dict | None = None,
+        target: list[str] = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(DCN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
             target=target,
-            task=self.
+            task=task or self.default_task,
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
        )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
+
         # All features
         self.all_features = dense_features + sparse_features + sequence_features
 
@@ -88,30 +92,41 @@ class DCN(BaseModel):
         self.embedding = EmbeddingLayer(features=self.all_features)
 
         # Calculate input dimension
-        emb_dim_total = sum(
+        emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.all_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         input_dim = emb_dim_total + dense_input_dim
 
         # Cross Network
         self.cross_network = CrossNetwork(input_dim=input_dim, num_layers=cross_num)
 
         # Deep Network (optional)
         if mlp_params is not None:
             self.use_dnn = True
             self.mlp = MLP(input_dim=input_dim, **mlp_params)
+            deep_dim = self.mlp.output_dim
             # Final layer combines cross and deep
-            self.final_layer = nn.Linear(
+            self.final_layer = nn.Linear(
+                input_dim + deep_dim, 1
+            )  # + deep_dim for MLP output
         else:
             self.use_dnn = False
             # Final layer only uses cross network output
             self.final_layer = nn.Linear(input_dim, 1)
 
-        self.prediction_layer = PredictionLayer(task_type=self.
+        self.prediction_layer = PredictionLayer(task_type=self.task)
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr=
-            include_modules=[
+            embedding_attr="embedding",
+            include_modules=["cross_network", "mlp", "final_layer"],
         )
 
         self.compile(
@@ -124,18 +139,20 @@
     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
 
         # Cross Network
         cross_output = self.cross_network(input_flat)  # [B, input_dim]
 
         if self.use_dnn:
             # Deep Network
             deep_output = self.mlp(input_flat)  # [B, deep_dim]
             # Concatenate cross and deep
-            combined = torch.cat(
+            combined = torch.cat(
+                [cross_output, deep_output], dim=-1
+            )  # [B, input_dim + deep_dim]
         else:
             combined = cross_output
 
         # Final prediction
         y = self.final_layer(combined)
         return self.prediction_layer(y)
```
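The reworked CrossNetwork is compact enough to restate standalone. Each layer computes x_{l+1} = x0 * (w_l · x_l) + b_l + x_l, where w_l · x_l is a per-sample scalar, so a layer costs only 2 * input_dim parameters while re-mixing the original input x0 at every depth. A self-contained version of the class from the diff, with a shape check:

```python
import torch
import torch.nn as nn

class CrossNetwork(nn.Module):
    """DCN cross layers: x_{l+1} = x0 * (w_l . x_l) + b_l + x_l.

    w_l . x_l is a scalar per sample, so each layer adds just input_dim weights
    plus an input_dim bias, yet mixes x0 back in at every step.
    """

    def __init__(self, input_dim: int, num_layers: int):
        super().__init__()
        self.w = nn.ModuleList(
            [nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers)]
        )
        self.b = nn.ParameterList(
            [nn.Parameter(torch.zeros(input_dim)) for _ in range(num_layers)]
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x0 = x                        # [B, input_dim]
        for w, b in zip(self.w, self.b):
            x = x0 * w(x) + b + x     # w(x): [B, 1], broadcasts across input_dim
        return x                      # [B, input_dim]

cross = CrossNetwork(input_dim=16, num_layers=3)
print(cross(torch.randn(4, 16)).shape)  # torch.Size([4, 16])
```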