nextrec-0.4.1-py3-none-any.whl → nextrec-0.4.3-py3-none-any.whl
- nextrec/__init__.py +1 -1
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -5
- nextrec/basic/callback.py +1 -0
- nextrec/basic/features.py +30 -22
- nextrec/basic/layers.py +250 -112
- nextrec/basic/loggers.py +63 -44
- nextrec/basic/metrics.py +270 -120
- nextrec/basic/model.py +1084 -402
- nextrec/basic/session.py +10 -3
- nextrec/cli.py +492 -0
- nextrec/data/__init__.py +19 -25
- nextrec/data/batch_utils.py +11 -3
- nextrec/data/data_processing.py +51 -45
- nextrec/data/data_utils.py +26 -15
- nextrec/data/dataloader.py +273 -96
- nextrec/data/preprocessor.py +320 -199
- nextrec/loss/listwise.py +17 -9
- nextrec/loss/loss_utils.py +7 -8
- nextrec/loss/pairwise.py +2 -0
- nextrec/loss/pointwise.py +30 -12
- nextrec/models/generative/hstu.py +103 -38
- nextrec/models/match/dssm.py +82 -68
- nextrec/models/match/dssm_v2.py +72 -57
- nextrec/models/match/mind.py +175 -107
- nextrec/models/match/sdm.py +104 -87
- nextrec/models/match/youtube_dnn.py +73 -59
- nextrec/models/multi_task/esmm.py +69 -46
- nextrec/models/multi_task/mmoe.py +91 -53
- nextrec/models/multi_task/ple.py +117 -58
- nextrec/models/multi_task/poso.py +163 -55
- nextrec/models/multi_task/share_bottom.py +63 -36
- nextrec/models/ranking/afm.py +80 -45
- nextrec/models/ranking/autoint.py +74 -57
- nextrec/models/ranking/dcn.py +110 -48
- nextrec/models/ranking/dcn_v2.py +265 -45
- nextrec/models/ranking/deepfm.py +39 -24
- nextrec/models/ranking/dien.py +335 -146
- nextrec/models/ranking/din.py +158 -92
- nextrec/models/ranking/fibinet.py +134 -52
- nextrec/models/ranking/fm.py +68 -26
- nextrec/models/ranking/masknet.py +95 -33
- nextrec/models/ranking/pnn.py +128 -58
- nextrec/models/ranking/widedeep.py +40 -28
- nextrec/models/ranking/xdeepfm.py +67 -40
- nextrec/utils/__init__.py +59 -34
- nextrec/utils/config.py +496 -0
- nextrec/utils/device.py +30 -20
- nextrec/utils/distributed.py +36 -9
- nextrec/utils/embedding.py +1 -0
- nextrec/utils/feature.py +1 -0
- nextrec/utils/file.py +33 -11
- nextrec/utils/initializer.py +61 -16
- nextrec/utils/model.py +22 -0
- nextrec/utils/optimizer.py +25 -9
- nextrec/utils/synthetic_data.py +283 -165
- nextrec/utils/tensor.py +24 -13
- {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/METADATA +53 -24
- nextrec-0.4.3.dist-info/RECORD +69 -0
- nextrec-0.4.3.dist-info/entry_points.txt +2 -0
- nextrec-0.4.1.dist-info/RECORD +0 -66
- {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/WHEEL +0 -0
- {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/afm.py
CHANGED
@@ -1,7 +1,7 @@
 """
 Date: create on 09/11/2025
-Checkpoint: edit on
-Author: Yang Zhou,zyaztec@gmail.com
+Checkpoint: edit on 09/12/2025
+Author: Yang Zhou, zyaztec@gmail.com
 Reference:
 [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of
 feature interactions via attention networks[C]//IJCAI. 2017: 3119-3125.
@@ -40,7 +40,7 @@ import torch
 import torch.nn as nn
 
 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import EmbeddingLayer,
+from nextrec.basic.layers import EmbeddingLayer, PredictionLayer, InputMask
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
 
@@ -52,25 +52,35 @@ class AFM(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(
-        [17 old lines not rendered in the diff view]
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | None = None,
+        sparse_features: list[SparseFeature] | None = None,
+        sequence_features: list[SequenceFeature] | None = None,
+        attention_dim: int = 32,
+        attention_dropout: float = 0.0,
+        target: list[str] | str | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
         super(AFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -82,31 +92,32 @@ class AFM(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
-        if target is None:
-            target = []
-        if optimizer_params is None:
-            optimizer_params = {}
-        if loss is None:
-            loss = "bce"
-
         self.fm_features = sparse_features + sequence_features
         if len(self.fm_features) < 2:
-            raise ValueError(
+            raise ValueError(
+                "AFM requires at least two sparse/sequence features to build pairwise interactions."
+            )
 
         # make sure all embedding dimension are the same for FM features
         self.embedding_dim = self.fm_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.fm_features):
-            raise ValueError(
+            raise ValueError(
+                "All FM features must share the same embedding_dim for AFM."
+            )
 
-        self.embedding = EmbeddingLayer(
+        self.embedding = EmbeddingLayer(
+            features=self.fm_features
+        )  # [Batch, Field, Dim]
 
         # First-order terms: dense linear + one hot embeddings
         self.dense_features = list(dense_features)
         dense_input_dim = sum([f.input_dim for f in self.dense_features])
-        self.linear_dense =
+        self.linear_dense = (
+            nn.Linear(dense_input_dim, 1, bias=True) if dense_input_dim > 0 else None
+        )
 
         # First-order term: sparse/sequence features one-hot
         # **INFO**: source paper does not contain sequence features in experiments,
@@ -114,9 +125,15 @@ class AFM(BaseModel):
         # remove sequence features from fm_features.
         self.first_order_embeddings = nn.ModuleDict()
         for feature in self.fm_features:
-            if
+            if (
+                feature.embedding_name in self.first_order_embeddings
+            ):  # shared embedding
                 continue
-            emb = nn.Embedding(
+            emb = nn.Embedding(
+                num_embeddings=feature.vocab_size,
+                embedding_dim=1,
+                padding_idx=feature.padding_idx,
+            )  # equal to one-hot encoding weight
             # nn.init.zeros_(emb.weight)
             self.first_order_embeddings[feature.embedding_name] = emb
 
@@ -129,11 +146,18 @@ class AFM(BaseModel):
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr=
-            include_modules=[
+            embedding_attr="embedding",
+            include_modules=[
+                "linear_dense",
+                "attention_linear",
+                "attention_p",
+                "output_projection",
+            ],
         )
         # add first-order embeddings to embedding regularization list
-        self.embedding_params.extend(
+        self.embedding_params.extend(
+            emb.weight for emb in self.first_order_embeddings.values()
+        )
 
         self.compile(
             optimizer=optimizer,
@@ -143,13 +167,17 @@ class AFM(BaseModel):
         )
 
     def forward(self, x):
-        field_emb = self.embedding(
+        field_emb = self.embedding(
+            x=x, features=self.fm_features, squeeze_dim=False
+        )  # [B, F, D]
         batch_size = field_emb.size(0)
         y_linear = torch.zeros(batch_size, 1, device=field_emb.device)
 
         # First-order dense part
         if self.linear_dense is not None:
-            dense_inputs = [
+            dense_inputs = [
+                x[f.name].float().view(batch_size, -1) for f in self.dense_features
+            ]
             dense_stack = torch.cat(dense_inputs, dim=1) if dense_inputs else None
             if dense_stack is not None:
                 y_linear = y_linear + self.linear_dense(dense_stack)
@@ -161,7 +189,7 @@ class AFM(BaseModel):
             if isinstance(feature, SparseFeature):
                 term = emb(x[feature.name].long())  # [B, 1]
             else:  # SequenceFeature
-                seq_input = x[feature.name].long()
+                seq_input = x[feature.name].long()  # [B, 1]
                 if feature.max_len is not None and seq_input.size(1) > feature.max_len:
                     seq_input = seq_input[:, -feature.max_len :]
                 mask = self.input_mask(x, feature, seq_input).squeeze(1)  # [B, 1]
@@ -169,7 +197,9 @@ class AFM(BaseModel):
                 term = (seq_weight * mask).sum(dim=1, keepdim=True)  # [B, 1]
             first_order_terms.append(term)
         if first_order_terms:
-            y_linear = y_linear + torch.sum(
+            y_linear = y_linear + torch.sum(
+                torch.cat(first_order_terms, dim=1), dim=1, keepdim=True
+            )
 
         interactions = []
         feature_values = []
@@ -182,13 +212,18 @@ class AFM(BaseModel):
             else:
                 if isinstance(feature, SequenceFeature):
                     seq_input = x[feature.name].long()
-                    if
+                    if (
+                        feature.max_len is not None
+                        and seq_input.size(1) > feature.max_len
+                    ):
                         seq_input = seq_input[:, -feature.max_len :]
                     value = self.input_mask(x, feature, seq_input).sum(dim=2)  # [B, 1]
                 else:
                     value = torch.ones(batch_size, 1, device=field_emb.device)
             feature_values.append(value)
-        feature_values_tensor = torch.cat(feature_values, dim=1).unsqueeze(
+        feature_values_tensor = torch.cat(feature_values, dim=1).unsqueeze(
+            -1
+        )  # [B, F, 1]
         field_emb = field_emb * feature_values_tensor
 
         num_fields = field_emb.shape[1]
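Note: the hunks above touch AFM's first-order terms and field-value masking, while the attention pooling that consumes field_emb sits outside the changed lines. For orientation, here is a minimal self-contained sketch of that attention-pooled pairwise interaction. The wiring is inferred from the module names registered for regularization (attention_linear, attention_p, output_projection); it is an illustration under those assumptions, not the package's code.

import torch
import torch.nn as nn

B, F, D, attention_dim = 4, 5, 8, 32
field_emb = torch.randn(B, F, D)  # embeddings of F sparse/sequence fields

# All pairwise element-wise products v_i * v_j for i < j: [B, P, D], P = F*(F-1)/2
i, j = torch.triu_indices(F, F, offset=1)
pairwise = field_emb[:, i, :] * field_emb[:, j, :]

# Attention network scores each pair; softmax normalizes across the P pairs
attention_linear = nn.Sequential(nn.Linear(D, attention_dim), nn.ReLU())  # assumed wiring
attention_p = nn.Linear(attention_dim, 1, bias=False)                     # assumed wiring
weights = torch.softmax(attention_p(attention_linear(pairwise)), dim=1)   # [B, P, 1]

# Weighted sum over pairs, then project to the second-order logit
output_projection = nn.Linear(D, 1, bias=False)                           # assumed wiring
second_order_logit = output_projection((weights * pairwise).sum(dim=1))   # [B, 1]
print(second_order_logit.shape)  # torch.Size([4, 1])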
nextrec/models/ranking/autoint.py
CHANGED

@@ -1,10 +1,10 @@
 """
 Date: create on 09/11/2025
-Checkpoint: edit on
-Author: Yang Zhou,zyaztec@gmail.com
+Checkpoint: edit on 09/12/2025
+Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-[1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
-self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
+[1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
+self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
 on information and knowledge management. 2019: 1161-1170.
 (https://arxiv.org/abs/1810.11921)
 
@@ -70,29 +70,31 @@ class AutoInt(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(
-        [21 old lines not rendered in the diff view]
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        att_layer_num: int = 3,
+        att_embedding_dim: int = 8,
+        att_head_num: int = 2,
+        att_dropout: float = 0.0,
+        att_use_residual: bool = True,
+        target: list[str] | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(AutoInt, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -104,7 +106,7 @@ class AutoInt(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         if target is None:
@@ -113,52 +115,59 @@ class AutoInt(BaseModel):
             optimizer_params = {}
         if loss is None:
             loss = "bce"
-
+
         self.att_layer_num = att_layer_num
         self.att_embedding_dim = att_embedding_dim
-
+
         # Use sparse and sequence features for interaction
         # **INFO**: this is different from the original paper, we also include dense features
         # if you want to follow the paper strictly, set dense_features=[]
         # or modify the code accordingly
-        self.interaction_features = dense_features + sparse_features + sequence_features
-
+        self.interaction_features = dense_features + sparse_features + sequence_features
+
         # All features for embedding
         self.all_features = dense_features + sparse_features + sequence_features
 
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)
-
+
         # Project embeddings to attention embedding dimension
         num_fields = len(self.interaction_features)
-
+
         # If embeddings have different dimensions, project them to att_embedding_dim
-        self.need_projection = not all(
+        self.need_projection = not all(
+            f.embedding_dim == att_embedding_dim for f in self.interaction_features
+        )
         self.projection_layers = None
         if self.need_projection:
-            self.projection_layers = nn.ModuleList(
-            [4 old lines not rendered in the diff view]
+            self.projection_layers = nn.ModuleList(
+                [
+                    nn.Linear(f.embedding_dim, att_embedding_dim, bias=False)
+                    for f in self.interaction_features
+                ]
+            )
+
         # Multi-head self-attention layers
-        self.attention_layers = nn.ModuleList(
-            [8 old lines not rendered in the diff view]
+        self.attention_layers = nn.ModuleList(
+            [
+                MultiHeadSelfAttention(
+                    embedding_dim=att_embedding_dim,
+                    num_heads=att_head_num,
+                    dropout=att_dropout,
+                    use_residual=att_use_residual,
+                )
+                for _ in range(att_layer_num)
+            ]
+        )
+
         # Final prediction layer
         self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
         self.prediction_layer = PredictionLayer(task_type=self.default_task)
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr=
-            include_modules=[
+            embedding_attr="embedding",
+            include_modules=["projection_layers", "attention_layers", "fc"],
         )
 
         self.compile(
@@ -172,21 +181,29 @@ class AutoInt(BaseModel):
         # Get embeddings field-by-field so mixed dimensions can be projected safely
         field_embeddings = []
         if len(self.interaction_features) == 0:
-            raise ValueError(
+            raise ValueError(
+                "AutoInt requires at least one sparse or sequence feature for interactions."
+            )
         for idx, feature in enumerate(self.interaction_features):
             feature_emb = self.embedding(x=x, features=[feature], squeeze_dim=False)
             feature_emb = feature_emb.squeeze(1)  # [B, embedding_dim]
             if self.need_projection and self.projection_layers is not None:
                 feature_emb = self.projection_layers[idx](feature_emb)
-            field_embeddings.append(
+            field_embeddings.append(
+                feature_emb.unsqueeze(1)
+            )  # [B, 1, att_embedding_dim or original_dim]
         embeddings = torch.cat(field_embeddings, dim=1)
-
+
         # Apply multi-head self-attention layers
         attention_output = embeddings
         for att_layer in self.attention_layers:
-            attention_output = att_layer(
-            [1 old line not rendered in the diff view]
+            attention_output = att_layer(
+                attention_output
+            )  # [B, num_fields, att_embedding_dim]
+
         # Flatten and predict
-        attention_output_flat = attention_output.flatten(
+        attention_output_flat = attention_output.flatten(
+            start_dim=1
+        )  # [B, num_fields * att_embedding_dim]
         y = self.fc(attention_output_flat)  # [B, 1]
         return self.prediction_layer(y)
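Note: MultiHeadSelfAttention's internals are not part of this diff. As a rough stand-in, the stack instantiated above applies att_layer_num rounds of self-attention across the field axis; a sketch using torch.nn.MultiheadAttention, with the residual-plus-ReLU step assumed from att_use_residual and the AutoInt paper rather than taken from the package:

import torch
import torch.nn as nn

B, num_fields, att_embedding_dim = 4, 6, 8
att_layer_num, att_head_num = 3, 2
embeddings = torch.randn(B, num_fields, att_embedding_dim)  # fields after projection

attention_layers = nn.ModuleList(
    [
        nn.MultiheadAttention(att_embedding_dim, att_head_num, batch_first=True)
        for _ in range(att_layer_num)
    ]
)

out = embeddings
for attn in attention_layers:
    residual = out                   # skip path, assumed from att_use_residual=True
    out, _ = attn(out, out, out)     # self-attention over the field axis
    out = torch.relu(out + residual)

fc = nn.Linear(num_fields * att_embedding_dim, 1)
logit = fc(out.flatten(start_dim=1))  # [B, 1], matching AutoInt.forward's final step
print(logit.shape)  # torch.Size([4, 1])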
nextrec/models/ranking/dcn.py
CHANGED
@@ -1,11 +1,53 @@
 """
 Date: create on 09/11/2025
-[2 old lines not rendered in the diff view]
+Checkpoint: edit on 09/12/2025
+Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-[3 old lines not rendered in the diff view]
+[1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]
+//Proceedings of the ADKDD'17. 2017: 1-7.
+(https://arxiv.org/abs/1708.05123)
+
+Deep & Cross Network (DCN) mixes explicit polynomial feature crosses with a deep
+MLP branch to capture both low-order and high-order interactions for CTR-style
+tasks. Cross Layers repeatedly apply x_{l+1} = x0 * (w_l^T x_l) + b_l + x_l,
+which expands feature crosses with linear parameter growth, while the deep branch
+learns nonlinear patterns on the same shared embeddings. The final prediction
+concatenates (or solely uses) cross outputs before a linear head, offering a
+balanced trade-off between interpretability and expressiveness.
+
+Workflow:
+(1) Embed sparse/sequence features and concatenate with dense inputs
+(2) Cross Network builds explicit polynomial interactions via residual crosses
+(3) Optional MLP models implicit high-order nonlinear relationships
+(4) Cross output (and deep output if enabled) are fused for the final logit
+(5) Prediction layer maps logits to binary CTR scores
+
+Key Advantages:
+- Explicit, low-cost cross features with O(L * d) parameters
+- Residual cross formulation stabilizes optimization
+- Optional deep tower increases capacity without losing interpretability
+- Shared embeddings reduce redundant parameters and preprocessing
+- Strong, simple baseline for ad/recommendation ranking tasks
+
+DCN(Deep & Cross Network)通过 Cross 层显式生成多项式特征交互,同时可选 Deep
+分支学习高阶非线性关系,两者共享 embedding。Cross 层按
+x_{l+1} = x0 * (w_l^T x_l) + b_l + x_l 递推,参数线性增长且具解释性;
+Deep 分支提升表达能力;最终将 Cross(及 Deep)结果送入线性层与预测层,形成兼具
+效率与效果的 CTR/CVR 预估模型。
+
+流程:
+(1) 对稀疏/序列特征做 embedding,并与稠密特征拼接
+(2) Cross 层以残差形式显式构造多阶交叉特征
+(3) 可选 MLP 学习隐式高阶非线性交互
+(4) 将 Cross(及 Deep)输出融合后接线性头得到 logit
+(5) 预测层输出二分类 CTR 分数
+
+主要优点:
+- 显式交叉特征、参数线性增长、易解释
+- 残差式 Cross 提升训练稳定性
+- Deep 分支可灵活增强模型容量
+- 共享 embedding,减少冗余参数与预处理
+- CTR/CVR 排序任务的简洁强基线
 """
 
 import torch
@@ -15,21 +57,27 @@ from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+
 class CrossNetwork(nn.Module):
     """Stacked Cross Layers from DCN (Wang et al., 2017)."""
 
     def __init__(self, input_dim, num_layers):
         super().__init__()
         self.num_layers = num_layers
-        self.w = torch.nn.ModuleList(
-        [1 old line not rendered in the diff view]
+        self.w = torch.nn.ModuleList(
+            [torch.nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers)]
+        )
+        self.b = torch.nn.ParameterList(
+            [torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)]
+        )
 
     def forward(self, x):
         x0 = x
         for i in range(self.num_layers):
            xw = self.w[i](x)
            x = x0 * xw + self.b[i] + x
-        return x
+        return x  # [batch_size, input_dim]
+
 
 class DCN(BaseModel):
     @property
@@ -40,25 +88,34 @@ class DCN(BaseModel):
     def default_task(self):
         return "binary"
 
-    def __init__(
-        [18 old lines not rendered in the diff view]
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | None = None,
+        sparse_features: list[SparseFeature] | None = None,
+        sequence_features: list[SequenceFeature] | None = None,
+        cross_num: int = 3,
+        mlp_params: dict | None = None,
+        target: list[str] | str | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
         super(DCN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -70,34 +127,37 @@ class DCN(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
-        self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
-
-        # All features
-        self.all_features = dense_features + sparse_features + sequence_features
-
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)
 
         # Calculate input dimension
-        emb_dim_total = sum(
-        [1 old line not rendered in the diff view]
+        emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.all_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         input_dim = emb_dim_total + dense_input_dim
-
-        # Cross Network
+
+        # Cross Network for explicit feature crosses
         self.cross_network = CrossNetwork(input_dim=input_dim, num_layers=cross_num)
-
-        # Deep Network
+
+        # Deep Network for implicit high-order interactions
         if mlp_params is not None:
             self.use_dnn = True
             self.mlp = MLP(input_dim=input_dim, **mlp_params)
             deep_dim = self.mlp.output_dim
             # Final layer combines cross and deep
-            self.final_layer = nn.Linear(
+            self.final_layer = nn.Linear(
+                input_dim + deep_dim, 1
+            )  # + deep_dim for MLP output
         else:
             self.use_dnn = False
             # Final layer only uses cross network output
@@ -107,8 +167,8 @@ class DCN(BaseModel):
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr=
-            include_modules=[
+            embedding_attr="embedding",
+            include_modules=["cross_network", "mlp", "final_layer"],
         )
 
         self.compile(
@@ -121,18 +181,20 @@ class DCN(BaseModel):
     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
-
+
         # Cross Network
         cross_output = self.cross_network(input_flat)  # [B, input_dim]
-
+
         if self.use_dnn:
             # Deep Network
             deep_output = self.mlp(input_flat)  # [B, 1]
             # Concatenate cross and deep
-            combined = torch.cat(
+            combined = torch.cat(
+                [cross_output, deep_output], dim=-1
+            )  # [B, input_dim + 1]
         else:
             combined = cross_output
-
+
         # Final prediction
         y = self.final_layer(combined)
         return self.prediction_layer(y)