nextrec 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +41 -0
- nextrec/__version__.py +1 -0
- nextrec/basic/__init__.py +0 -0
- nextrec/basic/activation.py +92 -0
- nextrec/basic/callback.py +35 -0
- nextrec/basic/dataloader.py +447 -0
- nextrec/basic/features.py +87 -0
- nextrec/basic/layers.py +985 -0
- nextrec/basic/loggers.py +124 -0
- nextrec/basic/metrics.py +557 -0
- nextrec/basic/model.py +1438 -0
- nextrec/data/__init__.py +27 -0
- nextrec/data/data_utils.py +132 -0
- nextrec/data/preprocessor.py +662 -0
- nextrec/loss/__init__.py +35 -0
- nextrec/loss/loss_utils.py +136 -0
- nextrec/loss/match_losses.py +294 -0
- nextrec/models/generative/hstu.py +0 -0
- nextrec/models/generative/tiger.py +0 -0
- nextrec/models/match/__init__.py +13 -0
- nextrec/models/match/dssm.py +200 -0
- nextrec/models/match/dssm_v2.py +162 -0
- nextrec/models/match/mind.py +210 -0
- nextrec/models/match/sdm.py +253 -0
- nextrec/models/match/youtube_dnn.py +172 -0
- nextrec/models/multi_task/esmm.py +129 -0
- nextrec/models/multi_task/mmoe.py +161 -0
- nextrec/models/multi_task/ple.py +260 -0
- nextrec/models/multi_task/share_bottom.py +126 -0
- nextrec/models/ranking/__init__.py +17 -0
- nextrec/models/ranking/afm.py +118 -0
- nextrec/models/ranking/autoint.py +140 -0
- nextrec/models/ranking/dcn.py +120 -0
- nextrec/models/ranking/deepfm.py +95 -0
- nextrec/models/ranking/dien.py +214 -0
- nextrec/models/ranking/din.py +181 -0
- nextrec/models/ranking/fibinet.py +130 -0
- nextrec/models/ranking/fm.py +87 -0
- nextrec/models/ranking/masknet.py +125 -0
- nextrec/models/ranking/pnn.py +128 -0
- nextrec/models/ranking/widedeep.py +105 -0
- nextrec/models/ranking/xdeepfm.py +117 -0
- nextrec/utils/__init__.py +18 -0
- nextrec/utils/common.py +14 -0
- nextrec/utils/embedding.py +19 -0
- nextrec/utils/initializer.py +47 -0
- nextrec/utils/optimizer.py +75 -0
- nextrec-0.1.1.dist-info/METADATA +302 -0
- nextrec-0.1.1.dist-info/RECORD +51 -0
- nextrec-0.1.1.dist-info/WHEEL +4 -0
- nextrec-0.1.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 09/11/2025
|
|
3
|
+
Author:
|
|
4
|
+
Yang Zhou,zyaztec@gmail.com
|
|
5
|
+
Reference:
|
|
6
|
+
[1] Huang T, Zhang Z, Zhang B, et al. FiBiNET: Combining feature importance and bilinear feature interaction
|
|
7
|
+
for click-through rate prediction[C]//RecSys. 2019: 169-177.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import torch
|
|
11
|
+
import torch.nn as nn
|
|
12
|
+
|
|
13
|
+
from nextrec.basic.model import BaseModel
|
|
14
|
+
from nextrec.basic.layers import (
|
|
15
|
+
BiLinearInteractionLayer,
|
|
16
|
+
EmbeddingLayer,
|
|
17
|
+
LR,
|
|
18
|
+
MLP,
|
|
19
|
+
PredictionLayer,
|
|
20
|
+
SENETLayer,
|
|
21
|
+
)
|
|
22
|
+
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FiBiNET(BaseModel):
    """FiBiNET click-through-rate model.

    Combines three signals:
      * a linear (LR) term over sparse/sequence feature embeddings,
      * bilinear interactions over the raw field embeddings,
      * bilinear interactions over SENET re-weighted field embeddings,
    where the two bilinear branches are concatenated and fed into an MLP.

    Reference:
        [1] Huang T, Zhang Z, Zhang B, et al. FiBiNET: Combining feature
        importance and bilinear feature interaction for click-through rate
        prediction[C]//RecSys. 2019: 169-177.
    """

    @property
    def model_name(self):
        """Human-readable model identifier."""
        return "FiBiNET"

    @property
    def task_type(self):
        """Prediction task type; FiBiNET is a binary CTR model."""
        return "binary"

    def __init__(self,
                 dense_features: list[DenseFeature] | None = None,
                 sparse_features: list[SparseFeature] | None = None,
                 sequence_features: list[SequenceFeature] | None = None,
                 mlp_params: dict | None = None,
                 bilinear_type: str = "field_interaction",
                 senet_reduction: int = 3,
                 target: list[str] | None = None,
                 optimizer: str = "adam",
                 optimizer_params: dict | None = None,
                 loss: str | nn.Module | None = "bce",
                 device: str = 'cpu',
                 model_id: str = "baseline",
                 embedding_l1_reg=1e-6,
                 dense_l1_reg=1e-5,
                 embedding_l2_reg=1e-5,
                 dense_l2_reg=1e-4):
        """Build the FiBiNET model.

        Args:
            dense_features: Dense (numeric) feature definitions; deep tower only.
            sparse_features: Sparse (categorical) feature definitions.
            sequence_features: Sequence feature definitions.
            mlp_params: Keyword arguments forwarded to the MLP tower.
            bilinear_type: Bilinear interaction variant, e.g. "field_interaction".
            senet_reduction: Reduction ratio of the SENET squeeze step.
            target: Target column name(s).
            optimizer: Optimizer name passed to ``compile``.
            optimizer_params: Optimizer keyword arguments.
            loss: Loss name or module; ``None`` falls back to "bce".
            device: Torch device string.
            model_id: Identifier used by the training framework.
            embedding_l1_reg / dense_l1_reg / embedding_l2_reg / dense_l2_reg:
                Regularization strengths forwarded to ``BaseModel``.

        Raises:
            ValueError: If fewer than two interaction features are given, or
                their embedding dimensions differ.
        """
        # Normalize None -> fresh containers; mutable defaults ([] / {}) are
        # shared across calls and were a latent bug in the original signature.
        dense_features = dense_features if dense_features is not None else []
        sparse_features = sparse_features if sparse_features is not None else []
        sequence_features = sequence_features if sequence_features is not None else []
        mlp_params = mlp_params if mlp_params is not None else {}
        target = target if target is not None else []
        optimizer_params = optimizer_params if optimizer_params is not None else {}

        super(FiBiNET, self).__init__(
            dense_features=dense_features,
            sparse_features=sparse_features,
            sequence_features=sequence_features,
            target=target,
            task=self.task_type,
            device=device,
            embedding_l1_reg=embedding_l1_reg,
            dense_l1_reg=dense_l1_reg,
            embedding_l2_reg=embedding_l2_reg,
            dense_l2_reg=dense_l2_reg,
            early_stop_patience=20,
            model_id=model_id
        )

        # Fall back to BCE when no loss is given. Use the normalized value
        # consistently: the original code stored the fallback in self.loss but
        # still passed the raw (possibly None) `loss` to compile().
        self.loss = loss if loss is not None else "bce"

        self.linear_features = sparse_features + sequence_features
        self.deep_features = dense_features + sparse_features + sequence_features
        self.interaction_features = sparse_features + sequence_features

        if len(self.interaction_features) < 2:
            raise ValueError("FiBiNET requires at least two sparse/sequence features for interactions.")

        self.embedding = EmbeddingLayer(features=self.deep_features)

        self.num_fields = len(self.interaction_features)
        self.embedding_dim = self.interaction_features[0].embedding_dim
        # Bilinear interactions require a shared embedding width across fields.
        if any(f.embedding_dim != self.embedding_dim for f in self.interaction_features):
            raise ValueError("All interaction features must share the same embedding_dim in FiBiNET.")

        self.senet = SENETLayer(num_fields=self.num_fields, reduction_ratio=senet_reduction)
        self.bilinear_standard = BiLinearInteractionLayer(
            input_dim=self.embedding_dim,
            num_fields=self.num_fields,
            bilinear_type=bilinear_type,
        )
        self.bilinear_senet = BiLinearInteractionLayer(
            input_dim=self.embedding_dim,
            num_fields=self.num_fields,
            bilinear_type=bilinear_type,
        )

        linear_dim = sum(f.embedding_dim for f in self.linear_features)
        self.linear = LR(linear_dim)

        # Each bilinear branch yields C(num_fields, 2) pairwise vectors; the
        # two branches are concatenated, hence the factor of 2.
        num_pairs = self.num_fields * (self.num_fields - 1) // 2
        interaction_dim = num_pairs * self.embedding_dim * 2
        self.mlp = MLP(input_dim=interaction_dim, **mlp_params)
        self.prediction_layer = PredictionLayer(task_type=self.task_type)

        # Register regularization weights
        self._register_regularization_weights(
            embedding_attr='embedding',
            include_modules=['linear', 'senet', 'bilinear_standard', 'bilinear_senet', 'mlp']
        )

        self.compile(
            optimizer=optimizer,
            optimizer_params=optimizer_params,
            loss=self.loss
        )

    def forward(self, x):
        """Compute the prediction for a batch.

        Args:
            x: Batch input consumed by ``EmbeddingLayer``.

        Returns:
            Output of the prediction layer (binary score logits/probs).
        """
        # Linear branch on flattened sparse/sequence embeddings.
        input_linear = self.embedding(x=x, features=self.linear_features, squeeze_dim=True)
        y_linear = self.linear(input_linear)

        # Field-wise embeddings [B, F, D] and SENET re-weighted counterpart.
        field_emb = self.embedding(x=x, features=self.interaction_features, squeeze_dim=False)
        senet_emb = self.senet(field_emb)

        bilinear_standard = self.bilinear_standard(field_emb).flatten(start_dim=1)
        bilinear_senet = self.bilinear_senet(senet_emb).flatten(start_dim=1)
        deep_input = torch.cat([bilinear_standard, bilinear_senet], dim=1)
        y_deep = self.mlp(deep_input)

        y = y_linear + y_deep
        return self.prediction_layer(y)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 09/11/2025
|
|
3
|
+
Author:
|
|
4
|
+
Yang Zhou,zyaztec@gmail.com
|
|
5
|
+
Reference:
|
|
6
|
+
[1] Rendle S. Factorization machines[C]//ICDM. 2010: 995-1000.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import torch.nn as nn
|
|
10
|
+
|
|
11
|
+
from nextrec.basic.model import BaseModel
|
|
12
|
+
from nextrec.basic.layers import EmbeddingLayer, FM as FMInteraction, LR, PredictionLayer
|
|
13
|
+
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class FM(BaseModel):
    """Factorization Machine for binary classification.

    Sums a first-order linear (LR) term over flattened embeddings with the
    classic second-order FM pairwise interaction term.

    Reference:
        [1] Rendle S. Factorization machines[C]//ICDM. 2010: 995-1000.
    """

    @property
    def model_name(self):
        """Human-readable model identifier."""
        return "FM"

    @property
    def task_type(self):
        """Prediction task type; FM here targets binary classification."""
        return "binary"

    def __init__(self,
                 dense_features: list[DenseFeature] | None = None,
                 sparse_features: list[SparseFeature] | None = None,
                 sequence_features: list[SequenceFeature] | None = None,
                 target: list[str] | None = None,
                 optimizer: str = "adam",
                 optimizer_params: dict | None = None,
                 loss: str | nn.Module | None = "bce",
                 device: str = 'cpu',
                 model_id: str = "baseline",
                 embedding_l1_reg=1e-6,
                 dense_l1_reg=1e-5,
                 embedding_l2_reg=1e-5,
                 dense_l2_reg=1e-4):
        """Build the FM model.

        Args:
            dense_features: Dense feature definitions (not used by the FM
                interaction itself, but registered with ``BaseModel``).
            sparse_features: Sparse (categorical) feature definitions.
            sequence_features: Sequence feature definitions.
            target: Target column name(s).
            optimizer: Optimizer name passed to ``compile``.
            optimizer_params: Optimizer keyword arguments.
            loss: Loss name or module; ``None`` falls back to "bce".
            device: Torch device string.
            model_id: Identifier used by the training framework.
            embedding_l1_reg / dense_l1_reg / embedding_l2_reg / dense_l2_reg:
                Regularization strengths forwarded to ``BaseModel``.

        Raises:
            ValueError: If no sparse or sequence feature is provided.
        """
        # Normalize None -> fresh containers; mutable defaults ([] / {}) are
        # shared across calls and were a latent bug in the original signature.
        dense_features = dense_features if dense_features is not None else []
        sparse_features = sparse_features if sparse_features is not None else []
        sequence_features = sequence_features if sequence_features is not None else []
        target = target if target is not None else []
        optimizer_params = optimizer_params if optimizer_params is not None else {}

        super(FM, self).__init__(
            dense_features=dense_features,
            sparse_features=sparse_features,
            sequence_features=sequence_features,
            target=target,
            task=self.task_type,
            device=device,
            embedding_l1_reg=embedding_l1_reg,
            dense_l1_reg=dense_l1_reg,
            embedding_l2_reg=embedding_l2_reg,
            dense_l2_reg=dense_l2_reg,
            early_stop_patience=20,
            model_id=model_id
        )

        # Fall back to BCE when no loss is given. Use the normalized value
        # consistently: the original code stored the fallback in self.loss but
        # still passed the raw (possibly None) `loss` to compile().
        self.loss = loss if loss is not None else "bce"

        self.fm_features = sparse_features + sequence_features
        if len(self.fm_features) == 0:
            raise ValueError("FM requires at least one sparse or sequence feature.")

        self.embedding = EmbeddingLayer(features=self.fm_features)

        fm_input_dim = sum(f.embedding_dim for f in self.fm_features)
        self.linear = LR(fm_input_dim)
        # reduce_sum=True -> scalar second-order term per example.
        self.fm = FMInteraction(reduce_sum=True)
        self.prediction_layer = PredictionLayer(task_type=self.task_type)

        # Register regularization weights
        self._register_regularization_weights(
            embedding_attr='embedding',
            include_modules=['linear']
        )

        self.compile(
            optimizer=optimizer,
            optimizer_params=optimizer_params,
            loss=self.loss
        )

    def forward(self, x):
        """Compute first-order + second-order FM score for a batch.

        Args:
            x: Batch input consumed by ``EmbeddingLayer``.

        Returns:
            Output of the prediction layer.
        """
        # [B, F, D] field embeddings; the linear term uses the flattened view.
        input_fm = self.embedding(x=x, features=self.fm_features, squeeze_dim=False)
        y_linear = self.linear(input_fm.flatten(start_dim=1))
        y_fm = self.fm(input_fm)
        y = y_linear + y_fm
        return self.prediction_layer(y)
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 09/11/2025
|
|
3
|
+
Author:
|
|
4
|
+
Yang Zhou,zyaztec@gmail.com
|
|
5
|
+
Reference:
|
|
6
|
+
[1] Pan Z, Sun F, Liu J, et al. MaskNet: Introducing feature-wise gating blocks for high-dimensional
|
|
7
|
+
sparse recommendation data (CCF-Tencent CTR competition solution, 2020).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import torch
|
|
11
|
+
import torch.nn as nn
|
|
12
|
+
|
|
13
|
+
from nextrec.basic.model import BaseModel
|
|
14
|
+
from nextrec.basic.layers import EmbeddingLayer, LR, MLP, PredictionLayer
|
|
15
|
+
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class MaskNet(BaseModel):
    """MaskNet-style CTR model with serially chained feature-gating blocks.

    Each block derives a per-field sigmoid mask from the previous block's
    output and applies it to the field embeddings; all block outputs are
    concatenated and scored by a final MLP, added to a linear (LR) term.

    Reference:
        [1] Pan Z, Sun F, Liu J, et al. MaskNet: Introducing feature-wise
        gating blocks for high-dimensional sparse recommendation data
        (CCF-Tencent CTR competition solution, 2020).
    """

    @property
    def model_name(self):
        """Human-readable model identifier."""
        return "MaskNet"

    @property
    def task_type(self):
        """Prediction task type; MaskNet here targets binary classification."""
        return "binary"

    def __init__(self,
                 dense_features: list[DenseFeature] | None = None,
                 sparse_features: list[SparseFeature] | None = None,
                 sequence_features: list[SequenceFeature] | None = None,
                 num_blocks: int = 3,
                 mask_hidden_dim: int = 64,
                 block_dropout: float = 0.1,
                 mlp_params: dict | None = None,
                 target: list[str] | None = None,
                 optimizer: str = "adam",
                 optimizer_params: dict | None = None,
                 loss: str | nn.Module | None = "bce",
                 device: str = 'cpu',
                 model_id: str = "baseline",
                 embedding_l1_reg=1e-6,
                 dense_l1_reg=1e-5,
                 embedding_l2_reg=1e-5,
                 dense_l2_reg=1e-4):
        """Build the MaskNet model.

        Args:
            dense_features: Dense feature definitions (registered with
                ``BaseModel``; not used by the masking blocks).
            sparse_features: Sparse (categorical) feature definitions.
            sequence_features: Sequence feature definitions.
            num_blocks: Number of serial gating blocks (clamped to >= 1).
            mask_hidden_dim: Hidden width of each mask-generator MLP.
            block_dropout: Dropout applied to each block's flattened output.
            mlp_params: Keyword arguments forwarded to the final MLP.
            target: Target column name(s).
            optimizer: Optimizer name passed to ``compile``.
            optimizer_params: Optimizer keyword arguments.
            loss: Loss name or module; ``None`` falls back to "bce".
            device: Torch device string.
            model_id: Identifier used by the training framework.
            embedding_l1_reg / dense_l1_reg / embedding_l2_reg / dense_l2_reg:
                Regularization strengths forwarded to ``BaseModel``.

        Raises:
            ValueError: If no mask feature is given or embedding dims differ.
        """
        # Normalize None -> fresh containers; mutable defaults ([] / {}) are
        # shared across calls and were a latent bug in the original signature.
        dense_features = dense_features if dense_features is not None else []
        sparse_features = sparse_features if sparse_features is not None else []
        sequence_features = sequence_features if sequence_features is not None else []
        mlp_params = mlp_params if mlp_params is not None else {}
        target = target if target is not None else []
        optimizer_params = optimizer_params if optimizer_params is not None else {}

        super(MaskNet, self).__init__(
            dense_features=dense_features,
            sparse_features=sparse_features,
            sequence_features=sequence_features,
            target=target,
            task=self.task_type,
            device=device,
            embedding_l1_reg=embedding_l1_reg,
            dense_l1_reg=dense_l1_reg,
            embedding_l2_reg=embedding_l2_reg,
            dense_l2_reg=dense_l2_reg,
            early_stop_patience=20,
            model_id=model_id
        )

        # Fall back to BCE when no loss is given. Use the normalized value
        # consistently: the original code stored the fallback in self.loss but
        # still passed the raw (possibly None) `loss` to compile().
        self.loss = loss if loss is not None else "bce"

        self.mask_features = sparse_features + sequence_features
        if len(self.mask_features) == 0:
            raise ValueError("MaskNet requires at least one sparse/sequence feature.")

        self.embedding = EmbeddingLayer(features=self.mask_features)
        self.num_fields = len(self.mask_features)
        self.embedding_dim = self.mask_features[0].embedding_dim
        # Per-field masks require a shared embedding width across fields.
        if any(f.embedding_dim != self.embedding_dim for f in self.mask_features):
            raise ValueError("MaskNet expects identical embedding_dim across mask_features.")

        self.num_blocks = max(1, num_blocks)
        self.field_dim = self.num_fields * self.embedding_dim

        self.linear = LR(self.field_dim)
        # One mask generator per block: field_dim -> hidden -> one logit per field.
        self.mask_generators = nn.ModuleList()
        for _ in range(self.num_blocks):
            self.mask_generators.append(
                nn.Sequential(
                    nn.Linear(self.field_dim, mask_hidden_dim),
                    nn.ReLU(),
                    nn.Linear(mask_hidden_dim, self.num_fields)
                )
            )

        self.block_dropout = nn.Dropout(block_dropout)
        self.final_mlp = MLP(input_dim=self.field_dim * self.num_blocks, **mlp_params)
        self.prediction_layer = PredictionLayer(task_type=self.task_type)

        self._register_regularization_weights(
            embedding_attr='embedding',
            include_modules=['linear', 'mask_generators', 'final_mlp']
        )

        self.compile(
            optimizer=optimizer,
            optimizer_params=optimizer_params,
            loss=self.loss
        )

    def forward(self, x):
        """Run the serial gating blocks and combine with the linear term.

        Args:
            x: Batch input consumed by ``EmbeddingLayer``.

        Returns:
            Output of the prediction layer.
        """
        field_emb = self.embedding(x=x, features=self.mask_features, squeeze_dim=False)
        flat_input = field_emb.flatten(start_dim=1)
        y_linear = self.linear(flat_input)

        # Serial chaining: block i's mask is computed from block i-1's
        # flattened output, and its gating is applied to block i-1's
        # (masked) embeddings.
        block_input = field_emb
        mask_input = flat_input
        block_outputs = []
        for mask_gen in self.mask_generators:
            mask_logits = mask_gen(mask_input)
            # [B, F] logits -> [B, F, 1] sigmoid gate broadcast over the
            # embedding dimension.
            mask = torch.sigmoid(mask_logits).unsqueeze(-1)
            masked_emb = block_input * mask
            block_output = self.block_dropout(masked_emb.flatten(start_dim=1))
            block_outputs.append(block_output)
            mask_input = block_output
            block_input = masked_emb.view_as(field_emb)

        stacked = torch.cat(block_outputs, dim=1)
        y_deep = self.final_mlp(stacked)

        y = y_linear + y_deep
        return self.prediction_layer(y)
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 09/11/2025
|
|
3
|
+
Author:
|
|
4
|
+
Yang Zhou,zyaztec@gmail.com
|
|
5
|
+
Reference:
|
|
6
|
+
[1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//ICDM. 2016: 1149-1154.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import torch
|
|
10
|
+
import torch.nn as nn
|
|
11
|
+
|
|
12
|
+
from nextrec.basic.model import BaseModel
|
|
13
|
+
from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
|
|
14
|
+
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class PNN(BaseModel):
    """Product-based Neural Network for CTR prediction.

    Concatenates the flattened field embeddings (linear signal) with a
    pairwise product signal — inner products, or kernel-transformed
    element-wise ("outer") products — and scores them with an MLP.

    Reference:
        [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user
        response prediction[C]//ICDM. 2016: 1149-1154.
    """

    @property
    def model_name(self):
        """Human-readable model identifier."""
        return "PNN"

    @property
    def task_type(self):
        """Prediction task type; PNN here targets binary classification."""
        return "binary"

    def __init__(self,
                 dense_features: list[DenseFeature] | None = None,
                 sparse_features: list[SparseFeature] | None = None,
                 sequence_features: list[SequenceFeature] | None = None,
                 mlp_params: dict | None = None,
                 product_type: str = "inner",
                 outer_product_dim: int | None = None,
                 target: list[str] | None = None,
                 optimizer: str = "adam",
                 optimizer_params: dict | None = None,
                 loss: str | nn.Module | None = "bce",
                 device: str = 'cpu',
                 model_id: str = "baseline",
                 embedding_l1_reg=1e-6,
                 dense_l1_reg=1e-5,
                 embedding_l2_reg=1e-5,
                 dense_l2_reg=1e-4):
        """Build the PNN model.

        Args:
            dense_features: Dense feature definitions (registered with
                ``BaseModel``; not part of the product interactions).
            sparse_features: Sparse (categorical) feature definitions.
            sequence_features: Sequence feature definitions.
            mlp_params: Keyword arguments forwarded to the MLP tower.
            product_type: "inner" (dot products) or "outer"
                (kernel-transformed element-wise products).
            outer_product_dim: Kernel output width for the outer variant;
                defaults to the embedding dimension.
            target: Target column name(s).
            optimizer: Optimizer name passed to ``compile``.
            optimizer_params: Optimizer keyword arguments.
            loss: Loss name or module; ``None`` falls back to "bce".
            device: Torch device string.
            model_id: Identifier used by the training framework.
            embedding_l1_reg / dense_l1_reg / embedding_l2_reg / dense_l2_reg:
                Regularization strengths forwarded to ``BaseModel``.

        Raises:
            ValueError: On fewer than two field features, mismatched
                embedding dims, or an unknown ``product_type``.
        """
        # Normalize None -> fresh containers; mutable defaults ([] / {}) are
        # shared across calls and were a latent bug in the original signature.
        dense_features = dense_features if dense_features is not None else []
        sparse_features = sparse_features if sparse_features is not None else []
        sequence_features = sequence_features if sequence_features is not None else []
        mlp_params = mlp_params if mlp_params is not None else {}
        target = target if target is not None else []
        optimizer_params = optimizer_params if optimizer_params is not None else {}

        super(PNN, self).__init__(
            dense_features=dense_features,
            sparse_features=sparse_features,
            sequence_features=sequence_features,
            target=target,
            task=self.task_type,
            device=device,
            embedding_l1_reg=embedding_l1_reg,
            dense_l1_reg=dense_l1_reg,
            embedding_l2_reg=embedding_l2_reg,
            dense_l2_reg=dense_l2_reg,
            early_stop_patience=20,
            model_id=model_id
        )

        # Fall back to BCE when no loss is given. Use the normalized value
        # consistently: the original code stored the fallback in self.loss but
        # still passed the raw (possibly None) `loss` to compile().
        self.loss = loss if loss is not None else "bce"

        self.field_features = sparse_features + sequence_features
        if len(self.field_features) < 2:
            raise ValueError("PNN requires at least two sparse/sequence features.")

        self.embedding = EmbeddingLayer(features=self.field_features)
        self.num_fields = len(self.field_features)
        self.embedding_dim = self.field_features[0].embedding_dim
        # Pairwise products require a shared embedding width across fields.
        if any(f.embedding_dim != self.embedding_dim for f in self.field_features):
            raise ValueError("All field features must share the same embedding_dim for PNN.")

        self.product_type = product_type.lower()
        if self.product_type not in {"inner", "outer"}:
            raise ValueError("product_type must be 'inner' or 'outer'.")

        self.num_pairs = self.num_fields * (self.num_fields - 1) // 2
        if self.product_type == "outer":
            # Shared linear kernel applied to every field embedding before
            # the element-wise pair products.
            self.outer_dim = outer_product_dim or self.embedding_dim
            self.kernel = nn.Linear(self.embedding_dim, self.outer_dim, bias=False)
            product_dim = self.num_pairs * self.outer_dim
        else:
            self.outer_dim = None
            product_dim = self.num_pairs  # one scalar per pair

        linear_dim = self.num_fields * self.embedding_dim
        self.mlp = MLP(input_dim=linear_dim + product_dim, **mlp_params)
        self.prediction_layer = PredictionLayer(task_type=self.task_type)

        modules = ['mlp']
        if self.product_type == "outer":
            modules.append('kernel')
        self._register_regularization_weights(
            embedding_attr='embedding',
            include_modules=modules
        )

        self.compile(
            optimizer=optimizer,
            optimizer_params=optimizer_params,
            loss=self.loss
        )

    def forward(self, x):
        """Compute the PNN score for a batch.

        Args:
            x: Batch input consumed by ``EmbeddingLayer``.

        Returns:
            Output of the prediction layer.
        """
        field_emb = self.embedding(x=x, features=self.field_features, squeeze_dim=False)
        linear_signal = field_emb.flatten(start_dim=1)

        if self.product_type == "inner":
            # Inner products: one scalar per unordered field pair.
            interactions = []
            for i in range(self.num_fields - 1):
                vi = field_emb[:, i, :]
                for j in range(i + 1, self.num_fields):
                    vj = field_emb[:, j, :]
                    interactions.append(torch.sum(vi * vj, dim=1, keepdim=True))
            product_signal = torch.cat(interactions, dim=1)
        else:
            # Outer variant: element-wise products of kernel-transformed
            # embeddings, one vector per pair.
            transformed = self.kernel(field_emb)  # [B, F, outer_dim]
            interactions = []
            for i in range(self.num_fields - 1):
                vi = transformed[:, i, :]
                for j in range(i + 1, self.num_fields):
                    vj = transformed[:, j, :]
                    interactions.append(vi * vj)
            product_signal = torch.stack(interactions, dim=1).flatten(start_dim=1)

        deep_input = torch.cat([linear_signal, product_signal], dim=1)
        y = self.mlp(deep_input)
        return self.prediction_layer(y)
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 09/11/2025
|
|
3
|
+
Author:
|
|
4
|
+
Yang Zhou,zyaztec@gmail.com
|
|
5
|
+
Reference:
|
|
6
|
+
[1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]
|
|
7
|
+
//Proceedings of the 1st workshop on deep learning for recommender systems. 2016: 7-10.
|
|
8
|
+
(https://arxiv.org/abs/1606.07792)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import torch
|
|
12
|
+
import torch.nn as nn
|
|
13
|
+
|
|
14
|
+
from nextrec.basic.model import BaseModel
|
|
15
|
+
from nextrec.basic.layers import LR, EmbeddingLayer, MLP, PredictionLayer
|
|
16
|
+
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class WideDeep(BaseModel):
    """Wide & Deep model for binary CTR prediction.

    Sums a wide linear (LR) term over sparse/sequence embeddings with a deep
    MLP tower over all features.

    Reference:
        [1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for
        recommender systems[C]//Proceedings of the 1st workshop on deep
        learning for recommender systems. 2016: 7-10.
        (https://arxiv.org/abs/1606.07792)
    """

    @property
    def model_name(self):
        """Human-readable model identifier."""
        return "WideDeep"

    @property
    def task_type(self):
        """Prediction task type; Wide & Deep here targets binary classification."""
        return "binary"

    def __init__(self,
                 dense_features: list[DenseFeature],
                 sparse_features: list[SparseFeature],
                 sequence_features: list[SequenceFeature],
                 mlp_params: dict,
                 target: list[str] | None = None,
                 optimizer: str = "adam",
                 optimizer_params: dict | None = None,
                 loss: str | nn.Module | None = "bce",
                 device: str = 'cpu',
                 model_id: str = "baseline",
                 embedding_l1_reg=1e-6,
                 dense_l1_reg=1e-5,
                 embedding_l2_reg=1e-5,
                 dense_l2_reg=1e-4):
        """Build the Wide & Deep model.

        Args:
            dense_features: Dense feature definitions (deep tower only).
            sparse_features: Sparse (categorical) feature definitions.
            sequence_features: Sequence feature definitions.
            mlp_params: Keyword arguments forwarded to the deep MLP tower.
            target: Target column name(s).
            optimizer: Optimizer name passed to ``compile``.
            optimizer_params: Optimizer keyword arguments.
            loss: Loss name or module; ``None`` falls back to "bce".
            device: Torch device string.
            model_id: Identifier used by the training framework.
            embedding_l1_reg / dense_l1_reg / embedding_l2_reg / dense_l2_reg:
                Regularization strengths forwarded to ``BaseModel``.
        """
        # Normalize None -> fresh containers; mutable defaults ([] / {}) are
        # shared across calls and were a latent bug in the original signature.
        target = target if target is not None else []
        optimizer_params = optimizer_params if optimizer_params is not None else {}

        super(WideDeep, self).__init__(
            dense_features=dense_features,
            sparse_features=sparse_features,
            sequence_features=sequence_features,
            target=target,
            task=self.task_type,
            device=device,
            embedding_l1_reg=embedding_l1_reg,
            dense_l1_reg=dense_l1_reg,
            embedding_l2_reg=embedding_l2_reg,
            dense_l2_reg=dense_l2_reg,
            early_stop_patience=20,
            model_id=model_id
        )

        # Fall back to BCE when no loss is given. Use the normalized value
        # consistently: the original code stored the fallback in self.loss but
        # still passed the raw (possibly None) `loss` to compile().
        self.loss = loss if loss is not None else "bce"

        # Wide part: linear model over sparse/sequence feature embeddings.
        self.wide_features = sparse_features + sequence_features

        # Deep part: all features.
        self.deep_features = dense_features + sparse_features + sequence_features

        # Shared embedding layer serving both towers.
        self.embedding = EmbeddingLayer(features=self.deep_features)

        # Wide part: linear layer over the concatenated wide embeddings.
        wide_dim = sum(f.embedding_dim for f in self.wide_features)
        self.linear = LR(wide_dim)

        # Deep part: MLP input width = all non-dense embedding dims plus the
        # dense inputs (a dense feature contributes its embedding_dim, or 1
        # when it has none).
        deep_emb_dim_total = sum(f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature))
        dense_input_dim = sum(getattr(f, "embedding_dim", 1) or 1 for f in dense_features)
        self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
        self.prediction_layer = PredictionLayer(task_type=self.task_type)

        # Register regularization weights
        self._register_regularization_weights(
            embedding_attr='embedding',
            include_modules=['linear', 'mlp']
        )

        self.compile(
            optimizer=optimizer,
            optimizer_params=optimizer_params,
            loss=self.loss
        )

    def forward(self, x):
        """Compute the combined wide + deep score for a batch.

        Args:
            x: Batch input consumed by ``EmbeddingLayer``.

        Returns:
            Output of the prediction layer.
        """
        # Deep part
        input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
        y_deep = self.mlp(input_deep)  # [B, 1]

        # Wide part
        input_wide = self.embedding(x=x, features=self.wide_features, squeeze_dim=True)
        y_wide = self.linear(input_wide)

        # Combine wide and deep
        y = y_wide + y_deep
        return self.prediction_layer(y)
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 09/11/2025
|
|
3
|
+
Author:
|
|
4
|
+
Yang Zhou,zyaztec@gmail.com
|
|
5
|
+
Reference:
|
|
6
|
+
[1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
|
|
7
|
+
for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
|
|
8
|
+
knowledge discovery & data mining. 2018: 1754-1763.
|
|
9
|
+
(https://arxiv.org/abs/1803.05170)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import torch
|
|
13
|
+
import torch.nn as nn
|
|
14
|
+
|
|
15
|
+
from nextrec.basic.model import BaseModel
|
|
16
|
+
from nextrec.basic.layers import LR, EmbeddingLayer, MLP, CIN, PredictionLayer
|
|
17
|
+
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class xDeepFM(BaseModel):
    """xDeepFM: explicit (CIN) plus implicit (DNN) feature interactions.

    Sums a linear (LR) term, a Compressed Interaction Network term over the
    sparse/sequence field embeddings, and a deep MLP over all features.

    Reference:
        [1] Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining explicit and
        implicit feature interactions for recommender systems[C]//KDD 2018:
        1754-1763. (https://arxiv.org/abs/1803.05170)
    """

    @property
    def model_name(self):
        """Human-readable model identifier."""
        return "xDeepFM"

    @property
    def task_type(self):
        """Prediction task type; xDeepFM here targets binary classification."""
        return "binary"

    def __init__(self,
                 dense_features: list[DenseFeature],
                 sparse_features: list[SparseFeature],
                 sequence_features: list[SequenceFeature],
                 mlp_params: dict,
                 cin_size: list[int] | None = None,
                 split_half: bool = True,
                 target: list[str] | None = None,
                 optimizer: str = "adam",
                 optimizer_params: dict | None = None,
                 loss: str | nn.Module | None = "bce",
                 device: str = 'cpu',
                 model_id: str = "baseline",
                 embedding_l1_reg=1e-6,
                 dense_l1_reg=1e-5,
                 embedding_l2_reg=1e-5,
                 dense_l2_reg=1e-4):
        """Build the xDeepFM model.

        Args:
            dense_features: Dense feature definitions (deep tower only).
            sparse_features: Sparse (categorical) feature definitions.
            sequence_features: Sequence feature definitions.
            mlp_params: Keyword arguments forwarded to the deep MLP tower.
            cin_size: CIN layer widths; defaults to [128, 128].
            split_half: Whether each CIN layer splits its feature maps in half.
            target: Target column name(s).
            optimizer: Optimizer name passed to ``compile``.
            optimizer_params: Optimizer keyword arguments.
            loss: Loss name or module; ``None`` falls back to "bce".
            device: Torch device string.
            model_id: Identifier used by the training framework.
            embedding_l1_reg / dense_l1_reg / embedding_l2_reg / dense_l2_reg:
                Regularization strengths forwarded to ``BaseModel``.
        """
        # Normalize None -> fresh containers; mutable defaults ([] / {} /
        # [128, 128]) are shared across calls and were a latent bug in the
        # original signature.
        cin_size = list(cin_size) if cin_size is not None else [128, 128]
        target = target if target is not None else []
        optimizer_params = optimizer_params if optimizer_params is not None else {}

        super(xDeepFM, self).__init__(
            dense_features=dense_features,
            sparse_features=sparse_features,
            sequence_features=sequence_features,
            target=target,
            task=self.task_type,
            device=device,
            embedding_l1_reg=embedding_l1_reg,
            dense_l1_reg=dense_l1_reg,
            embedding_l2_reg=embedding_l2_reg,
            dense_l2_reg=dense_l2_reg,
            early_stop_patience=20,
            model_id=model_id
        )

        # Fall back to BCE when no loss is given. Use the normalized value
        # consistently: the original code stored the fallback in self.loss but
        # still passed the raw (possibly None) `loss` to compile().
        self.loss = loss if loss is not None else "bce"

        # Linear part and CIN part: use sparse and sequence features
        self.linear_features = sparse_features + sequence_features

        # Deep part: use all features
        self.deep_features = dense_features + sparse_features + sequence_features

        # Embedding layer shared by all three branches.
        self.embedding = EmbeddingLayer(features=self.deep_features)

        # Linear part
        linear_dim = sum(f.embedding_dim for f in self.linear_features)
        self.linear = LR(linear_dim)

        # CIN part: Compressed Interaction Network over the field dimension.
        num_fields = len(self.linear_features)
        self.cin = CIN(input_dim=num_fields, cin_size=cin_size, split_half=split_half)

        # Deep part: MLP input width = all non-dense embedding dims plus the
        # dense inputs (a dense feature contributes its embedding_dim, or 1
        # when it has none).
        deep_emb_dim_total = sum(f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature))
        dense_input_dim = sum(getattr(f, "embedding_dim", 1) or 1 for f in dense_features)
        self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
        self.prediction_layer = PredictionLayer(task_type=self.task_type)

        # Register regularization weights
        self._register_regularization_weights(
            embedding_attr='embedding',
            include_modules=['linear', 'cin', 'mlp']
        )

        self.compile(
            optimizer=optimizer,
            optimizer_params=optimizer_params,
            loss=self.loss
        )

    def forward(self, x):
        """Compute linear + CIN + deep score for a batch.

        Args:
            x: Batch input consumed by ``EmbeddingLayer``.

        Returns:
            Output of the prediction layer.
        """
        # Field embeddings [B, F, D] shared by the linear and CIN branches.
        input_linear = self.embedding(x=x, features=self.linear_features, squeeze_dim=False)

        # Linear part
        y_linear = self.linear(input_linear.flatten(start_dim=1))

        # CIN part
        y_cin = self.cin(input_linear)  # [B, 1]

        # Deep part
        input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
        y_deep = self.mlp(input_deep)  # [B, 1]

        # Combine all parts
        y = y_linear + y_cin + y_deep
        return self.prediction_layer(y)
|