nextrec-0.4.1-py3-none-any.whl → nextrec-0.4.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +1 -1
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -5
- nextrec/basic/callback.py +1 -0
- nextrec/basic/features.py +30 -22
- nextrec/basic/layers.py +250 -112
- nextrec/basic/loggers.py +63 -44
- nextrec/basic/metrics.py +270 -120
- nextrec/basic/model.py +1084 -402
- nextrec/basic/session.py +10 -3
- nextrec/cli.py +492 -0
- nextrec/data/__init__.py +19 -25
- nextrec/data/batch_utils.py +11 -3
- nextrec/data/data_processing.py +51 -45
- nextrec/data/data_utils.py +26 -15
- nextrec/data/dataloader.py +273 -96
- nextrec/data/preprocessor.py +320 -199
- nextrec/loss/listwise.py +17 -9
- nextrec/loss/loss_utils.py +7 -8
- nextrec/loss/pairwise.py +2 -0
- nextrec/loss/pointwise.py +30 -12
- nextrec/models/generative/hstu.py +103 -38
- nextrec/models/match/dssm.py +82 -68
- nextrec/models/match/dssm_v2.py +72 -57
- nextrec/models/match/mind.py +175 -107
- nextrec/models/match/sdm.py +104 -87
- nextrec/models/match/youtube_dnn.py +73 -59
- nextrec/models/multi_task/esmm.py +69 -46
- nextrec/models/multi_task/mmoe.py +91 -53
- nextrec/models/multi_task/ple.py +117 -58
- nextrec/models/multi_task/poso.py +163 -55
- nextrec/models/multi_task/share_bottom.py +63 -36
- nextrec/models/ranking/afm.py +80 -45
- nextrec/models/ranking/autoint.py +74 -57
- nextrec/models/ranking/dcn.py +110 -48
- nextrec/models/ranking/dcn_v2.py +265 -45
- nextrec/models/ranking/deepfm.py +39 -24
- nextrec/models/ranking/dien.py +335 -146
- nextrec/models/ranking/din.py +158 -92
- nextrec/models/ranking/fibinet.py +134 -52
- nextrec/models/ranking/fm.py +68 -26
- nextrec/models/ranking/masknet.py +95 -33
- nextrec/models/ranking/pnn.py +128 -58
- nextrec/models/ranking/widedeep.py +40 -28
- nextrec/models/ranking/xdeepfm.py +67 -40
- nextrec/utils/__init__.py +59 -34
- nextrec/utils/config.py +496 -0
- nextrec/utils/device.py +30 -20
- nextrec/utils/distributed.py +36 -9
- nextrec/utils/embedding.py +1 -0
- nextrec/utils/feature.py +1 -0
- nextrec/utils/file.py +33 -11
- nextrec/utils/initializer.py +61 -16
- nextrec/utils/model.py +22 -0
- nextrec/utils/optimizer.py +25 -9
- nextrec/utils/synthetic_data.py +283 -165
- nextrec/utils/tensor.py +24 -13
- {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/METADATA +53 -24
- nextrec-0.4.3.dist-info/RECORD +69 -0
- nextrec-0.4.3.dist-info/entry_points.txt +2 -0
- nextrec-0.4.1.dist-info/RECORD +0 -66
- {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/WHEEL +0 -0
- {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/dcn_v2.py
CHANGED
@@ -1,5 +1,46 @@
 """
 Date: create on 09/11/2025
+Checkpoint: edit on 09/12/2025
+Author: Yang Zhou, zyaztec@gmail.com
+Reference:
+    [1] R. Wang et al. DCN V2: Improved Deep & Cross Network and Practical Lessons for
+        Web-scale Learning to Rank Systems. KDD 2021.
+        (https://arxiv.org/abs/2008.13535)
+
+DCN v2 enhances the original Deep & Cross Network by replacing the scalar cross
+weights with vector-wise (matrix) parameters and a Mixture-of-Low-Rank-Experts
+variant. The matrix cross (CrossNetV2) improves expressiveness with manageable
+parameter growth, while CrossNetMix decomposes the matrix into low-rank factors and
+gates across experts for stronger modeling at a similar cost. As in DCN, the cross
+tower explicitly builds polynomial feature interactions and can be paired with a
+deep MLP tower; their outputs are concatenated before a final linear head.
+
+Workflow:
+    (1) Embed sparse/sequence fields and concatenate with dense inputs
+    (2) Cross tower: choose matrix CrossNetV2 or low-rank CrossNetMix for explicit crosses
+    (3) Optional deep tower: MLP over the same flattened embeddings
+    (4) Fuse cross and deep outputs, then predict via a linear + prediction layer
+
+Key Advantages:
+    - Vector-wise/matrix cross weights capture richer interactions than DCN v1
+    - Low-rank MoE cross (CrossNetMix) boosts capacity without quadratic parameters
+    - Compatible with a deep tower for additional nonlinear modeling
+
+DCN v2 在原始 DCN 基础上,将标量交叉权重升级为向量/矩阵参数,并引入低秩专家混合
+的 CrossNetMix。矩阵交叉(CrossNetV2)在参数可控的前提下提升表达力,CrossNetMix
+通过低秩分解和 gating 进一步增强建模能力且保持参数效率。和 DCN 一样,交叉塔显式
+构造多项式交互,可并行或串联一个 MLP 深塔,最终拼接/输出到线性头做预测。
+
+流程:
+    (1) 对稀疏/序列特征做 embedding,并与稠密特征拼接
+    (2) 交叉塔:可选矩阵 CrossNetV2 或低秩混合 CrossNetMix 显式构造交互
+    (3) 可选深塔:MLP 处理同一展平后的输入或交叉输出
+    (4) 融合交叉与深塔输出,经线性层和预测层得到最终得分
+
+主要优点:
+    - 矩阵交叉相较 DCN v1 捕获更丰富的交互
+    - 低秩专家混合在相近参数量下带来更强建模能力
+    - 兼容并行/串行深塔,灵活扩展非线性表示
 """
 
 import torch
@@ -9,76 +50,255 @@ from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+
 class CrossNetV2(nn.Module):
     """Vector-wise cross network proposed in DCN V2 (Wang et al., 2021)."""
-
+
+    def __init__(self, input_dim: int, num_layers: int):
         super().__init__()
         self.num_layers = num_layers
-        self.w = torch.nn.ModuleList(
-
-
+        self.w = torch.nn.ModuleList(
+            [
+                torch.nn.Linear(input_dim, input_dim, bias=False)
+                for _ in range(num_layers)
+            ]
+        )
+        self.b = torch.nn.ParameterList(
+            [torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)]
+        )
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         x0 = x
         for i in range(self.num_layers):
-            x =x0*self.w[i](x) + self.b[i] + x
+            x = x0 * self.w[i](x) + self.b[i] + x
         return x
-
+
+
 class CrossNetMix(nn.Module):
     """Mixture of low-rank cross experts from DCN V2 (Wang et al., 2021)."""
 
-    def __init__(
-
+    def __init__(
+        self,
+        input_dim: int,
+        num_layers: int = 2,
+        low_rank: int = 32,
+        num_experts: int = 4,
+    ):
+        super().__init__()
         self.num_layers = num_layers
         self.num_experts = num_experts
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        self.u_list = nn.ParameterList(
+            [
+                nn.Parameter(
+                    nn.init.xavier_normal_(
+                        torch.empty(num_experts, input_dim, low_rank)
+                    )
+                )
+                for _ in range(num_layers)
+            ]
+        )
+        self.v_list = nn.ParameterList(
+            [
+                nn.Parameter(
+                    nn.init.xavier_normal_(
+                        torch.empty(num_experts, input_dim, low_rank)
+                    )
+                )
+                for _ in range(num_layers)
+            ]
+        )
+        self.c_list = nn.ParameterList(
+            [
+                nn.Parameter(
+                    nn.init.xavier_normal_(torch.empty(num_experts, low_rank, low_rank))
+                )
+                for _ in range(num_layers)
+            ]
+        )
+
+        self.gating = nn.ModuleList(
+            [nn.Linear(input_dim, 1, bias=False) for _ in range(num_experts)]
+        )
+
+        self.bias = nn.ParameterList(
+            [nn.Parameter(torch.zeros(input_dim, 1)) for _ in range(num_layers)]
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # x: (bs, in_features)
         x_0 = x.unsqueeze(2) # (bs, in_features, 1)
         x_l = x_0
+
         for i in range(self.num_layers):
             output_of_experts = []
             gating_score_experts = []
+
+            gating_input = x_l.squeeze(2) # (bs, in_features)
+
             for expert_id in range(self.num_experts):
-                #
-
-
+                # Gating
+                gating_score_experts.append(
+                    self.gating[expert_id](gating_input)
+                ) # (bs, 1)
 
-                #
-
-
+                # Low-rank cross: U C V^T x_l
+                V = self.v_list[i][expert_id] # (in_features, low_rank)
+                C = self.c_list[i][expert_id] # (low_rank, low_rank)
+                U = self.u_list[i][expert_id] # (in_features, low_rank)
 
-                #
-                v_x =
-                v_x =
+                # (bs, 1, low_rank)
+                v_x = x_l.transpose(1, 2).matmul(V) # x_l^T V
+                v_x = v_x.matmul(C) # · C
                 v_x = torch.tanh(v_x)
 
-                #
-                uv_x =
+                # (bs, in_features, 1)
+                uv_x = U.matmul(v_x.transpose(1, 2))
+
+                # x_0 ⊙ (uv_x + b)
+                dot_ = x_0 * (uv_x + self.bias[i]) # (bs, in_features, 1)
+
+                output_of_experts.append(dot_.squeeze(2)) # (bs, in_features)
+
+            # (3) Mixture of experts
+            output_of_experts = torch.stack(
+                output_of_experts, dim=2
+            ) # (bs, in_features, num_experts)
+            gating_score_experts = torch.stack(
+                gating_score_experts, dim=1
+            ) # (bs, num_experts, 1)
+            gating_score_experts = gating_score_experts.softmax(dim=1)
+
+            moe_out = torch.matmul(
+                output_of_experts, gating_score_experts
+            ) # (bs, in_features, 1)
+            x_l = moe_out + x_l # residual
+
+        return x_l.squeeze(-1) # (bs, in_features)
+
+
+class DCNv2(BaseModel):
+    @property
+    def model_name(self) -> str:
+        return "DCNv2"
+
+    @property
+    def default_task(self):
+        return "binary"
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | None = None,
+        sparse_features: list[SparseFeature] | None = None,
+        sequence_features: list[SequenceFeature] | None = None,
+        cross_num: int = 3,
+        cross_type: str = "matrix",
+        architecture: str = "parallel",
+        low_rank: int = 32,
+        num_experts: int = 4,
+        mlp_params: dict | None = None,
+        target: list[str] | str | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
+        super(DCNv2, self).__init__(
+            dense_features=dense_features,
+            sparse_features=sparse_features,
+            sequence_features=sequence_features,
+            target=target,
+            task=task or self.default_task,
+            device=device,
+            embedding_l1_reg=embedding_l1_reg,
+            dense_l1_reg=dense_l1_reg,
+            embedding_l2_reg=embedding_l2_reg,
+            dense_l2_reg=dense_l2_reg,
+            **kwargs,
+        )
+
+        self.all_features = dense_features + sparse_features + sequence_features
+        self.embedding = EmbeddingLayer(features=self.all_features)
+        input_dim = self.embedding.input_dim
+
+        architecture = architecture.lower()
+        if architecture not in {"parallel", "stacked"}:
+            raise ValueError("architecture must be 'parallel' or 'stacked'.")
+        self.architecture = architecture
+
+        cross_type = cross_type.lower()
+        if cross_type == "matrix":
+            self.cross_network = CrossNetV2(input_dim=input_dim, num_layers=cross_num)
+        elif cross_type in {"mix", "low_rank"}:
+            self.cross_network = CrossNetMix(
+                input_dim=input_dim,
+                num_layers=cross_num,
+                low_rank=low_rank,
+                num_experts=num_experts,
+            )
+        else:
+            raise ValueError("Unsupported cross_type for DCNv2. Use 'matrix' or 'mix'.")
+
+        if mlp_params is not None:
+            self.use_dnn = True
+            dnn_params = dict(mlp_params)
+            dnn_params.setdefault("output_layer", False)
+            self.mlp = MLP(input_dim=input_dim, **dnn_params)
+            deep_dim = self.mlp.output_dim
+            final_input_dim = (
+                input_dim + deep_dim if architecture == "parallel" else deep_dim
+            )
+        else:
+            if architecture == "stacked":
+                raise ValueError(
+                    "Stacked architecture requires mlp_params (deep tower)."
+                )
+            self.use_dnn = False
+            self.mlp = None
+            final_input_dim = input_dim
+
+        self.final_layer = nn.Linear(final_input_dim, 1)
+        self.prediction_layer = PredictionLayer(task_type=self.default_task)
+
+        self.register_regularization_weights(
+            embedding_attr="embedding",
+            include_modules=["cross_network", "mlp", "final_layer"],
+        )
 
-
-
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )
 
-
+    def forward(self, x) -> torch.Tensor:
+        input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
+        cross_out = self.cross_network(input_flat)
 
-
-
-
-
-
+        if self.use_dnn and self.mlp is not None:
+            if self.architecture == "parallel":
+                deep_out = self.mlp(input_flat)
+                combined = torch.cat([cross_out, deep_out], dim=-1)
+            else: # stacked
+                deep_out = self.mlp(cross_out)
+                combined = deep_out
+        else:
+            combined = cross_out
 
-
-        return
+        logit = self.final_layer(combined)
+        return self.prediction_layer(logit)
nextrec/models/ranking/deepfm.py
CHANGED
@@ -43,13 +43,13 @@ embedding,无需手工构造交叉特征即可端到端训练,常用于 CTR/
     - CTR/CVR 任务的常用强基线
 """
 
-import torch
 import torch.nn as nn
 
 from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import FM, LR, EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+
 class DeepFM(BaseModel):
     @property
     def model_name(self):
@@ -59,23 +59,34 @@ class DeepFM(BaseModel):
     def default_task(self):
         return "binary"
 
-    def __init__(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | None = None,
+        sparse_features: list[SparseFeature] | None = None,
+        sequence_features: list[SequenceFeature] | None = None,
+        mlp_params: dict | None = None,
+        target: list[str] | str | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []
+        mlp_params = mlp_params or {}
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
         super(DeepFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -87,13 +98,10 @@ class DeepFM(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
-
         self.fm_features = sparse_features + sequence_features
         self.deep_features = dense_features + sparse_features + sequence_features
         self.embedding = EmbeddingLayer(features=self.deep_features)
@@ -107,8 +115,15 @@ class DeepFM(BaseModel):
         self.prediction_layer = PredictionLayer(task_type=self.default_task)
 
         # Register regularization weights
-        self.register_regularization_weights(
-
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["linear", "mlp"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )
 
     def forward(self, x):
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)