nextrec-0.1.1-py3-none-any.whl → nextrec-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +4 -4
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -9
- nextrec/basic/callback.py +1 -0
- nextrec/basic/dataloader.py +168 -127
- nextrec/basic/features.py +24 -27
- nextrec/basic/layers.py +328 -159
- nextrec/basic/loggers.py +50 -37
- nextrec/basic/metrics.py +255 -147
- nextrec/basic/model.py +817 -462
- nextrec/data/__init__.py +5 -5
- nextrec/data/data_utils.py +16 -12
- nextrec/data/preprocessor.py +276 -252
- nextrec/loss/__init__.py +12 -12
- nextrec/loss/loss_utils.py +30 -22
- nextrec/loss/match_losses.py +116 -83
- nextrec/models/match/__init__.py +5 -5
- nextrec/models/match/dssm.py +70 -61
- nextrec/models/match/dssm_v2.py +61 -51
- nextrec/models/match/mind.py +89 -71
- nextrec/models/match/sdm.py +93 -81
- nextrec/models/match/youtube_dnn.py +62 -53
- nextrec/models/multi_task/esmm.py +49 -43
- nextrec/models/multi_task/mmoe.py +65 -56
- nextrec/models/multi_task/ple.py +92 -65
- nextrec/models/multi_task/share_bottom.py +48 -42
- nextrec/models/ranking/__init__.py +7 -7
- nextrec/models/ranking/afm.py +39 -30
- nextrec/models/ranking/autoint.py +70 -57
- nextrec/models/ranking/dcn.py +43 -35
- nextrec/models/ranking/deepfm.py +34 -28
- nextrec/models/ranking/dien.py +115 -79
- nextrec/models/ranking/din.py +84 -60
- nextrec/models/ranking/fibinet.py +51 -35
- nextrec/models/ranking/fm.py +28 -26
- nextrec/models/ranking/masknet.py +31 -31
- nextrec/models/ranking/pnn.py +30 -31
- nextrec/models/ranking/widedeep.py +36 -31
- nextrec/models/ranking/xdeepfm.py +46 -39
- nextrec/utils/__init__.py +9 -9
- nextrec/utils/embedding.py +1 -1
- nextrec/utils/initializer.py +23 -15
- nextrec/utils/optimizer.py +14 -10
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/METADATA +6 -40
- nextrec-0.1.2.dist-info/RECORD +51 -0
- nextrec-0.1.1.dist-info/RECORD +0 -51
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/WHEEL +0 -0
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/match/youtube_dnn.py

@@ -6,6 +6,7 @@ Reference:
 [1] Covington P, Adams J, Sargin E. Deep neural networks for youtube recommendations[C]
 //Proceedings of the 10th ACM conference on recommender systems. 2016: 191-198.
 """
+
 import torch
 import torch.nn as nn
 from typing import Literal
@@ -18,40 +19,42 @@ from nextrec.basic.layers import MLP, EmbeddingLayer, AveragePooling
 class YoutubeDNN(BaseMatchModel):
     """
     YouTube Deep Neural Network for Recommendations
-
+
     User tower: historical behavior sequence + user features -> user embedding
     Item tower: item features -> item embedding
     Training: sampled softmax loss (listwise)
     """
-
+
     @property
     def model_name(self) -> str:
         return "YouTubeDNN"
-
-    def __init__(
-        (... 23 old signature lines not captured in this rendering ...)
+
+    def __init__(
+        self,
+        user_dense_features: list[DenseFeature] | None = None,
+        user_sparse_features: list[SparseFeature] | None = None,
+        user_sequence_features: list[SequenceFeature] | None = None,
+        item_dense_features: list[DenseFeature] | None = None,
+        item_sparse_features: list[SparseFeature] | None = None,
+        item_sequence_features: list[SequenceFeature] | None = None,
+        user_dnn_hidden_units: list[int] = [256, 128, 64],
+        item_dnn_hidden_units: list[int] = [256, 128, 64],
+        embedding_dim: int = 64,
+        dnn_activation: str = "relu",
+        dnn_dropout: float = 0.0,
+        training_mode: Literal["pointwise", "pairwise", "listwise"] = "listwise",
+        num_negative_samples: int = 100,
+        temperature: float = 1.0,
+        similarity_metric: Literal["dot", "cosine", "euclidean"] = "dot",
+        device: str = "cpu",
+        embedding_l1_reg: float = 0.0,
+        dense_l1_reg: float = 0.0,
+        embedding_l2_reg: float = 0.0,
+        dense_l2_reg: float = 0.0,
+        early_stop_patience: int = 20,
+        model_id: str = "youtube_dnn",
+    ):
+
         super(YoutubeDNN, self).__init__(
             user_dense_features=user_dense_features,
             user_sparse_features=user_sparse_features,
@@ -69,13 +72,13 @@ class YoutubeDNN(BaseMatchModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=early_stop_patience,
-            model_id=model_id
+            model_id=model_id,
         )
-
+
         self.embedding_dim = embedding_dim
         self.user_dnn_hidden_units = user_dnn_hidden_units
         self.item_dnn_hidden_units = item_dnn_hidden_units
-
+
         # User tower
         user_features = []
         if user_dense_features:
@@ -84,10 +87,10 @@ class YoutubeDNN(BaseMatchModel):
             user_features.extend(user_sparse_features)
         if user_sequence_features:
             user_features.extend(user_sequence_features)
-
+
         if len(user_features) > 0:
             self.user_embedding = EmbeddingLayer(user_features)
-
+
         user_input_dim = 0
         for feat in user_dense_features or []:
             user_input_dim += 1
@@ -96,16 +99,16 @@ class YoutubeDNN(BaseMatchModel):
         for feat in user_sequence_features or []:
             # Sequence features are aggregated via average pooling
             user_input_dim += feat.embedding_dim
-
+
         user_dnn_units = user_dnn_hidden_units + [embedding_dim]
         self.user_dnn = MLP(
             input_dim=user_input_dim,
             dims=user_dnn_units,
             output_layer=False,
             dropout=dnn_dropout,
-            activation=dnn_activation
+            activation=dnn_activation,
         )
-
+
         # Item tower
         item_features = []
         if item_dense_features:
@@ -114,10 +117,10 @@ class YoutubeDNN(BaseMatchModel):
             item_features.extend(item_sparse_features)
         if item_sequence_features:
             item_features.extend(item_sequence_features)
-
+
         if len(item_features) > 0:
             self.item_embedding = EmbeddingLayer(item_features)
-
+
         item_input_dim = 0
         for feat in item_dense_features or []:
             item_input_dim += 1
@@ -125,48 +128,54 @@ class YoutubeDNN(BaseMatchModel):
             item_input_dim += feat.embedding_dim
         for feat in item_sequence_features or []:
             item_input_dim += feat.embedding_dim
-
+
         item_dnn_units = item_dnn_hidden_units + [embedding_dim]
         self.item_dnn = MLP(
             input_dim=item_input_dim,
             dims=item_dnn_units,
             output_layer=False,
             dropout=dnn_dropout,
-            activation=dnn_activation
+            activation=dnn_activation,
         )
-
+
         self._register_regularization_weights(
-            embedding_attr='user_embedding',
-            include_modules=['user_dnn']
+            embedding_attr="user_embedding", include_modules=["user_dnn"]
         )
         self._register_regularization_weights(
-            embedding_attr='item_embedding',
-            include_modules=['item_dnn']
+            embedding_attr="item_embedding", include_modules=["item_dnn"]
         )
-
+
         self.to(device)
-
+
     def user_tower(self, user_input: dict) -> torch.Tensor:
         """
         User tower
        Processes the user's historical behavior sequence and other user features
         """
-        all_user_features = self.user_dense_features + self.user_sparse_features + self.user_sequence_features
+        all_user_features = (
+            self.user_dense_features
+            + self.user_sparse_features
+            + self.user_sequence_features
+        )
         user_emb = self.user_embedding(user_input, all_user_features, squeeze_dim=True)
         user_emb = self.user_dnn(user_emb)
-
+
         # L2 normalization
         user_emb = torch.nn.functional.normalize(user_emb, p=2, dim=1)
-
+
         return user_emb
-
+
     def item_tower(self, item_input: dict) -> torch.Tensor:
         """Item tower"""
-        all_item_features = self.item_dense_features + self.item_sparse_features + self.item_sequence_features
+        all_item_features = (
+            self.item_dense_features
+            + self.item_sparse_features
+            + self.item_sequence_features
+        )
         item_emb = self.item_embedding(item_input, all_item_features, squeeze_dim=True)
         item_emb = self.item_dnn(item_emb)
-
+
         # L2 normalization
         item_emb = torch.nn.functional.normalize(item_emb, p=2, dim=1)
-
+
         return item_emb
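Both towers end in L2 normalization, so with the default similarity_metric="dot" the user-item score behaves like a temperature-scaled cosine similarity, and the listwise training_mode amounts to a softmax over candidate items. A minimal sketch of that objective with in-batch negatives, written against plain PyTorch (the score helper and toy shapes are illustrative, not nextrec's actual training loop):

    import torch
    import torch.nn.functional as F

    def score(user_emb: torch.Tensor, item_emb: torch.Tensor, temperature: float = 1.0) -> torch.Tensor:
        # user_emb: [B, D], item_emb: [B, D] -> logits: [B, B]
        # Inputs are already L2-normalized, so the dot product is a cosine similarity.
        return user_emb @ item_emb.t() / temperature

    user_emb = F.normalize(torch.randn(4, 64), p=2, dim=1)
    item_emb = F.normalize(torch.randn(4, 64), p=2, dim=1)
    logits = score(user_emb, item_emb, temperature=0.2)
    # Diagonal entries are the positive pairs; off-diagonals act as sampled negatives.
    loss = F.cross_entropy(logits, torch.arange(4))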
nextrec/models/multi_task/esmm.py

@@ -17,15 +17,15 @@ from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 class ESMM(BaseModel):
     """
     Entire Space Multi-Task Model
-
+
     ESMM is designed for CVR (Conversion Rate) prediction. It models two related tasks:
     - CTR task: P(click | impression)
     - CVR task: P(conversion | click)
     - CTCVR task (auxiliary): P(click & conversion | impression) = P(click) * P(conversion | click)
-
+
     This design addresses the sample selection bias and data sparsity issues in CVR modeling.
     """
-
+
     @property
     def model_name(self):
         return "ESMM"
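As a quick numeric check of the decomposition above: if P(click | impression) = 0.10 and P(conversion | click) = 0.05, then P(click & conversion | impression) = 0.10 * 0.05 = 0.005, i.e. five conversions per thousand impressions. Because the product is the quantity supervised with impression-level labels, the CVR head is trained over the entire impression space instead of only on clicked samples, which is how ESMM addresses the selection bias mentioned in the docstring.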
@@ -33,30 +33,34 @@ class ESMM(BaseModel):
     @property
     def task_type(self):
         # ESMM has fixed task types: CTR (binary) and CVR (binary)
-        return ['binary', 'binary']
-
-    def __init__(
-        (... 17 old signature lines not captured in this rendering ...)
+        return ["binary", "binary"]
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        ctr_params: dict,
+        cvr_params: dict,
+        target: list[str] = ["ctr", "ctcvr"],  # Note: ctcvr = ctr * cvr
+        task: str | list[str] = "binary",
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
+        device: str = "cpu",
+        model_id: str = "baseline",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+    ):
+
         # ESMM requires exactly 2 targets: ctr and ctcvr
         if len(target) != 2:
-            raise ValueError(
-                f"ESMM requires exactly 2 targets (ctr and ctcvr), got {len(target)}")
+            raise ValueError(
+                f"ESMM requires exactly 2 targets (ctr and ctcvr), got {len(target)}"
+            )
+
         super(ESMM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -69,13 +73,13 @@ class ESMM(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id
+            model_id=model_id,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # All features
         self.all_features = dense_features + sparse_features + sequence_features

@@ -83,46 +87,48 @@ class ESMM(BaseModel):
         self.embedding = EmbeddingLayer(features=self.all_features)

         # Calculate input dimension
-        emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, 'embedding_dim', 1) or 1 for f in dense_features])
+        emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.all_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         input_dim = emb_dim_total + dense_input_dim
-
+
         # CTR tower
         self.ctr_tower = MLP(input_dim=input_dim, output_layer=True, **ctr_params)
-
+
         # CVR tower
         self.cvr_tower = MLP(input_dim=input_dim, output_layer=True, **cvr_params)
         self.prediction_layer = PredictionLayer(
-            task_type=self.task_type,
-            task_dims=[1, 1]
+            task_type=self.task_type, task_dims=[1, 1]
         )

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['ctr_tower', 'cvr_tower']
+            embedding_attr="embedding", include_modules=["ctr_tower", "cvr_tower"]
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss
-        )
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
-
+
         # CTR prediction: P(click | impression)
         ctr_logit = self.ctr_tower(input_flat)  # [B, 1]
         cvr_logit = self.cvr_tower(input_flat)  # [B, 1]
         logits = torch.cat([ctr_logit, cvr_logit], dim=1)
         preds = self.prediction_layer(logits)
         ctr, cvr = preds.chunk(2, dim=1)
-
+
         # CTCVR prediction: P(click & conversion | impression) = P(click) * P(conversion | click)
         ctcvr = ctr * cvr  # [B, 1]
-
+
         # Output: [CTR, CTCVR]
         # Note: We supervise CTR with click labels and CTCVR with conversion labels
         y = torch.cat([ctr, ctcvr], dim=1)  # [B, 2]
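Since forward() returns [CTR, CTCVR], both heads can be supervised with impression-level labels. A minimal sketch of the entire-space objective, assuming the default "bce" loss; the tensors are toy values rather than nextrec's API:

    import torch
    import torch.nn.functional as F

    # Impression-level labels; a conversion implies a click.
    click = torch.tensor([1.0, 0.0, 1.0, 1.0])
    conversion = torch.tensor([1.0, 0.0, 0.0, 1.0])

    ctr = torch.tensor([0.9, 0.2, 0.7, 0.8])  # predicted P(click | impression)
    cvr = torch.tensor([0.8, 0.5, 0.3, 0.9])  # predicted P(conversion | click)
    ctcvr = ctr * cvr                          # predicted P(click & conversion | impression)

    # Both terms run over ALL impressions, so the CVR tower is never fit on a
    # click-filtered subset of the data.
    loss = F.binary_cross_entropy(ctr, click) + F.binary_cross_entropy(ctcvr, conversion)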
nextrec/models/multi_task/mmoe.py

@@ -17,13 +17,13 @@ from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 class MMOE(BaseModel):
     """
     Multi-gate Mixture-of-Experts
-
+
     MMOE improves upon shared-bottom architecture by using multiple expert networks
     and task-specific gating networks. Each task has its own gate that learns to
     weight the contributions of different experts, allowing for both task-specific
     and shared representations.
     """
-
+
     @property
     def model_name(self):
         return "MMOE"
@@ -31,26 +31,28 @@ class MMOE(BaseModel):
     @property
     def task_type(self):
         return self.task if isinstance(self.task, list) else [self.task]
-
-    def __init__(
-        (... 18 old signature lines not captured in this rendering ...)
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] = [],
+        sparse_features: list[SparseFeature] = [],
+        sequence_features: list[SequenceFeature] = [],
+        expert_params: dict = {},
+        num_experts: int = 3,
+        tower_params_list: list[dict] = [],
+        target: list[str] = [],
+        task: str | list[str] = "binary",
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
+        device: str = "cpu",
+        model_id: str = "baseline",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+    ):
+
         super(MMOE, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -63,20 +65,22 @@ class MMOE(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id
+            model_id=model_id,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Number of tasks and experts
         self.num_tasks = len(target)
         self.num_experts = num_experts
-
+
         if len(tower_params_list) != self.num_tasks:
-            raise ValueError(
-                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})")
+            raise ValueError(
+                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
+            )
+
         # All features
         self.all_features = dense_features + sparse_features + sequence_features

@@ -84,78 +88,83 @@ class MMOE(BaseModel):
         self.embedding = EmbeddingLayer(features=self.all_features)

         # Calculate input dimension
-        emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, 'embedding_dim', 1) or 1 for f in dense_features])
+        emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.all_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         input_dim = emb_dim_total + dense_input_dim
-
+
         # Expert networks (shared by all tasks)
         self.experts = nn.ModuleList()
         for _ in range(num_experts):
             expert = MLP(input_dim=input_dim, output_layer=False, **expert_params)
             self.experts.append(expert)
-
+
         # Get expert output dimension
-        if 'dims' in expert_params and len(expert_params['dims']) > 0:
-            expert_output_dim = expert_params['dims'][-1]
+        if "dims" in expert_params and len(expert_params["dims"]) > 0:
+            expert_output_dim = expert_params["dims"][-1]
         else:
             expert_output_dim = input_dim
-
+
         # Task-specific gates
         self.gates = nn.ModuleList()
         for _ in range(self.num_tasks):
-            gate = nn.Sequential(
-                nn.Linear(input_dim, num_experts),
-                nn.Softmax(dim=1)
-            )
+            gate = nn.Sequential(nn.Linear(input_dim, num_experts), nn.Softmax(dim=1))
             self.gates.append(gate)
-
+
         # Task-specific towers
         self.towers = nn.ModuleList()
         for tower_params in tower_params_list:
             tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = PredictionLayer(
-            task_type=self.task_type,
-            task_dims=[1] * self.num_tasks
+            task_type=self.task_type, task_dims=[1] * self.num_tasks
         )

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['experts', 'gates', 'towers']
+            embedding_attr="embedding", include_modules=["experts", "gates", "towers"]
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss
-        )
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
-
+
         # Expert outputs: [num_experts, B, expert_dim]
         expert_outputs = [expert(input_flat) for expert in self.experts]
-        expert_outputs = torch.stack(expert_outputs, dim=0)  # [num_experts, B, expert_dim]
-
+        expert_outputs = torch.stack(
+            expert_outputs, dim=0
+        )  # [num_experts, B, expert_dim]
+
         # Task-specific processing
         task_outputs = []
         for task_idx in range(self.num_tasks):
             # Gate weights for this task: [B, num_experts]
             gate_weights = self.gates[task_idx](input_flat)  # [B, num_experts]
-
+
             # Weighted sum of expert outputs
             # gate_weights: [B, num_experts, 1]
             # expert_outputs: [num_experts, B, expert_dim]
             gate_weights = gate_weights.unsqueeze(2)  # [B, num_experts, 1]
-            expert_outputs_t = expert_outputs.permute(1, 0, 2)  # [B, num_experts, expert_dim]
-            gated_output = torch.sum(gate_weights * expert_outputs_t, dim=1)  # [B, expert_dim]
-
+            expert_outputs_t = expert_outputs.permute(
+                1, 0, 2
+            )  # [B, num_experts, expert_dim]
+            gated_output = torch.sum(
+                gate_weights * expert_outputs_t, dim=1
+            )  # [B, expert_dim]
+
             # Tower output
             tower_output = self.towers[task_idx](gated_output)  # [B, 1]
             task_outputs.append(tower_output)
-
+
         # Stack outputs: [B, num_tasks]
         y = torch.cat(task_outputs, dim=1)
         return self.prediction_layer(y)
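The unsqueeze/permute/sum sequence in forward() is a per-task convex combination of expert outputs: for task t, the mixed representation is the sum over experts e of g_t,e(x) * f_e(x), with the softmax gate supplying the weights. A standalone sketch on random tensors (illustrative shapes, not nextrec code) showing an equivalent einsum formulation:

    import torch

    B, E, D = 8, 3, 16  # batch size, number of experts, expert output dim
    expert_outputs = torch.randn(E, B, D)                   # stacked expert outputs
    gate_weights = torch.softmax(torch.randn(B, E), dim=1)  # one such gate per task

    # Same computation as the model's unsqueeze/permute/sum:
    ref = (gate_weights.unsqueeze(2) * expert_outputs.permute(1, 0, 2)).sum(dim=1)
    mixed = torch.einsum("be,ebd->bd", gate_weights, expert_outputs)  # [B, D]
    assert torch.allclose(mixed, ref, atol=1e-6)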