PyPI - nextrec - Versions diffs - 0.3.6__py3-none-any.whl → 0.4.2__py3-none-any.whl - Mend

nextrec 0.3.6__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

nextrec/__init__.py +1 -1
nextrec/__version__.py +1 -1
nextrec/basic/activation.py +10 -5
nextrec/basic/callback.py +1 -0
nextrec/basic/features.py +30 -22
nextrec/basic/layers.py +244 -113
nextrec/basic/loggers.py +62 -43
nextrec/basic/metrics.py +268 -119
nextrec/basic/model.py +1373 -443
nextrec/basic/session.py +10 -3
nextrec/cli.py +498 -0
nextrec/data/__init__.py +19 -25
nextrec/data/batch_utils.py +11 -3
nextrec/data/data_processing.py +42 -24
nextrec/data/data_utils.py +26 -15
nextrec/data/dataloader.py +303 -96
nextrec/data/preprocessor.py +320 -199
nextrec/loss/listwise.py +17 -9
nextrec/loss/loss_utils.py +7 -8
nextrec/loss/pairwise.py +2 -0
nextrec/loss/pointwise.py +30 -12
nextrec/models/generative/hstu.py +106 -40
nextrec/models/match/dssm.py +82 -69
nextrec/models/match/dssm_v2.py +72 -58
nextrec/models/match/mind.py +175 -108
nextrec/models/match/sdm.py +104 -88
nextrec/models/match/youtube_dnn.py +73 -60
nextrec/models/multi_task/esmm.py +53 -39
nextrec/models/multi_task/mmoe.py +70 -47
nextrec/models/multi_task/ple.py +107 -50
nextrec/models/multi_task/poso.py +121 -41
nextrec/models/multi_task/share_bottom.py +54 -38
nextrec/models/ranking/afm.py +172 -45
nextrec/models/ranking/autoint.py +84 -61
nextrec/models/ranking/dcn.py +59 -42
nextrec/models/ranking/dcn_v2.py +64 -23
nextrec/models/ranking/deepfm.py +36 -26
nextrec/models/ranking/dien.py +158 -102
nextrec/models/ranking/din.py +88 -60
nextrec/models/ranking/fibinet.py +55 -35
nextrec/models/ranking/fm.py +32 -26
nextrec/models/ranking/masknet.py +95 -34
nextrec/models/ranking/pnn.py +34 -31
nextrec/models/ranking/widedeep.py +37 -29
nextrec/models/ranking/xdeepfm.py +63 -41
nextrec/utils/__init__.py +61 -32
nextrec/utils/config.py +490 -0
nextrec/utils/device.py +52 -12
nextrec/utils/distributed.py +141 -0
nextrec/utils/embedding.py +1 -0
nextrec/utils/feature.py +1 -0
nextrec/utils/file.py +32 -11
nextrec/utils/initializer.py +61 -16
nextrec/utils/optimizer.py +25 -9
nextrec/utils/synthetic_data.py +531 -0
nextrec/utils/tensor.py +24 -13
{nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/METADATA +15 -5
nextrec-0.4.2.dist-info/RECORD +69 -0
nextrec-0.4.2.dist-info/entry_points.txt +2 -0
nextrec-0.3.6.dist-info/RECORD +0 -64
{nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
{nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0

nextrec/models/ranking/dien.py CHANGED Viewed

@@ -13,37 +13,44 @@ import torch.nn as nn
 import torch.nn.functional as F
 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import EmbeddingLayer, MLP, AttentionPoolingLayer, PredictionLayer
+from nextrec.basic.layers import (
+    EmbeddingLayer,
+    MLP,
+    AttentionPoolingLayer,
+    PredictionLayer,
+)
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 class AUGRU(nn.Module):
     """Attention-aware GRU update gate used in DIEN (Zhou et al., 2019)."""
     """
     Attention-based GRU for DIEN
     Uses attention scores to weight the update of hidden states
     """
     def __init__(self, input_size, hidden_size, bias=True):
         super().__init__()
         self.input_size = input_size
         self.hidden_size = hidden_size
         self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, input_size))
         self.weight_hh = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
         if bias:
             self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size))
             self.bias_hh = nn.Parameter(torch.randn(3 * hidden_size))
         else:
-            self.register_parameter('bias_ih', None)
-            self.register_parameter('bias_hh', None)
+            self.register_parameter("bias_ih", None)
+            self.register_parameter("bias_hh", None)
         self.reset_parameters()
     def reset_parameters(self):
         std = 1.0 / (self.hidden_size) ** 0.5
         for weight in self.parameters():
             weight.data.uniform_(-std, std)
     def forward(self, x, att_scores):
         """
         Args:
@@ -59,12 +66,12 @@ class AUGRU(nn.Module):
         for t in range(seq_len):
             x_t = x[:, t, :]  # [batch_size, input_size]
             att_t = att_scores[:, t, :]  # [batch_size, 1]
             gi = F.linear(x_t, self.weight_ih, self.bias_ih)
             gh = F.linear(h, self.weight_hh, self.bias_hh)
             i_r, i_i, i_n = gi.chunk(3, 1)
             h_r, h_i, h_n = gh.chunk(3, 1)
             resetgate = torch.sigmoid(i_r + h_r)
             inputgate = torch.sigmoid(i_i + h_i)
             newgate = torch.tanh(i_n + resetgate * h_n)
@@ -72,21 +79,22 @@ class AUGRU(nn.Module):
             h = (1 - att_t) * h + att_t * newgate
             outputs.append(h.unsqueeze(1))
         output = torch.cat(outputs, dim=1)
-        return output, h
+        return output, h
 class DynamicGRU(nn.Module):
     """Dynamic GRU unit with auxiliary loss path from DIEN (Zhou et al., 2019)."""
     """
     GRU with dynamic routing for DIEN
     """
     def __init__(self, input_size, hidden_size, bias=True):
         super().__init__()
         self.input_size = input_size
         self.hidden_size = hidden_size
         # GRU parameters
         self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, input_size))
         self.weight_hh = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
@@ -94,16 +102,16 @@ class DynamicGRU(nn.Module):
             self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size))
             self.bias_hh = nn.Parameter(torch.randn(3 * hidden_size))
         else:
-            self.register_parameter('bias_ih', None)
-            self.register_parameter('bias_hh', None)
+            self.register_parameter("bias_ih", None)
+            self.register_parameter("bias_hh", None)
         self.reset_parameters()
     def reset_parameters(self):
         std = 1.0 / (self.hidden_size) ** 0.5
         for weight in self.parameters():
             weight.data.uniform_(-std, std)
     def forward(self, x, att_scores=None):
         """
         Args:
@@ -114,29 +122,29 @@ class DynamicGRU(nn.Module):
             hidden: [batch_size, hidden_size] - final hidden state
         """
         batch_size, seq_len, _ = x.shape
         # Initialize hidden state
         h = torch.zeros(batch_size, self.hidden_size, device=x.device)
         outputs = []
         for t in range(seq_len):
             x_t = x[:, t, :]  # [batch_size, input_size]
             # GRU computation
             gi = F.linear(x_t, self.weight_ih, self.bias_ih)
             gh = F.linear(h, self.weight_hh, self.bias_hh)
             i_r, i_i, i_n = gi.chunk(3, 1)
             h_r, h_i, h_n = gh.chunk(3, 1)
             resetgate = torch.sigmoid(i_r + h_r)
             inputgate = torch.sigmoid(i_i + h_i)
             newgate = torch.tanh(i_n + resetgate * h_n)
             h = newgate + inputgate * (h - newgate)
             outputs.append(h.unsqueeze(1))
         output = torch.cat(outputs, dim=1)  # [batch_size, seq_len, hidden_size]
         return output, h
@@ -146,174 +154,222 @@ class DIEN(BaseModel):
         return "DIEN"
     @property
-    def task_type(self):
+    def default_task(self):
         return "binary"
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 mlp_params: dict,
-                 gru_hidden_size: int = 64,
-                 attention_hidden_units: list[int] = [80, 40],
-                 attention_activation: str = 'sigmoid',
-                 use_negsampling: bool = False,
-                 target: list[str] = [],
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        gru_hidden_size: int = 64,
+        attention_hidden_units: list[int] = [80, 40],
+        attention_activation: str = "sigmoid",
+        use_negsampling: bool = False,
+        target: list[str] = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
         super(DIEN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
             target=target,
-            task=self.task_type,
+            task=task or self.default_task,
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            early_stop_patience=20,
-            **kwargs
+            **kwargs,
         )
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
         self.use_negsampling = use_negsampling
         # Features classification
         if len(sequence_features) == 0:
-            raise ValueError("DIEN requires at least one sequence feature for user behavior history")
+            raise ValueError(
+                "DIEN requires at least one sequence feature for user behavior history"
+            )
         self.behavior_feature = sequence_features[0]  # User behavior sequence
-        self.candidate_feature = sparse_features[-1] if sparse_features else None  # Candidate item
-        self.other_sparse_features = sparse_features[:-1] if self.candidate_feature else sparse_features
+        self.candidate_feature = (
+            sparse_features[-1] if sparse_features else None
+        )  # Candidate item
+        self.other_sparse_features = (
+            sparse_features[:-1] if self.candidate_feature else sparse_features
+        )
         self.dense_features_list = dense_features
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)
         behavior_emb_dim = self.behavior_feature.embedding_dim
         self.candidate_proj = None
-        if self.candidate_feature is not None and self.candidate_feature.embedding_dim != gru_hidden_size:
-            self.candidate_proj = nn.Linear(self.candidate_feature.embedding_dim, gru_hidden_size)
+        if (
+            self.candidate_feature is not None
+            and self.candidate_feature.embedding_dim != gru_hidden_size
+        ):
+            self.candidate_proj = nn.Linear(
+                self.candidate_feature.embedding_dim, gru_hidden_size
+            )
         # Interest Extractor Layer (GRU)
         self.interest_extractor = DynamicGRU(
-            input_size=behavior_emb_dim,
-            hidden_size=gru_hidden_size
+            input_size=behavior_emb_dim, hidden_size=gru_hidden_size
         )
         # Attention layer for computing attention scores
         self.attention_layer = AttentionPoolingLayer(
             embedding_dim=gru_hidden_size,
             hidden_units=attention_hidden_units,
             activation=attention_activation,
-            use_softmax=False  # We'll use scores directly for AUGRU
+            use_softmax=False,  # We'll use scores directly for AUGRU
         )
         # Interest Evolution Layer (AUGRU)
-        self.interest_evolution = AUGRU(input_size=gru_hidden_size, hidden_size=gru_hidden_size)
+        self.interest_evolution = AUGRU(
+            input_size=gru_hidden_size, hidden_size=gru_hidden_size
+        )
         # Calculate MLP input dimension
         mlp_input_dim = 0
         if self.candidate_feature:
             mlp_input_dim += self.candidate_feature.embedding_dim
         mlp_input_dim += gru_hidden_size  # final interest state
         mlp_input_dim += sum([f.embedding_dim for f in self.other_sparse_features])
-        mlp_input_dim += sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        mlp_input_dim += sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         # MLP for final prediction
         self.mlp = MLP(input_dim=mlp_input_dim, **mlp_params)
-        self.prediction_layer = PredictionLayer(task_type=self.task_type)
+        self.prediction_layer = PredictionLayer(task_type=self.task)
         # Register regularization weights
-        self.register_regularization_weights(embedding_attr='embedding', include_modules=['interest_extractor', 'interest_evolution', 'attention_layer', 'mlp', 'candidate_proj'])
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
+        self.register_regularization_weights(
+            embedding_attr="embedding",
+            include_modules=[
+                "interest_extractor",
+                "interest_evolution",
+                "attention_layer",
+                "mlp",
+                "candidate_proj",
+            ],
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )
     def forward(self, x):
         # Get candidate item embedding
         if self.candidate_feature:
-            candidate_emb = self.embedding.embed_dict[self.candidate_feature.embedding_name](x[self.candidate_feature.name].long())  # [B, emb_dim]
+            candidate_emb = self.embedding.embed_dict[
+                self.candidate_feature.embedding_name
+            ](
+                x[self.candidate_feature.name].long()
+            )  # [B, emb_dim]
         else:
             raise ValueError("DIEN requires a candidate item feature")
         # Get behavior sequence embedding
         behavior_seq = x[self.behavior_feature.name].long()  # [B, seq_len]
-        behavior_emb = self.embedding.embed_dict[self.behavior_feature.embedding_name](behavior_seq)  # [B, seq_len, emb_dim]
+        behavior_emb = self.embedding.embed_dict[self.behavior_feature.embedding_name](
+            behavior_seq
+        )  # [B, seq_len, emb_dim]
         # Create mask for padding
         if self.behavior_feature.padding_idx is not None:
-            mask = (behavior_seq != self.behavior_feature.padding_idx).unsqueeze(-1).float()
+            mask = (
+                (behavior_seq != self.behavior_feature.padding_idx)
+                .unsqueeze(-1)
+                .float()
+            )
         else:
             mask = (behavior_seq != 0).unsqueeze(-1).float()
         # Step 1: Interest Extractor (GRU)
-        interest_states, _ = self.interest_extractor(behavior_emb)  # [B, seq_len, hidden_size]
+        interest_states, _ = self.interest_extractor(
+            behavior_emb
+        )  # [B, seq_len, hidden_size]
         # Step 2: Compute attention scores for each time step
         batch_size, seq_len, hidden_size = interest_states.shape
         # Project candidate to hidden_size if necessary (defined in __init__)
         if self.candidate_proj is not None:
             candidate_for_attention = self.candidate_proj(candidate_emb)
         else:
             candidate_for_attention = candidate_emb
         # Compute attention scores for AUGRU
         attention_scores = []
         for t in range(seq_len):
             score = self.attention_layer.attention_net(
-                torch.cat([
-                    candidate_for_attention,
-                    interest_states[:, t, :],
-                    candidate_for_attention - interest_states[:, t, :],
-                    candidate_for_attention * interest_states[:, t, :]
-                ], dim=-1)
+                torch.cat(
+                    [
+                        candidate_for_attention,
+                        interest_states[:, t, :],
+                        candidate_for_attention - interest_states[:, t, :],
+                        candidate_for_attention * interest_states[:, t, :],
+                    ],
+                    dim=-1,
+                )
             )  # [B, 1]
             attention_scores.append(score)
-        attention_scores = torch.cat(attention_scores, dim=1).unsqueeze(-1)  # [B, seq_len, 1]
+        attention_scores = torch.cat(attention_scores, dim=1).unsqueeze(
+            -1
+        )  # [B, seq_len, 1]
         attention_scores = torch.sigmoid(attention_scores)  # Normalize to [0, 1]
         # Apply mask to attention scores
         attention_scores = attention_scores * mask
         # Step 3: Interest Evolution (AUGRU)
         final_states, final_interest = self.interest_evolution(
-            interest_states,
-            attention_scores
+            interest_states, attention_scores
         )  # final_interest: [B, hidden_size]
         # Get other features
         other_embeddings = []
         other_embeddings.append(candidate_emb)
         other_embeddings.append(final_interest)
         # Other sparse features
         for feat in self.other_sparse_features:
-            feat_emb = self.embedding.embed_dict[feat.embedding_name](x[feat.name].long())
+            feat_emb = self.embedding.embed_dict[feat.embedding_name](
+                x[feat.name].long()
+            )
             other_embeddings.append(feat_emb)
         # Dense features
         for feat in self.dense_features_list:
             val = x[feat.name].float()
             if val.dim() == 1:
                 val = val.unsqueeze(1)
             other_embeddings.append(val)
         # Concatenate all features
         concat_input = torch.cat(other_embeddings, dim=-1)  # [B, total_dim]
         # MLP prediction
         y = self.mlp(concat_input)  # [B, 1]
         return self.prediction_layer(y)

nextrec 0.3.6__py3-none-any.whl → 0.4.2__py3-none-any.whl

nextrec 0.3.6__py3-none-any.whl → 0.4.2__py3-none-any.whl