PyPI - nextrec - Versions diffs - 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl - Mend

nextrec 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

nextrec/__init__.py +1 -1
nextrec/__version__.py +1 -1
nextrec/basic/activation.py +10 -5
nextrec/basic/callback.py +1 -0
nextrec/basic/features.py +30 -22
nextrec/basic/layers.py +220 -106
nextrec/basic/loggers.py +62 -43
nextrec/basic/metrics.py +268 -119
nextrec/basic/model.py +1082 -400
nextrec/basic/session.py +10 -3
nextrec/cli.py +498 -0
nextrec/data/__init__.py +19 -25
nextrec/data/batch_utils.py +11 -3
nextrec/data/data_processing.py +51 -45
nextrec/data/data_utils.py +26 -15
nextrec/data/dataloader.py +272 -95
nextrec/data/preprocessor.py +320 -199
nextrec/loss/listwise.py +17 -9
nextrec/loss/loss_utils.py +7 -8
nextrec/loss/pairwise.py +2 -0
nextrec/loss/pointwise.py +30 -12
nextrec/models/generative/hstu.py +103 -38
nextrec/models/match/dssm.py +82 -68
nextrec/models/match/dssm_v2.py +72 -57
nextrec/models/match/mind.py +175 -107
nextrec/models/match/sdm.py +104 -87
nextrec/models/match/youtube_dnn.py +73 -59
nextrec/models/multi_task/esmm.py +53 -37
nextrec/models/multi_task/mmoe.py +64 -45
nextrec/models/multi_task/ple.py +101 -48
nextrec/models/multi_task/poso.py +113 -36
nextrec/models/multi_task/share_bottom.py +48 -35
nextrec/models/ranking/afm.py +72 -37
nextrec/models/ranking/autoint.py +72 -55
nextrec/models/ranking/dcn.py +55 -35
nextrec/models/ranking/dcn_v2.py +64 -23
nextrec/models/ranking/deepfm.py +32 -22
nextrec/models/ranking/dien.py +155 -99
nextrec/models/ranking/din.py +85 -57
nextrec/models/ranking/fibinet.py +52 -32
nextrec/models/ranking/fm.py +29 -23
nextrec/models/ranking/masknet.py +91 -29
nextrec/models/ranking/pnn.py +31 -28
nextrec/models/ranking/widedeep.py +34 -26
nextrec/models/ranking/xdeepfm.py +60 -38
nextrec/utils/__init__.py +59 -34
nextrec/utils/config.py +490 -0
nextrec/utils/device.py +30 -20
nextrec/utils/distributed.py +36 -9
nextrec/utils/embedding.py +1 -0
nextrec/utils/feature.py +1 -0
nextrec/utils/file.py +32 -11
nextrec/utils/initializer.py +61 -16
nextrec/utils/optimizer.py +25 -9
nextrec/utils/synthetic_data.py +283 -165
nextrec/utils/tensor.py +24 -13
{nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/METADATA +4 -4
nextrec-0.4.2.dist-info/RECORD +69 -0
nextrec-0.4.2.dist-info/entry_points.txt +2 -0
nextrec-0.4.1.dist-info/RECORD +0 -66
{nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
{nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0

nextrec/models/multi_task/poso.py CHANGED Viewed

@@ -79,7 +79,7 @@ class POSOGate(nn.Module):
         h = self.act(self.fc1(pc))
         g = torch.sigmoid(self.fc2(h))  # (B, out_dim) in (0,1)
         return self.scale_factor * g
 class POSOFC(nn.Module):
     """
@@ -116,10 +116,10 @@ class POSOFC(nn.Module):
         pc: (B, pc_dim)
         return: (B, out_dim)
         """
-        h = self.act(self.linear(x))          # Standard FC with activation
-        g = self.gate(pc)                     # (B, out_dim)
-        return g * h                          # Element-wise gating
+        h = self.act(self.linear(x))  # Standard FC with activation
+        g = self.gate(pc)  # (B, out_dim)
+        return g * h  # Element-wise gating
 class POSOMLP(nn.Module):
     """
@@ -173,7 +173,7 @@ class POSOMLP(nn.Module):
             if self.dropout is not None:
                 h = self.dropout(h)
         return h
 class POSOMMoE(nn.Module):
     """
@@ -183,7 +183,7 @@ class POSOMMoE(nn.Module):
         - Task gates aggregate the PC-masked expert outputs
     Concretely:
-        h_e = expert_e(x)                 # (B, D)
+        h_e = expert_e(x)                 # (B, D)
         g_e = POSOGate(pc) in (0, C)^{D}  # (B, D)
         h_e_tilde = g_e ⊙ h_e            # (B, D)
         z_t = Σ_e gate_t,e(x) * h_e_tilde
@@ -192,14 +192,14 @@ class POSOMMoE(nn.Module):
     def __init__(
         self,
         input_dim: int,
-        pc_dim: int,                   # for poso feature dimension
+        pc_dim: int,  # for poso feature dimension
         num_experts: int,
         expert_hidden_dims: list[int],
         num_tasks: int,
         activation: str = "relu",
         expert_dropout: float = 0.0,
-        gate_hidden_dim: int = 32,     # for poso gate hidden dimension
-        scale_factor: float = 2.0,     # for poso gate scale factor
+        gate_hidden_dim: int = 32,  # for poso gate hidden dimension
+        scale_factor: float = 2.0,  # for poso gate scale factor
         gate_use_softmax: bool = True,
     ) -> None:
         super().__init__()
@@ -207,15 +207,41 @@ class POSOMMoE(nn.Module):
         self.num_tasks = num_tasks
         # Experts built with framework MLP, same as standard MMoE
-        self.experts = nn.ModuleList([MLP(input_dim=input_dim, output_layer=False, dims=expert_hidden_dims, activation=activation, dropout=expert_dropout,) for _ in range(num_experts)])
-        self.expert_output_dim = expert_hidden_dims[-1] if expert_hidden_dims else input_dim
+        self.experts = nn.ModuleList(
+            [
+                MLP(
+                    input_dim=input_dim,
+                    output_layer=False,
+                    dims=expert_hidden_dims,
+                    activation=activation,
+                    dropout=expert_dropout,
+                )
+                for _ in range(num_experts)
+            ]
+        )
+        self.expert_output_dim = (
+            expert_hidden_dims[-1] if expert_hidden_dims else input_dim
+        )
         # Task-specific gates: gate_t(x) over experts
-        self.gates = nn.ModuleList([nn.Linear(input_dim, num_experts) for _ in range(num_tasks)])
+        self.gates = nn.ModuleList(
+            [nn.Linear(input_dim, num_experts) for _ in range(num_tasks)]
+        )
         self.gate_use_softmax = gate_use_softmax
         # PC gate per expert: g_e(pc) ∈ R^D
-        self.expert_pc_gates = nn.ModuleList([POSOGate(pc_dim=pc_dim, out_dim=self.expert_output_dim, hidden_dim=gate_hidden_dim, scale_factor=scale_factor, activation=activation,) for _ in range(num_experts)])
+        self.expert_pc_gates = nn.ModuleList(
+            [
+                POSOGate(
+                    pc_dim=pc_dim,
+                    out_dim=self.expert_output_dim,
+                    hidden_dim=gate_hidden_dim,
+                    scale_factor=scale_factor,
+                    activation=activation,
+                )
+                for _ in range(num_experts)
+            ]
+        )
     def forward(self, x: torch.Tensor, pc: torch.Tensor) -> list[torch.Tensor]:
         """
@@ -226,9 +252,9 @@ class POSOMMoE(nn.Module):
         # 1) Expert outputs with POSO PC gate
         masked_expert_outputs = []
         for e, expert in enumerate(self.experts):
-            h_e = expert(x)                         # (B, D)
-            g_e = self.expert_pc_gates[e](pc)       # (B, D)
-            h_e_tilde = g_e * h_e                   # (B, D)
+            h_e = expert(x)  # (B, D)
+            g_e = self.expert_pc_gates[e](pc)  # (B, D)
+            h_e_tilde = g_e * h_e  # (B, D)
             masked_expert_outputs.append(h_e_tilde)
         masked_expert_outputs = torch.stack(masked_expert_outputs, dim=1)  # (B, E, D)
@@ -236,13 +262,13 @@ class POSOMMoE(nn.Module):
         # 2) Task gates depend on x as in standard MMoE
         task_outputs: list[torch.Tensor] = []
         for t in range(self.num_tasks):
-            logits = self.gates[t](x)               # (B, E)
+            logits = self.gates[t](x)  # (B, E)
             if self.gate_use_softmax:
                 gate = F.softmax(logits, dim=1)
             else:
                 gate = logits
-            gate = gate.unsqueeze(-1)               # (B, E, 1)
+            gate = gate.unsqueeze(-1)  # (B, E, 1)
             z_t = torch.sum(gate * masked_expert_outputs, dim=1)  # (B, D)
             task_outputs.append(z_t)
@@ -312,12 +338,24 @@ class POSO(BaseModel):
         self.pc_sequence_features = list(pc_sequence_features or [])
         self.num_tasks = len(target)
-        if not self.pc_dense_features and not self.pc_sparse_features and not self.pc_sequence_features:
-            raise ValueError("POSO requires at least one PC feature for personalization.")
+        if (
+            not self.pc_dense_features
+            and not self.pc_sparse_features
+            and not self.pc_sequence_features
+        ):
+            raise ValueError(
+                "POSO requires at least one PC feature for personalization."
+            )
-        dense_features = merge_features(self.main_dense_features, self.pc_dense_features)
-        sparse_features = merge_features(self.main_sparse_features, self.pc_sparse_features)
-        sequence_features = merge_features(self.main_sequence_features, self.pc_sequence_features)
+        dense_features = merge_features(
+            self.main_dense_features, self.pc_dense_features
+        )
+        sparse_features = merge_features(
+            self.main_sparse_features, self.pc_sparse_features
+        )
+        sequence_features = merge_features(
+            self.main_sequence_features, self.pc_sequence_features
+        )
         super().__init__(
             dense_features=dense_features,
@@ -338,10 +376,18 @@ class POSO(BaseModel):
         self.num_tasks = len(target)
         if len(tower_params_list) != self.num_tasks:
-            raise ValueError(f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})")
+            raise ValueError(
+                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
+            )
-        self.main_features = self.main_dense_features + self.main_sparse_features + self.main_sequence_features
-        self.pc_features = self.pc_dense_features + self.pc_sparse_features + self.pc_sequence_features
+        self.main_features = (
+            self.main_dense_features
+            + self.main_sparse_features
+            + self.main_sequence_features
+        )
+        self.pc_features = (
+            self.pc_dense_features + self.pc_sparse_features + self.pc_sequence_features
+        )
         self.embedding = EmbeddingLayer(features=self.all_features)
         self.main_input_dim = self.embedding.get_input_dim(self.main_features)
@@ -349,7 +395,9 @@ class POSO(BaseModel):
         self.architecture = architecture.lower()
         if self.architecture not in {"mlp", "mmoe"}:
-            raise ValueError(f"Unsupported architecture '{architecture}', choose from ['mlp', 'mmoe'].")
+            raise ValueError(
+                f"Unsupported architecture '{architecture}', choose from ['mlp', 'mmoe']."
+            )
         # Build backbones
         if self.architecture == "mlp":
@@ -358,13 +406,17 @@ class POSO(BaseModel):
             for tower_params in tower_params_list:
                 dims = tower_params.get("dims")
                 if not dims:
-                    raise ValueError("tower_params must include a non-empty 'dims' list for POSO-MLP towers.")
+                    raise ValueError(
+                        "tower_params must include a non-empty 'dims' list for POSO-MLP towers."
+                    )
                 dropout = tower_params.get("dropout", 0.0)
                 tower = POSOMLP(
                     input_dim=self.main_input_dim,
                     pc_dim=self.pc_input_dim,
                     dims=dims,
-                    gate_hidden_dim=tower_params.get("gate_hidden_dim", gate_hidden_dim),
+                    gate_hidden_dim=tower_params.get(
+                        "gate_hidden_dim", gate_hidden_dim
+                    ),
                     scale_factor=tower_params.get("scale_factor", gate_scale_factor),
                     activation=tower_params.get("activation", gate_activation),
                     use_bias=tower_params.get("use_bias", gate_use_bias),
@@ -375,7 +427,9 @@ class POSO(BaseModel):
                 self.tower_heads.append(nn.Linear(tower_output_dim, 1))
         else:
             if expert_hidden_dims is None or not expert_hidden_dims:
-                raise ValueError("expert_hidden_dims must be provided for MMoE architecture.")
+                raise ValueError(
+                    "expert_hidden_dims must be provided for MMoE architecture."
+                )
             self.mmoe = POSOMMoE(
                 input_dim=self.main_input_dim,
                 pc_dim=self.pc_input_dim,
@@ -388,12 +442,35 @@ class POSO(BaseModel):
                 scale_factor=expert_gate_scale_factor,
                 gate_use_softmax=gate_use_softmax,
             )
-            self.towers = nn.ModuleList([MLP(input_dim=self.mmoe.expert_output_dim, output_layer=True, **tower_params,) for tower_params in tower_params_list])
+            self.towers = nn.ModuleList(
+                [
+                    MLP(
+                        input_dim=self.mmoe.expert_output_dim,
+                        output_layer=True,
+                        **tower_params,
+                    )
+                    for tower_params in tower_params_list
+                ]
+            )
             self.tower_heads = None
-        self.prediction_layer = PredictionLayer(task_type=self.default_task, task_dims=[1] * self.num_tasks,)
-        include_modules = ["towers", "tower_heads"] if self.architecture == "mlp" else ["mmoe", "towers"]
-        self.register_regularization_weights(embedding_attr="embedding", include_modules=include_modules)
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
+        self.prediction_layer = PredictionLayer(
+            task_type=self.default_task,
+            task_dims=[1] * self.num_tasks,
+        )
+        include_modules = (
+            ["towers", "tower_heads"]
+            if self.architecture == "mlp"
+            else ["mmoe", "towers"]
+        )
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=include_modules
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )
     def forward(self, x):
         # Embed main and PC features separately so PC can gate hidden units

nextrec/models/multi_task/share_bottom.py CHANGED Viewed

@@ -56,28 +56,30 @@ class ShareBottom(BaseModel):
     def default_task(self):
         num_tasks = getattr(self, "num_tasks", None)
         if num_tasks is not None and num_tasks > 0:
-            return ['binary'] * num_tasks
-        return ['binary']
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 bottom_params: dict,
-                 tower_params_list: list[dict],
-                 target: list[str],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
+            return ["binary"] * num_tasks
+        return ["binary"]
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        bottom_params: dict,
+        tower_params_list: list[dict],
+        target: list[str],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
         self.num_tasks = len(target)
         super(ShareBottom, self).__init__(
@@ -91,7 +93,7 @@ class ShareBottom(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
         self.loss = loss
@@ -100,7 +102,9 @@ class ShareBottom(BaseModel):
         # Number of tasks
         self.num_tasks = len(target)
         if len(tower_params_list) != self.num_tasks:
-            raise ValueError(f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})")
+            raise ValueError(
+                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
+            )
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)
         # Calculate input dimension
@@ -108,39 +112,48 @@ class ShareBottom(BaseModel):
         # emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
         # dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
         # input_dim = emb_dim_total + dense_input_dim
         # Shared bottom network
         self.bottom = MLP(input_dim=input_dim, output_layer=False, **bottom_params)
         # Get bottom output dimension
-        if 'dims' in bottom_params and len(bottom_params['dims']) > 0:
-            bottom_output_dim = bottom_params['dims'][-1]
+        if "dims" in bottom_params and len(bottom_params["dims"]) > 0:
+            bottom_output_dim = bottom_params["dims"][-1]
         else:
             bottom_output_dim = input_dim
         # Task-specific towers
         self.towers = nn.ModuleList()
         for tower_params in tower_params_list:
             tower = MLP(input_dim=bottom_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
-        self.prediction_layer = PredictionLayer(task_type=self.default_task, task_dims=[1] * self.num_tasks)
+        self.prediction_layer = PredictionLayer(
+            task_type=self.default_task, task_dims=[1] * self.num_tasks
+        )
         # Register regularization weights
-        self.register_regularization_weights(embedding_attr='embedding', include_modules=['bottom', 'towers'])
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["bottom", "towers"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )
     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
         # Shared bottom
         bottom_output = self.bottom(input_flat)  # [B, bottom_dim]
         # Task-specific towers
         task_outputs = []
         for tower in self.towers:
             tower_output = tower(bottom_output)  # [B, 1]
             task_outputs.append(tower_output)
         # Stack outputs: [B, num_tasks]
         y = torch.cat(task_outputs, dim=1)
         return self.prediction_layer(y)

nextrec/models/ranking/afm.py CHANGED Viewed

@@ -40,7 +40,7 @@ import torch
 import torch.nn as nn
 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import EmbeddingLayer, LR, PredictionLayer, InputMask
+from nextrec.basic.layers import EmbeddingLayer, PredictionLayer, InputMask
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
@@ -52,25 +52,28 @@ class AFM(BaseModel):
     @property
     def default_task(self):
         return "binary"
-    def __init__(self,
-                 dense_features: list[DenseFeature] | list = [],
-                 sparse_features: list[SparseFeature] | list = [],
-                 sequence_features: list[SequenceFeature] | list = [],
-                 attention_dim: int = 32,
-                 attention_dropout: float = 0.0,
-                 target: list[str] | list = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4, **kwargs):
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | list = [],
+        sparse_features: list[SparseFeature] | list = [],
+        sequence_features: list[SequenceFeature] | list = [],
+        attention_dim: int = 32,
+        attention_dropout: float = 0.0,
+        target: list[str] | list = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
         super(AFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -82,7 +85,7 @@ class AFM(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
         if target is None:
@@ -91,22 +94,30 @@ class AFM(BaseModel):
             optimizer_params = {}
         if loss is None:
             loss = "bce"
         self.fm_features = sparse_features + sequence_features
         if len(self.fm_features) < 2:
-            raise ValueError("AFM requires at least two sparse/sequence features to build pairwise interactions.")
+            raise ValueError(
+                "AFM requires at least two sparse/sequence features to build pairwise interactions."
+            )
         # make sure all embedding dimension are the same for FM features
         self.embedding_dim = self.fm_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.fm_features):
-            raise ValueError("All FM features must share the same embedding_dim for AFM.")
+            raise ValueError(
+                "All FM features must share the same embedding_dim for AFM."
+            )
-        self.embedding = EmbeddingLayer(features=self.fm_features) # [Batch, Field, Dim ]
+        self.embedding = EmbeddingLayer(
+            features=self.fm_features
+        )  # [Batch, Field, Dim ]
         # First-order terms: dense linear + one hot embeddings
         self.dense_features = list(dense_features)
         dense_input_dim = sum([f.input_dim for f in self.dense_features])
-        self.linear_dense = nn.Linear(dense_input_dim, 1, bias=True) if dense_input_dim > 0 else None
+        self.linear_dense = (
+            nn.Linear(dense_input_dim, 1, bias=True) if dense_input_dim > 0 else None
+        )
         # First-order term: sparse/sequence features one-hot
         # **INFO**: source paper does not contain sequence features in experiments,
@@ -114,9 +125,15 @@ class AFM(BaseModel):
         # remove sequence features from fm_features.
         self.first_order_embeddings = nn.ModuleDict()
         for feature in self.fm_features:
-            if feature.embedding_name in self.first_order_embeddings: # shared embedding
+            if (
+                feature.embedding_name in self.first_order_embeddings
+            ):  # shared embedding
                 continue
-            emb = nn.Embedding(num_embeddings=feature.vocab_size, embedding_dim=1, padding_idx=feature.padding_idx) # equal to one-hot encoding weight
+            emb = nn.Embedding(
+                num_embeddings=feature.vocab_size,
+                embedding_dim=1,
+                padding_idx=feature.padding_idx,
+            )  # equal to one-hot encoding weight
             # nn.init.zeros_(emb.weight)
             self.first_order_embeddings[feature.embedding_name] = emb
@@ -129,11 +146,18 @@ class AFM(BaseModel):
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear_dense', 'attention_linear', 'attention_p', 'output_projection']
+            embedding_attr="embedding",
+            include_modules=[
+                "linear_dense",
+                "attention_linear",
+                "attention_p",
+                "output_projection",
+            ],
         )
         # add first-order embeddings to embedding regularization list
-        self.embedding_params.extend(emb.weight for emb in self.first_order_embeddings.values())
+        self.embedding_params.extend(
+            emb.weight for emb in self.first_order_embeddings.values()
+        )
         self.compile(
             optimizer=optimizer,
@@ -143,13 +167,17 @@ class AFM(BaseModel):
         )
     def forward(self, x):
-        field_emb = self.embedding(x=x, features=self.fm_features, squeeze_dim=False)  # [B, F, D]
+        field_emb = self.embedding(
+            x=x, features=self.fm_features, squeeze_dim=False
+        )  # [B, F, D]
         batch_size = field_emb.size(0)
         y_linear = torch.zeros(batch_size, 1, device=field_emb.device)
         # First-order dense part
         if self.linear_dense is not None:
-            dense_inputs = [x[f.name].float().view(batch_size, -1) for f in self.dense_features]
+            dense_inputs = [
+                x[f.name].float().view(batch_size, -1) for f in self.dense_features
+            ]
             dense_stack = torch.cat(dense_inputs, dim=1) if dense_inputs else None
             if dense_stack is not None:
                 y_linear = y_linear + self.linear_dense(dense_stack)
@@ -161,7 +189,7 @@ class AFM(BaseModel):
             if isinstance(feature, SparseFeature):
                 term = emb(x[feature.name].long())  # [B, 1]
             else:  # SequenceFeature
-                seq_input = x[feature.name].long() # [B, 1]
+                seq_input = x[feature.name].long()  # [B, 1]
                 if feature.max_len is not None and seq_input.size(1) > feature.max_len:
                     seq_input = seq_input[:, -feature.max_len :]
                 mask = self.input_mask(x, feature, seq_input).squeeze(1)  # [B, 1]
@@ -169,7 +197,9 @@ class AFM(BaseModel):
                 term = (seq_weight * mask).sum(dim=1, keepdim=True)  # [B, 1]
             first_order_terms.append(term)
         if first_order_terms:
-            y_linear = y_linear + torch.sum(torch.cat(first_order_terms, dim=1), dim=1, keepdim=True)
+            y_linear = y_linear + torch.sum(
+                torch.cat(first_order_terms, dim=1), dim=1, keepdim=True
+            )
         interactions = []
         feature_values = []
@@ -182,13 +212,18 @@ class AFM(BaseModel):
             else:
                 if isinstance(feature, SequenceFeature):
                     seq_input = x[feature.name].long()
-                    if feature.max_len is not None and seq_input.size(1) > feature.max_len:
+                    if (
+                        feature.max_len is not None
+                        and seq_input.size(1) > feature.max_len
+                    ):
                         seq_input = seq_input[:, -feature.max_len :]
                     value = self.input_mask(x, feature, seq_input).sum(dim=2)  # [B, 1]
                 else:
                     value = torch.ones(batch_size, 1, device=field_emb.device)
             feature_values.append(value)
-        feature_values_tensor = torch.cat(feature_values, dim=1).unsqueeze(-1)  # [B, F, 1]
+        feature_values_tensor = torch.cat(feature_values, dim=1).unsqueeze(
+            -1
+        )  # [B, F, 1]
         field_emb = field_emb * feature_values_tensor
         num_fields = field_emb.shape[1]

nextrec 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

nextrec 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl