nextrec 0.3.6__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +1 -1
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -5
- nextrec/basic/callback.py +1 -0
- nextrec/basic/features.py +30 -22
- nextrec/basic/layers.py +244 -113
- nextrec/basic/loggers.py +62 -43
- nextrec/basic/metrics.py +268 -119
- nextrec/basic/model.py +1373 -443
- nextrec/basic/session.py +10 -3
- nextrec/cli.py +498 -0
- nextrec/data/__init__.py +19 -25
- nextrec/data/batch_utils.py +11 -3
- nextrec/data/data_processing.py +42 -24
- nextrec/data/data_utils.py +26 -15
- nextrec/data/dataloader.py +303 -96
- nextrec/data/preprocessor.py +320 -199
- nextrec/loss/listwise.py +17 -9
- nextrec/loss/loss_utils.py +7 -8
- nextrec/loss/pairwise.py +2 -0
- nextrec/loss/pointwise.py +30 -12
- nextrec/models/generative/hstu.py +106 -40
- nextrec/models/match/dssm.py +82 -69
- nextrec/models/match/dssm_v2.py +72 -58
- nextrec/models/match/mind.py +175 -108
- nextrec/models/match/sdm.py +104 -88
- nextrec/models/match/youtube_dnn.py +73 -60
- nextrec/models/multi_task/esmm.py +53 -39
- nextrec/models/multi_task/mmoe.py +70 -47
- nextrec/models/multi_task/ple.py +107 -50
- nextrec/models/multi_task/poso.py +121 -41
- nextrec/models/multi_task/share_bottom.py +54 -38
- nextrec/models/ranking/afm.py +172 -45
- nextrec/models/ranking/autoint.py +84 -61
- nextrec/models/ranking/dcn.py +59 -42
- nextrec/models/ranking/dcn_v2.py +64 -23
- nextrec/models/ranking/deepfm.py +36 -26
- nextrec/models/ranking/dien.py +158 -102
- nextrec/models/ranking/din.py +88 -60
- nextrec/models/ranking/fibinet.py +55 -35
- nextrec/models/ranking/fm.py +32 -26
- nextrec/models/ranking/masknet.py +95 -34
- nextrec/models/ranking/pnn.py +34 -31
- nextrec/models/ranking/widedeep.py +37 -29
- nextrec/models/ranking/xdeepfm.py +63 -41
- nextrec/utils/__init__.py +61 -32
- nextrec/utils/config.py +490 -0
- nextrec/utils/device.py +52 -12
- nextrec/utils/distributed.py +141 -0
- nextrec/utils/embedding.py +1 -0
- nextrec/utils/feature.py +1 -0
- nextrec/utils/file.py +32 -11
- nextrec/utils/initializer.py +61 -16
- nextrec/utils/optimizer.py +25 -9
- nextrec/utils/synthetic_data.py +531 -0
- nextrec/utils/tensor.py +24 -13
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/METADATA +15 -5
- nextrec-0.4.2.dist-info/RECORD +69 -0
- nextrec-0.4.2.dist-info/entry_points.txt +2 -0
- nextrec-0.3.6.dist-info/RECORD +0 -64
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/multi_task/poso.py

@@ -79,7 +79,7 @@ class POSOGate(nn.Module):
         h = self.act(self.fc1(pc))
         g = torch.sigmoid(self.fc2(h))  # (B, out_dim) in (0,1)
         return self.scale_factor * g
-
+

 class POSOFC(nn.Module):
     """
@@ -116,10 +116,10 @@ class POSOFC(nn.Module):
         pc: (B, pc_dim)
         return: (B, out_dim)
         """
-        h = self.act(self.linear(x))
-        g = self.gate(pc)
-        return g * h
-
+        h = self.act(self.linear(x))  # Standard FC with activation
+        g = self.gate(pc)  # (B, out_dim)
+        return g * h  # Element-wise gating
+

 class POSOMLP(nn.Module):
     """
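
For readers skimming the diff, the two modules touched above implement POSO's "personalization code" (PC) gating: a small two-layer network maps the PC features to a per-unit gate in (0, scale_factor), and the gated FC layer multiplies that gate into its activations. A minimal standalone sketch of the same pattern, assuming the constructor arguments visible in the hunks (`pc_dim`, `out_dim`, `hidden_dim`, `scale_factor`); the shipped modules may differ in details such as activation lookup and bias handling:

```python
import torch
import torch.nn as nn


class POSOGateSketch(nn.Module):
    """pc -> fc1 -> act -> fc2 -> sigmoid, scaled into (0, scale_factor)."""

    def __init__(self, pc_dim: int, out_dim: int, hidden_dim: int = 32,
                 scale_factor: float = 2.0) -> None:
        super().__init__()
        self.fc1 = nn.Linear(pc_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, out_dim)
        self.act = nn.ReLU()
        self.scale_factor = scale_factor

    def forward(self, pc: torch.Tensor) -> torch.Tensor:
        h = self.act(self.fc1(pc))
        g = torch.sigmoid(self.fc2(h))   # (B, out_dim) in (0, 1)
        return self.scale_factor * g     # (B, out_dim) in (0, scale_factor)


class POSOFCSketch(nn.Module):
    """Fully connected layer whose hidden units are gated by the PC features."""

    def __init__(self, in_dim: int, out_dim: int, pc_dim: int) -> None:
        super().__init__()
        self.linear = nn.Linear(in_dim, out_dim)
        self.act = nn.ReLU()
        self.gate = POSOGateSketch(pc_dim=pc_dim, out_dim=out_dim)

    def forward(self, x: torch.Tensor, pc: torch.Tensor) -> torch.Tensor:
        h = self.act(self.linear(x))     # standard FC with activation
        g = self.gate(pc)                # (B, out_dim)
        return g * h                     # element-wise gating
```
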
@@ -173,7 +173,7 @@ class POSOMLP(nn.Module):
         if self.dropout is not None:
             h = self.dropout(h)
         return h
-
+

 class POSOMMoE(nn.Module):
     """
@@ -183,7 +183,7 @@ class POSOMMoE(nn.Module):
     - Task gates aggregate the PC-masked expert outputs

     Concretely:
-        h_e = expert_e(x)  # (B, D)
+        h_e = expert_e(x)  # (B, D)
         g_e = POSOGate(pc) in (0, C)^{D}  # (B, D)
         h_e_tilde = g_e ⊙ h_e  # (B, D)
         z_t = Σ_e gate_t,e(x) * h_e_tilde
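
The docstring above is the whole algorithm for `POSOMMoE`. The same math with concrete shapes, as a self-contained illustration (B=batch, E=experts, D=expert output dim, T=tasks; random tensors stand in for the expert, PC-gate, and task-gate outputs). The vectorized einsum is equivalent to the per-task loop in the forward pass further down:

```python
import torch
import torch.nn.functional as F

B, E, D, T = 4, 3, 8, 2                           # batch, experts, expert dim, tasks
h = torch.randn(B, E, D)                          # h_e = expert_e(x), stacked over e
g_pc = 2.0 * torch.sigmoid(torch.randn(B, E, D))  # g_e = POSOGate(pc), values in (0, 2)
h_tilde = g_pc * h                                # h_e_tilde = g_e ⊙ h_e

gate = F.softmax(torch.randn(B, T, E), dim=-1)    # gate_t,e(x), normalized over experts
z = torch.einsum("bte,bed->btd", gate, h_tilde)   # z_t = Σ_e gate_t,e(x) * h_e_tilde
print(z.shape)                                    # torch.Size([4, 2, 8]): one (B, D) slice per task
```
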
@@ -192,14 +192,14 @@ class POSOMMoE(nn.Module):
     def __init__(
         self,
         input_dim: int,
-        pc_dim: int,
+        pc_dim: int,  # for poso feature dimension
         num_experts: int,
         expert_hidden_dims: list[int],
         num_tasks: int,
         activation: str = "relu",
         expert_dropout: float = 0.0,
-        gate_hidden_dim: int = 32,
-        scale_factor: float = 2.0,
+        gate_hidden_dim: int = 32,  # for poso gate hidden dimension
+        scale_factor: float = 2.0,  # for poso gate scale factor
         gate_use_softmax: bool = True,
     ) -> None:
         super().__init__()
@@ -207,15 +207,41 @@ class POSOMMoE(nn.Module):
         self.num_tasks = num_tasks

         # Experts built with framework MLP, same as standard MMoE
-        self.experts = nn.ModuleList(
-
+        self.experts = nn.ModuleList(
+            [
+                MLP(
+                    input_dim=input_dim,
+                    output_layer=False,
+                    dims=expert_hidden_dims,
+                    activation=activation,
+                    dropout=expert_dropout,
+                )
+                for _ in range(num_experts)
+            ]
+        )
+        self.expert_output_dim = (
+            expert_hidden_dims[-1] if expert_hidden_dims else input_dim
+        )

         # Task-specific gates: gate_t(x) over experts
-        self.gates = nn.ModuleList(
+        self.gates = nn.ModuleList(
+            [nn.Linear(input_dim, num_experts) for _ in range(num_tasks)]
+        )
         self.gate_use_softmax = gate_use_softmax

         # PC gate per expert: g_e(pc) ∈ R^D
-        self.expert_pc_gates = nn.ModuleList(
+        self.expert_pc_gates = nn.ModuleList(
+            [
+                POSOGate(
+                    pc_dim=pc_dim,
+                    out_dim=self.expert_output_dim,
+                    hidden_dim=gate_hidden_dim,
+                    scale_factor=scale_factor,
+                    activation=activation,
+                )
+                for _ in range(num_experts)
+            ]
+        )

     def forward(self, x: torch.Tensor, pc: torch.Tensor) -> list[torch.Tensor]:
         """
@@ -226,9 +252,9 @@ class POSOMMoE(nn.Module):
         # 1) Expert outputs with POSO PC gate
         masked_expert_outputs = []
         for e, expert in enumerate(self.experts):
-            h_e = expert(x)
-            g_e = self.expert_pc_gates[e](pc)
-            h_e_tilde = g_e * h_e
+            h_e = expert(x)  # (B, D)
+            g_e = self.expert_pc_gates[e](pc)  # (B, D)
+            h_e_tilde = g_e * h_e  # (B, D)
             masked_expert_outputs.append(h_e_tilde)

         masked_expert_outputs = torch.stack(masked_expert_outputs, dim=1)  # (B, E, D)
@@ -236,13 +262,13 @@ class POSOMMoE(nn.Module):
         # 2) Task gates depend on x as in standard MMoE
         task_outputs: list[torch.Tensor] = []
         for t in range(self.num_tasks):
-            logits = self.gates[t](x)
+            logits = self.gates[t](x)  # (B, E)
             if self.gate_use_softmax:
                 gate = F.softmax(logits, dim=1)
             else:
                 gate = logits

-            gate = gate.unsqueeze(-1)
+            gate = gate.unsqueeze(-1)  # (B, E, 1)
             z_t = torch.sum(gate * masked_expert_outputs, dim=1)  # (B, D)
             task_outputs.append(z_t)

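
Taken together, the constructor and forward hunks define the public surface of the new `POSOMMoE`. A hedged usage sketch, assuming the class is importable from the module listed at the top of this diff (`nextrec/models/multi_task/poso.py`) and using only the keyword arguments shown above:

```python
import torch
from nextrec.models.multi_task.poso import POSOMMoE  # assumed import path

mmoe = POSOMMoE(
    input_dim=64,                  # flattened main-feature embedding dim
    pc_dim=16,                     # flattened PC-feature embedding dim
    num_experts=4,
    expert_hidden_dims=[128, 64],  # expert_output_dim becomes 64
    num_tasks=2,
    gate_hidden_dim=32,
    scale_factor=2.0,
)

x = torch.randn(8, 64)             # main input  (B, input_dim)
pc = torch.randn(8, 16)            # PC input    (B, pc_dim)
task_outputs = mmoe(x, pc)         # list with num_tasks tensors, each (B, expert_output_dim)
print(len(task_outputs), task_outputs[0].shape)  # 2 torch.Size([8, 64])
```
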
@@ -261,8 +287,11 @@ class POSO(BaseModel):
         return "POSO"

     @property
-    def
-
+    def default_task(self) -> list[str]:
+        num_tasks = getattr(self, "num_tasks", None)
+        if num_tasks is not None and num_tasks > 0:
+            return ["binary"] * num_tasks
+        return ["binary"]

     def __init__(
         self,
@@ -274,7 +303,7 @@ class POSO(BaseModel):
         pc_sequence_features: list[SequenceFeature] | None,
         tower_params_list: list[dict],
         target: list[str],
-        task: str | list[str] =
+        task: str | list[str] | None = None,
         architecture: str = "mlp",
         # POSO gating defaults
         gate_hidden_dim: int = 32,
@@ -307,26 +336,38 @@ class POSO(BaseModel):
         self.pc_dense_features = list(pc_dense_features or [])
         self.pc_sparse_features = list(pc_sparse_features or [])
         self.pc_sequence_features = list(pc_sequence_features or [])
+        self.num_tasks = len(target)

-        if
-
+        if (
+            not self.pc_dense_features
+            and not self.pc_sparse_features
+            and not self.pc_sequence_features
+        ):
+            raise ValueError(
+                "POSO requires at least one PC feature for personalization."
+            )

-        dense_features = merge_features(
-
-
+        dense_features = merge_features(
+            self.main_dense_features, self.pc_dense_features
+        )
+        sparse_features = merge_features(
+            self.main_sparse_features, self.pc_sparse_features
+        )
+        sequence_features = merge_features(
+            self.main_sequence_features, self.pc_sequence_features
+        )

         super().__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
             target=target,
-            task=task,
+            task=task or self.default_task,
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            early_stop_patience=20,
             **kwargs,
         )

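
The recurring pattern in 0.4.2 is that `task` is now optional and falls back to the new `default_task` property, which returns one "binary" task per target. In plain Python, the resolution the constructor performs looks like this (illustrative values, not nextrec code):

```python
target = ["click", "like", "purchase"]
num_tasks = len(target)
default_task = ["binary"] * num_tasks      # what the default_task property returns

task = None                                # caller did not pass a task
print(task or default_task)                # ['binary', 'binary', 'binary']

task = ["binary", "binary", "regression"]  # an explicit task list still wins
print(task or default_task)                # ['binary', 'binary', 'regression']
```
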
@@ -335,10 +376,18 @@ class POSO(BaseModel):

         self.num_tasks = len(target)
         if len(tower_params_list) != self.num_tasks:
-            raise ValueError(
+            raise ValueError(
+                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
+            )

-        self.main_features =
-
+        self.main_features = (
+            self.main_dense_features
+            + self.main_sparse_features
+            + self.main_sequence_features
+        )
+        self.pc_features = (
+            self.pc_dense_features + self.pc_sparse_features + self.pc_sequence_features
+        )

         self.embedding = EmbeddingLayer(features=self.all_features)
         self.main_input_dim = self.embedding.get_input_dim(self.main_features)
@@ -346,7 +395,9 @@ class POSO(BaseModel):

         self.architecture = architecture.lower()
         if self.architecture not in {"mlp", "mmoe"}:
-            raise ValueError(
+            raise ValueError(
+                f"Unsupported architecture '{architecture}', choose from ['mlp', 'mmoe']."
+            )

         # Build backbones
         if self.architecture == "mlp":
@@ -355,13 +406,17 @@ class POSO(BaseModel):
             for tower_params in tower_params_list:
                 dims = tower_params.get("dims")
                 if not dims:
-                    raise ValueError(
+                    raise ValueError(
+                        "tower_params must include a non-empty 'dims' list for POSO-MLP towers."
+                    )
                 dropout = tower_params.get("dropout", 0.0)
                 tower = POSOMLP(
                     input_dim=self.main_input_dim,
                     pc_dim=self.pc_input_dim,
                     dims=dims,
-                    gate_hidden_dim=tower_params.get(
+                    gate_hidden_dim=tower_params.get(
+                        "gate_hidden_dim", gate_hidden_dim
+                    ),
                     scale_factor=tower_params.get("scale_factor", gate_scale_factor),
                     activation=tower_params.get("activation", gate_activation),
                     use_bias=tower_params.get("use_bias", gate_use_bias),
|
|
|
372
427
|
self.tower_heads.append(nn.Linear(tower_output_dim, 1))
|
|
373
428
|
else:
|
|
374
429
|
if expert_hidden_dims is None or not expert_hidden_dims:
|
|
375
|
-
raise ValueError(
|
|
430
|
+
raise ValueError(
|
|
431
|
+
"expert_hidden_dims must be provided for MMoE architecture."
|
|
432
|
+
)
|
|
376
433
|
self.mmoe = POSOMMoE(
|
|
377
434
|
input_dim=self.main_input_dim,
|
|
378
435
|
pc_dim=self.pc_input_dim,
|
|
@@ -385,12 +442,35 @@ class POSO(BaseModel):
                 scale_factor=expert_gate_scale_factor,
                 gate_use_softmax=gate_use_softmax,
             )
-            self.towers = nn.ModuleList(
+            self.towers = nn.ModuleList(
+                [
+                    MLP(
+                        input_dim=self.mmoe.expert_output_dim,
+                        output_layer=True,
+                        **tower_params,
+                    )
+                    for tower_params in tower_params_list
+                ]
+            )
             self.tower_heads = None
-        self.prediction_layer = PredictionLayer(
-
-
-
+        self.prediction_layer = PredictionLayer(
+            task_type=self.default_task,
+            task_dims=[1] * self.num_tasks,
+        )
+        include_modules = (
+            ["towers", "tower_heads"]
+            if self.architecture == "mlp"
+            else ["mmoe", "towers"]
+        )
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=include_modules
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )

     def forward(self, x):
         # Embed main and PC features separately so PC can gate hidden units
nextrec/models/multi_task/share_bottom.py

@@ -53,42 +53,47 @@ class ShareBottom(BaseModel):
         return "ShareBottom"

     @property
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def default_task(self):
+        num_tasks = getattr(self, "num_tasks", None)
+        if num_tasks is not None and num_tasks > 0:
+            return ["binary"] * num_tasks
+        return ["binary"]
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        bottom_params: dict,
+        tower_params_list: list[dict],
+        target: list[str],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
+        self.num_tasks = len(target)
+
         super(ShareBottom, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
             target=target,
-            task=task,
+            task=task or self.default_task,
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-
-            **kwargs
+            **kwargs,
         )

         self.loss = loss
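
As with POSO, the ShareBottom constructor now takes `optimizer`, `optimizer_params`, `loss`, and `loss_params` directly and compiles the model at the end of `__init__`, and `task` defaults to `None` (resolved to one "binary" task per target). A hedged sketch of the plain-Python pieces of a call, using only keyword names from the signature above (feature lists omitted because their constructors are not part of this hunk):

```python
bottom_params = {"dims": [256, 128], "dropout": 0.1, "activation": "relu"}
tower_params_list = [
    {"dims": [64], "dropout": 0.1},   # tower for target "click"
    {"dims": [64], "dropout": 0.1},   # tower for target "like"
]
target = ["click", "like"]
task = None                           # resolves to ["binary"] * len(target)
loss = "bce"                          # default; per the type hint a per-task list is also accepted
assert len(tower_params_list) == len(target)
```
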
@@ -97,7 +102,9 @@ class ShareBottom(BaseModel):
         # Number of tasks
         self.num_tasks = len(target)
         if len(tower_params_list) != self.num_tasks:
-            raise ValueError(
+            raise ValueError(
+                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
+            )
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)
         # Calculate input dimension
@@ -105,39 +112,48 @@ class ShareBottom(BaseModel):
         # emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
         # dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
         # input_dim = emb_dim_total + dense_input_dim
-
+
         # Shared bottom network
         self.bottom = MLP(input_dim=input_dim, output_layer=False, **bottom_params)
-
+
         # Get bottom output dimension
-        if
-            bottom_output_dim = bottom_params[
+        if "dims" in bottom_params and len(bottom_params["dims"]) > 0:
+            bottom_output_dim = bottom_params["dims"][-1]
         else:
             bottom_output_dim = input_dim
-
+
         # Task-specific towers
         self.towers = nn.ModuleList()
         for tower_params in tower_params_list:
             tower = MLP(input_dim=bottom_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
-        self.prediction_layer = PredictionLayer(
+        self.prediction_layer = PredictionLayer(
+            task_type=self.default_task, task_dims=[1] * self.num_tasks
+        )
         # Register regularization weights
-        self.register_regularization_weights(
-
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["bottom", "towers"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )

     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
-
+
         # Shared bottom
         bottom_output = self.bottom(input_flat)  # [B, bottom_dim]
-
+
         # Task-specific towers
         task_outputs = []
         for tower in self.towers:
             tower_output = tower(bottom_output)  # [B, 1]
             task_outputs.append(tower_output)
-
+
         # Stack outputs: [B, num_tasks]
         y = torch.cat(task_outputs, dim=1)
         return self.prediction_layer(y)