nextrec-0.4.1-py3-none-any.whl → nextrec-0.4.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +1 -1
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -5
- nextrec/basic/callback.py +1 -0
- nextrec/basic/features.py +30 -22
- nextrec/basic/layers.py +220 -106
- nextrec/basic/loggers.py +62 -43
- nextrec/basic/metrics.py +268 -119
- nextrec/basic/model.py +1082 -400
- nextrec/basic/session.py +10 -3
- nextrec/cli.py +498 -0
- nextrec/data/__init__.py +19 -25
- nextrec/data/batch_utils.py +11 -3
- nextrec/data/data_processing.py +51 -45
- nextrec/data/data_utils.py +26 -15
- nextrec/data/dataloader.py +272 -95
- nextrec/data/preprocessor.py +320 -199
- nextrec/loss/listwise.py +17 -9
- nextrec/loss/loss_utils.py +7 -8
- nextrec/loss/pairwise.py +2 -0
- nextrec/loss/pointwise.py +30 -12
- nextrec/models/generative/hstu.py +103 -38
- nextrec/models/match/dssm.py +82 -68
- nextrec/models/match/dssm_v2.py +72 -57
- nextrec/models/match/mind.py +175 -107
- nextrec/models/match/sdm.py +104 -87
- nextrec/models/match/youtube_dnn.py +73 -59
- nextrec/models/multi_task/esmm.py +53 -37
- nextrec/models/multi_task/mmoe.py +64 -45
- nextrec/models/multi_task/ple.py +101 -48
- nextrec/models/multi_task/poso.py +113 -36
- nextrec/models/multi_task/share_bottom.py +48 -35
- nextrec/models/ranking/afm.py +72 -37
- nextrec/models/ranking/autoint.py +72 -55
- nextrec/models/ranking/dcn.py +55 -35
- nextrec/models/ranking/dcn_v2.py +64 -23
- nextrec/models/ranking/deepfm.py +32 -22
- nextrec/models/ranking/dien.py +155 -99
- nextrec/models/ranking/din.py +85 -57
- nextrec/models/ranking/fibinet.py +52 -32
- nextrec/models/ranking/fm.py +29 -23
- nextrec/models/ranking/masknet.py +91 -29
- nextrec/models/ranking/pnn.py +31 -28
- nextrec/models/ranking/widedeep.py +34 -26
- nextrec/models/ranking/xdeepfm.py +60 -38
- nextrec/utils/__init__.py +59 -34
- nextrec/utils/config.py +490 -0
- nextrec/utils/device.py +30 -20
- nextrec/utils/distributed.py +36 -9
- nextrec/utils/embedding.py +1 -0
- nextrec/utils/feature.py +1 -0
- nextrec/utils/file.py +32 -11
- nextrec/utils/initializer.py +61 -16
- nextrec/utils/optimizer.py +25 -9
- nextrec/utils/synthetic_data.py +283 -165
- nextrec/utils/tensor.py +24 -13
- {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/METADATA +4 -4
- nextrec-0.4.2.dist-info/RECORD +69 -0
- nextrec-0.4.2.dist-info/entry_points.txt +2 -0
- nextrec-0.4.1.dist-info/RECORD +0 -66
- {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
- {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/multi_task/esmm.py
CHANGED

@@ -52,87 +52,103 @@ from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 class ESMM(BaseModel):
     """
     Entire Space Multi-Task Model
-
+
     ESMM is designed for CVR (Conversion Rate) prediction. It models two related tasks:
     - CTR task: P(click | impression)
     - CVR task: P(conversion | click)
     - CTCVR task (auxiliary): P(click & conversion | impression) = P(click) * P(conversion | click)
-
+
     This design addresses the sample selection bias and data sparsity issues in CVR modeling.
     """
-
+
     @property
     def model_name(self):
         return "ESMM"
-
+
     @property
     def default_task(self):
-        return [
-
-    def __init__(
-        [original lines 73-90 lost in extraction]
+        return ["binary", "binary"]
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        ctr_params: dict,
+        cvr_params: dict,
+        target: list[str] = ["ctr", "ctcvr"],  # Note: ctcvr = ctr * cvr
+        task: list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         # ESMM requires exactly 2 targets: ctr and ctcvr
         if len(target) != 2:
-            raise ValueError(
-
+            raise ValueError(
+                f"ESMM requires exactly 2 targets (ctr and ctcvr), got {len(target)}"
+            )
+
         super(ESMM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
             target=target,
-            task=task
+            task=task
+            or self.default_task,  # Both CTR and CTCVR are binary classification
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # All features
         self.all_features = dense_features + sparse_features + sequence_features
         # Shared embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)
-        input_dim =
+        input_dim = (
+            self.embedding.input_dim
+        )  # Calculate input dimension, better way than below
         # emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
         # dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
         # input_dim = emb_dim_total + dense_input_dim

         # CTR tower
         self.ctr_tower = MLP(input_dim=input_dim, output_layer=True, **ctr_params)
-
+
         # CVR tower
         self.cvr_tower = MLP(input_dim=input_dim, output_layer=True, **cvr_params)
-        self.prediction_layer = PredictionLayer(
+        self.prediction_layer = PredictionLayer(
+            task_type=self.default_task, task_dims=[1, 1]
+        )
         # Register regularization weights
-        self.register_regularization_weights(
-
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["ctr_tower", "cvr_tower"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )

     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
-
+
         # CTR prediction: P(click | impression)
         ctr_logit = self.ctr_tower(input_flat)  # [B, 1]
         cvr_logit = self.cvr_tower(input_flat)  # [B, 1]

@@ -140,7 +156,7 @@ class ESMM(BaseModel):
         preds = self.prediction_layer(logits)
         ctr, cvr = preds.chunk(2, dim=1)
         ctcvr = ctr * cvr  # [B, 1]
-
+
         # Output: [CTR, CTCVR], We supervise CTR with click labels and CTCVR with conversion labels
         y = torch.cat([ctr, ctcvr], dim=1)  # [B, 2]
         return y  # [B, 2], where y[:, 0] is CTR and y[:, 1] is CTCVR
nextrec/models/multi_task/mmoe.py
CHANGED

@@ -53,13 +53,13 @@ from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 class MMOE(BaseModel):
     """
     Multi-gate Mixture-of-Experts
-
+
     MMOE improves upon shared-bottom architecture by using multiple expert networks
     and task-specific gating networks. Each task has its own gate that learns to
     weight the contributions of different experts, allowing for both task-specific
     and shared representations.
     """
-
+
     @property
     def model_name(self):
         return "MMOE"

@@ -68,29 +68,31 @@ class MMOE(BaseModel):
     def default_task(self):
         num_tasks = getattr(self, "num_tasks", None)
         if num_tasks is not None and num_tasks > 0:
-            return [
-        return [
-
-    def __init__(
-        [original lines 75-93 lost in extraction]
+            return ["binary"] * num_tasks
+        return ["binary"]
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] = [],
+        sparse_features: list[SparseFeature] = [],
+        sequence_features: list[SequenceFeature] = [],
+        expert_params: dict = {},
+        num_experts: int = 3,
+        tower_params_list: list[dict] = [],
+        target: list[str] = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         self.num_tasks = len(target)

         super(MMOE, self).__init__(

@@ -104,19 +106,21 @@ class MMOE(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Number of tasks and experts
         self.num_tasks = len(target)
         self.num_experts = num_experts
-
+
         if len(tower_params_list) != self.num_tasks:
-            raise ValueError(
+            raise ValueError(
+                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
+            )

         self.all_features = dense_features + sparse_features + sequence_features
         self.embedding = EmbeddingLayer(features=self.all_features)

@@ -130,54 +134,69 @@ class MMOE(BaseModel):
         for _ in range(num_experts):
             expert = MLP(input_dim=input_dim, output_layer=False, **expert_params)
             self.experts.append(expert)
-
+
         # Get expert output dimension
-        if
-        expert_output_dim = expert_params[
+        if "dims" in expert_params and len(expert_params["dims"]) > 0:
+            expert_output_dim = expert_params["dims"][-1]
         else:
             expert_output_dim = input_dim
-
+
         # Task-specific gates
         self.gates = nn.ModuleList()
         for _ in range(self.num_tasks):
             gate = nn.Sequential(nn.Linear(input_dim, num_experts), nn.Softmax(dim=1))
             self.gates.append(gate)
-
+
         # Task-specific towers
         self.towers = nn.ModuleList()
         for tower_params in tower_params_list:
             tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
-        self.prediction_layer = PredictionLayer(
+        self.prediction_layer = PredictionLayer(
+            task_type=self.default_task, task_dims=[1] * self.num_tasks
+        )
         # Register regularization weights
-        self.register_regularization_weights(
-
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["experts", "gates", "towers"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )

     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
-
+
         # Expert outputs: [num_experts, B, expert_dim]
         expert_outputs = [expert(input_flat) for expert in self.experts]
-        expert_outputs = torch.stack(
-
+        expert_outputs = torch.stack(
+            expert_outputs, dim=0
+        )  # [num_experts, B, expert_dim]
+
         # Task-specific processing
         task_outputs = []
         for task_idx in range(self.num_tasks):
             # Gate weights for this task: [B, num_experts]
             gate_weights = self.gates[task_idx](input_flat)  # [B, num_experts]
-
+
             # Weighted sum of expert outputs
             # gate_weights: [B, num_experts, 1]
             # expert_outputs: [num_experts, B, expert_dim]
             gate_weights = gate_weights.unsqueeze(2)  # [B, num_experts, 1]
-            expert_outputs_t = expert_outputs.permute(
-
-
+            expert_outputs_t = expert_outputs.permute(
+                1, 0, 2
+            )  # [B, num_experts, expert_dim]
+            gated_output = torch.sum(
+                gate_weights * expert_outputs_t, dim=1
+            )  # [B, expert_dim]
+
             # Tower output
             tower_output = self.towers[task_idx](gated_output)  # [B, 1]
             task_outputs.append(tower_output)
-
+
         # Stack outputs: [B, num_tasks]
         y = torch.cat(task_outputs, dim=1)
         return self.prediction_layer(y)
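As a cross-check on the gating arithmetic in MMOE.forward above, here is a minimal sketch (plain PyTorch with made-up shapes, not nextrec code) showing that the unsqueeze/permute/sum sequence is simply a per-task weighted sum of expert outputs, equivalently written as a single einsum:

import torch

num_experts, batch, expert_dim, num_tasks = 3, 4, 8, 2
expert_outputs = torch.randn(num_experts, batch, expert_dim)                       # [E, B, D]
gate_weights = torch.softmax(torch.randn(num_tasks, batch, num_experts), dim=-1)  # [T, B, E]

# For task t: out[b, d] = sum_e gate[t, b, e] * expert_outputs[e, b, d]
gated = torch.einsum("tbe,ebd->tbd", gate_weights, expert_outputs)  # [T, B, D]

# Equivalent to the per-task loop in the diff above:
for t in range(num_tasks):
    w = gate_weights[t].unsqueeze(2)                         # [B, E, 1]
    ref = (w * expert_outputs.permute(1, 0, 2)).sum(dim=1)   # [B, D]
    assert torch.allclose(gated[t], ref, atol=1e-6)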
nextrec/models/multi_task/ple.py
CHANGED
@@ -52,6 +52,7 @@ from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature

+
 class CGCLayer(nn.Module):
     """
     CGC (Customized Gate Control) block used by PLE.

@@ -71,26 +72,61 @@ class CGCLayer(nn.Module):
         if num_tasks < 1:
             raise ValueError("num_tasks must be >= 1")

-        specific_params_list = self._normalize_specific_params(
+        specific_params_list = self._normalize_specific_params(
+            specific_expert_params, num_tasks
+        )

         self.output_dim = self._get_output_dim(shared_expert_params, input_dim)
-        specific_dims = [
+        specific_dims = [
+            self._get_output_dim(params, input_dim) for params in specific_params_list
+        ]
         dims_set = set(specific_dims + [self.output_dim])
         if len(dims_set) != 1:
-            raise ValueError(
+            raise ValueError(
+                f"Shared/specific expert output dims must match, got {dims_set}"
+            )

         # experts
-        self.shared_experts = nn.ModuleList(
+        self.shared_experts = nn.ModuleList(
+            [
+                MLP(
+                    input_dim=input_dim,
+                    output_layer=False,
+                    **shared_expert_params,
+                )
+                for _ in range(num_shared_experts)
+            ]
+        )
         self.specific_experts = nn.ModuleList()
         for params in specific_params_list:
-            task_experts = nn.ModuleList(
+            task_experts = nn.ModuleList(
+                [
+                    MLP(
+                        input_dim=input_dim,
+                        output_layer=False,
+                        **params,
+                    )
+                    for _ in range(num_specific_experts)
+                ]
+            )
             self.specific_experts.append(task_experts)

         # gates
         task_gate_expert_num = num_shared_experts + num_specific_experts
-        self.task_gates = nn.ModuleList(
+        self.task_gates = nn.ModuleList(
+            [
+                nn.Sequential(
+                    nn.Linear(input_dim, task_gate_expert_num),
+                    nn.Softmax(dim=1),
+                )
+                for _ in range(num_tasks)
+            ]
+        )
         shared_gate_expert_num = num_shared_experts + num_specific_experts * num_tasks
-        self.shared_gate = nn.Sequential(
+        self.shared_gate = nn.Sequential(
+            nn.Linear(input_dim, shared_gate_expert_num),
+            nn.Softmax(dim=1),
+        )

         self.num_tasks = num_tasks

@@ -98,7 +134,9 @@ class CGCLayer(nn.Module):
         self, task_inputs: list[torch.Tensor], shared_input: torch.Tensor
     ) -> tuple[list[torch.Tensor], torch.Tensor]:
         if len(task_inputs) != self.num_tasks:
-            raise ValueError(
+            raise ValueError(
+                f"Expected {self.num_tasks} task inputs, got {len(task_inputs)}"
+            )

         shared_outputs = [expert(shared_input) for expert in self.shared_experts]
         shared_stack = torch.stack(shared_outputs, dim=0)  # [num_shared, B, D]

@@ -108,7 +146,7 @@ class CGCLayer(nn.Module):
         for task_idx in range(self.num_tasks):
             task_input = task_inputs[task_idx]
-            task_specific_outputs = [expert(task_input) for expert in self.specific_experts[task_idx]]
+            task_specific_outputs = [expert(task_input) for expert in self.specific_experts[task_idx]]  # type: ignore
             all_specific_for_shared.extend(task_specific_outputs)
             specific_stack = torch.stack(task_specific_outputs, dim=0)

@@ -139,7 +177,9 @@ class CGCLayer(nn.Module):
     ) -> list[dict]:
         if isinstance(params, list):
             if len(params) != num_tasks:
-                raise ValueError(
+                raise ValueError(
+                    f"Length of specific_expert_params ({len(params)}) must match num_tasks ({num_tasks})."
+                )
             return [p.copy() for p in params]
         return [params.copy() for _ in range(num_tasks)]

@@ -147,13 +187,13 @@ class CGCLayer(nn.Module):
 class PLE(BaseModel):
     """
     Progressive Layered Extraction
-
+
     PLE is an advanced multi-task learning model that extends MMOE by introducing
     both task-specific experts and shared experts at each level. It uses a progressive
     routing mechanism where experts from level k feed into gates at level k+1.
     This design better captures task-specific and shared information progressively.
     """
-
+
     @property
     def model_name(self):
         return "PLE"

@@ -162,32 +202,34 @@ class PLE(BaseModel):
     def default_task(self):
         num_tasks = getattr(self, "num_tasks", None)
         if num_tasks is not None and num_tasks > 0:
-            return [
-        return [
-
-    def __init__(
-        [original lines 169-190 lost in extraction]
+            return ["binary"] * num_tasks
+        return ["binary"]
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        shared_expert_params: dict,
+        specific_expert_params: dict | list[dict],
+        num_shared_experts: int,
+        num_specific_experts: int,
+        num_levels: int,
+        tower_params_list: list[dict],
+        target: list[str],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         self.num_tasks = len(target)

         super(PLE, self).__init__(

@@ -201,7 +243,7 @@ class PLE(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )

         self.loss = loss

@@ -215,7 +257,9 @@ class PLE(BaseModel):
         if optimizer_params is None:
             optimizer_params = {}
         if len(tower_params_list) != self.num_tasks:
-            raise ValueError(
+            raise ValueError(
+                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
+            )
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)

@@ -224,13 +268,13 @@ class PLE(BaseModel):
         # emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
         # dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
         # input_dim = emb_dim_total + dense_input_dim
-
+
         # Get expert output dimension
-        if
-        expert_output_dim = shared_expert_params[
+        if "dims" in shared_expert_params and len(shared_expert_params["dims"]) > 0:
+            expert_output_dim = shared_expert_params["dims"][-1]
         else:
             expert_output_dim = input_dim
-
+
         # Build CGC layers
         self.cgc_layers = nn.ModuleList()
         for level in range(num_levels):

@@ -245,16 +289,25 @@ class PLE(BaseModel):
             )
             self.cgc_layers.append(cgc_layer)
             expert_output_dim = cgc_layer.output_dim
-
+
         # Task-specific towers
         self.towers = nn.ModuleList()
         for tower_params in tower_params_list:
             tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
-        self.prediction_layer = PredictionLayer(
+        self.prediction_layer = PredictionLayer(
+            task_type=self.default_task, task_dims=[1] * self.num_tasks
+        )
         # Register regularization weights
-        self.register_regularization_weights(
-
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["cgc_layers", "towers"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=self.loss,
+            loss_params=loss_params,
+        )

     def forward(self, x):
         # Get all embeddings and flatten