nextrec-0.1.1-py3-none-any.whl → nextrec-0.1.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +4 -4
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -9
- nextrec/basic/callback.py +1 -0
- nextrec/basic/dataloader.py +168 -127
- nextrec/basic/features.py +24 -27
- nextrec/basic/layers.py +328 -159
- nextrec/basic/loggers.py +50 -37
- nextrec/basic/metrics.py +255 -147
- nextrec/basic/model.py +817 -462
- nextrec/data/__init__.py +5 -5
- nextrec/data/data_utils.py +16 -12
- nextrec/data/preprocessor.py +276 -252
- nextrec/loss/__init__.py +12 -12
- nextrec/loss/loss_utils.py +30 -22
- nextrec/loss/match_losses.py +116 -83
- nextrec/models/match/__init__.py +5 -5
- nextrec/models/match/dssm.py +70 -61
- nextrec/models/match/dssm_v2.py +61 -51
- nextrec/models/match/mind.py +89 -71
- nextrec/models/match/sdm.py +93 -81
- nextrec/models/match/youtube_dnn.py +62 -53
- nextrec/models/multi_task/esmm.py +49 -43
- nextrec/models/multi_task/mmoe.py +65 -56
- nextrec/models/multi_task/ple.py +92 -65
- nextrec/models/multi_task/share_bottom.py +48 -42
- nextrec/models/ranking/__init__.py +7 -7
- nextrec/models/ranking/afm.py +39 -30
- nextrec/models/ranking/autoint.py +70 -57
- nextrec/models/ranking/dcn.py +43 -35
- nextrec/models/ranking/deepfm.py +34 -28
- nextrec/models/ranking/dien.py +115 -79
- nextrec/models/ranking/din.py +84 -60
- nextrec/models/ranking/fibinet.py +51 -35
- nextrec/models/ranking/fm.py +28 -26
- nextrec/models/ranking/masknet.py +31 -31
- nextrec/models/ranking/pnn.py +30 -31
- nextrec/models/ranking/widedeep.py +36 -31
- nextrec/models/ranking/xdeepfm.py +46 -39
- nextrec/utils/__init__.py +9 -9
- nextrec/utils/embedding.py +1 -1
- nextrec/utils/initializer.py +23 -15
- nextrec/utils/optimizer.py +14 -10
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/METADATA +6 -40
- nextrec-0.1.2.dist-info/RECORD +51 -0
- nextrec-0.1.1.dist-info/RECORD +0 -51
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/WHEEL +0 -0
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/multi_task/ple.py
CHANGED
@@ -17,13 +17,13 @@ from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 class PLE(BaseModel):
     """
     Progressive Layered Extraction
-
+
     PLE is an advanced multi-task learning model that extends MMOE by introducing
     both task-specific experts and shared experts at each level. It uses a progressive
     routing mechanism where experts from level k feed into gates at level k+1.
     This design better captures task-specific and shared information progressively.
     """
-
+
     @property
     def model_name(self):
         return "PLE"
@@ -31,29 +31,31 @@ class PLE(BaseModel):
     @property
     def task_type(self):
         return self.task if isinstance(self.task, list) else [self.task]
-
-    def __init__(
-        …
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        shared_expert_params: dict,
+        specific_expert_params: dict,
+        num_shared_experts: int,
+        num_specific_experts: int,
+        num_levels: int,
+        tower_params_list: list[dict],
+        target: list[str],
+        task: str | list[str] = "binary",
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
+        device: str = "cpu",
+        model_id: str = "baseline",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+    ):
+
         super(PLE, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -66,13 +68,13 @@ class PLE(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id
+            model_id=model_id,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Number of tasks, experts, and levels
         self.num_tasks = len(target)
         self.num_shared_experts = num_shared_experts
@@ -80,10 +82,12 @@ class PLE(BaseModel):
         self.num_levels = num_levels
         if optimizer_params is None:
             optimizer_params = {}
-
+
         if len(tower_params_list) != self.num_tasks:
-            raise ValueError(
-                …
+            raise ValueError(
+                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
+            )
+
         # All features
         self.all_features = dense_features + sparse_features + sequence_features

@@ -91,42 +95,60 @@ class PLE(BaseModel):
         self.embedding = EmbeddingLayer(features=self.all_features)

         # Calculate input dimension
-        emb_dim_total = sum(
-        …
+        emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.all_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         input_dim = emb_dim_total + dense_input_dim
-
+
         # Get expert output dimension
-        if …
-            expert_output_dim = shared_expert_params[…
+        if "dims" in shared_expert_params and len(shared_expert_params["dims"]) > 0:
+            expert_output_dim = shared_expert_params["dims"][-1]
         else:
             expert_output_dim = input_dim
-
+
         # Build extraction layers (CGC layers)
         self.shared_experts_layers = nn.ModuleList()  # [num_levels]
         self.specific_experts_layers = nn.ModuleList()  # [num_levels, num_tasks]
-        self.gates_layers = …
-
+        self.gates_layers = (
+            nn.ModuleList()
+        )  # [num_levels, num_tasks + 1] (+1 for shared gate)
+
         for level in range(num_levels):
             # Input dimension for this level
             level_input_dim = input_dim if level == 0 else expert_output_dim
-
+
             # Shared experts for this level
             shared_experts = nn.ModuleList()
             for _ in range(num_shared_experts):
-                expert = MLP(…
+                expert = MLP(
+                    input_dim=level_input_dim,
+                    output_layer=False,
+                    **shared_expert_params,
+                )
                 shared_experts.append(expert)
             self.shared_experts_layers.append(shared_experts)
-
+
             # Task-specific experts for this level
             specific_experts_for_tasks = nn.ModuleList()
             for _ in range(self.num_tasks):
                 task_experts = nn.ModuleList()
                 for _ in range(num_specific_experts):
-                    expert = MLP(…
+                    expert = MLP(
+                        input_dim=level_input_dim,
+                        output_layer=False,
+                        **specific_expert_params,
+                    )
                     task_experts.append(expert)
                 specific_experts_for_tasks.append(task_experts)
             self.specific_experts_layers.append(specific_experts_for_tasks)
-
+
             # Gates for this level (num_tasks task gates + 1 shared gate)
             gates = nn.ModuleList()
             # Task-specific gates
@@ -134,40 +156,42 @@ class PLE(BaseModel):
             for _ in range(self.num_tasks):
                 gate = nn.Sequential(
                     nn.Linear(level_input_dim, num_experts_for_task_gate),
-                    nn.Softmax(dim=1)
+                    nn.Softmax(dim=1),
                 )
                 gates.append(gate)
             # Shared gate: contains all tasks' specific experts + shared experts
             # expert counts = num_shared_experts + num_specific_experts * num_tasks
-            num_experts_for_shared_gate = …
+            num_experts_for_shared_gate = (
+                num_shared_experts + num_specific_experts * self.num_tasks
+            )
             shared_gate = nn.Sequential(
                 nn.Linear(level_input_dim, num_experts_for_shared_gate),
-                nn.Softmax(dim=1)
+                nn.Softmax(dim=1),
             )
             gates.append(shared_gate)
             self.gates_layers.append(gates)
-
+
         # Task-specific towers
         self.towers = nn.ModuleList()
         for tower_params in tower_params_list:
             tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = PredictionLayer(
-            task_type=self.task_type,
-            task_dims=[1] * self.num_tasks
+            task_type=self.task_type, task_dims=[1] * self.num_tasks
         )

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr=…
-            include_modules=[…
+            embedding_attr="embedding",
+            include_modules=[
+                "shared_experts_layers",
+                "specific_experts_layers",
+                "gates_layers",
+                "towers",
+            ],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss
-        )
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

     def forward(self, x):
         # Get all embeddings and flatten
@@ -179,13 +203,17 @@ class PLE(BaseModel):

         # Progressive Layered Extraction: CGC
         for level in range(self.num_levels):
-            shared_experts = self.shared_experts_layers[…
-            …
+            shared_experts = self.shared_experts_layers[
+                level
+            ]  # ModuleList[num_shared_experts]
+            specific_experts = self.specific_experts_layers[
+                level
+            ]  # ModuleList[num_tasks][num_specific_experts]
+            gates = self.gates_layers[level]  # ModuleList[num_tasks + 1]

             # Compute shared experts output for this level
             # shared_expert_list: List[Tensor[B, expert_dim]]
-            shared_expert_list = [expert(shared_fea) for expert in shared_experts]
+            shared_expert_list = [expert(shared_fea) for expert in shared_experts]  # type: ignore[list-item]
             # [num_shared_experts, B, expert_dim]
             shared_expert_outputs = torch.stack(shared_expert_list, dim=0)

@@ -198,7 +226,7 @@ class PLE(BaseModel):
                 current_task_in = task_fea[task_idx]

                 # Specific task experts for this task
-                task_expert_modules = specific_experts[task_idx]
+                task_expert_modules = specific_experts[task_idx]  # type: ignore

                 # Specific task expert output list List[Tensor[B, expert_dim]]
                 task_specific_list = []
@@ -214,8 +242,7 @@ class PLE(BaseModel):
                 # Input for gate: shared_experts + own specific task experts
                 # [num_shared + num_specific, B, expert_dim]
                 all_expert_outputs = torch.cat(
-                    [shared_expert_outputs, task_specific_outputs],
-                    dim=0
+                    [shared_expert_outputs, task_specific_outputs], dim=0
                 )
                 # [B, num_experts, expert_dim]
                 all_expert_outputs_t = all_expert_outputs.permute(1, 0, 2)
@@ -239,7 +266,7 @@ class PLE(BaseModel):
             all_for_shared = torch.stack(all_for_shared_list, dim=1)

             # [B, num_all_experts]
-            shared_gate_weights = gates[self.num_tasks](shared_fea)
+            shared_gate_weights = gates[self.num_tasks](shared_fea)  # type: ignore
             # [B, 1, num_all_experts]
             shared_gate_weights = shared_gate_weights.unsqueeze(1)

@@ -257,4 +284,4 @@ class PLE(BaseModel):

         # [B, num_tasks]
         y = torch.cat(task_outputs, dim=1)
-        return self.prediction_layer(y)
+        return self.prediction_layer(y)
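Most of the ple.py hunks are formatter-only rewraps, but they also make the CGC wiring explicit: at each level a gate is a `Linear → Softmax` whose weights blend the stacked expert outputs. Below is a minimal standalone sketch of that gating step, with tensor shapes taken from the diff's comments; the `torch.bmm` fusion and all variable names are illustrative, not the package's code.

```python
import torch

# Illustrative shapes, matching the diff's comments:
# B = batch size, expert_dim = expert output width.
B, expert_dim = 4, 16
num_shared, num_specific = 2, 2

# One task's gate at a level sees shared experts plus its own specific experts.
shared_outputs = torch.randn(num_shared, B, expert_dim)
specific_outputs = torch.randn(num_specific, B, expert_dim)

# [num_shared + num_specific, B, expert_dim] -> [B, num_experts, expert_dim]
all_outputs = torch.cat([shared_outputs, specific_outputs], dim=0).permute(1, 0, 2)

# Gate: Linear(level_input_dim, num_experts) + Softmax(dim=1), as in the diff.
level_input_dim = 32
gate = torch.nn.Sequential(
    torch.nn.Linear(level_input_dim, num_shared + num_specific),
    torch.nn.Softmax(dim=1),
)
task_input = torch.randn(B, level_input_dim)
weights = gate(task_input).unsqueeze(1)             # [B, 1, num_experts]
fused = torch.bmm(weights, all_outputs).squeeze(1)  # [B, expert_dim]
print(fused.shape)  # torch.Size([4, 16])
```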
nextrec/models/multi_task/share_bottom.py
CHANGED
@@ -23,25 +23,27 @@ class ShareBottom(BaseModel):
     def task_type(self):
         # Multi-task model, return list of task types
         return self.task if isinstance(self.task, list) else [self.task]
-
-    def __init__(
-        …
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        bottom_params: dict,
+        tower_params_list: list[dict],
+        target: list[str],
+        task: str | list[str] = "binary",
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
+        device: str = "cpu",
+        model_id: str = "baseline",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+    ):
+
         super(ShareBottom, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -54,18 +56,20 @@ class ShareBottom(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id
+            model_id=model_id,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Number of tasks
         self.num_tasks = len(target)
         if len(tower_params_list) != self.num_tasks:
-            raise ValueError(
-                …
+            raise ValueError(
+                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
+            )
+
         # All features
         self.all_features = dense_features + sparse_features + sequence_features

@@ -73,54 +77,56 @@ class ShareBottom(BaseModel):
         self.embedding = EmbeddingLayer(features=self.all_features)

         # Calculate input dimension
-        emb_dim_total = sum(
-        …
+        emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.all_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         input_dim = emb_dim_total + dense_input_dim
-
+
         # Shared bottom network
         self.bottom = MLP(input_dim=input_dim, output_layer=False, **bottom_params)
-
+
         # Get bottom output dimension
-        if …
-            bottom_output_dim = bottom_params[…
+        if "dims" in bottom_params and len(bottom_params["dims"]) > 0:
+            bottom_output_dim = bottom_params["dims"][-1]
         else:
             bottom_output_dim = input_dim
-
+
         # Task-specific towers
         self.towers = nn.ModuleList()
         for tower_params in tower_params_list:
             tower = MLP(input_dim=bottom_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = PredictionLayer(
-            task_type=self.task_type,
-            task_dims=[1] * self.num_tasks
+            task_type=self.task_type, task_dims=[1] * self.num_tasks
         )

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr=…
-            include_modules=['bottom', 'towers']
+            embedding_attr="embedding", include_modules=["bottom", "towers"]
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss
-        )
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
-
+
         # Shared bottom
         bottom_output = self.bottom(input_flat)  # [B, bottom_dim]
-
+
         # Task-specific towers
         task_outputs = []
         for tower in self.towers:
             tower_output = tower(bottom_output)  # [B, 1]
             task_outputs.append(tower_output)
-
+
         # Stack outputs: [B, num_tasks]
         y = torch.cat(task_outputs, dim=1)
         return self.prediction_layer(y)
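Both multi-task files now spell out the same sizing rule: the tower input width is the last entry of the bottom/expert MLP's `"dims"` list, falling back to the raw input width when no hidden dims are given. A standalone sketch of that rule exactly as it appears in the diff (the `bottom_params` and `input_dim` values are illustrative):

```python
# Tower input width, per the diff's "Get bottom output dimension" block.
bottom_params = {"dims": [256, 128]}  # illustrative hidden sizes for the shared MLP
input_dim = 64                        # illustrative flattened feature width

if "dims" in bottom_params and len(bottom_params["dims"]) > 0:
    bottom_output_dim = bottom_params["dims"][-1]  # towers see the last hidden size
else:
    bottom_output_dim = input_dim  # no hidden layers: bottom passes input through

print(bottom_output_dim)  # 128
```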
nextrec/models/ranking/afm.py
CHANGED
@@ -23,24 +23,26 @@ class AFM(BaseModel):
     @property
     def task_type(self):
         return "binary"
-
-    def __init__(
-        …
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | list = [],
+        sparse_features: list[SparseFeature] | list = [],
+        sequence_features: list[SequenceFeature] | list = [],
+        attention_dim: int = 32,
+        attention_dropout: float = 0.0,
+        target: list[str] | list = [],
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        device: str = "cpu",
+        model_id: str = "baseline",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+    ):
+
         super(AFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -53,21 +55,25 @@ class AFM(BaseModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=20,
-            model_id=model_id
+            model_id=model_id,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.fm_features = sparse_features + sequence_features
         if len(self.fm_features) < 2:
-            raise ValueError(…
+            raise ValueError(
+                "AFM requires at least two sparse/sequence features to build pairwise interactions."
+            )

         # Assume uniform embedding dimension across FM fields
         self.embedding_dim = self.fm_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.fm_features):
-            raise ValueError(…
+            raise ValueError(
+                "All FM features must share the same embedding_dim for AFM."
+            )

         self.embedding = EmbeddingLayer(features=self.fm_features)

@@ -82,18 +88,21 @@ class AFM(BaseModel):

         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr=…
-            include_modules=[…
+            embedding_attr="embedding",
+            include_modules=[
+                "linear",
+                "attention_linear",
+                "attention_p",
+                "output_projection",
+            ],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss
-        )
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

     def forward(self, x):
-        field_emb = self.embedding(…
+        field_emb = self.embedding(
+            x=x, features=self.fm_features, squeeze_dim=False
+        )  # [B, F, D]
         input_linear = field_emb.flatten(start_dim=1)
         y_linear = self.linear(input_linear)
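The afm.py diff ends just after the linear term in `forward`, so the pairwise-interaction part is not shown. For orientation, here is a sketch of the standard AFM attention term (Xiao et al., 2017) that `attention_dim` and `attention_dropout` would configure; the module roles are inferred from the `include_modules` names in the diff, not from the file itself.

```python
import itertools
import torch
import torch.nn as nn

B, F, D, A = 4, 5, 8, 32          # batch, fields, embedding_dim, attention_dim
field_emb = torch.randn(B, F, D)  # [B, F, D], as in the diff's comment

# All pairwise element-wise products v_i * v_j: [B, F*(F-1)/2, D]
pairs = list(itertools.combinations(range(F), 2))
p = torch.stack([field_emb[:, i] * field_emb[:, j] for i, j in pairs], dim=1)

# Attention over pairs: a_ij = softmax(h^T relu(W p_ij + b))
attention_linear = nn.Linear(D, A)          # plausibly the diff's "attention_linear"
attention_p = nn.Linear(A, 1, bias=False)   # plausibly the diff's "attention_p"
scores = attention_p(torch.relu(attention_linear(p)))  # [B, num_pairs, 1]
alpha = torch.softmax(scores, dim=1)
# (attention_dropout would be applied to alpha here; omitted in this sketch)

# Attention-weighted sum of pair interactions, projected to a scalar logit
output_projection = nn.Linear(D, 1)         # plausibly the diff's "output_projection"
y_interaction = output_projection((alpha * p).sum(dim=1))  # [B, 1]
print(y_interaction.shape)  # torch.Size([4, 1])
```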