nextrec 0.2.6__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. nextrec/__version__.py +1 -1
  2. nextrec/basic/activation.py +4 -8
  3. nextrec/basic/callback.py +1 -1
  4. nextrec/basic/features.py +33 -25
  5. nextrec/basic/layers.py +164 -601
  6. nextrec/basic/loggers.py +3 -4
  7. nextrec/basic/metrics.py +39 -115
  8. nextrec/basic/model.py +248 -174
  9. nextrec/basic/session.py +1 -5
  10. nextrec/data/__init__.py +12 -0
  11. nextrec/data/data_utils.py +3 -27
  12. nextrec/data/dataloader.py +26 -34
  13. nextrec/data/preprocessor.py +2 -1
  14. nextrec/loss/listwise.py +6 -4
  15. nextrec/loss/loss_utils.py +10 -6
  16. nextrec/loss/pairwise.py +5 -3
  17. nextrec/loss/pointwise.py +7 -13
  18. nextrec/models/match/mind.py +110 -1
  19. nextrec/models/multi_task/esmm.py +46 -27
  20. nextrec/models/multi_task/mmoe.py +48 -30
  21. nextrec/models/multi_task/ple.py +156 -141
  22. nextrec/models/multi_task/poso.py +413 -0
  23. nextrec/models/multi_task/share_bottom.py +43 -26
  24. nextrec/models/ranking/__init__.py +2 -0
  25. nextrec/models/ranking/autoint.py +1 -1
  26. nextrec/models/ranking/dcn.py +20 -1
  27. nextrec/models/ranking/dcn_v2.py +84 -0
  28. nextrec/models/ranking/deepfm.py +44 -18
  29. nextrec/models/ranking/dien.py +130 -27
  30. nextrec/models/ranking/masknet.py +13 -67
  31. nextrec/models/ranking/widedeep.py +39 -18
  32. nextrec/models/ranking/xdeepfm.py +34 -1
  33. nextrec/utils/common.py +26 -1
  34. nextrec-0.3.1.dist-info/METADATA +306 -0
  35. nextrec-0.3.1.dist-info/RECORD +56 -0
  36. {nextrec-0.2.6.dist-info → nextrec-0.3.1.dist-info}/WHEEL +1 -1
  37. nextrec-0.2.6.dist-info/METADATA +0 -281
  38. nextrec-0.2.6.dist-info/RECORD +0 -54
  39. {nextrec-0.2.6.dist-info → nextrec-0.3.1.dist-info}/licenses/LICENSE +0 -0
nextrec/basic/session.py CHANGED
@@ -13,8 +13,6 @@ Date: create on 23/11/2025
  Author: Yang Zhou,zyaztec@gmail.com
  """

- from __future__ import annotations
-
  import os
  import tempfile
  from dataclasses import dataclass
@@ -95,7 +93,7 @@ def create_session(experiment_id: str | Path | None = None) -> Session:
      return Session(experiment_id=exp_id, root=root)

  def resolve_save_path(
-     path: str | Path | None,
+     path: str | os.PathLike | Path | None,
      default_dir: str | Path,
      default_name: str,
      suffix: str,
@@ -146,5 +144,3 @@ def resolve_save_path(
      file_stem = f"{file_stem}_{timestamp}"

      return (base_dir / f"{file_stem}{normalized_suffix}").resolve()
-
-
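The widened annotation means resolve_save_path now accepts any path-like object, not just str or pathlib.Path. A minimal sketch of a call site, assuming any parameters after suffix have defaults; the argument values are illustrative, not taken from the package:

    from pathlib import Path
    from nextrec.basic.session import resolve_save_path

    # `path` may be a str, os.PathLike, pathlib.Path, or None (then default_dir/default_name apply).
    ckpt_path = resolve_save_path(
        path=Path("experiments") / "run1",
        default_dir="checkpoints",
        default_name="model",
        suffix=".pt",
    )
    print(ckpt_path)  # an absolute path ending in the normalized suffix, per the return statement above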
nextrec/data/__init__.py CHANGED
@@ -20,6 +20,13 @@ from nextrec.data.data_utils import (
  )
  from nextrec.basic.features import FeatureSpecMixin
  from nextrec.data import data_utils
+ from nextrec.data.dataloader import (
+     TensorDictDataset,
+     FileDataset,
+     RecDataLoader,
+     build_tensors_from_data,
+ )
+ from nextrec.data.preprocessor import DataProcessor

  __all__ = [
      'collate_fn',
@@ -33,4 +40,9 @@ __all__ = [
      'load_dataframes',
      'FeatureSpecMixin',
      'data_utils',
+     'TensorDictDataset',
+     'FileDataset',
+     'RecDataLoader',
+     'build_tensors_from_data',
+     'DataProcessor',
  ]
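With these re-exports, the 0.3.1 loader and preprocessing entry points are importable straight from nextrec.data:

    from nextrec.data import (
        RecDataLoader,
        TensorDictDataset,
        FileDataset,
        DataProcessor,
        build_tensors_from_data,
    )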
nextrec/data/data_utils.py CHANGED
@@ -6,19 +6,17 @@ import pandas as pd
  import pyarrow.parquet as pq
  from pathlib import Path

-
  def _stack_section(batch: list[dict], section: str):
      """Stack one section of the batch (features/labels/ids)."""
      entries = [item.get(section) for item in batch if item.get(section) is not None]
      if not entries:
          return None
      merged: dict = {}
-     for name in entries[0]:
+     for name in entries[0]: # type: ignore
          tensors = [item[section][name] for item in batch if item.get(section) is not None and name in item[section]]
          merged[name] = torch.stack(tensors, dim=0)
      return merged

-
  def collate_fn(batch):
      """
      Collate a list of sample dicts into the unified batch format:
@@ -66,7 +64,6 @@ def collate_fn(batch):
          result.append(stacked)
      return tuple(result)

-
  def get_column_data(data: dict | pd.DataFrame, name: str):
      """Extract column data from various data structures."""
      if isinstance(data, dict):
@@ -80,7 +77,6 @@ def get_column_data(data: dict | pd.DataFrame, name: str):
          return getattr(data, name)
      raise KeyError(f"Unsupported data type for extracting column {name}")

-
  def resolve_file_paths(path: str) -> tuple[list[str], str]:
      """Resolve file or directory path into a sorted list of files and file type."""
      path_obj = Path(path)
@@ -106,7 +102,6 @@ def resolve_file_paths(path: str) -> tuple[list[str], str]:

      raise ValueError(f"Invalid path: {path}")

-
  def iter_file_chunks(file_path: str, file_type: str, chunk_size: int):
      """Yield DataFrame chunks for CSV/Parquet without loading the whole file."""
      if file_type == "csv":
@@ -116,19 +111,16 @@ def iter_file_chunks(file_path: str, file_type: str, chunk_size: int):
          for batch in parquet_file.iter_batches(batch_size=chunk_size):
              yield batch.to_pandas()

-
  def read_table(file_path: str, file_type: str) -> pd.DataFrame:
      """Read a single CSV/Parquet file."""
      if file_type == "csv":
          return pd.read_csv(file_path)
      return pd.read_parquet(file_path)

-
  def load_dataframes(file_paths: list[str], file_type: str) -> list[pd.DataFrame]:
      """Load multiple files of the same type into DataFrames."""
      return [read_table(fp, file_type) for fp in file_paths]

-
  def default_output_dir(path: str) -> Path:
      """Generate a default output directory path based on the input path."""
      path_obj = Path(path)
@@ -136,19 +128,16 @@ def default_output_dir(path: str) -> Path:
          return path_obj.parent / f"{path_obj.stem}_preprocessed"
      return path_obj.with_name(f"{path_obj.name}_preprocessed")

-
- def split_dict_random(data_dict: dict, test_size: float=0.2, random_state:int|None=None):
+ def split_dict_random(data_dict: dict, test_size: float = 0.2, random_state: int | None = None):
      """Randomly split a dictionary of data into training and testing sets."""
      lengths = [len(v) for v in data_dict.values()]
      if len(set(lengths)) != 1:
          raise ValueError(f"Length mismatch: {lengths}")
      n = lengths[0]
-
      rng = np.random.default_rng(random_state)
      perm = rng.permutation(n)
      cut = int(round(n * (1 - test_size)))
      train_idx, test_idx = perm[:cut], perm[cut:]
-
      def take(v, idx):
          if isinstance(v, np.ndarray):
              return v[idx]
@@ -157,12 +146,10 @@ def split_dict_random(data_dict: dict, test_size: float=0.2, random_state:int|No
          else:
              v_arr = np.asarray(v, dtype=object)
              return v_arr[idx]
-
      train_dict = {k: take(v, train_idx) for k, v in data_dict.items()}
      test_dict = {k: take(v, test_idx) for k, v in data_dict.items()}
      return train_dict, test_dict

-
  def build_eval_candidates(
      df_all: pd.DataFrame,
      user_col: str,
@@ -179,37 +166,26 @@ def build_eval_candidates(

      users = df_all[user_col].unique()
      all_items = item_features[item_col].unique()
-
      rows = []
-
-     user_hist_items = {
-         u: df_all[df_all[user_col] == u][item_col].unique()
-         for u in users
-     }
-
+     user_hist_items = {u: df_all[df_all[user_col] == u][item_col].unique() for u in users}
      for u in users:
          df_user = df_all[df_all[user_col] == u]
          pos_items = df_user[df_user[label_col] == 1][item_col].unique()
          if len(pos_items) == 0:
              continue
-
          pos_items = pos_items[:num_pos_per_user]
          seen_items = set(user_hist_items[u])
-
          neg_pool = np.setdiff1d(all_items, np.fromiter(seen_items, dtype=all_items.dtype))
          if len(neg_pool) == 0:
              continue
-
          for pos in pos_items:
              if len(neg_pool) <= num_neg_per_pos:
                  neg_items = neg_pool
              else:
                  neg_items = rng.choice(neg_pool, size=num_neg_per_pos, replace=False)
-
              rows.append((u, pos, 1))
              for ni in neg_items:
                  rows.append((u, ni, 0))
-
      eval_df = pd.DataFrame(rows, columns=[user_col, item_col, label_col])
      eval_df = eval_df.merge(user_features, on=user_col, how='left')
      eval_df = eval_df.merge(item_features, on=item_col, how='left')
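split_dict_random permutes every column with the same index, so parallel arrays stay row-aligned. A small self-contained sketch (column names are made up for illustration):

    import numpy as np
    from nextrec.data.data_utils import split_dict_random

    data = {
        "user_id": np.arange(10),
        "item_id": np.arange(10, 20),
        "label": np.zeros(10, dtype=np.int64),
    }
    train, test = split_dict_random(data, test_size=0.2, random_state=42)
    # cut = round(10 * 0.8) = 8 rows go to train, 2 to test, identically permuted in every column
    assert len(train["user_id"]) == 8 and len(test["item_id"]) == 2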
nextrec/data/dataloader.py CHANGED
@@ -2,7 +2,7 @@
  Dataloader definitions

  Date: create on 27/10/2025
- Update: 25/11/2025
+ Checkpoint: edit on 29/11/2025
  Author: Yang Zhou,zyaztec@gmail.com
  """
  import os
@@ -14,7 +14,7 @@ import pandas as pd
  import pyarrow.parquet as pq

  from pathlib import Path
- from typing import Iterator, Literal, Union, Optional
+ from typing import cast

  from torch.utils.data import DataLoader, Dataset, IterableDataset
  from nextrec.data.preprocessor import DataProcessor
@@ -35,15 +35,14 @@ class TensorDictDataset(Dataset):
          self.labels = tensors.get("labels")
          self.ids = tensors.get("ids")
          if not self.features:
-             raise ValueError("Dataset requires at least one feature tensor.")
+             raise ValueError("[TensorDictDataset Error] Dataset requires at least one feature tensor.")
          lengths = [tensor.shape[0] for tensor in self.features.values()]
          if not lengths:
-             raise ValueError("Feature tensors are empty.")
+             raise ValueError("[TensorDictDataset Error] Feature tensors are empty.")
          self.length = lengths[0]
          for length in lengths[1:]:
              if length != self.length:
-                 raise ValueError("All feature tensors must have the same length.")
-
+                 raise ValueError("[TensorDictDataset Error] All feature tensors must have the same length.")
      def __len__(self) -> int:
          return self.length

@@ -53,7 +52,6 @@
          sample_ids = {name: tensor[idx] for name, tensor in self.ids.items()} if self.ids else None
          return {"features": sample_features, "labels": sample_labels, "ids": sample_ids}

-
  class FileDataset(FeatureSpecMixin, IterableDataset):
      def __init__(self,
                   file_paths: list[str], # file paths to read, containing CSV or Parquet files
@@ -109,18 +107,14 @@ class FileDataset(FeatureSpecMixin, IterableDataset):
      def _dataframe_to_tensors(self, df: pd.DataFrame) -> dict | None:
          if self.processor is not None:
              if not self.processor.is_fitted:
-                 raise ValueError("DataProcessor must be fitted before using in streaming mode")
+                 raise ValueError("[DataLoader Error] DataProcessor must be fitted before using in streaming mode")
              transformed_data = self.processor.transform(df, return_dict=True)
          else:
              transformed_data = df
-
-         batch = build_tensors_from_data(
-             data=transformed_data,
-             raw_data=df,
-             features=self.all_features,
-             target_columns=self.target_columns,
-             id_columns=self.id_columns,
-         )
+         if isinstance(transformed_data, list):
+             raise TypeError("[DataLoader Error] DataProcessor.transform returned file paths; use return_dict=True with in-memory data for streaming.")
+         safe_data = cast(dict | pd.DataFrame, transformed_data)
+         batch = build_tensors_from_data(data=safe_data, raw_data=df, features=self.all_features, target_columns=self.target_columns, id_columns=self.id_columns)
          if batch is not None:
              batch["_already_batched"] = True
          return batch
@@ -133,12 +127,12 @@ class RecDataLoader(FeatureSpecMixin):
                   sequence_features: list[SequenceFeature] | None = None,
                   target: list[str] | None | str = None,
                   id_columns: str | list[str] | None = None,
-                  processor: Optional['DataProcessor'] = None):
+                  processor: DataProcessor | None = None):
          self.processor = processor
          self._set_feature_config(dense_features, sparse_features, sequence_features, target, id_columns)

      def create_dataloader(self,
-                           data: Union[dict, pd.DataFrame, str, DataLoader],
+                           data: dict | pd.DataFrame | str | DataLoader,
                            batch_size: int = 32,
                            shuffle: bool = True,
                            load_full: bool = True,
@@ -150,21 +144,21 @@
          elif isinstance(data, (dict, pd.DataFrame)):
              return self._create_from_memory(data=data, batch_size=batch_size, shuffle=shuffle)
          else:
-             raise ValueError(f"Unsupported data type: {type(data)}")
+             raise ValueError(f"[RecDataLoader Error] Unsupported data type: {type(data)}")

      def _create_from_memory(self,
-                             data: Union[dict, pd.DataFrame],
+                             data: dict | pd.DataFrame,
                              batch_size: int,
                              shuffle: bool) -> DataLoader:
          raw_data = data

          if self.processor is not None:
              if not self.processor.is_fitted:
-                 raise ValueError("DataProcessor must be fitted before transforming data in memory")
-             data = self.processor.transform(data, return_dict=True)
+                 raise ValueError("[RecDataLoader Error] DataProcessor must be fitted before transforming data in memory")
+             data = self.processor.transform(data, return_dict=True) # type: ignore
          tensors = build_tensors_from_data(data=data,raw_data=raw_data, features=self.all_features, target_columns=self.target_columns, id_columns=self.id_columns,)
          if tensors is None:
-             raise ValueError("No valid tensors could be built from the provided data.")
+             raise ValueError("[RecDataLoader Error] No valid tensors could be built from the provided data.")
          dataset = TensorDictDataset(tensors)
          return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)

@@ -188,11 +182,11 @@ class RecDataLoader(FeatureSpecMixin):
                      df = read_table(file_path, file_type)
                      dfs.append(df)
                  except MemoryError as exc:
-                     raise MemoryError(f"Out of memory while reading {file_path}. Consider using load_full=False with streaming.") from exc
+                     raise MemoryError(f"[RecDataLoader Error] Out of memory while reading {file_path}. Consider using load_full=False with streaming.") from exc
              try:
                  combined_df = pd.concat(dfs, ignore_index=True)
              except MemoryError as exc:
-                 raise MemoryError(f"Out of memory while concatenating loaded data (approx {total_bytes / (1024**3):.2f} GB). Use load_full=False to stream or reduce chunk_size.") from exc
+                 raise MemoryError(f"[RecDataLoader Error] Out of memory while concatenating loaded data (approx {total_bytes / (1024**3):.2f} GB). Use load_full=False to stream or reduce chunk_size.") from exc
              return self._create_from_memory(combined_df, batch_size, shuffle,)
          else:
              return self._load_files_streaming(file_paths, file_type, batch_size, chunk_size, shuffle)
@@ -204,9 +198,9 @@
                              chunk_size: int,
                              shuffle: bool) -> DataLoader:
          if shuffle:
-             logging.warning("Shuffle is ignored in streaming mode (IterableDataset).")
+             logging.warning("[RecDataLoader Warning] Shuffle is ignored in streaming mode (IterableDataset).")
          if batch_size != 1:
-             logging.warning("Streaming mode enforces batch_size=1; tune chunk_size to control memory/throughput.")
+             logging.warning("[RecDataLoader Warning] Streaming mode enforces batch_size=1; tune chunk_size to control memory/throughput.")
          dataset = FileDataset(
              file_paths=file_paths,
              dense_features=self.dense_features,
@@ -230,22 +224,20 @@ def _normalize_sequence_column(column, feature: SequenceFeature) -> np.ndarray:
      if column.ndim == 0:
          column = column.reshape(1)
      if column.dtype == object and any(isinstance(v, str) for v in column.ravel()):
-         raise TypeError(f"Sequence feature '{feature.name}' expects numeric sequences; found string values.")
+         raise TypeError(f"[RecDataLoader Error] Sequence feature '{feature.name}' expects numeric sequences; found string values.")
      if column.dtype == object and len(column) > 0 and isinstance(column[0], (list, tuple, np.ndarray)):
          sequences = []
          for seq in column:
              if isinstance(seq, str):
-                 raise TypeError(f"Sequence feature '{feature.name}' expects numeric sequences; found string values.")
+                 raise TypeError(f"[RecDataLoader Error] Sequence feature '{feature.name}' expects numeric sequences; found string values.")
              if isinstance(seq, (list, tuple, np.ndarray)):
                  arr = np.asarray(seq, dtype=np.int64)
              else:
                  arr = np.asarray([seq], dtype=np.int64)
              sequences.append(arr)
-
          max_len = getattr(feature, "max_len", 0)
          if max_len <= 0:
              max_len = max((len(seq) for seq in sequences), default=1)
-
          pad_value = getattr(feature, "padding_idx", 0)
          padded = []
          for seq in sequences:
@@ -270,7 +262,7 @@ def build_tensors_from_data( # noqa: C901
      for feature in features:
          column = get_column_data(data, feature.name)
          if column is None:
-             raise ValueError(f"Feature column '{feature.name}' not found in data")
+             raise ValueError(f"[RecDataLoader Error] Feature column '{feature.name}' not found in data")
          if isinstance(feature, SequenceFeature):
              tensor = torch.from_numpy(_normalize_sequence_column(column, feature))
          elif isinstance(feature, DenseFeature):
@@ -301,11 +293,11 @@ def build_tensors_from_data( # noqa: C901
          if column is None:
              column = get_column_data(data, id_col)
          if column is None:
-             raise KeyError(f"ID column '{id_col}' not found in provided data.")
+             raise KeyError(f"[RecDataLoader Error] ID column '{id_col}' not found in provided data.")
          try:
              id_arr = np.asarray(column, dtype=np.int64)
          except Exception as exc:
-             raise TypeError( f"ID column '{id_col}' must contain numeric values. Received dtype={np.asarray(column).dtype}, error: {exc}") from exc
+             raise TypeError( f"[RecDataLoader Error] ID column '{id_col}' must contain numeric values. Received dtype={np.asarray(column).dtype}, error: {exc}") from exc
          id_tensors[id_col] = torch.from_numpy(id_arr)
      if not feature_tensors:
          return None
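TensorDictDataset expects a dict with "features"/"labels"/"ids" sections, each mapping a name to an equal-length tensor, and collate_fn stacks those sections per batch. A hedged sketch built by hand (feature names are illustrative, and the collated batch layout is inferred from the docstring above):

    import torch
    from torch.utils.data import DataLoader
    from nextrec.data import TensorDictDataset, collate_fn

    tensors = {
        "features": {"user_id": torch.arange(8), "price": torch.rand(8)},
        "labels": {"label": torch.randint(0, 2, (8,))},
        "ids": None,
    }
    dataset = TensorDictDataset(tensors)
    dl = DataLoader(dataset, batch_size=4, collate_fn=collate_fn)
    batch = next(iter(dl))  # stacked feature/label/id sections in the unified batch format

For file-backed data, RecDataLoader.create_dataloader routes a CSV/Parquet path to this in-memory path when load_full=True, or to FileDataset when load_full=False; in that streaming mode, as the warnings above note, shuffle is ignored and batch_size is forced to 1, with chunk_size controlling memory and throughput.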
nextrec/data/preprocessor.py CHANGED
@@ -12,6 +12,7 @@ import logging
  import numpy as np
  import pandas as pd

+ import tqdm
  from pathlib import Path
  from typing import Dict, Union, Optional, Literal, Any
  from sklearn.preprocessing import (
@@ -665,7 +666,7 @@ class DataProcessor(FeatureSpecMixin):
          output_root = base_output_dir / "transformed_data"
          output_root.mkdir(parents=True, exist_ok=True)
          saved_paths = []
-         for file_path in file_paths:
+         for file_path in tqdm.tqdm(file_paths, desc="Transforming files", unit="file"):
              df = read_table(file_path, file_type)
              transformed_df = self._transform_in_memory(
                  df,
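The only behavioral change here is progress reporting: the per-file transform loop now goes through tqdm. The same pattern in isolation (the file names and the pandas round-trip are placeholders for what DataProcessor does internally):

    import tqdm
    import pandas as pd

    file_paths = ["part-0001.parquet", "part-0002.parquet"]  # hypothetical inputs
    for file_path in tqdm.tqdm(file_paths, desc="Transforming files", unit="file"):
        df = pd.read_parquet(file_path)  # stands in for read_table(file_path, file_type)
        # ... transform df and write it under the output directory, as the surrounding code does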
nextrec/loss/listwise.py CHANGED
@@ -1,8 +1,10 @@
  """
  Listwise loss functions for ranking and contrastive training.
- """

- from typing import Optional
+ Date: create on 27/10/2025
+ Checkpoint: edit on 29/11/2025
+ Author: Yang Zhou, zyaztec@gmail.com
+ """

  import torch
  import torch.nn as nn
@@ -112,7 +114,7 @@ class ApproxNDCGLoss(nn.Module):
          self.temperature = temperature
          self.reduction = reduction

-     def _ideal_dcg(self, labels: torch.Tensor, k: Optional[int]) -> torch.Tensor:
+     def _ideal_dcg(self, labels: torch.Tensor, k: int | None) -> torch.Tensor:
          # labels: [B, L]
          sorted_labels, _ = torch.sort(labels, dim=1, descending=True)
          if k is not None:
@@ -127,7 +129,7 @@ class ApproxNDCGLoss(nn.Module):
          return ideal_dcg

      def forward(
-         self, scores: torch.Tensor, labels: torch.Tensor, k: Optional[int] = None
+         self, scores: torch.Tensor, labels: torch.Tensor, k: int | None = None
      ) -> torch.Tensor:
          """
          scores: [B, L]
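Only the annotation style changes here (Optional[int] becomes int | None); the call pattern is unchanged. A hedged sketch, assuming the constructor's temperature/reduction arguments have defaults (get_loss_fn("approx_ndcg") with no kwargs suggests they do):

    import torch
    from nextrec.loss.listwise import ApproxNDCGLoss

    scores = torch.randn(4, 10)                    # [B, L] predicted scores
    labels = torch.randint(0, 2, (4, 10)).float()  # [B, L] relevance labels
    loss_fn = ApproxNDCGLoss()                     # defaults assumed
    loss = loss_fn(scores, labels, k=5)            # k truncates the ideal DCG, as in _ideal_dcg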
nextrec/loss/loss_utils.py CHANGED
@@ -1,5 +1,9 @@
  """
  Loss utilities for NextRec.
+
+ Date: create on 27/10/2025
+ Checkpoint: edit on 29/11/2025
+ Author: Yang Zhou, zyaztec@gmail.com
  """

  from typing import Literal
@@ -39,7 +43,7 @@ def get_loss_fn(loss=None, **kw):
      if isinstance(loss, nn.Module):
          return loss
      if loss is None:
-         raise ValueError("loss must be provided explicitly")
+         raise ValueError("[Loss Error] loss must be provided explicitly")
      if loss in ["bce", "binary_crossentropy"]:
          return nn.BCELoss(**kw)
      if loss == "weighted_bce":
@@ -75,15 +79,15 @@ def get_loss_fn(loss=None, **kw):
      if loss == "approx_ndcg":
          return ApproxNDCGLoss(**kw)

-     raise ValueError(f"Unsupported loss: {loss}")
+     raise ValueError(f"[Loss Error] Unsupported loss: {loss}")

  def get_loss_kwargs(loss_params: dict | list[dict] | None, index: int = 0) -> dict:
      """
-     解析每个 head 对应的 loss_kwargs。
+     Parse loss_kwargs for each head.

-     - loss_params None -> {}
-     - loss_params dict -> 所有 head 共用
-     - loss_params list[dict] -> loss_params[index](若存在且非 None),否则 {}
+     - loss_params is None -> {}
+     - loss_params is dict -> shared by all heads
+     - loss_params is list[dict] -> use loss_params[index] (if exists and not None), else {}
      """
      if loss_params is None:
          return {}
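get_loss_fn resolves string names (and passes nn.Module instances through unchanged), while get_loss_kwargs selects the per-head kwargs. For a two-head setup, using only names and kwargs visible in this file and in listwise.py:

    from torch import nn
    from nextrec.loss.loss_utils import get_loss_fn, get_loss_kwargs

    loss_names = ["bce", "approx_ndcg"]
    loss_params = [None, {"temperature": 0.1}]   # per-head kwargs; a None entry falls back to {}
    loss_fns = [get_loss_fn(name, **get_loss_kwargs(loss_params, index=i))
                for i, name in enumerate(loss_names)]

    assert isinstance(get_loss_fn(nn.BCELoss()), nn.BCELoss)  # modules are returned as-is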
nextrec/loss/pairwise.py CHANGED
@@ -1,5 +1,9 @@
  """
  Pairwise loss functions for learning-to-rank and matching tasks.
+
+ Date: create on 27/10/2025
+ Checkpoint: edit on 29/11/2025
+ Author: Yang Zhou, zyaztec@gmail.com
  """

  from typing import Literal
@@ -32,7 +36,6 @@ class BPRLoss(nn.Module):
              return loss.sum()
          return loss

-
  class HingeLoss(nn.Module):
      """
      Hinge loss for pairwise ranking.
@@ -56,7 +59,6 @@ class HingeLoss(nn.Module):
              return loss.sum()
          return loss

-
  class TripletLoss(nn.Module):
      """
      Triplet margin loss with cosine or euclidean distance.
@@ -95,7 +97,7 @@ class TripletLoss(nn.Module):
              if neg_dist.dim() == 2:
                  pos_dist = pos_dist.unsqueeze(1)
          else:
-             raise ValueError(f"Unsupported distance: {self.distance}")
+             raise ValueError(f"[Loss Error] Unsupported distance: {self.distance}")

          loss = torch.clamp(pos_dist - neg_dist + self.margin, min=0)
          if self.reduction == "mean":
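The prefixed error message is the only behavioral change to TripletLoss; the interfaces follow the usual pairwise conventions. A sketch with the forward signatures assumed (the hunks only show the reduction and distance handling, not the full forward definitions):

    import torch
    from nextrec.loss.pairwise import BPRLoss, TripletLoss

    pos_scores = torch.randn(32)                  # scores for positive items
    neg_scores = torch.randn(32)                  # scores for sampled negatives
    bpr_loss = BPRLoss()(pos_scores, neg_scores)  # assumed signature: forward(pos_score, neg_score)

    anchor, positive, negative = (torch.randn(32, 64) for _ in range(3))
    triplet = TripletLoss(margin=1.0, distance="cosine")  # margin/distance names assumed from self.margin/self.distance
    triplet_loss = triplet(anchor, positive, negative)    # assumed signature: forward(anchor, positive, negative)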
nextrec/loss/pointwise.py CHANGED
@@ -1,5 +1,9 @@
  """
  Pointwise loss functions, including imbalance-aware variants.
+
+ Date: create on 27/10/2025
+ Checkpoint: edit on 29/11/2025
+ Author: Yang Zhou, zyaztec@gmail.com
  """

  from typing import Optional, Sequence
@@ -55,10 +59,7 @@ class WeightedBCELoss(nn.Module):
          self.auto_balance = auto_balance

          if pos_weight is not None:
-             self.register_buffer(
-                 "pos_weight",
-                 torch.as_tensor(pos_weight, dtype=torch.float32),
-             )
+             self.register_buffer("pos_weight", torch.as_tensor(pos_weight, dtype=torch.float32),)
          else:
              self.pos_weight = None

@@ -128,9 +129,7 @@ class FocalLoss(nn.Module):
          else:
              targets = targets.float()
          if self.logits:
-             ce_loss = F.binary_cross_entropy_with_logits(
-                 inputs, targets, reduction="none"
-             )
+             ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
              probs = torch.sigmoid(inputs)
          else:
              ce_loss = F.binary_cross_entropy(inputs, targets, reduction="none")
@@ -140,7 +139,6 @@ class FocalLoss(nn.Module):
          alpha_factor = self._get_binary_alpha(targets, inputs.device)
          focal_weight = (1.0 - p_t) ** self.gamma
          loss = alpha_factor * focal_weight * ce_loss
-
          if self.reduction == "mean":
              return loss.mean()
          if self.reduction == "sum":
@@ -163,13 +161,11 @@ class FocalLoss(nn.Module):
          alpha_tensor = torch.tensor(self.alpha, device=device, dtype=targets.dtype)
          return torch.where(targets == 1, alpha_tensor, 1 - alpha_tensor)

-
  class ClassBalancedFocalLoss(nn.Module):
      """
      Focal loss weighted by effective number of samples per class.
      Reference: "Class-Balanced Loss Based on Effective Number of Samples"
      """
-
      def __init__(
          self,
          class_counts: Sequence[int] | torch.Tensor,
@@ -187,9 +183,7 @@ class ClassBalancedFocalLoss(nn.Module):
          self.register_buffer("class_weights", weights)

      def forward(self, inputs: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
-         focal = FocalLoss(
-             gamma=self.gamma, alpha=self.class_weights, reduction="none", logits=True
-         )
+         focal = FocalLoss(gamma=self.gamma, alpha=self.class_weights, reduction="none", logits=True)
          loss = focal(inputs, targets)
          if self.reduction == "mean":
              return loss.mean()
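FocalLoss keeps its gamma/alpha/reduction/logits keywords (the inner FocalLoss call in ClassBalancedFocalLoss.forward shows them), and ClassBalancedFocalLoss is driven by raw class counts. A short sketch; the numeric values are illustrative and the remaining ClassBalancedFocalLoss constructor arguments are assumed to have defaults:

    import torch
    from nextrec.loss.pointwise import FocalLoss, ClassBalancedFocalLoss

    logits = torch.randn(16)
    targets = torch.randint(0, 2, (16,)).float()

    focal = FocalLoss(gamma=2.0, alpha=0.25, reduction="mean", logits=True)
    loss = focal(logits, targets)

    cb_focal = ClassBalancedFocalLoss(class_counts=[900, 100])  # weights from the effective number of samples
    cb_loss = cb_focal(logits, targets)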
nextrec/models/match/mind.py CHANGED
@@ -13,7 +13,116 @@ from typing import Literal

  from nextrec.basic.model import BaseMatchModel
  from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
- from nextrec.basic.layers import MLP, EmbeddingLayer, CapsuleNetwork
+ from nextrec.basic.layers import MLP, EmbeddingLayer
+
+ class MultiInterestSA(nn.Module):
+     """Multi-interest self-attention extractor from MIND (Li et al., 2019)."""
+
+     def __init__(self, embedding_dim, interest_num, hidden_dim=None):
+         super(MultiInterestSA, self).__init__()
+         self.embedding_dim = embedding_dim
+         self.interest_num = interest_num
+         if hidden_dim == None:
+             self.hidden_dim = self.embedding_dim * 4
+         self.W1 = torch.nn.Parameter(torch.rand(self.embedding_dim, self.hidden_dim), requires_grad=True)
+         self.W2 = torch.nn.Parameter(torch.rand(self.hidden_dim, self.interest_num), requires_grad=True)
+         self.W3 = torch.nn.Parameter(torch.rand(self.embedding_dim, self.embedding_dim), requires_grad=True)
+
+     def forward(self, seq_emb, mask=None):
+         H = torch.einsum('bse, ed -> bsd', seq_emb, self.W1).tanh()
+         if mask != None:
+             A = torch.einsum('bsd, dk -> bsk', H, self.W2) + -1.e9 * (1 - mask.float())
+             A = F.softmax(A, dim=1)
+         else:
+             A = F.softmax(torch.einsum('bsd, dk -> bsk', H, self.W2), dim=1)
+         A = A.permute(0, 2, 1)
+         multi_interest_emb = torch.matmul(A, seq_emb)
+         return multi_interest_emb
+
+
+ class CapsuleNetwork(nn.Module):
+     """Dynamic routing capsule network used in MIND (Li et al., 2019)."""
+
+     def __init__(self, embedding_dim, seq_len, bilinear_type=2, interest_num=4, routing_times=3, relu_layer=False):
+         super(CapsuleNetwork, self).__init__()
+         self.embedding_dim = embedding_dim # h
+         self.seq_len = seq_len # s
+         self.bilinear_type = bilinear_type
+         self.interest_num = interest_num
+         self.routing_times = routing_times
+
+         self.relu_layer = relu_layer
+         self.stop_grad = True
+         self.relu = nn.Sequential(nn.Linear(self.embedding_dim, self.embedding_dim, bias=False), nn.ReLU())
+         if self.bilinear_type == 0: # MIND
+             self.linear = nn.Linear(self.embedding_dim, self.embedding_dim, bias=False)
+         elif self.bilinear_type == 1:
+             self.linear = nn.Linear(self.embedding_dim, self.embedding_dim * self.interest_num, bias=False)
+         else:
+             self.w = nn.Parameter(torch.Tensor(1, self.seq_len, self.interest_num * self.embedding_dim, self.embedding_dim))
+             nn.init.xavier_uniform_(self.w)
+
+     def forward(self, item_eb, mask):
+         if self.bilinear_type == 0:
+             item_eb_hat = self.linear(item_eb)
+             item_eb_hat = item_eb_hat.repeat(1, 1, self.interest_num)
+         elif self.bilinear_type == 1:
+             item_eb_hat = self.linear(item_eb)
+         else:
+             u = torch.unsqueeze(item_eb, dim=2)
+             item_eb_hat = torch.sum(self.w[:, :self.seq_len, :, :] * u, dim=3)
+
+         item_eb_hat = torch.reshape(item_eb_hat, (-1, self.seq_len, self.interest_num, self.embedding_dim))
+         item_eb_hat = torch.transpose(item_eb_hat, 1, 2).contiguous()
+         item_eb_hat = torch.reshape(item_eb_hat, (-1, self.interest_num, self.seq_len, self.embedding_dim))
+
+         if self.stop_grad:
+             item_eb_hat_iter = item_eb_hat.detach()
+         else:
+             item_eb_hat_iter = item_eb_hat
+
+         if self.bilinear_type > 0:
+             capsule_weight = torch.zeros(item_eb_hat.shape[0],
+                                          self.interest_num,
+                                          self.seq_len,
+                                          device=item_eb.device,
+                                          requires_grad=False)
+         else:
+             capsule_weight = torch.randn(item_eb_hat.shape[0],
+                                          self.interest_num,
+                                          self.seq_len,
+                                          device=item_eb.device,
+                                          requires_grad=False)
+
+         for i in range(self.routing_times): # dynamic routing, propagated for routing_times (3) iterations
+             atten_mask = torch.unsqueeze(mask, 1).repeat(1, self.interest_num, 1)
+             paddings = torch.zeros_like(atten_mask, dtype=torch.float)
+
+             capsule_softmax_weight = F.softmax(capsule_weight, dim=-1)
+             capsule_softmax_weight = torch.where(torch.eq(atten_mask, 0), paddings, capsule_softmax_weight)
+             capsule_softmax_weight = torch.unsqueeze(capsule_softmax_weight, 2)
+
+             if i < 2:
+                 interest_capsule = torch.matmul(capsule_softmax_weight, item_eb_hat_iter)
+                 cap_norm = torch.sum(torch.square(interest_capsule), -1, True)
+                 scalar_factor = cap_norm / (1 + cap_norm) / torch.sqrt(cap_norm + 1e-9)
+                 interest_capsule = scalar_factor * interest_capsule
+
+                 delta_weight = torch.matmul(item_eb_hat_iter, torch.transpose(interest_capsule, 2, 3).contiguous())
+                 delta_weight = torch.reshape(delta_weight, (-1, self.interest_num, self.seq_len))
+                 capsule_weight = capsule_weight + delta_weight
+             else:
+                 interest_capsule = torch.matmul(capsule_softmax_weight, item_eb_hat)
+                 cap_norm = torch.sum(torch.square(interest_capsule), -1, True)
+                 scalar_factor = cap_norm / (1 + cap_norm) / torch.sqrt(cap_norm + 1e-9)
+                 interest_capsule = scalar_factor * interest_capsule
+
+         interest_capsule = torch.reshape(interest_capsule, (-1, self.interest_num, self.embedding_dim))
+
+         if self.relu_layer:
+             interest_capsule = self.relu(interest_capsule)
+
+         return interest_capsule


  class MIND(BaseMatchModel):
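Both extractors added above are self-contained and can be exercised directly; shapes follow the h/s comments in __init__ (B batch, s history length, h embedding dim). A usage sketch, assuming torch/nn/F are imported in the module as its code requires:

    import torch
    from nextrec.models.match.mind import CapsuleNetwork, MultiInterestSA

    B, s, h = 4, 20, 32
    seq_emb = torch.randn(B, s, h)           # embedded user behavior sequence
    mask = (torch.rand(B, s) > 0.2).long()   # 1 for real items, 0 for padding

    capsule = CapsuleNetwork(embedding_dim=h, seq_len=s, bilinear_type=2, interest_num=4, routing_times=3)
    interests = capsule(seq_emb, mask)       # -> [B, interest_num, h] interest capsules

    sa = MultiInterestSA(embedding_dim=h, interest_num=4)
    interests_sa = sa(seq_emb, mask.unsqueeze(-1))  # mask broadcast over the interest axis -> [B, interest_num, h]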