nextrec 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/features.py +5 -1
- nextrec/basic/layers.py +3 -7
- nextrec/basic/model.py +495 -664
- nextrec/data/data_utils.py +44 -12
- nextrec/data/dataloader.py +84 -285
- nextrec/data/preprocessor.py +91 -213
- nextrec/loss/__init__.py +0 -1
- nextrec/loss/loss_utils.py +51 -120
- nextrec/models/multi_task/esmm.py +1 -1
- nextrec/models/ranking/autoint.py +51 -7
- nextrec/models/ranking/masknet.py +268 -76
- nextrec/utils/__init__.py +4 -1
- nextrec/utils/common.py +16 -0
- {nextrec-0.2.3.dist-info → nextrec-0.2.5.dist-info}/METADATA +2 -2
- {nextrec-0.2.3.dist-info → nextrec-0.2.5.dist-info}/RECORD +18 -17
- {nextrec-0.2.3.dist-info → nextrec-0.2.5.dist-info}/WHEEL +0 -0
- {nextrec-0.2.3.dist-info → nextrec-0.2.5.dist-info}/licenses/LICENSE +0 -0
nextrec/loss/__init__.py
CHANGED
nextrec/loss/loss_utils.py
CHANGED
@@ -21,138 +21,69 @@ from nextrec.loss.pointwise import (
     WeightedBCELoss,
 )
 
-
+
 VALID_TASK_TYPES = [
-    "binary",
-    "multiclass",
-    "
-    "
-    "match",
-    "ranking",
-    "multitask",
-    "multilabel",
+    "binary",
+    "multiclass",
+    "multilabel",
+    "regression",
 ]
 
+def _build_cb_focal(kw):
+    if "class_counts" not in kw:
+        raise ValueError("class_balanced_focal requires class_counts")
+    return ClassBalancedFocalLoss(**kw)
 
-def get_loss_fn(
-    task_type: str = "binary",
-    training_mode: str | None = None,
-    loss: str | nn.Module | None = None,
-    **loss_kwargs,
-) -> nn.Module:
-    """
-    Get loss function based on task type and training mode.
-    """
 
+def get_loss_fn(loss=None, **kw):
     if isinstance(loss, nn.Module):
         return loss
-
-
-    if
-        return
-
-    if task_type in ["ranking", "multitask", "binary", "multilabel"]:
-        return _get_classification_loss(loss, **loss_kwargs)
-
-    if task_type == "multiclass":
-        return _get_multiclass_loss(loss, **loss_kwargs)
-
-    if task_type == "regression":
-        if loss is None or loss == "mse":
-            return nn.MSELoss(**loss_kwargs)
-        if loss == "mae":
-            return nn.L1Loss(**loss_kwargs)
-        if isinstance(loss, str):
-            raise ValueError(f"Unsupported regression loss: {loss}")
-
-    raise ValueError(f"Unsupported task_type: {task_type}")
-
-
-def _get_match_loss(training_mode: str | None, loss: str | None, **loss_kwargs) -> nn.Module:
-    if training_mode == "pointwise":
-        if loss is None or loss in {"bce", "binary_crossentropy"}:
-            return nn.BCELoss(**loss_kwargs)
-        if loss == "weighted_bce":
-            return WeightedBCELoss(**loss_kwargs)
-        if loss == "focal":
-            return FocalLoss(**loss_kwargs)
-        if loss == "class_balanced_focal":
-            return _build_cb_focal(loss_kwargs)
-        if loss == "cosine_contrastive":
-            return CosineContrastiveLoss(**loss_kwargs)
-        if isinstance(loss, str):
-            raise ValueError(f"Unsupported pointwise loss: {loss}")
-
-    if training_mode == "pairwise":
-        if loss is None or loss == "bpr":
-            return BPRLoss(**loss_kwargs)
-        if loss == "hinge":
-            return HingeLoss(**loss_kwargs)
-        if loss == "triplet":
-            return TripletLoss(**loss_kwargs)
-        if isinstance(loss, str):
-            raise ValueError(f"Unsupported pairwise loss: {loss}")
-
-    if training_mode == "listwise":
-        if loss is None or loss in {"sampled_softmax", "softmax"}:
-            return SampledSoftmaxLoss(**loss_kwargs)
-        if loss == "infonce":
-            return InfoNCELoss(**loss_kwargs)
-        if loss == "listnet":
-            return ListNetLoss(**loss_kwargs)
-        if loss == "listmle":
-            return ListMLELoss(**loss_kwargs)
-        if loss == "approx_ndcg":
-            return ApproxNDCGLoss(**loss_kwargs)
-        if loss in {"crossentropy", "ce"}:
-            return nn.CrossEntropyLoss(**loss_kwargs)
-        if isinstance(loss, str):
-            raise ValueError(f"Unsupported listwise loss: {loss}")
-
-    raise ValueError(f"Unknown training_mode: {training_mode}")
-
-
-def _get_classification_loss(loss: str | None, **loss_kwargs) -> nn.Module:
-    if loss is None or loss in {"bce", "binary_crossentropy"}:
-        return nn.BCELoss(**loss_kwargs)
+    if loss is None:
+        raise ValueError("loss must be provided explicitly")
+    if loss in ["bce", "binary_crossentropy"]:
+        return nn.BCELoss(**kw)
     if loss == "weighted_bce":
-        return WeightedBCELoss(**
-    if loss
-        return FocalLoss(**
-    if loss
-        return _build_cb_focal(
+        return WeightedBCELoss(**kw)
+    if loss in ["focal", "focal_loss"]:
+        return FocalLoss(**kw)
+    if loss in ["cb_focal", "class_balanced_focal"]:
+        return _build_cb_focal(kw)
+    if loss in ["crossentropy", "ce"]:
+        return nn.CrossEntropyLoss(**kw)
     if loss == "mse":
-        return nn.MSELoss(**
+        return nn.MSELoss(**kw)
     if loss == "mae":
-        return nn.L1Loss(**
-
-
-    if
-
-
-
-
-
-
-    if loss
-        return
-    if loss == "
-        return
-    if
-
-
-
-
-
-
-        return ClassBalancedFocalLoss(**loss_kwargs)
-
+        return nn.L1Loss(**kw)
+
+    # Pairwise ranking Loss
+    if loss == "bpr":
+        return BPRLoss(**kw)
+    if loss == "hinge":
+        return HingeLoss(**kw)
+    if loss == "triplet":
+        return TripletLoss(**kw)
+
+    # Listwise ranking Loss
+    if loss in ["sampled_softmax", "softmax"]:
+        return SampledSoftmaxLoss(**kw)
+    if loss == "infonce":
+        return InfoNCELoss(**kw)
+    if loss == "listnet":
+        return ListNetLoss(**kw)
+    if loss == "listmle":
+        return ListMLELoss(**kw)
+    if loss == "approx_ndcg":
+        return ApproxNDCGLoss(**kw)
+
+    raise ValueError(f"Unsupported loss: {loss}")
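For orientation, a minimal sketch of how the flattened 0.2.5 dispatcher behaves. The import path is assumed from the file layout (nextrec/loss/loss_utils.py); the calls mirror the branches shown in the hunk above:

import torch.nn as nn
from nextrec.loss.loss_utils import get_loss_fn

bce = get_loss_fn(loss="bce")          # string name -> nn.BCELoss()
bpr = get_loss_fn(loss="bpr")          # pairwise ranking -> BPRLoss()
mse = get_loss_fn(loss=nn.MSELoss())   # an nn.Module passes through unchanged

# 0.2.5 drops the implicit task_type/training_mode defaults, so the
# loss name must now be given explicitly:
try:
    get_loss_fn()
except ValueError as err:
    print(err)  # loss must be provided explicitly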
 
 def get_loss_kwargs(loss_params: dict | list[dict] | None, index: int = 0) -> dict:
     """
-
+    Resolve the loss_kwargs for each head.
+
+    - loss_params is None -> {}
+    - loss_params is a dict -> shared by all heads
+    - loss_params is a list[dict] -> use loss_params[index] (if present and not None), otherwise {}
     """
     if loss_params is None:
         return {}
@@ -160,4 +91,4 @@ def get_loss_kwargs(loss_params: dict | list[dict] | None, index: int = 0) -> dict:
         if index < len(loss_params) and loss_params[index] is not None:
             return loss_params[index]
         return {}
-    return loss_params
+    return loss_params
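The resolution rules from the (translated) docstring, as a small usage sketch with the import path assumed from the file layout:

from nextrec.loss.loss_utils import get_loss_kwargs

get_loss_kwargs(None)                           # -> {}
get_loss_kwargs({"reduction": "sum"}, index=3)  # single dict, shared by every head
per_head = [{"gamma": 2.0}, None]
get_loss_kwargs(per_head, index=0)              # -> {"gamma": 2.0}
get_loss_kwargs(per_head, index=1)              # -> {} (entry is None)
get_loss_kwargs(per_head, index=9)              # -> {} (index out of range)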
nextrec/models/multi_task/esmm.py
CHANGED

@@ -40,7 +40,7 @@ class ESMM(BaseModel):
         ctr_params: dict,
         cvr_params: dict,
         target: list[str] = ['ctr', 'ctcvr'],  # Note: ctcvr = ctr * cvr
-        task:
+        task: list[str] = ['binary', 'binary'],
         optimizer: str = "adam",
         optimizer_params: dict = {},
         loss: str | nn.Module | list[str | nn.Module] | None = "bce",
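The `# Note: ctcvr = ctr * cvr` comment is the core ESMM identity: the model supervises the CTR and CTCVR heads and recovers CVR only implicitly. A toy illustration with made-up probabilities:

import torch

p_ctr = torch.tensor([0.10, 0.30, 0.05])  # P(click | impression), per sample
p_cvr = torch.tensor([0.50, 0.10, 0.40])  # P(conversion | click)

# ESMM trains on ctr and ctcvr (both binary heads, matching the new
# task=['binary', 'binary'] default); cvr itself is never supervised directly.
p_ctcvr = p_ctr * p_cvr                   # P(click and conversion | impression)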
nextrec/models/ranking/autoint.py
CHANGED

@@ -1,12 +1,57 @@
 """
 Date: create on 09/11/2025
-
-
+Checkpoint: edit on 24/11/2025
+Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-
-
-
-
+[1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
+self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
+on information and knowledge management. 2019: 1161-1170.
+(https://arxiv.org/abs/1810.11921)
+
+AutoInt is a CTR prediction model that leverages multi-head self-attention
+to automatically learn high-order feature interactions in an explicit and
+interpretable way. Instead of relying on manual feature engineering or
+implicit MLP-based transformations, AutoInt models feature dependencies
+by attending over all embedded fields and capturing their contextual
+relationships.
+
+In each Interacting Layer:
+(1) Each field embedding is projected into multiple attention heads
+(2) Scaled dot-product attention computes feature-to-feature interactions
+(3) Outputs are aggregated and passed through residual connections
+(4) Layer Normalization ensures stable optimization
+
+By stacking multiple Interacting Layers, AutoInt progressively discovers
+higher-order feature interactions, while maintaining transparency since
+attention weights explicitly show which features interact.
+
+Key Advantages:
+- Explicit modeling of high-order feature interactions
+- Multi-head attention enhances representation diversity
+- Residual structure facilitates deep interaction learning
+- Attention weights provide interpretability of feature relations
+- Eliminates heavy manual feature engineering
+
+AutoInt is a CTR model that explicitly learns high-order feature interactions via multi-head self-attention,
+with good interpretability. Unlike methods that rely on manual feature engineering or implicit MLP modeling,
+AutoInt computes attention over all feature embeddings to capture contextual dependencies between features.
+
+In each Interacting Layer:
+(1) Each feature embedding is split into multiple attention heads via projection
+(2) Scaled dot-product attention computes interaction weights between features
+(3) Multi-head outputs are aggregated and combined through residual connections
+(4) Layer Normalization ensures training stability
+
+By stacking multiple interacting layers, AutoInt progressively learns higher-order feature interactions;
+and since the attention weights can be visualized, the model is explicitly interpretable,
+revealing which feature relationships matter most.
+
+Key advantages:
+- Explicit modeling of high-order feature interactions
+- Multi-head mechanism strengthens representational power
+- Residual structure supports deep interaction learning
+- Attention weights are inherently interpretable
+- Reduces heavy manual feature engineering
 """
 
 import torch
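The four steps the new docstring lists map onto standard PyTorch pieces. Below is a self-contained sketch of one interacting layer in that spirit; it is illustrative only and not the package's actual module:

import torch
import torch.nn as nn

class InteractingLayerSketch(nn.Module):
    # Illustrative only: one interacting layer following the docstring's
    # four steps, not the nextrec implementation.
    def __init__(self, embed_dim: int, num_heads: int = 2):
        super().__init__()
        # Steps (1)+(2): project fields into heads and run scaled
        # dot-product attention across all fields.
        self.attn = nn.MultiheadAttention(embed_dim, num_heads, batch_first=True)
        self.res = nn.Linear(embed_dim, embed_dim, bias=False)  # residual path
        self.norm = nn.LayerNorm(embed_dim)                     # step (4)

    def forward(self, x):                    # x: (batch, num_fields, embed_dim)
        out, _ = self.attn(x, x, x)          # field-to-field interactions
        return self.norm(out + self.res(x))  # step (3): aggregate + residual

fields = torch.randn(8, 5, 16)               # 8 samples, 5 fields, dim 16
layer = InteractingLayerSketch(16)
print(layer(layer(fields)).shape)            # stacking layers raises interaction order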
@@ -80,7 +125,6 @@ class AutoInt(BaseModel):
 
         # Project embeddings to attention embedding dimension
         num_fields = len(self.interaction_features)
-        total_embedding_dim = sum([f.embedding_dim for f in self.interaction_features])
 
         # If embeddings have different dimensions, project them to att_embedding_dim
         self.need_projection = not all(f.embedding_dim == att_embedding_dim for f in self.interaction_features)