nextrec-0.4.1-py3-none-any.whl → nextrec-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. nextrec/__init__.py +1 -1
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -5
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/features.py +30 -22
  6. nextrec/basic/layers.py +220 -106
  7. nextrec/basic/loggers.py +62 -43
  8. nextrec/basic/metrics.py +268 -119
  9. nextrec/basic/model.py +1082 -400
  10. nextrec/basic/session.py +10 -3
  11. nextrec/cli.py +498 -0
  12. nextrec/data/__init__.py +19 -25
  13. nextrec/data/batch_utils.py +11 -3
  14. nextrec/data/data_processing.py +51 -45
  15. nextrec/data/data_utils.py +26 -15
  16. nextrec/data/dataloader.py +272 -95
  17. nextrec/data/preprocessor.py +320 -199
  18. nextrec/loss/listwise.py +17 -9
  19. nextrec/loss/loss_utils.py +7 -8
  20. nextrec/loss/pairwise.py +2 -0
  21. nextrec/loss/pointwise.py +30 -12
  22. nextrec/models/generative/hstu.py +103 -38
  23. nextrec/models/match/dssm.py +82 -68
  24. nextrec/models/match/dssm_v2.py +72 -57
  25. nextrec/models/match/mind.py +175 -107
  26. nextrec/models/match/sdm.py +104 -87
  27. nextrec/models/match/youtube_dnn.py +73 -59
  28. nextrec/models/multi_task/esmm.py +53 -37
  29. nextrec/models/multi_task/mmoe.py +64 -45
  30. nextrec/models/multi_task/ple.py +101 -48
  31. nextrec/models/multi_task/poso.py +113 -36
  32. nextrec/models/multi_task/share_bottom.py +48 -35
  33. nextrec/models/ranking/afm.py +72 -37
  34. nextrec/models/ranking/autoint.py +72 -55
  35. nextrec/models/ranking/dcn.py +55 -35
  36. nextrec/models/ranking/dcn_v2.py +64 -23
  37. nextrec/models/ranking/deepfm.py +32 -22
  38. nextrec/models/ranking/dien.py +155 -99
  39. nextrec/models/ranking/din.py +85 -57
  40. nextrec/models/ranking/fibinet.py +52 -32
  41. nextrec/models/ranking/fm.py +29 -23
  42. nextrec/models/ranking/masknet.py +91 -29
  43. nextrec/models/ranking/pnn.py +31 -28
  44. nextrec/models/ranking/widedeep.py +34 -26
  45. nextrec/models/ranking/xdeepfm.py +60 -38
  46. nextrec/utils/__init__.py +59 -34
  47. nextrec/utils/config.py +490 -0
  48. nextrec/utils/device.py +30 -20
  49. nextrec/utils/distributed.py +36 -9
  50. nextrec/utils/embedding.py +1 -0
  51. nextrec/utils/feature.py +1 -0
  52. nextrec/utils/file.py +32 -11
  53. nextrec/utils/initializer.py +61 -16
  54. nextrec/utils/optimizer.py +25 -9
  55. nextrec/utils/synthetic_data.py +283 -165
  56. nextrec/utils/tensor.py +24 -13
  57. {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/METADATA +4 -4
  58. nextrec-0.4.2.dist-info/RECORD +69 -0
  59. nextrec-0.4.2.dist-info/entry_points.txt +2 -0
  60. nextrec-0.4.1.dist-info/RECORD +0 -66
  61. {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
  62. {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/pnn.py CHANGED
@@ -22,27 +22,29 @@ class PNN(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature] | list = [],
-                 sparse_features: list[SparseFeature] | list = [],
-                 sequence_features: list[SequenceFeature] | list = [],
-                 mlp_params: dict = {},
-                 product_type: str = "inner",
-                 outer_product_dim: int | None = None,
-                 target: list[str] | list = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | list = [],
+        sparse_features: list[SparseFeature] | list = [],
+        sequence_features: list[SequenceFeature] | list = [],
+        mlp_params: dict = {},
+        product_type: str = "inner",
+        outer_product_dim: int | None = None,
+        target: list[str] | list = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(PNN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -54,13 +56,13 @@ class PNN(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.field_features = sparse_features + sequence_features
         if len(self.field_features) < 2:
             raise ValueError("PNN requires at least two sparse/sequence features.")
@@ -69,7 +71,9 @@ class PNN(BaseModel):
         self.num_fields = len(self.field_features)
         self.embedding_dim = self.field_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.field_features):
-            raise ValueError("All field features must share the same embedding_dim for PNN.")
+            raise ValueError(
+                "All field features must share the same embedding_dim for PNN."
+            )
 
         self.product_type = product_type.lower()
         if self.product_type not in {"inner", "outer"}:
@@ -88,12 +92,11 @@ class PNN(BaseModel):
         self.mlp = MLP(input_dim=linear_dim + product_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task)
 
-        modules = ['mlp']
+        modules = ["mlp"]
         if self.product_type == "outer":
-            modules.append('kernel')
+            modules.append("kernel")
         self.register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=modules
+            embedding_attr="embedding", include_modules=modules
         )
 
         self.compile(
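For orientation, a minimal sketch of what PNN's `inner` product type computes (illustrative code, not from the package): every unordered pair of field embeddings is scored with a dot product, so the `product_dim` added to `linear_dim` above is F*(F-1)/2 for F fields.

```python
import torch

def inner_products(field_emb: torch.Tensor) -> torch.Tensor:
    # field_emb: [B, F, D] field embeddings -> [B, F*(F-1)/2] pair scores.
    num_fields = field_emb.shape[1]
    rows, cols = torch.triu_indices(num_fields, num_fields, offset=1)
    return (field_emb[:, rows] * field_emb[:, cols]).sum(dim=-1)

emb = torch.randn(4, 3, 8)        # batch 4, 3 fields, embedding_dim 8
print(inner_products(emb).shape)  # torch.Size([4, 3]) -- 3 field pairs
```

The `outer` mode typically replaces the dot product with a kernel-weighted outer product, which is consistent with the extra `kernel` module registered for regularization above.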
nextrec/models/ranking/widedeep.py CHANGED
@@ -39,7 +39,6 @@ Wide & Deep uses a wide linear part (memorizing co-occurrence / hand-crafted crosses) together with a deep network
 - shared feature space, reduced engineering overhead
 """
 
-import torch
 import torch.nn as nn
 
 from nextrec.basic.model import BaseModel
@@ -55,25 +54,27 @@ class WideDeep(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 mlp_params: dict,
-                 target: list[str] = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        target: list[str] = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(WideDeep, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -85,13 +86,13 @@ class WideDeep(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Wide part: use all features for linear model
         self.wide_features = sparse_features + sequence_features
         # Deep part: use all features
@@ -103,7 +104,7 @@ class WideDeep(BaseModel):
         # Wide part: Linear layer
         wide_dim = sum([f.embedding_dim for f in self.wide_features])
         self.linear = LR(wide_dim)
-
+
         # Deep part: MLP
         input_dim = self.embedding.input_dim
         # deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
@@ -111,14 +112,21 @@ class WideDeep(BaseModel):
         self.mlp = MLP(input_dim=input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task)
         # Register regularization weights
-        self.register_regularization_weights(embedding_attr='embedding', include_modules=['linear', 'mlp'])
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["linear", "mlp"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )
 
     def forward(self, x):
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep)  # [B, 1]
-
+
         # Wide part
         input_wide = self.embedding(x=x, features=self.wide_features, squeeze_dim=True)
         y_wide = self.linear(input_wide)
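The hunk stops before the final combination, but in the standard Wide & Deep formulation the two heads are trained jointly by summing their logits into a single output activation. A hedged sketch of that last step (names illustrative, not the package's code):

```python
import torch

def widedeep_head(y_wide: torch.Tensor, y_deep: torch.Tensor) -> torch.Tensor:
    # Joint Wide & Deep binary head: add the two [B, 1] logits, then squash.
    return torch.sigmoid(y_wide + y_deep)

print(widedeep_head(torch.zeros(2, 1), torch.zeros(2, 1)))  # 0.5 for each row
```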
nextrec/models/ranking/xdeepfm.py CHANGED
@@ -3,8 +3,8 @@ Date: create on 09/11/2025
 Author:
     Yang Zhou,zyaztec@gmail.com
 Reference:
-    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
-        for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
+    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
+        for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
         knowledge discovery & data mining. 2018: 1754-1763.
     (https://arxiv.org/abs/1803.05170)
 """
@@ -17,6 +17,7 @@ from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import LR, EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+
 class CIN(nn.Module):
     """Compressed Interaction Network from xDeepFM (Lian et al., 2018)."""
 
@@ -28,7 +29,16 @@ class CIN(nn.Module):
         prev_dim, fc_input_dim = input_dim, 0
         for i in range(self.num_layers):
             cross_layer_size = cin_size[i]
-            self.conv_layers.append(torch.nn.Conv1d(input_dim * prev_dim, cross_layer_size, 1, stride=1, dilation=1, bias=True))
+            self.conv_layers.append(
+                torch.nn.Conv1d(
+                    input_dim * prev_dim,
+                    cross_layer_size,
+                    1,
+                    stride=1,
+                    dilation=1,
+                    bias=True,
+                )
+            )
             if self.split_half and i != self.num_layers - 1:
                 cross_layer_size //= 2
             prev_dim = cross_layer_size
@@ -49,7 +59,8 @@ class CIN(nn.Module):
             h = x
             xs.append(x)
         return self.fc(torch.sum(torch.cat(xs, dim=1), 2))
-
+
+
 class xDeepFM(BaseModel):
     @property
     def model_name(self):
@@ -58,27 +69,29 @@ class xDeepFM(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 mlp_params: dict,
-                 cin_size: list[int] = [128, 128],
-                 split_half: bool = True,
-                 target: list[str] = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        cin_size: list[int] = [128, 128],
+        split_half: bool = True,
+        target: list[str] = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(xDeepFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -90,16 +103,16 @@ class xDeepFM(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # Linear part and CIN part: use sparse and sequence features
         self.linear_features = sparse_features + sequence_features
-
+
         # Deep part: use all features
         self.deep_features = dense_features + sparse_features + sequence_features
 
@@ -109,21 +122,28 @@ class xDeepFM(BaseModel):
         # Linear part
         linear_dim = sum([f.embedding_dim for f in self.linear_features])
         self.linear = LR(linear_dim)
-
+
         # CIN part: Compressed Interaction Network
         num_fields = len(self.linear_features)
         self.cin = CIN(input_dim=num_fields, cin_size=cin_size, split_half=split_half)
-
+
         # Deep part: DNN
-        deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        deep_emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.deep_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task)
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear', 'cin', 'mlp']
+            embedding_attr="embedding", include_modules=["linear", "cin", "mlp"]
         )
 
         self.compile(
@@ -135,14 +155,16 @@ class xDeepFM(BaseModel):
 
     def forward(self, x):
         # Get embeddings for linear and CIN (sparse features only)
-        input_linear = self.embedding(x=x, features=self.linear_features, squeeze_dim=False)
-
+        input_linear = self.embedding(
+            x=x, features=self.linear_features, squeeze_dim=False
+        )
+
         # Linear part
         y_linear = self.linear(input_linear.flatten(start_dim=1))
-
+
         # CIN part
         y_cin = self.cin(input_linear)  # [B, 1]
-
+
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep)  # [B, 1]
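As background for the `CIN` changes above, a sketch of one layer of the published xDeepFM interaction (illustrative, not the package's exact code): the outer product of the previous layer's feature maps with the original field embeddings is taken along the embedding axis, and the resulting field-pair channels are compressed by a 1x1 `Conv1d` — which is why the diff constructs `torch.nn.Conv1d(input_dim * prev_dim, cross_layer_size, 1, ...)`.

```python
import torch

B, F0, H_prev, D = 2, 4, 5, 8               # batch, fields, prev maps, emb dim
x0 = torch.randn(B, F0, D)                  # original field embeddings
xk = torch.randn(B, H_prev, D)              # previous CIN layer output
conv = torch.nn.Conv1d(F0 * H_prev, 6, 1)   # compress F0*H_prev channels to 6

# Elementwise outer product per embedding position, flattened into channels.
z = torch.einsum("bfd,bhd->bfhd", x0, xk).reshape(B, F0 * H_prev, D)
x_next = conv(z)                            # [B, 6, D]: next layer's feature maps
print(x_next.shape)
```

With `split_half=True`, half of each intermediate layer's feature maps are routed to the final sum-pooled `fc` while the other half feed the next layer, which matches the `cross_layer_size //= 2` bookkeeping in the constructor.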
nextrec/utils/__init__.py CHANGED
@@ -10,63 +10,88 @@ This package provides various utility functions organized by category:
 - file_utils: File I/O operations
 - model_utils: Model-related utilities
 - feature_utils: Feature processing utilities
+- config_utils: Configuration loading and processing utilities
 
 Date: create on 13/11/2025
 Last update: 06/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
+
 from . import optimizer, initializer, embedding
 from .optimizer import get_optimizer, get_scheduler
 from .initializer import get_initializer
 from .embedding import get_auto_embedding_dim
 from .device import resolve_device, get_device_info
 from .tensor import to_tensor, stack_tensors, concat_tensors, pad_sequence_tensors
-from .file import resolve_file_paths, read_table, load_dataframes, iter_file_chunks, default_output_dir
+from .file import (
+    resolve_file_paths,
+    read_table,
+    load_dataframes,
+    iter_file_chunks,
+    default_output_dir,
+    read_yaml,
+)
 from .model import merge_features, get_mlp_output_dim
 from .feature import normalize_to_list
-from .synthetic_data import generate_ranking_data, generate_distributed_ranking_data, generate_match_data, generate_multitask_data
+from .synthetic_data import (
+    generate_match_data,
+    generate_ranking_data,
+    generate_multitask_data,
+    generate_distributed_ranking_data,
+)
+from .config import (
+    resolve_path,
+    select_features,
+    register_processor_features,
+    build_feature_objects,
+    extract_feature_groups,
+    load_model_class,
+    build_model_instance,
+)
 
 __all__ = [
     # Optimizer & Scheduler
-    'get_optimizer',
-    'get_scheduler',
-
+    "get_optimizer",
+    "get_scheduler",
     # Initializer
-    'get_initializer',
-
+    "get_initializer",
     # Embedding
-    'get_auto_embedding_dim',
-
+    "get_auto_embedding_dim",
     # Device utilities
-    'resolve_device',
-    'get_device_info',
-
+    "resolve_device",
+    "get_device_info",
     # Tensor utilities
-    'to_tensor',
-    'stack_tensors',
-    'concat_tensors',
-    'pad_sequence_tensors',
-
+    "to_tensor",
+    "stack_tensors",
+    "concat_tensors",
+    "pad_sequence_tensors",
     # File utilities
-    'resolve_file_paths',
-    'read_table',
-    'load_dataframes',
-    'iter_file_chunks',
-    'default_output_dir',
-
+    "resolve_file_paths",
+    "read_table",
+    "read_yaml",
+    "load_dataframes",
+    "iter_file_chunks",
+    "default_output_dir",
     # Model utilities
-    'merge_features',
-    'get_mlp_output_dim',
-
+    "merge_features",
+    "get_mlp_output_dim",
     # Feature utilities
-    'normalize_to_list',
-
+    "normalize_to_list",
+    # Config utilities
+    "resolve_path",
+    "select_features",
+    "register_processor_features",
+    "build_feature_objects",
+    "extract_feature_groups",
+    "load_model_class",
+    "build_model_instance",
     # Synthetic data utilities
-    'generate_ranking_data',
-    'generate_distributed_ranking_data',
-
+    "generate_ranking_data",
+    "generate_match_data",
+    "generate_multitask_data",
+    "generate_distributed_ranking_data",
     # Module exports
-    'optimizer',
-    'initializer',
-    'embedding',
+    "optimizer",
+    "initializer",
+    "embedding",
 ]
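Net effect of this hunk: 0.4.2 promotes the new config helpers and `read_yaml` to the package root's public API, so (assuming nextrec 0.4.2 is installed) they are importable directly:

```python
from nextrec.utils import read_yaml, build_feature_objects, build_model_instance
```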