nextrec 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,35 @@ Date: create on 09/11/2025
3
3
  Author:
4
4
  Yang Zhou,zyaztec@gmail.com
5
5
  Reference:
6
- [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//ICDM. 2016: 1149-1154.
6
+ [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response
7
+ prediction[C]//ICDM. 2016: 1149-1154. (https://arxiv.org/abs/1611.00144)
8
+
9
+ Product-based Neural Networks (PNN) are CTR prediction models that explicitly
10
+ encode feature interactions by combining:
11
+ (1) A linear signal from concatenated field embeddings
12
+ (2) A product signal capturing pairwise feature interactions (inner or outer)
13
+ The product layer augments the linear input to an MLP, enabling the network to
14
+ model both first-order and high-order feature interactions in a structured way.
15
+
16
+ Computation workflow:
17
+ - Embed each categorical/sequence field with a shared embedding dimension
18
+ - Linear signal: flatten and concatenate all field embeddings
19
+ - Product signal:
20
+ * Inner product: dot products over all field pairs
21
+ * Outer product: project embeddings then compute element-wise products
22
+ - Concatenate linear and product signals; feed into MLP for prediction
23
+
24
+ Key Advantages:
25
+ - Explicit pairwise interaction modeling without heavy feature engineering
26
+ - Flexible choice between inner/outer products to trade off capacity vs. cost
27
+ - Combines linear context with interaction signal for stronger expressiveness
28
+ - Simple architecture that integrates cleanly with standard MLP pipelines
29
+
30
+ PNN 是一种 CTR 预估模型,通过将线性信号与乘积信号结合,显式建模特征交互:
31
+ - 线性信号:将各字段的 embedding 拼接,用于保留一阶信息
32
+ - 乘积信号:对所有字段对做内积或外积,捕捉二阶及更高阶交互
33
+ 随后将两类信号拼接送入 MLP,实现对用户响应的预测。内积版本计算量更低,
34
+ 外积版本表达力更强,可根据场景取舍。
7
35
  """
8
36
 
9
37
  import torch
@@ -15,6 +43,7 @@ from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
15
43
 
16
44
 
17
45
  class PNN(BaseModel):
46
+
18
47
  @property
19
48
  def model_name(self):
20
49
  return "PNN"
@@ -25,16 +54,16 @@ class PNN(BaseModel):
25
54
 
26
55
  def __init__(
27
56
  self,
28
- dense_features: list[DenseFeature] | list = [],
29
- sparse_features: list[SparseFeature] | list = [],
30
- sequence_features: list[SequenceFeature] | list = [],
31
- mlp_params: dict = {},
32
- product_type: str = "inner",
57
+ dense_features: list[DenseFeature] | None = None,
58
+ sparse_features: list[SparseFeature] | None = None,
59
+ sequence_features: list[SequenceFeature] | None = None,
60
+ mlp_params: dict | None = None,
61
+ product_type: str = "inner", # "inner" (IPNN), "outer" (OPNN), "both" (PNN*)
33
62
  outer_product_dim: int | None = None,
34
- target: list[str] | list = [],
63
+ target: list[str] | str | None = None,
35
64
  task: str | list[str] | None = None,
36
65
  optimizer: str = "adam",
37
- optimizer_params: dict = {},
66
+ optimizer_params: dict | None = None,
38
67
  loss: str | nn.Module | None = "bce",
39
68
  loss_params: dict | list[dict] | None = None,
40
69
  device: str = "cpu",
@@ -45,6 +74,16 @@ class PNN(BaseModel):
45
74
  **kwargs,
46
75
  ):
47
76
 
77
+ dense_features = dense_features or []
78
+ sparse_features = sparse_features or []
79
+ sequence_features = sequence_features or []
80
+ mlp_params = mlp_params or {}
81
+ if outer_product_dim is not None and outer_product_dim <= 0:
82
+ raise ValueError("outer_product_dim must be a positive integer.")
83
+ optimizer_params = optimizer_params or {}
84
+ if loss is None:
85
+ loss = "bce"
86
+
48
87
  super(PNN, self).__init__(
49
88
  dense_features=dense_features,
50
89
  sparse_features=sparse_features,
@@ -59,16 +98,13 @@ class PNN(BaseModel):
59
98
  **kwargs,
60
99
  )
61
100
 
62
- self.loss = loss
63
- if self.loss is None:
64
- self.loss = "bce"
65
-
66
- self.field_features = sparse_features + sequence_features
101
+ self.field_features = dense_features + sparse_features + sequence_features
67
102
  if len(self.field_features) < 2:
68
103
  raise ValueError("PNN requires at least two sparse/sequence features.")
69
104
 
70
105
  self.embedding = EmbeddingLayer(features=self.field_features)
71
106
  self.num_fields = len(self.field_features)
107
+
72
108
  self.embedding_dim = self.field_features[0].embedding_dim
73
109
  if any(f.embedding_dim != self.embedding_dim for f in self.field_features):
74
110
  raise ValueError(
@@ -76,24 +112,34 @@ class PNN(BaseModel):
76
112
  )
77
113
 
78
114
  self.product_type = product_type.lower()
79
- if self.product_type not in {"inner", "outer"}:
80
- raise ValueError("product_type must be 'inner' or 'outer'.")
115
+ if self.product_type not in {"inner", "outer", "both"}:
116
+ raise ValueError("product_type must be 'inner', 'outer', or 'both'.")
81
117
 
82
118
  self.num_pairs = self.num_fields * (self.num_fields - 1) // 2
83
- if self.product_type == "outer":
84
- self.outer_dim = outer_product_dim or self.embedding_dim
85
- self.kernel = nn.Linear(self.embedding_dim, self.outer_dim, bias=False)
86
- product_dim = self.num_pairs * self.outer_dim
119
+ self.outer_product_dim = outer_product_dim or self.embedding_dim
120
+
121
+ if self.product_type in {"outer", "both"}:
122
+ self.kernel = nn.Parameter(
123
+ torch.randn(self.embedding_dim, self.outer_product_dim)
124
+ )
125
+ nn.init.xavier_uniform_(self.kernel)
87
126
  else:
88
- self.outer_dim = None
89
- product_dim = self.num_pairs
127
+ self.kernel = None
90
128
 
91
129
  linear_dim = self.num_fields * self.embedding_dim
130
+
131
+ if self.product_type == "inner":
132
+ product_dim = self.num_pairs
133
+ elif self.product_type == "outer":
134
+ product_dim = self.num_pairs
135
+ else:
136
+ product_dim = 2 * self.num_pairs
137
+
92
138
  self.mlp = MLP(input_dim=linear_dim + product_dim, **mlp_params)
93
139
  self.prediction_layer = PredictionLayer(task_type=self.task)
94
140
 
95
141
  modules = ["mlp"]
96
- if self.product_type == "outer":
142
+ if self.kernel is not None:
97
143
  modules.append("kernel")
98
144
  self.register_regularization_weights(
99
145
  embedding_attr="embedding", include_modules=modules
@@ -106,27 +152,48 @@ class PNN(BaseModel):
106
152
  loss_params=loss_params,
107
153
  )
108
154
 
155
def compute_inner_products(self, field_emb: torch.Tensor) -> torch.Tensor:
    """Return the inner product <v_i, v_j> for every field pair with i < j.

    Args:
        field_emb: Field embeddings indexed as [B, F, D]; only the first
            ``self.num_fields`` fields along dim 1 are used.

    Returns:
        Tensor of shape [B, num_pairs]. Pairs are ordered row-major over the
        upper triangle: (0, 1), (0, 2), ..., (1, 2), ..., (F-2, F-1).
    """
    # One batched matmul yields every pairwise dot product at once: [B, F, F].
    # This replaces F*(F-1)/2 separate multiply/sum ops issued from Python.
    gram = torch.bmm(field_emb, field_emb.transpose(1, 2))
    # Strict upper triangle (offset=1) skips self-products and mirrored pairs;
    # triu_indices enumerates it row-major, matching the nested-loop order.
    rows, cols = torch.triu_indices(
        self.num_fields, self.num_fields, offset=1, device=field_emb.device
    )
    return gram[:, rows, cols]  # [B, num_pairs]
165
+
166
def compute_outer_kernel_products(self, field_emb: torch.Tensor) -> torch.Tensor:
    """Return the kernel-projected product for every field pair with i < j.

    Each field embedding is projected with ``self.kernel`` and the pair score
    is the dot product of the two projections:
    g(v_i, v_j) = sum_k (v_i^T W)_k * (v_j^T W)_k.

    Args:
        field_emb: Field embeddings indexed as [B, F, D]; only the first
            ``self.num_fields`` fields along dim 1 are used.

    Returns:
        Tensor of shape [B, num_pairs], pairs ordered (0, 1), (0, 2), ...,
        (F-2, F-1).

    Raises:
        RuntimeError: If ``self.kernel`` is None.
    """
    if self.kernel is None:
        raise RuntimeError("kernel is not initialized for outer product.")

    # Project every field exactly once: [B, F, D] @ [D, K] -> [B, F, K].
    # The nested-loop form re-projected v_j for each pair it appeared in.
    projected = torch.matmul(field_emb, self.kernel)
    # All pairwise dot products of the projections: [B, F, F].
    gram = torch.bmm(projected, projected.transpose(1, 2))
    # Keep only i < j, enumerated row-major to preserve the pair order.
    rows, cols = torch.triu_indices(
        self.num_fields, self.num_fields, offset=1, device=field_emb.device
    )
    return gram[:, rows, cols]  # [B, num_pairs]
182
+
109
183
  def forward(self, x):
184
+ # field_emb: [B, F, D]
110
185
  field_emb = self.embedding(x=x, features=self.field_features, squeeze_dim=False)
111
- linear_signal = field_emb.flatten(start_dim=1)
186
+ # Z = [v_1; v_2; ...; v_F]
187
+ linear_signal = field_emb.flatten(start_dim=1) # [B, F*D]
112
188
 
113
189
  if self.product_type == "inner":
114
- interactions = []
115
- for i in range(self.num_fields - 1):
116
- vi = field_emb[:, i, :]
117
- for j in range(i + 1, self.num_fields):
118
- vj = field_emb[:, j, :]
119
- interactions.append(torch.sum(vi * vj, dim=1, keepdim=True))
120
- product_signal = torch.cat(interactions, dim=1)
190
+ product_signal = self.compute_inner_products(field_emb)
191
+ elif self.product_type == "outer":
192
+ product_signal = self.compute_outer_kernel_products(field_emb)
121
193
  else:
122
- transformed = self.kernel(field_emb) # [B, F, outer_dim]
123
- interactions = []
124
- for i in range(self.num_fields - 1):
125
- vi = transformed[:, i, :]
126
- for j in range(i + 1, self.num_fields):
127
- vj = transformed[:, j, :]
128
- interactions.append(vi * vj)
129
- product_signal = torch.stack(interactions, dim=1).flatten(start_dim=1)
194
+ inner_p = self.compute_inner_products(field_emb)
195
+ outer_p = self.compute_outer_kernel_products(field_emb)
196
+ product_signal = torch.cat([inner_p, outer_p], dim=1)
130
197
 
131
198
  deep_input = torch.cat([linear_signal, product_signal], dim=1)
132
199
  y = self.mlp(deep_input)
@@ -61,10 +61,10 @@ class WideDeep(BaseModel):
61
61
  sparse_features: list[SparseFeature],
62
62
  sequence_features: list[SequenceFeature],
63
63
  mlp_params: dict,
64
- target: list[str] = [],
64
+ target: list[str] | str | None = None,
65
65
  task: str | list[str] | None = None,
66
66
  optimizer: str = "adam",
67
- optimizer_params: dict = {},
67
+ optimizer_params: dict | None = None,
68
68
  loss: str | nn.Module | None = "bce",
69
69
  loss_params: dict | list[dict] | None = None,
70
70
  device: str = "cpu",
@@ -75,6 +75,12 @@ class WideDeep(BaseModel):
75
75
  **kwargs,
76
76
  ):
77
77
 
78
+ if target is None:
79
+ target = []
80
+ optimizer_params = optimizer_params or {}
81
+ if loss is None:
82
+ loss = "bce"
83
+
78
84
  super(WideDeep, self).__init__(
79
85
  dense_features=dense_features,
80
86
  sparse_features=sparse_features,
@@ -90,8 +96,6 @@ class WideDeep(BaseModel):
90
96
  )
91
97
 
92
98
  self.loss = loss
93
- if self.loss is None:
94
- self.loss = "bce"
95
99
 
96
100
  # Wide part: use all features for linear model
97
101
  self.wide_features = sparse_features + sequence_features
@@ -76,12 +76,12 @@ class xDeepFM(BaseModel):
76
76
  sparse_features: list[SparseFeature],
77
77
  sequence_features: list[SequenceFeature],
78
78
  mlp_params: dict,
79
- cin_size: list[int] = [128, 128],
79
+ cin_size: list[int] | None = None,
80
80
  split_half: bool = True,
81
- target: list[str] = [],
81
+ target: list[str] | str | None = None,
82
82
  task: str | list[str] | None = None,
83
83
  optimizer: str = "adam",
84
- optimizer_params: dict = {},
84
+ optimizer_params: dict | None = None,
85
85
  loss: str | nn.Module | None = "bce",
86
86
  loss_params: dict | list[dict] | None = None,
87
87
  device: str = "cpu",
@@ -92,6 +92,13 @@ class xDeepFM(BaseModel):
92
92
  **kwargs,
93
93
  ):
94
94
 
95
+ cin_size = cin_size or [128, 128]
96
+ if target is None:
97
+ target = []
98
+ optimizer_params = optimizer_params or {}
99
+ if loss is None:
100
+ loss = "bce"
101
+
95
102
  super(xDeepFM, self).__init__(
96
103
  dense_features=dense_features,
97
104
  sparse_features=sparse_features,
@@ -107,8 +114,6 @@ class xDeepFM(BaseModel):
107
114
  )
108
115
 
109
116
  self.loss = loss
110
- if self.loss is None:
111
- self.loss = "bce"
112
117
 
113
118
  # Linear part and CIN part: use sparse and sequence features
114
119
  self.linear_features = sparse_features + sequence_features
nextrec/utils/config.py CHANGED
@@ -28,9 +28,15 @@ def resolve_path(path_str: str | Path, base_dir: Path) -> Path:
28
28
  path = Path(path_str).expanduser()
29
29
  if path.is_absolute():
30
30
  return path
31
- if path.exists():
32
- return path.resolve()
33
- return (base_dir / path).resolve()
31
+ # Prefer resolving relative to current working directory when the path (or its parent)
32
+ # already exists there; otherwise fall back to the config file's directory.
33
+ cwd_path = (Path.cwd() / path).resolve()
34
+ if cwd_path.exists() or cwd_path.parent.exists():
35
+ return cwd_path
36
+ base_dir_path = (base_dir / path).resolve()
37
+ if base_dir_path.exists() or base_dir_path.parent.exists():
38
+ return base_dir_path
39
+ return cwd_path
34
40
 
35
41
 
36
42
  def select_features(
nextrec/utils/file.py CHANGED
@@ -60,7 +60,8 @@ def read_table(path: str | Path, data_format: str | None = None) -> pd.DataFrame
60
60
  if fmt in {"parquet", ""}:
61
61
  return pd.read_parquet(data_path)
62
62
  if fmt in {"csv", "txt"}:
63
- return pd.read_csv(data_path)
63
+ # Use low_memory=False to avoid mixed-type DtypeWarning on wide CSVs
64
+ return pd.read_csv(data_path, low_memory=False)
64
65
  raise ValueError(f"Unsupported data format: {data_path}")
65
66
 
66
67
 
nextrec/utils/model.py CHANGED
@@ -20,3 +20,25 @@ def get_mlp_output_dim(params: dict, fallback: int) -> int:
20
20
  if dims:
21
21
  return dims[-1]
22
22
  return fallback
23
+
24
+
25
def select_features(
    available_features: list,
    names: list[str],
    param_name: str,
) -> list:
    """Pick features from ``available_features`` by name, in ``names`` order.

    Args:
        available_features: Objects exposing a ``name`` attribute.
        names: Feature names to select; a falsy value selects nothing.
        param_name: Label for ``names`` used in error messages.

    Returns:
        The matching feature objects, ordered as in ``names``.

    Raises:
        ValueError: If ``names`` has duplicates or names with no matching
            feature.
    """
    if not names:
        return []

    if len(set(names)) != len(names):
        raise ValueError(f"{param_name} contains duplicate feature names: {names}")

    # Index features by name; a later feature with the same name wins.
    by_name = {}
    for feature in available_features:
        by_name[feature.name] = feature

    # Single pass: collect hits and misses together so all unknown names
    # can be reported in one error.
    selected = []
    unknown = []
    for name in names:
        if name in by_name:
            selected.append(by_name[name])
        else:
            unknown.append(name)

    if unknown:
        raise ValueError(
            f"{param_name} contains unknown feature names {unknown}. "
            f"Available features: {list(by_name)}"
        )

    return selected
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nextrec
3
- Version: 0.4.2
3
+ Version: 0.4.3
4
4
  Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
5
5
  Project-URL: Homepage, https://github.com/zerolovesea/NextRec
6
6
  Project-URL: Repository, https://github.com/zerolovesea/NextRec
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
63
63
  ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
64
64
  ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
65
65
  ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
66
- ![Version](https://img.shields.io/badge/Version-0.4.2-orange.svg)
66
+ ![Version](https://img.shields.io/badge/Version-0.4.3-orange.svg)
67
67
 
68
68
  English | [中文文档](README_zh.md)
69
69
 
@@ -71,16 +71,28 @@ English | [中文文档](README_zh.md)
71
71
 
72
72
  </div>
73
73
 
74
+ ## Table of Contents
75
+
76
+ - [Introduction](#introduction)
77
+ - [Installation](#installation)
78
+ - [Architecture](#architecture)
79
+ - [5-Minute Quick Start](#5-minute-quick-start)
80
+ - [CLI Usage](#cli-usage)
81
+ - [Platform Compatibility](#platform-compatibility)
82
+ - [Supported Models](#supported-models)
83
+ - [Contributing](#contributing)
84
+
74
85
  ## Introduction
75
86
 
76
- NextRec is a modern recommendation framework built on PyTorch, delivering a unified experience for modeling, training, and evaluation. It follows a modular design with rich model implementations, data-processing utilities, and engineering-ready training components. NextRec focuses on large-scale industrial recall scenarios on Spark clusters, training on massive offline parquet features.
87
+ NextRec is a modern recommendation system framework built on PyTorch, providing researchers and engineering teams with a fast modeling, training, and evaluation experience. The framework adopts a modular design with rich built-in model implementations, data processing tools, and engineering-ready training components, covering various recommendation scenarios. NextRec provides easy-to-use interfaces, command-line tools, and tutorials, so that learners of recommendation algorithms can quickly understand model architectures and get to training and inference as fast as possible.
77
88
 
78
89
  ## Why NextRec
79
90
 
80
- - **Unified feature engineering & data pipeline**: Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching offline feature training/inference in industrial big-data settings.
81
- - **Multi-scenario coverage**: Ranking (CTR/CVR), retrieval, multi-task learning, and more marketing/rec models, with a continuously expanding model zoo.
82
- - **Developer-friendly experience**: Stream processing/training/inference for csv/parquet/pathlike data, plus GPU/MPS acceleration and visualization support.
83
- - **Efficient training & evaluation**: Standardized engine with optimizers, LR schedulers, early stopping, checkpoints, and detailed logging out of the box.
91
+ - **Unified feature engineering & data pipeline**: NextRec provides Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching the model training and inference process based on offline `parquet/csv` features in industrial big-data Spark/Hive scenarios.
92
+ - **Multi-scenario recommendation capabilities**: Covers ranking (CTR/CVR), retrieval, multi-task learning and other recommendation/marketing models, with a continuously expanding model zoo.
93
+ - **Developer-friendly experience**: Supports stream preprocessing/distributed training/inference for various data formats (`csv/parquet/pathlike`), GPU acceleration and visual metric monitoring, facilitating experiments for business algorithm engineers and recommendation algorithm learners.
94
+ - **Flexible command-line tool**: After writing the training and inference config files, you can start training or inference with a single command such as `nextrec --mode=train --train_config=train_config.yaml`, which facilitates rapid experiment iteration and agile deployment.
95
+ - **Efficient training & evaluation**: NextRec's standardized training engine comes with various optimizers, learning rate schedulers, early stopping, model checkpoints, and detailed log management built-in, ready to use out of the box.
84
96
 
85
97
  ## Architecture
86
98
 
@@ -96,34 +108,36 @@ NextRec adopts a modular and low-coupling engineering design, enabling full-pipe
96
108
 
97
109
  You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
98
110
 
99
- ## Tutorials
100
111
 
101
- See `tutorials/` for examples covering ranking, retrieval, multi-task learning, and data processing:
112
+ ## Tutorials
102
113
 
103
- - [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) DeepFM training on MovieLens 100k
104
- - [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN training on the e-commerce dataset
105
- - [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task training on the e-commerce dataset
106
- - [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval on MovieLens 100k
114
+ We provide multiple examples in the `tutorials/` directory, covering ranking, retrieval, multi-task, and data processing scenarios:
107
115
 
108
- To dive deeper, Jupyter notebooks are available:
116
+ - [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM model training example on MovieLens 100k dataset
117
+ - [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN deep interest network training example on e-commerce dataset
118
+ - [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training example on e-commerce dataset
119
+ - [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval model example trained on MovieLens 100k dataset
120
+ - [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly verify the availability of all ranking models
121
+ - [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly verify the availability of all multi-task models
122
+ - [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly verify the availability of all retrieval models
109
123
 
110
- - [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
111
- - [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
124
+ If you want to learn more details about the NextRec framework, we also provide Jupyter notebooks to help you understand:
112
125
 
113
- > Current version [0.4.2]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
126
+ - [How to get started with the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
127
+ - [How to use the data processor for data preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
114
128
 
115
129
  ## 5-Minute Quick Start
116
130
 
117
- We provide a detailed quick start and paired datasets to help you learn the framework. In `datasets/` you’ll find an e-commerce sample dataset like this:
131
+ We provide a detailed quick start guide and paired datasets to help you become familiar with the different features of the NextRec framework. The `datasets/` directory contains a test dataset from an e-commerce scenario; sample rows are shown below:
118
132
 
119
133
  | user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
120
134
  |--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
121
135
  | 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
122
136
  | 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
123
137
 
124
- Below is a short example showing how to train a DIN model. DIN (Deep Interest Network) won Best Paper at KDD 2018 for CTR prediction. You can also run `python tutorials/example_ranking_din.py` directly.
138
+ Next, we'll use a short example to show you how to train a DIN model using NextRec. DIN (Deep Interest Network) is from Alibaba's 2018 KDD Best Paper, used for CTR prediction scenarios. You can also directly execute `python tutorials/example_ranking_din.py` to run the training and inference code.
125
139
 
126
- After training, detailed logs are available under `nextrec_logs/din_tutorial`.
140
+ After starting training, you can view detailed training logs in the `nextrec_logs/din_tutorial` path.
127
141
 
128
142
  ```python
129
143
  import pandas as pd
@@ -196,9 +210,25 @@ metrics = model.evaluate(
196
210
  )
197
211
  ```
198
212
 
213
+ ## CLI Usage
214
+
215
+ NextRec provides a powerful command-line interface for model training and prediction using YAML configuration files. For detailed CLI documentation, see:
216
+
217
+ - [NextRec CLI User Guide](/nextrec_cli_preset/NextRec-CLI.md) - Complete guide for using the CLI
218
+
219
+ ```bash
220
+ # Train a model
221
+ nextrec --mode=train --train_config=path/to/train_config.yaml
222
+
223
+ # Run prediction
224
+ nextrec --mode=predict --predict_config=path/to/predict_config.yaml
225
+ ```
226
+
227
+ > As of version 0.4.3, NextRec CLI supports single-machine training; distributed training features are currently under development.
228
+
199
229
  ## Platform Compatibility
200
230
 
201
- The current version is 0.4.2. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
231
+ The current version is 0.4.3. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
202
232
 
203
233
  | Platform | Configuration |
204
234
  |----------|---------------|
@@ -247,14 +277,13 @@ The current version is 0.4.2. All models and test code have been validated on th
247
277
  | [ESMM](nextrec/models/multi_task/esmm.py) | Entire Space Multi-task Model | SIGIR 2018 | Supported |
248
278
  | [ShareBottom](nextrec/models/multi_task/share_bottom.py) | Multitask Learning | - | Supported |
249
279
  | [POSO](nextrec/models/multi_task/poso.py) | POSO: Personalized Cold-start Modules for Large-scale Recommender Systems | 2021 | Supported |
250
- | [POSO-IFLYTEK](nextrec/models/multi_task/poso_iflytek.py) | POSO with PLE-style gating for sequential marketing tasks | - | Supported |
251
280
 
252
281
  ### Generative Models
253
282
 
254
283
  | Model | Paper | Year | Status |
255
284
  |-------|-------|------|--------|
256
285
  | [TIGER](nextrec/models/generative/tiger.py) | Recommender Systems with Generative Retrieval | NeurIPS 2023 | In Progress |
257
- | [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - | In Progress |
286
+ | [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - | Supported |
258
287
 
259
288
  ---
260
289
 
@@ -270,7 +299,7 @@ We welcome contributions of any form!
270
299
  4. Push your branch (`git push origin feature/AmazingFeature`)
271
300
  5. Open a Pull Request
272
301
 
273
- > Before submitting a PR, please run tests using `pytest test/ -v` or `python -m pytest` to ensure everything passes.
302
+ > Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is unified.
274
303
 
275
304
  ### Code Style
276
305
 
@@ -1,20 +1,20 @@
1
1
  nextrec/__init__.py,sha256=_M3oUqyuvQ5k8Th_3wId6hQ_caclh7M5ad51XN09m98,235
2
- nextrec/__version__.py,sha256=6hfVa12Q-nXyUEXr6SyKpqPEDJW6vlRHyPxlA27PfTs,22
3
- nextrec/cli.py,sha256=W6Zvn8YuJ3zy8QXOGkyjXchw5wO5_gD67qy0aja0frQ,18747
2
+ nextrec/__version__.py,sha256=Nyg0pmk5ea9-SLCAFEIF96ByFx4-TJFtrqYPN-Zn6g4,22
3
+ nextrec/cli.py,sha256=b6tv7ZO7UBRVR6IfyqVP24JEcdu9-2_vV5MlfWcQucM,18468
4
4
  nextrec/basic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  nextrec/basic/activation.py,sha256=uzTWfCOtBSkbu_Gk9XBNTj8__s241CaYLJk6l8nGX9I,2885
6
6
  nextrec/basic/callback.py,sha256=YPkuSmy3WV8cXj8YmLKxwNP2kULpkUlJQf8pV8CkNYQ,1037
7
7
  nextrec/basic/features.py,sha256=ZvFzH05yQzmeWpH74h5gpALz5XOqVZTibUZRzXvwdLU,4141
8
- nextrec/basic/layers.py,sha256=NhT73cpQgFbR70ZNYuhk6DwKHzThi1oTSw5DoTn41aU,24940
9
- nextrec/basic/loggers.py,sha256=FSm9OtsiAn8xRvdGtqqDeTVlFX6tQjQdxaRSu1Gfgmw,6262
10
- nextrec/basic/metrics.py,sha256=1QODkbWhueULfv6T6wlFDduY16T533NW0KAfhlXsDlU,23100
11
- nextrec/basic/model.py,sha256=PhINJiG36j8-nOPGqxXNdpCFxLd2JhyweA3i84Uhukg,97129
8
+ nextrec/basic/layers.py,sha256=hQrxOw1XPmUKODaFG1l_K9TGJrNYHBUYcIQFirjUd7s,26004
9
+ nextrec/basic/loggers.py,sha256=p9wNmLuRYyvHsOzP0eNOYSlV3hrTDjrt6ggrH_r4RE0,6243
10
+ nextrec/basic/metrics.py,sha256=jr6Yqdig1gCZQP3NAWA_1fU8bTIG_7TGatrtrlzTK9E,23135
11
+ nextrec/basic/model.py,sha256=7-9CffXDvUG9G5Yx7_yCF17EWKup4Tl87JLdbmNIjb0,97118
12
12
  nextrec/basic/session.py,sha256=UOG_-EgCOxvqZwCkiEd8sgNV2G1sm_HbzKYVQw8yYDI,4483
13
13
  nextrec/data/__init__.py,sha256=auT_PkbgU9pUCt7KQl6H2ajcUorRhSyHa8NG3wExcG8,1197
14
14
  nextrec/data/batch_utils.py,sha256=FAJiweuDyAIzX7rICVmcxMofdFs2-7RLinovwB-lAYM,2878
15
15
  nextrec/data/data_processing.py,sha256=JTjNU55vj8UV2VgXwo0Qh4MQqWfD3z5uc95uOHIC4ck,5337
16
16
  nextrec/data/data_utils.py,sha256=LaVNXATcqu0ARPV-6WESQz6JXi3g-zq4uKjcoqBFlqI,1219
17
- nextrec/data/dataloader.py,sha256=UYGsNSRFIG9ma43QUNvh8Yk36pKS80ITxktX-PexoHY,18787
17
+ nextrec/data/dataloader.py,sha256=L4VBpWUZrxozFBV54nhJAAC-ZX5Hg6zFwIwpGnguJ9c,18789
18
18
  nextrec/data/preprocessor.py,sha256=BxoD6GHEre86i-TbxPi58Uwmg_G7oLkiER6f7VfmVHo,41583
19
19
  nextrec/loss/__init__.py,sha256=mO5t417BneZ8Ysa51GyjDaffjWyjzFgPXIQrrggasaQ,827
20
20
  nextrec/loss/listwise.py,sha256=UT9vJCOTOQLogVwaeTV7Z5uxIYnngGdxk-p9e97MGkU,5744
@@ -31,39 +31,39 @@ nextrec/models/match/mind.py,sha256=so7XkuCHr5k5UBhEB65GL0JavFOjLGLYeN9Nuc4eNKA,
31
31
  nextrec/models/match/sdm.py,sha256=MGEpLe1-UZ8kiHhR7-Q6zW-d9NnOm0ptHQWYVzh7m_Y,10488
32
32
  nextrec/models/match/youtube_dnn.py,sha256=DxMn-WLaLGAWRy5qhpRszUugbpPxOMUsWEuh7QEAWQw,7214
33
33
  nextrec/models/multi_task/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
- nextrec/models/multi_task/esmm.py,sha256=RWdx5K4s8jsnQdYPkogHaW4RT0-QL4PRKrPEfpIj0UU,6322
35
- nextrec/models/multi_task/mmoe.py,sha256=82_g13252xdSFxspqZ33gSj-jTZ1Co3pHuCdbhZdKZ4,7909
36
- nextrec/models/multi_task/ple.py,sha256=_b6kI4XZp_v6tCcMtfq821GZFLupOCKZ9s9vQfk2jqw,12629
37
- nextrec/models/multi_task/poso.py,sha256=uFNoQhb8XTHqeZ7U5GNAnHnQDz7FnSRihrzqg3oV9iQ,17793
38
- nextrec/models/multi_task/share_bottom.py,sha256=mPlXTEW53BnuXBUlpo8aIonanwoLE1b1Sgccmo8S4dg,5934
34
+ nextrec/models/multi_task/esmm.py,sha256=tQg_jE51VDTyc-F0auviyP8CI9uzYQ_KjybbCAXWp1s,6491
35
+ nextrec/models/multi_task/mmoe.py,sha256=qFWKdCE_VSGpVrMgx0NOO-HtLRNGdVxCWdkMfoEgjLA,8583
36
+ nextrec/models/multi_task/ple.py,sha256=SMTgKqz8huXzmyMwACVG8yisHvd3GFGshYl7LOpnJXs,13016
37
+ nextrec/models/multi_task/poso.py,sha256=JkNlMcqjMuE4PTGM6HeGcJTxhbLklXpusfyY8A1BjTQ,19017
38
+ nextrec/models/multi_task/share_bottom.py,sha256=mkWaGHimUqp-2dmPHXjb5ffxX7ixv1BF0gQXTbx9kBo,6519
39
39
  nextrec/models/ranking/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- nextrec/models/ranking/afm.py,sha256=QsL8XqQSfZk8ciUQm2K_AMeSk7t6ayyqdnByxhHARog,10027
41
- nextrec/models/ranking/autoint.py,sha256=6aHhunbG2chW7OlS5BCoFltMd285njw2vpWO46gzJKk,8108
42
- nextrec/models/ranking/dcn.py,sha256=sSfD3cA6z7J-LDwNVOnfcaaEJ29yWSYov7FXoHuMZlc,4916
43
- nextrec/models/ranking/dcn_v2.py,sha256=wmauPlflWojRHuPtnf8T2x6OT64EYIjbjlm_8RGdNlg,4113
44
- nextrec/models/ranking/deepfm.py,sha256=thuROmRVp2Ltiz_MJWili63pkdgGYzRkxtLkdyD2OPk,4967
45
- nextrec/models/ranking/dien.py,sha256=zKrbPpzV4L9VJmcJRbhPahy-8zc-Q2BrU70WidmidCM,12924
46
- nextrec/models/ranking/din.py,sha256=KoLJcSA6NOTCmV1JZY21q0IeKH4qSkWRH8bGOXB9VC8,7232
47
- nextrec/models/ranking/fibinet.py,sha256=OgbcUkrQmVpza_e1DZvkvtFeICFmWWNRKjdxi9rQzMs,4961
48
- nextrec/models/ranking/fm.py,sha256=VRlzSIlMDmhM35eBOKOgQDn8QeG4p6kI6-7uqKtZkE4,2853
49
- nextrec/models/ranking/masknet.py,sha256=0MWVgNN64isxj4wzUmxj785_L_Wa8rd3st4bPN9Yj7w,12351
50
- nextrec/models/ranking/pnn.py,sha256=zo2gmrPmD2ZBoqpTCSuKBllnrBUqHna1zjiLcvsnpNY,4831
51
- nextrec/models/ranking/widedeep.py,sha256=RgVICh03LFIIKkouV0VM3uUcpl3u2x5lBF4JDsGb7X0,4961
52
- nextrec/models/ranking/xdeepfm.py,sha256=qw3umdKlKjfBxNqG_7d4v3P594YaPxfeMAW3Tyng8uc,5771
40
+ nextrec/models/ranking/afm.py,sha256=XaiUYm36-pVNzB31lEtMstjg42-shn94khja0LMQB3s,10125
41
+ nextrec/models/ranking/autoint.py,sha256=CyHnYyHJiQIOiPGI-j_16nCpECDQJ3FlVZ4nq3qu-l8,8109
42
+ nextrec/models/ranking/dcn.py,sha256=vxbrDu9RxXznXNpXVeYJR4wdxoc4Vo0ygML6fFArY18,7299
43
+ nextrec/models/ranking/dcn_v2.py,sha256=VNMiHf6BeBOxnoommjGZfF-9t_B88niiVEgmPVcGjQM,11163
44
+ nextrec/models/ranking/deepfm.py,sha256=D9RPM40QAhogw8_RAOfE3JD1gnGf4F3-gXR40EZq-RU,5224
45
+ nextrec/models/ranking/dien.py,sha256=G1W_pj8XyGBPgZo_86I3LgfHzQvR-xvR-PGNJZdRdAM,18958
46
+ nextrec/models/ranking/din.py,sha256=gcibKTxK6nQCCxYMymO9ttu3UG2MSrOWRNBPCmJgMEM,9422
47
+ nextrec/models/ranking/fibinet.py,sha256=OuE4MoG7rHycyRRQtKOvxHbuf7C6zoJFxGFerXmmn9U,7919
48
+ nextrec/models/ranking/fm.py,sha256=ko_Eao9UfklakEk_TVEFZSyVAojmtclo1uIMBhL4FLU,4525
49
+ nextrec/models/ranking/masknet.py,sha256=IDp2XyGHdjuiUTIBv2JxNQlMw5ANdv12_9YJOX7tnzw,12367
50
+ nextrec/models/ranking/pnn.py,sha256=twwixy26mfAVaI9AqNnMLdwOG-WtDga60xsNiyJrFjI,8174
51
+ nextrec/models/ranking/widedeep.py,sha256=Xm2klmKBOoSKWCBQN7FhwLStu0BHSTOgAJ9kwLmtiFY,5077
52
+ nextrec/models/ranking/xdeepfm.py,sha256=LI_cCHjfQCG9H2tQKFC7NfyrLkm8FAUyjjbLoTIIpzY,5930
53
53
  nextrec/utils/__init__.py,sha256=zqU9vjRUpVzJepcvdbxboik68K5jnMR40kdVjr6tpXY,2599
54
- nextrec/utils/config.py,sha256=Rfh75CNYRgutYvuC9q1enMLhe1XTB18mMpRQDEcWi0I,17724
54
+ nextrec/utils/config.py,sha256=KGcKA7a592FkZ5wtbDmpvIc9Fk3uedj-BtJuRk2f4t8,18088
55
55
  nextrec/utils/device.py,sha256=DtgmrJnVJQKtgtVUbm0SW0vZ5Le0R9HU8TsvqPnRLZc,2453
56
56
  nextrec/utils/distributed.py,sha256=tIkgUjzEjR_FHOm9ckyM8KddkCfxNSogP-rdHcVGhuk,4782
57
57
  nextrec/utils/embedding.py,sha256=YSVnBeve0hVTPSfyxN4weGCK_Jd8SezRBqZgwJAR3Qw,496
58
58
  nextrec/utils/feature.py,sha256=LcXaWP98zMZhJTKL92VVHX8mqOE5Q0MyVq3hw5Z9kxs,300
59
- nextrec/utils/file.py,sha256=pwfp-amY1PW7JlYgJgMJHTT6-7cc_3hRJcLaY5UtPrg,2929
59
+ nextrec/utils/file.py,sha256=s2cO1LRbU7xPeAbVoOA6XOoV6wvLrW6oy6p9fVSz9pc,3024
60
60
  nextrec/utils/initializer.py,sha256=GzxasKewn4C14ERNdSo9el2jEa8GXXEB2hTQnRcK2IA,2517
61
- nextrec/utils/model.py,sha256=FB7QbatO0uEvghBEfByJtRS0waaBEB1UI0YzfA_2k04,535
61
+ nextrec/utils/model.py,sha256=dYl1XfIZt6aVjNyV2AAhcArwFRMcEAKrjG_pr8AVHs0,1163
62
62
  nextrec/utils/optimizer.py,sha256=eX8baIvWOpwDTGninbyp6pQfzdHbIL62GTi4ldpYcfM,2337
63
63
  nextrec/utils/synthetic_data.py,sha256=WSbC5cs7TbuDc57BCO74S7VJdlK0fQmnZA2KM4vUpoI,17566
64
64
  nextrec/utils/tensor.py,sha256=Z6MBpSuQpHw4kGjeKxG0cXZMpRBCM45zTKhk9WolyiM,2220
65
- nextrec-0.4.2.dist-info/METADATA,sha256=VKFbNQN7lnQk5K4qZJXKDL8f5JcWr9gvfgCayBY_918,16753
66
- nextrec-0.4.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
67
- nextrec-0.4.2.dist-info/entry_points.txt,sha256=NN-dNSdfMRTv86bNXM7d3ZEPW2BQC6bRi7QP7i9cIps,45
68
- nextrec-0.4.2.dist-info/licenses/LICENSE,sha256=2fQfVKeafywkni7MYHyClC6RGGC3laLTXCNBx-ubtp0,1064
69
- nextrec-0.4.2.dist-info/RECORD,,
65
+ nextrec-0.4.3.dist-info/METADATA,sha256=rD4niOz9T9rLsvQwcXakLQpU6Zn2Jj8BFZeGZDMhiyE,18952
66
+ nextrec-0.4.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
67
+ nextrec-0.4.3.dist-info/entry_points.txt,sha256=NN-dNSdfMRTv86bNXM7d3ZEPW2BQC6bRi7QP7i9cIps,45
68
+ nextrec-0.4.3.dist-info/licenses/LICENSE,sha256=2fQfVKeafywkni7MYHyClC6RGGC3laLTXCNBx-ubtp0,1064
69
+ nextrec-0.4.3.dist-info/RECORD,,