nextrec 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/layers.py +32 -8
- nextrec/basic/loggers.py +1 -1
- nextrec/basic/metrics.py +2 -1
- nextrec/basic/model.py +3 -3
- nextrec/cli.py +41 -47
- nextrec/data/dataloader.py +1 -1
- nextrec/models/multi_task/esmm.py +23 -16
- nextrec/models/multi_task/mmoe.py +36 -17
- nextrec/models/multi_task/ple.py +18 -12
- nextrec/models/multi_task/poso.py +68 -37
- nextrec/models/multi_task/share_bottom.py +16 -2
- nextrec/models/ranking/afm.py +14 -14
- nextrec/models/ranking/autoint.py +2 -2
- nextrec/models/ranking/dcn.py +61 -19
- nextrec/models/ranking/dcn_v2.py +224 -45
- nextrec/models/ranking/deepfm.py +14 -9
- nextrec/models/ranking/dien.py +215 -82
- nextrec/models/ranking/din.py +95 -57
- nextrec/models/ranking/fibinet.py +92 -30
- nextrec/models/ranking/fm.py +44 -8
- nextrec/models/ranking/masknet.py +7 -7
- nextrec/models/ranking/pnn.py +105 -38
- nextrec/models/ranking/widedeep.py +8 -4
- nextrec/models/ranking/xdeepfm.py +10 -5
- nextrec/utils/config.py +9 -3
- nextrec/utils/file.py +2 -1
- nextrec/utils/model.py +22 -0
- {nextrec-0.4.2.dist-info → nextrec-0.4.3.dist-info}/METADATA +53 -24
- {nextrec-0.4.2.dist-info → nextrec-0.4.3.dist-info}/RECORD +33 -33
- {nextrec-0.4.2.dist-info → nextrec-0.4.3.dist-info}/WHEEL +0 -0
- {nextrec-0.4.2.dist-info → nextrec-0.4.3.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.2.dist-info → nextrec-0.4.3.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/pnn.py
CHANGED
|
@@ -3,7 +3,35 @@ Date: create on 09/11/2025
|
|
|
3
3
|
Author:
|
|
4
4
|
Yang Zhou,zyaztec@gmail.com
|
|
5
5
|
Reference:
|
|
6
|
-
|
|
6
|
+
[1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response
|
|
7
|
+
prediction[C]//ICDM. 2016: 1149-1154. (https://arxiv.org/abs/1611.00144)
|
|
8
|
+
|
|
9
|
+
Product-based Neural Networks (PNN) are CTR prediction models that explicitly
|
|
10
|
+
encode feature interactions by combining:
|
|
11
|
+
(1) A linear signal from concatenated field embeddings
|
|
12
|
+
(2) A product signal capturing pairwise feature interactions (inner or outer)
|
|
13
|
+
The product layer augments the linear input to an MLP, enabling the network to
|
|
14
|
+
model both first-order and high-order feature interactions in a structured way.
|
|
15
|
+
|
|
16
|
+
Computation workflow:
|
|
17
|
+
- Embed each categorical/sequence field with a shared embedding dimension
|
|
18
|
+
- Linear signal: flatten and concatenate all field embeddings
|
|
19
|
+
- Product signal:
|
|
20
|
+
* Inner product: dot products over all field pairs
|
|
21
|
+
* Outer product: project embeddings then compute element-wise products
|
|
22
|
+
- Concatenate linear and product signals; feed into MLP for prediction
|
|
23
|
+
|
|
24
|
+
Key Advantages:
|
|
25
|
+
- Explicit pairwise interaction modeling without heavy feature engineering
|
|
26
|
+
- Flexible choice between inner/outer products to trade off capacity vs. cost
|
|
27
|
+
- Combines linear context with interaction signal for stronger expressiveness
|
|
28
|
+
- Simple architecture that integrates cleanly with standard MLP pipelines
|
|
29
|
+
|
|
30
|
+
PNN 是一种 CTR 预估模型,通过将线性信号与乘积信号结合,显式建模特征交互:
|
|
31
|
+
- 线性信号:将各字段的 embedding 拼接,用于保留一阶信息
|
|
32
|
+
- 乘积信号:对所有字段对做内积或外积,捕捉二阶及更高阶交互
|
|
33
|
+
随后将两类信号拼接送入 MLP,实现对用户响应的预测。内积版本计算量更低,
|
|
34
|
+
外积版本表达力更强,可根据场景取舍。
|
|
7
35
|
"""
|
|
8
36
|
|
|
9
37
|
import torch
|
|
@@ -15,6 +43,7 @@ from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
|
15
43
|
|
|
16
44
|
|
|
17
45
|
class PNN(BaseModel):
|
|
46
|
+
|
|
18
47
|
@property
|
|
19
48
|
def model_name(self):
|
|
20
49
|
return "PNN"
|
|
@@ -25,16 +54,16 @@ class PNN(BaseModel):
|
|
|
25
54
|
|
|
26
55
|
def __init__(
|
|
27
56
|
self,
|
|
28
|
-
dense_features: list[DenseFeature] |
|
|
29
|
-
sparse_features: list[SparseFeature] |
|
|
30
|
-
sequence_features: list[SequenceFeature] |
|
|
31
|
-
mlp_params: dict =
|
|
32
|
-
product_type: str = "inner",
|
|
57
|
+
dense_features: list[DenseFeature] | None = None,
|
|
58
|
+
sparse_features: list[SparseFeature] | None = None,
|
|
59
|
+
sequence_features: list[SequenceFeature] | None = None,
|
|
60
|
+
mlp_params: dict | None = None,
|
|
61
|
+
product_type: str = "inner", # "inner" (IPNN), "outer" (OPNN), "both" (PNN*)
|
|
33
62
|
outer_product_dim: int | None = None,
|
|
34
|
-
target: list[str] |
|
|
63
|
+
target: list[str] | str | None = None,
|
|
35
64
|
task: str | list[str] | None = None,
|
|
36
65
|
optimizer: str = "adam",
|
|
37
|
-
optimizer_params: dict =
|
|
66
|
+
optimizer_params: dict | None = None,
|
|
38
67
|
loss: str | nn.Module | None = "bce",
|
|
39
68
|
loss_params: dict | list[dict] | None = None,
|
|
40
69
|
device: str = "cpu",
|
|
@@ -45,6 +74,16 @@ class PNN(BaseModel):
|
|
|
45
74
|
**kwargs,
|
|
46
75
|
):
|
|
47
76
|
|
|
77
|
+
dense_features = dense_features or []
|
|
78
|
+
sparse_features = sparse_features or []
|
|
79
|
+
sequence_features = sequence_features or []
|
|
80
|
+
mlp_params = mlp_params or {}
|
|
81
|
+
if outer_product_dim is not None and outer_product_dim <= 0:
|
|
82
|
+
raise ValueError("outer_product_dim must be a positive integer.")
|
|
83
|
+
optimizer_params = optimizer_params or {}
|
|
84
|
+
if loss is None:
|
|
85
|
+
loss = "bce"
|
|
86
|
+
|
|
48
87
|
super(PNN, self).__init__(
|
|
49
88
|
dense_features=dense_features,
|
|
50
89
|
sparse_features=sparse_features,
|
|
@@ -59,16 +98,13 @@ class PNN(BaseModel):
|
|
|
59
98
|
**kwargs,
|
|
60
99
|
)
|
|
61
100
|
|
|
62
|
-
self.
|
|
63
|
-
if self.loss is None:
|
|
64
|
-
self.loss = "bce"
|
|
65
|
-
|
|
66
|
-
self.field_features = sparse_features + sequence_features
|
|
101
|
+
self.field_features = dense_features + sparse_features + sequence_features
|
|
67
102
|
if len(self.field_features) < 2:
|
|
68
103
|
raise ValueError("PNN requires at least two sparse/sequence features.")
|
|
69
104
|
|
|
70
105
|
self.embedding = EmbeddingLayer(features=self.field_features)
|
|
71
106
|
self.num_fields = len(self.field_features)
|
|
107
|
+
|
|
72
108
|
self.embedding_dim = self.field_features[0].embedding_dim
|
|
73
109
|
if any(f.embedding_dim != self.embedding_dim for f in self.field_features):
|
|
74
110
|
raise ValueError(
|
|
@@ -76,24 +112,34 @@ class PNN(BaseModel):
|
|
|
76
112
|
)
|
|
77
113
|
|
|
78
114
|
self.product_type = product_type.lower()
|
|
79
|
-
if self.product_type not in {"inner", "outer"}:
|
|
80
|
-
raise ValueError("product_type must be 'inner' or '
|
|
115
|
+
if self.product_type not in {"inner", "outer", "both"}:
|
|
116
|
+
raise ValueError("product_type must be 'inner', 'outer', or 'both'.")
|
|
81
117
|
|
|
82
118
|
self.num_pairs = self.num_fields * (self.num_fields - 1) // 2
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
119
|
+
self.outer_product_dim = outer_product_dim or self.embedding_dim
|
|
120
|
+
|
|
121
|
+
if self.product_type in {"outer", "both"}:
|
|
122
|
+
self.kernel = nn.Parameter(
|
|
123
|
+
torch.randn(self.embedding_dim, self.outer_product_dim)
|
|
124
|
+
)
|
|
125
|
+
nn.init.xavier_uniform_(self.kernel)
|
|
87
126
|
else:
|
|
88
|
-
self.
|
|
89
|
-
product_dim = self.num_pairs
|
|
127
|
+
self.kernel = None
|
|
90
128
|
|
|
91
129
|
linear_dim = self.num_fields * self.embedding_dim
|
|
130
|
+
|
|
131
|
+
if self.product_type == "inner":
|
|
132
|
+
product_dim = self.num_pairs
|
|
133
|
+
elif self.product_type == "outer":
|
|
134
|
+
product_dim = self.num_pairs
|
|
135
|
+
else:
|
|
136
|
+
product_dim = 2 * self.num_pairs
|
|
137
|
+
|
|
92
138
|
self.mlp = MLP(input_dim=linear_dim + product_dim, **mlp_params)
|
|
93
139
|
self.prediction_layer = PredictionLayer(task_type=self.task)
|
|
94
140
|
|
|
95
141
|
modules = ["mlp"]
|
|
96
|
-
if self.
|
|
142
|
+
if self.kernel is not None:
|
|
97
143
|
modules.append("kernel")
|
|
98
144
|
self.register_regularization_weights(
|
|
99
145
|
embedding_attr="embedding", include_modules=modules
|
|
@@ -106,27 +152,48 @@ class PNN(BaseModel):
|
|
|
106
152
|
loss_params=loss_params,
|
|
107
153
|
)
|
|
108
154
|
|
|
155
|
+
def compute_inner_products(self, field_emb: torch.Tensor) -> torch.Tensor:
|
|
156
|
+
interactions = []
|
|
157
|
+
for i in range(self.num_fields - 1):
|
|
158
|
+
vi = field_emb[:, i, :] # [B, D]
|
|
159
|
+
for j in range(i + 1, self.num_fields):
|
|
160
|
+
vj = field_emb[:, j, :] # [B, D]
|
|
161
|
+
# <v_i, v_j> = sum_k v_i,k * v_j,k
|
|
162
|
+
pij = torch.sum(vi * vj, dim=1, keepdim=True) # [B, 1]
|
|
163
|
+
interactions.append(pij)
|
|
164
|
+
return torch.cat(interactions, dim=1) # [B, num_pairs]
|
|
165
|
+
|
|
166
|
+
def compute_outer_kernel_products(self, field_emb: torch.Tensor) -> torch.Tensor:
|
|
167
|
+
if self.kernel is None:
|
|
168
|
+
raise RuntimeError("kernel is not initialized for outer product.")
|
|
169
|
+
|
|
170
|
+
interactions = []
|
|
171
|
+
for i in range(self.num_fields - 1):
|
|
172
|
+
vi = field_emb[:, i, :] # [B, D]
|
|
173
|
+
# Project vi with kernel -> [B, K]
|
|
174
|
+
vi_proj = torch.matmul(vi, self.kernel) # [B, K]
|
|
175
|
+
for j in range(i + 1, self.num_fields):
|
|
176
|
+
vj = field_emb[:, j, :] # [B, D]
|
|
177
|
+
vj_proj = torch.matmul(vj, self.kernel) # [B, K]
|
|
178
|
+
# g(vi, vj) = (v_i^T W) * (v_j^T W) summed over projection dim
|
|
179
|
+
pij = torch.sum(vi_proj * vj_proj, dim=1, keepdim=True) # [B, 1]
|
|
180
|
+
interactions.append(pij)
|
|
181
|
+
return torch.cat(interactions, dim=1) # [B, num_pairs]
|
|
182
|
+
|
|
109
183
|
def forward(self, x):
|
|
184
|
+
# field_emb: [B, F, D]
|
|
110
185
|
field_emb = self.embedding(x=x, features=self.field_features, squeeze_dim=False)
|
|
111
|
-
|
|
186
|
+
# Z = [v_1; v_2; ...; v_F]
|
|
187
|
+
linear_signal = field_emb.flatten(start_dim=1) # [B, F*D]
|
|
112
188
|
|
|
113
189
|
if self.product_type == "inner":
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
for j in range(i + 1, self.num_fields):
|
|
118
|
-
vj = field_emb[:, j, :]
|
|
119
|
-
interactions.append(torch.sum(vi * vj, dim=1, keepdim=True))
|
|
120
|
-
product_signal = torch.cat(interactions, dim=1)
|
|
190
|
+
product_signal = self.compute_inner_products(field_emb)
|
|
191
|
+
elif self.product_type == "outer":
|
|
192
|
+
product_signal = self.compute_outer_kernel_products(field_emb)
|
|
121
193
|
else:
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
vi = transformed[:, i, :]
|
|
126
|
-
for j in range(i + 1, self.num_fields):
|
|
127
|
-
vj = transformed[:, j, :]
|
|
128
|
-
interactions.append(vi * vj)
|
|
129
|
-
product_signal = torch.stack(interactions, dim=1).flatten(start_dim=1)
|
|
194
|
+
inner_p = self.compute_inner_products(field_emb)
|
|
195
|
+
outer_p = self.compute_outer_kernel_products(field_emb)
|
|
196
|
+
product_signal = torch.cat([inner_p, outer_p], dim=1)
|
|
130
197
|
|
|
131
198
|
deep_input = torch.cat([linear_signal, product_signal], dim=1)
|
|
132
199
|
y = self.mlp(deep_input)
|
|
@@ -61,10 +61,10 @@ class WideDeep(BaseModel):
|
|
|
61
61
|
sparse_features: list[SparseFeature],
|
|
62
62
|
sequence_features: list[SequenceFeature],
|
|
63
63
|
mlp_params: dict,
|
|
64
|
-
target: list[str] =
|
|
64
|
+
target: list[str] | str | None = None,
|
|
65
65
|
task: str | list[str] | None = None,
|
|
66
66
|
optimizer: str = "adam",
|
|
67
|
-
optimizer_params: dict =
|
|
67
|
+
optimizer_params: dict | None = None,
|
|
68
68
|
loss: str | nn.Module | None = "bce",
|
|
69
69
|
loss_params: dict | list[dict] | None = None,
|
|
70
70
|
device: str = "cpu",
|
|
@@ -75,6 +75,12 @@ class WideDeep(BaseModel):
|
|
|
75
75
|
**kwargs,
|
|
76
76
|
):
|
|
77
77
|
|
|
78
|
+
if target is None:
|
|
79
|
+
target = []
|
|
80
|
+
optimizer_params = optimizer_params or {}
|
|
81
|
+
if loss is None:
|
|
82
|
+
loss = "bce"
|
|
83
|
+
|
|
78
84
|
super(WideDeep, self).__init__(
|
|
79
85
|
dense_features=dense_features,
|
|
80
86
|
sparse_features=sparse_features,
|
|
@@ -90,8 +96,6 @@ class WideDeep(BaseModel):
|
|
|
90
96
|
)
|
|
91
97
|
|
|
92
98
|
self.loss = loss
|
|
93
|
-
if self.loss is None:
|
|
94
|
-
self.loss = "bce"
|
|
95
99
|
|
|
96
100
|
# Wide part: use all features for linear model
|
|
97
101
|
self.wide_features = sparse_features + sequence_features
|
|
@@ -76,12 +76,12 @@ class xDeepFM(BaseModel):
|
|
|
76
76
|
sparse_features: list[SparseFeature],
|
|
77
77
|
sequence_features: list[SequenceFeature],
|
|
78
78
|
mlp_params: dict,
|
|
79
|
-
cin_size: list[int] =
|
|
79
|
+
cin_size: list[int] | None = None,
|
|
80
80
|
split_half: bool = True,
|
|
81
|
-
target: list[str] =
|
|
81
|
+
target: list[str] | str | None = None,
|
|
82
82
|
task: str | list[str] | None = None,
|
|
83
83
|
optimizer: str = "adam",
|
|
84
|
-
optimizer_params: dict =
|
|
84
|
+
optimizer_params: dict | None = None,
|
|
85
85
|
loss: str | nn.Module | None = "bce",
|
|
86
86
|
loss_params: dict | list[dict] | None = None,
|
|
87
87
|
device: str = "cpu",
|
|
@@ -92,6 +92,13 @@ class xDeepFM(BaseModel):
|
|
|
92
92
|
**kwargs,
|
|
93
93
|
):
|
|
94
94
|
|
|
95
|
+
cin_size = cin_size or [128, 128]
|
|
96
|
+
if target is None:
|
|
97
|
+
target = []
|
|
98
|
+
optimizer_params = optimizer_params or {}
|
|
99
|
+
if loss is None:
|
|
100
|
+
loss = "bce"
|
|
101
|
+
|
|
95
102
|
super(xDeepFM, self).__init__(
|
|
96
103
|
dense_features=dense_features,
|
|
97
104
|
sparse_features=sparse_features,
|
|
@@ -107,8 +114,6 @@ class xDeepFM(BaseModel):
|
|
|
107
114
|
)
|
|
108
115
|
|
|
109
116
|
self.loss = loss
|
|
110
|
-
if self.loss is None:
|
|
111
|
-
self.loss = "bce"
|
|
112
117
|
|
|
113
118
|
# Linear part and CIN part: use sparse and sequence features
|
|
114
119
|
self.linear_features = sparse_features + sequence_features
|
nextrec/utils/config.py
CHANGED
|
@@ -28,9 +28,15 @@ def resolve_path(path_str: str | Path, base_dir: Path) -> Path:
|
|
|
28
28
|
path = Path(path_str).expanduser()
|
|
29
29
|
if path.is_absolute():
|
|
30
30
|
return path
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
# Prefer resolving relative to current working directory when the path (or its parent)
|
|
32
|
+
# already exists there; otherwise fall back to the config file's directory.
|
|
33
|
+
cwd_path = (Path.cwd() / path).resolve()
|
|
34
|
+
if cwd_path.exists() or cwd_path.parent.exists():
|
|
35
|
+
return cwd_path
|
|
36
|
+
base_dir_path = (base_dir / path).resolve()
|
|
37
|
+
if base_dir_path.exists() or base_dir_path.parent.exists():
|
|
38
|
+
return base_dir_path
|
|
39
|
+
return cwd_path
|
|
34
40
|
|
|
35
41
|
|
|
36
42
|
def select_features(
|
nextrec/utils/file.py
CHANGED
|
@@ -60,7 +60,8 @@ def read_table(path: str | Path, data_format: str | None = None) -> pd.DataFrame
|
|
|
60
60
|
if fmt in {"parquet", ""}:
|
|
61
61
|
return pd.read_parquet(data_path)
|
|
62
62
|
if fmt in {"csv", "txt"}:
|
|
63
|
-
|
|
63
|
+
# Use low_memory=False to avoid mixed-type DtypeWarning on wide CSVs
|
|
64
|
+
return pd.read_csv(data_path, low_memory=False)
|
|
64
65
|
raise ValueError(f"Unsupported data format: {data_path}")
|
|
65
66
|
|
|
66
67
|
|
nextrec/utils/model.py
CHANGED
|
@@ -20,3 +20,25 @@ def get_mlp_output_dim(params: dict, fallback: int) -> int:
|
|
|
20
20
|
if dims:
|
|
21
21
|
return dims[-1]
|
|
22
22
|
return fallback
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def select_features(
|
|
26
|
+
available_features: list,
|
|
27
|
+
names: list[str],
|
|
28
|
+
param_name: str,
|
|
29
|
+
) -> list:
|
|
30
|
+
if not names:
|
|
31
|
+
return []
|
|
32
|
+
|
|
33
|
+
if len(names) != len(set(names)):
|
|
34
|
+
raise ValueError(f"{param_name} contains duplicate feature names: {names}")
|
|
35
|
+
|
|
36
|
+
feature_map = {feat.name: feat for feat in available_features}
|
|
37
|
+
missing = [name for name in names if name not in feature_map]
|
|
38
|
+
if missing:
|
|
39
|
+
raise ValueError(
|
|
40
|
+
f"{param_name} contains unknown feature names {missing}. "
|
|
41
|
+
f"Available features: {list(feature_map)}"
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
return [feature_map[name] for name in names]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nextrec
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
|
|
5
5
|
Project-URL: Homepage, https://github.com/zerolovesea/NextRec
|
|
6
6
|
Project-URL: Repository, https://github.com/zerolovesea/NextRec
|
|
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
|
|
|
63
63
|

|
|
64
64
|

|
|
65
65
|

|
|
66
|
-

|
|
67
67
|
|
|
68
68
|
English | [中文文档](README_zh.md)
|
|
69
69
|
|
|
@@ -71,16 +71,28 @@ English | [中文文档](README_zh.md)
|
|
|
71
71
|
|
|
72
72
|
</div>
|
|
73
73
|
|
|
74
|
+
## Table of Contents
|
|
75
|
+
|
|
76
|
+
- [Introduction](#introduction)
|
|
77
|
+
- [Installation](#installation)
|
|
78
|
+
- [Architecture](#architecture)
|
|
79
|
+
- [5-Minute Quick Start](#5-minute-quick-start)
|
|
80
|
+
- [CLI Usage](#cli-usage)
|
|
81
|
+
- [Platform Compatibility](#platform-compatibility)
|
|
82
|
+
- [Supported Models](#supported-models)
|
|
83
|
+
- [Contributing](#contributing)
|
|
84
|
+
|
|
74
85
|
## Introduction
|
|
75
86
|
|
|
76
|
-
NextRec is a modern recommendation framework built on PyTorch,
|
|
87
|
+
NextRec is a modern recommendation system framework built on PyTorch, providing researchers and engineering teams with a fast modeling, training, and evaluation experience. The framework adopts a modular design with rich built-in model implementations, data processing tools, and engineering-ready training components, covering a wide range of recommendation scenarios. NextRec provides easy-to-use interfaces, command-line tools, and tutorials, enabling recommendation algorithm learners to quickly understand model architectures and to train models and run inference as quickly as possible.
|
|
77
88
|
|
|
78
89
|
## Why NextRec
|
|
79
90
|
|
|
80
|
-
- **Unified feature engineering & data pipeline**: Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching
|
|
81
|
-
- **Multi-scenario
|
|
82
|
-
- **Developer-friendly experience**:
|
|
83
|
-
- **
|
|
91
|
+
- **Unified feature engineering & data pipeline**: NextRec provides Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching the model training and inference process based on offline `parquet/csv` features in industrial big-data Spark/Hive scenarios.
|
|
92
|
+
- **Multi-scenario recommendation capabilities**: Covers ranking (CTR/CVR), retrieval, multi-task learning and other recommendation/marketing models, with a continuously expanding model zoo.
|
|
93
|
+
- **Developer-friendly experience**: Supports stream preprocessing/distributed training/inference for various data formats (`csv/parquet/pathlike`), GPU acceleration and visual metric monitoring, facilitating experiments for business algorithm engineers and recommendation algorithm learners.
|
|
94
|
+
- **Flexible command-line tool**: After writing training and inference config files, you can start training or inference with a single command such as `nextrec --mode=train --train_config=train_config.yaml`, facilitating rapid experiment iteration and agile deployment.
|
|
95
|
+
- **Efficient training & evaluation**: NextRec's standardized training engine comes with various optimizers, learning rate schedulers, early stopping, model checkpoints, and detailed log management built-in, ready to use out of the box.
|
|
84
96
|
|
|
85
97
|
## Architecture
|
|
86
98
|
|
|
@@ -96,34 +108,36 @@ NextRec adopts a modular and low-coupling engineering design, enabling full-pipe
|
|
|
96
108
|
|
|
97
109
|
You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
|
|
98
110
|
|
|
99
|
-
## Tutorials
|
|
100
111
|
|
|
101
|
-
|
|
112
|
+
## Tutorials
|
|
102
113
|
|
|
103
|
-
|
|
104
|
-
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN training on the e-commerce dataset
|
|
105
|
-
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task training on the e-commerce dataset
|
|
106
|
-
- [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval on MovieLens 100k
|
|
114
|
+
We provide multiple examples in the `tutorials/` directory, covering ranking, retrieval, multi-task, and data processing scenarios:
|
|
107
115
|
|
|
108
|
-
|
|
116
|
+
- [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM model training example on MovieLens 100k dataset
|
|
117
|
+
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN deep interest network training example on e-commerce dataset
|
|
118
|
+
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training example on e-commerce dataset
|
|
119
|
+
- [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval model example trained on MovieLens 100k dataset
|
|
120
|
+
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly verify the availability of all ranking models
|
|
121
|
+
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly verify the availability of all multi-task models
|
|
122
|
+
- [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly verify the availability of all retrieval models
|
|
109
123
|
|
|
110
|
-
|
|
111
|
-
- [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
|
|
124
|
+
If you want to learn more details about the NextRec framework, we also provide Jupyter notebooks to help you understand:
|
|
112
125
|
|
|
113
|
-
|
|
126
|
+
- [How to get started with the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
|
|
127
|
+
- [How to use the data processor for data preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
|
|
114
128
|
|
|
115
129
|
## 5-Minute Quick Start
|
|
116
130
|
|
|
117
|
-
We provide a detailed quick start and paired datasets to help you
|
|
131
|
+
We provide a detailed quick start guide and paired datasets to help you become familiar with the different features of the NextRec framework. A test dataset from an e-commerce scenario is available in the `datasets/` directory; sample rows are shown below:
|
|
118
132
|
|
|
119
133
|
| user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
|
|
120
134
|
|--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
|
|
121
135
|
| 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
|
|
122
136
|
| 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
|
|
123
137
|
|
|
124
|
-
|
|
138
|
+
Next, we'll use a short example to show you how to train a DIN model using NextRec. DIN (Deep Interest Network) is from Alibaba's 2018 KDD Best Paper, used for CTR prediction scenarios. You can also directly execute `python tutorials/example_ranking_din.py` to run the training and inference code.
|
|
125
139
|
|
|
126
|
-
After training, detailed logs
|
|
140
|
+
After starting training, you can view detailed training logs in the `nextrec_logs/din_tutorial` directory.
|
|
127
141
|
|
|
128
142
|
```python
|
|
129
143
|
import pandas as pd
|
|
@@ -196,9 +210,25 @@ metrics = model.evaluate(
|
|
|
196
210
|
)
|
|
197
211
|
```
|
|
198
212
|
|
|
213
|
+
## CLI Usage
|
|
214
|
+
|
|
215
|
+
NextRec provides a powerful command-line interface for model training and prediction using YAML configuration files. For detailed CLI documentation, see:
|
|
216
|
+
|
|
217
|
+
- [NextRec CLI User Guide](/nextrec_cli_preset/NextRec-CLI.md) - Complete guide for using the CLI
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
# Train a model
|
|
221
|
+
nextrec --mode=train --train_config=path/to/train_config.yaml
|
|
222
|
+
|
|
223
|
+
# Run prediction
|
|
224
|
+
nextrec --mode=predict --predict_config=path/to/predict_config.yaml
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
> As of version 0.4.3, NextRec CLI supports single-machine training; distributed training features are currently under development.
|
|
228
|
+
|
|
199
229
|
## Platform Compatibility
|
|
200
230
|
|
|
201
|
-
The current version is 0.4.
|
|
231
|
+
The current version is 0.4.3. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
|
|
202
232
|
|
|
203
233
|
| Platform | Configuration |
|
|
204
234
|
|----------|---------------|
|
|
@@ -247,14 +277,13 @@ The current version is 0.4.2. All models and test code have been validated on th
|
|
|
247
277
|
| [ESMM](nextrec/models/multi_task/esmm.py) | Entire Space Multi-task Model | SIGIR 2018 | Supported |
|
|
248
278
|
| [ShareBottom](nextrec/models/multi_task/share_bottom.py) | Multitask Learning | - | Supported |
|
|
249
279
|
| [POSO](nextrec/models/multi_task/poso.py) | POSO: Personalized Cold-start Modules for Large-scale Recommender Systems | 2021 | Supported |
|
|
250
|
-
| [POSO-IFLYTEK](nextrec/models/multi_task/poso_iflytek.py) | POSO with PLE-style gating for sequential marketing tasks | - | Supported |
|
|
251
280
|
|
|
252
281
|
### Generative Models
|
|
253
282
|
|
|
254
283
|
| Model | Paper | Year | Status |
|
|
255
284
|
|-------|-------|------|--------|
|
|
256
285
|
| [TIGER](nextrec/models/generative/tiger.py) | Recommender Systems with Generative Retrieval | NeurIPS 2023 | In Progress |
|
|
257
|
-
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - |
|
|
286
|
+
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - | Supported |
|
|
258
287
|
|
|
259
288
|
---
|
|
260
289
|
|
|
@@ -270,7 +299,7 @@ We welcome contributions of any form!
|
|
|
270
299
|
4. Push your branch (`git push origin feature/AmazingFeature`)
|
|
271
300
|
5. Open a Pull Request
|
|
272
301
|
|
|
273
|
-
> Before submitting a PR, please run
|
|
302
|
+
> Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is unified.
|
|
274
303
|
|
|
275
304
|
### Code Style
|
|
276
305
|
|
|
@@ -1,20 +1,20 @@
|
|
|
1
1
|
nextrec/__init__.py,sha256=_M3oUqyuvQ5k8Th_3wId6hQ_caclh7M5ad51XN09m98,235
|
|
2
|
-
nextrec/__version__.py,sha256=
|
|
3
|
-
nextrec/cli.py,sha256=
|
|
2
|
+
nextrec/__version__.py,sha256=Nyg0pmk5ea9-SLCAFEIF96ByFx4-TJFtrqYPN-Zn6g4,22
|
|
3
|
+
nextrec/cli.py,sha256=b6tv7ZO7UBRVR6IfyqVP24JEcdu9-2_vV5MlfWcQucM,18468
|
|
4
4
|
nextrec/basic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
nextrec/basic/activation.py,sha256=uzTWfCOtBSkbu_Gk9XBNTj8__s241CaYLJk6l8nGX9I,2885
|
|
6
6
|
nextrec/basic/callback.py,sha256=YPkuSmy3WV8cXj8YmLKxwNP2kULpkUlJQf8pV8CkNYQ,1037
|
|
7
7
|
nextrec/basic/features.py,sha256=ZvFzH05yQzmeWpH74h5gpALz5XOqVZTibUZRzXvwdLU,4141
|
|
8
|
-
nextrec/basic/layers.py,sha256=
|
|
9
|
-
nextrec/basic/loggers.py,sha256=
|
|
10
|
-
nextrec/basic/metrics.py,sha256=
|
|
11
|
-
nextrec/basic/model.py,sha256=
|
|
8
|
+
nextrec/basic/layers.py,sha256=hQrxOw1XPmUKODaFG1l_K9TGJrNYHBUYcIQFirjUd7s,26004
|
|
9
|
+
nextrec/basic/loggers.py,sha256=p9wNmLuRYyvHsOzP0eNOYSlV3hrTDjrt6ggrH_r4RE0,6243
|
|
10
|
+
nextrec/basic/metrics.py,sha256=jr6Yqdig1gCZQP3NAWA_1fU8bTIG_7TGatrtrlzTK9E,23135
|
|
11
|
+
nextrec/basic/model.py,sha256=7-9CffXDvUG9G5Yx7_yCF17EWKup4Tl87JLdbmNIjb0,97118
|
|
12
12
|
nextrec/basic/session.py,sha256=UOG_-EgCOxvqZwCkiEd8sgNV2G1sm_HbzKYVQw8yYDI,4483
|
|
13
13
|
nextrec/data/__init__.py,sha256=auT_PkbgU9pUCt7KQl6H2ajcUorRhSyHa8NG3wExcG8,1197
|
|
14
14
|
nextrec/data/batch_utils.py,sha256=FAJiweuDyAIzX7rICVmcxMofdFs2-7RLinovwB-lAYM,2878
|
|
15
15
|
nextrec/data/data_processing.py,sha256=JTjNU55vj8UV2VgXwo0Qh4MQqWfD3z5uc95uOHIC4ck,5337
|
|
16
16
|
nextrec/data/data_utils.py,sha256=LaVNXATcqu0ARPV-6WESQz6JXi3g-zq4uKjcoqBFlqI,1219
|
|
17
|
-
nextrec/data/dataloader.py,sha256=
|
|
17
|
+
nextrec/data/dataloader.py,sha256=L4VBpWUZrxozFBV54nhJAAC-ZX5Hg6zFwIwpGnguJ9c,18789
|
|
18
18
|
nextrec/data/preprocessor.py,sha256=BxoD6GHEre86i-TbxPi58Uwmg_G7oLkiER6f7VfmVHo,41583
|
|
19
19
|
nextrec/loss/__init__.py,sha256=mO5t417BneZ8Ysa51GyjDaffjWyjzFgPXIQrrggasaQ,827
|
|
20
20
|
nextrec/loss/listwise.py,sha256=UT9vJCOTOQLogVwaeTV7Z5uxIYnngGdxk-p9e97MGkU,5744
|
|
@@ -31,39 +31,39 @@ nextrec/models/match/mind.py,sha256=so7XkuCHr5k5UBhEB65GL0JavFOjLGLYeN9Nuc4eNKA,
|
|
|
31
31
|
nextrec/models/match/sdm.py,sha256=MGEpLe1-UZ8kiHhR7-Q6zW-d9NnOm0ptHQWYVzh7m_Y,10488
|
|
32
32
|
nextrec/models/match/youtube_dnn.py,sha256=DxMn-WLaLGAWRy5qhpRszUugbpPxOMUsWEuh7QEAWQw,7214
|
|
33
33
|
nextrec/models/multi_task/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
-
nextrec/models/multi_task/esmm.py,sha256=
|
|
35
|
-
nextrec/models/multi_task/mmoe.py,sha256=
|
|
36
|
-
nextrec/models/multi_task/ple.py,sha256=
|
|
37
|
-
nextrec/models/multi_task/poso.py,sha256=
|
|
38
|
-
nextrec/models/multi_task/share_bottom.py,sha256=
|
|
34
|
+
nextrec/models/multi_task/esmm.py,sha256=tQg_jE51VDTyc-F0auviyP8CI9uzYQ_KjybbCAXWp1s,6491
|
|
35
|
+
nextrec/models/multi_task/mmoe.py,sha256=qFWKdCE_VSGpVrMgx0NOO-HtLRNGdVxCWdkMfoEgjLA,8583
|
|
36
|
+
nextrec/models/multi_task/ple.py,sha256=SMTgKqz8huXzmyMwACVG8yisHvd3GFGshYl7LOpnJXs,13016
|
|
37
|
+
nextrec/models/multi_task/poso.py,sha256=JkNlMcqjMuE4PTGM6HeGcJTxhbLklXpusfyY8A1BjTQ,19017
|
|
38
|
+
nextrec/models/multi_task/share_bottom.py,sha256=mkWaGHimUqp-2dmPHXjb5ffxX7ixv1BF0gQXTbx9kBo,6519
|
|
39
39
|
nextrec/models/ranking/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
40
|
-
nextrec/models/ranking/afm.py,sha256=
|
|
41
|
-
nextrec/models/ranking/autoint.py,sha256=
|
|
42
|
-
nextrec/models/ranking/dcn.py,sha256=
|
|
43
|
-
nextrec/models/ranking/dcn_v2.py,sha256=
|
|
44
|
-
nextrec/models/ranking/deepfm.py,sha256=
|
|
45
|
-
nextrec/models/ranking/dien.py,sha256=
|
|
46
|
-
nextrec/models/ranking/din.py,sha256=
|
|
47
|
-
nextrec/models/ranking/fibinet.py,sha256=
|
|
48
|
-
nextrec/models/ranking/fm.py,sha256=
|
|
49
|
-
nextrec/models/ranking/masknet.py,sha256=
|
|
50
|
-
nextrec/models/ranking/pnn.py,sha256=
|
|
51
|
-
nextrec/models/ranking/widedeep.py,sha256=
|
|
52
|
-
nextrec/models/ranking/xdeepfm.py,sha256=
|
|
40
|
+
nextrec/models/ranking/afm.py,sha256=XaiUYm36-pVNzB31lEtMstjg42-shn94khja0LMQB3s,10125
|
|
41
|
+
nextrec/models/ranking/autoint.py,sha256=CyHnYyHJiQIOiPGI-j_16nCpECDQJ3FlVZ4nq3qu-l8,8109
|
|
42
|
+
nextrec/models/ranking/dcn.py,sha256=vxbrDu9RxXznXNpXVeYJR4wdxoc4Vo0ygML6fFArY18,7299
|
|
43
|
+
nextrec/models/ranking/dcn_v2.py,sha256=VNMiHf6BeBOxnoommjGZfF-9t_B88niiVEgmPVcGjQM,11163
|
|
44
|
+
nextrec/models/ranking/deepfm.py,sha256=D9RPM40QAhogw8_RAOfE3JD1gnGf4F3-gXR40EZq-RU,5224
|
|
45
|
+
nextrec/models/ranking/dien.py,sha256=G1W_pj8XyGBPgZo_86I3LgfHzQvR-xvR-PGNJZdRdAM,18958
|
|
46
|
+
nextrec/models/ranking/din.py,sha256=gcibKTxK6nQCCxYMymO9ttu3UG2MSrOWRNBPCmJgMEM,9422
|
|
47
|
+
nextrec/models/ranking/fibinet.py,sha256=OuE4MoG7rHycyRRQtKOvxHbuf7C6zoJFxGFerXmmn9U,7919
|
|
48
|
+
nextrec/models/ranking/fm.py,sha256=ko_Eao9UfklakEk_TVEFZSyVAojmtclo1uIMBhL4FLU,4525
|
|
49
|
+
nextrec/models/ranking/masknet.py,sha256=IDp2XyGHdjuiUTIBv2JxNQlMw5ANdv12_9YJOX7tnzw,12367
|
|
50
|
+
nextrec/models/ranking/pnn.py,sha256=twwixy26mfAVaI9AqNnMLdwOG-WtDga60xsNiyJrFjI,8174
|
|
51
|
+
nextrec/models/ranking/widedeep.py,sha256=Xm2klmKBOoSKWCBQN7FhwLStu0BHSTOgAJ9kwLmtiFY,5077
|
|
52
|
+
nextrec/models/ranking/xdeepfm.py,sha256=LI_cCHjfQCG9H2tQKFC7NfyrLkm8FAUyjjbLoTIIpzY,5930
|
|
53
53
|
nextrec/utils/__init__.py,sha256=zqU9vjRUpVzJepcvdbxboik68K5jnMR40kdVjr6tpXY,2599
|
|
54
|
-
nextrec/utils/config.py,sha256=
|
|
54
|
+
nextrec/utils/config.py,sha256=KGcKA7a592FkZ5wtbDmpvIc9Fk3uedj-BtJuRk2f4t8,18088
|
|
55
55
|
nextrec/utils/device.py,sha256=DtgmrJnVJQKtgtVUbm0SW0vZ5Le0R9HU8TsvqPnRLZc,2453
|
|
56
56
|
nextrec/utils/distributed.py,sha256=tIkgUjzEjR_FHOm9ckyM8KddkCfxNSogP-rdHcVGhuk,4782
|
|
57
57
|
nextrec/utils/embedding.py,sha256=YSVnBeve0hVTPSfyxN4weGCK_Jd8SezRBqZgwJAR3Qw,496
|
|
58
58
|
nextrec/utils/feature.py,sha256=LcXaWP98zMZhJTKL92VVHX8mqOE5Q0MyVq3hw5Z9kxs,300
|
|
59
|
-
nextrec/utils/file.py,sha256=
|
|
59
|
+
nextrec/utils/file.py,sha256=s2cO1LRbU7xPeAbVoOA6XOoV6wvLrW6oy6p9fVSz9pc,3024
|
|
60
60
|
nextrec/utils/initializer.py,sha256=GzxasKewn4C14ERNdSo9el2jEa8GXXEB2hTQnRcK2IA,2517
|
|
61
|
-
nextrec/utils/model.py,sha256=
|
|
61
|
+
nextrec/utils/model.py,sha256=dYl1XfIZt6aVjNyV2AAhcArwFRMcEAKrjG_pr8AVHs0,1163
|
|
62
62
|
nextrec/utils/optimizer.py,sha256=eX8baIvWOpwDTGninbyp6pQfzdHbIL62GTi4ldpYcfM,2337
|
|
63
63
|
nextrec/utils/synthetic_data.py,sha256=WSbC5cs7TbuDc57BCO74S7VJdlK0fQmnZA2KM4vUpoI,17566
|
|
64
64
|
nextrec/utils/tensor.py,sha256=Z6MBpSuQpHw4kGjeKxG0cXZMpRBCM45zTKhk9WolyiM,2220
|
|
65
|
-
nextrec-0.4.
|
|
66
|
-
nextrec-0.4.
|
|
67
|
-
nextrec-0.4.
|
|
68
|
-
nextrec-0.4.
|
|
69
|
-
nextrec-0.4.
|
|
65
|
+
nextrec-0.4.3.dist-info/METADATA,sha256=rD4niOz9T9rLsvQwcXakLQpU6Zn2Jj8BFZeGZDMhiyE,18952
|
|
66
|
+
nextrec-0.4.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
67
|
+
nextrec-0.4.3.dist-info/entry_points.txt,sha256=NN-dNSdfMRTv86bNXM7d3ZEPW2BQC6bRi7QP7i9cIps,45
|
|
68
|
+
nextrec-0.4.3.dist-info/licenses/LICENSE,sha256=2fQfVKeafywkni7MYHyClC6RGGC3laLTXCNBx-ubtp0,1064
|
|
69
|
+
nextrec-0.4.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|