nextrec 0.4.23__py3-none-any.whl → 0.4.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/layers.py +96 -46
- nextrec/basic/metrics.py +128 -113
- nextrec/basic/model.py +201 -76
- nextrec/basic/summary.py +58 -0
- nextrec/cli.py +13 -0
- nextrec/data/data_processing.py +3 -9
- nextrec/data/dataloader.py +27 -2
- nextrec/data/preprocessor.py +283 -36
- nextrec/models/multi_task/aitm.py +0 -0
- nextrec/models/multi_task/apg.py +0 -0
- nextrec/models/multi_task/cross_stitch.py +0 -0
- nextrec/models/multi_task/esmm.py +2 -2
- nextrec/models/multi_task/mmoe.py +4 -4
- nextrec/models/multi_task/pepnet.py +335 -0
- nextrec/models/multi_task/ple.py +8 -5
- nextrec/models/multi_task/poso.py +13 -11
- nextrec/models/multi_task/share_bottom.py +4 -4
- nextrec/models/multi_task/snr_trans.py +0 -0
- nextrec/models/ranking/dcn_v2.py +1 -1
- nextrec/models/retrieval/dssm.py +4 -4
- nextrec/models/retrieval/dssm_v2.py +4 -4
- nextrec/models/retrieval/mind.py +2 -2
- nextrec/models/retrieval/sdm.py +4 -4
- nextrec/models/retrieval/youtube_dnn.py +4 -4
- nextrec/utils/config.py +2 -0
- nextrec/utils/model.py +17 -64
- nextrec/utils/torch_utils.py +11 -0
- {nextrec-0.4.23.dist-info → nextrec-0.4.25.dist-info}/METADATA +5 -5
- {nextrec-0.4.23.dist-info → nextrec-0.4.25.dist-info}/RECORD +33 -28
- {nextrec-0.4.23.dist-info → nextrec-0.4.25.dist-info}/WHEEL +0 -0
- {nextrec-0.4.23.dist-info → nextrec-0.4.25.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.23.dist-info → nextrec-0.4.25.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 09/11/2025
|
|
3
|
+
Checkpoint: edit on 30/12/2025
|
|
4
|
+
Author: Yang Zhou, zyaztec@gmail.com
|
|
5
|
+
Reference:
|
|
6
|
+
[1] Yang et al. "PEPNet: Parameter and Embedding Personalized Network for Multi-Task Learning", 2021.
|
|
7
|
+
[2] MMLRec-A-Unified-Multi-Task-and-Multi-Scenario-Learning-Benchmark-for-Recommendation:
|
|
8
|
+
https://github.com/alipay/MMLRec-A-Unified-Multi-Task-and-Multi-Scenario-Learning-Benchmark-for-Recommendation/blob/main/model/pepnet.py
|
|
9
|
+
|
|
10
|
+
PEPNet (Parameter and Embedding Personalized Network) is a multi-task learning
|
|
11
|
+
model that personalizes both input features and layer transformations with
|
|
12
|
+
context (scene/domain, user, item). It applies a shared feature gate to the
|
|
13
|
+
backbone embedding and then uses per-task gated MLP blocks (PPNet blocks) whose
|
|
14
|
+
gates are conditioned on task-specific context. This enables task-aware routing
|
|
15
|
+
at both feature and layer levels, improving adaptation across scenarios/tasks.
|
|
16
|
+
|
|
17
|
+
Workflow:
|
|
18
|
+
(1) Embed all features and build the backbone input
|
|
19
|
+
(2) Build task context embedding from domain/user/item features
|
|
20
|
+
(3) Feature gate masks backbone input using domain context
|
|
21
|
+
(4) Each task tower applies layer-wise gates conditioned on context + backbone embedding output
|
|
22
|
+
(5) Task heads produce per-task predictions
|
|
23
|
+
|
|
24
|
+
Key Advantages:
|
|
25
|
+
- Two-level personalization: feature gate + layer gates
|
|
26
|
+
- Context-driven routing for multi-scenario/multi-task recommendation
|
|
27
|
+
- Task towers share embeddings while adapting via gates
|
|
28
|
+
- Gate input uses stop-grad on backbone embedding output for stable training
|
|
29
|
+
- Compatible with heterogeneous features via unified embeddings
|
|
30
|
+
|
|
31
|
+
PEPNet(Parameter and Embedding Personalized Network)通过场景/用户/物品等上下文
|
|
32
|
+
对输入特征与网络层进行双层门控个性化。先用共享特征门控调整主干输入,再在每个
|
|
33
|
+
任务塔中使用条件门控的 MLP 层(PPNet block),实现任务与场景感知的逐层路由。
|
|
34
|
+
|
|
35
|
+
流程:
|
|
36
|
+
(1) 对全部特征做 embedding,得到主干输入
|
|
37
|
+
(2) 由场景/用户/物品特征构建任务上下文向量
|
|
38
|
+
(3) 共享特征门控按场景调制主干输入
|
|
39
|
+
(4) 任务塔逐层门控,结合上下文与主干 embedding 输出进行路由
|
|
40
|
+
(5) 任务头输出各任务预测结果
|
|
41
|
+
|
|
42
|
+
主要优点:
|
|
43
|
+
- 特征级与层级双重个性化
|
|
44
|
+
- 上下文驱动的多场景/多任务适配
|
|
45
|
+
- 共享 embedding 的同时通过门控实现任务定制
|
|
46
|
+
- 对主干 embedding 输出 stop-grad,提高训练稳定性
|
|
47
|
+
- 统一 embedding 支持多类特征
|
|
48
|
+
"""
|
|
49
|
+
|
|
50
|
+
from __future__ import annotations
|
|
51
|
+
|
|
52
|
+
import torch
|
|
53
|
+
import torch.nn as nn
|
|
54
|
+
|
|
55
|
+
from nextrec.basic.activation import activation_layer
|
|
56
|
+
from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
|
|
57
|
+
from nextrec.basic.layers import EmbeddingLayer, GateMLP
|
|
58
|
+
from nextrec.basic.heads import TaskHead
|
|
59
|
+
from nextrec.basic.model import BaseModel
|
|
60
|
+
from nextrec.utils.model import select_features
|
|
61
|
+
from nextrec.utils.types import ActivationName, TaskTypeName
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class PPNetBlock(nn.Module):
    """
    PEPNet tower block: a stack of dense layers where each layer's input is
    scaled element-wise by a gate conditioned on the task context.

    The gate input is ``cat([o_prior, stop_grad(o_ep)])`` so that gate
    learning does not push gradients back into the shared backbone embedding.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        gate_input_dim: int,
        gate_hidden_dim: int | None,
        hidden_units: list[int] | None = None,
        hidden_activations: ActivationName | list[ActivationName] = "relu",
        dropout_rates: float | list[float] = 0.0,
        batch_norm: bool = False,
        use_bias: bool = True,
        gate_activation: ActivationName = "relu",
        gate_dropout: float = 0.0,
        gate_use_bn: bool = False,
    ) -> None:
        super().__init__()
        units = list(hidden_units or [])

        # Broadcast scalar settings to one value per hidden layer, and
        # validate that explicitly given lists line up with ``hidden_units``.
        if isinstance(dropout_rates, list):
            if len(dropout_rates) != len(units):
                raise ValueError("dropout_rates length must match hidden_units length.")
            dropouts = dropout_rates
        else:
            dropouts = [dropout_rates] * len(units)

        if isinstance(hidden_activations, list):
            if len(hidden_activations) != len(units):
                raise ValueError(
                    "hidden_activations length must match hidden_units length."
                )
            activations = hidden_activations
        else:
            activations = [hidden_activations] * len(units)

        self.gate_layers = nn.ModuleList()
        self.mlp_layers = nn.ModuleList()

        dims = [input_dim] + units
        for i, (fan_in, fan_out) in enumerate(zip(dims[:-1], dims[1:])):
            stack: list[nn.Module] = [nn.Linear(fan_in, fan_out, bias=use_bias)]
            if batch_norm:
                stack.append(nn.BatchNorm1d(fan_out))
            stack.append(activation_layer(activations[i]))
            if dropouts[i] > 0:
                stack.append(nn.Dropout(p=dropouts[i]))

            # One gate per hidden layer, sized to that layer's input width.
            self.gate_layers.append(
                GateMLP(
                    input_dim=gate_input_dim,
                    hidden_dim=gate_hidden_dim,
                    output_dim=fan_in,
                    activation=gate_activation,
                    dropout=gate_dropout,
                    use_bn=gate_use_bn,
                    scale_factor=2.0,
                )
            )
            self.mlp_layers.append(nn.Sequential(*stack))

        # Final projection to the block output; its gate uses scale_factor=1.0.
        self.gate_layers.append(
            GateMLP(
                input_dim=gate_input_dim,
                hidden_dim=gate_hidden_dim,
                output_dim=dims[-1],
                activation=gate_activation,
                dropout=gate_dropout,
                use_bn=gate_use_bn,
                scale_factor=1.0,
            )
        )
        self.mlp_layers.append(nn.Linear(dims[-1], output_dim, bias=use_bias))

    def forward(self, o_ep: torch.Tensor, o_prior: torch.Tensor) -> torch.Tensor:
        """
        o_ep: EPNet output embedding (detached inside the gate input).
        o_prior: prior/task context embedding.
        """
        gate_in = torch.cat([o_prior, o_ep.detach()], dim=-1)

        out = o_ep
        for gate, layer in zip(self.gate_layers, self.mlp_layers):
            out = layer(out * gate(gate_in))
        return out
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class PEPNet(BaseModel):
    """
    PEPNet: feature-gated multi-task model with task-conditioned layer gates.

    A shared feature gate (EPNet) modulates the backbone embedding using the
    detached domain context; one ``PPNetBlock`` per task then applies
    context-conditioned layer gates before the shared prediction head.
    """

    @property
    def model_name(self) -> str:
        return "PepNet"

    @property
    def default_task(self) -> TaskTypeName | list[TaskTypeName]:
        # ``nums_task`` is assigned before super().__init__ runs, but guard
        # anyway so the property is safe on a partially constructed instance.
        count = getattr(self, "nums_task", None)
        if count is not None and count > 0:
            return ["binary"] * count
        return ["binary"]

    def __init__(
        self,
        dense_features: list[DenseFeature] | None = None,
        sparse_features: list[SparseFeature] | None = None,
        sequence_features: list[SequenceFeature] | None = None,
        target: list[str] | str | None = None,
        task: TaskTypeName | list[TaskTypeName] | None = None,
        dnn_hidden_units: list[int] | None = None,
        dnn_activation: ActivationName = "relu",
        dnn_dropout: float | list[float] = 0.0,
        dnn_use_bn: bool = False,
        feature_gate_hidden_dim: int = 128,
        gate_hidden_dim: int | None = None,
        gate_activation: ActivationName = "relu",
        gate_dropout: float = 0.0,
        gate_use_bn: bool = False,
        domain_features: list[str] | str | None = None,
        user_features: list[str] | str | None = None,
        item_features: list[str] | str | None = None,
        use_bias: bool = True,
        **kwargs,
    ) -> None:
        dense_features = dense_features or []
        sparse_features = sparse_features or []
        sequence_features = sequence_features or []
        dnn_hidden_units = dnn_hidden_units or [256, 128]

        if target is None:
            target = []
        elif isinstance(target, str):
            target = [target]

        self.nums_task = len(target) if target else 1

        # Normalize ``task`` so its length matches the number of targets.
        resolved_task = task
        if resolved_task is None:
            resolved_task = self.default_task
        elif isinstance(resolved_task, str):
            resolved_task = [resolved_task] * self.nums_task
        elif len(resolved_task) == 1 and self.nums_task > 1:
            resolved_task = resolved_task * self.nums_task
        elif len(resolved_task) != self.nums_task:
            raise ValueError(
                f"Length of task ({len(resolved_task)}) must match number of targets ({self.nums_task})."
            )

        super().__init__(
            dense_features=dense_features,
            sparse_features=sparse_features,
            sequence_features=sequence_features,
            target=target,
            task=resolved_task,
            **kwargs,
        )

        if isinstance(domain_features, str):
            domain_features = [domain_features]
        if isinstance(user_features, str):
            user_features = [user_features]
        if isinstance(item_features, str):
            item_features = [item_features]

        self.scene_feature_names = list(domain_features or [])
        self.user_feature_names = list(user_features or [])
        self.item_feature_names = list(item_features or [])

        # Domain (scene) context is mandatory — it drives the feature gate.
        if not self.scene_feature_names:
            raise ValueError("PepNet requires at least one scene feature name.")

        self.domain_features = select_features(
            self.all_features, self.scene_feature_names, "domain_features"
        )
        self.user_features = select_features(
            self.all_features, self.user_feature_names, "user_features"
        )
        self.item_features = select_features(
            self.all_features, self.item_feature_names, "item_features"
        )

        if not self.all_features:
            raise ValueError("PepNet requires at least one input feature.")

        self.embedding = EmbeddingLayer(features=self.all_features)
        backbone_dim = self.embedding.get_input_dim(self.all_features)
        domain_dim = self.embedding.get_input_dim(self.domain_features)
        user_dim = (
            self.embedding.get_input_dim(self.user_features)
            if self.user_features
            else 0
        )
        item_dim = (
            self.embedding.get_input_dim(self.item_features)
            if self.item_features
            else 0
        )
        context_dim = domain_dim + user_dim + item_dim

        # EPNet: shared gate over the full backbone input, conditioned on
        # backbone (stop-grad at call time) plus domain context.
        self.feature_gate = GateMLP(
            input_dim=backbone_dim + domain_dim,
            hidden_dim=feature_gate_hidden_dim,
            output_dim=backbone_dim,
            activation=gate_activation,
            dropout=gate_dropout,
            use_bn=gate_use_bn,
        )

        # One gated tower (PPNet block) per task.
        self.ppn_blocks = nn.ModuleList(
            [
                PPNetBlock(
                    input_dim=backbone_dim,
                    output_dim=1,
                    gate_input_dim=backbone_dim + context_dim,
                    gate_hidden_dim=gate_hidden_dim,
                    hidden_units=dnn_hidden_units,
                    hidden_activations=dnn_activation,
                    dropout_rates=dnn_dropout,
                    batch_norm=dnn_use_bn,
                    use_bias=use_bias,
                    gate_activation=gate_activation,
                    gate_dropout=gate_dropout,
                    gate_use_bn=gate_use_bn,
                )
                for _ in range(self.nums_task)
            ]
        )

        self.prediction_layer = TaskHead(
            task_type=self.task, task_dims=[1] * self.nums_task
        )
        self.grad_norm_shared_modules = ["embedding", "feature_gate"]
        self.register_regularization_weights(
            embedding_attr="embedding", include_modules=["feature_gate", "ppn_blocks"]
        )

    def forward(self, x: dict[str, torch.Tensor]) -> torch.Tensor:
        backbone = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
        domain_emb = self.embedding(
            x=x, features=self.domain_features, squeeze_dim=True
        ).detach()

        # Task context = detached domain (+ optional user/item) embeddings.
        context_parts = [domain_emb]
        if self.user_features:
            context_parts.append(
                self.embedding(
                    x=x, features=self.user_features, squeeze_dim=True
                ).detach()
            )
        if self.item_features:
            context_parts.append(
                self.embedding(
                    x=x, features=self.item_features, squeeze_dim=True
                ).detach()
            )
        context = torch.cat(context_parts, dim=-1)

        # EPNet feature gate: stop-grad on the backbone side of the gate input.
        gate_in = torch.cat([backbone.detach(), domain_emb], dim=-1)
        gated = self.feature_gate(gate_in) * backbone

        # One (B, 1) logit per task, concatenated to (B, nums_task).
        logits = [block(o_ep=gated, o_prior=context) for block in self.ppn_blocks]
        return self.prediction_layer(torch.cat(logits, dim=1))
|
nextrec/models/multi_task/ple.py
CHANGED
|
@@ -93,7 +93,7 @@ class CGCLayer(nn.Module):
|
|
|
93
93
|
[
|
|
94
94
|
MLP(
|
|
95
95
|
input_dim=input_dim,
|
|
96
|
-
|
|
96
|
+
output_dim=None,
|
|
97
97
|
**shared_expert_params,
|
|
98
98
|
)
|
|
99
99
|
for _ in range(num_shared_experts)
|
|
@@ -105,7 +105,7 @@ class CGCLayer(nn.Module):
|
|
|
105
105
|
[
|
|
106
106
|
MLP(
|
|
107
107
|
input_dim=input_dim,
|
|
108
|
-
|
|
108
|
+
output_dim=None,
|
|
109
109
|
**params,
|
|
110
110
|
)
|
|
111
111
|
for _ in range(num_specific_experts)
|
|
@@ -259,8 +259,11 @@ class PLE(BaseModel):
|
|
|
259
259
|
# input_dim = emb_dim_total + dense_input_dim
|
|
260
260
|
|
|
261
261
|
# Get expert output dimension
|
|
262
|
-
if
|
|
263
|
-
|
|
262
|
+
if (
|
|
263
|
+
"hidden_dims" in shared_expert_params
|
|
264
|
+
and len(shared_expert_params["hidden_dims"]) > 0
|
|
265
|
+
):
|
|
266
|
+
expert_output_dim = shared_expert_params["hidden_dims"][-1]
|
|
264
267
|
else:
|
|
265
268
|
expert_output_dim = input_dim
|
|
266
269
|
|
|
@@ -283,7 +286,7 @@ class PLE(BaseModel):
|
|
|
283
286
|
# Task-specific towers
|
|
284
287
|
self.towers = nn.ModuleList()
|
|
285
288
|
for tower_params in tower_params_list:
|
|
286
|
-
tower = MLP(input_dim=expert_output_dim,
|
|
289
|
+
tower = MLP(input_dim=expert_output_dim, output_dim=1, **tower_params)
|
|
287
290
|
self.towers.append(tower)
|
|
288
291
|
self.prediction_layer = TaskHead(
|
|
289
292
|
task_type=self.task, task_dims=[1] * self.nums_task
|
|
@@ -127,7 +127,7 @@ class POSOMLP(nn.Module):
|
|
|
127
127
|
"""
|
|
128
128
|
POSO-enhanced MLP that stacks multiple POSOFC layers.
|
|
129
129
|
|
|
130
|
-
|
|
130
|
+
hidden_dims: e.g., [256, 128, 64] means
|
|
131
131
|
in_dim -> 256 -> 128 -> 64
|
|
132
132
|
Each layer has its own gate g_l(pc) following Eq. (11).
|
|
133
133
|
"""
|
|
@@ -136,7 +136,7 @@ class POSOMLP(nn.Module):
|
|
|
136
136
|
self,
|
|
137
137
|
input_dim: int,
|
|
138
138
|
pc_dim: int,
|
|
139
|
-
|
|
139
|
+
hidden_dims: list[int],
|
|
140
140
|
gate_hidden_dim: int = 32,
|
|
141
141
|
scale_factor: float = 2.0,
|
|
142
142
|
activation: str = "relu",
|
|
@@ -147,7 +147,7 @@ class POSOMLP(nn.Module):
|
|
|
147
147
|
|
|
148
148
|
layers = []
|
|
149
149
|
in_dim = input_dim
|
|
150
|
-
for out_dim in
|
|
150
|
+
for out_dim in hidden_dims:
|
|
151
151
|
layers.append(
|
|
152
152
|
POSOFC(
|
|
153
153
|
in_dim=in_dim,
|
|
@@ -213,8 +213,8 @@ class POSOMMoE(nn.Module):
|
|
|
213
213
|
[
|
|
214
214
|
MLP(
|
|
215
215
|
input_dim=input_dim,
|
|
216
|
-
|
|
217
|
-
|
|
216
|
+
output_dim=None,
|
|
217
|
+
hidden_dims=expert_hidden_dims,
|
|
218
218
|
activation=activation,
|
|
219
219
|
dropout=expert_dropout,
|
|
220
220
|
)
|
|
@@ -416,16 +416,16 @@ class POSO(BaseModel):
|
|
|
416
416
|
self.towers = nn.ModuleList()
|
|
417
417
|
self.tower_heads = nn.ModuleList()
|
|
418
418
|
for tower_params in tower_params_list:
|
|
419
|
-
|
|
420
|
-
if not
|
|
419
|
+
hidden_dims = tower_params.get("hidden_dims")
|
|
420
|
+
if not hidden_dims:
|
|
421
421
|
raise ValueError(
|
|
422
|
-
"tower_params must include a non-empty '
|
|
422
|
+
"tower_params must include a non-empty 'hidden_dims' list for POSO-MLP towers."
|
|
423
423
|
)
|
|
424
424
|
dropout = tower_params.get("dropout", 0.0)
|
|
425
425
|
tower = POSOMLP(
|
|
426
426
|
input_dim=self.main_input_dim,
|
|
427
427
|
pc_dim=self.pc_input_dim,
|
|
428
|
-
|
|
428
|
+
hidden_dims=hidden_dims,
|
|
429
429
|
gate_hidden_dim=tower_params.get(
|
|
430
430
|
"gate_hidden_dim", gate_hidden_dim
|
|
431
431
|
),
|
|
@@ -435,7 +435,9 @@ class POSO(BaseModel):
|
|
|
435
435
|
dropout=dropout,
|
|
436
436
|
)
|
|
437
437
|
self.towers.append(tower)
|
|
438
|
-
tower_output_dim =
|
|
438
|
+
tower_output_dim = (
|
|
439
|
+
hidden_dims[-1] if hidden_dims else self.main_input_dim
|
|
440
|
+
)
|
|
439
441
|
self.tower_heads.append(nn.Linear(tower_output_dim, 1))
|
|
440
442
|
else:
|
|
441
443
|
if expert_hidden_dims is None or not expert_hidden_dims:
|
|
@@ -458,7 +460,7 @@ class POSO(BaseModel):
|
|
|
458
460
|
[
|
|
459
461
|
MLP(
|
|
460
462
|
input_dim=self.mmoe.expert_output_dim,
|
|
461
|
-
|
|
463
|
+
output_dim=1,
|
|
462
464
|
**tower_params,
|
|
463
465
|
)
|
|
464
466
|
for tower_params in tower_params_list
|
|
@@ -110,19 +110,19 @@ class ShareBottom(BaseModel):
|
|
|
110
110
|
# input_dim = emb_dim_total + dense_input_dim
|
|
111
111
|
|
|
112
112
|
# Shared bottom network
|
|
113
|
-
self.bottom = MLP(input_dim=input_dim,
|
|
113
|
+
self.bottom = MLP(input_dim=input_dim, output_dim=None, **bottom_params)
|
|
114
114
|
self.grad_norm_shared_modules = ["embedding", "bottom"]
|
|
115
115
|
|
|
116
116
|
# Get bottom output dimension
|
|
117
|
-
if "
|
|
118
|
-
bottom_output_dim = bottom_params["
|
|
117
|
+
if "hidden_dims" in bottom_params and len(bottom_params["hidden_dims"]) > 0:
|
|
118
|
+
bottom_output_dim = bottom_params["hidden_dims"][-1]
|
|
119
119
|
else:
|
|
120
120
|
bottom_output_dim = input_dim
|
|
121
121
|
|
|
122
122
|
# Task-specific towers
|
|
123
123
|
self.towers = nn.ModuleList()
|
|
124
124
|
for tower_params in tower_params_list:
|
|
125
|
-
tower = MLP(input_dim=bottom_output_dim,
|
|
125
|
+
tower = MLP(input_dim=bottom_output_dim, output_dim=1, **tower_params)
|
|
126
126
|
self.towers.append(tower)
|
|
127
127
|
self.prediction_layer = TaskHead(
|
|
128
128
|
task_type=self.task, task_dims=[1] * self.nums_task
|
|
File without changes
|
nextrec/models/ranking/dcn_v2.py
CHANGED
|
@@ -234,7 +234,7 @@ class DCNv2(BaseModel):
|
|
|
234
234
|
if mlp_params is not None:
|
|
235
235
|
self.use_dnn = True
|
|
236
236
|
dnn_params = dict(mlp_params)
|
|
237
|
-
dnn_params.setdefault("
|
|
237
|
+
dnn_params.setdefault("output_dim", None)
|
|
238
238
|
self.mlp = MLP(input_dim=input_dim, **dnn_params)
|
|
239
239
|
deep_dim = self.mlp.output_dim
|
|
240
240
|
final_input_dim = (
|
nextrec/models/retrieval/dssm.py
CHANGED
|
@@ -103,8 +103,8 @@ class DSSM(BaseMatchModel):
|
|
|
103
103
|
user_dnn_units = user_dnn_hidden_units + [embedding_dim]
|
|
104
104
|
self.user_dnn = MLP(
|
|
105
105
|
input_dim=user_input_dim,
|
|
106
|
-
|
|
107
|
-
|
|
106
|
+
hidden_dims=user_dnn_units,
|
|
107
|
+
output_dim=None,
|
|
108
108
|
dropout=dnn_dropout,
|
|
109
109
|
activation=dnn_activation,
|
|
110
110
|
)
|
|
@@ -134,8 +134,8 @@ class DSSM(BaseMatchModel):
|
|
|
134
134
|
item_dnn_units = item_dnn_hidden_units + [embedding_dim]
|
|
135
135
|
self.item_dnn = MLP(
|
|
136
136
|
input_dim=item_input_dim,
|
|
137
|
-
|
|
138
|
-
|
|
137
|
+
hidden_dims=item_dnn_units,
|
|
138
|
+
output_dim=None,
|
|
139
139
|
dropout=dnn_dropout,
|
|
140
140
|
activation=dnn_activation,
|
|
141
141
|
)
|
|
@@ -97,8 +97,8 @@ class DSSM_v2(BaseMatchModel):
|
|
|
97
97
|
user_dnn_units = user_dnn_hidden_units + [embedding_dim]
|
|
98
98
|
self.user_dnn = MLP(
|
|
99
99
|
input_dim=user_input_dim,
|
|
100
|
-
|
|
101
|
-
|
|
100
|
+
hidden_dims=user_dnn_units,
|
|
101
|
+
output_dim=None,
|
|
102
102
|
dropout=dnn_dropout,
|
|
103
103
|
activation=dnn_activation,
|
|
104
104
|
)
|
|
@@ -126,8 +126,8 @@ class DSSM_v2(BaseMatchModel):
|
|
|
126
126
|
item_dnn_units = item_dnn_hidden_units + [embedding_dim]
|
|
127
127
|
self.item_dnn = MLP(
|
|
128
128
|
input_dim=item_input_dim,
|
|
129
|
-
|
|
130
|
-
|
|
129
|
+
hidden_dims=item_dnn_units,
|
|
130
|
+
output_dim=None,
|
|
131
131
|
dropout=dnn_dropout,
|
|
132
132
|
activation=dnn_activation,
|
|
133
133
|
)
|
nextrec/models/retrieval/mind.py
CHANGED
|
@@ -295,8 +295,8 @@ class MIND(BaseMatchModel):
|
|
|
295
295
|
item_dnn_units = item_dnn_hidden_units + [embedding_dim]
|
|
296
296
|
self.item_dnn = MLP(
|
|
297
297
|
input_dim=item_input_dim,
|
|
298
|
-
|
|
299
|
-
|
|
298
|
+
hidden_dims=item_dnn_units,
|
|
299
|
+
output_dim=None,
|
|
300
300
|
dropout=dnn_dropout,
|
|
301
301
|
activation=dnn_activation,
|
|
302
302
|
)
|
nextrec/models/retrieval/sdm.py
CHANGED
|
@@ -131,8 +131,8 @@ class SDM(BaseMatchModel):
|
|
|
131
131
|
# User DNN to final embedding
|
|
132
132
|
self.user_dnn = MLP(
|
|
133
133
|
input_dim=user_final_dim,
|
|
134
|
-
|
|
135
|
-
|
|
134
|
+
hidden_dims=[rnn_hidden_size * 2, embedding_dim],
|
|
135
|
+
output_dim=None,
|
|
136
136
|
dropout=dnn_dropout,
|
|
137
137
|
activation=dnn_activation,
|
|
138
138
|
)
|
|
@@ -162,8 +162,8 @@ class SDM(BaseMatchModel):
|
|
|
162
162
|
item_dnn_units = item_dnn_hidden_units + [embedding_dim]
|
|
163
163
|
self.item_dnn = MLP(
|
|
164
164
|
input_dim=item_input_dim,
|
|
165
|
-
|
|
166
|
-
|
|
165
|
+
hidden_dims=item_dnn_units,
|
|
166
|
+
output_dim=None,
|
|
167
167
|
dropout=dnn_dropout,
|
|
168
168
|
activation=dnn_activation,
|
|
169
169
|
)
|
|
@@ -102,8 +102,8 @@ class YoutubeDNN(BaseMatchModel):
|
|
|
102
102
|
user_dnn_units = user_dnn_hidden_units + [embedding_dim]
|
|
103
103
|
self.user_dnn = MLP(
|
|
104
104
|
input_dim=user_input_dim,
|
|
105
|
-
|
|
106
|
-
|
|
105
|
+
hidden_dims=user_dnn_units,
|
|
106
|
+
output_dim=None,
|
|
107
107
|
dropout=dnn_dropout,
|
|
108
108
|
activation=dnn_activation,
|
|
109
109
|
)
|
|
@@ -131,8 +131,8 @@ class YoutubeDNN(BaseMatchModel):
|
|
|
131
131
|
item_dnn_units = item_dnn_hidden_units + [embedding_dim]
|
|
132
132
|
self.item_dnn = MLP(
|
|
133
133
|
input_dim=item_input_dim,
|
|
134
|
-
|
|
135
|
-
|
|
134
|
+
hidden_dims=item_dnn_units,
|
|
135
|
+
output_dim=None,
|
|
136
136
|
dropout=dnn_dropout,
|
|
137
137
|
activation=dnn_activation,
|
|
138
138
|
)
|
nextrec/utils/config.py
CHANGED
|
@@ -116,6 +116,7 @@ def register_processor_features(
|
|
|
116
116
|
name,
|
|
117
117
|
encode_method=proc_cfg.get("encode_method", "hash"),
|
|
118
118
|
hash_size=proc_cfg.get("hash_size") or proc_cfg.get("vocab_size"),
|
|
119
|
+
min_freq=proc_cfg.get("min_freq"),
|
|
119
120
|
fill_na=proc_cfg.get("fill_na", "<UNK>"),
|
|
120
121
|
)
|
|
121
122
|
|
|
@@ -125,6 +126,7 @@ def register_processor_features(
|
|
|
125
126
|
name,
|
|
126
127
|
encode_method=proc_cfg.get("encode_method", "hash"),
|
|
127
128
|
hash_size=proc_cfg.get("hash_size") or proc_cfg.get("vocab_size"),
|
|
129
|
+
min_freq=proc_cfg.get("min_freq"),
|
|
128
130
|
max_len=proc_cfg.get("max_len", 50),
|
|
129
131
|
pad_value=proc_cfg.get("pad_value", 0),
|
|
130
132
|
truncate=proc_cfg.get("truncate", "post"),
|