nextrec-0.4.25-py3-none-any.whl → nextrec-0.4.28-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. nextrec/__version__.py +1 -1
  2. nextrec/basic/asserts.py +72 -0
  3. nextrec/basic/loggers.py +18 -1
  4. nextrec/basic/model.py +54 -51
  5. nextrec/data/batch_utils.py +23 -3
  6. nextrec/data/dataloader.py +3 -8
  7. nextrec/models/multi_task/[pre]aitm.py +173 -0
  8. nextrec/models/multi_task/[pre]snr_trans.py +232 -0
  9. nextrec/models/multi_task/[pre]star.py +192 -0
  10. nextrec/models/multi_task/apg.py +330 -0
  11. nextrec/models/multi_task/cross_stitch.py +229 -0
  12. nextrec/models/multi_task/escm.py +290 -0
  13. nextrec/models/multi_task/esmm.py +8 -21
  14. nextrec/models/multi_task/hmoe.py +203 -0
  15. nextrec/models/multi_task/mmoe.py +20 -28
  16. nextrec/models/multi_task/pepnet.py +81 -76
  17. nextrec/models/multi_task/ple.py +30 -44
  18. nextrec/models/multi_task/poso.py +13 -22
  19. nextrec/models/multi_task/share_bottom.py +14 -25
  20. nextrec/models/ranking/afm.py +2 -2
  21. nextrec/models/ranking/autoint.py +2 -4
  22. nextrec/models/ranking/dcn.py +2 -3
  23. nextrec/models/ranking/dcn_v2.py +2 -3
  24. nextrec/models/ranking/deepfm.py +2 -3
  25. nextrec/models/ranking/dien.py +7 -9
  26. nextrec/models/ranking/din.py +8 -10
  27. nextrec/models/ranking/eulernet.py +1 -2
  28. nextrec/models/ranking/ffm.py +1 -2
  29. nextrec/models/ranking/fibinet.py +2 -3
  30. nextrec/models/ranking/fm.py +1 -1
  31. nextrec/models/ranking/lr.py +1 -1
  32. nextrec/models/ranking/masknet.py +1 -2
  33. nextrec/models/ranking/pnn.py +1 -2
  34. nextrec/models/ranking/widedeep.py +2 -3
  35. nextrec/models/ranking/xdeepfm.py +2 -4
  36. nextrec/models/representation/rqvae.py +4 -4
  37. nextrec/models/retrieval/dssm.py +18 -26
  38. nextrec/models/retrieval/dssm_v2.py +15 -22
  39. nextrec/models/retrieval/mind.py +9 -15
  40. nextrec/models/retrieval/sdm.py +36 -33
  41. nextrec/models/retrieval/youtube_dnn.py +16 -24
  42. nextrec/models/sequential/hstu.py +2 -2
  43. nextrec/utils/__init__.py +5 -1
  44. nextrec/utils/model.py +9 -14
  45. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/METADATA +72 -62
  46. nextrec-0.4.28.dist-info/RECORD +90 -0
  47. nextrec/models/multi_task/aitm.py +0 -0
  48. nextrec/models/multi_task/snr_trans.py +0 -0
  49. nextrec-0.4.25.dist-info/RECORD +0 -86
  50. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/WHEEL +0 -0
  51. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/entry_points.txt +0 -0
  52. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/licenses/LICENSE +0 -0
nextrec/models/multi_task/pepnet.py
@@ -1,11 +1,11 @@
  """
- Date: create on 09/11/2025
- Checkpoint: edit on 30/12/2025
+ Date: create on 01/01/2026
+ Checkpoint: edit on 01/01/2026
  Author: Yang Zhou, zyaztec@gmail.com
  Reference:
- [1] Yang et al. "PEPNet: Parameter and Embedding Personalized Network for Multi-Task Learning", 2021.
- [2] MMLRec-A-Unified-Multi-Task-and-Multi-Scenario-Learning-Benchmark-for-Recommendation:
- https://github.com/alipay/MMLRec-A-Unified-Multi-Task-and-Multi-Scenario-Learning-Benchmark-for-Recommendation/blob/main/model/pepnet.py
+ - [1] Chang J, Zhang C, Hui Y, Leng D, Niu Y, Song Y, Gai K. PEPNet: Parameter and Embedding Personalized Network for Infusing with Personalized Prior Information. In: Proceedings of the 29th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD ’23), 2023.
+ URL: https://arxiv.org/abs/2302.01115
+ - [2] MMLRec-A-Unified-Multi-Task-and-Multi-Scenario-Learning-Benchmark-for-Recommendation: https://github.com/alipay/MMLRec-A-Unified-Multi-Task-and-Multi-Scenario-Learning-Benchmark-for-Recommendation/

  PEPNet (Parameter and Embedding Personalized Network) is a multi-task learning
  model that personalizes both input features and layer transformations with
@@ -58,12 +58,12 @@ from nextrec.basic.layers import EmbeddingLayer, GateMLP
  from nextrec.basic.heads import TaskHead
  from nextrec.basic.model import BaseModel
  from nextrec.utils.model import select_features
- from nextrec.utils.types import ActivationName, TaskTypeName
+ from nextrec.utils.types import TaskTypeName


- class PPNetBlock(nn.Module):
+ class PPNet(nn.Module):
  """
- PEPNet block with per-layer gates conditioned on task context.
+ PPNet: per-task tower with layer-wise gates conditioned on task context.
  """

  def __init__(
@@ -71,34 +71,42 @@ class PPNetBlock(nn.Module):
  input_dim: int,
  output_dim: int,
  gate_input_dim: int,
- gate_hidden_dim: int | None,
- hidden_units: list[int] | None = None,
- hidden_activations: ActivationName | list[ActivationName] = "relu",
- dropout_rates: float | list[float] = 0.0,
- batch_norm: bool = False,
+ mlp_params: dict | None = None,
+ gate_mlp_params: dict | None = None,
  use_bias: bool = True,
- gate_activation: ActivationName = "relu",
- gate_dropout: float = 0.0,
- gate_use_bn: bool = False,
  ) -> None:
  super().__init__()
- hidden_units = hidden_units or []
+ mlp_params = mlp_params or {}
+ gate_mlp_params = gate_mlp_params or {}

- if isinstance(dropout_rates, list):
- if len(dropout_rates) != len(hidden_units):
+ mlp_params.setdefault("hidden_dims", [])
+ mlp_params.setdefault("activation", "relu")
+ mlp_params.setdefault("dropout", 0.0)
+ mlp_params.setdefault("norm_type", "none")
+
+ gate_mlp_params.setdefault("hidden_dim", None)
+ gate_mlp_params.setdefault("activation", "relu")
+ gate_mlp_params.setdefault("dropout", 0.0)
+ gate_mlp_params.setdefault("use_bn", False)
+
+ hidden_units = mlp_params["hidden_dims"]
+ norm_type = mlp_params["norm_type"]
+
+ if isinstance(mlp_params["dropout"], list):
+ if len(mlp_params["dropout"]) != len(hidden_units):
  raise ValueError("dropout_rates length must match hidden_units length.")
- dropout_list = dropout_rates
+ dropout_list = mlp_params["dropout"]
  else:
- dropout_list = [dropout_rates] * len(hidden_units)
+ dropout_list = [mlp_params["dropout"]] * len(hidden_units)

- if isinstance(hidden_activations, list):
- if len(hidden_activations) != len(hidden_units):
+ if isinstance(mlp_params["activation"], list):
+ if len(mlp_params["activation"]) != len(hidden_units):
  raise ValueError(
  "hidden_activations length must match hidden_units length."
  )
- activation_list = hidden_activations
+ activation_list = mlp_params["activation"]
  else:
- activation_list = [hidden_activations] * len(hidden_units)
+ activation_list = [mlp_params["activation"]] * len(hidden_units)

  self.gate_layers = nn.ModuleList()
  self.mlp_layers = nn.ModuleList()
@@ -108,7 +116,7 @@ class PPNetBlock(nn.Module):
  dense_layers: list[nn.Module] = [
  nn.Linear(layer_units[idx], layer_units[idx + 1], bias=use_bias)
  ]
- if batch_norm:
+ if norm_type == "batch_norm":
  dense_layers.append(nn.BatchNorm1d(layer_units[idx + 1]))
  dense_layers.append(activation_layer(activation_list[idx]))
  if dropout_list[idx] > 0:
@@ -117,11 +125,11 @@ class PPNetBlock(nn.Module):
  self.gate_layers.append(
  GateMLP(
  input_dim=gate_input_dim,
- hidden_dim=gate_hidden_dim,
+ hidden_dim=gate_mlp_params["hidden_dim"],
  output_dim=layer_units[idx],
- activation=gate_activation,
- dropout=gate_dropout,
- use_bn=gate_use_bn,
+ activation=gate_mlp_params["activation"],
+ dropout=gate_mlp_params["dropout"],
+ use_bn=gate_mlp_params["use_bn"],
  scale_factor=2.0,
  )
  )
@@ -130,11 +138,11 @@ class PPNetBlock(nn.Module):
  self.gate_layers.append(
  GateMLP(
  input_dim=gate_input_dim,
- hidden_dim=gate_hidden_dim,
+ hidden_dim=gate_mlp_params["hidden_dim"],
  output_dim=layer_units[-1],
- activation=gate_activation,
- dropout=gate_dropout,
- use_bn=gate_use_bn,
+ activation=gate_mlp_params["activation"],
+ dropout=gate_mlp_params["dropout"],
+ use_bn=gate_mlp_params["use_bn"],
  scale_factor=1.0,
  )
  )
@@ -177,15 +185,9 @@ class PEPNet(BaseModel):
  sequence_features: list[SequenceFeature] | None = None,
  target: list[str] | str | None = None,
  task: TaskTypeName | list[TaskTypeName] | None = None,
- dnn_hidden_units: list[int] | None = None,
- dnn_activation: ActivationName = "relu",
- dnn_dropout: float | list[float] = 0.0,
- dnn_use_bn: bool = False,
- feature_gate_hidden_dim: int = 128,
- gate_hidden_dim: int | None = None,
- gate_activation: ActivationName = "relu",
- gate_dropout: float = 0.0,
- gate_use_bn: bool = False,
+ mlp_params: dict | None = None,
+ feature_gate_mlp_params: dict | None = None,
+ gate_mlp_params: dict | None = None,
  domain_features: list[str] | str | None = None,
  user_features: list[str] | str | None = None,
  item_features: list[str] | str | None = None,
@@ -195,7 +197,24 @@ class PEPNet(BaseModel):
  dense_features = dense_features or []
  sparse_features = sparse_features or []
  sequence_features = sequence_features or []
- dnn_hidden_units = dnn_hidden_units or [256, 128]
+ mlp_params = mlp_params or {}
+ feature_gate_mlp_params = feature_gate_mlp_params or {}
+ gate_mlp_params = gate_mlp_params or {}
+
+ mlp_params.setdefault("hidden_dims", [256, 128])
+ mlp_params.setdefault("activation", "relu")
+ mlp_params.setdefault("dropout", 0.0)
+ mlp_params.setdefault("norm_type", "none")
+
+ feature_gate_mlp_params.setdefault("hidden_dim", 128)
+ feature_gate_mlp_params.setdefault("activation", "relu")
+ feature_gate_mlp_params.setdefault("dropout", 0.0)
+ feature_gate_mlp_params.setdefault("use_bn", False)
+
+ gate_mlp_params.setdefault("hidden_dim", None)
+ gate_mlp_params.setdefault("activation", "relu")
+ gate_mlp_params.setdefault("dropout", 0.0)
+ gate_mlp_params.setdefault("use_bn", False)

  if target is None:
  target = []
@@ -203,24 +222,13 @@ class PEPNet(BaseModel):
  target = [target]

  self.nums_task = len(target) if target else 1
- resolved_task = task
- if resolved_task is None:
- resolved_task = self.default_task
- elif isinstance(resolved_task, str):
- resolved_task = [resolved_task] * self.nums_task
- elif len(resolved_task) == 1 and self.nums_task > 1:
- resolved_task = resolved_task * self.nums_task
- elif len(resolved_task) != self.nums_task:
- raise ValueError(
- f"Length of task ({len(resolved_task)}) must match number of targets ({self.nums_task})."
- )

  super().__init__(
  dense_features=dense_features,
  sparse_features=sparse_features,
  sequence_features=sequence_features,
  target=target,
- task=resolved_task,
+ task=task,
  **kwargs,
  )

@@ -266,30 +274,27 @@ class PEPNet(BaseModel):
  )
  task_dim = domain_dim + user_dim + item_dim

- self.feature_gate = GateMLP(
+ # EPNet: shared feature-level gate (paper's EPNet).
+ self.epnet = GateMLP(
  input_dim=input_dim + domain_dim,
- hidden_dim=feature_gate_hidden_dim,
+ hidden_dim=feature_gate_mlp_params["hidden_dim"],
  output_dim=input_dim,
- activation=gate_activation,
- dropout=gate_dropout,
- use_bn=gate_use_bn,
+ activation=feature_gate_mlp_params["activation"],
+ dropout=feature_gate_mlp_params["dropout"],
+ use_bn=feature_gate_mlp_params["use_bn"],
+ scale_factor=2.0,
  )

- self.ppn_blocks = nn.ModuleList(
+ # PPNet: per-task gated towers (paper's PPNet).
+ self.ppnet_blocks = nn.ModuleList(
  [
- PPNetBlock(
+ PPNet(
  input_dim=input_dim,
  output_dim=1,
  gate_input_dim=input_dim + task_dim,
- gate_hidden_dim=gate_hidden_dim,
- hidden_units=dnn_hidden_units,
- hidden_activations=dnn_activation,
- dropout_rates=dnn_dropout,
- batch_norm=dnn_use_bn,
+ mlp_params=mlp_params,
+ gate_mlp_params=gate_mlp_params,
  use_bias=use_bias,
- gate_activation=gate_activation,
- gate_dropout=gate_dropout,
- gate_use_bn=gate_use_bn,
  )
  for _ in range(self.nums_task)
  ]
@@ -298,9 +303,9 @@ class PEPNet(BaseModel):
  self.prediction_layer = TaskHead(
  task_type=self.task, task_dims=[1] * self.nums_task
  )
- self.grad_norm_shared_modules = ["embedding", "feature_gate"]
+ self.grad_norm_shared_modules = ["embedding", "epnet"]
  self.register_regularization_weights(
- embedding_attr="embedding", include_modules=["feature_gate", "ppn_blocks"]
+ embedding_attr="embedding", include_modules=["epnet", "ppnet_blocks"]
  )

  def forward(self, x: dict[str, torch.Tensor]) -> torch.Tensor:
@@ -325,11 +330,11 @@ class PEPNet(BaseModel):
  task_sf_emb = torch.cat(task_parts, dim=-1)

  gate_input = torch.cat([dnn_input.detach(), domain_emb], dim=-1)
- dnn_input = self.feature_gate(gate_input) * dnn_input
+ dnn_input = self.epnet(gate_input) * dnn_input

  task_logits = []
- for block in self.ppn_blocks:
- task_logits.append(block(o_ep=dnn_input, o_prior=task_sf_emb))
+ for block in self.ppnet_blocks:
+ task_logits.append(block(o_ep=dnn_input, o_prior=task_sf_emb))

  y = torch.cat(task_logits, dim=1)
  return self.prediction_layer(y)
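
The pepnet.py changes above replace the flat dnn_*/gate_* keyword arguments with three nested parameter dicts (mlp_params, feature_gate_mlp_params, gate_mlp_params), rename PPNetBlock and feature_gate to PPNet and epnet, and fill defaults with setdefault. A minimal sketch of the new call style, assuming the feature lists are built elsewhere with the library's feature helpers (their constructors are not part of this diff) and using hypothetical feature and target names:

from nextrec.models.multi_task.pepnet import PEPNet

# Keys below are exactly the ones the diff sets defaults for.
mlp_params = {                      # replaces dnn_hidden_units/dnn_activation/dnn_dropout/dnn_use_bn
    "hidden_dims": [256, 128],
    "activation": "relu",
    "dropout": 0.0,
    "norm_type": "none",            # "batch_norm" re-enables BatchNorm1d in the PPNet towers
}
feature_gate_mlp_params = {         # EPNet gate; replaces feature_gate_hidden_dim plus the shared gate_* kwargs
    "hidden_dim": 128,
    "activation": "relu",
    "dropout": 0.0,
    "use_bn": False,
}
gate_mlp_params = {                 # PPNet layer gates; replaces gate_hidden_dim/gate_activation/gate_dropout/gate_use_bn
    "hidden_dim": None,
    "activation": "relu",
    "dropout": 0.0,
    "use_bn": False,
}

model = PEPNet(
    dense_features=dense_features,          # assumed: lists of nextrec feature objects built elsewhere
    sparse_features=sparse_features,
    sequence_features=sequence_features,
    target=["click", "purchase"],           # hypothetical two-task setup
    task=["binary", "binary"],
    mlp_params=mlp_params,
    feature_gate_mlp_params=feature_gate_mlp_params,
    gate_mlp_params=gate_mlp_params,
    domain_features=["scenario_id"],        # hypothetical feature names
    user_features=["user_id"],
    item_features=["item_id"],
)

Because the defaults are applied with setdefault, each of the three dicts can be passed partially or omitted entirely, and missing keys fall back to the values shown in the hunks above.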
nextrec/models/multi_task/ple.py
@@ -3,9 +3,8 @@ Date: create on 09/11/2025
  Checkpoint: edit on 23/12/2025
  Author: Yang Zhou,zyaztec@gmail.com
  Reference:
- [1] Tang H, Liu J, Zhao M, et al. Progressive layered extraction (PLE): A novel
- multi-task learning (MTL) model for personalized recommendations[C]//RecSys. 2020: 269-278.
- (https://dl.acm.org/doi/10.1145/3383313.3412236)
+ - [1] Tang H, Liu J, Zhao M, Gong X. Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations. In: Proceedings of the 14th ACM Conference on Recommender Systems (RecSys ’20), 2020, pp. 269–278.
+ URL: https://dl.acm.org/doi/10.1145/3383313.3412236

  Progressive Layered Extraction (PLE) advances multi-task learning by stacking CGC
  (Customized Gate Control) blocks that mix shared and task-specific experts. Each
@@ -67,18 +66,21 @@ class CGCLayer(nn.Module):
  nums_task: int,
  num_shared_experts: int,
  num_specific_experts: int,
- shared_expert_params: dict,
- specific_expert_params: dict | list[dict],
+ shared_expert_mlp_params: dict,
+ specific_expert_mlp_params: list[dict],
  ):
  super().__init__()
  if nums_task < 1:
  raise ValueError("nums_task must be >= 1")

- specific_params_list = self.normalize_specific_params(
- specific_expert_params, nums_task
- )
+ if len(specific_expert_mlp_params) != nums_task:
+ raise ValueError(
+ "Length of specific_expert_mlp_params "
+ f"({len(specific_expert_mlp_params)}) must match number of tasks ({nums_task})."
+ )
+ specific_params_list = [params.copy() for params in specific_expert_mlp_params]

- self.output_dim = get_mlp_output_dim(shared_expert_params, input_dim)
+ self.output_dim = get_mlp_output_dim(shared_expert_mlp_params, input_dim)
  specific_dims = [
  get_mlp_output_dim(params, input_dim) for params in specific_params_list
  ]
@@ -94,7 +96,7 @@ class CGCLayer(nn.Module):
  MLP(
  input_dim=input_dim,
  output_dim=None,
- **shared_expert_params,
+ **shared_expert_mlp_params,
  )
  for _ in range(num_shared_experts)
  ]
@@ -166,18 +168,6 @@ class CGCLayer(nn.Module):

  return new_task_fea, new_shared

- @staticmethod
- def normalize_specific_params(
- params: dict | list[dict], nums_task: int
- ) -> list[dict]:
- if isinstance(params, list):
- if len(params) != nums_task:
- raise ValueError(
- f"Length of specific_expert_params ({len(params)}) must match nums_task ({nums_task})."
- )
- return [p.copy() for p in params]
- return [params.copy() for _ in range(nums_task)]
-

  class PLE(BaseModel):
  """
@@ -205,12 +195,12 @@ class PLE(BaseModel):
  dense_features: list[DenseFeature] | None = None,
  sparse_features: list[SparseFeature] | None = None,
  sequence_features: list[SequenceFeature] | None = None,
- shared_expert_params: dict | None = None,
- specific_expert_params: dict | list[dict] | None = None,
+ shared_expert_mlp_params: dict | None = None,
+ specific_expert_mlp_params: list[dict] | None = None,
  num_shared_experts: int = 2,
  num_specific_experts: int = 2,
  num_levels: int = 2,
- tower_params_list: list[dict] | None = None,
+ tower_mlp_params_list: list[dict] | None = None,
  target: list[str] | None = None,
  task: str | list[str] | None = None,
  **kwargs,
@@ -218,24 +208,19 @@ class PLE(BaseModel):

  self.nums_task = len(target) if target is not None else 1

- resolved_task = task
- if resolved_task is None:
- resolved_task = self.default_task
- elif isinstance(resolved_task, str):
- resolved_task = [resolved_task] * self.nums_task
- elif len(resolved_task) == 1 and self.nums_task > 1:
- resolved_task = resolved_task * self.nums_task
- elif len(resolved_task) != self.nums_task:
+ shared_expert_mlp_params = shared_expert_mlp_params or {}
+ if specific_expert_mlp_params is None:
  raise ValueError(
- f"Length of task ({len(resolved_task)}) must match number of targets ({self.nums_task})."
+ "specific_expert_mlp_params must be a list of dicts, one per task."
  )
+ tower_mlp_params_list = tower_mlp_params_list or []

  super(PLE, self).__init__(
  dense_features=dense_features,
  sparse_features=sparse_features,
  sequence_features=sequence_features,
  target=target,
- task=resolved_task,
+ task=task,
  **kwargs,
  )

@@ -245,9 +230,10 @@ class PLE(BaseModel):
  self.num_specific_experts = num_specific_experts
  self.num_levels = num_levels

- if len(tower_params_list) != self.nums_task:
+ if len(tower_mlp_params_list) != self.nums_task:
  raise ValueError(
- f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.nums_task})"
+ "Number of tower mlp params "
+ f"({len(tower_mlp_params_list)}) must match number of tasks ({self.nums_task})"
  )
  # Embedding layer
  self.embedding = EmbeddingLayer(features=self.all_features)
@@ -260,10 +246,10 @@ class PLE(BaseModel):

  # Get expert output dimension
  if (
- "hidden_dims" in shared_expert_params
- and len(shared_expert_params["hidden_dims"]) > 0
+ "hidden_dims" in shared_expert_mlp_params
+ and len(shared_expert_mlp_params["hidden_dims"]) > 0
  ):
- expert_output_dim = shared_expert_params["hidden_dims"][-1]
+ expert_output_dim = shared_expert_mlp_params["hidden_dims"][-1]
  else:
  expert_output_dim = input_dim

@@ -276,8 +262,8 @@ class PLE(BaseModel):
  nums_task=self.nums_task,
  num_shared_experts=num_shared_experts,
  num_specific_experts=num_specific_experts,
- shared_expert_params=shared_expert_params,
- specific_expert_params=specific_expert_params,
+ shared_expert_mlp_params=shared_expert_mlp_params,
+ specific_expert_mlp_params=specific_expert_mlp_params,
  )
  self.cgc_layers.append(cgc_layer)
  expert_output_dim = cgc_layer.output_dim
@@ -285,8 +271,8 @@ class PLE(BaseModel):

  # Task-specific towers
  self.towers = nn.ModuleList()
- for tower_params in tower_params_list:
- tower = MLP(input_dim=expert_output_dim, output_dim=1, **tower_params)
+ for tower_mlp_params in tower_mlp_params_list:
+ tower = MLP(input_dim=expert_output_dim, output_dim=1, **tower_mlp_params)
  self.towers.append(tower)
  self.prediction_layer = TaskHead(
  task_type=self.task, task_dims=[1] * self.nums_task
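
In ple.py, shared_expert_params, specific_expert_params and tower_params_list become shared_expert_mlp_params, specific_expert_mlp_params and tower_mlp_params_list, and the dict-or-list normalization helper is gone: specific_expert_mlp_params must now be an explicit list with one dict per task. A minimal sketch of the new argument shapes; only the 'hidden_dims' key is spelled out because other MLP keyword arguments follow the library's MLP signature, which this diff does not show, and the feature lists are again assumed to be built elsewhere:

from nextrec.models.multi_task.ple import PLE

expert_cfg = {"hidden_dims": [128, 64]}     # forwarded to MLP via **shared_expert_mlp_params

model = PLE(
    dense_features=dense_features,          # assumed nextrec feature objects
    sparse_features=sparse_features,
    sequence_features=sequence_features,
    shared_expert_mlp_params=expert_cfg,
    specific_expert_mlp_params=[dict(expert_cfg), dict(expert_cfg)],  # one dict per task; a single dict is no longer broadcast
    num_shared_experts=2,
    num_specific_experts=2,
    num_levels=2,
    tower_mlp_params_list=[{"hidden_dims": [64]}, {"hidden_dims": [64]}],  # one per target
    target=["click", "purchase"],           # hypothetical targets
    task=["binary", "binary"],
)

Passing specific_expert_mlp_params=None now raises "specific_expert_mlp_params must be a list of dicts, one per task.", and a list whose length differs from the number of tasks is rejected inside CGCLayer.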
nextrec/models/multi_task/poso.py
@@ -3,7 +3,8 @@ Date: create on 28/11/2025
  Checkpoint: edit on 23/12/2025
  Author: Yang Zhou,zyaztec@gmail.com
  Reference:
- [1] Wang et al. "POSO: Personalized Cold Start Modules for Large-scale Recommender Systems", 2021.
+ - [1] Dai S, Lin H, Zhao Z, Lin J, Wu H, Wang Z, Yang S, Liu J. POSO: Personalized Cold Start Modules for Large-scale Recommender Systems. arXiv preprint arXiv:2108.04690, 2021.
+ URL: https://arxiv.org/abs/2108.04690

  POSO (Personalized cOld-start mOdules) augments backbone recommenders by injecting a
  personalized cold-start vector `pc` that gates hidden units layer by layer. Each fully
@@ -49,6 +50,7 @@ from nextrec.basic.layers import MLP, EmbeddingLayer
  from nextrec.basic.heads import TaskHead
  from nextrec.basic.model import BaseModel
  from nextrec.utils.model import select_features
+ from nextrec.utils.types import TaskTypeName


  class POSOGate(nn.Module):
@@ -306,9 +308,9 @@ class POSO(BaseModel):
  pc_dense_features: list[str] | None,
  pc_sparse_features: list[str] | None,
  pc_sequence_features: list[str] | None,
- tower_params_list: list[dict],
+ tower_mlp_params_list: list[dict],
  target: list[str] | None = None,
- task: str | list[str] = "binary",
+ task: TaskTypeName | list[TaskTypeName] | None = None,
  architecture: Literal["mlp", "mmoe"] = "mlp",
  # POSO gating defaults
  gate_hidden_dim: int = 32,
@@ -327,22 +329,10 @@ class POSO(BaseModel):
  ):
  self.nums_task = len(target)

- # Normalize task to match nums_task
- resolved_task = task
- if resolved_task is None:
- resolved_task = self.default_task
- elif isinstance(resolved_task, str):
- resolved_task = [resolved_task] * self.nums_task
- elif len(resolved_task) == 1 and self.nums_task > 1:
- resolved_task = resolved_task * self.nums_task
- elif len(resolved_task) != self.nums_task:
+ if len(tower_mlp_params_list) != self.nums_task:
  raise ValueError(
- f"Length of task ({len(resolved_task)}) must match number of targets ({self.nums_task})."
- )
-
- if len(tower_params_list) != self.nums_task:
- raise ValueError(
- f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.nums_task})"
+ "Number of tower mlp params "
+ f"({len(tower_mlp_params_list)}) must match number of tasks ({self.nums_task})"
  )

  super().__init__(
@@ -350,7 +340,7 @@ class POSO(BaseModel):
  sparse_features=sparse_features,
  sequence_features=sequence_features,
  target=target,
- task=resolved_task,
+ task=task,
  **kwargs,
  )

@@ -415,11 +405,12 @@ class POSO(BaseModel):
  if self.architecture == "mlp":
  self.towers = nn.ModuleList()
  self.tower_heads = nn.ModuleList()
- for tower_params in tower_params_list:
+ for tower_params in tower_mlp_params_list:
  hidden_dims = tower_params.get("hidden_dims")
  if not hidden_dims:
  raise ValueError(
- "tower_params must include a non-empty 'hidden_dims' list for POSO-MLP towers."
+ "tower_mlp_params_list must include a non-empty 'hidden_dims' "
+ "list for POSO-MLP towers."
  )
  dropout = tower_params.get("dropout", 0.0)
  tower = POSOMLP(
@@ -463,7 +454,7 @@ class POSO(BaseModel):
  output_dim=1,
  **tower_params,
  )
- for tower_params in tower_params_list
+ for tower_params in tower_mlp_params_list
  ]
  )
  self.tower_heads = None
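
poso.py applies the same rename (tower_params_list to tower_mlp_params_list) and makes task optional with the TaskTypeName type. For architecture="mlp", each tower dict must carry a non-empty 'hidden_dims' list, and 'dropout' is read with a default of 0.0. A minimal sketch of the renamed argument only, since the rest of the constructor is unchanged or not shown in these hunks:

# One dict per target; the shape is taken from the hunks above.
tower_mlp_params_list = [
    {"hidden_dims": [128, 64], "dropout": 0.1},   # tower for the first task
    {"hidden_dims": [128, 64], "dropout": 0.1},   # tower for the second task
]
# With architecture="mlp", an entry without a non-empty "hidden_dims" raises:
# "tower_mlp_params_list must include a non-empty 'hidden_dims' list for POSO-MLP towers."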
nextrec/models/multi_task/share_bottom.py
@@ -2,9 +2,6 @@
  Date: create on 09/11/2025
  Checkpoint: edit on 23/12/2025
  Author: Yang Zhou,zyaztec@gmail.com
- Reference:
- [1] Caruana R. Multitask learning[J]. Machine Learning, 1997, 28: 41-75.
- (https://link.springer.com/article/10.1023/A:1007379606734)

  Shared-Bottom is the classic hard-parameter-sharing baseline for multi-task learning.
  All tasks share a common bottom network to learn general representations, and each
@@ -65,8 +62,8 @@ class ShareBottom(BaseModel):
  dense_features: list[DenseFeature],
  sparse_features: list[SparseFeature],
  sequence_features: list[SequenceFeature],
- bottom_params: dict,
- tower_params_list: list[dict],
+ bottom_mlp_params: dict,
+ tower_mlp_params_list: list[dict],
  target: list[str],
  task: str | list[str] | None = None,
  **kwargs,
@@ -74,32 +71,21 @@ class ShareBottom(BaseModel):

  self.nums_task = len(target)

- resolved_task = task
- if resolved_task is None:
- resolved_task = self.default_task
- elif isinstance(resolved_task, str):
- resolved_task = [resolved_task] * self.nums_task
- elif len(resolved_task) == 1 and self.nums_task > 1:
- resolved_task = resolved_task * self.nums_task
- elif len(resolved_task) != self.nums_task:
- raise ValueError(
- f"Length of task ({len(resolved_task)}) must match number of targets ({self.nums_task})."
- )
-
  super(ShareBottom, self).__init__(
  dense_features=dense_features,
  sparse_features=sparse_features,
  sequence_features=sequence_features,
  target=target,
- task=resolved_task,
+ task=task,
  **kwargs,
  )

  # Number of tasks
  self.nums_task = len(target)
- if len(tower_params_list) != self.nums_task:
+ if len(tower_mlp_params_list) != self.nums_task:
  raise ValueError(
- f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.nums_task})"
+ "Number of tower mlp params "
+ f"({len(tower_mlp_params_list)}) must match number of tasks ({self.nums_task})"
  )
  # Embedding layer
  self.embedding = EmbeddingLayer(features=self.all_features)
@@ -110,19 +96,22 @@ class ShareBottom(BaseModel):
  # input_dim = emb_dim_total + dense_input_dim

  # Shared bottom network
- self.bottom = MLP(input_dim=input_dim, output_dim=None, **bottom_params)
+ self.bottom = MLP(input_dim=input_dim, output_dim=None, **bottom_mlp_params)
  self.grad_norm_shared_modules = ["embedding", "bottom"]

  # Get bottom output dimension
- if "hidden_dims" in bottom_params and len(bottom_params["hidden_dims"]) > 0:
- bottom_output_dim = bottom_params["hidden_dims"][-1]
+ if (
+ "hidden_dims" in bottom_mlp_params
+ and len(bottom_mlp_params["hidden_dims"]) > 0
+ ):
+ bottom_output_dim = bottom_mlp_params["hidden_dims"][-1]
  else:
  bottom_output_dim = input_dim

  # Task-specific towers
  self.towers = nn.ModuleList()
- for tower_params in tower_params_list:
- tower = MLP(input_dim=bottom_output_dim, output_dim=1, **tower_params)
+ for tower_mlp_params in tower_mlp_params_list:
+ tower = MLP(input_dim=bottom_output_dim, output_dim=1, **tower_mlp_params)
  self.towers.append(tower)
  self.prediction_layer = TaskHead(
  task_type=self.task, task_dims=[1] * self.nums_task
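
share_bottom.py follows the same pattern: bottom_params becomes bottom_mlp_params, tower_params_list becomes tower_mlp_params_list, and the per-model task normalization block is removed so task is passed through to the base class as-is. A minimal sketch of the new keyword names, again with feature lists assumed to be built elsewhere and only the 'hidden_dims' key visible in this diff spelled out:

from nextrec.models.multi_task.share_bottom import ShareBottom

model = ShareBottom(
    dense_features=dense_features,          # assumed nextrec feature objects
    sparse_features=sparse_features,
    sequence_features=sequence_features,
    bottom_mlp_params={"hidden_dims": [256, 128]},   # bottom output dim = hidden_dims[-1] = 128
    tower_mlp_params_list=[{"hidden_dims": [64]}, {"hidden_dims": [64]}],  # one per target
    target=["click", "purchase"],           # hypothetical targets
    task=["binary", "binary"],
)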
nextrec/models/ranking/afm.py
@@ -3,8 +3,8 @@ Date: create on 09/11/2025
  Checkpoint: edit on 23/12/2025
  Author: Yang Zhou, zyaztec@gmail.com
  Reference:
- [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of
- feature interactions via attention networks[C]//IJCAI. 2017: 3119-3125.
+ - [1] Xiao J, Ye H, He X, et al. Attentional Factorization Machines: Learning the Weight of Feature Interactions via Attention Networks
+ URL: https://arxiv.org/abs/1708.04617

  Attentional Factorization Machine (AFM) builds on FM by learning an importance
  weight for every second-order interaction instead of treating all pairs equally.
nextrec/models/ranking/autoint.py
@@ -3,10 +3,8 @@ Date: create on 09/11/2025
  Checkpoint: edit on 23/12/2025
  Author: Yang Zhou, zyaztec@gmail.com
  Reference:
- [1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
- self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
- on information and knowledge management. 2019: 1161-1170.
- (https://arxiv.org/abs/1810.11921)
+ - [1] Song W, Shi C, Xiao Z, et al. AutoInt: Automatic feature interaction learning via self-attentive neural networks. In: Proceedings of the 28th ACM International Conference on Information and Knowledge Management (CIKM ’19), 2019, pp. 1161–1170.
+ URL: https://arxiv.org/abs/1810.11921

  AutoInt is a CTR prediction model that leverages multi-head self-attention
  to automatically learn high-order feature interactions in an explicit and
nextrec/models/ranking/dcn.py
@@ -3,9 +3,8 @@ Date: create on 09/11/2025
  Checkpoint: edit on 23/12/2025
  Author: Yang Zhou, zyaztec@gmail.com
  Reference:
- [1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]
- //Proceedings of the ADKDD'17. 2017: 1-7.
- (https://arxiv.org/abs/1708.05123)
+ - [1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C] //Proceedings of the ADKDD'17. 2017: 1-7.
+ URL: https://arxiv.org/abs/1708.05123

  Deep & Cross Network (DCN) mixes explicit polynomial feature crosses with a deep
  MLP branch to capture both low-order and high-order interactions for CTR-style
nextrec/models/ranking/dcn_v2.py
@@ -3,9 +3,8 @@ Date: create on 09/11/2025
  Checkpoint: edit on 23/12/2025
  Author: Yang Zhou, zyaztec@gmail.com
  Reference:
- [1] R. Wang et al. DCN V2: Improved Deep & Cross Network and Practical Lessons for
- Web-scale Learning to Rank Systems. KDD 2021.
- (https://arxiv.org/abs/2008.13535)
+ - [1] R. Wang et al. DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems. KDD 2021.
+ URL: https://arxiv.org/abs/2008.13535

  DCN v2 enhances the original Deep & Cross Network by replacing the scalar cross
  weights with vector-wise (matrix) parameters and a Mixture-of-Low-Rank-Experts