nextrec 0.4.25__py3-none-any.whl → 0.4.28__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (52):
  1. nextrec/__version__.py +1 -1
  2. nextrec/basic/asserts.py +72 -0
  3. nextrec/basic/loggers.py +18 -1
  4. nextrec/basic/model.py +54 -51
  5. nextrec/data/batch_utils.py +23 -3
  6. nextrec/data/dataloader.py +3 -8
  7. nextrec/models/multi_task/[pre]aitm.py +173 -0
  8. nextrec/models/multi_task/[pre]snr_trans.py +232 -0
  9. nextrec/models/multi_task/[pre]star.py +192 -0
  10. nextrec/models/multi_task/apg.py +330 -0
  11. nextrec/models/multi_task/cross_stitch.py +229 -0
  12. nextrec/models/multi_task/escm.py +290 -0
  13. nextrec/models/multi_task/esmm.py +8 -21
  14. nextrec/models/multi_task/hmoe.py +203 -0
  15. nextrec/models/multi_task/mmoe.py +20 -28
  16. nextrec/models/multi_task/pepnet.py +81 -76
  17. nextrec/models/multi_task/ple.py +30 -44
  18. nextrec/models/multi_task/poso.py +13 -22
  19. nextrec/models/multi_task/share_bottom.py +14 -25
  20. nextrec/models/ranking/afm.py +2 -2
  21. nextrec/models/ranking/autoint.py +2 -4
  22. nextrec/models/ranking/dcn.py +2 -3
  23. nextrec/models/ranking/dcn_v2.py +2 -3
  24. nextrec/models/ranking/deepfm.py +2 -3
  25. nextrec/models/ranking/dien.py +7 -9
  26. nextrec/models/ranking/din.py +8 -10
  27. nextrec/models/ranking/eulernet.py +1 -2
  28. nextrec/models/ranking/ffm.py +1 -2
  29. nextrec/models/ranking/fibinet.py +2 -3
  30. nextrec/models/ranking/fm.py +1 -1
  31. nextrec/models/ranking/lr.py +1 -1
  32. nextrec/models/ranking/masknet.py +1 -2
  33. nextrec/models/ranking/pnn.py +1 -2
  34. nextrec/models/ranking/widedeep.py +2 -3
  35. nextrec/models/ranking/xdeepfm.py +2 -4
  36. nextrec/models/representation/rqvae.py +4 -4
  37. nextrec/models/retrieval/dssm.py +18 -26
  38. nextrec/models/retrieval/dssm_v2.py +15 -22
  39. nextrec/models/retrieval/mind.py +9 -15
  40. nextrec/models/retrieval/sdm.py +36 -33
  41. nextrec/models/retrieval/youtube_dnn.py +16 -24
  42. nextrec/models/sequential/hstu.py +2 -2
  43. nextrec/utils/__init__.py +5 -1
  44. nextrec/utils/model.py +9 -14
  45. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/METADATA +72 -62
  46. nextrec-0.4.28.dist-info/RECORD +90 -0
  47. nextrec/models/multi_task/aitm.py +0 -0
  48. nextrec/models/multi_task/snr_trans.py +0 -0
  49. nextrec-0.4.25.dist-info/RECORD +0 -86
  50. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/WHEEL +0 -0
  51. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/entry_points.txt +0 -0
  52. {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/licenses/LICENSE +0 -0
@@ -3,9 +3,8 @@ Date: create on 27/10/2025
3
3
  Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou,zyaztec@gmail.com
5
5
  Reference:
6
- [1] Guo H, Tang R, Ye Y, et al. DeepFM: A factorization-machine based neural network
7
- for CTR prediction[J]. arXiv preprint arXiv:1703.04247, 2017.
8
- (https://arxiv.org/abs/1703.04247)
6
+ - [1] Guo H, Tang R, Ye Y, et al. DeepFM: A factorization-machine based neural network for CTR prediction[J]. arXiv preprint arXiv:1703.04247, 2017.
7
+ URL: https://arxiv.org/abs/1703.04247
9
8
 
10
9
  DeepFM combines a Factorization Machine (FM) for explicit second-order feature
11
10
  interactions with a deep MLP for high-order nonlinear patterns. Both parts share
@@ -3,9 +3,7 @@ Date: create on 09/11/2025
3
3
  Author: Yang Zhou, zyaztec@gmail.com
4
4
  Checkpoint: edit on 09/12/2025
5
5
  Reference:
6
- [1] Zhou G, Mou N, Fan Y, et al. Deep interest evolution network for click-through
7
- rate prediction[C] // Proceedings of the AAAI conference on artificial intelligence.
8
- 2019, 33(01): 5941-5948. (https://arxiv.org/abs/1809.03672)
6
+ - [1] Zhou G, Mou N, Fan Y, et al. Deep interest evolution network for click-through rate prediction[C] // Proceedings of the AAAI conference on artificial intelligence. 2019, 33(01): 5941-5948. (https://arxiv.org/abs/1809.03672)
9
7
 
10
8
  DIEN is a CTR prediction model that explicitly models how user interests evolve
11
9
  over time. It introduces a two-stage pipeline:
@@ -58,7 +56,6 @@ from nextrec.basic.layers import (
58
56
  )
59
57
  from nextrec.basic.heads import TaskHead
60
58
  from nextrec.basic.model import BaseModel
61
- from nextrec.utils.types import ActivationName
62
59
 
63
60
 
64
61
  class AUGRU(nn.Module):
@@ -211,8 +208,7 @@ class DIEN(BaseModel):
211
208
  neg_behavior_feature_name: str | None = None,
212
209
  mlp_params: dict | None = None,
213
210
  gru_hidden_size: int = 64,
214
- attention_hidden_units: list[int] | None = None,
215
- attention_activation: ActivationName = "sigmoid",
211
+ attention_mlp_params: dict | None = None,
216
212
  use_negsampling: bool = False,
217
213
  aux_loss_weight: float = 1.0,
218
214
  **kwargs,
@@ -222,7 +218,9 @@ class DIEN(BaseModel):
222
218
  sparse_features = sparse_features or []
223
219
  sequence_features = sequence_features or []
224
220
  mlp_params = mlp_params or {}
225
- attention_hidden_units = attention_hidden_units or [80, 40]
221
+ attention_mlp_params = attention_mlp_params or {}
222
+ attention_mlp_params.setdefault("hidden_dims", [80, 40])
223
+ attention_mlp_params.setdefault("activation", "sigmoid")
226
224
 
227
225
  super(DIEN, self).__init__(
228
226
  dense_features=dense_features,
@@ -285,8 +283,8 @@ class DIEN(BaseModel):
285
283
 
286
284
  self.attention_layer = AttentionPoolingLayer(
287
285
  embedding_dim=gru_hidden_size,
288
- hidden_units=attention_hidden_units,
289
- activation=attention_activation,
286
+ hidden_units=attention_mlp_params["hidden_dims"],
287
+ activation=attention_mlp_params["activation"],
290
288
  use_softmax=False,
291
289
  )
292
290
 
@@ -3,10 +3,8 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 09/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate
7
- prediction[C] //Proceedings of the 24th ACM SIGKDD international conference on
8
- knowledge discovery & data mining. 2018: 1059-1068.
9
- (https://arxiv.org/abs/1706.06978)
6
+ - [1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C] //Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery & data mining. 2018: 1059-1068.
7
+ URL: https://arxiv.org/abs/1706.06978
10
8
 
11
9
  Deep Interest Network (DIN) is a CTR model that builds a target-aware user
12
10
  representation by attending over the historical behavior sequence. Instead of
@@ -58,7 +56,6 @@ from nextrec.basic.layers import (
58
56
  )
59
57
  from nextrec.basic.heads import TaskHead
60
58
  from nextrec.basic.model import BaseModel
61
- from nextrec.utils.types import ActivationName
62
59
 
63
60
 
64
61
  class DIN(BaseModel):
@@ -78,8 +75,7 @@ class DIN(BaseModel):
78
75
  behavior_feature_name: str | None = None,
79
76
  candidate_feature_name: str | None = None,
80
77
  mlp_params: dict | None = None,
81
- attention_hidden_units: list[int] | None = None,
82
- attention_activation: ActivationName = "dice",
78
+ attention_mlp_params: dict | None = None,
83
79
  attention_use_softmax: bool = True,
84
80
  **kwargs,
85
81
  ):
@@ -88,7 +84,9 @@ class DIN(BaseModel):
88
84
  sparse_features = sparse_features or []
89
85
  sequence_features = sequence_features or []
90
86
  mlp_params = mlp_params or {}
91
- attention_hidden_units = attention_hidden_units or [80, 40]
87
+ attention_mlp_params = attention_mlp_params or {}
88
+ attention_mlp_params.setdefault("hidden_dims", [80, 40])
89
+ attention_mlp_params.setdefault("activation", "dice")
92
90
 
93
91
  super(DIN, self).__init__(
94
92
  dense_features=dense_features,
@@ -135,8 +133,8 @@ class DIN(BaseModel):
135
133
  )
136
134
  self.attention = AttentionPoolingLayer(
137
135
  embedding_dim=behavior_emb_dim,
138
- hidden_units=attention_hidden_units,
139
- activation=attention_activation,
136
+ hidden_units=attention_mlp_params["hidden_dims"],
137
+ activation=attention_mlp_params["activation"],
140
138
  use_softmax=attention_use_softmax,
141
139
  )
142
140
 
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Zhao Z, Zhang H, Tang H, et al. EulerNet: Efficient and Effective Feature
7
- Interaction Modeling with Euler's Formula. (SIGIR 2021)
6
+ - [1] Zhao Z, Zhang H, Tang H, et al. EulerNet: Efficient and Effective Feature Interaction Modeling with Euler's Formula. (SIGIR 2021)
8
7
 
9
8
  EulerNet models feature interactions in the complex domain using Euler's
10
9
  formula. Each field embedding is transformed into amplitude and phase,
@@ -3,8 +3,7 @@ Date: create on 19/12/2025
3
3
  Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Juan Y, Zhuang Y, Chin W-S, et al. Field-aware Factorization Machines for CTR
7
- Prediction[C]//RecSys. 2016: 43-50.
6
+ - [1] Juan Y, Zhuang Y, Chin W-S, et al. Field-aware Factorization Machines for CTR Prediction[C]//RecSys. 2016: 43-50.
8
7
 
9
8
  Field-aware Factorization Machines (FFM) extend FM by learning a distinct
10
9
  embedding of each feature for every target field. For a pair of fields (i, j),
@@ -3,9 +3,8 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 09/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Huang T, Zhang Z, Zhang B, et al. FiBiNET: Combining feature importance and bilinear
7
- feature interaction for click-through rate prediction[C]//RecSys. 2019: 169-177.
8
- (https://arxiv.org/abs/1905.09433)
6
+ - [1] Huang T, Zhang Z, Zhang B, et al. FiBiNET: Combining feature importance and bilinear feature interaction for click-through rate prediction[C]//RecSys. 2019: 169-177.
7
+ URL: https://arxiv.org/abs/1905.09433
9
8
 
10
9
  FiBiNET (Feature Importance and Bilinear Interaction Network) is a CTR model that
11
10
  jointly learns which fields matter most and how they interact. It first uses SENET
@@ -3,7 +3,7 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Rendle S. Factorization machines[C]//ICDM. 2010: 995-1000.
6
+ - [1] Rendle S. Factorization machines[C]//ICDM. 2010: 995-1000.
7
7
 
8
8
  Factorization Machines (FM) capture second-order feature interactions with
9
9
  linear complexity by factorizing the pairwise interaction matrix. Each field
@@ -3,7 +3,7 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Hosmer D W, Lemeshow S, Sturdivant R X. Applied Logistic Regression.
6
+ - [1] Hosmer D W, Lemeshow S, Sturdivant R X. Applied Logistic Regression.
7
7
 
8
8
  Logistic Regression (LR) is a classic linear baseline for CTR/ranking tasks.
9
9
  It maps each feature (dense, sparse, or sequence) into a numeric vector and
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Wang Z, She Q, Zhang J. MaskNet: Introducing Feature-Wise
7
- Multiplication to CTR Ranking Models by Instance-Guided Mask.
6
+ - [1] Wang Z, She Q, Zhang J. MaskNet: Introducing Feature-Wise Multiplication to CTR Ranking Models by Instance-Guided Mask.
8
7
 
9
8
  MaskNet is a CTR prediction model that introduces instance-guided,
10
9
  feature-wise multiplicative interactions into deep ranking networks.
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response
7
- prediction[C]//ICDM. 2016: 1149-1154. (https://arxiv.org/abs/1611.00144)
6
+ - [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//ICDM. 2016: 1149-1154. (https://arxiv.org/abs/1611.00144)
8
7
 
9
8
  Product-based Neural Networks (PNN) are CTR prediction models that explicitly
10
9
  encode feature interactions by combining:
@@ -3,9 +3,8 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Cheng H T, Koc L, Harmsen J, et al. Wide & Deep learning for recommender systems[C]
7
- //Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. 2016: 7-10.
8
- (https://arxiv.org/abs/1606.07792)
6
+ - [1] Cheng H T, Koc L, Harmsen J, et al. Wide & Deep learning for recommender systems[C] //Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. 2016: 7-10.
7
+ URL: https://arxiv.org/abs/1606.07792
9
8
 
10
9
  Wide & Deep blends a linear wide component (memorization of cross features) with a
11
10
  deep neural network (generalization) sharing the same feature space. The wide part
@@ -3,10 +3,8 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 23/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
7
- for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
8
- knowledge discovery & data mining. 2018: 1754-1763.
9
- (https://arxiv.org/abs/1803.05170)
6
+ - [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery & data mining. 2018: 1754-1763.
7
+ URL: https://arxiv.org/abs/1803.05170
10
8
 
11
9
  xDeepFM is a CTR prediction model that unifies explicit and implicit
12
10
  feature interaction learning. It extends DeepFM by adding the
@@ -4,11 +4,11 @@ Residual Quantized Variational AutoEncoder (RQ-VAE) for Generative Recommendatio
4
4
  Date: created on 11/12/2025
5
5
  Checkpoint: edit on 13/12/2025
6
6
  Author: Yang Zhou, zyaztec@gmail.com
7
- Source code reference:
8
- [1] Tencent-Advertising-Algorithm-Competition-2025-Baseline
7
+ Source Code Reference:
8
+ - [1] Tencent-Advertising-Algorithm-Competition-2025-Baseline
9
9
  Reference:
10
- [1] Lee et al. Autoregressive Image Generation using Residual Quantization. CVPR 2022.
11
- [2] Zeghidour et al. SoundStream: An End-to-End Neural Audio Codec. IEEE/ACM TASLP 2021.
10
+ - [1] Lee et al. Autoregressive Image Generation using Residual Quantization. CVPR 2022.
11
+ - [2] Zeghidour et al. SoundStream: An End-to-End Neural Audio Codec. IEEE/ACM TASLP 2021.
12
12
 
13
13
  RQ-VAE learns hierarchical discrete representations via residual quantization.
14
14
  It encodes continuous embeddings (e.g., item/user embeddings) into multi-level
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 18/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Huang P S, He X, Gao J, et al. Learning deep structured semantic models for web search using clickthrough data[C]
7
- //Proceedings of the 22nd ACM international conference on Information & Knowledge Management. 2013: 2333-2338.
6
+ - [1] Huang P S, He X, Gao J, et al. Learning deep structured semantic models for web search using clickthrough data[C] //Proceedings of the 22nd ACM international conference on Information & Knowledge Management. 2013: 2333-2338.
8
7
  """
9
8
 
10
9
  from typing import Literal
@@ -40,11 +39,9 @@ class DSSM(BaseMatchModel):
40
39
  item_dense_features: list[DenseFeature] | None = None,
41
40
  item_sparse_features: list[SparseFeature] | None = None,
42
41
  item_sequence_features: list[SequenceFeature] | None = None,
43
- user_dnn_hidden_units: list[int] = [256, 128, 64],
44
- item_dnn_hidden_units: list[int] = [256, 128, 64],
42
+ user_mlp_params: dict | None = None,
43
+ item_mlp_params: dict | None = None,
45
44
  embedding_dim: int = 64,
46
- dnn_activation: str = "relu",
47
- dnn_dropout: float = 0.0,
48
45
  training_mode: Literal["pointwise", "pairwise", "listwise"] = "pointwise",
49
46
  num_negative_samples: int = 4,
50
47
  temperature: float = 1.0,
@@ -75,8 +72,17 @@ class DSSM(BaseMatchModel):
75
72
  )
76
73
 
77
74
  self.embedding_dim = embedding_dim
78
- self.user_dnn_hidden_units = user_dnn_hidden_units
79
- self.item_dnn_hidden_units = item_dnn_hidden_units
75
+ user_mlp_params = user_mlp_params or {}
76
+ item_mlp_params = item_mlp_params or {}
77
+
78
+ user_mlp_params.setdefault("hidden_dims", [256, 128, 64])
79
+ item_mlp_params.setdefault("hidden_dims", [256, 128, 64])
80
+ user_mlp_params.setdefault("activation", "relu")
81
+ user_mlp_params.setdefault("dropout", 0.0)
82
+ item_mlp_params.setdefault("activation", "relu")
83
+ item_mlp_params.setdefault("dropout", 0.0)
84
+ user_mlp_params.setdefault("output_dim", embedding_dim)
85
+ item_mlp_params.setdefault("output_dim", embedding_dim)
80
86
 
81
87
  # User tower embedding layer
82
88
  user_features = []
@@ -99,15 +105,8 @@ class DSSM(BaseMatchModel):
99
105
  for feat in user_sequence_features or []:
100
106
  user_input_dim += feat.embedding_dim
101
107
 
102
- # User DNN
103
- user_dnn_units = user_dnn_hidden_units + [embedding_dim]
104
- self.user_dnn = MLP(
105
- input_dim=user_input_dim,
106
- hidden_dims=user_dnn_units,
107
- output_dim=None,
108
- dropout=dnn_dropout,
109
- activation=dnn_activation,
110
- )
108
+ # User MLP
109
+ self.user_dnn = MLP(input_dim=user_input_dim, **user_mlp_params)
111
110
 
112
111
  # Item tower embedding layer
113
112
  item_features = []
@@ -130,15 +129,8 @@ class DSSM(BaseMatchModel):
130
129
  for feat in item_sequence_features or []:
131
130
  item_input_dim += feat.embedding_dim
132
131
 
133
- # Item DNN
134
- item_dnn_units = item_dnn_hidden_units + [embedding_dim]
135
- self.item_dnn = MLP(
136
- input_dim=item_input_dim,
137
- hidden_dims=item_dnn_units,
138
- output_dim=None,
139
- dropout=dnn_dropout,
140
- activation=dnn_activation,
141
- )
132
+ # Item MLP
133
+ self.item_dnn = MLP(input_dim=item_input_dim, **item_mlp_params)
142
134
 
143
135
  self.register_regularization_weights(
144
136
  embedding_attr="user_embedding", include_modules=["user_dnn"]
@@ -36,11 +36,9 @@ class DSSM_v2(BaseMatchModel):
36
36
  item_dense_features: list[DenseFeature] | None = None,
37
37
  item_sparse_features: list[SparseFeature] | None = None,
38
38
  item_sequence_features: list[SequenceFeature] | None = None,
39
- user_dnn_hidden_units: list[int] = [256, 128, 64],
40
- item_dnn_hidden_units: list[int] = [256, 128, 64],
39
+ user_mlp_params: dict | None = None,
40
+ item_mlp_params: dict | None = None,
41
41
  embedding_dim: int = 64,
42
- dnn_activation: str = "relu",
43
- dnn_dropout: float = 0.0,
44
42
  training_mode: Literal["pointwise", "pairwise", "listwise"] = "pairwise",
45
43
  num_negative_samples: int = 4,
46
44
  temperature: float = 1.0,
@@ -71,8 +69,17 @@ class DSSM_v2(BaseMatchModel):
71
69
  )
72
70
 
73
71
  self.embedding_dim = embedding_dim
74
- self.user_dnn_hidden_units = user_dnn_hidden_units
75
- self.item_dnn_hidden_units = item_dnn_hidden_units
72
+ user_mlp_params = user_mlp_params or {}
73
+ item_mlp_params = item_mlp_params or {}
74
+
75
+ user_mlp_params.setdefault("hidden_dims", [256, 128, 64])
76
+ item_mlp_params.setdefault("hidden_dims", [256, 128, 64])
77
+ user_mlp_params.setdefault("activation", "relu")
78
+ user_mlp_params.setdefault("dropout", 0.0)
79
+ item_mlp_params.setdefault("activation", "relu")
80
+ item_mlp_params.setdefault("dropout", 0.0)
81
+ user_mlp_params.setdefault("output_dim", embedding_dim)
82
+ item_mlp_params.setdefault("output_dim", embedding_dim)
76
83
 
77
84
  # User tower
78
85
  user_features = []
@@ -94,14 +101,7 @@ class DSSM_v2(BaseMatchModel):
94
101
  for feat in user_sequence_features or []:
95
102
  user_input_dim += feat.embedding_dim
96
103
 
97
- user_dnn_units = user_dnn_hidden_units + [embedding_dim]
98
- self.user_dnn = MLP(
99
- input_dim=user_input_dim,
100
- hidden_dims=user_dnn_units,
101
- output_dim=None,
102
- dropout=dnn_dropout,
103
- activation=dnn_activation,
104
- )
104
+ self.user_dnn = MLP(input_dim=user_input_dim, **user_mlp_params)
105
105
 
106
106
  # Item tower
107
107
  item_features = []
@@ -123,14 +123,7 @@ class DSSM_v2(BaseMatchModel):
123
123
  for feat in item_sequence_features or []:
124
124
  item_input_dim += feat.embedding_dim
125
125
 
126
- item_dnn_units = item_dnn_hidden_units + [embedding_dim]
127
- self.item_dnn = MLP(
128
- input_dim=item_input_dim,
129
- hidden_dims=item_dnn_units,
130
- output_dim=None,
131
- dropout=dnn_dropout,
132
- activation=dnn_activation,
133
- )
126
+ self.item_dnn = MLP(input_dim=item_input_dim, **item_mlp_params)
134
127
 
135
128
  self.register_regularization_weights(
136
129
  embedding_attr="user_embedding", include_modules=["user_dnn"]
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 18/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Li C, Liu Z, Wu M, et al. Multi-interest network with dynamic routing for recommendation at Tmall[C]
7
- //Proceedings of the 28th ACM international conference on information and knowledge management. 2019: 2615-2623.
6
+ - [1] Li C, Liu Z, Wu M, et al. Multi-interest network with dynamic routing for recommendation at Tmall[C] //Proceedings of the 28th ACM international conference on information and knowledge management. 2019: 2615-2623.
8
7
  """
9
8
 
10
9
  from typing import Literal
@@ -195,9 +194,7 @@ class MIND(BaseMatchModel):
195
194
  capsule_bilinear_type: int = 2,
196
195
  routing_times: int = 3,
197
196
  relu_layer: bool = False,
198
- item_dnn_hidden_units: list[int] = [256, 128],
199
- dnn_activation: str = "relu",
200
- dnn_dropout: float = 0.0,
197
+ item_mlp_params: dict | None = None,
201
198
  training_mode: Literal["pointwise", "pairwise", "listwise"] = "pointwise",
202
199
  num_negative_samples: int = 100,
203
200
  temperature: float = 1.0,
@@ -229,7 +226,11 @@ class MIND(BaseMatchModel):
229
226
 
230
227
  self.embedding_dim = embedding_dim
231
228
  self.num_interests = num_interests
232
- self.item_dnn_hidden_units = item_dnn_hidden_units
229
+ item_mlp_params = item_mlp_params or {}
230
+ item_mlp_params.setdefault("hidden_dims", [256, 128])
231
+ item_mlp_params.setdefault("activation", "relu")
232
+ item_mlp_params.setdefault("dropout", 0.0)
233
+ item_mlp_params.setdefault("output_dim", embedding_dim)
233
234
 
234
235
  user_features = []
235
236
  if user_dense_features:
@@ -291,15 +292,8 @@ class MIND(BaseMatchModel):
291
292
  item_input_dim += feat.embedding_dim
292
293
 
293
294
  # Item DNN
294
- if len(item_dnn_hidden_units) > 0:
295
- item_dnn_units = item_dnn_hidden_units + [embedding_dim]
296
- self.item_dnn = MLP(
297
- input_dim=item_input_dim,
298
- hidden_dims=item_dnn_units,
299
- output_dim=None,
300
- dropout=dnn_dropout,
301
- activation=dnn_activation,
302
- )
295
+ if len(item_mlp_params["hidden_dims"]) > 0:
296
+ self.item_dnn = MLP(input_dim=item_input_dim, **item_mlp_params)
303
297
  else:
304
298
  self.item_dnn = None
305
299
 
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 18/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Ying H, Zhuang F, Zhang F, et al. Sequential recommender system based on hierarchical attention networks[C]
7
- //IJCAI. 2018: 3926-3932.
6
+ - [1] Ying H, Zhuang F, Zhang F, et al. Sequential recommender system based on hierarchical attention networks[C] //IJCAI. 2018: 3926-3932.
8
7
  """
9
8
 
10
9
  from typing import Literal
@@ -37,14 +36,11 @@ class SDM(BaseMatchModel):
37
36
  item_sequence_features: list[SequenceFeature] | None = None,
38
37
  embedding_dim: int = 64,
39
38
  rnn_type: Literal["GRU", "LSTM"] = "GRU",
40
- rnn_hidden_size: int = 64,
41
- rnn_num_layers: int = 1,
42
- rnn_dropout: float = 0.0,
39
+ rnn_params: dict | None = None,
43
40
  use_short_term: bool = True,
44
41
  use_long_term: bool = True,
45
- item_dnn_hidden_units: list[int] = [256, 128],
46
- dnn_activation: str = "relu",
47
- dnn_dropout: float = 0.0,
42
+ user_mlp_params: dict | None = None,
43
+ item_mlp_params: dict | None = None,
48
44
  training_mode: Literal["pointwise", "pairwise", "listwise"] = "pointwise",
49
45
  num_negative_samples: int = 4,
50
46
  temperature: float = 1.0,
@@ -76,10 +72,26 @@ class SDM(BaseMatchModel):
76
72
 
77
73
  self.embedding_dim = embedding_dim
78
74
  self.rnn_type = rnn_type
79
- self.rnn_hidden_size = rnn_hidden_size
80
75
  self.use_short_term = use_short_term
81
76
  self.use_long_term = use_long_term
82
- self.item_dnn_hidden_units = item_dnn_hidden_units
77
+ rnn_params = rnn_params or {}
78
+ user_mlp_params = user_mlp_params or {}
79
+ item_mlp_params = item_mlp_params or {}
80
+
81
+ rnn_params.setdefault("hidden_size", 64)
82
+ rnn_params.setdefault("num_layers", 1)
83
+ rnn_params.setdefault("dropout", 0.0)
84
+ self.rnn_hidden_size = rnn_params["hidden_size"]
85
+
86
+ user_mlp_params.setdefault("hidden_dims", [self.rnn_hidden_size * 2])
87
+ user_mlp_params.setdefault("activation", "relu")
88
+ user_mlp_params.setdefault("dropout", 0.0)
89
+ user_mlp_params.setdefault("output_dim", embedding_dim)
90
+
91
+ item_mlp_params.setdefault("hidden_dims", [256, 128])
92
+ item_mlp_params.setdefault("activation", "relu")
93
+ item_mlp_params.setdefault("dropout", 0.0)
94
+ item_mlp_params.setdefault("output_dim", embedding_dim)
83
95
 
84
96
  # User tower
85
97
  user_features = []
@@ -101,25 +113,29 @@ class SDM(BaseMatchModel):
101
113
  if rnn_type == "GRU":
102
114
  self.rnn = nn.GRU(
103
115
  input_size=seq_emb_dim,
104
- hidden_size=rnn_hidden_size,
105
- num_layers=rnn_num_layers,
116
+ hidden_size=self.rnn_hidden_size,
117
+ num_layers=rnn_params["num_layers"],
106
118
  batch_first=True,
107
- dropout=rnn_dropout if rnn_num_layers > 1 else 0.0,
119
+ dropout=(
120
+ rnn_params["dropout"] if rnn_params["num_layers"] > 1 else 0.0
121
+ ),
108
122
  )
109
123
  elif rnn_type == "LSTM":
110
124
  self.rnn = nn.LSTM(
111
125
  input_size=seq_emb_dim,
112
- hidden_size=rnn_hidden_size,
113
- num_layers=rnn_num_layers,
126
+ hidden_size=self.rnn_hidden_size,
127
+ num_layers=rnn_params["num_layers"],
114
128
  batch_first=True,
115
- dropout=rnn_dropout if rnn_num_layers > 1 else 0.0,
129
+ dropout=(
130
+ rnn_params["dropout"] if rnn_params["num_layers"] > 1 else 0.0
131
+ ),
116
132
  )
117
133
  else:
118
134
  raise ValueError(f"Unknown RNN type: {rnn_type}")
119
135
 
120
136
  user_final_dim = 0
121
137
  if use_long_term:
122
- user_final_dim += rnn_hidden_size
138
+ user_final_dim += self.rnn_hidden_size
123
139
  if use_short_term:
124
140
  user_final_dim += seq_emb_dim
125
141
 
@@ -129,13 +145,7 @@ class SDM(BaseMatchModel):
129
145
  user_final_dim += feat.embedding_dim
130
146
 
131
147
  # User DNN to final embedding
132
- self.user_dnn = MLP(
133
- input_dim=user_final_dim,
134
- hidden_dims=[rnn_hidden_size * 2, embedding_dim],
135
- output_dim=None,
136
- dropout=dnn_dropout,
137
- activation=dnn_activation,
138
- )
148
+ self.user_dnn = MLP(input_dim=user_final_dim, **user_mlp_params)
139
149
 
140
150
  # Item tower
141
151
  item_features = []
@@ -158,15 +168,8 @@ class SDM(BaseMatchModel):
158
168
  item_input_dim += feat.embedding_dim
159
169
 
160
170
  # Item DNN
161
- if len(item_dnn_hidden_units) > 0:
162
- item_dnn_units = item_dnn_hidden_units + [embedding_dim]
163
- self.item_dnn = MLP(
164
- input_dim=item_input_dim,
165
- hidden_dims=item_dnn_units,
166
- output_dim=None,
167
- dropout=dnn_dropout,
168
- activation=dnn_activation,
169
- )
171
+ if len(item_mlp_params["hidden_dims"]) > 0:
172
+ self.item_dnn = MLP(input_dim=item_input_dim, **item_mlp_params)
170
173
  else:
171
174
  self.item_dnn = None
172
175
 
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
3
3
  Checkpoint: edit on 18/12/2025
4
4
  Author: Yang Zhou, zyaztec@gmail.com
5
5
  Reference:
6
- [1] Covington P, Adams J, Sargin E. Deep neural networks for youtube recommendations[C]
7
- //Proceedings of the 10th ACM conference on recommender systems. 2016: 191-198.
6
+ - [1] Covington P, Adams J, Sargin E. Deep neural networks for youtube recommendations[C] //Proceedings of the 10th ACM conference on recommender systems. 2016: 191-198.
8
7
  """
9
8
 
10
9
  from typing import Literal
@@ -40,11 +39,9 @@ class YoutubeDNN(BaseMatchModel):
40
39
  item_dense_features: list[DenseFeature] | None = None,
41
40
  item_sparse_features: list[SparseFeature] | None = None,
42
41
  item_sequence_features: list[SequenceFeature] | None = None,
43
- user_dnn_hidden_units: list[int] = [256, 128, 64],
44
- item_dnn_hidden_units: list[int] = [256, 128, 64],
42
+ user_mlp_params: dict | None = None,
43
+ item_mlp_params: dict | None = None,
45
44
  embedding_dim: int = 64,
46
- dnn_activation: str = "relu",
47
- dnn_dropout: float = 0.0,
48
45
  training_mode: Literal["pointwise", "pairwise", "listwise"] = "listwise",
49
46
  num_negative_samples: int = 100,
50
47
  temperature: float = 1.0,
@@ -75,8 +72,17 @@ class YoutubeDNN(BaseMatchModel):
75
72
  )
76
73
 
77
74
  self.embedding_dim = embedding_dim
78
- self.user_dnn_hidden_units = user_dnn_hidden_units
79
- self.item_dnn_hidden_units = item_dnn_hidden_units
75
+ user_mlp_params = user_mlp_params or {}
76
+ item_mlp_params = item_mlp_params or {}
77
+
78
+ user_mlp_params.setdefault("hidden_dims", [256, 128, 64])
79
+ item_mlp_params.setdefault("hidden_dims", [256, 128, 64])
80
+ user_mlp_params.setdefault("activation", "relu")
81
+ user_mlp_params.setdefault("dropout", 0.0)
82
+ item_mlp_params.setdefault("activation", "relu")
83
+ item_mlp_params.setdefault("dropout", 0.0)
84
+ user_mlp_params.setdefault("output_dim", embedding_dim)
85
+ item_mlp_params.setdefault("output_dim", embedding_dim)
80
86
 
81
87
  # User tower
82
88
  user_features = []
@@ -99,14 +105,7 @@ class YoutubeDNN(BaseMatchModel):
99
105
  # Sequence features are pooled before entering the DNN
100
106
  user_input_dim += feat.embedding_dim
101
107
 
102
- user_dnn_units = user_dnn_hidden_units + [embedding_dim]
103
- self.user_dnn = MLP(
104
- input_dim=user_input_dim,
105
- hidden_dims=user_dnn_units,
106
- output_dim=None,
107
- dropout=dnn_dropout,
108
- activation=dnn_activation,
109
- )
108
+ self.user_dnn = MLP(input_dim=user_input_dim, **user_mlp_params)
110
109
 
111
110
  # Item tower
112
111
  item_features = []
@@ -128,14 +127,7 @@ class YoutubeDNN(BaseMatchModel):
128
127
  for feat in item_sequence_features or []:
129
128
  item_input_dim += feat.embedding_dim
130
129
 
131
- item_dnn_units = item_dnn_hidden_units + [embedding_dim]
132
- self.item_dnn = MLP(
133
- input_dim=item_input_dim,
134
- hidden_dims=item_dnn_units,
135
- output_dim=None,
136
- dropout=dnn_dropout,
137
- activation=dnn_activation,
138
- )
130
+ self.item_dnn = MLP(input_dim=item_input_dim, **item_mlp_params)
139
131
 
140
132
  self.register_regularization_weights(
141
133
  embedding_attr="user_embedding", include_modules=["user_dnn"]
@@ -4,8 +4,8 @@ Date: create on 01/12/2025
4
4
  Checkpoint: edit on 11/12/2025
5
5
  Author: Yang Zhou, zyaztec@gmail.com
6
6
  Reference:
7
- [1] Meta AI. Generative Recommenders (HSTU encoder) — https://github.com/meta-recsys/generative-recommenders
8
- [2] Ma W, Li P, Chen C, et al. Actions speak louder than words: Trillion-parameter sequential transducers for generative recommendations. arXiv:2402.17152.
7
+ - [1] Meta AI. Generative Recommenders (HSTU encoder) — https://github.com/meta-recsys/generative-recommenders
8
+ - [2] Ma W, Li P, Chen C, et al. Actions speak louder than words: Trillion-parameter sequential transducers for generative recommendations. arXiv:2402.17152.
9
9
 
10
10
  Hierarchical Sequential Transduction Unit (HSTU) is the core encoder behind
11
11
  Meta’s Generative Recommenders. It replaces softmax attention with lightweight