nextrec 0.4.25__py3-none-any.whl → 0.4.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/asserts.py +72 -0
- nextrec/basic/loggers.py +18 -1
- nextrec/basic/model.py +54 -51
- nextrec/data/batch_utils.py +23 -3
- nextrec/data/dataloader.py +3 -8
- nextrec/models/multi_task/[pre]aitm.py +173 -0
- nextrec/models/multi_task/[pre]snr_trans.py +232 -0
- nextrec/models/multi_task/[pre]star.py +192 -0
- nextrec/models/multi_task/apg.py +330 -0
- nextrec/models/multi_task/cross_stitch.py +229 -0
- nextrec/models/multi_task/escm.py +290 -0
- nextrec/models/multi_task/esmm.py +8 -21
- nextrec/models/multi_task/hmoe.py +203 -0
- nextrec/models/multi_task/mmoe.py +20 -28
- nextrec/models/multi_task/pepnet.py +81 -76
- nextrec/models/multi_task/ple.py +30 -44
- nextrec/models/multi_task/poso.py +13 -22
- nextrec/models/multi_task/share_bottom.py +14 -25
- nextrec/models/ranking/afm.py +2 -2
- nextrec/models/ranking/autoint.py +2 -4
- nextrec/models/ranking/dcn.py +2 -3
- nextrec/models/ranking/dcn_v2.py +2 -3
- nextrec/models/ranking/deepfm.py +2 -3
- nextrec/models/ranking/dien.py +7 -9
- nextrec/models/ranking/din.py +8 -10
- nextrec/models/ranking/eulernet.py +1 -2
- nextrec/models/ranking/ffm.py +1 -2
- nextrec/models/ranking/fibinet.py +2 -3
- nextrec/models/ranking/fm.py +1 -1
- nextrec/models/ranking/lr.py +1 -1
- nextrec/models/ranking/masknet.py +1 -2
- nextrec/models/ranking/pnn.py +1 -2
- nextrec/models/ranking/widedeep.py +2 -3
- nextrec/models/ranking/xdeepfm.py +2 -4
- nextrec/models/representation/rqvae.py +4 -4
- nextrec/models/retrieval/dssm.py +18 -26
- nextrec/models/retrieval/dssm_v2.py +15 -22
- nextrec/models/retrieval/mind.py +9 -15
- nextrec/models/retrieval/sdm.py +36 -33
- nextrec/models/retrieval/youtube_dnn.py +16 -24
- nextrec/models/sequential/hstu.py +2 -2
- nextrec/utils/__init__.py +5 -1
- nextrec/utils/model.py +9 -14
- {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/METADATA +72 -62
- nextrec-0.4.28.dist-info/RECORD +90 -0
- nextrec/models/multi_task/aitm.py +0 -0
- nextrec/models/multi_task/snr_trans.py +0 -0
- nextrec-0.4.25.dist-info/RECORD +0 -86
- {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/WHEEL +0 -0
- {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.25.dist-info → nextrec-0.4.28.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/deepfm.py
CHANGED
@@ -3,9 +3,8 @@ Date: create on 27/10/2025
 Checkpoint: edit on 23/12/2025
 Author: Yang Zhou,zyaztec@gmail.com
 Reference:
-    [1] Guo H, Tang R, Ye Y, et al. DeepFM: A factorization-machine based neural network
-    for CTR prediction[J]. arXiv preprint arXiv:1703.04247, 2017.
-    (https://arxiv.org/abs/1703.04247)
+    - [1] Guo H, Tang R, Ye Y, et al. DeepFM: A factorization-machine based neural network for CTR prediction[J]. arXiv preprint arXiv:1703.04247, 2017.
+      URL: https://arxiv.org/abs/1703.04247

 DeepFM combines a Factorization Machine (FM) for explicit second-order feature
 interactions with a deep MLP for high-order nonlinear patterns. Both parts share
nextrec/models/ranking/dien.py
CHANGED
@@ -3,9 +3,7 @@ Date: create on 09/11/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Checkpoint: edit on 09/12/2025
 Reference:
-    [1] Zhou G, Mou N, Fan Y, et al. Deep interest evolution network for click-through
-    rate prediction[C] // Proceedings of the AAAI conference on artificial intelligence.
-    2019, 33(01): 5941-5948. (https://arxiv.org/abs/1809.03672)
+    - [1] Zhou G, Mou N, Fan Y, et al. Deep interest evolution network for click-through rate prediction[C] // Proceedings of the AAAI conference on artificial intelligence. 2019, 33(01): 5941-5948. (https://arxiv.org/abs/1809.03672)

 DIEN is a CTR prediction model that explicitly models how user interests evolve
 over time. It introduces a two-stage pipeline:
@@ -58,7 +56,6 @@ from nextrec.basic.layers import (
 )
 from nextrec.basic.heads import TaskHead
 from nextrec.basic.model import BaseModel
-from nextrec.utils.types import ActivationName


 class AUGRU(nn.Module):
@@ -211,8 +208,7 @@ class DIEN(BaseModel):
         neg_behavior_feature_name: str | None = None,
         mlp_params: dict | None = None,
         gru_hidden_size: int = 64,
-
-        attention_activation: ActivationName = "sigmoid",
+        attention_mlp_params: dict | None = None,
         use_negsampling: bool = False,
         aux_loss_weight: float = 1.0,
         **kwargs,
@@ -222,7 +218,9 @@ class DIEN(BaseModel):
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
         mlp_params = mlp_params or {}
-
+        attention_mlp_params = attention_mlp_params or {}
+        attention_mlp_params.setdefault("hidden_dims", [80, 40])
+        attention_mlp_params.setdefault("activation", "sigmoid")

         super(DIEN, self).__init__(
             dense_features=dense_features,
@@ -285,8 +283,8 @@ class DIEN(BaseModel):

         self.attention_layer = AttentionPoolingLayer(
             embedding_dim=gru_hidden_size,
-            hidden_units=
-            activation=
+            hidden_units=attention_mlp_params["hidden_dims"],
+            activation=attention_mlp_params["activation"],
             use_softmax=False,
         )

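The DIEN change above sets the pattern the rest of this release follows: per-component keyword arguments collapse into a single *_params dict, and dict.setdefault fills only the keys the caller omitted, so explicit overrides always win. A minimal runnable sketch of that merge, using the DIEN defaults from this diff (the override values are illustrative):

attention_mlp_params = {"hidden_dims": [64, 32]}          # caller override
attention_mlp_params.setdefault("hidden_dims", [80, 40])  # no-op: key already set
attention_mlp_params.setdefault("activation", "sigmoid")  # fills the omitted key
assert attention_mlp_params == {"hidden_dims": [64, 32], "activation": "sigmoid"}

DIN below applies the same migration, with "dice" as its default attention activation.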
nextrec/models/ranking/din.py
CHANGED
@@ -3,10 +3,8 @@ Date: create on 09/11/2025
 Checkpoint: edit on 09/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate
-    prediction[C] //Proceedings of the 24th ACM SIGKDD international conference on
-    knowledge discovery & data mining. 2018: 1059-1068.
-    (https://arxiv.org/abs/1706.06978)
+    - [1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C] //Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery & data mining. 2018: 1059-1068.
+      URL: https://arxiv.org/abs/1706.06978

 Deep Interest Network (DIN) is a CTR model that builds a target-aware user
 representation by attending over the historical behavior sequence. Instead of
@@ -58,7 +56,6 @@ from nextrec.basic.layers import (
 )
 from nextrec.basic.heads import TaskHead
 from nextrec.basic.model import BaseModel
-from nextrec.utils.types import ActivationName


 class DIN(BaseModel):
@@ -78,8 +75,7 @@ class DIN(BaseModel):
         behavior_feature_name: str | None = None,
         candidate_feature_name: str | None = None,
         mlp_params: dict | None = None,
-
-        attention_activation: ActivationName = "dice",
+        attention_mlp_params: dict | None = None,
         attention_use_softmax: bool = True,
         **kwargs,
     ):
@@ -88,7 +84,9 @@ class DIN(BaseModel):
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
         mlp_params = mlp_params or {}
-
+        attention_mlp_params = attention_mlp_params or {}
+        attention_mlp_params.setdefault("hidden_dims", [80, 40])
+        attention_mlp_params.setdefault("activation", "dice")

         super(DIN, self).__init__(
             dense_features=dense_features,
@@ -135,8 +133,8 @@ class DIN(BaseModel):
         )
         self.attention = AttentionPoolingLayer(
             embedding_dim=behavior_emb_dim,
-            hidden_units=
-            activation=
+            hidden_units=attention_mlp_params["hidden_dims"],
+            activation=attention_mlp_params["activation"],
             use_softmax=attention_use_softmax,
         )

nextrec/models/ranking/eulernet.py
CHANGED
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
 Checkpoint: edit on 23/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Zhao Z, Zhang H, Tang H, et al. EulerNet: Efficient and Effective Feature
-    Interaction Modeling with Euler's Formula. (SIGIR 2021)
+    - [1] Zhao Z, Zhang H, Tang H, et al. EulerNet: Efficient and Effective Feature Interaction Modeling with Euler's Formula. (SIGIR 2021)

 EulerNet models feature interactions in the complex domain using Euler's
 formula. Each field embedding is transformed into amplitude and phase,
nextrec/models/ranking/ffm.py
CHANGED
@@ -3,8 +3,7 @@ Date: create on 19/12/2025
 Checkpoint: edit on 23/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Juan Y, Zhuang Y, Chin W-S, et al. Field-aware Factorization Machines for CTR
-    Prediction[C]//RecSys. 2016: 43-50.
+    - [1] Juan Y, Zhuang Y, Chin W-S, et al. Field-aware Factorization Machines for CTR Prediction[C]//RecSys. 2016: 43-50.

 Field-aware Factorization Machines (FFM) extend FM by learning a distinct
 embedding of each feature for every target field. For a pair of fields (i, j),
nextrec/models/ranking/fibinet.py
CHANGED
@@ -3,9 +3,8 @@ Date: create on 09/11/2025
 Checkpoint: edit on 09/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Huang T, Zhang Z, Zhang B, et al. FiBiNET: Combining feature importance and bilinear
-    feature interaction for click-through rate prediction[C]//RecSys. 2019: 169-177.
-    (https://arxiv.org/abs/1905.09433)
+    - [1] Huang T, Zhang Z, Zhang B, et al. FiBiNET: Combining feature importance and bilinear feature interaction for click-through rate prediction[C]//RecSys. 2019: 169-177.
+      URL: https://arxiv.org/abs/1905.09433

 FiBiNET (Feature Importance and Bilinear Interaction Network) is a CTR model that
 jointly learns which fields matter most and how they interact. It first uses SENET
nextrec/models/ranking/fm.py
CHANGED
@@ -3,7 +3,7 @@ Date: create on 09/11/2025
 Checkpoint: edit on 23/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Rendle S. Factorization machines[C]//ICDM. 2010: 995-1000.
+    - [1] Rendle S. Factorization machines[C]//ICDM. 2010: 995-1000.

 Factorization Machines (FM) capture second-order feature interactions with
 linear complexity by factorizing the pairwise interaction matrix. Each field
nextrec/models/ranking/lr.py
CHANGED
@@ -3,7 +3,7 @@ Date: create on 09/11/2025
 Checkpoint: edit on 23/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Hosmer D W, Lemeshow S, Sturdivant R X. Applied Logistic Regression.
+    - [1] Hosmer D W, Lemeshow S, Sturdivant R X. Applied Logistic Regression.

 Logistic Regression (LR) is a classic linear baseline for CTR/ranking tasks.
 It maps each feature (dense, sparse, or sequence) into a numeric vector and
nextrec/models/ranking/masknet.py
CHANGED
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
 Checkpoint: edit on 23/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Wang Z, She Q, Zhang J. MaskNet: Introducing Feature-Wise
-    Multiplication to CTR Ranking Models by Instance-Guided Mask.
+    - [1] Wang Z, She Q, Zhang J. MaskNet: Introducing Feature-Wise Multiplication to CTR Ranking Models by Instance-Guided Mask.

 MaskNet is a CTR prediction model that introduces instance-guided,
 feature-wise multiplicative interactions into deep ranking networks.
nextrec/models/ranking/pnn.py
CHANGED
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
 Checkpoint: edit on 23/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response
-    prediction[C]//ICDM. 2016: 1149-1154. (https://arxiv.org/abs/1611.00144)
+    - [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//ICDM. 2016: 1149-1154. (https://arxiv.org/abs/1611.00144)

 Product-based Neural Networks (PNN) are CTR prediction models that explicitly
 encode feature interactions by combining:
nextrec/models/ranking/widedeep.py
CHANGED
@@ -3,9 +3,8 @@ Date: create on 09/11/2025
 Checkpoint: edit on 23/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Cheng H T, Koc L, Harmsen J, et al. Wide & Deep learning for recommender systems[C]
-    //Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. 2016: 7-10.
-    (https://arxiv.org/abs/1606.07792)
+    - [1] Cheng H T, Koc L, Harmsen J, et al. Wide & Deep learning for recommender systems[C] //Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. 2016: 7-10.
+      URL: https://arxiv.org/abs/1606.07792

 Wide & Deep blends a linear wide component (memorization of cross features) with a
 deep neural network (generalization) sharing the same feature space. The wide part
nextrec/models/ranking/xdeepfm.py
CHANGED
@@ -3,10 +3,8 @@ Date: create on 09/11/2025
 Checkpoint: edit on 23/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
-    for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
-    knowledge discovery & data mining. 2018: 1754-1763.
-    (https://arxiv.org/abs/1803.05170)
+    - [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on knowledge discovery & data mining. 2018: 1754-1763.
+      URL: https://arxiv.org/abs/1803.05170

 xDeepFM is a CTR prediction model that unifies explicit and implicit
 feature interaction learning. It extends DeepFM by adding the
nextrec/models/representation/rqvae.py
CHANGED
@@ -4,11 +4,11 @@ Residual Quantized Variational AutoEncoder (RQ-VAE) for Generative Recommendatio
 Date: created on 11/12/2025
 Checkpoint: edit on 13/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
-Source
-    [1] Tencent-Advertising-Algorithm-Competition-2025-Baseline
+Source Code Reference:
+    - [1] Tencent-Advertising-Algorithm-Competition-2025-Baseline
 Reference:
-    [1] Lee et al. Autoregressive Image Generation using Residual Quantization. CVPR 2022.
-    [2] Zeghidour et al. SoundStream: An End-to-End Neural Audio Codec. IEEE/ACM TASLP 2021.
+    - [1] Lee et al. Autoregressive Image Generation using Residual Quantization. CVPR 2022.
+    - [2] Zeghidour et al. SoundStream: An End-to-End Neural Audio Codec. IEEE/ACM TASLP 2021.

 RQ-VAE learns hierarchical discrete representations via residual quantization.
 It encodes continuous embeddings (e.g., item/user embeddings) into multi-level
nextrec/models/retrieval/dssm.py
CHANGED
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
 Checkpoint: edit on 18/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Huang P S, He X, Gao J, et al. Learning deep structured semantic models for web search using clickthrough data[C]
-    //Proceedings of the 22nd ACM international conference on Information & Knowledge Management. 2013: 2333-2338.
+    - [1] Huang P S, He X, Gao J, et al. Learning deep structured semantic models for web search using clickthrough data[C] //Proceedings of the 22nd ACM international conference on Information & Knowledge Management. 2013: 2333-2338.
 """

 from typing import Literal
@@ -40,11 +39,9 @@ class DSSM(BaseMatchModel):
         item_dense_features: list[DenseFeature] | None = None,
         item_sparse_features: list[SparseFeature] | None = None,
         item_sequence_features: list[SequenceFeature] | None = None,
-
-
+        user_mlp_params: dict | None = None,
+        item_mlp_params: dict | None = None,
         embedding_dim: int = 64,
-        dnn_activation: str = "relu",
-        dnn_dropout: float = 0.0,
         training_mode: Literal["pointwise", "pairwise", "listwise"] = "pointwise",
         num_negative_samples: int = 4,
         temperature: float = 1.0,
@@ -75,8 +72,17 @@ class DSSM(BaseMatchModel):
         )

         self.embedding_dim = embedding_dim
-
-
+        user_mlp_params = user_mlp_params or {}
+        item_mlp_params = item_mlp_params or {}
+
+        user_mlp_params.setdefault("hidden_dims", [256, 128, 64])
+        item_mlp_params.setdefault("hidden_dims", [256, 128, 64])
+        user_mlp_params.setdefault("activation", "relu")
+        user_mlp_params.setdefault("dropout", 0.0)
+        item_mlp_params.setdefault("activation", "relu")
+        item_mlp_params.setdefault("dropout", 0.0)
+        user_mlp_params.setdefault("output_dim", embedding_dim)
+        item_mlp_params.setdefault("output_dim", embedding_dim)

         # User tower embedding layer
         user_features = []
@@ -99,15 +105,8 @@ class DSSM(BaseMatchModel):
         for feat in user_sequence_features or []:
             user_input_dim += feat.embedding_dim

-        # User
-
-        self.user_dnn = MLP(
-            input_dim=user_input_dim,
-            hidden_dims=user_dnn_units,
-            output_dim=None,
-            dropout=dnn_dropout,
-            activation=dnn_activation,
-        )
+        # User MLP
+        self.user_dnn = MLP(input_dim=user_input_dim, **user_mlp_params)

         # Item tower embedding layer
         item_features = []
@@ -130,15 +129,8 @@ class DSSM(BaseMatchModel):
         for feat in item_sequence_features or []:
             item_input_dim += feat.embedding_dim

-        # Item
-
-        self.item_dnn = MLP(
-            input_dim=item_input_dim,
-            hidden_dims=item_dnn_units,
-            output_dim=None,
-            dropout=dnn_dropout,
-            activation=dnn_activation,
-        )
+        # Item MLP
+        self.item_dnn = MLP(input_dim=item_input_dim, **item_mlp_params)

         self.register_regularization_weights(
             embedding_attr="user_embedding", include_modules=["user_dnn"]
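DSSM_v2 (next) and YoutubeDNN repeat this defaulting verbatim, so the effective tower configuration is easy to reproduce outside the model. A runnable sketch; resolve_tower_params is a hypothetical helper name, since the models inline these setdefault calls in __init__:

def resolve_tower_params(mlp_params: dict | None, embedding_dim: int) -> dict:
    params = dict(mlp_params or {})                  # copy, unlike the in-place setdefaults
    params.setdefault("hidden_dims", [256, 128, 64])
    params.setdefault("activation", "relu")
    params.setdefault("dropout", 0.0)
    params.setdefault("output_dim", embedding_dim)   # tower output matches embedding_dim
    return params

print(resolve_tower_params({"dropout": 0.1}, embedding_dim=64))
# {'dropout': 0.1, 'hidden_dims': [256, 128, 64], 'activation': 'relu', 'output_dim': 64}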
nextrec/models/retrieval/dssm_v2.py
CHANGED
@@ -36,11 +36,9 @@ class DSSM_v2(BaseMatchModel):
         item_dense_features: list[DenseFeature] | None = None,
         item_sparse_features: list[SparseFeature] | None = None,
         item_sequence_features: list[SequenceFeature] | None = None,
-
-
+        user_mlp_params: dict | None = None,
+        item_mlp_params: dict | None = None,
         embedding_dim: int = 64,
-        dnn_activation: str = "relu",
-        dnn_dropout: float = 0.0,
         training_mode: Literal["pointwise", "pairwise", "listwise"] = "pairwise",
         num_negative_samples: int = 4,
         temperature: float = 1.0,
@@ -71,8 +69,17 @@ class DSSM_v2(BaseMatchModel):
         )

         self.embedding_dim = embedding_dim
-
-
+        user_mlp_params = user_mlp_params or {}
+        item_mlp_params = item_mlp_params or {}
+
+        user_mlp_params.setdefault("hidden_dims", [256, 128, 64])
+        item_mlp_params.setdefault("hidden_dims", [256, 128, 64])
+        user_mlp_params.setdefault("activation", "relu")
+        user_mlp_params.setdefault("dropout", 0.0)
+        item_mlp_params.setdefault("activation", "relu")
+        item_mlp_params.setdefault("dropout", 0.0)
+        user_mlp_params.setdefault("output_dim", embedding_dim)
+        item_mlp_params.setdefault("output_dim", embedding_dim)

         # User tower
         user_features = []
@@ -94,14 +101,7 @@ class DSSM_v2(BaseMatchModel):
         for feat in user_sequence_features or []:
             user_input_dim += feat.embedding_dim

-
-        self.user_dnn = MLP(
-            input_dim=user_input_dim,
-            hidden_dims=user_dnn_units,
-            output_dim=None,
-            dropout=dnn_dropout,
-            activation=dnn_activation,
-        )
+        self.user_dnn = MLP(input_dim=user_input_dim, **user_mlp_params)

         # Item tower
         item_features = []
@@ -123,14 +123,7 @@ class DSSM_v2(BaseMatchModel):
         for feat in item_sequence_features or []:
             item_input_dim += feat.embedding_dim

-
-        self.item_dnn = MLP(
-            input_dim=item_input_dim,
-            hidden_dims=item_dnn_units,
-            output_dim=None,
-            dropout=dnn_dropout,
-            activation=dnn_activation,
-        )
+        self.item_dnn = MLP(input_dim=item_input_dim, **item_mlp_params)

         self.register_regularization_weights(
             embedding_attr="user_embedding", include_modules=["user_dnn"]
nextrec/models/retrieval/mind.py
CHANGED
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
 Checkpoint: edit on 18/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Li C, Liu Z, Wu M, et al. Multi-interest network with dynamic routing for recommendation at Tmall[C]
-    //Proceedings of the 28th ACM international conference on information and knowledge management. 2019: 2615-2623.
+    - [1] Li C, Liu Z, Wu M, et al. Multi-interest network with dynamic routing for recommendation at Tmall[C] //Proceedings of the 28th ACM international conference on information and knowledge management. 2019: 2615-2623.
 """

 from typing import Literal
@@ -195,9 +194,7 @@ class MIND(BaseMatchModel):
         capsule_bilinear_type: int = 2,
         routing_times: int = 3,
         relu_layer: bool = False,
-
-        dnn_activation: str = "relu",
-        dnn_dropout: float = 0.0,
+        item_mlp_params: dict | None = None,
         training_mode: Literal["pointwise", "pairwise", "listwise"] = "pointwise",
         num_negative_samples: int = 100,
         temperature: float = 1.0,
@@ -229,7 +226,11 @@ class MIND(BaseMatchModel):

         self.embedding_dim = embedding_dim
         self.num_interests = num_interests
-
+        item_mlp_params = item_mlp_params or {}
+        item_mlp_params.setdefault("hidden_dims", [256, 128])
+        item_mlp_params.setdefault("activation", "relu")
+        item_mlp_params.setdefault("dropout", 0.0)
+        item_mlp_params.setdefault("output_dim", embedding_dim)

         user_features = []
         if user_dense_features:
@@ -291,15 +292,8 @@ class MIND(BaseMatchModel):
             item_input_dim += feat.embedding_dim

         # Item DNN
-        if len(
-
-            self.item_dnn = MLP(
-                input_dim=item_input_dim,
-                hidden_dims=item_dnn_units,
-                output_dim=None,
-                dropout=dnn_dropout,
-                activation=dnn_activation,
-            )
+        if len(item_mlp_params["hidden_dims"]) > 0:
+            self.item_dnn = MLP(input_dim=item_input_dim, **item_mlp_params)
         else:
             self.item_dnn = None

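A side effect of the new guard is that MIND's item MLP becomes opt-out: because the setdefault merge never overwrites a caller-supplied key, passing an explicit empty hidden_dims list leaves self.item_dnn as None and the tower skips the MLP entirely. A runnable illustration of the guard (SDM below uses the identical check):

item_mlp_params = {"hidden_dims": []}                  # caller disables the item MLP
item_mlp_params.setdefault("hidden_dims", [256, 128])  # no-op: key already present
print(len(item_mlp_params["hidden_dims"]) > 0)         # False -> self.item_dnn = None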
nextrec/models/retrieval/sdm.py
CHANGED
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
 Checkpoint: edit on 18/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Ying H, Zhuang F, Zhang F, et al. Sequential recommender system based on hierarchical attention networks[C]
-    //IJCAI. 2018: 3926-3932.
+    - [1] Ying H, Zhuang F, Zhang F, et al. Sequential recommender system based on hierarchical attention networks[C] //IJCAI. 2018: 3926-3932.
 """

 from typing import Literal
@@ -37,14 +36,11 @@ class SDM(BaseMatchModel):
         item_sequence_features: list[SequenceFeature] | None = None,
         embedding_dim: int = 64,
         rnn_type: Literal["GRU", "LSTM"] = "GRU",
-
-        rnn_num_layers: int = 1,
-        rnn_dropout: float = 0.0,
+        rnn_params: dict | None = None,
         use_short_term: bool = True,
         use_long_term: bool = True,
-
-
-        dnn_dropout: float = 0.0,
+        user_mlp_params: dict | None = None,
+        item_mlp_params: dict | None = None,
         training_mode: Literal["pointwise", "pairwise", "listwise"] = "pointwise",
         num_negative_samples: int = 4,
         temperature: float = 1.0,
@@ -76,10 +72,26 @@ class SDM(BaseMatchModel):

         self.embedding_dim = embedding_dim
         self.rnn_type = rnn_type
-        self.rnn_hidden_size = rnn_hidden_size
         self.use_short_term = use_short_term
         self.use_long_term = use_long_term
-
+        rnn_params = rnn_params or {}
+        user_mlp_params = user_mlp_params or {}
+        item_mlp_params = item_mlp_params or {}
+
+        rnn_params.setdefault("hidden_size", 64)
+        rnn_params.setdefault("num_layers", 1)
+        rnn_params.setdefault("dropout", 0.0)
+        self.rnn_hidden_size = rnn_params["hidden_size"]
+
+        user_mlp_params.setdefault("hidden_dims", [self.rnn_hidden_size * 2])
+        user_mlp_params.setdefault("activation", "relu")
+        user_mlp_params.setdefault("dropout", 0.0)
+        user_mlp_params.setdefault("output_dim", embedding_dim)
+
+        item_mlp_params.setdefault("hidden_dims", [256, 128])
+        item_mlp_params.setdefault("activation", "relu")
+        item_mlp_params.setdefault("dropout", 0.0)
+        item_mlp_params.setdefault("output_dim", embedding_dim)

         # User tower
         user_features = []
@@ -101,25 +113,29 @@ class SDM(BaseMatchModel):
         if rnn_type == "GRU":
             self.rnn = nn.GRU(
                 input_size=seq_emb_dim,
-                hidden_size=rnn_hidden_size,
-                num_layers=
+                hidden_size=self.rnn_hidden_size,
+                num_layers=rnn_params["num_layers"],
                 batch_first=True,
-                dropout=
+                dropout=(
+                    rnn_params["dropout"] if rnn_params["num_layers"] > 1 else 0.0
+                ),
             )
         elif rnn_type == "LSTM":
             self.rnn = nn.LSTM(
                 input_size=seq_emb_dim,
-                hidden_size=rnn_hidden_size,
-                num_layers=
+                hidden_size=self.rnn_hidden_size,
+                num_layers=rnn_params["num_layers"],
                 batch_first=True,
-                dropout=
+                dropout=(
+                    rnn_params["dropout"] if rnn_params["num_layers"] > 1 else 0.0
+                ),
             )
         else:
             raise ValueError(f"Unknown RNN type: {rnn_type}")

         user_final_dim = 0
         if use_long_term:
-            user_final_dim += rnn_hidden_size
+            user_final_dim += self.rnn_hidden_size
         if use_short_term:
             user_final_dim += seq_emb_dim

@@ -129,13 +145,7 @@ class SDM(BaseMatchModel):
             user_final_dim += feat.embedding_dim

         # User DNN to final embedding
-        self.user_dnn = MLP(
-            input_dim=user_final_dim,
-            hidden_dims=[rnn_hidden_size * 2, embedding_dim],
-            output_dim=None,
-            dropout=dnn_dropout,
-            activation=dnn_activation,
-        )
+        self.user_dnn = MLP(input_dim=user_final_dim, **user_mlp_params)

         # Item tower
         item_features = []
@@ -158,15 +168,8 @@ class SDM(BaseMatchModel):
             item_input_dim += feat.embedding_dim

         # Item DNN
-        if len(
-
-            self.item_dnn = MLP(
-                input_dim=item_input_dim,
-                hidden_dims=item_dnn_units,
-                output_dim=None,
-                dropout=dnn_dropout,
-                activation=dnn_activation,
-            )
+        if len(item_mlp_params["hidden_dims"]) > 0:
+            self.item_dnn = MLP(input_dim=item_input_dim, **item_mlp_params)
         else:
             self.item_dnn = None

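SDM's recurrent settings likewise travel in one rnn_params dict, and the rebuilt constructor adds a guard in both branches: torch.nn.GRU and torch.nn.LSTM apply dropout only between stacked layers, so the value is forced to 0.0 whenever num_layers is 1, which also avoids PyTorch's warning for that combination. A runnable sketch of the resolution:

rnn_params = {"num_layers": 1, "dropout": 0.3}  # dropout requested on a 1-layer RNN
rnn_params.setdefault("hidden_size", 64)
effective_dropout = rnn_params["dropout"] if rnn_params["num_layers"] > 1 else 0.0
print(rnn_params["hidden_size"], effective_dropout)  # 64 0.0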
nextrec/models/retrieval/youtube_dnn.py
CHANGED
@@ -3,8 +3,7 @@ Date: create on 09/11/2025
 Checkpoint: edit on 18/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Covington P, Adams J, Sargin E. Deep neural networks for youtube recommendations[C]
-    //Proceedings of the 10th ACM conference on recommender systems. 2016: 191-198.
+    - [1] Covington P, Adams J, Sargin E. Deep neural networks for youtube recommendations[C] //Proceedings of the 10th ACM conference on recommender systems. 2016: 191-198.
 """

 from typing import Literal
@@ -40,11 +39,9 @@ class YoutubeDNN(BaseMatchModel):
         item_dense_features: list[DenseFeature] | None = None,
         item_sparse_features: list[SparseFeature] | None = None,
         item_sequence_features: list[SequenceFeature] | None = None,
-
-
+        user_mlp_params: dict | None = None,
+        item_mlp_params: dict | None = None,
         embedding_dim: int = 64,
-        dnn_activation: str = "relu",
-        dnn_dropout: float = 0.0,
         training_mode: Literal["pointwise", "pairwise", "listwise"] = "listwise",
         num_negative_samples: int = 100,
         temperature: float = 1.0,
@@ -75,8 +72,17 @@ class YoutubeDNN(BaseMatchModel):
         )

         self.embedding_dim = embedding_dim
-
-
+        user_mlp_params = user_mlp_params or {}
+        item_mlp_params = item_mlp_params or {}
+
+        user_mlp_params.setdefault("hidden_dims", [256, 128, 64])
+        item_mlp_params.setdefault("hidden_dims", [256, 128, 64])
+        user_mlp_params.setdefault("activation", "relu")
+        user_mlp_params.setdefault("dropout", 0.0)
+        item_mlp_params.setdefault("activation", "relu")
+        item_mlp_params.setdefault("dropout", 0.0)
+        user_mlp_params.setdefault("output_dim", embedding_dim)
+        item_mlp_params.setdefault("output_dim", embedding_dim)

         # User tower
         user_features = []
@@ -99,14 +105,7 @@ class YoutubeDNN(BaseMatchModel):
             # Sequence features are pooled before entering the DNN
             user_input_dim += feat.embedding_dim

-
-        self.user_dnn = MLP(
-            input_dim=user_input_dim,
-            hidden_dims=user_dnn_units,
-            output_dim=None,
-            dropout=dnn_dropout,
-            activation=dnn_activation,
-        )
+        self.user_dnn = MLP(input_dim=user_input_dim, **user_mlp_params)

         # Item tower
         item_features = []
@@ -128,14 +127,7 @@ class YoutubeDNN(BaseMatchModel):
         for feat in item_sequence_features or []:
             item_input_dim += feat.embedding_dim

-
-        self.item_dnn = MLP(
-            input_dim=item_input_dim,
-            hidden_dims=item_dnn_units,
-            output_dim=None,
-            dropout=dnn_dropout,
-            activation=dnn_activation,
-        )
+        self.item_dnn = MLP(input_dim=item_input_dim, **item_mlp_params)

         self.register_regularization_weights(
             embedding_attr="user_embedding", include_modules=["user_dnn"]
nextrec/models/sequential/hstu.py
CHANGED
@@ -4,8 +4,8 @@ Date: create on 01/12/2025
 Checkpoint: edit on 11/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-    [1] Meta AI. Generative Recommenders (HSTU encoder) — https://github.com/meta-recsys/generative-recommenders
-    [2] Ma W, Li P, Chen C, et al. Actions speak louder than words: Trillion-parameter sequential transducers for generative recommendations. arXiv:2402.17152.
+    - [1] Meta AI. Generative Recommenders (HSTU encoder) — https://github.com/meta-recsys/generative-recommenders
+    - [2] Ma W, Li P, Chen C, et al. Actions speak louder than words: Trillion-parameter sequential transducers for generative recommendations. arXiv:2402.17152.

 Hierarchical Sequential Transduction Unit (HSTU) is the core encoder behind
 Meta’s Generative Recommenders. It replaces softmax attention with lightweight
|