nextrec 0.4.22__py3-none-any.whl → 0.4.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/layers.py +96 -46
- nextrec/basic/metrics.py +128 -114
- nextrec/basic/model.py +94 -91
- nextrec/basic/summary.py +36 -2
- nextrec/data/dataloader.py +2 -0
- nextrec/data/preprocessor.py +137 -5
- nextrec/loss/listwise.py +19 -6
- nextrec/loss/pairwise.py +6 -4
- nextrec/loss/pointwise.py +8 -6
- nextrec/models/multi_task/aitm.py +0 -0
- nextrec/models/multi_task/apg.py +0 -0
- nextrec/models/multi_task/cross_stitch.py +0 -0
- nextrec/models/multi_task/esmm.py +5 -28
- nextrec/models/multi_task/mmoe.py +6 -28
- nextrec/models/multi_task/pepnet.py +335 -0
- nextrec/models/multi_task/ple.py +21 -40
- nextrec/models/multi_task/poso.py +17 -39
- nextrec/models/multi_task/share_bottom.py +5 -28
- nextrec/models/multi_task/snr_trans.py +0 -0
- nextrec/models/ranking/afm.py +3 -27
- nextrec/models/ranking/autoint.py +5 -38
- nextrec/models/ranking/dcn.py +1 -26
- nextrec/models/ranking/dcn_v2.py +6 -34
- nextrec/models/ranking/deepfm.py +2 -29
- nextrec/models/ranking/dien.py +2 -28
- nextrec/models/ranking/din.py +2 -27
- nextrec/models/ranking/eulernet.py +3 -30
- nextrec/models/ranking/ffm.py +0 -26
- nextrec/models/ranking/fibinet.py +8 -32
- nextrec/models/ranking/fm.py +0 -29
- nextrec/models/ranking/lr.py +0 -30
- nextrec/models/ranking/masknet.py +4 -30
- nextrec/models/ranking/pnn.py +4 -28
- nextrec/models/ranking/widedeep.py +0 -32
- nextrec/models/ranking/xdeepfm.py +0 -30
- nextrec/models/retrieval/dssm.py +4 -28
- nextrec/models/retrieval/dssm_v2.py +4 -28
- nextrec/models/retrieval/mind.py +2 -22
- nextrec/models/retrieval/sdm.py +4 -24
- nextrec/models/retrieval/youtube_dnn.py +4 -25
- nextrec/models/sequential/hstu.py +0 -18
- nextrec/utils/model.py +91 -4
- nextrec/utils/types.py +35 -0
- {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/METADATA +8 -6
- nextrec-0.4.24.dist-info/RECORD +86 -0
- nextrec-0.4.22.dist-info/RECORD +0 -81
- {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/WHEEL +0 -0
- {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/widedeep.py
CHANGED

@@ -38,8 +38,6 @@ Wide & Deep 同时使用宽线性部分(记忆共现/手工交叉)与深网
 - 共享特征空间,减少工程开销
 """
 
-import torch.nn as nn
-
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import LR, MLP, EmbeddingLayer
 from nextrec.basic.heads import TaskHead
@@ -61,40 +59,16 @@ class WideDeep(BaseModel):
         sparse_features: list[SparseFeature],
         sequence_features: list[SequenceFeature],
         mlp_params: dict,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
-        if target is None:
-            target = []
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"
-
         super(WideDeep, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-        self.loss = loss
-
         # Wide part: use all features for linear model
         self.wide_features = sparse_features + sequence_features
         # Deep part: use all features
@@ -117,12 +91,6 @@ class WideDeep(BaseModel):
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["linear", "mlp"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
 
     def forward(self, x):
         # Deep part

nextrec/models/ranking/xdeepfm.py
CHANGED

@@ -121,41 +121,18 @@ class xDeepFM(BaseModel):
         mlp_params: dict,
         cin_size: list[int] | None = None,
         split_half: bool = True,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
         cin_size = cin_size or [128, 128]
-        if target is None:
-            target = []
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"
 
         super(xDeepFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-        self.loss = loss
-
         # Linear part and CIN part: use sparse and sequence features
         self.linear_features = sparse_features + sequence_features
@@ -195,13 +172,6 @@ class xDeepFM(BaseModel):
             embedding_attr="embedding", include_modules=["linear", "cin", "mlp"]
         )
 
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         # Get embeddings for linear and CIN (sparse features only)
         input_linear = self.embedding(
nextrec/models/retrieval/dssm.py
CHANGED
@@ -10,7 +10,6 @@ Reference:
 from typing import Literal
 
 import torch
-import torch.nn as nn
 
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import MLP, EmbeddingLayer
@@ -54,17 +53,6 @@ class DSSM(BaseMatchModel):
         dense_l1_reg=0.0,
         embedding_l2_reg=0.0,
         dense_l2_reg=0.0,
-        optimizer: str | torch.optim.Optimizer = "adam",
-        optimizer_params: dict | None = None,
-        scheduler: (
-            str
-            | torch.optim.lr_scheduler._LRScheduler
-            | type[torch.optim.lr_scheduler._LRScheduler]
-            | None
-        ) = None,
-        scheduler_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
         **kwargs,
     ):
 
@@ -115,8 +103,8 @@ class DSSM(BaseMatchModel):
         user_dnn_units = user_dnn_hidden_units + [embedding_dim]
         self.user_dnn = MLP(
             input_dim=user_input_dim,
-
-
+            hidden_dims=user_dnn_units,
+            output_dim=None,
             dropout=dnn_dropout,
             activation=dnn_activation,
         )
@@ -146,8 +134,8 @@ class DSSM(BaseMatchModel):
         item_dnn_units = item_dnn_hidden_units + [embedding_dim]
         self.item_dnn = MLP(
             input_dim=item_input_dim,
-
-
+            hidden_dims=item_dnn_units,
+            output_dim=None,
             dropout=dnn_dropout,
             activation=dnn_activation,
         )
@@ -159,18 +147,6 @@ class DSSM(BaseMatchModel):
             embedding_attr="item_embedding", include_modules=["item_dnn"]
         )
 
-        if optimizer_params is None:
-            optimizer_params = {"lr": 1e-3, "weight_decay": 1e-5}
-
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            scheduler=scheduler,
-            scheduler_params=scheduler_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def user_tower(self, user_input: dict) -> torch.Tensor:
         """
         User tower encodes user features into embeddings.

nextrec/models/retrieval/dssm_v2.py
CHANGED

@@ -9,7 +9,6 @@ DSSM v2 - DSSM with pairwise training using BPR loss
 from typing import Literal
 
 import torch
-import torch.nn as nn
 
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import MLP, EmbeddingLayer
@@ -50,17 +49,6 @@ class DSSM_v2(BaseMatchModel):
         dense_l1_reg: float = 0.0,
         embedding_l2_reg: float = 0.0,
         dense_l2_reg: float = 0.0,
-        optimizer: str | torch.optim.Optimizer = "adam",
-        optimizer_params: dict | None = None,
-        scheduler: (
-            str
-            | torch.optim.lr_scheduler._LRScheduler
-            | type[torch.optim.lr_scheduler._LRScheduler]
-            | None
-        ) = None,
-        scheduler_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
         **kwargs,
     ):
 
@@ -109,8 +97,8 @@ class DSSM_v2(BaseMatchModel):
         user_dnn_units = user_dnn_hidden_units + [embedding_dim]
         self.user_dnn = MLP(
             input_dim=user_input_dim,
-
-
+            hidden_dims=user_dnn_units,
+            output_dim=None,
             dropout=dnn_dropout,
             activation=dnn_activation,
         )
@@ -138,8 +126,8 @@ class DSSM_v2(BaseMatchModel):
         item_dnn_units = item_dnn_hidden_units + [embedding_dim]
         self.item_dnn = MLP(
             input_dim=item_input_dim,
-
-
+            hidden_dims=item_dnn_units,
+            output_dim=None,
             dropout=dnn_dropout,
             activation=dnn_activation,
         )
@@ -151,18 +139,6 @@ class DSSM_v2(BaseMatchModel):
             embedding_attr="item_embedding", include_modules=["item_dnn"]
         )
 
-        if optimizer_params is None:
-            optimizer_params = {"lr": 1e-3, "weight_decay": 1e-5}
-
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            scheduler=scheduler,
-            scheduler_params=scheduler_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def user_tower(self, user_input: dict) -> torch.Tensor:
         """User tower"""
         all_user_features = (
nextrec/models/retrieval/mind.py
CHANGED
@@ -206,17 +206,6 @@ class MIND(BaseMatchModel):
         dense_l1_reg=0.0,
         embedding_l2_reg=0.0,
         dense_l2_reg=0.0,
-        optimizer: str | torch.optim.Optimizer = "adam",
-        optimizer_params: dict | None = None,
-        scheduler: (
-            str
-            | torch.optim.lr_scheduler._LRScheduler
-            | type[torch.optim.lr_scheduler._LRScheduler]
-            | None
-        ) = None,
-        scheduler_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
         **kwargs,
     ):
 
@@ -306,8 +295,8 @@ class MIND(BaseMatchModel):
         item_dnn_units = item_dnn_hidden_units + [embedding_dim]
         self.item_dnn = MLP(
             input_dim=item_input_dim,
-
-
+            hidden_dims=item_dnn_units,
+            output_dim=None,
             dropout=dnn_dropout,
             activation=dnn_activation,
         )
@@ -322,15 +311,6 @@ class MIND(BaseMatchModel):
             include_modules=["item_dnn"] if self.item_dnn else [],
         )
 
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            scheduler=scheduler,
-            scheduler_params=scheduler_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def user_tower(self, user_input: dict) -> torch.Tensor:
         """
         User tower with multi-interest extraction
nextrec/models/retrieval/sdm.py
CHANGED
@@ -53,17 +53,6 @@ class SDM(BaseMatchModel):
         dense_l1_reg=0.0,
         embedding_l2_reg=0.0,
         dense_l2_reg=0.0,
-        optimizer: str | torch.optim.Optimizer = "adam",
-        optimizer_params: dict | None = None,
-        scheduler: (
-            str
-            | torch.optim.lr_scheduler._LRScheduler
-            | type[torch.optim.lr_scheduler._LRScheduler]
-            | None
-        ) = None,
-        scheduler_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
         **kwargs,
     ):
 
@@ -142,8 +131,8 @@ class SDM(BaseMatchModel):
         # User DNN to final embedding
         self.user_dnn = MLP(
             input_dim=user_final_dim,
-
-
+            hidden_dims=[rnn_hidden_size * 2, embedding_dim],
+            output_dim=None,
             dropout=dnn_dropout,
             activation=dnn_activation,
         )
@@ -173,8 +162,8 @@ class SDM(BaseMatchModel):
         item_dnn_units = item_dnn_hidden_units + [embedding_dim]
         self.item_dnn = MLP(
             input_dim=item_input_dim,
-
-
+            hidden_dims=item_dnn_units,
+            output_dim=None,
             dropout=dnn_dropout,
             activation=dnn_activation,
         )
@@ -189,15 +178,6 @@ class SDM(BaseMatchModel):
             include_modules=["item_dnn"] if self.item_dnn else [],
         )
 
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            scheduler=scheduler,
-            scheduler_params=scheduler_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def user_tower(self, user_input: dict) -> torch.Tensor:
         seq_feature = self.user_sequence_features[0]
         seq_input = user_input[seq_feature.name]

nextrec/models/retrieval/youtube_dnn.py
CHANGED

@@ -10,7 +10,6 @@ Reference:
 from typing import Literal
 
 import torch
-import torch.nn as nn
 
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import MLP, EmbeddingLayer
@@ -54,17 +53,6 @@ class YoutubeDNN(BaseMatchModel):
         dense_l1_reg=0.0,
         embedding_l2_reg=0.0,
         dense_l2_reg=0.0,
-        optimizer: str | torch.optim.Optimizer = "adam",
-        optimizer_params: dict | None = None,
-        scheduler: (
-            str
-            | torch.optim.lr_scheduler._LRScheduler
-            | type[torch.optim.lr_scheduler._LRScheduler]
-            | None
-        ) = None,
-        scheduler_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
         **kwargs,
     ):
 
@@ -114,8 +102,8 @@ class YoutubeDNN(BaseMatchModel):
         user_dnn_units = user_dnn_hidden_units + [embedding_dim]
         self.user_dnn = MLP(
             input_dim=user_input_dim,
-
-
+            hidden_dims=user_dnn_units,
+            output_dim=None,
             dropout=dnn_dropout,
             activation=dnn_activation,
         )
@@ -143,8 +131,8 @@ class YoutubeDNN(BaseMatchModel):
         item_dnn_units = item_dnn_hidden_units + [embedding_dim]
         self.item_dnn = MLP(
             input_dim=item_input_dim,
-
-
+            hidden_dims=item_dnn_units,
+            output_dim=None,
             dropout=dnn_dropout,
             activation=dnn_activation,
         )
@@ -156,15 +144,6 @@ class YoutubeDNN(BaseMatchModel):
             embedding_attr="item_embedding", include_modules=["item_dnn"]
         )
 
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            scheduler=scheduler,
-            scheduler_params=scheduler_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def user_tower(self, user_input: dict) -> torch.Tensor:
         """
         User tower to encode historical behavior sequences and user features.

nextrec/models/sequential/hstu.py
CHANGED

@@ -323,11 +323,6 @@ class HSTU(BaseModel):
         tie_embeddings: bool = True,
         target: Optional[list[str] | str] = None,
         task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: Optional[dict] = None,
-        scheduler: Optional[str] = None,
-        scheduler_params: Optional[dict] = None,
-        loss_params: Optional[dict] = None,
         embedding_l1_reg: float = 0.0,
         dense_l1_reg: float = 0.0,
         embedding_l2_reg: float = 0.0,
@@ -426,19 +421,6 @@ class HSTU(BaseModel):
         self.register_buffer("causal_mask", torch.empty(0), persistent=False)
         self.ignore_index = self.padding_idx if self.padding_idx is not None else -100
 
-        optimizer_params = optimizer_params or {}
-        scheduler_params = scheduler_params or {}
-        loss_params = loss_params or {}
-        loss_params.setdefault("ignore_index", self.ignore_index)
-
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            scheduler=scheduler,
-            scheduler_params=scheduler_params,
-            loss="crossentropy",
-            loss_params=loss_params,
-        )
         self.register_regularization_weights(
             embedding_attr="token_embedding",
             include_modules=["layers", "lm_head", "context_proj"],
nextrec/utils/model.py
CHANGED
@@ -2,13 +2,14 @@
 Model-related utilities for NextRec
 
 Date: create on 03/12/2025
-Checkpoint: edit on
+Checkpoint: edit on 29/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 
 from collections import OrderedDict
 
 import torch
+from torch import nn
 
 from nextrec.loss import (
     ApproxNDCGLoss,
@@ -19,6 +20,14 @@ from nextrec.loss import (
     SampledSoftmaxLoss,
     TripletLoss,
 )
+from nextrec.utils.types import (
+    LossName,
+    OptimizerName,
+    SchedulerName,
+    TrainingModeName,
+    TaskTypeName,
+    MetricsName,
+)
 
 
 def merge_features(primary, secondary) -> list:
@@ -29,9 +38,9 @@ def merge_features(primary, secondary) -> list:
 
 
 def get_mlp_output_dim(params: dict, fallback: int) -> int:
-
-    if
-        return
+    hidden_dims = params.get("hidden_dims")
+    if hidden_dims:
+        return hidden_dims[-1]
     return fallback
 
 
@@ -72,6 +81,84 @@ def compute_pair_scores(model, data, batch_size: int = 512):
     return scores.detach().cpu().numpy()
 
 
+def get_training_modes(
+    training_mode,
+    nums_task: int,
+    valid_modes: set[str] | None = None,
+) -> list:
+    valid_modes = valid_modes or {"pointwise", "pairwise", "listwise"}
+    if isinstance(training_mode, list):
+        training_modes = list(training_mode)
+        if len(training_modes) != nums_task:
+            raise ValueError(
+                "[BaseModel-init Error] training_mode list length must match number of tasks."
+            )
+    else:
+        training_modes = [training_mode] * nums_task
+    if any(mode not in valid_modes for mode in training_modes):
+        raise ValueError(
+            "[BaseModel-init Error] training_mode must be one of {'pointwise', 'pairwise', 'listwise'}."
+        )
+    return training_modes
+
+
+def get_loss_list(
+    loss,
+    training_modes: list[str],
+    nums_task: int,
+    default_losses: dict[str, str],
+):
+    effective_loss = loss
+    if effective_loss is None:
+        loss_list = [default_losses[mode] for mode in training_modes]
+    elif isinstance(effective_loss, list):
+        if not effective_loss:
+            loss_list = [default_losses[mode] for mode in training_modes]
+        else:
+            if len(effective_loss) != nums_task:
+                raise ValueError(
+                    f"[BaseModel-compile Error] Number of loss functions ({len(effective_loss)}) must match number of tasks ({nums_task})."
+                )
+            loss_list = list(effective_loss)
+    else:
+        loss_list = [effective_loss] * nums_task
+
+    for idx, mode in enumerate(training_modes):
+        if isinstance(loss_list[idx], str) and loss_list[idx] in {
+            "bce",
+            "binary_crossentropy",
+        }:
+            if mode in {"pairwise", "listwise"}:
+                loss_list[idx] = default_losses[mode]
+    return loss_list
+
+
+def resolve_loss_weights(loss_weights, nums_task: int):
+    if loss_weights is None:
+        return None
+    if nums_task == 1:
+        if isinstance(loss_weights, (list, tuple)):
+            if len(loss_weights) != 1:
+                raise ValueError(
+                    "[BaseModel-compile Error] loss_weights list must have exactly one element for single-task setup."
+                )
+            loss_weights = loss_weights[0]
+        return [float(loss_weights)]
+    if isinstance(loss_weights, (int, float)):
+        weights = [float(loss_weights)] * nums_task
+    elif isinstance(loss_weights, (list, tuple)):
+        weights = [float(w) for w in loss_weights]
+        if len(weights) != nums_task:
+            raise ValueError(
+                f"[BaseModel-compile Error] Number of loss_weights ({len(weights)}) must match number of tasks ({nums_task})."
+            )
+    else:
+        raise TypeError(
+            f"[BaseModel-compile Error] loss_weights must be int, float, list or tuple, got {type(loss_weights)}"
+        )
+    return weights
+
+
 def prepare_ranking_targets(
     y_pred: torch.Tensor, y_true: torch.Tensor
 ) -> tuple[torch.Tensor, torch.Tensor]:
nextrec/utils/types.py
CHANGED
@@ -61,3 +61,38 @@ ActivationName = Literal[
 TrainingModeName = Literal["pointwise", "pairwise", "listwise"]
 
 TaskTypeName = Literal["binary", "regression"]
+
+MetricsName = Literal[
+    "auc",
+    "gauc",
+    "ks",
+    "logloss",
+    "accuracy",
+    "acc",
+    "precision",
+    "recall",
+    "f1",
+    "micro_f1",
+    "macro_f1",
+    "mse",
+    "mae",
+    "rmse",
+    "r2",
+    "mape",
+    "msle",
+    "auc",
+    "gauc",
+    "precision@10",
+    "hitrate@10",
+    "map@10",
+    "cosine",
+    "recall@5",
+    "recall@10",
+    "recall@20",
+    "ndcg@5",
+    "ndcg@10",
+    "ndcg@20",
+    "mrr@5",
+    "mrr@10",
+    "mrr@20",
+]
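`MetricsName` adds a typed vocabulary for metric identifiers, including fixed `@k` forms for the retrieval metrics. A minimal sketch of using it as an annotation; the `report` helper is hypothetical, only the import is from this diff:

from nextrec.utils.types import MetricsName

# A static type checker rejects identifiers outside the Literal, e.g. "roc_auc".
def report(metric: MetricsName, value: float) -> str:
    return f"{metric}={value:.4f}"

print(report("ndcg@10", 0.4213))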
{nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nextrec
-Version: 0.4.22
+Version: 0.4.24
 Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
 Project-URL: Homepage, https://github.com/zerolovesea/NextRec
 Project-URL: Repository, https://github.com/zerolovesea/NextRec
@@ -69,7 +69,7 @@ Description-Content-Type: text/markdown
 [badge]
 [badge]
 [badge]
-[badge]
+[badge]
 [](https://deepwiki.com/zerolovesea/NextRec)
 
 中文文档 | [English Version](README_en.md)
@@ -182,7 +182,7 @@ sequence_features = [
     SequenceFeature(name='sequence_1', vocab_size=int(df['sequence_1'].apply(lambda x: max(x)).max() + 1), embedding_dim=16, padding_idx=0, embedding_name='sparse_0_emb'),]
 
 mlp_params = {
-    "
+    "hidden_dims": [256, 128, 64],
     "activation": "relu",
     "dropout": 0.3,
 }
@@ -191,6 +191,8 @@ model = DIN(
     dense_features=dense_features,
     sparse_features=sparse_features,
     sequence_features=sequence_features,
+    behavior_feature_name="sequence_0",
+    candidate_feature_name="item_id",
     mlp_params=mlp_params,
     attention_hidden_units=[80, 40],
     attention_activation='sigmoid',
@@ -204,7 +206,7 @@ model = DIN(
     session_id="din_tutorial", # 实验id,用于存放训练日志
 )
 
-#
+# 编译模型,优化器/损失/学习率调度器统一在 compile 中设置
 model.compile(
     optimizer = "adam",
     optimizer_params = {"lr": 1e-3, "weight_decay": 1e-5},
@@ -247,11 +249,11 @@ nextrec --mode=predict --predict_config=path/to/predict_config.yaml
 
 预测结果固定保存到 `{checkpoint_path}/predictions/{name}.{save_data_format}`。
 
-> 截止当前版本0.4.22,NextRec CLI支持单机训练,分布式训练相关功能尚在开发中。
+> 截止当前版本0.4.24,NextRec CLI支持单机训练,分布式训练相关功能尚在开发中。
 
 ## 兼容平台
 
-当前最新版本为0.4.22,所有模型和测试代码均已在以下平台通过验证,如果开发者在使用中遇到兼容问题,请在issue区提出错误报告及系统版本:
+当前最新版本为0.4.24,所有模型和测试代码均已在以下平台通过验证,如果开发者在使用中遇到兼容问题,请在issue区提出错误报告及系统版本:
 
 | 平台 | 配置 |
 |------|------|