nextrec 0.3.6__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +1 -1
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -5
- nextrec/basic/callback.py +1 -0
- nextrec/basic/features.py +30 -22
- nextrec/basic/layers.py +244 -113
- nextrec/basic/loggers.py +62 -43
- nextrec/basic/metrics.py +268 -119
- nextrec/basic/model.py +1373 -443
- nextrec/basic/session.py +10 -3
- nextrec/cli.py +498 -0
- nextrec/data/__init__.py +19 -25
- nextrec/data/batch_utils.py +11 -3
- nextrec/data/data_processing.py +42 -24
- nextrec/data/data_utils.py +26 -15
- nextrec/data/dataloader.py +303 -96
- nextrec/data/preprocessor.py +320 -199
- nextrec/loss/listwise.py +17 -9
- nextrec/loss/loss_utils.py +7 -8
- nextrec/loss/pairwise.py +2 -0
- nextrec/loss/pointwise.py +30 -12
- nextrec/models/generative/hstu.py +106 -40
- nextrec/models/match/dssm.py +82 -69
- nextrec/models/match/dssm_v2.py +72 -58
- nextrec/models/match/mind.py +175 -108
- nextrec/models/match/sdm.py +104 -88
- nextrec/models/match/youtube_dnn.py +73 -60
- nextrec/models/multi_task/esmm.py +53 -39
- nextrec/models/multi_task/mmoe.py +70 -47
- nextrec/models/multi_task/ple.py +107 -50
- nextrec/models/multi_task/poso.py +121 -41
- nextrec/models/multi_task/share_bottom.py +54 -38
- nextrec/models/ranking/afm.py +172 -45
- nextrec/models/ranking/autoint.py +84 -61
- nextrec/models/ranking/dcn.py +59 -42
- nextrec/models/ranking/dcn_v2.py +64 -23
- nextrec/models/ranking/deepfm.py +36 -26
- nextrec/models/ranking/dien.py +158 -102
- nextrec/models/ranking/din.py +88 -60
- nextrec/models/ranking/fibinet.py +55 -35
- nextrec/models/ranking/fm.py +32 -26
- nextrec/models/ranking/masknet.py +95 -34
- nextrec/models/ranking/pnn.py +34 -31
- nextrec/models/ranking/widedeep.py +37 -29
- nextrec/models/ranking/xdeepfm.py +63 -41
- nextrec/utils/__init__.py +61 -32
- nextrec/utils/config.py +490 -0
- nextrec/utils/device.py +52 -12
- nextrec/utils/distributed.py +141 -0
- nextrec/utils/embedding.py +1 -0
- nextrec/utils/feature.py +1 -0
- nextrec/utils/file.py +32 -11
- nextrec/utils/initializer.py +61 -16
- nextrec/utils/optimizer.py +25 -9
- nextrec/utils/synthetic_data.py +531 -0
- nextrec/utils/tensor.py +24 -13
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/METADATA +15 -5
- nextrec-0.4.2.dist-info/RECORD +69 -0
- nextrec-0.4.2.dist-info/entry_points.txt +2 -0
- nextrec-0.3.6.dist-info/RECORD +0 -64
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/match/sdm.py
CHANGED
|
@@ -6,6 +6,7 @@ Reference:
|
|
|
6
6
|
[1] Ying H, Zhuang F, Zhang F, et al. Sequential recommender system based on hierarchical attention networks[C]
|
|
7
7
|
//IJCAI. 2018: 3926-3932.
|
|
8
8
|
"""
|
|
9
|
+
|
|
9
10
|
import torch
|
|
10
11
|
import torch.nn as nn
|
|
11
12
|
import torch.nn.functional as F
|
|
@@ -20,46 +21,53 @@ class SDM(BaseMatchModel):
|
|
|
20
21
|
@property
|
|
21
22
|
def model_name(self) -> str:
|
|
22
23
|
return "SDM"
|
|
23
|
-
|
|
24
|
+
|
|
24
25
|
@property
|
|
25
26
|
def support_training_modes(self) -> list[str]:
|
|
26
|
-
return [
|
|
27
|
-
|
|
28
|
-
def __init__(
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
27
|
+
return ["pointwise"]
|
|
28
|
+
|
|
29
|
+
def __init__(
|
|
30
|
+
self,
|
|
31
|
+
user_dense_features: list[DenseFeature] | None = None,
|
|
32
|
+
user_sparse_features: list[SparseFeature] | None = None,
|
|
33
|
+
user_sequence_features: list[SequenceFeature] | None = None,
|
|
34
|
+
item_dense_features: list[DenseFeature] | None = None,
|
|
35
|
+
item_sparse_features: list[SparseFeature] | None = None,
|
|
36
|
+
item_sequence_features: list[SequenceFeature] | None = None,
|
|
37
|
+
embedding_dim: int = 64,
|
|
38
|
+
rnn_type: Literal["GRU", "LSTM"] = "GRU",
|
|
39
|
+
rnn_hidden_size: int = 64,
|
|
40
|
+
rnn_num_layers: int = 1,
|
|
41
|
+
rnn_dropout: float = 0.0,
|
|
42
|
+
use_short_term: bool = True,
|
|
43
|
+
use_long_term: bool = True,
|
|
44
|
+
item_dnn_hidden_units: list[int] = [256, 128],
|
|
45
|
+
dnn_activation: str = "relu",
|
|
46
|
+
dnn_dropout: float = 0.0,
|
|
47
|
+
training_mode: Literal["pointwise", "pairwise", "listwise"] = "pointwise",
|
|
48
|
+
num_negative_samples: int = 4,
|
|
49
|
+
temperature: float = 1.0,
|
|
50
|
+
similarity_metric: Literal["dot", "cosine", "euclidean"] = "dot",
|
|
51
|
+
device: str = "cpu",
|
|
52
|
+
embedding_l1_reg: float = 0.0,
|
|
53
|
+
dense_l1_reg: float = 0.0,
|
|
54
|
+
embedding_l2_reg: float = 0.0,
|
|
55
|
+
dense_l2_reg: float = 0.0,
|
|
56
|
+
early_stop_patience: int = 20,
|
|
57
|
+
optimizer: str | torch.optim.Optimizer = "adam",
|
|
58
|
+
optimizer_params: dict | None = None,
|
|
59
|
+
scheduler: (
|
|
60
|
+
str
|
|
61
|
+
| torch.optim.lr_scheduler._LRScheduler
|
|
62
|
+
| type[torch.optim.lr_scheduler._LRScheduler]
|
|
63
|
+
| None
|
|
64
|
+
) = None,
|
|
65
|
+
scheduler_params: dict | None = None,
|
|
66
|
+
loss: str | nn.Module | list[str | nn.Module] | None = "bce",
|
|
67
|
+
loss_params: dict | list[dict] | None = None,
|
|
68
|
+
**kwargs,
|
|
69
|
+
):
|
|
70
|
+
|
|
63
71
|
super(SDM, self).__init__(
|
|
64
72
|
user_dense_features=user_dense_features,
|
|
65
73
|
user_sparse_features=user_sparse_features,
|
|
@@ -76,17 +84,16 @@ class SDM(BaseMatchModel):
|
|
|
76
84
|
dense_l1_reg=dense_l1_reg,
|
|
77
85
|
embedding_l2_reg=embedding_l2_reg,
|
|
78
86
|
dense_l2_reg=dense_l2_reg,
|
|
79
|
-
|
|
80
|
-
**kwargs
|
|
87
|
+
**kwargs,
|
|
81
88
|
)
|
|
82
|
-
|
|
89
|
+
|
|
83
90
|
self.embedding_dim = embedding_dim
|
|
84
91
|
self.rnn_type = rnn_type
|
|
85
92
|
self.rnn_hidden_size = rnn_hidden_size
|
|
86
93
|
self.use_short_term = use_short_term
|
|
87
94
|
self.use_long_term = use_long_term
|
|
88
95
|
self.item_dnn_hidden_units = item_dnn_hidden_units
|
|
89
|
-
|
|
96
|
+
|
|
90
97
|
# User tower
|
|
91
98
|
user_features = []
|
|
92
99
|
if user_dense_features:
|
|
@@ -95,54 +102,54 @@ class SDM(BaseMatchModel):
|
|
|
95
102
|
user_features.extend(user_sparse_features)
|
|
96
103
|
if user_sequence_features:
|
|
97
104
|
user_features.extend(user_sequence_features)
|
|
98
|
-
|
|
105
|
+
|
|
99
106
|
if len(user_features) > 0:
|
|
100
107
|
self.user_embedding = EmbeddingLayer(user_features)
|
|
101
|
-
|
|
108
|
+
|
|
102
109
|
if not user_sequence_features or len(user_sequence_features) == 0:
|
|
103
110
|
raise ValueError("SDM requires at least one user sequence feature")
|
|
104
|
-
|
|
111
|
+
|
|
105
112
|
seq_emb_dim = user_sequence_features[0].embedding_dim
|
|
106
|
-
|
|
107
|
-
if rnn_type ==
|
|
113
|
+
|
|
114
|
+
if rnn_type == "GRU":
|
|
108
115
|
self.rnn = nn.GRU(
|
|
109
116
|
input_size=seq_emb_dim,
|
|
110
117
|
hidden_size=rnn_hidden_size,
|
|
111
118
|
num_layers=rnn_num_layers,
|
|
112
119
|
batch_first=True,
|
|
113
|
-
dropout=rnn_dropout if rnn_num_layers > 1 else 0.0
|
|
120
|
+
dropout=rnn_dropout if rnn_num_layers > 1 else 0.0,
|
|
114
121
|
)
|
|
115
|
-
elif rnn_type ==
|
|
122
|
+
elif rnn_type == "LSTM":
|
|
116
123
|
self.rnn = nn.LSTM(
|
|
117
124
|
input_size=seq_emb_dim,
|
|
118
125
|
hidden_size=rnn_hidden_size,
|
|
119
126
|
num_layers=rnn_num_layers,
|
|
120
127
|
batch_first=True,
|
|
121
|
-
dropout=rnn_dropout if rnn_num_layers > 1 else 0.0
|
|
128
|
+
dropout=rnn_dropout if rnn_num_layers > 1 else 0.0,
|
|
122
129
|
)
|
|
123
130
|
else:
|
|
124
131
|
raise ValueError(f"Unknown RNN type: {rnn_type}")
|
|
125
|
-
|
|
132
|
+
|
|
126
133
|
user_final_dim = 0
|
|
127
134
|
if use_long_term:
|
|
128
|
-
user_final_dim += rnn_hidden_size
|
|
135
|
+
user_final_dim += rnn_hidden_size
|
|
129
136
|
if use_short_term:
|
|
130
|
-
user_final_dim += seq_emb_dim
|
|
131
|
-
|
|
137
|
+
user_final_dim += seq_emb_dim
|
|
138
|
+
|
|
132
139
|
for feat in user_dense_features or []:
|
|
133
140
|
user_final_dim += 1
|
|
134
141
|
for feat in user_sparse_features or []:
|
|
135
142
|
user_final_dim += feat.embedding_dim
|
|
136
|
-
|
|
143
|
+
|
|
137
144
|
# User DNN to final embedding
|
|
138
145
|
self.user_dnn = MLP(
|
|
139
146
|
input_dim=user_final_dim,
|
|
140
147
|
dims=[rnn_hidden_size * 2, embedding_dim],
|
|
141
148
|
output_layer=False,
|
|
142
149
|
dropout=dnn_dropout,
|
|
143
|
-
activation=dnn_activation
|
|
150
|
+
activation=dnn_activation,
|
|
144
151
|
)
|
|
145
|
-
|
|
152
|
+
|
|
146
153
|
# Item tower
|
|
147
154
|
item_features = []
|
|
148
155
|
if item_dense_features:
|
|
@@ -151,10 +158,10 @@ class SDM(BaseMatchModel):
|
|
|
151
158
|
item_features.extend(item_sparse_features)
|
|
152
159
|
if item_sequence_features:
|
|
153
160
|
item_features.extend(item_sequence_features)
|
|
154
|
-
|
|
161
|
+
|
|
155
162
|
if len(item_features) > 0:
|
|
156
163
|
self.item_embedding = EmbeddingLayer(item_features)
|
|
157
|
-
|
|
164
|
+
|
|
158
165
|
item_input_dim = 0
|
|
159
166
|
for feat in item_dense_features or []:
|
|
160
167
|
item_input_dim += 1
|
|
@@ -162,7 +169,7 @@ class SDM(BaseMatchModel):
|
|
|
162
169
|
item_input_dim += feat.embedding_dim
|
|
163
170
|
for feat in item_sequence_features or []:
|
|
164
171
|
item_input_dim += feat.embedding_dim
|
|
165
|
-
|
|
172
|
+
|
|
166
173
|
# Item DNN
|
|
167
174
|
if len(item_dnn_hidden_units) > 0:
|
|
168
175
|
item_dnn_units = item_dnn_hidden_units + [embedding_dim]
|
|
@@ -171,20 +178,19 @@ class SDM(BaseMatchModel):
|
|
|
171
178
|
dims=item_dnn_units,
|
|
172
179
|
output_layer=False,
|
|
173
180
|
dropout=dnn_dropout,
|
|
174
|
-
activation=dnn_activation
|
|
181
|
+
activation=dnn_activation,
|
|
175
182
|
)
|
|
176
183
|
else:
|
|
177
184
|
self.item_dnn = None
|
|
178
|
-
|
|
185
|
+
|
|
179
186
|
self.register_regularization_weights(
|
|
180
|
-
embedding_attr=
|
|
181
|
-
include_modules=['rnn', 'user_dnn']
|
|
187
|
+
embedding_attr="user_embedding", include_modules=["rnn", "user_dnn"]
|
|
182
188
|
)
|
|
183
189
|
self.register_regularization_weights(
|
|
184
|
-
embedding_attr=
|
|
185
|
-
include_modules=[
|
|
190
|
+
embedding_attr="item_embedding",
|
|
191
|
+
include_modules=["item_dnn"] if self.item_dnn else [],
|
|
186
192
|
)
|
|
187
|
-
|
|
193
|
+
|
|
188
194
|
self.compile(
|
|
189
195
|
optimizer=optimizer,
|
|
190
196
|
optimizer_params=optimizer_params,
|
|
@@ -195,38 +201,44 @@ class SDM(BaseMatchModel):
|
|
|
195
201
|
)
|
|
196
202
|
|
|
197
203
|
self.to(device)
|
|
198
|
-
|
|
204
|
+
|
|
199
205
|
def user_tower(self, user_input: dict) -> torch.Tensor:
|
|
200
206
|
seq_feature = self.user_sequence_features[0]
|
|
201
207
|
seq_input = user_input[seq_feature.name]
|
|
202
|
-
|
|
208
|
+
|
|
203
209
|
embed = self.user_embedding.embed_dict[seq_feature.embedding_name]
|
|
204
210
|
seq_emb = embed(seq_input.long()) # [batch_size, seq_len, seq_emb_dim]
|
|
205
|
-
|
|
206
|
-
if self.rnn_type ==
|
|
207
|
-
rnn_output, hidden = self.rnn(
|
|
208
|
-
|
|
211
|
+
|
|
212
|
+
if self.rnn_type == "GRU":
|
|
213
|
+
rnn_output, hidden = self.rnn(
|
|
214
|
+
seq_emb
|
|
215
|
+
) # hidden: [num_layers, batch, hidden_size]
|
|
216
|
+
elif self.rnn_type == "LSTM":
|
|
209
217
|
rnn_output, (hidden, cell) = self.rnn(seq_emb)
|
|
210
|
-
|
|
218
|
+
|
|
211
219
|
features_list = []
|
|
212
|
-
|
|
220
|
+
|
|
213
221
|
if self.use_long_term:
|
|
214
222
|
if self.rnn.num_layers > 1:
|
|
215
223
|
long_term = hidden[-1, :, :] # [batch_size, hidden_size]
|
|
216
224
|
else:
|
|
217
225
|
long_term = hidden.squeeze(0) # [batch_size, hidden_size]
|
|
218
226
|
features_list.append(long_term)
|
|
219
|
-
|
|
227
|
+
|
|
220
228
|
if self.use_short_term:
|
|
221
|
-
mask = (
|
|
229
|
+
mask = (
|
|
230
|
+
seq_input != seq_feature.padding_idx
|
|
231
|
+
).float() # [batch_size, seq_len]
|
|
222
232
|
seq_lengths = mask.sum(dim=1).long() - 1 # [batch_size]
|
|
223
233
|
seq_lengths = torch.clamp(seq_lengths, min=0)
|
|
224
|
-
|
|
234
|
+
|
|
225
235
|
batch_size = seq_emb.size(0)
|
|
226
236
|
batch_indices = torch.arange(batch_size, device=seq_emb.device)
|
|
227
|
-
short_term = seq_emb[
|
|
237
|
+
short_term = seq_emb[
|
|
238
|
+
batch_indices, seq_lengths, :
|
|
239
|
+
] # [batch_size, seq_emb_dim]
|
|
228
240
|
features_list.append(short_term)
|
|
229
|
-
|
|
241
|
+
|
|
230
242
|
if self.user_dense_features:
|
|
231
243
|
dense_features = []
|
|
232
244
|
for feat in self.user_dense_features:
|
|
@@ -237,7 +249,7 @@ class SDM(BaseMatchModel):
|
|
|
237
249
|
dense_features.append(val)
|
|
238
250
|
if dense_features:
|
|
239
251
|
features_list.append(torch.cat(dense_features, dim=1))
|
|
240
|
-
|
|
252
|
+
|
|
241
253
|
if self.user_sparse_features:
|
|
242
254
|
sparse_features = []
|
|
243
255
|
for feat in self.user_sparse_features:
|
|
@@ -247,22 +259,26 @@ class SDM(BaseMatchModel):
|
|
|
247
259
|
sparse_features.append(sparse_emb)
|
|
248
260
|
if sparse_features:
|
|
249
261
|
features_list.append(torch.cat(sparse_features, dim=1))
|
|
250
|
-
|
|
262
|
+
|
|
251
263
|
user_features = torch.cat(features_list, dim=1)
|
|
252
264
|
user_emb = self.user_dnn(user_features)
|
|
253
265
|
user_emb = F.normalize(user_emb, p=2, dim=1)
|
|
254
|
-
|
|
266
|
+
|
|
255
267
|
return user_emb
|
|
256
|
-
|
|
268
|
+
|
|
257
269
|
def item_tower(self, item_input: dict) -> torch.Tensor:
|
|
258
270
|
"""Item tower"""
|
|
259
|
-
all_item_features =
|
|
271
|
+
all_item_features = (
|
|
272
|
+
self.item_dense_features
|
|
273
|
+
+ self.item_sparse_features
|
|
274
|
+
+ self.item_sequence_features
|
|
275
|
+
)
|
|
260
276
|
item_emb = self.item_embedding(item_input, all_item_features, squeeze_dim=True)
|
|
261
|
-
|
|
277
|
+
|
|
262
278
|
if self.item_dnn is not None:
|
|
263
279
|
item_emb = self.item_dnn(item_emb)
|
|
264
|
-
|
|
280
|
+
|
|
265
281
|
# L2 normalization
|
|
266
282
|
item_emb = F.normalize(item_emb, p=2, dim=1)
|
|
267
|
-
|
|
283
|
+
|
|
268
284
|
return item_emb
|
|
@@ -6,13 +6,14 @@ Reference:
|
|
|
6
6
|
[1] Covington P, Adams J, Sargin E. Deep neural networks for youtube recommendations[C]
|
|
7
7
|
//Proceedings of the 10th ACM conference on recommender systems. 2016: 191-198.
|
|
8
8
|
"""
|
|
9
|
+
|
|
9
10
|
import torch
|
|
10
11
|
import torch.nn as nn
|
|
11
12
|
from typing import Literal
|
|
12
13
|
|
|
13
14
|
from nextrec.basic.model import BaseMatchModel
|
|
14
15
|
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
15
|
-
from nextrec.basic.layers import MLP, EmbeddingLayer
|
|
16
|
+
from nextrec.basic.layers import MLP, EmbeddingLayer
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class YoutubeDNN(BaseMatchModel):
|
|
@@ -22,41 +23,48 @@ class YoutubeDNN(BaseMatchModel):
|
|
|
22
23
|
Item tower: item features -> item embedding.
|
|
23
24
|
Training usually uses listwise / sampled softmax style objectives.
|
|
24
25
|
"""
|
|
25
|
-
|
|
26
|
+
|
|
26
27
|
@property
|
|
27
28
|
def model_name(self) -> str:
|
|
28
29
|
return "YouTubeDNN"
|
|
29
|
-
|
|
30
|
-
def __init__(
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
30
|
+
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
user_dense_features: list[DenseFeature] | None = None,
|
|
34
|
+
user_sparse_features: list[SparseFeature] | None = None,
|
|
35
|
+
user_sequence_features: list[SequenceFeature] | None = None,
|
|
36
|
+
item_dense_features: list[DenseFeature] | None = None,
|
|
37
|
+
item_sparse_features: list[SparseFeature] | None = None,
|
|
38
|
+
item_sequence_features: list[SequenceFeature] | None = None,
|
|
39
|
+
user_dnn_hidden_units: list[int] = [256, 128, 64],
|
|
40
|
+
item_dnn_hidden_units: list[int] = [256, 128, 64],
|
|
41
|
+
embedding_dim: int = 64,
|
|
42
|
+
dnn_activation: str = "relu",
|
|
43
|
+
dnn_dropout: float = 0.0,
|
|
44
|
+
training_mode: Literal["pointwise", "pairwise", "listwise"] = "listwise",
|
|
45
|
+
num_negative_samples: int = 100,
|
|
46
|
+
temperature: float = 1.0,
|
|
47
|
+
similarity_metric: Literal["dot", "cosine", "euclidean"] = "dot",
|
|
48
|
+
device: str = "cpu",
|
|
49
|
+
embedding_l1_reg: float = 0.0,
|
|
50
|
+
dense_l1_reg: float = 0.0,
|
|
51
|
+
embedding_l2_reg: float = 0.0,
|
|
52
|
+
dense_l2_reg: float = 0.0,
|
|
53
|
+
early_stop_patience: int = 20,
|
|
54
|
+
optimizer: str | torch.optim.Optimizer = "adam",
|
|
55
|
+
optimizer_params: dict | None = None,
|
|
56
|
+
scheduler: (
|
|
57
|
+
str
|
|
58
|
+
| torch.optim.lr_scheduler._LRScheduler
|
|
59
|
+
| type[torch.optim.lr_scheduler._LRScheduler]
|
|
60
|
+
| None
|
|
61
|
+
) = None,
|
|
62
|
+
scheduler_params: dict | None = None,
|
|
63
|
+
loss: str | nn.Module | list[str | nn.Module] | None = "bce",
|
|
64
|
+
loss_params: dict | list[dict] | None = None,
|
|
65
|
+
**kwargs,
|
|
66
|
+
):
|
|
67
|
+
|
|
60
68
|
super(YoutubeDNN, self).__init__(
|
|
61
69
|
user_dense_features=user_dense_features,
|
|
62
70
|
user_sparse_features=user_sparse_features,
|
|
@@ -73,14 +81,13 @@ class YoutubeDNN(BaseMatchModel):
|
|
|
73
81
|
dense_l1_reg=dense_l1_reg,
|
|
74
82
|
embedding_l2_reg=embedding_l2_reg,
|
|
75
83
|
dense_l2_reg=dense_l2_reg,
|
|
76
|
-
|
|
77
|
-
**kwargs
|
|
84
|
+
**kwargs,
|
|
78
85
|
)
|
|
79
|
-
|
|
86
|
+
|
|
80
87
|
self.embedding_dim = embedding_dim
|
|
81
88
|
self.user_dnn_hidden_units = user_dnn_hidden_units
|
|
82
89
|
self.item_dnn_hidden_units = item_dnn_hidden_units
|
|
83
|
-
|
|
90
|
+
|
|
84
91
|
# User tower
|
|
85
92
|
user_features = []
|
|
86
93
|
if user_dense_features:
|
|
@@ -89,10 +96,10 @@ class YoutubeDNN(BaseMatchModel):
|
|
|
89
96
|
user_features.extend(user_sparse_features)
|
|
90
97
|
if user_sequence_features:
|
|
91
98
|
user_features.extend(user_sequence_features)
|
|
92
|
-
|
|
99
|
+
|
|
93
100
|
if len(user_features) > 0:
|
|
94
101
|
self.user_embedding = EmbeddingLayer(user_features)
|
|
95
|
-
|
|
102
|
+
|
|
96
103
|
user_input_dim = 0
|
|
97
104
|
for feat in user_dense_features or []:
|
|
98
105
|
user_input_dim += 1
|
|
@@ -101,16 +108,16 @@ class YoutubeDNN(BaseMatchModel):
|
|
|
101
108
|
for feat in user_sequence_features or []:
|
|
102
109
|
# Sequence features are pooled before entering the DNN
|
|
103
110
|
user_input_dim += feat.embedding_dim
|
|
104
|
-
|
|
111
|
+
|
|
105
112
|
user_dnn_units = user_dnn_hidden_units + [embedding_dim]
|
|
106
113
|
self.user_dnn = MLP(
|
|
107
114
|
input_dim=user_input_dim,
|
|
108
115
|
dims=user_dnn_units,
|
|
109
116
|
output_layer=False,
|
|
110
117
|
dropout=dnn_dropout,
|
|
111
|
-
activation=dnn_activation
|
|
118
|
+
activation=dnn_activation,
|
|
112
119
|
)
|
|
113
|
-
|
|
120
|
+
|
|
114
121
|
# Item tower
|
|
115
122
|
item_features = []
|
|
116
123
|
if item_dense_features:
|
|
@@ -119,10 +126,10 @@ class YoutubeDNN(BaseMatchModel):
|
|
|
119
126
|
item_features.extend(item_sparse_features)
|
|
120
127
|
if item_sequence_features:
|
|
121
128
|
item_features.extend(item_sequence_features)
|
|
122
|
-
|
|
129
|
+
|
|
123
130
|
if len(item_features) > 0:
|
|
124
131
|
self.item_embedding = EmbeddingLayer(item_features)
|
|
125
|
-
|
|
132
|
+
|
|
126
133
|
item_input_dim = 0
|
|
127
134
|
for feat in item_dense_features or []:
|
|
128
135
|
item_input_dim += 1
|
|
@@ -130,25 +137,23 @@ class YoutubeDNN(BaseMatchModel):
|
|
|
130
137
|
item_input_dim += feat.embedding_dim
|
|
131
138
|
for feat in item_sequence_features or []:
|
|
132
139
|
item_input_dim += feat.embedding_dim
|
|
133
|
-
|
|
140
|
+
|
|
134
141
|
item_dnn_units = item_dnn_hidden_units + [embedding_dim]
|
|
135
142
|
self.item_dnn = MLP(
|
|
136
143
|
input_dim=item_input_dim,
|
|
137
144
|
dims=item_dnn_units,
|
|
138
145
|
output_layer=False,
|
|
139
146
|
dropout=dnn_dropout,
|
|
140
|
-
activation=dnn_activation
|
|
147
|
+
activation=dnn_activation,
|
|
141
148
|
)
|
|
142
|
-
|
|
149
|
+
|
|
143
150
|
self.register_regularization_weights(
|
|
144
|
-
embedding_attr=
|
|
145
|
-
include_modules=['user_dnn']
|
|
151
|
+
embedding_attr="user_embedding", include_modules=["user_dnn"]
|
|
146
152
|
)
|
|
147
153
|
self.register_regularization_weights(
|
|
148
|
-
embedding_attr=
|
|
149
|
-
include_modules=['item_dnn']
|
|
154
|
+
embedding_attr="item_embedding", include_modules=["item_dnn"]
|
|
150
155
|
)
|
|
151
|
-
|
|
156
|
+
|
|
152
157
|
self.compile(
|
|
153
158
|
optimizer=optimizer,
|
|
154
159
|
optimizer_params=optimizer_params,
|
|
@@ -159,27 +164,35 @@ class YoutubeDNN(BaseMatchModel):
|
|
|
159
164
|
)
|
|
160
165
|
|
|
161
166
|
self.to(device)
|
|
162
|
-
|
|
167
|
+
|
|
163
168
|
def user_tower(self, user_input: dict) -> torch.Tensor:
|
|
164
169
|
"""
|
|
165
170
|
User tower to encode historical behavior sequences and user features.
|
|
166
171
|
"""
|
|
167
|
-
all_user_features =
|
|
172
|
+
all_user_features = (
|
|
173
|
+
self.user_dense_features
|
|
174
|
+
+ self.user_sparse_features
|
|
175
|
+
+ self.user_sequence_features
|
|
176
|
+
)
|
|
168
177
|
user_emb = self.user_embedding(user_input, all_user_features, squeeze_dim=True)
|
|
169
178
|
user_emb = self.user_dnn(user_emb)
|
|
170
|
-
|
|
179
|
+
|
|
171
180
|
# L2 normalization
|
|
172
181
|
user_emb = torch.nn.functional.normalize(user_emb, p=2, dim=1)
|
|
173
|
-
|
|
182
|
+
|
|
174
183
|
return user_emb
|
|
175
|
-
|
|
184
|
+
|
|
176
185
|
def item_tower(self, item_input: dict) -> torch.Tensor:
|
|
177
186
|
"""Item tower"""
|
|
178
|
-
all_item_features =
|
|
187
|
+
all_item_features = (
|
|
188
|
+
self.item_dense_features
|
|
189
|
+
+ self.item_sparse_features
|
|
190
|
+
+ self.item_sequence_features
|
|
191
|
+
)
|
|
179
192
|
item_emb = self.item_embedding(item_input, all_item_features, squeeze_dim=True)
|
|
180
193
|
item_emb = self.item_dnn(item_emb)
|
|
181
|
-
|
|
194
|
+
|
|
182
195
|
# L2 normalization
|
|
183
196
|
item_emb = torch.nn.functional.normalize(item_emb, p=2, dim=1)
|
|
184
|
-
|
|
197
|
+
|
|
185
198
|
return item_emb
|