nextrec 0.3.6__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +1 -1
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -5
- nextrec/basic/callback.py +1 -0
- nextrec/basic/features.py +30 -22
- nextrec/basic/layers.py +244 -113
- nextrec/basic/loggers.py +62 -43
- nextrec/basic/metrics.py +268 -119
- nextrec/basic/model.py +1373 -443
- nextrec/basic/session.py +10 -3
- nextrec/cli.py +498 -0
- nextrec/data/__init__.py +19 -25
- nextrec/data/batch_utils.py +11 -3
- nextrec/data/data_processing.py +42 -24
- nextrec/data/data_utils.py +26 -15
- nextrec/data/dataloader.py +303 -96
- nextrec/data/preprocessor.py +320 -199
- nextrec/loss/listwise.py +17 -9
- nextrec/loss/loss_utils.py +7 -8
- nextrec/loss/pairwise.py +2 -0
- nextrec/loss/pointwise.py +30 -12
- nextrec/models/generative/hstu.py +106 -40
- nextrec/models/match/dssm.py +82 -69
- nextrec/models/match/dssm_v2.py +72 -58
- nextrec/models/match/mind.py +175 -108
- nextrec/models/match/sdm.py +104 -88
- nextrec/models/match/youtube_dnn.py +73 -60
- nextrec/models/multi_task/esmm.py +53 -39
- nextrec/models/multi_task/mmoe.py +70 -47
- nextrec/models/multi_task/ple.py +107 -50
- nextrec/models/multi_task/poso.py +121 -41
- nextrec/models/multi_task/share_bottom.py +54 -38
- nextrec/models/ranking/afm.py +172 -45
- nextrec/models/ranking/autoint.py +84 -61
- nextrec/models/ranking/dcn.py +59 -42
- nextrec/models/ranking/dcn_v2.py +64 -23
- nextrec/models/ranking/deepfm.py +36 -26
- nextrec/models/ranking/dien.py +158 -102
- nextrec/models/ranking/din.py +88 -60
- nextrec/models/ranking/fibinet.py +55 -35
- nextrec/models/ranking/fm.py +32 -26
- nextrec/models/ranking/masknet.py +95 -34
- nextrec/models/ranking/pnn.py +34 -31
- nextrec/models/ranking/widedeep.py +37 -29
- nextrec/models/ranking/xdeepfm.py +63 -41
- nextrec/utils/__init__.py +61 -32
- nextrec/utils/config.py +490 -0
- nextrec/utils/device.py +52 -12
- nextrec/utils/distributed.py +141 -0
- nextrec/utils/embedding.py +1 -0
- nextrec/utils/feature.py +1 -0
- nextrec/utils/file.py +32 -11
- nextrec/utils/initializer.py +61 -16
- nextrec/utils/optimizer.py +25 -9
- nextrec/utils/synthetic_data.py +531 -0
- nextrec/utils/tensor.py +24 -13
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/METADATA +15 -5
- nextrec-0.4.2.dist-info/RECORD +69 -0
- nextrec-0.4.2.dist-info/entry_points.txt +2 -0
- nextrec-0.3.6.dist-info/RECORD +0 -64
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/dien.py
CHANGED
|
@@ -13,37 +13,44 @@ import torch.nn as nn
|
|
|
13
13
|
import torch.nn.functional as F
|
|
14
14
|
|
|
15
15
|
from nextrec.basic.model import BaseModel
|
|
16
|
-
from nextrec.basic.layers import
|
|
16
|
+
from nextrec.basic.layers import (
|
|
17
|
+
EmbeddingLayer,
|
|
18
|
+
MLP,
|
|
19
|
+
AttentionPoolingLayer,
|
|
20
|
+
PredictionLayer,
|
|
21
|
+
)
|
|
17
22
|
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
18
23
|
|
|
24
|
+
|
|
19
25
|
class AUGRU(nn.Module):
|
|
20
26
|
"""Attention-aware GRU update gate used in DIEN (Zhou et al., 2019)."""
|
|
27
|
+
|
|
21
28
|
"""
|
|
22
29
|
Attention-based GRU for DIEN
|
|
23
30
|
Uses attention scores to weight the update of hidden states
|
|
24
31
|
"""
|
|
25
|
-
|
|
32
|
+
|
|
26
33
|
def __init__(self, input_size, hidden_size, bias=True):
|
|
27
34
|
super().__init__()
|
|
28
35
|
self.input_size = input_size
|
|
29
36
|
self.hidden_size = hidden_size
|
|
30
|
-
|
|
37
|
+
|
|
31
38
|
self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, input_size))
|
|
32
39
|
self.weight_hh = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
|
|
33
40
|
if bias:
|
|
34
41
|
self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size))
|
|
35
42
|
self.bias_hh = nn.Parameter(torch.randn(3 * hidden_size))
|
|
36
43
|
else:
|
|
37
|
-
self.register_parameter(
|
|
38
|
-
self.register_parameter(
|
|
39
|
-
|
|
44
|
+
self.register_parameter("bias_ih", None)
|
|
45
|
+
self.register_parameter("bias_hh", None)
|
|
46
|
+
|
|
40
47
|
self.reset_parameters()
|
|
41
|
-
|
|
48
|
+
|
|
42
49
|
def reset_parameters(self):
|
|
43
50
|
std = 1.0 / (self.hidden_size) ** 0.5
|
|
44
51
|
for weight in self.parameters():
|
|
45
52
|
weight.data.uniform_(-std, std)
|
|
46
|
-
|
|
53
|
+
|
|
47
54
|
def forward(self, x, att_scores):
|
|
48
55
|
"""
|
|
49
56
|
Args:
|
|
@@ -59,12 +66,12 @@ class AUGRU(nn.Module):
|
|
|
59
66
|
for t in range(seq_len):
|
|
60
67
|
x_t = x[:, t, :] # [batch_size, input_size]
|
|
61
68
|
att_t = att_scores[:, t, :] # [batch_size, 1]
|
|
62
|
-
|
|
69
|
+
|
|
63
70
|
gi = F.linear(x_t, self.weight_ih, self.bias_ih)
|
|
64
71
|
gh = F.linear(h, self.weight_hh, self.bias_hh)
|
|
65
72
|
i_r, i_i, i_n = gi.chunk(3, 1)
|
|
66
73
|
h_r, h_i, h_n = gh.chunk(3, 1)
|
|
67
|
-
|
|
74
|
+
|
|
68
75
|
resetgate = torch.sigmoid(i_r + h_r)
|
|
69
76
|
inputgate = torch.sigmoid(i_i + h_i)
|
|
70
77
|
newgate = torch.tanh(i_n + resetgate * h_n)
|
|
@@ -72,21 +79,22 @@ class AUGRU(nn.Module):
|
|
|
72
79
|
h = (1 - att_t) * h + att_t * newgate
|
|
73
80
|
outputs.append(h.unsqueeze(1))
|
|
74
81
|
output = torch.cat(outputs, dim=1)
|
|
75
|
-
|
|
76
|
-
return output, h
|
|
82
|
+
|
|
83
|
+
return output, h
|
|
77
84
|
|
|
78
85
|
|
|
79
86
|
class DynamicGRU(nn.Module):
|
|
80
87
|
"""Dynamic GRU unit with auxiliary loss path from DIEN (Zhou et al., 2019)."""
|
|
88
|
+
|
|
81
89
|
"""
|
|
82
90
|
GRU with dynamic routing for DIEN
|
|
83
91
|
"""
|
|
84
|
-
|
|
92
|
+
|
|
85
93
|
def __init__(self, input_size, hidden_size, bias=True):
|
|
86
94
|
super().__init__()
|
|
87
95
|
self.input_size = input_size
|
|
88
96
|
self.hidden_size = hidden_size
|
|
89
|
-
|
|
97
|
+
|
|
90
98
|
# GRU parameters
|
|
91
99
|
self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, input_size))
|
|
92
100
|
self.weight_hh = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
|
|
@@ -94,16 +102,16 @@ class DynamicGRU(nn.Module):
|
|
|
94
102
|
self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size))
|
|
95
103
|
self.bias_hh = nn.Parameter(torch.randn(3 * hidden_size))
|
|
96
104
|
else:
|
|
97
|
-
self.register_parameter(
|
|
98
|
-
self.register_parameter(
|
|
99
|
-
|
|
105
|
+
self.register_parameter("bias_ih", None)
|
|
106
|
+
self.register_parameter("bias_hh", None)
|
|
107
|
+
|
|
100
108
|
self.reset_parameters()
|
|
101
|
-
|
|
109
|
+
|
|
102
110
|
def reset_parameters(self):
|
|
103
111
|
std = 1.0 / (self.hidden_size) ** 0.5
|
|
104
112
|
for weight in self.parameters():
|
|
105
113
|
weight.data.uniform_(-std, std)
|
|
106
|
-
|
|
114
|
+
|
|
107
115
|
def forward(self, x, att_scores=None):
|
|
108
116
|
"""
|
|
109
117
|
Args:
|
|
@@ -114,29 +122,29 @@ class DynamicGRU(nn.Module):
|
|
|
114
122
|
hidden: [batch_size, hidden_size] - final hidden state
|
|
115
123
|
"""
|
|
116
124
|
batch_size, seq_len, _ = x.shape
|
|
117
|
-
|
|
125
|
+
|
|
118
126
|
# Initialize hidden state
|
|
119
127
|
h = torch.zeros(batch_size, self.hidden_size, device=x.device)
|
|
120
|
-
|
|
128
|
+
|
|
121
129
|
outputs = []
|
|
122
130
|
for t in range(seq_len):
|
|
123
131
|
x_t = x[:, t, :] # [batch_size, input_size]
|
|
124
|
-
|
|
132
|
+
|
|
125
133
|
# GRU computation
|
|
126
134
|
gi = F.linear(x_t, self.weight_ih, self.bias_ih)
|
|
127
135
|
gh = F.linear(h, self.weight_hh, self.bias_hh)
|
|
128
136
|
i_r, i_i, i_n = gi.chunk(3, 1)
|
|
129
137
|
h_r, h_i, h_n = gh.chunk(3, 1)
|
|
130
|
-
|
|
138
|
+
|
|
131
139
|
resetgate = torch.sigmoid(i_r + h_r)
|
|
132
140
|
inputgate = torch.sigmoid(i_i + h_i)
|
|
133
141
|
newgate = torch.tanh(i_n + resetgate * h_n)
|
|
134
142
|
h = newgate + inputgate * (h - newgate)
|
|
135
|
-
|
|
143
|
+
|
|
136
144
|
outputs.append(h.unsqueeze(1))
|
|
137
|
-
|
|
145
|
+
|
|
138
146
|
output = torch.cat(outputs, dim=1) # [batch_size, seq_len, hidden_size]
|
|
139
|
-
|
|
147
|
+
|
|
140
148
|
return output, h
|
|
141
149
|
|
|
142
150
|
|
|
@@ -146,174 +154,222 @@ class DIEN(BaseModel):
|
|
|
146
154
|
return "DIEN"
|
|
147
155
|
|
|
148
156
|
@property
|
|
149
|
-
def
|
|
157
|
+
def default_task(self):
|
|
150
158
|
return "binary"
|
|
151
|
-
|
|
152
|
-
def __init__(
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
159
|
+
|
|
160
|
+
def __init__(
|
|
161
|
+
self,
|
|
162
|
+
dense_features: list[DenseFeature],
|
|
163
|
+
sparse_features: list[SparseFeature],
|
|
164
|
+
sequence_features: list[SequenceFeature],
|
|
165
|
+
mlp_params: dict,
|
|
166
|
+
gru_hidden_size: int = 64,
|
|
167
|
+
attention_hidden_units: list[int] = [80, 40],
|
|
168
|
+
attention_activation: str = "sigmoid",
|
|
169
|
+
use_negsampling: bool = False,
|
|
170
|
+
target: list[str] = [],
|
|
171
|
+
task: str | list[str] | None = None,
|
|
172
|
+
optimizer: str = "adam",
|
|
173
|
+
optimizer_params: dict = {},
|
|
174
|
+
loss: str | nn.Module | None = "bce",
|
|
175
|
+
loss_params: dict | list[dict] | None = None,
|
|
176
|
+
device: str = "cpu",
|
|
177
|
+
embedding_l1_reg=1e-6,
|
|
178
|
+
dense_l1_reg=1e-5,
|
|
179
|
+
embedding_l2_reg=1e-5,
|
|
180
|
+
dense_l2_reg=1e-4,
|
|
181
|
+
**kwargs,
|
|
182
|
+
):
|
|
183
|
+
|
|
173
184
|
super(DIEN, self).__init__(
|
|
174
185
|
dense_features=dense_features,
|
|
175
186
|
sparse_features=sparse_features,
|
|
176
187
|
sequence_features=sequence_features,
|
|
177
188
|
target=target,
|
|
178
|
-
task=self.
|
|
189
|
+
task=task or self.default_task,
|
|
179
190
|
device=device,
|
|
180
191
|
embedding_l1_reg=embedding_l1_reg,
|
|
181
192
|
dense_l1_reg=dense_l1_reg,
|
|
182
193
|
embedding_l2_reg=embedding_l2_reg,
|
|
183
194
|
dense_l2_reg=dense_l2_reg,
|
|
184
|
-
|
|
185
|
-
**kwargs
|
|
195
|
+
**kwargs,
|
|
186
196
|
)
|
|
187
197
|
|
|
188
198
|
self.loss = loss
|
|
189
199
|
if self.loss is None:
|
|
190
200
|
self.loss = "bce"
|
|
191
|
-
|
|
201
|
+
|
|
192
202
|
self.use_negsampling = use_negsampling
|
|
193
|
-
|
|
203
|
+
|
|
194
204
|
# Features classification
|
|
195
205
|
if len(sequence_features) == 0:
|
|
196
|
-
raise ValueError(
|
|
197
|
-
|
|
206
|
+
raise ValueError(
|
|
207
|
+
"DIEN requires at least one sequence feature for user behavior history"
|
|
208
|
+
)
|
|
209
|
+
|
|
198
210
|
self.behavior_feature = sequence_features[0] # User behavior sequence
|
|
199
|
-
self.candidate_feature =
|
|
200
|
-
|
|
201
|
-
|
|
211
|
+
self.candidate_feature = (
|
|
212
|
+
sparse_features[-1] if sparse_features else None
|
|
213
|
+
) # Candidate item
|
|
214
|
+
|
|
215
|
+
self.other_sparse_features = (
|
|
216
|
+
sparse_features[:-1] if self.candidate_feature else sparse_features
|
|
217
|
+
)
|
|
202
218
|
self.dense_features_list = dense_features
|
|
203
219
|
|
|
204
220
|
# Embedding layer
|
|
205
221
|
self.embedding = EmbeddingLayer(features=self.all_features)
|
|
206
|
-
|
|
222
|
+
|
|
207
223
|
behavior_emb_dim = self.behavior_feature.embedding_dim
|
|
208
224
|
self.candidate_proj = None
|
|
209
|
-
if
|
|
210
|
-
self.
|
|
211
|
-
|
|
225
|
+
if (
|
|
226
|
+
self.candidate_feature is not None
|
|
227
|
+
and self.candidate_feature.embedding_dim != gru_hidden_size
|
|
228
|
+
):
|
|
229
|
+
self.candidate_proj = nn.Linear(
|
|
230
|
+
self.candidate_feature.embedding_dim, gru_hidden_size
|
|
231
|
+
)
|
|
232
|
+
|
|
212
233
|
# Interest Extractor Layer (GRU)
|
|
213
234
|
self.interest_extractor = DynamicGRU(
|
|
214
|
-
input_size=behavior_emb_dim,
|
|
215
|
-
hidden_size=gru_hidden_size
|
|
235
|
+
input_size=behavior_emb_dim, hidden_size=gru_hidden_size
|
|
216
236
|
)
|
|
217
|
-
|
|
237
|
+
|
|
218
238
|
# Attention layer for computing attention scores
|
|
219
239
|
self.attention_layer = AttentionPoolingLayer(
|
|
220
240
|
embedding_dim=gru_hidden_size,
|
|
221
241
|
hidden_units=attention_hidden_units,
|
|
222
242
|
activation=attention_activation,
|
|
223
|
-
use_softmax=False # We'll use scores directly for AUGRU
|
|
243
|
+
use_softmax=False, # We'll use scores directly for AUGRU
|
|
224
244
|
)
|
|
225
|
-
|
|
245
|
+
|
|
226
246
|
# Interest Evolution Layer (AUGRU)
|
|
227
|
-
self.interest_evolution = AUGRU(
|
|
228
|
-
|
|
247
|
+
self.interest_evolution = AUGRU(
|
|
248
|
+
input_size=gru_hidden_size, hidden_size=gru_hidden_size
|
|
249
|
+
)
|
|
250
|
+
|
|
229
251
|
# Calculate MLP input dimension
|
|
230
252
|
mlp_input_dim = 0
|
|
231
253
|
if self.candidate_feature:
|
|
232
254
|
mlp_input_dim += self.candidate_feature.embedding_dim
|
|
233
255
|
mlp_input_dim += gru_hidden_size # final interest state
|
|
234
256
|
mlp_input_dim += sum([f.embedding_dim for f in self.other_sparse_features])
|
|
235
|
-
mlp_input_dim += sum(
|
|
257
|
+
mlp_input_dim += sum(
|
|
258
|
+
[getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
|
|
259
|
+
)
|
|
236
260
|
# MLP for final prediction
|
|
237
261
|
self.mlp = MLP(input_dim=mlp_input_dim, **mlp_params)
|
|
238
|
-
self.prediction_layer = PredictionLayer(task_type=self.
|
|
262
|
+
self.prediction_layer = PredictionLayer(task_type=self.task)
|
|
239
263
|
# Register regularization weights
|
|
240
|
-
self.register_regularization_weights(
|
|
241
|
-
|
|
264
|
+
self.register_regularization_weights(
|
|
265
|
+
embedding_attr="embedding",
|
|
266
|
+
include_modules=[
|
|
267
|
+
"interest_extractor",
|
|
268
|
+
"interest_evolution",
|
|
269
|
+
"attention_layer",
|
|
270
|
+
"mlp",
|
|
271
|
+
"candidate_proj",
|
|
272
|
+
],
|
|
273
|
+
)
|
|
274
|
+
self.compile(
|
|
275
|
+
optimizer=optimizer,
|
|
276
|
+
optimizer_params=optimizer_params,
|
|
277
|
+
loss=loss,
|
|
278
|
+
loss_params=loss_params,
|
|
279
|
+
)
|
|
242
280
|
|
|
243
281
|
def forward(self, x):
|
|
244
282
|
# Get candidate item embedding
|
|
245
283
|
if self.candidate_feature:
|
|
246
|
-
candidate_emb = self.embedding.embed_dict[
|
|
284
|
+
candidate_emb = self.embedding.embed_dict[
|
|
285
|
+
self.candidate_feature.embedding_name
|
|
286
|
+
](
|
|
287
|
+
x[self.candidate_feature.name].long()
|
|
288
|
+
) # [B, emb_dim]
|
|
247
289
|
else:
|
|
248
290
|
raise ValueError("DIEN requires a candidate item feature")
|
|
249
|
-
|
|
291
|
+
|
|
250
292
|
# Get behavior sequence embedding
|
|
251
293
|
behavior_seq = x[self.behavior_feature.name].long() # [B, seq_len]
|
|
252
|
-
behavior_emb = self.embedding.embed_dict[self.behavior_feature.embedding_name](
|
|
253
|
-
|
|
294
|
+
behavior_emb = self.embedding.embed_dict[self.behavior_feature.embedding_name](
|
|
295
|
+
behavior_seq
|
|
296
|
+
) # [B, seq_len, emb_dim]
|
|
297
|
+
|
|
254
298
|
# Create mask for padding
|
|
255
299
|
if self.behavior_feature.padding_idx is not None:
|
|
256
|
-
mask = (
|
|
300
|
+
mask = (
|
|
301
|
+
(behavior_seq != self.behavior_feature.padding_idx)
|
|
302
|
+
.unsqueeze(-1)
|
|
303
|
+
.float()
|
|
304
|
+
)
|
|
257
305
|
else:
|
|
258
306
|
mask = (behavior_seq != 0).unsqueeze(-1).float()
|
|
259
|
-
|
|
307
|
+
|
|
260
308
|
# Step 1: Interest Extractor (GRU)
|
|
261
|
-
interest_states, _ = self.interest_extractor(
|
|
262
|
-
|
|
309
|
+
interest_states, _ = self.interest_extractor(
|
|
310
|
+
behavior_emb
|
|
311
|
+
) # [B, seq_len, hidden_size]
|
|
312
|
+
|
|
263
313
|
# Step 2: Compute attention scores for each time step
|
|
264
314
|
batch_size, seq_len, hidden_size = interest_states.shape
|
|
265
|
-
|
|
315
|
+
|
|
266
316
|
# Project candidate to hidden_size if necessary (defined in __init__)
|
|
267
317
|
if self.candidate_proj is not None:
|
|
268
318
|
candidate_for_attention = self.candidate_proj(candidate_emb)
|
|
269
319
|
else:
|
|
270
320
|
candidate_for_attention = candidate_emb
|
|
271
|
-
|
|
321
|
+
|
|
272
322
|
# Compute attention scores for AUGRU
|
|
273
323
|
attention_scores = []
|
|
274
324
|
for t in range(seq_len):
|
|
275
325
|
score = self.attention_layer.attention_net(
|
|
276
|
-
torch.cat(
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
326
|
+
torch.cat(
|
|
327
|
+
[
|
|
328
|
+
candidate_for_attention,
|
|
329
|
+
interest_states[:, t, :],
|
|
330
|
+
candidate_for_attention - interest_states[:, t, :],
|
|
331
|
+
candidate_for_attention * interest_states[:, t, :],
|
|
332
|
+
],
|
|
333
|
+
dim=-1,
|
|
334
|
+
)
|
|
282
335
|
) # [B, 1]
|
|
283
336
|
attention_scores.append(score)
|
|
284
|
-
|
|
285
|
-
attention_scores = torch.cat(attention_scores, dim=1).unsqueeze(
|
|
337
|
+
|
|
338
|
+
attention_scores = torch.cat(attention_scores, dim=1).unsqueeze(
|
|
339
|
+
-1
|
|
340
|
+
) # [B, seq_len, 1]
|
|
286
341
|
attention_scores = torch.sigmoid(attention_scores) # Normalize to [0, 1]
|
|
287
|
-
|
|
342
|
+
|
|
288
343
|
# Apply mask to attention scores
|
|
289
344
|
attention_scores = attention_scores * mask
|
|
290
|
-
|
|
345
|
+
|
|
291
346
|
# Step 3: Interest Evolution (AUGRU)
|
|
292
347
|
final_states, final_interest = self.interest_evolution(
|
|
293
|
-
interest_states,
|
|
294
|
-
attention_scores
|
|
348
|
+
interest_states, attention_scores
|
|
295
349
|
) # final_interest: [B, hidden_size]
|
|
296
|
-
|
|
350
|
+
|
|
297
351
|
# Get other features
|
|
298
352
|
other_embeddings = []
|
|
299
353
|
other_embeddings.append(candidate_emb)
|
|
300
354
|
other_embeddings.append(final_interest)
|
|
301
|
-
|
|
355
|
+
|
|
302
356
|
# Other sparse features
|
|
303
357
|
for feat in self.other_sparse_features:
|
|
304
|
-
feat_emb = self.embedding.embed_dict[feat.embedding_name](
|
|
358
|
+
feat_emb = self.embedding.embed_dict[feat.embedding_name](
|
|
359
|
+
x[feat.name].long()
|
|
360
|
+
)
|
|
305
361
|
other_embeddings.append(feat_emb)
|
|
306
|
-
|
|
362
|
+
|
|
307
363
|
# Dense features
|
|
308
364
|
for feat in self.dense_features_list:
|
|
309
365
|
val = x[feat.name].float()
|
|
310
366
|
if val.dim() == 1:
|
|
311
367
|
val = val.unsqueeze(1)
|
|
312
368
|
other_embeddings.append(val)
|
|
313
|
-
|
|
369
|
+
|
|
314
370
|
# Concatenate all features
|
|
315
371
|
concat_input = torch.cat(other_embeddings, dim=-1) # [B, total_dim]
|
|
316
|
-
|
|
372
|
+
|
|
317
373
|
# MLP prediction
|
|
318
374
|
y = self.mlp(concat_input) # [B, 1]
|
|
319
375
|
return self.prediction_layer(y)
|