nextrec 0.1.4__py3-none-any.whl → 0.1.7__py3-none-any.whl
- nextrec/__init__.py +4 -4
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +9 -10
- nextrec/basic/callback.py +0 -1
- nextrec/basic/dataloader.py +127 -168
- nextrec/basic/features.py +27 -24
- nextrec/basic/layers.py +159 -328
- nextrec/basic/loggers.py +37 -50
- nextrec/basic/metrics.py +147 -255
- nextrec/basic/model.py +462 -817
- nextrec/data/__init__.py +5 -5
- nextrec/data/data_utils.py +12 -16
- nextrec/data/preprocessor.py +252 -276
- nextrec/loss/__init__.py +12 -12
- nextrec/loss/loss_utils.py +22 -30
- nextrec/loss/match_losses.py +83 -116
- nextrec/models/match/__init__.py +5 -5
- nextrec/models/match/dssm.py +61 -70
- nextrec/models/match/dssm_v2.py +51 -61
- nextrec/models/match/mind.py +71 -89
- nextrec/models/match/sdm.py +81 -93
- nextrec/models/match/youtube_dnn.py +53 -62
- nextrec/models/multi_task/esmm.py +43 -49
- nextrec/models/multi_task/mmoe.py +56 -65
- nextrec/models/multi_task/ple.py +65 -92
- nextrec/models/multi_task/share_bottom.py +42 -48
- nextrec/models/ranking/__init__.py +7 -7
- nextrec/models/ranking/afm.py +30 -39
- nextrec/models/ranking/autoint.py +57 -70
- nextrec/models/ranking/dcn.py +35 -43
- nextrec/models/ranking/deepfm.py +28 -34
- nextrec/models/ranking/dien.py +79 -115
- nextrec/models/ranking/din.py +60 -84
- nextrec/models/ranking/fibinet.py +35 -51
- nextrec/models/ranking/fm.py +26 -28
- nextrec/models/ranking/masknet.py +31 -31
- nextrec/models/ranking/pnn.py +31 -30
- nextrec/models/ranking/widedeep.py +31 -36
- nextrec/models/ranking/xdeepfm.py +39 -46
- nextrec/utils/__init__.py +9 -9
- nextrec/utils/embedding.py +1 -1
- nextrec/utils/initializer.py +15 -23
- nextrec/utils/optimizer.py +10 -14
- {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/METADATA +16 -7
- nextrec-0.1.7.dist-info/RECORD +51 -0
- nextrec-0.1.4.dist-info/RECORD +0 -51
- {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/WHEEL +0 -0
- {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/licenses/LICENSE +0 -0
nextrec/models/match/mind.py
CHANGED
@@ -6,7 +6,6 @@ Reference:
 [1] Li C, Liu Z, Wu M, et al. Multi-interest network with dynamic routing for recommendation at Tmall[C]
 //Proceedings of the 28th ACM international conference on information and knowledge management. 2019: 2615-2623.
 """
-
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -21,41 +20,39 @@ class MIND(BaseMatchModel):
     @property
     def model_name(self) -> str:
         return "MIND"
-
+
     @property
     def support_training_modes(self) -> list[str]:
         """MIND only supports pointwise training mode"""
-        return [
-    def __init__(
-    ):
+        return ['pointwise']
+
+    def __init__(self,
+                 user_dense_features: list[DenseFeature] | None = None,
+                 user_sparse_features: list[SparseFeature] | None = None,
+                 user_sequence_features: list[SequenceFeature] | None = None,
+                 item_dense_features: list[DenseFeature] | None = None,
+                 item_sparse_features: list[SparseFeature] | None = None,
+                 item_sequence_features: list[SequenceFeature] | None = None,
+                 embedding_dim: int = 64,
+                 num_interests: int = 4,
+                 capsule_bilinear_type: int = 2,
+                 routing_times: int = 3,
+                 relu_layer: bool = False,
+                 item_dnn_hidden_units: list[int] = [256, 128],
+                 dnn_activation: str = 'relu',
+                 dnn_dropout: float = 0.0,
+                 training_mode: Literal['pointwise', 'pairwise', 'listwise'] = 'listwise',
+                 num_negative_samples: int = 100,
+                 temperature: float = 1.0,
+                 similarity_metric: Literal['dot', 'cosine', 'euclidean'] = 'dot',
+                 device: str = 'cpu',
+                 embedding_l1_reg: float = 0.0,
+                 dense_l1_reg: float = 0.0,
+                 embedding_l2_reg: float = 0.0,
+                 dense_l2_reg: float = 0.0,
+                 early_stop_patience: int = 20,
+                 model_id: str = 'mind'):
+
         super(MIND, self).__init__(
             user_dense_features=user_dense_features,
             user_sparse_features=user_sparse_features,
@@ -73,9 +70,9 @@ class MIND(BaseMatchModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=early_stop_patience,
-            model_id=model_id
+            model_id=model_id
         )
-
+
         self.embedding_dim = embedding_dim
         self.num_interests = num_interests
         self.item_dnn_hidden_units = item_dnn_hidden_units
@@ -87,20 +84,16 @@ class MIND(BaseMatchModel):
             user_features.extend(user_sparse_features)
         if user_sequence_features:
             user_features.extend(user_sequence_features)
-
+
         if len(user_features) > 0:
             self.user_embedding = EmbeddingLayer(user_features)
-
+
         if not user_sequence_features or len(user_sequence_features) == 0:
             raise ValueError("MIND requires at least one user sequence feature")
-
-        seq_max_len =
-            user_sequence_features[0].max_len
-            if user_sequence_features[0].max_len
-            else 50
-        )
+
+        seq_max_len = user_sequence_features[0].max_len if user_sequence_features[0].max_len else 50
         seq_embedding_dim = user_sequence_features[0].embedding_dim
-
+
         # Capsule Network for multi-interest extraction
         self.capsule_network = CapsuleNetwork(
             embedding_dim=seq_embedding_dim,
@@ -108,17 +101,15 @@ class MIND(BaseMatchModel):
             bilinear_type=capsule_bilinear_type,
             interest_num=num_interests,
             routing_times=routing_times,
-            relu_layer=relu_layer
+            relu_layer=relu_layer
         )
-
+
         if seq_embedding_dim != embedding_dim:
-            self.interest_projection = nn.Linear(
-                seq_embedding_dim, embedding_dim, bias=False
-            )
+            self.interest_projection = nn.Linear(seq_embedding_dim, embedding_dim, bias=False)
             nn.init.xavier_uniform_(self.interest_projection.weight)
         else:
             self.interest_projection = None
-
+
         # Item tower
         item_features = []
         if item_dense_features:
@@ -127,10 +118,10 @@ class MIND(BaseMatchModel):
             item_features.extend(item_sparse_features)
         if item_sequence_features:
             item_features.extend(item_sequence_features)
-
+
         if len(item_features) > 0:
             self.item_embedding = EmbeddingLayer(item_features)
-
+
         item_input_dim = 0
         for feat in item_dense_features or []:
             item_input_dim += 1
@@ -138,7 +129,7 @@ class MIND(BaseMatchModel):
             item_input_dim += feat.embedding_dim
         for feat in item_sequence_features or []:
             item_input_dim += feat.embedding_dim
-
+
         # Item DNN
         if len(item_dnn_hidden_units) > 0:
             item_dnn_units = item_dnn_hidden_units + [embedding_dim]
@@ -147,25 +138,26 @@ class MIND(BaseMatchModel):
                 dims=item_dnn_units,
                 output_layer=False,
                 dropout=dnn_dropout,
-                activation=dnn_activation
+                activation=dnn_activation
             )
         else:
             self.item_dnn = None
-
+
         self._register_regularization_weights(
-            embedding_attr=
+            embedding_attr='user_embedding',
+            include_modules=['capsule_network']
         )
         self._register_regularization_weights(
-            embedding_attr=
-            include_modules=[
+            embedding_attr='item_embedding',
+            include_modules=['item_dnn'] if self.item_dnn else []
         )
-
+
         self.to(device)
-
+
     def user_tower(self, user_input: dict) -> torch.Tensor:
         """
         User tower with multi-interest extraction
-
+
         Returns:
             user_interests: [batch_size, num_interests, embedding_dim]
         """
@@ -176,53 +168,43 @@ class MIND(BaseMatchModel):
         seq_emb = embed(seq_input.long()) # [batch_size, seq_len, embedding_dim]
 
         mask = (seq_input != seq_feature.padding_idx).float() # [batch_size, seq_len]
-
-        multi_interests = self.capsule_network(
-        ) # [batch_size, num_interests, seq_embedding_dim]
-
+
+        multi_interests = self.capsule_network(seq_emb, mask) # [batch_size, num_interests, seq_embedding_dim]
+
         if self.interest_projection is not None:
-            multi_interests = self.interest_projection(
-            ) # [batch_size, num_interests, embedding_dim]
-
+            multi_interests = self.interest_projection(multi_interests) # [batch_size, num_interests, embedding_dim]
+
         # L2 normalization
         multi_interests = F.normalize(multi_interests, p=2, dim=-1)
-
+
         return multi_interests
-
+
     def item_tower(self, item_input: dict) -> torch.Tensor:
         """Item tower"""
-        all_item_features =
-            self.item_dense_features
-            + self.item_sparse_features
-            + self.item_sequence_features
-        )
+        all_item_features = self.item_dense_features + self.item_sparse_features + self.item_sequence_features
         item_emb = self.item_embedding(item_input, all_item_features, squeeze_dim=True)
-
+
         if self.item_dnn is not None:
             item_emb = self.item_dnn(item_emb)
-
+
         # L2 normalization
         item_emb = F.normalize(item_emb, p=2, dim=1)
-
+
         return item_emb
-
-    def compute_similarity(
-        self, user_emb: torch.Tensor, item_emb: torch.Tensor
-    ) -> torch.Tensor:
+
+    def compute_similarity(self, user_emb: torch.Tensor, item_emb: torch.Tensor) -> torch.Tensor:
         item_emb_expanded = item_emb.unsqueeze(1)
-
-        if self.similarity_metric ==
+
+        if self.similarity_metric == 'dot':
             similarities = torch.sum(user_emb * item_emb_expanded, dim=-1)
-        elif self.similarity_metric ==
+        elif self.similarity_metric == 'cosine':
             similarities = F.cosine_similarity(user_emb, item_emb_expanded, dim=-1)
-        elif self.similarity_metric ==
+        elif self.similarity_metric == 'euclidean':
             similarities = -torch.sum((user_emb - item_emb_expanded) ** 2, dim=-1)
         else:
             raise ValueError(f"Unknown similarity metric: {self.similarity_metric}")
 
         max_similarity, _ = torch.max(similarities, dim=1) # [batch_size]
         max_similarity = max_similarity / self.temperature
-
+
         return max_similarity
nextrec/models/match/sdm.py
CHANGED
@@ -6,7 +6,6 @@ Reference:
 [1] Ying H, Zhuang F, Zhang F, et al. Sequential recommender system based on hierarchical attention networks[C]
 //IJCAI. 2018: 3926-3932.
 """
-
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -21,42 +20,40 @@ class SDM(BaseMatchModel):
     @property
     def model_name(self) -> str:
         return "SDM"
-
+
     @property
     def support_training_modes(self) -> list[str]:
-        return [
-    def __init__(
-    ):
+        return ['pointwise']
+
+    def __init__(self,
+                 user_dense_features: list[DenseFeature] | None = None,
+                 user_sparse_features: list[SparseFeature] | None = None,
+                 user_sequence_features: list[SequenceFeature] | None = None,
+                 item_dense_features: list[DenseFeature] | None = None,
+                 item_sparse_features: list[SparseFeature] | None = None,
+                 item_sequence_features: list[SequenceFeature] | None = None,
+                 embedding_dim: int = 64,
+                 rnn_type: Literal['GRU', 'LSTM'] = 'GRU',
+                 rnn_hidden_size: int = 64,
+                 rnn_num_layers: int = 1,
+                 rnn_dropout: float = 0.0,
+                 use_short_term: bool = True,
+                 use_long_term: bool = True,
+                 item_dnn_hidden_units: list[int] = [256, 128],
+                 dnn_activation: str = 'relu',
+                 dnn_dropout: float = 0.0,
+                 training_mode: Literal['pointwise', 'pairwise', 'listwise'] = 'pointwise',
+                 num_negative_samples: int = 4,
+                 temperature: float = 1.0,
+                 similarity_metric: Literal['dot', 'cosine', 'euclidean'] = 'dot',
+                 device: str = 'cpu',
+                 embedding_l1_reg: float = 0.0,
+                 dense_l1_reg: float = 0.0,
+                 embedding_l2_reg: float = 0.0,
+                 dense_l2_reg: float = 0.0,
+                 early_stop_patience: int = 20,
+                 model_id: str = 'sdm'):
+
         super(SDM, self).__init__(
             user_dense_features=user_dense_features,
             user_sparse_features=user_sparse_features,
@@ -74,16 +71,16 @@ class SDM(BaseMatchModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=early_stop_patience,
-            model_id=model_id
+            model_id=model_id
         )
-
+
         self.embedding_dim = embedding_dim
         self.rnn_type = rnn_type
         self.rnn_hidden_size = rnn_hidden_size
         self.use_short_term = use_short_term
         self.use_long_term = use_long_term
         self.item_dnn_hidden_units = item_dnn_hidden_units
-
+
         # User tower
         user_features = []
         if user_dense_features:
@@ -92,54 +89,54 @@ class SDM(BaseMatchModel):
             user_features.extend(user_sparse_features)
         if user_sequence_features:
             user_features.extend(user_sequence_features)
-
+
         if len(user_features) > 0:
             self.user_embedding = EmbeddingLayer(user_features)
-
+
         if not user_sequence_features or len(user_sequence_features) == 0:
             raise ValueError("SDM requires at least one user sequence feature")
-
+
         seq_emb_dim = user_sequence_features[0].embedding_dim
-
-        if rnn_type ==
+
+        if rnn_type == 'GRU':
             self.rnn = nn.GRU(
                 input_size=seq_emb_dim,
                 hidden_size=rnn_hidden_size,
                 num_layers=rnn_num_layers,
                 batch_first=True,
-                dropout=rnn_dropout if rnn_num_layers > 1 else 0.0
+                dropout=rnn_dropout if rnn_num_layers > 1 else 0.0
             )
-        elif rnn_type ==
+        elif rnn_type == 'LSTM':
             self.rnn = nn.LSTM(
                 input_size=seq_emb_dim,
                 hidden_size=rnn_hidden_size,
                 num_layers=rnn_num_layers,
                 batch_first=True,
-                dropout=rnn_dropout if rnn_num_layers > 1 else 0.0
+                dropout=rnn_dropout if rnn_num_layers > 1 else 0.0
             )
         else:
             raise ValueError(f"Unknown RNN type: {rnn_type}")
-
+
         user_final_dim = 0
         if use_long_term:
-            user_final_dim += rnn_hidden_size
+            user_final_dim += rnn_hidden_size
         if use_short_term:
-            user_final_dim += seq_emb_dim
-
+            user_final_dim += seq_emb_dim
+
         for feat in user_dense_features or []:
             user_final_dim += 1
         for feat in user_sparse_features or []:
             user_final_dim += feat.embedding_dim
-
+
         # User DNN to final embedding
         self.user_dnn = MLP(
             input_dim=user_final_dim,
             dims=[rnn_hidden_size * 2, embedding_dim],
             output_layer=False,
             dropout=dnn_dropout,
-            activation=dnn_activation
+            activation=dnn_activation
         )
-
+
         # Item tower
         item_features = []
         if item_dense_features:
@@ -148,10 +145,10 @@ class SDM(BaseMatchModel):
             item_features.extend(item_sparse_features)
         if item_sequence_features:
             item_features.extend(item_sequence_features)
-
+
         if len(item_features) > 0:
             self.item_embedding = EmbeddingLayer(item_features)
-
+
         item_input_dim = 0
         for feat in item_dense_features or []:
             item_input_dim += 1
@@ -159,7 +156,7 @@ class SDM(BaseMatchModel):
             item_input_dim += feat.embedding_dim
         for feat in item_sequence_features or []:
             item_input_dim += feat.embedding_dim
-
+
         # Item DNN
         if len(item_dnn_hidden_units) > 0:
             item_dnn_units = item_dnn_hidden_units + [embedding_dim]
@@ -168,58 +165,53 @@ class SDM(BaseMatchModel):
                 dims=item_dnn_units,
                 output_layer=False,
                 dropout=dnn_dropout,
-                activation=dnn_activation
+                activation=dnn_activation
             )
         else:
             self.item_dnn = None
-
+
         self._register_regularization_weights(
-            embedding_attr=
+            embedding_attr='user_embedding',
+            include_modules=['rnn', 'user_dnn']
         )
         self._register_regularization_weights(
-            embedding_attr=
-            include_modules=[
+            embedding_attr='item_embedding',
+            include_modules=['item_dnn'] if self.item_dnn else []
        )
-
+
         self.to(device)
-
+
     def user_tower(self, user_input: dict) -> torch.Tensor:
         seq_feature = self.user_sequence_features[0]
         seq_input = user_input[seq_feature.name]
-
+
         embed = self.user_embedding.embed_dict[seq_feature.embedding_name]
         seq_emb = embed(seq_input.long()) # [batch_size, seq_len, seq_emb_dim]
-
-        if self.rnn_type ==
-            rnn_output, hidden = self.rnn(
-            ) # hidden: [num_layers, batch, hidden_size]
-        elif self.rnn_type == "LSTM":
+
+        if self.rnn_type == 'GRU':
+            rnn_output, hidden = self.rnn(seq_emb) # hidden: [num_layers, batch, hidden_size]
+        elif self.rnn_type == 'LSTM':
             rnn_output, (hidden, cell) = self.rnn(seq_emb)
-
+
         features_list = []
-
+
         if self.use_long_term:
             if self.rnn.num_layers > 1:
                 long_term = hidden[-1, :, :] # [batch_size, hidden_size]
             else:
                 long_term = hidden.squeeze(0) # [batch_size, hidden_size]
             features_list.append(long_term)
-
+
         if self.use_short_term:
-            mask = (
-                seq_input != seq_feature.padding_idx
-            ).float() # [batch_size, seq_len]
+            mask = (seq_input != seq_feature.padding_idx).float() # [batch_size, seq_len]
             seq_lengths = mask.sum(dim=1).long() - 1 # [batch_size]
             seq_lengths = torch.clamp(seq_lengths, min=0)
-
+
             batch_size = seq_emb.size(0)
             batch_indices = torch.arange(batch_size, device=seq_emb.device)
-            short_term = seq_emb[
-                batch_indices, seq_lengths, :
-            ] # [batch_size, seq_emb_dim]
+            short_term = seq_emb[batch_indices, seq_lengths, :] # [batch_size, seq_emb_dim]
             features_list.append(short_term)
-
+
         if self.user_dense_features:
             dense_features = []
             for feat in self.user_dense_features:
@@ -230,7 +222,7 @@ class SDM(BaseMatchModel):
                 dense_features.append(val)
             if dense_features:
                 features_list.append(torch.cat(dense_features, dim=1))
-
+
         if self.user_sparse_features:
             sparse_features = []
             for feat in self.user_sparse_features:
@@ -240,26 +232,22 @@ class SDM(BaseMatchModel):
                 sparse_features.append(sparse_emb)
             if sparse_features:
                 features_list.append(torch.cat(sparse_features, dim=1))
-
+
         user_features = torch.cat(features_list, dim=1)
         user_emb = self.user_dnn(user_features)
         user_emb = F.normalize(user_emb, p=2, dim=1)
-
+
         return user_emb
-
+
     def item_tower(self, item_input: dict) -> torch.Tensor:
         """Item tower"""
-        all_item_features =
-            self.item_dense_features
-            + self.item_sparse_features
-            + self.item_sequence_features
-        )
+        all_item_features = self.item_dense_features + self.item_sparse_features + self.item_sequence_features
         item_emb = self.item_embedding(item_input, all_item_features, squeeze_dim=True)
-
+
         if self.item_dnn is not None:
             item_emb = self.item_dnn(item_emb)
-
+
         # L2 normalization
         item_emb = F.normalize(item_emb, p=2, dim=1)
-
+
         return item_emb