nextrec 0.2.7__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +4 -8
- nextrec/basic/callback.py +1 -1
- nextrec/basic/features.py +33 -25
- nextrec/basic/layers.py +164 -601
- nextrec/basic/loggers.py +3 -4
- nextrec/basic/metrics.py +39 -115
- nextrec/basic/model.py +248 -174
- nextrec/basic/session.py +1 -5
- nextrec/data/__init__.py +12 -0
- nextrec/data/data_utils.py +3 -27
- nextrec/data/dataloader.py +26 -34
- nextrec/data/preprocessor.py +2 -1
- nextrec/loss/listwise.py +6 -4
- nextrec/loss/loss_utils.py +10 -6
- nextrec/loss/pairwise.py +5 -3
- nextrec/loss/pointwise.py +7 -13
- nextrec/models/match/mind.py +110 -1
- nextrec/models/multi_task/esmm.py +46 -27
- nextrec/models/multi_task/mmoe.py +48 -30
- nextrec/models/multi_task/ple.py +156 -141
- nextrec/models/multi_task/poso.py +413 -0
- nextrec/models/multi_task/share_bottom.py +43 -26
- nextrec/models/ranking/__init__.py +2 -0
- nextrec/models/ranking/dcn.py +20 -1
- nextrec/models/ranking/dcn_v2.py +84 -0
- nextrec/models/ranking/deepfm.py +44 -18
- nextrec/models/ranking/dien.py +130 -27
- nextrec/models/ranking/masknet.py +13 -67
- nextrec/models/ranking/widedeep.py +39 -18
- nextrec/models/ranking/xdeepfm.py +34 -1
- nextrec/utils/common.py +26 -1
- nextrec-0.3.1.dist-info/METADATA +306 -0
- nextrec-0.3.1.dist-info/RECORD +56 -0
- nextrec-0.2.7.dist-info/METADATA +0 -281
- nextrec-0.2.7.dist-info/RECORD +0 -54
- {nextrec-0.2.7.dist-info → nextrec-0.3.1.dist-info}/WHEEL +0 -0
- {nextrec-0.2.7.dist-info → nextrec-0.3.1.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/deepfm.py
CHANGED
@@ -1,9 +1,46 @@
 """
 Date: create on 27/10/2025
+Checkpoint: edit on 24/11/2025
 Author:
 Yang Zhou,zyaztec@gmail.com
 Reference:
-[1] Guo H, Tang R, Ye Y, et al.
+[1] Guo H, Tang R, Ye Y, et al. DeepFM: A factorization-machine based neural network
+for CTR prediction[J]. arXiv preprint arXiv:1703.04247, 2017.
+(https://arxiv.org/abs/1703.04247)
+
+DeepFM combines a Factorization Machine (FM) for explicit second-order feature
+interactions with a deep MLP for high-order nonlinear patterns. Both parts share
+the same embeddings, avoiding manual feature engineering and delivering strong CTR
+performance with end-to-end training.
+
+Workflow:
+(1) Shared embeddings encode sparse/sequence fields; dense features are concatenated
+(2) Wide (LR) term models first-order signals
+(3) FM term captures pairwise interactions via inner products
+(4) Deep MLP learns higher-order interactions over concatenated embeddings
+(5) Outputs from wide, FM, and deep parts are summed before the final prediction
+
+Key Advantages:
+- Joint explicit (FM) and implicit (MLP) interaction modeling
+- Shared embeddings remove the need for manual cross features
+- Simple to train end-to-end with minimal feature engineering
+- Strong baseline for CTR/CVR style ranking tasks
+
+DeepFM combines FM's explicit second-order feature interactions with an MLP's high-order
+nonlinear interactions; the three parts share embeddings and train end-to-end without
+hand-crafted cross features, commonly used for CTR/CVR estimation.
+
+Workflow:
+(1) Shared embeddings handle sparse/sequence features; dense features are concatenated
+(2) The wide (LR) part models first-order signals
+(3) FM models second-order interactions
+(4) The MLP learns high-order nonlinear interactions
+(5) Wide + FM + Deep are summed and fed into the prediction layer
+
+Key advantages:
+- Joint modeling of explicit and implicit interactions
+- Shared embeddings reduce hand-crafted cross features
+- Simple end-to-end training, easy to put into production
+- A common strong baseline for CTR/CVR tasks
 """
 
 import torch
@@ -59,30 +96,19 @@ class DeepFM(BaseModel):
 
         self.fm_features = sparse_features + sequence_features
         self.deep_features = dense_features + sparse_features + sequence_features
-
         self.embedding = EmbeddingLayer(features=self.deep_features)
-
         fm_emb_dim_total = sum([f.embedding_dim for f in self.fm_features])
-        deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
-
+        # deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
+        # dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        mlp_input_dim = self.embedding.input_dim
         self.linear = LR(fm_emb_dim_total)
         self.fm = FM(reduce_sum=True)
-        self.mlp = MLP(input_dim=
+        self.mlp = MLP(input_dim=mlp_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)
 
         # Register regularization weights
-        self._register_regularization_weights(
-
-            include_modules=['linear', 'mlp']
-        )
-
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
+        self._register_regularization_weights(embedding_attr='embedding', include_modules=['linear', 'mlp'])
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
 
     def forward(self, x):
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
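For reference, the FM term in step (3) of the workflow above is usually computed with the sum-square minus square-sum identity rather than an explicit double loop. A minimal sketch, assuming stacked field embeddings of shape [B, num_fields, embed_dim]; nextrec's actual FM layer in nextrec.basic.layers may differ in details such as the reduce_sum handling:

import torch

def fm_second_order(emb: torch.Tensor, reduce_sum: bool = True) -> torch.Tensor:
    # sum_{i<j} <v_i, v_j> = 0.5 * ((sum_i v_i)^2 - sum_i v_i^2), per embedding dim
    sum_sq = emb.sum(dim=1).pow(2)      # [B, embed_dim]
    sq_sum = emb.pow(2).sum(dim=1)      # [B, embed_dim]
    pairwise = 0.5 * (sum_sq - sq_sum)  # [B, embed_dim]
    return pairwise.sum(dim=1, keepdim=True) if reduce_sum else pairwise  # [B, 1] or [B, embed_dim]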
nextrec/models/ranking/dien.py
CHANGED
@@ -10,11 +10,135 @@ Reference:
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import EmbeddingLayer, MLP, AttentionPoolingLayer,
+from nextrec.basic.layers import EmbeddingLayer, MLP, AttentionPoolingLayer, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+class AUGRU(nn.Module):
+    """Attention-based GRU used in DIEN (Zhou et al., 2019).
+
+    Uses attention scores to weight the update of hidden states.
+    """
+
+    def __init__(self, input_size, hidden_size, bias=True):
+        super().__init__()
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+
+        self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, input_size))
+        self.weight_hh = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
+        if bias:
+            self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size))
+            self.bias_hh = nn.Parameter(torch.randn(3 * hidden_size))
+        else:
+            self.register_parameter('bias_ih', None)
+            self.register_parameter('bias_hh', None)
+
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        std = 1.0 / (self.hidden_size) ** 0.5
+        for weight in self.parameters():
+            weight.data.uniform_(-std, std)
+
+    def forward(self, x, att_scores):
+        """
+        Args:
+            x: [batch_size, seq_len, input_size]
+            att_scores: [batch_size, seq_len, 1] - attention scores
+        Returns:
+            output: [batch_size, seq_len, hidden_size]
+            hidden: [batch_size, hidden_size] - final hidden state
+        """
+        batch_size, seq_len, _ = x.shape
+        h = torch.zeros(batch_size, self.hidden_size, device=x.device)
+        outputs = []
+        for t in range(seq_len):
+            x_t = x[:, t, :]  # [batch_size, input_size]
+            att_t = att_scores[:, t, :]  # [batch_size, 1]
+
+            gi = F.linear(x_t, self.weight_ih, self.bias_ih)
+            gh = F.linear(h, self.weight_hh, self.bias_hh)
+            i_r, i_i, i_n = gi.chunk(3, 1)
+            h_r, h_i, h_n = gh.chunk(3, 1)
+
+            resetgate = torch.sigmoid(i_r + h_r)
+            inputgate = torch.sigmoid(i_i + h_i)
+            newgate = torch.tanh(i_n + resetgate * h_n)
+            # Use attention score to control update
+            h = (1 - att_t) * h + att_t * newgate
+            outputs.append(h.unsqueeze(1))
+        output = torch.cat(outputs, dim=1)
+
+        return output, h
+
+
+class DynamicGRU(nn.Module):
+    """Step-by-step GRU used as DIEN's interest extractor, with an auxiliary loss path (Zhou et al., 2019)."""
+
+    def __init__(self, input_size, hidden_size, bias=True):
+        super().__init__()
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+
+        # GRU parameters
+        self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, input_size))
+        self.weight_hh = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
+        if bias:
+            self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size))
+            self.bias_hh = nn.Parameter(torch.randn(3 * hidden_size))
+        else:
+            self.register_parameter('bias_ih', None)
+            self.register_parameter('bias_hh', None)
+
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        std = 1.0 / (self.hidden_size) ** 0.5
+        for weight in self.parameters():
+            weight.data.uniform_(-std, std)
+
+    def forward(self, x, att_scores=None):
+        """
+        Args:
+            x: [batch_size, seq_len, input_size]
+            att_scores: [batch_size, seq_len] - attention scores for auxiliary loss
+        Returns:
+            output: [batch_size, seq_len, hidden_size]
+            hidden: [batch_size, hidden_size] - final hidden state
+        """
+        batch_size, seq_len, _ = x.shape
+
+        # Initialize hidden state
+        h = torch.zeros(batch_size, self.hidden_size, device=x.device)
+
+        outputs = []
+        for t in range(seq_len):
+            x_t = x[:, t, :]  # [batch_size, input_size]
+
+            # GRU computation
+            gi = F.linear(x_t, self.weight_ih, self.bias_ih)
+            gh = F.linear(h, self.weight_hh, self.bias_hh)
+            i_r, i_i, i_n = gi.chunk(3, 1)
+            h_r, h_i, h_n = gh.chunk(3, 1)
+
+            resetgate = torch.sigmoid(i_r + h_r)
+            inputgate = torch.sigmoid(i_i + h_i)
+            newgate = torch.tanh(i_n + resetgate * h_n)
+            h = newgate + inputgate * (h - newgate)
+
+            outputs.append(h.unsqueeze(1))
+
+        output = torch.cat(outputs, dim=1)  # [batch_size, seq_len, hidden_size]
+
+        return output, h
+
 
 class DIEN(BaseModel):
     @property
@@ -76,9 +200,6 @@ class DIEN(BaseModel):
 
         self.other_sparse_features = sparse_features[:-1] if self.candidate_feature else sparse_features
         self.dense_features_list = dense_features
-
-        # All features for embedding
-        self.all_features = dense_features + sparse_features + sequence_features
 
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)
@@ -103,10 +224,7 @@ class DIEN(BaseModel):
         )
 
         # Interest Evolution Layer (AUGRU)
-        self.interest_evolution = AUGRU(
-            input_size=gru_hidden_size,
-            hidden_size=gru_hidden_size
-        )
+        self.interest_evolution = AUGRU(input_size=gru_hidden_size, hidden_size=gru_hidden_size)
 
         # Calculate MLP input dimension
         mlp_input_dim = 0
@@ -115,38 +233,23 @@ class DIEN(BaseModel):
         mlp_input_dim += gru_hidden_size  # final interest state
         mlp_input_dim += sum([f.embedding_dim for f in self.other_sparse_features])
         mlp_input_dim += sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
-
         # MLP for final prediction
         self.mlp = MLP(input_dim=mlp_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)
-
         # Register regularization weights
-        self._register_regularization_weights(
-
-            include_modules=['interest_extractor', 'interest_evolution', 'attention_layer', 'mlp', 'candidate_proj']
-        )
-
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
+        self._register_regularization_weights(embedding_attr='embedding', include_modules=['interest_extractor', 'interest_evolution', 'attention_layer', 'mlp', 'candidate_proj'])
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
 
     def forward(self, x):
         # Get candidate item embedding
         if self.candidate_feature:
-            candidate_emb = self.embedding.embed_dict[self.candidate_feature.embedding_name](
-                x[self.candidate_feature.name].long()
-            )  # [B, emb_dim]
+            candidate_emb = self.embedding.embed_dict[self.candidate_feature.embedding_name](x[self.candidate_feature.name].long())  # [B, emb_dim]
         else:
             raise ValueError("DIEN requires a candidate item feature")
 
         # Get behavior sequence embedding
         behavior_seq = x[self.behavior_feature.name].long()  # [B, seq_len]
-        behavior_emb = self.embedding.embed_dict[self.behavior_feature.embedding_name](
-            behavior_seq
-        )  # [B, seq_len, emb_dim]
+        behavior_emb = self.embedding.embed_dict[self.behavior_feature.embedding_name](behavior_seq)  # [B, seq_len, emb_dim]
 
         # Create mask for padding
         if self.behavior_feature.padding_idx is not None:
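A quick shape check for the AUGRU added above, with random tensors (illustrative only; in DIEN the attention scores come from the attention layer over the candidate item):

import torch

augru = AUGRU(input_size=16, hidden_size=16)
behavior = torch.randn(4, 10, 16)                  # [B=4, seq_len=10, input_size=16]
att = torch.softmax(torch.randn(4, 10, 1), dim=1)  # [B, seq_len, 1] update weights in (0, 1)
output, hidden = augru(behavior, att)
assert output.shape == (4, 10, 16) and hidden.shape == (4, 16)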
nextrec/models/ranking/masknet.py
CHANGED

@@ -1,6 +1,6 @@
 """
 Date: create on 09/11/2025
-Checkpoint: edit on
+Checkpoint: edit on 29/11/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
 [1] Wang Z, She Q, Zhang J. MaskNet: Introducing Feature-Wise
@@ -144,7 +144,8 @@ class MaskNet(BaseModel):
 
     @property
    def task_type(self):
-
+        # Align with PredictionLayer supported task types
+        return "binary"
 
     def __init__(
         self,
@@ -198,26 +199,19 @@ class MaskNet(BaseModel):
         self.dense_features = dense_features
         self.sparse_features = sparse_features
         self.sequence_features = sequence_features
-        self.mask_features = self.
+        self.mask_features = self.all_features  # use all features for masking
         assert len(self.mask_features) > 0, "MaskNet requires at least one feature for masking."
-
         self.embedding = EmbeddingLayer(features=self.mask_features)
-
         self.num_fields = len(self.mask_features)
-
         self.embedding_dim = getattr(self.mask_features[0], "embedding_dim", None)
         assert self.embedding_dim is not None, "MaskNet requires mask_features to have 'embedding_dim' defined."
 
         for f in self.mask_features:
             edim = getattr(f, "embedding_dim", None)
             if edim is None or edim != self.embedding_dim:
-                raise ValueError(
-                    f"MaskNet expects identical embedding_dim across all mask_features, "
-                    f"but got {edim} for feature {getattr(f, 'name', type(f))}."
-                )
+                raise ValueError(f"MaskNet expects identical embedding_dim across all mask_features, but got {edim} for feature {getattr(f, 'name', type(f))}.")
 
         self.v_emb_dim = self.num_fields * self.embedding_dim
-
         self.model_type = model_type.lower()
         assert self.model_type in ("serial", "parallel"), "model_type must be either 'serial' or 'parallel'."
 
@@ -226,68 +220,25 @@ class MaskNet(BaseModel):
         self.block_dropout = nn.Dropout(block_dropout) if block_dropout > 0 else nn.Identity()
 
         if self.model_type == "serial":
-            self.first_block = MaskBlockOnEmbedding(
-                num_fields=self.num_fields,
-                embedding_dim=self.embedding_dim,
-                mask_hidden_dim=mask_hidden_dim,
-                hidden_dim=block_hidden_dim,
-            )
-
+            self.first_block = MaskBlockOnEmbedding(num_fields=self.num_fields, embedding_dim=self.embedding_dim, mask_hidden_dim=mask_hidden_dim, hidden_dim=block_hidden_dim,)
             self.hidden_blocks = nn.ModuleList(
-                [
-                    MaskBlockOnHidden(
-                        num_fields=self.num_fields,
-                        embedding_dim=self.embedding_dim,
-                        mask_hidden_dim=mask_hidden_dim,
-                        hidden_dim=block_hidden_dim,
-                    )
-                    for _ in range(self.num_blocks - 1)
-                ]
-            )
-
+                [MaskBlockOnHidden(num_fields=self.num_fields, embedding_dim=self.embedding_dim, mask_hidden_dim=mask_hidden_dim, hidden_dim=block_hidden_dim) for _ in range(self.num_blocks - 1)])
             self.mask_blocks = nn.ModuleList([self.first_block, *self.hidden_blocks])
             self.output_layer = nn.Linear(block_hidden_dim, 1)
             self.final_mlp = None
 
         else:  # parallel
-            self.mask_blocks = nn.ModuleList(
-
-                MaskBlockOnEmbedding(
-                    num_fields=self.num_fields,
-                    embedding_dim=self.embedding_dim,
-                    mask_hidden_dim=mask_hidden_dim,
-                    hidden_dim=block_hidden_dim,
-                )
-                for _ in range(self.num_blocks)
-                ]
-            )
-
-            self.final_mlp = MLP(
-                input_dim=self.num_blocks * block_hidden_dim,
-                **mlp_params,
-            )
+            self.mask_blocks = nn.ModuleList([MaskBlockOnEmbedding(num_fields=self.num_fields, embedding_dim=self.embedding_dim, mask_hidden_dim=mask_hidden_dim, hidden_dim=block_hidden_dim) for _ in range(self.num_blocks)])
+            self.final_mlp = MLP(input_dim=self.num_blocks * block_hidden_dim, **mlp_params)
             self.output_layer = None
-
         self.prediction_layer = PredictionLayer(task_type=self.task_type)
 
         if self.model_type == "serial":
-            self._register_regularization_weights(
-                embedding_attr="embedding",
-                include_modules=["mask_blocks", "output_layer"],
-            )
+            self._register_regularization_weights(embedding_attr="embedding", include_modules=["mask_blocks", "output_layer"],)
         # serial
         else:
-            self._register_regularization_weights(
-
-                include_modules=["mask_blocks", "final_mlp"],
-            )
-
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
+            self._register_regularization_weights(embedding_attr="embedding", include_modules=["mask_blocks", "final_mlp"])
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
 
     def forward(self, x: dict[str, torch.Tensor]) -> torch.Tensor:
         field_emb = self.embedding(x=x, features=self.mask_features, squeeze_dim=False)
@@ -300,20 +251,15 @@ class MaskNet(BaseModel):
                 h = block(field_emb, v_emb_flat)  # [B, block_hidden_dim]
                 h = self.block_dropout(h)
                 block_outputs.append(h)
-
             concat_hidden = torch.cat(block_outputs, dim=-1)
             logit = self.final_mlp(concat_hidden)  # [B, 1]
-
         # serial
         else:
             hidden = self.first_block(field_emb, v_emb_flat)
             hidden = self.block_dropout(hidden)
-
             for block in self.hidden_blocks:
                 hidden = block(hidden, v_emb_flat)
                 hidden = self.block_dropout(hidden)
-
             logit = self.output_layer(hidden)  # [B, 1]
-
         y = self.prediction_layer(logit)
-        return y
+        return y
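The MaskBlockOnEmbedding and MaskBlockOnHidden internals are not part of this diff. For orientation, a minimal instance-guided mask block in the spirit of the MaskNet paper, for the on-embedding case where the block input and the flat embedding share the same width; the class name and layer details here are illustrative sketches, not nextrec's implementation:

import torch
import torch.nn as nn

class MaskBlockSketch(nn.Module):
    def __init__(self, input_dim: int, mask_hidden_dim: int, hidden_dim: int):
        super().__init__()
        # Instance-guided mask: project the flat embedding up, then back to input width
        self.mask_mlp = nn.Sequential(
            nn.Linear(input_dim, mask_hidden_dim), nn.ReLU(),
            nn.Linear(mask_hidden_dim, input_dim),
        )
        self.hidden = nn.Linear(input_dim, hidden_dim, bias=False)
        self.ln = nn.LayerNorm(hidden_dim)

    def forward(self, block_input: torch.Tensor, v_emb_flat: torch.Tensor) -> torch.Tensor:
        masked = block_input * self.mask_mlp(v_emb_flat)  # element-wise gating by the mask
        return torch.relu(self.ln(self.hidden(masked)))   # hidden layer + LayerNorm + ReLU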
nextrec/models/ranking/widedeep.py
CHANGED

@@ -1,11 +1,42 @@
 """
 Date: create on 09/11/2025
+Checkpoint: edit on 24/11/2025
 Author:
 Yang Zhou,zyaztec@gmail.com
 Reference:
-[1] Cheng H T, Koc L, Harmsen J, et al. Wide &
-//Proceedings of the 1st
+[1] Cheng H T, Koc L, Harmsen J, et al. Wide & Deep learning for recommender systems[C]
+//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. 2016: 7-10.
 (https://arxiv.org/abs/1606.07792)
+
+Wide & Deep blends a linear wide component (memorization of cross features) with a
+deep neural network (generalization) sharing the same feature space. The wide part
+captures co-occurrence patterns and manual crosses, while the deep part learns dense
+representations and nonlinear interactions, improving both accuracy and coverage.
+
+Workflow:
+(1) Wide: linear/logistic model over raw or embedded features
+(2) Deep: embeddings plus dense features feed into an MLP
+(3) Sum wide and deep logits, then apply the final prediction layer
+
+Key Advantages:
+- Balances memorization (wide) and generalization (deep)
+- Compatible with manual crosses and automatically learned embeddings
+- Simple architecture with strong baselines for CTR/ranking
+- Shared feature space reduces duplication and engineering overhead
+
+Wide & Deep pairs a wide linear part (memorizing co-occurrence and manual crosses) with a
+deep network part (generalizing nonlinear interactions) over shared feature representations,
+keeping memorization while adding generalization; commonly used for CTR/ranking tasks.
+
+Workflow:
+(1) Wide: linear/logistic regression over raw or embedded features
+(2) Deep: embeddings and dense features feed into an MLP
+(3) Wide and deep outputs are summed before the final prediction
+
+Key advantages:
+- Balances memorization and generalization
+- Supports combining manual crosses with learned embeddings
+- Simple structure with stable baseline performance
+- Shared feature space reduces engineering overhead
 """
 
 import torch
@@ -63,7 +94,6 @@ class WideDeep(BaseModel):
 
         # Wide part: use all features for linear model
         self.wide_features = sparse_features + sequence_features
-
         # Deep part: use all features
         self.deep_features = dense_features + sparse_features + sequence_features
 
@@ -75,23 +105,14 @@ class WideDeep(BaseModel):
         self.linear = LR(wide_dim)
 
         # Deep part: MLP
-        deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
-
+        input_dim = self.embedding.input_dim
+        # deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
+        # dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        self.mlp = MLP(input_dim=input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)
-
         # Register regularization weights
-        self._register_regularization_weights(
-
-            include_modules=['linear', 'mlp']
-        )
-
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
+        self._register_regularization_weights(embedding_attr='embedding', include_modules=['linear', 'mlp'])
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
 
     def forward(self, x):
         # Deep part
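Step (3) of the workflow reduces to adding the two logits before the link function. Schematically, a sketch of the combination rule with stand-in tensors, not this class's exact forward:

import torch

wide_logit = torch.randn(8, 1)  # stand-in for the LR output over wide features
deep_logit = torch.randn(8, 1)  # stand-in for the MLP head output
y = torch.sigmoid(wide_logit + deep_logit)  # binary task: the prediction layer applies the sigmoid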
nextrec/models/ranking/xdeepfm.py
CHANGED

@@ -11,12 +11,45 @@ Reference:
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import LR, EmbeddingLayer, MLP,
+from nextrec.basic.layers import LR, EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+class CIN(nn.Module):
+    """Compressed Interaction Network from xDeepFM (Lian et al., 2018)."""
 
+    def __init__(self, input_dim, cin_size, split_half=True):
+        super().__init__()
+        self.num_layers = len(cin_size)
+        self.split_half = split_half
+        self.conv_layers = torch.nn.ModuleList()
+        prev_dim, fc_input_dim = input_dim, 0
+        for i in range(self.num_layers):
+            cross_layer_size = cin_size[i]
+            self.conv_layers.append(torch.nn.Conv1d(input_dim * prev_dim, cross_layer_size, 1, stride=1, dilation=1, bias=True))
+            if self.split_half and i != self.num_layers - 1:
+                cross_layer_size //= 2
+            prev_dim = cross_layer_size
+            fc_input_dim += prev_dim
+        self.fc = torch.nn.Linear(fc_input_dim, 1)
+
+    def forward(self, x):
+        xs = list()
+        x0, h = x.unsqueeze(2), x
+        for i in range(self.num_layers):
+            x = x0 * h.unsqueeze(1)
+            batch_size, f0_dim, fin_dim, embed_dim = x.shape
+            x = x.view(batch_size, f0_dim * fin_dim, embed_dim)
+            x = F.relu(self.conv_layers[i](x))
+            if self.split_half and i != self.num_layers - 1:
+                x, h = torch.split(x, x.shape[1] // 2, dim=1)
+            else:
+                h = x
+            xs.append(x)
+        return self.fc(torch.sum(torch.cat(xs, dim=1), 2))
+
 class xDeepFM(BaseModel):
     @property
     def model_name(self):
nextrec/utils/common.py
CHANGED
@@ -1,5 +1,8 @@
 import torch
 import platform
+from collections import OrderedDict
+from typing import Sequence, Union, TYPE_CHECKING
+
 
 def resolve_device() -> str:
     """Select a usable device with graceful fallback."""
@@ -13,4 +16,26 @@ def resolve_device() -> str:
         major, minor = 0, 0
     if major >= 14:
         return "mps"
-    return "cpu"
+    return "cpu"
+
+
+def merge_features(primary, secondary) -> list:
+    """
+    Merge two feature lists while preserving order and deduplicating by feature name.
+    Later duplicates are skipped.
+    """
+    merged: OrderedDict[str, object] = OrderedDict()
+    for feat in list(primary or []) + list(secondary or []):
+        merged.setdefault(feat.name, feat)
+    return list(merged.values())
+
+
+def get_mlp_output_dim(params: dict, fallback: int) -> int:
+    """
+    Get the output dimension of an MLP-like config.
+    If dims are provided, use the last dim; otherwise fall back to input dim.
+    """
+    dims = params.get("dims")
+    if dims:
+        return dims[-1]
+    return fallback