nextrec 0.2.6__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. nextrec/__version__.py +1 -1
  2. nextrec/basic/activation.py +4 -8
  3. nextrec/basic/callback.py +1 -1
  4. nextrec/basic/features.py +33 -25
  5. nextrec/basic/layers.py +164 -601
  6. nextrec/basic/loggers.py +3 -4
  7. nextrec/basic/metrics.py +39 -115
  8. nextrec/basic/model.py +248 -174
  9. nextrec/basic/session.py +1 -5
  10. nextrec/data/__init__.py +12 -0
  11. nextrec/data/data_utils.py +3 -27
  12. nextrec/data/dataloader.py +26 -34
  13. nextrec/data/preprocessor.py +2 -1
  14. nextrec/loss/listwise.py +6 -4
  15. nextrec/loss/loss_utils.py +10 -6
  16. nextrec/loss/pairwise.py +5 -3
  17. nextrec/loss/pointwise.py +7 -13
  18. nextrec/models/match/mind.py +110 -1
  19. nextrec/models/multi_task/esmm.py +46 -27
  20. nextrec/models/multi_task/mmoe.py +48 -30
  21. nextrec/models/multi_task/ple.py +156 -141
  22. nextrec/models/multi_task/poso.py +413 -0
  23. nextrec/models/multi_task/share_bottom.py +43 -26
  24. nextrec/models/ranking/__init__.py +2 -0
  25. nextrec/models/ranking/autoint.py +1 -1
  26. nextrec/models/ranking/dcn.py +20 -1
  27. nextrec/models/ranking/dcn_v2.py +84 -0
  28. nextrec/models/ranking/deepfm.py +44 -18
  29. nextrec/models/ranking/dien.py +130 -27
  30. nextrec/models/ranking/masknet.py +13 -67
  31. nextrec/models/ranking/widedeep.py +39 -18
  32. nextrec/models/ranking/xdeepfm.py +34 -1
  33. nextrec/utils/common.py +26 -1
  34. nextrec-0.3.1.dist-info/METADATA +306 -0
  35. nextrec-0.3.1.dist-info/RECORD +56 -0
  36. {nextrec-0.2.6.dist-info → nextrec-0.3.1.dist-info}/WHEEL +1 -1
  37. nextrec-0.2.6.dist-info/METADATA +0 -281
  38. nextrec-0.2.6.dist-info/RECORD +0 -54
  39. {nextrec-0.2.6.dist-info → nextrec-0.3.1.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/deepfm.py CHANGED
@@ -1,9 +1,46 @@
 """
 Date: create on 27/10/2025
+Checkpoint: edit on 24/11/2025
 Author:
 Yang Zhou,zyaztec@gmail.com
 Reference:
-[1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.(https://arxiv.org/abs/1703.04247)
+[1] Guo H, Tang R, Ye Y, et al. DeepFM: A factorization-machine based neural network
+for CTR prediction[J]. arXiv preprint arXiv:1703.04247, 2017.
+(https://arxiv.org/abs/1703.04247)
+
+DeepFM combines a Factorization Machine (FM) for explicit second-order feature
+interactions with a deep MLP for high-order nonlinear patterns. Both parts share
+the same embeddings, avoiding manual feature engineering and delivering strong CTR
+performance with end-to-end training.
+
+Workflow:
+(1) Shared embeddings encode sparse/sequence fields; dense features are concatenated
+(2) Wide (LR) term models first-order signals
+(3) FM term captures pairwise interactions via inner products
+(4) Deep MLP learns higher-order interactions over concatenated embeddings
+(5) Outputs from wide, FM, and deep parts are summed before the final prediction
+
+Key Advantages:
+- Joint explicit (FM) and implicit (MLP) interaction modeling
+- Shared embeddings remove the need for manual cross features
+- Simple to train end-to-end with minimal feature engineering
+- Strong baseline for CTR/CVR style ranking tasks
+
+DeepFM combines the explicit second-order interactions of FM with the high-order
+nonlinear interactions of an MLP; the parts share embeddings, so the model trains
+end-to-end without hand-crafted cross features. Commonly used for CTR/CVR estimation.
+
+Workflow:
+(1) Shared embeddings handle sparse/sequence features; dense features are concatenated
+(2) Wide (LR) models first-order signals
+(3) FM models second-order interactions
+(4) MLP learns high-order nonlinear interactions
+(5) Wide + FM + Deep are summed before prediction
+
+Key advantages:
+- Joint modeling of explicit and implicit interactions
+- Shared embeddings reduce manual cross features
+- Simple end-to-end training, easy to deploy
+- A common strong baseline for CTR/CVR tasks
 """
 
 import torch
@@ -59,30 +96,19 @@ class DeepFM(BaseModel):
 
         self.fm_features = sparse_features + sequence_features
         self.deep_features = dense_features + sparse_features + sequence_features
-
         self.embedding = EmbeddingLayer(features=self.deep_features)
-
         fm_emb_dim_total = sum([f.embedding_dim for f in self.fm_features])
-        deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
-
+        # deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
+        # dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        mlp_input_dim = self.embedding.input_dim
         self.linear = LR(fm_emb_dim_total)
         self.fm = FM(reduce_sum=True)
-        self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
+        self.mlp = MLP(input_dim=mlp_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)
 
         # Register regularization weights
-        self._register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear', 'mlp']
-        )
-
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
+        self._register_regularization_weights(embedding_attr='embedding', include_modules=['linear', 'mlp'])
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
 
     def forward(self, x):
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
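
Note: the FM term consumed here (FM(reduce_sum=True)) reduces all pairwise inner products with the standard sum-square identity. A minimal standalone sketch of that computation (plain tensors, not the package's FM class):

import torch

def fm_second_order(emb: torch.Tensor) -> torch.Tensor:
    # emb: [B, num_fields, emb_dim] stacked field embeddings
    # sum_{i<j} <e_i, e_j> = 0.5 * ((sum_f e_f)^2 - sum_f e_f^2), reduced over emb_dim
    square_of_sum = emb.sum(dim=1).pow(2)   # [B, emb_dim]
    sum_of_square = emb.pow(2).sum(dim=1)   # [B, emb_dim]
    return 0.5 * (square_of_sum - sum_of_square).sum(dim=1, keepdim=True)  # [B, 1]

emb = torch.randn(4, 5, 8)          # batch of 4, 5 fields, dim-8 embeddings
print(fm_second_order(emb).shape)   # torch.Size([4, 1])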
nextrec/models/ranking/dien.py CHANGED
@@ -10,11 +10,135 @@ Reference:
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import EmbeddingLayer, MLP, AttentionPoolingLayer, DynamicGRU, AUGRU, PredictionLayer
+from nextrec.basic.layers import EmbeddingLayer, MLP, AttentionPoolingLayer, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+class AUGRU(nn.Module):
+    """
+    Attention-based GRU used in DIEN (Zhou et al., 2019).
+    Attention scores gate the update of the hidden state at each step.
+    """
+
+    def __init__(self, input_size, hidden_size, bias=True):
+        super().__init__()
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+
+        self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, input_size))
+        self.weight_hh = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
+        if bias:
+            self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size))
+            self.bias_hh = nn.Parameter(torch.randn(3 * hidden_size))
+        else:
+            self.register_parameter('bias_ih', None)
+            self.register_parameter('bias_hh', None)
+
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        std = 1.0 / (self.hidden_size) ** 0.5
+        for weight in self.parameters():
+            weight.data.uniform_(-std, std)
+
+    def forward(self, x, att_scores):
+        """
+        Args:
+            x: [batch_size, seq_len, input_size]
+            att_scores: [batch_size, seq_len, 1] - attention scores
+        Returns:
+            output: [batch_size, seq_len, hidden_size]
+            hidden: [batch_size, hidden_size] - final hidden state
+        """
+        batch_size, seq_len, _ = x.shape
+        h = torch.zeros(batch_size, self.hidden_size, device=x.device)
+        outputs = []
+        for t in range(seq_len):
+            x_t = x[:, t, :]  # [batch_size, input_size]
+            att_t = att_scores[:, t, :]  # [batch_size, 1]
+
+            gi = F.linear(x_t, self.weight_ih, self.bias_ih)
+            gh = F.linear(h, self.weight_hh, self.bias_hh)
+            i_r, i_i, i_n = gi.chunk(3, 1)
+            h_r, h_i, h_n = gh.chunk(3, 1)
+
+            resetgate = torch.sigmoid(i_r + h_r)
+            inputgate = torch.sigmoid(i_i + h_i)  # standard GRU gate; unused in the attention update below
+            newgate = torch.tanh(i_n + resetgate * h_n)
+            # Attention score replaces the update gate to control the state update
+            h = (1 - att_t) * h + att_t * newgate
+            outputs.append(h.unsqueeze(1))
+        output = torch.cat(outputs, dim=1)
+
+        return output, h
+
+
+class DynamicGRU(nn.Module):
+    """
+    GRU unrolled step by step for DIEN; serves as the interest extractor and
+    exposes per-step hidden states for the auxiliary loss.
+    """
+
+    def __init__(self, input_size, hidden_size, bias=True):
+        super().__init__()
+        self.input_size = input_size
+        self.hidden_size = hidden_size
+
+        # GRU parameters
+        self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, input_size))
+        self.weight_hh = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
+        if bias:
+            self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size))
+            self.bias_hh = nn.Parameter(torch.randn(3 * hidden_size))
+        else:
+            self.register_parameter('bias_ih', None)
+            self.register_parameter('bias_hh', None)
+
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        std = 1.0 / (self.hidden_size) ** 0.5
+        for weight in self.parameters():
+            weight.data.uniform_(-std, std)
+
+    def forward(self, x, att_scores=None):
+        """
+        Args:
+            x: [batch_size, seq_len, input_size]
+            att_scores: unused; kept for interface parity with AUGRU
+        Returns:
+            output: [batch_size, seq_len, hidden_size]
+            hidden: [batch_size, hidden_size] - final hidden state
+        """
+        batch_size, seq_len, _ = x.shape
+
+        # Initialize hidden state
+        h = torch.zeros(batch_size, self.hidden_size, device=x.device)
+
+        outputs = []
+        for t in range(seq_len):
+            x_t = x[:, t, :]  # [batch_size, input_size]
+
+            # GRU computation
+            gi = F.linear(x_t, self.weight_ih, self.bias_ih)
+            gh = F.linear(h, self.weight_hh, self.bias_hh)
+            i_r, i_i, i_n = gi.chunk(3, 1)
+            h_r, h_i, h_n = gh.chunk(3, 1)
+
+            resetgate = torch.sigmoid(i_r + h_r)
+            inputgate = torch.sigmoid(i_i + h_i)
+            newgate = torch.tanh(i_n + resetgate * h_n)
+            h = newgate + inputgate * (h - newgate)
+
+            outputs.append(h.unsqueeze(1))
+
+        output = torch.cat(outputs, dim=1)  # [batch_size, seq_len, hidden_size]
+
+        return output, h
+
 
 class DIEN(BaseModel):
     @property
@@ -76,9 +200,6 @@ class DIEN(BaseModel):
 
         self.other_sparse_features = sparse_features[:-1] if self.candidate_feature else sparse_features
         self.dense_features_list = dense_features
-
-        # All features for embedding
-        self.all_features = dense_features + sparse_features + sequence_features
 
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)
@@ -103,10 +224,7 @@
         )
 
         # Interest Evolution Layer (AUGRU)
-        self.interest_evolution = AUGRU(
-            input_size=gru_hidden_size,
-            hidden_size=gru_hidden_size
-        )
+        self.interest_evolution = AUGRU(input_size=gru_hidden_size, hidden_size=gru_hidden_size)
 
         # Calculate MLP input dimension
         mlp_input_dim = 0
@@ -115,38 +233,23 @@
         mlp_input_dim += gru_hidden_size  # final interest state
         mlp_input_dim += sum([f.embedding_dim for f in self.other_sparse_features])
         mlp_input_dim += sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
-
         # MLP for final prediction
         self.mlp = MLP(input_dim=mlp_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)
-
         # Register regularization weights
-        self._register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['interest_extractor', 'interest_evolution', 'attention_layer', 'mlp', 'candidate_proj']
-        )
-
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
+        self._register_regularization_weights(embedding_attr='embedding', include_modules=['interest_extractor', 'interest_evolution', 'attention_layer', 'mlp', 'candidate_proj'])
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
 
     def forward(self, x):
        # Get candidate item embedding
        if self.candidate_feature:
-            candidate_emb = self.embedding.embed_dict[self.candidate_feature.embedding_name](
-                x[self.candidate_feature.name].long()
-            )  # [B, emb_dim]
+            candidate_emb = self.embedding.embed_dict[self.candidate_feature.embedding_name](x[self.candidate_feature.name].long())  # [B, emb_dim]
        else:
            raise ValueError("DIEN requires a candidate item feature")
 
        # Get behavior sequence embedding
        behavior_seq = x[self.behavior_feature.name].long()  # [B, seq_len]
-        behavior_emb = self.embedding.embed_dict[self.behavior_feature.embedding_name](
-            behavior_seq
-        )  # [B, seq_len, emb_dim]
+        behavior_emb = self.embedding.embed_dict[self.behavior_feature.embedding_name](behavior_seq)  # [B, seq_len, emb_dim]
 
        # Create mask for padding
        if self.behavior_feature.padding_idx is not None:
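
Note: the two recurrent units added above chain as extractor → attention → evolution. A shape-level sketch using the AUGRU and DynamicGRU classes from this diff (random tensors stand in for real embeddings and attention scores):

import torch

B, T, E, H = 2, 6, 16, 32
behavior_emb = torch.randn(B, T, E)            # [B, seq_len, emb_dim]

extractor = DynamicGRU(input_size=E, hidden_size=H)
interest_states, _ = extractor(behavior_emb)   # [B, T, H] per-step interests

att_scores = torch.rand(B, T, 1)               # stand-in for candidate-aware attention
evolution = AUGRU(input_size=H, hidden_size=H)
seq_out, final_interest = evolution(interest_states, att_scores)
print(seq_out.shape, final_interest.shape)     # [2, 6, 32] and [2, 32]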
nextrec/models/ranking/masknet.py CHANGED
@@ -1,6 +1,6 @@
 """
 Date: create on 09/11/2025
-Checkpoint: edit on 24/11/2025
+Checkpoint: edit on 29/11/2025
 Author: Yang Zhou, zyaztec@gmail.com
 Reference:
 [1] Wang Z, She Q, Zhang J. MaskNet: Introducing Feature-Wise
@@ -144,7 +144,8 @@ class MaskNet(BaseModel):
 
     @property
     def task_type(self):
-        return "binary_classification"
+        # Align with PredictionLayer supported task types
+        return "binary"
 
     def __init__(
         self,
@@ -198,26 +199,19 @@
         self.dense_features = dense_features
         self.sparse_features = sparse_features
         self.sequence_features = sequence_features
-        self.mask_features = self.sparse_features + self.sequence_features + self.dense_features
+        self.mask_features = self.all_features  # use all features for masking
         assert len(self.mask_features) > 0, "MaskNet requires at least one feature for masking."
-
         self.embedding = EmbeddingLayer(features=self.mask_features)
-
         self.num_fields = len(self.mask_features)
-
         self.embedding_dim = getattr(self.mask_features[0], "embedding_dim", None)
         assert self.embedding_dim is not None, "MaskNet requires mask_features to have 'embedding_dim' defined."
 
         for f in self.mask_features:
             edim = getattr(f, "embedding_dim", None)
             if edim is None or edim != self.embedding_dim:
-                raise ValueError(
-                    f"MaskNet expects identical embedding_dim across all mask_features, "
-                    f"but got {edim} for feature {getattr(f, 'name', type(f))}."
-                )
+                raise ValueError(f"MaskNet expects identical embedding_dim across all mask_features, but got {edim} for feature {getattr(f, 'name', type(f))}.")
 
         self.v_emb_dim = self.num_fields * self.embedding_dim
-
         self.model_type = model_type.lower()
         assert self.model_type in ("serial", "parallel"), "model_type must be either 'serial' or 'parallel'."
223
217
 
@@ -226,68 +220,25 @@ class MaskNet(BaseModel):
226
220
  self.block_dropout = nn.Dropout(block_dropout) if block_dropout > 0 else nn.Identity()
227
221
 
228
222
  if self.model_type == "serial":
229
- self.first_block = MaskBlockOnEmbedding(
230
- num_fields=self.num_fields,
231
- embedding_dim=self.embedding_dim,
232
- mask_hidden_dim=mask_hidden_dim,
233
- hidden_dim=block_hidden_dim,
234
- )
235
-
223
+ self.first_block = MaskBlockOnEmbedding(num_fields=self.num_fields, embedding_dim=self.embedding_dim, mask_hidden_dim=mask_hidden_dim, hidden_dim=block_hidden_dim,)
236
224
  self.hidden_blocks = nn.ModuleList(
237
- [
238
- MaskBlockOnHidden(
239
- num_fields=self.num_fields,
240
- embedding_dim=self.embedding_dim,
241
- mask_hidden_dim=mask_hidden_dim,
242
- hidden_dim=block_hidden_dim,
243
- )
244
- for _ in range(self.num_blocks - 1)
245
- ]
246
- )
247
-
225
+ [MaskBlockOnHidden(num_fields=self.num_fields, embedding_dim=self.embedding_dim, mask_hidden_dim=mask_hidden_dim, hidden_dim=block_hidden_dim) for _ in range(self.num_blocks - 1)])
248
226
  self.mask_blocks = nn.ModuleList([self.first_block, *self.hidden_blocks])
249
227
  self.output_layer = nn.Linear(block_hidden_dim, 1)
250
228
  self.final_mlp = None
251
229
 
252
230
  else: # parallel
253
- self.mask_blocks = nn.ModuleList(
254
- [
255
- MaskBlockOnEmbedding(
256
- num_fields=self.num_fields,
257
- embedding_dim=self.embedding_dim,
258
- mask_hidden_dim=mask_hidden_dim,
259
- hidden_dim=block_hidden_dim,
260
- )
261
- for _ in range(self.num_blocks)
262
- ]
263
- )
264
-
265
- self.final_mlp = MLP(
266
- input_dim=self.num_blocks * block_hidden_dim,
267
- **mlp_params,
268
- )
231
+ self.mask_blocks = nn.ModuleList([MaskBlockOnEmbedding(num_fields=self.num_fields, embedding_dim=self.embedding_dim, mask_hidden_dim=mask_hidden_dim, hidden_dim=block_hidden_dim) for _ in range(self.num_blocks)])
232
+ self.final_mlp = MLP(input_dim=self.num_blocks * block_hidden_dim, **mlp_params)
269
233
  self.output_layer = None
270
-
271
234
  self.prediction_layer = PredictionLayer(task_type=self.task_type)
272
235
 
273
236
  if self.model_type == "serial":
274
- self._register_regularization_weights(
275
- embedding_attr="embedding",
276
- include_modules=["mask_blocks", "output_layer"],
277
- )
237
+ self._register_regularization_weights(embedding_attr="embedding", include_modules=["mask_blocks", "output_layer"],)
278
238
  # serial
279
239
  else:
280
- self._register_regularization_weights(
281
- embedding_attr="embedding",
282
- include_modules=["mask_blocks", "final_mlp"],
283
- )
284
-
285
- self.compile(
286
- optimizer=optimizer,
287
- optimizer_params=optimizer_params,
288
- loss=loss,
289
- loss_params=loss_params,
290
- )
240
+ self._register_regularization_weights(embedding_attr="embedding", include_modules=["mask_blocks", "final_mlp"])
241
+ self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
291
242
 
292
243
  def forward(self, x: dict[str, torch.Tensor]) -> torch.Tensor:
293
244
  field_emb = self.embedding(x=x, features=self.mask_features, squeeze_dim=False)
@@ -300,20 +251,15 @@ class MaskNet(BaseModel):
                 h = block(field_emb, v_emb_flat)  # [B, block_hidden_dim]
                 h = self.block_dropout(h)
                 block_outputs.append(h)
-
             concat_hidden = torch.cat(block_outputs, dim=-1)
             logit = self.final_mlp(concat_hidden)  # [B, 1]
-
         # serial
         else:
             hidden = self.first_block(field_emb, v_emb_flat)
             hidden = self.block_dropout(hidden)
-
             for block in self.hidden_blocks:
                 hidden = block(hidden, v_emb_flat)
                 hidden = self.block_dropout(hidden)
-
             logit = self.output_layer(hidden)  # [B, 1]
-
         y = self.prediction_layer(logit)
-        return y
+        return y
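
Note: the MaskBlock internals are not part of this hunk. As a rough sketch of the instance-guided mask idea from the MaskNet paper (class and argument names here are illustrative, not the package's MaskBlockOnEmbedding/MaskBlockOnHidden):

import torch
import torch.nn as nn

class ToyMaskBlock(nn.Module):
    """A mask MLP over the flat field embeddings produces an elementwise
    mask that reweights the block input before the hidden projection."""
    def __init__(self, input_dim, mask_hidden_dim, hidden_dim):
        super().__init__()
        self.mask_mlp = nn.Sequential(
            nn.Linear(input_dim, mask_hidden_dim), nn.ReLU(),
            nn.Linear(mask_hidden_dim, input_dim),
        )
        self.ln = nn.LayerNorm(input_dim)
        self.hidden = nn.Sequential(nn.Linear(input_dim, hidden_dim), nn.ReLU())

    def forward(self, hidden_in, v_emb_flat):
        mask = self.mask_mlp(v_emb_flat)       # instance-guided mask
        return self.hidden(self.ln(hidden_in * mask))

block = ToyMaskBlock(input_dim=40, mask_hidden_dim=64, hidden_dim=32)
v = torch.randn(8, 40)                         # [B, num_fields * embedding_dim]
print(block(v, v).shape)                       # torch.Size([8, 32])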
nextrec/models/ranking/widedeep.py CHANGED
@@ -1,11 +1,42 @@
 """
 Date: create on 09/11/2025
+Checkpoint: edit on 24/11/2025
 Author:
 Yang Zhou,zyaztec@gmail.com
 Reference:
-[1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]
-//Proceedings of the 1st workshop on deep learning for recommender systems. 2016: 7-10.
+[1] Cheng H T, Koc L, Harmsen J, et al. Wide & Deep learning for recommender systems[C]
+//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. 2016: 7-10.
 (https://arxiv.org/abs/1606.07792)
+
+Wide & Deep blends a linear wide component (memorization of cross features) with a
+deep neural network (generalization) sharing the same feature space. The wide part
+captures co-occurrence patterns and manual crosses, while the deep part learns dense
+representations and nonlinear interactions, improving both accuracy and coverage.
+
+Workflow:
+(1) Wide: linear/logistic model over raw or embedded features
+(2) Deep: embeddings plus dense features feed into an MLP
+(3) Sum wide and deep logits, then apply the final prediction layer
+
+Key Advantages:
+- Balances memorization (wide) and generalization (deep)
+- Compatible with manual crosses and automatically learned embeddings
+- Simple architecture with strong baselines for CTR/ranking
+- Shared feature space reduces duplication and engineering overhead
+
+Wide & Deep pairs a wide linear part (memorizing co-occurrences and manual crosses)
+with a deep network part (generalizing nonlinear interactions); both share feature
+representations, keeping memorization while adding generalization. Commonly used
+for CTR/ranking tasks.
+
+Workflow:
+(1) Wide: linear/logistic regression over raw or embedded features
+(2) Deep: embeddings and dense features feed an MLP
+(3) Wide and deep outputs are summed before the final prediction
+
+Key advantages:
+- Balances memorization and generalization
+- Supports combining manual crosses with learned embeddings
+- Simple structure with stable baseline performance
+- Shared feature space reduces engineering overhead
 """
 
 import torch
@@ -63,7 +94,6 @@ class WideDeep(BaseModel):
 
         # Wide part: use all features for linear model
         self.wide_features = sparse_features + sequence_features
-
         # Deep part: use all features
         self.deep_features = dense_features + sparse_features + sequence_features
 
@@ -75,23 +105,14 @@
         self.linear = LR(wide_dim)
 
         # Deep part: MLP
-        deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
-        self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
+        input_dim = self.embedding.input_dim
+        # deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
+        # dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        self.mlp = MLP(input_dim=input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task_type)
 
         # Register regularization weights
-        self._register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear', 'mlp']
-        )
-
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
+        self._register_regularization_weights(embedding_attr='embedding', include_modules=['linear', 'mlp'])
+        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
 
     def forward(self, x):
         # Deep part
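
Note: as in DeepFM above, the wide and deep logits are summed before the prediction layer. Schematically (toy modules, not the package's forward):

import torch
import torch.nn as nn

B, wide_dim, deep_dim = 4, 24, 40
wide_part = nn.Linear(wide_dim, 1)   # LR over wide features
deep_part = nn.Sequential(nn.Linear(deep_dim, 64), nn.ReLU(), nn.Linear(64, 1))

x_wide, x_deep = torch.randn(B, wide_dim), torch.randn(B, deep_dim)
logit = wide_part(x_wide) + deep_part(x_deep)   # sum of wide and deep logits
y = torch.sigmoid(logit)                        # binary prediction layer
print(y.shape)                                  # torch.Size([4, 1])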
nextrec/models/ranking/xdeepfm.py CHANGED
@@ -11,12 +11,45 @@ Reference:
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import LR, EmbeddingLayer, MLP, CIN, PredictionLayer
+from nextrec.basic.layers import LR, EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+class CIN(nn.Module):
+    """Compressed Interaction Network from xDeepFM (Lian et al., 2018)."""
+
+    def __init__(self, input_dim, cin_size, split_half=True):
+        super().__init__()
+        self.num_layers = len(cin_size)
+        self.split_half = split_half
+        self.conv_layers = nn.ModuleList()
+        prev_dim, fc_input_dim = input_dim, 0
+        for i in range(self.num_layers):
+            cross_layer_size = cin_size[i]
+            # 1x1 conv compresses the input_dim * prev_dim pairwise feature maps
+            self.conv_layers.append(nn.Conv1d(input_dim * prev_dim, cross_layer_size, 1, stride=1, dilation=1, bias=True))
+            if self.split_half and i != self.num_layers - 1:
+                cross_layer_size //= 2
+            prev_dim = cross_layer_size
+            fc_input_dim += prev_dim
+        self.fc = nn.Linear(fc_input_dim, 1)
+
+    def forward(self, x):
+        xs = list()
+        x0, h = x.unsqueeze(2), x  # x0: [B, F, 1, E]
+        for i in range(self.num_layers):
+            x = x0 * h.unsqueeze(1)  # outer product over field maps: [B, F, H_i, E]
+            batch_size, f0_dim, fin_dim, embed_dim = x.shape
+            x = x.view(batch_size, f0_dim * fin_dim, embed_dim)
+            x = F.relu(self.conv_layers[i](x))
+            if self.split_half and i != self.num_layers - 1:
+                x, h = torch.split(x, x.shape[1] // 2, dim=1)
+            else:
+                h = x
+            xs.append(x)
+        return self.fc(torch.sum(torch.cat(xs, dim=1), 2))
+
+
 class xDeepFM(BaseModel):
     @property
     def model_name(self):
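
Note: a quick shape check of the inlined CIN. With 5 fields and cin_size=(128, 128), split_half halves every layer except the last, so the final linear head sees 64 + 128 = 192 summed feature maps:

import torch

num_fields, emb_dim = 5, 16
cin = CIN(input_dim=num_fields, cin_size=(128, 128), split_half=True)

x = torch.randn(4, num_fields, emb_dim)   # [B, num_fields, emb_dim]
print(cin(x).shape)                       # torch.Size([4, 1])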
nextrec/utils/common.py CHANGED
@@ -1,5 +1,8 @@
 import torch
 import platform
+from collections import OrderedDict
+from typing import Sequence, Union, TYPE_CHECKING
+
 
 def resolve_device() -> str:
     """Select a usable device with graceful fallback."""
@@ -13,4 +16,26 @@ def resolve_device() -> str:
         major, minor = 0, 0
     if major >= 14:
         return "mps"
-    return "cpu"
+    return "cpu"
+
+
+def merge_features(primary, secondary) -> list:
+    """
+    Merge two feature lists while preserving order and deduplicating by feature name.
+    Later duplicates are skipped.
+    """
+    merged: OrderedDict[str, object] = OrderedDict()
+    for feat in list(primary or []) + list(secondary or []):
+        merged.setdefault(feat.name, feat)
+    return list(merged.values())
+
+
+def get_mlp_output_dim(params: dict, fallback: int) -> int:
+    """
+    Get the output dimension of an MLP-like config.
+    If dims are provided, use the last dim; otherwise fall back to input dim.
+    """
+    dims = params.get("dims")
+    if dims:
+        return dims[-1]
+    return fallback
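
Note: a quick illustration of the two new helpers; SimpleNamespace stands in for feature objects (anything exposing a .name attribute works):

from types import SimpleNamespace

a = SimpleNamespace(name="user_id")
b = SimpleNamespace(name="item_id")
dup = SimpleNamespace(name="user_id")    # later duplicate, skipped

print([f.name for f in merge_features([a, b], [dup])])        # ['user_id', 'item_id']
print(get_mlp_output_dim({"dims": [256, 64]}, fallback=128))  # 64
print(get_mlp_output_dim({}, fallback=128))                   # 128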