nextrec-0.4.1-py3-none-any.whl → nextrec-0.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. nextrec/__init__.py +1 -1
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -5
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/features.py +30 -22
  6. nextrec/basic/layers.py +250 -112
  7. nextrec/basic/loggers.py +63 -44
  8. nextrec/basic/metrics.py +270 -120
  9. nextrec/basic/model.py +1084 -402
  10. nextrec/basic/session.py +10 -3
  11. nextrec/cli.py +492 -0
  12. nextrec/data/__init__.py +19 -25
  13. nextrec/data/batch_utils.py +11 -3
  14. nextrec/data/data_processing.py +51 -45
  15. nextrec/data/data_utils.py +26 -15
  16. nextrec/data/dataloader.py +273 -96
  17. nextrec/data/preprocessor.py +320 -199
  18. nextrec/loss/listwise.py +17 -9
  19. nextrec/loss/loss_utils.py +7 -8
  20. nextrec/loss/pairwise.py +2 -0
  21. nextrec/loss/pointwise.py +30 -12
  22. nextrec/models/generative/hstu.py +103 -38
  23. nextrec/models/match/dssm.py +82 -68
  24. nextrec/models/match/dssm_v2.py +72 -57
  25. nextrec/models/match/mind.py +175 -107
  26. nextrec/models/match/sdm.py +104 -87
  27. nextrec/models/match/youtube_dnn.py +73 -59
  28. nextrec/models/multi_task/esmm.py +69 -46
  29. nextrec/models/multi_task/mmoe.py +91 -53
  30. nextrec/models/multi_task/ple.py +117 -58
  31. nextrec/models/multi_task/poso.py +163 -55
  32. nextrec/models/multi_task/share_bottom.py +63 -36
  33. nextrec/models/ranking/afm.py +80 -45
  34. nextrec/models/ranking/autoint.py +74 -57
  35. nextrec/models/ranking/dcn.py +110 -48
  36. nextrec/models/ranking/dcn_v2.py +265 -45
  37. nextrec/models/ranking/deepfm.py +39 -24
  38. nextrec/models/ranking/dien.py +335 -146
  39. nextrec/models/ranking/din.py +158 -92
  40. nextrec/models/ranking/fibinet.py +134 -52
  41. nextrec/models/ranking/fm.py +68 -26
  42. nextrec/models/ranking/masknet.py +95 -33
  43. nextrec/models/ranking/pnn.py +128 -58
  44. nextrec/models/ranking/widedeep.py +40 -28
  45. nextrec/models/ranking/xdeepfm.py +67 -40
  46. nextrec/utils/__init__.py +59 -34
  47. nextrec/utils/config.py +496 -0
  48. nextrec/utils/device.py +30 -20
  49. nextrec/utils/distributed.py +36 -9
  50. nextrec/utils/embedding.py +1 -0
  51. nextrec/utils/feature.py +1 -0
  52. nextrec/utils/file.py +33 -11
  53. nextrec/utils/initializer.py +61 -16
  54. nextrec/utils/model.py +22 -0
  55. nextrec/utils/optimizer.py +25 -9
  56. nextrec/utils/synthetic_data.py +283 -165
  57. nextrec/utils/tensor.py +24 -13
  58. {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/METADATA +53 -24
  59. nextrec-0.4.3.dist-info/RECORD +69 -0
  60. nextrec-0.4.3.dist-info/entry_points.txt +2 -0
  61. nextrec-0.4.1.dist-info/RECORD +0 -66
  62. {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/WHEEL +0 -0
  63. {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/afm.py

@@ -1,7 +1,7 @@
 """
 Date: create on 09/11/2025
-Checkpoint: edit on 06/12/2025
-Author: Yang Zhou,zyaztec@gmail.com
+Checkpoint: edit on 09/12/2025
+Author: Yang Zhou, zyaztec@gmail.com
 Reference:
 [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of
     feature interactions via attention networks[C]//IJCAI. 2017: 3119-3125.
@@ -40,7 +40,7 @@ import torch
 import torch.nn as nn

 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import EmbeddingLayer, LR, PredictionLayer, InputMask
+from nextrec.basic.layers import EmbeddingLayer, PredictionLayer, InputMask
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature


@@ -52,25 +52,35 @@ class AFM(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature] | list = [],
-                 sparse_features: list[SparseFeature] | list = [],
-                 sequence_features: list[SequenceFeature] | list = [],
-                 attention_dim: int = 32,
-                 attention_dropout: float = 0.0,
-                 target: list[str] | list = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4, **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | None = None,
+        sparse_features: list[SparseFeature] | None = None,
+        sequence_features: list[SequenceFeature] | None = None,
+        attention_dim: int = 32,
+        attention_dropout: float = 0.0,
+        target: list[str] | str | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
         super(AFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -82,31 +92,32 @@ class AFM(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )

-        if target is None:
-            target = []
-        if optimizer_params is None:
-            optimizer_params = {}
-        if loss is None:
-            loss = "bce"
-
         self.fm_features = sparse_features + sequence_features
         if len(self.fm_features) < 2:
-            raise ValueError("AFM requires at least two sparse/sequence features to build pairwise interactions.")
+            raise ValueError(
+                "AFM requires at least two sparse/sequence features to build pairwise interactions."
+            )

         # make sure all embedding dimension are the same for FM features
         self.embedding_dim = self.fm_features[0].embedding_dim
         if any(f.embedding_dim != self.embedding_dim for f in self.fm_features):
-            raise ValueError("All FM features must share the same embedding_dim for AFM.")
+            raise ValueError(
+                "All FM features must share the same embedding_dim for AFM."
+            )

-        self.embedding = EmbeddingLayer(features=self.fm_features)  # [Batch, Field, Dim ]
+        self.embedding = EmbeddingLayer(
+            features=self.fm_features
+        )  # [Batch, Field, Dim ]

         # First-order terms: dense linear + one hot embeddings
         self.dense_features = list(dense_features)
         dense_input_dim = sum([f.input_dim for f in self.dense_features])
-        self.linear_dense = nn.Linear(dense_input_dim, 1, bias=True) if dense_input_dim > 0 else None
+        self.linear_dense = (
+            nn.Linear(dense_input_dim, 1, bias=True) if dense_input_dim > 0 else None
+        )

         # First-order term: sparse/sequence features one-hot
         # **INFO**: source paper does not contain sequence features in experiments,
@@ -114,9 +125,15 @@ class AFM(BaseModel):
         # remove sequence features from fm_features.
         self.first_order_embeddings = nn.ModuleDict()
         for feature in self.fm_features:
-            if feature.embedding_name in self.first_order_embeddings:  # shared embedding
+            if (
+                feature.embedding_name in self.first_order_embeddings
+            ):  # shared embedding
                 continue
-            emb = nn.Embedding(num_embeddings=feature.vocab_size, embedding_dim=1, padding_idx=feature.padding_idx)  # equal to one-hot encoding weight
+            emb = nn.Embedding(
+                num_embeddings=feature.vocab_size,
+                embedding_dim=1,
+                padding_idx=feature.padding_idx,
+            )  # equal to one-hot encoding weight
             # nn.init.zeros_(emb.weight)
             self.first_order_embeddings[feature.embedding_name] = emb

@@ -129,11 +146,18 @@ class AFM(BaseModel):

         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['linear_dense', 'attention_linear', 'attention_p', 'output_projection']
+            embedding_attr="embedding",
+            include_modules=[
+                "linear_dense",
+                "attention_linear",
+                "attention_p",
+                "output_projection",
+            ],
         )
         # add first-order embeddings to embedding regularization list
-        self.embedding_params.extend(emb.weight for emb in self.first_order_embeddings.values())
+        self.embedding_params.extend(
+            emb.weight for emb in self.first_order_embeddings.values()
+        )

         self.compile(
             optimizer=optimizer,
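The first-order tables set up in the constructor above use `nn.Embedding(..., embedding_dim=1)`, which the in-code comment calls "equal to one-hot encoding weight". A quick standalone check of that equivalence, using only torch (the `vocab_size` and the indices below are arbitrary toy values):

```python
import torch
import torch.nn as nn

# A 1-dim embedding lookup returns w[i], which is exactly one_hot(i) @ w.
vocab_size = 5  # arbitrary toy vocabulary
emb = nn.Embedding(num_embeddings=vocab_size, embedding_dim=1)

idx = torch.tensor([2, 4])                                # two token ids
one_hot = nn.functional.one_hot(idx, vocab_size).float()  # [2, 5]

print(torch.allclose(emb(idx), one_hot @ emb.weight))     # True
```

So each table is a learned per-token scalar, i.e. the first-order FM weight, without materializing one-hot vectors.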
@@ -143,13 +167,17 @@ class AFM(BaseModel):
         )

     def forward(self, x):
-        field_emb = self.embedding(x=x, features=self.fm_features, squeeze_dim=False)  # [B, F, D]
+        field_emb = self.embedding(
+            x=x, features=self.fm_features, squeeze_dim=False
+        )  # [B, F, D]
         batch_size = field_emb.size(0)
         y_linear = torch.zeros(batch_size, 1, device=field_emb.device)

         # First-order dense part
         if self.linear_dense is not None:
-            dense_inputs = [x[f.name].float().view(batch_size, -1) for f in self.dense_features]
+            dense_inputs = [
+                x[f.name].float().view(batch_size, -1) for f in self.dense_features
+            ]
             dense_stack = torch.cat(dense_inputs, dim=1) if dense_inputs else None
             if dense_stack is not None:
                 y_linear = y_linear + self.linear_dense(dense_stack)
@@ -161,7 +189,7 @@ class AFM(BaseModel):
             if isinstance(feature, SparseFeature):
                 term = emb(x[feature.name].long())  # [B, 1]
             else:  # SequenceFeature
-                seq_input = x[feature.name].long() # [B, 1]
+                seq_input = x[feature.name].long()  # [B, 1]
                 if feature.max_len is not None and seq_input.size(1) > feature.max_len:
                     seq_input = seq_input[:, -feature.max_len :]
                 mask = self.input_mask(x, feature, seq_input).squeeze(1)  # [B, 1]
@@ -169,7 +197,9 @@ class AFM(BaseModel):
                 term = (seq_weight * mask).sum(dim=1, keepdim=True)  # [B, 1]
             first_order_terms.append(term)
         if first_order_terms:
-            y_linear = y_linear + torch.sum(torch.cat(first_order_terms, dim=1), dim=1, keepdim=True)
+            y_linear = y_linear + torch.sum(
+                torch.cat(first_order_terms, dim=1), dim=1, keepdim=True
+            )

         interactions = []
         feature_values = []
@@ -182,13 +212,18 @@ class AFM(BaseModel):
             else:
                 if isinstance(feature, SequenceFeature):
                     seq_input = x[feature.name].long()
-                    if feature.max_len is not None and seq_input.size(1) > feature.max_len:
+                    if (
+                        feature.max_len is not None
+                        and seq_input.size(1) > feature.max_len
+                    ):
                         seq_input = seq_input[:, -feature.max_len :]
                     value = self.input_mask(x, feature, seq_input).sum(dim=2)  # [B, 1]
                 else:
                     value = torch.ones(batch_size, 1, device=field_emb.device)
                 feature_values.append(value)
-        feature_values_tensor = torch.cat(feature_values, dim=1).unsqueeze(-1)  # [B, F, 1]
+        feature_values_tensor = torch.cat(feature_values, dim=1).unsqueeze(
+            -1
+        )  # [B, F, 1]
         field_emb = field_emb * feature_values_tensor

         num_fields = field_emb.shape[1]
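The signature change in AFM's `__init__` (and the same change in DCN further down) swaps mutable defaults such as `dense_features: list = []` for `None` plus an `... or []` fallback in the body. A minimal sketch of the aliasing bug this prevents; `broken` and `fixed` are illustrative names, not nextrec functions:

```python
def broken(features: list = []):       # one list object shared by every call
    features.append("feat")
    return features

def fixed(features: list | None = None):
    features = features or []          # fresh list per call, as in the new __init__
    features.append("feat")
    return features

print(broken())  # ['feat']
print(broken())  # ['feat', 'feat']  <- state leaked from the first call
print(fixed())   # ['feat']
print(fixed())   # ['feat']
```

The default list is created once at function definition time, so two models built with default arguments would silently share (and mutate) the same feature list; the `None` pattern allocates a new one per call.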
nextrec/models/ranking/autoint.py

@@ -1,10 +1,10 @@
 """
 Date: create on 09/11/2025
-Checkpoint: edit on 24/11/2025
-Author: Yang Zhou,zyaztec@gmail.com
+Checkpoint: edit on 09/12/2025
+Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-[1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
-    self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
+[1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
+    self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
     on information and knowledge management. 2019: 1161-1170.
     (https://arxiv.org/abs/1810.11921)

@@ -70,29 +70,31 @@ class AutoInt(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 att_layer_num: int = 3,
-                 att_embedding_dim: int = 8,
-                 att_head_num: int = 2,
-                 att_dropout: float = 0.0,
-                 att_use_residual: bool = True,
-                 target: list[str] | None = None,
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict | None = None,
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        att_layer_num: int = 3,
+        att_embedding_dim: int = 8,
+        att_head_num: int = 2,
+        att_dropout: float = 0.0,
+        att_use_residual: bool = True,
+        target: list[str] | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(AutoInt, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -104,7 +106,7 @@ class AutoInt(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )

         if target is None:
@@ -113,52 +115,59 @@ class AutoInt(BaseModel):
             optimizer_params = {}
         if loss is None:
             loss = "bce"
-
+
         self.att_layer_num = att_layer_num
         self.att_embedding_dim = att_embedding_dim
-
+
         # Use sparse and sequence features for interaction
         # **INFO**: this is different from the original paper, we also include dense features
         # if you want to follow the paper strictly, set dense_features=[]
         # or modify the code accordingly
-        self.interaction_features = dense_features + sparse_features + sequence_features
-
+        self.interaction_features = dense_features + sparse_features + sequence_features
+
         # All features for embedding
         self.all_features = dense_features + sparse_features + sequence_features

         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)
-
+
         # Project embeddings to attention embedding dimension
         num_fields = len(self.interaction_features)
-
+
         # If embeddings have different dimensions, project them to att_embedding_dim
-        self.need_projection = not all(f.embedding_dim == att_embedding_dim for f in self.interaction_features)
+        self.need_projection = not all(
+            f.embedding_dim == att_embedding_dim for f in self.interaction_features
+        )
         self.projection_layers = None
         if self.need_projection:
-            self.projection_layers = nn.ModuleList([
-                nn.Linear(f.embedding_dim, att_embedding_dim, bias=False)
-                for f in self.interaction_features
-            ])
-
+            self.projection_layers = nn.ModuleList(
+                [
+                    nn.Linear(f.embedding_dim, att_embedding_dim, bias=False)
+                    for f in self.interaction_features
+                ]
+            )
+
         # Multi-head self-attention layers
-        self.attention_layers = nn.ModuleList([
-            MultiHeadSelfAttention(
-                embedding_dim=att_embedding_dim,
-                num_heads=att_head_num,
-                dropout=att_dropout,
-                use_residual=att_use_residual
-            ) for _ in range(att_layer_num)
-        ])
-
+        self.attention_layers = nn.ModuleList(
+            [
+                MultiHeadSelfAttention(
+                    embedding_dim=att_embedding_dim,
+                    num_heads=att_head_num,
+                    dropout=att_dropout,
+                    use_residual=att_use_residual,
+                )
+                for _ in range(att_layer_num)
+            ]
+        )
+
         # Final prediction layer
         self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
         self.prediction_layer = PredictionLayer(task_type=self.default_task)

         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['projection_layers', 'attention_layers', 'fc']
+            embedding_attr="embedding",
+            include_modules=["projection_layers", "attention_layers", "fc"],
         )

         self.compile(
@@ -172,21 +181,29 @@ class AutoInt(BaseModel):
         # Get embeddings field-by-field so mixed dimensions can be projected safely
         field_embeddings = []
         if len(self.interaction_features) == 0:
-            raise ValueError("AutoInt requires at least one sparse or sequence feature for interactions.")
+            raise ValueError(
+                "AutoInt requires at least one sparse or sequence feature for interactions."
+            )
         for idx, feature in enumerate(self.interaction_features):
             feature_emb = self.embedding(x=x, features=[feature], squeeze_dim=False)
             feature_emb = feature_emb.squeeze(1)  # [B, embedding_dim]
             if self.need_projection and self.projection_layers is not None:
                 feature_emb = self.projection_layers[idx](feature_emb)
-            field_embeddings.append(feature_emb.unsqueeze(1))  # [B, 1, att_embedding_dim or original_dim]
+            field_embeddings.append(
+                feature_emb.unsqueeze(1)
+            )  # [B, 1, att_embedding_dim or original_dim]
         embeddings = torch.cat(field_embeddings, dim=1)
-
+
         # Apply multi-head self-attention layers
         attention_output = embeddings
         for att_layer in self.attention_layers:
-            attention_output = att_layer(attention_output)  # [B, num_fields, att_embedding_dim]
-
+            attention_output = att_layer(
+                attention_output
+            )  # [B, num_fields, att_embedding_dim]
+
         # Flatten and predict
-        attention_output_flat = attention_output.flatten(start_dim=1)  # [B, num_fields * att_embedding_dim]
+        attention_output_flat = attention_output.flatten(
+            start_dim=1
+        )  # [B, num_fields * att_embedding_dim]
         y = self.fc(attention_output_flat)  # [B, 1]
         return self.prediction_layer(y)
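For intuition about the step `attention_layers` performs in this forward pass: each field embedding attends over every other field. nextrec's own `MultiHeadSelfAttention` is not included in this diff, so the sketch below substitutes `torch.nn.MultiheadAttention` and a ReLU residual; treat the exact activation and residual placement as assumptions:

```python
import torch
import torch.nn as nn

class FieldSelfAttention(nn.Module):
    """Rough stand-in for nextrec's MultiHeadSelfAttention (not shown in this diff)."""

    def __init__(self, embedding_dim: int, num_heads: int, dropout: float = 0.0):
        super().__init__()
        self.attn = nn.MultiheadAttention(
            embedding_dim, num_heads, dropout=dropout, batch_first=True
        )

    def forward(self, x):            # x: [B, num_fields, embedding_dim]
        out, _ = self.attn(x, x, x)  # every field attends to every field
        return torch.relu(out + x)   # residual path, cf. att_use_residual=True

layer = FieldSelfAttention(embedding_dim=8, num_heads=2)
print(layer(torch.randn(4, 10, 8)).shape)  # torch.Size([4, 10, 8])
```

Stacking `att_layer_num` such layers lets attended field combinations attend again, which is how AutoInt composes higher-order interactions before the final flatten and linear head.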
nextrec/models/ranking/dcn.py

@@ -1,11 +1,53 @@
 """
 Date: create on 09/11/2025
-Author:
-    Yang Zhou,zyaztec@gmail.com
+Checkpoint: edit on 09/12/2025
+Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-[1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]
-    //Proceedings of the ADKDD'17. 2017: 1-7.
-    (https://arxiv.org/abs/1708.05123)
+[1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]
+    //Proceedings of the ADKDD'17. 2017: 1-7.
+    (https://arxiv.org/abs/1708.05123)
+
+Deep & Cross Network (DCN) mixes explicit polynomial feature crosses with a deep
+MLP branch to capture both low-order and high-order interactions for CTR-style
+tasks. Cross layers repeatedly apply x_{l+1} = x0 * (w_l^T x_l) + b_l + x_l,
+which expands feature crosses with linear parameter growth, while the deep branch
+learns nonlinear patterns on the same shared embeddings. The final prediction
+concatenates (or solely uses) cross outputs before a linear head, offering a
+balanced trade-off between interpretability and expressiveness.
+
+Workflow:
+(1) Embed sparse/sequence features and concatenate with dense inputs
+(2) Cross Network builds explicit polynomial interactions via residual crosses
+(3) Optional MLP models implicit high-order nonlinear relationships
+(4) Cross output (and deep output if enabled) are fused for the final logit
+(5) Prediction layer maps logits to binary CTR scores
+
+Key Advantages:
+- Explicit, low-cost cross features with O(L * d) parameters
+- Residual cross formulation stabilizes optimization
+- Optional deep tower increases capacity without losing interpretability
+- Shared embeddings reduce redundant parameters and preprocessing
+- Strong, simple baseline for ad/recommendation ranking tasks
+
+DCN (Deep & Cross Network) explicitly generates polynomial feature interactions
+through its Cross layers, while an optional Deep branch learns high-order
+nonlinear relationships; both share the same embeddings. The Cross layer recurses
+as x_{l+1} = x0 * (w_l^T x_l) + b_l + x_l, so parameters grow linearly and stay
+interpretable; the Deep branch adds expressive power; the Cross (and Deep)
+outputs finally feed a linear layer and a prediction layer, giving a CTR/CVR
+model that balances efficiency and effectiveness.
+
+Workflow:
+(1) Embed sparse/sequence features and concatenate with dense features
+(2) Cross layers explicitly build multi-order crosses in residual form
+(3) An optional MLP learns implicit high-order nonlinear interactions
+(4) Fuse the Cross (and Deep) outputs through a linear head to get the logit
+(5) The prediction layer outputs binary CTR scores
+
+Key Advantages:
+- Explicit cross features, linearly growing parameters, easy to interpret
+- The residual-style Cross improves training stability
+- The Deep branch flexibly increases model capacity
+- Shared embeddings reduce redundant parameters and preprocessing
+- A concise, strong baseline for CTR/CVR ranking tasks
 """

 import torch
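The docstring's recurrence compounds polynomial degree by one per layer; a sketch of the standard argument (an editorial aside, not part of the diff):

With $x_0 \in \mathbb{R}^d$, one step gives
$$x_1 = x_0 \,(w_0^\top x_0) + b_0 + x_0,$$
whose coordinates already contain all degree-2 monomials $x_{0,i}\,x_{0,j}$. Inductively, $x_l$ contains cross terms of every degree up to $l+1$, yet $L$ layers add only $2Ld$ parameters ($w_l$ and $b_l$ are each $d$-dimensional), which is the "O(L * d) parameters" claim above.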
@@ -15,21 +57,27 @@ from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature

+
 class CrossNetwork(nn.Module):
     """Stacked Cross Layers from DCN (Wang et al., 2017)."""

     def __init__(self, input_dim, num_layers):
         super().__init__()
         self.num_layers = num_layers
-        self.w = torch.nn.ModuleList([torch.nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers)])
-        self.b = torch.nn.ParameterList([torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)])
+        self.w = torch.nn.ModuleList(
+            [torch.nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers)]
+        )
+        self.b = torch.nn.ParameterList(
+            [torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)]
+        )

     def forward(self, x):
         x0 = x
         for i in range(self.num_layers):
             xw = self.w[i](x)
             x = x0 * xw + self.b[i] + x
-        return x # [batch_size, input_dim]
+        return x  # [batch_size, input_dim]
+

 class DCN(BaseModel):
     @property
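Because `CrossNetwork` is self-contained, the shape and parameter-count claims are easy to verify; this snippet re-inlines the class from the hunk above and checks both (batch size 32 and `d, L = 16, 3` are arbitrary):

```python
import torch
import torch.nn as nn

# CrossNetwork re-inlined from the hunk above so the check runs standalone.
class CrossNetwork(nn.Module):
    def __init__(self, input_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        self.w = nn.ModuleList(
            [nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers)]
        )
        self.b = nn.ParameterList(
            [nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)]
        )

    def forward(self, x):
        x0 = x
        for i in range(self.num_layers):
            xw = self.w[i](x)            # scalar w_i^T x per sample: [B, 1]
            x = x0 * xw + self.b[i] + x  # broadcasts back to [B, input_dim]
        return x

d, L = 16, 3
net = CrossNetwork(input_dim=d, num_layers=L)
print(net(torch.randn(32, d)).shape)             # torch.Size([32, 16])
print(sum(p.numel() for p in net.parameters()))  # 96 == 2 * L * d
```

The output dimension matches the input, which is why DCN below can concatenate the cross output directly with the MLP output before the final linear layer.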
@@ -40,25 +88,34 @@ class DCN(BaseModel):
     def default_task(self):
         return "binary"

-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 cross_num: int = 3,
-                 mlp_params: dict | None = None,
-                 target: list[str] = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | None = None,
+        sparse_features: list[SparseFeature] | None = None,
+        sequence_features: list[SequenceFeature] | None = None,
+        cross_num: int = 3,
+        mlp_params: dict | None = None,
+        target: list[str] | str | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
         super(DCN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -70,34 +127,37 @@ class DCN(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )

-        self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
-
-        # All features
-        self.all_features = dense_features + sparse_features + sequence_features
-
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)

         # Calculate input dimension
-        emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.all_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         input_dim = emb_dim_total + dense_input_dim
-
-        # Cross Network
+
+        # Cross Network for explicit feature crosses
         self.cross_network = CrossNetwork(input_dim=input_dim, num_layers=cross_num)
-
-        # Deep Network (optional)
+
+        # Deep Network for implicit high-order interactions
         if mlp_params is not None:
             self.use_dnn = True
             self.mlp = MLP(input_dim=input_dim, **mlp_params)
             deep_dim = self.mlp.output_dim
             # Final layer combines cross and deep
-            self.final_layer = nn.Linear(input_dim + deep_dim, 1)  # + deep_dim for MLP output
+            self.final_layer = nn.Linear(
+                input_dim + deep_dim, 1
+            )  # + deep_dim for MLP output
         else:
             self.use_dnn = False
             # Final layer only uses cross network output
@@ -107,8 +167,8 @@ class DCN(BaseModel):

         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['cross_network', 'mlp', 'final_layer']
+            embedding_attr="embedding",
+            include_modules=["cross_network", "mlp", "final_layer"],
         )

         self.compile(
@@ -121,18 +181,20 @@ class DCN(BaseModel):
     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
-
+
         # Cross Network
         cross_output = self.cross_network(input_flat)  # [B, input_dim]
-
+
         if self.use_dnn:
             # Deep Network
             deep_output = self.mlp(input_flat)  # [B, 1]
             # Concatenate cross and deep
-            combined = torch.cat([cross_output, deep_output], dim=-1)  # [B, input_dim + 1]
+            combined = torch.cat(
+                [cross_output, deep_output], dim=-1
+            )  # [B, input_dim + 1]
         else:
             combined = cross_output
-
+
         # Final prediction
         y = self.final_layer(combined)
         return self.prediction_layer(y)