nextrec-0.4.1-py3-none-any.whl → nextrec-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. nextrec/__init__.py +1 -1
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -5
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/features.py +30 -22
  6. nextrec/basic/layers.py +220 -106
  7. nextrec/basic/loggers.py +62 -43
  8. nextrec/basic/metrics.py +268 -119
  9. nextrec/basic/model.py +1082 -400
  10. nextrec/basic/session.py +10 -3
  11. nextrec/cli.py +498 -0
  12. nextrec/data/__init__.py +19 -25
  13. nextrec/data/batch_utils.py +11 -3
  14. nextrec/data/data_processing.py +51 -45
  15. nextrec/data/data_utils.py +26 -15
  16. nextrec/data/dataloader.py +272 -95
  17. nextrec/data/preprocessor.py +320 -199
  18. nextrec/loss/listwise.py +17 -9
  19. nextrec/loss/loss_utils.py +7 -8
  20. nextrec/loss/pairwise.py +2 -0
  21. nextrec/loss/pointwise.py +30 -12
  22. nextrec/models/generative/hstu.py +103 -38
  23. nextrec/models/match/dssm.py +82 -68
  24. nextrec/models/match/dssm_v2.py +72 -57
  25. nextrec/models/match/mind.py +175 -107
  26. nextrec/models/match/sdm.py +104 -87
  27. nextrec/models/match/youtube_dnn.py +73 -59
  28. nextrec/models/multi_task/esmm.py +53 -37
  29. nextrec/models/multi_task/mmoe.py +64 -45
  30. nextrec/models/multi_task/ple.py +101 -48
  31. nextrec/models/multi_task/poso.py +113 -36
  32. nextrec/models/multi_task/share_bottom.py +48 -35
  33. nextrec/models/ranking/afm.py +72 -37
  34. nextrec/models/ranking/autoint.py +72 -55
  35. nextrec/models/ranking/dcn.py +55 -35
  36. nextrec/models/ranking/dcn_v2.py +64 -23
  37. nextrec/models/ranking/deepfm.py +32 -22
  38. nextrec/models/ranking/dien.py +155 -99
  39. nextrec/models/ranking/din.py +85 -57
  40. nextrec/models/ranking/fibinet.py +52 -32
  41. nextrec/models/ranking/fm.py +29 -23
  42. nextrec/models/ranking/masknet.py +91 -29
  43. nextrec/models/ranking/pnn.py +31 -28
  44. nextrec/models/ranking/widedeep.py +34 -26
  45. nextrec/models/ranking/xdeepfm.py +60 -38
  46. nextrec/utils/__init__.py +59 -34
  47. nextrec/utils/config.py +490 -0
  48. nextrec/utils/device.py +30 -20
  49. nextrec/utils/distributed.py +36 -9
  50. nextrec/utils/embedding.py +1 -0
  51. nextrec/utils/feature.py +1 -0
  52. nextrec/utils/file.py +32 -11
  53. nextrec/utils/initializer.py +61 -16
  54. nextrec/utils/optimizer.py +25 -9
  55. nextrec/utils/synthetic_data.py +283 -165
  56. nextrec/utils/tensor.py +24 -13
  57. {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/METADATA +4 -4
  58. nextrec-0.4.2.dist-info/RECORD +69 -0
  59. nextrec-0.4.2.dist-info/entry_points.txt +2 -0
  60. nextrec-0.4.1.dist-info/RECORD +0 -66
  61. {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
  62. {nextrec-0.4.1.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/autoint.py

@@ -3,8 +3,8 @@ Date: create on 09/11/2025
 Checkpoint: edit on 24/11/2025
 Author: Yang Zhou,zyaztec@gmail.com
 Reference:
-[1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
-self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
+[1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
+self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
 on information and knowledge management. 2019: 1161-1170.
 (https://arxiv.org/abs/1810.11921)
 
@@ -70,29 +70,31 @@ class AutoInt(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 att_layer_num: int = 3,
-                 att_embedding_dim: int = 8,
-                 att_head_num: int = 2,
-                 att_dropout: float = 0.0,
-                 att_use_residual: bool = True,
-                 target: list[str] | None = None,
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict | None = None,
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        att_layer_num: int = 3,
+        att_embedding_dim: int = 8,
+        att_head_num: int = 2,
+        att_dropout: float = 0.0,
+        att_use_residual: bool = True,
+        target: list[str] | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(AutoInt, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -104,7 +106,7 @@ class AutoInt(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         if target is None:
@@ -113,52 +115,59 @@ class AutoInt(BaseModel):
            optimizer_params = {}
         if loss is None:
             loss = "bce"
-
+
         self.att_layer_num = att_layer_num
         self.att_embedding_dim = att_embedding_dim
-
+
         # Use sparse and sequence features for interaction
         # **INFO**: this is different from the original paper, we also include dense features
         # if you want to follow the paper strictly, set dense_features=[]
         # or modify the code accordingly
-        self.interaction_features = dense_features + sparse_features + sequence_features
-
+        self.interaction_features = dense_features + sparse_features + sequence_features
+
         # All features for embedding
         self.all_features = dense_features + sparse_features + sequence_features
 
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)
-
+
         # Project embeddings to attention embedding dimension
         num_fields = len(self.interaction_features)
-
+
         # If embeddings have different dimensions, project them to att_embedding_dim
-        self.need_projection = not all(f.embedding_dim == att_embedding_dim for f in self.interaction_features)
+        self.need_projection = not all(
+            f.embedding_dim == att_embedding_dim for f in self.interaction_features
+        )
         self.projection_layers = None
         if self.need_projection:
-            self.projection_layers = nn.ModuleList([
-                nn.Linear(f.embedding_dim, att_embedding_dim, bias=False)
-                for f in self.interaction_features
-            ])
-
+            self.projection_layers = nn.ModuleList(
+                [
+                    nn.Linear(f.embedding_dim, att_embedding_dim, bias=False)
+                    for f in self.interaction_features
+                ]
+            )
+
         # Multi-head self-attention layers
-        self.attention_layers = nn.ModuleList([
-            MultiHeadSelfAttention(
-                embedding_dim=att_embedding_dim,
-                num_heads=att_head_num,
-                dropout=att_dropout,
-                use_residual=att_use_residual
-            ) for _ in range(att_layer_num)
-        ])
-
+        self.attention_layers = nn.ModuleList(
+            [
+                MultiHeadSelfAttention(
+                    embedding_dim=att_embedding_dim,
+                    num_heads=att_head_num,
+                    dropout=att_dropout,
+                    use_residual=att_use_residual,
+                )
+                for _ in range(att_layer_num)
+            ]
+        )
+
         # Final prediction layer
         self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
         self.prediction_layer = PredictionLayer(task_type=self.default_task)
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['projection_layers', 'attention_layers', 'fc']
+            embedding_attr="embedding",
+            include_modules=["projection_layers", "attention_layers", "fc"],
         )
 
         self.compile(
@@ -172,21 +181,29 @@ class AutoInt(BaseModel):
         # Get embeddings field-by-field so mixed dimensions can be projected safely
         field_embeddings = []
         if len(self.interaction_features) == 0:
-            raise ValueError("AutoInt requires at least one sparse or sequence feature for interactions.")
+            raise ValueError(
+                "AutoInt requires at least one sparse or sequence feature for interactions."
+            )
         for idx, feature in enumerate(self.interaction_features):
             feature_emb = self.embedding(x=x, features=[feature], squeeze_dim=False)
             feature_emb = feature_emb.squeeze(1)  # [B, embedding_dim]
             if self.need_projection and self.projection_layers is not None:
                 feature_emb = self.projection_layers[idx](feature_emb)
-            field_embeddings.append(feature_emb.unsqueeze(1)) # [B, 1, att_embedding_dim or original_dim]
+            field_embeddings.append(
+                feature_emb.unsqueeze(1)
+            )  # [B, 1, att_embedding_dim or original_dim]
         embeddings = torch.cat(field_embeddings, dim=1)
-
+
         # Apply multi-head self-attention layers
         attention_output = embeddings
         for att_layer in self.attention_layers:
-            attention_output = att_layer(attention_output) # [B, num_fields, att_embedding_dim]
-
+            attention_output = att_layer(
+                attention_output
+            )  # [B, num_fields, att_embedding_dim]
+
         # Flatten and predict
-        attention_output_flat = attention_output.flatten(start_dim=1) # [B, num_fields * att_embedding_dim]
+        attention_output_flat = attention_output.flatten(
+            start_dim=1
+        )  # [B, num_fields * att_embedding_dim]
         y = self.fc(attention_output_flat)  # [B, 1]
         return self.prediction_layer(y)
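The AutoInt changes above are formatting-only (black-style line wrapping, double-quoted strings, trailing commas); the forward pass still embeds each field, optionally projects it to att_embedding_dim, runs the stack of self-attention layers, and flattens for the final linear head. A minimal sketch of that shape flow, using torch.nn.MultiheadAttention as a stand-in for nextrec's MultiHeadSelfAttention (whose internals are not part of this diff):

```python
# Shape walk-through of AutoInt's forward pass. MultiHeadSelfAttention is
# internal to nextrec and not shown in this diff, so nn.MultiheadAttention
# stands in here; the explicit residual add mirrors att_use_residual=True.
import torch
import torch.nn as nn

B, num_fields, att_dim, heads, layers = 32, 10, 8, 2, 3

embeddings = torch.randn(B, num_fields, att_dim)  # one embedding per field
attention_layers = nn.ModuleList(
    [nn.MultiheadAttention(att_dim, heads, batch_first=True) for _ in range(layers)]
)
fc = nn.Linear(num_fields * att_dim, 1)

out = embeddings
for att in attention_layers:
    att_out, _ = att(out, out, out)  # self-attention across the field axis
    out = att_out + out              # residual connection

y = fc(out.flatten(start_dim=1))     # [B, num_fields * att_dim] -> [B, 1]
prob = torch.sigmoid(y)              # PredictionLayer analogue for "binary"
```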
nextrec/models/ranking/dcn.py

@@ -15,21 +15,27 @@ from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+
 class CrossNetwork(nn.Module):
     """Stacked Cross Layers from DCN (Wang et al., 2017)."""
 
     def __init__(self, input_dim, num_layers):
         super().__init__()
         self.num_layers = num_layers
-        self.w = torch.nn.ModuleList([torch.nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers)])
-        self.b = torch.nn.ParameterList([torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)])
+        self.w = torch.nn.ModuleList(
+            [torch.nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers)]
+        )
+        self.b = torch.nn.ParameterList(
+            [torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)]
+        )
 
     def forward(self, x):
         x0 = x
         for i in range(self.num_layers):
             xw = self.w[i](x)
             x = x0 * xw + self.b[i] + x
-        return x # [batch_size, input_dim]
+        return x  # [batch_size, input_dim]
+
 
 class DCN(BaseModel):
     @property
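Before moving into the DCN model itself: the CrossNetwork rewrite above is pure reformatting, and the recurrence is unchanged. Each layer computes x_{l+1} = x0 * (x_l · w_l) + b_l + x_l, where w_l maps x_l to one scalar per example. A standalone sketch of a single cross layer (illustrative tensors, not package code):

```python
# One DCN-v1 cross layer, matching the forward loop above:
# x_{l+1} = x0 * (x_l @ w_l) + b_l + x_l. Since w is Linear(input_dim, 1),
# xw is a per-example scalar that rescales x0 before the residual add.
import torch

batch_size, input_dim = 4, 6
x0 = torch.randn(batch_size, input_dim)
w = torch.nn.Linear(input_dim, 1, bias=False)
b = torch.zeros(input_dim)

x = x0
xw = w(x)            # [batch_size, 1]
x = x0 * xw + b + x  # broadcasts back to [batch_size, input_dim]
```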
@@ -40,25 +46,27 @@ class DCN(BaseModel):
     def default_task(self):
         return "binary"
 
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 cross_num: int = 3,
-                 mlp_params: dict | None = None,
-                 target: list[str] = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        cross_num: int = 3,
+        mlp_params: dict | None = None,
+        target: list[str] = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(DCN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -70,13 +78,13 @@ class DCN(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         # All features
         self.all_features = dense_features + sparse_features + sequence_features
 
@@ -84,20 +92,30 @@ class DCN(BaseModel):
         self.embedding = EmbeddingLayer(features=self.all_features)
 
         # Calculate input dimension
-        emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
-        dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+        emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.all_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         input_dim = emb_dim_total + dense_input_dim
-
+
         # Cross Network
         self.cross_network = CrossNetwork(input_dim=input_dim, num_layers=cross_num)
-
+
         # Deep Network (optional)
         if mlp_params is not None:
             self.use_dnn = True
             self.mlp = MLP(input_dim=input_dim, **mlp_params)
             deep_dim = self.mlp.output_dim
             # Final layer combines cross and deep
-            self.final_layer = nn.Linear(input_dim + deep_dim, 1) # + deep_dim for MLP output
+            self.final_layer = nn.Linear(
+                input_dim + deep_dim, 1
+            )  # + deep_dim for MLP output
         else:
             self.use_dnn = False
             # Final layer only uses cross network output
@@ -107,8 +125,8 @@ class DCN(BaseModel):
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr='embedding',
-            include_modules=['cross_network', 'mlp', 'final_layer']
+            embedding_attr="embedding",
+            include_modules=["cross_network", "mlp", "final_layer"],
         )
 
         self.compile(
@@ -121,18 +139,20 @@ class DCN(BaseModel):
     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
-
+
         # Cross Network
         cross_output = self.cross_network(input_flat)  # [B, input_dim]
-
+
         if self.use_dnn:
             # Deep Network
             deep_output = self.mlp(input_flat)  # [B, 1]
             # Concatenate cross and deep
-            combined = torch.cat([cross_output, deep_output], dim=-1) # [B, input_dim + 1]
+            combined = torch.cat(
+                [cross_output, deep_output], dim=-1
+            )  # [B, input_dim + 1]
         else:
             combined = cross_output
-
+
         # Final prediction
         y = self.final_layer(combined)
         return self.prediction_layer(y)
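For context, instantiating the reformatted DCN follows the __init__ signature shown above. The feature-class arguments and the mlp_params keys below are assumptions (their definitions are not part of this diff), so treat this as a sketch rather than the package API:

```python
# Hypothetical DCN setup based only on the __init__ signature in this diff.
# SparseFeature/DenseFeature arguments and the mlp_params schema are assumed;
# check nextrec.basic.features and nextrec.basic.layers.MLP for the real ones.
from nextrec.basic.features import DenseFeature, SparseFeature
from nextrec.models.ranking.dcn import DCN

model = DCN(
    dense_features=[DenseFeature(name="price")],  # assumed signature
    sparse_features=[
        SparseFeature(name="user_id", vocab_size=10_000, embedding_dim=16),
    ],
    sequence_features=[],
    cross_num=3,                     # number of stacked cross layers
    mlp_params={"dims": [128, 64]},  # assumed MLP config key
    optimizer="adam",
    loss="bce",
    device="cpu",
)
```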
nextrec/models/ranking/dcn_v2.py

@@ -5,25 +5,30 @@ Date: create on 09/11/2025
 import torch
 import torch.nn as nn
 
-from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
-from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
 class CrossNetV2(nn.Module):
     """Vector-wise cross network proposed in DCN V2 (Wang et al., 2021)."""
+
     def __init__(self, input_dim, num_layers):
         super().__init__()
         self.num_layers = num_layers
-        self.w = torch.nn.ModuleList([torch.nn.Linear(input_dim, input_dim, bias=False) for _ in range(num_layers)])
-        self.b = torch.nn.ParameterList([torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)])
-
+        self.w = torch.nn.ModuleList(
+            [
+                torch.nn.Linear(input_dim, input_dim, bias=False)
+                for _ in range(num_layers)
+            ]
+        )
+        self.b = torch.nn.ParameterList(
+            [torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)]
+        )
 
     def forward(self, x):
         x0 = x
         for i in range(self.num_layers):
-            x =x0*self.w[i](x) + self.b[i] + x
+            x = x0 * self.w[i](x) + self.b[i] + x
         return x
-
+
+
 class CrossNetMix(nn.Module):
     """Mixture of low-rank cross experts from DCN V2 (Wang et al., 2021)."""
 
@@ -33,18 +38,46 @@ class CrossNetMix(nn.Module):
         self.num_experts = num_experts
 
         # U: (input_dim, low_rank)
-        self.u_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
-            torch.empty(num_experts, input_dim, low_rank))) for i in range(self.num_layers)])
+        self.u_list = torch.nn.ParameterList(
+            [
+                nn.Parameter(
+                    nn.init.xavier_normal_(
+                        torch.empty(num_experts, input_dim, low_rank)
+                    )
+                )
+                for i in range(self.num_layers)
+            ]
+        )
         # V: (input_dim, low_rank)
-        self.v_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
-            torch.empty(num_experts, input_dim, low_rank))) for i in range(self.num_layers)])
+        self.v_list = torch.nn.ParameterList(
+            [
+                nn.Parameter(
+                    nn.init.xavier_normal_(
+                        torch.empty(num_experts, input_dim, low_rank)
+                    )
+                )
+                for i in range(self.num_layers)
+            ]
+        )
         # C: (low_rank, low_rank)
-        self.c_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
-            torch.empty(num_experts, low_rank, low_rank))) for i in range(self.num_layers)])
-        self.gating = nn.ModuleList([nn.Linear(input_dim, 1, bias=False) for i in range(self.num_experts)])
-
-        self.bias = torch.nn.ParameterList([nn.Parameter(nn.init.zeros_(
-            torch.empty(input_dim, 1))) for i in range(self.num_layers)])
+        self.c_list = torch.nn.ParameterList(
+            [
+                nn.Parameter(
+                    nn.init.xavier_normal_(torch.empty(num_experts, low_rank, low_rank))
+                )
+                for i in range(self.num_layers)
+            ]
+        )
+        self.gating = nn.ModuleList(
+            [nn.Linear(input_dim, 1, bias=False) for i in range(self.num_experts)]
+        )
+
+        self.bias = torch.nn.ParameterList(
+            [
+                nn.Parameter(nn.init.zeros_(torch.empty(input_dim, 1)))
+                for i in range(self.num_layers)
+            ]
+        )
 
     def forward(self, x):
         x_0 = x.unsqueeze(2)  # (bs, in_features, 1)
@@ -59,7 +92,9 @@ class CrossNetMix(nn.Module):
 
             # (2) E(x_l)
             # project the input x_l to $\mathbb{R}^{r}$
-            v_x = torch.matmul(self.v_list[i][expert_id].t(), x_l) # (bs, low_rank, 1)
+            v_x = torch.matmul(
+                self.v_list[i][expert_id].t(), x_l
+            )  # (bs, low_rank, 1)
 
             # nonlinear activation in low rank space
             v_x = torch.tanh(v_x)
@@ -67,7 +102,9 @@ class CrossNetMix(nn.Module):
             v_x = torch.tanh(v_x)
 
             # project back to $\mathbb{R}^{d}$
-            uv_x = torch.matmul(self.u_list[i][expert_id], v_x) # (bs, in_features, 1)
+            uv_x = torch.matmul(
+                self.u_list[i][expert_id], v_x
+            )  # (bs, in_features, 1)
 
             dot_ = uv_x + self.bias[i]
             dot_ = x_0 * dot_  # Hadamard-product
@@ -75,10 +112,14 @@ class CrossNetMix(nn.Module):
             output_of_experts.append(dot_.squeeze(2))
 
         # (3) mixture of low-rank experts
-        output_of_experts = torch.stack(output_of_experts, 2) # (bs, in_features, num_experts)
-        gating_score_experts = torch.stack(gating_score_experts, 1) # (bs, num_experts, 1)
+        output_of_experts = torch.stack(
+            output_of_experts, 2
+        )  # (bs, in_features, num_experts)
+        gating_score_experts = torch.stack(
+            gating_score_experts, 1
+        )  # (bs, num_experts, 1)
         moe_out = torch.matmul(output_of_experts, gating_score_experts.softmax(1))
         x_l = moe_out + x_l  # (bs, in_features, 1)
 
         x_l = x_l.squeeze()  # (bs, in_features)
-        return x_l
+        return x_l
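The CrossNetMix hunks above only re-wrap long lines; per layer, each expert still projects x_l down to rank low_rank with V, mixes it with C (tanh on both sides), projects back with U, and the expert outputs are combined with a softmax over the gating scores. A sketch of one expert's computation (illustrative tensors only):

```python
# One CrossNetMix expert, following the forward pass above: project to the
# low-rank space with V, mix with C (tanh before and after), project back
# with U, then Hadamard-product with x_0. Shapes are illustrative.
import torch

bs, in_features, low_rank = 4, 6, 2
x_0 = torch.randn(bs, in_features, 1)
x_l = x_0.clone()
U = torch.randn(in_features, low_rank)
V = torch.randn(in_features, low_rank)
C = torch.randn(low_rank, low_rank)
bias = torch.zeros(in_features, 1)

v_x = torch.tanh(torch.matmul(V.t(), x_l))  # (bs, low_rank, 1)
v_x = torch.tanh(torch.matmul(C, v_x))      # mix inside the low-rank space
uv_x = torch.matmul(U, v_x)                 # (bs, in_features, 1)
expert_out = x_0 * (uv_x + bias)            # Hadamard product with x_0
```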
nextrec/models/ranking/deepfm.py

@@ -43,13 +43,13 @@ embeddings; trains end-to-end without hand-crafted cross features; commonly used for CTR/
 - A common strong baseline for CTR/CVR tasks
 """
 
-import torch
 import torch.nn as nn
 
 from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import FM, LR, EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+
 class DeepFM(BaseModel):
     @property
     def model_name(self):
@@ -59,23 +59,26 @@ class DeepFM(BaseModel):
     def default_task(self):
         return "binary"
 
-    def __init__(self,
-                 dense_features: list[DenseFeature]|list = [],
-                 sparse_features: list[SparseFeature]|list = [],
-                 sequence_features: list[SequenceFeature]|list = [],
-                 mlp_params: dict = {},
-                 target: list[str]|str = [],
-                 task: str | list[str] | None = None,
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4, **kwargs):
-
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | list = [],
+        sparse_features: list[SparseFeature] | list = [],
+        sequence_features: list[SequenceFeature] | list = [],
+        mlp_params: dict = {},
+        target: list[str] | str = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(DeepFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -87,13 +90,13 @@ class DeepFM(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
        )
 
         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.fm_features = sparse_features + sequence_features
         self.deep_features = dense_features + sparse_features + sequence_features
         self.embedding = EmbeddingLayer(features=self.deep_features)
@@ -107,8 +110,15 @@ class DeepFM(BaseModel):
         self.prediction_layer = PredictionLayer(task_type=self.default_task)
 
         # Register regularization weights
-        self.register_regularization_weights(embedding_attr='embedding', include_modules=['linear', 'mlp'])
-        self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["linear", "mlp"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )
 
     def forward(self, x):
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
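The DeepFM hunks are likewise formatting-only, plus the removal of an unused torch import. Based on the __init__ signature above, a typical instantiation would look roughly like this; as with the DCN example, the feature-class arguments and mlp_params keys are assumptions, not the verified package API:

```python
# Hypothetical DeepFM setup based only on the __init__ signature in this diff.
# SparseFeature arguments and the mlp_params schema are assumed; per the code
# above, the FM part uses sparse + sequence features, the deep part uses all.
from nextrec.basic.features import SparseFeature
from nextrec.models.ranking.deepfm import DeepFM

model = DeepFM(
    sparse_features=[
        SparseFeature(name="user_id", vocab_size=10_000, embedding_dim=16),
        SparseFeature(name="item_id", vocab_size=50_000, embedding_dim=16),
    ],
    mlp_params={"dims": [256, 128]},  # assumed MLP config key
    target=["label"],
    optimizer="adam",
    loss="bce",
    device="cpu",
)
```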