nextrec 0.4.1-py3-none-any.whl → 0.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. nextrec/__init__.py +1 -1
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -5
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/features.py +30 -22
  6. nextrec/basic/layers.py +250 -112
  7. nextrec/basic/loggers.py +63 -44
  8. nextrec/basic/metrics.py +270 -120
  9. nextrec/basic/model.py +1084 -402
  10. nextrec/basic/session.py +10 -3
  11. nextrec/cli.py +492 -0
  12. nextrec/data/__init__.py +19 -25
  13. nextrec/data/batch_utils.py +11 -3
  14. nextrec/data/data_processing.py +51 -45
  15. nextrec/data/data_utils.py +26 -15
  16. nextrec/data/dataloader.py +273 -96
  17. nextrec/data/preprocessor.py +320 -199
  18. nextrec/loss/listwise.py +17 -9
  19. nextrec/loss/loss_utils.py +7 -8
  20. nextrec/loss/pairwise.py +2 -0
  21. nextrec/loss/pointwise.py +30 -12
  22. nextrec/models/generative/hstu.py +103 -38
  23. nextrec/models/match/dssm.py +82 -68
  24. nextrec/models/match/dssm_v2.py +72 -57
  25. nextrec/models/match/mind.py +175 -107
  26. nextrec/models/match/sdm.py +104 -87
  27. nextrec/models/match/youtube_dnn.py +73 -59
  28. nextrec/models/multi_task/esmm.py +69 -46
  29. nextrec/models/multi_task/mmoe.py +91 -53
  30. nextrec/models/multi_task/ple.py +117 -58
  31. nextrec/models/multi_task/poso.py +163 -55
  32. nextrec/models/multi_task/share_bottom.py +63 -36
  33. nextrec/models/ranking/afm.py +80 -45
  34. nextrec/models/ranking/autoint.py +74 -57
  35. nextrec/models/ranking/dcn.py +110 -48
  36. nextrec/models/ranking/dcn_v2.py +265 -45
  37. nextrec/models/ranking/deepfm.py +39 -24
  38. nextrec/models/ranking/dien.py +335 -146
  39. nextrec/models/ranking/din.py +158 -92
  40. nextrec/models/ranking/fibinet.py +134 -52
  41. nextrec/models/ranking/fm.py +68 -26
  42. nextrec/models/ranking/masknet.py +95 -33
  43. nextrec/models/ranking/pnn.py +128 -58
  44. nextrec/models/ranking/widedeep.py +40 -28
  45. nextrec/models/ranking/xdeepfm.py +67 -40
  46. nextrec/utils/__init__.py +59 -34
  47. nextrec/utils/config.py +496 -0
  48. nextrec/utils/device.py +30 -20
  49. nextrec/utils/distributed.py +36 -9
  50. nextrec/utils/embedding.py +1 -0
  51. nextrec/utils/feature.py +1 -0
  52. nextrec/utils/file.py +33 -11
  53. nextrec/utils/initializer.py +61 -16
  54. nextrec/utils/model.py +22 -0
  55. nextrec/utils/optimizer.py +25 -9
  56. nextrec/utils/synthetic_data.py +283 -165
  57. nextrec/utils/tensor.py +24 -13
  58. {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/METADATA +53 -24
  59. nextrec-0.4.3.dist-info/RECORD +69 -0
  60. nextrec-0.4.3.dist-info/entry_points.txt +2 -0
  61. nextrec-0.4.1.dist-info/RECORD +0 -66
  62. {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/WHEEL +0 -0
  63. {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/dcn_v2.py
@@ -1,5 +1,46 @@
  """
  Date: create on 09/11/2025
+ Checkpoint: edit on 09/12/2025
+ Author: Yang Zhou, zyaztec@gmail.com
+ Reference:
+ [1] R. Wang et al. DCN V2: Improved Deep & Cross Network and Practical Lessons for
+     Web-scale Learning to Rank Systems. KDD 2021.
+     (https://arxiv.org/abs/2008.13535)
+
+ DCN v2 enhances the original Deep & Cross Network by replacing the scalar cross
+ weights with vector-wise (matrix) parameters and a Mixture-of-Low-Rank-Experts
+ variant. The matrix cross (CrossNetV2) improves expressiveness with manageable
+ parameter growth, while CrossNetMix decomposes the matrix into low-rank factors and
+ gates across experts for stronger modeling at a similar cost. As in DCN, the cross
+ tower explicitly builds polynomial feature interactions and can be paired with a
+ deep MLP tower; their outputs are concatenated before a final linear head.
+
+ Workflow:
+ (1) Embed sparse/sequence fields and concatenate with dense inputs
+ (2) Cross tower: choose matrix CrossNetV2 or low-rank CrossNetMix for explicit crosses
+ (3) Optional deep tower: MLP over the same flattened embeddings
+ (4) Fuse cross and deep outputs, then predict via a linear + prediction layer
+
+ Key Advantages:
+ - Vector-wise/matrix cross weights capture richer interactions than DCN v1
+ - Low-rank MoE cross (CrossNetMix) boosts capacity without quadratic parameters
+ - Compatible with a deep tower for additional nonlinear modeling
+
+ Building on the original DCN, DCN v2 upgrades the scalar cross weights to
+ vector/matrix parameters and introduces CrossNetMix, a mixture of low-rank experts.
+ The matrix cross (CrossNetV2) improves expressiveness while keeping parameter growth
+ under control; CrossNetMix further strengthens modeling capacity via low-rank
+ decomposition and gating while staying parameter-efficient. As in DCN, the cross
+ tower explicitly builds polynomial interactions and can run in parallel with, or be
+ stacked before, an MLP deep tower; the outputs go to a linear head for prediction.
+
+ Workflow:
+ (1) Embed sparse/sequence features and concatenate them with dense features
+ (2) Cross tower: choose matrix CrossNetV2 or low-rank CrossNetMix for explicit interactions
+ (3) Optional deep tower: an MLP over the same flattened input or over the cross output
+ (4) Fuse the cross and deep outputs, then produce the final score via the linear and prediction layers
+
+ Key advantages:
+ - The matrix cross captures richer interactions than DCN v1
+ - The mixture of low-rank experts gives stronger modeling at a similar parameter count
+ - Compatible with parallel/stacked deep towers for flexible nonlinear modeling
  """

  import torch
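The vector-wise cross layer described in the new docstring computes x_{l+1} = x_0 ⊙ (W_l x_l + b_l) + x_l, so the feature dimension is preserved while each layer raises the interaction order by one. A minimal standalone sketch of two such layers (illustrative only; the sizes and weights here are made up and are not taken from the package):

```python
import torch

batch, dim = 4, 8
x0 = torch.randn(batch, dim)                 # flattened embeddings, step (1) of the workflow
w1 = torch.nn.Linear(dim, dim, bias=False)   # per-layer matrix weight W_1
w2 = torch.nn.Linear(dim, dim, bias=False)   # per-layer matrix weight W_2
b1, b2 = torch.zeros(dim), torch.zeros(dim)  # per-layer bias terms

x1 = x0 * w1(x0) + b1 + x0                   # first cross layer (x_l = x_0)
x2 = x0 * w2(x1) + b2 + x1                   # second layer reuses x_0, raising the polynomial degree
print(x2.shape)                              # torch.Size([4, 8]); shape is unchanged layer to layer
```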
@@ -9,76 +50,255 @@ from nextrec.basic.model import BaseModel
  from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
  from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature

+
  class CrossNetV2(nn.Module):
      """Vector-wise cross network proposed in DCN V2 (Wang et al., 2021)."""
-     def __init__(self, input_dim, num_layers):
+
+     def __init__(self, input_dim: int, num_layers: int):
          super().__init__()
          self.num_layers = num_layers
-         self.w = torch.nn.ModuleList([torch.nn.Linear(input_dim, input_dim, bias=False) for _ in range(num_layers)])
-         self.b = torch.nn.ParameterList([torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)])
-
+         self.w = torch.nn.ModuleList(
+             [
+                 torch.nn.Linear(input_dim, input_dim, bias=False)
+                 for _ in range(num_layers)
+             ]
+         )
+         self.b = torch.nn.ParameterList(
+             [torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)]
+         )

-     def forward(self, x):
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
          x0 = x
          for i in range(self.num_layers):
-             x =x0*self.w[i](x) + self.b[i] + x
+             x = x0 * self.w[i](x) + self.b[i] + x
          return x
-
+
+
  class CrossNetMix(nn.Module):
      """Mixture of low-rank cross experts from DCN V2 (Wang et al., 2021)."""

-     def __init__(self, input_dim, num_layers=2, low_rank=32, num_experts=4):
-         super(CrossNetMix, self).__init__()
+     def __init__(
+         self,
+         input_dim: int,
+         num_layers: int = 2,
+         low_rank: int = 32,
+         num_experts: int = 4,
+     ):
+         super().__init__()
          self.num_layers = num_layers
          self.num_experts = num_experts

-         # U: (input_dim, low_rank)
-         self.u_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
-             torch.empty(num_experts, input_dim, low_rank))) for i in range(self.num_layers)])
-         # V: (input_dim, low_rank)
-         self.v_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
-             torch.empty(num_experts, input_dim, low_rank))) for i in range(self.num_layers)])
-         # C: (low_rank, low_rank)
-         self.c_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
-             torch.empty(num_experts, low_rank, low_rank))) for i in range(self.num_layers)])
-         self.gating = nn.ModuleList([nn.Linear(input_dim, 1, bias=False) for i in range(self.num_experts)])
-
-         self.bias = torch.nn.ParameterList([nn.Parameter(nn.init.zeros_(
-             torch.empty(input_dim, 1))) for i in range(self.num_layers)])
-
-     def forward(self, x):
+         self.u_list = nn.ParameterList(
+             [
+                 nn.Parameter(
+                     nn.init.xavier_normal_(
+                         torch.empty(num_experts, input_dim, low_rank)
+                     )
+                 )
+                 for _ in range(num_layers)
+             ]
+         )
+         self.v_list = nn.ParameterList(
+             [
+                 nn.Parameter(
+                     nn.init.xavier_normal_(
+                         torch.empty(num_experts, input_dim, low_rank)
+                     )
+                 )
+                 for _ in range(num_layers)
+             ]
+         )
+         self.c_list = nn.ParameterList(
+             [
+                 nn.Parameter(
+                     nn.init.xavier_normal_(torch.empty(num_experts, low_rank, low_rank))
+                 )
+                 for _ in range(num_layers)
+             ]
+         )
+
+         self.gating = nn.ModuleList(
+             [nn.Linear(input_dim, 1, bias=False) for _ in range(num_experts)]
+         )
+
+         self.bias = nn.ParameterList(
+             [nn.Parameter(torch.zeros(input_dim, 1)) for _ in range(num_layers)]
+         )
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         # x: (bs, in_features)
          x_0 = x.unsqueeze(2)  # (bs, in_features, 1)
          x_l = x_0
+
          for i in range(self.num_layers):
              output_of_experts = []
              gating_score_experts = []
+
+             gating_input = x_l.squeeze(2)  # (bs, in_features)
+
              for expert_id in range(self.num_experts):
-                 # (1) G(x_l)
-                 # compute the gating score by x_l
-                 gating_score_experts.append(self.gating[expert_id](x_l.squeeze(2)))
+                 # Gating
+                 gating_score_experts.append(
+                     self.gating[expert_id](gating_input)
+                 )  # (bs, 1)

-                 # (2) E(x_l)
-                 # project the input x_l to $\mathbb{R}^{r}$
-                 v_x = torch.matmul(self.v_list[i][expert_id].t(), x_l)  # (bs, low_rank, 1)
+                 # Low-rank cross: U C V^T x_l
+                 V = self.v_list[i][expert_id]  # (in_features, low_rank)
+                 C = self.c_list[i][expert_id]  # (low_rank, low_rank)
+                 U = self.u_list[i][expert_id]  # (in_features, low_rank)

-                 # nonlinear activation in low rank space
-                 v_x = torch.tanh(v_x)
-                 v_x = torch.matmul(self.c_list[i][expert_id], v_x)
+                 # (bs, 1, low_rank)
+                 v_x = x_l.transpose(1, 2).matmul(V)  # x_l^T V
+                 v_x = v_x.matmul(C)  # · C
                  v_x = torch.tanh(v_x)

-                 # project back to $\mathbb{R}^{d}$
-                 uv_x = torch.matmul(self.u_list[i][expert_id], v_x)  # (bs, in_features, 1)
+                 # (bs, in_features, 1)
+                 uv_x = U.matmul(v_x.transpose(1, 2))
+
+                 # x_0 ⊙ (uv_x + b)
+                 dot_ = x_0 * (uv_x + self.bias[i])  # (bs, in_features, 1)
+
+                 output_of_experts.append(dot_.squeeze(2))  # (bs, in_features)
+
+             # (3) Mixture of experts
+             output_of_experts = torch.stack(
+                 output_of_experts, dim=2
+             )  # (bs, in_features, num_experts)
+             gating_score_experts = torch.stack(
+                 gating_score_experts, dim=1
+             )  # (bs, num_experts, 1)
+             gating_score_experts = gating_score_experts.softmax(dim=1)
+
+             moe_out = torch.matmul(
+                 output_of_experts, gating_score_experts
+             )  # (bs, in_features, 1)
+             x_l = moe_out + x_l  # residual
+
+         return x_l.squeeze(-1)  # (bs, in_features)
+
+
+ class DCNv2(BaseModel):
+     @property
+     def model_name(self) -> str:
+         return "DCNv2"
+
+     @property
+     def default_task(self):
+         return "binary"
+
+     def __init__(
+         self,
+         dense_features: list[DenseFeature] | None = None,
+         sparse_features: list[SparseFeature] | None = None,
+         sequence_features: list[SequenceFeature] | None = None,
+         cross_num: int = 3,
+         cross_type: str = "matrix",
+         architecture: str = "parallel",
+         low_rank: int = 32,
+         num_experts: int = 4,
+         mlp_params: dict | None = None,
+         target: list[str] | str | None = None,
+         task: str | list[str] | None = None,
+         optimizer: str = "adam",
+         optimizer_params: dict | None = None,
+         loss: str | nn.Module | None = "bce",
+         loss_params: dict | list[dict] | None = None,
+         device: str = "cpu",
+         embedding_l1_reg=1e-6,
+         dense_l1_reg=1e-5,
+         embedding_l2_reg=1e-5,
+         dense_l2_reg=1e-4,
+         **kwargs,
+     ):
+         dense_features = dense_features or []
+         sparse_features = sparse_features or []
+         sequence_features = sequence_features or []
+         optimizer_params = optimizer_params or {}
+         if loss is None:
+             loss = "bce"
+
+         super(DCNv2, self).__init__(
+             dense_features=dense_features,
+             sparse_features=sparse_features,
+             sequence_features=sequence_features,
+             target=target,
+             task=task or self.default_task,
+             device=device,
+             embedding_l1_reg=embedding_l1_reg,
+             dense_l1_reg=dense_l1_reg,
+             embedding_l2_reg=embedding_l2_reg,
+             dense_l2_reg=dense_l2_reg,
+             **kwargs,
+         )
+
+         self.all_features = dense_features + sparse_features + sequence_features
+         self.embedding = EmbeddingLayer(features=self.all_features)
+         input_dim = self.embedding.input_dim
+
+         architecture = architecture.lower()
+         if architecture not in {"parallel", "stacked"}:
+             raise ValueError("architecture must be 'parallel' or 'stacked'.")
+         self.architecture = architecture
+
+         cross_type = cross_type.lower()
+         if cross_type == "matrix":
+             self.cross_network = CrossNetV2(input_dim=input_dim, num_layers=cross_num)
+         elif cross_type in {"mix", "low_rank"}:
+             self.cross_network = CrossNetMix(
+                 input_dim=input_dim,
+                 num_layers=cross_num,
+                 low_rank=low_rank,
+                 num_experts=num_experts,
+             )
+         else:
+             raise ValueError("Unsupported cross_type for DCNv2. Use 'matrix' or 'mix'.")
+
+         if mlp_params is not None:
+             self.use_dnn = True
+             dnn_params = dict(mlp_params)
+             dnn_params.setdefault("output_layer", False)
+             self.mlp = MLP(input_dim=input_dim, **dnn_params)
+             deep_dim = self.mlp.output_dim
+             final_input_dim = (
+                 input_dim + deep_dim if architecture == "parallel" else deep_dim
+             )
+         else:
+             if architecture == "stacked":
+                 raise ValueError(
+                     "Stacked architecture requires mlp_params (deep tower)."
+                 )
+             self.use_dnn = False
+             self.mlp = None
+             final_input_dim = input_dim
+
+         self.final_layer = nn.Linear(final_input_dim, 1)
+         self.prediction_layer = PredictionLayer(task_type=self.default_task)
+
+         self.register_regularization_weights(
+             embedding_attr="embedding",
+             include_modules=["cross_network", "mlp", "final_layer"],
+         )

-                 dot_ = uv_x + self.bias[i]
-                 dot_ = x_0 * dot_  # Hadamard-product
+         self.compile(
+             optimizer=optimizer,
+             optimizer_params=optimizer_params,
+             loss=loss,
+             loss_params=loss_params,
+         )

-                 output_of_experts.append(dot_.squeeze(2))
+     def forward(self, x) -> torch.Tensor:
+         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
+         cross_out = self.cross_network(input_flat)

-             # (3) mixture of low-rank experts
-             output_of_experts = torch.stack(output_of_experts, 2)  # (bs, in_features, num_experts)
-             gating_score_experts = torch.stack(gating_score_experts, 1)  # (bs, num_experts, 1)
-             moe_out = torch.matmul(output_of_experts, gating_score_experts.softmax(1))
-             x_l = moe_out + x_l  # (bs, in_features, 1)
+         if self.use_dnn and self.mlp is not None:
+             if self.architecture == "parallel":
+                 deep_out = self.mlp(input_flat)
+                 combined = torch.cat([cross_out, deep_out], dim=-1)
+             else:  # stacked
+                 deep_out = self.mlp(cross_out)
+                 combined = deep_out
+         else:
+             combined = cross_out

-         x_l = x_l.squeeze()  # (bs, in_features)
-         return x_l
+         logit = self.final_layer(combined)
+         return self.prediction_layer(logit)
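Both cross towers added here map a (batch, input_dim) tensor back to the same shape, which is what lets cross_type switch between them without touching the deep tower or the final linear head. A minimal usage sketch, assuming the 0.4.3 wheel is installed and these classes remain importable from this module path; the batch size, input_dim, and hyperparameters below are arbitrary:

```python
import torch

from nextrec.models.ranking.dcn_v2 import CrossNetMix, CrossNetV2

x = torch.randn(8, 64)                                 # (batch, flattened embedding dim)
cross_matrix = CrossNetV2(input_dim=64, num_layers=3)  # cross_type="matrix"
cross_mix = CrossNetMix(input_dim=64, num_layers=3, low_rank=16, num_experts=4)  # cross_type="mix"

print(cross_matrix(x).shape)  # torch.Size([8, 64])
print(cross_mix(x).shape)     # torch.Size([8, 64])
```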
nextrec/models/ranking/deepfm.py
@@ -43,13 +43,13 @@ embedding, trained end to end without hand-crafted cross features, commonly used for CTR/
  - A common strong baseline for CTR/CVR tasks
  """

- import torch
  import torch.nn as nn

  from nextrec.basic.model import BaseModel
  from nextrec.basic.layers import FM, LR, EmbeddingLayer, MLP, PredictionLayer
  from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature

+
  class DeepFM(BaseModel):
      @property
      def model_name(self):
@@ -59,23 +59,34 @@ class DeepFM(BaseModel):
      def default_task(self):
          return "binary"

-     def __init__(self,
-                  dense_features: list[DenseFeature]|list = [],
-                  sparse_features: list[SparseFeature]|list = [],
-                  sequence_features: list[SequenceFeature]|list = [],
-                  mlp_params: dict = {},
-                  target: list[str]|str = [],
-                  task: str | list[str] | None = None,
-                  optimizer: str = "adam",
-                  optimizer_params: dict = {},
-                  loss: str | nn.Module | None = "bce",
-                  loss_params: dict | list[dict] | None = None,
-                  device: str = 'cpu',
-                  embedding_l1_reg=1e-6,
-                  dense_l1_reg=1e-5,
-                  embedding_l2_reg=1e-5,
-                  dense_l2_reg=1e-4, **kwargs):
-
+     def __init__(
+         self,
+         dense_features: list[DenseFeature] | None = None,
+         sparse_features: list[SparseFeature] | None = None,
+         sequence_features: list[SequenceFeature] | None = None,
+         mlp_params: dict | None = None,
+         target: list[str] | str | None = None,
+         task: str | list[str] | None = None,
+         optimizer: str = "adam",
+         optimizer_params: dict | None = None,
+         loss: str | nn.Module | None = "bce",
+         loss_params: dict | list[dict] | None = None,
+         device: str = "cpu",
+         embedding_l1_reg=1e-6,
+         dense_l1_reg=1e-5,
+         embedding_l2_reg=1e-5,
+         dense_l2_reg=1e-4,
+         **kwargs,
+     ):
+
+         dense_features = dense_features or []
+         sparse_features = sparse_features or []
+         sequence_features = sequence_features or []
+         mlp_params = mlp_params or {}
+         optimizer_params = optimizer_params or {}
+         if loss is None:
+             loss = "bce"
+
          super(DeepFM, self).__init__(
              dense_features=dense_features,
              sparse_features=sparse_features,
@@ -87,13 +98,10 @@ class DeepFM(BaseModel):
              dense_l1_reg=dense_l1_reg,
              embedding_l2_reg=embedding_l2_reg,
              dense_l2_reg=dense_l2_reg,
-             **kwargs
+             **kwargs,
          )

          self.loss = loss
-         if self.loss is None:
-             self.loss = "bce"
-
          self.fm_features = sparse_features + sequence_features
          self.deep_features = dense_features + sparse_features + sequence_features
          self.embedding = EmbeddingLayer(features=self.deep_features)
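The DeepFM constructor follows the same refactor as dcn_v2.py above: mutable defaults such as `[]` and `{}` are replaced by `None` and normalized inside `__init__`, and the `loss is None` fallback now happens before the base class is initialized. A tiny standalone sketch of why shared mutable defaults are risky (generic Python behavior, not code from the package):

```python
def bad(features: list = []):          # the default list is created once and shared across calls
    features.append("x")
    return len(features)

def good(features: list | None = None):
    features = features or []          # a fresh list on every call
    features.append("x")
    return len(features)

print(bad(), bad())    # 1 2  -- state leaks between calls
print(good(), good())  # 1 1
```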
@@ -107,8 +115,15 @@ class DeepFM(BaseModel):
          self.prediction_layer = PredictionLayer(task_type=self.default_task)

          # Register regularization weights
-         self.register_regularization_weights(embedding_attr='embedding', include_modules=['linear', 'mlp'])
-         self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
+         self.register_regularization_weights(
+             embedding_attr="embedding", include_modules=["linear", "mlp"]
+         )
+         self.compile(
+             optimizer=optimizer,
+             optimizer_params=optimizer_params,
+             loss=loss,
+             loss_params=loss_params,
+         )

      def forward(self, x):
          input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)