nextrec 0.4.22__py3-none-any.whl → 0.4.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. nextrec/__version__.py +1 -1
  2. nextrec/basic/layers.py +96 -46
  3. nextrec/basic/metrics.py +128 -114
  4. nextrec/basic/model.py +94 -91
  5. nextrec/basic/summary.py +36 -2
  6. nextrec/data/dataloader.py +2 -0
  7. nextrec/data/preprocessor.py +137 -5
  8. nextrec/loss/listwise.py +19 -6
  9. nextrec/loss/pairwise.py +6 -4
  10. nextrec/loss/pointwise.py +8 -6
  11. nextrec/models/multi_task/aitm.py +0 -0
  12. nextrec/models/multi_task/apg.py +0 -0
  13. nextrec/models/multi_task/cross_stitch.py +0 -0
  14. nextrec/models/multi_task/esmm.py +5 -28
  15. nextrec/models/multi_task/mmoe.py +6 -28
  16. nextrec/models/multi_task/pepnet.py +335 -0
  17. nextrec/models/multi_task/ple.py +21 -40
  18. nextrec/models/multi_task/poso.py +17 -39
  19. nextrec/models/multi_task/share_bottom.py +5 -28
  20. nextrec/models/multi_task/snr_trans.py +0 -0
  21. nextrec/models/ranking/afm.py +3 -27
  22. nextrec/models/ranking/autoint.py +5 -38
  23. nextrec/models/ranking/dcn.py +1 -26
  24. nextrec/models/ranking/dcn_v2.py +6 -34
  25. nextrec/models/ranking/deepfm.py +2 -29
  26. nextrec/models/ranking/dien.py +2 -28
  27. nextrec/models/ranking/din.py +2 -27
  28. nextrec/models/ranking/eulernet.py +3 -30
  29. nextrec/models/ranking/ffm.py +0 -26
  30. nextrec/models/ranking/fibinet.py +8 -32
  31. nextrec/models/ranking/fm.py +0 -29
  32. nextrec/models/ranking/lr.py +0 -30
  33. nextrec/models/ranking/masknet.py +4 -30
  34. nextrec/models/ranking/pnn.py +4 -28
  35. nextrec/models/ranking/widedeep.py +0 -32
  36. nextrec/models/ranking/xdeepfm.py +0 -30
  37. nextrec/models/retrieval/dssm.py +4 -28
  38. nextrec/models/retrieval/dssm_v2.py +4 -28
  39. nextrec/models/retrieval/mind.py +2 -22
  40. nextrec/models/retrieval/sdm.py +4 -24
  41. nextrec/models/retrieval/youtube_dnn.py +4 -25
  42. nextrec/models/sequential/hstu.py +0 -18
  43. nextrec/utils/model.py +91 -4
  44. nextrec/utils/types.py +35 -0
  45. {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/METADATA +8 -6
  46. nextrec-0.4.24.dist-info/RECORD +86 -0
  47. nextrec-0.4.22.dist-info/RECORD +0 -81
  48. {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/WHEEL +0 -0
  49. {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/entry_points.txt +0 -0
  50. {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/licenses/LICENSE +0 -0
@@ -93,7 +93,7 @@ class CGCLayer(nn.Module):
93
93
  [
94
94
  MLP(
95
95
  input_dim=input_dim,
96
- output_layer=False,
96
+ output_dim=None,
97
97
  **shared_expert_params,
98
98
  )
99
99
  for _ in range(num_shared_experts)
@@ -105,7 +105,7 @@ class CGCLayer(nn.Module):
105
105
  [
106
106
  MLP(
107
107
  input_dim=input_dim,
108
- output_layer=False,
108
+ output_dim=None,
109
109
  **params,
110
110
  )
111
111
  for _ in range(num_specific_experts)
@@ -202,29 +202,21 @@ class PLE(BaseModel):
202
202
 
203
203
  def __init__(
204
204
  self,
205
- dense_features: list[DenseFeature],
206
- sparse_features: list[SparseFeature],
207
- sequence_features: list[SequenceFeature],
208
- shared_expert_params: dict,
209
- specific_expert_params: dict | list[dict],
210
- num_shared_experts: int,
211
- num_specific_experts: int,
212
- num_levels: int,
213
- tower_params_list: list[dict],
214
- target: list[str],
205
+ dense_features: list[DenseFeature] | None = None,
206
+ sparse_features: list[SparseFeature] | None = None,
207
+ sequence_features: list[SequenceFeature] | None = None,
208
+ shared_expert_params: dict | None = None,
209
+ specific_expert_params: dict | list[dict] | None = None,
210
+ num_shared_experts: int = 2,
211
+ num_specific_experts: int = 2,
212
+ num_levels: int = 2,
213
+ tower_params_list: list[dict] | None = None,
214
+ target: list[str] | None = None,
215
215
  task: str | list[str] | None = None,
216
- optimizer: str = "adam",
217
- optimizer_params: dict | None = None,
218
- loss: str | nn.Module | list[str | nn.Module] | None = "bce",
219
- loss_params: dict | list[dict] | None = None,
220
- embedding_l1_reg=0.0,
221
- dense_l1_reg=0.0,
222
- embedding_l2_reg=0.0,
223
- dense_l2_reg=0.0,
224
216
  **kwargs,
225
217
  ):
226
218
 
227
- self.nums_task = len(target)
219
+ self.nums_task = len(target) if target is not None else 1
228
220
 
229
221
  resolved_task = task
230
222
  if resolved_task is None:
@@ -244,23 +236,15 @@ class PLE(BaseModel):
244
236
  sequence_features=sequence_features,
245
237
  target=target,
246
238
  task=resolved_task,
247
- embedding_l1_reg=embedding_l1_reg,
248
- dense_l1_reg=dense_l1_reg,
249
- embedding_l2_reg=embedding_l2_reg,
250
- dense_l2_reg=dense_l2_reg,
251
239
  **kwargs,
252
240
  )
253
241
 
254
- self.loss = loss
255
- if self.loss is None:
256
- self.loss = "bce"
257
242
  # Number of tasks, experts, and levels
258
243
  self.nums_task = len(target)
259
244
  self.num_shared_experts = num_shared_experts
260
245
  self.num_specific_experts = num_specific_experts
261
246
  self.num_levels = num_levels
262
- if optimizer_params is None:
263
- optimizer_params = {}
247
+
264
248
  if len(tower_params_list) != self.nums_task:
265
249
  raise ValueError(
266
250
  f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.nums_task})"
@@ -275,8 +259,11 @@ class PLE(BaseModel):
275
259
  # input_dim = emb_dim_total + dense_input_dim
276
260
 
277
261
  # Get expert output dimension
278
- if "dims" in shared_expert_params and len(shared_expert_params["dims"]) > 0:
279
- expert_output_dim = shared_expert_params["dims"][-1]
262
+ if (
263
+ "hidden_dims" in shared_expert_params
264
+ and len(shared_expert_params["hidden_dims"]) > 0
265
+ ):
266
+ expert_output_dim = shared_expert_params["hidden_dims"][-1]
280
267
  else:
281
268
  expert_output_dim = input_dim
282
269
 
@@ -299,21 +286,15 @@ class PLE(BaseModel):
299
286
  # Task-specific towers
300
287
  self.towers = nn.ModuleList()
301
288
  for tower_params in tower_params_list:
302
- tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
289
+ tower = MLP(input_dim=expert_output_dim, output_dim=1, **tower_params)
303
290
  self.towers.append(tower)
304
291
  self.prediction_layer = TaskHead(
305
- task_type=self.default_task, task_dims=[1] * self.nums_task
292
+ task_type=self.task, task_dims=[1] * self.nums_task
306
293
  )
307
294
  # Register regularization weights
308
295
  self.register_regularization_weights(
309
296
  embedding_attr="embedding", include_modules=["cgc_layers", "towers"]
310
297
  )
311
- self.compile(
312
- optimizer=optimizer,
313
- optimizer_params=optimizer_params,
314
- loss=self.loss,
315
- loss_params=loss_params,
316
- )
317
298
 
318
299
  def forward(self, x):
319
300
  # Get all embeddings and flatten
@@ -42,7 +42,7 @@ from __future__ import annotations
42
42
  import torch
43
43
  import torch.nn as nn
44
44
  import torch.nn.functional as F
45
-
45
+ from typing import Literal
46
46
  from nextrec.basic.activation import activation_layer
47
47
  from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
48
48
  from nextrec.basic.layers import MLP, EmbeddingLayer
@@ -127,7 +127,7 @@ class POSOMLP(nn.Module):
127
127
  """
128
128
  POSO-enhanced MLP that stacks multiple POSOFC layers.
129
129
 
130
- dims: e.g., [256, 128, 64] means
130
+ hidden_dims: e.g., [256, 128, 64] means
131
131
  in_dim -> 256 -> 128 -> 64
132
132
  Each layer has its own gate g_l(pc) following Eq. (11).
133
133
  """
@@ -136,7 +136,7 @@ class POSOMLP(nn.Module):
136
136
  self,
137
137
  input_dim: int,
138
138
  pc_dim: int,
139
- dims: list[int],
139
+ hidden_dims: list[int],
140
140
  gate_hidden_dim: int = 32,
141
141
  scale_factor: float = 2.0,
142
142
  activation: str = "relu",
@@ -147,7 +147,7 @@ class POSOMLP(nn.Module):
147
147
 
148
148
  layers = []
149
149
  in_dim = input_dim
150
- for out_dim in dims:
150
+ for out_dim in hidden_dims:
151
151
  layers.append(
152
152
  POSOFC(
153
153
  in_dim=in_dim,
@@ -213,8 +213,8 @@ class POSOMMoE(nn.Module):
213
213
  [
214
214
  MLP(
215
215
  input_dim=input_dim,
216
- output_layer=False,
217
- dims=expert_hidden_dims,
216
+ output_dim=None,
217
+ hidden_dims=expert_hidden_dims,
218
218
  activation=activation,
219
219
  dropout=expert_dropout,
220
220
  )
@@ -307,9 +307,9 @@ class POSO(BaseModel):
307
307
  pc_sparse_features: list[str] | None,
308
308
  pc_sequence_features: list[str] | None,
309
309
  tower_params_list: list[dict],
310
- target: list[str],
310
+ target: list[str] | None = None,
311
311
  task: str | list[str] = "binary",
312
- architecture: str = "mlp",
312
+ architecture: Literal["mlp", "mmoe"] = "mlp",
313
313
  # POSO gating defaults
314
314
  gate_hidden_dim: int = 32,
315
315
  gate_scale_factor: float = 2.0,
@@ -323,14 +323,6 @@ class POSO(BaseModel):
323
323
  expert_gate_hidden_dim: int = 32,
324
324
  expert_gate_scale_factor: float = 2.0,
325
325
  gate_use_softmax: bool = True,
326
- optimizer: str = "adam",
327
- optimizer_params: dict | None = None,
328
- loss: str | nn.Module | list[str | nn.Module] | None = "bce",
329
- loss_params: dict | list[dict] | None = None,
330
- embedding_l1_reg=0.0,
331
- dense_l1_reg=0.0,
332
- embedding_l2_reg=0.0,
333
- dense_l2_reg=0.0,
334
326
  **kwargs,
335
327
  ):
336
328
  self.nums_task = len(target)
@@ -359,10 +351,6 @@ class POSO(BaseModel):
359
351
  sequence_features=sequence_features,
360
352
  target=target,
361
353
  task=resolved_task,
362
- embedding_l1_reg=embedding_l1_reg,
363
- dense_l1_reg=dense_l1_reg,
364
- embedding_l2_reg=embedding_l2_reg,
365
- dense_l2_reg=dense_l2_reg,
366
354
  **kwargs,
367
355
  )
368
356
 
@@ -373,12 +361,6 @@ class POSO(BaseModel):
373
361
  self.pc_sparse_feature_names = list(pc_sparse_features or [])
374
362
  self.pc_sequence_feature_names = list(pc_sequence_features or [])
375
363
 
376
- if loss is None:
377
- self.loss = "bce"
378
- self.loss = loss
379
-
380
- optimizer_params = optimizer_params or {}
381
-
382
364
  self.main_dense_features = select_features(
383
365
  self.dense_features, self.main_dense_feature_names, "main_dense_features"
384
366
  )
@@ -434,16 +416,16 @@ class POSO(BaseModel):
434
416
  self.towers = nn.ModuleList()
435
417
  self.tower_heads = nn.ModuleList()
436
418
  for tower_params in tower_params_list:
437
- dims = tower_params.get("dims")
438
- if not dims:
419
+ hidden_dims = tower_params.get("hidden_dims")
420
+ if not hidden_dims:
439
421
  raise ValueError(
440
- "tower_params must include a non-empty 'dims' list for POSO-MLP towers."
422
+ "tower_params must include a non-empty 'hidden_dims' list for POSO-MLP towers."
441
423
  )
442
424
  dropout = tower_params.get("dropout", 0.0)
443
425
  tower = POSOMLP(
444
426
  input_dim=self.main_input_dim,
445
427
  pc_dim=self.pc_input_dim,
446
- dims=dims,
428
+ hidden_dims=hidden_dims,
447
429
  gate_hidden_dim=tower_params.get(
448
430
  "gate_hidden_dim", gate_hidden_dim
449
431
  ),
@@ -453,7 +435,9 @@ class POSO(BaseModel):
453
435
  dropout=dropout,
454
436
  )
455
437
  self.towers.append(tower)
456
- tower_output_dim = dims[-1] if dims else self.main_input_dim
438
+ tower_output_dim = (
439
+ hidden_dims[-1] if hidden_dims else self.main_input_dim
440
+ )
457
441
  self.tower_heads.append(nn.Linear(tower_output_dim, 1))
458
442
  else:
459
443
  if expert_hidden_dims is None or not expert_hidden_dims:
@@ -476,7 +460,7 @@ class POSO(BaseModel):
476
460
  [
477
461
  MLP(
478
462
  input_dim=self.mmoe.expert_output_dim,
479
- output_layer=True,
463
+ output_dim=1,
480
464
  **tower_params,
481
465
  )
482
466
  for tower_params in tower_params_list
@@ -488,7 +472,7 @@ class POSO(BaseModel):
488
472
  else:
489
473
  self.grad_norm_shared_modules = ["embedding", "mmoe"]
490
474
  self.prediction_layer = TaskHead(
491
- task_type=self.default_task,
475
+ task_type=self.task,
492
476
  task_dims=[1] * self.nums_task,
493
477
  )
494
478
  include_modules = (
@@ -499,12 +483,6 @@ class POSO(BaseModel):
499
483
  self.register_regularization_weights(
500
484
  embedding_attr="embedding", include_modules=include_modules
501
485
  )
502
- self.compile(
503
- optimizer=optimizer,
504
- optimizer_params=optimizer_params,
505
- loss=loss,
506
- loss_params=loss_params,
507
- )
508
486
 
509
487
  def forward(self, x):
510
488
  # Embed main and PC features separately so PC can gate hidden units
@@ -69,19 +69,9 @@ class ShareBottom(BaseModel):
69
69
  tower_params_list: list[dict],
70
70
  target: list[str],
71
71
  task: str | list[str] | None = None,
72
- optimizer: str = "adam",
73
- optimizer_params: dict | None = None,
74
- loss: str | nn.Module | list[str | nn.Module] | None = "bce",
75
- loss_params: dict | list[dict] | None = None,
76
- embedding_l1_reg=0.0,
77
- dense_l1_reg=0.0,
78
- embedding_l2_reg=0.0,
79
- dense_l2_reg=0.0,
80
72
  **kwargs,
81
73
  ):
82
74
 
83
- optimizer_params = optimizer_params or {}
84
-
85
75
  self.nums_task = len(target)
86
76
 
87
77
  resolved_task = task
@@ -102,16 +92,9 @@ class ShareBottom(BaseModel):
102
92
  sequence_features=sequence_features,
103
93
  target=target,
104
94
  task=resolved_task,
105
- embedding_l1_reg=embedding_l1_reg,
106
- dense_l1_reg=dense_l1_reg,
107
- embedding_l2_reg=embedding_l2_reg,
108
- dense_l2_reg=dense_l2_reg,
109
95
  **kwargs,
110
96
  )
111
97
 
112
- self.loss = loss
113
- if self.loss is None:
114
- self.loss = "bce"
115
98
  # Number of tasks
116
99
  self.nums_task = len(target)
117
100
  if len(tower_params_list) != self.nums_task:
@@ -127,33 +110,27 @@ class ShareBottom(BaseModel):
127
110
  # input_dim = emb_dim_total + dense_input_dim
128
111
 
129
112
  # Shared bottom network
130
- self.bottom = MLP(input_dim=input_dim, output_layer=False, **bottom_params)
113
+ self.bottom = MLP(input_dim=input_dim, output_dim=None, **bottom_params)
131
114
  self.grad_norm_shared_modules = ["embedding", "bottom"]
132
115
 
133
116
  # Get bottom output dimension
134
- if "dims" in bottom_params and len(bottom_params["dims"]) > 0:
135
- bottom_output_dim = bottom_params["dims"][-1]
117
+ if "hidden_dims" in bottom_params and len(bottom_params["hidden_dims"]) > 0:
118
+ bottom_output_dim = bottom_params["hidden_dims"][-1]
136
119
  else:
137
120
  bottom_output_dim = input_dim
138
121
 
139
122
  # Task-specific towers
140
123
  self.towers = nn.ModuleList()
141
124
  for tower_params in tower_params_list:
142
- tower = MLP(input_dim=bottom_output_dim, output_layer=True, **tower_params)
125
+ tower = MLP(input_dim=bottom_output_dim, output_dim=1, **tower_params)
143
126
  self.towers.append(tower)
144
127
  self.prediction_layer = TaskHead(
145
- task_type=self.default_task, task_dims=[1] * self.nums_task
128
+ task_type=self.task, task_dims=[1] * self.nums_task
146
129
  )
147
130
  # Register regularization weights
148
131
  self.register_regularization_weights(
149
132
  embedding_attr="embedding", include_modules=["bottom", "towers"]
150
133
  )
151
- self.compile(
152
- optimizer=optimizer,
153
- optimizer_params=optimizer_params,
154
- loss=loss,
155
- loss_params=loss_params,
156
- )
157
134
 
158
135
  def forward(self, x):
159
136
  # Get all embeddings and flatten
File without changes
@@ -61,36 +61,17 @@ class AFM(BaseModel):
61
61
  sequence_features: list[SequenceFeature] | None = None,
62
62
  attention_dim: int = 32,
63
63
  attention_dropout: float = 0.0,
64
- target: list[str] | str | None = None,
65
- task: str | list[str] | None = None,
66
- optimizer: str = "adam",
67
- optimizer_params: dict | None = None,
68
- loss: str | nn.Module | None = "bce",
69
- loss_params: dict | list[dict] | None = None,
70
- embedding_l1_reg=0.0,
71
- dense_l1_reg=0.0,
72
- embedding_l2_reg=0.0,
73
- dense_l2_reg=0.0,
74
64
  **kwargs,
75
65
  ):
76
66
 
77
67
  dense_features = dense_features or []
78
68
  sparse_features = sparse_features or []
79
69
  sequence_features = sequence_features or []
80
- optimizer_params = optimizer_params or {}
81
- if loss is None:
82
- loss = "bce"
83
70
 
84
71
  super(AFM, self).__init__(
85
72
  dense_features=dense_features,
86
73
  sparse_features=sparse_features,
87
74
  sequence_features=sequence_features,
88
- target=target,
89
- task=task or self.default_task,
90
- embedding_l1_reg=embedding_l1_reg,
91
- dense_l1_reg=dense_l1_reg,
92
- embedding_l2_reg=embedding_l2_reg,
93
- dense_l2_reg=dense_l2_reg,
94
75
  **kwargs,
95
76
  )
96
77
 
@@ -139,8 +120,10 @@ class AFM(BaseModel):
139
120
  self.attention_linear = nn.Linear(self.embedding_dim, attention_dim)
140
121
  self.attention_p = nn.Linear(attention_dim, 1, bias=False)
141
122
  self.attention_dropout = nn.Dropout(attention_dropout)
123
+
142
124
  self.output_projection = nn.Linear(self.embedding_dim, 1, bias=False)
143
- self.prediction_layer = TaskHead(task_type=self.default_task)
125
+
126
+ self.prediction_layer = TaskHead(task_type=self.task)
144
127
  self.input_mask = InputMask()
145
128
 
146
129
  # Register regularization weights
@@ -158,13 +141,6 @@ class AFM(BaseModel):
158
141
  emb.weight for emb in self.first_order_embeddings.values()
159
142
  )
160
143
 
161
- self.compile(
162
- optimizer=optimizer,
163
- optimizer_params=optimizer_params,
164
- loss=loss,
165
- loss_params=loss_params,
166
- )
167
-
168
144
  def forward(self, x):
169
145
  field_emb = self.embedding(
170
146
  x=x, features=self.fm_features, squeeze_dim=False
@@ -82,16 +82,6 @@ class AutoInt(BaseModel):
82
82
  att_head_num: int = 2,
83
83
  att_dropout: float = 0.0,
84
84
  att_use_residual: bool = True,
85
- target: list[str] | None = None,
86
- task: str | list[str] | None = None,
87
- optimizer: str = "adam",
88
- optimizer_params: dict | None = None,
89
- loss: str | nn.Module | None = "bce",
90
- loss_params: dict | list[dict] | None = None,
91
- embedding_l1_reg=0.0,
92
- dense_l1_reg=0.0,
93
- embedding_l2_reg=0.0,
94
- dense_l2_reg=0.0,
95
85
  **kwargs,
96
86
  ):
97
87
 
@@ -99,21 +89,12 @@ class AutoInt(BaseModel):
99
89
  dense_features=dense_features,
100
90
  sparse_features=sparse_features,
101
91
  sequence_features=sequence_features,
102
- target=target,
103
- task=task or self.default_task,
104
- embedding_l1_reg=embedding_l1_reg,
105
- dense_l1_reg=dense_l1_reg,
106
- embedding_l2_reg=embedding_l2_reg,
107
- dense_l2_reg=dense_l2_reg,
108
92
  **kwargs,
109
93
  )
110
94
 
111
- if target is None:
112
- target = []
113
- if optimizer_params is None:
114
- optimizer_params = {}
115
- if loss is None:
116
- loss = "bce"
95
+ dense_features = dense_features or []
96
+ sparse_features = sparse_features or []
97
+ sequence_features = sequence_features or []
117
98
 
118
99
  self.att_layer_num = att_layer_num
119
100
  self.att_embedding_dim = att_embedding_dim
@@ -123,12 +104,7 @@ class AutoInt(BaseModel):
123
104
  # if you want to follow the paper strictly, set dense_features=[]
124
105
  # or modify the code accordingly
125
106
  self.interaction_features = dense_features + sparse_features + sequence_features
126
-
127
- # All features for embedding
128
- self.all_features = dense_features + sparse_features + sequence_features
129
-
130
- # Embedding layer
131
- self.embedding = EmbeddingLayer(features=self.all_features)
107
+ self.embedding = EmbeddingLayer(features=self.interaction_features)
132
108
 
133
109
  # Project embeddings to attention embedding dimension
134
110
  num_fields = len(self.interaction_features)
@@ -159,23 +135,14 @@ class AutoInt(BaseModel):
159
135
  ]
160
136
  )
161
137
 
162
- # Final prediction layer
163
138
  self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
164
- self.prediction_layer = TaskHead(task_type=self.default_task)
139
+ self.prediction_layer = TaskHead(task_type=self.task)
165
140
 
166
- # Register regularization weights
167
141
  self.register_regularization_weights(
168
142
  embedding_attr="embedding",
169
143
  include_modules=["projection_layers", "attention_layers", "fc"],
170
144
  )
171
145
 
172
- self.compile(
173
- optimizer=optimizer,
174
- optimizer_params=optimizer_params,
175
- loss=loss,
176
- loss_params=loss_params,
177
- )
178
-
179
146
  def forward(self, x):
180
147
  # Get embeddings field-by-field so mixed dimensions can be projected safely
181
148
  field_embeddings = []
@@ -96,36 +96,18 @@ class DCN(BaseModel):
96
96
  sequence_features: list[SequenceFeature] | None = None,
97
97
  cross_num: int = 3,
98
98
  mlp_params: dict | None = None,
99
- target: list[str] | str | None = None,
100
- task: str | list[str] | None = None,
101
- optimizer: str = "adam",
102
- optimizer_params: dict | None = None,
103
- loss: str | nn.Module | None = "bce",
104
- loss_params: dict | list[dict] | None = None,
105
- embedding_l1_reg=0.0,
106
- dense_l1_reg=0.0,
107
- embedding_l2_reg=0.0,
108
- dense_l2_reg=0.0,
109
99
  **kwargs,
110
100
  ):
111
101
 
112
102
  dense_features = dense_features or []
113
103
  sparse_features = sparse_features or []
114
104
  sequence_features = sequence_features or []
115
- optimizer_params = optimizer_params or {}
116
- if loss is None:
117
- loss = "bce"
105
+ mlp_params = mlp_params or {}
118
106
 
119
107
  super(DCN, self).__init__(
120
108
  dense_features=dense_features,
121
109
  sparse_features=sparse_features,
122
110
  sequence_features=sequence_features,
123
- target=target,
124
- task=task or self.default_task,
125
- embedding_l1_reg=embedding_l1_reg,
126
- dense_l1_reg=dense_l1_reg,
127
- embedding_l2_reg=embedding_l2_reg,
128
- dense_l2_reg=dense_l2_reg,
129
111
  **kwargs,
130
112
  )
131
113
 
@@ -173,13 +155,6 @@ class DCN(BaseModel):
173
155
  include_modules=["cross_network", "mlp", "final_layer"],
174
156
  )
175
157
 
176
- self.compile(
177
- optimizer=optimizer,
178
- optimizer_params=optimizer_params,
179
- loss=loss,
180
- loss_params=loss_params,
181
- )
182
-
183
158
  def forward(self, x):
184
159
  # Get all embeddings and flatten
185
160
  input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
@@ -45,7 +45,7 @@ DCN v2 在原始 DCN 基础上,将标量交叉权重升级为向量/矩阵参
45
45
 
46
46
  import torch
47
47
  import torch.nn as nn
48
-
48
+ from typing import Literal
49
49
  from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
50
50
  from nextrec.basic.layers import MLP, EmbeddingLayer
51
51
  from nextrec.basic.heads import TaskHead
@@ -193,53 +193,32 @@ class DCNv2(BaseModel):
193
193
  sparse_features: list[SparseFeature] | None = None,
194
194
  sequence_features: list[SequenceFeature] | None = None,
195
195
  cross_num: int = 3,
196
- cross_type: str = "matrix",
197
- architecture: str = "parallel",
196
+ cross_type: Literal["matrix", "mix", "low_rank"] = "matrix",
197
+ architecture: Literal["parallel", "stacked"] = "parallel",
198
198
  low_rank: int = 32,
199
199
  num_experts: int = 4,
200
200
  mlp_params: dict | None = None,
201
- target: list[str] | str | None = None,
202
- task: str | list[str] | None = None,
203
- optimizer: str = "adam",
204
- optimizer_params: dict | None = None,
205
- loss: str | nn.Module | None = "bce",
206
- loss_params: dict | list[dict] | None = None,
207
- embedding_l1_reg=0.0,
208
- dense_l1_reg=0.0,
209
- embedding_l2_reg=0.0,
210
- dense_l2_reg=0.0,
211
201
  **kwargs,
212
202
  ):
213
203
  dense_features = dense_features or []
214
204
  sparse_features = sparse_features or []
215
205
  sequence_features = sequence_features or []
216
- optimizer_params = optimizer_params or {}
217
- if loss is None:
218
- loss = "bce"
206
+ mlp_params = mlp_params or {}
219
207
 
220
208
  super(DCNv2, self).__init__(
221
209
  dense_features=dense_features,
222
210
  sparse_features=sparse_features,
223
211
  sequence_features=sequence_features,
224
- target=target,
225
- task=task or self.default_task,
226
- embedding_l1_reg=embedding_l1_reg,
227
- dense_l1_reg=dense_l1_reg,
228
- embedding_l2_reg=embedding_l2_reg,
229
- dense_l2_reg=dense_l2_reg,
230
212
  **kwargs,
231
213
  )
232
214
 
233
- self.all_features = dense_features + sparse_features + sequence_features
234
215
  self.embedding = EmbeddingLayer(features=self.all_features)
235
216
  input_dim = self.embedding.input_dim
236
217
 
237
- architecture = architecture.lower()
238
218
  if architecture not in {"parallel", "stacked"}:
239
219
  raise ValueError("architecture must be 'parallel' or 'stacked'.")
240
220
  self.architecture = architecture
241
221
 
242
- cross_type = cross_type.lower()
243
222
  if cross_type == "matrix":
244
223
  self.cross_network = CrossNetV2(input_dim=input_dim, num_layers=cross_num)
245
224
  elif cross_type in {"mix", "low_rank"}:
@@ -255,7 +234,7 @@ class DCNv2(BaseModel):
255
234
  if mlp_params is not None:
256
235
  self.use_dnn = True
257
236
  dnn_params = dict(mlp_params)
258
- dnn_params.setdefault("output_layer", False)
237
+ dnn_params.setdefault("output_dim", None)
259
238
  self.mlp = MLP(input_dim=input_dim, **dnn_params)
260
239
  deep_dim = self.mlp.output_dim
261
240
  final_input_dim = (
@@ -271,20 +250,13 @@ class DCNv2(BaseModel):
271
250
  final_input_dim = input_dim
272
251
 
273
252
  self.final_layer = nn.Linear(final_input_dim, 1)
274
- self.prediction_layer = TaskHead(task_type=self.default_task)
253
+ self.prediction_layer = TaskHead(task_type=self.task)
275
254
 
276
255
  self.register_regularization_weights(
277
256
  embedding_attr="embedding",
278
257
  include_modules=["cross_network", "mlp", "final_layer"],
279
258
  )
280
259
 
281
- self.compile(
282
- optimizer=optimizer,
283
- optimizer_params=optimizer_params,
284
- loss=loss,
285
- loss_params=loss_params,
286
- )
287
-
288
260
  def forward(self, x) -> torch.Tensor:
289
261
  input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
290
262
  cross_out = self.cross_network(input_flat)