nextrec-0.4.22-py3-none-any.whl → nextrec-0.4.23-py3-none-any.whl

This diff compares publicly available package versions as they appear in their public registry and is provided for informational purposes only.
Files changed (43)
  1. nextrec/__version__.py +1 -1
  2. nextrec/basic/metrics.py +1 -2
  3. nextrec/basic/model.py +68 -73
  4. nextrec/basic/summary.py +36 -2
  5. nextrec/data/preprocessor.py +137 -5
  6. nextrec/loss/listwise.py +19 -6
  7. nextrec/loss/pairwise.py +6 -4
  8. nextrec/loss/pointwise.py +8 -6
  9. nextrec/models/multi_task/esmm.py +3 -26
  10. nextrec/models/multi_task/mmoe.py +2 -24
  11. nextrec/models/multi_task/ple.py +13 -35
  12. nextrec/models/multi_task/poso.py +4 -28
  13. nextrec/models/multi_task/share_bottom.py +1 -24
  14. nextrec/models/ranking/afm.py +3 -27
  15. nextrec/models/ranking/autoint.py +5 -38
  16. nextrec/models/ranking/dcn.py +1 -26
  17. nextrec/models/ranking/dcn_v2.py +5 -33
  18. nextrec/models/ranking/deepfm.py +2 -29
  19. nextrec/models/ranking/dien.py +2 -28
  20. nextrec/models/ranking/din.py +2 -27
  21. nextrec/models/ranking/eulernet.py +3 -30
  22. nextrec/models/ranking/ffm.py +0 -26
  23. nextrec/models/ranking/fibinet.py +8 -32
  24. nextrec/models/ranking/fm.py +0 -29
  25. nextrec/models/ranking/lr.py +0 -30
  26. nextrec/models/ranking/masknet.py +4 -30
  27. nextrec/models/ranking/pnn.py +4 -28
  28. nextrec/models/ranking/widedeep.py +0 -32
  29. nextrec/models/ranking/xdeepfm.py +0 -30
  30. nextrec/models/retrieval/dssm.py +0 -24
  31. nextrec/models/retrieval/dssm_v2.py +0 -24
  32. nextrec/models/retrieval/mind.py +0 -20
  33. nextrec/models/retrieval/sdm.py +0 -20
  34. nextrec/models/retrieval/youtube_dnn.py +0 -21
  35. nextrec/models/sequential/hstu.py +0 -18
  36. nextrec/utils/model.py +79 -1
  37. nextrec/utils/types.py +35 -0
  38. {nextrec-0.4.22.dist-info → nextrec-0.4.23.dist-info}/METADATA +7 -5
  39. nextrec-0.4.23.dist-info/RECORD +81 -0
  40. nextrec-0.4.22.dist-info/RECORD +0 -81
  41. {nextrec-0.4.22.dist-info → nextrec-0.4.23.dist-info}/WHEEL +0 -0
  42. {nextrec-0.4.22.dist-info → nextrec-0.4.23.dist-info}/entry_points.txt +0 -0
  43. {nextrec-0.4.22.dist-info → nextrec-0.4.23.dist-info}/licenses/LICENSE +0 -0
nextrec/loss/pairwise.py CHANGED
@@ -2,7 +2,7 @@
 Pairwise loss functions for learning-to-rank and matching tasks.
 
 Date: create on 27/10/2025
-Checkpoint: edit on 29/11/2025
+Checkpoint: edit on 29/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 
@@ -18,7 +18,7 @@ class BPRLoss(nn.Module):
     Bayesian Personalized Ranking loss with support for multiple negatives.
     """
 
-    def __init__(self, reduction: str = "mean"):
+    def __init__(self, reduction: Literal["mean", "sum", "none"] = "mean"):
         super().__init__()
         self.reduction = reduction
 
@@ -42,7 +42,9 @@ class HingeLoss(nn.Module):
     Hinge loss for pairwise ranking.
     """
 
-    def __init__(self, margin: float = 1.0, reduction: str = "mean"):
+    def __init__(
+        self, margin: float = 1.0, reduction: Literal["mean", "sum", "none"] = "mean"
+    ):
         super().__init__()
         self.margin = margin
         self.reduction = reduction
@@ -69,7 +71,7 @@ class TripletLoss(nn.Module):
     def __init__(
         self,
         margin: float = 1.0,
-        reduction: str = "mean",
+        reduction: Literal["mean", "sum", "none"] = "mean",
         distance: Literal["euclidean", "cosine"] = "euclidean",
     ):
         super().__init__()
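
The change above narrows reduction from a bare str to typing.Literal["mean", "sum", "none"], so static type checkers can flag unsupported modes at the call site. A minimal, self-contained sketch of the pattern; the class below is illustrative and not nextrec's implementation:

    from typing import Literal

    import torch
    import torch.nn as nn


    class PairwiseHingeLoss(nn.Module):
        """Toy pairwise hinge loss using the Literal-typed reduction pattern."""

        def __init__(
            self, margin: float = 1.0, reduction: Literal["mean", "sum", "none"] = "mean"
        ):
            super().__init__()
            self.margin = margin
            self.reduction = reduction

        def forward(self, pos_score: torch.Tensor, neg_score: torch.Tensor) -> torch.Tensor:
            # Penalize negatives scored within `margin` of the paired positives.
            loss = torch.clamp(self.margin - (pos_score - neg_score), min=0.0)
            if self.reduction == "mean":
                return loss.mean()
            if self.reduction == "sum":
                return loss.sum()
            return loss  # "none": keep per-pair losses


    loss_fn = PairwiseHingeLoss(reduction="mean")
    # PairwiseHingeLoss(reduction="avg") is now rejected by mypy/pyright, though the
    # value is still not validated at runtime unless the class adds its own check.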
nextrec/loss/pointwise.py CHANGED
@@ -2,11 +2,11 @@
 Pointwise loss functions, including imbalance-aware variants.
 
 Date: create on 27/10/2025
-Checkpoint: edit on 29/11/2025
+Checkpoint: edit on 29/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 
-from typing import Optional, Sequence
+from typing import Optional, Sequence, Literal
 
 import torch
 import torch.nn as nn
@@ -18,7 +18,9 @@ class CosineContrastiveLoss(nn.Module):
     Contrastive loss using cosine similarity for positive/negative pairs.
     """
 
-    def __init__(self, margin: float = 0.5, reduction: str = "mean"):
+    def __init__(
+        self, margin: float = 0.5, reduction: Literal["mean", "sum", "none"] = "mean"
+    ):
         super().__init__()
         self.margin = margin
         self.reduction = reduction
@@ -50,7 +52,7 @@ class WeightedBCELoss(nn.Module):
     def __init__(
         self,
         pos_weight: float | torch.Tensor | None = None,
-        reduction: str = "mean",
+        reduction: Literal["mean", "sum", "none"] = "mean",
         logits: bool = False,
         auto_balance: bool = False,
     ):
@@ -110,7 +112,7 @@ class FocalLoss(nn.Module):
         self,
         gamma: float = 2.0,
         alpha: Optional[float | Sequence[float] | torch.Tensor] = None,
-        reduction: str = "mean",
+        reduction: Literal["mean", "sum", "none"] = "mean",
         logits: bool = False,
     ):
         super().__init__()
@@ -187,7 +189,7 @@ class ClassBalancedFocalLoss(nn.Module):
         class_counts: Sequence[int] | torch.Tensor,
         beta: float = 0.9999,
         gamma: float = 2.0,
-        reduction: str = "mean",
+        reduction: Literal["mean", "sum", "none"] = "mean",
     ):
         super().__init__()
         self.gamma = gamma
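
For the class_counts and beta parameters visible above, ClassBalancedFocalLoss presumably follows the effective-number weighting of Cui et al. (2019), where each class weight is proportional to (1 - beta) / (1 - beta^n_c). A short sketch of that computation, with a hypothetical helper name and normalization choice rather than nextrec's code:

    import torch


    def class_balanced_weights(class_counts: torch.Tensor, beta: float = 0.9999) -> torch.Tensor:
        # Effective number of samples per class: (1 - beta^n) / (1 - beta).
        effective_num = 1.0 - torch.pow(beta, class_counts.float())
        weights = (1.0 - beta) / effective_num
        # Normalize so the weights sum to the number of classes (mean weight = 1).
        return weights / weights.sum() * class_counts.numel()


    # A 99:1 imbalanced binary problem: the minority class receives the larger weight.
    print(class_balanced_weights(torch.tensor([990_000, 10_000])))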
nextrec/models/multi_task/esmm.py CHANGED
@@ -42,12 +42,12 @@ CVR predicts P(conversion|click); multiplying the two gives CTCVR, supervised on impression labels
 """
 
 import torch
-import torch.nn as nn
 
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import MLP, EmbeddingLayer
 from nextrec.basic.heads import TaskHead
 from nextrec.basic.model import BaseModel
+from nextrec.utils.types import TaskTypeName
 
 
 class ESMM(BaseModel):
@@ -77,23 +77,12 @@ class ESMM(BaseModel):
         sequence_features: list[SequenceFeature],
         ctr_params: dict,
         cvr_params: dict,
+        task: TaskTypeName | list[TaskTypeName] | None = None,
         target: list[str] | None = None,  # Note: ctcvr = ctr * cvr
-        task: list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
         target = target or ["ctr", "ctcvr"]
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"
 
         if len(target) != 2:
             raise ValueError(
@@ -120,15 +109,9 @@ class ESMM(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,  # Both CTR and CTCVR are binary classification
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-        self.loss = loss
-
         self.embedding = EmbeddingLayer(features=self.all_features)
         input_dim = self.embedding.input_dim
 
@@ -138,17 +121,11 @@ class ESMM(BaseModel):
         # CVR tower
         self.cvr_tower = MLP(input_dim=input_dim, output_layer=True, **cvr_params)
         self.grad_norm_shared_modules = ["embedding"]
-        self.prediction_layer = TaskHead(task_type=self.default_task, task_dims=[1, 1])
+        self.prediction_layer = TaskHead(task_type=self.task, task_dims=[1, 1])
         # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["ctr_tower", "cvr_tower"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
 
     def forward(self, x):
         # Get all embeddings and flatten
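
The same removal repeats in every model diff below: the optimizer, loss, and regularization keyword arguments leave __init__, and the trailing self.compile(...) call is dropped, which suggests training configuration now happens in an explicit compile() step after construction. A minimal toy sketch of that decoupling, assuming only that the keyword names from the removed call (optimizer, optimizer_params, loss, loss_params) carry over; the class below is illustrative, not nextrec's BaseModel:

    import torch
    import torch.nn as nn


    class TinyModel(nn.Module):
        """Toy model: __init__ builds architecture only, compile() wires training state."""

        def __init__(self, in_dim: int = 8):
            super().__init__()
            self.tower = nn.Linear(in_dim, 1)

        def compile(
            self,
            optimizer: str = "adam",
            optimizer_params: dict | None = None,
            loss: str = "bce",
            loss_params: dict | None = None,
        ) -> None:
            # String-to-class dispatch elided; Adam and BCE-with-logits are hardcoded here.
            self.optimizer = torch.optim.Adam(self.parameters(), **(optimizer_params or {}))
            self.loss_fn = nn.BCEWithLogitsLoss(**(loss_params or {}))


    model = TinyModel()
    model.compile(optimizer="adam", optimizer_params={"lr": 1e-3}, loss="bce")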
nextrec/models/multi_task/mmoe.py CHANGED
@@ -82,14 +82,6 @@ class MMOE(BaseModel):
         tower_params_list: list[dict] | None = None,
         target: list[str] | str | None = None,
         task: str | list[str] = "binary",
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
@@ -98,9 +90,7 @@ class MMOE(BaseModel):
         sequence_features = sequence_features or []
         expert_params = expert_params or {}
         tower_params_list = tower_params_list or []
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"
+
         if target is None:
             target = []
         elif isinstance(target, str):
@@ -126,15 +116,9 @@ class MMOE(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-        self.loss = loss
-
         # Number of tasks and experts
         self.nums_task = len(target)
         self.num_experts = num_experts
@@ -172,18 +156,12 @@ class MMOE(BaseModel):
             tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = TaskHead(
-            task_type=self.default_task, task_dims=[1] * self.nums_task
+            task_type=self.task, task_dims=[1] * self.nums_task
         )
         # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["experts", "gates", "towers"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=self.loss,
-            loss_params=loss_params,
-        )
 
     def forward(self, x):
         # Get all embeddings and flatten
nextrec/models/multi_task/ple.py CHANGED
@@ -202,29 +202,21 @@ class PLE(BaseModel):
 
     def __init__(
         self,
-        dense_features: list[DenseFeature],
-        sparse_features: list[SparseFeature],
-        sequence_features: list[SequenceFeature],
-        shared_expert_params: dict,
-        specific_expert_params: dict | list[dict],
-        num_shared_experts: int,
-        num_specific_experts: int,
-        num_levels: int,
-        tower_params_list: list[dict],
-        target: list[str],
+        dense_features: list[DenseFeature] | None = None,
+        sparse_features: list[SparseFeature] | None = None,
+        sequence_features: list[SequenceFeature] | None = None,
+        shared_expert_params: dict | None = None,
+        specific_expert_params: dict | list[dict] | None = None,
+        num_shared_experts: int = 2,
+        num_specific_experts: int = 2,
+        num_levels: int = 2,
+        tower_params_list: list[dict] | None = None,
+        target: list[str] | None = None,
         task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
-        self.nums_task = len(target)
+        self.nums_task = len(target) if target is not None else 1
 
         resolved_task = task
         if resolved_task is None:
@@ -244,23 +236,15 @@ class PLE(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-        self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
         # Number of tasks, experts, and levels
         self.nums_task = len(target)
         self.num_shared_experts = num_shared_experts
         self.num_specific_experts = num_specific_experts
         self.num_levels = num_levels
-        if optimizer_params is None:
-            optimizer_params = {}
+
         if len(tower_params_list) != self.nums_task:
             raise ValueError(
                 f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.nums_task})"
@@ -302,18 +286,12 @@ class PLE(BaseModel):
             tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = TaskHead(
-            task_type=self.default_task, task_dims=[1] * self.nums_task
+            task_type=self.task, task_dims=[1] * self.nums_task
        )
         # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["cgc_layers", "towers"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=self.loss,
-            loss_params=loss_params,
-        )
 
     def forward(self, x):
         # Get all embeddings and flatten
nextrec/models/multi_task/poso.py CHANGED
@@ -42,7 +42,7 @@ from __future__ import annotations
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-
+from typing import Literal
 from nextrec.basic.activation import activation_layer
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import MLP, EmbeddingLayer
@@ -307,9 +307,9 @@ class POSO(BaseModel):
         pc_sparse_features: list[str] | None,
         pc_sequence_features: list[str] | None,
         tower_params_list: list[dict],
-        target: list[str],
+        target: list[str] | None = None,
         task: str | list[str] = "binary",
-        architecture: str = "mlp",
+        architecture: Literal["mlp", "mmoe"] = "mlp",
         # POSO gating defaults
         gate_hidden_dim: int = 32,
         gate_scale_factor: float = 2.0,
@@ -323,14 +323,6 @@ class POSO(BaseModel):
         expert_gate_hidden_dim: int = 32,
         expert_gate_scale_factor: float = 2.0,
         gate_use_softmax: bool = True,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
         self.nums_task = len(target)
@@ -359,10 +351,6 @@ class POSO(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
@@ -373,12 +361,6 @@ class POSO(BaseModel):
         self.pc_sparse_feature_names = list(pc_sparse_features or [])
         self.pc_sequence_feature_names = list(pc_sequence_features or [])
 
-        if loss is None:
-            self.loss = "bce"
-        self.loss = loss
-
-        optimizer_params = optimizer_params or {}
-
         self.main_dense_features = select_features(
             self.dense_features, self.main_dense_feature_names, "main_dense_features"
         )
@@ -488,7 +470,7 @@ class POSO(BaseModel):
         else:
             self.grad_norm_shared_modules = ["embedding", "mmoe"]
         self.prediction_layer = TaskHead(
-            task_type=self.default_task,
+            task_type=self.task,
             task_dims=[1] * self.nums_task,
         )
         include_modules = (
@@ -499,12 +481,6 @@ class POSO(BaseModel):
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=include_modules
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
 
     def forward(self, x):
         # Embed main and PC features separately so PC can gate hidden units
nextrec/models/multi_task/share_bottom.py CHANGED
@@ -69,19 +69,9 @@ class ShareBottom(BaseModel):
         tower_params_list: list[dict],
         target: list[str],
         task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
-        optimizer_params = optimizer_params or {}
-
         self.nums_task = len(target)
 
         resolved_task = task
@@ -102,16 +92,9 @@ class ShareBottom(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-        self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
         # Number of tasks
         self.nums_task = len(target)
         if len(tower_params_list) != self.nums_task:
@@ -142,18 +125,12 @@ class ShareBottom(BaseModel):
             tower = MLP(input_dim=bottom_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = TaskHead(
-            task_type=self.default_task, task_dims=[1] * self.nums_task
+            task_type=self.task, task_dims=[1] * self.nums_task
         )
         # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["bottom", "towers"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
 
     def forward(self, x):
         # Get all embeddings and flatten
nextrec/models/ranking/afm.py CHANGED
@@ -61,36 +61,17 @@ class AFM(BaseModel):
         sequence_features: list[SequenceFeature] | None = None,
         attention_dim: int = 32,
         attention_dropout: float = 0.0,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
         dense_features = dense_features or []
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"
 
         super(AFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
@@ -139,8 +120,10 @@ class AFM(BaseModel):
         self.attention_linear = nn.Linear(self.embedding_dim, attention_dim)
         self.attention_p = nn.Linear(attention_dim, 1, bias=False)
         self.attention_dropout = nn.Dropout(attention_dropout)
+
         self.output_projection = nn.Linear(self.embedding_dim, 1, bias=False)
-        self.prediction_layer = TaskHead(task_type=self.default_task)
+
+        self.prediction_layer = TaskHead(task_type=self.task)
         self.input_mask = InputMask()
 
         # Register regularization weights
@@ -158,13 +141,6 @@ class AFM(BaseModel):
             emb.weight for emb in self.first_order_embeddings.values()
         )
 
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         field_emb = self.embedding(
             x=x, features=self.fm_features, squeeze_dim=False
nextrec/models/ranking/autoint.py CHANGED
@@ -82,16 +82,6 @@ class AutoInt(BaseModel):
         att_head_num: int = 2,
         att_dropout: float = 0.0,
         att_use_residual: bool = True,
-        target: list[str] | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
@@ -99,21 +89,12 @@ class AutoInt(BaseModel):
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-        if target is None:
-            target = []
-        if optimizer_params is None:
-            optimizer_params = {}
-        if loss is None:
-            loss = "bce"
+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []
 
         self.att_layer_num = att_layer_num
         self.att_embedding_dim = att_embedding_dim
@@ -123,12 +104,7 @@ class AutoInt(BaseModel):
         # if you want to follow the paper strictly, set dense_features=[]
         # or modify the code accordingly
         self.interaction_features = dense_features + sparse_features + sequence_features
-
-        # All features for embedding
-        self.all_features = dense_features + sparse_features + sequence_features
-
-        # Embedding layer
-        self.embedding = EmbeddingLayer(features=self.all_features)
+        self.embedding = EmbeddingLayer(features=self.interaction_features)
 
         # Project embeddings to attention embedding dimension
         num_fields = len(self.interaction_features)
@@ -159,23 +135,14 @@ class AutoInt(BaseModel):
             ]
         )
 
-        # Final prediction layer
         self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
-        self.prediction_layer = TaskHead(task_type=self.default_task)
+        self.prediction_layer = TaskHead(task_type=self.task)
 
-        # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding",
             include_modules=["projection_layers", "attention_layers", "fc"],
         )
 
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         # Get embeddings field-by-field so mixed dimensions can be projected safely
         field_embeddings = []
nextrec/models/ranking/dcn.py CHANGED
@@ -96,36 +96,18 @@ class DCN(BaseModel):
         sequence_features: list[SequenceFeature] | None = None,
         cross_num: int = 3,
         mlp_params: dict | None = None,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
         dense_features = dense_features or []
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"
+        mlp_params = mlp_params or {}
 
         super(DCN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
@@ -173,13 +155,6 @@ class DCN(BaseModel):
             include_modules=["cross_network", "mlp", "final_layer"],
         )
 
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
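
For reference, cross_num above sets the depth of DCN's cross network, where layer l computes x_{l+1} = x_0 * (x_l . w_l) + b_l + x_l (Wang et al., 2017). A self-contained sketch of one such layer; the module below is illustrative, not nextrec's cross_network:

    import torch
    import torch.nn as nn


    class CrossLayer(nn.Module):
        """One DCN cross layer: x_{l+1} = x0 * (x_l . w) + b + x_l."""

        def __init__(self, input_dim: int):
            super().__init__()
            self.weight = nn.Parameter(torch.randn(input_dim) * 0.01)
            self.bias = nn.Parameter(torch.zeros(input_dim))

        def forward(self, x0: torch.Tensor, xl: torch.Tensor) -> torch.Tensor:
            # (batch, dim) . (dim,) -> (batch, 1), then broadcast against x0.
            xl_w = (xl * self.weight).sum(dim=1, keepdim=True)
            return x0 * xl_w + self.bias + xl


    layers = nn.ModuleList(CrossLayer(16) for _ in range(3))  # cross_num = 3, the retained default
    x0 = torch.randn(4, 16)  # flattened embeddings, analogous to input_flat above
    x = x0
    for layer in layers:
        x = layer(x0, x)
    print(x.shape)  # torch.Size([4, 16])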