nextrec-0.4.22-py3-none-any.whl → nextrec-0.4.23-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry; it is provided for informational purposes only.
- nextrec/__version__.py +1 -1
- nextrec/basic/metrics.py +1 -2
- nextrec/basic/model.py +68 -73
- nextrec/basic/summary.py +36 -2
- nextrec/data/preprocessor.py +137 -5
- nextrec/loss/listwise.py +19 -6
- nextrec/loss/pairwise.py +6 -4
- nextrec/loss/pointwise.py +8 -6
- nextrec/models/multi_task/esmm.py +3 -26
- nextrec/models/multi_task/mmoe.py +2 -24
- nextrec/models/multi_task/ple.py +13 -35
- nextrec/models/multi_task/poso.py +4 -28
- nextrec/models/multi_task/share_bottom.py +1 -24
- nextrec/models/ranking/afm.py +3 -27
- nextrec/models/ranking/autoint.py +5 -38
- nextrec/models/ranking/dcn.py +1 -26
- nextrec/models/ranking/dcn_v2.py +5 -33
- nextrec/models/ranking/deepfm.py +2 -29
- nextrec/models/ranking/dien.py +2 -28
- nextrec/models/ranking/din.py +2 -27
- nextrec/models/ranking/eulernet.py +3 -30
- nextrec/models/ranking/ffm.py +0 -26
- nextrec/models/ranking/fibinet.py +8 -32
- nextrec/models/ranking/fm.py +0 -29
- nextrec/models/ranking/lr.py +0 -30
- nextrec/models/ranking/masknet.py +4 -30
- nextrec/models/ranking/pnn.py +4 -28
- nextrec/models/ranking/widedeep.py +0 -32
- nextrec/models/ranking/xdeepfm.py +0 -30
- nextrec/models/retrieval/dssm.py +0 -24
- nextrec/models/retrieval/dssm_v2.py +0 -24
- nextrec/models/retrieval/mind.py +0 -20
- nextrec/models/retrieval/sdm.py +0 -20
- nextrec/models/retrieval/youtube_dnn.py +0 -21
- nextrec/models/sequential/hstu.py +0 -18
- nextrec/utils/model.py +79 -1
- nextrec/utils/types.py +35 -0
- {nextrec-0.4.22.dist-info → nextrec-0.4.23.dist-info}/METADATA +7 -5
- nextrec-0.4.23.dist-info/RECORD +81 -0
- nextrec-0.4.22.dist-info/RECORD +0 -81
- {nextrec-0.4.22.dist-info → nextrec-0.4.23.dist-info}/WHEEL +0 -0
- {nextrec-0.4.22.dist-info → nextrec-0.4.23.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.22.dist-info → nextrec-0.4.23.dist-info}/licenses/LICENSE +0 -0
nextrec/loss/pairwise.py
CHANGED
@@ -2,7 +2,7 @@
 Pairwise loss functions for learning-to-rank and matching tasks.
 
 Date: create on 27/10/2025
-Checkpoint: edit on 29/
+Checkpoint: edit on 29/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 
@@ -18,7 +18,7 @@ class BPRLoss(nn.Module):
     Bayesian Personalized Ranking loss with support for multiple negatives.
     """
 
-    def __init__(self, reduction:
+    def __init__(self, reduction: Literal["mean", "sum", "none"] = "mean"):
         super().__init__()
         self.reduction = reduction
 
@@ -42,7 +42,9 @@ class HingeLoss(nn.Module):
     Hinge loss for pairwise ranking.
     """
 
-    def __init__(
+    def __init__(
+        self, margin: float = 1.0, reduction: Literal["mean", "sum", "none"] = "mean"
+    ):
         super().__init__()
         self.margin = margin
         self.reduction = reduction
@@ -69,7 +71,7 @@ class TripletLoss(nn.Module):
     def __init__(
         self,
         margin: float = 1.0,
-        reduction:
+        reduction: Literal["mean", "sum", "none"] = "mean",
         distance: Literal["euclidean", "cosine"] = "euclidean",
     ):
         super().__init__()
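The change above narrows the previously loose reduction parameter to Literal["mean", "sum", "none"], so a typo such as reduction="avg" is flagged by a static type checker instead of surfacing at runtime. A minimal sketch of the pattern (illustrative only, not nextrec's exact implementation):

from typing import Literal

import torch
import torch.nn as nn


class PairwiseLossSketch(nn.Module):
    """Illustrates the Literal-typed reduction contract used by BPRLoss and friends."""

    def __init__(self, reduction: Literal["mean", "sum", "none"] = "mean"):
        super().__init__()
        self.reduction = reduction

    def _reduce(self, loss: torch.Tensor) -> torch.Tensor:
        # Collapse per-pair losses according to the requested reduction.
        if self.reduction == "mean":
            return loss.mean()
        if self.reduction == "sum":
            return loss.sum()
        return loss  # "none": return per-element losses unchanged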
nextrec/loss/pointwise.py
CHANGED
@@ -2,11 +2,11 @@
 Pointwise loss functions, including imbalance-aware variants.
 
 Date: create on 27/10/2025
-Checkpoint: edit on 29/
+Checkpoint: edit on 29/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
 
-from typing import Optional, Sequence
+from typing import Optional, Sequence, Literal
 
 import torch
 import torch.nn as nn
@@ -18,7 +18,9 @@ class CosineContrastiveLoss(nn.Module):
     Contrastive loss using cosine similarity for positive/negative pairs.
     """
 
-    def __init__(
+    def __init__(
+        self, margin: float = 0.5, reduction: Literal["mean", "sum", "none"] = "mean"
+    ):
         super().__init__()
         self.margin = margin
         self.reduction = reduction
@@ -50,7 +52,7 @@ class WeightedBCELoss(nn.Module):
     def __init__(
         self,
        pos_weight: float | torch.Tensor | None = None,
-        reduction:
+        reduction: Literal["mean", "sum", "none"] = "mean",
         logits: bool = False,
         auto_balance: bool = False,
     ):
@@ -110,7 +112,7 @@ class FocalLoss(nn.Module):
         self,
         gamma: float = 2.0,
         alpha: Optional[float | Sequence[float] | torch.Tensor] = None,
-        reduction:
+        reduction: Literal["mean", "sum", "none"] = "mean",
         logits: bool = False,
     ):
         super().__init__()
@@ -187,7 +189,7 @@ class ClassBalancedFocalLoss(nn.Module):
         class_counts: Sequence[int] | torch.Tensor,
         beta: float = 0.9999,
         gamma: float = 2.0,
-        reduction:
+        reduction: Literal["mean", "sum", "none"] = "mean",
     ):
         super().__init__()
         self.gamma = gamma
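With the new signatures, the pointwise losses take checked keyword arguments. A hedged usage sketch; the (predictions, targets) call convention is assumed from standard nn.Module losses and is not confirmed by this diff:

import torch
from nextrec.loss.pointwise import FocalLoss, WeightedBCELoss

# logits=True: pass raw scores and let the loss apply the sigmoid itself.
focal = FocalLoss(gamma=2.0, alpha=0.25, reduction="mean", logits=True)
weighted = WeightedBCELoss(pos_weight=4.0, reduction="sum", logits=True)

scores = torch.randn(32, 1)                    # raw model outputs
labels = torch.randint(0, 2, (32, 1)).float()  # binary targets

print(focal(scores, labels), weighted(scores, labels))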
nextrec/models/multi_task/esmm.py
CHANGED
@@ -42,12 +42,12 @@ CVR predicts P(conversion|click); their product gives CTCVR, supervised on impression labels
 """
 
 import torch
-import torch.nn as nn
 
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import MLP, EmbeddingLayer
 from nextrec.basic.heads import TaskHead
 from nextrec.basic.model import BaseModel
+from nextrec.utils.types import TaskTypeName
 
 
 class ESMM(BaseModel):
@@ -77,23 +77,12 @@ class ESMM(BaseModel):
         sequence_features: list[SequenceFeature],
         ctr_params: dict,
         cvr_params: dict,
+        task: TaskTypeName | list[TaskTypeName] | None = None,
         target: list[str] | None = None,  # Note: ctcvr = ctr * cvr
-        task: list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
         target = target or ["ctr", "ctcvr"]
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"
 
         if len(target) != 2:
             raise ValueError(
@@ -120,15 +109,9 @@ class ESMM(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,  # Both CTR and CTCVR are binary classification
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-        self.loss = loss
-
         self.embedding = EmbeddingLayer(features=self.all_features)
         input_dim = self.embedding.input_dim
 
@@ -138,17 +121,11 @@ class ESMM(BaseModel):
         # CVR tower
         self.cvr_tower = MLP(input_dim=input_dim, output_layer=True, **cvr_params)
         self.grad_norm_shared_modules = ["embedding"]
-        self.prediction_layer = TaskHead(task_type=self.
+        self.prediction_layer = TaskHead(task_type=self.task, task_dims=[1, 1])
         # Register regularization weights
        self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["ctr_tower", "cvr_tower"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
 
     def forward(self, x):
         # Get all embeddings and flatten
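The ESMM changes strip optimizer, loss, and regularization plumbing out of __init__; those options now travel through **kwargs to BaseModel, and the deleted code shows compile() accepting optimizer, optimizer_params, loss, and loss_params. A hedged construction sketch; the "dims" MLP config key and the explicit compile() call are assumptions, not confirmed by this diff:

from nextrec.models.multi_task.esmm import ESMM

model = ESMM(
    dense_features=dense_features,      # feature lists defined as elsewhere in nextrec
    sparse_features=sparse_features,
    sequence_features=[],
    ctr_params={"dims": [256, 128]},    # assumed MLP config keys
    cvr_params={"dims": [256, 128]},
    target=["ctr", "ctcvr"],            # exactly two targets; ctcvr = ctr * cvr
)
# Training setup is no longer wired inside __init__.
model.compile(optimizer="adam", optimizer_params={"lr": 1e-3}, loss="bce")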
nextrec/models/multi_task/mmoe.py
CHANGED
@@ -82,14 +82,6 @@ class MMOE(BaseModel):
         tower_params_list: list[dict] | None = None,
         target: list[str] | str | None = None,
         task: str | list[str] = "binary",
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
@@ -98,9 +90,7 @@ class MMOE(BaseModel):
         sequence_features = sequence_features or []
         expert_params = expert_params or {}
         tower_params_list = tower_params_list or []
-
-        if loss is None:
-            loss = "bce"
+
         if target is None:
             target = []
         elif isinstance(target, str):
@@ -126,15 +116,9 @@ class MMOE(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-        self.loss = loss
-
         # Number of tasks and experts
         self.nums_task = len(target)
         self.num_experts = num_experts
@@ -172,18 +156,12 @@ class MMOE(BaseModel):
             tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = TaskHead(
-            task_type=self.
+            task_type=self.task, task_dims=[1] * self.nums_task
         )
         # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["experts", "gates", "towers"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=self.loss,
-            loss_params=loss_params,
-        )
 
     def forward(self, x):
         # Get all embeddings and flatten
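MMOE gets the same refactor. Since the removed loss parameter accepted a list (one entry per task), per-task losses presumably now go through the same explicit compile(); a hedged sketch with assumed MLP config keys:

from nextrec.models.multi_task.mmoe import MMOE

model = MMOE(
    dense_features=dense_features,
    sparse_features=sparse_features,
    num_experts=4,
    expert_params={"dims": [128, 64]},
    tower_params_list=[{"dims": [32]}, {"dims": [32]}],  # one tower per target
    target=["click", "like"],
    task=["binary", "binary"],
)
model.compile(optimizer="adam", loss=["bce", "bce"])  # one loss per task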
nextrec/models/multi_task/ple.py
CHANGED
@@ -202,29 +202,21 @@ class PLE(BaseModel):
 
     def __init__(
         self,
-        dense_features: list[DenseFeature],
-        sparse_features: list[SparseFeature],
-        sequence_features: list[SequenceFeature],
-        shared_expert_params: dict,
-        specific_expert_params: dict | list[dict],
-        num_shared_experts: int,
-        num_specific_experts: int,
-        num_levels: int,
-        tower_params_list: list[dict],
-        target: list[str],
+        dense_features: list[DenseFeature] | None = None,
+        sparse_features: list[SparseFeature] | None = None,
+        sequence_features: list[SequenceFeature] | None = None,
+        shared_expert_params: dict | None = None,
+        specific_expert_params: dict | list[dict] | None = None,
+        num_shared_experts: int = 2,
+        num_specific_experts: int = 2,
+        num_levels: int = 2,
+        tower_params_list: list[dict] | None = None,
+        target: list[str] | None = None,
         task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
-        self.nums_task = len(target)
+        self.nums_task = len(target) if target is not None else 1
 
         resolved_task = task
         if resolved_task is None:
@@ -244,23 +236,15 @@ class PLE(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-        self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
         # Number of tasks, experts, and levels
         self.nums_task = len(target)
         self.num_shared_experts = num_shared_experts
         self.num_specific_experts = num_specific_experts
         self.num_levels = num_levels
-
-        optimizer_params = {}
+
         if len(tower_params_list) != self.nums_task:
             raise ValueError(
                 f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.nums_task})"
@@ -302,18 +286,12 @@ class PLE(BaseModel):
             tower = MLP(input_dim=expert_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = TaskHead(
-            task_type=self.
+            task_type=self.task, task_dims=[1] * self.nums_task
         )
         # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["cgc_layers", "towers"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=self.loss,
-            loss_params=loss_params,
-        )
 
     def forward(self, x):
         # Get all embeddings and flatten
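Every PLE constructor argument now has a default (two shared experts, two specific experts, two levels), so a minimal single-task model needs far less configuration; tower_params_list must still match the number of targets. A hedged sketch with assumed MLP config keys:

from nextrec.models.multi_task.ple import PLE

model = PLE(
    sparse_features=sparse_features,
    shared_expert_params={"dims": [64]},
    specific_expert_params={"dims": [64]},
    tower_params_list=[{"dims": [32]}],  # one tower for the single target
    target=["ctr"],
)
model.compile(optimizer="adam", loss="bce")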
nextrec/models/multi_task/poso.py
CHANGED
@@ -42,7 +42,7 @@ from __future__ import annotations
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-
+from typing import Literal
 from nextrec.basic.activation import activation_layer
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import MLP, EmbeddingLayer
@@ -307,9 +307,9 @@ class POSO(BaseModel):
         pc_sparse_features: list[str] | None,
         pc_sequence_features: list[str] | None,
         tower_params_list: list[dict],
-        target: list[str],
+        target: list[str] | None = None,
         task: str | list[str] = "binary",
-        architecture:
+        architecture: Literal["mlp", "mmoe"] = "mlp",
         # POSO gating defaults
         gate_hidden_dim: int = 32,
         gate_scale_factor: float = 2.0,
@@ -323,14 +323,6 @@ class POSO(BaseModel):
         expert_gate_hidden_dim: int = 32,
         expert_gate_scale_factor: float = 2.0,
         gate_use_softmax: bool = True,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
         self.nums_task = len(target)
@@ -359,10 +351,6 @@ class POSO(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
@@ -373,12 +361,6 @@ class POSO(BaseModel):
         self.pc_sparse_feature_names = list(pc_sparse_features or [])
         self.pc_sequence_feature_names = list(pc_sequence_features or [])
 
-        if loss is None:
-            self.loss = "bce"
-        self.loss = loss
-
-        optimizer_params = optimizer_params or {}
-
         self.main_dense_features = select_features(
             self.dense_features, self.main_dense_feature_names, "main_dense_features"
         )
@@ -488,7 +470,7 @@ class POSO(BaseModel):
         else:
             self.grad_norm_shared_modules = ["embedding", "mmoe"]
         self.prediction_layer = TaskHead(
-            task_type=self.
+            task_type=self.task,
             task_dims=[1] * self.nums_task,
         )
         include_modules = (
@@ -499,12 +481,6 @@ class POSO(BaseModel):
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=include_modules
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
 
     def forward(self, x):
         # Embed main and PC features separately so PC can gate hidden units
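POSO's architecture argument is now constrained to Literal["mlp", "mmoe"], selecting which backbone the personalized (PC) gates modulate; the "mmoe" branch also adds "mmoe" to grad_norm_shared_modules. A hedged sketch; the pc_dense_features name and the PC feature values are hypothetical:

from nextrec.models.multi_task.poso import POSO

model = POSO(
    dense_features=dense_features,
    sparse_features=sparse_features,
    sequence_features=[],
    pc_dense_features=None,            # hypothetical counterpart to the pc_* params above
    pc_sparse_features=["user_id"],    # features feeding the personalized gates
    pc_sequence_features=None,
    tower_params_list=[{"dims": [32]}],  # assumed MLP config keys
    target=["ctr"],
    architecture="mmoe",               # "mlp" or "mmoe"; anything else fails type checking
)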
nextrec/models/multi_task/share_bottom.py
CHANGED
@@ -69,19 +69,9 @@ class ShareBottom(BaseModel):
         tower_params_list: list[dict],
         target: list[str],
         task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
-        optimizer_params = optimizer_params or {}
-
         self.nums_task = len(target)
 
         resolved_task = task
@@ -102,16 +92,9 @@ class ShareBottom(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-        self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
         # Number of tasks
         self.nums_task = len(target)
         if len(tower_params_list) != self.nums_task:
@@ -142,18 +125,12 @@ class ShareBottom(BaseModel):
             tower = MLP(input_dim=bottom_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = TaskHead(
-            task_type=self.
+            task_type=self.task, task_dims=[1] * self.nums_task
         )
         # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["bottom", "towers"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
 
     def forward(self, x):
         # Get all embeddings and flatten
nextrec/models/ranking/afm.py
CHANGED
@@ -61,36 +61,17 @@ class AFM(BaseModel):
         sequence_features: list[SequenceFeature] | None = None,
         attention_dim: int = 32,
         attention_dropout: float = 0.0,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
         dense_features = dense_features or []
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"
 
         super(AFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
@@ -139,8 +120,10 @@ class AFM(BaseModel):
         self.attention_linear = nn.Linear(self.embedding_dim, attention_dim)
         self.attention_p = nn.Linear(attention_dim, 1, bias=False)
         self.attention_dropout = nn.Dropout(attention_dropout)
+
         self.output_projection = nn.Linear(self.embedding_dim, 1, bias=False)
-
+
+        self.prediction_layer = TaskHead(task_type=self.task)
         self.input_mask = InputMask()
 
         # Register regularization weights
@@ -158,13 +141,6 @@ class AFM(BaseModel):
             emb.weight for emb in self.first_order_embeddings.values()
         )
 
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         field_emb = self.embedding(
             x=x, features=self.fm_features, squeeze_dim=False
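AFM now keeps only its model-specific knobs; generic options such as target and task travel via **kwargs to BaseModel. A hedged sketch:

from nextrec.models.ranking.afm import AFM

model = AFM(
    sparse_features=sparse_features,
    attention_dim=32,        # width of the attention projection over pairwise interactions
    attention_dropout=0.1,
    task="binary",           # assumed to be forwarded to BaseModel through **kwargs
)
model.compile(optimizer="adam", loss="bce")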
nextrec/models/ranking/autoint.py
CHANGED
@@ -82,16 +82,6 @@ class AutoInt(BaseModel):
         att_head_num: int = 2,
         att_dropout: float = 0.0,
         att_use_residual: bool = True,
-        target: list[str] | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
@@ -99,21 +89,12 @@ class AutoInt(BaseModel):
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
-
-
-
-        optimizer_params = {}
-        if loss is None:
-            loss = "bce"
+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []
 
         self.att_layer_num = att_layer_num
         self.att_embedding_dim = att_embedding_dim
@@ -123,12 +104,7 @@ class AutoInt(BaseModel):
         # if you want to follow the paper strictly, set dense_features=[]
         # or modify the code accordingly
         self.interaction_features = dense_features + sparse_features + sequence_features
-
-        # All features for embedding
-        self.all_features = dense_features + sparse_features + sequence_features
-
-        # Embedding layer
-        self.embedding = EmbeddingLayer(features=self.all_features)
+        self.embedding = EmbeddingLayer(features=self.interaction_features)
 
         # Project embeddings to attention embedding dimension
         num_fields = len(self.interaction_features)
@@ -159,23 +135,14 @@ class AutoInt(BaseModel):
             ]
         )
 
-        # Final prediction layer
         self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
-        self.prediction_layer = TaskHead(task_type=self.
+        self.prediction_layer = TaskHead(task_type=self.task)
 
-        # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding",
             include_modules=["projection_layers", "attention_layers", "fc"],
         )
 
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         # Get embeddings field-by-field so mixed dimensions can be projected safely
         field_embeddings = []
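AutoInt drops the redundant all_features copy and embeds interaction_features directly. A hedged construction sketch using the attention knobs kept in the signature:

from nextrec.models.ranking.autoint import AutoInt

model = AutoInt(
    dense_features=[],                # per the in-code comment, [] follows the paper strictly
    sparse_features=sparse_features,
    att_layer_num=3,
    att_embedding_dim=8,
    att_head_num=2,
    att_use_residual=True,
)
model.compile(optimizer="adam", loss="bce")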
nextrec/models/ranking/dcn.py
CHANGED
@@ -96,36 +96,18 @@ class DCN(BaseModel):
         sequence_features: list[SequenceFeature] | None = None,
         cross_num: int = 3,
         mlp_params: dict | None = None,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
 
         dense_features = dense_features or []
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
-
-        if loss is None:
-            loss = "bce"
+        mlp_params = mlp_params or {}
 
         super(DCN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )
 
@@ -173,13 +155,6 @@ class DCN(BaseModel):
             include_modules=["cross_network", "mlp", "final_layer"],
         )
 
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)