nextrec 0.4.22-py3-none-any.whl → 0.4.24-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/layers.py +96 -46
- nextrec/basic/metrics.py +128 -114
- nextrec/basic/model.py +94 -91
- nextrec/basic/summary.py +36 -2
- nextrec/data/dataloader.py +2 -0
- nextrec/data/preprocessor.py +137 -5
- nextrec/loss/listwise.py +19 -6
- nextrec/loss/pairwise.py +6 -4
- nextrec/loss/pointwise.py +8 -6
- nextrec/models/multi_task/aitm.py +0 -0
- nextrec/models/multi_task/apg.py +0 -0
- nextrec/models/multi_task/cross_stitch.py +0 -0
- nextrec/models/multi_task/esmm.py +5 -28
- nextrec/models/multi_task/mmoe.py +6 -28
- nextrec/models/multi_task/pepnet.py +335 -0
- nextrec/models/multi_task/ple.py +21 -40
- nextrec/models/multi_task/poso.py +17 -39
- nextrec/models/multi_task/share_bottom.py +5 -28
- nextrec/models/multi_task/snr_trans.py +0 -0
- nextrec/models/ranking/afm.py +3 -27
- nextrec/models/ranking/autoint.py +5 -38
- nextrec/models/ranking/dcn.py +1 -26
- nextrec/models/ranking/dcn_v2.py +6 -34
- nextrec/models/ranking/deepfm.py +2 -29
- nextrec/models/ranking/dien.py +2 -28
- nextrec/models/ranking/din.py +2 -27
- nextrec/models/ranking/eulernet.py +3 -30
- nextrec/models/ranking/ffm.py +0 -26
- nextrec/models/ranking/fibinet.py +8 -32
- nextrec/models/ranking/fm.py +0 -29
- nextrec/models/ranking/lr.py +0 -30
- nextrec/models/ranking/masknet.py +4 -30
- nextrec/models/ranking/pnn.py +4 -28
- nextrec/models/ranking/widedeep.py +0 -32
- nextrec/models/ranking/xdeepfm.py +0 -30
- nextrec/models/retrieval/dssm.py +4 -28
- nextrec/models/retrieval/dssm_v2.py +4 -28
- nextrec/models/retrieval/mind.py +2 -22
- nextrec/models/retrieval/sdm.py +4 -24
- nextrec/models/retrieval/youtube_dnn.py +4 -25
- nextrec/models/sequential/hstu.py +0 -18
- nextrec/utils/model.py +91 -4
- nextrec/utils/types.py +35 -0
- {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/METADATA +8 -6
- nextrec-0.4.24.dist-info/RECORD +86 -0
- nextrec-0.4.22.dist-info/RECORD +0 -81
- {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/WHEEL +0 -0
- {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.22.dist-info → nextrec-0.4.24.dist-info}/licenses/LICENSE +0 -0
nextrec/models/multi_task/ple.py
CHANGED
@@ -93,7 +93,7 @@ class CGCLayer(nn.Module):
             [
                 MLP(
                     input_dim=input_dim,
-
+                    output_dim=None,
                     **shared_expert_params,
                 )
                 for _ in range(num_shared_experts)
@@ -105,7 +105,7 @@ class CGCLayer(nn.Module):
             [
                 MLP(
                     input_dim=input_dim,
-
+                    output_dim=None,
                     **params,
                 )
                 for _ in range(num_specific_experts)
@@ -202,29 +202,21 @@ class PLE(BaseModel):

     def __init__(
         self,
-        dense_features: list[DenseFeature],
-        sparse_features: list[SparseFeature],
-        sequence_features: list[SequenceFeature],
-        shared_expert_params: dict,
-        specific_expert_params: dict | list[dict],
-        num_shared_experts: int,
-        num_specific_experts: int,
-        num_levels: int,
-        tower_params_list: list[dict],
-        target: list[str],
+        dense_features: list[DenseFeature] | None = None,
+        sparse_features: list[SparseFeature] | None = None,
+        sequence_features: list[SequenceFeature] | None = None,
+        shared_expert_params: dict | None = None,
+        specific_expert_params: dict | list[dict] | None = None,
+        num_shared_experts: int = 2,
+        num_specific_experts: int = 2,
+        num_levels: int = 2,
+        tower_params_list: list[dict] | None = None,
+        target: list[str] | None = None,
         task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

-        self.nums_task = len(target)
+        self.nums_task = len(target) if target is not None else 1

         resolved_task = task
         if resolved_task is None:
@@ -244,23 +236,15 @@ class PLE(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

-        self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
         # Number of tasks, experts, and levels
         self.nums_task = len(target)
         self.num_shared_experts = num_shared_experts
         self.num_specific_experts = num_specific_experts
         self.num_levels = num_levels
-
-        optimizer_params = {}
+
         if len(tower_params_list) != self.nums_task:
             raise ValueError(
                 f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.nums_task})"
@@ -275,8 +259,11 @@ class PLE(BaseModel):
         # input_dim = emb_dim_total + dense_input_dim

         # Get expert output dimension
-        if
-
+        if (
+            "hidden_dims" in shared_expert_params
+            and len(shared_expert_params["hidden_dims"]) > 0
+        ):
+            expert_output_dim = shared_expert_params["hidden_dims"][-1]
         else:
             expert_output_dim = input_dim

@@ -299,21 +286,15 @@ class PLE(BaseModel):
         # Task-specific towers
         self.towers = nn.ModuleList()
         for tower_params in tower_params_list:
-            tower = MLP(input_dim=expert_output_dim,
+            tower = MLP(input_dim=expert_output_dim, output_dim=1, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = TaskHead(
-            task_type=self.
+            task_type=self.task, task_dims=[1] * self.nums_task
         )
         # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["cgc_layers", "towers"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=self.loss,
-            loss_params=loss_params,
-        )

     def forward(self, x):
         # Get all embeddings and flatten
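The ple.py changes above give every constructor argument a default and drop the optimizer, loss, and regularization parameters together with the self.compile(...) call in __init__. A minimal usage sketch under the 0.4.24 signature; the feature lists and parameter dicts are illustrative placeholders, and issuing compile() separately is an assumption based on the method visible in the removed 0.4.22 code:

    from nextrec.models.multi_task.ple import PLE

    # dense_feats / sparse_feats: DenseFeature / SparseFeature lists built elsewhere
    # (placeholders, not shown in this diff).
    model = PLE(
        dense_features=dense_feats,
        sparse_features=sparse_feats,
        shared_expert_params={"hidden_dims": [128, 64]},   # last entry becomes expert_output_dim
        specific_expert_params={"hidden_dims": [128, 64]},
        tower_params_list=[{"hidden_dims": [32]}, {"hidden_dims": [32]}],
        target=["click", "conversion"],
        task=["binary", "binary"],
    )
    # Training setup no longer flows through __init__; the compile() call removed
    # from 0.4.22 is presumably now made by the caller (assumption).
    model.compile(optimizer="adam", optimizer_params={"lr": 1e-3}, loss="bce")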
nextrec/models/multi_task/poso.py
CHANGED
@@ -42,7 +42,7 @@ from __future__ import annotations
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-
+from typing import Literal
 from nextrec.basic.activation import activation_layer
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import MLP, EmbeddingLayer
@@ -127,7 +127,7 @@ class POSOMLP(nn.Module):
     """
     POSO-enhanced MLP that stacks multiple POSOFC layers.

-
+    hidden_dims: e.g., [256, 128, 64] means
         in_dim -> 256 -> 128 -> 64
     Each layer has its own gate g_l(pc) following Eq. (11).
     """
@@ -136,7 +136,7 @@ class POSOMLP(nn.Module):
         self,
         input_dim: int,
         pc_dim: int,
-
+        hidden_dims: list[int],
         gate_hidden_dim: int = 32,
         scale_factor: float = 2.0,
         activation: str = "relu",
@@ -147,7 +147,7 @@ class POSOMLP(nn.Module):

         layers = []
         in_dim = input_dim
-        for out_dim in
+        for out_dim in hidden_dims:
             layers.append(
                 POSOFC(
                     in_dim=in_dim,
@@ -213,8 +213,8 @@ class POSOMMoE(nn.Module):
             [
                 MLP(
                     input_dim=input_dim,
-
-
+                    output_dim=None,
+                    hidden_dims=expert_hidden_dims,
                     activation=activation,
                     dropout=expert_dropout,
                 )
@@ -307,9 +307,9 @@ class POSO(BaseModel):
         pc_sparse_features: list[str] | None,
         pc_sequence_features: list[str] | None,
         tower_params_list: list[dict],
-        target: list[str],
+        target: list[str] | None = None,
         task: str | list[str] = "binary",
-        architecture:
+        architecture: Literal["mlp", "mmoe"] = "mlp",
         # POSO gating defaults
         gate_hidden_dim: int = 32,
         gate_scale_factor: float = 2.0,
@@ -323,14 +323,6 @@ class POSO(BaseModel):
         expert_gate_hidden_dim: int = 32,
         expert_gate_scale_factor: float = 2.0,
         gate_use_softmax: bool = True,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
         self.nums_task = len(target)
@@ -359,10 +351,6 @@ class POSO(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

@@ -373,12 +361,6 @@ class POSO(BaseModel):
         self.pc_sparse_feature_names = list(pc_sparse_features or [])
         self.pc_sequence_feature_names = list(pc_sequence_features or [])

-        if loss is None:
-            self.loss = "bce"
-        self.loss = loss
-
-        optimizer_params = optimizer_params or {}
-
         self.main_dense_features = select_features(
             self.dense_features, self.main_dense_feature_names, "main_dense_features"
         )
@@ -434,16 +416,16 @@ class POSO(BaseModel):
         self.towers = nn.ModuleList()
         self.tower_heads = nn.ModuleList()
         for tower_params in tower_params_list:
-
-                if not
+                hidden_dims = tower_params.get("hidden_dims")
+                if not hidden_dims:
                     raise ValueError(
-                        "tower_params must include a non-empty '
+                        "tower_params must include a non-empty 'hidden_dims' list for POSO-MLP towers."
                     )
                 dropout = tower_params.get("dropout", 0.0)
                 tower = POSOMLP(
                     input_dim=self.main_input_dim,
                     pc_dim=self.pc_input_dim,
-
+                    hidden_dims=hidden_dims,
                     gate_hidden_dim=tower_params.get(
                         "gate_hidden_dim", gate_hidden_dim
                     ),
@@ -453,7 +435,9 @@ class POSO(BaseModel):
                     dropout=dropout,
                 )
                 self.towers.append(tower)
-                tower_output_dim =
+                tower_output_dim = (
+                    hidden_dims[-1] if hidden_dims else self.main_input_dim
+                )
                 self.tower_heads.append(nn.Linear(tower_output_dim, 1))
         else:
             if expert_hidden_dims is None or not expert_hidden_dims:
@@ -476,7 +460,7 @@ class POSO(BaseModel):
                 [
                     MLP(
                         input_dim=self.mmoe.expert_output_dim,
-
+                        output_dim=1,
                         **tower_params,
                     )
                     for tower_params in tower_params_list
@@ -488,7 +472,7 @@ class POSO(BaseModel):
         else:
             self.grad_norm_shared_modules = ["embedding", "mmoe"]
         self.prediction_layer = TaskHead(
-            task_type=self.
+            task_type=self.task,
             task_dims=[1] * self.nums_task,
         )
         include_modules = (
@@ -499,12 +483,6 @@ class POSO(BaseModel):
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=include_modules
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )

     def forward(self, x):
         # Embed main and PC features separately so PC can gate hidden units
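For poso.py, each tower configuration now has to carry an explicit "hidden_dims" list when architecture="mlp", and architecture is typed as Literal["mlp", "mmoe"]. A small sketch of the validation this diff adds; the dict values are illustrative:

    # Each tower dict needs a non-empty "hidden_dims" list for POSO-MLP towers;
    # the last entry feeds the per-task head nn.Linear(hidden_dims[-1], 1).
    tower_params_list = [
        {"hidden_dims": [128, 64], "dropout": 0.1},  # tower for task 1
        {"hidden_dims": [64, 32], "dropout": 0.1},   # tower for task 2
    ]

    for tower_params in tower_params_list:
        hidden_dims = tower_params.get("hidden_dims")
        if not hidden_dims:
            raise ValueError(
                "tower_params must include a non-empty 'hidden_dims' list for POSO-MLP towers."
            )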
nextrec/models/multi_task/share_bottom.py
CHANGED
@@ -69,19 +69,9 @@ class ShareBottom(BaseModel):
         tower_params_list: list[dict],
         target: list[str],
         task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

-        optimizer_params = optimizer_params or {}
-
         self.nums_task = len(target)

         resolved_task = task
@@ -102,16 +92,9 @@ class ShareBottom(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

-        self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
         # Number of tasks
         self.nums_task = len(target)
         if len(tower_params_list) != self.nums_task:
@@ -127,33 +110,27 @@ class ShareBottom(BaseModel):
         # input_dim = emb_dim_total + dense_input_dim

         # Shared bottom network
-        self.bottom = MLP(input_dim=input_dim,
+        self.bottom = MLP(input_dim=input_dim, output_dim=None, **bottom_params)
         self.grad_norm_shared_modules = ["embedding", "bottom"]

         # Get bottom output dimension
-        if "
-            bottom_output_dim = bottom_params["
+        if "hidden_dims" in bottom_params and len(bottom_params["hidden_dims"]) > 0:
+            bottom_output_dim = bottom_params["hidden_dims"][-1]
         else:
             bottom_output_dim = input_dim

         # Task-specific towers
         self.towers = nn.ModuleList()
         for tower_params in tower_params_list:
-            tower = MLP(input_dim=bottom_output_dim,
+            tower = MLP(input_dim=bottom_output_dim, output_dim=1, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = TaskHead(
-            task_type=self.
+            task_type=self.task, task_dims=[1] * self.nums_task
         )
         # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["bottom", "towers"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )

     def forward(self, x):
         # Get all embeddings and flatten
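The share_bottom.py hunks show the MLP convention this refactor applies across the multi-task models: shared layers are built with output_dim=None, so the last "hidden_dims" entry is the effective output width, while task towers pass output_dim=1. A sketch of that convention as the diff applies it, with illustrative sizes:

    from nextrec.basic.layers import MLP

    bottom_params = {"hidden_dims": [256, 128]}
    tower_params = {"hidden_dims": [64]}

    # Shared bottom: no final projection, so its output width is hidden_dims[-1].
    bottom = MLP(input_dim=320, output_dim=None, **bottom_params)  # input_dim is illustrative
    bottom_output_dim = bottom_params["hidden_dims"][-1]           # 128, as computed in the diff

    # Task tower: explicit 1-dim output feeding the TaskHead logits.
    tower = MLP(input_dim=bottom_output_dim, output_dim=1, **tower_params)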
nextrec/models/ranking/afm.py
CHANGED
@@ -61,36 +61,17 @@ class AFM(BaseModel):
         sequence_features: list[SequenceFeature] | None = None,
         attention_dim: int = 32,
         attention_dropout: float = 0.0,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

         dense_features = dense_features or []
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"

         super(AFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

@@ -139,8 +120,10 @@ class AFM(BaseModel):
         self.attention_linear = nn.Linear(self.embedding_dim, attention_dim)
         self.attention_p = nn.Linear(attention_dim, 1, bias=False)
         self.attention_dropout = nn.Dropout(attention_dropout)
+
         self.output_projection = nn.Linear(self.embedding_dim, 1, bias=False)
-
+
+        self.prediction_layer = TaskHead(task_type=self.task)
         self.input_mask = InputMask()

         # Register regularization weights
@@ -158,13 +141,6 @@ class AFM(BaseModel):
             emb.weight for emb in self.first_order_embeddings.values()
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         field_emb = self.embedding(
             x=x, features=self.fm_features, squeeze_dim=False
nextrec/models/ranking/autoint.py
CHANGED
@@ -82,16 +82,6 @@ class AutoInt(BaseModel):
         att_head_num: int = 2,
         att_dropout: float = 0.0,
         att_use_residual: bool = True,
-        target: list[str] | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

@@ -99,21 +89,12 @@ class AutoInt(BaseModel):
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

-
-
-
-        optimizer_params = {}
-        if loss is None:
-            loss = "bce"
+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []

         self.att_layer_num = att_layer_num
         self.att_embedding_dim = att_embedding_dim
@@ -123,12 +104,7 @@ class AutoInt(BaseModel):
         # if you want to follow the paper strictly, set dense_features=[]
         # or modify the code accordingly
         self.interaction_features = dense_features + sparse_features + sequence_features
-
-        # All features for embedding
-        self.all_features = dense_features + sparse_features + sequence_features
-
-        # Embedding layer
-        self.embedding = EmbeddingLayer(features=self.all_features)
+        self.embedding = EmbeddingLayer(features=self.interaction_features)

         # Project embeddings to attention embedding dimension
         num_fields = len(self.interaction_features)
@@ -159,23 +135,14 @@ class AutoInt(BaseModel):
             ]
         )

-        # Final prediction layer
         self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
-        self.prediction_layer = TaskHead(task_type=self.
+        self.prediction_layer = TaskHead(task_type=self.task)

-        # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding",
             include_modules=["projection_layers", "attention_layers", "fc"],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         # Get embeddings field-by-field so mixed dimensions can be projected safely
         field_embeddings = []
nextrec/models/ranking/dcn.py
CHANGED
@@ -96,36 +96,18 @@ class DCN(BaseModel):
         sequence_features: list[SequenceFeature] | None = None,
         cross_num: int = 3,
         mlp_params: dict | None = None,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

         dense_features = dense_features or []
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
-
-        if loss is None:
-            loss = "bce"
+        mlp_params = mlp_params or {}

         super(DCN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

@@ -173,13 +155,6 @@ class DCN(BaseModel):
             include_modules=["cross_network", "mlp", "final_layer"],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
nextrec/models/ranking/dcn_v2.py
CHANGED
@@ -45,7 +45,7 @@ DCN v2 在原始 DCN 基础上,将标量交叉权重升级为向量/矩阵参

 import torch
 import torch.nn as nn
-
+from typing import Literal
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import MLP, EmbeddingLayer
 from nextrec.basic.heads import TaskHead
@@ -193,53 +193,32 @@ class DCNv2(BaseModel):
         sparse_features: list[SparseFeature] | None = None,
         sequence_features: list[SequenceFeature] | None = None,
         cross_num: int = 3,
-        cross_type:
-        architecture:
+        cross_type: Literal["matrix", "mix", "low_rank"] = "matrix",
+        architecture: Literal["parallel", "stacked"] = "parallel",
         low_rank: int = 32,
         num_experts: int = 4,
         mlp_params: dict | None = None,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
         dense_features = dense_features or []
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
-
-        if loss is None:
-            loss = "bce"
+        mlp_params = mlp_params or {}

         super(DCNv2, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

-        self.all_features = dense_features + sparse_features + sequence_features
         self.embedding = EmbeddingLayer(features=self.all_features)
         input_dim = self.embedding.input_dim

-        architecture = architecture.lower()
         if architecture not in {"parallel", "stacked"}:
             raise ValueError("architecture must be 'parallel' or 'stacked'.")
         self.architecture = architecture

-        cross_type = cross_type.lower()
         if cross_type == "matrix":
             self.cross_network = CrossNetV2(input_dim=input_dim, num_layers=cross_num)
         elif cross_type in {"mix", "low_rank"}:
@@ -255,7 +234,7 @@ class DCNv2(BaseModel):
         if mlp_params is not None:
             self.use_dnn = True
             dnn_params = dict(mlp_params)
-            dnn_params.setdefault("
+            dnn_params.setdefault("output_dim", None)
             self.mlp = MLP(input_dim=input_dim, **dnn_params)
             deep_dim = self.mlp.output_dim
             final_input_dim = (
@@ -271,20 +250,13 @@ class DCNv2(BaseModel):
             final_input_dim = input_dim

         self.final_layer = nn.Linear(final_input_dim, 1)
-        self.prediction_layer = TaskHead(task_type=self.
+        self.prediction_layer = TaskHead(task_type=self.task)

         self.register_regularization_weights(
             embedding_attr="embedding",
             include_modules=["cross_network", "mlp", "final_layer"],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x) -> torch.Tensor:
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
         cross_out = self.cross_network(input_flat)
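dcn_v2.py narrows cross_type and architecture to Literal values, removes the .lower() normalization (so lowercase values are presumably required now), and normalizes mlp_params to an empty dict with output_dim defaulting to None. A hedged instantiation sketch; the feature lists are placeholders built elsewhere and the remaining arguments keep their defaults:

    from nextrec.models.ranking.dcn_v2 import DCNv2

    # dense_feats / sparse_feats: DenseFeature / SparseFeature lists built elsewhere.
    model = DCNv2(
        dense_features=dense_feats,
        sparse_features=sparse_feats,
        cross_num=3,
        cross_type="mix",            # Literal["matrix", "mix", "low_rank"]
        architecture="stacked",      # Literal["parallel", "stacked"]
        low_rank=32,
        num_experts=4,
        mlp_params={"hidden_dims": [256, 128]},  # output_dim defaults to None via setdefault
    )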