nextrec 0.4.21__py3-none-any.whl → 0.4.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +1 -1
- nextrec/basic/heads.py +2 -3
- nextrec/basic/metrics.py +1 -2
- nextrec/basic/model.py +115 -80
- nextrec/basic/summary.py +36 -2
- nextrec/data/preprocessor.py +137 -5
- nextrec/loss/__init__.py +0 -4
- nextrec/loss/grad_norm.py +3 -3
- nextrec/loss/listwise.py +19 -6
- nextrec/loss/pairwise.py +6 -4
- nextrec/loss/pointwise.py +8 -6
- nextrec/models/multi_task/esmm.py +3 -26
- nextrec/models/multi_task/mmoe.py +2 -24
- nextrec/models/multi_task/ple.py +13 -35
- nextrec/models/multi_task/poso.py +4 -28
- nextrec/models/multi_task/share_bottom.py +1 -24
- nextrec/models/ranking/afm.py +3 -27
- nextrec/models/ranking/autoint.py +5 -38
- nextrec/models/ranking/dcn.py +1 -26
- nextrec/models/ranking/dcn_v2.py +5 -33
- nextrec/models/ranking/deepfm.py +2 -29
- nextrec/models/ranking/dien.py +2 -28
- nextrec/models/ranking/din.py +2 -27
- nextrec/models/ranking/eulernet.py +3 -30
- nextrec/models/ranking/ffm.py +0 -26
- nextrec/models/ranking/fibinet.py +8 -32
- nextrec/models/ranking/fm.py +0 -29
- nextrec/models/ranking/lr.py +0 -30
- nextrec/models/ranking/masknet.py +4 -30
- nextrec/models/ranking/pnn.py +4 -28
- nextrec/models/ranking/widedeep.py +0 -32
- nextrec/models/ranking/xdeepfm.py +0 -30
- nextrec/models/retrieval/dssm.py +0 -24
- nextrec/models/retrieval/dssm_v2.py +0 -24
- nextrec/models/retrieval/mind.py +0 -20
- nextrec/models/retrieval/sdm.py +0 -20
- nextrec/models/retrieval/youtube_dnn.py +0 -21
- nextrec/models/sequential/hstu.py +0 -18
- nextrec/utils/__init__.py +5 -1
- nextrec/{loss/loss_utils.py → utils/loss.py} +17 -7
- nextrec/utils/model.py +79 -1
- nextrec/utils/types.py +62 -23
- {nextrec-0.4.21.dist-info → nextrec-0.4.23.dist-info}/METADATA +8 -6
- nextrec-0.4.23.dist-info/RECORD +81 -0
- nextrec-0.4.21.dist-info/RECORD +0 -81
- {nextrec-0.4.21.dist-info → nextrec-0.4.23.dist-info}/WHEEL +0 -0
- {nextrec-0.4.21.dist-info → nextrec-0.4.23.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.21.dist-info → nextrec-0.4.23.dist-info}/licenses/LICENSE +0 -0
nextrec/models/multi_task/poso.py
CHANGED

@@ -42,7 +42,7 @@ from __future__ import annotations
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-
+from typing import Literal
 from nextrec.basic.activation import activation_layer
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import MLP, EmbeddingLayer
@@ -307,9 +307,9 @@ class POSO(BaseModel):
         pc_sparse_features: list[str] | None,
         pc_sequence_features: list[str] | None,
         tower_params_list: list[dict],
-        target: list[str],
+        target: list[str] | None = None,
         task: str | list[str] = "binary",
-        architecture:
+        architecture: Literal["mlp", "mmoe"] = "mlp",
         # POSO gating defaults
         gate_hidden_dim: int = 32,
         gate_scale_factor: float = 2.0,
@@ -323,14 +323,6 @@ class POSO(BaseModel):
         expert_gate_hidden_dim: int = 32,
         expert_gate_scale_factor: float = 2.0,
         gate_use_softmax: bool = True,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
         self.nums_task = len(target)
@@ -359,10 +351,6 @@ class POSO(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

@@ -373,12 +361,6 @@ class POSO(BaseModel):
         self.pc_sparse_feature_names = list(pc_sparse_features or [])
         self.pc_sequence_feature_names = list(pc_sequence_features or [])

-        if loss is None:
-            self.loss = "bce"
-        self.loss = loss
-
-        optimizer_params = optimizer_params or {}
-
         self.main_dense_features = select_features(
             self.dense_features, self.main_dense_feature_names, "main_dense_features"
         )
@@ -488,7 +470,7 @@ class POSO(BaseModel):
         else:
             self.grad_norm_shared_modules = ["embedding", "mmoe"]
         self.prediction_layer = TaskHead(
-            task_type=self.
+            task_type=self.task,
             task_dims=[1] * self.nums_task,
         )
         include_modules = (
@@ -499,12 +481,6 @@ class POSO(BaseModel):
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=include_modules
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )

     def forward(self, x):
         # Embed main and PC features separately so PC can gate hidden units
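The POSO hunks above show the refactor that repeats through every model file in this release: optimizer, loss, and per-weight regularization arguments are removed from __init__, and the constructor no longer ends with a self.compile(...) call. A minimal sketch of what that presumably means for callers, using DeepFM (whose diff appears further below); the feature lists and parameter dicts are illustrative placeholders, not verified API:

from nextrec.models.ranking.deepfm import DeepFM

# Sketch only: dense_features / sparse_features are lists of nextrec
# DenseFeature / SparseFeature objects built as in 0.4.21; their construction
# is untouched by this diff and is omitted here.
model = DeepFM(
    dense_features=dense_features,
    sparse_features=sparse_features,
    mlp_params={"dims": [256, 128]},  # assumed key; forwarded via **mlp_params to MLP
)

# Training configuration presumably moves to an explicit compile() call; the
# keyword names below mirror the self.compile(...) invocation that this diff
# removes from each constructor.
model.compile(
    optimizer="adam",
    optimizer_params={"lr": 1e-3},  # assumed contents; the signature only requires a dict
    loss="bce",
)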
nextrec/models/multi_task/share_bottom.py
CHANGED

@@ -69,19 +69,9 @@ class ShareBottom(BaseModel):
         tower_params_list: list[dict],
         target: list[str],
         task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | list[str | nn.Module] | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

-        optimizer_params = optimizer_params or {}
-
         self.nums_task = len(target)

         resolved_task = task
@@ -102,16 +92,9 @@ class ShareBottom(BaseModel):
             sequence_features=sequence_features,
             target=target,
             task=resolved_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

-        self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
         # Number of tasks
         self.nums_task = len(target)
         if len(tower_params_list) != self.nums_task:
@@ -142,18 +125,12 @@ class ShareBottom(BaseModel):
             tower = MLP(input_dim=bottom_output_dim, output_layer=True, **tower_params)
             self.towers.append(tower)
         self.prediction_layer = TaskHead(
-            task_type=self.
+            task_type=self.task, task_dims=[1] * self.nums_task
         )
         # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["bottom", "towers"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )

     def forward(self, x):
         # Get all embeddings and flatten
nextrec/models/ranking/afm.py
CHANGED

@@ -61,36 +61,17 @@ class AFM(BaseModel):
         sequence_features: list[SequenceFeature] | None = None,
         attention_dim: int = 32,
         attention_dropout: float = 0.0,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

         dense_features = dense_features or []
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"

         super(AFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

@@ -139,8 +120,10 @@ class AFM(BaseModel):
         self.attention_linear = nn.Linear(self.embedding_dim, attention_dim)
         self.attention_p = nn.Linear(attention_dim, 1, bias=False)
         self.attention_dropout = nn.Dropout(attention_dropout)
+
         self.output_projection = nn.Linear(self.embedding_dim, 1, bias=False)
-
+
+        self.prediction_layer = TaskHead(task_type=self.task)
         self.input_mask = InputMask()

         # Register regularization weights
@@ -158,13 +141,6 @@ class AFM(BaseModel):
             emb.weight for emb in self.first_order_embeddings.values()
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         field_emb = self.embedding(
             x=x, features=self.fm_features, squeeze_dim=False
nextrec/models/ranking/autoint.py
CHANGED

@@ -82,16 +82,6 @@ class AutoInt(BaseModel):
         att_head_num: int = 2,
         att_dropout: float = 0.0,
         att_use_residual: bool = True,
-        target: list[str] | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

@@ -99,21 +89,12 @@ class AutoInt(BaseModel):
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

-
-
-
-        optimizer_params = {}
-        if loss is None:
-            loss = "bce"
+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []

         self.att_layer_num = att_layer_num
         self.att_embedding_dim = att_embedding_dim
@@ -123,12 +104,7 @@ class AutoInt(BaseModel):
         # if you want to follow the paper strictly, set dense_features=[]
         # or modify the code accordingly
         self.interaction_features = dense_features + sparse_features + sequence_features
-
-        # All features for embedding
-        self.all_features = dense_features + sparse_features + sequence_features
-
-        # Embedding layer
-        self.embedding = EmbeddingLayer(features=self.all_features)
+        self.embedding = EmbeddingLayer(features=self.interaction_features)

         # Project embeddings to attention embedding dimension
         num_fields = len(self.interaction_features)
@@ -159,23 +135,14 @@ class AutoInt(BaseModel):
             ]
         )

-        # Final prediction layer
         self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
-        self.prediction_layer = TaskHead(task_type=self.
+        self.prediction_layer = TaskHead(task_type=self.task)

-        # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding",
             include_modules=["projection_layers", "attention_layers", "fc"],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         # Get embeddings field-by-field so mixed dimensions can be projected safely
         field_embeddings = []
nextrec/models/ranking/dcn.py
CHANGED

@@ -96,36 +96,18 @@ class DCN(BaseModel):
         sequence_features: list[SequenceFeature] | None = None,
         cross_num: int = 3,
         mlp_params: dict | None = None,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

         dense_features = dense_features or []
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
-
-        if loss is None:
-            loss = "bce"
+        mlp_params = mlp_params or {}

         super(DCN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

@@ -173,13 +155,6 @@ class DCN(BaseModel):
             include_modules=["cross_network", "mlp", "final_layer"],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         # Get all embeddings and flatten
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
nextrec/models/ranking/dcn_v2.py
CHANGED

@@ -45,7 +45,7 @@ DCN v2 在原始 DCN 基础上，将标量交叉权重升级为向量/矩阵参

 import torch
 import torch.nn as nn
-
+from typing import Literal
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import MLP, EmbeddingLayer
 from nextrec.basic.heads import TaskHead
@@ -193,53 +193,32 @@ class DCNv2(BaseModel):
         sparse_features: list[SparseFeature] | None = None,
         sequence_features: list[SequenceFeature] | None = None,
         cross_num: int = 3,
-        cross_type:
-        architecture:
+        cross_type: Literal["matrix", "mix", "low_rank"] = "matrix",
+        architecture: Literal["parallel", "stacked"] = "parallel",
         low_rank: int = 32,
         num_experts: int = 4,
         mlp_params: dict | None = None,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):
         dense_features = dense_features or []
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
-
-        if loss is None:
-            loss = "bce"
+        mlp_params = mlp_params or {}

         super(DCNv2, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

-        self.all_features = dense_features + sparse_features + sequence_features
         self.embedding = EmbeddingLayer(features=self.all_features)
         input_dim = self.embedding.input_dim

-        architecture = architecture.lower()
         if architecture not in {"parallel", "stacked"}:
             raise ValueError("architecture must be 'parallel' or 'stacked'.")
         self.architecture = architecture

-        cross_type = cross_type.lower()
         if cross_type == "matrix":
             self.cross_network = CrossNetV2(input_dim=input_dim, num_layers=cross_num)
         elif cross_type in {"mix", "low_rank"}:
@@ -271,20 +250,13 @@ class DCNv2(BaseModel):
             final_input_dim = input_dim

         self.final_layer = nn.Linear(final_input_dim, 1)
-        self.prediction_layer = TaskHead(task_type=self.
+        self.prediction_layer = TaskHead(task_type=self.task)

         self.register_regularization_weights(
             embedding_attr="embedding",
             include_modules=["cross_network", "mlp", "final_layer"],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x) -> torch.Tensor:
         input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
         cross_out = self.cross_network(input_flat)
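A recurring detail in the dcn_v2.py and poso.py hunks: parameters that used to be free-form strings (and were .lower()-normalised at runtime) are now typed with typing.Literal, so a type checker can reject invalid values before the runtime ValueError fires. A small generic sketch of the pattern (not nextrec code; names chosen for illustration):

from typing import Literal

def build_cross_network(
    cross_type: Literal["matrix", "mix", "low_rank"] = "matrix",
    architecture: Literal["parallel", "stacked"] = "parallel",
) -> str:
    # The runtime guard mirrors the check DCNv2.__init__ keeps after the change.
    if architecture not in {"parallel", "stacked"}:
        raise ValueError("architecture must be 'parallel' or 'stacked'.")
    return f"{architecture} cross network ({cross_type})"

print(build_cross_network())                       # parallel cross network (matrix)
print(build_cross_network(cross_type="low_rank"))  # parallel cross network (low_rank)
# build_cross_network(architecture="stack")        # mypy/pyright flag this literal mismatch

Note that the .lower() normalisation is removed in the same hunk, so the accepted values are now effectively case-sensitive.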
nextrec/models/ranking/deepfm.py
CHANGED

@@ -42,8 +42,6 @@ embedding,无需手工构造交叉特征即可端到端训练,常用于 CTR/
 - CTR/CVR 任务的常用强基线
 """

-import torch.nn as nn
-
 from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import FM, LR, MLP, EmbeddingLayer
 from nextrec.basic.heads import TaskHead
@@ -65,16 +63,6 @@ class DeepFM(BaseModel):
         sparse_features: list[SparseFeature] | None = None,
         sequence_features: list[SequenceFeature] | None = None,
         mlp_params: dict | None = None,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

@@ -82,24 +70,14 @@ class DeepFM(BaseModel):
         sparse_features = sparse_features or []
         sequence_features = sequence_features or []
         mlp_params = mlp_params or {}
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"

         super(DeepFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

-        self.loss = loss
         self.fm_features = sparse_features + sequence_features
         self.deep_features = dense_features + sparse_features + sequence_features
         self.embedding = EmbeddingLayer(features=self.deep_features)
@@ -110,18 +88,13 @@ class DeepFM(BaseModel):
         self.linear = LR(fm_emb_dim_total)
         self.fm = FM(reduce_sum=True)
         self.mlp = MLP(input_dim=mlp_input_dim, **mlp_params)
-
+
+        self.prediction_layer = TaskHead(task_type=self.task)

         # Register regularization weights
         self.register_regularization_weights(
             embedding_attr="embedding", include_modules=["linear", "mlp"]
         )
-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )

     def forward(self, x):
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
nextrec/models/ranking/dien.py
CHANGED

@@ -58,6 +58,7 @@ from nextrec.basic.layers import (
 )
 from nextrec.basic.heads import TaskHead
 from nextrec.basic.model import BaseModel
+from nextrec.utils.types import ActivationName


 class AUGRU(nn.Module):
@@ -211,19 +212,9 @@ class DIEN(BaseModel):
         mlp_params: dict | None = None,
         gru_hidden_size: int = 64,
         attention_hidden_units: list[int] | None = None,
-        attention_activation:
+        attention_activation: ActivationName = "sigmoid",
         use_negsampling: bool = False,
         aux_loss_weight: float = 1.0,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

@@ -232,24 +223,14 @@ class DIEN(BaseModel):
         sequence_features = sequence_features or []
         mlp_params = mlp_params or {}
         attention_hidden_units = attention_hidden_units or [80, 40]
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"

         super(DIEN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

-        self.loss = loss
         self.use_negsampling = use_negsampling
         self.aux_loss_weight = float(aux_loss_weight)
         self.auxiliary_cache = None
@@ -361,13 +342,6 @@ class DIEN(BaseModel):
             ],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         self.auxiliary_cache = None
         if self.candidate_feature:
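Both dien.py (above) and din.py (below) now import ActivationName from nextrec.utils.types and use it to type attention_activation instead of a bare string. The real alias lives in nextrec/utils/types.py (which grows by 62 lines in this diff) and is not shown here; a generic sketch of what such an alias typically looks like, with assumed member names:

from typing import Literal

# Assumed shape only -- the actual ActivationName in nextrec.utils.types may
# list different names. The point is that one alias documents the accepted
# activation strings for signatures like `attention_activation: ActivationName = "dice"`.
ActivationName = Literal["relu", "prelu", "sigmoid", "tanh", "dice"]

def describe(activation: ActivationName = "dice") -> str:
    return f"attention activation: {activation}"

print(describe())           # attention activation: dice
print(describe("sigmoid"))  # attention activation: sigmoid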
nextrec/models/ranking/din.py
CHANGED

@@ -58,6 +58,7 @@ from nextrec.basic.layers import (
 )
 from nextrec.basic.heads import TaskHead
 from nextrec.basic.model import BaseModel
+from nextrec.utils.types import ActivationName


 class DIN(BaseModel):
@@ -78,18 +79,8 @@ class DIN(BaseModel):
         candidate_feature_name: str | None = None,
         mlp_params: dict | None = None,
         attention_hidden_units: list[int] | None = None,
-        attention_activation:
+        attention_activation: ActivationName = "dice",
         attention_use_softmax: bool = True,
-        target: list[str] | str | None = None,
-        task: str | list[str] | None = None,
-        optimizer: str = "adam",
-        optimizer_params: dict | None = None,
-        loss: str | nn.Module | None = "bce",
-        loss_params: dict | list[dict] | None = None,
-        embedding_l1_reg=0.0,
-        dense_l1_reg=0.0,
-        embedding_l2_reg=0.0,
-        dense_l2_reg=0.0,
         **kwargs,
     ):

@@ -98,20 +89,11 @@ class DIN(BaseModel):
         sequence_features = sequence_features or []
         mlp_params = mlp_params or {}
         attention_hidden_units = attention_hidden_units or [80, 40]
-        optimizer_params = optimizer_params or {}
-        if loss is None:
-            loss = "bce"

         super(DIN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
-            target=target,
-            task=task or self.default_task,
-            embedding_l1_reg=embedding_l1_reg,
-            dense_l1_reg=dense_l1_reg,
-            embedding_l2_reg=embedding_l2_reg,
-            dense_l2_reg=dense_l2_reg,
             **kwargs,
         )

@@ -182,13 +164,6 @@ class DIN(BaseModel):
             include_modules=["attention", "mlp", "candidate_attention_proj"],
         )

-        self.compile(
-            optimizer=optimizer,
-            optimizer_params=optimizer_params,
-            loss=loss,
-            loss_params=loss_params,
-        )
-
     def forward(self, x):
         # Get candidate item embedding
         if self.candidate_feature is None: