nextrec 0.3.6__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +1 -1
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -5
- nextrec/basic/callback.py +1 -0
- nextrec/basic/features.py +30 -22
- nextrec/basic/layers.py +244 -113
- nextrec/basic/loggers.py +62 -43
- nextrec/basic/metrics.py +268 -119
- nextrec/basic/model.py +1373 -443
- nextrec/basic/session.py +10 -3
- nextrec/cli.py +498 -0
- nextrec/data/__init__.py +19 -25
- nextrec/data/batch_utils.py +11 -3
- nextrec/data/data_processing.py +42 -24
- nextrec/data/data_utils.py +26 -15
- nextrec/data/dataloader.py +303 -96
- nextrec/data/preprocessor.py +320 -199
- nextrec/loss/listwise.py +17 -9
- nextrec/loss/loss_utils.py +7 -8
- nextrec/loss/pairwise.py +2 -0
- nextrec/loss/pointwise.py +30 -12
- nextrec/models/generative/hstu.py +106 -40
- nextrec/models/match/dssm.py +82 -69
- nextrec/models/match/dssm_v2.py +72 -58
- nextrec/models/match/mind.py +175 -108
- nextrec/models/match/sdm.py +104 -88
- nextrec/models/match/youtube_dnn.py +73 -60
- nextrec/models/multi_task/esmm.py +53 -39
- nextrec/models/multi_task/mmoe.py +70 -47
- nextrec/models/multi_task/ple.py +107 -50
- nextrec/models/multi_task/poso.py +121 -41
- nextrec/models/multi_task/share_bottom.py +54 -38
- nextrec/models/ranking/afm.py +172 -45
- nextrec/models/ranking/autoint.py +84 -61
- nextrec/models/ranking/dcn.py +59 -42
- nextrec/models/ranking/dcn_v2.py +64 -23
- nextrec/models/ranking/deepfm.py +36 -26
- nextrec/models/ranking/dien.py +158 -102
- nextrec/models/ranking/din.py +88 -60
- nextrec/models/ranking/fibinet.py +55 -35
- nextrec/models/ranking/fm.py +32 -26
- nextrec/models/ranking/masknet.py +95 -34
- nextrec/models/ranking/pnn.py +34 -31
- nextrec/models/ranking/widedeep.py +37 -29
- nextrec/models/ranking/xdeepfm.py +63 -41
- nextrec/utils/__init__.py +61 -32
- nextrec/utils/config.py +490 -0
- nextrec/utils/device.py +52 -12
- nextrec/utils/distributed.py +141 -0
- nextrec/utils/embedding.py +1 -0
- nextrec/utils/feature.py +1 -0
- nextrec/utils/file.py +32 -11
- nextrec/utils/initializer.py +61 -16
- nextrec/utils/optimizer.py +25 -9
- nextrec/utils/synthetic_data.py +531 -0
- nextrec/utils/tensor.py +24 -13
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/METADATA +15 -5
- nextrec-0.4.2.dist-info/RECORD +69 -0
- nextrec-0.4.2.dist-info/entry_points.txt +2 -0
- nextrec-0.3.6.dist-info/RECORD +0 -64
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -69,12 +69,13 @@ class InstanceGuidedMask(nn.Module):
|
|
|
69
69
|
self.fc2 = nn.Linear(hidden_dim, output_dim)
|
|
70
70
|
|
|
71
71
|
def forward(self, v_emb_flat: torch.Tensor) -> torch.Tensor:
|
|
72
|
-
# v_emb_flat: [batch, features count * embedding_dim]
|
|
72
|
+
# v_emb_flat: [batch, features count * embedding_dim]
|
|
73
73
|
x = self.fc1(v_emb_flat)
|
|
74
74
|
x = F.relu(x)
|
|
75
75
|
v_mask = self.fc2(x)
|
|
76
76
|
return v_mask
|
|
77
77
|
|
|
78
|
+
|
|
78
79
|
class MaskBlockOnEmbedding(nn.Module):
|
|
79
80
|
def __init__(
|
|
80
81
|
self,
|
|
@@ -86,20 +87,28 @@ class MaskBlockOnEmbedding(nn.Module):
|
|
|
86
87
|
super().__init__()
|
|
87
88
|
self.num_fields = num_fields
|
|
88
89
|
self.embedding_dim = embedding_dim
|
|
89
|
-
self.input_dim =
|
|
90
|
+
self.input_dim = (
|
|
91
|
+
num_fields * embedding_dim
|
|
92
|
+
) # input_dim = features count * embedding_dim
|
|
90
93
|
self.ln_emb = nn.LayerNorm(embedding_dim)
|
|
91
|
-
self.mask_gen = InstanceGuidedMask(
|
|
94
|
+
self.mask_gen = InstanceGuidedMask(
|
|
95
|
+
input_dim=self.input_dim,
|
|
96
|
+
hidden_dim=mask_hidden_dim,
|
|
97
|
+
output_dim=self.input_dim,
|
|
98
|
+
)
|
|
92
99
|
self.ffn = nn.Linear(self.input_dim, hidden_dim)
|
|
93
100
|
self.ln_hid = nn.LayerNorm(hidden_dim)
|
|
94
101
|
|
|
95
102
|
# different from MaskBlockOnHidden: input is field embeddings
|
|
96
|
-
def forward(
|
|
103
|
+
def forward(
|
|
104
|
+
self, field_emb: torch.Tensor, v_emb_flat: torch.Tensor
|
|
105
|
+
) -> torch.Tensor:
|
|
97
106
|
B = field_emb.size(0)
|
|
98
|
-
norm_emb = self.ln_emb(field_emb)
|
|
99
|
-
norm_emb_flat = norm_emb.view(B, -1)
|
|
100
|
-
v_mask = self.mask_gen(v_emb_flat)
|
|
101
|
-
v_masked_emb = v_mask * norm_emb_flat
|
|
102
|
-
hidden = self.ffn(v_masked_emb)
|
|
107
|
+
norm_emb = self.ln_emb(field_emb) # [B, features count, embedding_dim]
|
|
108
|
+
norm_emb_flat = norm_emb.view(B, -1) # [B, features count * embedding_dim]
|
|
109
|
+
v_mask = self.mask_gen(v_emb_flat) # [B, features count * embedding_dim]
|
|
110
|
+
v_masked_emb = v_mask * norm_emb_flat # [B, features count * embedding_dim]
|
|
111
|
+
hidden = self.ffn(v_masked_emb) # [B, hidden_dim]
|
|
103
112
|
hidden = self.ln_hid(hidden)
|
|
104
113
|
hidden = F.relu(hidden)
|
|
105
114
|
|
|
@@ -123,15 +132,21 @@ class MaskBlockOnHidden(nn.Module):
|
|
|
123
132
|
self.ln_input = nn.LayerNorm(hidden_dim)
|
|
124
133
|
self.ln_output = nn.LayerNorm(hidden_dim)
|
|
125
134
|
|
|
126
|
-
self.mask_gen = InstanceGuidedMask(
|
|
135
|
+
self.mask_gen = InstanceGuidedMask(
|
|
136
|
+
input_dim=self.v_emb_dim,
|
|
137
|
+
hidden_dim=mask_hidden_dim,
|
|
138
|
+
output_dim=hidden_dim,
|
|
139
|
+
)
|
|
127
140
|
self.ffn = nn.Linear(hidden_dim, hidden_dim)
|
|
128
141
|
|
|
129
142
|
# different from MaskBlockOnEmbedding: input is hidden representation
|
|
130
|
-
def forward(
|
|
131
|
-
|
|
143
|
+
def forward(
|
|
144
|
+
self, hidden_in: torch.Tensor, v_emb_flat: torch.Tensor
|
|
145
|
+
) -> torch.Tensor:
|
|
146
|
+
norm_hidden = self.ln_input(hidden_in)
|
|
132
147
|
v_mask = self.mask_gen(v_emb_flat)
|
|
133
|
-
v_masked_hid = v_mask * norm_hidden
|
|
134
|
-
out = self.ffn(v_masked_hid)
|
|
148
|
+
v_masked_hid = v_mask * norm_hidden
|
|
149
|
+
out = self.ffn(v_masked_hid)
|
|
135
150
|
out = self.ln_output(out)
|
|
136
151
|
out = F.relu(out)
|
|
137
152
|
return out
|
|
@@ -143,8 +158,7 @@ class MaskNet(BaseModel):
|
|
|
143
158
|
return "MaskNet"
|
|
144
159
|
|
|
145
160
|
@property
|
|
146
|
-
def
|
|
147
|
-
# Align with PredictionLayer supported task types
|
|
161
|
+
def default_task(self):
|
|
148
162
|
return "binary"
|
|
149
163
|
|
|
150
164
|
def __init__(
|
|
@@ -152,13 +166,14 @@ class MaskNet(BaseModel):
|
|
|
152
166
|
dense_features: list[DenseFeature] | None = None,
|
|
153
167
|
sparse_features: list[SparseFeature] | None = None,
|
|
154
168
|
sequence_features: list[SequenceFeature] | None = None,
|
|
155
|
-
model_type: str = "parallel",
|
|
169
|
+
model_type: str = "parallel", # "serial" or "parallel"
|
|
156
170
|
num_blocks: int = 3,
|
|
157
171
|
mask_hidden_dim: int = 64,
|
|
158
172
|
block_hidden_dim: int = 256,
|
|
159
173
|
block_dropout: float = 0.0,
|
|
160
174
|
mlp_params: dict | None = None,
|
|
161
175
|
target: list[str] | None = None,
|
|
176
|
+
task: str | list[str] | None = None,
|
|
162
177
|
optimizer: str = "adam",
|
|
163
178
|
optimizer_params: dict | None = None,
|
|
164
179
|
loss: str | nn.Module | None = "bce",
|
|
@@ -182,13 +197,12 @@ class MaskNet(BaseModel):
|
|
|
182
197
|
sparse_features=sparse_features,
|
|
183
198
|
sequence_features=sequence_features,
|
|
184
199
|
target=target,
|
|
185
|
-
task=self.
|
|
200
|
+
task=task or self.default_task,
|
|
186
201
|
device=device,
|
|
187
202
|
embedding_l1_reg=embedding_l1_reg,
|
|
188
203
|
dense_l1_reg=dense_l1_reg,
|
|
189
204
|
embedding_l2_reg=embedding_l2_reg,
|
|
190
205
|
dense_l2_reg=dense_l2_reg,
|
|
191
|
-
early_stop_patience=20,
|
|
192
206
|
**kwargs,
|
|
193
207
|
)
|
|
194
208
|
|
|
@@ -200,50 +214,97 @@ class MaskNet(BaseModel):
|
|
|
200
214
|
self.sparse_features = sparse_features
|
|
201
215
|
self.sequence_features = sequence_features
|
|
202
216
|
self.mask_features = self.all_features # use all features for masking
|
|
203
|
-
assert
|
|
217
|
+
assert (
|
|
218
|
+
len(self.mask_features) > 0
|
|
219
|
+
), "MaskNet requires at least one feature for masking."
|
|
204
220
|
self.embedding = EmbeddingLayer(features=self.mask_features)
|
|
205
221
|
self.num_fields = len(self.mask_features)
|
|
206
222
|
self.embedding_dim = getattr(self.mask_features[0], "embedding_dim", None)
|
|
207
|
-
assert
|
|
223
|
+
assert (
|
|
224
|
+
self.embedding_dim is not None
|
|
225
|
+
), "MaskNet requires mask_features to have 'embedding_dim' defined."
|
|
208
226
|
|
|
209
227
|
for f in self.mask_features:
|
|
210
228
|
edim = getattr(f, "embedding_dim", None)
|
|
211
229
|
if edim is None or edim != self.embedding_dim:
|
|
212
|
-
raise ValueError(
|
|
230
|
+
raise ValueError(
|
|
231
|
+
f"MaskNet expects identical embedding_dim across all mask_features, but got {edim} for feature {getattr(f, 'name', type(f))}."
|
|
232
|
+
)
|
|
213
233
|
|
|
214
234
|
self.v_emb_dim = self.num_fields * self.embedding_dim
|
|
215
235
|
self.model_type = model_type.lower()
|
|
216
|
-
assert self.model_type in (
|
|
236
|
+
assert self.model_type in (
|
|
237
|
+
"serial",
|
|
238
|
+
"parallel",
|
|
239
|
+
), "model_type must be either 'serial' or 'parallel'."
|
|
217
240
|
|
|
218
241
|
self.num_blocks = max(1, num_blocks)
|
|
219
242
|
self.block_hidden_dim = block_hidden_dim
|
|
220
|
-
self.block_dropout =
|
|
243
|
+
self.block_dropout = (
|
|
244
|
+
nn.Dropout(block_dropout) if block_dropout > 0 else nn.Identity()
|
|
245
|
+
)
|
|
221
246
|
|
|
222
247
|
if self.model_type == "serial":
|
|
223
|
-
self.first_block = MaskBlockOnEmbedding(
|
|
248
|
+
self.first_block = MaskBlockOnEmbedding(
|
|
249
|
+
num_fields=self.num_fields,
|
|
250
|
+
embedding_dim=self.embedding_dim,
|
|
251
|
+
mask_hidden_dim=mask_hidden_dim,
|
|
252
|
+
hidden_dim=block_hidden_dim,
|
|
253
|
+
)
|
|
224
254
|
self.hidden_blocks = nn.ModuleList(
|
|
225
|
-
[
|
|
255
|
+
[
|
|
256
|
+
MaskBlockOnHidden(
|
|
257
|
+
num_fields=self.num_fields,
|
|
258
|
+
embedding_dim=self.embedding_dim,
|
|
259
|
+
mask_hidden_dim=mask_hidden_dim,
|
|
260
|
+
hidden_dim=block_hidden_dim,
|
|
261
|
+
)
|
|
262
|
+
for _ in range(self.num_blocks - 1)
|
|
263
|
+
]
|
|
264
|
+
)
|
|
226
265
|
self.mask_blocks = nn.ModuleList([self.first_block, *self.hidden_blocks])
|
|
227
266
|
self.output_layer = nn.Linear(block_hidden_dim, 1)
|
|
228
267
|
self.final_mlp = None
|
|
229
268
|
|
|
230
269
|
else: # parallel
|
|
231
|
-
self.mask_blocks = nn.ModuleList(
|
|
232
|
-
|
|
270
|
+
self.mask_blocks = nn.ModuleList(
|
|
271
|
+
[
|
|
272
|
+
MaskBlockOnEmbedding(
|
|
273
|
+
num_fields=self.num_fields,
|
|
274
|
+
embedding_dim=self.embedding_dim,
|
|
275
|
+
mask_hidden_dim=mask_hidden_dim,
|
|
276
|
+
hidden_dim=block_hidden_dim,
|
|
277
|
+
)
|
|
278
|
+
for _ in range(self.num_blocks)
|
|
279
|
+
]
|
|
280
|
+
)
|
|
281
|
+
self.final_mlp = MLP(
|
|
282
|
+
input_dim=self.num_blocks * block_hidden_dim, **mlp_params
|
|
283
|
+
)
|
|
233
284
|
self.output_layer = None
|
|
234
|
-
self.prediction_layer = PredictionLayer(task_type=self.
|
|
285
|
+
self.prediction_layer = PredictionLayer(task_type=self.task)
|
|
235
286
|
|
|
236
287
|
if self.model_type == "serial":
|
|
237
|
-
self.register_regularization_weights(
|
|
288
|
+
self.register_regularization_weights(
|
|
289
|
+
embedding_attr="embedding",
|
|
290
|
+
include_modules=["mask_blocks", "output_layer"],
|
|
291
|
+
)
|
|
238
292
|
# serial
|
|
239
293
|
else:
|
|
240
|
-
self.register_regularization_weights(
|
|
241
|
-
|
|
294
|
+
self.register_regularization_weights(
|
|
295
|
+
embedding_attr="embedding", include_modules=["mask_blocks", "final_mlp"]
|
|
296
|
+
)
|
|
297
|
+
self.compile(
|
|
298
|
+
optimizer=optimizer,
|
|
299
|
+
optimizer_params=optimizer_params,
|
|
300
|
+
loss=loss,
|
|
301
|
+
loss_params=loss_params,
|
|
302
|
+
)
|
|
242
303
|
|
|
243
304
|
def forward(self, x: dict[str, torch.Tensor]) -> torch.Tensor:
|
|
244
305
|
field_emb = self.embedding(x=x, features=self.mask_features, squeeze_dim=False)
|
|
245
306
|
B = field_emb.size(0)
|
|
246
|
-
v_emb_flat = field_emb.view(B, -1) # flattened embeddings
|
|
307
|
+
v_emb_flat = field_emb.view(B, -1) # flattened embeddings
|
|
247
308
|
|
|
248
309
|
if self.model_type == "parallel":
|
|
249
310
|
block_outputs = []
|
|
@@ -254,7 +315,7 @@ class MaskNet(BaseModel):
|
|
|
254
315
|
concat_hidden = torch.cat(block_outputs, dim=-1)
|
|
255
316
|
logit = self.final_mlp(concat_hidden) # [B, 1]
|
|
256
317
|
# serial
|
|
257
|
-
else:
|
|
318
|
+
else:
|
|
258
319
|
hidden = self.first_block(field_emb, v_emb_flat)
|
|
259
320
|
hidden = self.block_dropout(hidden)
|
|
260
321
|
for block in self.hidden_blocks:
|
nextrec/models/ranking/pnn.py
CHANGED
|
@@ -20,47 +20,49 @@ class PNN(BaseModel):
|
|
|
20
20
|
return "PNN"
|
|
21
21
|
|
|
22
22
|
@property
|
|
23
|
-
def
|
|
23
|
+
def default_task(self):
|
|
24
24
|
return "binary"
|
|
25
|
-
|
|
26
|
-
def __init__(
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
25
|
+
|
|
26
|
+
def __init__(
|
|
27
|
+
self,
|
|
28
|
+
dense_features: list[DenseFeature] | list = [],
|
|
29
|
+
sparse_features: list[SparseFeature] | list = [],
|
|
30
|
+
sequence_features: list[SequenceFeature] | list = [],
|
|
31
|
+
mlp_params: dict = {},
|
|
32
|
+
product_type: str = "inner",
|
|
33
|
+
outer_product_dim: int | None = None,
|
|
34
|
+
target: list[str] | list = [],
|
|
35
|
+
task: str | list[str] | None = None,
|
|
36
|
+
optimizer: str = "adam",
|
|
37
|
+
optimizer_params: dict = {},
|
|
38
|
+
loss: str | nn.Module | None = "bce",
|
|
39
|
+
loss_params: dict | list[dict] | None = None,
|
|
40
|
+
device: str = "cpu",
|
|
41
|
+
embedding_l1_reg=1e-6,
|
|
42
|
+
dense_l1_reg=1e-5,
|
|
43
|
+
embedding_l2_reg=1e-5,
|
|
44
|
+
dense_l2_reg=1e-4,
|
|
45
|
+
**kwargs,
|
|
46
|
+
):
|
|
47
|
+
|
|
45
48
|
super(PNN, self).__init__(
|
|
46
49
|
dense_features=dense_features,
|
|
47
50
|
sparse_features=sparse_features,
|
|
48
51
|
sequence_features=sequence_features,
|
|
49
52
|
target=target,
|
|
50
|
-
task=self.
|
|
53
|
+
task=task or self.default_task,
|
|
51
54
|
device=device,
|
|
52
55
|
embedding_l1_reg=embedding_l1_reg,
|
|
53
56
|
dense_l1_reg=dense_l1_reg,
|
|
54
57
|
embedding_l2_reg=embedding_l2_reg,
|
|
55
58
|
dense_l2_reg=dense_l2_reg,
|
|
56
|
-
|
|
57
|
-
**kwargs
|
|
59
|
+
**kwargs,
|
|
58
60
|
)
|
|
59
61
|
|
|
60
62
|
self.loss = loss
|
|
61
63
|
if self.loss is None:
|
|
62
64
|
self.loss = "bce"
|
|
63
|
-
|
|
65
|
+
|
|
64
66
|
self.field_features = sparse_features + sequence_features
|
|
65
67
|
if len(self.field_features) < 2:
|
|
66
68
|
raise ValueError("PNN requires at least two sparse/sequence features.")
|
|
@@ -69,7 +71,9 @@ class PNN(BaseModel):
|
|
|
69
71
|
self.num_fields = len(self.field_features)
|
|
70
72
|
self.embedding_dim = self.field_features[0].embedding_dim
|
|
71
73
|
if any(f.embedding_dim != self.embedding_dim for f in self.field_features):
|
|
72
|
-
raise ValueError(
|
|
74
|
+
raise ValueError(
|
|
75
|
+
"All field features must share the same embedding_dim for PNN."
|
|
76
|
+
)
|
|
73
77
|
|
|
74
78
|
self.product_type = product_type.lower()
|
|
75
79
|
if self.product_type not in {"inner", "outer"}:
|
|
@@ -86,14 +90,13 @@ class PNN(BaseModel):
|
|
|
86
90
|
|
|
87
91
|
linear_dim = self.num_fields * self.embedding_dim
|
|
88
92
|
self.mlp = MLP(input_dim=linear_dim + product_dim, **mlp_params)
|
|
89
|
-
self.prediction_layer = PredictionLayer(task_type=self.
|
|
93
|
+
self.prediction_layer = PredictionLayer(task_type=self.task)
|
|
90
94
|
|
|
91
|
-
modules = [
|
|
95
|
+
modules = ["mlp"]
|
|
92
96
|
if self.product_type == "outer":
|
|
93
|
-
modules.append(
|
|
97
|
+
modules.append("kernel")
|
|
94
98
|
self.register_regularization_weights(
|
|
95
|
-
embedding_attr=
|
|
96
|
-
include_modules=modules
|
|
99
|
+
embedding_attr="embedding", include_modules=modules
|
|
97
100
|
)
|
|
98
101
|
|
|
99
102
|
self.compile(
|
|
@@ -39,7 +39,6 @@ Wide & Deep 同时使用宽线性部分(记忆共现/手工交叉)与深网
|
|
|
39
39
|
- 共享特征空间,减少工程开销
|
|
40
40
|
"""
|
|
41
41
|
|
|
42
|
-
import torch
|
|
43
42
|
import torch.nn as nn
|
|
44
43
|
|
|
45
44
|
from nextrec.basic.model import BaseModel
|
|
@@ -53,45 +52,47 @@ class WideDeep(BaseModel):
|
|
|
53
52
|
return "WideDeep"
|
|
54
53
|
|
|
55
54
|
@property
|
|
56
|
-
def
|
|
55
|
+
def default_task(self):
|
|
57
56
|
return "binary"
|
|
58
|
-
|
|
59
|
-
def __init__(
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
57
|
+
|
|
58
|
+
def __init__(
|
|
59
|
+
self,
|
|
60
|
+
dense_features: list[DenseFeature],
|
|
61
|
+
sparse_features: list[SparseFeature],
|
|
62
|
+
sequence_features: list[SequenceFeature],
|
|
63
|
+
mlp_params: dict,
|
|
64
|
+
target: list[str] = [],
|
|
65
|
+
task: str | list[str] | None = None,
|
|
66
|
+
optimizer: str = "adam",
|
|
67
|
+
optimizer_params: dict = {},
|
|
68
|
+
loss: str | nn.Module | None = "bce",
|
|
69
|
+
loss_params: dict | list[dict] | None = None,
|
|
70
|
+
device: str = "cpu",
|
|
71
|
+
embedding_l1_reg=1e-6,
|
|
72
|
+
dense_l1_reg=1e-5,
|
|
73
|
+
embedding_l2_reg=1e-5,
|
|
74
|
+
dense_l2_reg=1e-4,
|
|
75
|
+
**kwargs,
|
|
76
|
+
):
|
|
77
|
+
|
|
76
78
|
super(WideDeep, self).__init__(
|
|
77
79
|
dense_features=dense_features,
|
|
78
80
|
sparse_features=sparse_features,
|
|
79
81
|
sequence_features=sequence_features,
|
|
80
82
|
target=target,
|
|
81
|
-
task=self.
|
|
83
|
+
task=task or self.default_task,
|
|
82
84
|
device=device,
|
|
83
85
|
embedding_l1_reg=embedding_l1_reg,
|
|
84
86
|
dense_l1_reg=dense_l1_reg,
|
|
85
87
|
embedding_l2_reg=embedding_l2_reg,
|
|
86
88
|
dense_l2_reg=dense_l2_reg,
|
|
87
|
-
|
|
88
|
-
**kwargs
|
|
89
|
+
**kwargs,
|
|
89
90
|
)
|
|
90
91
|
|
|
91
92
|
self.loss = loss
|
|
92
93
|
if self.loss is None:
|
|
93
94
|
self.loss = "bce"
|
|
94
|
-
|
|
95
|
+
|
|
95
96
|
# Wide part: use all features for linear model
|
|
96
97
|
self.wide_features = sparse_features + sequence_features
|
|
97
98
|
# Deep part: use all features
|
|
@@ -103,22 +104,29 @@ class WideDeep(BaseModel):
|
|
|
103
104
|
# Wide part: Linear layer
|
|
104
105
|
wide_dim = sum([f.embedding_dim for f in self.wide_features])
|
|
105
106
|
self.linear = LR(wide_dim)
|
|
106
|
-
|
|
107
|
+
|
|
107
108
|
# Deep part: MLP
|
|
108
109
|
input_dim = self.embedding.input_dim
|
|
109
110
|
# deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
|
|
110
111
|
# dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
|
|
111
112
|
self.mlp = MLP(input_dim=input_dim, **mlp_params)
|
|
112
|
-
self.prediction_layer = PredictionLayer(task_type=self.
|
|
113
|
+
self.prediction_layer = PredictionLayer(task_type=self.task)
|
|
113
114
|
# Register regularization weights
|
|
114
|
-
self.register_regularization_weights(
|
|
115
|
-
|
|
115
|
+
self.register_regularization_weights(
|
|
116
|
+
embedding_attr="embedding", include_modules=["linear", "mlp"]
|
|
117
|
+
)
|
|
118
|
+
self.compile(
|
|
119
|
+
optimizer=optimizer,
|
|
120
|
+
optimizer_params=optimizer_params,
|
|
121
|
+
loss=loss,
|
|
122
|
+
loss_params=loss_params,
|
|
123
|
+
)
|
|
116
124
|
|
|
117
125
|
def forward(self, x):
|
|
118
126
|
# Deep part
|
|
119
127
|
input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
|
|
120
128
|
y_deep = self.mlp(input_deep) # [B, 1]
|
|
121
|
-
|
|
129
|
+
|
|
122
130
|
# Wide part
|
|
123
131
|
input_wide = self.embedding(x=x, features=self.wide_features, squeeze_dim=True)
|
|
124
132
|
y_wide = self.linear(input_wide)
|
|
@@ -3,8 +3,8 @@ Date: create on 09/11/2025
|
|
|
3
3
|
Author:
|
|
4
4
|
Yang Zhou,zyaztec@gmail.com
|
|
5
5
|
Reference:
|
|
6
|
-
[1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
|
|
7
|
-
for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
|
|
6
|
+
[1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
|
|
7
|
+
for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
|
|
8
8
|
knowledge discovery & data mining. 2018: 1754-1763.
|
|
9
9
|
(https://arxiv.org/abs/1803.05170)
|
|
10
10
|
"""
|
|
@@ -17,6 +17,7 @@ from nextrec.basic.model import BaseModel
|
|
|
17
17
|
from nextrec.basic.layers import LR, EmbeddingLayer, MLP, PredictionLayer
|
|
18
18
|
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
19
19
|
|
|
20
|
+
|
|
20
21
|
class CIN(nn.Module):
|
|
21
22
|
"""Compressed Interaction Network from xDeepFM (Lian et al., 2018)."""
|
|
22
23
|
|
|
@@ -28,7 +29,16 @@ class CIN(nn.Module):
|
|
|
28
29
|
prev_dim, fc_input_dim = input_dim, 0
|
|
29
30
|
for i in range(self.num_layers):
|
|
30
31
|
cross_layer_size = cin_size[i]
|
|
31
|
-
self.conv_layers.append(
|
|
32
|
+
self.conv_layers.append(
|
|
33
|
+
torch.nn.Conv1d(
|
|
34
|
+
input_dim * prev_dim,
|
|
35
|
+
cross_layer_size,
|
|
36
|
+
1,
|
|
37
|
+
stride=1,
|
|
38
|
+
dilation=1,
|
|
39
|
+
bias=True,
|
|
40
|
+
)
|
|
41
|
+
)
|
|
32
42
|
if self.split_half and i != self.num_layers - 1:
|
|
33
43
|
cross_layer_size //= 2
|
|
34
44
|
prev_dim = cross_layer_size
|
|
@@ -49,57 +59,60 @@ class CIN(nn.Module):
|
|
|
49
59
|
h = x
|
|
50
60
|
xs.append(x)
|
|
51
61
|
return self.fc(torch.sum(torch.cat(xs, dim=1), 2))
|
|
52
|
-
|
|
62
|
+
|
|
63
|
+
|
|
53
64
|
class xDeepFM(BaseModel):
|
|
54
65
|
@property
|
|
55
66
|
def model_name(self):
|
|
56
67
|
return "xDeepFM"
|
|
57
68
|
|
|
58
69
|
@property
|
|
59
|
-
def
|
|
70
|
+
def default_task(self):
|
|
60
71
|
return "binary"
|
|
61
|
-
|
|
62
|
-
def __init__(
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
72
|
+
|
|
73
|
+
def __init__(
|
|
74
|
+
self,
|
|
75
|
+
dense_features: list[DenseFeature],
|
|
76
|
+
sparse_features: list[SparseFeature],
|
|
77
|
+
sequence_features: list[SequenceFeature],
|
|
78
|
+
mlp_params: dict,
|
|
79
|
+
cin_size: list[int] = [128, 128],
|
|
80
|
+
split_half: bool = True,
|
|
81
|
+
target: list[str] = [],
|
|
82
|
+
task: str | list[str] | None = None,
|
|
83
|
+
optimizer: str = "adam",
|
|
84
|
+
optimizer_params: dict = {},
|
|
85
|
+
loss: str | nn.Module | None = "bce",
|
|
86
|
+
loss_params: dict | list[dict] | None = None,
|
|
87
|
+
device: str = "cpu",
|
|
88
|
+
embedding_l1_reg=1e-6,
|
|
89
|
+
dense_l1_reg=1e-5,
|
|
90
|
+
embedding_l2_reg=1e-5,
|
|
91
|
+
dense_l2_reg=1e-4,
|
|
92
|
+
**kwargs,
|
|
93
|
+
):
|
|
94
|
+
|
|
81
95
|
super(xDeepFM, self).__init__(
|
|
82
96
|
dense_features=dense_features,
|
|
83
97
|
sparse_features=sparse_features,
|
|
84
98
|
sequence_features=sequence_features,
|
|
85
99
|
target=target,
|
|
86
|
-
task=self.
|
|
100
|
+
task=task or self.default_task,
|
|
87
101
|
device=device,
|
|
88
102
|
embedding_l1_reg=embedding_l1_reg,
|
|
89
103
|
dense_l1_reg=dense_l1_reg,
|
|
90
104
|
embedding_l2_reg=embedding_l2_reg,
|
|
91
105
|
dense_l2_reg=dense_l2_reg,
|
|
92
|
-
|
|
93
|
-
**kwargs
|
|
106
|
+
**kwargs,
|
|
94
107
|
)
|
|
95
108
|
|
|
96
109
|
self.loss = loss
|
|
97
110
|
if self.loss is None:
|
|
98
111
|
self.loss = "bce"
|
|
99
|
-
|
|
112
|
+
|
|
100
113
|
# Linear part and CIN part: use sparse and sequence features
|
|
101
114
|
self.linear_features = sparse_features + sequence_features
|
|
102
|
-
|
|
115
|
+
|
|
103
116
|
# Deep part: use all features
|
|
104
117
|
self.deep_features = dense_features + sparse_features + sequence_features
|
|
105
118
|
|
|
@@ -109,21 +122,28 @@ class xDeepFM(BaseModel):
|
|
|
109
122
|
# Linear part
|
|
110
123
|
linear_dim = sum([f.embedding_dim for f in self.linear_features])
|
|
111
124
|
self.linear = LR(linear_dim)
|
|
112
|
-
|
|
125
|
+
|
|
113
126
|
# CIN part: Compressed Interaction Network
|
|
114
127
|
num_fields = len(self.linear_features)
|
|
115
128
|
self.cin = CIN(input_dim=num_fields, cin_size=cin_size, split_half=split_half)
|
|
116
|
-
|
|
129
|
+
|
|
117
130
|
# Deep part: DNN
|
|
118
|
-
deep_emb_dim_total = sum(
|
|
119
|
-
|
|
131
|
+
deep_emb_dim_total = sum(
|
|
132
|
+
[
|
|
133
|
+
f.embedding_dim
|
|
134
|
+
for f in self.deep_features
|
|
135
|
+
if not isinstance(f, DenseFeature)
|
|
136
|
+
]
|
|
137
|
+
)
|
|
138
|
+
dense_input_dim = sum(
|
|
139
|
+
[getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
|
|
140
|
+
)
|
|
120
141
|
self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
|
|
121
|
-
self.prediction_layer = PredictionLayer(task_type=self.
|
|
142
|
+
self.prediction_layer = PredictionLayer(task_type=self.task)
|
|
122
143
|
|
|
123
144
|
# Register regularization weights
|
|
124
145
|
self.register_regularization_weights(
|
|
125
|
-
embedding_attr=
|
|
126
|
-
include_modules=['linear', 'cin', 'mlp']
|
|
146
|
+
embedding_attr="embedding", include_modules=["linear", "cin", "mlp"]
|
|
127
147
|
)
|
|
128
148
|
|
|
129
149
|
self.compile(
|
|
@@ -135,14 +155,16 @@ class xDeepFM(BaseModel):
|
|
|
135
155
|
|
|
136
156
|
def forward(self, x):
|
|
137
157
|
# Get embeddings for linear and CIN (sparse features only)
|
|
138
|
-
input_linear = self.embedding(
|
|
139
|
-
|
|
158
|
+
input_linear = self.embedding(
|
|
159
|
+
x=x, features=self.linear_features, squeeze_dim=False
|
|
160
|
+
)
|
|
161
|
+
|
|
140
162
|
# Linear part
|
|
141
163
|
y_linear = self.linear(input_linear.flatten(start_dim=1))
|
|
142
|
-
|
|
164
|
+
|
|
143
165
|
# CIN part
|
|
144
166
|
y_cin = self.cin(input_linear) # [B, 1]
|
|
145
|
-
|
|
167
|
+
|
|
146
168
|
# Deep part
|
|
147
169
|
input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
|
|
148
170
|
y_deep = self.mlp(input_deep) # [B, 1]
|