nextrec-0.4.2-py3-none-any.whl → nextrec-0.4.4-py3-none-any.whl
- nextrec/__version__.py +1 -1
- nextrec/basic/layers.py +32 -8
- nextrec/basic/loggers.py +1 -1
- nextrec/basic/metrics.py +2 -1
- nextrec/basic/model.py +3 -3
- nextrec/cli.py +41 -47
- nextrec/data/dataloader.py +1 -1
- nextrec/models/multi_task/esmm.py +23 -16
- nextrec/models/multi_task/mmoe.py +36 -17
- nextrec/models/multi_task/ple.py +18 -12
- nextrec/models/multi_task/poso.py +68 -37
- nextrec/models/multi_task/share_bottom.py +16 -2
- nextrec/models/ranking/afm.py +14 -14
- nextrec/models/ranking/autoint.py +2 -2
- nextrec/models/ranking/dcn.py +61 -19
- nextrec/models/ranking/dcn_v2.py +224 -45
- nextrec/models/ranking/deepfm.py +14 -9
- nextrec/models/ranking/dien.py +215 -82
- nextrec/models/ranking/din.py +95 -57
- nextrec/models/ranking/fibinet.py +92 -30
- nextrec/models/ranking/fm.py +44 -8
- nextrec/models/ranking/masknet.py +7 -7
- nextrec/models/ranking/pnn.py +105 -38
- nextrec/models/ranking/widedeep.py +8 -4
- nextrec/models/ranking/xdeepfm.py +57 -10
- nextrec/utils/config.py +15 -3
- nextrec/utils/file.py +2 -1
- nextrec/utils/initializer.py +12 -16
- nextrec/utils/model.py +22 -0
- {nextrec-0.4.2.dist-info → nextrec-0.4.4.dist-info}/METADATA +57 -22
- {nextrec-0.4.2.dist-info → nextrec-0.4.4.dist-info}/RECORD +34 -34
- {nextrec-0.4.2.dist-info → nextrec-0.4.4.dist-info}/WHEEL +0 -0
- {nextrec-0.4.2.dist-info → nextrec-0.4.4.dist-info}/entry_points.txt +0 -0
- {nextrec-0.4.2.dist-info → nextrec-0.4.4.dist-info}/licenses/LICENSE +0 -0
nextrec/models/multi_task/poso.py
CHANGED

@@ -46,7 +46,8 @@ from nextrec.basic.features import DenseFeature, SequenceFeature, SparseFeature
 from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.activation import activation_layer
 from nextrec.basic.model import BaseModel
-
+
+from nextrec.utils.model import select_features


 class POSOGate(nn.Module):
@@ -295,15 +296,18 @@ class POSO(BaseModel):

     def __init__(
         self,
-
-
-
-
-
-
+        dense_features: list[DenseFeature] | None,
+        sparse_features: list[SparseFeature] | None,
+        sequence_features: list[SequenceFeature] | None,
+        main_dense_features: list[str] | None,
+        main_sparse_features: list[str] | None,
+        main_sequence_features: list[str] | None,
+        pc_dense_features: list[str] | None,
+        pc_sparse_features: list[str] | None,
+        pc_sequence_features: list[str] | None,
         tower_params_list: list[dict],
         target: list[str],
-        task: str | list[str]
+        task: str | list[str] = "binary",
         architecture: str = "mlp",
         # POSO gating defaults
         gate_hidden_dim: int = 32,
@@ -329,40 +333,32 @@ class POSO(BaseModel):
         dense_l2_reg: float = 1e-4,
         **kwargs,
     ):
-        # Keep explicit copies of main and PC features
-        self.main_dense_features = list(main_dense_features or [])
-        self.main_sparse_features = list(main_sparse_features or [])
-        self.main_sequence_features = list(main_sequence_features or [])
-        self.pc_dense_features = list(pc_dense_features or [])
-        self.pc_sparse_features = list(pc_sparse_features or [])
-        self.pc_sequence_features = list(pc_sequence_features or [])
         self.num_tasks = len(target)

-
-
-
-
-        ):
+        # Normalize task to match num_tasks
+        resolved_task = task
+        if resolved_task is None:
+            resolved_task = self.default_task
+        elif isinstance(resolved_task, str):
+            resolved_task = [resolved_task] * self.num_tasks
+        elif len(resolved_task) == 1 and self.num_tasks > 1:
+            resolved_task = resolved_task * self.num_tasks
+        elif len(resolved_task) != self.num_tasks:
             raise ValueError(
-                "
+                f"Length of task ({len(resolved_task)}) must match number of targets ({self.num_tasks})."
             )

-
-
-
-
-            self.main_sparse_features, self.pc_sparse_features
-        )
-        sequence_features = merge_features(
-            self.main_sequence_features, self.pc_sequence_features
-        )
+        if len(tower_params_list) != self.num_tasks:
+            raise ValueError(
+                f"Number of tower params ({len(tower_params_list)}) must match number of tasks ({self.num_tasks})"
+            )

         super().__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
             target=target,
-            task=
+            task=resolved_task,
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
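POSO above and ShareBottom below now share the same task-broadcasting rule: None falls back to the model default, a single string or a one-element list is broadcast across all targets, and any other length mismatch raises. A minimal standalone sketch of that rule; the helper name resolve_task and the "binary" default (standing in for self.default_task) are illustrative, not nextrec API:

    def resolve_task(task, num_tasks, default="binary"):
        """Broadcast a task spec to one entry per target (sketch of the diff logic)."""
        if task is None:
            return [default] * num_tasks            # model-default fallback
        if isinstance(task, str):
            return [task] * num_tasks               # broadcast a single string
        if len(task) == 1 and num_tasks > 1:
            return list(task) * num_tasks           # broadcast a one-element list
        if len(task) != num_tasks:
            raise ValueError(
                f"Length of task ({len(task)}) must match number of targets ({num_tasks})."
            )
        return list(task)

    assert resolve_task("binary", 3) == ["binary"] * 3
    assert resolve_task(["binary"], 2) == ["binary", "binary"]
    assert resolve_task(["binary", "regression"], 2) == ["binary", "regression"]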
@@ -371,14 +367,42 @@ class POSO(BaseModel):
             **kwargs,
         )

-        self.
+        self.main_dense_feature_names = list(main_dense_features or [])
+        self.main_sparse_feature_names = list(main_sparse_features or [])
+        self.main_sequence_feature_names = list(main_sequence_features or [])
+        self.pc_dense_feature_names = list(pc_dense_features or [])
+        self.pc_sparse_feature_names = list(pc_sparse_features or [])
+        self.pc_sequence_feature_names = list(pc_sequence_features or [])
+
+        if loss is None:
+            self.loss = "bce"
+        self.loss = loss
+
         optimizer_params = optimizer_params or {}

-        self.
-
-
-
+        self.main_dense_features = select_features(
+            self.dense_features, self.main_dense_feature_names, "main_dense_features"
+        )
+        self.main_sparse_features = select_features(
+            self.sparse_features, self.main_sparse_feature_names, "main_sparse_features"
+        )
+        self.main_sequence_features = select_features(
+            self.sequence_features,
+            self.main_sequence_feature_names,
+            "main_sequence_features",
+        )
+
+        self.pc_dense_features = select_features(
+            self.dense_features, self.pc_dense_feature_names, "pc_dense_features"
+        )
+        self.pc_sparse_features = select_features(
+            self.sparse_features, self.pc_sparse_feature_names, "pc_sparse_features"
+        )
+        self.pc_sequence_features = select_features(
+            self.sequence_features,
+            self.pc_sequence_feature_names,
+            "pc_sequence_features",
+        )

         self.main_features = (
             self.main_dense_features
@@ -389,6 +413,13 @@ class POSO(BaseModel):
             self.pc_dense_features + self.pc_sparse_features + self.pc_sequence_features
         )

+        if not self.main_features:
+            raise ValueError("POSO requires at least one main feature.")
+        if not self.pc_features:
+            raise ValueError(
+                "POSO requires at least one PC feature for personalization."
+            )
+
         self.embedding = EmbeddingLayer(features=self.all_features)
         self.main_input_dim = self.embedding.get_input_dim(self.main_features)
         self.pc_input_dim = self.embedding.get_input_dim(self.pc_features)
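The main_*/pc_* name lists are resolved into feature objects by the new select_features helper from nextrec/utils/model.py (a +22-line addition whose body does not appear in this diff). Judging only from the call sites above, it takes a feature list, a list of names, and an argument label for error messages; a plausible sketch under that reading, with the internals (including the .name lookup) being assumptions:

    def select_features(features, names, arg_name):
        # Assumed behavior: pick feature objects by name, preserve the requested
        # order, and fail loudly on unknown names.
        by_name = {f.name: f for f in features}     # assumes features expose .name
        missing = [n for n in names if n not in by_name]
        if missing:
            raise ValueError(f"{arg_name} contains unknown feature names: {missing}")
        return [by_name[n] for n in names]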
nextrec/models/multi_task/share_bottom.py
CHANGED

@@ -69,7 +69,7 @@ class ShareBottom(BaseModel):
         target: list[str],
         task: str | list[str] | None = None,
         optimizer: str = "adam",
-        optimizer_params: dict =
+        optimizer_params: dict | None = None,
         loss: str | nn.Module | list[str | nn.Module] | None = "bce",
         loss_params: dict | list[dict] | None = None,
         device: str = "cpu",

@@ -80,14 +80,28 @@ class ShareBottom(BaseModel):
         **kwargs,
     ):

+        optimizer_params = optimizer_params or {}
+
         self.num_tasks = len(target)

+        resolved_task = task
+        if resolved_task is None:
+            resolved_task = self.default_task
+        elif isinstance(resolved_task, str):
+            resolved_task = [resolved_task] * self.num_tasks
+        elif len(resolved_task) == 1 and self.num_tasks > 1:
+            resolved_task = resolved_task * self.num_tasks
+        elif len(resolved_task) != self.num_tasks:
+            raise ValueError(
+                f"Length of task ({len(resolved_task)}) must match number of targets ({self.num_tasks})."
+            )
+
         super(ShareBottom, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
             target=target,
-            task=
+            task=resolved_task,
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
nextrec/models/ranking/afm.py
CHANGED
@@ -1,7 +1,7 @@
 """
 Date: create on 09/11/2025
-Checkpoint: edit on
-Author: Yang Zhou,zyaztec@gmail.com
+Checkpoint: edit on 09/12/2025
+Author: Yang Zhou, zyaztec@gmail.com
 Reference:
 [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of
 feature interactions via attention networks[C]//IJCAI. 2017: 3119-3125.

@@ -55,15 +55,15 @@ class AFM(BaseModel):

     def __init__(
         self,
-        dense_features: list[DenseFeature] |
-        sparse_features: list[SparseFeature] |
-        sequence_features: list[SequenceFeature] |
+        dense_features: list[DenseFeature] | None = None,
+        sparse_features: list[SparseFeature] | None = None,
+        sequence_features: list[SequenceFeature] | None = None,
         attention_dim: int = 32,
         attention_dropout: float = 0.0,
-        target: list[str] |
+        target: list[str] | str | None = None,
         task: str | list[str] | None = None,
         optimizer: str = "adam",
-        optimizer_params: dict =
+        optimizer_params: dict | None = None,
         loss: str | nn.Module | None = "bce",
         loss_params: dict | list[dict] | None = None,
         device: str = "cpu",

@@ -74,6 +74,13 @@ class AFM(BaseModel):
         **kwargs,
     ):

+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
         super(AFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,

@@ -88,13 +95,6 @@ class AFM(BaseModel):
             **kwargs,
         )

-        if target is None:
-            target = []
-        if optimizer_params is None:
-            optimizer_params = {}
-        if loss is None:
-            loss = "bce"
-
         self.fm_features = sparse_features + sequence_features
         if len(self.fm_features) < 2:
             raise ValueError(
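The 0.4.2 signatures are truncated in this view (optimizer_params: dict = ...), but the direction of the change is clear: optional containers become "| None = None" and are coalesced before super().__init__ consumes them, instead of being patched up afterwards. A minimal sketch, independent of nextrec, of the mutable-default hazard this style avoids:

    def bad(params: dict = {}):        # one dict object shared by every call
        params["n"] = params.get("n", 0) + 1
        return params

    def good(params: dict | None = None):
        params = params or {}          # fresh dict per call
        params["n"] = params.get("n", 0) + 1
        return params

    assert bad() == {"n": 1} and bad() == {"n": 2}    # state leaks across calls
    assert good() == {"n": 1} and good() == {"n": 1}  # calls stay independent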
nextrec/models/ranking/autoint.py
CHANGED

@@ -1,7 +1,7 @@
 """
 Date: create on 09/11/2025
-Checkpoint: edit on
-Author: Yang Zhou,zyaztec@gmail.com
+Checkpoint: edit on 09/12/2025
+Author: Yang Zhou, zyaztec@gmail.com
 Reference:
 [1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
 self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
nextrec/models/ranking/dcn.py
CHANGED
@@ -1,11 +1,53 @@
 """
 Date: create on 09/11/2025
-
-
+Checkpoint: edit on 09/12/2025
+Author: Yang Zhou, zyaztec@gmail.com
 Reference:
-
-
-
+[1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]
+//Proceedings of the ADKDD'17. 2017: 1-7.
+(https://arxiv.org/abs/1708.05123)
+
+Deep & Cross Network (DCN) mixes explicit polynomial feature crosses with a deep
+MLP branch to capture both low-order and high-order interactions for CTR-style
+tasks. Cross layers repeatedly apply x_{l+1} = x0 * (w_l^T x_l) + b_l + x_l,
+which expands feature crosses with linear parameter growth, while the deep branch
+learns nonlinear patterns on the same shared embeddings. The final prediction
+concatenates (or solely uses) cross outputs before a linear head, offering a
+balanced trade-off between interpretability and expressiveness.
+
+Workflow:
+(1) Embed sparse/sequence features and concatenate with dense inputs
+(2) Cross Network builds explicit polynomial interactions via residual crosses
+(3) Optional MLP models implicit high-order nonlinear relationships
+(4) Cross output (and deep output if enabled) are fused for the final logit
+(5) Prediction layer maps logits to binary CTR scores
+
+Key Advantages:
+- Explicit, low-cost cross features with O(L * d) parameters
+- Residual cross formulation stabilizes optimization
+- Optional deep tower increases capacity without losing interpretability
+- Shared embeddings reduce redundant parameters and preprocessing
+- Strong, simple baseline for ad/recommendation ranking tasks
+
+DCN (Deep & Cross Network) builds explicit polynomial feature interactions with
+Cross layers while an optional Deep branch learns high-order nonlinear relations,
+the two sharing one set of embeddings. Cross layers recurse as
+x_{l+1} = x0 * (w_l^T x_l) + b_l + x_l, with linear parameter growth and good
+interpretability; fusing Cross (and Deep) outputs into linear and prediction heads gives an efficient, effective CTR/CVR model.
+
+Workflow:
+(1) Embed sparse/sequence features and concatenate them with dense features
+(2) Cross layers explicitly build multi-order feature crosses in residual form
+(3) An optional MLP learns implicit high-order nonlinear interactions
+(4) Cross (and Deep) outputs are fused and passed to a linear head for the logit
+(5) The prediction layer outputs binary CTR scores
+
+Key advantages:
+- Explicit cross features, linear parameter growth, easy to interpret
+- Residual-style Cross improves training stability
+- The Deep branch flexibly scales model capacity
+- Shared embeddings cut redundant parameters and preprocessing
+- A simple, strong baseline for CTR/CVR ranking tasks
 """

 import torch
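The cross recurrence quoted in the docstring is compact enough to sketch end to end. A minimal PyTorch illustration of x_{l+1} = x0 * (w_l^T x_l) + b_l + x_l; this reads the formula directly and is not nextrec's CrossNetwork module, whose source is not part of this diff:

    import torch
    import torch.nn as nn

    class CrossLayerSketch(nn.Module):
        """One cross layer: x_{l+1} = x0 * (w^T x_l) + b + x_l."""
        def __init__(self, input_dim: int):
            super().__init__()
            self.w = nn.Parameter(torch.randn(input_dim) * 0.01)
            self.b = nn.Parameter(torch.zeros(input_dim))

        def forward(self, x0: torch.Tensor, xl: torch.Tensor) -> torch.Tensor:
            xw = (xl @ self.w).unsqueeze(-1)   # (batch, 1): scalar cross weight per sample
            return x0 * xw + self.b + xl       # rescale x0, add bias and residual

    class CrossNetworkSketch(nn.Module):
        def __init__(self, input_dim: int, num_layers: int = 3):
            super().__init__()
            self.layers = nn.ModuleList(
                [CrossLayerSketch(input_dim) for _ in range(num_layers)]
            )

        def forward(self, x0: torch.Tensor) -> torch.Tensor:
            x = x0
            for layer in self.layers:
                x = layer(x0, x)               # every layer re-uses the original x0
            return x

    x = torch.randn(4, 16)                     # batch of 4, input_dim 16
    print(CrossNetworkSketch(16)(x).shape)     # torch.Size([4, 16])

Each layer owns just a weight and a bias vector of size d, which is where the O(L * d) parameter count claimed above comes from.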
@@ -48,15 +90,15 @@ class DCN(BaseModel):

     def __init__(
         self,
-        dense_features: list[DenseFeature],
-        sparse_features: list[SparseFeature],
-        sequence_features: list[SequenceFeature],
+        dense_features: list[DenseFeature] | None = None,
+        sparse_features: list[SparseFeature] | None = None,
+        sequence_features: list[SequenceFeature] | None = None,
         cross_num: int = 3,
         mlp_params: dict | None = None,
-        target: list[str] =
+        target: list[str] | str | None = None,
         task: str | list[str] | None = None,
         optimizer: str = "adam",
-        optimizer_params: dict =
+        optimizer_params: dict | None = None,
         loss: str | nn.Module | None = "bce",
         loss_params: dict | list[dict] | None = None,
         device: str = "cpu",

@@ -67,6 +109,13 @@ class DCN(BaseModel):
         **kwargs,
     ):

+        dense_features = dense_features or []
+        sparse_features = sparse_features or []
+        sequence_features = sequence_features or []
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
         super(DCN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,

@@ -81,13 +130,6 @@ class DCN(BaseModel):
             **kwargs,
         )

-        self.loss = loss
-        if self.loss is None:
-            self.loss = "bce"
-
-        # All features
-        self.all_features = dense_features + sparse_features + sequence_features
-
         # Embedding layer
         self.embedding = EmbeddingLayer(features=self.all_features)

@@ -104,10 +146,10 @@
         )
         input_dim = emb_dim_total + dense_input_dim

-        # Cross Network
+        # Cross Network for explicit feature crosses
         self.cross_network = CrossNetwork(input_dim=input_dim, num_layers=cross_num)

-        # Deep Network
+        # Deep Network for implicit high-order interactions
         if mlp_params is not None:
             self.use_dnn = True
             self.mlp = MLP(input_dim=input_dim, **mlp_params)