nextrec 0.3.6-py3-none-any.whl → 0.4.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +1 -1
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -5
- nextrec/basic/callback.py +1 -0
- nextrec/basic/features.py +30 -22
- nextrec/basic/layers.py +244 -113
- nextrec/basic/loggers.py +62 -43
- nextrec/basic/metrics.py +268 -119
- nextrec/basic/model.py +1373 -443
- nextrec/basic/session.py +10 -3
- nextrec/cli.py +498 -0
- nextrec/data/__init__.py +19 -25
- nextrec/data/batch_utils.py +11 -3
- nextrec/data/data_processing.py +42 -24
- nextrec/data/data_utils.py +26 -15
- nextrec/data/dataloader.py +303 -96
- nextrec/data/preprocessor.py +320 -199
- nextrec/loss/listwise.py +17 -9
- nextrec/loss/loss_utils.py +7 -8
- nextrec/loss/pairwise.py +2 -0
- nextrec/loss/pointwise.py +30 -12
- nextrec/models/generative/hstu.py +106 -40
- nextrec/models/match/dssm.py +82 -69
- nextrec/models/match/dssm_v2.py +72 -58
- nextrec/models/match/mind.py +175 -108
- nextrec/models/match/sdm.py +104 -88
- nextrec/models/match/youtube_dnn.py +73 -60
- nextrec/models/multi_task/esmm.py +53 -39
- nextrec/models/multi_task/mmoe.py +70 -47
- nextrec/models/multi_task/ple.py +107 -50
- nextrec/models/multi_task/poso.py +121 -41
- nextrec/models/multi_task/share_bottom.py +54 -38
- nextrec/models/ranking/afm.py +172 -45
- nextrec/models/ranking/autoint.py +84 -61
- nextrec/models/ranking/dcn.py +59 -42
- nextrec/models/ranking/dcn_v2.py +64 -23
- nextrec/models/ranking/deepfm.py +36 -26
- nextrec/models/ranking/dien.py +158 -102
- nextrec/models/ranking/din.py +88 -60
- nextrec/models/ranking/fibinet.py +55 -35
- nextrec/models/ranking/fm.py +32 -26
- nextrec/models/ranking/masknet.py +95 -34
- nextrec/models/ranking/pnn.py +34 -31
- nextrec/models/ranking/widedeep.py +37 -29
- nextrec/models/ranking/xdeepfm.py +63 -41
- nextrec/utils/__init__.py +61 -32
- nextrec/utils/config.py +490 -0
- nextrec/utils/device.py +52 -12
- nextrec/utils/distributed.py +141 -0
- nextrec/utils/embedding.py +1 -0
- nextrec/utils/feature.py +1 -0
- nextrec/utils/file.py +32 -11
- nextrec/utils/initializer.py +61 -16
- nextrec/utils/optimizer.py +25 -9
- nextrec/utils/synthetic_data.py +531 -0
- nextrec/utils/tensor.py +24 -13
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/METADATA +15 -5
- nextrec-0.4.2.dist-info/RECORD +69 -0
- nextrec-0.4.2.dist-info/entry_points.txt +2 -0
- nextrec-0.3.6.dist-info/RECORD +0 -64
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/dcn_v2.py
CHANGED
@@ -5,25 +5,30 @@ Date: create on 09/11/2025
 import torch
 import torch.nn as nn

-from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import EmbeddingLayer, MLP, PredictionLayer
-from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature

 class CrossNetV2(nn.Module):
     """Vector-wise cross network proposed in DCN V2 (Wang et al., 2021)."""
+
     def __init__(self, input_dim, num_layers):
         super().__init__()
         self.num_layers = num_layers
-        self.w = torch.nn.ModuleList(
-
-
+        self.w = torch.nn.ModuleList(
+            [
+                torch.nn.Linear(input_dim, input_dim, bias=False)
+                for _ in range(num_layers)
+            ]
+        )
+        self.b = torch.nn.ParameterList(
+            [torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)]
+        )

     def forward(self, x):
         x0 = x
         for i in range(self.num_layers):
-            x =x0*self.w[i](x) + self.b[i] + x
+            x = x0 * self.w[i](x) + self.b[i] + x
         return x
-
+
+
 class CrossNetMix(nn.Module):
     """Mixture of low-rank cross experts from DCN V2 (Wang et al., 2021)."""

@@ -33,18 +38,46 @@ class CrossNetMix(nn.Module):
         self.num_experts = num_experts

         # U: (input_dim, low_rank)
-        self.u_list = torch.nn.ParameterList(
-
+        self.u_list = torch.nn.ParameterList(
+            [
+                nn.Parameter(
+                    nn.init.xavier_normal_(
+                        torch.empty(num_experts, input_dim, low_rank)
+                    )
+                )
+                for i in range(self.num_layers)
+            ]
+        )
         # V: (input_dim, low_rank)
-        self.v_list = torch.nn.ParameterList(
-
+        self.v_list = torch.nn.ParameterList(
+            [
+                nn.Parameter(
+                    nn.init.xavier_normal_(
+                        torch.empty(num_experts, input_dim, low_rank)
+                    )
+                )
+                for i in range(self.num_layers)
+            ]
+        )
         # C: (low_rank, low_rank)
-        self.c_list = torch.nn.ParameterList(
-
-
-
-
-
+        self.c_list = torch.nn.ParameterList(
+            [
+                nn.Parameter(
+                    nn.init.xavier_normal_(torch.empty(num_experts, low_rank, low_rank))
+                )
+                for i in range(self.num_layers)
+            ]
+        )
+        self.gating = nn.ModuleList(
+            [nn.Linear(input_dim, 1, bias=False) for i in range(self.num_experts)]
+        )
+
+        self.bias = torch.nn.ParameterList(
+            [
+                nn.Parameter(nn.init.zeros_(torch.empty(input_dim, 1)))
+                for i in range(self.num_layers)
+            ]
+        )

     def forward(self, x):
         x_0 = x.unsqueeze(2)  # (bs, in_features, 1)
@@ -59,7 +92,9 @@

             # (2) E(x_l)
             # project the input x_l to $\mathbb{R}^{r}$
-            v_x = torch.matmul(
+            v_x = torch.matmul(
+                self.v_list[i][expert_id].t(), x_l
+            )  # (bs, low_rank, 1)

             # nonlinear activation in low rank space
             v_x = torch.tanh(v_x)
@@ -67,7 +102,9 @@
             v_x = torch.tanh(v_x)

             # project back to $\mathbb{R}^{d}$
-            uv_x = torch.matmul(
+            uv_x = torch.matmul(
+                self.u_list[i][expert_id], v_x
+            )  # (bs, in_features, 1)

             dot_ = uv_x + self.bias[i]
             dot_ = x_0 * dot_  # Hadamard-product
@@ -75,10 +112,14 @@
         output_of_experts.append(dot_.squeeze(2))

         # (3) mixture of low-rank experts
-        output_of_experts = torch.stack(
-
+        output_of_experts = torch.stack(
+            output_of_experts, 2
+        )  # (bs, in_features, num_experts)
+        gating_score_experts = torch.stack(
+            gating_score_experts, 1
+        )  # (bs, num_experts, 1)
         moe_out = torch.matmul(output_of_experts, gating_score_experts.softmax(1))
         x_l = moe_out + x_l  # (bs, in_features, 1)

         x_l = x_l.squeeze()  # (bs, in_features)
-        return x_l
+        return x_l
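The CrossNetV2 hunks above only reformat the construction of `self.w`/`self.b` and fix spacing in the update rule; the layer still implements the DCN V2 recurrence x_{l+1} = x_0 * (W_l x_l) + b_l + x_l. As a sanity check, a minimal standalone sketch of the layer as it reads after this diff (the smoke-test shapes at the end are illustrative assumptions, not part of the package):

```python
import torch
import torch.nn as nn


class CrossNetV2(nn.Module):
    """Vector-wise cross network: x_{l+1} = x_0 * (W_l x_l) + b_l + x_l."""

    def __init__(self, input_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        self.w = nn.ModuleList(
            [nn.Linear(input_dim, input_dim, bias=False) for _ in range(num_layers)]
        )
        self.b = nn.ParameterList(
            [nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers)]
        )

    def forward(self, x):
        x0 = x
        for i in range(self.num_layers):
            # Explicit feature crossing against the original input, with a residual term.
            x = x0 * self.w[i](x) + self.b[i] + x
        return x


# Illustrative smoke test: batch of 4, 16-dim input, 3 cross layers.
layer = CrossNetV2(input_dim=16, num_layers=3)
out = layer(torch.randn(4, 16))
assert out.shape == (4, 16)  # cross layers preserve the feature dimension
```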
nextrec/models/ranking/deepfm.py
CHANGED
@@ -43,57 +43,60 @@ embedding; trains end-to-end without hand-crafted cross features, commonly used for CTR/
     - A common strong baseline for CTR/CVR tasks
 """

-import torch
 import torch.nn as nn

 from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import FM, LR, EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature

+
 class DeepFM(BaseModel):
     @property
     def model_name(self):
         return "DeepFM"

     @property
-    def
+    def default_task(self):
         return "binary"
-
-    def __init__(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature] | list = [],
+        sparse_features: list[SparseFeature] | list = [],
+        sequence_features: list[SequenceFeature] | list = [],
+        mlp_params: dict = {},
+        target: list[str] | str = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(DeepFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
             target=target,
-            task=self.
+            task=task or self.default_task,
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-
-            **kwargs
+            **kwargs,
         )

         self.loss = loss
         if self.loss is None:
             self.loss = "bce"
-
+
         self.fm_features = sparse_features + sequence_features
         self.deep_features = dense_features + sparse_features + sequence_features
         self.embedding = EmbeddingLayer(features=self.deep_features)
@@ -104,11 +107,18 @@ class DeepFM(BaseModel):
         self.linear = LR(fm_emb_dim_total)
         self.fm = FM(reduce_sum=True)
         self.mlp = MLP(input_dim=mlp_input_dim, **mlp_params)
-        self.prediction_layer = PredictionLayer(task_type=self.
+        self.prediction_layer = PredictionLayer(task_type=self.default_task)

         # Register regularization weights
-        self.register_regularization_weights(
-
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["linear", "mlp"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )

     def forward(self, x):
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)