nextrec 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +4 -4
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -9
- nextrec/basic/callback.py +1 -0
- nextrec/basic/dataloader.py +168 -127
- nextrec/basic/features.py +24 -27
- nextrec/basic/layers.py +328 -159
- nextrec/basic/loggers.py +50 -37
- nextrec/basic/metrics.py +255 -147
- nextrec/basic/model.py +817 -462
- nextrec/data/__init__.py +5 -5
- nextrec/data/data_utils.py +16 -12
- nextrec/data/preprocessor.py +276 -252
- nextrec/loss/__init__.py +12 -12
- nextrec/loss/loss_utils.py +30 -22
- nextrec/loss/match_losses.py +116 -83
- nextrec/models/match/__init__.py +5 -5
- nextrec/models/match/dssm.py +70 -61
- nextrec/models/match/dssm_v2.py +61 -51
- nextrec/models/match/mind.py +89 -71
- nextrec/models/match/sdm.py +93 -81
- nextrec/models/match/youtube_dnn.py +62 -53
- nextrec/models/multi_task/esmm.py +49 -43
- nextrec/models/multi_task/mmoe.py +65 -56
- nextrec/models/multi_task/ple.py +92 -65
- nextrec/models/multi_task/share_bottom.py +48 -42
- nextrec/models/ranking/__init__.py +7 -7
- nextrec/models/ranking/afm.py +39 -30
- nextrec/models/ranking/autoint.py +70 -57
- nextrec/models/ranking/dcn.py +43 -35
- nextrec/models/ranking/deepfm.py +34 -28
- nextrec/models/ranking/dien.py +115 -79
- nextrec/models/ranking/din.py +84 -60
- nextrec/models/ranking/fibinet.py +51 -35
- nextrec/models/ranking/fm.py +28 -26
- nextrec/models/ranking/masknet.py +31 -31
- nextrec/models/ranking/pnn.py +30 -31
- nextrec/models/ranking/widedeep.py +36 -31
- nextrec/models/ranking/xdeepfm.py +46 -39
- nextrec/utils/__init__.py +9 -9
- nextrec/utils/embedding.py +1 -1
- nextrec/utils/initializer.py +23 -15
- nextrec/utils/optimizer.py +14 -10
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/METADATA +6 -40
- nextrec-0.1.2.dist-info/RECORD +51 -0
- nextrec-0.1.1.dist-info/RECORD +0 -51
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/WHEEL +0 -0
- {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -3,8 +3,8 @@ Date: create on 09/11/2025
|
|
|
3
3
|
Author:
|
|
4
4
|
Yang Zhou,zyaztec@gmail.com
|
|
5
5
|
Reference:
|
|
6
|
-
[1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
|
|
7
|
-
self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
|
|
6
|
+
[1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
|
|
7
|
+
self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
|
|
8
8
|
on information and knowledge management. 2019: 1161-1170.
|
|
9
9
|
(https://arxiv.org/abs/1810.11921)
|
|
10
10
|
"""
|
|
@@ -25,27 +25,29 @@ class AutoInt(BaseModel):
|
|
|
25
25
|
@property
|
|
26
26
|
def task_type(self):
|
|
27
27
|
return "binary"
|
|
28
|
-
|
|
29
|
-
def __init__(
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
28
|
+
|
|
29
|
+
def __init__(
|
|
30
|
+
self,
|
|
31
|
+
dense_features: list[DenseFeature],
|
|
32
|
+
sparse_features: list[SparseFeature],
|
|
33
|
+
sequence_features: list[SequenceFeature],
|
|
34
|
+
att_layer_num: int = 3,
|
|
35
|
+
att_embedding_dim: int = 8,
|
|
36
|
+
att_head_num: int = 2,
|
|
37
|
+
att_dropout: float = 0.0,
|
|
38
|
+
att_use_residual: bool = True,
|
|
39
|
+
target: list[str] = [],
|
|
40
|
+
optimizer: str = "adam",
|
|
41
|
+
optimizer_params: dict = {},
|
|
42
|
+
loss: str | nn.Module | None = "bce",
|
|
43
|
+
device: str = "cpu",
|
|
44
|
+
model_id: str = "baseline",
|
|
45
|
+
embedding_l1_reg=1e-6,
|
|
46
|
+
dense_l1_reg=1e-5,
|
|
47
|
+
embedding_l2_reg=1e-5,
|
|
48
|
+
dense_l2_reg=1e-4,
|
|
49
|
+
):
|
|
50
|
+
|
|
49
51
|
super(AutoInt, self).__init__(
|
|
50
52
|
dense_features=dense_features,
|
|
51
53
|
sparse_features=sparse_features,
|
|
@@ -58,83 +60,94 @@ class AutoInt(BaseModel):
|
|
|
58
60
|
embedding_l2_reg=embedding_l2_reg,
|
|
59
61
|
dense_l2_reg=dense_l2_reg,
|
|
60
62
|
early_stop_patience=20,
|
|
61
|
-
model_id=model_id
|
|
63
|
+
model_id=model_id,
|
|
62
64
|
)
|
|
63
65
|
|
|
64
66
|
self.loss = loss
|
|
65
67
|
if self.loss is None:
|
|
66
68
|
self.loss = "bce"
|
|
67
|
-
|
|
69
|
+
|
|
68
70
|
self.att_layer_num = att_layer_num
|
|
69
71
|
self.att_embedding_dim = att_embedding_dim
|
|
70
|
-
|
|
72
|
+
|
|
71
73
|
# Use sparse and sequence features for interaction
|
|
72
74
|
self.interaction_features = sparse_features + sequence_features
|
|
73
|
-
|
|
75
|
+
|
|
74
76
|
# All features for embedding
|
|
75
77
|
self.all_features = dense_features + sparse_features + sequence_features
|
|
76
78
|
|
|
77
79
|
# Embedding layer
|
|
78
80
|
self.embedding = EmbeddingLayer(features=self.all_features)
|
|
79
|
-
|
|
81
|
+
|
|
80
82
|
# Project embeddings to attention embedding dimension
|
|
81
83
|
num_fields = len(self.interaction_features)
|
|
82
84
|
total_embedding_dim = sum([f.embedding_dim for f in self.interaction_features])
|
|
83
|
-
|
|
85
|
+
|
|
84
86
|
# If embeddings have different dimensions, project them to att_embedding_dim
|
|
85
|
-
self.need_projection = not all(
|
|
87
|
+
self.need_projection = not all(
|
|
88
|
+
f.embedding_dim == att_embedding_dim for f in self.interaction_features
|
|
89
|
+
)
|
|
86
90
|
self.projection_layers = None
|
|
87
91
|
if self.need_projection:
|
|
88
|
-
self.projection_layers = nn.ModuleList(
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
92
|
+
self.projection_layers = nn.ModuleList(
|
|
93
|
+
[
|
|
94
|
+
nn.Linear(f.embedding_dim, att_embedding_dim, bias=False)
|
|
95
|
+
for f in self.interaction_features
|
|
96
|
+
]
|
|
97
|
+
)
|
|
98
|
+
|
|
93
99
|
# Multi-head self-attention layers
|
|
94
|
-
self.attention_layers = nn.ModuleList(
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
100
|
+
self.attention_layers = nn.ModuleList(
|
|
101
|
+
[
|
|
102
|
+
MultiHeadSelfAttention(
|
|
103
|
+
embedding_dim=att_embedding_dim,
|
|
104
|
+
num_heads=att_head_num,
|
|
105
|
+
dropout=att_dropout,
|
|
106
|
+
use_residual=att_use_residual,
|
|
107
|
+
)
|
|
108
|
+
for _ in range(att_layer_num)
|
|
109
|
+
]
|
|
110
|
+
)
|
|
111
|
+
|
|
103
112
|
# Final prediction layer
|
|
104
113
|
self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
|
|
105
114
|
self.prediction_layer = PredictionLayer(task_type=self.task_type)
|
|
106
115
|
|
|
107
116
|
# Register regularization weights
|
|
108
117
|
self._register_regularization_weights(
|
|
109
|
-
embedding_attr=
|
|
110
|
-
include_modules=[
|
|
118
|
+
embedding_attr="embedding",
|
|
119
|
+
include_modules=["projection_layers", "attention_layers", "fc"],
|
|
111
120
|
)
|
|
112
121
|
|
|
113
|
-
self.compile(
|
|
114
|
-
optimizer=optimizer,
|
|
115
|
-
optimizer_params=optimizer_params,
|
|
116
|
-
loss=loss
|
|
117
|
-
)
|
|
122
|
+
self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
|
|
118
123
|
|
|
119
124
|
def forward(self, x):
|
|
120
125
|
# Get embeddings field-by-field so mixed dimensions can be projected safely
|
|
121
126
|
field_embeddings = []
|
|
122
127
|
if len(self.interaction_features) == 0:
|
|
123
|
-
raise ValueError(
|
|
128
|
+
raise ValueError(
|
|
129
|
+
"AutoInt requires at least one sparse or sequence feature for interactions."
|
|
130
|
+
)
|
|
124
131
|
for idx, feature in enumerate(self.interaction_features):
|
|
125
132
|
feature_emb = self.embedding(x=x, features=[feature], squeeze_dim=False)
|
|
126
133
|
feature_emb = feature_emb.squeeze(1) # [B, embedding_dim]
|
|
127
134
|
if self.need_projection and self.projection_layers is not None:
|
|
128
135
|
feature_emb = self.projection_layers[idx](feature_emb)
|
|
129
|
-
field_embeddings.append(
|
|
136
|
+
field_embeddings.append(
|
|
137
|
+
feature_emb.unsqueeze(1)
|
|
138
|
+
) # [B, 1, att_embedding_dim or original_dim]
|
|
130
139
|
embeddings = torch.cat(field_embeddings, dim=1)
|
|
131
|
-
|
|
140
|
+
|
|
132
141
|
# Apply multi-head self-attention layers
|
|
133
142
|
attention_output = embeddings
|
|
134
143
|
for att_layer in self.attention_layers:
|
|
135
|
-
attention_output = att_layer(
|
|
136
|
-
|
|
144
|
+
attention_output = att_layer(
|
|
145
|
+
attention_output
|
|
146
|
+
) # [B, num_fields, att_embedding_dim]
|
|
147
|
+
|
|
137
148
|
# Flatten and predict
|
|
138
|
-
attention_output_flat = attention_output.flatten(
|
|
149
|
+
attention_output_flat = attention_output.flatten(
|
|
150
|
+
start_dim=1
|
|
151
|
+
) # [B, num_fields * att_embedding_dim]
|
|
139
152
|
y = self.fc(attention_output_flat) # [B, 1]
|
|
140
153
|
return self.prediction_layer(y)
|
nextrec/models/ranking/dcn.py
CHANGED
|
@@ -24,24 +24,26 @@ class DCN(BaseModel):
|
|
|
24
24
|
@property
|
|
25
25
|
def task_type(self):
|
|
26
26
|
return "binary"
|
|
27
|
-
|
|
28
|
-
def __init__(
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
27
|
+
|
|
28
|
+
def __init__(
|
|
29
|
+
self,
|
|
30
|
+
dense_features: list[DenseFeature],
|
|
31
|
+
sparse_features: list[SparseFeature],
|
|
32
|
+
sequence_features: list[SequenceFeature],
|
|
33
|
+
cross_num: int = 3,
|
|
34
|
+
mlp_params: dict | None = None,
|
|
35
|
+
target: list[str] = [],
|
|
36
|
+
optimizer: str = "adam",
|
|
37
|
+
optimizer_params: dict = {},
|
|
38
|
+
loss: str | nn.Module | None = "bce",
|
|
39
|
+
device: str = "cpu",
|
|
40
|
+
model_id: str = "baseline",
|
|
41
|
+
embedding_l1_reg=1e-6,
|
|
42
|
+
dense_l1_reg=1e-5,
|
|
43
|
+
embedding_l2_reg=1e-5,
|
|
44
|
+
dense_l2_reg=1e-4,
|
|
45
|
+
):
|
|
46
|
+
|
|
45
47
|
super(DCN, self).__init__(
|
|
46
48
|
dense_features=dense_features,
|
|
47
49
|
sparse_features=sparse_features,
|
|
@@ -54,13 +56,13 @@ class DCN(BaseModel):
|
|
|
54
56
|
embedding_l2_reg=embedding_l2_reg,
|
|
55
57
|
dense_l2_reg=dense_l2_reg,
|
|
56
58
|
early_stop_patience=20,
|
|
57
|
-
model_id=model_id
|
|
59
|
+
model_id=model_id,
|
|
58
60
|
)
|
|
59
61
|
|
|
60
62
|
self.loss = loss
|
|
61
63
|
if self.loss is None:
|
|
62
64
|
self.loss = "bce"
|
|
63
|
-
|
|
65
|
+
|
|
64
66
|
# All features
|
|
65
67
|
self.all_features = dense_features + sparse_features + sequence_features
|
|
66
68
|
|
|
@@ -68,13 +70,21 @@ class DCN(BaseModel):
|
|
|
68
70
|
self.embedding = EmbeddingLayer(features=self.all_features)
|
|
69
71
|
|
|
70
72
|
# Calculate input dimension
|
|
71
|
-
emb_dim_total = sum(
|
|
72
|
-
|
|
73
|
+
emb_dim_total = sum(
|
|
74
|
+
[
|
|
75
|
+
f.embedding_dim
|
|
76
|
+
for f in self.all_features
|
|
77
|
+
if not isinstance(f, DenseFeature)
|
|
78
|
+
]
|
|
79
|
+
)
|
|
80
|
+
dense_input_dim = sum(
|
|
81
|
+
[getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
|
|
82
|
+
)
|
|
73
83
|
input_dim = emb_dim_total + dense_input_dim
|
|
74
|
-
|
|
84
|
+
|
|
75
85
|
# Cross Network
|
|
76
86
|
self.cross_network = CrossNetwork(input_dim=input_dim, num_layers=cross_num)
|
|
77
|
-
|
|
87
|
+
|
|
78
88
|
# Deep Network (optional)
|
|
79
89
|
if mlp_params is not None:
|
|
80
90
|
self.use_dnn = True
|
|
@@ -90,31 +100,29 @@ class DCN(BaseModel):
|
|
|
90
100
|
|
|
91
101
|
# Register regularization weights
|
|
92
102
|
self._register_regularization_weights(
|
|
93
|
-
embedding_attr=
|
|
94
|
-
include_modules=[
|
|
103
|
+
embedding_attr="embedding",
|
|
104
|
+
include_modules=["cross_network", "mlp", "final_layer"],
|
|
95
105
|
)
|
|
96
106
|
|
|
97
|
-
self.compile(
|
|
98
|
-
optimizer=optimizer,
|
|
99
|
-
optimizer_params=optimizer_params,
|
|
100
|
-
loss=loss
|
|
101
|
-
)
|
|
107
|
+
self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
|
|
102
108
|
|
|
103
109
|
def forward(self, x):
|
|
104
110
|
# Get all embeddings and flatten
|
|
105
111
|
input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
|
|
106
|
-
|
|
112
|
+
|
|
107
113
|
# Cross Network
|
|
108
114
|
cross_output = self.cross_network(input_flat) # [B, input_dim]
|
|
109
|
-
|
|
115
|
+
|
|
110
116
|
if self.use_dnn:
|
|
111
117
|
# Deep Network
|
|
112
118
|
deep_output = self.mlp(input_flat) # [B, 1]
|
|
113
119
|
# Concatenate cross and deep
|
|
114
|
-
combined = torch.cat(
|
|
120
|
+
combined = torch.cat(
|
|
121
|
+
[cross_output, deep_output], dim=-1
|
|
122
|
+
) # [B, input_dim + 1]
|
|
115
123
|
else:
|
|
116
124
|
combined = cross_output
|
|
117
|
-
|
|
125
|
+
|
|
118
126
|
# Final prediction
|
|
119
127
|
y = self.final_layer(combined)
|
|
120
128
|
return self.prediction_layer(y)
|
nextrec/models/ranking/deepfm.py
CHANGED
|
@@ -13,6 +13,7 @@ from nextrec.basic.model import BaseModel
|
|
|
13
13
|
from nextrec.basic.layers import FM, LR, EmbeddingLayer, MLP, PredictionLayer
|
|
14
14
|
from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
|
|
15
15
|
|
|
16
|
+
|
|
16
17
|
class DeepFM(BaseModel):
|
|
17
18
|
@property
|
|
18
19
|
def model_name(self):
|
|
@@ -21,23 +22,25 @@ class DeepFM(BaseModel):
|
|
|
21
22
|
@property
|
|
22
23
|
def task_type(self):
|
|
23
24
|
return "binary"
|
|
24
|
-
|
|
25
|
-
def __init__(
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
25
|
+
|
|
26
|
+
def __init__(
|
|
27
|
+
self,
|
|
28
|
+
dense_features: list[DenseFeature] | list = [],
|
|
29
|
+
sparse_features: list[SparseFeature] | list = [],
|
|
30
|
+
sequence_features: list[SequenceFeature] | list = [],
|
|
31
|
+
mlp_params: dict = {},
|
|
32
|
+
target: list[str] | str = [],
|
|
33
|
+
optimizer: str = "adam",
|
|
34
|
+
optimizer_params: dict = {},
|
|
35
|
+
loss: str | nn.Module | None = "bce",
|
|
36
|
+
device: str = "cpu",
|
|
37
|
+
model_id: str = "baseline",
|
|
38
|
+
embedding_l1_reg=1e-6,
|
|
39
|
+
dense_l1_reg=1e-5,
|
|
40
|
+
embedding_l2_reg=1e-5,
|
|
41
|
+
dense_l2_reg=1e-4,
|
|
42
|
+
):
|
|
43
|
+
|
|
41
44
|
super(DeepFM, self).__init__(
|
|
42
45
|
dense_features=dense_features,
|
|
43
46
|
sparse_features=sparse_features,
|
|
@@ -50,21 +53,29 @@ class DeepFM(BaseModel):
|
|
|
50
53
|
embedding_l2_reg=embedding_l2_reg,
|
|
51
54
|
dense_l2_reg=dense_l2_reg,
|
|
52
55
|
early_stop_patience=20,
|
|
53
|
-
model_id=model_id
|
|
56
|
+
model_id=model_id,
|
|
54
57
|
)
|
|
55
58
|
|
|
56
59
|
self.loss = loss
|
|
57
60
|
if self.loss is None:
|
|
58
61
|
self.loss = "bce"
|
|
59
|
-
|
|
62
|
+
|
|
60
63
|
self.fm_features = sparse_features + sequence_features
|
|
61
64
|
self.deep_features = dense_features + sparse_features + sequence_features
|
|
62
65
|
|
|
63
66
|
self.embedding = EmbeddingLayer(features=self.deep_features)
|
|
64
67
|
|
|
65
68
|
fm_emb_dim_total = sum([f.embedding_dim for f in self.fm_features])
|
|
66
|
-
deep_emb_dim_total = sum(
|
|
67
|
-
|
|
69
|
+
deep_emb_dim_total = sum(
|
|
70
|
+
[
|
|
71
|
+
f.embedding_dim
|
|
72
|
+
for f in self.deep_features
|
|
73
|
+
if not isinstance(f, DenseFeature)
|
|
74
|
+
]
|
|
75
|
+
)
|
|
76
|
+
dense_input_dim = sum(
|
|
77
|
+
[getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
|
|
78
|
+
)
|
|
68
79
|
|
|
69
80
|
self.linear = LR(fm_emb_dim_total)
|
|
70
81
|
self.fm = FM(reduce_sum=True)
|
|
@@ -73,15 +84,10 @@ class DeepFM(BaseModel):
|
|
|
73
84
|
|
|
74
85
|
# Register regularization weights
|
|
75
86
|
self._register_regularization_weights(
|
|
76
|
-
embedding_attr=
|
|
77
|
-
include_modules=['linear', 'mlp']
|
|
87
|
+
embedding_attr="embedding", include_modules=["linear", "mlp"]
|
|
78
88
|
)
|
|
79
89
|
|
|
80
|
-
self.compile(
|
|
81
|
-
optimizer=optimizer,
|
|
82
|
-
optimizer_params=optimizer_params,
|
|
83
|
-
loss=loss
|
|
84
|
-
)
|
|
90
|
+
self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
|
|
85
91
|
|
|
86
92
|
def forward(self, x):
|
|
87
93
|
input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
|