nextrec 0.4.1-py3-none-any.whl → 0.4.3-py3-none-any.whl
- nextrec/__init__.py +1 -1
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -5
- nextrec/basic/callback.py +1 -0
- nextrec/basic/features.py +30 -22
- nextrec/basic/layers.py +250 -112
- nextrec/basic/loggers.py +63 -44
- nextrec/basic/metrics.py +270 -120
- nextrec/basic/model.py +1084 -402
- nextrec/basic/session.py +10 -3
- nextrec/cli.py +492 -0
- nextrec/data/__init__.py +19 -25
- nextrec/data/batch_utils.py +11 -3
- nextrec/data/data_processing.py +51 -45
- nextrec/data/data_utils.py +26 -15
- nextrec/data/dataloader.py +273 -96
- nextrec/data/preprocessor.py +320 -199
- nextrec/loss/listwise.py +17 -9
- nextrec/loss/loss_utils.py +7 -8
- nextrec/loss/pairwise.py +2 -0
- nextrec/loss/pointwise.py +30 -12
- nextrec/models/generative/hstu.py +103 -38
- nextrec/models/match/dssm.py +82 -68
- nextrec/models/match/dssm_v2.py +72 -57
- nextrec/models/match/mind.py +175 -107
- nextrec/models/match/sdm.py +104 -87
- nextrec/models/match/youtube_dnn.py +73 -59
- nextrec/models/multi_task/esmm.py +69 -46
- nextrec/models/multi_task/mmoe.py +91 -53
- nextrec/models/multi_task/ple.py +117 -58
- nextrec/models/multi_task/poso.py +163 -55
- nextrec/models/multi_task/share_bottom.py +63 -36
- nextrec/models/ranking/afm.py +80 -45
- nextrec/models/ranking/autoint.py +74 -57
- nextrec/models/ranking/dcn.py +110 -48
- nextrec/models/ranking/dcn_v2.py +265 -45
- nextrec/models/ranking/deepfm.py +39 -24
- nextrec/models/ranking/dien.py +335 -146
- nextrec/models/ranking/din.py +158 -92
- nextrec/models/ranking/fibinet.py +134 -52
- nextrec/models/ranking/fm.py +68 -26
- nextrec/models/ranking/masknet.py +95 -33
- nextrec/models/ranking/pnn.py +128 -58
- nextrec/models/ranking/widedeep.py +40 -28
- nextrec/models/ranking/xdeepfm.py +67 -40
- nextrec/utils/__init__.py +59 -34
- nextrec/utils/config.py +496 -0
- nextrec/utils/device.py +30 -20
- nextrec/utils/distributed.py +36 -9
- nextrec/utils/embedding.py +1 -0
- nextrec/utils/feature.py +1 -0
- nextrec/utils/file.py +33 -11
- nextrec/utils/initializer.py +61 -16
- nextrec/utils/model.py +22 -0
- nextrec/utils/optimizer.py +25 -9
- nextrec/utils/synthetic_data.py +283 -165
- nextrec/utils/tensor.py +24 -13
- {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/METADATA +53 -24
- nextrec-0.4.3.dist-info/RECORD +69 -0
- nextrec-0.4.3.dist-info/entry_points.txt +2 -0
- nextrec-0.4.1.dist-info/RECORD +0 -66
- {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/WHEEL +0 -0
- {nextrec-0.4.1.dist-info → nextrec-0.4.3.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/widedeep.py CHANGED

@@ -39,7 +39,6 @@ Wide & Deep uses a wide linear part (memorizing co-occurrences / hand-crafted crosses) together with a deep network
     - shared feature space, reducing engineering overhead
     """
 
-import torch
 import torch.nn as nn
 
 from nextrec.basic.model import BaseModel
@@ -55,25 +54,33 @@ class WideDeep(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        target: list[str] | str | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
+        if target is None:
+            target = []
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
         super(WideDeep, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
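The rewritten constructor replaces in-signature mutable defaults with None sentinels that are normalized in the body (if target is None: target = []; optimizer_params = optimizer_params or {}). A minimal sketch of the pitfall this pattern avoids; the function names are illustrative, not from nextrec:

    def bad(target=[]):  # default list is created once and shared by every call
        target.append("ctr")
        return target

    def good(target=None):  # 0.4.3 style: fresh list per call
        if target is None:
            target = []
        target.append("ctr")
        return target

    print(bad(), bad())    # ['ctr', 'ctr'] ['ctr', 'ctr']  <- state leaked across calls
    print(good(), good())  # ['ctr'] ['ctr']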
@@ -85,13 +92,11 @@ class WideDeep(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
-
-        self.loss = "bce"
-
+
         # Wide part: use all features for linear model
         self.wide_features = sparse_features + sequence_features
         # Deep part: use all features

@@ -103,7 +108,7 @@ class WideDeep(BaseModel):
         # Wide part: Linear layer
         wide_dim = sum([f.embedding_dim for f in self.wide_features])
         self.linear = LR(wide_dim)
-
+
         # Deep part: MLP
         input_dim = self.embedding.input_dim
         # deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])

@@ -111,14 +116,21 @@ class WideDeep(BaseModel):
         self.mlp = MLP(input_dim=input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task)
         # Register regularization weights
-        self.register_regularization_weights(
-
+        self.register_regularization_weights(
+            embedding_attr="embedding", include_modules=["linear", "mlp"]
+        )
+        self.compile(
+            optimizer=optimizer,
+            optimizer_params=optimizer_params,
+            loss=loss,
+            loss_params=loss_params,
+        )
 
     def forward(self, x):
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep)  # [B, 1]
-
+
         # Wide part
         input_wide = self.embedding(x=x, features=self.wide_features, squeeze_dim=True)
         y_wide = self.linear(input_wide)
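The forward hunk ends before the wide and deep logits are combined. Canonically, Wide & Deep sums the two logits and applies the output activation; the sketch below shows that combination under the [B, 1] shapes the comments assert. This is the textbook formulation, not a verbatim excerpt of the rest of forward():

    import torch

    B = 32
    y_wide = torch.randn(B, 1)          # logit from the LR over wide features
    y_deep = torch.randn(B, 1)          # logit from the MLP over deep features
    y = torch.sigmoid(y_wide + y_deep)  # binary task: sum logits, then sigmoid
    print(y.shape)                      # torch.Size([32, 1])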
nextrec/models/ranking/xdeepfm.py CHANGED

@@ -3,8 +3,8 @@ Date: create on 09/11/2025
 Author:
     Yang Zhou,zyaztec@gmail.com
 Reference:
-    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
-    for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
+    [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
+    for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
     knowledge discovery & data mining. 2018: 1754-1763.
     (https://arxiv.org/abs/1803.05170)
 """

@@ -17,6 +17,7 @@ from nextrec.basic.model import BaseModel
 from nextrec.basic.layers import LR, EmbeddingLayer, MLP, PredictionLayer
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+
 class CIN(nn.Module):
     """Compressed Interaction Network from xDeepFM (Lian et al., 2018)."""
 
@@ -28,7 +29,16 @@ class CIN(nn.Module):
         prev_dim, fc_input_dim = input_dim, 0
         for i in range(self.num_layers):
             cross_layer_size = cin_size[i]
-            self.conv_layers.append(
+            self.conv_layers.append(
+                torch.nn.Conv1d(
+                    input_dim * prev_dim,
+                    cross_layer_size,
+                    1,
+                    stride=1,
+                    dilation=1,
+                    bias=True,
+                )
+            )
             if self.split_half and i != self.num_layers - 1:
                 cross_layer_size //= 2
             prev_dim = cross_layer_size
@@ -49,7 +59,8 @@ class CIN(nn.Module):
             h = x
             xs.append(x)
         return self.fc(torch.sum(torch.cat(xs, dim=1), 2))
-
+
+
 class xDeepFM(BaseModel):
     @property
     def model_name(self):
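The reformatted Conv1d construction exposes the CIN arithmetic: at layer k, outer products between the H_{k-1} current feature maps and the m base fields are flattened into m * H_{k-1} channels, and a kernel-size-1 convolution compresses them to the next layer's width. A self-contained sketch of one such step with toy sizes (the tensors and names are illustrative, not nextrec classes):

    import torch

    B, m, D, H1 = 4, 6, 8, 16             # batch, fields, embedding dim, layer width
    x0 = torch.randn(B, m, D)             # base field embeddings X^0 (prev_dim starts at m)
    conv = torch.nn.Conv1d(m * m, H1, 1)  # mirrors Conv1d(input_dim * prev_dim, cross_layer_size, 1)

    # Outer product at every embedding position d; field pairs flatten into channels.
    z = torch.einsum("bmd,bhd->bmhd", x0, x0).reshape(B, m * m, D)
    x1 = conv(z)                          # [B, H1, D]; sum-pooling over D of all layers feeds self.fc
    print(x1.shape)                       # torch.Size([4, 16, 8])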
@@ -58,27 +69,36 @@ class xDeepFM(BaseModel):
     @property
     def default_task(self):
         return "binary"
-
-    def __init__(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        cin_size: list[int] | None = None,
+        split_half: bool = True,
+        target: list[str] | str | None = None,
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict | None = None,
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
+        cin_size = cin_size or [128, 128]
+        if target is None:
+            target = []
+        optimizer_params = optimizer_params or {}
+        if loss is None:
+            loss = "bce"
+
         super(xDeepFM, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
@@ -90,16 +110,14 @@ class xDeepFM(BaseModel):
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            **kwargs
+            **kwargs,
         )
 
         self.loss = loss
-
-        self.loss = "bce"
-
+
         # Linear part and CIN part: use sparse and sequence features
         self.linear_features = sparse_features + sequence_features
-
+
         # Deep part: use all features
         self.deep_features = dense_features + sparse_features + sequence_features
 
@@ -109,21 +127,28 @@ class xDeepFM(BaseModel):
         # Linear part
         linear_dim = sum([f.embedding_dim for f in self.linear_features])
         self.linear = LR(linear_dim)
-
+
         # CIN part: Compressed Interaction Network
         num_fields = len(self.linear_features)
         self.cin = CIN(input_dim=num_fields, cin_size=cin_size, split_half=split_half)
-
+
         # Deep part: DNN
-        deep_emb_dim_total = sum(
-
+        deep_emb_dim_total = sum(
+            [
+                f.embedding_dim
+                for f in self.deep_features
+                if not isinstance(f, DenseFeature)
+            ]
+        )
+        dense_input_dim = sum(
+            [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+        )
         self.mlp = MLP(input_dim=deep_emb_dim_total + dense_input_dim, **mlp_params)
         self.prediction_layer = PredictionLayer(task_type=self.task)
 
         # Register regularization weights
         self.register_regularization_weights(
-            embedding_attr=
-            include_modules=['linear', 'cin', 'mlp']
+            embedding_attr="embedding", include_modules=["linear", "cin", "mlp"]
         )
 
         self.compile(
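The new dense_input_dim computation is defensive on two fronts: getattr covers dense features that lack an embedding_dim attribute entirely, and the trailing `or 1` maps a None (or 0) value to 1. A toy illustration; the class is a stand-in, not nextrec's DenseFeature:

    class Toy:
        embedding_dim = None      # attribute exists but is unset

    for f in (Toy(), object()):   # object() has no embedding_dim at all
        print(getattr(f, "embedding_dim", 1) or 1)  # -> 1 in both cases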
@@ -135,14 +160,16 @@ class xDeepFM(BaseModel):
 
     def forward(self, x):
         # Get embeddings for linear and CIN (sparse features only)
-        input_linear = self.embedding(
-
+        input_linear = self.embedding(
+            x=x, features=self.linear_features, squeeze_dim=False
+        )
+
         # Linear part
         y_linear = self.linear(input_linear.flatten(start_dim=1))
-
+
         # CIN part
         y_cin = self.cin(input_linear)  # [B, 1]
-
+
         # Deep part
         input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
         y_deep = self.mlp(input_deep)  # [B, 1]
nextrec/utils/__init__.py CHANGED

@@ -10,63 +10,88 @@ This package provides various utility functions organized by category:
 - file_utils: File I/O operations
 - model_utils: Model-related utilities
 - feature_utils: Feature processing utilities
+- config_utils: Configuration loading and processing utilities
 
 Date: create on 13/11/2025
 Last update: 06/12/2025
 Author: Yang Zhou, zyaztec@gmail.com
 """
+
 from . import optimizer, initializer, embedding
 from .optimizer import get_optimizer, get_scheduler
 from .initializer import get_initializer
 from .embedding import get_auto_embedding_dim
 from .device import resolve_device, get_device_info
 from .tensor import to_tensor, stack_tensors, concat_tensors, pad_sequence_tensors
-from .file import
+from .file import (
+    resolve_file_paths,
+    read_table,
+    load_dataframes,
+    iter_file_chunks,
+    default_output_dir,
+    read_yaml,
+)
 from .model import merge_features, get_mlp_output_dim
 from .feature import normalize_to_list
-from .synthetic_data import
+from .synthetic_data import (
+    generate_match_data,
+    generate_ranking_data,
+    generate_multitask_data,
+    generate_distributed_ranking_data,
+)
+from .config import (
+    resolve_path,
+    select_features,
+    register_processor_features,
+    build_feature_objects,
+    extract_feature_groups,
+    load_model_class,
+    build_model_instance,
+)
 
 __all__ = [
     # Optimizer & Scheduler
-
-
-
+    "get_optimizer",
+    "get_scheduler",
     # Initializer
-
-
+    "get_initializer",
     # Embedding
-
-
+    "get_auto_embedding_dim",
     # Device utilities
-
-
-
+    "resolve_device",
+    "get_device_info",
     # Tensor utilities
-
-
-
-
-
+    "to_tensor",
+    "stack_tensors",
+    "concat_tensors",
+    "pad_sequence_tensors",
     # File utilities
-
-
-
-
-
-
+    "resolve_file_paths",
+    "read_table",
+    "read_yaml",
+    "load_dataframes",
+    "iter_file_chunks",
+    "default_output_dir",
     # Model utilities
-
-
-
+    "merge_features",
+    "get_mlp_output_dim",
     # Feature utilities
-
-
+    "normalize_to_list",
+    # Config utilities
+    "resolve_path",
+    "select_features",
+    "register_processor_features",
+    "build_feature_objects",
+    "extract_feature_groups",
+    "load_model_class",
+    "build_model_instance",
     # Synthetic data utilities
-
-
-
+    "generate_ranking_data",
+    "generate_match_data",
+    "generate_multitask_data",
+    "generate_distributed_ranking_data",
     # Module exports
-
-
-
+    "optimizer",
+    "initializer",
+    "embedding",
 ]
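With the imports and __all__ now spelled out name-by-name, the public surface is easy to sanity-check. A quick consistency check that every advertised name actually resolves (assumes nextrec 0.4.3 is installed):

    import nextrec.utils as u

    missing = [name for name in u.__all__ if not hasattr(u, name)]
    print(missing or "all exports resolve")  # expected: all exports resolve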