nextrec-0.3.6-py3-none-any.whl → nextrec-0.4.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nextrec/__init__.py +1 -1
- nextrec/__version__.py +1 -1
- nextrec/basic/activation.py +10 -5
- nextrec/basic/callback.py +1 -0
- nextrec/basic/features.py +30 -22
- nextrec/basic/layers.py +244 -113
- nextrec/basic/loggers.py +62 -43
- nextrec/basic/metrics.py +268 -119
- nextrec/basic/model.py +1373 -443
- nextrec/basic/session.py +10 -3
- nextrec/cli.py +498 -0
- nextrec/data/__init__.py +19 -25
- nextrec/data/batch_utils.py +11 -3
- nextrec/data/data_processing.py +42 -24
- nextrec/data/data_utils.py +26 -15
- nextrec/data/dataloader.py +303 -96
- nextrec/data/preprocessor.py +320 -199
- nextrec/loss/listwise.py +17 -9
- nextrec/loss/loss_utils.py +7 -8
- nextrec/loss/pairwise.py +2 -0
- nextrec/loss/pointwise.py +30 -12
- nextrec/models/generative/hstu.py +106 -40
- nextrec/models/match/dssm.py +82 -69
- nextrec/models/match/dssm_v2.py +72 -58
- nextrec/models/match/mind.py +175 -108
- nextrec/models/match/sdm.py +104 -88
- nextrec/models/match/youtube_dnn.py +73 -60
- nextrec/models/multi_task/esmm.py +53 -39
- nextrec/models/multi_task/mmoe.py +70 -47
- nextrec/models/multi_task/ple.py +107 -50
- nextrec/models/multi_task/poso.py +121 -41
- nextrec/models/multi_task/share_bottom.py +54 -38
- nextrec/models/ranking/afm.py +172 -45
- nextrec/models/ranking/autoint.py +84 -61
- nextrec/models/ranking/dcn.py +59 -42
- nextrec/models/ranking/dcn_v2.py +64 -23
- nextrec/models/ranking/deepfm.py +36 -26
- nextrec/models/ranking/dien.py +158 -102
- nextrec/models/ranking/din.py +88 -60
- nextrec/models/ranking/fibinet.py +55 -35
- nextrec/models/ranking/fm.py +32 -26
- nextrec/models/ranking/masknet.py +95 -34
- nextrec/models/ranking/pnn.py +34 -31
- nextrec/models/ranking/widedeep.py +37 -29
- nextrec/models/ranking/xdeepfm.py +63 -41
- nextrec/utils/__init__.py +61 -32
- nextrec/utils/config.py +490 -0
- nextrec/utils/device.py +52 -12
- nextrec/utils/distributed.py +141 -0
- nextrec/utils/embedding.py +1 -0
- nextrec/utils/feature.py +1 -0
- nextrec/utils/file.py +32 -11
- nextrec/utils/initializer.py +61 -16
- nextrec/utils/optimizer.py +25 -9
- nextrec/utils/synthetic_data.py +531 -0
- nextrec/utils/tensor.py +24 -13
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/METADATA +15 -5
- nextrec-0.4.2.dist-info/RECORD +69 -0
- nextrec-0.4.2.dist-info/entry_points.txt +2 -0
- nextrec-0.3.6.dist-info/RECORD +0 -64
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
- {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
nextrec/__init__.py
CHANGED
nextrec/__version__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.3.6"
|
|
1
|
+
__version__ = "0.4.2"
|
nextrec/basic/activation.py
CHANGED
|
@@ -14,20 +14,21 @@ class Dice(nn.Module):
|
|
|
14
14
|
"""
|
|
15
15
|
Dice activation function from the paper:
|
|
16
16
|
"Deep Interest Network for Click-Through Rate Prediction" (Zhou et al., 2018)
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
Dice(x) = p(x) * x + (1 - p(x)) * alpha * x
|
|
19
19
|
where p(x) = sigmoid((x - E[x]) / sqrt(Var[x] + epsilon))
|
|
20
20
|
"""
|
|
21
|
+
|
|
21
22
|
def __init__(self, emb_size: int, epsilon: float = 1e-9):
|
|
22
23
|
super(Dice, self).__init__()
|
|
23
24
|
self.epsilon = epsilon
|
|
24
25
|
self.alpha = nn.Parameter(torch.zeros(emb_size))
|
|
25
26
|
self.bn = nn.BatchNorm1d(emb_size)
|
|
26
|
-
|
|
27
|
+
|
|
27
28
|
def forward(self, x):
|
|
28
29
|
# x shape: (batch_size, emb_size) or (batch_size, seq_len, emb_size)
|
|
29
30
|
original_shape = x.shape
|
|
30
|
-
|
|
31
|
+
|
|
31
32
|
if x.dim() == 3:
|
|
32
33
|
# For 3D input (batch_size, seq_len, emb_size), reshape to 2D
|
|
33
34
|
batch_size, seq_len, emb_size = x.shape
|
|
@@ -45,7 +46,9 @@ def activation_layer(activation: str, emb_size: int | None = None):
|
|
|
45
46
|
activation = activation.lower()
|
|
46
47
|
if activation == "dice":
|
|
47
48
|
if emb_size is None:
|
|
48
|
-
raise ValueError(
|
|
49
|
+
raise ValueError(
|
|
50
|
+
"[ActivationLayer Error]: emb_size is required for Dice activation"
|
|
51
|
+
)
|
|
49
52
|
return Dice(emb_size)
|
|
50
53
|
elif activation == "relu":
|
|
51
54
|
return nn.ReLU()
|
|
@@ -84,4 +87,6 @@ def activation_layer(activation: str, emb_size: int | None = None):
|
|
|
84
87
|
elif activation in ["none", "linear", "identity"]:
|
|
85
88
|
return nn.Identity()
|
|
86
89
|
else:
|
|
87
|
-
raise ValueError(
|
|
90
|
+
raise ValueError(
|
|
91
|
+
f"[ActivationLayer Error]: Unsupported activation function: {activation}"
|
|
92
|
+
)
|
nextrec/basic/callback.py
CHANGED
nextrec/basic/features.py
CHANGED
|
@@ -5,28 +5,31 @@ Date: create on 27/10/2025
|
|
|
5
5
|
Checkpoint: edit on 02/12/2025
|
|
6
6
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
7
7
|
"""
|
|
8
|
+
|
|
8
9
|
import torch
|
|
9
10
|
from nextrec.utils.embedding import get_auto_embedding_dim
|
|
10
11
|
from nextrec.utils.feature import normalize_to_list
|
|
11
12
|
|
|
13
|
+
|
|
12
14
|
class BaseFeature(object):
|
|
13
15
|
def __repr__(self):
|
|
14
|
-
params = {k: v for k, v in self.__dict__.items() if not k.startswith("_")
|
|
16
|
+
params = {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
|
|
15
17
|
param_str = ", ".join(f"{k}={v!r}" for k, v in params.items())
|
|
16
18
|
return f"{self.__class__.__name__}({param_str})"
|
|
17
19
|
|
|
20
|
+
|
|
18
21
|
class SequenceFeature(BaseFeature):
|
|
19
22
|
def __init__(
|
|
20
23
|
self,
|
|
21
24
|
name: str,
|
|
22
25
|
vocab_size: int,
|
|
23
26
|
max_len: int = 20,
|
|
24
|
-
embedding_name: str =
|
|
27
|
+
embedding_name: str = "",
|
|
25
28
|
embedding_dim: int | None = 4,
|
|
26
29
|
combiner: str = "mean",
|
|
27
30
|
padding_idx: int | None = None,
|
|
28
|
-
init_type: str=
|
|
29
|
-
init_params: dict|None = None,
|
|
31
|
+
init_type: str = "normal",
|
|
32
|
+
init_params: dict | None = None,
|
|
30
33
|
l1_reg: float = 0.0,
|
|
31
34
|
l2_reg: float = 1e-5,
|
|
32
35
|
trainable: bool = True,
|
|
@@ -44,22 +47,24 @@ class SequenceFeature(BaseFeature):
|
|
|
44
47
|
self.l1_reg = l1_reg
|
|
45
48
|
self.l2_reg = l2_reg
|
|
46
49
|
self.trainable = trainable
|
|
47
|
-
|
|
50
|
+
|
|
51
|
+
|
|
48
52
|
class SparseFeature(BaseFeature):
|
|
49
|
-
def __init__(
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
53
|
+
def __init__(
|
|
54
|
+
self,
|
|
55
|
+
name: str,
|
|
56
|
+
vocab_size: int,
|
|
57
|
+
embedding_name: str = "",
|
|
58
|
+
embedding_dim: int | None = 4,
|
|
59
|
+
padding_idx: int | None = None,
|
|
60
|
+
init_type: str = "normal",
|
|
61
|
+
init_params: dict | None = None,
|
|
62
|
+
l1_reg: float = 0.0,
|
|
63
|
+
l2_reg: float = 1e-5,
|
|
64
|
+
trainable: bool = True,
|
|
65
|
+
pretrained_weight: torch.Tensor | None = None,
|
|
66
|
+
freeze_pretrained: bool = False,
|
|
67
|
+
):
|
|
63
68
|
self.name = name
|
|
64
69
|
self.vocab_size = vocab_size
|
|
65
70
|
self.embedding_name = embedding_name or name
|
|
@@ -74,12 +79,13 @@ class SparseFeature(BaseFeature):
|
|
|
74
79
|
self.pretrained_weight = pretrained_weight
|
|
75
80
|
self.freeze_pretrained = freeze_pretrained
|
|
76
81
|
|
|
82
|
+
|
|
77
83
|
class DenseFeature(BaseFeature):
|
|
78
84
|
def __init__(
|
|
79
85
|
self,
|
|
80
86
|
name: str,
|
|
81
87
|
embedding_dim: int | None = 1,
|
|
82
|
-
input_dim: int
|
|
88
|
+
input_dim: int = 1,
|
|
83
89
|
use_embedding: bool = False,
|
|
84
90
|
):
|
|
85
91
|
self.name = name
|
|
@@ -90,6 +96,7 @@ class DenseFeature(BaseFeature):
|
|
|
90
96
|
else:
|
|
91
97
|
self.use_embedding = use_embedding # user decides for dim <= 1
|
|
92
98
|
|
|
99
|
+
|
|
93
100
|
class FeatureSet:
|
|
94
101
|
def set_all_features(
|
|
95
102
|
self,
|
|
@@ -103,7 +110,9 @@ class FeatureSet:
|
|
|
103
110
|
self.sparse_features = list(sparse_features) if sparse_features else []
|
|
104
111
|
self.sequence_features = list(sequence_features) if sequence_features else []
|
|
105
112
|
|
|
106
|
-
self.all_features =
|
|
113
|
+
self.all_features = (
|
|
114
|
+
self.dense_features + self.sparse_features + self.sequence_features
|
|
115
|
+
)
|
|
107
116
|
self.feature_names = [feat.name for feat in self.all_features]
|
|
108
117
|
self.target_columns = normalize_to_list(target)
|
|
109
118
|
self.id_columns = normalize_to_list(id_columns)
|
|
@@ -115,4 +124,3 @@ class FeatureSet:
|
|
|
115
124
|
) -> None:
|
|
116
125
|
self.target_columns = normalize_to_list(target)
|
|
117
126
|
self.id_columns = normalize_to_list(id_columns)
|
|
118
|
-
|