nextrec 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. nextrec/__init__.py +4 -4
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -9
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/dataloader.py +168 -127
  6. nextrec/basic/features.py +24 -27
  7. nextrec/basic/layers.py +328 -159
  8. nextrec/basic/loggers.py +50 -37
  9. nextrec/basic/metrics.py +255 -147
  10. nextrec/basic/model.py +817 -462
  11. nextrec/data/__init__.py +5 -5
  12. nextrec/data/data_utils.py +16 -12
  13. nextrec/data/preprocessor.py +276 -252
  14. nextrec/loss/__init__.py +12 -12
  15. nextrec/loss/loss_utils.py +30 -22
  16. nextrec/loss/match_losses.py +116 -83
  17. nextrec/models/match/__init__.py +5 -5
  18. nextrec/models/match/dssm.py +70 -61
  19. nextrec/models/match/dssm_v2.py +61 -51
  20. nextrec/models/match/mind.py +89 -71
  21. nextrec/models/match/sdm.py +93 -81
  22. nextrec/models/match/youtube_dnn.py +62 -53
  23. nextrec/models/multi_task/esmm.py +49 -43
  24. nextrec/models/multi_task/mmoe.py +65 -56
  25. nextrec/models/multi_task/ple.py +92 -65
  26. nextrec/models/multi_task/share_bottom.py +48 -42
  27. nextrec/models/ranking/__init__.py +7 -7
  28. nextrec/models/ranking/afm.py +39 -30
  29. nextrec/models/ranking/autoint.py +70 -57
  30. nextrec/models/ranking/dcn.py +43 -35
  31. nextrec/models/ranking/deepfm.py +34 -28
  32. nextrec/models/ranking/dien.py +115 -79
  33. nextrec/models/ranking/din.py +84 -60
  34. nextrec/models/ranking/fibinet.py +51 -35
  35. nextrec/models/ranking/fm.py +28 -26
  36. nextrec/models/ranking/masknet.py +31 -31
  37. nextrec/models/ranking/pnn.py +30 -31
  38. nextrec/models/ranking/widedeep.py +36 -31
  39. nextrec/models/ranking/xdeepfm.py +46 -39
  40. nextrec/utils/__init__.py +9 -9
  41. nextrec/utils/embedding.py +1 -1
  42. nextrec/utils/initializer.py +23 -15
  43. nextrec/utils/optimizer.py +14 -10
  44. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/METADATA +6 -40
  45. nextrec-0.1.2.dist-info/RECORD +51 -0
  46. nextrec-0.1.1.dist-info/RECORD +0 -51
  47. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/WHEEL +0 -0
  48. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/ranking/autoint.py
@@ -3,8 +3,8 @@ Date: create on 09/11/2025
  Author:
  Yang Zhou,zyaztec@gmail.com
  Reference:
- [1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
- self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
+ [1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
+ self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
  on information and knowledge management. 2019: 1161-1170.
  (https://arxiv.org/abs/1810.11921)
  """
@@ -25,27 +25,29 @@ class AutoInt(BaseModel):
  @property
  def task_type(self):
  return "binary"
-
- def __init__(self,
- dense_features: list[DenseFeature],
- sparse_features: list[SparseFeature],
- sequence_features: list[SequenceFeature],
- att_layer_num: int = 3,
- att_embedding_dim: int = 8,
- att_head_num: int = 2,
- att_dropout: float = 0.0,
- att_use_residual: bool = True,
- target: list[str] = [],
- optimizer: str = "adam",
- optimizer_params: dict = {},
- loss: str | nn.Module | None = "bce",
- device: str = 'cpu',
- model_id: str = "baseline",
- embedding_l1_reg=1e-6,
- dense_l1_reg=1e-5,
- embedding_l2_reg=1e-5,
- dense_l2_reg=1e-4):
-
+
+ def __init__(
+ self,
+ dense_features: list[DenseFeature],
+ sparse_features: list[SparseFeature],
+ sequence_features: list[SequenceFeature],
+ att_layer_num: int = 3,
+ att_embedding_dim: int = 8,
+ att_head_num: int = 2,
+ att_dropout: float = 0.0,
+ att_use_residual: bool = True,
+ target: list[str] = [],
+ optimizer: str = "adam",
+ optimizer_params: dict = {},
+ loss: str | nn.Module | None = "bce",
+ device: str = "cpu",
+ model_id: str = "baseline",
+ embedding_l1_reg=1e-6,
+ dense_l1_reg=1e-5,
+ embedding_l2_reg=1e-5,
+ dense_l2_reg=1e-4,
+ ):
+
  super(AutoInt, self).__init__(
  dense_features=dense_features,
  sparse_features=sparse_features,
@@ -58,83 +60,94 @@ class AutoInt(BaseModel):
  embedding_l2_reg=embedding_l2_reg,
  dense_l2_reg=dense_l2_reg,
  early_stop_patience=20,
- model_id=model_id
+ model_id=model_id,
  )

  self.loss = loss
  if self.loss is None:
  self.loss = "bce"
-
+
  self.att_layer_num = att_layer_num
  self.att_embedding_dim = att_embedding_dim
-
+
  # Use sparse and sequence features for interaction
  self.interaction_features = sparse_features + sequence_features
-
+
  # All features for embedding
  self.all_features = dense_features + sparse_features + sequence_features

  # Embedding layer
  self.embedding = EmbeddingLayer(features=self.all_features)
-
+
  # Project embeddings to attention embedding dimension
  num_fields = len(self.interaction_features)
  total_embedding_dim = sum([f.embedding_dim for f in self.interaction_features])
-
+
  # If embeddings have different dimensions, project them to att_embedding_dim
- self.need_projection = not all(f.embedding_dim == att_embedding_dim for f in self.interaction_features)
+ self.need_projection = not all(
+ f.embedding_dim == att_embedding_dim for f in self.interaction_features
+ )
  self.projection_layers = None
  if self.need_projection:
- self.projection_layers = nn.ModuleList([
- nn.Linear(f.embedding_dim, att_embedding_dim, bias=False)
- for f in self.interaction_features
- ])
-
+ self.projection_layers = nn.ModuleList(
+ [
+ nn.Linear(f.embedding_dim, att_embedding_dim, bias=False)
+ for f in self.interaction_features
+ ]
+ )
+
  # Multi-head self-attention layers
- self.attention_layers = nn.ModuleList([
- MultiHeadSelfAttention(
- embedding_dim=att_embedding_dim,
- num_heads=att_head_num,
- dropout=att_dropout,
- use_residual=att_use_residual
- ) for _ in range(att_layer_num)
- ])
-
+ self.attention_layers = nn.ModuleList(
+ [
+ MultiHeadSelfAttention(
+ embedding_dim=att_embedding_dim,
+ num_heads=att_head_num,
+ dropout=att_dropout,
+ use_residual=att_use_residual,
+ )
+ for _ in range(att_layer_num)
+ ]
+ )
+
  # Final prediction layer
  self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
  self.prediction_layer = PredictionLayer(task_type=self.task_type)

  # Register regularization weights
  self._register_regularization_weights(
- embedding_attr='embedding',
- include_modules=['projection_layers', 'attention_layers', 'fc']
+ embedding_attr="embedding",
+ include_modules=["projection_layers", "attention_layers", "fc"],
  )

- self.compile(
- optimizer=optimizer,
- optimizer_params=optimizer_params,
- loss=loss
- )
+ self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

  def forward(self, x):
  # Get embeddings field-by-field so mixed dimensions can be projected safely
  field_embeddings = []
  if len(self.interaction_features) == 0:
- raise ValueError("AutoInt requires at least one sparse or sequence feature for interactions.")
+ raise ValueError(
+ "AutoInt requires at least one sparse or sequence feature for interactions."
+ )
  for idx, feature in enumerate(self.interaction_features):
  feature_emb = self.embedding(x=x, features=[feature], squeeze_dim=False)
  feature_emb = feature_emb.squeeze(1) # [B, embedding_dim]
  if self.need_projection and self.projection_layers is not None:
  feature_emb = self.projection_layers[idx](feature_emb)
- field_embeddings.append(feature_emb.unsqueeze(1)) # [B, 1, att_embedding_dim or original_dim]
+ field_embeddings.append(
+ feature_emb.unsqueeze(1)
+ ) # [B, 1, att_embedding_dim or original_dim]
  embeddings = torch.cat(field_embeddings, dim=1)
-
+
  # Apply multi-head self-attention layers
  attention_output = embeddings
  for att_layer in self.attention_layers:
- attention_output = att_layer(attention_output) # [B, num_fields, att_embedding_dim]
-
+ attention_output = att_layer(
+ attention_output
+ ) # [B, num_fields, att_embedding_dim]
+
  # Flatten and predict
- attention_output_flat = attention_output.flatten(start_dim=1) # [B, num_fields * att_embedding_dim]
+ attention_output_flat = attention_output.flatten(
+ start_dim=1
+ ) # [B, num_fields * att_embedding_dim]
  y = self.fc(attention_output_flat) # [B, 1]
  return self.prediction_layer(y)
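For orientation, a minimal instantiation sketch against the reformatted 0.1.2 AutoInt signature above. The keyword arguments mirror the constructor in this diff; the DenseFeature/SparseFeature constructor arguments (name, vocab_size, embedding_dim) and the "label" target name are assumptions for illustration, not confirmed by this diff.

# Hypothetical usage sketch for the 0.1.2 AutoInt constructor shown above.
# Feature-constructor arguments and the target name are assumed.
from nextrec.basic.features import DenseFeature, SparseFeature
from nextrec.models.ranking.autoint import AutoInt

sparse = [
    SparseFeature(name="user_id", vocab_size=10_000, embedding_dim=8),
    SparseFeature(name="item_id", vocab_size=50_000, embedding_dim=8),
]
dense = [DenseFeature(name="price")]

model = AutoInt(
    dense_features=dense,
    sparse_features=sparse,
    sequence_features=[],
    att_layer_num=3,        # stacked self-attention blocks
    att_embedding_dim=8,    # per-field attention width; mismatched fields get projected
    att_head_num=2,
    target=["label"],
    optimizer="adam",
    optimizer_params={},
    loss="bce",
    device="cpu",
    model_id="autoint_demo",
)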
nextrec/models/ranking/dcn.py
@@ -24,24 +24,26 @@ class DCN(BaseModel):
  @property
  def task_type(self):
  return "binary"
-
- def __init__(self,
- dense_features: list[DenseFeature],
- sparse_features: list[SparseFeature],
- sequence_features: list[SequenceFeature],
- cross_num: int = 3,
- mlp_params: dict | None = None,
- target: list[str] = [],
- optimizer: str = "adam",
- optimizer_params: dict = {},
- loss: str | nn.Module | None = "bce",
- device: str = 'cpu',
- model_id: str = "baseline",
- embedding_l1_reg=1e-6,
- dense_l1_reg=1e-5,
- embedding_l2_reg=1e-5,
- dense_l2_reg=1e-4):
-
+
+ def __init__(
+ self,
+ dense_features: list[DenseFeature],
+ sparse_features: list[SparseFeature],
+ sequence_features: list[SequenceFeature],
+ cross_num: int = 3,
+ mlp_params: dict | None = None,
+ target: list[str] = [],
+ optimizer: str = "adam",
+ optimizer_params: dict = {},
+ loss: str | nn.Module | None = "bce",
+ device: str = "cpu",
+ model_id: str = "baseline",
+ embedding_l1_reg=1e-6,
+ dense_l1_reg=1e-5,
+ embedding_l2_reg=1e-5,
+ dense_l2_reg=1e-4,
+ ):
+
  super(DCN, self).__init__(
  dense_features=dense_features,
  sparse_features=sparse_features,
@@ -54,13 +56,13 @@ class DCN(BaseModel):
  embedding_l2_reg=embedding_l2_reg,
  dense_l2_reg=dense_l2_reg,
  early_stop_patience=20,
- model_id=model_id
+ model_id=model_id,
  )

  self.loss = loss
  if self.loss is None:
  self.loss = "bce"
-
+
  # All features
  self.all_features = dense_features + sparse_features + sequence_features

@@ -68,13 +70,21 @@ class DCN(BaseModel):
  self.embedding = EmbeddingLayer(features=self.all_features)

  # Calculate input dimension
- emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
- dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+ emb_dim_total = sum(
+ [
+ f.embedding_dim
+ for f in self.all_features
+ if not isinstance(f, DenseFeature)
+ ]
+ )
+ dense_input_dim = sum(
+ [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+ )
  input_dim = emb_dim_total + dense_input_dim
-
+
  # Cross Network
  self.cross_network = CrossNetwork(input_dim=input_dim, num_layers=cross_num)
-
+
  # Deep Network (optional)
  if mlp_params is not None:
  self.use_dnn = True
@@ -90,31 +100,29 @@ class DCN(BaseModel):

  # Register regularization weights
  self._register_regularization_weights(
- embedding_attr='embedding',
- include_modules=['cross_network', 'mlp', 'final_layer']
+ embedding_attr="embedding",
+ include_modules=["cross_network", "mlp", "final_layer"],
  )

- self.compile(
- optimizer=optimizer,
- optimizer_params=optimizer_params,
- loss=loss
- )
+ self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

  def forward(self, x):
  # Get all embeddings and flatten
  input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
-
+
  # Cross Network
  cross_output = self.cross_network(input_flat) # [B, input_dim]
-
+
  if self.use_dnn:
  # Deep Network
  deep_output = self.mlp(input_flat) # [B, 1]
  # Concatenate cross and deep
- combined = torch.cat([cross_output, deep_output], dim=-1) # [B, input_dim + 1]
+ combined = torch.cat(
+ [cross_output, deep_output], dim=-1
+ ) # [B, input_dim + 1]
  else:
  combined = cross_output
-
+
  # Final prediction
  y = self.final_layer(combined)
  return self.prediction_layer(y)
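For orientation, a minimal instantiation sketch against the reformatted 0.1.2 DCN signature above. Feature-constructor arguments and the "label" target name are assumptions; mlp_params=None is grounded in the diff, which guards the deep tower with "if mlp_params is not None", so the model runs with the cross network only.

# Hypothetical usage sketch for the 0.1.2 DCN constructor shown above.
# Feature-constructor arguments and the target name are assumed.
from nextrec.basic.features import SparseFeature
from nextrec.models.ranking.dcn import DCN

sparse = [
    SparseFeature(name="user_id", vocab_size=10_000, embedding_dim=16),
    SparseFeature(name="item_id", vocab_size=50_000, embedding_dim=16),
]

model = DCN(
    dense_features=[],
    sparse_features=sparse,
    sequence_features=[],
    cross_num=3,       # number of CrossNetwork layers
    mlp_params=None,   # no deep tower; the final layer sees only the cross output
    target=["label"],
    optimizer="adam",
    optimizer_params={},
    loss="bce",
    device="cpu",
)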
nextrec/models/ranking/deepfm.py
@@ -13,6 +13,7 @@ from nextrec.basic.model import BaseModel
  from nextrec.basic.layers import FM, LR, EmbeddingLayer, MLP, PredictionLayer
  from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature

+
  class DeepFM(BaseModel):
  @property
  def model_name(self):
@@ -21,23 +22,25 @@ class DeepFM(BaseModel):
  @property
  def task_type(self):
  return "binary"
-
- def __init__(self,
- dense_features: list[DenseFeature]|list = [],
- sparse_features: list[SparseFeature]|list = [],
- sequence_features: list[SequenceFeature]|list = [],
- mlp_params: dict = {},
- target: list[str]|str = [],
- optimizer: str = "adam",
- optimizer_params: dict = {},
- loss: str | nn.Module | None = "bce",
- device: str = 'cpu',
- model_id: str = "baseline",
- embedding_l1_reg=1e-6,
- dense_l1_reg=1e-5,
- embedding_l2_reg=1e-5,
- dense_l2_reg=1e-4):
-
+
+ def __init__(
+ self,
+ dense_features: list[DenseFeature] | list = [],
+ sparse_features: list[SparseFeature] | list = [],
+ sequence_features: list[SequenceFeature] | list = [],
+ mlp_params: dict = {},
+ target: list[str] | str = [],
+ optimizer: str = "adam",
+ optimizer_params: dict = {},
+ loss: str | nn.Module | None = "bce",
+ device: str = "cpu",
+ model_id: str = "baseline",
+ embedding_l1_reg=1e-6,
+ dense_l1_reg=1e-5,
+ embedding_l2_reg=1e-5,
+ dense_l2_reg=1e-4,
+ ):
+
  super(DeepFM, self).__init__(
  dense_features=dense_features,
  sparse_features=sparse_features,
@@ -50,21 +53,29 @@ class DeepFM(BaseModel):
  embedding_l2_reg=embedding_l2_reg,
  dense_l2_reg=dense_l2_reg,
  early_stop_patience=20,
- model_id=model_id
+ model_id=model_id,
  )

  self.loss = loss
  if self.loss is None:
  self.loss = "bce"
-
+
  self.fm_features = sparse_features + sequence_features
  self.deep_features = dense_features + sparse_features + sequence_features

  self.embedding = EmbeddingLayer(features=self.deep_features)

  fm_emb_dim_total = sum([f.embedding_dim for f in self.fm_features])
- deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
- dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
+ deep_emb_dim_total = sum(
+ [
+ f.embedding_dim
+ for f in self.deep_features
+ if not isinstance(f, DenseFeature)
+ ]
+ )
+ dense_input_dim = sum(
+ [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
+ )

  self.linear = LR(fm_emb_dim_total)
  self.fm = FM(reduce_sum=True)
@@ -73,15 +84,10 @@ class DeepFM(BaseModel):

  # Register regularization weights
  self._register_regularization_weights(
- embedding_attr='embedding',
- include_modules=['linear', 'mlp']
+ embedding_attr="embedding", include_modules=["linear", "mlp"]
  )

- self.compile(
- optimizer=optimizer,
- optimizer_params=optimizer_params,
- loss=loss
- )
+ self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)

  def forward(self, x):
  input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)
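For orientation, a minimal instantiation sketch against the reformatted 0.1.2 DeepFM signature above. The keyword names mirror the constructor in this diff; the feature-constructor arguments, the "label" target name, and the "dims" key inside mlp_params are assumptions for illustration, not confirmed by this diff.

# Hypothetical usage sketch for the 0.1.2 DeepFM constructor shown above.
# Feature-constructor arguments, the target name, and the mlp_params key
# ("dims") are assumed.
from nextrec.basic.features import DenseFeature, SparseFeature
from nextrec.models.ranking.deepfm import DeepFM

sparse = [
    SparseFeature(name="user_id", vocab_size=10_000, embedding_dim=8),
    SparseFeature(name="item_id", vocab_size=50_000, embedding_dim=8),
]
dense = [DenseFeature(name="price")]

model = DeepFM(
    dense_features=dense,
    sparse_features=sparse,
    sequence_features=[],
    mlp_params={"dims": [128, 64]},  # assumed key name for the MLP tower sizes
    target=["label"],
    optimizer="adam",
    optimizer_params={},
    loss="bce",
    device="cpu",
)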