nextrec 0.1.4__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. nextrec/__init__.py +4 -4
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +9 -10
  4. nextrec/basic/callback.py +0 -1
  5. nextrec/basic/dataloader.py +127 -168
  6. nextrec/basic/features.py +27 -24
  7. nextrec/basic/layers.py +159 -328
  8. nextrec/basic/loggers.py +37 -50
  9. nextrec/basic/metrics.py +147 -255
  10. nextrec/basic/model.py +462 -817
  11. nextrec/data/__init__.py +5 -5
  12. nextrec/data/data_utils.py +12 -16
  13. nextrec/data/preprocessor.py +252 -276
  14. nextrec/loss/__init__.py +12 -12
  15. nextrec/loss/loss_utils.py +22 -30
  16. nextrec/loss/match_losses.py +83 -116
  17. nextrec/models/match/__init__.py +5 -5
  18. nextrec/models/match/dssm.py +61 -70
  19. nextrec/models/match/dssm_v2.py +51 -61
  20. nextrec/models/match/mind.py +71 -89
  21. nextrec/models/match/sdm.py +81 -93
  22. nextrec/models/match/youtube_dnn.py +53 -62
  23. nextrec/models/multi_task/esmm.py +43 -49
  24. nextrec/models/multi_task/mmoe.py +56 -65
  25. nextrec/models/multi_task/ple.py +65 -92
  26. nextrec/models/multi_task/share_bottom.py +42 -48
  27. nextrec/models/ranking/__init__.py +7 -7
  28. nextrec/models/ranking/afm.py +30 -39
  29. nextrec/models/ranking/autoint.py +57 -70
  30. nextrec/models/ranking/dcn.py +35 -43
  31. nextrec/models/ranking/deepfm.py +28 -34
  32. nextrec/models/ranking/dien.py +79 -115
  33. nextrec/models/ranking/din.py +60 -84
  34. nextrec/models/ranking/fibinet.py +35 -51
  35. nextrec/models/ranking/fm.py +26 -28
  36. nextrec/models/ranking/masknet.py +31 -31
  37. nextrec/models/ranking/pnn.py +31 -30
  38. nextrec/models/ranking/widedeep.py +31 -36
  39. nextrec/models/ranking/xdeepfm.py +39 -46
  40. nextrec/utils/__init__.py +9 -9
  41. nextrec/utils/embedding.py +1 -1
  42. nextrec/utils/initializer.py +15 -23
  43. nextrec/utils/optimizer.py +10 -14
  44. {nextrec-0.1.4.dist-info → nextrec-0.1.8.dist-info}/METADATA +16 -7
  45. nextrec-0.1.8.dist-info/RECORD +51 -0
  46. nextrec-0.1.4.dist-info/RECORD +0 -51
  47. {nextrec-0.1.4.dist-info → nextrec-0.1.8.dist-info}/WHEEL +0 -0
  48. {nextrec-0.1.4.dist-info → nextrec-0.1.8.dist-info}/licenses/LICENSE +0 -0
@@ -3,8 +3,8 @@ Date: create on 09/11/2025
3
3
  Author:
4
4
  Yang Zhou,zyaztec@gmail.com
5
5
  Reference:
6
- [1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
7
- self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
6
+ [1] Song W, Shi C, Xiao Z, et al. Autoint: Automatic feature interaction learning via
7
+ self-attentive neural networks[C]//Proceedings of the 28th ACM international conference
8
8
  on information and knowledge management. 2019: 1161-1170.
9
9
  (https://arxiv.org/abs/1810.11921)
10
10
  """
@@ -25,29 +25,27 @@ class AutoInt(BaseModel):
25
25
  @property
26
26
  def task_type(self):
27
27
  return "binary"
28
-
29
- def __init__(
30
- self,
31
- dense_features: list[DenseFeature],
32
- sparse_features: list[SparseFeature],
33
- sequence_features: list[SequenceFeature],
34
- att_layer_num: int = 3,
35
- att_embedding_dim: int = 8,
36
- att_head_num: int = 2,
37
- att_dropout: float = 0.0,
38
- att_use_residual: bool = True,
39
- target: list[str] = [],
40
- optimizer: str = "adam",
41
- optimizer_params: dict = {},
42
- loss: str | nn.Module | None = "bce",
43
- device: str = "cpu",
44
- model_id: str = "baseline",
45
- embedding_l1_reg=1e-6,
46
- dense_l1_reg=1e-5,
47
- embedding_l2_reg=1e-5,
48
- dense_l2_reg=1e-4,
49
- ):
50
-
28
+
29
+ def __init__(self,
30
+ dense_features: list[DenseFeature],
31
+ sparse_features: list[SparseFeature],
32
+ sequence_features: list[SequenceFeature],
33
+ att_layer_num: int = 3,
34
+ att_embedding_dim: int = 8,
35
+ att_head_num: int = 2,
36
+ att_dropout: float = 0.0,
37
+ att_use_residual: bool = True,
38
+ target: list[str] = [],
39
+ optimizer: str = "adam",
40
+ optimizer_params: dict = {},
41
+ loss: str | nn.Module | None = "bce",
42
+ device: str = 'cpu',
43
+ model_id: str = "baseline",
44
+ embedding_l1_reg=1e-6,
45
+ dense_l1_reg=1e-5,
46
+ embedding_l2_reg=1e-5,
47
+ dense_l2_reg=1e-4):
48
+
51
49
  super(AutoInt, self).__init__(
52
50
  dense_features=dense_features,
53
51
  sparse_features=sparse_features,
@@ -60,94 +58,83 @@ class AutoInt(BaseModel):
60
58
  embedding_l2_reg=embedding_l2_reg,
61
59
  dense_l2_reg=dense_l2_reg,
62
60
  early_stop_patience=20,
63
- model_id=model_id,
61
+ model_id=model_id
64
62
  )
65
63
 
66
64
  self.loss = loss
67
65
  if self.loss is None:
68
66
  self.loss = "bce"
69
-
67
+
70
68
  self.att_layer_num = att_layer_num
71
69
  self.att_embedding_dim = att_embedding_dim
72
-
70
+
73
71
  # Use sparse and sequence features for interaction
74
72
  self.interaction_features = sparse_features + sequence_features
75
-
73
+
76
74
  # All features for embedding
77
75
  self.all_features = dense_features + sparse_features + sequence_features
78
76
 
79
77
  # Embedding layer
80
78
  self.embedding = EmbeddingLayer(features=self.all_features)
81
-
79
+
82
80
  # Project embeddings to attention embedding dimension
83
81
  num_fields = len(self.interaction_features)
84
82
  total_embedding_dim = sum([f.embedding_dim for f in self.interaction_features])
85
-
83
+
86
84
  # If embeddings have different dimensions, project them to att_embedding_dim
87
- self.need_projection = not all(
88
- f.embedding_dim == att_embedding_dim for f in self.interaction_features
89
- )
85
+ self.need_projection = not all(f.embedding_dim == att_embedding_dim for f in self.interaction_features)
90
86
  self.projection_layers = None
91
87
  if self.need_projection:
92
- self.projection_layers = nn.ModuleList(
93
- [
94
- nn.Linear(f.embedding_dim, att_embedding_dim, bias=False)
95
- for f in self.interaction_features
96
- ]
97
- )
98
-
88
+ self.projection_layers = nn.ModuleList([
89
+ nn.Linear(f.embedding_dim, att_embedding_dim, bias=False)
90
+ for f in self.interaction_features
91
+ ])
92
+
99
93
  # Multi-head self-attention layers
100
- self.attention_layers = nn.ModuleList(
101
- [
102
- MultiHeadSelfAttention(
103
- embedding_dim=att_embedding_dim,
104
- num_heads=att_head_num,
105
- dropout=att_dropout,
106
- use_residual=att_use_residual,
107
- )
108
- for _ in range(att_layer_num)
109
- ]
110
- )
111
-
94
+ self.attention_layers = nn.ModuleList([
95
+ MultiHeadSelfAttention(
96
+ embedding_dim=att_embedding_dim,
97
+ num_heads=att_head_num,
98
+ dropout=att_dropout,
99
+ use_residual=att_use_residual
100
+ ) for _ in range(att_layer_num)
101
+ ])
102
+
112
103
  # Final prediction layer
113
104
  self.fc = nn.Linear(num_fields * att_embedding_dim, 1)
114
105
  self.prediction_layer = PredictionLayer(task_type=self.task_type)
115
106
 
116
107
  # Register regularization weights
117
108
  self._register_regularization_weights(
118
- embedding_attr="embedding",
119
- include_modules=["projection_layers", "attention_layers", "fc"],
109
+ embedding_attr='embedding',
110
+ include_modules=['projection_layers', 'attention_layers', 'fc']
120
111
  )
121
112
 
122
- self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
113
+ self.compile(
114
+ optimizer=optimizer,
115
+ optimizer_params=optimizer_params,
116
+ loss=loss
117
+ )
123
118
 
124
119
  def forward(self, x):
125
120
  # Get embeddings field-by-field so mixed dimensions can be projected safely
126
121
  field_embeddings = []
127
122
  if len(self.interaction_features) == 0:
128
- raise ValueError(
129
- "AutoInt requires at least one sparse or sequence feature for interactions."
130
- )
123
+ raise ValueError("AutoInt requires at least one sparse or sequence feature for interactions.")
131
124
  for idx, feature in enumerate(self.interaction_features):
132
125
  feature_emb = self.embedding(x=x, features=[feature], squeeze_dim=False)
133
126
  feature_emb = feature_emb.squeeze(1) # [B, embedding_dim]
134
127
  if self.need_projection and self.projection_layers is not None:
135
128
  feature_emb = self.projection_layers[idx](feature_emb)
136
- field_embeddings.append(
137
- feature_emb.unsqueeze(1)
138
- ) # [B, 1, att_embedding_dim or original_dim]
129
+ field_embeddings.append(feature_emb.unsqueeze(1)) # [B, 1, att_embedding_dim or original_dim]
139
130
  embeddings = torch.cat(field_embeddings, dim=1)
140
-
131
+
141
132
  # Apply multi-head self-attention layers
142
133
  attention_output = embeddings
143
134
  for att_layer in self.attention_layers:
144
- attention_output = att_layer(
145
- attention_output
146
- ) # [B, num_fields, att_embedding_dim]
147
-
135
+ attention_output = att_layer(attention_output) # [B, num_fields, att_embedding_dim]
136
+
148
137
  # Flatten and predict
149
- attention_output_flat = attention_output.flatten(
150
- start_dim=1
151
- ) # [B, num_fields * att_embedding_dim]
138
+ attention_output_flat = attention_output.flatten(start_dim=1) # [B, num_fields * att_embedding_dim]
152
139
  y = self.fc(attention_output_flat) # [B, 1]
153
140
  return self.prediction_layer(y)
@@ -24,26 +24,24 @@ class DCN(BaseModel):
24
24
  @property
25
25
  def task_type(self):
26
26
  return "binary"
27
-
28
- def __init__(
29
- self,
30
- dense_features: list[DenseFeature],
31
- sparse_features: list[SparseFeature],
32
- sequence_features: list[SequenceFeature],
33
- cross_num: int = 3,
34
- mlp_params: dict | None = None,
35
- target: list[str] = [],
36
- optimizer: str = "adam",
37
- optimizer_params: dict = {},
38
- loss: str | nn.Module | None = "bce",
39
- device: str = "cpu",
40
- model_id: str = "baseline",
41
- embedding_l1_reg=1e-6,
42
- dense_l1_reg=1e-5,
43
- embedding_l2_reg=1e-5,
44
- dense_l2_reg=1e-4,
45
- ):
46
-
27
+
28
+ def __init__(self,
29
+ dense_features: list[DenseFeature],
30
+ sparse_features: list[SparseFeature],
31
+ sequence_features: list[SequenceFeature],
32
+ cross_num: int = 3,
33
+ mlp_params: dict | None = None,
34
+ target: list[str] = [],
35
+ optimizer: str = "adam",
36
+ optimizer_params: dict = {},
37
+ loss: str | nn.Module | None = "bce",
38
+ device: str = 'cpu',
39
+ model_id: str = "baseline",
40
+ embedding_l1_reg=1e-6,
41
+ dense_l1_reg=1e-5,
42
+ embedding_l2_reg=1e-5,
43
+ dense_l2_reg=1e-4):
44
+
47
45
  super(DCN, self).__init__(
48
46
  dense_features=dense_features,
49
47
  sparse_features=sparse_features,
@@ -56,13 +54,13 @@ class DCN(BaseModel):
56
54
  embedding_l2_reg=embedding_l2_reg,
57
55
  dense_l2_reg=dense_l2_reg,
58
56
  early_stop_patience=20,
59
- model_id=model_id,
57
+ model_id=model_id
60
58
  )
61
59
 
62
60
  self.loss = loss
63
61
  if self.loss is None:
64
62
  self.loss = "bce"
65
-
63
+
66
64
  # All features
67
65
  self.all_features = dense_features + sparse_features + sequence_features
68
66
 
@@ -70,21 +68,13 @@ class DCN(BaseModel):
70
68
  self.embedding = EmbeddingLayer(features=self.all_features)
71
69
 
72
70
  # Calculate input dimension
73
- emb_dim_total = sum(
74
- [
75
- f.embedding_dim
76
- for f in self.all_features
77
- if not isinstance(f, DenseFeature)
78
- ]
79
- )
80
- dense_input_dim = sum(
81
- [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
82
- )
71
+ emb_dim_total = sum([f.embedding_dim for f in self.all_features if not isinstance(f, DenseFeature)])
72
+ dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
83
73
  input_dim = emb_dim_total + dense_input_dim
84
-
74
+
85
75
  # Cross Network
86
76
  self.cross_network = CrossNetwork(input_dim=input_dim, num_layers=cross_num)
87
-
77
+
88
78
  # Deep Network (optional)
89
79
  if mlp_params is not None:
90
80
  self.use_dnn = True
@@ -100,29 +90,31 @@ class DCN(BaseModel):
100
90
 
101
91
  # Register regularization weights
102
92
  self._register_regularization_weights(
103
- embedding_attr="embedding",
104
- include_modules=["cross_network", "mlp", "final_layer"],
93
+ embedding_attr='embedding',
94
+ include_modules=['cross_network', 'mlp', 'final_layer']
105
95
  )
106
96
 
107
- self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
97
+ self.compile(
98
+ optimizer=optimizer,
99
+ optimizer_params=optimizer_params,
100
+ loss=loss
101
+ )
108
102
 
109
103
  def forward(self, x):
110
104
  # Get all embeddings and flatten
111
105
  input_flat = self.embedding(x=x, features=self.all_features, squeeze_dim=True)
112
-
106
+
113
107
  # Cross Network
114
108
  cross_output = self.cross_network(input_flat) # [B, input_dim]
115
-
109
+
116
110
  if self.use_dnn:
117
111
  # Deep Network
118
112
  deep_output = self.mlp(input_flat) # [B, 1]
119
113
  # Concatenate cross and deep
120
- combined = torch.cat(
121
- [cross_output, deep_output], dim=-1
122
- ) # [B, input_dim + 1]
114
+ combined = torch.cat([cross_output, deep_output], dim=-1) # [B, input_dim + 1]
123
115
  else:
124
116
  combined = cross_output
125
-
117
+
126
118
  # Final prediction
127
119
  y = self.final_layer(combined)
128
120
  return self.prediction_layer(y)
@@ -13,7 +13,6 @@ from nextrec.basic.model import BaseModel
13
13
  from nextrec.basic.layers import FM, LR, EmbeddingLayer, MLP, PredictionLayer
14
14
  from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
15
15
 
16
-
17
16
  class DeepFM(BaseModel):
18
17
  @property
19
18
  def model_name(self):
@@ -22,25 +21,23 @@ class DeepFM(BaseModel):
22
21
  @property
23
22
  def task_type(self):
24
23
  return "binary"
25
-
26
- def __init__(
27
- self,
28
- dense_features: list[DenseFeature] | list = [],
29
- sparse_features: list[SparseFeature] | list = [],
30
- sequence_features: list[SequenceFeature] | list = [],
31
- mlp_params: dict = {},
32
- target: list[str] | str = [],
33
- optimizer: str = "adam",
34
- optimizer_params: dict = {},
35
- loss: str | nn.Module | None = "bce",
36
- device: str = "cpu",
37
- model_id: str = "baseline",
38
- embedding_l1_reg=1e-6,
39
- dense_l1_reg=1e-5,
40
- embedding_l2_reg=1e-5,
41
- dense_l2_reg=1e-4,
42
- ):
43
-
24
+
25
+ def __init__(self,
26
+ dense_features: list[DenseFeature]|list = [],
27
+ sparse_features: list[SparseFeature]|list = [],
28
+ sequence_features: list[SequenceFeature]|list = [],
29
+ mlp_params: dict = {},
30
+ target: list[str]|str = [],
31
+ optimizer: str = "adam",
32
+ optimizer_params: dict = {},
33
+ loss: str | nn.Module | None = "bce",
34
+ device: str = 'cpu',
35
+ model_id: str = "baseline",
36
+ embedding_l1_reg=1e-6,
37
+ dense_l1_reg=1e-5,
38
+ embedding_l2_reg=1e-5,
39
+ dense_l2_reg=1e-4):
40
+
44
41
  super(DeepFM, self).__init__(
45
42
  dense_features=dense_features,
46
43
  sparse_features=sparse_features,
@@ -53,29 +50,21 @@ class DeepFM(BaseModel):
53
50
  embedding_l2_reg=embedding_l2_reg,
54
51
  dense_l2_reg=dense_l2_reg,
55
52
  early_stop_patience=20,
56
- model_id=model_id,
53
+ model_id=model_id
57
54
  )
58
55
 
59
56
  self.loss = loss
60
57
  if self.loss is None:
61
58
  self.loss = "bce"
62
-
59
+
63
60
  self.fm_features = sparse_features + sequence_features
64
61
  self.deep_features = dense_features + sparse_features + sequence_features
65
62
 
66
63
  self.embedding = EmbeddingLayer(features=self.deep_features)
67
64
 
68
65
  fm_emb_dim_total = sum([f.embedding_dim for f in self.fm_features])
69
- deep_emb_dim_total = sum(
70
- [
71
- f.embedding_dim
72
- for f in self.deep_features
73
- if not isinstance(f, DenseFeature)
74
- ]
75
- )
76
- dense_input_dim = sum(
77
- [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
78
- )
66
+ deep_emb_dim_total = sum([f.embedding_dim for f in self.deep_features if not isinstance(f, DenseFeature)])
67
+ dense_input_dim = sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
79
68
 
80
69
  self.linear = LR(fm_emb_dim_total)
81
70
  self.fm = FM(reduce_sum=True)
@@ -84,10 +73,15 @@ class DeepFM(BaseModel):
84
73
 
85
74
  # Register regularization weights
86
75
  self._register_regularization_weights(
87
- embedding_attr="embedding", include_modules=["linear", "mlp"]
76
+ embedding_attr='embedding',
77
+ include_modules=['linear', 'mlp']
88
78
  )
89
79
 
90
- self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss)
80
+ self.compile(
81
+ optimizer=optimizer,
82
+ optimizer_params=optimizer_params,
83
+ loss=loss
84
+ )
91
85
 
92
86
  def forward(self, x):
93
87
  input_deep = self.embedding(x=x, features=self.deep_features, squeeze_dim=True)