nextrec-0.1.4-py3-none-any.whl → nextrec-0.1.7-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Files changed (48)
  1. nextrec/__init__.py +4 -4
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +9 -10
  4. nextrec/basic/callback.py +0 -1
  5. nextrec/basic/dataloader.py +127 -168
  6. nextrec/basic/features.py +27 -24
  7. nextrec/basic/layers.py +159 -328
  8. nextrec/basic/loggers.py +37 -50
  9. nextrec/basic/metrics.py +147 -255
  10. nextrec/basic/model.py +462 -817
  11. nextrec/data/__init__.py +5 -5
  12. nextrec/data/data_utils.py +12 -16
  13. nextrec/data/preprocessor.py +252 -276
  14. nextrec/loss/__init__.py +12 -12
  15. nextrec/loss/loss_utils.py +22 -30
  16. nextrec/loss/match_losses.py +83 -116
  17. nextrec/models/match/__init__.py +5 -5
  18. nextrec/models/match/dssm.py +61 -70
  19. nextrec/models/match/dssm_v2.py +51 -61
  20. nextrec/models/match/mind.py +71 -89
  21. nextrec/models/match/sdm.py +81 -93
  22. nextrec/models/match/youtube_dnn.py +53 -62
  23. nextrec/models/multi_task/esmm.py +43 -49
  24. nextrec/models/multi_task/mmoe.py +56 -65
  25. nextrec/models/multi_task/ple.py +65 -92
  26. nextrec/models/multi_task/share_bottom.py +42 -48
  27. nextrec/models/ranking/__init__.py +7 -7
  28. nextrec/models/ranking/afm.py +30 -39
  29. nextrec/models/ranking/autoint.py +57 -70
  30. nextrec/models/ranking/dcn.py +35 -43
  31. nextrec/models/ranking/deepfm.py +28 -34
  32. nextrec/models/ranking/dien.py +79 -115
  33. nextrec/models/ranking/din.py +60 -84
  34. nextrec/models/ranking/fibinet.py +35 -51
  35. nextrec/models/ranking/fm.py +26 -28
  36. nextrec/models/ranking/masknet.py +31 -31
  37. nextrec/models/ranking/pnn.py +31 -30
  38. nextrec/models/ranking/widedeep.py +31 -36
  39. nextrec/models/ranking/xdeepfm.py +39 -46
  40. nextrec/utils/__init__.py +9 -9
  41. nextrec/utils/embedding.py +1 -1
  42. nextrec/utils/initializer.py +15 -23
  43. nextrec/utils/optimizer.py +10 -14
  44. {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/METADATA +16 -7
  45. nextrec-0.1.7.dist-info/RECORD +51 -0
  46. nextrec-0.1.4.dist-info/RECORD +0 -51
  47. {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/WHEEL +0 -0
  48. {nextrec-0.1.4.dist-info → nextrec-0.1.7.dist-info}/licenses/LICENSE +0 -0
nextrec/models/match/dssm.py

@@ -6,7 +6,6 @@ Reference:
 [1] Huang P S, He X, Gao J, et al. Learning deep structured semantic models for web search using clickthrough data[C]
 //Proceedings of the 22nd ACM international conference on Information & Knowledge Management. 2013: 2333-2338.
 """
-
 import torch
 import torch.nn as nn
 from typing import Optional, Literal
@@ -19,40 +18,38 @@ from nextrec.basic.layers import MLP, EmbeddingLayer
 class DSSM(BaseMatchModel):
     """
     Deep Structured Semantic Model
-
+
     Two-tower model: user and item features are encoded into embeddings by separate towers, and the matching score is computed via cosine similarity or dot product
     """
-
+
     @property
     def model_name(self) -> str:
         return "DSSM"
-
-    def __init__(
-        self,
-        user_dense_features: list[DenseFeature] | None = None,
-        user_sparse_features: list[SparseFeature] | None = None,
-        user_sequence_features: list[SequenceFeature] | None = None,
-        item_dense_features: list[DenseFeature] | None = None,
-        item_sparse_features: list[SparseFeature] | None = None,
-        item_sequence_features: list[SequenceFeature] | None = None,
-        user_dnn_hidden_units: list[int] = [256, 128, 64],
-        item_dnn_hidden_units: list[int] = [256, 128, 64],
-        embedding_dim: int = 64,
-        dnn_activation: str = "relu",
-        dnn_dropout: float = 0.0,
-        training_mode: Literal["pointwise", "pairwise", "listwise"] = "pointwise",
-        num_negative_samples: int = 4,
-        temperature: float = 1.0,
-        similarity_metric: Literal["dot", "cosine", "euclidean"] = "cosine",
-        device: str = "cpu",
-        embedding_l1_reg: float = 0.0,
-        dense_l1_reg: float = 0.0,
-        embedding_l2_reg: float = 0.0,
-        dense_l2_reg: float = 0.0,
-        early_stop_patience: int = 20,
-        model_id: str = "dssm",
-    ):
-
+
+    def __init__(self,
+                 user_dense_features: list[DenseFeature] | None = None,
+                 user_sparse_features: list[SparseFeature] | None = None,
+                 user_sequence_features: list[SequenceFeature] | None = None,
+                 item_dense_features: list[DenseFeature] | None = None,
+                 item_sparse_features: list[SparseFeature] | None = None,
+                 item_sequence_features: list[SequenceFeature] | None = None,
+                 user_dnn_hidden_units: list[int] = [256, 128, 64],
+                 item_dnn_hidden_units: list[int] = [256, 128, 64],
+                 embedding_dim: int = 64,
+                 dnn_activation: str = 'relu',
+                 dnn_dropout: float = 0.0,
+                 training_mode: Literal['pointwise', 'pairwise', 'listwise'] = 'pointwise',
+                 num_negative_samples: int = 4,
+                 temperature: float = 1.0,
+                 similarity_metric: Literal['dot', 'cosine', 'euclidean'] = 'cosine',
+                 device: str = 'cpu',
+                 embedding_l1_reg: float = 0.0,
+                 dense_l1_reg: float = 0.0,
+                 embedding_l2_reg: float = 0.0,
+                 dense_l2_reg: float = 0.0,
+                 early_stop_patience: int = 20,
+                 model_id: str = 'dssm'):
+
         super(DSSM, self).__init__(
             user_dense_features=user_dense_features,
             user_sparse_features=user_sparse_features,
@@ -70,13 +67,13 @@ class DSSM(BaseMatchModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=early_stop_patience,
-            model_id=model_id,
+            model_id=model_id
         )
-
+
         self.embedding_dim = embedding_dim
         self.user_dnn_hidden_units = user_dnn_hidden_units
         self.item_dnn_hidden_units = item_dnn_hidden_units
-
+
         # User tower embedding layer
         user_features = []
         if user_dense_features:
@@ -85,10 +82,10 @@ class DSSM(BaseMatchModel):
             user_features.extend(user_sparse_features)
         if user_sequence_features:
             user_features.extend(user_sequence_features)
-
+
         if len(user_features) > 0:
             self.user_embedding = EmbeddingLayer(user_features)
-
+
         # Compute the user tower input dimension
         user_input_dim = 0
         for feat in user_dense_features or []:
@@ -97,7 +94,7 @@ class DSSM(BaseMatchModel):
             user_input_dim += feat.embedding_dim
         for feat in user_sequence_features or []:
             user_input_dim += feat.embedding_dim
-
+
         # User DNN
         user_dnn_units = user_dnn_hidden_units + [embedding_dim]
         self.user_dnn = MLP(
@@ -105,9 +102,9 @@ class DSSM(BaseMatchModel):
             dims=user_dnn_units,
             output_layer=False,
             dropout=dnn_dropout,
-            activation=dnn_activation,
+            activation=dnn_activation
         )
-
+
         # Item tower embedding layer
         item_features = []
         if item_dense_features:
@@ -116,10 +113,10 @@ class DSSM(BaseMatchModel):
             item_features.extend(item_sparse_features)
         if item_sequence_features:
             item_features.extend(item_sequence_features)
-
+
         if len(item_features) > 0:
             self.item_embedding = EmbeddingLayer(item_features)
-
+
         # Compute the item tower input dimension
         item_input_dim = 0
         for feat in item_dense_features or []:
@@ -128,7 +125,7 @@ class DSSM(BaseMatchModel):
             item_input_dim += feat.embedding_dim
         for feat in item_sequence_features or []:
             item_input_dim += feat.embedding_dim
-
+
         # Item DNN
         item_dnn_units = item_dnn_hidden_units + [embedding_dim]
         self.item_dnn = MLP(
@@ -136,74 +133,68 @@ class DSSM(BaseMatchModel):
             dims=item_dnn_units,
             output_layer=False,
             dropout=dnn_dropout,
-            activation=dnn_activation,
+            activation=dnn_activation
         )
-
+
         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr="user_embedding", include_modules=["user_dnn"]
+            embedding_attr='user_embedding',
+            include_modules=['user_dnn']
         )
         self._register_regularization_weights(
-            embedding_attr="item_embedding", include_modules=["item_dnn"]
+            embedding_attr='item_embedding',
+            include_modules=['item_dnn']
         )
-
+
         self.compile(
             optimizer="adam",
             optimizer_params={"lr": 1e-3, "weight_decay": 1e-5},
         )
 
         self.to(device)
-
+
     def user_tower(self, user_input: dict) -> torch.Tensor:
         """
         User tower: encode user features into an embedding
-
+
         Args:
             user_input: dict of user features
-
+
         Returns:
             user_emb: [batch_size, embedding_dim]
         """
         # Get embeddings for the user features
-        all_user_features = (
-            self.user_dense_features
-            + self.user_sparse_features
-            + self.user_sequence_features
-        )
+        all_user_features = self.user_dense_features + self.user_sparse_features + self.user_sequence_features
         user_emb = self.user_embedding(user_input, all_user_features, squeeze_dim=True)
-
+
         # Pass through the user DNN
         user_emb = self.user_dnn(user_emb)
-
+
         # L2 normalize for cosine similarity
-        if self.similarity_metric == "cosine":
+        if self.similarity_metric == 'cosine':
             user_emb = torch.nn.functional.normalize(user_emb, p=2, dim=1)
-
+
         return user_emb
-
+
     def item_tower(self, item_input: dict) -> torch.Tensor:
         """
         Item tower: encode item features into an embedding
-
+
         Args:
             item_input: dict of item features
-
+
         Returns:
             item_emb: [batch_size, embedding_dim] or [batch_size, num_items, embedding_dim]
         """
         # Get embeddings for the item features
-        all_item_features = (
-            self.item_dense_features
-            + self.item_sparse_features
-            + self.item_sequence_features
-        )
+        all_item_features = self.item_dense_features + self.item_sparse_features + self.item_sequence_features
         item_emb = self.item_embedding(item_input, all_item_features, squeeze_dim=True)
-
+
         # Pass through the item DNN
         item_emb = self.item_dnn(item_emb)
-
+
         # L2 normalize for cosine similarity
-        if self.similarity_metric == "cosine":
+        if self.similarity_metric == 'cosine':
             item_emb = torch.nn.functional.normalize(item_emb, p=2, dim=1)
-
+
         return item_emb
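
A note on the tower code above: when `similarity_metric == 'cosine'`, both towers L2-normalize their outputs, so a plain dot product of the two tower embeddings already equals the cosine similarity of the raw vectors. A minimal, self-contained check of that equivalence in plain PyTorch (the random tensors are stand-ins for tower outputs, not nextrec code):

```python
import torch
import torch.nn.functional as F

torch.manual_seed(0)
user_emb = torch.randn(4, 64)  # stand-in for raw user-tower output
item_emb = torch.randn(4, 64)  # stand-in for raw item-tower output

# What DSSM's towers do when similarity_metric == 'cosine':
user_norm = F.normalize(user_emb, p=2, dim=1)
item_norm = F.normalize(item_emb, p=2, dim=1)

# Dot product of normalized vectors == cosine similarity of raw vectors.
dot_of_normalized = (user_norm * item_norm).sum(dim=1)
cosine_of_raw = F.cosine_similarity(user_emb, item_emb, dim=1)
assert torch.allclose(dot_of_normalized, cosine_of_raw, atol=1e-6)
```

This is why the normalization lives inside the towers: at serving time, retrieval can use fast dot-product (MIPS) indexes while still scoring by cosine.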
nextrec/models/match/dssm_v2.py

@@ -5,7 +5,6 @@ Author:
 Reference:
     DSSM v2 - DSSM with pairwise training using BPR loss
 """
-
 import torch
 import torch.nn as nn
 from typing import Literal
@@ -19,37 +18,34 @@ class DSSM_v2(BaseMatchModel):
     """
     DSSM with Pairwise Training
     """
-
     @property
     def model_name(self) -> str:
         return "DSSM_v2"
-
-    def __init__(
-        self,
-        user_dense_features: list[DenseFeature] | None = None,
-        user_sparse_features: list[SparseFeature] | None = None,
-        user_sequence_features: list[SequenceFeature] | None = None,
-        item_dense_features: list[DenseFeature] | None = None,
-        item_sparse_features: list[SparseFeature] | None = None,
-        item_sequence_features: list[SequenceFeature] | None = None,
-        user_dnn_hidden_units: list[int] = [256, 128, 64],
-        item_dnn_hidden_units: list[int] = [256, 128, 64],
-        embedding_dim: int = 64,
-        dnn_activation: str = "relu",
-        dnn_dropout: float = 0.0,
-        training_mode: Literal["pointwise", "pairwise", "listwise"] = "pairwise",
-        num_negative_samples: int = 4,
-        temperature: float = 1.0,
-        similarity_metric: Literal["dot", "cosine", "euclidean"] = "dot",
-        device: str = "cpu",
-        embedding_l1_reg: float = 0.0,
-        dense_l1_reg: float = 0.0,
-        embedding_l2_reg: float = 0.0,
-        dense_l2_reg: float = 0.0,
-        early_stop_patience: int = 20,
-        model_id: str = "dssm_v2",
-    ):
-
+
+    def __init__(self,
+                 user_dense_features: list[DenseFeature] | None = None,
+                 user_sparse_features: list[SparseFeature] | None = None,
+                 user_sequence_features: list[SequenceFeature] | None = None,
+                 item_dense_features: list[DenseFeature] | None = None,
+                 item_sparse_features: list[SparseFeature] | None = None,
+                 item_sequence_features: list[SequenceFeature] | None = None,
+                 user_dnn_hidden_units: list[int] = [256, 128, 64],
+                 item_dnn_hidden_units: list[int] = [256, 128, 64],
+                 embedding_dim: int = 64,
+                 dnn_activation: str = 'relu',
+                 dnn_dropout: float = 0.0,
+                 training_mode: Literal['pointwise', 'pairwise', 'listwise'] = 'pairwise',
+                 num_negative_samples: int = 4,
+                 temperature: float = 1.0,
+                 similarity_metric: Literal['dot', 'cosine', 'euclidean'] = 'dot',
+                 device: str = 'cpu',
+                 embedding_l1_reg: float = 0.0,
+                 dense_l1_reg: float = 0.0,
+                 embedding_l2_reg: float = 0.0,
+                 dense_l2_reg: float = 0.0,
+                 early_stop_patience: int = 20,
+                 model_id: str = 'dssm_v2'):
+
         super(DSSM_v2, self).__init__(
             user_dense_features=user_dense_features,
             user_sparse_features=user_sparse_features,
@@ -67,13 +63,13 @@ class DSSM_v2(BaseMatchModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=early_stop_patience,
-            model_id=model_id,
+            model_id=model_id
         )
-
+
         self.embedding_dim = embedding_dim
         self.user_dnn_hidden_units = user_dnn_hidden_units
         self.item_dnn_hidden_units = item_dnn_hidden_units
-
+
         # User tower
         user_features = []
         if user_dense_features:
@@ -82,10 +78,10 @@ class DSSM_v2(BaseMatchModel):
             user_features.extend(user_sparse_features)
         if user_sequence_features:
             user_features.extend(user_sequence_features)
-
+
         if len(user_features) > 0:
             self.user_embedding = EmbeddingLayer(user_features)
-
+
         user_input_dim = 0
         for feat in user_dense_features or []:
             user_input_dim += 1
@@ -93,16 +89,16 @@ class DSSM_v2(BaseMatchModel):
             user_input_dim += feat.embedding_dim
         for feat in user_sequence_features or []:
             user_input_dim += feat.embedding_dim
-
+
         user_dnn_units = user_dnn_hidden_units + [embedding_dim]
         self.user_dnn = MLP(
             input_dim=user_input_dim,
             dims=user_dnn_units,
             output_layer=False,
             dropout=dnn_dropout,
-            activation=dnn_activation,
+            activation=dnn_activation
         )
-
+
         # Item tower
         item_features = []
         if item_dense_features:
@@ -111,10 +107,10 @@ class DSSM_v2(BaseMatchModel):
             item_features.extend(item_sparse_features)
         if item_sequence_features:
             item_features.extend(item_sequence_features)
-
+
         if len(item_features) > 0:
             self.item_embedding = EmbeddingLayer(item_features)
-
+
         item_input_dim = 0
         for feat in item_dense_features or []:
             item_input_dim += 1
@@ -122,51 +118,45 @@ class DSSM_v2(BaseMatchModel):
             item_input_dim += feat.embedding_dim
         for feat in item_sequence_features or []:
             item_input_dim += feat.embedding_dim
-
+
         item_dnn_units = item_dnn_hidden_units + [embedding_dim]
         self.item_dnn = MLP(
             input_dim=item_input_dim,
             dims=item_dnn_units,
             output_layer=False,
             dropout=dnn_dropout,
-            activation=dnn_activation,
+            activation=dnn_activation
         )
-
+
         self._register_regularization_weights(
-            embedding_attr="user_embedding", include_modules=["user_dnn"]
+            embedding_attr='user_embedding',
+            include_modules=['user_dnn']
         )
         self._register_regularization_weights(
-            embedding_attr="item_embedding", include_modules=["item_dnn"]
+            embedding_attr='item_embedding',
+            include_modules=['item_dnn']
        )
-
+
         self.to(device)
-
+
     def user_tower(self, user_input: dict) -> torch.Tensor:
         """User tower"""
-        all_user_features = (
-            self.user_dense_features
-            + self.user_sparse_features
-            + self.user_sequence_features
-        )
+        all_user_features = self.user_dense_features + self.user_sparse_features + self.user_sequence_features
         user_emb = self.user_embedding(user_input, all_user_features, squeeze_dim=True)
         user_emb = self.user_dnn(user_emb)
-
+
         # Normalization for better pairwise training
         user_emb = torch.nn.functional.normalize(user_emb, p=2, dim=1)
-
+
         return user_emb
-
+
     def item_tower(self, item_input: dict) -> torch.Tensor:
         """Item tower"""
-        all_item_features = (
-            self.item_dense_features
-            + self.item_sparse_features
-            + self.item_sequence_features
-        )
+        all_item_features = self.item_dense_features + self.item_sparse_features + self.item_sequence_features
        item_emb = self.item_embedding(item_input, all_item_features, squeeze_dim=True)
         item_emb = self.item_dnn(item_emb)
-
+
         # Normalization for better pairwise training
         item_emb = torch.nn.functional.normalize(item_emb, p=2, dim=1)
-
+
         return item_emb
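
Per its module docstring above, DSSM_v2 is the pairwise variant of DSSM: it defaults to `training_mode='pairwise'` with `similarity_metric='dot'`, and both towers always L2-normalize, so the dot product of tower outputs is a cosine score fed to a BPR-style objective. The loss itself lives in `nextrec/loss/match_losses.py` (also changed in this release) and is not shown in this diff; the sketch below is the textbook BPR formulation under that assumption, not nextrec's actual implementation:

```python
import torch
import torch.nn.functional as F

def bpr_loss(pos_scores: torch.Tensor, neg_scores: torch.Tensor) -> torch.Tensor:
    """Textbook BPR: -log(sigmoid(s_pos - s_neg)), averaged over the batch.

    Generic sketch only; nextrec's match_losses.py may differ in
    reduction, weighting, or negative-sampling details.
    """
    return -F.logsigmoid(pos_scores - neg_scores).mean()

# Toy example mirroring DSSM_v2's defaults: unit-normalized tower outputs
# scored by dot product (equivalently, cosine similarity).
torch.manual_seed(0)
user = F.normalize(torch.randn(8, 64), p=2, dim=1)      # user_tower output
pos_item = F.normalize(torch.randn(8, 64), p=2, dim=1)  # clicked item
neg_item = F.normalize(torch.randn(8, 64), p=2, dim=1)  # sampled negative

loss = bpr_loss((user * pos_item).sum(-1), (user * neg_item).sum(-1))
print(loss)  # scalar loss; lower means positives outscore negatives
```

The always-on normalization in both towers keeps the score range bounded, which is what the "Normalization for better pairwise training" comments in the diff refer to: it stops the pairwise objective from being trivially minimized by inflating embedding norms.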