nextrec 0.1.1-py3-none-any.whl → 0.1.2-py3-none-any.whl

Files changed (48)
  1. nextrec/__init__.py +4 -4
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -9
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/dataloader.py +168 -127
  6. nextrec/basic/features.py +24 -27
  7. nextrec/basic/layers.py +328 -159
  8. nextrec/basic/loggers.py +50 -37
  9. nextrec/basic/metrics.py +255 -147
  10. nextrec/basic/model.py +817 -462
  11. nextrec/data/__init__.py +5 -5
  12. nextrec/data/data_utils.py +16 -12
  13. nextrec/data/preprocessor.py +276 -252
  14. nextrec/loss/__init__.py +12 -12
  15. nextrec/loss/loss_utils.py +30 -22
  16. nextrec/loss/match_losses.py +116 -83
  17. nextrec/models/match/__init__.py +5 -5
  18. nextrec/models/match/dssm.py +70 -61
  19. nextrec/models/match/dssm_v2.py +61 -51
  20. nextrec/models/match/mind.py +89 -71
  21. nextrec/models/match/sdm.py +93 -81
  22. nextrec/models/match/youtube_dnn.py +62 -53
  23. nextrec/models/multi_task/esmm.py +49 -43
  24. nextrec/models/multi_task/mmoe.py +65 -56
  25. nextrec/models/multi_task/ple.py +92 -65
  26. nextrec/models/multi_task/share_bottom.py +48 -42
  27. nextrec/models/ranking/__init__.py +7 -7
  28. nextrec/models/ranking/afm.py +39 -30
  29. nextrec/models/ranking/autoint.py +70 -57
  30. nextrec/models/ranking/dcn.py +43 -35
  31. nextrec/models/ranking/deepfm.py +34 -28
  32. nextrec/models/ranking/dien.py +115 -79
  33. nextrec/models/ranking/din.py +84 -60
  34. nextrec/models/ranking/fibinet.py +51 -35
  35. nextrec/models/ranking/fm.py +28 -26
  36. nextrec/models/ranking/masknet.py +31 -31
  37. nextrec/models/ranking/pnn.py +30 -31
  38. nextrec/models/ranking/widedeep.py +36 -31
  39. nextrec/models/ranking/xdeepfm.py +46 -39
  40. nextrec/utils/__init__.py +9 -9
  41. nextrec/utils/embedding.py +1 -1
  42. nextrec/utils/initializer.py +23 -15
  43. nextrec/utils/optimizer.py +14 -10
  44. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/METADATA +6 -40
  45. nextrec-0.1.2.dist-info/RECORD +51 -0
  46. nextrec-0.1.1.dist-info/RECORD +0 -51
  47. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/WHEEL +0 -0
  48. {nextrec-0.1.1.dist-info → nextrec-0.1.2.dist-info}/licenses/LICENSE +0 -0
nextrec/models/match/dssm.py

@@ -6,6 +6,7 @@ Reference:
     [1] Huang P S, He X, Gao J, et al. Learning deep structured semantic models for web search using clickthrough data[C]
     //Proceedings of the 22nd ACM international conference on Information & Knowledge Management. 2013: 2333-2338.
 """
+
 import torch
 import torch.nn as nn
 from typing import Optional, Literal
@@ -18,38 +19,40 @@ from nextrec.basic.layers import MLP, EmbeddingLayer
 class DSSM(BaseMatchModel):
     """
     Deep Structured Semantic Model
-
+
     Two-tower model: user and item features are encoded into embeddings by separate towers, and the match score is computed via cosine similarity or dot product.
     """
-
+
     @property
     def model_name(self) -> str:
         return "DSSM"
-
-    def __init__(self,
-                 user_dense_features: list[DenseFeature] | None = None,
-                 user_sparse_features: list[SparseFeature] | None = None,
-                 user_sequence_features: list[SequenceFeature] | None = None,
-                 item_dense_features: list[DenseFeature] | None = None,
-                 item_sparse_features: list[SparseFeature] | None = None,
-                 item_sequence_features: list[SequenceFeature] | None = None,
-                 user_dnn_hidden_units: list[int] = [256, 128, 64],
-                 item_dnn_hidden_units: list[int] = [256, 128, 64],
-                 embedding_dim: int = 64,
-                 dnn_activation: str = 'relu',
-                 dnn_dropout: float = 0.0,
-                 training_mode: Literal['pointwise', 'pairwise', 'listwise'] = 'pointwise',
-                 num_negative_samples: int = 4,
-                 temperature: float = 1.0,
-                 similarity_metric: Literal['dot', 'cosine', 'euclidean'] = 'cosine',
-                 device: str = 'cpu',
-                 embedding_l1_reg: float = 0.0,
-                 dense_l1_reg: float = 0.0,
-                 embedding_l2_reg: float = 0.0,
-                 dense_l2_reg: float = 0.0,
-                 early_stop_patience: int = 20,
-                 model_id: str = 'dssm'):
-
+
+    def __init__(
+        self,
+        user_dense_features: list[DenseFeature] | None = None,
+        user_sparse_features: list[SparseFeature] | None = None,
+        user_sequence_features: list[SequenceFeature] | None = None,
+        item_dense_features: list[DenseFeature] | None = None,
+        item_sparse_features: list[SparseFeature] | None = None,
+        item_sequence_features: list[SequenceFeature] | None = None,
+        user_dnn_hidden_units: list[int] = [256, 128, 64],
+        item_dnn_hidden_units: list[int] = [256, 128, 64],
+        embedding_dim: int = 64,
+        dnn_activation: str = "relu",
+        dnn_dropout: float = 0.0,
+        training_mode: Literal["pointwise", "pairwise", "listwise"] = "pointwise",
+        num_negative_samples: int = 4,
+        temperature: float = 1.0,
+        similarity_metric: Literal["dot", "cosine", "euclidean"] = "cosine",
+        device: str = "cpu",
+        embedding_l1_reg: float = 0.0,
+        dense_l1_reg: float = 0.0,
+        embedding_l2_reg: float = 0.0,
+        dense_l2_reg: float = 0.0,
+        early_stop_patience: int = 20,
+        model_id: str = "dssm",
+    ):
+
         super(DSSM, self).__init__(
             user_dense_features=user_dense_features,
             user_sparse_features=user_sparse_features,
@@ -67,13 +70,13 @@ class DSSM(BaseMatchModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=early_stop_patience,
-            model_id=model_id
+            model_id=model_id,
         )
-
+
         self.embedding_dim = embedding_dim
         self.user_dnn_hidden_units = user_dnn_hidden_units
         self.item_dnn_hidden_units = item_dnn_hidden_units
-
+
         # User tower embedding layer
         user_features = []
         if user_dense_features:
@@ -82,10 +85,10 @@ class DSSM(BaseMatchModel):
             user_features.extend(user_sparse_features)
         if user_sequence_features:
             user_features.extend(user_sequence_features)
-
+
         if len(user_features) > 0:
             self.user_embedding = EmbeddingLayer(user_features)
-
+
         # Compute user tower input dimension
         user_input_dim = 0
         for feat in user_dense_features or []:
@@ -94,7 +97,7 @@
             user_input_dim += feat.embedding_dim
         for feat in user_sequence_features or []:
             user_input_dim += feat.embedding_dim
-
+
         # User DNN
         user_dnn_units = user_dnn_hidden_units + [embedding_dim]
         self.user_dnn = MLP(
@@ -102,9 +105,9 @@
             dims=user_dnn_units,
             output_layer=False,
             dropout=dnn_dropout,
-            activation=dnn_activation
+            activation=dnn_activation,
         )
-
+
         # Item tower embedding layer
         item_features = []
         if item_dense_features:
@@ -113,10 +116,10 @@
             item_features.extend(item_sparse_features)
         if item_sequence_features:
             item_features.extend(item_sequence_features)
-
+
         if len(item_features) > 0:
             self.item_embedding = EmbeddingLayer(item_features)
-
+
         # Compute item tower input dimension
         item_input_dim = 0
         for feat in item_dense_features or []:
@@ -125,7 +128,7 @@
             item_input_dim += feat.embedding_dim
         for feat in item_sequence_features or []:
             item_input_dim += feat.embedding_dim
-
+
         # Item DNN
         item_dnn_units = item_dnn_hidden_units + [embedding_dim]
         self.item_dnn = MLP(
@@ -133,68 +136,74 @@
             dims=item_dnn_units,
             output_layer=False,
             dropout=dnn_dropout,
-            activation=dnn_activation
+            activation=dnn_activation,
         )
-
+
         # Register regularization weights
         self._register_regularization_weights(
-            embedding_attr='user_embedding',
-            include_modules=['user_dnn']
+            embedding_attr="user_embedding", include_modules=["user_dnn"]
         )
         self._register_regularization_weights(
-            embedding_attr='item_embedding',
-            include_modules=['item_dnn']
+            embedding_attr="item_embedding", include_modules=["item_dnn"]
         )
-
+
         self.compile(
             optimizer="adam",
             optimizer_params={"lr": 1e-3, "weight_decay": 1e-5},
         )

         self.to(device)
-
+
     def user_tower(self, user_input: dict) -> torch.Tensor:
         """
         User tower: encode user features into an embedding
-
+
         Args:
             user_input: dict of user features
-
+
         Returns:
             user_emb: [batch_size, embedding_dim]
         """
         # Get embeddings for the user features
-        all_user_features = self.user_dense_features + self.user_sparse_features + self.user_sequence_features
+        all_user_features = (
+            self.user_dense_features
+            + self.user_sparse_features
+            + self.user_sequence_features
+        )
         user_emb = self.user_embedding(user_input, all_user_features, squeeze_dim=True)
-
+
         # Pass through the user DNN
         user_emb = self.user_dnn(user_emb)
-
+
         # L2 normalize for cosine similarity
-        if self.similarity_metric == 'cosine':
+        if self.similarity_metric == "cosine":
             user_emb = torch.nn.functional.normalize(user_emb, p=2, dim=1)
-
+
         return user_emb
-
+
     def item_tower(self, item_input: dict) -> torch.Tensor:
         """
         Item tower: encode item features into an embedding
-
+
         Args:
             item_input: dict of item features
-
+
         Returns:
             item_emb: [batch_size, embedding_dim] or [batch_size, num_items, embedding_dim]
         """
         # Get embeddings for the item features
-        all_item_features = self.item_dense_features + self.item_sparse_features + self.item_sequence_features
+        all_item_features = (
+            self.item_dense_features
+            + self.item_sparse_features
+            + self.item_sequence_features
+        )
         item_emb = self.item_embedding(item_input, all_item_features, squeeze_dim=True)
-
+
         # Pass through the item DNN
         item_emb = self.item_dnn(item_emb)
-
+
         # L2 normalize for cosine similarity
-        if self.similarity_metric == 'cosine':
+        if self.similarity_metric == "cosine":
             item_emb = torch.nn.functional.normalize(item_emb, p=2, dim=1)
-
+
         return item_emb
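
Both towers finish with torch.nn.functional.normalize(..., p=2, dim=1) when similarity_metric == "cosine". Below is a minimal sketch of why that is sufficient, with random tensors standing in for the tower outputs; how BaseMatchModel actually consumes the embeddings is not part of this diff, so the scoring lines are an assumption, not the library's code:

import torch
import torch.nn.functional as F

# Stand-ins for user_tower / item_tower outputs: [batch_size, embedding_dim]
user_emb = torch.randn(4, 64)
item_emb = torch.randn(4, 64)

# What the towers do under similarity_metric == "cosine"
user_emb = F.normalize(user_emb, p=2, dim=1)
item_emb = F.normalize(item_emb, p=2, dim=1)

# For unit vectors a plain dot product equals cosine similarity,
# so one scoring path covers both "dot" and "cosine".
paired_scores = (user_emb * item_emb).sum(dim=1)  # shape [4]
all_pair_logits = user_emb @ item_emb.T           # shape [4, 4]

# The constructor's temperature would rescale logits before the loss.
temperature = 1.0
logits = all_pair_logits / temperature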
nextrec/models/match/dssm_v2.py

@@ -5,6 +5,7 @@ Author:
 Reference:
     DSSM v2 - DSSM with pairwise training using BPR loss
 """
+
 import torch
 import torch.nn as nn
 from typing import Literal
@@ -18,34 +19,37 @@ class DSSM_v2(BaseMatchModel):
     """
     DSSM with Pairwise Training
     """
+
     @property
     def model_name(self) -> str:
         return "DSSM_v2"
-
-    def __init__(self,
-                 user_dense_features: list[DenseFeature] | None = None,
-                 user_sparse_features: list[SparseFeature] | None = None,
-                 user_sequence_features: list[SequenceFeature] | None = None,
-                 item_dense_features: list[DenseFeature] | None = None,
-                 item_sparse_features: list[SparseFeature] | None = None,
-                 item_sequence_features: list[SequenceFeature] | None = None,
-                 user_dnn_hidden_units: list[int] = [256, 128, 64],
-                 item_dnn_hidden_units: list[int] = [256, 128, 64],
-                 embedding_dim: int = 64,
-                 dnn_activation: str = 'relu',
-                 dnn_dropout: float = 0.0,
-                 training_mode: Literal['pointwise', 'pairwise', 'listwise'] = 'pairwise',
-                 num_negative_samples: int = 4,
-                 temperature: float = 1.0,
-                 similarity_metric: Literal['dot', 'cosine', 'euclidean'] = 'dot',
-                 device: str = 'cpu',
-                 embedding_l1_reg: float = 0.0,
-                 dense_l1_reg: float = 0.0,
-                 embedding_l2_reg: float = 0.0,
-                 dense_l2_reg: float = 0.0,
-                 early_stop_patience: int = 20,
-                 model_id: str = 'dssm_v2'):
-
+
+    def __init__(
+        self,
+        user_dense_features: list[DenseFeature] | None = None,
+        user_sparse_features: list[SparseFeature] | None = None,
+        user_sequence_features: list[SequenceFeature] | None = None,
+        item_dense_features: list[DenseFeature] | None = None,
+        item_sparse_features: list[SparseFeature] | None = None,
+        item_sequence_features: list[SequenceFeature] | None = None,
+        user_dnn_hidden_units: list[int] = [256, 128, 64],
+        item_dnn_hidden_units: list[int] = [256, 128, 64],
+        embedding_dim: int = 64,
+        dnn_activation: str = "relu",
+        dnn_dropout: float = 0.0,
+        training_mode: Literal["pointwise", "pairwise", "listwise"] = "pairwise",
+        num_negative_samples: int = 4,
+        temperature: float = 1.0,
+        similarity_metric: Literal["dot", "cosine", "euclidean"] = "dot",
+        device: str = "cpu",
+        embedding_l1_reg: float = 0.0,
+        dense_l1_reg: float = 0.0,
+        embedding_l2_reg: float = 0.0,
+        dense_l2_reg: float = 0.0,
+        early_stop_patience: int = 20,
+        model_id: str = "dssm_v2",
+    ):
+
         super(DSSM_v2, self).__init__(
             user_dense_features=user_dense_features,
             user_sparse_features=user_sparse_features,
@@ -63,13 +67,13 @@ class DSSM_v2(BaseMatchModel):
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
             early_stop_patience=early_stop_patience,
-            model_id=model_id
+            model_id=model_id,
         )
-
+
         self.embedding_dim = embedding_dim
         self.user_dnn_hidden_units = user_dnn_hidden_units
         self.item_dnn_hidden_units = item_dnn_hidden_units
-
+
         # User tower
         user_features = []
         if user_dense_features:
@@ -78,10 +82,10 @@ class DSSM_v2(BaseMatchModel):
             user_features.extend(user_sparse_features)
         if user_sequence_features:
             user_features.extend(user_sequence_features)
-
+
         if len(user_features) > 0:
             self.user_embedding = EmbeddingLayer(user_features)
-
+
         user_input_dim = 0
         for feat in user_dense_features or []:
             user_input_dim += 1
@@ -89,16 +93,16 @@
             user_input_dim += feat.embedding_dim
         for feat in user_sequence_features or []:
             user_input_dim += feat.embedding_dim
-
+
         user_dnn_units = user_dnn_hidden_units + [embedding_dim]
         self.user_dnn = MLP(
             input_dim=user_input_dim,
             dims=user_dnn_units,
             output_layer=False,
             dropout=dnn_dropout,
-            activation=dnn_activation
+            activation=dnn_activation,
         )
-
+
         # Item tower
         item_features = []
         if item_dense_features:
@@ -107,10 +111,10 @@ class DSSM_v2(BaseMatchModel):
             item_features.extend(item_sparse_features)
         if item_sequence_features:
             item_features.extend(item_sequence_features)
-
+
         if len(item_features) > 0:
             self.item_embedding = EmbeddingLayer(item_features)
-
+
         item_input_dim = 0
         for feat in item_dense_features or []:
             item_input_dim += 1
@@ -118,45 +122,51 @@
             item_input_dim += feat.embedding_dim
         for feat in item_sequence_features or []:
             item_input_dim += feat.embedding_dim
-
+
         item_dnn_units = item_dnn_hidden_units + [embedding_dim]
         self.item_dnn = MLP(
             input_dim=item_input_dim,
             dims=item_dnn_units,
             output_layer=False,
             dropout=dnn_dropout,
-            activation=dnn_activation
+            activation=dnn_activation,
         )
-
+
         self._register_regularization_weights(
-            embedding_attr='user_embedding',
-            include_modules=['user_dnn']
+            embedding_attr="user_embedding", include_modules=["user_dnn"]
         )
         self._register_regularization_weights(
-            embedding_attr='item_embedding',
-            include_modules=['item_dnn']
+            embedding_attr="item_embedding", include_modules=["item_dnn"]
         )
-
+
         self.to(device)
-
+
     def user_tower(self, user_input: dict) -> torch.Tensor:
         """User tower"""
-        all_user_features = self.user_dense_features + self.user_sparse_features + self.user_sequence_features
+        all_user_features = (
+            self.user_dense_features
+            + self.user_sparse_features
+            + self.user_sequence_features
+        )
         user_emb = self.user_embedding(user_input, all_user_features, squeeze_dim=True)
         user_emb = self.user_dnn(user_emb)
-
+
         # Normalization for better pairwise training
         user_emb = torch.nn.functional.normalize(user_emb, p=2, dim=1)
-
+
         return user_emb
-
+
     def item_tower(self, item_input: dict) -> torch.Tensor:
         """Item tower"""
-        all_item_features = self.item_dense_features + self.item_sparse_features + self.item_sequence_features
+        all_item_features = (
+            self.item_dense_features
+            + self.item_sparse_features
+            + self.item_sequence_features
+        )
         item_emb = self.item_embedding(item_input, all_item_features, squeeze_dim=True)
         item_emb = self.item_dnn(item_emb)
-
+
         # Normalization for better pairwise training
         item_emb = torch.nn.functional.normalize(item_emb, p=2, dim=1)
-
+
         return item_emb
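
The reference line above describes DSSM_v2 as "DSSM with pairwise training using BPR loss". The loss implementation lives under nextrec/loss (match_losses.py is also touched in this release) and is not shown in this diff, so the following is only a self-contained sketch of the standard BPR objective the docstring names; the function name and signature are assumptions of this sketch, not the library's API:

import torch
import torch.nn.functional as F

def bpr_loss(user_emb: torch.Tensor,
             pos_item_emb: torch.Tensor,
             neg_item_emb: torch.Tensor) -> torch.Tensor:
    # BPR: prefer s(u, i+) over s(u, i-), i.e. minimize -log sigmoid(diff)
    pos_scores = (user_emb * pos_item_emb).sum(dim=1)
    neg_scores = (user_emb * neg_item_emb).sum(dim=1)
    return -F.logsigmoid(pos_scores - neg_scores).mean()

# Toy usage with L2-normalized embeddings, matching the towers' output:
u = F.normalize(torch.randn(8, 64), p=2, dim=1)
i_pos = F.normalize(torch.randn(8, 64), p=2, dim=1)
i_neg = F.normalize(torch.randn(8, 64), p=2, dim=1)
loss = bpr_loss(u, i_pos, i_neg)  # scalar tensor

Because both towers L2-normalize their outputs, the dot-product scores here are bounded cosine similarities, which keeps the pairwise score differences in a stable range during training.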