nextrec 0.3.6__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. nextrec/__init__.py +1 -1
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -5
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/features.py +30 -22
  6. nextrec/basic/layers.py +244 -113
  7. nextrec/basic/loggers.py +62 -43
  8. nextrec/basic/metrics.py +268 -119
  9. nextrec/basic/model.py +1373 -443
  10. nextrec/basic/session.py +10 -3
  11. nextrec/cli.py +498 -0
  12. nextrec/data/__init__.py +19 -25
  13. nextrec/data/batch_utils.py +11 -3
  14. nextrec/data/data_processing.py +42 -24
  15. nextrec/data/data_utils.py +26 -15
  16. nextrec/data/dataloader.py +303 -96
  17. nextrec/data/preprocessor.py +320 -199
  18. nextrec/loss/listwise.py +17 -9
  19. nextrec/loss/loss_utils.py +7 -8
  20. nextrec/loss/pairwise.py +2 -0
  21. nextrec/loss/pointwise.py +30 -12
  22. nextrec/models/generative/hstu.py +106 -40
  23. nextrec/models/match/dssm.py +82 -69
  24. nextrec/models/match/dssm_v2.py +72 -58
  25. nextrec/models/match/mind.py +175 -108
  26. nextrec/models/match/sdm.py +104 -88
  27. nextrec/models/match/youtube_dnn.py +73 -60
  28. nextrec/models/multi_task/esmm.py +53 -39
  29. nextrec/models/multi_task/mmoe.py +70 -47
  30. nextrec/models/multi_task/ple.py +107 -50
  31. nextrec/models/multi_task/poso.py +121 -41
  32. nextrec/models/multi_task/share_bottom.py +54 -38
  33. nextrec/models/ranking/afm.py +172 -45
  34. nextrec/models/ranking/autoint.py +84 -61
  35. nextrec/models/ranking/dcn.py +59 -42
  36. nextrec/models/ranking/dcn_v2.py +64 -23
  37. nextrec/models/ranking/deepfm.py +36 -26
  38. nextrec/models/ranking/dien.py +158 -102
  39. nextrec/models/ranking/din.py +88 -60
  40. nextrec/models/ranking/fibinet.py +55 -35
  41. nextrec/models/ranking/fm.py +32 -26
  42. nextrec/models/ranking/masknet.py +95 -34
  43. nextrec/models/ranking/pnn.py +34 -31
  44. nextrec/models/ranking/widedeep.py +37 -29
  45. nextrec/models/ranking/xdeepfm.py +63 -41
  46. nextrec/utils/__init__.py +61 -32
  47. nextrec/utils/config.py +490 -0
  48. nextrec/utils/device.py +52 -12
  49. nextrec/utils/distributed.py +141 -0
  50. nextrec/utils/embedding.py +1 -0
  51. nextrec/utils/feature.py +1 -0
  52. nextrec/utils/file.py +32 -11
  53. nextrec/utils/initializer.py +61 -16
  54. nextrec/utils/optimizer.py +25 -9
  55. nextrec/utils/synthetic_data.py +531 -0
  56. nextrec/utils/tensor.py +24 -13
  57. {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/METADATA +15 -5
  58. nextrec-0.4.2.dist-info/RECORD +69 -0
  59. nextrec-0.4.2.dist-info/entry_points.txt +2 -0
  60. nextrec-0.3.6.dist-info/RECORD +0 -64
  61. {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
  62. {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
@@ -13,37 +13,44 @@ import torch.nn as nn
 import torch.nn.functional as F
 
 from nextrec.basic.model import BaseModel
-from nextrec.basic.layers import EmbeddingLayer, MLP, AttentionPoolingLayer, PredictionLayer
+from nextrec.basic.layers import (
+    EmbeddingLayer,
+    MLP,
+    AttentionPoolingLayer,
+    PredictionLayer,
+)
 from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
 
+
 class AUGRU(nn.Module):
     """Attention-aware GRU update gate used in DIEN (Zhou et al., 2019)."""
+
     """
     Attention-based GRU for DIEN
     Uses attention scores to weight the update of hidden states
     """
-
+
     def __init__(self, input_size, hidden_size, bias=True):
         super().__init__()
         self.input_size = input_size
         self.hidden_size = hidden_size
-
+
         self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, input_size))
         self.weight_hh = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
         if bias:
             self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size))
             self.bias_hh = nn.Parameter(torch.randn(3 * hidden_size))
         else:
-            self.register_parameter('bias_ih', None)
-            self.register_parameter('bias_hh', None)
-
+            self.register_parameter("bias_ih", None)
+            self.register_parameter("bias_hh", None)
+
         self.reset_parameters()
-
+
     def reset_parameters(self):
         std = 1.0 / (self.hidden_size) ** 0.5
         for weight in self.parameters():
             weight.data.uniform_(-std, std)
-
+
     def forward(self, x, att_scores):
         """
         Args:
@@ -59,12 +66,12 @@ class AUGRU(nn.Module):
         for t in range(seq_len):
             x_t = x[:, t, :] # [batch_size, input_size]
             att_t = att_scores[:, t, :] # [batch_size, 1]
-
+
             gi = F.linear(x_t, self.weight_ih, self.bias_ih)
             gh = F.linear(h, self.weight_hh, self.bias_hh)
             i_r, i_i, i_n = gi.chunk(3, 1)
             h_r, h_i, h_n = gh.chunk(3, 1)
-
+
             resetgate = torch.sigmoid(i_r + h_r)
             inputgate = torch.sigmoid(i_i + h_i)
             newgate = torch.tanh(i_n + resetgate * h_n)
@@ -72,21 +79,22 @@
             h = (1 - att_t) * h + att_t * newgate
             outputs.append(h.unsqueeze(1))
         output = torch.cat(outputs, dim=1)
-
-        return output, h
+
+        return output, h
 
 
 class DynamicGRU(nn.Module):
     """Dynamic GRU unit with auxiliary loss path from DIEN (Zhou et al., 2019)."""
+
     """
     GRU with dynamic routing for DIEN
     """
-
+
     def __init__(self, input_size, hidden_size, bias=True):
         super().__init__()
         self.input_size = input_size
         self.hidden_size = hidden_size
-
+
         # GRU parameters
         self.weight_ih = nn.Parameter(torch.randn(3 * hidden_size, input_size))
         self.weight_hh = nn.Parameter(torch.randn(3 * hidden_size, hidden_size))
@@ -94,16 +102,16 @@ class DynamicGRU(nn.Module):
             self.bias_ih = nn.Parameter(torch.randn(3 * hidden_size))
             self.bias_hh = nn.Parameter(torch.randn(3 * hidden_size))
         else:
-            self.register_parameter('bias_ih', None)
-            self.register_parameter('bias_hh', None)
-
+            self.register_parameter("bias_ih", None)
+            self.register_parameter("bias_hh", None)
+
         self.reset_parameters()
-
+
     def reset_parameters(self):
         std = 1.0 / (self.hidden_size) ** 0.5
         for weight in self.parameters():
             weight.data.uniform_(-std, std)
-
+
     def forward(self, x, att_scores=None):
         """
         Args:
@@ -114,29 +122,29 @@
             hidden: [batch_size, hidden_size] - final hidden state
         """
         batch_size, seq_len, _ = x.shape
-
+
         # Initialize hidden state
         h = torch.zeros(batch_size, self.hidden_size, device=x.device)
-
+
         outputs = []
         for t in range(seq_len):
             x_t = x[:, t, :] # [batch_size, input_size]
-
+
             # GRU computation
             gi = F.linear(x_t, self.weight_ih, self.bias_ih)
             gh = F.linear(h, self.weight_hh, self.bias_hh)
             i_r, i_i, i_n = gi.chunk(3, 1)
             h_r, h_i, h_n = gh.chunk(3, 1)
-
+
             resetgate = torch.sigmoid(i_r + h_r)
             inputgate = torch.sigmoid(i_i + h_i)
             newgate = torch.tanh(i_n + resetgate * h_n)
             h = newgate + inputgate * (h - newgate)
-
+
             outputs.append(h.unsqueeze(1))
-
+
         output = torch.cat(outputs, dim=1) # [batch_size, seq_len, hidden_size]
-
+
         return output, h
 
 
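Both recurrent units above unroll the standard GRU gate algebra step by step; they differ only in the hidden-state update, where AUGRU replaces the learned update gate with the externally supplied attention score (h_t = (1 - a_t) * h_{t-1} + a_t * newgate_t), so a score near zero keeps the previous interest state. The snippet below is a minimal shape-level smoke test, not part of the package; it assumes AUGRU and DynamicGRU are importable from nextrec.models.ranking.dien exactly as defined in this diff.

import torch

from nextrec.models.ranking.dien import AUGRU, DynamicGRU  # classes shown above

batch_size, seq_len, emb_dim, hidden = 4, 10, 32, 64
behavior_emb = torch.randn(batch_size, seq_len, emb_dim)

# Interest extractor: a plain GRU unrolled over the behavior sequence.
extractor = DynamicGRU(input_size=emb_dim, hidden_size=hidden)
interest_states, last_state = extractor(behavior_emb)
assert interest_states.shape == (batch_size, seq_len, hidden)
assert last_state.shape == (batch_size, hidden)

# Interest evolution: attention scores in [0, 1] gate the state update,
# h_t = (1 - a_t) * h_{t-1} + a_t * newgate_t.
att_scores = torch.sigmoid(torch.randn(batch_size, seq_len, 1))
evolution = AUGRU(input_size=hidden, hidden_size=hidden)
evolved_states, final_interest = evolution(interest_states, att_scores)
assert evolved_states.shape == (batch_size, seq_len, hidden)
assert final_interest.shape == (batch_size, hidden)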
@@ -146,174 +154,222 @@ class DIEN(BaseModel):
         return "DIEN"
 
     @property
-    def task_type(self):
+    def default_task(self):
         return "binary"
-
-    def __init__(self,
-                 dense_features: list[DenseFeature],
-                 sparse_features: list[SparseFeature],
-                 sequence_features: list[SequenceFeature],
-                 mlp_params: dict,
-                 gru_hidden_size: int = 64,
-                 attention_hidden_units: list[int] = [80, 40],
-                 attention_activation: str = 'sigmoid',
-                 use_negsampling: bool = False,
-                 target: list[str] = [],
-                 optimizer: str = "adam",
-                 optimizer_params: dict = {},
-                 loss: str | nn.Module | None = "bce",
-                 loss_params: dict | list[dict] | None = None,
-                 device: str = 'cpu',
-                 embedding_l1_reg=1e-6,
-                 dense_l1_reg=1e-5,
-                 embedding_l2_reg=1e-5,
-                 dense_l2_reg=1e-4,
-                 **kwargs):
-
+
+    def __init__(
+        self,
+        dense_features: list[DenseFeature],
+        sparse_features: list[SparseFeature],
+        sequence_features: list[SequenceFeature],
+        mlp_params: dict,
+        gru_hidden_size: int = 64,
+        attention_hidden_units: list[int] = [80, 40],
+        attention_activation: str = "sigmoid",
+        use_negsampling: bool = False,
+        target: list[str] = [],
+        task: str | list[str] | None = None,
+        optimizer: str = "adam",
+        optimizer_params: dict = {},
+        loss: str | nn.Module | None = "bce",
+        loss_params: dict | list[dict] | None = None,
+        device: str = "cpu",
+        embedding_l1_reg=1e-6,
+        dense_l1_reg=1e-5,
+        embedding_l2_reg=1e-5,
+        dense_l2_reg=1e-4,
+        **kwargs,
+    ):
+
         super(DIEN, self).__init__(
             dense_features=dense_features,
             sparse_features=sparse_features,
             sequence_features=sequence_features,
             target=target,
-            task=self.task_type,
+            task=task or self.default_task,
             device=device,
             embedding_l1_reg=embedding_l1_reg,
             dense_l1_reg=dense_l1_reg,
             embedding_l2_reg=embedding_l2_reg,
             dense_l2_reg=dense_l2_reg,
-            early_stop_patience=20,
-            **kwargs
+            **kwargs,
         )
 
 
188
198
  self.loss = loss
189
199
  if self.loss is None:
190
200
  self.loss = "bce"
191
-
201
+
192
202
  self.use_negsampling = use_negsampling
193
-
203
+
194
204
  # Features classification
195
205
  if len(sequence_features) == 0:
196
- raise ValueError("DIEN requires at least one sequence feature for user behavior history")
197
-
206
+ raise ValueError(
207
+ "DIEN requires at least one sequence feature for user behavior history"
208
+ )
209
+
198
210
  self.behavior_feature = sequence_features[0] # User behavior sequence
199
- self.candidate_feature = sparse_features[-1] if sparse_features else None # Candidate item
200
-
201
- self.other_sparse_features = sparse_features[:-1] if self.candidate_feature else sparse_features
211
+ self.candidate_feature = (
212
+ sparse_features[-1] if sparse_features else None
213
+ ) # Candidate item
214
+
215
+ self.other_sparse_features = (
216
+ sparse_features[:-1] if self.candidate_feature else sparse_features
217
+ )
202
218
  self.dense_features_list = dense_features
203
219
 
204
220
  # Embedding layer
205
221
  self.embedding = EmbeddingLayer(features=self.all_features)
206
-
222
+
207
223
  behavior_emb_dim = self.behavior_feature.embedding_dim
208
224
  self.candidate_proj = None
209
- if self.candidate_feature is not None and self.candidate_feature.embedding_dim != gru_hidden_size:
210
- self.candidate_proj = nn.Linear(self.candidate_feature.embedding_dim, gru_hidden_size)
211
-
225
+ if (
226
+ self.candidate_feature is not None
227
+ and self.candidate_feature.embedding_dim != gru_hidden_size
228
+ ):
229
+ self.candidate_proj = nn.Linear(
230
+ self.candidate_feature.embedding_dim, gru_hidden_size
231
+ )
232
+
212
233
  # Interest Extractor Layer (GRU)
213
234
  self.interest_extractor = DynamicGRU(
214
- input_size=behavior_emb_dim,
215
- hidden_size=gru_hidden_size
235
+ input_size=behavior_emb_dim, hidden_size=gru_hidden_size
216
236
  )
217
-
237
+
218
238
  # Attention layer for computing attention scores
219
239
  self.attention_layer = AttentionPoolingLayer(
220
240
  embedding_dim=gru_hidden_size,
221
241
  hidden_units=attention_hidden_units,
222
242
  activation=attention_activation,
223
- use_softmax=False # We'll use scores directly for AUGRU
243
+ use_softmax=False, # We'll use scores directly for AUGRU
224
244
  )
225
-
245
+
226
246
  # Interest Evolution Layer (AUGRU)
227
- self.interest_evolution = AUGRU(input_size=gru_hidden_size, hidden_size=gru_hidden_size)
228
-
247
+ self.interest_evolution = AUGRU(
248
+ input_size=gru_hidden_size, hidden_size=gru_hidden_size
249
+ )
250
+
229
251
  # Calculate MLP input dimension
230
252
  mlp_input_dim = 0
231
253
  if self.candidate_feature:
232
254
  mlp_input_dim += self.candidate_feature.embedding_dim
233
255
  mlp_input_dim += gru_hidden_size # final interest state
234
256
  mlp_input_dim += sum([f.embedding_dim for f in self.other_sparse_features])
235
- mlp_input_dim += sum([getattr(f, "embedding_dim", 1) or 1 for f in dense_features])
257
+ mlp_input_dim += sum(
258
+ [getattr(f, "embedding_dim", 1) or 1 for f in dense_features]
259
+ )
236
260
  # MLP for final prediction
237
261
  self.mlp = MLP(input_dim=mlp_input_dim, **mlp_params)
238
- self.prediction_layer = PredictionLayer(task_type=self.task_type)
262
+ self.prediction_layer = PredictionLayer(task_type=self.task)
239
263
  # Register regularization weights
240
- self.register_regularization_weights(embedding_attr='embedding', include_modules=['interest_extractor', 'interest_evolution', 'attention_layer', 'mlp', 'candidate_proj'])
241
- self.compile(optimizer=optimizer, optimizer_params=optimizer_params, loss=loss, loss_params=loss_params)
264
+ self.register_regularization_weights(
265
+ embedding_attr="embedding",
266
+ include_modules=[
267
+ "interest_extractor",
268
+ "interest_evolution",
269
+ "attention_layer",
270
+ "mlp",
271
+ "candidate_proj",
272
+ ],
273
+ )
274
+ self.compile(
275
+ optimizer=optimizer,
276
+ optimizer_params=optimizer_params,
277
+ loss=loss,
278
+ loss_params=loss_params,
279
+ )
242
280
 
243
281
     def forward(self, x):
         # Get candidate item embedding
         if self.candidate_feature:
-            candidate_emb = self.embedding.embed_dict[self.candidate_feature.embedding_name](x[self.candidate_feature.name].long()) # [B, emb_dim]
+            candidate_emb = self.embedding.embed_dict[
+                self.candidate_feature.embedding_name
+            ](
+                x[self.candidate_feature.name].long()
+            )  # [B, emb_dim]
         else:
             raise ValueError("DIEN requires a candidate item feature")
-
+
         # Get behavior sequence embedding
         behavior_seq = x[self.behavior_feature.name].long() # [B, seq_len]
-        behavior_emb = self.embedding.embed_dict[self.behavior_feature.embedding_name](behavior_seq) # [B, seq_len, emb_dim]
-
+        behavior_emb = self.embedding.embed_dict[self.behavior_feature.embedding_name](
+            behavior_seq
+        )  # [B, seq_len, emb_dim]
+
         # Create mask for padding
         if self.behavior_feature.padding_idx is not None:
-            mask = (behavior_seq != self.behavior_feature.padding_idx).unsqueeze(-1).float()
+            mask = (
+                (behavior_seq != self.behavior_feature.padding_idx)
+                .unsqueeze(-1)
+                .float()
+            )
         else:
             mask = (behavior_seq != 0).unsqueeze(-1).float()
-
+
         # Step 1: Interest Extractor (GRU)
-        interest_states, _ = self.interest_extractor(behavior_emb) # [B, seq_len, hidden_size]
-
+        interest_states, _ = self.interest_extractor(
+            behavior_emb
+        )  # [B, seq_len, hidden_size]
+
         # Step 2: Compute attention scores for each time step
         batch_size, seq_len, hidden_size = interest_states.shape
-
+
         # Project candidate to hidden_size if necessary (defined in __init__)
         if self.candidate_proj is not None:
             candidate_for_attention = self.candidate_proj(candidate_emb)
         else:
             candidate_for_attention = candidate_emb
-
+
         # Compute attention scores for AUGRU
         attention_scores = []
         for t in range(seq_len):
             score = self.attention_layer.attention_net(
-                torch.cat([
-                    candidate_for_attention,
-                    interest_states[:, t, :],
-                    candidate_for_attention - interest_states[:, t, :],
-                    candidate_for_attention * interest_states[:, t, :]
-                ], dim=-1)
+                torch.cat(
+                    [
+                        candidate_for_attention,
+                        interest_states[:, t, :],
+                        candidate_for_attention - interest_states[:, t, :],
+                        candidate_for_attention * interest_states[:, t, :],
+                    ],
+                    dim=-1,
+                )
             ) # [B, 1]
             attention_scores.append(score)
-
-        attention_scores = torch.cat(attention_scores, dim=1).unsqueeze(-1) # [B, seq_len, 1]
+
+        attention_scores = torch.cat(attention_scores, dim=1).unsqueeze(
+            -1
+        )  # [B, seq_len, 1]
         attention_scores = torch.sigmoid(attention_scores) # Normalize to [0, 1]
-
+
         # Apply mask to attention scores
         attention_scores = attention_scores * mask
-
+
         # Step 3: Interest Evolution (AUGRU)
         final_states, final_interest = self.interest_evolution(
-            interest_states,
-            attention_scores
+            interest_states, attention_scores
         ) # final_interest: [B, hidden_size]
-
+
         # Get other features
         other_embeddings = []
         other_embeddings.append(candidate_emb)
         other_embeddings.append(final_interest)
-
+
         # Other sparse features
         for feat in self.other_sparse_features:
-            feat_emb = self.embedding.embed_dict[feat.embedding_name](x[feat.name].long())
+            feat_emb = self.embedding.embed_dict[feat.embedding_name](
+                x[feat.name].long()
+            )
             other_embeddings.append(feat_emb)
-
+
         # Dense features
         for feat in self.dense_features_list:
             val = x[feat.name].float()
             if val.dim() == 1:
                 val = val.unsqueeze(1)
             other_embeddings.append(val)
-
+
         # Concatenate all features
         concat_input = torch.cat(other_embeddings, dim=-1) # [B, total_dim]
-
+
         # MLP prediction
         y = self.mlp(concat_input) # [B, 1]
         return self.prediction_layer(y)
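For reference, the per-step attention gating in DIEN.forward concatenates [candidate, interest state, candidate - state, candidate * state] at each time step, maps the result to a scalar score, then applies a sigmoid and the padding mask before handing the scores to AUGRU. The standalone sketch below mirrors that flow with a hypothetical stand-in MLP in place of nextrec's AttentionPoolingLayer.attention_net (whose internals are not shown in this diff); only the input convention, the [80, 40] hidden sizes from the attention_hidden_units default, and the sigmoid-plus-mask step are taken from the code above.

import torch
import torch.nn as nn

hidden = 64  # matches the gru_hidden_size default

# Hypothetical stand-in for AttentionPoolingLayer.attention_net:
# a small MLP mapping the 4*hidden attention features to a single score.
attention_net = nn.Sequential(
    nn.Linear(4 * hidden, 80),
    nn.Sigmoid(),
    nn.Linear(80, 40),
    nn.Sigmoid(),
    nn.Linear(40, 1),
)

def augru_attention_scores(candidate, interest_states, mask):
    # candidate: [B, H], interest_states: [B, T, H], mask: [B, T, 1] -> scores [B, T, 1]
    scores = []
    for t in range(interest_states.size(1)):
        state_t = interest_states[:, t, :]
        feats = torch.cat(
            [candidate, state_t, candidate - state_t, candidate * state_t], dim=-1
        )
        scores.append(attention_net(feats))  # [B, 1]
    scores = torch.cat(scores, dim=1).unsqueeze(-1)  # [B, T, 1]
    return torch.sigmoid(scores) * mask  # gate in [0, 1], padded steps zeroed

B, T = 4, 10
candidate = torch.randn(B, hidden)
states = torch.randn(B, T, hidden)
mask = (torch.rand(B, T, 1) > 0.2).float()
att = augru_attention_scores(candidate, states, mask)
assert att.shape == (B, T, 1)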