nextrec 0.3.6__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. nextrec/__init__.py +1 -1
  2. nextrec/__version__.py +1 -1
  3. nextrec/basic/activation.py +10 -5
  4. nextrec/basic/callback.py +1 -0
  5. nextrec/basic/features.py +30 -22
  6. nextrec/basic/layers.py +244 -113
  7. nextrec/basic/loggers.py +62 -43
  8. nextrec/basic/metrics.py +268 -119
  9. nextrec/basic/model.py +1373 -443
  10. nextrec/basic/session.py +10 -3
  11. nextrec/cli.py +498 -0
  12. nextrec/data/__init__.py +19 -25
  13. nextrec/data/batch_utils.py +11 -3
  14. nextrec/data/data_processing.py +42 -24
  15. nextrec/data/data_utils.py +26 -15
  16. nextrec/data/dataloader.py +303 -96
  17. nextrec/data/preprocessor.py +320 -199
  18. nextrec/loss/listwise.py +17 -9
  19. nextrec/loss/loss_utils.py +7 -8
  20. nextrec/loss/pairwise.py +2 -0
  21. nextrec/loss/pointwise.py +30 -12
  22. nextrec/models/generative/hstu.py +106 -40
  23. nextrec/models/match/dssm.py +82 -69
  24. nextrec/models/match/dssm_v2.py +72 -58
  25. nextrec/models/match/mind.py +175 -108
  26. nextrec/models/match/sdm.py +104 -88
  27. nextrec/models/match/youtube_dnn.py +73 -60
  28. nextrec/models/multi_task/esmm.py +53 -39
  29. nextrec/models/multi_task/mmoe.py +70 -47
  30. nextrec/models/multi_task/ple.py +107 -50
  31. nextrec/models/multi_task/poso.py +121 -41
  32. nextrec/models/multi_task/share_bottom.py +54 -38
  33. nextrec/models/ranking/afm.py +172 -45
  34. nextrec/models/ranking/autoint.py +84 -61
  35. nextrec/models/ranking/dcn.py +59 -42
  36. nextrec/models/ranking/dcn_v2.py +64 -23
  37. nextrec/models/ranking/deepfm.py +36 -26
  38. nextrec/models/ranking/dien.py +158 -102
  39. nextrec/models/ranking/din.py +88 -60
  40. nextrec/models/ranking/fibinet.py +55 -35
  41. nextrec/models/ranking/fm.py +32 -26
  42. nextrec/models/ranking/masknet.py +95 -34
  43. nextrec/models/ranking/pnn.py +34 -31
  44. nextrec/models/ranking/widedeep.py +37 -29
  45. nextrec/models/ranking/xdeepfm.py +63 -41
  46. nextrec/utils/__init__.py +61 -32
  47. nextrec/utils/config.py +490 -0
  48. nextrec/utils/device.py +52 -12
  49. nextrec/utils/distributed.py +141 -0
  50. nextrec/utils/embedding.py +1 -0
  51. nextrec/utils/feature.py +1 -0
  52. nextrec/utils/file.py +32 -11
  53. nextrec/utils/initializer.py +61 -16
  54. nextrec/utils/optimizer.py +25 -9
  55. nextrec/utils/synthetic_data.py +531 -0
  56. nextrec/utils/tensor.py +24 -13
  57. {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/METADATA +15 -5
  58. nextrec-0.4.2.dist-info/RECORD +69 -0
  59. nextrec-0.4.2.dist-info/entry_points.txt +2 -0
  60. nextrec-0.3.6.dist-info/RECORD +0 -64
  61. {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/WHEEL +0 -0
  62. {nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/licenses/LICENSE +0 -0
nextrec/utils/synthetic_data.py ADDED
@@ -0,0 +1,531 @@
+ """
+ Synthetic Data Generation Utilities
+
+ This module provides utilities for generating synthetic datasets for testing
+ and tutorial purposes in the NextRec framework.
+
+ Date: create on 06/12/2025
+ Author: Yang Zhou, zyaztec@gmail.com
+ """
+
+ import numpy as np
+ import pandas as pd
+ from typing import Optional, Dict, List, Tuple
+
+
+ def generate_ranking_data(
+     n_samples: int = 10000,
+     n_dense: int = 5,
+     n_sparse: int = 8,
+     n_sequences: int = 2,
+     user_vocab_size: int = 1000,
+     item_vocab_size: int = 500,
+     sparse_vocab_size: int = 50,
+     sequence_max_len: int = 20,
+     embedding_dim: int = 16,
+     seed: int = 42,
+     custom_sparse_features: Optional[Dict[str, int]] = None,
+     use_simple_names: bool = True,
+ ) -> Tuple[pd.DataFrame, List, List, List]:
+     """
+     Generate synthetic data for ranking tasks (CTR prediction)
+
+     Returns:
+         tuple: (dataframe, dense_features, sparse_features, sequence_features)
+     """
+     print(f"Generating {n_samples} synthetic ranking samples...")
+
+     np.random.seed(seed)
+     data = {}
+
+     for i in range(n_dense):
+         data[f"dense_{i}"] = np.random.randn(n_samples).astype(np.float32)
+
+     # Generate basic sparse features (always include user_id and item_id)
+     data["user_id"] = np.random.randint(1, user_vocab_size, n_samples)
+     data["item_id"] = np.random.randint(1, item_vocab_size, n_samples)
+
+     # Generate additional sparse features
+     if custom_sparse_features:
+         for feat_name, vocab_size in custom_sparse_features.items():
+             data[feat_name] = np.random.randint(0, vocab_size, n_samples)
+     else:
+         for i in range(n_sparse - 2):
+             data[f"sparse_{i}"] = np.random.randint(1, sparse_vocab_size, n_samples)
+
+     # Generate sequence features (list of IDs)
+     sequence_names = []
+     sequence_vocabs = []
+
+     for i in range(n_sequences):
+         sequences = []
+         for _ in range(n_samples):
+             seq_len = np.random.randint(5, sequence_max_len + 1)
+             if i == 0:
+                 # First sequence uses item vocabulary
+                 seq = np.random.randint(0, item_vocab_size, seq_len).tolist()
+                 seq_vocab = item_vocab_size
+                 if custom_sparse_features:
+                     seq_name = "hist_items"
+                 else:
+                     seq_name = "sequence_0"
+             else:
+                 # Other sequences use category vocabulary
+                 if custom_sparse_features and "category" in custom_sparse_features:
+                     seq_vocab = custom_sparse_features["category"]
+                     seq = np.random.randint(0, seq_vocab, seq_len).tolist()
+                     seq_name = "hist_categories" if i == 1 else f"sequence_{i}"
+                 else:
+                     seq_vocab = sparse_vocab_size
+                     seq = np.random.randint(0, seq_vocab, seq_len).tolist()
+                     seq_name = f"sequence_{i}"
+
+             # Padding
+             seq = seq + [0] * (sequence_max_len - len(seq))
+             sequences.append(seq)
+
+         data[seq_name] = sequences
+         sequence_names.append(seq_name)
+         sequence_vocabs.append(seq_vocab)
+
+     if "gender" in data and "dense_0" in data:
+         # Complex label generation with feature correlation
+         label_probs = 1 / (
+             1
+             + np.exp(
+                 -(
+                     data["dense_0"] * 0.3
+                     + data["dense_1"] * 0.2
+                     + (data["gender"] - 0.5) * 0.5
+                     + np.random.randn(n_samples) * 0.1
+                 )
+             )
+         )
+         data["label"] = (label_probs > 0.5).astype(np.float32)
+     else:
+         data["label"] = np.random.randint(0, 2, n_samples).astype(np.float32)
+
+     df = pd.DataFrame(data)
+     print(f"Generated data shape: {df.shape}")
+     if "gender" in data:
+         print(f"Positive rate: {data['label'].mean():.4f}")
+
+     # Import here to avoid circular import
+     from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
+
+     # Create feature definitions
+     # Use input_dim for dense features to be compatible with both simple and complex scenarios
+     dense_features = [
+         DenseFeature(name=f"dense_{i}", input_dim=1) for i in range(n_dense)
+     ]
+
+     # Create sparse features
+     sparse_features = [
+         SparseFeature(
+             name="user_id",
+             embedding_name="user_emb",
+             vocab_size=user_vocab_size,
+             embedding_dim=embedding_dim,
+         ),
+         SparseFeature(
+             name="item_id",
+             embedding_name="item_emb",
+             vocab_size=item_vocab_size,
+             embedding_dim=embedding_dim,
+         ),
+     ]
+
+     if custom_sparse_features:
+         # Add custom sparse features with proper vocab sizes
+         for feat_name, vocab_size in custom_sparse_features.items():
+             sparse_features.append(
+                 SparseFeature(
+                     name=feat_name,
+                     embedding_name=f"{feat_name}_emb",
+                     vocab_size=vocab_size,
+                     embedding_dim=embedding_dim,
+                 )
+             )
+     else:
+         # Add generic sparse features
+         sparse_features.extend(
+             [
+                 SparseFeature(
+                     name=f"sparse_{i}",
+                     embedding_name=f"sparse_{i}_emb",
+                     vocab_size=sparse_vocab_size,
+                     embedding_dim=embedding_dim,
+                 )
+                 for i in range(n_sparse - 2)
+             ]
+         )
+
+     # Create sequence features
+     sequence_features = []
+     for i, (seq_name, seq_vocab) in enumerate(zip(sequence_names, sequence_vocabs)):
+         if i == 0:
+             # First sequence shares embedding with item_id
+             embedding_name = "item_emb"
+         elif (
+             custom_sparse_features
+             and "category" in custom_sparse_features
+             and seq_name == "hist_categories"
+         ):
+             # hist_categories shares embedding with category
+             embedding_name = "category_emb"
+         else:
+             # Other sequences share with sparse_0
+             embedding_name = "sparse_0_emb"
+         sequence_features.append(
+             SequenceFeature(
+                 name=seq_name,
+                 vocab_size=seq_vocab,
+                 max_len=sequence_max_len,
+                 embedding_dim=embedding_dim,
+                 padding_idx=0,
+                 embedding_name=embedding_name,
+             )
+         )
+     return df, dense_features, sparse_features, sequence_features
+
+
+ def generate_match_data(
+     n_samples: int = 10000,
+     user_vocab_size: int = 1000,
+     item_vocab_size: int = 5000,
+     category_vocab_size: int = 100,
+     brand_vocab_size: int = 200,
+     city_vocab_size: int = 100,
+     user_feature_vocab_size: int = 50,
+     item_feature_vocab_size: int = 50,
+     sequence_max_len: int = 50,
+     user_embedding_dim: int = 32,
+     item_embedding_dim: int = 32,
+     seed: int = 42,
+ ) -> Tuple[pd.DataFrame, List, List, List, List, List, List]:
+     """
+     Generate synthetic data for match/retrieval tasks
+
+     Returns:
+         tuple: (dataframe, user_dense_features, user_sparse_features, user_sequence_features,
+                 item_dense_features, item_sparse_features, item_sequence_features)
+     """
+     print(f"Generating {n_samples} synthetic match samples...")
+
+     np.random.seed(seed)
+     data = {}
+
+     # User features
+     data["user_id"] = np.random.randint(1, user_vocab_size, n_samples)
+     data["user_age"] = np.random.randn(n_samples).astype(np.float32)
+     data["user_gender"] = np.random.randint(0, 2, n_samples)
+     data["user_city"] = np.random.randint(0, city_vocab_size, n_samples)
+
+     for i in range(3):
+         data[f"user_feature_{i}"] = np.random.randint(
+             1, user_feature_vocab_size, n_samples
+         )
+
+     # User behavior sequences
+     user_hist_items = []
+     user_hist_categories = []
+     for _ in range(n_samples):
+         seq_len = np.random.randint(10, sequence_max_len + 1)
+         hist_items = np.random.randint(1, item_vocab_size, seq_len).tolist()
+         hist_items = hist_items + [0] * (sequence_max_len - len(hist_items))
+         user_hist_items.append(hist_items)
+
+         hist_cats = np.random.randint(1, category_vocab_size, seq_len).tolist()
+         hist_cats = hist_cats + [0] * (sequence_max_len - len(hist_cats))
+         user_hist_categories.append(hist_cats)
+
+     data["user_hist_items"] = user_hist_items
+     data["user_hist_categories"] = user_hist_categories
+
+     # Item features
+     data["item_id"] = np.random.randint(1, item_vocab_size, n_samples)
+     data["item_price"] = np.random.randn(n_samples).astype(np.float32)
+     data["item_category"] = np.random.randint(1, category_vocab_size, n_samples)
+     data["item_brand"] = np.random.randint(1, brand_vocab_size, n_samples)
+
+     for i in range(3):
+         data[f"item_feature_{i}"] = np.random.randint(
+             1, item_feature_vocab_size, n_samples
+         )
+
+     # Generate labels with some correlation to features
+     label_probs = 1 / (
+         1
+         + np.exp(
+             -(
+                 data["user_age"] * 0.2
+                 + (data["user_gender"] - 0.5) * 0.3
+                 + data["item_price"] * 0.15
+                 + np.random.randn(n_samples) * 0.5
+             )
+         )
+     )
+     data["label"] = (label_probs > 0.5).astype(np.float32)
+
+     df = pd.DataFrame(data)
+     print(f"Generated data shape: {df.shape}")
+     print(f"Positive rate: {data['label'].mean():.4f}")
+
+     # Import here to avoid circular import
+     from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
+
+     # User dense features
+     user_dense_features = [DenseFeature(name="user_age", input_dim=1)]
+
+     # User sparse features
+     user_sparse_features = [
+         SparseFeature(
+             name="user_id", vocab_size=user_vocab_size, embedding_dim=user_embedding_dim
+         ),
+         SparseFeature(name="user_gender", vocab_size=2, embedding_dim=8),
+         SparseFeature(name="user_city", vocab_size=city_vocab_size, embedding_dim=16),
+     ]
+     user_sparse_features.extend(
+         [
+             SparseFeature(
+                 name=f"user_feature_{i}",
+                 vocab_size=user_feature_vocab_size,
+                 embedding_dim=8,
+             )
+             for i in range(3)
+         ]
+     )
+
+     # User sequence features
+     user_sequence_features = [
+         SequenceFeature(
+             name="user_hist_items",
+             vocab_size=item_vocab_size,
+             max_len=sequence_max_len,
+             embedding_dim=user_embedding_dim,
+             padding_idx=0,
+         ),
+         SequenceFeature(
+             name="user_hist_categories",
+             vocab_size=category_vocab_size,
+             max_len=sequence_max_len,
+             embedding_dim=16,
+             padding_idx=0,
+         ),
+     ]
+
+     # Item dense features
+     item_dense_features = [DenseFeature(name="item_price", input_dim=1)]
+
+     # Item sparse features
+     item_sparse_features = [
+         SparseFeature(
+             name="item_id", vocab_size=item_vocab_size, embedding_dim=item_embedding_dim
+         ),
+         SparseFeature(
+             name="item_category", vocab_size=category_vocab_size, embedding_dim=16
+         ),
+         SparseFeature(name="item_brand", vocab_size=brand_vocab_size, embedding_dim=16),
+     ]
+     item_sparse_features.extend(
+         [
+             SparseFeature(
+                 name=f"item_feature_{i}",
+                 vocab_size=item_feature_vocab_size,
+                 embedding_dim=8,
+             )
+             for i in range(3)
+         ]
+     )
+
+     # Item sequence features (empty for most match models)
+     item_sequence_features = []
+
+     return (
+         df,
+         user_dense_features,
+         user_sparse_features,
+         user_sequence_features,
+         item_dense_features,
+         item_sparse_features,
+         item_sequence_features,
+     )
+
+
+ def generate_multitask_data(
+     n_samples: int = 10000,
+     n_dense: int = 5,
+     n_sparse: int = 8,
+     n_sequences: int = 2,
+     user_vocab_size: int = 1000,
+     item_vocab_size: int = 500,
+     sparse_vocab_size: int = 50,
+     sequence_max_len: int = 20,
+     embedding_dim: int = 16,
+     seed: int = 42,
+ ) -> Tuple[pd.DataFrame, List, List, List]:
+     """
+     Generate synthetic data for multi-task learning
+
+     Returns:
+         tuple: (dataframe, dense_features, sparse_features, sequence_features)
+     """
+     print(f"Generating {n_samples} synthetic multi-task samples...")
+
+     np.random.seed(seed)
+     data = {}
+
+     # Generate dense features
+     for i in range(n_dense):
+         data[f"dense_{i}"] = np.random.randn(n_samples).astype(np.float32)
+
+     # Generate sparse features
+     data["user_id"] = np.random.randint(1, user_vocab_size, n_samples)
+     data["item_id"] = np.random.randint(1, item_vocab_size, n_samples)
+
+     for i in range(n_sparse - 2):
+         data[f"sparse_{i}"] = np.random.randint(1, sparse_vocab_size, n_samples)
+
+     # Generate sequence features
+     sequence_names = []
+     sequence_vocabs = []
+
+     for i in range(n_sequences):
+         sequences = []
+         for _ in range(n_samples):
+             seq_len = np.random.randint(5, sequence_max_len + 1)
+             if i == 0:
+                 seq = np.random.randint(0, item_vocab_size, seq_len).tolist()
+                 seq_vocab = item_vocab_size
+                 seq_name = "sequence_0"
+             else:
+                 seq = np.random.randint(0, sparse_vocab_size, seq_len).tolist()
+                 seq_vocab = sparse_vocab_size
+                 seq_name = f"sequence_{i}"
+
+             seq = seq + [0] * (sequence_max_len - len(seq))
+             sequences.append(seq)
+
+         data[seq_name] = sequences
+         sequence_names.append(seq_name)
+         sequence_vocabs.append(seq_vocab)
+
+     # Generate multi-task labels with correlation
+     # CTR (click) is relatively easier to predict
+     ctr_logits = (
+         data["dense_0"] * 0.3 + data["dense_1"] * 0.2 + np.random.randn(n_samples) * 0.5
+     )
+     data["click"] = (1 / (1 + np.exp(-ctr_logits)) > 0.5).astype(np.float32)
+
+     # CVR (conversion) depends on click and is harder
+     cvr_logits = (
+         data["dense_2"] * 0.2
+         + data["dense_3"] * 0.15
+         + data["click"] * 1.5  # Strong dependency on click
+         + np.random.randn(n_samples) * 0.8
+     )
+     data["conversion"] = (1 / (1 + np.exp(-cvr_logits)) > 0.3).astype(np.float32)
+
+     # CTCVR = click AND conversion
+     data["ctcvr"] = (data["click"] * data["conversion"]).astype(np.float32)
+
+     df = pd.DataFrame(data)
+     print(f"Generated data shape: {df.shape}")
+     print(f"Click rate: {data['click'].mean():.4f}")
+     print(f"Conversion rate (overall): {data['conversion'].mean():.4f}")
+     if data["click"].sum() > 0:
+         print(
+             f"Conversion rate (given click): {data['conversion'][data['click'] == 1].mean():.4f}"
+         )
+     print(f"CTCVR rate: {data['ctcvr'].mean():.4f}")
+
+     # Import here to avoid circular import
+     from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
+
+     # Create feature definitions
+     dense_features = [
+         DenseFeature(name=f"dense_{i}", input_dim=1) for i in range(n_dense)
+     ]
+
+     # Create sparse features
+     sparse_features = [
+         SparseFeature(
+             name="user_id",
+             embedding_name="user_emb",
+             vocab_size=user_vocab_size,
+             embedding_dim=embedding_dim,
+         ),
+         SparseFeature(
+             name="item_id",
+             embedding_name="item_emb",
+             vocab_size=item_vocab_size,
+             embedding_dim=embedding_dim,
+         ),
+     ]
+     sparse_features.extend(
+         [
+             SparseFeature(
+                 name=f"sparse_{i}",
+                 embedding_name=f"sparse_{i}_emb",
+                 vocab_size=sparse_vocab_size,
+                 embedding_dim=embedding_dim,
+             )
+             for i in range(n_sparse - 2)
+         ]
+     )
+
+     # Create sequence features
+     sequence_features = []
+     for i, (seq_name, seq_vocab) in enumerate(zip(sequence_names, sequence_vocabs)):
+         if i == 0:
+             embedding_name = "item_emb"
+         else:
+             embedding_name = "sparse_0_emb"
+         sequence_features.append(
+             SequenceFeature(
+                 name=seq_name,
+                 vocab_size=seq_vocab,
+                 max_len=sequence_max_len,
+                 embedding_dim=embedding_dim,
+                 padding_idx=0,
+                 embedding_name=embedding_name,
+             )
+         )
+
+     return df, dense_features, sparse_features, sequence_features
+
+
+ def generate_distributed_ranking_data(
+     num_samples: int = 100000,
+     num_users: int = 10000,
+     num_items: int = 5000,
+     num_categories: int = 20,
+     num_cities: int = 100,
+     max_seq_len: int = 50,
+     embedding_dim: int = 32,
+     seed: int = 42,
+ ) -> Tuple[pd.DataFrame, List, List, List]:
+     """
+     Generate synthetic data for distributed training scenarios
+
+     Returns:
+         tuple: (dataframe, dense_features, sparse_features, sequence_features)
+     """
+     return generate_ranking_data(
+         n_samples=num_samples,
+         n_dense=5,
+         n_sparse=6,  # user_id, item_id + 4 custom features
+         n_sequences=2,
+         user_vocab_size=num_users + 1,
+         item_vocab_size=num_items + 1,
+         sequence_max_len=max_seq_len,
+         embedding_dim=embedding_dim,
+         seed=seed,
+         custom_sparse_features={
+             "gender": 2,
+             "age_group": 7,
+             "category": num_categories,
+             "city": num_cities,
+         },
+         use_simple_names=False,
+     )
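For reference, a minimal usage sketch of the new generators (not part of the diff): it only calls the functions shown above, imported from the module path implied by the file location; how the returned feature specs feed into NextRec models is omitted since it depends on the rest of the API.

```python
# Minimal sketch, assuming nextrec 0.4.2 is installed.
from nextrec.utils.synthetic_data import (
    generate_multitask_data,
    generate_ranking_data,
)

# CTR-style dataset with the default layout: 5 dense columns, user_id/item_id
# plus 6 generic sparse columns, 2 padded ID sequences, and a random label.
df, dense_feats, sparse_feats, seq_feats = generate_ranking_data(
    n_samples=1000, seed=7
)
print(df.shape)                                             # (1000, 16)
print(len(dense_feats), len(sparse_feats), len(seq_feats))  # 5 8 2

# Multi-task dataset with correlated click / conversion / ctcvr targets.
mt_df, *_ = generate_multitask_data(n_samples=1000, seed=7)
print(mt_df[["click", "conversion", "ctcvr"]].mean())
```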
nextrec/utils/tensor.py CHANGED
@@ -6,56 +6,67 @@ Author: Yang Zhou, zyaztec@gmail.com
  """
  
  import torch
- import numpy as np
  from typing import Any
  
  
  def to_tensor(
-     value: Any,
-     dtype: torch.dtype,
-     device: torch.device | str | None = None
+     value: Any, dtype: torch.dtype, device: torch.device | str | None = None
  ) -> torch.Tensor:
      if value is None:
          raise ValueError("[Tensor Utils Error] Cannot convert None to tensor.")
      tensor = value if isinstance(value, torch.Tensor) else torch.as_tensor(value)
      if tensor.dtype != dtype:
          tensor = tensor.to(dtype=dtype)
-
+
      if device is not None:
-         target_device = device if isinstance(device, torch.device) else torch.device(device)
+         target_device = (
+             device if isinstance(device, torch.device) else torch.device(device)
+         )
          if tensor.device != target_device:
              tensor = tensor.to(target_device)
      return tensor
  
+
  def stack_tensors(tensors: list[torch.Tensor], dim: int = 0) -> torch.Tensor:
      if not tensors:
          raise ValueError("[Tensor Utils Error] Cannot stack empty list of tensors.")
      return torch.stack(tensors, dim=dim)
  
+
  def concat_tensors(tensors: list[torch.Tensor], dim: int = 0) -> torch.Tensor:
      if not tensors:
-         raise ValueError("[Tensor Utils Error] Cannot concatenate empty list of tensors.")
+         raise ValueError(
+             "[Tensor Utils Error] Cannot concatenate empty list of tensors."
+         )
      return torch.cat(tensors, dim=dim)
  
+
  def pad_sequence_tensors(
      tensors: list[torch.Tensor],
      max_len: int | None = None,
      padding_value: float = 0.0,
-     padding_side: str = 'right'
+     padding_side: str = "right",
  ) -> torch.Tensor:
      if not tensors:
          raise ValueError("[Tensor Utils Error] Cannot pad empty list of tensors.")
      if max_len is None:
          max_len = max(t.size(0) for t in tensors)
      batch_size = len(tensors)
-     padded = torch.full((batch_size, max_len), padding_value, dtype=tensors[0].dtype, device=tensors[0].device)
-
+     padded = torch.full(
+         (batch_size, max_len),
+         padding_value,
+         dtype=tensors[0].dtype,
+         device=tensors[0].device,
+     )
+
      for i, tensor in enumerate(tensors):
          length = min(tensor.size(0), max_len)
-         if padding_side == 'right':
+         if padding_side == "right":
              padded[i, :length] = tensor[:length]
-         elif padding_side == 'left':
+         elif padding_side == "left":
              padded[i, -length:] = tensor[:length]
          else:
-             raise ValueError(f"[Tensor Utils Error] padding_side must be 'right' or 'left', got {padding_side}")
+             raise ValueError(
+                 f"[Tensor Utils Error] padding_side must be 'right' or 'left', got {padding_side}"
+             )
      return padded
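For reference, a minimal sketch of the helpers above (not part of the diff): the import path is assumed from the file location, and this release only reformats them and drops the unused numpy import, so behaviour matches 0.3.6.

```python
# Minimal sketch, assuming nextrec 0.4.2 is installed.
import torch
from nextrec.utils.tensor import pad_sequence_tensors, to_tensor

# Convert arbitrary input to a tensor with an explicit dtype and device.
x = to_tensor([1, 2, 3], dtype=torch.float32, device="cpu")

# Pad variable-length sequences to a common length, on the right or the left.
seqs = [torch.tensor([1, 2, 3]), torch.tensor([4, 5])]
print(pad_sequence_tensors(seqs, max_len=4).tolist())
# [[1, 2, 3, 0], [4, 5, 0, 0]]
print(pad_sequence_tensors(seqs, max_len=4, padding_side="left").tolist())
# [[0, 1, 2, 3], [0, 0, 4, 5]]
```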
{nextrec-0.3.6.dist-info → nextrec-0.4.2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nextrec
- Version: 0.3.6
+ Version: 0.4.2
  Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
  Project-URL: Homepage, https://github.com/zerolovesea/NextRec
  Project-URL: Repository, https://github.com/zerolovesea/NextRec
@@ -55,7 +55,7 @@ Requires-Dist: seaborn>=0.12.0; extra == 'dev'
  Description-Content-Type: text/markdown
  
  <p align="center">
-   <img align="center" src="asserts/logo.png" width="40%">
+   <img align="center" src="assets/logo.png" width="40%">
  <p>
  
  <div align="center">
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
  ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
  ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
  ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
- ![Version](https://img.shields.io/badge/Version-0.3.6-orange.svg)
+ ![Version](https://img.shields.io/badge/Version-0.4.2-orange.svg)
  
  English | [中文文档](README_zh.md)
  
@@ -86,7 +86,7 @@ NextRec is a modern recommendation framework built on PyTorch, delivering a unif
  
  NextRec adopts a modular and low-coupling engineering design, enabling full-pipeline reusability and scalability across data processing → model construction → training & evaluation → inference & deployment. Its core components include: a Feature-Spec-driven Embedding architecture, the BaseModel abstraction, a set of independent reusable Layers, a unified DataLoader for both training and inference, and a ready-to-use Model Zoo.
  
- ![NextRec Architecture](asserts/nextrec_diagram_en.png)
+ ![NextRec Architecture](assets/nextrec_diagram_en.png)
  
  > The project borrows ideas from excellent open-source rec libraries. Early layers referenced [torch-rechub](https://github.com/datawhalechina/torch-rechub) but have been replaced with in-house implementations. torch-rechub remains mature in architecture and models; the author contributed a bit there—feel free to check it out.
  
@@ -110,7 +110,7 @@ To dive deeper, Jupyter notebooks are available:
  - [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
  - [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
  
- > Current version [0.3.6]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
+ > Current version [0.4.2]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
  
  ## 5-Minute Quick Start
  
@@ -196,6 +196,16 @@ metrics = model.evaluate(
  )
  ```
  
+ ## Platform Compatibility
+ 
+ The current version is 0.4.2. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
+ 
+ | Platform | Configuration |
+ |----------|---------------|
+ | MacOS latest | MacBook Pro M4 Pro 24GB RAM |
+ | Ubuntu latest | AutoDL 4070D Dual GPU |
+ | CentOS 7 | Intel Xeon 5138Y 96 cores 377GB RAM |
+ 
  ---
  
  ## Supported Models