nextrec 0.2.7__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. nextrec-0.3.1/PKG-INFO +306 -0
  2. nextrec-0.3.1/README.md +250 -0
  3. nextrec-0.3.1/README_zh.md +247 -0
  4. nextrec-0.3.1/asserts/logo.png +0 -0
  5. nextrec-0.3.1/asserts/mmoe_tutorial.png +0 -0
  6. nextrec-0.3.1/docs/en/Getting started guide.md +105 -0
  7. {nextrec-0.2.7 → nextrec-0.3.1}/docs/rtd/conf.py +1 -1
  8. nextrec-0.3.1/docs/zh//345/277/253/351/200/237/344/270/212/346/211/213.md +105 -0
  9. nextrec-0.3.1/nextrec/__version__.py +1 -0
  10. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/basic/activation.py +4 -8
  11. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/basic/callback.py +1 -1
  12. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/basic/features.py +33 -25
  13. nextrec-0.3.1/nextrec/basic/layers.py +543 -0
  14. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/basic/loggers.py +3 -4
  15. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/basic/metrics.py +39 -115
  16. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/basic/model.py +248 -174
  17. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/basic/session.py +1 -5
  18. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/data/__init__.py +12 -0
  19. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/data/data_utils.py +3 -27
  20. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/data/dataloader.py +26 -34
  21. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/data/preprocessor.py +2 -1
  22. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/loss/listwise.py +6 -4
  23. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/loss/loss_utils.py +10 -6
  24. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/loss/pairwise.py +5 -3
  25. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/loss/pointwise.py +7 -13
  26. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/match/mind.py +110 -1
  27. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/multi_task/esmm.py +46 -27
  28. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/multi_task/mmoe.py +48 -30
  29. nextrec-0.3.1/nextrec/models/multi_task/ple.py +275 -0
  30. nextrec-0.3.1/nextrec/models/multi_task/poso.py +413 -0
  31. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/multi_task/share_bottom.py +43 -26
  32. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/__init__.py +2 -0
  33. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/dcn.py +20 -1
  34. nextrec-0.3.1/nextrec/models/ranking/dcn_v2.py +84 -0
  35. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/deepfm.py +44 -18
  36. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/dien.py +130 -27
  37. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/masknet.py +13 -67
  38. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/widedeep.py +39 -18
  39. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/xdeepfm.py +34 -1
  40. nextrec-0.3.1/nextrec/utils/common.py +41 -0
  41. {nextrec-0.2.7 → nextrec-0.3.1}/pyproject.toml +1 -1
  42. {nextrec-0.2.7 → nextrec-0.3.1}/test/test_layers.py +3 -3
  43. {nextrec-0.2.7 → nextrec-0.3.1}/tutorials/example_match_dssm.py +0 -2
  44. {nextrec-0.2.7 → nextrec-0.3.1}/tutorials/example_multitask.py +25 -31
  45. {nextrec-0.2.7 → nextrec-0.3.1}/tutorials/example_ranking_din.py +22 -59
  46. nextrec-0.3.1/tutorials/movielen_match_dssm.py +121 -0
  47. nextrec-0.3.1/tutorials/movielen_ranking_deepfm.py +60 -0
  48. nextrec-0.3.1/tutorials/notebooks/en/Hands on dataprocessor.ipynb +850 -0
  49. nextrec-0.3.1/tutorials/notebooks/en/Hands on nextrec.ipynb +1652 -0
  50. nextrec-0.3.1/tutorials/notebooks/zh/Hands on dataprocessor.ipynb +850 -0
  51. {nextrec-0.2.7 → nextrec-0.3.1}/tutorials/notebooks/zh/Hands on nextrec.ipynb +1 -1
  52. nextrec-0.2.7/PKG-INFO +0 -281
  53. nextrec-0.2.7/README.md +0 -225
  54. nextrec-0.2.7/README_zh.md +0 -222
  55. nextrec-0.2.7/docs/zh//345/277/253/351/200/237/344/270/212/346/211/213.md +0 -97
  56. nextrec-0.2.7/nextrec/__version__.py +0 -1
  57. nextrec-0.2.7/nextrec/basic/layers.py +0 -980
  58. nextrec-0.2.7/nextrec/models/multi_task/ple.py +0 -260
  59. nextrec-0.2.7/nextrec/utils/common.py +0 -16
  60. nextrec-0.2.7/tutorials/movielen_match_dssm.py +0 -133
  61. nextrec-0.2.7/tutorials/movielen_ranking_deepfm.py +0 -66
  62. nextrec-0.2.7/tutorials/notebooks/zh/Hands on dataprocessor.ipynb +0 -1368
  63. {nextrec-0.2.7 → nextrec-0.3.1}/.github/workflows/publish.yml +0 -0
  64. {nextrec-0.2.7 → nextrec-0.3.1}/.github/workflows/tests.yml +0 -0
  65. {nextrec-0.2.7 → nextrec-0.3.1}/.gitignore +0 -0
  66. {nextrec-0.2.7 → nextrec-0.3.1}/.readthedocs.yaml +0 -0
  67. {nextrec-0.2.7 → nextrec-0.3.1}/CODE_OF_CONDUCT.md +0 -0
  68. {nextrec-0.2.7 → nextrec-0.3.1}/CONTRIBUTING.md +0 -0
  69. {nextrec-0.2.7 → nextrec-0.3.1}/LICENSE +0 -0
  70. {nextrec-0.2.7 → nextrec-0.3.1}/MANIFEST.in +0 -0
  71. {nextrec-0.2.7 → nextrec-0.3.1}/dataset/ctcvr_task.csv +0 -0
  72. {nextrec-0.2.7 → nextrec-0.3.1}/dataset/match_task.csv +0 -0
  73. {nextrec-0.2.7 → nextrec-0.3.1}/dataset/movielens_100k.csv +0 -0
  74. {nextrec-0.2.7 → nextrec-0.3.1}/dataset/multitask_task.csv +0 -0
  75. {nextrec-0.2.7 → nextrec-0.3.1}/dataset/ranking_task.csv +0 -0
  76. {nextrec-0.2.7 → nextrec-0.3.1}/docs/rtd/Makefile +0 -0
  77. {nextrec-0.2.7 → nextrec-0.3.1}/docs/rtd/index.md +0 -0
  78. {nextrec-0.2.7 → nextrec-0.3.1}/docs/rtd/make.bat +0 -0
  79. {nextrec-0.2.7 → nextrec-0.3.1}/docs/rtd/modules.rst +0 -0
  80. {nextrec-0.2.7 → nextrec-0.3.1}/docs/rtd/nextrec.basic.rst +0 -0
  81. {nextrec-0.2.7 → nextrec-0.3.1}/docs/rtd/nextrec.data.rst +0 -0
  82. {nextrec-0.2.7 → nextrec-0.3.1}/docs/rtd/nextrec.loss.rst +0 -0
  83. {nextrec-0.2.7 → nextrec-0.3.1}/docs/rtd/nextrec.rst +0 -0
  84. {nextrec-0.2.7 → nextrec-0.3.1}/docs/rtd/nextrec.utils.rst +0 -0
  85. {nextrec-0.2.7 → nextrec-0.3.1}/docs/rtd/requirements.txt +0 -0
  86. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/__init__.py +0 -0
  87. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/basic/__init__.py +0 -0
  88. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/loss/__init__.py +0 -0
  89. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/generative/hstu.py +0 -0
  90. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/generative/tiger.py +0 -0
  91. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/match/__init__.py +0 -0
  92. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/match/dssm.py +0 -0
  93. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/match/dssm_v2.py +0 -0
  94. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/match/sdm.py +0 -0
  95. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/match/youtube_dnn.py +0 -0
  96. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/afm.py +0 -0
  97. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/autoint.py +0 -0
  98. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/din.py +0 -0
  99. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/fibinet.py +0 -0
  100. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/fm.py +0 -0
  101. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/models/ranking/pnn.py +0 -0
  102. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/utils/__init__.py +0 -0
  103. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/utils/embedding.py +0 -0
  104. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/utils/initializer.py +0 -0
  105. {nextrec-0.2.7 → nextrec-0.3.1}/nextrec/utils/optimizer.py +0 -0
  106. {nextrec-0.2.7 → nextrec-0.3.1}/pytest.ini +0 -0
  107. {nextrec-0.2.7 → nextrec-0.3.1}/requirements.txt +0 -0
  108. {nextrec-0.2.7 → nextrec-0.3.1}/test/__init__.py +0 -0
  109. {nextrec-0.2.7 → nextrec-0.3.1}/test/conftest.py +0 -0
  110. {nextrec-0.2.7 → nextrec-0.3.1}/test/run_tests.py +0 -0
  111. {nextrec-0.2.7 → nextrec-0.3.1}/test/test_losses.py +0 -0
  112. {nextrec-0.2.7 → nextrec-0.3.1}/test/test_match_models.py +0 -0
  113. {nextrec-0.2.7 → nextrec-0.3.1}/test/test_multitask_models.py +0 -0
  114. {nextrec-0.2.7 → nextrec-0.3.1}/test/test_preprocessor.py +0 -0
  115. {nextrec-0.2.7 → nextrec-0.3.1}/test/test_ranking_models.py +0 -0
  116. {nextrec-0.2.7 → nextrec-0.3.1}/test/test_utils.py +0 -0
  117. {nextrec-0.2.7 → nextrec-0.3.1}/test_requirements.txt +0 -0
nextrec-0.3.1/PKG-INFO ADDED
@@ -0,0 +1,306 @@
1
+ Metadata-Version: 2.4
2
+ Name: nextrec
3
+ Version: 0.3.1
4
+ Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
5
+ Project-URL: Homepage, https://github.com/zerolovesea/NextRec
6
+ Project-URL: Repository, https://github.com/zerolovesea/NextRec
7
+ Project-URL: Documentation, https://github.com/zerolovesea/NextRec/blob/main/README.md
8
+ Project-URL: Issues, https://github.com/zerolovesea/NextRec/issues
9
+ Author-email: zerolovesea <zyaztec@gmail.com>
10
+ License-File: LICENSE
11
+ Keywords: ctr,deep-learning,match,pytorch,ranking,recommendation
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: numpy<2.0,>=1.21; sys_platform == 'linux' and python_version < '3.12'
23
+ Requires-Dist: numpy<3.0,>=1.26; sys_platform == 'linux' and python_version >= '3.12'
24
+ Requires-Dist: numpy>=1.23.0; sys_platform == 'win32'
25
+ Requires-Dist: numpy>=1.24.0; sys_platform == 'darwin'
26
+ Requires-Dist: pandas<2.0,>=1.5; sys_platform == 'linux' and python_version < '3.12'
27
+ Requires-Dist: pandas<2.3.0,>=2.1.0; sys_platform == 'win32'
28
+ Requires-Dist: pandas>=2.0.0; sys_platform == 'darwin'
29
+ Requires-Dist: pandas>=2.1.0; sys_platform == 'linux' and python_version >= '3.12'
30
+ Requires-Dist: pyarrow<13.0.0,>=10.0.0; sys_platform == 'linux' and python_version < '3.12'
31
+ Requires-Dist: pyarrow<15.0.0,>=12.0.0; sys_platform == 'win32'
32
+ Requires-Dist: pyarrow>=12.0.0; sys_platform == 'darwin'
33
+ Requires-Dist: pyarrow>=16.0.0; sys_platform == 'linux' and python_version >= '3.12'
34
+ Requires-Dist: scikit-learn<2.0,>=1.2; sys_platform == 'linux' and python_version < '3.12'
35
+ Requires-Dist: scikit-learn>=1.3.0; sys_platform == 'darwin'
36
+ Requires-Dist: scikit-learn>=1.3.0; sys_platform == 'linux' and python_version >= '3.12'
37
+ Requires-Dist: scikit-learn>=1.3.0; sys_platform == 'win32'
38
+ Requires-Dist: scipy<1.12,>=1.8; sys_platform == 'linux' and python_version < '3.12'
39
+ Requires-Dist: scipy>=1.10.0; sys_platform == 'darwin'
40
+ Requires-Dist: scipy>=1.10.0; sys_platform == 'win32'
41
+ Requires-Dist: scipy>=1.11.0; sys_platform == 'linux' and python_version >= '3.12'
42
+ Requires-Dist: torch>=2.0.0
43
+ Requires-Dist: torchvision>=0.15.0
44
+ Requires-Dist: tqdm>=4.65.0
45
+ Provides-Extra: dev
46
+ Requires-Dist: jupyter>=1.0.0; extra == 'dev'
47
+ Requires-Dist: matplotlib>=3.7.0; extra == 'dev'
48
+ Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
49
+ Requires-Dist: pytest-html>=3.2.0; extra == 'dev'
50
+ Requires-Dist: pytest-mock>=3.11.0; extra == 'dev'
51
+ Requires-Dist: pytest-timeout>=2.1.0; extra == 'dev'
52
+ Requires-Dist: pytest-xdist>=3.3.0; extra == 'dev'
53
+ Requires-Dist: pytest>=7.4.0; extra == 'dev'
54
+ Requires-Dist: seaborn>=0.12.0; extra == 'dev'
55
+ Description-Content-Type: text/markdown
56
+
57
+ <p align="center">
58
+ <img align="center" src="asserts/logo.png" width="40%">
59
+ </p>
60
+
61
+ <div align="center">
62
+
63
+ ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
64
+ ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
65
+ ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
66
+ ![Version](https://img.shields.io/badge/Version-0.3.1-orange.svg)
67
+
68
+ English | [中文文档](README_zh.md)
69
+
70
+ **A Unified, Efficient, and Scalable Recommendation System Framework**
71
+
72
+ </div>
73
+
74
+ ## Introduction
75
+
76
+ NextRec is a modern recommendation framework built on PyTorch, delivering a unified experience for modeling, training, and evaluation. It follows a modular design with rich model implementations, data-processing utilities, and engineering-ready training components. NextRec focuses on large-scale industrial recall scenarios on Spark clusters, training on massive offline parquet features.
77
+
78
+ ### Why NextRec
79
+
80
+ - **Unified feature engineering & data pipeline**: Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching offline feature training/inference in industrial big-data settings.
81
+ - **Multi-scenario coverage**: Ranking (CTR/CVR), retrieval, multi-task learning, and more marketing/rec models, with a continuously expanding model zoo.
82
+ - **Developer-friendly experience**: Stream processing/training/inference for csv/parquet/pathlike data, plus GPU/MPS acceleration and visualization support.
83
+ - **Efficient training & evaluation**: Standardized engine with optimizers, LR schedulers, early stopping, checkpoints, and detailed logging out of the box.
84
+
85
+ > The project borrows ideas from excellent open-source rec libraries. Early layers referenced [torch-rechub](https://github.com/datawhalechina/torch-rechub) but have been replaced with in-house implementations. torch-rechub remains mature in architecture and models; the author contributed a bit there—feel free to check it out.
86
+
87
+ ---
88
+
89
+ ## Installation
90
+
91
+ You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
92
+
93
+ ## Tutorials
94
+
95
+ See `tutorials/` for examples covering ranking, retrieval, multi-task learning, and data processing:
96
+
97
+ - [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM training on MovieLens 100k
98
+ - [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN training on the e-commerce dataset
99
+ - [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task training on the e-commerce dataset
100
+ - [movielen_match_dssm.py](/tutorials/movielen_match_dssm.py) — DSSM retrieval on MovieLens 100k
101
+
102
+ To dive deeper, Jupyter notebooks are available:
103
+
104
+ - [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
105
+ - [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
106
+
107
+ > Current version [0.3.1]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
108
+
109
+ ## 5-Minute Quick Start
110
+
111
+ We provide a detailed quick start and paired datasets to help you learn the framework. In `dataset/` you’ll find an e-commerce sample dataset like this:
112
+
113
+ | user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
114
+ |--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
115
+ | 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
116
+ | 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
117
+
118
+ Below is a short example showing how to train a DIN model. DIN (Deep Interest Network) won Best Paper at KDD 2018 for CTR prediction. You can also run `python tutorials/example_ranking_din.py` directly.
119
+
120
+ After training, detailed logs are available under `nextrec_logs/din_tutorial`.
121
+
122
+ ```python
123
+ import pandas as pd
124
+
125
+ from nextrec.models.ranking.din import DIN
126
+ from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
127
+
128
+ df = pd.read_csv('dataset/ranking_task.csv')
129
+
130
+ for col in [c for c in df.columns if 'sequence' in c]: # csv loads lists as text; convert them back to objects
131
+ df[col] = df[col].apply(lambda x: eval(x) if isinstance(x, str) else x)
132
+
133
+ # Define feature columns
134
+ dense_features = [DenseFeature(name=f'dense_{i}', input_dim=1) for i in range(8)]
135
+
136
+ sparse_features = [SparseFeature(name='user_id', embedding_name='user_emb', vocab_size=int(df['user_id'].max() + 1), embedding_dim=32), SparseFeature(name='item_id', embedding_name='item_emb', vocab_size=int(df['item_id'].max() + 1), embedding_dim=32),]
137
+
138
+ sparse_features.extend([SparseFeature(name=f'sparse_{i}', embedding_name=f'sparse_{i}_emb', vocab_size=int(df[f'sparse_{i}'].max() + 1), embedding_dim=32) for i in range(10)])
139
+
140
+ sequence_features = [
141
+ SequenceFeature(name='sequence_0', vocab_size=int(df['sequence_0'].apply(lambda x: max(x)).max() + 1), embedding_dim=32, padding_idx=0, embedding_name='item_emb'),
142
+ SequenceFeature(name='sequence_1', vocab_size=int(df['sequence_1'].apply(lambda x: max(x)).max() + 1), embedding_dim=16, padding_idx=0, embedding_name='sparse_0_emb'),]
143
+
144
+ mlp_params = {
145
+ "dims": [256, 128, 64],
146
+ "activation": "relu",
147
+ "dropout": 0.3,
148
+ }
149
+
150
+ model = DIN(
151
+ dense_features=dense_features,
152
+ sparse_features=sparse_features,
153
+ sequence_features=sequence_features,
154
+ mlp_params=mlp_params,
155
+ attention_hidden_units=[80, 40],
156
+ attention_activation='sigmoid',
157
+ attention_use_softmax=True,
158
+ target=['label'], # target variable
159
+ device='mps',
160
+ embedding_l1_reg=1e-6,
161
+ embedding_l2_reg=1e-5,
162
+ dense_l1_reg=1e-5,
163
+ dense_l2_reg=1e-4,
164
+ session_id="din_tutorial", # experiment id for logs
165
+ )
166
+
167
+ # Compile model with optimizer and loss
168
+ model.compile(
169
+ optimizer = "adam",
170
+ optimizer_params = {"lr": 1e-3, "weight_decay": 1e-5},
171
+ loss = "focal",
172
+ loss_params={"gamma": 2.0, "alpha": 0.25},
173
+ )
174
+
175
+ model.fit(
176
+ train_data=df,
177
+ metrics=['auc', 'gauc', 'logloss'], # metrics to track
178
+ epochs=3,
179
+ batch_size=512,
180
+ shuffle=True,
181
+ user_id_column='user_id' # used for GAUC
182
+ )
183
+
184
+ # Evaluate after training
185
+ metrics = model.evaluate(
186
+ df,
187
+ metrics=['auc', 'gauc', 'logloss'],
188
+ batch_size=512,
189
+ user_id_column='user_id'
190
+ )
191
+ ```
192
+
193
+ ---
194
+
195
+ ## Supported Models
196
+
197
+ ### Ranking Models
198
+
199
+ | Model | Paper | Year | Status |
200
+ |-------|-------|------|--------|
201
+ | [FM](nextrec/models/ranking/fm.py) | Factorization Machines | ICDM 2010 | Supported |
202
+ | [AFM](nextrec/models/ranking/afm.py) | Attentional Factorization Machines: Learning the Weight of Feature Interactions via Attention Networks | IJCAI 2017 | Supported |
203
+ | [DeepFM](nextrec/models/ranking/deepfm.py) | DeepFM: A Factorization-Machine based Neural Network for CTR Prediction | IJCAI 2017 | Supported |
204
+ | [Wide&Deep](nextrec/models/ranking/widedeep.py) | Wide & Deep Learning for Recommender Systems | DLRS 2016 | Supported |
205
+ | [xDeepFM](nextrec/models/ranking/xdeepfm.py) | xDeepFM: Combining Explicit and Implicit Feature Interactions | KDD 2018 | Supported |
206
+ | [FiBiNET](nextrec/models/ranking/fibinet.py) | FiBiNET: Combining Feature Importance and Bilinear Feature Interaction for CTR Prediction | RecSys 2019 | Supported |
207
+ | [PNN](nextrec/models/ranking/pnn.py) | Product-based Neural Networks for User Response Prediction | ICDM 2016 | Supported |
208
+ | [AutoInt](nextrec/models/ranking/autoint.py) | AutoInt: Automatic Feature Interaction Learning | CIKM 2019 | Supported |
209
+ | [DCN](nextrec/models/ranking/dcn.py) | Deep & Cross Network for Ad Click Predictions | ADKDD 2017 | Supported |
210
+ | [DCN v2](nextrec/models/ranking/dcn_v2.py) | DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems | KDD 2021 | In Progress |
211
+ | [DIN](nextrec/models/ranking/din.py) | Deep Interest Network for CTR Prediction | KDD 2018 | Supported |
212
+ | [DIEN](nextrec/models/ranking/dien.py) | Deep Interest Evolution Network | AAAI 2019 | Supported |
213
+ | [MaskNet](nextrec/models/ranking/masknet.py) | MaskNet: Feature-wise Gating Blocks for High-dimensional Sparse Recommendation Data | 2020 | Supported |
214
+
215
+ ### Retrieval Models
216
+
217
+ | Model | Paper | Year | Status |
218
+ |-------|-------|------|--------|
219
+ | [DSSM](nextrec/models/match/dssm.py) | Learning Deep Structured Semantic Models | CIKM 2013 | Supported |
220
+ | [DSSM v2](nextrec/models/match/dssm_v2.py) | DSSM with pairwise BPR-style optimization | - | Supported |
221
+ | [YouTube DNN](nextrec/models/match/youtube_dnn.py) | Deep Neural Networks for YouTube Recommendations | RecSys 2016 | Supported |
222
+ | [MIND](nextrec/models/match/mind.py) | Multi-Interest Network with Dynamic Routing | CIKM 2019 | Supported |
223
+ | [SDM](nextrec/models/match/sdm.py) | Sequential Deep Matching Model | - | Supported |
224
+
225
+ ### Multi-task Models
226
+
227
+ | Model | Paper | Year | Status |
228
+ |-------|-------|------|--------|
229
+ | [MMOE](nextrec/models/multi_task/mmoe.py) | Modeling Task Relationships in Multi-task Learning | KDD 2018 | Supported |
230
+ | [PLE](nextrec/models/multi_task/ple.py) | Progressive Layered Extraction | RecSys 2020 | Supported |
231
+ | [ESMM](nextrec/models/multi_task/esmm.py) | Entire Space Multi-task Model | SIGIR 2018 | Supported |
232
+ | [ShareBottom](nextrec/models/multi_task/share_bottom.py) | Multitask Learning | - | Supported |
233
+ | [POSO](nextrec/models/multi_task/poso.py) | POSO: Personalized Cold-start Modules for Large-scale Recommender Systems | 2021 | Supported |
234
+ | [POSO-IFLYTEK](nextrec/models/multi_task/poso_iflytek.py) | POSO with PLE-style gating for sequential marketing tasks | - | Supported |
235
+
236
+ ### Generative Models
237
+
238
+ | Model | Paper | Year | Status |
239
+ |-------|-------|------|--------|
240
+ | [TIGER](nextrec/models/generative/tiger.py) | Recommender Systems with Generative Retrieval | NeurIPS 2023 | In Progress |
241
+ | [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - | In Progress |
242
+
243
+ ---
244
+
245
+ ## Contributing
246
+
247
+ We welcome contributions of any form!
248
+
249
+ ### How to Contribute
250
+
251
+ 1. Fork the repository
252
+ 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
253
+ 3. Commit your changes (`git commit -m 'Add AmazingFeature'`)
254
+ 4. Push your branch (`git push origin feature/AmazingFeature`)
255
+ 5. Open a Pull Request
256
+
257
+ > Before submitting a PR, please run tests using `pytest test/ -v` or `python -m pytest` to ensure everything passes.
258
+
259
+ ### Code Style
260
+
261
+ - Follow PEP8
262
+ - Provide unit tests for new functionality
263
+ - Update documentation accordingly
264
+
265
+ ### Reporting Issues
266
+
267
+ When submitting issues on GitHub, please include:
268
+
269
+ - Description of the problem
270
+ - Reproduction steps
271
+ - Expected behavior
272
+ - Actual behavior
273
+ - Environment info (Python version, PyTorch version, etc.)
274
+
275
+ ---
276
+
277
+ ## License
278
+
279
+ This project is licensed under the [Apache 2.0 License](./LICENSE).
280
+
281
+ ---
282
+
283
+ ## Contact
284
+
285
+ - **GitHub Issues**: [Submit an issue](https://github.com/zerolovesea/NextRec/issues)
286
+ - **Email**: zyaztec@gmail.com
287
+
288
+ ---
289
+
290
+ ## Acknowledgements
291
+
292
+ NextRec is inspired by the following great open-source projects:
293
+
294
+ - [torch-rechub](https://github.com/datawhalechina/torch-rechub) — Flexible, easy-to-extend recommendation framework
295
+ - [FuxiCTR](https://github.com/reczoo/FuxiCTR) — Configurable, tunable, and reproducible CTR library
296
+ - [RecBole](https://github.com/RUCAIBox/RecBole) — Unified, comprehensive, and efficient recommendation library
297
+
298
+ Special thanks to all open-source contributors!
299
+
300
+ ---
301
+
302
+ <div align="center">
303
+
304
+ **[Back to Top](#nextrec)**
305
+
306
+ </div>
@@ -0,0 +1,250 @@
1
+ <p align="center">
2
+ <img align="center" src="asserts/logo.png" width="40%">
3
+ </p>
4
+
5
+ <div align="center">
6
+
7
+ ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
8
+ ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
9
+ ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
10
+ ![Version](https://img.shields.io/badge/Version-0.3.1-orange.svg)
11
+
12
+ English | [中文文档](README_zh.md)
13
+
14
+ **A Unified, Efficient, and Scalable Recommendation System Framework**
15
+
16
+ </div>
17
+
18
+ ## Introduction
19
+
20
+ NextRec is a modern recommendation framework built on PyTorch, delivering a unified experience for modeling, training, and evaluation. It follows a modular design with rich model implementations, data-processing utilities, and engineering-ready training components. NextRec focuses on large-scale industrial recall scenarios on Spark clusters, training on massive offline parquet features.
21
+
22
+ ### Why NextRec
23
+
24
+ - **Unified feature engineering & data pipeline**: Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching offline feature training/inference in industrial big-data settings.
25
+ - **Multi-scenario coverage**: Ranking (CTR/CVR), retrieval, multi-task learning, and more marketing/rec models, with a continuously expanding model zoo.
26
+ - **Developer-friendly experience**: Stream processing/training/inference for csv/parquet/pathlike data, plus GPU/MPS acceleration and visualization support.
27
+ - **Efficient training & evaluation**: Standardized engine with optimizers, LR schedulers, early stopping, checkpoints, and detailed logging out of the box.
28
+
29
+ > The project borrows ideas from excellent open-source rec libraries. Early layers referenced [torch-rechub](https://github.com/datawhalechina/torch-rechub) but have been replaced with in-house implementations. torch-rechub remains mature in architecture and models; the author contributed a bit there—feel free to check it out.
30
+
31
+ ---
32
+
33
+ ## Installation
34
+
35
+ You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
36
+
37
+ ## Tutorials
38
+
39
+ See `tutorials/` for examples covering ranking, retrieval, multi-task learning, and data processing:
40
+
41
+ - [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM training on MovieLens 100k
42
+ - [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN training on the e-commerce dataset
43
+ - [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task training on the e-commerce dataset
44
+ - [movielen_match_dssm.py](/tutorials/movielen_match_dssm.py) — DSSM retrieval on MovieLens 100k
45
+
46
+ To dive deeper, Jupyter notebooks are available:
47
+
48
+ - [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
49
+ - [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
50
+
51
+ > Current version [0.3.1]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
52
+
53
+ ## 5-Minute Quick Start
54
+
55
+ We provide a detailed quick start and paired datasets to help you learn the framework. In `dataset/` you’ll find an e-commerce sample dataset like this:
56
+
57
+ | user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
58
+ |--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
59
+ | 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
60
+ | 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
61
+
62
+ Below is a short example showing how to train a DIN model. DIN (Deep Interest Network) won Best Paper at KDD 2018 for CTR prediction. You can also run `python tutorials/example_ranking_din.py` directly.
63
+
64
+ After training, detailed logs are available under `nextrec_logs/din_tutorial`.
65
+
66
+ ```python
67
+ import pandas as pd
68
+
69
+ from nextrec.models.ranking.din import DIN
70
+ from nextrec.basic.features import DenseFeature, SparseFeature, SequenceFeature
71
+
72
+ df = pd.read_csv('dataset/ranking_task.csv')
73
+
74
+ for col in df.columns and 'sequence' in col: # csv loads lists as text; convert them back to objects
75
+ df[col] = df[col].apply(lambda x: eval(x) if isinstance(x, str) else x)
76
+
77
+ # Define feature columns
78
+ dense_features = [DenseFeature(name=f'dense_{i}', input_dim=1) for i in range(8)]
79
+
80
+ sparse_features = [SparseFeature(name='user_id', embedding_name='user_emb', vocab_size=int(df['user_id'].max() + 1), embedding_dim=32), SparseFeature(name='item_id', embedding_name='item_emb', vocab_size=int(df['item_id'].max() + 1), embedding_dim=32),]
81
+
82
+ sparse_features.extend([SparseFeature(name=f'sparse_{i}', embedding_name=f'sparse_{i}_emb', vocab_size=int(df[f'sparse_{i}'].max() + 1), embedding_dim=32) for i in range(10)])
83
+
84
+ sequence_features = [
85
+ SequenceFeature(name='sequence_0', vocab_size=int(df['sequence_0'].apply(lambda x: max(x)).max() + 1), embedding_dim=32, padding_idx=0, embedding_name='item_emb'),
86
+ SequenceFeature(name='sequence_1', vocab_size=int(df['sequence_1'].apply(lambda x: max(x)).max() + 1), embedding_dim=16, padding_idx=0, embedding_name='sparse_0_emb'),]
87
+
88
+ mlp_params = {
89
+ "dims": [256, 128, 64],
90
+ "activation": "relu",
91
+ "dropout": 0.3,
92
+ }
93
+
94
+ model = DIN(
95
+ dense_features=dense_features,
96
+ sparse_features=sparse_features,
97
+ sequence_features=sequence_features,
98
+ mlp_params=mlp_params,
99
+ attention_hidden_units=[80, 40],
100
+ attention_activation='sigmoid',
101
+ attention_use_softmax=True,
102
+ target=['label'], # target variable
103
+ device='mps',
104
+ embedding_l1_reg=1e-6,
105
+ embedding_l2_reg=1e-5,
106
+ dense_l1_reg=1e-5,
107
+ dense_l2_reg=1e-4,
108
+ session_id="din_tutorial", # experiment id for logs
109
+ )
110
+
111
+ # Compile model with optimizer and loss
112
+ model.compile(
113
+ optimizer = "adam",
114
+ optimizer_params = {"lr": 1e-3, "weight_decay": 1e-5},
115
+ loss = "focal",
116
+ loss_params={"gamma": 2.0, "alpha": 0.25},
117
+ )
118
+
119
+ model.fit(
120
+ train_data=df,
121
+ metrics=['auc', 'gauc', 'logloss'], # metrics to track
122
+ epochs=3,
123
+ batch_size=512,
124
+ shuffle=True,
125
+ user_id_column='user_id' # used for GAUC
126
+ )
127
+
128
+ # Evaluate after training
129
+ metrics = model.evaluate(
130
+ df,
131
+ metrics=['auc', 'gauc', 'logloss'],
132
+ batch_size=512,
133
+ user_id_column='user_id'
134
+ )
135
+ ```
136
+
137
+ ---
138
+
139
+ ## Supported Models
140
+
141
+ ### Ranking Models
142
+
143
+ | Model | Paper | Year | Status |
144
+ |-------|-------|------|--------|
145
+ | [FM](nextrec/models/ranking/fm.py) | Factorization Machines | ICDM 2010 | Supported |
146
+ | [AFM](nextrec/models/ranking/afm.py) | Attentional Factorization Machines: Learning the Weight of Feature Interactions via Attention Networks | IJCAI 2017 | Supported |
147
+ | [DeepFM](nextrec/models/ranking/deepfm.py) | DeepFM: A Factorization-Machine based Neural Network for CTR Prediction | IJCAI 2017 | Supported |
148
+ | [Wide&Deep](nextrec/models/ranking/widedeep.py) | Wide & Deep Learning for Recommender Systems | DLRS 2016 | Supported |
149
+ | [xDeepFM](nextrec/models/ranking/xdeepfm.py) | xDeepFM: Combining Explicit and Implicit Feature Interactions | KDD 2018 | Supported |
150
+ | [FiBiNET](nextrec/models/ranking/fibinet.py) | FiBiNET: Combining Feature Importance and Bilinear Feature Interaction for CTR Prediction | RecSys 2019 | Supported |
151
+ | [PNN](nextrec/models/ranking/pnn.py) | Product-based Neural Networks for User Response Prediction | ICDM 2016 | Supported |
152
+ | [AutoInt](nextrec/models/ranking/autoint.py) | AutoInt: Automatic Feature Interaction Learning | CIKM 2019 | Supported |
153
+ | [DCN](nextrec/models/ranking/dcn.py) | Deep & Cross Network for Ad Click Predictions | ADKDD 2017 | Supported |
154
+ | [DCN v2](nextrec/models/ranking/dcn_v2.py) | DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems | KDD 2021 | In Progress |
155
+ | [DIN](nextrec/models/ranking/din.py) | Deep Interest Network for CTR Prediction | KDD 2018 | Supported |
156
+ | [DIEN](nextrec/models/ranking/dien.py) | Deep Interest Evolution Network | AAAI 2019 | Supported |
157
+ | [MaskNet](nextrec/models/ranking/masknet.py) | MaskNet: Feature-wise Gating Blocks for High-dimensional Sparse Recommendation Data | 2020 | Supported |
158
+
159
+ ### Retrieval Models
160
+
161
+ | Model | Paper | Year | Status |
162
+ |-------|-------|------|--------|
163
+ | [DSSM](nextrec/models/match/dssm.py) | Learning Deep Structured Semantic Models | CIKM 2013 | Supported |
164
+ | [DSSM v2](nextrec/models/match/dssm_v2.py) | DSSM with pairwise BPR-style optimization | - | Supported |
165
+ | [YouTube DNN](nextrec/models/match/youtube_dnn.py) | Deep Neural Networks for YouTube Recommendations | RecSys 2016 | Supported |
166
+ | [MIND](nextrec/models/match/mind.py) | Multi-Interest Network with Dynamic Routing | CIKM 2019 | Supported |
167
+ | [SDM](nextrec/models/match/sdm.py) | Sequential Deep Matching Model | - | Supported |
168
+
169
+ ### Multi-task Models
170
+
171
+ | Model | Paper | Year | Status |
172
+ |-------|-------|------|--------|
173
+ | [MMOE](nextrec/models/multi_task/mmoe.py) | Modeling Task Relationships in Multi-task Learning | KDD 2018 | Supported |
174
+ | [PLE](nextrec/models/multi_task/ple.py) | Progressive Layered Extraction | RecSys 2020 | Supported |
175
+ | [ESMM](nextrec/models/multi_task/esmm.py) | Entire Space Multi-task Model | SIGIR 2018 | Supported |
176
+ | [ShareBottom](nextrec/models/multi_task/share_bottom.py) | Multitask Learning | - | Supported |
177
+ | [POSO](nextrec/models/multi_task/poso.py) | POSO: Personalized Cold-start Modules for Large-scale Recommender Systems | 2021 | Supported |
178
+ | [POSO-IFLYTEK](nextrec/models/multi_task/poso_iflytek.py) | POSO with PLE-style gating for sequential marketing tasks | - | Supported |
179
+
180
+ ### Generative Models
181
+
182
+ | Model | Paper | Year | Status |
183
+ |-------|-------|------|--------|
184
+ | [TIGER](nextrec/models/generative/tiger.py) | Recommender Systems with Generative Retrieval | NeurIPS 2023 | In Progress |
185
+ | [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - | In Progress |
186
+
187
+ ---
188
+
189
+ ## Contributing
190
+
191
+ We welcome contributions of any form!
192
+
193
+ ### How to Contribute
194
+
195
+ 1. Fork the repository
196
+ 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
197
+ 3. Commit your changes (`git commit -m 'Add AmazingFeature'`)
198
+ 4. Push your branch (`git push origin feature/AmazingFeature`)
199
+ 5. Open a Pull Request
200
+
201
+ > Before submitting a PR, please run tests using `pytest test/ -v` or `python -m pytest` to ensure everything passes.
202
+
203
+ ### Code Style
204
+
205
+ - Follow PEP8
206
+ - Provide unit tests for new functionality
207
+ - Update documentation accordingly
208
+
209
+ ### Reporting Issues
210
+
211
+ When submitting issues on GitHub, please include:
212
+
213
+ - Description of the problem
214
+ - Reproduction steps
215
+ - Expected behavior
216
+ - Actual behavior
217
+ - Environment info (Python version, PyTorch version, etc.)
218
+
219
+ ---
220
+
221
+ ## License
222
+
223
+ This project is licensed under the [Apache 2.0 License](./LICENSE).
224
+
225
+ ---
226
+
227
+ ## Contact
228
+
229
+ - **GitHub Issues**: [Submit an issue](https://github.com/zerolovesea/NextRec/issues)
230
+ - **Email**: zyaztec@gmail.com
231
+
232
+ ---
233
+
234
+ ## Acknowledgements
235
+
236
+ NextRec is inspired by the following great open-source projects:
237
+
238
+ - [torch-rechub](https://github.com/datawhalechina/torch-rechub) — Flexible, easy-to-extend recommendation framework
239
+ - [FuxiCTR](https://github.com/reczoo/FuxiCTR) — Configurable, tunable, and reproducible CTR library
240
+ - [RecBole](https://github.com/RUCAIBox/RecBole) — Unified, comprehensive, and efficient recommendation library
241
+
242
+ Special thanks to all open-source contributors!
243
+
244
+ ---
245
+
246
+ <div align="center">
247
+
248
+ **[Back to Top](#nextrec)**
249
+
250
+ </div>