nextrec 0.4.3__tar.gz → 0.4.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. {nextrec-0.4.3 → nextrec-0.4.5}/PKG-INFO +34 -28
  2. {nextrec-0.4.3 → nextrec-0.4.5}/README.md +33 -27
  3. {nextrec-0.4.3 → nextrec-0.4.5}/README_zh.md +21 -13
  4. {nextrec-0.4.3 → nextrec-0.4.5}/docs/rtd/conf.py +1 -1
  5. nextrec-0.4.5/nextrec/__version__.py +1 -0
  6. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/xdeepfm.py +47 -5
  7. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/config.py +6 -0
  8. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/initializer.py +12 -16
  9. {nextrec-0.4.3 → nextrec-0.4.5}/pyproject.toml +1 -1
  10. nextrec-0.4.3/nextrec/__version__.py +0 -1
  11. {nextrec-0.4.3 → nextrec-0.4.5}/.github/workflows/publish.yml +0 -0
  12. {nextrec-0.4.3 → nextrec-0.4.5}/.github/workflows/tests.yml +0 -0
  13. {nextrec-0.4.3 → nextrec-0.4.5}/.gitignore +0 -0
  14. {nextrec-0.4.3 → nextrec-0.4.5}/.readthedocs.yaml +0 -0
  15. {nextrec-0.4.3 → nextrec-0.4.5}/CODE_OF_CONDUCT.md +0 -0
  16. {nextrec-0.4.3 → nextrec-0.4.5}/CONTRIBUTING.md +0 -0
  17. {nextrec-0.4.3 → nextrec-0.4.5}/LICENSE +0 -0
  18. {nextrec-0.4.3 → nextrec-0.4.5}/MANIFEST.in +0 -0
  19. {nextrec-0.4.3 → nextrec-0.4.5}/assets/Feature Configuration.png +0 -0
  20. {nextrec-0.4.3 → nextrec-0.4.5}/assets/Model Parameters.png +0 -0
  21. {nextrec-0.4.3 → nextrec-0.4.5}/assets/Training Configuration.png +0 -0
  22. {nextrec-0.4.3 → nextrec-0.4.5}/assets/Training logs.png +0 -0
  23. {nextrec-0.4.3 → nextrec-0.4.5}/assets/logo.png +0 -0
  24. {nextrec-0.4.3 → nextrec-0.4.5}/assets/mmoe_tutorial.png +0 -0
  25. {nextrec-0.4.3 → nextrec-0.4.5}/assets/nextrec_diagram_en.png +0 -0
  26. {nextrec-0.4.3 → nextrec-0.4.5}/assets/nextrec_diagram_zh.png +0 -0
  27. {nextrec-0.4.3 → nextrec-0.4.5}/assets/test data.png +0 -0
  28. {nextrec-0.4.3 → nextrec-0.4.5}/dataset/ctcvr_task.csv +0 -0
  29. {nextrec-0.4.3 → nextrec-0.4.5}/dataset/match_task.csv +0 -0
  30. {nextrec-0.4.3 → nextrec-0.4.5}/dataset/movielens_100k.csv +0 -0
  31. {nextrec-0.4.3 → nextrec-0.4.5}/dataset/multitask_task.csv +0 -0
  32. {nextrec-0.4.3 → nextrec-0.4.5}/dataset/ranking_task.csv +0 -0
  33. {nextrec-0.4.3 → nextrec-0.4.5}/docs/en/Getting started guide.md +0 -0
  34. {nextrec-0.4.3 → nextrec-0.4.5}/docs/rtd/Makefile +0 -0
  35. {nextrec-0.4.3 → nextrec-0.4.5}/docs/rtd/index.md +0 -0
  36. {nextrec-0.4.3 → nextrec-0.4.5}/docs/rtd/make.bat +0 -0
  37. {nextrec-0.4.3 → nextrec-0.4.5}/docs/rtd/modules.rst +0 -0
  38. {nextrec-0.4.3 → nextrec-0.4.5}/docs/rtd/nextrec.basic.rst +0 -0
  39. {nextrec-0.4.3 → nextrec-0.4.5}/docs/rtd/nextrec.data.rst +0 -0
  40. {nextrec-0.4.3 → nextrec-0.4.5}/docs/rtd/nextrec.loss.rst +0 -0
  41. {nextrec-0.4.3 → nextrec-0.4.5}/docs/rtd/nextrec.rst +0 -0
  42. {nextrec-0.4.3 → nextrec-0.4.5}/docs/rtd/nextrec.utils.rst +0 -0
  43. {nextrec-0.4.3 → nextrec-0.4.5}/docs/rtd/requirements.txt +0 -0
  44. {nextrec-0.4.3 → nextrec-0.4.5}/docs/zh/快速上手.md +0 -0
  45. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/__init__.py +0 -0
  46. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/basic/__init__.py +0 -0
  47. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/basic/activation.py +0 -0
  48. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/basic/callback.py +0 -0
  49. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/basic/features.py +0 -0
  50. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/basic/layers.py +0 -0
  51. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/basic/loggers.py +0 -0
  52. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/basic/metrics.py +0 -0
  53. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/basic/model.py +0 -0
  54. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/basic/session.py +0 -0
  55. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/cli.py +0 -0
  56. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/data/__init__.py +0 -0
  57. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/data/batch_utils.py +0 -0
  58. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/data/data_processing.py +0 -0
  59. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/data/data_utils.py +0 -0
  60. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/data/dataloader.py +0 -0
  61. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/data/preprocessor.py +0 -0
  62. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/loss/__init__.py +0 -0
  63. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/loss/listwise.py +0 -0
  64. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/loss/loss_utils.py +0 -0
  65. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/loss/pairwise.py +0 -0
  66. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/loss/pointwise.py +0 -0
  67. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/generative/__init__.py +0 -0
  68. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/generative/hstu.py +0 -0
  69. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/generative/tiger.py +0 -0
  70. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/match/__init__.py +0 -0
  71. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/match/dssm.py +0 -0
  72. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/match/dssm_v2.py +0 -0
  73. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/match/mind.py +0 -0
  74. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/match/sdm.py +0 -0
  75. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/match/youtube_dnn.py +0 -0
  76. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/multi_task/__init__.py +0 -0
  77. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/multi_task/esmm.py +0 -0
  78. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/multi_task/mmoe.py +0 -0
  79. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/multi_task/ple.py +0 -0
  80. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/multi_task/poso.py +0 -0
  81. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/multi_task/share_bottom.py +0 -0
  82. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/__init__.py +0 -0
  83. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/afm.py +0 -0
  84. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/autoint.py +0 -0
  85. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/dcn.py +0 -0
  86. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/dcn_v2.py +0 -0
  87. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/deepfm.py +0 -0
  88. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/dien.py +0 -0
  89. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/din.py +0 -0
  90. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/fibinet.py +0 -0
  91. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/fm.py +0 -0
  92. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/masknet.py +0 -0
  93. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/pnn.py +0 -0
  94. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/models/ranking/widedeep.py +0 -0
  95. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/__init__.py +0 -0
  96. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/device.py +0 -0
  97. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/distributed.py +0 -0
  98. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/embedding.py +0 -0
  99. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/feature.py +0 -0
  100. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/file.py +0 -0
  101. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/model.py +0 -0
  102. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/optimizer.py +0 -0
  103. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/synthetic_data.py +0 -0
  104. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec/utils/tensor.py +0 -0
  105. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/NextRec-CLI.md +0 -0
  106. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/NextRec-CLI_zh.md +0 -0
  107. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/feature_config.yaml +0 -0
  108. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/afm.yaml +0 -0
  109. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/autoint.yaml +0 -0
  110. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/dcn.yaml +0 -0
  111. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/deepfm.yaml +0 -0
  112. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/din.yaml +0 -0
  113. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/esmm.yaml +0 -0
  114. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/fibinet.yaml +0 -0
  115. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/fm.yaml +0 -0
  116. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/masknet.yaml +0 -0
  117. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/mmoe.yaml +0 -0
  118. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/ple.yaml +0 -0
  119. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/pnn.yaml +0 -0
  120. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/poso.yaml +0 -0
  121. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/share_bottom.yaml +0 -0
  122. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/widedeep.yaml +0 -0
  123. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/model_configs/xdeepfm.yaml +0 -0
  124. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/predict_config.yaml +0 -0
  125. {nextrec-0.4.3 → nextrec-0.4.5}/nextrec_cli_preset/train_config.yaml +0 -0
  126. {nextrec-0.4.3 → nextrec-0.4.5}/pytest.ini +0 -0
  127. {nextrec-0.4.3 → nextrec-0.4.5}/requirements.txt +0 -0
  128. {nextrec-0.4.3 → nextrec-0.4.5}/scripts/format_code.py +0 -0
  129. {nextrec-0.4.3 → nextrec-0.4.5}/test/__init__.py +0 -0
  130. {nextrec-0.4.3 → nextrec-0.4.5}/test/conftest.py +0 -0
  131. {nextrec-0.4.3 → nextrec-0.4.5}/test/run_tests.py +0 -0
  132. {nextrec-0.4.3 → nextrec-0.4.5}/test/test_layers.py +0 -0
  133. {nextrec-0.4.3 → nextrec-0.4.5}/test/test_losses.py +0 -0
  134. {nextrec-0.4.3 → nextrec-0.4.5}/test/test_match_models.py +0 -0
  135. {nextrec-0.4.3 → nextrec-0.4.5}/test/test_multitask_models.py +0 -0
  136. {nextrec-0.4.3 → nextrec-0.4.5}/test/test_preprocessor.py +0 -0
  137. {nextrec-0.4.3 → nextrec-0.4.5}/test/test_ranking_models.py +0 -0
  138. {nextrec-0.4.3 → nextrec-0.4.5}/test/test_utils.py +0 -0
  139. {nextrec-0.4.3 → nextrec-0.4.5}/test_requirements.txt +0 -0
  140. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/distributed/example_distributed_training.py +0 -0
  141. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/distributed/example_distributed_training_large_dataset.py +0 -0
  142. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/example_match_dssm.py +0 -0
  143. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/example_multitask.py +0 -0
  144. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/example_ranking_din.py +0 -0
  145. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/movielen_match_dssm.py +0 -0
  146. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/movielen_ranking_deepfm.py +0 -0
  147. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/notebooks/en/Hands on dataprocessor.ipynb +0 -0
  148. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/notebooks/en/Hands on nextrec.ipynb +0 -0
  149. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/notebooks/zh/Hands on dataprocessor.ipynb +0 -0
  150. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/notebooks/zh/Hands on nextrec.ipynb +0 -0
  151. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/run_all_match_models.py +0 -0
  152. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/run_all_multitask_models.py +0 -0
  153. {nextrec-0.4.3 → nextrec-0.4.5}/tutorials/run_all_ranking_models.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nextrec
3
- Version: 0.4.3
3
+ Version: 0.4.5
4
4
  Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
5
5
  Project-URL: Homepage, https://github.com/zerolovesea/NextRec
6
6
  Project-URL: Repository, https://github.com/zerolovesea/NextRec
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
63
63
  ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
64
64
  ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
65
65
  ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
66
- ![Version](https://img.shields.io/badge/Version-0.4.3-orange.svg)
66
+ ![Version](https://img.shields.io/badge/Version-0.4.5-orange.svg)
67
67
 
68
68
  English | [中文文档](README_zh.md)
69
69
 
@@ -84,60 +84,65 @@ English | [中文文档](README_zh.md)
84
84
 
85
85
  ## Introduction
86
86
 
87
- NextRec is a modern recommendation system framework built on PyTorch, providing researchers and engineering teams with a fast modeling, training, and evaluation experience. The framework adopts a modular design with rich built-in model implementations, data processing tools, and engineering-ready training components, covering various recommendation scenarios. NextRec provides easy-to-use interfaces, command-line tools, and tutorials, enabling recommendation algorithm learners to quickly understand model architectures and train and infer models at the fastest speed.
87
+ NextRec is a modern recommendation framework built on PyTorch, delivering a unified experience for modeling, training, and evaluation. It is designed with rich model implementations, data-processing utilities, and engineering-ready training components. NextRec focuses on large-scale industrial recommendation scenarios on Spark clusters, training on massive offline features (`parquet/csv`).
88
88
 
89
89
  ## Why NextRec
90
90
 
91
- - **Unified feature engineering & data pipeline**: NextRec provides Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching the model training and inference process based on offline `parquet/csv` features in industrial big-data Spark/Hive scenarios.
92
- - **Multi-scenario recommendation capabilities**: Covers ranking (CTR/CVR), retrieval, multi-task learning and other recommendation/marketing models, with a continuously expanding model zoo.
93
- - **Developer-friendly experience**: Supports stream preprocessing/distributed training/inference for various data formats (`csv/parquet/pathlike`), GPU acceleration and visual metric monitoring, facilitating experiments for business algorithm engineers and recommendation algorithm learners.
94
- - **Flexible command-line tool**: Through configuring training and inference config files, start training and inference processes with one command `nextrec --mode=train --train_config=train_config.yaml`, facilitating rapid experiment iteration and agile deployment.
95
- - **Efficient training & evaluation**: NextRec's standardized training engine comes with various optimizers, learning rate schedulers, early stopping, model checkpoints, and detailed log management built-in, ready to use out of the box.
91
+ - **Unified feature engineering & data pipeline**: NextRec provides unified Dense/Sparse/Sequence feature definitions, DataProcessor, and batch-optimized RecDataLoader, matching offline feature training/inference in industrial big-data settings.
92
+ - **Multi-scenario coverage**: Ranking (CTR/CVR), retrieval, multi-task learning, and more marketing/rec models, with a continuously expanding model zoo.
93
+ - **Developer-friendly experience**: `Stream processing/distributed training/inference` for `csv/parquet/pathlike` data, plus GPU/MPS acceleration and visualization support.
94
+ - **Efficient training & evaluation**: Standardized engine with optimizers, LR schedulers, early stopping, checkpoints, and detailed logging out of the box.
96
95
 
97
96
  ## Architecture
98
97
 
99
- NextRec adopts a modular and low-coupling engineering design, enabling full-pipeline reusability and scalability across data processing → model construction → training & evaluation → inference & deployment. Its core components include: a Feature-Spec-driven Embedding architecture, the BaseModel abstraction, a set of independent reusable Layers, a unified DataLoader for both training and inference, and a ready-to-use Model Zoo.
98
+ NextRec adopts a modular design, enabling full-pipeline reusability and scalability across data processing → model construction → training & evaluation → inference & deployment. Its core components include: a Feature-Spec-driven Embedding architecture, the BaseModel abstraction, a set of independent reusable Layers, a unified DataLoader for both training and inference, and a ready-to-use Model Zoo.
100
99
 
101
100
  ![NextRec Architecture](assets/nextrec_diagram_en.png)
102
101
 
103
- > The project borrows ideas from excellent open-source rec libraries. Early layers referenced [torch-rechub](https://github.com/datawhalechina/torch-rechub) but have been replaced with in-house implementations. torch-rechub remains mature in architecture and models; the author contributed a bit there—feel free to check it out.
102
+ > The project borrows ideas from excellent open-source rec libraries, for example: [torch-rechub](https://github.com/datawhalechina/torch-rechub). torch-rechub remains mature in architecture and models; the author contributed a bit there—feel free to check it out.
104
103
 
105
104
  ---
106
105
 
107
106
  ## Installation
108
107
 
109
- You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
108
+ You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required. If you want to run the tutorial code, clone this repository first:
110
109
 
110
+ ```bash
111
+ git clone https://github.com/zerolovesea/NextRec.git
112
+ cd NextRec/
113
+ pip install nextrec # or pip install -e .
114
+ ```
111
115
 
112
116
  ## Tutorials
113
117
 
114
- We provide multiple examples in the `tutorials/` directory, covering ranking, retrieval, multi-task, and data processing scenarios:
118
+ See `tutorials/` for examples covering ranking, retrieval, multi-task learning, and data processing:
119
+
120
+ - [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM training on MovieLens 100k dataset
121
+ - [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN Deep Interest Network training on e-commerce dataset
122
+ - [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training on e-commerce dataset
123
+ - [movielen_match_dssm.py](/tutorials/movielen_match_dssm.py) — DSSM retrieval model training on MovieLens 100k dataset
115
124
 
116
- - [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM model training example on MovieLens 100k dataset
117
- - [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN deep interest network training example on e-commerce dataset
118
- - [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training example on e-commerce dataset
119
- - [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval model example trained on MovieLens 100k dataset
120
- - [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly verify the availability of all ranking models
121
- - [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly verify the availability of all multi-task models
122
- - [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly verify the availability of all retrieval models
125
+ - [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly validate availability of all ranking models
126
+ - [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly validate availability of all multi-task models
127
+ - [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly validate availability of all retrieval models
123
128
 
124
- If you want to learn more details about the NextRec framework, we also provide Jupyter notebooks to help you understand:
129
+ To dive deeper into NextRec framework details, Jupyter notebooks are available:
125
130
 
126
- - [How to get started with the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
127
- - [How to use the data processor for data preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
131
+ - [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
132
+ - [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
128
133
 
129
134
  ## 5-Minute Quick Start
130
135
 
131
- We provide a detailed quick start guide and paired datasets to help you become familiar with different features of the NextRec framework. We provide a test dataset from an e-commerce scenario in the `datasets/` path, with data examples as follows:
136
+ We provide a detailed quick-start guide and paired datasets to help you get familiar with different features of the NextRec framework. In `dataset/` you'll find an e-commerce scenario test dataset like this:
132
137
 
133
138
  | user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
134
139
  |--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
135
140
  | 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
136
141
  | 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
137
142
 
138
- Next, we'll use a short example to show you how to train a DIN model using NextRec. DIN (Deep Interest Network) is from Alibaba's 2018 KDD Best Paper, used for CTR prediction scenarios. You can also directly execute `python tutorials/example_ranking_din.py` to run the training and inference code.
143
+ Below is a short example showing how to train a DIN (Deep Interest Network) model. You can also run `python tutorials/example_ranking_din.py` directly to execute the training and inference code.
139
144
 
140
- After starting training, you can view detailed training logs in the `nextrec_logs/din_tutorial` path.
145
+ After training starts, you can find detailed training logs at `nextrec_logs/din_tutorial`.
141
146
 
142
147
  ```python
143
148
  import pandas as pd
@@ -215,6 +220,7 @@ metrics = model.evaluate(
215
220
  NextRec provides a powerful command-line interface for model training and prediction using YAML configuration files. For detailed CLI documentation, see:
216
221
 
217
222
  - [NextRec CLI User Guide](/nextrec_cli_preset/NextRec-CLI.md) - Complete guide for using the CLI
223
+ - [NextRec CLI Configuration Examples](/nextrec_cli_preset/) - CLI configuration file examples
218
224
 
219
225
  ```bash
220
226
  # Train a model
@@ -224,11 +230,11 @@ nextrec --mode=train --train_config=path/to/train_config.yaml
224
230
  nextrec --mode=predict --predict_config=path/to/predict_config.yaml
225
231
  ```
226
232
 
227
- > As of version 0.4.3, NextRec CLI supports single-machine training; distributed training features are currently under development.
233
+ > As of version 0.4.5, NextRec CLI supports single-machine training; distributed training features are currently under development.
228
234
 
229
235
  ## Platform Compatibility
230
236
 
231
- The current version is 0.4.3. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
237
+ The current version is 0.4.5. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
232
238
 
233
239
  | Platform | Configuration |
234
240
  |----------|---------------|
@@ -299,7 +305,7 @@ We welcome contributions of any form!
299
305
  4. Push your branch (`git push origin feature/AmazingFeature`)
300
306
  5. Open a Pull Request
301
307
 
302
- > Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is unified.
308
+ > Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is consistent.
303
309
 
304
310
  ### Code Style
305
311
 
@@ -7,7 +7,7 @@
7
7
  ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
8
8
  ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
9
9
  ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
10
- ![Version](https://img.shields.io/badge/Version-0.4.3-orange.svg)
10
+ ![Version](https://img.shields.io/badge/Version-0.4.5-orange.svg)
11
11
 
12
12
  English | [中文文档](README_zh.md)
13
13
 
@@ -28,60 +28,65 @@ English | [中文文档](README_zh.md)
28
28
 
29
29
  ## Introduction
30
30
 
31
- NextRec is a modern recommendation system framework built on PyTorch, providing researchers and engineering teams with a fast modeling, training, and evaluation experience. The framework adopts a modular design with rich built-in model implementations, data processing tools, and engineering-ready training components, covering various recommendation scenarios. NextRec provides easy-to-use interfaces, command-line tools, and tutorials, enabling recommendation algorithm learners to quickly understand model architectures and train and infer models at the fastest speed.
31
+ NextRec is a modern recommendation framework built on PyTorch, delivering a unified experience for modeling, training, and evaluation. It is designed with rich model implementations, data-processing utilities, and engineering-ready training components. NextRec focuses on large-scale industrial recommendation scenarios on Spark clusters, training on massive offline features (`parquet/csv`).
32
32
 
33
33
  ## Why NextRec
34
34
 
35
- - **Unified feature engineering & data pipeline**: NextRec provides Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching the model training and inference process based on offline `parquet/csv` features in industrial big-data Spark/Hive scenarios.
36
- - **Multi-scenario recommendation capabilities**: Covers ranking (CTR/CVR), retrieval, multi-task learning and other recommendation/marketing models, with a continuously expanding model zoo.
37
- - **Developer-friendly experience**: Supports stream preprocessing/distributed training/inference for various data formats (`csv/parquet/pathlike`), GPU acceleration and visual metric monitoring, facilitating experiments for business algorithm engineers and recommendation algorithm learners.
38
- - **Flexible command-line tool**: Through configuring training and inference config files, start training and inference processes with one command `nextrec --mode=train --train_config=train_config.yaml`, facilitating rapid experiment iteration and agile deployment.
39
- - **Efficient training & evaluation**: NextRec's standardized training engine comes with various optimizers, learning rate schedulers, early stopping, model checkpoints, and detailed log management built-in, ready to use out of the box.
35
+ - **Unified feature engineering & data pipeline**: NextRec provides unified Dense/Sparse/Sequence feature definitions, DataProcessor, and batch-optimized RecDataLoader, matching offline feature training/inference in industrial big-data settings.
36
+ - **Multi-scenario coverage**: Ranking (CTR/CVR), retrieval, multi-task learning, and more marketing/rec models, with a continuously expanding model zoo.
37
+ - **Developer-friendly experience**: `Stream processing/distributed training/inference` for `csv/parquet/pathlike` data, plus GPU/MPS acceleration and visualization support.
38
+ - **Efficient training & evaluation**: Standardized engine with optimizers, LR schedulers, early stopping, checkpoints, and detailed logging out of the box.
40
39
 
41
40
  ## Architecture
42
41
 
43
- NextRec adopts a modular and low-coupling engineering design, enabling full-pipeline reusability and scalability across data processing → model construction → training & evaluation → inference & deployment. Its core components include: a Feature-Spec-driven Embedding architecture, the BaseModel abstraction, a set of independent reusable Layers, a unified DataLoader for both training and inference, and a ready-to-use Model Zoo.
42
+ NextRec adopts a modular design, enabling full-pipeline reusability and scalability across data processing → model construction → training & evaluation → inference & deployment. Its core components include: a Feature-Spec-driven Embedding architecture, the BaseModel abstraction, a set of independent reusable Layers, a unified DataLoader for both training and inference, and a ready-to-use Model Zoo.
44
43
 
45
44
  ![NextRec Architecture](assets/nextrec_diagram_en.png)
46
45
 
47
- > The project borrows ideas from excellent open-source rec libraries. Early layers referenced [torch-rechub](https://github.com/datawhalechina/torch-rechub) but have been replaced with in-house implementations. torch-rechub remains mature in architecture and models; the author contributed a bit there—feel free to check it out.
46
+ > The project borrows ideas from excellent open-source rec libraries, for example: [torch-rechub](https://github.com/datawhalechina/torch-rechub). torch-rechub remains mature in architecture and models; the author contributed a bit there—feel free to check it out.
48
47
 
49
48
  ---
50
49
 
51
50
  ## Installation
52
51
 
53
- You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
52
+ You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required. If you want to run the tutorial code, clone this repository first:
54
53
 
54
+ ```bash
55
+ git clone https://github.com/zerolovesea/NextRec.git
56
+ cd NextRec/
57
+ pip install nextrec # or pip install -e .
58
+ ```
55
59
 
56
60
  ## Tutorials
57
61
 
58
- We provide multiple examples in the `tutorials/` directory, covering ranking, retrieval, multi-task, and data processing scenarios:
62
+ See `tutorials/` for examples covering ranking, retrieval, multi-task learning, and data processing:
63
+
64
+ - [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM training on MovieLens 100k dataset
65
+ - [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN Deep Interest Network training on e-commerce dataset
66
+ - [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training on e-commerce dataset
67
+ - [movielen_match_dssm.py](/tutorials/movielen_match_dssm.py) — DSSM retrieval model training on MovieLens 100k dataset
59
68
 
60
- - [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM model training example on MovieLens 100k dataset
61
- - [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN deep interest network training example on e-commerce dataset
62
- - [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training example on e-commerce dataset
63
- - [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval model example trained on MovieLens 100k dataset
64
- - [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly verify the availability of all ranking models
65
- - [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly verify the availability of all multi-task models
66
- - [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly verify the availability of all retrieval models
69
+ - [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly validate availability of all ranking models
70
+ - [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly validate availability of all multi-task models
71
+ - [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly validate availability of all retrieval models
67
72
 
68
- If you want to learn more details about the NextRec framework, we also provide Jupyter notebooks to help you understand:
73
+ To dive deeper into NextRec framework details, Jupyter notebooks are available:
69
74
 
70
- - [How to get started with the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
71
- - [How to use the data processor for data preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
75
+ - [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
76
+ - [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
72
77
 
73
78
  ## 5-Minute Quick Start
74
79
 
75
- We provide a detailed quick start guide and paired datasets to help you become familiar with different features of the NextRec framework. We provide a test dataset from an e-commerce scenario in the `datasets/` path, with data examples as follows:
80
+ We provide a detailed quick-start guide and paired datasets to help you get familiar with different features of the NextRec framework. In `dataset/` you'll find an e-commerce scenario test dataset like this:
76
81
 
77
82
  | user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
78
83
  |--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
79
84
  | 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
80
85
  | 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
81
86
 
82
- Next, we'll use a short example to show you how to train a DIN model using NextRec. DIN (Deep Interest Network) is from Alibaba's 2018 KDD Best Paper, used for CTR prediction scenarios. You can also directly execute `python tutorials/example_ranking_din.py` to run the training and inference code.
87
+ Below is a short example showing how to train a DIN (Deep Interest Network) model. You can also run `python tutorials/example_ranking_din.py` directly to execute the training and inference code.
83
88
 
84
- After starting training, you can view detailed training logs in the `nextrec_logs/din_tutorial` path.
89
+ After training starts, you can find detailed training logs at `nextrec_logs/din_tutorial`.
85
90
 
86
91
  ```python
87
92
  import pandas as pd
@@ -159,6 +164,7 @@ metrics = model.evaluate(
159
164
  NextRec provides a powerful command-line interface for model training and prediction using YAML configuration files. For detailed CLI documentation, see:
160
165
 
161
166
  - [NextRec CLI User Guide](/nextrec_cli_preset/NextRec-CLI.md) - Complete guide for using the CLI
167
+ - [NextRec CLI Configuration Examples](/nextrec_cli_preset/) - CLI configuration file examples
162
168
 
163
169
  ```bash
164
170
  # Train a model
@@ -168,11 +174,11 @@ nextrec --mode=train --train_config=path/to/train_config.yaml
168
174
  nextrec --mode=predict --predict_config=path/to/predict_config.yaml
169
175
  ```
170
176
 
171
- > As of version 0.4.3, NextRec CLI supports single-machine training; distributed training features are currently under development.
177
+ > As of version 0.4.5, NextRec CLI supports single-machine training; distributed training features are currently under development.
172
178
 
173
179
  ## Platform Compatibility
174
180
 
175
- The current version is 0.4.3. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
181
+ The current version is 0.4.5. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
176
182
 
177
183
  | Platform | Configuration |
178
184
  |----------|---------------|
@@ -243,7 +249,7 @@ We welcome contributions of any form!
243
249
  4. Push your branch (`git push origin feature/AmazingFeature`)
244
250
  5. Open a Pull Request
245
251
 
246
- > Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is unified.
252
+ > Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is consistent.
247
253
 
248
254
  ### Code Style
249
255
 
@@ -7,7 +7,7 @@
7
7
  ![Python](https://img.shields.io/badge/Python-3.10+-blue.svg)
8
8
  ![PyTorch](https://img.shields.io/badge/PyTorch-1.10+-ee4c2c.svg)
9
9
  ![License](https://img.shields.io/badge/License-Apache%202.0-green.svg)
10
- ![Version](https://img.shields.io/badge/Version-0.4.3-orange.svg)
10
+ ![Version](https://img.shields.io/badge/Version-0.4.5-orange.svg)
11
11
 
12
12
  [English Version](README.md) | 中文文档
13
13
 
@@ -28,29 +28,35 @@
28
28
 
29
29
  ## 简介
30
30
 
31
- NextRec是一个基于 PyTorch 构建的现代推荐系统框架,为研究人员与工程团队提供快速的建模、训练与评估体验。框架采用模块化设计,内置丰富的模型实现、数据处理工具和工程化训练组件,覆盖多种推荐场景。NextRec提供了易上手的接口,命令行工具及教程,推荐算法学习者能以最快速度了解模型架构并训练和推理模型。
31
+ NextRec是一个基于PyTorch的现代推荐系统框架,旨在为研究与工程团队提供快速的建模、训练与评估流程。框架内置丰富的模型实现、数据处理工具和工程化训练组件,覆盖多种推荐场景。此外,框架还提供了易上手的接口、命令行工具及教程,推荐算法学习者能以最快速度了解模型架构、复现学术论文并进行训练和部署。
32
32
 
33
33
  ## Why NextRec
34
34
 
35
- - **统一的特征工程与数据流水线**:NextRec框架提供了 Dense/Sparse/Sequence 特征定义、可持久化的 DataProcessor、批处理优化的 RecDataLoader,符合工业大数据Spark/Hive场景下,基于离线`parquet/csv`特征的模型训练推理流程。
36
- - **多场景推荐能力**:同时覆盖排序(CTR/CVR)、召回、多任务学习等推荐/营销模型,并且持续扩充模型库中。
37
- - **友好的工程体验**:支持各种格式数据(`csv/parquet/pathlike`)的流式预处理/分布式训练/推理,GPU加速与可视化指标监控,方便业务算法工程师和推荐算法学习者进行实验。
38
- - **灵活的命令行工具**:通过配置训练配置文件和推理配置文件,通过`nextrec --mode=train --train_config=train_config.yaml` 一键启动训练和推理进程,方便快速实验迭代和敏捷部署。
39
- - **高效训练与评估**:NextRec框架的标准化训练引擎内置多种优化器、学习率调度、早停、模型检查点与详细的日志管理,开箱即用。
35
+ - **统一的特征工程与数据流水线**:NextRec框架提供了统一的特征定义和可持久化的数据处理,并对批处理进行了优化,符合工业大数据Spark/Hive场景下,基于离线特征的模型训练推理流程。
36
+ - **多场景推荐能力**:覆盖排序(CTR/CVR)、召回、多任务学习、生成式召回等推荐/营销模型,持续跟进业界进展。
37
+ - **友好的工程体验**:支持各种格式数据(`csv/parquet/pathlike`)的流式预处理/分布式训练/推理,GPU加速与可视化指标监控,方便业务算法工程师和推荐算法学习者快速复现实验。
38
+ - **灵活的命令行工具**:支持通过命令行和配置文件,一键启动训练和推理进程,方便快速实验迭代和敏捷部署。
39
+ - **高效训练与评估**:内置多种优化器、学习率调度、早停、模型检查点与详细的日志管理,开箱即用。
40
40
 
41
41
  ## 架构
42
42
 
43
- NextRec采用模块化、低耦合的工程设计,使得推荐系统从数据处理 → 模型构建 → 训练评估 → 推理部署 全链路都具备复用性与可扩展性。其中的核心组件包括:Feature Spec驱动的Embedding架构;模型基类BaseModel;独立Layer模块;支持训练和推理的统一的DataLoader;开箱即用的模型库。
43
+ NextRec采用模块化工程设计,核心组件包括:Feature Spec驱动的Embedding架构;模型基类BaseModel;独立Layer模块;支持训练和推理的统一的DataLoader;开箱即用的模型库等。
44
44
 
45
45
  ![NextRec架构](assets/nextrec_diagram_zh.png)
46
46
 
47
- > 项目的架构借鉴了一些优秀的开源推荐算法库,其中最初版的layer层借鉴了DataWhaleChina社区的[torch-rechub](https://github.com/datawhalechina/torch-rechub),现已替换为了自有实现。torch-rechub在开发架构和模型实现上相对成熟,本人也参与了其中一小部分的维护,欢迎感兴趣的开发者前往了解。
47
+ > 项目的架构借鉴了一些优秀的开源推荐算法库,例如DataWhaleChina社区的[torch-rechub](https://github.com/datawhalechina/torch-rechub)。torch-rechub在开发架构和模型实现上相对成熟,本人也参与了其中一小部分的维护,欢迎感兴趣的开发者前往了解。
48
48
 
49
49
  ---
50
50
 
51
51
  ## 安装
52
52
 
53
- 你可以通过`pip install nextrec`快速安装NextRec的最新版本,环境要求为Python 3.10+。
53
+ 开发者可以通过`pip install nextrec`快速安装NextRec的最新版本,环境要求为Python 3.10+。如果需要执行示例代码,则需要先拉取仓库:
54
+
55
+ ```bash
56
+ git clone https://github.com/zerolovesea/NextRec.git
57
+ cd NextRec/
58
+ pip install nextrec # or pip install -e .
59
+ ```
54
60
 
55
61
  ## 示例代码
56
62
 
@@ -60,6 +66,7 @@ NextRec采用模块化、低耦合的工程设计,使得推荐系统从数据
60
66
  - [example_ranking_din.py](/tutorials/example_ranking_din.py) - 电商数据集上的DIN 深度兴趣网络训练示例
61
67
  - [example_multitask.py](/tutorials/example_multitask.py) - 电商数据集上的ESMM多任务学习训练示例
62
68
  - [example_match_dssm.py](/tutorials/example_match_dssm.py) - 基于movielen 100k数据集训练的 DSSM 召回模型示例
69
+
63
70
  - [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) - 快速校验所有排序模型的可用性
64
71
  - [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) - 快速校验所有多任务模型的可用性
65
72
  - [run_all_match_models.py](/tutorials/run_all_match_models.py) - 快速校验所有召回模型的可用性
@@ -78,7 +85,7 @@ NextRec采用模块化、低耦合的工程设计,使得推荐系统从数据
78
85
  | 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
79
86
  | 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
80
87
 
81
- 接下来我们将用一个简短的示例,展示如何使用NextRec训练一个DIN模型。DIN(Deep Interest Network)来自于阿里妈妈2018年KDD最佳论文模型,用于CTR预估场景。你也可以直接执行`python tutorials/example_ranking_din.py`来执行训练推理代码。
88
+ 接下来我们将用一个简短的示例,展示如何使用NextRec训练一个DIN(Deep Interest Network)模型。你也可以直接运行`python tutorials/example_ranking_din.py`来执行训练推理代码。
82
89
 
83
90
  开始训练以后,你可以在`nextrec_logs/din_tutorial`路径下查看详细的训练日志。
84
91
 
@@ -157,6 +164,7 @@ metrics = model.evaluate(
157
164
  NextRec 提供了强大的命令行界面,支持通过 YAML 配置文件进行模型训练和预测。详细的 CLI 文档请参见:
158
165
 
159
166
  - [NextRec CLI 使用指南](/nextrec_cli_preset/NextRec-CLI_zh.md) - 完整的 CLI 使用文档
167
+ - [NextRec CLI 配置文件示例](/nextrec_cli_preset/) - CLI 使用配置文件示例
160
168
 
161
169
  ```bash
162
170
  # 训练模型
@@ -166,11 +174,11 @@ nextrec --mode=train --train_config=path/to/train_config.yaml
166
174
  nextrec --mode=predict --predict_config=path/to/predict_config.yaml
167
175
  ```
168
176
 
169
- > 截止当前版本0.4.3,NextRec CLI支持单机训练,分布式训练相关功能尚在开发中。
177
+ > 截至当前版本0.4.5,NextRec CLI支持单机训练,分布式训练相关功能尚在开发中。
170
178
 
171
179
  ## 兼容平台
172
180
 
173
- 当前最新版本为0.4.3,所有模型和测试代码均已在以下平台通过验证,如果开发者在使用中遇到兼容问题,请在issue区提出错误报告及系统版本:
181
+ 当前最新版本为0.4.5,所有模型和测试代码均已在以下平台通过验证,如果开发者在使用中遇到兼容问题,请在issue区提出错误报告及系统版本:
174
182
 
175
183
  | 平台 | 配置 |
176
184
  |------|------|
@@ -11,7 +11,7 @@ sys.path.insert(0, str(PROJECT_ROOT / "nextrec"))
11
11
  project = "NextRec"
12
12
  copyright = "2025, Yang Zhou"
13
13
  author = "Yang Zhou"
14
- release = "0.4.3"
14
+ release = "0.4.5"
15
15
 
16
16
  extensions = [
17
17
  "myst_parser",
@@ -0,0 +1 @@
1
+ __version__ = "0.4.5"
@@ -1,12 +1,54 @@
1
1
  """
2
2
  Date: create on 09/11/2025
3
3
  Author:
4
- Yang Zhou,zyaztec@gmail.com
4
+ Yang Zhou,zyaztec@gmail.com
5
5
  Reference:
6
- [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
7
- for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
8
- knowledge discovery & data mining. 2018: 1754-1763.
9
- (https://arxiv.org/abs/1803.05170)
6
+ [1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
7
+ for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
8
+ knowledge discovery & data mining. 2018: 1754-1763.
9
+ (https://arxiv.org/abs/1803.05170)
10
+
11
+ xDeepFM is a CTR prediction model that unifies explicit and implicit
12
+ feature interaction learning. It extends DeepFM by adding the
13
+ Compressed Interaction Network (CIN) to explicitly model high-order
14
+ interactions at the vector-wise level, while an MLP captures implicit
15
+ non-linear crosses. A linear term retains first-order signals, and all
16
+ three parts are learned jointly end-to-end.
17
+
18
+ In the forward pass:
19
+ (1) Embedding Layer: transforms sparse/sequence fields into dense vectors
20
+ (2) Linear Part: captures first-order contributions of sparse/sequence fields
21
+ (3) CIN: explicitly builds higher-order feature crosses via convolution over
22
+ outer products of field embeddings, with optional split-half connections
23
+ (4) Deep Part (MLP): models implicit, non-linear interactions across all fields
24
+ (5) Combination: sums outputs from linear, CIN, and deep branches before the
25
+ task-specific prediction layer
26
+
27
+ Key Advantages:
28
+ - Jointly learns first-order, explicit high-order, and implicit interactions
29
+ - CIN offers interpretable vector-wise crosses with controlled complexity
30
+ - Deep branch enhances representation power for non-linear patterns
31
+ - End-to-end optimization eliminates heavy manual feature engineering
32
+ - Flexible design supports both sparse and sequence features
33
+
34
+ xDeepFM 是一个 CTR 预估模型,将显式与隐式的特征交互学习统一到同一框架。
35
+ 在 DeepFM 的基础上,额外引入了 CIN(Compressed Interaction Network)
36
+ 显式建模高阶向量级交互,同时 MLP 负责隐式非线性交互,线性部分保留一阶信号,
37
+ 三者联合训练。
38
+
39
+ 前向流程:
40
+ (1) 嵌入层:将稀疏/序列特征映射为稠密向量
41
+ (2) 线性部分:建模稀疏/序列特征的一阶贡献
42
+ (3) CIN:通过对字段嵌入做外积并卷积,显式捕获高阶交叉,可选 split-half 以控参
43
+ (4) 深层部分(MLP):对所有特征进行隐式非线性交互建模
44
+ (5) 融合:线性、CIN、MLP 输出求和后进入任务预测层
45
+
46
+ 主要优点:
47
+ - 同时学习一阶、显式高阶、隐式交互
48
+ - CIN 提供可解释的向量级交叉并可控复杂度
49
+ - 深层分支提升非线性表达能力
50
+ - 端到端训练降低人工特征工程需求
51
+ - 兼容稀疏与序列特征的建模
10
52
  """
11
53
 
12
54
  import torch
@@ -160,8 +160,11 @@ def build_feature_objects(
160
160
  SparseFeature(
161
161
  name=name,
162
162
  vocab_size=int(vocab_size),
163
+ embedding_name=embed_cfg.get("embedding_name", name),
163
164
  embedding_dim=embed_cfg.get("embedding_dim"),
164
165
  padding_idx=embed_cfg.get("padding_idx"),
166
+ init_type=embed_cfg.get("init_type", "xavier_uniform"),
167
+ init_params=embed_cfg.get("init_params"),
165
168
  l1_reg=embed_cfg.get("l1_reg", 0.0),
166
169
  l2_reg=embed_cfg.get("l2_reg", 1e-5),
167
170
  trainable=embed_cfg.get("trainable", True),
@@ -184,9 +187,12 @@ def build_feature_objects(
184
187
  name=name,
185
188
  vocab_size=int(vocab_size),
186
189
  max_len=embed_cfg.get("max_len") or proc_cfg.get("max_len", 50),
190
+ embedding_name=embed_cfg.get("embedding_name", name),
187
191
  embedding_dim=embed_cfg.get("embedding_dim"),
188
192
  padding_idx=embed_cfg.get("padding_idx"),
189
193
  combiner=embed_cfg.get("combiner", "mean"),
194
+ init_type=embed_cfg.get("init_type", "xavier_uniform"),
195
+ init_params=embed_cfg.get("init_params"),
190
196
  l1_reg=embed_cfg.get("l1_reg", 0.0),
191
197
  l2_reg=embed_cfg.get("l2_reg", 1e-5),
192
198
  trainable=embed_cfg.get("trainable", True),
@@ -5,10 +5,9 @@ Date: create on 13/11/2025
5
5
  Author: Yang Zhou, zyaztec@gmail.com
6
6
  """
7
7
 
8
- from typing import Any, Dict, Set, cast
8
+ from typing import Any, Dict, Set
9
9
 
10
10
  import torch.nn as nn
11
- from torch.nn.init import _NonlinearityType
12
11
 
13
12
  KNOWN_NONLINEARITIES: Set[str] = {
14
13
  "linear",
@@ -27,28 +26,25 @@ KNOWN_NONLINEARITIES: Set[str] = {
27
26
  }
28
27
 
29
28
 
30
- def resolve_nonlinearity(activation: str | _NonlinearityType) -> _NonlinearityType:
31
- if isinstance(activation, str):
32
- if activation in KNOWN_NONLINEARITIES:
33
- return cast(_NonlinearityType, activation)
34
- # Fall back to linear for custom activations (gain handled separately).
35
- return "linear"
36
- return activation
29
+ def resolve_nonlinearity(activation: str):
30
+ if activation in KNOWN_NONLINEARITIES:
31
+ return activation
32
+ return "linear"
37
33
 
38
34
 
39
- def resolve_gain(activation: str | _NonlinearityType, param: Dict[str, Any]) -> float:
35
+ def resolve_gain(activation: str, param: Dict[str, Any]) -> float:
40
36
  if "gain" in param:
41
37
  return param["gain"]
42
38
  nonlinearity = resolve_nonlinearity(activation)
43
39
  try:
44
- return nn.init.calculate_gain(nonlinearity, param.get("param"))
40
+ return nn.init.calculate_gain(nonlinearity, param.get("param")) # type: ignore
45
41
  except ValueError:
46
- return 1.0 # custom activation with no gain estimate available
42
+ return 1.0
47
43
 
48
44
 
49
45
  def get_initializer(
50
46
  init_type: str = "normal",
51
- activation: str | _NonlinearityType = "linear",
47
+ activation: str = "linear",
52
48
  param: Dict[str, Any] | None = None,
53
49
  ):
54
50
  param = param or {}
@@ -62,11 +58,11 @@ def get_initializer(
62
58
  nn.init.xavier_normal_(tensor, gain=gain)
63
59
  elif init_type == "kaiming_uniform":
64
60
  nn.init.kaiming_uniform_(
65
- tensor, a=param.get("a", 0), nonlinearity=nonlinearity
61
+ tensor, a=param.get("a", 0), nonlinearity=nonlinearity # type: ignore
66
62
  )
67
63
  elif init_type == "kaiming_normal":
68
64
  nn.init.kaiming_normal_(
69
- tensor, a=param.get("a", 0), nonlinearity=nonlinearity
65
+ tensor, a=param.get("a", 0), nonlinearity=nonlinearity # type: ignore
70
66
  )
71
67
  elif init_type == "orthogonal":
72
68
  nn.init.orthogonal_(tensor, gain=gain)
@@ -80,4 +76,4 @@ def get_initializer(
80
76
  raise ValueError(f"Unknown init_type: {init_type}")
81
77
  return tensor
82
78
 
83
- return initializer_fn
79
+ return initializer_fn
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nextrec"
3
- version = "0.4.3"
3
+ version = "0.4.5"
4
4
  description = "A comprehensive recommendation library with match, ranking, and multi-task learning models"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -1 +0,0 @@
1
- __version__ = "0.4.3"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes