nextrec 0.4.2__tar.gz → 0.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nextrec-0.4.2 → nextrec-0.4.4}/.github/workflows/tests.yml +16 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/.gitignore +1 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/PKG-INFO +57 -22
- {nextrec-0.4.2 → nextrec-0.4.4}/README.md +56 -21
- {nextrec-0.4.2 → nextrec-0.4.4}/README_zh.md +53 -16
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/rtd/conf.py +1 -1
- nextrec-0.4.4/nextrec/__version__.py +1 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/basic/layers.py +32 -8
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/basic/loggers.py +1 -1
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/basic/metrics.py +2 -1
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/basic/model.py +3 -3
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/cli.py +41 -47
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/data/dataloader.py +1 -1
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/multi_task/esmm.py +23 -16
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/multi_task/mmoe.py +36 -17
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/multi_task/ple.py +18 -12
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/multi_task/poso.py +68 -37
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/multi_task/share_bottom.py +16 -2
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/ranking/afm.py +14 -14
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/ranking/autoint.py +2 -2
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/ranking/dcn.py +61 -19
- nextrec-0.4.4/nextrec/models/ranking/dcn_v2.py +304 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/ranking/deepfm.py +14 -9
- nextrec-0.4.4/nextrec/models/ranking/dien.py +508 -0
- nextrec-0.4.4/nextrec/models/ranking/din.py +249 -0
- nextrec-0.4.4/nextrec/models/ranking/fibinet.py +214 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/ranking/fm.py +44 -8
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/ranking/masknet.py +7 -7
- nextrec-0.4.4/nextrec/models/ranking/pnn.py +200 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/ranking/widedeep.py +8 -4
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/ranking/xdeepfm.py +57 -10
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/utils/config.py +15 -3
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/utils/file.py +2 -1
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/utils/initializer.py +12 -16
- nextrec-0.4.4/nextrec/utils/model.py +44 -0
- nextrec-0.4.4/nextrec_cli_preset/NextRec-CLI.md +898 -0
- nextrec-0.4.4/nextrec_cli_preset/NextRec-CLI_zh.md +898 -0
- nextrec-0.4.4/nextrec_cli_preset/feature_config.yaml +50 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/afm.yaml +10 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/autoint.yaml +13 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/dcn.yaml +13 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/deepfm.yaml +12 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/din.yaml +17 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/esmm.yaml +16 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/fibinet.yaml +14 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/fm.yaml +8 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/masknet.yaml +17 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/mmoe.yaml +22 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/ple.yaml +30 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/pnn.yaml +14 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/poso.yaml +38 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/share_bottom.yaml +20 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/widedeep.yaml +12 -0
- nextrec-0.4.4/nextrec_cli_preset/model_configs/xdeepfm.yaml +14 -0
- nextrec-0.4.4/nextrec_cli_preset/predict_config.yaml +24 -0
- nextrec-0.4.4/nextrec_cli_preset/train_config.yaml +45 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/pyproject.toml +1 -1
- {nextrec-0.4.2 → nextrec-0.4.4}/test/test_ranking_models.py +68 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/run_all_ranking_models.py +34 -2
- nextrec-0.4.2/coverage.xml +0 -542
- nextrec-0.4.2/nextrec/__version__.py +0 -1
- nextrec-0.4.2/nextrec/models/ranking/dcn_v2.py +0 -125
- nextrec-0.4.2/nextrec/models/ranking/dien.py +0 -375
- nextrec-0.4.2/nextrec/models/ranking/din.py +0 -211
- nextrec-0.4.2/nextrec/models/ranking/fibinet.py +0 -152
- nextrec-0.4.2/nextrec/models/ranking/pnn.py +0 -133
- nextrec-0.4.2/nextrec/utils/model.py +0 -22
- {nextrec-0.4.2 → nextrec-0.4.4}/.github/workflows/publish.yml +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/.readthedocs.yaml +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/CODE_OF_CONDUCT.md +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/CONTRIBUTING.md +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/LICENSE +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/MANIFEST.in +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/assets/Feature Configuration.png +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/assets/Model Parameters.png +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/assets/Training Configuration.png +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/assets/Training logs.png +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/assets/logo.png +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/assets/mmoe_tutorial.png +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/assets/nextrec_diagram_en.png +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/assets/nextrec_diagram_zh.png +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/assets/test data.png +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/dataset/ctcvr_task.csv +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/dataset/match_task.csv +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/dataset/movielens_100k.csv +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/dataset/multitask_task.csv +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/dataset/ranking_task.csv +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/en/Getting started guide.md +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/rtd/Makefile +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/rtd/index.md +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/rtd/make.bat +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/rtd/modules.rst +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/rtd/nextrec.basic.rst +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/rtd/nextrec.data.rst +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/rtd/nextrec.loss.rst +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/rtd/nextrec.rst +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/rtd/nextrec.utils.rst +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/rtd/requirements.txt +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/docs/zh/快速上手.md +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/__init__.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/basic/__init__.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/basic/activation.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/basic/callback.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/basic/features.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/basic/session.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/data/__init__.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/data/batch_utils.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/data/data_processing.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/data/data_utils.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/data/preprocessor.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/loss/__init__.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/loss/listwise.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/loss/loss_utils.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/loss/pairwise.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/loss/pointwise.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/generative/__init__.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/generative/hstu.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/generative/tiger.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/match/__init__.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/match/dssm.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/match/dssm_v2.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/match/mind.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/match/sdm.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/match/youtube_dnn.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/multi_task/__init__.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/models/ranking/__init__.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/utils/__init__.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/utils/device.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/utils/distributed.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/utils/embedding.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/utils/feature.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/utils/optimizer.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/utils/synthetic_data.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/nextrec/utils/tensor.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/pytest.ini +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/requirements.txt +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/scripts/format_code.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/test/__init__.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/test/conftest.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/test/run_tests.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/test/test_layers.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/test/test_losses.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/test/test_match_models.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/test/test_multitask_models.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/test/test_preprocessor.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/test/test_utils.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/test_requirements.txt +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/distributed/example_distributed_training.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/distributed/example_distributed_training_large_dataset.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/example_match_dssm.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/example_multitask.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/example_ranking_din.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/movielen_match_dssm.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/movielen_ranking_deepfm.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/notebooks/en/Hands on dataprocessor.ipynb +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/notebooks/en/Hands on nextrec.ipynb +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/notebooks/zh/Hands on dataprocessor.ipynb +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/notebooks/zh/Hands on nextrec.ipynb +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/run_all_match_models.py +0 -0
- {nextrec-0.4.2 → nextrec-0.4.4}/tutorials/run_all_multitask_models.py +0 -0
|
@@ -45,7 +45,23 @@ jobs:
|
|
|
45
45
|
run: |
|
|
46
46
|
python -c "import torch; print(f'PyTorch version: {torch.__version__}')"
|
|
47
47
|
python -c "import nextrec; print('NextRec imported successfully')"
|
|
48
|
+
|
|
49
|
+
- name: Format codebase
|
|
50
|
+
run: |
|
|
51
|
+
python scripts/format_code.py
|
|
52
|
+
|
|
53
|
+
- name: Run tutorial multi-task models
|
|
54
|
+
run: |
|
|
55
|
+
python tutorials/run_all_multitask_models.py
|
|
48
56
|
|
|
57
|
+
- name: Run tutorial ranking models
|
|
58
|
+
run: |
|
|
59
|
+
python tutorials/run_all_ranking_models.py
|
|
60
|
+
|
|
61
|
+
- name: Run tutorial match models
|
|
62
|
+
run: |
|
|
63
|
+
python tutorials/run_all_match_models.py
|
|
64
|
+
|
|
49
65
|
- name: Run match model tests
|
|
50
66
|
run: |
|
|
51
67
|
pytest test/test_match_models.py -v --cov=nextrec/models/match --cov-report=xml
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nextrec
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.4
|
|
4
4
|
Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
|
|
5
5
|
Project-URL: Homepage, https://github.com/zerolovesea/NextRec
|
|
6
6
|
Project-URL: Repository, https://github.com/zerolovesea/NextRec
|
|
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
|
|
|
63
63
|

|
|
64
64
|

|
|
65
65
|

|
|
66
|
-

|
|
67
67
|
|
|
68
68
|
English | [中文文档](README_zh.md)
|
|
69
69
|
|
|
@@ -71,59 +71,78 @@ English | [中文文档](README_zh.md)
|
|
|
71
71
|
|
|
72
72
|
</div>
|
|
73
73
|
|
|
74
|
+
## Table of Contents
|
|
75
|
+
|
|
76
|
+
- [Introduction](#introduction)
|
|
77
|
+
- [Installation](#installation)
|
|
78
|
+
- [Architecture](#architecture)
|
|
79
|
+
- [5-Minute Quick Start](#5-minute-quick-start)
|
|
80
|
+
- [CLI Usage](#cli-usage)
|
|
81
|
+
- [Platform Compatibility](#platform-compatibility)
|
|
82
|
+
- [Supported Models](#supported-models)
|
|
83
|
+
- [Contributing](#contributing)
|
|
84
|
+
|
|
74
85
|
## Introduction
|
|
75
86
|
|
|
76
|
-
NextRec is a modern recommendation framework built on PyTorch, delivering a unified experience for modeling, training, and evaluation.
|
|
87
|
+
NextRec is a modern recommendation framework built on PyTorch, delivering a unified experience for modeling, training, and evaluation. It is designed with rich model implementations, data-processing utilities, and engineering-ready training components. NextRec focuses on large-scale industrial recommendation scenarios on Spark clusters, training on massive offline features (`parquet/csv`).
|
|
77
88
|
|
|
78
89
|
## Why NextRec
|
|
79
90
|
|
|
80
|
-
- **Unified feature engineering & data pipeline**: Dense/Sparse/Sequence feature definitions,
|
|
91
|
+
- **Unified feature engineering & data pipeline**: NextRec provides unified Dense/Sparse/Sequence feature definitions, DataProcessor, and batch-optimized RecDataLoader, matching offline feature training/inference in industrial big-data settings.
|
|
81
92
|
- **Multi-scenario coverage**: Ranking (CTR/CVR), retrieval, multi-task learning, and more marketing/rec models, with a continuously expanding model zoo.
|
|
82
|
-
- **Developer-friendly experience**: Stream processing/training/inference for csv/parquet/pathlike data, plus GPU/MPS acceleration and visualization support.
|
|
93
|
+
- **Developer-friendly experience**: `Stream processing/distributed training/inference` for `csv/parquet/pathlike` data, plus GPU/MPS acceleration and visualization support.
|
|
83
94
|
- **Efficient training & evaluation**: Standardized engine with optimizers, LR schedulers, early stopping, checkpoints, and detailed logging out of the box.
|
|
84
95
|
|
|
85
96
|
## Architecture
|
|
86
97
|
|
|
87
|
-
NextRec adopts a modular
|
|
98
|
+
NextRec adopts a modular design, enabling full-pipeline reusability and scalability across data processing → model construction → training & evaluation → inference & deployment. Its core components include: a Feature-Spec-driven Embedding architecture, the BaseModel abstraction, a set of independent reusable Layers, a unified DataLoader for both training and inference, and a ready-to-use Model Zoo.
|
|
88
99
|
|
|
89
100
|

|
|
90
101
|
|
|
91
|
-
> The project borrows ideas from excellent open-source rec libraries
|
|
102
|
+
> The project borrows ideas from excellent open-source rec libraries, for example: [torch-rechub](https://github.com/datawhalechina/torch-rechub). torch-rechub remains mature in architecture and models; the author contributed a bit there—feel free to check it out.
|
|
92
103
|
|
|
93
104
|
---
|
|
94
105
|
|
|
95
106
|
## Installation
|
|
96
107
|
|
|
97
|
-
You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
|
|
108
|
+
You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required. To run the tutorial code, clone this project first:
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
git clone https://github.com/zerolovesea/NextRec.git
|
|
112
|
+
cd NextRec/
|
|
113
|
+
pip install nextrec # or pip install -e .
|
|
114
|
+
```
|
|
98
115
|
|
|
99
116
|
## Tutorials
|
|
100
117
|
|
|
101
118
|
See `tutorials/` for examples covering ranking, retrieval, multi-task learning, and data processing:
|
|
102
119
|
|
|
103
|
-
- [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM training on MovieLens 100k
|
|
104
|
-
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN training on
|
|
105
|
-
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task training on
|
|
106
|
-
- [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval on MovieLens 100k
|
|
120
|
+
- [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM training on MovieLens 100k dataset
|
|
121
|
+
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN (Deep Interest Network) training on an e-commerce dataset
|
|
122
|
+
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training on e-commerce dataset
|
|
123
|
+
- [movielen_match_dssm.py](/tutorials/movielen_match_dssm.py) — DSSM retrieval model training on MovieLens 100k dataset
|
|
107
124
|
|
|
108
|
-
|
|
125
|
+
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly validate availability of all ranking models
|
|
126
|
+
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly validate availability of all multi-task models
|
|
127
|
+
- [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly validate availability of all retrieval models
|
|
128
|
+
|
|
129
|
+
To dive deeper into NextRec framework details, Jupyter notebooks are available:
|
|
109
130
|
|
|
110
131
|
- [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
|
|
111
132
|
- [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
|
|
112
133
|
|
|
113
|
-
> Current version [0.4.2]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
|
|
114
|
-
|
|
115
134
|
## 5-Minute Quick Start
|
|
116
135
|
|
|
117
|
-
We provide a detailed quick
|
|
136
|
+
We provide a detailed quick-start guide and paired datasets to help you get familiar with the different features of the NextRec framework. In `dataset/` you'll find an e-commerce scenario test dataset like this:
|
|
118
137
|
|
|
119
138
|
| user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
|
|
120
139
|
|--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
|
|
121
140
|
| 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
|
|
122
141
|
| 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
|
|
123
142
|
|
|
124
|
-
Below is a short example showing how to train a DIN
|
|
143
|
+
Below is a short example showing how to train a DIN (Deep Interest Network) model. You can also run `python tutorials/example_ranking_din.py` directly to execute the training and inference code.
|
|
125
144
|
|
|
126
|
-
After training, detailed logs
|
|
145
|
+
After training starts, you can find detailed training logs at `nextrec_logs/din_tutorial`.
|
|
127
146
|
|
|
128
147
|
```python
|
|
129
148
|
import pandas as pd
|
|
@@ -196,9 +215,26 @@ metrics = model.evaluate(
|
|
|
196
215
|
)
|
|
197
216
|
```
|
|
198
217
|
|
|
218
|
+
## CLI Usage
|
|
219
|
+
|
|
220
|
+
NextRec provides a powerful command-line interface for model training and prediction using YAML configuration files. For detailed CLI documentation, see:
|
|
221
|
+
|
|
222
|
+
- [NextRec CLI User Guide](/nextrec_cli_preset/NextRec-CLI.md) - Complete guide for using the CLI
|
|
223
|
+
- [NextRec CLI Configuration Examples](/nextrec_cli_preset/) - CLI configuration file examples
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
# Train a model
|
|
227
|
+
nextrec --mode=train --train_config=path/to/train_config.yaml
|
|
228
|
+
|
|
229
|
+
# Run prediction
|
|
230
|
+
nextrec --mode=predict --predict_config=path/to/predict_config.yaml
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
> As of version 0.4.4, NextRec CLI supports single-machine training; distributed training features are currently under development.
|
|
234
|
+
|
|
199
235
|
## Platform Compatibility
|
|
200
236
|
|
|
201
|
-
The current version is 0.4.
|
|
237
|
+
The current version is 0.4.4. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
|
|
202
238
|
|
|
203
239
|
| Platform | Configuration |
|
|
204
240
|
|----------|---------------|
|
|
@@ -247,14 +283,13 @@ The current version is 0.4.2. All models and test code have been validated on th
|
|
|
247
283
|
| [ESMM](nextrec/models/multi_task/esmm.py) | Entire Space Multi-task Model | SIGIR 2018 | Supported |
|
|
248
284
|
| [ShareBottom](nextrec/models/multi_task/share_bottom.py) | Multitask Learning | - | Supported |
|
|
249
285
|
| [POSO](nextrec/models/multi_task/poso.py) | POSO: Personalized Cold-start Modules for Large-scale Recommender Systems | 2021 | Supported |
|
|
250
|
-
| [POSO-IFLYTEK](nextrec/models/multi_task/poso_iflytek.py) | POSO with PLE-style gating for sequential marketing tasks | - | Supported |
|
|
251
286
|
|
|
252
287
|
### Generative Models
|
|
253
288
|
|
|
254
289
|
| Model | Paper | Year | Status |
|
|
255
290
|
|-------|-------|------|--------|
|
|
256
291
|
| [TIGER](nextrec/models/generative/tiger.py) | Recommender Systems with Generative Retrieval | NeurIPS 2023 | In Progress |
|
|
257
|
-
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - |
|
|
292
|
+
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - | Supported |
|
|
258
293
|
|
|
259
294
|
---
|
|
260
295
|
|
|
@@ -270,7 +305,7 @@ We welcome contributions of any form!
|
|
|
270
305
|
4. Push your branch (`git push origin feature/AmazingFeature`)
|
|
271
306
|
5. Open a Pull Request
|
|
272
307
|
|
|
273
|
-
> Before submitting a PR, please run
|
|
308
|
+
> Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is consistent.
|
|
274
309
|
|
|
275
310
|
### Code Style
|
|
276
311
|
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|

|
|
8
8
|

|
|
9
9
|

|
|
10
|
-

|
|
11
11
|
|
|
12
12
|
English | [中文文档](README_zh.md)
|
|
13
13
|
|
|
@@ -15,59 +15,78 @@ English | [中文文档](README_zh.md)
|
|
|
15
15
|
|
|
16
16
|
</div>
|
|
17
17
|
|
|
18
|
+
## Table of Contents
|
|
19
|
+
|
|
20
|
+
- [Introduction](#introduction)
|
|
21
|
+
- [Installation](#installation)
|
|
22
|
+
- [Architecture](#architecture)
|
|
23
|
+
- [5-Minute Quick Start](#5-minute-quick-start)
|
|
24
|
+
- [CLI Usage](#cli-usage)
|
|
25
|
+
- [Platform Compatibility](#platform-compatibility)
|
|
26
|
+
- [Supported Models](#supported-models)
|
|
27
|
+
- [Contributing](#contributing)
|
|
28
|
+
|
|
18
29
|
## Introduction
|
|
19
30
|
|
|
20
|
-
NextRec is a modern recommendation framework built on PyTorch, delivering a unified experience for modeling, training, and evaluation.
|
|
31
|
+
NextRec is a modern recommendation framework built on PyTorch, delivering a unified experience for modeling, training, and evaluation. It is designed with rich model implementations, data-processing utilities, and engineering-ready training components. NextRec focuses on large-scale industrial recommendation scenarios on Spark clusters, training on massive offline features (`parquet/csv`).
|
|
21
32
|
|
|
22
33
|
## Why NextRec
|
|
23
34
|
|
|
24
|
-
- **Unified feature engineering & data pipeline**: Dense/Sparse/Sequence feature definitions,
|
|
35
|
+
- **Unified feature engineering & data pipeline**: NextRec provides unified Dense/Sparse/Sequence feature definitions, DataProcessor, and batch-optimized RecDataLoader, matching offline feature training/inference in industrial big-data settings.
|
|
25
36
|
- **Multi-scenario coverage**: Ranking (CTR/CVR), retrieval, multi-task learning, and more marketing/rec models, with a continuously expanding model zoo.
|
|
26
|
-
- **Developer-friendly experience**: Stream processing/training/inference for csv/parquet/pathlike data, plus GPU/MPS acceleration and visualization support.
|
|
37
|
+
- **Developer-friendly experience**: `Stream processing/distributed training/inference` for `csv/parquet/pathlike` data, plus GPU/MPS acceleration and visualization support.
|
|
27
38
|
- **Efficient training & evaluation**: Standardized engine with optimizers, LR schedulers, early stopping, checkpoints, and detailed logging out of the box.
|
|
28
39
|
|
|
29
40
|
## Architecture
|
|
30
41
|
|
|
31
|
-
NextRec adopts a modular
|
|
42
|
+
NextRec adopts a modular design, enabling full-pipeline reusability and scalability across data processing → model construction → training & evaluation → inference & deployment. Its core components include: a Feature-Spec-driven Embedding architecture, the BaseModel abstraction, a set of independent reusable Layers, a unified DataLoader for both training and inference, and a ready-to-use Model Zoo.
|
|
32
43
|
|
|
33
44
|

|
|
34
45
|
|
|
35
|
-
> The project borrows ideas from excellent open-source rec libraries
|
|
46
|
+
> The project borrows ideas from excellent open-source rec libraries, for example: [torch-rechub](https://github.com/datawhalechina/torch-rechub). torch-rechub remains mature in architecture and models; the author contributed a bit there—feel free to check it out.
|
|
36
47
|
|
|
37
48
|
---
|
|
38
49
|
|
|
39
50
|
## Installation
|
|
40
51
|
|
|
41
|
-
You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
|
|
52
|
+
You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required. To run the tutorial code, clone this project first:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
git clone https://github.com/zerolovesea/NextRec.git
|
|
56
|
+
cd NextRec/
|
|
57
|
+
pip install nextrec # or pip install -e .
|
|
58
|
+
```
|
|
42
59
|
|
|
43
60
|
## Tutorials
|
|
44
61
|
|
|
45
62
|
See `tutorials/` for examples covering ranking, retrieval, multi-task learning, and data processing:
|
|
46
63
|
|
|
47
|
-
- [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM training on MovieLens 100k
|
|
48
|
-
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN training on
|
|
49
|
-
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task training on
|
|
50
|
-
- [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval on MovieLens 100k
|
|
64
|
+
- [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM training on MovieLens 100k dataset
|
|
65
|
+
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN (Deep Interest Network) training on an e-commerce dataset
|
|
66
|
+
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training on e-commerce dataset
|
|
67
|
+
- [movielen_match_dssm.py](/tutorials/movielen_match_dssm.py) — DSSM retrieval model training on MovieLens 100k dataset
|
|
51
68
|
|
|
52
|
-
|
|
69
|
+
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly validate availability of all ranking models
|
|
70
|
+
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly validate availability of all multi-task models
|
|
71
|
+
- [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly validate availability of all retrieval models
|
|
72
|
+
|
|
73
|
+
To dive deeper into NextRec framework details, Jupyter notebooks are available:
|
|
53
74
|
|
|
54
75
|
- [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
|
|
55
76
|
- [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
|
|
56
77
|
|
|
57
|
-
> Current version [0.4.2]: the matching module is not fully polished yet and may have compatibility issues or unexpected errors. Please raise an issue if you run into problems.
|
|
58
|
-
|
|
59
78
|
## 5-Minute Quick Start
|
|
60
79
|
|
|
61
|
-
We provide a detailed quick
|
|
80
|
+
We provide a detailed quick-start guide and paired datasets to help you get familiar with the different features of the NextRec framework. In `dataset/` you'll find an e-commerce scenario test dataset like this:
|
|
62
81
|
|
|
63
82
|
| user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
|
|
64
83
|
|--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
|
|
65
84
|
| 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
|
|
66
85
|
| 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
|
|
67
86
|
|
|
68
|
-
Below is a short example showing how to train a DIN
|
|
87
|
+
Below is a short example showing how to train a DIN (Deep Interest Network) model. You can also run `python tutorials/example_ranking_din.py` directly to execute the training and inference code.
|
|
69
88
|
|
|
70
|
-
After training, detailed logs
|
|
89
|
+
After training starts, you can find detailed training logs at `nextrec_logs/din_tutorial`.
|
|
71
90
|
|
|
72
91
|
```python
|
|
73
92
|
import pandas as pd
|
|
@@ -140,9 +159,26 @@ metrics = model.evaluate(
|
|
|
140
159
|
)
|
|
141
160
|
```
|
|
142
161
|
|
|
162
|
+
## CLI Usage
|
|
163
|
+
|
|
164
|
+
NextRec provides a powerful command-line interface for model training and prediction using YAML configuration files. For detailed CLI documentation, see:
|
|
165
|
+
|
|
166
|
+
- [NextRec CLI User Guide](/nextrec_cli_preset/NextRec-CLI.md) - Complete guide for using the CLI
|
|
167
|
+
- [NextRec CLI Configuration Examples](/nextrec_cli_preset/) - CLI configuration file examples
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
# Train a model
|
|
171
|
+
nextrec --mode=train --train_config=path/to/train_config.yaml
|
|
172
|
+
|
|
173
|
+
# Run prediction
|
|
174
|
+
nextrec --mode=predict --predict_config=path/to/predict_config.yaml
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
> As of version 0.4.4, NextRec CLI supports single-machine training; distributed training features are currently under development.
|
|
178
|
+
|
|
143
179
|
## Platform Compatibility
|
|
144
180
|
|
|
145
|
-
The current version is 0.4.
|
|
181
|
+
The current version is 0.4.4. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
|
|
146
182
|
|
|
147
183
|
| Platform | Configuration |
|
|
148
184
|
|----------|---------------|
|
|
@@ -191,14 +227,13 @@ The current version is 0.4.2. All models and test code have been validated on th
|
|
|
191
227
|
| [ESMM](nextrec/models/multi_task/esmm.py) | Entire Space Multi-task Model | SIGIR 2018 | Supported |
|
|
192
228
|
| [ShareBottom](nextrec/models/multi_task/share_bottom.py) | Multitask Learning | - | Supported |
|
|
193
229
|
| [POSO](nextrec/models/multi_task/poso.py) | POSO: Personalized Cold-start Modules for Large-scale Recommender Systems | 2021 | Supported |
|
|
194
|
-
| [POSO-IFLYTEK](nextrec/models/multi_task/poso_iflytek.py) | POSO with PLE-style gating for sequential marketing tasks | - | Supported |
|
|
195
230
|
|
|
196
231
|
### Generative Models
|
|
197
232
|
|
|
198
233
|
| Model | Paper | Year | Status |
|
|
199
234
|
|-------|-------|------|--------|
|
|
200
235
|
| [TIGER](nextrec/models/generative/tiger.py) | Recommender Systems with Generative Retrieval | NeurIPS 2023 | In Progress |
|
|
201
|
-
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - |
|
|
236
|
+
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - | Supported |
|
|
202
237
|
|
|
203
238
|
---
|
|
204
239
|
|
|
@@ -214,7 +249,7 @@ We welcome contributions of any form!
|
|
|
214
249
|
4. Push your branch (`git push origin feature/AmazingFeature`)
|
|
215
250
|
5. Open a Pull Request
|
|
216
251
|
|
|
217
|
-
> Before submitting a PR, please run
|
|
252
|
+
> Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is consistent.
|
|
218
253
|
|
|
219
254
|
### Code Style
|
|
220
255
|
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|

|
|
8
8
|

|
|
9
9
|

|
|
10
|
-

|
|
11
11
|
|
|
12
12
|
[English Version](README.md) | 中文文档
|
|
13
13
|
|
|
@@ -15,30 +15,48 @@
|
|
|
15
15
|
|
|
16
16
|
</div>
|
|
17
17
|
|
|
18
|
+
## 目录
|
|
19
|
+
|
|
20
|
+
- [简介](#简介)
|
|
21
|
+
- [安装](#安装)
|
|
22
|
+
- [架构](#架构)
|
|
23
|
+
- [5分钟快速上手](#5分钟快速上手)
|
|
24
|
+
- [命令行工具](#命令行工具)
|
|
25
|
+
- [兼容平台](#兼容平台)
|
|
26
|
+
- [支持模型](#支持模型)
|
|
27
|
+
- [贡献指南](#贡献指南)
|
|
28
|
+
|
|
18
29
|
## 简介
|
|
19
30
|
|
|
20
|
-
NextRec
|
|
31
|
+
NextRec是一个基于PyTorch的现代推荐系统框架,旨在为研究与工程团队提供快速的建模、训练与评估流程。框架内置丰富的模型实现、数据处理工具和工程化训练组件,覆盖多种推荐场景。此外提供了易上手的接口,命令行工具及教程,推荐算法学习者能以最快速度了解模型架构,复现学术论文并进行训练和部署。
|
|
21
32
|
|
|
22
33
|
## Why NextRec
|
|
23
34
|
|
|
24
|
-
- **统一的特征工程与数据流水线**:NextRec
|
|
25
|
-
-
|
|
26
|
-
-
|
|
27
|
-
-
|
|
35
|
+
- **统一的特征工程与数据流水线**:NextRec框架提供了统一的特征定义和可持久化的数据处理,并对批处理进行了优化,符合工业大数据Spark/Hive场景下基于离线特征的模型训练推理流程。
|
|
36
|
+
- **多场景推荐能力**:覆盖排序(CTR/CVR)、召回、多任务学习、生成式召回等推荐/营销模型,持续跟进业界进展。
|
|
37
|
+
- **友好的工程体验**:支持各种格式数据(`csv/parquet/pathlike`)的流式预处理/分布式训练/推理,GPU加速与可视化指标监控,方便业务算法工程师和推荐算法学习者快速复现实验。
|
|
38
|
+
- **灵活的命令行工具**:支持通过命令行和配置文件,一键启动训练和推理进程,方便快速实验迭代和敏捷部署。
|
|
39
|
+
- **高效训练与评估**:内置多种优化器、学习率调度、早停、模型检查点与详细的日志管理,开箱即用。
|
|
28
40
|
|
|
29
41
|
## 架构
|
|
30
42
|
|
|
31
|
-
NextRec
|
|
43
|
+
NextRec采用模块化工程设计,核心组件包括:Feature Spec驱动的Embedding架构;模型基类BaseModel;独立Layer模块;支持训练和推理的统一的DataLoader;开箱即用的模型库等。
|
|
32
44
|
|
|
33
45
|

|
|
34
46
|
|
|
35
|
-
>
|
|
47
|
+
> 项目的架构借鉴了一些优秀的开源推荐算法库,例如DataWhaleChina社区的[torch-rechub](https://github.com/datawhalechina/torch-rechub)。torch-rechub在开发架构和模型实现上相对成熟,本人也参与了其中一小部分的维护,欢迎感兴趣的开发者前往了解。
|
|
36
48
|
|
|
37
49
|
---
|
|
38
50
|
|
|
39
51
|
## 安装
|
|
40
52
|
|
|
41
|
-
|
|
53
|
+
开发者可以通过`pip install nextrec`快速安装NextRec的最新版本,环境要求为Python 3.10+。如果需要执行示例代码,则需要先拉取仓库:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
git clone https://github.com/zerolovesea/NextRec.git
|
|
57
|
+
cd NextRec/
|
|
58
|
+
pip install nextrec # or pip install -e .
|
|
59
|
+
```
|
|
42
60
|
|
|
43
61
|
## 示例代码
|
|
44
62
|
|
|
@@ -49,13 +67,15 @@ NextRec采用模块化、低耦合的工程设计,使得推荐系统从数据
|
|
|
49
67
|
- [example_multitask.py](/tutorials/example_multitask.py) - 电商数据集上的ESMM多任务学习训练示例
|
|
50
68
|
- [example_match_dssm.py](/tutorials/example_match_dssm.py) - 基于MovieLens 100k数据集训练的 DSSM 召回模型示例
|
|
51
69
|
|
|
70
|
+
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) - 快速校验所有排序模型的可用性
|
|
71
|
+
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) - 快速校验所有多任务模型的可用性
|
|
72
|
+
- [run_all_match_models.py](/tutorials/run_all_match_models.py) - 快速校验所有召回模型的可用性
|
|
73
|
+
|
|
52
74
|
如果想了解更多NextRec框架的细节,我们还提供了Jupyter notebook来帮助你了解:
|
|
53
75
|
|
|
54
76
|
- [如何上手NextRec框架](/tutorials/notebooks/zh/Hands%20on%20nextrec.ipynb)
|
|
55
77
|
- [如何使用数据处理器进行数据预处理](/tutorials/notebooks/zh/Hands%20on%20dataprocessor.ipynb)
|
|
56
78
|
|
|
57
|
-
> 当前版本[0.4.2],召回模型模块尚不完善,可能存在一些兼容性问题或意外报错,如果遇到问题,欢迎开发者在Issue区提出问题。
|
|
58
|
-
|
|
59
79
|
## 5分钟快速上手
|
|
60
80
|
|
|
61
81
|
我们提供了详细的上手指南和配套数据集,帮助您熟悉NextRec框架的不同功能。我们在`datasets/`路径下提供了一个来自电商场景的测试数据集,数据示例如下:
|
|
@@ -65,7 +85,7 @@ NextRec采用模块化、低耦合的工程设计,使得推荐系统从数据
|
|
|
65
85
|
| 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
|
|
66
86
|
| 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
|
|
67
87
|
|
|
68
|
-
接下来我们将用一个简短的示例,展示如何使用NextRec训练一个DIN
|
|
88
|
+
接下来我们将用一个简短的示例,展示如何使用NextRec训练一个DIN(Deep Interest Network)模型。您也可以直接运行`python tutorials/example_ranking_din.py`来执行训练和推理代码。
|
|
69
89
|
|
|
70
90
|
开始训练以后,你可以在`nextrec_logs/din_tutorial`路径下查看详细的训练日志。
|
|
71
91
|
|
|
@@ -138,9 +158,27 @@ metrics = model.evaluate(
|
|
|
138
158
|
user_id_column='user_id'
|
|
139
159
|
)
|
|
140
160
|
```
|
|
161
|
+
|
|
162
|
+
## 命令行工具
|
|
163
|
+
|
|
164
|
+
NextRec 提供了强大的命令行界面,支持通过 YAML 配置文件进行模型训练和预测。详细的 CLI 文档请参见:
|
|
165
|
+
|
|
166
|
+
- [NextRec CLI 使用指南](/nextrec_cli_preset/NextRec-CLI_zh.md) - 完整的 CLI 使用文档
|
|
167
|
+
- [NextRec CLI 配置文件示例](/nextrec_cli_preset/) - CLI 使用配置文件示例
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
# 训练模型
|
|
171
|
+
nextrec --mode=train --train_config=path/to/train_config.yaml
|
|
172
|
+
|
|
173
|
+
# 运行预测
|
|
174
|
+
nextrec --mode=predict --predict_config=path/to/predict_config.yaml
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
> 截止当前版本0.4.4,NextRec CLI支持单机训练,分布式训练相关功能尚在开发中。
|
|
178
|
+
|
|
141
179
|
## 兼容平台
|
|
142
180
|
|
|
143
|
-
当前最新版本为0.4.
|
|
181
|
+
当前最新版本为0.4.4,所有模型和测试代码均已在以下平台通过验证,如果开发者在使用中遇到兼容问题,请在issue区提出错误报告及系统版本:
|
|
144
182
|
|
|
145
183
|
| 平台 | 配置 |
|
|
146
184
|
|------|------|
|
|
@@ -187,14 +225,13 @@ metrics = model.evaluate(
|
|
|
187
225
|
| [ESMM](nextrec/models/multi_task/esmm.py) | Entire Space Multi-Task Model | SIGIR 2018 | 已支持 |
|
|
188
226
|
| [ShareBottom](nextrec/models/multi_task/share_bottom.py) | Multitask Learning | - | 已支持 |
|
|
189
227
|
| [POSO](nextrec/models/multi_task/poso.py) | POSO: Personalized Cold-start Modules for Large-scale Recommender Systems | 2021 | 已支持 |
|
|
190
|
-
| [POSO-IFLYTEK](nextrec/models/multi_task/poso_iflytek.py) | POSO with PLE-style gating for sequential marketing tasks | - | 已支持 |
|
|
191
228
|
|
|
192
229
|
### 生成式模型
|
|
193
230
|
|
|
194
231
|
| 模型 | 论文 | 年份 | 状态 |
|
|
195
232
|
|------|------|------|------|
|
|
196
233
|
| [TIGER](nextrec/models/generative/tiger.py) | Recommender Systems with Generative Retrieval | NeurIPS 2023 | 开发中 |
|
|
197
|
-
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - |
|
|
234
|
+
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - | 已支持 |
|
|
198
235
|
|
|
199
236
|
---
|
|
200
237
|
|
|
@@ -210,7 +247,7 @@ metrics = model.evaluate(
|
|
|
210
247
|
4. 推送到分支 (`git push origin feature/AmazingFeature`)
|
|
211
248
|
5. 创建 Pull Request
|
|
212
249
|
|
|
213
|
-
> 在提交 PR 之前,请运行 `python test/run_tests.py`
|
|
250
|
+
> 在提交 PR 之前,请运行 `python test/run_tests.py` 和 `python scripts/format_code.py` 确保所有测试通过并统一代码风格。
|
|
214
251
|
|
|
215
252
|
### 代码规范
|
|
216
253
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.4.4"
|
|
@@ -51,11 +51,11 @@ class PredictionLayer(nn.Module):
|
|
|
51
51
|
|
|
52
52
|
# slice offsets per task
|
|
53
53
|
start = 0
|
|
54
|
-
self.
|
|
54
|
+
self.task_slices: list[tuple[int, int]] = []
|
|
55
55
|
for dim in self.task_dims:
|
|
56
56
|
if dim < 1:
|
|
57
57
|
raise ValueError("Each task dimension must be >= 1.")
|
|
58
|
-
self.
|
|
58
|
+
self.task_slices.append((start, start + dim))
|
|
59
59
|
start += dim
|
|
60
60
|
if use_bias:
|
|
61
61
|
self.bias = nn.Parameter(torch.zeros(self.total_dim))
|
|
@@ -71,7 +71,7 @@ class PredictionLayer(nn.Module):
|
|
|
71
71
|
)
|
|
72
72
|
logits = x if self.bias is None else x + self.bias
|
|
73
73
|
outputs = []
|
|
74
|
-
for task_type, (start, end) in zip(self.task_types, self.
|
|
74
|
+
for task_type, (start, end) in zip(self.task_types, self.task_slices):
|
|
75
75
|
task_logits = logits[..., start:end] # logits for the current task
|
|
76
76
|
if self.return_logits:
|
|
77
77
|
outputs.append(task_logits)
|
|
@@ -367,20 +367,29 @@ class MLP(nn.Module):
|
|
|
367
367
|
dims: list[int] | None = None,
|
|
368
368
|
dropout: float = 0.0,
|
|
369
369
|
activation: str = "relu",
|
|
370
|
+
use_norm: bool = True,
|
|
371
|
+
norm_type: str = "layer_norm",
|
|
370
372
|
):
|
|
371
373
|
super().__init__()
|
|
372
374
|
if dims is None:
|
|
373
375
|
dims = []
|
|
374
376
|
layers = []
|
|
375
377
|
current_dim = input_dim
|
|
376
|
-
|
|
377
378
|
for i_dim in dims:
|
|
378
379
|
layers.append(nn.Linear(current_dim, i_dim))
|
|
379
|
-
|
|
380
|
+
if use_norm:
|
|
381
|
+
if norm_type == "batch_norm":
|
|
382
|
+
# **IMPORTANT** Be careful when using BatchNorm1d in distributed training: NextRec does not yet support synchronized batch norm.
|
|
383
|
+
layers.append(nn.BatchNorm1d(i_dim))
|
|
384
|
+
elif norm_type == "layer_norm":
|
|
385
|
+
layers.append(nn.LayerNorm(i_dim))
|
|
386
|
+
else:
|
|
387
|
+
raise ValueError(f"Unsupported norm_type: {norm_type}")
|
|
388
|
+
|
|
380
389
|
layers.append(activation_layer(activation))
|
|
381
390
|
layers.append(nn.Dropout(p=dropout))
|
|
382
391
|
current_dim = i_dim
|
|
383
|
-
|
|
392
|
+
# output layer
|
|
384
393
|
if output_layer:
|
|
385
394
|
layers.append(nn.Linear(current_dim, 1))
|
|
386
395
|
self.output_dim = 1
|
|
@@ -471,6 +480,21 @@ class BiLinearInteractionLayer(nn.Module):
|
|
|
471
480
|
return torch.cat(bilinear_list, dim=1)
|
|
472
481
|
|
|
473
482
|
|
|
483
|
+
class HadamardInteractionLayer(nn.Module):
|
|
484
|
+
"""Hadamard interaction layer for Deep-FiBiNET (0 case in 01/11)."""
|
|
485
|
+
|
|
486
|
+
def __init__(self, num_fields: int):
|
|
487
|
+
super().__init__()
|
|
488
|
+
self.num_fields = num_fields
|
|
489
|
+
|
|
490
|
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
491
|
+
# x: [B, F, D]
|
|
492
|
+
feature_emb = torch.split(x, 1, dim=1) # list of F tensors [B,1,D]
|
|
493
|
+
|
|
494
|
+
hadamard_list = [v_i * v_j for (v_i, v_j) in combinations(feature_emb, 2)]
|
|
495
|
+
return torch.cat(hadamard_list, dim=1) # [B, num_pairs, D]
|
|
496
|
+
|
|
497
|
+
|
|
474
498
|
class MultiHeadSelfAttention(nn.Module):
|
|
475
499
|
def __init__(
|
|
476
500
|
self,
|
|
@@ -542,7 +566,7 @@ class AttentionPoolingLayer(nn.Module):
|
|
|
542
566
|
embedding_dim: int,
|
|
543
567
|
hidden_units: list = [80, 40],
|
|
544
568
|
activation: str = "sigmoid",
|
|
545
|
-
use_softmax: bool =
|
|
569
|
+
use_softmax: bool = False,
|
|
546
570
|
):
|
|
547
571
|
super().__init__()
|
|
548
572
|
self.embedding_dim = embedding_dim
|
|
@@ -553,7 +577,7 @@ class AttentionPoolingLayer(nn.Module):
|
|
|
553
577
|
layers = []
|
|
554
578
|
for hidden_unit in hidden_units:
|
|
555
579
|
layers.append(nn.Linear(input_dim, hidden_unit))
|
|
556
|
-
layers.append(activation_layer(activation))
|
|
580
|
+
layers.append(activation_layer(activation, emb_size=hidden_unit))
|
|
557
581
|
input_dim = hidden_unit
|
|
558
582
|
layers.append(nn.Linear(input_dim, 1))
|
|
559
583
|
self.attention_net = nn.Sequential(*layers)
|
|
@@ -103,7 +103,7 @@ def setup_logger(session_id: str | os.PathLike | None = None):
|
|
|
103
103
|
session = create_session(str(session_id) if session_id is not None else None)
|
|
104
104
|
log_dir = session.logs_dir
|
|
105
105
|
log_dir.mkdir(parents=True, exist_ok=True)
|
|
106
|
-
log_file = log_dir /
|
|
106
|
+
log_file = log_dir / "runs.log"
|
|
107
107
|
|
|
108
108
|
console_format = "%(message)s"
|
|
109
109
|
file_format = "%(asctime)s - %(levelname)s - %(message)s"
|