nextrec 0.4.1__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nextrec-0.4.1 → nextrec-0.4.3}/.github/workflows/tests.yml +16 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/.gitignore +1 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/PKG-INFO +53 -24
- {nextrec-0.4.1 → nextrec-0.4.3}/README.md +52 -23
- {nextrec-0.4.1 → nextrec-0.4.3}/README_zh.md +39 -10
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/rtd/conf.py +1 -1
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/__init__.py +1 -1
- nextrec-0.4.3/nextrec/__version__.py +1 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/basic/activation.py +10 -5
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/basic/callback.py +1 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/basic/features.py +30 -22
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/basic/layers.py +250 -112
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/basic/loggers.py +63 -44
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/basic/metrics.py +270 -120
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/basic/model.py +1084 -402
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/basic/session.py +10 -3
- nextrec-0.4.3/nextrec/cli.py +492 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/data/__init__.py +19 -25
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/data/batch_utils.py +11 -3
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/data/data_processing.py +51 -45
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/data/data_utils.py +26 -15
- nextrec-0.4.3/nextrec/data/dataloader.py +497 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/data/preprocessor.py +320 -199
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/loss/listwise.py +17 -9
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/loss/loss_utils.py +7 -8
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/loss/pairwise.py +2 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/loss/pointwise.py +30 -12
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/generative/hstu.py +103 -38
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/match/dssm.py +82 -68
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/match/dssm_v2.py +72 -57
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/match/mind.py +175 -107
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/match/sdm.py +104 -87
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/match/youtube_dnn.py +73 -59
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/multi_task/esmm.py +69 -46
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/multi_task/mmoe.py +91 -53
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/multi_task/ple.py +117 -58
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/multi_task/poso.py +163 -55
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/multi_task/share_bottom.py +63 -36
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/ranking/afm.py +80 -45
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/ranking/autoint.py +74 -57
- nextrec-0.4.3/nextrec/models/ranking/dcn.py +200 -0
- nextrec-0.4.3/nextrec/models/ranking/dcn_v2.py +304 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/ranking/deepfm.py +39 -24
- nextrec-0.4.3/nextrec/models/ranking/dien.py +508 -0
- nextrec-0.4.3/nextrec/models/ranking/din.py +249 -0
- nextrec-0.4.3/nextrec/models/ranking/fibinet.py +214 -0
- nextrec-0.4.3/nextrec/models/ranking/fm.py +131 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/ranking/masknet.py +95 -33
- nextrec-0.4.3/nextrec/models/ranking/pnn.py +200 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/ranking/widedeep.py +40 -28
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/ranking/xdeepfm.py +67 -40
- nextrec-0.4.3/nextrec/utils/__init__.py +97 -0
- nextrec-0.4.3/nextrec/utils/config.py +496 -0
- nextrec-0.4.3/nextrec/utils/device.py +78 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/utils/distributed.py +36 -9
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/utils/embedding.py +1 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/utils/feature.py +1 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/utils/file.py +33 -11
- nextrec-0.4.3/nextrec/utils/initializer.py +83 -0
- nextrec-0.4.3/nextrec/utils/model.py +44 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/utils/optimizer.py +25 -9
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/utils/synthetic_data.py +283 -165
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/utils/tensor.py +24 -13
- nextrec-0.4.3/nextrec_cli_preset/NextRec-CLI.md +898 -0
- nextrec-0.4.3/nextrec_cli_preset/NextRec-CLI_zh.md +898 -0
- nextrec-0.4.3/nextrec_cli_preset/feature_config.yaml +50 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/afm.yaml +10 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/autoint.yaml +13 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/dcn.yaml +13 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/deepfm.yaml +12 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/din.yaml +17 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/esmm.yaml +16 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/fibinet.yaml +14 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/fm.yaml +8 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/masknet.yaml +17 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/mmoe.yaml +22 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/ple.yaml +30 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/pnn.yaml +14 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/poso.yaml +38 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/share_bottom.yaml +20 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/widedeep.yaml +12 -0
- nextrec-0.4.3/nextrec_cli_preset/model_configs/xdeepfm.yaml +14 -0
- nextrec-0.4.3/nextrec_cli_preset/predict_config.yaml +24 -0
- nextrec-0.4.3/nextrec_cli_preset/train_config.yaml +45 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/pyproject.toml +4 -1
- {nextrec-0.4.1 → nextrec-0.4.3}/requirements.txt +2 -1
- nextrec-0.4.3/scripts/format_code.py +231 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/test/conftest.py +61 -56
- {nextrec-0.4.1 → nextrec-0.4.3}/test/run_tests.py +44 -54
- {nextrec-0.4.1 → nextrec-0.4.3}/test/test_layers.py +161 -190
- {nextrec-0.4.1 → nextrec-0.4.3}/test/test_losses.py +10 -2
- {nextrec-0.4.1 → nextrec-0.4.3}/test/test_match_models.py +330 -292
- {nextrec-0.4.1 → nextrec-0.4.3}/test/test_multitask_models.py +370 -254
- {nextrec-0.4.1 → nextrec-0.4.3}/test/test_preprocessor.py +3 -3
- {nextrec-0.4.1 → nextrec-0.4.3}/test/test_ranking_models.py +646 -401
- {nextrec-0.4.1 → nextrec-0.4.3}/test/test_utils.py +61 -41
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/distributed/example_distributed_training.py +49 -43
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/distributed/example_distributed_training_large_dataset.py +57 -39
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/example_match_dssm.py +50 -20
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/example_multitask.py +46 -39
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/example_ranking_din.py +53 -15
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/movielen_match_dssm.py +65 -35
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/movielen_ranking_deepfm.py +23 -15
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/run_all_match_models.py +85 -55
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/run_all_multitask_models.py +73 -47
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/run_all_ranking_models.py +127 -33
- nextrec-0.4.1/nextrec/__version__.py +0 -1
- nextrec-0.4.1/nextrec/data/dataloader.py +0 -320
- nextrec-0.4.1/nextrec/models/ranking/dcn.py +0 -138
- nextrec-0.4.1/nextrec/models/ranking/dcn_v2.py +0 -84
- nextrec-0.4.1/nextrec/models/ranking/dien.py +0 -319
- nextrec-0.4.1/nextrec/models/ranking/din.py +0 -183
- nextrec-0.4.1/nextrec/models/ranking/fibinet.py +0 -132
- nextrec-0.4.1/nextrec/models/ranking/fm.py +0 -89
- nextrec-0.4.1/nextrec/models/ranking/pnn.py +0 -130
- nextrec-0.4.1/nextrec/utils/__init__.py +0 -72
- nextrec-0.4.1/nextrec/utils/device.py +0 -68
- nextrec-0.4.1/nextrec/utils/initializer.py +0 -38
- nextrec-0.4.1/nextrec/utils/model.py +0 -22
- {nextrec-0.4.1 → nextrec-0.4.3}/.github/workflows/publish.yml +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/.readthedocs.yaml +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/CODE_OF_CONDUCT.md +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/CONTRIBUTING.md +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/LICENSE +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/MANIFEST.in +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/assets/Feature Configuration.png +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/assets/Model Parameters.png +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/assets/Training Configuration.png +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/assets/Training logs.png +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/assets/logo.png +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/assets/mmoe_tutorial.png +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/assets/nextrec_diagram_en.png +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/assets/nextrec_diagram_zh.png +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/assets/test data.png +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/dataset/ctcvr_task.csv +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/dataset/match_task.csv +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/dataset/movielens_100k.csv +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/dataset/multitask_task.csv +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/dataset/ranking_task.csv +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/en/Getting started guide.md +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/rtd/Makefile +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/rtd/index.md +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/rtd/make.bat +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/rtd/modules.rst +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/rtd/nextrec.basic.rst +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/rtd/nextrec.data.rst +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/rtd/nextrec.loss.rst +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/rtd/nextrec.rst +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/rtd/nextrec.utils.rst +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/rtd/requirements.txt +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/docs/zh/快速上手.md +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/basic/__init__.py +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/loss/__init__.py +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/generative/__init__.py +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/generative/tiger.py +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/match/__init__.py +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/multi_task/__init__.py +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/nextrec/models/ranking/__init__.py +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/pytest.ini +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/test/__init__.py +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/test_requirements.txt +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/notebooks/en/Hands on dataprocessor.ipynb +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/notebooks/en/Hands on nextrec.ipynb +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/notebooks/zh/Hands on dataprocessor.ipynb +0 -0
- {nextrec-0.4.1 → nextrec-0.4.3}/tutorials/notebooks/zh/Hands on nextrec.ipynb +0 -0
|
@@ -45,7 +45,23 @@ jobs:
|
|
|
45
45
|
run: |
|
|
46
46
|
python -c "import torch; print(f'PyTorch version: {torch.__version__}')"
|
|
47
47
|
python -c "import nextrec; print('NextRec imported successfully')"
|
|
48
|
+
|
|
49
|
+
- name: Format codebase
|
|
50
|
+
run: |
|
|
51
|
+
python scripts/format_code.py
|
|
52
|
+
|
|
53
|
+
- name: Run tutorial multi-task models
|
|
54
|
+
run: |
|
|
55
|
+
python tutorials/run_all_multitask_models.py
|
|
48
56
|
|
|
57
|
+
- name: Run tutorial ranking models
|
|
58
|
+
run: |
|
|
59
|
+
python tutorials/run_all_ranking_models.py
|
|
60
|
+
|
|
61
|
+
- name: Run tutorial match models
|
|
62
|
+
run: |
|
|
63
|
+
python tutorials/run_all_match_models.py
|
|
64
|
+
|
|
49
65
|
- name: Run match model tests
|
|
50
66
|
run: |
|
|
51
67
|
pytest test/test_match_models.py -v --cov=nextrec/models/match --cov-report=xml
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nextrec
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
|
|
5
5
|
Project-URL: Homepage, https://github.com/zerolovesea/NextRec
|
|
6
6
|
Project-URL: Repository, https://github.com/zerolovesea/NextRec
|
|
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
|
|
|
63
63
|

|
|
64
64
|

|
|
65
65
|

|
|
66
|
-

|
|
67
67
|
|
|
68
68
|
English | [中文文档](README_zh.md)
|
|
69
69
|
|
|
@@ -71,16 +71,28 @@ English | [中文文档](README_zh.md)
|
|
|
71
71
|
|
|
72
72
|
</div>
|
|
73
73
|
|
|
74
|
+
## Table of Contents
|
|
75
|
+
|
|
76
|
+
- [Introduction](#introduction)
|
|
77
|
+
- [Installation](#installation)
|
|
78
|
+
- [Architecture](#architecture)
|
|
79
|
+
- [5-Minute Quick Start](#5-minute-quick-start)
|
|
80
|
+
- [CLI Usage](#cli-usage)
|
|
81
|
+
- [Platform Compatibility](#platform-compatibility)
|
|
82
|
+
- [Supported Models](#supported-models)
|
|
83
|
+
- [Contributing](#contributing)
|
|
84
|
+
|
|
74
85
|
## Introduction
|
|
75
86
|
|
|
76
|
-
NextRec is a modern recommendation framework built on PyTorch,
|
|
87
|
+
NextRec is a modern recommendation system framework built on PyTorch, providing researchers and engineering teams with a fast modeling, training, and evaluation experience. The framework adopts a modular design with rich built-in model implementations, data processing tools, and engineering-ready training components, covering various recommendation scenarios. NextRec provides easy-to-use interfaces, command-line tools, and tutorials, enabling recommendation algorithm learners to quickly understand model architectures and train and infer models at the fastest speed.
|
|
77
88
|
|
|
78
89
|
## Why NextRec
|
|
79
90
|
|
|
80
|
-
- **Unified feature engineering & data pipeline**: Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching
|
|
81
|
-
- **Multi-scenario
|
|
82
|
-
- **Developer-friendly experience**:
|
|
83
|
-
- **
|
|
91
|
+
- **Unified feature engineering & data pipeline**: NextRec provides Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching the model training and inference process based on offline `parquet/csv` features in industrial big-data Spark/Hive scenarios.
|
|
92
|
+
- **Multi-scenario recommendation capabilities**: Covers ranking (CTR/CVR), retrieval, multi-task learning and other recommendation/marketing models, with a continuously expanding model zoo.
|
|
93
|
+
- **Developer-friendly experience**: Supports stream preprocessing/distributed training/inference for various data formats (`csv/parquet/pathlike`), GPU acceleration and visual metric monitoring, facilitating experiments for business algorithm engineers and recommendation algorithm learners.
|
|
94
|
+
- **Flexible command-line tool**: Through configuring training and inference config files, start training and inference processes with one command `nextrec --mode=train --train_config=train_config.yaml`, facilitating rapid experiment iteration and agile deployment.
|
|
95
|
+
- **Efficient training & evaluation**: NextRec's standardized training engine comes with various optimizers, learning rate schedulers, early stopping, model checkpoints, and detailed log management built-in, ready to use out of the box.
|
|
84
96
|
|
|
85
97
|
## Architecture
|
|
86
98
|
|
|
@@ -96,34 +108,36 @@ NextRec adopts a modular and low-coupling engineering design, enabling full-pipe
|
|
|
96
108
|
|
|
97
109
|
You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
|
|
98
110
|
|
|
99
|
-
## Tutorials
|
|
100
111
|
|
|
101
|
-
|
|
112
|
+
## Tutorials
|
|
102
113
|
|
|
103
|
-
|
|
104
|
-
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN training on the e-commerce dataset
|
|
105
|
-
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task training on the e-commerce dataset
|
|
106
|
-
- [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval on MovieLens 100k
|
|
114
|
+
We provide multiple examples in the `tutorials/` directory, covering ranking, retrieval, multi-task, and data processing scenarios:
|
|
107
115
|
|
|
108
|
-
|
|
116
|
+
- [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM model training example on MovieLens 100k dataset
|
|
117
|
+
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN deep interest network training example on e-commerce dataset
|
|
118
|
+
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training example on e-commerce dataset
|
|
119
|
+
- [movielen_match_dssm.py](/tutorials/movielen_match_dssm.py) — DSSM retrieval model example trained on MovieLens 100k dataset
|
|
120
|
+
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly verify the availability of all ranking models
|
|
121
|
+
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly verify the availability of all multi-task models
|
|
122
|
+
- [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly verify the availability of all retrieval models
|
|
109
123
|
|
|
110
|
-
|
|
111
|
-
- [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
|
|
124
|
+
If you want to learn more details about the NextRec framework, we also provide Jupyter notebooks to help you understand:
|
|
112
125
|
|
|
113
|
-
|
|
126
|
+
- [How to get started with the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
|
|
127
|
+
- [How to use the data processor for data preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
|
|
114
128
|
|
|
115
129
|
## 5-Minute Quick Start
|
|
116
130
|
|
|
117
|
-
We provide a detailed quick start and paired datasets to help you
|
|
131
|
+
We provide a detailed quick start guide and paired datasets to help you become familiar with different features of the NextRec framework. We provide a test dataset from an e-commerce scenario in the `dataset/` directory, with data examples as follows:
|
|
118
132
|
|
|
119
133
|
| user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
|
|
120
134
|
|--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
|
|
121
135
|
| 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
|
|
122
136
|
| 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
|
|
123
137
|
|
|
124
|
-
|
|
138
|
+
Next, we'll use a short example to show you how to train a DIN model using NextRec. DIN (Deep Interest Network) is from Alibaba's 2018 KDD Best Paper, used for CTR prediction scenarios. You can also directly execute `python tutorials/example_ranking_din.py` to run the training and inference code.
|
|
125
139
|
|
|
126
|
-
After training, detailed logs
|
|
140
|
+
After starting training, you can view detailed training logs in the `nextrec_logs/din_tutorial` path.
|
|
127
141
|
|
|
128
142
|
```python
|
|
129
143
|
import pandas as pd
|
|
@@ -196,9 +210,25 @@ metrics = model.evaluate(
|
|
|
196
210
|
)
|
|
197
211
|
```
|
|
198
212
|
|
|
213
|
+
## CLI Usage
|
|
214
|
+
|
|
215
|
+
NextRec provides a powerful command-line interface for model training and prediction using YAML configuration files. For detailed CLI documentation, see:
|
|
216
|
+
|
|
217
|
+
- [NextRec CLI User Guide](/nextrec_cli_preset/NextRec-CLI.md) - Complete guide for using the CLI
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
# Train a model
|
|
221
|
+
nextrec --mode=train --train_config=path/to/train_config.yaml
|
|
222
|
+
|
|
223
|
+
# Run prediction
|
|
224
|
+
nextrec --mode=predict --predict_config=path/to/predict_config.yaml
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
> As of version 0.4.3, NextRec CLI supports single-machine training; distributed training features are currently under development.
|
|
228
|
+
|
|
199
229
|
## Platform Compatibility
|
|
200
230
|
|
|
201
|
-
The current version is 0.4.
|
|
231
|
+
The current version is 0.4.3. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
|
|
202
232
|
|
|
203
233
|
| Platform | Configuration |
|
|
204
234
|
|----------|---------------|
|
|
@@ -247,14 +277,13 @@ The current version is 0.4.1. All models and test code have been validated on th
|
|
|
247
277
|
| [ESMM](nextrec/models/multi_task/esmm.py) | Entire Space Multi-task Model | SIGIR 2018 | Supported |
|
|
248
278
|
| [ShareBottom](nextrec/models/multi_task/share_bottom.py) | Multitask Learning | - | Supported |
|
|
249
279
|
| [POSO](nextrec/models/multi_task/poso.py) | POSO: Personalized Cold-start Modules for Large-scale Recommender Systems | 2021 | Supported |
|
|
250
|
-
| [POSO-IFLYTEK](nextrec/models/multi_task/poso_iflytek.py) | POSO with PLE-style gating for sequential marketing tasks | - | Supported |
|
|
251
280
|
|
|
252
281
|
### Generative Models
|
|
253
282
|
|
|
254
283
|
| Model | Paper | Year | Status |
|
|
255
284
|
|-------|-------|------|--------|
|
|
256
285
|
| [TIGER](nextrec/models/generative/tiger.py) | Recommender Systems with Generative Retrieval | NeurIPS 2023 | In Progress |
|
|
257
|
-
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - |
|
|
286
|
+
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - | Supported |
|
|
258
287
|
|
|
259
288
|
---
|
|
260
289
|
|
|
@@ -270,7 +299,7 @@ We welcome contributions of any form!
|
|
|
270
299
|
4. Push your branch (`git push origin feature/AmazingFeature`)
|
|
271
300
|
5. Open a Pull Request
|
|
272
301
|
|
|
273
|
-
> Before submitting a PR, please run
|
|
302
|
+
> Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is unified.
|
|
274
303
|
|
|
275
304
|
### Code Style
|
|
276
305
|
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|

|
|
8
8
|

|
|
9
9
|

|
|
10
|
-

|
|
11
11
|
|
|
12
12
|
English | [中文文档](README_zh.md)
|
|
13
13
|
|
|
@@ -15,16 +15,28 @@ English | [中文文档](README_zh.md)
|
|
|
15
15
|
|
|
16
16
|
</div>
|
|
17
17
|
|
|
18
|
+
## Table of Contents
|
|
19
|
+
|
|
20
|
+
- [Introduction](#introduction)
|
|
21
|
+
- [Installation](#installation)
|
|
22
|
+
- [Architecture](#architecture)
|
|
23
|
+
- [5-Minute Quick Start](#5-minute-quick-start)
|
|
24
|
+
- [CLI Usage](#cli-usage)
|
|
25
|
+
- [Platform Compatibility](#platform-compatibility)
|
|
26
|
+
- [Supported Models](#supported-models)
|
|
27
|
+
- [Contributing](#contributing)
|
|
28
|
+
|
|
18
29
|
## Introduction
|
|
19
30
|
|
|
20
|
-
NextRec is a modern recommendation framework built on PyTorch,
|
|
31
|
+
NextRec is a modern recommendation system framework built on PyTorch, providing researchers and engineering teams with a fast modeling, training, and evaluation experience. The framework adopts a modular design with rich built-in model implementations, data processing tools, and engineering-ready training components, covering various recommendation scenarios. NextRec provides easy-to-use interfaces, command-line tools, and tutorials, enabling recommendation algorithm learners to quickly understand model architectures and train and infer models at the fastest speed.
|
|
21
32
|
|
|
22
33
|
## Why NextRec
|
|
23
34
|
|
|
24
|
-
- **Unified feature engineering & data pipeline**: Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching
|
|
25
|
-
- **Multi-scenario
|
|
26
|
-
- **Developer-friendly experience**:
|
|
27
|
-
- **
|
|
35
|
+
- **Unified feature engineering & data pipeline**: NextRec provides Dense/Sparse/Sequence feature definitions, persistent DataProcessor, and batch-optimized RecDataLoader, matching the model training and inference process based on offline `parquet/csv` features in industrial big-data Spark/Hive scenarios.
|
|
36
|
+
- **Multi-scenario recommendation capabilities**: Covers ranking (CTR/CVR), retrieval, multi-task learning and other recommendation/marketing models, with a continuously expanding model zoo.
|
|
37
|
+
- **Developer-friendly experience**: Supports stream preprocessing/distributed training/inference for various data formats (`csv/parquet/pathlike`), GPU acceleration and visual metric monitoring, facilitating experiments for business algorithm engineers and recommendation algorithm learners.
|
|
38
|
+
- **Flexible command-line tool**: Through configuring training and inference config files, start training and inference processes with one command `nextrec --mode=train --train_config=train_config.yaml`, facilitating rapid experiment iteration and agile deployment.
|
|
39
|
+
- **Efficient training & evaluation**: NextRec's standardized training engine comes with various optimizers, learning rate schedulers, early stopping, model checkpoints, and detailed log management built-in, ready to use out of the box.
|
|
28
40
|
|
|
29
41
|
## Architecture
|
|
30
42
|
|
|
@@ -40,34 +52,36 @@ NextRec adopts a modular and low-coupling engineering design, enabling full-pipe
|
|
|
40
52
|
|
|
41
53
|
You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
|
|
42
54
|
|
|
43
|
-
## Tutorials
|
|
44
55
|
|
|
45
|
-
|
|
56
|
+
## Tutorials
|
|
46
57
|
|
|
47
|
-
|
|
48
|
-
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN training on the e-commerce dataset
|
|
49
|
-
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task training on the e-commerce dataset
|
|
50
|
-
- [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval on MovieLens 100k
|
|
58
|
+
We provide multiple examples in the `tutorials/` directory, covering ranking, retrieval, multi-task, and data processing scenarios:
|
|
51
59
|
|
|
52
|
-
|
|
60
|
+
- [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM model training example on MovieLens 100k dataset
|
|
61
|
+
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN deep interest network training example on e-commerce dataset
|
|
62
|
+
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training example on e-commerce dataset
|
|
63
|
+
- [movielen_match_dssm.py](/tutorials/movielen_match_dssm.py) — DSSM retrieval model example trained on MovieLens 100k dataset
|
|
64
|
+
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly verify the availability of all ranking models
|
|
65
|
+
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly verify the availability of all multi-task models
|
|
66
|
+
- [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly verify the availability of all retrieval models
|
|
53
67
|
|
|
54
|
-
|
|
55
|
-
- [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
|
|
68
|
+
If you want to learn more details about the NextRec framework, we also provide Jupyter notebooks to help you understand:
|
|
56
69
|
|
|
57
|
-
|
|
70
|
+
- [How to get started with the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
|
|
71
|
+
- [How to use the data processor for data preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
|
|
58
72
|
|
|
59
73
|
## 5-Minute Quick Start
|
|
60
74
|
|
|
61
|
-
We provide a detailed quick start and paired datasets to help you
|
|
75
|
+
We provide a detailed quick start guide and paired datasets to help you become familiar with different features of the NextRec framework. We provide a test dataset from an e-commerce scenario in the `dataset/` directory, with data examples as follows:
|
|
62
76
|
|
|
63
77
|
| user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
|
|
64
78
|
|--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
|
|
65
79
|
| 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
|
|
66
80
|
| 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
|
|
67
81
|
|
|
68
|
-
|
|
82
|
+
Next, we'll use a short example to show you how to train a DIN model using NextRec. DIN (Deep Interest Network) is from Alibaba's 2018 KDD Best Paper, used for CTR prediction scenarios. You can also directly execute `python tutorials/example_ranking_din.py` to run the training and inference code.
|
|
69
83
|
|
|
70
|
-
After training, detailed logs
|
|
84
|
+
After starting training, you can view detailed training logs in the `nextrec_logs/din_tutorial` path.
|
|
71
85
|
|
|
72
86
|
```python
|
|
73
87
|
import pandas as pd
|
|
@@ -140,9 +154,25 @@ metrics = model.evaluate(
|
|
|
140
154
|
)
|
|
141
155
|
```
|
|
142
156
|
|
|
157
|
+
## CLI Usage
|
|
158
|
+
|
|
159
|
+
NextRec provides a powerful command-line interface for model training and prediction using YAML configuration files. For detailed CLI documentation, see:
|
|
160
|
+
|
|
161
|
+
- [NextRec CLI User Guide](/nextrec_cli_preset/NextRec-CLI.md) - Complete guide for using the CLI
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
# Train a model
|
|
165
|
+
nextrec --mode=train --train_config=path/to/train_config.yaml
|
|
166
|
+
|
|
167
|
+
# Run prediction
|
|
168
|
+
nextrec --mode=predict --predict_config=path/to/predict_config.yaml
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
> As of version 0.4.3, NextRec CLI supports single-machine training; distributed training features are currently under development.
|
|
172
|
+
|
|
143
173
|
## Platform Compatibility
|
|
144
174
|
|
|
145
|
-
The current version is 0.4.
|
|
175
|
+
The current version is 0.4.3. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
|
|
146
176
|
|
|
147
177
|
| Platform | Configuration |
|
|
148
178
|
|----------|---------------|
|
|
@@ -191,14 +221,13 @@ The current version is 0.4.1. All models and test code have been validated on th
|
|
|
191
221
|
| [ESMM](nextrec/models/multi_task/esmm.py) | Entire Space Multi-task Model | SIGIR 2018 | Supported |
|
|
192
222
|
| [ShareBottom](nextrec/models/multi_task/share_bottom.py) | Multitask Learning | - | Supported |
|
|
193
223
|
| [POSO](nextrec/models/multi_task/poso.py) | POSO: Personalized Cold-start Modules for Large-scale Recommender Systems | 2021 | Supported |
|
|
194
|
-
| [POSO-IFLYTEK](nextrec/models/multi_task/poso_iflytek.py) | POSO with PLE-style gating for sequential marketing tasks | - | Supported |
|
|
195
224
|
|
|
196
225
|
### Generative Models
|
|
197
226
|
|
|
198
227
|
| Model | Paper | Year | Status |
|
|
199
228
|
|-------|-------|------|--------|
|
|
200
229
|
| [TIGER](nextrec/models/generative/tiger.py) | Recommender Systems with Generative Retrieval | NeurIPS 2023 | In Progress |
|
|
201
|
-
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - |
|
|
230
|
+
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - | Supported |
|
|
202
231
|
|
|
203
232
|
---
|
|
204
233
|
|
|
@@ -214,7 +243,7 @@ We welcome contributions of any form!
|
|
|
214
243
|
4. Push your branch (`git push origin feature/AmazingFeature`)
|
|
215
244
|
5. Open a Pull Request
|
|
216
245
|
|
|
217
|
-
> Before submitting a PR, please run
|
|
246
|
+
> Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is unified.
|
|
218
247
|
|
|
219
248
|
### Code Style
|
|
220
249
|
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|

|
|
8
8
|

|
|
9
9
|

|
|
10
|
-

|
|
11
11
|
|
|
12
12
|
[English Version](README.md) | 中文文档
|
|
13
13
|
|
|
@@ -15,15 +15,27 @@
|
|
|
15
15
|
|
|
16
16
|
</div>
|
|
17
17
|
|
|
18
|
+
## 目录
|
|
19
|
+
|
|
20
|
+
- [简介](#简介)
|
|
21
|
+
- [安装](#安装)
|
|
22
|
+
- [架构](#架构)
|
|
23
|
+
- [5分钟快速上手](#5分钟快速上手)
|
|
24
|
+
- [命令行工具](#命令行工具)
|
|
25
|
+
- [兼容平台](#兼容平台)
|
|
26
|
+
- [支持模型](#支持模型)
|
|
27
|
+
- [贡献指南](#贡献指南)
|
|
28
|
+
|
|
18
29
|
## 简介
|
|
19
30
|
|
|
20
|
-
NextRec
|
|
31
|
+
NextRec是一个基于 PyTorch 构建的现代推荐系统框架,为研究人员与工程团队提供快速的建模、训练与评估体验。框架采用模块化设计,内置丰富的模型实现、数据处理工具和工程化训练组件,覆盖多种推荐场景。NextRec提供了易上手的接口、命令行工具及教程,推荐算法学习者能以最快速度了解模型架构并完成模型的训练和推理。
|
|
21
32
|
|
|
22
33
|
## Why NextRec
|
|
23
34
|
|
|
24
|
-
- **统一的特征工程与数据流水线**:NextRec框架提供了 Dense/Sparse/Sequence 特征定义、可持久化的 DataProcessor、批处理优化的 RecDataLoader
|
|
35
|
+
- **统一的特征工程与数据流水线**:NextRec框架提供了 Dense/Sparse/Sequence 特征定义、可持久化的 DataProcessor、批处理优化的 RecDataLoader,符合工业大数据Spark/Hive场景下,基于离线`parquet/csv`特征的模型训练推理流程。
|
|
25
36
|
- **多场景推荐能力**:同时覆盖排序(CTR/CVR)、召回、多任务学习等推荐/营销模型,模型库仍在持续扩充中。
|
|
26
|
-
-
|
|
37
|
+
- **友好的工程体验**:支持各种格式数据(`csv/parquet/pathlike`)的流式预处理/分布式训练/推理,GPU加速与可视化指标监控,方便业务算法工程师和推荐算法学习者进行实验。
|
|
38
|
+
- **灵活的命令行工具**:准备好训练配置文件和推理配置文件后,即可通过 `nextrec --mode=train --train_config=train_config.yaml` 一键启动训练和推理进程,方便快速实验迭代和敏捷部署。
|
|
27
39
|
- **高效训练与评估**:NextRec框架的标准化训练引擎内置多种优化器、学习率调度、早停、模型检查点与详细的日志管理,开箱即用。
|
|
28
40
|
|
|
29
41
|
## 架构
|
|
@@ -48,14 +60,15 @@ NextRec采用模块化、低耦合的工程设计,使得推荐系统从数据
|
|
|
48
60
|
- [example_ranking_din.py](/tutorials/example_ranking_din.py) - 电商数据集上的DIN 深度兴趣网络训练示例
|
|
49
61
|
- [example_multitask.py](/tutorials/example_multitask.py) - 电商数据集上的ESMM多任务学习训练示例
|
|
50
62
|
- [example_match_dssm.py](/tutorials/example_match_dssm.py) - 基于movielen 100k数据集训练的 DSSM 召回模型示例
|
|
63
|
+
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) - 快速校验所有排序模型的可用性
|
|
64
|
+
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) - 快速校验所有多任务模型的可用性
|
|
65
|
+
- [run_all_match_models.py](/tutorials/run_all_match_models.py) - 快速校验所有召回模型的可用性
|
|
51
66
|
|
|
52
67
|
如果想了解更多NextRec框架的细节,我们还提供了Jupyter notebook来帮助你了解:
|
|
53
68
|
|
|
54
69
|
- [如何上手NextRec框架](/tutorials/notebooks/zh/Hands%20on%20nextrec.ipynb)
|
|
55
70
|
- [如何使用数据处理器进行数据预处理](/tutorials/notebooks/zh/Hands%20on%20dataprocessor.ipynb)
|
|
56
71
|
|
|
57
|
-
> 当前版本[0.4.1],召回模型模块尚不完善,可能存在一些兼容性问题或意外报错,如果遇到问题,欢迎开发者在Issue区提出问题。
|
|
58
|
-
|
|
59
72
|
## 5分钟快速上手
|
|
60
73
|
|
|
61
74
|
我们提供了详细的上手指南和配套数据集,帮助您熟悉NextRec框架的不同功能。我们在`datasets/`路径下提供了一个来自电商场景的测试数据集,数据示例如下:
|
|
@@ -138,9 +151,26 @@ metrics = model.evaluate(
|
|
|
138
151
|
user_id_column='user_id'
|
|
139
152
|
)
|
|
140
153
|
```
|
|
154
|
+
|
|
155
|
+
## 命令行工具
|
|
156
|
+
|
|
157
|
+
NextRec 提供了强大的命令行界面,支持通过 YAML 配置文件进行模型训练和预测。详细的 CLI 文档请参见:
|
|
158
|
+
|
|
159
|
+
- [NextRec CLI 使用指南](/nextrec_cli_preset/NextRec-CLI_zh.md) - 完整的 CLI 使用文档
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
# 训练模型
|
|
163
|
+
nextrec --mode=train --train_config=path/to/train_config.yaml
|
|
164
|
+
|
|
165
|
+
# 运行预测
|
|
166
|
+
nextrec --mode=predict --predict_config=path/to/predict_config.yaml
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
> 截至当前版本0.4.3,NextRec CLI支持单机训练,分布式训练相关功能尚在开发中。
|
|
170
|
+
|
|
141
171
|
## 兼容平台
|
|
142
172
|
|
|
143
|
-
当前最新版本为0.4.
|
|
173
|
+
当前最新版本为0.4.3,所有模型和测试代码均已在以下平台通过验证,如果开发者在使用中遇到兼容问题,请在issue区提交错误报告并附上系统版本:
|
|
144
174
|
|
|
145
175
|
| 平台 | 配置 |
|
|
146
176
|
|------|------|
|
|
@@ -187,14 +217,13 @@ metrics = model.evaluate(
|
|
|
187
217
|
| [ESMM](nextrec/models/multi_task/esmm.py) | Entire Space Multi-Task Model | SIGIR 2018 | 已支持 |
|
|
188
218
|
| [ShareBottom](nextrec/models/multi_task/share_bottom.py) | Multitask Learning | - | 已支持 |
|
|
189
219
|
| [POSO](nextrec/models/multi_task/poso.py) | POSO: Personalized Cold-start Modules for Large-scale Recommender Systems | 2021 | 已支持 |
|
|
190
|
-
| [POSO-IFLYTEK](nextrec/models/multi_task/poso_iflytek.py) | POSO with PLE-style gating for sequential marketing tasks | - | 已支持 |
|
|
191
220
|
|
|
192
221
|
### 生成式模型
|
|
193
222
|
|
|
194
223
|
| 模型 | 论文 | 年份 | 状态 |
|
|
195
224
|
|------|------|------|------|
|
|
196
225
|
| [TIGER](nextrec/models/generative/tiger.py) | Recommender Systems with Generative Retrieval | NeurIPS 2023 | 开发中 |
|
|
197
|
-
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - |
|
|
226
|
+
| [HSTU](nextrec/models/generative/hstu.py) | Hierarchical Sequential Transduction Units | - | 已支持 |
|
|
198
227
|
|
|
199
228
|
---
|
|
200
229
|
|
|
@@ -210,7 +239,7 @@ metrics = model.evaluate(
|
|
|
210
239
|
4. 推送到分支 (`git push origin feature/AmazingFeature`)
|
|
211
240
|
5. 创建 Pull Request
|
|
212
241
|
|
|
213
|
-
> 在提交 PR 之前,请运行 `python test/run_tests.py`
|
|
242
|
+
> 在提交 PR 之前,请运行 `python test/run_tests.py` 和 `python scripts/format_code.py` 确保所有测试通过并统一代码风格。
|
|
214
243
|
|
|
215
244
|
### 代码规范
|
|
216
245
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.4.3"
|
|
@@ -14,20 +14,21 @@ class Dice(nn.Module):
|
|
|
14
14
|
"""
|
|
15
15
|
Dice activation function from the paper:
|
|
16
16
|
"Deep Interest Network for Click-Through Rate Prediction" (Zhou et al., 2018)
|
|
17
|
-
|
|
17
|
+
|
|
18
18
|
Dice(x) = p(x) * x + (1 - p(x)) * alpha * x
|
|
19
19
|
where p(x) = sigmoid((x - E[x]) / sqrt(Var[x] + epsilon))
|
|
20
20
|
"""
|
|
21
|
+
|
|
21
22
|
def __init__(self, emb_size: int, epsilon: float = 1e-9):
|
|
22
23
|
super(Dice, self).__init__()
|
|
23
24
|
self.epsilon = epsilon
|
|
24
25
|
self.alpha = nn.Parameter(torch.zeros(emb_size))
|
|
25
26
|
self.bn = nn.BatchNorm1d(emb_size)
|
|
26
|
-
|
|
27
|
+
|
|
27
28
|
def forward(self, x):
|
|
28
29
|
# x shape: (batch_size, emb_size) or (batch_size, seq_len, emb_size)
|
|
29
30
|
original_shape = x.shape
|
|
30
|
-
|
|
31
|
+
|
|
31
32
|
if x.dim() == 3:
|
|
32
33
|
# For 3D input (batch_size, seq_len, emb_size), reshape to 2D
|
|
33
34
|
batch_size, seq_len, emb_size = x.shape
|
|
@@ -45,7 +46,9 @@ def activation_layer(activation: str, emb_size: int | None = None):
|
|
|
45
46
|
activation = activation.lower()
|
|
46
47
|
if activation == "dice":
|
|
47
48
|
if emb_size is None:
|
|
48
|
-
raise ValueError(
|
|
49
|
+
raise ValueError(
|
|
50
|
+
"[ActivationLayer Error]: emb_size is required for Dice activation"
|
|
51
|
+
)
|
|
49
52
|
return Dice(emb_size)
|
|
50
53
|
elif activation == "relu":
|
|
51
54
|
return nn.ReLU()
|
|
@@ -84,4 +87,6 @@ def activation_layer(activation: str, emb_size: int | None = None):
|
|
|
84
87
|
elif activation in ["none", "linear", "identity"]:
|
|
85
88
|
return nn.Identity()
|
|
86
89
|
else:
|
|
87
|
-
raise ValueError(
|
|
90
|
+
raise ValueError(
|
|
91
|
+
f"[ActivationLayer Error]: Unsupported activation function: {activation}"
|
|
92
|
+
)
|