nextrec 0.4.3__tar.gz → 0.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nextrec-0.4.3 → nextrec-0.4.4}/PKG-INFO +34 -28
- {nextrec-0.4.3 → nextrec-0.4.4}/README.md +33 -27
- {nextrec-0.4.3 → nextrec-0.4.4}/README_zh.md +21 -13
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/rtd/conf.py +1 -1
- nextrec-0.4.4/nextrec/__version__.py +1 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/xdeepfm.py +47 -5
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/config.py +6 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/initializer.py +12 -16
- {nextrec-0.4.3 → nextrec-0.4.4}/pyproject.toml +1 -1
- nextrec-0.4.3/nextrec/__version__.py +0 -1
- {nextrec-0.4.3 → nextrec-0.4.4}/.github/workflows/publish.yml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/.github/workflows/tests.yml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/.gitignore +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/.readthedocs.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/CODE_OF_CONDUCT.md +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/CONTRIBUTING.md +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/LICENSE +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/MANIFEST.in +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/assets/Feature Configuration.png +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/assets/Model Parameters.png +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/assets/Training Configuration.png +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/assets/Training logs.png +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/assets/logo.png +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/assets/mmoe_tutorial.png +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/assets/nextrec_diagram_en.png +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/assets/nextrec_diagram_zh.png +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/assets/test data.png +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/dataset/ctcvr_task.csv +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/dataset/match_task.csv +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/dataset/movielens_100k.csv +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/dataset/multitask_task.csv +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/dataset/ranking_task.csv +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/en/Getting started guide.md +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/rtd/Makefile +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/rtd/index.md +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/rtd/make.bat +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/rtd/modules.rst +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/rtd/nextrec.basic.rst +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/rtd/nextrec.data.rst +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/rtd/nextrec.loss.rst +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/rtd/nextrec.rst +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/rtd/nextrec.utils.rst +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/rtd/requirements.txt +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/docs/zh/快速上手.md +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/__init__.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/basic/__init__.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/basic/activation.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/basic/callback.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/basic/features.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/basic/layers.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/basic/loggers.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/basic/metrics.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/basic/model.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/basic/session.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/cli.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/data/__init__.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/data/batch_utils.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/data/data_processing.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/data/data_utils.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/data/dataloader.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/data/preprocessor.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/loss/__init__.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/loss/listwise.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/loss/loss_utils.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/loss/pairwise.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/loss/pointwise.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/generative/__init__.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/generative/hstu.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/generative/tiger.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/match/__init__.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/match/dssm.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/match/dssm_v2.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/match/mind.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/match/sdm.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/match/youtube_dnn.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/multi_task/__init__.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/multi_task/esmm.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/multi_task/mmoe.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/multi_task/ple.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/multi_task/poso.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/multi_task/share_bottom.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/__init__.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/afm.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/autoint.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/dcn.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/dcn_v2.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/deepfm.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/dien.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/din.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/fibinet.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/fm.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/masknet.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/pnn.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/models/ranking/widedeep.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/__init__.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/device.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/distributed.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/embedding.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/feature.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/file.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/model.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/optimizer.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/synthetic_data.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec/utils/tensor.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/NextRec-CLI.md +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/NextRec-CLI_zh.md +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/feature_config.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/afm.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/autoint.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/dcn.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/deepfm.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/din.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/esmm.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/fibinet.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/fm.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/masknet.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/mmoe.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/ple.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/pnn.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/poso.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/share_bottom.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/widedeep.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/model_configs/xdeepfm.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/predict_config.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/nextrec_cli_preset/train_config.yaml +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/pytest.ini +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/requirements.txt +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/scripts/format_code.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/test/__init__.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/test/conftest.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/test/run_tests.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/test/test_layers.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/test/test_losses.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/test/test_match_models.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/test/test_multitask_models.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/test/test_preprocessor.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/test/test_ranking_models.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/test/test_utils.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/test_requirements.txt +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/distributed/example_distributed_training.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/distributed/example_distributed_training_large_dataset.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/example_match_dssm.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/example_multitask.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/example_ranking_din.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/movielen_match_dssm.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/movielen_ranking_deepfm.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/notebooks/en/Hands on dataprocessor.ipynb +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/notebooks/en/Hands on nextrec.ipynb +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/notebooks/zh/Hands on dataprocessor.ipynb +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/notebooks/zh/Hands on nextrec.ipynb +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/run_all_match_models.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/run_all_multitask_models.py +0 -0
- {nextrec-0.4.3 → nextrec-0.4.4}/tutorials/run_all_ranking_models.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nextrec
|
|
3
|
-
Version: 0.4.3
|
|
3
|
+
Version: 0.4.4
|
|
4
4
|
Summary: A comprehensive recommendation library with match, ranking, and multi-task learning models
|
|
5
5
|
Project-URL: Homepage, https://github.com/zerolovesea/NextRec
|
|
6
6
|
Project-URL: Repository, https://github.com/zerolovesea/NextRec
|
|
@@ -63,7 +63,7 @@ Description-Content-Type: text/markdown
|
|
|
63
63
|

|
|
64
64
|

|
|
65
65
|

|
|
66
|
-

|
|
67
67
|
|
|
68
68
|
English | [中文文档](README_zh.md)
|
|
69
69
|
|
|
@@ -84,60 +84,65 @@ English | [中文文档](README_zh.md)
|
|
|
84
84
|
|
|
85
85
|
## Introduction
|
|
86
86
|
|
|
87
|
-
NextRec is a modern recommendation
|
|
87
|
+
NextRec is a modern recommendation framework built on PyTorch, delivering a unified experience for modeling, training, and evaluation. It is designed with rich model implementations, data-processing utilities, and engineering-ready training components. NextRec focuses on large-scale industrial recommendation scenarios on Spark clusters, training on massive offline features (`parquet/csv`).
|
|
88
88
|
|
|
89
89
|
## Why NextRec
|
|
90
90
|
|
|
91
|
-
- **Unified feature engineering & data pipeline**: NextRec
|
|
92
|
-
- **Multi-scenario
|
|
93
|
-
- **Developer-friendly experience**:
|
|
94
|
-
- **
|
|
95
|
-
- **Efficient training & evaluation**: NextRec's standardized training engine comes with various optimizers, learning rate schedulers, early stopping, model checkpoints, and detailed log management built-in, ready to use out of the box.
|
|
91
|
+
- **Unified feature engineering & data pipeline**: NextRec provides unified Dense/Sparse/Sequence feature definitions, DataProcessor, and batch-optimized RecDataLoader, matching offline feature training/inference in industrial big-data settings.
|
|
92
|
+
- **Multi-scenario coverage**: Ranking (CTR/CVR), retrieval, multi-task learning, and more marketing/rec models, with a continuously expanding model zoo.
|
|
93
|
+
- **Developer-friendly experience**: `Stream processing/distributed training/inference` for `csv/parquet/pathlike` data, plus GPU/MPS acceleration and visualization support.
|
|
94
|
+
- **Efficient training & evaluation**: Standardized engine with optimizers, LR schedulers, early stopping, checkpoints, and detailed logging out of the box.
|
|
96
95
|
|
|
97
96
|
## Architecture
|
|
98
97
|
|
|
99
|
-
NextRec adopts a modular
|
|
98
|
+
NextRec adopts a modular design, enabling full-pipeline reusability and scalability across data processing → model construction → training & evaluation → inference & deployment. Its core components include: a Feature-Spec-driven Embedding architecture, the BaseModel abstraction, a set of independent reusable Layers, a unified DataLoader for both training and inference, and a ready-to-use Model Zoo.
|
|
100
99
|
|
|
101
100
|

|
|
102
101
|
|
|
103
|
-
> The project borrows ideas from excellent open-source rec libraries
|
|
102
|
+
> The project borrows ideas from excellent open-source rec libraries, for example: [torch-rechub](https://github.com/datawhalechina/torch-rechub). torch-rechub remains mature in architecture and models; the author contributed a bit there—feel free to check it out.
|
|
104
103
|
|
|
105
104
|
---
|
|
106
105
|
|
|
107
106
|
## Installation
|
|
108
107
|
|
|
109
|
-
You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
|
|
108
|
+
You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required. If you want to run the tutorial code, clone this repository first:
|
|
110
109
|
|
|
110
|
+
```bash
|
|
111
|
+
git clone https://github.com/zerolovesea/NextRec.git
|
|
112
|
+
cd NextRec/
|
|
113
|
+
pip install nextrec # or pip install -e .
|
|
114
|
+
```
|
|
111
115
|
|
|
112
116
|
## Tutorials
|
|
113
117
|
|
|
114
|
-
|
|
118
|
+
See `tutorials/` for examples covering ranking, retrieval, multi-task learning, and data processing:
|
|
119
|
+
|
|
120
|
+
- [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM training on MovieLens 100k dataset
|
|
121
|
+
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN Deep Interest Network training on e-commerce dataset
|
|
122
|
+
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training on e-commerce dataset
|
|
123
|
+
- [movielen_match_dssm.py](/tutorials/movielen_match_dssm.py) — DSSM retrieval model training on MovieLens 100k dataset
|
|
115
124
|
|
|
116
|
-
- [
|
|
117
|
-
- [
|
|
118
|
-
- [
|
|
119
|
-
- [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval model example trained on MovieLens 100k dataset
|
|
120
|
-
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly verify the availability of all ranking models
|
|
121
|
-
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly verify the availability of all multi-task models
|
|
122
|
-
- [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly verify the availability of all retrieval models
|
|
125
|
+
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly validate availability of all ranking models
|
|
126
|
+
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly validate availability of all multi-task models
|
|
127
|
+
- [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly validate availability of all retrieval models
|
|
123
128
|
|
|
124
|
-
|
|
129
|
+
To dive deeper into the details of the NextRec framework, Jupyter notebooks are available:
|
|
125
130
|
|
|
126
|
-
- [
|
|
127
|
-
- [
|
|
131
|
+
- [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
|
|
132
|
+
- [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
|
|
128
133
|
|
|
129
134
|
## 5-Minute Quick Start
|
|
130
135
|
|
|
131
|
-
We provide a detailed quick
|
|
136
|
+
We provide a detailed quick-start guide and paired datasets to help you get familiar with the different features of the NextRec framework. In `dataset/` you'll find an e-commerce scenario test dataset like this:
|
|
132
137
|
|
|
133
138
|
| user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
|
|
134
139
|
|--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
|
|
135
140
|
| 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
|
|
136
141
|
| 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
|
|
137
142
|
|
|
138
|
-
|
|
143
|
+
Below is a short example showing how to train a DIN (Deep Interest Network) model. You can also run `python tutorials/example_ranking_din.py` directly to execute the training and inference code.
|
|
139
144
|
|
|
140
|
-
After
|
|
145
|
+
After training starts, you can find detailed training logs at `nextrec_logs/din_tutorial`.
|
|
141
146
|
|
|
142
147
|
```python
|
|
143
148
|
import pandas as pd
|
|
@@ -215,6 +220,7 @@ metrics = model.evaluate(
|
|
|
215
220
|
NextRec provides a powerful command-line interface for model training and prediction using YAML configuration files. For detailed CLI documentation, see:
|
|
216
221
|
|
|
217
222
|
- [NextRec CLI User Guide](/nextrec_cli_preset/NextRec-CLI.md) - Complete guide for using the CLI
|
|
223
|
+
- [NextRec CLI Configuration Examples](/nextrec_cli_preset/) - CLI configuration file examples
|
|
218
224
|
|
|
219
225
|
```bash
|
|
220
226
|
# Train a model
|
|
@@ -224,11 +230,11 @@ nextrec --mode=train --train_config=path/to/train_config.yaml
|
|
|
224
230
|
nextrec --mode=predict --predict_config=path/to/predict_config.yaml
|
|
225
231
|
```
|
|
226
232
|
|
|
227
|
-
> As of version 0.4.
|
|
233
|
+
> As of version 0.4.4, NextRec CLI supports single-machine training; distributed training features are currently under development.
|
|
228
234
|
|
|
229
235
|
## Platform Compatibility
|
|
230
236
|
|
|
231
|
-
The current version is 0.4.
|
|
237
|
+
The current version is 0.4.4. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
|
|
232
238
|
|
|
233
239
|
| Platform | Configuration |
|
|
234
240
|
|----------|---------------|
|
|
@@ -299,7 +305,7 @@ We welcome contributions of any form!
|
|
|
299
305
|
4. Push your branch (`git push origin feature/AmazingFeature`)
|
|
300
306
|
5. Open a Pull Request
|
|
301
307
|
|
|
302
|
-
> Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is
|
|
308
|
+
> Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is consistent.
|
|
303
309
|
|
|
304
310
|
### Code Style
|
|
305
311
|
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|

|
|
8
8
|

|
|
9
9
|

|
|
10
|
-

|
|
11
11
|
|
|
12
12
|
English | [中文文档](README_zh.md)
|
|
13
13
|
|
|
@@ -28,60 +28,65 @@ English | [中文文档](README_zh.md)
|
|
|
28
28
|
|
|
29
29
|
## Introduction
|
|
30
30
|
|
|
31
|
-
NextRec is a modern recommendation
|
|
31
|
+
NextRec is a modern recommendation framework built on PyTorch, delivering a unified experience for modeling, training, and evaluation. It is designed with rich model implementations, data-processing utilities, and engineering-ready training components. NextRec focuses on large-scale industrial recommendation scenarios on Spark clusters, training on massive offline features (`parquet/csv`).
|
|
32
32
|
|
|
33
33
|
## Why NextRec
|
|
34
34
|
|
|
35
|
-
- **Unified feature engineering & data pipeline**: NextRec
|
|
36
|
-
- **Multi-scenario
|
|
37
|
-
- **Developer-friendly experience**:
|
|
38
|
-
- **
|
|
39
|
-
- **Efficient training & evaluation**: NextRec's standardized training engine comes with various optimizers, learning rate schedulers, early stopping, model checkpoints, and detailed log management built-in, ready to use out of the box.
|
|
35
|
+
- **Unified feature engineering & data pipeline**: NextRec provides unified Dense/Sparse/Sequence feature definitions, DataProcessor, and batch-optimized RecDataLoader, matching offline feature training/inference in industrial big-data settings.
|
|
36
|
+
- **Multi-scenario coverage**: Ranking (CTR/CVR), retrieval, multi-task learning, and more marketing/rec models, with a continuously expanding model zoo.
|
|
37
|
+
- **Developer-friendly experience**: `Stream processing/distributed training/inference` for `csv/parquet/pathlike` data, plus GPU/MPS acceleration and visualization support.
|
|
38
|
+
- **Efficient training & evaluation**: Standardized engine with optimizers, LR schedulers, early stopping, checkpoints, and detailed logging out of the box.
|
|
40
39
|
|
|
41
40
|
## Architecture
|
|
42
41
|
|
|
43
|
-
NextRec adopts a modular
|
|
42
|
+
NextRec adopts a modular design, enabling full-pipeline reusability and scalability across data processing → model construction → training & evaluation → inference & deployment. Its core components include: a Feature-Spec-driven Embedding architecture, the BaseModel abstraction, a set of independent reusable Layers, a unified DataLoader for both training and inference, and a ready-to-use Model Zoo.
|
|
44
43
|
|
|
45
44
|

|
|
46
45
|
|
|
47
|
-
> The project borrows ideas from excellent open-source rec libraries
|
|
46
|
+
> The project borrows ideas from excellent open-source rec libraries, for example: [torch-rechub](https://github.com/datawhalechina/torch-rechub). torch-rechub remains mature in architecture and models; the author contributed a bit there—feel free to check it out.
|
|
48
47
|
|
|
49
48
|
---
|
|
50
49
|
|
|
51
50
|
## Installation
|
|
52
51
|
|
|
53
|
-
You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required.
|
|
52
|
+
You can quickly install the latest NextRec via `pip install nextrec`; Python 3.10+ is required. If you want to run the tutorial code, clone this repository first:
|
|
54
53
|
|
|
54
|
+
```bash
|
|
55
|
+
git clone https://github.com/zerolovesea/NextRec.git
|
|
56
|
+
cd NextRec/
|
|
57
|
+
pip install nextrec # or pip install -e .
|
|
58
|
+
```
|
|
55
59
|
|
|
56
60
|
## Tutorials
|
|
57
61
|
|
|
58
|
-
|
|
62
|
+
See `tutorials/` for examples covering ranking, retrieval, multi-task learning, and data processing:
|
|
63
|
+
|
|
64
|
+
- [movielen_ranking_deepfm.py](/tutorials/movielen_ranking_deepfm.py) — DeepFM training on MovieLens 100k dataset
|
|
65
|
+
- [example_ranking_din.py](/tutorials/example_ranking_din.py) — DIN Deep Interest Network training on e-commerce dataset
|
|
66
|
+
- [example_multitask.py](/tutorials/example_multitask.py) — ESMM multi-task learning training on e-commerce dataset
|
|
67
|
+
- [movielen_match_dssm.py](/tutorials/movielen_match_dssm.py) — DSSM retrieval model training on MovieLens 100k dataset
|
|
59
68
|
|
|
60
|
-
- [
|
|
61
|
-
- [
|
|
62
|
-
- [
|
|
63
|
-
- [movielen_match_dssm.py](/tutorials/example_match_dssm.py) — DSSM retrieval model example trained on MovieLens 100k dataset
|
|
64
|
-
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly verify the availability of all ranking models
|
|
65
|
-
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly verify the availability of all multi-task models
|
|
66
|
-
- [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly verify the availability of all retrieval models
|
|
69
|
+
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) — Quickly validate availability of all ranking models
|
|
70
|
+
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) — Quickly validate availability of all multi-task models
|
|
71
|
+
- [run_all_match_models.py](/tutorials/run_all_match_models.py) — Quickly validate availability of all retrieval models
|
|
67
72
|
|
|
68
|
-
|
|
73
|
+
To dive deeper into the details of the NextRec framework, Jupyter notebooks are available:
|
|
69
74
|
|
|
70
|
-
- [
|
|
71
|
-
- [
|
|
75
|
+
- [Hands on the NextRec framework](/tutorials/notebooks/en/Hands%20on%20nextrec.ipynb)
|
|
76
|
+
- [Using the data processor for preprocessing](/tutorials/notebooks/en/Hands%20on%20dataprocessor.ipynb)
|
|
72
77
|
|
|
73
78
|
## 5-Minute Quick Start
|
|
74
79
|
|
|
75
|
-
We provide a detailed quick
|
|
80
|
+
We provide a detailed quick-start guide and paired datasets to help you get familiar with the different features of the NextRec framework. In `dataset/` you'll find an e-commerce scenario test dataset like this:
|
|
76
81
|
|
|
77
82
|
| user_id | item_id | dense_0 | dense_1 | dense_2 | dense_3 | dense_4 | dense_5 | dense_6 | dense_7 | sparse_0 | sparse_1 | sparse_2 | sparse_3 | sparse_4 | sparse_5 | sparse_6 | sparse_7 | sparse_8 | sparse_9 | sequence_0 | sequence_1 | label |
|
|
78
83
|
|--------|---------|-------------|-------------|-------------|------------|-------------|-------------|-------------|-------------|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|-----------------------------------------------------------|-----------------------------------------------------------|-------|
|
|
79
84
|
| 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
|
|
80
85
|
| 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
|
|
81
86
|
|
|
82
|
-
|
|
87
|
+
Below is a short example showing how to train a DIN (Deep Interest Network) model. You can also run `python tutorials/example_ranking_din.py` directly to execute the training and inference code.
|
|
83
88
|
|
|
84
|
-
After
|
|
89
|
+
After training starts, you can find detailed training logs at `nextrec_logs/din_tutorial`.
|
|
85
90
|
|
|
86
91
|
```python
|
|
87
92
|
import pandas as pd
|
|
@@ -159,6 +164,7 @@ metrics = model.evaluate(
|
|
|
159
164
|
NextRec provides a powerful command-line interface for model training and prediction using YAML configuration files. For detailed CLI documentation, see:
|
|
160
165
|
|
|
161
166
|
- [NextRec CLI User Guide](/nextrec_cli_preset/NextRec-CLI.md) - Complete guide for using the CLI
|
|
167
|
+
- [NextRec CLI Configuration Examples](/nextrec_cli_preset/) - CLI configuration file examples
|
|
162
168
|
|
|
163
169
|
```bash
|
|
164
170
|
# Train a model
|
|
@@ -168,11 +174,11 @@ nextrec --mode=train --train_config=path/to/train_config.yaml
|
|
|
168
174
|
nextrec --mode=predict --predict_config=path/to/predict_config.yaml
|
|
169
175
|
```
|
|
170
176
|
|
|
171
|
-
> As of version 0.4.
|
|
177
|
+
> As of version 0.4.4, NextRec CLI supports single-machine training; distributed training features are currently under development.
|
|
172
178
|
|
|
173
179
|
## Platform Compatibility
|
|
174
180
|
|
|
175
|
-
The current version is 0.4.
|
|
181
|
+
The current version is 0.4.4. All models and test code have been validated on the following platforms. If you encounter compatibility issues, please report them in the issue tracker with your system version:
|
|
176
182
|
|
|
177
183
|
| Platform | Configuration |
|
|
178
184
|
|----------|---------------|
|
|
@@ -243,7 +249,7 @@ We welcome contributions of any form!
|
|
|
243
249
|
4. Push your branch (`git push origin feature/AmazingFeature`)
|
|
244
250
|
5. Open a Pull Request
|
|
245
251
|
|
|
246
|
-
> Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is
|
|
252
|
+
> Before submitting a PR, please run `python test/run_tests.py` and `python scripts/format_code.py` to ensure all tests pass and code style is consistent.
|
|
247
253
|
|
|
248
254
|
### Code Style
|
|
249
255
|
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|

|
|
8
8
|

|
|
9
9
|

|
|
10
|
-

|
|
11
11
|
|
|
12
12
|
[English Version](README.md) | 中文文档
|
|
13
13
|
|
|
@@ -28,29 +28,35 @@
|
|
|
28
28
|
|
|
29
29
|
## 简介
|
|
30
30
|
|
|
31
|
-
NextRec是一个基于
|
|
31
|
+
NextRec是一个基于PyTorch的现代推荐系统框架,旨在为研究与工程团队提供快速的建模、训练与评估流程。框架内置丰富的模型实现、数据处理工具和工程化训练组件,覆盖多种推荐场景。此外提供了易上手的接口,命令行工具及教程,推荐算法学习者能以最快速度了解模型架构,复现学术论文并进行训练和部署。
|
|
32
32
|
|
|
33
33
|
## Why NextRec
|
|
34
34
|
|
|
35
|
-
- **统一的特征工程与数据流水线**:NextRec
|
|
36
|
-
-
|
|
37
|
-
- **友好的工程体验**:支持各种格式数据(`csv/parquet/pathlike`)的流式预处理/分布式训练/推理,GPU
|
|
38
|
-
-
|
|
39
|
-
-
|
|
35
|
+
- **统一的特征工程与数据流水线**:NextRec框架提供了统一的特征定义、可持久化的数据处理、并对批处理进行了优化,符合工业大数据Spark/Hive场景下,基于离线特征的模型训练推理流程。
|
|
36
|
+
- **多场景推荐能力**:覆盖排序(CTR/CVR)、召回、多任务学习、生成式召回等推荐/营销模型,持续跟进业界进展。
|
|
37
|
+
- **友好的工程体验**:支持各种格式数据(`csv/parquet/pathlike`)的流式预处理/分布式训练/推理,GPU加速与可视化指标监控,方便业务算法工程师和推荐算法学习者快速复现实验。
|
|
38
|
+
- **灵活的命令行工具**:支持通过命令行和配置文件,一键启动训练和推理进程,方便快速实验迭代和敏捷部署。
|
|
39
|
+
- **高效训练与评估**:内置多种优化器、学习率调度、早停、模型检查点与详细的日志管理,开箱即用。
|
|
40
40
|
|
|
41
41
|
## 架构
|
|
42
42
|
|
|
43
|
-
NextRec
|
|
43
|
+
NextRec采用模块化工程设计,核心组件包括:Feature Spec驱动的Embedding架构;模型基类BaseModel;独立Layer模块;支持训练和推理的统一的DataLoader;开箱即用的模型库等。
|
|
44
44
|
|
|
45
45
|

|
|
46
46
|
|
|
47
|
-
>
|
|
47
|
+
> 项目的架构借鉴了一些优秀的开源推荐算法库,例如DataWhaleChina社区的[torch-rechub](https://github.com/datawhalechina/torch-rechub)。torch-rechub在开发架构和模型实现上相对成熟,本人也参与了其中一小部分的维护,欢迎感兴趣的开发者前往了解。
|
|
48
48
|
|
|
49
49
|
---
|
|
50
50
|
|
|
51
51
|
## 安装
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
开发者可以通过`pip install nextrec`快速安装NextRec的最新版本,环境要求为Python 3.10+。如果需要执行示例代码,则需要先拉取仓库:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
git clone https://github.com/zerolovesea/NextRec.git
|
|
57
|
+
cd NextRec/
|
|
58
|
+
pip install nextrec # or pip install -e .
|
|
59
|
+
```
|
|
54
60
|
|
|
55
61
|
## 示例代码
|
|
56
62
|
|
|
@@ -60,6 +66,7 @@ NextRec采用模块化、低耦合的工程设计,使得推荐系统从数据
|
|
|
60
66
|
- [example_ranking_din.py](/tutorials/example_ranking_din.py) - 电商数据集上的DIN 深度兴趣网络训练示例
|
|
61
67
|
- [example_multitask.py](/tutorials/example_multitask.py) - 电商数据集上的ESMM多任务学习训练示例
|
|
62
68
|
- [movielen_match_dssm.py](/tutorials/example_match_dssm.py) - 基于movielen 100k数据集训练的 DSSM 召回模型示例
|
|
69
|
+
|
|
63
70
|
- [run_all_ranking_models.py](/tutorials/run_all_ranking_models.py) - 快速校验所有排序模型的可用性
|
|
64
71
|
- [run_all_multitask_models.py](/tutorials/run_all_multitask_models.py) - 快速校验所有多任务模型的可用性
|
|
65
72
|
- [run_all_match_models.py](/tutorials/run_all_match_models.py) - 快速校验所有召回模型的可用性
|
|
@@ -78,7 +85,7 @@ NextRec采用模块化、低耦合的工程设计,使得推荐系统从数据
|
|
|
78
85
|
| 1 | 7817 | 0.14704075 | 0.31020382 | 0.77780896 | 0.944897 | 0.62315375 | 0.57124174 | 0.77009535 | 0.3211029 | 315 | 260 | 379 | 146 | 168 | 161 | 138 | 88 | 5 | 312 | [170,175,97,338,105,353,272,546,175,545,463,128,0,0,0] | [368,414,820,405,548,63,327,0,0,0,0,0,0,0,0] | 0 |
|
|
79
86
|
| 1 | 3579 | 0.77811223 | 0.80359334 | 0.5185201 | 0.91091245 | 0.043562356 | 0.82142705 | 0.8803686 | 0.33748195 | 149 | 229 | 442 | 6 | 167 | 252 | 25 | 402 | 7 | 168 | [179,48,61,551,284,165,344,151,0,0,0,0,0,0,0] | [814,0,0,0,0,0,0,0,0,0,0,0,0,0,0] | 1 |
|
|
80
87
|
|
|
81
|
-
接下来我们将用一个简短的示例,展示如何使用NextRec训练一个DIN
|
|
88
|
+
接下来我们将用一个简短的示例,展示如何使用NextRec训练一个DIN(Deep Interest Network)模型。你也可以直接运行`python tutorials/example_ranking_din.py`来执行训练和推理代码。
|
|
82
89
|
|
|
83
90
|
开始训练以后,你可以在`nextrec_logs/din_tutorial`路径下查看详细的训练日志。
|
|
84
91
|
|
|
@@ -157,6 +164,7 @@ metrics = model.evaluate(
|
|
|
157
164
|
NextRec 提供了强大的命令行界面,支持通过 YAML 配置文件进行模型训练和预测。详细的 CLI 文档请参见:
|
|
158
165
|
|
|
159
166
|
- [NextRec CLI 使用指南](/nextrec_cli_preset/NextRec-CLI_zh.md) - 完整的 CLI 使用文档
|
|
167
|
+
- [NextRec CLI 配置文件示例](/nextrec_cli_preset/) - CLI 使用配置文件示例
|
|
160
168
|
|
|
161
169
|
```bash
|
|
162
170
|
# 训练模型
|
|
@@ -166,11 +174,11 @@ nextrec --mode=train --train_config=path/to/train_config.yaml
|
|
|
166
174
|
nextrec --mode=predict --predict_config=path/to/predict_config.yaml
|
|
167
175
|
```
|
|
168
176
|
|
|
169
|
-
> 截止当前版本0.4.
|
|
177
|
+
> 截至当前版本0.4.4,NextRec CLI支持单机训练,分布式训练相关功能尚在开发中。
|
|
170
178
|
|
|
171
179
|
## 兼容平台
|
|
172
180
|
|
|
173
|
-
当前最新版本为0.4.
|
|
181
|
+
当前最新版本为0.4.4,所有模型和测试代码均已在以下平台通过验证。如果开发者在使用中遇到兼容问题,请在issue区提交错误报告并注明系统版本:
|
|
174
182
|
|
|
175
183
|
| 平台 | 配置 |
|
|
176
184
|
|------|------|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.4.4"
|
|
@@ -1,12 +1,54 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Date: create on 09/11/2025
|
|
3
3
|
Author:
|
|
4
|
-
|
|
4
|
+
Yang Zhou, zyaztec@gmail.com
|
|
5
5
|
Reference:
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
6
|
+
[1] Lian J, Zhou X, Zhang F, et al. xdeepfm: Combining explicit and implicit feature interactions
|
|
7
|
+
for recommender systems[C]//Proceedings of the 24th ACM SIGKDD international conference on
|
|
8
|
+
knowledge discovery & data mining. 2018: 1754-1763.
|
|
9
|
+
(https://arxiv.org/abs/1803.05170)
|
|
10
|
+
|
|
11
|
+
xDeepFM is a CTR prediction model that unifies explicit and implicit
|
|
12
|
+
feature interaction learning. It extends DeepFM by adding the
|
|
13
|
+
Compressed Interaction Network (CIN) to explicitly model high-order
|
|
14
|
+
interactions at the vector-wise level, while an MLP captures implicit
|
|
15
|
+
non-linear crosses. A linear term retains first-order signals, and all
|
|
16
|
+
three parts are learned jointly end-to-end.
|
|
17
|
+
|
|
18
|
+
In the forward pass:
|
|
19
|
+
(1) Embedding Layer: transforms sparse/sequence fields into dense vectors
|
|
20
|
+
(2) Linear Part: captures first-order contributions of sparse/sequence fields
|
|
21
|
+
(3) CIN: explicitly builds higher-order feature crosses via convolution over
|
|
22
|
+
outer products of field embeddings, with optional split-half connections
|
|
23
|
+
(4) Deep Part (MLP): models implicit, non-linear interactions across all fields
|
|
24
|
+
(5) Combination: sums outputs from linear, CIN, and deep branches before the
|
|
25
|
+
task-specific prediction layer
|
|
26
|
+
|
|
27
|
+
Key Advantages:
|
|
28
|
+
- Jointly learns first-order, explicit high-order, and implicit interactions
|
|
29
|
+
- CIN offers interpretable vector-wise crosses with controlled complexity
|
|
30
|
+
- Deep branch enhances representation power for non-linear patterns
|
|
31
|
+
- End-to-end optimization eliminates heavy manual feature engineering
|
|
32
|
+
- Flexible design supports both sparse and sequence features
|
|
33
|
+
|
|
34
|
+
xDeepFM 是一个 CTR 预估模型,将显式与隐式的特征交互学习统一到同一框架。
|
|
35
|
+
在 DeepFM 的基础上,额外引入了 CIN(Compressed Interaction Network)
|
|
36
|
+
显式建模高阶向量级交互,同时 MLP 负责隐式非线性交互,线性部分保留一阶信号,
|
|
37
|
+
三者联合训练。
|
|
38
|
+
|
|
39
|
+
前向流程:
|
|
40
|
+
(1) 嵌入层:将稀疏/序列特征映射为稠密向量
|
|
41
|
+
(2) 线性部分:建模稀疏/序列特征的一阶贡献
|
|
42
|
+
(3) CIN:通过对字段嵌入做外积并卷积,显式捕获高阶交叉,可选 split-half 以控参
|
|
43
|
+
(4) 深层部分(MLP):对所有特征进行隐式非线性交互建模
|
|
44
|
+
(5) 融合:线性、CIN、MLP 输出求和后进入任务预测层
|
|
45
|
+
|
|
46
|
+
主要优点:
|
|
47
|
+
- 同时学习一阶、显式高阶、隐式交互
|
|
48
|
+
- CIN 提供可解释的向量级交叉并可控复杂度
|
|
49
|
+
- 深层分支提升非线性表达能力
|
|
50
|
+
- 端到端训练降低人工特征工程需求
|
|
51
|
+
- 兼容稀疏与序列特征的建模
|
|
10
52
|
"""
|
|
11
53
|
|
|
12
54
|
import torch
|
|
@@ -160,8 +160,11 @@ def build_feature_objects(
|
|
|
160
160
|
SparseFeature(
|
|
161
161
|
name=name,
|
|
162
162
|
vocab_size=int(vocab_size),
|
|
163
|
+
embedding_name=embed_cfg.get("embedding_name", name),
|
|
163
164
|
embedding_dim=embed_cfg.get("embedding_dim"),
|
|
164
165
|
padding_idx=embed_cfg.get("padding_idx"),
|
|
166
|
+
init_type=embed_cfg.get("init_type", "xavier_uniform"),
|
|
167
|
+
init_params=embed_cfg.get("init_params"),
|
|
165
168
|
l1_reg=embed_cfg.get("l1_reg", 0.0),
|
|
166
169
|
l2_reg=embed_cfg.get("l2_reg", 1e-5),
|
|
167
170
|
trainable=embed_cfg.get("trainable", True),
|
|
@@ -184,9 +187,12 @@ def build_feature_objects(
|
|
|
184
187
|
name=name,
|
|
185
188
|
vocab_size=int(vocab_size),
|
|
186
189
|
max_len=embed_cfg.get("max_len") or proc_cfg.get("max_len", 50),
|
|
190
|
+
embedding_name=embed_cfg.get("embedding_name", name),
|
|
187
191
|
embedding_dim=embed_cfg.get("embedding_dim"),
|
|
188
192
|
padding_idx=embed_cfg.get("padding_idx"),
|
|
189
193
|
combiner=embed_cfg.get("combiner", "mean"),
|
|
194
|
+
init_type=embed_cfg.get("init_type", "xavier_uniform"),
|
|
195
|
+
init_params=embed_cfg.get("init_params"),
|
|
190
196
|
l1_reg=embed_cfg.get("l1_reg", 0.0),
|
|
191
197
|
l2_reg=embed_cfg.get("l2_reg", 1e-5),
|
|
192
198
|
trainable=embed_cfg.get("trainable", True),
|
|
@@ -5,10 +5,9 @@ Date: create on 13/11/2025
|
|
|
5
5
|
Author: Yang Zhou, zyaztec@gmail.com
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from typing import Any, Dict, Set,
|
|
8
|
+
from typing import Any, Dict, Set,
|
|
9
9
|
|
|
10
10
|
import torch.nn as nn
|
|
11
|
-
from torch.nn.init import _NonlinearityType
|
|
12
11
|
|
|
13
12
|
KNOWN_NONLINEARITIES: Set[str] = {
|
|
14
13
|
"linear",
|
|
@@ -27,28 +26,25 @@ KNOWN_NONLINEARITIES: Set[str] = {
|
|
|
27
26
|
}
|
|
28
27
|
|
|
29
28
|
|
|
30
|
-
def resolve_nonlinearity(activation: str
|
|
31
|
-
if
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
# Fall back to linear for custom activations (gain handled separately).
|
|
35
|
-
return "linear"
|
|
36
|
-
return activation
|
|
29
|
+
def resolve_nonlinearity(activation: str):
|
|
30
|
+
if activation in KNOWN_NONLINEARITIES:
|
|
31
|
+
return activation
|
|
32
|
+
return "linear"
|
|
37
33
|
|
|
38
34
|
|
|
39
|
-
def resolve_gain(activation: str
|
|
35
|
+
def resolve_gain(activation: str, param: Dict[str, Any]) -> float:
|
|
40
36
|
if "gain" in param:
|
|
41
37
|
return param["gain"]
|
|
42
38
|
nonlinearity = resolve_nonlinearity(activation)
|
|
43
39
|
try:
|
|
44
|
-
return nn.init.calculate_gain(nonlinearity, param.get("param"))
|
|
40
|
+
return nn.init.calculate_gain(nonlinearity, param.get("param")) # type: ignore
|
|
45
41
|
except ValueError:
|
|
46
|
-
return 1.0
|
|
42
|
+
return 1.0
|
|
47
43
|
|
|
48
44
|
|
|
49
45
|
def get_initializer(
|
|
50
46
|
init_type: str = "normal",
|
|
51
|
-
activation: str
|
|
47
|
+
activation: str = "linear",
|
|
52
48
|
param: Dict[str, Any] | None = None,
|
|
53
49
|
):
|
|
54
50
|
param = param or {}
|
|
@@ -62,11 +58,11 @@ def get_initializer(
|
|
|
62
58
|
nn.init.xavier_normal_(tensor, gain=gain)
|
|
63
59
|
elif init_type == "kaiming_uniform":
|
|
64
60
|
nn.init.kaiming_uniform_(
|
|
65
|
-
tensor, a=param.get("a", 0), nonlinearity=nonlinearity
|
|
61
|
+
tensor, a=param.get("a", 0), nonlinearity=nonlinearity # type: ignore
|
|
66
62
|
)
|
|
67
63
|
elif init_type == "kaiming_normal":
|
|
68
64
|
nn.init.kaiming_normal_(
|
|
69
|
-
tensor, a=param.get("a", 0), nonlinearity=nonlinearity
|
|
65
|
+
tensor, a=param.get("a", 0), nonlinearity=nonlinearity # type: ignore
|
|
70
66
|
)
|
|
71
67
|
elif init_type == "orthogonal":
|
|
72
68
|
nn.init.orthogonal_(tensor, gain=gain)
|
|
@@ -80,4 +76,4 @@ def get_initializer(
|
|
|
80
76
|
raise ValueError(f"Unknown init_type: {init_type}")
|
|
81
77
|
return tensor
|
|
82
78
|
|
|
83
|
-
return initializer_fn
|
|
79
|
+
return initializer_fn
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.4.3"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nextrec-0.4.3 → nextrec-0.4.4}/docs/zh//345/277/253/351/200/237/344/270/212/346/211/213.md"
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nextrec-0.4.3 → nextrec-0.4.4}/tutorials/distributed/example_distributed_training_large_dataset.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|