FAI-RL 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fai_rl-0.1.0/FAI_RL.egg-info/PKG-INFO +203 -0
- fai_rl-0.1.0/FAI_RL.egg-info/SOURCES.txt +59 -0
- fai_rl-0.1.0/FAI_RL.egg-info/dependency_links.txt +1 -0
- fai_rl-0.1.0/FAI_RL.egg-info/entry_points.txt +4 -0
- fai_rl-0.1.0/FAI_RL.egg-info/requires.txt +21 -0
- fai_rl-0.1.0/FAI_RL.egg-info/top_level.txt +8 -0
- fai_rl-0.1.0/PKG-INFO +203 -0
- fai_rl-0.1.0/README.md +159 -0
- fai_rl-0.1.0/configs/__init__.py +1 -0
- fai_rl-0.1.0/configs/deepspeed/zero3_config_gpu1.json +31 -0
- fai_rl-0.1.0/configs/deepspeed/zero3_config_gpu2.json +31 -0
- fai_rl-0.1.0/configs/deepspeed/zero3_config_gpu4.json +31 -0
- fai_rl-0.1.0/configs/deepspeed/zero3_config_gpu8.json +31 -0
- fai_rl-0.1.0/configs/evaluation/mmlu/llama3_3B_recipe.yaml +32 -0
- fai_rl-0.1.0/configs/inference/llama3_3B_recipe.yaml +54 -0
- fai_rl-0.1.0/configs/training/dpo/llama3_3B_full_recipe.yaml +62 -0
- fai_rl-0.1.0/configs/training/dpo/llama3_3B_lora_recipe.yaml +77 -0
- fai_rl-0.1.0/configs/training/dpo/llama3_3B_qlora_recipe.yaml +82 -0
- fai_rl-0.1.0/configs/training/grpo/llama3_3B_full_recipe.yaml +60 -0
- fai_rl-0.1.0/configs/training/grpo/llama3_3B_lora_recipe.yaml +76 -0
- fai_rl-0.1.0/configs/training/gspo/llama3_3B_full_recipe.yaml +68 -0
- fai_rl-0.1.0/configs/training/gspo/llama3_3B_lora_recipe.yaml +84 -0
- fai_rl-0.1.0/configs/training/ppo/llama3_3B_full_recipe.yaml +54 -0
- fai_rl-0.1.0/configs/training/ppo/llama3_3B_lora_recipe.yaml +69 -0
- fai_rl-0.1.0/configs/training/ppo/llama3_3B_qlora_recipe.yaml +74 -0
- fai_rl-0.1.0/configs/training/sft/llama3_3B_full_recipe.yaml +74 -0
- fai_rl-0.1.0/configs/training/sft/llama3_3B_lora_recipe.yaml +89 -0
- fai_rl-0.1.0/configs/training/sft/llama3_3B_qlora_recipe.yaml +94 -0
- fai_rl-0.1.0/core/__init__.py +17 -0
- fai_rl-0.1.0/core/config.py +316 -0
- fai_rl-0.1.0/core/model_utils.py +165 -0
- fai_rl-0.1.0/core/trainer_base.py +257 -0
- fai_rl-0.1.0/evaluations/README.md +69 -0
- fai_rl-0.1.0/evaluations/__init__.py +0 -0
- fai_rl-0.1.0/evaluations/eval.py +522 -0
- fai_rl-0.1.0/inference/README.md +61 -0
- fai_rl-0.1.0/inference/__init__.py +0 -0
- fai_rl-0.1.0/inference/inference.py +506 -0
- fai_rl-0.1.0/pyproject.toml +83 -0
- fai_rl-0.1.0/scripts/run_evaluation.sh +88 -0
- fai_rl-0.1.0/scripts/run_inference.py +100 -0
- fai_rl-0.1.0/scripts/run_inference.sh +95 -0
- fai_rl-0.1.0/scripts/run_training.sh +183 -0
- fai_rl-0.1.0/scripts/train.py +110 -0
- fai_rl-0.1.0/setup.cfg +4 -0
- fai_rl-0.1.0/setup.py +11 -0
- fai_rl-0.1.0/trainers/README.md +203 -0
- fai_rl-0.1.0/trainers/__init__.py +16 -0
- fai_rl-0.1.0/trainers/dpo_trainer.py +186 -0
- fai_rl-0.1.0/trainers/grpo_trainer.py +175 -0
- fai_rl-0.1.0/trainers/gspo_trainer.py +183 -0
- fai_rl-0.1.0/trainers/ppo_trainer.py +363 -0
- fai_rl-0.1.0/trainers/rewards/__init__.py +4 -0
- fai_rl-0.1.0/trainers/rewards/accuracy_rewards.py +48 -0
- fai_rl-0.1.0/trainers/rewards/format_rewards.py +24 -0
- fai_rl-0.1.0/trainers/sft_trainer.py +171 -0
- fai_rl-0.1.0/trainers/templates/__init__.py +5 -0
- fai_rl-0.1.0/trainers/templates/gsm8k_template.py +99 -0
- fai_rl-0.1.0/trainers/templates/openmathinstruct_template.py +94 -0
- fai_rl-0.1.0/utils/__init__.py +11 -0
- fai_rl-0.1.0/utils/logging_utils.py +147 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: FAI-RL
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Foundation of AI - Reinforcement learning Library
|
|
5
|
+
Author-email: Roblox <ylim@roblox.com>, Roblox <mnandwana@roblox.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Roblox/FAI-RL
|
|
8
|
+
Project-URL: Documentation, https://github.com/Roblox/FAI-RL#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/Roblox/FAI-RL
|
|
10
|
+
Project-URL: Issues, https://github.com/Roblox/FAI-RL/issues
|
|
11
|
+
Keywords: reinforcement learning,language models,transformers,rlhf,dpo,ppo,sft
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Requires-Python: >=3.8
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Requires-Dist: torch>=2.7.0
|
|
25
|
+
Requires-Dist: torchvision>=0.22.0
|
|
26
|
+
Requires-Dist: torchaudio>=2.7.0
|
|
27
|
+
Requires-Dist: datasets>=4.0.0
|
|
28
|
+
Requires-Dist: transformers>=4.56.0
|
|
29
|
+
Requires-Dist: trl>=0.23.0
|
|
30
|
+
Requires-Dist: wandb>=0.21.0
|
|
31
|
+
Requires-Dist: bitsandbytes>=0.46.0
|
|
32
|
+
Requires-Dist: peft>=0.17.0
|
|
33
|
+
Requires-Dist: deepspeed>=0.17.0
|
|
34
|
+
Requires-Dist: ipykernel>=6.30.0
|
|
35
|
+
Requires-Dist: ipywidgets>=8.1.0
|
|
36
|
+
Requires-Dist: fsspec>=2025.3.0
|
|
37
|
+
Requires-Dist: huggingface_hub>=0.34.0
|
|
38
|
+
Requires-Dist: mpi4py>=4.1.0
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
41
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
42
|
+
Requires-Dist: flake8>=4.0.0; extra == "dev"
|
|
43
|
+
Requires-Dist: mypy>=0.950; extra == "dev"
|
|
44
|
+
|
|
45
|
+
# FAI-RL: Foundation of AI - Reinforcement learning Library
|
|
46
|
+
|
|
47
|
+
A modular, production-ready library designed for **easy training, inference, and evaluation** of language models using reinforcement learning methods. Currently supports:
|
|
48
|
+
- SFT (Supervised Fine-Tuning)
|
|
49
|
+
- DPO (Direct Preference Optimization)
|
|
50
|
+
- PPO (Proximal Policy Optimization)
|
|
51
|
+
- GRPO (Group Relative Policy Optimization)
|
|
52
|
+
- GSPO (Group Sequence Policy Optimization)
|
|
53
|
+
|
|
54
|
+
### Flexible Configuration System
|
|
55
|
+
* YAML-based configuration for all training parameters
|
|
56
|
+
* Pre-configured recipes for popular models
|
|
57
|
+
* DeepSpeed ZeRO-3 integration for distributed training
|
|
58
|
+
|
|
59
|
+
## 🚀 Quick Start
|
|
60
|
+
|
|
61
|
+
Get started with installation, training, inference, and evaluation in just a few commands:
|
|
62
|
+
|
|
63
|
+
### 📦 Installation
|
|
64
|
+
|
|
65
|
+
#### Option 1: Install from PyPI (Recommended)
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
pip install FAI-RL
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
#### Option 2: Install from source
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Clone the repository
|
|
75
|
+
git clone https://github.com/Roblox/FAI-RL.git
|
|
76
|
+
cd FAI-RL
|
|
77
|
+
|
|
78
|
+
# Install in development mode
|
|
79
|
+
pip install -e .
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
#### Option 3: Manual setup with virtual environment
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# Clone the repository
|
|
86
|
+
git clone https://github.com/Roblox/FAI-RL.git
|
|
87
|
+
cd FAI-RL
|
|
88
|
+
|
|
89
|
+
# Create virtual environment
|
|
90
|
+
python -m venv venv_fai_rl
|
|
91
|
+
source venv_fai_rl/bin/activate
|
|
92
|
+
|
|
93
|
+
# Install the package
|
|
94
|
+
pip install -e .
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Training
|
|
98
|
+
|
|
99
|
+
Train a model using SFT, DPO, PPO, GRPO, or GSPO:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
# Single GPU training
|
|
103
|
+
./scripts/run_training.sh \
|
|
104
|
+
--config configs/training/dpo/llama3_3B_lora_recipe.yaml \
|
|
105
|
+
--num-gpus 1
|
|
106
|
+
|
|
107
|
+
# Multi-GPU training (8 GPUs)
|
|
108
|
+
./scripts/run_training.sh \
|
|
109
|
+
--config configs/training/dpo/llama3_3B_full_recipe.yaml \
|
|
110
|
+
--num-gpus 8 \
|
|
111
|
+
--nohup # Run in background
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Inference
|
|
115
|
+
|
|
116
|
+
Generate responses from your trained models:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# Run inference on trained model
|
|
120
|
+
./scripts/run_inference.sh \
|
|
121
|
+
--config configs/inference/llama3_3B_recipe.yaml
|
|
122
|
+
|
|
123
|
+
# Run inference with debug mode
|
|
124
|
+
./scripts/run_inference.sh \
|
|
125
|
+
--config configs/inference/llama3_3B_recipe.yaml \
|
|
126
|
+
--debug
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Evaluation
|
|
130
|
+
|
|
131
|
+
Evaluate model performance on benchmarks:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
# Evaluate on MMLU benchmark
|
|
135
|
+
./scripts/run_evaluation.sh \
|
|
136
|
+
--config configs/evaluation/mmlu/llama3_3B_recipe.yaml
|
|
137
|
+
|
|
138
|
+
# Evaluate with debug output
|
|
139
|
+
./scripts/run_evaluation.sh \
|
|
140
|
+
--config configs/evaluation/mmlu/llama3_3B_recipe.yaml \
|
|
141
|
+
--debug
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
-----
|
|
145
|
+
|
|
146
|
+
## 📁 Project Structure
|
|
147
|
+
|
|
148
|
+
```
|
|
149
|
+
FAI-RL/
|
|
150
|
+
├── core/ # Core framework components
|
|
151
|
+
├── trainers/ # Training method implementations
|
|
152
|
+
├── inference/ # Inference components
|
|
153
|
+
├── evaluations/ # Evaluation system
|
|
154
|
+
├── configs/ # Configuration files
|
|
155
|
+
│   ├── training/ # Training configurations
|
|
156
|
+
│   ├── inference/ # Inference configurations
|
|
157
|
+
│   ├── evaluation/ # Evaluation configurations
|
|
158
|
+
│   └── deepspeed/ # DeepSpeed ZeRO configurations
|
|
159
|
+
├── utils/ # Utility modules
|
|
160
|
+
├── scripts/ # Scripts
|
|
161
|
+
├── logs/ # Training logs (auto-generated)
|
|
162
|
+
└── outputs/ # Inference output (auto-generated)
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
-----
|
|
166
|
+
|
|
167
|
+
## 🔗 Quick Links
|
|
168
|
+
|
|
169
|
+
* **[Training Guide](./trainers/README.md)** - Comprehensive guide to configuring and running model training with detailed parameter explanations
|
|
170
|
+
* **[Inference Guide](./inference/README.md)** - Running model inference and text generation
|
|
171
|
+
* **[Evaluation Guide](./evaluations/README.md)** - Evaluating model performance on standard benchmarks
|
|
172
|
+
|
|
173
|
+
## Algorithm Selection Guide
|
|
174
|
+
|
|
175
|
+
Choose the right algorithm for your use case:
|
|
176
|
+
|
|
177
|
+
| Algorithm | Best For | Requirements | Key Benefits |
|
|
178
|
+
|-----------|----------|--------------|--------------|
|
|
179
|
+
| **SFT** | Initial instruction tuning, domain adaptation | Prompt-response pairs | Simple, fast, establishes baseline |
|
|
180
|
+
| **DPO** | Aligning to human preferences | Preference pairs (chosen/rejected) | No reward model needed, stable training |
|
|
181
|
+
| **PPO** | Complex sequential tasks, agentic workflows | Preference pairs + reward model | Most flexible, handles multi-turn interactions |
|
|
182
|
+
| **GRPO** | Math reasoning, efficiency-focused tasks | Question-answer pairs | No critic model, faster training |
|
|
183
|
+
| **GSPO** | Multi-turn RL, stable sequence-level optimization | Question-answer pairs | Better stability than GRPO |
|
|
184
|
+
|
|
185
|
+
## Memory Optimization
|
|
186
|
+
|
|
187
|
+
FAI-RL supports various techniques to train large models efficiently:
|
|
188
|
+
|
|
189
|
+
* **Full Fine-tuning:** Train all model parameters (requires most memory)
|
|
190
|
+
* **LoRA:** Parameter-efficient training (~10% memory of full fine-tuning)
|
|
191
|
+
* **QLoRA:** 4-bit quantized LoRA (train 7B+ models on a single consumer GPU)
|
|
192
|
+
* **DeepSpeed ZeRO-3:** Distributed training for models that don't fit on a single GPU
|
|
193
|
+
|
|
194
|
+
## 🧪 Tested Environment
|
|
195
|
+
|
|
196
|
+
This framework has been validated on:
|
|
197
|
+
|
|
198
|
+
* **Instance:** AWS EC2 p4d.24xlarge
|
|
199
|
+
* **GPUs:** 8 x NVIDIA A100-SXM4-80GB (80GB VRAM each)
|
|
200
|
+
* **CPU:** 96 vCPUs
|
|
201
|
+
* **Memory:** 1152 GiB
|
|
202
|
+
* **Storage:** 8TB NVMe SSD
|
|
203
|
+
* **Network:** 400 Gbps
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
setup.py
|
|
4
|
+
FAI_RL.egg-info/PKG-INFO
|
|
5
|
+
FAI_RL.egg-info/SOURCES.txt
|
|
6
|
+
FAI_RL.egg-info/dependency_links.txt
|
|
7
|
+
FAI_RL.egg-info/entry_points.txt
|
|
8
|
+
FAI_RL.egg-info/requires.txt
|
|
9
|
+
FAI_RL.egg-info/top_level.txt
|
|
10
|
+
configs/__init__.py
|
|
11
|
+
configs/deepspeed/zero3_config_gpu1.json
|
|
12
|
+
configs/deepspeed/zero3_config_gpu2.json
|
|
13
|
+
configs/deepspeed/zero3_config_gpu4.json
|
|
14
|
+
configs/deepspeed/zero3_config_gpu8.json
|
|
15
|
+
configs/evaluation/mmlu/llama3_3B_recipe.yaml
|
|
16
|
+
configs/inference/llama3_3B_recipe.yaml
|
|
17
|
+
configs/training/dpo/llama3_3B_full_recipe.yaml
|
|
18
|
+
configs/training/dpo/llama3_3B_lora_recipe.yaml
|
|
19
|
+
configs/training/dpo/llama3_3B_qlora_recipe.yaml
|
|
20
|
+
configs/training/grpo/llama3_3B_full_recipe.yaml
|
|
21
|
+
configs/training/grpo/llama3_3B_lora_recipe.yaml
|
|
22
|
+
configs/training/gspo/llama3_3B_full_recipe.yaml
|
|
23
|
+
configs/training/gspo/llama3_3B_lora_recipe.yaml
|
|
24
|
+
configs/training/ppo/llama3_3B_full_recipe.yaml
|
|
25
|
+
configs/training/ppo/llama3_3B_lora_recipe.yaml
|
|
26
|
+
configs/training/ppo/llama3_3B_qlora_recipe.yaml
|
|
27
|
+
configs/training/sft/llama3_3B_full_recipe.yaml
|
|
28
|
+
configs/training/sft/llama3_3B_lora_recipe.yaml
|
|
29
|
+
configs/training/sft/llama3_3B_qlora_recipe.yaml
|
|
30
|
+
core/__init__.py
|
|
31
|
+
core/config.py
|
|
32
|
+
core/model_utils.py
|
|
33
|
+
core/trainer_base.py
|
|
34
|
+
evaluations/README.md
|
|
35
|
+
evaluations/__init__.py
|
|
36
|
+
evaluations/eval.py
|
|
37
|
+
inference/README.md
|
|
38
|
+
inference/__init__.py
|
|
39
|
+
inference/inference.py
|
|
40
|
+
scripts/run_evaluation.sh
|
|
41
|
+
scripts/run_inference.py
|
|
42
|
+
scripts/run_inference.sh
|
|
43
|
+
scripts/run_training.sh
|
|
44
|
+
scripts/train.py
|
|
45
|
+
trainers/README.md
|
|
46
|
+
trainers/__init__.py
|
|
47
|
+
trainers/dpo_trainer.py
|
|
48
|
+
trainers/grpo_trainer.py
|
|
49
|
+
trainers/gspo_trainer.py
|
|
50
|
+
trainers/ppo_trainer.py
|
|
51
|
+
trainers/sft_trainer.py
|
|
52
|
+
trainers/rewards/__init__.py
|
|
53
|
+
trainers/rewards/accuracy_rewards.py
|
|
54
|
+
trainers/rewards/format_rewards.py
|
|
55
|
+
trainers/templates/__init__.py
|
|
56
|
+
trainers/templates/gsm8k_template.py
|
|
57
|
+
trainers/templates/openmathinstruct_template.py
|
|
58
|
+
utils/__init__.py
|
|
59
|
+
utils/logging_utils.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
torch>=2.7.0
|
|
2
|
+
torchvision>=0.22.0
|
|
3
|
+
torchaudio>=2.7.0
|
|
4
|
+
datasets>=4.0.0
|
|
5
|
+
transformers>=4.56.0
|
|
6
|
+
trl>=0.23.0
|
|
7
|
+
wandb>=0.21.0
|
|
8
|
+
bitsandbytes>=0.46.0
|
|
9
|
+
peft>=0.17.0
|
|
10
|
+
deepspeed>=0.17.0
|
|
11
|
+
ipykernel>=6.30.0
|
|
12
|
+
ipywidgets>=8.1.0
|
|
13
|
+
fsspec>=2025.3.0
|
|
14
|
+
huggingface_hub>=0.34.0
|
|
15
|
+
mpi4py>=4.1.0
|
|
16
|
+
|
|
17
|
+
[dev]
|
|
18
|
+
pytest>=7.0.0
|
|
19
|
+
black>=22.0.0
|
|
20
|
+
flake8>=4.0.0
|
|
21
|
+
mypy>=0.950
|
fai_rl-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: FAI-RL
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Foundation of AI - Reinforcement learning Library
|
|
5
|
+
Author-email: Roblox <ylim@roblox.com>, Roblox <mnandwana@roblox.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Roblox/FAI-RL
|
|
8
|
+
Project-URL: Documentation, https://github.com/Roblox/FAI-RL#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/Roblox/FAI-RL
|
|
10
|
+
Project-URL: Issues, https://github.com/Roblox/FAI-RL/issues
|
|
11
|
+
Keywords: reinforcement learning,language models,transformers,rlhf,dpo,ppo,sft
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Requires-Python: >=3.8
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Requires-Dist: torch>=2.7.0
|
|
25
|
+
Requires-Dist: torchvision>=0.22.0
|
|
26
|
+
Requires-Dist: torchaudio>=2.7.0
|
|
27
|
+
Requires-Dist: datasets>=4.0.0
|
|
28
|
+
Requires-Dist: transformers>=4.56.0
|
|
29
|
+
Requires-Dist: trl>=0.23.0
|
|
30
|
+
Requires-Dist: wandb>=0.21.0
|
|
31
|
+
Requires-Dist: bitsandbytes>=0.46.0
|
|
32
|
+
Requires-Dist: peft>=0.17.0
|
|
33
|
+
Requires-Dist: deepspeed>=0.17.0
|
|
34
|
+
Requires-Dist: ipykernel>=6.30.0
|
|
35
|
+
Requires-Dist: ipywidgets>=8.1.0
|
|
36
|
+
Requires-Dist: fsspec>=2025.3.0
|
|
37
|
+
Requires-Dist: huggingface_hub>=0.34.0
|
|
38
|
+
Requires-Dist: mpi4py>=4.1.0
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
41
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
42
|
+
Requires-Dist: flake8>=4.0.0; extra == "dev"
|
|
43
|
+
Requires-Dist: mypy>=0.950; extra == "dev"
|
|
44
|
+
|
|
45
|
+
# FAI-RL: Foundation of AI - Reinforcement learning Library
|
|
46
|
+
|
|
47
|
+
A modular, production-ready library designed for **easy training, inference, and evaluation** of language models using reinforcement learning methods. Currently supports:
|
|
48
|
+
- SFT (Supervised Fine-Tuning)
|
|
49
|
+
- DPO (Direct Preference Optimization)
|
|
50
|
+
- PPO (Proximal Policy Optimization)
|
|
51
|
+
- GRPO (Group Relative Policy Optimization)
|
|
52
|
+
- GSPO (Group Sequence Policy Optimization)
|
|
53
|
+
|
|
54
|
+
### Flexible Configuration System
|
|
55
|
+
* YAML-based configuration for all training parameters
|
|
56
|
+
* Pre-configured recipes for popular models
|
|
57
|
+
* DeepSpeed ZeRO-3 integration for distributed training
|
|
58
|
+
|
|
59
|
+
## 🚀 Quick Start
|
|
60
|
+
|
|
61
|
+
Get started with installation, training, inference, and evaluation in just a few commands:
|
|
62
|
+
|
|
63
|
+
### 📦 Installation
|
|
64
|
+
|
|
65
|
+
#### Option 1: Install from PyPI (Recommended)
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
pip install FAI-RL
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
#### Option 2: Install from source
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# Clone the repository
|
|
75
|
+
git clone https://github.com/Roblox/FAI-RL.git
|
|
76
|
+
cd FAI-RL
|
|
77
|
+
|
|
78
|
+
# Install in development mode
|
|
79
|
+
pip install -e .
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
#### Option 3: Manual setup with virtual environment
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# Clone the repository
|
|
86
|
+
git clone https://github.com/Roblox/FAI-RL.git
|
|
87
|
+
cd FAI-RL
|
|
88
|
+
|
|
89
|
+
# Create virtual environment
|
|
90
|
+
python -m venv venv_fai_rl
|
|
91
|
+
source venv_fai_rl/bin/activate
|
|
92
|
+
|
|
93
|
+
# Install the package
|
|
94
|
+
pip install -e .
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Training
|
|
98
|
+
|
|
99
|
+
Train a model using SFT, DPO, PPO, GRPO, or GSPO:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
# Single GPU training
|
|
103
|
+
./scripts/run_training.sh \
|
|
104
|
+
--config configs/training/dpo/llama3_3B_lora_recipe.yaml \
|
|
105
|
+
--num-gpus 1
|
|
106
|
+
|
|
107
|
+
# Multi-GPU training (8 GPUs)
|
|
108
|
+
./scripts/run_training.sh \
|
|
109
|
+
--config configs/training/dpo/llama3_3B_full_recipe.yaml \
|
|
110
|
+
--num-gpus 8 \
|
|
111
|
+
--nohup # Run in background
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Inference
|
|
115
|
+
|
|
116
|
+
Generate responses from your trained models:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# Run inference on trained model
|
|
120
|
+
./scripts/run_inference.sh \
|
|
121
|
+
--config configs/inference/llama3_3B_recipe.yaml
|
|
122
|
+
|
|
123
|
+
# Run inference with debug mode
|
|
124
|
+
./scripts/run_inference.sh \
|
|
125
|
+
--config configs/inference/llama3_3B_recipe.yaml \
|
|
126
|
+
--debug
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Evaluation
|
|
130
|
+
|
|
131
|
+
Evaluate model performance on benchmarks:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
# Evaluate on MMLU benchmark
|
|
135
|
+
./scripts/run_evaluation.sh \
|
|
136
|
+
--config configs/evaluation/mmlu/llama3_3B_recipe.yaml
|
|
137
|
+
|
|
138
|
+
# Evaluate with debug output
|
|
139
|
+
./scripts/run_evaluation.sh \
|
|
140
|
+
--config configs/evaluation/mmlu/llama3_3B_recipe.yaml \
|
|
141
|
+
--debug
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
-----
|
|
145
|
+
|
|
146
|
+
## 📁 Project Structure
|
|
147
|
+
|
|
148
|
+
```
|
|
149
|
+
FAI-RL/
|
|
150
|
+
├── core/ # Core framework components
|
|
151
|
+
├── trainers/ # Training method implementations
|
|
152
|
+
├── inference/ # Inference components
|
|
153
|
+
├── evaluations/ # Evaluation system
|
|
154
|
+
├── configs/ # Configuration files
|
|
155
|
+
│   ├── training/ # Training configurations
|
|
156
|
+
│   ├── inference/ # Inference configurations
|
|
157
|
+
│   ├── evaluation/ # Evaluation configurations
|
|
158
|
+
│   └── deepspeed/ # DeepSpeed ZeRO configurations
|
|
159
|
+
├── utils/ # Utility modules
|
|
160
|
+
├── scripts/ # Scripts
|
|
161
|
+
├── logs/ # Training logs (auto-generated)
|
|
162
|
+
└── outputs/ # Inference output (auto-generated)
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
-----
|
|
166
|
+
|
|
167
|
+
## 🔗 Quick Links
|
|
168
|
+
|
|
169
|
+
* **[Training Guide](./trainers/README.md)** - Comprehensive guide to configuring and running model training with detailed parameter explanations
|
|
170
|
+
* **[Inference Guide](./inference/README.md)** - Running model inference and text generation
|
|
171
|
+
* **[Evaluation Guide](./evaluations/README.md)** - Evaluating model performance on standard benchmarks
|
|
172
|
+
|
|
173
|
+
## Algorithm Selection Guide
|
|
174
|
+
|
|
175
|
+
Choose the right algorithm for your use case:
|
|
176
|
+
|
|
177
|
+
| Algorithm | Best For | Requirements | Key Benefits |
|
|
178
|
+
|-----------|----------|--------------|--------------|
|
|
179
|
+
| **SFT** | Initial instruction tuning, domain adaptation | Prompt-response pairs | Simple, fast, establishes baseline |
|
|
180
|
+
| **DPO** | Aligning to human preferences | Preference pairs (chosen/rejected) | No reward model needed, stable training |
|
|
181
|
+
| **PPO** | Complex sequential tasks, agentic workflows | Preference pairs + reward model | Most flexible, handles multi-turn interactions |
|
|
182
|
+
| **GRPO** | Math reasoning, efficiency-focused tasks | Question-answer pairs | No critic model, faster training |
|
|
183
|
+
| **GSPO** | Multi-turn RL, stable sequence-level optimization | Question-answer pairs | Better stability than GRPO |
|
|
184
|
+
|
|
185
|
+
## Memory Optimization
|
|
186
|
+
|
|
187
|
+
FAI-RL supports various techniques to train large models efficiently:
|
|
188
|
+
|
|
189
|
+
* **Full Fine-tuning:** Train all model parameters (requires most memory)
|
|
190
|
+
* **LoRA:** Parameter-efficient training (~10% memory of full fine-tuning)
|
|
191
|
+
* **QLoRA:** 4-bit quantized LoRA (train 7B+ models on a single consumer GPU)
|
|
192
|
+
* **DeepSpeed ZeRO-3:** Distributed training for models that don't fit on a single GPU
|
|
193
|
+
|
|
194
|
+
## 🧪 Tested Environment
|
|
195
|
+
|
|
196
|
+
This framework has been validated on:
|
|
197
|
+
|
|
198
|
+
* **Instance:** AWS EC2 p4d.24xlarge
|
|
199
|
+
* **GPUs:** 8 x NVIDIA A100-SXM4-80GB (80GB VRAM each)
|
|
200
|
+
* **CPU:** 96 vCPUs
|
|
201
|
+
* **Memory:** 1152 GiB
|
|
202
|
+
* **Storage:** 8TB NVMe SSD
|
|
203
|
+
* **Network:** 400 Gbps
|
fai_rl-0.1.0/README.md
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# FAI-RL: Foundation of AI - Reinforcement learning Library
|
|
2
|
+
|
|
3
|
+
A modular, production-ready library designed for **easy training, inference, and evaluation** of language models using reinforcement learning methods. Currently supports:
|
|
4
|
+
- SFT (Supervised Fine-Tuning)
|
|
5
|
+
- DPO (Direct Preference Optimization)
|
|
6
|
+
- PPO (Proximal Policy Optimization)
|
|
7
|
+
- GRPO (Group Relative Policy Optimization)
|
|
8
|
+
- GSPO (Group Sequence Policy Optimization)
|
|
9
|
+
|
|
10
|
+
### Flexible Configuration System
|
|
11
|
+
* YAML-based configuration for all training parameters
|
|
12
|
+
* Pre-configured recipes for popular models
|
|
13
|
+
* DeepSpeed ZeRO-3 integration for distributed training
|
|
14
|
+
|
|
15
|
+
## 🚀 Quick Start
|
|
16
|
+
|
|
17
|
+
Get started with installation, training, inference, and evaluation in just a few commands:
|
|
18
|
+
|
|
19
|
+
### 📦 Installation
|
|
20
|
+
|
|
21
|
+
#### Option 1: Install from PyPI (Recommended)
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install FAI-RL
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
#### Option 2: Install from source
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
# Clone the repository
|
|
31
|
+
git clone https://github.com/Roblox/FAI-RL.git
|
|
32
|
+
cd FAI-RL
|
|
33
|
+
|
|
34
|
+
# Install in development mode
|
|
35
|
+
pip install -e .
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
#### Option 3: Manual setup with virtual environment
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# Clone the repository
|
|
42
|
+
git clone https://github.com/Roblox/FAI-RL.git
|
|
43
|
+
cd FAI-RL
|
|
44
|
+
|
|
45
|
+
# Create virtual environment
|
|
46
|
+
python -m venv venv_fai_rl
|
|
47
|
+
source venv_fai_rl/bin/activate
|
|
48
|
+
|
|
49
|
+
# Install the package
|
|
50
|
+
pip install -e .
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Training
|
|
54
|
+
|
|
55
|
+
Train a model using SFT, DPO, PPO, GRPO, or GSPO:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
# Single GPU training
|
|
59
|
+
./scripts/run_training.sh \
|
|
60
|
+
--config configs/training/dpo/llama3_3B_lora_recipe.yaml \
|
|
61
|
+
--num-gpus 1
|
|
62
|
+
|
|
63
|
+
# Multi-GPU training (8 GPUs)
|
|
64
|
+
./scripts/run_training.sh \
|
|
65
|
+
--config configs/training/dpo/llama3_3B_full_recipe.yaml \
|
|
66
|
+
--num-gpus 8 \
|
|
67
|
+
--nohup # Run in background
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Inference
|
|
71
|
+
|
|
72
|
+
Generate responses from your trained models:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
# Run inference on trained model
|
|
76
|
+
./scripts/run_inference.sh \
|
|
77
|
+
--config configs/inference/llama3_3B_recipe.yaml
|
|
78
|
+
|
|
79
|
+
# Run inference with debug mode
|
|
80
|
+
./scripts/run_inference.sh \
|
|
81
|
+
--config configs/inference/llama3_3B_recipe.yaml \
|
|
82
|
+
--debug
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Evaluation
|
|
86
|
+
|
|
87
|
+
Evaluate model performance on benchmarks:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
# Evaluate on MMLU benchmark
|
|
91
|
+
./scripts/run_evaluation.sh \
|
|
92
|
+
--config configs/evaluation/mmlu/llama3_3B_recipe.yaml
|
|
93
|
+
|
|
94
|
+
# Evaluate with debug output
|
|
95
|
+
./scripts/run_evaluation.sh \
|
|
96
|
+
--config configs/evaluation/mmlu/llama3_3B_recipe.yaml \
|
|
97
|
+
--debug
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
-----
|
|
101
|
+
|
|
102
|
+
## 📁 Project Structure
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
FAI-RL/
|
|
106
|
+
├── core/ # Core framework components
|
|
107
|
+
├── trainers/ # Training method implementations
|
|
108
|
+
├── inference/ # Inference components
|
|
109
|
+
├── evaluations/ # Evaluation system
|
|
110
|
+
├── configs/ # Configuration files
|
|
111
|
+
│   ├── training/ # Training configurations
|
|
112
|
+
│   ├── inference/ # Inference configurations
|
|
113
|
+
│   ├── evaluation/ # Evaluation configurations
|
|
114
|
+
│   └── deepspeed/ # DeepSpeed ZeRO configurations
|
|
115
|
+
├── utils/ # Utility modules
|
|
116
|
+
├── scripts/ # Scripts
|
|
117
|
+
├── logs/ # Training logs (auto-generated)
|
|
118
|
+
└── outputs/ # Inference output (auto-generated)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
-----
|
|
122
|
+
|
|
123
|
+
## 🔗 Quick Links
|
|
124
|
+
|
|
125
|
+
* **[Training Guide](./trainers/README.md)** - Comprehensive guide to configuring and running model training with detailed parameter explanations
|
|
126
|
+
* **[Inference Guide](./inference/README.md)** - Running model inference and text generation
|
|
127
|
+
* **[Evaluation Guide](./evaluations/README.md)** - Evaluating model performance on standard benchmarks
|
|
128
|
+
|
|
129
|
+
## Algorithm Selection Guide
|
|
130
|
+
|
|
131
|
+
Choose the right algorithm for your use case:
|
|
132
|
+
|
|
133
|
+
| Algorithm | Best For | Requirements | Key Benefits |
|
|
134
|
+
|-----------|----------|--------------|--------------|
|
|
135
|
+
| **SFT** | Initial instruction tuning, domain adaptation | Prompt-response pairs | Simple, fast, establishes baseline |
|
|
136
|
+
| **DPO** | Aligning to human preferences | Preference pairs (chosen/rejected) | No reward model needed, stable training |
|
|
137
|
+
| **PPO** | Complex sequential tasks, agentic workflows | Preference pairs + reward model | Most flexible, handles multi-turn interactions |
|
|
138
|
+
| **GRPO** | Math reasoning, efficiency-focused tasks | Question-answer pairs | No critic model, faster training |
|
|
139
|
+
| **GSPO** | Multi-turn RL, stable sequence-level optimization | Question-answer pairs | Better stability than GRPO |
|
|
140
|
+
|
|
141
|
+
## Memory Optimization
|
|
142
|
+
|
|
143
|
+
FAI-RL supports various techniques to train large models efficiently:
|
|
144
|
+
|
|
145
|
+
* **Full Fine-tuning:** Train all model parameters (requires most memory)
|
|
146
|
+
* **LoRA:** Parameter-efficient training (~10% memory of full fine-tuning)
|
|
147
|
+
* **QLoRA:** 4-bit quantized LoRA (train 7B+ models on a single consumer GPU)
|
|
148
|
+
* **DeepSpeed ZeRO-3:** Distributed training for models that don't fit on a single GPU
|
|
149
|
+
|
|
150
|
+
## 🧪 Tested Environment
|
|
151
|
+
|
|
152
|
+
This framework has been validated on:
|
|
153
|
+
|
|
154
|
+
* **Instance:** AWS EC2 p4d.24xlarge
|
|
155
|
+
* **GPUs:** 8 x NVIDIA A100-SXM4-80GB (80GB VRAM each)
|
|
156
|
+
* **CPU:** 96 vCPUs
|
|
157
|
+
* **Memory:** 1152 GiB
|
|
158
|
+
* **Storage:** 8TB NVMe SSD
|
|
159
|
+
* **Network:** 400 Gbps
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Configuration files and templates."""
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"zero_optimization": {
|
|
3
|
+
"stage": 3,
|
|
4
|
+
"offload_optimizer": {
|
|
5
|
+
"device": "cpu",
|
|
6
|
+
"pin_memory": true
|
|
7
|
+
},
|
|
8
|
+
"offload_param": {
|
|
9
|
+
"device": "cpu",
|
|
10
|
+
"pin_memory": true
|
|
11
|
+
},
|
|
12
|
+
"overlap_comm": true,
|
|
13
|
+
"contiguous_gradients": true,
|
|
14
|
+
"reduce_bucket_size": 5e8,
|
|
15
|
+
"stage3_prefetch_bucket_size": 5e8,
|
|
16
|
+
"stage3_param_persistence_threshold": 1e6,
|
|
17
|
+
"sub_group_size": 1e9,
|
|
18
|
+
"stage3_max_live_parameters": 1e9,
|
|
19
|
+
"stage3_max_reuse_distance": 1e9,
|
|
20
|
+
"stage3_gather_16bit_weights_on_model_save": true
|
|
21
|
+
},
|
|
22
|
+
"gradient_accumulation_steps": 16,
|
|
23
|
+
"gradient_clipping": 1.0,
|
|
24
|
+
"steps_per_print": 10,
|
|
25
|
+
"train_batch_size": 16,
|
|
26
|
+
"train_micro_batch_size_per_gpu": 1,
|
|
27
|
+
"wall_clock_breakdown": false,
|
|
28
|
+
"bf16": {
|
|
29
|
+
"enabled": true
|
|
30
|
+
}
|
|
31
|
+
}
|