FAI-RL 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. fai_rl-0.1.0/FAI_RL.egg-info/PKG-INFO +203 -0
  2. fai_rl-0.1.0/FAI_RL.egg-info/SOURCES.txt +59 -0
  3. fai_rl-0.1.0/FAI_RL.egg-info/dependency_links.txt +1 -0
  4. fai_rl-0.1.0/FAI_RL.egg-info/entry_points.txt +4 -0
  5. fai_rl-0.1.0/FAI_RL.egg-info/requires.txt +21 -0
  6. fai_rl-0.1.0/FAI_RL.egg-info/top_level.txt +8 -0
  7. fai_rl-0.1.0/PKG-INFO +203 -0
  8. fai_rl-0.1.0/README.md +159 -0
  9. fai_rl-0.1.0/configs/__init__.py +1 -0
  10. fai_rl-0.1.0/configs/deepspeed/zero3_config_gpu1.json +31 -0
  11. fai_rl-0.1.0/configs/deepspeed/zero3_config_gpu2.json +31 -0
  12. fai_rl-0.1.0/configs/deepspeed/zero3_config_gpu4.json +31 -0
  13. fai_rl-0.1.0/configs/deepspeed/zero3_config_gpu8.json +31 -0
  14. fai_rl-0.1.0/configs/evaluation/mmlu/llama3_3B_recipe.yaml +32 -0
  15. fai_rl-0.1.0/configs/inference/llama3_3B_recipe.yaml +54 -0
  16. fai_rl-0.1.0/configs/training/dpo/llama3_3B_full_recipe.yaml +62 -0
  17. fai_rl-0.1.0/configs/training/dpo/llama3_3B_lora_recipe.yaml +77 -0
  18. fai_rl-0.1.0/configs/training/dpo/llama3_3B_qlora_recipe.yaml +82 -0
  19. fai_rl-0.1.0/configs/training/grpo/llama3_3B_full_recipe.yaml +60 -0
  20. fai_rl-0.1.0/configs/training/grpo/llama3_3B_lora_recipe.yaml +76 -0
  21. fai_rl-0.1.0/configs/training/gspo/llama3_3B_full_recipe.yaml +68 -0
  22. fai_rl-0.1.0/configs/training/gspo/llama3_3B_lora_recipe.yaml +84 -0
  23. fai_rl-0.1.0/configs/training/ppo/llama3_3B_full_recipe.yaml +54 -0
  24. fai_rl-0.1.0/configs/training/ppo/llama3_3B_lora_recipe.yaml +69 -0
  25. fai_rl-0.1.0/configs/training/ppo/llama3_3B_qlora_recipe.yaml +74 -0
  26. fai_rl-0.1.0/configs/training/sft/llama3_3B_full_recipe.yaml +74 -0
  27. fai_rl-0.1.0/configs/training/sft/llama3_3B_lora_recipe.yaml +89 -0
  28. fai_rl-0.1.0/configs/training/sft/llama3_3B_qlora_recipe.yaml +94 -0
  29. fai_rl-0.1.0/core/__init__.py +17 -0
  30. fai_rl-0.1.0/core/config.py +316 -0
  31. fai_rl-0.1.0/core/model_utils.py +165 -0
  32. fai_rl-0.1.0/core/trainer_base.py +257 -0
  33. fai_rl-0.1.0/evaluations/README.md +69 -0
  34. fai_rl-0.1.0/evaluations/__init__.py +0 -0
  35. fai_rl-0.1.0/evaluations/eval.py +522 -0
  36. fai_rl-0.1.0/inference/README.md +61 -0
  37. fai_rl-0.1.0/inference/__init__.py +0 -0
  38. fai_rl-0.1.0/inference/inference.py +506 -0
  39. fai_rl-0.1.0/pyproject.toml +83 -0
  40. fai_rl-0.1.0/scripts/run_evaluation.sh +88 -0
  41. fai_rl-0.1.0/scripts/run_inference.py +100 -0
  42. fai_rl-0.1.0/scripts/run_inference.sh +95 -0
  43. fai_rl-0.1.0/scripts/run_training.sh +183 -0
  44. fai_rl-0.1.0/scripts/train.py +110 -0
  45. fai_rl-0.1.0/setup.cfg +4 -0
  46. fai_rl-0.1.0/setup.py +11 -0
  47. fai_rl-0.1.0/trainers/README.md +203 -0
  48. fai_rl-0.1.0/trainers/__init__.py +16 -0
  49. fai_rl-0.1.0/trainers/dpo_trainer.py +186 -0
  50. fai_rl-0.1.0/trainers/grpo_trainer.py +175 -0
  51. fai_rl-0.1.0/trainers/gspo_trainer.py +183 -0
  52. fai_rl-0.1.0/trainers/ppo_trainer.py +363 -0
  53. fai_rl-0.1.0/trainers/rewards/__init__.py +4 -0
  54. fai_rl-0.1.0/trainers/rewards/accuracy_rewards.py +48 -0
  55. fai_rl-0.1.0/trainers/rewards/format_rewards.py +24 -0
  56. fai_rl-0.1.0/trainers/sft_trainer.py +171 -0
  57. fai_rl-0.1.0/trainers/templates/__init__.py +5 -0
  58. fai_rl-0.1.0/trainers/templates/gsm8k_template.py +99 -0
  59. fai_rl-0.1.0/trainers/templates/openmathinstruct_template.py +94 -0
  60. fai_rl-0.1.0/utils/__init__.py +11 -0
  61. fai_rl-0.1.0/utils/logging_utils.py +147 -0
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.4
2
+ Name: FAI-RL
3
+ Version: 0.1.0
4
+ Summary: Foundation of AI - Reinforcement learning Library
5
+ Author-email: Roblox <ylim@roblox.com>, Roblox <mnandwana@roblox.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Roblox/FAI-RL
8
+ Project-URL: Documentation, https://github.com/Roblox/FAI-RL#readme
9
+ Project-URL: Repository, https://github.com/Roblox/FAI-RL
10
+ Project-URL: Issues, https://github.com/Roblox/FAI-RL/issues
11
+ Keywords: reinforcement learning,language models,transformers,rlhf,dpo,ppo,sft
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: >=3.8
23
+ Description-Content-Type: text/markdown
24
+ Requires-Dist: torch>=2.7.0
25
+ Requires-Dist: torchvision>=0.22.0
26
+ Requires-Dist: torchaudio>=2.7.0
27
+ Requires-Dist: datasets>=4.0.0
28
+ Requires-Dist: transformers>=4.56.0
29
+ Requires-Dist: trl>=0.23.0
30
+ Requires-Dist: wandb>=0.21.0
31
+ Requires-Dist: bitsandbytes>=0.46.0
32
+ Requires-Dist: peft>=0.17.0
33
+ Requires-Dist: deepspeed>=0.17.0
34
+ Requires-Dist: ipykernel>=6.30.0
35
+ Requires-Dist: ipywidgets>=8.1.0
36
+ Requires-Dist: fsspec>=2025.3.0
37
+ Requires-Dist: huggingface_hub>=0.34.0
38
+ Requires-Dist: mpi4py>=4.1.0
39
+ Provides-Extra: dev
40
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
41
+ Requires-Dist: black>=22.0.0; extra == "dev"
42
+ Requires-Dist: flake8>=4.0.0; extra == "dev"
43
+ Requires-Dist: mypy>=0.950; extra == "dev"
44
+
45
+ # FAI-RL: Foundation of AI - Reinforcement learning Library
46
+
47
+ A modular, production-ready library designed for **easy training, inference, and evaluation** of language models using reinforcement learning methods. Currently supports:
48
+ - SFT (Supervised Fine-Tuning)
49
+ - DPO (Direct Preference Optimization)
50
+ - PPO (Proximal Policy Optimization)
51
+ - GRPO (Group Relative Policy Optimization)
52
+ - GSPO (Group Sequence Policy Optimization)
53
+
54
+ ### Flexible Configuration System
55
+ * YAML-based configuration for all training parameters
56
+ * Pre-configured recipes for popular models
57
+ * DeepSpeed ZeRO-3 integration for distributed training
58
+
59
+ ## 🚀 Quick Start
60
+
61
+ Get started with installation, training, inference, and evaluation in just a few commands:
62
+
63
+ ### 📦 Installation
64
+
65
+ #### Option 1: Install from PyPI (Recommended)
66
+
67
+ ```bash
68
+ pip install FAI-RL
69
+ ```
70
+
71
+ #### Option 2: Install from source
72
+
73
+ ```bash
74
+ # Clone the repository
75
+ git clone https://github.com/Roblox/FAI-RL.git
76
+ cd FAI-RL
77
+
78
+ # Install in development mode
79
+ pip install -e .
80
+ ```
81
+
82
+ #### Option 3: Manual setup with virtual environment
83
+
84
+ ```bash
85
+ # Clone the repository
86
+ git clone https://github.com/Roblox/FAI-RL.git
87
+ cd FAI-RL
88
+
89
+ # Create virtual environment
90
+ python -m venv venv_fai_rl
91
+ source venv_fai_rl/bin/activate
92
+
93
+ # Install the package
94
+ pip install -e .
95
+ ```
96
+
97
+ ### Training
98
+
99
+ Train a model using SFT, DPO, PPO, GRPO, or GSPO:
100
+
101
+ ```bash
102
+ # Single GPU training
103
+ ./scripts/run_training.sh \
104
+ --config configs/training/dpo/llama3_3B_full_recipe.yaml \
105
+ --num-gpus 1
106
+
107
+ # Multi-GPU training (8 GPUs)
108
+ ./scripts/run_training.sh \
109
+ --config configs/training/dpo/llama3_3B_full_recipe.yaml \
110
+ --num-gpus 8 \
111
+ --nohup # Run in background
112
+ ```
113
+
114
+ ### Inference
115
+
116
+ Generate responses from your trained models:
117
+
118
+ ```bash
119
+ # Run inference on trained model
120
+ ./scripts/run_inference.sh \
121
+ --config configs/inference/llama3_3B_recipe.yaml
122
+
123
+ # Run inference with debug mode
124
+ ./scripts/run_inference.sh \
125
+ --config configs/inference/llama3_3B_recipe.yaml \
126
+ --debug
127
+ ```
128
+
129
+ ### Evaluation
130
+
131
+ Evaluate model performance on benchmarks:
132
+
133
+ ```bash
134
+ # Evaluate on MMLU benchmark
135
+ ./scripts/run_evaluation.sh \
136
+ --config configs/evaluation/mmlu/llama3_3B_recipe.yaml
137
+
138
+ # Evaluate with debug output
139
+ ./scripts/run_evaluation.sh \
140
+ --config configs/evaluation/mmlu/llama3_3B_recipe.yaml \
141
+ --debug
142
+ ```
143
+
144
+ -----
145
+
146
+ ## 📁 Project Structure
147
+
148
+ ```
149
+ FAI-RL/
150
+ ├── core/ # Core framework components
151
+ ├── trainers/ # Training method implementations
152
+ ├── inference/ # Inference components
153
+ ├── evaluations/ # Evaluation system
154
+ ├── configs/ # Configuration files
155
+ │ ├── training/ # Training configurations
156
+ │ ├── inference/ # Inference configurations
157
+ │ ├── evaluation/ # Evaluation configurations
158
+ │ └── deepspeed/ # DeepSpeed ZeRO configurations
159
+ ├── utils/ # Utility modules
160
+ ├── scripts/ # Scripts
161
+ ├── logs/ # Training logs (auto-generated)
162
+ └── outputs/ # Inference output (auto-generated)
163
+ ```
164
+
165
+ -----
166
+
167
+ ## 🔗 Quick Links
168
+
169
+ * **[Training Guide](./trainers/README.md)** - Comprehensive guide to configuring and running model training with detailed parameter explanations
170
+ * **[Inference Guide](./inference/README.md)** - Running model inference and text generation
171
+ * **[Evaluation Guide](./evaluations/README.md)** - Evaluating model performance on standard benchmarks
172
+
173
+ ## Algorithm Selection Guide
174
+
175
+ Choose the right algorithm for your use case:
176
+
177
+ | Algorithm | Best For | Requirements | Key Benefits |
178
+ |-----------|----------|--------------|--------------|
179
+ | **SFT** | Initial instruction tuning, domain adaptation | Prompt-response pairs | Simple, fast, establishes baseline |
180
+ | **DPO** | Aligning to human preferences | Preference pairs (chosen/rejected) | No reward model needed, stable training |
181
+ | **PPO** | Complex sequential tasks, agentic workflows | Preference pairs + reward model | Most flexible, handles multi-turn interactions |
182
+ | **GRPO** | Math reasoning, efficiency-focused tasks | Question-answer pairs | No critic model, faster training |
183
+ | **GSPO** | Multi-turn RL, stable sequence-level optimization | Question-answer pairs | Better stability than GRPO |
184
+
185
+ ## Memory Optimization
186
+
187
+ FAI-RL supports various techniques to train large models efficiently:
188
+
189
+ * **Full Fine-tuning:** Train all model parameters (requires most memory)
190
+ * **LoRA:** Parameter-efficient training (~10% memory of full fine-tuning)
191
+ * **QLoRA:** 4-bit quantized LoRA (train 7B+ models on a single consumer GPU)
191
+ * **DeepSpeed ZeRO-3:** Distributed training for models that don't fit on a single GPU
193
+
194
+ ## 🧪 Tested Environment
195
+
196
+ This framework has been validated on:
197
+
198
+ * **Instance:** AWS EC2 p4d.24xlarge
199
+ * **GPUs:** 8 x NVIDIA A100-SXM4-80GB (80GB VRAM each)
200
+ * **CPU:** 96 vCPUs
201
+ * **Memory:** 1152 GiB
202
+ * **Storage:** 8TB NVMe SSD
203
+ * **Network:** 400 Gbps
@@ -0,0 +1,59 @@
1
+ README.md
2
+ pyproject.toml
3
+ setup.py
4
+ FAI_RL.egg-info/PKG-INFO
5
+ FAI_RL.egg-info/SOURCES.txt
6
+ FAI_RL.egg-info/dependency_links.txt
7
+ FAI_RL.egg-info/entry_points.txt
8
+ FAI_RL.egg-info/requires.txt
9
+ FAI_RL.egg-info/top_level.txt
10
+ configs/__init__.py
11
+ configs/deepspeed/zero3_config_gpu1.json
12
+ configs/deepspeed/zero3_config_gpu2.json
13
+ configs/deepspeed/zero3_config_gpu4.json
14
+ configs/deepspeed/zero3_config_gpu8.json
15
+ configs/evaluation/mmlu/llama3_3B_recipe.yaml
16
+ configs/inference/llama3_3B_recipe.yaml
17
+ configs/training/dpo/llama3_3B_full_recipe.yaml
18
+ configs/training/dpo/llama3_3B_lora_recipe.yaml
19
+ configs/training/dpo/llama3_3B_qlora_recipe.yaml
20
+ configs/training/grpo/llama3_3B_full_recipe.yaml
21
+ configs/training/grpo/llama3_3B_lora_recipe.yaml
22
+ configs/training/gspo/llama3_3B_full_recipe.yaml
23
+ configs/training/gspo/llama3_3B_lora_recipe.yaml
24
+ configs/training/ppo/llama3_3B_full_recipe.yaml
25
+ configs/training/ppo/llama3_3B_lora_recipe.yaml
26
+ configs/training/ppo/llama3_3B_qlora_recipe.yaml
27
+ configs/training/sft/llama3_3B_full_recipe.yaml
28
+ configs/training/sft/llama3_3B_lora_recipe.yaml
29
+ configs/training/sft/llama3_3B_qlora_recipe.yaml
30
+ core/__init__.py
31
+ core/config.py
32
+ core/model_utils.py
33
+ core/trainer_base.py
34
+ evaluations/README.md
35
+ evaluations/__init__.py
36
+ evaluations/eval.py
37
+ inference/README.md
38
+ inference/__init__.py
39
+ inference/inference.py
40
+ scripts/run_evaluation.sh
41
+ scripts/run_inference.py
42
+ scripts/run_inference.sh
43
+ scripts/run_training.sh
44
+ scripts/train.py
45
+ trainers/README.md
46
+ trainers/__init__.py
47
+ trainers/dpo_trainer.py
48
+ trainers/grpo_trainer.py
49
+ trainers/gspo_trainer.py
50
+ trainers/ppo_trainer.py
51
+ trainers/sft_trainer.py
52
+ trainers/rewards/__init__.py
53
+ trainers/rewards/accuracy_rewards.py
54
+ trainers/rewards/format_rewards.py
55
+ trainers/templates/__init__.py
56
+ trainers/templates/gsm8k_template.py
57
+ trainers/templates/openmathinstruct_template.py
58
+ utils/__init__.py
59
+ utils/logging_utils.py
@@ -0,0 +1,4 @@
1
+ [console_scripts]
2
+ fai-rl-eval = evaluations.eval:main
3
+ fai-rl-inference = scripts.run_inference:main
4
+ fai-rl-train = scripts.train:main
@@ -0,0 +1,21 @@
1
+ torch>=2.7.0
2
+ torchvision>=0.22.0
3
+ torchaudio>=2.7.0
4
+ datasets>=4.0.0
5
+ transformers>=4.56.0
6
+ trl>=0.23.0
7
+ wandb>=0.21.0
8
+ bitsandbytes>=0.46.0
9
+ peft>=0.17.0
10
+ deepspeed>=0.17.0
11
+ ipykernel>=6.30.0
12
+ ipywidgets>=8.1.0
13
+ fsspec>=2025.3.0
14
+ huggingface_hub>=0.34.0
15
+ mpi4py>=4.1.0
16
+
17
+ [dev]
18
+ pytest>=7.0.0
19
+ black>=22.0.0
20
+ flake8>=4.0.0
21
+ mypy>=0.950
@@ -0,0 +1,8 @@
1
+ configs
2
+ core
3
+ dist
4
+ evaluations
5
+ inference
6
+ scripts
7
+ trainers
8
+ utils
fai_rl-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.4
2
+ Name: FAI-RL
3
+ Version: 0.1.0
4
+ Summary: Foundation of AI - Reinforcement learning Library
5
+ Author-email: Roblox <ylim@roblox.com>, Roblox <mnandwana@roblox.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Roblox/FAI-RL
8
+ Project-URL: Documentation, https://github.com/Roblox/FAI-RL#readme
9
+ Project-URL: Repository, https://github.com/Roblox/FAI-RL
10
+ Project-URL: Issues, https://github.com/Roblox/FAI-RL/issues
11
+ Keywords: reinforcement learning,language models,transformers,rlhf,dpo,ppo,sft
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.8
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: >=3.8
23
+ Description-Content-Type: text/markdown
24
+ Requires-Dist: torch>=2.7.0
25
+ Requires-Dist: torchvision>=0.22.0
26
+ Requires-Dist: torchaudio>=2.7.0
27
+ Requires-Dist: datasets>=4.0.0
28
+ Requires-Dist: transformers>=4.56.0
29
+ Requires-Dist: trl>=0.23.0
30
+ Requires-Dist: wandb>=0.21.0
31
+ Requires-Dist: bitsandbytes>=0.46.0
32
+ Requires-Dist: peft>=0.17.0
33
+ Requires-Dist: deepspeed>=0.17.0
34
+ Requires-Dist: ipykernel>=6.30.0
35
+ Requires-Dist: ipywidgets>=8.1.0
36
+ Requires-Dist: fsspec>=2025.3.0
37
+ Requires-Dist: huggingface_hub>=0.34.0
38
+ Requires-Dist: mpi4py>=4.1.0
39
+ Provides-Extra: dev
40
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
41
+ Requires-Dist: black>=22.0.0; extra == "dev"
42
+ Requires-Dist: flake8>=4.0.0; extra == "dev"
43
+ Requires-Dist: mypy>=0.950; extra == "dev"
44
+
45
+ # FAI-RL: Foundation of AI - Reinforcement learning Library
46
+
47
+ A modular, production-ready library designed for **easy training, inference, and evaluation** of language models using reinforcement learning methods. Currently supports:
48
+ - SFT (Supervised Fine-Tuning)
49
+ - DPO (Direct Preference Optimization)
50
+ - PPO (Proximal Policy Optimization)
51
+ - GRPO (Group Relative Policy Optimization)
52
+ - GSPO (Group Sequence Policy Optimization)
53
+
54
+ ### Flexible Configuration System
55
+ * YAML-based configuration for all training parameters
56
+ * Pre-configured recipes for popular models
57
+ * DeepSpeed ZeRO-3 integration for distributed training
58
+
59
+ ## 🚀 Quick Start
60
+
61
+ Get started with installation, training, inference, and evaluation in just a few commands:
62
+
63
+ ### 📦 Installation
64
+
65
+ #### Option 1: Install from PyPI (Recommended)
66
+
67
+ ```bash
68
+ pip install FAI-RL
69
+ ```
70
+
71
+ #### Option 2: Install from source
72
+
73
+ ```bash
74
+ # Clone the repository
75
+ git clone https://github.com/Roblox/FAI-RL.git
76
+ cd FAI-RL
77
+
78
+ # Install in development mode
79
+ pip install -e .
80
+ ```
81
+
82
+ #### Option 3: Manual setup with virtual environment
83
+
84
+ ```bash
85
+ # Clone the repository
86
+ git clone https://github.com/Roblox/FAI-RL.git
87
+ cd FAI-RL
88
+
89
+ # Create virtual environment
90
+ python -m venv venv_fai_rl
91
+ source venv_fai_rl/bin/activate
92
+
93
+ # Install the package
94
+ pip install -e .
95
+ ```
96
+
97
+ ### Training
98
+
99
+ Train a model using SFT, DPO, PPO, GRPO, or GSPO:
100
+
101
+ ```bash
102
+ # Single GPU training
103
+ ./scripts/run_training.sh \
104
+ --config configs/training/dpo/llama3_3B_full_recipe.yaml \
105
+ --num-gpus 1
106
+
107
+ # Multi-GPU training (8 GPUs)
108
+ ./scripts/run_training.sh \
109
+ --config configs/training/dpo/llama3_3B_full_recipe.yaml \
110
+ --num-gpus 8 \
111
+ --nohup # Run in background
112
+ ```
113
+
114
+ ### Inference
115
+
116
+ Generate responses from your trained models:
117
+
118
+ ```bash
119
+ # Run inference on trained model
120
+ ./scripts/run_inference.sh \
121
+ --config configs/inference/llama3_3B_recipe.yaml
122
+
123
+ # Run inference with debug mode
124
+ ./scripts/run_inference.sh \
125
+ --config configs/inference/llama3_3B_recipe.yaml \
126
+ --debug
127
+ ```
128
+
129
+ ### Evaluation
130
+
131
+ Evaluate model performance on benchmarks:
132
+
133
+ ```bash
134
+ # Evaluate on MMLU benchmark
135
+ ./scripts/run_evaluation.sh \
136
+ --config configs/evaluation/mmlu/llama3_3B_recipe.yaml
137
+
138
+ # Evaluate with debug output
139
+ ./scripts/run_evaluation.sh \
140
+ --config configs/evaluation/mmlu/llama3_3B_recipe.yaml \
141
+ --debug
142
+ ```
143
+
144
+ -----
145
+
146
+ ## 📁 Project Structure
147
+
148
+ ```
149
+ FAI-RL/
150
+ ├── core/ # Core framework components
151
+ ├── trainers/ # Training method implementations
152
+ ├── inference/ # Inference components
153
+ ├── evaluations/ # Evaluation system
154
+ ├── configs/ # Configuration files
155
+ │ ├── training/ # Training configurations
156
+ │ ├── inference/ # Inference configurations
157
+ │ ├── evaluation/ # Evaluation configurations
158
+ │ └── deepspeed/ # DeepSpeed ZeRO configurations
159
+ ├── utils/ # Utility modules
160
+ ├── scripts/ # Scripts
161
+ ├── logs/ # Training logs (auto-generated)
162
+ └── outputs/ # Inference output (auto-generated)
163
+ ```
164
+
165
+ -----
166
+
167
+ ## 🔗 Quick Links
168
+
169
+ * **[Training Guide](./trainers/README.md)** - Comprehensive guide to configuring and running model training with detailed parameter explanations
170
+ * **[Inference Guide](./inference/README.md)** - Running model inference and text generation
171
+ * **[Evaluation Guide](./evaluations/README.md)** - Evaluating model performance on standard benchmarks
172
+
173
+ ## Algorithm Selection Guide
174
+
175
+ Choose the right algorithm for your use case:
176
+
177
+ | Algorithm | Best For | Requirements | Key Benefits |
178
+ |-----------|----------|--------------|--------------|
179
+ | **SFT** | Initial instruction tuning, domain adaptation | Prompt-response pairs | Simple, fast, establishes baseline |
180
+ | **DPO** | Aligning to human preferences | Preference pairs (chosen/rejected) | No reward model needed, stable training |
181
+ | **PPO** | Complex sequential tasks, agentic workflows | Preference pairs + reward model | Most flexible, handles multi-turn interactions |
182
+ | **GRPO** | Math reasoning, efficiency-focused tasks | Question-answer pairs | No critic model, faster training |
183
+ | **GSPO** | Multi-turn RL, stable sequence-level optimization | Question-answer pairs | Better stability than GRPO |
184
+
185
+ ## Memory Optimization
186
+
187
+ FAI-RL supports various techniques to train large models efficiently:
188
+
189
+ * **Full Fine-tuning:** Train all model parameters (requires most memory)
190
+ * **LoRA:** Parameter-efficient training (~10% memory of full fine-tuning)
191
+ * **QLoRA:** 4-bit quantized LoRA (train 7B+ models on a single consumer GPU)
191
+ * **DeepSpeed ZeRO-3:** Distributed training for models that don't fit on a single GPU
193
+
194
+ ## 🧪 Tested Environment
195
+
196
+ This framework has been validated on:
197
+
198
+ * **Instance:** AWS EC2 p4d.24xlarge
199
+ * **GPUs:** 8 x NVIDIA A100-SXM4-80GB (80GB VRAM each)
200
+ * **CPU:** 96 vCPUs
201
+ * **Memory:** 1152 GiB
202
+ * **Storage:** 8TB NVMe SSD
203
+ * **Network:** 400 Gbps
fai_rl-0.1.0/README.md ADDED
@@ -0,0 +1,159 @@
1
+ # FAI-RL: Foundation of AI - Reinforcement learning Library
2
+
3
+ A modular, production-ready library designed for **easy training, inference, and evaluation** of language models using reinforcement learning methods. Currently supports:
4
+ - SFT (Supervised Fine-Tuning)
5
+ - DPO (Direct Preference Optimization)
6
+ - PPO (Proximal Policy Optimization)
7
+ - GRPO (Group Relative Policy Optimization)
8
+ - GSPO (Group Sequence Policy Optimization)
9
+
10
+ ### Flexible Configuration System
11
+ * YAML-based configuration for all training parameters
12
+ * Pre-configured recipes for popular models
13
+ * DeepSpeed ZeRO-3 integration for distributed training
14
+
15
+ ## 🚀 Quick Start
16
+
17
+ Get started with installation, training, inference, and evaluation in just a few commands:
18
+
19
+ ### 📦 Installation
20
+
21
+ #### Option 1: Install from PyPI (Recommended)
22
+
23
+ ```bash
24
+ pip install FAI-RL
25
+ ```
26
+
27
+ #### Option 2: Install from source
28
+
29
+ ```bash
30
+ # Clone the repository
31
+ git clone https://github.com/Roblox/FAI-RL.git
32
+ cd FAI-RL
33
+
34
+ # Install in development mode
35
+ pip install -e .
36
+ ```
37
+
38
+ #### Option 3: Manual setup with virtual environment
39
+
40
+ ```bash
41
+ # Clone the repository
42
+ git clone https://github.com/Roblox/FAI-RL.git
43
+ cd FAI-RL
44
+
45
+ # Create virtual environment
46
+ python -m venv venv_fai_rl
47
+ source venv_fai_rl/bin/activate
48
+
49
+ # Install the package
50
+ pip install -e .
51
+ ```
52
+
53
+ ### Training
54
+
55
+ Train a model using SFT, DPO, PPO, GRPO, or GSPO:
56
+
57
+ ```bash
58
+ # Single GPU training
59
+ ./scripts/run_training.sh \
60
+ --config configs/training/dpo/llama3_3B_full_recipe.yaml \
61
+ --num-gpus 1
62
+
63
+ # Multi-GPU training (8 GPUs)
64
+ ./scripts/run_training.sh \
65
+ --config configs/training/dpo/llama3_3B_full_recipe.yaml \
66
+ --num-gpus 8 \
67
+ --nohup # Run in background
68
+ ```
69
+
70
+ ### Inference
71
+
72
+ Generate responses from your trained models:
73
+
74
+ ```bash
75
+ # Run inference on trained model
76
+ ./scripts/run_inference.sh \
77
+ --config configs/inference/llama3_3B_recipe.yaml
78
+
79
+ # Run inference with debug mode
80
+ ./scripts/run_inference.sh \
81
+ --config configs/inference/llama3_3B_recipe.yaml \
82
+ --debug
83
+ ```
84
+
85
+ ### Evaluation
86
+
87
+ Evaluate model performance on benchmarks:
88
+
89
+ ```bash
90
+ # Evaluate on MMLU benchmark
91
+ ./scripts/run_evaluation.sh \
92
+ --config configs/evaluation/mmlu/llama3_3B_recipe.yaml
93
+
94
+ # Evaluate with debug output
95
+ ./scripts/run_evaluation.sh \
96
+ --config configs/evaluation/mmlu/llama3_3B_recipe.yaml \
97
+ --debug
98
+ ```
99
+
100
+ -----
101
+
102
+ ## 📁 Project Structure
103
+
104
+ ```
105
+ FAI-RL/
106
+ ├── core/ # Core framework components
107
+ ├── trainers/ # Training method implementations
108
+ ├── inference/ # Inference components
109
+ ├── evaluations/ # Evaluation system
110
+ ├── configs/ # Configuration files
111
+ │ ├── training/ # Training configurations
112
+ │ ├── inference/ # Inference configurations
113
+ │ ├── evaluation/ # Evaluation configurations
114
+ │ └── deepspeed/ # DeepSpeed ZeRO configurations
115
+ ├── utils/ # Utility modules
116
+ ├── scripts/ # Scripts
117
+ ├── logs/ # Training logs (auto-generated)
118
+ └── outputs/ # Inference output (auto-generated)
119
+ ```
120
+
121
+ -----
122
+
123
+ ## 🔗 Quick Links
124
+
125
+ * **[Training Guide](./trainers/README.md)** - Comprehensive guide to configuring and running model training with detailed parameter explanations
126
+ * **[Inference Guide](./inference/README.md)** - Running model inference and text generation
127
+ * **[Evaluation Guide](./evaluations/README.md)** - Evaluating model performance on standard benchmarks
128
+
129
+ ## Algorithm Selection Guide
130
+
131
+ Choose the right algorithm for your use case:
132
+
133
+ | Algorithm | Best For | Requirements | Key Benefits |
134
+ |-----------|----------|--------------|--------------|
135
+ | **SFT** | Initial instruction tuning, domain adaptation | Prompt-response pairs | Simple, fast, establishes baseline |
136
+ | **DPO** | Aligning to human preferences | Preference pairs (chosen/rejected) | No reward model needed, stable training |
137
+ | **PPO** | Complex sequential tasks, agentic workflows | Preference pairs + reward model | Most flexible, handles multi-turn interactions |
138
+ | **GRPO** | Math reasoning, efficiency-focused tasks | Question-answer pairs | No critic model, faster training |
139
+ | **GSPO** | Multi-turn RL, stable sequence-level optimization | Question-answer pairs | Better stability than GRPO |
140
+
141
+ ## Memory Optimization
142
+
143
+ FAI-RL supports various techniques to train large models efficiently:
144
+
145
+ * **Full Fine-tuning:** Train all model parameters (requires most memory)
146
+ * **LoRA:** Parameter-efficient training (~10% memory of full fine-tuning)
147
+ * **QLoRA:** 4-bit quantized LoRA (train 7B+ models on a single consumer GPU)
147
+ * **DeepSpeed ZeRO-3:** Distributed training for models that don't fit on a single GPU
149
+
150
+ ## 🧪 Tested Environment
151
+
152
+ This framework has been validated on:
153
+
154
+ * **Instance:** AWS EC2 p4d.24xlarge
155
+ * **GPUs:** 8 x NVIDIA A100-SXM4-80GB (80GB VRAM each)
156
+ * **CPU:** 96 vCPUs
157
+ * **Memory:** 1152 GiB
158
+ * **Storage:** 8TB NVMe SSD
159
+ * **Network:** 400 Gbps
@@ -0,0 +1 @@
1
+ """Configuration files and templates."""
@@ -0,0 +1,31 @@
1
+ {
2
+ "zero_optimization": {
3
+ "stage": 3,
4
+ "offload_optimizer": {
5
+ "device": "cpu",
6
+ "pin_memory": true
7
+ },
8
+ "offload_param": {
9
+ "device": "cpu",
10
+ "pin_memory": true
11
+ },
12
+ "overlap_comm": true,
13
+ "contiguous_gradients": true,
14
+ "reduce_bucket_size": 5e8,
15
+ "stage3_prefetch_bucket_size": 5e8,
16
+ "stage3_param_persistence_threshold": 1e6,
17
+ "sub_group_size": 1e9,
18
+ "stage3_max_live_parameters": 1e9,
19
+ "stage3_max_reuse_distance": 1e9,
20
+ "stage3_gather_16bit_weights_on_model_save": true
21
+ },
22
+ "gradient_accumulation_steps": 16,
23
+ "gradient_clipping": 1.0,
24
+ "steps_per_print": 10,
25
+ "train_batch_size": 16,
26
+ "train_micro_batch_size_per_gpu": 1,
27
+ "wall_clock_breakdown": false,
28
+ "bf16": {
29
+ "enabled": true
30
+ }
31
+ }