litgpt 0.2.0.dev0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/PKG-INFO +322 -113
- litgpt-0.3.1/README.md +607 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/__init__.py +2 -11
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/__main__.py +14 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/adapter.py +4 -1
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/adapter_v2.py +33 -2
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/args.py +22 -2
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/chat/base.py +38 -9
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/config.py +123 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/__init__.py +2 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/alpaca.py +3 -2
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/alpaca_2k.py +2 -1
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/base.py +2 -2
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/deita.py +2 -3
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/dolly.py +1 -1
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/flan.py +2 -2
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/json_data.py +10 -5
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/lima.py +2 -3
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/lit_data.py +1 -1
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/longform.py +1 -2
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/openwebtext.py +1 -1
- litgpt-0.3.1/litgpt/data/text_files.py +133 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/tinyllama.py +1 -1
- litgpt-0.3.1/litgpt/data/tinystories.py +143 -0
- litgpt-0.3.1/litgpt/deploy/serve.py +168 -0
- litgpt-0.3.1/litgpt/eval/evaluate.py +118 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/finetune/adapter.py +35 -18
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/finetune/adapter_v2.py +35 -18
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/finetune/full.py +33 -15
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/finetune/lora.py +36 -18
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/generate/adapter.py +17 -3
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/generate/adapter_v2.py +17 -3
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/generate/base.py +63 -8
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/generate/full.py +17 -3
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/generate/sequentially.py +17 -3
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/generate/tp.py +16 -2
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/lora.py +52 -50
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/model.py +28 -10
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/pretrain.py +45 -25
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/prompts.py +32 -4
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/scripts/convert_hf_checkpoint.py +12 -3
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/scripts/convert_lit_checkpoint.py +1 -1
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/scripts/download.py +27 -11
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/scripts/merge_lora.py +16 -9
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/tokenizer.py +7 -7
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/utils.py +41 -7
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt.egg-info/PKG-INFO +322 -113
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt.egg-info/SOURCES.txt +11 -1
- litgpt-0.3.1/litgpt.egg-info/requires.txt +33 -0
- litgpt-0.3.1/pyproject.toml +72 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_adapter.py +58 -25
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_adapter_v2.py +65 -47
- litgpt-0.3.1/tests/test_args.py +36 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_chat.py +32 -11
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_ci.py +1 -1
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_cli.py +15 -15
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_config.py +3 -21
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_config_hub.py +12 -7
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_convert_hf_checkpoint.py +3 -7
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_convert_lit_checkpoint.py +26 -126
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_convert_pretrained_checkpoint.py +2 -2
- litgpt-0.3.1/tests/test_evaluate.py +55 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_full.py +8 -7
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_generate.py +26 -11
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_generate_adapter.py +2 -2
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_generate_sequentially.py +6 -15
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_generate_tp.py +6 -10
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_lora.py +141 -86
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_merge_lora.py +14 -13
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_model.py +39 -171
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_pretrain.py +29 -23
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_prompts.py +16 -16
- litgpt-0.3.1/tests/test_readme.py +170 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_rope.py +3 -4
- litgpt-0.3.1/tests/test_serve.py +42 -0
- litgpt-0.3.1/tests/test_thunder_ddp.py +89 -0
- litgpt-0.3.1/tests/test_thunder_fsdp.py +332 -0
- litgpt-0.3.1/tests/test_thunder_pretrain.py +55 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_tokenizer.py +2 -9
- litgpt-0.3.1/tests/test_unsloth_executor.py +172 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/tests/test_utils.py +54 -32
- litgpt-0.2.0.dev0/README.md +0 -404
- litgpt-0.2.0.dev0/litgpt/data/tinystories.py +0 -181
- litgpt-0.2.0.dev0/litgpt.egg-info/requires.txt +0 -25
- litgpt-0.2.0.dev0/pyproject.toml +0 -66
- litgpt-0.2.0.dev0/tests/test_lm_eval_harness.py +0 -91
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/LICENSE +0 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/chat/__init__.py +0 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/alpaca_gpt4.py +1 -1
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/prepare_slimpajama.py +1 -1
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/data/prepare_starcoder.py +0 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/finetune/__init__.py +0 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/generate/__init__.py +0 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/scripts/__init__.py +0 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt/scripts/convert_pretrained_checkpoint.py +0 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt.egg-info/dependency_links.txt +0 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt.egg-info/entry_points.txt +0 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/litgpt.egg-info/top_level.txt +0 -0
- {litgpt-0.2.0.dev0 → litgpt-0.3.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: litgpt
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Hackable implementation of state-of-the-art open-source LLMs
|
|
5
5
|
Author-email: Lightning AI <contact@lightning.ai>
|
|
6
6
|
License: Apache License
|
|
@@ -210,193 +210,394 @@ Project-URL: documentation, https://github.com/lightning-AI/litgpt/tutorials
|
|
|
210
210
|
Description-Content-Type: text/markdown
|
|
211
211
|
License-File: LICENSE
|
|
212
212
|
Requires-Dist: torch>=2.2.0
|
|
213
|
-
Requires-Dist: lightning==2.3.0.
|
|
213
|
+
Requires-Dist: lightning==2.3.0.dev20240428
|
|
214
214
|
Requires-Dist: jsonargparse[signatures]>=4.27.6
|
|
215
215
|
Provides-Extra: test
|
|
216
|
-
Requires-Dist: pytest; extra == "test"
|
|
217
|
-
Requires-Dist: pytest-rerunfailures; extra == "test"
|
|
218
|
-
Requires-Dist: pytest-timeout; extra == "test"
|
|
216
|
+
Requires-Dist: pytest>=8.1.1; extra == "test"
|
|
217
|
+
Requires-Dist: pytest-rerunfailures>=14.0; extra == "test"
|
|
218
|
+
Requires-Dist: pytest-timeout>=2.3.1; extra == "test"
|
|
219
|
+
Requires-Dist: pytest-dependency>=0.6.0; extra == "test"
|
|
219
220
|
Requires-Dist: transformers>=4.38.0; extra == "test"
|
|
220
|
-
Requires-Dist: einops; extra == "test"
|
|
221
|
-
Requires-Dist: protobuf; extra == "test"
|
|
221
|
+
Requires-Dist: einops>=0.7.0; extra == "test"
|
|
222
|
+
Requires-Dist: protobuf>=4.23.4; extra == "test"
|
|
223
|
+
Requires-Dist: lightning-thunder==0.2.0.dev20240505; python_version >= "3.10" and extra == "test"
|
|
222
224
|
Provides-Extra: all
|
|
223
225
|
Requires-Dist: bitsandbytes==0.42.0; extra == "all"
|
|
224
|
-
Requires-Dist: sentencepiece; extra == "all"
|
|
225
|
-
Requires-Dist: tokenizers; extra == "all"
|
|
226
|
-
Requires-Dist:
|
|
227
|
-
Requires-Dist:
|
|
228
|
-
Requires-Dist:
|
|
229
|
-
Requires-Dist: zstandard; extra == "all"
|
|
230
|
-
Requires-Dist: pandas; extra == "all"
|
|
231
|
-
Requires-Dist: pyarrow; extra == "all"
|
|
232
|
-
Requires-Dist: tensorboard; extra == "all"
|
|
233
|
-
Requires-Dist: torchmetrics; extra == "all"
|
|
226
|
+
Requires-Dist: sentencepiece>=0.2.0; extra == "all"
|
|
227
|
+
Requires-Dist: tokenizers>=0.15.2; extra == "all"
|
|
228
|
+
Requires-Dist: requests>=2.31.0; extra == "all"
|
|
229
|
+
Requires-Dist: litdata>=0.2.2; extra == "all"
|
|
230
|
+
Requires-Dist: litserve>=0.1.0; extra == "all"
|
|
231
|
+
Requires-Dist: zstandard>=0.22.0; extra == "all"
|
|
232
|
+
Requires-Dist: pandas>=1.9.0; extra == "all"
|
|
233
|
+
Requires-Dist: pyarrow>=15.0.2; extra == "all"
|
|
234
|
+
Requires-Dist: tensorboard>=2.14.0; extra == "all"
|
|
235
|
+
Requires-Dist: torchmetrics>=1.3.1; extra == "all"
|
|
236
|
+
Requires-Dist: datasets>=2.18.0; extra == "all"
|
|
237
|
+
Requires-Dist: transformers>=4.38.0; extra == "all"
|
|
238
|
+
Requires-Dist: lm-eval>=0.4.2; extra == "all"
|
|
239
|
+
Requires-Dist: safetensors>=0.4.3; extra == "all"
|
|
234
240
|
Requires-Dist: huggingface_hub[hf_transfer]>=0.21.0; extra == "all"
|
|
235
241
|
|
|
236
242
|
<div align="center">
|
|
237
|
-
<img src="https://pl-public-data.s3.amazonaws.com/assets_lightning/LitStableLM_Badge.png" alt="LitGPT" width="128"/>
|
|
238
243
|
|
|
239
|
-
# ⚡ LitGPT
|
|
240
|
-
|
|
241
|
-
<!--
|
|
242
|
-
<p align="center">
|
|
243
|
-
<a href="https://www.lightning.ai/">Lightning.ai</a> •
|
|
244
|
-
<a href="https://lightning.ai/docs/pytorch/stable/">PyTorch Lightning</a> •
|
|
245
|
-
<a href="https://lightning.ai/docs/fabric/stable/">Fabric</a>
|
|
246
|
-
</p>
|
|
247
|
-
-->
|
|
248
|
-
|
|
249
|
-

|
|
250
|
-
 [](https://github.com/Lightning-AI/lit-stablelm/blob/master/LICENSE) [](https://discord.gg/VptPCZkGNa)
|
|
251
|
-
|
|
252
|
-
</div>
|
|
253
244
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
⚡ LitGPT is a hackable [implementation](litgpt/model.py) of state-of-the-art open-source large language models released under the **Apache 2.0 license**.
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
## LitGPT supports
|
|
260
|
-
|
|
261
|
-
✅ [The latest model weights](tutorials/download_model_weights.md): Gemma, Mistral, Mixtral, Phi 2, Llama 2, Falcon, CodeLlama, and [many more](tutorials/download_model_weights.md).
|
|
262
|
-
|
|
263
|
-
✅ Optimized and efficient code: Flash Attention v2, multi-GPU support via fully-sharded data parallelism, [optional CPU offloading](tutorials/oom.md#do-sharding-across-multiple-gpus), and [TPU and XLA support](./xla).
|
|
245
|
+
# ⚡ LitGPT
|
|
264
246
|
|
|
265
|
-
|
|
247
|
+
**Pretrain, finetune, evaluate, and deploy 20+ LLMs on your own data**
|
|
266
248
|
|
|
267
|
-
|
|
249
|
+
Uses the latest state-of-the-art techniques:
|
|
268
250
|
|
|
269
|
-
✅
|
|
251
|
+
✅ flash attention ✅ fp4/8/16/32 ✅ LoRA, QLoRA, Adapter (v1, v2) ✅ FSDP ✅ 1-1000+ GPUs/TPUs
|
|
270
252
|
|
|
271
|
-
|
|
253
|
+
---
|
|
272
254
|
|
|
273
|
-
✅ [Exporting](https://github.com/Lightning-AI/litgpt/blob/wip/tutorials/convert_lit_models.md) to other popular model weight formats.
|
|
274
255
|
|
|
275
|
-
|
|
256
|
+

|
|
257
|
+
 [](https://github.com/Lightning-AI/lit-stablelm/blob/master/LICENSE) [](https://discord.gg/VptPCZkGNa)
|
|
276
258
|
|
|
277
|
-
|
|
259
|
+
<p align="center">
|
|
260
|
+
<a href="https://lightning.ai/">Lightning AI</a> •
|
|
261
|
+
<a href="#choose-from-20-llms">Models</a> •
|
|
262
|
+
<a href="#quick-start">Quick start</a> •
|
|
263
|
+
<a href="#use-an-llm-for-inference">Inference</a> •
|
|
264
|
+
<a href="#finetune-an-llm">Finetune</a> •
|
|
265
|
+
<a href="#pretrain-an-llm">Pretrain</a> •
|
|
266
|
+
<a href="#deploy-an-llm">Deploy</a> •
|
|
267
|
+
<a href="#state-of-the-art-features">Features</a> •
|
|
268
|
+
<a href="#training-recipes">Training recipes (YAML)</a>
|
|
269
|
+
</p>
|
|
278
270
|
|
|
271
|
+
</div>
|
|
279
272
|
|
|
280
273
|
|
|
281
|
-
<
|
|
274
|
+
<img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/GithubLitGPTDAG2.png" alt="LitGPT steps" width="auto"/>
|
|
282
275
|
|
|
283
276
|
|
|
284
|
-
|
|
277
|
+
# Finetune, pretrain and deploy LLMs Lightning fast ⚡⚡
|
|
278
|
+
LitGPT is a command-line tool designed to easily [finetune](#finetune-an-llm), [pretrain](#pretrain-an-llm), [evaluate](#use-an-llm-for-inference), and [deploy](#deploy-an-llm) [20+ LLMs](#choose-from-20-llms) **on your own data**. It features highly-optimized [training recipes](#training-recipes) for the world's most powerful open-source large language models (LLMs).
|
|
285
279
|
|
|
286
|
-
|
|
280
|
+
We reimplemented all model architectures and training recipes from scratch for 4 reasons:
|
|
287
281
|
|
|
282
|
+
1. Remove all abstraction layers and have single file implementations.
|
|
283
|
+
2. Guarantee Apache 2.0 compliance to enable enterprise use without limits.
|
|
284
|
+
3. Optimize each model's architectural details to maximize performance, reduce costs, and speed up training.
|
|
285
|
+
4. Provide highly-optimized [recipe configs](#training-recipes) that we have tested at enterprise scale.
|
|
288
286
|
|
|
289
|
-
|
|
290
|
-
|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|
291
|
-
| <p align="left">[Prepare the TinyLlama 1T token dataset](https://lightning.ai/lightning-ai/studios/prepare-the-tinyllama-1t-token-dataset) <br> [<img src="https://pl-public-data.s3.amazonaws.com/assets_litgpt/readme/3.webp" width="300"></p>](https://lightning.ai/lightning-ai/studios/prepare-the-tinyllama-1t-token-dataset) | [Pretrain LLMs - TinyLlama 1.1B](https://lightning.ai/lightning-ai/studios/pretrain-llms-tinyllama-1-1b) <br> <p align="left">[<img src="https://pl-public-data.s3.amazonaws.com/assets_litgpt/readme/4.webp" width="300"></p>](https://lightning.ai/lightning-ai/studios/pretrain-llms-tinyllama-1-1b) |
|
|
292
|
-
| [Continued Pretraining with TinyLlama 1.1B](https://lightning.ai/lightning-ai/studios/continued-pretraining-with-tinyllama-1-1b) <br> <p align="left">[<img src="https://pl-public-data.s3.amazonaws.com/assets_litgpt/readme/1.webp" width="300"></p>](https://lightning.ai/lightning-ai/studios/continued-pretraining-with-tinyllama-1-1b) | [Instruction finetuning - TinyLlama 1.1B LLM](https://lightning.ai/lightning-ai/studios/instruction-finetuning-tinyllama-1-1b-llm) <br> <p align="left">[<img src="https://pl-public-data.s3.amazonaws.com/assets_litgpt/readme/2.webp" width="300"></p>](https://lightning.ai/lightning-ai/studios/instruction-finetuning-tinyllama-1-1b-llm) |
|
|
293
|
-
| | |
|
|
287
|
+
---
|
|
294
288
|
|
|
289
|
+
|
|
295
290
|
|
|
291
|
+
# Choose from 20+ LLMs
|
|
292
|
+
LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials/download_model_weights.md) without layers of abstraction:
|
|
296
293
|
|
|
294
|
+
| Model | Model size | Author | Reference |
|
|
295
|
+
|----|----|----|----|
|
|
296
|
+
| Llama 3 | 8B, 70B | Meta AI | [Meta AI 2024](https://github.com/meta-llama/llama3) |
|
|
297
|
+
| Llama 2 | 7B, 13B, 70B | Meta AI | [Touvron et al. 2023](https://arxiv.org/abs/2307.09288) |
|
|
298
|
+
| Code Llama | 7B, 13B, 34B, 70B | Meta AI | [Rozière et al. 2023](https://arxiv.org/abs/2308.12950) |
|
|
299
|
+
| Mixtral MoE | 8x7B | Mistral AI | [Mistral AI 2023](https://mistral.ai/news/mixtral-of-experts/) |
|
|
300
|
+
| Mistral | 7B | Mistral AI | [Mistral AI 2023](https://mistral.ai/news/announcing-mistral-7b/) |
|
|
301
|
+
| CodeGemma | 7B | Google | [Google Team, Google Deepmind](https://ai.google.dev/gemma/docs/codegemma) |
|
|
302
|
+
| ... | ... | ... | ... |
|
|
297
303
|
|
|
304
|
+
<details>
|
|
305
|
+
<summary>See full list of 20+ LLMs</summary>
|
|
298
306
|
|
|
299
|
-
|
|
300
|
-
<br>
|
|
301
307
|
|
|
302
308
|
|
|
309
|
+
#### All models
|
|
310
|
+
|
|
311
|
+
| Model | Model size | Author | Reference |
|
|
312
|
+
|----|----|----|----|
|
|
313
|
+
| CodeGemma | 7B | Google | [Google Team, Google Deepmind](https://ai.google.dev/gemma/docs/codegemma) |
|
|
314
|
+
| Code Llama | 7B, 13B, 34B, 70B | Meta AI | [Rozière et al. 2023](https://arxiv.org/abs/2308.12950) |
|
|
315
|
+
| Danube2 | 1.8B | H2O.ai | [H2O.ai](https://h2o.ai/platform/danube-1-8b/) |
|
|
316
|
+
| Dolly | 3B, 7B, 12B | Databricks | [Conover et al. 2023](https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm) |
|
|
317
|
+
| Falcon | 7B, 40B, 180B | TII UAE | [TII 2023](https://falconllm.tii.ae) |
|
|
318
|
+
| FreeWilly2 (Stable Beluga 2) | 70B | Stability AI | [Stability AI 2023](https://stability.ai/blog/stable-beluga-large-instruction-fine-tuned-models) |
|
|
319
|
+
| Function Calling Llama 2 | 7B | Trelis | [Trelis et al. 2023](https://huggingface.co/Trelis/Llama-2-7b-chat-hf-function-calling-v2) |
|
|
320
|
+
| Gemma | 2B, 7B | Google | [Google Team, Google Deepmind](https://storage.googleapis.com/deepmind-media/gemma/gemma-report.pdf) |
|
|
321
|
+
| Llama 2 | 7B, 13B, 70B | Meta AI | [Touvron et al. 2023](https://arxiv.org/abs/2307.09288) |
|
|
322
|
+
| Llama 3 | 8B, 70B | Meta AI | [Meta AI 2024](https://github.com/meta-llama/llama3) |
|
|
323
|
+
| LongChat | 7B, 13B | LMSYS | [LongChat Team 2023](https://lmsys.org/blog/2023-06-29-longchat/) |
|
|
324
|
+
| Mixtral MoE | 8x7B | Mistral AI | [Mistral AI 2023](https://mistral.ai/news/mixtral-of-experts/) |
|
|
325
|
+
| Mistral | 7B | Mistral AI | [Mistral AI 2023](https://mistral.ai/news/announcing-mistral-7b/) |
|
|
326
|
+
| Nous-Hermes | 7B, 13B, 70B | NousResearch | [Org page](https://huggingface.co/NousResearch) |
|
|
327
|
+
| OpenLLaMA | 3B, 7B, 13B | OpenLM Research | [Geng & Liu 2023](https://github.com/openlm-research/open_llama) |
|
|
328
|
+
| Phi | 1.3B, 2.7B | Microsoft Research | [Li et al. 2023](https://arxiv.org/abs/2309.05463) |
|
|
329
|
+
| Platypus | 7B, 13B, 70B | Lee et al. | [Lee, Hunter, and Ruiz 2023](https://arxiv.org/abs/2308.07317) |
|
|
330
|
+
| Pythia | {14,31,70,160,410}M, {1,1.4,2.8,6.9,12}B | EleutherAI | [Biderman et al. 2023](https://arxiv.org/abs/2304.01373) |
|
|
331
|
+
| RedPajama-INCITE | 3B, 7B | Together | [Together 2023](https://together.ai/blog/redpajama-models-v1) |
|
|
332
|
+
| StableCode | 3B | Stability AI | [Stability AI 2023](https://stability.ai/blog/stablecode-llm-generative-ai-coding) |
|
|
333
|
+
| StableLM | 3B, 7B | Stability AI | [Stability AI 2023](https://github.com/Stability-AI/StableLM) |
|
|
334
|
+
| StableLM Zephyr | 3B | Stability AI | [Stability AI 2023](https://stability.ai/blog/stablecode-llm-generative-ai-coding) |
|
|
335
|
+
| TinyLlama | 1.1B | Zhang et al. | [Zhang et al. 2023](https://github.com/jzhang38/TinyLlama) |
|
|
336
|
+
| Vicuna | 7B, 13B, 33B | LMSYS | [Li et al. 2023](https://lmsys.org/blog/2023-03-30-vicuna/) |
|
|
337
|
+
|
|
338
|
+
</details>
|
|
303
339
|
|
|
340
|
+
|
|
304
341
|
|
|
305
|
-
##
|
|
342
|
+
## Install LitGPT
|
|
306
343
|
|
|
307
|
-
|
|
344
|
+
Install LitGPT with all dependencies (including CLI, quantization, tokenizers for all models, etc.):
|
|
308
345
|
|
|
309
346
|
```bash
|
|
310
|
-
pip install 'litgpt[all]
|
|
347
|
+
pip install 'litgpt[all]'
|
|
311
348
|
```
|
|
312
349
|
|
|
313
|
-
|
|
350
|
+
<details>
|
|
351
|
+
<summary>Advanced install options</summary>
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
Install from source:
|
|
314
356
|
|
|
315
357
|
```bash
|
|
316
358
|
git clone https://github.com/Lightning-AI/litgpt
|
|
317
359
|
cd litgpt
|
|
318
360
|
pip install -e '.[all]'
|
|
319
361
|
```
|
|
362
|
+
</details>
|
|
320
363
|
|
|
364
|
+
---
|
|
321
365
|
|
|
322
366
|
|
|
367
|
+
# Quick start
|
|
368
|
+
After installing LitGPT, select the model and action you want to take on that model (finetune, pretrain, evaluate, deploy, etc...):
|
|
369
|
+
|
|
370
|
+
```bash
|
|
371
|
+
# litgpt [action] [model]
|
|
372
|
+
litgpt download meta-llama/Meta-Llama-3-8B-Instruct
|
|
373
|
+
litgpt chat meta-llama/Meta-Llama-3-8B-Instruct
|
|
374
|
+
litgpt finetune meta-llama/Meta-Llama-3-8B-Instruct
|
|
375
|
+
litgpt pretrain meta-llama/Meta-Llama-3-8B-Instruct
|
|
376
|
+
litgpt serve meta-llama/Meta-Llama-3-8B-Instruct
|
|
377
|
+
```
|
|
323
378
|
|
|
324
|
-
|
|
379
|
+
|
|
325
380
|
|
|
381
|
+
### Use an LLM for inference
|
|
382
|
+
Use an LLM for inference to test its chat capabilities, run evaluations, extract embeddings, and more.
|
|
383
|
+
Here's an example showing how to use the Phi-2 LLM.
|
|
326
384
|
|
|
327
|
-
|
|
385
|
+
<a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-chat">
|
|
386
|
+
<img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open In Studio"/>
|
|
387
|
+
</a>
|
|
328
388
|
|
|
389
|
+
|
|
329
390
|
|
|
330
391
|
```bash
|
|
331
392
|
# 1) Download a pretrained model
|
|
332
|
-
litgpt download --repo_id
|
|
393
|
+
litgpt download --repo_id microsoft/phi-2
|
|
333
394
|
|
|
334
395
|
# 2) Chat with the model
|
|
335
396
|
litgpt chat \
|
|
336
|
-
--checkpoint_dir checkpoints/
|
|
397
|
+
--checkpoint_dir checkpoints/microsoft/phi-2
|
|
337
398
|
|
|
338
399
|
>> Prompt: What do Llamas eat?
|
|
339
400
|
```
|
|
340
401
|
|
|
341
|
-
|
|
342
|
-
|
|
402
|
+
The download of certain models requires an additional access token. You can read more about this in the [download](tutorials/download_model_weights.md#specific-models-and-access-tokens) documentation.
|
|
403
|
+
For more information on the different inference options, refer to the [inference](tutorials/inference.md) tutorial.
|
|
343
404
|
|
|
344
405
|
|
|
345
|
-
## Finetuning and pretraining
|
|
346
406
|
|
|
347
|
-
|
|
407
|
+
### Finetune an LLM
|
|
408
|
+
[Finetune](tutorials/finetune.md) a model to specialize it on your own custom dataset:
|
|
409
|
+
|
|
410
|
+
<a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-finetune">
|
|
411
|
+
<img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open In Studio"/>
|
|
412
|
+
</a>
|
|
413
|
+
|
|
414
|
+
|
|
348
415
|
|
|
349
416
|
```bash
|
|
350
417
|
# 1) Download a pretrained model
|
|
351
418
|
litgpt download --repo_id microsoft/phi-2
|
|
352
419
|
|
|
353
420
|
# 2) Finetune the model
|
|
354
|
-
|
|
421
|
+
curl -L https://huggingface.co/datasets/ksaw008/finance_alpaca/resolve/main/finance_alpaca.json -o my_custom_dataset.json
|
|
422
|
+
|
|
423
|
+
litgpt finetune \
|
|
355
424
|
--checkpoint_dir checkpoints/microsoft/phi-2 \
|
|
356
|
-
--data
|
|
357
|
-
--
|
|
425
|
+
--data JSON \
|
|
426
|
+
--data.json_path my_custom_dataset.json \
|
|
427
|
+
--data.val_split_fraction 0.1 \
|
|
428
|
+
--out_dir out/custom-model
|
|
358
429
|
|
|
359
430
|
# 3) Chat with the model
|
|
360
431
|
litgpt chat \
|
|
361
|
-
--checkpoint_dir out/
|
|
432
|
+
--checkpoint_dir out/custom-model/final
|
|
362
433
|
```
|
|
363
434
|
|
|
364
435
|
|
|
365
|
-
## Configuration files for enhanced performance
|
|
366
436
|
|
|
367
|
-
|
|
437
|
+
### Pretrain an LLM
|
|
438
|
+
Train an LLM from scratch on your own data via pretraining:
|
|
368
439
|
|
|
440
|
+
<a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-pretrain">
|
|
441
|
+
<img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open In Studio"/>
|
|
442
|
+
</a>
|
|
443
|
+
|
|
444
|
+
|
|
369
445
|
|
|
370
446
|
```bash
|
|
371
|
-
|
|
372
|
-
|
|
447
|
+
mkdir -p custom_texts
|
|
448
|
+
curl https://www.gutenberg.org/cache/epub/24440/pg24440.txt --output custom_texts/book1.txt
|
|
449
|
+
curl https://www.gutenberg.org/cache/epub/26393/pg26393.txt --output custom_texts/book2.txt
|
|
450
|
+
|
|
451
|
+
# 1) Download a tokenizer
|
|
452
|
+
litgpt download \
|
|
453
|
+
--repo_id EleutherAI/pythia-160m \
|
|
454
|
+
--tokenizer_only True
|
|
455
|
+
|
|
456
|
+
# 2) Pretrain the model
|
|
457
|
+
litgpt pretrain \
|
|
458
|
+
--model_name pythia-160m \
|
|
459
|
+
--tokenizer_dir checkpoints/EleutherAI/pythia-160m \
|
|
460
|
+
--data TextFiles \
|
|
461
|
+
--data.train_data_path "custom_texts/" \
|
|
462
|
+
--train.max_tokens 10_000_000 \
|
|
463
|
+
--out_dir out/custom-model
|
|
464
|
+
|
|
465
|
+
# 3) Chat with the model
|
|
466
|
+
litgpt chat \
|
|
467
|
+
--checkpoint_dir out/custom-model/final
|
|
373
468
|
```
|
|
374
469
|
|
|
375
|
-
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
### Continue pretraining an LLM
|
|
473
|
+
This is another way of finetuning that specializes an already pretrained model by training on custom data:
|
|
474
|
+
|
|
376
475
|
|
|
476
|
+
<a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-continue-pretraining">
|
|
477
|
+
<img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open In Studio"/>
|
|
478
|
+
</a>
|
|
479
|
+
|
|
480
|
+
|
|
377
481
|
|
|
378
482
|
```bash
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
483
|
+
mkdir -p custom_texts
|
|
484
|
+
curl https://www.gutenberg.org/cache/epub/24440/pg24440.txt --output custom_texts/book1.txt
|
|
485
|
+
curl https://www.gutenberg.org/cache/epub/26393/pg26393.txt --output custom_texts/book2.txt
|
|
486
|
+
|
|
487
|
+
# 1) Download a pretrained model
|
|
488
|
+
litgpt download --repo_id EleutherAI/pythia-160m
|
|
489
|
+
|
|
490
|
+
# 2) Continue pretraining the model
|
|
491
|
+
litgpt pretrain \
|
|
492
|
+
--model_name pythia-160m \
|
|
493
|
+
--tokenizer_dir checkpoints/EleutherAI/pythia-160m \
|
|
494
|
+
--initial_checkpoint_dir checkpoints/EleutherAI/pythia-160m \
|
|
495
|
+
--data TextFiles \
|
|
496
|
+
--data.train_data_path "custom_texts/" \
|
|
497
|
+
--train.max_tokens 10_000_000 \
|
|
498
|
+
--out_dir out/custom-model
|
|
499
|
+
|
|
500
|
+
# 3) Chat with the model
|
|
501
|
+
litgpt chat \
|
|
502
|
+
--checkpoint_dir out/custom-model/final
|
|
382
503
|
```
|
|
383
504
|
|
|
384
|
-
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
### Deploy an LLM
|
|
508
|
+
Once you're ready to deploy a finetuned LLM, run this command:
|
|
509
|
+
|
|
510
|
+
<a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-serve">
|
|
511
|
+
<img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open In Studio"/>
|
|
512
|
+
</a>
|
|
385
513
|
|
|
386
514
|
|
|
387
515
|
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
516
|
+
```bash
|
|
517
|
+
# Locate the checkpoint of your finetuned or pretrained model and call the `serve` command:
|
|
518
|
+
litgpt serve --checkpoint_dir path/to/your/checkpoint/microsoft/phi-2
|
|
519
|
+
|
|
520
|
+
# Alternative: if you haven't finetuned, download any checkpoint to deploy it:
|
|
521
|
+
litgpt download --repo_id microsoft/phi-2
|
|
522
|
+
litgpt serve --checkpoint_dir checkpoints/microsoft/phi-2
|
|
523
|
+
```
|
|
524
|
+
|
|
525
|
+
Test the server in a separate terminal and integrate the model API into your AI product:
|
|
526
|
+
```python
|
|
527
|
+
# 3) Use the server (in a separate session)
|
|
528
|
+
import requests, json
|
|
529
|
+
response = requests.post(
|
|
530
|
+
"http://127.0.0.1:8000/predict",
|
|
531
|
+
json={"prompt": "Fix typos in the following sentence: Exampel input"}
|
|
532
|
+
)
|
|
533
|
+
print(response.json()["output"])
|
|
534
|
+
```
|
|
535
|
+
|
|
536
|
+
|
|
391
537
|
|
|
538
|
+
> [!NOTE]
|
|
539
|
+
> **[Read the full docs](tutorials/0_to_litgpt.md)**.
|
|
392
540
|
|
|
393
541
|
|
|
394
|
-
|
|
542
|
+
|
|
543
|
+
----
|
|
544
|
+
|
|
545
|
+
# State-of-the-art features
|
|
546
|
+
✅ State-of-the-art optimizations: Flash Attention v2, multi-GPU support via fully-sharded data parallelism, [optional CPU offloading](tutorials/oom.md#do-sharding-across-multiple-gpus), and [TPU and XLA support](extensions/xla).
|
|
547
|
+
|
|
548
|
+
✅ [Pretrain](tutorials/pretrain.md), [finetune](tutorials/finetune.md), and [deploy](tutorials/inference.md)
|
|
549
|
+
|
|
550
|
+
✅ Reduce compute requirements with low-precision settings: FP16, BF16, and FP16/FP32 mixed.
|
|
551
|
+
|
|
552
|
+
✅ Lower memory requirements with [quantization](tutorials/quantize.md): 4-bit floats, 8-bit integers, and double quantization.
|
|
553
|
+
|
|
554
|
+
✅ [Configuration files](config_hub) for great out-of-the-box performance.
|
|
555
|
+
|
|
556
|
+
✅ Parameter-efficient finetuning: [LoRA](tutorials/finetune_lora.md), [QLoRA](tutorials/finetune_lora.md), [Adapter](tutorials/finetune_adapter.md), and [Adapter v2](tutorials/finetune_adapter.md).
|
|
557
|
+
|
|
558
|
+
✅ [Exporting](tutorials/convert_lit_models.md) to other popular model weight formats.
|
|
559
|
+
|
|
560
|
+
✅ Many popular datasets for [pretraining](tutorials/pretrain.md) and [finetuning](tutorials/prepare_dataset.md), and [support for custom datasets](tutorials/prepare_dataset.md#preparing-custom-datasets-for-instruction-finetuning).
|
|
561
|
+
|
|
562
|
+
✅ Readable and easy-to-modify code to experiment with the latest research ideas.
|
|
563
|
+
|
|
395
564
|
|
|
396
565
|
|
|
397
|
-
|
|
566
|
+
---
|
|
567
|
+
|
|
568
|
+
# Training recipes
|
|
569
|
+
|
|
570
|
+
LitGPT comes with validated recipes (YAML configs) to train models under different conditions. We've generated these recipes based on the parameters we found to perform the best for different training conditions.
|
|
571
|
+
|
|
572
|
+
Browse all training recipes [here](config_hub).
|
|
573
|
+
|
|
574
|
+
### Example
|
|
575
|
+
|
|
576
|
+
```bash
|
|
577
|
+
litgpt finetune \
|
|
578
|
+
--config https://raw.githubusercontent.com/Lightning-AI/litgpt/main/config_hub/finetune/llama-2-7b/lora.yaml
|
|
579
|
+
```
|
|
580
|
+
|
|
581
|
+
### What is a config
|
|
582
|
+
Configs let you customize training for all granular parameters like:
|
|
583
|
+
|
|
584
|
+
```yaml
|
|
585
|
+
# The path to the base model's checkpoint directory to load for finetuning. (type: <class 'Path'>, default: checkpoints/stabilityai/stablelm-base-alpha-3b)
|
|
586
|
+
checkpoint_dir: checkpoints/meta-llama/Llama-2-7b-hf
|
|
587
|
+
|
|
588
|
+
# Directory in which to save checkpoints and logs. (type: <class 'Path'>, default: out/lora)
|
|
589
|
+
out_dir: out/finetune/qlora-llama2-7b
|
|
590
|
+
|
|
591
|
+
# The precision to use for finetuning. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
|
|
592
|
+
precision: bf16-true
|
|
593
|
+
|
|
594
|
+
...
|
|
595
|
+
```
|
|
398
596
|
|
|
399
|
-
|
|
597
|
+
<details>
|
|
598
|
+
<summary>Example: LoRA finetuning config</summary>
|
|
599
|
+
|
|
600
|
+
|
|
400
601
|
|
|
401
602
|
```yaml
|
|
402
603
|
# The path to the base model's checkpoint directory to load for finetuning. (type: <class 'Path'>, default: checkpoints/stabilityai/stablelm-base-alpha-3b)
|
|
@@ -522,25 +723,21 @@ logger_name: csv
|
|
|
522
723
|
# The random seed to use for reproducibility. (type: int, default: 1337)
|
|
523
724
|
seed: 1337
|
|
524
725
|
```
|
|
726
|
+
</details>
|
|
525
727
|
|
|
728
|
+
### Override config params via CLI
|
|
729
|
+
Override any parameter in the CLI:
|
|
526
730
|
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
**LitGPT** is:
|
|
534
|
-
|
|
535
|
-
- **Simple:** Single-file implementation without boilerplate.
|
|
536
|
-
- **Correct:** Numerically equivalent to the original model.
|
|
537
|
-
- **Optimized:** Runs fast on consumer hardware or at scale.
|
|
538
|
-
- **Open-source:** No strings attached.
|
|
539
|
-
|
|
540
|
-
Avoiding code duplication is **not** a goal. **Readability** and **hackability** are.
|
|
731
|
+
```bash
|
|
732
|
+
litgpt finetune \
|
|
733
|
+
--config https://raw.githubusercontent.com/Lightning-AI/litgpt/main/config_hub/finetune/llama-2-7b/lora.yaml \
|
|
734
|
+
--lora_r 4
|
|
735
|
+
```
|
|
541
736
|
|
|
542
737
|
|
|
543
738
|
|
|
739
|
+
# Community
|
|
740
|
+
|
|
544
741
|
## Get involved!
|
|
545
742
|
|
|
546
743
|
We appreciate your feedback and contributions. If you have feature requests, questions, or want to contribute code or config files, please don't hesitate to use the [GitHub Issue](https://github.com/Lightning-AI/litgpt/issues) tracker.
|
|
@@ -559,27 +756,29 @@ If you have general questions about building with LitGPT, please [join our Disco
|
|
|
559
756
|
|
|
560
757
|
## Tutorials, how-to guides, and docs
|
|
561
758
|
|
|
759
|
+
|
|
760
|
+
> [!NOTE]
|
|
761
|
+
> We recommend starting with **[Zero to LitGPT: Getting Started with Pretraining, Finetuning, and Using LLMs](tutorials/0_to_litgpt.md)** if you are new to LitGPT.
|
|
762
|
+
|
|
763
|
+
Tutorials and in-depth feature documentation can be found below:
|
|
764
|
+
|
|
562
765
|
- Finetuning, incl. LoRA, QLoRA, and Adapters ([tutorials/finetune.md](tutorials/finetune.md))
|
|
563
|
-
- Pretraining ([tutorials/
|
|
766
|
+
- Pretraining ([tutorials/pretrain.md](tutorials/pretrain.md))
|
|
564
767
|
- Model evaluation ([tutorials/evaluation.md](tutorials/evaluation.md))
|
|
565
768
|
- Supported and custom datasets ([tutorials/prepare_dataset.md](tutorials/prepare_dataset.md))
|
|
566
769
|
- Quantization ([tutorials/quantize.md](tutorials/quantize.md))
|
|
567
770
|
- Tips for dealing with out-of-memory (OOM) errors ([tutorials/oom.md](tutorials/oom.md))
|
|
568
771
|
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
772
|
|
|
574
773
|
|
|
575
774
|
## XLA
|
|
576
775
|
|
|
577
|
-
Lightning AI has partnered with Google to add first-class support for [Cloud TPUs](https://cloud.google.com/tpu) in [Lightning
|
|
776
|
+
Lightning AI has partnered with Google to add first-class support for [Cloud TPUs](https://cloud.google.com/tpu) in [Lightning's frameworks](https://github.com/Lightning-AI/lightning) and LitGPT,
|
|
578
777
|
helping democratize AI for millions of developers and researchers worldwide.
|
|
579
778
|
|
|
580
779
|
Using TPUs with Lightning is as straightforward as changing one line of code.
|
|
581
780
|
|
|
582
|
-
We provide scripts fully optimized for TPUs in the [XLA directory](xla).
|
|
781
|
+
We provide scripts fully optimized for TPUs in the [XLA directory](extensions/xla).
|
|
583
782
|
|
|
584
783
|
|
|
585
784
|
|
|
@@ -601,7 +800,7 @@ This implementation extends on [Lit-LLaMA](https://github.com/lightning-AI/lit-l
|
|
|
601
800
|
|
|
602
801
|
## Community showcase
|
|
603
802
|
|
|
604
|
-
Check out the projects below
|
|
803
|
+
Check out the projects below that use and build on LitGPT. If you have a project you'd like to add to this section, please don't hesitate to open a pull request.
|
|
605
804
|
|
|
606
805
|
|
|
607
806
|
|
|
@@ -615,6 +814,17 @@ The LitGPT repository was the official starter kit for the [NeurIPS 2023 LLM Eff
|
|
|
615
814
|
|
|
616
815
|
LitGPT powered the [TinyLlama project](https://github.com/jzhang38/TinyLlama) and [TinyLlama: An Open-Source Small Language Model](https://arxiv.org/abs/2401.02385) research paper.
|
|
617
816
|
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
**🍪 MicroLlama: MicroLlama-300M**
|
|
820
|
+
|
|
821
|
+
[MicroLlama](https://github.com/keeeeenw/MicroLlama) is a 300M Llama model pretrained on 50B tokens, powered by TinyLlama and LitGPT.
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
**🔬 Pre-training Small Base LMs with Fewer Tokens**
|
|
826
|
+
|
|
827
|
+
The research paper ["Pre-training Small Base LMs with Fewer Tokens"](https://arxiv.org/abs/2404.08634), which utilizes LitGPT, develops smaller base language models by inheriting a few transformer blocks from larger models and training on a tiny fraction of the data used by the larger models. It demonstrates that these smaller models can perform comparably to larger models despite using significantly less training data and resources.
|
|
618
828
|
|
|
619
829
|
|
|
620
830
|
|
|
@@ -636,4 +846,3 @@ If you use LitGPT in your research, please cite the following work:
|
|
|
636
846
|
## License
|
|
637
847
|
|
|
638
848
|
LitGPT is released under the [Apache 2.0](https://github.com/Lightning-AI/litgpt/blob/main/LICENSE) license.
|
|
639
|
-
|