litgpt 0.2.0.dev0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/PKG-INFO +318 -114
  2. litgpt-0.3.0/README.md +603 -0
  3. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/__init__.py +2 -11
  4. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/__main__.py +14 -0
  5. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/adapter.py +4 -1
  6. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/adapter_v2.py +33 -2
  7. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/args.py +20 -2
  8. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/chat/base.py +5 -6
  9. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/config.py +96 -0
  10. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/__init__.py +2 -0
  11. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/alpaca.py +3 -2
  12. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/alpaca_2k.py +2 -1
  13. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/base.py +2 -2
  14. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/deita.py +2 -3
  15. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/dolly.py +1 -1
  16. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/flan.py +2 -2
  17. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/json_data.py +10 -5
  18. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/lima.py +2 -3
  19. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/lit_data.py +1 -1
  20. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/longform.py +1 -2
  21. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/openwebtext.py +1 -1
  22. litgpt-0.3.0/litgpt/data/text_files.py +133 -0
  23. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/tinyllama.py +1 -1
  24. litgpt-0.3.0/litgpt/data/tinystories.py +143 -0
  25. litgpt-0.3.0/litgpt/deploy/serve.py +138 -0
  26. litgpt-0.3.0/litgpt/eval/evaluate.py +121 -0
  27. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/finetune/adapter.py +30 -17
  28. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/finetune/adapter_v2.py +30 -17
  29. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/finetune/full.py +27 -14
  30. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/finetune/lora.py +31 -17
  31. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/generate/adapter.py +1 -2
  32. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/generate/adapter_v2.py +1 -2
  33. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/generate/base.py +1 -2
  34. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/generate/full.py +1 -2
  35. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/generate/sequentially.py +1 -2
  36. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/generate/tp.py +1 -2
  37. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/lora.py +21 -18
  38. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/model.py +28 -10
  39. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/pretrain.py +31 -21
  40. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/prompts.py +24 -4
  41. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/scripts/convert_hf_checkpoint.py +12 -3
  42. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/scripts/convert_lit_checkpoint.py +1 -1
  43. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/scripts/download.py +7 -4
  44. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/scripts/merge_lora.py +7 -7
  45. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/tokenizer.py +7 -7
  46. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/utils.py +33 -4
  47. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt.egg-info/PKG-INFO +318 -114
  48. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt.egg-info/SOURCES.txt +10 -1
  49. litgpt-0.3.0/litgpt.egg-info/requires.txt +32 -0
  50. litgpt-0.3.0/pyproject.toml +71 -0
  51. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_adapter.py +57 -24
  52. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_adapter_v2.py +64 -46
  53. litgpt-0.3.0/tests/test_args.py +36 -0
  54. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_chat.py +30 -9
  55. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_cli.py +15 -15
  56. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_config.py +3 -21
  57. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_config_hub.py +12 -7
  58. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_convert_hf_checkpoint.py +3 -7
  59. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_convert_lit_checkpoint.py +25 -123
  60. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_convert_pretrained_checkpoint.py +2 -2
  61. litgpt-0.3.0/tests/test_evaluate.py +64 -0
  62. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_full.py +8 -7
  63. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_generate.py +4 -7
  64. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_generate_sequentially.py +5 -14
  65. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_generate_tp.py +4 -8
  66. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_lora.py +102 -82
  67. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_merge_lora.py +14 -13
  68. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_model.py +38 -168
  69. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_pretrain.py +29 -21
  70. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_prompts.py +15 -15
  71. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_rope.py +3 -4
  72. litgpt-0.3.0/tests/test_serve.py +42 -0
  73. litgpt-0.3.0/tests/test_thunder_ddp.py +89 -0
  74. litgpt-0.3.0/tests/test_thunder_fsdp.py +350 -0
  75. litgpt-0.3.0/tests/test_thunder_pretrain.py +55 -0
  76. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_tokenizer.py +2 -9
  77. litgpt-0.3.0/tests/test_unsloth_executor.py +172 -0
  78. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_utils.py +53 -31
  79. litgpt-0.2.0.dev0/README.md +0 -404
  80. litgpt-0.2.0.dev0/litgpt/data/tinystories.py +0 -181
  81. litgpt-0.2.0.dev0/litgpt.egg-info/requires.txt +0 -25
  82. litgpt-0.2.0.dev0/pyproject.toml +0 -66
  83. litgpt-0.2.0.dev0/tests/test_lm_eval_harness.py +0 -91
  84. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/LICENSE +0 -0
  85. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/chat/__init__.py +0 -0
  86. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/alpaca_gpt4.py +1 -1
  87. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/prepare_slimpajama.py +1 -1
  88. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/data/prepare_starcoder.py +0 -0
  89. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/finetune/__init__.py +0 -0
  90. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/generate/__init__.py +0 -0
  91. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/scripts/__init__.py +0 -0
  92. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt/scripts/convert_pretrained_checkpoint.py +0 -0
  93. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt.egg-info/dependency_links.txt +0 -0
  94. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt.egg-info/entry_points.txt +0 -0
  95. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/litgpt.egg-info/top_level.txt +0 -0
  96. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/setup.cfg +0 -0
  97. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_ci.py +0 -0
  98. {litgpt-0.2.0.dev0 → litgpt-0.3.0}/tests/test_generate_adapter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: litgpt
3
- Version: 0.2.0.dev0
3
+ Version: 0.3.0
4
4
  Summary: Hackable implementation of state-of-the-art open-source LLMs
5
5
  Author-email: Lightning AI <contact@lightning.ai>
6
6
  License: Apache License
@@ -210,122 +210,179 @@ Project-URL: documentation, https://github.com/lightning-AI/litgpt/tutorials
210
210
  Description-Content-Type: text/markdown
211
211
  License-File: LICENSE
212
212
  Requires-Dist: torch>=2.2.0
213
- Requires-Dist: lightning==2.3.0.dev20240318
213
+ Requires-Dist: lightning==2.3.0.dev20240328
214
214
  Requires-Dist: jsonargparse[signatures]>=4.27.6
215
+ Requires-Dist: litserve>=0.1.0
215
216
  Provides-Extra: test
216
- Requires-Dist: pytest; extra == "test"
217
- Requires-Dist: pytest-rerunfailures; extra == "test"
218
- Requires-Dist: pytest-timeout; extra == "test"
217
+ Requires-Dist: pytest>=8.1.1; extra == "test"
218
+ Requires-Dist: pytest-rerunfailures>=14.0; extra == "test"
219
+ Requires-Dist: pytest-timeout>=2.3.1; extra == "test"
219
220
  Requires-Dist: transformers>=4.38.0; extra == "test"
220
- Requires-Dist: einops; extra == "test"
221
- Requires-Dist: protobuf; extra == "test"
221
+ Requires-Dist: einops>=0.7.0; extra == "test"
222
+ Requires-Dist: protobuf>=4.23.4; extra == "test"
223
+ Requires-Dist: lightning-thunder==0.2.0.dev20240404; python_version >= "3.10" and extra == "test"
222
224
  Provides-Extra: all
223
225
  Requires-Dist: bitsandbytes==0.42.0; extra == "all"
224
- Requires-Dist: sentencepiece; extra == "all"
225
- Requires-Dist: tokenizers; extra == "all"
226
- Requires-Dist: datasets; extra == "all"
227
- Requires-Dist: requests; extra == "all"
228
- Requires-Dist: litdata; extra == "all"
229
- Requires-Dist: zstandard; extra == "all"
230
- Requires-Dist: pandas; extra == "all"
231
- Requires-Dist: pyarrow; extra == "all"
232
- Requires-Dist: tensorboard; extra == "all"
233
- Requires-Dist: torchmetrics; extra == "all"
226
+ Requires-Dist: sentencepiece>=0.2.0; extra == "all"
227
+ Requires-Dist: tokenizers>=0.15.2; extra == "all"
228
+ Requires-Dist: requests>=2.31.0; extra == "all"
229
+ Requires-Dist: litdata>=0.2.2; extra == "all"
230
+ Requires-Dist: zstandard>=0.22.0; extra == "all"
231
+ Requires-Dist: pandas>=1.9.0; extra == "all"
232
+ Requires-Dist: pyarrow>=15.0.2; extra == "all"
233
+ Requires-Dist: tensorboard>=2.14.0; extra == "all"
234
+ Requires-Dist: torchmetrics>=1.3.1; extra == "all"
235
+ Requires-Dist: datasets>=2.18.0; extra == "all"
236
+ Requires-Dist: transformers>=4.38.0; extra == "all"
237
+ Requires-Dist: lm-eval>=0.4.2; extra == "all"
238
+ Requires-Dist: safetensors>=0.4.3; extra == "all"
234
239
  Requires-Dist: huggingface_hub[hf_transfer]>=0.21.0; extra == "all"
235
240
 
236
241
  <div align="center">
237
- <img src="https://pl-public-data.s3.amazonaws.com/assets_lightning/LitStableLM_Badge.png" alt="LitGPT" width="128"/>
238
242
 
239
- # ⚡ LitGPT
240
-
241
- <!--
242
- <p align="center">
243
- <a href="https://www.lightning.ai/">Lightning.ai</a> •
244
- <a href="https://lightning.ai/docs/pytorch/stable/">PyTorch Lightning</a> •
245
- <a href="https://lightning.ai/docs/fabric/stable/">Fabric</a>
246
- </p>
247
- -->
248
-
249
- ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pytorch-lightning)
250
- ![cpu-tests](https://github.com/lightning-AI/lit-stablelm/actions/workflows/cpu-tests.yml/badge.svg) [![license](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/Lightning-AI/lit-stablelm/blob/master/LICENSE) [![Discord](https://img.shields.io/discord/1077906959069626439?style=plastic)](https://discord.gg/VptPCZkGNa)
251
-
252
- </div>
253
243
 
254
- &nbsp;
255
-
256
- ⚡ LitGPT is a hackable [implementation](litgpt/model.py) of state-of-the-art open-source large language models released under the **Apache 2.0 license**.
257
-
258
- &nbsp;
259
- ## LitGPT supports
260
-
261
- ✅ &nbsp;[The latest model weights](tutorials/download_model_weights.md): Gemma, Mistral, Mixtral, Phi 2, Llama 2, Falcon, CodeLlama, and [many more](tutorials/download_model_weights.md).
262
-
263
- ✅ &nbsp;Optimized and efficient code: Flash Attention v2, multi-GPU support via fully-sharded data parallelism, [optional CPU offloading](tutorials/oom.md#do-sharding-across-multiple-gpus), and [TPU and XLA support](./xla).
244
+ # ⚡ LitGPT
264
245
 
265
- ✅ &nbsp;[Pretraining](tutorials/pretraining.md), [finetuning](tutorials/finetune.md), and [inference](tutorials/inference.md) in various precision settings: FP32, FP16, BF16, and FP16/FP32 mixed.
246
+ **Pretrain, finetune, evaluate, and deploy 20+ LLMs on your own data**
266
247
 
267
- &nbsp;[Configuration files](config_hub) for great out-of-the-box performance.
248
+ Uses the latest state-of-the-art techniques:
268
249
 
269
- ✅ &nbsp;Efficient finetuning: [LoRA](tutorials/finetune_lora.md), [QLoRA](tutorials/finetune_lora.md), [Adapter](tutorials/finetune_adapter.md), and [Adapter v2](tutorials/finetune_adapter.md).
250
+ flash attention &nbsp; &nbsp; ✅ fp4/8/16/32 &nbsp; &nbsp; ✅ LoRA, QLoRA, Adapter (v1, v2) &nbsp; &nbsp; ✅ FSDP &nbsp; &nbsp; ✅ 1-1000+ GPUs/TPUs
270
251
 
271
- ✅ &nbsp;[Quantization](tutorials/quantize.md): 4-bit floats, 8-bit integers, and double quantization.
252
+ ---
272
253
 
273
- ✅ &nbsp;[Exporting](https://github.com/Lightning-AI/litgpt/blob/wip/tutorials/convert_lit_models.md) to other popular model weight formats.
274
254
 
275
- &nbsp;Many popular datasets for [pretraining](tutorials/pretrain_tinyllama.md) and [finetuning](tutorials/prepare_dataset.md), and [support for custom datasets](tutorials/prepare_dataset.md#preparing-custom-datasets-for-instruction-finetuning).
255
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pytorch-lightning)
256
+ ![cpu-tests](https://github.com/lightning-AI/lit-stablelm/actions/workflows/cpu-tests.yml/badge.svg) [![license](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://github.com/Lightning-AI/lit-stablelm/blob/master/LICENSE) [![Discord](https://img.shields.io/discord/1077906959069626439)](https://discord.gg/VptPCZkGNa)
276
257
 
277
- &nbsp;Readable and easy-to-modify code to experiment with the latest research ideas.
258
+ <p align="center">
259
+ <a href="https://lightning.ai/">Lightning AI</a> •
260
+ <a href="#choose-from-20-llms">Models</a> •
261
+ <a href="#quick-start">Quick start</a> •
262
+ <a href="#use-an-llm-for-inference">Inference</a> •
263
+ <a href="#finetune-an-llm">Finetune</a> •
264
+ <a href="#pretrain-an-llm">Pretrain</a> •
265
+ <a href="#deploy-an-llm">Deploy</a> •
266
+ <a href="#state-of-the-art-features">Features</a> •
267
+ <a href="#training-recipes">Training recipes (YAML)</a>
268
+ </p>
278
269
 
270
+ </div>
279
271
 
280
272
  &nbsp;
281
- <br>
273
+ <img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/GithubLitGPTDAG2.png" alt="LitGPT steps" width="auto"/>
282
274
  &nbsp;
283
275
 
284
- ## Project templates
285
-
286
- The following [Lightning Studio](https://lightning.ai/lightning-ai/studios) templates provide LitGPT tutorials and projects in reproducible environments with multi-GPU and multi-node support:
287
-
288
-
289
- | | |
290
- |---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
291
- | <p align="left">[Prepare the TinyLlama 1T token dataset](https://lightning.ai/lightning-ai/studios/prepare-the-tinyllama-1t-token-dataset) <br> [<img src="https://pl-public-data.s3.amazonaws.com/assets_litgpt/readme/3.webp" width="300"></p>](https://lightning.ai/lightning-ai/studios/prepare-the-tinyllama-1t-token-dataset) | [Pretrain LLMs - TinyLlama 1.1B](https://lightning.ai/lightning-ai/studios/pretrain-llms-tinyllama-1-1b) <br> <p align="left">[<img src="https://pl-public-data.s3.amazonaws.com/assets_litgpt/readme/4.webp" width="300"></p>](https://lightning.ai/lightning-ai/studios/pretrain-llms-tinyllama-1-1b) |
292
- | [Continued Pretraining with TinyLlama 1.1B](https://lightning.ai/lightning-ai/studios/continued-pretraining-with-tinyllama-1-1b) <br> <p align="left">[<img src="https://pl-public-data.s3.amazonaws.com/assets_litgpt/readme/1.webp" width="300"></p>](https://lightning.ai/lightning-ai/studios/continued-pretraining-with-tinyllama-1-1b) | [Instruction finetuning - TinyLlama 1.1B LLM](https://lightning.ai/lightning-ai/studios/instruction-finetuning-tinyllama-1-1b-llm) <br> <p align="left">[<img src="https://pl-public-data.s3.amazonaws.com/assets_litgpt/readme/2.webp" width="300"></p>](https://lightning.ai/lightning-ai/studios/instruction-finetuning-tinyllama-1-1b-llm) |
293
- | | |
276
+ # Finetune, pretrain and deploy LLMs Lightning fast ⚡⚡
277
+ LitGPT is a command-line tool designed to easily [finetune](#finetune-an-llm), [pretrain](#pretrain-an-llm), [evaluate](#use-an-llm-for-inference), and [deploy](#deploy-an-llm) [20+ LLMs](#choose-from-20-llms) **on your own data**. It features highly-optimized [training recipes](#training-recipes) for the world's most powerful open-source large language models (LLMs).
294
278
 
279
+ We reimplemented all model architectures and training recipes from scratch for 4 reasons:
295
280
 
281
+ 1. Remove all abstraction layers and have single file implementations.
282
+ 2. Guarantee Apache 2.0 compliance to enable enterprise use without limits.
283
+ 3. Optimize each model's architectural detail to maximize performance, reduce costs, and speed up training.
284
+ 4. Highly-optimized [recipe configs](#training-recipes) we have tested at enterprise scale.
296
285
 
286
+ ---
297
287
 
298
-
299
- &nbsp;
300
- <br>
301
288
  &nbsp;
302
289
 
290
+ # Choose from 20+ LLMs
291
+ LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials/download_model_weights.md) without layers of abstraction:
292
+
293
+ | Model | Model size | Author | Reference |
294
+ |----|----|----|----|
295
+ | Llama 3 | 8B, 70B | Meta AI | [Meta AI 2024](https://github.com/meta-llama/llama3) |
296
+ | Llama 2 | 7B, 13B, 70B | Meta AI | [Touvron et al. 2023](https://arxiv.org/abs/2307.09288) |
297
+ | Code Llama | 7B, 13B, 34B, 70B | Meta AI | [Rozière et al. 2023](https://arxiv.org/abs/2308.12950) |
298
+ | Mistral | 7B | Mistral AI | [Mistral website](https://mistral.ai/) |
299
+ | CodeGemma | 7B | Google | [Google Team, Google Deepmind](https://ai.google.dev/gemma/docs/codegemma) |
300
+ | ... | ... | ... | ... |
301
+
302
+ <details>
303
+ <summary>See full list of 20+ LLMs</summary>
304
+
305
+ &nbsp;
306
+
307
+ #### All models
308
+
309
+ | Model | Model size | Author | Reference |
310
+ |----|----|----|----|
311
+ | CodeGemma | 7B | Google | [Google Team, Google Deepmind](https://ai.google.dev/gemma/docs/codegemma) |
312
+ | Code Llama | 7B, 13B, 34B, 70B | Meta AI | [Rozière et al. 2023](https://arxiv.org/abs/2308.12950) |
313
+ | Dolly | 3B, 7B, 12B | Databricks | [Conover et al. 2023](https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm) |
314
+ | Falcon | 7B, 40B, 180B | TII UAE | [TII 2023](https://falconllm.tii.ae) |
315
+ | FreeWilly2 (Stable Beluga 2) | 70B | Stability AI | [Stability AI 2023](https://stability.ai/blog/stable-beluga-large-instruction-fine-tuned-models) |
316
+ | Function Calling Llama 2 | 7B | Trelis | [Trelis et al. 2023](https://huggingface.co/Trelis/Llama-2-7b-chat-hf-function-calling-v2) |
317
+ | Gemma | 2B, 7B | Google | [Google Team, Google Deepmind](https://storage.googleapis.com/deepmind-media/gemma/gemma-report.pdf) |
318
+ | Llama 2 | 7B, 13B, 70B | Meta AI | [Touvron et al. 2023](https://arxiv.org/abs/2307.09288) |
319
+ | Llama 3 | 8B, 70B | Meta AI | [Meta AI 2024](https://github.com/meta-llama/llama3) |
320
+ | LongChat | 7B, 13B | LMSYS | [LongChat Team 2023](https://lmsys.org/blog/2023-06-29-longchat/) |
321
+ | Mistral | 7B | Mistral AI | [Mistral website](https://mistral.ai/) |
322
+ | Nous-Hermes | 7B, 13B, 70B | NousResearch | [Org page](https://huggingface.co/NousResearch) |
323
+ | OpenLLaMA | 3B, 7B, 13B | OpenLM Research | [Geng & Liu 2023](https://github.com/openlm-research/open_llama) |
324
+ | Phi | 1.3B, 2.7B | Microsoft Research | [Li et al. 2023](https://arxiv.org/abs/2309.05463) |
325
+ | Platypus | 7B, 13B, 70B | Lee et al. | [Lee, Hunter, and Ruiz 2023](https://arxiv.org/abs/2308.07317) |
326
+ | Pythia | {14,31,70,160,410}M, {1,1.4,2.8,6.9,12}B | EleutherAI | [Biderman et al. 2023](https://arxiv.org/abs/2304.01373) |
327
+ | RedPajama-INCITE | 3B, 7B | Together | [Together 2023](https://together.ai/blog/redpajama-models-v1) |
328
+ | StableCode | 3B | Stability AI | [Stability AI 2023](https://stability.ai/blog/stablecode-llm-generative-ai-coding) |
329
+ | StableLM | 3B, 7B | Stability AI | [Stability AI 2023](https://github.com/Stability-AI/StableLM) |
330
+ | StableLM Zephyr | 3B | Stability AI | [Stability AI 2023](https://stability.ai/blog/stablecode-llm-generative-ai-coding) |
331
+ | TinyLlama | 1.1B | Zhang et al. | [Zhang et al. 2023](https://github.com/jzhang38/TinyLlama) |
332
+ | Vicuna | 7B, 13B, 33B | LMSYS | [Li et al. 2023](https://lmsys.org/blog/2023-03-30-vicuna/) |
333
+
334
+ </details>
303
335
 
336
+ &nbsp;
304
337
 
305
- ## Installing LitGPT
338
+ ## Install LitGPT
306
339
 
307
- You can install LitGPT with all dependencies (including CLI, quantization, tokenizers for all models, etc.) using the following pip command:
340
+ Install LitGPT with all dependencies (including CLI, quantization, tokenizers for all models, etc.):
308
341
 
309
342
  ```bash
310
- pip install 'litgpt[all] @ git+https://github.com/Lightning-AI/litgpt'
343
+ pip install 'litgpt[all]'
311
344
  ```
312
345
 
313
- Alternatively, can install litgpt from a cloned GitHub repository:
346
+ <details>
347
+ <summary>Advanced install options</summary>
348
+
349
+ &nbsp;
350
+
351
+ Install from source:
314
352
 
315
353
  ```bash
316
354
  git clone https://github.com/Lightning-AI/litgpt
317
355
  cd litgpt
318
356
  pip install -e '.[all]'
319
357
  ```
358
+ </details>
320
359
 
360
+ ---
321
361
 
322
362
  &nbsp;
363
+ # Quick start
364
+ After installing LitGPT, select the model and action you want to take on that model (finetune, pretrain, evaluate, deploy, etc...):
323
365
 
324
- ## Using LitGPT
366
+ ```bash
367
+ # litgpt [action] [model]
368
+ litgpt download meta-llama/Meta-Llama-3-8B-Instruct
369
+ litgpt chat meta-llama/Meta-Llama-3-8B-Instruct
370
+ litgpt finetune meta-llama/Meta-Llama-3-8B-Instruct
371
+ litgpt pretrain meta-llama/Meta-Llama-3-8B-Instruct
372
+ litgpt serve meta-llama/Meta-Llama-3-8B-Instruct
373
+ ```
325
374
 
375
+ &nbsp;
376
+
377
+ ### Use an LLM for inference
378
+ Use LLMs for inference to test their chatting capabilities, run evaluations, or extract embeddings, etc...
379
+ Here's an example showing how to use the Mistral 7B LLM.
326
380
 
327
- Below is a minimal example to get started with the LitGPT command line interface (CLI), illustrating how to download and use a model:
381
+ <a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-chat">
382
+ <img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open In Studio"/>
383
+ </a>
328
384
 
385
+ &nbsp;
329
386
 
330
387
  ```bash
331
388
  # 1) Download a pretrained model
@@ -340,63 +397,202 @@ litgpt chat \
340
397
 
341
398
  For more information, refer to the [download](tutorials/download_model_weights.md) and [inference](tutorials/inference.md) tutorials.
342
399
 
343
-
344
400
  &nbsp;
345
- ## Finetuning and pretraining
346
401
 
347
- LitGPT supports [pretraining](tutorials/pretrain_tinyllama.md) and [finetuning](tutorials/finetune.md) to optimize models on excisting or custom datasets. Below is an example showing how to finetune a model with LoRA:
402
+ ### Finetune an LLM
403
+ [Finetune](tutorials/finetune.md) a model to specialize it on your own custom dataset:
404
+
405
+ <a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-finetune">
406
+ <img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open In Studio"/>
407
+ </a>
408
+
409
+ &nbsp;
348
410
 
349
411
  ```bash
350
412
  # 1) Download a pretrained model
351
413
  litgpt download --repo_id microsoft/phi-2
352
414
 
353
415
  # 2) Finetune the model
354
- litgpt finetune lora \
416
+ curl -L https://huggingface.co/datasets/ksaw008/finance_alpaca/resolve/main/finance_alpaca.json -o my_custom_dataset.json
417
+
418
+ litgpt finetune \
355
419
  --checkpoint_dir checkpoints/microsoft/phi-2 \
356
- --data Alpaca2k \
357
- --out_dir out/phi-2-lora
420
+ --data JSON \
421
+ --data.json_path my_custom_dataset.json \
422
+ --data.val_split_fraction 0.1 \
423
+ --out_dir out/custom-model
358
424
 
359
425
  # 3) Chat with the model
360
426
  litgpt chat \
361
- --checkpoint_dir out/phi-2-lora/final
427
+ --checkpoint_dir out/custom-model/final
362
428
  ```
363
429
 
364
430
  &nbsp;
365
- ## Configuration files for enhanced performance
366
431
 
367
- LitGPT also allows users to use configuration files in YAML format instead of specifying settings via the command line interface and comes with a set of model-specific defaults for good out-of-the-box performance:
432
+ ### Pretrain an LLM
433
+ Train an LLM from scratch on your own data via pretraining:
368
434
 
435
+ <a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-pretrain">
436
+ <img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open In Studio"/>
437
+ </a>
438
+
439
+ &nbsp;
369
440
 
370
441
  ```bash
371
- litgpt finetune lora \
372
- --config https://github.com/Lightning-AI/litgpt/blob/wip/config_hub/finetune/llama-2-7b/lora.yaml
442
+ mkdir -p custom_texts
443
+ curl https://www.gutenberg.org/cache/epub/24440/pg24440.txt --output custom_texts/book1.txt
444
+ curl https://www.gutenberg.org/cache/epub/26393/pg26393.txt --output custom_texts/book2.txt
445
+
446
+ # 1) Download a tokenizer
447
+ litgpt download \
448
+ --repo_id EleutherAI/pythia-160m \
449
+ --tokenizer_only True
450
+
451
+ # 2) Pretrain the model
452
+ litgpt pretrain \
453
+ --model_name pythia-160m \
454
+ --tokenizer_dir checkpoints/EleutherAI/pythia-160m \
455
+ --data TextFiles \
456
+ --data.train_data_path "custom_texts/" \
457
+ --train.max_tokens 10_000_000 \
458
+ --out_dir out/custom-model
459
+
460
+ # 3) Chat with the model
461
+ litgpt chat \
462
+ --checkpoint_dir out/custom-model/final
373
463
  ```
374
464
 
375
- For added convenience, you can also manually override config file setting via the CLI:
465
+ &nbsp;
466
+
467
+ ### Continue pretraining an LLM
468
+ This is another way of finetuning that specializes an already pretrained model by training on custom data:
376
469
 
377
470
 
471
+ <a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-continue-pretraining">
472
+ <img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open In Studio"/>
473
+ </a>
474
+
475
+ &nbsp;
476
+
378
477
  ```bash
379
- litgpt finetune lora \
380
- --config https://raw.githubusercontent.com/Lightning-AI/litgpt/main/config_hub/finetune/llama-2-7b/lora.yaml \
381
- --lora_r 4
478
+ mkdir -p custom_texts
479
+ curl https://www.gutenberg.org/cache/epub/24440/pg24440.txt --output custom_texts/book1.txt
480
+ curl https://www.gutenberg.org/cache/epub/26393/pg26393.txt --output custom_texts/book2.txt
481
+
482
+ # 1) Download a pretrained model
483
+ litgpt download --repo_id EleutherAI/pythia-160m
484
+
485
+ # 2) Continue pretraining the model
486
+ litgpt pretrain \
487
+ --model_name pythia-160m \
488
+ --tokenizer_dir checkpoints/EleutherAI/pythia-160m \
489
+ --initial_checkpoint_dir checkpoints/EleutherAI/pythia-160m \
490
+ --data TextFiles \
491
+ --data.train_data_path "custom_texts/" \
492
+ --train.max_tokens 10_000_000 \
493
+ --out_dir out/custom-model
494
+
495
+ # 3) Chat with the model
496
+ litgpt chat \
497
+ --checkpoint_dir out/custom-model/final
382
498
  ```
383
499
 
384
- You can browse the available configuration files [here](https://github.com/Lightning-AI/litgpt/tree/main/config_hub).
500
+ &nbsp;
501
+
502
+ ### Deploy an LLM
503
+ Once you're ready to deploy a finetuned LLM, run this command:
504
+
505
+ <a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-serve">
506
+ <img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open In Studio"/>
507
+ </a>
385
508
 
386
509
  &nbsp;
387
510
 
388
- > [!TIP]
389
- > **Run large models on smaller consumer devices:**
390
- > We support 4-bit quantization (as in QLoRA), (bnb.nf4, bnb.nf4-dq, bnb.fp4, bnb.fp4-dq) and 8-bit quantization (bnb.int8) for inference by following [this guide](tutorials/quantize.md).
511
+ ```bash
512
+ # locate the checkpoint to your finetuned or pretrained model and call the `serve` command:
513
+ litgpt serve --checkpoint_dir path/to/your/checkpoint/microsoft/phi-2
514
+
515
+ # Alternative: if you haven't finetuned, download any checkpoint to deploy it:
516
+ litgpt download --repo_id microsoft/phi-2
517
+ litgpt serve --checkpoint_dir checkpoints/microsoft/phi-2
518
+ ```
519
+
520
+ Test the server in a separate terminal and integrate the model API into your AI product:
521
+ ```python
522
+ # 3) Use the server (in a separate session)
523
+ import requests, json
524
+ response = requests.post(
525
+ "http://127.0.0.1:8000/predict",
526
+ json={"prompt": "Fix typos in the following sentence: Exampel input"}
527
+ )
528
+ print(response.json()["output"])
529
+ ```
391
530
 
531
+ &nbsp;
532
+
533
+ > [!NOTE]
534
+ > **[Read the full docs](tutorials/0_to_litgpt.md)**.
392
535
 
393
536
  &nbsp;
394
- <br>
537
+
538
+ ----
539
+
540
+ # State-of-the-art features
541
+ ✅ &nbsp;State-of-the-art optimizations: Flash Attention v2, multi-GPU support via fully-sharded data parallelism, [optional CPU offloading](tutorials/oom.md#do-sharding-across-multiple-gpus), and [TPU and XLA support](extensions/xla).
542
+
543
+ ✅ &nbsp;[Pretrain](tutorials/pretrain.md), [finetune](tutorials/finetune.md), and [deploy](tutorials/inference.md)
544
+
545
+ ✅ &nbsp;Reduce compute requirements with low-precision settings: FP16, BF16, and FP16/FP32 mixed.
546
+
547
+ ✅ &nbsp;Lower memory requirements with [quantization](tutorials/quantize.md): 4-bit floats, 8-bit integers, and double quantization.
548
+
549
+ ✅ &nbsp;[Configuration files](config_hub) for great out-of-the-box performance.
550
+
551
+ ✅ &nbsp;Parameter-efficient finetuning: [LoRA](tutorials/finetune_lora.md), [QLoRA](tutorials/finetune_lora.md), [Adapter](tutorials/finetune_adapter.md), and [Adapter v2](tutorials/finetune_adapter.md).
552
+
553
+ ✅ &nbsp;[Exporting](tutorials/convert_lit_models.md) to other popular model weight formats.
554
+
555
+ ✅ &nbsp;Many popular datasets for [pretraining](tutorials/pretrain.md) and [finetuning](tutorials/prepare_dataset.md), and [support for custom datasets](tutorials/prepare_dataset.md#preparing-custom-datasets-for-instruction-finetuning).
556
+
557
+ ✅ &nbsp;Readable and easy-to-modify code to experiment with the latest research ideas.
558
+
395
559
  &nbsp;
396
560
 
397
- ## Customization
561
+ ---
562
+
563
+ # Training recipes
398
564
 
399
- LitGPT supports rich and customizable [config files](config_hub) to tailor the LLM training to your dataset and hardware needs. Shown below is a configuration file for LoRA finetuning:
565
+ LitGPT comes with validated recipes (YAML configs) to train models under different conditions. We've generated these recipes based on the parameters we found to perform the best for different training conditions.
566
+
567
+ Browse all training recipes [here](config_hub).
568
+
569
+ ### Example
570
+
571
+ ```bash
572
+ litgpt finetune \
573
+ --config https://raw.githubusercontent.com/Lightning-AI/litgpt/main/config_hub/finetune/llama-2-7b/lora.yaml
574
+ ```
575
+
576
+ ### What is a config
577
+ Configs let you customize training for all granular parameters like:
578
+
579
+ ```yaml
580
+ # The path to the base model's checkpoint directory to load for finetuning. (type: <class 'Path'>, default: checkpoints/stabilityai/stablelm-base-alpha-3b)
581
+ checkpoint_dir: checkpoints/meta-llama/Llama-2-7b-hf
582
+
583
+ # Directory in which to save checkpoints and logs. (type: <class 'Path'>, default: out/lora)
584
+ out_dir: out/finetune/qlora-llama2-7b
585
+
586
+ # The precision to use for finetuning. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
587
+ precision: bf16-true
588
+
589
+ ...
590
+ ```
591
+
592
+ <details>
593
+ <summary>Example: LoRA finetuning config</summary>
594
+
595
+ &nbsp;
400
596
 
401
597
  ```yaml
402
598
  # The path to the base model's checkpoint directory to load for finetuning. (type: <class 'Path'>, default: checkpoints/stabilityai/stablelm-base-alpha-3b)
@@ -522,25 +718,21 @@ logger_name: csv
522
718
  # The random seed to use for reproducibility. (type: int, default: 1337)
523
719
  seed: 1337
524
720
  ```
721
+ </details>
525
722
 
723
+ ### Override config params via CLI
724
+ Override any parameter in the CLI:
526
725
 
527
- &nbsp;
528
-
529
- ## LitGPT design principles
530
-
531
- This repository follows the main principle of **openness through clarity**.
532
-
533
- **LitGPT** is:
534
-
535
- - **Simple:** Single-file implementation without boilerplate.
536
- - **Correct:** Numerically equivalent to the original model.
537
- - **Optimized:** Runs fast on consumer hardware or at scale.
538
- - **Open-source:** No strings attached.
539
-
540
- Avoiding code duplication is **not** a goal. **Readability** and **hackability** are.
726
+ ```bash
727
+ litgpt finetune \
728
+ --config https://raw.githubusercontent.com/Lightning-AI/litgpt/main/config_hub/finetune/llama-2-7b/lora.yaml \
729
+ --lora_r 4
730
+ ```
541
731
 
542
732
  &nbsp;
543
733
 
734
+ # Community
735
+
544
736
  ## Get involved!
545
737
 
546
738
  We appreciate your feedback and contributions. If you have feature requests, questions, or want to contribute code or config files, please don't hesitate to use the [GitHub Issue](https://github.com/Lightning-AI/litgpt/issues) tracker.
@@ -559,27 +751,29 @@ If you have general questions about building with LitGPT, please [join our Disco
559
751
 
560
752
  ## Tutorials, how-to guides, and docs
561
753
 
754
+
755
+ > [!NOTE]
756
+ > We recommend starting with the **[Zero to LitGPT: Getting Started with Pretraining, Finetuning, and Using LLMs](tutorials/0_to_litgpt.md)** tutorial if you are new to LitGPT.
757
+
758
+ Tutorials and in-depth feature documentation can be found below:
759
+
562
760
  - Finetuning, incl. LoRA, QLoRA, and Adapters ([tutorials/finetune.md](tutorials/finetune.md))
563
- - Pretraining ([tutorials/pretrain_tinyllama.md](tutorials/pretrain_tinyllama.md))
761
+ - Pretraining ([tutorials/pretrain.md](tutorials/pretrain.md))
564
762
  - Model evaluation ([tutorials/evaluation.md](tutorials/evaluation.md))
565
763
  - Supported and custom datasets ([tutorials/prepare_dataset.md](tutorials/prepare_dataset.md))
566
764
  - Quantization ([tutorials/quantize.md](tutorials/quantize.md))
567
765
  - Tips for dealing with out-of-memory (OOM) errors ([tutorials/oom.md](tutorials/oom.md))
568
766
 
569
-
570
-
571
-
572
-
573
767
  &nbsp;
574
768
 
575
769
  ## XLA
576
770
 
577
- Lightning AI has partnered with Google to add first-class support for [Cloud TPUs](https://cloud.google.com/tpu) in [Lightnings frameworks](https://github.com/Lightning-AI/lightning) and LitGPT,
771
+ Lightning AI has partnered with Google to add first-class support for [Cloud TPUs](https://cloud.google.com/tpu) in [Lightning's frameworks](https://github.com/Lightning-AI/lightning) and LitGPT,
578
772
  helping democratize AI for millions of developers and researchers worldwide.
579
773
 
580
774
  Using TPUs with Lightning is as straightforward as changing one line of code.
581
775
 
582
- We provide scripts fully optimized for TPUs in the [XLA directory](xla).
776
+ We provide scripts fully optimized for TPUs in the [XLA directory](extensions/xla).
583
777
 
584
778
 
585
779
 
@@ -601,7 +795,7 @@ This implementation extends on [Lit-LLaMA](https://github.com/lightning-AI/lit-l
601
795
 
602
796
  ## Community showcase
603
797
 
604
- Check out the projects below using and building on LitGPT. If you have a project you'd like to add to this section, please don't hestiate to open a pull request.
798
+ Check out the projects below that use and build on LitGPT. If you have a project you'd like to add to this section, please don't hesitate to open a pull request.
605
799
 
606
800
  &nbsp;
607
801
 
@@ -615,6 +809,17 @@ The LitGPT repository was the official starter kit for the [NeurIPS 2023 LLM Eff
615
809
 
616
810
  LitGPT powered the [TinyLlama project](https://github.com/jzhang38/TinyLlama) and [TinyLlama: An Open-Source Small Language Model](https://arxiv.org/abs/2401.02385) research paper.
617
811
 
812
+ &nbsp;
813
+
814
+ **🍪 MicroLlama: MicroLlama-300M**
815
+
816
+ [MicroLlama](https://github.com/keeeeenw/MicroLlama) is a 300M Llama model pretrained on 50B tokens, powered by TinyLlama and LitGPT.
817
+
818
+ &nbsp;
819
+
820
+ **🔬 Pre-training Small Base LMs with Fewer Tokens**
821
+
822
+ The research paper ["Pre-training Small Base LMs with Fewer Tokens"](https://arxiv.org/abs/2404.08634), which utilizes LitGPT, develops smaller base language models by inheriting a few transformer blocks from larger models and training on a tiny fraction of the data used by the larger models. It demonstrates that these smaller models can perform comparably to larger models despite using significantly less training data and resources.
618
823
 
619
824
  &nbsp;
620
825
 
@@ -636,4 +841,3 @@ If you use LitGPT in your research, please cite the following work:
636
841
  ## License
637
842
 
638
843
  LitGPT is released under the [Apache 2.0](https://github.com/Lightning-AI/litgpt/blob/main/LICENSE) license.
639
-