langvision 0.0.1__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langvision might be problematic. Click here for more details.
- {langvision-0.0.1 → langvision-0.1.0}/MANIFEST.in +0 -1
- langvision-0.1.0/PKG-INFO +49 -0
- langvision-0.1.0/README.md +428 -0
- langvision-0.1.0/docs/_config.yml +11 -0
- langvision-0.1.0/requirements.txt +34 -0
- langvision-0.1.0/setup.py +70 -0
- langvision-0.1.0/src/langvision/__init__.py +82 -0
- langvision-0.1.0/src/langvision/callbacks/base.py +170 -0
- langvision-0.1.0/src/langvision/cli/__init__.py +85 -0
- langvision-0.1.0/src/langvision/cli/complete_cli.py +319 -0
- langvision-0.1.0/src/langvision/cli/config.py +344 -0
- langvision-0.1.0/src/langvision/cli/evaluate.py +201 -0
- langvision-0.1.0/src/langvision/cli/export.py +177 -0
- langvision-0.1.0/src/langvision/cli/finetune.py +298 -0
- langvision-0.1.0/src/langvision/cli/model_zoo.py +162 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/cli/train.py +27 -13
- langvision-0.1.0/src/langvision/cli/utils.py +258 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/components/attention.py +4 -1
- langvision-0.1.0/src/langvision/concepts/__init__.py +9 -0
- langvision-0.1.0/src/langvision/concepts/ccot.py +30 -0
- langvision-0.1.0/src/langvision/concepts/cot.py +29 -0
- langvision-0.1.0/src/langvision/concepts/dpo.py +37 -0
- langvision-0.1.0/src/langvision/concepts/grpo.py +25 -0
- langvision-0.1.0/src/langvision/concepts/lime.py +37 -0
- langvision-0.1.0/src/langvision/concepts/ppo.py +47 -0
- langvision-0.1.0/src/langvision/concepts/rlhf.py +40 -0
- langvision-0.1.0/src/langvision/concepts/rlvr.py +25 -0
- langvision-0.1.0/src/langvision/concepts/shap.py +37 -0
- langvision-0.1.0/src/langvision/data/enhanced_datasets.py +582 -0
- langvision-0.1.0/src/langvision/model_zoo.py +169 -0
- langvision-0.1.0/src/langvision/models/lora.py +202 -0
- langvision-0.1.0/src/langvision/models/multimodal.py +297 -0
- langvision-0.1.0/src/langvision/models/resnet.py +303 -0
- langvision-0.1.0/src/langvision/training/advanced_trainer.py +478 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/training/trainer.py +30 -2
- langvision-0.1.0/src/langvision/utils/config.py +186 -0
- langvision-0.1.0/src/langvision/utils/metrics.py +448 -0
- langvision-0.1.0/src/langvision/utils/setup.py +266 -0
- langvision-0.1.0/src/langvision.egg-info/PKG-INFO +49 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision.egg-info/SOURCES.txt +25 -2
- langvision-0.1.0/src/langvision.egg-info/entry_points.txt +2 -0
- langvision-0.1.0/src/langvision.egg-info/not-zip-safe +1 -0
- langvision-0.1.0/src/langvision.egg-info/requires.txt +31 -0
- langvision-0.1.0/tests/test_vision_transformer.py +198 -0
- langvision-0.0.1/LICENSE +0 -21
- langvision-0.0.1/PKG-INFO +0 -463
- langvision-0.0.1/README.md +0 -421
- langvision-0.0.1/pyproject.toml +0 -38
- langvision-0.0.1/requirements.txt +0 -5
- langvision-0.0.1/src/langvision/__init__.py +0 -7
- langvision-0.0.1/src/langvision/callbacks/base.py +0 -11
- langvision-0.0.1/src/langvision/cli/finetune.py +0 -181
- langvision-0.0.1/src/langvision/model_zoo.py +0 -2
- langvision-0.0.1/src/langvision/models/lora.py +0 -30
- langvision-0.0.1/src/langvision/training/__init__.py +0 -0
- langvision-0.0.1/src/langvision/utils/config.py +0 -15
- langvision-0.0.1/src/langvision.egg-info/PKG-INFO +0 -463
- langvision-0.0.1/src/langvision.egg-info/entry_points.txt +0 -2
- langvision-0.0.1/src/langvision.egg-info/requires.txt +0 -7
- langvision-0.0.1/tests/test_vision_transformer.py +0 -19
- {langvision-0.0.1 → langvision-0.1.0}/docs/index.md +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/setup.cfg +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/agents/__init__.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/callbacks/__init__.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/callbacks/early_stopping.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/callbacks/logging.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/callbacks/registry.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/components/__init__.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/components/mlp.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/components/patch_embedding.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/config/__init__.py +0 -0
- {langvision-0.0.1/src/langvision/cli → langvision-0.1.0/src/langvision/data}/__init__.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/data/datasets.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/example.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/filesystem/__init__.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/llm/__init__.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/memory/__init__.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/models/__init__.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/models/vision_transformer.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/sync/__init__.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/telemetry/__init__.py +0 -0
- {langvision-0.0.1/src/langvision/data → langvision-0.1.0/src/langvision/training}/__init__.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/utils/__init__.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/utils/cuda.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/utils/data.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision/utils/device.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision.egg-info/dependency_links.txt +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/src/langvision.egg-info/top_level.txt +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/tests/test_example.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/tests/test_lora.py +0 -0
- {langvision-0.0.1 → langvision-0.1.0}/tests/test_version.py +0 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: langvision
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Efficient LoRA Fine-Tuning for Vision LLMs with advanced CLI and model zoo
|
|
5
|
+
Home-page: https://github.com/langtrain-ai/langtrain
|
|
6
|
+
Author: Pritesh Raj
|
|
7
|
+
Author-email: priteshraj10@gmail.com
|
|
8
|
+
Keywords: vision,transformer,lora,fine-tuning,deep-learning,computer-vision
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.8
|
|
22
|
+
Requires-Dist: torch>=1.10.0
|
|
23
|
+
Requires-Dist: torchvision>=0.11.0
|
|
24
|
+
Requires-Dist: numpy>=1.21.0
|
|
25
|
+
Requires-Dist: pillow>=8.3.0
|
|
26
|
+
Requires-Dist: tqdm>=4.62.0
|
|
27
|
+
Requires-Dist: pyyaml>=6.0
|
|
28
|
+
Requires-Dist: tensorboard>=2.9.0
|
|
29
|
+
Requires-Dist: wandb>=0.13.0
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
32
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest-mock>=3.8.0; extra == "dev"
|
|
34
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
35
|
+
Requires-Dist: isort>=5.10.0; extra == "dev"
|
|
36
|
+
Requires-Dist: flake8>=5.0.0; extra == "dev"
|
|
37
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
38
|
+
Requires-Dist: pre-commit>=2.20.0; extra == "dev"
|
|
39
|
+
Requires-Dist: bandit>=1.7.0; extra == "dev"
|
|
40
|
+
Provides-Extra: docs
|
|
41
|
+
Requires-Dist: sphinx>=5.0.0; extra == "docs"
|
|
42
|
+
Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "docs"
|
|
43
|
+
Requires-Dist: myst-parser>=0.18.0; extra == "docs"
|
|
44
|
+
Requires-Dist: sphinx-autodoc-typehints>=1.19.0; extra == "docs"
|
|
45
|
+
Provides-Extra: examples
|
|
46
|
+
Requires-Dist: jupyter>=1.0.0; extra == "examples"
|
|
47
|
+
Requires-Dist: ipywidgets>=7.6.0; extra == "examples"
|
|
48
|
+
Requires-Dist: tensorboard>=2.9.0; extra == "examples"
|
|
49
|
+
Requires-Dist: wandb>=0.13.0; extra == "examples"
|
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
# Langvision: Efficient LoRA Fine-Tuning for Vision LLMs
|
|
2
|
+
|
|
3
|
+
<hr/>
|
|
4
|
+
<p align="center">
|
|
5
|
+
<picture>
|
|
6
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/langtrain-ai/langtrain/main/static/langvision-use-dark.png">
|
|
7
|
+
<img alt="Langvision Logo" src="https://raw.githubusercontent.com/langtrain-ai/langtrain/main/static/langvision-white.png" width="full" />
|
|
8
|
+
</picture>
|
|
9
|
+
</p>
|
|
10
|
+
|
|
11
|
+
<!-- Badges -->
|
|
12
|
+
<p align="center">
|
|
13
|
+
<a href="https://pypi.org/project/langvision/"><img src="https://img.shields.io/pypi/v/langvision.svg" alt="PyPI version"></a>
|
|
14
|
+
<a href="https://pepy.tech/project/langvision"><img src="https://pepy.tech/badge/langvision" alt="Downloads"></a>
|
|
15
|
+
<a href="LICENSE"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License"></a>
|
|
16
|
+
<a href="https://img.shields.io/badge/coverage-90%25-brightgreen" alt="Coverage"> <img src="https://img.shields.io/badge/coverage-90%25-brightgreen"/></a>
|
|
17
|
+
<a href="https://img.shields.io/badge/python-3.8%2B-blue" alt="Python Version"> <img src="https://img.shields.io/badge/python-3.8%2B-blue"/></a>
|
|
18
|
+
<a href="https://github.com/psf/black"><img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Code style: black"></a>
|
|
19
|
+
</p>
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## 🚀 Installation
|
|
24
|
+
|
|
25
|
+
> **Requirements:**
|
|
26
|
+
> - Python 3.8 or newer
|
|
27
|
+
> - [PyTorch](https://pytorch.org/get-started/locally/) (CPU or GPU)
|
|
28
|
+
|
|
29
|
+
**Install the latest release from PyPI:**
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install langvision
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
[](https://pypi.org/project/langvision/)
|
|
36
|
+
|
|
37
|
+
**Optional installs:**
|
|
38
|
+
|
|
39
|
+
- For development:
|
|
40
|
+
```bash
|
|
41
|
+
pip install langvision[dev]
|
|
42
|
+
```
|
|
43
|
+
- For documentation:
|
|
44
|
+
```bash
|
|
45
|
+
pip install langvision[docs]
|
|
46
|
+
```
|
|
47
|
+
- For examples and notebooks:
|
|
48
|
+
```bash
|
|
49
|
+
pip install langvision[examples]
|
|
50
|
+
```
|
|
51
|
+
- For GPU (CUDA 11.3+):
|
|
52
|
+
```bash
|
|
53
|
+
pip install langvision[gpu]
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Troubleshooting:**
|
|
57
|
+
- For GPU support, ensure you have the correct CUDA version and install PyTorch as per [official instructions](https://pytorch.org/get-started/locally/).
|
|
58
|
+
- If you encounter issues, see [Troubleshooting](docs/troubleshooting.md) or [open an issue](https://github.com/langtrain-ai/langtrain/issues).
|
|
59
|
+
|
|
60
|
+
---
|
|
61
|
+
|
|
62
|
+
## Quick Links
|
|
63
|
+
- [Documentation](docs/index.md)
|
|
64
|
+
- [Tutorials](docs/tutorials/index.md)
|
|
65
|
+
- [Changelog](CHANGELOG.md)
|
|
66
|
+
- [Contributing Guide](CONTRIBUTING.md)
|
|
67
|
+
- [Roadmap](ROADMAP.md)
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Table of Contents
|
|
72
|
+
- [Features](#features)
|
|
73
|
+
- [Showcase](#showcase)
|
|
74
|
+
- [Getting Started](#getting-started)
|
|
75
|
+
- [Supported Python Versions](#supported-python-versions)
|
|
76
|
+
- [Why langvision?](#why-langvision)
|
|
77
|
+
- [Architecture Overview](#architecture-overview)
|
|
78
|
+
- [Core Modules](#core-modules)
|
|
79
|
+
- [Performance & Efficiency](#performance--efficiency)
|
|
80
|
+
- [Advanced Configuration](#advanced-configuration)
|
|
81
|
+
- [Documentation & Resources](#documentation--resources)
|
|
82
|
+
- [Testing & Quality](#testing--quality)
|
|
83
|
+
- [Examples & Use Cases](#examples--use-cases)
|
|
84
|
+
- [Extending the Framework](#extending-the-framework)
|
|
85
|
+
- [Contributing](#contributing)
|
|
86
|
+
- [License](#license)
|
|
87
|
+
- [Citation](#citation)
|
|
88
|
+
- [Acknowledgements](#acknowledgements)
|
|
89
|
+
- [Advanced LLM Concepts](#advanced-llm-concepts)
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Features
|
|
94
|
+
- LoRA adapters for efficient fine-tuning
|
|
95
|
+
- Modular Vision Transformer (ViT) backbone
|
|
96
|
+
- Model zoo for vision models
|
|
97
|
+
- Configurable and extensible codebase
|
|
98
|
+
- Checkpointing and resume
|
|
99
|
+
- Mixed precision and distributed training
|
|
100
|
+
- Metrics and visualization tools
|
|
101
|
+
- CLI for training and evaluation
|
|
102
|
+
- Callback support (early stopping, logging, etc.)
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Showcase
|
|
107
|
+
|
|
108
|
+
Langvision is intended for building and fine-tuning vision models with LoRA. It can be used for image classification, visual question answering, and other computer vision tasks.
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Getting Started
|
|
113
|
+
|
|
114
|
+
Install:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
pip install langvision
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### CLI Usage
|
|
121
|
+
|
|
122
|
+
After installation, you can use the comprehensive CLI:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
# Show all available commands
|
|
126
|
+
langvision --help
|
|
127
|
+
|
|
128
|
+
# Training commands
|
|
129
|
+
langvision train --help
|
|
130
|
+
langvision finetune --help
|
|
131
|
+
|
|
132
|
+
# Model management
|
|
133
|
+
langvision evaluate --help
|
|
134
|
+
langvision export --help
|
|
135
|
+
langvision model-zoo --help
|
|
136
|
+
langvision config --help
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
**Quick Examples:**
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
# Train a model
|
|
143
|
+
langvision train --dataset cifar10 --epochs 5 --batch_size 32
|
|
144
|
+
|
|
145
|
+
# Fine-tune with advanced features
|
|
146
|
+
langvision finetune --dataset cifar100 --epochs 10 --lora_r 8 --rlhf
|
|
147
|
+
|
|
148
|
+
# Evaluate a trained model
|
|
149
|
+
langvision evaluate --checkpoint model.pth --dataset cifar10
|
|
150
|
+
|
|
151
|
+
# Export to ONNX
|
|
152
|
+
langvision export --checkpoint model.pth --format onnx --output model.onnx
|
|
153
|
+
|
|
154
|
+
# Browse model zoo
|
|
155
|
+
langvision model-zoo list
|
|
156
|
+
langvision model-zoo download vit_base_patch16_224
|
|
157
|
+
|
|
158
|
+
# Create configuration
|
|
159
|
+
langvision config create --template advanced --output my_config.yaml
|
|
160
|
+
|
|
161
|
+
# Check version
|
|
162
|
+
langvision --version
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Example usage:
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
import torch
|
|
169
|
+
from langvision.models.vision_transformer import VisionTransformer
|
|
170
|
+
from langvision.utils.config import default_config
|
|
171
|
+
|
|
172
|
+
x = torch.randn(2, 3, 224, 224)
|
|
173
|
+
model = VisionTransformer(
|
|
174
|
+
img_size=default_config['img_size'],
|
|
175
|
+
patch_size=default_config['patch_size'],
|
|
176
|
+
in_chans=default_config['in_chans'],
|
|
177
|
+
num_classes=default_config['num_classes'],
|
|
178
|
+
embed_dim=default_config['embed_dim'],
|
|
179
|
+
depth=default_config['depth'],
|
|
180
|
+
num_heads=default_config['num_heads'],
|
|
181
|
+
mlp_ratio=default_config['mlp_ratio'],
|
|
182
|
+
lora_config=default_config['lora'],
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
with torch.no_grad():
|
|
186
|
+
out = model(x)
|
|
187
|
+
print('Output shape:', out.shape)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
See the [Documentation](docs/index.md) and [src/langvision/cli/finetune.py](src/langvision/cli/finetune.py) for more details.
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## Supported Python Versions
|
|
195
|
+
- Python 3.8 or newer
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Why langvision?
|
|
200
|
+
|
|
201
|
+
- Fine-tuning with LoRA adapters
|
|
202
|
+
- Modular ViT backbone design
|
|
203
|
+
- Unified interface for vision models
|
|
204
|
+
- Suitable for research and production
|
|
205
|
+
- Efficient memory usage
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## Architecture Overview
|
|
210
|
+
|
|
211
|
+
Langvision uses a Vision Transformer backbone with LoRA adapters in attention and MLP layers. This enables adaptation of pre-trained models with fewer trainable parameters.
|
|
212
|
+
|
|
213
|
+
### Model Data Flow
|
|
214
|
+
|
|
215
|
+
```mermaid
|
|
216
|
+
---
|
|
217
|
+
config:
|
|
218
|
+
layout: dagre
|
|
219
|
+
---
|
|
220
|
+
flowchart TD
|
|
221
|
+
subgraph LoRA_Adapters["LoRA Adapters in Attention and MLP"]
|
|
222
|
+
LA1(["LoRA Adapter 1"])
|
|
223
|
+
LA2(["LoRA Adapter 2"])
|
|
224
|
+
LA3(["LoRA Adapter N"])
|
|
225
|
+
end
|
|
226
|
+
A(["Input Image"]) --> B(["Patch Embedding"])
|
|
227
|
+
B --> C(["CLS Token & Positional Encoding"])
|
|
228
|
+
C --> D1(["Encoder Layer 1"])
|
|
229
|
+
D1 --> D2(["Encoder Layer 2"])
|
|
230
|
+
D2 --> D3(["Encoder Layer N"])
|
|
231
|
+
D3 --> E(["LayerNorm"])
|
|
232
|
+
E --> F(["MLP Head"])
|
|
233
|
+
F --> G(["Output Class Logits"])
|
|
234
|
+
LA1 -.-> D1
|
|
235
|
+
LA2 -.-> D2
|
|
236
|
+
LA3 -.-> D3
|
|
237
|
+
LA1:::loraStyle
|
|
238
|
+
LA2:::loraStyle
|
|
239
|
+
LA3:::loraStyle
|
|
240
|
+
classDef loraStyle fill:#e1f5fe,stroke:#0277bd,stroke-width:2px
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
245
|
+
## Core Modules
|
|
246
|
+
|
|
247
|
+
| Module | Description | Key Features |
|
|
248
|
+
|--------|-------------|--------------|
|
|
249
|
+
| PatchEmbedding | Image-to-patch conversion and embedding | Configurable patch sizes, position embeddings |
|
|
250
|
+
| TransformerEncoder | Multi-layer transformer backbone | Self-attention, LoRA integration, checkpointing |
|
|
251
|
+
| LoRALinear | Low-rank adaptation layers | Configurable rank, memory-efficient updates |
|
|
252
|
+
| MLPHead | Output projection layer | Classification, regression, dropout |
|
|
253
|
+
| Config System | Centralized configuration | YAML/JSON config, CLI overrides |
|
|
254
|
+
| Data Utils | Preprocessing and augmentation | Built-in transforms, custom loaders |
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## Performance & Efficiency
|
|
259
|
+
|
|
260
|
+
| Metric | Full Fine-tuning | LoRA Fine-tuning | Improvement |
|
|
261
|
+
|--------|------------------|------------------|-------------|
|
|
262
|
+
| Trainable Parameters | 86M | 2.4M | 97% reduction |
|
|
263
|
+
| Memory Usage | 12GB | 4GB | 67% reduction |
|
|
264
|
+
| Training Time | 4h | 1.5h | 62% faster |
|
|
265
|
+
| Storage per Task | 344MB | 9.6MB | 97% smaller |
|
|
266
|
+
|
|
267
|
+
*Benchmarks: ViT-Base, CIFAR-100, RTX 3090*
|
|
268
|
+
|
|
269
|
+
Supported model sizes: ViT-Tiny, ViT-Small, ViT-Base, ViT-Large
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## Advanced Configuration
|
|
274
|
+
|
|
275
|
+
Example LoRA config:
|
|
276
|
+
|
|
277
|
+
```python
|
|
278
|
+
lora_config = {
|
|
279
|
+
"rank": 16,
|
|
280
|
+
"alpha": 32,
|
|
281
|
+
"dropout": 0.1,
|
|
282
|
+
"target_modules": ["attention.qkv", "attention.proj", "mlp.fc1", "mlp.fc2"],
|
|
283
|
+
"merge_weights": False
|
|
284
|
+
}
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
Example training config:
|
|
288
|
+
|
|
289
|
+
```yaml
|
|
290
|
+
model:
|
|
291
|
+
name: "vit_base"
|
|
292
|
+
img_size: 224
|
|
293
|
+
patch_size: 16
|
|
294
|
+
num_classes: 1000
|
|
295
|
+
training:
|
|
296
|
+
epochs: 10
|
|
297
|
+
batch_size: 32
|
|
298
|
+
learning_rate: 1e-4
|
|
299
|
+
weight_decay: 0.01
|
|
300
|
+
warmup_steps: 1000
|
|
301
|
+
lora:
|
|
302
|
+
rank: 16
|
|
303
|
+
alpha: 32
|
|
304
|
+
dropout: 0.1
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
---
|
|
308
|
+
|
|
309
|
+
## Documentation & Resources
|
|
310
|
+
- [API Reference](docs/index.md)
|
|
311
|
+
- [Tutorials and Examples](docs/tutorials/index.md)
|
|
312
|
+
- [Research Papers](#research-papers)
|
|
313
|
+
- [Best Practices Guide](docs/best_practices.md)
|
|
314
|
+
- [Troubleshooting](docs/troubleshooting.md)
|
|
315
|
+
|
|
316
|
+
### Research Papers
|
|
317
|
+
- [LoRA: Low-Rank Adaptation of Large Language Models](https://arxiv.org/abs/2106.09685)
|
|
318
|
+
- [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929)
|
|
319
|
+
- [Vision Transformer for Fine-Grained Image Classification](https://arxiv.org/abs/2103.07579)
|
|
320
|
+
|
|
321
|
+
---
|
|
322
|
+
|
|
323
|
+
## Testing & Quality
|
|
324
|
+
|
|
325
|
+
Run tests:
|
|
326
|
+
|
|
327
|
+
```bash
|
|
328
|
+
pytest tests/
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
Code quality tools:
|
|
332
|
+
|
|
333
|
+
```bash
|
|
334
|
+
flake8 src/
|
|
335
|
+
black src/ --check
|
|
336
|
+
mypy src/
|
|
337
|
+
bandit -r src/
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
---
|
|
341
|
+
|
|
342
|
+
## Examples & Use Cases
|
|
343
|
+
|
|
344
|
+
Image classification:
|
|
345
|
+
|
|
346
|
+
```python
|
|
347
|
+
from langvision import VisionTransformer
|
|
348
|
+
from langvision.datasets import CIFAR10Dataset
|
|
349
|
+
|
|
350
|
+
model = VisionTransformer.from_pretrained("vit_base_patch16_224")
|
|
351
|
+
dataset = CIFAR10Dataset(train=True, transform=model.default_transform)
|
|
352
|
+
model.finetune(dataset, epochs=10, lora_rank=16)
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
Custom dataset:
|
|
356
|
+
|
|
357
|
+
```python
|
|
358
|
+
from langvision.datasets import ImageFolderDataset
|
|
359
|
+
|
|
360
|
+
dataset = ImageFolderDataset(
|
|
361
|
+
root="/path/to/dataset",
|
|
362
|
+
split="train",
|
|
363
|
+
transform=model.default_transform
|
|
364
|
+
)
|
|
365
|
+
model.finetune(dataset, config_path="configs/custom_config.yaml")
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
---
|
|
369
|
+
|
|
370
|
+
## Extending the Framework
|
|
371
|
+
- Add datasets in `src/langvision/data/datasets.py`
|
|
372
|
+
- Add callbacks in `src/langvision/callbacks/`
|
|
373
|
+
- Add models in `src/langvision/models/`
|
|
374
|
+
- Add CLI tools in `src/langvision/cli/`
|
|
375
|
+
|
|
376
|
+
## Documentation
|
|
377
|
+
- See code comments and docstrings for details.
|
|
378
|
+
- For advanced usage, see `src/langvision/cli/finetune.py`.
|
|
379
|
+
|
|
380
|
+
## Contributing
|
|
381
|
+
Contributions are welcome. See the [Contributing Guide](CONTRIBUTING.md) for details.
|
|
382
|
+
|
|
383
|
+
## License
|
|
384
|
+
|
|
385
|
+
This project is licensed under the MIT License. See [LICENSE](LICENSE) for details.
|
|
386
|
+
|
|
387
|
+
## Citation
|
|
388
|
+
|
|
389
|
+
If you use langvision in your research, please cite:
|
|
390
|
+
|
|
391
|
+
```bibtex
|
|
392
|
+
@software{langtrain2025,
|
|
393
|
+
author = {Pritesh Raj},
|
|
394
|
+
title = {langtrain: Vision LLMs with Efficient LoRA Fine-Tuning},
|
|
395
|
+
url = {https://github.com/langtrain-ai/langvision},
|
|
396
|
+
year = {2025},
|
|
397
|
+
version = {1.0.0}
|
|
398
|
+
}
|
|
399
|
+
```
|
|
400
|
+
|
|
401
|
+
## Acknowledgements
|
|
402
|
+
|
|
403
|
+
We thank the following projects and communities:
|
|
404
|
+
- [PyTorch](https://pytorch.org/)
|
|
405
|
+
- [HuggingFace](https://huggingface.co/)
|
|
406
|
+
- [timm](https://github.com/rwightman/pytorch-image-models)
|
|
407
|
+
- [PEFT](https://github.com/huggingface/peft)
|
|
408
|
+
|
|
409
|
+
<p align="center">
|
|
410
|
+
<b>Made in India 🇮🇳 with ❤️ by the langtrain team</b><br/>
|
|
411
|
+
<i>Star ⭐ this repo if you find it useful!</i>
|
|
412
|
+
</p>
|
|
413
|
+
|
|
414
|
+
## Advanced LLM Concepts
|
|
415
|
+
|
|
416
|
+
This library is designed to be extensible with advanced concepts in large language models (LLMs) and explainable AI, including:
|
|
417
|
+
|
|
418
|
+
- **RLHF**: Reinforcement Learning from Human Feedback
|
|
419
|
+
- **CoT**: Chain-of-Thought
|
|
420
|
+
- **CCoT**: Contrastive Chain-of-Thought
|
|
421
|
+
- **GRPO**: Generalized Reinforcement Policy Optimization
|
|
422
|
+
- **RLVR**: Reinforcement Learning with Value Ranking
|
|
423
|
+
- **DPO**: Direct Preference Optimization
|
|
424
|
+
- **PPO**: Proximal Policy Optimization
|
|
425
|
+
- **LIME**: Local Interpretable Model-agnostic Explanations
|
|
426
|
+
- **SHAP**: SHapley Additive exPlanations
|
|
427
|
+
|
|
428
|
+
See the [`src/langvision/concepts/`](src/langvision/concepts/) directory for modularized explanations and stubs for each concept. These serve as a foundation for future development and integration into the library's training and evaluation workflows.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
remote_theme: pages-themes/cayman@v0.2.0
|
|
2
|
+
plugins:
|
|
3
|
+
- jekyll-remote-theme
|
|
4
|
+
|
|
5
|
+
title: Langvision
|
|
6
|
+
description: Efficient Vision LLMs with LoRA Fine-Tuning
|
|
7
|
+
show_downloads: true
|
|
8
|
+
github:
|
|
9
|
+
repository_url: https://github.com/langtrain-ai/langvision
|
|
10
|
+
zip_url: https://github.com/langtrain-ai/langvision/zipball/main
|
|
11
|
+
tar_url: https://github.com/langtrain-ai/langvision/tarball/main
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Core dependencies
|
|
2
|
+
torch>=1.10.0
|
|
3
|
+
torchvision>=0.11.0
|
|
4
|
+
numpy>=1.21.0
|
|
5
|
+
tqdm>=4.62.0
|
|
6
|
+
pyyaml>=6.0
|
|
7
|
+
scipy>=1.7.0
|
|
8
|
+
matplotlib>=3.5.0
|
|
9
|
+
pillow>=8.3.0
|
|
10
|
+
timm>=0.6.0
|
|
11
|
+
transformers>=4.20.0
|
|
12
|
+
|
|
13
|
+
# Development dependencies (install with: pip install -r requirements-dev.txt)
|
|
14
|
+
# pytest>=7.0.0
|
|
15
|
+
# pytest-cov>=4.0.0
|
|
16
|
+
# pytest-mock>=3.8.0
|
|
17
|
+
# black>=22.0.0
|
|
18
|
+
# isort>=5.10.0
|
|
19
|
+
# flake8>=5.0.0
|
|
20
|
+
# mypy>=1.0.0
|
|
21
|
+
# pre-commit>=2.20.0
|
|
22
|
+
# bandit>=1.7.0
|
|
23
|
+
|
|
24
|
+
# Documentation dependencies (install with: pip install -r requirements-docs.txt)
|
|
25
|
+
# sphinx>=5.0.0
|
|
26
|
+
# sphinx-rtd-theme>=1.0.0
|
|
27
|
+
# myst-parser>=0.18.0
|
|
28
|
+
# sphinx-autodoc-typehints>=1.19.0
|
|
29
|
+
|
|
30
|
+
# Example dependencies (install with: pip install -r requirements-examples.txt)
|
|
31
|
+
# jupyter>=1.0.0
|
|
32
|
+
# ipywidgets>=7.6.0
|
|
33
|
+
# tensorboard>=2.9.0
|
|
34
|
+
# wandb>=0.13.0
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from setuptools import setup, find_packages
|
|
2
|
+
|
|
3
|
+
setup(
|
|
4
|
+
name="langvision",
|
|
5
|
+
version="0.1.0",
|
|
6
|
+
description="Efficient LoRA Fine-Tuning for Vision LLMs with advanced CLI and model zoo",
|
|
7
|
+
author="Pritesh Raj",
|
|
8
|
+
author_email="priteshraj10@gmail.com",
|
|
9
|
+
url="https://github.com/langtrain-ai/langtrain",
|
|
10
|
+
packages=find_packages(where="src"),
|
|
11
|
+
package_dir={"": "src"},
|
|
12
|
+
python_requires=">=3.8",
|
|
13
|
+
install_requires=[
|
|
14
|
+
"torch>=1.10.0",
|
|
15
|
+
"torchvision>=0.11.0",
|
|
16
|
+
"numpy>=1.21.0",
|
|
17
|
+
"pillow>=8.3.0",
|
|
18
|
+
"tqdm>=4.62.0",
|
|
19
|
+
"pyyaml>=6.0",
|
|
20
|
+
"tensorboard>=2.9.0",
|
|
21
|
+
"wandb>=0.13.0",
|
|
22
|
+
],
|
|
23
|
+
extras_require={
|
|
24
|
+
"dev": [
|
|
25
|
+
"pytest>=7.0.0",
|
|
26
|
+
"pytest-cov>=4.0.0",
|
|
27
|
+
"pytest-mock>=3.8.0",
|
|
28
|
+
"black>=22.0.0",
|
|
29
|
+
"isort>=5.10.0",
|
|
30
|
+
"flake8>=5.0.0",
|
|
31
|
+
"mypy>=1.0.0",
|
|
32
|
+
"pre-commit>=2.20.0",
|
|
33
|
+
"bandit>=1.7.0",
|
|
34
|
+
],
|
|
35
|
+
"docs": [
|
|
36
|
+
"sphinx>=5.0.0",
|
|
37
|
+
"sphinx-rtd-theme>=1.0.0",
|
|
38
|
+
"myst-parser>=0.18.0",
|
|
39
|
+
"sphinx-autodoc-typehints>=1.19.0",
|
|
40
|
+
],
|
|
41
|
+
"examples": [
|
|
42
|
+
"jupyter>=1.0.0",
|
|
43
|
+
"ipywidgets>=7.6.0",
|
|
44
|
+
"tensorboard>=2.9.0",
|
|
45
|
+
"wandb>=0.13.0",
|
|
46
|
+
],
|
|
47
|
+
},
|
|
48
|
+
entry_points={
|
|
49
|
+
"console_scripts": [
|
|
50
|
+
"langvision=langvision.cli:main",
|
|
51
|
+
],
|
|
52
|
+
},
|
|
53
|
+
classifiers=[
|
|
54
|
+
"Development Status :: 4 - Beta",
|
|
55
|
+
"Intended Audience :: Developers",
|
|
56
|
+
"Intended Audience :: Science/Research",
|
|
57
|
+
"Operating System :: OS Independent",
|
|
58
|
+
"Programming Language :: Python :: 3",
|
|
59
|
+
"Programming Language :: Python :: 3.8",
|
|
60
|
+
"Programming Language :: Python :: 3.9",
|
|
61
|
+
"Programming Language :: Python :: 3.10",
|
|
62
|
+
"Programming Language :: Python :: 3.11",
|
|
63
|
+
"Programming Language :: Python :: 3.12",
|
|
64
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
65
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
66
|
+
],
|
|
67
|
+
keywords=["vision", "transformer", "lora", "fine-tuning", "deep-learning", "computer-vision"],
|
|
68
|
+
include_package_data=True,
|
|
69
|
+
zip_safe=False,
|
|
70
|
+
)
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""
|
|
2
|
+
langvision - Modular Vision LLMs with Efficient LoRA Fine-Tuning
|
|
3
|
+
|
|
4
|
+
A research-friendly framework for building and fine-tuning Vision Large Language Models
|
|
5
|
+
with efficient Low-Rank Adaptation (LoRA) support.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "0.1.0"
|
|
9
|
+
__author__ = "Pritesh Raj"
|
|
10
|
+
__email__ = "priteshraj10@gmail.com"
|
|
11
|
+
|
|
12
|
+
# Core imports for easy access
|
|
13
|
+
from .models.vision_transformer import VisionTransformer
|
|
14
|
+
from .models.lora import LoRALinear, LoRAConfig, AdaLoRALinear, QLoRALinear
|
|
15
|
+
from .models.resnet import resnet18, resnet34, resnet50, resnet101, resnet152
|
|
16
|
+
from .models.multimodal import VisionLanguageModel, create_multimodal_model, CLIPLoss
|
|
17
|
+
from .utils.config import default_config
|
|
18
|
+
from .training.trainer import Trainer
|
|
19
|
+
from .training.advanced_trainer import AdvancedTrainer, TrainingConfig
|
|
20
|
+
from .data.datasets import get_dataset
|
|
21
|
+
from .data.enhanced_datasets import (
|
|
22
|
+
EnhancedImageDataset, MultimodalDataset, DatasetConfig,
|
|
23
|
+
create_enhanced_dataloaders, SmartAugmentation
|
|
24
|
+
)
|
|
25
|
+
from .utils.metrics import (
|
|
26
|
+
MetricsTracker, ClassificationMetrics, ContrastiveMetrics,
|
|
27
|
+
EvaluationSuite, PerformanceMetrics
|
|
28
|
+
)
|
|
29
|
+
from .callbacks.base import Callback, CallbackManager
|
|
30
|
+
from .concepts import RLHF, CoT, CCoT, GRPO, RLVR, DPO, PPO, LIME, SHAP
|
|
31
|
+
|
|
32
|
+
# Version info
|
|
33
|
+
__all__ = [
|
|
34
|
+
"__version__",
|
|
35
|
+
"__author__",
|
|
36
|
+
"__email__",
|
|
37
|
+
# Core Models
|
|
38
|
+
"VisionTransformer",
|
|
39
|
+
"resnet18", "resnet34", "resnet50", "resnet101", "resnet152",
|
|
40
|
+
"VisionLanguageModel", "create_multimodal_model",
|
|
41
|
+
# LoRA Components
|
|
42
|
+
"LoRALinear", "LoRAConfig", "AdaLoRALinear", "QLoRALinear",
|
|
43
|
+
# Training
|
|
44
|
+
"Trainer", "AdvancedTrainer", "TrainingConfig",
|
|
45
|
+
# Data
|
|
46
|
+
"get_dataset", "EnhancedImageDataset", "MultimodalDataset",
|
|
47
|
+
"DatasetConfig", "create_enhanced_dataloaders", "SmartAugmentation",
|
|
48
|
+
# Utilities
|
|
49
|
+
"default_config", "MetricsTracker", "ClassificationMetrics",
|
|
50
|
+
"ContrastiveMetrics", "EvaluationSuite", "PerformanceMetrics",
|
|
51
|
+
# Callbacks
|
|
52
|
+
"Callback", "CallbackManager",
|
|
53
|
+
# Loss Functions
|
|
54
|
+
"CLIPLoss",
|
|
55
|
+
# Concepts
|
|
56
|
+
"RLHF", "CoT", "CCoT", "GRPO", "RLVR", "DPO", "PPO", "LIME", "SHAP",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
# Optional imports for advanced usage
|
|
60
|
+
try:
|
|
61
|
+
from .callbacks import EarlyStoppingCallback, LoggingCallback
|
|
62
|
+
from .utils.device import get_device, to_device
|
|
63
|
+
__all__.extend([
|
|
64
|
+
"EarlyStoppingCallback",
|
|
65
|
+
"LoggingCallback",
|
|
66
|
+
"get_device",
|
|
67
|
+
"to_device"
|
|
68
|
+
])
|
|
69
|
+
except ImportError:
|
|
70
|
+
pass
|
|
71
|
+
|
|
72
|
+
# Package metadata
|
|
73
|
+
PACKAGE_METADATA = {
|
|
74
|
+
"name": "langvision",
|
|
75
|
+
"version": __version__,
|
|
76
|
+
"description": "Modular Vision LLMs with Efficient LoRA Fine-Tuning",
|
|
77
|
+
"author": __author__,
|
|
78
|
+
"email": __email__,
|
|
79
|
+
"url": "https://github.com/langtrain-ai/langtrain",
|
|
80
|
+
"license": "MIT",
|
|
81
|
+
"python_requires": ">=3.8",
|
|
82
|
+
}
|