langvision 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langvision might be problematic.
- langvision/__init__.py +7 -0
- langvision/agents/__init__.py +8 -0
- langvision/callbacks/__init__.py +0 -0
- langvision/callbacks/base.py +11 -0
- langvision/callbacks/early_stopping.py +16 -0
- langvision/callbacks/logging.py +17 -0
- langvision/callbacks/registry.py +10 -0
- langvision/cli/__init__.py +0 -0
- langvision/cli/finetune.py +181 -0
- langvision/cli/train.py +101 -0
- langvision/components/__init__.py +1 -0
- langvision/components/attention.py +58 -0
- langvision/components/mlp.py +10 -0
- langvision/components/patch_embedding.py +15 -0
- langvision/config/__init__.py +14 -0
- langvision/data/__init__.py +0 -0
- langvision/data/datasets.py +21 -0
- langvision/example.py +5 -0
- langvision/filesystem/__init__.py +15 -0
- langvision/llm/__init__.py +5 -0
- langvision/memory/__init__.py +21 -0
- langvision/model_zoo.py +2 -0
- langvision/models/__init__.py +1 -0
- langvision/models/lora.py +30 -0
- langvision/models/vision_transformer.py +28 -0
- langvision/sync/__init__.py +16 -0
- langvision/telemetry/__init__.py +9 -0
- langvision/training/__init__.py +0 -0
- langvision/training/trainer.py +100 -0
- langvision/utils/__init__.py +25 -0
- langvision/utils/config.py +15 -0
- langvision/utils/cuda.py +26 -0
- langvision/utils/data.py +8 -0
- langvision/utils/device.py +20 -0
- langvision-0.0.1.dist-info/METADATA +463 -0
- langvision-0.0.1.dist-info/RECORD +40 -0
- langvision-0.0.1.dist-info/WHEEL +5 -0
- langvision-0.0.1.dist-info/entry_points.txt +2 -0
- langvision-0.0.1.dist-info/licenses/LICENSE +21 -0
- langvision-0.0.1.dist-info/top_level.txt +1 -0
langvision/__init__.py
ADDED

langvision/callbacks/__init__.py
File without changes

langvision/callbacks/base.py
ADDED
@@ -0,0 +1,11 @@
+class Callback:
+    def on_train_begin(self, trainer):
+        pass
+    def on_epoch_begin(self, trainer, epoch):
+        pass
+    def on_batch_end(self, trainer, batch, logs=None):
+        pass
+    def on_epoch_end(self, trainer, epoch, logs=None):
+        pass
+    def on_train_end(self, trainer):
+        pass
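
The Callback base class above is a no-op interface: the Trainer (langvision/training/trainer.py, whose body is not shown in this diff) presumably invokes these hooks, and subclasses override only what they need. A minimal sketch of a custom callback; BatchCounter is a hypothetical name, not part of the package:

# BatchCounter is a hypothetical example, not part of langvision.
class BatchCounter(Callback):
    """Counts processed batches and reports the total after training."""
    def __init__(self):
        self.batches = 0

    def on_batch_end(self, trainer, batch, logs=None):
        self.batches += 1

    def on_train_end(self, trainer):
        print(f"Processed {self.batches} batches")
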
langvision/callbacks/early_stopping.py
ADDED
@@ -0,0 +1,16 @@
+class EarlyStopping:
+    def __init__(self, patience=5, min_delta=0.0):
+        self.patience = patience
+        self.min_delta = min_delta
+        self.counter = 0
+        self.best_score = None
+        self.early_stop = False
+
+    def __call__(self, val_loss):
+        if self.best_score is None or val_loss < self.best_score - self.min_delta:
+            self.best_score = val_loss
+            self.counter = 0
+        else:
+            self.counter += 1
+            if self.counter >= self.patience:
+                self.early_stop = True
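
A short usage sketch with hypothetical validation losses: the callback tracks the best loss and sets early_stop once there has been no improvement beyond min_delta for patience consecutive epochs.

# Hypothetical loss curve; stops after two epochs without improvement.
stopper = EarlyStopping(patience=2, min_delta=0.01)
for epoch, val_loss in enumerate([0.90, 0.80, 0.79, 0.795, 0.80]):
    stopper(val_loss)
    if stopper.early_stop:
        print(f"stopping at epoch {epoch}")  # triggers at epoch 3
        break
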
langvision/callbacks/logging.py
ADDED
@@ -0,0 +1,17 @@
+class LoggingCallback:
+    def __init__(self, log_file=None):
+        self.log_file = log_file
+        if log_file:
+            self.f = open(log_file, 'a')
+        else:
+            self.f = None
+
+    def log(self, message):
+        print(message)
+        if self.f:
+            self.f.write(message + '\n')
+            self.f.flush()
+
+    def close(self):
+        if self.f:
+            self.f.close()
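
Note that despite its name, LoggingCallback does not subclass the Callback interface above; it is a standalone console-and-file logger. A minimal sketch ('train.log' is an arbitrary path):

cb = LoggingCallback(log_file='train.log')  # 'train.log' is arbitrary
cb.log('epoch 1: loss=0.42')                # printed and appended to the file
cb.close()
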

langvision/cli/__init__.py
File without changes

langvision/cli/finetune.py
ADDED
@@ -0,0 +1,181 @@
+import argparse
+import torch
+import logging
+import os
+import random
+import numpy as np
+from langvision.models.vision_transformer import VisionTransformer
+from langvision.data.datasets import get_dataset
+from langvision.training.trainer import Trainer
+from langvision.callbacks.early_stopping import EarlyStopping
+from langvision.utils.device import get_device
+from langvision.utils.cuda import setup_cuda
+
+
+def set_seed(seed: int) -> None:
+    """Set random seed for reproducibility."""
+    torch.manual_seed(seed)
+    random.seed(seed)
+    np.random.seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse command-line arguments for fine-tuning."""
+    parser = argparse.ArgumentParser(description='Fine-tune VisionTransformer with LoRA')
+    # Data
+    parser.add_argument('--dataset', type=str, default='cifar10', choices=['cifar10', 'cifar100'], help='Dataset to use')
+    parser.add_argument('--data_dir', type=str, default='./data', help='Dataset directory')
+    parser.add_argument('--num_classes', type=int, default=10, help='Number of classes')
+    parser.add_argument('--img_size', type=int, default=224, help='Input image size')
+    parser.add_argument('--patch_size', type=int, default=16, help='Patch size for ViT')
+    parser.add_argument('--num_workers', type=int, default=2, help='Number of data loader workers')
+    # Model
+    parser.add_argument('--embed_dim', type=int, default=768, help='Embedding dimension')
+    parser.add_argument('--depth', type=int, default=12, help='Number of transformer layers')
+    parser.add_argument('--num_heads', type=int, default=12, help='Number of attention heads')
+    parser.add_argument('--mlp_ratio', type=float, default=4.0, help='MLP ratio')
+    parser.add_argument('--lora_r', type=int, default=4, help='LoRA rank')
+    parser.add_argument('--lora_alpha', type=float, default=1.0, help='LoRA alpha')
+    parser.add_argument('--lora_dropout', type=float, default=0.1, help='LoRA dropout')
+    # Training
+    parser.add_argument('--batch_size', type=int, default=64, help='Batch size for training')
+    parser.add_argument('--epochs', type=int, default=10, help='Number of epochs')
+    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
+    parser.add_argument('--optimizer', type=str, default='adam', choices=['adam', 'adamw', 'sgd'], help='Optimizer')
+    parser.add_argument('--weight_decay', type=float, default=0.01, help='Weight decay (L2 regularization)')
+    parser.add_argument('--scheduler', type=str, default='cosine', choices=['cosine', 'step'], help='LR scheduler')
+    parser.add_argument('--step_size', type=int, default=5, help='StepLR step size')
+    parser.add_argument('--gamma', type=float, default=0.5, help='StepLR gamma')
+    parser.add_argument('--resume', type=str, default=None, help='Path to checkpoint to resume from')
+    parser.add_argument('--eval_only', action='store_true', help='Only run evaluation')
+    # Output
+    parser.add_argument('--output_dir', type=str, default='outputs', help='Directory to save outputs and checkpoints')
+    parser.add_argument('--save_name', type=str, default='vit_lora_best.pth', help='Checkpoint file name')
+    # Callbacks
+    parser.add_argument('--early_stopping', action='store_true', help='Enable early stopping')
+    parser.add_argument('--patience', type=int, default=5, help='Early stopping patience')
+    # CUDA
+    parser.add_argument('--device', type=str, default='cuda' if torch.cuda.is_available() else 'cpu', help='Device to use')
+    parser.add_argument('--cuda_deterministic', action='store_true', help='Enable deterministic CUDA (reproducible, slower)')
+    parser.add_argument('--cuda_benchmark', action='store_true', default=True, help='Enable cudnn.benchmark for fast training (default: True)')
+    parser.add_argument('--cuda_max_split_size_mb', type=int, default=None, help='Set CUDA max split size in MB (for large models, PyTorch >=1.10)')
+    # Misc
+    parser.add_argument('--seed', type=int, default=42, help='Random seed for reproducibility')
+    parser.add_argument('--log_level', type=str, default='info', help='Logging level (debug, info, warning, error)')
+    return parser.parse_args()
+
+
+def setup_logging(log_level: str) -> None:
+    """Set up logging with the specified log level."""
+    numeric_level = getattr(logging, log_level.upper(), None)
+    if not isinstance(numeric_level, int):
+        numeric_level = logging.INFO
+    logging.basicConfig(level=numeric_level, format='[%(levelname)s] %(message)s')
+
+
+def main() -> None:
+    """Main function for fine-tuning VisionTransformer with LoRA."""
+    args = parse_args()
+    setup_logging(args.log_level)
+    logger = logging.getLogger(__name__)
+    logger.info(f"Using device: {args.device}")
+
+    setup_cuda(seed=args.seed, deterministic=args.cuda_deterministic, benchmark=args.cuda_benchmark, max_split_size_mb=args.cuda_max_split_size_mb)
+    set_seed(args.seed)
+
+    # Data
+    try:
+        train_dataset = get_dataset(args.dataset, args.data_dir, train=True, img_size=args.img_size)
+        val_dataset = get_dataset(args.dataset, args.data_dir, train=False, img_size=args.img_size)
+    except Exception as e:
+        logger.error(f"Failed to load dataset: {e}")
+        return
+    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)
+    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers)
+
+    # Model
+    try:
+        model = VisionTransformer(
+            img_size=args.img_size,
+            patch_size=args.patch_size,
+            in_chans=3,
+            num_classes=args.num_classes,
+            embed_dim=args.embed_dim,
+            depth=args.depth,
+            num_heads=args.num_heads,
+            mlp_ratio=args.mlp_ratio,
+            lora_config={
+                'r': args.lora_r,
+                'alpha': args.lora_alpha,
+                'dropout': args.lora_dropout,
+            },
+        ).to(args.device)
+    except Exception as e:
+        logger.error(f"Failed to initialize model: {e}")
+        return
+
+    # Optimizer
+    lora_params = [p for n, p in model.named_parameters() if 'lora' in n and p.requires_grad]
+    if args.optimizer == 'adam':
+        optimizer = torch.optim.Adam(lora_params, lr=args.lr, weight_decay=args.weight_decay)
+    elif args.optimizer == 'adamw':
+        optimizer = torch.optim.AdamW(lora_params, lr=args.lr, weight_decay=args.weight_decay)
+    else:
+        optimizer = torch.optim.SGD(lora_params, lr=args.lr, momentum=0.9, weight_decay=args.weight_decay)
+
+    # Scheduler
+    if args.scheduler == 'cosine':
+        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
+    else:
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)
+
+    criterion = torch.nn.CrossEntropyLoss()
+    scaler = torch.cuda.amp.GradScaler() if args.device == 'cuda' else None
+
+    # Callbacks
+    callbacks = []
+    if args.early_stopping:
+        callbacks.append(EarlyStopping(patience=args.patience))
+
+    # Trainer
+    trainer = Trainer(
+        model=model,
+        optimizer=optimizer,
+        criterion=criterion,
+        scheduler=scheduler,
+        scaler=scaler,
+        callbacks=callbacks,
+        device=args.device,
+    )
+
+    # Optionally resume
+    start_epoch = 0
+    best_acc = 0.0
+    if args.resume and os.path.isfile(args.resume):
+        checkpoint = torch.load(args.resume, map_location=args.device)
+        model.load_state_dict(checkpoint['model'])
+        optimizer.load_state_dict(checkpoint['optimizer'])
+        start_epoch = checkpoint['epoch'] + 1
+        best_acc = checkpoint.get('best_acc', 0.0)
+        logger.info(f"Resumed from {args.resume} at epoch {start_epoch}")
+
+    if args.eval_only:
+        val_loss, val_acc = trainer.evaluate(val_loader)
+        logger.info(f"Eval Loss: {val_loss:.4f}, Eval Acc: {val_acc:.4f}")
+        return
+
+    # Training
+    best_acc = trainer.fit(
+        train_loader,
+        val_loader,
+        epochs=args.epochs,
+        start_epoch=start_epoch,
+        best_acc=best_acc,
+        checkpoint_path=os.path.join(args.output_dir, args.save_name),
+    )
+    logger.info(f"Best validation accuracy: {best_acc:.4f}")
+
+if __name__ == '__main__':
+    main()

langvision/cli/train.py
ADDED
@@ -0,0 +1,101 @@
+import argparse
+import torch
+from langvision.models.vision_transformer import VisionTransformer
+from langvision.utils.config import default_config
+from langvision.utils.data import get_preprocessing
+from torchvision import datasets
+from torch.utils.data import DataLoader
+import os
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Train or evaluate VisionTransformer with LoRA')
+    parser.add_argument('--dataset', type=str, default='cifar10', choices=['cifar10', 'cifar100'], help='Dataset to use')
+    parser.add_argument('--data_dir', type=str, default='./data', help='Dataset directory')
+    parser.add_argument('--epochs', type=int, default=10)
+    parser.add_argument('--batch_size', type=int, default=64)
+    parser.add_argument('--learning_rate', type=float, default=1e-3)
+    parser.add_argument('--lora_rank', type=int, default=4)
+    parser.add_argument('--lora_alpha', type=float, default=1.0)
+    parser.add_argument('--lora_dropout', type=float, default=0.1)
+    parser.add_argument('--output_dir', type=str, default='./checkpoints')
+    parser.add_argument('--eval', action='store_true', help='Run evaluation only')
+    parser.add_argument('--export', action='store_true', help='Export model for inference')
+    parser.add_argument('--device', type=str, default='cuda' if torch.cuda.is_available() else 'cpu')
+    return parser.parse_args()
+
+
+def get_dataloaders(args):
+    transform = get_preprocessing(default_config['img_size'])
+    if args.dataset == 'cifar10':
+        train_dataset = datasets.CIFAR10(root=args.data_dir, train=True, download=True, transform=transform)
+        val_dataset = datasets.CIFAR10(root=args.data_dir, train=False, download=True, transform=transform)
+        num_classes = 10
+    elif args.dataset == 'cifar100':
+        train_dataset = datasets.CIFAR100(root=args.data_dir, train=True, download=True, transform=transform)
+        val_dataset = datasets.CIFAR100(root=args.data_dir, train=False, download=True, transform=transform)
+        num_classes = 100
+    else:
+        raise ValueError(f"Unsupported dataset: {args.dataset}")
+    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=2)
+    val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=2)
+    return train_loader, val_loader, num_classes
+
+
+def main():
+    args = parse_args()
+    train_loader, val_loader, num_classes = get_dataloaders(args)
+    model = VisionTransformer(
+        img_size=default_config['img_size'],
+        patch_size=default_config['patch_size'],
+        in_chans=default_config['in_chans'],
+        num_classes=num_classes,
+        embed_dim=default_config['embed_dim'],
+        depth=default_config['depth'],
+        num_heads=default_config['num_heads'],
+        mlp_ratio=default_config['mlp_ratio'],
+        lora_config={
+            'r': args.lora_rank,
+            'alpha': args.lora_alpha,
+            'dropout': args.lora_dropout,
+        },
+    ).to(args.device)
+
+    if args.eval:
+        print('Evaluation mode (stub)')
+        # TODO: Implement evaluation logic
+        return
+    if args.export:
+        print('Export mode (stub)')
+        # TODO: Implement model export logic
+        return
+
+    optimizer = torch.optim.Adam(
+        [p for n, p in model.named_parameters() if 'lora' in n and p.requires_grad],
+        lr=args.learning_rate
+    )
+    criterion = torch.nn.CrossEntropyLoss()
+
+    for epoch in range(args.epochs):
+        model.train()
+        total_loss, correct, total = 0, 0, 0
+        for imgs, labels in train_loader:
+            imgs, labels = imgs.to(args.device), labels.to(args.device)
+            optimizer.zero_grad()
+            outputs = model(imgs)
+            loss = criterion(outputs, labels)
+            loss.backward()
+            optimizer.step()
+            total_loss += loss.item() * imgs.size(0)
+            _, preds = outputs.max(1)
+            correct += preds.eq(labels).sum().item()
+            total += imgs.size(0)
+        print(f"Epoch {epoch+1}/{args.epochs} | Train Loss: {total_loss/total:.4f}, Acc: {correct/total:.4f}")
+        # TODO: Add validation and checkpoint saving
+
+    os.makedirs(args.output_dir, exist_ok=True)
+    torch.save(model.state_dict(), os.path.join(args.output_dir, 'vit_lora_final.pth'))
+    print(f"Model saved to {os.path.join(args.output_dir, 'vit_lora_final.pth')}")
+
+if __name__ == '__main__':
+    main()

langvision/components/__init__.py
ADDED
@@ -0,0 +1 @@
+
langvision/components/attention.py
ADDED
@@ -0,0 +1,58 @@
+import torch
+import torch.nn as nn
+from langvision.models.lora import LoRALinear
+
+class Attention(nn.Module):
+    def __init__(self, dim, num_heads=8, lora_config=None):
+        super().__init__()
+        self.num_heads = num_heads
+        self.head_dim = dim // num_heads
+        self.scale = self.head_dim ** -0.5
+        self.qkv = nn.Linear(dim, dim * 3)
+        self.lora_q = LoRALinear(self.head_dim, self.head_dim, **lora_config) if lora_config else None
+        self.lora_v = LoRALinear(self.head_dim, self.head_dim, **lora_config) if lora_config else None
+        self.proj = nn.Linear(dim, dim)
+
+    def forward(self, x):
+        B, N, C = x.shape
+        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, self.head_dim).permute(2, 0, 3, 1, 4)
+        q, k, v = qkv[0], qkv[1], qkv[2]
+        if self.lora_q:
+            q = q + self.lora_q(q)
+        if self.lora_v:
+            v = v + self.lora_v(v)
+        attn = (q @ k.transpose(-2, -1)) * self.scale
+        attn = attn.softmax(dim=-1)
+        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+        x = self.proj(x)
+        return x
+
+class TransformerEncoderLayer(nn.Module):
+    def __init__(self, dim, num_heads, mlp_ratio=4.0, lora_config=None):
+        super().__init__()
+        self.norm1 = nn.LayerNorm(dim)
+        self.attn = Attention(dim, num_heads, lora_config)
+        self.norm2 = nn.LayerNorm(dim)
+        self.mlp = nn.Sequential(
+            nn.Linear(dim, int(dim * mlp_ratio)),
+            nn.GELU(),
+            nn.Linear(int(dim * mlp_ratio), dim),
+        )
+
+    def forward(self, x):
+        x = x + self.attn(self.norm1(x))
+        x = x + self.mlp(self.norm2(x))
+        return x
+
+class TransformerEncoder(nn.Module):
+    def __init__(self, depth, dim, num_heads, mlp_ratio=4.0, lora_config=None):
+        super().__init__()
+        self.layers = nn.ModuleList([
+            TransformerEncoderLayer(dim, num_heads, mlp_ratio, lora_config)
+            for _ in range(depth)
+        ])
+
+    def forward(self, x):
+        for layer in self.layers:
+            x = layer(x)
+        return x
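
A quick shape check for the LoRA-augmented attention block (a sketch with arbitrary dimensions; note that lora_q and lora_v operate per head, on head_dim-sized slices of q and v):

import torch

# dim=64, num_heads=8 -> head_dim=8; values are arbitrary for illustration.
attn = Attention(dim=64, num_heads=8, lora_config={'r': 4, 'alpha': 1.0, 'dropout': 0.0})
x = torch.randn(2, 16, 64)  # (batch, tokens, dim)
print(attn(x).shape)        # torch.Size([2, 16, 64])
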
langvision/components/patch_embedding.py
ADDED
@@ -0,0 +1,15 @@
+import torch
+import torch.nn as nn
+
+class PatchEmbedding(nn.Module):
+    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
+        super().__init__()
+        self.img_size = img_size
+        self.patch_size = patch_size
+        self.num_patches = (img_size // patch_size) ** 2
+        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+    def forward(self, x):
+        x = self.proj(x)  # (B, embed_dim, H/patch, W/patch)
+        x = x.flatten(2).transpose(1, 2)  # (B, num_patches, embed_dim)
+        return x
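
A quick shape check: a 224x224 RGB input with 16x16 patches yields (224/16)^2 = 196 patch tokens.

import torch

embed = PatchEmbedding(img_size=224, patch_size=16, in_chans=3, embed_dim=768)
x = torch.randn(2, 3, 224, 224)  # arbitrary batch of two images
print(embed(x).shape)            # torch.Size([2, 196, 768])
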
langvision/config/__init__.py
ADDED
@@ -0,0 +1,14 @@
+"""
+langvision.config
+
+This package contains configuration management utilities and classes.
+"""
+
+# Example placeholder for a config class
+def get_default_config():
+    """Return a default configuration dictionary."""
+    return {
+        'seed': 42,
+        'device': 'cuda',
+        'log_level': 'INFO',
+    }

langvision/data/__init__.py
File without changes

langvision/data/datasets.py
ADDED
@@ -0,0 +1,21 @@
+from torchvision import datasets, transforms
+
+def get_dataset(name, data_dir, train, img_size=224):
+    if name.lower() == 'cifar10':
+        return datasets.CIFAR10(
+            root=data_dir, train=train, download=True,
+            transform=transforms.Compose([
+                transforms.Resize((img_size, img_size)),
+                transforms.ToTensor(),
+            ])
+        )
+    elif name.lower() == 'cifar100':
+        return datasets.CIFAR100(
+            root=data_dir, train=train, download=True,
+            transform=transforms.Compose([
+                transforms.Resize((img_size, img_size)),
+                transforms.ToTensor(),
+            ])
+        )
+    else:
+        raise ValueError(f"Unknown dataset: {name}")
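
A minimal usage sketch (downloads CIFAR-10 on first use; note the transform applies only Resize and ToTensor, so images are not normalized):

from torch.utils.data import DataLoader

train_set = get_dataset('cifar10', './data', train=True, img_size=224)
loader = DataLoader(train_set, batch_size=64, shuffle=True)
imgs, labels = next(iter(loader))
print(imgs.shape)  # torch.Size([64, 3, 224, 224])
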
langvision/filesystem/__init__.py
ADDED
@@ -0,0 +1,15 @@
+"""
+langvision.filesystem
+
+This package contains filesystem utility functions and classes.
+"""
+
+def list_files(path):
+    """List files in a directory."""
+    import os
+    return os.listdir(path)
+
+# Example placeholder for a filesystem utility
+def ensure_dir(path):
+    import os
+    os.makedirs(path, exist_ok=True)

langvision/memory/__init__.py
ADDED
@@ -0,0 +1,21 @@
+"""
+langvision.memory
+
+This package contains memory management utilities and classes.
+"""
+
+# Example placeholder for a memory utility
+class MemoryBuffer:
+    def __init__(self, size=100):
+        self.size = size
+        self.buffer = []
+    def add(self, item):
+        if len(self.buffer) >= self.size:
+            self.buffer.pop(0)
+        self.buffer.append(item)
+    def get_all(self):
+        return self.buffer
+
+def init_memory():
+    """Initialize memory resources."""
+    pass
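
MemoryBuffer is a simple FIFO: once size items are stored, each add evicts the oldest. A small sketch:

buf = MemoryBuffer(size=2)
for item in ['a', 'b', 'c']:
    buf.add(item)
print(buf.get_all())  # ['b', 'c']
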
langvision/model_zoo.py
ADDED
@@ -0,0 +1 @@
+

langvision/models/lora.py
ADDED
@@ -0,0 +1,30 @@
+import torch
+import torch.nn as nn
+
+class LoRALinear(nn.Module):
+    def __init__(self, in_features, out_features, r=4, alpha=1.0, dropout=0.0):
+        super().__init__()
+        self.r = r
+        self.alpha = alpha
+        self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
+        if r > 0:
+            self.lora_A = nn.Parameter(torch.randn(in_features, r) * 0.01)
+            self.lora_B = nn.Parameter(torch.randn(r, out_features) * 0.01)
+        else:
+            self.lora_A = None
+            self.lora_B = None
+        self.scale = alpha / r if r > 0 else 1.0
+
+    def forward(self, x):
+        if self.r > 0:
+            orig_shape = x.shape
+            # Flatten all but last dim
+            x_2d = x.reshape(-1, x.shape[-1])
+            # Apply LoRA: (N, in_features) @ (in_features, r) @ (r, out_features) = (N, out_features)
+            lora_out = self.dropout(x_2d) @ self.lora_A @ self.lora_B * self.scale
+            # Reshape back to original except last dim is out_features
+            out_shape = list(orig_shape[:-1]) + [self.lora_B.shape[1]]
+            lora_out = lora_out.view(*out_shape)
+            return lora_out
+        else:
+            return 0.0
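
The low-rank update trains far fewer parameters than a dense layer: with in_features = out_features = 768 and r = 4, LoRA adds 768*4 + 4*768 = 6,144 weights versus 589,824 for a full 768x768 matrix. A small sketch:

import torch

lora = LoRALinear(768, 768, r=4, alpha=1.0)  # dims chosen to match the ViT defaults
x = torch.randn(2, 16, 768)
print(lora(x).shape)                              # torch.Size([2, 16, 768])
print(sum(p.numel() for p in lora.parameters()))  # 6144
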
langvision/models/vision_transformer.py
ADDED
@@ -0,0 +1,28 @@
+import torch
+import torch.nn as nn
+from .lora import LoRALinear
+from ..components.patch_embedding import PatchEmbedding
+from ..components.attention import TransformerEncoder
+from ..components.mlp import MLPHead
+
+class VisionTransformer(nn.Module):
+    def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4.0, lora_config=None):
+        super().__init__()
+        self.patch_embed = PatchEmbedding(img_size, patch_size, in_chans, embed_dim)
+        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
+        self.pos_embed = nn.Parameter(torch.zeros(1, 1 + self.patch_embed.num_patches, embed_dim))
+        self.pos_drop = nn.Dropout(p=0.1)
+        self.encoder = TransformerEncoder(depth, embed_dim, num_heads, mlp_ratio, lora_config)
+        self.norm = nn.LayerNorm(embed_dim)
+        self.head = MLPHead(embed_dim, num_classes)
+
+    def forward(self, x):
+        B = x.shape[0]
+        x = self.patch_embed(x)
+        cls_tokens = self.cls_token.expand(B, -1, -1)
+        x = torch.cat((cls_tokens, x), dim=1)
+        x = x + self.pos_embed
+        x = self.pos_drop(x)
+        x = self.encoder(x)
+        x = self.norm(x)
+        return self.head(x[:, 0])
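
A forward-pass sketch, assuming the package is installed so that MLPHead (langvision/components/mlp.py, whose body is not shown in this diff) resolves; the weights are untrained, so this only verifies wiring and output shape:

import torch

# Assumes MLPHead(embed_dim, num_classes) maps the CLS token to class logits.
model = VisionTransformer(num_classes=10, lora_config={'r': 4, 'alpha': 1.0, 'dropout': 0.1})
x = torch.randn(1, 3, 224, 224)
print(model(x).shape)  # torch.Size([1, 10])
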
langvision/sync/__init__.py
ADDED
@@ -0,0 +1,16 @@
+"""
+langvision.sync
+
+This package contains synchronization utilities and classes.
+"""
+
+"""Sync module for distributed synchronization and checkpoints."""
+
+def sync_checkpoint():
+    """Synchronize checkpoints across nodes."""
+    pass
+
+# Example placeholder for a sync utility
+def sync_data(source, destination):
+    # Placeholder for data synchronization logic
+    pass

langvision/training/__init__.py
File without changes