langtune-0.1.19-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langtune/__init__.py +315 -0
- langtune/acceleration.py +132 -0
- langtune/api.py +320 -0
- langtune/auth.py +434 -0
- langtune/callbacks.py +268 -0
- langtune/cli.py +687 -0
- langtune/client.py +721 -0
- langtune/config.py +356 -0
- langtune/data.py +526 -0
- langtune/distributed.py +154 -0
- langtune/facade.py +174 -0
- langtune/finetune.py +491 -0
- langtune/generation.py +95 -0
- langtune/logging_utils.py +182 -0
- langtune/metrics.py +345 -0
- langtune/model/__init__.py +20 -0
- langtune/model/hub.py +109 -0
- langtune/model/loader.py +84 -0
- langtune/model/safetensors.py +104 -0
- langtune/model/weights.py +100 -0
- langtune/models.py +19 -0
- langtune/nn/fast_transformer.py +399 -0
- langtune/nn/layers.py +178 -0
- langtune/nn/transformer.py +254 -0
- langtune/optimizations.py +870 -0
- langtune/py.typed +2 -0
- langtune/schedulers.py +234 -0
- langtune/tokenizers.py +275 -0
- langtune/trainer.py +889 -0
- langtune/training/neftune.py +80 -0
- langtune/utils.py +337 -0
- langtune-0.1.19.dist-info/METADATA +257 -0
- langtune-0.1.19.dist-info/RECORD +37 -0
- langtune-0.1.19.dist-info/WHEEL +5 -0
- langtune-0.1.19.dist-info/entry_points.txt +2 -0
- langtune-0.1.19.dist-info/licenses/LICENSE +21 -0
- langtune-0.1.19.dist-info/top_level.txt +1 -0
langtune/training/neftune.py
ADDED
@@ -0,0 +1,80 @@
"""
NEFTune: Noisy Embeddings for Instruction Fine-tuning.
Adds uniform noise to the embedding inputs during training to improve generalization.
"""

import math
import torch
from torch import nn
from typing import Optional, List, Dict, Any, Union
from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl


class NEFTuneCallback(TrainerCallback):
    """
    Callback for adding uniform noise to embeddings during training (NEFTune).
    Reference: https://arxiv.org/abs/2310.05914
    """

    def __init__(self, noise_alpha: float = 5.0):
        self.noise_alpha = noise_alpha
        self.hooks = []

    def on_train_begin(self, args, state, control, model=None, **kwargs):
        """
        Attach forward hooks to the embedding layer(s) of the model.
        """
        print(f"🔊 Enabling NEFTune with alpha={self.noise_alpha}")

        # Identify embedding layers
        # Common HF models: model.embed_tokens, transformer.wte, etc.
        forward_hook = self._get_neftune_hook()

        if hasattr(model, "get_input_embeddings"):
            embeddings = model.get_input_embeddings()
            if embeddings:
                self.hooks.append(embeddings.register_forward_hook(forward_hook))
                return

        # Fallback recursive search if get_input_embeddings is not standard
        for name, module in model.named_modules():
            if isinstance(module, nn.Embedding):
                self.hooks.append(module.register_forward_hook(forward_hook))

    def on_train_end(self, args, state, control, **kwargs):
        """Remove hooks after training."""
        for hook in self.hooks:
            hook.remove()
        self.hooks = []
        print("🔊 Disabled NEFTune hooks")

    def _get_neftune_hook(self):
        """
        Returns a forward hook that adds noise.
        """
        def hook(module, args, output):
            # args[0] is usually input_ids, output is embeddings
            if module.training:
                # noise ~ Uniform(-1, 1) * alpha / sqrt(sequence_length * hidden_dim)
                dims = torch.tensor(output.size(1) * output.size(2))
                mag_norm = self.noise_alpha / torch.sqrt(dims)
                noise = torch.zeros_like(output).uniform_(-mag_norm, mag_norm)
                return output + noise
            return output

        return hook


def activate_neftune(model: nn.Module, noise_alpha: float = 5.0):
    """
    Directly activate NEFTune on a model without using a callback (manual loop).
    """
    embeddings = model.get_input_embeddings()

    def neftune_forward(module, input, output):
        if module.training:
            dims = torch.tensor(output.size(1) * output.size(2))
            mag_norm = noise_alpha / torch.sqrt(dims)
            output = output + torch.zeros_like(output).uniform_(-mag_norm, mag_norm)
        return output

    embeddings.register_forward_hook(neftune_forward)
    print(f"🔊 NEFTune activated manually (alpha={noise_alpha})")
langtune/utils.py
ADDED
@@ -0,0 +1,337 @@
"""
utils.py: Utility functions for Langtune
"""

import torch
import numpy as np
import random
import logging
from typing import List, Dict, Any, Optional, Union
import json
import os
from pathlib import Path

logger = logging.getLogger(__name__)


def set_seed(seed: int = 42):
    """Set random seeds for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    logger.info(f"Random seed set to {seed}")


def get_device(device: str = "auto") -> torch.device:
    """Get the appropriate device for computation."""
    if device == "auto":
        if torch.cuda.is_available():
            device = "cuda"
        elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
            device = "mps"
        else:
            device = "cpu"

    return torch.device(device)


def count_parameters(model: torch.nn.Module) -> int:
    """Count the number of trainable parameters in a model."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def count_lora_parameters(model: torch.nn.Module) -> int:
    """Count the number of LoRA parameters in a model."""
    lora_params = 0
    for name, param in model.named_parameters():
        if 'lora' in name.lower() and param.requires_grad:
            lora_params += param.numel()
    return lora_params


def save_model_info(model: torch.nn.Module, save_path: str):
    """Save model information to a JSON file."""
    info = {
        "total_parameters": count_parameters(model),
        "lora_parameters": count_lora_parameters(model),
        "model_class": model.__class__.__name__,
        "model_config": getattr(model, 'config', None)
    }

    with open(save_path, 'w') as f:
        json.dump(info, f, indent=2)

    logger.info(f"Model info saved to {save_path}")


def load_model_info(load_path: str) -> Dict[str, Any]:
    """Load model information from a JSON file."""
    with open(load_path, 'r') as f:
        info = json.load(f)
    return info


def encode_text(text: str, tokenizer=None) -> List[int]:
    """
    Encodes text into token IDs using the provided tokenizer.
    If no tokenizer is given, uses character-level encoding as fallback.
    """
    if tokenizer:
        if hasattr(tokenizer, 'encode'):
            return tokenizer.encode(text)
        elif callable(tokenizer):
            return tokenizer(text)
        else:
            raise ValueError("Invalid tokenizer provided")

    # Fallback to character-level encoding
    return [ord(c) for c in text]


def decode_tokens(token_ids: List[int], tokenizer=None) -> str:
    """
    Decodes a list of token IDs back into a string.
    """
    if tokenizer:
        if hasattr(tokenizer, 'decode'):
            return tokenizer.decode(token_ids)
        elif callable(tokenizer):
            return tokenizer(token_ids)
        else:
            raise ValueError("Invalid tokenizer provided")

    # Fallback to character-level decoding
    return ''.join([chr(i) for i in token_ids if i > 0])


class SimpleTokenizer:
    """
    A simple character-level tokenizer for demonstration purposes.
    """

    def __init__(self, vocab_size: int = 32000):
        self.vocab_size = vocab_size
        self.pad_token_id = 0
        self.unk_token_id = 1
        self.bos_token_id = 2
        self.eos_token_id = 3

        # Create a simple vocabulary
        self.vocab = {
            "<pad>": self.pad_token_id,
            "<unk>": self.unk_token_id,
            "<bos>": self.bos_token_id,
            "<eos>": self.eos_token_id
        }

        # Add common characters
        for i, char in enumerate("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 .,!?;:'\"()[]{}"):
            if len(self.vocab) < vocab_size:
                self.vocab[char] = len(self.vocab)

        # Create reverse vocabulary
        self.id_to_token = {v: k for k, v in self.vocab.items()}

    def encode(self, text: str, add_special_tokens: bool = True) -> List[int]:
        """Encode text to token IDs."""
        tokens = []

        if add_special_tokens:
            tokens.append(self.bos_token_id)

        for char in text:
            if char in self.vocab:
                tokens.append(self.vocab[char])
            else:
                tokens.append(self.unk_token_id)

        if add_special_tokens:
            tokens.append(self.eos_token_id)

        return tokens

    def decode(self, token_ids: List[int], skip_special_tokens: bool = True) -> str:
        """Decode token IDs to text."""
        tokens = []

        for token_id in token_ids:
            if token_id in self.id_to_token:
                token = self.id_to_token[token_id]
                if skip_special_tokens and token.startswith("<"):
                    continue
                tokens.append(token)

        return "".join(tokens)

    def __call__(self, text: str, **kwargs) -> Dict[str, List[int]]:
        """Callable interface for compatibility."""
        token_ids = self.encode(text, **kwargs)
        return {"input_ids": token_ids}


def create_attention_mask(input_ids: torch.Tensor, pad_token_id: int = 0) -> torch.Tensor:
    """Create attention mask from input IDs."""
    return (input_ids != pad_token_id).long()


def pad_sequences(sequences: List[torch.Tensor], pad_token_id: int = 0, max_length: Optional[int] = None) -> torch.Tensor:
    """Pad sequences to the same length."""
    if max_length is None:
        max_length = max(seq.size(0) for seq in sequences)

    padded_sequences = []
    for seq in sequences:
        if seq.size(0) < max_length:
            padding = torch.full((max_length - seq.size(0),), pad_token_id, dtype=seq.dtype)
            padded_seq = torch.cat([seq, padding])
        else:
            padded_seq = seq[:max_length]
        padded_sequences.append(padded_seq)

    return torch.stack(padded_sequences)


def truncate_sequences(sequences: List[torch.Tensor], max_length: int) -> List[torch.Tensor]:
    """Truncate sequences to maximum length."""
    return [seq[:max_length] for seq in sequences]


def compute_perplexity(loss: float) -> float:
    """Compute perplexity from cross-entropy loss."""
    return np.exp(loss)


def compute_bleu_score(predictions: List[str], references: List[str]) -> float:
    """Compute BLEU score (simplified implementation)."""
    # This is a very simplified BLEU implementation
    # In practice, you'd use a proper BLEU implementation like nltk.translate.bleu_score

    def get_ngrams(text: str, n: int) -> set:
        words = text.split()
        return set(tuple(words[i:i+n]) for i in range(len(words)-n+1))

    total_score = 0.0
    for pred, ref in zip(predictions, references):
        pred_ngrams = get_ngrams(pred, 1)  # Using unigrams for simplicity
        ref_ngrams = get_ngrams(ref, 1)

        if len(pred_ngrams) == 0:
            score = 0.0
        else:
            overlap = len(pred_ngrams.intersection(ref_ngrams))
            score = overlap / len(pred_ngrams)

        total_score += score

    return total_score / len(predictions)


def format_time(seconds: float) -> str:
    """Format time in a human-readable format."""
    if seconds < 60:
        return f"{seconds:.1f}s"
    elif seconds < 3600:
        minutes = seconds / 60
        return f"{minutes:.1f}m"
    else:
        hours = seconds / 3600
        return f"{hours:.1f}h"


def format_size(size_bytes: int) -> str:
    """Format size in a human-readable format."""
    if size_bytes < 1024:
        return f"{size_bytes}B"
    elif size_bytes < 1024**2:
        return f"{size_bytes/1024:.1f}KB"
    elif size_bytes < 1024**3:
        return f"{size_bytes/(1024**2):.1f}MB"
    else:
        return f"{size_bytes/(1024**3):.1f}GB"


def create_directory_structure(base_dir: str, subdirs: List[str]):
    """Create a directory structure."""
    base_path = Path(base_dir)
    base_path.mkdir(parents=True, exist_ok=True)

    for subdir in subdirs:
        (base_path / subdir).mkdir(parents=True, exist_ok=True)

    logger.info(f"Created directory structure: {base_dir}")


def save_json(data: Dict[str, Any], file_path: str):
    """Save data to a JSON file."""
    with open(file_path, 'w') as f:
        json.dump(data, f, indent=2)
    logger.info(f"Data saved to {file_path}")


def load_json(file_path: str) -> Dict[str, Any]:
    """Load data from a JSON file."""
    with open(file_path, 'r') as f:
        data = json.load(f)
    logger.info(f"Data loaded from {file_path}")
    return data


def get_model_size(model: torch.nn.Module) -> Dict[str, Any]:
    """Get model size information."""
    total_params = count_parameters(model)
    lora_params = count_lora_parameters(model)

    # Estimate memory usage (rough approximation)
    param_size = sum(p.numel() * p.element_size() for p in model.parameters())
    buffer_size = sum(b.numel() * b.element_size() for b in model.buffers())
    model_size = param_size + buffer_size

    return {
        "total_parameters": total_params,
        "lora_parameters": lora_params,
        "regular_parameters": total_params - lora_params,
        "model_size_bytes": model_size,
        "model_size_mb": model_size / (1024**2),
        "lora_ratio": lora_params / total_params if total_params > 0 else 0
    }


def print_model_summary(model: torch.nn.Module):
    """Print a summary of the model."""
    size_info = get_model_size(model)

    print("=" * 50)
    print("MODEL SUMMARY")
    print("=" * 50)
    print(f"Total parameters: {size_info['total_parameters']:,}")
    print(f"LoRA parameters: {size_info['lora_parameters']:,}")
    print(f"Regular parameters: {size_info['regular_parameters']:,}")
    print(f"LoRA ratio: {size_info['lora_ratio']:.2%}")
    print(f"Model size: {format_size(size_info['model_size_bytes'])}")
    print("=" * 50)


def warmup_lr_scheduler(optimizer, warmup_steps: int, total_steps: int, base_lr: float, max_lr: float):
    """Create a learning rate scheduler with warmup."""
    def lr_lambda(current_step):
        if current_step < warmup_steps:
            return float(current_step) / float(max(1, warmup_steps))
        return max(0.0, float(total_steps - current_step) / float(max(1, total_steps - warmup_steps)))

    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)


def gradient_accumulation_steps(batch_size: int, effective_batch_size: int) -> int:
    """Calculate gradient accumulation steps."""
    if batch_size >= effective_batch_size:
        return 1

    steps = effective_batch_size // batch_size
    if effective_batch_size % batch_size != 0:
        steps += 1

    return steps


def log_gpu_memory():
    """Log GPU memory usage if available."""
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1024**3  # GB
        reserved = torch.cuda.memory_reserved() / 1024**3  # GB
        logger.info(f"GPU Memory - Allocated: {allocated:.2f}GB, Reserved: {reserved:.2f}GB")


def cleanup_gpu_memory():
    """Clean up GPU memory."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        logger.info("GPU memory cleaned up")


# Backward compatibility
def encode_text_legacy(text, tokenizer=None):
    """Legacy function for backward compatibility."""
    logger.warning("encode_text_legacy is deprecated. Use encode_text instead.")
    return encode_text(text, tokenizer)


def decode_tokens_legacy(token_ids):
    """Legacy function for backward compatibility."""
    logger.warning("decode_tokens_legacy is deprecated. Use decode_tokens instead.")
    return decode_tokens(token_ids)
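A quick sketch of how the tokenization and batching helpers above compose; the example strings are arbitrary, and only the shapes and the round-trip matter.

```python
# Illustrative sketch (not part of the package): character-level tokenization,
# right-padding to a common length, and attention-mask construction using the
# helpers defined in langtune/utils.py above.
import torch
from langtune.utils import (
    SimpleTokenizer,
    pad_sequences,
    create_attention_mask,
    compute_perplexity,
    set_seed,
)

set_seed(42)
tok = SimpleTokenizer()

texts = ["Hello!", "Train smarter."]
encoded = [torch.tensor(tok.encode(t)) for t in texts]          # <bos> ... <eos> per text

batch = pad_sequences(encoded, pad_token_id=tok.pad_token_id)   # pad shorter sequence with 0s
mask = create_attention_mask(batch, pad_token_id=tok.pad_token_id)

print(batch.shape, mask.shape)        # torch.Size([2, 16]) torch.Size([2, 16])
print(tok.decode(batch[0].tolist()))  # "Hello!" -- special and pad tokens are skipped
print(compute_perplexity(2.0))        # exp(2.0) ≈ 7.39
```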
langtune-0.1.19.dist-info/METADATA
ADDED
@@ -0,0 +1,257 @@
Metadata-Version: 2.4
Name: langtune
Version: 0.1.19
Summary: Efficient LoRA Fine-Tuning for Large Language Models - Train smarter, not harder.
Author-email: Pritesh Raj <priteshraj41@gmail.com>
Maintainer-email: Langtrain AI <contact@langtrain.ai>
License: MIT License

Copyright (c) 2025 Pritesh Raj

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Project-URL: Homepage, https://github.com/langtrain-ai/langtune
Project-URL: Documentation, https://github.com/langtrain-ai/langtune/tree/main/docs
Project-URL: Repository, https://github.com/langtrain-ai/langtune
Project-URL: Changelog, https://github.com/langtrain-ai/langtune/blob/main/CHANGELOG.md
Project-URL: Bug Tracker, https://github.com/langtrain-ai/langtune/issues
Keywords: llm,lora,fine-tuning,machine-learning,deep-learning,transformers,nlp,language-model,pytorch,rlhf,dpo,ppo
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Classifier: Typing :: Typed
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: torch>=1.10
Requires-Dist: numpy
Requires-Dist: tqdm
Requires-Dist: pyyaml
Requires-Dist: scipy
Requires-Dist: wandb
Requires-Dist: rich>=13.0.0
Requires-Dist: rich>=13.0.0
Provides-Extra: dev
Requires-Dist: pytest>=7.0; extra == "dev"
Requires-Dist: pytest-cov; extra == "dev"
Requires-Dist: black; extra == "dev"
Requires-Dist: flake8; extra == "dev"
Requires-Dist: mypy; extra == "dev"
Requires-Dist: isort; extra == "dev"
Provides-Extra: all
Requires-Dist: transformers; extra == "all"
Requires-Dist: datasets; extra == "all"
Requires-Dist: accelerate; extra == "all"
Requires-Dist: bitsandbytes; extra == "all"
Dynamic: license-file

<div align="center">

<img src="https://raw.githubusercontent.com/langtrain-ai/langtune/main/static/langtune-white.png" alt="Langtune" width="400" />

<h3>The fastest way to fine-tune LLMs</h3>

<p>
<strong>Production-ready LoRA fine-tuning in minutes, not days.</strong><br>
Built for ML engineers who need results, not complexity.
</p>

<p>
<a href="https://www.producthunt.com/products/langtrain-2" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/featured.svg?post_id=1049974&theme=light" alt="Product Hunt" width="200" /></a>
</p>

<p>
<a href="https://pypi.org/project/langtune/"><img src="https://img.shields.io/pypi/v/langtune.svg?style=for-the-badge&logo=pypi&logoColor=white" alt="PyPI" /></a>
<a href="https://pepy.tech/project/langtune"><img src="https://img.shields.io/pepy/dt/langtune?style=for-the-badge&logo=python&logoColor=white&label=downloads" alt="Downloads" /></a>
<a href="https://github.com/langtrain-ai/langtune/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue?style=for-the-badge" alt="License" /></a>
</p>

<p>
<a href="#quick-start">Quick Start</a> •
<a href="#features">Features</a> •
<a href="#why-langtune">Why Langtune</a> •
<a href="https://langtrain.xyz/docs">Docs</a>
</p>

</div>

---

## ⚡ Quick Start

```bash
pip install langtune
```

Fine-tune your first model in **3 lines of code**:

```python
from langtune import LoRATrainer

trainer = LoRATrainer(model_name="meta-llama/Llama-2-7b-hf")
trainer.train_from_file("data.jsonl")
```

That's it. Your fine-tuned model is ready.

---

## ✨ Features

<table>
<tr>
<td width="50%">

### 🚀 **Blazing Fast**
Train 7B models in under 30 minutes on a single GPU. Our optimized kernels squeeze every last FLOP.

### 🎯 **Zero Config Required**
Smart defaults that just work. No PhD required. Start training in seconds.

### 💾 **Memory Efficient**
4-bit quantization + gradient checkpointing = Train 70B models on consumer hardware.

</td>
<td width="50%">

### 🔧 **Production Ready**
Battle-tested at scale. Used by teams fine-tuning thousands of models daily.

### 🌐 **Any Model, Any Data**
Works with Llama, Mistral, Qwen, Phi, and more. JSONL, CSV, or HuggingFace datasets.

### ☁️ **Cloud Native**
One-click deployment to Langtrain Cloud. Or export to GGUF, ONNX, HuggingFace.

</td>
</tr>
</table>

---

## 🎯 Why Langtune?

| | Langtune | Others |
|---|:---:|:---:|
| **Time to first training** | 30 seconds | 2+ hours |
| **Lines of code** | 3 | 100+ |
| **Memory usage** | 8GB | 24GB+ |
| **Learning curve** | Minutes | Days |

---

## 📖 Full Example

```python
from langtune import LoRATrainer
from langtune.config import TrainingConfig, LoRAConfig

# Configure your training
config = TrainingConfig(
    num_epochs=3,
    batch_size=4,
    learning_rate=2e-4,
    lora=LoRAConfig(rank=16, alpha=32)
)

# Initialize and train
trainer = LoRATrainer(
    model_name="mistralai/Mistral-7B-v0.1",
    output_dir="./my-model",
    config=config
)

# Train on your data
trainer.train_from_file("training_data.jsonl")

# Push to Hub (optional)
trainer.push_to_hub("my-username/my-fine-tuned-model")
```

---

## 🛠️ Advanced Usage

<details>
<summary><b>Custom Dataset Format</b></summary>

```python
# JSONL format (recommended)
{"text": "Your training example here"}
{"text": "Another example"}

# Or instruction format
{"instruction": "Summarize this:", "input": "Long text...", "output": "Summary"}
```

</details>

<details>
<summary><b>Distributed Training</b></summary>

```python
trainer = LoRATrainer(
    model_name="meta-llama/Llama-2-70b-hf",
    device_map="auto",  # Automatic multi-GPU
)
```

</details>

<details>
<summary><b>Export Formats</b></summary>

```python
# Export to different formats
trainer.export("gguf")  # For llama.cpp
trainer.export("onnx")  # For ONNX Runtime
trainer.export("hf")    # HuggingFace format
```

</details>

---

## 🤝 Community

<p align="center">
<a href="https://discord.gg/langtrain">Discord</a> •
<a href="https://twitter.com/langtrainai">Twitter</a> •
<a href="https://langtrain.xyz">Website</a>
</p>

---

<div align="center">

**Built with ❤️ by [Langtrain AI](https://langtrain.xyz)**

*Making LLM fine-tuning accessible to everyone.*

</div>