PraisonAI 2.0.12__cp311-cp311-macosx_15_0_arm64.whl → 2.2.16__cp311-cp311-macosx_15_0_arm64.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of PraisonAI might be problematic.
- praisonai/README.md +5 -0
- praisonai/agents_generator.py +83 -44
- praisonai/api/call.py +3 -3
- praisonai/auto.py +1 -1
- praisonai/cli.py +151 -16
- praisonai/deploy.py +1 -1
- praisonai/inbuilt_tools/__init__.py +1 -1
- praisonai/public/praison-ai-agents-architecture-dark.png +0 -0
- praisonai/public/praison-ai-agents-architecture.png +0 -0
- praisonai/setup/setup_conda_env.sh +55 -22
- praisonai/train.py +442 -156
- praisonai/train_vision.py +306 -0
- praisonai/ui/agents.py +822 -0
- praisonai/ui/callbacks.py +57 -0
- praisonai/ui/code.py +4 -2
- praisonai/ui/colab.py +474 -0
- praisonai/ui/colab_chainlit.py +81 -0
- praisonai/ui/config/chainlit.md +1 -1
- praisonai/ui/realtime.py +65 -10
- praisonai/ui/sql_alchemy.py +6 -5
- praisonai/ui/tools.md +133 -0
- praisonai/upload_vision.py +140 -0
- praisonai-2.2.16.dist-info/METADATA +103 -0
- {praisonai-2.0.12.dist-info → praisonai-2.2.16.dist-info}/RECORD +26 -29
- {praisonai-2.0.12.dist-info → praisonai-2.2.16.dist-info}/WHEEL +1 -1
- praisonai/ui/config/.chainlit/config.toml +0 -120
- praisonai/ui/config/.chainlit/translations/bn.json +0 -231
- praisonai/ui/config/.chainlit/translations/en-US.json +0 -229
- praisonai/ui/config/.chainlit/translations/gu.json +0 -231
- praisonai/ui/config/.chainlit/translations/he-IL.json +0 -231
- praisonai/ui/config/.chainlit/translations/hi.json +0 -231
- praisonai/ui/config/.chainlit/translations/kn.json +0 -231
- praisonai/ui/config/.chainlit/translations/ml.json +0 -231
- praisonai/ui/config/.chainlit/translations/mr.json +0 -231
- praisonai/ui/config/.chainlit/translations/ta.json +0 -231
- praisonai/ui/config/.chainlit/translations/te.json +0 -231
- praisonai/ui/config/.chainlit/translations/zh-CN.json +0 -229
- praisonai-2.0.12.dist-info/LICENSE +0 -20
- praisonai-2.0.12.dist-info/METADATA +0 -498
- {praisonai-2.0.12.dist-info → praisonai-2.2.16.dist-info}/entry_points.txt +0 -0
praisonai/train_vision.py (new file)
@@ -0,0 +1,306 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+This script finetunes a vision language model using Unsloth's fast training framework.
+It supports vision tasks by converting raw image-caption samples into a conversation format,
+adding vision-specific LoRA adapters, and training using TRL's SFTTrainer with UnslothVisionDataCollator.
+"""
+
+import os
+import sys
+import yaml
+import torch
+import shutil
+import subprocess
+import gc # For garbage collection
+
+from datasets import load_dataset, concatenate_datasets, Dataset
+from unsloth import FastVisionModel, is_bf16_supported
+from unsloth.trainer import UnslothVisionDataCollator
+from transformers import TrainingArguments
+from trl import SFTTrainer
+from tqdm import tqdm # Add progress bar
+
+
+class TrainVisionModel:
+    def __init__(self, config_path="config.yaml"):
+        self.load_config(config_path)
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model = None
+        self.hf_tokenizer = None # The underlying tokenizer
+
+    def load_config(self, path):
+        with open(path, "r") as file:
+            self.config = yaml.safe_load(file)
+        print("DEBUG: Loaded config:", self.config)
+
+    def print_system_info(self):
+        print("DEBUG: PyTorch version:", torch.__version__)
+        print("DEBUG: CUDA version:", torch.version.cuda)
+        if torch.cuda.is_available():
+            print("DEBUG: CUDA Device Capability:", torch.cuda.get_device_capability())
+        else:
+            print("DEBUG: CUDA is not available")
+        print("DEBUG: Python Version:", sys.version)
+        print("DEBUG: Python Path:", sys.executable)
+
+    def check_gpu(self):
+        gpu_stats = torch.cuda.get_device_properties(0)
+        print(f"DEBUG: GPU = {gpu_stats.name}. Max memory = {round(gpu_stats.total_memory/(1024**3),3)} GB.")
+
+    def check_ram(self):
+        from psutil import virtual_memory
+        ram_gb = virtual_memory().total / 1e9
+        print(f"DEBUG: Your runtime has {ram_gb:.1f} gigabytes of available RAM")
+        if ram_gb < 20:
+            print("DEBUG: Not using a high-RAM runtime")
+        else:
+            print("DEBUG: You are using a high-RAM runtime!")
+
+    def prepare_model(self):
+        print("DEBUG: Preparing vision model and tokenizer...")
+        self.model, original_tokenizer = FastVisionModel.from_pretrained(
+            model_name=self.config["model_name"],
+            load_in_4bit=self.config["load_in_4bit"],
+            use_gradient_checkpointing="unsloth"
+        )
+        print("DEBUG: Vision model and original tokenizer loaded.")
+
+        # Use the full processor that supports image inputs.
+        self.hf_tokenizer = original_tokenizer
+
+        # Set pad token if needed
+        if not hasattr(self.hf_tokenizer, 'pad_token') or self.hf_tokenizer.pad_token is None:
+            if hasattr(self.hf_tokenizer, 'eos_token'):
+                self.hf_tokenizer.pad_token = self.hf_tokenizer.eos_token
+            elif hasattr(self.hf_tokenizer, 'bos_token'):
+                self.hf_tokenizer.pad_token = self.hf_tokenizer.bos_token
+
+        # Set max length
+        if hasattr(self.hf_tokenizer, 'model_max_length'):
+            self.hf_tokenizer.model_max_length = self.config.get("max_seq_length", 2048)
+
+        # Add vision-specific LoRA adapters
+        self.model = FastVisionModel.get_peft_model(
+            self.model,
+            finetune_vision_layers=self.config.get("finetune_vision_layers", False),
+            finetune_language_layers=self.config.get("finetune_language_layers", True),
+            finetune_attention_modules=self.config.get("finetune_attention_modules", True),
+            finetune_mlp_modules=self.config.get("finetune_mlp_modules", True),
+            r=16,
+            lora_alpha=16,
+            lora_dropout=0,
+            bias="none",
+            random_state=3407,
+            use_rslora=False,
+            loftq_config=None
+        )
+        print("DEBUG: Vision LoRA adapters added.")
+
+    def convert_sample(self, sample):
+
+        instruction = self.config.get(
+            "vision_instruction",
+            "You are an expert radiographer. Describe accurately what you see in this image."
+        )
+        conversation = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": instruction},
+                    {"type": "image", "image": sample["image"]}
+                ]
+            },
+            {
+                "role": "assistant",
+                "content": [
+                    {"type": "text", "text": sample["caption"]}
+                ]
+            },
+        ]
+
+        return {"messages": conversation}
+
+    def load_datasets(self):
+        all_converted = []
+        for dataset_info in self.config["dataset"]:
+            print("\nDEBUG: Loading vision dataset:", dataset_info)
+            ds = load_dataset(
+                dataset_info["name"],
+                split=dataset_info.get("split_type", "train")
+            )
+            print("DEBUG: Dataset size:", len(ds))
+            print("DEBUG: First raw sample:", ds[0])
+            print("DEBUG: Dataset features:", ds.features)
+
+            print("\nDEBUG: Converting dataset to vision conversation format...")
+            converted_ds = [self.convert_sample(sample) for sample in ds]
+
+            # Debug first converted sample
+            print("\nDEBUG: First converted sample structure:")
+            first = converted_ds[0]
+            print("DEBUG: Message keys:", first["messages"][0]["content"][1].keys())
+            print("DEBUG: Image type in converted:", type(first["messages"][0]["content"][1].get("image")))
+
+            all_converted.extend(converted_ds)
+
+        print("\nDEBUG: Combined vision dataset has", len(all_converted), "examples.")
+        return all_converted
+
+    def train_model(self):
+        print("DEBUG: Starting vision training...")
+        raw_dataset = self.load_datasets()
+
+        # Build training arguments using TrainingArguments
+        training_args = TrainingArguments(
+            per_device_train_batch_size=self.config.get("per_device_train_batch_size", 1),
+            gradient_accumulation_steps=self.config.get("gradient_accumulation_steps", 4),
+            warmup_steps=self.config.get("warmup_steps", 5),
+            max_steps=self.config.get("max_steps", 30),
+            learning_rate=self.config.get("learning_rate", 2e-4),
+            fp16=self.config.get("fp16", not is_bf16_supported()),
+            bf16=self.config.get("bf16", is_bf16_supported()),
+            logging_steps=self.config.get("logging_steps", 1),
+            optim=self.config.get("optim", "adamw_8bit"),
+            weight_decay=self.config.get("weight_decay", 0.01),
+            lr_scheduler_type=self.config.get("lr_scheduler_type", "linear"),
+            seed=self.config.get("seed", 3407),
+            output_dir=self.config.get("output_dir", "outputs"),
+            report_to="none" if not os.getenv("PRAISON_WANDB") else "wandb",
+            remove_unused_columns=False,
+            # Add memory optimization settings
+            gradient_checkpointing=True,
+            max_grad_norm=1.0,
+        )
+
+        trainer = SFTTrainer(
+            model=self.model,
+            tokenizer=self.hf_tokenizer,
+            data_collator=UnslothVisionDataCollator(self.model, self.hf_tokenizer),
+            train_dataset=raw_dataset,
+            args=training_args,
+            max_seq_length=self.config.get("max_seq_length", 2048),
+            dataset_text_field="", # Required for vision training
+            dataset_kwargs={"skip_prepare_dataset": True}, # Required for vision training
+            packing=False # Explicitly set packing to False
+        )
+        print("DEBUG: Beginning vision trainer.train() ...")
+        trainer.train()
+        print("DEBUG: Vision training complete. Saving model and tokenizer locally...")
+        self.model.save_pretrained("lora_vision_model")
+        self.hf_tokenizer.save_pretrained("lora_vision_model")
+        print("DEBUG: Saved vision model and tokenizer to 'lora_vision_model'.")
+
+    def vision_inference(self, instruction, image):
+        FastVisionModel.for_inference(self.model)
+        messages = [
+            {"role": "user", "content": [
+                {"type": "image"},
+                {"type": "text", "text": instruction}
+            ]}
+        ]
+        input_text = self.hf_tokenizer.apply_chat_template(messages, add_generation_prompt=True)
+        inputs = self.hf_tokenizer(
+            image,
+            input_text,
+            add_special_tokens=False,
+            return_tensors="pt"
+        ).to("cuda")
+        outputs = self.model.generate(
+            **inputs,
+            max_new_tokens=128,
+            use_cache=True,
+            temperature=1.5,
+            min_p=0.1
+        )
+        print("DEBUG: Vision inference output:", self.hf_tokenizer.batch_decode(outputs))
+
+    def save_model_merged(self):
+        if os.path.exists(self.config["hf_model_name"]):
+            shutil.rmtree(self.config["hf_model_name"])
+        self.model.push_to_hub_merged(
+            self.config["hf_model_name"],
+            self.hf_tokenizer,
+            save_method="merged_16bit",
+            token=os.getenv("HF_TOKEN")
+        )
+
+    def push_model_gguf(self):
+        self.model.push_to_hub_gguf(
+            self.config["hf_model_name"],
+            self.hf_tokenizer,
+            quantization_method=self.config.get("quantization_method", "q4_k_m"),
+            token=os.getenv("HF_TOKEN")
+        )
+
+    def save_model_gguf(self):
+        self.model.save_pretrained_gguf(
+            self.config["hf_model_name"],
+            self.hf_tokenizer,
+            quantization_method="q4_k_m"
+        )
+
+    def prepare_modelfile_content(self):
+        output_model = self.config["hf_model_name"]
+
+        template = '''{{- range $index, $_ := .Messages }}<|start_header_id|>{{ .Role }}<|end_header_id|>
+
+{{ .Content }}
+{{- if gt (len (slice $.Messages $index)) 1 }}<|eot_id|>
+{{- else if ne .Role "assistant" }}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+{{ end }}
+{{- end }}'''
+
+        return f"""FROM {output_model}
+TEMPLATE {template}
+PARAMETER temperature 0.6
+PARAMETER top_p 0.9
+"""
+
+    def create_and_push_ollama_model(self):
+        modelfile_content = self.prepare_modelfile_content()
+        with open("Modelfile", "w") as file:
+            file.write(modelfile_content)
+        subprocess.run(["ollama", "serve"])
+        subprocess.run(["ollama", "create", f"{self.config['ollama_model']}:{self.config['model_parameters']}", "-f", "Modelfile"])
+        subprocess.run(["ollama", "push", f"{self.config['ollama_model']}:{self.config['model_parameters']}"])
+
+    def run(self):
+        self.print_system_info()
+        self.check_gpu()
+        self.check_ram()
+        if self.config.get("train", "true").lower() == "true":
+            self.prepare_model()
+            self.train_model()
+        if self.config.get("huggingface_save", "true").lower() == "true":
+            self.save_model_merged()
+        if self.config.get("huggingface_save_gguf", "true").lower() == "true":
+            self.push_model_gguf()
+        if self.config.get("ollama_save", "true").lower() == "true":
+            self.create_and_push_ollama_model()
+
+
+def main():
+    import argparse
+    parser = argparse.ArgumentParser(description="PraisonAI Vision Training Script")
+    parser.add_argument("command", choices=["train", "inference"], help="Command to execute")
+    parser.add_argument("--config", default="config.yaml", help="Path to configuration file")
+    args = parser.parse_args()
+
+    trainer_obj = TrainVisionModel(config_path=args.config)
+    if args.command == "train":
+        trainer_obj.run()
+    elif args.command == "inference":
+        # For inference, we load a sample image from the first dataset
+        instr = trainer_obj.config.get("vision_instruction", "You are an expert radiographer. Describe accurately what you see in this image.")
+        ds_info = trainer_obj.config["dataset"][0]
+        ds = load_dataset(ds_info["name"], split=ds_info.get("split_type", "train"))
+        sample_image = ds[0]["image"]
+        if trainer_obj.model is None or trainer_obj.hf_tokenizer is None:
+            trainer_obj.prepare_model()
+        trainer_obj.vision_inference(instr, sample_image)
+
+
+if __name__ == "__main__":
+    main()
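
For context on how the new train_vision.py module is driven: main() exposes a small CLI (python train_vision.py train --config config.yaml), and the class can also be used programmatically. The sketch below is illustrative only; the config keys mirror what TrainVisionModel reads in the diff above, while the base model, dataset, Hugging Face repo id, and the praisonai.train_vision import path are placeholder assumptions rather than defaults shipped with the package. It also assumes a CUDA GPU with unsloth, trl, and psutil installed, since run() calls torch.cuda.get_device_properties(0) unconditionally.

    # Illustrative driver only. Config keys mirror what TrainVisionModel reads above;
    # the base model, dataset, and repo id are assumptions, not package defaults.
    import yaml
    from praisonai.train_vision import TrainVisionModel  # assumed import path

    config = {
        "model_name": "unsloth/Llama-3.2-11B-Vision-Instruct",  # assumed vision base model
        "load_in_4bit": True,
        "max_seq_length": 2048,
        "max_steps": 30,
        "dataset": [{"name": "unsloth/Radiology_mini", "split_type": "train"}],  # assumed image/caption dataset
        "hf_model_name": "your-username/vision-finetune",  # hypothetical Hugging Face repo id
        "train": "true",              # run prepare_model() + train_model()
        "huggingface_save": "false",  # skip push_to_hub_merged()
        "huggingface_save_gguf": "false",
        "ollama_save": "false",
    }

    with open("config.yaml", "w") as f:
        yaml.safe_dump(config, f)

    TrainVisionModel(config_path="config.yaml").run()
    # Equivalent CLI form handled by main():
    #   python train_vision.py train --config config.yaml

Note that run() compares the train/huggingface_save/huggingface_save_gguf/ollama_save flags as lowercase strings ("true"/"false"), so quoting them in the YAML keeps the .lower() calls from failing on bare booleans.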