scout-ai 0.2.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +155 -9
- data/README.md +296 -0
- data/Rakefile +3 -0
- data/VERSION +1 -1
- data/bin/scout-ai +2 -0
- data/doc/Agent.md +279 -0
- data/doc/Chat.md +258 -0
- data/doc/LLM.md +446 -0
- data/doc/Model.md +513 -0
- data/doc/RAG.md +129 -0
- data/lib/scout/llm/agent/chat.rb +74 -0
- data/lib/scout/llm/agent/delegate.rb +39 -0
- data/lib/scout/llm/agent/iterate.rb +44 -0
- data/lib/scout/llm/agent.rb +51 -30
- data/lib/scout/llm/ask.rb +63 -21
- data/lib/scout/llm/backends/anthropic.rb +147 -0
- data/lib/scout/llm/backends/bedrock.rb +129 -0
- data/lib/scout/llm/backends/huggingface.rb +6 -21
- data/lib/scout/llm/backends/ollama.rb +62 -35
- data/lib/scout/llm/backends/openai.rb +77 -33
- data/lib/scout/llm/backends/openwebui.rb +1 -1
- data/lib/scout/llm/backends/relay.rb +3 -2
- data/lib/scout/llm/backends/responses.rb +320 -0
- data/lib/scout/llm/chat.rb +703 -0
- data/lib/scout/llm/embed.rb +4 -4
- data/lib/scout/llm/mcp.rb +28 -0
- data/lib/scout/llm/parse.rb +71 -13
- data/lib/scout/llm/rag.rb +9 -0
- data/lib/scout/llm/tools/call.rb +66 -0
- data/lib/scout/llm/tools/knowledge_base.rb +158 -0
- data/lib/scout/llm/tools/mcp.rb +59 -0
- data/lib/scout/llm/tools/workflow.rb +69 -0
- data/lib/scout/llm/tools.rb +112 -76
- data/lib/scout/llm/utils.rb +17 -10
- data/lib/scout/model/base.rb +19 -0
- data/lib/scout/model/python/base.rb +25 -0
- data/lib/scout/model/python/huggingface/causal/next_token.rb +23 -0
- data/lib/scout/model/python/huggingface/causal.rb +29 -0
- data/lib/scout/model/python/huggingface/classification +0 -0
- data/lib/scout/model/python/huggingface/classification.rb +50 -0
- data/lib/scout/model/python/huggingface.rb +112 -0
- data/lib/scout/model/python/torch/dataloader.rb +57 -0
- data/lib/scout/model/python/torch/helpers.rb +84 -0
- data/lib/scout/model/python/torch/introspection.rb +34 -0
- data/lib/scout/model/python/torch/load_and_save.rb +47 -0
- data/lib/scout/model/python/torch.rb +94 -0
- data/lib/scout/model/util/run.rb +181 -0
- data/lib/scout/model/util/save.rb +81 -0
- data/lib/scout-ai.rb +4 -1
- data/python/scout_ai/__init__.py +35 -0
- data/python/scout_ai/huggingface/data.py +48 -0
- data/python/scout_ai/huggingface/eval.py +60 -0
- data/python/scout_ai/huggingface/model.py +29 -0
- data/python/scout_ai/huggingface/rlhf.py +83 -0
- data/python/scout_ai/huggingface/train/__init__.py +34 -0
- data/python/scout_ai/huggingface/train/next_token.py +315 -0
- data/python/scout_ai/util.py +32 -0
- data/scout-ai.gemspec +143 -0
- data/scout_commands/agent/ask +89 -14
- data/scout_commands/agent/kb +15 -0
- data/scout_commands/documenter +148 -0
- data/scout_commands/llm/ask +71 -12
- data/scout_commands/llm/process +4 -2
- data/scout_commands/llm/server +319 -0
- data/share/server/chat.html +138 -0
- data/share/server/chat.js +468 -0
- data/test/data/cat.jpg +0 -0
- data/test/scout/llm/agent/test_chat.rb +14 -0
- data/test/scout/llm/backends/test_anthropic.rb +134 -0
- data/test/scout/llm/backends/test_bedrock.rb +60 -0
- data/test/scout/llm/backends/test_huggingface.rb +3 -3
- data/test/scout/llm/backends/test_ollama.rb +48 -10
- data/test/scout/llm/backends/test_openai.rb +134 -10
- data/test/scout/llm/backends/test_responses.rb +239 -0
- data/test/scout/llm/test_agent.rb +0 -70
- data/test/scout/llm/test_ask.rb +4 -1
- data/test/scout/llm/test_chat.rb +256 -0
- data/test/scout/llm/test_mcp.rb +29 -0
- data/test/scout/llm/test_parse.rb +81 -2
- data/test/scout/llm/tools/test_call.rb +0 -0
- data/test/scout/llm/tools/test_knowledge_base.rb +22 -0
- data/test/scout/llm/tools/test_mcp.rb +11 -0
- data/test/scout/llm/tools/test_workflow.rb +39 -0
- data/test/scout/model/python/huggingface/causal/test_next_token.rb +59 -0
- data/test/scout/model/python/huggingface/test_causal.rb +33 -0
- data/test/scout/model/python/huggingface/test_classification.rb +30 -0
- data/test/scout/model/python/test_base.rb +44 -0
- data/test/scout/model/python/test_huggingface.rb +9 -0
- data/test/scout/model/python/test_torch.rb +71 -0
- data/test/scout/model/python/torch/test_helpers.rb +14 -0
- data/test/scout/model/test_base.rb +117 -0
- data/test/scout/model/util/test_save.rb +31 -0
- metadata +113 -7
- data/README.rdoc +0 -18
- data/questions/coach +0 -2
data/python/scout_ai/huggingface/rlhf.py
ADDED
@@ -0,0 +1,83 @@
+from trl import PPOTrainer, AutoModelForCausalLMWithValueHead, PPOConfig
+import torch
+import scout_ai
+
+from copy import deepcopy
+from datasets import Dataset
+
+
+class PPOTrainerWithPrecomputedReward(PPOTrainer):
+    def get_rewards(self, **kwargs):
+        return torch.tensor(self.train_dataset['reward'], dtype=torch.float32)
+
+def train_rlhf(path, tokenizer, pairs, rewards, config=None, generation_config=None):
+    """
+    pairs: List of tuples (messages, response)
+    - messages: List[Dict[str, str]] (OpenAI/chatML-style messages)
+    - response: string (the model output to be rewarded)
+    """
+    config = config or {}
+    device = scout_ai.device()
+    device = 'cuda'
+
+    tokenizer.padding_side = "left"
+    tokenizer.pad_token = tokenizer.eos_token
+
+    prompts, responses = [], []
+    for pair in pairs:
+        messages, response = pair
+        # Ensure tokenizer supports chat template (HF >=4.34)
+        if hasattr(tokenizer, 'apply_chat_template'):
+            # Use default: add_generation_prompt needed for LLMs like Llama, Mistral, etc
+            prompt = tokenizer.apply_chat_template(
+                messages, add_generation_prompt=True, tokenize=False
+            )
+        else:
+            # Fallback: join user/assistant messages
+            prompt = "\n".join(msg['content'] for msg in messages)
+        prompts.append(prompt)
+        responses.append(response)
+
+    train_dataset = Dataset.from_dict({'prompt': prompts, 'response': responses, 'reward': rewards})
+
+    # Wrap model with Value Head for PPO
+    from trl import PPOTrainer, AutoModelForCausalLMWithValueHead, PPOConfig
+    model = AutoModelForCausalLMWithValueHead.from_pretrained(path)
+    model.to(device)
+
+    from transformers import GenerationConfig
+
+    generation_config = GenerationConfig()
+
+    ppo_config = PPOConfig(
+        batch_size=config.get('batch_size', 4),
+        learning_rate=config.get('learning_rate', 1e-5),
+        mini_batch_size=config.get('mini_batch_size', 1),
+        gradient_accumulation_steps=1,
+    )
+
+    model.base_model_prefix = 'model'
+
+    ref_model = deepcopy(model)
+    ref_model.to(device)
+
+    model.generation_config=generation_config
+
+    print(model)
+    print(ref_model)
+
+    ppo_trainer = PPOTrainerWithPrecomputedReward(
+        args=ppo_config,
+        model=model,
+        ref_model=ref_model,
+        reward_model=model, # dummy
+        value_model=model, # dummy
+        train_dataset=train_dataset,
+        processing_class=None,
+    )
+
+
+    print("Step")
+    stats = ppo_trainer.train(prompts, responses, rewards)
+    model.save
+    return stats
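The docstring above describes pairs as (messages, response) tuples with chatML-style messages and one precomputed reward per pair. A minimal usage sketch, assuming the scout_ai Python package is importable; the checkpoint name, prompts, and reward values below are illustrative, not part of the package:

# Hypothetical usage sketch for train_rlhf; checkpoint, prompts and rewards are illustrative.
from transformers import AutoTokenizer
from scout_ai.huggingface.rlhf import train_rlhf

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")

pairs = [
    ([{"role": "user", "content": "Name a primary color."}], "Blue."),
    ([{"role": "user", "content": "Name a primary color."}], "A banana."),
]
rewards = [1.0, -1.0]  # one precomputed score per (messages, response) pair

stats = train_rlhf("distilgpt2", tokenizer, pairs, rewards,
                   config={"batch_size": 2, "mini_batch_size": 1})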
data/python/scout_ai/huggingface/train/__init__.py
ADDED
@@ -0,0 +1,34 @@
+from transformers import TrainingArguments, Trainer
+from typing import Any
+from ..data import json_dataset, tsv_dataset, tokenize_dataset
+
+def training_args(*args, **kwargs) -> TrainingArguments:
+    return TrainingArguments(*args, **kwargs)
+
+def train_model(model: Any, tokenizer: Any, training_args: TrainingArguments, dataset: Any, class_weights=None, **kwargs):
+    for param in model.parameters():
+        param.data = param.data.contiguous()
+
+    if (isinstance(dataset, str)):
+        if (dataset.endswith('.json')):
+            tokenized_dataset = json_dataset(tokenizer, dataset)
+        else:
+            tokenized_dataset = tsv_dataset(tokenizer, dataset)
+    else:
+        tokenized_dataset = tokenize_dataset(tokenizer, dataset)
+
+    if class_weights is not None:
+        import torch
+        from torch import nn
+        class WeightTrainer(Trainer):
+            def compute_loss(self, model, inputs, return_outputs=False):
+                labels = inputs.get("labels")
+                outputs = model(**inputs)
+                logits = outputs.get('logits')
+                loss_fct = nn.CrossEntropyLoss(weight=torch.tensor(class_weights).to(model.device))
+                loss = loss_fct(logits.view(-1, model.config.num_labels), labels.view(-1))
+                return (loss, outputs) if return_outputs else loss
+        trainer = WeightTrainer(model, training_args, train_dataset=tokenized_dataset["train"], tokenizer=tokenizer, **kwargs)
+    else:
+        trainer = Trainer(model, training_args, train_dataset=tokenized_dataset["train"], tokenizer=tokenizer, **kwargs)
+    trainer.train()
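train_model above routes a string dataset through json_dataset or tsv_dataset and otherwise tokenizes the dataset object directly; passing class_weights switches to the weighted cross-entropy trainer. A minimal usage sketch, assuming a sequence-classification model; the model name, TSV path, and class weights are illustrative, not package defaults:

# Hypothetical usage sketch for training_args/train_model; model name, TSV path
# and class weights are illustrative.
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from scout_ai.huggingface.train import training_args, train_model

name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForSequenceClassification.from_pretrained(name, num_labels=2)

args = training_args(output_dir="./clf_output",
                     num_train_epochs=1,
                     per_device_train_batch_size=8)

# A non-.json string path is read with tsv_dataset; class_weights enables the WeightTrainer branch.
train_model(model, tokenizer, args, "labels.tsv", class_weights=[1.0, 2.0])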
data/python/scout_ai/huggingface/train/next_token.py
ADDED
@@ -0,0 +1,315 @@
+import os
+import math
+import time
+import shutil
+import random
+from dataclasses import dataclass
+from typing import List, Optional, Dict, Any, Union
+
+import torch
+from torch.utils.data import DataLoader
+from datasets import Dataset, load_dataset
+
+from transformers import (
+    PreTrainedModel,
+    PreTrainedTokenizer,
+    get_scheduler,
+    DataCollatorForLanguageModeling
+)
+from torch.optim import AdamW
+from transformers.utils import logging
+
+logger = logging.get_logger(__name__)
+
+def set_seed(seed: int):
+    random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
+    try:
+        import numpy as np
+        np.random.seed(seed)
+    except ImportError:
+        pass
+
+@dataclass
+class TrainingState:
+    global_step: int = 0
+    best_eval_loss: float = float("inf")
+
+def tokenize_function(examples, tokenizer, max_seq_length):
+    # examples: dict with key 'text' or single texts
+    # Always output input_ids and attention_mask
+    output = tokenizer(
+        examples["text"] if "text" in examples else examples,
+        truncation=True,
+        padding="max_length",
+        max_length=max_seq_length,
+        return_attention_mask=True,
+    )
+    output["labels"] = output["input_ids"].copy()
+    return output
+
+def group_texts(examples, block_size):
+    # For paragraph-based datasets: simply return; for huge files, use this.
+    concatenated = {k: sum(examples[k], []) for k in examples.keys()}
+    total_length = len(concatenated[list(examples.keys())[0]])
+    # Drop the small remainder
+    total_length = (total_length // block_size) * block_size
+    result = {
+        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
+        for k, t in concatenated.items()
+    }
+    return result
+
+def train_next_token(
+    model: PreTrainedModel,
+    tokenizer: PreTrainedTokenizer,
+    dataset: Union[List[str], Dataset],
+    *,
+    output_dir: str,
+    eval_dataset: Optional[Union[List[str], Dataset]] = None,
+    max_seq_length: int = 2048,
+    batch_size: int = 8,
+    gradient_accumulation_steps: int = 1,
+    num_train_epochs: int = 3,
+    learning_rate: float = 1e-4,
+    weight_decay: float = 0.01,
+    lr_scheduler_type: str = "linear",
+    warmup_steps: int = 0,
+    logging_steps: int = 50,
+    eval_steps: int = 200,
+    save_steps: int = 500,
+    save_total_limit: int = 3,
+    fp16: bool = False,
+    bf16: bool = False,
+    max_train_steps: int = None,
+    seed: int = 42,
+    report_to: str = "none", # or "wandb", "tensorboard"
+    use_lora: bool = False,
+    lora_config: Optional[dict] = None,
+    resume_from_checkpoint: str = None,
+    callbacks: Optional[List] = None,
+    device_map: str = "auto",
+    dataloader_num_workers: int = 4,
+    group_by_length: bool = False,
+    description: str = "",
+):
+    """
+    Fine-tunes a causal LM for next-token prediction.
+    """
+    #assert isinstance(model, PreTrainedModel), "Model must be a HuggingFace PreTrainedModel"
+    #assert isinstance(tokenizer, PreTrainedTokenizer), "Tokenizer must be a HuggingFace PreTrainedTokenizer"
+    assert isinstance(dataset, (list, Dataset)), "Dataset must be a HuggingFace Dataset or a list of texts"
+
+    set_seed(seed)
+    os.makedirs(output_dir, exist_ok=True)
+
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    n_gpus = torch.cuda.device_count()
+
+    if resume_from_checkpoint:
+        logger.info(f"Loading checkpoint from {resume_from_checkpoint}")
+        model.load_state_dict(torch.load(os.path.join(resume_from_checkpoint, "pytorch_model.bin")))
+
+    model.to(device)
+
+    if fp16:
+        scaler = torch.cuda.amp.GradScaler()
+    else:
+        scaler = None
+
+    # 1. Prepare Dataset
+    if isinstance(dataset, list):
+        dataset = Dataset.from_dict({"text": dataset})
+
+    if eval_dataset is not None and isinstance(eval_dataset, list):
+        eval_dataset = Dataset.from_dict({"text": eval_dataset})
+
+    # Tokenization and formatting
+    def preprocess(examples):
+        return tokenize_function(examples, tokenizer, max_seq_length)
+
+    dataset = dataset.map(preprocess, batched=True, remove_columns=list(dataset.column_names))
+    if eval_dataset is not None:
+        eval_dataset = eval_dataset.map(preprocess, batched=True, remove_columns=list(eval_dataset.column_names))
+
+    # 2. Loader & Collator
+    data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
+
+    train_loader = DataLoader(
+        dataset,
+        batch_size=batch_size,
+        shuffle=True,
+        collate_fn=data_collator,
+        num_workers=dataloader_num_workers,
+        drop_last=True,
+    )
+    eval_loader = None
+    if eval_dataset is not None:
+        eval_loader = DataLoader(
+            eval_dataset,
+            batch_size=batch_size,
+            shuffle=False,
+            collate_fn=data_collator,
+            num_workers=dataloader_num_workers,
+        )
+
+    # 3. Optimizer & Scheduler
+    no_decay = ["bias", "LayerNorm.weight"]
+    grouped_params = [
+        {
+            "params": [
+                p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)
+            ],
+            "weight_decay": weight_decay,
+        },
+        {
+            "params": [
+                p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)
+            ],
+            "weight_decay": 0.0,
+        },
+    ]
+
+    optimizer = AdamW(grouped_params, lr=learning_rate)
+
+    total_train_steps = (
+        max_train_steps if max_train_steps is not None
+        else (len(train_loader) * num_train_epochs) // gradient_accumulation_steps
+    )
+
+    lr_scheduler = get_scheduler(
+        lr_scheduler_type,
+        optimizer=optimizer,
+        num_warmup_steps=warmup_steps,
+        num_training_steps=total_train_steps,
+    )
+
+    # 4. LoRA/PEFT Support (placeholder)
+    if use_lora:
+        logger.warning("PEFT/LoRA integration not yet implemented. Skipping.")
+
+    # 5. Checkpoint Management
+    saved_checkpoints = []
+
+    # 6. Training Loop
+    state = TrainingState()
+    model.train()
+    start_time = time.time()
+    for epoch in range(num_train_epochs):
+        logger.info(f"Epoch {epoch+1}/{num_train_epochs}")
+        for step, batch in enumerate(train_loader):
+            true_step = state.global_step + 1
+            batch = {k: v.to(device) for k, v in batch.items()}
+            with torch.cuda.amp.autocast(dtype=torch.float16 if fp16 else torch.bfloat16 if bf16 else torch.float32, enabled=(fp16 or bf16)):
+                outputs = model(**batch)
+                loss = outputs.loss
+                loss = loss / gradient_accumulation_steps
+
+            if fp16:
+                scaler.scale(loss).backward()
+            else:
+                loss.backward()
+
+            if true_step % gradient_accumulation_steps == 0:
+                if fp16:
+                    scaler.step(optimizer)
+                    scaler.update()
+                else:
+                    optimizer.step()
+                optimizer.zero_grad()
+                lr_scheduler.step()
+
+            if true_step % logging_steps == 0:
+                logger.info(f"Step {true_step}: loss {loss.item() * gradient_accumulation_steps:.4f}")
+
+            if eval_loader is not None and true_step % eval_steps == 0:
+                eval_loss = evaluate(model, eval_loader, device, fp16, bf16)
+                logger.info(f"Step {true_step}: eval_loss {eval_loss:.4f}, ppl {math.exp(eval_loss):.2f}")
+                # Save best
+                if eval_loss < state.best_eval_loss:
+                    state.best_eval_loss = eval_loss
+                    save_checkpoint(model, output_dir, f"best")
+            if true_step % save_steps == 0:
+                ckpt_dir = save_checkpoint(model, output_dir, f"step-{true_step}")
+                saved_checkpoints.append(ckpt_dir)
+                # Cleanup
+                if len(saved_checkpoints) > save_total_limit:
+                    old = saved_checkpoints.pop(0)
+                    shutil.rmtree(old, ignore_errors=True)
+            state.global_step = true_step
+            if max_train_steps is not None and true_step >= max_train_steps:
+                break
+        # End-of-epoch eval/save
+        if eval_loader is not None:
+            eval_loss = evaluate(model, eval_loader, device, fp16, bf16)
+            logger.info(f"Epoch {epoch+1} end: eval_loss {eval_loss:.4f}, ppl {math.exp(eval_loss):.2f}")
+            if eval_loss < state.best_eval_loss:
+                state.best_eval_loss = eval_loss
+                save_checkpoint(model, output_dir, "best")
+        save_checkpoint(model, output_dir, f"epoch-{epoch+1}")
+    logger.info(f"Training completed in {time.time() - start_time:.2f} sec on {device}")
+
+def evaluate(model, eval_loader, device, fp16, bf16):
+    model.eval()
+    losses = []
+    for batch in eval_loader:
+        batch = {k: v.to(device) for k, v in batch.items()}
+        with torch.no_grad():
+            with torch.cuda.amp.autocast(dtype=torch.float16 if fp16 else torch.bfloat16 if bf16 else torch.float32, enabled=(fp16 or bf16)):
+                outputs = model(**batch)
+        losses.append(outputs.loss.item())
+    model.train()
+    return sum(losses) / len(losses)
+
+def save_checkpoint(model, output_dir, tag):
+    output_ckpt_dir = os.path.join(output_dir, tag)
+    os.makedirs(output_ckpt_dir, exist_ok=True)
+    model.save_pretrained(output_ckpt_dir)
+    return output_ckpt_dir
+
+def main():
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+
+    # Example tiny dataset: few sentences
+    train_texts = [
+        "The quick brown fox jumps over the lazy dog.",
+        "Artificial intelligence is the future.",
+        "Llama models are great for language tasks.",
+        "Open source is important for research.",
+    ]
+    eval_texts = [
+        "Transformers enable powerful NLP models.",
+        "Fine-tuning improves performance."
+    ]
+
+    #model_name = "openlm-research/open_llama_3b" # Replace with your local/other HF Llama checkpoint as needed
+    model_name = "distilgpt2" # Replace with your local/other HF Llama checkpoint as needed
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+    # Make sure tokenizer pads on right for causal LMs (Llama does not have pad by default)
+    if tokenizer.pad_token is None:
+        tokenizer.pad_token = tokenizer.eos_token
+
+    model = AutoModelForCausalLM.from_pretrained(model_name)
+
+    train_next_token(
+        model=model,
+        tokenizer=tokenizer,
+        dataset=train_texts,
+        output_dir="./output_test",
+        eval_dataset=eval_texts,
+        max_seq_length=32,
+        batch_size=2,
+        num_train_epochs=1,
+        gradient_accumulation_steps=1,
+        learning_rate=5e-5,
+        fp16=False, # Change to True if running on GPU with enough VRAM
+        bf16=False,
+        logging_steps=1,
+        eval_steps=2,
+        save_steps=10
+    )
+
+if __name__ == "__main__":
+    main()
data/python/scout_ai/util.py
ADDED
@@ -0,0 +1,32 @@
+import random
+import torch
+import numpy
+
+def set_seed(seed):
+    """
+    Set seed in several backends
+    """
+    random.seed(seed)
+    numpy.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+
+def deterministic():
+    """
+    Ensure that all operations are deterministic on GPU (if used) for
+    reproducibility
+    """
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+def device():
+    return torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
+
+def data_directory():
+    from pathlib import Path
+    print(Path.home())
+
+def model_device(model):
+    return next(model.parameters()).device
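These helpers wrap seeding, determinism, and device selection. A minimal sketch of how they fit together, assuming the scout_ai package is importable; the toy model is illustrative:

# Minimal sketch using the helpers above; the toy model is illustrative.
import torch
from scout_ai.util import set_seed, deterministic, device, model_device

set_seed(42)       # seed random, numpy and torch (CUDA included when available)
deterministic()    # force deterministic cuDNN behaviour

dev = device()     # cuda:0 if available, otherwise cpu
model = torch.nn.Linear(4, 2).to(dev)
print(model_device(model))  # device holding the model's parameters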
data/scout-ai.gemspec
ADDED
@@ -0,0 +1,143 @@
+# Generated by juwelier
+# DO NOT EDIT THIS FILE DIRECTLY
+# Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
+# -*- encoding: utf-8 -*-
+# stub: scout-ai 1.0.1 ruby lib
+
+Gem::Specification.new do |s|
+  s.name = "scout-ai".freeze
+  s.version = "1.0.1".freeze
+
+  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
+  s.require_paths = ["lib".freeze]
+  s.authors = ["Miguel Vazquez".freeze]
+  s.date = "1980-01-02"
+  s.description = "assorted functionalities to help scouts use AI".freeze
+  s.email = "mikisvaz@gmail.com".freeze
+  s.executables = ["scout-ai".freeze]
+  s.extra_rdoc_files = [
+    "LICENSE",
+    "LICENSE.txt",
+    "README.md"
+  ]
+  s.files = [
+    ".document",
+    ".vimproject",
+    "LICENSE",
+    "LICENSE.txt",
+    "README.md",
+    "Rakefile",
+    "VERSION",
+    "bin/scout-ai",
+    "doc/Agent.md",
+    "doc/Chat.md",
+    "doc/LLM.md",
+    "doc/Model.md",
+    "doc/RAG.md",
+    "lib/scout-ai.rb",
+    "lib/scout/llm/agent.rb",
+    "lib/scout/llm/agent/chat.rb",
+    "lib/scout/llm/agent/delegate.rb",
+    "lib/scout/llm/agent/iterate.rb",
+    "lib/scout/llm/ask.rb",
+    "lib/scout/llm/backends/anthropic.rb",
+    "lib/scout/llm/backends/bedrock.rb",
+    "lib/scout/llm/backends/huggingface.rb",
+    "lib/scout/llm/backends/ollama.rb",
+    "lib/scout/llm/backends/openai.rb",
+    "lib/scout/llm/backends/openwebui.rb",
+    "lib/scout/llm/backends/relay.rb",
+    "lib/scout/llm/backends/responses.rb",
+    "lib/scout/llm/chat.rb",
+    "lib/scout/llm/embed.rb",
+    "lib/scout/llm/mcp.rb",
+    "lib/scout/llm/parse.rb",
+    "lib/scout/llm/rag.rb",
+    "lib/scout/llm/tools.rb",
+    "lib/scout/llm/tools/call.rb",
+    "lib/scout/llm/tools/knowledge_base.rb",
+    "lib/scout/llm/tools/mcp.rb",
+    "lib/scout/llm/tools/workflow.rb",
+    "lib/scout/llm/utils.rb",
+    "lib/scout/model/base.rb",
+    "lib/scout/model/python/base.rb",
+    "lib/scout/model/python/huggingface.rb",
+    "lib/scout/model/python/huggingface/causal.rb",
+    "lib/scout/model/python/huggingface/causal/next_token.rb",
+    "lib/scout/model/python/huggingface/classification",
+    "lib/scout/model/python/huggingface/classification.rb",
+    "lib/scout/model/python/torch.rb",
+    "lib/scout/model/python/torch/dataloader.rb",
+    "lib/scout/model/python/torch/helpers.rb",
+    "lib/scout/model/python/torch/introspection.rb",
+    "lib/scout/model/python/torch/load_and_save.rb",
+    "lib/scout/model/util/run.rb",
+    "lib/scout/model/util/save.rb",
+    "python/scout_ai/__init__.py",
+    "python/scout_ai/huggingface/data.py",
+    "python/scout_ai/huggingface/eval.py",
+    "python/scout_ai/huggingface/model.py",
+    "python/scout_ai/huggingface/rlhf.py",
+    "python/scout_ai/huggingface/train/__init__.py",
+    "python/scout_ai/huggingface/train/next_token.py",
+    "python/scout_ai/util.py",
+    "scout-ai.gemspec",
+    "scout_commands/agent/ask",
+    "scout_commands/agent/kb",
+    "scout_commands/documenter",
+    "scout_commands/llm/ask",
+    "scout_commands/llm/process",
+    "scout_commands/llm/server",
+    "scout_commands/llm/template",
+    "share/server/chat.html",
+    "share/server/chat.js",
+    "test/data/cat.jpg",
+    "test/data/person/brothers",
+    "test/data/person/identifiers",
+    "test/data/person/marriages",
+    "test/data/person/parents",
+    "test/scout/llm/agent/test_chat.rb",
+    "test/scout/llm/backends/test_anthropic.rb",
+    "test/scout/llm/backends/test_bedrock.rb",
+    "test/scout/llm/backends/test_huggingface.rb",
+    "test/scout/llm/backends/test_ollama.rb",
+    "test/scout/llm/backends/test_openai.rb",
+    "test/scout/llm/backends/test_openwebui.rb",
+    "test/scout/llm/backends/test_relay.rb",
+    "test/scout/llm/backends/test_responses.rb",
+    "test/scout/llm/test_agent.rb",
+    "test/scout/llm/test_ask.rb",
+    "test/scout/llm/test_chat.rb",
+    "test/scout/llm/test_embed.rb",
+    "test/scout/llm/test_mcp.rb",
+    "test/scout/llm/test_parse.rb",
+    "test/scout/llm/test_rag.rb",
+    "test/scout/llm/test_tools.rb",
+    "test/scout/llm/test_utils.rb",
+    "test/scout/llm/tools/test_call.rb",
+    "test/scout/llm/tools/test_knowledge_base.rb",
+    "test/scout/llm/tools/test_mcp.rb",
+    "test/scout/llm/tools/test_workflow.rb",
+    "test/scout/model/python/huggingface/causal/test_next_token.rb",
+    "test/scout/model/python/huggingface/test_causal.rb",
+    "test/scout/model/python/huggingface/test_classification.rb",
+    "test/scout/model/python/test_base.rb",
+    "test/scout/model/python/test_huggingface.rb",
+    "test/scout/model/python/test_torch.rb",
+    "test/scout/model/python/torch/test_helpers.rb",
+    "test/scout/model/test_base.rb",
+    "test/scout/model/util/test_save.rb",
+    "test/test_helper.rb"
+  ]
+  s.homepage = "http://github.com/mikisvaz/scout-ai".freeze
+  s.licenses = ["MIT".freeze]
+  s.rubygems_version = "3.7.0.dev".freeze
+  s.summary = "AI gear for scouts".freeze
+
+  s.specification_version = 4
+
+  s.add_runtime_dependency(%q<scout-rig>.freeze, [">= 0".freeze])
+  s.add_runtime_dependency(%q<ruby-openai>.freeze, [">= 0".freeze])
+  s.add_runtime_dependency(%q<ruby-mcp-client>.freeze, [">= 0".freeze])
+end
+