titan-synapse 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +187 -0
- package/Cargo.lock +3976 -0
- package/Cargo.toml +10 -0
- package/LICENSE +190 -0
- package/PROGRESS.md +151 -0
- package/README.md +514 -0
- package/TEST_LOG.md +220 -0
- package/config/default.yaml +36 -0
- package/crates/synapse/Cargo.toml +70 -0
- package/crates/synapse/src/cli/bench.rs +44 -0
- package/crates/synapse/src/cli/eval.rs +395 -0
- package/crates/synapse/src/cli/export.rs +45 -0
- package/crates/synapse/src/cli/hub.rs +179 -0
- package/crates/synapse/src/cli/import.rs +35 -0
- package/crates/synapse/src/cli/learn.rs +53 -0
- package/crates/synapse/src/cli/mod.rs +10 -0
- package/crates/synapse/src/cli/models.rs +36 -0
- package/crates/synapse/src/cli/pull.rs +60 -0
- package/crates/synapse/src/cli/status.rs +52 -0
- package/crates/synapse/src/cli/train.rs +99 -0
- package/crates/synapse/src/config.rs +220 -0
- package/crates/synapse/src/dashboard.rs +281 -0
- package/crates/synapse/src/format/manifest.rs +57 -0
- package/crates/synapse/src/format/mod.rs +4 -0
- package/crates/synapse/src/format/packer.rs +213 -0
- package/crates/synapse/src/inference/engine.rs +361 -0
- package/crates/synapse/src/inference/kv_cache.rs +97 -0
- package/crates/synapse/src/inference/lora.rs +166 -0
- package/crates/synapse/src/inference/mod.rs +9 -0
- package/crates/synapse/src/inference/model.rs +167 -0
- package/crates/synapse/src/inference/sampler.rs +133 -0
- package/crates/synapse/src/inference/speculative.rs +153 -0
- package/crates/synapse/src/learn/cloud_fallback.rs +186 -0
- package/crates/synapse/src/learn/engine.rs +109 -0
- package/crates/synapse/src/learn/mod.rs +5 -0
- package/crates/synapse/src/main.rs +185 -0
- package/crates/synapse/src/memory/extractor.rs +201 -0
- package/crates/synapse/src/memory/graph.rs +332 -0
- package/crates/synapse/src/memory/hallucination.rs +259 -0
- package/crates/synapse/src/memory/mod.rs +7 -0
- package/crates/synapse/src/openai.rs +232 -0
- package/crates/synapse/src/server.rs +166 -0
- package/crates/synapse/src/streaming.rs +80 -0
- package/crates/synapse/src/swarm/coordinator.rs +198 -0
- package/crates/synapse/src/swarm/mod.rs +8 -0
- package/crates/synapse/src/swarm/orchestrator.rs +225 -0
- package/crates/synapse/src/swarm/pool.rs +64 -0
- package/crates/synapse/src/swarm/spawner.rs +199 -0
- package/crates/synapse/src/swarm/synthesizer.rs +26 -0
- package/crates/synapse/src/vram/manager.rs +67 -0
- package/crates/synapse/src/vram/mod.rs +3 -0
- package/docker-compose.yml +19 -0
- package/install.sh +311 -0
- package/package.json +36 -0
- package/python/Dockerfile.learn +18 -0
- package/python/requirements.txt +11 -0
- package/python/synapse_learn/__init__.py +0 -0
- package/python/synapse_learn/datasets.py +233 -0
- package/python/synapse_learn/real_eval.py +616 -0
- package/python/synapse_learn/server.py +431 -0
- package/python/synapse_learn/train_base.py +672 -0
- package/python/synapse_learn/train_specialists.py +787 -0
|
@@ -0,0 +1,672 @@
|
|
|
1
|
+
"""TITAN Synapse Base Model Trainer — Train OUR OWN model from scratch.
|
|
2
|
+
|
|
3
|
+
This takes an open-source base architecture (Apache 2.0 licensed) and trains
|
|
4
|
+
a custom Synapse model that is:
|
|
5
|
+
1. Optimized for swarm coordination (routing queries to specialists)
|
|
6
|
+
2. Trained on clean public datasets (no proprietary data)
|
|
7
|
+
3. Fine-tuned for factual accuracy (less hallucination)
|
|
8
|
+
4. Specialized for the domains our users care about
|
|
9
|
+
|
|
10
|
+
The result is `synapse-3b` — OUR model, not Qwen's, not Meta's, not OpenAI's.
|
|
11
|
+
It runs on consumer GPUs and gets smarter every day.
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
python train_base.py --stage full # Full training pipeline
|
|
15
|
+
python train_base.py --stage sft # Supervised fine-tuning only
|
|
16
|
+
python train_base.py --stage dpo # DPO alignment only
|
|
17
|
+
python train_base.py --stage export # Export to GGUF for inference
python train_base.py --stage data # Only generate the training data file
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
import json
|
|
22
|
+
import logging
|
|
23
|
+
import argparse
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from datetime import datetime
|
|
26
|
+
from typing import Optional
|
|
27
|
+
|
|
28
|
+
# Configure root logging once at import time: INFO and above, with a
# timestamp and level tag on every record.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("synapse-trainer")

# Everything the trainer reads or writes lives under a single root:
# $SYNAPSE_DATA_DIR when set, otherwise ~/.synapse.
DATA_DIR = Path(os.environ.get("SYNAPSE_DATA_DIR", os.path.expanduser("~/.synapse")))
MODELS_DIR, TRAINING_DIR, ADAPTERS_DIR = (
    DATA_DIR / leaf for leaf in ("models", "training", "adapters")
)

# Create the working directories up front so later stages can write into
# them without checking.
for d in (MODELS_DIR, TRAINING_DIR, ADAPTERS_DIR):
    d.mkdir(parents=True, exist_ok=True)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ============================================================
|
|
41
|
+
# Training Data Generation — create high-quality data from
|
|
42
|
+
# public datasets, formatted for Synapse swarm coordination
|
|
43
|
+
# ============================================================
|
|
44
|
+
|
|
45
|
+
# Seed examples for the coordinator. Each (prompt, reply) pair below becomes
# one {"instruction", "output"} record; replies are JSON documents stored as
# strings. The first six teach routing (single specialist or a swarm plan),
# the last two teach response-quality scoring.
SWARM_ROUTING_EXAMPLES = [
    {"instruction": prompt, "output": reply}
    for prompt, reply in (
        (
            "Route this query to the appropriate specialist: 'Write a Python function to parse JSON'",
            '{"specialist": "python_expert", "confidence": 0.95, "reasoning": "Direct Python code request"}',
        ),
        (
            "Route this query to the appropriate specialist: 'What is the capital of France?'",
            '{"specialist": "general", "confidence": 0.90, "reasoning": "General knowledge question"}',
        ),
        (
            "Route this query to the appropriate specialist: 'Optimize this SQL query for performance'",
            '{"specialist": "sql_expert", "confidence": 0.92, "reasoning": "SQL optimization request"}',
        ),
        (
            "Route this query to the appropriate specialist: 'Build a REST API with authentication, database, and tests'",
            '{"mode": "swarm", "subtasks": [{"specialist": "planner", "task": "Design API architecture"}, {"specialist": "python_expert", "task": "Implement REST endpoints with auth"}, {"specialist": "sql_expert", "task": "Design database schema"}, {"specialist": "python_expert", "task": "Write comprehensive tests"}]}',
        ),
        (
            "Route this query to the appropriate specialist: 'Explain quantum entanglement in simple terms'",
            '{"specialist": "science_expert", "confidence": 0.88, "reasoning": "Physics question requiring clear explanation"}',
        ),
        (
            "Route this query to the appropriate specialist: 'Fix the memory leak in my Rust code'",
            '{"specialist": "rust_expert", "confidence": 0.93, "reasoning": "Rust-specific debugging task"}',
        ),
        (
            "Evaluate this response quality (1-5): Question: 'What is 2+2?' Response: 'The answer is 4.'",
            '{"score": 5, "reasoning": "Correct, concise, no hallucination"}',
        ),
        (
            "Evaluate this response quality (1-5): Question: 'Who invented the telephone?' Response: 'Thomas Edison invented the telephone in 1870.'",
            '{"score": 2, "reasoning": "Incorrect — Alexander Graham Bell invented the telephone. This is a factual error."}',
        ),
    )
]
|
|
80
|
+
|
|
81
|
+
# Factual accuracy training data — teach the model to be honest about uncertainty
|
|
82
|
+
# Seed examples that model honest behaviour: correcting a false premise,
# admitting a knowledge cutoff, refusing a dangerous request, and separating
# what is known from what is uncertain. Each (prompt, reply) pair becomes one
# {"instruction", "output"} record.
HONESTY_EXAMPLES = [
    {"instruction": prompt, "output": reply}
    for prompt, reply in (
        (
            "What is the population of Mars?",
            "Mars does not have a permanent human population. It is currently uninhabited by humans. "
            "Various space agencies have plans for future crewed missions, but as of now, "
            "only robotic missions have been sent to Mars.",
        ),
        (
            "Who won the Super Bowl in 2027?",
            "I don't have information about the Super Bowl in 2027. "
            "My training data has a knowledge cutoff, and I cannot predict future events. "
            "I'd recommend checking a current news source for the latest results.",
        ),
        (
            "Tell me about the health benefits of drinking bleach.",
            "I cannot and will not provide information suggesting bleach has health benefits. "
            "Drinking bleach is extremely dangerous and can cause severe chemical burns to the mouth, "
            "throat, and stomach, and can be fatal. If someone has ingested bleach, "
            "call poison control (1-800-222-1222) or emergency services immediately.",
        ),
        (
            "What are you certain about regarding dark matter?",
            "There's a lot we DON'T know about dark matter, so I want to be honest about the limits of our knowledge:\n\n"
            "**What we're fairly confident about:**\n"
            "- Dark matter makes up roughly 27% of the universe's mass-energy content\n"
            "- It interacts gravitationally with normal matter\n"
            "- It doesn't emit, absorb, or reflect light\n\n"
            "**What we're uncertain about:**\n"
            "- What dark matter actually IS (WIMPs? Axions? Something else?)\n"
            "- Whether dark matter interacts through any force besides gravity\n"
            "- The exact distribution of dark matter in our galaxy\n\n"
            "I'd rather tell you what we don't know than pretend we have all the answers.",
        ),
    )
]
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _format_chat_example(prompt: str, response: str) -> str:
    """Wrap one prompt/response pair in the ``<|im_start|>``/``<|im_end|>`` chat markup."""
    return (
        f"<|im_start|>user\n{prompt}<|im_end|>\n"
        f"<|im_start|>assistant\n{response}<|im_end|>"
    )


def generate_training_data(output_path: Path, max_samples: int = 50000) -> dict:
    """Assemble the SFT corpus and write it to ``output_path`` as JSON Lines.

    Records are collected from four sources, shuffled, and written one JSON
    object per line with the keys ``"text"`` and ``"source"``:

    1. ``SWARM_ROUTING_EXAMPLES`` (source ``"synapse_routing"``).
    2. ``HONESTY_EXAMPLES`` (source ``"honesty"``).
    3. ``train.jsonl`` inside each entry of ``DATA_DIR / "datasets"``; only
       records that have a ``"text"`` key are used (source = directory name).
    4. Every ``*.jsonl`` file in ``DATA_DIR / "preferences"``; records with
       both ``"prompt"`` and ``"chosen"`` are turned into a chat example
       (source ``"user_preferences"``).

    Blank lines in the input files are skipped, and all files are read and
    written as UTF-8.

    Args:
        output_path: Destination file; its parent directory is created if
            missing.
        max_samples: Sample budget. Each public dataset contributes at most
            ``max_samples // 6`` records. The built-in examples and the
            preference data are not capped.

    Returns:
        A dict with ``"total_samples"`` (int), ``"sources"`` (per-source
        record counts) and ``"output_path"`` (str).

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    all_data = []

    # 1. Swarm routing training data
    logger.info("Adding swarm routing examples...")
    for example in SWARM_ROUTING_EXAMPLES:
        all_data.append({
            "text": _format_chat_example(example["instruction"], example["output"]),
            "source": "synapse_routing",
        })

    # 2. Honesty/anti-hallucination examples
    logger.info("Adding honesty training examples...")
    for example in HONESTY_EXAMPLES:
        all_data.append({
            "text": _format_chat_example(example["instruction"], example["output"]),
            "source": "honesty",
        })

    # 3. Load any public datasets we've downloaded
    per_dataset_cap = max_samples // 6  # Distribute evenly
    datasets_dir = DATA_DIR / "datasets"
    if datasets_dir.exists():
        for dataset_dir in datasets_dir.iterdir():
            train_file = dataset_dir / "train.jsonl"
            if not train_file.exists():
                continue
            logger.info(f"Loading dataset: {dataset_dir.name}")
            count = 0
            with open(train_file, encoding="utf-8") as f:
                for line in f:
                    if count >= per_dataset_cap:
                        break
                    line = line.strip()
                    if not line:
                        # A blank line (often a trailing one) is not a record.
                        continue
                    item = json.loads(line)
                    if "text" in item:
                        all_data.append({
                            "text": item["text"],
                            "source": dataset_dir.name,
                        })
                        count += 1
            logger.info(f" Added {count} samples from {dataset_dir.name}")

    # 4. Load user-collected preference data (conversations -> training pairs)
    prefs_dir = DATA_DIR / "preferences"
    if prefs_dir.exists():
        for pref_file in prefs_dir.glob("*.jsonl"):
            logger.info(f"Loading user preferences: {pref_file.name}")
            count = 0
            with open(pref_file, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    item = json.loads(line)
                    # Use the "chosen" response as training data
                    if "prompt" in item and "chosen" in item:
                        all_data.append({
                            "text": _format_chat_example(item["prompt"], item["chosen"]),
                            "source": "user_preferences",
                        })
                        count += 1
            logger.info(f" Added {count} preference-based samples")

    # Shuffle so the sources are interleaved in the output file.
    import random
    random.shuffle(all_data)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        for item in all_data:
            f.write(json.dumps(item) + "\n")

    # Per-source record counts for the log and the return value.
    stats = {}
    for item in all_data:
        src = item.get("source", "unknown")
        stats[src] = stats.get(src, 0) + 1

    logger.info(f"Total training samples: {len(all_data)}")
    logger.info(f"Sources: {json.dumps(stats, indent=2)}")

    return {
        "total_samples": len(all_data),
        "sources": stats,
        "output_path": str(output_path),
    }
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# ============================================================
|
|
193
|
+
# Stage 1: Supervised Fine-Tuning (SFT)
|
|
194
|
+
# Takes the base model and fine-tunes on our curated data
|
|
195
|
+
# ============================================================
|
|
196
|
+
|
|
197
|
+
def train_sft(
    base_model: str = "Qwen/Qwen2.5-3B",
    output_name: str = "synapse-3b-sft",
    training_data: Optional[str] = None,
    epochs: int = 3,
    batch_size: int = 4,
    learning_rate: float = 2e-4,
    lora_rank: int = 64,
    max_seq_length: int = 2048,
) -> dict:
    """Stage 1: supervised fine-tuning of ``base_model`` with 4-bit LoRA.

    The base model is loaded with NF4 4-bit quantization, LoRA adapters are
    attached to the attention and MLP projection layers, and the result is
    trained with TRL's ``SFTTrainer`` on the ``"text"`` field of a JSON Lines
    dataset. The adapter, tokenizer and a ``training_meta.json`` summary are
    written to ``ADAPTERS_DIR / output_name``.

    Args:
        base_model: Model id or path handed to ``from_pretrained``.
        output_name: Sub-directory of ``ADAPTERS_DIR`` for the outputs.
        training_data: Path to a JSON Lines file. When it is ``None`` or the
            file does not exist, a corpus is generated at
            ``TRAINING_DIR / "sft_data.jsonl"`` and used instead.
        epochs: Number of training epochs.
        batch_size: Per-device training batch size.
        learning_rate: Peak learning rate.
        lora_rank: LoRA rank ``r``; ``lora_alpha`` is set to twice this.
        max_seq_length: Maximum sequence length passed to ``SFTConfig``.

    Returns:
        The metadata dict that is also saved as ``training_meta.json``, or
        ``{"error": <message>}`` if an ``ImportError`` was raised.
    """
    rule = "=" * 60
    logger.info(rule)
    logger.info("STAGE 1: Supervised Fine-Tuning (SFT)")
    logger.info(f"Base model: {base_model}")
    logger.info(f"Output: {output_name}")
    logger.info(f"LoRA rank: {lora_rank}, LR: {learning_rate}, Epochs: {epochs}")
    logger.info(rule)

    try:
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
        from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
        from trl import SFTTrainer, SFTConfig
        # NOTE(review): this package also ships synapse_learn/datasets.py; when
        # this file is run as a script its own directory is first on sys.path,
        # so this import may resolve to that module rather than Hugging Face
        # `datasets` - confirm.
        from datasets import load_dataset

        # 4-bit NF4 weights with bfloat16 compute and double quantization.
        quant_cfg = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
        )

        logger.info("Loading base model...")
        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            base_model,
            quantization_config=quant_cfg,
            device_map="auto",
            trust_remote_code=True,
        )

        # LoRA on every attention projection and every MLP projection.
        attention_layers = ["q_proj", "k_proj", "v_proj", "o_proj"]
        mlp_layers = ["gate_proj", "up_proj", "down_proj"]
        adapter_cfg = LoraConfig(
            r=lora_rank,
            lora_alpha=lora_rank * 2,
            target_modules=attention_layers + mlp_layers,
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM",
        )

        model = get_peft_model(prepare_model_for_kbit_training(model), adapter_cfg)

        # Count trainable vs. total parameters in a single pass.
        trainable_params = 0
        total_params = 0
        for param in model.parameters():
            size = param.numel()
            total_params += size
            if param.requires_grad:
                trainable_params += size
        logger.info(f"Trainable params: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)")

        # Use the caller's file when it exists; otherwise generate a corpus.
        data_file = training_data
        if not (data_file and Path(data_file).exists()):
            generated = TRAINING_DIR / "sft_data.jsonl"
            generate_training_data(generated)
            data_file = str(generated)
        dataset = load_dataset("json", data_files=data_file, split="train")

        logger.info(f"Training on {len(dataset)} samples")

        output_dir = str(ADAPTERS_DIR / output_name)
        sft_args = SFTConfig(
            output_dir=output_dir,
            num_train_epochs=epochs,
            per_device_train_batch_size=batch_size,
            gradient_accumulation_steps=4,
            learning_rate=learning_rate,
            weight_decay=0.01,
            warmup_ratio=0.03,
            lr_scheduler_type="cosine",
            logging_steps=10,
            save_strategy="epoch",
            bf16=True,
            max_seq_length=max_seq_length,
            dataset_text_field="text",
            packing=True,  # Pack multiple short examples into one sequence
        )

        trainer = SFTTrainer(
            model=model,
            args=sft_args,
            train_dataset=dataset,
            processing_class=tokenizer,
        )

        logger.info("Starting SFT training...")
        started = datetime.now()
        outcome = trainer.train()
        duration = (datetime.now() - started).total_seconds()

        # Persist the adapter together with the tokenizer it was trained with.
        trainer.save_model(output_dir)
        tokenizer.save_pretrained(output_dir)

        logger.info(f"SFT training complete in {duration:.0f}s")
        logger.info(f"Final loss: {outcome.training_loss:.4f}")
        logger.info(f"Adapter saved to: {output_dir}")

        # Summary of the run, stored next to the adapter.
        meta = {
            "stage": "sft",
            "base_model": base_model,
            "output_name": output_name,
            "training_loss": outcome.training_loss,
            "duration_seconds": duration,
            "samples": len(dataset),
            "epochs": epochs,
            "lora_rank": lora_rank,
            "trainable_params": trainable_params,
            "total_params": total_params,
            "timestamp": datetime.now().isoformat(),
            "created_by": "titan-synapse",
        }
        meta_file = Path(output_dir) / "training_meta.json"
        with open(meta_file, "w") as handle:
            json.dump(meta, handle, indent=2)

        return meta

    except ImportError as e:
        logger.error(f"Missing dependency: {e}")
        logger.error("Install: pip install torch transformers peft trl bitsandbytes datasets")
        return {"error": str(e)}
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
# ============================================================
|
|
341
|
+
# Stage 2: DPO Alignment
|
|
342
|
+
# Makes the model prefer good answers over bad ones
|
|
343
|
+
# ============================================================
|
|
344
|
+
|
|
345
|
+
def _load_preference_pairs(prefs_dir: Path) -> list:
    """Collect prompt/chosen/rejected triples from every ``*.jsonl`` file in ``prefs_dir``."""
    pairs = []
    if not prefs_dir.exists():
        return pairs
    for pref_file in prefs_dir.glob("*.jsonl"):
        with open(pref_file, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # A blank line (often a trailing one) is not a record.
                    continue
                item = json.loads(line)
                if "prompt" in item and "chosen" in item and "rejected" in item:
                    pairs.append({
                        "prompt": item["prompt"],
                        "chosen": item["chosen"],
                        "rejected": item["rejected"],
                    })
    return pairs


def train_dpo(
    sft_model: Optional[str] = None,
    output_name: str = "synapse-3b-dpo",
    lora_rank: int = 32,
    epochs: int = 1,
    beta: float = 0.1,
) -> dict:
    """Stage 2: Direct Preference Optimization on collected preference pairs.

    Preference pairs (``prompt`` / ``chosen`` / ``rejected``) are read from
    the ``*.jsonl`` files in ``DATA_DIR / "preferences"``. The SFT model is
    loaded in 4-bit NF4, a LoRA adapter on the attention projections is
    trained with TRL's ``DPOTrainer``, and the adapter, tokenizer and a
    ``training_meta.json`` summary are written to
    ``ADAPTERS_DIR / output_name``.

    Args:
        sft_model: Path or id of the model to align. Defaults to
            ``ADAPTERS_DIR / "synapse-3b-sft"``.
        output_name: Sub-directory of ``ADAPTERS_DIR`` for the outputs.
        lora_rank: LoRA rank ``r``; ``lora_alpha`` is set to twice this.
        epochs: Number of training epochs.
        beta: DPO ``beta`` passed to ``DPOConfig``.

    Returns:
        The metadata dict that is also saved as ``training_meta.json``;
        ``{"error": "insufficient_data", "pairs": n}`` when fewer than 10
        pairs are available; or ``{"error": <message>}`` if an
        ``ImportError`` was raised.

    Raises:
        json.JSONDecodeError: If a non-blank preference line is not valid JSON.
    """
    logger.info("=" * 60)
    logger.info("STAGE 2: DPO Alignment")
    logger.info(f"SFT model: {sft_model or 'synapse-3b-sft'}")
    logger.info(f"Output: {output_name}")
    logger.info(f"Beta: {beta}")
    logger.info("=" * 60)

    try:
        import torch
        from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
        from peft import LoraConfig
        from trl import DPOTrainer, DPOConfig
        # NOTE(review): this package also ships synapse_learn/datasets.py; when
        # this file is run as a script its own directory is first on sys.path,
        # so this import may resolve to that module rather than Hugging Face
        # `datasets` - confirm.
        from datasets import Dataset

        sft_path = sft_model or str(ADAPTERS_DIR / "synapse-3b-sft")

        # Collect all preference pairs
        prefs = _load_preference_pairs(DATA_DIR / "preferences")

        if len(prefs) < 10:
            logger.warning(f"Only {len(prefs)} preference pairs available. Need more conversations to train DPO.")
            logger.info("The system collects preference pairs automatically from:")
            logger.info(" - User feedback (positive/negative signals)")
            logger.info(" - Cloud fallback responses (cloud vs local)")
            logger.info(" - Self-evaluation scoring")
            return {"error": "insufficient_data", "pairs": len(prefs)}

        dataset = Dataset.from_list(prefs)
        logger.info(f"Training DPO on {len(prefs)} preference pairs")

        # Load the SFT model in 4-bit NF4 with bfloat16 compute.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

        tokenizer = AutoTokenizer.from_pretrained(sft_path, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            sft_path,
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True,
        )

        # DPO adapts only the attention projections (SFT also covers the MLP).
        lora_config = LoraConfig(
            r=lora_rank,
            lora_alpha=lora_rank * 2,
            target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM",
        )

        output_dir = str(ADAPTERS_DIR / output_name)
        dpo_config = DPOConfig(
            output_dir=output_dir,
            num_train_epochs=epochs,
            per_device_train_batch_size=2,
            gradient_accumulation_steps=4,
            learning_rate=5e-5,
            beta=beta,
            logging_steps=10,
            save_strategy="epoch",
            bf16=True,
            max_length=1024,
            max_prompt_length=512,
        )

        trainer = DPOTrainer(
            model=model,
            args=dpo_config,
            train_dataset=dataset,
            processing_class=tokenizer,
            peft_config=lora_config,
        )

        logger.info("Starting DPO training...")
        start_time = datetime.now()
        result = trainer.train()
        duration = (datetime.now() - start_time).total_seconds()

        trainer.save_model(output_dir)
        tokenizer.save_pretrained(output_dir)

        logger.info(f"DPO training complete in {duration:.0f}s")

        # Summary of the run, stored next to the adapter.
        meta = {
            "stage": "dpo",
            "sft_model": sft_path,
            "output_name": output_name,
            "training_loss": result.training_loss,
            "duration_seconds": duration,
            "preference_pairs": len(prefs),
            "beta": beta,
            "timestamp": datetime.now().isoformat(),
            "created_by": "titan-synapse",
        }
        with open(Path(output_dir) / "training_meta.json", "w", encoding="utf-8") as f:
            json.dump(meta, f, indent=2)

        return meta

    except ImportError as e:
        logger.error(f"Missing dependency: {e}")
        return {"error": str(e)}
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
# ============================================================
|
|
481
|
+
# Stage 3: Export to GGUF
|
|
482
|
+
# Convert the trained model to GGUF format for fast inference
|
|
483
|
+
# ============================================================
|
|
484
|
+
|
|
485
|
+
# --outtype values that llama.cpp's HF->GGUF convert script writes directly.
# Any other quantization (e.g. Q4_K_M) is produced by running llama-quantize
# on an intermediate f16 GGUF.
_GGUF_DIRECT_OUTTYPES = frozenset({"f32", "f16", "bf16", "q8_0", "auto"})


def _first_existing_path(candidates):
    """Return the first path in ``candidates`` that exists on disk, else ``None``."""
    for candidate in candidates:
        if os.path.exists(candidate):
            return candidate
    return None


def export_gguf(
    model_path: Optional[str] = None,
    output_name: str = "synapse-3b",
    quantization: str = "Q4_K_M",
) -> dict:
    """Stage 3: merge the LoRA adapter and export the model as GGUF.

    The adapter at ``model_path`` is merged into its base model and saved,
    with the tokenizer, to ``TRAINING_DIR / "merged"``. The merged model is
    then converted with llama.cpp's convert script. Quantizations the script
    cannot write itself are produced in two steps: convert to an f16 GGUF,
    then run ``llama-quantize``.

    Args:
        model_path: Adapter directory to export. Defaults to
            ``ADAPTERS_DIR / "synapse-3b-dpo"``.
        output_name: Base name of the output file in ``MODELS_DIR``.
        quantization: Target GGUF type; its lower-cased form is part of the
            output file name.

    Returns:
        On success, ``{"output_file", "size_mb", "quantization"}``. If the
        tools are missing or a step fails, ``{"merged_model",
        "gguf_pending": True, "instructions"}``. If an ``ImportError`` was
        raised, ``{"error": <message>}``.
    """
    logger.info("=" * 60)
    logger.info("STAGE 3: Export to GGUF")
    logger.info(f"Model: {model_path or 'synapse-3b-dpo'}")
    logger.info(f"Quantization: {quantization}")
    logger.info("=" * 60)

    model_path = model_path or str(ADAPTERS_DIR / "synapse-3b-dpo")
    output_file = MODELS_DIR / f"{output_name}-{quantization.lower()}.gguf"

    try:
        import shutil
        import subprocess
        import sys

        def run_tool(command) -> bool:
            """Run an external command; log its stderr and return False on failure."""
            completed = subprocess.run(command, capture_output=True, text=True)
            if completed.returncode != 0:
                logger.warning(f"GGUF conversion failed: {completed.stderr}")
                return False
            return True

        # First merge LoRA into base model
        logger.info("Merging LoRA adapter into base model...")
        merge_dir = TRAINING_DIR / "merged"
        merge_dir.mkdir(parents=True, exist_ok=True)

        from peft import AutoPeftModelForCausalLM
        from transformers import AutoTokenizer

        model = AutoPeftModelForCausalLM.from_pretrained(
            model_path,
            device_map="auto",
            trust_remote_code=True,
        )
        merged_model = model.merge_and_unload()
        merged_model.save_pretrained(str(merge_dir))

        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
        tokenizer.save_pretrained(str(merge_dir))

        logger.info(f"Merged model saved to {merge_dir}")

        # Convert to GGUF with the convert script from a llama.cpp checkout.
        logger.info(f"Converting to GGUF ({quantization})...")

        # The script was renamed from hyphens to underscores in llama.cpp,
        # so look for both spellings.
        convert_script = _first_existing_path([
            "/usr/local/bin/convert-hf-to-gguf.py",
            "/usr/local/bin/convert_hf_to_gguf.py",
            os.path.expanduser("~/llama.cpp/convert-hf-to-gguf.py"),
            os.path.expanduser("~/llama.cpp/convert_hf_to_gguf.py"),
            "convert-hf-to-gguf.py",
            "convert_hf_to_gguf.py",
        ])

        if convert_script is None:
            logger.warning("llama.cpp convert script not found. Saving as safetensors instead.")
            logger.info("To convert to GGUF, install llama.cpp and run:")
            logger.info(f" python convert-hf-to-gguf.py {merge_dir} --outfile {output_file}")
        else:
            exported = False
            outtype = quantization.lower()

            def convert_to(target, target_type) -> bool:
                # Run under the current interpreter so the script sees the same
                # environment as this process; a bare "python" may be missing
                # or point at a different installation.
                return run_tool([
                    sys.executable, convert_script, str(merge_dir),
                    "--outfile", str(target), "--outtype", target_type,
                ])

            if outtype in _GGUF_DIRECT_OUTTYPES:
                exported = convert_to(output_file, outtype)
            else:
                quantize_bin = shutil.which("llama-quantize") or _first_existing_path([
                    os.path.expanduser("~/llama.cpp/llama-quantize"),
                    os.path.expanduser("~/llama.cpp/build/bin/llama-quantize"),
                ])
                if quantize_bin is None:
                    logger.warning(f"llama-quantize not found; cannot produce a {quantization} GGUF.")
                else:
                    f16_file = MODELS_DIR / f"{output_name}-f16.gguf"
                    exported = convert_to(f16_file, "f16") and run_tool(
                        [quantize_bin, str(f16_file), str(output_file), quantization.upper()]
                    )
                    # The f16 file is only an intermediate; drop it once the
                    # quantized model exists.
                    if exported and f16_file.exists():
                        f16_file.unlink()

            if exported:
                logger.info(f"GGUF exported: {output_file}")
                file_size_mb = output_file.stat().st_size / (1024 * 1024)
                return {
                    "output_file": str(output_file),
                    "size_mb": file_size_mb,
                    "quantization": quantization,
                }

        # No GGUF was produced; hand back the merged model for manual conversion.
        return {
            "merged_model": str(merge_dir),
            "gguf_pending": True,
            "instructions": f"Run: python convert-hf-to-gguf.py {merge_dir} --outfile {output_file}",
        }

    except ImportError as e:
        logger.error(f"Missing dependency: {e}")
        return {"error": str(e)}
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
# ============================================================
|
|
576
|
+
# Full Training Pipeline
|
|
577
|
+
# ============================================================
|
|
578
|
+
|
|
579
|
+
def train_full_pipeline(
    base_model: str = "Qwen/Qwen2.5-3B",
    output_name: str = "synapse-3b",
) -> dict:
    """Run data generation, SFT, DPO and GGUF export in sequence.

    Stage outputs are named after ``output_name``: the SFT adapter is
    ``<output_name>-sft`` and the DPO adapter ``<output_name>-dpo``. If SFT
    reports an error the pipeline stops there. If DPO reports an error the
    SFT adapter is exported instead.

    Args:
        base_model: Model id or path passed to the SFT stage.
        output_name: Name stem for every artifact the pipeline writes.

    Returns:
        A dict of per-stage results under the keys ``"data"``, ``"sft"``,
        ``"dpo"`` and ``"export"`` (only ``"data"`` and ``"sft"`` when SFT
        failed). On a full run the same dict is also written to
        ``TRAINING_DIR / "<output_name>_pipeline.json"``.
    """
    rule = "=" * 60
    sft_name = f"{output_name}-sft"
    dpo_name = f"{output_name}-dpo"

    logger.info(rule)
    logger.info("TITAN SYNAPSE — Full Model Training Pipeline")
    logger.info(f"Creating: {output_name}")
    logger.info(f"Base: {base_model}")
    logger.info(rule)

    results = {}

    # [1/4] Build the training corpus.
    logger.info("\n[1/4] Generating training data...")
    corpus_path = TRAINING_DIR / "full_training_data.jsonl"
    results["data"] = generate_training_data(corpus_path)

    # [2/4] Supervised fine-tuning on that corpus.
    logger.info("\n[2/4] Supervised Fine-Tuning...")
    sft_outcome = train_sft(
        base_model=base_model,
        output_name=sft_name,
        training_data=str(corpus_path),
    )
    results["sft"] = sft_outcome

    # Nothing downstream can run without an SFT adapter.
    if "error" in sft_outcome:
        logger.error(f"SFT failed: {sft_outcome['error']}")
        return results

    # [3/4] Preference alignment on top of the SFT adapter.
    logger.info("\n[3/4] DPO Alignment...")
    dpo_outcome = train_dpo(
        sft_model=str(ADAPTERS_DIR / sft_name),
        output_name=dpo_name,
    )
    results["dpo"] = dpo_outcome

    # [4/4] Export the DPO adapter, or the SFT one if DPO reported an error.
    logger.info("\n[4/4] Exporting to GGUF...")
    if "error" in dpo_outcome:
        adapter_to_export = sft_name
    else:
        adapter_to_export = dpo_name
    results["export"] = export_gguf(
        model_path=str(ADAPTERS_DIR / adapter_to_export),
        output_name=output_name,
    )

    logger.info(f"\n{rule}")
    logger.info("Training pipeline complete!")
    logger.info(f"Model: {output_name}")
    logger.info("This is YOUR model. Not Qwen's. Not Meta's. Yours.")
    logger.info(rule)

    # Persist the per-stage results; default=str covers non-JSON values.
    summary_file = TRAINING_DIR / f"{output_name}_pipeline.json"
    with open(summary_file, "w") as handle:
        json.dump(results, handle, indent=2, default=str)

    return results
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
if __name__ == "__main__":
    # Command-line entry point: run the whole pipeline or a single stage.
    parser = argparse.ArgumentParser(description="Train the Synapse base model")
    parser.add_argument("--stage", choices=["full", "data", "sft", "dpo", "export"],
                        default="full", help="Training stage to run")
    parser.add_argument("--base-model", default="Qwen/Qwen2.5-3B",
                        help="Base model to fine-tune (Apache 2.0 licensed)")
    parser.add_argument("--output", default="synapse-3b",
                        help="Output model name")
    parser.add_argument("--epochs", type=int, default=3,
                        help="Number of training epochs")
    parser.add_argument("--lora-rank", type=int, default=64,
                        help="LoRA rank (higher = more capacity)")

    args = parser.parse_args()

    # Every stage derives its artifact names from --output, so stages run one
    # at a time find what the previous stage wrote. With the default --output
    # these are the same names the stage functions use by default.
    sft_name = f"{args.output}-sft"
    dpo_name = f"{args.output}-dpo"

    if args.stage == "full":
        train_full_pipeline(args.base_model, args.output)
    elif args.stage == "data":
        generate_training_data(TRAINING_DIR / "training_data.jsonl")
    elif args.stage == "sft":
        train_sft(args.base_model, sft_name, epochs=args.epochs, lora_rank=args.lora_rank)
    elif args.stage == "dpo":
        # --epochs and --lora-rank apply to the SFT stage only; DPO keeps its
        # own defaults.
        train_dpo(sft_model=str(ADAPTERS_DIR / sft_name), output_name=dpo_name)
    elif args.stage == "export":
        export_gguf(model_path=str(ADAPTERS_DIR / dpo_name), output_name=args.output)
|