titan-synapse 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/CONTRIBUTING.md +187 -0
  2. package/Cargo.lock +3976 -0
  3. package/Cargo.toml +10 -0
  4. package/LICENSE +190 -0
  5. package/PROGRESS.md +151 -0
  6. package/README.md +514 -0
  7. package/TEST_LOG.md +220 -0
  8. package/config/default.yaml +36 -0
  9. package/crates/synapse/Cargo.toml +70 -0
  10. package/crates/synapse/src/cli/bench.rs +44 -0
  11. package/crates/synapse/src/cli/eval.rs +395 -0
  12. package/crates/synapse/src/cli/export.rs +45 -0
  13. package/crates/synapse/src/cli/hub.rs +179 -0
  14. package/crates/synapse/src/cli/import.rs +35 -0
  15. package/crates/synapse/src/cli/learn.rs +53 -0
  16. package/crates/synapse/src/cli/mod.rs +10 -0
  17. package/crates/synapse/src/cli/models.rs +36 -0
  18. package/crates/synapse/src/cli/pull.rs +60 -0
  19. package/crates/synapse/src/cli/status.rs +52 -0
  20. package/crates/synapse/src/cli/train.rs +99 -0
  21. package/crates/synapse/src/config.rs +220 -0
  22. package/crates/synapse/src/dashboard.rs +281 -0
  23. package/crates/synapse/src/format/manifest.rs +57 -0
  24. package/crates/synapse/src/format/mod.rs +4 -0
  25. package/crates/synapse/src/format/packer.rs +213 -0
  26. package/crates/synapse/src/inference/engine.rs +361 -0
  27. package/crates/synapse/src/inference/kv_cache.rs +97 -0
  28. package/crates/synapse/src/inference/lora.rs +166 -0
  29. package/crates/synapse/src/inference/mod.rs +9 -0
  30. package/crates/synapse/src/inference/model.rs +167 -0
  31. package/crates/synapse/src/inference/sampler.rs +133 -0
  32. package/crates/synapse/src/inference/speculative.rs +153 -0
  33. package/crates/synapse/src/learn/cloud_fallback.rs +186 -0
  34. package/crates/synapse/src/learn/engine.rs +109 -0
  35. package/crates/synapse/src/learn/mod.rs +5 -0
  36. package/crates/synapse/src/main.rs +185 -0
  37. package/crates/synapse/src/memory/extractor.rs +201 -0
  38. package/crates/synapse/src/memory/graph.rs +332 -0
  39. package/crates/synapse/src/memory/hallucination.rs +259 -0
  40. package/crates/synapse/src/memory/mod.rs +7 -0
  41. package/crates/synapse/src/openai.rs +232 -0
  42. package/crates/synapse/src/server.rs +166 -0
  43. package/crates/synapse/src/streaming.rs +80 -0
  44. package/crates/synapse/src/swarm/coordinator.rs +198 -0
  45. package/crates/synapse/src/swarm/mod.rs +8 -0
  46. package/crates/synapse/src/swarm/orchestrator.rs +225 -0
  47. package/crates/synapse/src/swarm/pool.rs +64 -0
  48. package/crates/synapse/src/swarm/spawner.rs +199 -0
  49. package/crates/synapse/src/swarm/synthesizer.rs +26 -0
  50. package/crates/synapse/src/vram/manager.rs +67 -0
  51. package/crates/synapse/src/vram/mod.rs +3 -0
  52. package/docker-compose.yml +19 -0
  53. package/install.sh +311 -0
  54. package/package.json +36 -0
  55. package/python/Dockerfile.learn +18 -0
  56. package/python/requirements.txt +11 -0
  57. package/python/synapse_learn/__init__.py +0 -0
  58. package/python/synapse_learn/datasets.py +233 -0
  59. package/python/synapse_learn/real_eval.py +616 -0
  60. package/python/synapse_learn/server.py +431 -0
  61. package/python/synapse_learn/train_base.py +672 -0
  62. package/python/synapse_learn/train_specialists.py +787 -0
@@ -0,0 +1,431 @@
1
+ """TITAN Synapse Learning Sidecar — FastAPI server for QLoRA training + self-evaluation.
2
+
3
+ This is the brain's gym. Every conversation generates training signal.
4
+ When enough preference pairs accumulate, we fire up QLoRA and the specialist gets smarter.
5
+ No human intervention. No export-retrain-import dance. Just continuous improvement.
6
+ """
7
+
8
+ from fastapi import FastAPI, BackgroundTasks
9
+ from pydantic import BaseModel
10
+ from typing import Optional
11
+ import json
12
+ import os
13
+ import logging
14
+ import threading
15
+ from pathlib import Path
16
+ from datetime import datetime
17
+
18
# ASGI application object; every route handler in this module registers on it.
app = FastAPI(title="Synapse Learning Engine", version="0.1.0")
logger = logging.getLogger("synapse-learn")
logging.basicConfig(level=logging.INFO)

# Root of all on-disk state: $SYNAPSE_DATA_DIR when set, otherwise ~/.synapse.
DATA_DIR = Path(os.environ.get("SYNAPSE_DATA_DIR", os.path.expanduser("~/.synapse")))
PREFERENCES_DIR = DATA_DIR / "preferences"  # one <specialist>.jsonl file per specialist
ADAPTERS_DIR = DATA_DIR / "adapters"        # trained adapter output
MODELS_DIR = DATA_DIR / "models"            # defined here but not created at import time

# Only the two directories this module writes to are created eagerly.
for _required_dir in (PREFERENCES_DIR, ADAPTERS_DIR):
    _required_dir.mkdir(parents=True, exist_ok=True)

# Training lock — only one training job at a time
training_lock = threading.Lock()

# Mutable snapshot of the training job state, read by /health, /status and /train.
training_status = dict(
    is_training=False,
    current_specialist=None,
    progress=0,
    last_trained=None,
    last_loss=None,
)
38
+
39
+
40
class EvalRequest(BaseModel):
    # Request body of POST /evaluate: one prompt/response exchange to be scored.

    # Name of the specialist that answered; also names its preference file.
    specialist: str
    # Prompt that was given to the specialist.
    prompt: str
    # Text of the answer that should be scored.
    response: str
44
+
45
+
46
class EvalResponse(BaseModel):
    # Result returned by POST /evaluate.

    # Heuristic quality score; the handler clamps it to the range 1.0-5.0.
    score: float
    # Optional rewritten answer; defaults to None when none is supplied.
    improved_response: Optional[str] = None
    # Human-readable summary of the signals that moved the score.
    feedback: str
50
+
51
+
52
class TrainRequest(BaseModel):
    # Request body of POST /train; everything except `specialist` has a default.

    # Specialist whose preference file provides the training data.
    specialist: str
    # Model identifier handed to the tokenizer/model `from_pretrained` calls.
    base_model: str = "Qwen/Qwen2.5-3B-Instruct"
    # Optimizer learning rate forwarded to the trainer configuration.
    learning_rate: float = 2e-4
    # Number of passes over the training set.
    epochs: int = 3
    # LoRA rank (`r`) and scaling factor (`lora_alpha`).
    lora_rank: int = 16
    lora_alpha: int = 32
59
+
60
+
61
class TrainResponse(BaseModel):
    # Response body of POST /train.

    # Directory the adapter is (or will be) written to; empty when nothing was started.
    adapter_path: str
    # Training loss; the /train handler always reports 0.0 because training runs later.
    loss: float
    # Number of preference pairs counted for the specialist.
    pairs_used: int
    # Outcome label; the /train handler overrides the default with
    # "no_data", "already_training" or "training_started".
    status: str = "completed"
66
+
67
+
68
class LearnStatus(BaseModel):
    # Response body of GET /status.

    # Preference pairs currently stored on disk.
    pairs_collected: int
    # Pairs waiting to be trained on (the handler reports the same count as above).
    training_queue: int
    # ISO-8601 timestamp of the most recent training run, if any.
    last_trained: Optional[str] = None
    # Number of adapters found in the adapters directory.
    adapters_created: int
    # Whether a training job is running right now, and for which specialist.
    is_training: bool = False
    current_specialist: Optional[str] = None
75
+
76
+
77
def count_preferences(specialist: Optional[str] = None) -> int:
    """Count preference pairs stored on disk.

    Args:
        specialist: When given (and non-empty), only the file named exactly
            ``<specialist>.jsonl`` is counted; otherwise every ``*.jsonl`` file
            in the preferences directory is counted.

    Returns:
        Number of non-blank lines across the selected files.
    """
    total = 0
    for pref_path in PREFERENCES_DIR.glob("*.jsonl"):
        # Compare the exact stem: a substring test would let "code" also pick
        # up "code_review.jsonl" and inflate the count for that specialist.
        if specialist and pref_path.stem != specialist:
            continue
        with open(pref_path, encoding="utf-8") as fh:
            # Blank lines are not pairs (the loader skips them too).
            total += sum(1 for line in fh if line.strip())
    return total
86
+
87
+
88
def load_preferences(specialist: str) -> list:
    """Load the preference pairs stored for one specialist.

    Reads ``<specialist>.jsonl`` from the preferences directory, one JSON
    document per line.

    Args:
        specialist: Name of the specialist whose file should be read.

    Returns:
        A list of dicts, in file order. Blank lines, lines that are not valid
        JSON, and JSON values that are not objects are skipped; a missing file
        yields an empty list.
    """
    pref_file = PREFERENCES_DIR / f"{specialist}.jsonl"
    if not pref_file.exists():
        return []

    pairs = []
    with open(pref_file, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                # Best effort: one corrupt line must not hide the other pairs.
                continue
            # Consumers call .get() on every pair, so anything that is not a
            # JSON object (a bare number, list, string, ...) is unusable.
            if isinstance(record, dict):
                pairs.append(record)
    return pairs
100
+
101
+
102
def count_adapters() -> int:
    """Count the adapters present in the adapters directory.

    Uses the same rule as the /adapters listing so both endpoints agree: a
    directory containing ``adapter_config.json`` counts as one adapter, and any
    other entry whose suffix is ``.safetensors`` counts as one adapter.

    Returns:
        Number of adapters found; 0 when the adapters directory is missing.
    """
    # glob("*/") is avoided on purpose: before Python 3.11 the trailing slash
    # was ignored, so plain files matched as well and were counted twice.
    if not ADAPTERS_DIR.is_dir():
        return 0

    found = 0
    for entry in ADAPTERS_DIR.iterdir():
        if entry.is_dir() and (entry / "adapter_config.json").exists():
            found += 1
        elif entry.suffix == ".safetensors":
            found += 1
    return found
105
+
106
+
107
@app.get("/health")
async def health():
    # Liveness probe: fixed identity fields plus the current training flag.
    payload = {"status": "ok", "engine": "synapse-learn"}
    payload["is_training"] = training_status["is_training"]
    return payload
114
+
115
+
116
@app.get("/status")
async def status():
    # Snapshot of the learning pipeline: on-disk counts plus the in-memory job state.
    collected = count_preferences()
    # The queue is reported as the full on-disk pair count, read a second time.
    queued = count_preferences()
    last_run = training_status.get("last_trained")
    adapter_total = count_adapters()
    busy = training_status["is_training"]
    active = training_status.get("current_specialist")

    return LearnStatus(
        pairs_collected=collected,
        training_queue=queued,
        last_trained=last_run,
        adapters_created=adapter_total,
        is_training=busy,
        current_specialist=active,
    )
126
+
127
+
128
@app.post("/evaluate")
async def evaluate(req: EvalRequest):
    """Self-evaluate a response. Score 1-5 and record low-scoring responses.

    Scoring heuristics (will be upgraded to model-based evaluation):
    - Length: very short = bad, detailed = good
    - Quality signals: errors, placeholders = bad
    - Repetition: mostly duplicated sentences = bad
    - Structure: code, lists, examples = good

    A response scoring below 3.0 is appended to the specialist's preference
    file as a rejected sample with no chosen counterpart. No improved response
    is generated yet, so ``improved_response`` is always ``None``.

    Raises:
        HTTPException: 400 when the specialist name is empty or contains a
            path separator, since it is used to build a file name.
    """
    # Local import keeps this handler independent of the module import list.
    from fastapi import HTTPException

    specialist = req.specialist
    # The name becomes part of a path under PREFERENCES_DIR; refuse anything
    # that could address a file outside that directory.
    if (
        not specialist
        or specialist in {".", ".."}
        or any(ch in specialist for ch in ("/", "\\", "\0"))
    ):
        raise HTTPException(status_code=400, detail="Invalid specialist name")

    response_text = req.response
    lowered = response_text.lower()
    score = 3.0
    feedback_parts = []

    # Length scoring (character count)
    response_len = len(response_text)
    if response_len < 30:
        score -= 1.5
        feedback_parts.append("Very short response")
    elif response_len < 100:
        score -= 0.5
        feedback_parts.append("Brief response")
    elif response_len > 500:
        score += 0.5
        feedback_parts.append("Detailed response")

    # Quality signals: only the first matching marker is penalised.
    low_quality = ["error", "placeholder", "todo", "fixme", "lorem ipsum"]
    matched_signal = next((s for s in low_quality if s in lowered), None)
    if matched_signal is not None:
        score -= 1.0
        feedback_parts.append(f"Contains '{matched_signal}'")

    # Repetition check. Only sentences longer than 10 characters take part,
    # and the unique count is compared against that same filtered set;
    # comparing against all sentences would flag any answer made of several
    # short sentences as repetitive.
    sentences = response_text.split(". ")
    if len(sentences) > 3:
        considered = [s.strip().lower() for s in sentences if len(s) > 10]
        if len(set(considered)) < len(considered) * 0.5:
            score -= 1.0
            feedback_parts.append("High repetition detected")

    # Structure bonus
    has_code = "```" in response_text or "def " in response_text or "function " in response_text
    has_list = any(response_text.count(marker) >= 2 for marker in ["1.", "- ", "* "])
    has_example = "example" in lowered or "for instance" in lowered

    if has_code:
        score += 0.5
        feedback_parts.append("Contains code")
    if has_list:
        score += 0.3
        feedback_parts.append("Well-structured with lists")
    if has_example:
        score += 0.3
        feedback_parts.append("Includes examples")

    # Clamp score
    score = max(1.0, min(5.0, score))
    feedback = "; ".join(feedback_parts) if feedback_parts else "Acceptable response"

    # Store preference pair if score is low
    if score < 3.0:
        pair = {
            "specialist": specialist,
            "prompt": req.prompt,
            "rejected": response_text,
            "chosen": None,
            "score": score,
            "timestamp": datetime.now().isoformat(),
        }
        pref_file = PREFERENCES_DIR / f"{specialist}.jsonl"
        with open(pref_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(pair) + "\n")
        logger.info(f"Stored preference pair for {specialist} (score={score:.1f})")

    return EvalResponse(score=score, improved_response=None, feedback=feedback)
206
+
207
+
208
def run_qlora_training(specialist: str, base_model: str, config: TrainRequest):
    """Run QLoRA fine-tuning for one specialist; meant to run as a background task.

    Uses HuggingFace transformers + PEFT + TRL. Only one job runs at a time:
    if the training lock is already held the call logs a warning and returns.

    Args:
        specialist: Specialist whose preference file supplies the data; also
            names the output directory ``<specialist>_qlora``.
        base_model: Model identifier passed to ``from_pretrained``.
        config: Hyper-parameters (epochs, learning rate, LoRA rank/alpha).

    Returns:
        None. Progress and results are published through ``training_status``;
        failures are logged instead of raised.
    """
    if not training_lock.acquire(blocking=False):
        logger.warning("Training already in progress, skipping")
        return

    try:
        training_status.update({
            "is_training": True,
            "current_specialist": specialist,
            "progress": 0,
        })

        pairs = load_preferences(specialist)
        if not pairs:
            logger.info(f"No preference pairs for {specialist}, skipping training")
            return

        logger.info(f"Starting QLoRA training for {specialist}: {len(pairs)} pairs")

        try:
            # Build the SFT corpus first: it needs no ML dependency, and doing
            # it before the model is loaded avoids pulling a multi-GB model
            # into memory only to find there is nothing to train on.
            train_texts = []
            for pair in pairs:
                if not isinstance(pair, dict):
                    continue
                prompt = pair.get("prompt", "")
                # Train only on a preferred ("chosen") answer. Falling back to
                # the rejected answer would fine-tune the model towards the
                # very outputs that were scored as low quality.
                response = pair.get("chosen")
                if not response or response == "(needs improvement)":
                    continue
                text = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n{response}<|im_end|>"
                train_texts.append({"text": text})

            if not train_texts:
                logger.info("No valid training texts, skipping")
                return

            # Heavy dependencies are imported lazily so the API can start
            # without them; a missing package is handled below.
            import torch
            from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
            from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
            from trl import SFTTrainer, SFTConfig
            from datasets import Dataset

            # QLoRA config — 4-bit quantization for memory efficiency
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
            )

            logger.info(f"Loading base model: {base_model}")
            # NOTE(security): base_model comes from the /train request body and
            # trust_remote_code=True lets the referenced repository execute
            # arbitrary Python on this host. Restrict base_model to an
            # allow-list or disable trust_remote_code before exposing /train
            # to untrusted callers.
            tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
            model = AutoModelForCausalLM.from_pretrained(
                base_model,
                quantization_config=bnb_config,
                device_map="auto",
                trust_remote_code=True,
            )

            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            model = prepare_model_for_kbit_training(model)

            # LoRA config
            lora_config = LoraConfig(
                r=config.lora_rank,
                lora_alpha=config.lora_alpha,
                target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                                "gate_proj", "up_proj", "down_proj"],
                lora_dropout=0.05,
                bias="none",
                task_type="CAUSAL_LM",
            )

            model = get_peft_model(model, lora_config)
            trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
            total = sum(p.numel() for p in model.parameters())
            logger.info(f"Trainable parameters: {trainable:,} / {total:,} ({trainable/total*100:.2f}%)")

            dataset = Dataset.from_list(train_texts)

            # Training config
            output_dir = str(ADAPTERS_DIR / f"{specialist}_qlora")
            training_args = SFTConfig(
                output_dir=output_dir,
                num_train_epochs=config.epochs,
                per_device_train_batch_size=1,
                gradient_accumulation_steps=4,
                learning_rate=config.learning_rate,
                fp16=False,
                bf16=True,
                logging_steps=1,
                save_strategy="epoch",
                warmup_ratio=0.1,
                lr_scheduler_type="cosine",
                max_seq_length=512,
                dataset_text_field="text",
            )

            trainer = SFTTrainer(
                model=model,
                train_dataset=dataset,
                args=training_args,
                tokenizer=tokenizer,
            )

            logger.info("Training started...")
            result = trainer.train()
            final_loss = result.training_loss

            # Save adapter (PEFT writes the adapter weights into output_dir).
            # NOTE(review): no separate single-file copy is produced for the
            # Rust engine; confirm whether it needs one.
            model.save_pretrained(output_dir)
            tokenizer.save_pretrained(output_dir)
            logger.info(f"Adapter saved to {output_dir}")

            training_status.update({
                "last_trained": datetime.now().isoformat(),
                "last_loss": final_loss,
                "progress": 100,
            })

            logger.info(f"Training complete for {specialist}: loss={final_loss:.4f}, pairs={len(train_texts)}")

        except ImportError as e:
            logger.warning(f"Training dependencies not installed: {e}")
            logger.info("Install with: pip install torch transformers peft trl bitsandbytes")
            # No adapter is written in this case; only the timestamp is
            # recorded so /status shows that a run was attempted.
            training_status["last_trained"] = datetime.now().isoformat()

        except Exception as e:
            # Background-task boundary: log with traceback instead of raising.
            logger.error(f"Training failed: {e}", exc_info=True)

    finally:
        # Always clear the job state and release the lock, including on the
        # early returns above.
        training_status.update({
            "is_training": False,
            "current_specialist": None,
        })
        training_lock.release()
358
+
359
+
360
@app.post("/train")
async def train(req: TrainRequest, background_tasks: BackgroundTasks):
    """Trigger QLoRA training for a specialist.

    Training runs in the background so the API stays responsive.
    Check /status to monitor progress.

    The response ``status`` is "no_data" when the specialist has no stored
    pairs, "already_training" when a job is running, and "training_started"
    otherwise. ``loss`` is always 0.0 because training has not run yet.

    Raises:
        HTTPException: 400 when the specialist name is empty or contains a
            path separator, since it is used to build file and directory names.
    """
    # Local import keeps this handler independent of the module import list.
    from fastapi import HTTPException

    specialist = req.specialist
    # The name is turned into paths under the preferences and adapters
    # directories; refuse anything that could point outside them.
    if (
        not specialist
        or specialist in {".", ".."}
        or any(ch in specialist for ch in ("/", "\\", "\0"))
    ):
        raise HTTPException(status_code=400, detail="Invalid specialist name")

    pairs = count_preferences(specialist)

    if pairs == 0:
        return TrainResponse(
            adapter_path="",
            loss=0.0,
            pairs_used=0,
            status="no_data",
        )

    if training_status["is_training"]:
        return TrainResponse(
            adapter_path="",
            loss=0.0,
            pairs_used=0,
            status="already_training",
        )

    # Start training in background
    background_tasks.add_task(run_qlora_training, specialist, req.base_model, req)

    adapter_path = str(ADAPTERS_DIR / f"{specialist}_qlora")
    return TrainResponse(
        adapter_path=adapter_path,
        loss=0.0,
        pairs_used=pairs,
        status="training_started",
    )
395
+
396
+
397
@app.post("/collect")
async def collect_pair(pair: dict):
    """Directly collect a preference pair from the Rust engine.

    The pair is stamped with the current time and appended as one JSON line to
    ``<specialist>.jsonl``; a pair without a "specialist" key goes to "general".

    Raises:
        HTTPException: 400 when "specialist" is not a non-empty string or
            contains a path separator, since it is used to build a file name.
    """
    # Local import keeps this handler independent of the module import list.
    from fastapi import HTTPException

    specialist = pair.get("specialist", "general")
    # The body is an arbitrary JSON object, so the value may be any type and
    # any text; refuse anything that is not a plain file-name component.
    if (
        not isinstance(specialist, str)
        or not specialist
        or specialist in {".", ".."}
        or any(ch in specialist for ch in ("/", "\\", "\0"))
    ):
        raise HTTPException(status_code=400, detail="Invalid specialist name")

    record = {**pair, "timestamp": datetime.now().isoformat()}
    pref_file = PREFERENCES_DIR / f"{specialist}.jsonl"
    with open(pref_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(record) + "\n")
    return {"status": "collected", "specialist": specialist}
406
+
407
+
408
@app.get("/adapters")
async def list_adapters():
    """List all available trained adapters."""
    found = []
    for entry in ADAPTERS_DIR.iterdir():
        # A directory holding adapter_config.json is reported as "qlora";
        # any other entry with a .safetensors suffix is reported as "lora".
        is_adapter_dir = entry.is_dir() and (entry / "adapter_config.json").exists()
        if is_adapter_dir:
            record = {"name": entry.name, "path": str(entry), "type": "qlora"}
        elif entry.suffix == ".safetensors":
            record = {"name": entry.stem, "path": str(entry), "type": "lora"}
        else:
            continue
        found.append(record)
    return {"adapters": found, "count": len(found)}
426
+
427
+
428
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces.
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 8090
    logger.info("Starting Synapse Learning Engine on :8090")
    uvicorn.run(app, host=bind_host, port=bind_port)