titan-synapse 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,380 @@
1
+ #!/usr/bin/env python3
2
+ """Full benchmark suite for Synapse-3B merged model.
3
+
4
+ Runs standard evaluations that match published benchmarks for Qwen2-3B class models:
5
+ - GSM8K (math reasoning) — full test set or configurable N
6
+ - HumanEval (code generation) — full 164 problems
7
+ - MMLU (general knowledge) — 5-shot, standard benchmark
8
+ - Speed test — tok/s, time-to-first-token and peak VRAM on the detected device (CUDA GPU if available, otherwise CPU)
9
+
10
+ Apples-to-apples comparison against published Qwen2-3B scores.
11
+ """
12
+
13
+ import os
14
+ import sys
15
+ import torch
16
+ import json
17
+ import time
18
+ import re
19
+ import traceback
20
+ from transformers import AutoModelForCausalLM, AutoTokenizer
21
+
22
# Where the merged checkpoint lives and which device the whole run uses.
MODEL_DIR = os.path.expanduser("~/.synapse/merged/synapse-3b")
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"[INFO] Device: {DEVICE}")
if DEVICE == "cuda":
    print(f"[INFO] GPU: {torch.cuda.get_device_name(0)}")
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"[INFO] VRAM: {gpu_props.total_memory / 1e9:.1f} GB")

# Load tokenizer and weights: bfloat16 on GPU, float32 on CPU.
print(f"\nLoading Synapse-3B from {MODEL_DIR}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
load_dtype = torch.bfloat16 if DEVICE == "cuda" else torch.float32
model = AutoModelForCausalLM.from_pretrained(
    MODEL_DIR,
    torch_dtype=load_dtype,
    device_map=DEVICE,
)
print(f"Model loaded: {type(model).__name__} on {DEVICE}")
param_count = sum(p.numel() for p in model.parameters())
print(f"Parameters: {param_count / 1e9:.2f}B")

# Every benchmark section stores its outcome here; dumped as JSON at the end.
results = {}
40
+
41
+ # ============================================================
42
+ # 1. GSM8K — Math Reasoning (standard: 8-shot CoT)
43
+ # Published Qwen2-3B: ~50-55% on GSM8K
44
+ # ============================================================
45
+ print("\n" + "="*60)
46
+ print("BENCHMARK 1: GSM8K (Math Reasoning)")
47
+ print("="*60)
48
+
49
from datasets import load_dataset

gsm = load_dataset("openai/gsm8k", "main", split="test")
# GSM_N (env var) limits how many problems are evaluated; the default is the
# full test set (1319). It is capped at the split size so that the evaluation
# loop, which indexes gsm[i] for i < GSM_N, can never run past the end.
GSM_N = min(int(os.environ.get("GSM_N", len(gsm))), len(gsm))
print(f"Running {GSM_N} / {len(gsm)} problems")
54
+
55
+ # 8-shot examples (standard for GSM8K benchmark)
56
+ FEW_SHOT_EXAMPLES = """Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there will be 21 trees. How many trees did the grove workers plant today?
57
+ A: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have been 21 - 15 = 6. #### 6
58
+
59
+ Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?
60
+ A: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5. #### 5
61
+
62
+ Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
63
+ A: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74. After eating 35, they had 74 - 35 = 39. #### 39
64
+
65
+ Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did Jason give to Denny?
66
+ A: Jason started with 20 lollipops. Then he had 12 after giving some to Denny. So he gave Denny 20 - 12 = 8. #### 8
67
+
68
+ Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he have now?
69
+ A: Shawn started with 5 toys. If he got 2 toys each from his mom and dad, then that is 4 more toys. 5 + 4 = 9. #### 9
70
+
71
+ Q: There were nine computers in the server room. Five more computers were installed each day, from monday to thursday. How many computers are now in the server room?
72
+ A: There were originally 9 computers. For each of 4 days, 5 more computers were added. So 4 * 5 = 20 computers were added. 9 + 20 = 29. #### 29
73
+
74
+ Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?
75
+ A: Michael started with 58 golf balls. After losing 23 on tuesday, he had 58 - 23 = 35. After losing 2 more, he had 35 - 2 = 33. #### 33
76
+
77
+ Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?
78
+ A: Olivia had 23 dollars. 5 bagels for 3 dollars each = 5 * 3 = 15 dollars. 23 - 15 = 8. #### 8"""
79
+
80
# One number: optional sign, digits with optional thousands separators,
# optional decimal part. A trailing "." or "," is deliberately NOT consumed.
_GSM_NUMBER = r'-?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?'
_GSM_MARKED = re.compile(r'####\s*\$?\s*(' + _GSM_NUMBER + r')')
_GSM_ANY_NUMBER = re.compile(_GSM_NUMBER)
# Start of a follow-up question the model may invent after answering.
_GSM_NEXT_QUESTION = re.compile(r'\n\s*Q:')


def _normalize_gsm_number(token):
    """Return *token* without thousands separators or a redundant fraction."""
    cleaned = token.replace(",", "")
    if "." in cleaned:
        # "18.00" -> "18", "0.50" -> "0.5"; integers are left untouched.
        cleaned = cleaned.rstrip("0").rstrip(".")
    return cleaned


def extract_gsm_answer(text):
    """Extract the final numeric answer from a GSM8K-style completion.

    Only the first answer is considered: anything from a following "Q:" line
    onwards is ignored, because a few-shot prompted model often keeps writing
    new question/answer pairs and their "####" markers are not the answer to
    the question that was asked.

    The number after the last "####" marker is preferred; if there is no
    marker, the last number in the text is used.

    Args:
        text: The generated completion (prompt already stripped).

    Returns:
        The answer as a normalized string (no thousands separators, no
        trailing punctuation, integral values without a fractional part),
        or "" when the text contains no number.
    """
    answer_part = _GSM_NEXT_QUESTION.split(text, maxsplit=1)[0]
    matches = _GSM_MARKED.findall(answer_part)
    if not matches:
        # Fallback: last number in the text
        matches = _GSM_ANY_NUMBER.findall(answer_part)
    return _normalize_gsm_number(matches[-1]) if matches else ""
88
+
89
# Evaluate the first GSM_N problems, but never more than the split contains
# (an oversized GSM_N would otherwise raise IndexError on gsm[i]).
gsm_total = max(0, min(GSM_N, len(gsm)))
correct = 0
for i in range(gsm_total):
    example = gsm[i]  # fetch the row once instead of once per field
    q = example["question"]
    # The gold answer is the text after the final "####" in the reference solution.
    gold = example["answer"].split("####")[-1].strip().replace(",", "")

    prompt = f"{FEW_SHOT_EXAMPLES}\n\nQ: {q}\nA:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(DEVICE)
    with torch.no_grad():
        # Greedy decoding. No temperature is passed: it only applies to
        # sampling and triggers a configuration warning with do_sample=False.
        out = model.generate(**inputs, max_new_tokens=256, do_sample=False)
    # Decode only the newly generated tokens (everything after the prompt).
    response = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    pred = extract_gsm_answer(response)

    if pred == gold:
        correct += 1

    if (i + 1) % 50 == 0 or i == gsm_total - 1:
        print(f" GSM8K: {i+1}/{gsm_total} done — {correct}/{i+1} correct ({correct/(i+1)*100:.1f}%)")

# Guard the empty run (GSM_N=0) instead of dividing by zero.
gsm_score = correct / gsm_total * 100 if gsm_total else 0.0
results["GSM8K"] = {"score": round(gsm_score, 1), "correct": correct, "total": gsm_total}
print(f"\nGSM8K Final: {gsm_score:.1f}% ({correct}/{gsm_total})")
110
+
111
+ # ============================================================
112
+ # 2. HumanEval — Code Generation (pass@1)
113
+ # Published Qwen2-3B: ~30-40% HumanEval
114
+ # ============================================================
115
+ print("\n" + "="*60)
116
+ print("BENCHMARK 2: HumanEval (Code Generation, pass@1)")
117
+ print("="*60)
118
+
119
try:
    he = load_dataset("openai/openai_humaneval", split="test")
    # HE_N (env var) limits the run; default is the full 164 problems. Capped
    # at the dataset size so he[i] below cannot overrun.
    HE_N = min(int(os.environ.get("HE_N", len(he))), len(he))
    print(f"Running {HE_N} / {len(he)} problems")

    code_correct = 0
    code_errors = 0
    for i in range(HE_N):
        problem = he[i]
        prompt = problem["prompt"]
        test_code = problem["test"]
        entry_point = problem["entry_point"]

        messages = [{"role": "user", "content": f"Complete this Python function. Return ONLY the function body, no explanation:\n\n{prompt}"}]
        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=2048).to(DEVICE)

        with torch.no_grad():
            # Greedy decoding; temperature is irrelevant when do_sample=False.
            out = model.generate(**inputs, max_new_tokens=512, do_sample=False)
        response = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)

        # Extract code: prefer a ```python fence, then any fence, else raw text.
        if "```python" in response:
            code = response.split("```python")[1].split("```")[0]
        elif "```" in response:
            code = response.split("```")[1]
        else:
            code = response

        # The dataset's test code only DEFINES check(candidate); it has to be
        # invoked on the entry point, otherwise every completion that merely
        # parses is counted as a pass.
        program = f"{prompt}{code}\n{test_code}\ncheck({entry_point})\n"
        try:
            # SECURITY NOTE(review): this executes model-generated code in the
            # benchmark process with no sandbox and no timeout; a completion
            # with an infinite loop will hang the run. Consider a subprocess
            # with a time limit.
            exec(program, {})
            code_correct += 1
        except (Exception, SystemExit):
            # SystemExit is included so a completion calling exit() counts as
            # a failure instead of terminating the whole benchmark.
            code_errors += 1

        if (i + 1) % 20 == 0 or i == HE_N - 1:
            print(f" HumanEval: {i+1}/{HE_N} done — {code_correct}/{i+1} pass ({code_correct/(i+1)*100:.1f}%)")

    # Guard the empty run (HE_N <= 0) instead of dividing by zero.
    he_score = code_correct / HE_N * 100 if HE_N > 0 else 0.0
    results["HumanEval"] = {"score": round(he_score, 1), "correct": code_correct, "total": HE_N}
    print(f"\nHumanEval Final: {he_score:.1f}% ({code_correct}/{HE_N})")
except Exception as e:
    # Best effort: a failure here (e.g. dataset download) must not stop the
    # remaining benchmarks.
    print(f"HumanEval skipped: {e}")
    traceback.print_exc()
165
+
166
+ # ============================================================
167
+ # 3. MMLU — General Knowledge (5-shot)
168
+ # Published Qwen2-3B: ~53-55% on MMLU
169
+ # ============================================================
170
+ print("\n" + "="*60)
171
+ print("BENCHMARK 3: MMLU (General Knowledge, 5-shot)")
172
+ print("="*60)
173
+
174
+ try:
175
# Use cais/mmlu which has all subjects
mmlu = load_dataset("cais/mmlu", "all", split="test")
mmlu_dev = load_dataset("cais/mmlu", "all", split="dev")
# MMLU_N (env var) limits the run; default is the whole test split. Capped at
# the split size so mmlu[i] in the evaluation loop cannot overrun.
# NOTE(review): a partial run takes the FIRST MMLU_N rows; if the split is
# ordered by subject this is not a representative sample — confirm.
MMLU_N = min(int(os.environ.get("MMLU_N", len(mmlu))), len(mmlu))
print(f"Running {MMLU_N} / {len(mmlu)} problems")

# Answer letters, indexed by the dataset's integer "answer" field.
CHOICES = ["A", "B", "C", "D"]
182
+
183
def format_mmlu_question(item, few_shot_items=None, max_shots=5, labels=("A", "B", "C", "D")):
    """Format an MMLU question with optional few-shot examples.

    Args:
        item: Mapping with "question", "choices" (list of answer texts) and
            optionally "subject"; this is the question to be answered.
        few_shot_items: Optional sequence of solved examples with the same
            keys plus an integer "answer" index. Only the first ``max_shots``
            are used.
        max_shots: Maximum number of few-shot examples to include (default 5).
        labels: Letters used to label the choices, indexed by position.

    Returns:
        The prompt text: a subject header, the solved examples, then the
        target question ending in "Answer:" with no trailing whitespace.
    """

    def render(entry, answer_line):
        # One question block: the question, one labelled line per choice,
        # then the answer line.
        lines = [f"Question: {entry['question']}"]
        lines.extend(f"{labels[j]}. {choice}" for j, choice in enumerate(entry["choices"]))
        lines.append(answer_line)
        return "\n".join(lines)

    # Fall back to a generic subject when the field is missing or empty.
    subject = (item.get("subject") or "general knowledge").replace("_", " ")
    parts = [f"The following are multiple choice questions about {subject}.\n\n"]

    for fs in (few_shot_items or [])[:max_shots]:
        parts.append(render(fs, f"Answer: {labels[fs['answer']]}") + "\n\n")

    parts.append(render(item, "Answer:"))
    return "".join(parts)
200
+
201
# Group dev set by subject for few-shot
dev_by_subject = {}
for dev_item in mmlu_dev:
    dev_by_subject.setdefault(dev_item.get("subject", "unknown"), []).append(dev_item)

# Prompt budget in tokens (same cap the other benchmarks use).
MAX_PROMPT_TOKENS = 2048
# Never evaluate more rows than the split holds (avoids IndexError on mmlu[i]).
mmlu_total = max(0, min(MMLU_N, len(mmlu)))
mmlu_correct = 0
subject_results = {}

for i in range(mmlu_total):
    item = mmlu[i]
    subj = item.get("subject", "unknown")
    few_shot = dev_by_subject.get(subj, [])[:5]

    prompt = format_mmlu_question(item, few_shot)
    inputs = tokenizer(prompt, return_tensors="pt")
    # If the prompt is over budget, drop few-shot examples from the end
    # rather than truncating: truncation removes the tail of the prompt,
    # which is the question actually being asked.
    while inputs["input_ids"].shape[1] > MAX_PROMPT_TOKENS and few_shot:
        few_shot = few_shot[:-1]
        prompt = format_mmlu_question(item, few_shot)
        inputs = tokenizer(prompt, return_tensors="pt")
    if inputs["input_ids"].shape[1] > MAX_PROMPT_TOKENS:
        # Even the zero-shot prompt is too long; truncate as a last resort.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=MAX_PROMPT_TOKENS)
    inputs = inputs.to(DEVICE)

    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=1, do_sample=False)
    response = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True).strip()

    # Extract first letter answer; anything that is not A-D counts as wrong.
    pred_letter = response[0].upper() if response and response[0].upper() in CHOICES else ""
    gold_letter = CHOICES[item["answer"]]

    is_correct = pred_letter == gold_letter
    if is_correct:
        mmlu_correct += 1

    # Per-subject tally.
    tally = subject_results.setdefault(subj, {"correct": 0, "total": 0})
    tally["total"] += 1
    if is_correct:
        tally["correct"] += 1

    if (i + 1) % 500 == 0 or i == mmlu_total - 1:
        print(f" MMLU: {i+1}/{mmlu_total} done — {mmlu_correct}/{i+1} correct ({mmlu_correct/(i+1)*100:.1f}%)")

# Guard the empty run (MMLU_N=0) instead of dividing by zero.
mmlu_score = mmlu_correct / mmlu_total * 100 if mmlu_total else 0.0
results["MMLU"] = {"score": round(mmlu_score, 1), "correct": mmlu_correct, "total": mmlu_total}

# Top and bottom subjects (only subjects with at least 5 evaluated questions)
subject_scores = {
    subj: data["correct"] / data["total"] * 100
    for subj, data in subject_results.items()
    if data["total"] >= 5
}
top_subjects = sorted(subject_scores.items(), key=lambda x: -x[1])[:5]
bottom_subjects = sorted(subject_scores.items(), key=lambda x: x[1])[:5]

results["MMLU_top_subjects"] = {s: round(v, 1) for s, v in top_subjects}
results["MMLU_bottom_subjects"] = {s: round(v, 1) for s, v in bottom_subjects}

print(f"\nMMLU Final: {mmlu_score:.1f}% ({mmlu_correct}/{mmlu_total})")
print(f"Top subjects: {top_subjects[:3]}")
print(f"Bottom subjects: {bottom_subjects[:3]}")
258
+
259
+ except Exception as e:
260
+ print(f"MMLU skipped: {e}")
261
+ traceback.print_exc()
262
+
263
+ # ============================================================
264
+ # 4. Speed Benchmark
265
+ # ============================================================
266
+ print("\n" + "="*60)
267
+ print("BENCHMARK 4: Inference Speed")
268
+ print("="*60)
269
+
270
# Fixed chat prompt used for the throughput runs; `inputs` is also reused by
# the TTFT section further down.
messages = [{"role": "user", "content": "Write a detailed explanation of how neural networks learn through backpropagation."}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(text, return_tensors="pt").to(DEVICE)
prompt_len = inputs["input_ids"].shape[1]

# Warmup (3 runs)
print("Warming up...")
for _ in range(3):
    with torch.no_grad():
        model.generate(**inputs, max_new_tokens=32, do_sample=False)

# Actual speed tests
speeds = []
for run in range(5):
    # Synchronize before and after so the timer covers all queued GPU work.
    if DEVICE == "cuda":
        torch.cuda.synchronize()
    # perf_counter is monotonic and high-resolution; time.time() is wall-clock
    # and can jump when the system clock is adjusted.
    start = time.perf_counter()
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=256, temperature=0.7, do_sample=True)
    if DEVICE == "cuda":
        torch.cuda.synchronize()
    elapsed = time.perf_counter() - start
    # Count only newly generated tokens (sampling may stop early at EOS).
    tokens = out.shape[1] - prompt_len
    tok_s = tokens / elapsed
    speeds.append(tok_s)
    print(f" Run {run+1}: {tok_s:.1f} tok/s ({tokens} tokens in {elapsed:.2f}s)")

avg_speed = sum(speeds) / len(speeds)
max_speed = max(speeds)
min_speed = min(speeds)

results["speed"] = {
    "avg_tok_s": round(avg_speed, 1),
    "max_tok_s": round(max_speed, 1),
    "min_tok_s": round(min_speed, 1),
    "device": DEVICE,
    "gpu": torch.cuda.get_device_name(0) if DEVICE == "cuda" else "N/A",
    "dtype": "bfloat16" if DEVICE == "cuda" else "float32",
}
print(f"\nSpeed: avg {avg_speed:.1f} tok/s (min {min_speed:.1f}, max {max_speed:.1f})")
309
+
310
+ # ============================================================
311
+ # 5. TTFT (Time to First Token)
312
+ # ============================================================
313
+ print("\n" + "="*60)
314
+ print("BENCHMARK 5: Time to First Token (TTFT)")
315
+ print("="*60)
316
+
317
def _nearest_rank(sorted_values, pct):
    """Return the nearest-rank percentile *pct* (0-100] of an ascending list."""
    # ceil(pct * n / 100) computed with integer arithmetic, as a 0-based index.
    rank = -(-pct * len(sorted_values) // 100)
    return sorted_values[max(0, rank - 1)]


# Number of timed single-token generations.
TTFT_RUNS = 10
ttft_times = []
for run in range(TTFT_RUNS):
    # Synchronize before and after so the timer covers all queued GPU work.
    if DEVICE == "cuda":
        torch.cuda.synchronize()
    # perf_counter is monotonic and high-resolution; time.time() is not.
    start = time.perf_counter()
    with torch.no_grad():
        # One new token: the measured time is the whole generate() call that
        # produces the first token for the speed-test prompt.
        out = model.generate(**inputs, max_new_tokens=1, do_sample=False)
    if DEVICE == "cuda":
        torch.cuda.synchronize()
    ttft = (time.perf_counter() - start) * 1000  # ms
    ttft_times.append(ttft)

avg_ttft = sum(ttft_times) / len(ttft_times)
# Percentiles are derived from the sample count instead of fixed indices, so
# they stay valid if TTFT_RUNS changes. With only 10 samples p99 is the maximum.
ordered_ttft = sorted(ttft_times)
p50_ttft = _nearest_rank(ordered_ttft, 50)
p99_ttft = _nearest_rank(ordered_ttft, 99)

results["ttft"] = {
    "avg_ms": round(avg_ttft, 1),
    "p50_ms": round(p50_ttft, 1),
    "p99_ms": round(p99_ttft, 1),
}
print(f"TTFT: avg {avg_ttft:.1f}ms, p50 {p50_ttft:.1f}ms, p99 {p99_ttft:.1f}ms")
339
+
340
+ # ============================================================
341
+ # VRAM Usage
342
+ # ============================================================
343
# Peak-memory report; only meaningful (and only available) on CUDA.
if DEVICE == "cuda":
    # Peak bytes allocated by tensors so far vs. the card's total, both in GB.
    vram_used = torch.cuda.max_memory_allocated() / 1e9
    vram_total = torch.cuda.get_device_properties(0).total_memory / 1e9
    vram_pct = vram_used / vram_total * 100
    results["vram"] = {
        "used_gb": round(vram_used, 2),
        "total_gb": round(vram_total, 2),
        "utilization_pct": round(vram_pct, 1),
    }
    print(f"\nVRAM: {vram_used:.2f} GB / {vram_total:.2f} GB ({vram_pct:.1f}%)")
352
+
353
+ # ============================================================
354
+ # Summary
355
+ # ============================================================
356
+ print("\n" + "="*60)
357
+ print("FINAL RESULTS — Synapse-3B (TIES Merged)")
358
+ print("="*60)
359
+
360
# Reference scores for Qwen2-3B (published)
print(f"\n{'Benchmark':<20} {'Synapse-3B':>12} {'Qwen2-3B (ref)':>15}")
print("-" * 50)
# Each row is printed only if its benchmark actually produced a result
# (HumanEval and MMLU are skipped when they raise).
if "GSM8K" in results:
    print(f"{'GSM8K':<20} {results['GSM8K']['score']:>11.1f}% {'~54%':>15}")
if "HumanEval" in results:
    print(f"{'HumanEval':<20} {results['HumanEval']['score']:>11.1f}% {'~36%':>15}")
if "MMLU" in results:
    print(f"{'MMLU (5-shot)':<20} {results['MMLU']['score']:>11.1f}% {'~53%':>15}")
if "speed" in results:
    print(f"{'Tok/s (avg)':<20} {results['speed']['avg_tok_s']:>11.1f} {'N/A':>15}")
if "ttft" in results:
    print(f"{'TTFT (avg)':<20} {results['ttft']['avg_ms']:>10.1f}ms {'N/A':>15}")

print(json.dumps(results, indent=2))

# Save
import tempfile

# Output location: SYNAPSE_BENCH_OUT overrides it (same env-var style as
# GSM_N / HE_N / MMLU_N). The default is the platform temp directory, which is
# /tmp on a typical Linux setup, instead of a hard-coded POSIX-only path.
out_path = os.environ.get(
    "SYNAPSE_BENCH_OUT",
    os.path.join(tempfile.gettempdir(), "synapse-bench-results.json"),
)
with open(out_path, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)
print(f"\nResults saved to {out_path}")
@@ -311,51 +311,166 @@ tags:
311
311
  - specialist-swarm
312
312
  - continuous-learning
313
313
  - merged-model
314
+ - mamba
315
+ - xlstm
316
+ - mixture-of-experts
317
+ - fast-weights
318
+ - brain-inspired
319
+ - rust
320
+ - local-inference
314
321
  base_model: {BASE_MODEL}
315
322
  model_type: qwen2
323
+ pipeline_tag: text-generation
324
+ datasets:
325
+ - gsm8k
326
+ - openwebmath
327
+ - microsoft/orca-math-word-problems-200k
328
+ - sahil2801/CodeAlpaca-20k
329
+ - nickrosh/Evol-Instruct-Code-80k-v1
330
+ - iamtarun/python_code_instructions_18k_alpaca
331
+ - Open-Orca/SlimOrca
332
+ - yahma/alpaca-cleaned
316
333
  ---
317
334
 
318
- # Synapse-3B: {model_name}
335
+ # Synapse-3B
319
336
 
320
- **A specialist model created by TITAN Synapse** — trained through continuous learning on domain-specific datasets, then merged into a single model.
337
+ **Small models that think together. And learn.**
338
+
339
+ Synapse-3B is a merged specialist model created by [TITAN Synapse](https://github.com/Djtony707/titan-synapse) — an open-source Rust inference engine that runs a swarm of tiny specialist models that collaborate and learn continuously on your GPU.
340
+
341
+ This model combines **4 specialist LoRA adapters** (math, code, general, coordinator) trained on curated datasets, then merged into a single model using **TIES merging** (Trim, Elect Sign, Merge) for minimal interference between specializations.
342
+
343
+ ## Key Features
344
+
345
+ - **4 specialist domains** merged into one model without catastrophic forgetting
346
+ - **TIES merging** — trims small deltas, elects signs by majority vote, merges only agreeing directions
347
+ - **Based on Qwen2.5-3B-Instruct** — strong Apache 2.0 base with multilingual support
348
+ - **Part of the Synapse ecosystem** — designed for the brain-inspired Synapse Architecture (Mamba + xLSTM + Sparse MoE + Fast Weights)
321
349
 
322
350
  ## How This Model Was Made
323
351
 
324
- 1. **Base model**: Qwen2.5-3B-Instruct
325
- 2. **Specialist training**: QLoRA fine-tuning on curated datasets
326
- 3. **Adapters merged**: {', '.join(specialists)}
327
- 4. **Merge method**: {method}
328
- 5. **Created**: {datetime.now().isoformat()}
352
+ ```
353
+ Base Model: Qwen/Qwen2.5-3B-Instruct (Apache 2.0)
354
+ |
355
+ +---> QLoRA (rank 64) ---> Math Specialist (GSM8K + OpenWebMath + Orca-Math, 50k samples)
356
+ +---> QLoRA (rank 64) ---> Code Specialist (CodeAlpaca + Evol-Instruct + Python-18k, 50k samples)
357
+ +---> QLoRA (rank 64) ---> General Specialist (SlimOrca + Alpaca-Cleaned, 50k samples)
358
+ +---> QLoRA (rank 32) ---> Coordinator (Synthetic routing, 5k samples)
359
+ |
360
+ +---> TIES Merge (trim 80%, sign election, agreement merge)
361
+ |
362
+ = Synapse-3B
363
+ ```
329
364
 
330
- ## Specialists Merged
365
+ ### Specialist Details
331
366
 
332
- | Specialist | Training Data | Focus |
333
- |---|---|---|
334
- | math | GSM8K + OpenWebMath + Orca-Math (50k samples) | Mathematical reasoning |
335
- | code | CodeAlpaca + Evol-Instruct + Python-18k (50k samples) | Code generation |
336
- | general | SlimOrca + Alpaca-Cleaned (50k samples) | General knowledge |
337
- | coordinator | Synthetic routing examples (5k samples) | Task routing |
367
+ | Specialist | Datasets | Samples | LoRA Rank | Focus |
368
+ |:---|:---|:---:|:---:|:---|
369
+ | **Math** | GSM8K, OpenWebMath, Orca-Math | 50,000 | 64 | Mathematical reasoning, step-by-step problem solving |
370
+ | **Code** | CodeAlpaca-20k, Evol-Instruct-Code-80k, Python-18k | 50,000 | 64 | Code generation, debugging, Python expertise |
371
+ | **General** | SlimOrca, Alpaca-Cleaned | 50,000 | 64 | General knowledge, instruction following, reasoning |
372
+ | **Coordinator** | Synthetic routing examples | 5,000 | 32 | Task analysis, specialist routing, swarm coordination |
373
+
374
+ ### Merge Method: TIES
375
+
376
+ [TIES (Trim, Elect Sign, Merge)](https://arxiv.org/abs/2306.01708) is used to combine adapters with minimal interference:
377
+
378
+ 1. **Trim** — Remove small-magnitude deltas (keep top 20% per parameter)
379
+ 2. **Elect Sign** — For each parameter, take a majority vote on the sign direction across all specialists
380
+ 3. **Merge** — Only average deltas that agree with the elected sign
381
+
382
+ This produces cleaner merges than simple averaging, preserving each specialist's strengths.
338
383
 
339
384
  ## Usage
340
385
 
386
+ ### With Transformers
387
+
341
388
  ```python
342
389
  from transformers import AutoModelForCausalLM, AutoTokenizer
343
390
 
344
- model = AutoModelForCausalLM.from_pretrained("Djtony707/synapse-3b")
345
- tokenizer = AutoTokenizer.from_pretrained("Djtony707/synapse-3b")
391
+ model = AutoModelForCausalLM.from_pretrained("djtony707/synapse-3b")
392
+ tokenizer = AutoTokenizer.from_pretrained("djtony707/synapse-3b")
393
+
394
+ messages = [{{"role": "user", "content": "Solve: If a train travels 120km in 2 hours, what is its speed in m/s?"}}]
395
+ text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
396
+ inputs = tokenizer(text, return_tensors="pt").to(model.device)
397
+ outputs = model.generate(**inputs, max_new_tokens=256, temperature=0.7)
398
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
346
399
  ```
347
400
 
348
- Or with TITAN Synapse engine:
401
+ ### With TITAN Synapse Engine (Rust, local inference)
402
+
349
403
  ```bash
404
+ # Install
405
+ curl -sSL https://raw.githubusercontent.com/Djtony707/titan-synapse/main/install.sh | bash
406
+
407
+ # Pull and run
350
408
  synapse pull synapse-3b
351
409
  synapse up
410
+
411
+ # OpenAI-compatible API on localhost:6900
412
+ curl http://localhost:6900/v1/chat/completions \\
413
+ -d '{{"model":"synapse-3b","messages":[{{"role":"user","content":"Hello!"}}]}}'
414
+ ```
415
+
416
+ ## The Synapse Architecture (v1.0 Target)
417
+
418
+ Synapse-3B is the foundation for the **Synapse Architecture** — a brain-inspired modular model that replaces monolithic transformers:
419
+
420
+ ```
421
+ THALAMUS (Mamba Router, O(n))
422
+ |
423
+ +--------------+--------------+
424
+ | | |
425
+ xLSTM Lang Sparse MoE Fast-Weight
426
+ Module Expert Pool Memory
427
+ O(n) top-k of 8+ Learn during
428
+ syntax, specialists inference,
429
+ grammar activate no backprop
430
+ ```
431
+
432
+ - **No O(n^2) attention** — Mamba (state-space) + xLSTM (recurrent)
433
+ - **Sparse activation** — only 2-3 of 8+ modules fire per token
434
+ - **Fast-weight memory** — learn new facts in ONE forward pass
435
+ - **Full observability** — every routing decision is transparent, no black box
436
+
437
+ ## Training Details
438
+
439
+ - **Hardware**: NVIDIA RTX 5090 (32GB VRAM)
440
+ - **Training framework**: QLoRA via TRL SFTTrainer
441
+ - **Quantization**: 4-bit NF4 (for training efficiency)
442
+ - **Learning rate**: 2e-4 with cosine scheduler
443
+ - **Epochs**: 3 per specialist
444
+ - **Batch size**: 2 (gradient accumulation 8, effective batch 16)
445
+ - **Max sequence length**: 2048 tokens
446
+ - **Training time**: ~2 hours per specialist on RTX 5090
447
+ - **Merge method**: TIES (trim ratio 0.8)
448
+ - **Created**: {datetime.now().strftime("%B %d, %Y")}
449
+
450
+ ## Limitations
451
+
452
+ - This is a 3B parameter model — it won't match 70B+ models on complex reasoning
453
+ - Trained on English-focused datasets; multilingual performance inherited from Qwen base
454
+ - The coordinator specialist is trained on synthetic routing data; real-world routing improves with use
455
+ - Best used as part of the TITAN Synapse swarm (multiple specialists collaborating)
456
+
457
+ ## Citation
458
+
459
+ ```bibtex
460
+ @misc{{synapse3b2026,
461
+ title={{Synapse-3B: A Merged Specialist Model for the TITAN Synapse Engine}},
462
+ author={{Tony Elliott}},
463
+ year={{2026}},
464
+ url={{https://huggingface.co/djtony707/synapse-3b}},
465
+ note={{Created with TITAN Synapse — https://github.com/Djtony707/titan-synapse}}
466
+ }}
352
467
  ```
353
468
 
354
469
  ## License
355
470
 
356
- Apache 2.0
471
+ Apache 2.0 — use it for anything.
357
472
 
358
- Built by [Tony Elliott](https://github.com/Djtony707) with TITAN Synapse.
473
+ Built by [Tony Elliott](https://github.com/Djtony707) with [TITAN Synapse](https://github.com/Djtony707/titan-synapse).
359
474
  """
360
475
  (output_dir / "README.md").write_text(card)
361
476
 
@@ -505,7 +620,7 @@ def export_gguf_native(model_dir: Path, output_path: Path):
505
620
  return output_path
506
621
 
507
622
 
508
- def push_to_hub(model_dir: Path, repo_name: str = "Djtony707/synapse-3b"):
623
+ def push_to_hub(model_dir: Path, repo_name: str = "djtony707/synapse-3b"):
509
624
  """Push merged model to HuggingFace Hub."""
510
625
  from huggingface_hub import HfApi
511
626
 
@@ -532,7 +647,7 @@ def main():
532
647
  help="GGUF quantization type (e.g., Q4_K_M, Q5_K_M, Q8_0)")
533
648
  parser.add_argument("--push", action="store_true",
534
649
  help="Push to HuggingFace Hub after merge")
535
- parser.add_argument("--repo", default="Djtony707/synapse-3b",
650
+ parser.add_argument("--repo", default="djtony707/synapse-3b",
536
651
  help="HuggingFace repo name for push")
537
652
  args = parser.parse_args()
538
653