titan-synapse 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/CONTRIBUTING.md +187 -0
  2. package/Cargo.lock +3976 -0
  3. package/Cargo.toml +10 -0
  4. package/LICENSE +190 -0
  5. package/PROGRESS.md +151 -0
  6. package/README.md +514 -0
  7. package/TEST_LOG.md +220 -0
  8. package/config/default.yaml +36 -0
  9. package/crates/synapse/Cargo.toml +70 -0
  10. package/crates/synapse/src/cli/bench.rs +44 -0
  11. package/crates/synapse/src/cli/eval.rs +395 -0
  12. package/crates/synapse/src/cli/export.rs +45 -0
  13. package/crates/synapse/src/cli/hub.rs +179 -0
  14. package/crates/synapse/src/cli/import.rs +35 -0
  15. package/crates/synapse/src/cli/learn.rs +53 -0
  16. package/crates/synapse/src/cli/mod.rs +10 -0
  17. package/crates/synapse/src/cli/models.rs +36 -0
  18. package/crates/synapse/src/cli/pull.rs +60 -0
  19. package/crates/synapse/src/cli/status.rs +52 -0
  20. package/crates/synapse/src/cli/train.rs +99 -0
  21. package/crates/synapse/src/config.rs +220 -0
  22. package/crates/synapse/src/dashboard.rs +281 -0
  23. package/crates/synapse/src/format/manifest.rs +57 -0
  24. package/crates/synapse/src/format/mod.rs +4 -0
  25. package/crates/synapse/src/format/packer.rs +213 -0
  26. package/crates/synapse/src/inference/engine.rs +361 -0
  27. package/crates/synapse/src/inference/kv_cache.rs +97 -0
  28. package/crates/synapse/src/inference/lora.rs +166 -0
  29. package/crates/synapse/src/inference/mod.rs +9 -0
  30. package/crates/synapse/src/inference/model.rs +167 -0
  31. package/crates/synapse/src/inference/sampler.rs +133 -0
  32. package/crates/synapse/src/inference/speculative.rs +153 -0
  33. package/crates/synapse/src/learn/cloud_fallback.rs +186 -0
  34. package/crates/synapse/src/learn/engine.rs +109 -0
  35. package/crates/synapse/src/learn/mod.rs +5 -0
  36. package/crates/synapse/src/main.rs +185 -0
  37. package/crates/synapse/src/memory/extractor.rs +201 -0
  38. package/crates/synapse/src/memory/graph.rs +332 -0
  39. package/crates/synapse/src/memory/hallucination.rs +259 -0
  40. package/crates/synapse/src/memory/mod.rs +7 -0
  41. package/crates/synapse/src/openai.rs +232 -0
  42. package/crates/synapse/src/server.rs +166 -0
  43. package/crates/synapse/src/streaming.rs +80 -0
  44. package/crates/synapse/src/swarm/coordinator.rs +198 -0
  45. package/crates/synapse/src/swarm/mod.rs +8 -0
  46. package/crates/synapse/src/swarm/orchestrator.rs +225 -0
  47. package/crates/synapse/src/swarm/pool.rs +64 -0
  48. package/crates/synapse/src/swarm/spawner.rs +199 -0
  49. package/crates/synapse/src/swarm/synthesizer.rs +26 -0
  50. package/crates/synapse/src/vram/manager.rs +67 -0
  51. package/crates/synapse/src/vram/mod.rs +3 -0
  52. package/docker-compose.yml +19 -0
  53. package/install.sh +311 -0
  54. package/package.json +36 -0
  55. package/python/Dockerfile.learn +18 -0
  56. package/python/requirements.txt +11 -0
  57. package/python/synapse_learn/__init__.py +0 -0
  58. package/python/synapse_learn/datasets.py +233 -0
  59. package/python/synapse_learn/real_eval.py +616 -0
  60. package/python/synapse_learn/server.py +431 -0
  61. package/python/synapse_learn/train_base.py +672 -0
  62. package/python/synapse_learn/train_specialists.py +787 -0
package/TEST_LOG.md ADDED
@@ -0,0 +1,220 @@
1
+ # TITAN Synapse — Test Log
2
+
3
+ **Date**: March 20, 2026 (updated throughout DAY 1-2)
4
+ **Platform**: RTX 5090 (32GB VRAM), i9-14900KF, 64GB DDR5-6000, Ubuntu 24.04
5
+ **Models**: Qwen2.5-3B-Instruct (Q4_K_M, 1.9GB) + Qwen2.5-0.5B-Instruct (Q4_K_M, 491MB)
6
+ **Rust**: 1.94.0 (release build)
7
+ **Binary size**: ~6.3MB
8
+
9
+ ---
10
+
11
+ ## Unit Tests (15/15 PASSING)
12
+
13
+ ```
14
+ cargo test — 15/15 PASSING in 0.01s
15
+ ```
16
+
17
+ | # | Test | Module | Result |
18
+ |---|------|--------|--------|
19
+ | 1 | test_default_config | config | PASS |
20
+ | 2 | test_config_serialization | config | PASS |
21
+ | 3 | test_load_missing_config | config | PASS |
22
+ | 4 | test_greedy_sampling | inference::sampler | PASS |
23
+ | 5 | test_empty_logits | inference::sampler | PASS |
24
+ | 6 | test_stochastic_sampling | inference::sampler | PASS |
25
+ | 7 | test_cache_allocation | inference::kv_cache | PASS |
26
+ | 8 | test_knowledge_graph | memory::graph | PASS |
27
+ | 9 | test_preferences | memory::graph | PASS |
28
+ | 10 | test_hebbian_routing | memory::graph | PASS |
29
+ | 11 | test_specialist_stats | memory::graph | PASS |
30
+ | 12 | test_manifest_creation | format::manifest | PASS |
31
+ | 13 | test_manifest_serialization | format::manifest | PASS |
32
+ | 14 | test_pack_and_unpack | format::packer | PASS |
33
+ | 15 | test_list_bundles | format::packer | PASS |
34
+
35
+ ---
36
+
37
+ ## Benchmark Results — CPU (Qwen2.5-3B, Q4_K_M)
38
+
39
+ ```
40
+ synapse bench — 4 prompts, 759 tokens total (CPU inference)
41
+ ```
42
+
43
+ | Prompt | Tokens | Time | Tok/s |
44
+ |--------|--------|------|-------|
45
+ | Python decorator explanation | 109 | 5,001ms | 21.8 |
46
+ | SQL top-10 query | 139 | 6,467ms | 21.5 |
47
+ | TCP vs UDP explanation | 256 | 10,893ms | 23.5 |
48
+ | Go garbage collection | 255 | 10,875ms | 23.4 |
49
+ | **Average** | **190** | **8,309ms** | **23 tok/s** |
50
+
51
+ ---
52
+
53
+ ## Benchmark Results — CUDA GPU (Qwen2.5-3B, Q4_K_M, RTX 5090)
54
+
55
+ ```
56
+ CUDA 12.8 (Blackwell) — 5x speedup over CPU
57
+ ```
58
+
59
+ | Test | Tokens | Time | Tok/s |
60
+ |------|--------|------|-------|
61
+ | TCP vs UDP (run 1) | 119 | 935ms | 127.3 |
62
+ | TCP vs UDP (run 2) | 119 | 1,014ms | 117.4 |
63
+ | TCP vs UDP (run 3) | 120 | 1,017ms | 118.0 |
64
+ | Prime function (256 tokens) | 256 | 7,403ms | 34.6 |
65
+ | BST class (512 tokens) | 512 | 4,028ms | 127.1 |
66
+ | Programming languages list | 128 | ~1,300ms | 97.6 |
67
+ | **Average sustained** | — | — | **~100-128 tok/s** |
68
+
69
+ **Model load times (GPU)**:
70
+ - 3B model: **0.6s** (was 1.1s on CPU — 1.8x faster)
71
+ - 0.5B model: **0.3s** (was 0.7s on CPU — 2.3x faster)
72
+
73
+ **Consistency** (3 runs, same prompt, 119–120 tokens):
74
+ - Run 1: 119 tokens in 935ms (127.3 tok/s)
75
+ - Run 2: 119 tokens in 1,014ms (117.4 tok/s)
76
+ - Run 3: 120 tokens in 1,017ms (118.0 tok/s)
77
+ - **Run-to-run spread**: <8% in tok/s (slowest run relative to the fastest) — very consistent
78
+
79
+ ---
80
+
81
+ ## Integration Tests (Live Server, RTX 5090)
82
+
83
+ All tests run against the live server at `http://192.168.1.11:6900`
84
+
85
+ ### Test 1: Health Check
86
+ - **Endpoint**: `GET /health`
87
+ - **Result**: PASS
88
+ - **Response**: `ok`
89
+ - **HTTP Code**: 200
90
+
91
+ ### Test 2: List Models
92
+ - **Endpoint**: `GET /v1/models`
93
+ - **Result**: PASS
94
+ - **Response**: 4 models listed (synapse, synapse/general, synapse/python_expert, synapse/sql_expert)
95
+
96
+ ### Test 3: API Status (Enhanced)
97
+ - **Endpoint**: `GET /api/status`
98
+ - **Result**: PASS
99
+ - **Response**: Includes `models_loaded: ["qwen2.5-3b-instruct-q4_k_m", "qwen2.5-0.5b-instruct-q4_k_m"]`, Hebbian pathway data, knowledge stats
100
+
101
+ ### Test 4: Simple Math — "What is 2+2?"
102
+ - **Endpoint**: `POST /v1/chat/completions`
103
+ - **Result**: PASS
104
+ - **Response**: `"2 + 2 equals 4."` (correct, clean stop)
105
+ - **Usage**: `{prompt_tokens: 34, completion_tokens: 8, total_tokens: 42}`
106
+
107
+ ### Test 5: Python Code Generation
108
+ - **Endpoint**: `POST /v1/chat/completions`
109
+ - **Result**: PASS
110
+ - **Response**: Correct decorator explanation with working code example
111
+
112
+ ### Test 6: SSE Streaming
113
+ - **Endpoint**: `POST /v1/chat/completions` (stream=true)
114
+ - **Result**: PASS
115
+ - **Response**: Role delta → content deltas → [DONE], proper SSE format
116
+
117
+ ### Test 7: SQL Query Generation
118
+ - **Endpoint**: `POST /v1/chat/completions`
119
+ - **Result**: PASS
120
+ - **Response**: Correct SQL with JOIN, GROUP BY, COUNT
121
+ - **Specialist routed**: sql_expert
122
+
123
+ ### Test 8: Token Counting Accuracy
124
+ - **Endpoint**: `POST /v1/chat/completions`
125
+ - **Result**: PASS
126
+ - **Response**: `usage` field with accurate prompt_tokens, completion_tokens, total_tokens
127
+ - **Verified**: total_tokens = prompt_tokens + completion_tokens
128
+
129
+ ### Test 9: Hebbian Routing Accumulation
130
+ - **Method**: Made 3 Python queries, then checked /api/status
131
+ - **Result**: PASS
132
+ - **Hebbian state**: `python_expert: strength=4, avg_score=4.0`, `sql_expert: strength=3`, `general: strength=2`
133
+ - **Verified**: Pathways strengthen with repeated use
134
+
135
+ ### Test 10: Multi-Model Loading
136
+ - **Result**: PASS
137
+ - **Models loaded**: qwen2.5-3b-instruct-q4_k_m (1.1s), qwen2.5-0.5b-instruct-q4_k_m (0.7s)
138
+ - **Simultaneous**: Both models in memory, auto-selected by engine
139
+
140
+ ### Test 11: Counting Task
141
+ - **Prompt**: "Count from 1 to 5"
142
+ - **Result**: PASS
143
+ - **Response**: "1\n2\n3\n4\n5" (correct, clean)
144
+
145
+ ---
146
+
147
+ ## Performance Summary
148
+
149
+ | Metric | CPU | GPU (CUDA) |
150
+ |--------|-----|------------|
151
+ | Model load time (3B) | 1.1s | **0.6s** |
152
+ | Model load time (0.5B) | 0.7s | **0.3s** |
153
+ | Health check latency | <1ms | <1ms |
154
+ | Short response (8 tokens) | ~370ms | ~80ms |
155
+ | Medium response (128 tokens) | ~5.5s | **~1s** |
156
+ | Long response (512 tokens) | ~22s | **~4s** |
157
+ | **Throughput (3B Q4)** | **21-24 tok/s** | **97-128 tok/s** |
158
+ | VRAM used by models | N/A | ~2.4 GB |
159
+
160
+ ---
161
+
162
+ ## Model Output Samples
163
+
164
+ ### Math
165
+ ```
166
+ Q: What is 2+2?
167
+ A: 2 + 2 equals 4.
168
+ ```
169
+
170
+ ### Code
171
+ ```
172
+ Q: What is a Python decorator? Explain briefly.
173
+ A: A Python decorator is a special function that adds functionality
174
+ to another function without modifying its code. [includes working
175
+ code example with @my_decorator syntax]
176
+ ```
177
+
178
+ ### SQL
179
+ ```
180
+ Q: Write a SQL query to find the top 5 users by order count.
181
+ A: SELECT u.user_id, u.user_name, COUNT(o.order_id) AS order_count
182
+ FROM users u JOIN orders o ON u.user_id = o.user_id
183
+ GROUP BY u.user_id, u.user_name
184
+ ORDER BY order_count DESC LIMIT 5;
185
+ ```
186
+
187
+ ---
188
+
189
+ ## Issues Found & Fixed
190
+
191
+ 1. **Tensor rank error**: candle's quantized_qwen2 returns `(batch, vocab)` not `(batch, seq, vocab)`. Fixed with `squeeze(0)`.
192
+
193
+ 2. **Chat template**: Model requires `<|im_start|>/<|im_end|>` format. Added `format_chat_prompt()`.
194
+
195
+ 3. **Stop token bleed**: Model generated past answer. Fixed by collecting all stop token IDs.
196
+
197
+ 4. **`gen` reserved keyword**: Rust 2024 reserves `gen`. Renamed variable.
198
+
199
+ 5. **bench.rs after GenerationResult**: `response.len()` broke when return type changed from String to GenerationResult. Fixed to use `response.completion_tokens`.
200
+
201
+ 6. **KnowledgeGraph not Send+Sync**: `rusqlite::Connection` uses RefCell. Fixed with `std::sync::Mutex<Connection>`.
202
+
203
+ 7. **CUDA "no cuda implementation for rms-norm"**: Only `candle-core` had the `cuda` feature. Fixed by enabling `cuda` on `candle-nn` and `candle-transformers` too. See [candle#1916](https://github.com/huggingface/candle/issues/1916).
204
+
205
+ 8. **CUDA 12.6 → 12.8**: RTX 5090 (Blackwell) requires compute capability sm_120, which needs CUDA 12.8+. Upgraded from 12.6 to 12.8.
206
+
207
+ ---
208
+
209
+ ## Build Info
210
+
211
+ ```
212
+ Rust edition: 2024
213
+ Cargo workspace: titan-synapse
214
+ Binary crate: synapse
215
+ Dependencies: 385 crates
216
+ Build time (release, CPU): ~36s (first), ~3s (incremental)
217
+ Build time (release, CUDA): ~23s (first), ~12s (incremental)
218
+ CUDA toolkit: 12.8 (Blackwell sm_120)
219
+ Target: x86_64-unknown-linux-gnu
220
+ ```
@@ -0,0 +1,36 @@
1
+ port: 6900
2
+
3
+ coordinator_model: qwen3-0.6b
4
+ base_model: qwen3-3b
5
+
6
+ learning:
7
+ enabled: true
8
+ min_pairs_before_training: 10
9
+ sidecar_url: http://localhost:8090
10
+ eval_threshold: 3.0
11
+
12
+ specialists:
13
+ - name: general
14
+ capabilities: [general, chat, help]
15
+ system_prompt: "You are a helpful AI assistant powered by TITAN Synapse."
16
+ priority: 50
17
+
18
+ - name: python_expert
19
+ capabilities: [python, debugging, testing, refactoring, fastapi, django]
20
+ system_prompt: "You are an expert Python developer. Write clean, efficient, well-tested code."
21
+ priority: 60
22
+
23
+ - name: sql_expert
24
+ capabilities: [sql, database, query, postgres, mysql, sqlite]
25
+ system_prompt: "You are an expert database engineer. Write optimized SQL queries."
26
+ priority: 60
27
+
28
+ - name: devops_expert
29
+ capabilities: [docker, kubernetes, ci, cd, deploy, infrastructure, terraform]
30
+ system_prompt: "You are a DevOps expert. Focus on reliability, automation, and security."
31
+ priority: 55
32
+
33
+ - name: frontend_expert
34
+ capabilities: [react, javascript, typescript, css, html, ui, ux, nextjs, vue]
35
+ system_prompt: "You are a frontend expert. Build fast, accessible, beautiful interfaces."
36
+ priority: 55
@@ -0,0 +1,70 @@
1
+ [package]
2
+ name = "synapse"
3
+ version.workspace = true
4
+ edition.workspace = true
5
+ license.workspace = true
6
+ repository.workspace = true
7
+ description.workspace = true
8
+ default-run = "synapse"
9
+
10
+ [[bin]]
11
+ name = "synapse"
12
+ path = "src/main.rs"
13
+
14
+ [dependencies]
15
+ # HTTP / Async
16
+ axum = { version = "0.8", features = ["macros"] }
17
+ tokio = { version = "1", features = ["full"] }
18
+ tokio-stream = "0.1"
19
+ tower-http = { version = "0.6", features = ["cors", "trace"] }
20
+
21
+ # CLI
22
+ clap = { version = "4", features = ["derive"] }
23
+
24
+ # Serialization
25
+ serde = { version = "1", features = ["derive"] }
26
+ serde_json = "1"
27
+ serde_yaml = "0.9"
28
+
29
+ # Inference — we use candle for tensor ops (pure Rust, no C deps)
30
+ candle-core = "0.8"
31
+ candle-nn = "0.8"
32
+ candle-transformers = "0.8"
33
+ tokenizers = "0.21"
34
+ safetensors = "0.4"
35
+ half = "2"
36
+ memmap2 = "0.9"
37
+
38
+ # GPU monitoring
39
+ # nvml-wrapper = "0.10" # Linux only — enabled via feature flag
40
+
41
+ # Storage
42
+ rusqlite = { version = "0.32", features = ["bundled"] }
43
+
44
+ # HTTP client
45
+ reqwest = { version = "0.12", features = ["json", "stream"] }
46
+
47
+ # Logging
48
+ tracing = "0.1"
49
+ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
50
+
51
+ # Utils
52
+ uuid = { version = "1", features = ["v4"] }
53
+ chrono = { version = "0.4", features = ["serde"] }
54
+ anyhow = "1"
55
+ thiserror = "2"
56
+ dirs = "6"
57
+ indicatif = "0.17"
58
+ colored = "3"
59
+ futures = "0.3"
60
+ async-stream = "0.3"
61
+ bytes = "1"
62
+
63
+ [features]
64
+ default = []
65
+ cuda = ["candle-core/cuda", "candle-nn/cuda", "candle-transformers/cuda"]
66
+ metal = ["candle-core/metal", "candle-nn/metal", "candle-transformers/metal"]
67
+ nvidia = [] # Reserved for NVML GPU monitoring; currently enables nothing (the nvml-wrapper dependency above is commented out)
68
+
69
+ [dev-dependencies]
70
+ tempfile = "3"
@@ -0,0 +1,44 @@
1
+ use anyhow::Result;
2
+ use colored::Colorize;
3
+ use crate::config::SynapseConfig;
4
+ use crate::inference::InferenceEngine;
5
+
6
+ pub async fn run(config: &SynapseConfig, model: Option<&str>) -> Result<()> {
7
+ let model_name = model.unwrap_or(&config.base_model);
8
+ println!("{} {model_name}", "Benchmarking".bold().cyan());
9
+ println!("{}", "═".repeat(50));
10
+
11
+ let engine = InferenceEngine::new(config)?;
12
+
13
+ let prompts = [
14
+ "What is a Python decorator?",
15
+ "Write a SQL query to find the top 10 users by posts.",
16
+ "Explain the difference between TCP and UDP.",
17
+ "How does garbage collection work in Go?",
18
+ ];
19
+
20
+ let mut total_tokens = 0u64;
21
+ let start = std::time::Instant::now();
22
+
23
+ for prompt in &prompts {
24
+ let prompt_start = std::time::Instant::now();
25
+ let response = engine.generate(prompt, None, 256, 0.7).await?;
26
+ let elapsed = prompt_start.elapsed();
27
+
28
+ let tokens = response.completion_tokens as u64;
29
+ total_tokens += tokens;
30
+
31
+ println!(" {} {} tokens in {:.0}ms ({:.1} tok/s)",
32
+ "•".green(), tokens, elapsed.as_millis(), response.tok_per_sec);
33
+ }
34
+
35
+ let total_elapsed = start.elapsed();
36
+ let avg_tok_s = total_tokens as f64 / total_elapsed.as_secs_f64();
37
+
38
+ println!("\n{}", "Results".bold().yellow());
39
+ println!(" {} {:.0} tok/s", "Average throughput:".bold(), avg_tok_s);
40
+ println!(" {} {:.0}ms", "Total time:".bold(), total_elapsed.as_millis());
41
+ println!(" {} {}", "Total tokens:".bold(), total_tokens);
42
+
43
+ Ok(())
44
+ }