titan-synapse 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +187 -0
- package/Cargo.lock +3976 -0
- package/Cargo.toml +10 -0
- package/LICENSE +190 -0
- package/PROGRESS.md +151 -0
- package/README.md +514 -0
- package/TEST_LOG.md +220 -0
- package/config/default.yaml +36 -0
- package/crates/synapse/Cargo.toml +70 -0
- package/crates/synapse/src/cli/bench.rs +44 -0
- package/crates/synapse/src/cli/eval.rs +395 -0
- package/crates/synapse/src/cli/export.rs +45 -0
- package/crates/synapse/src/cli/hub.rs +179 -0
- package/crates/synapse/src/cli/import.rs +35 -0
- package/crates/synapse/src/cli/learn.rs +53 -0
- package/crates/synapse/src/cli/mod.rs +10 -0
- package/crates/synapse/src/cli/models.rs +36 -0
- package/crates/synapse/src/cli/pull.rs +60 -0
- package/crates/synapse/src/cli/status.rs +52 -0
- package/crates/synapse/src/cli/train.rs +99 -0
- package/crates/synapse/src/config.rs +220 -0
- package/crates/synapse/src/dashboard.rs +281 -0
- package/crates/synapse/src/format/manifest.rs +57 -0
- package/crates/synapse/src/format/mod.rs +4 -0
- package/crates/synapse/src/format/packer.rs +213 -0
- package/crates/synapse/src/inference/engine.rs +361 -0
- package/crates/synapse/src/inference/kv_cache.rs +97 -0
- package/crates/synapse/src/inference/lora.rs +166 -0
- package/crates/synapse/src/inference/mod.rs +9 -0
- package/crates/synapse/src/inference/model.rs +167 -0
- package/crates/synapse/src/inference/sampler.rs +133 -0
- package/crates/synapse/src/inference/speculative.rs +153 -0
- package/crates/synapse/src/learn/cloud_fallback.rs +186 -0
- package/crates/synapse/src/learn/engine.rs +109 -0
- package/crates/synapse/src/learn/mod.rs +5 -0
- package/crates/synapse/src/main.rs +185 -0
- package/crates/synapse/src/memory/extractor.rs +201 -0
- package/crates/synapse/src/memory/graph.rs +332 -0
- package/crates/synapse/src/memory/hallucination.rs +259 -0
- package/crates/synapse/src/memory/mod.rs +7 -0
- package/crates/synapse/src/openai.rs +232 -0
- package/crates/synapse/src/server.rs +166 -0
- package/crates/synapse/src/streaming.rs +80 -0
- package/crates/synapse/src/swarm/coordinator.rs +198 -0
- package/crates/synapse/src/swarm/mod.rs +8 -0
- package/crates/synapse/src/swarm/orchestrator.rs +225 -0
- package/crates/synapse/src/swarm/pool.rs +64 -0
- package/crates/synapse/src/swarm/spawner.rs +199 -0
- package/crates/synapse/src/swarm/synthesizer.rs +26 -0
- package/crates/synapse/src/vram/manager.rs +67 -0
- package/crates/synapse/src/vram/mod.rs +3 -0
- package/docker-compose.yml +19 -0
- package/install.sh +311 -0
- package/package.json +36 -0
- package/python/Dockerfile.learn +18 -0
- package/python/requirements.txt +11 -0
- package/python/synapse_learn/__init__.py +0 -0
- package/python/synapse_learn/datasets.py +233 -0
- package/python/synapse_learn/real_eval.py +616 -0
- package/python/synapse_learn/server.py +431 -0
- package/python/synapse_learn/train_base.py +672 -0
- package/python/synapse_learn/train_specialists.py +787 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use futures::future::join_all;
|
|
3
|
+
use crate::config::SynapseConfig;
|
|
4
|
+
use crate::inference::{InferenceEngine, GenerationResult};
|
|
5
|
+
use crate::learn::CloudFallback;
|
|
6
|
+
use crate::memory::KnowledgeGraph;
|
|
7
|
+
use crate::openai::Message;
|
|
8
|
+
use super::coordinator::Coordinator;
|
|
9
|
+
use super::synthesizer::Synthesizer;
|
|
10
|
+
|
|
11
|
+
/// Top-level swarm orchestrator — decides single vs multi-specialist routing
|
|
12
|
+
/// Uses Hebbian routing and parallel specialist execution for swarm mode
|
|
13
|
+
/// Cloud fallback: when confidence is low, routes to cloud and learns from the response
|
|
14
|
+
pub struct Orchestrator {
|
|
15
|
+
coordinator: Coordinator,
|
|
16
|
+
synthesizer: Synthesizer,
|
|
17
|
+
cloud_fallback: Option<CloudFallback>,
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
impl Orchestrator {
|
|
21
|
+
pub fn new(config: &SynapseConfig) -> Self {
|
|
22
|
+
Self {
|
|
23
|
+
coordinator: Coordinator::new(config),
|
|
24
|
+
synthesizer: Synthesizer::new(),
|
|
25
|
+
cloud_fallback: CloudFallback::new(&config.cloud),
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/// Build context from full message history (not just last message)
|
|
30
|
+
fn build_context(messages: &[Message]) -> String {
|
|
31
|
+
if messages.len() <= 1 {
|
|
32
|
+
return messages.last().map(|m| m.content.clone()).unwrap_or_default();
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// Include recent conversation context (last 4 messages max)
|
|
36
|
+
let recent: Vec<&Message> = messages.iter().rev().take(4).collect::<Vec<_>>().into_iter().rev().collect();
|
|
37
|
+
let mut context = String::new();
|
|
38
|
+
for msg in &recent[..recent.len().saturating_sub(1)] {
|
|
39
|
+
context.push_str(&format!("[{}]: {}\n", msg.role, msg.content));
|
|
40
|
+
}
|
|
41
|
+
// Last message is the actual query
|
|
42
|
+
if let Some(last) = recent.last() {
|
|
43
|
+
context.push_str(&last.content);
|
|
44
|
+
}
|
|
45
|
+
context
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/// Process a chat request — route to specialist(s) and return response
|
|
49
|
+
pub async fn process(
|
|
50
|
+
&self,
|
|
51
|
+
messages: &[Message],
|
|
52
|
+
engine: &InferenceEngine,
|
|
53
|
+
max_tokens: Option<u32>,
|
|
54
|
+
temperature: Option<f32>,
|
|
55
|
+
knowledge: Option<&KnowledgeGraph>,
|
|
56
|
+
) -> Result<GenerationResult> {
|
|
57
|
+
let last_message = messages.last()
|
|
58
|
+
.map(|m| m.content.as_str())
|
|
59
|
+
.unwrap_or("");
|
|
60
|
+
|
|
61
|
+
let context = Self::build_context(messages);
|
|
62
|
+
let max_tokens = max_tokens.unwrap_or(2048);
|
|
63
|
+
let temperature = temperature.unwrap_or(0.7);
|
|
64
|
+
|
|
65
|
+
let routing = self.coordinator.route(last_message, knowledge);
|
|
66
|
+
|
|
67
|
+
match routing {
|
|
68
|
+
RoutingDecision::Single { specialist, confidence } => {
|
|
69
|
+
tracing::info!("Routing to specialist: {specialist} (confidence: {confidence:.2})");
|
|
70
|
+
|
|
71
|
+
// Cloud fallback: if confidence is too low and cloud is available,
|
|
72
|
+
// generate locally first, then ask cloud and learn from the difference
|
|
73
|
+
let cloud_threshold = CloudFallback::confidence_threshold();
|
|
74
|
+
if confidence < cloud_threshold {
|
|
75
|
+
if let Some(ref fallback) = self.cloud_fallback {
|
|
76
|
+
tracing::info!(
|
|
77
|
+
"⚡ Low confidence ({confidence:.2} < {cloud_threshold:.2}) — trying cloud fallback"
|
|
78
|
+
);
|
|
79
|
+
|
|
80
|
+
// Try local generation first (we still want the local attempt for DPO)
|
|
81
|
+
let local_result = engine.generate(&context, Some(&specialist), max_tokens, temperature).await;
|
|
82
|
+
let local_text = local_result.as_ref().ok().map(|r| r.text.as_str());
|
|
83
|
+
|
|
84
|
+
// Ask cloud for the better answer
|
|
85
|
+
if let Some(kg) = knowledge {
|
|
86
|
+
match fallback.fallback(last_message, &specialist, local_text, kg).await {
|
|
87
|
+
Ok(cloud_result) => {
|
|
88
|
+
tracing::info!(
|
|
89
|
+
"☁️ Cloud fallback used {}, learned={}",
|
|
90
|
+
cloud_result.model_used, cloud_result.learned
|
|
91
|
+
);
|
|
92
|
+
// Return the cloud's better response
|
|
93
|
+
return Ok(GenerationResult {
|
|
94
|
+
text: cloud_result.text,
|
|
95
|
+
prompt_tokens: 0,
|
|
96
|
+
completion_tokens: 0,
|
|
97
|
+
total_tokens: 0,
|
|
98
|
+
tok_per_sec: 0.0,
|
|
99
|
+
duration_ms: 0,
|
|
100
|
+
});
|
|
101
|
+
}
|
|
102
|
+
Err(e) => {
|
|
103
|
+
tracing::warn!("Cloud fallback failed: {e}, using local response");
|
|
104
|
+
// Fall through to local response
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
// Cloud failed, return local result if we have one
|
|
110
|
+
if let Ok(result) = local_result {
|
|
111
|
+
return Ok(result);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
let result = engine.generate(&context, Some(&specialist), max_tokens, temperature).await?;
|
|
117
|
+
|
|
118
|
+
// Reinforce the pathway on successful generation
|
|
119
|
+
if let Some(kg) = knowledge {
|
|
120
|
+
let _ = kg.reinforce_pathway(&[specialist.clone()], confidence);
|
|
121
|
+
let _ = kg.update_specialist_stats(
|
|
122
|
+
&specialist, "general", confidence, result.tok_per_sec,
|
|
123
|
+
);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
Ok(result)
|
|
127
|
+
}
|
|
128
|
+
RoutingDecision::Swarm { subtasks } => {
|
|
129
|
+
tracing::info!("⚡ Swarm mode: {} subtasks (PARALLEL)", subtasks.len());
|
|
130
|
+
let start = std::time::Instant::now();
|
|
131
|
+
let tokens_per_task = max_tokens / subtasks.len() as u32;
|
|
132
|
+
|
|
133
|
+
// Execute ALL subtasks in parallel
|
|
134
|
+
let futures: Vec<_> = subtasks.iter().map(|task| {
|
|
135
|
+
let prompt = format!("Task: {}\n\nContext: {context}", task.description);
|
|
136
|
+
let specialist = task.specialist.clone();
|
|
137
|
+
async move {
|
|
138
|
+
let result = engine.generate(
|
|
139
|
+
&prompt,
|
|
140
|
+
Some(&specialist),
|
|
141
|
+
tokens_per_task,
|
|
142
|
+
temperature,
|
|
143
|
+
).await;
|
|
144
|
+
(specialist, result)
|
|
145
|
+
}
|
|
146
|
+
}).collect();
|
|
147
|
+
|
|
148
|
+
let results = join_all(futures).await;
|
|
149
|
+
|
|
150
|
+
let mut texts = Vec::new();
|
|
151
|
+
let mut total_prompt = 0u32;
|
|
152
|
+
let mut total_completion = 0u32;
|
|
153
|
+
let mut specialists_used = Vec::new();
|
|
154
|
+
|
|
155
|
+
for (specialist, result) in results {
|
|
156
|
+
match result {
|
|
157
|
+
Ok(gen_result) => {
|
|
158
|
+
total_prompt += gen_result.prompt_tokens;
|
|
159
|
+
total_completion += gen_result.completion_tokens;
|
|
160
|
+
specialists_used.push(specialist.clone());
|
|
161
|
+
texts.push((specialist, gen_result.text));
|
|
162
|
+
}
|
|
163
|
+
Err(e) => {
|
|
164
|
+
tracing::warn!("Specialist {specialist} failed: {e}");
|
|
165
|
+
// Continue with other specialists — graceful degradation
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
if texts.is_empty() {
|
|
171
|
+
return Err(anyhow::anyhow!("All specialists failed in swarm mode"));
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
let elapsed = start.elapsed();
|
|
175
|
+
let merged = self.synthesizer.merge(&texts)?;
|
|
176
|
+
|
|
177
|
+
// Reinforce the swarm pathway
|
|
178
|
+
if let Some(kg) = knowledge {
|
|
179
|
+
let _ = kg.reinforce_pathway(&specialists_used, 4.0);
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
let tok_per_sec = if elapsed.as_secs_f64() > 0.0 {
|
|
183
|
+
total_completion as f64 / elapsed.as_secs_f64()
|
|
184
|
+
} else {
|
|
185
|
+
0.0
|
|
186
|
+
};
|
|
187
|
+
|
|
188
|
+
tracing::info!(
|
|
189
|
+
"⚡ Swarm complete: {} specialists, {} tokens in {:.1}s ({:.1} tok/s)",
|
|
190
|
+
specialists_used.len(), total_completion, elapsed.as_secs_f64(), tok_per_sec
|
|
191
|
+
);
|
|
192
|
+
|
|
193
|
+
Ok(GenerationResult {
|
|
194
|
+
text: merged,
|
|
195
|
+
prompt_tokens: total_prompt,
|
|
196
|
+
completion_tokens: total_completion,
|
|
197
|
+
total_tokens: total_prompt + total_completion,
|
|
198
|
+
tok_per_sec,
|
|
199
|
+
duration_ms: elapsed.as_millis() as u64,
|
|
200
|
+
})
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
/// Outcome of routing a request: one specialist or several in parallel.
#[derive(Debug, Clone, PartialEq)]
pub enum RoutingDecision {
    /// Handle the request with one specialist, chosen with the given confidence.
    Single { specialist: String, confidence: f32 },
    /// Split the request into subtasks, each handled by its own specialist.
    Swarm { subtasks: Vec<SubTask> },
}

/// One unit of work inside a swarm decision.
#[derive(Debug, Clone, PartialEq)]
pub struct SubTask {
    /// Name of the specialist that should handle this subtask.
    pub specialist: String,
    /// Description of the subtask; used as the task part of the prompt.
    pub description: String,
}

impl std::fmt::Display for RoutingDecision {
    /// Renders `Single(<name>, confidence=<x.xx>)` or `Swarm(<a>, <b>, ...)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            RoutingDecision::Single { specialist, confidence } => {
                write!(f, "Single({specialist}, confidence={confidence:.2})")
            }
            RoutingDecision::Swarm { subtasks } => {
                let names: Vec<&str> = subtasks.iter().map(|t| t.specialist.as_str()).collect();
                write!(f, "Swarm({})", names.join(", "))
            }
        }
    }
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
use std::collections::HashMap;
|
|
2
|
+
|
|
3
|
+
/// Manages the pool of active specialists and their LoRA adapters
#[derive(Debug)]
pub struct SpecialistPool {
    /// Currently loaded specialists, keyed by specialist name
    loaded: HashMap<String, SpecialistState>,
    /// Max specialists to keep loaded
    max_loaded: usize,
}

/// Bookkeeping for one specialist tracked by the pool.
#[derive(Debug, Clone)]
pub struct SpecialistState {
    /// Specialist name (the same string the pool uses as its map key).
    pub name: String,
    /// Whether the adapter is loaded; new entries start as `false`.
    pub adapter_loaded: bool,
    /// Time of the most recent request; used to pick the eviction victim.
    pub last_used: std::time::Instant,
    /// Number of times this specialist has been requested.
    pub request_count: u64,
}
|
|
17
|
+
|
|
18
|
+
impl SpecialistPool {
|
|
19
|
+
pub fn new(max_loaded: usize) -> Self {
|
|
20
|
+
Self {
|
|
21
|
+
loaded: HashMap::new(),
|
|
22
|
+
max_loaded,
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/// Ensure a specialist is loaded, evicting LRU if necessary
|
|
27
|
+
pub fn ensure_loaded(&mut self, name: &str) -> bool {
|
|
28
|
+
if self.loaded.contains_key(name) {
|
|
29
|
+
if let Some(state) = self.loaded.get_mut(name) {
|
|
30
|
+
state.last_used = std::time::Instant::now();
|
|
31
|
+
state.request_count += 1;
|
|
32
|
+
}
|
|
33
|
+
return true;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// Need to load — evict LRU if at capacity
|
|
37
|
+
if self.loaded.len() >= self.max_loaded {
|
|
38
|
+
self.evict_lru();
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
self.loaded.insert(name.to_string(), SpecialistState {
|
|
42
|
+
name: name.to_string(),
|
|
43
|
+
adapter_loaded: false,
|
|
44
|
+
last_used: std::time::Instant::now(),
|
|
45
|
+
request_count: 1,
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
false // Was not loaded, needs adapter swap
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
fn evict_lru(&mut self) {
|
|
52
|
+
if let Some(oldest) = self.loaded.values()
|
|
53
|
+
.min_by_key(|s| s.last_used)
|
|
54
|
+
.map(|s| s.name.clone())
|
|
55
|
+
{
|
|
56
|
+
tracing::info!("Evicting LRU specialist: {oldest}");
|
|
57
|
+
self.loaded.remove(&oldest);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
pub fn loaded_count(&self) -> usize {
|
|
62
|
+
self.loaded.len()
|
|
63
|
+
}
|
|
64
|
+
}
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use crate::memory::KnowledgeGraph;
|
|
3
|
+
use crate::config::SpecialistConfig;
|
|
4
|
+
|
|
5
|
+
/// Specialist Auto-Spawner — the system that creates new brain regions.
///
/// When the coordinator repeatedly routes to "general" for a domain,
/// and the confidence is low, the spawner:
/// 1. Detects the pattern ("many Python questions going to general")
/// 2. Creates a new specialist config for that domain
/// 3. Queues training data collection
/// 4. Once enough data: triggers QLoRA training
/// 5. New specialist joins the swarm automatically
///
/// The system literally grows new specialists as needed.
/// A music producer will end up with audio_expert, midi_expert, mixing_expert.
/// A data scientist will get pandas_expert, sklearn_expert, visualization_expert.
/// No configuration needed. The system figures it out.
///
/// NOTE(review): only steps 1 and 2 are implemented by this type's visible
/// methods; steps 3-5 presumably live elsewhere — confirm.
#[derive(Debug, Clone)]
pub struct SpecialistSpawner {
    /// Minimum requests in a domain before considering spawning
    min_requests: u32,
    /// Maximum confidence score that triggers spawning (below this = specialist needed)
    confidence_threshold: f32,
    /// Domains that already have specialists (don't spawn duplicates)
    covered_domains: Vec<String>,
}
|
|
27
|
+
|
|
28
|
+
/// Suggestion to create a new specialist for an under-served domain.
#[derive(Debug, Clone)]
pub struct SpawnProposal {
    /// Name the new specialist would get (e.g. `<domain>_expert`).
    pub name: String,
    /// Domain (or pathway) the proposal was derived from.
    pub domain: String,
    /// Capabilities to assign to the new specialist.
    pub capabilities: Vec<String>,
    /// Human-readable explanation of why the proposal was made.
    pub reason: String,
    /// How many requests (or pathway reinforcements) were observed.
    pub requests_in_domain: u32,
    /// Average score currently achieved in the domain.
    pub current_avg_score: f64,
}
|
|
38
|
+
|
|
39
|
+
impl SpecialistSpawner {
|
|
40
|
+
pub fn new(covered_domains: Vec<String>) -> Self {
|
|
41
|
+
Self {
|
|
42
|
+
min_requests: 5,
|
|
43
|
+
confidence_threshold: 3.0,
|
|
44
|
+
covered_domains,
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/// Analyze the knowledge graph for domains that need specialists
|
|
49
|
+
pub fn detect_spawn_candidates(&self, kg: &KnowledgeGraph) -> Result<Vec<SpawnProposal>> {
|
|
50
|
+
let mut proposals = Vec::new();
|
|
51
|
+
|
|
52
|
+
// Get all specialist stats
|
|
53
|
+
let stats = kg.specialist_confidence_report().unwrap_or_default();
|
|
54
|
+
|
|
55
|
+
// Look for domains where "general" is handling too many requests with low scores
|
|
56
|
+
for stat in &stats {
|
|
57
|
+
let specialist = stat["specialist"].as_str().unwrap_or("");
|
|
58
|
+
let domain = stat["domain"].as_str().unwrap_or("");
|
|
59
|
+
let requests = stat["requests"].as_u64().unwrap_or(0) as u32;
|
|
60
|
+
let avg_score = stat["avg_score"].as_f64().unwrap_or(0.0);
|
|
61
|
+
|
|
62
|
+
// If general specialist is handling many requests in a specific domain
|
|
63
|
+
// with below-threshold scores, propose a new specialist
|
|
64
|
+
if specialist == "general"
|
|
65
|
+
&& requests >= self.min_requests
|
|
66
|
+
&& avg_score < self.confidence_threshold as f64
|
|
67
|
+
&& !self.is_domain_covered(domain)
|
|
68
|
+
{
|
|
69
|
+
let capabilities = Self::infer_capabilities(domain);
|
|
70
|
+
let name = format!("{}_expert", domain.replace(' ', "_"));
|
|
71
|
+
|
|
72
|
+
proposals.push(SpawnProposal {
|
|
73
|
+
name,
|
|
74
|
+
domain: domain.to_string(),
|
|
75
|
+
capabilities,
|
|
76
|
+
reason: format!(
|
|
77
|
+
"General specialist handling {} requests in '{}' domain with avg score {:.1} (below threshold {:.1})",
|
|
78
|
+
requests, domain, avg_score, self.confidence_threshold
|
|
79
|
+
),
|
|
80
|
+
requests_in_domain: requests,
|
|
81
|
+
current_avg_score: avg_score,
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
// Also analyze conversation patterns for undetected domains
|
|
87
|
+
if let Ok(top_pathways) = kg.top_pathways(20) {
|
|
88
|
+
for (pathway, strength, avg_score) in &top_pathways {
|
|
89
|
+
if *strength > 3 && *avg_score < self.confidence_threshold as f64 {
|
|
90
|
+
// This pathway is used often but scoring low
|
|
91
|
+
if !self.is_domain_covered(pathway) {
|
|
92
|
+
proposals.push(SpawnProposal {
|
|
93
|
+
name: format!("{}_expert", pathway.replace('+', "_")),
|
|
94
|
+
domain: pathway.clone(),
|
|
95
|
+
capabilities: vec![pathway.clone()],
|
|
96
|
+
reason: format!(
|
|
97
|
+
"Pathway '{}' reinforced {} times but avg score only {:.1}",
|
|
98
|
+
pathway, strength, avg_score
|
|
99
|
+
),
|
|
100
|
+
requests_in_domain: *strength as u32,
|
|
101
|
+
current_avg_score: *avg_score,
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
Ok(proposals)
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/// Convert a spawn proposal into a specialist config
|
|
112
|
+
pub fn create_specialist_config(proposal: &SpawnProposal) -> SpecialistConfig {
|
|
113
|
+
SpecialistConfig {
|
|
114
|
+
name: proposal.name.clone(),
|
|
115
|
+
capabilities: proposal.capabilities.clone(),
|
|
116
|
+
base_model: None, // Use default base model
|
|
117
|
+
adapter: None, // Will be trained
|
|
118
|
+
system_prompt: Some(format!(
|
|
119
|
+
"You are an expert in {}. Provide detailed, accurate answers in your domain of expertise.",
|
|
120
|
+
proposal.domain
|
|
121
|
+
)),
|
|
122
|
+
priority: 70, // Higher than general (50) but lower than existing experts
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
fn is_domain_covered(&self, domain: &str) -> bool {
|
|
127
|
+
self.covered_domains.iter().any(|d| {
|
|
128
|
+
d.to_lowercase().contains(&domain.to_lowercase())
|
|
129
|
+
|| domain.to_lowercase().contains(&d.to_lowercase())
|
|
130
|
+
})
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/// Derive a capability list for a domain.
///
/// The domain itself is always the first capability. If the lower-cased
/// domain contains one of the known keywords, the related capabilities of
/// the first matching keyword are appended.
fn infer_capabilities(domain: &str) -> Vec<String> {
    // Keyword -> related capabilities; order matters, first match wins.
    const RELATED: &[(&str, &[&str])] = &[
        ("python", &["debugging", "testing", "django", "flask", "fastapi"]),
        ("javascript", &["react", "node", "typescript", "frontend"]),
        ("sql", &["database", "query", "postgres", "mysql"]),
        ("rust", &["systems", "memory", "concurrency", "cargo"]),
        ("math", &["algebra", "calculus", "statistics", "probability"]),
        ("science", &["physics", "chemistry", "biology"]),
        ("writing", &["grammar", "style", "creative", "editing"]),
        ("music", &["audio", "production", "mixing", "midi"]),
        ("business", &["finance", "marketing", "strategy", "management"]),
    ];

    let needle = domain.to_lowercase();
    let mut caps = vec![domain.to_string()];

    let hit = RELATED.iter().find(|(keyword, _)| needle.contains(keyword));
    if let Some((_, extras)) = hit {
        for extra in extras.iter() {
            caps.push(extra.to_string());
        }
    }

    caps
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// A known keyword adds its related capabilities after the domain itself.
    #[test]
    fn test_infer_capabilities() {
        let caps = SpecialistSpawner::infer_capabilities("python");
        for expected in ["python", "debugging"] {
            assert!(caps.contains(&expected.to_string()));
        }
    }

    /// Coverage matching is substring based in both directions.
    #[test]
    fn test_is_domain_covered() {
        let covered = vec![String::from("python_expert"), String::from("sql_expert")];
        let spawner = SpecialistSpawner::new(covered);

        assert!(spawner.is_domain_covered("python"));
        assert!(spawner.is_domain_covered("sql"));
        assert!(!spawner.is_domain_covered("music"));
    }

    /// The generated config carries the proposal name, priority 70 and a
    /// system prompt that mentions the domain.
    #[test]
    fn test_create_specialist_config() {
        let proposal = SpawnProposal {
            name: String::from("music_expert"),
            domain: String::from("music production"),
            capabilities: vec![String::from("music"), String::from("audio")],
            reason: String::from("test"),
            requests_in_domain: 10,
            current_avg_score: 2.5,
        };

        let config = SpecialistSpawner::create_specialist_config(&proposal);

        assert_eq!(config.name, "music_expert");
        assert_eq!(config.priority, 70);
        let prompt = config.system_prompt.unwrap();
        assert!(prompt.contains("music production"));
    }
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/// Merges outputs from multiple specialists into a coherent response
|
|
2
|
+
pub struct Synthesizer;
|
|
3
|
+
|
|
4
|
+
impl Synthesizer {
|
|
5
|
+
pub fn new() -> Self { Self }
|
|
6
|
+
|
|
7
|
+
/// Merge multiple specialist responses into one
|
|
8
|
+
pub fn merge(&self, results: &[(String, String)]) -> anyhow::Result<String> {
|
|
9
|
+
if results.is_empty() {
|
|
10
|
+
return Ok("No specialist responses to merge.".into());
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
if results.len() == 1 {
|
|
14
|
+
return Ok(results[0].1.clone());
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
// For now, concatenate with specialist attribution
|
|
18
|
+
// In production, this will use the coordinator model to synthesize
|
|
19
|
+
let mut output = String::new();
|
|
20
|
+
for (specialist, response) in results {
|
|
21
|
+
output.push_str(&format!("**[{specialist}]**\n{response}\n\n"));
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
Ok(output.trim().to_string())
|
|
25
|
+
}
|
|
26
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use serde::Serialize;
|
|
3
|
+
|
|
4
|
+
#[derive(Debug, Serialize)]
|
|
5
|
+
pub struct GpuInfo {
|
|
6
|
+
pub name: String,
|
|
7
|
+
pub vram_total_mb: u64,
|
|
8
|
+
pub vram_used_mb: u64,
|
|
9
|
+
pub vram_free_mb: u64,
|
|
10
|
+
pub utilization_percent: f32,
|
|
11
|
+
pub temperature_c: Option<u32>,
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
/// Caps how much video memory Synapse is allowed to use.
#[derive(Debug, Clone)]
pub struct VramManager {
    /// Upper bound on usable VRAM, in the same unit as the `*_mb` GPU fields.
    pub budget_mb: u64,
}
|
|
17
|
+
|
|
18
|
+
impl VramManager {
|
|
19
|
+
pub fn new(max_vram_mb: u64) -> Self {
|
|
20
|
+
Self {
|
|
21
|
+
budget_mb: if max_vram_mb > 0 { max_vram_mb } else { 32768 }, // Default 32GB
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
/// Get GPU info via nvidia-smi (works on both local and remote)
|
|
26
|
+
pub async fn gpu_info() -> Result<GpuInfo> {
|
|
27
|
+
// Try nvidia-smi first
|
|
28
|
+
let output = tokio::process::Command::new("nvidia-smi")
|
|
29
|
+
.args(["--query-gpu=name,memory.total,memory.used,memory.free,utilization.gpu,temperature.gpu", "--format=csv,noheader,nounits"])
|
|
30
|
+
.output()
|
|
31
|
+
.await;
|
|
32
|
+
|
|
33
|
+
match output {
|
|
34
|
+
Ok(out) if out.status.success() => {
|
|
35
|
+
let stdout = String::from_utf8_lossy(&out.stdout);
|
|
36
|
+
let parts: Vec<&str> = stdout.trim().split(", ").collect();
|
|
37
|
+
if parts.len() >= 6 {
|
|
38
|
+
return Ok(GpuInfo {
|
|
39
|
+
name: parts[0].to_string(),
|
|
40
|
+
vram_total_mb: parts[1].parse().unwrap_or(0),
|
|
41
|
+
vram_used_mb: parts[2].parse().unwrap_or(0),
|
|
42
|
+
vram_free_mb: parts[3].parse().unwrap_or(0),
|
|
43
|
+
utilization_percent: parts[4].parse().unwrap_or(0.0),
|
|
44
|
+
temperature_c: parts[5].parse().ok(),
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
_ => {}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// No GPU available
|
|
52
|
+
Ok(GpuInfo {
|
|
53
|
+
name: "No GPU detected".into(),
|
|
54
|
+
vram_total_mb: 0,
|
|
55
|
+
vram_used_mb: 0,
|
|
56
|
+
vram_free_mb: 0,
|
|
57
|
+
utilization_percent: 0.0,
|
|
58
|
+
temperature_c: None,
|
|
59
|
+
})
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
/// Calculate how much VRAM is available for Synapse
|
|
63
|
+
pub async fn available_vram(&self) -> Result<u64> {
|
|
64
|
+
let info = Self::gpu_info().await?;
|
|
65
|
+
Ok(info.vram_free_mb.min(self.budget_mb))
|
|
66
|
+
}
|
|
67
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Compose definition for the Synapse learning service.
services:
  synapse-learn:
    # Image is built from the Python sources shipped with the package.
    build:
      context: ./python
      dockerfile: Dockerfile.learn
    ports:
      - "8090:8090"
    # Share the host's Synapse data directory with the container.
    volumes:
      - ~/.synapse:/root/.synapse
    environment:
      SYNAPSE_DATA_DIR: /root/.synapse
    # Reserve one NVIDIA GPU for the container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu
    restart: unless-stopped
|