titan-synapse 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/CONTRIBUTING.md +187 -0
  2. package/Cargo.lock +3976 -0
  3. package/Cargo.toml +10 -0
  4. package/LICENSE +190 -0
  5. package/PROGRESS.md +151 -0
  6. package/README.md +514 -0
  7. package/TEST_LOG.md +220 -0
  8. package/config/default.yaml +36 -0
  9. package/crates/synapse/Cargo.toml +70 -0
  10. package/crates/synapse/src/cli/bench.rs +44 -0
  11. package/crates/synapse/src/cli/eval.rs +395 -0
  12. package/crates/synapse/src/cli/export.rs +45 -0
  13. package/crates/synapse/src/cli/hub.rs +179 -0
  14. package/crates/synapse/src/cli/import.rs +35 -0
  15. package/crates/synapse/src/cli/learn.rs +53 -0
  16. package/crates/synapse/src/cli/mod.rs +10 -0
  17. package/crates/synapse/src/cli/models.rs +36 -0
  18. package/crates/synapse/src/cli/pull.rs +60 -0
  19. package/crates/synapse/src/cli/status.rs +52 -0
  20. package/crates/synapse/src/cli/train.rs +99 -0
  21. package/crates/synapse/src/config.rs +220 -0
  22. package/crates/synapse/src/dashboard.rs +281 -0
  23. package/crates/synapse/src/format/manifest.rs +57 -0
  24. package/crates/synapse/src/format/mod.rs +4 -0
  25. package/crates/synapse/src/format/packer.rs +213 -0
  26. package/crates/synapse/src/inference/engine.rs +361 -0
  27. package/crates/synapse/src/inference/kv_cache.rs +97 -0
  28. package/crates/synapse/src/inference/lora.rs +166 -0
  29. package/crates/synapse/src/inference/mod.rs +9 -0
  30. package/crates/synapse/src/inference/model.rs +167 -0
  31. package/crates/synapse/src/inference/sampler.rs +133 -0
  32. package/crates/synapse/src/inference/speculative.rs +153 -0
  33. package/crates/synapse/src/learn/cloud_fallback.rs +186 -0
  34. package/crates/synapse/src/learn/engine.rs +109 -0
  35. package/crates/synapse/src/learn/mod.rs +5 -0
  36. package/crates/synapse/src/main.rs +185 -0
  37. package/crates/synapse/src/memory/extractor.rs +201 -0
  38. package/crates/synapse/src/memory/graph.rs +332 -0
  39. package/crates/synapse/src/memory/hallucination.rs +259 -0
  40. package/crates/synapse/src/memory/mod.rs +7 -0
  41. package/crates/synapse/src/openai.rs +232 -0
  42. package/crates/synapse/src/server.rs +166 -0
  43. package/crates/synapse/src/streaming.rs +80 -0
  44. package/crates/synapse/src/swarm/coordinator.rs +198 -0
  45. package/crates/synapse/src/swarm/mod.rs +8 -0
  46. package/crates/synapse/src/swarm/orchestrator.rs +225 -0
  47. package/crates/synapse/src/swarm/pool.rs +64 -0
  48. package/crates/synapse/src/swarm/spawner.rs +199 -0
  49. package/crates/synapse/src/swarm/synthesizer.rs +26 -0
  50. package/crates/synapse/src/vram/manager.rs +67 -0
  51. package/crates/synapse/src/vram/mod.rs +3 -0
  52. package/docker-compose.yml +19 -0
  53. package/install.sh +311 -0
  54. package/package.json +36 -0
  55. package/python/Dockerfile.learn +18 -0
  56. package/python/requirements.txt +11 -0
  57. package/python/synapse_learn/__init__.py +0 -0
  58. package/python/synapse_learn/datasets.py +233 -0
  59. package/python/synapse_learn/real_eval.py +616 -0
  60. package/python/synapse_learn/server.py +431 -0
  61. package/python/synapse_learn/train_base.py +672 -0
  62. package/python/synapse_learn/train_specialists.py +787 -0
@@ -0,0 +1,225 @@
1
+ use anyhow::Result;
2
+ use futures::future::join_all;
3
+ use crate::config::SynapseConfig;
4
+ use crate::inference::{InferenceEngine, GenerationResult};
5
+ use crate::learn::CloudFallback;
6
+ use crate::memory::KnowledgeGraph;
7
+ use crate::openai::Message;
8
+ use super::coordinator::Coordinator;
9
+ use super::synthesizer::Synthesizer;
10
+
11
+ /// Top-level swarm orchestrator — decides single vs multi-specialist routing
12
+ /// Uses Hebbian routing and parallel specialist execution for swarm mode
13
+ /// Cloud fallback: when confidence is low, routes to cloud and learns from the response
14
+ pub struct Orchestrator {
15
+ coordinator: Coordinator,
16
+ synthesizer: Synthesizer,
17
+ cloud_fallback: Option<CloudFallback>,
18
+ }
19
+
20
+ impl Orchestrator {
21
+ pub fn new(config: &SynapseConfig) -> Self {
22
+ Self {
23
+ coordinator: Coordinator::new(config),
24
+ synthesizer: Synthesizer::new(),
25
+ cloud_fallback: CloudFallback::new(&config.cloud),
26
+ }
27
+ }
28
+
29
+ /// Build context from full message history (not just last message)
30
+ fn build_context(messages: &[Message]) -> String {
31
+ if messages.len() <= 1 {
32
+ return messages.last().map(|m| m.content.clone()).unwrap_or_default();
33
+ }
34
+
35
+ // Include recent conversation context (last 4 messages max)
36
+ let recent: Vec<&Message> = messages.iter().rev().take(4).collect::<Vec<_>>().into_iter().rev().collect();
37
+ let mut context = String::new();
38
+ for msg in &recent[..recent.len().saturating_sub(1)] {
39
+ context.push_str(&format!("[{}]: {}\n", msg.role, msg.content));
40
+ }
41
+ // Last message is the actual query
42
+ if let Some(last) = recent.last() {
43
+ context.push_str(&last.content);
44
+ }
45
+ context
46
+ }
47
+
48
+ /// Process a chat request — route to specialist(s) and return response
49
+ pub async fn process(
50
+ &self,
51
+ messages: &[Message],
52
+ engine: &InferenceEngine,
53
+ max_tokens: Option<u32>,
54
+ temperature: Option<f32>,
55
+ knowledge: Option<&KnowledgeGraph>,
56
+ ) -> Result<GenerationResult> {
57
+ let last_message = messages.last()
58
+ .map(|m| m.content.as_str())
59
+ .unwrap_or("");
60
+
61
+ let context = Self::build_context(messages);
62
+ let max_tokens = max_tokens.unwrap_or(2048);
63
+ let temperature = temperature.unwrap_or(0.7);
64
+
65
+ let routing = self.coordinator.route(last_message, knowledge);
66
+
67
+ match routing {
68
+ RoutingDecision::Single { specialist, confidence } => {
69
+ tracing::info!("Routing to specialist: {specialist} (confidence: {confidence:.2})");
70
+
71
+ // Cloud fallback: if confidence is too low and cloud is available,
72
+ // generate locally first, then ask cloud and learn from the difference
73
+ let cloud_threshold = CloudFallback::confidence_threshold();
74
+ if confidence < cloud_threshold {
75
+ if let Some(ref fallback) = self.cloud_fallback {
76
+ tracing::info!(
77
+ "⚡ Low confidence ({confidence:.2} < {cloud_threshold:.2}) — trying cloud fallback"
78
+ );
79
+
80
+ // Try local generation first (we still want the local attempt for DPO)
81
+ let local_result = engine.generate(&context, Some(&specialist), max_tokens, temperature).await;
82
+ let local_text = local_result.as_ref().ok().map(|r| r.text.as_str());
83
+
84
+ // Ask cloud for the better answer
85
+ if let Some(kg) = knowledge {
86
+ match fallback.fallback(last_message, &specialist, local_text, kg).await {
87
+ Ok(cloud_result) => {
88
+ tracing::info!(
89
+ "☁️ Cloud fallback used {}, learned={}",
90
+ cloud_result.model_used, cloud_result.learned
91
+ );
92
+ // Return the cloud's better response
93
+ return Ok(GenerationResult {
94
+ text: cloud_result.text,
95
+ prompt_tokens: 0,
96
+ completion_tokens: 0,
97
+ total_tokens: 0,
98
+ tok_per_sec: 0.0,
99
+ duration_ms: 0,
100
+ });
101
+ }
102
+ Err(e) => {
103
+ tracing::warn!("Cloud fallback failed: {e}, using local response");
104
+ // Fall through to local response
105
+ }
106
+ }
107
+ }
108
+
109
+ // Cloud failed, return local result if we have one
110
+ if let Ok(result) = local_result {
111
+ return Ok(result);
112
+ }
113
+ }
114
+ }
115
+
116
+ let result = engine.generate(&context, Some(&specialist), max_tokens, temperature).await?;
117
+
118
+ // Reinforce the pathway on successful generation
119
+ if let Some(kg) = knowledge {
120
+ let _ = kg.reinforce_pathway(&[specialist.clone()], confidence);
121
+ let _ = kg.update_specialist_stats(
122
+ &specialist, "general", confidence, result.tok_per_sec,
123
+ );
124
+ }
125
+
126
+ Ok(result)
127
+ }
128
+ RoutingDecision::Swarm { subtasks } => {
129
+ tracing::info!("⚡ Swarm mode: {} subtasks (PARALLEL)", subtasks.len());
130
+ let start = std::time::Instant::now();
131
+ let tokens_per_task = max_tokens / subtasks.len() as u32;
132
+
133
+ // Execute ALL subtasks in parallel
134
+ let futures: Vec<_> = subtasks.iter().map(|task| {
135
+ let prompt = format!("Task: {}\n\nContext: {context}", task.description);
136
+ let specialist = task.specialist.clone();
137
+ async move {
138
+ let result = engine.generate(
139
+ &prompt,
140
+ Some(&specialist),
141
+ tokens_per_task,
142
+ temperature,
143
+ ).await;
144
+ (specialist, result)
145
+ }
146
+ }).collect();
147
+
148
+ let results = join_all(futures).await;
149
+
150
+ let mut texts = Vec::new();
151
+ let mut total_prompt = 0u32;
152
+ let mut total_completion = 0u32;
153
+ let mut specialists_used = Vec::new();
154
+
155
+ for (specialist, result) in results {
156
+ match result {
157
+ Ok(gen_result) => {
158
+ total_prompt += gen_result.prompt_tokens;
159
+ total_completion += gen_result.completion_tokens;
160
+ specialists_used.push(specialist.clone());
161
+ texts.push((specialist, gen_result.text));
162
+ }
163
+ Err(e) => {
164
+ tracing::warn!("Specialist {specialist} failed: {e}");
165
+ // Continue with other specialists — graceful degradation
166
+ }
167
+ }
168
+ }
169
+
170
+ if texts.is_empty() {
171
+ return Err(anyhow::anyhow!("All specialists failed in swarm mode"));
172
+ }
173
+
174
+ let elapsed = start.elapsed();
175
+ let merged = self.synthesizer.merge(&texts)?;
176
+
177
+ // Reinforce the swarm pathway
178
+ if let Some(kg) = knowledge {
179
+ let _ = kg.reinforce_pathway(&specialists_used, 4.0);
180
+ }
181
+
182
+ let tok_per_sec = if elapsed.as_secs_f64() > 0.0 {
183
+ total_completion as f64 / elapsed.as_secs_f64()
184
+ } else {
185
+ 0.0
186
+ };
187
+
188
+ tracing::info!(
189
+ "⚡ Swarm complete: {} specialists, {} tokens in {:.1}s ({:.1} tok/s)",
190
+ specialists_used.len(), total_completion, elapsed.as_secs_f64(), tok_per_sec
191
+ );
192
+
193
+ Ok(GenerationResult {
194
+ text: merged,
195
+ prompt_tokens: total_prompt,
196
+ completion_tokens: total_completion,
197
+ total_tokens: total_prompt + total_completion,
198
+ tok_per_sec,
199
+ duration_ms: elapsed.as_millis() as u64,
200
+ })
201
+ }
202
+ }
203
+ }
204
+ }
205
+
206
/// Outcome of routing a request: either one specialist handles it, or it is
/// split into subtasks handled by several specialists.
#[derive(Debug, Clone)]
pub enum RoutingDecision {
    /// A single specialist handles the request.
    Single {
        /// Name of the chosen specialist.
        specialist: String,
        /// Confidence the router assigned to this choice.
        confidence: f32,
    },
    /// The request is split into subtasks, one per entry.
    Swarm {
        /// Subtasks to execute.
        subtasks: Vec<SubTask>,
    },
}

/// One unit of work inside a swarm routing decision.
#[derive(Debug, Clone)]
pub struct SubTask {
    /// Name of the specialist assigned to this subtask.
    pub specialist: String,
    /// Text describing what the specialist should do.
    pub description: String,
}

impl std::fmt::Display for RoutingDecision {
    /// Renders `Single(<name>, confidence=<x.xx>)` or
    /// `Swarm(<name>, <name>, ...)` listing the subtask specialists in order.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            RoutingDecision::Single { specialist, confidence } => {
                write!(f, "Single({specialist}, confidence={confidence:.2})")
            }
            RoutingDecision::Swarm { subtasks } => {
                // Write names straight into the formatter instead of building
                // an intermediate Vec and joined String.
                f.write_str("Swarm(")?;
                for (index, task) in subtasks.iter().enumerate() {
                    if index > 0 {
                        f.write_str(", ")?;
                    }
                    f.write_str(&task.specialist)?;
                }
                f.write_str(")")
            }
        }
    }
}
@@ -0,0 +1,64 @@
1
+ use std::collections::HashMap;
2
+
3
+ /// Manages the pool of active specialists and their LoRA adapters
4
+ pub struct SpecialistPool {
5
+ /// Currently loaded specialists
6
+ loaded: HashMap<String, SpecialistState>,
7
+ /// Max specialists to keep loaded
8
+ max_loaded: usize,
9
+ }
10
+
11
+ pub struct SpecialistState {
12
+ pub name: String,
13
+ pub adapter_loaded: bool,
14
+ pub last_used: std::time::Instant,
15
+ pub request_count: u64,
16
+ }
17
+
18
+ impl SpecialistPool {
19
+ pub fn new(max_loaded: usize) -> Self {
20
+ Self {
21
+ loaded: HashMap::new(),
22
+ max_loaded,
23
+ }
24
+ }
25
+
26
+ /// Ensure a specialist is loaded, evicting LRU if necessary
27
+ pub fn ensure_loaded(&mut self, name: &str) -> bool {
28
+ if self.loaded.contains_key(name) {
29
+ if let Some(state) = self.loaded.get_mut(name) {
30
+ state.last_used = std::time::Instant::now();
31
+ state.request_count += 1;
32
+ }
33
+ return true;
34
+ }
35
+
36
+ // Need to load — evict LRU if at capacity
37
+ if self.loaded.len() >= self.max_loaded {
38
+ self.evict_lru();
39
+ }
40
+
41
+ self.loaded.insert(name.to_string(), SpecialistState {
42
+ name: name.to_string(),
43
+ adapter_loaded: false,
44
+ last_used: std::time::Instant::now(),
45
+ request_count: 1,
46
+ });
47
+
48
+ false // Was not loaded, needs adapter swap
49
+ }
50
+
51
+ fn evict_lru(&mut self) {
52
+ if let Some(oldest) = self.loaded.values()
53
+ .min_by_key(|s| s.last_used)
54
+ .map(|s| s.name.clone())
55
+ {
56
+ tracing::info!("Evicting LRU specialist: {oldest}");
57
+ self.loaded.remove(&oldest);
58
+ }
59
+ }
60
+
61
+ pub fn loaded_count(&self) -> usize {
62
+ self.loaded.len()
63
+ }
64
+ }
@@ -0,0 +1,199 @@
1
+ use anyhow::Result;
2
+ use crate::memory::KnowledgeGraph;
3
+ use crate::config::SpecialistConfig;
4
+
5
/// Specialist auto-spawner: proposes new specialists for under-served domains.
///
/// The code in this type inspects knowledge-graph statistics and emits
/// [`SpawnProposal`]s when the "general" specialist handles enough requests
/// in a domain with a low average score, or when a frequently reinforced
/// pathway scores poorly. A proposal can then be turned into a
/// `SpecialistConfig`.
///
/// NOTE(review): the original author's notes describe a larger pipeline
/// (training-data collection, QLoRA training, automatic registration of the
/// new specialist). Those steps are not implemented by this type — confirm
/// where they live.
pub struct SpecialistSpawner {
    /// Minimum number of requests in a domain before a spawn is considered.
    min_requests: u32,
    /// Average scores strictly below this value mark a domain as needing a specialist.
    confidence_threshold: f32,
    /// Names/domains that already have specialists, so duplicates are not proposed.
    covered_domains: Vec<String>,
}
27
+
28
/// Describes a specialist the spawner suggests creating.
#[derive(Debug, Clone)]
pub struct SpawnProposal {
    /// Proposed specialist name.
    pub name: String,
    /// Domain (or pathway) the specialist would cover.
    pub domain: String,
    /// Capability tags the specialist would advertise.
    pub capabilities: Vec<String>,
    /// Human-readable explanation of why the proposal was made.
    pub reason: String,
    /// Number of requests (or pathway reinforcements) observed for the domain.
    pub requests_in_domain: u32,
    /// Average score observed for the domain when the proposal was made.
    pub current_avg_score: f64,
}
38
+
39
+ impl SpecialistSpawner {
40
+ pub fn new(covered_domains: Vec<String>) -> Self {
41
+ Self {
42
+ min_requests: 5,
43
+ confidence_threshold: 3.0,
44
+ covered_domains,
45
+ }
46
+ }
47
+
48
+ /// Analyze the knowledge graph for domains that need specialists
49
+ pub fn detect_spawn_candidates(&self, kg: &KnowledgeGraph) -> Result<Vec<SpawnProposal>> {
50
+ let mut proposals = Vec::new();
51
+
52
+ // Get all specialist stats
53
+ let stats = kg.specialist_confidence_report().unwrap_or_default();
54
+
55
+ // Look for domains where "general" is handling too many requests with low scores
56
+ for stat in &stats {
57
+ let specialist = stat["specialist"].as_str().unwrap_or("");
58
+ let domain = stat["domain"].as_str().unwrap_or("");
59
+ let requests = stat["requests"].as_u64().unwrap_or(0) as u32;
60
+ let avg_score = stat["avg_score"].as_f64().unwrap_or(0.0);
61
+
62
+ // If general specialist is handling many requests in a specific domain
63
+ // with below-threshold scores, propose a new specialist
64
+ if specialist == "general"
65
+ && requests >= self.min_requests
66
+ && avg_score < self.confidence_threshold as f64
67
+ && !self.is_domain_covered(domain)
68
+ {
69
+ let capabilities = Self::infer_capabilities(domain);
70
+ let name = format!("{}_expert", domain.replace(' ', "_"));
71
+
72
+ proposals.push(SpawnProposal {
73
+ name,
74
+ domain: domain.to_string(),
75
+ capabilities,
76
+ reason: format!(
77
+ "General specialist handling {} requests in '{}' domain with avg score {:.1} (below threshold {:.1})",
78
+ requests, domain, avg_score, self.confidence_threshold
79
+ ),
80
+ requests_in_domain: requests,
81
+ current_avg_score: avg_score,
82
+ });
83
+ }
84
+ }
85
+
86
+ // Also analyze conversation patterns for undetected domains
87
+ if let Ok(top_pathways) = kg.top_pathways(20) {
88
+ for (pathway, strength, avg_score) in &top_pathways {
89
+ if *strength > 3 && *avg_score < self.confidence_threshold as f64 {
90
+ // This pathway is used often but scoring low
91
+ if !self.is_domain_covered(pathway) {
92
+ proposals.push(SpawnProposal {
93
+ name: format!("{}_expert", pathway.replace('+', "_")),
94
+ domain: pathway.clone(),
95
+ capabilities: vec![pathway.clone()],
96
+ reason: format!(
97
+ "Pathway '{}' reinforced {} times but avg score only {:.1}",
98
+ pathway, strength, avg_score
99
+ ),
100
+ requests_in_domain: *strength as u32,
101
+ current_avg_score: *avg_score,
102
+ });
103
+ }
104
+ }
105
+ }
106
+ }
107
+
108
+ Ok(proposals)
109
+ }
110
+
111
+ /// Convert a spawn proposal into a specialist config
112
+ pub fn create_specialist_config(proposal: &SpawnProposal) -> SpecialistConfig {
113
+ SpecialistConfig {
114
+ name: proposal.name.clone(),
115
+ capabilities: proposal.capabilities.clone(),
116
+ base_model: None, // Use default base model
117
+ adapter: None, // Will be trained
118
+ system_prompt: Some(format!(
119
+ "You are an expert in {}. Provide detailed, accurate answers in your domain of expertise.",
120
+ proposal.domain
121
+ )),
122
+ priority: 70, // Higher than general (50) but lower than existing experts
123
+ }
124
+ }
125
+
126
+ fn is_domain_covered(&self, domain: &str) -> bool {
127
+ self.covered_domains.iter().any(|d| {
128
+ d.to_lowercase().contains(&domain.to_lowercase())
129
+ || domain.to_lowercase().contains(&d.to_lowercase())
130
+ })
131
+ }
132
+
133
+ fn infer_capabilities(domain: &str) -> Vec<String> {
134
+ let domain_lower = domain.to_lowercase();
135
+ let mut caps = vec![domain.to_string()];
136
+
137
+ // Add related capabilities based on domain
138
+ let related: Vec<(&str, &[&str])> = vec![
139
+ ("python", &["debugging", "testing", "django", "flask", "fastapi"]),
140
+ ("javascript", &["react", "node", "typescript", "frontend"]),
141
+ ("sql", &["database", "query", "postgres", "mysql"]),
142
+ ("rust", &["systems", "memory", "concurrency", "cargo"]),
143
+ ("math", &["algebra", "calculus", "statistics", "probability"]),
144
+ ("science", &["physics", "chemistry", "biology"]),
145
+ ("writing", &["grammar", "style", "creative", "editing"]),
146
+ ("music", &["audio", "production", "mixing", "midi"]),
147
+ ("business", &["finance", "marketing", "strategy", "management"]),
148
+ ];
149
+
150
+ for (key, related_caps) in &related {
151
+ if domain_lower.contains(key) {
152
+ caps.extend(related_caps.iter().map(|s| s.to_string()));
153
+ break;
154
+ }
155
+ }
156
+
157
+ caps
158
+ }
159
+ }
160
+
161
#[cfg(test)]
mod tests {
    use super::*;

    /// A known domain keeps its own name and gains related capabilities.
    #[test]
    fn test_infer_capabilities() {
        let inferred = SpecialistSpawner::infer_capabilities("python");
        let has = |tag: &str| inferred.iter().any(|cap| cap == tag);
        assert!(has("python"));
        assert!(has("debugging"));
    }

    /// Coverage matching is substring-based against the covered names.
    #[test]
    fn test_is_domain_covered() {
        let covered = vec![String::from("python_expert"), String::from("sql_expert")];
        let spawner = SpecialistSpawner::new(covered);

        assert!(spawner.is_domain_covered("python"));
        assert!(spawner.is_domain_covered("sql"));
        assert!(!spawner.is_domain_covered("music"));
    }

    /// The generated config carries the proposal name, priority 70 and a
    /// system prompt that mentions the domain.
    #[test]
    fn test_create_specialist_config() {
        let proposal = SpawnProposal {
            name: String::from("music_expert"),
            domain: String::from("music production"),
            capabilities: vec![String::from("music"), String::from("audio")],
            reason: String::from("test"),
            requests_in_domain: 10,
            current_avg_score: 2.5,
        };

        let config = SpecialistSpawner::create_specialist_config(&proposal);

        assert_eq!(config.name, "music_expert");
        assert_eq!(config.priority, 70);
        let prompt = config.system_prompt.unwrap();
        assert!(prompt.contains("music production"));
    }
}
@@ -0,0 +1,26 @@
1
+ /// Merges outputs from multiple specialists into a coherent response
2
+ pub struct Synthesizer;
3
+
4
+ impl Synthesizer {
5
+ pub fn new() -> Self { Self }
6
+
7
+ /// Merge multiple specialist responses into one
8
+ pub fn merge(&self, results: &[(String, String)]) -> anyhow::Result<String> {
9
+ if results.is_empty() {
10
+ return Ok("No specialist responses to merge.".into());
11
+ }
12
+
13
+ if results.len() == 1 {
14
+ return Ok(results[0].1.clone());
15
+ }
16
+
17
+ // For now, concatenate with specialist attribution
18
+ // In production, this will use the coordinator model to synthesize
19
+ let mut output = String::new();
20
+ for (specialist, response) in results {
21
+ output.push_str(&format!("**[{specialist}]**\n{response}\n\n"));
22
+ }
23
+
24
+ Ok(output.trim().to_string())
25
+ }
26
+ }
@@ -0,0 +1,67 @@
1
+ use anyhow::Result;
2
+ use serde::Serialize;
3
+
4
+ #[derive(Debug, Serialize)]
5
+ pub struct GpuInfo {
6
+ pub name: String,
7
+ pub vram_total_mb: u64,
8
+ pub vram_used_mb: u64,
9
+ pub vram_free_mb: u64,
10
+ pub utilization_percent: f32,
11
+ pub temperature_c: Option<u32>,
12
+ }
13
+
14
+ pub struct VramManager {
15
+ pub budget_mb: u64,
16
+ }
17
+
18
+ impl VramManager {
19
+ pub fn new(max_vram_mb: u64) -> Self {
20
+ Self {
21
+ budget_mb: if max_vram_mb > 0 { max_vram_mb } else { 32768 }, // Default 32GB
22
+ }
23
+ }
24
+
25
+ /// Get GPU info via nvidia-smi (works on both local and remote)
26
+ pub async fn gpu_info() -> Result<GpuInfo> {
27
+ // Try nvidia-smi first
28
+ let output = tokio::process::Command::new("nvidia-smi")
29
+ .args(["--query-gpu=name,memory.total,memory.used,memory.free,utilization.gpu,temperature.gpu", "--format=csv,noheader,nounits"])
30
+ .output()
31
+ .await;
32
+
33
+ match output {
34
+ Ok(out) if out.status.success() => {
35
+ let stdout = String::from_utf8_lossy(&out.stdout);
36
+ let parts: Vec<&str> = stdout.trim().split(", ").collect();
37
+ if parts.len() >= 6 {
38
+ return Ok(GpuInfo {
39
+ name: parts[0].to_string(),
40
+ vram_total_mb: parts[1].parse().unwrap_or(0),
41
+ vram_used_mb: parts[2].parse().unwrap_or(0),
42
+ vram_free_mb: parts[3].parse().unwrap_or(0),
43
+ utilization_percent: parts[4].parse().unwrap_or(0.0),
44
+ temperature_c: parts[5].parse().ok(),
45
+ });
46
+ }
47
+ }
48
+ _ => {}
49
+ }
50
+
51
+ // No GPU available
52
+ Ok(GpuInfo {
53
+ name: "No GPU detected".into(),
54
+ vram_total_mb: 0,
55
+ vram_used_mb: 0,
56
+ vram_free_mb: 0,
57
+ utilization_percent: 0.0,
58
+ temperature_c: None,
59
+ })
60
+ }
61
+
62
+ /// Calculate how much VRAM is available for Synapse
63
+ pub async fn available_vram(&self) -> Result<u64> {
64
+ let info = Self::gpu_info().await?;
65
+ Ok(info.vram_free_mb.min(self.budget_mb))
66
+ }
67
+ }
@@ -0,0 +1,3 @@
1
+ pub mod manager;
2
+
3
+ pub use manager::VramManager;
@@ -0,0 +1,19 @@
1
+ services:
2
+ synapse-learn:
3
+ build:
4
+ context: ./python
5
+ dockerfile: Dockerfile.learn
6
+ ports:
7
+ - "8090:8090"
8
+ volumes:
9
+ - ~/.synapse:/root/.synapse
10
+ environment:
11
+ - SYNAPSE_DATA_DIR=/root/.synapse
12
+ deploy:
13
+ resources:
14
+ reservations:
15
+ devices:
16
+ - driver: nvidia
17
+ count: 1
18
+ capabilities: [gpu]
19
+ restart: unless-stopped