titan-synapse 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +187 -0
- package/Cargo.lock +3976 -0
- package/Cargo.toml +10 -0
- package/LICENSE +190 -0
- package/PROGRESS.md +151 -0
- package/README.md +514 -0
- package/TEST_LOG.md +220 -0
- package/config/default.yaml +36 -0
- package/crates/synapse/Cargo.toml +70 -0
- package/crates/synapse/src/cli/bench.rs +44 -0
- package/crates/synapse/src/cli/eval.rs +395 -0
- package/crates/synapse/src/cli/export.rs +45 -0
- package/crates/synapse/src/cli/hub.rs +179 -0
- package/crates/synapse/src/cli/import.rs +35 -0
- package/crates/synapse/src/cli/learn.rs +53 -0
- package/crates/synapse/src/cli/mod.rs +10 -0
- package/crates/synapse/src/cli/models.rs +36 -0
- package/crates/synapse/src/cli/pull.rs +60 -0
- package/crates/synapse/src/cli/status.rs +52 -0
- package/crates/synapse/src/cli/train.rs +99 -0
- package/crates/synapse/src/config.rs +220 -0
- package/crates/synapse/src/dashboard.rs +281 -0
- package/crates/synapse/src/format/manifest.rs +57 -0
- package/crates/synapse/src/format/mod.rs +4 -0
- package/crates/synapse/src/format/packer.rs +213 -0
- package/crates/synapse/src/inference/engine.rs +361 -0
- package/crates/synapse/src/inference/kv_cache.rs +97 -0
- package/crates/synapse/src/inference/lora.rs +166 -0
- package/crates/synapse/src/inference/mod.rs +9 -0
- package/crates/synapse/src/inference/model.rs +167 -0
- package/crates/synapse/src/inference/sampler.rs +133 -0
- package/crates/synapse/src/inference/speculative.rs +153 -0
- package/crates/synapse/src/learn/cloud_fallback.rs +186 -0
- package/crates/synapse/src/learn/engine.rs +109 -0
- package/crates/synapse/src/learn/mod.rs +5 -0
- package/crates/synapse/src/main.rs +185 -0
- package/crates/synapse/src/memory/extractor.rs +201 -0
- package/crates/synapse/src/memory/graph.rs +332 -0
- package/crates/synapse/src/memory/hallucination.rs +259 -0
- package/crates/synapse/src/memory/mod.rs +7 -0
- package/crates/synapse/src/openai.rs +232 -0
- package/crates/synapse/src/server.rs +166 -0
- package/crates/synapse/src/streaming.rs +80 -0
- package/crates/synapse/src/swarm/coordinator.rs +198 -0
- package/crates/synapse/src/swarm/mod.rs +8 -0
- package/crates/synapse/src/swarm/orchestrator.rs +225 -0
- package/crates/synapse/src/swarm/pool.rs +64 -0
- package/crates/synapse/src/swarm/spawner.rs +199 -0
- package/crates/synapse/src/swarm/synthesizer.rs +26 -0
- package/crates/synapse/src/vram/manager.rs +67 -0
- package/crates/synapse/src/vram/mod.rs +3 -0
- package/docker-compose.yml +19 -0
- package/install.sh +311 -0
- package/package.json +36 -0
- package/python/Dockerfile.learn +18 -0
- package/python/requirements.txt +11 -0
- package/python/synapse_learn/__init__.py +0 -0
- package/python/synapse_learn/datasets.py +233 -0
- package/python/synapse_learn/real_eval.py +616 -0
- package/python/synapse_learn/server.py +431 -0
- package/python/synapse_learn/train_base.py +672 -0
- package/python/synapse_learn/train_specialists.py +787 -0
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
use axum::{
|
|
2
|
+
Json,
|
|
3
|
+
extract::State,
|
|
4
|
+
response::{IntoResponse, Response},
|
|
5
|
+
};
|
|
6
|
+
use serde::{Deserialize, Serialize};
|
|
7
|
+
|
|
8
|
+
use crate::server::SharedState;
|
|
9
|
+
use crate::streaming;
|
|
10
|
+
use crate::memory::{KnowledgeExtractor, HallucinationDetector};
|
|
11
|
+
|
|
12
|
+
#[derive(Debug, Deserialize)]
|
|
13
|
+
pub struct ChatCompletionRequest {
|
|
14
|
+
pub model: Option<String>,
|
|
15
|
+
pub messages: Vec<Message>,
|
|
16
|
+
#[serde(default)]
|
|
17
|
+
pub temperature: Option<f32>,
|
|
18
|
+
#[serde(default)]
|
|
19
|
+
pub top_p: Option<f32>,
|
|
20
|
+
#[serde(default)]
|
|
21
|
+
pub max_tokens: Option<u32>,
|
|
22
|
+
#[serde(default)]
|
|
23
|
+
pub stream: Option<bool>,
|
|
24
|
+
#[serde(default)]
|
|
25
|
+
pub stop: Option<Vec<String>>,
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
29
|
+
pub struct Message {
|
|
30
|
+
pub role: String,
|
|
31
|
+
pub content: String,
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
#[derive(Debug, Serialize)]
|
|
35
|
+
pub struct ChatCompletionResponse {
|
|
36
|
+
pub id: String,
|
|
37
|
+
pub object: String,
|
|
38
|
+
pub created: i64,
|
|
39
|
+
pub model: String,
|
|
40
|
+
pub choices: Vec<Choice>,
|
|
41
|
+
pub usage: Usage,
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
#[derive(Debug, Serialize)]
|
|
45
|
+
pub struct Choice {
|
|
46
|
+
pub index: u32,
|
|
47
|
+
pub message: Message,
|
|
48
|
+
pub finish_reason: String,
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
#[derive(Debug, Serialize)]
|
|
52
|
+
pub struct Usage {
|
|
53
|
+
pub prompt_tokens: u32,
|
|
54
|
+
pub completion_tokens: u32,
|
|
55
|
+
pub total_tokens: u32,
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
#[derive(Debug, Serialize)]
|
|
59
|
+
pub struct ChatCompletionChunk {
|
|
60
|
+
pub id: String,
|
|
61
|
+
pub object: String,
|
|
62
|
+
pub created: i64,
|
|
63
|
+
pub model: String,
|
|
64
|
+
pub choices: Vec<ChunkChoice>,
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
#[derive(Debug, Serialize)]
|
|
68
|
+
pub struct ChunkChoice {
|
|
69
|
+
pub index: u32,
|
|
70
|
+
pub delta: Delta,
|
|
71
|
+
pub finish_reason: Option<String>,
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
#[derive(Debug, Serialize)]
|
|
75
|
+
pub struct Delta {
|
|
76
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
77
|
+
pub role: Option<String>,
|
|
78
|
+
#[serde(skip_serializing_if = "Option::is_none")]
|
|
79
|
+
pub content: Option<String>,
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
#[derive(Debug, Serialize)]
|
|
83
|
+
pub struct ModelList {
|
|
84
|
+
pub object: String,
|
|
85
|
+
pub data: Vec<ModelInfo>,
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
#[derive(Debug, Serialize)]
|
|
89
|
+
pub struct ModelInfo {
|
|
90
|
+
pub id: String,
|
|
91
|
+
pub object: String,
|
|
92
|
+
pub created: i64,
|
|
93
|
+
pub owned_by: String,
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
pub async fn chat_completions(
|
|
97
|
+
State(state): State<SharedState>,
|
|
98
|
+
Json(request): Json<ChatCompletionRequest>,
|
|
99
|
+
) -> Response {
|
|
100
|
+
let is_stream = request.stream.unwrap_or(false);
|
|
101
|
+
|
|
102
|
+
if is_stream {
|
|
103
|
+
streaming::stream_response(state, request).await.into_response()
|
|
104
|
+
} else {
|
|
105
|
+
complete_response(state, request).await.into_response()
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
async fn complete_response(
|
|
110
|
+
state: SharedState,
|
|
111
|
+
request: ChatCompletionRequest,
|
|
112
|
+
) -> Json<ChatCompletionResponse> {
|
|
113
|
+
let state = state.read().await;
|
|
114
|
+
let model_name = request.model.clone().unwrap_or_else(|| state.config.base_model.clone());
|
|
115
|
+
|
|
116
|
+
// Log the user message
|
|
117
|
+
let session_id = uuid::Uuid::new_v4().to_string();
|
|
118
|
+
if let Some(last_msg) = request.messages.last() {
|
|
119
|
+
let _ = state.knowledge.log_message(&session_id, &last_msg.role, &last_msg.content, None);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// Route through orchestrator with Hebbian routing
|
|
123
|
+
let result = state.orchestrator.process(
|
|
124
|
+
&request.messages,
|
|
125
|
+
&state.engine,
|
|
126
|
+
request.max_tokens,
|
|
127
|
+
request.temperature,
|
|
128
|
+
Some(&state.knowledge),
|
|
129
|
+
).await;
|
|
130
|
+
|
|
131
|
+
let (response_text, usage) = match result {
|
|
132
|
+
Ok(result) => {
|
|
133
|
+
// Log the assistant response
|
|
134
|
+
let _ = state.knowledge.log_message(&session_id, "assistant", &result.text, None);
|
|
135
|
+
|
|
136
|
+
// Extract knowledge from the response (real-time learning)
|
|
137
|
+
let _ = KnowledgeExtractor::extract_and_store(
|
|
138
|
+
&state.knowledge, &result.text, "assistant",
|
|
139
|
+
);
|
|
140
|
+
|
|
141
|
+
// Check for user feedback patterns (preference learning)
|
|
142
|
+
if request.messages.len() >= 2 {
|
|
143
|
+
let prev_assistant = request.messages.iter().rev()
|
|
144
|
+
.find(|m| m.role == "assistant")
|
|
145
|
+
.map(|m| m.content.as_str())
|
|
146
|
+
.unwrap_or("");
|
|
147
|
+
if let Some(user_msg) = request.messages.last() {
|
|
148
|
+
let _ = KnowledgeExtractor::extract_preferences(
|
|
149
|
+
&state.knowledge, &user_msg.content, prev_assistant, "general",
|
|
150
|
+
);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
// Verify response against knowledge graph (hallucination detection)
|
|
155
|
+
let verification = HallucinationDetector::verify(&state.knowledge, &result.text);
|
|
156
|
+
if let Ok(ref v) = verification {
|
|
157
|
+
if !v.contradictions.is_empty() {
|
|
158
|
+
tracing::warn!(
|
|
159
|
+
"⚠️ Hallucination detected: {} contradictions in response",
|
|
160
|
+
v.contradictions.len()
|
|
161
|
+
);
|
|
162
|
+
}
|
|
163
|
+
tracing::debug!(
|
|
164
|
+
"Verification: {:.0}% confidence, {} verified, {} unverified, {} contradictions",
|
|
165
|
+
v.confidence * 100.0,
|
|
166
|
+
v.verified_claims.len(),
|
|
167
|
+
v.unverified_claims.len(),
|
|
168
|
+
v.contradictions.len()
|
|
169
|
+
);
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
(result.text, Usage {
|
|
173
|
+
prompt_tokens: result.prompt_tokens,
|
|
174
|
+
completion_tokens: result.completion_tokens,
|
|
175
|
+
total_tokens: result.total_tokens,
|
|
176
|
+
})
|
|
177
|
+
}
|
|
178
|
+
Err(e) => (format!("Error: {e}"), Usage {
|
|
179
|
+
prompt_tokens: 0,
|
|
180
|
+
completion_tokens: 0,
|
|
181
|
+
total_tokens: 0,
|
|
182
|
+
}),
|
|
183
|
+
};
|
|
184
|
+
|
|
185
|
+
let response = ChatCompletionResponse {
|
|
186
|
+
id: format!("chatcmpl-{}", uuid::Uuid::new_v4()),
|
|
187
|
+
object: "chat.completion".into(),
|
|
188
|
+
created: chrono::Utc::now().timestamp(),
|
|
189
|
+
model: model_name,
|
|
190
|
+
choices: vec![Choice {
|
|
191
|
+
index: 0,
|
|
192
|
+
message: Message {
|
|
193
|
+
role: "assistant".into(),
|
|
194
|
+
content: response_text,
|
|
195
|
+
},
|
|
196
|
+
finish_reason: "stop".into(),
|
|
197
|
+
}],
|
|
198
|
+
usage,
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
Json(response)
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
pub async fn list_models(
|
|
205
|
+
State(state): State<SharedState>,
|
|
206
|
+
) -> Json<ModelList> {
|
|
207
|
+
let state = state.read().await;
|
|
208
|
+
|
|
209
|
+
let mut models = vec![
|
|
210
|
+
ModelInfo {
|
|
211
|
+
id: "synapse".into(),
|
|
212
|
+
object: "model".into(),
|
|
213
|
+
created: chrono::Utc::now().timestamp(),
|
|
214
|
+
owned_by: "titan-synapse".into(),
|
|
215
|
+
},
|
|
216
|
+
];
|
|
217
|
+
|
|
218
|
+
// Add each specialist as a model
|
|
219
|
+
for specialist in &state.config.specialists {
|
|
220
|
+
models.push(ModelInfo {
|
|
221
|
+
id: format!("synapse/{}", specialist.name),
|
|
222
|
+
object: "model".into(),
|
|
223
|
+
created: chrono::Utc::now().timestamp(),
|
|
224
|
+
owned_by: "titan-synapse".into(),
|
|
225
|
+
});
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
Json(ModelList {
|
|
229
|
+
object: "list".into(),
|
|
230
|
+
data: models,
|
|
231
|
+
})
|
|
232
|
+
}
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use axum::{
|
|
3
|
+
Router,
|
|
4
|
+
routing::{get, post},
|
|
5
|
+
};
|
|
6
|
+
use std::sync::Arc;
|
|
7
|
+
use tokio::sync::RwLock;
|
|
8
|
+
use tower_http::cors::CorsLayer;
|
|
9
|
+
use tower_http::trace::TraceLayer;
|
|
10
|
+
|
|
11
|
+
use crate::config::SynapseConfig;
|
|
12
|
+
use crate::inference::InferenceEngine;
|
|
13
|
+
use crate::swarm::Orchestrator;
|
|
14
|
+
use crate::memory::KnowledgeGraph;
|
|
15
|
+
|
|
16
|
+
pub struct AppState {
|
|
17
|
+
pub config: SynapseConfig,
|
|
18
|
+
pub engine: InferenceEngine,
|
|
19
|
+
pub orchestrator: Orchestrator,
|
|
20
|
+
pub knowledge: KnowledgeGraph,
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/// Handle to the application state shared by all handlers: reference-counted
/// and guarded by an async read/write lock.
pub type SharedState = Arc<RwLock<AppState>>;
|
|
24
|
+
|
|
25
|
+
pub async fn run(config: SynapseConfig, port: u16) -> Result<()> {
|
|
26
|
+
tracing::info!("Starting TITAN Synapse on port {port}");
|
|
27
|
+
|
|
28
|
+
let knowledge = KnowledgeGraph::new(&config.data_dir.join("knowledge.db"))?;
|
|
29
|
+
let engine = InferenceEngine::new(&config)?;
|
|
30
|
+
let orchestrator = Orchestrator::new(&config);
|
|
31
|
+
|
|
32
|
+
let state: SharedState = Arc::new(RwLock::new(AppState {
|
|
33
|
+
config: config.clone(),
|
|
34
|
+
engine,
|
|
35
|
+
orchestrator,
|
|
36
|
+
knowledge,
|
|
37
|
+
}));
|
|
38
|
+
|
|
39
|
+
let app = Router::new()
|
|
40
|
+
// Web Dashboard — normal people can open a browser and chat
|
|
41
|
+
.route("/", get(dashboard))
|
|
42
|
+
// OpenAI-compatible endpoints
|
|
43
|
+
.route("/v1/chat/completions", post(crate::openai::chat_completions))
|
|
44
|
+
.route("/v1/models", get(crate::openai::list_models))
|
|
45
|
+
// Health
|
|
46
|
+
.route("/health", get(health))
|
|
47
|
+
// Status + Metacognition
|
|
48
|
+
.route("/api/status", get(api_status))
|
|
49
|
+
.route("/api/confidence", get(api_confidence))
|
|
50
|
+
// Adapter management
|
|
51
|
+
.route("/api/adapters/reload", post(api_reload_adapters))
|
|
52
|
+
.layer(CorsLayer::permissive())
|
|
53
|
+
.layer(TraceLayer::new_for_http())
|
|
54
|
+
.with_state(state);
|
|
55
|
+
|
|
56
|
+
let listener = tokio::net::TcpListener::bind(format!("0.0.0.0:{port}")).await?;
|
|
57
|
+
tracing::info!("TITAN Synapse ready at http://0.0.0.0:{port}");
|
|
58
|
+
tracing::info!("Dashboard: http://0.0.0.0:{port}/");
|
|
59
|
+
tracing::info!("OpenAI-compatible API: http://0.0.0.0:{port}/v1/chat/completions");
|
|
60
|
+
|
|
61
|
+
axum::serve(listener, app)
|
|
62
|
+
.with_graceful_shutdown(shutdown_signal())
|
|
63
|
+
.await?;
|
|
64
|
+
|
|
65
|
+
Ok(())
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
async fn dashboard() -> axum::response::Html<&'static str> {
|
|
69
|
+
axum::response::Html(crate::dashboard::DASHBOARD_HTML)
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/// Liveness probe: always answers with the plain-text body `ok`.
async fn health() -> &'static str {
    const HEALTHY: &str = "ok";
    HEALTHY
}
|
|
75
|
+
|
|
76
|
+
async fn api_status(
|
|
77
|
+
state: axum::extract::State<SharedState>,
|
|
78
|
+
) -> axum::Json<serde_json::Value> {
|
|
79
|
+
let state = state.read().await;
|
|
80
|
+
|
|
81
|
+
let fact_count = state.knowledge.fact_count().unwrap_or(0);
|
|
82
|
+
let top_pathways = state.knowledge.top_pathways(5).unwrap_or_default();
|
|
83
|
+
|
|
84
|
+
axum::Json(serde_json::json!({
|
|
85
|
+
"status": "running",
|
|
86
|
+
"version": env!("CARGO_PKG_VERSION"),
|
|
87
|
+
"engine": "synapse",
|
|
88
|
+
"models_loaded": state.engine.loaded_models(),
|
|
89
|
+
"has_models": state.engine.has_models(),
|
|
90
|
+
"specialists": state.config.specialists.iter().map(|s| &s.name).collect::<Vec<_>>(),
|
|
91
|
+
"adapters": state.engine.available_adapters(),
|
|
92
|
+
"coordinator": state.config.coordinator_model,
|
|
93
|
+
"base_model": state.config.base_model,
|
|
94
|
+
"knowledge": {
|
|
95
|
+
"facts": fact_count,
|
|
96
|
+
"conversations": state.knowledge.conversation_count().unwrap_or(0),
|
|
97
|
+
"preference_pairs": state.knowledge.total_preference_count().unwrap_or(0),
|
|
98
|
+
},
|
|
99
|
+
"hebbian_routing": {
|
|
100
|
+
"top_pathways": top_pathways.iter().map(|(p, s, avg)| {
|
|
101
|
+
serde_json::json!({"pathway": p, "strength": s, "avg_score": avg})
|
|
102
|
+
}).collect::<Vec<_>>(),
|
|
103
|
+
},
|
|
104
|
+
}))
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/// Metacognitive confidence report — what the system knows it's good (and bad) at
|
|
108
|
+
async fn api_confidence(
|
|
109
|
+
state: axum::extract::State<SharedState>,
|
|
110
|
+
) -> axum::Json<serde_json::Value> {
|
|
111
|
+
let state = state.read().await;
|
|
112
|
+
|
|
113
|
+
let specialist_confidence = state.knowledge.specialist_confidence_report().unwrap_or_default();
|
|
114
|
+
let pathways = state.knowledge.top_pathways(10).unwrap_or_default();
|
|
115
|
+
|
|
116
|
+
axum::Json(serde_json::json!({
|
|
117
|
+
"metacognition": {
|
|
118
|
+
"description": "Specialist confidence scores — the system knows what it knows",
|
|
119
|
+
"specialists": specialist_confidence,
|
|
120
|
+
"hebbian_pathways": pathways.iter().map(|(p, s, avg)| {
|
|
121
|
+
serde_json::json!({
|
|
122
|
+
"pathway": p,
|
|
123
|
+
"strength": s,
|
|
124
|
+
"avg_score": avg,
|
|
125
|
+
"description": format!("Pathway {} has been reinforced {} times", p, s)
|
|
126
|
+
})
|
|
127
|
+
}).collect::<Vec<_>>(),
|
|
128
|
+
"total_pathways": pathways.len(),
|
|
129
|
+
"learning_status": {
|
|
130
|
+
"preferences_collected": state.knowledge.total_preference_count().unwrap_or(0),
|
|
131
|
+
"conversations_logged": state.knowledge.conversation_count().unwrap_or(0),
|
|
132
|
+
"facts_known": state.knowledge.fact_count().unwrap_or(0),
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
}))
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
/// Reload LoRA adapters from disk — picks up newly trained adapters without restart
|
|
139
|
+
async fn api_reload_adapters(
|
|
140
|
+
state: axum::extract::State<SharedState>,
|
|
141
|
+
) -> axum::Json<serde_json::Value> {
|
|
142
|
+
let mut state = state.write().await;
|
|
143
|
+
match state.engine.reload_adapters() {
|
|
144
|
+
Ok(count) => {
|
|
145
|
+
tracing::info!("Reloaded adapters: {count} found");
|
|
146
|
+
axum::Json(serde_json::json!({
|
|
147
|
+
"status": "ok",
|
|
148
|
+
"adapters_loaded": count,
|
|
149
|
+
"adapters": state.engine.available_adapters(),
|
|
150
|
+
}))
|
|
151
|
+
}
|
|
152
|
+
Err(e) => {
|
|
153
|
+
axum::Json(serde_json::json!({
|
|
154
|
+
"status": "error",
|
|
155
|
+
"error": e.to_string(),
|
|
156
|
+
}))
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
async fn shutdown_signal() {
|
|
162
|
+
tokio::signal::ctrl_c()
|
|
163
|
+
.await
|
|
164
|
+
.expect("failed to install CTRL+C signal handler");
|
|
165
|
+
tracing::info!("Shutting down TITAN Synapse...");
|
|
166
|
+
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
use axum::response::sse::{Event, Sse};
|
|
2
|
+
use axum::response::IntoResponse;
|
|
3
|
+
use futures::stream::{self, Stream};
|
|
4
|
+
use std::convert::Infallible;
|
|
5
|
+
|
|
6
|
+
use crate::openai::{ChatCompletionChunk, ChatCompletionRequest, ChunkChoice, Delta};
|
|
7
|
+
use crate::server::SharedState;
|
|
8
|
+
|
|
9
|
+
pub async fn stream_response(
|
|
10
|
+
state: SharedState,
|
|
11
|
+
request: ChatCompletionRequest,
|
|
12
|
+
) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
|
|
13
|
+
let state_read = state.read().await;
|
|
14
|
+
let model_name = request.model.clone().unwrap_or_else(|| state_read.config.base_model.clone());
|
|
15
|
+
|
|
16
|
+
// Generate full response, then stream it token-by-token
|
|
17
|
+
// In production, this will be replaced with true streaming from the inference engine
|
|
18
|
+
let response_text = match state_read.orchestrator.process(
|
|
19
|
+
&request.messages,
|
|
20
|
+
&state_read.engine,
|
|
21
|
+
request.max_tokens,
|
|
22
|
+
request.temperature,
|
|
23
|
+
Some(&state_read.knowledge),
|
|
24
|
+
).await {
|
|
25
|
+
Ok(result) => result.text,
|
|
26
|
+
Err(e) => format!("Error: {e}"),
|
|
27
|
+
};
|
|
28
|
+
drop(state_read);
|
|
29
|
+
|
|
30
|
+
let id = format!("chatcmpl-{}", uuid::Uuid::new_v4());
|
|
31
|
+
let created = chrono::Utc::now().timestamp();
|
|
32
|
+
|
|
33
|
+
// Split into word-level chunks for streaming effect
|
|
34
|
+
let words: Vec<String> = response_text.split_inclusive(' ')
|
|
35
|
+
.map(|s| s.to_string())
|
|
36
|
+
.collect();
|
|
37
|
+
|
|
38
|
+
let stream = stream::iter(
|
|
39
|
+
// First chunk: role
|
|
40
|
+
std::iter::once(Ok(Event::default().data(
|
|
41
|
+
serde_json::to_string(&ChatCompletionChunk {
|
|
42
|
+
id: id.clone(),
|
|
43
|
+
object: "chat.completion.chunk".into(),
|
|
44
|
+
created,
|
|
45
|
+
model: model_name.clone(),
|
|
46
|
+
choices: vec![ChunkChoice {
|
|
47
|
+
index: 0,
|
|
48
|
+
delta: Delta {
|
|
49
|
+
role: Some("assistant".into()),
|
|
50
|
+
content: None,
|
|
51
|
+
},
|
|
52
|
+
finish_reason: None,
|
|
53
|
+
}],
|
|
54
|
+
}).unwrap()
|
|
55
|
+
)))
|
|
56
|
+
// Content chunks
|
|
57
|
+
.chain(words.into_iter().map(move |word| {
|
|
58
|
+
Ok(Event::default().data(
|
|
59
|
+
serde_json::to_string(&ChatCompletionChunk {
|
|
60
|
+
id: id.clone(),
|
|
61
|
+
object: "chat.completion.chunk".into(),
|
|
62
|
+
created,
|
|
63
|
+
model: model_name.clone(),
|
|
64
|
+
choices: vec![ChunkChoice {
|
|
65
|
+
index: 0,
|
|
66
|
+
delta: Delta {
|
|
67
|
+
role: None,
|
|
68
|
+
content: Some(word),
|
|
69
|
+
},
|
|
70
|
+
finish_reason: None,
|
|
71
|
+
}],
|
|
72
|
+
}).unwrap()
|
|
73
|
+
))
|
|
74
|
+
}))
|
|
75
|
+
// Final chunk: [DONE]
|
|
76
|
+
.chain(std::iter::once(Ok(Event::default().data("[DONE]"))))
|
|
77
|
+
);
|
|
78
|
+
|
|
79
|
+
Sse::new(stream)
|
|
80
|
+
}
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
use crate::config::SynapseConfig;
|
|
2
|
+
use crate::memory::KnowledgeGraph;
|
|
3
|
+
use super::orchestrator::{RoutingDecision, SubTask};
|
|
4
|
+
|
|
5
|
+
/// Coordinator — decides which specialist(s) should handle a request.
///
/// Routing is keyword based and can be boosted by Hebbian pathway strengths
/// from the knowledge graph ("pathways that fire together, wire together").
/// Single-specialist decisions carry a metacognitive confidence score.
pub struct Coordinator {
    /// One `(capability keywords, specialist name)` entry per configured
    /// specialist (will be replaced by learned routing).
    keyword_routes: Vec<(Vec<String>, String)>,
}
|
|
12
|
+
|
|
13
|
+
impl Coordinator {
|
|
14
|
+
pub fn new(config: &SynapseConfig) -> Self {
|
|
15
|
+
let mut keyword_routes = Vec::new();
|
|
16
|
+
|
|
17
|
+
for specialist in &config.specialists {
|
|
18
|
+
keyword_routes.push((
|
|
19
|
+
specialist.capabilities.clone(),
|
|
20
|
+
specialist.name.clone(),
|
|
21
|
+
));
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
Self {
|
|
25
|
+
keyword_routes,
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/// Route a query to the appropriate specialist(s)
|
|
30
|
+
/// Returns routing decision with metacognitive confidence score
|
|
31
|
+
pub fn route(&self, query: &str, knowledge: Option<&KnowledgeGraph>) -> RoutingDecision {
|
|
32
|
+
let query_lower = query.to_lowercase();
|
|
33
|
+
let words: Vec<&str> = query_lower.split_whitespace().collect();
|
|
34
|
+
|
|
35
|
+
// Score each specialist based on keyword matches
|
|
36
|
+
let mut scores: Vec<(String, f32)> = self.keyword_routes.iter()
|
|
37
|
+
.map(|(keywords, name)| {
|
|
38
|
+
let keyword_matches = keywords.iter()
|
|
39
|
+
.filter(|kw| words.iter().any(|w| w.contains(kw.as_str())))
|
|
40
|
+
.count() as f32;
|
|
41
|
+
|
|
42
|
+
// Normalize by total keywords — more specific matches = higher confidence
|
|
43
|
+
let keyword_ratio = if keywords.is_empty() {
|
|
44
|
+
0.0
|
|
45
|
+
} else {
|
|
46
|
+
keyword_matches / keywords.len() as f32
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
// Base confidence: keyword match ratio (0.0 - 1.0)
|
|
50
|
+
let confidence = keyword_matches + keyword_ratio * 2.0;
|
|
51
|
+
(name.clone(), confidence)
|
|
52
|
+
})
|
|
53
|
+
.filter(|(_, score)| *score > 0.0)
|
|
54
|
+
.collect();
|
|
55
|
+
|
|
56
|
+
scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
|
57
|
+
|
|
58
|
+
// Boost scores using Hebbian pathway strengths from the knowledge graph
|
|
59
|
+
if let Some(kg) = knowledge {
|
|
60
|
+
for (name, score) in &mut scores {
|
|
61
|
+
let pathway = vec![name.clone()];
|
|
62
|
+
if let Ok(strength) = kg.pathway_strength(&pathway) {
|
|
63
|
+
// Add pathway strength as bonus (clamped to reasonable range)
|
|
64
|
+
*score += (strength.min(10.0) as f32) * 0.5;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// Detect complexity indicators for swarm mode
|
|
71
|
+
let complexity_keywords = ["and", "also", "plus", "then", "after", "build", "create", "implement"];
|
|
72
|
+
let complexity = complexity_keywords.iter()
|
|
73
|
+
.filter(|kw| query_lower.contains(*kw))
|
|
74
|
+
.count();
|
|
75
|
+
|
|
76
|
+
// Calculate confidence — how sure are we about the routing?
|
|
77
|
+
let top_confidence = scores.first().map(|(_, s)| *s).unwrap_or(0.0);
|
|
78
|
+
let second_confidence = scores.get(1).map(|(_, s)| *s).unwrap_or(0.0);
|
|
79
|
+
let confidence_gap = if second_confidence > 0.0 {
|
|
80
|
+
(top_confidence - second_confidence) / top_confidence
|
|
81
|
+
} else if top_confidence > 0.0 {
|
|
82
|
+
1.0 // Only one match — high confidence
|
|
83
|
+
} else {
|
|
84
|
+
0.0 // No matches — low confidence, use general
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
// Normalize to 0-5 scale for pathway reinforcement
|
|
88
|
+
let routing_confidence = (top_confidence.min(5.0)).max(1.0);
|
|
89
|
+
|
|
90
|
+
if complexity >= 2 && scores.len() >= 2 {
|
|
91
|
+
// Complex query — use swarm with parallel execution
|
|
92
|
+
let subtasks: Vec<SubTask> = scores.iter()
|
|
93
|
+
.take(3)
|
|
94
|
+
.map(|(specialist, _)| SubTask {
|
|
95
|
+
specialist: specialist.clone(),
|
|
96
|
+
description: format!("Handle {specialist} aspects of: {query}"),
|
|
97
|
+
})
|
|
98
|
+
.collect();
|
|
99
|
+
|
|
100
|
+
RoutingDecision::Swarm { subtasks }
|
|
101
|
+
} else if let Some((specialist, _)) = scores.first() {
|
|
102
|
+
RoutingDecision::Single {
|
|
103
|
+
specialist: specialist.clone(),
|
|
104
|
+
confidence: routing_confidence,
|
|
105
|
+
}
|
|
106
|
+
} else {
|
|
107
|
+
// Default to general specialist — low confidence
|
|
108
|
+
RoutingDecision::Single {
|
|
109
|
+
specialist: "general".into(),
|
|
110
|
+
confidence: 1.0,
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{SynapseConfig, SpecialistConfig};

    /// Builds a specialist fixture; only the name and capabilities vary
    /// between the fixtures used in these tests.
    fn specialist(name: &str, capabilities: &[&str]) -> SpecialistConfig {
        SpecialistConfig {
            name: name.into(),
            capabilities: capabilities.iter().map(|kw| (*kw).to_owned()).collect(),
            base_model: None,
            adapter: None,
            system_prompt: None,
            priority: 60,
        }
    }

    /// Default configuration with three specialists: Python, SQL and DevOps.
    fn test_config() -> SynapseConfig {
        let mut config = SynapseConfig::default();
        config.specialists = vec![
            specialist("python_expert", &["python", "decorator", "django", "flask"]),
            specialist("sql_expert", &["sql", "database", "query", "postgres"]),
            specialist("devops_expert", &["docker", "kubernetes", "deploy", "ci"]),
        ];
        config
    }

    /// A query that touches one specialist's keywords goes to that specialist.
    #[test]
    fn test_single_routing() {
        let coordinator = Coordinator::new(&test_config());

        let RoutingDecision::Single { specialist, confidence } =
            coordinator.route("What is a Python decorator?", None)
        else {
            panic!("Expected single routing");
        };
        assert_eq!(specialist, "python_expert");
        assert!(confidence >= 1.0);
    }

    /// A multi-part query that touches several specialists becomes a swarm.
    #[test]
    fn test_swarm_routing() {
        let coordinator = Coordinator::new(&test_config());

        let RoutingDecision::Swarm { subtasks } = coordinator.route(
            "Build a Python API and deploy it with Docker and also create the database",
            None,
        ) else {
            panic!("Expected swarm routing for complex query");
        };
        assert!(subtasks.len() >= 2, "Should route to multiple specialists");
    }

    /// A query with no matching keywords falls back to the general specialist.
    #[test]
    fn test_default_routing() {
        let coordinator = Coordinator::new(&test_config());

        let RoutingDecision::Single { specialist, confidence } =
            coordinator.route("What is the meaning of life?", None)
        else {
            panic!("Expected default general routing");
        };
        assert_eq!(specialist, "general");
        assert_eq!(confidence, 1.0);
    }
}
|