titan-synapse 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +187 -0
- package/Cargo.lock +3976 -0
- package/Cargo.toml +10 -0
- package/LICENSE +190 -0
- package/PROGRESS.md +151 -0
- package/README.md +514 -0
- package/TEST_LOG.md +220 -0
- package/config/default.yaml +36 -0
- package/crates/synapse/Cargo.toml +70 -0
- package/crates/synapse/src/cli/bench.rs +44 -0
- package/crates/synapse/src/cli/eval.rs +395 -0
- package/crates/synapse/src/cli/export.rs +45 -0
- package/crates/synapse/src/cli/hub.rs +179 -0
- package/crates/synapse/src/cli/import.rs +35 -0
- package/crates/synapse/src/cli/learn.rs +53 -0
- package/crates/synapse/src/cli/mod.rs +10 -0
- package/crates/synapse/src/cli/models.rs +36 -0
- package/crates/synapse/src/cli/pull.rs +60 -0
- package/crates/synapse/src/cli/status.rs +52 -0
- package/crates/synapse/src/cli/train.rs +99 -0
- package/crates/synapse/src/config.rs +220 -0
- package/crates/synapse/src/dashboard.rs +281 -0
- package/crates/synapse/src/format/manifest.rs +57 -0
- package/crates/synapse/src/format/mod.rs +4 -0
- package/crates/synapse/src/format/packer.rs +213 -0
- package/crates/synapse/src/inference/engine.rs +361 -0
- package/crates/synapse/src/inference/kv_cache.rs +97 -0
- package/crates/synapse/src/inference/lora.rs +166 -0
- package/crates/synapse/src/inference/mod.rs +9 -0
- package/crates/synapse/src/inference/model.rs +167 -0
- package/crates/synapse/src/inference/sampler.rs +133 -0
- package/crates/synapse/src/inference/speculative.rs +153 -0
- package/crates/synapse/src/learn/cloud_fallback.rs +186 -0
- package/crates/synapse/src/learn/engine.rs +109 -0
- package/crates/synapse/src/learn/mod.rs +5 -0
- package/crates/synapse/src/main.rs +185 -0
- package/crates/synapse/src/memory/extractor.rs +201 -0
- package/crates/synapse/src/memory/graph.rs +332 -0
- package/crates/synapse/src/memory/hallucination.rs +259 -0
- package/crates/synapse/src/memory/mod.rs +7 -0
- package/crates/synapse/src/openai.rs +232 -0
- package/crates/synapse/src/server.rs +166 -0
- package/crates/synapse/src/streaming.rs +80 -0
- package/crates/synapse/src/swarm/coordinator.rs +198 -0
- package/crates/synapse/src/swarm/mod.rs +8 -0
- package/crates/synapse/src/swarm/orchestrator.rs +225 -0
- package/crates/synapse/src/swarm/pool.rs +64 -0
- package/crates/synapse/src/swarm/spawner.rs +199 -0
- package/crates/synapse/src/swarm/synthesizer.rs +26 -0
- package/crates/synapse/src/vram/manager.rs +67 -0
- package/crates/synapse/src/vram/mod.rs +3 -0
- package/docker-compose.yml +19 -0
- package/install.sh +311 -0
- package/package.json +36 -0
- package/python/Dockerfile.learn +18 -0
- package/python/requirements.txt +11 -0
- package/python/synapse_learn/__init__.py +0 -0
- package/python/synapse_learn/datasets.py +233 -0
- package/python/synapse_learn/real_eval.py +616 -0
- package/python/synapse_learn/server.py +431 -0
- package/python/synapse_learn/train_base.py +672 -0
- package/python/synapse_learn/train_specialists.py +787 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use rusqlite::Connection;
|
|
3
|
+
use serde_json;
|
|
4
|
+
use std::path::Path;
|
|
5
|
+
use std::sync::Mutex;
|
|
6
|
+
|
|
7
|
+
/// SQLite-backed knowledge graph shared across all specialists.
///
/// Holds a single `rusqlite::Connection` behind a `Mutex`; every method of the
/// `impl` below locks that mutex for the duration of its database call.
|
|
8
|
+
pub struct KnowledgeGraph {
|
|
9
|
+
// The one SQLite connection, guarded so `&self` methods can use it.
conn: Mutex<Connection>,
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
impl KnowledgeGraph {
|
|
13
|
+
pub fn new(db_path: &Path) -> Result<Self> {
|
|
14
|
+
if let Some(parent) = db_path.parent() {
|
|
15
|
+
std::fs::create_dir_all(parent)?;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
let conn = Connection::open(db_path)?;
|
|
19
|
+
|
|
20
|
+
conn.execute_batch("
|
|
21
|
+
PRAGMA journal_mode=WAL;
|
|
22
|
+
PRAGMA synchronous=NORMAL;
|
|
23
|
+
CREATE TABLE IF NOT EXISTS facts (
|
|
24
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
25
|
+
subject TEXT NOT NULL,
|
|
26
|
+
predicate TEXT NOT NULL,
|
|
27
|
+
object TEXT NOT NULL,
|
|
28
|
+
confidence REAL DEFAULT 1.0,
|
|
29
|
+
source TEXT,
|
|
30
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
31
|
+
updated_at TEXT DEFAULT (datetime('now'))
|
|
32
|
+
);
|
|
33
|
+
CREATE INDEX IF NOT EXISTS idx_facts_subject ON facts(subject);
|
|
34
|
+
CREATE INDEX IF NOT EXISTS idx_facts_predicate ON facts(predicate);
|
|
35
|
+
|
|
36
|
+
CREATE TABLE IF NOT EXISTS conversations (
|
|
37
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
38
|
+
session_id TEXT NOT NULL,
|
|
39
|
+
role TEXT NOT NULL,
|
|
40
|
+
content TEXT NOT NULL,
|
|
41
|
+
specialist TEXT,
|
|
42
|
+
score REAL,
|
|
43
|
+
created_at TEXT DEFAULT (datetime('now'))
|
|
44
|
+
);
|
|
45
|
+
CREATE INDEX IF NOT EXISTS idx_conv_session ON conversations(session_id);
|
|
46
|
+
|
|
47
|
+
CREATE TABLE IF NOT EXISTS preferences (
|
|
48
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
49
|
+
specialist TEXT NOT NULL,
|
|
50
|
+
prompt TEXT NOT NULL,
|
|
51
|
+
chosen TEXT NOT NULL,
|
|
52
|
+
rejected TEXT NOT NULL,
|
|
53
|
+
created_at TEXT DEFAULT (datetime('now'))
|
|
54
|
+
);
|
|
55
|
+
|
|
56
|
+
CREATE TABLE IF NOT EXISTS routing_pathways (
|
|
57
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
58
|
+
pathway TEXT NOT NULL UNIQUE,
|
|
59
|
+
success_count INTEGER DEFAULT 0,
|
|
60
|
+
failure_count INTEGER DEFAULT 0,
|
|
61
|
+
avg_score REAL DEFAULT 0.0,
|
|
62
|
+
last_used TEXT DEFAULT (datetime('now'))
|
|
63
|
+
);
|
|
64
|
+
CREATE INDEX IF NOT EXISTS idx_pathway ON routing_pathways(pathway);
|
|
65
|
+
|
|
66
|
+
CREATE TABLE IF NOT EXISTS specialist_stats (
|
|
67
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
68
|
+
specialist TEXT NOT NULL,
|
|
69
|
+
domain TEXT NOT NULL,
|
|
70
|
+
request_count INTEGER DEFAULT 0,
|
|
71
|
+
avg_score REAL DEFAULT 0.0,
|
|
72
|
+
avg_tok_per_sec REAL DEFAULT 0.0,
|
|
73
|
+
last_used TEXT DEFAULT (datetime('now')),
|
|
74
|
+
UNIQUE(specialist, domain)
|
|
75
|
+
);
|
|
76
|
+
")?;
|
|
77
|
+
|
|
78
|
+
Ok(Self { conn: Mutex::new(conn) })
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/// Store a fact triple
|
|
82
|
+
pub fn add_fact(&self, subject: &str, predicate: &str, object: &str, source: Option<&str>) -> Result<()> {
|
|
83
|
+
self.conn.lock().unwrap().execute(
|
|
84
|
+
"INSERT INTO facts (subject, predicate, object, source) VALUES (?1, ?2, ?3, ?4)",
|
|
85
|
+
rusqlite::params![subject, predicate, object, source],
|
|
86
|
+
)?;
|
|
87
|
+
Ok(())
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/// Query facts about a subject
|
|
91
|
+
pub fn query_facts(&self, subject: &str) -> Result<Vec<(String, String, f64)>> {
|
|
92
|
+
let conn = self.conn.lock().unwrap();
|
|
93
|
+
let mut stmt = conn.prepare(
|
|
94
|
+
"SELECT predicate, object, confidence FROM facts WHERE subject = ?1 ORDER BY confidence DESC"
|
|
95
|
+
)?;
|
|
96
|
+
|
|
97
|
+
let facts = stmt.query_map([subject], |row| {
|
|
98
|
+
Ok((
|
|
99
|
+
row.get::<_, String>(0)?,
|
|
100
|
+
row.get::<_, String>(1)?,
|
|
101
|
+
row.get::<_, f64>(2)?,
|
|
102
|
+
))
|
|
103
|
+
})?
|
|
104
|
+
.collect::<Result<Vec<_>, _>>()?;
|
|
105
|
+
|
|
106
|
+
Ok(facts)
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/// Store a conversation message
|
|
110
|
+
pub fn log_message(&self, session_id: &str, role: &str, content: &str, specialist: Option<&str>) -> Result<()> {
|
|
111
|
+
self.conn.lock().unwrap().execute(
|
|
112
|
+
"INSERT INTO conversations (session_id, role, content, specialist) VALUES (?1, ?2, ?3, ?4)",
|
|
113
|
+
rusqlite::params![session_id, role, content, specialist],
|
|
114
|
+
)?;
|
|
115
|
+
Ok(())
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/// Store a preference pair for DPO training
|
|
119
|
+
pub fn add_preference(&self, specialist: &str, prompt: &str, chosen: &str, rejected: &str) -> Result<()> {
|
|
120
|
+
self.conn.lock().unwrap().execute(
|
|
121
|
+
"INSERT INTO preferences (specialist, prompt, chosen, rejected) VALUES (?1, ?2, ?3, ?4)",
|
|
122
|
+
rusqlite::params![specialist, prompt, chosen, rejected],
|
|
123
|
+
)?;
|
|
124
|
+
Ok(())
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
/// Count preference pairs for a specialist
|
|
128
|
+
pub fn preference_count(&self, specialist: &str) -> Result<u32> {
|
|
129
|
+
let count: u32 = self.conn.lock().unwrap().query_row(
|
|
130
|
+
"SELECT COUNT(*) FROM preferences WHERE specialist = ?1",
|
|
131
|
+
[specialist],
|
|
132
|
+
|row| row.get(0),
|
|
133
|
+
)?;
|
|
134
|
+
Ok(count)
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/// Reinforce a routing pathway (Hebbian: pathways that fire together, wire together)
|
|
138
|
+
pub fn reinforce_pathway(&self, specialists: &[String], score: f32) -> Result<()> {
|
|
139
|
+
let pathway = specialists.join("+");
|
|
140
|
+
let conn = self.conn.lock().unwrap();
|
|
141
|
+
conn.execute(
|
|
142
|
+
"INSERT INTO routing_pathways (pathway, success_count, avg_score, last_used)
|
|
143
|
+
VALUES (?1, 1, ?2, datetime('now'))
|
|
144
|
+
ON CONFLICT(pathway) DO UPDATE SET
|
|
145
|
+
success_count = success_count + 1,
|
|
146
|
+
avg_score = (avg_score * success_count + ?2) / (success_count + 1),
|
|
147
|
+
last_used = datetime('now')",
|
|
148
|
+
rusqlite::params![pathway, score],
|
|
149
|
+
)?;
|
|
150
|
+
Ok(())
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
/// Record a pathway failure
|
|
154
|
+
pub fn weaken_pathway(&self, specialists: &[String]) -> Result<()> {
|
|
155
|
+
let pathway = specialists.join("+");
|
|
156
|
+
let conn = self.conn.lock().unwrap();
|
|
157
|
+
conn.execute(
|
|
158
|
+
"INSERT INTO routing_pathways (pathway, failure_count, last_used)
|
|
159
|
+
VALUES (?1, 1, 0.0, datetime('now'))
|
|
160
|
+
ON CONFLICT(pathway) DO UPDATE SET
|
|
161
|
+
failure_count = failure_count + 1,
|
|
162
|
+
last_used = datetime('now')",
|
|
163
|
+
rusqlite::params![pathway],
|
|
164
|
+
)?;
|
|
165
|
+
Ok(())
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
/// Get pathway strength (success_count - failure_count, weighted by avg_score)
|
|
169
|
+
pub fn pathway_strength(&self, specialists: &[String]) -> Result<f64> {
|
|
170
|
+
let pathway = specialists.join("+");
|
|
171
|
+
let conn = self.conn.lock().unwrap();
|
|
172
|
+
let result: f64 = conn.query_row(
|
|
173
|
+
"SELECT (success_count - failure_count) * avg_score FROM routing_pathways WHERE pathway = ?1",
|
|
174
|
+
[&pathway],
|
|
175
|
+
|row| row.get(0),
|
|
176
|
+
).unwrap_or(0.0);
|
|
177
|
+
Ok(result)
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/// Get top routing pathways by strength
|
|
181
|
+
pub fn top_pathways(&self, limit: u32) -> Result<Vec<(String, i64, f64)>> {
|
|
182
|
+
let conn = self.conn.lock().unwrap();
|
|
183
|
+
let mut stmt = conn.prepare(
|
|
184
|
+
"SELECT pathway, (success_count - failure_count) as strength, avg_score
|
|
185
|
+
FROM routing_pathways
|
|
186
|
+
ORDER BY strength * avg_score DESC
|
|
187
|
+
LIMIT ?1"
|
|
188
|
+
)?;
|
|
189
|
+
let results = stmt.query_map([limit], |row| {
|
|
190
|
+
Ok((
|
|
191
|
+
row.get::<_, String>(0)?,
|
|
192
|
+
row.get::<_, i64>(1)?,
|
|
193
|
+
row.get::<_, f64>(2)?,
|
|
194
|
+
))
|
|
195
|
+
})?.collect::<Result<Vec<_>, _>>()?;
|
|
196
|
+
Ok(results)
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
/// Update specialist stats for a domain
|
|
200
|
+
pub fn update_specialist_stats(
|
|
201
|
+
&self,
|
|
202
|
+
specialist: &str,
|
|
203
|
+
domain: &str,
|
|
204
|
+
score: f32,
|
|
205
|
+
tok_per_sec: f64,
|
|
206
|
+
) -> Result<()> {
|
|
207
|
+
let conn = self.conn.lock().unwrap();
|
|
208
|
+
conn.execute(
|
|
209
|
+
"INSERT INTO specialist_stats (specialist, domain, request_count, avg_score, avg_tok_per_sec, last_used)
|
|
210
|
+
VALUES (?1, ?2, 1, ?3, ?4, datetime('now'))
|
|
211
|
+
ON CONFLICT(specialist, domain) DO UPDATE SET
|
|
212
|
+
request_count = request_count + 1,
|
|
213
|
+
avg_score = (avg_score * request_count + ?3) / (request_count + 1),
|
|
214
|
+
avg_tok_per_sec = (avg_tok_per_sec * request_count + ?4) / (request_count + 1),
|
|
215
|
+
last_used = datetime('now')",
|
|
216
|
+
rusqlite::params![specialist, domain, score, tok_per_sec],
|
|
217
|
+
)?;
|
|
218
|
+
Ok(())
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
/// Get total facts count
|
|
222
|
+
pub fn fact_count(&self) -> Result<u32> {
|
|
223
|
+
let count: u32 = self.conn.lock().unwrap().query_row(
|
|
224
|
+
"SELECT COUNT(*) FROM facts",
|
|
225
|
+
[],
|
|
226
|
+
|row| row.get(0),
|
|
227
|
+
)?;
|
|
228
|
+
Ok(count)
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
/// Get total conversation messages count
|
|
232
|
+
pub fn conversation_count(&self) -> Result<u32> {
|
|
233
|
+
let count: u32 = self.conn.lock().unwrap().query_row(
|
|
234
|
+
"SELECT COUNT(*) FROM conversations",
|
|
235
|
+
[],
|
|
236
|
+
|row| row.get(0),
|
|
237
|
+
)?;
|
|
238
|
+
Ok(count)
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
/// Metacognitive confidence report — per-specialist performance stats
|
|
242
|
+
pub fn specialist_confidence_report(&self) -> Result<Vec<serde_json::Value>> {
|
|
243
|
+
let conn = self.conn.lock().unwrap();
|
|
244
|
+
let mut stmt = conn.prepare(
|
|
245
|
+
"SELECT specialist, domain, request_count, avg_score, avg_tok_per_sec
|
|
246
|
+
FROM specialist_stats ORDER BY avg_score DESC"
|
|
247
|
+
)?;
|
|
248
|
+
|
|
249
|
+
let results = stmt.query_map([], |row| {
|
|
250
|
+
Ok(serde_json::json!({
|
|
251
|
+
"specialist": row.get::<_, String>(0)?,
|
|
252
|
+
"domain": row.get::<_, String>(1)?,
|
|
253
|
+
"requests": row.get::<_, i64>(2)?,
|
|
254
|
+
"avg_score": row.get::<_, f64>(3)?,
|
|
255
|
+
"avg_tok_per_sec": row.get::<_, f64>(4)?,
|
|
256
|
+
}))
|
|
257
|
+
})?.filter_map(|r| r.ok()).collect();
|
|
258
|
+
|
|
259
|
+
Ok(results)
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
/// Get total preference pairs count
|
|
263
|
+
pub fn total_preference_count(&self) -> Result<u32> {
|
|
264
|
+
let count: u32 = self.conn.lock().unwrap().query_row(
|
|
265
|
+
"SELECT COUNT(*) FROM preferences",
|
|
266
|
+
[],
|
|
267
|
+
|row| row.get(0),
|
|
268
|
+
)?;
|
|
269
|
+
Ok(count)
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_knowledge_graph() {
        let tmp = tempfile::tempdir().unwrap();
        let db_path = tmp.path().join("test.db");
        let kg = KnowledgeGraph::new(&db_path).unwrap();

        kg.add_fact("Python", "is_a", "programming language", Some("test")).unwrap();
        kg.add_fact("Python", "created_by", "Guido van Rossum", Some("test")).unwrap();

        let facts = kg.query_facts("Python").unwrap();
        assert_eq!(facts.len(), 2);
        assert_eq!(kg.fact_count().unwrap(), 2);
    }

    #[test]
    fn test_hebbian_routing() {
        let tmp = tempfile::tempdir().unwrap();
        let db_path = tmp.path().join("test.db");
        let kg = KnowledgeGraph::new(&db_path).unwrap();

        // Reinforce a pathway multiple times
        let pathway = vec!["python_expert".to_string(), "reviewer".to_string()];
        kg.reinforce_pathway(&pathway, 4.5).unwrap();
        kg.reinforce_pathway(&pathway, 4.8).unwrap();

        let strength = kg.pathway_strength(&pathway).unwrap();
        assert!(strength > 0.0, "Pathway should have positive strength");

        // Check top pathways
        let top = kg.top_pathways(10).unwrap();
        assert_eq!(top.len(), 1);
        assert_eq!(top[0].0, "python_expert+reviewer");
    }

    #[test]
    fn test_specialist_stats() {
        let tmp = tempfile::tempdir().unwrap();
        let db_path = tmp.path().join("test.db");
        let kg = KnowledgeGraph::new(&db_path).unwrap();

        kg.update_specialist_stats("python_expert", "coding", 4.5, 200.0).unwrap();
        kg.update_specialist_stats("python_expert", "coding", 4.8, 220.0).unwrap();
        kg.update_specialist_stats("sql_expert", "database", 4.0, 180.0).unwrap();

        // Stats are accumulated per (specialist, domain) and reported best-first.
        let report = kg.specialist_confidence_report().unwrap();
        assert_eq!(report.len(), 2);

        assert_eq!(report[0]["specialist"].as_str(), Some("python_expert"));
        assert_eq!(report[0]["domain"].as_str(), Some("coding"));
        assert_eq!(report[0]["requests"].as_i64(), Some(2));
        let avg_score = report[0]["avg_score"].as_f64().unwrap();
        assert!((avg_score - 4.65).abs() < 1e-3, "running average of 4.5 and 4.8");
        let avg_speed = report[0]["avg_tok_per_sec"].as_f64().unwrap();
        assert!((avg_speed - 210.0).abs() < 1e-9, "running average of 200 and 220");

        assert_eq!(report[1]["specialist"].as_str(), Some("sql_expert"));
        assert_eq!(report[1]["requests"].as_i64(), Some(1));
    }

    #[test]
    fn test_preferences() {
        let tmp = tempfile::tempdir().unwrap();
        let db_path = tmp.path().join("test.db");
        let kg = KnowledgeGraph::new(&db_path).unwrap();

        kg.add_preference("python_expert", "What is a list?", "good answer", "bad answer").unwrap();
        assert_eq!(kg.preference_count("python_expert").unwrap(), 1);
    }
}
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use super::graph::KnowledgeGraph;
|
|
3
|
+
|
|
4
|
+
/// Hallucination detector — cross-references model outputs against the knowledge graph.
///
/// If a response claims something that contradicts known facts, it is flagged.
/// Claims the graph cannot confirm are reported as unverified, so callers can
/// weigh them before presenting the response.
///
/// The aim is to let small models be trusted: a model that knows what it does
/// not know is more useful than a larger one that is confidently wrong.
///
/// The type carries no state; all functionality lives in associated functions.
#[derive(Debug, Clone, Copy, Default)]
pub struct HallucinationDetector;
|
|
11
|
+
|
|
12
|
+
/// Outcome of checking one response against the knowledge graph.
#[derive(Debug, Clone, PartialEq)]
pub struct VerificationResult {
    /// Overall confidence score (0.0 - 1.0)
    pub confidence: f64,
    /// Claims that were verified against knowledge graph
    pub verified_claims: Vec<Claim>,
    /// Claims that contradict known facts
    pub contradictions: Vec<Claim>,
    /// Claims that couldn't be verified (might be hallucination)
    pub unverified_claims: Vec<Claim>,
    /// Whether the response should be flagged
    pub flagged: bool,
}

/// A single factual statement extracted from a response, with its verdict.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Claim {
    /// The sentence the claim was extracted from.
    pub text: String,
    /// The subject used to look up facts in the knowledge graph.
    pub subject: String,
    /// How the claim compared against the known facts.
    pub status: ClaimStatus,
}

/// Verdict for a [`Claim`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ClaimStatus {
    /// Matches a known fact
    Verified,
    /// Contradicts a known fact; the payload is the known fact that contradicts it
    Contradicted(String),
    /// No matching fact found
    Unverified,
}
|
|
42
|
+
|
|
43
|
+
impl HallucinationDetector {
|
|
44
|
+
/// Check a response against the knowledge graph
|
|
45
|
+
pub fn verify(kg: &KnowledgeGraph, response: &str) -> Result<VerificationResult> {
|
|
46
|
+
let mut verified = Vec::new();
|
|
47
|
+
let mut contradictions = Vec::new();
|
|
48
|
+
let mut unverified = Vec::new();
|
|
49
|
+
|
|
50
|
+
// Extract potential claims from the response
|
|
51
|
+
let claims = Self::extract_claims(response);
|
|
52
|
+
|
|
53
|
+
for claim in &claims {
|
|
54
|
+
// Check if we have any facts about this subject
|
|
55
|
+
match kg.query_facts(&claim.subject) {
|
|
56
|
+
Ok(facts) if !facts.is_empty() => {
|
|
57
|
+
// We know something about this subject
|
|
58
|
+
let claim_lower = claim.text.to_lowercase();
|
|
59
|
+
let mut found_match = false;
|
|
60
|
+
let mut found_contradiction = false;
|
|
61
|
+
|
|
62
|
+
for (predicate, object, confidence) in &facts {
|
|
63
|
+
let fact_text = format!("{} {}", predicate, object).to_lowercase();
|
|
64
|
+
|
|
65
|
+
// Simple semantic overlap check
|
|
66
|
+
let overlap = word_overlap(&claim_lower, &fact_text);
|
|
67
|
+
|
|
68
|
+
if overlap > 0.3 && *confidence > 0.5 {
|
|
69
|
+
found_match = true;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// Check for explicit contradictions
|
|
73
|
+
if contains_negation(&claim_lower, &fact_text) {
|
|
74
|
+
found_contradiction = true;
|
|
75
|
+
contradictions.push(Claim {
|
|
76
|
+
text: claim.text.clone(),
|
|
77
|
+
subject: claim.subject.clone(),
|
|
78
|
+
status: ClaimStatus::Contradicted(format!("{} {} {}", claim.subject, predicate, object)),
|
|
79
|
+
});
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
if found_match && !found_contradiction {
|
|
84
|
+
verified.push(Claim {
|
|
85
|
+
text: claim.text.clone(),
|
|
86
|
+
subject: claim.subject.clone(),
|
|
87
|
+
status: ClaimStatus::Verified,
|
|
88
|
+
});
|
|
89
|
+
} else if !found_contradiction {
|
|
90
|
+
unverified.push(Claim {
|
|
91
|
+
text: claim.text.clone(),
|
|
92
|
+
subject: claim.subject.clone(),
|
|
93
|
+
status: ClaimStatus::Unverified,
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
_ => {
|
|
98
|
+
// No facts about this subject — can't verify
|
|
99
|
+
unverified.push(Claim {
|
|
100
|
+
text: claim.text.clone(),
|
|
101
|
+
subject: claim.subject.clone(),
|
|
102
|
+
status: ClaimStatus::Unverified,
|
|
103
|
+
});
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
let total = verified.len() + contradictions.len() + unverified.len();
|
|
109
|
+
let confidence = if total > 0 {
|
|
110
|
+
(verified.len() as f64 / total as f64).max(0.1)
|
|
111
|
+
} else {
|
|
112
|
+
0.5 // No claims to verify
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
let flagged = !contradictions.is_empty() || (unverified.len() > verified.len() * 2);
|
|
116
|
+
|
|
117
|
+
Ok(VerificationResult {
|
|
118
|
+
confidence,
|
|
119
|
+
verified_claims: verified,
|
|
120
|
+
contradictions,
|
|
121
|
+
unverified_claims: unverified,
|
|
122
|
+
flagged,
|
|
123
|
+
})
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
/// Extract potential factual claims from text
|
|
127
|
+
fn extract_claims(text: &str) -> Vec<SimpleClaim> {
|
|
128
|
+
let mut claims = Vec::new();
|
|
129
|
+
|
|
130
|
+
for sentence in text.split(['.', '!', '\n']) {
|
|
131
|
+
let sentence = sentence.trim();
|
|
132
|
+
if sentence.len() < 10 || sentence.len() > 500 {
|
|
133
|
+
continue;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// Skip questions, code blocks, instructions
|
|
137
|
+
if sentence.starts_with('?') || sentence.starts_with("```")
|
|
138
|
+
|| sentence.starts_with('#') || sentence.starts_with("//")
|
|
139
|
+
{
|
|
140
|
+
continue;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
let lower = sentence.to_lowercase();
|
|
144
|
+
|
|
145
|
+
// Look for definitive statements ("X is Y", "X was Y", "X has Y")
|
|
146
|
+
let definitive_patterns = [" is ", " was ", " are ", " were ", " has ", " have "];
|
|
147
|
+
for pattern in &definitive_patterns {
|
|
148
|
+
if let Some(pos) = lower.find(pattern) {
|
|
149
|
+
if pos > 2 {
|
|
150
|
+
let subject_words: Vec<&str> = lower[..pos].split_whitespace().collect();
|
|
151
|
+
let subject = if subject_words.len() > 3 {
|
|
152
|
+
subject_words[subject_words.len()-3..].join(" ")
|
|
153
|
+
} else {
|
|
154
|
+
subject_words.join(" ")
|
|
155
|
+
};
|
|
156
|
+
|
|
157
|
+
if !subject.is_empty() {
|
|
158
|
+
claims.push(SimpleClaim {
|
|
159
|
+
text: sentence.to_string(),
|
|
160
|
+
subject: capitalize(&subject),
|
|
161
|
+
});
|
|
162
|
+
break; // One claim per sentence
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
claims
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/// A claim as extracted from a response, before it is checked against the
/// knowledge graph.
#[derive(Debug, Clone, PartialEq, Eq)]
struct SimpleClaim {
    /// The sentence the claim was taken from.
    text: String,
    /// The subject used for the knowledge-graph lookup.
    subject: String,
}
|
|
177
|
+
|
|
178
|
+
/// Calculate word overlap ratio between two strings
|
|
179
|
+
fn word_overlap(a: &str, b: &str) -> f64 {
|
|
180
|
+
let a_words: std::collections::HashSet<&str> = a.split_whitespace().collect();
|
|
181
|
+
let b_words: std::collections::HashSet<&str> = b.split_whitespace().collect();
|
|
182
|
+
|
|
183
|
+
if a_words.is_empty() || b_words.is_empty() {
|
|
184
|
+
return 0.0;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
let overlap = a_words.intersection(&b_words).count();
|
|
188
|
+
let max_len = a_words.len().max(b_words.len());
|
|
189
|
+
overlap as f64 / max_len as f64
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
/// Check if one text negates or contradicts the other
|
|
193
|
+
fn contains_negation(claim: &str, fact: &str) -> bool {
|
|
194
|
+
let negation_words = ["not", "isn't", "aren't", "wasn't", "weren't", "never", "neither", "nor"];
|
|
195
|
+
|
|
196
|
+
// If claim has a negation word and fact doesn't (or vice versa), possible contradiction
|
|
197
|
+
let claim_negated = negation_words.iter().any(|neg| claim.contains(neg));
|
|
198
|
+
let fact_negated = negation_words.iter().any(|neg| fact.contains(neg));
|
|
199
|
+
|
|
200
|
+
// Simple: if one is negated and the other isn't, and they're about the same thing
|
|
201
|
+
claim_negated != fact_negated && word_overlap(claim, fact) > 0.2
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
/// Return `s` with its first character upper-cased and the rest unchanged.
///
/// An empty input yields an empty string. The first character may expand to
/// several characters when upper-cased (for example `ß` becomes `SS`).
fn capitalize(s: &str) -> String {
    s.chars().next().map_or_else(String::new, |first| {
        let mut out: String = first.to_uppercase().collect();
        // Append everything after the first character, byte-for-byte.
        out.push_str(&s[first.len_utf8()..]);
        out
    })
}
|
|
211
|
+
|
|
212
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::{tempdir, TempDir};

    /// Build a knowledge graph pre-loaded with a few facts.
    ///
    /// The `TempDir` guard is returned with the graph so the directory holding
    /// the database outlives the open connection; dropping the guard inside
    /// this helper would delete the directory underneath it.
    fn test_kg_with_facts() -> (TempDir, KnowledgeGraph) {
        let tmp = tempdir().unwrap();
        let kg = KnowledgeGraph::new(&tmp.path().join("test.db")).unwrap();

        // Add some known facts
        kg.add_fact("Python", "is_a", "programming language", Some("test")).unwrap();
        kg.add_fact("Python", "created_by", "Guido van Rossum", Some("test")).unwrap();
        kg.add_fact("Rust", "is_a", "systems programming language", Some("test")).unwrap();
        kg.add_fact("Rust", "created_by", "Mozilla", Some("test")).unwrap();

        (tmp, kg)
    }

    #[test]
    fn test_verify_correct_claim() {
        let (_tmp, kg) = test_kg_with_facts();
        let response = "Python is a programming language that is widely used.";
        let result = HallucinationDetector::verify(&kg, response).unwrap();
        assert!(result.contradictions.is_empty(), "Should not flag correct claims");
    }

    #[test]
    fn test_verify_unknown_claim() {
        let (_tmp, kg) = test_kg_with_facts();
        let response = "JavaScript was invented in 1995 by Brendan Eich.";
        let result = HallucinationDetector::verify(&kg, response).unwrap();
        // We don't know about JavaScript, so it should be unverified
        assert!(result.contradictions.is_empty());
        assert!(result.verified_claims.is_empty());
        assert_eq!(result.unverified_claims.len(), 1);
    }

    #[test]
    fn test_word_overlap() {
        assert!(word_overlap("python is great", "python is good") > 0.3);
        assert!(word_overlap("rust memory safe", "java garbage collection") < 0.1);
    }

    #[test]
    fn test_empty_response() {
        let (_tmp, kg) = test_kg_with_facts();
        let result = HallucinationDetector::verify(&kg, "").unwrap();
        assert!(!result.flagged);
    }
}
|