titan-synapse 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +187 -0
- package/Cargo.lock +3976 -0
- package/Cargo.toml +10 -0
- package/LICENSE +190 -0
- package/PROGRESS.md +151 -0
- package/README.md +514 -0
- package/TEST_LOG.md +220 -0
- package/config/default.yaml +36 -0
- package/crates/synapse/Cargo.toml +70 -0
- package/crates/synapse/src/cli/bench.rs +44 -0
- package/crates/synapse/src/cli/eval.rs +395 -0
- package/crates/synapse/src/cli/export.rs +45 -0
- package/crates/synapse/src/cli/hub.rs +179 -0
- package/crates/synapse/src/cli/import.rs +35 -0
- package/crates/synapse/src/cli/learn.rs +53 -0
- package/crates/synapse/src/cli/mod.rs +10 -0
- package/crates/synapse/src/cli/models.rs +36 -0
- package/crates/synapse/src/cli/pull.rs +60 -0
- package/crates/synapse/src/cli/status.rs +52 -0
- package/crates/synapse/src/cli/train.rs +99 -0
- package/crates/synapse/src/config.rs +220 -0
- package/crates/synapse/src/dashboard.rs +281 -0
- package/crates/synapse/src/format/manifest.rs +57 -0
- package/crates/synapse/src/format/mod.rs +4 -0
- package/crates/synapse/src/format/packer.rs +213 -0
- package/crates/synapse/src/inference/engine.rs +361 -0
- package/crates/synapse/src/inference/kv_cache.rs +97 -0
- package/crates/synapse/src/inference/lora.rs +166 -0
- package/crates/synapse/src/inference/mod.rs +9 -0
- package/crates/synapse/src/inference/model.rs +167 -0
- package/crates/synapse/src/inference/sampler.rs +133 -0
- package/crates/synapse/src/inference/speculative.rs +153 -0
- package/crates/synapse/src/learn/cloud_fallback.rs +186 -0
- package/crates/synapse/src/learn/engine.rs +109 -0
- package/crates/synapse/src/learn/mod.rs +5 -0
- package/crates/synapse/src/main.rs +185 -0
- package/crates/synapse/src/memory/extractor.rs +201 -0
- package/crates/synapse/src/memory/graph.rs +332 -0
- package/crates/synapse/src/memory/hallucination.rs +259 -0
- package/crates/synapse/src/memory/mod.rs +7 -0
- package/crates/synapse/src/openai.rs +232 -0
- package/crates/synapse/src/server.rs +166 -0
- package/crates/synapse/src/streaming.rs +80 -0
- package/crates/synapse/src/swarm/coordinator.rs +198 -0
- package/crates/synapse/src/swarm/mod.rs +8 -0
- package/crates/synapse/src/swarm/orchestrator.rs +225 -0
- package/crates/synapse/src/swarm/pool.rs +64 -0
- package/crates/synapse/src/swarm/spawner.rs +199 -0
- package/crates/synapse/src/swarm/synthesizer.rs +26 -0
- package/crates/synapse/src/vram/manager.rs +67 -0
- package/crates/synapse/src/vram/mod.rs +3 -0
- package/docker-compose.yml +19 -0
- package/install.sh +311 -0
- package/package.json +36 -0
- package/python/Dockerfile.learn +18 -0
- package/python/requirements.txt +11 -0
- package/python/synapse_learn/__init__.py +0 -0
- package/python/synapse_learn/datasets.py +233 -0
- package/python/synapse_learn/real_eval.py +616 -0
- package/python/synapse_learn/server.py +431 -0
- package/python/synapse_learn/train_base.py +672 -0
- package/python/synapse_learn/train_specialists.py +787 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use serde::{Deserialize, Serialize};
|
|
3
|
+
|
|
4
|
+
/// Bridge to the Python learning sidecar
pub struct LearningEngine {
    // Base URL of the sidecar HTTP service; endpoint paths are appended to it
    sidecar_url: String,
    // When false, `evaluate`/`status` short-circuit with neutral defaults
    // instead of calling the sidecar
    enabled: bool,
    // Reused HTTP client for all sidecar requests
    client: reqwest::Client,
}

/// Payload POSTed to the sidecar's `/evaluate` endpoint.
#[derive(Debug, Serialize)]
pub struct EvalRequest {
    /// Specialist that produced the response being evaluated
    pub specialist: String,
    /// The user prompt
    pub prompt: String,
    /// The model response to be scored
    pub response: String,
}

/// Result returned by the sidecar's `/evaluate` endpoint.
#[derive(Debug, Deserialize)]
pub struct EvalResponse {
    /// Quality score; 5.0 is used as the neutral value when learning is disabled
    pub score: f32,
    /// A rewritten response, if the evaluator produced one
    pub improved_response: Option<String>,
    /// Free-text evaluator feedback
    pub feedback: String,
}

/// Payload POSTed to the sidecar's `/train` endpoint.
#[derive(Debug, Serialize)]
pub struct TrainRequest {
    /// Specialist to train
    pub specialist: String,
    /// Base model to fine-tune from
    pub base_model: String,
}

/// Result returned by the sidecar's `/train` endpoint.
#[derive(Debug, Deserialize)]
pub struct TrainResponse {
    /// Filesystem path of the produced adapter
    pub adapter_path: String,
    /// Final training loss
    pub loss: f32,
    /// Number of preference pairs consumed
    pub pairs_used: u32,
}

/// Result returned by the sidecar's `/status` endpoint
/// (all-zero defaults are used when learning is disabled).
#[derive(Debug, Deserialize)]
pub struct LearnStatus {
    pub pairs_collected: u32,
    pub training_queue: u32,
    pub last_trained: Option<String>,
    pub adapters_created: u32,
}
|
|
45
|
+
|
|
46
|
+
impl LearningEngine {
|
|
47
|
+
pub fn new(sidecar_url: &str, enabled: bool) -> Self {
|
|
48
|
+
Self {
|
|
49
|
+
sidecar_url: sidecar_url.to_string(),
|
|
50
|
+
enabled,
|
|
51
|
+
client: reqwest::Client::new(),
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/// Submit a response for evaluation (async, non-blocking)
|
|
56
|
+
pub async fn evaluate(&self, request: EvalRequest) -> Result<EvalResponse> {
|
|
57
|
+
if !self.enabled {
|
|
58
|
+
return Ok(EvalResponse {
|
|
59
|
+
score: 5.0,
|
|
60
|
+
improved_response: None,
|
|
61
|
+
feedback: "Learning disabled".into(),
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
let resp = self.client
|
|
66
|
+
.post(format!("{}/evaluate", self.sidecar_url))
|
|
67
|
+
.json(&request)
|
|
68
|
+
.send()
|
|
69
|
+
.await?
|
|
70
|
+
.json()
|
|
71
|
+
.await?;
|
|
72
|
+
|
|
73
|
+
Ok(resp)
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/// Trigger training immediately
|
|
77
|
+
pub async fn train_now(&self, request: TrainRequest) -> Result<TrainResponse> {
|
|
78
|
+
let resp = self.client
|
|
79
|
+
.post(format!("{}/train", self.sidecar_url))
|
|
80
|
+
.json(&request)
|
|
81
|
+
.send()
|
|
82
|
+
.await?
|
|
83
|
+
.json()
|
|
84
|
+
.await?;
|
|
85
|
+
|
|
86
|
+
Ok(resp)
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/// Get learning status
|
|
90
|
+
pub async fn status(&self) -> Result<LearnStatus> {
|
|
91
|
+
if !self.enabled {
|
|
92
|
+
return Ok(LearnStatus {
|
|
93
|
+
pairs_collected: 0,
|
|
94
|
+
training_queue: 0,
|
|
95
|
+
last_trained: None,
|
|
96
|
+
adapters_created: 0,
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
let resp = self.client
|
|
101
|
+
.get(format!("{}/status", self.sidecar_url))
|
|
102
|
+
.send()
|
|
103
|
+
.await?
|
|
104
|
+
.json()
|
|
105
|
+
.await?;
|
|
106
|
+
|
|
107
|
+
Ok(resp)
|
|
108
|
+
}
|
|
109
|
+
}
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
mod cli;
|
|
2
|
+
mod config;
|
|
3
|
+
mod dashboard;
|
|
4
|
+
mod inference;
|
|
5
|
+
mod server;
|
|
6
|
+
mod openai;
|
|
7
|
+
mod streaming;
|
|
8
|
+
mod swarm;
|
|
9
|
+
mod learn;
|
|
10
|
+
mod memory;
|
|
11
|
+
mod vram;
|
|
12
|
+
mod format;
|
|
13
|
+
|
|
14
|
+
use clap::{Parser, Subcommand};
|
|
15
|
+
use anyhow::Result;
|
|
16
|
+
|
|
17
|
+
// Top-level argument parser for the `synapse` binary.
// NB: the `///` doc comments on clap-annotated items below double as the
// runtime --help text, so they are part of observable behavior.
#[derive(Parser)]
#[command(name = "synapse")]
#[command(about = "TITAN Synapse — Small models that think together. And learn.")]
#[command(version)]
struct Cli {
    // The subcommand to dispatch on (see `Commands`)
    #[command(subcommand)]
    command: Commands,
}
|
|
25
|
+
|
|
26
|
+
// Every `synapse <subcommand>` the binary supports. The `///` comments on
// variants and fields are emitted by clap as --help text — treat them as
// user-facing strings, not internal documentation.
#[derive(Subcommand)]
enum Commands {
    /// Start the Synapse inference server
    Serve {
        /// Port to listen on
        #[arg(short, long, default_value = "6900")]
        port: u16,
        /// Config file path
        #[arg(short, long)]
        config: Option<String>,
    },
    /// Show system status (GPU, loaded models, VRAM)
    Status,
    /// List available models
    Models,
    /// Pull a model from HuggingFace
    Pull {
        /// Model name (e.g., qwen3-3b, qwen3-0.6b)
        model: String,
    },
    /// Export a specialist as .synapse file
    Export {
        /// Specialist name
        name: String,
        /// Output path
        #[arg(short, long)]
        output: Option<String>,
    },
    /// Import a .synapse specialist file
    Import {
        /// Path to .synapse file
        path: String,
    },
    /// Show learning engine status
    Learn {
        #[command(subcommand)]
        command: LearnCommands,
    },
    /// Run inference benchmarks
    Bench {
        /// Model to benchmark
        #[arg(short, long)]
        model: Option<String>,
    },
    /// Run standardized evaluation (MMLU, HumanEval, MT-Bench, Safety)
    Eval,
    /// Community Specialist Hub — share and discover trained specialists
    Hub {
        #[command(subcommand)]
        command: HubCommands,
    },
    /// Train our own Synapse model from scratch (SFT + DPO + GGUF export)
    Train {
        /// Training stage: full, sft, dpo, export
        #[arg(short, long, default_value = "full")]
        stage: String,
        /// Base model architecture (Apache 2.0 licensed, we fine-tune into OUR model)
        #[arg(short, long, default_value = "Qwen/Qwen2.5-3B")]
        base_model: String,
        /// Output model name
        #[arg(short, long, default_value = "synapse-3b")]
        output: String,
    },
    /// Start the server (alias for serve)
    Up {
        /// Port to listen on
        #[arg(short, long, default_value = "6900")]
        port: u16,
    },
}
|
|
96
|
+
|
|
97
|
+
// Subcommands of `synapse learn` (doc comments become --help text).
#[derive(Subcommand)]
enum LearnCommands {
    /// Show learning status
    Status,
    /// Force training now
    TrainNow,
}
|
|
104
|
+
|
|
105
|
+
// Subcommands of `synapse hub` (doc comments become --help text).
#[derive(Subcommand)]
enum HubCommands {
    /// Search for community specialists
    Search {
        /// Search query
        query: String,
    },
    /// Install a specialist from HuggingFace
    Install {
        /// HuggingFace repo (e.g., user/synapse-python-expert)
        repo: String,
    },
    /// Push your trained specialist to HuggingFace
    Push {
        /// Specialist name to push
        name: String,
    },
    /// List hub info and commands
    List,
}
|
|
125
|
+
|
|
126
|
+
#[tokio::main]
|
|
127
|
+
async fn main() -> Result<()> {
|
|
128
|
+
tracing_subscriber::fmt()
|
|
129
|
+
.with_env_filter(
|
|
130
|
+
tracing_subscriber::EnvFilter::try_from_default_env()
|
|
131
|
+
.unwrap_or_else(|_| "synapse=info".into()),
|
|
132
|
+
)
|
|
133
|
+
.init();
|
|
134
|
+
|
|
135
|
+
let cli = Cli::parse();
|
|
136
|
+
let cfg = config::SynapseConfig::load(None)?;
|
|
137
|
+
|
|
138
|
+
match cli.command {
|
|
139
|
+
Commands::Serve { port, config: config_path } => {
|
|
140
|
+
let cfg = if let Some(path) = config_path {
|
|
141
|
+
config::SynapseConfig::load(Some(&path))?
|
|
142
|
+
} else {
|
|
143
|
+
cfg
|
|
144
|
+
};
|
|
145
|
+
server::run(cfg, port).await
|
|
146
|
+
}
|
|
147
|
+
Commands::Up { port } => {
|
|
148
|
+
server::run(cfg, port).await
|
|
149
|
+
}
|
|
150
|
+
Commands::Status => {
|
|
151
|
+
cli::status::run(&cfg).await
|
|
152
|
+
}
|
|
153
|
+
Commands::Models => {
|
|
154
|
+
cli::models::run(&cfg).await
|
|
155
|
+
}
|
|
156
|
+
Commands::Pull { model } => {
|
|
157
|
+
cli::pull::run(&cfg, &model).await
|
|
158
|
+
}
|
|
159
|
+
Commands::Export { name, output } => {
|
|
160
|
+
cli::export::run(&cfg, &name, output.as_deref()).await
|
|
161
|
+
}
|
|
162
|
+
Commands::Import { path } => {
|
|
163
|
+
cli::import::run(&cfg, &path).await
|
|
164
|
+
}
|
|
165
|
+
Commands::Learn { command } => match command {
|
|
166
|
+
LearnCommands::Status => cli::learn::status(&cfg).await,
|
|
167
|
+
LearnCommands::TrainNow => cli::learn::train_now(&cfg).await,
|
|
168
|
+
},
|
|
169
|
+
Commands::Bench { model } => {
|
|
170
|
+
cli::bench::run(&cfg, model.as_deref()).await
|
|
171
|
+
}
|
|
172
|
+
Commands::Eval => {
|
|
173
|
+
cli::eval::run(&cfg).await
|
|
174
|
+
}
|
|
175
|
+
Commands::Train { stage, base_model, output } => {
|
|
176
|
+
cli::train::run(&cfg, &stage, &base_model, &output).await
|
|
177
|
+
}
|
|
178
|
+
Commands::Hub { command } => match command {
|
|
179
|
+
HubCommands::Search { query } => cli::hub::search(&query).await,
|
|
180
|
+
HubCommands::Install { repo } => cli::hub::install(&cfg, &repo).await,
|
|
181
|
+
HubCommands::Push { name } => cli::hub::push(&cfg, &name).await,
|
|
182
|
+
HubCommands::List => cli::hub::list().await,
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
}
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use super::graph::KnowledgeGraph;
|
|
3
|
+
|
|
4
|
+
/// Extracts structured knowledge from conversations in real-time.
/// This is what makes "learns from every conversation" real — not just logging,
/// but actually building a queryable knowledge graph from natural language.
///
/// A stateless unit struct: all functionality lives in associated functions
/// that operate on a caller-supplied `KnowledgeGraph`.
pub struct KnowledgeExtractor;
|
|
8
|
+
|
|
9
|
+
impl KnowledgeExtractor {
    /// Extract facts from a conversation message and store in the knowledge graph.
    /// Uses pattern matching for common fact patterns:
    /// - "X is Y" → (X, is_a, Y)
    /// - "X was created by Y" → (X, created_by, Y)
    /// - "X uses Y" → (X, uses, Y)
    /// - "X runs on Y" → (X, runs_on, Y)
    /// - "X supports Y" → (X, supports, Y)
    ///
    /// Returns the number of facts written to `kg`; `source` is stored with
    /// each fact for provenance.
    pub fn extract_and_store(kg: &KnowledgeGraph, text: &str, source: &str) -> Result<u32> {
        let mut facts_added = 0;
        // Split on sentence-ish boundaries. NOTE(review): '?' is not in the
        // split set, so text inside/after a question stays fused to the next
        // fragment — confirm whether that is intentional.
        let sentences: Vec<&str> = text.split(['.', '!', '\n'])
            .map(|s| s.trim())
            // Drop fragments too short to hold a fact, or suspiciously long
            // (likely pasted code or data dumps).
            .filter(|s| s.len() > 5 && s.len() < 500)
            .collect();

        for sentence in &sentences {
            // Matching runs on the lowercased sentence; the original-case
            // sentence is passed along but the helpers currently ignore it.
            let lower = sentence.to_lowercase();

            // Pattern: "X is a/an Y"
            if let Some(fact) = extract_is_pattern(&lower, sentence) {
                kg.add_fact(&fact.0, &fact.1, &fact.2, Some(source))?;
                facts_added += 1;
            }

            // Pattern: "X uses/runs/supports Y"
            // (keyword to search for, predicate name stored in the graph)
            for (keyword, predicate) in &[
                ("uses", "uses"),
                ("runs on", "runs_on"),
                ("supports", "supports"),
                ("requires", "requires"),
                ("depends on", "depends_on"),
                ("created by", "created_by"),
                ("built with", "built_with"),
                ("written in", "written_in"),
            ] {
                if let Some(fact) = extract_verb_pattern(&lower, sentence, keyword, predicate) {
                    kg.add_fact(&fact.0, &fact.1, &fact.2, Some(source))?;
                    facts_added += 1;
                }
            }
        }

        if facts_added > 0 {
            tracing::debug!("Extracted {facts_added} facts from conversation");
        }

        Ok(facts_added)
    }

    /// Extract user preferences from conversation patterns
    ///
    /// Scans the user's message for feedback keywords. Unambiguously negative
    /// feedback stores a preference pair via `kg.add_preference` (with a
    /// placeholder improved response); positive feedback is only logged.
    pub fn extract_preferences(kg: &KnowledgeGraph, user_msg: &str, assistant_msg: &str, specialist: &str) -> Result<()> {
        // If user says "good", "thanks", "correct", "exactly" — positive signal
        // NOTE(review): these are plain substring checks, so "good" also
        // matches inside "goodbye" — consider word-boundary matching.
        let positive_signals = ["good", "thanks", "correct", "exactly", "perfect", "great", "nice"];
        let negative_signals = ["wrong", "incorrect", "no,", "that's not", "actually,", "nope"];

        let user_lower = user_msg.to_lowercase();

        let is_positive = positive_signals.iter().any(|s| user_lower.contains(s));
        let is_negative = negative_signals.iter().any(|s| user_lower.contains(s));

        // Mixed signals (both positive and negative hits) fall through and
        // are deliberately ignored.
        if is_positive && !is_negative {
            // This was a good response — could be used as chosen in DPO
            tracing::debug!("Positive feedback detected for {specialist}");
        } else if is_negative && !is_positive {
            // This was a bad response — store for improvement
            // Store as a preference pair with placeholder improved response
            kg.add_preference(specialist, user_msg, "(needs improvement)", assistant_msg)?;
            tracing::debug!("Negative feedback detected for {specialist} — stored preference pair");
        }

        Ok(())
    }
}
|
|
82
|
+
|
|
83
|
+
/// Extract "X is a/an Y" patterns
///
/// `lower` is the lowercased sentence; `_original` (original-case text) is
/// currently unused. Returns `(subject, "is_a", object)` with the subject
/// capitalized, or `None` when no pattern matches. The search patterns are
/// space-padded, so they already match only on word boundaries.
fn extract_is_pattern(lower: &str, _original: &str) -> Option<(String, String, String)> {
    // Look for "X is a Y" or "X is an Y"
    let patterns = [" is a ", " is an ", " is the "];
    for pattern in patterns {
        if let Some(pos) = lower.find(pattern) {
            let subject = lower[..pos].trim();
            let object = lower[pos + pattern.len()..].trim();

            // Only extract if both parts are reasonable length
            if subject.len() > 1 && subject.len() < 100 && object.len() > 1 && object.len() < 200 {
                // Clean up subject (take last noun phrase: at most 3 words)
                let subject = subject.split_whitespace().collect::<Vec<_>>();
                let subject = if subject.len() > 3 {
                    subject[subject.len()-3..].join(" ")
                } else {
                    subject.join(" ")
                };

                return Some((
                    capitalize(&subject),
                    "is_a".to_string(),
                    object.to_string(),
                ));
            }
        }
    }
    None
}

/// Extract "X <keyword> Y" patterns, e.g. "X uses Y" → (X, uses, Y).
///
/// Fix: the keyword must now land on word boundaries. The previous version
/// used a bare substring search, so e.g. "stress causes failure" matched the
/// keyword "uses" inside "causes" and produced a bogus fact. We also scan all
/// occurrences instead of only the first, skipping embedded ones.
fn extract_verb_pattern(lower: &str, _original: &str, keyword: &str, predicate: &str) -> Option<(String, String, String)> {
    for (pos, _) in lower.match_indices(keyword) {
        // Word-boundary check: the characters immediately before and after
        // the match must be non-alphanumeric (or the string edge).
        let boundary_before = lower[..pos]
            .chars()
            .next_back()
            .map_or(true, |c| !c.is_alphanumeric());
        let boundary_after = lower[pos + keyword.len()..]
            .chars()
            .next()
            .map_or(true, |c| !c.is_alphanumeric());
        if !boundary_before || !boundary_after {
            continue; // keyword embedded in a longer word — not a real match
        }

        let subject = lower[..pos].trim();
        let object = lower[pos + keyword.len()..].trim();

        // Only extract if both parts are reasonable length
        if subject.len() > 1 && subject.len() < 100 && object.len() > 1 && object.len() < 200 {
            // Clean up subject (take last noun phrase: at most 3 words)
            let words = subject.split_whitespace().collect::<Vec<_>>();
            let subject = if words.len() > 3 {
                words[words.len()-3..].join(" ")
            } else {
                words.join(" ")
            };

            return Some((
                capitalize(&subject),
                predicate.to_string(),
                object.to_string(),
            ));
        }
    }
    None
}

/// Uppercase the first character of `s` (Unicode-aware; the uppercase form
/// may be more than one char), leaving the rest unchanged.
fn capitalize(s: &str) -> String {
    let mut chars = s.chars();
    match chars.next() {
        None => String::new(),
        Some(c) => c.to_uppercase().collect::<String>() + chars.as_str(),
    }
}
|
|
144
|
+
|
|
145
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Build a KnowledgeGraph backed by a database in a fresh temp directory.
    ///
    /// The `TempDir` is returned alongside the graph: dropping a `TempDir`
    /// deletes the directory, and the previous version dropped it inside
    /// this function — deleting the database directory out from under the
    /// still-open KnowledgeGraph. Callers bind it as `_tmp` so it lives for
    /// the duration of the test.
    fn test_kg() -> (KnowledgeGraph, tempfile::TempDir) {
        let tmp = tempdir().unwrap();
        let kg = KnowledgeGraph::new(&tmp.path().join("test.db")).unwrap();
        (kg, tmp)
    }

    #[test]
    fn test_extract_is_pattern() {
        let (kg, _tmp) = test_kg();
        let text = "Python is a programming language. It was designed by Guido van Rossum.";
        let count = KnowledgeExtractor::extract_and_store(&kg, text, "test").unwrap();
        assert!(count >= 1, "Should extract at least 1 fact, got {count}");

        let facts = kg.query_facts("Python").unwrap();
        assert!(!facts.is_empty(), "Should have facts about Python");
    }

    #[test]
    fn test_extract_verb_patterns() {
        let (kg, _tmp) = test_kg();
        let text = "TITAN Synapse uses Rust for the inference engine. The project runs on CUDA GPUs.";
        let count = KnowledgeExtractor::extract_and_store(&kg, text, "test").unwrap();
        assert!(count >= 1, "Should extract at least 1 fact, got {count}");
    }

    #[test]
    fn test_extract_preferences_positive() {
        let (kg, _tmp) = test_kg();
        KnowledgeExtractor::extract_preferences(
            &kg, "Thanks, that's correct!", "Python is dynamically typed.", "python_expert"
        ).unwrap();
        // Positive feedback shouldn't create a preference pair
        assert_eq!(kg.preference_count("python_expert").unwrap(), 0);
    }

    #[test]
    fn test_extract_preferences_negative() {
        let (kg, _tmp) = test_kg();
        KnowledgeExtractor::extract_preferences(
            &kg, "No, that's not right at all", "Python is statically typed.", "python_expert"
        ).unwrap();
        // Negative feedback should create a preference pair
        assert_eq!(kg.preference_count("python_expert").unwrap(), 1);
    }

    #[test]
    fn test_empty_text() {
        let (kg, _tmp) = test_kg();
        let count = KnowledgeExtractor::extract_and_store(&kg, "", "test").unwrap();
        assert_eq!(count, 0);
    }
}
|