@sesamespace/hivemind 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/PLANNING.md +383 -0
- package/TASKS.md +60 -0
- package/install.sh +187 -0
- package/npm-package.json +28 -0
- package/package.json +13 -20
- package/packages/cli/package.json +23 -0
- package/{dist/chunk-DVR2KBL7.js → packages/cli/src/commands/fleet.ts} +50 -30
- package/packages/cli/src/commands/init.ts +230 -0
- package/{dist/chunk-MBS5A6BZ.js → packages/cli/src/commands/service.ts} +51 -42
- package/{dist/chunk-RNK5Q5GR.js → packages/cli/src/commands/start.ts} +12 -14
- package/{dist/main.js → packages/cli/src/main.ts} +12 -18
- package/packages/cli/tsconfig.json +8 -0
- package/packages/memory/Cargo.lock +6480 -0
- package/packages/memory/Cargo.toml +21 -0
- package/packages/memory/src/context.rs +179 -0
- package/packages/memory/src/embeddings.rs +51 -0
- package/packages/memory/src/main.rs +626 -0
- package/packages/memory/src/promotion.rs +637 -0
- package/packages/memory/src/scoring.rs +131 -0
- package/packages/memory/src/store.rs +460 -0
- package/packages/memory/src/tasks.rs +321 -0
- package/packages/runtime/package.json +24 -0
- package/packages/runtime/src/__tests__/fleet-integration.test.ts +235 -0
- package/packages/runtime/src/__tests__/fleet.test.ts +207 -0
- package/packages/runtime/src/__tests__/integration.test.ts +434 -0
- package/packages/runtime/src/agent.ts +255 -0
- package/packages/runtime/src/config.ts +130 -0
- package/packages/runtime/src/context.ts +192 -0
- package/packages/runtime/src/fleet/fleet-manager.ts +399 -0
- package/packages/runtime/src/fleet/memory-sync.ts +362 -0
- package/packages/runtime/src/fleet/primary-client.ts +285 -0
- package/packages/runtime/src/fleet/worker-protocol.ts +158 -0
- package/packages/runtime/src/fleet/worker-server.ts +246 -0
- package/packages/runtime/src/index.ts +57 -0
- package/packages/runtime/src/llm-client.ts +65 -0
- package/packages/runtime/src/memory-client.ts +309 -0
- package/packages/runtime/src/pipeline.ts +151 -0
- package/packages/runtime/src/prompt.ts +173 -0
- package/packages/runtime/src/sesame.ts +174 -0
- package/{dist/start.js → packages/runtime/src/start.ts} +7 -9
- package/packages/runtime/src/task-engine.ts +113 -0
- package/packages/runtime/src/worker.ts +339 -0
- package/packages/runtime/tsconfig.json +8 -0
- package/pnpm-workspace.yaml +2 -0
- package/run-aidan.sh +23 -0
- package/scripts/bootstrap.sh +196 -0
- package/scripts/build-npm.sh +94 -0
- package/scripts/com.hivemind.agent.plist +44 -0
- package/scripts/com.hivemind.memory.plist +31 -0
- package/tsconfig.json +22 -0
- package/tsup.config.ts +28 -0
- package/dist/chunk-2I2O6X5D.js +0 -1408
- package/dist/chunk-2I2O6X5D.js.map +0 -1
- package/dist/chunk-DVR2KBL7.js.map +0 -1
- package/dist/chunk-MBS5A6BZ.js.map +0 -1
- package/dist/chunk-NVJ424TB.js +0 -731
- package/dist/chunk-NVJ424TB.js.map +0 -1
- package/dist/chunk-RNK5Q5GR.js.map +0 -1
- package/dist/chunk-XNOWVLXD.js +0 -160
- package/dist/chunk-XNOWVLXD.js.map +0 -1
- package/dist/commands/fleet.js +0 -9
- package/dist/commands/fleet.js.map +0 -1
- package/dist/commands/init.js +0 -7
- package/dist/commands/init.js.map +0 -1
- package/dist/commands/service.js +0 -7
- package/dist/commands/service.js.map +0 -1
- package/dist/commands/start.js +0 -9
- package/dist/commands/start.js.map +0 -1
- package/dist/index.js +0 -41
- package/dist/index.js.map +0 -1
- package/dist/main.js.map +0 -1
- package/dist/start.js.map +0 -1
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
use chrono::{DateTime, Utc};
|
|
2
|
+
use std::collections::HashMap;
|
|
3
|
+
use std::sync::RwLock;
|
|
4
|
+
|
|
5
|
+
/// Default half-life in hours for recency decay
|
|
6
|
+
const DEFAULT_HALF_LIFE_HOURS: f64 = 48.0;
|
|
7
|
+
|
|
8
|
+
/// Per-context scoring configuration
|
|
9
|
+
pub struct ScoringConfig {
|
|
10
|
+
/// Half-life per context (in hours). Contexts not in this map use the default.
|
|
11
|
+
context_half_lives: RwLock<HashMap<String, f64>>,
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
impl ScoringConfig {
|
|
15
|
+
pub fn new() -> Self {
|
|
16
|
+
Self {
|
|
17
|
+
context_half_lives: RwLock::new(HashMap::new()),
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
pub fn set_half_life(&self, context: &str, hours: f64) {
|
|
22
|
+
let mut map = self.context_half_lives.write().unwrap();
|
|
23
|
+
map.insert(context.to_string(), hours);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
pub fn get_half_life(&self, context: &str) -> f64 {
|
|
27
|
+
let map = self.context_half_lives.read().unwrap();
|
|
28
|
+
map.get(context).copied().unwrap_or(DEFAULT_HALF_LIFE_HOURS)
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/// Compute a recency weight for an episode based on its timestamp.
|
|
33
|
+
/// Uses exponential decay: weight = e^(-lambda * hours_ago)
|
|
34
|
+
/// Half-life of ~48 hours means recent episodes are strongly preferred
|
|
35
|
+
/// but older ones still surface if semantically relevant.
|
|
36
|
+
pub fn recency_weight(timestamp: &str) -> f64 {
|
|
37
|
+
recency_weight_with_half_life(timestamp, DEFAULT_HALF_LIFE_HOURS)
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/// Recency weight with a custom half-life (per-context independent decay)
|
|
41
|
+
pub fn recency_weight_with_half_life(timestamp: &str, half_life_hours: f64) -> f64 {
|
|
42
|
+
let lambda = (2.0_f64).ln() / half_life_hours;
|
|
43
|
+
|
|
44
|
+
let parsed = DateTime::parse_from_rfc3339(timestamp)
|
|
45
|
+
.map(|dt| dt.with_timezone(&Utc))
|
|
46
|
+
.unwrap_or_else(|_| Utc::now());
|
|
47
|
+
|
|
48
|
+
let hours_ago = (Utc::now() - parsed).num_seconds() as f64 / 3600.0;
|
|
49
|
+
let hours_ago = hours_ago.max(0.0);
|
|
50
|
+
|
|
51
|
+
(-lambda * hours_ago).exp()
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/// Combined relevance score: recency * semantic similarity
|
|
55
|
+
/// Distance is the vector distance (lower = more similar)
|
|
56
|
+
pub fn combined_score(timestamp: &str, distance: f32, half_life_hours: f64) -> f64 {
|
|
57
|
+
let recency = recency_weight_with_half_life(timestamp, half_life_hours);
|
|
58
|
+
recency * (1.0 / (1.0 + distance as f64))
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;

    /// An episode timestamped "now" should decay by essentially nothing.
    #[test]
    fn recent_episode_has_high_weight() {
        let w = recency_weight(&Utc::now().to_rfc3339());
        assert!(w > 0.99, "Weight for now should be ~1.0, got {}", w);
    }

    /// Exactly one default half-life (48h) in the past should halve the weight.
    #[test]
    fn old_episode_has_lower_weight() {
        let ts = (Utc::now() - chrono::Duration::hours(48)).to_rfc3339();
        let w = recency_weight(&ts);
        assert!(
            (w - 0.5).abs() < 0.05,
            "Weight at half-life should be ~0.5, got {}",
            w
        );
    }

    /// The half-life parameter must actually steer the decay rate.
    #[test]
    fn custom_half_life_works() {
        let one_hour_ago = (Utc::now() - chrono::Duration::hours(1)).to_rfc3339();

        // One hour ago with a 1-hour half-life lands right at the halfway point.
        let w = recency_weight_with_half_life(&one_hour_ago, 1.0);
        assert!(
            (w - 0.5).abs() < 0.05,
            "Weight at 1h with 1h half-life should be ~0.5, got {}",
            w
        );

        // The same age with a 100-hour half-life barely decays at all.
        let w2 = recency_weight_with_half_life(&one_hour_ago, 100.0);
        assert!(
            w2 > 0.99,
            "Weight at 1h with 100h half-life should be ~1.0, got {}",
            w2
        );
    }

    /// Per-context overrides apply only to the context they were set for.
    #[test]
    fn scoring_config_per_context() {
        let config = ScoringConfig::new();

        // Unknown contexts fall back to the default.
        assert_eq!(config.get_half_life("unknown"), DEFAULT_HALF_LIFE_HOURS);

        // A custom half-life affects its own context and nothing else.
        config.set_half_life("project-a", 24.0);
        assert_eq!(config.get_half_life("project-a"), 24.0);
        assert_eq!(config.get_half_life("project-b"), DEFAULT_HALF_LIFE_HOURS);
    }

    /// The combined score multiplies recency by similarity (1/(1+distance)).
    #[test]
    fn combined_score_works() {
        let now = Utc::now().to_rfc3339();

        let score = combined_score(&now, 0.0, DEFAULT_HALF_LIFE_HOURS);
        assert!(score > 0.99, "Perfect match + recent should score ~1.0, got {}", score);

        let score2 = combined_score(&now, 1.0, DEFAULT_HALF_LIFE_HOURS);
        assert!(
            (score2 - 0.5).abs() < 0.05,
            "Perfect recency + distance 1.0 should score ~0.5, got {}",
            score2
        );
    }
}
|
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use arrow_array::{
|
|
3
|
+
types::Float32Type, Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator,
|
|
4
|
+
StringArray, UInt64Array,
|
|
5
|
+
};
|
|
6
|
+
use arrow_schema::{DataType, Field, Schema};
|
|
7
|
+
use chrono::Utc;
|
|
8
|
+
use futures::stream::TryStreamExt;
|
|
9
|
+
use lancedb::{connect, connection::Connection, query::ExecutableQuery, query::QueryBase, Table};
|
|
10
|
+
use serde::{Deserialize, Serialize};
|
|
11
|
+
use std::sync::Arc;
|
|
12
|
+
|
|
13
|
+
const EMBEDDING_DIM: i32 = 768;
|
|
14
|
+
const TABLE_NAME: &str = "episodes";
|
|
15
|
+
|
|
16
|
+
#[derive(Debug, Serialize, Deserialize, Clone)]
|
|
17
|
+
pub struct Episode {
|
|
18
|
+
pub id: String,
|
|
19
|
+
pub timestamp: String,
|
|
20
|
+
pub context_name: String,
|
|
21
|
+
pub role: String,
|
|
22
|
+
pub content: String,
|
|
23
|
+
#[serde(default)]
|
|
24
|
+
pub layer: String,
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
#[derive(Debug, Deserialize)]
|
|
28
|
+
pub struct EpisodeInput {
|
|
29
|
+
pub context_name: Option<String>,
|
|
30
|
+
pub role: String,
|
|
31
|
+
pub content: String,
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
pub struct MemoryStore {
|
|
35
|
+
db: Connection,
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
impl MemoryStore {
|
|
39
|
+
pub async fn new(path: &str) -> Result<Self> {
|
|
40
|
+
std::fs::create_dir_all(path)?;
|
|
41
|
+
let db = connect(path).execute().await?;
|
|
42
|
+
let store = Self { db };
|
|
43
|
+
store.ensure_table().await?;
|
|
44
|
+
Ok(store)
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
pub fn get_connection(&self) -> Connection {
|
|
48
|
+
self.db.clone()
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
fn schema() -> Arc<Schema> {
|
|
52
|
+
Arc::new(Schema::new(vec![
|
|
53
|
+
Field::new("id", DataType::Utf8, false),
|
|
54
|
+
Field::new("timestamp", DataType::Utf8, false),
|
|
55
|
+
Field::new("context_name", DataType::Utf8, false),
|
|
56
|
+
Field::new("role", DataType::Utf8, false),
|
|
57
|
+
Field::new("content", DataType::Utf8, false),
|
|
58
|
+
Field::new("layer", DataType::Utf8, false),
|
|
59
|
+
Field::new(
|
|
60
|
+
"embedding",
|
|
61
|
+
DataType::FixedSizeList(
|
|
62
|
+
Arc::new(Field::new("item", DataType::Float32, true)),
|
|
63
|
+
EMBEDDING_DIM,
|
|
64
|
+
),
|
|
65
|
+
false,
|
|
66
|
+
),
|
|
67
|
+
]))
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
async fn ensure_table(&self) -> Result<()> {
|
|
71
|
+
let names = self.db.table_names().execute().await?;
|
|
72
|
+
if !names.contains(&TABLE_NAME.to_string()) {
|
|
73
|
+
let schema = Self::schema();
|
|
74
|
+
let batch = RecordBatch::new_empty(schema.clone());
|
|
75
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
76
|
+
self.db
|
|
77
|
+
.create_table(TABLE_NAME, Box::new(batches))
|
|
78
|
+
.execute()
|
|
79
|
+
.await?;
|
|
80
|
+
tracing::info!("Created episodes table");
|
|
81
|
+
}
|
|
82
|
+
Ok(())
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
pub async fn insert_episode(
|
|
86
|
+
&self,
|
|
87
|
+
input: EpisodeInput,
|
|
88
|
+
embedding: Vec<f32>,
|
|
89
|
+
) -> Result<Episode> {
|
|
90
|
+
let id = uuid::Uuid::new_v4().to_string();
|
|
91
|
+
let timestamp = Utc::now().to_rfc3339();
|
|
92
|
+
let context_name = input.context_name.unwrap_or_else(|| "global".to_string());
|
|
93
|
+
let layer = "L2".to_string();
|
|
94
|
+
|
|
95
|
+
let episode = Episode {
|
|
96
|
+
id: id.clone(),
|
|
97
|
+
timestamp: timestamp.clone(),
|
|
98
|
+
context_name: context_name.clone(),
|
|
99
|
+
role: input.role.clone(),
|
|
100
|
+
content: input.content.clone(),
|
|
101
|
+
layer: layer.clone(),
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
let schema = Self::schema();
|
|
105
|
+
let embedding_array = FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
|
106
|
+
vec![Some(embedding.into_iter().map(Some).collect::<Vec<_>>())],
|
|
107
|
+
EMBEDDING_DIM,
|
|
108
|
+
);
|
|
109
|
+
|
|
110
|
+
let batch = RecordBatch::try_new(
|
|
111
|
+
schema.clone(),
|
|
112
|
+
vec![
|
|
113
|
+
Arc::new(StringArray::from(vec![id.as_str()])),
|
|
114
|
+
Arc::new(StringArray::from(vec![timestamp.as_str()])),
|
|
115
|
+
Arc::new(StringArray::from(vec![context_name.as_str()])),
|
|
116
|
+
Arc::new(StringArray::from(vec![input.role.as_str()])),
|
|
117
|
+
Arc::new(StringArray::from(vec![input.content.as_str()])),
|
|
118
|
+
Arc::new(StringArray::from(vec![layer.as_str()])),
|
|
119
|
+
Arc::new(embedding_array) as Arc<dyn Array>,
|
|
120
|
+
],
|
|
121
|
+
)?;
|
|
122
|
+
|
|
123
|
+
let table = self.db.open_table(TABLE_NAME).execute().await?;
|
|
124
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
125
|
+
table.add(Box::new(batches)).execute().await?;
|
|
126
|
+
|
|
127
|
+
tracing::debug!(
|
|
128
|
+
"Stored episode {} in context {}",
|
|
129
|
+
episode.id,
|
|
130
|
+
episode.context_name
|
|
131
|
+
);
|
|
132
|
+
Ok(episode)
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
pub async fn search(
|
|
136
|
+
&self,
|
|
137
|
+
query_embedding: Vec<f32>,
|
|
138
|
+
context: &str,
|
|
139
|
+
limit: usize,
|
|
140
|
+
) -> Result<Vec<(Episode, f32)>> {
|
|
141
|
+
let table = self.db.open_table(TABLE_NAME).execute().await?;
|
|
142
|
+
|
|
143
|
+
let results = table
|
|
144
|
+
.vector_search(query_embedding)?
|
|
145
|
+
.limit(limit * 3) // over-fetch then filter by context
|
|
146
|
+
.execute()
|
|
147
|
+
.await?;
|
|
148
|
+
|
|
149
|
+
let mut episodes = Vec::new();
|
|
150
|
+
let batches: Vec<RecordBatch> = results.try_collect().await?;
|
|
151
|
+
|
|
152
|
+
for batch in &batches {
|
|
153
|
+
let ids = batch
|
|
154
|
+
.column_by_name("id")
|
|
155
|
+
.unwrap()
|
|
156
|
+
.as_any()
|
|
157
|
+
.downcast_ref::<StringArray>()
|
|
158
|
+
.unwrap();
|
|
159
|
+
let timestamps = batch
|
|
160
|
+
.column_by_name("timestamp")
|
|
161
|
+
.unwrap()
|
|
162
|
+
.as_any()
|
|
163
|
+
.downcast_ref::<StringArray>()
|
|
164
|
+
.unwrap();
|
|
165
|
+
let contexts = batch
|
|
166
|
+
.column_by_name("context_name")
|
|
167
|
+
.unwrap()
|
|
168
|
+
.as_any()
|
|
169
|
+
.downcast_ref::<StringArray>()
|
|
170
|
+
.unwrap();
|
|
171
|
+
let roles = batch
|
|
172
|
+
.column_by_name("role")
|
|
173
|
+
.unwrap()
|
|
174
|
+
.as_any()
|
|
175
|
+
.downcast_ref::<StringArray>()
|
|
176
|
+
.unwrap();
|
|
177
|
+
let contents = batch
|
|
178
|
+
.column_by_name("content")
|
|
179
|
+
.unwrap()
|
|
180
|
+
.as_any()
|
|
181
|
+
.downcast_ref::<StringArray>()
|
|
182
|
+
.unwrap();
|
|
183
|
+
let layers = batch.column_by_name("layer");
|
|
184
|
+
let distances = batch
|
|
185
|
+
.column_by_name("_distance")
|
|
186
|
+
.unwrap()
|
|
187
|
+
.as_any()
|
|
188
|
+
.downcast_ref::<Float32Array>()
|
|
189
|
+
.unwrap();
|
|
190
|
+
|
|
191
|
+
for i in 0..batch.num_rows() {
|
|
192
|
+
let ctx = contexts.value(i);
|
|
193
|
+
if ctx != context && ctx != "global" {
|
|
194
|
+
continue;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
let layer = layers
|
|
198
|
+
.and_then(|l| l.as_any().downcast_ref::<StringArray>())
|
|
199
|
+
.map(|l| l.value(i).to_string())
|
|
200
|
+
.unwrap_or_else(|| "L2".to_string());
|
|
201
|
+
|
|
202
|
+
let episode = Episode {
|
|
203
|
+
id: ids.value(i).to_string(),
|
|
204
|
+
timestamp: timestamps.value(i).to_string(),
|
|
205
|
+
context_name: ctx.to_string(),
|
|
206
|
+
role: roles.value(i).to_string(),
|
|
207
|
+
content: contents.value(i).to_string(),
|
|
208
|
+
layer,
|
|
209
|
+
};
|
|
210
|
+
|
|
211
|
+
episodes.push((episode, distances.value(i)));
|
|
212
|
+
|
|
213
|
+
if episodes.len() >= limit {
|
|
214
|
+
break;
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
if episodes.len() >= limit {
|
|
219
|
+
break;
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
Ok(episodes)
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
/// Search across all contexts without filtering
|
|
227
|
+
pub async fn search_all(
|
|
228
|
+
&self,
|
|
229
|
+
query_embedding: Vec<f32>,
|
|
230
|
+
limit: usize,
|
|
231
|
+
) -> Result<Vec<(Episode, f32)>> {
|
|
232
|
+
let table = self.db.open_table(TABLE_NAME).execute().await?;
|
|
233
|
+
|
|
234
|
+
let results = table
|
|
235
|
+
.vector_search(query_embedding)?
|
|
236
|
+
.limit(limit)
|
|
237
|
+
.execute()
|
|
238
|
+
.await?;
|
|
239
|
+
|
|
240
|
+
let mut episodes = Vec::new();
|
|
241
|
+
let batches: Vec<RecordBatch> = results.try_collect().await?;
|
|
242
|
+
|
|
243
|
+
for batch in &batches {
|
|
244
|
+
let ids = batch
|
|
245
|
+
.column_by_name("id")
|
|
246
|
+
.unwrap()
|
|
247
|
+
.as_any()
|
|
248
|
+
.downcast_ref::<StringArray>()
|
|
249
|
+
.unwrap();
|
|
250
|
+
let timestamps = batch
|
|
251
|
+
.column_by_name("timestamp")
|
|
252
|
+
.unwrap()
|
|
253
|
+
.as_any()
|
|
254
|
+
.downcast_ref::<StringArray>()
|
|
255
|
+
.unwrap();
|
|
256
|
+
let contexts = batch
|
|
257
|
+
.column_by_name("context_name")
|
|
258
|
+
.unwrap()
|
|
259
|
+
.as_any()
|
|
260
|
+
.downcast_ref::<StringArray>()
|
|
261
|
+
.unwrap();
|
|
262
|
+
let roles = batch
|
|
263
|
+
.column_by_name("role")
|
|
264
|
+
.unwrap()
|
|
265
|
+
.as_any()
|
|
266
|
+
.downcast_ref::<StringArray>()
|
|
267
|
+
.unwrap();
|
|
268
|
+
let contents = batch
|
|
269
|
+
.column_by_name("content")
|
|
270
|
+
.unwrap()
|
|
271
|
+
.as_any()
|
|
272
|
+
.downcast_ref::<StringArray>()
|
|
273
|
+
.unwrap();
|
|
274
|
+
let layers = batch.column_by_name("layer");
|
|
275
|
+
let distances = batch
|
|
276
|
+
.column_by_name("_distance")
|
|
277
|
+
.unwrap()
|
|
278
|
+
.as_any()
|
|
279
|
+
.downcast_ref::<Float32Array>()
|
|
280
|
+
.unwrap();
|
|
281
|
+
|
|
282
|
+
for i in 0..batch.num_rows() {
|
|
283
|
+
let layer = layers
|
|
284
|
+
.and_then(|l| l.as_any().downcast_ref::<StringArray>())
|
|
285
|
+
.map(|l| l.value(i).to_string())
|
|
286
|
+
.unwrap_or_else(|| "L2".to_string());
|
|
287
|
+
|
|
288
|
+
let episode = Episode {
|
|
289
|
+
id: ids.value(i).to_string(),
|
|
290
|
+
timestamp: timestamps.value(i).to_string(),
|
|
291
|
+
context_name: contexts.value(i).to_string(),
|
|
292
|
+
role: roles.value(i).to_string(),
|
|
293
|
+
content: contents.value(i).to_string(),
|
|
294
|
+
layer,
|
|
295
|
+
};
|
|
296
|
+
|
|
297
|
+
episodes.push((episode, distances.value(i)));
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
Ok(episodes)
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
pub async fn get_by_id(&self, id: &str) -> Result<Option<Episode>> {
|
|
305
|
+
let table = self.db.open_table(TABLE_NAME).execute().await?;
|
|
306
|
+
let results = table
|
|
307
|
+
.query()
|
|
308
|
+
.only_if(format!("id = '{}'", id))
|
|
309
|
+
.execute()
|
|
310
|
+
.await?;
|
|
311
|
+
|
|
312
|
+
let batches: Vec<RecordBatch> = results.try_collect().await?;
|
|
313
|
+
for batch in &batches {
|
|
314
|
+
if batch.num_rows() > 0 {
|
|
315
|
+
let ids = batch
|
|
316
|
+
.column_by_name("id")
|
|
317
|
+
.unwrap()
|
|
318
|
+
.as_any()
|
|
319
|
+
.downcast_ref::<StringArray>()
|
|
320
|
+
.unwrap();
|
|
321
|
+
let timestamps = batch
|
|
322
|
+
.column_by_name("timestamp")
|
|
323
|
+
.unwrap()
|
|
324
|
+
.as_any()
|
|
325
|
+
.downcast_ref::<StringArray>()
|
|
326
|
+
.unwrap();
|
|
327
|
+
let contexts = batch
|
|
328
|
+
.column_by_name("context_name")
|
|
329
|
+
.unwrap()
|
|
330
|
+
.as_any()
|
|
331
|
+
.downcast_ref::<StringArray>()
|
|
332
|
+
.unwrap();
|
|
333
|
+
let roles = batch
|
|
334
|
+
.column_by_name("role")
|
|
335
|
+
.unwrap()
|
|
336
|
+
.as_any()
|
|
337
|
+
.downcast_ref::<StringArray>()
|
|
338
|
+
.unwrap();
|
|
339
|
+
let contents = batch
|
|
340
|
+
.column_by_name("content")
|
|
341
|
+
.unwrap()
|
|
342
|
+
.as_any()
|
|
343
|
+
.downcast_ref::<StringArray>()
|
|
344
|
+
.unwrap();
|
|
345
|
+
let layers = batch.column_by_name("layer");
|
|
346
|
+
|
|
347
|
+
let layer = layers
|
|
348
|
+
.and_then(|l| l.as_any().downcast_ref::<StringArray>())
|
|
349
|
+
.map(|l| l.value(0).to_string())
|
|
350
|
+
.unwrap_or_else(|| "L2".to_string());
|
|
351
|
+
|
|
352
|
+
return Ok(Some(Episode {
|
|
353
|
+
id: ids.value(0).to_string(),
|
|
354
|
+
timestamp: timestamps.value(0).to_string(),
|
|
355
|
+
context_name: contexts.value(0).to_string(),
|
|
356
|
+
role: roles.value(0).to_string(),
|
|
357
|
+
content: contents.value(0).to_string(),
|
|
358
|
+
layer,
|
|
359
|
+
}));
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
Ok(None)
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
pub async fn get_by_context(&self, context: &str) -> Result<Vec<Episode>> {
|
|
367
|
+
let table = self.db.open_table(TABLE_NAME).execute().await?;
|
|
368
|
+
|
|
369
|
+
let results = table
|
|
370
|
+
.query()
|
|
371
|
+
.only_if(format!("context_name = '{}'", context))
|
|
372
|
+
.execute()
|
|
373
|
+
.await?;
|
|
374
|
+
|
|
375
|
+
let mut episodes = Vec::new();
|
|
376
|
+
let batches: Vec<RecordBatch> = results.try_collect().await?;
|
|
377
|
+
|
|
378
|
+
for batch in &batches {
|
|
379
|
+
let ids = batch
|
|
380
|
+
.column_by_name("id")
|
|
381
|
+
.unwrap()
|
|
382
|
+
.as_any()
|
|
383
|
+
.downcast_ref::<StringArray>()
|
|
384
|
+
.unwrap();
|
|
385
|
+
let timestamps = batch
|
|
386
|
+
.column_by_name("timestamp")
|
|
387
|
+
.unwrap()
|
|
388
|
+
.as_any()
|
|
389
|
+
.downcast_ref::<StringArray>()
|
|
390
|
+
.unwrap();
|
|
391
|
+
let contexts = batch
|
|
392
|
+
.column_by_name("context_name")
|
|
393
|
+
.unwrap()
|
|
394
|
+
.as_any()
|
|
395
|
+
.downcast_ref::<StringArray>()
|
|
396
|
+
.unwrap();
|
|
397
|
+
let roles = batch
|
|
398
|
+
.column_by_name("role")
|
|
399
|
+
.unwrap()
|
|
400
|
+
.as_any()
|
|
401
|
+
.downcast_ref::<StringArray>()
|
|
402
|
+
.unwrap();
|
|
403
|
+
let contents = batch
|
|
404
|
+
.column_by_name("content")
|
|
405
|
+
.unwrap()
|
|
406
|
+
.as_any()
|
|
407
|
+
.downcast_ref::<StringArray>()
|
|
408
|
+
.unwrap();
|
|
409
|
+
let layers = batch.column_by_name("layer");
|
|
410
|
+
|
|
411
|
+
for i in 0..batch.num_rows() {
|
|
412
|
+
let layer = layers
|
|
413
|
+
.and_then(|l| l.as_any().downcast_ref::<StringArray>())
|
|
414
|
+
.map(|l| l.value(i).to_string())
|
|
415
|
+
.unwrap_or_else(|| "L2".to_string());
|
|
416
|
+
|
|
417
|
+
episodes.push(Episode {
|
|
418
|
+
id: ids.value(i).to_string(),
|
|
419
|
+
timestamp: timestamps.value(i).to_string(),
|
|
420
|
+
context_name: contexts.value(i).to_string(),
|
|
421
|
+
role: roles.value(i).to_string(),
|
|
422
|
+
content: contents.value(i).to_string(),
|
|
423
|
+
layer,
|
|
424
|
+
});
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
Ok(episodes)
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
pub async fn count_by_context(&self, context: &str) -> Result<usize> {
|
|
432
|
+
let episodes = self.get_by_context(context).await?;
|
|
433
|
+
Ok(episodes.len())
|
|
434
|
+
}
|
|
435
|
+
|
|
436
|
+
pub async fn delete_by_context(&self, context: &str) -> Result<()> {
|
|
437
|
+
let table = self.db.open_table(TABLE_NAME).execute().await?;
|
|
438
|
+
table
|
|
439
|
+
.delete(&format!("context_name = '{}'", context))
|
|
440
|
+
.await?;
|
|
441
|
+
tracing::info!("Deleted all episodes in context: {}", context);
|
|
442
|
+
Ok(())
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
/// Update the layer field for an episode (used by promotion engine)
|
|
446
|
+
pub async fn promote_to_l3(&self, id: &str) -> Result<()> {
|
|
447
|
+
// LanceDB doesn't support in-place updates easily.
|
|
448
|
+
// We'll use a delete-and-reinsert approach via the promotion engine.
|
|
449
|
+
// For now, this is a no-op marker — the promotion engine tracks L3 status
|
|
450
|
+
// in its own metadata table.
|
|
451
|
+
tracing::debug!("Marking episode {} as L3", id);
|
|
452
|
+
Ok(())
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
/// Get all episode IDs for a context
|
|
456
|
+
pub async fn get_ids_by_context(&self, context: &str) -> Result<Vec<String>> {
|
|
457
|
+
let episodes = self.get_by_context(context).await?;
|
|
458
|
+
Ok(episodes.into_iter().map(|e| e.id).collect())
|
|
459
|
+
}
|
|
460
|
+
}
|