@sesamespace/hivemind 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +86 -0
- package/config/TEAM-CHARTER.md +87 -0
- package/config/default.toml +39 -0
- package/dist/__tests__/fleet-integration.test.d.ts +9 -0
- package/dist/__tests__/fleet-integration.test.d.ts.map +1 -0
- package/dist/__tests__/fleet-integration.test.js +201 -0
- package/dist/__tests__/fleet-integration.test.js.map +1 -0
- package/dist/__tests__/fleet.test.d.ts +7 -0
- package/dist/__tests__/fleet.test.d.ts.map +1 -0
- package/dist/__tests__/fleet.test.js +171 -0
- package/dist/__tests__/fleet.test.js.map +1 -0
- package/dist/__tests__/integration.test.d.ts +2 -0
- package/dist/__tests__/integration.test.d.ts.map +1 -0
- package/dist/__tests__/integration.test.js +348 -0
- package/dist/__tests__/integration.test.js.map +1 -0
- package/dist/agent.d.ts +27 -0
- package/dist/agent.d.ts.map +1 -0
- package/dist/agent.js +217 -0
- package/dist/agent.js.map +1 -0
- package/dist/commands/fleet.d.ts +13 -0
- package/dist/commands/fleet.d.ts.map +1 -0
- package/dist/commands/fleet.js +193 -0
- package/dist/commands/fleet.js.map +1 -0
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.d.ts.map +1 -0
- package/dist/commands/init.js +170 -0
- package/dist/commands/init.js.map +1 -0
- package/dist/commands/start.d.ts +2 -0
- package/dist/commands/start.d.ts.map +1 -0
- package/dist/commands/start.js +39 -0
- package/dist/commands/start.js.map +1 -0
- package/dist/config.d.ts +44 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +73 -0
- package/dist/config.js.map +1 -0
- package/dist/context.d.ts +50 -0
- package/dist/context.d.ts.map +1 -0
- package/dist/context.js +153 -0
- package/dist/context.js.map +1 -0
- package/dist/fleet/fleet-manager.d.ts +86 -0
- package/dist/fleet/fleet-manager.d.ts.map +1 -0
- package/dist/fleet/fleet-manager.js +298 -0
- package/dist/fleet/fleet-manager.js.map +1 -0
- package/dist/fleet/memory-sync.d.ts +91 -0
- package/dist/fleet/memory-sync.d.ts.map +1 -0
- package/dist/fleet/memory-sync.js +292 -0
- package/dist/fleet/memory-sync.js.map +1 -0
- package/dist/fleet/primary-client.d.ts +49 -0
- package/dist/fleet/primary-client.d.ts.map +1 -0
- package/dist/fleet/primary-client.js +222 -0
- package/dist/fleet/primary-client.js.map +1 -0
- package/dist/fleet/worker-protocol.d.ts +125 -0
- package/dist/fleet/worker-protocol.d.ts.map +1 -0
- package/dist/fleet/worker-protocol.js +27 -0
- package/dist/fleet/worker-protocol.js.map +1 -0
- package/dist/fleet/worker-server.d.ts +53 -0
- package/dist/fleet/worker-server.d.ts.map +1 -0
- package/dist/fleet/worker-server.js +191 -0
- package/dist/fleet/worker-server.js.map +1 -0
- package/dist/index.d.ts +26 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/index.js.map +1 -0
- package/dist/llm-client.d.ts +24 -0
- package/dist/llm-client.d.ts.map +1 -0
- package/dist/llm-client.js +40 -0
- package/dist/llm-client.js.map +1 -0
- package/dist/main.d.ts +3 -0
- package/dist/main.d.ts.map +1 -0
- package/dist/main.js +41 -0
- package/dist/main.js.map +1 -0
- package/dist/memory-client.d.ts +88 -0
- package/dist/memory-client.d.ts.map +1 -0
- package/dist/memory-client.js +185 -0
- package/dist/memory-client.js.map +1 -0
- package/dist/pipeline.d.ts +2 -0
- package/dist/pipeline.d.ts.map +1 -0
- package/dist/pipeline.js +125 -0
- package/dist/pipeline.js.map +1 -0
- package/dist/prompt.d.ts +6 -0
- package/dist/prompt.d.ts.map +1 -0
- package/dist/prompt.js +75 -0
- package/dist/prompt.js.map +1 -0
- package/dist/sesame.d.ts +33 -0
- package/dist/sesame.d.ts.map +1 -0
- package/dist/sesame.js +67 -0
- package/dist/sesame.js.map +1 -0
- package/dist/start.d.ts +3 -0
- package/dist/start.d.ts.map +1 -0
- package/dist/start.js +20 -0
- package/dist/start.js.map +1 -0
- package/dist/task-engine.d.ts +32 -0
- package/dist/task-engine.d.ts.map +1 -0
- package/dist/task-engine.js +80 -0
- package/dist/task-engine.js.map +1 -0
- package/dist/worker.d.ts +73 -0
- package/dist/worker.d.ts.map +1 -0
- package/dist/worker.js +279 -0
- package/dist/worker.js.map +1 -0
- package/install.sh +186 -0
- package/package.json +36 -0
- package/packages/memory/Cargo.lock +6480 -0
- package/packages/memory/Cargo.toml +21 -0
- package/packages/memory/src/src/context.rs +179 -0
- package/packages/memory/src/src/embeddings.rs +51 -0
- package/packages/memory/src/src/main.rs +626 -0
- package/packages/memory/src/src/promotion.rs +637 -0
- package/packages/memory/src/src/scoring.rs +131 -0
- package/packages/memory/src/src/store.rs +460 -0
- package/packages/memory/src/src/tasks.rs +321 -0
|
@@ -0,0 +1,637 @@
|
|
|
1
|
+
use anyhow::Result;
|
|
2
|
+
use arrow_array::{RecordBatch, RecordBatchIterator, StringArray, UInt64Array};
|
|
3
|
+
use arrow_schema::{DataType, Field, Schema};
|
|
4
|
+
use chrono::Utc;
|
|
5
|
+
use futures::stream::TryStreamExt;
|
|
6
|
+
use lancedb::{connection::Connection, query::ExecutableQuery, query::QueryBase, Table};
|
|
7
|
+
use serde::{Deserialize, Serialize};
|
|
8
|
+
use std::collections::HashMap;
|
|
9
|
+
use std::sync::Arc;
|
|
10
|
+
|
|
11
|
+
/// Table tracking per-episode access counts.
const ACCESS_TABLE: &str = "episode_access";
/// Table tracking pairwise episode co-retrieval counts.
const COOCCURRENCE_TABLE: &str = "episode_cooccurrence";
/// Table holding entries promoted to the long-term (L3) knowledge tier.
const L3_TABLE: &str = "l3_knowledge";

// Promotion thresholds
const ACCESS_THRESHOLD: u64 = 5; // episode must be accessed at least N times
const COOCCURRENCE_THRESHOLD: u64 = 3; // episode must co-occur with others at least N times
|
|
18
|
+
|
|
19
|
+
/// Per-episode access statistics persisted in the `episode_access` table.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AccessRecord {
    /// ID of the episode this record tracks.
    pub episode_id: String,
    /// Total number of recorded accesses.
    pub access_count: u64,
    /// RFC 3339 timestamp of the most recent access (written by `record_access`).
    pub last_accessed: String,
}
|
|
25
|
+
|
|
26
|
+
/// How often two episodes were retrieved together, persisted in the
/// `episode_cooccurrence` table. `record_co_access` stores each pair once,
/// with the lexicographically smaller ID in `episode_a`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CooccurrenceRecord {
    /// Lexicographically smaller episode ID of the pair.
    pub episode_a: String,
    /// Lexicographically larger episode ID of the pair.
    pub episode_b: String,
    /// Number of times the pair was retrieved together.
    pub count: u64,
}
|
|
32
|
+
|
|
33
|
+
/// A knowledge entry promoted to the long-term (L3) tier, persisted in the
/// `l3_knowledge` table.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct L3Entry {
    /// Unique ID of this L3 entry (UUID v4, assigned at promotion time).
    pub id: String,
    /// ID of the episode this entry was promoted from.
    pub source_episode_id: String,
    /// Context the source episode belonged to at promotion time.
    pub context_name: String,
    /// Episode content, copied at promotion time.
    pub content: String,
    /// RFC 3339 timestamp of when the promotion happened.
    pub promoted_at: String,
    /// Access count of the source episode at promotion time.
    pub access_count: u64,
    /// Summed co-occurrence counts of the source episode at promotion time.
    pub connection_density: u64,
}
|
|
43
|
+
|
|
44
|
+
/// Engine that tracks episode access and co-occurrence statistics and
/// promotes frequently used, well-connected episodes into the L3 table.
pub struct PromotionEngine {
    // LanceDB connection used for all table operations.
    db: Connection,
}
|
|
47
|
+
|
|
48
|
+
impl PromotionEngine {
|
|
49
|
+
pub async fn new(db: Connection) -> Result<Self> {
|
|
50
|
+
let engine = Self { db };
|
|
51
|
+
engine.ensure_tables().await?;
|
|
52
|
+
Ok(engine)
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
fn access_schema() -> Arc<Schema> {
|
|
56
|
+
Arc::new(Schema::new(vec![
|
|
57
|
+
Field::new("episode_id", DataType::Utf8, false),
|
|
58
|
+
Field::new("access_count", DataType::UInt64, false),
|
|
59
|
+
Field::new("last_accessed", DataType::Utf8, false),
|
|
60
|
+
]))
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
fn cooccurrence_schema() -> Arc<Schema> {
|
|
64
|
+
Arc::new(Schema::new(vec![
|
|
65
|
+
Field::new("episode_a", DataType::Utf8, false),
|
|
66
|
+
Field::new("episode_b", DataType::Utf8, false),
|
|
67
|
+
Field::new("count", DataType::UInt64, false),
|
|
68
|
+
]))
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
fn l3_schema() -> Arc<Schema> {
|
|
72
|
+
Arc::new(Schema::new(vec![
|
|
73
|
+
Field::new("id", DataType::Utf8, false),
|
|
74
|
+
Field::new("source_episode_id", DataType::Utf8, false),
|
|
75
|
+
Field::new("context_name", DataType::Utf8, false),
|
|
76
|
+
Field::new("content", DataType::Utf8, false),
|
|
77
|
+
Field::new("promoted_at", DataType::Utf8, false),
|
|
78
|
+
Field::new("access_count", DataType::UInt64, false),
|
|
79
|
+
Field::new("connection_density", DataType::UInt64, false),
|
|
80
|
+
]))
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
async fn ensure_tables(&self) -> Result<()> {
|
|
84
|
+
let names = self.db.table_names().execute().await?;
|
|
85
|
+
|
|
86
|
+
if !names.contains(&ACCESS_TABLE.to_string()) {
|
|
87
|
+
let schema = Self::access_schema();
|
|
88
|
+
let batch = RecordBatch::new_empty(schema.clone());
|
|
89
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
90
|
+
self.db
|
|
91
|
+
.create_table(ACCESS_TABLE, Box::new(batches))
|
|
92
|
+
.execute()
|
|
93
|
+
.await?;
|
|
94
|
+
tracing::info!("Created episode_access table");
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
if !names.contains(&COOCCURRENCE_TABLE.to_string()) {
|
|
98
|
+
let schema = Self::cooccurrence_schema();
|
|
99
|
+
let batch = RecordBatch::new_empty(schema.clone());
|
|
100
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
101
|
+
self.db
|
|
102
|
+
.create_table(COOCCURRENCE_TABLE, Box::new(batches))
|
|
103
|
+
.execute()
|
|
104
|
+
.await?;
|
|
105
|
+
tracing::info!("Created episode_cooccurrence table");
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
if !names.contains(&L3_TABLE.to_string()) {
|
|
109
|
+
let schema = Self::l3_schema();
|
|
110
|
+
let batch = RecordBatch::new_empty(schema.clone());
|
|
111
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
112
|
+
self.db
|
|
113
|
+
.create_table(L3_TABLE, Box::new(batches))
|
|
114
|
+
.execute()
|
|
115
|
+
.await?;
|
|
116
|
+
tracing::info!("Created l3_knowledge table");
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
Ok(())
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
/// Record a single access for an episode
|
|
123
|
+
pub async fn record_access(&self, episode_id: &str) -> Result<()> {
|
|
124
|
+
let table = self.db.open_table(ACCESS_TABLE).execute().await?;
|
|
125
|
+
|
|
126
|
+
// Check if record exists
|
|
127
|
+
let existing = self.get_access_record(episode_id).await?;
|
|
128
|
+
|
|
129
|
+
if let Some(record) = existing {
|
|
130
|
+
// Delete old record and insert updated one
|
|
131
|
+
table
|
|
132
|
+
.delete(&format!("episode_id = '{}'", episode_id))
|
|
133
|
+
.await?;
|
|
134
|
+
|
|
135
|
+
let new_count = record.access_count + 1;
|
|
136
|
+
let now = Utc::now().to_rfc3339();
|
|
137
|
+
let schema = Self::access_schema();
|
|
138
|
+
let batch = RecordBatch::try_new(
|
|
139
|
+
schema.clone(),
|
|
140
|
+
vec![
|
|
141
|
+
Arc::new(StringArray::from(vec![episode_id])),
|
|
142
|
+
Arc::new(UInt64Array::from(vec![new_count])),
|
|
143
|
+
Arc::new(StringArray::from(vec![now.as_str()])),
|
|
144
|
+
],
|
|
145
|
+
)?;
|
|
146
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
147
|
+
table.add(Box::new(batches)).execute().await?;
|
|
148
|
+
} else {
|
|
149
|
+
// Insert new record
|
|
150
|
+
let now = Utc::now().to_rfc3339();
|
|
151
|
+
let schema = Self::access_schema();
|
|
152
|
+
let batch = RecordBatch::try_new(
|
|
153
|
+
schema.clone(),
|
|
154
|
+
vec![
|
|
155
|
+
Arc::new(StringArray::from(vec![episode_id])),
|
|
156
|
+
Arc::new(UInt64Array::from(vec![1u64])),
|
|
157
|
+
Arc::new(StringArray::from(vec![now.as_str()])),
|
|
158
|
+
],
|
|
159
|
+
)?;
|
|
160
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
161
|
+
table.add(Box::new(batches)).execute().await?;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
Ok(())
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
/// Record co-access for a set of episodes retrieved together
|
|
168
|
+
pub async fn record_co_access(&self, episode_ids: &[String]) -> Result<()> {
|
|
169
|
+
if episode_ids.len() < 2 {
|
|
170
|
+
return Ok(());
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
let table = self.db.open_table(COOCCURRENCE_TABLE).execute().await?;
|
|
174
|
+
|
|
175
|
+
// For each pair, update or create cooccurrence record
|
|
176
|
+
for i in 0..episode_ids.len() {
|
|
177
|
+
for j in (i + 1)..episode_ids.len() {
|
|
178
|
+
let (a, b) = if episode_ids[i] < episode_ids[j] {
|
|
179
|
+
(&episode_ids[i], &episode_ids[j])
|
|
180
|
+
} else {
|
|
181
|
+
(&episode_ids[j], &episode_ids[i])
|
|
182
|
+
};
|
|
183
|
+
|
|
184
|
+
let existing = self.get_cooccurrence(a, b).await?;
|
|
185
|
+
|
|
186
|
+
if let Some(record) = existing {
|
|
187
|
+
table
|
|
188
|
+
.delete(&format!(
|
|
189
|
+
"episode_a = '{}' AND episode_b = '{}'",
|
|
190
|
+
a, b
|
|
191
|
+
))
|
|
192
|
+
.await?;
|
|
193
|
+
|
|
194
|
+
let new_count = record.count + 1;
|
|
195
|
+
let schema = Self::cooccurrence_schema();
|
|
196
|
+
let batch = RecordBatch::try_new(
|
|
197
|
+
schema.clone(),
|
|
198
|
+
vec![
|
|
199
|
+
Arc::new(StringArray::from(vec![a.as_str()])),
|
|
200
|
+
Arc::new(StringArray::from(vec![b.as_str()])),
|
|
201
|
+
Arc::new(UInt64Array::from(vec![new_count])),
|
|
202
|
+
],
|
|
203
|
+
)?;
|
|
204
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
205
|
+
table.add(Box::new(batches)).execute().await?;
|
|
206
|
+
} else {
|
|
207
|
+
let schema = Self::cooccurrence_schema();
|
|
208
|
+
let batch = RecordBatch::try_new(
|
|
209
|
+
schema.clone(),
|
|
210
|
+
vec![
|
|
211
|
+
Arc::new(StringArray::from(vec![a.as_str()])),
|
|
212
|
+
Arc::new(StringArray::from(vec![b.as_str()])),
|
|
213
|
+
Arc::new(UInt64Array::from(vec![1u64])),
|
|
214
|
+
],
|
|
215
|
+
)?;
|
|
216
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
217
|
+
table.add(Box::new(batches)).execute().await?;
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
Ok(())
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
    /// Run the promotion engine: check all episodes and promote eligible ones to L3
    ///
    /// An episode is promoted when its access count meets `ACCESS_THRESHOLD`
    /// and its summed co-occurrence count meets `COOCCURRENCE_THRESHOLD`.
    /// When `context_filter` is given, only episodes in that context are
    /// promoted. Returns the IDs of the episodes promoted in this run.
    pub async fn run_promotion(&self, context_filter: Option<&str>) -> Result<Vec<String>> {
        let mut promoted = Vec::new();

        // Get all access records that meet the threshold
        let access_records = self.get_high_access_episodes().await?;

        for record in access_records {
            // Check if already promoted (one L3 entry per source episode)
            if self.is_promoted(&record.episode_id).await? {
                continue;
            }

            // Get connection density for this episode
            let density = self.get_connection_density(&record.episode_id).await?;

            // Check if both thresholds are met. The access check is already
            // guaranteed by get_high_access_episodes; rechecking is harmless.
            if record.access_count >= ACCESS_THRESHOLD && density >= COOCCURRENCE_THRESHOLD {
                // Get the episode to determine its context
                let episode_context = self.get_episode_context(&record.episode_id).await?;
                if let Some(ref ctx) = episode_context {
                    // Apply context filter if specified
                    if let Some(filter) = context_filter {
                        if ctx != filter {
                            continue;
                        }
                    }
                }

                let episode_content = self.get_episode_content(&record.episode_id).await?;
                // Both context and content must resolve for promotion; an
                // episode missing either is silently skipped this run.
                if let (Some(ctx), Some(content)) = (episode_context, episode_content) {
                    self.promote_episode(
                        &record.episode_id,
                        &ctx,
                        &content,
                        record.access_count,
                        density,
                    )
                    .await?;
                    promoted.push(record.episode_id);
                }
            }
        }

        if !promoted.is_empty() {
            tracing::info!("Promoted {} episodes to L3", promoted.len());
        }

        Ok(promoted)
    }
|
|
275
|
+
|
|
276
|
+
    /// Look up the access record for `episode_id`, or `None` if the episode
    /// has never been accessed.
    ///
    /// NOTE(review): `episode_id` is interpolated into the filter unescaped;
    /// IDs containing `'` would break the predicate.
    async fn get_access_record(&self, episode_id: &str) -> Result<Option<AccessRecord>> {
        let table = self.db.open_table(ACCESS_TABLE).execute().await?;
        let results = table
            .query()
            .only_if(format!("episode_id = '{}'", episode_id))
            .execute()
            .await?;

        let batches: Vec<RecordBatch> = results.try_collect().await?;
        // Return the first matching row; record_access keeps at most one
        // row per episode, so further rows are not expected.
        for batch in &batches {
            if batch.num_rows() > 0 {
                // Column downcasts assume the layout from access_schema().
                let ids = batch
                    .column_by_name("episode_id")
                    .unwrap()
                    .as_any()
                    .downcast_ref::<StringArray>()
                    .unwrap();
                let counts = batch
                    .column_by_name("access_count")
                    .unwrap()
                    .as_any()
                    .downcast_ref::<UInt64Array>()
                    .unwrap();
                let last = batch
                    .column_by_name("last_accessed")
                    .unwrap()
                    .as_any()
                    .downcast_ref::<StringArray>()
                    .unwrap();

                return Ok(Some(AccessRecord {
                    episode_id: ids.value(0).to_string(),
                    access_count: counts.value(0),
                    last_accessed: last.value(0).to_string(),
                }));
            }
        }
        Ok(None)
    }
|
|
315
|
+
|
|
316
|
+
    /// Look up the co-occurrence record for the pair `(a, b)`, or `None` if
    /// the pair has never co-occurred.
    ///
    /// Callers are expected to pass the pair in canonical order (`a < b`),
    /// matching how `record_co_access` stores rows.
    async fn get_cooccurrence(&self, a: &str, b: &str) -> Result<Option<CooccurrenceRecord>> {
        let table = self.db.open_table(COOCCURRENCE_TABLE).execute().await?;
        let results = table
            .query()
            .only_if(format!("episode_a = '{}' AND episode_b = '{}'", a, b))
            .execute()
            .await?;

        let batches: Vec<RecordBatch> = results.try_collect().await?;
        // Return the first matching row; one canonical row per pair.
        for batch in &batches {
            if batch.num_rows() > 0 {
                // Column downcasts assume the layout from cooccurrence_schema().
                let ep_a = batch
                    .column_by_name("episode_a")
                    .unwrap()
                    .as_any()
                    .downcast_ref::<StringArray>()
                    .unwrap();
                let ep_b = batch
                    .column_by_name("episode_b")
                    .unwrap()
                    .as_any()
                    .downcast_ref::<StringArray>()
                    .unwrap();
                let counts = batch
                    .column_by_name("count")
                    .unwrap()
                    .as_any()
                    .downcast_ref::<UInt64Array>()
                    .unwrap();

                return Ok(Some(CooccurrenceRecord {
                    episode_a: ep_a.value(0).to_string(),
                    episode_b: ep_b.value(0).to_string(),
                    count: counts.value(0),
                }));
            }
        }
        Ok(None)
    }
|
|
355
|
+
|
|
356
|
+
    /// All access records whose count meets `ACCESS_THRESHOLD` — the
    /// candidate set for promotion.
    async fn get_high_access_episodes(&self) -> Result<Vec<AccessRecord>> {
        let table = self.db.open_table(ACCESS_TABLE).execute().await?;
        let results = table
            .query()
            .only_if(format!("access_count >= {}", ACCESS_THRESHOLD))
            .execute()
            .await?;

        let mut records = Vec::new();
        let batches: Vec<RecordBatch> = results.try_collect().await?;

        for batch in &batches {
            // Column downcasts assume the layout from access_schema().
            let ids = batch
                .column_by_name("episode_id")
                .unwrap()
                .as_any()
                .downcast_ref::<StringArray>()
                .unwrap();
            let counts = batch
                .column_by_name("access_count")
                .unwrap()
                .as_any()
                .downcast_ref::<UInt64Array>()
                .unwrap();
            let last = batch
                .column_by_name("last_accessed")
                .unwrap()
                .as_any()
                .downcast_ref::<StringArray>()
                .unwrap();

            for i in 0..batch.num_rows() {
                records.push(AccessRecord {
                    episode_id: ids.value(i).to_string(),
                    access_count: counts.value(i),
                    last_accessed: last.value(i).to_string(),
                });
            }
        }

        Ok(records)
    }
|
|
398
|
+
|
|
399
|
+
    /// Total co-occurrence weight for an episode: the sum of `count` over
    /// every pair row that mentions it, queried in both orientations
    /// (`episode_a` then `episode_b`) since pairs are stored once, ordered.
    async fn get_connection_density(&self, episode_id: &str) -> Result<u64> {
        let table = self.db.open_table(COOCCURRENCE_TABLE).execute().await?;

        // Sum all cooccurrence counts where this episode is involved
        let results_a = table
            .query()
            .only_if(format!("episode_a = '{}'", episode_id))
            .execute()
            .await?;

        let mut total: u64 = 0;
        let batches: Vec<RecordBatch> = results_a.try_collect().await?;
        for batch in &batches {
            let counts = batch
                .column_by_name("count")
                .unwrap()
                .as_any()
                .downcast_ref::<UInt64Array>()
                .unwrap();
            for i in 0..batch.num_rows() {
                total += counts.value(i);
            }
        }

        // Second orientation: rows where this episode is episode_b.
        let results_b = table
            .query()
            .only_if(format!("episode_b = '{}'", episode_id))
            .execute()
            .await?;

        let batches: Vec<RecordBatch> = results_b.try_collect().await?;
        for batch in &batches {
            let counts = batch
                .column_by_name("count")
                .unwrap()
                .as_any()
                .downcast_ref::<UInt64Array>()
                .unwrap();
            for i in 0..batch.num_rows() {
                total += counts.value(i);
            }
        }

        Ok(total)
    }
|
|
444
|
+
|
|
445
|
+
async fn is_promoted(&self, episode_id: &str) -> Result<bool> {
|
|
446
|
+
let table = self.db.open_table(L3_TABLE).execute().await?;
|
|
447
|
+
let results = table
|
|
448
|
+
.query()
|
|
449
|
+
.only_if(format!("source_episode_id = '{}'", episode_id))
|
|
450
|
+
.execute()
|
|
451
|
+
.await?;
|
|
452
|
+
|
|
453
|
+
let batches: Vec<RecordBatch> = results.try_collect().await?;
|
|
454
|
+
for batch in &batches {
|
|
455
|
+
if batch.num_rows() > 0 {
|
|
456
|
+
return Ok(true);
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
Ok(false)
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
async fn get_episode_context(&self, episode_id: &str) -> Result<Option<String>> {
|
|
463
|
+
let table = self.db.open_table("episodes").execute().await?;
|
|
464
|
+
let results = table
|
|
465
|
+
.query()
|
|
466
|
+
.only_if(format!("id = '{}'", episode_id))
|
|
467
|
+
.execute()
|
|
468
|
+
.await?;
|
|
469
|
+
|
|
470
|
+
let batches: Vec<RecordBatch> = results.try_collect().await?;
|
|
471
|
+
for batch in &batches {
|
|
472
|
+
if batch.num_rows() > 0 {
|
|
473
|
+
let contexts = batch
|
|
474
|
+
.column_by_name("context_name")
|
|
475
|
+
.unwrap()
|
|
476
|
+
.as_any()
|
|
477
|
+
.downcast_ref::<StringArray>()
|
|
478
|
+
.unwrap();
|
|
479
|
+
return Ok(Some(contexts.value(0).to_string()));
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
Ok(None)
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
async fn get_episode_content(&self, episode_id: &str) -> Result<Option<String>> {
|
|
486
|
+
let table = self.db.open_table("episodes").execute().await?;
|
|
487
|
+
let results = table
|
|
488
|
+
.query()
|
|
489
|
+
.only_if(format!("id = '{}'", episode_id))
|
|
490
|
+
.execute()
|
|
491
|
+
.await?;
|
|
492
|
+
|
|
493
|
+
let batches: Vec<RecordBatch> = results.try_collect().await?;
|
|
494
|
+
for batch in &batches {
|
|
495
|
+
if batch.num_rows() > 0 {
|
|
496
|
+
let contents = batch
|
|
497
|
+
.column_by_name("content")
|
|
498
|
+
.unwrap()
|
|
499
|
+
.as_any()
|
|
500
|
+
.downcast_ref::<StringArray>()
|
|
501
|
+
.unwrap();
|
|
502
|
+
return Ok(Some(contents.value(0).to_string()));
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
Ok(None)
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
async fn promote_episode(
|
|
509
|
+
&self,
|
|
510
|
+
episode_id: &str,
|
|
511
|
+
context: &str,
|
|
512
|
+
content: &str,
|
|
513
|
+
access_count: u64,
|
|
514
|
+
connection_density: u64,
|
|
515
|
+
) -> Result<()> {
|
|
516
|
+
let id = uuid::Uuid::new_v4().to_string();
|
|
517
|
+
let now = Utc::now().to_rfc3339();
|
|
518
|
+
|
|
519
|
+
let table = self.db.open_table(L3_TABLE).execute().await?;
|
|
520
|
+
let schema = Self::l3_schema();
|
|
521
|
+
let batch = RecordBatch::try_new(
|
|
522
|
+
schema.clone(),
|
|
523
|
+
vec![
|
|
524
|
+
Arc::new(StringArray::from(vec![id.as_str()])),
|
|
525
|
+
Arc::new(StringArray::from(vec![episode_id])),
|
|
526
|
+
Arc::new(StringArray::from(vec![context])),
|
|
527
|
+
Arc::new(StringArray::from(vec![content])),
|
|
528
|
+
Arc::new(StringArray::from(vec![now.as_str()])),
|
|
529
|
+
Arc::new(UInt64Array::from(vec![access_count])),
|
|
530
|
+
Arc::new(UInt64Array::from(vec![connection_density])),
|
|
531
|
+
],
|
|
532
|
+
)?;
|
|
533
|
+
|
|
534
|
+
let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
|
|
535
|
+
table.add(Box::new(batches)).execute().await?;
|
|
536
|
+
|
|
537
|
+
tracing::info!(
|
|
538
|
+
"Promoted episode {} to L3 (access: {}, density: {})",
|
|
539
|
+
episode_id,
|
|
540
|
+
access_count,
|
|
541
|
+
connection_density
|
|
542
|
+
);
|
|
543
|
+
|
|
544
|
+
Ok(())
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
    /// Get all L3 entries for a context
    ///
    /// Returns every row of `l3_knowledge` whose `context_name` matches
    /// `context`, materialized as `L3Entry` values.
    pub async fn get_l3_entries(&self, context: &str) -> Result<Vec<L3Entry>> {
        let table = self.db.open_table(L3_TABLE).execute().await?;
        let results = table
            .query()
            .only_if(format!("context_name = '{}'", context))
            .execute()
            .await?;

        let mut entries = Vec::new();
        let batches: Vec<RecordBatch> = results.try_collect().await?;

        for batch in &batches {
            // Column downcasts assume the layout from l3_schema().
            let ids = batch.column_by_name("id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
            let source_ids = batch.column_by_name("source_episode_id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
            let contexts = batch.column_by_name("context_name").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
            let contents = batch.column_by_name("content").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
            let promoted_ats = batch.column_by_name("promoted_at").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
            let access_counts = batch.column_by_name("access_count").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
            let densities = batch.column_by_name("connection_density").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();

            for i in 0..batch.num_rows() {
                entries.push(L3Entry {
                    id: ids.value(i).to_string(),
                    source_episode_id: source_ids.value(i).to_string(),
                    context_name: contexts.value(i).to_string(),
                    content: contents.value(i).to_string(),
                    promoted_at: promoted_ats.value(i).to_string(),
                    access_count: access_counts.value(i),
                    connection_density: densities.value(i),
                });
            }
        }

        Ok(entries)
    }
|
|
583
|
+
|
|
584
|
+
    /// Check if an episode is referenced across multiple contexts and should be auto-promoted to global
    ///
    /// Returns the names of all contexts containing at least one episode
    /// that shares 3+ significant words (length > 4, case-insensitive) with
    /// `episode_content`.
    ///
    /// NOTE(review): scans the entire `episodes` table on every call —
    /// O(total episodes) per stored episode.
    pub async fn check_cross_context_promotion(&self, episode_content: &str) -> Result<Vec<String>> {
        // This is checked by counting how many contexts have similar content
        // Called externally when a new episode is stored
        // Returns list of context names where similar content exists
        let table = self.db.open_table("episodes").execute().await?;
        let results = table.query().execute().await?;

        let mut context_set: std::collections::HashSet<String> = std::collections::HashSet::new();
        let batches: Vec<RecordBatch> = results.try_collect().await?;

        let lower_content = episode_content.to_lowercase();
        // Simple substring matching for cross-context detection
        // Real implementation would use embedding similarity
        for batch in &batches {
            let contexts = batch
                .column_by_name("context_name")
                .unwrap()
                .as_any()
                .downcast_ref::<StringArray>()
                .unwrap();
            let contents = batch
                .column_by_name("content")
                .unwrap()
                .as_any()
                .downcast_ref::<StringArray>()
                .unwrap();

            for i in 0..batch.num_rows() {
                let content = contents.value(i).to_lowercase();
                // Check for significant overlap (shared words)
                let shared = count_shared_significant_words(&lower_content, &content);
                if shared >= 3 {
                    context_set.insert(contexts.value(i).to_string());
                }
            }
        }

        // Set → Vec: result order is unspecified.
        Ok(context_set.into_iter().collect())
    }
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
/// Count shared significant words (length > 4) between two texts
///
/// Words are whitespace-delimited; duplicates within a text count once.
fn count_shared_significant_words(a: &str, b: &str) -> usize {
    // Significant words of a text as a set, so overlap is set intersection.
    fn significant(text: &str) -> std::collections::HashSet<&str> {
        text.split_whitespace().filter(|w| w.len() > 4).collect()
    }

    let left = significant(a);
    let right = significant(b);
    left.intersection(&right).count()
}
|