@sesamespace/hivemind 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/.pnpmrc.json +1 -0
  2. package/AUTO-DEBUG-DESIGN.md +267 -0
  3. package/AUTOMATIC-MEMORY-MANAGEMENT.md +109 -0
  4. package/DASHBOARD-PLAN.md +206 -0
  5. package/MEMORY-ENHANCEMENT-PLAN.md +211 -0
  6. package/TOOL-USE-DESIGN.md +173 -0
  7. package/dist/{chunk-FBQBBAPZ.js → chunk-4C6B2AMB.js} +2 -2
  8. package/dist/{chunk-FK6WYXRM.js → chunk-4YXOQGQC.js} +2 -2
  9. package/dist/{chunk-IXBIAX76.js → chunk-K6KL2VD6.js} +2 -2
  10. package/dist/{chunk-BHCDOHSK.js → chunk-LYL5GG2F.js} +3 -3
  11. package/dist/{chunk-M3A2WRXM.js → chunk-OB6OXLPC.js} +430 -2
  12. package/dist/chunk-OB6OXLPC.js.map +1 -0
  13. package/dist/{chunk-DPLCEMEC.js → chunk-ZA4NWNS6.js} +2 -2
  14. package/dist/commands/fleet.js +3 -3
  15. package/dist/commands/init.js +3 -3
  16. package/dist/commands/start.js +3 -3
  17. package/dist/commands/watchdog.js +3 -3
  18. package/dist/dashboard.html +100 -60
  19. package/dist/index.js +2 -2
  20. package/dist/main.js +6 -6
  21. package/dist/start.js +1 -1
  22. package/docs/TOOL-PARITY-PLAN.md +191 -0
  23. package/package.json +23 -24
  24. package/src/memory/dashboard-integration.ts +295 -0
  25. package/src/memory/index.ts +187 -0
  26. package/src/memory/performance-test.ts +208 -0
  27. package/src/memory/processors/agent-sync.ts +312 -0
  28. package/src/memory/processors/command-learner.ts +298 -0
  29. package/src/memory/processors/memory-api-client.ts +105 -0
  30. package/src/memory/processors/message-flow-integration.ts +168 -0
  31. package/src/memory/processors/research-digester.ts +204 -0
  32. package/test-caitlin-access.md +11 -0
  33. package/dist/chunk-M3A2WRXM.js.map +0 -1
  34. package/install.sh +0 -162
  35. package/packages/memory/Cargo.lock +0 -6480
  36. package/packages/memory/Cargo.toml +0 -21
  37. package/packages/memory/src/src/context.rs +0 -179
  38. package/packages/memory/src/src/embeddings.rs +0 -51
  39. package/packages/memory/src/src/main.rs +0 -887
  40. package/packages/memory/src/src/promotion.rs +0 -808
  41. package/packages/memory/src/src/scoring.rs +0 -142
  42. package/packages/memory/src/src/store.rs +0 -460
  43. package/packages/memory/src/src/tasks.rs +0 -321
  44. /package/dist/{chunk-FBQBBAPZ.js.map → chunk-4C6B2AMB.js.map} +0 -0
  45. /package/dist/{chunk-FK6WYXRM.js.map → chunk-4YXOQGQC.js.map} +0 -0
  46. /package/dist/{chunk-IXBIAX76.js.map → chunk-K6KL2VD6.js.map} +0 -0
  47. /package/dist/{chunk-BHCDOHSK.js.map → chunk-LYL5GG2F.js.map} +0 -0
  48. /package/dist/{chunk-DPLCEMEC.js.map → chunk-ZA4NWNS6.js.map} +0 -0
@@ -1,808 +0,0 @@
1
- use anyhow::Result;
2
- use arrow_array::{RecordBatch, RecordBatchIterator, StringArray, UInt64Array};
3
- use arrow_schema::{DataType, Field, Schema};
4
- use chrono::Utc;
5
- use futures::stream::TryStreamExt;
6
- use lancedb::{connection::Connection, query::ExecutableQuery, query::QueryBase, Table};
7
- use serde::{Deserialize, Serialize};
8
- use std::collections::HashMap;
9
- use std::sync::Arc;
10
-
11
// LanceDB table names used for promotion bookkeeping.
const ACCESS_TABLE: &str = "episode_access";
const COOCCURRENCE_TABLE: &str = "episode_cooccurrence";
const L3_TABLE: &str = "l3_knowledge";

// Promotion thresholds: both must be met before an episode is promoted to L3.
const ACCESS_THRESHOLD: u64 = 5; // episode must be accessed at least N times
const COOCCURRENCE_THRESHOLD: u64 = 3; // episode must co-occur with others at least N times
18
-
19
/// One row of the `episode_access` table: how often an episode was retrieved.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AccessRecord {
    /// Episode this row tracks.
    pub episode_id: String,
    /// Total recorded accesses.
    pub access_count: u64,
    /// RFC 3339 timestamp of the most recent access.
    pub last_accessed: String,
}
25
-
26
/// One row of the `episode_cooccurrence` table: an (a, b) episode pair and how
/// often the two were retrieved together. Pairs are stored in canonical order
/// (episode_a < episode_b) by `record_co_access`.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CooccurrenceRecord {
    pub episode_a: String,
    pub episode_b: String,
    /// Number of times the pair was retrieved together.
    pub count: u64,
}
32
-
33
/// One row of the `l3_knowledge` table: an episode promoted to long-term (L3)
/// knowledge, plus the stats that triggered the promotion.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct L3Entry {
    /// UUID of this L3 entry (distinct from the source episode id).
    pub id: String,
    /// Episode this entry was promoted from.
    pub source_episode_id: String,
    /// Context the source episode belonged to at promotion time.
    pub context_name: String,
    pub content: String,
    /// RFC 3339 timestamp of the promotion.
    pub promoted_at: String,
    /// Access count at promotion time.
    pub access_count: u64,
    /// Co-occurrence total at promotion time.
    pub connection_density: u64,
}
43
-
44
/// Tracks episode access/co-occurrence statistics in LanceDB and promotes
/// frequently-used, well-connected episodes to the L3 knowledge table.
pub struct PromotionEngine {
    db: Connection,
}
47
-
48
- impl PromotionEngine {
49
- pub async fn new(db: Connection) -> Result<Self> {
50
- let engine = Self { db };
51
- engine.ensure_tables().await?;
52
- Ok(engine)
53
- }
54
-
55
- fn access_schema() -> Arc<Schema> {
56
- Arc::new(Schema::new(vec![
57
- Field::new("episode_id", DataType::Utf8, false),
58
- Field::new("access_count", DataType::UInt64, false),
59
- Field::new("last_accessed", DataType::Utf8, false),
60
- ]))
61
- }
62
-
63
- fn cooccurrence_schema() -> Arc<Schema> {
64
- Arc::new(Schema::new(vec![
65
- Field::new("episode_a", DataType::Utf8, false),
66
- Field::new("episode_b", DataType::Utf8, false),
67
- Field::new("count", DataType::UInt64, false),
68
- ]))
69
- }
70
-
71
- fn l3_schema() -> Arc<Schema> {
72
- Arc::new(Schema::new(vec![
73
- Field::new("id", DataType::Utf8, false),
74
- Field::new("source_episode_id", DataType::Utf8, false),
75
- Field::new("context_name", DataType::Utf8, false),
76
- Field::new("content", DataType::Utf8, false),
77
- Field::new("promoted_at", DataType::Utf8, false),
78
- Field::new("access_count", DataType::UInt64, false),
79
- Field::new("connection_density", DataType::UInt64, false),
80
- ]))
81
- }
82
-
83
- async fn ensure_tables(&self) -> Result<()> {
84
- let names = self.db.table_names().execute().await?;
85
-
86
- if !names.contains(&ACCESS_TABLE.to_string()) {
87
- let schema = Self::access_schema();
88
- let batch = RecordBatch::new_empty(schema.clone());
89
- let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
90
- self.db
91
- .create_table(ACCESS_TABLE, Box::new(batches))
92
- .execute()
93
- .await?;
94
- tracing::info!("Created episode_access table");
95
- }
96
-
97
- if !names.contains(&COOCCURRENCE_TABLE.to_string()) {
98
- let schema = Self::cooccurrence_schema();
99
- let batch = RecordBatch::new_empty(schema.clone());
100
- let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
101
- self.db
102
- .create_table(COOCCURRENCE_TABLE, Box::new(batches))
103
- .execute()
104
- .await?;
105
- tracing::info!("Created episode_cooccurrence table");
106
- }
107
-
108
- if !names.contains(&L3_TABLE.to_string()) {
109
- let schema = Self::l3_schema();
110
- let batch = RecordBatch::new_empty(schema.clone());
111
- let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
112
- self.db
113
- .create_table(L3_TABLE, Box::new(batches))
114
- .execute()
115
- .await?;
116
- tracing::info!("Created l3_knowledge table");
117
- }
118
-
119
- Ok(())
120
- }
121
-
122
- /// Record a single access for an episode
123
- pub async fn record_access(&self, episode_id: &str) -> Result<()> {
124
- let table = self.db.open_table(ACCESS_TABLE).execute().await?;
125
-
126
- // Check if record exists
127
- let existing = self.get_access_record(episode_id).await?;
128
-
129
- if let Some(record) = existing {
130
- // Delete old record and insert updated one
131
- table
132
- .delete(&format!("episode_id = '{}'", episode_id))
133
- .await?;
134
-
135
- let new_count = record.access_count + 1;
136
- let now = Utc::now().to_rfc3339();
137
- let schema = Self::access_schema();
138
- let batch = RecordBatch::try_new(
139
- schema.clone(),
140
- vec![
141
- Arc::new(StringArray::from(vec![episode_id])),
142
- Arc::new(UInt64Array::from(vec![new_count])),
143
- Arc::new(StringArray::from(vec![now.as_str()])),
144
- ],
145
- )?;
146
- let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
147
- table.add(Box::new(batches)).execute().await?;
148
- } else {
149
- // Insert new record
150
- let now = Utc::now().to_rfc3339();
151
- let schema = Self::access_schema();
152
- let batch = RecordBatch::try_new(
153
- schema.clone(),
154
- vec![
155
- Arc::new(StringArray::from(vec![episode_id])),
156
- Arc::new(UInt64Array::from(vec![1u64])),
157
- Arc::new(StringArray::from(vec![now.as_str()])),
158
- ],
159
- )?;
160
- let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
161
- table.add(Box::new(batches)).execute().await?;
162
- }
163
-
164
- Ok(())
165
- }
166
-
167
- /// Record co-access for a set of episodes retrieved together
168
- pub async fn record_co_access(&self, episode_ids: &[String]) -> Result<()> {
169
- if episode_ids.len() < 2 {
170
- return Ok(());
171
- }
172
-
173
- let table = self.db.open_table(COOCCURRENCE_TABLE).execute().await?;
174
-
175
- // For each pair, update or create cooccurrence record
176
- for i in 0..episode_ids.len() {
177
- for j in (i + 1)..episode_ids.len() {
178
- let (a, b) = if episode_ids[i] < episode_ids[j] {
179
- (&episode_ids[i], &episode_ids[j])
180
- } else {
181
- (&episode_ids[j], &episode_ids[i])
182
- };
183
-
184
- let existing = self.get_cooccurrence(a, b).await?;
185
-
186
- if let Some(record) = existing {
187
- table
188
- .delete(&format!(
189
- "episode_a = '{}' AND episode_b = '{}'",
190
- a, b
191
- ))
192
- .await?;
193
-
194
- let new_count = record.count + 1;
195
- let schema = Self::cooccurrence_schema();
196
- let batch = RecordBatch::try_new(
197
- schema.clone(),
198
- vec![
199
- Arc::new(StringArray::from(vec![a.as_str()])),
200
- Arc::new(StringArray::from(vec![b.as_str()])),
201
- Arc::new(UInt64Array::from(vec![new_count])),
202
- ],
203
- )?;
204
- let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
205
- table.add(Box::new(batches)).execute().await?;
206
- } else {
207
- let schema = Self::cooccurrence_schema();
208
- let batch = RecordBatch::try_new(
209
- schema.clone(),
210
- vec![
211
- Arc::new(StringArray::from(vec![a.as_str()])),
212
- Arc::new(StringArray::from(vec![b.as_str()])),
213
- Arc::new(UInt64Array::from(vec![1u64])),
214
- ],
215
- )?;
216
- let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
217
- table.add(Box::new(batches)).execute().await?;
218
- }
219
- }
220
- }
221
-
222
- Ok(())
223
- }
224
-
225
impl PromotionEngine {
    /// Run the promotion engine: check all episodes and promote eligible ones to L3.
    ///
    /// An episode is promoted when it (a) is not already in L3, (b) meets both
    /// ACCESS_THRESHOLD and COOCCURRENCE_THRESHOLD, and (c) has a resolvable
    /// context and content. With `context_filter`, only episodes in that
    /// context are promoted. Returns the ids promoted in this run.
    pub async fn run_promotion(&self, context_filter: Option<&str>) -> Result<Vec<String>> {
        let mut promoted = Vec::new();

        // Get all access records that meet the threshold
        let access_records = self.get_high_access_episodes().await?;

        for record in access_records {
            // Check if already promoted
            if self.is_promoted(&record.episode_id).await? {
                continue;
            }

            // Get connection density for this episode
            let density = self.get_connection_density(&record.episode_id).await?;

            // Check if both thresholds are met
            if record.access_count >= ACCESS_THRESHOLD && density >= COOCCURRENCE_THRESHOLD {
                // Get the episode to determine its context
                let episode_context = self.get_episode_context(&record.episode_id).await?;
                if let Some(ref ctx) = episode_context {
                    // Apply context filter if specified
                    if let Some(filter) = context_filter {
                        if ctx != filter {
                            continue;
                        }
                    }
                }
                // NOTE(review): an episode whose context cannot be resolved
                // skips the filter above, but is still never promoted because
                // the tuple match below requires Some(ctx).

                let episode_content = self.get_episode_content(&record.episode_id).await?;
                if let (Some(ctx), Some(content)) = (episode_context, episode_content) {
                    self.promote_episode(
                        &record.episode_id,
                        &ctx,
                        &content,
                        record.access_count,
                        density,
                    )
                    .await?;
                    promoted.push(record.episode_id);
                }
            }
        }

        if !promoted.is_empty() {
            tracing::info!("Promoted {} episodes to L3", promoted.len());
        }

        Ok(promoted)
    }
}
275
-
276
- async fn get_access_record(&self, episode_id: &str) -> Result<Option<AccessRecord>> {
277
- let table = self.db.open_table(ACCESS_TABLE).execute().await?;
278
- let results = table
279
- .query()
280
- .only_if(format!("episode_id = '{}'", episode_id))
281
- .execute()
282
- .await?;
283
-
284
- let batches: Vec<RecordBatch> = results.try_collect().await?;
285
- for batch in &batches {
286
- if batch.num_rows() > 0 {
287
- let ids = batch
288
- .column_by_name("episode_id")
289
- .unwrap()
290
- .as_any()
291
- .downcast_ref::<StringArray>()
292
- .unwrap();
293
- let counts = batch
294
- .column_by_name("access_count")
295
- .unwrap()
296
- .as_any()
297
- .downcast_ref::<UInt64Array>()
298
- .unwrap();
299
- let last = batch
300
- .column_by_name("last_accessed")
301
- .unwrap()
302
- .as_any()
303
- .downcast_ref::<StringArray>()
304
- .unwrap();
305
-
306
- return Ok(Some(AccessRecord {
307
- episode_id: ids.value(0).to_string(),
308
- access_count: counts.value(0),
309
- last_accessed: last.value(0).to_string(),
310
- }));
311
- }
312
- }
313
- Ok(None)
314
- }
315
-
316
- async fn get_cooccurrence(&self, a: &str, b: &str) -> Result<Option<CooccurrenceRecord>> {
317
- let table = self.db.open_table(COOCCURRENCE_TABLE).execute().await?;
318
- let results = table
319
- .query()
320
- .only_if(format!("episode_a = '{}' AND episode_b = '{}'", a, b))
321
- .execute()
322
- .await?;
323
-
324
- let batches: Vec<RecordBatch> = results.try_collect().await?;
325
- for batch in &batches {
326
- if batch.num_rows() > 0 {
327
- let ep_a = batch
328
- .column_by_name("episode_a")
329
- .unwrap()
330
- .as_any()
331
- .downcast_ref::<StringArray>()
332
- .unwrap();
333
- let ep_b = batch
334
- .column_by_name("episode_b")
335
- .unwrap()
336
- .as_any()
337
- .downcast_ref::<StringArray>()
338
- .unwrap();
339
- let counts = batch
340
- .column_by_name("count")
341
- .unwrap()
342
- .as_any()
343
- .downcast_ref::<UInt64Array>()
344
- .unwrap();
345
-
346
- return Ok(Some(CooccurrenceRecord {
347
- episode_a: ep_a.value(0).to_string(),
348
- episode_b: ep_b.value(0).to_string(),
349
- count: counts.value(0),
350
- }));
351
- }
352
- }
353
- Ok(None)
354
- }
355
-
356
impl PromotionEngine {
    /// Fetch every access record whose count meets ACCESS_THRESHOLD.
    ///
    /// Threshold filtering is pushed down to LanceDB via the query predicate;
    /// order of the returned records is whatever the scan yields.
    async fn get_high_access_episodes(&self) -> Result<Vec<AccessRecord>> {
        let table = self.db.open_table(ACCESS_TABLE).execute().await?;
        let results = table
            .query()
            .only_if(format!("access_count >= {}", ACCESS_THRESHOLD))
            .execute()
            .await?;

        let mut records = Vec::new();
        let batches: Vec<RecordBatch> = results.try_collect().await?;

        for batch in &batches {
            // Downcast each column once per batch, then walk the rows.
            let ids = batch
                .column_by_name("episode_id")
                .unwrap()
                .as_any()
                .downcast_ref::<StringArray>()
                .unwrap();
            let counts = batch
                .column_by_name("access_count")
                .unwrap()
                .as_any()
                .downcast_ref::<UInt64Array>()
                .unwrap();
            let last = batch
                .column_by_name("last_accessed")
                .unwrap()
                .as_any()
                .downcast_ref::<StringArray>()
                .unwrap();

            for i in 0..batch.num_rows() {
                records.push(AccessRecord {
                    episode_id: ids.value(i).to_string(),
                    access_count: counts.value(i),
                    last_accessed: last.value(i).to_string(),
                });
            }
        }

        Ok(records)
    }
}
398
-
399
- async fn get_connection_density(&self, episode_id: &str) -> Result<u64> {
400
- let table = self.db.open_table(COOCCURRENCE_TABLE).execute().await?;
401
-
402
- // Sum all cooccurrence counts where this episode is involved
403
- let results_a = table
404
- .query()
405
- .only_if(format!("episode_a = '{}'", episode_id))
406
- .execute()
407
- .await?;
408
-
409
- let mut total: u64 = 0;
410
- let batches: Vec<RecordBatch> = results_a.try_collect().await?;
411
- for batch in &batches {
412
- let counts = batch
413
- .column_by_name("count")
414
- .unwrap()
415
- .as_any()
416
- .downcast_ref::<UInt64Array>()
417
- .unwrap();
418
- for i in 0..batch.num_rows() {
419
- total += counts.value(i);
420
- }
421
- }
422
-
423
- let results_b = table
424
- .query()
425
- .only_if(format!("episode_b = '{}'", episode_id))
426
- .execute()
427
- .await?;
428
-
429
- let batches: Vec<RecordBatch> = results_b.try_collect().await?;
430
- for batch in &batches {
431
- let counts = batch
432
- .column_by_name("count")
433
- .unwrap()
434
- .as_any()
435
- .downcast_ref::<UInt64Array>()
436
- .unwrap();
437
- for i in 0..batch.num_rows() {
438
- total += counts.value(i);
439
- }
440
- }
441
-
442
- Ok(total)
443
- }
444
-
445
- async fn is_promoted(&self, episode_id: &str) -> Result<bool> {
446
- let table = self.db.open_table(L3_TABLE).execute().await?;
447
- let results = table
448
- .query()
449
- .only_if(format!("source_episode_id = '{}'", episode_id))
450
- .execute()
451
- .await?;
452
-
453
- let batches: Vec<RecordBatch> = results.try_collect().await?;
454
- for batch in &batches {
455
- if batch.num_rows() > 0 {
456
- return Ok(true);
457
- }
458
- }
459
- Ok(false)
460
- }
461
-
462
- async fn get_episode_context(&self, episode_id: &str) -> Result<Option<String>> {
463
- let table = self.db.open_table("episodes").execute().await?;
464
- let results = table
465
- .query()
466
- .only_if(format!("id = '{}'", episode_id))
467
- .execute()
468
- .await?;
469
-
470
- let batches: Vec<RecordBatch> = results.try_collect().await?;
471
- for batch in &batches {
472
- if batch.num_rows() > 0 {
473
- let contexts = batch
474
- .column_by_name("context_name")
475
- .unwrap()
476
- .as_any()
477
- .downcast_ref::<StringArray>()
478
- .unwrap();
479
- return Ok(Some(contexts.value(0).to_string()));
480
- }
481
- }
482
- Ok(None)
483
- }
484
-
485
- async fn get_episode_content(&self, episode_id: &str) -> Result<Option<String>> {
486
- let table = self.db.open_table("episodes").execute().await?;
487
- let results = table
488
- .query()
489
- .only_if(format!("id = '{}'", episode_id))
490
- .execute()
491
- .await?;
492
-
493
- let batches: Vec<RecordBatch> = results.try_collect().await?;
494
- for batch in &batches {
495
- if batch.num_rows() > 0 {
496
- let contents = batch
497
- .column_by_name("content")
498
- .unwrap()
499
- .as_any()
500
- .downcast_ref::<StringArray>()
501
- .unwrap();
502
- return Ok(Some(contents.value(0).to_string()));
503
- }
504
- }
505
- Ok(None)
506
- }
507
-
508
impl PromotionEngine {
    /// Insert a new L3 knowledge entry derived from `episode_id`.
    ///
    /// Generates a fresh UUID for the entry and stamps it with the current
    /// time; `access_count` and `connection_density` snapshot the stats that
    /// triggered the promotion.
    async fn promote_episode(
        &self,
        episode_id: &str,
        context: &str,
        content: &str,
        access_count: u64,
        connection_density: u64,
    ) -> Result<()> {
        let id = uuid::Uuid::new_v4().to_string();
        let now = Utc::now().to_rfc3339();

        let table = self.db.open_table(L3_TABLE).execute().await?;
        let schema = Self::l3_schema();
        // Column order must match l3_schema().
        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(StringArray::from(vec![id.as_str()])),
                Arc::new(StringArray::from(vec![episode_id])),
                Arc::new(StringArray::from(vec![context])),
                Arc::new(StringArray::from(vec![content])),
                Arc::new(StringArray::from(vec![now.as_str()])),
                Arc::new(UInt64Array::from(vec![access_count])),
                Arc::new(UInt64Array::from(vec![connection_density])),
            ],
        )?;

        let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
        table.add(Box::new(batches)).execute().await?;

        tracing::info!(
            "Promoted episode {} to L3 (access: {}, density: {})",
            episode_id,
            access_count,
            connection_density
        );

        Ok(())
    }
}
546
-
547
impl PromotionEngine {
    /// Delete an L3 entry by ID. Returns `true` if an entry was deleted,
    /// `false` if no entry with that id existed.
    pub async fn delete_l3_entry(&self, id: &str) -> Result<bool> {
        let table = self.db.open_table(L3_TABLE).execute().await?;
        // Check if exists first, so the caller can distinguish "deleted"
        // from "was never there".
        let results = table
            .query()
            .only_if(format!("id = '{}'", id))
            .execute()
            .await?;
        let batches: Vec<RecordBatch> = results.try_collect().await?;
        let exists = batches.iter().any(|b| b.num_rows() > 0);
        if !exists {
            return Ok(false);
        }
        table.delete(&format!("id = '{}'", id)).await?;
        tracing::info!("Deleted L3 entry {}", id);
        Ok(true)
    }

    /// Update L3 entry content (delete-and-reinsert pattern for LanceDB).
    /// Returns `false` when no entry with `id` exists.
    pub async fn update_l3_content(&self, id: &str, new_content: &str) -> Result<bool> {
        let table = self.db.open_table(L3_TABLE).execute().await?;
        // Get existing entry
        let results = table
            .query()
            .only_if(format!("id = '{}'", id))
            .execute()
            .await?;
        let batches: Vec<RecordBatch> = results.try_collect().await?;
        let mut found_entry: Option<L3Entry> = None;
        // Decode the matching row; ids are unique, so at most one batch
        // should be non-empty (later matches would overwrite earlier ones).
        for batch in &batches {
            if batch.num_rows() > 0 {
                let ids = batch.column_by_name("id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
                let source_ids = batch.column_by_name("source_episode_id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
                let contexts = batch.column_by_name("context_name").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
                let promoted_ats = batch.column_by_name("promoted_at").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
                let access_counts = batch.column_by_name("access_count").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
                let densities = batch.column_by_name("connection_density").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
                found_entry = Some(L3Entry {
                    id: ids.value(0).to_string(),
                    source_episode_id: source_ids.value(0).to_string(),
                    context_name: contexts.value(0).to_string(),
                    content: String::new(), // will be replaced
                    promoted_at: promoted_ats.value(0).to_string(),
                    access_count: access_counts.value(0),
                    connection_density: densities.value(0),
                });
            }
        }
        let entry = match found_entry {
            Some(e) => e,
            None => return Ok(false),
        };
        // Delete old and insert with new content; all other fields
        // (including promoted_at and the stats) are carried over unchanged.
        table.delete(&format!("id = '{}'", id)).await?;
        let schema = Self::l3_schema();
        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![
                Arc::new(StringArray::from(vec![entry.id.as_str()])),
                Arc::new(StringArray::from(vec![entry.source_episode_id.as_str()])),
                Arc::new(StringArray::from(vec![entry.context_name.as_str()])),
                Arc::new(StringArray::from(vec![new_content])),
                Arc::new(StringArray::from(vec![entry.promoted_at.as_str()])),
                Arc::new(UInt64Array::from(vec![entry.access_count])),
                Arc::new(UInt64Array::from(vec![entry.connection_density])),
            ],
        )?;
        let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
        table.add(Box::new(batches)).execute().await?;
        tracing::info!("Updated L3 entry {} content", id);
        Ok(true)
    }
}
620
-
621
- /// Get all access records sorted by count descending
622
- pub async fn get_all_access_records(&self) -> Result<Vec<AccessRecord>> {
623
- let table = self.db.open_table(ACCESS_TABLE).execute().await?;
624
- let results = table.query().execute().await?;
625
- let mut records = Vec::new();
626
- let batches: Vec<RecordBatch> = results.try_collect().await?;
627
- for batch in &batches {
628
- let ids = batch.column_by_name("episode_id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
629
- let counts = batch.column_by_name("access_count").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
630
- let last = batch.column_by_name("last_accessed").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
631
- for i in 0..batch.num_rows() {
632
- records.push(AccessRecord {
633
- episode_id: ids.value(i).to_string(),
634
- access_count: counts.value(i),
635
- last_accessed: last.value(i).to_string(),
636
- });
637
- }
638
- }
639
- records.sort_by(|a, b| b.access_count.cmp(&a.access_count));
640
- Ok(records)
641
- }
642
-
643
- /// Get cooccurrence pairs for a specific episode
644
- pub async fn get_cooccurrence_pairs(&self, episode_id: &str) -> Result<Vec<CooccurrenceRecord>> {
645
- let table = self.db.open_table(COOCCURRENCE_TABLE).execute().await?;
646
- let mut pairs = Vec::new();
647
-
648
- let results_a = table
649
- .query()
650
- .only_if(format!("episode_a = '{}'", episode_id))
651
- .execute()
652
- .await?;
653
- let batches: Vec<RecordBatch> = results_a.try_collect().await?;
654
- for batch in &batches {
655
- let ep_a = batch.column_by_name("episode_a").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
656
- let ep_b = batch.column_by_name("episode_b").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
657
- let counts = batch.column_by_name("count").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
658
- for i in 0..batch.num_rows() {
659
- pairs.push(CooccurrenceRecord {
660
- episode_a: ep_a.value(i).to_string(),
661
- episode_b: ep_b.value(i).to_string(),
662
- count: counts.value(i),
663
- });
664
- }
665
- }
666
-
667
- let results_b = table
668
- .query()
669
- .only_if(format!("episode_b = '{}'", episode_id))
670
- .execute()
671
- .await?;
672
- let batches: Vec<RecordBatch> = results_b.try_collect().await?;
673
- for batch in &batches {
674
- let ep_a = batch.column_by_name("episode_a").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
675
- let ep_b = batch.column_by_name("episode_b").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
676
- let counts = batch.column_by_name("count").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
677
- for i in 0..batch.num_rows() {
678
- pairs.push(CooccurrenceRecord {
679
- episode_a: ep_a.value(i).to_string(),
680
- episode_b: ep_b.value(i).to_string(),
681
- count: counts.value(i),
682
- });
683
- }
684
- }
685
-
686
- Ok(pairs)
687
- }
688
-
689
- /// Get all L3 entries (all contexts)
690
- pub async fn get_all_l3_entries(&self) -> Result<Vec<L3Entry>> {
691
- let table = self.db.open_table(L3_TABLE).execute().await?;
692
- let results = table.query().execute().await?;
693
- let mut entries = Vec::new();
694
- let batches: Vec<RecordBatch> = results.try_collect().await?;
695
- for batch in &batches {
696
- let ids = batch.column_by_name("id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
697
- let source_ids = batch.column_by_name("source_episode_id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
698
- let contexts = batch.column_by_name("context_name").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
699
- let contents = batch.column_by_name("content").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
700
- let promoted_ats = batch.column_by_name("promoted_at").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
701
- let access_counts = batch.column_by_name("access_count").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
702
- let densities = batch.column_by_name("connection_density").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
703
- for i in 0..batch.num_rows() {
704
- entries.push(L3Entry {
705
- id: ids.value(i).to_string(),
706
- source_episode_id: source_ids.value(i).to_string(),
707
- context_name: contexts.value(i).to_string(),
708
- content: contents.value(i).to_string(),
709
- promoted_at: promoted_ats.value(i).to_string(),
710
- access_count: access_counts.value(i),
711
- connection_density: densities.value(i),
712
- });
713
- }
714
- }
715
- Ok(entries)
716
- }
717
-
718
- /// Get all L3 entries for a context
719
- pub async fn get_l3_entries(&self, context: &str) -> Result<Vec<L3Entry>> {
720
- let table = self.db.open_table(L3_TABLE).execute().await?;
721
- let results = table
722
- .query()
723
- .only_if(format!("context_name = '{}'", context))
724
- .execute()
725
- .await?;
726
-
727
- let mut entries = Vec::new();
728
- let batches: Vec<RecordBatch> = results.try_collect().await?;
729
-
730
- for batch in &batches {
731
- let ids = batch.column_by_name("id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
732
- let source_ids = batch.column_by_name("source_episode_id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
733
- let contexts = batch.column_by_name("context_name").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
734
- let contents = batch.column_by_name("content").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
735
- let promoted_ats = batch.column_by_name("promoted_at").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
736
- let access_counts = batch.column_by_name("access_count").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
737
- let densities = batch.column_by_name("connection_density").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
738
-
739
- for i in 0..batch.num_rows() {
740
- entries.push(L3Entry {
741
- id: ids.value(i).to_string(),
742
- source_episode_id: source_ids.value(i).to_string(),
743
- context_name: contexts.value(i).to_string(),
744
- content: contents.value(i).to_string(),
745
- promoted_at: promoted_ats.value(i).to_string(),
746
- access_count: access_counts.value(i),
747
- connection_density: densities.value(i),
748
- });
749
- }
750
- }
751
-
752
- Ok(entries)
753
- }
754
-
755
impl PromotionEngine {
    /// Check if an episode is referenced across multiple contexts and should be auto-promoted to global.
    ///
    /// Called externally when a new episode is stored. Scans every stored
    /// episode and returns the names of contexts with similar content.
    /// Similarity is a cheap heuristic — at least 3 shared "significant"
    /// (length > 4) lowercase words; a real implementation would use
    /// embedding similarity.
    pub async fn check_cross_context_promotion(&self, episode_content: &str) -> Result<Vec<String>> {
        // This is checked by counting how many contexts have similar content
        // Called externally when a new episode is stored
        // Returns list of context names where similar content exists
        let table = self.db.open_table("episodes").execute().await?;
        let results = table.query().execute().await?;

        let mut context_set: std::collections::HashSet<String> = std::collections::HashSet::new();
        let batches: Vec<RecordBatch> = results.try_collect().await?;

        let lower_content = episode_content.to_lowercase();
        // Simple substring matching for cross-context detection
        // Real implementation would use embedding similarity
        for batch in &batches {
            let contexts = batch
                .column_by_name("context_name")
                .unwrap()
                .as_any()
                .downcast_ref::<StringArray>()
                .unwrap();
            let contents = batch
                .column_by_name("content")
                .unwrap()
                .as_any()
                .downcast_ref::<StringArray>()
                .unwrap();

            for i in 0..batch.num_rows() {
                let content = contents.value(i).to_lowercase();
                // Check for significant overlap (shared words)
                let shared = count_shared_significant_words(&lower_content, &content);
                if shared >= 3 {
                    context_set.insert(contexts.value(i).to_string());
                }
            }
        }

        // Order of the returned contexts is unspecified (HashSet iteration).
        Ok(context_set.into_iter().collect())
    }
}
796
-
797
/// Count distinct "significant" words — longer than four characters — that
/// appear in both texts. Whitespace-delimited, case-sensitive comparison.
fn count_shared_significant_words(a: &str, b: &str) -> usize {
    // Collect each side's significant vocabulary once, then intersect.
    fn significant(text: &str) -> std::collections::HashSet<&str> {
        text.split_whitespace().filter(|w| w.len() > 4).collect()
    }
    significant(a).intersection(&significant(b)).count()
}