@sesamespace/hivemind 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/README.md +86 -0
  2. package/config/TEAM-CHARTER.md +87 -0
  3. package/config/default.toml +39 -0
  4. package/dist/__tests__/fleet-integration.test.d.ts +9 -0
  5. package/dist/__tests__/fleet-integration.test.d.ts.map +1 -0
  6. package/dist/__tests__/fleet-integration.test.js +201 -0
  7. package/dist/__tests__/fleet-integration.test.js.map +1 -0
  8. package/dist/__tests__/fleet.test.d.ts +7 -0
  9. package/dist/__tests__/fleet.test.d.ts.map +1 -0
  10. package/dist/__tests__/fleet.test.js +171 -0
  11. package/dist/__tests__/fleet.test.js.map +1 -0
  12. package/dist/__tests__/integration.test.d.ts +2 -0
  13. package/dist/__tests__/integration.test.d.ts.map +1 -0
  14. package/dist/__tests__/integration.test.js +348 -0
  15. package/dist/__tests__/integration.test.js.map +1 -0
  16. package/dist/agent.d.ts +27 -0
  17. package/dist/agent.d.ts.map +1 -0
  18. package/dist/agent.js +217 -0
  19. package/dist/agent.js.map +1 -0
  20. package/dist/commands/fleet.d.ts +13 -0
  21. package/dist/commands/fleet.d.ts.map +1 -0
  22. package/dist/commands/fleet.js +193 -0
  23. package/dist/commands/fleet.js.map +1 -0
  24. package/dist/commands/init.d.ts +2 -0
  25. package/dist/commands/init.d.ts.map +1 -0
  26. package/dist/commands/init.js +170 -0
  27. package/dist/commands/init.js.map +1 -0
  28. package/dist/commands/start.d.ts +2 -0
  29. package/dist/commands/start.d.ts.map +1 -0
  30. package/dist/commands/start.js +39 -0
  31. package/dist/commands/start.js.map +1 -0
  32. package/dist/config.d.ts +44 -0
  33. package/dist/config.d.ts.map +1 -0
  34. package/dist/config.js +73 -0
  35. package/dist/config.js.map +1 -0
  36. package/dist/context.d.ts +50 -0
  37. package/dist/context.d.ts.map +1 -0
  38. package/dist/context.js +153 -0
  39. package/dist/context.js.map +1 -0
  40. package/dist/fleet/fleet-manager.d.ts +86 -0
  41. package/dist/fleet/fleet-manager.d.ts.map +1 -0
  42. package/dist/fleet/fleet-manager.js +298 -0
  43. package/dist/fleet/fleet-manager.js.map +1 -0
  44. package/dist/fleet/memory-sync.d.ts +91 -0
  45. package/dist/fleet/memory-sync.d.ts.map +1 -0
  46. package/dist/fleet/memory-sync.js +292 -0
  47. package/dist/fleet/memory-sync.js.map +1 -0
  48. package/dist/fleet/primary-client.d.ts +49 -0
  49. package/dist/fleet/primary-client.d.ts.map +1 -0
  50. package/dist/fleet/primary-client.js +222 -0
  51. package/dist/fleet/primary-client.js.map +1 -0
  52. package/dist/fleet/worker-protocol.d.ts +125 -0
  53. package/dist/fleet/worker-protocol.d.ts.map +1 -0
  54. package/dist/fleet/worker-protocol.js +27 -0
  55. package/dist/fleet/worker-protocol.js.map +1 -0
  56. package/dist/fleet/worker-server.d.ts +53 -0
  57. package/dist/fleet/worker-server.d.ts.map +1 -0
  58. package/dist/fleet/worker-server.js +191 -0
  59. package/dist/fleet/worker-server.js.map +1 -0
  60. package/dist/index.d.ts +26 -0
  61. package/dist/index.d.ts.map +1 -0
  62. package/dist/index.js +16 -0
  63. package/dist/index.js.map +1 -0
  64. package/dist/llm-client.d.ts +24 -0
  65. package/dist/llm-client.d.ts.map +1 -0
  66. package/dist/llm-client.js +40 -0
  67. package/dist/llm-client.js.map +1 -0
  68. package/dist/main.d.ts +3 -0
  69. package/dist/main.d.ts.map +1 -0
  70. package/dist/main.js +41 -0
  71. package/dist/main.js.map +1 -0
  72. package/dist/memory-client.d.ts +88 -0
  73. package/dist/memory-client.d.ts.map +1 -0
  74. package/dist/memory-client.js +185 -0
  75. package/dist/memory-client.js.map +1 -0
  76. package/dist/pipeline.d.ts +2 -0
  77. package/dist/pipeline.d.ts.map +1 -0
  78. package/dist/pipeline.js +125 -0
  79. package/dist/pipeline.js.map +1 -0
  80. package/dist/prompt.d.ts +6 -0
  81. package/dist/prompt.d.ts.map +1 -0
  82. package/dist/prompt.js +75 -0
  83. package/dist/prompt.js.map +1 -0
  84. package/dist/sesame.d.ts +33 -0
  85. package/dist/sesame.d.ts.map +1 -0
  86. package/dist/sesame.js +67 -0
  87. package/dist/sesame.js.map +1 -0
  88. package/dist/start.d.ts +3 -0
  89. package/dist/start.d.ts.map +1 -0
  90. package/dist/start.js +20 -0
  91. package/dist/start.js.map +1 -0
  92. package/dist/task-engine.d.ts +32 -0
  93. package/dist/task-engine.d.ts.map +1 -0
  94. package/dist/task-engine.js +80 -0
  95. package/dist/task-engine.js.map +1 -0
  96. package/dist/worker.d.ts +73 -0
  97. package/dist/worker.d.ts.map +1 -0
  98. package/dist/worker.js +279 -0
  99. package/dist/worker.js.map +1 -0
  100. package/install.sh +186 -0
  101. package/package.json +36 -0
  102. package/packages/memory/Cargo.lock +6480 -0
  103. package/packages/memory/Cargo.toml +21 -0
  104. package/packages/memory/src/src/context.rs +179 -0
  105. package/packages/memory/src/src/embeddings.rs +51 -0
  106. package/packages/memory/src/src/main.rs +626 -0
  107. package/packages/memory/src/src/promotion.rs +637 -0
  108. package/packages/memory/src/src/scoring.rs +131 -0
  109. package/packages/memory/src/src/store.rs +460 -0
  110. package/packages/memory/src/src/tasks.rs +321 -0
@@ -0,0 +1,637 @@
1
+ use anyhow::Result;
2
+ use arrow_array::{RecordBatch, RecordBatchIterator, StringArray, UInt64Array};
3
+ use arrow_schema::{DataType, Field, Schema};
4
+ use chrono::Utc;
5
+ use futures::stream::TryStreamExt;
6
+ use lancedb::{connection::Connection, query::ExecutableQuery, query::QueryBase, Table};
7
+ use serde::{Deserialize, Serialize};
8
+ use std::collections::HashMap;
9
+ use std::sync::Arc;
10
+
11
// Names of the LanceDB tables owned by the promotion engine.
const ACCESS_TABLE: &str = "episode_access";
const COOCCURRENCE_TABLE: &str = "episode_cooccurrence";
const L3_TABLE: &str = "l3_knowledge";

// Promotion thresholds: both must be met before an episode is promoted
// to L3 (see `run_promotion`).
const ACCESS_THRESHOLD: u64 = 5; // episode must be accessed at least N times
const COOCCURRENCE_THRESHOLD: u64 = 3; // episode must co-occur with others at least N times
18
+
19
/// Per-episode access statistics, persisted in the `episode_access` table.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct AccessRecord {
    /// ID of the episode this record tracks.
    pub episode_id: String,
    /// Total number of recorded accesses for the episode.
    pub access_count: u64,
    /// RFC 3339 timestamp of the most recent access.
    pub last_accessed: String,
}
25
+
26
/// How many times two episodes were retrieved together, persisted in the
/// `episode_cooccurrence` table. The pair is stored with the lexically
/// smaller ID in `episode_a`, so each unordered pair has one canonical row.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct CooccurrenceRecord {
    /// Lexically smaller episode ID of the pair.
    pub episode_a: String,
    /// Lexically larger episode ID of the pair.
    pub episode_b: String,
    /// Number of times the pair was co-accessed.
    pub count: u64,
}
32
+
33
/// A knowledge entry promoted into the `l3_knowledge` table.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct L3Entry {
    /// Unique ID of this L3 entry (a freshly generated UUID).
    pub id: String,
    /// ID of the episode this entry was promoted from.
    pub source_episode_id: String,
    /// Context the source episode belongs to.
    pub context_name: String,
    /// Content copied from the source episode at promotion time.
    pub content: String,
    /// RFC 3339 timestamp of when the promotion happened.
    pub promoted_at: String,
    /// Access count of the source episode at promotion time.
    pub access_count: u64,
    /// Connection density of the source episode at promotion time.
    pub connection_density: u64,
}
43
+
44
/// Tracks episode access and co-occurrence statistics and promotes
/// frequently used, well-connected episodes into the L3 knowledge table.
pub struct PromotionEngine {
    // LanceDB connection; the engine creates/opens its own tables on it.
    db: Connection,
}
47
+
48
+ impl PromotionEngine {
49
+ pub async fn new(db: Connection) -> Result<Self> {
50
+ let engine = Self { db };
51
+ engine.ensure_tables().await?;
52
+ Ok(engine)
53
+ }
54
+
55
+ fn access_schema() -> Arc<Schema> {
56
+ Arc::new(Schema::new(vec![
57
+ Field::new("episode_id", DataType::Utf8, false),
58
+ Field::new("access_count", DataType::UInt64, false),
59
+ Field::new("last_accessed", DataType::Utf8, false),
60
+ ]))
61
+ }
62
+
63
+ fn cooccurrence_schema() -> Arc<Schema> {
64
+ Arc::new(Schema::new(vec![
65
+ Field::new("episode_a", DataType::Utf8, false),
66
+ Field::new("episode_b", DataType::Utf8, false),
67
+ Field::new("count", DataType::UInt64, false),
68
+ ]))
69
+ }
70
+
71
+ fn l3_schema() -> Arc<Schema> {
72
+ Arc::new(Schema::new(vec![
73
+ Field::new("id", DataType::Utf8, false),
74
+ Field::new("source_episode_id", DataType::Utf8, false),
75
+ Field::new("context_name", DataType::Utf8, false),
76
+ Field::new("content", DataType::Utf8, false),
77
+ Field::new("promoted_at", DataType::Utf8, false),
78
+ Field::new("access_count", DataType::UInt64, false),
79
+ Field::new("connection_density", DataType::UInt64, false),
80
+ ]))
81
+ }
82
+
83
+ async fn ensure_tables(&self) -> Result<()> {
84
+ let names = self.db.table_names().execute().await?;
85
+
86
+ if !names.contains(&ACCESS_TABLE.to_string()) {
87
+ let schema = Self::access_schema();
88
+ let batch = RecordBatch::new_empty(schema.clone());
89
+ let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
90
+ self.db
91
+ .create_table(ACCESS_TABLE, Box::new(batches))
92
+ .execute()
93
+ .await?;
94
+ tracing::info!("Created episode_access table");
95
+ }
96
+
97
+ if !names.contains(&COOCCURRENCE_TABLE.to_string()) {
98
+ let schema = Self::cooccurrence_schema();
99
+ let batch = RecordBatch::new_empty(schema.clone());
100
+ let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
101
+ self.db
102
+ .create_table(COOCCURRENCE_TABLE, Box::new(batches))
103
+ .execute()
104
+ .await?;
105
+ tracing::info!("Created episode_cooccurrence table");
106
+ }
107
+
108
+ if !names.contains(&L3_TABLE.to_string()) {
109
+ let schema = Self::l3_schema();
110
+ let batch = RecordBatch::new_empty(schema.clone());
111
+ let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
112
+ self.db
113
+ .create_table(L3_TABLE, Box::new(batches))
114
+ .execute()
115
+ .await?;
116
+ tracing::info!("Created l3_knowledge table");
117
+ }
118
+
119
+ Ok(())
120
+ }
121
+
122
+ /// Record a single access for an episode
123
+ pub async fn record_access(&self, episode_id: &str) -> Result<()> {
124
+ let table = self.db.open_table(ACCESS_TABLE).execute().await?;
125
+
126
+ // Check if record exists
127
+ let existing = self.get_access_record(episode_id).await?;
128
+
129
+ if let Some(record) = existing {
130
+ // Delete old record and insert updated one
131
+ table
132
+ .delete(&format!("episode_id = '{}'", episode_id))
133
+ .await?;
134
+
135
+ let new_count = record.access_count + 1;
136
+ let now = Utc::now().to_rfc3339();
137
+ let schema = Self::access_schema();
138
+ let batch = RecordBatch::try_new(
139
+ schema.clone(),
140
+ vec![
141
+ Arc::new(StringArray::from(vec![episode_id])),
142
+ Arc::new(UInt64Array::from(vec![new_count])),
143
+ Arc::new(StringArray::from(vec![now.as_str()])),
144
+ ],
145
+ )?;
146
+ let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
147
+ table.add(Box::new(batches)).execute().await?;
148
+ } else {
149
+ // Insert new record
150
+ let now = Utc::now().to_rfc3339();
151
+ let schema = Self::access_schema();
152
+ let batch = RecordBatch::try_new(
153
+ schema.clone(),
154
+ vec![
155
+ Arc::new(StringArray::from(vec![episode_id])),
156
+ Arc::new(UInt64Array::from(vec![1u64])),
157
+ Arc::new(StringArray::from(vec![now.as_str()])),
158
+ ],
159
+ )?;
160
+ let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
161
+ table.add(Box::new(batches)).execute().await?;
162
+ }
163
+
164
+ Ok(())
165
+ }
166
+
167
+ /// Record co-access for a set of episodes retrieved together
168
+ pub async fn record_co_access(&self, episode_ids: &[String]) -> Result<()> {
169
+ if episode_ids.len() < 2 {
170
+ return Ok(());
171
+ }
172
+
173
+ let table = self.db.open_table(COOCCURRENCE_TABLE).execute().await?;
174
+
175
+ // For each pair, update or create cooccurrence record
176
+ for i in 0..episode_ids.len() {
177
+ for j in (i + 1)..episode_ids.len() {
178
+ let (a, b) = if episode_ids[i] < episode_ids[j] {
179
+ (&episode_ids[i], &episode_ids[j])
180
+ } else {
181
+ (&episode_ids[j], &episode_ids[i])
182
+ };
183
+
184
+ let existing = self.get_cooccurrence(a, b).await?;
185
+
186
+ if let Some(record) = existing {
187
+ table
188
+ .delete(&format!(
189
+ "episode_a = '{}' AND episode_b = '{}'",
190
+ a, b
191
+ ))
192
+ .await?;
193
+
194
+ let new_count = record.count + 1;
195
+ let schema = Self::cooccurrence_schema();
196
+ let batch = RecordBatch::try_new(
197
+ schema.clone(),
198
+ vec![
199
+ Arc::new(StringArray::from(vec![a.as_str()])),
200
+ Arc::new(StringArray::from(vec![b.as_str()])),
201
+ Arc::new(UInt64Array::from(vec![new_count])),
202
+ ],
203
+ )?;
204
+ let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
205
+ table.add(Box::new(batches)).execute().await?;
206
+ } else {
207
+ let schema = Self::cooccurrence_schema();
208
+ let batch = RecordBatch::try_new(
209
+ schema.clone(),
210
+ vec![
211
+ Arc::new(StringArray::from(vec![a.as_str()])),
212
+ Arc::new(StringArray::from(vec![b.as_str()])),
213
+ Arc::new(UInt64Array::from(vec![1u64])),
214
+ ],
215
+ )?;
216
+ let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
217
+ table.add(Box::new(batches)).execute().await?;
218
+ }
219
+ }
220
+ }
221
+
222
+ Ok(())
223
+ }
224
+
225
    /// Run the promotion engine: check all episodes and promote eligible ones to L3
    ///
    /// Scans every access record meeting `ACCESS_THRESHOLD`, skips episodes
    /// already present in the L3 table, and promotes those whose connection
    /// density also meets `COOCCURRENCE_THRESHOLD`. When `context_filter` is
    /// `Some`, only episodes whose context matches the filter are promoted.
    /// Returns the IDs of the episodes promoted by this run.
    pub async fn run_promotion(&self, context_filter: Option<&str>) -> Result<Vec<String>> {
        let mut promoted = Vec::new();

        // Get all access records that meet the threshold
        let access_records = self.get_high_access_episodes().await?;

        for record in access_records {
            // Check if already promoted
            if self.is_promoted(&record.episode_id).await? {
                continue;
            }

            // Get connection density for this episode
            let density = self.get_connection_density(&record.episode_id).await?;

            // Check if both thresholds are met
            // (access_count >= ACCESS_THRESHOLD is re-checked even though the
            // query above already filtered on it.)
            if record.access_count >= ACCESS_THRESHOLD && density >= COOCCURRENCE_THRESHOLD {
                // Get the episode to determine its context
                let episode_context = self.get_episode_context(&record.episode_id).await?;
                if let Some(ref ctx) = episode_context {
                    // Apply context filter if specified
                    if let Some(filter) = context_filter {
                        if ctx != filter {
                            continue;
                        }
                    }
                }
                // NOTE(review): an episode with no resolvable context bypasses
                // the filter above, but is still dropped below because
                // promotion requires both context and content to be Some.

                let episode_content = self.get_episode_content(&record.episode_id).await?;
                if let (Some(ctx), Some(content)) = (episode_context, episode_content) {
                    self.promote_episode(
                        &record.episode_id,
                        &ctx,
                        &content,
                        record.access_count,
                        density,
                    )
                    .await?;
                    promoted.push(record.episode_id);
                }
            }
        }

        if !promoted.is_empty() {
            tracing::info!("Promoted {} episodes to L3", promoted.len());
        }

        Ok(promoted)
    }
275
+
276
+ async fn get_access_record(&self, episode_id: &str) -> Result<Option<AccessRecord>> {
277
+ let table = self.db.open_table(ACCESS_TABLE).execute().await?;
278
+ let results = table
279
+ .query()
280
+ .only_if(format!("episode_id = '{}'", episode_id))
281
+ .execute()
282
+ .await?;
283
+
284
+ let batches: Vec<RecordBatch> = results.try_collect().await?;
285
+ for batch in &batches {
286
+ if batch.num_rows() > 0 {
287
+ let ids = batch
288
+ .column_by_name("episode_id")
289
+ .unwrap()
290
+ .as_any()
291
+ .downcast_ref::<StringArray>()
292
+ .unwrap();
293
+ let counts = batch
294
+ .column_by_name("access_count")
295
+ .unwrap()
296
+ .as_any()
297
+ .downcast_ref::<UInt64Array>()
298
+ .unwrap();
299
+ let last = batch
300
+ .column_by_name("last_accessed")
301
+ .unwrap()
302
+ .as_any()
303
+ .downcast_ref::<StringArray>()
304
+ .unwrap();
305
+
306
+ return Ok(Some(AccessRecord {
307
+ episode_id: ids.value(0).to_string(),
308
+ access_count: counts.value(0),
309
+ last_accessed: last.value(0).to_string(),
310
+ }));
311
+ }
312
+ }
313
+ Ok(None)
314
+ }
315
+
316
+ async fn get_cooccurrence(&self, a: &str, b: &str) -> Result<Option<CooccurrenceRecord>> {
317
+ let table = self.db.open_table(COOCCURRENCE_TABLE).execute().await?;
318
+ let results = table
319
+ .query()
320
+ .only_if(format!("episode_a = '{}' AND episode_b = '{}'", a, b))
321
+ .execute()
322
+ .await?;
323
+
324
+ let batches: Vec<RecordBatch> = results.try_collect().await?;
325
+ for batch in &batches {
326
+ if batch.num_rows() > 0 {
327
+ let ep_a = batch
328
+ .column_by_name("episode_a")
329
+ .unwrap()
330
+ .as_any()
331
+ .downcast_ref::<StringArray>()
332
+ .unwrap();
333
+ let ep_b = batch
334
+ .column_by_name("episode_b")
335
+ .unwrap()
336
+ .as_any()
337
+ .downcast_ref::<StringArray>()
338
+ .unwrap();
339
+ let counts = batch
340
+ .column_by_name("count")
341
+ .unwrap()
342
+ .as_any()
343
+ .downcast_ref::<UInt64Array>()
344
+ .unwrap();
345
+
346
+ return Ok(Some(CooccurrenceRecord {
347
+ episode_a: ep_a.value(0).to_string(),
348
+ episode_b: ep_b.value(0).to_string(),
349
+ count: counts.value(0),
350
+ }));
351
+ }
352
+ }
353
+ Ok(None)
354
+ }
355
+
356
+ async fn get_high_access_episodes(&self) -> Result<Vec<AccessRecord>> {
357
+ let table = self.db.open_table(ACCESS_TABLE).execute().await?;
358
+ let results = table
359
+ .query()
360
+ .only_if(format!("access_count >= {}", ACCESS_THRESHOLD))
361
+ .execute()
362
+ .await?;
363
+
364
+ let mut records = Vec::new();
365
+ let batches: Vec<RecordBatch> = results.try_collect().await?;
366
+
367
+ for batch in &batches {
368
+ let ids = batch
369
+ .column_by_name("episode_id")
370
+ .unwrap()
371
+ .as_any()
372
+ .downcast_ref::<StringArray>()
373
+ .unwrap();
374
+ let counts = batch
375
+ .column_by_name("access_count")
376
+ .unwrap()
377
+ .as_any()
378
+ .downcast_ref::<UInt64Array>()
379
+ .unwrap();
380
+ let last = batch
381
+ .column_by_name("last_accessed")
382
+ .unwrap()
383
+ .as_any()
384
+ .downcast_ref::<StringArray>()
385
+ .unwrap();
386
+
387
+ for i in 0..batch.num_rows() {
388
+ records.push(AccessRecord {
389
+ episode_id: ids.value(i).to_string(),
390
+ access_count: counts.value(i),
391
+ last_accessed: last.value(i).to_string(),
392
+ });
393
+ }
394
+ }
395
+
396
+ Ok(records)
397
+ }
398
+
399
+ async fn get_connection_density(&self, episode_id: &str) -> Result<u64> {
400
+ let table = self.db.open_table(COOCCURRENCE_TABLE).execute().await?;
401
+
402
+ // Sum all cooccurrence counts where this episode is involved
403
+ let results_a = table
404
+ .query()
405
+ .only_if(format!("episode_a = '{}'", episode_id))
406
+ .execute()
407
+ .await?;
408
+
409
+ let mut total: u64 = 0;
410
+ let batches: Vec<RecordBatch> = results_a.try_collect().await?;
411
+ for batch in &batches {
412
+ let counts = batch
413
+ .column_by_name("count")
414
+ .unwrap()
415
+ .as_any()
416
+ .downcast_ref::<UInt64Array>()
417
+ .unwrap();
418
+ for i in 0..batch.num_rows() {
419
+ total += counts.value(i);
420
+ }
421
+ }
422
+
423
+ let results_b = table
424
+ .query()
425
+ .only_if(format!("episode_b = '{}'", episode_id))
426
+ .execute()
427
+ .await?;
428
+
429
+ let batches: Vec<RecordBatch> = results_b.try_collect().await?;
430
+ for batch in &batches {
431
+ let counts = batch
432
+ .column_by_name("count")
433
+ .unwrap()
434
+ .as_any()
435
+ .downcast_ref::<UInt64Array>()
436
+ .unwrap();
437
+ for i in 0..batch.num_rows() {
438
+ total += counts.value(i);
439
+ }
440
+ }
441
+
442
+ Ok(total)
443
+ }
444
+
445
+ async fn is_promoted(&self, episode_id: &str) -> Result<bool> {
446
+ let table = self.db.open_table(L3_TABLE).execute().await?;
447
+ let results = table
448
+ .query()
449
+ .only_if(format!("source_episode_id = '{}'", episode_id))
450
+ .execute()
451
+ .await?;
452
+
453
+ let batches: Vec<RecordBatch> = results.try_collect().await?;
454
+ for batch in &batches {
455
+ if batch.num_rows() > 0 {
456
+ return Ok(true);
457
+ }
458
+ }
459
+ Ok(false)
460
+ }
461
+
462
+ async fn get_episode_context(&self, episode_id: &str) -> Result<Option<String>> {
463
+ let table = self.db.open_table("episodes").execute().await?;
464
+ let results = table
465
+ .query()
466
+ .only_if(format!("id = '{}'", episode_id))
467
+ .execute()
468
+ .await?;
469
+
470
+ let batches: Vec<RecordBatch> = results.try_collect().await?;
471
+ for batch in &batches {
472
+ if batch.num_rows() > 0 {
473
+ let contexts = batch
474
+ .column_by_name("context_name")
475
+ .unwrap()
476
+ .as_any()
477
+ .downcast_ref::<StringArray>()
478
+ .unwrap();
479
+ return Ok(Some(contexts.value(0).to_string()));
480
+ }
481
+ }
482
+ Ok(None)
483
+ }
484
+
485
+ async fn get_episode_content(&self, episode_id: &str) -> Result<Option<String>> {
486
+ let table = self.db.open_table("episodes").execute().await?;
487
+ let results = table
488
+ .query()
489
+ .only_if(format!("id = '{}'", episode_id))
490
+ .execute()
491
+ .await?;
492
+
493
+ let batches: Vec<RecordBatch> = results.try_collect().await?;
494
+ for batch in &batches {
495
+ if batch.num_rows() > 0 {
496
+ let contents = batch
497
+ .column_by_name("content")
498
+ .unwrap()
499
+ .as_any()
500
+ .downcast_ref::<StringArray>()
501
+ .unwrap();
502
+ return Ok(Some(contents.value(0).to_string()));
503
+ }
504
+ }
505
+ Ok(None)
506
+ }
507
+
508
+ async fn promote_episode(
509
+ &self,
510
+ episode_id: &str,
511
+ context: &str,
512
+ content: &str,
513
+ access_count: u64,
514
+ connection_density: u64,
515
+ ) -> Result<()> {
516
+ let id = uuid::Uuid::new_v4().to_string();
517
+ let now = Utc::now().to_rfc3339();
518
+
519
+ let table = self.db.open_table(L3_TABLE).execute().await?;
520
+ let schema = Self::l3_schema();
521
+ let batch = RecordBatch::try_new(
522
+ schema.clone(),
523
+ vec![
524
+ Arc::new(StringArray::from(vec![id.as_str()])),
525
+ Arc::new(StringArray::from(vec![episode_id])),
526
+ Arc::new(StringArray::from(vec![context])),
527
+ Arc::new(StringArray::from(vec![content])),
528
+ Arc::new(StringArray::from(vec![now.as_str()])),
529
+ Arc::new(UInt64Array::from(vec![access_count])),
530
+ Arc::new(UInt64Array::from(vec![connection_density])),
531
+ ],
532
+ )?;
533
+
534
+ let batches = RecordBatchIterator::new(vec![Ok(batch)], schema);
535
+ table.add(Box::new(batches)).execute().await?;
536
+
537
+ tracing::info!(
538
+ "Promoted episode {} to L3 (access: {}, density: {})",
539
+ episode_id,
540
+ access_count,
541
+ connection_density
542
+ );
543
+
544
+ Ok(())
545
+ }
546
+
547
+ /// Get all L3 entries for a context
548
+ pub async fn get_l3_entries(&self, context: &str) -> Result<Vec<L3Entry>> {
549
+ let table = self.db.open_table(L3_TABLE).execute().await?;
550
+ let results = table
551
+ .query()
552
+ .only_if(format!("context_name = '{}'", context))
553
+ .execute()
554
+ .await?;
555
+
556
+ let mut entries = Vec::new();
557
+ let batches: Vec<RecordBatch> = results.try_collect().await?;
558
+
559
+ for batch in &batches {
560
+ let ids = batch.column_by_name("id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
561
+ let source_ids = batch.column_by_name("source_episode_id").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
562
+ let contexts = batch.column_by_name("context_name").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
563
+ let contents = batch.column_by_name("content").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
564
+ let promoted_ats = batch.column_by_name("promoted_at").unwrap().as_any().downcast_ref::<StringArray>().unwrap();
565
+ let access_counts = batch.column_by_name("access_count").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
566
+ let densities = batch.column_by_name("connection_density").unwrap().as_any().downcast_ref::<UInt64Array>().unwrap();
567
+
568
+ for i in 0..batch.num_rows() {
569
+ entries.push(L3Entry {
570
+ id: ids.value(i).to_string(),
571
+ source_episode_id: source_ids.value(i).to_string(),
572
+ context_name: contexts.value(i).to_string(),
573
+ content: contents.value(i).to_string(),
574
+ promoted_at: promoted_ats.value(i).to_string(),
575
+ access_count: access_counts.value(i),
576
+ connection_density: densities.value(i),
577
+ });
578
+ }
579
+ }
580
+
581
+ Ok(entries)
582
+ }
583
+
584
+ /// Check if an episode is referenced across multiple contexts and should be auto-promoted to global
585
+ pub async fn check_cross_context_promotion(&self, episode_content: &str) -> Result<Vec<String>> {
586
+ // This is checked by counting how many contexts have similar content
587
+ // Called externally when a new episode is stored
588
+ // Returns list of context names where similar content exists
589
+ let table = self.db.open_table("episodes").execute().await?;
590
+ let results = table.query().execute().await?;
591
+
592
+ let mut context_set: std::collections::HashSet<String> = std::collections::HashSet::new();
593
+ let batches: Vec<RecordBatch> = results.try_collect().await?;
594
+
595
+ let lower_content = episode_content.to_lowercase();
596
+ // Simple substring matching for cross-context detection
597
+ // Real implementation would use embedding similarity
598
+ for batch in &batches {
599
+ let contexts = batch
600
+ .column_by_name("context_name")
601
+ .unwrap()
602
+ .as_any()
603
+ .downcast_ref::<StringArray>()
604
+ .unwrap();
605
+ let contents = batch
606
+ .column_by_name("content")
607
+ .unwrap()
608
+ .as_any()
609
+ .downcast_ref::<StringArray>()
610
+ .unwrap();
611
+
612
+ for i in 0..batch.num_rows() {
613
+ let content = contents.value(i).to_lowercase();
614
+ // Check for significant overlap (shared words)
615
+ let shared = count_shared_significant_words(&lower_content, &content);
616
+ if shared >= 3 {
617
+ context_set.insert(contexts.value(i).to_string());
618
+ }
619
+ }
620
+ }
621
+
622
+ Ok(context_set.into_iter().collect())
623
+ }
624
+ }
625
+
626
/// Count shared significant words (length > 4) between two texts
fn count_shared_significant_words(a: &str, b: &str) -> usize {
    // Extract the set of "significant" (longer than 4 chars) words.
    fn significant(text: &str) -> std::collections::HashSet<&str> {
        text.split_whitespace().filter(|w| w.len() > 4).collect()
    }
    significant(a).intersection(&significant(b)).count()
}