@geravant/sinain 1.12.0 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/.env.example +4 -2
  2. package/config-shared.js +1 -0
  3. package/package.json +4 -1
  4. package/sinain-agent/run.sh +36 -4
  5. package/sinain-core/package-lock.json +963 -0
  6. package/sinain-core/package.json +1 -0
  7. package/sinain-core/src/buffers/feed-buffer.ts +34 -0
  8. package/sinain-core/src/embedding/service.ts +66 -0
  9. package/sinain-core/src/index.ts +65 -17
  10. package/sinain-core/src/learning/local-curation.ts +137 -7
  11. package/sinain-core/src/server.ts +31 -0
  12. package/sinain-memory/README.md +105 -0
  13. package/sinain-memory/embed_client.py +117 -0
  14. package/sinain-memory/graph_query.py +269 -18
  15. package/sinain-memory/knowledge_integrator.py +551 -74
  16. package/sinain-memory/memory-config.json +1 -1
  17. package/sinain-memory/session_distiller.py +43 -19
  18. package/sinain-memory/triplestore.py +60 -0
  19. package/sinain-memory/__pycache__/common.cpython-312.pyc +0 -0
  20. package/sinain-memory/__pycache__/graph_query.cpython-312.pyc +0 -0
  21. package/sinain-memory/__pycache__/knowledge_integrator.cpython-312.pyc +0 -0
  22. package/sinain-memory/__pycache__/session_distiller.cpython-312.pyc +0 -0
  23. package/sinain-memory/__pycache__/triplestore.cpython-312.pyc +0 -0
  24. package/sinain-memory/eval/__init__.py +0 -0
  25. package/sinain-memory/eval/__pycache__/__init__.cpython-312.pyc +0 -0
  26. package/sinain-memory/eval/assertions.py +0 -267
  27. package/sinain-memory/eval/benchmarks/__init__.py +0 -0
  28. package/sinain-memory/eval/benchmarks/__pycache__/__init__.cpython-312.pyc +0 -0
  29. package/sinain-memory/eval/benchmarks/__pycache__/base_adapter.cpython-312.pyc +0 -0
  30. package/sinain-memory/eval/benchmarks/__pycache__/config.cpython-312.pyc +0 -0
  31. package/sinain-memory/eval/benchmarks/__pycache__/evaluate.cpython-312.pyc +0 -0
  32. package/sinain-memory/eval/benchmarks/__pycache__/ingest.cpython-312.pyc +0 -0
  33. package/sinain-memory/eval/benchmarks/__pycache__/longmemeval_adapter.cpython-312.pyc +0 -0
  34. package/sinain-memory/eval/benchmarks/__pycache__/query.cpython-312.pyc +0 -0
  35. package/sinain-memory/eval/benchmarks/__pycache__/report.cpython-312.pyc +0 -0
  36. package/sinain-memory/eval/benchmarks/__pycache__/runner.cpython-312.pyc +0 -0
  37. package/sinain-memory/eval/benchmarks/base_adapter.py +0 -43
  38. package/sinain-memory/eval/benchmarks/config.py +0 -23
  39. package/sinain-memory/eval/benchmarks/evaluate.py +0 -146
  40. package/sinain-memory/eval/benchmarks/ingest.py +0 -152
  41. package/sinain-memory/eval/benchmarks/judges/__init__.py +0 -0
  42. package/sinain-memory/eval/benchmarks/judges/__pycache__/__init__.cpython-312.pyc +0 -0
  43. package/sinain-memory/eval/benchmarks/judges/__pycache__/qa_judge.cpython-312.pyc +0 -0
  44. package/sinain-memory/eval/benchmarks/judges/qa_judge.py +0 -81
  45. package/sinain-memory/eval/benchmarks/longmemeval_adapter.py +0 -177
  46. package/sinain-memory/eval/benchmarks/query.py +0 -172
  47. package/sinain-memory/eval/benchmarks/report.py +0 -87
  48. package/sinain-memory/eval/benchmarks/runner.py +0 -276
  49. package/sinain-memory/eval/judges/__init__.py +0 -0
  50. package/sinain-memory/eval/judges/base_judge.py +0 -61
  51. package/sinain-memory/eval/judges/curation_judge.py +0 -46
  52. package/sinain-memory/eval/judges/insight_judge.py +0 -48
  53. package/sinain-memory/eval/judges/mining_judge.py +0 -42
  54. package/sinain-memory/eval/judges/signal_judge.py +0 -45
  55. package/sinain-memory/eval/retrieval_benchmark.jsonl +0 -12
  56. package/sinain-memory/eval/retrieval_evaluator.py +0 -186
  57. package/sinain-memory/eval/schemas.py +0 -247
  58. package/sinain-memory/tests/__init__.py +0 -0
  59. package/sinain-memory/tests/conftest.py +0 -189
  60. package/sinain-memory/tests/test_curator_helpers.py +0 -94
  61. package/sinain-memory/tests/test_embedder.py +0 -210
  62. package/sinain-memory/tests/test_extract_json.py +0 -124
  63. package/sinain-memory/tests/test_feedback_computation.py +0 -121
  64. package/sinain-memory/tests/test_miner_helpers.py +0 -71
  65. package/sinain-memory/tests/test_module_management.py +0 -458
  66. package/sinain-memory/tests/test_parsers.py +0 -96
  67. package/sinain-memory/tests/test_tick_evaluator.py +0 -430
  68. package/sinain-memory/tests/test_triple_extractor.py +0 -255
  69. package/sinain-memory/tests/test_triple_ingest.py +0 -191
  70. package/sinain-memory/tests/test_triple_migrate.py +0 -138
  71. package/sinain-memory/tests/test_triplestore.py +0 -248
@@ -12,6 +12,7 @@
12
12
  "eval:quick": "tsx eval/harness.ts --scenarios eval/scenarios/ --runs 1 --fast --report /dev/stdout"
13
13
  },
14
14
  "dependencies": {
15
+ "@huggingface/transformers": "^4.0.1",
15
16
  "@types/node": "^22.19.7",
16
17
  "@types/ws": "^8.18.1",
17
18
  "tsx": "^4.21.0",
@@ -10,11 +10,31 @@ export class FeedBuffer {
10
10
  private _version = 0;
11
11
  private maxSize: number;
12
12
  private _hwm = 0;
13
+ private _onFullCb: ((items: FeedItem[]) => void) | null = null;
14
+ private _onFullArmed = true;
15
+ private _onFullVersion = 0; // version at last re-arm
13
16
 
14
17
  constructor(maxSize = 100) {
15
18
  this.maxSize = maxSize;
16
19
  }
17
20
 
21
+ /**
22
+ * Register a callback that fires once the buffer holds at least 20 items AND
23
+ * the buffer version has advanced by at least 20 since the last re-arm (see
24
+ * rearmOnFull). This prevents rapid-fire triggers on the same content.
25
+ */
26
+ onFull(cb: (items: FeedItem[]) => void): void {
27
+ this._onFullCb = cb;
28
+ this._onFullArmed = true;
29
+ this._onFullVersion = 0;
30
+ }
31
+
32
+ /** Re-arm the onFull callback (call after incremental distillation completes). */
33
+ rearmOnFull(): void {
34
+ this._onFullVersion = this._version;
35
+ this._onFullArmed = true;
36
+ }
37
+
18
38
  /** Push a new feed item. Returns the created item. */
19
39
  push(text: string, priority: Priority, source: FeedItem["source"], channel: FeedChannel = "stream"): FeedItem {
20
40
  const item: FeedItem = {
@@ -27,6 +47,20 @@ export class FeedBuffer {
27
47
  };
28
48
  this.items.push(item);
29
49
  if (this.items.length > this._hwm) this._hwm = this.items.length;
50
+
51
+ // Fire when enough new items have arrived since last distillation.
52
+ // 20 items ≈ 1.7 min of audio at ~12 items/min transcription rate.
53
+ // Distillation takes ~7s, so 20-item threshold gives 100s gap — safe margin.
54
+ // This means ~35 passes/hour, leaving <20 items undistilled at shutdown.
55
+ const newSinceRearm = this._version - this._onFullVersion;
56
+ if (this.items.length >= 20
57
+ && this._onFullCb && this._onFullArmed
58
+ && newSinceRearm >= 20) {
59
+ this._onFullArmed = false;
60
+ const snapshot = [...this.items];
61
+ queueMicrotask(() => this._onFullCb!(snapshot));
62
+ }
63
+
30
64
  if (this.items.length > this.maxSize) {
31
65
  this.items.shift();
32
66
  }
@@ -0,0 +1,66 @@
1
+ /**
2
+ * EmbeddingService — in-process sentence embeddings for knowledge dedup + retrieval.
3
+ *
4
+ * Loads all-MiniLM-L6-v2 via @huggingface/transformers (ONNX runtime, no Python).
5
+ * Model loads async at startup (~9s), embeddings are 2-4ms per text after that.
6
+ *
7
+ * Used by:
8
+ * - knowledge_integrator.py (via POST /embed) for dedup before asserting facts
9
+ * - graph_query.py (via POST /embed) for semantic retrieval
10
+ */
11
+
12
+ import { log, warn } from "../log.js";
13
+
14
+ const TAG = "embedding";
15
+ const MODEL_ID = "Xenova/all-MiniLM-L6-v2";
16
+
17
+ type Pipeline = (texts: string | string[], options: { pooling: string; normalize: boolean }) => Promise<{ data: Float32Array; dims: number[] }>;
18
+
19
+ export class EmbeddingService {
20
+ private pipeline: Pipeline | null = null;
21
+ private loading = false;
22
+ private _ready = false;
23
+
24
+ get ready(): boolean {
25
+ return this._ready;
26
+ }
27
+
28
+ /** Load the model in the background. Non-blocking — returns immediately. */
29
+ loadAsync(): void {
30
+ if (this.loading || this._ready) return;
31
+ this.loading = true;
32
+
33
+ const start = Date.now();
34
+ log(TAG, `loading ${MODEL_ID} (background)...`);
35
+
36
+ import("@huggingface/transformers").then(async ({ pipeline }) => {
37
+ this.pipeline = await pipeline("feature-extraction", MODEL_ID) as unknown as Pipeline;
38
+ this._ready = true;
39
+ log(TAG, `model ready in ${Date.now() - start}ms (384 dims)`);
40
+ }).catch((err) => {
41
+ warn(TAG, `failed to load model: ${err.message?.slice(0, 100)}`);
42
+ this.loading = false;
43
+ });
44
+ }
45
+
46
+ /** Embed one or more texts. Returns array of float32 arrays (384 dims each). */
47
+ async embed(texts: string[]): Promise<Float32Array[]> {
48
+ if (!this.pipeline) {
49
+ throw new Error("Embedding model not loaded yet");
50
+ }
51
+
52
+ const results: Float32Array[] = [];
53
+ for (const text of texts) {
54
+ const output = await this.pipeline(text, { pooling: "mean", normalize: true });
55
+ results.push(new Float32Array(output.data));
56
+ }
57
+ return results;
58
+ }
59
+
60
+ /** Cosine similarity of two unit-normalized embeddings (plain dot product; no division by norms). */
61
+ static cosine(a: Float32Array, b: Float32Array): number {
62
+ let dot = 0;
63
+ for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
64
+ return dot;
65
+ }
66
+ }
@@ -16,6 +16,7 @@ import { TraceStore } from "./trace/trace-store.js";
16
16
  import { FeedbackStore } from "./learning/feedback-store.js";
17
17
  import { SignalCollector } from "./learning/signal-collector.js";
18
18
  import { LocalCurationService } from "./learning/local-curation.js";
19
+ import { EmbeddingService } from "./embedding/service.js";
19
20
  import { createAppServer } from "./server.js";
20
21
  import { Profiler } from "./profiler.js";
21
22
  import { CostTracker } from "./cost/tracker.js";
@@ -66,35 +67,66 @@ async function queryKnowledgeFactsMulti(entities: string[], maxFacts: number): P
66
67
  ];
67
68
  const scriptPath = scriptCandidates.find(p => existsSync(p)) || scriptCandidates[0];
68
69
 
69
- const results: string[] = [];
70
+ // Step 1: Get candidates from Python (RRF-ranked, no embedding — avoids deadlock)
71
+ // Request 2x candidates in JSON for re-ranking in Node.js
72
+ const candidateFacts: Array<Record<string, string>> = [];
70
73
  for (const dbPath of dbPaths) {
71
74
  if (!existsSync(dbPath)) continue;
72
75
  try {
73
- const args = [scriptPath, "--db", dbPath, "--max-facts", String(maxFacts), "--format", "text"];
76
+ const args = [scriptPath, "--db", dbPath, "--max-facts", String(maxFacts * 2), "--format", "json"];
74
77
  if (entities.length > 0) args.push("--entities", JSON.stringify(entities));
75
78
  const out = execFileSync("python3", args, { timeout: 5000, encoding: "utf-8" }).trim();
76
- if (out) results.push(out);
79
+ if (out) {
80
+ const parsed = JSON.parse(out);
81
+ const facts = parsed.facts || parsed;
82
+ if (Array.isArray(facts)) candidateFacts.push(...facts);
83
+ }
77
84
  } catch { /* skip failed db */ }
78
85
  }
79
86
 
80
- if (results.length === 0) return "";
81
- if (results.length === 1) return results[0];
87
+ if (candidateFacts.length === 0) return "";
82
88
 
83
- // Merge and deduplicate lines from both sources
84
- const seen = new Set<string>();
85
- const merged: string[] = [];
86
- for (const block of results) {
87
- for (const line of block.split("\n")) {
88
- const key = line.replace(/\(confidence:.*$/, "").trim();
89
- if (key && !seen.has(key)) {
90
- seen.add(key);
91
- merged.push(line);
92
- }
89
+ // Step 2: Re-rank by embedding similarity in-process (no deadlock — model is in this process)
90
+ const queryText = entities.join(" ");
91
+ try {
92
+ if (embeddingService?.ready) {
93
+ const allTexts = [queryText, ...candidateFacts.map(f => f.value || "")];
94
+ const embeddings = await embeddingService.embed(allTexts);
95
+ const queryEmb = embeddings[0];
96
+ const scored = candidateFacts.map((f, i) => ({
97
+ fact: f,
98
+ sim: EmbeddingService.cosine(queryEmb, embeddings[i + 1]),
99
+ }));
100
+ scored.sort((a, b) => b.sim - a.sim);
101
+ candidateFacts.length = 0;
102
+ candidateFacts.push(...scored.slice(0, maxFacts).map(s => s.fact));
93
103
  }
104
+ } catch { /* embedding unavailable — use RRF order */ }
105
+
106
+ // Step 3: Format as compact text
107
+ const seen = new Set<string>();
108
+ const lines: string[] = [];
109
+ let total = 0;
110
+ const maxChars = 1200;
111
+ for (const f of candidateFacts.slice(0, maxFacts)) {
112
+ const eid = ((f as any).entity_id || (f as any).entityId || "").split(":").pop()?.slice(0, 20) || "?";
113
+ const value = (f as any).value || "";
114
+ const conf = (f as any).confidence || "?";
115
+ const count = (f as any).reinforce_count || "1";
116
+ const line = `${eid}: ${value} (${conf},${count}x)`;
117
+ const key = value.slice(0, 60);
118
+ if (seen.has(key)) continue;
119
+ seen.add(key);
120
+ if (total + line.length + 2 > maxChars) break;
121
+ lines.push(line);
122
+ total += line.length + 2;
94
123
  }
95
- return merged.slice(0, maxFacts).join("\n");
124
+ return lines.join("; ");
96
125
  }
97
126
 
127
+ // Reference to embedding service — set during init
128
+ let embeddingService: import("./embedding/service.js").EmbeddingService | null = null;
129
+
98
130
  /** List all entities from both local and workspace knowledge graphs. */
99
131
  async function listKnowledgeEntitiesMulti(max: number): Promise<string> {
100
132
  const { execFileSync } = await import("node:child_process");
@@ -338,11 +370,25 @@ async function main() {
338
370
  ? new FeedbackStore(config.learningConfig.feedbackDir, config.learningConfig.retentionDays)
339
371
  : null;
340
372
 
373
+ // ── Initialize embedding service (non-blocking) ──
374
+ embeddingService = new EmbeddingService();
375
+ embeddingService.loadAsync(); // ~9s background load, server starts immediately
376
+
341
377
  // ── Initialize local knowledge pipeline ──
342
378
  const localCuration = new LocalCurationService();
343
- localCuration.distillPendingSession(); // Recover any session saved before a force-kill
379
+ // Distill pending session in the background — don't block server startup
380
+ setImmediate(() => {
381
+ localCuration.distillPendingSession();
382
+ });
344
383
  localCuration.startPeriodicCuration();
345
384
 
385
+ // Wire incremental distillation: when feed buffer fills, distill before items are lost
386
+ localCuration.setSenseBuffer(senseBuffer);
387
+ localCuration.setRearmCallback(() => feedBuffer.rearmOnFull());
388
+ feedBuffer.onFull((items) => {
389
+ localCuration.distillIncremental(items);
390
+ });
391
+
346
392
  // ── Initialize escalation ──
347
393
  const escalator = new Escalator({
348
394
  feedBuffer,
@@ -668,6 +714,8 @@ async function main() {
668
714
  },
669
715
  getSpawnPending: () => escalator.getSpawnPending(),
670
716
  respondSpawn: (id: string, result: string) => escalator.respondSpawn(id, result),
717
+ embedTexts: (texts: string[]) => embeddingService!.embed(texts),
718
+ isEmbeddingReady: () => embeddingService?.ready ?? false,
671
719
  });
672
720
 
673
721
  // ── Wire overlay profiling ──
@@ -15,6 +15,7 @@ import { existsSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, appendF
15
15
  import { resolve, dirname } from "node:path";
16
16
  import { fileURLToPath } from "node:url";
17
17
  import type { FeedItem } from "../types.js";
18
+ import type { SenseBuffer } from "../buffers/sense-buffer.js";
18
19
  import { log, warn, error } from "../log.js";
19
20
 
20
21
  const TAG = "local-curation";
@@ -55,6 +56,10 @@ export class LocalCurationService {
55
56
  private scriptsDir: string;
56
57
  private sessionStartTs: number;
57
58
  private curationTimer: ReturnType<typeof setInterval> | null = null;
59
+ private _lastDistilledTs = 0; // timestamp of last incremental distillation
60
+ private _incrementalRunning = false;
61
+ private _rearmCb: (() => void) | null = null; // callback to re-arm feed buffer onFull
62
+ private _senseBuffer: SenseBuffer | null = null;
58
63
 
59
64
  constructor() {
60
65
  this.memoryDir = resolveMemoryDir();
@@ -90,6 +95,100 @@ export class LocalCurationService {
90
95
  }
91
96
  }
92
97
 
98
+ /** Timestamp of last incremental distillation (items before this are already distilled). */
99
+ get lastDistilledTs(): number {
100
+ return this._lastDistilledTs;
101
+ }
102
+
103
+ /** Set the callback to re-arm the feed buffer's onFull trigger after distillation. */
104
+ setRearmCallback(cb: () => void): void {
105
+ this._rearmCb = cb;
106
+ }
107
+
108
+ /** Attach sense buffer for screen context in distillation. */
109
+ setSenseBuffer(sb: SenseBuffer): void {
110
+ this._senseBuffer = sb;
111
+ }
112
+
113
+ /** Extract screen context from sense buffer as feed-item-compatible entries. */
114
+ private getSenseContext(): Array<{ text: string; ts: number; source: string; channel: string }> {
115
+ if (!this._senseBuffer) return [];
116
+ const events = this._senseBuffer.queryByTime(this._lastDistilledTs || (Date.now() - 30 * 60 * 1000));
117
+ const items: Array<{ text: string; ts: number; source: string; channel: string }> = [];
118
+ for (const evt of events) {
119
+ // Include OCR text (what's visible on screen)
120
+ if (evt.ocr && evt.ocr.length > 20) {
121
+ const app = evt.semantic?.context?.app || "unknown";
122
+ items.push({
123
+ text: `[screen: ${app}] ${evt.ocr}`,
124
+ ts: evt.ts,
125
+ source: "sense",
126
+ channel: "screen",
127
+ });
128
+ }
129
+ // Include vision summaries (AI description of screen content)
130
+ if (evt.semantic?.visible?.summary) {
131
+ items.push({
132
+ text: `[screen-context] ${evt.semantic.visible.summary}`,
133
+ ts: evt.ts,
134
+ source: "sense",
135
+ channel: "screen",
136
+ });
137
+ }
138
+ }
139
+ return items;
140
+ }
141
+
142
+ /**
143
+ * Incremental distillation — invoked from the feed buffer's onFull callback.
144
+ * Distills the current buffer contents before they fall off the ring buffer.
145
+ * NOTE(review): runDistillation is called synchronously below, so this likely blocks until the pass returns — confirm.
146
+ */
147
+ async distillIncremental(feedItems: FeedItem[]): Promise<void> {
148
+ if (this._incrementalRunning) {
149
+ log(TAG, "incremental distillation already running — skipping");
150
+ return;
151
+ }
152
+ this._incrementalRunning = true;
153
+
154
+ try {
155
+ const itemCount = feedItems.length;
156
+ log(TAG, `incremental distillation: ${itemCount} items (buffer full)`);
157
+
158
+ const sessionMeta = {
159
+ ts: new Date().toISOString(),
160
+ sessionKey: "local-incremental",
161
+ durationMs: Date.now() - this.sessionStartTs,
162
+ };
163
+
164
+ const audioItems = feedItems.map(item => ({
165
+ text: item.text,
166
+ ts: item.ts,
167
+ source: item.source || "unknown",
168
+ channel: item.channel || "agent",
169
+ }));
170
+
171
+ // Merge screen context from sense buffer (OCR + vision summaries)
172
+ const senseItems = this.getSenseContext();
173
+ const transcript = [...audioItems, ...senseItems].sort((a, b) => a.ts - b.ts);
174
+
175
+ if (senseItems.length > 0) {
176
+ log(TAG, `including ${senseItems.length} screen context items in distillation`);
177
+ }
178
+
179
+ if (this.runDistillation(transcript, sessionMeta)) {
180
+ this._lastDistilledTs = Date.now();
181
+ log(TAG, `incremental distillation complete — ${itemCount} audio + ${senseItems.length} screen items processed`);
182
+ }
183
+ } catch (err: any) {
184
+ warn(TAG, `incremental distillation failed: ${err.message?.slice(0, 100)}`);
185
+ } finally {
186
+ this._incrementalRunning = false;
187
+ // Re-arm the buffer callback so next fill triggers another distillation
188
+ this._rearmCb?.();
189
+ }
190
+ }
191
+
93
192
  /**
94
193
  * Save feed items to disk for deferred distillation.
95
194
  * Called during shutdown — instant (no LLM), survives tsx force-kill.
@@ -160,13 +259,20 @@ export class LocalCurationService {
160
259
  * picked up on next startup via distillPendingSession().
161
260
  */
162
261
  async distillSession(feedItems: FeedItem[]): Promise<void> {
163
- if (feedItems.length < 1) {
164
- log(TAG, `skipping distillation — only ${feedItems.length} feed items`);
262
+ // Filter to only items not yet covered by incremental distillation
263
+ const items = this._lastDistilledTs > 0
264
+ ? feedItems.filter(i => i.ts > this._lastDistilledTs)
265
+ : feedItems;
266
+
267
+ if (items.length < 1) {
268
+ log(TAG, `skipping shutdown distillation — all ${feedItems.length} items already distilled incrementally`);
165
269
  return;
166
270
  }
167
271
 
272
+ log(TAG, `shutdown distillation: ${items.length} items (${feedItems.length - items.length} already distilled incrementally)`);
273
+
168
274
  // Step 0: Save to disk FIRST — survives force-kill
169
- this.savePendingSession(feedItems);
275
+ this.savePendingSession(items);
170
276
 
171
277
  const sessionMeta = {
172
278
  ts: new Date().toISOString(),
@@ -174,7 +280,7 @@ export class LocalCurationService {
174
280
  durationMs: Date.now() - this.sessionStartTs,
175
281
  };
176
282
 
177
- const transcript = feedItems.map(item => ({
283
+ const transcript = items.map(item => ({
178
284
  text: item.text,
179
285
  ts: item.ts,
180
286
  source: item.source || "unknown",
@@ -204,13 +310,37 @@ export class LocalCurationService {
204
310
  log(TAG, `distilling session: ${transcript.length} items, ${Math.round(sessionMeta.durationMs / 60000)} min`);
205
311
 
206
312
  try {
313
+ // Step 0.5: Retrieve existing entities for context (Mem0 retrieve-before-extract pattern)
314
+ let existingEntities = "";
315
+ const dbPath = resolve(this.memoryDir, "knowledge-graph.db");
316
+ if (existsSync(dbPath)) {
317
+ try {
318
+ existingEntities = execFileSync("python3", [
319
+ resolve(this.scriptsDir, "graph_query.py"),
320
+ "--db", dbPath,
321
+ "--top", "20",
322
+ "--format", "compact",
323
+ ], {
324
+ timeout: 5_000,
325
+ encoding: "utf-8",
326
+ env: { ...process.env, PYTHONPATH: this.scriptsDir },
327
+ }).trim();
328
+ } catch {
329
+ // Non-fatal — distillation works without existing entities
330
+ }
331
+ }
332
+
207
333
  // Step 1: Distill session into a SessionDigest
208
- const digestJson = execFileSync("python3", [
334
+ const distillerArgs = [
209
335
  resolve(this.scriptsDir, "session_distiller.py"),
210
336
  "--memory-dir", this.memoryDir,
211
337
  "--transcript", JSON.stringify(transcript),
212
338
  "--session-meta", JSON.stringify(sessionMeta),
213
- ], {
339
+ ];
340
+ if (existingEntities) {
341
+ distillerArgs.push("--existing-entities", existingEntities);
342
+ }
343
+ const digestJson = execFileSync("python3", distillerArgs, {
214
344
  timeout: 30_000,
215
345
  encoding: "utf-8",
216
346
  env: { ...process.env, PYTHONPATH: this.scriptsDir },
@@ -236,7 +366,7 @@ export class LocalCurationService {
236
366
  "--memory-dir", this.memoryDir,
237
367
  "--digest", JSON.stringify(digest),
238
368
  ], {
239
- timeout: 30_000,
369
+ timeout: 60_000, // 60s: LLM call (~10s) + embedding dedup (~5s) + graph ops
240
370
  encoding: "utf-8",
241
371
  env: { ...process.env, PYTHONPATH: this.scriptsDir },
242
372
  });
@@ -184,6 +184,8 @@ export interface ServerDeps {
184
184
  onSpawnCommand?: (text: string) => void;
185
185
  getSpawnPending?: () => { id: string; task: string; label: string; ts: number } | null;
186
186
  respondSpawn?: (id: string, result: string) => { ok: boolean; error?: string };
187
+ embedTexts?: (texts: string[]) => Promise<Float32Array[]>;
188
+ isEmbeddingReady?: () => boolean;
187
189
  }
188
190
 
189
191
  function readBody(req: IncomingMessage, maxBytes: number): Promise<string> {
@@ -519,6 +521,35 @@ export function createAppServer(deps: ServerDeps) {
519
521
  return;
520
522
  }
521
523
 
524
+ // ── /embed ── (used by knowledge_integrator.py and graph_query.py)
525
+ if (req.method === "POST" && url.pathname === "/embed") {
526
+ if (!deps.embedTexts || !deps.isEmbeddingReady?.()) {
527
+ res.writeHead(503);
528
+ res.end(JSON.stringify({ error: "embedding model loading" }));
529
+ return;
530
+ }
531
+ let body = "";
532
+ req.on("data", (c: Buffer) => { body += c; });
533
+ req.on("end", async () => {
534
+ try {
535
+ const { texts } = JSON.parse(body);
536
+ if (!Array.isArray(texts) || texts.length === 0) {
537
+ res.writeHead(400);
538
+ res.end(JSON.stringify({ error: "texts array required" }));
539
+ return;
540
+ }
541
+ const embeddings = await deps.embedTexts!(texts);
542
+ // Return as base64-encoded float32 arrays for efficiency
543
+ const encoded = embeddings.map(e => Buffer.from(e.buffer).toString("base64"));
544
+ res.end(JSON.stringify({ embeddings: encoded, dims: 384 }));
545
+ } catch (err: any) {
546
+ res.writeHead(500);
547
+ res.end(JSON.stringify({ error: err.message?.slice(0, 200) }));
548
+ }
549
+ });
550
+ return;
551
+ }
552
+
522
553
  // ── /health ──
523
554
  if (req.method === "GET" && url.pathname === "/health") {
524
555
  res.end(JSON.stringify({
@@ -0,0 +1,105 @@
1
+ # sinain-memory
2
+
3
+ Local-first knowledge pipeline for SinainHUD. Captures what the user sees and hears, distills it into a knowledge graph, and makes it retrievable for the agent's context.
4
+
5
+ ## Architecture
6
+
7
+ Two-step pipeline: **LLM extraction** (what to remember) + **deterministic integration** (how to store it).
8
+
9
+ ```
10
+ Audio transcripts + Screen OCR
11
+ |
12
+ session_distiller.py (LLM)
13
+ Extracts: facts[], entities[], decisions[]
14
+ |
15
+ knowledge_integrator.py (code — no LLM)
16
+ - Converts facts to graph assertions (deterministic)
17
+ - Creates entity:* nodes with freeform types
18
+ - Links facts to entities via ref edges
19
+ - Infers cross-entity relationships from fact content
20
+ - Deduplicates via embedding similarity (cosine 0.78)
21
+ - Auto-curates playbook (tag overlap, no LLM)
22
+ |
23
+ triplestore.py (SQLite EAV)
24
+ - 4 covering indexes: EAVT, AEVT, VAET, AVET
25
+ - FTS5 full-text search on fact values
26
+ - Confidence decay (60-day half-life)
27
+ - Touched-entities tracking for cache invalidation
28
+ ```
29
+
30
+ ## Key Design Decisions
31
+
32
+ **Deterministic integration.** The integrator does NOT use an LLM. Early experiments showed that LLM-based integration produced 0-20 facts per run depending on model mood, token truncation, and format errors. The deterministic approach converts every distiller fact to a graph assertion — consistent, fast, and reliable.
33
+
34
+ **Two-layer entity model.** `fact:*` entities store individual claims (searchable via FTS5 and tags). `entity:*` nodes represent real-world entities connected by typed ref edges. The VAET index enables backref traversal: "find all facts about Citibank" is an O(log n) index lookup.
35
+
36
+ **Incremental distillation.** Long sessions trigger distillation as the feed buffer fills, before items are lost to the ring buffer. The `onFull` callback fires once the buffer holds at least 20 items and at least 20 new items have accumulated since the last pass (roughly every 1.7 min of audio at ~12 items/min).
37
+
38
+ **Embedding-based dedup.** sinain-core hosts an in-process all-MiniLM-L6-v2 model (384 dims, 2-4ms per embedding). The `/embed` endpoint is used for both write-time dedup (prevent storing semantic duplicates) and read-time re-ranking (surface most relevant facts for a query).
39
+
40
+ ## Triplestore
41
+
42
+ SQLite-backed EAV store inspired by Datomic/RhizomeDB with 4 covering indexes:
43
+
44
+ | Index | Query Pattern | Example |
45
+ |-------|-------------|---------|
46
+ | **EAVT** | What does entity X look like? | `store.entity("entity:citibank")` |
47
+ | **AEVT** | Which entities have attribute Y? | `store.entities_with_attr("type")` |
48
+ | **VAET** | What references entity Z? (backrefs) | `store.backrefs("entity:citibank")` |
49
+ | **AVET** | Find entity by attribute+value | `store.lookup("type", "person")` |
50
+
51
+ Additional features:
52
+ - **FTS5** full-text search on fact values with auto-sync triggers
53
+ - **Confidence decay**: exponential half-life (60 days) — facts lose relevance without reinforcement
54
+ - **Temporal queries**: `entity_as_of(id, date)` for point-in-time knowledge
55
+ - **Touched-entities index**: O(1) "was entity X modified since tx Y?" for cache invalidation
56
+ - **Soft retraction**: facts are marked retracted, not deleted — preserves history
57
+
58
+ ## Retrieval
59
+
60
+ Hybrid retrieval with Reciprocal Rank Fusion (RRF):
61
+
62
+ 1. **FTS5** keyword search on fact values
63
+ 2. **Tag-based** entity matching via AVET index
64
+ 3. **Top-confidence** facts as baseline
65
+ 4. **Entity graph boost**: facts linked to query-mentioned entities via backrefs get an RRF score bonus
66
+ 5. **Embedding re-ranking** (when sinain-core is running): semantic similarity between query and facts
67
+ 6. **Confidence decay** applied as tiebreaker
68
+
69
+ Results are grouped by entity for cross-fact reasoning.
70
+
71
+ ## Distiller Output Schema
72
+
73
+ ```json
74
+ {
75
+ "whatHappened": "2-3 sentence summary",
76
+ "facts": ["self-contained factual sentence", ...],
77
+ "decisions": ["who decided what, with deadline", ...],
78
+ "entities": [{"name": "entity-slug", "type": "freeform-type"}, ...],
79
+ "patterns": ["reusable technique or workflow", ...],
80
+ "preferences": ["user preference or habit", ...],
81
+ "isEmpty": false
82
+ }
83
+ ```
84
+
85
+ Facts are guided by 5 diversity dimensions: **WHO** (people, roles), **WHAT** (properties, descriptions), **HOW MUCH** (numbers, dates), **WHAT CHANGED** (decisions, agreements), **WHAT'S NEXT** (commitments, plans).
86
+
87
+ ## Files
88
+
89
+ | File | Role |
90
+ |------|------|
91
+ | `session_distiller.py` | LLM extraction: transcript to structured digest |
92
+ | `knowledge_integrator.py` | Deterministic storage: digest to graph ops + playbook |
93
+ | `triplestore.py` | SQLite EAV with 4 indexes + FTS5 + temporal |
94
+ | `graph_query.py` | Hybrid retrieval with RRF fusion |
95
+ | `embed_client.py` | Python client for sinain-core `/embed` endpoint |
96
+ | `common.py` | Shared LLM call utilities |
97
+ | `memory-config.json` | Model selection, token limits, timeouts |
98
+
99
+ ## Configuration
100
+
101
+ | Env Var | Default | Description |
102
+ |---------|---------|-------------|
103
+ | `SINAIN_MEMORY_DIR` | `~/.sinain/memory` | Knowledge graph directory |
104
+ | `LEARNING_ENABLED` | `true` | Enable/disable distillation pipeline |
105
+ | `AGENT_ENABLED` | `true` | Set `false` for capture-only mode |