@geravant/sinain 1.11.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/package.json +1 -1
  2. package/sinain-core/package-lock.json +963 -0
  3. package/sinain-core/package.json +1 -0
  4. package/sinain-core/src/buffers/feed-buffer.ts +32 -0
  5. package/sinain-core/src/embedding/service.ts +66 -0
  6. package/sinain-core/src/escalation/escalator.ts +1 -0
  7. package/sinain-core/src/escalation/message-builder.ts +45 -118
  8. package/sinain-core/src/index.ts +19 -2
  9. package/sinain-core/src/learning/local-curation.ts +137 -7
  10. package/sinain-core/src/overlay/commands.ts +16 -3
  11. package/sinain-core/src/overlay/ws-handler.ts +4 -1
  12. package/sinain-core/src/server.ts +31 -0
  13. package/sinain-core/src/types.ts +3 -0
  14. package/sinain-memory/README.md +105 -0
  15. package/sinain-memory/__pycache__/common.cpython-312.pyc +0 -0
  16. package/sinain-memory/__pycache__/embed_client.cpython-312.pyc +0 -0
  17. package/sinain-memory/__pycache__/graph_query.cpython-312.pyc +0 -0
  18. package/sinain-memory/__pycache__/knowledge_integrator.cpython-312.pyc +0 -0
  19. package/sinain-memory/__pycache__/session_distiller.cpython-312.pyc +0 -0
  20. package/sinain-memory/__pycache__/triplestore.cpython-312.pyc +0 -0
  21. package/sinain-memory/embed_client.py +117 -0
  22. package/sinain-memory/eval/__pycache__/__init__.cpython-312.pyc +0 -0
  23. package/sinain-memory/eval/benchmarks/__init__.py +0 -0
  24. package/sinain-memory/eval/benchmarks/__pycache__/__init__.cpython-312.pyc +0 -0
  25. package/sinain-memory/eval/benchmarks/__pycache__/base_adapter.cpython-312.pyc +0 -0
  26. package/sinain-memory/eval/benchmarks/__pycache__/config.cpython-312.pyc +0 -0
  27. package/sinain-memory/eval/benchmarks/__pycache__/evaluate.cpython-312.pyc +0 -0
  28. package/sinain-memory/eval/benchmarks/__pycache__/ingest.cpython-312.pyc +0 -0
  29. package/sinain-memory/eval/benchmarks/__pycache__/longmemeval_adapter.cpython-312.pyc +0 -0
  30. package/sinain-memory/eval/benchmarks/__pycache__/meeting_adapter.cpython-312.pyc +0 -0
  31. package/sinain-memory/eval/benchmarks/__pycache__/meeting_runner.cpython-312.pyc +0 -0
  32. package/sinain-memory/eval/benchmarks/__pycache__/query.cpython-312.pyc +0 -0
  33. package/sinain-memory/eval/benchmarks/__pycache__/report.cpython-312.pyc +0 -0
  34. package/sinain-memory/eval/benchmarks/__pycache__/runner.cpython-312.pyc +0 -0
  35. package/sinain-memory/eval/benchmarks/base_adapter.py +43 -0
  36. package/sinain-memory/eval/benchmarks/config.py +23 -0
  37. package/sinain-memory/eval/benchmarks/evaluate.py +146 -0
  38. package/sinain-memory/eval/benchmarks/ingest.py +152 -0
  39. package/sinain-memory/eval/benchmarks/judges/__init__.py +0 -0
  40. package/sinain-memory/eval/benchmarks/judges/__pycache__/__init__.cpython-312.pyc +0 -0
  41. package/sinain-memory/eval/benchmarks/judges/__pycache__/qa_judge.cpython-312.pyc +0 -0
  42. package/sinain-memory/eval/benchmarks/judges/qa_judge.py +81 -0
  43. package/sinain-memory/eval/benchmarks/longmemeval_adapter.py +177 -0
  44. package/sinain-memory/eval/benchmarks/meeting_adapter.py +81 -0
  45. package/sinain-memory/eval/benchmarks/meeting_runner.py +230 -0
  46. package/sinain-memory/eval/benchmarks/query.py +193 -0
  47. package/sinain-memory/eval/benchmarks/report.py +87 -0
  48. package/sinain-memory/eval/benchmarks/run_meeting_bench.sh +318 -0
  49. package/sinain-memory/eval/benchmarks/runner.py +283 -0
  50. package/sinain-memory/graph_query.py +257 -15
  51. package/sinain-memory/knowledge_integrator.py +365 -72
  52. package/sinain-memory/koog-config.json +11 -0
  53. package/sinain-memory/memory-config.json +1 -1
  54. package/sinain-memory/session_distiller.py +43 -19
  55. package/sinain-memory/triplestore.py +60 -0
@@ -12,6 +12,7 @@
12
12
  "eval:quick": "tsx eval/harness.ts --scenarios eval/scenarios/ --runs 1 --fast --report /dev/stdout"
13
13
  },
14
14
  "dependencies": {
15
+ "@huggingface/transformers": "^4.0.1",
15
16
  "@types/node": "^22.19.7",
16
17
  "@types/ws": "^8.18.1",
17
18
  "tsx": "^4.21.0",
@@ -10,11 +10,31 @@ export class FeedBuffer {
10
10
  private _version = 0;
11
11
  private maxSize: number;
12
12
  private _hwm = 0;
13
+ private _onFullCb: ((items: FeedItem[]) => void) | null = null;
14
+ private _onFullArmed = true;
15
+ private _onFullVersion = 0; // version at last re-arm
13
16
 
14
17
  constructor(maxSize = 100) {
15
18
  this.maxSize = maxSize;
16
19
  }
17
20
 
21
+ /**
22
+ * Register a callback that fires when the buffer reaches capacity AND
23
+ * at least half the buffer has been replaced with new items since the
24
+ * last distillation. This prevents rapid-fire triggers on the same content.
25
+ */
26
+ onFull(cb: (items: FeedItem[]) => void): void {
27
+ this._onFullCb = cb;
28
+ this._onFullArmed = true;
29
+ this._onFullVersion = 0;
30
+ }
31
+
32
+ /** Re-arm the onFull callback (call after incremental distillation completes). */
33
+ rearmOnFull(): void {
34
+ this._onFullVersion = this._version;
35
+ this._onFullArmed = true;
36
+ }
37
+
18
38
  /** Push a new feed item. Returns the created item. */
19
39
  push(text: string, priority: Priority, source: FeedItem["source"], channel: FeedChannel = "stream"): FeedItem {
20
40
  const item: FeedItem = {
@@ -27,6 +47,18 @@ export class FeedBuffer {
27
47
  };
28
48
  this.items.push(item);
29
49
  if (this.items.length > this._hwm) this._hwm = this.items.length;
50
+
51
+ // Fire onFull when buffer is at capacity AND enough new items have arrived
52
+ // since the last distillation (at least half the buffer replaced)
53
+ const newSinceRearm = this._version - this._onFullVersion;
54
+ if (this.items.length >= this.maxSize
55
+ && this._onFullCb && this._onFullArmed
56
+ && newSinceRearm >= Math.floor(this.maxSize / 2)) {
57
+ this._onFullArmed = false;
58
+ const snapshot = [...this.items];
59
+ queueMicrotask(() => this._onFullCb!(snapshot));
60
+ }
61
+
30
62
  if (this.items.length > this.maxSize) {
31
63
  this.items.shift();
32
64
  }
@@ -0,0 +1,66 @@
1
+ /**
2
+ * EmbeddingService — in-process sentence embeddings for knowledge dedup + retrieval.
3
+ *
4
+ * Loads all-MiniLM-L6-v2 via @huggingface/transformers (ONNX runtime, no Python).
5
+ * Model loads async at startup (~9s), embeddings are 2-4ms per text after that.
6
+ *
7
+ * Used by:
8
+ * - knowledge_integrator.py (via POST /embed) for dedup before asserting facts
9
+ * - graph_query.py (via POST /embed) for semantic retrieval
10
+ */
11
+
12
+ import { log, warn } from "../log.js";
13
+
14
+ const TAG = "embedding";
15
+ const MODEL_ID = "Xenova/all-MiniLM-L6-v2";
16
+
17
+ type Pipeline = (texts: string | string[], options: { pooling: string; normalize: boolean }) => Promise<{ data: Float32Array; dims: number[] }>;
18
+
19
+ export class EmbeddingService {
20
+ private pipeline: Pipeline | null = null;
21
+ private loading = false;
22
+ private _ready = false;
23
+
24
+ get ready(): boolean {
25
+ return this._ready;
26
+ }
27
+
28
+ /** Load the model in the background. Non-blocking — returns immediately. */
29
+ loadAsync(): void {
30
+ if (this.loading || this._ready) return;
31
+ this.loading = true;
32
+
33
+ const start = Date.now();
34
+ log(TAG, `loading ${MODEL_ID} (background)...`);
35
+
36
+ import("@huggingface/transformers").then(async ({ pipeline }) => {
37
+ this.pipeline = await pipeline("feature-extraction", MODEL_ID) as unknown as Pipeline;
38
+ this._ready = true;
39
+ log(TAG, `model ready in ${Date.now() - start}ms (384 dims)`);
40
+ }).catch((err) => {
41
+ warn(TAG, `failed to load model: ${err.message?.slice(0, 100)}`);
42
+ this.loading = false;
43
+ });
44
+ }
45
+
46
+ /** Embed one or more texts. Returns array of float32 arrays (384 dims each). */
47
+ async embed(texts: string[]): Promise<Float32Array[]> {
48
+ if (!this.pipeline) {
49
+ throw new Error("Embedding model not loaded yet");
50
+ }
51
+
52
+ const results: Float32Array[] = [];
53
+ for (const text of texts) {
54
+ const output = await this.pipeline(text, { pooling: "mean", normalize: true });
55
+ results.push(new Float32Array(output.data));
56
+ }
57
+ return results;
58
+ }
59
+
60
+ /** Compute cosine similarity between two embeddings. */
61
+ static cosine(a: Float32Array, b: Float32Array): number {
62
+ let dot = 0;
63
+ for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
64
+ return dot;
65
+ }
66
+ }
@@ -237,6 +237,7 @@ export class Escalator {
237
237
  escalationReason,
238
238
  undefined,
239
239
  this.pendingUserCommand ?? undefined,
240
+ this.deps.wsHandler.getState().responseSize ?? "medium",
240
241
  );
241
242
 
242
243
  // Clear user command after building the message (consumed once)
@@ -1,4 +1,4 @@
1
- import type { ContextWindow, AgentEntry, EscalationMode, FeedbackRecord, UserCommand } from "../types.js";
1
+ import type { ContextWindow, AgentEntry, EscalationMode, FeedbackRecord, UserCommand, ResponseSize } from "../types.js";
2
2
  import { normalizeAppName } from "../agent/context-window.js";
3
3
  import { levelFor, applyLevel } from "../privacy/index.js";
4
4
 
@@ -67,11 +67,18 @@ export function isCodingContext(context: ContextWindow): CodingContextResult {
67
67
  };
68
68
  }
69
69
 
70
- function getInstructions(mode: EscalationMode, context: ContextWindow): string {
70
+ function sizeInstruction(size: ResponseSize): string {
71
+ switch (size) {
72
+ case "small": return "1-2 sentences";
73
+ case "large": return "3-5 sentences";
74
+ default: return "2-3 sentences";
75
+ }
76
+ }
77
+
78
+ function getInstructions(context: ContextWindow): string {
71
79
  const { coding, needsSolution } = isCodingContext(context);
72
80
 
73
81
  if (needsSolution) {
74
- // Coding challenge/problem - be very action-oriented
75
82
  return `The user is working on a coding problem. Be PROACTIVE and SOLVE IT:
76
83
 
77
84
  1. Provide a solution approach and working code based on what you can see
@@ -92,13 +99,10 @@ Response should be actionable: working code with brief explanation.`;
92
99
  - If it's a non-code file (config, markdown, email): share a relevant insight, action item, or connection to their current project
93
100
  - If context is minimal: tell a short clever joke (tech humor — never repeat recent ones)
94
101
 
95
- NEVER just describe what the user is doing. Every response must teach, suggest, or connect dots.
96
- (2-5 sentences, or more + code if there's an error or code question).`;
102
+ NEVER just describe what the user is doing. Every response must teach, suggest, or connect dots.`;
97
103
  }
98
104
 
99
- // Non-coding context — proactive insights instead of activity descriptions
100
- if (mode === "focus" || mode === "rich") {
101
- return `Based on the above, ALWAYS provide a useful response for the user's HUD.
105
+ return `Based on the above, ALWAYS provide a useful response for the user's HUD.
102
106
  Important: Do NOT respond with NO_REPLY — a response is always required.
103
107
 
104
108
  - If there's an error: investigate and suggest a fix
@@ -109,40 +113,25 @@ Important: Do NOT respond with NO_REPLY — a response is always required.
109
113
 
110
114
  NEVER just describe what the user is doing — they can see their own screen.
111
115
  NEVER respond with "standing by", "monitoring", or similar filler.
112
- Every response must teach something, suggest something, or connect dots the user hasn't noticed.
113
- (2-5 sentences). Be specific and actionable.`;
114
- }
115
-
116
- return `Based on the above, proactively help the user:
117
- - If there's an error: investigate and suggest a fix
118
- - If they seem stuck: offer guidance
119
- - If they're coding: provide relevant insights
120
- - Keep your response concise and actionable (2-5 sentences)`;
116
+ Every response must teach something, suggest something, or connect dots the user hasn't noticed.`;
121
117
  }
122
118
 
123
119
  /**
124
- * Build a structured escalation message with richness proportional to the context window preset.
125
- *
126
- * Expected message sizes:
127
- * lean (selective): ~7 KB / ~1,700 tokens
128
- * standard (focus): ~25 KB / ~6,000 tokens
129
- * rich: ~111 KB / ~28,000 tokens
120
+ * Build a structured escalation message with full context (rich mode).
130
121
  *
131
- * All fit within the 256 KB HTTP hooks limit and 200K+ model context.
132
- *
133
- * In selective mode, sections are prioritized by relevance:
134
- * - Error escalations prioritize error sections
135
- * - Question escalations prioritize audio sections
136
- * - App context is always included
122
+ * Always includes all sections (screen, audio, errors).
123
+ * Response length is controlled by the `responseSize` parameter (small/medium/large)
124
+ * which is set by the user via the HUD overlay slider.
137
125
  */
138
126
  export function buildEscalationMessage(
139
127
  digest: string,
140
128
  context: ContextWindow,
141
129
  entry: AgentEntry,
142
- mode: EscalationMode,
130
+ _mode: EscalationMode,
143
131
  escalationReason?: string,
144
132
  recentFeedback?: FeedbackRecord[],
145
133
  userCommand?: UserCommand,
134
+ responseSize: ResponseSize = "medium",
146
135
  ): string {
147
136
  const sections: string[] = [];
148
137
 
@@ -167,7 +156,6 @@ export function buildEscalationMessage(
167
156
  // Errors — extracted from OCR, full stack traces in rich mode
168
157
  const errors = context.screen.filter(e => hasErrorPattern(e.ocr));
169
158
  const hasErrors = errors.length > 0;
170
- const hasQuestion = escalationReason?.startsWith("question:");
171
159
 
172
160
  // Privacy levels for agent_gateway destination
173
161
  let ocrLevel: import("../types.js").PrivacyLevel = "full";
@@ -183,99 +171,35 @@ export function buildEscalationMessage(
183
171
  const applyAudio = (text: string) => applyLevel(text.slice(0, context.preset.maxTranscriptChars), audioLevel, "audio");
184
172
  const applyTitle = (title: string | undefined) => title ? applyLevel(title, titlesLevel, "titles") : "";
185
173
 
186
- // In selective mode, prioritize sections based on escalation reason
187
- // In focus/rich modes, include everything
188
- if (mode === "selective") {
189
- // Error-triggered: prioritize errors, then screen
190
- if (hasErrors) {
191
- sections.push("## Errors (high priority)");
192
- for (const e of errors) {
193
- sections.push(`\`\`\`\n${applyOcr(e.ocr)}\n\`\`\``);
194
- }
195
- // Include screen context (reduced)
196
- if (context.screen.length > 0) {
197
- sections.push("## Screen (recent OCR)");
198
- for (const e of context.screen.slice(0, 5)) { // Limit in selective mode
199
- const ago = Math.round((Date.now() - e.ts) / 1000);
200
- const app = normalizeAppName(e.meta.app);
201
- const title = applyTitle(e.meta.windowTitle);
202
- const titlePart = title ? ` [${title}]` : "";
203
- sections.push(`- [${ago}s ago] [${app}]${titlePart} ${applyOcr(e.ocr)}`);
204
- }
205
- }
206
- }
207
- // Question-triggered: prioritize audio, then screen
208
- else if (hasQuestion) {
209
- if (context.audio.length > 0) {
210
- sections.push("## Audio (recent transcripts)");
211
- for (const e of context.audio) {
212
- const ago = Math.round((Date.now() - e.ts) / 1000);
213
- sections.push(`- [${ago}s ago] "${applyAudio(e.text)}"`);
214
- }
215
- }
216
- // Include screen context (reduced)
217
- if (context.screen.length > 0) {
218
- sections.push("## Screen (recent OCR)");
219
- for (const e of context.screen.slice(0, 5)) {
220
- const ago = Math.round((Date.now() - e.ts) / 1000);
221
- const app = normalizeAppName(e.meta.app);
222
- const title = applyTitle(e.meta.windowTitle);
223
- const titlePart = title ? ` [${title}]` : "";
224
- sections.push(`- [${ago}s ago] [${app}]${titlePart} ${applyOcr(e.ocr)}`);
225
- }
226
- }
227
- }
228
- // Other triggers: balanced sections
229
- else {
230
- if (context.screen.length > 0) {
231
- sections.push("## Screen (recent OCR)");
232
- for (const e of context.screen) {
233
- const ago = Math.round((Date.now() - e.ts) / 1000);
234
- const app = normalizeAppName(e.meta.app);
235
- const title = applyTitle(e.meta.windowTitle);
236
- const titlePart = title ? ` [${title}]` : "";
237
- sections.push(`- [${ago}s ago] [${app}]${titlePart} ${applyOcr(e.ocr)}`);
238
- }
239
- }
240
- if (context.audio.length > 0) {
241
- sections.push("## Audio (recent transcripts)");
242
- for (const e of context.audio) {
243
- const ago = Math.round((Date.now() - e.ts) / 1000);
244
- sections.push(`- [${ago}s ago] "${applyAudio(e.text)}"`);
245
- }
246
- }
247
- }
248
- } else {
249
- // Focus/rich mode: include all sections
250
- if (hasErrors) {
251
- sections.push("## Errors (high priority)");
252
- for (const e of errors) {
253
- sections.push(`\`\`\`\n${applyOcr(e.ocr)}\n\`\`\``);
254
- }
174
+ // Always include all sections (rich mode)
175
+ if (hasErrors) {
176
+ sections.push("## Errors (high priority)");
177
+ for (const e of errors) {
178
+ sections.push(`\`\`\`\n${applyOcr(e.ocr)}\n\`\`\``);
255
179
  }
180
+ }
256
181
 
257
- if (context.screen.length > 0) {
258
- sections.push("## Screen (recent OCR)");
259
- for (const e of context.screen) {
260
- const ago = Math.round((Date.now() - e.ts) / 1000);
261
- const app = normalizeAppName(e.meta.app);
262
- const title = applyTitle(e.meta.windowTitle);
263
- const titlePart = title ? ` [${title}]` : "";
264
- sections.push(`- [${ago}s ago] [${app}]${titlePart} ${applyOcr(e.ocr)}`);
265
- }
182
+ if (context.screen.length > 0) {
183
+ sections.push("## Screen (recent OCR)");
184
+ for (const e of context.screen) {
185
+ const ago = Math.round((Date.now() - e.ts) / 1000);
186
+ const app = normalizeAppName(e.meta.app);
187
+ const title = applyTitle(e.meta.windowTitle);
188
+ const titlePart = title ? ` [${title}]` : "";
189
+ sections.push(`- [${ago}s ago] [${app}]${titlePart} ${applyOcr(e.ocr)}`);
266
190
  }
191
+ }
267
192
 
268
- if (context.audio.length > 0) {
269
- sections.push("## Audio (recent transcripts)");
270
- for (const e of context.audio) {
271
- const ago = Math.round((Date.now() - e.ts) / 1000);
272
- sections.push(`- [${ago}s ago] "${applyAudio(e.text)}"`);
273
- }
193
+ if (context.audio.length > 0) {
194
+ sections.push("## Audio (recent transcripts)");
195
+ for (const e of context.audio) {
196
+ const ago = Math.round((Date.now() - e.ts) / 1000);
197
+ sections.push(`- [${ago}s ago] "${applyAudio(e.text)}"`);
274
198
  }
275
199
  }
276
200
 
277
- // Mode-specific instructions (now context-aware)
278
- sections.push(getInstructions(mode, context));
201
+ // Context-aware instructions (no size — that's in the response length section below)
202
+ sections.push(getInstructions(context));
279
203
 
280
204
  // Stale escalation hint — forces a proactive response after prolonged silence
281
205
  if (escalationReason === "stale") {
@@ -293,7 +217,10 @@ the local analyzer reported idle/no-change. Provide a PROACTIVE response:
293
217
  sections.push(formatInlineFeedback(recentFeedback));
294
218
  }
295
219
 
296
- sections.push("Respond naturally — this will appear on the user's HUD overlay.");
220
+ // Response length — single authoritative size instruction, placed last for salience
221
+ const limit = sizeInstruction(responseSize);
222
+ sections.push(`## Response Length
223
+ Your response MUST be ${limit}. This appears on the user's HUD overlay — be specific and actionable.`);
297
224
 
298
225
  return sections.join("\n\n");
299
226
  }
@@ -16,6 +16,7 @@ import { TraceStore } from "./trace/trace-store.js";
16
16
  import { FeedbackStore } from "./learning/feedback-store.js";
17
17
  import { SignalCollector } from "./learning/signal-collector.js";
18
18
  import { LocalCurationService } from "./learning/local-curation.js";
19
+ import { EmbeddingService } from "./embedding/service.js";
19
20
  import { createAppServer } from "./server.js";
20
21
  import { Profiler } from "./profiler.js";
21
22
  import { CostTracker } from "./cost/tracker.js";
@@ -70,7 +71,7 @@ async function queryKnowledgeFactsMulti(entities: string[], maxFacts: number): P
70
71
  for (const dbPath of dbPaths) {
71
72
  if (!existsSync(dbPath)) continue;
72
73
  try {
73
- const args = [scriptPath, "--db", dbPath, "--max-facts", String(maxFacts), "--format", "text"];
74
+ const args = [scriptPath, "--db", dbPath, "--max-facts", String(maxFacts), "--format", "compact"];
74
75
  if (entities.length > 0) args.push("--entities", JSON.stringify(entities));
75
76
  const out = execFileSync("python3", args, { timeout: 5000, encoding: "utf-8" }).trim();
76
77
  if (out) results.push(out);
@@ -338,11 +339,25 @@ async function main() {
338
339
  ? new FeedbackStore(config.learningConfig.feedbackDir, config.learningConfig.retentionDays)
339
340
  : null;
340
341
 
342
+ // ── Initialize embedding service (non-blocking) ──
343
+ const embeddingService = new EmbeddingService();
344
+ embeddingService.loadAsync(); // ~9s background load, server starts immediately
345
+
341
346
  // ── Initialize local knowledge pipeline ──
342
347
  const localCuration = new LocalCurationService();
343
- localCuration.distillPendingSession(); // Recover any session saved before a force-kill
348
+ // Distill pending session in background — don't block server startup
349
+ setImmediate(() => {
350
+ localCuration.distillPendingSession();
351
+ });
344
352
  localCuration.startPeriodicCuration();
345
353
 
354
+ // Wire incremental distillation: when feed buffer fills, distill before items are lost
355
+ localCuration.setSenseBuffer(senseBuffer);
356
+ localCuration.setRearmCallback(() => feedBuffer.rearmOnFull());
357
+ feedBuffer.onFull((items) => {
358
+ localCuration.distillIncremental(items);
359
+ });
360
+
346
361
  // ── Initialize escalation ──
347
362
  const escalator = new Escalator({
348
363
  feedBuffer,
@@ -668,6 +683,8 @@ async function main() {
668
683
  },
669
684
  getSpawnPending: () => escalator.getSpawnPending(),
670
685
  respondSpawn: (id: string, result: string) => escalator.respondSpawn(id, result),
686
+ embedTexts: (texts: string[]) => embeddingService.embed(texts),
687
+ isEmbeddingReady: () => embeddingService.ready,
671
688
  });
672
689
 
673
690
  // ── Wire overlay profiling ──
@@ -15,6 +15,7 @@ import { existsSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, appendF
15
15
  import { resolve, dirname } from "node:path";
16
16
  import { fileURLToPath } from "node:url";
17
17
  import type { FeedItem } from "../types.js";
18
+ import type { SenseBuffer } from "../buffers/sense-buffer.js";
18
19
  import { log, warn, error } from "../log.js";
19
20
 
20
21
  const TAG = "local-curation";
@@ -55,6 +56,10 @@ export class LocalCurationService {
55
56
  private scriptsDir: string;
56
57
  private sessionStartTs: number;
57
58
  private curationTimer: ReturnType<typeof setInterval> | null = null;
59
+ private _lastDistilledTs = 0; // timestamp of last incremental distillation
60
+ private _incrementalRunning = false;
61
+ private _rearmCb: (() => void) | null = null; // callback to re-arm feed buffer onFull
62
+ private _senseBuffer: SenseBuffer | null = null;
58
63
 
59
64
  constructor() {
60
65
  this.memoryDir = resolveMemoryDir();
@@ -90,6 +95,100 @@ export class LocalCurationService {
90
95
  }
91
96
  }
92
97
 
98
+ /** Timestamp of last incremental distillation (items before this are already distilled). */
99
+ get lastDistilledTs(): number {
100
+ return this._lastDistilledTs;
101
+ }
102
+
103
+ /** Set the callback to re-arm the feed buffer's onFull trigger after distillation. */
104
+ setRearmCallback(cb: () => void): void {
105
+ this._rearmCb = cb;
106
+ }
107
+
108
+ /** Attach sense buffer for screen context in distillation. */
109
+ setSenseBuffer(sb: SenseBuffer): void {
110
+ this._senseBuffer = sb;
111
+ }
112
+
113
+ /** Extract screen context from sense buffer as feed-item-compatible entries. */
114
+ private getSenseContext(): Array<{ text: string; ts: number; source: string; channel: string }> {
115
+ if (!this._senseBuffer) return [];
116
+ const events = this._senseBuffer.queryByTime(this._lastDistilledTs || (Date.now() - 30 * 60 * 1000));
117
+ const items: Array<{ text: string; ts: number; source: string; channel: string }> = [];
118
+ for (const evt of events) {
119
+ // Include OCR text (what's visible on screen)
120
+ if (evt.ocr && evt.ocr.length > 20) {
121
+ const app = evt.semantic?.context?.app || "unknown";
122
+ items.push({
123
+ text: `[screen: ${app}] ${evt.ocr}`,
124
+ ts: evt.ts,
125
+ source: "sense",
126
+ channel: "screen",
127
+ });
128
+ }
129
+ // Include vision summaries (AI description of screen content)
130
+ if (evt.semantic?.visible?.summary) {
131
+ items.push({
132
+ text: `[screen-context] ${evt.semantic.visible.summary}`,
133
+ ts: evt.ts,
134
+ source: "sense",
135
+ channel: "screen",
136
+ });
137
+ }
138
+ }
139
+ return items;
140
+ }
141
+
142
+ /**
143
+ * Incremental distillation — called when the feed buffer reaches capacity.
144
+ * Distills the current buffer contents before they fall off the ring buffer.
145
+ * Runs async so it doesn't block new items from arriving.
146
+ */
147
+ async distillIncremental(feedItems: FeedItem[]): Promise<void> {
148
+ if (this._incrementalRunning) {
149
+ log(TAG, "incremental distillation already running — skipping");
150
+ return;
151
+ }
152
+ this._incrementalRunning = true;
153
+
154
+ try {
155
+ const itemCount = feedItems.length;
156
+ log(TAG, `incremental distillation: ${itemCount} items (buffer full)`);
157
+
158
+ const sessionMeta = {
159
+ ts: new Date().toISOString(),
160
+ sessionKey: "local-incremental",
161
+ durationMs: Date.now() - this.sessionStartTs,
162
+ };
163
+
164
+ const audioItems = feedItems.map(item => ({
165
+ text: item.text,
166
+ ts: item.ts,
167
+ source: item.source || "unknown",
168
+ channel: item.channel || "agent",
169
+ }));
170
+
171
+ // Merge screen context from sense buffer (OCR + vision summaries)
172
+ const senseItems = this.getSenseContext();
173
+ const transcript = [...audioItems, ...senseItems].sort((a, b) => a.ts - b.ts);
174
+
175
+ if (senseItems.length > 0) {
176
+ log(TAG, `including ${senseItems.length} screen context items in distillation`);
177
+ }
178
+
179
+ if (this.runDistillation(transcript, sessionMeta)) {
180
+ this._lastDistilledTs = Date.now();
181
+ log(TAG, `incremental distillation complete — ${itemCount} audio + ${senseItems.length} screen items processed`);
182
+ }
183
+ } catch (err: any) {
184
+ warn(TAG, `incremental distillation failed: ${err.message?.slice(0, 100)}`);
185
+ } finally {
186
+ this._incrementalRunning = false;
187
+ // Re-arm the buffer callback so next fill triggers another distillation
188
+ this._rearmCb?.();
189
+ }
190
+ }
191
+
93
192
  /**
94
193
  * Save feed items to disk for deferred distillation.
95
194
  * Called during shutdown — instant (no LLM), survives tsx force-kill.
@@ -160,13 +259,20 @@ export class LocalCurationService {
160
259
  * picked up on next startup via distillPendingSession().
161
260
  */
162
261
  async distillSession(feedItems: FeedItem[]): Promise<void> {
163
- if (feedItems.length < 1) {
164
- log(TAG, `skipping distillation — only ${feedItems.length} feed items`);
262
+ // Filter to only items not yet covered by incremental distillation
263
+ const items = this._lastDistilledTs > 0
264
+ ? feedItems.filter(i => i.ts > this._lastDistilledTs)
265
+ : feedItems;
266
+
267
+ if (items.length < 1) {
268
+ log(TAG, `skipping shutdown distillation — all ${feedItems.length} items already distilled incrementally`);
165
269
  return;
166
270
  }
167
271
 
272
+ log(TAG, `shutdown distillation: ${items.length} items (${feedItems.length - items.length} already distilled incrementally)`);
273
+
168
274
  // Step 0: Save to disk FIRST — survives force-kill
169
- this.savePendingSession(feedItems);
275
+ this.savePendingSession(items);
170
276
 
171
277
  const sessionMeta = {
172
278
  ts: new Date().toISOString(),
@@ -174,7 +280,7 @@ export class LocalCurationService {
174
280
  durationMs: Date.now() - this.sessionStartTs,
175
281
  };
176
282
 
177
- const transcript = feedItems.map(item => ({
283
+ const transcript = items.map(item => ({
178
284
  text: item.text,
179
285
  ts: item.ts,
180
286
  source: item.source || "unknown",
@@ -204,13 +310,37 @@ export class LocalCurationService {
204
310
  log(TAG, `distilling session: ${transcript.length} items, ${Math.round(sessionMeta.durationMs / 60000)} min`);
205
311
 
206
312
  try {
313
+ // Step 0.5: Retrieve existing entities for context (Mem0 retrieve-before-extract pattern)
314
+ let existingEntities = "";
315
+ const dbPath = resolve(this.memoryDir, "knowledge-graph.db");
316
+ if (existsSync(dbPath)) {
317
+ try {
318
+ existingEntities = execFileSync("python3", [
319
+ resolve(this.scriptsDir, "graph_query.py"),
320
+ "--db", dbPath,
321
+ "--top", "20",
322
+ "--format", "compact",
323
+ ], {
324
+ timeout: 5_000,
325
+ encoding: "utf-8",
326
+ env: { ...process.env, PYTHONPATH: this.scriptsDir },
327
+ }).trim();
328
+ } catch {
329
+ // Non-fatal — distillation works without existing entities
330
+ }
331
+ }
332
+
207
333
  // Step 1: Distill session into a SessionDigest
208
- const digestJson = execFileSync("python3", [
334
+ const distillerArgs = [
209
335
  resolve(this.scriptsDir, "session_distiller.py"),
210
336
  "--memory-dir", this.memoryDir,
211
337
  "--transcript", JSON.stringify(transcript),
212
338
  "--session-meta", JSON.stringify(sessionMeta),
213
- ], {
339
+ ];
340
+ if (existingEntities) {
341
+ distillerArgs.push("--existing-entities", existingEntities);
342
+ }
343
+ const digestJson = execFileSync("python3", distillerArgs, {
214
344
  timeout: 30_000,
215
345
  encoding: "utf-8",
216
346
  env: { ...process.env, PYTHONPATH: this.scriptsDir },
@@ -236,7 +366,7 @@ export class LocalCurationService {
236
366
  "--memory-dir", this.memoryDir,
237
367
  "--digest", JSON.stringify(digest),
238
368
  ], {
239
- timeout: 30_000,
369
+ timeout: 60_000, // 60s: LLM call (~10s) + embedding dedup (~5s) + graph ops
240
370
  encoding: "utf-8",
241
371
  env: { ...process.env, PYTHONPATH: this.scriptsDir },
242
372
  });
@@ -1,5 +1,5 @@
1
1
  import { execFile } from "node:child_process";
2
- import type { InboundMessage } from "../types.js";
2
+ import type { InboundMessage, ResponseSize } from "../types.js";
3
3
  import type { WsHandler } from "./ws-handler.js";
4
4
  import type { AudioPipeline } from "../audio/pipeline.js";
5
5
  import type { CoreConfig } from "../types.js";
@@ -101,7 +101,7 @@ export function setupCommands(deps: CommandDeps): void {
101
101
  break;
102
102
  }
103
103
  case "command": {
104
- handleCommand(msg.action, deps);
104
+ handleCommand(msg, deps);
105
105
  log(TAG, `command processed: ${msg.action}`);
106
106
  break;
107
107
  }
@@ -109,8 +109,11 @@ export function setupCommands(deps: CommandDeps): void {
109
109
  });
110
110
  }
111
111
 
112
- function handleCommand(action: string, deps: CommandDeps): void {
112
+ const VALID_RESPONSE_SIZES = new Set<ResponseSize>(["small", "medium", "large"]);
113
+
114
+ function handleCommand(msg: InboundMessage & { action: string }, deps: CommandDeps): void {
113
115
  const { wsHandler, systemAudioPipeline, micPipeline } = deps;
116
+ const action = msg.action;
114
117
 
115
118
  switch (action) {
116
119
  case "toggle_audio": {
@@ -173,6 +176,16 @@ function handleCommand(action: string, deps: CommandDeps): void {
173
176
  log(TAG, `escalation toggled ${nowActive ? "ON" : "OFF"}`);
174
177
  break;
175
178
  }
179
+ case "set_response_size": {
180
+ const size = (msg as any).responseSize as string;
181
+ if (VALID_RESPONSE_SIZES.has(size as ResponseSize)) {
182
+ wsHandler.updateState({ responseSize: size as ResponseSize });
183
+ log(TAG, `response size set to ${size}`);
184
+ } else {
185
+ log(TAG, `invalid response size: ${size}`);
186
+ }
187
+ break;
188
+ }
176
189
  case "open_settings": {
177
190
  const envPath = loadedEnvPath || `${process.env.HOME || process.env.USERPROFILE}/.sinain/.env`;
178
191
  const cmd = process.platform === "win32" ? "notepad" : "open";