npm - @geravant/sinain - Versions diffs - 1.11.0 → 1.13.0 - Mend

@geravant/sinain 1.11.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

package/sinain-core/package.json CHANGED Viewed

@@ -12,6 +12,7 @@
     "eval:quick": "tsx eval/harness.ts --scenarios eval/scenarios/ --runs 1 --fast --report /dev/stdout"
   },
   "dependencies": {
+    "@huggingface/transformers": "^4.0.1",
     "@types/node": "^22.19.7",
     "@types/ws": "^8.18.1",
     "tsx": "^4.21.0",

package/sinain-core/src/buffers/feed-buffer.ts CHANGED Viewed

@@ -10,11 +10,31 @@ export class FeedBuffer {
   private _version = 0;
   private maxSize: number;
   private _hwm = 0;
+  private _onFullCb: ((items: FeedItem[]) => void) | null = null;
+  private _onFullArmed = true;
+  private _onFullVersion = 0; // version at last re-arm
   constructor(maxSize = 100) {
     this.maxSize = maxSize;
   }
+  /**
+   * Install the listener that push() notifies once the buffer is at capacity.
+   *
+   * push() only fires it while the trigger is armed and the version counter
+   * has advanced by at least floor(maxSize / 2) since the recorded baseline,
+   * so the same content is not reported over and over. The listener receives
+   * a copied snapshot of the items and is invoked from a microtask.
+   *
+   * Registering resets the baseline to 0 and arms the trigger; any listener
+   * registered earlier is replaced.
+   */
+  onFull(cb: (items: FeedItem[]) => void): void {
+    // Reset the trigger state first, then publish the listener.
+    this._onFullVersion = 0;
+    this._onFullArmed = true;
+    this._onFullCb = cb;
+  }
+  /**
+   * Arm the onFull trigger again and move its baseline to the current version,
+   * so push() waits for a fresh batch of items before firing the listener.
+   * Intended to be called once an incremental distillation has finished.
+   */
+  rearmOnFull(): void {
+    this._onFullArmed = true;
+    this._onFullVersion = this._version;
+  }
   /** Push a new feed item. Returns the created item. */
   push(text: string, priority: Priority, source: FeedItem["source"], channel: FeedChannel = "stream"): FeedItem {
     const item: FeedItem = {
@@ -27,6 +47,18 @@ export class FeedBuffer {
     };
     this.items.push(item);
     if (this.items.length > this._hwm) this._hwm = this.items.length;
+    // Fire onFull when buffer is at capacity AND enough new items have arrived
+    // since the last distillation (at least half the buffer replaced)
+    const newSinceRearm = this._version - this._onFullVersion;
+    if (this.items.length >= this.maxSize
+        && this._onFullCb && this._onFullArmed
+        && newSinceRearm >= Math.floor(this.maxSize / 2)) {
+      this._onFullArmed = false;
+      const snapshot = [...this.items];
+      queueMicrotask(() => this._onFullCb!(snapshot));
+    }
     if (this.items.length > this.maxSize) {
       this.items.shift();
     }

package/sinain-core/src/embedding/service.ts ADDED Viewed

@@ -0,0 +1,66 @@
+/**
+ * EmbeddingService — in-process sentence embeddings for knowledge dedup + retrieval.
+ *
+ * Loads all-MiniLM-L6-v2 via @huggingface/transformers (ONNX runtime, no Python).
+ * Model loads async at startup (~9s), embeddings are 2-4ms per text after that.
+ *
+ * Used by:
+ * - knowledge_integrator.py (via POST /embed) for dedup before asserting facts
+ * - graph_query.py (via POST /embed) for semantic retrieval
+ */
+import { log, warn } from "../log.js";
+/** Log tag used for every message emitted by this module. */
+const TAG = "embedding";
+/** Model identifier handed to the transformers.js pipeline factory. */
+const MODEL_ID = "Xenova/all-MiniLM-L6-v2";
+/** Minimal call signature of the feature-extraction pipeline as used below. */
+type Pipeline = (texts: string | string[], options: { pooling: string; normalize: boolean }) => Promise<{ data: Float32Array; dims: number[] }>;
+/**
+ * In-process sentence-embedding service.
+ *
+ * Call loadAsync() once at startup, poll `ready`, then call embed().
+ * embed() throws until the model has finished loading.
+ */
+export class EmbeddingService {
+  private pipeline: Pipeline | null = null;
+  private loading = false; // true only while a load attempt is in flight
+  private _ready = false;
+  /** True once the model has loaded and embed() can be used. */
+  get ready(): boolean {
+    return this._ready;
+  }
+  /**
+   * Load the model in the background. Non-blocking — returns immediately.
+   * A no-op while a load is in flight or after one has succeeded; after a
+   * failed attempt it may be called again to retry.
+   */
+  loadAsync(): void {
+    if (this.loading || this._ready) return;
+    this.loading = true;
+    const start = Date.now();
+    log(TAG, `loading ${MODEL_ID} (background)...`);
+    // Fire-and-forget: loadModel() handles its own errors and never rejects.
+    void this.loadModel(start);
+  }
+  /** Import the library, build the pipeline and flip `ready`; failures are logged, not thrown. */
+  private async loadModel(start: number): Promise<void> {
+    try {
+      const { pipeline } = await import("@huggingface/transformers");
+      this.pipeline = await pipeline("feature-extraction", MODEL_ID) as unknown as Pipeline;
+      this._ready = true;
+      log(TAG, `model ready in ${Date.now() - start}ms (384 dims)`);
+    } catch (err: unknown) {
+      // Narrow before reading .message so non-Error rejections are still logged meaningfully.
+      const reason = err instanceof Error ? err.message : String(err);
+      warn(TAG, `failed to load model: ${reason.slice(0, 100)}`);
+    } finally {
+      // Clear the in-flight flag on success as well as failure.
+      this.loading = false;
+    }
+  }
+  /**
+   * Embed one or more texts, one pipeline call per text, in input order.
+   *
+   * @param texts Texts to embed; an empty array yields an empty result.
+   * @returns One mean-pooled, normalized Float32Array per input text.
+   * @throws Error if the model has not finished loading.
+   */
+  async embed(texts: string[]): Promise<Float32Array[]> {
+    if (!this.pipeline) {
+      throw new Error("Embedding model not loaded yet");
+    }
+    const results: Float32Array[] = [];
+    for (const text of texts) {
+      const output = await this.pipeline(text, { pooling: "mean", normalize: true });
+      // Copy into a fresh array so the result does not alias the pipeline's output.
+      results.push(new Float32Array(output.data));
+    }
+    return results;
+  }
+  /**
+   * Compute cosine similarity between two embeddings.
+   *
+   * The dot product is divided by both vector norms, so the result is correct
+   * for inputs that are not unit-length. For the normalized vectors produced
+   * by embed() this equals the plain dot product up to rounding.
+   *
+   * @param a First embedding; its length drives the comparison.
+   * @param b Second embedding; expected to be the same length as `a`.
+   * @returns Similarity in [-1, 1]; 0 when either vector has zero magnitude.
+   */
+  static cosine(a: Float32Array, b: Float32Array): number {
+    let dot = 0;
+    let normA = 0;
+    let normB = 0;
+    for (let i = 0; i < a.length; i++) {
+      dot += a[i] * b[i];
+      normA += a[i] * a[i];
+      normB += b[i] * b[i];
+    }
+    const denom = Math.sqrt(normA * normB);
+    // A zero vector has no direction; report "no similarity" instead of NaN.
+    return denom === 0 ? 0 : dot / denom;
+  }
+}

package/sinain-core/src/escalation/escalator.ts CHANGED Viewed

@@ -237,6 +237,7 @@ export class Escalator {
       escalationReason,
       undefined,
       this.pendingUserCommand ?? undefined,
+      this.deps.wsHandler.getState().responseSize ?? "medium",
     );
     // Clear user command after building the message (consumed once)

package/sinain-core/src/escalation/message-builder.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { ContextWindow, AgentEntry, EscalationMode, FeedbackRecord, UserCommand } from "../types.js";
+import type { ContextWindow, AgentEntry, EscalationMode, FeedbackRecord, UserCommand, ResponseSize } from "../types.js";
 import { normalizeAppName } from "../agent/context-window.js";
 import { levelFor, applyLevel } from "../privacy/index.js";
@@ -67,11 +67,18 @@ export function isCodingContext(context: ContextWindow): CodingContextResult {
   };
 }
-function getInstructions(mode: EscalationMode, context: ContextWindow): string {
+/**
+ * Translate the user's response-size setting into the sentence budget quoted
+ * in the prompt's "Response Length" section.
+ */
+function sizeInstruction(size: ResponseSize): string {
+  if (size === "small") return "1-2 sentences";
+  if (size === "large") return "3-5 sentences";
+  // "medium" — and anything outside the known set — gets the middle budget.
+  return "2-3 sentences";
+}
+function getInstructions(context: ContextWindow): string {
   const { coding, needsSolution } = isCodingContext(context);
   if (needsSolution) {
-    // Coding challenge/problem - be very action-oriented
     return `The user is working on a coding problem. Be PROACTIVE and SOLVE IT:
 1. Provide a solution approach and working code based on what you can see
@@ -92,13 +99,10 @@ Response should be actionable: working code with brief explanation.`;
 - If it's a non-code file (config, markdown, email): share a relevant insight, action item, or connection to their current project
 - If context is minimal: tell a short clever joke (tech humor — never repeat recent ones)
-NEVER just describe what the user is doing. Every response must teach, suggest, or connect dots.
-(2-5 sentences, or more + code if there's an error or code question).`;
+NEVER just describe what the user is doing. Every response must teach, suggest, or connect dots.`;
   }
-  // Non-coding context — proactive insights instead of activity descriptions
-  if (mode === "focus" || mode === "rich") {
-    return `Based on the above, ALWAYS provide a useful response for the user's HUD.
+  return `Based on the above, ALWAYS provide a useful response for the user's HUD.
 Important: Do NOT respond with NO_REPLY — a response is always required.
 - If there's an error: investigate and suggest a fix
@@ -109,40 +113,25 @@ Important: Do NOT respond with NO_REPLY — a response is always required.
 NEVER just describe what the user is doing — they can see their own screen.
 NEVER respond with "standing by", "monitoring", or similar filler.
-Every response must teach something, suggest something, or connect dots the user hasn't noticed.
-(2-5 sentences). Be specific and actionable.`;
-  }
-  return `Based on the above, proactively help the user:
-- If there's an error: investigate and suggest a fix
-- If they seem stuck: offer guidance
-- If they're coding: provide relevant insights
-- Keep your response concise and actionable (2-5 sentences)`;
+Every response must teach something, suggest something, or connect dots the user hasn't noticed.`;
 }
 /**
- * Build a structured escalation message with richness proportional to the context window preset.
- *
- * Expected message sizes:
- *   lean (selective):  ~7 KB  / ~1,700 tokens
- *   standard (focus):  ~25 KB / ~6,000 tokens
- *   rich:              ~111 KB / ~28,000 tokens
+ * Build a structured escalation message with full context (rich mode).
  *
- * All fit within the 256 KB HTTP hooks limit and 200K+ model context.
- *
- * In selective mode, sections are prioritized by relevance:
- * - Error escalations prioritize error sections
- * - Question escalations prioritize audio sections
- * - App context is always included
+ * Always includes all sections (screen, audio, errors).
+ * Response length is controlled by the `responseSize` parameter (small/medium/large)
+ * which is set by the user via the HUD overlay slider.
  */
 export function buildEscalationMessage(
   digest: string,
   context: ContextWindow,
   entry: AgentEntry,
-  mode: EscalationMode,
+  _mode: EscalationMode,
   escalationReason?: string,
   recentFeedback?: FeedbackRecord[],
   userCommand?: UserCommand,
+  responseSize: ResponseSize = "medium",
 ): string {
   const sections: string[] = [];
@@ -167,7 +156,6 @@ export function buildEscalationMessage(
   // Errors — extracted from OCR, full stack traces in rich mode
   const errors = context.screen.filter(e => hasErrorPattern(e.ocr));
   const hasErrors = errors.length > 0;
-  const hasQuestion = escalationReason?.startsWith("question:");
   // Privacy levels for agent_gateway destination
   let ocrLevel: import("../types.js").PrivacyLevel = "full";
@@ -183,99 +171,35 @@ export function buildEscalationMessage(
   const applyAudio = (text: string) => applyLevel(text.slice(0, context.preset.maxTranscriptChars), audioLevel, "audio");
   const applyTitle = (title: string | undefined) => title ? applyLevel(title, titlesLevel, "titles") : "";
-  // In selective mode, prioritize sections based on escalation reason
-  // In focus/rich modes, include everything
-  if (mode === "selective") {
-    // Error-triggered: prioritize errors, then screen
-    if (hasErrors) {
-      sections.push("## Errors (high priority)");
-      for (const e of errors) {
-        sections.push(`\`\`\`\n${applyOcr(e.ocr)}\n\`\`\``);
-      }
-      // Include screen context (reduced)
-      if (context.screen.length > 0) {
-        sections.push("## Screen (recent OCR)");
-        for (const e of context.screen.slice(0, 5)) { // Limit in selective mode
-          const ago = Math.round((Date.now() - e.ts) / 1000);
-          const app = normalizeAppName(e.meta.app);
-          const title = applyTitle(e.meta.windowTitle);
-          const titlePart = title ? ` [${title}]` : "";
-          sections.push(`- [${ago}s ago] [${app}]${titlePart} ${applyOcr(e.ocr)}`);
-        }
-      }
-    }
-    // Question-triggered: prioritize audio, then screen
-    else if (hasQuestion) {
-      if (context.audio.length > 0) {
-        sections.push("## Audio (recent transcripts)");
-        for (const e of context.audio) {
-          const ago = Math.round((Date.now() - e.ts) / 1000);
-          sections.push(`- [${ago}s ago] "${applyAudio(e.text)}"`);
-        }
-      }
-      // Include screen context (reduced)
-      if (context.screen.length > 0) {
-        sections.push("## Screen (recent OCR)");
-        for (const e of context.screen.slice(0, 5)) {
-          const ago = Math.round((Date.now() - e.ts) / 1000);
-          const app = normalizeAppName(e.meta.app);
-          const title = applyTitle(e.meta.windowTitle);
-          const titlePart = title ? ` [${title}]` : "";
-          sections.push(`- [${ago}s ago] [${app}]${titlePart} ${applyOcr(e.ocr)}`);
-        }
-      }
-    }
-    // Other triggers: balanced sections
-    else {
-      if (context.screen.length > 0) {
-        sections.push("## Screen (recent OCR)");
-        for (const e of context.screen) {
-          const ago = Math.round((Date.now() - e.ts) / 1000);
-          const app = normalizeAppName(e.meta.app);
-          const title = applyTitle(e.meta.windowTitle);
-          const titlePart = title ? ` [${title}]` : "";
-          sections.push(`- [${ago}s ago] [${app}]${titlePart} ${applyOcr(e.ocr)}`);
-        }
-      }
-      if (context.audio.length > 0) {
-        sections.push("## Audio (recent transcripts)");
-        for (const e of context.audio) {
-          const ago = Math.round((Date.now() - e.ts) / 1000);
-          sections.push(`- [${ago}s ago] "${applyAudio(e.text)}"`);
-        }
-      }
-    }
-  } else {
-    // Focus/rich mode: include all sections
-    if (hasErrors) {
-      sections.push("## Errors (high priority)");
-      for (const e of errors) {
-        sections.push(`\`\`\`\n${applyOcr(e.ocr)}\n\`\`\``);
-      }
+  // Always include all sections (rich mode)
+  if (hasErrors) {
+    sections.push("## Errors (high priority)");
+    for (const e of errors) {
+      sections.push(`\`\`\`\n${applyOcr(e.ocr)}\n\`\`\``);
     }
+  }
-    if (context.screen.length > 0) {
-      sections.push("## Screen (recent OCR)");
-      for (const e of context.screen) {
-        const ago = Math.round((Date.now() - e.ts) / 1000);
-        const app = normalizeAppName(e.meta.app);
-        const title = applyTitle(e.meta.windowTitle);
-        const titlePart = title ? ` [${title}]` : "";
-        sections.push(`- [${ago}s ago] [${app}]${titlePart} ${applyOcr(e.ocr)}`);
-      }
+  if (context.screen.length > 0) {
+    sections.push("## Screen (recent OCR)");
+    for (const e of context.screen) {
+      const ago = Math.round((Date.now() - e.ts) / 1000);
+      const app = normalizeAppName(e.meta.app);
+      const title = applyTitle(e.meta.windowTitle);
+      const titlePart = title ? ` [${title}]` : "";
+      sections.push(`- [${ago}s ago] [${app}]${titlePart} ${applyOcr(e.ocr)}`);
     }
+  }
-    if (context.audio.length > 0) {
-      sections.push("## Audio (recent transcripts)");
-      for (const e of context.audio) {
-        const ago = Math.round((Date.now() - e.ts) / 1000);
-        sections.push(`- [${ago}s ago] "${applyAudio(e.text)}"`);
-      }
+  if (context.audio.length > 0) {
+    sections.push("## Audio (recent transcripts)");
+    for (const e of context.audio) {
+      const ago = Math.round((Date.now() - e.ts) / 1000);
+      sections.push(`- [${ago}s ago] "${applyAudio(e.text)}"`);
     }
   }
-  // Mode-specific instructions (now context-aware)
-  sections.push(getInstructions(mode, context));
+  // Context-aware instructions (no size — that's in the response length section below)
+  sections.push(getInstructions(context));
   // Stale escalation hint — forces a proactive response after prolonged silence
   if (escalationReason === "stale") {
@@ -293,7 +217,10 @@ the local analyzer reported idle/no-change. Provide a PROACTIVE response:
     sections.push(formatInlineFeedback(recentFeedback));
   }
-  sections.push("Respond naturally — this will appear on the user's HUD overlay.");
+  // Response length — single authoritative size instruction, placed last for salience
+  const limit = sizeInstruction(responseSize);
+  sections.push(`## Response Length
+Your response MUST be ${limit}. This appears on the user's HUD overlay — be specific and actionable.`);
   return sections.join("\n\n");
 }

package/sinain-core/src/index.ts CHANGED Viewed

@@ -16,6 +16,7 @@ import { TraceStore } from "./trace/trace-store.js";
 import { FeedbackStore } from "./learning/feedback-store.js";
 import { SignalCollector } from "./learning/signal-collector.js";
 import { LocalCurationService } from "./learning/local-curation.js";
+import { EmbeddingService } from "./embedding/service.js";
 import { createAppServer } from "./server.js";
 import { Profiler } from "./profiler.js";
 import { CostTracker } from "./cost/tracker.js";
@@ -70,7 +71,7 @@ async function queryKnowledgeFactsMulti(entities: string[], maxFacts: number): P
   for (const dbPath of dbPaths) {
     if (!existsSync(dbPath)) continue;
     try {
-      const args = [scriptPath, "--db", dbPath, "--max-facts", String(maxFacts), "--format", "text"];
+      const args = [scriptPath, "--db", dbPath, "--max-facts", String(maxFacts), "--format", "compact"];
       if (entities.length > 0) args.push("--entities", JSON.stringify(entities));
       const out = execFileSync("python3", args, { timeout: 5000, encoding: "utf-8" }).trim();
       if (out) results.push(out);
@@ -338,11 +339,25 @@ async function main() {
     ? new FeedbackStore(config.learningConfig.feedbackDir, config.learningConfig.retentionDays)
     : null;
+  // ── Initialize embedding service (non-blocking) ──
+  const embeddingService = new EmbeddingService();
+  embeddingService.loadAsync(); // ~9s background load, server starts immediately
   // ── Initialize local knowledge pipeline ──
   const localCuration = new LocalCurationService();
-  localCuration.distillPendingSession(); // Recover any session saved before a force-kill
+  // Distill pending session in background — don't block server startup
+  setImmediate(() => {
+    localCuration.distillPendingSession();
+  });
   localCuration.startPeriodicCuration();
+  // Wire incremental distillation: when feed buffer fills, distill before items are lost
+  localCuration.setSenseBuffer(senseBuffer);
+  localCuration.setRearmCallback(() => feedBuffer.rearmOnFull());
+  feedBuffer.onFull((items) => {
+    localCuration.distillIncremental(items);
+  });
   // ── Initialize escalation ──
   const escalator = new Escalator({
     feedBuffer,
@@ -668,6 +683,8 @@ async function main() {
     },
     getSpawnPending: () => escalator.getSpawnPending(),
     respondSpawn: (id: string, result: string) => escalator.respondSpawn(id, result),
+    embedTexts: (texts: string[]) => embeddingService.embed(texts),
+    isEmbeddingReady: () => embeddingService.ready,
   });
   // ── Wire overlay profiling ──

package/sinain-core/src/learning/local-curation.ts CHANGED Viewed

@@ -15,6 +15,7 @@ import { existsSync, mkdirSync, writeFileSync, readFileSync, unlinkSync, appendF
 import { resolve, dirname } from "node:path";
 import { fileURLToPath } from "node:url";
 import type { FeedItem } from "../types.js";
+import type { SenseBuffer } from "../buffers/sense-buffer.js";
 import { log, warn, error } from "../log.js";
 const TAG = "local-curation";
@@ -55,6 +56,10 @@ export class LocalCurationService {
   private scriptsDir: string;
   private sessionStartTs: number;
   private curationTimer: ReturnType<typeof setInterval> | null = null;
+  private _lastDistilledTs = 0; // timestamp of last incremental distillation
+  private _incrementalRunning = false;
+  private _rearmCb: (() => void) | null = null; // callback to re-arm feed buffer onFull
+  private _senseBuffer: SenseBuffer | null = null;
   constructor() {
     this.memoryDir = resolveMemoryDir();
@@ -90,6 +95,100 @@ export class LocalCurationService {
     }
   }
+  /**
+   * Watermark left by the most recent successful incremental distillation, as a
+   * Date.now() millisecond timestamp; 0 until one has succeeded. distillSession()
+   * only processes feed items whose ts is later than this value.
+   */
+  get lastDistilledTs(): number {
+    return this._lastDistilledTs;
+  }
+  /**
+   * Register the hook that re-arms the feed buffer's onFull trigger.
+   * distillIncremental() invokes it in its finally block, i.e. after every
+   * attempted run; a later call replaces the previously registered hook.
+   */
+  setRearmCallback(cb: () => void): void {
+    this._rearmCb = cb;
+  }
+  /**
+   * Attach the sense buffer that getSenseContext() queries for screen context
+   * (OCR text and vision summaries). Until one is attached, incremental
+   * distillation runs on feed items alone.
+   */
+  setSenseBuffer(sb: SenseBuffer): void {
+    this._senseBuffer = sb;
+  }
+  /**
+   * Collect recent screen context from the sense buffer, shaped like the
+   * transcript entries built from feed items.
+   *
+   * The query starts at the last incremental distillation, or 30 minutes ago
+   * when none has run yet. One event can yield up to two entries: its OCR
+   * text (only when longer than 20 characters) and its vision summary.
+   * Returns an empty array when no sense buffer is attached.
+   */
+  private getSenseContext(): Array<{ text: string; ts: number; source: string; channel: string }> {
+    const buffer = this._senseBuffer;
+    if (!buffer) return [];
+    const since = this._lastDistilledTs || (Date.now() - 30 * 60 * 1000);
+    const collected: Array<{ text: string; ts: number; source: string; channel: string }> = [];
+    // Every entry produced here shares the same source/channel tags.
+    const addScreenItem = (text: string, ts: number): void => {
+      collected.push({ text, ts, source: "sense", channel: "screen" });
+    };
+    for (const event of buffer.queryByTime(since)) {
+      // OCR text (what's visible on screen); very short captures are skipped
+      if (event.ocr && event.ocr.length > 20) {
+        const appName = event.semantic?.context?.app || "unknown";
+        addScreenItem(`[screen: ${appName}] ${event.ocr}`, event.ts);
+      }
+      // Vision summary (AI description of screen content)
+      const visibleSummary = event.semantic?.visible?.summary;
+      if (visibleSummary) {
+        addScreenItem(`[screen-context] ${visibleSummary}`, event.ts);
+      }
+    }
+    return collected;
+  }
+  /**
+   * Incremental distillation — called when the feed buffer reaches capacity.
+   * Distills the supplied snapshot, merged with recent screen context, so the
+   * content is captured before it falls off the ring buffer.
+   *
+   * A call made while another run is in progress is skipped. On success the
+   * distillation watermark advances to the current time. The re-arm callback
+   * is invoked after every attempted run, whether it succeeded or failed.
+   *
+   * NOTE(review): nothing in this method is awaited — runDistillation() is
+   * called synchronously — so despite the Promise return type the work appears
+   * to run on the caller's tick. Confirm whether that blocking is intended.
+   *
+   * @param feedItems Snapshot of the feed items to distill.
+   */
+  async distillIncremental(feedItems: FeedItem[]): Promise<void> {
+    if (this._incrementalRunning) {
+      log(TAG, "incremental distillation already running — skipping");
+      return;
+    }
+    this._incrementalRunning = true;
+    try {
+      const itemCount = feedItems.length;
+      log(TAG, `incremental distillation: ${itemCount} items (buffer full)`);
+      const sessionMeta = {
+        ts: new Date().toISOString(),
+        sessionKey: "local-incremental",
+        durationMs: Date.now() - this.sessionStartTs,
+      };
+      const audioItems = feedItems.map(item => ({
+        text: item.text,
+        ts: item.ts,
+        source: item.source || "unknown",
+        channel: item.channel || "agent",
+      }));
+      // Merge screen context from sense buffer (OCR + vision summaries)
+      const senseItems = this.getSenseContext();
+      const transcript = [...audioItems, ...senseItems].sort((a, b) => a.ts - b.ts);
+      if (senseItems.length > 0) {
+        log(TAG, `including ${senseItems.length} screen context items in distillation`);
+      }
+      if (this.runDistillation(transcript, sessionMeta)) {
+        // Advance the watermark only on success so a failed batch is not treated as distilled.
+        this._lastDistilledTs = Date.now();
+        log(TAG, `incremental distillation complete — ${itemCount} audio + ${senseItems.length} screen items processed`);
+      }
+    } catch (err: unknown) {
+      // Narrow before reading .message so a non-Error throwable is logged as
+      // its string form rather than "undefined".
+      const reason = err instanceof Error ? err.message : String(err);
+      warn(TAG, `incremental distillation failed: ${reason.slice(0, 100)}`);
+    } finally {
+      this._incrementalRunning = false;
+      // Re-arm the buffer callback so next fill triggers another distillation
+      this._rearmCb?.();
+    }
+  }
   /**
    * Save feed items to disk for deferred distillation.
    * Called during shutdown — instant (no LLM), survives tsx force-kill.
@@ -160,13 +259,20 @@ export class LocalCurationService {
    * picked up on next startup via distillPendingSession().
    */
   async distillSession(feedItems: FeedItem[]): Promise<void> {
-    if (feedItems.length < 1) {
-      log(TAG, `skipping distillation — only ${feedItems.length} feed items`);
+    // Filter to only items not yet covered by incremental distillation
+    const items = this._lastDistilledTs > 0
+      ? feedItems.filter(i => i.ts > this._lastDistilledTs)
+      : feedItems;
+    if (items.length < 1) {
+      log(TAG, `skipping shutdown distillation — all ${feedItems.length} items already distilled incrementally`);
       return;
     }
+    log(TAG, `shutdown distillation: ${items.length} items (${feedItems.length - items.length} already distilled incrementally)`);
     // Step 0: Save to disk FIRST — survives force-kill
-    this.savePendingSession(feedItems);
+    this.savePendingSession(items);
     const sessionMeta = {
       ts: new Date().toISOString(),
@@ -174,7 +280,7 @@ export class LocalCurationService {
       durationMs: Date.now() - this.sessionStartTs,
     };
-    const transcript = feedItems.map(item => ({
+    const transcript = items.map(item => ({
       text: item.text,
       ts: item.ts,
       source: item.source || "unknown",
@@ -204,13 +310,37 @@ export class LocalCurationService {
     log(TAG, `distilling session: ${transcript.length} items, ${Math.round(sessionMeta.durationMs / 60000)} min`);
     try {
+      // Step 0.5: Retrieve existing entities for context (Mem0 retrieve-before-extract pattern)
+      let existingEntities = "";
+      const dbPath = resolve(this.memoryDir, "knowledge-graph.db");
+      if (existsSync(dbPath)) {
+        try {
+          existingEntities = execFileSync("python3", [
+            resolve(this.scriptsDir, "graph_query.py"),
+            "--db", dbPath,
+            "--top", "20",
+            "--format", "compact",
+          ], {
+            timeout: 5_000,
+            encoding: "utf-8",
+            env: { ...process.env, PYTHONPATH: this.scriptsDir },
+          }).trim();
+        } catch {
+          // Non-fatal — distillation works without existing entities
+        }
+      }
       // Step 1: Distill session into a SessionDigest
-      const digestJson = execFileSync("python3", [
+      const distillerArgs = [
         resolve(this.scriptsDir, "session_distiller.py"),
         "--memory-dir", this.memoryDir,
         "--transcript", JSON.stringify(transcript),
         "--session-meta", JSON.stringify(sessionMeta),
-      ], {
+      ];
+      if (existingEntities) {
+        distillerArgs.push("--existing-entities", existingEntities);
+      }
+      const digestJson = execFileSync("python3", distillerArgs, {
         timeout: 30_000,
         encoding: "utf-8",
         env: { ...process.env, PYTHONPATH: this.scriptsDir },
@@ -236,7 +366,7 @@ export class LocalCurationService {
           "--memory-dir", this.memoryDir,
           "--digest", JSON.stringify(digest),
         ], {
-          timeout: 30_000,
+          timeout: 60_000, // 60s: LLM call (~10s) + embedding dedup (~5s) + graph ops
           encoding: "utf-8",
           env: { ...process.env, PYTHONPATH: this.scriptsDir },
         });

package/sinain-core/src/overlay/commands.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import { execFile } from "node:child_process";
-import type { InboundMessage } from "../types.js";
+import type { InboundMessage, ResponseSize } from "../types.js";
 import type { WsHandler } from "./ws-handler.js";
 import type { AudioPipeline } from "../audio/pipeline.js";
 import type { CoreConfig } from "../types.js";
@@ -101,7 +101,7 @@ export function setupCommands(deps: CommandDeps): void {
         break;
       }
       case "command": {
-        handleCommand(msg.action, deps);
+        handleCommand(msg, deps);
         log(TAG, `command processed: ${msg.action}`);
         break;
       }
@@ -109,8 +109,11 @@ export function setupCommands(deps: CommandDeps): void {
   });
 }
-function handleCommand(action: string, deps: CommandDeps): void {
+const VALID_RESPONSE_SIZES = new Set<ResponseSize>(["small", "medium", "large"]);
+function handleCommand(msg: InboundMessage & { action: string }, deps: CommandDeps): void {
   const { wsHandler, systemAudioPipeline, micPipeline } = deps;
+  const action = msg.action;
   switch (action) {
     case "toggle_audio": {
@@ -173,6 +176,16 @@ function handleCommand(action: string, deps: CommandDeps): void {
       log(TAG, `escalation toggled ${nowActive ? "ON" : "OFF"}`);
       break;
     }
+    case "set_response_size": {
+      const size = (msg as any).responseSize as string;
+      if (VALID_RESPONSE_SIZES.has(size as ResponseSize)) {
+        wsHandler.updateState({ responseSize: size as ResponseSize });
+        log(TAG, `response size set to ${size}`);
+      } else {
+        log(TAG, `invalid response size: ${size}`);
+      }
+      break;
+    }
     case "open_settings": {
       const envPath = loadedEnvPath || `${process.env.HOME || process.env.USERPROFILE}/.sinain/.env`;
       const cmd = process.platform === "win32" ? "notepad" : "open";