@memtensor/memos-local-openclaw-plugin 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/.env.example +13 -5
  2. package/README.md +180 -68
  3. package/dist/capture/index.d.ts +5 -7
  4. package/dist/capture/index.d.ts.map +1 -1
  5. package/dist/capture/index.js +72 -43
  6. package/dist/capture/index.js.map +1 -1
  7. package/dist/ingest/providers/anthropic.d.ts +2 -0
  8. package/dist/ingest/providers/anthropic.d.ts.map +1 -1
  9. package/dist/ingest/providers/anthropic.js +110 -1
  10. package/dist/ingest/providers/anthropic.js.map +1 -1
  11. package/dist/ingest/providers/bedrock.d.ts +2 -5
  12. package/dist/ingest/providers/bedrock.d.ts.map +1 -1
  13. package/dist/ingest/providers/bedrock.js +110 -6
  14. package/dist/ingest/providers/bedrock.js.map +1 -1
  15. package/dist/ingest/providers/gemini.d.ts +2 -0
  16. package/dist/ingest/providers/gemini.d.ts.map +1 -1
  17. package/dist/ingest/providers/gemini.js +106 -1
  18. package/dist/ingest/providers/gemini.js.map +1 -1
  19. package/dist/ingest/providers/index.d.ts +9 -0
  20. package/dist/ingest/providers/index.d.ts.map +1 -1
  21. package/dist/ingest/providers/index.js +66 -4
  22. package/dist/ingest/providers/index.js.map +1 -1
  23. package/dist/ingest/providers/openai.d.ts +2 -0
  24. package/dist/ingest/providers/openai.d.ts.map +1 -1
  25. package/dist/ingest/providers/openai.js +112 -1
  26. package/dist/ingest/providers/openai.js.map +1 -1
  27. package/dist/ingest/task-processor.d.ts +63 -0
  28. package/dist/ingest/task-processor.d.ts.map +1 -0
  29. package/dist/ingest/task-processor.js +339 -0
  30. package/dist/ingest/task-processor.js.map +1 -0
  31. package/dist/ingest/worker.d.ts +1 -1
  32. package/dist/ingest/worker.d.ts.map +1 -1
  33. package/dist/ingest/worker.js +18 -13
  34. package/dist/ingest/worker.js.map +1 -1
  35. package/dist/recall/engine.d.ts +1 -0
  36. package/dist/recall/engine.d.ts.map +1 -1
  37. package/dist/recall/engine.js +21 -11
  38. package/dist/recall/engine.js.map +1 -1
  39. package/dist/recall/mmr.d.ts.map +1 -1
  40. package/dist/recall/mmr.js +3 -1
  41. package/dist/recall/mmr.js.map +1 -1
  42. package/dist/storage/sqlite.d.ts +67 -1
  43. package/dist/storage/sqlite.d.ts.map +1 -1
  44. package/dist/storage/sqlite.js +251 -5
  45. package/dist/storage/sqlite.js.map +1 -1
  46. package/dist/types.d.ts +15 -0
  47. package/dist/types.d.ts.map +1 -1
  48. package/dist/types.js +2 -0
  49. package/dist/types.js.map +1 -1
  50. package/dist/viewer/html.d.ts +1 -1
  51. package/dist/viewer/html.d.ts.map +1 -1
  52. package/dist/viewer/html.js +955 -115
  53. package/dist/viewer/html.js.map +1 -1
  54. package/dist/viewer/server.d.ts +3 -0
  55. package/dist/viewer/server.d.ts.map +1 -1
  56. package/dist/viewer/server.js +59 -1
  57. package/dist/viewer/server.js.map +1 -1
  58. package/index.ts +221 -45
  59. package/openclaw.plugin.json +20 -45
  60. package/package.json +3 -4
  61. package/skill/SKILL.md +59 -0
  62. package/src/capture/index.ts +85 -45
  63. package/src/ingest/providers/anthropic.ts +128 -1
  64. package/src/ingest/providers/bedrock.ts +130 -6
  65. package/src/ingest/providers/gemini.ts +128 -1
  66. package/src/ingest/providers/index.ts +74 -8
  67. package/src/ingest/providers/openai.ts +130 -1
  68. package/src/ingest/task-processor.ts +380 -0
  69. package/src/ingest/worker.ts +21 -15
  70. package/src/recall/engine.ts +22 -12
  71. package/src/recall/mmr.ts +3 -1
  72. package/src/storage/sqlite.ts +298 -5
  73. package/src/types.ts +19 -0
  74. package/src/viewer/html.ts +955 -115
  75. package/src/viewer/server.ts +63 -1
  76. package/SKILL.md +0 -43
  77. package/www/index.html +0 -606
@@ -0,0 +1,380 @@
1
+ import { v4 as uuid } from "uuid";
2
+ import type { SqliteStore } from "../storage/sqlite";
3
+ import type { PluginContext, Task, Chunk } from "../types";
4
+ import { DEFAULTS } from "../types";
5
+ import { Summarizer } from "./providers";
6
+
7
+ const TRIVIAL_PATTERNS = [
8
+ /^(test|testing|hello|hi|hey|ok|okay|yes|no|yeah|nope|sure|thanks|thank you|thx|ping|pong|哈哈|好的|嗯|是的|不是|谢谢|你好|测试)\s*[.!?。!?]*$/,
9
+ /^(aaa+|bbb+|xxx+|zzz+|123+|asdf+|qwer+|haha+|lol+|hmm+)\s*$/,
10
+ /^[\s\p{P}\p{S}]*$/u,
11
+ ];
12
+
13
+ const SKIP_REASONS = {
14
+ noChunks: "该任务没有对话内容,已自动跳过。",
15
+ } as const;
16
+
17
+ /**
18
+ * Asynchronous task-level processor.
19
+ *
20
+ * After each ingestion batch, checks whether the current conversation
21
+ * constitutes a "new task" compared to the previous one. If so:
22
+ * 1. Finalizes the previous task (generates a detailed summary).
23
+ * 2. Creates a new active task for incoming chunks.
24
+ *
25
+ * Task boundary detection:
26
+ * - Session change → always new task
27
+ * - Time gap > 2h → always new task
28
+ * - LLM judges whether new user message starts a different topic
29
+ */
30
+ export class TaskProcessor {
31
+ private summarizer: Summarizer;
32
+ private processing = false;
33
+
34
+ constructor(
35
+ private store: SqliteStore,
36
+ private ctx: PluginContext,
37
+ ) {
38
+ this.summarizer = new Summarizer(ctx.config.summarizer, ctx.log);
39
+ }
40
+
41
+ /**
42
+ * Called after new chunks are ingested.
43
+ * Determines if a new task boundary was crossed and handles transition.
44
+ */
45
+ async onChunksIngested(sessionKey: string, latestTimestamp: number): Promise<void> {
46
+ this.ctx.log.debug(`TaskProcessor.onChunksIngested called session=${sessionKey} ts=${latestTimestamp} processing=${this.processing}`);
47
+ if (this.processing) {
48
+ this.ctx.log.debug("TaskProcessor.onChunksIngested skipped — already processing");
49
+ return;
50
+ }
51
+ this.processing = true;
52
+ try {
53
+ await this.detectAndProcess(sessionKey, latestTimestamp);
54
+ } catch (err) {
55
+ this.ctx.log.error(`TaskProcessor error: ${err}`);
56
+ } finally {
57
+ this.processing = false;
58
+ }
59
+ }
60
+
61
+ private async detectAndProcess(sessionKey: string, latestTimestamp: number): Promise<void> {
62
+ this.ctx.log.debug(`TaskProcessor.detectAndProcess session=${sessionKey}`);
63
+
64
+ // Finalize any active tasks from OTHER sessions (session change = task boundary)
65
+ const allActive = this.store.getAllActiveTasks();
66
+ for (const t of allActive) {
67
+ if (t.sessionKey !== sessionKey) {
68
+ this.ctx.log.info(`Session changed: finalizing task=${t.id} from session=${t.sessionKey}`);
69
+ await this.finalizeTask(t);
70
+ }
71
+ }
72
+
73
+ const activeTask = this.store.getActiveTask(sessionKey);
74
+ this.ctx.log.debug(`TaskProcessor.detectAndProcess activeTask=${activeTask?.id ?? "none"}`);
75
+
76
+ if (!activeTask) {
77
+ await this.createNewTask(sessionKey, latestTimestamp);
78
+ return;
79
+ }
80
+
81
+ const isNewTask = await this.isTaskBoundary(activeTask, sessionKey, latestTimestamp);
82
+
83
+ if (isNewTask) {
84
+ await this.finalizeTask(activeTask);
85
+ await this.createNewTask(sessionKey, latestTimestamp);
86
+ } else {
87
+ this.assignUnassignedChunks(sessionKey, activeTask.id);
88
+ this.store.updateTask(activeTask.id, { endedAt: undefined });
89
+ }
90
+ }
91
+
92
+ private async isTaskBoundary(activeTask: Task, sessionKey: string, latestTimestamp: number): Promise<boolean> {
93
+ if (activeTask.sessionKey !== sessionKey) return true;
94
+
95
+ const chunks = this.store.getChunksByTask(activeTask.id);
96
+ if (chunks.length === 0) return false;
97
+
98
+ const lastChunkTs = Math.max(...chunks.map((c) => c.createdAt));
99
+ const gap = latestTimestamp - lastChunkTs;
100
+
101
+ // Hard timeout: always split after 2h regardless of topic
102
+ if (gap > DEFAULTS.taskIdleTimeoutMs) {
103
+ this.ctx.log.info(
104
+ `Task boundary: time gap ${Math.round(gap / 60000)}min > ${Math.round(DEFAULTS.taskIdleTimeoutMs / 60000)}min`,
105
+ );
106
+ return true;
107
+ }
108
+
109
+ // LLM topic judgment: build context from existing task and compare with new message
110
+ const newUserChunks = this.store.getUnassignedChunks(sessionKey).filter((c) => c.role === "user");
111
+ if (newUserChunks.length === 0) return false;
112
+
113
+ const existingUserChunks = chunks.filter((c) => c.role === "user");
114
+ if (existingUserChunks.length === 0) return false;
115
+
116
+ const currentContext = this.buildContextSummary(chunks);
117
+ const newMessage = newUserChunks.map((c) => c.content).join("\n");
118
+
119
+ const isNew = await this.summarizer.judgeNewTopic(currentContext, newMessage);
120
+
121
+ if (isNew === null) {
122
+ this.ctx.log.debug("Topic judge unavailable (no LLM configured), keeping current task");
123
+ return false;
124
+ }
125
+
126
+ if (isNew) {
127
+ this.ctx.log.info(`Task boundary: LLM judged new topic. New message: "${newMessage.slice(0, 80)}..."`);
128
+ } else {
129
+ this.ctx.log.debug(`LLM judged SAME topic, continuing task=${activeTask.id}`);
130
+ }
131
+
132
+ return isNew;
133
+ }
134
+
135
+ /**
136
+ * Build a concise context string from existing task chunks for the LLM topic judge.
137
+ * Takes recent user/assistant summaries to keep token usage low.
138
+ */
139
+ private buildContextSummary(chunks: Chunk[]): string {
140
+ const relevant = chunks
141
+ .filter((c) => c.role === "user" || c.role === "assistant")
142
+ .slice(-6);
143
+
144
+ return relevant
145
+ .map((c) => `[${c.role === "user" ? "User" : "Assistant"}]: ${c.summary || c.content.slice(0, 150)}`)
146
+ .join("\n");
147
+ }
148
+
149
+ private async createNewTask(sessionKey: string, timestamp: number): Promise<void> {
150
+ const taskId = uuid();
151
+ const task: Task = {
152
+ id: taskId,
153
+ sessionKey,
154
+ title: "",
155
+ summary: "",
156
+ status: "active",
157
+ startedAt: timestamp,
158
+ endedAt: null,
159
+ updatedAt: timestamp,
160
+ };
161
+ this.store.insertTask(task);
162
+ this.assignUnassignedChunks(sessionKey, taskId);
163
+ this.ctx.log.info(`Created new task=${taskId} session=${sessionKey}`);
164
+ }
165
+
166
+ private assignUnassignedChunks(sessionKey: string, taskId: string): void {
167
+ const unassigned = this.store.getUnassignedChunks(sessionKey);
168
+ for (const chunk of unassigned) {
169
+ this.store.setChunkTaskId(chunk.id, taskId);
170
+ }
171
+ if (unassigned.length > 0) {
172
+ this.ctx.log.debug(`Assigned ${unassigned.length} chunks to task=${taskId}`);
173
+ }
174
+ }
175
+
176
+ async finalizeTask(task: Task): Promise<void> {
177
+ const chunks = this.store.getChunksByTask(task.id);
178
+ const fallbackTitle = chunks.length > 0 ? this.extractTitle(chunks) : "";
179
+
180
+ if (chunks.length === 0) {
181
+ this.ctx.log.info(`Task ${task.id} skipped: no chunks`);
182
+ this.store.updateTask(task.id, { title: fallbackTitle, summary: SKIP_REASONS.noChunks, status: "skipped", endedAt: Date.now() });
183
+ return;
184
+ }
185
+
186
+ const skipReason = this.shouldSkipSummary(chunks);
187
+
188
+ if (skipReason) {
189
+ this.ctx.log.info(`Task ${task.id} skipped: ${skipReason} (chunks=${chunks.length}, title="${fallbackTitle}")`);
190
+ const reason = this.humanReadableSkipReason(skipReason, chunks);
191
+ this.store.updateTask(task.id, { title: fallbackTitle, summary: reason, status: "skipped", endedAt: Date.now() });
192
+ return;
193
+ }
194
+
195
+ const conversationText = this.buildConversationText(chunks);
196
+ let summary: string;
197
+ try {
198
+ summary = await this.summarizer.summarizeTask(conversationText);
199
+ } catch (err) {
200
+ this.ctx.log.warn(`Task summary generation failed for task=${task.id}: ${err}`);
201
+ summary = this.fallbackSummary(chunks);
202
+ }
203
+
204
+ const { title: llmTitle, body } = this.parseTitleFromSummary(summary);
205
+ const title = llmTitle || fallbackTitle;
206
+
207
+ this.store.updateTask(task.id, {
208
+ title,
209
+ summary: body,
210
+ status: "completed",
211
+ endedAt: Date.now(),
212
+ });
213
+
214
+ this.ctx.log.info(
215
+ `Finalized task=${task.id} title="${title}" chunks=${chunks.length} summaryLen=${body.length}`,
216
+ );
217
+ }
218
+
219
+ /**
220
+ * Determine if a task is too trivial to warrant an LLM summary call.
221
+ * Returns a skip reason string, or null if summary should proceed.
222
+ *
223
+ * Skip conditions (any one triggers skip):
224
+ * 1. Total chunks < 4 — too few messages to form a meaningful task
225
+ * 2. Real conversation turns < 2 — no back-and-forth dialogue
226
+ * 3. No user messages — purely system/tool generated, no user intent
227
+ * 4. Total content < 200 chars — not enough substance
228
+ * 5. User content is trivial/test data — "hello", "test", "ok" etc.
229
+ * 6. All messages are tool results — automated output, no conversation
230
+ * 7. High content repetition — user repeated the same thing (debug loops)
231
+ */
232
+ private shouldSkipSummary(chunks: Chunk[]): string | null {
233
+ const userChunks = chunks.filter((c) => c.role === "user");
234
+ const assistantChunks = chunks.filter((c) => c.role === "assistant");
235
+ const toolChunks = chunks.filter((c) => c.role === "tool");
236
+
237
+ // 1. Too few chunks
238
+ if (chunks.length < 4) {
239
+ return `too few chunks (${chunks.length} < 4 minimum)`;
240
+ }
241
+
242
+ // 2. Not enough real conversation turns (need at least 2 user-assistant exchanges)
243
+ const turns = Math.min(userChunks.length, assistantChunks.length);
244
+ if (turns < 2) {
245
+ return `too few conversation turns (${turns} < 2 minimum)`;
246
+ }
247
+
248
+ // 3. No user messages at all — purely automated
249
+ if (userChunks.length === 0) {
250
+ return "no user messages — task appears to be automated/system-generated";
251
+ }
252
+
253
+ // 4. Total content too short
254
+ // CJK characters carry more info per char, so use a lower threshold
255
+ const totalContentLen = chunks.reduce((sum, c) => sum + c.content.length, 0);
256
+ const hasCJK = /[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/.test(
257
+ userChunks[0]?.content ?? "",
258
+ );
259
+ const minContentLen = hasCJK ? 80 : 200;
260
+ if (totalContentLen < minContentLen) {
261
+ return `content too short (${totalContentLen} chars < ${minContentLen} minimum)`;
262
+ }
263
+
264
+ // 5. User content is trivial/test data
265
+ const userContent = userChunks.map((c) => c.content).join("\n");
266
+ if (this.looksLikeTrivialContent(userContent)) {
267
+ return "user content appears to be test/trivial data";
268
+ }
269
+
270
+ // 6. Assistant content is also trivial (both sides are low-value)
271
+ const assistantContent = assistantChunks.map((c) => c.content).join("\n");
272
+ if (this.looksLikeTrivialContent(userContent + "\n" + assistantContent)) {
273
+ return "conversation content (both user and assistant) appears trivial";
274
+ }
275
+
276
+ // 7. Almost all messages are tool results with minimal user interaction
277
+ if (toolChunks.length > 0 && toolChunks.length >= chunks.length * 0.7 && userChunks.length <= 1) {
278
+ return `dominated by tool results (${toolChunks.length}/${chunks.length} chunks) with minimal user input`;
279
+ }
280
+
281
+ // 8. High repetition — user keeps saying the same thing
282
+ if (userChunks.length >= 3) {
283
+ const uniqueUserMsgs = new Set(userChunks.map((c) => c.content.trim().toLowerCase()));
284
+ const uniqueRatio = uniqueUserMsgs.size / userChunks.length;
285
+ if (uniqueRatio < 0.4) {
286
+ return `high content repetition (${uniqueUserMsgs.size} unique out of ${userChunks.length} user messages)`;
287
+ }
288
+ }
289
+
290
+ return null;
291
+ }
292
+
293
+ private looksLikeTrivialContent(text: string): boolean {
294
+ const lines = text.toLowerCase().split(/\n/).map((l) => l.trim()).filter(Boolean);
295
+ if (lines.length === 0) return true;
296
+
297
+ const trivialCount = lines.filter((line) => {
298
+ if (line.length < 5) return true;
299
+ if (TRIVIAL_PATTERNS.some((p) => p.test(line))) return true;
300
+ return false;
301
+ }).length;
302
+
303
+ return trivialCount / lines.length > 0.7;
304
+ }
305
+
306
+ private buildConversationText(chunks: Chunk[]): string {
307
+ const lines: string[] = [];
308
+ for (const c of chunks) {
309
+ const roleLabel = c.role === "user" ? "User" : c.role === "assistant" ? "Assistant" : c.role;
310
+ lines.push(`[${roleLabel}]: ${c.content}`);
311
+ }
312
+ return lines.join("\n\n");
313
+ }
314
+
315
+ /**
316
+ * Extract the LLM-generated title from the summary output.
317
+ * The LLM is prompted to output "📌 Title\n<title text>" as the first section.
318
+ * Returns the title and the remaining body (with the title section stripped).
319
+ */
320
+ private parseTitleFromSummary(summary: string): { title: string; body: string } {
321
+ const titleMatch = summary.match(/📌\s*(?:Title|标题)\s*\n(.+)/);
322
+ if (titleMatch) {
323
+ const title = titleMatch[1].trim().slice(0, 80);
324
+ const body = summary.replace(/📌\s*(?:Title|标题)\s*\n.+\n?/, "").trim();
325
+ return { title, body };
326
+ }
327
+ return { title: "", body: summary };
328
+ }
329
+
330
+ private extractTitle(chunks: Chunk[]): string {
331
+ const firstUser = chunks.find((c) => c.role === "user");
332
+ if (!firstUser) return "Untitled Task";
333
+ const text = firstUser.content.trim();
334
+ if (text.length <= 60) return text;
335
+ return text.slice(0, 57) + "...";
336
+ }
337
+
338
+ private humanReadableSkipReason(reason: string, chunks: Chunk[]): string {
339
+ const userCount = chunks.filter((c) => c.role === "user").length;
340
+ const assistantCount = chunks.filter((c) => c.role === "assistant").length;
341
+
342
+ if (reason.includes("too few chunks")) {
343
+ return `对话内容过少(${chunks.length} 条消息),不足以生成有效摘要。至少需要 4 条消息。`;
344
+ }
345
+ if (reason.includes("too few conversation turns")) {
346
+ return `对话轮次不足(${Math.min(userCount, assistantCount)} 轮),需要至少 2 轮完整的问答交互才能生成摘要。`;
347
+ }
348
+ if (reason.includes("no user messages")) {
349
+ return "该任务没有用户消息,仅包含系统或工具自动生成的内容。";
350
+ }
351
+ if (reason.includes("content too short")) {
352
+ return "对话内容过短,信息量不足以生成有意义的摘要。";
353
+ }
354
+ if (reason.includes("trivial")) {
355
+ return "对话内容为简单问候或测试数据(如 hello、test、ok),无需生成摘要。";
356
+ }
357
+ if (reason.includes("tool results")) {
358
+ return "该任务主要由工具执行结果组成,缺少足够的用户交互内容。";
359
+ }
360
+ if (reason.includes("repetition")) {
361
+ return "对话中存在大量重复内容,无法提取有效信息生成摘要。";
362
+ }
363
+ return `对话未达到生成摘要的条件:${reason}`;
364
+ }
365
+
366
+ private fallbackSummary(chunks: Chunk[]): string {
367
+ const title = this.extractTitle(chunks);
368
+ const summaries = chunks
369
+ .filter((c) => c.summary)
370
+ .map((c) => `- ${c.summary}`);
371
+ const lines = [
372
+ `🎯 Goal`,
373
+ title,
374
+ ``,
375
+ `📋 Key Steps`,
376
+ ...summaries.slice(0, 20),
377
+ ];
378
+ return lines.join("\n");
379
+ }
380
+ }
@@ -1,13 +1,15 @@
1
1
  import { v4 as uuid } from "uuid";
2
+ import { createHash } from "crypto";
2
3
  import type { ConversationMessage, Chunk, PluginContext } from "../types";
3
4
  import type { SqliteStore } from "../storage/sqlite";
4
5
  import type { Embedder } from "../embedding";
5
6
  import { Summarizer } from "./providers";
6
- import { chunkText } from "./chunker";
7
7
  import { findDuplicate } from "./dedup";
8
+ import { TaskProcessor } from "./task-processor";
8
9
 
9
10
  export class IngestWorker {
10
11
  private summarizer: Summarizer;
12
+ private taskProcessor: TaskProcessor;
11
13
  private queue: ConversationMessage[] = [];
12
14
  private processing = false;
13
15
  private flushResolvers: Array<() => void> = [];
@@ -18,6 +20,7 @@ export class IngestWorker {
18
20
  private ctx: PluginContext,
19
21
  ) {
20
22
  this.summarizer = new Summarizer(ctx.config.summarizer, ctx.log);
23
+ this.taskProcessor = new TaskProcessor(store, ctx);
21
24
  }
22
25
 
23
26
  enqueue(messages: ConversationMessage[]): void {
@@ -41,38 +44,40 @@ export class IngestWorker {
41
44
  private async processQueue(): Promise<void> {
42
45
  this.processing = true;
43
46
 
47
+ let lastSessionKey: string | undefined;
48
+ let lastTimestamp = 0;
49
+
44
50
  while (this.queue.length > 0) {
45
51
  const msg = this.queue.shift()!;
46
52
  try {
47
53
  await this.ingestMessage(msg);
54
+ lastSessionKey = msg.sessionKey;
55
+ lastTimestamp = Math.max(lastTimestamp, msg.timestamp);
48
56
  } catch (err) {
49
57
  this.ctx.log.error(`Failed to ingest message turn=${msg.turnId}: ${err}`);
50
58
  }
51
59
  }
52
60
 
61
+ if (lastSessionKey) {
62
+ this.ctx.log.debug(`Calling TaskProcessor.onChunksIngested session=${lastSessionKey} ts=${lastTimestamp}`);
63
+ this.taskProcessor
64
+ .onChunksIngested(lastSessionKey, lastTimestamp)
65
+ .catch((err) => this.ctx.log.error(`TaskProcessor post-ingest error: ${err}`));
66
+ }
67
+
53
68
  this.processing = false;
54
69
  for (const resolve of this.flushResolvers) resolve();
55
70
  this.flushResolvers = [];
56
71
  }
57
72
 
58
73
  private async ingestMessage(msg: ConversationMessage): Promise<void> {
59
- if (msg.role === "tool") {
60
- await this.ingestToolResult(msg);
74
+ if (this.store.chunkExistsByContent(msg.sessionKey, msg.role, msg.content)) {
75
+ this.ctx.log.debug(`Skipping duplicate message: session=${msg.sessionKey} role=${msg.role} len=${msg.content.length}`);
61
76
  return;
62
77
  }
63
78
 
64
- const rawChunks = chunkText(msg.content);
65
- this.ctx.log.debug(`Chunked turn=${msg.turnId} into ${rawChunks.length} chunks`);
66
-
67
- for (let seq = 0; seq < rawChunks.length; seq++) {
68
- const raw = rawChunks[seq];
69
- await this.storeChunk(msg, raw.content, raw.kind, seq);
70
- }
71
- }
72
-
73
- private async ingestToolResult(msg: ConversationMessage): Promise<void> {
74
- this.ctx.log.debug(`Ingesting tool result turn=${msg.turnId} tool=${msg.toolName ?? "unknown"} len=${msg.content.length}`);
75
- await this.storeChunk(msg, msg.content, "tool_result", 0);
79
+ const kind = msg.role === "tool" ? "tool_result" : "paragraph";
80
+ await this.storeChunk(msg, msg.content, kind, 0);
76
81
  }
77
82
 
78
83
  private async storeChunk(
@@ -117,6 +122,7 @@ export class IngestWorker {
117
122
  kind,
118
123
  summary,
119
124
  embedding: null,
125
+ taskId: null,
120
126
  createdAt: msg.timestamp,
121
127
  updatedAt: msg.timestamp,
122
128
  };
@@ -10,6 +10,7 @@ export interface RecallOptions {
10
10
  query?: string;
11
11
  maxResults?: number;
12
12
  minScore?: number;
13
+ role?: string;
13
14
  }
14
15
 
15
16
  const MAX_RECENT_QUERIES = 20;
@@ -31,6 +32,7 @@ export class RecallEngine {
31
32
  );
32
33
  const minScore = opts.minScore ?? recallCfg.minScoreDefault!;
33
34
  const query = opts.query ?? "";
35
+ const roleFilter = opts.role;
34
36
 
35
37
  const repeatNote = this.checkRepeat(query, maxResults, minScore);
36
38
  const candidatePool = maxResults * 5;
@@ -82,24 +84,31 @@ export class RecallEngine {
82
84
  });
83
85
  const decayed = applyRecencyDecay(withTs, recallCfg.recencyHalfLifeDays);
84
86
 
85
- // Step 5: Normalize scores to [0,1]
86
- const maxScore = Math.max(...decayed.map((d) => d.score), 1e-10);
87
- const normalized = decayed.map((d) => ({
87
+ // Step 5: Apply relative threshold on raw scores, then normalize to [0,1]
88
+ const sorted = [...decayed].sort((a, b) => b.score - a.score);
89
+ const topScore = sorted.length > 0 ? sorted[0].score : 0;
90
+
91
+ const absoluteFloor = topScore * minScore * 0.3;
92
+ // When role filter is active, keep a larger pool before slicing so we don't
93
+ // discard target-role candidates that rank below non-target ones.
94
+ const preSliceLimit = roleFilter ? maxResults * 5 : maxResults;
95
+ const filtered = sorted
96
+ .filter((d) => d.score >= absoluteFloor)
97
+ .slice(0, preSliceLimit);
98
+
99
+ const displayMax = filtered.length > 0 ? filtered[0].score : 1;
100
+ const normalized = filtered.map((d) => ({
88
101
  ...d,
89
- score: d.score / maxScore,
102
+ score: d.score / displayMax,
90
103
  }));
91
104
 
92
- // Step 6: Filter by minScore and limit
93
- const filtered = normalized
94
- .filter((d) => d.score >= minScore)
95
- .sort((a, b) => b.score - a.score)
96
- .slice(0, maxResults);
97
-
98
- // Step 7: Build hits
105
+ // Step 6: Build hits (with optional role filter), applying maxResults cap at the end
99
106
  const hits: SearchHit[] = [];
100
- for (const candidate of filtered) {
107
+ for (const candidate of normalized) {
108
+ if (hits.length >= maxResults) break;
101
109
  const chunk = this.store.getChunk(candidate.id);
102
110
  if (!chunk) continue;
111
+ if (roleFilter && chunk.role !== roleFilter) continue;
103
112
 
104
113
  hits.push({
105
114
  summary: chunk.summary,
@@ -111,6 +120,7 @@ export class RecallEngine {
111
120
  seq: chunk.seq,
112
121
  },
113
122
  score: Math.round(candidate.score * 1000) / 1000,
123
+ taskId: chunk.taskId,
114
124
  source: {
115
125
  ts: chunk.createdAt,
116
126
  role: chunk.role,
package/src/recall/mmr.ts CHANGED
@@ -53,7 +53,9 @@ export function mmrRerank(
53
53
  }
54
54
 
55
55
  const chosen = remaining.splice(bestIdx, 1)[0];
56
- selected.push({ id: chosen.id, score: bestMmr });
56
+ // Preserve original RRF score for downstream filtering;
57
+ // MMR only determines selection order, not the score value.
58
+ selected.push({ id: chosen.id, score: chosen.score });
57
59
  }
58
60
 
59
61
  return selected;