@memtensor/memos-local-openclaw-plugin 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/.env.example +13 -5
  2. package/README.md +283 -91
  3. package/dist/capture/index.d.ts +5 -7
  4. package/dist/capture/index.d.ts.map +1 -1
  5. package/dist/capture/index.js +72 -43
  6. package/dist/capture/index.js.map +1 -1
  7. package/dist/ingest/dedup.d.ts +8 -0
  8. package/dist/ingest/dedup.d.ts.map +1 -1
  9. package/dist/ingest/dedup.js +21 -0
  10. package/dist/ingest/dedup.js.map +1 -1
  11. package/dist/ingest/providers/anthropic.d.ts +16 -0
  12. package/dist/ingest/providers/anthropic.d.ts.map +1 -1
  13. package/dist/ingest/providers/anthropic.js +214 -1
  14. package/dist/ingest/providers/anthropic.js.map +1 -1
  15. package/dist/ingest/providers/bedrock.d.ts +16 -5
  16. package/dist/ingest/providers/bedrock.d.ts.map +1 -1
  17. package/dist/ingest/providers/bedrock.js +210 -6
  18. package/dist/ingest/providers/bedrock.js.map +1 -1
  19. package/dist/ingest/providers/gemini.d.ts +16 -0
  20. package/dist/ingest/providers/gemini.d.ts.map +1 -1
  21. package/dist/ingest/providers/gemini.js +202 -1
  22. package/dist/ingest/providers/gemini.js.map +1 -1
  23. package/dist/ingest/providers/index.d.ts +31 -0
  24. package/dist/ingest/providers/index.d.ts.map +1 -1
  25. package/dist/ingest/providers/index.js +134 -4
  26. package/dist/ingest/providers/index.js.map +1 -1
  27. package/dist/ingest/providers/openai.d.ts +24 -0
  28. package/dist/ingest/providers/openai.d.ts.map +1 -1
  29. package/dist/ingest/providers/openai.js +255 -1
  30. package/dist/ingest/providers/openai.js.map +1 -1
  31. package/dist/ingest/task-processor.d.ts +65 -0
  32. package/dist/ingest/task-processor.d.ts.map +1 -0
  33. package/dist/ingest/task-processor.js +354 -0
  34. package/dist/ingest/task-processor.js.map +1 -0
  35. package/dist/ingest/worker.d.ts +3 -1
  36. package/dist/ingest/worker.d.ts.map +1 -1
  37. package/dist/ingest/worker.js +131 -23
  38. package/dist/ingest/worker.js.map +1 -1
  39. package/dist/recall/engine.d.ts +1 -0
  40. package/dist/recall/engine.d.ts.map +1 -1
  41. package/dist/recall/engine.js +22 -11
  42. package/dist/recall/engine.js.map +1 -1
  43. package/dist/recall/mmr.d.ts.map +1 -1
  44. package/dist/recall/mmr.js +3 -1
  45. package/dist/recall/mmr.js.map +1 -1
  46. package/dist/skill/bundled-memory-guide.d.ts +6 -0
  47. package/dist/skill/bundled-memory-guide.d.ts.map +1 -0
  48. package/dist/skill/bundled-memory-guide.js +95 -0
  49. package/dist/skill/bundled-memory-guide.js.map +1 -0
  50. package/dist/skill/evaluator.d.ts +31 -0
  51. package/dist/skill/evaluator.d.ts.map +1 -0
  52. package/dist/skill/evaluator.js +194 -0
  53. package/dist/skill/evaluator.js.map +1 -0
  54. package/dist/skill/evolver.d.ts +22 -0
  55. package/dist/skill/evolver.d.ts.map +1 -0
  56. package/dist/skill/evolver.js +193 -0
  57. package/dist/skill/evolver.js.map +1 -0
  58. package/dist/skill/generator.d.ts +25 -0
  59. package/dist/skill/generator.d.ts.map +1 -0
  60. package/dist/skill/generator.js +477 -0
  61. package/dist/skill/generator.js.map +1 -0
  62. package/dist/skill/installer.d.ts +16 -0
  63. package/dist/skill/installer.d.ts.map +1 -0
  64. package/dist/skill/installer.js +89 -0
  65. package/dist/skill/installer.js.map +1 -0
  66. package/dist/skill/upgrader.d.ts +19 -0
  67. package/dist/skill/upgrader.d.ts.map +1 -0
  68. package/dist/skill/upgrader.js +263 -0
  69. package/dist/skill/upgrader.js.map +1 -0
  70. package/dist/skill/validator.d.ts +29 -0
  71. package/dist/skill/validator.d.ts.map +1 -0
  72. package/dist/skill/validator.js +227 -0
  73. package/dist/skill/validator.js.map +1 -0
  74. package/dist/storage/sqlite.d.ts +141 -1
  75. package/dist/storage/sqlite.d.ts.map +1 -1
  76. package/dist/storage/sqlite.js +664 -7
  77. package/dist/storage/sqlite.js.map +1 -1
  78. package/dist/types.d.ts +93 -0
  79. package/dist/types.d.ts.map +1 -1
  80. package/dist/types.js +8 -0
  81. package/dist/types.js.map +1 -1
  82. package/dist/viewer/html.d.ts +1 -1
  83. package/dist/viewer/html.d.ts.map +1 -1
  84. package/dist/viewer/html.js +2391 -159
  85. package/dist/viewer/html.js.map +1 -1
  86. package/dist/viewer/server.d.ts +16 -0
  87. package/dist/viewer/server.d.ts.map +1 -1
  88. package/dist/viewer/server.js +346 -3
  89. package/dist/viewer/server.js.map +1 -1
  90. package/index.ts +572 -89
  91. package/openclaw.plugin.json +20 -45
  92. package/package.json +3 -4
  93. package/skill/memos-memory-guide/SKILL.md +86 -0
  94. package/src/capture/index.ts +85 -45
  95. package/src/ingest/dedup.ts +29 -0
  96. package/src/ingest/providers/anthropic.ts +258 -1
  97. package/src/ingest/providers/bedrock.ts +256 -6
  98. package/src/ingest/providers/gemini.ts +252 -1
  99. package/src/ingest/providers/index.ts +156 -8
  100. package/src/ingest/providers/openai.ts +304 -1
  101. package/src/ingest/task-processor.ts +396 -0
  102. package/src/ingest/worker.ts +145 -34
  103. package/src/recall/engine.ts +23 -12
  104. package/src/recall/mmr.ts +3 -1
  105. package/src/skill/bundled-memory-guide.ts +91 -0
  106. package/src/skill/evaluator.ts +220 -0
  107. package/src/skill/evolver.ts +169 -0
  108. package/src/skill/generator.ts +506 -0
  109. package/src/skill/installer.ts +59 -0
  110. package/src/skill/upgrader.ts +257 -0
  111. package/src/skill/validator.ts +227 -0
  112. package/src/storage/sqlite.ts +802 -7
  113. package/src/types.ts +96 -0
  114. package/src/viewer/html.ts +2391 -159
  115. package/src/viewer/server.ts +346 -3
  116. package/SKILL.md +0 -43
  117. package/www/index.html +0 -632
@@ -1,57 +1,32 @@
1
1
  {
2
2
  "id": "memos-local",
3
+ "name": "MemOS Local Memory",
4
+ "description": "Full-write local conversation memory with hybrid search (RRF + MMR + recency). Provides memory_search, memory_get, task_summary, memory_timeline, memory_viewer for layered retrieval.",
3
5
  "kind": "memory",
6
+ "version": "0.1.4",
7
+ "homepage": "https://github.com/MemTensor/MemOS/tree/main/apps/memos-local-openclaw",
4
8
  "configSchema": {
5
9
  "type": "object",
6
10
  "additionalProperties": true,
11
+ "description": "Configuration for MemOS Local Memory. Use Raw mode to edit embedding/summarizer settings.",
7
12
  "properties": {
8
- "embedding": {
9
- "type": "object",
10
- "properties": {
11
- "provider": { "type": "string" },
12
- "endpoint": { "type": "string" },
13
- "apiKey": { "type": "string" },
14
- "model": { "type": "string" }
15
- }
16
- },
17
- "summarizer": {
18
- "type": "object",
19
- "properties": {
20
- "provider": { "type": "string" },
21
- "endpoint": { "type": "string" },
22
- "apiKey": { "type": "string" },
23
- "model": { "type": "string" },
24
- "temperature": { "type": "number" }
25
- }
13
+ "viewerPort": {
14
+ "type": "number",
15
+ "description": "Memory Viewer HTTP port (default 18799)"
26
16
  }
27
17
  }
28
18
  },
29
- "uiHints": {
30
- "embedding.endpoint": {
31
- "label": "Embedding Endpoint",
32
- "placeholder": "https://api.openai.com/v1",
33
- "help": "OpenAI-compatible embedding API base URL"
34
- },
35
- "embedding.apiKey": {
36
- "label": "Embedding API Key",
37
- "sensitive": true,
38
- "help": "API key for embedding service (or use ${ENV_VAR})"
39
- },
40
- "embedding.model": {
41
- "label": "Embedding Model",
42
- "placeholder": "bge-m3"
43
- },
44
- "summarizer.endpoint": {
45
- "label": "Summarizer Endpoint",
46
- "placeholder": "https://api.openai.com/v1"
47
- },
48
- "summarizer.apiKey": {
49
- "label": "Summarizer API Key",
50
- "sensitive": true
51
- },
52
- "summarizer.model": {
53
- "label": "Summarizer Model",
54
- "placeholder": "gpt-4o-mini"
55
- }
19
+ "requirements": {
20
+ "node": ">=18.0.0",
21
+ "openclaw": ">=2026.2.0"
22
+ },
23
+ "setup": {
24
+ "postInstall": "npm install --omit=dev",
25
+ "notes": [
26
+ "After install, add to ~/.openclaw/openclaw.json: plugins.slots.memory = \"memos-local\"",
27
+ "Set agents.defaults.memorySearch.enabled = false to disable OpenClaw's built-in memory",
28
+ "Restart the gateway: openclaw gateway stop && openclaw gateway start",
29
+ "Memory Viewer will be available at http://127.0.0.1:18799"
30
+ ]
56
31
  }
57
32
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@memtensor/memos-local-openclaw-plugin",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "MemOS Local memory plugin for OpenClaw — full-write, hybrid-recall, progressive retrieval",
5
5
  "type": "module",
6
6
  "main": "index.ts",
@@ -9,11 +9,10 @@
9
9
  "index.ts",
10
10
  "src",
11
11
  "dist",
12
+ "skill",
12
13
  "openclaw.plugin.json",
13
- "SKILL.md",
14
14
  "README.md",
15
- ".env.example",
16
- "www"
15
+ ".env.example"
17
16
  ],
18
17
  "openclaw": {
19
18
  "extensions": [
@@ -0,0 +1,86 @@
1
+ ---
2
+ name: memos-memory-guide
3
+ description: Use the MemOS local memory system to search and use the user's past conversations. Use this skill whenever the user refers to past chats, their own preferences or history, or when you need to answer from prior context. When auto-recall returns nothing (long or unclear user query), generate your own short search query and call memory_search. Use task_summary when you need full task context, skill_get for experience guides, and memory_timeline to expand around a memory hit.
4
+ ---
5
+
6
+ # MemOS Local Memory — Agent Guide
7
+
8
+ This skill describes how to use the MemOS memory tools so you can reliably search and use the user's long-term conversation history.
9
+
10
+ ## How memory is provided each turn
11
+
12
+ - **Automatic recall (hook):** At the start of each turn, the system runs a memory search using the user's current message and injects relevant past memories into your context. You do not need to call any tool for that.
13
+ - **When that is not enough:** If the user's message is very long, vague, or the automatic search returns **no memories**, you should **generate your own short, focused query** and call `memory_search` yourself. For example:
14
+ - User sent a long paragraph → extract 1–2 key topics or a short question and search with that.
15
+ - Auto-recall said "no memories" or you see no memory block → call `memory_search` with a query you derive (e.g. the user's name, a topic they often mention, or a rephrased question).
16
+ - **When you need more detail:** Search results only give excerpts and IDs. Use the tools below to fetch full task context, skill content, or surrounding messages.
17
+
18
+ ## Tools — what they do and when to call
19
+
20
+ ### memory_search
21
+
22
+ - **What it does:** Searches the user's stored conversation memory by a natural-language query. Returns a list of relevant excerpts with `chunkId` and optionally `task_id`.
23
+ - **When to call:**
24
+ - The automatic recall did not run or returned nothing (e.g. no `<memory_context>` block, or a note that no memories were found).
25
+ - The user's query is long or unclear — **generate a short query yourself** (keywords, rephrased question, or a clear sub-question) and call `memory_search(query="...")`.
26
+ - You need to search with a different angle (e.g. filter by `role='user'` to find what the user said, or use a more specific query).
27
+ - **Parameters:** `query` (required), optional `minScore`, `role` (e.g. `"user"`).
28
+ - **Output:** List of items with role, excerpt, `chunkId`, and sometimes `task_id`. Use those IDs with the tools below when you need more context.
29
+
30
+ ### task_summary
31
+
32
+ - **What it does:** Returns the full task summary for a given `task_id`: title, status, and the complete narrative summary of that conversation task (steps, decisions, URLs, commands, etc.).
33
+ - **When to call:** A `memory_search` hit included a `task_id` and you need the full story of that task (e.g. what was done, what the user decided, what failed or succeeded).
34
+ - **Parameters:** `taskId` (from a search hit).
35
+ - **Effect:** You get one coherent summary of the whole task instead of isolated excerpts.
36
+
37
+ ### skill_get
38
+
39
+ - **What it does:** Returns the content of a learned skill (experience guide) by `skillId` or by `taskId`. If you pass `taskId`, the system finds the skill linked to that task.
40
+ - **When to call:** A search hit has a `task_id` and the task is the kind that has a “how to do this again” guide (e.g. a workflow the user has run before). Use this to follow the same approach or reuse steps.
41
+ - **Parameters:** `skillId` (direct) or `taskId` (lookup).
42
+ - **Effect:** You receive the full SKILL.md-style guide. You can then call `skill_install(skillId)` if the user or you want that skill loaded for future turns.
43
+
44
+ ### skill_install
45
+
46
+ - **What it does:** Installs a skill (by `skillId`) into the workspace so it is loaded in future sessions.
47
+ - **When to call:** After `skill_get` when the skill is useful for ongoing use (e.g. the user’s recurring workflow). Optional; only when you want the skill to be permanently available.
48
+ - **Parameters:** `skillId`.
49
+
50
+ ### memory_timeline
51
+
52
+ - **What it does:** Expands context around a single memory chunk: returns the surrounding conversation messages (±N turns) so you see what was said before and after that excerpt.
53
+ - **When to call:** A `memory_search` hit is relevant but you need the surrounding dialogue (e.g. who said what next, or the exact follow-up question).
54
+ - **Parameters:** `chunkId` (from a search hit), optional `window` (default 2).
55
+ - **Effect:** You get a short, linear slice of the conversation around that chunk.
56
+
57
+ ### memory_viewer
58
+
59
+ - **What it does:** Returns the URL of the MemOS Memory Viewer (web UI) where the user can browse, search, and manage their memories.
60
+ - **When to call:** The user asks how to view their memories, open the memory dashboard, or manage stored data.
61
+ - **Parameters:** None.
62
+ - **Effect:** You can tell the user to open that URL in a browser.
63
+
64
+ ## Quick decision flow
65
+
66
+ 1. **No memories in context or auto-recall reported nothing**
67
+ → Call `memory_search` with a **self-generated short query** (e.g. key topic or rephrased question).
68
+
69
+ 2. **Search returned hits with `task_id` and you need full context**
70
+ → Call `task_summary(taskId)`.
71
+
72
+ 3. **Task has an experience guide you want to follow**
73
+ → Call `skill_get(taskId=...)` (or `skill_get(skillId=...)` if you have the id). Optionally `skill_install(skillId)` for future use.
74
+
75
+ 4. **You need the exact surrounding conversation of a hit**
76
+ → Call `memory_timeline(chunkId=...)`.
77
+
78
+ 5. **User asks where to see or manage their memories**
79
+ → Call `memory_viewer()` and share the URL.
80
+
81
+ ## Writing good search queries
82
+
83
+ - Prefer **short, focused** queries (a few words or one clear question).
84
+ - Use **concrete terms**: names, topics, tools, or decisions (e.g. “preferred editor”, “deploy script”, “API key setup”).
85
+ - If the user’s message is long, **derive one or two sub-queries** rather than pasting the whole message.
86
+ - Use `role='user'` when you specifically want to find what the user said (e.g. preferences, past questions).
@@ -1,5 +1,4 @@
1
1
  import type { ConversationMessage, Role, Logger } from "../types";
2
- import { DEFAULTS } from "../types";
3
2
 
4
3
  const SKIP_ROLES: Set<Role> = new Set(["system"]);
5
4
 
@@ -10,20 +9,33 @@ const SELF_TOOLS = new Set([
10
9
  "memory_viewer",
11
10
  ]);
12
11
 
12
+ // OpenClaw inbound metadata sentinels — these are AI-facing prefixes,
13
+ // not user content. Must be stripped before storing as memory.
14
+ const INBOUND_META_SENTINELS = [
15
+ "Conversation info (untrusted metadata):",
16
+ "Sender (untrusted metadata):",
17
+ "Thread starter (untrusted, for context):",
18
+ "Replied message (untrusted, for context):",
19
+ "Forwarded message context (untrusted metadata):",
20
+ "Chat history since last reply (untrusted, for context):",
21
+ ];
22
+
23
+ const SENTINEL_FAST_RE = new RegExp(
24
+ INBOUND_META_SENTINELS.map(s => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|"),
25
+ );
26
+
13
27
  /**
14
- * Filter and extract writable messages from a conversation turn.
28
+ * Extract writable messages from a conversation turn.
15
29
  *
16
- * - Keep user, assistant, and tool messages
17
- * - Skip system prompts
18
- * - Skip tool results from our own memory tools (prevents memory loop)
19
- * - Truncate long tool results to avoid storage bloat
20
- * - Strip injected evidence blocks wrapped in [STORED_MEMORY]...[/STORED_MEMORY]
30
+ * Stores the user's actual text; strips only OpenClaw's injected metadata
31
+ * prefixes (Sender info, conversation context, etc.) which are not user content.
32
+ * Only skips: system prompts and our own memory tool results (prevents loop).
21
33
  */
22
34
  export function captureMessages(
23
35
  messages: Array<{ role: string; content: string; toolName?: string }>,
24
36
  sessionKey: string,
25
37
  turnId: string,
26
- evidenceTag: string,
38
+ _evidenceTag: string,
27
39
  log: Logger,
28
40
  ): ConversationMessage[] {
29
41
  const now = Date.now();
@@ -34,39 +46,24 @@ export function captureMessages(
34
46
  if (SKIP_ROLES.has(role)) continue;
35
47
  if (!msg.content || msg.content.trim().length === 0) continue;
36
48
 
37
- if (role === "tool") {
38
- if (msg.toolName && SELF_TOOLS.has(msg.toolName)) {
39
- log.debug(`Skipping self-tool result: ${msg.toolName}`);
40
- continue;
41
- }
42
-
43
- let content = msg.content.trim();
44
- const maxChars = DEFAULTS.toolResultMaxChars;
45
- if (content.length > maxChars) {
46
- content = content.slice(0, maxChars) + `\n\n[truncated — original ${content.length} chars]`;
47
- }
48
-
49
- const toolLabel = msg.toolName ? `[tool:${msg.toolName}] ` : "[tool] ";
50
- result.push({
51
- role: "tool",
52
- content: toolLabel + content,
53
- timestamp: now,
54
- turnId,
55
- sessionKey,
56
- toolName: msg.toolName,
57
- });
49
+ if (role === "tool" && msg.toolName && SELF_TOOLS.has(msg.toolName)) {
50
+ log.debug(`Skipping self-tool result: ${msg.toolName}`);
58
51
  continue;
59
52
  }
60
53
 
61
- const cleaned = stripEvidenceBlocks(msg.content, evidenceTag);
62
- if (cleaned.trim().length === 0) continue;
54
+ let content = msg.content;
55
+ if (role === "user") {
56
+ content = stripInboundMetadata(content);
57
+ }
58
+ if (!content.trim()) continue;
63
59
 
64
60
  result.push({
65
61
  role,
66
- content: cleaned,
62
+ content,
67
63
  timestamp: now,
68
64
  turnId,
69
65
  sessionKey,
66
+ toolName: role === "tool" ? msg.toolName : undefined,
70
67
  });
71
68
  }
72
69
 
@@ -74,19 +71,62 @@ export function captureMessages(
74
71
  return result;
75
72
  }
76
73
 
77
- function stripEvidenceBlocks(text: string, tag: string): string {
78
- const openTag = `[${tag}]`;
79
- const closeTag = `[/${tag}]`;
80
- let result = text;
81
- let safety = 0;
82
-
83
- while (result.includes(openTag) && result.includes(closeTag) && safety < 50) {
84
- const start = result.indexOf(openTag);
85
- const end = result.indexOf(closeTag, start);
86
- if (end === -1) break;
87
- result = result.slice(0, start) + result.slice(end + closeTag.length);
88
- safety++;
74
+ /**
75
+ * Strip OpenClaw-injected inbound metadata blocks from user messages.
76
+ *
77
+ * These blocks have the shape:
78
+ * Sender (untrusted metadata):
79
+ * ```json
80
+ * { "label": "...", "id": "..." }
81
+ * ```
82
+ *
83
+ * Also strips the envelope timestamp prefix like "[Tue 2026-03-03 21:58 GMT+8] "
84
+ */
85
+ function stripInboundMetadata(text: string): string {
86
+ if (!SENTINEL_FAST_RE.test(text)) return text;
87
+
88
+ const lines = text.split("\n");
89
+ const result: string[] = [];
90
+ let inMetaBlock = false;
91
+ let inFencedJson = false;
92
+
93
+ for (let i = 0; i < lines.length; i++) {
94
+ const line = lines[i];
95
+ const trimmed = line.trim();
96
+
97
+ if (!inMetaBlock && INBOUND_META_SENTINELS.some(s => s === trimmed)) {
98
+ if (lines[i + 1]?.trim() === "```json") {
99
+ inMetaBlock = true;
100
+ inFencedJson = false;
101
+ continue;
102
+ }
103
+ // Sentinel without fenced JSON — skip this line only
104
+ continue;
105
+ }
106
+
107
+ if (inMetaBlock) {
108
+ if (!inFencedJson && trimmed === "```json") {
109
+ inFencedJson = true;
110
+ continue;
111
+ }
112
+ if (inFencedJson && trimmed === "```") {
113
+ inMetaBlock = false;
114
+ inFencedJson = false;
115
+ continue;
116
+ }
117
+ continue;
118
+ }
119
+
120
+ result.push(line);
89
121
  }
90
122
 
91
- return result;
123
+ let cleaned = result.join("\n").trim();
124
+
125
+ // Strip envelope timestamp prefix: "[Tue 2026-03-03 21:58 GMT+8] actual message"
126
+ cleaned = cleaned.replace(
127
+ /^\[(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}(?::\d{2})?\s+[A-Z]{3}[+-]\d{1,2}\]\s*/,
128
+ "",
129
+ );
130
+
131
+ return cleaned;
92
132
  }
@@ -35,3 +35,32 @@ export function findDuplicate(
35
35
 
36
36
  return null;
37
37
  }
38
+
39
+ /**
40
+ * Find Top-N most similar chunks above a threshold.
41
+ * Used for smart dedup: retrieve candidates, then ask LLM to judge.
42
+ */
43
+ export function findTopSimilar(
44
+ store: SqliteStore,
45
+ newVec: number[],
46
+ threshold: number,
47
+ topN: number,
48
+ log: Logger,
49
+ ): Array<{ chunkId: string; score: number }> {
50
+ const all = store.getAllEmbeddings();
51
+ const scored: Array<{ chunkId: string; score: number }> = [];
52
+
53
+ for (const { chunkId, vector } of all) {
54
+ const sim = cosineSimilarity(newVec, vector);
55
+ if (sim >= threshold) {
56
+ scored.push({ chunkId, score: sim });
57
+ }
58
+ }
59
+
60
+ scored.sort((a, b) => b.score - a.score);
61
+ const result = scored.slice(0, topN);
62
+ if (result.length > 0) {
63
+ log.debug(`findTopSimilar: found ${result.length} candidates above ${threshold} (best=${result[0].score.toFixed(4)})`);
64
+ }
65
+ return result;
66
+ }
@@ -1,6 +1,217 @@
1
1
  import type { SummarizerConfig, Logger } from "../../types";
2
2
 
3
- const SYSTEM_PROMPT = `Summarize the text in ONE concise sentence (max 60 tokens). Preserve exact names, commands, error codes. No bullet points, no preamble — output only the sentence.`;
3
+ const SYSTEM_PROMPT = `Summarize the text in ONE concise sentence (max 120 characters). IMPORTANT: Use the SAME language as the input text — if the input is Chinese, write Chinese; if English, write English. Preserve exact names, commands, error codes. No bullet points, no preamble — output only the sentence.`;
4
+
5
+ const TASK_SUMMARY_PROMPT = `You create a DETAILED task summary from a multi-turn conversation. This summary will be the ONLY record of this conversation, so it must preserve ALL important information.
6
+
7
+ CRITICAL LANGUAGE RULE: You MUST write in the SAME language as the user's messages. Chinese input → Chinese output. English input → English output. NEVER mix languages.
8
+
9
+ Output EXACTLY this structure:
10
+
11
+ 📌 Title
12
+ A short, descriptive title (10-30 characters). Like a chat group name.
13
+
14
+ 🎯 Goal
15
+ One sentence: what the user wanted to accomplish.
16
+
17
+ 📋 Key Steps
18
+ - Describe each meaningful step in detail
19
+ - Include the ACTUAL content produced: code snippets, commands, config blocks, formulas, key paragraphs
20
+ - For code: include the function signature and core logic (up to ~30 lines per block), use fenced code blocks
21
+ - For configs: include the actual config values and structure
22
+ - For lists/instructions: include the actual items, not just "provided a list"
23
+ - Merge only truly trivial back-and-forth (like "ok" / "sure")
24
+ - Do NOT over-summarize: "provided a function" is BAD; show the actual function
25
+
26
+ ✅ Result
27
+ What was the final outcome? Include the final version of any code/config/content produced.
28
+
29
+ 💡 Key Details
30
+ - Decisions made, trade-offs discussed, caveats noted, alternative approaches mentioned
31
+ - Specific values: numbers, versions, thresholds, URLs, file paths, model names
32
+ - Omit this section only if there truly are no noteworthy details
33
+
34
+ RULES:
35
+ - This summary is a KNOWLEDGE BASE ENTRY, not a brief note. Be thorough.
36
+ - PRESERVE verbatim: code, commands, URLs, file paths, error messages, config values, version numbers, names, amounts
37
+ - DISCARD only: greetings, filler, the assistant explaining what it will do before doing it
38
+ - Replace secrets (API keys, tokens, passwords) with [REDACTED]
39
+ - Target length: 30-50% of the original conversation length. Longer conversations need longer summaries.
40
+ - Output summary only, no preamble.`;
41
+
42
+ export async function summarizeTaskAnthropic(
43
+ text: string,
44
+ cfg: SummarizerConfig,
45
+ log: Logger,
46
+ ): Promise<string> {
47
+ const endpoint = cfg.endpoint ?? "https://api.anthropic.com/v1/messages";
48
+ const model = cfg.model ?? "claude-3-haiku-20240307";
49
+ const headers: Record<string, string> = {
50
+ "Content-Type": "application/json",
51
+ "x-api-key": cfg.apiKey ?? "",
52
+ "anthropic-version": "2023-06-01",
53
+ ...cfg.headers,
54
+ };
55
+
56
+ const resp = await fetch(endpoint, {
57
+ method: "POST",
58
+ headers,
59
+ body: JSON.stringify({
60
+ model,
61
+ max_tokens: 4096,
62
+ temperature: cfg.temperature ?? 0.1,
63
+ system: TASK_SUMMARY_PROMPT,
64
+ messages: [{ role: "user", content: text }],
65
+ }),
66
+ signal: AbortSignal.timeout(cfg.timeoutMs ?? 60_000),
67
+ });
68
+
69
+ if (!resp.ok) {
70
+ const body = await resp.text();
71
+ throw new Error(`Anthropic task-summarize failed (${resp.status}): ${body}`);
72
+ }
73
+
74
+ const json = (await resp.json()) as { content: Array<{ type: string; text: string }> };
75
+ return json.content.find((c) => c.type === "text")?.text?.trim() ?? "";
76
+ }
77
+
78
+ const TOPIC_JUDGE_PROMPT = `You are a conversation topic boundary detector. Given a summary of the CURRENT conversation and a NEW user message, determine if the new message starts a DIFFERENT topic/task.
79
+
80
+ Answer ONLY "NEW" or "SAME".
81
+
82
+ Rules:
83
+ - "NEW" = the new message is about a completely different subject, project, or task
84
+ - "SAME" = the new message continues, follows up on, or is closely related to the current topic
85
+ - Follow-up questions, clarifications, refinements, bug fixes, or next steps on the same task = SAME
86
+ - Greetings or meta-questions like "你好" or "谢谢" without new substance = SAME
87
+ - A clearly unrelated request (e.g., current topic is deployment, new message asks about cooking) = NEW
88
+
89
+ Output exactly one word: NEW or SAME`;
90
+
91
+ export async function judgeNewTopicAnthropic(
92
+ currentContext: string,
93
+ newMessage: string,
94
+ cfg: SummarizerConfig,
95
+ log: Logger,
96
+ ): Promise<boolean> {
97
+ const endpoint = cfg.endpoint ?? "https://api.anthropic.com/v1/messages";
98
+ const model = cfg.model ?? "claude-3-haiku-20240307";
99
+ const headers: Record<string, string> = {
100
+ "Content-Type": "application/json",
101
+ "x-api-key": cfg.apiKey ?? "",
102
+ "anthropic-version": "2023-06-01",
103
+ ...cfg.headers,
104
+ };
105
+
106
+ const userContent = `CURRENT CONVERSATION SUMMARY:\n${currentContext}\n\nNEW USER MESSAGE:\n${newMessage}`;
107
+
108
+ const resp = await fetch(endpoint, {
109
+ method: "POST",
110
+ headers,
111
+ body: JSON.stringify({
112
+ model,
113
+ max_tokens: 10,
114
+ temperature: 0,
115
+ system: TOPIC_JUDGE_PROMPT,
116
+ messages: [{ role: "user", content: userContent }],
117
+ }),
118
+ signal: AbortSignal.timeout(cfg.timeoutMs ?? 15_000),
119
+ });
120
+
121
+ if (!resp.ok) {
122
+ const body = await resp.text();
123
+ throw new Error(`Anthropic topic-judge failed (${resp.status}): ${body}`);
124
+ }
125
+
126
+ const json = (await resp.json()) as { content: Array<{ type: string; text: string }> };
127
+ const answer = json.content.find((c) => c.type === "text")?.text?.trim().toUpperCase() ?? "";
128
+ log.debug(`Topic judge result: "${answer}"`);
129
+ return answer.startsWith("NEW");
130
+ }
131
+
132
+ const FILTER_RELEVANT_PROMPT = `You are a memory relevance judge. Given a user's QUERY and a list of CANDIDATE memory summaries, do two things:
133
+
134
+ 1. Select ALL candidates that could be useful for answering the query. When in doubt, INCLUDE the candidate.
135
+ - For questions about lists, history, or "what/where/who" across multiple items (e.g. "which companies did I work at"), include ALL matching items — do NOT stop at the first match.
136
+ - For factual lookups (e.g. "what is the SSH port"), a single direct answer is enough.
137
+ 2. Judge whether the selected memories are SUFFICIENT to fully answer the query WITHOUT fetching additional context.
138
+
139
+ IMPORTANT for "sufficient" judgment:
140
+ - sufficient=true ONLY when the memories contain a concrete ANSWER, fact, decision, or actionable information that directly addresses the query.
141
+ - sufficient=false when:
142
+ - The memories only repeat the same question the user asked before (echo, not answer).
143
+ - The memories show related topics but lack the specific detail needed.
144
+ - The memories contain partial information that would benefit from full task context, timeline, or related skills.
145
+
146
+ Output a JSON object with exactly two fields:
147
+ {"relevant":[1,3,5],"sufficient":true}
148
+
149
+ - "relevant": array of candidate numbers that are useful. Empty array [] if none are relevant.
150
+ - "sufficient": true ONLY if the memories contain a direct answer; false otherwise.
151
+
152
+ Output ONLY the JSON object, nothing else.`;
153
+
154
+ import type { FilterResult } from "./openai";
155
+ export type { FilterResult } from "./openai";
156
+
157
+ export async function filterRelevantAnthropic(
158
+ query: string,
159
+ candidates: Array<{ index: number; summary: string; role: string }>,
160
+ cfg: SummarizerConfig,
161
+ log: Logger,
162
+ ): Promise<FilterResult> {
163
+ const endpoint = cfg.endpoint ?? "https://api.anthropic.com/v1/messages";
164
+ const model = cfg.model ?? "claude-3-haiku-20240307";
165
+ const headers: Record<string, string> = {
166
+ "Content-Type": "application/json",
167
+ "x-api-key": cfg.apiKey ?? "",
168
+ "anthropic-version": "2023-06-01",
169
+ ...cfg.headers,
170
+ };
171
+
172
+ const candidateText = candidates
173
+ .map((c) => `${c.index}. [${c.role}] ${c.summary}`)
174
+ .join("\n");
175
+
176
+ const resp = await fetch(endpoint, {
177
+ method: "POST",
178
+ headers,
179
+ body: JSON.stringify({
180
+ model,
181
+ max_tokens: 200,
182
+ temperature: 0,
183
+ system: FILTER_RELEVANT_PROMPT,
184
+ messages: [{ role: "user", content: `QUERY: ${query}\n\nCANDIDATES:\n${candidateText}` }],
185
+ }),
186
+ signal: AbortSignal.timeout(cfg.timeoutMs ?? 15_000),
187
+ });
188
+
189
+ if (!resp.ok) {
190
+ const body = await resp.text();
191
+ throw new Error(`Anthropic filter-relevant failed (${resp.status}): ${body}`);
192
+ }
193
+
194
+ const json = (await resp.json()) as { content: Array<{ type: string; text: string }> };
195
+ const raw = json.content.find((c) => c.type === "text")?.text?.trim() ?? "{}";
196
+ return parseFilterResult(raw, log);
197
+ }
198
+
199
+ function parseFilterResult(raw: string, log: Logger): FilterResult {
200
+ try {
201
+ const match = raw.match(/\{[\s\S]*\}/);
202
+ if (match) {
203
+ const obj = JSON.parse(match[0]);
204
+ if (obj && Array.isArray(obj.relevant)) {
205
+ return {
206
+ relevant: obj.relevant.filter((n: any) => typeof n === "number"),
207
+ sufficient: obj.sufficient === true,
208
+ };
209
+ }
210
+ }
211
+ } catch {}
212
+ log.warn(`filterRelevant: failed to parse LLM output: "${raw}", fallback to all+insufficient`);
213
+ return { relevant: [], sufficient: false };
214
+ }
4
215
 
5
216
  export async function summarizeAnthropic(
6
217
  text: string,
@@ -39,3 +250,49 @@ export async function summarizeAnthropic(
39
250
  };
40
251
  return json.content.find((c) => c.type === "text")?.text?.trim() ?? "";
41
252
  }
253
+
254
+ // ─── Smart Dedup ───
255
+
256
+ import { DEDUP_JUDGE_PROMPT, parseDedupResult } from "./openai";
257
+ import type { DedupResult } from "./openai";
258
+ export type { DedupResult } from "./openai";
259
+
260
+ export async function judgeDedupAnthropic(
261
+ newSummary: string,
262
+ candidates: Array<{ index: number; summary: string; chunkId: string }>,
263
+ cfg: SummarizerConfig,
264
+ log: Logger,
265
+ ): Promise<DedupResult> {
266
+ const endpoint = cfg.endpoint ?? "https://api.anthropic.com/v1/messages";
267
+ const model = cfg.model ?? "claude-3-haiku-20240307";
268
+ const headers: Record<string, string> = {
269
+ "Content-Type": "application/json",
270
+ "x-api-key": cfg.apiKey ?? "",
271
+ "anthropic-version": "2023-06-01",
272
+ ...cfg.headers,
273
+ };
274
+
275
+ const candidateText = candidates.map((c) => `${c.index}. ${c.summary}`).join("\n");
276
+
277
+ const resp = await fetch(endpoint, {
278
+ method: "POST",
279
+ headers,
280
+ body: JSON.stringify({
281
+ model,
282
+ max_tokens: 300,
283
+ temperature: 0,
284
+ system: DEDUP_JUDGE_PROMPT,
285
+ messages: [{ role: "user", content: `NEW MEMORY:\n${newSummary}\n\nEXISTING MEMORIES:\n${candidateText}` }],
286
+ }),
287
+ signal: AbortSignal.timeout(cfg.timeoutMs ?? 15_000),
288
+ });
289
+
290
+ if (!resp.ok) {
291
+ const body = await resp.text();
292
+ throw new Error(`Anthropic dedup-judge failed (${resp.status}): ${body}`);
293
+ }
294
+
295
+ const json = (await resp.json()) as { content: Array<{ type: string; text: string }> };
296
+ const raw = json.content.find((c) => c.type === "text")?.text?.trim() ?? "{}";
297
+ return parseDedupResult(raw, log);
298
+ }