@memtensor/memos-local-openclaw-plugin 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/.env.example +13 -5
  2. package/README.md +283 -91
  3. package/dist/capture/index.d.ts +5 -7
  4. package/dist/capture/index.d.ts.map +1 -1
  5. package/dist/capture/index.js +72 -43
  6. package/dist/capture/index.js.map +1 -1
  7. package/dist/ingest/dedup.d.ts +8 -0
  8. package/dist/ingest/dedup.d.ts.map +1 -1
  9. package/dist/ingest/dedup.js +21 -0
  10. package/dist/ingest/dedup.js.map +1 -1
  11. package/dist/ingest/providers/anthropic.d.ts +16 -0
  12. package/dist/ingest/providers/anthropic.d.ts.map +1 -1
  13. package/dist/ingest/providers/anthropic.js +214 -1
  14. package/dist/ingest/providers/anthropic.js.map +1 -1
  15. package/dist/ingest/providers/bedrock.d.ts +16 -5
  16. package/dist/ingest/providers/bedrock.d.ts.map +1 -1
  17. package/dist/ingest/providers/bedrock.js +210 -6
  18. package/dist/ingest/providers/bedrock.js.map +1 -1
  19. package/dist/ingest/providers/gemini.d.ts +16 -0
  20. package/dist/ingest/providers/gemini.d.ts.map +1 -1
  21. package/dist/ingest/providers/gemini.js +202 -1
  22. package/dist/ingest/providers/gemini.js.map +1 -1
  23. package/dist/ingest/providers/index.d.ts +31 -0
  24. package/dist/ingest/providers/index.d.ts.map +1 -1
  25. package/dist/ingest/providers/index.js +134 -4
  26. package/dist/ingest/providers/index.js.map +1 -1
  27. package/dist/ingest/providers/openai.d.ts +24 -0
  28. package/dist/ingest/providers/openai.d.ts.map +1 -1
  29. package/dist/ingest/providers/openai.js +255 -1
  30. package/dist/ingest/providers/openai.js.map +1 -1
  31. package/dist/ingest/task-processor.d.ts +65 -0
  32. package/dist/ingest/task-processor.d.ts.map +1 -0
  33. package/dist/ingest/task-processor.js +354 -0
  34. package/dist/ingest/task-processor.js.map +1 -0
  35. package/dist/ingest/worker.d.ts +3 -1
  36. package/dist/ingest/worker.d.ts.map +1 -1
  37. package/dist/ingest/worker.js +131 -23
  38. package/dist/ingest/worker.js.map +1 -1
  39. package/dist/recall/engine.d.ts +1 -0
  40. package/dist/recall/engine.d.ts.map +1 -1
  41. package/dist/recall/engine.js +22 -11
  42. package/dist/recall/engine.js.map +1 -1
  43. package/dist/recall/mmr.d.ts.map +1 -1
  44. package/dist/recall/mmr.js +3 -1
  45. package/dist/recall/mmr.js.map +1 -1
  46. package/dist/skill/bundled-memory-guide.d.ts +6 -0
  47. package/dist/skill/bundled-memory-guide.d.ts.map +1 -0
  48. package/dist/skill/bundled-memory-guide.js +95 -0
  49. package/dist/skill/bundled-memory-guide.js.map +1 -0
  50. package/dist/skill/evaluator.d.ts +31 -0
  51. package/dist/skill/evaluator.d.ts.map +1 -0
  52. package/dist/skill/evaluator.js +194 -0
  53. package/dist/skill/evaluator.js.map +1 -0
  54. package/dist/skill/evolver.d.ts +22 -0
  55. package/dist/skill/evolver.d.ts.map +1 -0
  56. package/dist/skill/evolver.js +193 -0
  57. package/dist/skill/evolver.js.map +1 -0
  58. package/dist/skill/generator.d.ts +25 -0
  59. package/dist/skill/generator.d.ts.map +1 -0
  60. package/dist/skill/generator.js +477 -0
  61. package/dist/skill/generator.js.map +1 -0
  62. package/dist/skill/installer.d.ts +16 -0
  63. package/dist/skill/installer.d.ts.map +1 -0
  64. package/dist/skill/installer.js +89 -0
  65. package/dist/skill/installer.js.map +1 -0
  66. package/dist/skill/upgrader.d.ts +19 -0
  67. package/dist/skill/upgrader.d.ts.map +1 -0
  68. package/dist/skill/upgrader.js +263 -0
  69. package/dist/skill/upgrader.js.map +1 -0
  70. package/dist/skill/validator.d.ts +29 -0
  71. package/dist/skill/validator.d.ts.map +1 -0
  72. package/dist/skill/validator.js +227 -0
  73. package/dist/skill/validator.js.map +1 -0
  74. package/dist/storage/sqlite.d.ts +141 -1
  75. package/dist/storage/sqlite.d.ts.map +1 -1
  76. package/dist/storage/sqlite.js +664 -7
  77. package/dist/storage/sqlite.js.map +1 -1
  78. package/dist/types.d.ts +93 -0
  79. package/dist/types.d.ts.map +1 -1
  80. package/dist/types.js +8 -0
  81. package/dist/types.js.map +1 -1
  82. package/dist/viewer/html.d.ts +1 -1
  83. package/dist/viewer/html.d.ts.map +1 -1
  84. package/dist/viewer/html.js +2391 -159
  85. package/dist/viewer/html.js.map +1 -1
  86. package/dist/viewer/server.d.ts +16 -0
  87. package/dist/viewer/server.d.ts.map +1 -1
  88. package/dist/viewer/server.js +346 -3
  89. package/dist/viewer/server.js.map +1 -1
  90. package/index.ts +572 -89
  91. package/openclaw.plugin.json +20 -45
  92. package/package.json +3 -4
  93. package/skill/memos-memory-guide/SKILL.md +86 -0
  94. package/src/capture/index.ts +85 -45
  95. package/src/ingest/dedup.ts +29 -0
  96. package/src/ingest/providers/anthropic.ts +258 -1
  97. package/src/ingest/providers/bedrock.ts +256 -6
  98. package/src/ingest/providers/gemini.ts +252 -1
  99. package/src/ingest/providers/index.ts +156 -8
  100. package/src/ingest/providers/openai.ts +304 -1
  101. package/src/ingest/task-processor.ts +396 -0
  102. package/src/ingest/worker.ts +145 -34
  103. package/src/recall/engine.ts +23 -12
  104. package/src/recall/mmr.ts +3 -1
  105. package/src/skill/bundled-memory-guide.ts +91 -0
  106. package/src/skill/evaluator.ts +220 -0
  107. package/src/skill/evolver.ts +169 -0
  108. package/src/skill/generator.ts +506 -0
  109. package/src/skill/installer.ts +59 -0
  110. package/src/skill/upgrader.ts +257 -0
  111. package/src/skill/validator.ts +227 -0
  112. package/src/storage/sqlite.ts +802 -7
  113. package/src/types.ts +96 -0
  114. package/src/viewer/html.ts +2391 -159
  115. package/src/viewer/server.ts +346 -3
  116. package/SKILL.md +0 -43
  117. package/www/index.html +0 -632
@@ -0,0 +1,396 @@
1
+ import { v4 as uuid } from "uuid";
2
+ import type { SqliteStore } from "../storage/sqlite";
3
+ import type { PluginContext, Task, Chunk } from "../types";
4
+ import { DEFAULTS } from "../types";
5
+ import { Summarizer } from "./providers";
6
+
7
/**
 * Single lines that carry no real information: greetings and acknowledgements
 * (English and Chinese), keyboard mashing, and punctuation/symbol-only input.
 * Tested against lower-cased, trimmed lines by `looksLikeTrivialContent`.
 */
const TRIVIAL_PATTERNS = [
  /^(test|testing|hello|hi|hey|ok|okay|yes|no|yeah|nope|sure|thanks|thank you|thx|ping|pong|哈哈|好的|嗯|是的|不是|谢谢|你好|测试)\s*[.!?。!?]*$/,
  /^(aaa+|bbb+|xxx+|zzz+|123+|asdf+|qwer+|haha+|lol+|hmm+)\s*$/,
  /^[\s\p{P}\p{S}]*$/u,
];

/** User-facing summary text stored on tasks that are skipped before any content analysis. */
const SKIP_REASONS = {
  noChunks: "该任务没有对话内容,已自动跳过。",
} as const;

/**
 * Truncate `text` to at most `maxUnits` UTF-16 code units without ending on a
 * dangling high surrogate, so an emoji or other astral character is never cut
 * in half (a lone surrogate is not valid text once persisted or logged).
 */
function truncateText(text: string, maxUnits: number): string {
  if (text.length <= maxUnits) return text;
  const cut = text.slice(0, maxUnits);
  const lastUnit = cut.charCodeAt(cut.length - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsOnHighSurrogate ? cut.slice(0, -1) : cut;
}

/**
 * Asynchronous task-level processor.
 *
 * After each ingestion batch, checks whether the current conversation
 * constitutes a "new task" compared to the previous one. If so:
 *   1. Finalizes the previous task (generates a detailed summary).
 *   2. Creates a new active task for incoming chunks.
 *
 * Task boundary detection:
 *   - Session change → always new task
 *   - Time gap > DEFAULTS.taskIdleTimeoutMs → always new task
 *   - Otherwise the LLM judges whether the new user message starts a different topic
 *
 * Only one detection pass runs at a time. Notifications that arrive while a
 * pass is running are queued and replayed afterwards instead of being dropped.
 */
export class TaskProcessor {
  private summarizer: Summarizer;
  private processing = false;
  /** Notifications received while a pass was running, in arrival order. */
  private pending: Array<{ sessionKey: string; latestTimestamp: number }> = [];
  private onTaskCompletedCallback?: (task: Task) => void;

  constructor(
    private store: SqliteStore,
    private ctx: PluginContext,
  ) {
    this.summarizer = new Summarizer(ctx.config.summarizer, ctx.log);
  }

  /**
   * Register the listener invoked (synchronously) after a task has been
   * finalized with status "completed". A later registration replaces the earlier one.
   */
  onTaskCompleted(cb: (task: Task) => void): void {
    this.onTaskCompletedCallback = cb;
  }

  /**
   * Called after new chunks are ingested.
   * Determines if a new task boundary was crossed and handles the transition.
   *
   * Never rejects: errors from a detection pass are logged and swallowed.
   * If a pass is already running, the notification is queued and this call
   * returns immediately; the running pass replays it before it finishes, so
   * chunks ingested during a slow LLM summary are still assigned to a task.
   *
   * @param sessionKey      Session the ingested chunks belong to.
   * @param latestTimestamp Timestamp of the newest ingested message.
   */
  async onChunksIngested(sessionKey: string, latestTimestamp: number): Promise<void> {
    this.ctx.log.debug(`TaskProcessor.onChunksIngested called session=${sessionKey} ts=${latestTimestamp} processing=${this.processing}`);
    if (this.processing) {
      // Coalesce back-to-back notifications for the same session into one replay.
      const newest = this.pending[this.pending.length - 1];
      if (newest && newest.sessionKey === sessionKey) {
        newest.latestTimestamp = Math.max(newest.latestTimestamp, latestTimestamp);
      } else {
        this.pending.push({ sessionKey, latestTimestamp });
      }
      this.ctx.log.debug("TaskProcessor.onChunksIngested deferred — already processing");
      return;
    }

    this.processing = true;
    try {
      let job: { sessionKey: string; latestTimestamp: number } | undefined = { sessionKey, latestTimestamp };
      while (job) {
        try {
          await this.detectAndProcess(job.sessionKey, job.latestTimestamp);
        } catch (err) {
          this.ctx.log.error(`TaskProcessor error: ${err}`);
        }
        // No await between this shift and the flag reset below, so nothing can slip in unnoticed.
        job = this.pending.shift();
      }
    } finally {
      this.processing = false;
    }
  }

  /**
   * One detection pass: finalize active tasks of other sessions, then either
   * create a task, roll over to a new task, or extend the current one.
   */
  private async detectAndProcess(sessionKey: string, latestTimestamp: number): Promise<void> {
    this.ctx.log.debug(`TaskProcessor.detectAndProcess session=${sessionKey}`);

    // Finalize any active tasks from OTHER sessions (session change = task boundary)
    const allActive = this.store.getAllActiveTasks();
    for (const t of allActive) {
      if (t.sessionKey !== sessionKey) {
        this.ctx.log.info(`Session changed: finalizing task=${t.id} from session=${t.sessionKey}`);
        await this.finalizeTask(t);
      }
    }

    const activeTask = this.store.getActiveTask(sessionKey);
    this.ctx.log.debug(`TaskProcessor.detectAndProcess activeTask=${activeTask?.id ?? "none"}`);

    if (!activeTask) {
      await this.createNewTask(sessionKey, latestTimestamp);
      return;
    }

    const isNewTask = await this.isTaskBoundary(activeTask, sessionKey, latestTimestamp);

    if (isNewTask) {
      await this.finalizeTask(activeTask);
      await this.createNewTask(sessionKey, latestTimestamp);
    } else {
      this.assignUnassignedChunks(sessionKey, activeTask.id);
      // NOTE(review): presumably a "touch" that refreshes the task's updatedAt
      // while leaving it open — confirm against SqliteStore.updateTask.
      this.store.updateTask(activeTask.id, { endedAt: undefined });
    }
  }

  /**
   * Decide whether the not-yet-assigned chunks of `sessionKey` start a new task.
   *
   * @returns true on session mismatch, idle gap above DEFAULTS.taskIdleTimeoutMs,
   *          or when the LLM judges a new topic; false otherwise (including when
   *          the judge is unavailable or there is nothing to compare).
   */
  private async isTaskBoundary(activeTask: Task, sessionKey: string, latestTimestamp: number): Promise<boolean> {
    if (activeTask.sessionKey !== sessionKey) return true;

    const chunks = this.store.getChunksByTask(activeTask.id);
    if (chunks.length === 0) return false;

    // reduce instead of Math.max(...spread): spreading a very large array can
    // exceed the engine's argument limit and throw RangeError.
    const lastChunkTs = chunks.reduce((latest, c) => Math.max(latest, c.createdAt), -Infinity);
    const gap = latestTimestamp - lastChunkTs;

    // Hard timeout: always split after the idle limit regardless of topic
    if (gap > DEFAULTS.taskIdleTimeoutMs) {
      this.ctx.log.info(
        `Task boundary: time gap ${Math.round(gap / 60000)}min > ${Math.round(DEFAULTS.taskIdleTimeoutMs / 60000)}min`,
      );
      return true;
    }

    // LLM topic judgment: build context from existing task and compare with new message
    const newUserChunks = this.store.getUnassignedChunks(sessionKey).filter((c) => c.role === "user");
    if (newUserChunks.length === 0) return false;

    const existingUserChunks = chunks.filter((c) => c.role === "user");
    if (existingUserChunks.length === 0) return false;

    const currentContext = this.buildContextSummary(chunks);
    const newMessage = newUserChunks.map((c) => c.content).join("\n");

    const isNew = await this.summarizer.judgeNewTopic(currentContext, newMessage);

    if (isNew === null) {
      this.ctx.log.debug("Topic judge unavailable (no LLM configured), keeping current task");
      return false;
    }

    if (isNew) {
      this.ctx.log.info(`Task boundary: LLM judged new topic. New message: "${truncateText(newMessage, 80)}..."`);
    } else {
      this.ctx.log.debug(`LLM judged SAME topic, continuing task=${activeTask.id}`);
    }

    return isNew;
  }

  /**
   * Build a concise context string from existing task chunks for the LLM topic judge.
   * Uses the last six user/assistant chunks, preferring each chunk's summary and
   * falling back to the first 150 characters of its content, to keep token usage low.
   */
  private buildContextSummary(chunks: Chunk[]): string {
    const relevant = chunks
      .filter((c) => c.role === "user" || c.role === "assistant")
      .slice(-6);

    return relevant
      .map((c) => `[${c.role === "user" ? "User" : "Assistant"}]: ${c.summary || truncateText(c.content, 150)}`)
      .join("\n");
  }

  /** Insert a fresh active task for the session and attach all of its unassigned chunks. */
  private async createNewTask(sessionKey: string, timestamp: number): Promise<void> {
    const taskId = uuid();
    const task: Task = {
      id: taskId,
      sessionKey,
      title: "",
      summary: "",
      status: "active",
      startedAt: timestamp,
      endedAt: null,
      updatedAt: timestamp,
    };
    this.store.insertTask(task);
    this.assignUnassignedChunks(sessionKey, taskId);
    this.ctx.log.info(`Created new task=${taskId} session=${sessionKey}`);
  }

  /** Attach every chunk of the session that has no task yet to `taskId`. */
  private assignUnassignedChunks(sessionKey: string, taskId: string): void {
    const unassigned = this.store.getUnassignedChunks(sessionKey);
    for (const chunk of unassigned) {
      this.store.setChunkTaskId(chunk.id, taskId);
    }
    if (unassigned.length > 0) {
      this.ctx.log.debug(`Assigned ${unassigned.length} chunks to task=${taskId}`);
    }
  }

  /**
   * Close a task: mark it "skipped" (with a human-readable reason as summary)
   * when it is empty or too trivial, otherwise generate a summary, store it with
   * status "completed" and notify the onTaskCompleted listener.
   *
   * A failing LLM summary call degrades to `fallbackSummary`; a throwing
   * listener is logged and does not affect the stored task.
   */
  async finalizeTask(task: Task): Promise<void> {
    const chunks = this.store.getChunksByTask(task.id);

    if (chunks.length === 0) {
      this.ctx.log.info(`Task ${task.id} skipped: no chunks`);
      this.store.updateTask(task.id, { title: "", summary: SKIP_REASONS.noChunks, status: "skipped", endedAt: Date.now() });
      return;
    }

    const fallbackTitle = this.extractTitle(chunks);
    const skipReason = this.shouldSkipSummary(chunks);

    if (skipReason) {
      this.ctx.log.info(`Task ${task.id} skipped: ${skipReason} (chunks=${chunks.length}, title="${fallbackTitle}")`);
      const reason = this.humanReadableSkipReason(skipReason, chunks);
      this.store.updateTask(task.id, { title: fallbackTitle, summary: reason, status: "skipped", endedAt: Date.now() });
      return;
    }

    const conversationText = this.buildConversationText(chunks);
    let summary: string;
    try {
      summary = await this.summarizer.summarizeTask(conversationText);
    } catch (err) {
      this.ctx.log.warn(`Task summary generation failed for task=${task.id}: ${err}`);
      summary = this.fallbackSummary(chunks);
    }

    const { title: llmTitle, body } = this.parseTitleFromSummary(summary);
    const title = llmTitle || fallbackTitle;

    this.store.updateTask(task.id, {
      title,
      summary: body,
      status: "completed",
      endedAt: Date.now(),
    });

    this.ctx.log.info(
      `Finalized task=${task.id} title="${title}" chunks=${chunks.length} summaryLen=${body.length}`,
    );

    if (this.onTaskCompletedCallback) {
      // Re-read so the listener sees the persisted state, not the stale `task` argument.
      const finalized = this.store.getTask(task.id);
      if (finalized) {
        try {
          this.onTaskCompletedCallback(finalized);
        } catch (err) {
          this.ctx.log.warn(`TaskProcessor onTaskCompleted callback error: ${err}`);
        }
      }
    }
  }

  /**
   * Determine if a task is too trivial to warrant an LLM summary call.
   * Returns a skip reason string, or null if summary should proceed.
   *
   * Skip conditions, evaluated in this order (first match wins):
   *   1. Total chunks < 4 — too few messages to form a meaningful task
   *   2. No user messages — purely system/tool generated, no user intent
   *   3. Real conversation turns < 2 — no back-and-forth dialogue
   *   4. Total content too short — < 200 chars (< 80 when the first user message contains CJK)
   *   5. User content is trivial/test data — "hello", "test", "ok" etc.
   *   6. User and assistant content combined is trivial
   *   7. ≥ 70% of chunks are tool results and there is at most one user message
   *   8. High content repetition — fewer than 40% of ≥ 3 user messages are unique
   *
   * The returned strings are matched by keyword in `humanReadableSkipReason`;
   * keep the two in sync when editing either.
   */
  private shouldSkipSummary(chunks: Chunk[]): string | null {
    const userChunks = chunks.filter((c) => c.role === "user");
    const assistantChunks = chunks.filter((c) => c.role === "assistant");
    const toolChunks = chunks.filter((c) => c.role === "tool");

    // 1. Too few chunks
    if (chunks.length < 4) {
      return `too few chunks (${chunks.length} < 4 minimum)`;
    }

    // 2. No user messages at all — purely automated.
    //    Must run before the turn count: zero user messages also means zero
    //    turns, which would otherwise hide this more specific reason.
    if (userChunks.length === 0) {
      return "no user messages — task appears to be automated/system-generated";
    }

    // 3. Not enough real conversation turns (need at least 2 user-assistant exchanges)
    const turns = Math.min(userChunks.length, assistantChunks.length);
    if (turns < 2) {
      return `too few conversation turns (${turns} < 2 minimum)`;
    }

    // 4. Total content too short
    //    CJK characters carry more info per char, so use a lower threshold
    const totalContentLen = chunks.reduce((sum, c) => sum + c.content.length, 0);
    const hasCJK = /[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/.test(
      userChunks[0]?.content ?? "",
    );
    const minContentLen = hasCJK ? 80 : 200;
    if (totalContentLen < minContentLen) {
      return `content too short (${totalContentLen} chars < ${minContentLen} minimum)`;
    }

    // 5. User content is trivial/test data
    const userContent = userChunks.map((c) => c.content).join("\n");
    if (this.looksLikeTrivialContent(userContent)) {
      return "user content appears to be test/trivial data";
    }

    // 6. Assistant content is also trivial (both sides are low-value)
    const assistantContent = assistantChunks.map((c) => c.content).join("\n");
    if (this.looksLikeTrivialContent(userContent + "\n" + assistantContent)) {
      return "conversation content (both user and assistant) appears trivial";
    }

    // 7. Almost all messages are tool results with minimal user interaction
    if (toolChunks.length > 0 && toolChunks.length >= chunks.length * 0.7 && userChunks.length <= 1) {
      return `dominated by tool results (${toolChunks.length}/${chunks.length} chunks) with minimal user input`;
    }

    // 8. High repetition — user keeps saying the same thing
    if (userChunks.length >= 3) {
      const uniqueUserMsgs = new Set(userChunks.map((c) => c.content.trim().toLowerCase()));
      const uniqueRatio = uniqueUserMsgs.size / userChunks.length;
      if (uniqueRatio < 0.4) {
        return `high content repetition (${uniqueUserMsgs.size} unique out of ${userChunks.length} user messages)`;
      }
    }

    return null;
  }

  /**
   * True when more than 70% of the non-empty lines are shorter than five
   * characters or match one of TRIVIAL_PATTERNS. Empty text counts as trivial.
   */
  private looksLikeTrivialContent(text: string): boolean {
    const lines = text.toLowerCase().split(/\n/).map((l) => l.trim()).filter(Boolean);
    if (lines.length === 0) return true;

    const trivialCount = lines.filter(
      (line) => line.length < 5 || TRIVIAL_PATTERNS.some((p) => p.test(line)),
    ).length;

    return trivialCount / lines.length > 0.7;
  }

  /** Render the chunks as "[Role]: content" paragraphs for the task summarizer. */
  private buildConversationText(chunks: Chunk[]): string {
    const lines: string[] = [];
    for (const c of chunks) {
      const roleLabel = c.role === "user" ? "User" : c.role === "assistant" ? "Assistant" : c.role;
      lines.push(`[${roleLabel}]: ${c.content}`);
    }
    return lines.join("\n\n");
  }

  /**
   * Extract the LLM-generated title from the summary output.
   * The LLM is prompted to output "📌 Title\n<title text>" as the first section.
   * Returns the title (capped at 80 characters) and the remaining body with the
   * title section stripped; when no title section is found the title is "" and
   * the body is the unmodified summary.
   */
  private parseTitleFromSummary(summary: string): { title: string; body: string } {
    const titleMatch = summary.match(/📌\s*(?:Title|标题)\s*\n(.+)/);
    if (titleMatch) {
      const title = truncateText(titleMatch[1].trim(), 80);
      const body = summary.replace(/📌\s*(?:Title|标题)\s*\n.+\n?/, "").trim();
      return { title, body };
    }
    return { title: "", body: summary };
  }

  /** Fallback title: the first user message, shortened to 57 characters plus "..." when longer than 60. */
  private extractTitle(chunks: Chunk[]): string {
    const firstUser = chunks.find((c) => c.role === "user");
    if (!firstUser) return "Untitled Task";
    const text = firstUser.content.trim();
    if (text.length <= 60) return text;
    return `${truncateText(text, 57)}...`;
  }

  /**
   * Map an internal skip reason (as produced by `shouldSkipSummary`) to the
   * user-facing Chinese explanation stored as the task summary. Matching is by
   * keyword; unknown reasons are embedded verbatim in a generic message.
   */
  private humanReadableSkipReason(reason: string, chunks: Chunk[]): string {
    const userCount = chunks.filter((c) => c.role === "user").length;
    const assistantCount = chunks.filter((c) => c.role === "assistant").length;

    if (reason.includes("too few chunks")) {
      return `对话内容过少(${chunks.length} 条消息),不足以生成有效摘要。至少需要 4 条消息。`;
    }
    if (reason.includes("too few conversation turns")) {
      return `对话轮次不足(${Math.min(userCount, assistantCount)} 轮),需要至少 2 轮完整的问答交互才能生成摘要。`;
    }
    if (reason.includes("no user messages")) {
      return "该任务没有用户消息,仅包含系统或工具自动生成的内容。";
    }
    if (reason.includes("content too short")) {
      return "对话内容过短,信息量不足以生成有意义的摘要。";
    }
    if (reason.includes("trivial")) {
      return "对话内容为简单问候或测试数据(如 hello、test、ok),无需生成摘要。";
    }
    if (reason.includes("tool results")) {
      return "该任务主要由工具执行结果组成,缺少足够的用户交互内容。";
    }
    if (reason.includes("repetition")) {
      return "对话中存在大量重复内容,无法提取有效信息生成摘要。";
    }
    return `对话未达到生成摘要的条件:${reason}`;
  }

  /**
   * Summary used when the LLM call fails: the fallback title as the goal,
   * followed by up to 20 per-chunk summaries as key steps.
   */
  private fallbackSummary(chunks: Chunk[]): string {
    const title = this.extractTitle(chunks);
    const summaries = chunks
      .filter((c) => c.summary)
      .map((c) => `- ${c.summary}`);
    const lines = [
      `🎯 Goal`,
      title,
      ``,
      `📋 Key Steps`,
      ...summaries.slice(0, 20),
    ];
    return lines.join("\n");
  }
}
@@ -1,13 +1,15 @@
1
1
  import { v4 as uuid } from "uuid";
2
+ import { createHash } from "crypto";
2
3
  import type { ConversationMessage, Chunk, PluginContext } from "../types";
3
4
  import type { SqliteStore } from "../storage/sqlite";
4
5
  import type { Embedder } from "../embedding";
5
6
  import { Summarizer } from "./providers";
6
- import { chunkText } from "./chunker";
7
- import { findDuplicate } from "./dedup";
7
+ import { findDuplicate, findTopSimilar } from "./dedup";
8
+ import { TaskProcessor } from "./task-processor";
8
9
 
9
10
  export class IngestWorker {
10
11
  private summarizer: Summarizer;
12
+ private taskProcessor: TaskProcessor;
11
13
  private queue: ConversationMessage[] = [];
12
14
  private processing = false;
13
15
  private flushResolvers: Array<() => void> = [];
@@ -18,8 +20,11 @@ export class IngestWorker {
18
20
  private ctx: PluginContext,
19
21
  ) {
20
22
  this.summarizer = new Summarizer(ctx.config.summarizer, ctx.log);
23
+ this.taskProcessor = new TaskProcessor(store, ctx);
21
24
  }
22
25
 
26
+ getTaskProcessor(): TaskProcessor { return this.taskProcessor; }
27
+
23
28
  enqueue(messages: ConversationMessage[]): void {
24
29
  this.queue.push(...messages);
25
30
  if (!this.processing) {
@@ -40,39 +45,83 @@ export class IngestWorker {
40
45
 
41
46
  private async processQueue(): Promise<void> {
42
47
  this.processing = true;
48
+ const t0 = performance.now();
49
+
50
+ let lastSessionKey: string | undefined;
51
+ let lastTimestamp = 0;
52
+ let stored = 0;
53
+ let skipped = 0;
54
+ let merged = 0;
55
+ let duplicated = 0;
56
+ let errors = 0;
57
+ const resultLines: string[] = [];
58
+ const inputLines: string[] = [];
59
+ const totalMessages = this.queue.length;
43
60
 
44
61
  while (this.queue.length > 0) {
45
62
  const msg = this.queue.shift()!;
63
+ inputLines.push(`[${msg.role}] ${msg.content}`);
46
64
  try {
47
- await this.ingestMessage(msg);
65
+ const result = await this.ingestMessage(msg);
66
+ lastSessionKey = msg.sessionKey;
67
+ lastTimestamp = Math.max(lastTimestamp, msg.timestamp);
68
+ if (result === "skipped") {
69
+ skipped++;
70
+ resultLines.push(`[${msg.role}] ⏭ exact-dup → ${msg.content}`);
71
+ } else if (result.action === "stored") {
72
+ stored++;
73
+ resultLines.push(`[${msg.role}] ✅ stored → ${result.summary ?? msg.content}`);
74
+ } else if (result.action === "duplicate") {
75
+ duplicated++;
76
+ resultLines.push(`[${msg.role}] 🔁 dedup(${result.reason ?? "similar"}) → ${msg.content}`);
77
+ } else if (result.action === "merged") {
78
+ merged++;
79
+ resultLines.push(`[${msg.role}] 🔀 merged → ${msg.content}`);
80
+ }
48
81
  } catch (err) {
82
+ errors++;
83
+ resultLines.push(`[${msg.role}] ❌ error → ${msg.content}`);
49
84
  this.ctx.log.error(`Failed to ingest message turn=${msg.turnId}: ${err}`);
50
85
  }
51
86
  }
52
87
 
88
+ const dur = performance.now() - t0;
89
+
90
+ if (stored + merged > 0 || skipped > 0 || duplicated > 0) {
91
+ this.store.recordToolCall("memory_add", dur, errors === 0);
92
+ try {
93
+ const inputInfo = {
94
+ session: lastSessionKey,
95
+ messages: totalMessages,
96
+ details: inputLines,
97
+ };
98
+ const stats = [`stored=${stored}`, skipped > 0 ? `skipped=${skipped}` : null, duplicated > 0 ? `dedup=${duplicated}` : null, merged > 0 ? `merged=${merged}` : null, errors > 0 ? `errors=${errors}` : null].filter(Boolean).join(", ");
99
+ this.store.recordApiLog("memory_add", inputInfo, `${stats}\n${resultLines.join("\n")}`, dur, errors === 0);
100
+ } catch (_) { /* best-effort */ }
101
+ }
102
+
103
+ if (lastSessionKey) {
104
+ this.ctx.log.debug(`Calling TaskProcessor.onChunksIngested session=${lastSessionKey} ts=${lastTimestamp}`);
105
+ this.taskProcessor
106
+ .onChunksIngested(lastSessionKey, lastTimestamp)
107
+ .catch((err) => this.ctx.log.error(`TaskProcessor post-ingest error: ${err}`));
108
+ }
109
+
53
110
  this.processing = false;
54
111
  for (const resolve of this.flushResolvers) resolve();
55
112
  this.flushResolvers = [];
56
113
  }
57
114
 
58
- private async ingestMessage(msg: ConversationMessage): Promise<void> {
59
- if (msg.role === "tool") {
60
- await this.ingestToolResult(msg);
61
- return;
62
- }
63
-
64
- const rawChunks = chunkText(msg.content);
65
- this.ctx.log.debug(`Chunked turn=${msg.turnId} into ${rawChunks.length} chunks`);
66
-
67
- for (let seq = 0; seq < rawChunks.length; seq++) {
68
- const raw = rawChunks[seq];
69
- await this.storeChunk(msg, raw.content, raw.kind, seq);
115
+ private async ingestMessage(msg: ConversationMessage): Promise<
116
+ "skipped" | { action: "stored" | "duplicate" | "merged"; summary?: string; reason?: string }
117
+ > {
118
+ if (this.store.chunkExistsByContent(msg.sessionKey, msg.role, msg.content)) {
119
+ this.ctx.log.debug(`Exact-dup (same session+role+hash), skipping: session=${msg.sessionKey} role=${msg.role} len=${msg.content.length}`);
120
+ return "skipped";
70
121
  }
71
- }
72
122
 
73
- private async ingestToolResult(msg: ConversationMessage): Promise<void> {
74
- this.ctx.log.debug(`Ingesting tool result turn=${msg.turnId} tool=${msg.toolName ?? "unknown"} len=${msg.content.length}`);
75
- await this.storeChunk(msg, msg.content, "tool_result", 0);
123
+ const kind = msg.role === "tool" ? "tool_result" : "paragraph";
124
+ return await this.storeChunk(msg, msg.content, kind, 0);
76
125
  }
77
126
 
78
127
  private async storeChunk(
@@ -80,7 +129,7 @@ export class IngestWorker {
80
129
  content: string,
81
130
  kind: Chunk["kind"],
82
131
  seq: number,
83
- ): Promise<void> {
132
+ ): Promise<{ action: "stored" | "duplicate" | "merged"; chunkId?: string; summary?: string; targetChunkId?: string; reason?: string }> {
84
133
  const chunkId = uuid();
85
134
  const summary = await this.summarizer.summarize(content);
86
135
 
@@ -91,19 +140,65 @@ export class IngestWorker {
91
140
  this.ctx.log.warn(`Embedding failed for chunk=${chunkId}, storing without vector: ${err}`);
92
141
  }
93
142
 
143
+ let dedupStatus: "active" | "duplicate" | "merged" = "active";
144
+ let dedupTarget: string | null = null;
145
+ let dedupReason: string | null = null;
146
+
147
+ // Smart dedup: find Top-5 similar chunks, then ask LLM to judge
94
148
  if (embedding) {
95
- const dupId = findDuplicate(
96
- this.store,
97
- embedding,
98
- this.ctx.config.dedup?.similarityThreshold ?? 0.93,
99
- this.ctx.log,
100
- );
101
-
102
- if (dupId) {
103
- this.store.updateSummary(dupId, summary);
104
- this.store.upsertEmbedding(dupId, embedding);
105
- this.ctx.log.debug(`Dedup-merged into existing chunk=${dupId}`);
106
- return;
149
+ const similarThreshold = this.ctx.config.dedup?.similarityThreshold ?? 0.75;
150
+ const topSimilar = findTopSimilar(this.store, embedding, similarThreshold, 5, this.ctx.log);
151
+
152
+ if (topSimilar.length > 0) {
153
+ const candidates = topSimilar.map((s, i) => {
154
+ const chunk = this.store.getChunk(s.chunkId);
155
+ return {
156
+ index: i + 1,
157
+ summary: chunk?.summary ?? "",
158
+ chunkId: s.chunkId,
159
+ };
160
+ }).filter(c => c.summary);
161
+
162
+ if (candidates.length > 0) {
163
+ const dedupResult = await this.summarizer.judgeDedup(summary, candidates);
164
+
165
+ if (dedupResult && dedupResult.action === "DUPLICATE" && dedupResult.targetIndex) {
166
+ const targetChunkId = candidates[dedupResult.targetIndex - 1]?.chunkId;
167
+ if (targetChunkId) {
168
+ this.store.recordMergeHit(targetChunkId, "DUPLICATE", dedupResult.reason);
169
+ dedupStatus = "duplicate";
170
+ dedupTarget = targetChunkId;
171
+ dedupReason = dedupResult.reason;
172
+ this.ctx.log.debug(`Smart dedup: DUPLICATE → target=${targetChunkId}, storing with status=duplicate, reason: ${dedupResult.reason}`);
173
+ }
174
+ }
175
+
176
+ if (dedupStatus === "active" && dedupResult && dedupResult.action === "UPDATE" && dedupResult.targetIndex && dedupResult.mergedSummary) {
177
+ const targetChunkId = candidates[dedupResult.targetIndex - 1]?.chunkId;
178
+ if (targetChunkId) {
179
+ const oldChunk = this.store.getChunk(targetChunkId);
180
+ const oldSummary = oldChunk?.summary ?? "";
181
+ this.store.recordMergeHit(targetChunkId, "UPDATE", dedupResult.reason, oldSummary, dedupResult.mergedSummary);
182
+ this.store.updateChunkSummaryAndContent(targetChunkId, dedupResult.mergedSummary, content);
183
+
184
+ try {
185
+ const [newEmb] = await this.embedder.embed([dedupResult.mergedSummary]);
186
+ if (newEmb) this.store.upsertEmbedding(targetChunkId, newEmb);
187
+ } catch (err) {
188
+ this.ctx.log.warn(`Re-embed after UPDATE failed: ${err}`);
189
+ }
190
+
191
+ dedupStatus = "merged";
192
+ dedupTarget = targetChunkId;
193
+ dedupReason = dedupResult.reason;
194
+ this.ctx.log.debug(`Smart dedup: UPDATE → merged into chunk=${targetChunkId}, storing with status=merged, reason: ${dedupResult.reason}`);
195
+ }
196
+ }
197
+
198
+ if (dedupStatus === "active") {
199
+ this.ctx.log.debug(`Smart dedup: NEW — creating active chunk (reason: ${dedupResult?.reason ?? "no_result"})`);
200
+ }
201
+ }
107
202
  }
108
203
  }
109
204
 
@@ -117,14 +212,30 @@ export class IngestWorker {
117
212
  kind,
118
213
  summary,
119
214
  embedding: null,
215
+ taskId: null,
216
+ skillId: null,
217
+ dedupStatus,
218
+ dedupTarget,
219
+ dedupReason,
220
+ mergeCount: 0,
221
+ lastHitAt: null,
222
+ mergeHistory: "[]",
120
223
  createdAt: msg.timestamp,
121
224
  updatedAt: msg.timestamp,
122
225
  };
123
226
 
124
227
  this.store.insertChunk(chunk);
125
- if (embedding) {
228
+ if (embedding && dedupStatus === "active") {
126
229
  this.store.upsertEmbedding(chunkId, embedding);
127
230
  }
128
- this.ctx.log.debug(`Stored chunk=${chunkId} kind=${kind} role=${msg.role} len=${content.length} hasVec=${!!embedding}`);
231
+ this.ctx.log.debug(`Stored chunk=${chunkId} kind=${kind} role=${msg.role} dedup=${dedupStatus} len=${content.length} hasVec=${!!embedding && dedupStatus === "active"}`);
232
+
233
+ if (dedupStatus === "duplicate") {
234
+ return { action: "duplicate", summary, targetChunkId: dedupTarget ?? undefined, reason: dedupReason ?? undefined };
235
+ }
236
+ if (dedupStatus === "merged") {
237
+ return { action: "merged", summary, targetChunkId: dedupTarget ?? undefined, reason: dedupReason ?? undefined };
238
+ }
239
+ return { action: "stored", chunkId, summary };
129
240
  }
130
241
  }