@memtensor/memos-local-openclaw-plugin 0.3.20 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/README.md +232 -22
  2. package/dist/capture/index.d.ts +1 -1
  3. package/dist/capture/index.d.ts.map +1 -1
  4. package/dist/capture/index.js +33 -8
  5. package/dist/capture/index.js.map +1 -1
  6. package/dist/index.d.ts +1 -1
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +2 -1
  9. package/dist/index.js.map +1 -1
  10. package/dist/ingest/providers/anthropic.d.ts.map +1 -1
  11. package/dist/ingest/providers/anthropic.js +22 -8
  12. package/dist/ingest/providers/anthropic.js.map +1 -1
  13. package/dist/ingest/providers/bedrock.d.ts.map +1 -1
  14. package/dist/ingest/providers/bedrock.js +22 -8
  15. package/dist/ingest/providers/bedrock.js.map +1 -1
  16. package/dist/ingest/providers/gemini.d.ts.map +1 -1
  17. package/dist/ingest/providers/gemini.js +22 -8
  18. package/dist/ingest/providers/gemini.js.map +1 -1
  19. package/dist/ingest/providers/index.d.ts +13 -18
  20. package/dist/ingest/providers/index.d.ts.map +1 -1
  21. package/dist/ingest/providers/index.js +213 -139
  22. package/dist/ingest/providers/index.js.map +1 -1
  23. package/dist/ingest/providers/openai.d.ts +1 -1
  24. package/dist/ingest/providers/openai.d.ts.map +1 -1
  25. package/dist/ingest/providers/openai.js +37 -17
  26. package/dist/ingest/providers/openai.js.map +1 -1
  27. package/dist/ingest/task-processor.d.ts +28 -3
  28. package/dist/ingest/task-processor.d.ts.map +1 -1
  29. package/dist/ingest/task-processor.js +166 -67
  30. package/dist/ingest/task-processor.js.map +1 -1
  31. package/dist/ingest/worker.d.ts.map +1 -1
  32. package/dist/ingest/worker.js +97 -75
  33. package/dist/ingest/worker.js.map +1 -1
  34. package/dist/shared/llm-call.d.ts +26 -0
  35. package/dist/shared/llm-call.d.ts.map +1 -0
  36. package/dist/shared/llm-call.js +163 -0
  37. package/dist/shared/llm-call.js.map +1 -0
  38. package/dist/skill/evaluator.d.ts +0 -3
  39. package/dist/skill/evaluator.d.ts.map +1 -1
  40. package/dist/skill/evaluator.js +34 -59
  41. package/dist/skill/evaluator.js.map +1 -1
  42. package/dist/skill/evolver.d.ts +22 -1
  43. package/dist/skill/evolver.d.ts.map +1 -1
  44. package/dist/skill/evolver.js +191 -32
  45. package/dist/skill/evolver.js.map +1 -1
  46. package/dist/skill/generator.d.ts +0 -3
  47. package/dist/skill/generator.d.ts.map +1 -1
  48. package/dist/skill/generator.js +15 -50
  49. package/dist/skill/generator.js.map +1 -1
  50. package/dist/skill/upgrader.d.ts +0 -2
  51. package/dist/skill/upgrader.d.ts.map +1 -1
  52. package/dist/skill/upgrader.js +4 -39
  53. package/dist/skill/upgrader.js.map +1 -1
  54. package/dist/skill/validator.d.ts +0 -2
  55. package/dist/skill/validator.d.ts.map +1 -1
  56. package/dist/skill/validator.js +14 -44
  57. package/dist/skill/validator.js.map +1 -1
  58. package/dist/storage/sqlite.d.ts +13 -2
  59. package/dist/storage/sqlite.d.ts.map +1 -1
  60. package/dist/storage/sqlite.js +72 -6
  61. package/dist/storage/sqlite.js.map +1 -1
  62. package/dist/tools/memory-get.d.ts.map +1 -1
  63. package/dist/tools/memory-get.js +5 -1
  64. package/dist/tools/memory-get.js.map +1 -1
  65. package/dist/tools/memory-search.d.ts.map +1 -1
  66. package/dist/tools/memory-search.js +5 -0
  67. package/dist/tools/memory-search.js.map +1 -1
  68. package/dist/tools/memory-timeline.d.ts.map +1 -1
  69. package/dist/tools/memory-timeline.js +11 -2
  70. package/dist/tools/memory-timeline.js.map +1 -1
  71. package/dist/types.d.ts +2 -1
  72. package/dist/types.d.ts.map +1 -1
  73. package/dist/types.js +1 -1
  74. package/dist/types.js.map +1 -1
  75. package/dist/viewer/html.d.ts +1 -1
  76. package/dist/viewer/html.d.ts.map +1 -1
  77. package/dist/viewer/html.js +233 -9
  78. package/dist/viewer/html.js.map +1 -1
  79. package/dist/viewer/server.d.ts +5 -0
  80. package/dist/viewer/server.d.ts.map +1 -1
  81. package/dist/viewer/server.js +383 -177
  82. package/dist/viewer/server.js.map +1 -1
  83. package/index.ts +9 -3
  84. package/package.json +2 -1
  85. package/src/capture/index.ts +39 -10
  86. package/src/index.ts +3 -2
  87. package/src/ingest/providers/anthropic.ts +22 -8
  88. package/src/ingest/providers/bedrock.ts +22 -8
  89. package/src/ingest/providers/gemini.ts +22 -8
  90. package/src/ingest/providers/index.ts +192 -142
  91. package/src/ingest/providers/openai.ts +37 -17
  92. package/src/ingest/task-processor.ts +183 -65
  93. package/src/ingest/worker.ts +98 -77
  94. package/src/shared/llm-call.ts +144 -0
  95. package/src/skill/evaluator.ts +35 -64
  96. package/src/skill/evolver.ts +201 -33
  97. package/src/skill/generator.ts +16 -59
  98. package/src/skill/upgrader.ts +5 -43
  99. package/src/skill/validator.ts +15 -47
  100. package/src/storage/sqlite.ts +88 -6
  101. package/src/tools/memory-get.ts +6 -1
  102. package/src/tools/memory-search.ts +6 -0
  103. package/src/tools/memory-timeline.ts +13 -1
  104. package/src/types.ts +2 -1
  105. package/src/viewer/html.ts +233 -9
  106. package/src/viewer/server.ts +368 -187
@@ -30,13 +30,16 @@ const SKIP_REASONS = {
30
30
  export class TaskProcessor {
31
31
  private summarizer: Summarizer;
32
32
  private processing = false;
33
+ private pendingEvents: Array<{ sessionKey: string; latestTimestamp: number; owner: string }> = [];
34
+ private drainPromise: Promise<void> | null = null;
33
35
  private onTaskCompletedCallback?: (task: Task) => void;
34
36
 
35
37
  constructor(
36
38
  private store: SqliteStore,
37
39
  private ctx: PluginContext,
38
40
  ) {
39
- this.summarizer = new Summarizer(ctx.config.summarizer, ctx.log);
41
+ const strongCfg = ctx.config.skillEvolution?.summarizer;
42
+ this.summarizer = new Summarizer(ctx.config.summarizer, ctx.log, strongCfg);
40
43
  }
41
44
 
42
45
  onTaskCompleted(cb: (task: Task) => void): void {
@@ -48,25 +51,37 @@ export class TaskProcessor {
48
51
  * Determines if a new task boundary was crossed and handles transition.
49
52
  */
50
53
  async onChunksIngested(sessionKey: string, latestTimestamp: number, owner?: string): Promise<void> {
51
- this.ctx.log.debug(`TaskProcessor.onChunksIngested called session=${sessionKey} ts=${latestTimestamp} owner=${owner ?? "agent:main"} processing=${this.processing}`);
52
- if (this.processing) {
53
- this.ctx.log.debug("TaskProcessor.onChunksIngested skipped already processing");
54
- return;
54
+ const resolvedOwner = owner ?? "agent:main";
55
+ this.ctx.log.debug(`TaskProcessor.onChunksIngested called session=${sessionKey} ts=${latestTimestamp} owner=${resolvedOwner} processing=${this.processing}`);
56
+ this.pendingEvents.push({ sessionKey, latestTimestamp, owner: resolvedOwner });
57
+
58
+ if (!this.drainPromise) {
59
+ this.drainPromise = this.drainPending();
55
60
  }
61
+
62
+ await this.drainPromise;
63
+ }
64
+
65
+ private async drainPending(): Promise<void> {
56
66
  this.processing = true;
57
67
  try {
58
- await this.detectAndProcess(sessionKey, latestTimestamp, owner ?? "agent:main");
59
- } catch (err) {
60
- this.ctx.log.error(`TaskProcessor error: ${err}`);
68
+ while (this.pendingEvents.length > 0) {
69
+ const next = this.pendingEvents.shift()!;
70
+ try {
71
+ await this.detectAndProcess(next.sessionKey, next.latestTimestamp, next.owner);
72
+ } catch (err) {
73
+ this.ctx.log.error(`TaskProcessor error: ${err}`);
74
+ }
75
+ }
61
76
  } finally {
62
77
  this.processing = false;
78
+ this.drainPromise = null;
63
79
  }
64
80
  }
65
81
 
66
82
  private async detectAndProcess(sessionKey: string, latestTimestamp: number, owner: string): Promise<void> {
67
83
  this.ctx.log.debug(`TaskProcessor.detectAndProcess session=${sessionKey} owner=${owner}`);
68
84
 
69
- // Finalize any active tasks from OTHER sessions for the SAME owner (session change = task boundary)
70
85
  const allActive = this.store.getAllActiveTasks(owner);
71
86
  for (const t of allActive) {
72
87
  if (t.sessionKey !== sessionKey) {
@@ -75,83 +90,177 @@ export class TaskProcessor {
75
90
  }
76
91
  }
77
92
 
78
- const activeTask = this.store.getActiveTask(sessionKey, owner);
93
+ let activeTask = this.store.getActiveTask(sessionKey, owner);
79
94
  this.ctx.log.debug(`TaskProcessor.detectAndProcess activeTask=${activeTask?.id ?? "none"} owner=${owner}`);
80
95
 
81
96
  if (!activeTask) {
82
- await this.createNewTask(sessionKey, latestTimestamp, owner);
83
- return;
97
+ // Create a new empty task — do NOT assign all chunks yet.
98
+ // processChunksIncrementally will assign them one turn at a time with boundary checks.
99
+ activeTask = await this.createNewTaskReturn(sessionKey, latestTimestamp, owner);
84
100
  }
85
101
 
86
- const isNewTask = await this.isTaskBoundary(activeTask, sessionKey, latestTimestamp);
102
+ await this.processChunksIncrementally(activeTask, sessionKey, latestTimestamp, owner);
103
+ }
87
104
 
88
- if (isNewTask) {
89
- await this.finalizeTask(activeTask);
90
- await this.createNewTask(sessionKey, latestTimestamp, owner);
91
- } else {
92
- this.assignUnassignedChunks(sessionKey, activeTask.id);
93
- this.store.updateTask(activeTask.id, { endedAt: undefined });
105
+ /**
106
+ * Process unassigned chunks one user-turn at a time.
107
+ *
108
+ * Strategy:
109
+ * - Need at least 1 user turn in the current task before starting LLM judgment
110
+ * (0 turns = no reference point for comparison).
111
+ * - Each subsequent user turn is individually checked against the full task context.
112
+ * - Time gap > 2h always triggers a split regardless of topic.
113
+ */
114
+ private async processChunksIncrementally(
115
+ activeTask: Task,
116
+ sessionKey: string,
117
+ latestTimestamp: number,
118
+ owner: string,
119
+ ): Promise<void> {
120
+ const unassigned = this.store.getUnassignedChunks(sessionKey);
121
+ if (unassigned.length === 0) return;
122
+
123
+ const taskChunks = this.store.getChunksByTask(activeTask.id);
124
+
125
+ // Time gap check against the earliest unassigned chunk
126
+ if (taskChunks.length > 0) {
127
+ const lastTaskTs = Math.max(...taskChunks.map((c) => c.createdAt));
128
+ const firstUnassignedTs = Math.min(...unassigned.map((c) => c.createdAt));
129
+ const gap = firstUnassignedTs - lastTaskTs;
130
+ if (gap > DEFAULTS.taskIdleTimeoutMs) {
131
+ this.ctx.log.info(
132
+ `Task boundary: time gap ${Math.round(gap / 60000)}min > ${Math.round(DEFAULTS.taskIdleTimeoutMs / 60000)}min`,
133
+ );
134
+ await this.finalizeTask(activeTask);
135
+ const newTask = await this.createNewTaskReturn(sessionKey, latestTimestamp, owner);
136
+ // Recurse with the new empty task so remaining unassigned chunks get boundary-checked too
137
+ return this.processChunksIncrementally(newTask, sessionKey, latestTimestamp, owner);
138
+ }
94
139
  }
95
- }
96
140
 
97
- private async isTaskBoundary(activeTask: Task, sessionKey: string, latestTimestamp: number): Promise<boolean> {
98
- if (activeTask.sessionKey !== sessionKey) return true;
141
+ const turns = this.groupIntoTurns(unassigned);
142
+ if (turns.length === 0) {
143
+ this.assignChunksToTask(unassigned, activeTask.id);
144
+ return;
145
+ }
99
146
 
100
- const chunks = this.store.getChunksByTask(activeTask.id);
101
- if (chunks.length === 0) return false;
147
+ let currentTask = activeTask;
148
+ let currentTaskChunks = [...taskChunks];
102
149
 
103
- const lastChunkTs = Math.max(...chunks.map((c) => c.createdAt));
104
- const gap = latestTimestamp - lastChunkTs;
150
+ for (let i = 0; i < turns.length; i++) {
151
+ const turn = turns[i];
152
+ const userChunk = turn.find((c) => c.role === "user");
105
153
 
106
- // Hard timeout: always split after 2h regardless of topic
107
- if (gap > DEFAULTS.taskIdleTimeoutMs) {
108
- this.ctx.log.info(
109
- `Task boundary: time gap ${Math.round(gap / 60000)}min > ${Math.round(DEFAULTS.taskIdleTimeoutMs / 60000)}min`,
110
- );
111
- return true;
112
- }
154
+ if (!userChunk) {
155
+ this.assignChunksToTask(turn, currentTask.id);
156
+ currentTaskChunks = currentTaskChunks.concat(turn);
157
+ continue;
158
+ }
113
159
 
114
- // LLM topic judgment: build context from existing task and compare with new message
115
- const newUserChunks = this.store.getUnassignedChunks(sessionKey).filter((c) => c.role === "user");
116
- if (newUserChunks.length === 0) return false;
160
+ // Time gap check per turn
161
+ if (currentTaskChunks.length > 0) {
162
+ const lastTs = Math.max(...currentTaskChunks.map((c) => c.createdAt));
163
+ if (userChunk.createdAt - lastTs > DEFAULTS.taskIdleTimeoutMs) {
164
+ this.ctx.log.info(`Task boundary at turn ${i}: time gap ${Math.round((userChunk.createdAt - lastTs) / 60000)}min`);
165
+ await this.finalizeTask(currentTask);
166
+ currentTask = await this.createNewTaskReturn(sessionKey, userChunk.createdAt, owner);
167
+ currentTaskChunks = [];
168
+ this.assignChunksToTask(turn, currentTask.id);
169
+ currentTaskChunks = currentTaskChunks.concat(turn);
170
+ continue;
171
+ }
172
+ }
117
173
 
118
- const existingUserChunks = chunks.filter((c) => c.role === "user");
119
- if (existingUserChunks.length === 0) return false;
174
+ // Need at least 1 user turn before we can meaningfully judge topic shifts
175
+ const existingUserCount = currentTaskChunks.filter((c) => c.role === "user").length;
176
+ if (existingUserCount < 1) {
177
+ this.assignChunksToTask(turn, currentTask.id);
178
+ currentTaskChunks = currentTaskChunks.concat(turn);
179
+ continue;
180
+ }
120
181
 
121
- const currentContext = this.buildContextSummary(chunks);
122
- const newMessage = newUserChunks.map((c) => c.content).join("\n");
182
+ // LLM topic judgment — check this single user message against full task context
183
+ const context = this.buildContextSummary(currentTaskChunks);
184
+ const newMsg = userChunk.content.slice(0, 500);
185
+ this.ctx.log.info(`Topic judge: "${newMsg.slice(0, 60)}" vs ${existingUserCount} user turns`);
186
+ const isNew = await this.summarizer.judgeNewTopic(context, newMsg);
187
+ this.ctx.log.info(`Topic judge result: ${isNew === null ? "null(fallback)" : isNew ? "NEW" : "SAME"}`);
188
+
189
+ if (isNew === null) {
190
+ this.assignChunksToTask(turn, currentTask.id);
191
+ currentTaskChunks = currentTaskChunks.concat(turn);
192
+ continue;
193
+ }
123
194
 
124
- const isNew = await this.summarizer.judgeNewTopic(currentContext, newMessage);
195
+ if (isNew) {
196
+ this.ctx.log.info(`Task boundary at turn ${i}: LLM judged new topic. Msg: "${newMsg.slice(0, 80)}..."`);
197
+ await this.finalizeTask(currentTask);
198
+ currentTask = await this.createNewTaskReturn(sessionKey, userChunk.createdAt, owner);
199
+ currentTaskChunks = [];
200
+ }
125
201
 
126
- if (isNew === null) {
127
- this.ctx.log.debug("Topic judge unavailable (no LLM configured), keeping current task");
128
- return false;
202
+ this.assignChunksToTask(turn, currentTask.id);
203
+ currentTaskChunks = currentTaskChunks.concat(turn);
129
204
  }
130
205
 
131
- if (isNew) {
132
- this.ctx.log.info(`Task boundary: LLM judged new topic. New message: "${newMessage.slice(0, 80)}..."`);
133
- } else {
134
- this.ctx.log.debug(`LLM judged SAME topic, continuing task=${activeTask.id}`);
135
- }
206
+ this.store.updateTask(currentTask.id, { endedAt: undefined });
207
+ }
136
208
 
137
- return isNew;
209
+ /**
210
+ * Group chunks into user-turns: each turn starts with a user message
211
+ * and includes all subsequent non-user messages until the next user message.
212
+ */
213
+ private groupIntoTurns(chunks: Chunk[]): Chunk[][] {
214
+ const turns: Chunk[][] = [];
215
+ let current: Chunk[] = [];
216
+
217
+ for (const c of chunks) {
218
+ if (c.role === "user" && current.length > 0) {
219
+ turns.push(current);
220
+ current = [];
221
+ }
222
+ current.push(c);
223
+ }
224
+ if (current.length > 0) turns.push(current);
225
+ return turns;
138
226
  }
139
227
 
140
228
  /**
141
- * Build a concise context string from existing task chunks for the LLM topic judge.
142
- * Takes recent user/assistant summaries to keep token usage low.
229
+ * Build context from existing task chunks for the LLM topic judge.
230
+ * Includes both the task's opening topic and recent exchanges,
231
+ * so the LLM understands both what the task was originally about
232
+ * and where the conversation currently is.
233
+ *
234
+ * For user messages, include full content (up to 500 chars) since
235
+ * they carry the topic signal. For assistant messages, use summary
236
+ * or truncated content since they mostly elaborate.
143
237
  */
144
238
  private buildContextSummary(chunks: Chunk[]): string {
145
- const relevant = chunks
146
- .filter((c) => c.role === "user" || c.role === "assistant")
147
- .slice(-6);
239
+ const conversational = chunks.filter((c) => c.role === "user" || c.role === "assistant");
240
+ if (conversational.length === 0) return "";
241
+
242
+ const formatChunk = (c: Chunk) => {
243
+ const label = c.role === "user" ? "User" : "Assistant";
244
+ const maxLen = c.role === "user" ? 500 : 200;
245
+ const text = c.summary || c.content.slice(0, maxLen);
246
+ return `[${label}]: ${text}`;
247
+ };
148
248
 
149
- return relevant
150
- .map((c) => `[${c.role === "user" ? "User" : "Assistant"}]: ${c.summary || c.content.slice(0, 150)}`)
151
- .join("\n");
249
+ if (conversational.length <= 10) {
250
+ return conversational.map(formatChunk).join("\n");
251
+ }
252
+
253
+ const opening = conversational.slice(0, 6).map(formatChunk);
254
+ const recent = conversational.slice(-4).map(formatChunk);
255
+ return [
256
+ "--- Task opening ---",
257
+ ...opening,
258
+ "--- Recent exchanges ---",
259
+ ...recent,
260
+ ].join("\n");
152
261
  }
153
262
 
154
- private async createNewTask(sessionKey: string, timestamp: number, owner: string = "agent:main"): Promise<void> {
263
+ private async createNewTaskReturn(sessionKey: string, timestamp: number, owner: string = "agent:main"): Promise<Task> {
155
264
  const taskId = uuid();
156
265
  const task: Task = {
157
266
  id: taskId,
@@ -165,20 +274,29 @@ export class TaskProcessor {
165
274
  updatedAt: timestamp,
166
275
  };
167
276
  this.store.insertTask(task);
168
- this.assignUnassignedChunks(sessionKey, taskId);
169
277
  this.ctx.log.info(`Created new task=${taskId} session=${sessionKey}`);
278
+ return task;
170
279
  }
171
280
 
172
- private assignUnassignedChunks(sessionKey: string, taskId: string): void {
173
- const unassigned = this.store.getUnassignedChunks(sessionKey);
174
- for (const chunk of unassigned) {
281
+ private async createNewTask(sessionKey: string, timestamp: number, owner: string = "agent:main"): Promise<void> {
282
+ const task = await this.createNewTaskReturn(sessionKey, timestamp, owner);
283
+ this.assignUnassignedChunks(sessionKey, task.id);
284
+ }
285
+
286
+ private assignChunksToTask(chunks: Chunk[], taskId: string): void {
287
+ for (const chunk of chunks) {
175
288
  this.store.setChunkTaskId(chunk.id, taskId);
176
289
  }
177
- if (unassigned.length > 0) {
178
- this.ctx.log.debug(`Assigned ${unassigned.length} chunks to task=${taskId}`);
290
+ if (chunks.length > 0) {
291
+ this.ctx.log.debug(`Assigned ${chunks.length} chunks to task=${taskId}`);
179
292
  }
180
293
  }
181
294
 
295
+ private assignUnassignedChunks(sessionKey: string, taskId: string): void {
296
+ const unassigned = this.store.getUnassignedChunks(sessionKey);
297
+ this.assignChunksToTask(unassigned, taskId);
298
+ }
299
+
182
300
  async finalizeTask(task: Task): Promise<void> {
183
301
  const chunks = this.store.getChunksByTask(task.id);
184
302
  const fallbackTitle = chunks.length > 0 ? this.extractTitle(chunks) : "";
@@ -19,7 +19,8 @@ export class IngestWorker {
19
19
  private embedder: Embedder,
20
20
  private ctx: PluginContext,
21
21
  ) {
22
- this.summarizer = new Summarizer(ctx.config.summarizer, ctx.log);
22
+ const strongCfg = ctx.config.skillEvolution?.summarizer;
23
+ this.summarizer = new Summarizer(ctx.config.summarizer, ctx.log, strongCfg);
23
24
  this.taskProcessor = new TaskProcessor(store, ctx);
24
25
  }
25
26
 
@@ -45,73 +46,79 @@ export class IngestWorker {
45
46
 
46
47
  private async processQueue(): Promise<void> {
47
48
  this.processing = true;
48
- const t0 = performance.now();
49
-
50
- let lastSessionKey: string | undefined;
51
- let lastOwner: string | undefined;
52
- let lastTimestamp = 0;
53
- let stored = 0;
54
- let skipped = 0;
55
- let merged = 0;
56
- let duplicated = 0;
57
- let errors = 0;
58
- const resultLines: string[] = [];
59
- const inputLines: string[] = [];
60
- const totalMessages = this.queue.length;
61
-
62
- while (this.queue.length > 0) {
63
- const msg = this.queue.shift()!;
64
- inputLines.push(`[${msg.role}] ${msg.content}`);
65
- try {
66
- const result = await this.ingestMessage(msg);
67
- lastSessionKey = msg.sessionKey;
68
- lastOwner = msg.owner ?? "agent:main";
69
- lastTimestamp = Math.max(lastTimestamp, msg.timestamp);
70
- if (result === "skipped") {
71
- skipped++;
72
- resultLines.push(`[${msg.role}] exact-dup → ${msg.content}`);
73
- } else if (result.action === "stored") {
74
- stored++;
75
- resultLines.push(`[${msg.role}] stored → ${result.summary ?? msg.content}`);
76
- } else if (result.action === "duplicate") {
77
- duplicated++;
78
- resultLines.push(`[${msg.role}] 🔁 dedup(${result.reason ?? "similar"}) → ${msg.content}`);
79
- } else if (result.action === "merged") {
80
- merged++;
81
- resultLines.push(`[${msg.role}] 🔀 merged → ${msg.content}`);
49
+
50
+ try {
51
+ while (this.queue.length > 0) {
52
+ const t0 = performance.now();
53
+ const batchSize = this.queue.length;
54
+ let lastSessionKey: string | undefined;
55
+ let lastOwner: string | undefined;
56
+ let lastTimestamp = 0;
57
+ let stored = 0;
58
+ let skipped = 0;
59
+ let merged = 0;
60
+ let duplicated = 0;
61
+ let errors = 0;
62
+ const resultLines: string[] = [];
63
+ const inputLines: string[] = [];
64
+
65
+ while (this.queue.length > 0) {
66
+ const msg = this.queue.shift()!;
67
+ inputLines.push(`[${msg.role}] ${msg.content}`);
68
+ try {
69
+ const result = await this.ingestMessage(msg);
70
+ lastSessionKey = msg.sessionKey;
71
+ lastOwner = msg.owner ?? "agent:main";
72
+ lastTimestamp = Math.max(lastTimestamp, msg.timestamp);
73
+ if (result === "skipped") {
74
+ skipped++;
75
+ resultLines.push(`[${msg.role}] ⏭ exact-dup → ${msg.content}`);
76
+ } else if (result.action === "stored") {
77
+ stored++;
78
+ resultLines.push(`[${msg.role}] ✅ stored → ${result.summary ?? msg.content}`);
79
+ } else if (result.action === "duplicate") {
80
+ duplicated++;
81
+ resultLines.push(`[${msg.role}] 🔁 dedup(${result.reason ?? "similar"}) → ${msg.content}`);
82
+ } else if (result.action === "merged") {
83
+ merged++;
84
+ resultLines.push(`[${msg.role}] 🔀 merged → ${msg.content}`);
85
+ }
86
+ } catch (err) {
87
+ errors++;
88
+ resultLines.push(`[${msg.role}] ❌ error → ${msg.content}`);
89
+ this.ctx.log.error(`Failed to ingest message turn=${msg.turnId}: ${err}`);
90
+ }
82
91
  }
83
- } catch (err) {
84
- errors++;
85
- resultLines.push(`[${msg.role}] ❌ error → ${msg.content}`);
86
- this.ctx.log.error(`Failed to ingest message turn=${msg.turnId}: ${err}`);
87
- }
88
- }
89
92
 
90
- const dur = performance.now() - t0;
91
-
92
- if (stored + merged > 0 || skipped > 0 || duplicated > 0) {
93
- this.store.recordToolCall("memory_add", dur, errors === 0);
94
- try {
95
- const inputInfo = {
96
- session: lastSessionKey,
97
- messages: totalMessages,
98
- details: inputLines,
99
- };
100
- const stats = [`stored=${stored}`, skipped > 0 ? `skipped=${skipped}` : null, duplicated > 0 ? `dedup=${duplicated}` : null, merged > 0 ? `merged=${merged}` : null, errors > 0 ? `errors=${errors}` : null].filter(Boolean).join(", ");
101
- this.store.recordApiLog("memory_add", inputInfo, `${stats}\n${resultLines.join("\n")}`, dur, errors === 0);
102
- } catch (_) { /* best-effort */ }
103
- }
93
+ const dur = performance.now() - t0;
104
94
 
105
- if (lastSessionKey) {
106
- this.ctx.log.debug(`Calling TaskProcessor.onChunksIngested session=${lastSessionKey} ts=${lastTimestamp} owner=${lastOwner}`);
107
- this.taskProcessor
108
- .onChunksIngested(lastSessionKey, lastTimestamp, lastOwner)
109
- .catch((err) => this.ctx.log.error(`TaskProcessor post-ingest error: ${err}`));
110
- }
95
+ if (stored + merged > 0 || skipped > 0 || duplicated > 0) {
96
+ this.store.recordToolCall("memory_add", dur, errors === 0);
97
+ try {
98
+ const inputInfo = {
99
+ session: lastSessionKey,
100
+ messages: batchSize,
101
+ details: inputLines,
102
+ };
103
+ const stats = [`stored=${stored}`, skipped > 0 ? `skipped=${skipped}` : null, duplicated > 0 ? `dedup=${duplicated}` : null, merged > 0 ? `merged=${merged}` : null, errors > 0 ? `errors=${errors}` : null].filter(Boolean).join(", ");
104
+ this.store.recordApiLog("memory_add", inputInfo, `${stats}\n${resultLines.join("\n")}`, dur, errors === 0);
105
+ } catch (_) { /* best-effort */ }
106
+ }
111
107
 
112
- this.processing = false;
113
- for (const resolve of this.flushResolvers) resolve();
114
- this.flushResolvers = [];
108
+ if (lastSessionKey) {
109
+ this.ctx.log.debug(`Calling TaskProcessor.onChunksIngested session=${lastSessionKey} ts=${lastTimestamp} owner=${lastOwner}`);
110
+ try {
111
+ await this.taskProcessor.onChunksIngested(lastSessionKey, lastTimestamp, lastOwner);
112
+ } catch (err) {
113
+ this.ctx.log.error(`TaskProcessor post-ingest error: ${err}`);
114
+ }
115
+ }
116
+ }
117
+ } finally {
118
+ this.processing = false;
119
+ for (const resolve of this.flushResolvers) resolve();
120
+ this.flushResolvers = [];
121
+ }
115
122
  }
116
123
 
117
124
  private async ingestMessage(msg: ConversationMessage): Promise<
@@ -133,7 +140,7 @@ export class IngestWorker {
133
140
  seq: number,
134
141
  ): Promise<{ action: "stored" | "duplicate" | "merged"; chunkId?: string; summary?: string; targetChunkId?: string; reason?: string }> {
135
142
  const chunkId = uuid();
136
- const summary = await this.summarizer.summarize(content);
143
+ let summary = await this.summarizer.summarize(content);
137
144
 
138
145
  let embedding: number[] | null = null;
139
146
  try {
@@ -145,11 +152,23 @@ export class IngestWorker {
145
152
  let dedupStatus: "active" | "duplicate" | "merged" = "active";
146
153
  let dedupTarget: string | null = null;
147
154
  let dedupReason: string | null = null;
155
+ let mergedFromOld: string | null = null;
156
+
157
+ // Fast path: exact content_hash match within same owner (agent dimension)
158
+ const chunkOwner = msg.owner ?? "agent:main";
159
+ const existingByHash = this.store.findActiveChunkByHash(content, chunkOwner);
160
+ if (existingByHash) {
161
+ this.ctx.log.debug(`Exact-dup (owner=${chunkOwner}): hash match → existing=${existingByHash}`);
162
+ this.store.recordMergeHit(existingByHash, "DUPLICATE", "exact content hash match");
163
+ dedupStatus = "duplicate";
164
+ dedupTarget = existingByHash;
165
+ dedupReason = "exact content hash match";
166
+ }
148
167
 
149
168
  // Smart dedup: find Top-5 similar chunks, then ask LLM to judge
150
- if (embedding) {
151
- const similarThreshold = this.ctx.config.dedup?.similarityThreshold ?? 0.75;
152
- const dedupOwnerFilter = msg.owner ? [msg.owner, "public"] : undefined;
169
+ if (dedupStatus === "active" && embedding) {
170
+ const similarThreshold = this.ctx.config.dedup?.similarityThreshold ?? 0.60;
171
+ const dedupOwnerFilter = msg.owner ? [msg.owner] : undefined;
153
172
  const topSimilar = findTopSimilar(this.store, embedding, similarThreshold, 5, this.ctx.log, dedupOwnerFilter);
154
173
 
155
174
  if (topSimilar.length > 0) {
@@ -182,19 +201,21 @@ export class IngestWorker {
182
201
  const oldChunk = this.store.getChunk(targetChunkId);
183
202
  const oldSummary = oldChunk?.summary ?? "";
184
203
  this.store.recordMergeHit(targetChunkId, "UPDATE", dedupResult.reason, oldSummary, dedupResult.mergedSummary);
185
- this.store.updateChunkSummaryAndContent(targetChunkId, dedupResult.mergedSummary, content);
186
204
 
205
+ summary = dedupResult.mergedSummary;
187
206
  try {
188
- const [newEmb] = await this.embedder.embed([dedupResult.mergedSummary]);
189
- if (newEmb) this.store.upsertEmbedding(targetChunkId, newEmb);
207
+ const [newEmb] = await this.embedder.embed([summary]);
208
+ if (newEmb) embedding = newEmb;
190
209
  } catch (err) {
191
- this.ctx.log.warn(`Re-embed after UPDATE failed: ${err}`);
210
+ this.ctx.log.warn(`Re-embed after merge failed: ${err}`);
192
211
  }
193
212
 
194
- dedupStatus = "merged";
195
- dedupTarget = targetChunkId;
213
+ this.store.markDedupStatus(targetChunkId, "merged", chunkId, dedupResult.reason);
214
+ this.store.deleteEmbedding(targetChunkId);
215
+
216
+ mergedFromOld = targetChunkId;
196
217
  dedupReason = dedupResult.reason;
197
- this.ctx.log.debug(`Smart dedup: UPDATE → merged into chunk=${targetChunkId}, storing with status=merged, reason: ${dedupResult.reason}`);
218
+ this.ctx.log.debug(`Smart dedup: UPDATE → old chunk=${targetChunkId} retired, new chunk=${chunkId} gets merged summary, reason: ${dedupResult.reason}`);
198
219
  }
199
220
  }
200
221
 
@@ -237,8 +258,8 @@ export class IngestWorker {
237
258
  if (dedupStatus === "duplicate") {
238
259
  return { action: "duplicate", summary, targetChunkId: dedupTarget ?? undefined, reason: dedupReason ?? undefined };
239
260
  }
240
- if (dedupStatus === "merged") {
241
- return { action: "merged", summary, targetChunkId: dedupTarget ?? undefined, reason: dedupReason ?? undefined };
261
+ if (mergedFromOld) {
262
+ return { action: "merged", chunkId, summary, targetChunkId: mergedFromOld, reason: dedupReason ?? undefined };
242
263
  }
243
264
  return { action: "stored", chunkId, summary };
244
265
  }