@memtensor/memos-local-openclaw-plugin 0.3.19 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +232 -22
- package/dist/capture/index.d.ts +1 -1
- package/dist/capture/index.d.ts.map +1 -1
- package/dist/capture/index.js +33 -8
- package/dist/capture/index.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/ingest/providers/anthropic.d.ts.map +1 -1
- package/dist/ingest/providers/anthropic.js +22 -8
- package/dist/ingest/providers/anthropic.js.map +1 -1
- package/dist/ingest/providers/bedrock.d.ts.map +1 -1
- package/dist/ingest/providers/bedrock.js +22 -8
- package/dist/ingest/providers/bedrock.js.map +1 -1
- package/dist/ingest/providers/gemini.d.ts.map +1 -1
- package/dist/ingest/providers/gemini.js +22 -8
- package/dist/ingest/providers/gemini.js.map +1 -1
- package/dist/ingest/providers/index.d.ts +13 -18
- package/dist/ingest/providers/index.d.ts.map +1 -1
- package/dist/ingest/providers/index.js +213 -139
- package/dist/ingest/providers/index.js.map +1 -1
- package/dist/ingest/providers/openai.d.ts +1 -1
- package/dist/ingest/providers/openai.d.ts.map +1 -1
- package/dist/ingest/providers/openai.js +37 -17
- package/dist/ingest/providers/openai.js.map +1 -1
- package/dist/ingest/task-processor.d.ts +28 -3
- package/dist/ingest/task-processor.d.ts.map +1 -1
- package/dist/ingest/task-processor.js +166 -67
- package/dist/ingest/task-processor.js.map +1 -1
- package/dist/ingest/worker.d.ts.map +1 -1
- package/dist/ingest/worker.js +97 -75
- package/dist/ingest/worker.js.map +1 -1
- package/dist/shared/llm-call.d.ts +26 -0
- package/dist/shared/llm-call.d.ts.map +1 -0
- package/dist/shared/llm-call.js +163 -0
- package/dist/shared/llm-call.js.map +1 -0
- package/dist/skill/evaluator.d.ts +0 -3
- package/dist/skill/evaluator.d.ts.map +1 -1
- package/dist/skill/evaluator.js +34 -59
- package/dist/skill/evaluator.js.map +1 -1
- package/dist/skill/evolver.d.ts +22 -1
- package/dist/skill/evolver.d.ts.map +1 -1
- package/dist/skill/evolver.js +191 -32
- package/dist/skill/evolver.js.map +1 -1
- package/dist/skill/generator.d.ts +0 -3
- package/dist/skill/generator.d.ts.map +1 -1
- package/dist/skill/generator.js +15 -50
- package/dist/skill/generator.js.map +1 -1
- package/dist/skill/upgrader.d.ts +0 -2
- package/dist/skill/upgrader.d.ts.map +1 -1
- package/dist/skill/upgrader.js +4 -39
- package/dist/skill/upgrader.js.map +1 -1
- package/dist/skill/validator.d.ts +0 -2
- package/dist/skill/validator.d.ts.map +1 -1
- package/dist/skill/validator.js +14 -44
- package/dist/skill/validator.js.map +1 -1
- package/dist/storage/sqlite.d.ts +13 -2
- package/dist/storage/sqlite.d.ts.map +1 -1
- package/dist/storage/sqlite.js +72 -6
- package/dist/storage/sqlite.js.map +1 -1
- package/dist/tools/memory-get.d.ts.map +1 -1
- package/dist/tools/memory-get.js +5 -1
- package/dist/tools/memory-get.js.map +1 -1
- package/dist/tools/memory-search.d.ts.map +1 -1
- package/dist/tools/memory-search.js +5 -0
- package/dist/tools/memory-search.js.map +1 -1
- package/dist/tools/memory-timeline.d.ts.map +1 -1
- package/dist/tools/memory-timeline.js +11 -2
- package/dist/tools/memory-timeline.js.map +1 -1
- package/dist/types.d.ts +2 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +1 -1
- package/dist/types.js.map +1 -1
- package/dist/viewer/html.d.ts +1 -1
- package/dist/viewer/html.d.ts.map +1 -1
- package/dist/viewer/html.js +233 -9
- package/dist/viewer/html.js.map +1 -1
- package/dist/viewer/server.d.ts +5 -0
- package/dist/viewer/server.d.ts.map +1 -1
- package/dist/viewer/server.js +383 -177
- package/dist/viewer/server.js.map +1 -1
- package/index.ts +26 -4
- package/package.json +2 -1
- package/src/capture/index.ts +39 -10
- package/src/index.ts +3 -2
- package/src/ingest/providers/anthropic.ts +22 -8
- package/src/ingest/providers/bedrock.ts +22 -8
- package/src/ingest/providers/gemini.ts +22 -8
- package/src/ingest/providers/index.ts +192 -142
- package/src/ingest/providers/openai.ts +37 -17
- package/src/ingest/task-processor.ts +183 -65
- package/src/ingest/worker.ts +98 -77
- package/src/shared/llm-call.ts +144 -0
- package/src/skill/evaluator.ts +35 -64
- package/src/skill/evolver.ts +201 -33
- package/src/skill/generator.ts +16 -59
- package/src/skill/upgrader.ts +5 -43
- package/src/skill/validator.ts +15 -47
- package/src/storage/sqlite.ts +88 -6
- package/src/tools/memory-get.ts +6 -1
- package/src/tools/memory-search.ts +6 -0
- package/src/tools/memory-timeline.ts +13 -1
- package/src/types.ts +2 -1
- package/src/viewer/html.ts +233 -9
- package/src/viewer/server.ts +368 -187
|
@@ -30,13 +30,16 @@ const SKIP_REASONS = {
|
|
|
30
30
|
export class TaskProcessor {
|
|
31
31
|
private summarizer: Summarizer;
|
|
32
32
|
private processing = false;
|
|
33
|
+
private pendingEvents: Array<{ sessionKey: string; latestTimestamp: number; owner: string }> = [];
|
|
34
|
+
private drainPromise: Promise<void> | null = null;
|
|
33
35
|
private onTaskCompletedCallback?: (task: Task) => void;
|
|
34
36
|
|
|
35
37
|
constructor(
  private store: SqliteStore,
  private ctx: PluginContext,
) {
  // The summarizer receives its regular config and logger, plus the optional
  // summarizer config found under `skillEvolution` (undefined when that
  // section is absent from the plugin config).
  const { config, log } = ctx;
  this.summarizer = new Summarizer(config.summarizer, log, config.skillEvolution?.summarizer);
}
|
|
41
44
|
|
|
42
45
|
onTaskCompleted(cb: (task: Task) => void): void {
|
|
@@ -48,25 +51,37 @@ export class TaskProcessor {
|
|
|
48
51
|
* Determines if a new task boundary was crossed and handles transition.
|
|
49
52
|
*/
|
|
50
53
|
async onChunksIngested(sessionKey: string, latestTimestamp: number, owner?: string): Promise<void> {
  // Events without an explicit owner are attributed to "agent:main".
  const resolvedOwner = owner ?? "agent:main";
  this.ctx.log.debug(`TaskProcessor.onChunksIngested called session=${sessionKey} ts=${latestTimestamp} owner=${resolvedOwner} processing=${this.processing}`);

  // Enqueue first, then make sure exactly one drain loop is running. Callers
  // arriving while a drain is in flight share its promise instead of starting
  // a second, concurrent drain.
  this.pendingEvents.push({ sessionKey, latestTimestamp, owner: resolvedOwner });
  const activeDrain = this.drainPromise ?? (this.drainPromise = this.drainPending());
  await activeDrain;
}
|
|
64
|
+
|
|
65
|
+
private async drainPending(): Promise<void> {
  // Handles queued ingest events strictly one after another until the queue
  // is empty. The queue is re-read on every iteration, so events pushed while
  // an earlier one is being awaited are handled by this same loop.
  this.processing = true;
  try {
    for (
      let event = this.pendingEvents.shift();
      event !== undefined;
      event = this.pendingEvents.shift()
    ) {
      const { sessionKey, latestTimestamp, owner } = event;
      try {
        await this.detectAndProcess(sessionKey, latestTimestamp, owner);
      } catch (err) {
        // A failing event is logged and dropped; the remaining events still run.
        this.ctx.log.error(`TaskProcessor error: ${err}`);
      }
    }
  } finally {
    // Clearing the promise lets the next onChunksIngested call start a new drain.
    this.processing = false;
    this.drainPromise = null;
  }
}
|
|
65
81
|
|
|
66
82
|
private async detectAndProcess(sessionKey: string, latestTimestamp: number, owner: string): Promise<void> {
|
|
67
83
|
this.ctx.log.debug(`TaskProcessor.detectAndProcess session=${sessionKey} owner=${owner}`);
|
|
68
84
|
|
|
69
|
-
// Finalize any active tasks from OTHER sessions for the SAME owner (session change = task boundary)
|
|
70
85
|
const allActive = this.store.getAllActiveTasks(owner);
|
|
71
86
|
for (const t of allActive) {
|
|
72
87
|
if (t.sessionKey !== sessionKey) {
|
|
@@ -75,83 +90,177 @@ export class TaskProcessor {
|
|
|
75
90
|
}
|
|
76
91
|
}
|
|
77
92
|
|
|
78
|
-
|
|
93
|
+
let activeTask = this.store.getActiveTask(sessionKey, owner);
|
|
79
94
|
this.ctx.log.debug(`TaskProcessor.detectAndProcess activeTask=${activeTask?.id ?? "none"} owner=${owner}`);
|
|
80
95
|
|
|
81
96
|
if (!activeTask) {
|
|
82
|
-
|
|
83
|
-
|
|
97
|
+
// Create a new empty task — do NOT assign all chunks yet.
|
|
98
|
+
// processChunksIncrementally will assign them one turn at a time with boundary checks.
|
|
99
|
+
activeTask = await this.createNewTaskReturn(sessionKey, latestTimestamp, owner);
|
|
84
100
|
}
|
|
85
101
|
|
|
86
|
-
|
|
102
|
+
await this.processChunksIncrementally(activeTask, sessionKey, latestTimestamp, owner);
|
|
103
|
+
}
|
|
87
104
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
105
|
+
/**
 * Process unassigned chunks one user-turn at a time.
 *
 * Strategy:
 * - Need at least 1 user turn in the current task before starting LLM judgment
 *   (0 turns = no reference point for comparison).
 * - Each subsequent user turn is individually checked against the full task context.
 * - A time gap larger than `DEFAULTS.taskIdleTimeoutMs` always triggers a split
 *   regardless of topic.
 * - When the topic judge returns `null` (no verdict), the turn stays in the
 *   current task.
 *
 * @param activeTask - Task that receives chunks until a boundary is detected.
 * @param sessionKey - Session whose unassigned chunks are processed.
 * @param latestTimestamp - Timestamp used for the task created after an initial time-gap split.
 * @param owner - Owner passed to every task created here.
 */
private async processChunksIncrementally(
  activeTask: Task,
  sessionKey: string,
  latestTimestamp: number,
  owner: string,
): Promise<void> {
  const unassigned = this.store.getUnassignedChunks(sessionKey);
  if (unassigned.length === 0) return;

  const taskChunks = this.store.getChunksByTask(activeTask.id);
  const isUser = (c: Chunk): boolean => c.role === "user";

  // Pairwise folds instead of Math.max(...arr) / Math.min(...arr): spreading a
  // large array into call arguments can exceed the engine's argument limit.
  const newestTs = (items: Chunk[]): number =>
    items.reduce((acc, c) => Math.max(acc, c.createdAt), -Infinity);
  const oldestTs = (items: Chunk[]): number =>
    items.reduce((acc, c) => Math.min(acc, c.createdAt), Infinity);

  // Time gap check against the earliest unassigned chunk
  if (taskChunks.length > 0) {
    const gap = oldestTs(unassigned) - newestTs(taskChunks);
    if (gap > DEFAULTS.taskIdleTimeoutMs) {
      this.ctx.log.info(
        `Task boundary: time gap ${Math.round(gap / 60000)}min > ${Math.round(DEFAULTS.taskIdleTimeoutMs / 60000)}min`,
      );
      await this.finalizeTask(activeTask);
      const newTask = await this.createNewTaskReturn(sessionKey, latestTimestamp, owner);
      // Recurse with the new empty task so remaining unassigned chunks get boundary-checked too
      return this.processChunksIncrementally(newTask, sessionKey, latestTimestamp, owner);
    }
  }

  const turns = this.groupIntoTurns(unassigned);
  if (turns.length === 0) {
    // Defensive: with a non-empty input there is normally at least one turn.
    this.assignChunksToTask(unassigned, activeTask.id);
    return;
  }

  let currentTask = activeTask;
  let currentTaskChunks = [...taskChunks];
  // Running values kept in sync with currentTaskChunks so that each turn does
  // not rescan the whole task.
  let lastTs = newestTs(currentTaskChunks);
  let userTurnCount = currentTaskChunks.filter(isUser).length;

  for (let i = 0; i < turns.length; i++) {
    const turn = turns[i];
    const userChunk = turn.find(isUser);

    // Set to the user message's timestamp when this turn must open a new task.
    let boundaryTs: number | undefined;

    if (userChunk) {
      const gap = userChunk.createdAt - lastTs;
      if (currentTaskChunks.length > 0 && gap > DEFAULTS.taskIdleTimeoutMs) {
        // Time gap check per turn
        this.ctx.log.info(`Task boundary at turn ${i}: time gap ${Math.round(gap / 60000)}min`);
        boundaryTs = userChunk.createdAt;
      } else if (userTurnCount >= 1) {
        // LLM topic judgment — check this single user message against full task context
        const context = this.buildContextSummary(currentTaskChunks);
        const newMsg = userChunk.content.slice(0, 500);
        this.ctx.log.info(`Topic judge: "${newMsg.slice(0, 60)}" vs ${userTurnCount} user turns`);
        const isNew = await this.summarizer.judgeNewTopic(context, newMsg);
        this.ctx.log.info(`Topic judge result: ${isNew === null ? "null(fallback)" : isNew ? "NEW" : "SAME"}`);

        if (isNew) {
          this.ctx.log.info(`Task boundary at turn ${i}: LLM judged new topic. Msg: "${newMsg.slice(0, 80)}..."`);
          boundaryTs = userChunk.createdAt;
        }
      }
      // Otherwise the task has no user turn yet: nothing to compare against,
      // so the turn simply joins the current task.
    }

    if (boundaryTs !== undefined) {
      await this.finalizeTask(currentTask);
      currentTask = await this.createNewTaskReturn(sessionKey, boundaryTs, owner);
      currentTaskChunks = [];
      lastTs = -Infinity;
      userTurnCount = 0;
    }

    // Every turn ends up in whichever task is current at this point.
    this.assignChunksToTask(turn, currentTask.id);
    for (const c of turn) {
      currentTaskChunks.push(c);
      lastTs = Math.max(lastTs, c.createdAt);
      if (isUser(c)) userTurnCount++;
    }
  }

  this.store.updateTask(currentTask.id, { endedAt: undefined });
}
|
|
136
208
|
|
|
137
|
-
|
|
209
|
+
/**
 * Split chunks into turns, preserving input order.
 *
 * A user chunk opens a new turn and every following non-user chunk joins it.
 * Non-user chunks that precede the first user chunk form a leading turn that
 * contains no user message.
 *
 * @param chunks - Chunks to group.
 * @returns One array per turn; empty when `chunks` is empty.
 */
private groupIntoTurns(chunks: Chunk[]): Chunk[][] {
  const turns: Chunk[][] = [];

  for (const chunk of chunks) {
    const openTurn = turns[turns.length - 1];
    if (openTurn === undefined || chunk.role === "user") {
      turns.push([chunk]);
    } else {
      openTurn.push(chunk);
    }
  }

  return turns;
}
|
|
139
227
|
|
|
140
228
|
/**
 * Build context from existing task chunks for the LLM topic judge.
 * Includes both the task's opening topic and recent exchanges,
 * so the LLM understands both what the task was originally about
 * and where the conversation currently is.
 *
 * For user messages, include the raw content (up to 500 chars) since
 * they carry the topic signal. For assistant messages, use the summary,
 * or the content truncated to 200 chars when no summary is set.
 *
 * Only chunks with role "user" or "assistant" are included. Up to 10 such
 * chunks are listed in full; beyond that, only the first 6 and the last 4
 * are kept under separate headings.
 *
 * @param chunks - Chunks already assigned to the task.
 * @returns Newline-joined context text, or "" when there is nothing conversational.
 */
private buildContextSummary(chunks: Chunk[]): string {
  const conversational = chunks.filter((c) => c.role === "user" || c.role === "assistant");
  if (conversational.length === 0) return "";

  const formatChunk = (c: Chunk): string => {
    if (c.role === "user") {
      // User text is taken from the content rather than the summary so the
      // 500-char cap actually applies, as the contract above describes.
      return `[User]: ${c.content.slice(0, 500)}`;
    }
    return `[Assistant]: ${c.summary || c.content.slice(0, 200)}`;
  };

  if (conversational.length <= 10) {
    return conversational.map(formatChunk).join("\n");
  }

  // More than 10 entries, so the first 6 and the last 4 never overlap.
  const opening = conversational.slice(0, 6).map(formatChunk);
  const recent = conversational.slice(-4).map(formatChunk);
  return [
    "--- Task opening ---",
    ...opening,
    "--- Recent exchanges ---",
    ...recent,
  ].join("\n");
}
|
|
153
262
|
|
|
154
|
-
private async
|
|
263
|
+
private async createNewTaskReturn(sessionKey: string, timestamp: number, owner: string = "agent:main"): Promise<Task> {
|
|
155
264
|
const taskId = uuid();
|
|
156
265
|
const task: Task = {
|
|
157
266
|
id: taskId,
|
|
@@ -165,20 +274,29 @@ export class TaskProcessor {
|
|
|
165
274
|
updatedAt: timestamp,
|
|
166
275
|
};
|
|
167
276
|
this.store.insertTask(task);
|
|
168
|
-
this.assignUnassignedChunks(sessionKey, taskId);
|
|
169
277
|
this.ctx.log.info(`Created new task=${taskId} session=${sessionKey}`);
|
|
278
|
+
return task;
|
|
170
279
|
}
|
|
171
280
|
|
|
172
|
-
private
|
|
173
|
-
const
|
|
174
|
-
|
|
281
|
+
private async createNewTask(sessionKey: string, timestamp: number, owner: string = "agent:main"): Promise<void> {
  // Create the task, then attach every chunk of the session that is still
  // unassigned to it in one go.
  const { id } = await this.createNewTaskReturn(sessionKey, timestamp, owner);
  this.assignUnassignedChunks(sessionKey, id);
}
|
|
285
|
+
|
|
286
|
+
private assignChunksToTask(chunks: Chunk[], taskId: string): void {
  // Nothing to store and nothing to log for an empty list.
  if (chunks.length === 0) return;

  // Point each chunk at the given task in the store.
  chunks.forEach((chunk) => {
    this.store.setChunkTaskId(chunk.id, taskId);
  });
  this.ctx.log.debug(`Assigned ${chunks.length} chunks to task=${taskId}`);
}
|
|
181
294
|
|
|
295
|
+
private assignUnassignedChunks(sessionKey: string, taskId: string): void {
  // Attach every chunk of the session that has no task yet to `taskId`.
  this.assignChunksToTask(this.store.getUnassignedChunks(sessionKey), taskId);
}
|
|
299
|
+
|
|
182
300
|
async finalizeTask(task: Task): Promise<void> {
|
|
183
301
|
const chunks = this.store.getChunksByTask(task.id);
|
|
184
302
|
const fallbackTitle = chunks.length > 0 ? this.extractTitle(chunks) : "";
|
package/src/ingest/worker.ts
CHANGED
|
@@ -19,7 +19,8 @@ export class IngestWorker {
|
|
|
19
19
|
private embedder: Embedder,
|
|
20
20
|
private ctx: PluginContext,
|
|
21
21
|
) {
|
|
22
|
-
|
|
22
|
+
const strongCfg = ctx.config.skillEvolution?.summarizer;
|
|
23
|
+
this.summarizer = new Summarizer(ctx.config.summarizer, ctx.log, strongCfg);
|
|
23
24
|
this.taskProcessor = new TaskProcessor(store, ctx);
|
|
24
25
|
}
|
|
25
26
|
|
|
@@ -45,73 +46,79 @@ export class IngestWorker {
|
|
|
45
46
|
|
|
46
47
|
/**
 * Drain the ingest queue in batches.
 *
 * Each pass of the outer loop ingests every queued message, records one
 * "memory_add" tool call / API log entry for the batch, and then notifies the
 * task processor about the last successfully ingested session. Messages that
 * arrive while the task processor is awaited are handled by the next pass.
 * When the queue is finally empty, all pending flush resolvers are released.
 */
private async processQueue(): Promise<void> {
  this.processing = true;

  try {
    while (this.queue.length > 0) {
      const t0 = performance.now();
      let lastSessionKey: string | undefined;
      let lastOwner: string | undefined;
      let lastTimestamp = 0;
      let stored = 0;
      let skipped = 0;
      let merged = 0;
      let duplicated = 0;
      let errors = 0;
      const resultLines: string[] = [];
      const inputLines: string[] = [];

      // Messages enqueued while an ingestion is awaited join this same batch.
      for (let msg = this.queue.shift(); msg !== undefined; msg = this.queue.shift()) {
        inputLines.push(`[${msg.role}] ${msg.content}`);
        try {
          const result = await this.ingestMessage(msg);
          // Only successfully ingested messages update the "last seen" markers.
          lastSessionKey = msg.sessionKey;
          lastOwner = msg.owner ?? "agent:main";
          lastTimestamp = Math.max(lastTimestamp, msg.timestamp);
          if (result === "skipped") {
            skipped++;
            resultLines.push(`[${msg.role}] ⏭ exact-dup → ${msg.content}`);
          } else if (result.action === "stored") {
            stored++;
            resultLines.push(`[${msg.role}] ✅ stored → ${result.summary ?? msg.content}`);
          } else if (result.action === "duplicate") {
            duplicated++;
            resultLines.push(`[${msg.role}] 🔁 dedup(${result.reason ?? "similar"}) → ${msg.content}`);
          } else if (result.action === "merged") {
            merged++;
            resultLines.push(`[${msg.role}] 🔀 merged → ${msg.content}`);
          }
        } catch (err) {
          errors++;
          resultLines.push(`[${msg.role}] ❌ error → ${msg.content}`);
          this.ctx.log.error(`Failed to ingest message turn=${msg.turnId}: ${err}`);
        }
      }

      const dur = performance.now() - t0;

      if (stored + merged > 0 || skipped > 0 || duplicated > 0) {
        this.store.recordToolCall("memory_add", dur, errors === 0);
        try {
          const inputInfo = {
            session: lastSessionKey,
            // Count what was actually drained: the queue length sampled before
            // the loop misses messages that arrived during ingestion.
            messages: inputLines.length,
            details: inputLines,
          };
          const stats = [`stored=${stored}`, skipped > 0 ? `skipped=${skipped}` : null, duplicated > 0 ? `dedup=${duplicated}` : null, merged > 0 ? `merged=${merged}` : null, errors > 0 ? `errors=${errors}` : null].filter(Boolean).join(", ");
          this.store.recordApiLog("memory_add", inputInfo, `${stats}\n${resultLines.join("\n")}`, dur, errors === 0);
        } catch (_) { /* best-effort */ }
      }

      // NOTE(review): only the last session of the batch is reported; if one
      // batch can mix sessions, earlier ones are not notified here — confirm
      // this is intended.
      if (lastSessionKey) {
        this.ctx.log.debug(`Calling TaskProcessor.onChunksIngested session=${lastSessionKey} ts=${lastTimestamp} owner=${lastOwner}`);
        try {
          await this.taskProcessor.onChunksIngested(lastSessionKey, lastTimestamp, lastOwner);
        } catch (err) {
          this.ctx.log.error(`TaskProcessor post-ingest error: ${err}`);
        }
      }
    }
  } finally {
    this.processing = false;
    for (const resolve of this.flushResolvers) resolve();
    this.flushResolvers = [];
  }
}
|
|
116
123
|
|
|
117
124
|
private async ingestMessage(msg: ConversationMessage): Promise<
|
|
@@ -133,7 +140,7 @@ export class IngestWorker {
|
|
|
133
140
|
seq: number,
|
|
134
141
|
): Promise<{ action: "stored" | "duplicate" | "merged"; chunkId?: string; summary?: string; targetChunkId?: string; reason?: string }> {
|
|
135
142
|
const chunkId = uuid();
|
|
136
|
-
|
|
143
|
+
let summary = await this.summarizer.summarize(content);
|
|
137
144
|
|
|
138
145
|
let embedding: number[] | null = null;
|
|
139
146
|
try {
|
|
@@ -145,11 +152,23 @@ export class IngestWorker {
|
|
|
145
152
|
let dedupStatus: "active" | "duplicate" | "merged" = "active";
|
|
146
153
|
let dedupTarget: string | null = null;
|
|
147
154
|
let dedupReason: string | null = null;
|
|
155
|
+
let mergedFromOld: string | null = null;
|
|
156
|
+
|
|
157
|
+
// Fast path: exact content_hash match within same owner (agent dimension)
|
|
158
|
+
const chunkOwner = msg.owner ?? "agent:main";
|
|
159
|
+
const existingByHash = this.store.findActiveChunkByHash(content, chunkOwner);
|
|
160
|
+
if (existingByHash) {
|
|
161
|
+
this.ctx.log.debug(`Exact-dup (owner=${chunkOwner}): hash match → existing=${existingByHash}`);
|
|
162
|
+
this.store.recordMergeHit(existingByHash, "DUPLICATE", "exact content hash match");
|
|
163
|
+
dedupStatus = "duplicate";
|
|
164
|
+
dedupTarget = existingByHash;
|
|
165
|
+
dedupReason = "exact content hash match";
|
|
166
|
+
}
|
|
148
167
|
|
|
149
168
|
// Smart dedup: find Top-5 similar chunks, then ask LLM to judge
|
|
150
|
-
if (embedding) {
|
|
151
|
-
const similarThreshold = this.ctx.config.dedup?.similarityThreshold ?? 0.
|
|
152
|
-
const dedupOwnerFilter = msg.owner ? [msg.owner
|
|
169
|
+
if (dedupStatus === "active" && embedding) {
|
|
170
|
+
const similarThreshold = this.ctx.config.dedup?.similarityThreshold ?? 0.60;
|
|
171
|
+
const dedupOwnerFilter = msg.owner ? [msg.owner] : undefined;
|
|
153
172
|
const topSimilar = findTopSimilar(this.store, embedding, similarThreshold, 5, this.ctx.log, dedupOwnerFilter);
|
|
154
173
|
|
|
155
174
|
if (topSimilar.length > 0) {
|
|
@@ -182,19 +201,21 @@ export class IngestWorker {
|
|
|
182
201
|
const oldChunk = this.store.getChunk(targetChunkId);
|
|
183
202
|
const oldSummary = oldChunk?.summary ?? "";
|
|
184
203
|
this.store.recordMergeHit(targetChunkId, "UPDATE", dedupResult.reason, oldSummary, dedupResult.mergedSummary);
|
|
185
|
-
this.store.updateChunkSummaryAndContent(targetChunkId, dedupResult.mergedSummary, content);
|
|
186
204
|
|
|
205
|
+
summary = dedupResult.mergedSummary;
|
|
187
206
|
try {
|
|
188
|
-
const [newEmb] = await this.embedder.embed([
|
|
189
|
-
if (newEmb)
|
|
207
|
+
const [newEmb] = await this.embedder.embed([summary]);
|
|
208
|
+
if (newEmb) embedding = newEmb;
|
|
190
209
|
} catch (err) {
|
|
191
|
-
this.ctx.log.warn(`Re-embed after
|
|
210
|
+
this.ctx.log.warn(`Re-embed after merge failed: ${err}`);
|
|
192
211
|
}
|
|
193
212
|
|
|
194
|
-
|
|
195
|
-
|
|
213
|
+
this.store.markDedupStatus(targetChunkId, "merged", chunkId, dedupResult.reason);
|
|
214
|
+
this.store.deleteEmbedding(targetChunkId);
|
|
215
|
+
|
|
216
|
+
mergedFromOld = targetChunkId;
|
|
196
217
|
dedupReason = dedupResult.reason;
|
|
197
|
-
this.ctx.log.debug(`Smart dedup: UPDATE →
|
|
218
|
+
this.ctx.log.debug(`Smart dedup: UPDATE → old chunk=${targetChunkId} retired, new chunk=${chunkId} gets merged summary, reason: ${dedupResult.reason}`);
|
|
198
219
|
}
|
|
199
220
|
}
|
|
200
221
|
|
|
@@ -237,8 +258,8 @@ export class IngestWorker {
|
|
|
237
258
|
if (dedupStatus === "duplicate") {
|
|
238
259
|
return { action: "duplicate", summary, targetChunkId: dedupTarget ?? undefined, reason: dedupReason ?? undefined };
|
|
239
260
|
}
|
|
240
|
-
if (
|
|
241
|
-
return { action: "merged", summary, targetChunkId:
|
|
261
|
+
if (mergedFromOld) {
|
|
262
|
+
return { action: "merged", chunkId, summary, targetChunkId: mergedFromOld, reason: dedupReason ?? undefined };
|
|
242
263
|
}
|
|
243
264
|
return { action: "stored", chunkId, summary };
|
|
244
265
|
}
|