@memtensor/memos-local-openclaw-plugin 1.0.7 → 1.0.8-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -0
- package/index.ts +137 -87
- package/openclaw.plugin.json +1 -1
- package/package.json +4 -3
- package/scripts/postinstall.cjs +59 -25
- package/skill/memos-memory-guide/SKILL.md +5 -2
- package/src/client/hub.ts +11 -0
- package/src/hub/server.ts +13 -6
- package/src/ingest/providers/anthropic.ts +9 -6
- package/src/ingest/providers/bedrock.ts +9 -6
- package/src/ingest/providers/gemini.ts +9 -6
- package/src/ingest/providers/index.ts +136 -22
- package/src/ingest/providers/openai.ts +141 -6
- package/src/ingest/task-processor.ts +61 -41
- package/src/ingest/worker.ts +32 -11
- package/src/recall/engine.ts +2 -1
- package/src/shared/llm-call.ts +14 -1
- package/src/sharing/types.ts +1 -0
- package/src/storage/sqlite.ts +194 -11
- package/src/types.ts +3 -0
- package/src/viewer/html.ts +953 -281
- package/src/viewer/server.ts +305 -20
|
@@ -188,19 +188,26 @@ SAME — the new message:
|
|
|
188
188
|
- Reports a result, error, or feedback about the current task
|
|
189
189
|
- Discusses different tools or approaches for the SAME goal (e.g., learning English via BBC → via ChatGPT = SAME)
|
|
190
190
|
- Is a short acknowledgment (ok, thanks, 好的) in response to the current flow
|
|
191
|
+
- Is a follow-up, update, or different angle on the same news event, person, or story
|
|
192
|
+
- Shares the same core entity (person, company, event) even if the specific detail or angle differs
|
|
193
|
+
- Contains pronouns or references (那, 这, 它, 其中, 哪些, those, which, what about, etc.) pointing to items from the current conversation
|
|
194
|
+
- Asks about a sub-topic, tool, detail, dimension, or aspect of the current discussion topic
|
|
191
195
|
|
|
192
196
|
NEW — the new message:
|
|
193
|
-
- Introduces a subject from a DIFFERENT domain than the current task (e.g., tech → cooking, work → personal life, database → travel)
|
|
194
|
-
- Has NO logical connection to what was being discussed
|
|
197
|
+
- Introduces a subject from a COMPLETELY DIFFERENT domain than the current task (e.g., tech → cooking, work → personal life, database → travel)
|
|
198
|
+
- Has NO logical connection to what was being discussed — no shared entities, events, or themes
|
|
195
199
|
- Starts a request about a different project, system, or life area
|
|
196
200
|
- Begins with a new greeting/reset followed by a different topic
|
|
197
201
|
|
|
198
202
|
Key principles:
|
|
199
|
-
-
|
|
203
|
+
- Default to SAME unless the topic domain CLEARLY changed. When in doubt, choose SAME.
|
|
204
|
+
- CRITICAL: Short messages (under ~30 characters) that use pronouns or ask "what about X" / "哪些" / "那XX呢" are almost always follow-ups referring to the current topic. Only mark them NEW if they explicitly name a completely unrelated domain.
|
|
205
|
+
- If the new message mentions the same person, event, product, or entity as the current task, it is SAME regardless of the angle
|
|
200
206
|
- Different aspects of the SAME project/system are SAME (e.g., Nginx SSL → Nginx gzip = SAME)
|
|
201
|
-
-
|
|
202
|
-
-
|
|
203
|
-
-
|
|
207
|
+
- Asking about tools, systems, or methods for the current topic is SAME (e.g., "港股调研" → "那处理系统有哪些" = SAME; "数据分析" → "用什么工具" = SAME)
|
|
208
|
+
- Follow-up news about the same event is SAME (e.g., "博士失联" → "博士遗体被找到" = SAME; "产品发布" → "产品销量" = SAME)
|
|
209
|
+
- Different unrelated domains discussed independently are NEW (e.g., Redis config → cooking recipe = NEW)
|
|
210
|
+
- Examples: "配置Nginx" → "加gzip压缩" = SAME; "配置Nginx" → "做红烧肉" = NEW; "港股调研" → "那处理系统有哪些" = SAME; "部署服务器" → "年会安排" = NEW
|
|
204
211
|
|
|
205
212
|
Output exactly one word: NEW or SAME`;
|
|
206
213
|
|
|
@@ -246,6 +253,134 @@ export async function judgeNewTopicOpenAI(
|
|
|
246
253
|
return answer.startsWith("NEW");
|
|
247
254
|
}
|
|
248
255
|
|
|
256
|
+
// ─── Structured Topic Classifier ───
|
|
257
|
+
|
|
258
|
+
/**
 * Outcome of the structured topic classifier for one incoming message.
 */
export interface TopicClassifyResult {
  /** "NEW" when the message starts an unrelated task, "SAME" when it continues the current one. */
  decision: "NEW" | "SAME";
  /** Classifier confidence; the classifier prompt asks the model for a value between 0.0 and 1.0. */
  confidence: number;
  /** Boundary category taken from the model response; empty string when the response has none. */
  boundaryType: string;
  /** Explanation for the decision; may be empty for compact responses. */
  reason: string;
}
|
|
264
|
+
|
|
265
|
+
/**
 * System prompt for the structured topic classifier.
 * Requests a compact JSON reply with `d` (S = same, N = new) and `c` (confidence).
 */
const TOPIC_CLASSIFIER_PROMPT = [
  'Classify if NEW MESSAGE continues current task or starts an unrelated one.',
  'Output ONLY JSON: {"d":"S"|"N","c":0.0-1.0}',
  'd=S(same) or N(new). c=confidence. Default S. Only N if completely unrelated domain.',
  'Sub-questions, tools, methods, details of current topic = S.',
].join("\n");
|
|
269
|
+
|
|
270
|
+
/**
 * Ask an OpenAI-compatible chat-completions endpoint whether `newMessage`
 * continues the task described by `taskState` or starts an unrelated one.
 *
 * TOPIC_CLASSIFIER_PROMPT is sent as the system message; the raw completion
 * text is handed to parseTopicClassifyResult.
 *
 * @param taskState - Description of the current task, sent under the "TASK:" heading.
 * @param newMessage - Incoming message, sent under the "MSG:" heading.
 * @param cfg - Endpoint, model, API key, extra headers and timeout. Defaults:
 *   the OpenAI chat-completions URL, "gpt-4o-mini" and a 15 s timeout.
 * @param log - Logger that receives the raw completion at debug level.
 * @returns The parsed classification. An empty or missing completion yields the
 *   parser's fallback result.
 * @throws Error when the HTTP status is not OK. Network and timeout errors from
 *   `fetch` propagate unchanged.
 */
export async function classifyTopicOpenAI(
  taskState: string,
  newMessage: string,
  cfg: SummarizerConfig,
  log: Logger,
): Promise<TopicClassifyResult> {
  const endpoint = normalizeChatEndpoint(cfg.endpoint ?? "https://api.openai.com/v1/chat/completions");
  const model = cfg.model ?? "gpt-4o-mini";
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${cfg.apiKey}`,
    // Spread last so caller-supplied headers can override the defaults above.
    ...cfg.headers,
  };

  const userContent = `TASK:\n${taskState}\n\nMSG:\n${newMessage}`;

  const resp = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(buildRequestBody(cfg, {
      model,
      temperature: 0,
      max_tokens: 60,
      messages: [
        { role: "system", content: TOPIC_CLASSIFIER_PROMPT },
        { role: "user", content: userContent },
      ],
    })),
    signal: AbortSignal.timeout(cfg.timeoutMs ?? 15_000),
  });

  if (!resp.ok) {
    const body = await resp.text();
    throw new Error(`OpenAI topic-classifier failed (${resp.status}): ${body}`);
  }

  // A 2xx body is not guaranteed to carry a `choices` array (or to be an object
  // at all); treat anything missing as an empty completion instead of throwing
  // a TypeError on `json.choices[0]`.
  const json = (await resp.json()) as {
    choices?: Array<{ message?: { content?: string | null } }>;
  } | null;
  const raw = json?.choices?.[0]?.message?.content?.trim() ?? "";
  log.debug(`Topic classifier raw: "${raw}"`);

  return parseTopicClassifyResult(raw, log);
}
|
|
312
|
+
|
|
313
|
+
/**
 * System prompt for the second-pass arbitration call.
 * Requests a one-word reply: NEW or SAME.
 */
const TOPIC_ARBITRATION_PROMPT = [
  "A classifier flagged this message as possibly new topic (low confidence). Is it truly UNRELATED, or a sub-question/follow-up?",
  "Tools/methods/details of current task = SAME. Shared entity/theme = SAME. Entirely different domain = NEW.",
  "Reply one word: NEW or SAME",
].join("\n");
|
|
316
|
+
|
|
317
|
+
/**
 * Second-pass check for a possible topic split: asks an OpenAI-compatible
 * chat-completions endpoint for a one-word NEW/SAME verdict using
 * TOPIC_ARBITRATION_PROMPT.
 *
 * @param taskState - Description of the current task, sent under the "TASK:" heading.
 * @param newMessage - Incoming message, sent under the "MSG:" heading.
 * @param cfg - Endpoint, model, API key, extra headers and timeout. Defaults:
 *   the OpenAI chat-completions URL, "gpt-4o-mini" and a 15 s timeout.
 * @param log - Logger that receives the upper-cased answer at debug level.
 * @returns "NEW" when the upper-cased answer starts with "NEW"; otherwise "SAME"
 *   (including when the completion is empty or missing).
 * @throws Error when the HTTP status is not OK. Network and timeout errors from
 *   `fetch` propagate unchanged.
 */
export async function arbitrateTopicSplitOpenAI(
  taskState: string,
  newMessage: string,
  cfg: SummarizerConfig,
  log: Logger,
): Promise<string> {
  const endpoint = normalizeChatEndpoint(cfg.endpoint ?? "https://api.openai.com/v1/chat/completions");
  const model = cfg.model ?? "gpt-4o-mini";
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${cfg.apiKey}`,
    // Spread last so caller-supplied headers can override the defaults above.
    ...cfg.headers,
  };

  const userContent = `TASK:\n${taskState}\n\nMSG:\n${newMessage}`;

  const resp = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(buildRequestBody(cfg, {
      model,
      temperature: 0,
      max_tokens: 10,
      messages: [
        { role: "system", content: TOPIC_ARBITRATION_PROMPT },
        { role: "user", content: userContent },
      ],
    })),
    signal: AbortSignal.timeout(cfg.timeoutMs ?? 15_000),
  });

  if (!resp.ok) {
    const body = await resp.text();
    throw new Error(`OpenAI topic-arbitration failed (${resp.status}): ${body}`);
  }

  // A 2xx body is not guaranteed to carry a `choices` array (or to be an object
  // at all); fall through to "SAME" instead of throwing a TypeError on
  // `json.choices[0]`.
  const json = (await resp.json()) as {
    choices?: Array<{ message?: { content?: string | null } }>;
  } | null;
  const answer = json?.choices?.[0]?.message?.content?.trim().toUpperCase() ?? "";
  log.debug(`Topic arbitration result: "${answer}"`);
  return answer.startsWith("NEW") ? "NEW" : "SAME";
}
|
|
358
|
+
|
|
359
|
+
/**
 * Convert the topic classifier's raw completion text into a TopicClassifyResult.
 *
 * Parsing order:
 *  1. The span from the first `{` to the last `}` is parsed as JSON. Both the
 *     compact keys (`d`, `c`) and the long keys (`decision`, `confidence`) are
 *     accepted; decision tokens are matched case-insensitively ("N" / "NEW").
 *  2. If no JSON object is found or it fails to parse, the text itself is
 *     inspected: only a leading standalone "N" or "NEW" word means NEW.
 *     Replies such as "NO" or "Not sure" fall back to SAME, matching the
 *     classifier's default-SAME policy.
 *
 * @param raw - Completion text returned by the model.
 * @param log - Logger that receives JSON parse failures at debug level.
 * @returns The parsed result. `confidence` is clamped to [0, 1] and defaults to
 *   0.5 when absent or not numeric; `reason` is "parse fallback" on path 2.
 */
export function parseTopicClassifyResult(raw: string, log: Logger): TopicClassifyResult {
  const isNewToken = (value: unknown): boolean => {
    if (typeof value !== "string") return false;
    const token = value.trim().toUpperCase();
    return token === "N" || token === "NEW";
  };

  // Accept numbers and numeric strings; reject everything else so the 0.5 default applies.
  const toConfidence = (value: unknown): number | undefined => {
    const num = typeof value === "string" && value.trim() !== "" ? Number(value) : value;
    if (typeof num !== "number" || !Number.isFinite(num)) return undefined;
    return Math.min(1, Math.max(0, num));
  };

  const asText = (value: unknown): string => (typeof value === "string" ? value : "");

  try {
    const jsonMatch = raw.match(/\{[\s\S]*\}/);
    if (jsonMatch) {
      const p = JSON.parse(jsonMatch[0]) as Record<string, unknown>;
      return {
        decision: isNewToken(p.d) || isNewToken(p.decision) ? "NEW" : "SAME",
        confidence: toConfidence(p.c) ?? toConfidence(p.confidence) ?? 0.5,
        boundaryType: asText(p.boundaryType),
        reason: asText(p.reason),
      };
    }
  } catch (err) {
    log.debug(`Failed to parse topic classify JSON: ${err}`);
  }

  // Word-boundary match: a bare startsWith("N") would also accept "NO", "NONE", "NOT ...".
  const isNew = /^(?:NEW|N)\b/.test(raw.trim().toUpperCase());
  return {
    decision: isNew ? "NEW" : "SAME",
    confidence: 0.5,
    boundaryType: "",
    reason: "parse fallback",
  };
}
|
|
383
|
+
|
|
249
384
|
const FILTER_RELEVANT_PROMPT = `You are a memory relevance judge.
|
|
250
385
|
|
|
251
386
|
Given a QUERY and CANDIDATE memories, decide: does each candidate's content contain information that would HELP ANSWER the query?
|
|
@@ -51,6 +51,9 @@ export class TaskProcessor {
|
|
|
51
51
|
* Determines if a new task boundary was crossed and handles transition.
|
|
52
52
|
*/
|
|
53
53
|
async onChunksIngested(sessionKey: string, latestTimestamp: number, owner?: string): Promise<void> {
|
|
54
|
+
if (sessionKey.startsWith("temp:") || sessionKey.startsWith("internal:") || sessionKey.startsWith("system:")) {
|
|
55
|
+
return;
|
|
56
|
+
}
|
|
54
57
|
const resolvedOwner = owner ?? "agent:main";
|
|
55
58
|
this.ctx.log.debug(`TaskProcessor.onChunksIngested called session=${sessionKey} ts=${latestTimestamp} owner=${resolvedOwner} processing=${this.processing}`);
|
|
56
59
|
this.pendingEvents.push({ sessionKey, latestTimestamp, owner: resolvedOwner });
|
|
@@ -79,13 +82,19 @@ export class TaskProcessor {
|
|
|
79
82
|
}
|
|
80
83
|
}
|
|
81
84
|
|
|
85
|
+
/**
 * Reduce a session key to its first three colon-separated segments.
 * Keys with fewer than three segments are returned unchanged.
 */
private static extractAgentPrefix(sessionKey: string): string {
  const segments = sessionKey.split(":");
  if (segments.length < 3) {
    return sessionKey;
  }
  const [first, second, third] = segments;
  return `${first}:${second}:${third}`;
}
|
|
89
|
+
|
|
82
90
|
private async detectAndProcess(sessionKey: string, latestTimestamp: number, owner: string): Promise<void> {
|
|
83
91
|
this.ctx.log.debug(`TaskProcessor.detectAndProcess session=${sessionKey} owner=${owner}`);
|
|
84
92
|
|
|
93
|
+
const currentAgentPrefix = TaskProcessor.extractAgentPrefix(sessionKey);
|
|
85
94
|
const allActive = this.store.getAllActiveTasks(owner);
|
|
86
95
|
for (const t of allActive) {
|
|
87
|
-
if (t.sessionKey !== sessionKey) {
|
|
88
|
-
this.ctx.log.info(`Session changed: finalizing task=${t.id} from session=${t.sessionKey} (owner=${owner})`);
|
|
96
|
+
if (t.sessionKey !== sessionKey && TaskProcessor.extractAgentPrefix(t.sessionKey) === currentAgentPrefix) {
|
|
97
|
+
this.ctx.log.info(`Session changed within agent: finalizing task=${t.id} from session=${t.sessionKey} (owner=${owner})`);
|
|
89
98
|
await this.finalizeTask(t);
|
|
90
99
|
}
|
|
91
100
|
}
|
|
@@ -179,26 +188,36 @@ export class TaskProcessor {
|
|
|
179
188
|
continue;
|
|
180
189
|
}
|
|
181
190
|
|
|
182
|
-
//
|
|
183
|
-
const
|
|
191
|
+
// Structured topic classification
|
|
192
|
+
const taskState = this.buildTopicJudgeState(currentTaskChunks, userChunk);
|
|
184
193
|
const newMsg = userChunk.content.slice(0, 500);
|
|
185
|
-
this.ctx.log.info(`Topic
|
|
186
|
-
const
|
|
187
|
-
this.ctx.log.info(`Topic
|
|
194
|
+
this.ctx.log.info(`Topic classify: "${newMsg.slice(0, 60)}" vs ${existingUserCount} user turns`);
|
|
195
|
+
const result = await this.summarizer.classifyTopic(taskState, newMsg);
|
|
196
|
+
this.ctx.log.info(`Topic classify: decision=${result?.decision ?? "null"} confidence=${result?.confidence ?? "?"} type=${result?.boundaryType ?? "?"} reason=${result?.reason ?? ""}`);
|
|
188
197
|
|
|
189
|
-
if (
|
|
198
|
+
if (!result || result.decision === "SAME") {
|
|
190
199
|
this.assignChunksToTask(turn, currentTask.id);
|
|
191
200
|
currentTaskChunks = currentTaskChunks.concat(turn);
|
|
192
201
|
continue;
|
|
193
202
|
}
|
|
194
203
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
204
|
+
// Low-confidence NEW: second-pass arbitration
|
|
205
|
+
if (result.confidence < 0.65) {
|
|
206
|
+
this.ctx.log.info(`Low confidence NEW (${result.confidence}), running second-pass arbitration...`);
|
|
207
|
+
const secondResult = await this.summarizer.arbitrateTopicSplit(taskState, newMsg);
|
|
208
|
+
this.ctx.log.info(`Second-pass result: ${secondResult ?? "null(fallback->SAME)"}`);
|
|
209
|
+
if (!secondResult || secondResult !== "NEW") {
|
|
210
|
+
this.assignChunksToTask(turn, currentTask.id);
|
|
211
|
+
currentTaskChunks = currentTaskChunks.concat(turn);
|
|
212
|
+
continue;
|
|
213
|
+
}
|
|
200
214
|
}
|
|
201
215
|
|
|
216
|
+
this.ctx.log.info(`Task boundary at turn ${i}: classifier judged NEW (confidence=${result.confidence}). Msg: "${newMsg.slice(0, 80)}..."`);
|
|
217
|
+
await this.finalizeTask(currentTask);
|
|
218
|
+
currentTask = await this.createNewTaskReturn(sessionKey, userChunk.createdAt, owner);
|
|
219
|
+
currentTaskChunks = [];
|
|
220
|
+
|
|
202
221
|
this.assignChunksToTask(turn, currentTask.id);
|
|
203
222
|
currentTaskChunks = currentTaskChunks.concat(turn);
|
|
204
223
|
}
|
|
@@ -226,38 +245,39 @@ export class TaskProcessor {
|
|
|
226
245
|
}
|
|
227
246
|
|
|
228
247
|
/**
|
|
229
|
-
* Build
|
|
230
|
-
* Includes
|
|
231
|
-
*
|
|
232
|
-
* and where the conversation currently is.
|
|
233
|
-
*
|
|
234
|
-
* For user messages, include full content (up to 500 chars) since
|
|
235
|
-
* they carry the topic signal. For assistant messages, use summary
|
|
236
|
-
* or truncated content since they mostly elaborate.
|
|
248
|
+
* Build compact task state for the LLM topic classifier.
|
|
249
|
+
* Includes: topic (first user msg), last 3 turn summaries,
|
|
250
|
+
* and optional assistant snippet for short/ambiguous messages.
|
|
237
251
|
*/
|
|
238
|
-
private
|
|
239
|
-
const
|
|
240
|
-
if (
|
|
241
|
-
|
|
242
|
-
const
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
252
|
+
private buildTopicJudgeState(chunks: Chunk[], newUserChunk: Chunk): string {
  // Only user/assistant chunks take part in the task state.
  const dialogue = chunks.filter((chunk) => chunk.role === "user" || chunk.role === "assistant");
  if (dialogue.length === 0) {
    return "";
  }

  // Topic line: summary of the first user chunk, else its first 80 characters.
  const opener = dialogue.find((chunk) => chunk.role === "user");
  const lines: string[] = [`topic:${opener?.summary || opener?.content.slice(0, 80) || ""}`];

  // Pair every user chunk with the assistant chunk directly after it (if any).
  const exchanges: Array<{ u: string; a: string }> = [];
  dialogue.forEach((chunk, idx) => {
    if (chunk.role !== "user") {
      return;
    }
    const reply = dialogue[idx + 1];
    exchanges.push({
      u: chunk.summary || chunk.content.slice(0, 60),
      a: reply?.role === "assistant" ? reply.summary || reply.content.slice(0, 60) : "",
    });
  });

  // Keep only the last three exchanges, numbered from 1.
  exchanges.slice(-3).forEach((exchange, pos) => {
    lines.push(`${pos + 1}. U:${exchange.u} A:${exchange.a}`);
  });

  // Messages under 30 characters, or starting with one of the listed characters,
  // get the latest assistant text (up to 200 characters) appended as extra context.
  const incoming = newUserChunk.content;
  if (incoming.length < 30 || /^[那这它其还哪啥]/.test(incoming.trim())) {
    for (let k = dialogue.length - 1; k >= 0; k--) {
      if (dialogue[k].role === "assistant") {
        const snippet = dialogue[k].content.slice(0, 200);
        if (snippet) {
          lines.push(`lastA:${snippet}`);
        }
        break;
      }
    }
  }

  return lines.join("\n");
}
|
|
262
282
|
|
|
263
283
|
private async createNewTaskReturn(sessionKey: string, timestamp: number, owner: string = "agent:main"): Promise<Task> {
|
package/src/ingest/worker.ts
CHANGED
|
@@ -25,8 +25,14 @@ export class IngestWorker {
|
|
|
25
25
|
|
|
26
26
|
getTaskProcessor(): TaskProcessor { return this.taskProcessor; }
|
|
27
27
|
|
|
28
|
+
/** True when the session key starts with "temp:", "internal:" or "system:". */
private static isEphemeralSession(sessionKey: string): boolean {
  const ephemeralPrefixes = ["temp:", "internal:", "system:"];
  return ephemeralPrefixes.some((prefix) => sessionKey.startsWith(prefix));
}
|
|
31
|
+
|
|
28
32
|
enqueue(messages: ConversationMessage[]): void {
|
|
29
|
-
|
|
33
|
+
const filtered = messages.filter((m) => !IngestWorker.isEphemeralSession(m.sessionKey));
|
|
34
|
+
if (filtered.length === 0) return;
|
|
35
|
+
this.queue.push(...filtered);
|
|
30
36
|
if (!this.processing) {
|
|
31
37
|
this.processQueue().catch((err) => {
|
|
32
38
|
this.ctx.log.error(`Ingest worker error: ${err}`);
|
|
@@ -150,14 +156,23 @@ export class IngestWorker {
|
|
|
150
156
|
let mergeHistory = "[]";
|
|
151
157
|
|
|
152
158
|
// Fast path: exact content_hash match within same owner (agent dimension)
|
|
159
|
+
// Strategy: retire the OLD chunk, keep the NEW one active (latest wins)
|
|
153
160
|
const chunkOwner = msg.owner ?? "agent:main";
|
|
154
161
|
const existingByHash = this.store.findActiveChunkByHash(content, chunkOwner);
|
|
155
162
|
if (existingByHash) {
|
|
156
|
-
this.ctx.log.debug(`Exact-dup (owner=${chunkOwner}): hash match →
|
|
163
|
+
this.ctx.log.debug(`Exact-dup (owner=${chunkOwner}): hash match → retiring old=${existingByHash}, keeping new=${chunkId}`);
|
|
157
164
|
this.store.recordMergeHit(existingByHash, "DUPLICATE", "exact content hash match");
|
|
158
|
-
|
|
159
|
-
|
|
165
|
+
const oldChunk = this.store.getChunk(existingByHash);
|
|
166
|
+
this.store.markDedupStatus(existingByHash, "duplicate", chunkId, "exact content hash match");
|
|
167
|
+
this.store.deleteEmbedding(existingByHash);
|
|
168
|
+
mergedFromOld = existingByHash;
|
|
160
169
|
dedupReason = "exact content hash match";
|
|
170
|
+
if (oldChunk) {
|
|
171
|
+
const oldHistory = JSON.parse(oldChunk.mergeHistory || "[]");
|
|
172
|
+
oldHistory.push({ action: "duplicate_superseded", at: Date.now(), reason: "exact content hash match", sourceChunkId: existingByHash });
|
|
173
|
+
mergeHistory = JSON.stringify(oldHistory);
|
|
174
|
+
mergeCount = (oldChunk.mergeCount || 0) + 1;
|
|
175
|
+
}
|
|
161
176
|
}
|
|
162
177
|
|
|
163
178
|
// Smart dedup: find Top-5 similar chunks, then ask LLM to judge
|
|
@@ -173,8 +188,9 @@ export class IngestWorker {
|
|
|
173
188
|
index: i + 1,
|
|
174
189
|
summary: chunk?.summary ?? "",
|
|
175
190
|
chunkId: s.chunkId,
|
|
191
|
+
role: chunk?.role,
|
|
176
192
|
};
|
|
177
|
-
}).filter(c => c.summary);
|
|
193
|
+
}).filter(c => c.summary && c.role === msg.role);
|
|
178
194
|
|
|
179
195
|
if (candidates.length > 0) {
|
|
180
196
|
const dedupResult = await this.summarizer.judgeDedup(summary, candidates);
|
|
@@ -183,10 +199,18 @@ export class IngestWorker {
|
|
|
183
199
|
const targetChunkId = candidates[dedupResult.targetIndex - 1]?.chunkId;
|
|
184
200
|
if (targetChunkId) {
|
|
185
201
|
this.store.recordMergeHit(targetChunkId, "DUPLICATE", dedupResult.reason);
|
|
186
|
-
|
|
187
|
-
|
|
202
|
+
const oldChunk = this.store.getChunk(targetChunkId);
|
|
203
|
+
this.store.markDedupStatus(targetChunkId, "duplicate", chunkId, dedupResult.reason);
|
|
204
|
+
this.store.deleteEmbedding(targetChunkId);
|
|
205
|
+
mergedFromOld = targetChunkId;
|
|
188
206
|
dedupReason = dedupResult.reason;
|
|
189
|
-
|
|
207
|
+
if (oldChunk) {
|
|
208
|
+
const oldHistory = JSON.parse(oldChunk.mergeHistory || "[]");
|
|
209
|
+
oldHistory.push({ action: "duplicate_superseded", at: Date.now(), reason: dedupResult.reason, sourceChunkId: targetChunkId });
|
|
210
|
+
mergeHistory = JSON.stringify(oldHistory);
|
|
211
|
+
mergeCount = (oldChunk.mergeCount || 0) + 1;
|
|
212
|
+
}
|
|
213
|
+
this.ctx.log.debug(`Smart dedup: DUPLICATE → retiring old=${targetChunkId}, keeping new=${chunkId} active, reason: ${dedupResult.reason}`);
|
|
190
214
|
}
|
|
191
215
|
}
|
|
192
216
|
|
|
@@ -266,9 +290,6 @@ export class IngestWorker {
|
|
|
266
290
|
}
|
|
267
291
|
this.ctx.log.debug(`Stored chunk=${chunkId} kind=${kind} role=${msg.role} dedup=${dedupStatus} len=${content.length} hasVec=${!!embedding && dedupStatus === "active"}`);
|
|
268
292
|
|
|
269
|
-
if (dedupStatus === "duplicate") {
|
|
270
|
-
return { action: "duplicate", summary, targetChunkId: dedupTarget ?? undefined, reason: dedupReason ?? undefined };
|
|
271
|
-
}
|
|
272
293
|
if (mergedFromOld) {
|
|
273
294
|
return { action: "merged", chunkId, summary, targetChunkId: mergedFromOld, reason: dedupReason ?? undefined };
|
|
274
295
|
}
|
package/src/recall/engine.ts
CHANGED
|
@@ -77,7 +77,7 @@ export class RecallEngine {
|
|
|
77
77
|
}
|
|
78
78
|
const shortTerms = [...new Set([...spaceSplit, ...cjkBigrams])];
|
|
79
79
|
const patternHits = shortTerms.length > 0
|
|
80
|
-
? this.store.patternSearch(shortTerms, { limit: candidatePool })
|
|
80
|
+
? this.store.patternSearch(shortTerms, { limit: candidatePool, ownerFilter })
|
|
81
81
|
: [];
|
|
82
82
|
const patternRanked = patternHits.map((h, i) => ({
|
|
83
83
|
id: h.chunkId,
|
|
@@ -234,6 +234,7 @@ export class RecallEngine {
|
|
|
234
234
|
score: Math.round(candidate.score * 1000) / 1000,
|
|
235
235
|
taskId: chunk.taskId,
|
|
236
236
|
skillId: chunk.skillId,
|
|
237
|
+
owner: chunk.owner,
|
|
237
238
|
origin: chunk.owner === "public" ? "local-shared" : "local",
|
|
238
239
|
source: {
|
|
239
240
|
ts: chunk.createdAt,
|
package/src/shared/llm-call.ts
CHANGED
|
@@ -2,6 +2,19 @@ import * as fs from "fs";
|
|
|
2
2
|
import * as path from "path";
|
|
3
3
|
import type { SummarizerConfig, SummaryProvider, Logger, PluginContext, OpenClawAPI } from "../types";
|
|
4
4
|
|
|
5
|
+
/**
 * Turn an API-key setting into a plain string.
 *
 * The setting is either the key itself or a SecretRef-style object. Only refs
 * with `source: "env"` are resolved, by reading the environment variable named
 * by `id`; any other source yields `undefined`.
 *
 * @param input - Literal key, secret reference, or nothing.
 * @returns The key, or `undefined` when the input is empty, the source is not
 *   "env", or the environment variable is not set.
 */
function resolveApiKey(
  input: string | { source: string; provider?: string; id: string } | undefined,
): string | undefined {
  if (!input) {
    return undefined;
  }
  if (typeof input !== "string") {
    return input.source === "env" ? process.env[input.id] : undefined;
  }
  return input;
}
|
|
17
|
+
|
|
5
18
|
/**
|
|
6
19
|
* Detect provider type from provider key name or base URL.
|
|
7
20
|
*/
|
|
@@ -56,7 +69,7 @@ export function loadOpenClawFallbackConfig(log: Logger): SummarizerConfig | unde
|
|
|
56
69
|
if (!providerCfg) return undefined;
|
|
57
70
|
|
|
58
71
|
const baseUrl: string | undefined = providerCfg.baseUrl;
|
|
59
|
-
const apiKey
|
|
72
|
+
const apiKey = resolveApiKey(providerCfg.apiKey);
|
|
60
73
|
if (!baseUrl || !apiKey) return undefined;
|
|
61
74
|
|
|
62
75
|
const provider = detectProvider(providerKey, baseUrl);
|