@rubytech/taskmaster 1.0.38 → 1.0.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auto-reply/reply/dispatch-from-config.js +53 -1
- package/dist/auto-reply/reply/get-reply-run.js +2 -1
- package/dist/browser/chrome.js +26 -3
- package/dist/build-info.json +3 -3
- package/dist/control-ui/assets/index-BfV0Mtl7.css +1 -0
- package/dist/control-ui/assets/index-RlAacvDz.js +2944 -0
- package/dist/control-ui/assets/index-RlAacvDz.js.map +1 -0
- package/dist/control-ui/index.html +2 -2
- package/dist/gateway/chat-sanitize.js +24 -7
- package/dist/gateway/server-methods/chat.js +100 -26
- package/dist/gateway/server-methods/memory.js +36 -0
- package/dist/hooks/bundled/conversation-archive/handler.js +15 -2
- package/dist/memory/hybrid.js +28 -1
- package/dist/memory/internal.js +136 -47
- package/dist/memory/manager.js +37 -3
- package/dist/memory/memory-schema.js +18 -0
- package/package.json +1 -1
- package/taskmaster-docs/USER-GUIDE.md +51 -8
- package/dist/control-ui/assets/index-B0Q2Wmm1.js +0 -2892
- package/dist/control-ui/assets/index-B0Q2Wmm1.js.map +0 -1
- package/dist/control-ui/assets/index-DkMDU6zX.css +0 -1
|
@@ -6,8 +6,8 @@
|
|
|
6
6
|
<title>Taskmaster Control</title>
|
|
7
7
|
<meta name="color-scheme" content="dark light" />
|
|
8
8
|
<link rel="icon" type="image/png" href="./favicon.png" />
|
|
9
|
-
<script type="module" crossorigin src="./assets/index-B0Q2Wmm1.js"></script>
|
|
10
|
-
<link rel="stylesheet" crossorigin href="./assets/index-DkMDU6zX.css">
|
|
9
|
+
<script type="module" crossorigin src="./assets/index-RlAacvDz.js"></script>
|
|
10
|
+
<link rel="stylesheet" crossorigin href="./assets/index-BfV0Mtl7.css">
|
|
11
11
|
</head>
|
|
12
12
|
<body>
|
|
13
13
|
<taskmaster-app></taskmaster-app>
|
|
@@ -15,6 +15,9 @@ const ENVELOPE_CHANNELS = [
|
|
|
15
15
|
"BlueBubbles",
|
|
16
16
|
];
|
|
17
17
|
const MESSAGE_ID_LINE = /^\s*\[message_id:\s*[^\]]+\]\s*$/i;
|
|
18
|
+
// Internal annotations prepended by buildInboundMediaNote / get-reply-run
|
|
19
|
+
const MEDIA_ATTACHED_LINE = /^\s*\[media attached(?:\s+\d+\/\d+)?:\s*[^\]]+\]\s*$/i;
|
|
20
|
+
const MEDIA_REPLY_HINT = /^\s*To send an image back, prefer the message tool\b/;
|
|
18
21
|
function looksLikeEnvelopeHeader(header) {
|
|
19
22
|
if (/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}Z\b/.test(header))
|
|
20
23
|
return true;
|
|
@@ -23,13 +26,16 @@ function looksLikeEnvelopeHeader(header) {
|
|
|
23
26
|
return ENVELOPE_CHANNELS.some((label) => header.startsWith(`${label} `));
|
|
24
27
|
}
|
|
25
28
|
/**
 * Remove transport decoration from a message body.
 *
 * When the text starts with something matching ENVELOPE_PREFIX and the captured
 * header passes looksLikeEnvelopeHeader(), that prefix is cut off. The result is
 * then always passed through stripMediaAnnotations().
 *
 * @param {string} text - Raw message text.
 * @returns {string} The text without the envelope prefix and media annotations.
 */
export function stripEnvelope(text) {
    const prefixMatch = text.match(ENVELOPE_PREFIX);
    const hasEnvelope = Boolean(prefixMatch) && looksLikeEnvelopeHeader(prefixMatch[1] ?? "");
    const body = hasEnvelope ? text.slice(prefixMatch[0].length) : text;
    return stripMediaAnnotations(body);
}
|
|
34
40
|
function stripMessageIdHints(text) {
|
|
35
41
|
if (!text.includes("[message_id:"))
|
|
@@ -38,6 +44,17 @@ function stripMessageIdHints(text) {
|
|
|
38
44
|
const filtered = lines.filter((line) => !MESSAGE_ID_LINE.test(line));
|
|
39
45
|
return filtered.length === lines.length ? text : filtered.join("\n");
|
|
40
46
|
}
|
|
47
|
+
/**
 * Remove internal media annotation lines from a message body.
 *
 * A line is dropped when it matches MEDIA_ATTACHED_LINE (per-file
 * "[media attached ...: ...]" entries; the same pattern also matches the
 * "[media attached: N files]" header, so no separate pass is needed) or
 * MEDIA_REPLY_HINT. Reply-hint lines are only considered when the text contains
 * a "[media attached" marker at all.
 *
 * @param {string} text - Message text, possibly multi-line.
 * @returns {string} The input string unchanged when no line was removed;
 *   otherwise the remaining lines joined with "\n" and trimmed.
 */
function stripMediaAnnotations(text) {
    // Cheap pre-check. Case-insensitive so it agrees with the /i flag on
    // MEDIA_ATTACHED_LINE instead of silently skipping mixed-case markers.
    if (!/\[media attached/i.test(text))
        return text;
    const lines = text.split(/\r?\n/);
    const kept = lines.filter((line) => !MEDIA_ATTACHED_LINE.test(line) && !MEDIA_REPLY_HINT.test(line));
    if (kept.length === lines.length)
        return text;
    return kept.join("\n").trim();
}
|
|
41
58
|
function stripEnvelopeFromContent(content) {
|
|
42
59
|
let changed = false;
|
|
43
60
|
const next = content.map((item) => {
|
|
@@ -10,9 +10,9 @@ import { dispatchInboundMessage } from "../../auto-reply/dispatch.js";
|
|
|
10
10
|
import { createReplyDispatcher } from "../../auto-reply/reply/reply-dispatcher.js";
|
|
11
11
|
import { extractShortModelName, } from "../../auto-reply/reply/response-prefix-template.js";
|
|
12
12
|
import { resolveSendPolicy } from "../../sessions/send-policy.js";
|
|
13
|
+
import { createInternalHookEvent, triggerInternalHook } from "../../hooks/internal-hooks.js";
|
|
13
14
|
import { INTERNAL_MESSAGE_CHANNEL } from "../../utils/message-channel.js";
|
|
14
15
|
import { abortChatRunById, abortChatRunsForSessionKey, isChatStopCommandText, resolveChatRunExpiresAtMs, } from "../chat-abort.js";
|
|
15
|
-
import { parseMessageWithAttachments } from "../chat-attachments.js";
|
|
16
16
|
import { ErrorCodes, errorShape, formatValidationErrors, validateChatAbortParams, validateChatHistoryParams, validateChatInjectParams, validateChatSendParams, } from "../protocol/index.js";
|
|
17
17
|
import { getMaxChatHistoryMessagesBytes } from "../server-constants.js";
|
|
18
18
|
import { capArrayByJsonBytes, loadSessionEntry, readSessionMessages, resolveSessionModelRef, } from "../session-utils.js";
|
|
@@ -250,35 +250,74 @@ export const chatHandlers = {
|
|
|
250
250
|
// Separate document attachments (PDFs, text files) from image attachments
|
|
251
251
|
const imageAttachments = normalizedAttachments.filter((a) => a.type !== "document");
|
|
252
252
|
const documentAttachments = normalizedAttachments.filter((a) => a.type === "document");
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
const parsed = await parseMessageWithAttachments(p.message, imageAttachments, {
|
|
258
|
-
maxBytes: 5_000_000,
|
|
259
|
-
log: context.logGateway,
|
|
260
|
-
});
|
|
261
|
-
parsedMessage = parsed.message;
|
|
262
|
-
parsedImages = parsed.images;
|
|
263
|
-
}
|
|
264
|
-
catch (err) {
|
|
265
|
-
respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, String(err)));
|
|
266
|
-
return;
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
// Save document attachments to workspace uploads dir (persistent, accessible by agent)
|
|
270
|
-
const savedDocPaths = [];
|
|
271
|
-
if (documentAttachments.length > 0) {
|
|
253
|
+
// Resolve workspace uploads dir for all attachments (persistent, no TTL).
|
|
254
|
+
// Both images and documents are saved as plain files — same as every other channel.
|
|
255
|
+
let uploadsDir = null;
|
|
256
|
+
if (normalizedAttachments.length > 0) {
|
|
272
257
|
const { cfg: sessionCfg } = loadSessionEntry(p.sessionKey);
|
|
273
258
|
const agentId = resolveSessionAgentId({ sessionKey: p.sessionKey, config: sessionCfg });
|
|
274
259
|
const workspaceDir = resolveAgentWorkspaceDir(sessionCfg, agentId);
|
|
275
|
-
|
|
260
|
+
uploadsDir = path.join(workspaceDir, "uploads");
|
|
276
261
|
try {
|
|
277
262
|
fs.mkdirSync(uploadsDir, { recursive: true });
|
|
278
263
|
}
|
|
279
264
|
catch {
|
|
280
265
|
/* ignore if exists */
|
|
281
266
|
}
|
|
267
|
+
}
|
|
268
|
+
// Save image attachments to workspace uploads dir (persistent, accessible by agent).
|
|
269
|
+
// The agent runner detects file path references via [media attached: ...] and
|
|
270
|
+
// loads them from disk at inference time — no inline base64 in transcripts.
|
|
271
|
+
const savedImagePaths = [];
|
|
272
|
+
const savedImageTypes = [];
|
|
273
|
+
if (imageAttachments.length > 0 && uploadsDir) {
|
|
274
|
+
for (const att of imageAttachments) {
|
|
275
|
+
if (!att.content || typeof att.content !== "string")
|
|
276
|
+
continue;
|
|
277
|
+
try {
|
|
278
|
+
let b64 = att.content.trim();
|
|
279
|
+
const dataUrlMatch = /^data:[^;]+;base64,(.*)$/.exec(b64);
|
|
280
|
+
if (dataUrlMatch)
|
|
281
|
+
b64 = dataUrlMatch[1];
|
|
282
|
+
const buffer = Buffer.from(b64, "base64");
|
|
283
|
+
// Derive extension from mime type
|
|
284
|
+
const mimeBase = att.mimeType?.split(";")[0]?.trim();
|
|
285
|
+
const extMap = {
|
|
286
|
+
"image/jpeg": ".jpg",
|
|
287
|
+
"image/png": ".png",
|
|
288
|
+
"image/gif": ".gif",
|
|
289
|
+
"image/webp": ".webp",
|
|
290
|
+
"image/heic": ".heic",
|
|
291
|
+
"image/heif": ".heif",
|
|
292
|
+
"image/svg+xml": ".svg",
|
|
293
|
+
"image/avif": ".avif",
|
|
294
|
+
};
|
|
295
|
+
const ext = (mimeBase && extMap[mimeBase]) ?? ".jpg";
|
|
296
|
+
const uuid = randomUUID();
|
|
297
|
+
let safeName;
|
|
298
|
+
if (att.fileName) {
|
|
299
|
+
const base = path
|
|
300
|
+
.parse(att.fileName)
|
|
301
|
+
.name.replace(/[^a-zA-Z0-9._-]/g, "_")
|
|
302
|
+
.slice(0, 60);
|
|
303
|
+
safeName = base ? `${base}---${uuid}${ext}` : `${uuid}${ext}`;
|
|
304
|
+
}
|
|
305
|
+
else {
|
|
306
|
+
safeName = `${uuid}${ext}`;
|
|
307
|
+
}
|
|
308
|
+
const destPath = path.join(uploadsDir, safeName);
|
|
309
|
+
fs.writeFileSync(destPath, buffer);
|
|
310
|
+
savedImagePaths.push(destPath);
|
|
311
|
+
savedImageTypes.push(mimeBase ?? "image/png");
|
|
312
|
+
}
|
|
313
|
+
catch (err) {
|
|
314
|
+
context.logGateway.warn(`chat image save failed: ${String(err)}`);
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
// Save document attachments to workspace uploads dir (persistent, accessible by agent)
|
|
319
|
+
const savedDocPaths = [];
|
|
320
|
+
if (documentAttachments.length > 0 && uploadsDir) {
|
|
282
321
|
for (const doc of documentAttachments) {
|
|
283
322
|
if (!doc.content || typeof doc.content !== "string")
|
|
284
323
|
continue;
|
|
@@ -354,14 +393,14 @@ export const chatHandlers = {
|
|
|
354
393
|
status: "started",
|
|
355
394
|
};
|
|
356
395
|
respond(true, ackPayload, undefined, { runId: clientRunId });
|
|
357
|
-
const trimmedMessage =
|
|
396
|
+
const trimmedMessage = p.message.trim();
|
|
358
397
|
const injectThinking = Boolean(p.thinking && trimmedMessage && !trimmedMessage.startsWith("/"));
|
|
359
|
-
const commandBody = injectThinking ? `/think ${p.thinking} ${
|
|
398
|
+
const commandBody = injectThinking ? `/think ${p.thinking} ${p.message}` : p.message;
|
|
360
399
|
// If documents were saved, prepend file paths to message so the agent knows about them
|
|
361
400
|
const docNote = savedDocPaths.length > 0
|
|
362
401
|
? savedDocPaths.map((p) => `[file: ${p}]`).join("\n") + "\n\n"
|
|
363
402
|
: "";
|
|
364
|
-
const messageWithDocs = docNote +
|
|
403
|
+
const messageWithDocs = docNote + p.message;
|
|
365
404
|
const clientInfo = client?.connect?.client;
|
|
366
405
|
const ctx = {
|
|
367
406
|
Body: messageWithDocs,
|
|
@@ -379,11 +418,30 @@ export const chatHandlers = {
|
|
|
379
418
|
SenderId: clientInfo?.id,
|
|
380
419
|
SenderName: clientInfo?.displayName,
|
|
381
420
|
SenderUsername: clientInfo?.displayName,
|
|
421
|
+
// Image/media paths — same pattern as WhatsApp. buildInboundMediaNote()
|
|
422
|
+
// will generate [media attached: ...] annotations that the agent runner
|
|
423
|
+
// detects and loads from disk at inference time.
|
|
424
|
+
MediaPaths: savedImagePaths.length > 0 ? savedImagePaths : undefined,
|
|
425
|
+
MediaPath: savedImagePaths[0],
|
|
426
|
+
MediaTypes: savedImageTypes.length > 0 ? savedImageTypes : undefined,
|
|
427
|
+
MediaType: savedImageTypes[0],
|
|
382
428
|
};
|
|
383
429
|
const agentId = resolveSessionAgentId({
|
|
384
430
|
sessionKey: p.sessionKey,
|
|
385
431
|
config: cfg,
|
|
386
432
|
});
|
|
433
|
+
// Fire message:inbound hook for conversation archiving.
|
|
434
|
+
// Include image paths so the archive references the attached media.
|
|
435
|
+
const imageNote = savedImagePaths.length > 0 ? savedImagePaths.map((ip) => `[image: ${ip}]`).join("\n") : "";
|
|
436
|
+
const archiveText = [p.message, imageNote].filter(Boolean).join("\n").trim();
|
|
437
|
+
void triggerInternalHook(createInternalHookEvent("message", "inbound", p.sessionKey, {
|
|
438
|
+
text: archiveText || undefined,
|
|
439
|
+
timestamp: now,
|
|
440
|
+
chatType: "direct",
|
|
441
|
+
agentId,
|
|
442
|
+
channel: "webchat",
|
|
443
|
+
cfg,
|
|
444
|
+
}));
|
|
387
445
|
let prefixContext = {
|
|
388
446
|
identityName: resolveIdentityName(cfg, agentId),
|
|
389
447
|
};
|
|
@@ -419,6 +477,7 @@ export const chatHandlers = {
|
|
|
419
477
|
},
|
|
420
478
|
});
|
|
421
479
|
let agentRunStarted = false;
|
|
480
|
+
context.logGateway.info(`webchat dispatch: sessionKey=${p.sessionKey} runId=${clientRunId} body=${messageWithDocs.length}ch images=${savedImagePaths.length} docs=${savedDocPaths.length}`);
|
|
422
481
|
void dispatchInboundMessage({
|
|
423
482
|
ctx,
|
|
424
483
|
cfg,
|
|
@@ -426,10 +485,10 @@ export const chatHandlers = {
|
|
|
426
485
|
replyOptions: {
|
|
427
486
|
runId: clientRunId,
|
|
428
487
|
abortSignal: abortController.signal,
|
|
429
|
-
images: parsedImages.length > 0 ? parsedImages : undefined,
|
|
430
488
|
disableBlockStreaming: true,
|
|
431
|
-
onAgentRunStart: () => {
|
|
489
|
+
onAgentRunStart: (runId) => {
|
|
432
490
|
agentRunStarted = true;
|
|
491
|
+
context.logGateway.info(`webchat agent run started: sessionKey=${p.sessionKey} runId=${runId}`);
|
|
433
492
|
},
|
|
434
493
|
onModelSelected: (ctx) => {
|
|
435
494
|
prefixContext.provider = ctx.provider;
|
|
@@ -440,6 +499,8 @@ export const chatHandlers = {
|
|
|
440
499
|
},
|
|
441
500
|
})
|
|
442
501
|
.then(() => {
|
|
502
|
+
const { entry: postEntry } = loadSessionEntry(p.sessionKey);
|
|
503
|
+
context.logGateway.info(`webchat dispatch done: sessionKey=${p.sessionKey} agentRunStarted=${agentRunStarted} sessionId=${postEntry?.sessionId ?? "none"} sessionFile=${postEntry?.sessionFile ?? "none"}`);
|
|
443
504
|
if (!agentRunStarted) {
|
|
444
505
|
const combinedReply = finalReplyParts
|
|
445
506
|
.map((part) => part.trim())
|
|
@@ -479,6 +540,18 @@ export const chatHandlers = {
|
|
|
479
540
|
message,
|
|
480
541
|
});
|
|
481
542
|
}
|
|
543
|
+
// Fire message:outbound hook for conversation archiving
|
|
544
|
+
const outboundText = finalReplyParts.join("\n\n").trim();
|
|
545
|
+
if (outboundText) {
|
|
546
|
+
void triggerInternalHook(createInternalHookEvent("message", "outbound", p.sessionKey, {
|
|
547
|
+
text: outboundText,
|
|
548
|
+
timestamp: Date.now(),
|
|
549
|
+
chatType: "direct",
|
|
550
|
+
agentId,
|
|
551
|
+
channel: "webchat",
|
|
552
|
+
cfg,
|
|
553
|
+
}));
|
|
554
|
+
}
|
|
482
555
|
context.dedupe.set(`chat:${clientRunId}`, {
|
|
483
556
|
ts: Date.now(),
|
|
484
557
|
ok: true,
|
|
@@ -486,6 +559,7 @@ export const chatHandlers = {
|
|
|
486
559
|
});
|
|
487
560
|
})
|
|
488
561
|
.catch((err) => {
|
|
562
|
+
context.logGateway.warn(`webchat dispatch failed: sessionKey=${p.sessionKey} runId=${clientRunId} error=${formatForLog(err)}`);
|
|
489
563
|
const error = errorShape(ErrorCodes.UNAVAILABLE, String(err));
|
|
490
564
|
context.dedupe.set(`chat:${clientRunId}`, {
|
|
491
565
|
ts: Date.now(),
|
|
@@ -74,6 +74,42 @@ export const memoryHandlers = {
|
|
|
74
74
|
respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, String(err)));
|
|
75
75
|
}
|
|
76
76
|
},
|
|
77
|
+
"memory.search": async ({ params, respond }) => {
|
|
78
|
+
const query = typeof params.query === "string" ? params.query.trim() : "";
|
|
79
|
+
if (!query) {
|
|
80
|
+
respond(false, undefined, errorShape(ErrorCodes.INVALID_REQUEST, "query is required"));
|
|
81
|
+
return;
|
|
82
|
+
}
|
|
83
|
+
const cfg = loadConfig();
|
|
84
|
+
const agentId = typeof params.agentId === "string" && params.agentId.trim()
|
|
85
|
+
? params.agentId.trim()
|
|
86
|
+
: resolveDefaultAgentId(cfg);
|
|
87
|
+
const { manager, error } = await getMemorySearchManager({ cfg, agentId });
|
|
88
|
+
if (!manager) {
|
|
89
|
+
respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, error ?? "memory index unavailable"));
|
|
90
|
+
return;
|
|
91
|
+
}
|
|
92
|
+
try {
|
|
93
|
+
const maxResults = typeof params.maxResults === "number" ? params.maxResults : 10;
|
|
94
|
+
// minScore: 0 — show all results for diagnostic purposes
|
|
95
|
+
const results = await manager.search(query, { maxResults, minScore: 0 });
|
|
96
|
+
respond(true, {
|
|
97
|
+
ok: true,
|
|
98
|
+
agentId,
|
|
99
|
+
results: results.map((r) => ({
|
|
100
|
+
path: r.path,
|
|
101
|
+
startLine: r.startLine,
|
|
102
|
+
endLine: r.endLine,
|
|
103
|
+
score: r.score,
|
|
104
|
+
snippet: r.snippet,
|
|
105
|
+
source: r.source,
|
|
106
|
+
})),
|
|
107
|
+
});
|
|
108
|
+
}
|
|
109
|
+
catch (err) {
|
|
110
|
+
respond(false, undefined, errorShape(ErrorCodes.UNAVAILABLE, String(err)));
|
|
111
|
+
}
|
|
112
|
+
},
|
|
77
113
|
"memory.auditClear": async ({ params, respond }) => {
|
|
78
114
|
try {
|
|
79
115
|
const cfg = loadConfig();
|
|
@@ -31,6 +31,13 @@ function extractPeerFromSessionKey(sessionKey) {
|
|
|
31
31
|
}
|
|
32
32
|
return null;
|
|
33
33
|
}
|
|
34
|
+
/**
 * Detect webchat session key format: agent:{agentId}:main
 *
 * The comparison is case-insensitive and empty ":"-separated segments are
 * ignored. Non-string input is never a webchat key.
 *
 * @param {unknown} sessionKey - Session key to classify.
 * @returns {boolean} True when the key has exactly the three segments
 *   "agent", an agent id, and "main".
 */
function isWebchatSessionKey(sessionKey) {
    // A missing or malformed key must not throw from a predicate.
    if (typeof sessionKey !== "string")
        return false;
    const parts = sessionKey.toLowerCase().split(":").filter(Boolean);
    return parts.length === 3 && parts[0] === "agent" && parts[2] === "main";
}
|
|
34
41
|
/**
|
|
35
42
|
* Extract group ID from session key
|
|
36
43
|
*
|
|
@@ -148,9 +155,10 @@ const archiveConversation = async (event) => {
|
|
|
148
155
|
}
|
|
149
156
|
// Get timestamp from context or event
|
|
150
157
|
const timestamp = context.timestamp ?? event.timestamp;
|
|
151
|
-
//
|
|
158
|
+
// Determine conversation type from session key and route to correct archive path
|
|
152
159
|
const peer = extractPeerFromSessionKey(event.sessionKey);
|
|
153
160
|
const groupId = peer ? null : extractGroupIdFromSessionKey(event.sessionKey);
|
|
161
|
+
const isWebchat = !peer && !groupId && isWebchatSessionKey(event.sessionKey);
|
|
154
162
|
if (peer) {
|
|
155
163
|
// Admin DMs archive to memory/admin/conversations/ (not accessible by public agent).
|
|
156
164
|
// Public DMs archive to memory/users/{peer}/conversations/.
|
|
@@ -187,8 +195,13 @@ const archiveConversation = async (event) => {
|
|
|
187
195
|
fileHeader,
|
|
188
196
|
});
|
|
189
197
|
}
|
|
198
|
+
else if (isWebchat) {
|
|
199
|
+
// Webchat (control panel) — archive under memory/admin/conversations/
|
|
200
|
+
const role = event.action === "inbound" ? "Admin" : "Assistant";
|
|
201
|
+
await archiveMessage({ workspaceDir, subdir: "admin", role, text, timestamp });
|
|
202
|
+
}
|
|
190
203
|
else {
|
|
191
|
-
//
|
|
204
|
+
// Unknown session key format — skip
|
|
192
205
|
return;
|
|
193
206
|
}
|
|
194
207
|
}
|
package/dist/memory/hybrid.js
CHANGED
|
@@ -12,6 +12,32 @@ export function bm25RankToScore(rank) {
|
|
|
12
12
|
const normalized = Number.isFinite(rank) ? Math.max(0, rank) : 999;
|
|
13
13
|
return 1 / (1 + normalized);
|
|
14
14
|
}
|
|
15
|
+
/**
 * Score multipliers keyed by file path, applied while merging hybrid results.
 *
 * Entries are evaluated top to bottom and the first matching pattern decides
 * the multiplier, so the demotion rules listed first take precedence over the
 * boosts below them.
 */
const PATH_BOOST_RULES = [
    // Demoted: conversation archives (high volume, low signal-to-noise)
    { pattern: /\/conversations\//, boost: 0.6 },
    // Demoted: session source transcripts
    { pattern: /^sessions\//, boost: 0.6 },
    // Boosted: curated public/shared knowledge
    { pattern: /^memory\/public\//, boost: 1.4 },
    { pattern: /^memory\/shared\//, boost: 1.3 },
    // Slightly boosted: root memory files (MEMORY.md etc.)
    { pattern: /^(?:MEMORY|memory)\.md$/, boost: 1.2 },
];
/**
 * Look up the multiplier for a path.
 *
 * @param {string} filePath - Path tested against each rule's pattern.
 * @returns {number} The boost of the first matching rule, or 1.0 when none match.
 */
function pathBoost(filePath) {
    const firstHit = PATH_BOOST_RULES.find(({ pattern }) => pattern.test(filePath));
    return firstHit ? firstHit.boost : 1.0;
}
|
|
15
41
|
export function mergeHybridResults(params) {
|
|
16
42
|
const byId = new Map();
|
|
17
43
|
for (const r of params.vector) {
|
|
@@ -47,7 +73,8 @@ export function mergeHybridResults(params) {
|
|
|
47
73
|
}
|
|
48
74
|
}
|
|
49
75
|
const merged = Array.from(byId.values()).map((entry) => {
|
|
50
|
-
const
|
|
76
|
+
const raw = params.vectorWeight * entry.vectorScore + params.textWeight * entry.textScore;
|
|
77
|
+
const score = raw * pathBoost(entry.path);
|
|
51
78
|
return {
|
|
52
79
|
path: entry.path,
|
|
53
80
|
startLine: entry.startLine,
|
package/dist/memory/internal.js
CHANGED
|
@@ -89,77 +89,166 @@ export async function buildFileEntry(absPath, workspaceDir) {
|
|
|
89
89
|
hash,
|
|
90
90
|
};
|
|
91
91
|
}
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
92
|
+
/**
 * Report the markdown heading depth of a line.
 *
 * A heading is one to six "#" characters at the very start of the line
 * followed by a whitespace character.
 *
 * @param {string} line - A single line of text.
 * @returns {number} 1-6 for a heading line, 0 for anything else.
 */
function headingLevel(line) {
    const hashes = line.match(/^(#{1,6})\s/);
    if (!hashes)
        return 0;
    return hashes[1].length;
}
|
|
99
|
+
/**
 * Render a heading stack as a one-line breadcrumb.
 * E.g., ["# User Guide", "## Updating Taskmaster"] → "# User Guide > ## Updating Taskmaster\n"
 *
 * @param {Array<string | undefined>} stack - Heading lines; falsy slots are skipped.
 * @returns {string} The headings joined with " > " and terminated by a newline,
 *   or an empty string when no heading remains.
 */
function headingPrefix(stack) {
    const crumbs = stack.filter(Boolean);
    if (crumbs.length === 0)
        return "";
    return crumbs.join(" > ") + "\n";
}
|
|
107
|
+
/**
 * Cut one entry's line into pieces of at most maxLen UTF-16 units.
 * A cut that would land between the two halves of a surrogate pair is moved
 * one unit earlier so no piece contains a lone surrogate.
 */
function splitLineSegments(entry, maxLen) {
    const { line, lineNo } = entry;
    if (line.length === 0)
        return [entry];
    const segments = [];
    let start = 0;
    while (start < line.length) {
        let end = Math.min(start + maxLen, line.length);
        if (end < line.length && end - 1 > start) {
            const before = line.charCodeAt(end - 1);
            const after = line.charCodeAt(end);
            const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
            if (splitsPair)
                end -= 1;
        }
        segments.push({ line: line.slice(start, end), lineNo });
        start = end;
    }
    return segments;
}
/**
 * Split lines into fixed-size chunks.
 * Used as a fallback when a single section exceeds maxChars, and for files
 * without headings. No text is repeated (overlapped) between consecutive chunks.
 *
 * @param {{ line: string, lineNo: number }[]} entries - Lines with their 1-based numbers.
 * @param {number} maxChars - Target chunk size including the prefix.
 * @param {string} prefix - Text prepended to every chunk (may be "").
 * @returns {{ startLine: number, endLine: number, text: string, hash: string }[]}
 *   Chunks in input order; empty when entries is empty.
 */
function chunkLinesFixed(entries, maxChars, prefix) {
    if (entries.length === 0)
        return [];
    // Leave room for the prefix, but never shrink the body budget below 32.
    const effectiveMax = Math.max(32, maxChars - prefix.length);
    const chunks = [];
    let current = [];
    let currentChars = 0;
    const flush = () => {
        if (current.length === 0)
            return;
        const text = prefix + current.map((e) => e.line).join("\n");
        chunks.push({
            startLine: current[0].lineNo,
            endLine: current[current.length - 1].lineNo,
            text,
            hash: hashText(text),
        });
        current = [];
        currentChars = 0;
    };
    for (const entry of entries) {
        // Overly long lines are split so each piece fits within effectiveMax.
        for (const seg of splitLineSegments(entry, effectiveMax)) {
            const segSize = seg.line.length + 1; // +1 for the joining newline
            if (current.length > 0 && currentChars + segSize > effectiveMax)
                flush();
            current.push(seg);
            currentChars += segSize;
        }
    }
    flush();
    return chunks;
}
|
|
161
|
+
/**
 * Semantic markdown chunker.
 *
 * Splits content at markdown headings so each chunk corresponds to a logical section.
 * Each chunk is prefixed with the heading breadcrumb (ancestor headings) so the embedding
 * model has structural context — e.g., "# User Guide > ## Updating Taskmaster\n...content...".
 *
 * Lines inside fenced code blocks (``` or ~~~) are never treated as headings, so a
 * "# comment" in a shell snippet does not start a new section.
 *
 * If a section exceeds maxChars, it falls back to fixed-size splitting within that section,
 * but each sub-chunk still receives the heading prefix.
 *
 * Files with no headings are chunked using fixed-size splitting with no prefix.
 *
 * @param {string} content - Markdown source.
 * @param {{ tokens: number }} chunking - Chunk budget; maxChars is tokens * 4, at least 32.
 * @returns {{ startLine: number, endLine: number, text: string, hash: string }[]}
 *   Chunks in document order; empty for blank content.
 */
export function chunkMarkdown(content, chunking) {
    if (!content.trim())
        return [];
    const lines = content.split("\n");
    const maxChars = Math.max(32, chunking.tokens * 4);
    // Classify every line, tracking whether we are inside a fenced code block.
    const parsedLines = [];
    let hasHeadings = false;
    let openFence = null;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i] ?? "";
        const fenceMatch = /^ {0,3}(`{3,}|~{3,})/.exec(line);
        let level = 0;
        if (openFence) {
            // A fence closes on the same marker, at least as long, with nothing after it.
            const closes = fenceMatch !== null
                && fenceMatch[1][0] === openFence.marker
                && fenceMatch[1].length >= openFence.length
                && line.slice(fenceMatch[0].length).trim() === "";
            if (closes)
                openFence = null;
        }
        else if (fenceMatch) {
            openFence = { marker: fenceMatch[1][0], length: fenceMatch[1].length };
        }
        else {
            level = headingLevel(line);
        }
        if (level > 0)
            hasHeadings = true;
        parsedLines.push({ line, lineNo: i + 1, level });
    }
    // No headings at all — fall back to fixed-size chunking (no prefix)
    if (!hasHeadings) {
        return chunkLinesFixed(parsedLines.map((p) => ({ line: p.line, lineNo: p.lineNo })), maxChars, "");
    }
    const sections = [];
    // headingStack tracks the current heading hierarchy: index = level-1
    const headingStack = [];
    let currentSection = { headingStack: [], lines: [] };
    for (const parsed of parsedLines) {
        if (parsed.level > 0) {
            // Flush the previous section if it has content
            if (currentSection.lines.length > 0) {
                sections.push(currentSection);
            }
            // Drop headings at this level or deeper, then pad skipped levels with ""
            // so the stack stays dense (empty slots are ignored when the prefix is built).
            if (headingStack.length >= parsed.level) {
                headingStack.length = parsed.level - 1;
            }
            while (headingStack.length < parsed.level - 1) {
                headingStack.push("");
            }
            headingStack.push(parsed.line);
            // Start a new section with the current heading stack as context
            currentSection = {
                headingStack: [...headingStack],
                lines: [{ line: parsed.line, lineNo: parsed.lineNo }],
            };
        }
        else {
            currentSection.lines.push({ line: parsed.line, lineNo: parsed.lineNo });
        }
    }
    // Flush final section
    if (currentSection.lines.length > 0) {
        sections.push(currentSection);
    }
    // Convert sections to chunks
    const chunks = [];
    for (const section of sections) {
        // Prefix = ancestor headings only; the section's own heading is already
        // the first line of its body.
        const prefix = headingPrefix(section.headingStack.slice(0, -1));
        const bodyText = section.lines.map((e) => e.line).join("\n");
        if (prefix.length + bodyText.length <= maxChars) {
            // Section fits in one chunk
            const text = prefix + bodyText;
            chunks.push({
                startLine: section.lines[0].lineNo,
                endLine: section.lines[section.lines.length - 1].lineNo,
                text,
                hash: hashText(text),
            });
        }
        else {
            // Section too large — split with fixed-size chunking, each sub-chunk gets prefix
            chunks.push(...chunkLinesFixed(section.lines, maxChars, prefix));
        }
    }
    return chunks;
}
|
|
163
252
|
export function parseEmbedding(raw) {
|
|
164
253
|
try {
|
|
165
254
|
const parsed = JSON.parse(raw);
|