@jonathangu/openclawbrain 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +140 -290
- package/docs/END_STATE.md +106 -94
- package/docs/EVIDENCE.md +71 -23
- package/docs/RELEASE_CONTRACT.md +46 -32
- package/docs/agent-tools.md +65 -34
- package/docs/architecture.md +128 -142
- package/docs/configuration.md +62 -25
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/channels-status.txt +20 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/config-snapshot.json +94 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/doctor.json +14 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/gateway-probe.txt +24 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/gateway-status.txt +31 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/init-capture.json +15 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/logs.txt +357 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/status-all.txt +61 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/status.json +275 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/summary.md +18 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/trace.json +222 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/validation-report.json +1515 -0
- package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/workspace-inventory.json +4 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/channels-status.txt +20 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/config-snapshot.json +94 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/doctor.json +14 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/gateway-probe.txt +24 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/gateway-status.txt +31 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/init-capture.json +15 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/logs.txt +362 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/status-all.txt +61 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/status.json +275 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/summary.md +21 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/trace.json +222 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/validation-report.json +4400 -0
- package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/workspace-inventory.json +4 -0
- package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/channels-status.txt +31 -0
- package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/config-snapshot.json +94 -0
- package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/doctor.json +14 -0
- package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/gateway-probe.txt +34 -0
- package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/gateway-status.txt +41 -0
- package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/logs.txt +441 -0
- package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/status-all.txt +60 -0
- package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/status.json +276 -0
- package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/summary.md +13 -0
- package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/trace.json +4 -0
- package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/validation-report.json +387 -0
- package/docs/tui.md +11 -4
- package/index.ts +194 -1
- package/package.json +1 -1
- package/src/brain-cli.ts +12 -1
- package/src/brain-harvest/scanner.ts +286 -16
- package/src/brain-harvest/self.ts +134 -6
- package/src/brain-runtime/evidence-detectors.ts +3 -1
- package/src/brain-runtime/harvester-extension.ts +3 -0
- package/src/brain-runtime/service.ts +2 -0
- package/src/brain-store/embedding.ts +29 -8
- package/src/brain-worker/worker.ts +40 -0
- package/src/engine.ts +1 -0
package/index.ts
CHANGED
|
@@ -1270,6 +1270,194 @@ function createLcmDependencies(api: OpenClawPluginApi): LcmDependencies {
|
|
|
1270
1270
|
};
|
|
1271
1271
|
}
|
|
1272
1272
|
|
|
1273
|
+
/** Signature of the optional host hook used to register a context engine factory under an id. */
type ContextEngineRegistrar = (id: string, factory: () => unknown) => void;

/**
 * The plugin API widened with an optional `registerContextEngine` member,
 * so callers can feature-detect the hook before using it.
 */
type ContextEngineRegisteringApi = OpenClawPluginApi &
  Partial<{ registerContextEngine: ContextEngineRegistrar }>;
|
|
1276
|
+
|
|
1277
|
+
/**
 * Flattens an arbitrary prompt payload into trimmed plain text.
 *
 * - Strings are trimmed.
 * - Arrays are normalized element by element; empty results are dropped and
 *   the rest joined with newlines.
 * - Objects contribute their `text`, `content`, `value`, `thinking` and
 *   `summary` fields (in that order), skipping empty and repeated entries.
 * - Anything else yields the empty string.
 *
 * @param value Payload of unknown shape.
 * @returns The normalized text, possibly empty.
 */
function normalizePromptText(value: unknown): string {
  if (typeof value === "string") {
    return value.trim();
  }

  if (Array.isArray(value)) {
    const pieces: string[] = [];
    for (const entry of value) {
      const text = normalizePromptText(entry);
      if (text.length > 0) {
        pieces.push(text);
      }
    }
    return pieces.join("\n").trim();
  }

  if (value === null || typeof value !== "object") {
    return "";
  }

  const source = value as Record<string, unknown>;
  const fieldOrder = ["text", "content", "value", "thinking", "summary"];
  const seen = new Set<string>();
  const collected: string[] = [];
  for (const field of fieldOrder) {
    const text = normalizePromptText(source[field]);
    // Keep only the first occurrence of each non-empty text.
    if (text.length > 0 && !seen.has(text)) {
      seen.add(text);
      collected.push(text);
    }
  }
  return collected.join("\n").trim();
}
|
|
1304
|
+
|
|
1305
|
+
/**
 * Reads the `role` of a prompt message.
 *
 * @param message Candidate message of unknown shape.
 * @returns The trimmed role when it is a non-blank string, otherwise `"message"`.
 */
function normalizePromptRole(message: unknown): string {
  const fallback = "message";
  if (message === null || typeof message !== "object") {
    return fallback;
  }

  const candidate = (message as { role?: unknown }).role;
  if (typeof candidate !== "string") {
    return fallback;
  }

  const trimmed = candidate.trim();
  return trimmed.length > 0 ? trimmed : fallback;
}
|
|
1312
|
+
|
|
1313
|
+
/**
 * Builds a comparable string signature for a prompt message.
 *
 * Object messages are reduced to their normalized role and normalized
 * `content` text, so two messages with the same role and text compare equal
 * regardless of other fields. Non-object values are JSON-encoded.
 *
 * `JSON.stringify` returns `undefined` for `undefined`, functions and symbols,
 * and throws for `bigint`; those inputs get a `"<typeof>:<String(value)>"`
 * signature instead so the declared `string` return type always holds.
 *
 * @param message Candidate message of unknown shape.
 * @returns A string signature; never `undefined`.
 */
function promptMessageSignature(message: unknown): string {
  if (message === null || typeof message !== "object") {
    // bigint would make JSON.stringify throw, so skip the call for it.
    const encoded = typeof message === "bigint" ? undefined : JSON.stringify(message);
    return encoded ?? `${typeof message}:${String(message)}`;
  }
  const record = message as Record<string, unknown>;
  return JSON.stringify({
    role: normalizePromptRole(record),
    content: normalizePromptText(record.content),
  });
}
|
|
1323
|
+
|
|
1324
|
+
/**
 * Returns the leading part of `assembledMessages` that precedes the live
 * conversation.
 *
 * The longest run of trailing messages whose signatures match the trailing
 * messages of `liveMessages` is treated as the live tail and cut off. When no
 * tail matches, or `liveMessages` is empty, the assembled list is returned
 * unchanged.
 *
 * @param assembledMessages Messages produced by assembly.
 * @param liveMessages Messages currently in the live prompt.
 * @returns The assembled messages that come before the matched live tail.
 */
function extractPrependedMessages(assembledMessages: unknown[], liveMessages: unknown[]): unknown[] {
  const assembledCount = assembledMessages.length;
  if (assembledCount === 0) {
    return [];
  }
  if (liveMessages.length === 0) {
    return assembledMessages;
  }

  const separator = "\u0000";
  const assembledSigs = assembledMessages.map((message) => promptMessageSignature(message));
  const liveSigs = liveMessages.map((message) => promptMessageSignature(message));

  // Try the largest possible overlap first and shrink until the tails agree.
  let overlap = Math.min(assembledSigs.length, liveSigs.length);
  while (overlap > 0) {
    const assembledTail = assembledSigs.slice(-overlap).join(separator);
    const liveTail = liveSigs.slice(-overlap).join(separator);
    if (assembledTail === liveTail) {
      return assembledMessages.slice(0, assembledCount - overlap);
    }
    overlap -= 1;
  }

  return assembledMessages;
}
|
|
1346
|
+
|
|
1347
|
+
/**
 * Renders the text to prepend to a prompt.
 *
 * The trimmed `systemPromptAddition` (when non-empty) comes first, followed by
 * a "## OpenClawBrain recalled context" section that lists each message with
 * non-empty content under a `### <role>` heading. Sections are separated by a
 * blank line.
 *
 * @param messages Messages to render; entries without text content are skipped.
 * @param systemPromptAddition Optional extra prompt text.
 * @returns The combined text, or `undefined` when there is nothing to add.
 */
function formatPrependedContext(messages: unknown[], systemPromptAddition?: string): string | undefined {
  const blocks: string[] = [];

  if (typeof systemPromptAddition === "string") {
    const addition = systemPromptAddition.trim();
    if (addition) {
      blocks.push(addition);
    }
  }

  const rendered: string[] = [];
  for (const message of messages) {
    const body = normalizePromptText((message as { content?: unknown } | null)?.content);
    if (body) {
      rendered.push(`### ${normalizePromptRole(message)}\n${body}`);
    }
  }

  if (rendered.length > 0) {
    const recalled = [
      "## OpenClawBrain recalled context",
      "",
      rendered.join("\n\n"),
    ];
    blocks.push(recalled.join("\n"));
  }

  return blocks.length > 0 ? blocks.join("\n\n") : undefined;
}
|
|
1377
|
+
|
|
1378
|
+
/**
 * Wires the context engine into hosts that lack `registerContextEngine` by
 * using plugin hooks instead.
 *
 * - `before_prompt_build`: records the live message count, runs
 *   `lcm.assemble`, and returns the assembled extras as `prependContext`.
 * - `agent_end`: forwards the turn to `lcm.afterTurn` together with the
 *   message count recorded before the prompt was built.
 * - `session_end`: drops the per-session bookkeeping kept by this bridge.
 *
 * A warning is logged once at registration to signal that the bridge is active.
 *
 * @param api Plugin API used to subscribe to hooks and to log.
 * @param lcm Context engine that assembles prompts and ingests finished turns.
 * @param deps Dependencies; used to resolve a session id from a session key.
 */
function registerHookCompatibilityBridge(
  api: OpenClawPluginApi,
  lcm: LcmContextEngine,
  deps: ReturnType<typeof createLcmDependencies>,
): void {
  // Resolved session id -> number of live messages seen at before_prompt_build.
  const prePromptMessageCounts = new Map<string, number>();
  // Trimmed session key -> trimmed session id.
  const sessionIdsByKey = new Map<string, string>();

  const rememberSession = (sessionId?: string | null, sessionKey?: string | null) => {
    if (typeof sessionId === "string" && sessionId.trim().length > 0 && typeof sessionKey === "string" && sessionKey.trim().length > 0) {
      sessionIdsByKey.set(sessionKey.trim(), sessionId.trim());
    }
  };

  // Prefers an explicit session id, then a remembered or resolved id for the
  // session key, and finally falls back to the key itself.
  const resolveHookSessionId = async (ctx: { sessionId?: string; sessionKey?: string }): Promise<string | undefined> => {
    if (typeof ctx.sessionId === "string" && ctx.sessionId.trim().length > 0) {
      rememberSession(ctx.sessionId, ctx.sessionKey);
      return ctx.sessionId.trim();
    }
    if (typeof ctx.sessionKey === "string" && ctx.sessionKey.trim().length > 0) {
      const key = ctx.sessionKey.trim();
      const remembered = sessionIdsByKey.get(key);
      if (remembered) {
        return remembered;
      }
      const resolved = await deps.resolveSessionIdFromSessionKey(key);
      if (resolved) {
        sessionIdsByKey.set(key, resolved);
        return resolved;
      }
      return key;
    }
    return undefined;
  };

  api.on("before_prompt_build", async (event, ctx) => {
    const sessionId = await resolveHookSessionId(ctx);
    if (!sessionId) {
      return undefined;
    }
    prePromptMessageCounts.set(sessionId, Array.isArray(event.messages) ? event.messages.length : 0);

    const assembled = await lcm.assemble({
      sessionId,
      messages: Array.isArray(event.messages) ? event.messages as Parameters<LcmContextEngine["assemble"]>[0]["messages"] : [],
    }) as AssembleResultWithSystemPrompt;
    const prependedMessages = extractPrependedMessages(assembled.messages as unknown[], Array.isArray(event.messages) ? event.messages : []);
    const prependContext = formatPrependedContext(prependedMessages, assembled.systemPromptAddition);
    if (!prependContext) {
      return undefined;
    }
    return { prependContext };
  });

  api.on("agent_end", async (event, ctx) => {
    const sessionId = await resolveHookSessionId(ctx);
    if (!sessionId) {
      return;
    }
    const prePromptMessageCount = prePromptMessageCounts.get(sessionId) ?? 0;
    prePromptMessageCounts.delete(sessionId);
    await lcm.afterTurn({
      sessionId,
      sessionFile: "",
      messages: Array.isArray(event.messages) ? event.messages as Parameters<LcmContextEngine["afterTurn"]>[0]["messages"] : [],
      prePromptMessageCount,
    });
  });

  api.on("session_end", async (_event, ctx) => {
    // Every id stored by this bridge is trimmed, so compare against the
    // trimmed form; otherwise a padded id would leave stale entries behind.
    const endedSessionId = typeof ctx.sessionId === "string" ? ctx.sessionId.trim() : ctx.sessionId;
    prePromptMessageCounts.delete(endedSessionId);
    for (const [sessionKey, sessionId] of sessionIdsByKey.entries()) {
      if (sessionId === endedSessionId) {
        sessionIdsByKey.delete(sessionKey);
      }
    }
  });

  api.logger.warn(
    "[openclawbrain] registerContextEngine unavailable; using hook compatibility bridge for prompt assembly/after-turn ingest.",
  );
}
|
|
1460
|
+
|
|
1273
1461
|
const lcmPlugin = {
|
|
1274
1462
|
id: "openclawbrain",
|
|
1275
1463
|
name: "OpenClawBrain",
|
|
@@ -1290,7 +1478,12 @@ const lcmPlugin = {
|
|
|
1290
1478
|
const deps = createLcmDependencies(api);
|
|
1291
1479
|
const lcm = new LcmContextEngine(deps);
|
|
1292
1480
|
|
|
1293
|
-
api
|
|
1481
|
+
const contextApi = api as ContextEngineRegisteringApi;
|
|
1482
|
+
if (typeof contextApi.registerContextEngine === "function") {
|
|
1483
|
+
contextApi.registerContextEngine("openclawbrain", () => lcm);
|
|
1484
|
+
} else {
|
|
1485
|
+
registerHookCompatibilityBridge(api, lcm, deps);
|
|
1486
|
+
}
|
|
1294
1487
|
api.registerTool((ctx) =>
|
|
1295
1488
|
createLcmGrepTool({
|
|
1296
1489
|
deps,
|
package/package.json
CHANGED
package/src/brain-cli.ts
CHANGED
|
@@ -20,6 +20,17 @@ function printJson(payload: unknown): void {
|
|
|
20
20
|
process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
+
/**
 * Creates the logger handed to `init`.
 *
 * When the `OPENCLAWBRAIN_INIT_VERBOSE` environment variable is `1`, `true`
 * or `yes` (case-insensitive), both `info` and `warn` write the message plus a
 * newline to stderr. Otherwise both are no-ops.
 *
 * @returns An object with `info` and `warn` logging functions.
 */
function buildInitLog(): { info: (msg: string) => void; warn: (msg: string) => void } {
  const flag = process.env.OPENCLAWBRAIN_INIT_VERBOSE ?? "";
  const isVerbose = /^(1|true|yes)$/i.test(flag);

  if (isVerbose) {
    const writeLine = (msg: string) => process.stderr.write(`${msg}\n`);
    return {
      info: (msg: string) => writeLine(msg),
      warn: (msg: string) => writeLine(msg),
    };
  }

  return { info: () => {}, warn: () => {} };
}
|
|
33
|
+
|
|
23
34
|
function usage(): never {
|
|
24
35
|
process.stderr.write(
|
|
25
36
|
"Usage: openclawbrain <init|status|trace|replay|promote|rollback|disable|enable|doctor> [args]\n",
|
|
@@ -68,7 +79,7 @@ async function commandInit(workspaceArg?: string): Promise<void> {
|
|
|
68
79
|
workspaceRoot,
|
|
69
80
|
embedFn,
|
|
70
81
|
semanticThreshold: brainConfig.semanticThreshold,
|
|
71
|
-
log:
|
|
82
|
+
log: buildInitLog(),
|
|
72
83
|
});
|
|
73
84
|
|
|
74
85
|
store.clearGraph();
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { HarvestResult } from "../brain-runtime/evidence-detectors.js";
|
|
1
|
+
import type { HarvestMessagePart, HarvestResult } from "../brain-runtime/evidence-detectors.js";
|
|
2
2
|
|
|
3
3
|
const EXPLICIT_SCANNER_PATTERNS = [
|
|
4
4
|
/\bexpand for details about\b/i,
|
|
@@ -16,6 +16,20 @@ const BULLET_PATTERN = /^\s*[-*]\s+\S.+$/gm;
|
|
|
16
16
|
const HEADING_PATTERN = /^\s{0,3}#{1,6}\s+\S.+$/m;
|
|
17
17
|
const FILE_REF_PATTERN = /(?:^|[\s(])(?:\.?\/)?[\w./-]+\.(?:md|txt|ts|tsx|js|jsx|json|yaml|yml|sh|mjs)(?=$|[\s):,])/gim;
|
|
18
18
|
const IMPERATIVE_STEP_PATTERN = /^\s*(?:[-*]\s+|\d+\.\s+)?(?:inspect|check|retry|run|use|open|read|edit|verify|restart|re-?run|apply|deploy|create|install|record|compare|promote|rollback)\b/gim;
|
|
19
|
+
// Tool names that count towards a structured tool-chain signal.
const STRUCTURED_TOOL_NAMES = new Set([
  "bash",
  "git",
  "gh",
  "pnpm",
  "npm",
  "node",
  "openclaw",
  "python",
  "python3",
  "curl",
  "ollama",
  "codex",
  "claude",
]);

// Message part types that count towards a structured guidance signal.
const STRUCTURED_GUIDANCE_PART_TYPES = new Set([
  "file",
  "snapshot",
  "subtask",
  "patch",
  "compaction",
  "step_start",
  "step_finish",
  "retry",
]);
|
|
21
|
+
|
|
22
|
+
/** Text-shape signals gathered from a message's content by the scanner. */
type ContentSignalSummary = {
  /** Regex source of the doc-marker pattern that matched, or `null` when none did. */
  docMarker: string | null;
  /** Count of numbered-step lines (presumably lines like "1. ..."; computed outside this view). */
  numberedSteps: number;
  /** Count of bullet lines. */
  bulletLines: number;
  /** Count of command-like lines. */
  commandLines: number;
  /** Count of imperative-step lines. */
  imperativeLines: number;
  /** Whether the content matches the heading pattern. */
  hasHeading: boolean;
  /** Count of file references. */
  fileRefs: number;
  /** Accumulated heuristic score. */
  score: number;
  /** Labels of the signals that contributed, in the order they were detected. */
  signals: string[];
};
|
|
19
33
|
|
|
20
34
|
function countMatches(pattern: RegExp, content: string): number {
|
|
21
35
|
const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
|
|
@@ -23,25 +37,132 @@ function countMatches(pattern: RegExp, content: string): number {
|
|
|
23
37
|
return Array.from(content.matchAll(matcher)).length;
|
|
24
38
|
}
|
|
25
39
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
40
|
+
/**
 * Leniently decodes a JSON string.
 *
 * @param value Raw text, possibly missing.
 * @returns `null` for non-string or blank input, the parsed value when the
 *   text is valid JSON, and the original (untrimmed) string when parsing fails.
 */
function parseJson(value: string | null | undefined): unknown {
  if (typeof value === "string" && value.trim().length > 0) {
    try {
      return JSON.parse(value);
    } catch {
      // Not JSON: hand the raw text back so callers can still inspect it.
      return value;
    }
  }
  return null;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Narrows a value to a plain key/value record.
 *
 * @param value Any value.
 * @returns The value itself when it is a non-null, non-array object; otherwise `null`.
 */
function asRecord(value: unknown): Record<string, unknown> | null {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return null;
  }
  return value as Record<string, unknown>;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Decodes the JSON `metadata` of a message part.
 *
 * @param part Message part whose `metadata` is parsed.
 * @returns The decoded object, or an empty object when the metadata is
 *   missing, not JSON, or not a plain object.
 */
function readPartMetadata(part: HarvestMessagePart): Record<string, unknown> {
  const decoded = parseJson(part.metadata);
  const record = asRecord(decoded);
  return record === null ? {} : record;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Finds the first non-blank string stored under any of the given keys.
 *
 * @param record Record to read from; `null` yields `undefined`.
 * @param keys Keys to try, in priority order.
 * @returns The trimmed string of the first key that holds a non-blank string,
 *   or `undefined` when none does.
 */
function readString(record: Record<string, unknown> | null, keys: string[]): string | undefined {
  if (record == null) {
    return undefined;
  }
  for (const key of keys) {
    const candidate = record[key];
    if (typeof candidate !== "string") {
      continue;
    }
    const trimmed = candidate.trim();
    if (trimmed.length > 0) {
      return trimmed;
    }
  }
  return undefined;
}
|
|
39
73
|
|
|
74
|
+
/**
 * Coerces a value into a list of non-blank, trimmed strings.
 *
 * A single string is treated as a one-element list. Array entries that are
 * not strings, or that are blank, are dropped. Entries are trimmed in both
 * cases so that values differing only in surrounding whitespace do not show
 * up as distinct items when callers de-duplicate them.
 *
 * @param value A string, an array of mixed values, or anything else.
 * @returns The trimmed strings in input order; empty for unsupported input.
 */
function readStringArray(value: unknown): string[] {
  const candidates: unknown[] = Array.isArray(value) ? value : [value];
  const result: string[] = [];
  for (const candidate of candidates) {
    if (typeof candidate !== "string") {
      continue;
    }
    const trimmed = candidate.trim();
    if (trimmed.length > 0) {
      result.push(trimmed);
    }
  }
  return result;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Reads a command line from either a string or an argv-style array.
 *
 * Array entries that are not strings, or that are blank, are ignored; the
 * remaining entries are trimmed and joined with single spaces, matching the
 * trimming already applied to the plain-string form.
 *
 * @param value A command string, an array of arguments, or anything else.
 * @returns The command text, or `undefined` when nothing usable is present.
 */
function readCommand(value: unknown): string | undefined {
  if (typeof value === "string") {
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : undefined;
  }
  if (Array.isArray(value)) {
    const parts: string[] = [];
    for (const entry of value) {
      if (typeof entry !== "string") {
        continue;
      }
      const trimmed = entry.trim();
      if (trimmed.length > 0) {
        parts.push(trimmed);
      }
    }
    return parts.length > 0 ? parts.join(" ") : undefined;
  }
  return undefined;
}
|
|
94
|
+
|
|
95
|
+
/**
 * Pulls a command line out of a decoded tool input.
 *
 * Lookup order: the `command`, `cmd` or `shellCommand` string field of an
 * object input; then its `args` field; finally the input itself when it is a
 * non-blank string.
 *
 * @param input Decoded tool input of unknown shape.
 * @returns The command text, or `undefined` when none is found.
 */
function extractCommand(input: unknown): string | undefined {
  const record = asRecord(input);

  const named = readString(record, ["command", "cmd", "shellCommand"]);
  if (named !== undefined) {
    return named;
  }

  const fromArgs = readCommand(record?.args);
  if (fromArgs !== undefined) {
    return fromArgs;
  }

  if (typeof input === "string") {
    const trimmed = input.trim();
    return trimmed.length > 0 ? trimmed : undefined;
  }
  return undefined;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Gathers file path hints from a tool part's decoded input, output and raw
 * metadata.
 *
 * List-valued fields (`filesTouched`, `changedFiles`, `files`, `paths`) are
 * read from the output and input first, then single path fields from the
 * output, the input and `metadata.raw`.
 *
 * @param part Tool message part whose `toolInput` / `toolOutput` are parsed as JSON.
 * @param metadata Decoded part metadata; only `raw` is consulted.
 * @returns The distinct hints in first-seen order.
 */
function collectStructuredFileHints(part: HarvestMessagePart, metadata: Record<string, unknown>): string[] {
  const input = asRecord(parseJson(part.toolInput));
  const output = asRecord(parseJson(part.toolOutput));
  const raw = asRecord(metadata.raw);

  // Order matters: it determines the order of the returned hints.
  const sources: unknown[] = [
    output?.filesTouched,
    output?.changedFiles,
    output?.files,
    output?.paths,
    input?.filesTouched,
    input?.files,
    input?.paths,
    readString(output, ["artifactPath", "outputPath", "reportPath", "logPath", "filePath", "path"]),
    readString(input, ["artifactPath", "outputPath", "reportPath", "logPath", "filePath", "path"]),
    readString(raw, ["path", "filePath", "artifactPath"]),
  ];

  const hints = new Set<string>();
  sources.forEach((source) => {
    readStringArray(source).forEach((hint) => {
      hints.add(hint);
    });
  });
  return [...hints];
}
|
|
129
|
+
|
|
130
|
+
/**
 * Collects descriptive details for a structured (non-tool) message part.
 *
 * Candidates come from `metadata.raw` (a path-like field, a name-like field,
 * a summary/description) and from the part's own text content. Every
 * candidate becomes a label; candidates that contain `/`, `\` or `.`, or that
 * consist only of letters, digits, `_` and `-`, are also recorded as paths.
 *
 * @param part Message part; its `textContent` is used when it is a non-blank string.
 * @param metadata Decoded part metadata; only `raw` is consulted.
 * @returns Distinct `paths` and `labels`, each in first-seen order.
 */
function collectStructuredPartDetails(part: HarvestMessagePart, metadata: Record<string, unknown>): {
  paths: string[];
  labels: string[];
} {
  const raw = asRecord(metadata.raw);
  const pathLike = /[/\\.]|^[A-Z0-9_-]+$/i;
  const text = typeof part.textContent === "string" ? part.textContent.trim() : "";

  const candidates: Array<string | undefined> = [
    readString(raw, ["path", "filePath", "artifactPath", "storageUri", "sourcePath", "targetPath", "outputPath"]),
    readString(raw, ["fileName", "title", "label", "name", "summaryId", "taskId", "stepId"]),
    readString(raw, ["summary", "description"]),
    text.length > 0 ? text : undefined,
  ];

  const pathSet = new Set<string>();
  const labelSet = new Set<string>();
  candidates.forEach((candidate) => {
    readStringArray(candidate).forEach((item) => {
      if (pathLike.test(item)) {
        pathSet.add(item);
      }
      labelSet.add(item);
    });
  });

  return { paths: [...pathSet], labels: [...labelSet] };
}
|
|
157
|
+
|
|
158
|
+
function collectContentSignals(content: string): ContentSignalSummary {
|
|
40
159
|
const signals: string[] = [];
|
|
41
160
|
let score = 0;
|
|
161
|
+
let docMarker: string | null = null;
|
|
42
162
|
|
|
43
163
|
for (const pattern of DOC_MARKER_PATTERNS) {
|
|
44
164
|
if (pattern.test(content)) {
|
|
165
|
+
docMarker = pattern.source;
|
|
45
166
|
signals.push(`doc:${pattern.source}`);
|
|
46
167
|
score += 1.0;
|
|
47
168
|
break;
|
|
@@ -72,7 +193,8 @@ export function detectScannerEvidence(content: string): HarvestResult | null {
|
|
|
72
193
|
score += 0.8;
|
|
73
194
|
}
|
|
74
195
|
|
|
75
|
-
|
|
196
|
+
const hasHeading = HEADING_PATTERN.test(content);
|
|
197
|
+
if (hasHeading && (numberedSteps >= 1 || bulletLines >= 2)) {
|
|
76
198
|
signals.push("heading");
|
|
77
199
|
score += 0.4;
|
|
78
200
|
}
|
|
@@ -83,16 +205,164 @@ export function detectScannerEvidence(content: string): HarvestResult | null {
|
|
|
83
205
|
score += 0.3;
|
|
84
206
|
}
|
|
85
207
|
|
|
86
|
-
|
|
208
|
+
return {
|
|
209
|
+
docMarker,
|
|
210
|
+
numberedSteps,
|
|
211
|
+
bulletLines,
|
|
212
|
+
commandLines,
|
|
213
|
+
imperativeLines,
|
|
214
|
+
hasHeading,
|
|
215
|
+
fileRefs,
|
|
216
|
+
score,
|
|
217
|
+
signals,
|
|
218
|
+
};
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
/**
 * Decides whether content signals look like written guidance.
 *
 * True when any of these holds: a doc marker matched, there are at least two
 * numbered steps, there is a heading together with at least two bullet lines,
 * or there are at least two imperative lines.
 *
 * @param contentSignals Signals gathered from the message content.
 * @returns Whether the content has a guidance-like shape.
 */
function hasGuidanceShape(contentSignals: ContentSignalSummary): boolean {
  if (contentSignals.docMarker) {
    return true;
  }
  if (contentSignals.numberedSteps >= 2) {
    return true;
  }
  if (contentSignals.hasHeading && contentSignals.bulletLines >= 2) {
    return true;
  }
  return contentSignals.imperativeLines >= 2;
}
|
|
227
|
+
|
|
228
|
+
/**
 * Looks for scanner evidence in the structured parts of a message.
 *
 * Runs only when parts are present and the content already has a guidance-like
 * shape. Two kinds of evidence are recognised, checked in this order:
 *
 * 1. A tool chain: at least one tool part with a known tool name, plus at
 *    least one extracted command or file hint.
 * 2. Structured guidance parts: at least one part of a known guidance type
 *    that yielded a path or label.
 *
 * @param contentSignals Signals gathered from the message content.
 * @param messageParts Parts of the message; may be omitted.
 * @returns A scanner result describing what was found, or `null`.
 */
function detectStructuredScannerEvidence(
  contentSignals: ContentSignalSummary,
  messageParts?: HarvestMessagePart[],
): HarvestResult | null {
  if (!messageParts || messageParts.length === 0 || !hasGuidanceShape(contentSignals)) {
    return null;
  }

  // Sets keep first-seen order, which is the order reported in the metadata.
  const toolNames = new Set<string>();
  const commands = new Set<string>();
  const toolFileHints = new Set<string>();
  const structuredPartTypes = new Set<string>();
  const structuredPaths = new Set<string>();
  const structuredLabels = new Set<string>();
  const partOrdinals: number[] = [];
  const rawTypes = new Set<string>();

  for (const part of messageParts) {
    const metadata = readPartMetadata(part);
    const rawType = typeof metadata.rawType === "string" ? metadata.rawType : null;
    if (rawType) {
      rawTypes.add(rawType);
    }

    // Ordinals are recorded for every part, not only the ones that match below.
    if (typeof part.ordinal === "number") {
      partOrdinals.push(part.ordinal);
    }

    if (part.partType === "tool") {
      const toolName = typeof part.toolName === "string" ? part.toolName.trim() : "";
      if (toolName && STRUCTURED_TOOL_NAMES.has(toolName)) {
        toolNames.add(toolName);
      }

      // Commands and file hints are gathered from every tool part, even when
      // its tool name is not in the known set.
      const command = extractCommand(parseJson(part.toolInput));
      if (command) {
        commands.add(command);
      }

      for (const hint of collectStructuredFileHints(part, metadata)) {
        toolFileHints.add(hint);
      }
      continue;
    }

    if (STRUCTURED_GUIDANCE_PART_TYPES.has(part.partType)) {
      structuredPartTypes.add(part.partType);
      const details = collectStructuredPartDetails(part, metadata);
      for (const path of details.paths) {
        structuredPaths.add(path);
      }
      for (const label of details.labels) {
        structuredLabels.add(label);
      }
    }
  }

  // Tool-chain evidence takes precedence over guidance-part evidence.
  if (toolNames.size > 0 && (commands.size > 0 || toolFileHints.size > 0)) {
    return {
      value: 0.25,
      source: "scanner",
      reason: `scanner structured tool-chain: tools=${Array.from(toolNames).join(",")}`,
      confidence: 0.85,
      kind: "scanner_signal",
      extractor: "structured_tool_chain",
      metadata: {
        toolNames: Array.from(toolNames),
        commands: Array.from(commands),
        fileHints: Array.from(toolFileHints),
        partOrdinals,
        rawTypes: Array.from(rawTypes),
        guidanceSignals: contentSignals.signals,
      },
    };
  }

  // Guidance parts only count when they produced at least one path or label.
  if (structuredPartTypes.size === 0 || (structuredPaths.size === 0 && structuredLabels.size === 0)) {
    return null;
  }

  return {
    value: 0.25,
    source: "scanner",
    reason: `scanner structured guidance parts: ${Array.from(structuredPartTypes).join(",")}`,
    confidence: 0.83,
    kind: "scanner_signal",
    extractor: "structured_guidance_parts",
    metadata: {
      structuredPartTypes: Array.from(structuredPartTypes),
      pathHints: Array.from(structuredPaths),
      labels: Array.from(structuredLabels),
      partOrdinals,
      rawTypes: Array.from(rawTypes),
      guidanceSignals: contentSignals.signals,
    },
  };
}
|
|
325
|
+
|
|
326
|
+
/**
 * Detects scanner evidence for a message.
 *
 * Checks, in order: an explicit scanner marker in the content, structured
 * evidence from the message parts, and finally the content heuristic, which
 * requires a score of at least 1.8.
 *
 * @param content Message text to scan.
 * @param messageParts Optional structured parts of the message.
 * @returns The first evidence found, or `null` when nothing qualifies.
 */
export function detectScannerEvidence(content: string, messageParts?: HarvestMessagePart[]): HarvestResult | null {
  const explicitMarker = EXPLICIT_SCANNER_PATTERNS.find((pattern) => pattern.test(content));
  if (explicitMarker) {
    return {
      value: 0.25,
      source: "scanner",
      reason: `scanner marker: ${explicitMarker.source}`,
      confidence: 0.7,
      kind: "scanner_signal",
      extractor: "scanner_marker",
      metadata: { marker: explicitMarker.source },
    };
  }

  const contentSignals = collectContentSignals(content);

  const structured = detectStructuredScannerEvidence(contentSignals, messageParts);
  if (structured) {
    return structured;
  }

  const { signals, score } = contentSignals;
  if (score < 1.8) {
    return null;
  }

  // Confidence grows with the number of contributing signals, capped at 0.8.
  const confidence = Math.min(0.8, 0.5 + signals.length * 0.05);
  return {
    value: 0.25,
    source: "scanner",
    reason: `scanner heuristic: ${signals.join(", ")}`,
    confidence,
    kind: "scanner_signal",
    extractor: "scanner_heuristic",
    metadata: {
      guidanceSignals: signals,
      numberedSteps: contentSignals.numberedSteps,
      bulletLines: contentSignals.bulletLines,
      commandLines: contentSignals.commandLines,
      imperativeLines: contentSignals.imperativeLines,
      fileRefs: contentSignals.fileRefs,
      hasHeading: contentSignals.hasHeading,
    },
  };
}
|