@jonathangu/openclawbrain 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +140 -290
  2. package/docs/END_STATE.md +106 -94
  3. package/docs/EVIDENCE.md +71 -23
  4. package/docs/RELEASE_CONTRACT.md +46 -32
  5. package/docs/agent-tools.md +65 -34
  6. package/docs/architecture.md +128 -142
  7. package/docs/configuration.md +62 -25
  8. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/channels-status.txt +20 -0
  9. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/config-snapshot.json +94 -0
  10. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/doctor.json +14 -0
  11. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/gateway-probe.txt +24 -0
  12. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/gateway-status.txt +31 -0
  13. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/init-capture.json +15 -0
  14. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/logs.txt +357 -0
  15. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/status-all.txt +61 -0
  16. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/status.json +275 -0
  17. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/summary.md +18 -0
  18. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/trace.json +222 -0
  19. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/validation-report.json +1515 -0
  20. package/docs/evidence/2026-03-16/1fc8ee6fd7892e3deb27d111434df948bca2a66b/workspace-inventory.json +4 -0
  21. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/channels-status.txt +20 -0
  22. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/config-snapshot.json +94 -0
  23. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/doctor.json +14 -0
  24. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/gateway-probe.txt +24 -0
  25. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/gateway-status.txt +31 -0
  26. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/init-capture.json +15 -0
  27. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/logs.txt +362 -0
  28. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/status-all.txt +61 -0
  29. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/status.json +275 -0
  30. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/summary.md +21 -0
  31. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/trace.json +222 -0
  32. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/validation-report.json +4400 -0
  33. package/docs/evidence/2026-03-16/4ccd71a22418b9170128b8d948f5a95801a10380/workspace-inventory.json +4 -0
  34. package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/channels-status.txt +31 -0
  35. package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/config-snapshot.json +94 -0
  36. package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/doctor.json +14 -0
  37. package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/gateway-probe.txt +34 -0
  38. package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/gateway-status.txt +41 -0
  39. package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/logs.txt +441 -0
  40. package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/status-all.txt +60 -0
  41. package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/status.json +276 -0
  42. package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/summary.md +13 -0
  43. package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/trace.json +4 -0
  44. package/docs/evidence/2026-03-16/d93f09feea123a08d020fcad8a4523b6c1d26507/validation-report.json +387 -0
  45. package/docs/tui.md +11 -4
  46. package/index.ts +194 -1
  47. package/package.json +1 -1
  48. package/src/brain-cli.ts +12 -1
  49. package/src/brain-harvest/scanner.ts +286 -16
  50. package/src/brain-harvest/self.ts +134 -6
  51. package/src/brain-runtime/evidence-detectors.ts +3 -1
  52. package/src/brain-runtime/harvester-extension.ts +3 -0
  53. package/src/brain-runtime/service.ts +2 -0
  54. package/src/brain-store/embedding.ts +29 -8
  55. package/src/brain-worker/worker.ts +40 -0
  56. package/src/engine.ts +1 -0
package/index.ts CHANGED
@@ -1270,6 +1270,194 @@ function createLcmDependencies(api: OpenClawPluginApi): LcmDependencies {
1270
1270
  };
1271
1271
  }
1272
1272
 
1273
/**
 * The plugin API widened with an optional `registerContextEngine` method.
 * Because the method is optional here, callers must feature-detect it
 * before invoking it.
 */
type ContextEngineRegisteringApi = OpenClawPluginApi & {
  registerContextEngine?: (id: string, factory: () => unknown) => void;
};
1276
+
1277
/**
 * Flattens arbitrary prompt content into a single trimmed string.
 *
 * - Strings are trimmed.
 * - Arrays are normalized entry by entry; empty results are dropped and the
 *   rest joined with newlines.
 * - Objects contribute their `text`, `content`, `value`, `thinking` and
 *   `summary` fields (in that order), each normalized recursively; empty and
 *   repeated fragments are dropped before joining with newlines.
 * - Anything else yields the empty string.
 */
function normalizePromptText(value: unknown): string {
  if (typeof value === "string") {
    return value.trim();
  }

  if (Array.isArray(value)) {
    const pieces: string[] = [];
    for (const item of value) {
      const text = normalizePromptText(item);
      if (text.length > 0) {
        pieces.push(text);
      }
    }
    return pieces.join("\n").trim();
  }

  if (value === null || typeof value !== "object") {
    return "";
  }

  const fields = value as Record<string, unknown>;
  // A Set keeps first-seen order, so duplicates collapse onto their first occurrence.
  const fragments = new Set<string>();
  for (const candidate of [fields.text, fields.content, fields.value, fields.thinking, fields.summary]) {
    const text = normalizePromptText(candidate);
    if (text.length > 0) {
      fragments.add(text);
    }
  }
  return Array.from(fragments).join("\n").trim();
}
1304
+
1305
/**
 * Returns the trimmed `role` of a message-like object, or the literal
 * "message" when the input is not an object or has no non-blank string role.
 */
function normalizePromptRole(message: unknown): string {
  const fallback = "message";
  if (typeof message !== "object" || message === null) {
    return fallback;
  }

  const candidate = (message as { role?: unknown }).role;
  if (typeof candidate !== "string") {
    return fallback;
  }

  const trimmed = candidate.trim();
  return trimmed.length > 0 ? trimmed : fallback;
}
1312
+
1313
/**
 * Builds a comparable string signature for a prompt message.
 *
 * Object messages are reduced to their normalized role and normalized
 * `content` text, so two messages that differ only in incidental fields or
 * whitespace produce the same signature. Non-object inputs are serialized
 * directly.
 *
 * @param message - Any message-like value.
 * @returns A string signature; never `undefined`.
 */
function promptMessageSignature(message: unknown): string {
  if (!message || typeof message !== "object") {
    // JSON.stringify yields undefined (not a string) for undefined, functions
    // and symbols; fall back to String() so the declared return type holds.
    const serialized: string | undefined = JSON.stringify(message);
    return serialized ?? String(message);
  }

  const record = message as Record<string, unknown>;
  return JSON.stringify({
    role: normalizePromptRole(record),
    content: normalizePromptText(record.content),
  });
}
1323
+
1324
/**
 * Returns the leading messages of `assembledMessages` that are not part of
 * the live conversation.
 *
 * The two lists are aligned at their ends: the longest run of trailing
 * messages whose signatures match pairwise is treated as the live
 * conversation and stripped. If nothing matches (or there are no live
 * messages) the assembled list is returned unchanged.
 *
 * @param assembledMessages - Messages produced by context assembly.
 * @param liveMessages - Messages currently in the live conversation.
 * @returns The assembled messages that precede the shared tail.
 */
function extractPrependedMessages(assembledMessages: unknown[], liveMessages: unknown[]): unknown[] {
  if (assembledMessages.length === 0) {
    return [];
  }
  if (liveMessages.length === 0) {
    return assembledMessages;
  }

  // If the tails of length k match, the tails of every shorter length match
  // too, so the largest matching overlap is simply the common-suffix length.
  // Walking backwards once finds it in linear time and stops computing
  // signatures at the first mismatch.
  const maxOverlap = Math.min(assembledMessages.length, liveMessages.length);
  let overlap = 0;
  while (overlap < maxOverlap) {
    const assembledSignature = promptMessageSignature(assembledMessages[assembledMessages.length - 1 - overlap]);
    const liveSignature = promptMessageSignature(liveMessages[liveMessages.length - 1 - overlap]);
    if (assembledSignature !== liveSignature) {
      break;
    }
    overlap += 1;
  }

  if (overlap === 0) {
    return assembledMessages;
  }
  return assembledMessages.slice(0, assembledMessages.length - overlap);
}
1346
+
1347
/**
 * Renders the text to prepend to a prompt.
 *
 * The trimmed `systemPromptAddition` (if non-empty) comes first, followed by
 * a "## OpenClawBrain recalled context" section that lists every message
 * with non-empty normalized content under a "### <role>" heading. Sections
 * are separated by a blank line.
 *
 * @returns The rendered text, or `undefined` when there is nothing to add.
 */
function formatPrependedContext(messages: unknown[], systemPromptAddition?: string): string | undefined {
  const blocks: string[] = [];

  const addition = typeof systemPromptAddition === "string" ? systemPromptAddition.trim() : "";
  if (addition) {
    blocks.push(addition);
  }

  const rendered: string[] = [];
  for (const message of messages) {
    const body = normalizePromptText((message as { content?: unknown } | null)?.content);
    if (body) {
      rendered.push(`### ${normalizePromptRole(message)}\n${body}`);
    }
  }

  if (rendered.length > 0) {
    blocks.push(`## OpenClawBrain recalled context\n\n${rendered.join("\n\n")}`);
  }

  return blocks.length > 0 ? blocks.join("\n\n") : undefined;
}
1377
+
1378
/**
 * Wires the context engine to plugin lifecycle hooks instead of registering
 * it as a context engine.
 *
 * - `before_prompt_build`: records the incoming message count, runs
 *   `lcm.assemble`, and returns the assembled-but-not-live messages (plus any
 *   system prompt addition) as `prependContext`.
 * - `agent_end`: forwards the turn's messages to `lcm.afterTurn` together
 *   with the message count recorded before the prompt was built.
 * - `session_end`: drops the per-session bookkeeping.
 *
 * Logs a warning once registration is done.
 *
 * @param api - Plugin API used to subscribe to hooks and to log.
 * @param lcm - Context engine whose `assemble` / `afterTurn` are bridged.
 * @param deps - Dependencies; only `resolveSessionIdFromSessionKey` is used here.
 */
function registerHookCompatibilityBridge(
  api: OpenClawPluginApi,
  lcm: LcmContextEngine,
  deps: ReturnType<typeof createLcmDependencies>,
): void {
  // Message count seen at before_prompt_build, keyed by resolved session id.
  const prePromptMessageCounts = new Map<string, number>();
  // Cache of session key -> session id (both stored trimmed).
  const sessionIdsByKey = new Map<string, string>();

  // Caches the key -> id pairing, but only when both are non-blank strings.
  const rememberSession = (sessionId?: string | null, sessionKey?: string | null) => {
    if (typeof sessionId === "string" && sessionId.trim().length > 0 && typeof sessionKey === "string" && sessionKey.trim().length > 0) {
      sessionIdsByKey.set(sessionKey.trim(), sessionId.trim());
    }
  };

  // Resolution order: explicit session id, cached id for the key, id resolved
  // through deps, and finally the trimmed key itself as a stand-in id.
  const resolveHookSessionId = async (ctx: { sessionId?: string; sessionKey?: string }): Promise<string | undefined> => {
    if (typeof ctx.sessionId === "string" && ctx.sessionId.trim().length > 0) {
      rememberSession(ctx.sessionId, ctx.sessionKey);
      return ctx.sessionId.trim();
    }
    if (typeof ctx.sessionKey === "string" && ctx.sessionKey.trim().length > 0) {
      const key = ctx.sessionKey.trim();
      const remembered = sessionIdsByKey.get(key);
      if (remembered) {
        return remembered;
      }
      const resolved = await deps.resolveSessionIdFromSessionKey(key);
      if (resolved) {
        sessionIdsByKey.set(key, resolved);
        return resolved;
      }
      // Unresolved keys are returned as-is and deliberately not cached.
      return key;
    }
    return undefined;
  };

  api.on("before_prompt_build", async (event, ctx) => {
    const sessionId = await resolveHookSessionId(ctx);
    if (!sessionId) {
      return undefined;
    }
    // Remembered so agent_end can pass it to afterTurn as prePromptMessageCount.
    prePromptMessageCounts.set(sessionId, Array.isArray(event.messages) ? event.messages.length : 0);

    const assembled = await lcm.assemble({
      sessionId,
      messages: Array.isArray(event.messages) ? event.messages as Parameters<LcmContextEngine["assemble"]>[0]["messages"] : [],
    }) as AssembleResultWithSystemPrompt;
    // Only the messages assembly added ahead of the live conversation are surfaced.
    const prependedMessages = extractPrependedMessages(assembled.messages as unknown[], Array.isArray(event.messages) ? event.messages : []);
    const prependContext = formatPrependedContext(prependedMessages, assembled.systemPromptAddition);
    if (!prependContext) {
      return undefined;
    }
    return { prependContext };
  });

  api.on("agent_end", async (event, ctx) => {
    const sessionId = await resolveHookSessionId(ctx);
    if (!sessionId) {
      return;
    }
    // Falls back to 0 when no before_prompt_build was recorded for this session.
    const prePromptMessageCount = prePromptMessageCounts.get(sessionId) ?? 0;
    prePromptMessageCounts.delete(sessionId);
    await lcm.afterTurn({
      sessionId,
      sessionFile: "",
      messages: Array.isArray(event.messages) ? event.messages as Parameters<LcmContextEngine["afterTurn"]>[0]["messages"] : [],
      prePromptMessageCount,
    });
  });

  api.on("session_end", async (_event, ctx) => {
    // NOTE(review): this uses the raw ctx.sessionId, whereas the maps are keyed
    // by / hold trimmed ids (or a fallback session key). Entries created via a
    // session key or an untrimmed id may not be cleaned up here — confirm.
    prePromptMessageCounts.delete(ctx.sessionId);
    for (const [sessionKey, sessionId] of sessionIdsByKey.entries()) {
      if (sessionId === ctx.sessionId) {
        sessionIdsByKey.delete(sessionKey);
      }
    }
  });

  api.logger.warn(
    "[openclawbrain] registerContextEngine unavailable; using hook compatibility bridge for prompt assembly/after-turn ingest.",
  );
}
1460
+
1273
1461
  const lcmPlugin = {
1274
1462
  id: "openclawbrain",
1275
1463
  name: "OpenClawBrain",
@@ -1290,7 +1478,12 @@ const lcmPlugin = {
1290
1478
  const deps = createLcmDependencies(api);
1291
1479
  const lcm = new LcmContextEngine(deps);
1292
1480
 
1293
- api.registerContextEngine("openclawbrain", () => lcm);
1481
+ const contextApi = api as ContextEngineRegisteringApi;
1482
+ if (typeof contextApi.registerContextEngine === "function") {
1483
+ contextApi.registerContextEngine("openclawbrain", () => lcm);
1484
+ } else {
1485
+ registerHookCompatibilityBridge(api, lcm, deps);
1486
+ }
1294
1487
  api.registerTool((ctx) =>
1295
1488
  createLcmGrepTool({
1296
1489
  deps,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jonathangu/openclawbrain",
3
- "version": "0.3.0",
3
+ "version": "0.3.1",
4
4
  "description": "OpenClawBrain v2 for OpenClaw — lossless transcript memory with a clean learning-layer rebuild",
5
5
  "type": "module",
6
6
  "main": "index.ts",
package/src/brain-cli.ts CHANGED
@@ -20,6 +20,17 @@ function printJson(payload: unknown): void {
20
20
  process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
21
21
  }
22
22
 
23
/**
 * Creates the logger handed to the init routine.
 *
 * When the OPENCLAWBRAIN_INIT_VERBOSE environment variable is "1", "true" or
 * "yes" (case-insensitive), both `info` and `warn` write the message plus a
 * newline to stderr. Otherwise both are no-ops.
 */
function buildInitLog(): { info: (msg: string) => void; warn: (msg: string) => void } {
  const flag = process.env.OPENCLAWBRAIN_INIT_VERBOSE ?? "";

  if (/^(1|true|yes)$/i.test(flag)) {
    const emit = (msg: string) => process.stderr.write(`${msg}\n`);
    return { info: emit, warn: emit };
  }

  const noop = () => {};
  return { info: noop, warn: noop };
}
33
+
23
34
  function usage(): never {
24
35
  process.stderr.write(
25
36
  "Usage: openclawbrain <init|status|trace|replay|promote|rollback|disable|enable|doctor> [args]\n",
@@ -68,7 +79,7 @@ async function commandInit(workspaceArg?: string): Promise<void> {
68
79
  workspaceRoot,
69
80
  embedFn,
70
81
  semanticThreshold: brainConfig.semanticThreshold,
71
- log: { info: () => {}, warn: () => {} },
82
+ log: buildInitLog(),
72
83
  });
73
84
 
74
85
  store.clearGraph();
@@ -1,4 +1,4 @@
1
- import type { HarvestResult } from "../brain-runtime/evidence-detectors.js";
1
+ import type { HarvestMessagePart, HarvestResult } from "../brain-runtime/evidence-detectors.js";
2
2
 
3
3
  const EXPLICIT_SCANNER_PATTERNS = [
4
4
  /\bexpand for details about\b/i,
@@ -16,6 +16,20 @@ const BULLET_PATTERN = /^\s*[-*]\s+\S.+$/gm;
16
16
  const HEADING_PATTERN = /^\s{0,3}#{1,6}\s+\S.+$/m;
17
17
  const FILE_REF_PATTERN = /(?:^|[\s(])(?:\.?\/)?[\w./-]+\.(?:md|txt|ts|tsx|js|jsx|json|yaml|yml|sh|mjs)(?=$|[\s):,])/gim;
18
18
  const IMPERATIVE_STEP_PATTERN = /^\s*(?:[-*]\s+|\d+\.\s+)?(?:inspect|check|retry|run|use|open|read|edit|verify|restart|re-?run|apply|deploy|create|install|record|compare|promote|rollback)\b/gim;
19
// Tool names that count toward structured tool-chain evidence when they appear
// (exact, case-sensitive match after trimming) on a "tool" message part.
const STRUCTURED_TOOL_NAMES = new Set(["bash", "git", "gh", "pnpm", "npm", "node", "openclaw", "python", "python3", "curl", "ollama", "codex", "claude"]);
// Non-tool message part types that are inspected for structured guidance details.
const STRUCTURED_GUIDANCE_PART_TYPES = new Set(["file", "snapshot", "subtask", "patch", "compaction", "step_start", "step_finish", "retry"]);
21
+
22
/**
 * Summary of the heuristic signals found in a message's text content, as
 * returned by `collectContentSignals`.
 */
type ContentSignalSummary = {
  // Regex source of the doc-marker pattern that matched, or null if none did.
  docMarker: string | null;
  // Per-category tallies; presumably counts of matching lines — TODO confirm
  // against the parts of collectContentSignals that compute them.
  numberedSteps: number;
  bulletLines: number;
  commandLines: number;
  imperativeLines: number;
  // Whether the heading pattern matched anywhere in the content.
  hasHeading: boolean;
  fileRefs: number;
  // Accumulated heuristic score across all signals.
  score: number;
  // Labels of the signals that contributed to the score.
  signals: string[];
};
19
33
 
20
34
  function countMatches(pattern: RegExp, content: string): number {
21
35
  const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
@@ -23,25 +37,132 @@ function countMatches(pattern: RegExp, content: string): number {
23
37
  return Array.from(content.matchAll(matcher)).length;
24
38
  }
25
39
 
26
- export function detectScannerEvidence(content: string): HarvestResult | null {
27
- for (const pattern of EXPLICIT_SCANNER_PATTERNS) {
28
- if (pattern.test(content)) {
29
- return {
30
- value: 0.25,
31
- source: "scanner",
32
- reason: `scanner marker: ${pattern.source}`,
33
- confidence: 0.7,
34
- kind: "scanner_signal",
35
- extractor: "scanner_marker",
36
- };
40
/**
 * Leniently parses a JSON string.
 *
 * @returns `null` for non-string or blank input, the parsed value when the
 *   text is valid JSON, and the original string unchanged when it is not.
 */
function parseJson(value: string | null | undefined): unknown {
  if (typeof value === "string" && value.trim().length > 0) {
    try {
      return JSON.parse(value);
    } catch {
      // Not JSON: hand the raw text back so callers can still use it.
      return value;
    }
  }
  return null;
}
50
+
51
/**
 * Narrows a value to a plain key/value record.
 *
 * @returns The same object when `value` is a non-null, non-array object;
 *   otherwise `null`.
 */
function asRecord(value: unknown): Record<string, unknown> | null {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return null;
  }
  return value as Record<string, unknown>;
}
56
+
57
/**
 * Parses a message part's `metadata` string into a record.
 *
 * @returns The parsed object, or an empty record when the metadata is
 *   missing, blank, not valid JSON, or not a plain object.
 */
function readPartMetadata(part: HarvestMessagePart): Record<string, unknown> {
  const parsed = parseJson(part.metadata);
  const record = asRecord(parsed);
  return record === null ? {} : record;
}
60
+
61
/**
 * Looks up the first non-blank string among the given keys.
 *
 * @param record - Record to read from; a missing record yields `undefined`.
 * @param keys - Keys to try, in priority order.
 * @returns The trimmed value of the first key holding a non-blank string,
 *   or `undefined` when none does.
 */
function readString(record: Record<string, unknown> | null, keys: string[]): string | undefined {
  if (!record) {
    return undefined;
  }

  for (const key of keys) {
    const candidate = record[key];
    if (typeof candidate !== "string") {
      continue;
    }
    const trimmed = candidate.trim();
    if (trimmed.length > 0) {
      return trimmed;
    }
  }
  return undefined;
}
39
73
 
74
/**
 * Coerces a value into a list of non-blank, trimmed strings.
 *
 * Arrays contribute their string entries; a lone string contributes itself.
 * Non-string entries and blank strings are dropped. Entries are trimmed in
 * both cases so that the array path agrees with the single-string path and
 * whitespace variants of the same value de-duplicate cleanly downstream.
 *
 * @param value - An array, a string, or anything else.
 * @returns The trimmed, non-blank strings found; empty when there are none.
 */
function readStringArray(value: unknown): string[] {
  const candidates: unknown[] = Array.isArray(value) ? value : [value];
  const result: string[] = [];
  for (const candidate of candidates) {
    if (typeof candidate !== "string") {
      continue;
    }
    const trimmed = candidate.trim();
    if (trimmed.length > 0) {
      result.push(trimmed);
    }
  }
  return result;
}
83
+
84
/**
 * Reads a command line from either a string or an argv-style array.
 *
 * @returns The trimmed string when `value` is a non-blank string; the
 *   non-blank string entries joined with single spaces (entries themselves
 *   left untouched) when `value` is an array with at least one such entry;
 *   otherwise `undefined`.
 */
function readCommand(value: unknown): string | undefined {
  if (typeof value === "string") {
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : undefined;
  }

  if (!Array.isArray(value)) {
    return undefined;
  }

  const words: string[] = [];
  for (const entry of value) {
    if (typeof entry === "string" && entry.trim().length > 0) {
      words.push(entry);
    }
  }
  return words.length === 0 ? undefined : words.join(" ");
}
94
+
95
/**
 * Extracts a command string from a parsed tool input.
 *
 * Tried in order: the `command`, `cmd` or `shellCommand` field of an object
 * input; its `args` field (string or array); and finally the input itself
 * when it is a non-blank string.
 *
 * @returns The command text, or `undefined` when none can be found.
 */
function extractCommand(input: unknown): string | undefined {
  const record = asRecord(input);

  const named = readString(record, ["command", "cmd", "shellCommand"]);
  if (named !== undefined) {
    return named;
  }

  const fromArgs = readCommand(record?.args);
  if (fromArgs !== undefined) {
    return fromArgs;
  }

  if (typeof input === "string") {
    const trimmed = input.trim();
    if (trimmed.length > 0) {
      return trimmed;
    }
  }
  return undefined;
}
101
+
102
/**
 * Gathers file/path hints attached to a tool message part.
 *
 * Sources, in order: list-style fields of the parsed tool output
 * (`filesTouched`, `changedFiles`, `files`, `paths`), list-style fields of
 * the parsed tool input (`filesTouched`, `files`, `paths`), the first
 * single-path field found on the output and on the input, and the first
 * path field found on `metadata.raw`.
 *
 * @returns The distinct hints in first-seen order.
 */
function collectStructuredFileHints(part: HarvestMessagePart, metadata: Record<string, unknown>): string[] {
  const input = asRecord(parseJson(part.toolInput));
  const output = asRecord(parseJson(part.toolOutput));
  const raw = asRecord(metadata.raw);
  const singlePathKeys = ["artifactPath", "outputPath", "reportPath", "logPath", "filePath", "path"];

  const candidates: unknown[] = [
    output?.filesTouched,
    output?.changedFiles,
    output?.files,
    output?.paths,
    input?.filesTouched,
    input?.files,
    input?.paths,
    readString(output, singlePathKeys),
    readString(input, singlePathKeys),
    readString(raw, ["path", "filePath", "artifactPath"]),
  ];

  // The Set drops repeats while preserving first-seen order.
  const hints = new Set(candidates.flatMap((candidate) => readStringArray(candidate)));
  return Array.from(hints);
}
129
+
130
/**
 * Collects labels and path-like hints from a structured (non-tool) part.
 *
 * Candidates are the first path-style field, the first name-style field and
 * the first `summary`/`description` field of `metadata.raw`, plus the part's
 * trimmed text content. Every candidate becomes a label; those that contain
 * a slash, backslash or dot, or consist solely of letters, digits, `_` and
 * `-`, are also reported as paths.
 *
 * @returns Distinct `paths` and `labels`, each in first-seen order.
 */
function collectStructuredPartDetails(part: HarvestMessagePart, metadata: Record<string, unknown>): {
  paths: string[];
  labels: string[];
} {
  const raw = asRecord(metadata.raw);
  const text = typeof part.textContent === "string" ? part.textContent.trim() : "";
  const pathLike = /[/\\.]|^[A-Z0-9_-]+$/i;

  const candidates = [
    readString(raw, ["path", "filePath", "artifactPath", "storageUri", "sourcePath", "targetPath", "outputPath"]),
    readString(raw, ["fileName", "title", "label", "name", "summaryId", "taskId", "stepId"]),
    readString(raw, ["summary", "description"]),
    text.length > 0 ? text : undefined,
  ];

  const pathSet = new Set<string>();
  const labelSet = new Set<string>();
  for (const candidate of candidates) {
    for (const item of readStringArray(candidate)) {
      if (pathLike.test(item)) {
        pathSet.add(item);
      }
      labelSet.add(item);
    }
  }

  return { paths: Array.from(pathSet), labels: Array.from(labelSet) };
}
157
+
158
+ function collectContentSignals(content: string): ContentSignalSummary {
40
159
  const signals: string[] = [];
41
160
  let score = 0;
161
+ let docMarker: string | null = null;
42
162
 
43
163
  for (const pattern of DOC_MARKER_PATTERNS) {
44
164
  if (pattern.test(content)) {
165
+ docMarker = pattern.source;
45
166
  signals.push(`doc:${pattern.source}`);
46
167
  score += 1.0;
47
168
  break;
@@ -72,7 +193,8 @@ export function detectScannerEvidence(content: string): HarvestResult | null {
72
193
  score += 0.8;
73
194
  }
74
195
 
75
- if (HEADING_PATTERN.test(content) && (numberedSteps >= 1 || bulletLines >= 2)) {
196
+ const hasHeading = HEADING_PATTERN.test(content);
197
+ if (hasHeading && (numberedSteps >= 1 || bulletLines >= 2)) {
76
198
  signals.push("heading");
77
199
  score += 0.4;
78
200
  }
@@ -83,16 +205,164 @@ export function detectScannerEvidence(content: string): HarvestResult | null {
83
205
  score += 0.3;
84
206
  }
85
207
 
86
- if (score < 1.8) {
208
+ return {
209
+ docMarker,
210
+ numberedSteps,
211
+ bulletLines,
212
+ commandLines,
213
+ imperativeLines,
214
+ hasHeading,
215
+ fileRefs,
216
+ score,
217
+ signals,
218
+ };
219
+ }
220
+
221
/**
 * Decides whether content signals look like written guidance.
 *
 * True when any of these hold: a doc marker matched, there are at least two
 * numbered steps, there is a heading together with at least two bullet
 * lines, or there are at least two imperative lines.
 */
function hasGuidanceShape(contentSignals: ContentSignalSummary): boolean {
  if (contentSignals.docMarker) {
    return true;
  }
  if (contentSignals.numberedSteps >= 2) {
    return true;
  }
  if (contentSignals.hasHeading && contentSignals.bulletLines >= 2) {
    return true;
  }
  return contentSignals.imperativeLines >= 2;
}
227
+
228
/**
 * Looks for scanner evidence in the structured parts of a message.
 *
 * Returns `null` unless there are message parts and the content has a
 * guidance shape. Two kinds of evidence are recognised, in priority order:
 *
 * 1. A tool chain: at least one "tool" part whose name is in
 *    STRUCTURED_TOOL_NAMES, together with at least one extracted command or
 *    file hint from any tool part.
 * 2. Structured guidance parts: at least one part whose type is in
 *    STRUCTURED_GUIDANCE_PART_TYPES, together with at least one path or label
 *    collected from such parts.
 *
 * @param contentSignals - Signals previously collected from the text content.
 * @param messageParts - Structured parts of the message, if any.
 * @returns The evidence record, or `null` when neither kind applies.
 */
function detectStructuredScannerEvidence(
  contentSignals: ContentSignalSummary,
  messageParts?: HarvestMessagePart[],
): HarvestResult | null {
  const parts = messageParts ?? [];
  if (parts.length === 0 || !hasGuidanceShape(contentSignals)) {
    return null;
  }

  const seenToolNames = new Set<string>();
  const seenCommands = new Set<string>();
  const seenToolFileHints = new Set<string>();
  const seenPartTypes = new Set<string>();
  const seenPaths = new Set<string>();
  const seenLabels = new Set<string>();
  const seenRawTypes = new Set<string>();
  const partOrdinals: number[] = [];

  for (const part of parts) {
    const metadata = readPartMetadata(part);

    // Empty raw types are ignored, like non-string ones.
    if (typeof metadata.rawType === "string" && metadata.rawType.length > 0) {
      seenRawTypes.add(metadata.rawType);
    }
    if (typeof part.ordinal === "number") {
      partOrdinals.push(part.ordinal);
    }

    if (part.partType === "tool") {
      const toolName = typeof part.toolName === "string" ? part.toolName.trim() : "";
      if (toolName.length > 0 && STRUCTURED_TOOL_NAMES.has(toolName)) {
        seenToolNames.add(toolName);
      }

      // Commands and file hints are collected from every tool part, not only
      // from those whose name is recognised.
      const command = extractCommand(parseJson(part.toolInput));
      if (command) {
        seenCommands.add(command);
      }
      collectStructuredFileHints(part, metadata).forEach((hint) => seenToolFileHints.add(hint));
    } else if (STRUCTURED_GUIDANCE_PART_TYPES.has(part.partType)) {
      seenPartTypes.add(part.partType);
      const details = collectStructuredPartDetails(part, metadata);
      details.paths.forEach((path) => seenPaths.add(path));
      details.labels.forEach((label) => seenLabels.add(label));
    }
  }

  const hasToolChain = seenToolNames.size > 0 && (seenCommands.size > 0 || seenToolFileHints.size > 0);
  if (hasToolChain) {
    const toolNames = Array.from(seenToolNames);
    return {
      value: 0.25,
      source: "scanner",
      reason: `scanner structured tool-chain: tools=${toolNames.join(",")}`,
      confidence: 0.85,
      kind: "scanner_signal",
      extractor: "structured_tool_chain",
      metadata: {
        toolNames,
        commands: Array.from(seenCommands),
        fileHints: Array.from(seenToolFileHints),
        partOrdinals,
        rawTypes: Array.from(seenRawTypes),
        guidanceSignals: contentSignals.signals,
      },
    };
  }

  const hasGuidanceDetails = seenPaths.size > 0 || seenLabels.size > 0;
  if (seenPartTypes.size === 0 || !hasGuidanceDetails) {
    return null;
  }

  const structuredPartTypes = Array.from(seenPartTypes);
  return {
    value: 0.25,
    source: "scanner",
    reason: `scanner structured guidance parts: ${structuredPartTypes.join(",")}`,
    confidence: 0.83,
    kind: "scanner_signal",
    extractor: "structured_guidance_parts",
    metadata: {
      structuredPartTypes,
      pathHints: Array.from(seenPaths),
      labels: Array.from(seenLabels),
      partOrdinals,
      rawTypes: Array.from(seenRawTypes),
      guidanceSignals: contentSignals.signals,
    },
  };
}
325
+
326
/**
 * Detects scanner evidence for a message.
 *
 * Checks are applied in priority order:
 * 1. an explicit scanner marker in the text (confidence 0.7);
 * 2. structured evidence from the message parts;
 * 3. the text heuristic, which fires when the content-signal score reaches
 *    1.8, with confidence growing by 0.05 per signal from 0.5 up to 0.8.
 *
 * @param content - Text content of the message.
 * @param messageParts - Optional structured parts of the message.
 * @returns The first evidence found, or `null` when nothing qualifies.
 */
export function detectScannerEvidence(content: string, messageParts?: HarvestMessagePart[]): HarvestResult | null {
  const explicitMarker = EXPLICIT_SCANNER_PATTERNS.find((pattern) => pattern.test(content));
  if (explicitMarker) {
    return {
      value: 0.25,
      source: "scanner",
      reason: `scanner marker: ${explicitMarker.source}`,
      confidence: 0.7,
      kind: "scanner_signal",
      extractor: "scanner_marker",
      metadata: { marker: explicitMarker.source },
    };
  }

  const contentSignals = collectContentSignals(content);

  const structured = detectStructuredScannerEvidence(contentSignals, messageParts);
  if (structured) {
    return structured;
  }

  if (contentSignals.score < 1.8) {
    return null;
  }

  const { signals, numberedSteps, bulletLines, commandLines, imperativeLines, fileRefs, hasHeading } = contentSignals;
  return {
    value: 0.25,
    source: "scanner",
    reason: `scanner heuristic: ${signals.join(", ")}`,
    confidence: Math.min(0.8, 0.5 + signals.length * 0.05),
    kind: "scanner_signal",
    extractor: "scanner_heuristic",
    metadata: {
      guidanceSignals: signals,
      numberedSteps,
      bulletLines,
      commandLines,
      imperativeLines,
      fileRefs,
      hasHeading,
    },
  };
}