agentboss 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/aboss.js +288 -288
- package/client/dist/assets/index-DxoLOxZ8.js +141 -0
- package/client/dist/index.html +1 -1
- package/package.json +1 -1
- package/server/analysis/dimensions/judgement.js +111 -107
- package/server/analysis/dimensions/llm-merge.js +59 -57
- package/server/analysis/dimensions/output-quality.js +167 -167
- package/server/analysis/dimensions/problem-definition.js +109 -104
- package/server/analysis/job.js +91 -14
- package/server/analysis/report-builder.js +574 -581
- package/server/analysis/scoring-v2.js +126 -72
- package/server/analysis/thresholds-v2.js +364 -358
- package/server/api/execution.js +94 -0
- package/server/db/schema.js +5 -2
- package/server/etl/opencode.js +5 -1
- package/server/execution/job.js +141 -2
- package/server/llm/advice-prompt.js +74 -11
- package/server/llm/advice.js +50 -1
- package/server/llm/analysis-prompt.js +173 -162
- package/server/llm/cli-runner.js +18 -2
- package/server/llm/judge.js +6 -1
- package/server/llm/mcp-classify.js +147 -0
- package/server/llm/project-advice-prompt.js +106 -6
- package/server/llm/project-advice.js +55 -2
- package/server/llm/session-analyzer.js +10 -1
- package/client/dist/assets/index-DBj1Ujlx.js +0 -137
package/server/api/execution.js
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
* POST /api/execution/start { sessionId, adviceKey, executor?, ephemeral? }
|
|
5
5
|
* POST /api/execution/project/start { project, scope, from?, to?, adviceKey, executor?, ephemeral? }
|
|
6
6
|
* POST /api/execution/cancel/:runId
|
|
7
|
+
* GET /api/execution/preview ?sessionId=&adviceKey=&executor=
|
|
8
|
+
* GET /api/execution/project/preview ?project=&scope=&from=&to=&adviceKey=&executor=
|
|
7
9
|
* GET /api/execution/:runId ?full=1 → return full stdout/stderr
|
|
8
10
|
* GET /api/execution/advice/:sessionId
|
|
9
11
|
* GET /api/execution/project/advice ?project=&scope=&from=&to=
|
|
@@ -229,6 +231,98 @@ module.exports = function (db) {
|
|
|
229
231
|
res.json({ ok: true, data: { run: projectRun(row) } });
|
|
230
232
|
});
|
|
231
233
|
|
|
234
|
+
// -------------------------------------------------------------------------
|
|
235
|
+
// Preview — return the exact prompt + cwd + CLI command that "执行"
|
|
236
|
+
// would invoke. Lets the UI show users *what* the auto-executor is
|
|
237
|
+
// about to do, and serves as a manual fallback (copy the prompt and
|
|
238
|
+
// paste it into your own OpenCode / Claude Code session).
|
|
239
|
+
//
|
|
240
|
+
// Registered BEFORE the catch-all GET /:runId so adviceKey-based
|
|
241
|
+
// lookups don't collide with run-id lookups.
|
|
242
|
+
// -------------------------------------------------------------------------
|
|
243
|
+
|
|
244
|
+
// GET /preview — validates the query string, then returns the preview that
// job.previewExecution builds for a single session-level advice item.
// Query: sessionId (required), adviceKey (required), executor (optional).
router.get('/preview', (req, res) => {
  const { sessionId, adviceKey, executor } = req.query;

  // Every validation failure below shares this 400 response shape.
  const badRequest = (message) => res.status(400).json({
    ok: false, error: { code: 'BAD_REQUEST', message },
  });
  // True for anything that is not a non-blank string.
  const isBlank = (value) => typeof value !== 'string' || !value.trim();

  if (isBlank(sessionId)) return badRequest('sessionId required');
  if (isBlank(adviceKey)) return badRequest('adviceKey required');
  // executor is optional; when supplied it must be one of VALID_EXECUTORS.
  if (executor !== undefined && !VALID_EXECUTORS.includes(executor)) {
    return badRequest('executor must be opencode or claude');
  }

  const preview = job.previewExecution(db, { sessionId, adviceKey, executor });
  if (!preview.ok) return failure(res, preview.reason, preview);

  // Expose only the documented preview fields, not the raw job result.
  const data = {
    scope: preview.scope,
    adviceKey: preview.adviceKey,
    executor: preview.executor,
    project: preview.project,
    projectExists: preview.projectExists,
    cli: preview.cli,
    prompt: preview.prompt,
    item: preview.item,
  };
  res.json({ ok: true, data });
});
|
|
279
|
+
|
|
280
|
+
// GET /project/preview — validates the query string, then returns the preview
// that job.previewProjectExecution builds for a project-level advice item.
// Query: project (required), scope (daily|weekly|all), from / to (optional
// window bounds), adviceKey (required), executor (optional).
router.get('/project/preview', (req, res) => {
  const { project, scope, adviceKey, executor } = req.query;
  // Missing / empty bounds are normalised to '' before being handed to the job.
  const from = req.query.from || '';
  const to = req.query.to || '';

  // Every validation failure below shares this 400 response shape.
  const badRequest = (message) => res.status(400).json({
    ok: false, error: { code: 'BAD_REQUEST', message },
  });
  // True for anything that is not a non-blank string.
  const isBlank = (value) => typeof value !== 'string' || !value.trim();

  if (isBlank(project)) return badRequest('project required');
  if (typeof scope !== 'string' || !['daily', 'weekly', 'all'].includes(scope)) {
    return badRequest('scope must be daily|weekly|all');
  }
  if (isBlank(adviceKey)) return badRequest('adviceKey required');
  // executor is optional; when supplied it must be one of VALID_EXECUTORS.
  if (executor !== undefined && !VALID_EXECUTORS.includes(executor)) {
    return badRequest('executor must be opencode or claude');
  }
  // Query values are user-controlled: repeated or bracketed parameters
  // (?from=a&from=b, ?from[x]=1) parse to arrays / objects. Reject those
  // here instead of forwarding a non-string window bound to the job layer.
  if (typeof from !== 'string' || typeof to !== 'string') {
    return badRequest('from/to must be strings');
  }

  const preview = job.previewProjectExecution(db, {
    project, scope, windowFrom: from, windowTo: to, adviceKey, executor,
  });
  if (!preview.ok) return failure(res, preview.reason, preview);

  // Expose only the documented preview fields, not the raw job result.
  const data = {
    scope: preview.scope,
    adviceKey: preview.adviceKey,
    executor: preview.executor,
    project: preview.project,
    projectExists: preview.projectExists,
    cli: preview.cli,
    prompt: preview.prompt,
    item: preview.item,
  };
  res.json({ ok: true, data });
});
|
|
325
|
+
|
|
232
326
|
// -------------------------------------------------------------------------
|
|
233
327
|
// Project-level start + list
|
|
234
328
|
//
|
package/server/db/schema.js
CHANGED
|
@@ -325,8 +325,11 @@ function initDatabase(db) {
|
|
|
325
325
|
['currency_rate', '1'],
|
|
326
326
|
['idle_threshold_minutes', '5'],
|
|
327
327
|
['llm_tool_preference', 'auto'],
|
|
328
|
-
// v2:
|
|
329
|
-
|
|
328
|
+
// v2: LLM judge for E1/O1 dimensions. On by default for new
|
|
329
|
+
// installs — existing users' explicit '0' is preserved by the
|
|
330
|
+
// INSERT OR IGNORE seed below, so toggling this only flips fresh
|
|
331
|
+
// boss.db files.
|
|
332
|
+
['enable_llm_judge', '1'],
|
|
330
333
|
];
|
|
331
334
|
|
|
332
335
|
const stmt = db.prepare(
|
package/server/etl/opencode.js
CHANGED
|
@@ -27,6 +27,7 @@ const {
|
|
|
27
27
|
getEtlState,
|
|
28
28
|
updateEtlState,
|
|
29
29
|
} = require('../db/queries');
|
|
30
|
+
const { canonicalProject } = require('../utils/project');
|
|
30
31
|
|
|
31
32
|
// ---------------------------------------------------------------------------
|
|
32
33
|
// Constants
|
|
@@ -210,7 +211,10 @@ function mapSession(row, msgCount, errCount, toolCount, agg = {}) {
|
|
|
210
211
|
tokens_cache_read: agg.tokens_cache_read || 0,
|
|
211
212
|
tokens_cache_write: agg.tokens_cache_write || 0,
|
|
212
213
|
cost_usd: agg.cost_usd || 0,
|
|
213
|
-
|
|
214
|
+
// OpenCode sometimes records Windows drives as "C//felix/code/X" (the
|
|
215
|
+
// colon got dropped). canonicalProject re-inserts the ":" so downstream
|
|
216
|
+
// cwd / whitelist / fs.existsSync checks work. See server/utils/project.js.
|
|
217
|
+
project: canonicalProject(row.directory) || null,
|
|
214
218
|
title: row.title || null,
|
|
215
219
|
model: agg.model_id || null,
|
|
216
220
|
error_count: errCount,
|
package/server/execution/job.js
CHANGED
|
@@ -297,6 +297,140 @@ function cleanupOrphans(db) {
|
|
|
297
297
|
return orphans.length;
|
|
298
298
|
}
|
|
299
299
|
|
|
300
|
+
// ---------------------------------------------------------------------------
|
|
301
|
+
// Public: previewExecution / previewProjectExecution
|
|
302
|
+
//
|
|
303
|
+
// Lookup-only counterpart of startExecution. Returns exactly the prompt
|
|
304
|
+
// text we would have piped through stdin to the executor, along with the
|
|
305
|
+
// resolved cwd, executor name and the shell command we would run. Used
|
|
306
|
+
// by the UI to let the user see (and copy) the full instruction before
|
|
307
|
+
// hitting "执行", and as a manual fallback for cases the auto path can't
|
|
308
|
+
// serve (CLI missing, item.actionable=false, …).
|
|
309
|
+
//
|
|
310
|
+
// Stays close to startExecution's resolution order so they can't drift:
|
|
311
|
+
// 1. session / project + advice cache lookup
|
|
312
|
+
// 2. AdviceItem lookup
|
|
313
|
+
// 3. executor pick (explicit request, else item.executor, else default 'opencode')
|
|
314
|
+
// 4. prompt assembly via the same builder used in spawnRunAsync
|
|
315
|
+
//
|
|
316
|
+
// We deliberately don't enforce actionable / whitelist / project-exists
|
|
317
|
+
// here — the UI wants to show this info even when the auto-run path is
|
|
318
|
+
// blocked.
|
|
319
|
+
// ---------------------------------------------------------------------------
|
|
320
|
+
|
|
321
|
+
/**
 * Describe the CLI invocation shown in an execution preview.
 *
 * The prompt is delivered on stdin, so the returned `shellHint` redirects a
 * `prompt.txt` file into the command rather than passing the prompt as an
 * argument.
 *
 * @param {string} executor - 'claude' selects `claude -p`; any other value
 *   yields `opencode run`.
 * @param {string} cwd - Working directory to run in. May be empty, in which
 *   case the hint omits the `cd` step.
 * @returns {{bin: string, args: string[], cwd: string, stdinIsPrompt: boolean, shellHint: string}}
 */
function describeCliCommand(executor, cwd) {
  const isClaude = executor === 'claude';
  const bin = isClaude ? 'claude' : 'opencode';
  const args = isClaude ? ['-p'] : ['run'];
  const invocation = `${bin} ${args.join(' ')} < prompt.txt`;

  const dir = cwd === undefined || cwd === null ? '' : String(cwd);
  let shellHint = invocation;
  if (dir !== '') {
    // Paths made only of these characters are emitted verbatim, so the hint
    // is unchanged for ordinary paths. Anything else (spaces, backslashes,
    // non-ASCII, shell metacharacters) is wrapped in double quotes, with the
    // characters that stay special inside POSIX double quotes escaped.
    const isShellSafe = /^[\w@%+=:,./~-]+$/.test(dir);
    const shown = isShellSafe
      ? dir
      : '"' + dir.replace(/["$`]/g, '\\$&') + '"';
    shellHint = `cd ${shown} && ${invocation}`;
  }
  // With no directory, a bare `cd` would jump to $HOME in a POSIX shell, so
  // the hint is just the invocation itself.

  return {
    bin,
    args,
    cwd,
    stdinIsPrompt: true,
    shellHint,
  };
}
|
|
342
|
+
|
|
343
|
+
/**
 * Read-only preview of what running one session-level advice item would do.
 *
 * Looks up the session, its cached advice and the requested advice item,
 * then assembles the execution prompt and CLI description without starting
 * anything. No actionable / whitelist / project-exists gate is applied here;
 * `projectExists` is only reported back to the caller.
 *
 * @param {object} db - Database handle passed through to the lookup helpers.
 * @param {object} opts
 * @param {string} opts.sessionId - Session whose advice is previewed.
 * @param {string} opts.adviceKey - Key of the advice item inside that advice.
 * @param {string} [opts.executor] - Explicit executor; otherwise the item's
 *   own executor is used, and 'opencode' as the last resort.
 * @returns {object} `{ ok: false, reason }` with reason 'no-session',
 *   'no-advice' or 'no-advice-item', or `{ ok: true, ... }` with the preview.
 */
function previewExecution(db, { sessionId, adviceKey, executor: executorOpt }) {
  const fail = (reason) => ({ ok: false, reason });

  const session = getSessionById(db, sessionId);
  if (!session) return fail('no-session');

  const advice = loadAdvice(db, sessionId);
  if (!advice) return fail('no-advice');

  const item = findAdviceItem(advice, adviceKey);
  if (!item) return fail('no-advice-item');

  const executor = executorOpt || item.executor || 'opencode';

  // session.project may hold an OpenCode-sourced path such as
  // "C//felix/code/X"; canonicalProject normalises it so the path checks
  // below see the corrected form. Same normalisation as the project preview.
  const rawProject = session.project || '';
  const project = canonicalProject(rawProject) || '';

  const recentUserMessages = fetchRecentUserMessages(db, sessionId);

  const sessionSummary = {
    project,
    title: session.title,
    model: session.model,
    durationMinutes: session.duration_minutes,
    messageCount: session.message_count,
  };
  const prompt = buildExecutionPrompt({
    advice: item,
    session: sessionSummary,
    recentUserMessages,
  });

  const projectExists = isValidProjectPath(project);
  const cli = describeCliCommand(executor, project);

  return {
    ok: true,
    scope: 'session',
    adviceKey,
    executor,
    item,
    project,
    projectExists,
    cli,
    prompt,
  };
}
|
|
384
|
+
|
|
385
|
+
/**
 * Read-only preview of what running one project-level advice item would do.
 *
 * Resolves the cached project advice for the given scope / window, finds the
 * requested item, and assembles the project execution prompt and CLI
 * description without starting anything.
 *
 * @param {object} db - Database handle passed through to the lookup helpers.
 * @param {object} opts
 * @param {string} opts.project - Project path; normalised via canonicalProject.
 * @param {string} opts.scope - Advice scope. For 'all' the window bounds are
 *   ignored and replaced by empty strings.
 * @param {string} [opts.windowFrom=''] - Window start; required unless scope is 'all'.
 * @param {string} [opts.windowTo=''] - Window end; required unless scope is 'all'.
 * @param {string} opts.adviceKey - Key of the advice item inside the payload.
 * @param {string} [opts.executor] - Explicit executor; otherwise the item's
 *   own executor is used, and 'opencode' as the last resort.
 * @returns {object} `{ ok: false, reason }` with reason 'no-project',
 *   'no-window', 'no-advice' or 'no-advice-item', or `{ ok: true, ... }`.
 */
function previewProjectExecution(db, {
  project: projectRaw, scope, windowFrom = '', windowTo = '',
  adviceKey, executor: executorOpt,
}) {
  const fail = (reason) => ({ ok: false, reason });

  const project = canonicalProject(projectRaw || '');
  if (!project) return fail('no-project');

  // A scope is mandatory; every scope except 'all' also needs both bounds.
  if (!scope) return fail('no-window');
  const coversAll = scope === 'all';
  if (!coversAll && !(windowFrom && windowTo)) return fail('no-window');

  // 'all' is looked up with empty window bounds.
  const effectiveFrom = coversAll ? '' : windowFrom;
  const effectiveTo = coversAll ? '' : windowTo;

  const cached = loadProjectAdvice(db, project, scope, effectiveFrom, effectiveTo);
  if (!(cached && cached.payload)) return fail('no-advice');

  const item = findAdviceItem(cached.payload, adviceKey);
  if (!item) return fail('no-advice-item');

  const executor = executorOpt || item.executor || 'opencode';

  const patterns = cached.payload.crossSessionPatterns;
  const prompt = buildProjectExecutionPrompt({
    advice: item,
    project: {
      path: project,
      scope,
      windowFrom: effectiveFrom,
      windowTo: effectiveTo,
      sessionCount: cached.sessionCount,
    },
    // Anything other than an array is replaced with an empty list.
    crossSessionPatterns: Array.isArray(patterns) ? patterns : [],
  });

  const projectExists = isValidProjectPath(project);
  const cli = describeCliCommand(executor, project);

  return {
    ok: true,
    scope: 'project',
    adviceKey,
    executor,
    item,
    project,
    projectExists,
    cli,
    prompt,
  };
}
|
|
433
|
+
|
|
300
434
|
// ---------------------------------------------------------------------------
|
|
301
435
|
// Public: startExecution
|
|
302
436
|
// ---------------------------------------------------------------------------
|
|
@@ -344,7 +478,10 @@ async function startExecution(db, opts) {
|
|
|
344
478
|
}
|
|
345
479
|
|
|
346
480
|
// 5. Project path validity.
|
|
347
|
-
|
|
481
|
+
// Normalise the same way preview does so cwd, whitelist comparison
|
|
482
|
+
// and fs.existsSync all agree. OpenCode-source paths can come in
|
|
483
|
+
// as "C//felix/code/X" (colon dropped); canonicalProject fixes it.
|
|
484
|
+
const project = canonicalProject(session.project || '') || '';
|
|
348
485
|
if (!isValidProjectPath(project)) {
|
|
349
486
|
return { ok: false, reason: 'invalid-project-path', extra: { project } };
|
|
350
487
|
}
|
|
@@ -385,7 +522,7 @@ async function startExecution(db, opts) {
|
|
|
385
522
|
const promptBuilder = () => buildExecutionPrompt({
|
|
386
523
|
advice: item,
|
|
387
524
|
session: {
|
|
388
|
-
project
|
|
525
|
+
project,
|
|
389
526
|
title: session.title,
|
|
390
527
|
model: session.model,
|
|
391
528
|
durationMinutes: session.duration_minutes,
|
|
@@ -643,6 +780,8 @@ async function cancelRun(db, runId) {
|
|
|
643
780
|
module.exports = {
|
|
644
781
|
startExecution,
|
|
645
782
|
startProjectExecution,
|
|
783
|
+
previewExecution,
|
|
784
|
+
previewProjectExecution,
|
|
646
785
|
cancelRun,
|
|
647
786
|
getRun,
|
|
648
787
|
listRunsForAdvice,
|
|
@@ -236,6 +236,7 @@ function buildAdvicePrompt(ctx) {
|
|
|
236
236
|
- 流程节奏(回退次数、纠偏速度、是否过早收敛)
|
|
237
237
|
- 是否适合引入一个 opencode skill 或 subagent 来自动化重复模式
|
|
238
238
|
- 成本与 token 经济性
|
|
239
|
+
- **MCP 服务器使用**(见下「关于 MCP」一节)
|
|
239
240
|
|
|
240
241
|
不要谈:
|
|
241
242
|
- 对话中讨论的具体技术 / 代码是否正确(如「你写的 SQL JOIN 错了」、
|
|
@@ -247,6 +248,46 @@ function buildAdvicePrompt(ctx) {
|
|
|
247
248
|
如果对话里只有业务讨论、看不出可改进的协作模式,5 个类别都给空数组,
|
|
248
249
|
summary 写「本会话以业务讨论为主,协作模式无明显问题」。
|
|
249
250
|
|
|
251
|
+
# 关于 MCP(Model Context Protocol)
|
|
252
|
+
|
|
253
|
+
下文的「工具使用 Top 20」里,工具可能来自两类来源:
|
|
254
|
+
|
|
255
|
+
- **内置工具**:名字短、全小写、无分隔符。例如
|
|
256
|
+
\`read\` \`write\` \`edit\` \`bash\` \`glob\` \`grep\` \`todowrite\`
|
|
257
|
+
\`task\` \`skill\` \`websearch\` \`question\` (OpenCode)
|
|
258
|
+
以及 \`ls\` \`webfetch\` \`notebookread\` 等(Claude Code)。
|
|
259
|
+
|
|
260
|
+
- **MCP 工具**(由用户配置的外部 MCP 服务器提供):
|
|
261
|
+
- OpenCode 命名:\`<服务器名>_<方法名>\`(单下划线),例如
|
|
262
|
+
\`atlassian_getJiraIssue\` \`atlassian_searchConfluence\` —
|
|
263
|
+
"atlassian" 就是 MCP 服务器名。
|
|
264
|
+
- Claude Code 命名:\`mcp__<服务器名>__<方法名>\`(双下划线),例如
|
|
265
|
+
\`mcp__github__list_issues\`。
|
|
266
|
+
|
|
267
|
+
请在分析时显式辨认 MCP 工具,并考虑以下角度(只在确实有迹象时谈,不要硬凑):
|
|
268
|
+
|
|
269
|
+
- **该不该用 MCP**:这一次任务里 MCP 工具是不是真的派上了用场?
|
|
270
|
+
如果调用了 MCP 但没真正解决问题(只是来回查),建议下次直接给 AI
|
|
271
|
+
具体信息或换一种问法。
|
|
272
|
+
- **MCP 调用错误率高**:某个 MCP 工具错误率明显偏高(从表中
|
|
273
|
+
err 列读),建议改用其他来源或先用一次手动调用确认参数。
|
|
274
|
+
- **重复 MCP 调用**:连续多次同名 MCP 调用拉同一类数据,建议下次
|
|
275
|
+
一次性指明需要的字段,或用本地缓存/文件代替。
|
|
276
|
+
- **应该用 MCP 而没用**:用户多次让 AI"上网搜"或手抄外部系统数据
|
|
277
|
+
(Jira/GitHub/Slack/Notion 等),而项目本应配置对应 MCP 直接拉,
|
|
278
|
+
可建议引入相应 MCP 服务器。
|
|
279
|
+
|
|
280
|
+
按照上面 5 大类的归属:
|
|
281
|
+
- MCP 调用浪费/重复 → \`cost\`
|
|
282
|
+
- MCP 报错频繁、参数不对 → \`accuracy\`
|
|
283
|
+
- 缺少 MCP 上下文导致需要手抄数据 → \`context\`
|
|
284
|
+
- 反复出现的 MCP 调用模式可包成 skill → \`skills\`
|
|
285
|
+
- MCP 与人工/内置工具的分工节奏 → \`workflow\`
|
|
286
|
+
|
|
287
|
+
不要新增类别,不要新增 AdviceItem 字段。
|
|
288
|
+
evidence 里点名具体 MCP 工具(完整工具名,例如 \`atlassian_getJiraIssue\`)。
|
|
289
|
+
如果对话中没有 MCP 工具,或 MCP 用得很合理,不要硬挑毛病。
|
|
290
|
+
|
|
250
291
|
# 输出契约
|
|
251
292
|
|
|
252
293
|
只输出严格 JSON,不要 markdown 代码块,不要多余文字。结构如下:
|
|
@@ -270,7 +311,7 @@ AdviceItem:
|
|
|
270
311
|
"why": "1 句话,说明协作上的问题",
|
|
271
312
|
"action": "1 句话,具体可操作的改变(下次怎么做)",
|
|
272
313
|
"evidence": "引自第 N 条消息 / 工具 X / 基础统计 — 必须是对话事实,不得引用任何评分",
|
|
273
|
-
"actionable": true | false,
|
|
314
|
+
"actionable": true | false, // 见硬规则 6:仅当 AI 能在本机落盘改动时 true
|
|
274
315
|
"executor": "opencode" | "claude" | "manual",
|
|
275
316
|
"cwd_hint": "project_root"
|
|
276
317
|
}
|
|
@@ -291,21 +332,43 @@ AdviceItem:
|
|
|
291
332
|
每条 action 给出:skill 名 + 触发条件 + 一句话用途。
|
|
292
333
|
- workflow 流程与节奏:拆解、迭代步幅、回退策略、人 ↔ AI 分工。
|
|
293
334
|
|
|
294
|
-
6. actionable=true
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
335
|
+
6. actionable=true 必须是「AI 在用户电脑上**真的会落盘的改动**」。
|
|
336
|
+
只有同时满足下面三点才能填 true:
|
|
337
|
+
|
|
338
|
+
a. **明确的产出物在文件系统里**——新建或修改某个具体的文件 /
|
|
339
|
+
配置项 / skill / 脚本 / 模板。可以一句话说清"AI 将创建/修改
|
|
340
|
+
\`<相对路径>\`"。
|
|
341
|
+
b. **不依赖人类专属知识**——AI 看着项目根目录就能做完;不需要
|
|
342
|
+
访问只有人才能拿到的密码、内部 wiki、外部账号、决策权。
|
|
343
|
+
c. **是项目级的、可重复受益的改变**——例如新增一个 skill、加一
|
|
344
|
+
条 lint 规则、写一份模板,而不是"修这一次的 bug"。
|
|
345
|
+
|
|
346
|
+
只要任何一点不满足 → actionable=false, executor='manual'。
|
|
347
|
+
|
|
348
|
+
**反例(下面这些必须是 actionable=false / executor='manual')**:
|
|
349
|
+
- 「下次开场用模板」「以后多用缓存」「下次先给 AI 文件路径」
|
|
350
|
+
—— 是让**人**改行为,不是落盘改动。
|
|
351
|
+
- 「换更便宜的模型」「关闭 reasoning」「用 sonnet 而非 opus」
|
|
352
|
+
—— 模型切换是用户在 client / 终端做的,不是 AI 改文件。
|
|
353
|
+
- 「检查代码是否正确」「再确认一遍 SQL」
|
|
354
|
+
—— 是要求人或 AI 验证,不是落盘动作。
|
|
355
|
+
- 「拆分任务」「分步提问」「先讨论再写代码」
|
|
356
|
+
—— 协作流程建议,只能人来执行。
|
|
357
|
+
- 「向团队同步」「写文档」(注:这条**有边界** —— 如果是让 AI
|
|
358
|
+
在仓库里新建/更新某个具体 .md 文件,可 actionable=true;
|
|
359
|
+
如果是"和同事开个会"则 false)。
|
|
299
360
|
|
|
300
361
|
7. executor:
|
|
301
|
-
-
|
|
302
|
-
-
|
|
303
|
-
|
|
362
|
+
- 落盘类动作(创建/改文件、写 skill、改配置文件) → 'opencode' 或 'claude';
|
|
363
|
+
- 一切让人类调整行为/认知/沟通的建议 → 'manual'。
|
|
364
|
+
不会判断时,**默认填 'manual'**——错填 manual 只是少一个按钮;
|
|
365
|
+
错填 opencode 会让用户点了按钮后 AI 干一些莫名其妙的事。
|
|
304
366
|
|
|
305
367
|
8. cwd_hint: 目前只能填 "project_root"。
|
|
306
368
|
|
|
307
|
-
9. actionable 与 executor 必须一致:executor='manual'
|
|
308
|
-
executor 是 'opencode'/'claude'
|
|
369
|
+
9. actionable 与 executor 必须一致:executor='manual' ⇔ actionable=false;
|
|
370
|
+
executor 是 'opencode'/'claude' ⇔ actionable=true。
|
|
371
|
+
**不允许出现 executor='opencode' 且 actionable=false 的组合**。
|
|
309
372
|
|
|
310
373
|
# 会话基础(只作事实参考,不要换算成分数)
|
|
311
374
|
|
package/server/llm/advice.js
CHANGED
|
@@ -357,11 +357,21 @@ function normaliseItem(it) {
|
|
|
357
357
|
if (executor === 'manual') actionable = false;
|
|
358
358
|
if (actionable && executor === 'manual') executor = 'opencode';
|
|
359
359
|
|
|
360
|
+
// Heuristic safety net: even with a tight prompt, LLMs sometimes mark
|
|
361
|
+
// "you should do X next time" advice as actionable=true, which would
|
|
362
|
+
// give the user an auto-execute button that does nonsense. Detect
|
|
363
|
+
// unmistakeably human-action phrasing in `action` and downgrade.
|
|
364
|
+
const action = typeof it.action === 'string' ? it.action : '';
|
|
365
|
+
if (actionable && looksLikeHumanAction(action)) {
|
|
366
|
+
actionable = false;
|
|
367
|
+
executor = 'manual';
|
|
368
|
+
}
|
|
369
|
+
|
|
360
370
|
return {
|
|
361
371
|
severity,
|
|
362
372
|
title: typeof it.title === 'string' ? it.title.trim() : '',
|
|
363
373
|
why: typeof it.why === 'string' ? it.why.trim() : '',
|
|
364
|
-
action:
|
|
374
|
+
action: action.trim(),
|
|
365
375
|
evidence: typeof it.evidence === 'string' ? it.evidence.trim() : '',
|
|
366
376
|
actionable,
|
|
367
377
|
executor,
|
|
@@ -369,6 +379,44 @@ function normaliseItem(it) {
|
|
|
369
379
|
};
|
|
370
380
|
}
|
|
371
381
|
|
|
382
|
+
// Phrases that mark an advice `action` as something the *user* has to do.
// Collected from production LLM outputs where the model wrote
// actionable=true but the action was clearly "next time the human should …".
// Patterns containing Latin keywords carry the `i` flag so capitalised
// spellings ("Reasoning", "Sonnet", "Review") are caught as well. None of
// them use `g` / `y`, so `.test` keeps no state between calls.
const HUMAN_ACTION_PATTERNS = [
  // Next-time / future-tense markers
  /下次/, /以后/, /未来/, /后续/, /今后/, /日后/,
  // Switching model / runtime settings (user-side configuration, not repo files)
  /换(成|为)?[\s]*(sonnet|opus|haiku|claude|gpt|gemini|grok|deepseek)/i,
  /(切换|更换|改用|改成|降级|升级).{0,8}模型/,
  /关(掉|闭|去).{0,4}(reasoning|推理|思考)/i,
  /启用.{0,4}(reasoning|推理)/i,
  /调低.{0,6}(reasoning|temperature|温度)/i,
  // Talk-to-humans
  /和(团队|同事|领导|产品|设计)/, /与(团队|同事|领导)/,
  /(告知|通知|同步给|抄送|沟通).{0,6}(团队|同事|领导|项目组)/,
  /(开个|开一次|组织).{0,4}(会议|分享|评审|review)/i,
  // Habit / process changes (no filesystem delta)
  /(养成|形成|建立)[^。;;]{0,30}(习惯|惯例|节奏)/,
  /(定期|每周|每天|每月).{0,8}(回顾|评审|检查|总结)/,
  /(培训|学习|熟悉|掌握).{0,8}(用法|文档|规范)/,
];

/**
 * Lightweight Chinese-text heuristic. Returns true when `action` reads like
 * a behaviour change the *user* needs to make, rather than a
 * filesystem-level change an AI agent could carry out.
 *
 * Intentionally limited to obvious phrasing. A miss (returns false for
 * advice that really is human-only) just leaves an over-promising button; a
 * wrong hit (returns true for advice that could auto-run) hides a working
 * button. Misses are the preferred failure mode.
 *
 * @param {string} action - The advice item's `action` text.
 * @returns {boolean} true when any pattern in HUMAN_ACTION_PATTERNS matches;
 *   false for empty or non-string input.
 */
function looksLikeHumanAction(action) {
  if (typeof action !== 'string' || action === '') return false;
  return HUMAN_ACTION_PATTERNS.some((re) => re.test(action));
}
|
|
419
|
+
|
|
372
420
|
// ---------------------------------------------------------------------------
|
|
373
421
|
// Exports
|
|
374
422
|
// ---------------------------------------------------------------------------
|
|
@@ -381,4 +429,5 @@ module.exports = {
|
|
|
381
429
|
// exported for tests / debugging:
|
|
382
430
|
assembleContext,
|
|
383
431
|
normaliseAdvicePayload,
|
|
432
|
+
looksLikeHumanAction,
|
|
384
433
|
};
|