agentboss 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/client/dist/assets/{index-DxoLOxZ8.js → index-sks7Tuv7.js} +52 -52
- package/client/dist/index.html +1 -1
- package/package.json +1 -1
- package/server/analysis/report-builder.js +28 -1
- package/server/api/execution.js +4 -4
- package/server/api/overview.js +25 -14
- package/server/api/settings.js +139 -119
- package/server/db/queries.js +1108 -1051
- package/server/execution/job.js +63 -12
- package/server/llm/advice.js +15 -7
- package/server/llm/cli-runner.js +316 -265
- package/server/llm/judge.js +149 -123
- package/server/llm/project-advice.js +15 -7
- package/server/llm/session-analyzer.js +141 -131
package/server/llm/judge.js
CHANGED
|
@@ -1,123 +1,149 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* High-level LLM judge — bridges dimension scorers (E1, O1) to the
|
|
3
|
-
* cli-runner. Handles:
|
|
4
|
-
* • opt-in via user_settings.enable_llm_judge
|
|
5
|
-
* • per-session cache via session_analysis.llm_judge_v2
|
|
6
|
-
* • concurrency throttle (cli-runner.withSlot)
|
|
7
|
-
* • fall-back signalling so dimension scorers can branch
|
|
8
|
-
*
|
|
9
|
-
* @author Felix
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
'use strict';
|
|
13
|
-
|
|
14
|
-
const { runJudge, detectAvailableCli, withSlot } = require('./cli-runner');
|
|
15
|
-
const { buildSessionJudgePrompt, PROMPT_VERSION } = require('./judge-prompts');
|
|
16
|
-
const { queryOne } = require('../db/queries');
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
// ---------------------------------------------------------------------------
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
const
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
1
|
+
/**
|
|
2
|
+
* High-level LLM judge — bridges dimension scorers (E1, O1) to the
|
|
3
|
+
* cli-runner. Handles:
|
|
4
|
+
* • opt-in via user_settings.enable_llm_judge
|
|
5
|
+
* • per-session cache via session_analysis.llm_judge_v2
|
|
6
|
+
* • concurrency throttle (cli-runner.withSlot)
|
|
7
|
+
* • fall-back signalling so dimension scorers can branch
|
|
8
|
+
*
|
|
9
|
+
* @author Felix
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
'use strict';
|
|
13
|
+
|
|
14
|
+
const { runJudge, detectAvailableCli, detectAllCli, withSlot } = require('./cli-runner');
|
|
15
|
+
const { buildSessionJudgePrompt, PROMPT_VERSION } = require('./judge-prompts');
|
|
16
|
+
const { queryOne } = require('../db/queries');
|
|
17
|
+
|
|
18
|
+
const VALID_PREFS = new Set(['auto', 'opencode', 'claude']);
|
|
19
|
+
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Settings cache
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
let _settingsCache = null;
|
|
25
|
+
let _settingsCacheAt = 0;
|
|
26
|
+
const SETTINGS_TTL_MS = 10_000;
|
|
27
|
+
|
|
28
|
+
function getSettings(db) {
|
|
29
|
+
const now = Date.now();
|
|
30
|
+
if (_settingsCache && now - _settingsCacheAt < SETTINGS_TTL_MS) {
|
|
31
|
+
return _settingsCache;
|
|
32
|
+
}
|
|
33
|
+
const rows = db.exec(
|
|
34
|
+
"SELECT key, value FROM user_settings WHERE key IN ('enable_llm_judge', 'llm_tool_preference')"
|
|
35
|
+
);
|
|
36
|
+
const out = { enable_llm_judge: false, llm_tool_preference: 'auto' };
|
|
37
|
+
if (rows[0]) {
|
|
38
|
+
for (const [k, v] of rows[0].values) {
|
|
39
|
+
if (k === 'enable_llm_judge') out.enable_llm_judge = String(v) === '1' || String(v).toLowerCase() === 'true';
|
|
40
|
+
if (k === 'llm_tool_preference') {
|
|
41
|
+
const p = String(v || '').toLowerCase();
|
|
42
|
+
out.llm_tool_preference = VALID_PREFS.has(p) ? p : 'auto';
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
_settingsCache = out;
|
|
47
|
+
_settingsCacheAt = now;
|
|
48
|
+
return out;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/** Public: force a settings reload (e.g. after PUT /api/settings). */
|
|
52
|
+
function invalidateSettingsCache() {
|
|
53
|
+
_settingsCache = null;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
// Per-session cache
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
/** Return the cached llm_judge_v2 JSON for a session or null. */
|
|
61
|
+
function loadCache(db, sessionId) {
|
|
62
|
+
const row = queryOne(
|
|
63
|
+
db,
|
|
64
|
+
'SELECT llm_judge_v2 FROM session_analysis WHERE session_id = ?',
|
|
65
|
+
[sessionId]
|
|
66
|
+
);
|
|
67
|
+
if (!row || !row.llm_judge_v2) return null;
|
|
68
|
+
try { return JSON.parse(row.llm_judge_v2); }
|
|
69
|
+
catch { return null; }
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
// Public judge functions
|
|
74
|
+
// ---------------------------------------------------------------------------
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Consolidated judge — one LLM call scoring H1/H2/E1/O1 for a session.
|
|
78
|
+
* Returns the parsed payload (stamped with v / msgCount / cli) or null
|
|
79
|
+
* when disabled, no CLI, or the call fails. Cached in llm_judge_v2.
|
|
80
|
+
*/
|
|
81
|
+
async function judgeSession(db, session, messages, meta = {}) {
|
|
82
|
+
const settings = getSettings(db);
|
|
83
|
+
if (!settings.enable_llm_judge) return null;
|
|
84
|
+
|
|
85
|
+
const msgCount = messages.length;
|
|
86
|
+
const cache = loadCache(db, session.id);
|
|
87
|
+
if (cache && cache.v === PROMPT_VERSION && cache.msgCount === msgCount) return cache;
|
|
88
|
+
|
|
89
|
+
const pref = settings.llm_tool_preference || 'auto';
|
|
90
|
+
const cli = await detectAvailableCli(pref);
|
|
91
|
+
if (!cli) return null;
|
|
92
|
+
|
|
93
|
+
const prompt = buildSessionJudgePrompt(messages, meta);
|
|
94
|
+
const result = await withSlot(() => runJudge({ prompt, timeoutMs: 90_000, preferredCli: pref }));
|
|
95
|
+
if (!result.ok || !result.data) {
|
|
96
|
+
const reason = result.ok ? 'no-data' : (result.reason || 'unknown');
|
|
97
|
+
const detail = result.error ? ` — ${String(result.error).slice(0, 200)}` : '';
|
|
98
|
+
console.error('[judge]', session.id, 'LLM fell through:', reason + detail);
|
|
99
|
+
return null;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
return {
|
|
103
|
+
...result.data,
|
|
104
|
+
v: PROMPT_VERSION,
|
|
105
|
+
msgCount,
|
|
106
|
+
cli: result.cli,
|
|
107
|
+
cachedAt: new Date().toISOString(),
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Pre-flight for the Settings page. Returns the full availability map
|
|
113
|
+
* for every supported CLI, plus the active pick under the current
|
|
114
|
+
* preference.
|
|
115
|
+
*
|
|
116
|
+
* @param {object} [db] sql.js Database. If provided, the user's
|
|
117
|
+
* `llm_tool_preference` is honoured when computing `active`.
|
|
118
|
+
* @returns {Promise<{
|
|
119
|
+
* available: boolean, // any CLI usable
|
|
120
|
+
* name: string|null, // active CLI name
|
|
121
|
+
* active: string|null, // same as name (alias)
|
|
122
|
+
* preference: 'auto'|'opencode'|'claude', // user preference
|
|
123
|
+
* source: 'user'|'auto', // why `active` was chosen
|
|
124
|
+
* detected: Array<{name, bin, available}>, // full availability map
|
|
125
|
+
* }>}
|
|
126
|
+
*/
|
|
127
|
+
async function diagnose(db) {
|
|
128
|
+
const all = await detectAllCli();
|
|
129
|
+
const settings = db ? getSettings(db) : { llm_tool_preference: 'auto' };
|
|
130
|
+
const pref = settings.llm_tool_preference || 'auto';
|
|
131
|
+
const cli = await detectAvailableCli(pref);
|
|
132
|
+
return {
|
|
133
|
+
available: !!cli,
|
|
134
|
+
name: cli ? cli.name : null,
|
|
135
|
+
active: cli ? cli.name : null,
|
|
136
|
+
preference: pref,
|
|
137
|
+
source: pref === 'auto' ? 'auto' : 'user',
|
|
138
|
+
detected: all,
|
|
139
|
+
};
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
module.exports = {
|
|
143
|
+
judgeSession,
|
|
144
|
+
diagnose,
|
|
145
|
+
invalidateSettingsCache,
|
|
146
|
+
// re-export so callers don't need cli-runner directly
|
|
147
|
+
detectAvailableCli,
|
|
148
|
+
PROMPT_VERSION,
|
|
149
|
+
};
|
|
@@ -56,21 +56,28 @@ let _settingsCache = null;
|
|
|
56
56
|
let _settingsCacheAt = 0;
|
|
57
57
|
const SETTINGS_TTL_MS = 10_000;
|
|
58
58
|
|
|
59
|
+
const VALID_CLI_PREFS = new Set(['auto', 'opencode', 'claude']);
|
|
60
|
+
|
|
59
61
|
function getSettings(db) {
|
|
60
62
|
const now = Date.now();
|
|
61
63
|
if (_settingsCache && now - _settingsCacheAt < SETTINGS_TTL_MS) {
|
|
62
64
|
return _settingsCache;
|
|
63
65
|
}
|
|
64
66
|
const rows = db.exec(
|
|
65
|
-
"SELECT key, value FROM user_settings WHERE key
|
|
67
|
+
"SELECT key, value FROM user_settings WHERE key IN ('enable_llm_judge', 'llm_tool_preference')"
|
|
66
68
|
);
|
|
67
69
|
let enable = false;
|
|
70
|
+
let pref = 'auto';
|
|
68
71
|
if (rows[0]) {
|
|
69
|
-
for (const [, v] of rows[0].values) {
|
|
70
|
-
enable = String(v) === '1' || String(v).toLowerCase() === 'true';
|
|
72
|
+
for (const [k, v] of rows[0].values) {
|
|
73
|
+
if (k === 'enable_llm_judge') enable = String(v) === '1' || String(v).toLowerCase() === 'true';
|
|
74
|
+
if (k === 'llm_tool_preference') {
|
|
75
|
+
const p = String(v || '').toLowerCase();
|
|
76
|
+
pref = VALID_CLI_PREFS.has(p) ? p : 'auto';
|
|
77
|
+
}
|
|
71
78
|
}
|
|
72
79
|
}
|
|
73
|
-
_settingsCache = { enable_llm_judge: enable };
|
|
80
|
+
_settingsCache = { enable_llm_judge: enable, llm_tool_preference: pref };
|
|
74
81
|
_settingsCacheAt = now;
|
|
75
82
|
return _settingsCache;
|
|
76
83
|
}
|
|
@@ -413,8 +420,9 @@ async function generateProjectAdvice(db, opts = {}) {
|
|
|
413
420
|
};
|
|
414
421
|
}
|
|
415
422
|
|
|
416
|
-
// 5. CLI detection
|
|
417
|
-
const
|
|
423
|
+
// 5. CLI detection — honour user preference
|
|
424
|
+
const pref = settings.llm_tool_preference || 'auto';
|
|
425
|
+
const cli = await detectAvailableCli(pref);
|
|
418
426
|
if (!cli) return { ok: false, reason: 'no-cli' };
|
|
419
427
|
|
|
420
428
|
// 6. assemble + truncate
|
|
@@ -437,7 +445,7 @@ async function generateProjectAdvice(db, opts = {}) {
|
|
|
437
445
|
'truncated=', trimmed.truncated, 'sessions=', trimmed.sessions.length);
|
|
438
446
|
|
|
439
447
|
// 7. run
|
|
440
|
-
const result = await withSlot(() => runJudge({ prompt, timeoutMs: 120_000 }));
|
|
448
|
+
const result = await withSlot(() => runJudge({ prompt, timeoutMs: 120_000, preferredCli: pref }));
|
|
441
449
|
if (!result.ok) {
|
|
442
450
|
return { ok: false, reason: result.reason, error: result.error };
|
|
443
451
|
}
|
|
@@ -1,131 +1,141 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Unified per-session LLM analyzer — ONE CLI call that returns both the
|
|
3
|
-
* v2.1 capability scores and the collaboration advice.
|
|
4
|
-
*
|
|
5
|
-
* Supersedes the two separate calls (judge.judgeSession + advice.generateAdvice).
|
|
6
|
-
* Pipeline:
|
|
7
|
-
* 1. settings gate (enable_llm_judge)
|
|
8
|
-
* 2. assemble context (reuses advice.assembleContext) + real difficulty
|
|
9
|
-
* 3. cache check in session_analysis.llm_judge_v2 (v + msgCount)
|
|
10
|
-
* 4. truncate + build combined prompt
|
|
11
|
-
* 5. runJudge under withSlot (90 s)
|
|
12
|
-
* 6. return { scores, advice, rationale, v, msgCount, cli, cachedAt } | null
|
|
13
|
-
*
|
|
14
|
-
* Returns null on disabled / no-cli / failure so callers fall back to rules.
|
|
15
|
-
*
|
|
16
|
-
* @author Felix
|
|
17
|
-
*/
|
|
18
|
-
|
|
19
|
-
'use strict';
|
|
20
|
-
|
|
21
|
-
const { detectAvailableCli, runJudge, withSlot } = require('./cli-runner');
|
|
22
|
-
const {
|
|
23
|
-
ANALYSIS_PROMPT_VERSION,
|
|
24
|
-
buildSessionAnalysisPrompt,
|
|
25
|
-
truncateContext,
|
|
26
|
-
} = require('./analysis-prompt');
|
|
27
|
-
const { assembleContext } = require('./advice');
|
|
28
|
-
const { classifySession } = require('../analysis/difficulty');
|
|
29
|
-
const { queryOne } = require('../db/queries');
|
|
30
|
-
|
|
31
|
-
// ---------------------------------------------------------------------------
|
|
32
|
-
// Settings gate (mirrors judge.js / advice.js; tiny TTL cache)
|
|
33
|
-
// ---------------------------------------------------------------------------
|
|
34
|
-
|
|
35
|
-
let _settingsCache = null;
|
|
36
|
-
let _settingsCacheAt = 0;
|
|
37
|
-
const SETTINGS_TTL_MS = 10_000;
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
const
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
// ---------------------------------------------------------------------------
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
const
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
const
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Unified per-session LLM analyzer — ONE CLI call that returns both the
|
|
3
|
+
* v2.1 capability scores and the collaboration advice.
|
|
4
|
+
*
|
|
5
|
+
* Supersedes the two separate calls (judge.judgeSession + advice.generateAdvice).
|
|
6
|
+
* Pipeline:
|
|
7
|
+
* 1. settings gate (enable_llm_judge)
|
|
8
|
+
* 2. assemble context (reuses advice.assembleContext) + real difficulty
|
|
9
|
+
* 3. cache check in session_analysis.llm_judge_v2 (v + msgCount)
|
|
10
|
+
* 4. truncate + build combined prompt
|
|
11
|
+
* 5. runJudge under withSlot (90 s)
|
|
12
|
+
* 6. return { scores, advice, rationale, v, msgCount, cli, cachedAt } | null
|
|
13
|
+
*
|
|
14
|
+
* Returns null on disabled / no-cli / failure so callers fall back to rules.
|
|
15
|
+
*
|
|
16
|
+
* @author Felix
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
'use strict';
|
|
20
|
+
|
|
21
|
+
const { detectAvailableCli, runJudge, withSlot } = require('./cli-runner');
|
|
22
|
+
const {
|
|
23
|
+
ANALYSIS_PROMPT_VERSION,
|
|
24
|
+
buildSessionAnalysisPrompt,
|
|
25
|
+
truncateContext,
|
|
26
|
+
} = require('./analysis-prompt');
|
|
27
|
+
const { assembleContext } = require('./advice');
|
|
28
|
+
const { classifySession } = require('../analysis/difficulty');
|
|
29
|
+
const { queryOne } = require('../db/queries');
|
|
30
|
+
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Settings gate (mirrors judge.js / advice.js; tiny TTL cache)
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
|
|
35
|
+
let _settingsCache = null;
|
|
36
|
+
let _settingsCacheAt = 0;
|
|
37
|
+
const SETTINGS_TTL_MS = 10_000;
|
|
38
|
+
|
|
39
|
+
const VALID_CLI_PREFS = new Set(['auto', 'opencode', 'claude']);
|
|
40
|
+
|
|
41
|
+
function getSettings(db) {
|
|
42
|
+
const now = Date.now();
|
|
43
|
+
if (_settingsCache && now - _settingsCacheAt < SETTINGS_TTL_MS) return _settingsCache;
|
|
44
|
+
const rows = db.exec(
|
|
45
|
+
"SELECT key, value FROM user_settings WHERE key IN ('enable_llm_judge', 'llm_tool_preference')"
|
|
46
|
+
);
|
|
47
|
+
let enable = false;
|
|
48
|
+
let pref = 'auto';
|
|
49
|
+
if (rows[0]) {
|
|
50
|
+
for (const [k, v] of rows[0].values) {
|
|
51
|
+
if (k === 'enable_llm_judge') enable = String(v) === '1' || String(v).toLowerCase() === 'true';
|
|
52
|
+
if (k === 'llm_tool_preference') {
|
|
53
|
+
const p = String(v || '').toLowerCase();
|
|
54
|
+
pref = VALID_CLI_PREFS.has(p) ? p : 'auto';
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
_settingsCache = { enable_llm_judge: enable, llm_tool_preference: pref };
|
|
59
|
+
_settingsCacheAt = now;
|
|
60
|
+
return _settingsCache;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/** Drop the settings cache (called by PUT /api/settings). */
|
|
64
|
+
function invalidateAnalyzerSettingsCache() { _settingsCache = null; }
|
|
65
|
+
|
|
66
|
+
// ---------------------------------------------------------------------------
|
|
67
|
+
// Cache
|
|
68
|
+
// ---------------------------------------------------------------------------
|
|
69
|
+
|
|
70
|
+
function loadCache(db, sessionId) {
|
|
71
|
+
const row = queryOne(db, 'SELECT llm_judge_v2 FROM session_analysis WHERE session_id = ?', [sessionId]);
|
|
72
|
+
if (!row || !row.llm_judge_v2) return null;
|
|
73
|
+
try { return JSON.parse(row.llm_judge_v2); }
|
|
74
|
+
catch { return null; }
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
// Public: analyzeSessionLLM
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Run (or return cached) the combined scores+advice analysis for a session.
|
|
83
|
+
*
|
|
84
|
+
* @param {object} db
|
|
85
|
+
* @param {object} session unified_session row
|
|
86
|
+
* @param {object} [opts] { force?: boolean }
|
|
87
|
+
* @returns {Promise<{scores:object, advice:object, rationale?:string,
|
|
88
|
+
* v:number, msgCount:number, cli:string, cachedAt:string} | null>}
|
|
89
|
+
*/
|
|
90
|
+
async function analyzeSessionLLM(db, session, opts = {}) {
|
|
91
|
+
const settings = getSettings(db);
|
|
92
|
+
if (!settings.enable_llm_judge) return null;
|
|
93
|
+
|
|
94
|
+
const ctxFull = assembleContext(db, session.id);
|
|
95
|
+
if (!ctxFull) return null;
|
|
96
|
+
const msgCount = ctxFull.messages.length;
|
|
97
|
+
|
|
98
|
+
if (opts.force !== true) {
|
|
99
|
+
const cache = loadCache(db, session.id);
|
|
100
|
+
if (cache && cache.v === ANALYSIS_PROMPT_VERSION && cache.msgCount === msgCount && cache.scores) {
|
|
101
|
+
return cache;
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
const pref = settings.llm_tool_preference || 'auto';
|
|
106
|
+
const cli = await detectAvailableCli(pref);
|
|
107
|
+
if (!cli) return null;
|
|
108
|
+
|
|
109
|
+
// Surface the REAL difficulty to the rubric (advice.assembleContext nulls
|
|
110
|
+
// it out on purpose; scoring needs it).
|
|
111
|
+
const difficulty = classifySession(session).bucket;
|
|
112
|
+
ctxFull.session.difficulty = difficulty;
|
|
113
|
+
|
|
114
|
+
const ctx = truncateContext(ctxFull);
|
|
115
|
+
ctx.session = ctxFull.session; // truncateContext shallow-copies; keep difficulty
|
|
116
|
+
const prompt = buildSessionAnalysisPrompt(ctx);
|
|
117
|
+
|
|
118
|
+
const result = await withSlot(() => runJudge({ prompt, timeoutMs: 90_000, preferredCli: pref }));
|
|
119
|
+
if (!result.ok || !result.data || !result.data.scores) {
|
|
120
|
+
// Surface why we fell back to rule-based judging. Silent nulls
|
|
121
|
+
// here made macOS `claude -p` failures (timeout / bad-json / exit
|
|
122
|
+
// non-zero) impossible to diagnose from the outside — the HTTP
|
|
123
|
+
// endpoint just returned 200 with empty v2 columns.
|
|
124
|
+
const reason = result.ok ? 'no-scores' : (result.reason || 'unknown');
|
|
125
|
+
const detail = result.error ? ` — ${String(result.error).slice(0, 200)}` : '';
|
|
126
|
+
console.error('[session-analyzer]', session.id, 'LLM fell through:', reason + detail);
|
|
127
|
+
return null;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
return {
|
|
131
|
+
scores: result.data.scores,
|
|
132
|
+
advice: result.data.advice || null,
|
|
133
|
+
rationale: typeof result.data.rationale === 'string' ? result.data.rationale : '',
|
|
134
|
+
v: ANALYSIS_PROMPT_VERSION,
|
|
135
|
+
msgCount,
|
|
136
|
+
cli: result.cli,
|
|
137
|
+
cachedAt: new Date().toISOString(),
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
module.exports = { analyzeSessionLLM, invalidateAnalyzerSettingsCache };
|