npm - agentboss - Versions diffs - 0.1.2 → 0.1.4 - Mend

agentboss 0.1.2 → 0.1.4

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (14)

package/client/dist/assets/{index-DxoLOxZ8.js → index-sks7Tuv7.js} +52 -52
package/client/dist/index.html +1 -1
package/package.json +1 -1
package/server/analysis/report-builder.js +28 -1
package/server/api/execution.js +4 -4
package/server/api/overview.js +25 -14
package/server/api/settings.js +139 -119
package/server/db/queries.js +1108 -1051
package/server/execution/job.js +63 -12
package/server/llm/advice.js +15 -7
package/server/llm/cli-runner.js +316 -265
package/server/llm/judge.js +149 -123
package/server/llm/project-advice.js +15 -7
package/server/llm/session-analyzer.js +141 -131

package/server/llm/judge.js CHANGED

@@ -1,123 +1,149 @@
-/**
- * High-level LLM judge — bridges dimension scorers (E1, O1) to the
- * cli-runner.  Handles:
- *   • opt-in via user_settings.enable_llm_judge
- *   • per-session cache via session_analysis.llm_judge_v2
- *   • concurrency throttle (cli-runner.withSlot)
- *   • fall-back signalling so dimension scorers can branch
- *
- * @author Felix
- */
-'use strict';
-const { runJudge, detectAvailableCli, withSlot } = require('./cli-runner');
-const { buildSessionJudgePrompt, PROMPT_VERSION } = require('./judge-prompts');
-const { queryOne } = require('../db/queries');
-// ---------------------------------------------------------------------------
-//  Settings cache
-// ---------------------------------------------------------------------------
-let _settingsCache = null;
-let _settingsCacheAt = 0;
-const SETTINGS_TTL_MS = 10_000;
-function getSettings(db) {
-  const now = Date.now();
-  if (_settingsCache && now - _settingsCacheAt < SETTINGS_TTL_MS) {
-    return _settingsCache;
-  }
-  const rows = db.exec(
-    "SELECT key, value FROM user_settings WHERE key IN ('enable_llm_judge')"
-  );
-  const out = { enable_llm_judge: false };
-  if (rows[0]) {
-    for (const [k, v] of rows[0].values) {
-      if (k === 'enable_llm_judge') out.enable_llm_judge = String(v) === '1' || String(v).toLowerCase() === 'true';
-    }
-  }
-  _settingsCache = out;
-  _settingsCacheAt = now;
-  return out;
-}
-/** Public: force a settings reload (e.g. after PUT /api/settings). */
-function invalidateSettingsCache() {
-  _settingsCache = null;
-}
-// ---------------------------------------------------------------------------
-//  Per-session cache
-// ---------------------------------------------------------------------------
-/** Return the cached llm_judge_v2 JSON for a session or null. */
-function loadCache(db, sessionId) {
-  const row = queryOne(
-    db,
-    'SELECT llm_judge_v2 FROM session_analysis WHERE session_id = ?',
-    [sessionId]
-  );
-  if (!row || !row.llm_judge_v2) return null;
-  try { return JSON.parse(row.llm_judge_v2); }
-  catch { return null; }
-}
-// ---------------------------------------------------------------------------
-//  Public judge functions
-// ---------------------------------------------------------------------------
-/**
- * Consolidated judge — one LLM call scoring H1/H2/E1/O1 for a session.
- * Returns the parsed payload (stamped with v / msgCount / cli) or null
- * when disabled, no CLI, or the call fails.  Cached in llm_judge_v2.
- */
-async function judgeSession(db, session, messages, meta = {}) {
-  const settings = getSettings(db);
-  if (!settings.enable_llm_judge) return null;
-  const msgCount = messages.length;
-  const cache = loadCache(db, session.id);
-  if (cache && cache.v === PROMPT_VERSION && cache.msgCount === msgCount) return cache;
-  const cli = await detectAvailableCli();
-  if (!cli) return null;
-  const prompt = buildSessionJudgePrompt(messages, meta);
-  const result = await withSlot(() => runJudge({ prompt, timeoutMs: 90_000 }));
-  if (!result.ok || !result.data) {
-    const reason = result.ok ? 'no-data' : (result.reason || 'unknown');
-    const detail = result.error ? ` — ${String(result.error).slice(0, 200)}` : '';
-    console.error('[judge]', session.id, 'LLM fell through:', reason + detail);
-    return null;
-  }
-  return {
-    ...result.data,
-    v: PROMPT_VERSION,
-    msgCount,
-    cli: result.cli,
-    cachedAt: new Date().toISOString(),
-  };
-}
-/**
- * One-shot pre-flight to surface whether a CLI is configured.  Used by
- * the Settings page.
- */
-async function diagnose() {
-  const cli = await detectAvailableCli();
-  return {
-    available: !!cli,
-    name: cli ? cli.name : null,
-  };
-}
-module.exports = {
-  judgeSession,
-  diagnose,
-  invalidateSettingsCache,
-  // re-export so callers don't need cli-runner directly
-  detectAvailableCli,
-  PROMPT_VERSION,
-};
+/**
+ * High-level LLM judge — bridges dimension scorers (E1, O1) to the
+ * cli-runner.  Handles:
+ *   • opt-in via user_settings.enable_llm_judge
+ *   • per-session cache via session_analysis.llm_judge_v2
+ *   • concurrency throttle (cli-runner.withSlot)
+ *   • fall-back signalling so dimension scorers can branch
+ *
+ * @author Felix
+ */
+'use strict';
+const { runJudge, detectAvailableCli, detectAllCli, withSlot } = require('./cli-runner');
+const { buildSessionJudgePrompt, PROMPT_VERSION } = require('./judge-prompts');
+const { queryOne } = require('../db/queries');
+const VALID_PREFS = new Set(['auto', 'opencode', 'claude']);
+// ---------------------------------------------------------------------------
+//  Settings cache
+// ---------------------------------------------------------------------------
+let _settingsCache = null;
+let _settingsCacheAt = 0;
+const SETTINGS_TTL_MS = 10_000;
+function getSettings(db) {
+  const now = Date.now();
+  if (_settingsCache && now - _settingsCacheAt < SETTINGS_TTL_MS) {
+    return _settingsCache;
+  }
+  const rows = db.exec(
+    "SELECT key, value FROM user_settings WHERE key IN ('enable_llm_judge', 'llm_tool_preference')"
+  );
+  const out = { enable_llm_judge: false, llm_tool_preference: 'auto' };
+  if (rows[0]) {
+    for (const [k, v] of rows[0].values) {
+      if (k === 'enable_llm_judge') out.enable_llm_judge = String(v) === '1' || String(v).toLowerCase() === 'true';
+      if (k === 'llm_tool_preference') {
+        const p = String(v || '').toLowerCase();
+        out.llm_tool_preference = VALID_PREFS.has(p) ? p : 'auto';
+      }
+    }
+  }
+  _settingsCache = out;
+  _settingsCacheAt = now;
+  return out;
+}
+/** Public: force a settings reload (e.g. after PUT /api/settings). */
+function invalidateSettingsCache() {
+  _settingsCache = null;
+}
+// ---------------------------------------------------------------------------
+//  Per-session cache
+// ---------------------------------------------------------------------------
+/** Return the cached llm_judge_v2 JSON for a session or null. */
+function loadCache(db, sessionId) {
+  const row = queryOne(
+    db,
+    'SELECT llm_judge_v2 FROM session_analysis WHERE session_id = ?',
+    [sessionId]
+  );
+  if (!row || !row.llm_judge_v2) return null;
+  try { return JSON.parse(row.llm_judge_v2); }
+  catch { return null; }
+}
+// ---------------------------------------------------------------------------
+//  Public judge functions
+// ---------------------------------------------------------------------------
+/**
+ * Consolidated judge — one LLM call scoring H1/H2/E1/O1 for a session.
+ * Returns the parsed payload (stamped with v / msgCount / cli) or null
+ * when disabled, no CLI, or the call fails.  Cached in llm_judge_v2.
+ */
+async function judgeSession(db, session, messages, meta = {}) {
+  const settings = getSettings(db);
+  if (!settings.enable_llm_judge) return null;
+  const msgCount = messages.length;
+  const cache = loadCache(db, session.id);
+  if (cache && cache.v === PROMPT_VERSION && cache.msgCount === msgCount) return cache;
+  const pref = settings.llm_tool_preference || 'auto';
+  const cli = await detectAvailableCli(pref);
+  if (!cli) return null;
+  const prompt = buildSessionJudgePrompt(messages, meta);
+  const result = await withSlot(() => runJudge({ prompt, timeoutMs: 90_000, preferredCli: pref }));
+  if (!result.ok || !result.data) {
+    const reason = result.ok ? 'no-data' : (result.reason || 'unknown');
+    const detail = result.error ? ` — ${String(result.error).slice(0, 200)}` : '';
+    console.error('[judge]', session.id, 'LLM fell through:', reason + detail);
+    return null;
+  }
+  return {
+    ...result.data,
+    v: PROMPT_VERSION,
+    msgCount,
+    cli: result.cli,
+    cachedAt: new Date().toISOString(),
+  };
+}
+/**
+ * Pre-flight for the Settings page.  Returns the full availability map
+ * for every supported CLI, plus the active pick under the current
+ * preference.
+ *
+ * @param {object} [db]  sql.js Database.  If provided, the user's
+ *   `llm_tool_preference` is honoured when computing `active`.
+ * @returns {Promise<{
+ *   available: boolean,                       // any CLI usable
+ *   name: string|null,                        // active CLI name
+ *   active: string|null,                      // same as name (alias)
+ *   preference: 'auto'|'opencode'|'claude',   // user preference
+ *   source: 'user'|'auto',                    // why `active` was chosen
+ *   detected: Array<{name, bin, available}>,  // full availability map
+ * }>}
+ */
+async function diagnose(db) {
+  const all = await detectAllCli();
+  const settings = db ? getSettings(db) : { llm_tool_preference: 'auto' };
+  const pref = settings.llm_tool_preference || 'auto';
+  const cli = await detectAvailableCli(pref);
+  return {
+    available: !!cli,
+    name: cli ? cli.name : null,
+    active: cli ? cli.name : null,
+    preference: pref,
+    source: pref === 'auto' ? 'auto' : 'user',
+    detected: all,
+  };
+}
+module.exports = {
+  judgeSession,
+  diagnose,
+  invalidateSettingsCache,
+  // re-export so callers don't need cli-runner directly
+  detectAvailableCli,
+  PROMPT_VERSION,
+};

package/server/llm/project-advice.js CHANGED

@@ -56,21 +56,28 @@ let _settingsCache = null;
 let _settingsCacheAt = 0;
 const SETTINGS_TTL_MS = 10_000;
+const VALID_CLI_PREFS = new Set(['auto', 'opencode', 'claude']);
 function getSettings(db) {
   const now = Date.now();
   if (_settingsCache && now - _settingsCacheAt < SETTINGS_TTL_MS) {
     return _settingsCache;
   }
   const rows = db.exec(
-    "SELECT key, value FROM user_settings WHERE key = 'enable_llm_judge'"
+    "SELECT key, value FROM user_settings WHERE key IN ('enable_llm_judge', 'llm_tool_preference')"
   );
   let enable = false;
+  let pref = 'auto';
   if (rows[0]) {
-    for (const [, v] of rows[0].values) {
-      enable = String(v) === '1' || String(v).toLowerCase() === 'true';
+    for (const [k, v] of rows[0].values) {
+      if (k === 'enable_llm_judge') enable = String(v) === '1' || String(v).toLowerCase() === 'true';
+      if (k === 'llm_tool_preference') {
+        const p = String(v || '').toLowerCase();
+        pref = VALID_CLI_PREFS.has(p) ? p : 'auto';
+      }
     }
   }
-  _settingsCache = { enable_llm_judge: enable };
+  _settingsCache = { enable_llm_judge: enable, llm_tool_preference: pref };
   _settingsCacheAt = now;
   return _settingsCache;
 }
@@ -413,8 +420,9 @@ async function generateProjectAdvice(db, opts = {}) {
       };
     }
-    // 5. CLI detection
-    const cli = await detectAvailableCli();
+    // 5. CLI detection — honour user preference
+    const pref = settings.llm_tool_preference || 'auto';
+    const cli = await detectAvailableCli(pref);
     if (!cli) return { ok: false, reason: 'no-cli' };
     // 6. assemble + truncate
@@ -437,7 +445,7 @@ async function generateProjectAdvice(db, opts = {}) {
         'truncated=', trimmed.truncated, 'sessions=', trimmed.sessions.length);
     // 7. run
-    const result = await withSlot(() => runJudge({ prompt, timeoutMs: 120_000 }));
+    const result = await withSlot(() => runJudge({ prompt, timeoutMs: 120_000, preferredCli: pref }));
     if (!result.ok) {
       return { ok: false, reason: result.reason, error: result.error };
     }

package/server/llm/session-analyzer.js CHANGED

@@ -1,131 +1,141 @@
-/**
- * Unified per-session LLM analyzer — ONE CLI call that returns both the
- * v2.1 capability scores and the collaboration advice.
- *
- * Supersedes the two separate calls (judge.judgeSession + advice.generateAdvice).
- * Pipeline:
- *   1. settings gate (enable_llm_judge)
- *   2. assemble context (reuses advice.assembleContext) + real difficulty
- *   3. cache check in session_analysis.llm_judge_v2 (v + msgCount)
- *   4. truncate + build combined prompt
- *   5. runJudge under withSlot (90 s)
- *   6. return { scores, advice, rationale, v, msgCount, cli, cachedAt } | null
- *
- * Returns null on disabled / no-cli / failure so callers fall back to rules.
- *
- * @author Felix
- */
-'use strict';
-const { detectAvailableCli, runJudge, withSlot } = require('./cli-runner');
-const {
-  ANALYSIS_PROMPT_VERSION,
-  buildSessionAnalysisPrompt,
-  truncateContext,
-} = require('./analysis-prompt');
-const { assembleContext } = require('./advice');
-const { classifySession } = require('../analysis/difficulty');
-const { queryOne } = require('../db/queries');
-// ---------------------------------------------------------------------------
-//  Settings gate (mirrors judge.js / advice.js; tiny TTL cache)
-// ---------------------------------------------------------------------------
-let _settingsCache = null;
-let _settingsCacheAt = 0;
-const SETTINGS_TTL_MS = 10_000;
-function getSettings(db) {
-  const now = Date.now();
-  if (_settingsCache && now - _settingsCacheAt < SETTINGS_TTL_MS) return _settingsCache;
-  const rows = db.exec("SELECT value FROM user_settings WHERE key = 'enable_llm_judge'");
-  let enable = false;
-  if (rows[0]) {
-    for (const [v] of rows[0].values) {
-      enable = String(v) === '1' || String(v).toLowerCase() === 'true';
-    }
-  }
-  _settingsCache = { enable_llm_judge: enable };
-  _settingsCacheAt = now;
-  return _settingsCache;
-}
-/** Drop the settings cache (called by PUT /api/settings). */
-function invalidateAnalyzerSettingsCache() { _settingsCache = null; }
-// ---------------------------------------------------------------------------
-//  Cache
-// ---------------------------------------------------------------------------
-function loadCache(db, sessionId) {
-  const row = queryOne(db, 'SELECT llm_judge_v2 FROM session_analysis WHERE session_id = ?', [sessionId]);
-  if (!row || !row.llm_judge_v2) return null;
-  try { return JSON.parse(row.llm_judge_v2); }
-  catch { return null; }
-}
-// ---------------------------------------------------------------------------
-//  Public: analyzeSessionLLM
-// ---------------------------------------------------------------------------
-/**
- * Run (or return cached) the combined scores+advice analysis for a session.
- *
- * @param {object}  db
- * @param {object}  session   unified_session row
- * @param {object} [opts]     { force?: boolean }
- * @returns {Promise<{scores:object, advice:object, rationale?:string,
- *   v:number, msgCount:number, cli:string, cachedAt:string} | null>}
- */
-async function analyzeSessionLLM(db, session, opts = {}) {
-  const settings = getSettings(db);
-  if (!settings.enable_llm_judge) return null;
-  const ctxFull = assembleContext(db, session.id);
-  if (!ctxFull) return null;
-  const msgCount = ctxFull.messages.length;
-  if (opts.force !== true) {
-    const cache = loadCache(db, session.id);
-    if (cache && cache.v === ANALYSIS_PROMPT_VERSION && cache.msgCount === msgCount && cache.scores) {
-      return cache;
-    }
-  }
-  const cli = await detectAvailableCli();
-  if (!cli) return null;
-  // Surface the REAL difficulty to the rubric (advice.assembleContext nulls
-  // it out on purpose; scoring needs it).
-  const difficulty = classifySession(session).bucket;
-  ctxFull.session.difficulty = difficulty;
-  const ctx = truncateContext(ctxFull);
-  ctx.session = ctxFull.session; // truncateContext shallow-copies; keep difficulty
-  const prompt = buildSessionAnalysisPrompt(ctx);
-  const result = await withSlot(() => runJudge({ prompt, timeoutMs: 90_000 }));
-  if (!result.ok || !result.data || !result.data.scores) {
-    // Surface why we fell back to rule-based judging.  Silent nulls
-    // here made macOS `claude -p` failures (timeout / bad-json / exit
-    // non-zero) impossible to diagnose from the outside — the HTTP
-    // endpoint just returned 200 with empty v2 columns.
-    const reason = result.ok ? 'no-scores' : (result.reason || 'unknown');
-    const detail = result.error ? ` — ${String(result.error).slice(0, 200)}` : '';
-    console.error('[session-analyzer]', session.id, 'LLM fell through:', reason + detail);
-    return null;
-  }
-  return {
-    scores: result.data.scores,
-    advice: result.data.advice || null,
-    rationale: typeof result.data.rationale === 'string' ? result.data.rationale : '',
-    v: ANALYSIS_PROMPT_VERSION,
-    msgCount,
-    cli: result.cli,
-    cachedAt: new Date().toISOString(),
-  };
-}
-module.exports = { analyzeSessionLLM, invalidateAnalyzerSettingsCache };
+/**
+ * Unified per-session LLM analyzer — ONE CLI call that returns both the
+ * v2.1 capability scores and the collaboration advice.
+ *
+ * Supersedes the two separate calls (judge.judgeSession + advice.generateAdvice).
+ * Pipeline:
+ *   1. settings gate (enable_llm_judge)
+ *   2. assemble context (reuses advice.assembleContext) + real difficulty
+ *   3. cache check in session_analysis.llm_judge_v2 (v + msgCount)
+ *   4. truncate + build combined prompt
+ *   5. runJudge under withSlot (90 s)
+ *   6. return { scores, advice, rationale, v, msgCount, cli, cachedAt } | null
+ *
+ * Returns null on disabled / no-cli / failure so callers fall back to rules.
+ *
+ * @author Felix
+ */
+'use strict';
+const { detectAvailableCli, runJudge, withSlot } = require('./cli-runner');
+const {
+  ANALYSIS_PROMPT_VERSION,
+  buildSessionAnalysisPrompt,
+  truncateContext,
+} = require('./analysis-prompt');
+const { assembleContext } = require('./advice');
+const { classifySession } = require('../analysis/difficulty');
+const { queryOne } = require('../db/queries');
+// ---------------------------------------------------------------------------
+//  Settings gate (mirrors judge.js / advice.js; tiny TTL cache)
+// ---------------------------------------------------------------------------
+let _settingsCache = null;
+let _settingsCacheAt = 0;
+const SETTINGS_TTL_MS = 10_000;
+const VALID_CLI_PREFS = new Set(['auto', 'opencode', 'claude']);
+function getSettings(db) {
+  const now = Date.now();
+  if (_settingsCache && now - _settingsCacheAt < SETTINGS_TTL_MS) return _settingsCache;
+  const rows = db.exec(
+    "SELECT key, value FROM user_settings WHERE key IN ('enable_llm_judge', 'llm_tool_preference')"
+  );
+  let enable = false;
+  let pref = 'auto';
+  if (rows[0]) {
+    for (const [k, v] of rows[0].values) {
+      if (k === 'enable_llm_judge') enable = String(v) === '1' || String(v).toLowerCase() === 'true';
+      if (k === 'llm_tool_preference') {
+        const p = String(v || '').toLowerCase();
+        pref = VALID_CLI_PREFS.has(p) ? p : 'auto';
+      }
+    }
+  }
+  _settingsCache = { enable_llm_judge: enable, llm_tool_preference: pref };
+  _settingsCacheAt = now;
+  return _settingsCache;
+}
+/** Drop the settings cache (called by PUT /api/settings). */
+function invalidateAnalyzerSettingsCache() { _settingsCache = null; }
+// ---------------------------------------------------------------------------
+//  Cache
+// ---------------------------------------------------------------------------
+function loadCache(db, sessionId) {
+  const row = queryOne(db, 'SELECT llm_judge_v2 FROM session_analysis WHERE session_id = ?', [sessionId]);
+  if (!row || !row.llm_judge_v2) return null;
+  try { return JSON.parse(row.llm_judge_v2); }
+  catch { return null; }
+}
+// ---------------------------------------------------------------------------
+//  Public: analyzeSessionLLM
+// ---------------------------------------------------------------------------
+/**
+ * Run (or return cached) the combined scores+advice analysis for a session.
+ *
+ * @param {object}  db
+ * @param {object}  session   unified_session row
+ * @param {object} [opts]     { force?: boolean }
+ * @returns {Promise<{scores:object, advice:object, rationale?:string,
+ *   v:number, msgCount:number, cli:string, cachedAt:string} | null>}
+ */
+async function analyzeSessionLLM(db, session, opts = {}) {
+  const settings = getSettings(db);
+  if (!settings.enable_llm_judge) return null;
+  const ctxFull = assembleContext(db, session.id);
+  if (!ctxFull) return null;
+  const msgCount = ctxFull.messages.length;
+  if (opts.force !== true) {
+    const cache = loadCache(db, session.id);
+    if (cache && cache.v === ANALYSIS_PROMPT_VERSION && cache.msgCount === msgCount && cache.scores) {
+      return cache;
+    }
+  }
+  const pref = settings.llm_tool_preference || 'auto';
+  const cli = await detectAvailableCli(pref);
+  if (!cli) return null;
+  // Surface the REAL difficulty to the rubric (advice.assembleContext nulls
+  // it out on purpose; scoring needs it).
+  const difficulty = classifySession(session).bucket;
+  ctxFull.session.difficulty = difficulty;
+  const ctx = truncateContext(ctxFull);
+  ctx.session = ctxFull.session; // truncateContext shallow-copies; keep difficulty
+  const prompt = buildSessionAnalysisPrompt(ctx);
+  const result = await withSlot(() => runJudge({ prompt, timeoutMs: 90_000, preferredCli: pref }));
+  if (!result.ok || !result.data || !result.data.scores) {
+    // Surface why we fell back to rule-based judging.  Silent nulls
+    // here made macOS `claude -p` failures (timeout / bad-json / exit
+    // non-zero) impossible to diagnose from the outside — the HTTP
+    // endpoint just returned 200 with empty v2 columns.
+    const reason = result.ok ? 'no-scores' : (result.reason || 'unknown');
+    const detail = result.error ? ` — ${String(result.error).slice(0, 200)}` : '';
+    console.error('[session-analyzer]', session.id, 'LLM fell through:', reason + detail);
+    return null;
+  }
+  return {
+    scores: result.data.scores,
+    advice: result.data.advice || null,
+    rationale: typeof result.data.rationale === 'string' ? result.data.rationale : '',
+    v: ANALYSIS_PROMPT_VERSION,
+    msgCount,
+    cli: result.cli,
+    cachedAt: new Date().toISOString(),
+  };
+}
+module.exports = { analyzeSessionLLM, invalidateAnalyzerSettingsCache };