npm - clementine-agent - Versions diffs - 1.18.40 → 1.18.42 - Mend

clementine-agent 1.18.40 → 1.18.42

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/agent/orchestrator.js +26 -1
package/dist/tools/admin-tools.js +64 -0
package/package.json +1 -1

package/dist/agent/orchestrator.js CHANGED Viewed

@@ -12,7 +12,28 @@ const MAX_STEPS = 10;
 const MAX_CONCURRENT_STEPS = 3;
 const RESULT_TRUNCATE_CHARS = 4000;
 const LONG_PLAN_WARNING_MS = 30 * 60 * 1000; // 30 minutes
+// Step models the planner is allowed to assign to per-step execution.
+// Opus is intentionally NOT in this list — it's reserved for the planner
+// itself + final synthesis. Steps that need reasoning use Sonnet; routine
+// extraction/lookup steps use Haiku. This keeps the high-cost Opus calls
+// bounded (one planner call + one synthesis call per task) while
+// execution stays cheap.
 const ALLOWED_MODELS = ['haiku', 'sonnet'];
+// The planner's job is the highest-leverage decision in the orchestrator:
+// a smart decomposition saves N sub-agent calls, a bad decomposition wastes
+// them. So we default to Opus here even though it's the most expensive
+// per-call model — one Opus call (~$0.30-0.50, no tools, ~500 input
+// tokens, 1 turn) routinely saves $1-3 in retried sub-agent work.
+//
+// Override via CLEMENTINE_PLANNER_MODEL (haiku|sonnet|opus). Synthesis
+// (combining all step results into the user-facing response) uses the
+// same model — both are reasoning-heavy single-turn calls.
+const PLANNER_MODEL = (() => {
+    const env = process.env.CLEMENTINE_PLANNER_MODEL?.toLowerCase();
+    if (env === 'haiku' || env === 'sonnet' || env === 'opus')
+        return env;
+    return 'opus';
+})();
 const PLANNER_PROMPT = `You are a task planner for an AI assistant. Decompose the following request into executable steps.
 **Planning Principles:**
@@ -475,6 +496,10 @@ export class PlanOrchestrator {
             finalResult = await this.assistant.runPlanStep(synthesisStepId, synthesisPrompt, {
                 tier: 2,
                 maxTurns: 5,
+                // Synthesis is reasoning-heavy: combine N step outputs into one
+                // coherent user-facing response. Same model tier as the planner —
+                // smart in, cheap-execution-out.
+                model: PLANNER_MODEL,
                 disableTools: true,
                 abortSignal: this.abortSignal,
             });
@@ -674,7 +699,7 @@ export class PlanOrchestrator {
                 `If a step matches an agent's specialty, add "delegateTo": "agent-slug" to that step. ` +
                 `The delegated agent will run the step with their own personality, tools, and expertise.\n`;
         }
-        const plannerResult = await this.assistant.runPlanStep('planner', PLANNER_PROMPT + agentContext + task + PLANNER_PROMPT_SUFFIX, { tier: 2, maxTurns: 1, model: 'sonnet', disableTools: true, abortSignal: this.abortSignal });
+        const plannerResult = await this.assistant.runPlanStep('planner', PLANNER_PROMPT + agentContext + task + PLANNER_PROMPT_SUFFIX, { tier: 2, maxTurns: 1, model: PLANNER_MODEL, disableTools: true, abortSignal: this.abortSignal });
         // Parse JSON from the planner response
         const parsed = this.parseJsonFromResponse(plannerResult);
         if (!parsed?.steps || !Array.isArray(parsed.steps) || parsed.steps.length === 0) {

package/dist/tools/admin-tools.js CHANGED Viewed

@@ -1881,5 +1881,69 @@ export function registerAdminTools(server) {
         const result = await runConnectNonInteractive({ allowQuitChrome: !!force_quit });
         return textResult(result.message);
     });
+    // ── Broken-job diagnosis + fix-application (chat-equivalent of dashboard buttons) ──
+    //
+    // Before this, when the user asked "fix audit-inbox-check" in chat,
+    // Clementine could read run logs and describe the failure but had no
+    // tool to actually APPLY the stored fix — so she'd just keep returning
+    // the same diagnosis text on every retry. The dashboard had the
+    // "Apply Fix" button; the agent had nothing equivalent. These two
+    // tools close that gap.
+    server.tool('list_broken_jobs', 'List cron jobs that are currently failing repeatedly, with their cached diagnosis (if any) and whether each has an auto-applicable fix proposal. Use this when the user asks "what\'s broken?" or "what jobs are failing?" — it surfaces the same data the dashboard\'s broken-jobs panel shows.', {}, async () => {
+        const { computeBrokenJobs } = await import('../gateway/failure-monitor.js');
+        const { getDiagnosisIfFresh } = await import('../gateway/failure-diagnostics.js');
+        const broken = computeBrokenJobs();
+        if (broken.length === 0) {
+            return textResult('No cron jobs are currently flagged as broken.');
+        }
+        const lines = [`${broken.length} cron job${broken.length === 1 ? '' : 's'} flagged as broken:`];
+        for (const b of broken) {
+            const d = getDiagnosisIfFresh(b.jobName);
+            const fix = d?.proposedFix;
+            const autoApplyAvailable = !!fix?.autoApply && d?.riskLevel === 'low';
+            lines.push(`\n• \`${b.jobName}\``, `  failures last 48h: ${b.errorCount48h}/${b.totalRuns48h}`, b.lastErrors[0] ? `  last error: ${String(b.lastErrors[0]).slice(0, 200)}` : '', d ? `  diagnosis: ${d.rootCause?.slice(0, 200) ?? "(no root cause)"}` : '  diagnosis: pending — wait for next failure-monitor sweep', d ? `  proposed fix: type=${fix?.type ?? 'unknown'} confidence=${d.confidence ?? 'unknown'} risk=${d.riskLevel ?? 'unknown'}` : '', autoApplyAvailable
+                ? `  ✓ auto-applicable — call apply_broken_job_fix with jobName="${b.jobName}"`
+                : '  ✗ not auto-applicable — manual review or dashboard intervention needed');
+        }
+        return textResult(lines.filter(Boolean).join('\n'));
+    });
+    server.tool('apply_broken_job_fix', 'Apply the cached auto-applicable fix for a broken cron job. Use this when the user explicitly asks to "fix" a job that has a confirmed diagnosis with autoApply=true and risk=low. Pass dryRun=true to preview without writing. Returns the applied operations, or refuses with a clear reason when the diagnosis is missing/risky/non-auto-applicable.', {
+        jobName: z.string().describe('The job name as shown in CRON.md or list_broken_jobs output (e.g. "audit-inbox-check" or "ross-the-sdr:reply-detection").'),
+        dryRun: z.boolean().optional().describe('If true, validate + show what would change but do not write. Default false.'),
+    }, async ({ jobName, dryRun }) => {
+        const { getDiagnosisIfFresh, clearDiagnosis } = await import('../gateway/failure-diagnostics.js');
+        const { applyFix } = await import('../gateway/fix-applier.js');
+        const d = getDiagnosisIfFresh(jobName);
+        if (!d) {
+            return textResult(`No fresh diagnosis for \`${jobName}\`. The failure-monitor sweep hasn't produced one yet, ` +
+                `or the diagnosis expired. Wait for the next sweep, or dig into ~/.clementine/cron/runs/${jobName}.jsonl ` +
+                `and the run-trace files for the actual error.`);
+        }
+        if (!d.proposedFix?.autoApply) {
+            return textResult(`Diagnosis for \`${jobName}\` has no auto-applicable operations. ` +
+                `Type: ${d.proposedFix?.type ?? 'unknown'}. ` +
+                `This usually means the fix needs manual review — surface the diagnosis to the owner ` +
+                `(${d.rootCause ?? "(no root cause)"}) instead of attempting auto-fix.`);
+        }
+        if (d.riskLevel !== 'low') {
+            return textResult(`Diagnosis for \`${jobName}\` has riskLevel=${d.riskLevel}. ` +
+                `Auto-apply is gated to risk=low only. ` +
+                `Show the proposed fix to the owner for explicit approval.`);
+        }
+        const isDryRun = dryRun === true;
+        const result = applyFix(jobName, d.proposedFix.autoApply, { dryRun: isDryRun });
+        if (result.ok && !isDryRun)
+            clearDiagnosis(jobName);
+        if (!result.ok) {
+            return textResult(`Apply failed for \`${jobName}\`: ${'error' in result ? result.error : 'unknown error'}`);
+        }
+        const opsCount = 'operations' in result ? result.operations.length : 0;
+        return textResult(isDryRun
+            ? `[DRY RUN] Would apply ${opsCount} operation${opsCount === 1 ? '' : 's'} to fix \`${jobName}\`. ` +
+                `Root cause: ${d.rootCause?.slice(0, 200) ?? ""}. Re-run without dryRun to commit.`
+            : `Applied fix for \`${jobName}\` (${opsCount} operation${opsCount === 1 ? '' : 's'}). ` +
+                `The fix-verification tracker will roll it back automatically if the next runs don't improve. ` +
+                `Root cause: ${d.rootCause?.slice(0, 200) ?? ""}.`);
+    });
 }
 //# sourceMappingURL=admin-tools.js.map

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.40",
+  "version": "1.18.42",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",