clementine-agent 1.18.40 → 1.18.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,28 @@ const MAX_STEPS = 10;
12
12
  const MAX_CONCURRENT_STEPS = 3;
13
13
  const RESULT_TRUNCATE_CHARS = 4000;
14
14
  const LONG_PLAN_WARNING_MS = 30 * 60 * 1000; // 30 minutes
15
+ // Step models the planner is allowed to assign to per-step execution.
16
+ // Opus is intentionally NOT in this list — it's reserved for the planner
17
+ // itself + final synthesis. Steps that need reasoning use Sonnet; routine
18
+ // extraction/lookup steps use Haiku. This keeps the high-cost Opus calls
19
+ // bounded (one planner call + one synthesis call per task) while
20
+ // execution stays cheap.
15
21
  const ALLOWED_MODELS = ['haiku', 'sonnet'];
22
+ // The planner's job is the highest-leverage decision in the orchestrator:
23
+ // a smart decomposition saves N sub-agent calls, a bad decomposition wastes
24
+ // them. So we default to Opus here even though it's the most expensive
25
+ // per-call model — one Opus call (~$0.30-0.50, no tools, ~500 input
26
+ // tokens, 1 turn) routinely saves $1-3 in retried sub-agent work.
27
+ //
28
+ // Override via CLEMENTINE_PLANNER_MODEL (haiku|sonnet|opus). Synthesis
29
+ // (combining all step results into the user-facing response) uses the
30
+ // same model — both are reasoning-heavy, tool-free calls (the planner runs
+ // with maxTurns: 1, synthesis with maxTurns: 5).
31
/**
 * Normalize a raw planner-model setting to one of the supported model names.
 *
 * Surrounding whitespace (including a trailing newline / carriage return, as
 * commonly left behind by .env files) and letter case are ignored so that a
 * value such as " Sonnet\r" is honoured rather than silently falling back to
 * the default.
 *
 * @param {string | undefined} raw - Raw setting, e.g. the value of
 *   process.env.CLEMENTINE_PLANNER_MODEL.
 * @returns {'haiku' | 'sonnet' | 'opus'} The recognised model name, or
 *   'opus' when the setting is missing or not one of the three names.
 */
function resolvePlannerModel(raw) {
    const candidate = typeof raw === 'string' ? raw.trim().toLowerCase() : undefined;
    if (candidate === 'haiku' || candidate === 'sonnet' || candidate === 'opus')
        return candidate;
    return 'opus';
}
// Resolved once at module load from CLEMENTINE_PLANNER_MODEL.
const PLANNER_MODEL = resolvePlannerModel(process.env.CLEMENTINE_PLANNER_MODEL);
16
37
  const PLANNER_PROMPT = `You are a task planner for an AI assistant. Decompose the following request into executable steps.
17
38
 
18
39
  **Planning Principles:**
@@ -475,6 +496,10 @@ export class PlanOrchestrator {
475
496
  finalResult = await this.assistant.runPlanStep(synthesisStepId, synthesisPrompt, {
476
497
  tier: 2,
477
498
  maxTurns: 5,
499
+ // Synthesis is reasoning-heavy: combine N step outputs into one
500
+ // coherent user-facing response. Same model tier as the planner —
501
+ // smart in, cheap-execution-out.
502
+ model: PLANNER_MODEL,
478
503
  disableTools: true,
479
504
  abortSignal: this.abortSignal,
480
505
  });
@@ -674,7 +699,7 @@ export class PlanOrchestrator {
674
699
  `If a step matches an agent's specialty, add "delegateTo": "agent-slug" to that step. ` +
675
700
  `The delegated agent will run the step with their own personality, tools, and expertise.\n`;
676
701
  }
677
- const plannerResult = await this.assistant.runPlanStep('planner', PLANNER_PROMPT + agentContext + task + PLANNER_PROMPT_SUFFIX, { tier: 2, maxTurns: 1, model: 'sonnet', disableTools: true, abortSignal: this.abortSignal });
702
+ const plannerResult = await this.assistant.runPlanStep('planner', PLANNER_PROMPT + agentContext + task + PLANNER_PROMPT_SUFFIX, { tier: 2, maxTurns: 1, model: PLANNER_MODEL, disableTools: true, abortSignal: this.abortSignal });
678
703
  // Parse JSON from the planner response
679
704
  const parsed = this.parseJsonFromResponse(plannerResult);
680
705
  if (!parsed?.steps || !Array.isArray(parsed.steps) || parsed.steps.length === 0) {
@@ -1881,5 +1881,69 @@ export function registerAdminTools(server) {
1881
1881
  const result = await runConnectNonInteractive({ allowQuitChrome: !!force_quit });
1882
1882
  return textResult(result.message);
1883
1883
  });
1884
+ // ── Broken-job diagnosis + fix-application (chat-equivalent of dashboard buttons) ──
1885
+ //
1886
+ // Before this, when the user asked "fix audit-inbox-check" in chat,
1887
+ // Clementine could read run logs and describe the failure but had no
1888
+ // tool to actually APPLY the stored fix — so she'd just keep returning
1889
+ // the same diagnosis text on every retry. The dashboard had the
1890
+ // "Apply Fix" button; the agent had nothing equivalent. These two
1891
+ // tools close that gap.
1892
// Registers the `list_broken_jobs` tool. The handler asks the failure monitor
// for the currently broken jobs and renders one text section per job: failure
// counts, the most recent error, the fresh cached diagnosis (when there is
// one) and whether the proposed fix may be auto-applied.
server.tool('list_broken_jobs', 'List cron jobs that are currently failing repeatedly, with their cached diagnosis (if any) and whether each has an auto-applicable fix proposal. Use this when the user asks "what\'s broken?" or "what jobs are failing?" — it surfaces the same data the dashboard\'s broken-jobs panel shows.', {}, async () => {
    const { computeBrokenJobs } = await import('../gateway/failure-monitor.js');
    const { getDiagnosisIfFresh } = await import('../gateway/failure-diagnostics.js');
    const brokenJobs = computeBrokenJobs();
    if (brokenJobs.length === 0) {
        return textResult('No cron jobs are currently flagged as broken.');
    }
    const pluralSuffix = brokenJobs.length === 1 ? '' : 's';
    const report = [`${brokenJobs.length} cron job${pluralSuffix} flagged as broken:`];
    for (const job of brokenJobs) {
        const diagnosis = getDiagnosisIfFresh(job.jobName);
        const proposedFix = diagnosis?.proposedFix;
        // Mirrors the gate used when applying: an autoApply payload AND low risk.
        const canAutoApply = Boolean(proposedFix?.autoApply) && diagnosis?.riskLevel === 'low';
        report.push(`\n• \`${job.jobName}\``);
        report.push(` failures last 48h: ${job.errorCount48h}/${job.totalRuns48h}`);
        if (job.lastErrors[0]) {
            report.push(` last error: ${String(job.lastErrors[0]).slice(0, 200)}`);
        }
        if (diagnosis) {
            report.push(` diagnosis: ${diagnosis.rootCause?.slice(0, 200) ?? "(no root cause)"}`);
            report.push(` proposed fix: type=${proposedFix?.type ?? 'unknown'} confidence=${diagnosis.confidence ?? 'unknown'} risk=${diagnosis.riskLevel ?? 'unknown'}`);
        }
        else {
            report.push(' diagnosis: pending — wait for next failure-monitor sweep');
        }
        if (canAutoApply) {
            report.push(` ✓ auto-applicable — call apply_broken_job_fix with jobName="${job.jobName}"`);
        }
        else {
            report.push(' ✗ not auto-applicable — manual review or dashboard intervention needed');
        }
    }
    return textResult(report.join('\n'));
});
1910
// Registers the `apply_broken_job_fix` tool. The handler looks up the fresh
// cached diagnosis for `jobName` and refuses (with an explanatory message)
// when there is no diagnosis, when the proposed fix carries no autoApply
// payload, or when riskLevel is anything other than 'low'. Otherwise it calls
// applyFix, passing through the dryRun flag, and clears the cached diagnosis
// only after a successful non-dry-run apply.
server.tool('apply_broken_job_fix', 'Apply the cached auto-applicable fix for a broken cron job. Use this when the user explicitly asks to "fix" a job that has a confirmed diagnosis with autoApply=true and risk=low. Pass dryRun=true to preview without writing. Returns the applied operations, or refuses with a clear reason when the diagnosis is missing/risky/non-auto-applicable.', {
    jobName: z.string().describe('The job name as shown in CRON.md or list_broken_jobs output (e.g. "audit-inbox-check" or "ross-the-sdr:reply-detection").'),
    dryRun: z.boolean().optional().describe('If true, validate + show what would change but do not write. Default false.'),
}, async ({ jobName, dryRun }) => {
    const { getDiagnosisIfFresh, clearDiagnosis } = await import('../gateway/failure-diagnostics.js');
    const { applyFix } = await import('../gateway/fix-applier.js');
    const d = getDiagnosisIfFresh(jobName);
    if (!d) {
        return textResult(`No fresh diagnosis for \`${jobName}\`. The failure-monitor sweep hasn't produced one yet, ` +
            `or the diagnosis expired. Wait for the next sweep, or dig into ~/.clementine/cron/runs/${jobName}.jsonl ` +
            `and the run-trace files for the actual error.`);
    }
    if (!d.proposedFix?.autoApply) {
        return textResult(`Diagnosis for \`${jobName}\` has no auto-applicable operations. ` +
            `Type: ${d.proposedFix?.type ?? 'unknown'}. ` +
            `This usually means the fix needs manual review — surface the diagnosis to the owner ` +
            `(${d.rootCause ?? "(no root cause)"}) instead of attempting auto-fix.`);
    }
    if (d.riskLevel !== 'low') {
        return textResult(`Diagnosis for \`${jobName}\` has riskLevel=${d.riskLevel}. ` +
            `Auto-apply is gated to risk=low only. ` +
            `Show the proposed fix to the owner for explicit approval.`);
    }
    // Only an explicit `true` counts as a dry run; undefined means "commit".
    const isDryRun = dryRun === true;
    const result = applyFix(jobName, d.proposedFix.autoApply, { dryRun: isDryRun });
    if (!result.ok) {
        return textResult(`Apply failed for \`${jobName}\`: ${'error' in result ? result.error : 'unknown error'}`);
    }
    // The fix was written for real, so the cached diagnosis is dropped.
    if (!isDryRun)
        clearDiagnosis(jobName);
    const opsCount = 'operations' in result ? result.operations.length : 0;
    const opsLabel = `${opsCount} operation${opsCount === 1 ? '' : 's'}`;
    // Use the same placeholder as the refusal message above so a missing (or
    // empty) root cause never renders as a dangling "Root cause: .".
    const rootCauseSummary = d.rootCause?.slice(0, 200) || '(no root cause)';
    return textResult(isDryRun
        ? `[DRY RUN] Would apply ${opsLabel} to fix \`${jobName}\`. ` +
            `Root cause: ${rootCauseSummary}. Re-run without dryRun to commit.`
        : `Applied fix for \`${jobName}\` (${opsLabel}). ` +
            `The fix-verification tracker will roll it back automatically if the next runs don't improve. ` +
            `Root cause: ${rootCauseSummary}.`);
});
1884
1948
  }
1885
1949
  //# sourceMappingURL=admin-tools.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.40",
3
+ "version": "1.18.42",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",