clementine-agent 1.18.39 → 1.18.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -72,7 +72,21 @@ export interface PreLlmPlanOptions {
72
72
  }
73
73
  /**
74
74
  * Decide whether the user's text should bypass the main agent and run
75
- * directly through the planner orchestrator. Conservative by design.
75
+ * directly through the planner orchestrator.
76
+ *
77
+ * Threshold model: combined "evidence count" ≥ 2, where each FANOUT
78
+ * SIGNAL and each MULTI-TARGET ACTION VERB counts as one piece of
79
+ * evidence. So "research my 100 leads and email each one" gets:
80
+ * - 1 fanout signal (numeric_collection: "100 leads")
81
+ * - 1 multi-target verb (research my 100 leads)
82
+ * = 2 → routes
83
+ *
84
+ * False-positive guards stay in front:
85
+ * - intent != followup/chat
86
+ * - text length ≥ 30 chars
87
+ * - NOT an informational query (what/tell me/show me/...)
88
+ * - at least ONE action verb (no orchestration for pure declarative
89
+ * statements even if they reference multiple items)
76
90
  */
77
91
  export declare function detectPreLlmPlanIntent(text: string, opts?: PreLlmPlanOptions): PreLlmPlanDecision;
78
92
  //# sourceMappingURL=fanout-policy.d.ts.map
@@ -172,11 +172,25 @@ const ACTION_VERB_PATTERNS = [
172
172
  ];
173
173
  /**
174
174
  * Decide whether the user's text should bypass the main agent and run
175
- * directly through the planner orchestrator. Conservative by design.
175
+ * directly through the planner orchestrator.
176
+ *
177
+ * Threshold model: combined "evidence count" ≥ 2, where each FANOUT
178
+ * SIGNAL and each MULTI-TARGET ACTION VERB counts as one piece of
179
+ * evidence. So "research my 100 leads and email each one" gets:
180
+ * - 1 fanout signal (numeric_collection: "100 leads")
181
+ * - 1 multi-target verb (research my 100 leads)
182
+ * = 2 → routes
183
+ *
184
+ * False-positive guards stay in front:
185
+ * - intent != followup/chat
186
+ * - text length ≥ 30 chars
187
+ * - NOT an informational query (what/tell me/show me/...)
188
+ * - at least ONE action verb (no orchestration for pure declarative
189
+ * statements even if they reference multiple items)
176
190
  */
177
191
  export function detectPreLlmPlanIntent(text, opts = {}) {
178
- const minLength = opts.minLength ?? 40;
179
- const minFanoutSignals = opts.minFanoutSignals ?? 2;
192
+ const minLength = opts.minLength ?? 30;
193
+ const minCombinedEvidence = opts.minFanoutSignals ?? 2;
180
194
  const trimmed = (text ?? '').trim();
181
195
  // Hard skips: intent says "not a task" → don't override.
182
196
  if (opts.intentType === 'followup' || opts.intentType === 'chat') {
@@ -191,7 +205,10 @@ export function detectPreLlmPlanIntent(text, opts = {}) {
191
205
  if (INFORMATIONAL_QUERY_PATTERN.test(trimmed)) {
192
206
  return { shouldRouteToPlanner: false, reason: 'informational_query', signals: [], actionVerbs: [] };
193
207
  }
194
- // Action-verb match: text must contain an explicit "do X for many" verb.
208
+ // Action-verb match: text must contain at least one explicit
209
+ // multi-target verb. This blocks pure declarative statements ("100
210
+ // prospects are in the pipeline" — referencing many items but no
211
+ // ask for work).
195
212
  const matchedVerbs = [];
196
213
  for (const { pattern, reason } of ACTION_VERB_PATTERNS) {
197
214
  if (pattern.test(trimmed))
@@ -200,20 +217,25 @@ export function detectPreLlmPlanIntent(text, opts = {}) {
200
217
  if (matchedVerbs.length === 0) {
201
218
  return { shouldRouteToPlanner: false, reason: 'no_action_verb', signals: [], actionVerbs: [] };
202
219
  }
203
- // Fanout signals (existing detector covers numeric counts,
204
- // collective+quantifier patterns, "for each", comprehensive research, etc.).
220
+ // Combined evidence: fanout signals + verb matches. Each piece of
221
+ // evidence independently suggests multi-step work; together they
222
+ // strongly do. Threshold ≥ 2 means a query with one numeric collection
223
+ // ("100 leads") AND one multi-target verb ("research those") routes,
224
+ // but a query with only a single verb match and no collection ("research the
225
+ // prospect Mark") does not.
205
226
  const fanoutReport = detectFanoutSignals(trimmed);
206
- if (fanoutReport.signals.length < minFanoutSignals) {
227
+ const combinedEvidence = fanoutReport.signals.length + matchedVerbs.length;
228
+ if (combinedEvidence < minCombinedEvidence) {
207
229
  return {
208
230
  shouldRouteToPlanner: false,
209
- reason: `weak_fanout_signal_count_${fanoutReport.signals.length}_below_${minFanoutSignals}`,
231
+ reason: `weak_evidence_${combinedEvidence}_below_${minCombinedEvidence}_(fanout=${fanoutReport.signals.length},verbs=${matchedVerbs.length})`,
210
232
  signals: fanoutReport.signals,
211
233
  actionVerbs: matchedVerbs,
212
234
  };
213
235
  }
214
236
  return {
215
237
  shouldRouteToPlanner: true,
216
- reason: `fanout=${fanoutReport.signals.length}+verbs=${matchedVerbs.length}`,
238
+ reason: `evidence=${combinedEvidence} (fanout=${fanoutReport.signals.length},verbs=${matchedVerbs.length})`,
217
239
  signals: fanoutReport.signals,
218
240
  actionVerbs: matchedVerbs,
219
241
  };
@@ -12,7 +12,28 @@ const MAX_STEPS = 10;
12
12
  const MAX_CONCURRENT_STEPS = 3;
13
13
  const RESULT_TRUNCATE_CHARS = 4000;
14
14
  const LONG_PLAN_WARNING_MS = 30 * 60 * 1000; // 30 minutes
15
+ // Step models the planner is allowed to assign to per-step execution.
16
+ // Opus is intentionally NOT in this list — it's reserved for the planner
17
+ // itself + final synthesis. Steps that need reasoning use Sonnet; routine
18
+ // extraction/lookup steps use Haiku. This keeps the high-cost Opus calls
19
+ // bounded (one planner call + one synthesis call per task) while
20
+ // execution stays cheap.
15
21
  const ALLOWED_MODELS = ['haiku', 'sonnet'];
22
+ // The planner's job is the highest-leverage decision in the orchestrator:
23
+ // a smart decomposition saves N sub-agent calls, a bad decomposition wastes
24
+ // them. So we default to Opus here even though it's the most expensive
25
+ // per-call model — one Opus call (~$0.30-0.50, no tools, ~500 input
26
+ // tokens, 1 turn) routinely saves $1-3 in retried sub-agent work.
27
+ //
28
+ // Override via CLEMENTINE_PLANNER_MODEL (haiku|sonnet|opus, case-insensitive; any other value falls back to opus). Synthesis
29
+ // (combining all step results into the user-facing response) uses the
30
+ // same model — both are reasoning-heavy, tool-free calls (planner capped at 1 turn, synthesis at 5).
31
+ const PLANNER_MODEL = (() => {
32
+ const env = process.env.CLEMENTINE_PLANNER_MODEL?.toLowerCase();
33
+ if (env === 'haiku' || env === 'sonnet' || env === 'opus')
34
+ return env;
35
+ return 'opus';
36
+ })();
16
37
  const PLANNER_PROMPT = `You are a task planner for an AI assistant. Decompose the following request into executable steps.
17
38
 
18
39
  **Planning Principles:**
@@ -475,6 +496,10 @@ export class PlanOrchestrator {
475
496
  finalResult = await this.assistant.runPlanStep(synthesisStepId, synthesisPrompt, {
476
497
  tier: 2,
477
498
  maxTurns: 5,
499
+ // Synthesis is reasoning-heavy: combine N step outputs into one
500
+ // coherent user-facing response. Same model tier as the planner —
501
+ // smart in, cheap-execution-out.
502
+ model: PLANNER_MODEL,
478
503
  disableTools: true,
479
504
  abortSignal: this.abortSignal,
480
505
  });
@@ -674,7 +699,7 @@ export class PlanOrchestrator {
674
699
  `If a step matches an agent's specialty, add "delegateTo": "agent-slug" to that step. ` +
675
700
  `The delegated agent will run the step with their own personality, tools, and expertise.\n`;
676
701
  }
677
- const plannerResult = await this.assistant.runPlanStep('planner', PLANNER_PROMPT + agentContext + task + PLANNER_PROMPT_SUFFIX, { tier: 2, maxTurns: 1, model: 'sonnet', disableTools: true, abortSignal: this.abortSignal });
702
+ const plannerResult = await this.assistant.runPlanStep('planner', PLANNER_PROMPT + agentContext + task + PLANNER_PROMPT_SUFFIX, { tier: 2, maxTurns: 1, model: PLANNER_MODEL, disableTools: true, abortSignal: this.abortSignal });
678
703
  // Parse JSON from the planner response
679
704
  const parsed = this.parseJsonFromResponse(plannerResult);
680
705
  if (!parsed?.steps || !Array.isArray(parsed.steps) || parsed.steps.length === 0) {
@@ -2608,7 +2608,15 @@ export class Gateway {
2608
2608
  this.getSession(sessionKey).abortController = planAc;
2609
2609
  const { PlanOrchestrator } = await import('../agent/orchestrator.js');
2610
2610
  const orchestrator = new PlanOrchestrator(this.assistant);
2611
- const result = await orchestrator.run(taskDescription, onProgress, onApproval, undefined, planAc.signal);
2611
+ // Make hired agents (Ross, Sasha, Nora, etc.) visible to the
2612
+ // planner so it can `delegateTo: <slug>` for steps that match
2613
+ // an agent's specialty. Without this the planner generates
2614
+ // generic steps even when a specialized agent is the right
2615
+ // choice. Empty list = solo Clementine, planner stays generic.
2616
+ const teamAgents = this.getAgentManager()
2617
+ .listAll()
2618
+ .filter(a => a.slug !== 'clementine');
2619
+ const result = await orchestrator.run(taskDescription, onProgress, onApproval, teamAgents.length > 0 ? teamAgents : undefined, planAc.signal);
2612
2620
  scanner.refreshIntegrity();
2613
2621
  this.assistant.injectContext(sessionKey, `[Plan: ${taskDescription}]`, result);
2614
2622
  return result;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.39",
3
+ "version": "1.18.41",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",