npm - clementine-agent - Versions diffs - 1.18.39 → 1.18.41 - Mend

clementine-agent 1.18.39 → 1.18.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/agent/fanout-policy.d.ts +15 -1
package/dist/agent/fanout-policy.js +31 -9
package/dist/agent/orchestrator.js +26 -1
package/dist/gateway/router.js +9 -1
package/package.json +1 -1

package/dist/agent/fanout-policy.d.ts CHANGED Viewed

@@ -72,7 +72,21 @@ export interface PreLlmPlanOptions {
 }
 /**
  * Decide whether the user's text should bypass the main agent and run
- * directly through the planner orchestrator. Conservative by design.
+ * directly through the planner orchestrator.
+ *
+ * Threshold model: combined "evidence count" ≥ 2, where each FANOUT
+ * SIGNAL and each MULTI-TARGET ACTION VERB counts as one piece of
+ * evidence. So "research my 100 leads and email each one" gets:
+ *   - 1 fanout signal (numeric_collection: "100 leads")
+ *   - 1 multi-target verb (research my 100 leads)
+ *   = 2 → routes
+ *
+ * False-positive guards stay in front:
+ *   - intent != followup/chat
+ *   - text length ≥ 30 chars
+ *   - NOT an informational query (what/tell me/show me/...)
+ *   - at least ONE action verb (no orchestration for pure declarative
+ *     statements even if they reference multiple items)
  */
 export declare function detectPreLlmPlanIntent(text: string, opts?: PreLlmPlanOptions): PreLlmPlanDecision;
 //# sourceMappingURL=fanout-policy.d.ts.map

package/dist/agent/fanout-policy.js CHANGED Viewed

@@ -172,11 +172,25 @@ const ACTION_VERB_PATTERNS = [
 ];
 /**
  * Decide whether the user's text should bypass the main agent and run
- * directly through the planner orchestrator. Conservative by design.
+ * directly through the planner orchestrator.
+ *
+ * Threshold model: combined "evidence count" ≥ 2, where each FANOUT
+ * SIGNAL and each MULTI-TARGET ACTION VERB counts as one piece of
+ * evidence. So "research my 100 leads and email each one" gets:
+ *   - 1 fanout signal (numeric_collection: "100 leads")
+ *   - 1 multi-target verb (research my 100 leads)
+ *   = 2 → routes
+ *
+ * False-positive guards stay in front:
+ *   - intent != followup/chat
+ *   - text length ≥ 30 chars
+ *   - NOT an informational query (what/tell me/show me/...)
+ *   - at least ONE action verb (no orchestration for pure declarative
+ *     statements even if they reference multiple items)
  */
 export function detectPreLlmPlanIntent(text, opts = {}) {
-    const minLength = opts.minLength ?? 40;
-    const minFanoutSignals = opts.minFanoutSignals ?? 2;
+    const minLength = opts.minLength ?? 30;
+    const minCombinedEvidence = opts.minFanoutSignals ?? 2;
     const trimmed = (text ?? '').trim();
     // Hard skips: intent says "not a task" → don't override.
     if (opts.intentType === 'followup' || opts.intentType === 'chat') {
@@ -191,7 +205,10 @@ export function detectPreLlmPlanIntent(text, opts = {}) {
     if (INFORMATIONAL_QUERY_PATTERN.test(trimmed)) {
         return { shouldRouteToPlanner: false, reason: 'informational_query', signals: [], actionVerbs: [] };
     }
-    // Action-verb match: text must contain an explicit "do X for many" verb.
+    // Action-verb match: text must contain at least one explicit
+    // multi-target verb. This blocks pure declarative statements ("100
+    // prospects are in the pipeline" — referencing many items but no
+    // ask for work).
     const matchedVerbs = [];
     for (const { pattern, reason } of ACTION_VERB_PATTERNS) {
         if (pattern.test(trimmed))
@@ -200,20 +217,25 @@ export function detectPreLlmPlanIntent(text, opts = {}) {
     if (matchedVerbs.length === 0) {
         return { shouldRouteToPlanner: false, reason: 'no_action_verb', signals: [], actionVerbs: [] };
     }
-    // Fanout signals (existing detector — covers numeric counts,
-    // collective+quantifier patterns, "for each", comprehensive research, etc.).
+    // Combined evidence: fanout signals + verb matches. Each piece of
+    // evidence independently suggests multi-step work; together they
+    // suggest it strongly. With the default threshold of 2, a query with
+    // one numeric collection ("100 leads") AND one multi-target verb
+    // ("research those") routes, but a query with a single verb match and
+    // no fanout signal ("research the prospect Mark") does not.
     const fanoutReport = detectFanoutSignals(trimmed);
-    if (fanoutReport.signals.length < minFanoutSignals) {
+    const combinedEvidence = fanoutReport.signals.length + matchedVerbs.length;
+    if (combinedEvidence < minCombinedEvidence) {
         return {
             shouldRouteToPlanner: false,
-            reason: `weak_fanout_signal_count_${fanoutReport.signals.length}_below_${minFanoutSignals}`,
+            reason: `weak_evidence_${combinedEvidence}_below_${minCombinedEvidence}_(fanout=${fanoutReport.signals.length},verbs=${matchedVerbs.length})`,
             signals: fanoutReport.signals,
             actionVerbs: matchedVerbs,
         };
     }
     return {
         shouldRouteToPlanner: true,
-        reason: `fanout=${fanoutReport.signals.length}+verbs=${matchedVerbs.length}`,
+        reason: `evidence=${combinedEvidence} (fanout=${fanoutReport.signals.length},verbs=${matchedVerbs.length})`,
         signals: fanoutReport.signals,
         actionVerbs: matchedVerbs,
     };

package/dist/agent/orchestrator.js CHANGED Viewed

@@ -12,7 +12,28 @@ const MAX_STEPS = 10;
 const MAX_CONCURRENT_STEPS = 3;
 const RESULT_TRUNCATE_CHARS = 4000;
 const LONG_PLAN_WARNING_MS = 30 * 60 * 1000; // 30 minutes
+// Step models the planner is allowed to assign to per-step execution.
+// Opus is intentionally NOT in this list — it's reserved for the planner
+// itself + final synthesis. Steps that need reasoning use Sonnet; routine
+// extraction/lookup steps use Haiku. This keeps the high-cost Opus calls
+// bounded (one planner call + one synthesis call per task) while
+// execution stays cheap.
 const ALLOWED_MODELS = ['haiku', 'sonnet'];
+// The planner's job is the highest-leverage decision in the orchestrator:
+// a smart decomposition saves N sub-agent calls, a bad decomposition wastes
+// them. So we default to Opus here even though it's the most expensive
+// per-call model — one Opus call (~$0.30-0.50, no tools, ~500 input
+// tokens, 1 turn) routinely saves $1-3 in retried sub-agent work.
+//
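+// Override via CLEMENTINE_PLANNER_MODEL (haiku|sonnet|opus, matched
+// case-insensitively; any other value falls back to opus). Synthesis
+// (combining all step results into the user-facing response) uses the
+// same model — both are reasoning-heavy, tool-free calls (the planner
+// runs a single turn; synthesis is allowed up to 5).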
+const PLANNER_MODEL = (() => {
+    const env = process.env.CLEMENTINE_PLANNER_MODEL?.toLowerCase();
+    if (env === 'haiku' || env === 'sonnet' || env === 'opus')
+        return env;
+    return 'opus';
+})();
 const PLANNER_PROMPT = `You are a task planner for an AI assistant. Decompose the following request into executable steps.
 **Planning Principles:**
@@ -475,6 +496,10 @@ export class PlanOrchestrator {
             finalResult = await this.assistant.runPlanStep(synthesisStepId, synthesisPrompt, {
                 tier: 2,
                 maxTurns: 5,
+                // Synthesis is reasoning-heavy: combine N step outputs into one
+                // coherent user-facing response. It uses the same model as the
+                // planner (PLANNER_MODEL) rather than a per-step model.
+                model: PLANNER_MODEL,
                 disableTools: true,
                 abortSignal: this.abortSignal,
             });
@@ -674,7 +699,7 @@ export class PlanOrchestrator {
                 `If a step matches an agent's specialty, add "delegateTo": "agent-slug" to that step. ` +
                 `The delegated agent will run the step with their own personality, tools, and expertise.\n`;
         }
-        const plannerResult = await this.assistant.runPlanStep('planner', PLANNER_PROMPT + agentContext + task + PLANNER_PROMPT_SUFFIX, { tier: 2, maxTurns: 1, model: 'sonnet', disableTools: true, abortSignal: this.abortSignal });
+        const plannerResult = await this.assistant.runPlanStep('planner', PLANNER_PROMPT + agentContext + task + PLANNER_PROMPT_SUFFIX, { tier: 2, maxTurns: 1, model: PLANNER_MODEL, disableTools: true, abortSignal: this.abortSignal });
         // Parse JSON from the planner response
         const parsed = this.parseJsonFromResponse(plannerResult);
         if (!parsed?.steps || !Array.isArray(parsed.steps) || parsed.steps.length === 0) {

package/dist/gateway/router.js CHANGED Viewed

@@ -2608,7 +2608,15 @@ export class Gateway {
                 this.getSession(sessionKey).abortController = planAc;
                 const { PlanOrchestrator } = await import('../agent/orchestrator.js');
                 const orchestrator = new PlanOrchestrator(this.assistant);
-                const result = await orchestrator.run(taskDescription, onProgress, onApproval, undefined, planAc.signal);
+                // Make hired agents (Ross, Sasha, Nora, etc.) visible to the
+                // planner so it can `delegateTo: <slug>` for steps that match
+                // an agent's specialty. Without this the planner generates
+                // generic steps even when a specialized agent is the right
+                // choice. Empty list = solo Clementine, planner stays generic.
+                const teamAgents = this.getAgentManager()
+                    .listAll()
+                    .filter(a => a.slug !== 'clementine');
+                const result = await orchestrator.run(taskDescription, onProgress, onApproval, teamAgents.length > 0 ? teamAgents : undefined, planAc.signal);
                 scanner.refreshIntegrity();
                 this.assistant.injectContext(sessionKey, `[Plan: ${taskDescription}]`, result);
                 return result;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.18.39",
+  "version": "1.18.41",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",