clementine-agent 1.18.39 → 1.18.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -72,7 +72,21 @@ export interface PreLlmPlanOptions {
|
|
|
72
72
|
}
|
|
73
73
|
/**
|
|
74
74
|
* Decide whether the user's text should bypass the main agent and run
|
|
75
|
-
* directly through the planner orchestrator.
|
|
75
|
+
* directly through the planner orchestrator.
|
|
76
|
+
*
|
|
77
|
+
* Threshold model: combined "evidence count" ≥ 2, where each FANOUT
|
|
78
|
+
* SIGNAL and each MULTI-TARGET ACTION VERB counts as one piece of
|
|
79
|
+
* evidence. So "research my 100 leads and email each one" gets:
|
|
80
|
+
* - 1 fanout signal (numeric_collection: "100 leads")
|
|
81
|
+
* - 1 multi-target verb (research my 100 leads)
|
|
82
|
+
* = 2 → routes
|
|
83
|
+
*
|
|
84
|
+
* False-positive guards stay in front:
|
|
85
|
+
* - intent != followup/chat
|
|
86
|
+
* - text length ≥ 30 chars
|
|
87
|
+
* - NOT an informational query (what/tell me/show me/...)
|
|
88
|
+
* - at least ONE action verb (no orchestration for pure declarative
|
|
89
|
+
* statements even if they reference multiple items)
|
|
76
90
|
*/
|
|
77
91
|
export declare function detectPreLlmPlanIntent(text: string, opts?: PreLlmPlanOptions): PreLlmPlanDecision;
|
|
78
92
|
//# sourceMappingURL=fanout-policy.d.ts.map
|
|
@@ -172,11 +172,25 @@ const ACTION_VERB_PATTERNS = [
|
|
|
172
172
|
];
|
|
173
173
|
/**
|
|
174
174
|
* Decide whether the user's text should bypass the main agent and run
|
|
175
|
-
* directly through the planner orchestrator.
|
|
175
|
+
* directly through the planner orchestrator.
|
|
176
|
+
*
|
|
177
|
+
* Threshold model: combined "evidence count" ≥ 2, where each FANOUT
|
|
178
|
+
* SIGNAL and each MULTI-TARGET ACTION VERB counts as one piece of
|
|
179
|
+
* evidence. So "research my 100 leads and email each one" gets:
|
|
180
|
+
* - 1 fanout signal (numeric_collection: "100 leads")
|
|
181
|
+
* - 1 multi-target verb (research my 100 leads)
|
|
182
|
+
* = 2 → routes
|
|
183
|
+
*
|
|
184
|
+
* False-positive guards stay in front:
|
|
185
|
+
* - intent != followup/chat
|
|
186
|
+
* - text length ≥ 30 chars
|
|
187
|
+
* - NOT an informational query (what/tell me/show me/...)
|
|
188
|
+
* - at least ONE action verb (no orchestration for pure declarative
|
|
189
|
+
* statements even if they reference multiple items)
|
|
176
190
|
*/
|
|
177
191
|
export function detectPreLlmPlanIntent(text, opts = {}) {
|
|
178
|
-
const minLength = opts.minLength ??
|
|
179
|
-
const
|
|
192
|
+
const minLength = opts.minLength ?? 30;
|
|
193
|
+
const minCombinedEvidence = opts.minFanoutSignals ?? 2;
|
|
180
194
|
const trimmed = (text ?? '').trim();
|
|
181
195
|
// Hard skips: intent says "not a task" → don't override.
|
|
182
196
|
if (opts.intentType === 'followup' || opts.intentType === 'chat') {
|
|
@@ -191,7 +205,10 @@ export function detectPreLlmPlanIntent(text, opts = {}) {
|
|
|
191
205
|
if (INFORMATIONAL_QUERY_PATTERN.test(trimmed)) {
|
|
192
206
|
return { shouldRouteToPlanner: false, reason: 'informational_query', signals: [], actionVerbs: [] };
|
|
193
207
|
}
|
|
194
|
-
// Action-verb match: text must contain
|
|
208
|
+
// Action-verb match: text must contain at least one explicit
|
|
209
|
+
// multi-target verb. This blocks pure declarative statements ("100
|
|
210
|
+
// prospects are in the pipeline" — referencing many items but no
|
|
211
|
+
// ask for work).
|
|
195
212
|
const matchedVerbs = [];
|
|
196
213
|
for (const { pattern, reason } of ACTION_VERB_PATTERNS) {
|
|
197
214
|
if (pattern.test(trimmed))
|
|
@@ -200,20 +217,25 @@ export function detectPreLlmPlanIntent(text, opts = {}) {
|
|
|
200
217
|
if (matchedVerbs.length === 0) {
|
|
201
218
|
return { shouldRouteToPlanner: false, reason: 'no_action_verb', signals: [], actionVerbs: [] };
|
|
202
219
|
}
|
|
203
|
-
//
|
|
204
|
-
//
|
|
220
|
+
// Combined evidence: fanout signals + verb matches. Each piece of
|
|
221
|
+
// evidence independently suggests multi-step work; together they
|
|
222
|
+
// strongly do. Threshold ≥ 2 means a query with one numeric collection
|
|
223
|
+
// ("100 leads") AND one multi-target verb ("research those") routes,
|
|
224
|
+
// but a query with just a verb and no collection ("research the
|
|
225
|
+
// prospect Mark") does not.
|
|
205
226
|
const fanoutReport = detectFanoutSignals(trimmed);
|
|
206
|
-
|
|
227
|
+
const combinedEvidence = fanoutReport.signals.length + matchedVerbs.length;
|
|
228
|
+
if (combinedEvidence < minCombinedEvidence) {
|
|
207
229
|
return {
|
|
208
230
|
shouldRouteToPlanner: false,
|
|
209
|
-
reason: `
|
|
231
|
+
reason: `weak_evidence_${combinedEvidence}_below_${minCombinedEvidence}_(fanout=${fanoutReport.signals.length},verbs=${matchedVerbs.length})`,
|
|
210
232
|
signals: fanoutReport.signals,
|
|
211
233
|
actionVerbs: matchedVerbs,
|
|
212
234
|
};
|
|
213
235
|
}
|
|
214
236
|
return {
|
|
215
237
|
shouldRouteToPlanner: true,
|
|
216
|
-
reason: `fanout=${fanoutReport.signals.length}
|
|
238
|
+
reason: `evidence=${combinedEvidence} (fanout=${fanoutReport.signals.length},verbs=${matchedVerbs.length})`,
|
|
217
239
|
signals: fanoutReport.signals,
|
|
218
240
|
actionVerbs: matchedVerbs,
|
|
219
241
|
};
|
|
@@ -12,7 +12,28 @@ const MAX_STEPS = 10;
|
|
|
12
12
|
const MAX_CONCURRENT_STEPS = 3;
|
|
13
13
|
const RESULT_TRUNCATE_CHARS = 4000;
|
|
14
14
|
const LONG_PLAN_WARNING_MS = 30 * 60 * 1000; // 30 minutes
|
|
15
|
+
// Step models the planner is allowed to assign to per-step execution.
|
|
16
|
+
// Opus is intentionally NOT in this list — it's reserved for the planner
|
|
17
|
+
// itself + final synthesis. Steps that need reasoning use Sonnet; routine
|
|
18
|
+
// extraction/lookup steps use Haiku. This keeps the high-cost Opus calls
|
|
19
|
+
// bounded (one planner call + one synthesis call per task) while
|
|
20
|
+
// execution stays cheap.
|
|
15
21
|
const ALLOWED_MODELS = ['haiku', 'sonnet'];
|
|
22
|
+
// The planner's job is the highest-leverage decision in the orchestrator:
|
|
23
|
+
// a smart decomposition saves N sub-agent calls, a bad decomposition wastes
|
|
24
|
+
// them. So we default to Opus here even though it's the most expensive
|
|
25
|
+
// per-call model — one Opus call (~$0.30-0.50, no tools, ~500 input
|
|
26
|
+
// tokens, 1 turn) routinely saves $1-3 in retried sub-agent work.
|
|
27
|
+
//
|
|
28
|
+
// Override via CLEMENTINE_PLANNER_MODEL (haiku|sonnet|opus). Synthesis
|
|
29
|
+
// (combining all step results into the user-facing response) uses the
|
|
30
|
+
// same model — both are reasoning-heavy single-turn calls.
|
|
31
|
+
const PLANNER_MODEL = (() => {
|
|
32
|
+
const env = process.env.CLEMENTINE_PLANNER_MODEL?.toLowerCase();
|
|
33
|
+
if (env === 'haiku' || env === 'sonnet' || env === 'opus')
|
|
34
|
+
return env;
|
|
35
|
+
return 'opus';
|
|
36
|
+
})();
|
|
16
37
|
const PLANNER_PROMPT = `You are a task planner for an AI assistant. Decompose the following request into executable steps.
|
|
17
38
|
|
|
18
39
|
**Planning Principles:**
|
|
@@ -475,6 +496,10 @@ export class PlanOrchestrator {
|
|
|
475
496
|
finalResult = await this.assistant.runPlanStep(synthesisStepId, synthesisPrompt, {
|
|
476
497
|
tier: 2,
|
|
477
498
|
maxTurns: 5,
|
|
499
|
+
// Synthesis is reasoning-heavy: combine N step outputs into one
|
|
500
|
+
// coherent user-facing response. Same model tier as the planner —
|
|
501
|
+
// smart in, cheap-execution-out.
|
|
502
|
+
model: PLANNER_MODEL,
|
|
478
503
|
disableTools: true,
|
|
479
504
|
abortSignal: this.abortSignal,
|
|
480
505
|
});
|
|
@@ -674,7 +699,7 @@ export class PlanOrchestrator {
|
|
|
674
699
|
`If a step matches an agent's specialty, add "delegateTo": "agent-slug" to that step. ` +
|
|
675
700
|
`The delegated agent will run the step with their own personality, tools, and expertise.\n`;
|
|
676
701
|
}
|
|
677
|
-
const plannerResult = await this.assistant.runPlanStep('planner', PLANNER_PROMPT + agentContext + task + PLANNER_PROMPT_SUFFIX, { tier: 2, maxTurns: 1, model:
|
|
702
|
+
const plannerResult = await this.assistant.runPlanStep('planner', PLANNER_PROMPT + agentContext + task + PLANNER_PROMPT_SUFFIX, { tier: 2, maxTurns: 1, model: PLANNER_MODEL, disableTools: true, abortSignal: this.abortSignal });
|
|
678
703
|
// Parse JSON from the planner response
|
|
679
704
|
const parsed = this.parseJsonFromResponse(plannerResult);
|
|
680
705
|
if (!parsed?.steps || !Array.isArray(parsed.steps) || parsed.steps.length === 0) {
|
package/dist/gateway/router.js
CHANGED
|
@@ -2608,7 +2608,15 @@ export class Gateway {
|
|
|
2608
2608
|
this.getSession(sessionKey).abortController = planAc;
|
|
2609
2609
|
const { PlanOrchestrator } = await import('../agent/orchestrator.js');
|
|
2610
2610
|
const orchestrator = new PlanOrchestrator(this.assistant);
|
|
2611
|
-
|
|
2611
|
+
// Make hired agents (Ross, Sasha, Nora, etc.) visible to the
|
|
2612
|
+
// planner so it can `delegateTo: <slug>` for steps that match
|
|
2613
|
+
// an agent's specialty. Without this the planner generates
|
|
2614
|
+
// generic steps even when a specialized agent is the right
|
|
2615
|
+
// choice. Empty list = solo Clementine, planner stays generic.
|
|
2616
|
+
const teamAgents = this.getAgentManager()
|
|
2617
|
+
.listAll()
|
|
2618
|
+
.filter(a => a.slug !== 'clementine');
|
|
2619
|
+
const result = await orchestrator.run(taskDescription, onProgress, onApproval, teamAgents.length > 0 ? teamAgents : undefined, planAc.signal);
|
|
2612
2620
|
scanner.refreshIntegrity();
|
|
2613
2621
|
this.assistant.injectContext(sessionKey, `[Plan: ${taskDescription}]`, result);
|
|
2614
2622
|
return result;
|