clementine-agent 1.18.38 → 1.18.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/fanout-policy.d.ts +33 -0
- package/dist/agent/fanout-policy.js +110 -0
- package/dist/gateway/router.js +64 -1
- package/package.json +1 -1
|
@@ -56,4 +56,37 @@ export declare function buildFanoutDirectiveForText(text: string): {
|
|
|
56
56
|
directive: string;
|
|
57
57
|
report: FanoutSignalReport;
|
|
58
58
|
};
|
|
59
|
+
export interface PreLlmPlanDecision {
|
|
60
|
+
shouldRouteToPlanner: boolean;
|
|
61
|
+
reason: string;
|
|
62
|
+
signals: FanoutSignal[];
|
|
63
|
+
actionVerbs: string[];
|
|
64
|
+
}
|
|
65
|
+
export interface PreLlmPlanOptions {
|
|
66
|
+
/** Result of intent classifier — routing skips followup/chat regardless of content. */
|
|
67
|
+
intentType?: 'task' | 'followup' | 'chat' | 'lookup' | string;
|
|
68
|
+
/** Pre-LLM minimum length. Short queries can't be plan-worthy. */
|
|
69
|
+
minLength?: number;
|
|
70
|
+
/** Combined-evidence threshold: require (fanout signals + action-verb matches) ≥ N, with ≥1 action verb. Defaults to 2. */
|
|
71
|
+
minFanoutSignals?: number;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Decide whether the user's text should bypass the main agent and run
|
|
75
|
+
* directly through the planner orchestrator.
|
|
76
|
+
*
|
|
77
|
+
* Threshold model: combined "evidence count" ≥ 2, where each FANOUT
|
|
78
|
+
* SIGNAL and each MULTI-TARGET ACTION VERB counts as one piece of
|
|
79
|
+
* evidence. So "research my 100 leads and email each one" gets:
|
|
80
|
+
* - 1 fanout signal (numeric_collection: "100 leads")
|
|
81
|
+
* - 1 multi-target verb (research my 100 leads)
|
|
82
|
+
* = 2 → routes
|
|
83
|
+
*
|
|
84
|
+
* False-positive guards stay in front:
|
|
85
|
+
* - intent != followup/chat
|
|
86
|
+
* - text length ≥ 30 chars
|
|
87
|
+
* - NOT an informational query (what/tell me/show me/...)
|
|
88
|
+
* - at least ONE action verb (no orchestration for pure declarative
|
|
89
|
+
* statements even if they reference multiple items)
|
|
90
|
+
*/
|
|
91
|
+
export declare function detectPreLlmPlanIntent(text: string, opts?: PreLlmPlanOptions): PreLlmPlanDecision;
|
|
59
92
|
//# sourceMappingURL=fanout-policy.d.ts.map
|
|
@@ -130,4 +130,114 @@ export function buildFanoutDirectiveForText(text) {
|
|
|
130
130
|
report,
|
|
131
131
|
};
|
|
132
132
|
}
|
|
133
|
+
// ── Pre-LLM plan intent detection ─────────────────────────────────────
|
|
134
|
+
//
|
|
135
|
+
// detectFanoutSignals + the directive injection (above) are SOFT
|
|
136
|
+
// enforcement: we tell the agent "fan out for this." If the agent
|
|
137
|
+
// honors it, we win. If not, we still pay for a Sonnet turn that
|
|
138
|
+
// thrashes.
|
|
139
|
+
//
|
|
140
|
+
// Pre-LLM plan intent detection is HARD enforcement: when a user's
|
|
141
|
+
// query clearly maps to multi-step parallel work, route through the
|
|
142
|
+
// orchestrator BEFORE the main agent ever runs. The orchestrator
|
|
143
|
+
// decomposes into parallel Haiku/Sonnet sub-agents, each in its own
|
|
144
|
+
// context. The user's main agent never sees the big tool responses
|
|
145
|
+
// — it never gets a chance to thrash.
|
|
146
|
+
//
|
|
147
|
+
// Conservative gate: false positives waste a planner LLM call (~$0.05)
|
|
148
|
+
// + sub-agent calls. False negatives mean the existing soft-enforcement
|
|
149
|
+
// path runs, which is the status quo. So we tune to avoid false positives.
|
|
150
|
+
const INFORMATIONAL_QUERY_PATTERN = /^\s*(what|tell\s+me|show\s+me|is\s|are\s|do\s+you|how\s+(does|is|do)|why\s|when\s|where\s|who\s|did\s|have\s+you|can\s+you\s+(see|tell|show|describe|explain)|describe|explain|summarize)\b/i;
|
|
151
|
+
const ACTION_VERB_PATTERNS = [
|
|
152
|
+
{
|
|
153
|
+
// "research my top 10 prospects", "draft each prospect", "process all leads"
|
|
154
|
+
pattern: /\b(research|analyze|process|review|draft|write|send|email|message|outreach)\s+(each|all|every|those|these|my|our|the\s+\w+|\d+|\w+\s+(of\s+)?(my|our|the)\s+\w+)/i,
|
|
155
|
+
reason: 'multi-target action verb (research/analyze/draft/etc. on a collection)',
|
|
156
|
+
},
|
|
157
|
+
{
|
|
158
|
+
// "for each prospect, draft a follow-up"
|
|
159
|
+
pattern: /\bfor\s+(each|every|all)\b.*\b(do|run|send|draft|process|email|call|review|analyze|build|create|fetch)/i,
|
|
160
|
+
reason: '"for each X, do Y" pattern',
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
// "build a comprehensive content intelligence brief" — allow up to 4
|
|
164
|
+
// words between the verb and the deliverable noun.
|
|
165
|
+
pattern: /\b(build|prepare|produce|run|generate)\s+(a\s+|an\s+)?(\w+\s+){0,4}(brief|report|summary|analysis|comparison|recap|breakdown|dashboard|deck|index|list)\b/i,
|
|
166
|
+
reason: 'compound deliverable (brief/report/analysis)',
|
|
167
|
+
},
|
|
168
|
+
{
|
|
169
|
+
pattern: /\b(go\s+through|walk\s+through|process)\s+(every|all|each|my|the)\s+\w+/i,
|
|
170
|
+
reason: '"go through everyone/everything" pattern',
|
|
171
|
+
},
|
|
172
|
+
];
|
|
173
|
+
/**
|
|
174
|
+
* Decide whether the user's text should bypass the main agent and run
|
|
175
|
+
* directly through the planner orchestrator.
|
|
176
|
+
*
|
|
177
|
+
* Threshold model: combined "evidence count" ≥ 2, where each FANOUT
|
|
178
|
+
* SIGNAL and each MULTI-TARGET ACTION VERB counts as one piece of
|
|
179
|
+
* evidence. So "research my 100 leads and email each one" gets:
|
|
180
|
+
* - 1 fanout signal (numeric_collection: "100 leads")
|
|
181
|
+
* - 1 multi-target verb (research my 100 leads)
|
|
182
|
+
* = 2 → routes
|
|
183
|
+
*
|
|
184
|
+
* False-positive guards stay in front:
|
|
185
|
+
* - intent != followup/chat
|
|
186
|
+
* - text length ≥ 30 chars
|
|
187
|
+
* - NOT an informational query (what/tell me/show me/...)
|
|
188
|
+
* - at least ONE action verb (no orchestration for pure declarative
|
|
189
|
+
* statements even if they reference multiple items)
|
|
190
|
+
*/
|
|
191
|
+
export function detectPreLlmPlanIntent(text, opts = {}) {
|
|
192
|
+
const minLength = opts.minLength ?? 30;
|
|
193
|
+
const minCombinedEvidence = opts.minFanoutSignals ?? 2;
|
|
194
|
+
const trimmed = (text ?? '').trim();
|
|
195
|
+
// Hard skips: intent says "not a task" → don't override.
|
|
196
|
+
if (opts.intentType === 'followup' || opts.intentType === 'chat') {
|
|
197
|
+
return { shouldRouteToPlanner: false, reason: `intent_is_${opts.intentType}`, signals: [], actionVerbs: [] };
|
|
198
|
+
}
|
|
199
|
+
if (trimmed.length < minLength) {
|
|
200
|
+
return { shouldRouteToPlanner: false, reason: 'too_short', signals: [], actionVerbs: [] };
|
|
201
|
+
}
|
|
202
|
+
// Information-seeking patterns: "what/tell me/show me/etc." Let the
|
|
203
|
+
// agent answer directly even if collective wording is present
|
|
204
|
+
// ("tell me about all my prospects" is a status request, not work).
|
|
205
|
+
if (INFORMATIONAL_QUERY_PATTERN.test(trimmed)) {
|
|
206
|
+
return { shouldRouteToPlanner: false, reason: 'informational_query', signals: [], actionVerbs: [] };
|
|
207
|
+
}
|
|
208
|
+
// Action-verb match: text must contain at least one explicit
|
|
209
|
+
// multi-target verb. This blocks pure declarative statements ("100
|
|
210
|
+
// prospects are in the pipeline" — referencing many items but no
|
|
211
|
+
// ask for work).
|
|
212
|
+
const matchedVerbs = [];
|
|
213
|
+
for (const { pattern, reason } of ACTION_VERB_PATTERNS) {
|
|
214
|
+
if (pattern.test(trimmed))
|
|
215
|
+
matchedVerbs.push(reason);
|
|
216
|
+
}
|
|
217
|
+
if (matchedVerbs.length === 0) {
|
|
218
|
+
return { shouldRouteToPlanner: false, reason: 'no_action_verb', signals: [], actionVerbs: [] };
|
|
219
|
+
}
|
|
220
|
+
// Combined evidence: fanout signals + verb matches. Each piece of
|
|
221
|
+
// evidence independently suggests multi-step work; together they
|
|
222
|
+
// strongly do. Threshold ≥ 2 means a query with one numeric collection
|
|
223
|
+
// ("100 leads") AND one multi-target verb ("research those") routes,
|
|
224
|
+
// but a query with just a verb and no collection ("research the
|
|
225
|
+
// prospect Mark") does not.
|
|
226
|
+
const fanoutReport = detectFanoutSignals(trimmed);
|
|
227
|
+
const combinedEvidence = fanoutReport.signals.length + matchedVerbs.length;
|
|
228
|
+
if (combinedEvidence < minCombinedEvidence) {
|
|
229
|
+
return {
|
|
230
|
+
shouldRouteToPlanner: false,
|
|
231
|
+
reason: `weak_evidence_${combinedEvidence}_below_${minCombinedEvidence}_(fanout=${fanoutReport.signals.length},verbs=${matchedVerbs.length})`,
|
|
232
|
+
signals: fanoutReport.signals,
|
|
233
|
+
actionVerbs: matchedVerbs,
|
|
234
|
+
};
|
|
235
|
+
}
|
|
236
|
+
return {
|
|
237
|
+
shouldRouteToPlanner: true,
|
|
238
|
+
reason: `evidence=${combinedEvidence} (fanout=${fanoutReport.signals.length},verbs=${matchedVerbs.length})`,
|
|
239
|
+
signals: fanoutReport.signals,
|
|
240
|
+
actionVerbs: matchedVerbs,
|
|
241
|
+
};
|
|
242
|
+
}
|
|
133
243
|
//# sourceMappingURL=fanout-policy.js.map
|
package/dist/gateway/router.js
CHANGED
|
@@ -23,6 +23,7 @@ import { assessActionResponse, buildActionEnforcementPrompt, buildApprovalFollow
|
|
|
23
23
|
import { updateClementineJson } from '../config/clementine-json.js';
|
|
24
24
|
import { buildCronDiagnosticResponse } from './cron-diagnostic-turn.js';
|
|
25
25
|
import { classifyIntent } from '../agent/intent-classifier.js';
|
|
26
|
+
import { detectPreLlmPlanIntent } from '../agent/fanout-policy.js';
|
|
26
27
|
import { decideTurn } from '../agent/turn-policy.js';
|
|
27
28
|
import { recordProactiveNotificationEvent, } from './notification-context.js';
|
|
28
29
|
import { isInternalSyntheticPrompt, resolveRecentOperationalContext } from './recent-context.js';
|
|
@@ -2087,6 +2088,60 @@ export class Gateway {
|
|
|
2087
2088
|
delete sessState.pendingInterrupt;
|
|
2088
2089
|
}
|
|
2089
2090
|
try {
|
|
2091
|
+
// ── Pre-LLM plan routing (Gap #3 from orchestration audit) ──
|
|
2092
|
+
// When the user's text clearly maps to multi-step parallel
|
|
2093
|
+
// work, route through the orchestrator BEFORE the main agent
|
|
2094
|
+
// runs. This is HARD enforcement — independent of whether
|
|
2095
|
+
// the agent self-detects via [PLAN_NEEDED:]. Saves a Sonnet
|
|
2096
|
+
// turn that would likely thrash, and the planner's parallel
|
|
2097
|
+
// sub-agents (Haiku-default) keep big tool responses out of
|
|
2098
|
+
// the user's main context.
|
|
2099
|
+
//
|
|
2100
|
+
// Conservative gate: requires explicit action verbs +
|
|
2101
|
+
// combined fanout/verb evidence ≥ 2 + non-informational intent. False
|
|
2102
|
+
// positives waste a planner LLM call (~$0.05); false
|
|
2103
|
+
// negatives let the existing soft-enforcement path run, which
|
|
2104
|
+
// is the status quo. Trusted personal sessions only — we
|
|
2105
|
+
// don't surprise random Discord users with auto-orchestration.
|
|
2106
|
+
if (this.isTrustedPersonalSession(sessionKey)
|
|
2107
|
+
&& !sessState.pendingInterrupt /* don't override mid-thought continuations */) {
|
|
2108
|
+
const planIntentDecision = detectPreLlmPlanIntent(originalText, {
|
|
2109
|
+
intentType: classifyIntent(originalText)?.type,
|
|
2110
|
+
});
|
|
2111
|
+
if (planIntentDecision.shouldRouteToPlanner) {
|
|
2112
|
+
logger.info({
|
|
2113
|
+
sessionKey: effectiveSessionKey,
|
|
2114
|
+
reason: planIntentDecision.reason,
|
|
2115
|
+
signals: planIntentDecision.signals.map(s => s.pattern),
|
|
2116
|
+
actionVerbs: planIntentDecision.actionVerbs,
|
|
2117
|
+
originalTextPreview: originalText.slice(0, 200),
|
|
2118
|
+
}, 'Pre-LLM plan routing: bypassing main agent for orchestrator');
|
|
2119
|
+
if (wrappedOnText) {
|
|
2120
|
+
try {
|
|
2121
|
+
await wrappedOnText('Detected a multi-step task — decomposing into parallel sub-agents…\n\n');
|
|
2122
|
+
}
|
|
2123
|
+
catch { /* streaming is best-effort */ }
|
|
2124
|
+
}
|
|
2125
|
+
try {
|
|
2126
|
+
const planResult = await this.handlePlan(effectiveSessionKey, originalText, undefined, // chat path doesn't need structured progress callbacks
|
|
2127
|
+
undefined);
|
|
2128
|
+
clearTimeout(chatTimer);
|
|
2129
|
+
clearTimeout(hardWallTimer);
|
|
2130
|
+
logger.info({
|
|
2131
|
+
sessionKey: effectiveSessionKey,
|
|
2132
|
+
totalMs: Date.now() - tInnerStart,
|
|
2133
|
+
routedVia: 'pre_llm_planner',
|
|
2134
|
+
responseLen: planResult.length,
|
|
2135
|
+
}, 'chat:latency');
|
|
2136
|
+
return planResult;
|
|
2137
|
+
}
|
|
2138
|
+
catch (err) {
|
|
2139
|
+
logger.warn({ err, sessionKey: effectiveSessionKey }, 'Pre-LLM plan routing failed — falling back to direct agent');
|
|
2140
|
+
// Fall through to the regular agent path so the user
|
|
2141
|
+
// still gets a response.
|
|
2142
|
+
}
|
|
2143
|
+
}
|
|
2144
|
+
}
|
|
2090
2145
|
// No artificial turn cap — let the agent work until done.
|
|
2091
2146
|
// Primary guardrail is cost budget (maxBudgetUsd in buildOptions).
|
|
2092
2147
|
// Wall clock (CHAT_MAX_WALL_MS) and StallGuard are safety nets.
|
|
@@ -2553,7 +2608,15 @@ export class Gateway {
|
|
|
2553
2608
|
this.getSession(sessionKey).abortController = planAc;
|
|
2554
2609
|
const { PlanOrchestrator } = await import('../agent/orchestrator.js');
|
|
2555
2610
|
const orchestrator = new PlanOrchestrator(this.assistant);
|
|
2556
|
-
|
|
2611
|
+
// Make hired agents (Ross, Sasha, Nora, etc.) visible to the
|
|
2612
|
+
// planner so it can `delegateTo: <slug>` for steps that match
|
|
2613
|
+
// an agent's specialty. Without this the planner generates
|
|
2614
|
+
// generic steps even when a specialized agent is the right
|
|
2615
|
+
// choice. Empty list = solo Clementine, planner stays generic.
|
|
2616
|
+
const teamAgents = this.getAgentManager()
|
|
2617
|
+
.listAll()
|
|
2618
|
+
.filter(a => a.slug !== 'clementine');
|
|
2619
|
+
const result = await orchestrator.run(taskDescription, onProgress, onApproval, teamAgents.length > 0 ? teamAgents : undefined, planAc.signal);
|
|
2557
2620
|
scanner.refreshIntegrity();
|
|
2558
2621
|
this.assistant.injectContext(sessionKey, `[Plan: ${taskDescription}]`, result);
|
|
2559
2622
|
return result;
|