clementine-agent 1.18.37 → 1.18.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -417,7 +417,17 @@ const SESSIONS_FILE = path.join(BASE_DIR, '.sessions.json');
417
417
  const MAX_SESSION_EXCHANGES = 40;
418
418
  const SESSION_EXPIRY_MS = 24 * 60 * 60 * 1000;
419
419
  const AUTO_MEMORY_MIN_LENGTH = 80;
420
- const AUTO_MEMORY_MODEL = MODELS.sonnet;
420
+ // Model used by the post-exchange memory extractor + the conversation
421
+ // summarizer. Both are routine "read this exchange, extract facts, call
422
+ // memory_write with structured JSON" tasks — Haiku handles them fine and
423
+ // they fire on EVERY substantive exchange, so the multiplier matters.
424
+ // Override with CLEMENTINE_AUTO_MEMORY_MODEL=sonnet (or opus) if you observe
425
+ // a drop in extraction quality.
426
+ const AUTO_MEMORY_MODEL = process.env.CLEMENTINE_AUTO_MEMORY_MODEL?.includes('sonnet')
427
+ ? MODELS.sonnet
428
+ : process.env.CLEMENTINE_AUTO_MEMORY_MODEL?.includes('opus')
429
+ ? MODELS.opus
430
+ : MODELS.haiku;
421
431
  const OWNER = OWNER_NAME || 'the user';
422
432
  const MCP_SERVER_SCRIPT = path.join(PKG_DIR, 'dist', 'tools', 'mcp-server.js');
423
433
  const TOOLS_SERVER = `${ASSISTANT_NAME.toLowerCase()}-tools`;
@@ -4623,6 +4633,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
4623
4633
  const blocks = getContentBlocks(message);
4624
4634
  summaryText += extractText(blocks);
4625
4635
  }
4636
+ else if (message.type === 'result') {
4637
+ // Make session-summarization cost visible in usage_log. Without
4638
+ // this, every session rotation spawned a Sonnet summarize call
4639
+ // that didn't appear in any metric.
4640
+ this.logQueryResult(message, 'summarize', `summarize:${sessionKey}`);
4641
+ }
4626
4642
  }
4627
4643
  if (summaryText.trim()) {
4628
4644
  if (this.memoryStore) {
@@ -4993,6 +5009,13 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
4993
5009
  });
4994
5010
  const collectedText = [];
4995
5011
  for await (const message of stream) {
5012
+ if (message.type === 'result') {
5013
+ // Auto-memory extraction fires after every substantive
5014
+ // exchange. Before this log call, its cost was invisible in
5015
+ // usage_log — a per-user-message Sonnet pass running silently.
5016
+ this.logQueryResult(message, 'auto_memory', `auto-memory:${sessionKey ?? 'unknown'}`, undefined, profile?.slug);
5017
+ continue;
5018
+ }
4996
5019
  if (message.type === 'assistant') {
4997
5020
  const blocks = getContentBlocks(message);
4998
5021
  for (const block of blocks) {
@@ -5651,6 +5674,11 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
5651
5674
  const blocks = getContentBlocks(message);
5652
5675
  responseText += extractText(blocks);
5653
5676
  }
5677
+ else if (message.type === 'result') {
5678
+ // Cron reflection (post-task quality check) fires after every
5679
+ // cron run. Cheap (Haiku, 1 turn, ~1KB) but should be visible.
5680
+ this.logQueryResult(message, 'cron_reflection', `reflection:${jobName}`, jobName);
5681
+ }
5654
5682
  }
5655
5683
  if (responseText.trim()) {
5656
5684
  const reflection = JSON.parse(responseText.trim());
@@ -56,4 +56,23 @@ export declare function buildFanoutDirectiveForText(text: string): {
56
56
  directive: string;
57
57
  report: FanoutSignalReport;
58
58
  };
59
+ export interface PreLlmPlanDecision {
60
+ shouldRouteToPlanner: boolean;
61
+ reason: string;
62
+ signals: FanoutSignal[];
63
+ actionVerbs: string[];
64
+ }
65
+ export interface PreLlmPlanOptions {
66
+ /** Result of intent classifier — routing skips followup/chat regardless of content. */
67
+ intentType?: 'task' | 'followup' | 'chat' | 'lookup' | string;
68
+ /** Minimum trimmed text length, in characters (default 40). Shorter queries are never routed to the planner. */
69
+ minLength?: number;
70
+ /** Conservative AND-threshold: require ≥N fanout signals (default 2) AND ≥1 action verb. */
71
+ minFanoutSignals?: number;
72
+ }
73
+ /**
74
+ * Decide whether the user's text should bypass the main agent and run
75
+ * directly through the planner orchestrator. Conservative by design.
76
+ */
77
+ export declare function detectPreLlmPlanIntent(text: string, opts?: PreLlmPlanOptions): PreLlmPlanDecision;
59
78
  //# sourceMappingURL=fanout-policy.d.ts.map
@@ -130,4 +130,92 @@ export function buildFanoutDirectiveForText(text) {
130
130
  report,
131
131
  };
132
132
  }
133
+ // ── Pre-LLM plan intent detection ─────────────────────────────────────
134
+ //
135
+ // detectFanoutSignals + the directive injection (above) are SOFT
136
+ // enforcement: we tell the agent "fan out for this." If the agent
137
+ // honors it, we win. If not, we still pay for a Sonnet turn that
138
+ // thrashes.
139
+ //
140
+ // Pre-LLM plan intent detection is HARD enforcement: when a user's
141
+ // query clearly maps to multi-step parallel work, route through the
142
+ // orchestrator BEFORE the main agent ever runs. The orchestrator
143
+ // decomposes into parallel Haiku/Sonnet sub-agents, each in its own
144
+ // context. The user's main agent never sees the big tool responses
145
+ // — it never gets a chance to thrash.
146
+ //
147
+ // Conservative gate: false positives waste a planner LLM call (~$0.05)
148
+ // + sub-agent calls. False negatives mean the existing soft-enforcement
149
+ // path runs, which is the status quo. So we tune to avoid false positives.
150
+ const INFORMATIONAL_QUERY_PATTERN = /^\s*(what|tell\s+me|show\s+me|is\s|are\s|do\s+you|how\s+(does|is|do)|why\s|when\s|where\s|who\s|did\s|have\s+you|can\s+you\s+(see|tell|show|describe|explain)|describe|explain|summarize)\b/i;
151
+ const ACTION_VERB_PATTERNS = [
152
+ {
153
+ // "research my top 10 prospects", "draft each prospect", "process all leads"
154
+ pattern: /\b(research|analyze|process|review|draft|write|send|email|message|outreach)\s+(each|all|every|those|these|my|our|the\s+\w+|\d+|\w+\s+(of\s+)?(my|our|the)\s+\w+)/i,
155
+ reason: 'multi-target action verb (research/analyze/draft/etc. on a collection)',
156
+ },
157
+ {
158
+ // "for each prospect, draft a follow-up"
159
+ pattern: /\bfor\s+(each|every|all)\b.*\b(do|run|send|draft|process|email|call|review|analyze|build|create|fetch)/i,
160
+ reason: '"for each X, do Y" pattern',
161
+ },
162
+ {
163
+ // "build a comprehensive content intelligence brief" — allow up to 4
164
+ // words between the verb and the deliverable noun.
165
+ pattern: /\b(build|prepare|produce|run|generate)\s+(a\s+|an\s+)?(\w+\s+){0,4}(brief|report|summary|analysis|comparison|recap|breakdown|dashboard|deck|index|list)\b/i,
166
+ reason: 'compound deliverable (brief/report/analysis)',
167
+ },
168
+ {
169
+ pattern: /\b(go\s+through|walk\s+through|process)\s+(every|all|each|my|the)\s+\w+/i,
170
+ reason: '"go through everyone/everything" pattern',
171
+ },
172
+ ];
173
+ /**
174
+ * Decide whether the user's text should bypass the main agent and run
175
+ * directly through the planner orchestrator. Conservative by design.
176
+ */
177
+ export function detectPreLlmPlanIntent(text, opts = {}) {
178
+ const minLength = opts.minLength ?? 40;
179
+ const minFanoutSignals = opts.minFanoutSignals ?? 2;
180
+ const trimmed = (text ?? '').trim();
181
+ // Hard skips: intent says "not a task" → don't override.
182
+ if (opts.intentType === 'followup' || opts.intentType === 'chat') {
183
+ return { shouldRouteToPlanner: false, reason: `intent_is_${opts.intentType}`, signals: [], actionVerbs: [] };
184
+ }
185
+ if (trimmed.length < minLength) {
186
+ return { shouldRouteToPlanner: false, reason: 'too_short', signals: [], actionVerbs: [] };
187
+ }
188
+ // Information-seeking patterns: "what/tell me/show me/etc." Let the
189
+ // agent answer directly even if collective wording is present
190
+ // ("tell me about all my prospects" is a status request, not work).
191
+ if (INFORMATIONAL_QUERY_PATTERN.test(trimmed)) {
192
+ return { shouldRouteToPlanner: false, reason: 'informational_query', signals: [], actionVerbs: [] };
193
+ }
194
+ // Action-verb match: text must contain an explicit "do X for many" verb.
195
+ const matchedVerbs = [];
196
+ for (const { pattern, reason } of ACTION_VERB_PATTERNS) {
197
+ if (pattern.test(trimmed))
198
+ matchedVerbs.push(reason);
199
+ }
200
+ if (matchedVerbs.length === 0) {
201
+ return { shouldRouteToPlanner: false, reason: 'no_action_verb', signals: [], actionVerbs: [] };
202
+ }
203
+ // Fanout signals (existing detector — covers numeric counts,
204
+ // collective+quantifier patterns, "for each", comprehensive research, etc.).
205
+ const fanoutReport = detectFanoutSignals(trimmed);
206
+ if (fanoutReport.signals.length < minFanoutSignals) {
207
+ return {
208
+ shouldRouteToPlanner: false,
209
+ reason: `weak_fanout_signal_count_${fanoutReport.signals.length}_below_${minFanoutSignals}`,
210
+ signals: fanoutReport.signals,
211
+ actionVerbs: matchedVerbs,
212
+ };
213
+ }
214
+ return {
215
+ shouldRouteToPlanner: true,
216
+ reason: `fanout=${fanoutReport.signals.length}+verbs=${matchedVerbs.length}`,
217
+ signals: fanoutReport.signals,
218
+ actionVerbs: matchedVerbs,
219
+ };
220
+ }
133
221
  //# sourceMappingURL=fanout-policy.js.map
@@ -23,6 +23,7 @@ import { assessActionResponse, buildActionEnforcementPrompt, buildApprovalFollow
23
23
  import { updateClementineJson } from '../config/clementine-json.js';
24
24
  import { buildCronDiagnosticResponse } from './cron-diagnostic-turn.js';
25
25
  import { classifyIntent } from '../agent/intent-classifier.js';
26
+ import { detectPreLlmPlanIntent } from '../agent/fanout-policy.js';
26
27
  import { decideTurn } from '../agent/turn-policy.js';
27
28
  import { recordProactiveNotificationEvent, } from './notification-context.js';
28
29
  import { isInternalSyntheticPrompt, resolveRecentOperationalContext } from './recent-context.js';
@@ -2087,6 +2088,60 @@ export class Gateway {
2087
2088
  delete sessState.pendingInterrupt;
2088
2089
  }
2089
2090
  try {
2091
+ // ── Pre-LLM plan routing (Gap #3 from orchestration audit) ──
2092
+ // When the user's text clearly maps to multi-step parallel
2093
+ // work, route through the orchestrator BEFORE the main agent
2094
+ // runs. This is HARD enforcement — independent of whether
2095
+ // the agent self-detects via [PLAN_NEEDED:]. Saves a Sonnet
2096
+ // turn that would likely thrash, and the planner's parallel
2097
+ // sub-agents (Haiku-default) keep big tool responses out of
2098
+ // the user's main context.
2099
+ //
2100
+ // Conservative gate: requires explicit action verbs +
2101
+ // multiple fanout signals + non-informational intent. False
2102
+ // positives waste a planner LLM call (~$0.05); false
2103
+ // negatives let the existing soft-enforcement path run, which
2104
+ // is the status quo. Trusted personal sessions only — we
2105
+ // don't surprise random Discord users with auto-orchestration.
2106
+ if (this.isTrustedPersonalSession(sessionKey)
2107
+ && !sessState.pendingInterrupt /* don't override mid-thought continuations */) {
2108
+ const planIntentDecision = detectPreLlmPlanIntent(originalText, {
2109
+ intentType: classifyIntent(originalText)?.type,
2110
+ });
2111
+ if (planIntentDecision.shouldRouteToPlanner) {
2112
+ logger.info({
2113
+ sessionKey: effectiveSessionKey,
2114
+ reason: planIntentDecision.reason,
2115
+ signals: planIntentDecision.signals.map(s => s.pattern),
2116
+ actionVerbs: planIntentDecision.actionVerbs,
2117
+ originalTextPreview: originalText.slice(0, 200),
2118
+ }, 'Pre-LLM plan routing: bypassing main agent for orchestrator');
2119
+ if (wrappedOnText) {
2120
+ try {
2121
+ await wrappedOnText('Detected a multi-step task — decomposing into parallel sub-agents…\n\n');
2122
+ }
2123
+ catch { /* streaming is best-effort */ }
2124
+ }
2125
+ try {
2126
+ const planResult = await this.handlePlan(effectiveSessionKey, originalText, undefined, // chat path doesn't need structured progress callbacks
2127
+ undefined);
2128
+ clearTimeout(chatTimer);
2129
+ clearTimeout(hardWallTimer);
2130
+ logger.info({
2131
+ sessionKey: effectiveSessionKey,
2132
+ totalMs: Date.now() - tInnerStart,
2133
+ routedVia: 'pre_llm_planner',
2134
+ responseLen: planResult.length,
2135
+ }, 'chat:latency');
2136
+ return planResult;
2137
+ }
2138
+ catch (err) {
2139
+ logger.warn({ err, sessionKey: effectiveSessionKey }, 'Pre-LLM plan routing failed — falling back to direct agent');
2140
+ // Fall through to the regular agent path so the user
2141
+ // still gets a response.
2142
+ }
2143
+ }
2144
+ }
2090
2145
  // No artificial turn cap — let the agent work until done.
2091
2146
  // Primary guardrail is cost budget (maxBudgetUsd in buildOptions).
2092
2147
  // Wall clock (CHAT_MAX_WALL_MS) and StallGuard are safety nets.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.37",
3
+ "version": "1.18.39",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",