clementine-agent 1.0.31 → 1.0.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,13 @@
13
13
  */
14
14
  export interface ComplexityVerdict {
15
15
  complex: boolean;
16
+ /**
17
+ * High-confidence subset of `complex`. When true, the task is ambitious
18
+ * enough that the gateway should route it straight to deep/background
19
+ * execution instead of running a main-agent turn that would almost
20
+ * certainly get auto-escalated after burning tool calls.
21
+ */
22
+ deepWorthy: boolean;
16
23
  reason: string;
17
24
  signals: string[];
18
25
  }
@@ -11,6 +11,19 @@
11
11
  * what "plan" means — but much more consistent than a generic
12
12
  * SOUL.md directive that the model ignores half the time.
13
13
  */
14
+ /**
15
+ * Explicit phrasings that essentially request a long-running background job.
16
+ * Triggers deepWorthy on their own, regardless of other signals.
17
+ */
18
+ const DEEP_MODE_ASKS = [
19
+ /\b(deeply|extensively|thoroughly)\s+(research|analy[sz]e|investigate|audit|review)\b/i,
20
+ /\bcomprehensive(ly)?\s+(research|analy[sz]is|report|audit)\b/i,
21
+ /\bgo\s+(do|handle|tackle)\s+this\b/i,
22
+ /\brun\s+in\s+the\s+background\b/i,
23
+ /\bdeep\s+(mode|dive|work)\b/i,
24
+ /\bbackground\s+(task|work|job)\b/i,
25
+ /\btake\s+your\s+time\b/i,
26
+ ];
14
27
  /**
15
28
  * Action verbs that signal the user is asking Clementine to DO things
16
29
  * (as opposed to asking questions or making small talk). Multiple
@@ -82,18 +95,24 @@ function countEntities(text) {
82
95
  */
83
96
  export function classifyComplexity(text) {
84
97
  if (!text || typeof text !== 'string')
85
- return { complex: false, reason: 'empty', signals: [] };
98
+ return { complex: false, deepWorthy: false, reason: 'empty', signals: [] };
86
99
  const trimmed = text.trim();
87
100
  // Skip commands and very short messages
88
101
  if (trimmed.length < 30)
89
- return { complex: false, reason: 'too short', signals: [] };
102
+ return { complex: false, deepWorthy: false, reason: 'too short', signals: [] };
90
103
  if (trimmed.startsWith('!') || trimmed.startsWith('/'))
91
- return { complex: false, reason: 'command', signals: [] };
104
+ return { complex: false, deepWorthy: false, reason: 'command', signals: [] };
105
+ // Signal 0: explicit deep-mode ask — short-circuits both gates.
106
+ for (const re of DEEP_MODE_ASKS) {
107
+ if (re.test(trimmed)) {
108
+ return { complex: true, deepWorthy: true, reason: 'explicit deep-mode ask', signals: ['deep-mode-ask'] };
109
+ }
110
+ }
92
111
  const signals = [];
93
112
  // Signal 1: explicit ask for plan-first
94
113
  for (const re of EXPLICIT_PLAN_ASKS) {
95
114
  if (re.test(trimmed)) {
96
- return { complex: true, reason: 'user explicitly asked for a plan', signals: ['explicit-plan-ask'] };
115
+ return { complex: true, deepWorthy: false, reason: 'user explicitly asked for a plan', signals: ['explicit-plan-ask'] };
97
116
  }
98
117
  }
99
118
  // Signal 2: multiple action verbs
@@ -101,9 +120,11 @@ export function classifyComplexity(text) {
101
120
  if (verbs >= 3)
102
121
  signals.push(`${verbs} action verbs`);
103
122
  // Signal 3: chain markers
123
+ let hasChain = false;
104
124
  for (const re of CHAIN_MARKERS) {
105
125
  if (re.test(trimmed)) {
106
126
  signals.push('chain marker');
127
+ hasChain = true;
107
128
  break;
108
129
  }
109
130
  }
@@ -112,21 +133,31 @@ export function classifyComplexity(text) {
112
133
  if (entities >= 3)
113
134
  signals.push(`${entities} entities`);
114
135
  // Signal 5: long message with at least one action verb (big scope, not just a question)
115
- if (trimmed.length > 400 && verbs >= 1)
136
+ const isLong = trimmed.length > 400 && verbs >= 1;
137
+ if (isLong)
116
138
  signals.push('long + action');
117
139
  // Gate: at least 2 signals fire, OR a single high-confidence signal
118
140
  // (chain markers, explicit-plan-ask, or 3+ action verbs).
119
- const highConfidenceSingles = [
141
+ const highConfidenceSingles = [verbs >= 3, hasChain];
142
+ const complex = highConfidenceSingles.some(Boolean) || signals.length >= 2;
143
+ // deepWorthy raises the bar: one strong signal is not enough on its own.
144
+ // Specifically, any TWO of {3+ verbs, chain marker, long+action, 3+ entities}.
145
+ const strongCount = [
120
146
  verbs >= 3,
121
- signals.includes('chain marker'),
122
- ];
123
- if (highConfidenceSingles.some(Boolean)) {
124
- return { complex: true, reason: 'strong single signal', signals };
125
- }
126
- if (signals.length >= 2) {
127
- return { complex: true, reason: 'multiple signals', signals };
147
+ hasChain,
148
+ isLong,
149
+ entities >= 3,
150
+ ].filter(Boolean).length;
151
+ const deepWorthy = strongCount >= 2;
152
+ if (complex) {
153
+ return {
154
+ complex: true,
155
+ deepWorthy,
156
+ reason: deepWorthy ? 'deep-worthy: multiple strong signals' : (highConfidenceSingles.some(Boolean) ? 'strong single signal' : 'multiple signals'),
157
+ signals,
158
+ };
128
159
  }
129
- return { complex: false, reason: 'below threshold', signals };
160
+ return { complex: false, deepWorthy: false, reason: 'below threshold', signals };
130
161
  }
131
162
  /**
132
163
  * Build a system-prompt directive to inject when a complex message is
@@ -17,6 +17,7 @@ import path from 'node:path';
17
17
  import matter from 'gray-matter';
18
18
  import pino from 'pino';
19
19
  import { VAULT_DIR, AGENTS_DIR, PENDING_SKILLS_DIR } from '../config.js';
20
+ import { embed as embedText, cosineSimilarity, isReady as embeddingsReady } from '../memory/embeddings.js';
20
21
  const logger = pino({ name: 'clementine.skills' });
21
22
  const GLOBAL_SKILLS_DIR = path.join(VAULT_DIR, '00-System', 'skills');
22
23
  function agentSkillsDir(agentSlug) {
@@ -316,6 +317,25 @@ async function mergeSkill(assistant, existing, incoming) {
316
317
  return null;
317
318
  }
318
319
  }
320
+ /**
321
+ * Cache of skill embeddings so we don't re-embed every skill's frontmatter
322
+ * on every query. Keyed by the absolute path of the skill file; never
323
+ * invalidated (the cache stays in memory for the daemon's lifetime — skill
324
+ * edits require a restart, same as the rest of the skill pipeline).
325
+ */
326
+ const skillEmbeddingCache = new Map();
327
+ function getSkillEmbedding(filePath, triggers, title, description) {
328
+ const cached = skillEmbeddingCache.get(filePath);
329
+ if (cached)
330
+ return cached;
331
+ const corpus = [title, description, triggers.join(' ')].filter(Boolean).join(' ');
332
+ if (!corpus)
333
+ return null;
334
+ const vec = embedText(corpus);
335
+ if (vec)
336
+ skillEmbeddingCache.set(filePath, vec);
337
+ return vec;
338
+ }
319
339
  export function searchSkills(query, limit = 3, agentSlug, opts) {
320
340
  const dirs = [];
321
341
  // Agent-scoped skills get priority (boost=2)
@@ -333,6 +353,11 @@ export function searchSkills(query, limit = 3, agentSlug, opts) {
333
353
  const results = [];
334
354
  const seen = new Set();
335
355
  const suppressed = opts?.suppressedNames;
356
+ // Semantic matching is optional — only engages if the vault has built an
357
+ // embedding vocabulary (MemoryStore.buildEmbeddings). Falls back to pure
358
+ // keyword scoring for fresh installs.
359
+ const useSemantic = embeddingsReady();
360
+ const queryVec = useSemantic ? embedText(query) : null;
336
361
  for (const { dir, boost } of dirs) {
337
362
  const files = readdirSync(dir).filter(f => f.endsWith('.md'));
338
363
  for (const file of files) {
@@ -344,8 +369,9 @@ export function searchSkills(query, limit = 3, agentSlug, opts) {
344
369
  // negative user feedback (see store.getSkillsToSuppress).
345
370
  if (suppressed?.has(name))
346
371
  continue;
372
+ const filePath = path.join(dir, file);
347
373
  try {
348
- const raw = readFileSync(path.join(dir, file), 'utf-8');
374
+ const raw = readFileSync(filePath, 'utf-8');
349
375
  const parsed = matter(raw);
350
376
  const triggers = parsed.data.triggers ?? [];
351
377
  const title = parsed.data.title ?? '';
@@ -368,12 +394,27 @@ export function searchSkills(query, limit = 3, agentSlug, opts) {
368
394
  if (description.toLowerCase().includes(word))
369
395
  score += 1;
370
396
  }
371
- if (score > 0) {
397
+ // Semantic bonus: add cosine similarity × 4 so a strong semantic
398
+ // match (cos ~ 0.7+) contributes like a single keyword hit, and
399
+ // very close matches (cos ~ 0.9+) surface as a solid lead even
400
+ // when the user's phrasing doesn't share vocabulary with the
401
+ // skill's triggers. Keyword hits still dominate when present.
402
+ let semanticScore = 0;
403
+ if (queryVec) {
404
+ const skillVec = getSkillEmbedding(filePath, triggerLower, title, description);
405
+ if (skillVec) {
406
+ const cos = cosineSimilarity(queryVec, skillVec);
407
+ if (cos > 0.3)
408
+ semanticScore = cos * 4;
409
+ }
410
+ }
411
+ const totalScore = score + semanticScore;
412
+ if (totalScore > 0) {
372
413
  results.push({
373
414
  name,
374
415
  title,
375
416
  content: parsed.content.slice(0, 1500),
376
- score: score + boost,
417
+ score: totalScore + boost,
377
418
  toolsUsed: parsed.data.toolsUsed ?? [],
378
419
  attachments: parsed.data.attachments ?? [],
379
420
  skillDir: dir,
@@ -872,10 +872,43 @@ export class Gateway {
872
872
  const isInteractive = isOwnerDm
873
873
  || sessionKey.startsWith('dashboard:')
874
874
  || sessionKey.startsWith('cli:');
875
- if (isInteractive && !isInternalMsg && !text.startsWith('!')) {
875
+ if (isInteractive && !isInternalMsg && !text.startsWith('!') && !sess?.deepTask) {
876
876
  try {
877
877
  const { classifyComplexity, planFirstDirective } = await import('../agent/complexity-classifier.js');
878
878
  const verdict = classifyComplexity(text);
879
+ // deepWorthy: skip the main-agent turn entirely and route
880
+ // straight to background execution. Saves the turn that would
881
+ // almost certainly get auto-escalated after burning 3+ tool
882
+ // calls (see the post-flight auto-escalation path below).
883
+ if (verdict.deepWorthy) {
884
+ logger.info({ sessionKey, signals: verdict.signals, reason: verdict.reason }, 'Pre-flight deep-mode gate fired — spawning background task');
885
+ const currentSess = this.getSession(sessionKey);
886
+ const jobName = `deep-${Date.now()}`;
887
+ currentSess.deepTask = { jobName, taskDesc: text.slice(0, 200), startedAt: new Date().toISOString() };
888
+ const preflightAgentSlug = this._agentSlugFromSessionKey(sessionKey);
889
+ this.assistant.runUnleashedTask(jobName, `The user asked: ${text}\n\nThis was routed straight to background execution because it looks like sustained multi-step work. Complete the task thoroughly and return a conversational summary.`, 2, // tier 2 (Bash/Write/Edit enabled)
890
+ undefined, // default maxTurns
891
+ undefined, // default model
892
+ undefined, // default work_dir
893
+ 1, // maxHours
894
+ preflightAgentSlug).then(async (result) => {
895
+ logger.info({ sessionKey, jobName, resultLen: result?.length ?? 0 }, 'Pre-flight deep-mode task completed');
896
+ if (result && result !== '__NOTHING__') {
897
+ this.assistant.injectPendingContext(sessionKey, text, result);
898
+ await this._deliverDeepResult(sessionKey, `[DEEP_MODE_RESULT] You just completed background work for this user request. Summarize conversationally — lead with what matters.\n\nTask: ${text.slice(0, 500)}\n\nResult:\n${result.slice(0, 3000)}`, result);
899
+ }
900
+ }).catch(async (err) => {
901
+ logger.error({ err, sessionKey, jobName }, 'Pre-flight deep-mode task failed');
902
+ const failMsg = `Background work failed: ${String(err).slice(0, 200)}`;
903
+ this.assistant.injectPendingContext(sessionKey, text, failMsg);
904
+ await this._deliverDeepResult(sessionKey, `[DEEP_MODE_RESULT] The background task failed: ${failMsg}. Let the user know and suggest next steps. Be brief.`, `Background task failed: ${failMsg}`);
905
+ }).finally(() => {
906
+ const s = this.sessions.get(sessionKey);
907
+ if (s?.deepTask?.jobName === jobName)
908
+ delete s.deepTask;
909
+ });
910
+ return `On it — this looks like real work. Running it in the background; I'll follow up when it's done. Reply "cancel" to stop or "status" to check in.`;
911
+ }
879
912
  if (verdict.complex) {
880
913
  logger.info({ sessionKey, signals: verdict.signals, reason: verdict.reason }, 'Pre-flight planning directive injected');
881
914
  enrichedText = `${planFirstDirective()}\n\n---\n\n${text}`;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.0.31",
3
+ "version": "1.0.32",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",