clementine-agent 1.18.57 → 1.18.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,7 @@ import { type RunAgentResult } from './run-agent.js';
25
25
  export interface CronPostTaskHooks {
26
26
  triggerCronReflection: (jobName: string, jobPrompt: string, deliverable: string, successCriteria?: string[]) => Promise<void>;
27
27
  triggerSkillExtractionFromExecution: (source: 'unleashed' | 'cron' | 'chat', jobName: string, prompt: string, output: string, durationMs: number, agentSlug?: string) => Promise<void>;
28
+ triggerMemoryExtractionPostExchange: (userMessage: string, assistantResponse: string, sessionKey?: string, profile?: AgentProfile) => Promise<void>;
28
29
  }
29
30
  export interface RunAgentCronOptions {
30
31
  /** Job name from CRON.md. Used for telemetry, progress lookup, skill match. */
@@ -319,11 +319,13 @@ export async function runAgentCron(opts) {
319
319
  logger.debug({ err, job: opts.jobName }, 'runAgentCron: transcript mirror failed (non-fatal)');
320
320
  }
321
321
  }
322
- // ── Post-task hooks: reflection + skill extraction ────────────────
323
- // Both fire-and-forget — never block the cron deliverable on these.
324
- // They are the same passes the legacy runCronJob fires; without them
325
- // the new path would lose the success-grading + procedural-memory
326
- // growth that makes Clementine self-improving.
322
+ // ── Post-task hooks: reflection + skill extraction + memory ──────
323
+ // All fire-and-forget — never block the cron deliverable on these.
324
+ // Reflection grades the run, skill extraction banks repeatable
325
+ // procedures, memory extraction distills facts the agent learned
326
+ // (e.g. "Mark Finizio is now the buyer at FamilyCenter") into the
327
+ // agent's MEMORY.md. The legacy runCronJob fired reflection +
328
+ // skill but never memory extraction; that gap is closed now.
327
329
  if (opts.postTaskHooks && deliverable && deliverable.trim() !== '__NOTHING__') {
328
330
  const durationMs = Date.now() - startedAt;
329
331
  opts.postTaskHooks
@@ -332,6 +334,9 @@ export async function runAgentCron(opts) {
332
334
  opts.postTaskHooks
333
335
  .triggerSkillExtractionFromExecution('cron', opts.jobName, opts.jobPrompt, deliverable, durationMs, agentSlug)
334
336
  .catch(err => logger.debug({ err, job: opts.jobName }, 'runAgentCron: skill extraction failed (non-fatal)'));
337
+ opts.postTaskHooks
338
+ .triggerMemoryExtractionPostExchange(opts.jobPrompt, deliverable, `cron:${opts.jobName}`, opts.profile ?? undefined)
339
+ .catch(err => logger.debug({ err, job: opts.jobName }, 'runAgentCron: memory extraction failed (non-fatal)'));
335
340
  }
336
341
  return {
337
342
  ...result,
@@ -81,17 +81,13 @@ export async function runAgentHeartbeat(opts) {
81
81
  allowedTools: [],
82
82
  abortSignal: opts.abortSignal,
83
83
  });
84
- // Mirror the heartbeat into transcripts so dedup + recall work.
85
- // Skip pure __NOTHING__ outputs since they carry no information.
86
- const text = result.text?.trim() ?? '';
87
- if (opts.memoryStore && text && text !== '__NOTHING__') {
88
- try {
89
- opts.memoryStore.saveTurn(sessionKey, 'heartbeat', text, opts.model ?? MODELS.haiku);
90
- }
91
- catch {
92
- /* non-fatal */
93
- }
94
- }
84
+ // Heartbeat output is NOT mirrored to transcripts. Heartbeats fire
85
+ // up to 28x/day per agent and most output is low-value (status
86
+ // pings, dedup'd reminders). The heartbeat dedup that prior versions
87
+ // wanted recall for actually lives in the prompt itself (the
88
+ // dedupContext block + the __NOTHING__ sentinel), not in DB queries.
89
+ // Saving rows here just polluted FTS and the dashboard memory panel
90
+ // for no recall benefit.
95
91
  return result;
96
92
  }
97
93
  //# sourceMappingURL=run-agent-heartbeat.js.map
@@ -2,6 +2,14 @@ import type { AgentProfile } from '../types.js';
2
2
  import type { AgentManager } from './agent-manager.js';
3
3
  import type { MemoryStore } from '../memory/store.js';
4
4
  import { type RunAgentResult } from './run-agent.js';
5
+ /** Minimal post-task hook interface. The PersonalAssistant implements
6
+ * this directly; passing it through keeps the wrapper decoupled from
7
+ * the full assistant graph. */
8
+ export interface TeamTaskPostHooks {
9
+ triggerMemoryExtractionPostExchange: (userMessage: string, assistantResponse: string, sessionKey?: string, profile?: AgentProfile) => Promise<void>;
10
+ triggerSkillExtractionFromExecution: (source: 'unleashed' | 'cron' | 'chat', jobName: string, prompt: string, output: string, durationMs: number, agentSlug?: string) => Promise<void>;
11
+ triggerCronReflection: (jobName: string, jobPrompt: string, deliverable: string, successCriteria?: string[]) => Promise<void>;
12
+ }
5
13
  export interface RunAgentTeamTaskOptions {
6
14
  fromName: string;
7
15
  fromSlug: string;
@@ -17,6 +25,9 @@ export interface RunAgentTeamTaskOptions {
17
25
  maxBudgetUsd?: number;
18
26
  /** Optional max-turns cap. Default: undefined (SDK runs until done, bounded by budget). */
19
27
  maxTurns?: number;
28
+ /** Post-task hooks (memory extraction). Pass the PersonalAssistant.
29
+ * Optional so the helper still works in tests. */
30
+ postTaskHooks?: TeamTaskPostHooks | null;
20
31
  }
21
32
  export interface RunAgentTeamTaskResult extends RunAgentResult {
22
33
  builtPrompt: string;
@@ -56,6 +56,7 @@ export async function runAgentTeamTask(opts) {
56
56
  promptChars: builtPrompt.length,
57
57
  }, 'runAgentTeamTask: dispatching to runAgent');
58
58
  const sessionKey = `team-task:${opts.fromSlug}->${opts.profile.slug}`;
59
+ const startedAt = Date.now();
59
60
  const result = await runAgent(builtPrompt, {
60
61
  sessionKey,
61
62
  source: 'team-task',
@@ -82,6 +83,24 @@ export async function runAgentTeamTask(opts) {
82
83
  /* non-fatal */
83
84
  }
84
85
  }
86
+ // Post-task hooks: memory + skill extraction + reflection. All
87
+ // fire-and-forget. Mirrors the cron wrapper's three-hook pattern.
88
+ // Team tasks often produce repeatable procedures (e.g. "draft a
89
+ // follow-up email after a discovery call") and reflection grades
90
+ // whether the response actually fulfilled the request.
91
+ if (opts.postTaskHooks && result.text?.trim()) {
92
+ const durationMs = Date.now() - startedAt;
93
+ opts.postTaskHooks
94
+ .triggerMemoryExtractionPostExchange(opts.content, result.text, sessionKey, opts.profile)
95
+ .catch(err => logger.debug({ err, fromSlug: opts.fromSlug, toSlug: opts.profile.slug }, 'runAgentTeamTask: memory extraction failed (non-fatal)'));
96
+ opts.postTaskHooks
97
+ .triggerSkillExtractionFromExecution('cron', // 'cron' covers autonomous-task skill source category
98
+ taskName, opts.content, result.text, durationMs, opts.profile.slug)
99
+ .catch(err => logger.debug({ err, fromSlug: opts.fromSlug, toSlug: opts.profile.slug }, 'runAgentTeamTask: skill extraction failed (non-fatal)'));
100
+ opts.postTaskHooks
101
+ .triggerCronReflection(taskName, opts.content, result.text)
102
+ .catch(err => logger.debug({ err, fromSlug: opts.fromSlug, toSlug: opts.profile.slug }, 'runAgentTeamTask: reflection failed (non-fatal)'));
103
+ }
85
104
  return {
86
105
  ...result,
87
106
  builtPrompt,
@@ -281,8 +281,48 @@ async function searchMemory(query, limit = 20, filters = {}) {
281
281
  WHERE ${where.join(' AND ')}
282
282
  ORDER BY ${orderBy}
283
283
  LIMIT ?`;
284
- const rows = db.prepare(sql).all(...params, limit);
285
- return { results: rows, dbExists: true };
284
+ const chunkRows = db.prepare(sql).all(...params, limit);
285
+ // Also surface transcripts from chat / cron / team-task. These
286
+ // are written by saveTurn and would otherwise be invisible to the
287
+ // main search panel (only the per-session viewer surfaced them).
288
+ // chunkType filter is chunk-only — if set, skip transcripts.
289
+ let transcriptRows = [];
290
+ if (words.length > 0 && !filters.chunkType && !filters.pinnedOnly) {
291
+ try {
292
+ const ftsQuery = words.map((w) => `"${w.replace(/"/g, '')}"`).join(' OR ');
293
+ const tWhere = ['transcripts_fts MATCH ?'];
294
+ const tParams = [ftsQuery];
295
+ if (filters.sinceDays && filters.sinceDays > 0) {
296
+ tWhere.push("t.created_at >= datetime('now', ?)");
297
+ tParams.push(`-${filters.sinceDays} days`);
298
+ }
299
+ const tSql = `SELECT t.id, t.session_key, t.role, t.content, t.model, t.created_at,
300
+ bm25(transcripts_fts) as score
301
+ FROM transcripts_fts f JOIN transcripts t ON t.id = f.rowid
302
+ WHERE ${tWhere.join(' AND ')}
303
+ ORDER BY bm25(transcripts_fts)
304
+ LIMIT ?`;
305
+ transcriptRows = db.prepare(tSql).all(...tParams, Math.min(limit, 10))
306
+ .map(r => ({
307
+ id: `transcript:${r.id}`,
308
+ source_file: `transcripts/${r.session_key}`,
309
+ section: `${r.role} @ ${r.created_at}`,
310
+ content: r.content,
311
+ chunk_type: 'transcript',
312
+ updated_at: r.created_at,
313
+ salience: 0,
314
+ pinned: 0,
315
+ score: r.score,
316
+ }));
317
+ }
318
+ catch { /* transcripts FTS may be empty/unavailable — non-fatal */ }
319
+ }
320
+ // Merge: transcripts interleaved by score with chunks. FTS bm25
321
+ // is comparable across both since they use the same tokenizer.
322
+ const merged = [...chunkRows, ...transcriptRows]
323
+ .sort((a, b) => Number(a.score ?? 0) - Number(b.score ?? 0))
324
+ .slice(0, limit);
325
+ return { results: merged, dbExists: true };
286
326
  }
287
327
  catch (err) {
288
328
  return { results: [], dbExists: true, error: String(err) };
@@ -16,8 +16,9 @@
16
16
  */
17
17
  import { createHash } from 'node:crypto';
18
18
  import Anthropic from '@anthropic-ai/sdk';
19
+ import { query } from '@anthropic-ai/claude-agent-sdk';
19
20
  import pino from 'pino';
20
- import { MODELS } from '../config.js';
21
+ import { MODELS, BASE_DIR, CLAUDE_CODE_OAUTH_TOKEN, ANTHROPIC_API_KEY } from '../config.js';
21
22
  import { fingerprintCommitment, parseRelativeDue, } from './commitments.js';
22
23
  const logger = pino({
23
24
  name: 'clementine.episodic-consolidation',
@@ -140,11 +141,65 @@ export function fingerprintLearnedFact(kind, text) {
140
141
  function getAnthropicClient(opts) {
141
142
  if (opts.anthropicClient)
142
143
  return opts.anthropicClient;
143
- const apiKey = process.env.ANTHROPIC_API_KEY;
144
+ const apiKey = process.env.ANTHROPIC_API_KEY ?? ANTHROPIC_API_KEY;
144
145
  if (!apiKey)
145
146
  return null;
146
147
  return new Anthropic({ apiKey });
147
148
  }
149
+ /**
150
+ * One-shot LLM call via the SDK's `query()`. OAuth-aware (uses
151
+ * CLAUDE_CODE_OAUTH_TOKEN when no API key is set), so works on
152
+ * installs that haven't configured ANTHROPIC_API_KEY. Returns the
153
+ * concatenated assistant text — empty string on failure.
154
+ *
155
+ * Used as a fallback when no Anthropic SDK client is available
156
+ * (i.e. the prior path returned null and the entire consolidation
157
+ * pass silently no-op'd).
158
+ */
159
+ async function runConsolidationViaSdk(systemPrompt, userPrompt, model) {
160
+ const env = {
161
+ PATH: process.env.PATH ?? '',
162
+ HOME: process.env.HOME ?? '',
163
+ CLEMENTINE_HOME: BASE_DIR,
164
+ };
165
+ const oauth = CLAUDE_CODE_OAUTH_TOKEN || process.env.CLAUDE_CODE_OAUTH_TOKEN;
166
+ const apiKey = ANTHROPIC_API_KEY || process.env.ANTHROPIC_API_KEY;
167
+ if (oauth)
168
+ env.CLAUDE_CODE_OAUTH_TOKEN = oauth;
169
+ else if (apiKey)
170
+ env.ANTHROPIC_API_KEY = apiKey;
171
+ let text = '';
172
+ try {
173
+ const stream = query({
174
+ prompt: userPrompt,
175
+ options: {
176
+ systemPrompt,
177
+ model,
178
+ permissionMode: 'bypassPermissions',
179
+ allowDangerouslySkipPermissions: true,
180
+ allowedTools: [],
181
+ cwd: BASE_DIR,
182
+ env,
183
+ maxTurns: 1,
184
+ maxBudgetUsd: 0.10,
185
+ },
186
+ });
187
+ for await (const message of stream) {
188
+ if (message.type === 'assistant') {
189
+ const blocks = (message.message?.content ?? []);
190
+ for (const block of blocks) {
191
+ if (block.type === 'text' && typeof block.text === 'string')
192
+ text += block.text;
193
+ }
194
+ }
195
+ }
196
+ }
197
+ catch (err) {
198
+ logger.warn({ err }, 'SDK consolidation call failed');
199
+ return '';
200
+ }
201
+ return text;
202
+ }
148
203
  /**
149
204
  * Consolidate a single candidate session range. Returns the new episode id
150
205
  * + chunk id on success, or null on failure (the caller bumps the failure
@@ -155,10 +210,10 @@ export async function consolidateOneSession(store, candidate, opts = {}) {
155
210
  if (turns.length === 0)
156
211
  return null;
157
212
  const client = getAnthropicClient(opts);
158
- if (!client) {
159
- logger.debug({ sessionKey: candidate.sessionKey }, 'No Anthropic client available — skipping consolidation');
160
- return null;
161
- }
213
+ // No client means no API key. We still try via the SDK's query()
214
+ // which uses OAuth when available — that's the canonical path for
215
+ // installs that haven't configured ANTHROPIC_API_KEY. Tests that
216
+ // pass an explicit anthropicClient will still hit the direct path.
162
217
  // Pull a small snapshot of existing learned facts so the LLM can
163
218
  // detect contradictions and emit supersedes hints. Best-effort —
164
219
  // empty list is fine for first-ever consolidation.
@@ -169,18 +224,28 @@ export async function consolidateOneSession(store, candidate, opts = {}) {
169
224
  }
170
225
  }
171
226
  catch { /* fact snapshot is best-effort */ }
227
+ const userPrompt = buildUserPrompt(turns.map(t => ({ role: t.role, content: t.content, createdAt: t.createdAt })), existingFactsForPrompt);
228
+ const model = opts.model ?? MODELS.haiku;
172
229
  let extraction = null;
173
230
  try {
174
- const response = await client.messages.create({
175
- model: opts.model ?? MODELS.haiku,
176
- max_tokens: 1500,
177
- system: SYSTEM_PROMPT,
178
- messages: [{
179
- role: 'user',
180
- content: buildUserPrompt(turns.map(t => ({ role: t.role, content: t.content, createdAt: t.createdAt })), existingFactsForPrompt),
181
- }],
182
- });
183
- const text = (response.content ?? []).map((b) => b.type === 'text' ? (b.text ?? '') : '').join('');
231
+ let text = '';
232
+ if (client) {
233
+ const response = await client.messages.create({
234
+ model,
235
+ max_tokens: 1500,
236
+ system: SYSTEM_PROMPT,
237
+ messages: [{ role: 'user', content: userPrompt }],
238
+ });
239
+ text = (response.content ?? []).map((b) => b.type === 'text' ? (b.text ?? '') : '').join('');
240
+ }
241
+ else {
242
+ // No API client — fall through to the SDK (OAuth-aware).
243
+ text = await runConsolidationViaSdk(SYSTEM_PROMPT, userPrompt, model);
244
+ }
245
+ if (!text) {
246
+ logger.debug({ sessionKey: candidate.sessionKey }, 'Empty consolidation response — skipping');
247
+ return null;
248
+ }
184
249
  extraction = parseEpisodeJson(text);
185
250
  }
186
251
  catch (err) {
@@ -42,6 +42,8 @@ export declare class HeartbeatScheduler {
42
42
  private runLog;
43
43
  private lastDenseBackfillAt;
44
44
  private denseBackfillInFlight;
45
+ private lastTranscriptDenseBackfillAt;
46
+ private transcriptDenseBackfillInFlight;
45
47
  private lastSalienceDecayDate;
46
48
  private lastMemoryPulseDate;
47
49
  private lastEpisodicConsolidationAt;
@@ -70,6 +72,14 @@ export declare class HeartbeatScheduler {
70
72
  * Coverage climbs over hours/days without user action.
71
73
  */
72
74
  private maybeIdleDenseBackfill;
75
+ /**
76
+ * Sibling of maybeIdleDenseBackfill that targets the transcripts table.
77
+ * Same gates (cooldown + chat-lane idle + dense model ready), separate
78
+ * in-flight + cadence so the two backfills don't starve each other.
79
+ * Without this, new chat/cron/heartbeat turns get FTS5-indexed but
80
+ * never embedded, and the dense leg of recall silently returns 0 hits.
81
+ */
82
+ private maybeIdleTranscriptDenseBackfill;
73
83
  /**
74
84
  * Episodic consolidation pass. Turns idle session transcript ranges into
75
85
  * durable episodes via a small Haiku call per session. Same shape as
@@ -52,6 +52,8 @@ export class HeartbeatScheduler {
52
52
  runLog = new CronRunLog();
53
53
  lastDenseBackfillAt = 0;
54
54
  denseBackfillInFlight = false;
55
+ lastTranscriptDenseBackfillAt = 0;
56
+ transcriptDenseBackfillInFlight = false;
55
57
  lastSalienceDecayDate = '';
56
58
  lastMemoryPulseDate = '';
57
59
  lastEpisodicConsolidationAt = 0;
@@ -155,6 +157,14 @@ export class HeartbeatScheduler {
155
157
  this.maybeIdleDenseBackfill().catch(err => {
156
158
  logger.debug({ err }, 'Idle dense backfill failed (non-fatal)');
157
159
  });
160
+ // Transcript dense backfill — separate cadence from chunks. Transcript
161
+ // turns from chat/cron/heartbeat/team-task accumulate continuously
162
+ // and need their own dense vectors so the recall block's dense leg
163
+ // returns hits for them too. Without this, hybrid recall silently
164
+ // degrades to lexical-only for transcripts.
165
+ this.maybeIdleTranscriptDenseBackfill().catch(err => {
166
+ logger.debug({ err }, 'Idle transcript dense backfill failed (non-fatal)');
167
+ });
158
168
  // Daily salience decay — fades stale, unaccessed chunks so retrieval
159
169
  // doesn't keep boosting facts that aren't earning their context budget.
160
170
  // Pinned + soft-deleted + superseded chunks are exempt. One UPDATE per
@@ -823,6 +833,45 @@ export class HeartbeatScheduler {
823
833
  this.denseBackfillInFlight = false;
824
834
  }
825
835
  }
836
+ /**
837
+ * Sibling of maybeIdleDenseBackfill that targets the transcripts table.
838
+ * Same gates (cooldown + chat-lane idle + dense model ready), separate
839
+ * in-flight + cadence so the two backfills don't starve each other.
840
+ * Without this, new chat/cron/heartbeat turns get FTS5-indexed but
841
+ * never embedded, and the dense leg of recall silently returns 0 hits.
842
+ */
843
+ async maybeIdleTranscriptDenseBackfill() {
844
+ if (this.transcriptDenseBackfillInFlight)
845
+ return;
846
+ const sinceLastMs = Date.now() - this.lastTranscriptDenseBackfillAt;
847
+ if (sinceLastMs < 10 * 60 * 1000)
848
+ return;
849
+ const { lanes } = await import('./lanes.js');
850
+ if (lanes.status().chat.active > 0)
851
+ return;
852
+ const store = this.gateway.getMemoryStore();
853
+ if (!store)
854
+ return;
855
+ const s = store;
856
+ if (typeof s.backfillTranscriptDenseEmbeddings !== 'function')
857
+ return;
858
+ const embeddings = await import('../memory/embeddings.js');
859
+ if (!embeddings.isDenseReady()) {
860
+ embeddings.probeDenseReady().catch(() => { });
861
+ return;
862
+ }
863
+ this.transcriptDenseBackfillInFlight = true;
864
+ this.lastTranscriptDenseBackfillAt = Date.now();
865
+ try {
866
+ const result = await s.backfillTranscriptDenseEmbeddings({ limit: 50 });
867
+ if (result.embedded > 0) {
868
+ logger.info({ embedded: result.embedded, failed: result.failed, model: result.model }, 'Idle transcript dense backfill batch complete');
869
+ }
870
+ }
871
+ finally {
872
+ this.transcriptDenseBackfillInFlight = false;
873
+ }
874
+ }
826
875
  /**
827
876
  * Episodic consolidation pass. Turns idle session transcript ranges into
828
877
  * durable episodes via a small Haiku call per session. Same shape as
@@ -2064,6 +2064,10 @@ export class Gateway {
2064
2064
  agentManager: this.getAgentManager(),
2065
2065
  memoryStore: this.assistant.getMemoryStore?.() ?? null,
2066
2066
  abortSignal: abortController?.signal,
2067
+ // Post-task auto-memory extraction so anything the recipient
2068
+ // learned during the task (new contact, preference, status)
2069
+ // distills into their agents/<slug>/MEMORY.md.
2070
+ postTaskHooks: this.assistant,
2067
2071
  });
2068
2072
  scanner.refreshIntegrity();
2069
2073
  logger.info({
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clementine-agent",
3
- "version": "1.18.57",
3
+ "version": "1.18.59",
4
4
  "description": "Clementine — Personal AI Assistant (TypeScript)",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",