clementine-agent 1.18.20 → 1.18.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/README.md +17 -0
  2. package/dist/agent/action-enforcer.d.ts +29 -0
  3. package/dist/agent/action-enforcer.js +120 -0
  4. package/dist/agent/assistant.d.ts +12 -0
  5. package/dist/agent/assistant.js +165 -31
  6. package/dist/agent/auto-update.js +46 -2
  7. package/dist/agent/local-turn.d.ts +16 -0
  8. package/dist/agent/local-turn.js +54 -1
  9. package/dist/agent/route-classifier.d.ts +1 -0
  10. package/dist/agent/route-classifier.js +30 -3
  11. package/dist/agent/toolsets.d.ts +14 -0
  12. package/dist/agent/toolsets.js +68 -0
  13. package/dist/brain/ingestion-pipeline.d.ts +7 -0
  14. package/dist/brain/ingestion-pipeline.js +107 -21
  15. package/dist/channels/discord.js +38 -7
  16. package/dist/channels/telegram.js +5 -6
  17. package/dist/cli/dashboard.js +56 -6
  18. package/dist/cli/index.js +174 -0
  19. package/dist/cli/ingest.js +8 -2
  20. package/dist/gateway/context-hygiene.d.ts +17 -0
  21. package/dist/gateway/context-hygiene.js +31 -0
  22. package/dist/gateway/heartbeat-scheduler.d.ts +20 -0
  23. package/dist/gateway/heartbeat-scheduler.js +27 -10
  24. package/dist/gateway/router.d.ts +7 -0
  25. package/dist/gateway/router.js +303 -9
  26. package/dist/gateway/turn-ledger.d.ts +32 -0
  27. package/dist/gateway/turn-ledger.js +55 -0
  28. package/dist/memory/embeddings.d.ts +2 -0
  29. package/dist/memory/embeddings.js +8 -1
  30. package/dist/memory/store.d.ts +88 -1
  31. package/dist/memory/store.js +349 -18
  32. package/dist/memory/write-queue.d.ts +16 -0
  33. package/dist/memory/write-queue.js +5 -0
  34. package/dist/tools/shared.d.ts +89 -0
  35. package/dist/types.d.ts +11 -0
  36. package/package.json +1 -1
  37. package/scripts/postinstall.js +56 -6
@@ -5,14 +5,41 @@
5
5
  * Source modifications from self-improve are tracked in ~/.clementine/ (not git),
6
6
  * so git pull is always clean. After pulling, source mods are reconciled.
7
7
  */
8
- import { execSync } from 'node:child_process';
9
- import { writeFileSync } from 'node:fs';
8
+ import { execFileSync, execSync } from 'node:child_process';
9
+ import { existsSync, readFileSync, writeFileSync } from 'node:fs';
10
10
  import path from 'node:path';
11
11
  import pino from 'pino';
12
12
  import { BASE_DIR } from '../config.js';
13
13
  import { reconcileSourceMods } from './source-mods.js';
14
14
  const logger = pino({ name: 'clementine.auto-update' });
15
15
  const SENTINEL_PATH = path.join(BASE_DIR, '.restart-sentinel.json');
16
/**
 * Load KEY=VALUE pairs from the `.env` file that lives in BASE_DIR.
 *
 * Lines are trimmed; blank lines, lines beginning with `#`, and lines with
 * no `=` are ignored. The text before the first `=` is the key, the rest is
 * the value. Both are trimmed, and one leading and/or trailing quote
 * character (`"` or `'`) is removed from the value.
 *
 * Best-effort: a missing file, or any error while reading or parsing it,
 * produces an empty object rather than an exception.
 *
 * @returns {Record<string, string>} Parsed entries (a repeated key keeps its last value).
 */
function readDataEnv() {
    const envPath = path.join(BASE_DIR, '.env');
    if (!existsSync(envPath)) {
        return {};
    }
    try {
        const pairs = [];
        for (const rawLine of readFileSync(envPath, 'utf-8').split(/\r?\n/)) {
            const entry = rawLine.trim();
            if (!entry || entry.startsWith('#') || !entry.includes('=')) {
                continue;
            }
            const eq = entry.indexOf('=');
            const key = entry.slice(0, eq).trim();
            const value = entry.slice(eq + 1).trim().replace(/^["']|["']$/g, '');
            pairs.push([key, value]);
        }
        // Object.fromEntries defines own properties, so unusual keys stay plain data.
        return Object.fromEntries(pairs);
    }
    catch {
        return {};
    }
}
34
/**
 * Report whether a boolean-style flag is switched on.
 *
 * The process environment is consulted first; the parsed env-file object is
 * only used when the process environment has no value (undefined/null) for
 * `name`. The flag counts as enabled when the chosen value is exactly one of
 * `1`, `true`, `yes`, or `on`, compared case-insensitively.
 *
 * @param {string} name Environment variable name.
 * @param {Record<string, string>} envFile Fallback key/value map.
 * @returns {boolean} True when the flag is enabled.
 */
function flagEnabled(name, envFile) {
    const fromProcess = process.env[name];
    const chosen = fromProcess ?? envFile[name];
    const asText = String(chosen ?? '');
    return /^(1|true|yes|on)$/i.test(asText);
}
38
/**
 * Decide whether the embedding-model prefetch is opted into.
 *
 * Reads the data-dir env file once, then returns true as soon as either
 * CLEMENTINE_INSTALL_EMBEDDINGS or CLEMENTINE_PREFETCH_EMBEDDINGS is enabled
 * (checked in that order).
 *
 * @returns {boolean} True when at least one of the two flags is enabled.
 */
function shouldPrefetchEmbeddings() {
    const envFile = readDataEnv();
    const optInFlags = ['CLEMENTINE_INSTALL_EMBEDDINGS', 'CLEMENTINE_PREFETCH_EMBEDDINGS'];
    return optInFlags.some((flag) => flagEnabled(flag, envFile));
}
16
43
  /**
17
44
  * Check if upstream has new commits. Safe to call from cron — no side effects.
18
45
  */
@@ -121,6 +148,23 @@ export async function applyUpdate(pkgDir) {
121
148
  logger.error({ err }, 'Build failed after update');
122
149
  return { success: false, error: `Build failed after update: ${String(err)}` };
123
150
  }
151
+ // 4b. Optional embedding model prefetch. npm postinstall may run before
152
+ // the freshly pulled TypeScript has been built; this second pass uses the
153
+ // just-built CLI so repo updates and npm-style updates behave the same.
154
+ if (shouldPrefetchEmbeddings()) {
155
+ try {
156
+ execFileSync(process.execPath, [path.join(pkgDir, 'dist', 'cli', 'index.js'), 'memory', 'model', 'install'], {
157
+ cwd: pkgDir,
158
+ stdio: 'pipe',
159
+ env: { ...process.env, CLEMENTINE_HOME: BASE_DIR },
160
+ timeout: 10 * 60_000,
161
+ });
162
+ logger.info('Local embedding model prefetch succeeded after update');
163
+ }
164
+ catch (err) {
165
+ logger.warn({ err }, 'Local embedding model prefetch failed after update');
166
+ }
167
+ }
124
168
  // 5. Reconcile source modifications
125
169
  const reconcileResult = reconcileSourceMods(pkgDir);
126
170
  logger.info({
@@ -1,4 +1,5 @@
1
1
  import type { ClementineJson } from '../config/clementine-json.js';
2
+ import { type ToolsetName } from './toolsets.js';
2
3
  export type ProactivityMode = 'quiet' | 'balanced' | 'proactive' | 'operator';
3
4
  export type ResponseStyle = 'concise' | 'balanced' | 'detailed';
4
5
  export type ProgressVisibility = 'quiet' | 'normal' | 'detailed';
@@ -19,14 +20,29 @@ export type LocalTurnIntent = {
19
20
  kind: 'stop';
20
21
  } | {
21
22
  kind: 'status';
23
+ } | {
24
+ kind: 'last_action';
25
+ } | {
26
+ kind: 'compress_context';
27
+ } | {
28
+ kind: 'debug_status';
29
+ } | {
30
+ kind: 'toolset';
31
+ toolset: ToolsetName;
22
32
  } | {
23
33
  kind: 'preference_update';
24
34
  updates: AssistantExperienceUpdate;
25
35
  summary: string;
26
36
  };
37
+ export type ApprovalReply = true | false | 'always' | null;
27
38
  export declare function isStopRequest(text: string): boolean;
28
39
  export declare function isStatusRequest(text: string): boolean;
40
+ export declare function isLastActionRequest(text: string): boolean;
41
+ export declare function isCompressContextRequest(text: string): boolean;
42
+ export declare function isDebugStatusRequest(text: string): boolean;
29
43
  export declare function isTinyAcknowledgment(text: string): boolean;
44
+ export declare function detectApprovalReply(text: string): ApprovalReply;
45
+ export declare function looksLikeApprovalPrompt(text: string): boolean;
30
46
  export declare function detectLocalTurn(text: string): LocalTurnIntent;
31
47
  export declare function applyAssistantExperienceUpdate(cfg: ClementineJson, updates: AssistantExperienceUpdate): ClementineJson;
32
48
  //# sourceMappingURL=local-turn.d.ts.map
@@ -1,8 +1,10 @@
1
1
  import { isStandaloneGreeting } from './turn-policy.js';
2
+ import { normalizeToolsetName } from './toolsets.js';
2
3
  function normalize(text) {
3
4
  return text
4
5
  .trim()
5
6
  .toLowerCase()
7
+ .replace(/[‘’`]/g, "'")
6
8
  .replace(/[.!?]+$/g, '')
7
9
  .replace(/\s+/g, ' ');
8
10
  }
@@ -20,7 +22,31 @@ export function isStatusRequest(text) {
20
22
  const n = normalize(text);
21
23
  if (wordCount(n) > 8)
22
24
  return false;
23
- return /^(status|task status|deep status|progress|what'?s happening|what'?s going on|what are you doing|are you working|anything running|what'?s running|background status|check status|where are we)$/.test(n);
25
+ return /^(status|task status|deep status|progress|what'?s happening|what'?s going on|what are you doing|what are you working on|what are you running|are you working|anything running|what'?s runnin?g?(?: now| right now)?|what is runnin?g?(?: now| right now)?|background status|check status|where are we)$/.test(n);
26
+ }
27
/**
 * True when the message is a short "what did you just do?" style question.
 *
 * The text is normalized first; anything longer than 10 words is rejected
 * before the phrase list is consulted. The whole normalized message must
 * equal one of the listed phrases.
 *
 * @param {string} text Raw user message.
 * @returns {boolean}
 */
export function isLastActionRequest(text) {
    const phrase = normalize(text);
    const tooLong = wordCount(phrase) > 10;
    return tooLong
        ? false
        : /^(last action|last turn|what happened last turn|what did you do|did you do it|did that actually run|did you actually do it|why didn'?t you do it|why did that not run|what happened)$/.test(phrase);
}
33
/**
 * True when the message asks to compress/compact the conversation context.
 *
 * The text is normalized first; messages over 8 words are rejected, and the
 * whole normalized message must equal one of the listed phrases.
 *
 * @param {string} text Raw user message.
 * @returns {boolean}
 */
export function isCompressContextRequest(text) {
    const phrase = normalize(text);
    const tooLong = wordCount(phrase) > 8;
    return tooLong
        ? false
        : /^(compress context|compact context|compress session|compact session|context compact|context compress|save and reset context|reset context but keep memory)$/.test(phrase);
}
39
/**
 * True when the message is a request for debug/diagnostic status.
 *
 * The text is normalized first; messages over 6 words are rejected, and the
 * whole normalized message must equal one of the listed phrases.
 *
 * @param {string} text Raw user message.
 * @returns {boolean}
 */
export function isDebugStatusRequest(text) {
    const phrase = normalize(text);
    const tooLong = wordCount(phrase) > 6;
    return tooLong
        ? false
        : /^(debug|debug status|session debug|agent debug|diagnostics|show diagnostics)$/.test(phrase);
}
45
/**
 * Parse a "switch toolset" command out of a user message.
 *
 * Accepted shapes (after normalization): an optional verb (`set`, `switch`,
 * `use`, `enable`), one of `toolset` / `tool set` / `tools mode` /
 * `tool mode`, an optional ` to` or `:`, then the requested name made of
 * lowercase letters, spaces, `_` or `-`. The captured name is handed to
 * normalizeToolsetName, so aliases and unknown names are resolved there.
 *
 * The former `?? n.match(/^toolset ([a-z _-]+)$/)` fallback was removed: with
 * its optional verb and optional ` to`/`:` both skipped, the pattern below is
 * that exact expression, so the fallback could never match when this one
 * had failed.
 *
 * @param {string} text Raw user message.
 * @returns The normalized toolset name, or null when the message is not a
 *   toolset command or names an unknown toolset.
 */
function parseToolsetRequest(text) {
    const n = normalize(text);
    const match = n.match(/^(?:set |switch |use |enable )?(?:toolset|tool set|tools mode|tool mode)(?: to|:)? ([a-z _-]+)$/);
    return match ? normalizeToolsetName(match[1]) : null;
}
25
51
  export function isTinyAcknowledgment(text) {
26
52
  const n = normalize(text);
@@ -28,6 +54,24 @@ export function isTinyAcknowledgment(text) {
28
54
  return false;
29
55
  return /^(thanks|thank you|thx|ty|nice|great|perfect|awesome|cool|ok|okay|sounds good|got it|makes sense|love it)$/.test(n);
30
56
  }
57
/**
 * Classify a short reply as an approval decision.
 *
 * The text is normalized first; replies longer than 4 words are never
 * treated as a decision. The checks run in order and the first pattern that
 * matches the whole normalized reply wins.
 *
 * @param {string} text Raw user reply.
 * @returns `'always'` for a standing approval, `false` for a refusal,
 *   `true` for an approval, or `null` when the reply is not a decision.
 */
export function detectApprovalReply(text) {
    const reply = normalize(text);
    if (wordCount(reply) > 4) {
        return null;
    }
    const verdicts = [
        [/^(always)$/, 'always'],
        [/^(no|nope|deny|denied|skip)$/, false],
        [/^(yes|y|yep|yeah|ok|okay|approve|approved|go|go ahead|do it|send it|perfect|sounds good|looks good|lgtm)$/, true],
    ];
    for (const [pattern, verdict] of verdicts) {
        if (pattern.test(reply)) {
            return verdict;
        }
    }
    return null;
}
70
/**
 * Heuristic: does this text read like a request for the user's go-ahead?
 *
 * Two signals are checked in order:
 *  1. the normalized text contains an approval phrase ("good to go",
 *     "should i send", "approve", "confirm", ...);
 *  2. the trimmed raw text ends in `?` with an action verb (send, email,
 *     delete, run, ...) at most 120 characters before that final `?`.
 *
 * @param {string} text Text to inspect.
 * @returns {boolean} True when either signal matches.
 */
export function looksLikeApprovalPrompt(text) {
    const normalized = normalize(text);
    if (/\b(good to go|okay to send|ok to send|ready to send|should i send|want me to send|approve|confirm|fire it off)\b/.test(normalized)) {
        return true;
    }
    const raw = text.trim();
    return /\b(send|email|message|post|publish|delete|change|update|run|execute)\b[\s\S]{0,120}\?$/i.test(raw);
}
31
75
  function parseProactivity(text) {
32
76
  if (/\b(operator mode|operator)\b/i.test(text))
33
77
  return 'operator';
@@ -71,6 +115,15 @@ export function detectLocalTurn(text) {
71
115
  return { kind: 'stop' };
72
116
  if (isStatusRequest(text))
73
117
  return { kind: 'status' };
118
+ if (isLastActionRequest(text))
119
+ return { kind: 'last_action' };
120
+ if (isCompressContextRequest(text))
121
+ return { kind: 'compress_context' };
122
+ if (isDebugStatusRequest(text))
123
+ return { kind: 'debug_status' };
124
+ const toolset = parseToolsetRequest(text);
125
+ if (toolset)
126
+ return { kind: 'toolset', toolset };
74
127
  if (isStandaloneGreeting(text))
75
128
  return { kind: 'greeting' };
76
129
  if (isTinyAcknowledgment(text))
@@ -29,6 +29,7 @@ export declare function isDirectImperative(userMessage: string): {
29
29
  match: boolean;
30
30
  pattern?: string;
31
31
  };
32
+ export declare function hasNoDelegationInstruction(userMessage: string): boolean;
32
33
  /**
33
34
  * Decide whether the user is talking ABOUT an agent rather than to them.
34
35
  * The explicit-mention fast path otherwise routes a message like
@@ -91,6 +91,22 @@ export function isDirectImperative(userMessage) {
91
91
  }
92
92
  return { match: false };
93
93
  }
94
/**
 * Detect an explicit instruction NOT to hand the request to another agent,
 * e.g. "don't delegate this", "without routing", "keep this with you".
 *
 * Typographic apostrophes and backticks are folded to a plain `'` before
 * matching. Without this, "don’t delegate" (what phone and desktop "smart
 * punctuation" keyboards produce) slipped past the `don't` alternatives.
 * This is the same character set that local-turn.js `normalize` folds.
 *
 * NOTE(review): the third pattern also matches phrases such as
 * "keep you posted"; confirm whether that should count as a no-delegation
 * instruction before tightening it.
 *
 * @param {string} userMessage Raw user message.
 * @returns {boolean} True when the message tells Clementine not to delegate.
 */
export function hasNoDelegationInstruction(userMessage) {
    const text = String(userMessage ?? '').replace(/[‘’`]/g, "'");
    return /\b(don't|dont|do not|don't you|please don't|please dont)\s+(delegate|route|send|pass|hand ?off|handoff)\b/i.test(text)
        || /\b(no|without)\s+(delegating|routing|sending|passing|handing ?off|handoff)\b/i.test(text)
        || /\bkeep (this|that|it)?\s*(with )?(clementine|you|yourself)\b/i.test(text);
}
99
/**
 * True when the message explicitly hands work to the named agent: a
 * delegation verb (send, route, delegate, pass, hand off, handoff), followed
 * within 40 word/space/apostrophe characters by an optional "to" and then the
 * agent's first name or slug as a whole word. Matching is case-insensitive.
 *
 * The name and slug are escaped before being embedded in the pattern. They
 * were previously interpolated raw, so a `.` in a name acted as a wildcard
 * and a character such as `(` made the RegExp constructor throw.
 *
 * @param {string} text Message text to inspect.
 * @param {string} firstName Agent first name.
 * @param {string} slug Agent slug.
 * @returns {boolean}
 */
function isExplicitDelegationToAgent(text, firstName, slug) {
    // Kept local so this function stands alone; escapes every regex metacharacter.
    const literal = (value) => String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const ident = `${literal(firstName)}|${literal(slug)}`;
    const re = new RegExp(`\\b(send|route|delegate|pass|hand\\s*off|handoff)\\b[\\s\\w']{0,40}?\\b(to\\s+)?(${ident})\\b`, 'i');
    return re.test(text);
}
104
/**
 * True when the message opens by addressing the agent directly: an optional
 * "hey" / "hi" / "yo", then the agent's first name or slug at the very start
 * of the trimmed text, followed by a word boundary or a `,` / `—` / `-`
 * separator. Matching is case-insensitive.
 *
 * The name and slug are escaped before being embedded in the pattern. They
 * were previously interpolated raw, so a `.` in a name acted as a wildcard
 * and a character such as `(` made the RegExp constructor throw.
 *
 * @param {string} text Message text to inspect.
 * @param {string} firstName Agent first name.
 * @param {string} slug Agent slug.
 * @returns {boolean}
 */
function isVocativeAgentAddress(text, firstName, slug) {
    // Kept local so this function stands alone; escapes every regex metacharacter.
    const literal = (value) => String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const ident = `${literal(firstName)}|${literal(slug)}`;
    const normalized = text.trim();
    const openerRe = new RegExp(`^(hey\\s+|hi\\s+|yo\\s+)?(${ident})(\\b|\\s*[,—-])`, 'i');
    return openerRe.test(normalized);
}
94
110
  /**
95
111
  * Decide whether the user is talking ABOUT an agent rather than to them.
96
112
  * The explicit-mention fast path otherwise routes a message like
@@ -111,8 +127,8 @@ export function isAskingAboutAgent(text, firstName, slug) {
111
127
  const possessiveRe = new RegExp(`\\b(${ident})('s|s')\\b`, 'i');
112
128
  if (possessiveRe.test(text))
113
129
  return true;
114
- const askingRe = new RegExp(`\\b(how|what|where|who|when|why|is|are|was|were|did|does|do|will|can|could|would|should|has|have|had|tell\\s+me|show\\s+me|let\\s+me\\s+know|any\\s+update|update\\s+on|status\\s+of|about)\\b[\\s\\w']{0,40}?\\b(${ident})\\b`, 'i');
115
- return askingRe.test(text);
130
+ const askingRe = new RegExp(`\\b(how|what|where|who|when|why|is|are|was|were|did|does|do|will|can|could|would|should|has|have|had|tell\\s+me|show\\s+me|let\\s+me\\s+know|any\\s+update|update\\s+on|status\\s+of|about|fix|check|review)\\b[\\s\\w']{0,80}?\\b(${ident})\\b`, 'i');
131
+ return askingRe.test(text) || new RegExp(`\\b(for|about|on)\\s+(${ident})\\b`, 'i').test(text);
116
132
  }
117
133
  /**
118
134
  * Session keys eligible for routing. Any key NOT in this set is
@@ -252,6 +268,13 @@ export async function classifyRoute(userMessage, agents, gateway) {
252
268
  const specialists = agents.filter(a => a.slug !== 'clementine');
253
269
  if (specialists.length === 0)
254
270
  return null;
271
+ if (hasNoDelegationInstruction(userMessage)) {
272
+ return {
273
+ targetAgent: 'clementine',
274
+ confidence: 0.95,
275
+ reasoning: 'User explicitly asked not to delegate or route this away from Clementine.',
276
+ };
277
+ }
255
278
  // Direct-imperative guardrail: user is instructing Clementine to act —
256
279
  // do not delegate, even if an agent is named.
257
280
  const imperative = isDirectImperative(userMessage);
@@ -272,7 +295,9 @@ export async function classifyRoute(userMessage, agents, gateway) {
272
295
  const wordRe = new RegExp(`\\b(${firstName}|${a.slug})\\b`, 'i');
273
296
  if (!wordRe.test(trimmed))
274
297
  continue;
275
- if (isAskingAboutAgent(trimmed, firstName, a.slug)) {
298
+ const explicitDelegation = isExplicitDelegationToAgent(trimmed, firstName, a.slug);
299
+ const vocativeAddress = isVocativeAgentAddress(trimmed, firstName, a.slug);
300
+ if (!explicitDelegation && !vocativeAddress && isAskingAboutAgent(trimmed, firstName, a.slug)) {
276
301
  // The user is asking ABOUT the agent ("how is <agent> doing", "<agent>'s
277
302
  // tasks", "did <agent> handle that?") rather than addressing them. Fall
278
303
  // through to the LLM classifier, which has a system-prompt rule for
@@ -280,6 +305,8 @@ export async function classifyRoute(userMessage, agents, gateway) {
280
305
  logger.debug({ slug: a.slug, trigger: 'meta-mention-bypass' }, 'Routing skipped — name appears as topic, not vocative');
281
306
  continue;
282
307
  }
308
+ if (!explicitDelegation && !vocativeAddress)
309
+ continue;
283
310
  logger.debug({ slug: a.slug, trigger: 'explicit-mention' }, 'Fast-path routing decision');
284
311
  return {
285
312
  targetAgent: a.slug,
@@ -0,0 +1,14 @@
1
/** Identifier of a selectable tool-surface preset. */
export type ToolsetName =
    | 'auto'
    | 'safe'
    | 'diagnostic'
    | 'communications'
    | 'memory'
    | 'full';
/** One preset entry: its identifier plus display and prompt text. */
export interface ToolsetPreset {
    /** Preset identifier. */
    name: ToolsetName;
    /** Short display label. */
    label: string;
    /** One-line description of the preset. */
    description: string;
    /** Directive text for the preset. */
    directive: string;
}
/** Every preset, in declaration order. */
export declare const TOOLSET_PRESETS: ReadonlyArray<ToolsetPreset>;
/** Resolve free-form input to a preset identifier, or null when it cannot be resolved. */
export declare function normalizeToolsetName(input: string | null | undefined): ToolsetName | null;
/** Look up the preset entry for an identifier. */
export declare function getToolsetPreset(name: ToolsetName): ToolsetPreset;
/** Render the available presets as text. */
export declare function formatToolsetChoices(): string;
/** Whether the named preset is a restricted one. */
export declare function isRestrictedToolset(name: ToolsetName): boolean;
/** Whether the named preset permits local writes. */
export declare function toolsetAllowsLocalWrites(name: ToolsetName): boolean;
14
+ //# sourceMappingURL=toolsets.d.ts.map
@@ -0,0 +1,68 @@
1
/**
 * Toolset presets in display order. Each row below is
 * [name, label, description, directive] and is expanded into an object with
 * those four keys, in that order. `auto` carries an empty directive.
 */
export const TOOLSET_PRESETS = [
    [
        'auto',
        'Auto',
        'Route to the smallest inferred tool surface for each turn.',
        '',
    ],
    [
        'safe',
        'Safe',
        'Memory and read-only local context; no external sends or local writes.',
        'Toolset safe: use memory and read-only local context. Do not send messages, email, delete data, deploy, or modify files unless the user switches toolsets.',
    ],
    [
        'diagnostic',
        'Diagnostic',
        'Bounded logs, local reads, memory, and diagnostics; no external sends.',
        'Toolset diagnostic: diagnose with bounded reads and capped command output. Prefer targeted log slices, summaries, and transcript_search. Do not send external messages or make product changes.',
    ],
    [
        'communications',
        'Communications',
        'Email/message workflows plus memory; avoid code and deployment tools.',
        'Toolset communications: focus on email, calendar, messaging, approvals, and memory continuity. Do not edit code, deploy, or run unrelated local commands.',
    ],
    [
        'memory',
        'Memory',
        'Memory, transcript, and relationship tools only unless explicitly changed.',
        'Toolset memory: use memory_read, memory_search, memory_recall, transcript_search, working_memory, and user_model. Avoid external integrations and local shell/file writes.',
    ],
    [
        'full',
        'Full',
        'Explicit operator mode for broad integrations and admin work.',
        'Toolset full: the user explicitly enabled the broad operator surface for this chat. Still keep tool output bounded and ask before destructive or irreversible actions.',
    ],
].map(([name, label, description, directive]) => ({ name, label, description, directive }));
/** Preset lookup keyed by preset name. */
const TOOLSET_BY_NAME = new Map();
for (const preset of TOOLSET_PRESETS) {
    TOOLSET_BY_NAME.set(preset.name, preset);
}
40
/**
 * Resolve free-form input to a canonical toolset name.
 *
 * The input is stringified, trimmed, lower-cased, and every run of
 * whitespace / `_` / `-` is collapsed to a single `-`. A few aliases are
 * mapped explicitly; anything else must already be a known preset name.
 *
 * @param {string | null | undefined} input Raw toolset name.
 * @returns The canonical name, or null for empty or unknown input.
 */
export function normalizeToolsetName(input) {
    const slug = String(input ?? '').trim().toLowerCase().replace(/[\s_-]+/g, '-');
    if (!slug) {
        return null;
    }
    switch (slug) {
        case 'diagnostics':
        case 'debug':
            return 'diagnostic';
        case 'comm':
        case 'comms':
        case 'communication':
            return 'communications';
        case 'mem':
            return 'memory';
        case 'all':
        case 'operator':
            return 'full';
        default:
            return TOOLSET_BY_NAME.has(slug) ? slug : null;
    }
}
54
/**
 * Fetch the preset registered under `name`, falling back to the `auto`
 * preset when nothing is registered under that name.
 *
 * @param {string} name Toolset name.
 * @returns The matching preset entry, or the `auto` entry.
 */
export function getToolsetPreset(name) {
    const found = TOOLSET_BY_NAME.get(name);
    if (found !== undefined && found !== null) {
        return found;
    }
    return TOOLSET_BY_NAME.get('auto');
}
57
/**
 * Render every preset as a `- name: description` bullet, one per line, in
 * preset order, joined with newlines (no trailing newline).
 *
 * @returns {string} The bullet list.
 */
export function formatToolsetChoices() {
    const bullets = [];
    for (const { name, description } of TOOLSET_PRESETS) {
        bullets.push(`- ${name}: ${description}`);
    }
    return bullets.join('\n');
}
62
/**
 * True for the restricted presets: `safe`, `diagnostic`, and `memory`.
 *
 * @param {string} name Toolset name.
 * @returns {boolean}
 */
export function isRestrictedToolset(name) {
    switch (name) {
        case 'safe':
        case 'diagnostic':
        case 'memory':
            return true;
        default:
            return false;
    }
}
65
/**
 * True only for the `auto` and `full` presets; every other name is treated
 * as not permitting local writes.
 *
 * @param {string} name Toolset name.
 * @returns {boolean}
 */
export function toolsetAllowsLocalWrites(name) {
    switch (name) {
        case 'auto':
        case 'full':
            return true;
        default:
            return false;
    }
}
68
+ //# sourceMappingURL=toolsets.js.map
@@ -32,6 +32,13 @@ export interface IngestResult {
32
32
  recordsWritten: number;
33
33
  recordsSkipped: number;
34
34
  recordsFailed: number;
35
+ recordsUnchanged: number;
36
+ recallCheckStatus?: 'ok' | 'partial' | 'skipped';
37
+ recallCheck?: {
38
+ checked: number;
39
+ hits: number;
40
+ missing: string[];
41
+ };
35
42
  errors: Array<{
36
43
  externalId?: string;
37
44
  error: string;
@@ -10,7 +10,7 @@
10
10
  * → tag provenance → ingested_rows (structured overlay)
11
11
  * → graph extractor → ingestion_runs audit
12
12
  */
13
- import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
13
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
14
14
  import path from 'node:path';
15
15
  import { VAULT_DIR } from '../config.js';
16
16
  import { getStore } from '../tools/shared.js';
@@ -33,6 +33,8 @@ export async function runIngestion(opts) {
33
33
  let recordsWritten = 0;
34
34
  let recordsSkipped = 0;
35
35
  let recordsFailed = 0;
36
+ let recordsUnchanged = 0;
37
+ const touchedExternalIds = new Set();
36
38
  const report = (stage, message) => {
37
39
  opts.onProgress?.({
38
40
  runId: runId ?? -1,
@@ -52,6 +54,14 @@ export async function runIngestion(opts) {
52
54
  recordsWritten += 1;
53
55
  if (sum)
54
56
  writtenSummaries.push(sum);
57
+ if (sum?.externalId)
58
+ touchedExternalIds.add(sum.externalId);
59
+ },
60
+ onUnchanged: (sum) => {
61
+ recordsSkipped += 1;
62
+ recordsUnchanged += 1;
63
+ if (sum?.externalId)
64
+ touchedExternalIds.add(sum.externalId);
55
65
  },
56
66
  onSkip: () => { recordsSkipped += 1; },
57
67
  onFail: (err) => { recordsFailed += 1; errors.push(err); },
@@ -145,12 +155,28 @@ export async function runIngestion(opts) {
145
155
  });
146
156
  }
147
157
  }
158
+ let recallCheckStatus = 'skipped';
159
+ let recallCheck;
160
+ if (!opts.dryRun && touchedExternalIds.size > 0) {
161
+ const missing = [];
162
+ for (const externalId of touchedExternalIds) {
163
+ if (!store.findChunkByExternalId(source.slug, externalId))
164
+ missing.push(externalId);
165
+ }
166
+ recallCheck = {
167
+ checked: touchedExternalIds.size,
168
+ hits: touchedExternalIds.size - missing.length,
169
+ missing,
170
+ };
171
+ recallCheckStatus = missing.length === 0 ? 'ok' : 'partial';
172
+ }
148
173
  // Finalize
149
174
  const status = recordsFailed > 0 && recordsWritten === 0 ? 'error' :
150
175
  recordsFailed > 0 ? 'partial' : 'ok';
151
176
  if (runId !== null) {
152
177
  store.updateIngestionRun(runId, {
153
- recordsIn, recordsWritten, recordsSkipped, recordsFailed,
178
+ recordsIn, recordsWritten, recordsSkipped, recordsFailed, recordsUnchanged,
179
+ recallCheckStatus,
154
180
  errorsJson: errors.length ? JSON.stringify(errors.slice(0, 50)) : null,
155
181
  overviewNotePath,
156
182
  status,
@@ -165,6 +191,9 @@ export async function runIngestion(opts) {
165
191
  recordsWritten,
166
192
  recordsSkipped,
167
193
  recordsFailed,
194
+ recordsUnchanged,
195
+ recallCheckStatus,
196
+ recallCheck,
168
197
  errors,
169
198
  plannedRecords: opts.dryRun ? plannedRecords : undefined,
170
199
  overviewNotePath,
@@ -176,6 +205,7 @@ export async function runIngestion(opts) {
176
205
  if (runId !== null) {
177
206
  store.updateIngestionRun(runId, {
178
207
  recordsIn, recordsWritten, recordsSkipped, recordsFailed: recordsFailed + 1,
208
+ recordsUnchanged,
179
209
  errorsJson: JSON.stringify(errors),
180
210
  status: 'error',
181
211
  finished: true,
@@ -189,6 +219,8 @@ export async function runIngestion(opts) {
189
219
  recordsWritten,
190
220
  recordsSkipped,
191
221
  recordsFailed: recordsFailed + 1,
222
+ recordsUnchanged,
223
+ recallCheckStatus: 'skipped',
192
224
  errors,
193
225
  plannedRecords: opts.dryRun ? plannedRecords : undefined,
194
226
  };
@@ -207,8 +239,11 @@ async function processStructured(record, mapping, source, opts, store, _report,
207
239
  counters.onWrite(summaryBundle);
208
240
  return;
209
241
  }
210
- await writeRecord(ingested, source, store);
211
- counters.onWrite(summaryBundle);
242
+ const outcome = await writeRecord(ingested, source, store);
243
+ if (outcome === 'unchanged')
244
+ counters.onUnchanged(summaryBundle);
245
+ else
246
+ counters.onWrite(summaryBundle);
212
247
  }
213
248
  catch (err) {
214
249
  counters.onFail({ externalId: record.externalId, error: err instanceof Error ? err.message : String(err) });
@@ -242,8 +277,11 @@ async function processFreeForm(record, source, opts, store, report, planned, _er
242
277
  counters.onWrite(summaryBundle);
243
278
  return;
244
279
  }
245
- await writeRecord(ingested, source, store);
246
- counters.onWrite(summaryBundle);
280
+ const outcome = await writeRecord(ingested, source, store);
281
+ if (outcome === 'unchanged')
282
+ counters.onUnchanged(summaryBundle);
283
+ else
284
+ counters.onWrite(summaryBundle);
247
285
  }
248
286
  catch (err) {
249
287
  counters.onFail({ externalId: record.externalId, error: err instanceof Error ? err.message : String(err) });
@@ -266,37 +304,84 @@ async function writeRecord(record, source, store) {
266
304
  record.tags = [...record.tags, `project:${projSlug}`];
267
305
  }
268
306
  }
269
- // 1) Raw payload artifact store
270
- const artifactId = store.storeArtifact({
271
- toolName: `ingest:${source.slug}`,
272
- summary: record.title || record.externalId,
273
- content: record.rawPayload,
274
- tags: [source.slug, sourceType, ...record.tags].join(','),
275
- sessionKey: null,
276
- agentSlug: source.agentSlug ?? null,
277
- });
278
- record.artifactId = artifactId;
279
- // 2) Vault note: write markdown file with frontmatter + body
307
+ // 1) Vault note content. Build this before artifact writes so exact re-runs
308
+ // can be classified as unchanged without duplicating audit blobs.
280
309
  const abs = path.join(VAULT_DIR, record.targetRelPath);
281
310
  const dir = path.dirname(abs);
282
311
  if (!existsSync(dir))
283
312
  mkdirSync(dir, { recursive: true });
313
+ if (existsSync(abs) && record.frontmatter && 'ingested_at' in record.frontmatter) {
314
+ try {
315
+ const current = readFileSync(abs, 'utf-8');
316
+ const match = current.match(/^ingested_at:\s*(.+)$/m);
317
+ if (match?.[1]) {
318
+ const raw = match[1].trim();
319
+ try {
320
+ record.frontmatter.ingested_at = JSON.parse(raw);
321
+ }
322
+ catch {
323
+ record.frontmatter.ingested_at = raw.replace(/^['"]|['"]$/g, '');
324
+ }
325
+ }
326
+ }
327
+ catch {
328
+ // Fall through with a fresh timestamp if the old note can't be read.
329
+ }
330
+ }
284
331
  const fm = { title: record.title, ...record.frontmatter };
285
332
  const frontmatterBlock = Object.entries(fm)
286
333
  .map(([k, v]) => `${k}: ${serializeYaml(v)}`)
287
334
  .join('\n');
288
335
  const fileContent = `---\n${frontmatterBlock}\n---\n\n# ${record.title}\n\n${record.body}\n`;
336
+ if (existsSync(abs)) {
337
+ try {
338
+ const current = readFileSync(abs, 'utf-8');
339
+ if (current === fileContent && store.findChunkByExternalId(record.sourceSlug, record.externalId)) {
340
+ store.recordMemoryEvent?.({
341
+ sourceType: 'ingestion',
342
+ sourceId: null,
343
+ sessionKey: null,
344
+ agentSlug: source.agentSlug ?? null,
345
+ content: `${record.sourceSlug}:${record.externalId}\nunchanged\n${record.summary}`,
346
+ indexed: true,
347
+ });
348
+ return 'unchanged';
349
+ }
350
+ }
351
+ catch {
352
+ // If the existing file can't be read, fall through and rewrite.
353
+ }
354
+ }
355
+ // 2) Raw payload → artifact store
356
+ const artifactId = store.storeArtifact({
357
+ toolName: `ingest:${source.slug}`,
358
+ summary: record.title || record.externalId,
359
+ content: record.rawPayload,
360
+ tags: [source.slug, sourceType, ...record.tags].join(','),
361
+ sessionKey: null,
362
+ agentSlug: source.agentSlug ?? null,
363
+ });
364
+ record.artifactId = artifactId;
365
+ // 3) Vault note: write markdown file with frontmatter + body
289
366
  writeFileSync(abs, fileContent, 'utf-8');
290
- // 3) Re-index via existing vault pipeline (chunks, FTS, wikilinks)
367
+ // 4) Re-index via existing vault pipeline (chunks, FTS, wikilinks)
291
368
  store.updateFile(record.targetRelPath, source.agentSlug ?? undefined);
292
- // 4) Tag provenance on the chunks we just wrote
369
+ // 5) Tag provenance on the chunks we just wrote
293
370
  store.tagChunksForSource(record.targetRelPath, {
294
371
  sourceSlug: record.sourceSlug,
295
372
  externalId: record.externalId,
296
373
  sourceType,
297
374
  lastSyncedAt: nowIso,
298
375
  });
299
- // 5) Structured overlay for SQL aggregates
376
+ store.recordMemoryEvent?.({
377
+ sourceType: 'ingestion',
378
+ sourceId: artifactId,
379
+ sessionKey: null,
380
+ agentSlug: source.agentSlug ?? null,
381
+ content: `${record.sourceSlug}:${record.externalId}\n${record.title}\n${record.summary}`,
382
+ indexed: true,
383
+ });
384
+ // 6) Structured overlay for SQL aggregates
300
385
  if (record.structuredRow) {
301
386
  const chunkRef = store.findChunkByExternalId(record.sourceSlug, record.externalId);
302
387
  store.insertIngestedRow({
@@ -308,8 +393,9 @@ async function writeRecord(record, source, store) {
308
393
  structuredColumns: Object.fromEntries(Object.entries(record.structuredRow).map(([k, v]) => [k, coerceCol(v)])),
309
394
  });
310
395
  }
311
- // 6) Graph (best-effort, no-op if graph unavailable)
396
+ // 7) Graph (best-effort, no-op if graph unavailable)
312
397
  await writeGraphForRecord(record);
398
+ return 'written';
313
399
  }
314
400
  async function applyStructuredColumns(mapping) {
315
401
  const store = await getStore();