clementine-agent 1.18.20 → 1.18.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/dist/agent/action-enforcer.d.ts +29 -0
- package/dist/agent/action-enforcer.js +120 -0
- package/dist/agent/assistant.d.ts +12 -0
- package/dist/agent/assistant.js +165 -31
- package/dist/agent/auto-update.js +46 -2
- package/dist/agent/local-turn.d.ts +16 -0
- package/dist/agent/local-turn.js +54 -1
- package/dist/agent/route-classifier.d.ts +1 -0
- package/dist/agent/route-classifier.js +30 -3
- package/dist/agent/toolsets.d.ts +14 -0
- package/dist/agent/toolsets.js +68 -0
- package/dist/brain/ingestion-pipeline.d.ts +7 -0
- package/dist/brain/ingestion-pipeline.js +107 -21
- package/dist/channels/discord.js +38 -7
- package/dist/channels/telegram.js +5 -6
- package/dist/cli/dashboard.js +56 -6
- package/dist/cli/index.js +174 -0
- package/dist/cli/ingest.js +8 -2
- package/dist/gateway/context-hygiene.d.ts +17 -0
- package/dist/gateway/context-hygiene.js +31 -0
- package/dist/gateway/heartbeat-scheduler.d.ts +20 -0
- package/dist/gateway/heartbeat-scheduler.js +27 -10
- package/dist/gateway/router.d.ts +7 -0
- package/dist/gateway/router.js +303 -9
- package/dist/gateway/turn-ledger.d.ts +32 -0
- package/dist/gateway/turn-ledger.js +55 -0
- package/dist/memory/embeddings.d.ts +2 -0
- package/dist/memory/embeddings.js +8 -1
- package/dist/memory/store.d.ts +88 -1
- package/dist/memory/store.js +349 -18
- package/dist/memory/write-queue.d.ts +16 -0
- package/dist/memory/write-queue.js +5 -0
- package/dist/tools/shared.d.ts +89 -0
- package/dist/types.d.ts +11 -0
- package/package.json +1 -1
- package/scripts/postinstall.js +56 -6
|
@@ -5,14 +5,41 @@
|
|
|
5
5
|
* Source modifications from self-improve are tracked in ~/.clementine/ (not git),
|
|
6
6
|
* so git pull is always clean. After pulling, source mods are reconciled.
|
|
7
7
|
*/
|
|
8
|
-
import { execSync } from 'node:child_process';
|
|
9
|
-
import { writeFileSync } from 'node:fs';
|
|
8
|
+
import { execFileSync, execSync } from 'node:child_process';
|
|
9
|
+
import { existsSync, readFileSync, writeFileSync } from 'node:fs';
|
|
10
10
|
import path from 'node:path';
|
|
11
11
|
import pino from 'pino';
|
|
12
12
|
import { BASE_DIR } from '../config.js';
|
|
13
13
|
import { reconcileSourceMods } from './source-mods.js';
|
|
14
14
|
const logger = pino({ name: 'clementine.auto-update' });
|
|
15
15
|
const SENTINEL_PATH = path.join(BASE_DIR, '.restart-sentinel.json');
|
|
16
|
+
/**
 * Read simple `KEY=VALUE` pairs from the `.env` file in `BASE_DIR`.
 *
 * Parsing rules:
 *  - blank lines and lines starting with `#` are ignored;
 *  - a leading `export ` (shell-style) is dropped so `export FOO=1` yields
 *    the key `FOO` rather than `export FOO`;
 *  - the line is split on the first `=`; key and value are trimmed and one
 *    leading / trailing quote character is stripped from the value.
 *
 * This is best-effort: a missing or unreadable file yields `{}`.
 *
 * @returns {Record<string, string>} Parsed entries (possibly empty).
 */
function readDataEnv() {
    const envPath = path.join(BASE_DIR, '.env');
    if (!existsSync(envPath)) {
        return {};
    }
    let text;
    try {
        text = readFileSync(envPath, 'utf-8');
    }
    catch (err) {
        // Stay best-effort, but leave a trace instead of failing silently.
        logger.debug({ err, envPath }, 'Could not read data-dir .env; treating it as empty');
        return {};
    }
    const entries = [];
    for (const rawLine of text.split(/\r?\n/)) {
        const trimmed = rawLine.trim();
        if (!trimmed || trimmed.startsWith('#')) {
            continue;
        }
        // Accept shell-style `export KEY=value` lines.
        const line = trimmed.replace(/^export\s+/, '');
        const idx = line.indexOf('=');
        if (idx === -1) {
            continue;
        }
        const key = line.slice(0, idx).trim();
        const value = line.slice(idx + 1).trim().replace(/^["']|["']$/g, '');
        entries.push([key, value]);
    }
    return Object.fromEntries(entries);
}
|
|
34
|
+
/**
 * Decide whether a boolean feature flag is switched on.
 *
 * The process environment wins over the parsed `.env` entries; the `.env`
 * value is consulted only when the variable is absent from `process.env`.
 * A flag counts as enabled when its value, ignoring surrounding whitespace
 * and letter case, is one of `1`, `true`, `yes`, `on`.
 *
 * @param {string} name - Environment variable name.
 * @param {Record<string, string> | null} [envFile] - Parsed `.env` entries;
 *   may be omitted, in which case only `process.env` is consulted.
 * @returns {boolean} True when the flag is enabled.
 */
function flagEnabled(name, envFile = {}) {
    const raw = process.env[name] ?? envFile?.[name];
    // Trim so values like "1 " (e.g. set from a shell or batch script) still count.
    return /^(1|true|yes|on)$/i.test(String(raw ?? '').trim());
}
|
|
38
|
+
/**
 * Report whether the embedding model should be prefetched.
 *
 * Reads the data-dir `.env` once, then checks the two opt-in flags in order;
 * either one being enabled is sufficient.
 *
 * @returns {boolean} True when CLEMENTINE_INSTALL_EMBEDDINGS or
 *   CLEMENTINE_PREFETCH_EMBEDDINGS is enabled.
 */
function shouldPrefetchEmbeddings() {
    const fileEntries = readDataEnv();
    const optInFlags = ['CLEMENTINE_INSTALL_EMBEDDINGS', 'CLEMENTINE_PREFETCH_EMBEDDINGS'];
    return optInFlags.some((flagName) => flagEnabled(flagName, fileEntries));
}
|
|
16
43
|
/**
|
|
17
44
|
* Check if upstream has new commits. Safe to call from cron — no side effects.
|
|
18
45
|
*/
|
|
@@ -121,6 +148,23 @@ export async function applyUpdate(pkgDir) {
|
|
|
121
148
|
logger.error({ err }, 'Build failed after update');
|
|
122
149
|
return { success: false, error: `Build failed after update: ${String(err)}` };
|
|
123
150
|
}
|
|
151
|
+
// 4b. Optional embedding model prefetch. npm postinstall may run before
|
|
152
|
+
// the freshly pulled TypeScript has been built; this second pass uses the
|
|
153
|
+
// just-built CLI so repo updates and npm-style updates behave the same.
|
|
154
|
+
if (shouldPrefetchEmbeddings()) {
|
|
155
|
+
try {
|
|
156
|
+
execFileSync(process.execPath, [path.join(pkgDir, 'dist', 'cli', 'index.js'), 'memory', 'model', 'install'], {
|
|
157
|
+
cwd: pkgDir,
|
|
158
|
+
stdio: 'pipe',
|
|
159
|
+
env: { ...process.env, CLEMENTINE_HOME: BASE_DIR },
|
|
160
|
+
timeout: 10 * 60_000,
|
|
161
|
+
});
|
|
162
|
+
logger.info('Local embedding model prefetch succeeded after update');
|
|
163
|
+
}
|
|
164
|
+
catch (err) {
|
|
165
|
+
logger.warn({ err }, 'Local embedding model prefetch failed after update');
|
|
166
|
+
}
|
|
167
|
+
}
|
|
124
168
|
// 5. Reconcile source modifications
|
|
125
169
|
const reconcileResult = reconcileSourceMods(pkgDir);
|
|
126
170
|
logger.info({
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { ClementineJson } from '../config/clementine-json.js';
|
|
2
|
+
import { type ToolsetName } from './toolsets.js';
|
|
2
3
|
export type ProactivityMode = 'quiet' | 'balanced' | 'proactive' | 'operator';
|
|
3
4
|
export type ResponseStyle = 'concise' | 'balanced' | 'detailed';
|
|
4
5
|
export type ProgressVisibility = 'quiet' | 'normal' | 'detailed';
|
|
@@ -19,14 +20,29 @@ export type LocalTurnIntent = {
|
|
|
19
20
|
kind: 'stop';
|
|
20
21
|
} | {
|
|
21
22
|
kind: 'status';
|
|
23
|
+
} | {
|
|
24
|
+
kind: 'last_action';
|
|
25
|
+
} | {
|
|
26
|
+
kind: 'compress_context';
|
|
27
|
+
} | {
|
|
28
|
+
kind: 'debug_status';
|
|
29
|
+
} | {
|
|
30
|
+
kind: 'toolset';
|
|
31
|
+
toolset: ToolsetName;
|
|
22
32
|
} | {
|
|
23
33
|
kind: 'preference_update';
|
|
24
34
|
updates: AssistantExperienceUpdate;
|
|
25
35
|
summary: string;
|
|
26
36
|
};
|
|
37
|
+
export type ApprovalReply = true | false | 'always' | null;
|
|
27
38
|
export declare function isStopRequest(text: string): boolean;
|
|
28
39
|
export declare function isStatusRequest(text: string): boolean;
|
|
40
|
+
export declare function isLastActionRequest(text: string): boolean;
|
|
41
|
+
export declare function isCompressContextRequest(text: string): boolean;
|
|
42
|
+
export declare function isDebugStatusRequest(text: string): boolean;
|
|
29
43
|
export declare function isTinyAcknowledgment(text: string): boolean;
|
|
44
|
+
export declare function detectApprovalReply(text: string): ApprovalReply;
|
|
45
|
+
export declare function looksLikeApprovalPrompt(text: string): boolean;
|
|
30
46
|
export declare function detectLocalTurn(text: string): LocalTurnIntent;
|
|
31
47
|
export declare function applyAssistantExperienceUpdate(cfg: ClementineJson, updates: AssistantExperienceUpdate): ClementineJson;
|
|
32
48
|
//# sourceMappingURL=local-turn.d.ts.map
|
package/dist/agent/local-turn.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import { isStandaloneGreeting } from './turn-policy.js';
|
|
2
|
+
import { normalizeToolsetName } from './toolsets.js';
|
|
2
3
|
/**
 * Canonicalize a short user message for phrase matching.
 *
 * Steps, in order: trim, lower-case, map typographic quotes and backticks to
 * a plain apostrophe, drop trailing `.`, `!`, `?`, collapse whitespace runs
 * to a single space, then trim again.
 *
 * The final trim matters for inputs such as "status ?": removing the
 * punctuation leaves a trailing space that would otherwise make the
 * anchored phrase patterns miss.
 *
 * @param {string} text - Raw message text.
 * @returns {string} Normalized text.
 */
function normalize(text) {
    return text
        .trim()
        .toLowerCase()
        .replace(/[‘’`]/g, "'")
        .replace(/[.!?]+$/g, '')
        .replace(/\s+/g, ' ')
        .trim();
}
|
|
@@ -20,7 +22,31 @@ export function isStatusRequest(text) {
|
|
|
20
22
|
const n = normalize(text);
|
|
21
23
|
if (wordCount(n) > 8)
|
|
22
24
|
return false;
|
|
23
|
-
return /^(status|task status|deep status|progress|what'?s happening|what'?s going on|what are you doing|are you working|anything running|what'?s
|
|
25
|
+
return /^(status|task status|deep status|progress|what'?s happening|what'?s going on|what are you doing|what are you working on|what are you running|are you working|anything running|what'?s runnin?g?(?: now| right now)?|what is runnin?g?(?: now| right now)?|background status|check status|where are we)$/.test(n);
|
|
26
|
+
}
|
|
27
|
+
/**
 * Detect a short "what did you just do?" style question.
 *
 * The message is normalized first; anything whose `wordCount` exceeds 10 is
 * rejected, and the remainder must match one of the fixed phrases exactly.
 *
 * @param {string} text - Raw user message.
 * @returns {boolean} True when the message is a last-action request.
 */
export function isLastActionRequest(text) {
    const phrase = normalize(text);
    if (wordCount(phrase) > 10) {
        return false;
    }
    const lastActionPattern = /^(last action|last turn|what happened last turn|what did you do|did you do it|did that actually run|did you actually do it|why didn'?t you do it|why did that not run|what happened)$/;
    return lastActionPattern.test(phrase);
}
|
|
33
|
+
/**
 * Detect a request to compress / compact the conversation context.
 *
 * The message is normalized first; anything whose `wordCount` exceeds 8 is
 * rejected, and the remainder must match one of the fixed phrases exactly.
 *
 * @param {string} text - Raw user message.
 * @returns {boolean} True when the message is a compress-context request.
 */
export function isCompressContextRequest(text) {
    const phrase = normalize(text);
    if (wordCount(phrase) > 8) {
        return false;
    }
    const compressPattern = /^(compress context|compact context|compress session|compact session|context compact|context compress|save and reset context|reset context but keep memory)$/;
    return compressPattern.test(phrase);
}
|
|
39
|
+
/**
 * Detect a request for debug / diagnostic status output.
 *
 * The message is normalized first; anything whose `wordCount` exceeds 6 is
 * rejected, and the remainder must match one of the fixed phrases exactly.
 *
 * @param {string} text - Raw user message.
 * @returns {boolean} True when the message is a debug-status request.
 */
export function isDebugStatusRequest(text) {
    const phrase = normalize(text);
    if (wordCount(phrase) > 6) {
        return false;
    }
    const debugPattern = /^(debug|debug status|session debug|agent debug|diagnostics|show diagnostics)$/;
    return debugPattern.test(phrase);
}
|
|
45
|
+
/**
 * Parse a "switch toolset" command such as "toolset safe",
 * "set tool mode to diagnostic" or "use toolset: memory".
 *
 * The single pattern below already covers the bare "toolset <name>" form
 * (the verb prefix and the " to" / ":" separator are both optional), so no
 * separate fallback pattern is needed.
 *
 * @param {string} text - Raw user message.
 * @returns {string | null} Whatever `normalizeToolsetName` returns for the
 *   captured name, or null when the message is not a toolset command.
 */
function parseToolsetRequest(text) {
    const phrase = normalize(text);
    const match = phrase.match(/^(?:set |switch |use |enable )?(?:toolset|tool set|tools mode|tool mode)(?: to|:)? ([a-z _-]+)$/);
    return match ? normalizeToolsetName(match[1]) : null;
}
|
|
25
51
|
export function isTinyAcknowledgment(text) {
|
|
26
52
|
const n = normalize(text);
|
|
@@ -28,6 +54,24 @@ export function isTinyAcknowledgment(text) {
|
|
|
28
54
|
return false;
|
|
29
55
|
return /^(thanks|thank you|thx|ty|nice|great|perfect|awesome|cool|ok|okay|sounds good|got it|makes sense|love it)$/.test(n);
|
|
30
56
|
}
|
|
57
|
+
/**
 * Classify a short reply to an approval prompt.
 *
 * The message is normalized first; anything whose `wordCount` exceeds 4 is
 * treated as "not an approval reply". Otherwise the phrase lists are checked
 * in order: standing approval, denial, approval.
 *
 * @param {string} text - Raw user message.
 * @returns {true | false | 'always' | null} `'always'` for a standing
 *   approval, `false` for a denial, `true` for an approval, `null` when the
 *   message is none of these.
 */
export function detectApprovalReply(text) {
    const reply = normalize(text);
    if (wordCount(reply) > 4) {
        return null;
    }
    // Ordered [pattern, verdict] pairs; the first matching pattern wins.
    const verdicts = [
        [/^(always)$/, 'always'],
        [/^(no|nope|deny|denied|skip)$/, false],
        [/^(yes|y|yep|yeah|ok|okay|approve|approved|go|go ahead|do it|send it|perfect|sounds good|looks good|lgtm)$/, true],
    ];
    for (const [pattern, verdict] of verdicts) {
        if (pattern.test(reply)) {
            return verdict;
        }
    }
    return null;
}
|
|
70
|
+
/**
 * Heuristically decide whether a piece of text is asking for approval.
 *
 * Two checks are applied:
 *  1. the normalized text contains an explicit approval phrase;
 *  2. the raw (trimmed) text mentions an action verb and ends with `?` at
 *     most 120 characters later. The raw text is used here because
 *     normalization strips the trailing question mark.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} True when the text looks like an approval prompt.
 */
export function looksLikeApprovalPrompt(text) {
    const normalized = normalize(text);
    const approvalPhrase = /\b(good to go|okay to send|ok to send|ready to send|should i send|want me to send|approve|confirm|fire it off)\b/;
    if (approvalPhrase.test(normalized)) {
        return true;
    }
    const actionQuestion = /\b(send|email|message|post|publish|delete|change|update|run|execute)\b[\s\S]{0,120}\?$/i;
    return actionQuestion.test(text.trim());
}
|
|
31
75
|
function parseProactivity(text) {
|
|
32
76
|
if (/\b(operator mode|operator)\b/i.test(text))
|
|
33
77
|
return 'operator';
|
|
@@ -71,6 +115,15 @@ export function detectLocalTurn(text) {
|
|
|
71
115
|
return { kind: 'stop' };
|
|
72
116
|
if (isStatusRequest(text))
|
|
73
117
|
return { kind: 'status' };
|
|
118
|
+
if (isLastActionRequest(text))
|
|
119
|
+
return { kind: 'last_action' };
|
|
120
|
+
if (isCompressContextRequest(text))
|
|
121
|
+
return { kind: 'compress_context' };
|
|
122
|
+
if (isDebugStatusRequest(text))
|
|
123
|
+
return { kind: 'debug_status' };
|
|
124
|
+
const toolset = parseToolsetRequest(text);
|
|
125
|
+
if (toolset)
|
|
126
|
+
return { kind: 'toolset', toolset };
|
|
74
127
|
if (isStandaloneGreeting(text))
|
|
75
128
|
return { kind: 'greeting' };
|
|
76
129
|
if (isTinyAcknowledgment(text))
|
|
@@ -29,6 +29,7 @@ export declare function isDirectImperative(userMessage: string): {
|
|
|
29
29
|
match: boolean;
|
|
30
30
|
pattern?: string;
|
|
31
31
|
};
|
|
32
|
+
export declare function hasNoDelegationInstruction(userMessage: string): boolean;
|
|
32
33
|
/**
|
|
33
34
|
* Decide whether the user is talking ABOUT an agent rather than to them.
|
|
34
35
|
* The explicit-mention fast path otherwise routes a message like
|
|
@@ -91,6 +91,22 @@ export function isDirectImperative(userMessage) {
|
|
|
91
91
|
}
|
|
92
92
|
return { match: false };
|
|
93
93
|
}
|
|
94
|
+
/**
 * Detect an explicit "do not delegate / route this away" instruction.
 *
 * Typographic apostrophes and backticks are mapped to `'` before matching,
 * so "don’t delegate" (the default on most phone keyboards) is recognized
 * the same way as "don't delegate".
 *
 * @param {string} userMessage - Raw user message.
 * @returns {boolean} True when the message contains a no-delegation phrase.
 */
export function hasNoDelegationInstruction(userMessage) {
    const text = String(userMessage ?? '').replace(/[‘’`]/g, "'");
    // "don't / do not <delegate|route|send|pass|hand off>"
    if (/\b(don't|dont|do not|don't you|please don't|please dont)\s+(delegate|route|send|pass|hand ?off|handoff)\b/i.test(text)) {
        return true;
    }
    // "no / without <delegating|routing|...>"
    if (/\b(no|without)\s+(delegating|routing|sending|passing|handing ?off|handoff)\b/i.test(text)) {
        return true;
    }
    // "keep this with you", "keep it with clementine", ...
    // NOTE(review): this pattern also matches "keep you posted" — confirm
    // whether that phrase should count as a no-delegation instruction.
    return /\bkeep (this|that|it)?\s*(with )?(clementine|you|yourself)\b/i.test(text);
}
|
|
99
|
+
/**
 * Detect an explicit delegation phrase aimed at a given agent, e.g.
 * "send this to <agent>" or "hand off the report to <agent>".
 *
 * The agent identifiers are escaped before being placed in the pattern, so
 * a name containing regex metacharacters (".", "(", "+", ...) is matched
 * literally instead of acting as a wildcard or throwing a SyntaxError.
 * Empty identifiers are ignored so they cannot produce an always-matching
 * empty alternative.
 *
 * @param {string} text - User message.
 * @param {string} firstName - Agent's first name.
 * @param {string} slug - Agent slug.
 * @returns {boolean} True when a delegation verb is followed (within 40
 *   word/space characters) by the agent's name or slug.
 */
function isExplicitDelegationToAgent(text, firstName, slug) {
    const escapeRegExp = (value) => String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const ident = [firstName, slug]
        .filter((value) => value != null && String(value).trim() !== '')
        .map(escapeRegExp)
        .join('|');
    if (!ident) {
        return false;
    }
    const re = new RegExp(`\\b(send|route|delegate|pass|hand\\s*off|handoff)\\b[\\s\\w']{0,40}?\\b(to\\s+)?(${ident})\\b`, 'i');
    return re.test(text);
}
|
|
104
|
+
/**
 * Detect a message that opens by addressing an agent directly, e.g.
 * "Sasha, draft the memo" or "hey sasha — can you check this".
 *
 * Two safeguards on top of the plain opener match:
 *  - agent identifiers are escaped before being placed in the pattern, so
 *    names containing regex metacharacters are matched literally and cannot
 *    throw; empty identifiers are ignored;
 *  - a leading possessive ("Sasha's tasks are behind") is NOT treated as a
 *    vocative: the name is the topic there, not the addressee.
 *
 * @param {string} text - User message.
 * @param {string} firstName - Agent's first name.
 * @param {string} slug - Agent slug.
 * @returns {boolean} True when the message starts by addressing the agent.
 */
function isVocativeAgentAddress(text, firstName, slug) {
    const escapeRegExp = (value) => String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const ident = [firstName, slug]
        .filter((value) => value != null && String(value).trim() !== '')
        .map(escapeRegExp)
        .join('|');
    if (!ident) {
        return false;
    }
    const normalized = String(text ?? '').trim();
    // (?!['’]s\b) rejects "<name>'s ..." so possessive openers fall through.
    const openerRe = new RegExp(`^(hey\\s+|hi\\s+|yo\\s+)?(${ident})(?!['’]s\\b)(\\b|\\s*[,—-])`, 'i');
    return openerRe.test(normalized);
}
|
|
94
110
|
/**
|
|
95
111
|
* Decide whether the user is talking ABOUT an agent rather than to them.
|
|
96
112
|
* The explicit-mention fast path otherwise routes a message like
|
|
@@ -111,8 +127,8 @@ export function isAskingAboutAgent(text, firstName, slug) {
|
|
|
111
127
|
const possessiveRe = new RegExp(`\\b(${ident})('s|s')\\b`, 'i');
|
|
112
128
|
if (possessiveRe.test(text))
|
|
113
129
|
return true;
|
|
114
|
-
const askingRe = new RegExp(`\\b(how|what|where|who|when|why|is|are|was|were|did|does|do|will|can|could|would|should|has|have|had|tell\\s+me|show\\s+me|let\\s+me\\s+know|any\\s+update|update\\s+on|status\\s+of|about)\\b[\\s\\w']{0,
|
|
115
|
-
return askingRe.test(text);
|
|
130
|
+
const askingRe = new RegExp(`\\b(how|what|where|who|when|why|is|are|was|were|did|does|do|will|can|could|would|should|has|have|had|tell\\s+me|show\\s+me|let\\s+me\\s+know|any\\s+update|update\\s+on|status\\s+of|about|fix|check|review)\\b[\\s\\w']{0,80}?\\b(${ident})\\b`, 'i');
|
|
131
|
+
return askingRe.test(text) || new RegExp(`\\b(for|about|on)\\s+(${ident})\\b`, 'i').test(text);
|
|
116
132
|
}
|
|
117
133
|
/**
|
|
118
134
|
* Session keys eligible for routing. Any key NOT in this set is
|
|
@@ -252,6 +268,13 @@ export async function classifyRoute(userMessage, agents, gateway) {
|
|
|
252
268
|
const specialists = agents.filter(a => a.slug !== 'clementine');
|
|
253
269
|
if (specialists.length === 0)
|
|
254
270
|
return null;
|
|
271
|
+
if (hasNoDelegationInstruction(userMessage)) {
|
|
272
|
+
return {
|
|
273
|
+
targetAgent: 'clementine',
|
|
274
|
+
confidence: 0.95,
|
|
275
|
+
reasoning: 'User explicitly asked not to delegate or route this away from Clementine.',
|
|
276
|
+
};
|
|
277
|
+
}
|
|
255
278
|
// Direct-imperative guardrail: user is instructing Clementine to act —
|
|
256
279
|
// do not delegate, even if an agent is named.
|
|
257
280
|
const imperative = isDirectImperative(userMessage);
|
|
@@ -272,7 +295,9 @@ export async function classifyRoute(userMessage, agents, gateway) {
|
|
|
272
295
|
const wordRe = new RegExp(`\\b(${firstName}|${a.slug})\\b`, 'i');
|
|
273
296
|
if (!wordRe.test(trimmed))
|
|
274
297
|
continue;
|
|
275
|
-
|
|
298
|
+
const explicitDelegation = isExplicitDelegationToAgent(trimmed, firstName, a.slug);
|
|
299
|
+
const vocativeAddress = isVocativeAgentAddress(trimmed, firstName, a.slug);
|
|
300
|
+
if (!explicitDelegation && !vocativeAddress && isAskingAboutAgent(trimmed, firstName, a.slug)) {
|
|
276
301
|
// The user is asking ABOUT the agent ("how is <agent> doing", "<agent>'s
|
|
277
302
|
// tasks", "did <agent> handle that?") rather than addressing them. Fall
|
|
278
303
|
// through to the LLM classifier, which has a system-prompt rule for
|
|
@@ -280,6 +305,8 @@ export async function classifyRoute(userMessage, agents, gateway) {
|
|
|
280
305
|
logger.debug({ slug: a.slug, trigger: 'meta-mention-bypass' }, 'Routing skipped — name appears as topic, not vocative');
|
|
281
306
|
continue;
|
|
282
307
|
}
|
|
308
|
+
if (!explicitDelegation && !vocativeAddress)
|
|
309
|
+
continue;
|
|
283
310
|
logger.debug({ slug: a.slug, trigger: 'explicit-mention' }, 'Fast-path routing decision');
|
|
284
311
|
return {
|
|
285
312
|
targetAgent: a.slug,
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/** Names of the selectable tool surfaces. */
export type ToolsetName =
    | 'auto'
    | 'safe'
    | 'diagnostic'
    | 'communications'
    | 'memory'
    | 'full';
/** One selectable toolset together with its user-facing text. */
export interface ToolsetPreset {
    /** Canonical toolset name. */
    name: ToolsetName;
    /** Short display label. */
    label: string;
    /** One-line description of the toolset. */
    description: string;
    /** Directive text associated with the toolset (empty for `auto`). */
    directive: string;
}
/** All presets, in declaration order. */
export declare const TOOLSET_PRESETS: ReadonlyArray<ToolsetPreset>;
/** Map free-form input (including known aliases) to a toolset name, or null when empty/unknown. */
export declare function normalizeToolsetName(input: string | null | undefined): ToolsetName | null;
/** Look up a preset by name; the implementation falls back to the `auto` preset. */
export declare function getToolsetPreset(name: ToolsetName): ToolsetPreset;
/** Render the presets as one `- name: description` line each. */
export declare function formatToolsetChoices(): string;
/** True for `safe`, `diagnostic` and `memory`. */
export declare function isRestrictedToolset(name: ToolsetName): boolean;
/** True for `auto` and `full`. */
export declare function toolsetAllowsLocalWrites(name: ToolsetName): boolean;
//# sourceMappingURL=toolsets.d.ts.map
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
 * Selectable toolsets, in display order. Each row is
 * [name, label, description, directive] and is expanded into a preset object.
 */
export const TOOLSET_PRESETS = [
    [
        'auto',
        'Auto',
        'Route to the smallest inferred tool surface for each turn.',
        '',
    ],
    [
        'safe',
        'Safe',
        'Memory and read-only local context; no external sends or local writes.',
        'Toolset safe: use memory and read-only local context. Do not send messages, email, delete data, deploy, or modify files unless the user switches toolsets.',
    ],
    [
        'diagnostic',
        'Diagnostic',
        'Bounded logs, local reads, memory, and diagnostics; no external sends.',
        'Toolset diagnostic: diagnose with bounded reads and capped command output. Prefer targeted log slices, summaries, and transcript_search. Do not send external messages or make product changes.',
    ],
    [
        'communications',
        'Communications',
        'Email/message workflows plus memory; avoid code and deployment tools.',
        'Toolset communications: focus on email, calendar, messaging, approvals, and memory continuity. Do not edit code, deploy, or run unrelated local commands.',
    ],
    [
        'memory',
        'Memory',
        'Memory, transcript, and relationship tools only unless explicitly changed.',
        'Toolset memory: use memory_read, memory_search, memory_recall, transcript_search, working_memory, and user_model. Avoid external integrations and local shell/file writes.',
    ],
    [
        'full',
        'Full',
        'Explicit operator mode for broad integrations and admin work.',
        'Toolset full: the user explicitly enabled the broad operator surface for this chat. Still keep tool output bounded and ask before destructive or irreversible actions.',
    ],
].map(([name, label, description, directive]) => ({ name, label, description, directive }));
/** Lookup table from toolset name to its preset object. */
const TOOLSET_BY_NAME = new Map();
for (const preset of TOOLSET_PRESETS) {
    TOOLSET_BY_NAME.set(preset.name, preset);
}
|
|
40
|
+
/**
 * Map free-form input to a canonical toolset name.
 *
 * The input is stringified, trimmed, lower-cased, and runs of whitespace,
 * underscores or hyphens are collapsed to a single hyphen. Known aliases are
 * resolved first; anything else must be an existing preset name.
 *
 * @param {string | undefined | null} input - User-supplied toolset name.
 * @returns {string | null} Canonical name, or null for empty/unknown input.
 */
export function normalizeToolsetName(input) {
    const key = String(input ?? '').trim().toLowerCase().replace(/[\s_-]+/g, '-');
    if (key === '') {
        return null;
    }
    switch (key) {
        case 'diagnostics':
        case 'debug':
            return 'diagnostic';
        case 'comm':
        case 'comms':
        case 'communication':
            return 'communications';
        case 'mem':
            return 'memory';
        case 'all':
        case 'operator':
            return 'full';
        default:
            return TOOLSET_BY_NAME.has(key) ? key : null;
    }
}
|
|
54
|
+
/**
 * Look up the preset for a toolset name.
 *
 * @param {string} name - Toolset name.
 * @returns {object} The matching preset, or the `auto` preset when the name
 *   has no entry in the lookup table.
 */
export function getToolsetPreset(name) {
    const preset = TOOLSET_BY_NAME.get(name);
    return preset ?? TOOLSET_BY_NAME.get('auto');
}
|
|
57
|
+
/**
 * Render every preset as a `- name: description` line.
 *
 * @returns {string} The lines joined with newline characters.
 */
export function formatToolsetChoices() {
    const lines = [];
    for (const { name, description } of TOOLSET_PRESETS) {
        lines.push(`- ${name}: ${description}`);
    }
    return lines.join('\n');
}
|
|
62
|
+
/**
 * Tell whether a toolset is one of the restricted ones.
 *
 * @param {string} name - Toolset name.
 * @returns {boolean} True for `safe`, `diagnostic` and `memory`.
 */
export function isRestrictedToolset(name) {
    switch (name) {
        case 'safe':
        case 'diagnostic':
        case 'memory':
            return true;
        default:
            return false;
    }
}
|
|
65
|
+
/**
 * Tell whether a toolset permits local writes.
 *
 * @param {string} name - Toolset name.
 * @returns {boolean} True for `auto` and `full`.
 */
export function toolsetAllowsLocalWrites(name) {
    switch (name) {
        case 'auto':
        case 'full':
            return true;
        default:
            return false;
    }
}
|
|
68
|
+
//# sourceMappingURL=toolsets.js.map
|
|
@@ -32,6 +32,13 @@ export interface IngestResult {
|
|
|
32
32
|
recordsWritten: number;
|
|
33
33
|
recordsSkipped: number;
|
|
34
34
|
recordsFailed: number;
|
|
35
|
+
recordsUnchanged: number;
|
|
36
|
+
recallCheckStatus?: 'ok' | 'partial' | 'skipped';
|
|
37
|
+
recallCheck?: {
|
|
38
|
+
checked: number;
|
|
39
|
+
hits: number;
|
|
40
|
+
missing: string[];
|
|
41
|
+
};
|
|
35
42
|
errors: Array<{
|
|
36
43
|
externalId?: string;
|
|
37
44
|
error: string;
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* → tag provenance → ingested_rows (structured overlay)
|
|
11
11
|
* → graph extractor → ingestion_runs audit
|
|
12
12
|
*/
|
|
13
|
-
import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
|
|
13
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
14
14
|
import path from 'node:path';
|
|
15
15
|
import { VAULT_DIR } from '../config.js';
|
|
16
16
|
import { getStore } from '../tools/shared.js';
|
|
@@ -33,6 +33,8 @@ export async function runIngestion(opts) {
|
|
|
33
33
|
let recordsWritten = 0;
|
|
34
34
|
let recordsSkipped = 0;
|
|
35
35
|
let recordsFailed = 0;
|
|
36
|
+
let recordsUnchanged = 0;
|
|
37
|
+
const touchedExternalIds = new Set();
|
|
36
38
|
const report = (stage, message) => {
|
|
37
39
|
opts.onProgress?.({
|
|
38
40
|
runId: runId ?? -1,
|
|
@@ -52,6 +54,14 @@ export async function runIngestion(opts) {
|
|
|
52
54
|
recordsWritten += 1;
|
|
53
55
|
if (sum)
|
|
54
56
|
writtenSummaries.push(sum);
|
|
57
|
+
if (sum?.externalId)
|
|
58
|
+
touchedExternalIds.add(sum.externalId);
|
|
59
|
+
},
|
|
60
|
+
onUnchanged: (sum) => {
|
|
61
|
+
recordsSkipped += 1;
|
|
62
|
+
recordsUnchanged += 1;
|
|
63
|
+
if (sum?.externalId)
|
|
64
|
+
touchedExternalIds.add(sum.externalId);
|
|
55
65
|
},
|
|
56
66
|
onSkip: () => { recordsSkipped += 1; },
|
|
57
67
|
onFail: (err) => { recordsFailed += 1; errors.push(err); },
|
|
@@ -145,12 +155,28 @@ export async function runIngestion(opts) {
|
|
|
145
155
|
});
|
|
146
156
|
}
|
|
147
157
|
}
|
|
158
|
+
let recallCheckStatus = 'skipped';
|
|
159
|
+
let recallCheck;
|
|
160
|
+
if (!opts.dryRun && touchedExternalIds.size > 0) {
|
|
161
|
+
const missing = [];
|
|
162
|
+
for (const externalId of touchedExternalIds) {
|
|
163
|
+
if (!store.findChunkByExternalId(source.slug, externalId))
|
|
164
|
+
missing.push(externalId);
|
|
165
|
+
}
|
|
166
|
+
recallCheck = {
|
|
167
|
+
checked: touchedExternalIds.size,
|
|
168
|
+
hits: touchedExternalIds.size - missing.length,
|
|
169
|
+
missing,
|
|
170
|
+
};
|
|
171
|
+
recallCheckStatus = missing.length === 0 ? 'ok' : 'partial';
|
|
172
|
+
}
|
|
148
173
|
// Finalize
|
|
149
174
|
const status = recordsFailed > 0 && recordsWritten === 0 ? 'error' :
|
|
150
175
|
recordsFailed > 0 ? 'partial' : 'ok';
|
|
151
176
|
if (runId !== null) {
|
|
152
177
|
store.updateIngestionRun(runId, {
|
|
153
|
-
recordsIn, recordsWritten, recordsSkipped, recordsFailed,
|
|
178
|
+
recordsIn, recordsWritten, recordsSkipped, recordsFailed, recordsUnchanged,
|
|
179
|
+
recallCheckStatus,
|
|
154
180
|
errorsJson: errors.length ? JSON.stringify(errors.slice(0, 50)) : null,
|
|
155
181
|
overviewNotePath,
|
|
156
182
|
status,
|
|
@@ -165,6 +191,9 @@ export async function runIngestion(opts) {
|
|
|
165
191
|
recordsWritten,
|
|
166
192
|
recordsSkipped,
|
|
167
193
|
recordsFailed,
|
|
194
|
+
recordsUnchanged,
|
|
195
|
+
recallCheckStatus,
|
|
196
|
+
recallCheck,
|
|
168
197
|
errors,
|
|
169
198
|
plannedRecords: opts.dryRun ? plannedRecords : undefined,
|
|
170
199
|
overviewNotePath,
|
|
@@ -176,6 +205,7 @@ export async function runIngestion(opts) {
|
|
|
176
205
|
if (runId !== null) {
|
|
177
206
|
store.updateIngestionRun(runId, {
|
|
178
207
|
recordsIn, recordsWritten, recordsSkipped, recordsFailed: recordsFailed + 1,
|
|
208
|
+
recordsUnchanged,
|
|
179
209
|
errorsJson: JSON.stringify(errors),
|
|
180
210
|
status: 'error',
|
|
181
211
|
finished: true,
|
|
@@ -189,6 +219,8 @@ export async function runIngestion(opts) {
|
|
|
189
219
|
recordsWritten,
|
|
190
220
|
recordsSkipped,
|
|
191
221
|
recordsFailed: recordsFailed + 1,
|
|
222
|
+
recordsUnchanged,
|
|
223
|
+
recallCheckStatus: 'skipped',
|
|
192
224
|
errors,
|
|
193
225
|
plannedRecords: opts.dryRun ? plannedRecords : undefined,
|
|
194
226
|
};
|
|
@@ -207,8 +239,11 @@ async function processStructured(record, mapping, source, opts, store, _report,
|
|
|
207
239
|
counters.onWrite(summaryBundle);
|
|
208
240
|
return;
|
|
209
241
|
}
|
|
210
|
-
await writeRecord(ingested, source, store);
|
|
211
|
-
|
|
242
|
+
const outcome = await writeRecord(ingested, source, store);
|
|
243
|
+
if (outcome === 'unchanged')
|
|
244
|
+
counters.onUnchanged(summaryBundle);
|
|
245
|
+
else
|
|
246
|
+
counters.onWrite(summaryBundle);
|
|
212
247
|
}
|
|
213
248
|
catch (err) {
|
|
214
249
|
counters.onFail({ externalId: record.externalId, error: err instanceof Error ? err.message : String(err) });
|
|
@@ -242,8 +277,11 @@ async function processFreeForm(record, source, opts, store, report, planned, _er
|
|
|
242
277
|
counters.onWrite(summaryBundle);
|
|
243
278
|
return;
|
|
244
279
|
}
|
|
245
|
-
await writeRecord(ingested, source, store);
|
|
246
|
-
|
|
280
|
+
const outcome = await writeRecord(ingested, source, store);
|
|
281
|
+
if (outcome === 'unchanged')
|
|
282
|
+
counters.onUnchanged(summaryBundle);
|
|
283
|
+
else
|
|
284
|
+
counters.onWrite(summaryBundle);
|
|
247
285
|
}
|
|
248
286
|
catch (err) {
|
|
249
287
|
counters.onFail({ externalId: record.externalId, error: err instanceof Error ? err.message : String(err) });
|
|
@@ -266,37 +304,84 @@ async function writeRecord(record, source, store) {
|
|
|
266
304
|
record.tags = [...record.tags, `project:${projSlug}`];
|
|
267
305
|
}
|
|
268
306
|
}
|
|
269
|
-
// 1)
|
|
270
|
-
|
|
271
|
-
toolName: `ingest:${source.slug}`,
|
|
272
|
-
summary: record.title || record.externalId,
|
|
273
|
-
content: record.rawPayload,
|
|
274
|
-
tags: [source.slug, sourceType, ...record.tags].join(','),
|
|
275
|
-
sessionKey: null,
|
|
276
|
-
agentSlug: source.agentSlug ?? null,
|
|
277
|
-
});
|
|
278
|
-
record.artifactId = artifactId;
|
|
279
|
-
// 2) Vault note: write markdown file with frontmatter + body
|
|
307
|
+
// 1) Vault note content. Build this before artifact writes so exact re-runs
|
|
308
|
+
// can be classified as unchanged without duplicating audit blobs.
|
|
280
309
|
const abs = path.join(VAULT_DIR, record.targetRelPath);
|
|
281
310
|
const dir = path.dirname(abs);
|
|
282
311
|
if (!existsSync(dir))
|
|
283
312
|
mkdirSync(dir, { recursive: true });
|
|
313
|
+
if (existsSync(abs) && record.frontmatter && 'ingested_at' in record.frontmatter) {
|
|
314
|
+
try {
|
|
315
|
+
const current = readFileSync(abs, 'utf-8');
|
|
316
|
+
const match = current.match(/^ingested_at:\s*(.+)$/m);
|
|
317
|
+
if (match?.[1]) {
|
|
318
|
+
const raw = match[1].trim();
|
|
319
|
+
try {
|
|
320
|
+
record.frontmatter.ingested_at = JSON.parse(raw);
|
|
321
|
+
}
|
|
322
|
+
catch {
|
|
323
|
+
record.frontmatter.ingested_at = raw.replace(/^['"]|['"]$/g, '');
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
catch {
|
|
328
|
+
// Fall through with a fresh timestamp if the old note can't be read.
|
|
329
|
+
}
|
|
330
|
+
}
|
|
284
331
|
const fm = { title: record.title, ...record.frontmatter };
|
|
285
332
|
const frontmatterBlock = Object.entries(fm)
|
|
286
333
|
.map(([k, v]) => `${k}: ${serializeYaml(v)}`)
|
|
287
334
|
.join('\n');
|
|
288
335
|
const fileContent = `---\n${frontmatterBlock}\n---\n\n# ${record.title}\n\n${record.body}\n`;
|
|
336
|
+
if (existsSync(abs)) {
|
|
337
|
+
try {
|
|
338
|
+
const current = readFileSync(abs, 'utf-8');
|
|
339
|
+
if (current === fileContent && store.findChunkByExternalId(record.sourceSlug, record.externalId)) {
|
|
340
|
+
store.recordMemoryEvent?.({
|
|
341
|
+
sourceType: 'ingestion',
|
|
342
|
+
sourceId: null,
|
|
343
|
+
sessionKey: null,
|
|
344
|
+
agentSlug: source.agentSlug ?? null,
|
|
345
|
+
content: `${record.sourceSlug}:${record.externalId}\nunchanged\n${record.summary}`,
|
|
346
|
+
indexed: true,
|
|
347
|
+
});
|
|
348
|
+
return 'unchanged';
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
catch {
|
|
352
|
+
// If the existing file can't be read, fall through and rewrite.
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
// 2) Raw payload → artifact store
|
|
356
|
+
const artifactId = store.storeArtifact({
|
|
357
|
+
toolName: `ingest:${source.slug}`,
|
|
358
|
+
summary: record.title || record.externalId,
|
|
359
|
+
content: record.rawPayload,
|
|
360
|
+
tags: [source.slug, sourceType, ...record.tags].join(','),
|
|
361
|
+
sessionKey: null,
|
|
362
|
+
agentSlug: source.agentSlug ?? null,
|
|
363
|
+
});
|
|
364
|
+
record.artifactId = artifactId;
|
|
365
|
+
// 3) Vault note: write markdown file with frontmatter + body
|
|
289
366
|
writeFileSync(abs, fileContent, 'utf-8');
|
|
290
|
-
//
|
|
367
|
+
// 4) Re-index via existing vault pipeline (chunks, FTS, wikilinks)
|
|
291
368
|
store.updateFile(record.targetRelPath, source.agentSlug ?? undefined);
|
|
292
|
-
//
|
|
369
|
+
// 5) Tag provenance on the chunks we just wrote
|
|
293
370
|
store.tagChunksForSource(record.targetRelPath, {
|
|
294
371
|
sourceSlug: record.sourceSlug,
|
|
295
372
|
externalId: record.externalId,
|
|
296
373
|
sourceType,
|
|
297
374
|
lastSyncedAt: nowIso,
|
|
298
375
|
});
|
|
299
|
-
|
|
376
|
+
store.recordMemoryEvent?.({
|
|
377
|
+
sourceType: 'ingestion',
|
|
378
|
+
sourceId: artifactId,
|
|
379
|
+
sessionKey: null,
|
|
380
|
+
agentSlug: source.agentSlug ?? null,
|
|
381
|
+
content: `${record.sourceSlug}:${record.externalId}\n${record.title}\n${record.summary}`,
|
|
382
|
+
indexed: true,
|
|
383
|
+
});
|
|
384
|
+
// 6) Structured overlay for SQL aggregates
|
|
300
385
|
if (record.structuredRow) {
|
|
301
386
|
const chunkRef = store.findChunkByExternalId(record.sourceSlug, record.externalId);
|
|
302
387
|
store.insertIngestedRow({
|
|
@@ -308,8 +393,9 @@ async function writeRecord(record, source, store) {
|
|
|
308
393
|
structuredColumns: Object.fromEntries(Object.entries(record.structuredRow).map(([k, v]) => [k, coerceCol(v)])),
|
|
309
394
|
});
|
|
310
395
|
}
|
|
311
|
-
//
|
|
396
|
+
// 7) Graph (best-effort, no-op if graph unavailable)
|
|
312
397
|
await writeGraphForRecord(record);
|
|
398
|
+
return 'written';
|
|
313
399
|
}
|
|
314
400
|
async function applyStructuredColumns(mapping) {
|
|
315
401
|
const store = await getStore();
|