npm - clementine-agent - Versions diffs - 1.0.54 → 1.0.56 - Mend

clementine-agent 1.0.54 → 1.0.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/agent/assistant.d.ts +2 -0
package/dist/agent/assistant.js +71 -12
package/dist/agent/contradiction-validator.d.ts +61 -0
package/dist/agent/contradiction-validator.js +128 -0
package/package.json +1 -1

package/dist/agent/assistant.d.ts CHANGED Viewed

@@ -64,6 +64,8 @@ export declare class PersonalAssistant {
     private _lastTerminalReason?;
     /** Per-session stall nudge — set after a query shows stall signals, consumed on the next query. */
     private stallNudges;
+    /** Last contradiction finding per session, consumed by the session transcript writer to splice a correction note. */
+    private _lastContradictionFinding;
     private _compactedSessions;
     /** Last auto-matched project per session — exposed for CLI display. */
     private _lastMatchedProject;

package/dist/agent/assistant.js CHANGED Viewed

@@ -21,6 +21,7 @@ import { agentWorkingMemoryFile, listAllGoals } from '../tools/shared.js';
 import { AgentManager } from './agent-manager.js';
 import { extractLinks } from './link-extractor.js';
 import { StallGuard } from './stall-guard.js';
+import { collectToolCalls, detectContradiction, buildCorrectionPrompt } from './contradiction-validator.js';
 import { assembleContext } from '../memory/context-assembler.js';
 import { PromptCache } from './prompt-cache.js';
 import { searchSkills as searchSkillsSync } from './skill-extractor.js';
@@ -611,6 +612,8 @@ export class PersonalAssistant {
     _lastTerminalReason;
     /** Per-session stall nudge — set after a query shows stall signals, consumed on the next query. */
     stallNudges = new Map();
+    /** Last contradiction finding per session, consumed by the session transcript writer to splice a correction note. */
+    _lastContradictionFinding = new Map();
     _compactedSessions = new Set();
     /** Last auto-matched project per session — exposed for CLI display. */
     _lastMatchedProject = new Map();
@@ -1083,19 +1086,9 @@ If you're unsure what's happening first, run \`where_is_source\` — it reports
 ### Calling Claude Desktop connector tools (Drive, Gmail, etc.)
-The **only source of truth for tool availability is your function schema**. Do not inspect \`claude-integrations.json\`, the inventory file, or run \`ToolSearch\` to "check" first — those are telemetry caches, not reality.
+Just call the tool — e.g. \`mcp__claude_ai_Google_Drive__search_files\`, \`mcp__claude_ai_Gmail__authenticate\`. Report the literal result: real data, auth error, whatever. Your replies are validated against actual tool results; claims that contradict a tool's return value are rejected and you're asked to retry. Don't pre-check with \`integration_status\` — that's for env-var integrations, not schema-driven connectors.
-**The right sequence when the user asks you to do something with a connector:**
-1. **Just call the tool.** \`mcp__claude_ai_Google_Drive__search_files\`, \`mcp__claude_ai_Gmail__authenticate\`, etc. Attempt it. Report the literal result — real data, auth error, or whatever.
-2. **If refused** with "not in my function schema" / "tool not allowed," call \`allow_tool(exact_name)\` and retry. \`allow_tool\` auto-refreshes the inventory if the name is new — handles the case where the owner just added a connector at claude.ai.
-3. **If the owner says "I just added X at claude.ai"** or anything similar, call \`refresh_tool_inventory\` first to pick up the new connector. Report what came online.
-**Never** say the tool "isn't loaded in this session," "doesn't carry over from Claude Desktop," "the tools array is empty," or "MCP server still connecting." If any of those phrasings come to mind, call the tool directly and report what actually happens instead.
-\`list_allowed_tools\` / \`disallow_tool\` manage the whitelist. \`integration_status\` is for env-var (API key) integrations — **not** for claude_ai_* connectors, which are schema-driven. Don't use \`integration_status\` as a proxy for "can I call Drive / Gmail / etc." — those are always tried by direct tool call, not status lookup.
-**Critical rule: if the user asks you to use a claude_ai_* connector, you call the connector tool. Full stop.** Do not report "I tried and it failed" unless there was an actual tool call that returned an actual error — your audit log records every tool call, so narrating a failed attempt when the audit shows no call will be spotted.
+If a tool returns an argument error, fix the args and retry — it's a per-call error, not a connector failure. \`allow_tool(name)\` + \`refresh_tool_inventory\` exist for the case where the owner just added a connector at claude.ai.
 ## Context Window Management
@@ -2050,6 +2043,18 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
         if (key && this.memoryStore) {
             try {
                 this.memoryStore.saveTurn(key, 'user', text);
+                // Fix B: if a contradiction fired this turn, splice a system-role note
+                // into the transcript BEFORE the assistant reply. Future turns that
+                // read transcript history will see the correction and won't anchor on
+                // the bad phrasing. Generic across all connectors; triggered only when
+                // Fix A fired, so benign otherwise.
+                const finding = this._lastContradictionFinding.get(key);
+                if (finding) {
+                    this._lastContradictionFinding.delete(key);
+                    const note = `[system: Previous draft reply contained "${finding.matchedPhrase}" but ${finding.tool.name} ${finding.tool.resultClass === 'success' ? 'succeeded' : 'returned a per-call error (not a connector failure)'}. ` +
+                        `Corrected reply above is based on the actual tool result.]`;
+                    this.memoryStore.saveTurn(key, 'system', note);
+                }
                 this.memoryStore.saveTurn(key, 'assistant', responseText, model ?? MODEL);
             }
             catch (err) {
@@ -2135,6 +2140,10 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                 logger.warn({ sessionKey, timeoutMs }, 'Chat query timed out');
             }, timeoutMs);
         }
+        // One-shot flag so a contradiction retry can't chain into infinite loops.
+        // Flipped true on the first intervention; subsequent replies go through
+        // un-validated (but still logged).
+        let contradictionRetried = false;
         try {
             for (let attempt = 0; attempt <= PersonalAssistant.RATE_LIMIT_MAX_RETRIES; attempt++) {
                 const sdkOptions = this.buildOptions({ model, maxTurns: maxTurnsOverride ?? null, retrievalContext, profile, sessionKey, streaming: !!onText, verboseLevel, abortController, stallGuard, intentClassification, effort: intentClassification?.suggestedEffort });
@@ -2194,6 +2203,10 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                 let staleSession = false;
                 let contextRecovery = false;
                 let lastAssistantBlocks = [];
+                // Raw SDK messages for post-turn contradiction validation. We capture
+                // assistant messages (tool_use blocks) and user messages (tool_result
+                // blocks) so we can pair them and compare against the outgoing reply.
+                const collectedSdkMessages = [];
                 const queryStartMs = Date.now();
                 // Event log: track query lifecycle
                 const eventLog = getEventLog();
@@ -2204,6 +2217,12 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                     const stream = query({ prompt, options: sdkOptions });
                     let gotStreamEvents = false;
                     for await (const message of stream) {
+                        // Capture assistant + user messages for post-turn contradiction
+                        // validation. Must happen before the switch below so we catch
+                        // every message type, including ones we don't otherwise handle.
+                        if (message.type === 'assistant' || message.type === 'user') {
+                            collectedSdkMessages.push({ type: message.type, message: message.message });
+                        }
                         if (message.type === 'assistant') {
                             const blocks = getContentBlocks(message);
                             lastAssistantBlocks = blocks; // Track for fallback text extraction
@@ -2520,6 +2539,46 @@ You have a cost budget per message — not a hard turn limit. Work until the tas
                         }
                     }
                 }
+                // ── Contradiction validator ─────────────────────────────────────
+                // If the model's reply claims a claude_ai_* connector is broken but
+                // the audit log (this turn's tool_use/tool_result pairs) shows the
+                // tool actually succeeded or returned a fixable arg error, reject the
+                // reply and force one more turn with the literal tool result in hand.
+                // Deterministic — does not rely on prompt obedience.
+                if (!contradictionRetried && attempt < PersonalAssistant.RATE_LIMIT_MAX_RETRIES && responseText.trim()) {
+                    try {
+                        const toolCallRecords = collectToolCalls(collectedSdkMessages);
+                        const finding = detectContradiction(responseText, toolCallRecords);
+                        if (finding) {
+                            contradictionRetried = true;
+                            logger.warn({
+                                sessionKey,
+                                tool: finding.tool.name,
+                                resultClass: finding.tool.resultClass,
+                                matchedPhrase: finding.matchedPhrase,
+                            }, 'Contradiction detected — rewriting reply');
+                            logAuditJsonl({
+                                event_type: 'confabulation_corrected',
+                                tool_name: finding.tool.name,
+                                result_class: finding.tool.resultClass,
+                                matched_phrase: finding.matchedPhrase,
+                                rejected_reply_preview: responseText.slice(0, 300),
+                                tool_result_preview: finding.tool.resultPreview,
+                            });
+                            // Hand the correction prompt to the SDK as the next user turn.
+                            // Resume the same session so the model keeps its context.
+                            prompt = buildCorrectionPrompt(finding);
+                            responseText = '';
+                            // Also record the contradiction for Fix B (session splice) later.
+                            this._lastContradictionFinding.set(sessionKey ?? '__no_session__', finding);
+                            continue;
+                        }
+                    }
+                    catch (err) {
+                        // Validator errors must never break the main reply path.
+                        logger.debug({ err }, 'Contradiction validator errored — passing reply through');
+                    }
+                }
                 // Event log: query completed successfully
                 if (sessionKey) {
                     eventLog.emitQueryEnd(sessionKey, {

package/dist/agent/contradiction-validator.d.ts ADDED Viewed

@@ -0,0 +1,61 @@
+/**
+ * Post-turn contradiction validator.
+ *
+ * After a chat turn's SDK stream completes, compares the assistant's outgoing
+ * reply against the actual tool_use/tool_result pairs from that turn. If a
+ * claude_ai_* connector succeeded (or returned an argument error — a fixable
+ * per-call failure) but the reply claims the connector is broken, missing from
+ * the schema, or otherwise generalizes a single failure into connector-level
+ * "deadness," we flag it.
+ *
+ * This is deterministic: it does NOT rely on the model obeying prompt rules.
+ * It's the load-bearing guardrail that replaces the forbidden-phrase list we
+ * used to patch into the system prompt.
+ */
+export type ToolResultClass = 'success' | 'arg_error' | 'auth_error' | 'other_error';
+export interface ToolCallRecord {
+    /** Tool name, e.g. mcp__claude_ai_Google_Drive__search_files */
+    name: string;
+    /** tool_use_id from the assistant's request */
+    id: string;
+    /** Classification of the paired tool_result */
+    resultClass: ToolResultClass;
+    /** First ~200 chars of the literal result content (or error text) */
+    resultPreview: string;
+}
+/** Regex matching reply phrasings that claim a connector-wide failure. */
+export declare const CONTRADICTION_RE: RegExp;
+export declare function classifyResult(content: string, isError: boolean): ToolResultClass;
+/**
+ * Walk collected SDK messages (assistant + user) and pair every tool_use with
+ * its tool_result. Returns one record per tool_use; unpaired ones (still
+ * running at end of stream) are skipped.
+ */
+export declare function collectToolCalls(messages: Array<{
+    type: string;
+    message?: any;
+}>): ToolCallRecord[];
+export interface ContradictionFinding {
+    /** The tool call whose result contradicts the reply */
+    tool: ToolCallRecord;
+    /** The exact phrase from the reply that triggered detection */
+    matchedPhrase: string;
+}
+/**
+ * Check a reply against a set of tool-call records. Returns the first
+ * contradiction found, or null if the reply is consistent with tool results.
+ *
+ * Contradiction = reply contains a CONTRADICTION_RE phrase AND at least one
+ * mcp__claude_ai_* tool in this turn classified `success` or `arg_error`.
+ * `auth_error` and `other_error` are legitimate failures that can support
+ * those reply phrasings.
+ */
+export declare function detectContradiction(reply: string, calls: ToolCallRecord[]): ContradictionFinding | null;
+/**
+ * Build the follow-up message injected when a contradiction fires.
+ *
+ * The returned text quotes the phrase that triggered detection, names the tool
+ * and how its result was classified, includes the stored result preview, and
+ * tells the model to rewrite its reply from the actual tool result. The call
+ * site in assistant.js assigns the returned string to `prompt` and continues
+ * its attempt loop, so the model's next reply replaces the rejected one.
+ *
+ * @param finding Contradiction reported by `detectContradiction`.
+ * @returns The correction prompt text.
+ */
+export declare function buildCorrectionPrompt(finding: ContradictionFinding): string;
+//# sourceMappingURL=contradiction-validator.d.ts.map

package/dist/agent/contradiction-validator.js ADDED Viewed

@@ -0,0 +1,128 @@
+/**
+ * Post-turn contradiction validator.
+ *
+ * After a chat turn's SDK stream completes, compares the assistant's outgoing
+ * reply against the actual tool_use/tool_result pairs from that turn. If a
+ * claude_ai_* connector succeeded (or returned an argument error — a fixable
+ * per-call failure) but the reply claims the connector is broken, missing from
+ * the schema, or otherwise generalizes a single failure into connector-level
+ * "deadness," we flag it.
+ *
+ * This is deterministic: it does NOT rely on the model obeying prompt rules.
+ * It's the load-bearing guardrail that replaces the forbidden-phrase list we
+ * used to patch into the system prompt.
+ */
+const ARG_ERROR_RE = /\b(invalid|unknown field|required|missing parameter|schema|unrecognized|unexpected property)\b/i; // consulted by classifyResult for errored results -> 'arg_error'
+const AUTH_ERROR_RE = /\b(unauthori[sz]ed|401|not authenticated|token expired|token has expired|invalid[_ ]?token|access denied)\b/i; // consulted by classifyResult for errored results -> 'auth_error'
+/**
+ * Regex matching reply phrasings that claim a connector-wide failure.
+ * Case-insensitive and unanchored: detectContradiction() runs it against the
+ * whole reply text and reports the first matched substring as `matchedPhrase`.
+ */
+export const CONTRADICTION_RE = /(dead\s*end|doesn'?t exist|not in (the |my )?schema|schema[- ]level|not available|isn'?t loaded|tools array is empty|MCP server still connecting|connector is (a )?dead|no such tool available|tool doesn't exist)/i;
+export function classifyResult(content, isError) {
+    if (!isError)
+        return 'success';
+    if (ARG_ERROR_RE.test(content))
+        return 'arg_error';
+    if (AUTH_ERROR_RE.test(content))
+        return 'auth_error';
+    return 'other_error';
+}
+/**
+ * Flatten the `content` of a tool_result block into text.
+ *
+ * - string: returned unchanged.
+ * - array: each element contributes itself when it is a string, otherwise its
+ *   `text`, otherwise its `content`, otherwise its JSON serialization; the
+ *   parts are joined with a newline.
+ * - null / undefined: empty string.
+ * - anything else: JSON serialization, falling back to String() if
+ *   serialization throws.
+ *
+ * NOTE(review): the per-element JSON.stringify in the array branch is not
+ * covered by the try/catch further down, and an element whose `content` is
+ * itself non-string is coerced by join() rather than flattened.
+ *
+ * @param {*} content - Raw `content` field of a tool_result block.
+ * @returns {string} Text form of the content.
+ */
+function stringifyResultContent(content) {
+    if (typeof content === 'string')
+        return content;
+    if (Array.isArray(content)) {
+        return content
+            .map((b) => (typeof b === 'string' ? b : (b?.text ?? b?.content ?? JSON.stringify(b))))
+            .join('\n');
+    }
+    if (content == null)
+        return '';
+    try {
+        return JSON.stringify(content);
+    }
+    catch {
+        return String(content); // serialization failed; fall back to String()
+    }
+}
+/**
+ * Walk collected SDK messages (assistant + user) and pair every tool_use with
+ * its tool_result. Returns one record per tool_use; unpaired ones (e.g. still
+ * running at end of stream) are skipped.
+ *
+ * tool_use blocks are read from `assistant` messages and tool_result blocks
+ * from `user` messages; any other message type is ignored. Both are keyed by
+ * the tool_use id, so a later block with the same id overwrites an earlier
+ * one. Records come out in the order the tool_use ids were first seen.
+ *
+ * @param {Array<{type: string, message?: any}>} messages - Collected SDK messages.
+ * @returns {Array<{name: string, id: string, resultClass: string, resultPreview: string}>}
+ *   One record per paired call; `resultPreview` is the first 200 characters of
+ *   the stringified result.
+ */
+export function collectToolCalls(messages) {
+    const toolUses = new Map(); // tool_use id -> { name, id }
+    const results = new Map(); // tool_use_id -> { content, isError }
+    for (const msg of messages) {
+        if (msg.type === 'assistant' && msg.message?.content) {
+            const blocks = Array.isArray(msg.message.content) ? msg.message.content : []; // non-array content yields no blocks
+            for (const b of blocks) {
+                if (b?.type === 'tool_use' && b.id && b.name) {
+                    toolUses.set(b.id, { name: b.name, id: b.id });
+                }
+            }
+        }
+        else if (msg.type === 'user' && msg.message?.content) {
+            const blocks = Array.isArray(msg.message.content) ? msg.message.content : []; // non-array content yields no blocks
+            for (const b of blocks) {
+                if (b?.type === 'tool_result' && b.tool_use_id) {
+                    results.set(b.tool_use_id, {
+                        content: stringifyResultContent(b.content),
+                        isError: !!b.is_error,
+                    });
+                }
+            }
+        }
+    }
+    const records = [];
+    for (const [id, tu] of toolUses) {
+        const r = results.get(id);
+        if (!r)
+            continue; // no tool_result was seen for this tool_use id
+        records.push({
+            name: tu.name,
+            id,
+            resultClass: classifyResult(r.content, r.isError),
+            resultPreview: r.content.slice(0, 200),
+        });
+    }
+    return records;
+}
+/**
+ * Check a reply against a set of tool-call records. Returns the first
+ * contradiction found, or null if the reply is consistent with tool results.
+ *
+ * Contradiction = reply contains a CONTRADICTION_RE phrase AND at least one
+ * mcp__claude_ai_* tool in this turn classified `success` or `arg_error`.
+ * `auth_error` and `other_error` are legitimate failures that can support
+ * those reply phrasings.
+ *
+ * Only the first regex match in the reply is reported, and the tool returned
+ * is the first qualifying connector call in `calls` order. The code does not
+ * check that the matched phrase refers to that particular tool.
+ *
+ * @param {string} reply - Outgoing assistant reply text; an empty or falsy reply yields null.
+ * @param {Array<{name: string, resultClass: string}>} calls - Records from collectToolCalls.
+ * @returns {{tool: object, matchedPhrase: string} | null} The finding, or null.
+ */
+export function detectContradiction(reply, calls) {
+    if (!reply)
+        return null;
+    const match = reply.match(CONTRADICTION_RE);
+    if (!match)
+        return null;
+    const connectorCalls = calls.filter(c => c.name.startsWith('mcp__claude_ai_')); // only claude_ai connector tools are considered
+    const recoverable = connectorCalls.find(c => c.resultClass === 'success' || c.resultClass === 'arg_error');
+    if (!recoverable)
+        return null;
+    return { tool: recoverable, matchedPhrase: match[0] };
+}
+/**
+ * Build the follow-up message injected when a contradiction fires.
+ *
+ * The text quotes the phrase that triggered detection, names the tool and how
+ * its result was classified, includes the stored result preview, and ends with
+ * an instruction that depends on the class: for `arg_error`, retry the call
+ * with corrected arguments; otherwise, do not generalize to a broken or
+ * missing connector. The call site in assistant.js assigns the returned string
+ * to `prompt` and continues its attempt loop, so the model's next reply
+ * replaces the rejected one.
+ *
+ * @param {{tool: {name: string, resultClass: string, resultPreview: string}, matchedPhrase: string}} finding
+ *   Contradiction reported by detectContradiction.
+ * @returns {string} The correction prompt text.
+ */
+export function buildCorrectionPrompt(finding) {
+    const { tool, matchedPhrase } = finding;
+    const classLabel = tool.resultClass === 'success' ? 'returned successful content' :
+        tool.resultClass === 'arg_error' ? 'returned an argument error (fixable by correcting the args — the connector itself works)' :
+            tool.resultClass; // any other class is shown as its raw name
+    return (`Your previous reply contained "${matchedPhrase}" but ${tool.name} ${classLabel}.\n\n` +
+        `Literal tool result (first 200 chars):\n${tool.resultPreview}\n\n` +
+        `Rewrite your reply using the actual tool result. ` +
+        (tool.resultClass === 'arg_error'
+            ? `This was an argument error for one call — the connector is NOT broken. Re-read the tool's schema (the rejected argument names are in the error above), retry the call with correct args, and report what comes back.`
+            : `Do not generalize this to "the connector is broken" or "the tool doesn't exist" — those claims contradict the tool's actual return value.`));
+}
+//# sourceMappingURL=contradiction-validator.js.map

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clementine-agent",
-  "version": "1.0.54",
+  "version": "1.0.56",
   "description": "Clementine — Personal AI Assistant (TypeScript)",
   "type": "module",
   "main": "dist/index.js",