npm - omnikey-cli - Versions diffs - 1.5.1 → 1.5.2 - Mend

omnikey-cli 1.5.1 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/backend-dist/agent/agentPrompts.js +11 -7
package/backend-dist/agent/agentServer.js +44 -2
package/package.json +1 -1

package/backend-dist/agent/agentPrompts.js CHANGED Viewed

@@ -26,7 +26,12 @@ function sanitizeMcpField(value, maxLength = 200) {
 function getAgentPrompt(platform, hasTaskInstructions, installedMcps = []) {
     const isWindows = config_1.config.terminalPlatform?.toLowerCase() === 'windows' || platform?.toLowerCase() === 'windows';
     return `
-You are an AI assistant with full terminal access. You reason about user requests and execute shell scripts to gather live data.
+You are an AI agent running on the user's machine with the following capabilities:
+- **Shell execution** (\`<shell_script>\` XML tag) — runs commands on the user's machine; output returns as \`TERMINAL OUTPUT:\`.
+- **Web tools** — call \`web_search\` and \`web_fetch\` via native function calling to retrieve live information from the internet.${config_1.config.aiProvider !== 'anthropic' ? '\n- **Image generation** — call `generate_image` via native function calling to produce images.' : ''}${config_1.config.browserDebugPort !== undefined ? '\n- **Browser automation** — control the user\'s running browser via Playwright scripts inside `<shell_script>` blocks.' : ''}
+${installedMcps.length > 0 ? '- **MCP tools** — native function calls for integrations; see installed servers below.' : ''}
+Use these capabilities to take real action. Default to doing rather than asking.
 **Input:**
 ${hasTaskInstructions
@@ -81,7 +86,7 @@ ${config_1.config.aiProvider === 'anthropic'
 - Use the built-in \`generate_image\` tool **only** when the user explicitly asks you to create, render, draw, design, or produce an image, picture, artwork, mockup, logo, diagram, or other visual asset.
 - Do **not** call \`generate_image\` for tasks that are about code, configuration, terminal commands, file manipulation, data extraction, web lookups, debugging, or any non-visual request — even if the user mentions words like "show", "display", "visualize", or "preview" in a non-image sense.
 - If you are unsure whether an image is required, prefer **not** to call the tool and ask the user (or proceed with a textual answer) instead.
-- Prefer the user-provided output path when available. If none is provided, save to \`~/.omniAgent/garbage/\` (e.g. \`~/.omniAgent/garbage/<descriptive-name>.png\`).
+- Use the user-provided output path when given; otherwise follow the generated file output directory above.
 - After the tool call returns, provide a \`<final_answer>\` that includes the saved file path.
   `}
@@ -116,18 +121,17 @@ ${installedMcps
 - No prefix — direct user message; treat as the primary request.
 **Response format — every response must be exactly one of:**
-1. \`<shell_script>...</shell_script>\` — to run commands and gather more data.
-2. ${config_1.config.aiProvider === 'anthropic' ? 'A `web_search` or `web_fetch`' : 'A `web_search`, `web_fetch`, or `generate_image`'} tool call — to fetch web context or generate images (use native tool calling, not XML tags).
+1. \`<shell_script>...</shell_script>\` — write this XML tag directly in your text response; the client extracts and runs it on the user's machine. **Not a function call** — calling \`shell_script\` via the function-calling API will always fail.
+2. ${config_1.config.aiProvider === 'anthropic' ? 'A `web_search` or `web_fetch`' : 'A `web_search`, `web_fetch`, or `generate_image`'} **native function call** — use the function-calling API for these only; do NOT wrap them in XML tags.${installedMcps.length > 0 ? ' Same for MCP tools (`mcp_<server>__<tool>`).' : ''}
 3. \`<final_answer>...</final_answer>\` — your conclusion once you have enough information.
-**Critical rule — zero tolerance for text outside tags:**
+**Critical rule — zero tolerance for text outside tags or extra wrappers:**
+- Do NOT wrap \`<shell_script>\` inside any other XML tag (e.g. \`<shell_function_calls>\`, \`<function_calls>\`, \`<invoke>\`). The \`<shell_script>\` tag must be the very first character of your response — no prefix, no envelope.
 - Your **entire response** — from the very first character to the very last — must be the tag and its contents. Nothing before the opening tag. Nothing after the closing tag.
 - Do NOT write reasoning, planning, or commentary before acting. Emit the tag immediately. If you need to reason through a step, do it as a comment inside the \`<shell_script>\` block (\`# ...\`), never as free text outside.
 - After receiving \`TERMINAL OUTPUT:\` or \`COMMAND ERROR:\`, your very next characters must be \`<shell_script>\` or \`<final_answer>\`. No exceptions.
 - If you feel you need to plan or think before writing the first script — suppress it. Emit \`<shell_script>\` for the first small step immediately. You will have the output to guide the next step.
-Never wrap in additional XML/JSON.
 **Shell script structure:**
 ${!isWindows
         ? `\`\`\`bash

package/backend-dist/agent/agentServer.js CHANGED Viewed

@@ -126,6 +126,21 @@ async function runToolLoop(initialResult, session, sessionId, send, log, tools,
                 });
                 return { id: tc.id, name: tc.name, result: toolResult };
             }
+            // shell_script is not a callable tool — the model should embed commands
+            // in its text response using <shell_script>...</shell_script> XML tags.
+            // Intercept here so we don't fire a misleading "Fetching URL: undefined"
+            // web-call notification and return a clear correction instead.
+            if (tc.name === 'shell_script') {
+                log.warn('Agent attempted to call shell_script as a function; returning format-correction', {
+                    sessionId,
+                    toolIteration: toolIterations,
+                });
+                return {
+                    id: tc.id,
+                    name: tc.name,
+                    result: 'Error: "shell_script" is not a callable tool. To run shell commands, place them directly in your text response using <shell_script>...</shell_script> XML tags — do not use tool/function calling for this.',
+                };
+            }
             // Notify the frontend that a web tool call is about to execute.
             const webCallContent = tc.name === 'web_search'
                 ? `Searching the web for: "${String(args.query ?? '')}"`
@@ -195,6 +210,33 @@ async function runToolLoop(initialResult, session, sessionId, send, log, tools,
 const aiModel = (0, ai_client_1.getDefaultModel)(config_1.config.aiProvider, 'smart');
 const contextWindowSize = (0, ai_client_1.getContextWindowSize)(config_1.config.aiProvider);
 // ─── DB helpers ───────────────────────────────────────────────────────────────
+/**
+ * Sanitize LLM content before processing or forwarding to the client.
+ *
+ * Two known hallucination patterns are fixed here:
+ *
+ * 1. <shell_function_calls> wrapper — the model sometimes wraps <shell_script>
+ *    in a <shell_function_calls> envelope.  Stored verbatim it compounds on
+ *    every turn (double/triple nesting), so we strip every occurrence.
+ *    Only the opening/closing wrapper tags themselves are removed (matched
+ *    case-insensitively); the text between them is kept.
+ *
+ * 2. Mismatched closing tag — the model opens with <shell_script> but closes
+ *    with a different tag (e.g. </shell_function>, </shell>).  The
+ *    macOS client's extractor looks for </shell_script> exactly; a wrong tag
+ *    makes it treat the entire script as plain reasoning text and call
+ *    receiveNext(), while the backend waits for terminal output — a deadlock.
+ *    We normalise any </shell…> variant to </shell_script> when the correct
+ *    closing tag is absent.  Every such closing tag in the content is
+ *    rewritten, not just the first.
+ *
+ *    Limits of the normalisation, as implemented below:
+ *    - Only closing tags whose name starts with "shell" are matched, so a
+ *      stray </script> is NOT repaired by this function.
+ *    - The presence checks for <shell_script> and </shell_script> are
+ *      case-sensitive, while the replacement pattern is case-insensitive.
+ *
+ * @param {string} content - Raw text returned by the LLM.
+ * @returns {string} The content with wrapper tags removed, the closing tag
+ *    normalised where applicable, and surrounding whitespace trimmed.
+ */
+function sanitizeLLMContent(content) {
+    // 1. Strip every opening and closing <shell_function_calls> wrapper tag.
+    let result = content.replace(/<\/?shell_function_calls>/gi, '');
+    // 2. If <shell_script> is present but </shell_script> is missing,
+    //    replace every stray </shell…> closing tag with the correct one.
+    if (result.includes('<shell_script>') && !result.includes('</shell_script>')) {
+        result = result.replace(/<\/shell\w*>/gi, '</shell_script>');
+    }
+    return result.trim();
+}
 async function persistSessionToDB(sessionId, state) {
     try {
         const historyJson = JSON.stringify(state.history);
@@ -514,7 +556,7 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
             });
             await recordUsage(result);
         }
-        let content = result.content.trim();
+        let content = sanitizeLLMContent(result.content.trim());
         if (!content && result.finish_reason !== 'tool_calls') {
             log.warn('Agent LLM returned empty content; sending generic error to client.');
             const errorMessage = 'The agent returned an empty response. Please try again.';
@@ -531,7 +573,7 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
                 turn: session.turns,
             });
             const toolLoopResult = await runToolLoop(result, session, sessionId, send, log, tools, mcpBundle.dispatch, recordUsage);
-            const toolLoopContent = toolLoopResult.content.trim();
+            const toolLoopContent = sanitizeLLMContent(toolLoopResult.content.trim());
             const toolLoopHasShell = toolLoopContent.includes('<shell_script>');
             const toolLoopHasFinal = toolLoopContent.includes('<final_answer>');
             const webToolFailed = session.history.some((msg) => msg.role === 'tool' &&

package/package.json CHANGED Viewed

@@ -4,7 +4,7 @@
     "access": "public",
     "registry": "https://registry.npmjs.org/"
   },
-  "version": "1.5.1",
+  "version": "1.5.2",
   "description": "CLI for onboarding users to Omnikey AI and configuring OPENAI_API_KEY. Use Yarn for install/build.",
   "engines": {
     "node": ">=14.0.0",