npm - omnikey-cli - Versions diffs - 1.0.13 → 1.0.15 - Mend

omnikey-cli 1.0.13 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/README.md +7 -8
package/backend-dist/agent/agentPrompts.js +91 -0
package/backend-dist/{agentServer.js → agent/agentServer.js} +112 -59
package/backend-dist/agent/index.js +17 -0
package/backend-dist/ai-client.js +469 -0
package/backend-dist/config.js +32 -2
package/backend-dist/featureRoutes.js +18 -37
package/backend-dist/index.js +1 -1
package/backend-dist/prompts.js +86 -66
package/backend-dist/web-search-provider.js +178 -0
package/dist/daemon.js +15 -10
package/dist/index.js +7 -7
package/dist/killDaemon.js +1 -1
package/dist/onboard.js +97 -10
package/dist/removeConfig.js +37 -29
package/package.json +3 -1
package/src/daemon.ts +24 -12
package/src/index.ts +7 -9
package/src/killDaemon.ts +1 -1
package/src/onboard.ts +103 -10
package/src/removeConfig.ts +40 -29
package/backend-dist/agentPrompts.js +0 -124

package/README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 # Omnikey CLI
-A command-line tool for onboarding users to the Omnikey open-source app and configuring their OPENAI_API_KEY.
+A command-line tool for getting started with the Omnikey open-source app: it walks you through onboarding, configures your LLM provider (OpenAI, Anthropic, or Gemini), and sets up the web search tool.
 ## About OmnikeyAI
-OmnikeyAI is a productivity tool that helps you quickly rewrite selected text using OpenAI. The CLI allows you to configure and run the backend daemon on your local machine and manage your OpenAI API key with ease.
+OmnikeyAI is a productivity tool that helps you quickly rewrite selected text using your preferred LLM provider. The CLI allows you to configure and run the backend daemon on your local machine, manage your API keys, choose your LLM provider (OpenAI, Anthropic, or Gemini), and optionally configure the web search tool.
 - For more details about the app and its features, see the [main README](https://github.com/GurinderRawala/OmniKey-AI).
 - Download the latest macOS app here: [Download OmniKeyAI for macOS](https://omnikeyai-saas-fmytqc3dra-uc.a.run.app/macos/download)
@@ -12,8 +12,10 @@ OmnikeyAI is a productivity tool that helps you quickly rewrite selected text us
 ## Features
-- `omnikey onboard`: Interactive onboarding to set up your OPENAI_API_KEY.
-- Accepts the `--open-ai-key` parameter for non-interactive setup.
+- `omnikey onboard`: Interactive onboarding to configure your LLM provider and API key.
+- Supports **OpenAI**, **Anthropic**, and **Google Gemini** as LLM providers.
+- Optional **web search tool** integration for enhanced responses.
+- Accepts CLI flags for non-interactive setup.
 - Configure and run the backend daemon — persisted across reboots on both macOS and Windows.
 ## Usage
@@ -22,12 +24,9 @@ OmnikeyAI is a productivity tool that helps you quickly rewrite selected text us
 # Install CLI globally (from this directory)
 npm install -g omnikey-cli
-# Onboard interactively (will prompt for OpenAI key)
+# Onboard interactively (will prompt for LLM key and web search tool)
 omnikey onboard
-# Or onboard non-interactively
-omnikey onboard --open-ai-key YOUR_KEY
 # Start the daemon (auto-restarts on reboot)
 omnikey daemon --port 7071

package/backend-dist/agent/agentPrompts.js ADDED Viewed

@@ -0,0 +1,91 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.getAgentPrompt = getAgentPrompt;
+const config_1 = require("../config");
+function getAgentPrompt(platform) {
+    const isWindows = config_1.config.terminalPlatform?.toLowerCase() === 'windows' || platform?.toLowerCase() === 'windows';
+    const windowsShellScriptInstructions = `
+\`\`\`
+<shell_script>
+# your commands here
+</shell_script>
+\`\`\`
+Follow these guidelines:
+- Use a single, self-contained PowerShell script per response; do not send multiple \`<shell_script>\` blocks in one turn.
+- Inside the script, group related commands logically and add brief inline comments only when they clarify non-obvious or complex steps.
+- Prefer safe, idempotent commands that can be run multiple times without unintended side effects.
+- Never use elevated privileges (do not use \`sudo\`, \`Run as Administrator\`, or equivalent).
+- Use PowerShell cmdlets and syntax (for example, \`Get-ChildItem\`, \`Select-Object\`, \`Where-Object\`) rather than cmd.exe or bash equivalents.`;
+    return `
+You are an AI assistant capable of reasoning about user situations and executing shell scripts in a terminal environment. You have full access to the terminal.
+Your responsibilities are:
+1. **Read and respect stored instructions**: When provided with \`<stored_instructions>\`, follow them carefully regarding behavior, focus areas, and output style.
+2. **Process user input**: Analyze what the user has typed or requested.
+3. **Gather context when needed**: Decide if additional machine-level information is required. If so, generate appropriate shell scripts to collect it.
+4. **Produce a complete answer**: Combine results from any previously executed scripts, the stored instructions, and the user input to deliver a helpful final response.
+**Guidelines for script generation:**
+- Create only safe, read-only commands focused on inspection, diagnostics, and information gathering.
+- Do not generate commands that install software, modify user data, or change system settings.
+- Never ask the user to run commands with \`sudo\` or administrator/root privileges.
+- Ensure all commands are compatible with ${!isWindows ? 'macOS and Linux; avoid Windows-specific commands.' : 'Use Windows-specific commands; avoid macOS and Linux-specific commands.'}
+- Scripts must be self-contained and ready to run without requiring the user to edit them.
+When you generate shell scripts, make them clear, efficient, and focused on gathering the information needed to answer the user's question or complete their request.
+**Instruction handling:**
+- Treat stored task instructions (if present) as authoritative for how to prioritize, what to examine, and how to format your answer, as long as they do not conflict with system rules or safety guidelines.
+- Treat the current user input as the immediate goal or question you must solve, applying the stored instructions to that specific situation.
+- If there is a conflict, follow: system rules first, then stored instructions, then ad-hoc guidance in the current input.
+**Web tools:**
+You have access to web tools you can call at any time during a turn:
+- \`web_fetch(url)\`: Fetches the text content of any publicly accessible URL. Use it to retrieve documentation, error references, API guides, release notes, or any other web resource that would help answer the user's question.
+- \`web_search(query)\`: Searches the web and returns a list of relevant results (title, URL, snippet). Use it when you need to discover the right URL before fetching, or when a quick summary of search results is sufficient.
+Use these tools proactively whenever the question involves current information, external documentation, or anything not already available in the conversation or machine output. You may call web tools multiple times in a single turn; call \`web_fetch\` on a promising URL from \`web_search\` results to get full details. Web tool results are injected back into the conversation automatically; continue reasoning and then emit your shell script or final answer as normal.
+**Interaction rules:**
+- When you need to execute ANY shell command, respond with a single \`<shell_script>\` block that contains the FULL script to run.
+- Within that script, include all steps needed to carry out the current diagnostic or information-gathering task as completely as possible (for example, collect all relevant logs, inspect all relevant services, perform all necessary checks), rather than issuing minimal or placeholder commands.
+- Prefer one comprehensive script over multiple small scripts; only wait for another round of output if you genuinely need the previous results to decide on the next actions.
+- If further machine-level investigation is unnecessary, skip the shell script and respond directly with a \`<final_answer>\`.
+- Every response MUST be exactly one of:
+  - A single \`<shell_script>...</shell_script>\` block, and nothing else; or
+  - A single \`<final_answer>...</final_answer>\` block, and nothing else.
+- Never send plain text or explanation outside of these tags. If you are not emitting a \`<shell_script>\`, you MUST emit a \`<final_answer>\`.
+- When you are completely finished and ready to present the result back to the user, respond with a single \`<final_answer>\` block.
+- Do NOT include reasoning, commentary, or any other tags outside of \`<shell_script>...</shell_script>\` or \`<final_answer>...</final_answer>\`.
+- Never wrap your entire response in other XML or JSON structures.
+**Shell script block structure:**
+Always emit exactly this structure when you want to run commands: ${!isWindows
+        ? `
+\`\`\`bash
+<shell_script>
+#!/usr/bin/env bash
+set -euo pipefail
+# your commands here
+</shell_script>
+\`\`\`
+- Use a single, self-contained script per turn; do not send multiple \`<shell_script>\` blocks in one response.
+- Inside the script, group related commands logically and add brief inline comments ONLY when they clarify non-obvious steps.
+- Prefer safe, idempotent commands. Never ask for sudo.`
+        : windowsShellScriptInstructions}
+**Final answer block structure:**
+When you have gathered enough information and completed the requested work, respond once with:
+\`\`\`
+<final_answer>
+...user-facing result here (clear summary, key findings, concrete recommendations or next steps, formatted according to any stored instructions)...
+</final_answer>
+\`\`\`
+- Do not emit any text before or after the \`<final_answer>\` block; the entire response must be inside the \`<final_answer>\` tags.
+  `;
+}

package/backend-dist/{agentServer.js → agent/agentServer.js} RENAMED Viewed

@@ -39,18 +39,21 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.attachAgentWebSocketServer = attachAgentWebSocketServer;
 const ws_1 = __importStar(require("ws"));
 const jsonwebtoken_1 = __importDefault(require("jsonwebtoken"));
-const openai_1 = __importDefault(require("openai"));
 const cuid_1 = __importDefault(require("cuid"));
-const config_1 = require("./config");
-const logger_1 = require("./logger");
-const subscription_1 = require("./models/subscription");
-const subscriptionUsage_1 = require("./models/subscriptionUsage");
+const config_1 = require("../config");
+const logger_1 = require("../logger");
+const subscription_1 = require("../models/subscription");
+const subscriptionUsage_1 = require("../models/subscriptionUsage");
 const agentPrompts_1 = require("./agentPrompts");
-const featureRoutes_1 = require("./featureRoutes");
-const authMiddleware_1 = require("./authMiddleware");
-const openai = new openai_1.default({
-    apiKey: config_1.config.openaiApiKey,
-});
+const featureRoutes_1 = require("../featureRoutes");
+const authMiddleware_1 = require("../authMiddleware");
+const web_search_provider_1 = require("../web-search-provider");
+const ai_client_1 = require("../ai-client");
+function buildAvailableTools() {
+    // web_search is always available — DuckDuckGo is used as free fallback
+    return [web_search_provider_1.WEB_FETCH_TOOL, web_search_provider_1.WEB_SEARCH_TOOL];
+}
+const aiModel = (0, ai_client_1.getDefaultModel)(config_1.config.aiProvider, 'smart');
 const sessionMessages = new Map();
 const MAX_TURNS = 10;
 async function getOrCreateSession(sessionId, subscription, platform, log) {
@@ -63,7 +66,7 @@ async function getOrCreateSession(sessionId, subscription, platform, log) {
         });
         return existing;
     }
-    const systemPrompt = platform === 'windows' ? agentPrompts_1.AGENT_SYSTEM_PROMPT_WINDOWS : agentPrompts_1.AGENT_SYSTEM_PROMPT_MACOS;
+    const systemPrompt = (0, agentPrompts_1.getAgentPrompt)(platform);
     // use these instructions as user instructions
     const prompt = await (0, featureRoutes_1.getPromptForCommand)(log, 'task', subscription).catch((err) => {
         log.error('Failed to get system prompt for new agent session', { error: err });
@@ -79,11 +82,14 @@ async function getOrCreateSession(sessionId, subscription, platform, log) {
             ...(prompt
                 ? [
                     {
-                        role: 'assistant',
-                        content: `<user_configured_instructions>
-# User-Configured Task Instructions
+                        role: 'user',
+                        content: `<stored_instructions>
+# Stored Instructions
+"""
 ${prompt}
-</user_configured_instructions>`,
+"""
+</stored_instructions>`,
                     },
                 ]
                 : []),
@@ -202,61 +208,108 @@ async function runAgentTurn(sessionId, subscription, clientMessage, send, log) {
         role: 'user',
         content: userContent,
     });
-    if (!config_1.config.openaiApiKey) {
-        log.warn('OPENAI_API_KEY is not set; returning error to client.');
-        const errorMessage = 'The server is missing its OpenAI API key. Please configure OPENAI_API_KEY on the backend and try again.';
-        send({
-            session_id: sessionId,
-            sender: 'agent',
-            content: `<final_answer>\n${errorMessage}\n</final_answer>`,
-            is_terminal_output: false,
-            is_error: true,
-        });
-        // Clear any cached session state so a subsequent attempt can
-        // start fresh once the environment is correctly configured.
-        sessionMessages.delete(sessionId);
-        return;
-    }
+    // On the final turn we omit tools so the model is forced to emit a
+    // plain text <final_answer> rather than issuing another tool call.
+    const isFinalTurn = session.turns >= MAX_TURNS;
+    const tools = isFinalTurn ? undefined : buildAvailableTools();
+    const recordUsage = async (result) => {
+        const usage = result.usage;
+        if (!usage || !subscription.id)
+            return;
+        try {
+            await subscriptionUsage_1.SubscriptionUsage.create({
+                subscriptionId: subscription.id,
+                model: result.model,
+                promptTokens: usage.prompt_tokens,
+                completionTokens: usage.completion_tokens,
+                totalTokens: usage.total_tokens,
+            });
+            await subscription_1.Subscription.increment('totalTokensUsed', {
+                by: usage.total_tokens,
+                where: { id: subscription.id },
+            });
+        }
+        catch (err) {
+            log.error('Failed to record subscription usage metrics for agent.', {
+                error: err,
+                subscriptionId: subscription.id,
+            });
+        }
+    };
     try {
-        log.debug('Calling OpenAI for agent turn', {
+        log.debug('Calling AI provider for agent turn', {
             sessionId,
+            provider: config_1.config.aiProvider,
+            model: aiModel,
             turn: session.turns,
             historyLength: session.history.length,
         });
-        const completion = await openai.chat.completions.create({
-            model: 'gpt-5.1',
-            // The OpenAI client accepts a superset of this simple
-            // message shape; we safely cast here to keep our local
-            // types minimal.
-            messages: session.history,
+        let result = await ai_client_1.aiClient.complete(aiModel, session.history, {
+            tools: tools?.length ? tools : undefined,
             temperature: 0.2,
         });
-        // Record token usage for this subscription and model, if usage
-        // data is available and we know which subscription made the call.
-        const usage = completion.usage;
-        if (usage && subscription.id) {
-            try {
-                await subscriptionUsage_1.SubscriptionUsage.create({
-                    subscriptionId: subscription.id,
-                    model: completion.model ?? 'gpt-5.1',
-                    promptTokens: usage.prompt_tokens ?? 0,
-                    completionTokens: usage.completion_tokens ?? 0,
-                    totalTokens: usage.total_tokens ?? 0,
+        await recordUsage(result);
+        // Tool-call loop: execute any requested tools and feed results back
+        // until the model emits a non-tool-call response (or we hit the limit).
+        const MAX_TOOL_ITERATIONS = 10;
+        let toolIterations = 0;
+        while (result.finish_reason === 'tool_calls' && toolIterations < MAX_TOOL_ITERATIONS) {
+            toolIterations++;
+            session.history.push(result.assistantMessage);
+            const toolCalls = result.tool_calls ?? [];
+            log.info('Agent executing tool calls', {
+                sessionId,
+                turn: session.turns,
+                toolIteration: toolIterations,
+                tools: toolCalls.map((tc) => tc.name),
+            });
+            const toolResults = await Promise.all(toolCalls.map(async (tc) => {
+                const args = tc.arguments;
+                const toolResult = await (0, web_search_provider_1.executeTool)(tc.name, args, log);
+                log.info('Tool call completed', {
+                    sessionId,
+                    tool: tc.name,
+                    resultLength: toolResult.length,
                 });
-                await subscription_1.Subscription.increment('totalTokensUsed', {
-                    by: usage.total_tokens ?? 0,
-                    where: { id: subscription.id },
-                });
-            }
-            catch (err) {
-                log.error('Failed to record subscription usage metrics for agent.', {
-                    error: err,
-                    subscriptionId: subscription.id,
+                return { id: tc.id, name: tc.name, result: toolResult };
+            }));
+            for (const { id, name, result: toolResult } of toolResults) {
+                session.history.push({
+                    role: 'tool',
+                    tool_call_id: id,
+                    tool_name: name,
+                    content: toolResult,
                 });
             }
+            result = await ai_client_1.aiClient.complete(aiModel, session.history, {
+                tools: tools?.length ? tools : undefined,
+                temperature: 0.2,
+            });
+            await recordUsage(result);
+        }
+        // If the tool loop was exhausted while the model still wants more tool calls,
+        // the last result has empty content. Force one final no-tools call so the model
+        // must synthesize a text answer from everything gathered so far.
+        if (result.finish_reason === 'tool_calls') {
+            log.warn('Tool iteration limit reached with pending tool calls; forcing final text response', {
+                sessionId,
+                turn: session.turns,
+            });
+            // Do NOT push result.assistantMessage here — it contains tool_use blocks that
+            // require corresponding tool_result blocks (Anthropic API constraint). Since we
+            // are not executing those tool calls, just inject a plain user nudge so the model
+            // synthesizes a text answer from the history already accumulated.
+            session.history.push({
+                role: 'user',
+                content: 'You have reached the maximum number of tool calls. Based on all information gathered so far, please provide your best answer now.',
+            });
+            result = await ai_client_1.aiClient.complete(aiModel, session.history, {
+                tools: undefined,
+                temperature: 0.2,
+            });
+            await recordUsage(result);
         }
-        const choice = completion.choices[0];
-        const content = (choice.message.content ?? '').toString().trim();
+        const content = result.content.trim();
         if (!content) {
             log.warn('Agent LLM returned empty content; sending generic error to client.');
             const errorMessage = 'The agent returned an empty response. Please try again.';

package/backend-dist/agent/index.js ADDED Viewed

@@ -0,0 +1,17 @@
+"use strict";
+// Compiler-emitted re-export helpers followed by the barrel export for the agent module.
+//
+// __createBinding(o, m, k, k2): exposes m[k] on o under the name k2 (k2 defaults to k).
+// An existing this.__createBinding is reused when present.
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    // Fall back to an enumerable getter that reads m[k] on each access when m has
+    // no own descriptor for k, when the descriptor is an accessor and m is not
+    // flagged __esModule, or when it is a writable/configurable data property.
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    // Used when Object.create is unavailable: copy the current value by plain assignment.
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+// __exportStar(m, exports): binds every for...in-enumerable key of m onto exports,
+// skipping "default" and any key exports already owns.
+var __exportStar = (this && this.__exportStar) || function(m, exports) {
+    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+// Re-export everything from ./agentServer through this index module.
+__exportStar(require("./agentServer"), exports);