npm - omnikey-cli - Versions diffs - 1.0.36 → 1.0.37 - Mend

omnikey-cli 1.0.36 → 1.0.37

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (5) hide show

package/backend-dist/agent/agentPrompts.js +9 -20
package/backend-dist/agent/agentServer.js +32 -24
package/backend-dist/index.js +1 -41
package/backend-dist/scheduledJobExecutor.js +2 -2
package/package.json +1 -1

package/backend-dist/agent/agentPrompts.js CHANGED Viewed

@@ -17,28 +17,17 @@ ${hasTaskInstructions
 - Default to a \`<shell_script>\` for anything involving the machine, network, files, processes, env vars, or system state — never answer from training data alone.
 - **Read vs write:** For open-ended/ambiguous requests run safe read-only commands first to understand the current state. When the user **explicitly** asks to create, update, delete, configure, or run something — do it directly; no need to ask for confirmation unless the scope is genuinely unclear.
 - **Package installation:** Install any package required to complete the task. Include the install step as its own phase so you can confirm it succeeded before building on it. Prefer project-local or user scope; avoid \`sudo\`/admin unless the user explicitly asks.
-${config_1.config.browserDebugPort !== undefined ? `- **Browser automation:** When the user explicitly asks to interact with a browser (click a button, fill a form, check a page, take a screenshot, etc.), generate \`<shell_script>\` blocks that use Node.js and \`playwright-core\` — one phase at a time (phasing rules below apply).
+${config_1.config.browserDebugPort !== undefined
+        ? `- **Browser automation:** Use browser automation proactively when needed to complete the task.
+  - Do NOT wait for explicit user wording like "use browser" if interaction is obviously required to get the final result.
+  - If \`web_search\` / \`web_fetch\` do not provide enough usable context (blocked pages, incomplete data, client-rendered content, auth walls, dynamic tables, hidden details, repeated low-value fetch results), immediately switch to Playwright-based browser interaction.
+  - Generate \`<shell_script>\` blocks that use Node.js and \`playwright-core\` — one phase at a time (phasing rules below apply).
   - **Phase 1 — ensure deps:** Check and install \`playwright-core\` if missing:
     \`node -e "require('/tmp/playwright-runner/node_modules/playwright-core')" 2>/dev/null || npm install --prefix /tmp/playwright-runner playwright-core --silent\`
-  - **Phase 2 — connect & navigate:** Try CDP first; fall back to the existing debug profile. Reuse an open tab if the URL already matches — never open a duplicate.
-    \`\`\`js
-    const { chromium } = require('/tmp/playwright-runner/node_modules/playwright-core');
-    let browser, page;
-    try {
-      browser = await chromium.connectOverCDP('http://localhost:${config_1.config.browserDebugPort}');
-      const pages = browser.contexts().flatMap(c => c.pages());
-      page = pages.find(p => p.url().startsWith(TARGET_URL)) ?? null;
-      if (page) { await page.bringToFront(); }
-      else { page = await browser.contexts()[0].newPage(); await page.goto(TARGET_URL, { waitUntil: 'domcontentloaded', timeout: 15000 }); }
-    } catch {
-      const ctx = await chromium.launchPersistentContext('${config_1.config.browserDebugUserDataDir}', { executablePath: '${config_1.config.browserDebugExecutable}', headless: false });
-      browser = ctx;
-      page = ctx.pages().find(p => p.url().startsWith(TARGET_URL)) ?? await ctx.newPage();
-      if (!page.url().startsWith(TARGET_URL)) await page.goto(TARGET_URL, { waitUntil: 'domcontentloaded', timeout: 15000 });
-    }
-    \`\`\`
-  - **Phase 3+ — one action per script:** Each subsequent script reconnects the same way, finds the already-open tab, performs exactly one action (click / type / select / screenshot / read text), prints the result, then calls \`browser.disconnect()\` (CDP) or just exits (profile launch — leaves the window open).
-  - Always inline Node.js via a bash heredoc so the script is self-contained. Print structured output to stdout so it returns as \`TERMINAL OUTPUT:\`.` : ''}
+  - **Phase 2 — connect & navigate:** Connect to the running browser via CDP at \`http://localhost:${config_1.config.browserDebugPort}\`. If CDP fails, fall back to launching a persistent context using the debug profile at \`${config_1.config.browserDebugUserDataDir}\` with the executable at \`${config_1.config.browserDebugExecutable}\` (headless: false). Once connected, navigate to any URL required by the task — open any page needed, reusing an existing tab if the URL already matches or creating a new one if not. There is no restriction on which sites or pages you can visit; open whatever is necessary to complete the task.
+  - **Phase 3+ — one action per script:** Each subsequent script reconnects via the same CDP endpoint (\`http://localhost:${config_1.config.browserDebugPort}\`) or profile fallback, finds the already-open tab (or reopens it), performs exactly one action (click, type, select, scroll, screenshot, read text, extract data, fill forms, etc.), prints the result to stdout, then calls \`browser.disconnect()\` (CDP) or exits (profile launch). You may perform any interaction the task requires — reading content, extracting structured data, submitting forms, navigating between pages, or capturing screenshots.
+  - Always inline Node.js via a bash heredoc so the script is self-contained. Print structured output to stdout so it returns as \`TERMINAL OUTPUT:\`.`
+        : ''}
 - Use ${!isWindows ? 'bash (macOS/Linux)' : 'PowerShell'}. Every script must be self-contained and ready to run as-is.
 - Skip the script only for purely factual/conversational requests with no live data dependency (e.g. "what is 2+2").

package/backend-dist/agent/agentServer.js CHANGED Viewed

@@ -171,7 +171,6 @@ async function runToolLoop(initialResult, session, sessionId, send, log, tools,
     return result;
 }
 const aiModel = (0, ai_client_1.getDefaultModel)(config_1.config.aiProvider, 'smart');
-const MAX_TURNS = 20;
 // ─── DB helpers ───────────────────────────────────────────────────────────────
 async function persistSessionToDB(sessionId, state) {
     try {
@@ -276,7 +275,7 @@ ${prompt}
     };
     // Persist immediately so that GET /sessions picks it up right away.
     try {
-        await agentSession_1.AgentSession.findOrCreate({
+        const [dbSession, created] = await agentSession_1.AgentSession.findOrCreate({
             where: { id: sessionId, subscriptionId: subscription.id },
             defaults: {
                 id: sessionId,
@@ -288,6 +287,25 @@ ${prompt}
                 lastActiveAt: new Date(),
             },
         });
+        if (!created) {
+            const history = JSON.parse(dbSession.historyJson || '[]');
+            const existingEntry = {
+                subscription,
+                history,
+                turns: dbSession.turns,
+            };
+            log.info('Reused existing agent session row from DB during create path', {
+                sessionId,
+                subscriptionId: subscription.id,
+                turns: existingEntry.turns,
+            });
+            return {
+                sessionState: existingEntry,
+                hasStoredPrompt: history
+                    .filter((h) => h.role === 'user')
+                    .some((h) => typeof h.content === 'string' && h.content.includes('<stored_instructions>')),
+            };
+        }
         // Prune oldest sessions after each creation so the cap is always respected.
         void enforceSessionCap(subscription.id, log);
     }
@@ -313,14 +331,6 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
         subscriptionId: subscription.id,
         turn: session.turns,
     });
-    const effectiveMaxTurns = options?.maxTurns ?? MAX_TURNS;
-    // On the final iteration, instruct the LLM to provide a consolidated answer.
-    if (session.turns === effectiveMaxTurns) {
-        (0, utils_1.pushToSessionHistory)(logger_1.logger, session, {
-            role: 'system',
-            content: 'Provide a single, final, concise answer based on the entire conversation so far. Wrap the answer in a <final_answer>...</final_answer> block and do not ask for further input or mention additional shell scripts to run. Do not include any <shell_script> block in this response.',
-        });
-    }
     // Append the client message as user content, marking terminal
     // output and errors in the text so the agent can reason about them.
     let userContent = clientMessage.content || '';
@@ -369,10 +379,7 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
             }
         }
     }
-    // On the final turn we omit tools so the model is forced to emit a
-    // plain text <final_answer> rather than issuing another tool call.
-    const isFinalTurn = session.turns >= effectiveMaxTurns;
-    const tools = isFinalTurn ? undefined : (0, utils_1.buildAvailableTools)();
+    const tools = (0, utils_1.buildAvailableTools)();
     const recordUsage = async (result) => {
         const usage = result.usage;
         if (!usage)
@@ -433,7 +440,7 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
         }
         // If the model requested web tool calls, execute them and get a follow-up
         // response before deciding what to send to the client.
-        if (!isFinalTurn && result.finish_reason === 'tool_calls') {
+        if (result.finish_reason === 'tool_calls') {
             log.info('Running web tool calls to gather information', {
                 sessionId,
                 subscriptionId: subscription.id,
@@ -488,6 +495,9 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
                             'No plain text. No other format.',
                         ].join('\n'),
                 });
+                // DB-only session state: persist before recursive handoff so the
+                // follow-up turn reads the latest history and turn count.
+                await persistSessionToDB(sessionId, session);
                 await runAgentTurnInternal(sessionId, subscription, {
                     sender: 'agent',
                     session_id: sessionId,
@@ -497,15 +507,9 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
                 return;
             }
         }
-        // Ensure that a proper <final_answer> block is produced for the
-        // desktop clients once we reach the final turn. If the model did
-        // not emit either a <shell_script> or <final_answer> tag on the
-        // MAX_TURNS turn, we treat this as the final natural-language answer
-        // and wrap it in <final_answer> tags so the client can stop
-        // waiting and paste the result.
         const hasShellScriptTag = content.includes('<shell_script>');
         const hasFinalAnswerTag = content.includes('<final_answer>');
-        if (hasShellScriptTag && !isFinalTurn) {
+        if (hasShellScriptTag) {
             log.info('Completed agent turn. Sending back scripts, waiting for results.', {
                 sessionId,
                 subscriptionId: subscription.id,
@@ -516,6 +520,10 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
                 role: 'assistant',
                 content,
             });
+            // Persist before sending so that if the send callback triggers a new
+            // runAgentTurn immediately (e.g. cron shell-script loop), the DB already
+            // has the updated turn count and history.
+            await persistSessionToDB(sessionId, session);
             send({
                 session_id: sessionId,
                 sender: 'agent',
@@ -525,8 +533,8 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
             });
             return;
         }
-        if (isFinalTurn || hasFinalAnswerTag) {
-            log.info('Finalizing agent session after max turns or final answer tag', {
+        if (hasFinalAnswerTag) {
+            log.info('Finalizing agent session after final answer tag', {
                 sessionId,
                 subscriptionId: subscription.id,
                 turns: session.turns,

package/backend-dist/index.js CHANGED Viewed

@@ -8,7 +8,6 @@ const cors_1 = __importDefault(require("cors"));
 const path_1 = __importDefault(require("path"));
 const fs_1 = __importDefault(require("fs"));
 const zlib_1 = __importDefault(require("zlib"));
-const child_process_1 = require("child_process");
 const subscriptionRoutes_1 = require("./subscriptionRoutes");
 const featureRoutes_1 = require("./featureRoutes");
 const db_1 = require("./db");
@@ -24,7 +23,6 @@ require("./models/scheduledJob");
 const bucket_adapter_1 = require("./bucket-adapter");
 const app = (0, express_1.default)();
 const PORT = Number(config_1.config.port);
-const IS_CRON_CHILD = process.env.OMNIKEY_CRON_CHILD === '1';
 app.set('trust proxy', 1);
 app.use((0, cors_1.default)());
 app.use(express_1.default.json());
@@ -167,38 +165,9 @@ app.get('*', (_req, res) => {
     res.sendFile(path_1.default.join(process.cwd(), 'public', 'index.html'));
 });
 let server = null;
-let cronChildProcess = null;
-function startCronChildProcess() {
-    if (IS_CRON_CHILD || cronChildProcess)
-        return;
-    const childPort = PORT + 1;
-    const entry = process.argv[1] || __filename;
-    cronChildProcess = (0, child_process_1.fork)(entry, [], {
-        env: {
-            ...process.env,
-            OMNIKEY_CRON_CHILD: '1',
-            OMNIKEY_PORT: String(childPort),
-        },
-        execArgv: process.execArgv,
-        stdio: 'inherit',
-    });
-    logger_1.logger.info('Spawned cron child process.', {
-        pid: cronChildProcess.pid,
-        port: childPort,
-    });
-    cronChildProcess.on('exit', (code, signal) => {
-        logger_1.logger.warn('Cron child process exited.', { code, signal });
-        cronChildProcess = null;
-    });
-}
 async function start() {
     try {
         await (0, db_1.initDatabase)(logger_1.logger);
-        if (IS_CRON_CHILD) {
-            logger_1.logger.info('Starting cron child process mode.', { port: PORT });
-            (0, scheduledJobExecutor_1.startScheduledJobExecutor)();
-            return;
-        }
         server = app.listen(PORT, () => {
             logger_1.logger.info(`Enhancer API listening on http://localhost:${PORT}`, {
                 isSelfHosted: config_1.config.isSelfHosted,
@@ -212,7 +181,7 @@ async function start() {
             (0, agentServer_1.attachAgentWebSocketServer)(server);
         }
         if (config_1.config.isSelfHosted) {
-            startCronChildProcess();
+            (0, scheduledJobExecutor_1.startScheduledJobExecutor)();
         }
     }
     catch (err) {
@@ -223,15 +192,6 @@ async function start() {
 start();
 function gracefulShutdown(signal) {
     logger_1.logger.info(`Received ${signal}. Starting graceful shutdown...`);
-    if (cronChildProcess) {
-        cronChildProcess.kill('SIGTERM');
-        cronChildProcess = null;
-    }
-    if (IS_CRON_CHILD) {
-        logger_1.logger.info('Cron child process exiting.');
-        process.exit(0);
-        return;
-    }
     if (!server) {
         logger_1.logger.info('Server was not started or already closed. Exiting process.');
         process.exit(0);

package/backend-dist/scheduledJobExecutor.js CHANGED Viewed

@@ -144,7 +144,7 @@ function runCronJob(job, subscription, sessionId) {
                         content: output,
                         is_terminal_output: true,
                         is_error: isError,
-                    }, send, logger_1.logger, { maxTurns: MAX_CRON_TURNS, isCronJob: true }).catch((err) => settle(err instanceof Error ? err : new Error(String(err))));
+                    }, send, logger_1.logger, { isCronJob: true }).catch((err) => settle(err instanceof Error ? err : new Error(String(err))));
                     return;
                 }
                 if (FINAL_ANSWER_RE.test(content)) {
@@ -158,7 +158,7 @@ function runCronJob(job, subscription, sessionId) {
             sender: 'user',
             content: job.prompt,
             platform: job.platform ?? undefined,
-        }, send, logger_1.logger, { maxTurns: MAX_CRON_TURNS, isCronJob: true }).catch((err) => settle(err instanceof Error ? err : new Error(String(err))));
+        }, send, logger_1.logger, { isCronJob: true }).catch((err) => settle(err instanceof Error ? err : new Error(String(err))));
     });
 }
 async function executeJob(job) {

package/package.json CHANGED Viewed

@@ -4,7 +4,7 @@
     "access": "public",
     "registry": "https://registry.npmjs.org/"
   },
-  "version": "1.0.36",
+  "version": "1.0.37",
   "description": "CLI for onboarding users to Omnikey AI and configuring OPENAI_API_KEY. Use Yarn for install/build.",
   "engines": {
     "node": ">=14.0.0",