omnikey-cli 1.0.36 → 1.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,28 +17,17 @@ ${hasTaskInstructions
17
17
  - Default to a \`<shell_script>\` for anything involving the machine, network, files, processes, env vars, or system state — never answer from training data alone.
18
18
  - **Read vs write:** For open-ended/ambiguous requests run safe read-only commands first to understand the current state. When the user **explicitly** asks to create, update, delete, configure, or run something — do it directly; no need to ask for confirmation unless the scope is genuinely unclear.
19
19
  - **Package installation:** Install any package required to complete the task. Include the install step as its own phase so you can confirm it succeeded before building on it. Prefer project-local or user scope; avoid \`sudo\`/admin unless the user explicitly asks.
20
- ${config_1.config.browserDebugPort !== undefined ? `- **Browser automation:** When the user explicitly asks to interact with a browser (click a button, fill a form, check a page, take a screenshot, etc.), generate \`<shell_script>\` blocks that use Node.js and \`playwright-core\` — one phase at a time (phasing rules below apply).
20
+ ${config_1.config.browserDebugPort !== undefined
21
+ ? `- **Browser automation:** Use browser automation proactively when needed to complete the task.
22
+ - Do NOT wait for explicit user wording like "use browser" if interaction is obviously required to get the final result.
23
+ - If \`web_search\` / \`web_fetch\` do not provide enough usable context (blocked pages, incomplete data, client-rendered content, auth walls, dynamic tables, hidden details, repeated low-value fetch results), immediately switch to Playwright-based browser interaction.
24
+ - Generate \`<shell_script>\` blocks that use Node.js and \`playwright-core\` — one phase at a time (phasing rules below apply).
21
25
  - **Phase 1 — ensure deps:** Check and install \`playwright-core\` if missing:
22
26
  \`node -e "require('/tmp/playwright-runner/node_modules/playwright-core')" 2>/dev/null || npm install --prefix /tmp/playwright-runner playwright-core --silent\`
23
- - **Phase 2 — connect & navigate:** Try CDP first; fall back to the existing debug profile. Reuse an open tab if the URL already matches never open a duplicate.
24
- \`\`\`js
25
- const { chromium } = require('/tmp/playwright-runner/node_modules/playwright-core');
26
- let browser, page;
27
- try {
28
- browser = await chromium.connectOverCDP('http://localhost:${config_1.config.browserDebugPort}');
29
- const pages = browser.contexts().flatMap(c => c.pages());
30
- page = pages.find(p => p.url().startsWith(TARGET_URL)) ?? null;
31
- if (page) { await page.bringToFront(); }
32
- else { page = await browser.contexts()[0].newPage(); await page.goto(TARGET_URL, { waitUntil: 'domcontentloaded', timeout: 15000 }); }
33
- } catch {
34
- const ctx = await chromium.launchPersistentContext('${config_1.config.browserDebugUserDataDir}', { executablePath: '${config_1.config.browserDebugExecutable}', headless: false });
35
- browser = ctx;
36
- page = ctx.pages().find(p => p.url().startsWith(TARGET_URL)) ?? await ctx.newPage();
37
- if (!page.url().startsWith(TARGET_URL)) await page.goto(TARGET_URL, { waitUntil: 'domcontentloaded', timeout: 15000 });
38
- }
39
- \`\`\`
40
- - **Phase 3+ — one action per script:** Each subsequent script reconnects the same way, finds the already-open tab, performs exactly one action (click / type / select / screenshot / read text), prints the result, then calls \`browser.disconnect()\` (CDP) or just exits (profile launch — leaves the window open).
41
- - Always inline Node.js via a bash heredoc so the script is self-contained. Print structured output to stdout so it returns as \`TERMINAL OUTPUT:\`.` : ''}
27
+ - **Phase 2 — connect & navigate:** Connect to the running browser via CDP at \`http://localhost:${config_1.config.browserDebugPort}\`. If CDP fails, fall back to launching a persistent context using the debug profile at \`${config_1.config.browserDebugUserDataDir}\` with the executable at \`${config_1.config.browserDebugExecutable}\` (headless: false). Once connected, navigate to any URL required by the task — open any page needed, reusing an existing tab if the URL already matches or creating a new one if not. There is no restriction on which sites or pages you can visit; open whatever is necessary to complete the task.
28
+ - **Phase 3+ — one action per script:** Each subsequent script reconnects via the same CDP endpoint (\`http://localhost:${config_1.config.browserDebugPort}\`) or profile fallback, finds the already-open tab (or reopens it), performs exactly one action (click, type, select, scroll, screenshot, read text, extract data, fill forms, etc.), prints the result to stdout, then calls \`browser.disconnect()\` (CDP) or exits (profile launch). You may perform any interaction the task requires — reading content, extracting structured data, submitting forms, navigating between pages, or capturing screenshots.
29
+ - Always inline Node.js via a bash heredoc so the script is self-contained. Print structured output to stdout so it returns as \`TERMINAL OUTPUT:\`.`
30
+ : ''}
42
31
  - Use ${!isWindows ? 'bash (macOS/Linux)' : 'PowerShell'}. Every script must be self-contained and ready to run as-is.
43
32
  - Skip the script only for purely factual/conversational requests with no live data dependency (e.g. "what is 2+2").
44
33
 
@@ -171,7 +171,6 @@ async function runToolLoop(initialResult, session, sessionId, send, log, tools,
171
171
  return result;
172
172
  }
173
173
  const aiModel = (0, ai_client_1.getDefaultModel)(config_1.config.aiProvider, 'smart');
174
- const MAX_TURNS = 20;
175
174
  // ─── DB helpers ───────────────────────────────────────────────────────────────
176
175
  async function persistSessionToDB(sessionId, state) {
177
176
  try {
@@ -276,7 +275,7 @@ ${prompt}
276
275
  };
277
276
  // Persist immediately so that GET /sessions picks it up right away.
278
277
  try {
279
- await agentSession_1.AgentSession.findOrCreate({
278
+ const [dbSession, created] = await agentSession_1.AgentSession.findOrCreate({
280
279
  where: { id: sessionId, subscriptionId: subscription.id },
281
280
  defaults: {
282
281
  id: sessionId,
@@ -288,6 +287,25 @@ ${prompt}
288
287
  lastActiveAt: new Date(),
289
288
  },
290
289
  });
290
+ if (!created) {
291
+ const history = JSON.parse(dbSession.historyJson || '[]');
292
+ const existingEntry = {
293
+ subscription,
294
+ history,
295
+ turns: dbSession.turns,
296
+ };
297
+ log.info('Reused existing agent session row from DB during create path', {
298
+ sessionId,
299
+ subscriptionId: subscription.id,
300
+ turns: existingEntry.turns,
301
+ });
302
+ return {
303
+ sessionState: existingEntry,
304
+ hasStoredPrompt: history
305
+ .filter((h) => h.role === 'user')
306
+ .some((h) => typeof h.content === 'string' && h.content.includes('<stored_instructions>')),
307
+ };
308
+ }
291
309
  // Prune oldest sessions after each creation so the cap is always respected.
292
310
  void enforceSessionCap(subscription.id, log);
293
311
  }
@@ -313,14 +331,6 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
313
331
  subscriptionId: subscription.id,
314
332
  turn: session.turns,
315
333
  });
316
- const effectiveMaxTurns = options?.maxTurns ?? MAX_TURNS;
317
- // On the final iteration, instruct the LLM to provide a consolidated answer.
318
- if (session.turns === effectiveMaxTurns) {
319
- (0, utils_1.pushToSessionHistory)(logger_1.logger, session, {
320
- role: 'system',
321
- content: 'Provide a single, final, concise answer based on the entire conversation so far. Wrap the answer in a <final_answer>...</final_answer> block and do not ask for further input or mention additional shell scripts to run. Do not include any <shell_script> block in this response.',
322
- });
323
- }
324
334
  // Append the client message as user content, marking terminal
325
335
  // output and errors in the text so the agent can reason about them.
326
336
  let userContent = clientMessage.content || '';
@@ -369,10 +379,7 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
369
379
  }
370
380
  }
371
381
  }
372
- // On the final turn we omit tools so the model is forced to emit a
373
- // plain text <final_answer> rather than issuing another tool call.
374
- const isFinalTurn = session.turns >= effectiveMaxTurns;
375
- const tools = isFinalTurn ? undefined : (0, utils_1.buildAvailableTools)();
382
+ const tools = (0, utils_1.buildAvailableTools)();
376
383
  const recordUsage = async (result) => {
377
384
  const usage = result.usage;
378
385
  if (!usage)
@@ -433,7 +440,7 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
433
440
  }
434
441
  // If the model requested web tool calls, execute them and get a follow-up
435
442
  // response before deciding what to send to the client.
436
- if (!isFinalTurn && result.finish_reason === 'tool_calls') {
443
+ if (result.finish_reason === 'tool_calls') {
437
444
  log.info('Running web tool calls to gather information', {
438
445
  sessionId,
439
446
  subscriptionId: subscription.id,
@@ -488,6 +495,9 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
488
495
  'No plain text. No other format.',
489
496
  ].join('\n'),
490
497
  });
498
+ // DB-only session state: persist before recursive handoff so the
499
+ // follow-up turn reads the latest history and turn count.
500
+ await persistSessionToDB(sessionId, session);
491
501
  await runAgentTurnInternal(sessionId, subscription, {
492
502
  sender: 'agent',
493
503
  session_id: sessionId,
@@ -497,15 +507,9 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
497
507
  return;
498
508
  }
499
509
  }
500
- // Ensure that a proper <final_answer> block is produced for the
501
- // desktop clients once we reach the final turn. If the model did
502
- // not emit either a <shell_script> or <final_answer> tag on the
503
- // MAX_TURNS turn, we treat this as the final natural-language answer
504
- // and wrap it in <final_answer> tags so the client can stop
505
- // waiting and paste the result.
506
510
  const hasShellScriptTag = content.includes('<shell_script>');
507
511
  const hasFinalAnswerTag = content.includes('<final_answer>');
508
- if (hasShellScriptTag && !isFinalTurn) {
512
+ if (hasShellScriptTag) {
509
513
  log.info('Completed agent turn. Sending back scripts, waiting for results.', {
510
514
  sessionId,
511
515
  subscriptionId: subscription.id,
@@ -516,6 +520,10 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
516
520
  role: 'assistant',
517
521
  content,
518
522
  });
523
+ // Persist before sending so that if the send callback triggers a new
524
+ // runAgentTurn immediately (e.g. cron shell-script loop), the DB already
525
+ // has the updated turn count and history.
526
+ await persistSessionToDB(sessionId, session);
519
527
  send({
520
528
  session_id: sessionId,
521
529
  sender: 'agent',
@@ -525,8 +533,8 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
525
533
  });
526
534
  return;
527
535
  }
528
- if (isFinalTurn || hasFinalAnswerTag) {
529
- log.info('Finalizing agent session after max turns or final answer tag', {
536
+ if (hasFinalAnswerTag) {
537
+ log.info('Finalizing agent session after final answer tag', {
530
538
  sessionId,
531
539
  subscriptionId: subscription.id,
532
540
  turns: session.turns,
@@ -8,7 +8,6 @@ const cors_1 = __importDefault(require("cors"));
8
8
  const path_1 = __importDefault(require("path"));
9
9
  const fs_1 = __importDefault(require("fs"));
10
10
  const zlib_1 = __importDefault(require("zlib"));
11
- const child_process_1 = require("child_process");
12
11
  const subscriptionRoutes_1 = require("./subscriptionRoutes");
13
12
  const featureRoutes_1 = require("./featureRoutes");
14
13
  const db_1 = require("./db");
@@ -24,7 +23,6 @@ require("./models/scheduledJob");
24
23
  const bucket_adapter_1 = require("./bucket-adapter");
25
24
  const app = (0, express_1.default)();
26
25
  const PORT = Number(config_1.config.port);
27
- const IS_CRON_CHILD = process.env.OMNIKEY_CRON_CHILD === '1';
28
26
  app.set('trust proxy', 1);
29
27
  app.use((0, cors_1.default)());
30
28
  app.use(express_1.default.json());
@@ -167,38 +165,9 @@ app.get('*', (_req, res) => {
167
165
  res.sendFile(path_1.default.join(process.cwd(), 'public', 'index.html'));
168
166
  });
169
167
  let server = null;
170
- let cronChildProcess = null;
171
- function startCronChildProcess() {
172
- if (IS_CRON_CHILD || cronChildProcess)
173
- return;
174
- const childPort = PORT + 1;
175
- const entry = process.argv[1] || __filename;
176
- cronChildProcess = (0, child_process_1.fork)(entry, [], {
177
- env: {
178
- ...process.env,
179
- OMNIKEY_CRON_CHILD: '1',
180
- OMNIKEY_PORT: String(childPort),
181
- },
182
- execArgv: process.execArgv,
183
- stdio: 'inherit',
184
- });
185
- logger_1.logger.info('Spawned cron child process.', {
186
- pid: cronChildProcess.pid,
187
- port: childPort,
188
- });
189
- cronChildProcess.on('exit', (code, signal) => {
190
- logger_1.logger.warn('Cron child process exited.', { code, signal });
191
- cronChildProcess = null;
192
- });
193
- }
194
168
  async function start() {
195
169
  try {
196
170
  await (0, db_1.initDatabase)(logger_1.logger);
197
- if (IS_CRON_CHILD) {
198
- logger_1.logger.info('Starting cron child process mode.', { port: PORT });
199
- (0, scheduledJobExecutor_1.startScheduledJobExecutor)();
200
- return;
201
- }
202
171
  server = app.listen(PORT, () => {
203
172
  logger_1.logger.info(`Enhancer API listening on http://localhost:${PORT}`, {
204
173
  isSelfHosted: config_1.config.isSelfHosted,
@@ -212,7 +181,7 @@ async function start() {
212
181
  (0, agentServer_1.attachAgentWebSocketServer)(server);
213
182
  }
214
183
  if (config_1.config.isSelfHosted) {
215
- startCronChildProcess();
184
+ (0, scheduledJobExecutor_1.startScheduledJobExecutor)();
216
185
  }
217
186
  }
218
187
  catch (err) {
@@ -223,15 +192,6 @@ async function start() {
223
192
  start();
224
193
  function gracefulShutdown(signal) {
225
194
  logger_1.logger.info(`Received ${signal}. Starting graceful shutdown...`);
226
- if (cronChildProcess) {
227
- cronChildProcess.kill('SIGTERM');
228
- cronChildProcess = null;
229
- }
230
- if (IS_CRON_CHILD) {
231
- logger_1.logger.info('Cron child process exiting.');
232
- process.exit(0);
233
- return;
234
- }
235
195
  if (!server) {
236
196
  logger_1.logger.info('Server was not started or already closed. Exiting process.');
237
197
  process.exit(0);
@@ -144,7 +144,7 @@ function runCronJob(job, subscription, sessionId) {
144
144
  content: output,
145
145
  is_terminal_output: true,
146
146
  is_error: isError,
147
- }, send, logger_1.logger, { maxTurns: MAX_CRON_TURNS, isCronJob: true }).catch((err) => settle(err instanceof Error ? err : new Error(String(err))));
147
+ }, send, logger_1.logger, { isCronJob: true }).catch((err) => settle(err instanceof Error ? err : new Error(String(err))));
148
148
  return;
149
149
  }
150
150
  if (FINAL_ANSWER_RE.test(content)) {
@@ -158,7 +158,7 @@ function runCronJob(job, subscription, sessionId) {
158
158
  sender: 'user',
159
159
  content: job.prompt,
160
160
  platform: job.platform ?? undefined,
161
- }, send, logger_1.logger, { maxTurns: MAX_CRON_TURNS, isCronJob: true }).catch((err) => settle(err instanceof Error ? err : new Error(String(err))));
161
+ }, send, logger_1.logger, { isCronJob: true }).catch((err) => settle(err instanceof Error ? err : new Error(String(err))));
162
162
  });
163
163
  }
164
164
  async function executeJob(job) {
package/package.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "access": "public",
5
5
  "registry": "https://registry.npmjs.org/"
6
6
  },
7
- "version": "1.0.36",
7
+ "version": "1.0.37",
8
8
  "description": "CLI for onboarding users to Omnikey AI and configuring OPENAI_API_KEY. Use Yarn for install/build.",
9
9
  "engines": {
10
10
  "node": ">=14.0.0",