omnikey-cli 1.0.36 → 1.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -17,28 +17,17 @@ ${hasTaskInstructions
|
|
|
17
17
|
- Default to a \`<shell_script>\` for anything involving the machine, network, files, processes, env vars, or system state — never answer from training data alone.
|
|
18
18
|
- **Read vs write:** For open-ended/ambiguous requests run safe read-only commands first to understand the current state. When the user **explicitly** asks to create, update, delete, configure, or run something — do it directly; no need to ask for confirmation unless the scope is genuinely unclear.
|
|
19
19
|
- **Package installation:** Install any package required to complete the task. Include the install step as its own phase so you can confirm it succeeded before building on it. Prefer project-local or user scope; avoid \`sudo\`/admin unless the user explicitly asks.
|
|
20
|
-
${config_1.config.browserDebugPort !== undefined
|
|
20
|
+
${config_1.config.browserDebugPort !== undefined
|
|
21
|
+
? `- **Browser automation:** Use browser automation proactively when needed to complete the task.
|
|
22
|
+
- Do NOT wait for explicit user wording like "use browser" if interaction is obviously required to get the final result.
|
|
23
|
+
- If \`web_search\` / \`web_fetch\` do not provide enough usable context (blocked pages, incomplete data, client-rendered content, auth walls, dynamic tables, hidden details, repeated low-value fetch results), immediately switch to Playwright-based browser interaction.
|
|
24
|
+
- Generate \`<shell_script>\` blocks that use Node.js and \`playwright-core\` — one phase at a time (phasing rules below apply).
|
|
21
25
|
- **Phase 1 — ensure deps:** Check and install \`playwright-core\` if missing:
|
|
22
26
|
\`node -e "require('/tmp/playwright-runner/node_modules/playwright-core')" 2>/dev/null || npm install --prefix /tmp/playwright-runner playwright-core --silent\`
|
|
23
|
-
- **Phase 2 — connect & navigate:**
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
try {
|
|
28
|
-
browser = await chromium.connectOverCDP('http://localhost:${config_1.config.browserDebugPort}');
|
|
29
|
-
const pages = browser.contexts().flatMap(c => c.pages());
|
|
30
|
-
page = pages.find(p => p.url().startsWith(TARGET_URL)) ?? null;
|
|
31
|
-
if (page) { await page.bringToFront(); }
|
|
32
|
-
else { page = await browser.contexts()[0].newPage(); await page.goto(TARGET_URL, { waitUntil: 'domcontentloaded', timeout: 15000 }); }
|
|
33
|
-
} catch {
|
|
34
|
-
const ctx = await chromium.launchPersistentContext('${config_1.config.browserDebugUserDataDir}', { executablePath: '${config_1.config.browserDebugExecutable}', headless: false });
|
|
35
|
-
browser = ctx;
|
|
36
|
-
page = ctx.pages().find(p => p.url().startsWith(TARGET_URL)) ?? await ctx.newPage();
|
|
37
|
-
if (!page.url().startsWith(TARGET_URL)) await page.goto(TARGET_URL, { waitUntil: 'domcontentloaded', timeout: 15000 });
|
|
38
|
-
}
|
|
39
|
-
\`\`\`
|
|
40
|
-
- **Phase 3+ — one action per script:** Each subsequent script reconnects the same way, finds the already-open tab, performs exactly one action (click / type / select / screenshot / read text), prints the result, then calls \`browser.disconnect()\` (CDP) or just exits (profile launch — leaves the window open).
|
|
41
|
-
- Always inline Node.js via a bash heredoc so the script is self-contained. Print structured output to stdout so it returns as \`TERMINAL OUTPUT:\`.` : ''}
|
|
27
|
+
- **Phase 2 — connect & navigate:** Connect to the running browser via CDP at \`http://localhost:${config_1.config.browserDebugPort}\`. If CDP fails, fall back to launching a persistent context using the debug profile at \`${config_1.config.browserDebugUserDataDir}\` with the executable at \`${config_1.config.browserDebugExecutable}\` (headless: false). Once connected, navigate to any URL required by the task — open any page needed, reusing an existing tab if the URL already matches or creating a new one if not. There is no restriction on which sites or pages you can visit; open whatever is necessary to complete the task.
|
|
28
|
+
- **Phase 3+ — one action per script:** Each subsequent script reconnects via the same CDP endpoint (\`http://localhost:${config_1.config.browserDebugPort}\`) or profile fallback, finds the already-open tab (or reopens it), performs exactly one action (click, type, select, scroll, screenshot, read text, extract data, fill forms, etc.), prints the result to stdout, then calls \`browser.disconnect()\` (CDP) or exits (profile launch). You may perform any interaction the task requires — reading content, extracting structured data, submitting forms, navigating between pages, or capturing screenshots.
|
|
29
|
+
- Always inline Node.js via a bash heredoc so the script is self-contained. Print structured output to stdout so it returns as \`TERMINAL OUTPUT:\`.`
|
|
30
|
+
: ''}
|
|
42
31
|
- Use ${!isWindows ? 'bash (macOS/Linux)' : 'PowerShell'}. Every script must be self-contained and ready to run as-is.
|
|
43
32
|
- Skip the script only for purely factual/conversational requests with no live data dependency (e.g. "what is 2+2").
|
|
44
33
|
|
|
@@ -171,7 +171,6 @@ async function runToolLoop(initialResult, session, sessionId, send, log, tools,
|
|
|
171
171
|
return result;
|
|
172
172
|
}
|
|
173
173
|
const aiModel = (0, ai_client_1.getDefaultModel)(config_1.config.aiProvider, 'smart');
|
|
174
|
-
const MAX_TURNS = 20;
|
|
175
174
|
// ─── DB helpers ───────────────────────────────────────────────────────────────
|
|
176
175
|
async function persistSessionToDB(sessionId, state) {
|
|
177
176
|
try {
|
|
@@ -276,7 +275,7 @@ ${prompt}
|
|
|
276
275
|
};
|
|
277
276
|
// Persist immediately so that GET /sessions picks it up right away.
|
|
278
277
|
try {
|
|
279
|
-
await agentSession_1.AgentSession.findOrCreate({
|
|
278
|
+
const [dbSession, created] = await agentSession_1.AgentSession.findOrCreate({
|
|
280
279
|
where: { id: sessionId, subscriptionId: subscription.id },
|
|
281
280
|
defaults: {
|
|
282
281
|
id: sessionId,
|
|
@@ -288,6 +287,25 @@ ${prompt}
|
|
|
288
287
|
lastActiveAt: new Date(),
|
|
289
288
|
},
|
|
290
289
|
});
|
|
290
|
+
if (!created) {
|
|
291
|
+
const history = JSON.parse(dbSession.historyJson || '[]');
|
|
292
|
+
const existingEntry = {
|
|
293
|
+
subscription,
|
|
294
|
+
history,
|
|
295
|
+
turns: dbSession.turns,
|
|
296
|
+
};
|
|
297
|
+
log.info('Reused existing agent session row from DB during create path', {
|
|
298
|
+
sessionId,
|
|
299
|
+
subscriptionId: subscription.id,
|
|
300
|
+
turns: existingEntry.turns,
|
|
301
|
+
});
|
|
302
|
+
return {
|
|
303
|
+
sessionState: existingEntry,
|
|
304
|
+
hasStoredPrompt: history
|
|
305
|
+
.filter((h) => h.role === 'user')
|
|
306
|
+
.some((h) => typeof h.content === 'string' && h.content.includes('<stored_instructions>')),
|
|
307
|
+
};
|
|
308
|
+
}
|
|
291
309
|
// Prune oldest sessions after each creation so the cap is always respected.
|
|
292
310
|
void enforceSessionCap(subscription.id, log);
|
|
293
311
|
}
|
|
@@ -313,14 +331,6 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
|
|
|
313
331
|
subscriptionId: subscription.id,
|
|
314
332
|
turn: session.turns,
|
|
315
333
|
});
|
|
316
|
-
const effectiveMaxTurns = options?.maxTurns ?? MAX_TURNS;
|
|
317
|
-
// On the final iteration, instruct the LLM to provide a consolidated answer.
|
|
318
|
-
if (session.turns === effectiveMaxTurns) {
|
|
319
|
-
(0, utils_1.pushToSessionHistory)(logger_1.logger, session, {
|
|
320
|
-
role: 'system',
|
|
321
|
-
content: 'Provide a single, final, concise answer based on the entire conversation so far. Wrap the answer in a <final_answer>...</final_answer> block and do not ask for further input or mention additional shell scripts to run. Do not include any <shell_script> block in this response.',
|
|
322
|
-
});
|
|
323
|
-
}
|
|
324
334
|
// Append the client message as user content, marking terminal
|
|
325
335
|
// output and errors in the text so the agent can reason about them.
|
|
326
336
|
let userContent = clientMessage.content || '';
|
|
@@ -369,10 +379,7 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
|
|
|
369
379
|
}
|
|
370
380
|
}
|
|
371
381
|
}
|
|
372
|
-
|
|
373
|
-
// plain text <final_answer> rather than issuing another tool call.
|
|
374
|
-
const isFinalTurn = session.turns >= effectiveMaxTurns;
|
|
375
|
-
const tools = isFinalTurn ? undefined : (0, utils_1.buildAvailableTools)();
|
|
382
|
+
const tools = (0, utils_1.buildAvailableTools)();
|
|
376
383
|
const recordUsage = async (result) => {
|
|
377
384
|
const usage = result.usage;
|
|
378
385
|
if (!usage)
|
|
@@ -433,7 +440,7 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
|
|
|
433
440
|
}
|
|
434
441
|
// If the model requested web tool calls, execute them and get a follow-up
|
|
435
442
|
// response before deciding what to send to the client.
|
|
436
|
-
if (
|
|
443
|
+
if (result.finish_reason === 'tool_calls') {
|
|
437
444
|
log.info('Running web tool calls to gather information', {
|
|
438
445
|
sessionId,
|
|
439
446
|
subscriptionId: subscription.id,
|
|
@@ -488,6 +495,9 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
|
|
|
488
495
|
'No plain text. No other format.',
|
|
489
496
|
].join('\n'),
|
|
490
497
|
});
|
|
498
|
+
// DB-only session state: persist before recursive handoff so the
|
|
499
|
+
// follow-up turn reads the latest history and turn count.
|
|
500
|
+
await persistSessionToDB(sessionId, session);
|
|
491
501
|
await runAgentTurnInternal(sessionId, subscription, {
|
|
492
502
|
sender: 'agent',
|
|
493
503
|
session_id: sessionId,
|
|
@@ -497,15 +507,9 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
|
|
|
497
507
|
return;
|
|
498
508
|
}
|
|
499
509
|
}
|
|
500
|
-
// Ensure that a proper <final_answer> block is produced for the
|
|
501
|
-
// desktop clients once we reach the final turn. If the model did
|
|
502
|
-
// not emit either a <shell_script> or <final_answer> tag on the
|
|
503
|
-
// MAX_TURNS turn, we treat this as the final natural-language answer
|
|
504
|
-
// and wrap it in <final_answer> tags so the client can stop
|
|
505
|
-
// waiting and paste the result.
|
|
506
510
|
const hasShellScriptTag = content.includes('<shell_script>');
|
|
507
511
|
const hasFinalAnswerTag = content.includes('<final_answer>');
|
|
508
|
-
if (hasShellScriptTag
|
|
512
|
+
if (hasShellScriptTag) {
|
|
509
513
|
log.info('Completed agent turn. Sending back scripts, waiting for results.', {
|
|
510
514
|
sessionId,
|
|
511
515
|
subscriptionId: subscription.id,
|
|
@@ -516,6 +520,10 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
|
|
|
516
520
|
role: 'assistant',
|
|
517
521
|
content,
|
|
518
522
|
});
|
|
523
|
+
// Persist before sending so that if the send callback triggers a new
|
|
524
|
+
// runAgentTurn immediately (e.g. cron shell-script loop), the DB already
|
|
525
|
+
// has the updated turn count and history.
|
|
526
|
+
await persistSessionToDB(sessionId, session);
|
|
519
527
|
send({
|
|
520
528
|
session_id: sessionId,
|
|
521
529
|
sender: 'agent',
|
|
@@ -525,8 +533,8 @@ async function runAgentTurnInternal(sessionId, subscription, clientMessage, send
|
|
|
525
533
|
});
|
|
526
534
|
return;
|
|
527
535
|
}
|
|
528
|
-
if (
|
|
529
|
-
log.info('Finalizing agent session after
|
|
536
|
+
if (hasFinalAnswerTag) {
|
|
537
|
+
log.info('Finalizing agent session after final answer tag', {
|
|
530
538
|
sessionId,
|
|
531
539
|
subscriptionId: subscription.id,
|
|
532
540
|
turns: session.turns,
|
package/backend-dist/index.js
CHANGED
|
@@ -8,7 +8,6 @@ const cors_1 = __importDefault(require("cors"));
|
|
|
8
8
|
const path_1 = __importDefault(require("path"));
|
|
9
9
|
const fs_1 = __importDefault(require("fs"));
|
|
10
10
|
const zlib_1 = __importDefault(require("zlib"));
|
|
11
|
-
const child_process_1 = require("child_process");
|
|
12
11
|
const subscriptionRoutes_1 = require("./subscriptionRoutes");
|
|
13
12
|
const featureRoutes_1 = require("./featureRoutes");
|
|
14
13
|
const db_1 = require("./db");
|
|
@@ -24,7 +23,6 @@ require("./models/scheduledJob");
|
|
|
24
23
|
const bucket_adapter_1 = require("./bucket-adapter");
|
|
25
24
|
const app = (0, express_1.default)();
|
|
26
25
|
const PORT = Number(config_1.config.port);
|
|
27
|
-
const IS_CRON_CHILD = process.env.OMNIKEY_CRON_CHILD === '1';
|
|
28
26
|
app.set('trust proxy', 1);
|
|
29
27
|
app.use((0, cors_1.default)());
|
|
30
28
|
app.use(express_1.default.json());
|
|
@@ -167,38 +165,9 @@ app.get('*', (_req, res) => {
|
|
|
167
165
|
res.sendFile(path_1.default.join(process.cwd(), 'public', 'index.html'));
|
|
168
166
|
});
|
|
169
167
|
let server = null;
|
|
170
|
-
let cronChildProcess = null;
|
|
171
|
-
function startCronChildProcess() {
|
|
172
|
-
if (IS_CRON_CHILD || cronChildProcess)
|
|
173
|
-
return;
|
|
174
|
-
const childPort = PORT + 1;
|
|
175
|
-
const entry = process.argv[1] || __filename;
|
|
176
|
-
cronChildProcess = (0, child_process_1.fork)(entry, [], {
|
|
177
|
-
env: {
|
|
178
|
-
...process.env,
|
|
179
|
-
OMNIKEY_CRON_CHILD: '1',
|
|
180
|
-
OMNIKEY_PORT: String(childPort),
|
|
181
|
-
},
|
|
182
|
-
execArgv: process.execArgv,
|
|
183
|
-
stdio: 'inherit',
|
|
184
|
-
});
|
|
185
|
-
logger_1.logger.info('Spawned cron child process.', {
|
|
186
|
-
pid: cronChildProcess.pid,
|
|
187
|
-
port: childPort,
|
|
188
|
-
});
|
|
189
|
-
cronChildProcess.on('exit', (code, signal) => {
|
|
190
|
-
logger_1.logger.warn('Cron child process exited.', { code, signal });
|
|
191
|
-
cronChildProcess = null;
|
|
192
|
-
});
|
|
193
|
-
}
|
|
194
168
|
async function start() {
|
|
195
169
|
try {
|
|
196
170
|
await (0, db_1.initDatabase)(logger_1.logger);
|
|
197
|
-
if (IS_CRON_CHILD) {
|
|
198
|
-
logger_1.logger.info('Starting cron child process mode.', { port: PORT });
|
|
199
|
-
(0, scheduledJobExecutor_1.startScheduledJobExecutor)();
|
|
200
|
-
return;
|
|
201
|
-
}
|
|
202
171
|
server = app.listen(PORT, () => {
|
|
203
172
|
logger_1.logger.info(`Enhancer API listening on http://localhost:${PORT}`, {
|
|
204
173
|
isSelfHosted: config_1.config.isSelfHosted,
|
|
@@ -212,7 +181,7 @@ async function start() {
|
|
|
212
181
|
(0, agentServer_1.attachAgentWebSocketServer)(server);
|
|
213
182
|
}
|
|
214
183
|
if (config_1.config.isSelfHosted) {
|
|
215
|
-
|
|
184
|
+
(0, scheduledJobExecutor_1.startScheduledJobExecutor)();
|
|
216
185
|
}
|
|
217
186
|
}
|
|
218
187
|
catch (err) {
|
|
@@ -223,15 +192,6 @@ async function start() {
|
|
|
223
192
|
start();
|
|
224
193
|
function gracefulShutdown(signal) {
|
|
225
194
|
logger_1.logger.info(`Received ${signal}. Starting graceful shutdown...`);
|
|
226
|
-
if (cronChildProcess) {
|
|
227
|
-
cronChildProcess.kill('SIGTERM');
|
|
228
|
-
cronChildProcess = null;
|
|
229
|
-
}
|
|
230
|
-
if (IS_CRON_CHILD) {
|
|
231
|
-
logger_1.logger.info('Cron child process exiting.');
|
|
232
|
-
process.exit(0);
|
|
233
|
-
return;
|
|
234
|
-
}
|
|
235
195
|
if (!server) {
|
|
236
196
|
logger_1.logger.info('Server was not started or already closed. Exiting process.');
|
|
237
197
|
process.exit(0);
|
|
@@ -144,7 +144,7 @@ function runCronJob(job, subscription, sessionId) {
|
|
|
144
144
|
content: output,
|
|
145
145
|
is_terminal_output: true,
|
|
146
146
|
is_error: isError,
|
|
147
|
-
}, send, logger_1.logger, {
|
|
147
|
+
}, send, logger_1.logger, { isCronJob: true }).catch((err) => settle(err instanceof Error ? err : new Error(String(err))));
|
|
148
148
|
return;
|
|
149
149
|
}
|
|
150
150
|
if (FINAL_ANSWER_RE.test(content)) {
|
|
@@ -158,7 +158,7 @@ function runCronJob(job, subscription, sessionId) {
|
|
|
158
158
|
sender: 'user',
|
|
159
159
|
content: job.prompt,
|
|
160
160
|
platform: job.platform ?? undefined,
|
|
161
|
-
}, send, logger_1.logger, {
|
|
161
|
+
}, send, logger_1.logger, { isCronJob: true }).catch((err) => settle(err instanceof Error ? err : new Error(String(err))));
|
|
162
162
|
});
|
|
163
163
|
}
|
|
164
164
|
async function executeJob(job) {
|
package/package.json
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"access": "public",
|
|
5
5
|
"registry": "https://registry.npmjs.org/"
|
|
6
6
|
},
|
|
7
|
-
"version": "1.0.36",
|
|
7
|
+
"version": "1.0.37",
|
|
8
8
|
"description": "CLI for onboarding users to Omnikey AI and configuring OPENAI_API_KEY. Use Yarn for install/build.",
|
|
9
9
|
"engines": {
|
|
10
10
|
"node": ">=14.0.0",
|