omnikey-cli 1.0.28 → 1.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +68 -19
- package/backend-dist/agent/agentPrompts.js +6 -1
- package/backend-dist/agent/agentServer.js +47 -17
- package/backend-dist/agent/imageTool.js +167 -0
- package/backend-dist/agent/utils.js +13 -1
- package/backend-dist/ai-client.js +79 -0
- package/backend-dist/index.js +13 -5
- package/backend-dist/models/scheduledJob.js +97 -0
- package/backend-dist/scheduledJobExecutor.js +199 -0
- package/backend-dist/scheduledJobRoutes.js +186 -0
- package/dist/index.js +20 -0
- package/dist/onboard.js +7 -6
- package/dist/scheduleJob.js +268 -0
- package/package.json +1 -1
- package/src/index.ts +25 -0
- package/src/onboard.ts +8 -6
- package/src/scheduleJob.ts +309 -0
package/README.md
CHANGED
|
@@ -19,6 +19,7 @@ OmnikeyAI is a productivity tool that helps you quickly rewrite selected text us
|
|
|
19
19
|
- Accepts CLI flags for non-interactive setup.
|
|
20
20
|
- Configure and run the backend daemon — persisted across reboots on both macOS and Windows.
|
|
21
21
|
- `omnikey grant-browser-access`: One-time setup to give Omnikey access to authenticated browser tabs for web fetch.
|
|
22
|
+
- Scheduled Jobs commands to create, list, delete, and trigger jobs from the CLI.
|
|
22
23
|
|
|
23
24
|
## Usage
|
|
24
25
|
|
|
@@ -64,23 +65,71 @@ omnikey grant-browser-access
|
|
|
64
65
|
|
|
65
66
|
# Reopen the browser with its saved Omnikey debug profile at any time
|
|
66
67
|
omnikey browser open
|
|
68
|
+
|
|
69
|
+
# Add a scheduled job (interactive)
|
|
70
|
+
omnikey schedule add
|
|
71
|
+
|
|
72
|
+
# List scheduled jobs
|
|
73
|
+
omnikey schedule list
|
|
74
|
+
|
|
75
|
+
# Remove a scheduled job (interactive select)
|
|
76
|
+
omnikey schedule remove
|
|
77
|
+
|
|
78
|
+
# Trigger a scheduled job immediately by ID
|
|
79
|
+
omnikey schedule run-now <job-id>
|
|
67
80
|
```
|
|
68
81
|
|
|
69
82
|
### Command reference
|
|
70
83
|
|
|
71
|
-
| Command
|
|
72
|
-
|
|
73
|
-
| `omnikey onboard`
|
|
74
|
-
| `omnikey daemon [--port]`
|
|
75
|
-
| `omnikey kill-daemon`
|
|
76
|
-
| `omnikey restart-daemon [--port]`
|
|
77
|
-
| `omnikey config`
|
|
78
|
-
| `omnikey set <key> <value>`
|
|
79
|
-
| `omnikey remove-config [--db]`
|
|
80
|
-
| `omnikey status`
|
|
81
|
-
| `omnikey logs [--lines N] [--errors]` | Tail daemon logs
|
|
82
|
-
| `omnikey grant-browser-access`
|
|
83
|
-
| `omnikey browser open`
|
|
84
|
+
| Command | Description |
|
|
85
|
+
| ------------------------------------- | ------------------------------------------------------------------------- |
|
|
86
|
+
| `omnikey onboard` | Interactive setup for LLM provider and web search |
|
|
87
|
+
| `omnikey daemon [--port]` | Start the backend daemon (default port: 7071) |
|
|
88
|
+
| `omnikey kill-daemon` | Stop the running daemon |
|
|
89
|
+
| `omnikey restart-daemon [--port]` | Kill and restart the daemon |
|
|
90
|
+
| `omnikey config` | Display current config with masked API keys |
|
|
91
|
+
| `omnikey set <key> <value>` | Update a single config value |
|
|
92
|
+
| `omnikey remove-config [--db]` | Remove config files; add `--db` to also delete the database |
|
|
93
|
+
| `omnikey status` | Show what process is using the daemon port |
|
|
94
|
+
| `omnikey logs [--lines N] [--errors]` | Tail daemon logs |
|
|
95
|
+
| `omnikey grant-browser-access` | Set up authenticated browser tab access for web fetch |
|
|
96
|
+
| `omnikey browser open` | Reopen the browser with the saved Omnikey debug profile |
|
|
97
|
+
| `omnikey schedule add` | Create a scheduled job with interactive prompt, schedule type, and timing |
|
|
98
|
+
| `omnikey schedule list` | List all scheduled jobs with status and next run |
|
|
99
|
+
| `omnikey schedule remove` | Remove an existing scheduled job via interactive selection |
|
|
100
|
+
| `omnikey schedule run-now <id>` | Trigger a scheduled job immediately |
|
|
101
|
+
|
|
102
|
+
## Scheduled Jobs
|
|
103
|
+
|
|
104
|
+
The CLI includes a full `schedule` command group to manage recurring and one-time jobs.
|
|
105
|
+
|
|
106
|
+
### `omnikey schedule add`
|
|
107
|
+
|
|
108
|
+
Creates a new job interactively:
|
|
109
|
+
|
|
110
|
+
- Prompts for a job label
|
|
111
|
+
- Lets you enter a multiline prompt directly in the terminal (type `END` on its own line when finished)
|
|
112
|
+
- Supports:
|
|
113
|
+
- Recurring schedule with cron presets or custom cron
|
|
114
|
+
- One-time schedule by date/time
|
|
115
|
+
|
|
116
|
+
### `omnikey schedule list`
|
|
117
|
+
|
|
118
|
+
Displays all jobs in a table with:
|
|
119
|
+
|
|
120
|
+
- ID
|
|
121
|
+
- Label
|
|
122
|
+
- Schedule
|
|
123
|
+
- Next run
|
|
124
|
+
- Status
|
|
125
|
+
|
|
126
|
+
### `omnikey schedule remove`
|
|
127
|
+
|
|
128
|
+
Lets you choose a job from a list and confirms deletion.
|
|
129
|
+
|
|
130
|
+
### `omnikey schedule run-now <id>`
|
|
131
|
+
|
|
132
|
+
Runs a job immediately using its job ID.
|
|
84
133
|
|
|
85
134
|
## Browser access (`grant-browser-access` / `browser open`)
|
|
86
135
|
|
|
@@ -133,12 +182,12 @@ The daemon is registered as a **launchd agent** (`~/Library/LaunchAgents/com.omn
|
|
|
133
182
|
|
|
134
183
|
The daemon runs as a **Windows Service** managed by [NSSM (Non-Sucking Service Manager)](https://nssm.cc/). This gives it production-grade persistence:
|
|
135
184
|
|
|
136
|
-
| Behaviour
|
|
137
|
-
|
|
138
|
-
| Starts on boot
|
|
139
|
-
| Auto-restarts on crash | Restarts after a 3-second delay on any unexpected exit
|
|
140
|
-
| Runs in the background | No console window, no logged-in user needed
|
|
141
|
-
| Log rotation
|
|
185
|
+
| Behaviour | Detail |
|
|
186
|
+
| ---------------------- | --------------------------------------------------------------------------------------------- |
|
|
187
|
+
| Starts on boot | Runs as `SERVICE_AUTO_START` — no login required |
|
|
188
|
+
| Auto-restarts on crash | Restarts after a 3-second delay on any unexpected exit |
|
|
189
|
+
| Runs in the background | No console window, no logged-in user needed |
|
|
190
|
+
| Log rotation | stdout/stderr written to `~/.omnikey/daemon.log` and `daemon-error.log` with rotation enabled |
|
|
142
191
|
|
|
143
192
|
#### Prerequisites
|
|
144
193
|
|
|
@@ -25,6 +25,11 @@ ${hasTaskInstructions
|
|
|
25
25
|
- Use the built-in \`web_search\` tool when the user asks to search online, or when current information (prices, docs, recent events) is needed.
|
|
26
26
|
- If a request needs BOTH machine data AND web search: emit a \`<shell_script>\` first → wait for \`TERMINAL OUTPUT:\` → then call the web tool with concrete values. Never use placeholders like "my IP" in a web query.
|
|
27
27
|
|
|
28
|
+
**When to use image tools:**
|
|
29
|
+
- Use the built-in \`generate_image\` tool when the user asks you to create or render an image.
|
|
30
|
+
- Prefer the user-provided output path when available. If none is provided, call the tool without \`file_path\` so it saves to a temporary file.
|
|
31
|
+
- After the tool call returns, provide a \`<final_answer>\` that includes the saved file path.
|
|
32
|
+
|
|
28
33
|
**Incoming message tags:**
|
|
29
34
|
- \`TERMINAL OUTPUT:\` — stdout/stderr from a prior script. Analyze it immediately and respond with EITHER a follow-up \`<shell_script>\` (if more data is needed) OR a \`<final_answer>\` (if you have enough to conclude). You MUST pick one — never respond with plain text.
|
|
30
35
|
- \`COMMAND ERROR:\` — script failed. Diagnose and emit a corrected \`<shell_script>\` or explain in \`<final_answer>\`.
|
|
@@ -32,7 +37,7 @@ ${hasTaskInstructions
|
|
|
32
37
|
|
|
33
38
|
**Response format — every response must be exactly one of:**
|
|
34
39
|
1. \`<shell_script>...</shell_script>\` — to run commands and gather more data.
|
|
35
|
-
2. A \`web_search\` or \`
|
|
40
|
+
2. A \`web_search\`, \`web_fetch\`, or \`generate_image\` tool call — to fetch web context or generate images (use native tool calling, not XML tags).
|
|
36
41
|
3. \`<final_answer>...</final_answer>\` — your conclusion once you have enough information.
|
|
37
42
|
|
|
38
43
|
**Critical rule:** After receiving \`TERMINAL OUTPUT:\` you MUST immediately produce either \`<shell_script>\` or \`<final_answer>\`. Never output raw text, markdown, or any other format. If the terminal output contains enough information to answer the user's request, output \`<final_answer>\` right away.
|
|
@@ -36,6 +36,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
36
36
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
37
37
|
};
|
|
38
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.runAgentTurn = runAgentTurn;
|
|
39
40
|
exports.attachAgentWebSocketServer = attachAgentWebSocketServer;
|
|
40
41
|
exports.createAgentRouter = createAgentRouter;
|
|
41
42
|
const express_1 = __importDefault(require("express"));
|
|
@@ -51,6 +52,7 @@ const featureRoutes_1 = require("../featureRoutes");
|
|
|
51
52
|
const web_search_provider_1 = require("../web-search/web-search-provider");
|
|
52
53
|
const agentAuth_1 = require("./agentAuth");
|
|
53
54
|
const authMiddleware_1 = require("../authMiddleware");
|
|
55
|
+
const imageTool_1 = require("./imageTool");
|
|
54
56
|
const utils_1 = require("./utils");
|
|
55
57
|
const ai_client_1 = require("../ai-client");
|
|
56
58
|
async function runToolLoop(initialResult, session, sessionId, send, log, tools, onUsage) {
|
|
@@ -74,10 +76,28 @@ async function runToolLoop(initialResult, session, sessionId, send, log, tools,
|
|
|
74
76
|
});
|
|
75
77
|
const toolResults = await Promise.all(toolCalls.map(async (tc) => {
|
|
76
78
|
const args = tc.arguments;
|
|
79
|
+
if (tc.name === 'generate_image') {
|
|
80
|
+
const prompt = typeof args.prompt === 'string' ? args.prompt : '';
|
|
81
|
+
send({
|
|
82
|
+
session_id: sessionId,
|
|
83
|
+
sender: 'agent',
|
|
84
|
+
content: `Generating image: "${prompt.slice(0, 100)}${prompt.length > 100 ? '...' : ''}"`,
|
|
85
|
+
is_terminal_output: false,
|
|
86
|
+
is_error: false,
|
|
87
|
+
is_web_call: false,
|
|
88
|
+
});
|
|
89
|
+
const toolResult = await (0, imageTool_1.executeImageGenerationTool)(args, log);
|
|
90
|
+
log.info('Tool call completed', {
|
|
91
|
+
sessionId,
|
|
92
|
+
tool: tc.name,
|
|
93
|
+
resultLength: toolResult.length,
|
|
94
|
+
});
|
|
95
|
+
return { id: tc.id, name: tc.name, result: toolResult };
|
|
96
|
+
}
|
|
77
97
|
// Notify the frontend that a web tool call is about to execute.
|
|
78
98
|
const webCallContent = tc.name === 'web_search'
|
|
79
|
-
? `Searching the web for: "${args.query ?? ''}"`
|
|
80
|
-
: `Fetching URL: ${args.url ?? ''}`;
|
|
99
|
+
? `Searching the web for: "${String(args.query ?? '')}"`
|
|
100
|
+
: `Fetching URL: ${String(args.url ?? '')}`;
|
|
81
101
|
send({
|
|
82
102
|
session_id: sessionId,
|
|
83
103
|
sender: 'agent',
|
|
@@ -185,7 +205,7 @@ async function enforceSessionCap(subscriptionId, logger) {
|
|
|
185
205
|
logger.error('Failed to enforce agent session cap', { subscriptionId, error: err });
|
|
186
206
|
}
|
|
187
207
|
}
|
|
188
|
-
async function getOrCreateSession(sessionId, subscription, platform, log) {
|
|
208
|
+
async function getOrCreateSession(sessionId, subscription, platform, log, isCronJob = false) {
|
|
189
209
|
// 1. Return the live in-memory entry if already loaded this process lifetime.
|
|
190
210
|
const existing = sessionMessages.get(sessionId);
|
|
191
211
|
if (existing) {
|
|
@@ -246,7 +266,7 @@ async function getOrCreateSession(sessionId, subscription, platform, log) {
|
|
|
246
266
|
role: 'system',
|
|
247
267
|
content: systemPrompt,
|
|
248
268
|
},
|
|
249
|
-
...(prompt
|
|
269
|
+
...(prompt && !isCronJob
|
|
250
270
|
? [
|
|
251
271
|
{
|
|
252
272
|
role: 'user',
|
|
@@ -291,8 +311,8 @@ ${prompt}
|
|
|
291
311
|
hasStoredPrompt: !!prompt,
|
|
292
312
|
};
|
|
293
313
|
}
|
|
294
|
-
async function runAgentTurn(sessionId, subscription, clientMessage, send, log) {
|
|
295
|
-
const { sessionState: session, hasStoredPrompt } = await getOrCreateSession(sessionId, subscription, clientMessage.platform, log);
|
|
314
|
+
async function runAgentTurn(sessionId, subscription, clientMessage, send, log, options) {
|
|
315
|
+
const { sessionState: session, hasStoredPrompt } = await getOrCreateSession(sessionId, subscription, clientMessage.platform, log, options?.isCronJob);
|
|
296
316
|
// Count this call as one agent iteration.
|
|
297
317
|
session.turns += 1;
|
|
298
318
|
log.info('Starting agent turn', {
|
|
@@ -300,9 +320,9 @@ async function runAgentTurn(sessionId, subscription, clientMessage, send, log) {
|
|
|
300
320
|
subscriptionId: subscription.id,
|
|
301
321
|
turn: session.turns,
|
|
302
322
|
});
|
|
303
|
-
|
|
304
|
-
//
|
|
305
|
-
if (session.turns ===
|
|
323
|
+
const effectiveMaxTurns = options?.maxTurns ?? MAX_TURNS;
|
|
324
|
+
// On the final iteration, instruct the LLM to provide a consolidated answer.
|
|
325
|
+
if (session.turns === effectiveMaxTurns) {
|
|
306
326
|
(0, utils_1.pushToSessionHistory)(logger_1.logger, session, {
|
|
307
327
|
role: 'system',
|
|
308
328
|
content: 'Provide a single, final, concise answer based on the entire conversation so far. Wrap the answer in a <final_answer>...</final_answer> block and do not ask for further input or mention additional shell scripts to run. Do not include any <shell_script> block in this response.',
|
|
@@ -337,7 +357,13 @@ async function runAgentTurn(sessionId, subscription, clientMessage, send, log) {
|
|
|
337
357
|
role: 'user',
|
|
338
358
|
content: isAssistance
|
|
339
359
|
? userContent
|
|
340
|
-
:
|
|
360
|
+
: [
|
|
361
|
+
`<user_input>`,
|
|
362
|
+
!options?.isCronJob
|
|
363
|
+
? (0, utils_1.createUserContent)(userContent, hasStoredPrompt)
|
|
364
|
+
: (0, utils_1.createUserContentForCronJob)(userContent),
|
|
365
|
+
`</user_input>`,
|
|
366
|
+
].join('\n'),
|
|
341
367
|
});
|
|
342
368
|
// Use the first real user message (turn 1) as the session title.
|
|
343
369
|
if (session.turns === 1 && !isAssistance) {
|
|
@@ -352,7 +378,7 @@ async function runAgentTurn(sessionId, subscription, clientMessage, send, log) {
|
|
|
352
378
|
}
|
|
353
379
|
// On the final turn we omit tools so the model is forced to emit a
|
|
354
380
|
// plain text <final_answer> rather than issuing another tool call.
|
|
355
|
-
const isFinalTurn = session.turns >=
|
|
381
|
+
const isFinalTurn = session.turns >= effectiveMaxTurns;
|
|
356
382
|
const tools = isFinalTurn ? undefined : (0, utils_1.buildAvailableTools)();
|
|
357
383
|
const recordUsage = async (result) => {
|
|
358
384
|
const usage = result.usage;
|
|
@@ -426,7 +452,10 @@ async function runAgentTurn(sessionId, subscription, clientMessage, send, log) {
|
|
|
426
452
|
const toolLoopContent = toolLoopResult.content.trim();
|
|
427
453
|
const toolLoopHasShell = toolLoopContent.includes('<shell_script>');
|
|
428
454
|
const toolLoopHasFinal = toolLoopContent.includes('<final_answer>');
|
|
429
|
-
const webToolFailed = session.history.some((msg) => msg.role === 'tool' &&
|
|
455
|
+
const webToolFailed = session.history.some((msg) => msg.role === 'tool' &&
|
|
456
|
+
(msg.tool_name === 'web_search' || msg.tool_name === 'web_fetch') &&
|
|
457
|
+
typeof msg.content === 'string' &&
|
|
458
|
+
msg.content.startsWith('Error'));
|
|
430
459
|
if (toolLoopHasShell || (toolLoopHasFinal && !webToolFailed)) {
|
|
431
460
|
// The tool loop already produced a shell script — use it directly.
|
|
432
461
|
// This avoids a redundant AI call and handles the case where the model
|
|
@@ -473,7 +502,7 @@ async function runAgentTurn(sessionId, subscription, clientMessage, send, log) {
|
|
|
473
502
|
session_id: sessionId,
|
|
474
503
|
content: '',
|
|
475
504
|
is_web_call: true,
|
|
476
|
-
}, send, logger_1.logger);
|
|
505
|
+
}, send, logger_1.logger, options);
|
|
477
506
|
return;
|
|
478
507
|
}
|
|
479
508
|
}
|
|
@@ -512,13 +541,14 @@ async function runAgentTurn(sessionId, subscription, clientMessage, send, log) {
|
|
|
512
541
|
turns: session.turns,
|
|
513
542
|
hasFinalAnswerTag,
|
|
514
543
|
});
|
|
544
|
+
(0, utils_1.pushToSessionHistory)(logger_1.logger, session, { role: 'assistant', content });
|
|
545
|
+
await persistSessionToDB(sessionId, session);
|
|
546
|
+
sessionMessages.delete(sessionId);
|
|
515
547
|
send({
|
|
516
548
|
session_id: sessionId,
|
|
517
549
|
sender: 'agent',
|
|
518
550
|
content: hasFinalAnswerTag ? content : `<final_answer>\n${content}\n</final_answer>`,
|
|
519
551
|
});
|
|
520
|
-
await persistSessionToDB(sessionId, session);
|
|
521
|
-
sessionMessages.delete(sessionId);
|
|
522
552
|
}
|
|
523
553
|
else if (content) {
|
|
524
554
|
// Fallback: the LLM returned content without any recognized tag and it
|
|
@@ -531,13 +561,13 @@ async function runAgentTurn(sessionId, subscription, clientMessage, send, log) {
|
|
|
531
561
|
turn: session.turns,
|
|
532
562
|
});
|
|
533
563
|
(0, utils_1.pushToSessionHistory)(log, session, { role: 'assistant', content });
|
|
564
|
+
await persistSessionToDB(sessionId, session);
|
|
565
|
+
sessionMessages.delete(sessionId);
|
|
534
566
|
send({
|
|
535
567
|
session_id: sessionId,
|
|
536
568
|
sender: 'agent',
|
|
537
569
|
content: `<final_answer>\n${content}\n</final_answer>`,
|
|
538
570
|
});
|
|
539
|
-
await persistSessionToDB(sessionId, session);
|
|
540
|
-
sessionMessages.delete(sessionId);
|
|
541
571
|
}
|
|
542
572
|
else {
|
|
543
573
|
log.warn('Agent returned empty content with no recognized tags; sending error', {
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.IMAGE_GENERATE_TOOL = void 0;
|
|
7
|
+
exports.executeImageGenerationTool = executeImageGenerationTool;
|
|
8
|
+
const promises_1 = __importDefault(require("fs/promises"));
|
|
9
|
+
const os_1 = __importDefault(require("os"));
|
|
10
|
+
const path_1 = __importDefault(require("path"));
|
|
11
|
+
const cuid_1 = __importDefault(require("cuid"));
|
|
12
|
+
const ai_client_1 = require("../ai-client");
|
|
13
|
+
const ALLOWED_FORMATS = new Set(['png', 'webp', 'jpeg']);
|
|
14
|
+
const ALLOWED_SIZES = new Set(['1024x1024', '1024x1536', '1536x1024']);
|
|
15
|
+
const ALLOWED_QUALITIES = new Set(['low', 'medium', 'high']);
|
|
16
|
+
const ALLOWED_BACKGROUNDS = new Set(['transparent', 'opaque', 'auto']);
|
|
17
|
+
exports.IMAGE_GENERATE_TOOL = {
|
|
18
|
+
name: 'generate_image',
|
|
19
|
+
description: 'Generate an image from a prompt and save it to disk. Use this when the user asks you to create artwork, mockups, logos, diagrams, or visual assets.',
|
|
20
|
+
parameters: {
|
|
21
|
+
type: 'object',
|
|
22
|
+
properties: {
|
|
23
|
+
prompt: {
|
|
24
|
+
type: 'string',
|
|
25
|
+
description: 'Detailed image prompt describing what to generate.',
|
|
26
|
+
},
|
|
27
|
+
file_path: {
|
|
28
|
+
type: 'string',
|
|
29
|
+
description: 'Absolute or relative output path where the image should be saved. If omitted, a temp file path is used automatically.',
|
|
30
|
+
},
|
|
31
|
+
format: {
|
|
32
|
+
type: 'string',
|
|
33
|
+
enum: ['png', 'webp', 'jpeg'],
|
|
34
|
+
description: 'Output image format. Defaults to png.',
|
|
35
|
+
},
|
|
36
|
+
size: {
|
|
37
|
+
type: 'string',
|
|
38
|
+
enum: ['1024x1024', '1024x1536', '1536x1024'],
|
|
39
|
+
description: 'Image dimensions. Defaults to 1024x1024.',
|
|
40
|
+
},
|
|
41
|
+
quality: {
|
|
42
|
+
type: 'string',
|
|
43
|
+
enum: ['low', 'medium', 'high'],
|
|
44
|
+
description: 'Generation quality. Defaults to medium.',
|
|
45
|
+
},
|
|
46
|
+
background: {
|
|
47
|
+
type: 'string',
|
|
48
|
+
enum: ['transparent', 'opaque', 'auto'],
|
|
49
|
+
description: 'Background behavior. Defaults to auto.',
|
|
50
|
+
},
|
|
51
|
+
},
|
|
52
|
+
required: ['prompt'],
|
|
53
|
+
},
|
|
54
|
+
};
|
|
55
|
+
/**
 * Reads a string argument from a tool-call payload and trims surrounding whitespace.
 *
 * The payload is produced by the model, so a missing (null/undefined) payload
 * is treated the same way as a missing key instead of throwing.
 *
 * @param args - Raw tool-call argument object; may be null or undefined.
 * @param key - Argument key to read.
 * @returns A trimmed string value, or `undefined` when missing/non-string/empty.
 */
function readStringArg(args, key) {
    const value = args?.[key];
    if (typeof value !== 'string') {
        return undefined;
    }
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : undefined;
}
|
|
69
|
+
/**
 * Resolves the final output path for the generated image.
 *
 * - No path given: a unique file name inside the OS temp directory is used.
 * - Path starting with `~/` (or `~` followed by the platform separator): the
 *   tilde is expanded to the current user's home directory. Node does not do
 *   this on its own, so without the expansion the image would land in a
 *   literal `~` folder under the working directory.
 * - Absolute path: returned unchanged.
 * - Relative path: resolved from the backend process working directory.
 *
 * NOTE(review): the path originates from a model tool call and is not confined
 * to any directory; callers that need sandboxing must validate the result.
 *
 * @param filePathArg - Optional caller-provided output path.
 * @param format - File format to use when generating a temp filename.
 * @returns Absolute path where the image should be written.
 */
function resolveOutputPath(filePathArg, format) {
    if (!filePathArg) {
        return path_1.default.join(os_1.default.tmpdir(), `omnikey-generated-${(0, cuid_1.default)()}.${format}`);
    }
    const startsWithHome = filePathArg.startsWith('~/') || filePathArg.startsWith(`~${path_1.default.sep}`);
    if (startsWithHome) {
        return path_1.default.join(os_1.default.homedir(), filePathArg.slice(1));
    }
    if (path_1.default.isAbsolute(filePathArg)) {
        return filePathArg;
    }
    return path_1.default.resolve(process.cwd(), filePathArg);
}
|
|
85
|
+
/**
 * Converts a MIME type to the internal file-format enum.
 *
 * Matching is case-insensitive and substring-based, so values such as
 * `image/jpeg; charset=binary` are still recognised.
 *
 * @param mimeType - MIME type returned by the provider (e.g. image/png).
 * @param fallback - Format used when MIME type is missing or unknown.
 * @returns Normalized image format used for file extension selection.
 */
function formatFromMime(mimeType, fallback) {
    if (typeof mimeType !== 'string' || mimeType.length === 0) {
        return fallback;
    }
    const normalized = mimeType.toLowerCase();
    if (normalized.includes('jpeg') || normalized.includes('jpg')) {
        return 'jpeg';
    }
    if (normalized.includes('webp')) {
        return 'webp';
    }
    if (normalized.includes('png')) {
        return 'png';
    }
    // Unrecognised type: trust the format that was requested rather than
    // silently labelling the bytes as PNG.
    return fallback;
}
|
|
101
|
+
/**
 * Persists image bytes at `outputPath`.
 *
 * Any missing parent directories are created first, then the file is written.
 *
 * @param outputPath - Absolute path to write to.
 * @param imageBuffer - Binary image contents.
 */
async function writeImageFile(outputPath, imageBuffer) {
    const fsPromises = promises_1.default;
    const parentDirectory = path_1.default.dirname(outputPath);
    await fsPromises.mkdir(parentDirectory, { recursive: true });
    await fsPromises.writeFile(outputPath, imageBuffer);
}
|
|
111
|
+
/**
 * Reads an optional enum-like string argument, falling back to `fallback`
 * when the value is missing or not one of `allowed`.
 */
function readEnumArg(args, key, allowed, fallback) {
    const raw = readStringArg(args, key) ?? fallback;
    return allowed.has(raw) ? raw : fallback;
}
/**
 * Executes the `generate_image` tool call.
 *
 * Validates and normalizes user arguments, requests image generation through
 * the configured AI provider in `aiClient`, writes the image to disk, and
 * returns a user-facing status message containing the saved path.
 *
 * This function never rejects for bad input or provider failures: every
 * failure is reported as a string starting with `Error`, so one failing tool
 * call does not reject the promise the caller is awaiting.
 *
 * @param args - Tool arguments supplied by the model.
 * @param log - Structured logger scoped to the current agent turn.
 * @returns Success or error message for the tool result block.
 */
async function executeImageGenerationTool(args, log) {
    // The payload is model-generated; a missing or non-object payload is
    // reported like a missing prompt instead of throwing.
    if (args === null || typeof args !== 'object') {
        return 'Error: prompt parameter is required.';
    }
    const prompt = readStringArg(args, 'prompt');
    if (!prompt) {
        return 'Error: prompt parameter is required.';
    }
    const format = readEnumArg(args, 'format', ALLOWED_FORMATS, 'png');
    const size = readEnumArg(args, 'size', ALLOWED_SIZES, '1024x1024');
    const quality = readEnumArg(args, 'quality', ALLOWED_QUALITIES, 'medium');
    const background = readEnumArg(args, 'background', ALLOWED_BACKGROUNDS, 'auto');
    const filePathArg = readStringArg(args, 'file_path');
    try {
        const generated = await ai_client_1.aiClient.generateImage({
            prompt,
            format,
            size,
            quality,
            background,
        });
        // Decode once and reuse the buffer for both the write and the log entry.
        const imageBuffer = Buffer.from(generated.imageBase64, 'base64');
        if (imageBuffer.length === 0) {
            // Avoid writing a 0-byte file and reporting it as a success.
            throw new Error('Provider returned empty image data');
        }
        // The provider may return a different format than requested, so the
        // temp-file extension follows the reported MIME type.
        const actualFormat = formatFromMime(generated.mimeType, format);
        const outputPath = resolveOutputPath(filePathArg, actualFormat);
        await writeImageFile(outputPath, imageBuffer);
        log.info('Image generated and saved', {
            provider: generated.provider,
            outputPath,
            bytes: imageBuffer.length,
            size,
            quality,
            format: actualFormat,
        });
        const summary = `Image generated successfully with ${generated.provider}. Saved to: ${outputPath}`;
        return generated.note ? `${summary} Note: ${generated.note}` : summary;
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        log.warn('generate_image tool failed', { error: message, provider: ai_client_1.aiClient.getProvider() });
        return `Error generating image: ${message}`;
    }
}
|
|
@@ -3,11 +3,13 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.MAX_HISTORY_TOTAL = void 0;
|
|
4
4
|
exports.buildAvailableTools = buildAvailableTools;
|
|
5
5
|
exports.createUserContent = createUserContent;
|
|
6
|
+
exports.createUserContentForCronJob = createUserContentForCronJob;
|
|
6
7
|
exports.sendFinalAnswer = sendFinalAnswer;
|
|
7
8
|
exports.pushToSessionHistory = pushToSessionHistory;
|
|
8
9
|
const web_search_provider_1 = require("../web-search/web-search-provider");
|
|
9
10
|
const ai_client_1 = require("../ai-client");
|
|
10
11
|
const config_1 = require("../config");
|
|
12
|
+
const imageTool_1 = require("./imageTool");
|
|
11
13
|
/**
|
|
12
14
|
* Returns the set of web tools available to the agent for every turn.
|
|
13
15
|
*
|
|
@@ -17,7 +19,7 @@ const config_1 = require("../config");
|
|
|
17
19
|
* @returns An array of `AITool` definitions ready to pass to the AI client.
|
|
18
20
|
*/
|
|
19
21
|
function buildAvailableTools() {
|
|
20
|
-
return [web_search_provider_1.WEB_FETCH_TOOL, web_search_provider_1.WEB_SEARCH_TOOL];
|
|
22
|
+
return [web_search_provider_1.WEB_FETCH_TOOL, web_search_provider_1.WEB_SEARCH_TOOL, imageTool_1.IMAGE_GENERATE_TOOL];
|
|
21
23
|
}
|
|
22
24
|
/**
|
|
23
25
|
* Strips the `@omniagent` mention from user-supplied content.
|
|
@@ -36,6 +38,16 @@ function createUserContent(content, hasStoredPrompt) {
|
|
|
36
38
|
}
|
|
37
39
|
return content;
|
|
38
40
|
}
|
|
41
|
+
/**
 * Prepares the prompt of a scheduled (cron) job for the agent.
 *
 * Cron jobs do not use a stored base prompt, so the content must carry the
 * `@omniAgent` mention itself. The mention is prepended when it is absent
 * (matched case-insensitively). Surrounding whitespace is removed in both
 * cases so the result has the same shape whether or not the mention was
 * already present.
 *
 * @param content - Raw prompt text of the scheduled job.
 * @returns The trimmed prompt, guaranteed to contain an `@omniAgent` mention.
 */
function createUserContentForCronJob(content) {
    const trimmed = content.trim();
    // No `g` flag: a single presence check is all that is needed, and global
    // regexes make `.test` stateful through `lastIndex`.
    if (/@omniagent/i.test(trimmed)) {
        return trimmed;
    }
    return `@omniAgent ${trimmed}`;
}
|
|
39
51
|
/**
|
|
40
52
|
* Sends a `<final_answer>` message over the WebSocket and closes the agent turn.
|
|
41
53
|
*
|
|
@@ -132,6 +132,32 @@ class OpenAIAdapter {
|
|
|
132
132
|
}
|
|
133
133
|
return { usage, model };
|
|
134
134
|
}
|
|
135
|
+
/**
|
|
136
|
+
* Generates an image using OpenAI and returns base64 image bytes.
|
|
137
|
+
*
|
|
138
|
+
* @param options - Unified image-generation options.
|
|
139
|
+
* @returns Provider-normalized image payload.
|
|
140
|
+
*/
|
|
141
|
+
async generateImage(options) {
|
|
142
|
+
const format = options.format ?? 'png';
|
|
143
|
+
const size = options.size ?? '1024x1024';
|
|
144
|
+
const quality = options.quality ?? 'medium';
|
|
145
|
+
const background = options.background ?? 'auto';
|
|
146
|
+
const response = await this.client.images.generate({
|
|
147
|
+
model: 'gpt-image-1',
|
|
148
|
+
prompt: options.prompt,
|
|
149
|
+
size,
|
|
150
|
+
quality,
|
|
151
|
+
background,
|
|
152
|
+
output_format: format,
|
|
153
|
+
});
|
|
154
|
+
const b64 = response?.data?.[0]?.b64_json;
|
|
155
|
+
if (!b64 || typeof b64 !== 'string') {
|
|
156
|
+
throw new Error('OpenAI image generation returned no image data');
|
|
157
|
+
}
|
|
158
|
+
const mimeType = format === 'jpeg' ? 'image/jpeg' : format === 'webp' ? 'image/webp' : 'image/png';
|
|
159
|
+
return { imageBase64: b64, mimeType, provider: 'openai' };
|
|
160
|
+
}
|
|
135
161
|
}
|
|
136
162
|
// ---------------------------------------------------------------------------
|
|
137
163
|
// Anthropic adapter
|
|
@@ -300,6 +326,41 @@ class GeminiAdapter {
|
|
|
300
326
|
}
|
|
301
327
|
return { usage, model };
|
|
302
328
|
}
|
|
329
|
+
/**
|
|
330
|
+
* Generates an image using Gemini Imagen and returns base64 image bytes.
|
|
331
|
+
*
|
|
332
|
+
* @param options - Unified image-generation options.
|
|
333
|
+
* @returns Provider-normalized image payload and optional compatibility note.
|
|
334
|
+
*/
|
|
335
|
+
async generateImage(options) {
|
|
336
|
+
const requestedFormat = options.format ?? 'png';
|
|
337
|
+
const size = options.size ?? '1024x1024';
|
|
338
|
+
const quality = options.quality ?? 'medium';
|
|
339
|
+
const aspectRatio = size === '1024x1536' ? '2:3' : size === '1536x1024' ? '3:2' : '1:1';
|
|
340
|
+
// Imagen in this SDK path supports png/jpeg output directly. WebP requests
|
|
341
|
+
// are downgraded to PNG and surfaced with a note.
|
|
342
|
+
const outputMimeType = requestedFormat === 'jpeg' ? 'image/jpeg' : 'image/png';
|
|
343
|
+
const response = await this.client.models.generateImages({
|
|
344
|
+
model: 'imagen-4.0-generate-001',
|
|
345
|
+
prompt: options.prompt,
|
|
346
|
+
config: {
|
|
347
|
+
numberOfImages: 1,
|
|
348
|
+
aspectRatio,
|
|
349
|
+
outputMimeType,
|
|
350
|
+
guidanceScale: quality === 'high' ? 8 : quality === 'low' ? 5 : 6.5,
|
|
351
|
+
},
|
|
352
|
+
});
|
|
353
|
+
const generated = response.generatedImages?.[0]?.image;
|
|
354
|
+
const imageBase64 = generated?.imageBytes;
|
|
355
|
+
if (!imageBase64) {
|
|
356
|
+
throw new Error('Gemini image generation returned no image data');
|
|
357
|
+
}
|
|
358
|
+
const mimeType = generated?.mimeType || outputMimeType;
|
|
359
|
+
const note = requestedFormat === 'webp'
|
|
360
|
+
? 'Gemini does not currently return WebP in this path; image was generated as PNG.'
|
|
361
|
+
: undefined;
|
|
362
|
+
return { imageBase64, mimeType, provider: 'gemini', note };
|
|
363
|
+
}
|
|
303
364
|
}
|
|
304
365
|
// ---------------------------------------------------------------------------
|
|
305
366
|
// Main AIClient
|
|
@@ -344,6 +405,24 @@ class AIClient {
|
|
|
344
405
|
}
|
|
345
406
|
throw new Error(`AI provider "${this.provider}" is not configured.`);
|
|
346
407
|
}
|
|
408
|
+
/**
|
|
409
|
+
* Generates an image with the currently configured provider.
|
|
410
|
+
*
|
|
411
|
+
* Supported providers are OpenAI and Gemini. Anthropic does not currently
|
|
412
|
+
* expose a text-to-image generation endpoint in this project.
|
|
413
|
+
*
|
|
414
|
+
* @param options - Unified image-generation options.
|
|
415
|
+
* @returns Provider-normalized image payload.
|
|
416
|
+
*/
|
|
417
|
+
async generateImage(options) {
|
|
418
|
+
if (this.provider === 'openai' && this.openai) {
|
|
419
|
+
return this.openai.generateImage(options);
|
|
420
|
+
}
|
|
421
|
+
if (this.provider === 'gemini' && this.gemini) {
|
|
422
|
+
return this.gemini.generateImage(options);
|
|
423
|
+
}
|
|
424
|
+
throw new Error(`Image generation is not supported for provider "${this.provider}".`);
|
|
425
|
+
}
|
|
347
426
|
}
|
|
348
427
|
exports.AIClient = AIClient;
|
|
349
428
|
// ---------------------------------------------------------------------------
|