autoclaw 1.0.32 → 1.0.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +42 -1
- package/dist/tools/image.js +239 -0
- package/dist/tools/index.js +3 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -78,7 +78,11 @@ async function runSetup(options = {}) {
|
|
|
78
78
|
// Load both to show current effective values as defaults
|
|
79
79
|
const globalConfig = loadJsonConfig(GLOBAL_CONFIG_FILE);
|
|
80
80
|
const localConfig = loadJsonConfig(LOCAL_CONFIG_FILE);
|
|
81
|
-
|
|
81
|
+
// If setting up Global (default), prioritize Global values for display, falling back to Local.
|
|
82
|
+
// If setting up Project, prioritize Project values (standard effective config).
|
|
83
|
+
const currentConfig = isProject
|
|
84
|
+
? { ...globalConfig, ...localConfig }
|
|
85
|
+
: { ...localConfig, ...globalConfig };
|
|
82
86
|
function maskSecret(secret) {
|
|
83
87
|
if (!secret || secret.length < 8)
|
|
84
88
|
return '******';
|
|
@@ -112,6 +116,12 @@ async function runSetup(options = {}) {
|
|
|
112
116
|
message: 'Enter default Model:',
|
|
113
117
|
default: currentConfig.model || 'gpt-4o'
|
|
114
118
|
},
|
|
119
|
+
{
|
|
120
|
+
type: 'confirm',
|
|
121
|
+
name: 'configureImage',
|
|
122
|
+
message: 'Do you want to configure a separate Image Generation Service (DALL-E)?',
|
|
123
|
+
default: !!currentConfig.imageApiKey
|
|
124
|
+
},
|
|
115
125
|
{
|
|
116
126
|
type: 'confirm',
|
|
117
127
|
name: 'configureEmail',
|
|
@@ -133,6 +143,36 @@ async function runSetup(options = {}) {
|
|
|
133
143
|
]);
|
|
134
144
|
// Resolve sensitive values (Keep old if empty)
|
|
135
145
|
const finalApiKey = answers.apiKey || currentConfig.apiKey;
|
|
146
|
+
let imageConfig = {};
|
|
147
|
+
if (answers.configureImage) {
|
|
148
|
+
const imageAnswers = await inquirer.prompt([
|
|
149
|
+
{
|
|
150
|
+
type: 'password',
|
|
151
|
+
name: 'imageApiKey',
|
|
152
|
+
message: currentConfig.imageApiKey
|
|
153
|
+
? `Enter Image Service API Key (Leave empty to keep ${maskSecret(currentConfig.imageApiKey)}, or leave empty to use main API key):`
|
|
154
|
+
: 'Enter Image Service API Key (Leave empty to use main API key):',
|
|
155
|
+
mask: '*'
|
|
156
|
+
},
|
|
157
|
+
{
|
|
158
|
+
type: 'input',
|
|
159
|
+
name: 'imageBaseUrl',
|
|
160
|
+
message: 'Enter Image Service Base URL:',
|
|
161
|
+
default: currentConfig.imageBaseUrl || currentConfig.baseUrl || 'https://api.openai.com/v1'
|
|
162
|
+
},
|
|
163
|
+
{
|
|
164
|
+
type: 'input',
|
|
165
|
+
name: 'imageModel',
|
|
166
|
+
message: 'Default Image Model:',
|
|
167
|
+
default: currentConfig.imageModel || 'dall-e-3'
|
|
168
|
+
}
|
|
169
|
+
]);
|
|
170
|
+
imageConfig = {
|
|
171
|
+
imageApiKey: imageAnswers.imageApiKey || currentConfig.imageApiKey,
|
|
172
|
+
imageBaseUrl: imageAnswers.imageBaseUrl,
|
|
173
|
+
imageModel: imageAnswers.imageModel
|
|
174
|
+
};
|
|
175
|
+
}
|
|
136
176
|
let emailConfig = {};
|
|
137
177
|
if (answers.configureEmail) {
|
|
138
178
|
const emailAnswers = await inquirer.prompt([
|
|
@@ -248,6 +288,7 @@ async function runSetup(options = {}) {
|
|
|
248
288
|
apiKey: finalApiKey,
|
|
249
289
|
baseUrl: answers.baseUrl,
|
|
250
290
|
model: answers.model,
|
|
291
|
+
...imageConfig,
|
|
251
292
|
...emailConfig,
|
|
252
293
|
...searchConfig,
|
|
253
294
|
...notifyConfig
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
import OpenAI from 'openai';
|
|
2
|
+
import * as fs from 'fs';
|
|
3
|
+
import * as path from 'path';
|
|
4
|
+
/**
 * JSON-schema entries for every argument the `generate_image` tool accepts.
 * None of them is listed as required in the schema; the handler decides
 * which ones are needed once the operation mode is known.
 */
const imageToolProperties = {
    prompt: {
        type: "string",
        description: "Text description of the desired image. Required for text-to-image and edit modes."
    },
    image_path: {
        type: "string",
        description: "Path to an existing image file (local path). Required for variation and editing modes."
    },
    mask_path: {
        type: "string",
        description: "Path to a mask image file (local path). Optional, used only for editing."
    },
    mode: {
        type: "string",
        enum: ["text-to-image", "variation", "edit"],
        description: "Operation mode. Inferred if not provided."
    },
    model: {
        type: "string",
        enum: ["dall-e-3", "dall-e-2"],
        description: "The AI model to use. 'dall-e-3' for high quality (default), 'dall-e-2' for faster/smaller generation or editing.",
        default: "dall-e-3"
    },
    n: {
        type: "integer",
        description: "Number of images to generate. Default is 1.",
        default: 1
    },
    size: {
        type: "string",
        description: "Image resolution/size. DALL-E 3: '1024x1024', '1024x1792' (Portrait), '1792x1024' (Landscape). DALL-E 2: '256x256', '512x512', '1024x1024'.",
        default: "1024x1024"
    },
    quality: {
        type: "string",
        enum: ["standard", "hd"],
        description: "Image quality (DALL-E 3 only). 'hd' creates more detailed images. Default is 'standard'.",
        default: "standard"
    },
    style: {
        type: "string",
        enum: ["vivid", "natural"],
        description: "Image style (DALL-E 3 only). Default is 'vivid'.",
        default: "vivid"
    },
    output_dir: {
        type: "string",
        description: "Directory to save the generated images. Defaults to current directory."
    }
};

/**
 * Function-calling definition advertised for the `generate_image` tool:
 * its name, a description, and the parameter schema above.
 */
const toolDefinition = {
    type: "function",
    function: {
        name: "generate_image",
        description: "Generates or edits images using AI models (DALL-E 3/2). Supports text-to-image, image variation, and image editing. Allows control over size, resolution (quality), and model selection.",
        parameters: {
            type: "object",
            properties: imageToolProperties,
            required: []
        }
    }
};
|
|
66
|
+
/**
 * Fetches the resource at `url` and writes its bytes to `destPath`.
 *
 * @param {string} url - Location of the image to download.
 * @param {string} destPath - File path the downloaded bytes are written to.
 * @returns {Promise<void>} Resolves once the file has been written.
 * @throws {Error} When the response status is not successful (`response.ok` is false).
 */
async function downloadImage(url, destPath) {
    const response = await fetch(url);
    if (!response.ok) {
        // statusText may be an empty string, so always report the numeric status too.
        const reason = [response.status, response.statusText].filter(Boolean).join(' ');
        throw new Error(`Failed to download image: ${reason}`);
    }
    const bytes = Buffer.from(await response.arrayBuffer());
    // Asynchronous write: avoids blocking the event loop inside an async function.
    await fs.promises.writeFile(destPath, bytes);
}
|
|
73
|
+
/**
 * Returns an error string when `size` is not accepted by `model`, otherwise null.
 * Only the two known models are checked; any other model name is not validated.
 */
function checkImageSize(size, model) {
    let label;
    let validSizes;
    if (model === "dall-e-3") {
        label = "DALL-E 3";
        validSizes = ["1024x1024", "1024x1792", "1792x1024"];
    }
    else if (model === "dall-e-2") {
        label = "DALL-E 2";
        validSizes = ["256x256", "512x512", "1024x1024"];
    }
    else {
        return null;
    }
    if (validSizes.includes(size)) {
        return null;
    }
    return `Error: Invalid size '${size}' for ${label}. Supported sizes are: ${validSizes.join(", ")}.`;
}

/**
 * Downloads every entry of `items` that carries a `url` into `dir` and appends
 * the written paths to `savedPaths`. Files are named
 * `<prefix>-<timestamp>-<index>.png`, with the index starting at `startIndex + 1`.
 */
async function saveImages(items, prefix, dir, savedPaths, startIndex = 0) {
    const entries = items ?? [];
    for (let i = 0; i < entries.length; i++) {
        const imageUrl = entries[i]?.url;
        if (!imageUrl) {
            continue;
        }
        const filePath = path.join(dir, `${prefix}-${Date.now()}-${startIndex + i + 1}.png`);
        await downloadImage(imageUrl, filePath);
        savedPaths.push(filePath);
    }
}

/**
 * Handler for the `generate_image` tool: text-to-image, variation, or edit.
 *
 * All arguments are validated before anything with a side effect happens
 * (creating the output directory, building the API client, calling the API),
 * so a rejected request leaves nothing behind.
 *
 * @param {object} args - Tool arguments: `prompt`, `image_path`, `mask_path`,
 *   `mode`, `model`, `n`, `size`, `quality`, `style`, `output_dir`.
 * @param {object} config - Settings; reads `imageApiKey`/`apiKey`,
 *   `imageBaseUrl`/`baseUrl` and `imageModel`, falling back to the
 *   `OPENAI_API_KEY` / `OPENAI_BASE_URL` environment variables.
 * @returns {Promise<string>} A success message listing the saved file paths, or
 *   a string starting with "Error" describing what went wrong. Validation and
 *   API/file failures are reported through the return value, not thrown.
 */
const handler = async (args, config) => {
    const apiKey = config.imageApiKey || config.apiKey || process.env.OPENAI_API_KEY;
    const baseURL = config.imageBaseUrl || config.baseUrl || process.env.OPENAI_BASE_URL;
    if (!apiKey) {
        return "Error: Image Service API Key is missing. Please configure it in .autoclaw/setting.json (imageApiKey or apiKey).";
    }
    const { prompt, image_path, mask_path, n = 1, size = "1024x1024", quality = "standard", style = "vivid", output_dir = "." } = args;
    let mode = args.mode;
    let model = args.model || config.imageModel || "dall-e-3";
    // Infer mode if not provided
    if (!mode) {
        if (image_path && mask_path)
            mode = "edit";
        else if (image_path)
            mode = "variation";
        else
            mode = "text-to-image";
    }
    if (!["text-to-image", "variation", "edit"].includes(mode)) {
        return `Error: Unknown mode '${mode}'.`;
    }
    // Variation and Edit only support DALL-E 2 currently
    if (mode !== "text-to-image" && model === "dall-e-3") {
        console.log("Note: DALL-E 3 does not support variation/edit. Falling back to DALL-E 2.");
        model = "dall-e-2";
    }
    // Reject 0, negative and fractional counts instead of reporting a "successful" empty run.
    const count = Number(n);
    if (!Number.isInteger(count) || count < 1) {
        return `Error: 'n' must be a positive integer (received ${n}).`;
    }
    // Size is checked for every mode, so variation/edit cannot send DALL-E-3-only sizes.
    const sizeError = checkImageSize(size, model);
    if (sizeError) {
        return sizeError;
    }
    // Mode-specific required arguments
    if (mode === "text-to-image") {
        if (!prompt)
            return "Error: 'prompt' is required for text-to-image mode.";
    }
    else if (mode === "variation") {
        if (!image_path)
            return "Error: 'image_path' is required for variation mode.";
        if (!fs.existsSync(image_path))
            return `Error: Image file not found at ${image_path}`;
    }
    else {
        if (!image_path)
            return "Error: 'image_path' is required for edit mode.";
        if (!prompt)
            return "Error: 'prompt' is required for edit mode.";
        if (!fs.existsSync(image_path))
            return `Error: Image file not found at ${image_path}`;
        // A mask that was asked for but cannot be found must not be silently dropped.
        if (mask_path && !fs.existsSync(mask_path))
            return `Error: Mask file not found at ${mask_path}`;
    }
    const resolvedOutputDir = path.resolve(process.cwd(), output_dir);
    const generatedFiles = [];
    try {
        // Inside the try so a filesystem failure is returned as an error string.
        fs.mkdirSync(resolvedOutputDir, { recursive: true });
        const client = new OpenAI({
            apiKey: apiKey,
            baseURL: baseURL
        });
        if (mode === "text-to-image") {
            console.log(`Generating ${count} image(s) with ${model} (${size}, ${quality})...`);
            if (model === "dall-e-3") {
                // DALL-E 3 is requested one image at a time, so loop to reach `count`.
                for (let i = 0; i < count; i++) {
                    const response = await client.images.generate({
                        model: "dall-e-3",
                        prompt: prompt,
                        n: 1, // DALL-E 3 constraint
                        size: size,
                        quality: quality,
                        style: style,
                        response_format: "url"
                    });
                    await saveImages((response.data ?? []).slice(0, 1), "generated", resolvedOutputDir, generatedFiles, i);
                }
            }
            else {
                // Any model other than dall-e-3 is requested as DALL-E 2.
                const response = await client.images.generate({
                    model: "dall-e-2",
                    prompt: prompt,
                    n: count,
                    size: size,
                    response_format: "url"
                });
                await saveImages(response.data, "generated", resolvedOutputDir, generatedFiles);
            }
        }
        else if (mode === "variation") {
            console.log(`Generating ${count} variation(s) with ${model}...`);
            const response = await client.images.createVariation({
                image: fs.createReadStream(image_path),
                n: count,
                model: "dall-e-2", // Explicitly set model just in case, though it's the default/only option
                size: size,
                response_format: "url"
            });
            await saveImages(response.data, "variation", resolvedOutputDir, generatedFiles);
        }
        else {
            console.log(`Editing image with ${model}...`);
            const params = {
                image: fs.createReadStream(image_path),
                prompt: prompt,
                n: count,
                model: "dall-e-2",
                size: size,
                response_format: "url"
            };
            if (mask_path) {
                params.mask = fs.createReadStream(mask_path);
            }
            const response = await client.images.edit(params);
            await saveImages(response.data, "edited", resolvedOutputDir, generatedFiles);
        }
        return `Successfully generated ${generatedFiles.length} image(s):\n${generatedFiles.join('\n')}`;
    }
    catch (error) {
        return `Error generating image: ${error.message}`;
    }
};
|
|
235
|
+
/**
 * Image generation tool entry: a display name, the function-calling
 * definition, and the handler that executes a call.
 */
export const ImageTool = {
    name: "Image Generation",
    definition: toolDefinition,
    handler,
};
|
package/dist/tools/index.js
CHANGED
|
@@ -4,6 +4,7 @@ import { SearchTool } from './search.js';
|
|
|
4
4
|
import { NotifyTool } from './notify.js';
|
|
5
5
|
import { BrowserTool } from './browser.js';
|
|
6
6
|
import { ScreenshotTool } from './screenshot.js';
|
|
7
|
+
import { ImageTool } from './image.js';
|
|
7
8
|
// Central Registry of all available tools
|
|
8
9
|
export const toolRegistry = [
|
|
9
10
|
ShellTool,
|
|
@@ -14,7 +15,8 @@ export const toolRegistry = [
|
|
|
14
15
|
SearchTool,
|
|
15
16
|
NotifyTool,
|
|
16
17
|
BrowserTool,
|
|
17
|
-
ScreenshotTool
|
|
18
|
+
ScreenshotTool,
|
|
19
|
+
ImageTool
|
|
18
20
|
];
|
|
19
21
|
export function getToolDefinitions() {
|
|
20
22
|
return toolRegistry.map(t => t.definition);
|