@acmecloud/core 1.0.7 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/llm/vision.d.ts +2 -2
- package/dist/llm/vision.js +16 -13
- package/dist/tools/browser.d.ts +1 -1
- package/dist/tools/browser.js +24 -23
- package/dist/tools/index.js +4 -0
- package/package.json +1 -1
- package/src/llm/vision.ts +47 -43
- package/src/tools/browser.ts +109 -95
- package/src/tools/index.ts +3 -0
package/dist/llm/vision.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { ModelConfig } from
|
|
1
|
+
import { ModelConfig } from "../config/index.js";
|
|
2
2
|
/**
|
|
3
3
|
* Analyzes an image using a specialized vision model and returns a textual description.
|
|
4
4
|
* This allows non-vision primary models to "see" via delegation.
|
|
5
5
|
*/
|
|
6
6
|
export declare function analyzeImage(imageData: string, // base64
|
|
7
|
-
config: ModelConfig): Promise<string>;
|
|
7
|
+
config: ModelConfig, abortSignal?: AbortSignal): Promise<string>;
|
package/dist/llm/vision.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import { generateText } from
|
|
2
|
-
import { getModel } from
|
|
1
|
+
import { generateText } from "ai";
|
|
2
|
+
import { getModel } from "./provider.js";
|
|
3
3
|
/**
|
|
4
4
|
* Analyzes an image using a specialized vision model and returns a textual description.
|
|
5
5
|
* This allows non-vision primary models to "see" via delegation.
|
|
6
6
|
*/
|
|
7
7
|
export async function analyzeImage(imageData, // base64
|
|
8
|
-
config) {
|
|
8
|
+
config, abortSignal) {
|
|
9
9
|
if (!config.visionProvider || !config.visionModel) {
|
|
10
10
|
return "Error: Vision model not configured. Please set visionProvider and visionModel in config.";
|
|
11
11
|
}
|
|
@@ -13,24 +13,27 @@ config) {
|
|
|
13
13
|
try {
|
|
14
14
|
const { text } = await generateText({
|
|
15
15
|
model,
|
|
16
|
-
abortSignal: AbortSignal.timeout(
|
|
17
|
-
maxRetries:
|
|
16
|
+
abortSignal: abortSignal || AbortSignal.timeout(90000), // 90 seconds timeout
|
|
17
|
+
maxRetries: 2,
|
|
18
18
|
messages: [
|
|
19
19
|
{
|
|
20
|
-
role:
|
|
20
|
+
role: "user",
|
|
21
21
|
content: [
|
|
22
|
-
{
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
22
|
+
{
|
|
23
|
+
type: "text",
|
|
24
|
+
text: "Please describe this screenshot in detail. Focus on the layout, visible text, interactive elements (buttons, inputs), and any apparent errors or status messages. This description will be used by another AI to understand the state of the web application.",
|
|
25
|
+
},
|
|
26
|
+
{ type: "image", image: imageData },
|
|
27
|
+
],
|
|
28
|
+
},
|
|
29
|
+
],
|
|
27
30
|
});
|
|
28
31
|
return `[Vision Model Analysis (${config.visionModel})]:\n${text}`;
|
|
29
32
|
}
|
|
30
33
|
catch (err) {
|
|
31
34
|
let msg = err.message;
|
|
32
|
-
if (err.name ===
|
|
33
|
-
msg = `Analysis timed out after
|
|
35
|
+
if (err.name === "AbortError" || err.message.includes("timeout")) {
|
|
36
|
+
msg = `Analysis timed out after 90s. The image might be too complex or the provider is slow.`;
|
|
34
37
|
}
|
|
35
38
|
return `Error during vision analysis: ${msg}`;
|
|
36
39
|
}
|
package/dist/tools/browser.d.ts
CHANGED
package/dist/tools/browser.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
import { chromium } from
|
|
2
|
-
import { analyzeImage } from
|
|
3
|
-
import { loadModelConfig } from
|
|
4
|
-
import * as fs from
|
|
5
|
-
import { existsSync, mkdirSync } from
|
|
6
|
-
import path from
|
|
1
|
+
import { chromium } from "playwright";
|
|
2
|
+
import { analyzeImage } from "../llm/vision.js";
|
|
3
|
+
import { loadModelConfig } from "../config/index.js";
|
|
4
|
+
import * as fs from "fs/promises";
|
|
5
|
+
import { existsSync, mkdirSync } from "fs";
|
|
6
|
+
import path from "path";
|
|
7
7
|
let browser = null;
|
|
8
8
|
let page = null;
|
|
9
9
|
async function getBrowser() {
|
|
@@ -13,61 +13,62 @@ async function getBrowser() {
|
|
|
13
13
|
}
|
|
14
14
|
if (!page) {
|
|
15
15
|
const context = await browser.newContext({
|
|
16
|
-
viewport: { width: 1280, height: 720 }
|
|
16
|
+
viewport: { width: 1280, height: 720 },
|
|
17
17
|
});
|
|
18
18
|
page = await context.newPage();
|
|
19
19
|
}
|
|
20
20
|
return { browser, page };
|
|
21
21
|
}
|
|
22
22
|
catch (err) {
|
|
23
|
-
if (err.message.includes(
|
|
23
|
+
if (err.message.includes("executable") ||
|
|
24
|
+
err.message.includes("not found")) {
|
|
24
25
|
throw new Error(`Browser not found. Please run: npx playwright install chromium`);
|
|
25
26
|
}
|
|
26
27
|
throw err;
|
|
27
28
|
}
|
|
28
29
|
}
|
|
29
|
-
export async function executeBrowserAction(args) {
|
|
30
|
+
export async function executeBrowserAction(args, abortSignal) {
|
|
30
31
|
const { page } = await getBrowser();
|
|
31
32
|
const config = loadModelConfig();
|
|
32
33
|
try {
|
|
33
34
|
switch (args.action) {
|
|
34
|
-
case
|
|
35
|
+
case "navigate":
|
|
35
36
|
if (!args.url)
|
|
36
|
-
return
|
|
37
|
-
await page.goto(args.url, { waitUntil:
|
|
37
|
+
return "Error: URL is required for navigate action.";
|
|
38
|
+
await page.goto(args.url, { waitUntil: "networkidle" });
|
|
38
39
|
const title = await page.title();
|
|
39
40
|
return `Successfully navigated to ${args.url}. Page title: ${title}`;
|
|
40
|
-
case
|
|
41
|
+
case "screenshot":
|
|
41
42
|
const screenshot = await page.screenshot({ fullPage: false });
|
|
42
|
-
const base64 = screenshot.toString(
|
|
43
|
+
const base64 = screenshot.toString("base64");
|
|
43
44
|
// Save to disk
|
|
44
|
-
const screenshotDir = path.resolve(process.cwd(),
|
|
45
|
+
const screenshotDir = path.resolve(process.cwd(), ".acmecode", "screenshots");
|
|
45
46
|
if (!existsSync(screenshotDir)) {
|
|
46
47
|
mkdirSync(screenshotDir, { recursive: true });
|
|
47
48
|
}
|
|
48
|
-
const filename = `screenshot_${new Date().toISOString().replace(/[:.]/g,
|
|
49
|
+
const filename = `screenshot_${new Date().toISOString().replace(/[:.]/g, "-")}.png`;
|
|
49
50
|
const filePath = path.join(screenshotDir, filename);
|
|
50
51
|
await fs.writeFile(filePath, screenshot);
|
|
51
52
|
const stats = `Screenshot saved to: ${path.relative(process.cwd(), filePath)}`;
|
|
52
53
|
// Delegate to vision model if configured
|
|
53
54
|
if (config.visionModel) {
|
|
54
|
-
const analysis = await analyzeImage(base64, config);
|
|
55
|
+
const analysis = await analyzeImage(base64, config, abortSignal);
|
|
55
56
|
return `${stats}\n\n${analysis}`;
|
|
56
57
|
}
|
|
57
58
|
return stats;
|
|
58
|
-
case
|
|
59
|
+
case "click":
|
|
59
60
|
if (!args.selector)
|
|
60
|
-
return
|
|
61
|
+
return "Error: Selector is required for click action.";
|
|
61
62
|
await page.click(args.selector);
|
|
62
63
|
return `Clicked element: ${args.selector}`;
|
|
63
|
-
case
|
|
64
|
+
case "type":
|
|
64
65
|
if (!args.selector || !args.text)
|
|
65
|
-
return
|
|
66
|
+
return "Error: Selector and text are required for type action.";
|
|
66
67
|
await page.fill(args.selector, args.text);
|
|
67
68
|
return `Typed "${args.text}" into ${args.selector}`;
|
|
68
|
-
case
|
|
69
|
+
case "scroll":
|
|
69
70
|
await page.mouse.wheel(0, 500);
|
|
70
|
-
return
|
|
71
|
+
return "Scrolled down.";
|
|
71
72
|
default:
|
|
72
73
|
return `Error: Unknown action "${args.action}"`;
|
|
73
74
|
}
|
package/dist/tools/index.js
CHANGED
|
@@ -12,6 +12,7 @@ import * as diff from "diff";
|
|
|
12
12
|
import { replaceCode } from "./edit.js";
|
|
13
13
|
import { getLspClientForFile } from "./lsp-client.js";
|
|
14
14
|
import { executeBatch, BATCH_WHITELIST } from "./batch.js";
|
|
15
|
+
import { executeBrowserAction } from "./browser.js";
|
|
15
16
|
const execAsync = promisify(exec);
|
|
16
17
|
// ── Constants (from opencode patterns) ──
|
|
17
18
|
const DEFAULT_READ_LIMIT = 2000;
|
|
@@ -967,6 +968,9 @@ export const toolExecutors = {
|
|
|
967
968
|
batch: async (args) => {
|
|
968
969
|
return executeBatch(args, toolExecutors);
|
|
969
970
|
},
|
|
971
|
+
browser_action: async (args) => {
|
|
972
|
+
return executeBrowserAction(args);
|
|
973
|
+
},
|
|
970
974
|
};
|
|
971
975
|
// Create AI SDK tool objects (with execute) for the agent
|
|
972
976
|
const tool = (options) => createTool(options);
|
package/package.json
CHANGED
package/src/llm/vision.ts
CHANGED
|
@@ -1,43 +1,47 @@
|
|
|
1
|
-
import { generateText } from
|
|
2
|
-
import { getModel } from
|
|
3
|
-
import { ModelConfig } from
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Analyzes an image using a specialized vision model and returns a textual description.
|
|
7
|
-
* This allows non-vision primary models to "see" via delegation.
|
|
8
|
-
*/
|
|
9
|
-
export async function analyzeImage(
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
1
|
+
import { generateText } from "ai";
|
|
2
|
+
import { getModel } from "./provider.js";
|
|
3
|
+
import { ModelConfig } from "../config/index.js";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Analyzes an image using a specialized vision model and returns a textual description.
|
|
7
|
+
* This allows non-vision primary models to "see" via delegation.
|
|
8
|
+
*/
|
|
9
|
+
export async function analyzeImage(
|
|
10
|
+
imageData: string, // base64
|
|
11
|
+
config: ModelConfig,
|
|
12
|
+
abortSignal?: AbortSignal,
|
|
13
|
+
): Promise<string> {
|
|
14
|
+
if (!config.visionProvider || !config.visionModel) {
|
|
15
|
+
return "Error: Vision model not configured. Please set visionProvider and visionModel in config.";
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
const model = getModel(config.visionProvider as any, config.visionModel);
|
|
19
|
+
|
|
20
|
+
try {
|
|
21
|
+
const { text } = await generateText({
|
|
22
|
+
model,
|
|
23
|
+
abortSignal: abortSignal || AbortSignal.timeout(90000), // 90 seconds timeout
|
|
24
|
+
maxRetries: 2,
|
|
25
|
+
messages: [
|
|
26
|
+
{
|
|
27
|
+
role: "user",
|
|
28
|
+
content: [
|
|
29
|
+
{
|
|
30
|
+
type: "text",
|
|
31
|
+
text: "Please describe this screenshot in detail. Focus on the layout, visible text, interactive elements (buttons, inputs), and any apparent errors or status messages. This description will be used by another AI to understand the state of the web application.",
|
|
32
|
+
},
|
|
33
|
+
{ type: "image", image: imageData },
|
|
34
|
+
],
|
|
35
|
+
},
|
|
36
|
+
],
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
return `[Vision Model Analysis (${config.visionModel})]:\n${text}`;
|
|
40
|
+
} catch (err: any) {
|
|
41
|
+
let msg = err.message;
|
|
42
|
+
if (err.name === "AbortError" || err.message.includes("timeout")) {
|
|
43
|
+
msg = `Analysis timed out after 90s. The image might be too complex or the provider is slow.`;
|
|
44
|
+
}
|
|
45
|
+
return `Error during vision analysis: ${msg}`;
|
|
46
|
+
}
|
|
47
|
+
}
|
package/src/tools/browser.ts
CHANGED
|
@@ -1,95 +1,109 @@
|
|
|
1
|
-
import { chromium, Browser, Page } from
|
|
2
|
-
import { analyzeImage } from
|
|
3
|
-
import { loadModelConfig } from
|
|
4
|
-
import * as fs from
|
|
5
|
-
import { existsSync, mkdirSync } from
|
|
6
|
-
import path from
|
|
7
|
-
|
|
8
|
-
let browser: Browser | null = null;
|
|
9
|
-
let page: Page | null = null;
|
|
10
|
-
|
|
11
|
-
async function getBrowser() {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
await
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
}
|
|
1
|
+
import { chromium, Browser, Page } from "playwright";
|
|
2
|
+
import { analyzeImage } from "../llm/vision.js";
|
|
3
|
+
import { loadModelConfig } from "../config/index.js";
|
|
4
|
+
import * as fs from "fs/promises";
|
|
5
|
+
import { existsSync, mkdirSync } from "fs";
|
|
6
|
+
import path from "path";
|
|
7
|
+
|
|
8
|
+
let browser: Browser | null = null;
|
|
9
|
+
let page: Page | null = null;
|
|
10
|
+
|
|
11
|
+
async function getBrowser() {
|
|
12
|
+
try {
|
|
13
|
+
if (!browser) {
|
|
14
|
+
browser = await chromium.launch({ headless: true });
|
|
15
|
+
}
|
|
16
|
+
if (!page) {
|
|
17
|
+
const context = await browser.newContext({
|
|
18
|
+
viewport: { width: 1280, height: 720 },
|
|
19
|
+
});
|
|
20
|
+
page = await context.newPage();
|
|
21
|
+
}
|
|
22
|
+
return { browser, page };
|
|
23
|
+
} catch (err: any) {
|
|
24
|
+
if (
|
|
25
|
+
err.message.includes("executable") ||
|
|
26
|
+
err.message.includes("not found")
|
|
27
|
+
) {
|
|
28
|
+
throw new Error(
|
|
29
|
+
`Browser not found. Please run: npx playwright install chromium`,
|
|
30
|
+
);
|
|
31
|
+
}
|
|
32
|
+
throw err;
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export async function executeBrowserAction(
|
|
37
|
+
args: { action: string; url?: string; selector?: string; text?: string },
|
|
38
|
+
abortSignal?: AbortSignal,
|
|
39
|
+
): Promise<string> {
|
|
40
|
+
const { page } = await getBrowser();
|
|
41
|
+
const config = loadModelConfig();
|
|
42
|
+
|
|
43
|
+
try {
|
|
44
|
+
switch (args.action) {
|
|
45
|
+
case "navigate":
|
|
46
|
+
if (!args.url) return "Error: URL is required for navigate action.";
|
|
47
|
+
await page.goto(args.url, { waitUntil: "networkidle" });
|
|
48
|
+
const title = await page.title();
|
|
49
|
+
return `Successfully navigated to ${args.url}. Page title: ${title}`;
|
|
50
|
+
|
|
51
|
+
case "screenshot":
|
|
52
|
+
const screenshot = await page.screenshot({ fullPage: false });
|
|
53
|
+
const base64 = screenshot.toString("base64");
|
|
54
|
+
|
|
55
|
+
// Save to disk
|
|
56
|
+
const screenshotDir = path.resolve(
|
|
57
|
+
process.cwd(),
|
|
58
|
+
".acmecode",
|
|
59
|
+
"screenshots",
|
|
60
|
+
);
|
|
61
|
+
if (!existsSync(screenshotDir)) {
|
|
62
|
+
mkdirSync(screenshotDir, { recursive: true });
|
|
63
|
+
}
|
|
64
|
+
const filename = `screenshot_${new Date().toISOString().replace(/[:.]/g, "-")}.png`;
|
|
65
|
+
const filePath = path.join(screenshotDir, filename);
|
|
66
|
+
await fs.writeFile(filePath, screenshot);
|
|
67
|
+
|
|
68
|
+
const stats = `Screenshot saved to: ${path.relative(process.cwd(), filePath)}`;
|
|
69
|
+
|
|
70
|
+
// Delegate to vision model if configured
|
|
71
|
+
if (config.visionModel) {
|
|
72
|
+
const analysis = await analyzeImage(base64, config, abortSignal);
|
|
73
|
+
return `${stats}\n\n${analysis}`;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
return stats;
|
|
77
|
+
|
|
78
|
+
case "click":
|
|
79
|
+
if (!args.selector)
|
|
80
|
+
return "Error: Selector is required for click action.";
|
|
81
|
+
await page.click(args.selector);
|
|
82
|
+
return `Clicked element: ${args.selector}`;
|
|
83
|
+
|
|
84
|
+
case "type":
|
|
85
|
+
if (!args.selector || !args.text)
|
|
86
|
+
return "Error: Selector and text are required for type action.";
|
|
87
|
+
await page.fill(args.selector, args.text);
|
|
88
|
+
return `Typed "${args.text}" into ${args.selector}`;
|
|
89
|
+
|
|
90
|
+
case "scroll":
|
|
91
|
+
await page.mouse.wheel(0, 500);
|
|
92
|
+
return "Scrolled down.";
|
|
93
|
+
|
|
94
|
+
default:
|
|
95
|
+
return `Error: Unknown action "${args.action}"`;
|
|
96
|
+
}
|
|
97
|
+
} catch (err: any) {
|
|
98
|
+
return `Browser error: ${err.message}`;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// Cleanup function to be called on process exit
|
|
103
|
+
export async function closeBrowser() {
|
|
104
|
+
if (browser) {
|
|
105
|
+
await browser.close();
|
|
106
|
+
browser = null;
|
|
107
|
+
page = null;
|
|
108
|
+
}
|
|
109
|
+
}
|
package/src/tools/index.ts
CHANGED
|
@@ -1095,6 +1095,9 @@ export const toolExecutors: Record<string, (args: any) => Promise<string>> = {
|
|
|
1095
1095
|
batch: async (args) => {
|
|
1096
1096
|
return executeBatch(args, toolExecutors);
|
|
1097
1097
|
},
|
|
1098
|
+
browser_action: async (args) => {
|
|
1099
|
+
return executeBrowserAction(args);
|
|
1100
|
+
},
|
|
1098
1101
|
};
|
|
1099
1102
|
|
|
1100
1103
|
// Create AI SDK tool objects (with execute) for the agent
|