@acmecloud/core 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
- import { ModelConfig } from '../config/index.js';
1
+ import { ModelConfig } from "../config/index.js";
2
2
  /**
3
3
  * Analyzes an image using a specialized vision model and returns a textual description.
4
4
  * This allows non-vision primary models to "see" via delegation.
5
5
  */
6
6
  export declare function analyzeImage(imageData: string, // base64
7
- config: ModelConfig): Promise<string>;
7
+ config: ModelConfig, abortSignal?: AbortSignal): Promise<string>;
@@ -1,11 +1,11 @@
1
- import { generateText } from 'ai';
2
- import { getModel } from './provider.js';
1
+ import { generateText } from "ai";
2
+ import { getModel } from "./provider.js";
3
3
  /**
4
4
  * Analyzes an image using a specialized vision model and returns a textual description.
5
5
  * This allows non-vision primary models to "see" via delegation.
6
6
  */
7
7
  export async function analyzeImage(imageData, // base64
8
- config) {
8
+ config, abortSignal) {
9
9
  if (!config.visionProvider || !config.visionModel) {
10
10
  return "Error: Vision model not configured. Please set visionProvider and visionModel in config.";
11
11
  }
@@ -13,24 +13,27 @@ config) {
13
13
  try {
14
14
  const { text } = await generateText({
15
15
  model,
16
- abortSignal: AbortSignal.timeout(60000), // 60 seconds timeout
17
- maxRetries: 3,
16
+ abortSignal: abortSignal || AbortSignal.timeout(90000), // 90 seconds timeout
17
+ maxRetries: 2,
18
18
  messages: [
19
19
  {
20
- role: 'user',
20
+ role: "user",
21
21
  content: [
22
- { type: 'text', text: "Please describe this screenshot in detail. Focus on the layout, visible text, interactive elements (buttons, inputs), and any apparent errors or status messages. This description will be used by another AI to understand the state of the web application." },
23
- { type: 'image', image: imageData }
24
- ]
25
- }
26
- ]
22
+ {
23
+ type: "text",
24
+ text: "Please describe this screenshot in detail. Focus on the layout, visible text, interactive elements (buttons, inputs), and any apparent errors or status messages. This description will be used by another AI to understand the state of the web application.",
25
+ },
26
+ { type: "image", image: imageData },
27
+ ],
28
+ },
29
+ ],
27
30
  });
28
31
  return `[Vision Model Analysis (${config.visionModel})]:\n${text}`;
29
32
  }
30
33
  catch (err) {
31
34
  let msg = err.message;
32
- if (err.name === 'TimeoutError' || err.message.includes('timeout')) {
33
- msg = `Analysis timed out after 60s. The image might be too complex or the provider is slow.`;
35
+ if (err.name === "AbortError" || err.message.includes("timeout")) {
36
+ msg = `Analysis timed out after 90s. The image might be too complex or the provider is slow.`;
34
37
  }
35
38
  return `Error during vision analysis: ${msg}`;
36
39
  }
@@ -3,5 +3,5 @@ export declare function executeBrowserAction(args: {
3
3
  url?: string;
4
4
  selector?: string;
5
5
  text?: string;
6
- }): Promise<string>;
6
+ }, abortSignal?: AbortSignal): Promise<string>;
7
7
  export declare function closeBrowser(): Promise<void>;
@@ -1,9 +1,9 @@
1
- import { chromium } from 'playwright';
2
- import { analyzeImage } from '../llm/vision.js';
3
- import { loadModelConfig } from '../config/index.js';
4
- import * as fs from 'fs/promises';
5
- import { existsSync, mkdirSync } from 'fs';
6
- import path from 'path';
1
+ import { chromium } from "playwright";
2
+ import { analyzeImage } from "../llm/vision.js";
3
+ import { loadModelConfig } from "../config/index.js";
4
+ import * as fs from "fs/promises";
5
+ import { existsSync, mkdirSync } from "fs";
6
+ import path from "path";
7
7
  let browser = null;
8
8
  let page = null;
9
9
  async function getBrowser() {
@@ -13,61 +13,62 @@ async function getBrowser() {
13
13
  }
14
14
  if (!page) {
15
15
  const context = await browser.newContext({
16
- viewport: { width: 1280, height: 720 }
16
+ viewport: { width: 1280, height: 720 },
17
17
  });
18
18
  page = await context.newPage();
19
19
  }
20
20
  return { browser, page };
21
21
  }
22
22
  catch (err) {
23
- if (err.message.includes('executable') || err.message.includes('not found')) {
23
+ if (err.message.includes("executable") ||
24
+ err.message.includes("not found")) {
24
25
  throw new Error(`Browser not found. Please run: npx playwright install chromium`);
25
26
  }
26
27
  throw err;
27
28
  }
28
29
  }
29
- export async function executeBrowserAction(args) {
30
+ export async function executeBrowserAction(args, abortSignal) {
30
31
  const { page } = await getBrowser();
31
32
  const config = loadModelConfig();
32
33
  try {
33
34
  switch (args.action) {
34
- case 'navigate':
35
+ case "navigate":
35
36
  if (!args.url)
36
- return 'Error: URL is required for navigate action.';
37
- await page.goto(args.url, { waitUntil: 'networkidle' });
37
+ return "Error: URL is required for navigate action.";
38
+ await page.goto(args.url, { waitUntil: "networkidle" });
38
39
  const title = await page.title();
39
40
  return `Successfully navigated to ${args.url}. Page title: ${title}`;
40
- case 'screenshot':
41
+ case "screenshot":
41
42
  const screenshot = await page.screenshot({ fullPage: false });
42
- const base64 = screenshot.toString('base64');
43
+ const base64 = screenshot.toString("base64");
43
44
  // Save to disk
44
- const screenshotDir = path.resolve(process.cwd(), '.acmecode', 'screenshots');
45
+ const screenshotDir = path.resolve(process.cwd(), ".acmecode", "screenshots");
45
46
  if (!existsSync(screenshotDir)) {
46
47
  mkdirSync(screenshotDir, { recursive: true });
47
48
  }
48
- const filename = `screenshot_${new Date().toISOString().replace(/[:.]/g, '-')}.png`;
49
+ const filename = `screenshot_${new Date().toISOString().replace(/[:.]/g, "-")}.png`;
49
50
  const filePath = path.join(screenshotDir, filename);
50
51
  await fs.writeFile(filePath, screenshot);
51
52
  const stats = `Screenshot saved to: ${path.relative(process.cwd(), filePath)}`;
52
53
  // Delegate to vision model if configured
53
54
  if (config.visionModel) {
54
- const analysis = await analyzeImage(base64, config);
55
+ const analysis = await analyzeImage(base64, config, abortSignal);
55
56
  return `${stats}\n\n${analysis}`;
56
57
  }
57
58
  return stats;
58
- case 'click':
59
+ case "click":
59
60
  if (!args.selector)
60
- return 'Error: Selector is required for click action.';
61
+ return "Error: Selector is required for click action.";
61
62
  await page.click(args.selector);
62
63
  return `Clicked element: ${args.selector}`;
63
- case 'type':
64
+ case "type":
64
65
  if (!args.selector || !args.text)
65
- return 'Error: Selector and text are required for type action.';
66
+ return "Error: Selector and text are required for type action.";
66
67
  await page.fill(args.selector, args.text);
67
68
  return `Typed "${args.text}" into ${args.selector}`;
68
- case 'scroll':
69
+ case "scroll":
69
70
  await page.mouse.wheel(0, 500);
70
- return 'Scrolled down.';
71
+ return "Scrolled down.";
71
72
  default:
72
73
  return `Error: Unknown action "${args.action}"`;
73
74
  }
@@ -12,6 +12,7 @@ import * as diff from "diff";
12
12
  import { replaceCode } from "./edit.js";
13
13
  import { getLspClientForFile } from "./lsp-client.js";
14
14
  import { executeBatch, BATCH_WHITELIST } from "./batch.js";
15
+ import { executeBrowserAction } from "./browser.js";
15
16
  const execAsync = promisify(exec);
16
17
  // ── Constants (from opencode patterns) ──
17
18
  const DEFAULT_READ_LIMIT = 2000;
@@ -967,6 +968,9 @@ export const toolExecutors = {
967
968
  batch: async (args) => {
968
969
  return executeBatch(args, toolExecutors);
969
970
  },
971
+ browser_action: async (args) => {
972
+ return executeBrowserAction(args);
973
+ },
970
974
  };
971
975
  // Create AI SDK tool objects (with execute) for the agent
972
976
  const tool = (options) => createTool(options);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@acmecloud/core",
3
- "version": "1.0.7",
3
+ "version": "1.0.9",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "exports": {
package/src/llm/vision.ts CHANGED
@@ -1,43 +1,47 @@
1
- import { generateText } from 'ai';
2
- import { getModel } from './provider.js';
3
- import { ModelConfig } from '../config/index.js';
4
-
5
- /**
6
- * Analyzes an image using a specialized vision model and returns a textual description.
7
- * This allows non-vision primary models to "see" via delegation.
8
- */
9
- export async function analyzeImage(
10
- imageData: string, // base64
11
- config: ModelConfig
12
- ): Promise<string> {
13
- if (!config.visionProvider || !config.visionModel) {
14
- return "Error: Vision model not configured. Please set visionProvider and visionModel in config.";
15
- }
16
-
17
- const model = getModel(config.visionProvider as any, config.visionModel);
18
-
19
- try {
20
- const { text } = await generateText({
21
- model,
22
- abortSignal: AbortSignal.timeout(60000), // 60 seconds timeout
23
- maxRetries: 3,
24
- messages: [
25
- {
26
- role: 'user',
27
- content: [
28
- { type: 'text', text: "Please describe this screenshot in detail. Focus on the layout, visible text, interactive elements (buttons, inputs), and any apparent errors or status messages. This description will be used by another AI to understand the state of the web application." },
29
- { type: 'image', image: imageData }
30
- ]
31
- }
32
- ]
33
- });
34
-
35
- return `[Vision Model Analysis (${config.visionModel})]:\n${text}`;
36
- } catch (err: any) {
37
- let msg = err.message;
38
- if (err.name === 'TimeoutError' || err.message.includes('timeout')) {
39
- msg = `Analysis timed out after 60s. The image might be too complex or the provider is slow.`;
40
- }
41
- return `Error during vision analysis: ${msg}`;
42
- }
43
- }
1
+ import { generateText } from "ai";
2
+ import { getModel } from "./provider.js";
3
+ import { ModelConfig } from "../config/index.js";
4
+
5
/**
 * Analyzes an image using a specialized vision model and returns a textual description.
 * This allows non-vision primary models to "see" via delegation.
 *
 * The request is always bounded by a 90 second timeout. When the caller also
 * passes `abortSignal`, whichever of the two signals fires first aborts the
 * request, so a caller-supplied signal no longer disables the timeout.
 *
 * @param imageData - Image payload forwarded as the `image` part of the prompt.
 * @param config - Model configuration; both `visionProvider` and `visionModel` must be set.
 * @param abortSignal - Optional caller-owned signal used to cancel the analysis early.
 * @returns The model's description prefixed with the vision model name, or an
 *   `Error...` string describing why no description could be produced.
 *   Failures of the model call are reported in the returned string, not thrown.
 */
export async function analyzeImage(
  imageData: string, // base64
  config: ModelConfig,
  abortSignal?: AbortSignal,
): Promise<string> {
  if (!config.visionProvider || !config.visionModel) {
    return "Error: Vision model not configured. Please set visionProvider and visionModel in config.";
  }

  const model = getModel(config.visionProvider as any, config.visionModel);

  const timeoutMs = 90000;
  const timeoutSignal = AbortSignal.timeout(timeoutMs);

  // Without a caller signal the timeout signal is used directly; otherwise both
  // are forwarded into one controller so either of them can abort the request.
  let requestSignal: AbortSignal = timeoutSignal;
  let unlink = (): void => {};
  if (abortSignal) {
    const callerSignal = abortSignal;
    const linked = new AbortController();
    const fromCaller = (): void => linked.abort(callerSignal.reason);
    const fromTimeout = (): void => linked.abort(timeoutSignal.reason);
    if (callerSignal.aborted) {
      fromCaller();
    } else {
      callerSignal.addEventListener("abort", fromCaller, { once: true });
      timeoutSignal.addEventListener("abort", fromTimeout, { once: true });
      // Detach afterwards so a long-lived caller signal does not retain this request.
      unlink = () => {
        callerSignal.removeEventListener("abort", fromCaller);
        timeoutSignal.removeEventListener("abort", fromTimeout);
      };
    }
    requestSignal = linked.signal;
  }

  try {
    const { text } = await generateText({
      model,
      abortSignal: requestSignal,
      maxRetries: 2,
      messages: [
        {
          role: "user",
          content: [
            {
              type: "text",
              text: "Please describe this screenshot in detail. Focus on the layout, visible text, interactive elements (buttons, inputs), and any apparent errors or status messages. This description will be used by another AI to understand the state of the web application.",
            },
            { type: "image", image: imageData },
          ],
        },
      ],
    });

    return `[Vision Model Analysis (${config.visionModel})]:\n${text}`;
  } catch (err: unknown) {
    // Anything can be thrown, so read name/message defensively.
    const details = (typeof err === "object" && err !== null ? err : {}) as {
      name?: unknown;
      message?: unknown;
    };
    const name = typeof details.name === "string" ? details.name : "";
    const message =
      typeof details.message === "string" ? details.message : String(err);

    const cancelledByCaller =
      abortSignal?.aborted === true && !timeoutSignal.aborted;
    const timedOut =
      timeoutSignal.aborted ||
      name === "TimeoutError" ||
      message.includes("timeout");

    let msg = message;
    if (cancelledByCaller) {
      msg = "Analysis was cancelled before it completed.";
    } else if (timedOut) {
      msg = `Analysis timed out after ${timeoutMs / 1000}s. The image might be too complex or the provider is slow.`;
    } else if (name === "AbortError") {
      // Aborted, but not by the timeout: report it as a cancellation, not a timeout.
      msg = "Analysis was cancelled before it completed.";
    }
    return `Error during vision analysis: ${msg}`;
  } finally {
    unlink();
  }
}
@@ -1,95 +1,109 @@
1
- import { chromium, Browser, Page } from 'playwright';
2
- import { analyzeImage } from '../llm/vision.js';
3
- import { loadModelConfig } from '../config/index.js';
4
- import * as fs from 'fs/promises';
5
- import { existsSync, mkdirSync } from 'fs';
6
- import path from 'path';
7
-
8
- let browser: Browser | null = null;
9
- let page: Page | null = null;
10
-
11
- async function getBrowser() {
12
- try {
13
- if (!browser) {
14
- browser = await chromium.launch({ headless: true });
15
- }
16
- if (!page) {
17
- const context = await browser.newContext({
18
- viewport: { width: 1280, height: 720 }
19
- });
20
- page = await context.newPage();
21
- }
22
- return { browser, page };
23
- } catch (err: any) {
24
- if (err.message.includes('executable') || err.message.includes('not found')) {
25
- throw new Error(`Browser not found. Please run: npx playwright install chromium`);
26
- }
27
- throw err;
28
- }
29
- }
30
-
31
- export async function executeBrowserAction(args: { action: string, url?: string, selector?: string, text?: string }): Promise<string> {
32
- const { page } = await getBrowser();
33
- const config = loadModelConfig();
34
-
35
- try {
36
- switch (args.action) {
37
- case 'navigate':
38
- if (!args.url) return 'Error: URL is required for navigate action.';
39
- await page.goto(args.url, { waitUntil: 'networkidle' });
40
- const title = await page.title();
41
- return `Successfully navigated to ${args.url}. Page title: ${title}`;
42
-
43
- case 'screenshot':
44
- const screenshot = await page.screenshot({ fullPage: false });
45
- const base64 = screenshot.toString('base64');
46
-
47
- // Save to disk
48
- const screenshotDir = path.resolve(process.cwd(), '.acmecode', 'screenshots');
49
- if (!existsSync(screenshotDir)) {
50
- mkdirSync(screenshotDir, { recursive: true });
51
- }
52
- const filename = `screenshot_${new Date().toISOString().replace(/[:.]/g, '-')}.png`;
53
- const filePath = path.join(screenshotDir, filename);
54
- await fs.writeFile(filePath, screenshot);
55
-
56
- const stats = `Screenshot saved to: ${path.relative(process.cwd(), filePath)}`;
57
-
58
- // Delegate to vision model if configured
59
- if (config.visionModel) {
60
- const analysis = await analyzeImage(base64, config);
61
- return `${stats}\n\n${analysis}`;
62
- }
63
-
64
- return stats;
65
-
66
- case 'click':
67
- if (!args.selector) return 'Error: Selector is required for click action.';
68
- await page.click(args.selector);
69
- return `Clicked element: ${args.selector}`;
70
-
71
- case 'type':
72
- if (!args.selector || !args.text) return 'Error: Selector and text are required for type action.';
73
- await page.fill(args.selector, args.text);
74
- return `Typed "${args.text}" into ${args.selector}`;
75
-
76
- case 'scroll':
77
- await page.mouse.wheel(0, 500);
78
- return 'Scrolled down.';
79
-
80
- default:
81
- return `Error: Unknown action "${args.action}"`;
82
- }
83
- } catch (err: any) {
84
- return `Browser error: ${err.message}`;
85
- }
86
- }
87
-
88
- // Cleanup function to be called on process exit
89
- export async function closeBrowser() {
90
- if (browser) {
91
- await browser.close();
92
- browser = null;
93
- page = null;
94
- }
95
- }
1
+ import { chromium, Browser, Page } from "playwright";
2
+ import { analyzeImage } from "../llm/vision.js";
3
+ import { loadModelConfig } from "../config/index.js";
4
+ import * as fs from "fs/promises";
5
+ import { existsSync, mkdirSync } from "fs";
6
+ import path from "path";
7
+
8
+ let browser: Browser | null = null;
9
+ let page: Page | null = null;
10
+
11
/**
 * Returns the shared headless Chromium browser and its single page, creating
 * either one on demand.
 *
 * A cached browser that is no longer connected, or a cached page that has been
 * closed, is discarded and recreated instead of being handed out again.
 *
 * @returns The module-level `browser` and `page` handles.
 * @throws Error with install instructions when the failure message looks like a
 *   missing browser binary; any other failure is rethrown unchanged.
 */
async function getBrowser() {
  try {
    if (!browser || !browser.isConnected()) {
      browser = await chromium.launch({ headless: true });
      // A page that belonged to the previous browser instance cannot be reused.
      page = null;
    }
    if (!page || page.isClosed()) {
      const context = await browser.newContext({
        viewport: { width: 1280, height: 720 },
      });
      page = await context.newPage();
    }
    return { browser, page };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    // Compare case-insensitively so capitalised wording such as
    // "Executable doesn't exist" is recognised as well.
    const lowered = message.toLowerCase();
    if (lowered.includes("executable") || lowered.includes("not found")) {
      throw new Error(
        `Browser not found. Please run: npx playwright install chromium`,
      );
    }
    throw err;
  }
}
35
+
36
/**
 * Runs one browser automation action against the shared page and reports the
 * outcome as text.
 *
 * Supported actions: `navigate` (needs `url`), `screenshot`, `click` (needs
 * `selector`), `type` (needs `selector` and `text`; an empty `text` is allowed
 * so a field can be cleared) and `scroll`.
 *
 * Arguments are validated before the browser is requested, so an invalid call
 * never launches a browser. Every failure, including a failure to obtain the
 * browser, is returned as a `Browser error: ...` string.
 *
 * @param args - The action name plus the fields that action requires.
 * @param abortSignal - Optional signal forwarded to the vision analysis of a screenshot.
 * @returns A human-readable success or error message.
 */
export async function executeBrowserAction(
  args: { action: string; url?: string; selector?: string; text?: string },
  abortSignal?: AbortSignal,
): Promise<string> {
  try {
    switch (args.action) {
      case "navigate": {
        if (!args.url) return "Error: URL is required for navigate action.";
        const { page } = await getBrowser();
        await page.goto(args.url, { waitUntil: "networkidle" });
        const title = await page.title();
        return `Successfully navigated to ${args.url}. Page title: ${title}`;
      }

      case "screenshot": {
        const { page } = await getBrowser();
        const screenshot = await page.screenshot({ fullPage: false });
        const base64 = screenshot.toString("base64");

        // Save to disk. A recursive mkdir succeeds when the directory already exists.
        const screenshotDir = path.resolve(
          process.cwd(),
          ".acmecode",
          "screenshots",
        );
        await fs.mkdir(screenshotDir, { recursive: true });
        const filename = `screenshot_${new Date().toISOString().replace(/[:.]/g, "-")}.png`;
        const filePath = path.join(screenshotDir, filename);
        await fs.writeFile(filePath, screenshot);

        const stats = `Screenshot saved to: ${path.relative(process.cwd(), filePath)}`;

        // Delegate to vision model if configured. The config is only needed here.
        const config = loadModelConfig();
        if (config.visionModel) {
          const analysis = await analyzeImage(base64, config, abortSignal);
          return `${stats}\n\n${analysis}`;
        }

        return stats;
      }

      case "click": {
        if (!args.selector)
          return "Error: Selector is required for click action.";
        const { page } = await getBrowser();
        await page.click(args.selector);
        return `Clicked element: ${args.selector}`;
      }

      case "type": {
        // Only a missing text is an error: "" is how a field gets cleared.
        if (!args.selector || args.text == null)
          return "Error: Selector and text are required for type action.";
        const { page } = await getBrowser();
        await page.fill(args.selector, args.text);
        return `Typed "${args.text}" into ${args.selector}`;
      }

      case "scroll": {
        const { page } = await getBrowser();
        await page.mouse.wheel(0, 500);
        return "Scrolled down.";
      }

      default:
        return `Error: Unknown action "${args.action}"`;
    }
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    return `Browser error: ${message}`;
  }
}
101
+
102
/**
 * Cleanup function to be called on process exit.
 *
 * Closes the shared browser if one exists. The module-level `browser` and
 * `page` handles are cleared before the close is awaited, so a rejected
 * `close()` cannot leave stale handles behind for later calls.
 *
 * @throws Whatever `browser.close()` rejects with.
 */
export async function closeBrowser(): Promise<void> {
  if (!browser) {
    return;
  }
  const active = browser;
  browser = null;
  page = null;
  await active.close();
}
@@ -1095,6 +1095,9 @@ export const toolExecutors: Record<string, (args: any) => Promise<string>> = {
1095
1095
  batch: async (args) => {
1096
1096
  return executeBatch(args, toolExecutors);
1097
1097
  },
1098
+ browser_action: async (args) => {
1099
+ return executeBrowserAction(args);
1100
+ },
1098
1101
  };
1099
1102
 
1100
1103
  // Create AI SDK tool objects (with execute) for the agent