@acmecloud/core 1.0.8 → 1.0.10

This diff shows the changes between publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -281,6 +281,21 @@ export async function* runAgent(provider, modelName, messages, systemPrompt, abo
281
281
  return [];
282
282
  }
283
283
  else {
284
+ // Check if this is a content safety error (should not retry)
285
+ const errorMessage = err.message?.toLowerCase() || "";
286
+ const isContentSafetyError = errorMessage.includes("sensitive_words_detected") ||
287
+ errorMessage.includes("content_policy") ||
288
+ errorMessage.includes("safety") ||
289
+ errorMessage.includes("content_filter");
290
+ if (isContentSafetyError) {
291
+ // Content safety errors should not be retried - they won't succeed by retrying
292
+ yield {
293
+ type: "text",
294
+ text: `\n[Content Safety Error: ${err.message}]\n\nThis request triggered the content safety filter. Please modify your request to avoid sensitive topics and try again.\n`,
295
+ };
296
+ yield { type: "messages", messages: currentMessages };
297
+ return [];
298
+ }
284
299
  yield {
285
300
  type: "text",
286
301
  text: `\n[Error: ${err.message}]\n[Auto-recovering from stream drop...]\n`,
@@ -1,7 +1,7 @@
1
- import { ModelConfig } from '../config/index.js';
1
+ import { ModelConfig } from "../config/index.js";
2
2
  /**
3
3
  * Analyzes an image using a specialized vision model and returns a textual description.
4
4
  * This allows non-vision primary models to "see" via delegation.
5
5
  */
6
6
  export declare function analyzeImage(imageData: string, // base64
7
- config: ModelConfig): Promise<string>;
7
+ config: ModelConfig, abortSignal?: AbortSignal): Promise<string>;
@@ -1,11 +1,11 @@
1
- import { generateText } from 'ai';
2
- import { getModel } from './provider.js';
1
+ import { generateText } from "ai";
2
+ import { getModel } from "./provider.js";
3
3
  /**
4
4
  * Analyzes an image using a specialized vision model and returns a textual description.
5
5
  * This allows non-vision primary models to "see" via delegation.
6
6
  */
7
7
  export async function analyzeImage(imageData, // base64
8
- config) {
8
+ config, abortSignal) {
9
9
  if (!config.visionProvider || !config.visionModel) {
10
10
  return "Error: Vision model not configured. Please set visionProvider and visionModel in config.";
11
11
  }
@@ -13,24 +13,27 @@ config) {
13
13
  try {
14
14
  const { text } = await generateText({
15
15
  model,
16
- abortSignal: AbortSignal.timeout(60000), // 60 seconds timeout
17
- maxRetries: 3,
16
+ abortSignal: abortSignal || AbortSignal.timeout(90000), // 90 seconds timeout
17
+ maxRetries: 2,
18
18
  messages: [
19
19
  {
20
- role: 'user',
20
+ role: "user",
21
21
  content: [
22
- { type: 'text', text: "Please describe this screenshot in detail. Focus on the layout, visible text, interactive elements (buttons, inputs), and any apparent errors or status messages. This description will be used by another AI to understand the state of the web application." },
23
- { type: 'image', image: imageData }
24
- ]
25
- }
26
- ]
22
+ {
23
+ type: "text",
24
+ text: "Please describe this screenshot in detail. Focus on the layout, visible text, interactive elements (buttons, inputs), and any apparent errors or status messages. This description will be used by another AI to understand the state of the web application.",
25
+ },
26
+ { type: "image", image: imageData },
27
+ ],
28
+ },
29
+ ],
27
30
  });
28
31
  return `[Vision Model Analysis (${config.visionModel})]:\n${text}`;
29
32
  }
30
33
  catch (err) {
31
34
  let msg = err.message;
32
- if (err.name === 'TimeoutError' || err.message.includes('timeout')) {
33
- msg = `Analysis timed out after 60s. The image might be too complex or the provider is slow.`;
35
+ if (err.name === "AbortError" || err.message.includes("timeout")) {
36
+ msg = `Analysis timed out after 90s. The image might be too complex or the provider is slow.`;
34
37
  }
35
38
  return `Error during vision analysis: ${msg}`;
36
39
  }
@@ -3,5 +3,5 @@ export declare function executeBrowserAction(args: {
3
3
  url?: string;
4
4
  selector?: string;
5
5
  text?: string;
6
- }): Promise<string>;
6
+ }, abortSignal?: AbortSignal): Promise<string>;
7
7
  export declare function closeBrowser(): Promise<void>;
@@ -1,9 +1,9 @@
1
- import { chromium } from 'playwright';
2
- import { analyzeImage } from '../llm/vision.js';
3
- import { loadModelConfig } from '../config/index.js';
4
- import * as fs from 'fs/promises';
5
- import { existsSync, mkdirSync } from 'fs';
6
- import path from 'path';
1
+ import { chromium } from "playwright";
2
+ import { analyzeImage } from "../llm/vision.js";
3
+ import { loadModelConfig } from "../config/index.js";
4
+ import * as fs from "fs/promises";
5
+ import { existsSync, mkdirSync } from "fs";
6
+ import path from "path";
7
7
  let browser = null;
8
8
  let page = null;
9
9
  async function getBrowser() {
@@ -13,61 +13,62 @@ async function getBrowser() {
13
13
  }
14
14
  if (!page) {
15
15
  const context = await browser.newContext({
16
- viewport: { width: 1280, height: 720 }
16
+ viewport: { width: 1280, height: 720 },
17
17
  });
18
18
  page = await context.newPage();
19
19
  }
20
20
  return { browser, page };
21
21
  }
22
22
  catch (err) {
23
- if (err.message.includes('executable') || err.message.includes('not found')) {
23
+ if (err.message.includes("executable") ||
24
+ err.message.includes("not found")) {
24
25
  throw new Error(`Browser not found. Please run: npx playwright install chromium`);
25
26
  }
26
27
  throw err;
27
28
  }
28
29
  }
29
- export async function executeBrowserAction(args) {
30
+ export async function executeBrowserAction(args, abortSignal) {
30
31
  const { page } = await getBrowser();
31
32
  const config = loadModelConfig();
32
33
  try {
33
34
  switch (args.action) {
34
- case 'navigate':
35
+ case "navigate":
35
36
  if (!args.url)
36
- return 'Error: URL is required for navigate action.';
37
- await page.goto(args.url, { waitUntil: 'networkidle' });
37
+ return "Error: URL is required for navigate action.";
38
+ await page.goto(args.url, { waitUntil: "networkidle" });
38
39
  const title = await page.title();
39
40
  return `Successfully navigated to ${args.url}. Page title: ${title}`;
40
- case 'screenshot':
41
+ case "screenshot":
41
42
  const screenshot = await page.screenshot({ fullPage: false });
42
- const base64 = screenshot.toString('base64');
43
+ const base64 = screenshot.toString("base64");
43
44
  // Save to disk
44
- const screenshotDir = path.resolve(process.cwd(), '.acmecode', 'screenshots');
45
+ const screenshotDir = path.resolve(process.cwd(), ".acmecode", "screenshots");
45
46
  if (!existsSync(screenshotDir)) {
46
47
  mkdirSync(screenshotDir, { recursive: true });
47
48
  }
48
- const filename = `screenshot_${new Date().toISOString().replace(/[:.]/g, '-')}.png`;
49
+ const filename = `screenshot_${new Date().toISOString().replace(/[:.]/g, "-")}.png`;
49
50
  const filePath = path.join(screenshotDir, filename);
50
51
  await fs.writeFile(filePath, screenshot);
51
52
  const stats = `Screenshot saved to: ${path.relative(process.cwd(), filePath)}`;
52
53
  // Delegate to vision model if configured
53
54
  if (config.visionModel) {
54
- const analysis = await analyzeImage(base64, config);
55
+ const analysis = await analyzeImage(base64, config, abortSignal);
55
56
  return `${stats}\n\n${analysis}`;
56
57
  }
57
58
  return stats;
58
- case 'click':
59
+ case "click":
59
60
  if (!args.selector)
60
- return 'Error: Selector is required for click action.';
61
+ return "Error: Selector is required for click action.";
61
62
  await page.click(args.selector);
62
63
  return `Clicked element: ${args.selector}`;
63
- case 'type':
64
+ case "type":
64
65
  if (!args.selector || !args.text)
65
- return 'Error: Selector and text are required for type action.';
66
+ return "Error: Selector and text are required for type action.";
66
67
  await page.fill(args.selector, args.text);
67
68
  return `Typed "${args.text}" into ${args.selector}`;
68
- case 'scroll':
69
+ case "scroll":
69
70
  await page.mouse.wheel(0, 500);
70
- return 'Scrolled down.';
71
+ return "Scrolled down.";
71
72
  default:
72
73
  return `Error: Unknown action "${args.action}"`;
73
74
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@acmecloud/core",
3
- "version": "1.0.8",
3
+ "version": "1.0.10",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "exports": {
@@ -363,6 +363,24 @@ export async function* runAgent(
363
363
  yield { type: "messages", messages: currentMessages };
364
364
  return [];
365
365
  } else {
366
+ // Check if this is a content safety error (should not retry)
367
+ const errorMessage = err.message?.toLowerCase() || "";
368
+ const isContentSafetyError =
369
+ errorMessage.includes("sensitive_words_detected") ||
370
+ errorMessage.includes("content_policy") ||
371
+ errorMessage.includes("safety") ||
372
+ errorMessage.includes("content_filter");
373
+
374
+ if (isContentSafetyError) {
375
+ // Content safety errors should not be retried - they won't succeed by retrying
376
+ yield {
377
+ type: "text",
378
+ text: `\n[Content Safety Error: ${err.message}]\n\nThis request triggered the content safety filter. Please modify your request to avoid sensitive topics and try again.\n`,
379
+ };
380
+ yield { type: "messages", messages: currentMessages };
381
+ return [];
382
+ }
383
+
366
384
  yield {
367
385
  type: "text",
368
386
  text: `\n[Error: ${err.message}]\n[Auto-recovering from stream drop...]\n`,
package/src/llm/vision.ts CHANGED
@@ -1,43 +1,47 @@
1
- import { generateText } from 'ai';
2
- import { getModel } from './provider.js';
3
- import { ModelConfig } from '../config/index.js';
4
-
5
- /**
6
- * Analyzes an image using a specialized vision model and returns a textual description.
7
- * This allows non-vision primary models to "see" via delegation.
8
- */
9
- export async function analyzeImage(
10
- imageData: string, // base64
11
- config: ModelConfig
12
- ): Promise<string> {
13
- if (!config.visionProvider || !config.visionModel) {
14
- return "Error: Vision model not configured. Please set visionProvider and visionModel in config.";
15
- }
16
-
17
- const model = getModel(config.visionProvider as any, config.visionModel);
18
-
19
- try {
20
- const { text } = await generateText({
21
- model,
22
- abortSignal: AbortSignal.timeout(60000), // 60 seconds timeout
23
- maxRetries: 3,
24
- messages: [
25
- {
26
- role: 'user',
27
- content: [
28
- { type: 'text', text: "Please describe this screenshot in detail. Focus on the layout, visible text, interactive elements (buttons, inputs), and any apparent errors or status messages. This description will be used by another AI to understand the state of the web application." },
29
- { type: 'image', image: imageData }
30
- ]
31
- }
32
- ]
33
- });
34
-
35
- return `[Vision Model Analysis (${config.visionModel})]:\n${text}`;
36
- } catch (err: any) {
37
- let msg = err.message;
38
- if (err.name === 'TimeoutError' || err.message.includes('timeout')) {
39
- msg = `Analysis timed out after 60s. The image might be too complex or the provider is slow.`;
40
- }
41
- return `Error during vision analysis: ${msg}`;
42
- }
43
- }
1
+ import { generateText } from "ai";
2
+ import { getModel } from "./provider.js";
3
+ import { ModelConfig } from "../config/index.js";
4
+
5
+ /**
6
+ * Analyzes an image using a specialized vision model and returns a textual description.
7
+ * This allows non-vision primary models to "see" via delegation.
8
+ */
9
+ export async function analyzeImage(
10
+ imageData: string, // base64
11
+ config: ModelConfig,
12
+ abortSignal?: AbortSignal,
13
+ ): Promise<string> {
14
+ if (!config.visionProvider || !config.visionModel) {
15
+ return "Error: Vision model not configured. Please set visionProvider and visionModel in config.";
16
+ }
17
+
18
+ const model = getModel(config.visionProvider as any, config.visionModel);
19
+
20
+ try {
21
+ const { text } = await generateText({
22
+ model,
23
+ abortSignal: abortSignal || AbortSignal.timeout(90000), // 90 seconds timeout
24
+ maxRetries: 2,
25
+ messages: [
26
+ {
27
+ role: "user",
28
+ content: [
29
+ {
30
+ type: "text",
31
+ text: "Please describe this screenshot in detail. Focus on the layout, visible text, interactive elements (buttons, inputs), and any apparent errors or status messages. This description will be used by another AI to understand the state of the web application.",
32
+ },
33
+ { type: "image", image: imageData },
34
+ ],
35
+ },
36
+ ],
37
+ });
38
+
39
+ return `[Vision Model Analysis (${config.visionModel})]:\n${text}`;
40
+ } catch (err: any) {
41
+ let msg = err.message;
42
+ if (err.name === "AbortError" || err.message.includes("timeout")) {
43
+ msg = `Analysis timed out after 90s. The image might be too complex or the provider is slow.`;
44
+ }
45
+ return `Error during vision analysis: ${msg}`;
46
+ }
47
+ }
@@ -1,95 +1,109 @@
1
- import { chromium, Browser, Page } from 'playwright';
2
- import { analyzeImage } from '../llm/vision.js';
3
- import { loadModelConfig } from '../config/index.js';
4
- import * as fs from 'fs/promises';
5
- import { existsSync, mkdirSync } from 'fs';
6
- import path from 'path';
7
-
8
- let browser: Browser | null = null;
9
- let page: Page | null = null;
10
-
11
- async function getBrowser() {
12
- try {
13
- if (!browser) {
14
- browser = await chromium.launch({ headless: true });
15
- }
16
- if (!page) {
17
- const context = await browser.newContext({
18
- viewport: { width: 1280, height: 720 }
19
- });
20
- page = await context.newPage();
21
- }
22
- return { browser, page };
23
- } catch (err: any) {
24
- if (err.message.includes('executable') || err.message.includes('not found')) {
25
- throw new Error(`Browser not found. Please run: npx playwright install chromium`);
26
- }
27
- throw err;
28
- }
29
- }
30
-
31
- export async function executeBrowserAction(args: { action: string, url?: string, selector?: string, text?: string }): Promise<string> {
32
- const { page } = await getBrowser();
33
- const config = loadModelConfig();
34
-
35
- try {
36
- switch (args.action) {
37
- case 'navigate':
38
- if (!args.url) return 'Error: URL is required for navigate action.';
39
- await page.goto(args.url, { waitUntil: 'networkidle' });
40
- const title = await page.title();
41
- return `Successfully navigated to ${args.url}. Page title: ${title}`;
42
-
43
- case 'screenshot':
44
- const screenshot = await page.screenshot({ fullPage: false });
45
- const base64 = screenshot.toString('base64');
46
-
47
- // Save to disk
48
- const screenshotDir = path.resolve(process.cwd(), '.acmecode', 'screenshots');
49
- if (!existsSync(screenshotDir)) {
50
- mkdirSync(screenshotDir, { recursive: true });
51
- }
52
- const filename = `screenshot_${new Date().toISOString().replace(/[:.]/g, '-')}.png`;
53
- const filePath = path.join(screenshotDir, filename);
54
- await fs.writeFile(filePath, screenshot);
55
-
56
- const stats = `Screenshot saved to: ${path.relative(process.cwd(), filePath)}`;
57
-
58
- // Delegate to vision model if configured
59
- if (config.visionModel) {
60
- const analysis = await analyzeImage(base64, config);
61
- return `${stats}\n\n${analysis}`;
62
- }
63
-
64
- return stats;
65
-
66
- case 'click':
67
- if (!args.selector) return 'Error: Selector is required for click action.';
68
- await page.click(args.selector);
69
- return `Clicked element: ${args.selector}`;
70
-
71
- case 'type':
72
- if (!args.selector || !args.text) return 'Error: Selector and text are required for type action.';
73
- await page.fill(args.selector, args.text);
74
- return `Typed "${args.text}" into ${args.selector}`;
75
-
76
- case 'scroll':
77
- await page.mouse.wheel(0, 500);
78
- return 'Scrolled down.';
79
-
80
- default:
81
- return `Error: Unknown action "${args.action}"`;
82
- }
83
- } catch (err: any) {
84
- return `Browser error: ${err.message}`;
85
- }
86
- }
87
-
88
- // Cleanup function to be called on process exit
89
- export async function closeBrowser() {
90
- if (browser) {
91
- await browser.close();
92
- browser = null;
93
- page = null;
94
- }
95
- }
1
+ import { chromium, Browser, Page } from "playwright";
2
+ import { analyzeImage } from "../llm/vision.js";
3
+ import { loadModelConfig } from "../config/index.js";
4
+ import * as fs from "fs/promises";
5
+ import { existsSync, mkdirSync } from "fs";
6
+ import path from "path";
7
+
8
+ let browser: Browser | null = null;
9
+ let page: Page | null = null;
10
+
11
+ async function getBrowser() {
12
+ try {
13
+ if (!browser) {
14
+ browser = await chromium.launch({ headless: true });
15
+ }
16
+ if (!page) {
17
+ const context = await browser.newContext({
18
+ viewport: { width: 1280, height: 720 },
19
+ });
20
+ page = await context.newPage();
21
+ }
22
+ return { browser, page };
23
+ } catch (err: any) {
24
+ if (
25
+ err.message.includes("executable") ||
26
+ err.message.includes("not found")
27
+ ) {
28
+ throw new Error(
29
+ `Browser not found. Please run: npx playwright install chromium`,
30
+ );
31
+ }
32
+ throw err;
33
+ }
34
+ }
35
+
36
+ export async function executeBrowserAction(
37
+ args: { action: string; url?: string; selector?: string; text?: string },
38
+ abortSignal?: AbortSignal,
39
+ ): Promise<string> {
40
+ const { page } = await getBrowser();
41
+ const config = loadModelConfig();
42
+
43
+ try {
44
+ switch (args.action) {
45
+ case "navigate":
46
+ if (!args.url) return "Error: URL is required for navigate action.";
47
+ await page.goto(args.url, { waitUntil: "networkidle" });
48
+ const title = await page.title();
49
+ return `Successfully navigated to ${args.url}. Page title: ${title}`;
50
+
51
+ case "screenshot":
52
+ const screenshot = await page.screenshot({ fullPage: false });
53
+ const base64 = screenshot.toString("base64");
54
+
55
+ // Save to disk
56
+ const screenshotDir = path.resolve(
57
+ process.cwd(),
58
+ ".acmecode",
59
+ "screenshots",
60
+ );
61
+ if (!existsSync(screenshotDir)) {
62
+ mkdirSync(screenshotDir, { recursive: true });
63
+ }
64
+ const filename = `screenshot_${new Date().toISOString().replace(/[:.]/g, "-")}.png`;
65
+ const filePath = path.join(screenshotDir, filename);
66
+ await fs.writeFile(filePath, screenshot);
67
+
68
+ const stats = `Screenshot saved to: ${path.relative(process.cwd(), filePath)}`;
69
+
70
+ // Delegate to vision model if configured
71
+ if (config.visionModel) {
72
+ const analysis = await analyzeImage(base64, config, abortSignal);
73
+ return `${stats}\n\n${analysis}`;
74
+ }
75
+
76
+ return stats;
77
+
78
+ case "click":
79
+ if (!args.selector)
80
+ return "Error: Selector is required for click action.";
81
+ await page.click(args.selector);
82
+ return `Clicked element: ${args.selector}`;
83
+
84
+ case "type":
85
+ if (!args.selector || !args.text)
86
+ return "Error: Selector and text are required for type action.";
87
+ await page.fill(args.selector, args.text);
88
+ return `Typed "${args.text}" into ${args.selector}`;
89
+
90
+ case "scroll":
91
+ await page.mouse.wheel(0, 500);
92
+ return "Scrolled down.";
93
+
94
+ default:
95
+ return `Error: Unknown action "${args.action}"`;
96
+ }
97
+ } catch (err: any) {
98
+ return `Browser error: ${err.message}`;
99
+ }
100
+ }
101
+
102
+ // Cleanup function to be called on process exit
103
+ export async function closeBrowser() {
104
+ if (browser) {
105
+ await browser.close();
106
+ browser = null;
107
+ page = null;
108
+ }
109
+ }