opensteer 0.4.11 → 0.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/{chunk-C3NM6XZH.js → chunk-JSH3VLMH.js} +2072 -159
- package/dist/{chunk-L4FHT64T.js → chunk-QTGJO7RC.js} +14 -1
- package/dist/{chunk-DQIHOUXH.js → chunk-UIUDSWZV.js} +1 -1
- package/dist/{chunk-SRJLH34D.js → chunk-V4OOJO4S.js} +1 -1
- package/dist/cli/server.cjs +2080 -167
- package/dist/cli/server.js +1 -1
- package/dist/{extractor-K5VU7HVC.js → extractor-I6TJPTXV.js} +2 -2
- package/dist/index.cjs +2100 -167
- package/dist/index.d.cts +156 -1
- package/dist/index.d.ts +156 -1
- package/dist/index.js +24 -4
- package/dist/{resolver-WGFFHW4N.js → resolver-HVZJQZ32.js} +2 -2
- package/package.json +6 -2
package/dist/index.cjs
CHANGED
|
@@ -111,6 +111,15 @@ function resolveProviderInfo(modelStr) {
|
|
|
111
111
|
}
|
|
112
112
|
return { pkg: "@ai-sdk/openai", providerFn: "openai" };
|
|
113
113
|
}
|
|
114
|
+
/**
 * Drops a leading "<provider>/" segment from a model string when the segment
 * names one of the recognised providers (matched case-insensitively).
 *
 * @param {string} modelStr Model identifier, with or without a provider prefix.
 * @returns {string} The text after the first "/" when the prefix is openai,
 *   anthropic, google, xai or groq; otherwise the input unchanged.
 */
function stripProviderPrefix(modelStr) {
  const separatorIndex = modelStr.indexOf("/");
  // No slash, or a slash in first position: there is no provider segment.
  if (separatorIndex < 1) {
    return modelStr;
  }
  switch (modelStr.slice(0, separatorIndex).toLowerCase()) {
    case "openai":
    case "anthropic":
    case "google":
    case "xai":
    case "groq":
      return modelStr.slice(separatorIndex + 1);
    default:
      return modelStr;
  }
}
|
|
114
123
|
async function getModelProvider(modelStr) {
|
|
115
124
|
const { pkg, providerFn } = resolveProviderInfo(modelStr);
|
|
116
125
|
let mod;
|
|
@@ -127,7 +136,7 @@ async function getModelProvider(modelStr) {
|
|
|
127
136
|
`Provider '${providerFn}' not found in '${pkg}'. Ensure you have the latest version installed.`
|
|
128
137
|
);
|
|
129
138
|
}
|
|
130
|
-
const modelId =
|
|
139
|
+
const modelId = stripProviderPrefix(modelStr);
|
|
131
140
|
return provider(modelId);
|
|
132
141
|
}
|
|
133
142
|
var PROVIDER_MAP;
|
|
@@ -135,6 +144,10 @@ var init_model = __esm({
|
|
|
135
144
|
"src/ai/model.ts"() {
|
|
136
145
|
"use strict";
|
|
137
146
|
PROVIDER_MAP = {
|
|
147
|
+
"openai/": { pkg: "@ai-sdk/openai", providerFn: "openai" },
|
|
148
|
+
"anthropic/": { pkg: "@ai-sdk/anthropic", providerFn: "anthropic" },
|
|
149
|
+
"google/": { pkg: "@ai-sdk/google", providerFn: "google" },
|
|
150
|
+
"xai/": { pkg: "@ai-sdk/xai", providerFn: "xai" },
|
|
138
151
|
"gpt-": { pkg: "@ai-sdk/openai", providerFn: "openai" },
|
|
139
152
|
"o1-": { pkg: "@ai-sdk/openai", providerFn: "openai" },
|
|
140
153
|
"o3-": { pkg: "@ai-sdk/openai", providerFn: "openai" },
|
|
@@ -353,7 +366,15 @@ __export(index_exports, {
|
|
|
353
366
|
OS_UNAVAILABLE_ATTR: () => OS_UNAVAILABLE_ATTR,
|
|
354
367
|
Opensteer: () => Opensteer,
|
|
355
368
|
OpensteerActionError: () => OpensteerActionError,
|
|
369
|
+
OpensteerAgentActionError: () => OpensteerAgentActionError,
|
|
370
|
+
OpensteerAgentApiError: () => OpensteerAgentApiError,
|
|
371
|
+
OpensteerAgentBusyError: () => OpensteerAgentBusyError,
|
|
372
|
+
OpensteerAgentConfigError: () => OpensteerAgentConfigError,
|
|
373
|
+
OpensteerAgentError: () => OpensteerAgentError,
|
|
374
|
+
OpensteerAgentExecutionError: () => OpensteerAgentExecutionError,
|
|
375
|
+
OpensteerAgentProviderError: () => OpensteerAgentProviderError,
|
|
356
376
|
OpensteerCloudError: () => OpensteerCloudError,
|
|
377
|
+
OpensteerCuaAgentHandler: () => OpensteerCuaAgentHandler,
|
|
357
378
|
buildElementPathFromHandle: () => buildElementPathFromHandle,
|
|
358
379
|
buildElementPathFromSelector: () => buildElementPathFromSelector,
|
|
359
380
|
buildPathSelectorHint: () => buildPathSelectorHint,
|
|
@@ -370,6 +391,7 @@ __export(index_exports, {
|
|
|
370
391
|
cloudUnsupportedMethodError: () => cloudUnsupportedMethodError,
|
|
371
392
|
collectLocalSelectorCacheEntries: () => collectLocalSelectorCacheEntries,
|
|
372
393
|
countArrayItemsWithPath: () => countArrayItemsWithPath,
|
|
394
|
+
createCuaClient: () => createCuaClient,
|
|
373
395
|
createEmptyRegistry: () => createEmptyRegistry,
|
|
374
396
|
createExtractCallback: () => createExtractCallback,
|
|
375
397
|
createResolveCallback: () => createResolveCallback,
|
|
@@ -399,6 +421,7 @@ __export(index_exports, {
|
|
|
399
421
|
performSelect: () => performSelect,
|
|
400
422
|
prepareSnapshot: () => prepareSnapshot,
|
|
401
423
|
pressKey: () => pressKey,
|
|
424
|
+
resolveAgentConfig: () => resolveAgentConfig,
|
|
402
425
|
resolveCounterElement: () => resolveCounterElement,
|
|
403
426
|
resolveCountersBatch: () => resolveCountersBatch,
|
|
404
427
|
resolveElementPath: () => resolveElementPath,
|
|
@@ -7882,7 +7905,8 @@ var CloudCdpClient = class {
|
|
|
7882
7905
|
const message = error instanceof Error ? error.message : "Failed to connect to cloud CDP endpoint.";
|
|
7883
7906
|
throw new OpensteerCloudError("CLOUD_TRANSPORT_ERROR", message);
|
|
7884
7907
|
}
|
|
7885
|
-
const
|
|
7908
|
+
const contexts = browser.contexts();
|
|
7909
|
+
const context = contexts[0];
|
|
7886
7910
|
if (!context) {
|
|
7887
7911
|
await browser.close();
|
|
7888
7912
|
throw new OpensteerCloudError(
|
|
@@ -7890,10 +7914,41 @@ var CloudCdpClient = class {
|
|
|
7890
7914
|
"Cloud browser returned no context."
|
|
7891
7915
|
);
|
|
7892
7916
|
}
|
|
7917
|
+
const preferred = selectPreferredContextPage(browser, contexts);
|
|
7918
|
+
if (preferred) {
|
|
7919
|
+
return preferred;
|
|
7920
|
+
}
|
|
7893
7921
|
const page = context.pages()[0] || await context.newPage();
|
|
7894
7922
|
return { browser, context, page };
|
|
7895
7923
|
}
|
|
7896
7924
|
};
|
|
7925
|
+
/**
 * Scans every page of every context and picks the one to attach to.
 *
 * The first page whose URL is not internal/empty wins immediately. Failing
 * that, the first page sitting exactly on "about:blank" is returned.
 *
 * @param browser Browser handle, passed through into the result.
 * @param contexts Iterable of contexts, each exposing `pages()`.
 * @returns {{browser, context, page} | null} The selected triple, or null when
 *   no page qualifies.
 */
function selectPreferredContextPage(browser, contexts) {
  let blankFallback = null;
  for (const context of contexts) {
    for (const page of context.pages()) {
      const pageUrl = safePageUrl(page);
      if (!isInternalOrEmptyUrl(pageUrl)) {
        return { browser, context, page };
      }
      if (pageUrl === "about:blank") {
        // Keep only the first blank page encountered.
        blankFallback ??= { browser, context, page };
      }
    }
  }
  return blankFallback;
}
|
|
7940
|
+
/**
 * Reads `page.url()` without letting an exception escape.
 *
 * @param page Object exposing a `url()` method.
 * @returns {string} The page URL, or "" when `url()` throws.
 */
function safePageUrl(page) {
  let currentUrl = "";
  try {
    currentUrl = page.url();
  } catch {
    // Deliberate best-effort: a page whose URL cannot be read counts as empty.
  }
  return currentUrl;
}
|
|
7947
|
+
/**
 * Tells whether a URL is empty, "about:blank", or uses one of the
 * chrome://, devtools:// or edge:// schemes.
 *
 * @param {string} url URL to classify; any falsy value counts as empty.
 * @returns {boolean} true for empty/blank/internal URLs, false otherwise.
 */
function isInternalOrEmptyUrl(url) {
  if (!url || url === "about:blank") {
    return true;
  }
  const internalSchemes = ["chrome://", "devtools://", "edge://"];
  return internalSchemes.some((scheme) => url.startsWith(scheme));
}
|
|
7897
7952
|
function withTokenQuery2(wsUrl, token) {
|
|
7898
7953
|
const url = new URL(wsUrl);
|
|
7899
7954
|
url.searchParams.set("token", token);
|
|
@@ -8155,7 +8210,7 @@ function toCloudErrorCode(code) {
|
|
|
8155
8210
|
}
|
|
8156
8211
|
|
|
8157
8212
|
// src/cloud/runtime.ts
|
|
8158
|
-
var DEFAULT_CLOUD_BASE_URL = "https://
|
|
8213
|
+
var DEFAULT_CLOUD_BASE_URL = "https://api.opensteer.com";
|
|
8159
8214
|
function createCloudRuntimeState(key, baseUrl = resolveCloudBaseUrl(), authScheme = "api-key") {
|
|
8160
8215
|
const normalizedBaseUrl = normalizeCloudBaseUrl(baseUrl);
|
|
8161
8216
|
return {
|
|
@@ -8187,187 +8242,2006 @@ function readCloudActionDescription(payload) {
|
|
|
8187
8242
|
return normalized.length ? normalized : void 0;
|
|
8188
8243
|
}
|
|
8189
8244
|
|
|
8190
|
-
// src/
|
|
8191
|
-
var
|
|
8192
|
-
|
|
8193
|
-
|
|
8194
|
-
|
|
8195
|
-
|
|
8196
|
-
|
|
8197
|
-
|
|
8198
|
-
|
|
8199
|
-
|
|
8245
|
+
// src/agent/errors.ts
|
|
8246
|
+
/**
 * Base class for every agent error defined in this module.
 *
 * The `cause` option is forwarded to `Error` only when a cause was actually
 * supplied; passing `{ cause: undefined }` would otherwise install an own
 * `cause` property holding `undefined` on every error instance.
 */
var OpensteerAgentError = class extends Error {
  /**
   * @param {string} message Human-readable description.
   * @param {unknown} [cause] Underlying error, exposed as `error.cause`.
   */
  constructor(message, cause) {
    super(message, cause === void 0 ? void 0 : { cause });
    this.name = "OpensteerAgentError";
  }
};
/** Agent error carrying only a message (no cause). */
var OpensteerAgentConfigError = class extends OpensteerAgentError {
  /** @param {string} message Human-readable description. */
  constructor(message) {
    super(message);
    this.name = "OpensteerAgentConfigError";
  }
};
/** Agent error carrying only a message (no cause). */
var OpensteerAgentProviderError = class extends OpensteerAgentError {
  /** @param {string} message Human-readable description. */
  constructor(message) {
    super(message);
    this.name = "OpensteerAgentProviderError";
  }
};
/** Agent error with an optional underlying cause. */
var OpensteerAgentExecutionError = class extends OpensteerAgentError {
  /**
   * @param {string} message Human-readable description.
   * @param {unknown} [cause] Underlying error.
   */
  constructor(message, cause) {
    super(message, cause);
    this.name = "OpensteerAgentExecutionError";
  }
};
/** Agent error with a fixed "execution already in progress" message. */
var OpensteerAgentBusyError = class extends OpensteerAgentError {
  constructor() {
    super("An OpenSteer agent execution is already in progress on this instance.");
    this.name = "OpensteerAgentBusyError";
  }
};
/** Agent error with an optional underlying cause. */
var OpensteerAgentActionError = class extends OpensteerAgentError {
  /**
   * @param {string} message Human-readable description.
   * @param {unknown} [cause] Underlying error.
   */
  constructor(message, cause) {
    super(message, cause);
    this.name = "OpensteerAgentActionError";
  }
};
/** Agent error that additionally records a provider name and a status value. */
var OpensteerAgentApiError = class extends OpensteerAgentError {
  status;
  provider;
  /**
   * @param provider Provider identifier stored on `this.provider`.
   * @param {string} message Human-readable description.
   * @param status Status value stored on `this.status`.
   * @param {unknown} [cause] Underlying error.
   */
  constructor(provider, message, status, cause) {
    super(message, cause);
    this.name = "OpensteerAgentApiError";
    this.provider = provider;
    this.status = status;
  }
};
|
|
8292
|
+
|
|
8293
|
+
// src/agent/model.ts
|
|
8294
|
+
// Provider identifiers accepted by resolveCuaModelConfig.
var SUPPORTED_CUA_PROVIDERS = /* @__PURE__ */ new Set(["openai", "anthropic", "google"]);
|
|
8201
|
-
|
|
8202
|
-
|
|
8203
|
-
|
|
8204
|
-
|
|
8205
|
-
|
|
8206
|
-
|
|
8207
|
-
|
|
8208
|
-
|
|
8209
|
-
browser = null;
|
|
8210
|
-
pageRef = null;
|
|
8211
|
-
contextRef = null;
|
|
8212
|
-
ownsBrowser = false;
|
|
8213
|
-
snapshotCache = null;
|
|
8214
|
-
constructor(config = {}) {
|
|
8215
|
-
const resolved = resolveConfig(config);
|
|
8216
|
-
const cloudSelection = resolveCloudSelection({
|
|
8217
|
-
cloud: resolved.cloud
|
|
8218
|
-
});
|
|
8219
|
-
const model = resolved.model;
|
|
8220
|
-
this.config = resolved;
|
|
8221
|
-
this.aiResolve = this.createLazyResolveCallback(model);
|
|
8222
|
-
this.aiExtract = this.createLazyExtractCallback(model);
|
|
8223
|
-
const rootDir = resolved.storage?.rootDir || process.cwd();
|
|
8224
|
-
this.namespace = resolveNamespace(resolved, rootDir);
|
|
8225
|
-
this.storage = new LocalSelectorStorage(rootDir, this.namespace);
|
|
8226
|
-
this.pool = new BrowserPool(resolved.browser || {});
|
|
8227
|
-
if (cloudSelection.cloud) {
|
|
8228
|
-
const cloudConfig = resolved.cloud && typeof resolved.cloud === "object" ? resolved.cloud : void 0;
|
|
8229
|
-
const apiKey = cloudConfig?.apiKey?.trim();
|
|
8230
|
-
if (!apiKey) {
|
|
8231
|
-
throw new Error(
|
|
8232
|
-
"Cloud mode requires a non-empty API key via cloud.apiKey or OPENSTEER_API_KEY."
|
|
8233
|
-
);
|
|
8234
|
-
}
|
|
8235
|
-
this.cloud = createCloudRuntimeState(
|
|
8236
|
-
apiKey,
|
|
8237
|
-
cloudConfig?.baseUrl,
|
|
8238
|
-
cloudConfig?.authScheme
|
|
8239
|
-
);
|
|
8240
|
-
} else {
|
|
8241
|
-
this.cloud = null;
|
|
8242
|
-
}
|
|
8299
|
+
/**
 * Builds the resolved CUA model configuration from agent settings.
 *
 * @param args Object with `agentConfig.model`, an optional `fallbackModel`,
 *   and an optional `env` map (defaults to `process.env`).
 * @returns Object with provider, fullModelName, providerModelName, apiKey,
 *   baseUrl, organization, thinkingBudget and environment.
 * @throws {OpensteerAgentProviderError} When the provider is not in
 *   SUPPORTED_CUA_PROVIDERS.
 */
function resolveCuaModelConfig(args) {
  const environmentVars = args.env || process.env;
  const { modelName: requestedModel, options } = resolveModelSource(
    args.agentConfig.model,
    args.fallbackModel
  );
  const { provider, modelName: providerModelName } = parseProviderModel(requestedModel);
  if (!SUPPORTED_CUA_PROVIDERS.has(provider)) {
    throw new OpensteerAgentProviderError(
      `Unsupported CUA provider "${provider}". Supported providers: openai, anthropic, google.`
    );
  }
  const apiKey = resolveProviderApiKey(provider, options.apiKey, environmentVars);
  // Only a finite number is accepted as a thinking budget; anything else is dropped.
  const rawBudget = options.thinkingBudget;
  let thinkingBudget = void 0;
  if (typeof rawBudget === "number" && Number.isFinite(rawBudget)) {
    thinkingBudget = rawBudget;
  }
  return {
    provider,
    fullModelName: `${provider}/${providerModelName}`,
    providerModelName,
    apiKey,
    baseUrl: normalizeOptional(options.baseUrl),
    organization: normalizeOptional(options.organization),
    thinkingBudget,
    environment: normalizeOptional(options.environment)
  };
}
|
|
8320
|
+
/**
 * Normalises the two accepted shapes of `agent.model` into
 * `{ modelName, options }`.
 *
 * An object value supplies `modelName` plus arbitrary extra options; any other
 * value is treated as an optional string, with `fallbackModel` as backup.
 *
 * @param model Object with `modelName`, or a model string, or nothing.
 * @param fallbackModel Model string used when `model` yields no name.
 * @returns {{modelName: string, options: object}}
 * @throws {OpensteerAgentConfigError} When no model name can be determined.
 */
function resolveModelSource(model, fallbackModel) {
  const isObjectForm = Boolean(model) && typeof model === "object";
  if (!isObjectForm) {
    const chosenName = normalizeOptional(model) || normalizeOptional(fallbackModel);
    if (chosenName) {
      return { modelName: chosenName, options: {} };
    }
    throw new OpensteerAgentConfigError(
      'A CUA model is required. Pass agent.model (for example "openai/computer-use-preview").'
    );
  }
  // Everything except modelName is carried along as provider options.
  const { modelName: rawName, ...options } = model;
  return {
    modelName: normalizeRequired(rawName, "agent.model.modelName"),
    options
  };
}
|
|
8340
|
+
/**
 * Splits a "provider/model" string at its first slash.
 *
 * @param {string} modelName Combined identifier such as
 *   "openai/computer-use-preview".
 * @returns {{provider: string, modelName: string}} Lower-cased, trimmed
 *   provider and trimmed model segment.
 * @throws {OpensteerAgentConfigError} When the slash is missing, first or
 *   last, or the model segment is blank.
 * @throws {OpensteerAgentProviderError} When the provider is not openai,
 *   anthropic or google.
 */
function parseProviderModel(modelName) {
  const separatorIndex = modelName.indexOf("/");
  const hasProviderSegment = separatorIndex > 0;
  const hasModelSegment = separatorIndex !== modelName.length - 1;
  if (!(hasProviderSegment && hasModelSegment)) {
    throw new OpensteerAgentConfigError(
      `Invalid CUA model "${modelName}". Use "provider/model" format (for example "openai/computer-use-preview").`
    );
  }
  const provider = modelName.slice(0, separatorIndex).trim().toLowerCase();
  const providerModelName = modelName.slice(separatorIndex + 1).trim();
  if (providerModelName.length === 0) {
    throw new OpensteerAgentConfigError(
      `Invalid CUA model "${modelName}". The model name segment after the provider cannot be empty.`
    );
  }
  switch (provider) {
    case "openai":
    case "anthropic":
    case "google":
      return { provider, modelName: providerModelName };
    default:
      throw new OpensteerAgentProviderError(
        `Unsupported CUA provider "${provider}". Supported providers: openai, anthropic, google.`
      );
  }
}
|
|
8364
|
+
/**
 * Chooses the API key for a provider: an explicit key wins, otherwise the
 * provider's environment variables are consulted in order.
 *
 * Any provider other than "openai" or "anthropic" is handled with the Google
 * variables and the Google error message.
 *
 * @param {string} provider Provider identifier.
 * @param explicitApiKey Key passed in configuration; used when it is a
 *   non-blank string.
 * @param env Map of environment variables.
 * @returns {string} The trimmed API key.
 * @throws {OpensteerAgentConfigError} When no key is found.
 */
function resolveProviderApiKey(provider, explicitApiKey, env) {
  const explicit = normalizeOptional(explicitApiKey);
  if (explicit) {
    return explicit;
  }
  let envNames;
  let missingKeyMessage;
  switch (provider) {
    case "openai":
      envNames = ["OPENAI_API_KEY"];
      missingKeyMessage = "OpenAI CUA requires an API key via agent.model.apiKey or OPENAI_API_KEY.";
      break;
    case "anthropic":
      envNames = ["ANTHROPIC_API_KEY"];
      missingKeyMessage = "Anthropic CUA requires an API key via agent.model.apiKey or ANTHROPIC_API_KEY.";
      break;
    default:
      envNames = ["GOOGLE_GENERATIVE_AI_API_KEY", "GEMINI_API_KEY", "GOOGLE_API_KEY"];
      missingKeyMessage = "Google CUA requires an API key via agent.model.apiKey, GOOGLE_GENERATIVE_AI_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.";
      break;
  }
  // First non-blank variable wins, in the order listed above.
  for (const envName of envNames) {
    const candidate = normalizeOptional(env[envName]);
    if (candidate) {
      return candidate;
    }
  }
  throw new OpensteerAgentConfigError(missingKeyMessage);
}
|
|
8387
|
+
/**
 * Trims a string and maps "nothing useful" to undefined.
 *
 * @param value Candidate value of any type.
 * @returns {string | undefined} The trimmed string, or undefined when the
 *   value is not a string or is blank after trimming.
 */
function normalizeOptional(value) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed === "" ? void 0 : trimmed;
}
|
|
8392
|
+
/**
 * Like normalizeOptional, but a missing or blank value is an error.
 *
 * @param value Candidate value of any type.
 * @param {string} field Field name used in the error message.
 * @returns {string} The trimmed, non-empty string.
 * @throws {OpensteerAgentConfigError} When the value normalises to nothing.
 */
function normalizeRequired(value, field) {
  const normalized = normalizeOptional(value);
  if (normalized) {
    return normalized;
  }
  throw new OpensteerAgentConfigError(`${field} is required.`);
}
|
|
8399
|
+
|
|
8400
|
+
// src/agent/clients/openai.ts
|
|
8401
|
+
var import_openai = __toESM(require("openai"), 1);
|
|
8402
|
+
|
|
8403
|
+
// src/agent/client.ts
|
|
8404
|
+
/**
 * Shared state holder for CUA clients: viewport size, current URL, and the
 * screenshot/action callbacks the provider-specific subclasses invoke.
 */
var CuaClient = class {
  screenshotProvider = null;
  actionHandler = null;
  // Viewport used until setViewport is called.
  viewport = { width: 1288, height: 711 };
  currentUrl = null;
  /** Replaces the stored viewport with the given dimensions. */
  setViewport(width, height) {
    this.viewport = { width, height };
  }
  /** Records the URL that subclasses report alongside screenshots. */
  setCurrentUrl(url) {
    this.currentUrl = url;
  }
  /** Registers the callback used to obtain screenshots. */
  setScreenshotProvider(provider) {
    this.screenshotProvider = provider;
  }
  /** Registers the callback used to perform actions. */
  setActionHandler(handler) {
    this.actionHandler = handler;
  }
  /**
   * @returns The registered screenshot callback.
   * @throws {Error} When none has been registered.
   */
  getScreenshotProvider() {
    const provider = this.screenshotProvider;
    if (provider) {
      return provider;
    }
    throw new Error("CUA screenshot provider is not initialized.");
  }
  /**
   * @returns The registered action callback.
   * @throws {Error} When none has been registered.
   */
  getActionHandler() {
    const handler = this.actionHandler;
    if (handler) {
      return handler;
    }
    throw new Error("CUA action handler is not initialized.");
  }
};
|
|
8440
|
+
/**
 * Validates the argument of `agent.execute(...)` and returns a clean options
 * object.
 *
 * @param instructionOrOptions A string instruction, or an object with
 *   `instruction` and optional `maxSteps` / `highlightCursor`.
 * @returns Object with `instruction`, plus `maxSteps` and `highlightCursor`
 *   only when they were provided.
 * @throws {OpensteerAgentExecutionError} When the argument or one of its
 *   fields has the wrong type.
 */
function normalizeExecuteOptions(instructionOrOptions) {
  if (typeof instructionOrOptions === "string") {
    return { instruction: normalizeInstruction(instructionOrOptions) };
  }
  const isOptionsObject =
    instructionOrOptions !== null &&
    typeof instructionOrOptions === "object" &&
    !Array.isArray(instructionOrOptions);
  if (!isOptionsObject) {
    throw new OpensteerAgentExecutionError(
      "agent.execute(...) expects either a string instruction or an options object."
    );
  }
  const result = {
    instruction: normalizeInstruction(instructionOrOptions.instruction)
  };
  const { maxSteps, highlightCursor } = instructionOrOptions;
  if (maxSteps !== void 0) {
    result.maxSteps = normalizeMaxSteps(maxSteps);
  }
  if (highlightCursor !== void 0) {
    if (typeof highlightCursor !== "boolean") {
      throw new OpensteerAgentExecutionError(
        'agent.execute(...) "highlightCursor" must be a boolean when provided.'
      );
    }
    result.highlightCursor = highlightCursor;
  }
  return result;
}
|
|
8467
|
+
/**
 * Ensures an instruction is a non-blank string and returns it trimmed.
 *
 * @param instruction Candidate instruction.
 * @returns {string} The trimmed instruction.
 * @throws {OpensteerAgentExecutionError} When the value is not a string or is
 *   blank after trimming (same message in both cases).
 */
function normalizeInstruction(instruction) {
  const trimmed = typeof instruction === "string" ? instruction.trim() : "";
  if (trimmed.length === 0) {
    throw new OpensteerAgentExecutionError(
      'agent.execute(...) requires a non-empty "instruction" string.'
    );
  }
  return trimmed;
}
|
|
8481
|
+
/**
 * Accepts `maxSteps` only when it is a positive integer.
 *
 * @param maxSteps Candidate step limit.
 * @returns {number} The same value, unchanged.
 * @throws {OpensteerAgentExecutionError} For non-numbers, non-integers, zero
 *   and negatives.
 */
function normalizeMaxSteps(maxSteps) {
  const isPositiveInteger =
    typeof maxSteps === "number" && Number.isInteger(maxSteps) && maxSteps > 0;
  if (isPositiveInteger) {
    return maxSteps;
  }
  throw new OpensteerAgentExecutionError(
    'agent.execute(...) "maxSteps" must be a positive integer when provided.'
  );
}
|
|
8489
|
+
|
|
8490
|
+
// src/agent/clients/openai.ts
|
|
8491
|
+
/**
 * CUA client that drives the loop through `client.responses.create` of the
 * OpenAI SDK, using the "computer_use_preview" tool.
 */
var OpenAICuaClient = class extends CuaClient {
  client;
  modelConfig;
  /**
   * @param modelConfig Resolved model settings; `apiKey`, `baseUrl`,
   *   `organization` and `providerModelName` are read by this class.
   */
  constructor(modelConfig) {
    super();
    this.modelConfig = modelConfig;
    this.client = new import_openai.default({
      apiKey: modelConfig.apiKey,
      baseURL: modelConfig.baseUrl,
      organization: modelConfig.organization
    });
  }
  /**
   * Runs request/act iterations until a response contains no computer call or
   * `input.maxSteps` iterations have been performed.
   *
   * @param input Object with `systemPrompt`, `instruction` and `maxSteps`.
   * @returns Object with `success`/`completed` (both the completion flag),
   *   the last non-empty `message`, all `actions`, and accumulated `usage`.
   */
  async execute(input) {
    const actions = [];
    let finalMessage = "";
    let completed = false;
    let step = 0;
    let previousResponseId;
    let nextInputItems = [
      {
        role: "system",
        content: input.systemPrompt
      },
      {
        role: "user",
        content: input.instruction
      }
    ];
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let totalReasoningTokens = 0;
    let totalInferenceTimeMs = 0;
    while (!completed && step < input.maxSteps) {
      const startedAt = Date.now();
      const response = await this.getAction(nextInputItems, previousResponseId);
      totalInferenceTimeMs += Date.now() - startedAt;
      totalInputTokens += toNumber(response.usage?.input_tokens);
      totalOutputTokens += toNumber(response.usage?.output_tokens);
      // Prefer the nested reasoning-token count; fall back to a flat field.
      totalReasoningTokens += toNumber(response.usage?.output_tokens_details?.reasoning_tokens) || toNumber(toRecord(response.usage).reasoning_tokens);
      // Keep the previous id when the response carries no usable one.
      previousResponseId = normalizeString(response.id) || previousResponseId;
      const stepResult = await this.processResponse(response.output);
      actions.push(...stepResult.actions);
      nextInputItems = stepResult.nextInputItems;
      completed = stepResult.completed;
      if (stepResult.message) {
        finalMessage = stepResult.message;
      }
      step += 1;
    }
    return {
      success: completed,
      completed,
      message: finalMessage,
      actions,
      usage: {
        inputTokens: totalInputTokens,
        outputTokens: totalOutputTokens,
        reasoningTokens: totalReasoningTokens > 0 ? totalReasoningTokens : void 0,
        inferenceTimeMs: totalInferenceTimeMs
      }
    };
  }
  /**
   * Sends one request built from the current viewport and input items.
   *
   * @param inputItems Items to send as `input`.
   * @param [previousResponseId] Added as `previous_response_id` when truthy.
   * @returns The SDK response.
   * @throws Whatever `mapOpenAiApiError` produces for a failed call.
   */
  async getAction(inputItems, previousResponseId) {
    const request = {
      model: this.modelConfig.providerModelName,
      tools: [
        {
          type: "computer_use_preview",
          display_width: this.viewport.width,
          display_height: this.viewport.height,
          environment: "browser"
        }
      ],
      input: inputItems,
      truncation: "auto",
      ...previousResponseId ? { previous_response_id: previousResponseId } : {}
    };
    try {
      return await this.client.responses.create(request);
    } catch (error) {
      throw mapOpenAiApiError(error);
    }
  }
  /**
   * Executes every "computer_call" item via the action handler, builds the
   * matching "computer_call_output" items, and collects "output_text" parts
   * of "message" items.
   *
   * @param output Iterable of response output items.
   * @returns Object with `actions`, `nextInputItems`, `completed` (true when
   *   no computer call was present) and the joined, trimmed `message`.
   */
  async processResponse(output) {
    const actions = [];
    const nextInputItems = [];
    const messageParts = [];
    let hasComputerAction = false;
    for (const item of output) {
      if (item.type === "computer_call") {
        hasComputerAction = true;
        const action = toAgentAction(item.action);
        actions.push(action);
        let actionError;
        try {
          await this.getActionHandler()(action);
        } catch (error) {
          // A failed action is reported back in the output payload, not thrown.
          actionError = error instanceof Error ? error.message : String(error);
        }
        const outputItem = {
          type: "computer_call_output",
          call_id: item.call_id
        };
        // Tolerate a missing list: the action has already run at this point,
        // so a TypeError here would lose its result.
        const pendingChecks = Array.isArray(item.pending_safety_checks) ? item.pending_safety_checks : [];
        const safetyChecks = pendingChecks.length ? pendingChecks : void 0;
        const screenshotDataUrl = await this.captureScreenshotDataUrl();
        const outputPayload = {
          type: "input_image",
          image_url: screenshotDataUrl
        };
        if (this.currentUrl) {
          outputPayload.current_url = this.currentUrl;
        }
        if (actionError) {
          outputPayload.error = actionError;
        }
        outputItem.output = outputPayload;
        if (safetyChecks) {
          // NOTE(review): every pending safety check is echoed back as
          // acknowledged without any confirmation step — confirm this is intended.
          outputItem.acknowledged_safety_checks = safetyChecks;
        }
        nextInputItems.push(outputItem);
      }
      if (item.type === "message") {
        const contentParts = Array.isArray(item.content) ? item.content : [];
        for (const content of contentParts) {
          if (content.type === "output_text") {
            messageParts.push(content.text);
          }
        }
      }
    }
    return {
      actions,
      nextInputItems,
      completed: !hasComputerAction,
      message: messageParts.join("\n").trim()
    };
  }
  /**
   * @returns {Promise<string>} The screenshot provider's result wrapped as a
   *   `data:image/png;base64,` URL.
   */
  async captureScreenshotDataUrl() {
    const base64 = await this.getScreenshotProvider()();
    return `data:image/png;base64,${base64}`;
  }
};
|
|
8632
|
+
/**
 * Converts a raw provider action into an agent action whose `type` is always
 * a non-empty, trimmed string.
 *
 * The raw fields are spread first and `type` is written last, so the
 * normalised value cannot be overwritten by a non-string or blank `type` on
 * the raw action.
 *
 * @param action Raw action; non-objects are treated as an empty record.
 * @returns Copy of the action's fields with `type` normalised, or "unknown"
 *   when no usable type is present.
 */
function toAgentAction(action) {
  const actionRecord = toRecord(action);
  return {
    ...actionRecord,
    type: normalizeString(actionRecord.type) || "unknown"
  };
}
|
|
8639
|
+
/**
 * Wraps a failure from the OpenAI client in an OpensteerAgentApiError.
 *
 * The message comes from `error.error.message` when that is a non-blank
 * string, otherwise from the Error's own message or the stringified value.
 * The status is `error.status` when it is a finite number, else 0.
 *
 * @param error The caught value.
 * @returns {OpensteerAgentApiError} Error tagged with provider "openai" and
 *   carrying the original value as its cause.
 */
function mapOpenAiApiError(error) {
  const outer = toRecord(error);
  let message = normalizeString(toRecord(outer.error).message);
  if (!message) {
    message = error instanceof Error ? error.message : String(error);
  }
  return new OpensteerAgentApiError("openai", message, toNumber(outer.status), error);
}
|
|
8646
|
+
/**
 * Returns the value itself when it is a non-null object, otherwise a fresh
 * empty object, so property reads on the result never throw.
 *
 * @param value Any value.
 * @returns {object}
 */
function toRecord(value) {
  if (value !== null && typeof value === "object") {
    return value;
  }
  return {};
}
|
|
8649
|
+
/**
 * Passes finite numbers through and maps everything else to 0.
 *
 * @param value Any value.
 * @returns {number}
 */
function toNumber(value) {
  // Number.isFinite is false for every non-number, so no typeof check is needed.
  return Number.isFinite(value) ? value : 0;
}
|
|
8652
|
+
/**
 * Trims a string value; anything that is not a string, or is blank after
 * trimming, becomes undefined.
 *
 * @param value Any value.
 * @returns {string | undefined}
 */
function normalizeString(value) {
  if (typeof value === "string") {
    const text = value.trim();
    if (text.length > 0) {
      return text;
    }
  }
  return void 0;
}
|
|
8657
|
+
|
|
8658
|
+
// src/agent/clients/anthropic.ts
|
|
8659
|
+
var import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
|
|
8660
|
+
var AnthropicCuaClient = class extends CuaClient {
|
|
8661
|
+
modelConfig;
|
|
8662
|
+
client;
|
|
8663
|
+
constructor(modelConfig) {
|
|
8664
|
+
super();
|
|
8665
|
+
this.modelConfig = modelConfig;
|
|
8666
|
+
this.client = new import_sdk.default({
|
|
8667
|
+
apiKey: modelConfig.apiKey,
|
|
8668
|
+
baseURL: modelConfig.baseUrl
|
|
8669
|
+
});
|
|
8670
|
+
}
|
|
8671
|
+
async execute(input) {
|
|
8672
|
+
const actions = [];
|
|
8673
|
+
let finalMessage = "";
|
|
8674
|
+
let completed = false;
|
|
8675
|
+
let step = 0;
|
|
8676
|
+
const messages = [
|
|
8677
|
+
{
|
|
8678
|
+
role: "user",
|
|
8679
|
+
content: input.instruction
|
|
8680
|
+
}
|
|
8681
|
+
];
|
|
8682
|
+
let totalInputTokens = 0;
|
|
8683
|
+
let totalOutputTokens = 0;
|
|
8684
|
+
let totalReasoningTokens = 0;
|
|
8685
|
+
let totalInferenceTimeMs = 0;
|
|
8686
|
+
while (!completed && step < input.maxSteps) {
|
|
8687
|
+
const startedAt = Date.now();
|
|
8688
|
+
const response = await this.getAction(messages, input.systemPrompt);
|
|
8689
|
+
totalInferenceTimeMs += Date.now() - startedAt;
|
|
8690
|
+
totalInputTokens += toNumber2(response?.usage?.input_tokens);
|
|
8691
|
+
totalOutputTokens += toNumber2(response?.usage?.output_tokens);
|
|
8692
|
+
totalReasoningTokens += toNumber2(toRecord2(response.usage).reasoning_tokens);
|
|
8693
|
+
const content = response.content.map((item) => toRecord2(item));
|
|
8694
|
+
const toolUseItems = content.filter(
|
|
8695
|
+
(item) => item.type === "tool_use" && item.name === "computer"
|
|
8696
|
+
);
|
|
8697
|
+
const message = extractTextMessage(content);
|
|
8698
|
+
if (message) {
|
|
8699
|
+
finalMessage = message;
|
|
8700
|
+
}
|
|
8701
|
+
messages.push({
|
|
8702
|
+
role: "assistant",
|
|
8703
|
+
content
|
|
8704
|
+
});
|
|
8705
|
+
if (!toolUseItems.length) {
|
|
8706
|
+
completed = true;
|
|
8707
|
+
} else {
|
|
8708
|
+
const stepResult = await this.processToolUseItems(toolUseItems);
|
|
8709
|
+
actions.push(...stepResult.actions);
|
|
8710
|
+
messages.push({
|
|
8711
|
+
role: "user",
|
|
8712
|
+
content: stepResult.toolResults
|
|
8713
|
+
});
|
|
8714
|
+
}
|
|
8715
|
+
step += 1;
|
|
8716
|
+
}
|
|
8717
|
+
return {
|
|
8718
|
+
success: completed,
|
|
8719
|
+
completed,
|
|
8720
|
+
message: finalMessage,
|
|
8721
|
+
actions,
|
|
8722
|
+
usage: {
|
|
8723
|
+
inputTokens: totalInputTokens,
|
|
8724
|
+
outputTokens: totalOutputTokens,
|
|
8725
|
+
reasoningTokens: totalReasoningTokens > 0 ? totalReasoningTokens : void 0,
|
|
8726
|
+
inferenceTimeMs: totalInferenceTimeMs
|
|
8727
|
+
}
|
|
8728
|
+
};
|
|
8729
|
+
}
|
|
8730
|
+
async processToolUseItems(items) {
|
|
8731
|
+
const actions = [];
|
|
8732
|
+
const toolResults = [];
|
|
8733
|
+
for (const item of items) {
|
|
8734
|
+
const toolUseId = normalizeString2(item.id);
|
|
8735
|
+
const input = item.input && typeof item.input === "object" ? item.input : {};
|
|
8736
|
+
const action = convertAnthropicAction(input);
|
|
8737
|
+
actions.push(action);
|
|
8738
|
+
let errorMessage2;
|
|
8739
|
+
try {
|
|
8740
|
+
await this.getActionHandler()(action);
|
|
8741
|
+
} catch (error) {
|
|
8742
|
+
errorMessage2 = error instanceof Error ? error.message : String(error);
|
|
8743
|
+
}
|
|
8744
|
+
let imageBlock = null;
|
|
8745
|
+
try {
|
|
8746
|
+
const screenshot = await this.getScreenshotProvider()();
|
|
8747
|
+
imageBlock = {
|
|
8748
|
+
type: "image",
|
|
8749
|
+
source: {
|
|
8750
|
+
type: "base64",
|
|
8751
|
+
media_type: "image/png",
|
|
8752
|
+
data: screenshot
|
|
8753
|
+
}
|
|
8754
|
+
};
|
|
8755
|
+
} catch (error) {
|
|
8756
|
+
errorMessage2 = errorMessage2 || (error instanceof Error ? error.message : String(error));
|
|
8757
|
+
}
|
|
8758
|
+
const resultContent = [];
|
|
8759
|
+
if (imageBlock) {
|
|
8760
|
+
resultContent.push(imageBlock);
|
|
8761
|
+
}
|
|
8762
|
+
if (this.currentUrl) {
|
|
8763
|
+
resultContent.push({
|
|
8764
|
+
type: "text",
|
|
8765
|
+
text: `Current URL: ${this.currentUrl}`
|
|
8766
|
+
});
|
|
8767
|
+
}
|
|
8768
|
+
if (errorMessage2) {
|
|
8769
|
+
resultContent.push({
|
|
8770
|
+
type: "text",
|
|
8771
|
+
text: `Error: ${errorMessage2}`
|
|
8772
|
+
});
|
|
8773
|
+
}
|
|
8774
|
+
toolResults.push({
|
|
8775
|
+
type: "tool_result",
|
|
8776
|
+
tool_use_id: toolUseId || "unknown_tool_use_id",
|
|
8777
|
+
content: resultContent.length > 0 ? resultContent : [
|
|
8778
|
+
{
|
|
8779
|
+
type: "text",
|
|
8780
|
+
text: "Action completed."
|
|
8781
|
+
}
|
|
8782
|
+
]
|
|
8783
|
+
});
|
|
8784
|
+
}
|
|
8785
|
+
return {
|
|
8786
|
+
actions,
|
|
8787
|
+
toolResults
|
|
8788
|
+
};
|
|
8789
|
+
}
|
|
8790
|
+
async getAction(messages, systemPrompt) {
|
|
8791
|
+
const toolVersion = requiresNewestAnthropicToolVersion(
|
|
8792
|
+
this.modelConfig.providerModelName
|
|
8793
|
+
) ? "computer_20251124" : "computer_20250124";
|
|
8794
|
+
const betaFlag = toolVersion === "computer_20251124" ? "computer-use-2025-11-24" : "computer-use-2025-01-24";
|
|
8795
|
+
const request = {
|
|
8796
|
+
model: this.modelConfig.providerModelName,
|
|
8797
|
+
max_tokens: 4096,
|
|
8798
|
+
system: systemPrompt,
|
|
8799
|
+
messages,
|
|
8800
|
+
tools: [
|
|
8801
|
+
{
|
|
8802
|
+
type: toolVersion,
|
|
8803
|
+
name: "computer",
|
|
8804
|
+
display_width_px: this.viewport.width,
|
|
8805
|
+
display_height_px: this.viewport.height,
|
|
8806
|
+
display_number: 1
|
|
8807
|
+
}
|
|
8808
|
+
],
|
|
8809
|
+
betas: [betaFlag]
|
|
8810
|
+
};
|
|
8811
|
+
if (typeof this.modelConfig.thinkingBudget === "number") {
|
|
8812
|
+
request.thinking = {
|
|
8813
|
+
type: "enabled",
|
|
8814
|
+
budget_tokens: this.modelConfig.thinkingBudget
|
|
8815
|
+
};
|
|
8816
|
+
}
|
|
8817
|
+
try {
|
|
8818
|
+
return await this.client.beta.messages.create(
|
|
8819
|
+
request
|
|
8820
|
+
);
|
|
8821
|
+
} catch (error) {
|
|
8822
|
+
throw mapAnthropicApiError(error);
|
|
8823
|
+
}
|
|
8824
|
+
}
|
|
8825
|
+
};
|
|
8826
|
+
/**
 * Translates the `input` of an Anthropic `computer` tool call into the
 * provider-neutral action object consumed by the action handler.
 *
 * Unknown action names are passed through as `{ type, ...input }`.
 *
 * @param {object} input tool_use input; `input.action` selects the branch.
 * @returns {object} normalized action.
 * @throws {OpensteerAgentActionError} when a required field is missing or
 *   has the wrong type.
 */
function convertAnthropicAction(input) {
  const type = normalizeString2(input.action) || "unknown";
  switch (type) {
    case "left_click": {
      const { x, y } = resolveCoordinates(input, type);
      return { type: "click", x, y, button: "left" };
    }
    case "double_click":
    case "doubleClick": {
      const { x, y } = resolveCoordinates(input, type);
      return { type: "double_click", x, y };
    }
    case "drag":
    case "left_click_drag": {
      const start = resolveCoordinateArray(
        input.start_coordinate,
        type,
        "start_coordinate"
      );
      const end = resolveCoordinates(input, type);
      return { type: "drag", path: [start, end] };
    }
    case "scroll": {
      const { x, y } = resolveCoordinates(input, type);
      const direction = normalizeScrollDirection(input.scroll_direction, type);
      const amount = resolvePositiveNumber(
        input.scroll_amount,
        type,
        "scroll_amount"
      );
      // The reported amount is scaled by 100, with a floor of 100 per scroll.
      const magnitude = Math.max(1, amount) * 100;
      let scrollX = 0;
      let scrollY = 0;
      if (direction === "up") scrollY = -magnitude;
      if (direction === "down") scrollY = magnitude;
      if (direction === "left") scrollX = -magnitude;
      if (direction === "right") scrollX = magnitude;
      return { type: "scroll", x, y, scrollX, scrollY };
    }
    case "keypress":
    case "key": {
      const keyText = normalizeRequiredString(
        input.text,
        `Anthropic action "${type}" requires a non-empty text value.`
      );
      return { type: "keypress", keys: [keyText] };
    }
    case "move": {
      const { x, y } = resolveCoordinates(input, type);
      return { type: "move", x, y };
    }
    case "click": {
      const { x, y } = resolveCoordinates(input, type);
      return { type: "click", x, y, button: normalizeMouseButton(input.button) };
    }
    case "type": {
      // Anthropic's `type` action carries only `text`; it types at the
      // current focus. Requiring coordinates here made every such call
      // throw. Coordinates are still validated and forwarded when supplied.
      const hasPoint = input.coordinate != null || input.x != null || input.y != null;
      const point = hasPoint ? resolveCoordinates(input, type) : null;
      const text = normalizeRequiredString(
        input.text,
        `Anthropic action "${type}" requires a non-empty text value.`
      );
      return point
        ? { type: "type", text, x: point.x, y: point.y }
        : { type: "type", text };
    }
    default:
      return { type, ...input };
  }
}
|
|
8924
|
+
/**
 * Joins the text of all `text` content blocks with newlines and trims the
 * result. Blocks of other types, or with a non-string `text`, are ignored.
 *
 * @param {object[]} content content blocks of one assistant message.
 * @returns {string} combined text, or "" when there is none.
 */
function extractTextMessage(content) {
  const lines = content.reduce((collected, block) => {
    if (block.type === "text" && typeof block.text === "string") {
      collected.push(block.text);
    }
    return collected;
  }, []);
  return lines.join("\n").trim();
}
|
|
8928
|
+
/**
 * Reports whether the model name is one of the exact names for which the
 * newest `computer` tool version is selected.
 *
 * @param {string} modelName provider model name (exact match only).
 * @returns {boolean}
 */
function requiresNewestAnthropicToolVersion(modelName) {
  switch (modelName) {
    case "claude-opus-4-6":
    case "claude-sonnet-4-6":
    case "claude-opus-4-5-20251101":
      return true;
    default:
      return false;
  }
}
|
|
8931
|
+
/**
 * Trims a string value.
 *
 * @param {unknown} value candidate value.
 * @returns {string|undefined} the trimmed string, or `undefined` when the
 *   value is not a string or is blank.
 */
function normalizeString2(value) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed === "" ? void 0 : trimmed;
}
|
|
8936
|
+
/**
 * Like `normalizeString2`, but a missing or blank value is an error.
 *
 * @param {unknown} value candidate value.
 * @param {string} errorMessage2 message for the thrown error.
 * @returns {string} the trimmed, non-empty string.
 * @throws {OpensteerAgentActionError} when the value is not a non-blank string.
 */
function normalizeRequiredString(value, errorMessage2) {
  const text = normalizeString2(value);
  if (text) {
    return text;
  }
  throw new OpensteerAgentActionError(errorMessage2);
}
|
|
8943
|
+
/**
 * Returns the value when it is a finite number, otherwise 0.
 *
 * @param {unknown} value candidate value (e.g. a token count).
 * @returns {number}
 */
function toNumber2(value) {
  // Number.isFinite does not coerce, so non-numbers are rejected as well.
  if (Number.isFinite(value)) {
    return value;
  }
  return 0;
}
|
|
8946
|
+
/**
 * Reads the first two entries of a coordinate array.
 *
 * @param {unknown} value expected to be an `[x, y]` array.
 * @returns {[number, number]} each entry is the original number, or NaN
 *   when the value is not an array or the entry is not a number.
 */
function arrayNumber(value) {
  const entryAt = (index) => {
    if (!Array.isArray(value)) return NaN;
    const entry = value[index];
    return typeof entry === "number" ? entry : NaN;
  };
  return [entryAt(0), entryAt(1)];
}
|
|
8953
|
+
/**
 * Resolves the target point of an Anthropic action.
 *
 * Each axis is taken from `input.coordinate` (`[x, y]`) when that entry is
 * finite, otherwise from `input.x` / `input.y`.
 *
 * @param {object} input tool_use input.
 * @param {string} actionType action name, used in the error message.
 * @returns {{x: number, y: number}}
 * @throws {OpensteerAgentActionError} when either axis cannot be resolved.
 */
function resolveCoordinates(input, actionType) {
  const [tupleX, tupleY] = arrayNumber(input.coordinate);
  const x = Number.isFinite(tupleX) ? tupleX : toFiniteNumber(input.x);
  const y = Number.isFinite(tupleY) ? tupleY : toFiniteNumber(input.y);
  if (x != null && y != null) {
    return { x, y };
  }
  throw new OpensteerAgentActionError(
    `Anthropic action "${actionType}" requires numeric x/y coordinates.`
  );
}
|
|
8966
|
+
/**
 * Converts an `[x, y]` array into a point object.
 *
 * @param {unknown} value expected `[x, y]` array of finite numbers.
 * @param {string} actionType action name, used in the error message.
 * @param {string} field field name, used in the error message.
 * @returns {{x: number, y: number}}
 * @throws {OpensteerAgentActionError} when either entry is not finite.
 */
function resolveCoordinateArray(value, actionType, field) {
  const [x, y] = arrayNumber(value);
  const bothFinite = Number.isFinite(x) && Number.isFinite(y);
  if (bothFinite) {
    return { x, y };
  }
  throw new OpensteerAgentActionError(
    `Anthropic action "${actionType}" requires numeric "${field}" coordinates.`
  );
}
|
|
8975
|
+
/**
 * Validates that a value is a finite number greater than zero.
 *
 * @param {unknown} value candidate value.
 * @param {string} actionType action name, used in the error message.
 * @param {string} field field name, used in the error message.
 * @returns {number} the validated number.
 * @throws {OpensteerAgentActionError} otherwise.
 */
function resolvePositiveNumber(value, actionType, field) {
  const parsed = toFiniteNumber(value);
  if (parsed != null && parsed > 0) {
    return parsed;
  }
  throw new OpensteerAgentActionError(
    `Anthropic action "${actionType}" requires a positive numeric "${field}" value.`
  );
}
|
|
8984
|
+
/**
 * Validates an Anthropic scroll direction.
 *
 * @param {unknown} value candidate direction (trimmed before comparison).
 * @param {string} actionType action name, used in the error message.
 * @returns {"up"|"down"|"left"|"right"}
 * @throws {OpensteerAgentActionError} for any other value.
 */
function normalizeScrollDirection(value, actionType) {
  const direction = normalizeString2(value);
  switch (direction) {
    case "up":
    case "down":
    case "left":
    case "right":
      return direction;
    default:
      throw new OpensteerAgentActionError(
        `Anthropic action "${actionType}" requires "scroll_direction" to be one of: up, down, left, right.`
      );
  }
}
|
|
8993
|
+
/**
 * Validates the `button` of an Anthropic "click" action.
 *
 * @param {unknown} value candidate button name (case-insensitive).
 * @returns {"left"|"right"|"middle"} lower-cased button name.
 * @throws {OpensteerAgentActionError} when the value is blank, not a
 *   string, or not a supported button.
 */
function normalizeMouseButton(value) {
  const requested = normalizeRequiredString(
    value,
    'Anthropic action "click" requires a non-empty "button" value.'
  );
  const button = requested.toLowerCase();
  switch (button) {
    case "left":
    case "right":
    case "middle":
      return button;
    default:
      throw new OpensteerAgentActionError(
        `Anthropic action "click" has unsupported button "${button}".`
      );
  }
}
|
|
9005
|
+
/**
 * Returns the value when it is a finite number, otherwise `null`.
 *
 * @param {unknown} value candidate value.
 * @returns {number|null}
 */
function toFiniteNumber(value) {
  // Number.isFinite does not coerce, so strings and other types yield null.
  return Number.isFinite(value) ? value : null;
}
|
|
9011
|
+
/**
 * Wraps an error thrown by the Anthropic SDK in an `OpensteerAgentApiError`.
 *
 * The message is taken from the first non-blank candidate:
 *   1. `error.error.message`
 *   2. `error.error.error.message` — the Anthropic response body has the
 *      shape `{ type: "error", error: { type, message } }` and the SDK
 *      exposes that whole body as `.error`, so the readable message is
 *      usually found here
 *   3. `error.message` (or `String(error)` for non-Error values)
 *
 * @param {unknown} error value caught from the SDK call.
 * @returns {OpensteerAgentApiError} error carrying provider, message, the
 *   numeric HTTP status when present, and the original error.
 */
function mapAnthropicApiError(error) {
  const errorRecord = toRecord2(error);
  const body = toRecord2(errorRecord.error);
  const detail = toRecord2(body.error);
  const status = typeof errorRecord.status === "number" ? errorRecord.status : void 0;
  const fallback = error instanceof Error ? error.message : String(error);
  const message = normalizeString2(body.message) || normalizeString2(detail.message) || fallback;
  return new OpensteerAgentApiError("anthropic", message, status, error);
}
|
|
9018
|
+
/**
 * Returns the value itself when it is a non-null object (arrays included),
 * otherwise a fresh empty object, so properties can be read safely.
 *
 * @param {unknown} value candidate value.
 * @returns {object}
 */
function toRecord2(value) {
  if (typeof value === "object" && value) {
    return value;
  }
  return {};
}
|
|
9021
|
+
|
|
9022
|
+
// src/agent/clients/google.ts
|
|
9023
|
+
var import_genai = require("@google/genai");
|
|
9024
|
+
|
|
9025
|
+
// src/agent/coords.ts
|
|
9026
|
+
// Default viewport size in pixels for computer-use agents.
var DEFAULT_CUA_VIEWPORT = { width: 1288, height: 711 };
|
|
9030
|
+
/**
 * Converts a point on the 0-999 grid into viewport pixels.
 *
 * Each axis is clamped to [0, 999], divided by 1000, scaled by the viewport
 * extent and floored.
 *
 * @param {number} x horizontal grid coordinate.
 * @param {number} y vertical grid coordinate.
 * @param {{width: number, height: number}} viewport viewport size in pixels.
 * @returns {{x: number, y: number}} pixel coordinates.
 */
function normalizeGoogleCoordinates(x, y, viewport) {
  const toPixels = (gridValue, extent) => {
    const clamped = Math.min(999, Math.max(0, gridValue));
    return Math.floor(clamped / 1e3 * extent);
  };
  return {
    x: toPixels(x, viewport.width),
    y: toPixels(y, viewport.height)
  };
}
|
|
9038
|
+
/**
 * Converts coordinates to viewport pixels for providers that report them on
 * a normalized grid. Only "google" is converted; any other provider's
 * coordinates are returned unchanged.
 *
 * @param {string} provider provider name.
 * @param {number} x horizontal coordinate.
 * @param {number} y vertical coordinate.
 * @param {{width: number, height: number}} viewport viewport size in pixels.
 * @returns {{x: number, y: number}}
 */
function maybeNormalizeCoordinates(provider, x, y, viewport) {
  return provider === "google"
    ? normalizeGoogleCoordinates(x, y, viewport)
    : { x, y };
}
|
|
9044
|
+
|
|
9045
|
+
// src/agent/key-mapping.ts
|
|
9046
|
+
// Upper-cased key aliases mapped to the key names used by the browser driver.
// Lookups are done with the upper-cased token (see mapKeyToPlaywright).
var KEY_MAP = {
  // Editing and whitespace
  ENTER: "Enter", RETURN: "Enter",
  ESCAPE: "Escape", ESC: "Escape",
  BACKSPACE: "Backspace",
  TAB: "Tab",
  SPACE: " ",
  DELETE: "Delete", DEL: "Delete",
  // Arrow keys
  ARROWUP: "ArrowUp", ARROWDOWN: "ArrowDown",
  ARROWLEFT: "ArrowLeft", ARROWRIGHT: "ArrowRight",
  ARROW_UP: "ArrowUp", ARROW_DOWN: "ArrowDown",
  ARROW_LEFT: "ArrowLeft", ARROW_RIGHT: "ArrowRight",
  UP: "ArrowUp", DOWN: "ArrowDown",
  LEFT: "ArrowLeft", RIGHT: "ArrowRight",
  // Modifiers
  SHIFT: "Shift",
  CONTROL: "Control", CTRL: "Control",
  ALT: "Alt", OPTION: "Alt",
  META: "Meta", COMMAND: "Meta", CMD: "Meta",
  SUPER: "Meta", WINDOWS: "Meta", WIN: "Meta",
  // Navigation
  HOME: "Home", END: "End",
  PAGEUP: "PageUp", PAGEDOWN: "PageDown",
  PAGE_UP: "PageUp", PAGE_DOWN: "PageDown",
  PGUP: "PageUp", PGDN: "PageDown",
  // Platform-dependent primary modifier: Meta on macOS, Control elsewhere.
  CONTROLORMETA: process.platform === "darwin" ? "Meta" : "Control"
};
|
|
9089
|
+
/**
 * Maps a key name reported by a model to the name in `KEY_MAP`.
 *
 * The lookup is case-insensitive. Keys without a mapping are returned
 * trimmed but otherwise unchanged.
 *
 * @param {string} key key name.
 * @returns {string} mapped key name, or "" for blank input.
 */
function mapKeyToPlaywright(key) {
  const token = key.trim();
  if (token === "") {
    return token;
  }
  return KEY_MAP[token.toUpperCase()] || token;
}
|
|
9095
|
+
|
|
9096
|
+
// src/agent/clients/google.ts
|
|
9097
|
+
/**
 * Computer-use client backed by the Google GenAI SDK.
 *
 * `execute` runs a request/act loop: it asks the model for the next step,
 * maps every returned function call to one or more browser actions, runs
 * them through the action handler and sends a screenshot plus the current
 * URL back as the function response.
 */
var GoogleCuaClient = class extends CuaClient {
  modelConfig;
  client;
  // Conversation sent with every generateContent call; reset per execute().
  history = [];
  /**
   * @param {object} modelConfig reads `apiKey`, optional `baseUrl`,
   *   `providerModelName` and `environment`.
   */
  constructor(modelConfig) {
    super();
    this.modelConfig = modelConfig;
    this.client = new import_genai.GoogleGenAI({
      apiKey: modelConfig.apiKey,
      ...modelConfig.baseUrl ? { httpOptions: { baseUrl: modelConfig.baseUrl } } : {}
    });
  }
  /**
   * Runs the agent loop until the model stops requesting actions or
   * `input.maxSteps` is reached.
   *
   * @param {{systemPrompt: string, instruction: string, maxSteps: number}} input
   * @returns {Promise<object>} `{ success, completed, message, actions, usage }`.
   * @throws {OpensteerAgentActionError} when a function call cannot be
   *   mapped, or when function calls arrive with a finish reason other
   *   than "STOP".
   */
  async execute(input) {
    // The system prompt is sent as a leading user turn.
    this.history = [
      { role: "user", parts: [{ text: `System prompt: ${input.systemPrompt}` }] },
      { role: "user", parts: [{ text: input.instruction }] }
    ];
    const actions = [];
    let finalMessage = "";
    let completed = false;
    let step = 0;
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let totalInferenceTimeMs = 0;
    while (!completed && step < input.maxSteps) {
      const startedAt = Date.now();
      const response = await this.generateContent();
      totalInferenceTimeMs += Date.now() - startedAt;
      const usageMetadata = response.usageMetadata || {};
      totalInputTokens += toFiniteNumberOrZero(usageMetadata.promptTokenCount);
      totalOutputTokens += toFiniteNumberOrZero(
        usageMetadata.candidatesTokenCount
      );
      // Only the first candidate is considered.
      const candidate = Array.isArray(response.candidates) ? response.candidates[0] : null;
      const content = candidate && typeof candidate === "object" && candidate.content && typeof candidate.content === "object" ? candidate.content : null;
      const parts = content && Array.isArray(content.parts) ? content.parts : [];
      const finishReason = extractFinishReason(candidate);
      if (content) {
        this.history.push({
          role: "model",
          parts
        });
      }
      const messageParts = [];
      const functionCalls = [];
      for (const part of parts) {
        if (typeof part.text === "string") {
          messageParts.push(part.text);
        }
        if (part.functionCall && typeof part.functionCall === "object") {
          functionCalls.push(part.functionCall);
        }
      }
      if (messageParts.length) {
        finalMessage = messageParts.join("\n").trim();
      }
      if (!functionCalls.length) {
        completed = isSuccessfulGoogleFinishReason(finishReason);
        if (!completed && !finalMessage) {
          finalMessage = `Google CUA stopped with finish reason: ${finishReason || "unknown"}.`;
        }
      } else {
        const functionResponses = [];
        for (const functionCall of functionCalls) {
          const mappedActions = mapGoogleFunctionCallToActions(
            functionCall,
            this.viewport
          );
          // All mapped actions are recorded, including any that are
          // skipped below after a failure.
          actions.push(...mappedActions);
          let executionError;
          for (const mappedAction of mappedActions) {
            try {
              await this.getActionHandler()(mappedAction);
            } catch (error) {
              executionError = error instanceof Error ? error.message : String(error);
              // Stop at the first failure: the remaining sub-actions (for
              // example select-all, Backspace and typing after a failed
              // click) would run against an unknown page state, and a later
              // failure would overwrite this root-cause error.
              break;
            }
          }
          const screenshotBase64 = await this.getScreenshotProvider()();
          const responsePayload = {
            url: this.currentUrl || ""
          };
          const args = functionCall.args && typeof functionCall.args === "object" ? functionCall.args : null;
          // Any safety_decision attached to the call is acknowledged.
          if (args && args.safety_decision !== void 0) {
            responsePayload.safety_acknowledgement = "true";
          }
          if (executionError) {
            responsePayload.error = executionError;
          }
          functionResponses.push({
            functionResponse: {
              name: typeof functionCall.name === "string" && functionCall.name || "computer_use",
              response: responsePayload,
              parts: [
                {
                  inlineData: {
                    mimeType: "image/png",
                    data: screenshotBase64
                  }
                }
              ]
            }
          });
        }
        if (functionResponses.length) {
          this.history.push({
            role: "user",
            parts: functionResponses
          });
        }
        if (finishReason && finishReason !== "STOP") {
          throw new OpensteerAgentActionError(
            `Google CUA returned function calls with terminal finish reason "${finishReason}".`
          );
        }
        completed = false;
      }
      step += 1;
    }
    return {
      success: completed,
      completed,
      message: finalMessage,
      actions,
      usage: {
        inputTokens: totalInputTokens,
        outputTokens: totalOutputTokens,
        inferenceTimeMs: totalInferenceTimeMs
      }
    };
  }
  /**
   * Sends the current history to the model with the computer-use tool
   * enabled. API failures are rethrown via `mapGoogleApiError`.
   *
   * @returns {Promise<object>} raw generateContent response.
   */
  async generateContent() {
    const params = {
      model: this.modelConfig.providerModelName,
      contents: this.history,
      config: {
        temperature: 1,
        topP: 0.95,
        topK: 40,
        maxOutputTokens: 8192,
        tools: [
          {
            computerUse: {
              environment: resolveGoogleEnvironment(
                this.modelConfig.environment
              )
            }
          }
        ]
      }
    };
    try {
      return await this.client.models.generateContent(params);
    } catch (error) {
      throw mapGoogleApiError(error);
    }
  }
};
|
|
9268
|
+
/**
 * Translates one Google computer-use function call into the ordered list of
 * provider-neutral actions that implement it.
 *
 * Coordinates arrive on the 0-999 grid and are converted to viewport pixels.
 *
 * @param {object} functionCall reads `name` and `args`.
 * @param {{width: number, height: number}} viewport viewport size in pixels.
 * @returns {object[]} actions to run in order.
 * @throws {OpensteerAgentActionError} when the name is missing or
 *   unsupported, or a required argument is missing or invalid.
 */
function mapGoogleFunctionCallToActions(functionCall, viewport) {
  const name = normalizeString3(functionCall.name);
  const args = functionCall.args && typeof functionCall.args === "object" ? functionCall.args : {};
  if (!name) {
    throw new OpensteerAgentActionError(
      'Google CUA function call is missing a "name" value.'
    );
  }
  switch (name) {
    case "click_at": {
      const coordinates = normalizeCoordinates(args, viewport, name);
      return [
        {
          type: "click",
          x: coordinates.x,
          y: coordinates.y,
          button: normalizeString3(args.button) || "left"
        }
      ];
    }
    case "type_text_at": {
      const coordinates = normalizeCoordinates(args, viewport, name);
      const clearBeforeTyping = typeof args.clear_before_typing === "boolean" ? args.clear_before_typing : true;
      const pressEnter = typeof args.press_enter === "boolean" ? args.press_enter : false;
      const text = normalizeRequiredString2(
        args.text,
        'Google action "type_text_at" requires a non-empty "text" value.'
      );
      // Sequence: click to focus, optionally clear the field (select all,
      // then Backspace), type, optionally press Enter.
      const actions = [
        { type: "click", x: coordinates.x, y: coordinates.y, button: "left" }
      ];
      if (clearBeforeTyping) {
        actions.push({ type: "keypress", keys: ["ControlOrMeta+A"] });
        actions.push({ type: "keypress", keys: ["Backspace"] });
      }
      actions.push({ type: "type", text, x: coordinates.x, y: coordinates.y });
      if (pressEnter) {
        actions.push({ type: "keypress", keys: ["Enter"] });
      }
      return actions;
    }
    case "key_combination": {
      const keysRaw = normalizeRequiredString2(
        args.keys,
        'Google action "key_combination" requires a non-empty "keys" value.'
      );
      const keys = keysRaw.split("+").map((part) => part.trim()).filter(Boolean).map((part) => mapKeyToPlaywright(part));
      if (!keys.length) {
        throw new OpensteerAgentActionError(
          'Google action "key_combination" did not produce any key tokens.'
        );
      }
      return [{ type: "keypress", keys }];
    }
    case "scroll_document": {
      const direction = normalizeVerticalDirection(
        args.direction,
        "scroll_document"
      );
      // Whole-document scrolling is done with the paging keys.
      return [
        { type: "keypress", keys: [direction === "up" ? "PageUp" : "PageDown"] }
      ];
    }
    case "scroll_at": {
      const coordinates = normalizeCoordinates(args, viewport, name);
      const direction = normalizeScrollDirection2(args.direction, "scroll_at");
      // `magnitude` is optional in the Gemini computer-use tool (documented
      // default 800); requiring it aborted the run whenever the model left
      // it out. A supplied value is still validated.
      const magnitude = args.magnitude == null ? 800 : parsePositiveNumber(
        args.magnitude,
        "scroll_at",
        "magnitude"
      );
      let scrollX = 0;
      let scrollY = 0;
      if (direction === "up") scrollY = -magnitude;
      if (direction === "down") scrollY = magnitude;
      if (direction === "left") scrollX = -magnitude;
      if (direction === "right") scrollX = magnitude;
      return [
        {
          type: "scroll",
          x: coordinates.x,
          y: coordinates.y,
          scrollX,
          scrollY
        }
      ];
    }
    case "hover_at": {
      const coordinates = normalizeCoordinates(args, viewport, name);
      return [{ type: "move", x: coordinates.x, y: coordinates.y }];
    }
    case "drag_and_drop": {
      const startX = parseRequiredNumber(args.x, "drag_and_drop", "x");
      const startY = parseRequiredNumber(args.y, "drag_and_drop", "y");
      const endX = parseRequiredNumber(
        args.destination_x,
        "drag_and_drop",
        "destination_x"
      );
      const endY = parseRequiredNumber(
        args.destination_y,
        "drag_and_drop",
        "destination_y"
      );
      const start = maybeNormalizeCoordinates("google", startX, startY, viewport);
      const end = maybeNormalizeCoordinates("google", endX, endY, viewport);
      return [{ type: "drag", path: [start, end] }];
    }
    case "navigate":
      return [
        {
          type: "goto",
          url: normalizeRequiredString2(
            args.url,
            'Google action "navigate" requires a non-empty "url" value.'
          )
        }
      ];
    case "go_back":
      return [{ type: "back" }];
    case "go_forward":
      return [{ type: "forward" }];
    case "wait_5_seconds":
      return [{ type: "wait", timeMs: 5e3 }];
    case "search":
      return [{ type: "goto", url: buildGoogleSearchUrl(args) }];
    case "open_web_browser":
      return [{ type: "open_web_browser" }];
    default:
      throw new OpensteerAgentActionError(
        `Unsupported Google CUA function call "${name}".`
      );
  }
}
|
|
9455
|
+
/**
 * Reads `args.x` / `args.y` of a Google function call and converts them to
 * viewport pixels.
 *
 * @param {object} args function-call arguments.
 * @param {{width: number, height: number}} viewport viewport size in pixels.
 * @param {string} actionName action name, used in error messages.
 * @returns {{x: number, y: number}} pixel coordinates.
 * @throws {OpensteerAgentActionError} when x or y is not a finite number.
 */
function normalizeCoordinates(args, viewport, actionName) {
  const gridX = parseRequiredNumber(args.x, actionName, "x");
  const gridY = parseRequiredNumber(args.y, actionName, "y");
  return maybeNormalizeCoordinates("google", gridX, gridY, viewport);
}
|
|
9465
|
+
/**
 * Validates that a value is a finite number.
 *
 * @param {unknown} value candidate value.
 * @param {string} actionName action name, used in the error message.
 * @param {string} field field name, used in the error message.
 * @returns {number} the validated number.
 * @throws {OpensteerAgentActionError} otherwise.
 */
function parseRequiredNumber(value, actionName, field) {
  // Number.isFinite does not coerce, so non-numbers are rejected as well.
  if (!Number.isFinite(value)) {
    throw new OpensteerAgentActionError(
      `Google action "${actionName}" requires numeric "${field}" coordinates.`
    );
  }
  return value;
}
|
|
9473
|
+
/**
 * Validates that a value is a finite number greater than zero.
 *
 * @param {unknown} value candidate value.
 * @param {string} actionName action name, used in the error message.
 * @param {string} field field name, used in the error message.
 * @returns {number} the validated number.
 * @throws {OpensteerAgentActionError} otherwise.
 */
function parsePositiveNumber(value, actionName, field) {
  const isPositive = Number.isFinite(value) && value > 0;
  if (!isPositive) {
    throw new OpensteerAgentActionError(
      `Google action "${actionName}" requires a positive numeric "${field}" value.`
    );
  }
  return value;
}
|
|
9481
|
+
/**
 * Returns the value when it is a finite number, otherwise 0.
 *
 * @param {unknown} value candidate value (e.g. a token count).
 * @returns {number}
 */
function toFiniteNumberOrZero(value) {
  if (Number.isFinite(value)) {
    return value;
  }
  return 0;
}
|
|
9484
|
+
/**
 * Trims a string value.
 *
 * @param {unknown} value candidate value.
 * @returns {string|undefined} the trimmed string, or `undefined` when the
 *   value is not a string or is blank.
 */
function normalizeString3(value) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed === "" ? void 0 : trimmed;
}
|
|
9489
|
+
/**
 * Like `normalizeString3`, but a missing or blank value is an error.
 *
 * @param {unknown} value candidate value.
 * @param {string} errorMessage2 message for the thrown error.
 * @returns {string} the trimmed, non-empty string.
 * @throws {OpensteerAgentActionError} when the value is not a non-blank string.
 */
function normalizeRequiredString2(value, errorMessage2) {
  const text = normalizeString3(value);
  if (text) {
    return text;
  }
  throw new OpensteerAgentActionError(errorMessage2);
}
|
|
9496
|
+
/**
 * Validates a Google scroll direction.
 *
 * @param {unknown} value candidate direction (trimmed before comparison).
 * @param {string} actionName action name, used in the error message.
 * @returns {"up"|"down"|"left"|"right"}
 * @throws {OpensteerAgentActionError} for any other value.
 */
function normalizeScrollDirection2(value, actionName) {
  const direction = normalizeString3(value);
  switch (direction) {
    case "up":
    case "down":
    case "left":
    case "right":
      return direction;
    default:
      throw new OpensteerAgentActionError(
        `Google action "${actionName}" requires "direction" to be one of: up, down, left, right.`
      );
  }
}
|
|
9505
|
+
/**
 * Validates a vertical scroll direction.
 *
 * @param {unknown} value candidate direction (trimmed before comparison).
 * @param {string} actionName action name, used in the error message.
 * @returns {"up"|"down"}
 * @throws {OpensteerAgentActionError} for any other value.
 */
function normalizeVerticalDirection(value, actionName) {
  const direction = normalizeString3(value);
  const isVertical = direction === "up" || direction === "down";
  if (!isVertical) {
    throw new OpensteerAgentActionError(
      `Google action "${actionName}" requires "direction" to be "up" or "down".`
    );
  }
  return direction;
}
|
|
9514
|
+
/**
 * Builds the URL for a Google `search` function call.
 *
 * The `search` action of the Gemini computer-use tool takes no arguments
 * and means "open the search engine home page". Throwing on a missing
 * query aborted the run for that standard call, so a missing or blank
 * query now yields the home page. When `args.query` (or, as a fallback,
 * `args.text`) holds text, a results URL is returned as before.
 *
 * @param {object} args function-call arguments.
 * @returns {string} absolute https URL.
 * @throws {OpensteerAgentActionError} when a query is supplied but is not
 *   a string.
 */
function buildGoogleSearchUrl(args) {
  const rawQuery = args.query ?? args.text;
  const query = typeof rawQuery === "string" ? rawQuery.trim() : rawQuery;
  if (query === void 0 || query === null || query === "") {
    return "https://www.google.com/";
  }
  if (typeof query !== "string") {
    throw new OpensteerAgentActionError(
      'Google action "search" requires "query" to be a string when provided.'
    );
  }
  return `https://www.google.com/search?q=${encodeURIComponent(query)}`;
}
|
|
9521
|
+
/**
 * Reads the trimmed `finishReason` of a response candidate.
 *
 * @param {unknown} candidate first response candidate, if any.
 * @returns {string|undefined} the finish reason, or `undefined` when the
 *   candidate is not an object or has no non-blank string finish reason.
 */
function extractFinishReason(candidate) {
  const isObject = Boolean(candidate) && typeof candidate === "object";
  return isObject ? normalizeString3(candidate.finishReason) : void 0;
}
|
|
9527
|
+
/**
 * Reports whether a finish reason counts as a normal completion: either no
 * reason was given or it is "STOP".
 *
 * @param {string|undefined} finishReason finish reason of the candidate.
 * @returns {boolean}
 */
function isSuccessfulGoogleFinishReason(finishReason) {
  if (finishReason === "STOP") {
    return true;
  }
  return !finishReason;
}
|
|
9530
|
+
/**
 * Picks the computer-use environment for the request.
 *
 * Only an explicit ENVIRONMENT_UNSPECIFIED is honoured; every other value,
 * including a missing one, resolves to ENVIRONMENT_BROWSER.
 *
 * @param {unknown} value configured environment name.
 * @returns the matching `Environment` enum value from the GenAI SDK.
 */
function resolveGoogleEnvironment(value) {
  const { Environment } = import_genai;
  const requested = normalizeString3(value);
  return requested === Environment.ENVIRONMENT_UNSPECIFIED
    ? Environment.ENVIRONMENT_UNSPECIFIED
    : Environment.ENVIRONMENT_BROWSER;
}
|
|
9537
|
+
/**
 * Wraps an error thrown by the Google GenAI SDK in an
 * `OpensteerAgentApiError`.
 *
 * @param {unknown} error value caught from the SDK call.
 * @returns {OpensteerAgentApiError} error carrying provider, message, the
 *   numeric `status` when present, and the original error.
 */
function mapGoogleApiError(error) {
  const { status: rawStatus, message: rawMessage } = toRecord3(error);
  const status = typeof rawStatus === "number" ? rawStatus : void 0;
  const fallback = () => error instanceof Error ? error.message : String(error);
  const message = normalizeString3(rawMessage) || fallback();
  return new OpensteerAgentApiError("google", message, status, error);
}
|
|
9543
|
+
/**
 * Returns the value itself when it is a non-null object (arrays included),
 * otherwise a fresh empty object, so properties can be read safely.
 *
 * @param {unknown} value candidate value.
 * @returns {object}
 */
function toRecord3(value) {
  if (typeof value === "object" && value) {
    return value;
  }
  return {};
}
|
|
9546
|
+
|
|
9547
|
+
// src/agent/provider.ts
|
|
9548
|
+
// System prompt used when the agent configuration does not supply one.
var DEFAULT_SYSTEM_PROMPT = [
  "You are a browser automation agent.",
  "Complete the user instruction safely and efficiently.",
  "Do not ask follow-up questions.",
  "Finish as soon as the task is complete."
].join(" ");
|
|
9549
|
+
/**
 * Validates the user-supplied agent configuration and fills in defaults.
 *
 * @param {object} args reads `agentConfig`, `fallbackModel` and `env`.
 * @returns {object} `{ mode: "cua", systemPrompt, waitBetweenActionsMs, model }`.
 * @throws {OpensteerAgentConfigError} when `agentConfig` is not an object
 *   or its `mode` is not "cua".
 */
function resolveAgentConfig(args) {
  const { agentConfig } = args;
  const isConfigObject = Boolean(agentConfig) && typeof agentConfig === "object";
  if (!isConfigObject) {
    throw new OpensteerAgentConfigError(
      'agent() requires a configuration object with mode: "cua".'
    );
  }
  if (agentConfig.mode !== "cua") {
    throw new OpensteerAgentConfigError(
      `Unsupported agent mode "${String(agentConfig.mode)}". OpenSteer currently supports only mode: "cua".`
    );
  }
  const model = resolveCuaModelConfig({
    agentConfig,
    fallbackModel: args.fallbackModel,
    env: args.env
  });
  // A blank or non-string prompt falls back to the default prompt.
  const systemPrompt = normalizeNonEmptyString(agentConfig.systemPrompt) || DEFAULT_SYSTEM_PROMPT;
  const waitBetweenActionsMs = normalizeWaitBetween(agentConfig.waitBetweenActionsMs);
  return { mode: "cua", systemPrompt, waitBetweenActionsMs, model };
}
|
|
9573
|
+
/**
 * Instantiates the computer-use client for the provider named in
 * `config.model.provider`.
 *
 * @param {object} config resolved agent configuration.
 * @returns the OpenAI, Anthropic or Google client built from `config.model`.
 * @throws {OpensteerAgentProviderError} for any other provider.
 */
function createCuaClient(config) {
  const { provider } = config.model;
  if (provider === "openai") {
    return new OpenAICuaClient(config.model);
  }
  if (provider === "anthropic") {
    return new AnthropicCuaClient(config.model);
  }
  if (provider === "google") {
    return new GoogleCuaClient(config.model);
  }
  throw new OpensteerAgentProviderError(
    `Unsupported CUA provider "${String(provider)}".`
  );
}
|
|
9587
|
+
/**
 * Trim a string and return it, or `undefined` when the input is not a string
 * or is empty after trimming.
 */
function normalizeNonEmptyString(value) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed === "" ? undefined : trimmed;
}
|
|
9592
|
+
/**
 * Normalize the pause between agent actions, in milliseconds.
 *
 * Finite, non-negative numbers are floored to an integer; every other input
 * falls back to 500.
 */
function normalizeWaitBetween(value) {
  const usable = typeof value === "number" && Number.isFinite(value) && !(value < 0);
  return usable ? Math.floor(value) : 500;
}
|
|
9598
|
+
|
|
9599
|
+
// src/agent/action-executor.ts
|
|
9600
|
+
/**
 * Perform a single computer-use (CUA) action on a page.
 *
 * The action type is canonicalized with normalizeActionType and then dispatched
 * to the page's mouse, keyboard or navigation APIs. "screenshot" and
 * "open_web_browser" are accepted but do nothing here.
 *
 * @param page - Page-like object exposing `mouse`, `keyboard`, `goto`, `goBack` and `goForward`.
 * @param action - Action record; `type` selects the behavior, the remaining
 *   fields (coordinates, text, keys, path, url, ...) are read per type.
 * @returns {Promise<void>} Resolves once the action has been carried out.
 * @throws {OpensteerAgentActionError} For an unsupported action type, missing
 *   coordinates, an empty drag path, or a missing goto URL.
 */
async function executeAgentAction(page, action) {
  const type = normalizeActionType(action.type);
  switch (type) {
    case "click": {
      const { x, y } = toPoint(action);
      await page.mouse.click(x, y, {
        button: normalizeMouseButton2(action.button, "left"),
        clickCount: normalizeClickCount(action.clickCount, 1)
      });
      return;
    }
    case "doubleclick": {
      const { x, y } = toPoint(action);
      await page.mouse.click(x, y, {
        button: normalizeMouseButton2(action.button, "left"),
        clickCount: 2
      });
      return;
    }
    case "tripleclick": {
      const { x, y } = toPoint(action);
      await page.mouse.click(x, y, {
        button: normalizeMouseButton2(action.button, "left"),
        clickCount: 3
      });
      return;
    }
    case "rightclick": {
      const { x, y } = toPoint(action);
      await page.mouse.click(x, y, {
        button: "right",
        clickCount: normalizeClickCount(action.clickCount, 1)
      });
      return;
    }
    case "type": {
      // When the action carries coordinates, click there first to focus the target.
      await maybeFocusPoint(page, action);
      const text = typeof action.text === "string" ? action.text : "";
      if (action.clearBeforeTyping === true) {
        await pressKeyCombo(page, "ControlOrMeta+A");
        await page.keyboard.press("Backspace");
      }
      await page.keyboard.type(text);
      if (action.pressEnter === true) {
        await page.keyboard.press("Enter");
      }
      return;
    }
    case "keypress": {
      const combos = normalizeKeyCombos(action.keys);
      for (const combo of combos) {
        await pressKeyCombo(page, combo);
      }
      return;
    }
    case "scroll": {
      // Both camelCase and snake_case delta fields are accepted; missing deltas mean 0.
      const x = numberOr(action.scrollX, action.scroll_x, 0);
      const y = numberOr(action.scrollY, action.scroll_y, 0);
      const point = maybePoint(action);
      if (point) {
        await page.mouse.move(point.x, point.y);
      }
      await page.mouse.wheel(x, y);
      return;
    }
    case "drag": {
      const path5 = normalizePath(action.path);
      if (!path5.length) {
        throw new OpensteerAgentActionError(
          "Drag action requires a non-empty path."
        );
      }
      await page.mouse.move(path5[0].x, path5[0].y);
      await page.mouse.down();
      try {
        for (const point of path5.slice(1)) {
          await page.mouse.move(point.x, point.y);
        }
      } catch (error) {
        // Never leave the button held after a failed drag. The release is
        // best-effort so the original move error is the one that propagates.
        await Promise.resolve(page.mouse.up()).catch(() => void 0);
        throw error;
      }
      await page.mouse.up();
      return;
    }
    case "move":
    case "hover": {
      const { x, y } = toPoint(action);
      await page.mouse.move(x, y);
      return;
    }
    case "wait": {
      const ms = numberOr(action.timeMs, action.time_ms, 1e3);
      await sleep3(ms);
      return;
    }
    case "goto": {
      const url = normalizeRequiredString3(action.url, "Action URL is required for goto.");
      await page.goto(url, { waitUntil: "load" });
      return;
    }
    case "back": {
      // History navigation is best-effort: failures are deliberately ignored.
      await page.goBack({ waitUntil: "load" }).catch(() => void 0);
      return;
    }
    case "forward": {
      // History navigation is best-effort: failures are deliberately ignored.
      await page.goForward({ waitUntil: "load" }).catch(() => void 0);
      return;
    }
    case "screenshot":
    case "open_web_browser": {
      // Nothing to do on the page for these action types.
      return;
    }
    default:
      throw new OpensteerAgentActionError(
        `Unsupported CUA action type "${String(action.type)}".`
      );
  }
}
|
|
9714
|
+
/**
 * Report whether an action is treated as mutating: every canonical action type
 * except "wait", "screenshot" and "open_web_browser" counts.
 */
function isMutatingAgentAction(action) {
  switch (normalizeActionType(action.type)) {
    case "wait":
    case "screenshot":
    case "open_web_browser":
      return false;
    default:
      return true;
  }
}
|
|
9718
|
+
/**
 * Canonicalize an action type name: trim, lower-case, and fold the known
 * snake_case aliases onto the names the executor switches on.
 *
 * Non-string or blank input yields the empty string; unknown names are
 * returned lower-cased and otherwise unchanged.
 */
function normalizeActionType(value) {
  if (typeof value !== "string") {
    return "";
  }
  const key = value.trim().toLowerCase();
  switch (key) {
    case "double_click":
      return "doubleclick";
    case "triple_click":
      return "tripleclick";
    case "left_click":
      return "click";
    case "right_click":
      return "rightclick";
    case "openwebbrowser":
      return "open_web_browser";
    default:
      return key;
  }
}
|
|
9739
|
+
/**
 * Resolve the coordinates of an action, failing when they are absent.
 *
 * @returns `{ x, y }` as produced by maybePoint.
 * @throws {OpensteerAgentActionError} When maybePoint yields no point.
 */
function toPoint(action) {
  const resolved = maybePoint(action);
  if (!resolved) {
    throw new OpensteerAgentActionError(
      `Action "${String(action.type)}" requires numeric x and y coordinates.`
    );
  }
  return resolved;
}
|
|
9748
|
+
/**
 * Read a point from an action, if it has one.
 *
 * `action.x` / `action.y` take precedence; otherwise the first two entries of
 * an array-valued `action.coordinate` (or, failing that, `action.coordinates`)
 * are used. Returns `null` unless both resolved values are finite.
 */
function maybePoint(action) {
  let pair = null;
  if (Array.isArray(action.coordinate)) {
    pair = action.coordinate;
  } else if (Array.isArray(action.coordinates)) {
    pair = action.coordinates;
  }
  const x = numberOr(action.x, pair?.[0]);
  const y = numberOr(action.y, pair?.[1]);
  return Number.isFinite(x) && Number.isFinite(y) ? { x, y } : null;
}
|
|
9760
|
+
/**
 * If the action carries coordinates, single-click that point (using the
 * action's button, defaulting to "left"). Does nothing when no point is present.
 */
async function maybeFocusPoint(page, action) {
  const target = maybePoint(action);
  if (target) {
    const button = normalizeMouseButton2(action.button, "left");
    await page.mouse.click(target.x, target.y, { button, clickCount: 1 });
  }
}
|
|
9770
|
+
/**
 * Convert a raw drag path into a list of `{ x, y }` points.
 *
 * Entries that are not objects, or whose coordinates are not finite, are
 * skipped. Coordinates may be numbers or non-blank numeric strings. Other
 * values (null, "", booleans, arrays, ...) are rejected rather than passed
 * through `Number()`, which would silently coerce them to 0 or 1 and create a
 * bogus point at the edge of the viewport.
 *
 * @param path5 - Candidate path; anything other than an array yields `[]`.
 * @returns {{x: number, y: number}[]} The usable points, in input order.
 */
function normalizePath(path5) {
  if (!Array.isArray(path5)) return [];
  const toCoordinate = (raw) => {
    if (typeof raw === "number") return raw;
    if (typeof raw === "string" && raw.trim() !== "") return Number(raw);
    return NaN;
  };
  const points = [];
  for (const entry of path5) {
    if (!entry || typeof entry !== "object") continue;
    const x = toCoordinate(entry.x);
    const y = toCoordinate(entry.y);
    if (!Number.isFinite(x) || !Number.isFinite(y)) continue;
    points.push({ x, y });
  }
  return points;
}
|
|
9783
|
+
/**
 * Map a button name onto "left", "right" or "middle" (case-insensitively),
 * returning `fallback` for anything else.
 */
function normalizeMouseButton2(value, fallback) {
  if (typeof value !== "string") {
    return fallback;
  }
  const lowered = value.toLowerCase();
  const recognized = lowered === "left" || lowered === "right" || lowered === "middle";
  return recognized ? lowered : fallback;
}
|
|
9795
|
+
/**
 * Normalize a click count to a positive integer.
 *
 * Finite numbers are floored. The result must be at least 1, so fractional
 * inputs between 0 and 1 (which floor to 0) fall back instead of producing a
 * zero-click action.
 *
 * @param value - Candidate click count.
 * @param fallback - Returned when `value` is not a usable count.
 * @returns {number} The floored count, or `fallback`.
 */
function normalizeClickCount(value, fallback) {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return fallback;
  }
  const count = Math.floor(value);
  return count >= 1 ? count : fallback;
}
|
|
9801
|
+
/**
 * Turn the `keys` field of a keypress action into a list of key combos.
 *
 * - A string becomes a single combo (or none when blank).
 * - An array keeps only its non-blank string entries, trimmed. If none of
 *   them contains "+" and there is more than one, they are joined with "+"
 *   into one combo; otherwise each entry is its own combo.
 * - Anything else yields an empty list.
 */
function normalizeKeyCombos(value) {
  if (typeof value === "string") {
    const single = value.trim();
    return single === "" ? [] : [single];
  }
  if (!Array.isArray(value)) {
    return [];
  }
  const keys = [];
  for (const entry of value) {
    if (typeof entry !== "string") continue;
    const cleaned = entry.trim();
    if (cleaned) keys.push(cleaned);
  }
  const allPlainKeys = keys.every((key) => !key.includes("+"));
  if (keys.length > 1 && allPlainKeys) {
    return [keys.join("+")];
  }
  return keys;
}
|
|
9819
|
+
/**
 * Return the first argument that is a finite number, or `NaN` when none is.
 */
function numberOr(...values) {
  const firstFinite = values.find(
    (candidate) => typeof candidate === "number" && Number.isFinite(candidate)
  );
  return firstFinite === undefined ? NaN : firstFinite;
}
|
|
9825
|
+
/**
 * Return the trimmed form of a required string.
 *
 * @throws {OpensteerAgentActionError} With `errorMessage2` when `value` is not
 *   a string or is blank.
 */
function normalizeRequiredString3(value, errorMessage2) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  if (trimmed) {
    return trimmed;
  }
  throw new OpensteerAgentActionError(errorMessage2);
}
|
|
9831
|
+
/**
 * Press a key or a "+"-separated key combination on the page keyboard.
 *
 * Every part is mapped through mapKeyToPlaywright. For a combination, all
 * parts but the last are held down as modifiers while the last is pressed,
 * then released in reverse order.
 *
 * Only modifiers that were actually pressed are released, and they are
 * released even when pressing a later modifier or the final key fails, so a
 * failed combo never leaves keys stuck down.
 *
 * @param page - Page-like object exposing `keyboard.down/up/press`.
 * @param combo - Key name or combination such as "ControlOrMeta+A".
 * @returns {Promise<void>}
 */
async function pressKeyCombo(page, combo) {
  const trimmed = combo.trim();
  if (!trimmed) return;
  if (!trimmed.includes("+")) {
    await page.keyboard.press(mapKeyToPlaywright(trimmed));
    return;
  }
  const parts = trimmed.split("+").map((part) => part.trim()).filter(Boolean).map((part) => mapKeyToPlaywright(part));
  if (!parts.length) return;
  const modifiers = parts.slice(0, -1);
  const last = parts[parts.length - 1];
  // Track what is actually held so cleanup matches reality on partial failure.
  const held = [];
  try {
    for (const modifier of modifiers) {
      await page.keyboard.down(modifier);
      held.push(modifier);
    }
    await page.keyboard.press(last);
  } finally {
    for (const modifier of held.reverse()) {
      await page.keyboard.up(modifier);
    }
  }
}
|
|
9853
|
+
/**
 * Resolve after roughly `ms` milliseconds; negative delays are clamped to 0.
 */
function sleep3(ms) {
  return new Promise((done) => {
    const delay = Math.max(0, ms);
    setTimeout(done, delay);
  });
}
|
|
9856
|
+
|
|
9857
|
+
// src/agent/handler.ts
|
|
9858
|
+
/**
 * Runs a computer-use (CUA) client against a single page.
 *
 * The handler wires the client to the page (viewport, current URL, screenshot
 * provider, action handler), then lets the client execute an instruction and
 * returns its result annotated with the configured provider and model name.
 */
var OpensteerCuaAgentHandler = class {
  page;
  config;
  client;
  debug;
  onMutatingAction;
  // True once the cursor overlay has been rendered at least once. Rendering
  // does not rely on this flag: each render re-creates the overlay if needed.
  cursorOverlayInjected = false;
  /**
   * @param options - `{ page, config, client, debug, onMutatingAction }`.
   */
  constructor(options) {
    this.page = options.page;
    this.config = options.config;
    this.client = options.client;
    this.debug = options.debug;
    this.onMutatingAction = options.onMutatingAction;
  }
  /**
   * Execute an instruction through the CUA client.
   *
   * @param options - `{ instruction, maxSteps, highlightCursor }`; `maxSteps`
   *   defaults to 20 and the cursor overlay is only drawn when
   *   `highlightCursor === true`.
   * @returns The client's result spread together with `provider` and `model`.
   * @throws {OpensteerAgentExecutionError} Wrapping any error raised by the client run.
   */
  async execute(options) {
    const instruction = options.instruction;
    const maxSteps = options.maxSteps ?? 20;
    await this.initializeClient();
    const highlightCursor = options.highlightCursor === true;
    this.client.setActionHandler(async (action) => {
      if (highlightCursor) {
        await this.maybeRenderCursor(action);
      }
      await executeAgentAction(this.page, action);
      // Keep the client's notion of the current URL in sync after every action.
      this.client.setCurrentUrl(this.page.url());
      if (isMutatingAgentAction(action)) {
        this.onMutatingAction?.(action);
      }
      await sleep4(this.config.waitBetweenActionsMs);
    });
    try {
      const result = await this.client.execute({
        instruction,
        maxSteps,
        systemPrompt: this.config.systemPrompt
      });
      return {
        ...result,
        provider: this.config.model.provider,
        model: this.config.model.fullModelName
      };
    } catch (error) {
      throw new OpensteerAgentExecutionError(
        `CUA agent execution failed: ${error instanceof Error ? error.message : String(error)}`,
        error
      );
    }
  }
  /**
   * Give the client the viewport size, the current URL, and a screenshot
   * provider that returns a base64-encoded PNG of the visible viewport.
   */
  async initializeClient() {
    const viewport = await this.resolveViewport();
    this.client.setViewport(viewport.width, viewport.height);
    this.client.setCurrentUrl(this.page.url());
    this.client.setScreenshotProvider(async () => {
      const buffer = await this.page.screenshot({
        fullPage: false,
        type: "png"
      });
      return buffer.toString("base64");
    });
  }
  /**
   * Determine the viewport size: the page's configured viewport if it has a
   * width and height, else the window's inner size measured in the page, else
   * DEFAULT_CUA_VIEWPORT.
   */
  async resolveViewport() {
    const directViewport = this.page.viewportSize();
    if (directViewport?.width && directViewport?.height) {
      return directViewport;
    }
    try {
      const evaluated = await this.page.evaluate(() => ({
        width: window.innerWidth,
        height: window.innerHeight
      }));
      if (evaluated && typeof evaluated === "object" && typeof evaluated.width === "number" && typeof evaluated.height === "number" && evaluated.width > 0 && evaluated.height > 0) {
        return {
          width: Math.floor(evaluated.width),
          height: Math.floor(evaluated.height)
        };
      }
    } catch {
      // Measuring in the page failed; fall through to the default viewport.
    }
    return DEFAULT_CUA_VIEWPORT;
  }
  /**
   * Draw (or move) a cursor marker at the action's `x`/`y`, when both are numbers.
   *
   * The overlay element is looked up on every call and re-created when it is
   * missing, so the marker keeps working after a navigation replaces the
   * document. Failures are never propagated; they are logged only in debug mode.
   */
  async maybeRenderCursor(action) {
    const x = typeof action.x === "number" ? action.x : null;
    const y = typeof action.y === "number" ? action.y : null;
    if (x == null || y == null) {
      return;
    }
    try {
      // The callback runs inside the page, so it must be fully self-contained.
      await this.page.evaluate(
        ({ px, py }) => {
          let cursor = document.getElementById("__opensteer_cua_cursor");
          if (!cursor) {
            cursor = document.createElement("div");
            cursor.id = "__opensteer_cua_cursor";
            cursor.style.position = "fixed";
            cursor.style.width = "14px";
            cursor.style.height = "14px";
            cursor.style.borderRadius = "999px";
            cursor.style.background = "rgba(255, 51, 51, 0.85)";
            cursor.style.border = "2px solid rgba(255, 255, 255, 0.95)";
            cursor.style.boxShadow = "0 0 0 3px rgba(255, 51, 51, 0.25)";
            cursor.style.pointerEvents = "none";
            cursor.style.zIndex = "2147483647";
            cursor.style.transform = "translate(-9999px, -9999px)";
            cursor.style.transition = "transform 80ms linear";
            document.documentElement.appendChild(cursor);
          }
          // The marker is 14px wide, so offset by 7 to center it on the point.
          cursor.style.transform = `translate(${Math.round(px - 7)}px, ${Math.round(py - 7)}px)`;
        },
        { px: x, py: y }
      );
      this.cursorOverlayInjected = true;
    } catch (error) {
      if (this.debug) {
        const message = error instanceof Error ? error.message : String(error);
        console.warn(`[opensteer] cursor overlay failed: ${message}`);
      }
    }
  }
};
|
|
9981
|
+
/**
 * Resolve after roughly `ms` milliseconds; negative delays are clamped to 0.
 */
function sleep4(ms) {
  return new Promise((done) => {
    const delay = Math.max(0, ms);
    setTimeout(done, delay);
  });
}
|
|
9984
|
+
|
|
9985
|
+
// src/opensteer.ts
|
|
9986
|
+
// Cloud action methods treated as interactions: invokeCloudAction converts a
// CLOUD_ACTION_FAILED error for one of these into an action error.
var CLOUD_INTERACTION_METHODS = /* @__PURE__ */ new Set([
  "click", "dblclick", "rightclick", "hover",
  "input", "select", "scroll", "uploadFile"
]);
|
|
9996
|
+
var Opensteer = class _Opensteer {
|
|
9997
|
+
config;
|
|
9998
|
+
aiResolve;
|
|
9999
|
+
aiExtract;
|
|
10000
|
+
namespace;
|
|
10001
|
+
storage;
|
|
10002
|
+
pool;
|
|
10003
|
+
cloud;
|
|
10004
|
+
browser = null;
|
|
10005
|
+
pageRef = null;
|
|
10006
|
+
contextRef = null;
|
|
10007
|
+
ownsBrowser = false;
|
|
10008
|
+
snapshotCache = null;
|
|
10009
|
+
agentExecutionInFlight = false;
|
|
10010
|
+
constructor(config = {}) {
|
|
10011
|
+
const resolved = resolveConfig(config);
|
|
10012
|
+
const cloudSelection = resolveCloudSelection({
|
|
10013
|
+
cloud: resolved.cloud
|
|
10014
|
+
});
|
|
10015
|
+
const model = resolved.model;
|
|
10016
|
+
this.config = resolved;
|
|
10017
|
+
this.aiResolve = this.createLazyResolveCallback(model);
|
|
10018
|
+
this.aiExtract = this.createLazyExtractCallback(model);
|
|
10019
|
+
const rootDir = resolved.storage?.rootDir || process.cwd();
|
|
10020
|
+
this.namespace = resolveNamespace(resolved, rootDir);
|
|
10021
|
+
this.storage = new LocalSelectorStorage(rootDir, this.namespace);
|
|
10022
|
+
this.pool = new BrowserPool(resolved.browser || {});
|
|
10023
|
+
if (cloudSelection.cloud) {
|
|
10024
|
+
const cloudConfig = resolved.cloud && typeof resolved.cloud === "object" ? resolved.cloud : void 0;
|
|
10025
|
+
const apiKey = cloudConfig?.apiKey?.trim();
|
|
10026
|
+
if (!apiKey) {
|
|
10027
|
+
throw new Error(
|
|
10028
|
+
"Cloud mode requires a non-empty API key via cloud.apiKey or OPENSTEER_API_KEY."
|
|
10029
|
+
);
|
|
10030
|
+
}
|
|
10031
|
+
this.cloud = createCloudRuntimeState(
|
|
10032
|
+
apiKey,
|
|
10033
|
+
cloudConfig?.baseUrl,
|
|
10034
|
+
cloudConfig?.authScheme
|
|
10035
|
+
);
|
|
10036
|
+
} else {
|
|
10037
|
+
this.cloud = null;
|
|
10038
|
+
}
|
|
10039
|
+
}
|
|
10040
|
+
createLazyResolveCallback(model) {
|
|
10041
|
+
let resolverPromise = null;
|
|
10042
|
+
return async (...args) => {
|
|
10043
|
+
try {
|
|
10044
|
+
if (!resolverPromise) {
|
|
10045
|
+
resolverPromise = Promise.resolve().then(() => (init_resolver(), resolver_exports)).then(
|
|
10046
|
+
(m) => m.createResolveCallback(model)
|
|
10047
|
+
);
|
|
10048
|
+
}
|
|
10049
|
+
const resolver = await resolverPromise;
|
|
10050
|
+
return resolver(...args);
|
|
10051
|
+
} catch (err) {
|
|
10052
|
+
resolverPromise = null;
|
|
10053
|
+
throw err;
|
|
10054
|
+
}
|
|
10055
|
+
};
|
|
10056
|
+
}
|
|
10057
|
+
createLazyExtractCallback(model) {
|
|
10058
|
+
let extractorPromise = null;
|
|
10059
|
+
const extract = async (args) => {
|
|
10060
|
+
try {
|
|
10061
|
+
if (!extractorPromise) {
|
|
10062
|
+
extractorPromise = Promise.resolve().then(() => (init_extractor(), extractor_exports)).then(
|
|
10063
|
+
(m) => m.createExtractCallback(model)
|
|
10064
|
+
);
|
|
10065
|
+
}
|
|
10066
|
+
const extractor = await extractorPromise;
|
|
10067
|
+
return extractor(args);
|
|
10068
|
+
} catch (err) {
|
|
10069
|
+
extractorPromise = null;
|
|
10070
|
+
throw err;
|
|
10071
|
+
}
|
|
10072
|
+
};
|
|
10073
|
+
return extract;
|
|
10074
|
+
}
|
|
10075
|
+
async invokeCloudActionAndResetCache(method, args) {
|
|
10076
|
+
const result = await this.invokeCloudAction(method, args);
|
|
10077
|
+
this.snapshotCache = null;
|
|
10078
|
+
return result;
|
|
10079
|
+
}
|
|
10080
|
+
async invokeCloudAction(method, args) {
|
|
10081
|
+
const actionClient = this.cloud?.actionClient;
|
|
10082
|
+
const sessionId = this.cloud?.sessionId;
|
|
10083
|
+
if (!actionClient || !sessionId) {
|
|
10084
|
+
throw cloudNotLaunchedError();
|
|
10085
|
+
}
|
|
10086
|
+
const payload = args && typeof args === "object" ? args : {};
|
|
10087
|
+
try {
|
|
10088
|
+
return await actionClient.request(method, payload);
|
|
10089
|
+
} catch (err) {
|
|
10090
|
+
if (err instanceof OpensteerCloudError && err.code === "CLOUD_ACTION_FAILED" && CLOUD_INTERACTION_METHODS.has(method)) {
|
|
10091
|
+
const detailsRecord = err.details && typeof err.details === "object" ? err.details : null;
|
|
10092
|
+
const cloudFailure = normalizeActionFailure(
|
|
10093
|
+
detailsRecord?.actionFailure
|
|
10094
|
+
);
|
|
10095
|
+
const failure = cloudFailure || classifyActionFailure({
|
|
10096
|
+
action: method,
|
|
10097
|
+
error: err,
|
|
10098
|
+
fallbackMessage: defaultActionFailureMessage(method)
|
|
10099
|
+
});
|
|
10100
|
+
const description = readCloudActionDescription(payload);
|
|
10101
|
+
throw this.buildActionError(
|
|
10102
|
+
method,
|
|
10103
|
+
description,
|
|
10104
|
+
failure,
|
|
10105
|
+
null,
|
|
10106
|
+
err
|
|
10107
|
+
);
|
|
10108
|
+
}
|
|
10109
|
+
throw err;
|
|
10110
|
+
}
|
|
10111
|
+
}
|
|
10112
|
+
buildActionError(action, description, failure, selectorUsed, cause) {
|
|
10113
|
+
return new OpensteerActionError({
|
|
10114
|
+
action,
|
|
10115
|
+
failure,
|
|
10116
|
+
selectorUsed: selectorUsed || null,
|
|
10117
|
+
message: formatActionFailureMessage(
|
|
10118
|
+
action,
|
|
10119
|
+
description,
|
|
10120
|
+
failure.message
|
|
10121
|
+
),
|
|
10122
|
+
cause
|
|
10123
|
+
});
|
|
10124
|
+
}
|
|
10125
|
+
async syncCloudPageRef(args) {
|
|
10126
|
+
if (!this.cloud || !this.browser) return;
|
|
10127
|
+
let tabs;
|
|
10128
|
+
try {
|
|
10129
|
+
tabs = await this.invokeCloudAction("tabs", {});
|
|
10130
|
+
} catch {
|
|
10131
|
+
return;
|
|
10132
|
+
}
|
|
10133
|
+
if (!tabs.length) {
|
|
10134
|
+
return;
|
|
10135
|
+
}
|
|
10136
|
+
const contexts = this.browser.contexts();
|
|
10137
|
+
if (!contexts.length) return;
|
|
10138
|
+
const syncContext = this.contextRef && contexts.includes(this.contextRef) ? this.contextRef : contexts[0];
|
|
10139
|
+
const syncContextPages = syncContext.pages();
|
|
10140
|
+
const activeTab = tabs.find((tab) => tab.active) ?? null;
|
|
10141
|
+
if (activeTab && activeTab.index >= 0 && activeTab.index < syncContextPages.length) {
|
|
10142
|
+
this.contextRef = syncContext;
|
|
10143
|
+
this.pageRef = syncContextPages[activeTab.index];
|
|
10144
|
+
return;
|
|
10145
|
+
}
|
|
10146
|
+
const expectedUrl = args?.expectedUrl?.trim() || null;
|
|
10147
|
+
const expectedUrlInSyncContext = expectedUrl ? syncContextPages.find((page) => page.url() === expectedUrl) : void 0;
|
|
10148
|
+
if (expectedUrlInSyncContext) {
|
|
10149
|
+
this.contextRef = syncContext;
|
|
10150
|
+
this.pageRef = expectedUrlInSyncContext;
|
|
10151
|
+
return;
|
|
10152
|
+
}
|
|
10153
|
+
const firstNonInternalInSyncContext = syncContextPages.find(
|
|
10154
|
+
(page) => !isInternalOrBlankPageUrl(page.url())
|
|
10155
|
+
);
|
|
10156
|
+
if (firstNonInternalInSyncContext) {
|
|
10157
|
+
this.contextRef = syncContext;
|
|
10158
|
+
this.pageRef = firstNonInternalInSyncContext;
|
|
10159
|
+
return;
|
|
10160
|
+
}
|
|
10161
|
+
const firstAboutBlankInSyncContext = syncContextPages.find(
|
|
10162
|
+
(page) => page.url() === "about:blank"
|
|
10163
|
+
);
|
|
10164
|
+
if (firstAboutBlankInSyncContext) {
|
|
10165
|
+
this.contextRef = syncContext;
|
|
10166
|
+
this.pageRef = firstAboutBlankInSyncContext;
|
|
10167
|
+
return;
|
|
10168
|
+
}
|
|
10169
|
+
const pages = [];
|
|
10170
|
+
for (const context of contexts) {
|
|
10171
|
+
for (const page of context.pages()) {
|
|
10172
|
+
pages.push({
|
|
10173
|
+
context,
|
|
10174
|
+
page,
|
|
10175
|
+
url: page.url()
|
|
10176
|
+
});
|
|
10177
|
+
}
|
|
10178
|
+
}
|
|
10179
|
+
if (!pages.length) return;
|
|
10180
|
+
const expectedUrlMatch = expectedUrl ? pages.find(({ url }) => url === expectedUrl) : void 0;
|
|
10181
|
+
if (expectedUrlMatch) {
|
|
10182
|
+
this.contextRef = expectedUrlMatch.context;
|
|
10183
|
+
this.pageRef = expectedUrlMatch.page;
|
|
10184
|
+
return;
|
|
10185
|
+
}
|
|
10186
|
+
const firstNonInternal = pages.find(
|
|
10187
|
+
({ url }) => !isInternalOrBlankPageUrl(url)
|
|
10188
|
+
);
|
|
10189
|
+
if (firstNonInternal) {
|
|
10190
|
+
this.contextRef = firstNonInternal.context;
|
|
10191
|
+
this.pageRef = firstNonInternal.page;
|
|
10192
|
+
return;
|
|
10193
|
+
}
|
|
10194
|
+
const firstAboutBlank = pages.find(({ url }) => url === "about:blank");
|
|
10195
|
+
if (firstAboutBlank) {
|
|
10196
|
+
this.contextRef = firstAboutBlank.context;
|
|
10197
|
+
this.pageRef = firstAboutBlank.page;
|
|
10198
|
+
return;
|
|
10199
|
+
}
|
|
10200
|
+
this.contextRef = pages[0].context;
|
|
10201
|
+
this.pageRef = pages[0].page;
|
|
10202
|
+
}
|
|
10203
|
+
get page() {
|
|
10204
|
+
if (!this.pageRef) {
|
|
10205
|
+
throw new Error(
|
|
10206
|
+
"Browser page is not initialized. Call launch() or Opensteer.from(page)."
|
|
10207
|
+
);
|
|
10208
|
+
}
|
|
10209
|
+
return this.pageRef;
|
|
10210
|
+
}
|
|
10211
|
+
get context() {
|
|
10212
|
+
if (!this.contextRef) {
|
|
10213
|
+
throw new Error(
|
|
10214
|
+
"Browser context is not initialized. Call launch() or Opensteer.from(page)."
|
|
10215
|
+
);
|
|
10216
|
+
}
|
|
10217
|
+
return this.contextRef;
|
|
10218
|
+
}
|
|
10219
|
+
getCloudSessionId() {
|
|
10220
|
+
return this.cloud?.sessionId ?? null;
|
|
10221
|
+
}
|
|
10222
|
+
getCloudSessionUrl() {
|
|
10223
|
+
return this.cloud?.cloudSessionUrl ?? null;
|
|
10224
|
+
}
|
|
10225
|
+
announceCloudSession(args) {
|
|
10226
|
+
if (!this.shouldAnnounceCloudSession()) {
|
|
10227
|
+
return;
|
|
10228
|
+
}
|
|
10229
|
+
const fields = [
|
|
10230
|
+
`sessionId=${args.sessionId}`,
|
|
10231
|
+
`workspaceId=${args.workspaceId}`
|
|
10232
|
+
];
|
|
10233
|
+
if (args.cloudSessionUrl) {
|
|
10234
|
+
fields.push(`url=${args.cloudSessionUrl}`);
|
|
10235
|
+
}
|
|
10236
|
+
process.stderr.write(`[opensteer] cloud session ready ${fields.join(" ")}
|
|
10237
|
+
`);
|
|
10238
|
+
}
|
|
10239
|
+
shouldAnnounceCloudSession() {
|
|
10240
|
+
const cloudConfig = this.config.cloud && typeof this.config.cloud === "object" ? this.config.cloud : null;
|
|
10241
|
+
const announce = cloudConfig?.announce ?? "always";
|
|
10242
|
+
if (announce === "off") {
|
|
10243
|
+
return false;
|
|
10244
|
+
}
|
|
8371
10245
|
if (announce === "tty") {
|
|
8372
10246
|
return Boolean(process.stderr.isTTY);
|
|
8373
10247
|
}
|
|
@@ -8428,6 +10302,7 @@ var Opensteer = class _Opensteer {
|
|
|
8428
10302
|
this.cloud.actionClient = actionClient;
|
|
8429
10303
|
this.cloud.sessionId = sessionId;
|
|
8430
10304
|
this.cloud.cloudSessionUrl = session2.cloudSessionUrl;
|
|
10305
|
+
await this.syncCloudPageRef().catch(() => void 0);
|
|
8431
10306
|
this.announceCloudSession({
|
|
8432
10307
|
sessionId: session2.sessionId,
|
|
8433
10308
|
workspaceId: session2.cloudSession.workspaceId,
|
|
@@ -8523,6 +10398,9 @@ var Opensteer = class _Opensteer {
|
|
|
8523
10398
|
async goto(url, options) {
|
|
8524
10399
|
if (this.cloud) {
|
|
8525
10400
|
await this.invokeCloudActionAndResetCache("goto", { url, options });
|
|
10401
|
+
await this.syncCloudPageRef({ expectedUrl: url }).catch(
|
|
10402
|
+
() => void 0
|
|
10403
|
+
);
|
|
8526
10404
|
return;
|
|
8527
10405
|
}
|
|
8528
10406
|
const { waitUntil = "domcontentloaded", ...rest } = options ?? {};
|
|
@@ -9027,9 +10905,16 @@ var Opensteer = class _Opensteer {
|
|
|
9027
10905
|
}
|
|
9028
10906
|
async newTab(url) {
|
|
9029
10907
|
if (this.cloud) {
|
|
9030
|
-
|
|
9031
|
-
|
|
9032
|
-
|
|
10908
|
+
const result = await this.invokeCloudActionAndResetCache(
|
|
10909
|
+
"newTab",
|
|
10910
|
+
{
|
|
10911
|
+
url
|
|
10912
|
+
}
|
|
10913
|
+
);
|
|
10914
|
+
await this.syncCloudPageRef({ expectedUrl: result.url }).catch(
|
|
10915
|
+
() => void 0
|
|
10916
|
+
);
|
|
10917
|
+
return result;
|
|
9033
10918
|
}
|
|
9034
10919
|
const { page, info } = await createTab(this.context, url);
|
|
9035
10920
|
this.pageRef = page;
|
|
@@ -9039,6 +10924,7 @@ var Opensteer = class _Opensteer {
|
|
|
9039
10924
|
async switchTab(index) {
|
|
9040
10925
|
if (this.cloud) {
|
|
9041
10926
|
await this.invokeCloudActionAndResetCache("switchTab", { index });
|
|
10927
|
+
await this.syncCloudPageRef().catch(() => void 0);
|
|
9042
10928
|
return;
|
|
9043
10929
|
}
|
|
9044
10930
|
const page = await switchTab(this.context, index);
|
|
@@ -9048,6 +10934,7 @@ var Opensteer = class _Opensteer {
|
|
|
9048
10934
|
async closeTab(index) {
|
|
9049
10935
|
if (this.cloud) {
|
|
9050
10936
|
await this.invokeCloudActionAndResetCache("closeTab", { index });
|
|
10937
|
+
await this.syncCloudPageRef().catch(() => void 0);
|
|
9051
10938
|
return;
|
|
9052
10939
|
}
|
|
9053
10940
|
const newPage = await closeTab(this.context, this.page, index);
|
|
@@ -9458,6 +11345,37 @@ var Opensteer = class _Opensteer {
|
|
|
9458
11345
|
this.storage.clearNamespace();
|
|
9459
11346
|
this.snapshotCache = null;
|
|
9460
11347
|
}
|
|
11348
|
+
agent(config) {
|
|
11349
|
+
const resolvedAgentConfig = resolveAgentConfig({
|
|
11350
|
+
agentConfig: config,
|
|
11351
|
+
fallbackModel: this.config.model
|
|
11352
|
+
});
|
|
11353
|
+
return {
|
|
11354
|
+
execute: async (instructionOrOptions) => {
|
|
11355
|
+
if (this.agentExecutionInFlight) {
|
|
11356
|
+
throw new OpensteerAgentBusyError();
|
|
11357
|
+
}
|
|
11358
|
+
this.agentExecutionInFlight = true;
|
|
11359
|
+
try {
|
|
11360
|
+
const options = normalizeExecuteOptions(instructionOrOptions);
|
|
11361
|
+
const handler = new OpensteerCuaAgentHandler({
|
|
11362
|
+
page: this.page,
|
|
11363
|
+
config: resolvedAgentConfig,
|
|
11364
|
+
client: createCuaClient(resolvedAgentConfig),
|
|
11365
|
+
debug: Boolean(this.config.debug),
|
|
11366
|
+
onMutatingAction: () => {
|
|
11367
|
+
this.snapshotCache = null;
|
|
11368
|
+
}
|
|
11369
|
+
});
|
|
11370
|
+
const result = await handler.execute(options);
|
|
11371
|
+
this.snapshotCache = null;
|
|
11372
|
+
return result;
|
|
11373
|
+
} finally {
|
|
11374
|
+
this.agentExecutionInFlight = false;
|
|
11375
|
+
}
|
|
11376
|
+
}
|
|
11377
|
+
};
|
|
11378
|
+
}
|
|
9461
11379
|
async runWithPostActionWait(action, waitOverride, execute) {
|
|
9462
11380
|
const waitSession = createPostActionWaitSession(
|
|
9463
11381
|
this.page,
|
|
@@ -10480,6 +12398,11 @@ function getScrollDelta2(options) {
|
|
|
10480
12398
|
return { x: 0, y: absoluteAmount };
|
|
10481
12399
|
}
|
|
10482
12400
|
}
|
|
12401
|
+
/**
 * Report whether a page URL is empty, "about:blank", or uses one of the
 * browser-internal schemes `chrome://`, `devtools://` or `edge://`.
 */
function isInternalOrBlankPageUrl(url) {
  if (!url || url === "about:blank") {
    return true;
  }
  const internalPrefixes = ["chrome://", "devtools://", "edge://"];
  return internalPrefixes.some((prefix) => url.startsWith(prefix));
}
|
|
10483
12406
|
function buildLocalRunId(namespace) {
|
|
10484
12407
|
const normalized = namespace.trim() || "default";
|
|
10485
12408
|
return `${normalized}-${Date.now().toString(36)}-${(0, import_crypto2.randomUUID)().slice(0, 8)}`;
|
|
@@ -10507,7 +12430,15 @@ init_model();
|
|
|
10507
12430
|
OS_UNAVAILABLE_ATTR,
|
|
10508
12431
|
Opensteer,
|
|
10509
12432
|
OpensteerActionError,
|
|
12433
|
+
OpensteerAgentActionError,
|
|
12434
|
+
OpensteerAgentApiError,
|
|
12435
|
+
OpensteerAgentBusyError,
|
|
12436
|
+
OpensteerAgentConfigError,
|
|
12437
|
+
OpensteerAgentError,
|
|
12438
|
+
OpensteerAgentExecutionError,
|
|
12439
|
+
OpensteerAgentProviderError,
|
|
10510
12440
|
OpensteerCloudError,
|
|
12441
|
+
OpensteerCuaAgentHandler,
|
|
10511
12442
|
buildElementPathFromHandle,
|
|
10512
12443
|
buildElementPathFromSelector,
|
|
10513
12444
|
buildPathSelectorHint,
|
|
@@ -10524,6 +12455,7 @@ init_model();
|
|
|
10524
12455
|
cloudUnsupportedMethodError,
|
|
10525
12456
|
collectLocalSelectorCacheEntries,
|
|
10526
12457
|
countArrayItemsWithPath,
|
|
12458
|
+
createCuaClient,
|
|
10527
12459
|
createEmptyRegistry,
|
|
10528
12460
|
createExtractCallback,
|
|
10529
12461
|
createResolveCallback,
|
|
@@ -10553,6 +12485,7 @@ init_model();
|
|
|
10553
12485
|
performSelect,
|
|
10554
12486
|
prepareSnapshot,
|
|
10555
12487
|
pressKey,
|
|
12488
|
+
resolveAgentConfig,
|
|
10556
12489
|
resolveCounterElement,
|
|
10557
12490
|
resolveCountersBatch,
|
|
10558
12491
|
resolveElementPath,
|