opensteer 0.4.11 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5275,7 +5275,8 @@ var CloudCdpClient = class {
5275
5275
  const message = error instanceof Error ? error.message : "Failed to connect to cloud CDP endpoint.";
5276
5276
  throw new OpensteerCloudError("CLOUD_TRANSPORT_ERROR", message);
5277
5277
  }
5278
- const context = browser.contexts()[0];
5278
+ const contexts = browser.contexts();
5279
+ const context = contexts[0];
5279
5280
  if (!context) {
5280
5281
  await browser.close();
5281
5282
  throw new OpensteerCloudError(
@@ -5283,10 +5284,41 @@ var CloudCdpClient = class {
5283
5284
  "Cloud browser returned no context."
5284
5285
  );
5285
5286
  }
5287
+ const preferred = selectPreferredContextPage(browser, contexts);
5288
+ if (preferred) {
5289
+ return preferred;
5290
+ }
5286
5291
  const page = context.pages()[0] || await context.newPage();
5287
5292
  return { browser, context, page };
5288
5293
  }
5289
5294
  };
5295
/**
 * Scans every page of every context and picks the one to attach to.
 *
 * The first page whose URL is neither empty nor internal wins immediately.
 * Otherwise the first "about:blank" page seen is returned, or null when no
 * page qualifies at all.
 *
 * @param browser  Browser handle, passed through into the result.
 * @param contexts Iterable of contexts exposing `pages()`.
 * @returns `{ browser, context, page }` or null.
 */
function selectPreferredContextPage(browser, contexts) {
  let blankFallback = null;
  for (const context of contexts) {
    for (const page of context.pages()) {
      const pageUrl = safePageUrl(page);
      if (isInternalOrEmptyUrl(pageUrl)) {
        // Remember only the first blank page; keep looking for a real one.
        if (blankFallback === null && pageUrl === "about:blank") {
          blankFallback = { browser, context, page };
        }
        continue;
      }
      return { browser, context, page };
    }
  }
  return blankFallback;
}
5310
/**
 * Reads `page.url()` without letting a failure escape.
 *
 * @param page Object exposing a `url()` method.
 * @returns The reported URL, or "" when the call throws.
 */
function safePageUrl(page) {
  let reported = "";
  try {
    reported = page.url();
  } catch {
    // Deliberate best-effort: an unreadable page is treated as having no URL.
  }
  return reported;
}
5317
/**
 * Tells whether a URL is unusable as a user-facing page: falsy, exactly
 * "about:blank", or on one of the chrome://, devtools:// or edge:// schemes.
 *
 * @param url URL string (may be empty).
 * @returns true when the URL is empty or internal.
 */
function isInternalOrEmptyUrl(url) {
  if (!url || url === "about:blank") {
    return true;
  }
  const internalPrefixes = ["chrome://", "devtools://", "edge://"];
  return internalPrefixes.some((prefix) => url.startsWith(prefix));
}
5290
5322
  function withTokenQuery2(wsUrl, token) {
5291
5323
  const url = new URL(wsUrl);
5292
5324
  url.searchParams.set("token", token);
@@ -5371,180 +5403,1923 @@ var CloudSessionClient = class {
5371
5403
  }
5372
5404
  return await response.json();
5373
5405
  }
5374
- authHeaders() {
5375
- if (this.authScheme === "bearer") {
5376
- return {
5377
- authorization: `Bearer ${this.key}`
5378
- };
5379
- }
5380
- return {
5381
- "x-api-key": this.key
5382
- };
5406
+ authHeaders() {
5407
+ if (this.authScheme === "bearer") {
5408
+ return {
5409
+ authorization: `Bearer ${this.key}`
5410
+ };
5411
+ }
5412
+ return {
5413
+ "x-api-key": this.key
5414
+ };
5415
+ }
5416
+ };
5417
/**
 * Strips every trailing "/" from a base URL string.
 *
 * @param baseUrl Base URL string.
 * @returns The same string without trailing slashes.
 */
function normalizeBaseUrl(baseUrl) {
  const SLASH = 47; // char code of "/"
  let end = baseUrl.length;
  while (end > 0 && baseUrl.charCodeAt(end - 1) === SLASH) {
    end -= 1;
  }
  return baseUrl.slice(0, end);
}
5420
/**
 * Validates the JSON body of a cloud session create response and returns a
 * normalized copy containing only the known fields.
 *
 * Fields are validated in a fixed order, so the first invalid field decides
 * which error the require/optional helpers raise.
 *
 * @param body   Parsed response body.
 * @param status HTTP status, forwarded to the validation helpers.
 * @returns Normalized create-session payload.
 */
function parseCreateResponse(body, status) {
  const root = requireObject(
    body,
    "Invalid cloud session create response: expected a JSON object.",
    status
  );
  const [
    sessionId,
    actionWsUrl,
    cdpWsUrl,
    actionToken,
    cdpToken,
    cloudSessionUrl
  ] = [
    "sessionId",
    "actionWsUrl",
    "cdpWsUrl",
    "actionToken",
    "cdpToken",
    "cloudSessionUrl"
  ].map((field) => requireString(root, field, status));

  const sessionRoot = requireObject(
    root.cloudSession,
    "Invalid cloud session create response: cloudSession must be an object.",
    status
  );
  // All nested fields are reported under the "cloudSession" parent path.
  const nested = (validate, field) => validate(sessionRoot, field, status, "cloudSession");
  const cloudSession = {
    sessionId: nested(requireString, "sessionId"),
    workspaceId: nested(requireString, "workspaceId"),
    state: nested(requireString, "state"),
    createdAt: nested(requireNumber, "createdAt"),
    sourceType: nested(requireSourceType, "sourceType"),
    sourceRef: nested(optionalString, "sourceRef"),
    label: nested(optionalString, "label")
  };

  const expiresAt = optionalNumber(root, "expiresAt", status);
  return {
    sessionId,
    actionWsUrl,
    cdpWsUrl,
    actionToken,
    cdpToken,
    expiresAt,
    cloudSessionUrl,
    cloudSession
  };
}
5463
/**
 * Returns `value` unchanged when it is a non-null, non-array object.
 *
 * @param value   Candidate value.
 * @param message Error message used on failure.
 * @param status  HTTP status attached to the error.
 * @throws OpensteerCloudError with code CLOUD_CONTRACT_MISMATCH otherwise.
 */
function requireObject(value, message, status) {
  const isObjectLike = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (isObjectLike) {
    return value;
  }
  throw new OpensteerCloudError("CLOUD_CONTRACT_MISMATCH", message, status);
}
5469
/**
 * Reads `source[field]` and requires it to be a string that is not blank.
 *
 * @param source Object to read from.
 * @param field  Property name.
 * @param status HTTP status attached to the error.
 * @param parent Optional parent path used only in the error message.
 * @returns The original (untrimmed) string.
 * @throws OpensteerCloudError with code CLOUD_CONTRACT_MISMATCH otherwise.
 */
function requireString(source, field, status, parent) {
  const candidate = source[field];
  const isNonBlank = typeof candidate === "string" && candidate.trim() !== "";
  if (isNonBlank) {
    return candidate;
  }
  const path = formatFieldPath(field, parent);
  throw new OpensteerCloudError(
    "CLOUD_CONTRACT_MISMATCH",
    `Invalid cloud session create response: ${path} must be a non-empty string.`,
    status
  );
}
5483
/**
 * Reads `source[field]` and requires it to be a finite number.
 *
 * @param source Object to read from.
 * @param field  Property name.
 * @param status HTTP status attached to the error.
 * @param parent Optional parent path used only in the error message.
 * @throws OpensteerCloudError with code CLOUD_CONTRACT_MISMATCH otherwise.
 */
function requireNumber(source, field, status, parent) {
  const candidate = source[field];
  const isFiniteNumber = typeof candidate === "number" && Number.isFinite(candidate);
  if (isFiniteNumber) {
    return candidate;
  }
  const path = formatFieldPath(field, parent);
  throw new OpensteerCloudError(
    "CLOUD_CONTRACT_MISMATCH",
    `Invalid cloud session create response: ${path} must be a finite number.`,
    status
  );
}
5497
/**
 * Reads an optional string field.
 *
 * @param source Object to read from.
 * @param field  Property name.
 * @param status HTTP status attached to the error.
 * @param parent Optional parent path used only in the error message.
 * @returns undefined for null/undefined, otherwise the string (which may be
 *          empty).
 * @throws OpensteerCloudError with code CLOUD_CONTRACT_MISMATCH for any other
 *         type.
 */
function optionalString(source, field, status, parent) {
  const candidate = source[field];
  if (candidate === null || candidate === undefined) {
    return undefined;
  }
  if (typeof candidate === "string") {
    return candidate;
  }
  const path = formatFieldPath(field, parent);
  throw new OpensteerCloudError(
    "CLOUD_CONTRACT_MISMATCH",
    `Invalid cloud session create response: ${path} must be a string when present.`,
    status
  );
}
5514
/**
 * Reads an optional finite-number field.
 *
 * @param source Object to read from.
 * @param field  Property name.
 * @param status HTTP status attached to the error.
 * @param parent Optional parent path used only in the error message.
 * @returns undefined for null/undefined, otherwise the number.
 * @throws OpensteerCloudError with code CLOUD_CONTRACT_MISMATCH when the value
 *         is present but not a finite number.
 */
function optionalNumber(source, field, status, parent) {
  const candidate = source[field];
  if (candidate === null || candidate === undefined) {
    return undefined;
  }
  if (typeof candidate === "number" && Number.isFinite(candidate)) {
    return candidate;
  }
  const path = formatFieldPath(field, parent);
  throw new OpensteerCloudError(
    "CLOUD_CONTRACT_MISMATCH",
    `Invalid cloud session create response: ${path} must be a finite number when present.`,
    status
  );
}
5531
/**
 * Reads `source[field]` and requires it to be one of the known session source
 * types: "agent-thread", "agent-run", "local-cloud" or "manual".
 *
 * @param source Object to read from.
 * @param field  Property name.
 * @param status HTTP status attached to the error.
 * @param parent Optional parent path used only in the error message.
 * @throws OpensteerCloudError with code CLOUD_CONTRACT_MISMATCH otherwise.
 */
function requireSourceType(source, field, status, parent) {
  const candidate = source[field];
  const allowed = ["agent-thread", "agent-run", "local-cloud", "manual"];
  if (!allowed.includes(candidate)) {
    const path = formatFieldPath(field, parent);
    throw new OpensteerCloudError(
      "CLOUD_CONTRACT_MISMATCH",
      `Invalid cloud session create response: ${path} must be one of "agent-thread", "agent-run", "local-cloud", or "manual".`,
      status
    );
  }
  return candidate;
}
5545
/**
 * Formats a field name for error messages as a double-quoted path, prefixed
 * with `parent.` when a truthy parent is given.
 *
 * @param field  Field name.
 * @param parent Optional parent name.
 * @returns e.g. `"cloudSession.state"` or `"state"` (quotes included).
 */
function formatFieldPath(field, parent) {
  if (parent) {
    return `"${parent}.${field}"`;
  }
  return `"${field}"`;
}
5548
/**
 * Builds a fresh import-result object with every counter set to zero.
 *
 * @returns `{ imported, inserted, updated, skipped }`, all 0.
 */
function zeroImportResponse() {
  const counterNames = ["imported", "inserted", "updated", "skipped"];
  return Object.fromEntries(counterNames.map((name) => [name, 0]));
}
5556
/**
 * Adds two import results counter by counter.
 *
 * @param first  Import result with imported/inserted/updated/skipped.
 * @param second Import result with the same four counters.
 * @returns A new object holding the per-counter sums.
 */
function mergeImportResponse(first, second) {
  const total = (counter) => first[counter] + second[counter];
  return {
    imported: total("imported"),
    inserted: total("inserted"),
    updated: total("updated"),
    skipped: total("skipped")
  };
}
5564
/**
 * Converts a failed HTTP response into an OpensteerCloudError.
 *
 * The body is read as JSON on a best-effort basis. A string `code` is mapped
 * through toCloudErrorCode and a string `error` becomes the message; missing
 * pieces fall back to CLOUD_TRANSPORT_ERROR and a status-based message.
 *
 * @param response Response-like object with `json()` and `status`.
 * @returns The constructed error (returned, not thrown).
 */
async function parseHttpError(response) {
  const readBody = async () => {
    try {
      return await response.json();
    } catch {
      // Non-JSON or unreadable bodies are treated as absent.
      return null;
    }
  };
  const body = await readBody();

  let code = "CLOUD_TRANSPORT_ERROR";
  if (typeof body?.code === "string") {
    code = toCloudErrorCode(body.code);
  }

  let message;
  if (typeof body?.error === "string") {
    message = body.error;
  } else {
    message = `Cloud request failed with status ${response.status}.`;
  }

  return new OpensteerCloudError(code, message, response.status, body?.details);
}
5575
/**
 * Passes a known cloud error code through unchanged and maps anything else to
 * "CLOUD_TRANSPORT_ERROR".
 *
 * @param code Code value taken from a server error body.
 * @returns A recognized cloud error code.
 */
function toCloudErrorCode(code) {
  switch (code) {
    case "CLOUD_AUTH_FAILED":
    case "CLOUD_SESSION_NOT_FOUND":
    case "CLOUD_SESSION_CLOSED":
    case "CLOUD_UNSUPPORTED_METHOD":
    case "CLOUD_INVALID_REQUEST":
    case "CLOUD_MODEL_NOT_ALLOWED":
    case "CLOUD_ACTION_FAILED":
    case "CLOUD_INTERNAL":
    case "CLOUD_CAPACITY_EXHAUSTED":
    case "CLOUD_RUNTIME_UNAVAILABLE":
    case "CLOUD_RUNTIME_MISMATCH":
    case "CLOUD_SESSION_STALE":
    case "CLOUD_CONTRACT_MISMATCH":
    case "CLOUD_CONTROL_PLANE_ERROR":
      return code;
    default:
      return "CLOUD_TRANSPORT_ERROR";
  }
}
5581
+
5582
+ // src/agent/errors.ts
5583
/**
 * Base class for all agent errors.
 *
 * The `cause` option is only forwarded to Error when a cause was actually
 * supplied. Passing `{ cause: undefined }` would still create an own `cause`
 * property, which then appears as `[cause]: undefined` when the error is
 * inspected or logged.
 */
var OpensteerAgentError = class extends Error {
  /**
   * @param message Human-readable description.
   * @param cause   Optional underlying error or value.
   */
  constructor(message, cause) {
    super(message, cause === undefined ? undefined : { cause });
    this.name = "OpensteerAgentError";
  }
};

/** Raised for invalid or missing agent configuration. */
var OpensteerAgentConfigError = class extends OpensteerAgentError {
  constructor(message) {
    super(message);
    this.name = "OpensteerAgentConfigError";
  }
};

/** Raised when the requested model provider is not supported. */
var OpensteerAgentProviderError = class extends OpensteerAgentError {
  constructor(message) {
    super(message);
    this.name = "OpensteerAgentProviderError";
  }
};

/** Raised for failures while executing an agent run; may wrap a cause. */
var OpensteerAgentExecutionError = class extends OpensteerAgentError {
  constructor(message, cause) {
    super(message, cause);
    this.name = "OpensteerAgentExecutionError";
  }
};

/** Raised when an execution is already in progress on the same instance. */
var OpensteerAgentBusyError = class extends OpensteerAgentError {
  constructor() {
    super("An OpenSteer agent execution is already in progress on this instance.");
    this.name = "OpensteerAgentBusyError";
  }
};

/** Raised for invalid or failed individual agent actions; may wrap a cause. */
var OpensteerAgentActionError = class extends OpensteerAgentError {
  constructor(message, cause) {
    super(message, cause);
    this.name = "OpensteerAgentActionError";
  }
};

/** Raised for provider API failures; carries the provider name and a status. */
var OpensteerAgentApiError = class extends OpensteerAgentError {
  status;
  provider;
  /**
   * @param provider Provider identifier (for example "openai").
   * @param message  Human-readable description.
   * @param status   Status value reported for the failed call.
   * @param cause    Optional underlying error.
   */
  constructor(provider, message, status, cause) {
    super(message, cause);
    this.name = "OpensteerAgentApiError";
    this.provider = provider;
    this.status = status;
  }
};
5629
+
5630
+ // src/agent/model.ts
5631
// Provider identifiers accepted for computer-use (CUA) models.
var SUPPORTED_CUA_PROVIDERS = /* @__PURE__ */ new Set(["openai", "anthropic", "google"]);
5636
/**
 * Resolves the complete CUA model configuration for an agent.
 *
 * @param args `{ agentConfig, fallbackModel, env }`; `env` falls back to
 *             `process.env` when falsy.
 * @returns Provider, model names, API key and normalized optional settings.
 * @throws OpensteerAgentProviderError when the provider is not supported.
 */
function resolveCuaModelConfig(args) {
  const env = args.env || process.env;
  const { modelName: requestedModel, options } = resolveModelSource(
    args.agentConfig.model,
    args.fallbackModel
  );
  const { provider, modelName } = parseProviderModel(requestedModel);
  if (!SUPPORTED_CUA_PROVIDERS.has(provider)) {
    throw new OpensteerAgentProviderError(
      `Unsupported CUA provider "${provider}". Supported providers: openai, anthropic, google.`
    );
  }
  const apiKey = resolveProviderApiKey(provider, options.apiKey, env);

  // Only a finite numeric budget is forwarded; anything else is dropped.
  const budget = options.thinkingBudget;
  const hasUsableBudget = typeof budget === "number" && Number.isFinite(budget);

  return {
    provider,
    fullModelName: `${provider}/${modelName}`,
    providerModelName: modelName,
    apiKey,
    baseUrl: normalizeOptional(options.baseUrl),
    organization: normalizeOptional(options.organization),
    thinkingBudget: hasUsableBudget ? budget : void 0,
    environment: normalizeOptional(options.environment)
  };
}
5657
/**
 * Normalizes the two accepted shapes of `agent.model`.
 *
 * Object form: `modelName` is required and every other property is returned
 * as `options`. String form: the model (or, failing that, the fallback model)
 * is used with empty options.
 *
 * @param model         String or object model setting.
 * @param fallbackModel Model name used when `model` yields nothing.
 * @returns `{ modelName, options }`.
 * @throws OpensteerAgentConfigError when no model name can be resolved.
 */
function resolveModelSource(model, fallbackModel) {
  const isObjectForm = Boolean(model) && typeof model === "object";
  if (!isObjectForm) {
    const resolvedName = normalizeOptional(model) || normalizeOptional(fallbackModel);
    if (resolvedName) {
      return {
        modelName: resolvedName,
        options: {}
      };
    }
    throw new OpensteerAgentConfigError(
      'A CUA model is required. Pass agent.model (for example "openai/computer-use-preview").'
    );
  }
  const { modelName: rawName, ...options } = model;
  return {
    modelName: normalizeRequired(rawName, "agent.model.modelName"),
    options
  };
}
5677
/**
 * Splits a "provider/model" string at its first slash.
 *
 * The provider is trimmed and lower-cased, and the model segment is trimmed.
 *
 * @param modelName Model string such as "openai/computer-use-preview".
 * @returns `{ provider, modelName }`.
 * @throws OpensteerAgentConfigError for a malformed string or blank model
 *         segment.
 * @throws OpensteerAgentProviderError for a provider other than openai,
 *         anthropic or google.
 */
function parseProviderModel(modelName) {
  const separatorIndex = modelName.indexOf("/");
  const hasProviderSegment = separatorIndex > 0;
  const hasModelSegment = separatorIndex !== modelName.length - 1;
  if (!(hasProviderSegment && hasModelSegment)) {
    throw new OpensteerAgentConfigError(
      `Invalid CUA model "${modelName}". Use "provider/model" format (for example "openai/computer-use-preview").`
    );
  }

  const provider = modelName.slice(0, separatorIndex).trim().toLowerCase();
  const model = modelName.slice(separatorIndex + 1).trim();
  if (model === "") {
    throw new OpensteerAgentConfigError(
      `Invalid CUA model "${modelName}". The model name segment after the provider cannot be empty.`
    );
  }
  if (!["openai", "anthropic", "google"].includes(provider)) {
    throw new OpensteerAgentProviderError(
      `Unsupported CUA provider "${provider}". Supported providers: openai, anthropic, google.`
    );
  }
  return {
    provider,
    modelName: model
  };
}
5701
/**
 * Resolves the API key for a provider.
 *
 * A non-blank explicit key wins. Otherwise the provider's environment
 * variables are checked in order: OPENAI_API_KEY for "openai",
 * ANTHROPIC_API_KEY for "anthropic", and for every other provider
 * GOOGLE_GENERATIVE_AI_API_KEY, GEMINI_API_KEY, then GOOGLE_API_KEY.
 *
 * @param provider       Provider identifier.
 * @param explicitApiKey Key supplied through configuration, if any.
 * @param env            Environment-variable map.
 * @returns The first non-blank key found.
 * @throws OpensteerAgentConfigError when no key is available.
 */
function resolveProviderApiKey(provider, explicitApiKey, env) {
  const explicit = normalizeOptional(explicitApiKey);
  if (explicit) return explicit;

  let envNames;
  let missingKeyMessage;
  switch (provider) {
    case "openai":
      envNames = ["OPENAI_API_KEY"];
      missingKeyMessage = "OpenAI CUA requires an API key via agent.model.apiKey or OPENAI_API_KEY.";
      break;
    case "anthropic":
      envNames = ["ANTHROPIC_API_KEY"];
      missingKeyMessage = "Anthropic CUA requires an API key via agent.model.apiKey or ANTHROPIC_API_KEY.";
      break;
    default:
      envNames = ["GOOGLE_GENERATIVE_AI_API_KEY", "GEMINI_API_KEY", "GOOGLE_API_KEY"];
      missingKeyMessage = "Google CUA requires an API key via agent.model.apiKey, GOOGLE_GENERATIVE_AI_API_KEY, GEMINI_API_KEY, or GOOGLE_API_KEY.";
      break;
  }

  for (const name of envNames) {
    const candidate = normalizeOptional(env[name]);
    if (candidate) return candidate;
  }
  throw new OpensteerAgentConfigError(missingKeyMessage);
}
5724
/**
 * Trims a string and treats blank or non-string values as absent.
 *
 * @param value Any value.
 * @returns The trimmed string, or undefined when it is not a string or is
 *          blank.
 */
function normalizeOptional(value) {
  if (typeof value === "string") {
    const trimmed = value.trim();
    if (trimmed !== "") {
      return trimmed;
    }
  }
  return undefined;
}
5729
/**
 * Like normalizeOptional, but a missing value is an error.
 *
 * @param value Any value.
 * @param field Field name used in the error message.
 * @returns The trimmed, non-blank string.
 * @throws OpensteerAgentConfigError when the value is blank or not a string.
 */
function normalizeRequired(value, field) {
  const trimmed = normalizeOptional(value);
  if (trimmed) {
    return trimmed;
  }
  throw new OpensteerAgentConfigError(`${field} is required.`);
}
5736
+
5737
+ // src/agent/clients/openai.ts
5738
+ import OpenAI from "openai";
5739
+
5740
+ // src/agent/client.ts
5741
/**
 * Shared state holder for computer-use agent clients: viewport size, current
 * URL, and the two injected callbacks (screenshot provider, action handler).
 */
var CuaClient = class {
  screenshotProvider = null;
  actionHandler = null;
  // Default viewport until setViewport is called.
  viewport = { width: 1288, height: 711 };
  currentUrl = null;

  /** Replaces the viewport with a new `{ width, height }` object. */
  setViewport(width, height) {
    this.viewport = { width, height };
  }

  /** Records the URL that is reported back to the model. */
  setCurrentUrl(url) {
    this.currentUrl = url;
  }

  /** Installs the callback used to capture screenshots. */
  setScreenshotProvider(provider) {
    this.screenshotProvider = provider;
  }

  /** Installs the callback used to execute actions. */
  setActionHandler(handler) {
    this.actionHandler = handler;
  }

  /**
   * @returns The installed screenshot provider.
   * @throws Error when none has been installed.
   */
  getScreenshotProvider() {
    const provider = this.screenshotProvider;
    if (provider) {
      return provider;
    }
    throw new Error("CUA screenshot provider is not initialized.");
  }

  /**
   * @returns The installed action handler.
   * @throws Error when none has been installed.
   */
  getActionHandler() {
    const handler = this.actionHandler;
    if (handler) {
      return handler;
    }
    throw new Error("CUA action handler is not initialized.");
  }
};
5777
/**
 * Normalizes the argument of `agent.execute(...)`.
 *
 * Accepts a bare instruction string or an options object. `maxSteps` and
 * `highlightCursor` are validated and copied only when they are provided.
 *
 * @param instructionOrOptions String instruction or options object.
 * @returns `{ instruction, maxSteps?, highlightCursor? }`.
 * @throws OpensteerAgentExecutionError for an invalid shape or option value.
 */
function normalizeExecuteOptions(instructionOrOptions) {
  if (typeof instructionOrOptions === "string") {
    return {
      instruction: normalizeInstruction(instructionOrOptions)
    };
  }

  const isOptionsObject =
    Boolean(instructionOrOptions) &&
    typeof instructionOrOptions === "object" &&
    !Array.isArray(instructionOrOptions);
  if (!isOptionsObject) {
    throw new OpensteerAgentExecutionError(
      "agent.execute(...) expects either a string instruction or an options object."
    );
  }

  const result = {
    instruction: normalizeInstruction(instructionOrOptions.instruction)
  };

  const requestedMaxSteps = instructionOrOptions.maxSteps;
  if (requestedMaxSteps !== void 0) {
    result.maxSteps = normalizeMaxSteps(requestedMaxSteps);
  }

  const requestedHighlight = instructionOrOptions.highlightCursor;
  if (requestedHighlight !== void 0) {
    if (typeof requestedHighlight !== "boolean") {
      throw new OpensteerAgentExecutionError(
        'agent.execute(...) "highlightCursor" must be a boolean when provided.'
      );
    }
    result.highlightCursor = requestedHighlight;
  }
  return result;
}
5804
/**
 * Validates and trims an instruction.
 *
 * @param instruction Candidate instruction.
 * @returns The trimmed instruction.
 * @throws OpensteerAgentExecutionError when it is not a string or is blank.
 */
function normalizeInstruction(instruction) {
  // Non-strings and blank strings fail with the same message.
  const trimmed = typeof instruction === "string" ? instruction.trim() : "";
  if (trimmed === "") {
    throw new OpensteerAgentExecutionError(
      'agent.execute(...) requires a non-empty "instruction" string.'
    );
  }
  return trimmed;
}
5818
/**
 * Validates the `maxSteps` option.
 *
 * @param maxSteps Candidate value.
 * @returns The value unchanged when it is a positive integer.
 * @throws OpensteerAgentExecutionError otherwise.
 */
function normalizeMaxSteps(maxSteps) {
  // Number.isInteger is false for every non-number, so no typeof check is needed.
  const isPositiveInteger = Number.isInteger(maxSteps) && maxSteps > 0;
  if (isPositiveInteger) {
    return maxSteps;
  }
  throw new OpensteerAgentExecutionError(
    'agent.execute(...) "maxSteps" must be a positive integer when provided.'
  );
}
5826
+
5827
+ // src/agent/clients/openai.ts
5828
/**
 * Computer-use agent client that drives the OpenAI Responses API with the
 * `computer_use_preview` tool.
 */
var OpenAICuaClient = class extends CuaClient {
  client;
  modelConfig;

  /**
   * @param modelConfig Resolved model configuration; reads `apiKey`,
   *                    `baseUrl`, `organization` and `providerModelName`.
   */
  constructor(modelConfig) {
    super();
    this.modelConfig = modelConfig;
    this.client = new OpenAI({
      apiKey: modelConfig.apiKey,
      baseURL: modelConfig.baseUrl,
      organization: modelConfig.organization
    });
  }

  /**
   * Runs the request/act loop until a response contains no computer call or
   * `input.maxSteps` responses have been processed.
   *
   * @param input `{ systemPrompt, instruction, maxSteps }`.
   * @returns `{ success, completed, message, actions, usage }`.
   */
  async execute(input) {
    const actions = [];
    let finalMessage = "";
    let completed = false;
    let step = 0;
    let previousResponseId;
    let nextInputItems = [
      {
        role: "system",
        content: input.systemPrompt
      },
      {
        role: "user",
        content: input.instruction
      }
    ];
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let totalReasoningTokens = 0;
    let totalInferenceTimeMs = 0;
    while (!completed && step < input.maxSteps) {
      const startedAt = Date.now();
      const response = await this.getAction(nextInputItems, previousResponseId);
      totalInferenceTimeMs += Date.now() - startedAt;
      totalInputTokens += toNumber(response.usage?.input_tokens);
      totalOutputTokens += toNumber(response.usage?.output_tokens);
      // Prefer the nested reasoning count; fall back to a flat usage field.
      totalReasoningTokens += toNumber(response.usage?.output_tokens_details?.reasoning_tokens) || toNumber(toRecord(response.usage).reasoning_tokens);
      previousResponseId = normalizeString(response.id) || previousResponseId;
      const stepResult = await this.processResponse(response.output);
      actions.push(...stepResult.actions);
      nextInputItems = stepResult.nextInputItems;
      completed = stepResult.completed;
      if (stepResult.message) {
        finalMessage = stepResult.message;
      }
      step += 1;
    }
    return {
      success: completed,
      completed,
      message: finalMessage,
      actions,
      usage: {
        inputTokens: totalInputTokens,
        outputTokens: totalOutputTokens,
        reasoningTokens: totalReasoningTokens > 0 ? totalReasoningTokens : void 0,
        inferenceTimeMs: totalInferenceTimeMs
      }
    };
  }

  /**
   * Sends one Responses API request.
   *
   * @param inputItems         Input items for this turn.
   * @param previousResponseId Id of the previous response, when there is one.
   * @throws The error produced by mapOpenAiApiError on any API failure.
   */
  async getAction(inputItems, previousResponseId) {
    const request = {
      model: this.modelConfig.providerModelName,
      tools: [
        {
          type: "computer_use_preview",
          display_width: this.viewport.width,
          display_height: this.viewport.height,
          environment: "browser"
        }
      ],
      input: inputItems,
      truncation: "auto",
      ...previousResponseId ? { previous_response_id: previousResponseId } : {}
    };
    try {
      return await this.client.responses.create(request);
    } catch (error) {
      throw mapOpenAiApiError(error);
    }
  }

  /**
   * Executes every `computer_call` in a response and collects message text.
   *
   * A failing action does not abort the step: its message is attached to the
   * call output as `error` so it is sent back with the next request.
   *
   * @param output Output items of one response.
   * @returns `{ actions, nextInputItems, completed, message }`; `completed`
   *          is true when the response contained no computer call.
   */
  async processResponse(output) {
    const actions = [];
    const nextInputItems = [];
    const messageParts = [];
    let hasComputerAction = false;
    for (const item of output) {
      if (item.type === "computer_call") {
        hasComputerAction = true;
        const action = toAgentAction(item.action);
        actions.push(action);
        let actionError;
        try {
          await this.getActionHandler()(action);
        } catch (error) {
          actionError = error instanceof Error ? error.message : String(error);
        }
        const outputItem = {
          type: "computer_call_output",
          call_id: item.call_id
        };
        // The field may be missing on some responses; treat a missing or
        // empty list as "nothing to acknowledge" instead of throwing.
        const pendingChecks = item.pending_safety_checks;
        const safetyChecks = Array.isArray(pendingChecks) && pendingChecks.length ? pendingChecks : void 0;
        const screenshotDataUrl = await this.captureScreenshotDataUrl();
        const outputPayload = {
          type: "input_image",
          image_url: screenshotDataUrl
        };
        if (this.currentUrl) {
          outputPayload.current_url = this.currentUrl;
        }
        if (actionError) {
          outputPayload.error = actionError;
        }
        outputItem.output = outputPayload;
        if (safetyChecks) {
          outputItem.acknowledged_safety_checks = safetyChecks;
        }
        nextInputItems.push(outputItem);
      }
      if (item.type === "message") {
        for (const content of item.content) {
          if (content.type === "output_text") {
            messageParts.push(content.text);
          }
        }
      }
    }
    return {
      actions,
      nextInputItems,
      completed: !hasComputerAction,
      message: messageParts.join("\n").trim()
    };
  }

  /** Captures a screenshot and wraps the base64 data in a PNG data URL. */
  async captureScreenshotDataUrl() {
    const base64 = await this.getScreenshotProvider()();
    return `data:image/png;base64,${base64}`;
  }
};
5969
/**
 * Converts a raw provider action into an agent action with a usable `type`.
 *
 * The normalized `type` is written after the spread so it is not overwritten
 * by the raw value. A whitespace-padded type is trimmed, and a missing, blank
 * or non-string type becomes "unknown".
 *
 * @param action Raw action value from the provider response.
 * @returns A shallow copy of the action with a normalized `type`.
 */
function toAgentAction(action) {
  const actionRecord = toRecord(action);
  return {
    ...actionRecord,
    type: normalizeString(actionRecord.type) || "unknown"
  };
}
5976
/**
 * Wraps an error thrown by the OpenAI client in an OpensteerAgentApiError.
 *
 * The message comes from the nested `error.message` when it is a non-blank
 * string, otherwise from the thrown value itself. The status is
 * `error.status` when it is a finite number, otherwise 0.
 *
 * @param error Thrown value.
 * @returns OpensteerAgentApiError for provider "openai" with `error` as cause.
 */
function mapOpenAiApiError(error) {
  const outer = toRecord(error);
  const inner = toRecord(outer.error);
  const status = toNumber(outer.status);

  let message = normalizeString(inner.message);
  if (!message) {
    message = error instanceof Error ? error.message : String(error);
  }
  return new OpensteerAgentApiError("openai", message, status, error);
}
5983
/**
 * Returns `value` when it is a non-null object (arrays included), otherwise a
 * new empty object, so property reads on the result never throw.
 */
function toRecord(value) {
  if (value !== null && typeof value === "object") {
    return value;
  }
  return {};
}
5986
/** Returns `value` when it is a finite number, otherwise 0. */
function toNumber(value) {
  if (typeof value !== "number") return 0;
  return Number.isFinite(value) ? value : 0;
}
5989
/**
 * Trims a string and treats blank or non-string values as absent.
 *
 * @returns The trimmed string, or undefined.
 */
function normalizeString(value) {
  if (typeof value === "string") {
    const trimmed = value.trim();
    if (trimmed !== "") {
      return trimmed;
    }
  }
  return undefined;
}
5994
+
5995
+ // src/agent/clients/anthropic.ts
5996
+ import Anthropic from "@anthropic-ai/sdk";
5997
/**
 * Computer-use agent client that drives the Anthropic beta Messages API with
 * the `computer` tool.
 */
var AnthropicCuaClient = class extends CuaClient {
  modelConfig;
  client;

  /**
   * @param modelConfig Resolved model configuration; reads `apiKey`,
   *                    `baseUrl`, `providerModelName` and `thinkingBudget`.
   */
  constructor(modelConfig) {
    super();
    this.modelConfig = modelConfig;
    this.client = new Anthropic({
      apiKey: modelConfig.apiKey,
      baseURL: modelConfig.baseUrl
    });
  }

  /**
   * Runs the request/act loop until a response contains no `computer` tool
   * use or `input.maxSteps` responses have been processed.
   *
   * @param input `{ systemPrompt, instruction, maxSteps }`.
   * @returns `{ success, completed, message, actions, usage }`.
   */
  async execute(input) {
    const actions = [];
    let finalMessage = "";
    let completed = false;
    let step = 0;
    const messages = [
      {
        role: "user",
        content: input.instruction
      }
    ];
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let totalReasoningTokens = 0;
    let totalInferenceTimeMs = 0;
    while (!completed && step < input.maxSteps) {
      const startedAt = Date.now();
      const response = await this.getAction(messages, input.systemPrompt);
      totalInferenceTimeMs += Date.now() - startedAt;
      totalInputTokens += toNumber2(response.usage?.input_tokens);
      totalOutputTokens += toNumber2(response.usage?.output_tokens);
      totalReasoningTokens += toNumber2(toRecord2(response.usage).reasoning_tokens);
      const content = response.content.map((item) => toRecord2(item));
      const toolUseItems = content.filter(
        (item) => item.type === "tool_use" && item.name === "computer"
      );
      const message = extractTextMessage(content);
      if (message) {
        finalMessage = message;
      }
      // The assistant turn is recorded before the tool results that answer it.
      messages.push({
        role: "assistant",
        content
      });
      if (!toolUseItems.length) {
        completed = true;
      } else {
        const stepResult = await this.processToolUseItems(toolUseItems);
        actions.push(...stepResult.actions);
        messages.push({
          role: "user",
          content: stepResult.toolResults
        });
      }
      step += 1;
    }
    return {
      success: completed,
      completed,
      message: finalMessage,
      actions,
      usage: {
        inputTokens: totalInputTokens,
        outputTokens: totalOutputTokens,
        reasoningTokens: totalReasoningTokens > 0 ? totalReasoningTokens : void 0,
        inferenceTimeMs: totalInferenceTimeMs
      }
    };
  }

  /**
   * Executes each tool-use item and builds the matching `tool_result` blocks.
   *
   * Action and screenshot failures are reported back as an "Error: ..." text
   * block instead of being thrown; when both fail, the action error is the
   * one reported.
   *
   * @param items `tool_use` content blocks for the `computer` tool.
   * @returns `{ actions, toolResults }`.
   */
  async processToolUseItems(items) {
    const actions = [];
    const toolResults = [];
    for (const item of items) {
      const toolUseId = normalizeString2(item.id);
      const input = item.input && typeof item.input === "object" ? item.input : {};
      const action = convertAnthropicAction(input);
      actions.push(action);
      let errorMessage2;
      try {
        await this.getActionHandler()(action);
      } catch (error) {
        errorMessage2 = error instanceof Error ? error.message : String(error);
      }
      let imageBlock = null;
      try {
        const screenshot = await this.getScreenshotProvider()();
        imageBlock = {
          type: "image",
          source: {
            type: "base64",
            media_type: "image/png",
            data: screenshot
          }
        };
      } catch (error) {
        errorMessage2 = errorMessage2 || (error instanceof Error ? error.message : String(error));
      }
      const resultContent = [];
      if (imageBlock) {
        resultContent.push(imageBlock);
      }
      if (this.currentUrl) {
        resultContent.push({
          type: "text",
          text: `Current URL: ${this.currentUrl}`
        });
      }
      if (errorMessage2) {
        resultContent.push({
          type: "text",
          text: `Error: ${errorMessage2}`
        });
      }
      toolResults.push({
        type: "tool_result",
        tool_use_id: toolUseId || "unknown_tool_use_id",
        content: resultContent.length > 0 ? resultContent : [
          {
            type: "text",
            text: "Action completed."
          }
        ]
      });
    }
    return {
      actions,
      toolResults
    };
  }

  /**
   * Sends one beta Messages API request with the computer tool attached.
   *
   * @param messages     Conversation so far.
   * @param systemPrompt System prompt string.
   * @throws The error produced by mapAnthropicApiError on any API failure.
   */
  async getAction(messages, systemPrompt) {
    const toolVersion = requiresNewestAnthropicToolVersion(
      this.modelConfig.providerModelName
    ) ? "computer_20251124" : "computer_20250124";
    const betaFlag = toolVersion === "computer_20251124" ? "computer-use-2025-11-24" : "computer-use-2025-01-24";
    const baseMaxTokens = 4096;
    const thinkingBudget = this.modelConfig.thinkingBudget;
    const thinkingEnabled = typeof thinkingBudget === "number";
    // The thinking budget must stay below max_tokens. Budgets that fit under
    // the default keep the original 4096 limit; larger budgets get the
    // default added on top as output headroom.
    const maxTokens = thinkingEnabled && thinkingBudget >= baseMaxTokens ? thinkingBudget + baseMaxTokens : baseMaxTokens;
    const request = {
      model: this.modelConfig.providerModelName,
      max_tokens: maxTokens,
      system: systemPrompt,
      messages,
      tools: [
        {
          type: toolVersion,
          name: "computer",
          display_width_px: this.viewport.width,
          display_height_px: this.viewport.height,
          display_number: 1
        }
      ],
      betas: [betaFlag]
    };
    if (thinkingEnabled) {
      request.thinking = {
        type: "enabled",
        budget_tokens: thinkingBudget
      };
    }
    try {
      return await this.client.beta.messages.create(
        request
      );
    } catch (error) {
      throw mapAnthropicApiError(error);
    }
  }
};
6163
/**
 * Converts the `input` of an Anthropic `computer` tool call into the agent's
 * action shape.
 *
 * Recognized action names: left_click, double_click/doubleClick,
 * drag/left_click_drag, scroll, keypress/key, move, click and type. Any other
 * action is passed through as a copy of the input with `type` set to the
 * action name ("unknown" when no action name is given).
 *
 * @param input Tool-call input object.
 * @returns The converted action.
 * @throws OpensteerAgentActionError when a recognized action lacks a required
 *         text value (raised here for keypress/key and type).
 */
function convertAnthropicAction(input) {
  const type = normalizeString2(input.action) || "unknown";
  if (type === "left_click") {
    const coordinates = resolveCoordinates(input, type);
    return {
      type: "click",
      x: coordinates.x,
      y: coordinates.y,
      button: "left"
    };
  }
  if (type === "double_click" || type === "doubleClick") {
    const coordinates = resolveCoordinates(input, type);
    return {
      type: "double_click",
      x: coordinates.x,
      y: coordinates.y
    };
  }
  if (type === "drag" || type === "left_click_drag") {
    const start = resolveCoordinateArray(
      input.start_coordinate,
      type,
      "start_coordinate"
    );
    const end = resolveCoordinates(input, type);
    return {
      type: "drag",
      path: [start, end]
    };
  }
  if (type === "scroll") {
    const coordinates = resolveCoordinates(input, type);
    const direction = normalizeScrollDirection(input.scroll_direction, type);
    const amount = resolvePositiveNumber(
      input.scroll_amount,
      type,
      "scroll_amount"
    );
    // Scroll amounts are scaled by 100, with a minimum of 100.
    const magnitude = Math.max(1, amount) * 100;
    let scrollX = 0;
    let scrollY = 0;
    if (direction === "up") scrollY = -magnitude;
    if (direction === "down") scrollY = magnitude;
    if (direction === "left") scrollX = -magnitude;
    if (direction === "right") scrollX = magnitude;
    return {
      type: "scroll",
      x: coordinates.x,
      y: coordinates.y,
      scrollX,
      scrollY
    };
  }
  if (type === "keypress" || type === "key") {
    const keyText = normalizeRequiredString(
      input.text,
      `Anthropic action "${type}" requires a non-empty text value.`
    );
    return {
      type: "keypress",
      keys: [keyText]
    };
  }
  if (type === "move") {
    const coordinates = resolveCoordinates(input, type);
    return {
      type: "move",
      x: coordinates.x,
      y: coordinates.y
    };
  }
  if (type === "click") {
    const coordinates = resolveCoordinates(input, type);
    return {
      type: "click",
      x: coordinates.x,
      y: coordinates.y,
      button: normalizeMouseButton(input.button)
    };
  }
  if (type === "type") {
    // A plain "type" action carries only text. Coordinates are resolved (and
    // validated) only when the input supplies some, so a text-only action is
    // no longer rejected.
    // NOTE(review): confirm the action handler accepts a "type" action
    // without x/y.
    const hasCoordinates = input.coordinate != null || input.x != null || input.y != null;
    const coordinates = hasCoordinates ? resolveCoordinates(input, type) : null;
    const text = normalizeRequiredString(
      input.text,
      `Anthropic action "${type}" requires a non-empty text value.`
    );
    if (!coordinates) {
      return {
        type: "type",
        text
      };
    }
    return {
      type: "type",
      text,
      x: coordinates.x,
      y: coordinates.y
    };
  }
  // `type` is written last so a stray `type` key in the input cannot replace
  // the action name derived above.
  return {
    ...input,
    type
  };
}
6261
/**
 * Joins the text of every `text` content block with newlines and trims the
 * result.
 *
 * @param content Array of content-block records.
 * @returns The combined text, or "" when there is none.
 */
function extractTextMessage(content) {
  const pieces = [];
  for (const block of content) {
    if (block.type === "text" && typeof block.text === "string") {
      pieces.push(String(block.text));
    }
  }
  return pieces.join("\n").trim();
}
6265
/**
 * Tells whether a model name is one of the exact names that are paired with
 * the "computer_20251124" tool version.
 *
 * @param modelName Provider model name (exact match, no prefix matching).
 */
function requiresNewestAnthropicToolVersion(modelName) {
  const newestToolModels = ["claude-opus-4-6", "claude-sonnet-4-6", "claude-opus-4-5-20251101"];
  return newestToolModels.includes(modelName);
}
6268
/**
 * Trims a string and treats blank or non-string values as absent.
 *
 * @returns The trimmed string, or undefined.
 */
function normalizeString2(value) {
  if (typeof value === "string") {
    const trimmed = value.trim();
    if (trimmed !== "") {
      return trimmed;
    }
  }
  return undefined;
}
6273
/**
 * Returns the trimmed form of `value`, failing when nothing usable remains.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} errorMessage2 - Message for the thrown error.
 * @returns {string} The trimmed, non-empty string.
 * @throws {OpensteerAgentActionError} When `normalizeString2` yields no value.
 */
function normalizeRequiredString(value, errorMessage2) {
  const text = normalizeString2(value);
  if (text) {
    return text;
  }
  throw new OpensteerAgentActionError(errorMessage2);
}
6280
/**
 * Returns `value` when it is a finite number and `0` otherwise.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number} The finite number, or 0.
 */
function toNumber2(value) {
  if (typeof value !== "number") {
    return 0;
  }
  return Number.isFinite(value) ? value : 0;
}
6283
/**
 * Reads the first two entries of an array as numbers.
 *
 * @param {unknown} value - Expected to be an array.
 * @returns {[number, number]} The two entries, with `NaN` standing in for any
 *   entry that is not of type number (or for both when `value` is not an array).
 */
function arrayNumber(value) {
  const asNumber = (entry) => (typeof entry === "number" ? entry : NaN);
  if (Array.isArray(value)) {
    return [asNumber(value[0]), asNumber(value[1])];
  }
  return [NaN, NaN];
}
6290
/**
 * Resolves an action's target point, preferring `input.coordinate` and falling
 * back per axis to `input.x` / `input.y`.
 *
 * @param {object} input - Action input carrying `coordinate` and/or `x`, `y`.
 * @param {string} actionType - Action name, used in the error message.
 * @returns {{x: number, y: number}} The resolved point.
 * @throws {OpensteerAgentActionError} When either axis has no finite value.
 */
function resolveCoordinates(input, actionType) {
  const [coordinateX, coordinateY] = arrayNumber(input.coordinate);
  const fallbackX = toFiniteNumber(input.x);
  const fallbackY = toFiniteNumber(input.y);
  let x = fallbackX;
  if (Number.isFinite(coordinateX)) {
    x = coordinateX;
  }
  let y = fallbackY;
  if (Number.isFinite(coordinateY)) {
    y = coordinateY;
  }
  // The fallbacks are null when absent, so a null here means neither source had a value.
  if (x == null || y == null) {
    throw new OpensteerAgentActionError(
      `Anthropic action "${actionType}" requires numeric x/y coordinates.`
    );
  }
  return { x, y };
}
6303
/**
 * Converts a two-element coordinate array into a point.
 *
 * @param {unknown} value - Expected to be `[x, y]`.
 * @param {string} actionType - Action name, used in the error message.
 * @param {string} field - Field name, used in the error message.
 * @returns {{x: number, y: number}} The point.
 * @throws {OpensteerAgentActionError} When either entry is not a finite number.
 */
function resolveCoordinateArray(value, actionType, field) {
  const pair = arrayNumber(value);
  const x = pair[0];
  const y = pair[1];
  if (Number.isFinite(x) && Number.isFinite(y)) {
    return { x, y };
  }
  throw new OpensteerAgentActionError(
    `Anthropic action "${actionType}" requires numeric "${field}" coordinates.`
  );
}
6312
/**
 * Validates that `value` is a finite number greater than zero.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} actionType - Action name, used in the error message.
 * @param {string} field - Field name, used in the error message.
 * @returns {number} The validated number.
 * @throws {OpensteerAgentActionError} When the value is missing, non-finite,
 *   zero or negative.
 */
function resolvePositiveNumber(value, actionType, field) {
  const parsed = toFiniteNumber(value);
  if (parsed != null && parsed > 0) {
    return parsed;
  }
  throw new OpensteerAgentActionError(
    `Anthropic action "${actionType}" requires a positive numeric "${field}" value.`
  );
}
6321
/**
 * Validates a scroll direction after trimming it.
 *
 * @param {unknown} value - Candidate direction.
 * @param {string} actionType - Action name, used in the error message.
 * @returns {"up"|"down"|"left"|"right"} The accepted direction.
 * @throws {OpensteerAgentActionError} For any other value (matching is
 *   case-sensitive).
 */
function normalizeScrollDirection(value, actionType) {
  const candidate = normalizeString2(value);
  switch (candidate) {
    case "up":
    case "down":
    case "left":
    case "right":
      return candidate;
    default:
      throw new OpensteerAgentActionError(
        `Anthropic action "${actionType}" requires "scroll_direction" to be one of: up, down, left, right.`
      );
  }
}
6330
/**
 * Validates the mouse button of a click action, ignoring case.
 *
 * @param {unknown} value - Candidate button name.
 * @returns {"left"|"right"|"middle"} The lower-cased button name.
 * @throws {OpensteerAgentActionError} When the value is empty or names a
 *   button other than left, right or middle.
 */
function normalizeMouseButton(value) {
  const requested = normalizeRequiredString(
    value,
    'Anthropic action "click" requires a non-empty "button" value.'
  ).toLowerCase();
  const supported = ["left", "right", "middle"];
  if (!supported.includes(requested)) {
    throw new OpensteerAgentActionError(
      `Anthropic action "click" has unsupported button "${requested}".`
    );
  }
  return requested;
}
6342
/**
 * Returns `value` when it is a finite number and `null` otherwise.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number|null} The finite number, or `null`.
 */
function toFiniteNumber(value) {
  const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
  return isFiniteNumber ? value : null;
}
6348
/**
 * Wraps an error thrown by the Anthropic client in an `OpensteerAgentApiError`.
 *
 * The message comes from `error.error.message` when that is a non-empty
 * string; otherwise from `error.message` (for `Error` instances) or
 * `String(error)`. A numeric `error.status` is passed through as the status.
 *
 * @param {unknown} error - The caught value.
 * @returns {OpensteerAgentApiError} The wrapped error, keeping `error` as its
 *   last constructor argument.
 */
function mapAnthropicApiError(error) {
  const outer = toRecord2(error);
  const inner = toRecord2(outer.error);
  let status;
  if (typeof outer.status === "number") {
    status = outer.status;
  }
  let message = normalizeString2(inner.message);
  if (!message) {
    message = error instanceof Error ? error.message : String(error);
  }
  return new OpensteerAgentApiError("anthropic", message, status, error);
}
6355
/**
 * Returns `value` when it is a non-null object, otherwise a fresh empty object.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object} `value` itself, or `{}`.
 */
function toRecord2(value) {
  if (value !== null && typeof value === "object") {
    return value;
  }
  return {};
}
6358
+
6359
+ // src/agent/clients/google.ts
6360
+ import {
6361
+ Environment,
6362
+ GoogleGenAI
6363
+ } from "@google/genai";
6364
+
6365
+ // src/agent/coords.ts
6366
// Default CUA viewport size: 1288 wide by 711 high.
var DEFAULT_CUA_VIEWPORT = { width: 1288, height: 711 };
6370
/**
 * Converts a point on a 0-999 grid into viewport coordinates.
 *
 * Each input is clamped to [0, 999], divided by 1000, multiplied by the
 * matching viewport dimension and rounded down.
 *
 * @param {number} x - Horizontal grid position.
 * @param {number} y - Vertical grid position.
 * @param {{width: number, height: number}} viewport - Target viewport size.
 * @returns {{x: number, y: number}} The scaled point.
 */
function normalizeGoogleCoordinates(x, y, viewport) {
  const clamp = (position) => Math.min(999, Math.max(0, position));
  const boundedX = clamp(x);
  const boundedY = clamp(y);
  const scaledX = Math.floor(boundedX / 1e3 * viewport.width);
  const scaledY = Math.floor(boundedY / 1e3 * viewport.height);
  return { x: scaledX, y: scaledY };
}
6378
/**
 * Applies provider-specific coordinate scaling.
 *
 * @param {string} provider - Provider name; only "google" triggers scaling.
 * @param {number} x - Horizontal coordinate.
 * @param {number} y - Vertical coordinate.
 * @param {{width: number, height: number}} viewport - Viewport used for scaling.
 * @returns {{x: number, y: number}} The result of `normalizeGoogleCoordinates`
 *   for "google", otherwise the inputs unchanged.
 */
function maybeNormalizeCoordinates(provider, x, y, viewport) {
  return provider === "google" ? normalizeGoogleCoordinates(x, y, viewport) : { x, y };
}
6384
+
6385
+ // src/agent/key-mapping.ts
6386
// Lookup table from upper-cased key aliases to the key names handed on by
// mapKeyToPlaywright. Entries are grouped by kind; several aliases may share
// one target name.
var KEY_MAP = {
  // Editing and whitespace keys.
  ENTER: "Enter", RETURN: "Enter",
  ESCAPE: "Escape", ESC: "Escape",
  BACKSPACE: "Backspace",
  TAB: "Tab",
  SPACE: " ",
  DELETE: "Delete", DEL: "Delete",
  // Arrow keys, in their three spellings.
  ARROWUP: "ArrowUp", ARROWDOWN: "ArrowDown", ARROWLEFT: "ArrowLeft", ARROWRIGHT: "ArrowRight",
  ARROW_UP: "ArrowUp", ARROW_DOWN: "ArrowDown", ARROW_LEFT: "ArrowLeft", ARROW_RIGHT: "ArrowRight",
  UP: "ArrowUp", DOWN: "ArrowDown", LEFT: "ArrowLeft", RIGHT: "ArrowRight",
  // Modifier keys.
  SHIFT: "Shift",
  CONTROL: "Control", CTRL: "Control",
  ALT: "Alt", OPTION: "Alt",
  META: "Meta", COMMAND: "Meta", CMD: "Meta", SUPER: "Meta", WINDOWS: "Meta", WIN: "Meta",
  // Navigation keys.
  HOME: "Home", END: "End",
  PAGEUP: "PageUp", PAGEDOWN: "PageDown",
  PAGE_UP: "PageUp", PAGE_DOWN: "PageDown",
  PGUP: "PageUp", PGDN: "PageDown",
  // Resolved once at load time: Meta on macOS ("darwin"), Control elsewhere.
  CONTROLORMETA: process.platform === "darwin" ? "Meta" : "Control"
};
6429
/**
 * Translates a key alias through `KEY_MAP`, ignoring case and surrounding
 * whitespace.
 *
 * @param {string} key - Key name or alias.
 * @returns {string} The mapped key name, or the trimmed input when no alias
 *   matches (including the empty string).
 */
function mapKeyToPlaywright(key) {
  const token = key.trim();
  if (token === "") {
    return token;
  }
  return KEY_MAP[token.toUpperCase()] || token;
}
6435
+
6436
+ // src/agent/clients/google.ts
6437
/**
 * CUA client backed by the Google GenAI SDK.
 *
 * `execute` runs a request/act loop: it asks the model for content, executes
 * any function calls through the action handler, sends a screenshot-bearing
 * function response back, and repeats until the model answers without function
 * calls or `maxSteps` is reached.
 */
var GoogleCuaClient = class extends CuaClient {
  modelConfig;
  client;
  // Conversation sent as `contents` on every request; reset by each execute().
  history = [];

  /**
   * @param {object} modelConfig - Reads `apiKey`, optional `baseUrl`,
   *   `providerModelName` and `environment`.
   */
  constructor(modelConfig) {
    super();
    this.modelConfig = modelConfig;
    this.client = new GoogleGenAI({
      apiKey: modelConfig.apiKey,
      ...modelConfig.baseUrl ? { httpOptions: { baseUrl: modelConfig.baseUrl } } : {}
    });
  }

  /**
   * Runs the agent loop for a single instruction.
   *
   * @param {{systemPrompt: string, instruction: string, maxSteps: number}} input
   * @returns {Promise<object>} `{ success, completed, message, actions, usage }`
   *   where `usage` holds summed input/output token counts and inference time.
   * @throws {OpensteerAgentActionError} When a function call cannot be mapped,
   *   or function calls arrive with a finish reason other than "STOP".
   * @throws {OpensteerAgentApiError} When the model request fails.
   */
  async execute(input) {
    // The system prompt is sent as a leading user turn, followed by the instruction.
    this.history = [
      {
        role: "user",
        parts: [
          {
            text: `System prompt: ${input.systemPrompt}`
          }
        ]
      },
      {
        role: "user",
        parts: [
          {
            text: input.instruction
          }
        ]
      }
    ];
    const actions = [];
    let finalMessage = "";
    let completed = false;
    let step = 0;
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let totalInferenceTimeMs = 0;
    while (!completed && step < input.maxSteps) {
      const startedAt = Date.now();
      const response = await this.generateContent();
      totalInferenceTimeMs += Date.now() - startedAt;
      const usageMetadata = response.usageMetadata || {};
      totalInputTokens += toFiniteNumberOrZero(usageMetadata.promptTokenCount);
      totalOutputTokens += toFiniteNumberOrZero(
        usageMetadata.candidatesTokenCount
      );
      // Only the first candidate is considered.
      const candidate = Array.isArray(response.candidates) ? response.candidates[0] : null;
      const content = candidate && typeof candidate === "object" && candidate.content && typeof candidate.content === "object" ? candidate.content : null;
      const parts = content && Array.isArray(content.parts) ? content.parts : [];
      const finishReason = extractFinishReason(candidate);
      if (content) {
        this.history.push({
          role: "model",
          parts
        });
      }
      const messageParts = [];
      const functionCalls = [];
      for (const part of parts) {
        if (typeof part.text === "string") {
          messageParts.push(part.text);
        }
        if (part.functionCall && typeof part.functionCall === "object") {
          functionCalls.push(part.functionCall);
        }
      }
      if (messageParts.length) {
        finalMessage = messageParts.join("\n").trim();
      }
      if (!functionCalls.length) {
        // No tool use requested: the run ends here, successfully or not.
        completed = isSuccessfulGoogleFinishReason(finishReason);
        if (!completed && !finalMessage) {
          finalMessage = `Google CUA stopped with finish reason: ${finishReason || "unknown"}.`;
        }
      } else {
        const functionResponses = [];
        for (const functionCall of functionCalls) {
          const mappedActions = mapGoogleFunctionCallToActions(
            functionCall,
            this.viewport
          );
          actions.push(...mappedActions);
          let executionError;
          for (const mappedAction of mappedActions) {
            try {
              await this.getActionHandler()(mappedAction);
            } catch (error) {
              executionError = error instanceof Error ? error.message : String(error);
              // Stop at the first failure: the remaining actions of this call
              // (e.g. select-all + Backspace after a failed click) assume the
              // earlier ones succeeded and could act on the wrong element.
              break;
            }
          }
          const screenshotBase64 = await this.getScreenshotProvider()();
          const responsePayload = {
            url: this.currentUrl || ""
          };
          const args = functionCall.args && typeof functionCall.args === "object" ? functionCall.args : null;
          if (args && args.safety_decision !== void 0) {
            responsePayload.safety_acknowledgement = "true";
          }
          if (executionError) {
            responsePayload.error = executionError;
          }
          functionResponses.push({
            functionResponse: {
              name: typeof functionCall.name === "string" && functionCall.name || "computer_use",
              response: responsePayload,
              parts: [
                {
                  inlineData: {
                    mimeType: "image/png",
                    data: screenshotBase64
                  }
                }
              ]
            }
          });
        }
        if (functionResponses.length) {
          this.history.push({
            role: "user",
            parts: functionResponses
          });
        }
        if (finishReason && finishReason !== "STOP") {
          throw new OpensteerAgentActionError(
            `Google CUA returned function calls with terminal finish reason "${finishReason}".`
          );
        }
        completed = false;
      }
      step += 1;
    }
    return {
      success: completed,
      completed,
      message: finalMessage,
      actions,
      usage: {
        inputTokens: totalInputTokens,
        outputTokens: totalOutputTokens,
        inferenceTimeMs: totalInferenceTimeMs
      }
    };
  }

  /**
   * Sends the current history to the model with the computer-use tool enabled.
   *
   * @returns {Promise<object>} The SDK response.
   * @throws {OpensteerAgentApiError} Any SDK failure, mapped by `mapGoogleApiError`.
   */
  async generateContent() {
    const params = {
      model: this.modelConfig.providerModelName,
      contents: this.history,
      config: {
        temperature: 1,
        topP: 0.95,
        topK: 40,
        maxOutputTokens: 8192,
        tools: [
          {
            computerUse: {
              environment: resolveGoogleEnvironment(
                this.modelConfig.environment
              )
            }
          }
        ]
      }
    };
    try {
      return await this.client.models.generateContent(params);
    } catch (error) {
      throw mapGoogleApiError(error);
    }
  }
};
6608
/**
 * Translates one Google computer-use function call into the list of generic
 * agent actions that implement it.
 *
 * Mapping by function name:
 * - click_at          -> click (button defaults to "left")
 * - type_text_at      -> click, optional select-all + Backspace, type, optional Enter
 * - key_combination   -> one keypress built from the "+"-separated key tokens
 * - scroll_document   -> PageUp / PageDown keypress
 * - scroll_at         -> scroll at the given point
 * - hover_at          -> move
 * - drag_and_drop     -> drag with a two-point path
 * - navigate / search -> goto
 * - go_back / go_forward / wait_5_seconds / open_web_browser -> matching action
 *
 * Coordinates go through `maybeNormalizeCoordinates("google", ...)` before
 * they are placed on an action.
 *
 * @param {{name?: unknown, args?: unknown}} functionCall - Call emitted by the model.
 * @param {{width: number, height: number}} viewport - Viewport used to scale coordinates.
 * @returns {object[]} Actions to execute, in order.
 * @throws {OpensteerAgentActionError} When the name is missing or unsupported,
 *   or a required argument is missing or invalid.
 */
function mapGoogleFunctionCallToActions(functionCall, viewport) {
  const name = normalizeString3(functionCall.name);
  const args = functionCall.args && typeof functionCall.args === "object" ? functionCall.args : {};
  if (!name) {
    throw new OpensteerAgentActionError(
      'Google CUA function call is missing a "name" value.'
    );
  }
  switch (name) {
    case "click_at": {
      const coordinates = normalizeCoordinates(args, viewport, name);
      return [
        {
          type: "click",
          x: coordinates.x,
          y: coordinates.y,
          button: normalizeString3(args.button) || "left"
        }
      ];
    }
    case "type_text_at": {
      const coordinates = normalizeCoordinates(args, viewport, name);
      const clearBeforeTyping = typeof args.clear_before_typing === "boolean" ? args.clear_before_typing : true;
      const pressEnter = typeof args.press_enter === "boolean" ? args.press_enter : false;
      const text = normalizeRequiredString2(
        args.text,
        'Google action "type_text_at" requires a non-empty "text" value.'
      );
      // Click first to focus the target before any keystrokes are sent.
      const actions = [
        {
          type: "click",
          x: coordinates.x,
          y: coordinates.y,
          button: "left"
        }
      ];
      if (clearBeforeTyping) {
        actions.push({
          type: "keypress",
          keys: ["ControlOrMeta+A"]
        });
        actions.push({
          type: "keypress",
          keys: ["Backspace"]
        });
      }
      actions.push({
        type: "type",
        text,
        x: coordinates.x,
        y: coordinates.y
      });
      if (pressEnter) {
        actions.push({
          type: "keypress",
          keys: ["Enter"]
        });
      }
      return actions;
    }
    case "key_combination": {
      const keysRaw = normalizeRequiredString2(
        args.keys,
        'Google action "key_combination" requires a non-empty "keys" value.'
      );
      const keys = keysRaw.split("+").map((part) => part.trim()).filter(Boolean).map((part) => mapKeyToPlaywright(part));
      if (!keys.length) {
        throw new OpensteerAgentActionError(
          'Google action "key_combination" did not produce any key tokens.'
        );
      }
      return [
        {
          type: "keypress",
          keys
        }
      ];
    }
    case "scroll_document": {
      const direction = normalizeVerticalDirection(
        args.direction,
        "scroll_document"
      );
      return [
        {
          type: "keypress",
          keys: [direction === "up" ? "PageUp" : "PageDown"]
        }
      ];
    }
    case "scroll_at": {
      const coordinates = normalizeCoordinates(args, viewport, name);
      const direction = normalizeScrollDirection2(args.direction, "scroll_at");
      // `magnitude` is an optional argument of scroll_at; when the model omits
      // it, fall back to 800 instead of rejecting the whole call. A value that
      // is present but invalid is still an error.
      // NOTE(review): the value is forwarded unscaled, as before — confirm
      // whether it should be scaled to the viewport like the coordinates.
      const magnitude = args.magnitude == null ? 800 : parsePositiveNumber(
        args.magnitude,
        "scroll_at",
        "magnitude"
      );
      let scrollX = 0;
      let scrollY = 0;
      if (direction === "up") scrollY = -magnitude;
      if (direction === "down") scrollY = magnitude;
      if (direction === "left") scrollX = -magnitude;
      if (direction === "right") scrollX = magnitude;
      return [
        {
          type: "scroll",
          x: coordinates.x,
          y: coordinates.y,
          scrollX,
          scrollY
        }
      ];
    }
    case "hover_at": {
      const coordinates = normalizeCoordinates(args, viewport, name);
      return [
        {
          type: "move",
          x: coordinates.x,
          y: coordinates.y
        }
      ];
    }
    case "drag_and_drop": {
      const startX = parseRequiredNumber(args.x, "drag_and_drop", "x");
      const startY = parseRequiredNumber(args.y, "drag_and_drop", "y");
      const endX = parseRequiredNumber(
        args.destination_x,
        "drag_and_drop",
        "destination_x"
      );
      const endY = parseRequiredNumber(
        args.destination_y,
        "drag_and_drop",
        "destination_y"
      );
      const start = maybeNormalizeCoordinates(
        "google",
        startX,
        startY,
        viewport
      );
      const end = maybeNormalizeCoordinates(
        "google",
        endX,
        endY,
        viewport
      );
      return [
        {
          type: "drag",
          path: [start, end]
        }
      ];
    }
    case "navigate":
      return [
        {
          type: "goto",
          url: normalizeRequiredString2(
            args.url,
            'Google action "navigate" requires a non-empty "url" value.'
          )
        }
      ];
    case "go_back":
      return [{ type: "back" }];
    case "go_forward":
      return [{ type: "forward" }];
    case "wait_5_seconds":
      return [{ type: "wait", timeMs: 5e3 }];
    case "search":
      return [
        {
          type: "goto",
          url: buildGoogleSearchUrl(args)
        }
      ];
    case "open_web_browser":
      return [{ type: "open_web_browser" }];
    default:
      throw new OpensteerAgentActionError(
        `Unsupported Google CUA function call "${name}".`
      );
  }
}
6795
/**
 * Reads `args.x` / `args.y` and scales them for the Google provider.
 *
 * @param {object} args - Function-call arguments.
 * @param {{width: number, height: number}} viewport - Viewport used for scaling.
 * @param {string} actionName - Action name, used in error messages.
 * @returns {{x: number, y: number}} The scaled point.
 * @throws {OpensteerAgentActionError} When either coordinate is not a finite number.
 */
function normalizeCoordinates(args, viewport, actionName) {
  const rawX = parseRequiredNumber(args.x, actionName, "x");
  const rawY = parseRequiredNumber(args.y, actionName, "y");
  return maybeNormalizeCoordinates("google", rawX, rawY, viewport);
}
6805
/**
 * Validates that `value` is a finite number.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} actionName - Action name, used in the error message.
 * @param {string} field - Field name, used in the error message.
 * @returns {number} The validated number.
 * @throws {OpensteerAgentActionError} When `value` is not a finite number.
 */
function parseRequiredNumber(value, actionName, field) {
  const usable = typeof value === "number" && Number.isFinite(value);
  if (!usable) {
    throw new OpensteerAgentActionError(
      `Google action "${actionName}" requires numeric "${field}" coordinates.`
    );
  }
  return value;
}
6813
/**
 * Validates that `value` is a finite number greater than zero.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} actionName - Action name, used in the error message.
 * @param {string} field - Field name, used in the error message.
 * @returns {number} The validated number.
 * @throws {OpensteerAgentActionError} When `value` is not a finite positive number.
 */
function parsePositiveNumber(value, actionName, field) {
  const usable = typeof value === "number" && Number.isFinite(value) && value > 0;
  if (!usable) {
    throw new OpensteerAgentActionError(
      `Google action "${actionName}" requires a positive numeric "${field}" value.`
    );
  }
  return value;
}
6821
/**
 * Returns `value` when it is a finite number and `0` otherwise.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number} The finite number, or 0.
 */
function toFiniteNumberOrZero(value) {
  if (typeof value === "number" && Number.isFinite(value)) {
    return value;
  }
  return 0;
}
6824
/**
 * Trims a string value.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string|undefined} The trimmed string, or `undefined` when `value`
 *   is not a string or is empty after trimming.
 */
function normalizeString3(value) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed.length > 0 ? trimmed : void 0;
}
6829
/**
 * Returns the trimmed form of `value`, failing when nothing usable remains.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} errorMessage2 - Message for the thrown error.
 * @returns {string} The trimmed, non-empty string.
 * @throws {OpensteerAgentActionError} When `normalizeString3` yields no value.
 */
function normalizeRequiredString2(value, errorMessage2) {
  const text = normalizeString3(value);
  if (text) {
    return text;
  }
  throw new OpensteerAgentActionError(errorMessage2);
}
6836
/**
 * Validates a four-way scroll direction after trimming it.
 *
 * @param {unknown} value - Candidate direction.
 * @param {string} actionName - Action name, used in the error message.
 * @returns {"up"|"down"|"left"|"right"} The accepted direction.
 * @throws {OpensteerAgentActionError} For any other value (matching is
 *   case-sensitive).
 */
function normalizeScrollDirection2(value, actionName) {
  const candidate = normalizeString3(value);
  switch (candidate) {
    case "up":
    case "down":
    case "left":
    case "right":
      return candidate;
    default:
      throw new OpensteerAgentActionError(
        `Google action "${actionName}" requires "direction" to be one of: up, down, left, right.`
      );
  }
}
6845
/**
 * Validates a vertical scroll direction after trimming it.
 *
 * @param {unknown} value - Candidate direction.
 * @param {string} actionName - Action name, used in the error message.
 * @returns {"up"|"down"} The accepted direction.
 * @throws {OpensteerAgentActionError} For any other value (matching is
 *   case-sensitive).
 */
function normalizeVerticalDirection(value, actionName) {
  const candidate = normalizeString3(value);
  const accepted = candidate === "up" || candidate === "down";
  if (!accepted) {
    throw new OpensteerAgentActionError(
      `Google action "${actionName}" requires "direction" to be "up" or "down".`
    );
  }
  return candidate;
}
6854
/**
 * Builds a Google search URL from a function call's arguments.
 *
 * @param {{query?: unknown, text?: unknown}} args - `query` is used when it is
 *   neither null nor undefined; otherwise `text`.
 * @returns {string} The search URL with the query URI-encoded.
 * @throws {OpensteerAgentActionError} When no non-empty query string is available.
 */
function buildGoogleSearchUrl(args) {
  const rawQuery = args.query ?? args.text;
  const query = normalizeRequiredString2(
    rawQuery,
    'Google action "search" requires a non-empty "query" value.'
  );
  const encodedQuery = encodeURIComponent(query);
  return `https://www.google.com/search?q=${encodedQuery}`;
}
6861
/**
 * Reads the trimmed `finishReason` of a response candidate.
 *
 * @param {unknown} candidate - Response candidate, possibly null.
 * @returns {string|undefined} The finish reason, or `undefined` when the
 *   candidate is not an object or carries no non-empty string reason.
 */
function extractFinishReason(candidate) {
  const isObject = Boolean(candidate) && typeof candidate === "object";
  return isObject ? normalizeString3(candidate.finishReason) : void 0;
}
6867
/**
 * Reports whether a finish reason counts as a successful stop: either no
 * reason at all or exactly "STOP".
 *
 * @param {string|undefined} finishReason - Finish reason from the response.
 * @returns {boolean} `true` for a missing reason or "STOP".
 */
function isSuccessfulGoogleFinishReason(finishReason) {
  if (finishReason === "STOP") {
    return true;
  }
  return !finishReason;
}
6870
/**
 * Chooses the computer-use environment sent to the model.
 *
 * @param {unknown} value - Configured environment.
 * @returns {string} `Environment.ENVIRONMENT_UNSPECIFIED` when the trimmed
 *   value equals it; `Environment.ENVIRONMENT_BROWSER` in every other case.
 */
function resolveGoogleEnvironment(value) {
  const requested = normalizeString3(value);
  const wantsUnspecified = requested === Environment.ENVIRONMENT_UNSPECIFIED;
  return wantsUnspecified ? Environment.ENVIRONMENT_UNSPECIFIED : Environment.ENVIRONMENT_BROWSER;
}
6877
/**
 * Wraps an error thrown by the Google client in an `OpensteerAgentApiError`.
 *
 * The message comes from `error.message` when that is a non-empty string after
 * trimming; otherwise from `error.message` untrimmed (for `Error` instances)
 * or `String(error)`. A numeric `error.status` is passed through as the status.
 *
 * @param {unknown} error - The caught value.
 * @returns {OpensteerAgentApiError} The wrapped error, keeping `error` as its
 *   last constructor argument.
 */
function mapGoogleApiError(error) {
  const record = toRecord3(error);
  let status;
  if (typeof record.status === "number") {
    status = record.status;
  }
  let message = normalizeString3(record.message);
  if (!message) {
    message = error instanceof Error ? error.message : String(error);
  }
  return new OpensteerAgentApiError("google", message, status, error);
}
6883
/**
 * Returns `value` when it is a non-null object, otherwise a fresh empty object.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object} `value` itself, or `{}`.
 */
function toRecord3(value) {
  const isObject = value !== null && typeof value === "object";
  return isObject ? value : {};
}
6886
+
6887
+ // src/agent/provider.ts
6888
// System prompt used by resolveAgentConfig when the agent configuration does
// not supply a non-empty one of its own.
var DEFAULT_SYSTEM_PROMPT =
  "You are a browser automation agent. Complete the user instruction safely and efficiently. Do not ask follow-up questions. Finish as soon as the task is complete.";
6889
/**
 * Validates an agent configuration and fills in defaults.
 *
 * @param {{agentConfig: object, fallbackModel?: unknown, env?: unknown}} args -
 *   `fallbackModel` and `env` are forwarded to `resolveCuaModelConfig`.
 * @returns {{mode: "cua", systemPrompt: string, waitBetweenActionsMs: number, model: object}}
 *   The resolved configuration.
 * @throws {OpensteerAgentConfigError} When `agentConfig` is not an object or
 *   its `mode` is anything other than "cua".
 */
function resolveAgentConfig(args) {
  const agentConfig = args.agentConfig;
  const isConfigObject = Boolean(agentConfig) && typeof agentConfig === "object";
  if (!isConfigObject) {
    throw new OpensteerAgentConfigError(
      'agent() requires a configuration object with mode: "cua".'
    );
  }
  if (agentConfig.mode !== "cua") {
    throw new OpensteerAgentConfigError(
      `Unsupported agent mode "${String(agentConfig.mode)}". OpenSteer currently supports only mode: "cua".`
    );
  }
  const model = resolveCuaModelConfig({
    agentConfig,
    fallbackModel: args.fallbackModel,
    env: args.env
  });
  // A blank or missing prompt falls back to the built-in default.
  const systemPrompt = normalizeNonEmptyString(agentConfig.systemPrompt) || DEFAULT_SYSTEM_PROMPT;
  const waitBetweenActionsMs = normalizeWaitBetween(agentConfig.waitBetweenActionsMs);
  return {
    mode: "cua",
    systemPrompt,
    waitBetweenActionsMs,
    model
  };
}
6913
/**
 * Instantiates the CUA client that matches `config.model.provider`.
 *
 * @param {{model: {provider: string}}} config - Resolved agent configuration.
 * @returns {object} An `OpenAICuaClient`, `AnthropicCuaClient` or
 *   `GoogleCuaClient` constructed with `config.model`.
 * @throws {OpensteerAgentProviderError} For any other provider value.
 */
function createCuaClient(config) {
  const provider = config.model.provider;
  if (provider === "openai") {
    return new OpenAICuaClient(config.model);
  }
  if (provider === "anthropic") {
    return new AnthropicCuaClient(config.model);
  }
  if (provider === "google") {
    return new GoogleCuaClient(config.model);
  }
  throw new OpensteerAgentProviderError(
    `Unsupported CUA provider "${String(config.model.provider)}".`
  );
}
6927
/**
 * Trims a string value.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string|undefined} The trimmed string, or `undefined` when `value`
 *   is not a string or is empty after trimming.
 */
function normalizeNonEmptyString(value) {
  if (typeof value === "string") {
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : void 0;
  }
  return void 0;
}
6932
/**
 * Normalizes the configured wait between actions.
 *
 * @param {unknown} value - Configured wait.
 * @returns {number} `value` rounded down when it is a finite, non-negative
 *   number; 500 otherwise.
 */
function normalizeWaitBetween(value) {
  const usable = typeof value === "number" && Number.isFinite(value) && value >= 0;
  return usable ? Math.floor(value) : 500;
}
6938
+
6939
+ // src/agent/action-executor.ts
6940
/**
 * Executes a single agent action against a page.
 *
 * The action type is normalized first (see `normalizeActionType`), then
 * dispatched: mouse actions go through `page.mouse`, typing and key presses
 * through `page.keyboard` / `pressKeyCombo`, navigation through
 * `page.goto` / `goBack` / `goForward`. "screenshot" and "open_web_browser"
 * are accepted and do nothing.
 *
 * @param {object} page - Page-like object exposing `mouse`, `keyboard`,
 *   `goto`, `goBack` and `goForward` (presumably a Playwright Page — confirm
 *   against the caller).
 * @param {object} action - Action with a `type` and type-specific fields.
 * @returns {Promise<void>}
 * @throws {OpensteerAgentActionError} For an unsupported type, a drag without
 *   a usable path, or missing required fields.
 */
async function executeAgentAction(page, action) {
  const type = normalizeActionType(action.type);
  switch (type) {
    case "click": {
      const { x, y } = toPoint(action);
      await page.mouse.click(x, y, {
        button: normalizeMouseButton2(action.button, "left"),
        clickCount: normalizeClickCount(action.clickCount, 1)
      });
      return;
    }
    case "doubleclick": {
      const { x, y } = toPoint(action);
      await page.mouse.click(x, y, {
        button: normalizeMouseButton2(action.button, "left"),
        clickCount: 2
      });
      return;
    }
    case "tripleclick": {
      const { x, y } = toPoint(action);
      await page.mouse.click(x, y, {
        button: normalizeMouseButton2(action.button, "left"),
        clickCount: 3
      });
      return;
    }
    case "rightclick": {
      const { x, y } = toPoint(action);
      await page.mouse.click(x, y, {
        button: "right",
        clickCount: normalizeClickCount(action.clickCount, 1)
      });
      return;
    }
    case "type": {
      // Clicks the target first when the action carries coordinates.
      await maybeFocusPoint(page, action);
      const text = typeof action.text === "string" ? action.text : "";
      if (action.clearBeforeTyping === true) {
        await pressKeyCombo(page, "ControlOrMeta+A");
        await page.keyboard.press("Backspace");
      }
      await page.keyboard.type(text);
      if (action.pressEnter === true) {
        await page.keyboard.press("Enter");
      }
      return;
    }
    case "keypress": {
      const combos = normalizeKeyCombos(action.keys);
      for (const combo of combos) {
        await pressKeyCombo(page, combo);
      }
      return;
    }
    case "scroll": {
      const x = numberOr(action.scrollX, action.scroll_x, 0);
      const y = numberOr(action.scrollY, action.scroll_y, 0);
      const point = maybePoint(action);
      if (point) {
        await page.mouse.move(point.x, point.y);
      }
      await page.mouse.wheel(x, y);
      return;
    }
    case "drag": {
      const path5 = normalizePath(action.path);
      if (!path5.length) {
        throw new OpensteerAgentActionError(
          "Drag action requires a non-empty path."
        );
      }
      await page.mouse.move(path5[0].x, path5[0].y);
      await page.mouse.down();
      try {
        for (const point of path5.slice(1)) {
          await page.mouse.move(point.x, point.y);
        }
      } finally {
        // Always release the button: if a move fails mid-drag, leaving it
        // pressed would turn every later mouse action into part of the drag.
        await page.mouse.up();
      }
      return;
    }
    case "move":
    case "hover": {
      const { x, y } = toPoint(action);
      await page.mouse.move(x, y);
      return;
    }
    case "wait": {
      const ms = numberOr(action.timeMs, action.time_ms, 1e3);
      await sleep2(ms);
      return;
    }
    case "goto": {
      const url = normalizeRequiredString3(action.url, "Action URL is required for goto.");
      await page.goto(url, { waitUntil: "load" });
      return;
    }
    case "back": {
      // Best effort: a failed history navigation is deliberately ignored.
      await page.goBack({ waitUntil: "load" }).catch(() => void 0);
      return;
    }
    case "forward": {
      // Best effort: a failed history navigation is deliberately ignored.
      await page.goForward({ waitUntil: "load" }).catch(() => void 0);
      return;
    }
    case "screenshot":
    case "open_web_browser": {
      return;
    }
    default:
      throw new OpensteerAgentActionError(
        `Unsupported CUA action type "${String(action.type)}".`
      );
  }
}
7054
/**
 * Reports whether an action is anything other than the three types treated as
 * non-mutating: "wait", "screenshot" and "open_web_browser".
 *
 * @param {{type: unknown}} action - Action to classify.
 * @returns {boolean} `false` for the three types above (after normalization),
 *   `true` for everything else.
 */
function isMutatingAgentAction(action) {
  const kind = normalizeActionType(action.type);
  const nonMutating = ["wait", "screenshot", "open_web_browser"];
  return !nonMutating.includes(kind);
}
7058
+ function normalizeActionType(value) {
7059
+ const raw = typeof value === "string" ? value : "";
7060
+ const normalized = raw.trim().toLowerCase();
7061
+ if (!normalized) return "";
7062
+ if (normalized === "double_click" || normalized === "doubleclick") {
7063
+ return "doubleclick";
7064
+ }
7065
+ if (normalized === "triple_click" || normalized === "tripleclick") {
7066
+ return "tripleclick";
7067
+ }
7068
+ if (normalized === "left_click") {
7069
+ return "click";
5383
7070
  }
5384
- };
5385
- function normalizeBaseUrl(baseUrl) {
5386
- return baseUrl.replace(/\/+$/, "");
7071
+ if (normalized === "right_click") {
7072
+ return "rightclick";
7073
+ }
7074
+ if (normalized === "openwebbrowser" || normalized === "open_web_browser") {
7075
+ return "open_web_browser";
7076
+ }
7077
+ return normalized;
5387
7078
  }
5388
- function parseCreateResponse(body, status) {
5389
- const root = requireObject(
5390
- body,
5391
- "Invalid cloud session create response: expected a JSON object.",
5392
- status
5393
- );
5394
- const sessionId = requireString(root, "sessionId", status);
5395
- const actionWsUrl = requireString(root, "actionWsUrl", status);
5396
- const cdpWsUrl = requireString(root, "cdpWsUrl", status);
5397
- const actionToken = requireString(root, "actionToken", status);
5398
- const cdpToken = requireString(root, "cdpToken", status);
5399
- const cloudSessionUrl = requireString(root, "cloudSessionUrl", status);
5400
- const cloudSessionRoot = requireObject(
5401
- root.cloudSession,
5402
- "Invalid cloud session create response: cloudSession must be an object.",
5403
- status
7079
+ function toPoint(action) {
7080
+ const point = maybePoint(action);
7081
+ if (point) {
7082
+ return point;
7083
+ }
7084
+ throw new OpensteerAgentActionError(
7085
+ `Action "${String(action.type)}" requires numeric x and y coordinates.`
5404
7086
  );
5405
- const cloudSession = {
5406
- sessionId: requireString(cloudSessionRoot, "sessionId", status, "cloudSession"),
5407
- workspaceId: requireString(
5408
- cloudSessionRoot,
5409
- "workspaceId",
5410
- status,
5411
- "cloudSession"
5412
- ),
5413
- state: requireString(cloudSessionRoot, "state", status, "cloudSession"),
5414
- createdAt: requireNumber(cloudSessionRoot, "createdAt", status, "cloudSession"),
5415
- sourceType: requireSourceType(cloudSessionRoot, "sourceType", status, "cloudSession"),
5416
- sourceRef: optionalString(cloudSessionRoot, "sourceRef", status, "cloudSession"),
5417
- label: optionalString(cloudSessionRoot, "label", status, "cloudSession")
5418
- };
5419
- const expiresAt = optionalNumber(root, "expiresAt", status);
7087
+ }
7088
+ function maybePoint(action) {
7089
+ const coordinate = Array.isArray(action.coordinate) ? action.coordinate : Array.isArray(action.coordinates) ? action.coordinates : null;
7090
+ const x = numberOr(action.x, coordinate?.[0]);
7091
+ const y = numberOr(action.y, coordinate?.[1]);
7092
+ if (!Number.isFinite(x) || !Number.isFinite(y)) {
7093
+ return null;
7094
+ }
5420
7095
  return {
5421
- sessionId,
5422
- actionWsUrl,
5423
- cdpWsUrl,
5424
- actionToken,
5425
- cdpToken,
5426
- expiresAt,
5427
- cloudSessionUrl,
5428
- cloudSession
7096
+ x,
7097
+ y
5429
7098
  };
5430
7099
  }
5431
- function requireObject(value, message, status) {
5432
- if (!value || typeof value !== "object" || Array.isArray(value)) {
5433
- throw new OpensteerCloudError("CLOUD_CONTRACT_MISMATCH", message, status);
7100
+ async function maybeFocusPoint(page, action) {
7101
+ const point = maybePoint(action);
7102
+ if (!point) {
7103
+ return;
5434
7104
  }
5435
- return value;
7105
+ await page.mouse.click(point.x, point.y, {
7106
+ button: normalizeMouseButton2(action.button, "left"),
7107
+ clickCount: 1
7108
+ });
5436
7109
  }
5437
- function requireString(source, field, status, parent) {
5438
- const value = source[field];
5439
- if (typeof value !== "string" || !value.trim()) {
5440
- throw new OpensteerCloudError(
5441
- "CLOUD_CONTRACT_MISMATCH",
5442
- `Invalid cloud session create response: ${formatFieldPath(
5443
- field,
5444
- parent
5445
- )} must be a non-empty string.`,
5446
- status
5447
- );
7110
+ function normalizePath(path5) {
7111
+ if (!Array.isArray(path5)) return [];
7112
+ const points = [];
7113
+ for (const entry of path5) {
7114
+ if (!entry || typeof entry !== "object") continue;
7115
+ const candidate = entry;
7116
+ const x = Number(candidate.x);
7117
+ const y = Number(candidate.y);
7118
+ if (!Number.isFinite(x) || !Number.isFinite(y)) continue;
7119
+ points.push({ x, y });
7120
+ }
7121
+ return points;
7122
+ }
7123
+ function normalizeMouseButton2(value, fallback) {
7124
+ if (value === "left" || value === "right" || value === "middle") {
7125
+ return value;
5448
7126
  }
5449
- return value;
7127
+ if (typeof value === "string") {
7128
+ const normalized = value.toLowerCase();
7129
+ if (normalized === "left" || normalized === "right" || normalized === "middle") {
7130
+ return normalized;
7131
+ }
7132
+ }
7133
+ return fallback;
5450
7134
  }
5451
- function requireNumber(source, field, status, parent) {
5452
- const value = source[field];
5453
- if (typeof value !== "number" || !Number.isFinite(value)) {
5454
- throw new OpensteerCloudError(
5455
- "CLOUD_CONTRACT_MISMATCH",
5456
- `Invalid cloud session create response: ${formatFieldPath(
5457
- field,
5458
- parent
5459
- )} must be a finite number.`,
5460
- status
5461
- );
7135
+ function normalizeClickCount(value, fallback) {
7136
+ if (typeof value === "number" && Number.isFinite(value) && value > 0) {
7137
+ return Math.floor(value);
5462
7138
  }
5463
- return value;
7139
+ return fallback;
5464
7140
  }
5465
- function optionalString(source, field, status, parent) {
5466
- const value = source[field];
5467
- if (value == null) {
5468
- return void 0;
7141
/**
 * Turn a key description into a list of key-combo strings.
 *
 * - A string yields a one-element list holding its trimmed text (or an empty
 *   list when it is blank).
 * - An array keeps only its non-blank string entries, trimmed. When none of
 *   them contains "+" and there is more than one, they are joined with "+"
 *   into a single combo; otherwise they are returned as separate combos.
 * - Anything else yields an empty list.
 *
 * @param {unknown} value - A key string or an array of key strings.
 * @returns {string[]} The normalised combos.
 */
function normalizeKeyCombos(value) {
  if (typeof value === "string") {
    const single = value.trim();
    return single === "" ? [] : [single];
  }
  if (!Array.isArray(value)) {
    return [];
  }
  const cleaned = [];
  for (const entry of value) {
    if (typeof entry !== "string") {
      continue;
    }
    const key = entry.trim();
    if (key) {
      cleaned.push(key);
    }
  }
  if (cleaned.length === 0) {
    return [];
  }
  // Entries without any "+" are treated as the pieces of one combo.
  const isBareSequence = cleaned.every((key) => !key.includes("+"));
  return isBareSequence && cleaned.length > 1 ? [cleaned.join("+")] : cleaned;
}
5499
- function requireSourceType(source, field, status, parent) {
5500
- const value = source[field];
5501
- if (value === "agent-thread" || value === "agent-run" || value === "local-cloud" || value === "manual") {
5502
- return value;
7159
/**
 * Return the first argument that is a finite number.
 *
 * @param {...unknown} values - Candidates, checked in order.
 * @returns {number} The first finite number among `values`, or `NaN` when
 *   there is none.
 */
function numberOr(...values) {
  const firstFinite = values.find(
    (candidate) => typeof candidate === "number" && Number.isFinite(candidate)
  );
  // `find` yields undefined only when nothing matched; a match is always a number.
  return firstFinite === undefined ? NaN : firstFinite;
}
5524
- function mergeImportResponse(first, second) {
5525
- return {
5526
- imported: first.imported + second.imported,
5527
- inserted: first.inserted + second.inserted,
5528
- updated: first.updated + second.updated,
5529
- skipped: first.skipped + second.skipped
5530
- };
7165
/**
 * Require a non-blank string and return it trimmed.
 *
 * @param {unknown} value - Value that must be a string with non-whitespace content.
 * @param {string} errorMessage2 - Message for the error raised on invalid input.
 * @returns {string} `value` with surrounding whitespace removed.
 * @throws {OpensteerAgentActionError} When `value` is not a string or is blank.
 */
function normalizeRequiredString3(value, errorMessage2) {
  const cleaned = typeof value === "string" ? value.trim() : "";
  if (cleaned === "") {
    throw new OpensteerAgentActionError(errorMessage2);
  }
  return cleaned;
}
5532
- async function parseHttpError(response) {
5533
- let body = null;
7171
/**
 * Press a key or a "+"-separated key combination on the page keyboard.
 *
 * Every part except the last is held down as a modifier while the last part
 * is pressed, and the modifiers are then released in reverse order. Each part
 * is passed through `mapKeyToPlaywright` before use.
 *
 * A trailing "+" is read as the literal plus key, so "+" presses plus and
 * "Control++" presses plus while holding Control.
 * NOTE(review): this assumes `mapKeyToPlaywright` passes "+" through as a
 * pressable key — confirm against its implementation.
 *
 * @param {object} page - Object exposing `keyboard.press`, `keyboard.down` and `keyboard.up`.
 * @param {string} combo - Key name or combination such as "Enter" or "Control+Shift+a".
 * @returns {Promise<void>} Resolves once all keys have been released.
 */
async function pressKeyCombo(page, combo) {
  const trimmed = combo.trim();
  if (!trimmed) return;
  if (trimmed === "+" || !trimmed.includes("+")) {
    await page.keyboard.press(mapKeyToPlaywright(trimmed));
    return;
  }
  // Without this, splitting on "+" would discard the plus key itself.
  const endsWithPlusKey = /\+\s*\+$/.test(trimmed);
  const separated = endsWithPlusKey ? trimmed.slice(0, -1) : trimmed;
  const names = separated.split("+").map((part) => part.trim()).filter(Boolean);
  if (endsWithPlusKey) {
    names.push("+");
  }
  if (!names.length) return;
  const parts = names.map((name) => mapKeyToPlaywright(name));
  const modifiers = parts.slice(0, -1);
  const last = parts[parts.length - 1];
  // Track what was actually pressed so a failure part-way through the
  // modifier list still releases the keys already held.
  const held = [];
  try {
    for (const modifier of modifiers) {
      await page.keyboard.down(modifier);
      held.push(modifier);
    }
    await page.keyboard.press(last);
  } finally {
    for (const modifier of held.reverse()) {
      await page.keyboard.up(modifier);
    }
  }
}
5543
- function toCloudErrorCode(code) {
5544
- if (code === "CLOUD_AUTH_FAILED" || code === "CLOUD_SESSION_NOT_FOUND" || code === "CLOUD_SESSION_CLOSED" || code === "CLOUD_UNSUPPORTED_METHOD" || code === "CLOUD_INVALID_REQUEST" || code === "CLOUD_MODEL_NOT_ALLOWED" || code === "CLOUD_ACTION_FAILED" || code === "CLOUD_INTERNAL" || code === "CLOUD_CAPACITY_EXHAUSTED" || code === "CLOUD_RUNTIME_UNAVAILABLE" || code === "CLOUD_RUNTIME_MISMATCH" || code === "CLOUD_SESSION_STALE" || code === "CLOUD_CONTRACT_MISMATCH" || code === "CLOUD_CONTROL_PLANE_ERROR") {
5545
- return code;
7193
/**
 * Wait for a number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds; negative values are raised to 0.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function sleep2(ms) {
  const delay = Math.max(0, ms);
  return new Promise((resolve) => {
    setTimeout(resolve, delay);
  });
}
7196
+
7197
+ // src/agent/handler.ts
7198
/**
 * Drives a computer-use agent client against a page: it wires the client's
 * action, screenshot, viewport and URL hooks to the page, runs the client,
 * and optionally draws a cursor marker at the coordinates of each action.
 */
var OpensteerCuaAgentHandler = class {
  page;
  config;
  client;
  debug;
  onMutatingAction;
  // Set once the overlay has been drawn successfully. Kept as a public field
  // for compatibility; rendering no longer depends on it.
  cursorOverlayInjected = false;
  /**
   * @param {object} options
   * @param {object} options.page - Page the actions are executed on.
   * @param {object} options.config - Agent configuration; `model.provider`,
   *   `model.fullModelName`, `systemPrompt` and `waitBetweenActionsMs` are read.
   * @param {object} options.client - Agent client to drive.
   * @param {boolean} [options.debug] - When truthy, cursor overlay failures are logged.
   * @param {Function} [options.onMutatingAction] - Called with each action
   *   that `isMutatingAgentAction` reports as mutating.
   */
  constructor(options) {
    this.page = options.page;
    this.config = options.config;
    this.client = options.client;
    this.debug = options.debug;
    this.onMutatingAction = options.onMutatingAction;
  }
  /**
   * Run one instruction through the client.
   *
   * @param {object} options
   * @param {*} options.instruction - Instruction forwarded to the client.
   * @param {number} [options.maxSteps] - Step limit; defaults to 20.
   * @param {boolean} [options.highlightCursor] - Draw the cursor marker only when exactly `true`.
   * @returns {Promise<object>} The client result extended with `provider` and `model`.
   * @throws {OpensteerAgentExecutionError} When the client's `execute` call rejects.
   */
  async execute(options) {
    const instruction = options.instruction;
    const maxSteps = options.maxSteps ?? 20;
    await this.initializeClient();
    const highlightCursor = options.highlightCursor === true;
    this.client.setActionHandler(async (action) => {
      if (highlightCursor) {
        await this.maybeRenderCursor(action);
      }
      await executeAgentAction(this.page, action);
      // The action may have navigated, so report the page's current URL.
      this.client.setCurrentUrl(this.page.url());
      if (isMutatingAgentAction(action)) {
        this.onMutatingAction?.(action);
      }
      await sleep3(this.config.waitBetweenActionsMs);
    });
    try {
      const result = await this.client.execute({
        instruction,
        maxSteps,
        systemPrompt: this.config.systemPrompt
      });
      return {
        ...result,
        provider: this.config.model.provider,
        model: this.config.model.fullModelName
      };
    } catch (error) {
      throw new OpensteerAgentExecutionError(
        `CUA agent execution failed: ${error instanceof Error ? error.message : String(error)}`,
        error
      );
    }
  }
  /**
   * Give the client the viewport size, the current URL and a screenshot
   * provider that returns a base64-encoded PNG of the visible viewport.
   *
   * @returns {Promise<void>}
   */
  async initializeClient() {
    const viewport = await this.resolveViewport();
    this.client.setViewport(viewport.width, viewport.height);
    this.client.setCurrentUrl(this.page.url());
    this.client.setScreenshotProvider(async () => {
      const buffer = await this.page.screenshot({
        fullPage: false,
        type: "png"
      });
      return buffer.toString("base64");
    });
  }
  /**
   * Determine the viewport size: the page's own `viewportSize()` when it has
   * non-zero dimensions, otherwise the window's inner size measured in the
   * page, otherwise `DEFAULT_CUA_VIEWPORT`.
   *
   * @returns {Promise<{width: number, height: number}>}
   */
  async resolveViewport() {
    const directViewport = this.page.viewportSize();
    if (directViewport?.width && directViewport?.height) {
      return directViewport;
    }
    try {
      const evaluated = await this.page.evaluate(() => ({
        width: window.innerWidth,
        height: window.innerHeight
      }));
      if (evaluated && typeof evaluated === "object" && typeof evaluated.width === "number" && typeof evaluated.height === "number" && evaluated.width > 0 && evaluated.height > 0) {
        return {
          width: Math.floor(evaluated.width),
          height: Math.floor(evaluated.height)
        };
      }
    } catch {
      // Measuring is best effort; fall through to the default viewport.
    }
    return DEFAULT_CUA_VIEWPORT;
  }
  /**
   * Draw the cursor marker at the action's coordinates, if it has any.
   *
   * The marker element is created whenever it is missing from the current
   * document instead of only once per handler, so it is drawn again after
   * the document has been replaced (for example by a navigation). Failures
   * never propagate; they are logged when `debug` is truthy.
   *
   * @param {object} action - Agent action; `x` and `y` are used when both are finite numbers.
   * @returns {Promise<void>}
   */
  async maybeRenderCursor(action) {
    // Non-finite coordinates would produce an invalid CSS transform.
    const x = Number.isFinite(action.x) ? action.x : null;
    const y = Number.isFinite(action.y) ? action.y : null;
    if (x == null || y == null) {
      return;
    }
    try {
      await this.page.evaluate(
        ({ px, py }) => {
          let cursor = document.getElementById("__opensteer_cua_cursor");
          if (!cursor) {
            cursor = document.createElement("div");
            cursor.id = "__opensteer_cua_cursor";
            cursor.style.position = "fixed";
            cursor.style.width = "14px";
            cursor.style.height = "14px";
            cursor.style.borderRadius = "999px";
            cursor.style.background = "rgba(255, 51, 51, 0.85)";
            cursor.style.border = "2px solid rgba(255, 255, 255, 0.95)";
            cursor.style.boxShadow = "0 0 0 3px rgba(255, 51, 51, 0.25)";
            cursor.style.pointerEvents = "none";
            cursor.style.zIndex = "2147483647";
            cursor.style.transition = "transform 80ms linear";
            document.documentElement.appendChild(cursor);
          }
          // Offset by half the 14px marker so it is centred on the point.
          cursor.style.transform = `translate(${Math.round(px - 7)}px, ${Math.round(py - 7)}px)`;
        },
        { px: x, py: y }
      );
      this.cursorOverlayInjected = true;
    } catch (error) {
      if (this.debug) {
        const message = error instanceof Error ? error.message : String(error);
        console.warn(`[opensteer] cursor overlay failed: ${message}`);
      }
    }
  }
};
7321
/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds; values below 0 are treated as 0.
 * @returns {Promise<void>} Settles when the timeout elapses.
 */
function sleep3(ms) {
  const delayMs = Math.max(0, ms);
  return new Promise((done) => {
    setTimeout(done, delayMs);
  });
}
5549
7324
 
5550
7325
  // src/opensteer.ts
@@ -6572,7 +8347,7 @@ var AdaptiveNetworkTracker = class {
6572
8347
  this.idleSince = 0;
6573
8348
  }
6574
8349
  const remaining = Math.max(1, options.deadline - now);
6575
- await sleep2(Math.min(NETWORK_POLL_MS, remaining));
8350
+ await sleep4(Math.min(NETWORK_POLL_MS, remaining));
6576
8351
  }
6577
8352
  }
6578
8353
  handleRequestStarted = (request) => {
@@ -6617,7 +8392,7 @@ var AdaptiveNetworkTracker = class {
6617
8392
  return false;
6618
8393
  }
6619
8394
  };
6620
- async function sleep2(ms) {
8395
+ async function sleep4(ms) {
6621
8396
  await new Promise((resolve) => {
6622
8397
  setTimeout(resolve, ms);
6623
8398
  });
@@ -7736,7 +9511,7 @@ function clonePersistedExtractNode(node) {
7736
9511
  }
7737
9512
 
7738
9513
  // src/cloud/runtime.ts
7739
- var DEFAULT_CLOUD_BASE_URL = "https://remote.opensteer.com";
9514
+ var DEFAULT_CLOUD_BASE_URL = "https://api.opensteer.com";
7740
9515
  function createCloudRuntimeState(key, baseUrl = resolveCloudBaseUrl(), authScheme = "api-key") {
7741
9516
  const normalizedBaseUrl = normalizeCloudBaseUrl(baseUrl);
7742
9517
  return {
@@ -7792,6 +9567,7 @@ var Opensteer = class _Opensteer {
7792
9567
  contextRef = null;
7793
9568
  ownsBrowser = false;
7794
9569
  snapshotCache = null;
9570
+ agentExecutionInFlight = false;
7795
9571
  constructor(config = {}) {
7796
9572
  const resolved = resolveConfig(config);
7797
9573
  const cloudSelection = resolveCloudSelection({
@@ -7827,7 +9603,7 @@ var Opensteer = class _Opensteer {
7827
9603
  return async (...args) => {
7828
9604
  try {
7829
9605
  if (!resolverPromise) {
7830
- resolverPromise = import("./resolver-WGFFHW4N.js").then(
9606
+ resolverPromise = import("./resolver-HVZJQZ32.js").then(
7831
9607
  (m) => m.createResolveCallback(model)
7832
9608
  );
7833
9609
  }
@@ -7844,7 +9620,7 @@ var Opensteer = class _Opensteer {
7844
9620
  const extract = async (args) => {
7845
9621
  try {
7846
9622
  if (!extractorPromise) {
7847
- extractorPromise = import("./extractor-K5VU7HVC.js").then(
9623
+ extractorPromise = import("./extractor-I6TJPTXV.js").then(
7848
9624
  (m) => m.createExtractCallback(model)
7849
9625
  );
7850
9626
  }
@@ -7907,6 +9683,84 @@ var Opensteer = class _Opensteer {
7907
9683
  cause
7908
9684
  });
7909
9685
  }
9686
+ async syncCloudPageRef(args) {
9687
+ if (!this.cloud || !this.browser) return;
9688
+ let tabs;
9689
+ try {
9690
+ tabs = await this.invokeCloudAction("tabs", {});
9691
+ } catch {
9692
+ return;
9693
+ }
9694
+ if (!tabs.length) {
9695
+ return;
9696
+ }
9697
+ const contexts = this.browser.contexts();
9698
+ if (!contexts.length) return;
9699
+ const syncContext = this.contextRef && contexts.includes(this.contextRef) ? this.contextRef : contexts[0];
9700
+ const syncContextPages = syncContext.pages();
9701
+ const activeTab = tabs.find((tab) => tab.active) ?? null;
9702
+ if (activeTab && activeTab.index >= 0 && activeTab.index < syncContextPages.length) {
9703
+ this.contextRef = syncContext;
9704
+ this.pageRef = syncContextPages[activeTab.index];
9705
+ return;
9706
+ }
9707
+ const expectedUrl = args?.expectedUrl?.trim() || null;
9708
+ const expectedUrlInSyncContext = expectedUrl ? syncContextPages.find((page) => page.url() === expectedUrl) : void 0;
9709
+ if (expectedUrlInSyncContext) {
9710
+ this.contextRef = syncContext;
9711
+ this.pageRef = expectedUrlInSyncContext;
9712
+ return;
9713
+ }
9714
+ const firstNonInternalInSyncContext = syncContextPages.find(
9715
+ (page) => !isInternalOrBlankPageUrl(page.url())
9716
+ );
9717
+ if (firstNonInternalInSyncContext) {
9718
+ this.contextRef = syncContext;
9719
+ this.pageRef = firstNonInternalInSyncContext;
9720
+ return;
9721
+ }
9722
+ const firstAboutBlankInSyncContext = syncContextPages.find(
9723
+ (page) => page.url() === "about:blank"
9724
+ );
9725
+ if (firstAboutBlankInSyncContext) {
9726
+ this.contextRef = syncContext;
9727
+ this.pageRef = firstAboutBlankInSyncContext;
9728
+ return;
9729
+ }
9730
+ const pages = [];
9731
+ for (const context of contexts) {
9732
+ for (const page of context.pages()) {
9733
+ pages.push({
9734
+ context,
9735
+ page,
9736
+ url: page.url()
9737
+ });
9738
+ }
9739
+ }
9740
+ if (!pages.length) return;
9741
+ const expectedUrlMatch = expectedUrl ? pages.find(({ url }) => url === expectedUrl) : void 0;
9742
+ if (expectedUrlMatch) {
9743
+ this.contextRef = expectedUrlMatch.context;
9744
+ this.pageRef = expectedUrlMatch.page;
9745
+ return;
9746
+ }
9747
+ const firstNonInternal = pages.find(
9748
+ ({ url }) => !isInternalOrBlankPageUrl(url)
9749
+ );
9750
+ if (firstNonInternal) {
9751
+ this.contextRef = firstNonInternal.context;
9752
+ this.pageRef = firstNonInternal.page;
9753
+ return;
9754
+ }
9755
+ const firstAboutBlank = pages.find(({ url }) => url === "about:blank");
9756
+ if (firstAboutBlank) {
9757
+ this.contextRef = firstAboutBlank.context;
9758
+ this.pageRef = firstAboutBlank.page;
9759
+ return;
9760
+ }
9761
+ this.contextRef = pages[0].context;
9762
+ this.pageRef = pages[0].page;
9763
+ }
7910
9764
  get page() {
7911
9765
  if (!this.pageRef) {
7912
9766
  throw new Error(
@@ -8009,6 +9863,7 @@ var Opensteer = class _Opensteer {
8009
9863
  this.cloud.actionClient = actionClient;
8010
9864
  this.cloud.sessionId = sessionId;
8011
9865
  this.cloud.cloudSessionUrl = session2.cloudSessionUrl;
9866
+ await this.syncCloudPageRef().catch(() => void 0);
8012
9867
  this.announceCloudSession({
8013
9868
  sessionId: session2.sessionId,
8014
9869
  workspaceId: session2.cloudSession.workspaceId,
@@ -8104,6 +9959,9 @@ var Opensteer = class _Opensteer {
8104
9959
  async goto(url, options) {
8105
9960
  if (this.cloud) {
8106
9961
  await this.invokeCloudActionAndResetCache("goto", { url, options });
9962
+ await this.syncCloudPageRef({ expectedUrl: url }).catch(
9963
+ () => void 0
9964
+ );
8107
9965
  return;
8108
9966
  }
8109
9967
  const { waitUntil = "domcontentloaded", ...rest } = options ?? {};
@@ -8608,9 +10466,16 @@ var Opensteer = class _Opensteer {
8608
10466
  }
8609
10467
  async newTab(url) {
8610
10468
  if (this.cloud) {
8611
- return await this.invokeCloudActionAndResetCache("newTab", {
8612
- url
8613
- });
10469
+ const result = await this.invokeCloudActionAndResetCache(
10470
+ "newTab",
10471
+ {
10472
+ url
10473
+ }
10474
+ );
10475
+ await this.syncCloudPageRef({ expectedUrl: result.url }).catch(
10476
+ () => void 0
10477
+ );
10478
+ return result;
8614
10479
  }
8615
10480
  const { page, info } = await createTab(this.context, url);
8616
10481
  this.pageRef = page;
@@ -8620,6 +10485,7 @@ var Opensteer = class _Opensteer {
8620
10485
  async switchTab(index) {
8621
10486
  if (this.cloud) {
8622
10487
  await this.invokeCloudActionAndResetCache("switchTab", { index });
10488
+ await this.syncCloudPageRef().catch(() => void 0);
8623
10489
  return;
8624
10490
  }
8625
10491
  const page = await switchTab(this.context, index);
@@ -8629,6 +10495,7 @@ var Opensteer = class _Opensteer {
8629
10495
  async closeTab(index) {
8630
10496
  if (this.cloud) {
8631
10497
  await this.invokeCloudActionAndResetCache("closeTab", { index });
10498
+ await this.syncCloudPageRef().catch(() => void 0);
8632
10499
  return;
8633
10500
  }
8634
10501
  const newPage = await closeTab(this.context, this.page, index);
@@ -9039,6 +10906,37 @@ var Opensteer = class _Opensteer {
9039
10906
  this.storage.clearNamespace();
9040
10907
  this.snapshotCache = null;
9041
10908
  }
10909
+ agent(config) {
10910
+ const resolvedAgentConfig = resolveAgentConfig({
10911
+ agentConfig: config,
10912
+ fallbackModel: this.config.model
10913
+ });
10914
+ return {
10915
+ execute: async (instructionOrOptions) => {
10916
+ if (this.agentExecutionInFlight) {
10917
+ throw new OpensteerAgentBusyError();
10918
+ }
10919
+ this.agentExecutionInFlight = true;
10920
+ try {
10921
+ const options = normalizeExecuteOptions(instructionOrOptions);
10922
+ const handler = new OpensteerCuaAgentHandler({
10923
+ page: this.page,
10924
+ config: resolvedAgentConfig,
10925
+ client: createCuaClient(resolvedAgentConfig),
10926
+ debug: Boolean(this.config.debug),
10927
+ onMutatingAction: () => {
10928
+ this.snapshotCache = null;
10929
+ }
10930
+ });
10931
+ const result = await handler.execute(options);
10932
+ this.snapshotCache = null;
10933
+ return result;
10934
+ } finally {
10935
+ this.agentExecutionInFlight = false;
10936
+ }
10937
+ }
10938
+ };
10939
+ }
9042
10940
  async runWithPostActionWait(action, waitOverride, execute) {
9043
10941
  const waitSession = createPostActionWaitSession(
9044
10942
  this.page,
@@ -10061,6 +11959,11 @@ function getScrollDelta2(options) {
10061
11959
  return { x: 0, y: absoluteAmount };
10062
11960
  }
10063
11961
  }
11962
/**
 * Report whether a page URL is empty, "about:blank", or a browser-internal
 * page (chrome://, devtools:// or edge://).
 *
 * @param {string} url - Page URL to classify.
 * @returns {boolean} `true` for empty, blank or internal URLs.
 */
function isInternalOrBlankPageUrl(url) {
  if (!url || url === "about:blank") {
    return true;
  }
  const internalSchemes = ["chrome://", "devtools://", "edge://"];
  return internalSchemes.some((scheme) => url.startsWith(scheme));
}
10064
11967
  function buildLocalRunId(namespace) {
10065
11968
  const normalized = namespace.trim() || "default";
10066
11969
  return `${normalized}-${Date.now().toString(36)}-${randomUUID2().slice(0, 8)}`;
@@ -10135,5 +12038,15 @@ export {
10135
12038
  collectLocalSelectorCacheEntries,
10136
12039
  CloudCdpClient,
10137
12040
  CloudSessionClient,
12041
+ OpensteerAgentError,
12042
+ OpensteerAgentConfigError,
12043
+ OpensteerAgentProviderError,
12044
+ OpensteerAgentExecutionError,
12045
+ OpensteerAgentBusyError,
12046
+ OpensteerAgentActionError,
12047
+ OpensteerAgentApiError,
12048
+ resolveAgentConfig,
12049
+ createCuaClient,
12050
+ OpensteerCuaAgentHandler,
10138
12051
  Opensteer
10139
12052
  };