wuying-agentbay-sdk 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/dist/{chunk-BVWUCG4J.mjs → chunk-P2CXYF4T.mjs} +400 -163
  2. package/dist/chunk-P2CXYF4T.mjs.map +1 -0
  3. package/dist/{chunk-SL5GCAQE.cjs → chunk-WVWGLZDT.cjs} +337 -100
  4. package/dist/chunk-WVWGLZDT.cjs.map +1 -0
  5. package/dist/index.cjs +6789 -752
  6. package/dist/index.cjs.map +1 -1
  7. package/dist/index.d.mts +637 -165
  8. package/dist/index.d.ts +637 -165
  9. package/dist/index.mjs +6654 -617
  10. package/dist/index.mjs.map +1 -1
  11. package/dist/{model-LGWQJWKQ.mjs → model-BRLR6F3P.mjs} +16 -2
  12. package/dist/model-KJHN3WYY.cjs +214 -0
  13. package/dist/{model-CNCGFWJH.cjs.map → model-KJHN3WYY.cjs.map} +1 -1
  14. package/docs/api/README.md +6 -0
  15. package/docs/api/browser-use/browser-agent.md +188 -0
  16. package/docs/api/browser-use/browser.md +1 -1
  17. package/docs/api/browser-use/fingerprint.md +154 -0
  18. package/docs/api/codespace/code.md +3 -0
  19. package/docs/api/common-features/advanced/agent.md +7 -63
  20. package/docs/api/common-features/advanced/browser-use-agent.md +118 -0
  21. package/docs/api/common-features/advanced/computer-use-agent.md +85 -0
  22. package/docs/api/common-features/basics/agentbay.md +3 -2
  23. package/docs/api/common-features/basics/command.md +35 -18
  24. package/docs/api/common-features/basics/filesystem.md +36 -0
  25. package/docs/api/common-features/basics/session-params.md +382 -0
  26. package/docs/api/common-features/basics/session.md +0 -2
  27. package/docs/api/computer-use/computer.md +25 -25
  28. package/docs/api/mobile-use/mobile-simulate.md +135 -0
  29. package/docs/examples/browser-use/browser/basic-usage.ts +31 -24
  30. package/docs/examples/browser-use/browser/browser-type-example.ts +3 -4
  31. package/docs/examples/browser-use/browser/captcha_tongcheng.ts +60 -28
  32. package/docs/examples/browser-use/browser/run-2048.ts +47 -37
  33. package/docs/examples/browser-use/browser/run-sudoku.ts +55 -36
  34. package/docs/examples/browser-use/browser/screenshot-example.ts +6 -6
  35. package/docs/examples/browser-use/extension-example/extension-example.ts +1 -2
  36. package/docs/examples/codespace/enhanced_code/index.ts +86 -0
  37. package/docs/examples/common-features/advanced/agent-module-example.ts +1 -1
  38. package/docs/examples/common-features/advanced/archive-upload-mode-example/README.md +1 -1
  39. package/docs/examples/common-features/advanced/archive-upload-mode-example/archive-upload-mode-example.ts +5 -6
  40. package/docs/examples/common-features/basics/archive-upload-mode-example/README.md +1 -1
  41. package/docs/examples/common-features/basics/archive-upload-mode-example/main.ts +1 -1
  42. package/docs/examples/common-features/basics/filesystem-example/filesystem-example.ts +13 -0
  43. package/docs/examples/common-features/basics/filesystem-example/filesystem-filetransfer-example.ts +6 -7
  44. package/docs/examples/common-features/basics/filesystem-example/watch-directory-example.ts +1 -1
  45. package/docs/examples/mobile-use/mobile-get-adb-url/index.ts +1 -1
  46. package/package.json +4 -4
  47. package/dist/chunk-BVWUCG4J.mjs.map +0 -1
  48. package/dist/chunk-SL5GCAQE.cjs.map +0 -1
  49. package/dist/model-CNCGFWJH.cjs +0 -200
  50. package/docs/examples/mobile-use/mobile-get-adb-url/package-lock.json +0 -279
  51. package/docs/examples/mobile-use/mobile-get-adb-url/package.json +0 -18
  52. /package/dist/{model-LGWQJWKQ.mjs.map → model-BRLR6F3P.mjs.map} +0 -0
@@ -0,0 +1,135 @@
1
+ # Class: MobileSimulateService
2
+
3
+ Provides methods to manage persistent mobile dev info and sync it to the mobile device.
4
+
5
+ ## Table of contents
6
+
7
+
8
+ ### Methods
9
+
10
+ - [hasMobileInfo](#hasmobileinfo)
11
+ - [setSimulateContextId](#setsimulatecontextid)
12
+ - [setSimulateEnable](#setsimulateenable)
13
+ - [setSimulateMode](#setsimulatemode)
14
+ - [uploadMobileInfo](#uploadmobileinfo)
15
+
16
+ ## Methods
17
+
18
+ ### hasMobileInfo
19
+
20
+ ▸ **hasMobileInfo**(`contextSync`): `Promise`\<`boolean`\>
21
+
22
+ Check whether the mobile dev info file exists in the given context sync. (Only for user-provided context syncs.)
23
+
24
+ #### Parameters
25
+
26
+ | Name | Type | Description |
27
+ | :------ | :------ | :------ |
28
+ | `contextSync` | [`ContextSync`](../common-features/basics/context-sync.md) | The context sync to check. |
29
+
30
+ #### Returns
31
+
32
+ `Promise`\<`boolean`\>
33
+
34
+ True if the mobile dev info file exists, False otherwise.
35
+
36
+ **`Throws`**
37
+
38
+ Error if context_sync is not provided, or if context_sync.context_id or context_sync.path is not provided.
39
+
40
+ **`Remarks`**
41
+
42
+ This method can only be used when the mobile simulate context sync is managed by the user. For an internal mobile simulate
43
+ context sync, this method will not work.
44
+
45
+ ___
46
+
47
+ ### setSimulateContextId
48
+
49
+ ▸ **setSimulateContextId**(`contextId`): `void`
50
+
51
+ Set a previously saved simulate context id. Please make sure the context id is provided by MobileSimulateService
52
+ rather than taken from a user-created context.
53
+
54
+ #### Parameters
55
+
56
+ | Name | Type | Description |
57
+ | :------ | :------ | :------ |
58
+ | `contextId` | `string` | The context ID of the previously saved mobile simulate context. |
59
+
60
+ #### Returns
61
+
62
+ `void`
63
+
64
+ ___
65
+
66
+ ### setSimulateEnable
67
+
68
+ ▸ **setSimulateEnable**(`enable`): `void`
69
+
70
+ Set the simulate enable flag.
71
+
72
+ #### Parameters
73
+
74
+ | Name | Type | Description |
75
+ | :------ | :------ | :------ |
76
+ | `enable` | `boolean` | The simulate feature enable flag. |
77
+
78
+ #### Returns
79
+
80
+ `void`
81
+
82
+ ___
83
+
84
+ ### setSimulateMode
85
+
86
+ ▸ **setSimulateMode**(`mode`): `void`
87
+
88
+ Set the simulate mode.
89
+
90
+ #### Parameters
91
+
92
+ | Name | Type | Description |
93
+ | :------ | :------ | :------ |
94
+ | `mode` | ``MobileSimulateMode`` | The simulate mode. - PropertiesOnly: Simulate only device properties. - SensorsOnly: Simulate only device sensors. - PackagesOnly: Simulate only installed packages. - ServicesOnly: Simulate only system services. - All: Simulate all aspects of the device. |
95
+
96
+ #### Returns
97
+
98
+ `void`
99
+
100
+ ___
101
+
102
+ ### uploadMobileInfo
103
+
104
+ ▸ **uploadMobileInfo**(`mobileDevInfoContent`, `contextSync?`): `Promise`\<``MobileSimulateUploadResult``\>
105
+
106
+ Upload the mobile simulate dev info.
107
+
108
+ #### Parameters
109
+
110
+ | Name | Type | Description |
111
+ | :------ | :------ | :------ |
112
+ | `mobileDevInfoContent` | `string` | The mobile simulate dev info content to upload. |
113
+ | `contextSync?` | [`ContextSync`](../common-features/basics/context-sync.md) | Optional. - If not provided, a new context sync will be created for the mobile simulate service, and its context id will be returned in the MobileSimulateUploadResult. The user can reuse this context id to keep the mobile simulation persistent across sessions. - If provided, the mobile simulate dev info will be uploaded to a specific path in that context sync. |
114
+
115
+ #### Returns
116
+
117
+ `Promise`\<``MobileSimulateUploadResult``\>
118
+
119
+ The result of the upload operation.
120
+
121
+ **`Throws`**
122
+
123
+ Error if mobile_dev_info_content is not provided or is not a valid JSON string.
124
+
125
+ **`Throws`**
126
+
127
+ Error if context_sync is provided but context_sync.context_id is not provided.
128
+
129
+ **`Remarks`**
130
+
131
+ If context_sync is not provided, a new context sync will be created for the mobile simulate.
132
+ If context_sync is provided, the mobile simulate dev info will be uploaded to the context sync.
133
+ If the mobile simulate dev info already exists in the context sync, the context sync will be updated.
134
+ If the mobile simulate dev info does not exist in the context sync, the context sync will be created.
135
+ If the upload operation fails, the error message will be returned.
@@ -3,14 +3,19 @@
3
3
  * This example demonstrates the core browser operations without external dependencies.
4
4
  */
5
5
 
6
- import { AgentBay, CreateSessionParams, ActOptions, ObserveOptions, ExtractOptions } from 'wuying-agentbay-sdk';
7
-
8
- // Simple schema for demonstration
9
- class PageInfo {
10
- title: string = "";
11
- url: string = "";
12
- }
13
-
6
+ import {
7
+ AgentBay,
8
+ CreateSessionParams,
9
+ ActOptions,
10
+ ObserveOptions,
11
+ ExtractOptions,
12
+ } from "wuying-agentbay-sdk";
13
+ import { z } from "zod";
14
+
15
+ const PageInfoSchema = z.object({
16
+ title: z.string().describe("page title"),
17
+ url: z.string().describe("page url"),
18
+ });
14
19
  async function main() {
15
20
  // Get API key from environment variable
16
21
  const apiKey = process.env.AGENTBAY_API_KEY;
@@ -57,7 +62,7 @@ async function main() {
57
62
  // Example: Mock page object (in real usage, you'd get this from Playwright)
58
63
  const mockPage = {
59
64
  url: () => "https://example.com",
60
- title: () => "Example Domain"
65
+ title: () => "Example Domain",
61
66
  };
62
67
 
63
68
  // Example 1: Perform an action
@@ -65,7 +70,7 @@ async function main() {
65
70
  try {
66
71
  const actOptions: ActOptions = {
67
72
  action: "Click the 'More information...' link",
68
- timeoutMS: 5000
73
+ timeout: 5,
69
74
  };
70
75
 
71
76
  const actResult = await session.browser.agent.act(actOptions, mockPage);
@@ -81,10 +86,11 @@ async function main() {
81
86
  console.log("\n--- Example 2: Observing page elements ---");
82
87
  try {
83
88
  const observeOptions: ObserveOptions = {
84
- instruction: "Find all links and buttons on the page"
89
+ instruction: "Find all links and buttons on the page",
85
90
  };
86
91
 
87
- const [observeSuccess, observations] = await session.browser.agent.observe(observeOptions, mockPage);
92
+ const [observeSuccess, observations] =
93
+ await session.browser.agent.observe(observeOptions, mockPage);
88
94
  console.log("Observe success:", observeSuccess);
89
95
  console.log("Number of observations:", observations.length);
90
96
 
@@ -92,7 +98,7 @@ async function main() {
92
98
  console.log(`Observation ${index + 1}:`, {
93
99
  selector: obs.selector,
94
100
  description: obs.description,
95
- method: obs.method
101
+ method: obs.method,
96
102
  });
97
103
  });
98
104
  } catch (error) {
@@ -102,22 +108,24 @@ async function main() {
102
108
  // Example 3: Extract structured data from the page
103
109
  console.log("\n--- Example 3: Extracting structured data ---");
104
110
  try {
105
- const extractOptions: ExtractOptions<PageInfo> = {
111
+ const extractOptions: ExtractOptions<typeof PageInfoSchema> = {
106
112
  instruction: "Extract the page title and URL",
107
- schema: PageInfo,
108
- use_text_extract: false
113
+ schema: PageInfoSchema,
114
+ use_text_extract: false,
109
115
  };
110
116
 
111
- const [extractSuccess, extractedData] = await session.browser.agent.extract(extractOptions, mockPage);
117
+ const [extractSuccess, extracted] = await session.browser.agent.extract(
118
+ extractOptions,
119
+ mockPage
120
+ );
112
121
  console.log("Extract success:", extractSuccess);
113
- console.log("Extracted data count:", extractedData.length);
114
122
 
115
- extractedData.forEach((data: PageInfo, index: number) => {
116
- console.log(`Extracted item ${index + 1}:`, {
117
- title: data.title,
118
- url: data.url
123
+ if (extractSuccess && extracted) {
124
+ console.log("Extracted data:", {
125
+ title: extracted.title,
126
+ url: extracted.url,
119
127
  });
120
- });
128
+ }
121
129
  } catch (error) {
122
130
  console.log("Extraction failed:", error);
123
131
  }
@@ -125,7 +133,6 @@ async function main() {
125
133
  // Clean up
126
134
  console.log("\n--- Cleanup ---");
127
135
  console.log("Browser session completed successfully");
128
-
129
136
  } catch (error) {
130
137
  console.error("Error in main function:", error);
131
138
  }
@@ -66,7 +66,7 @@ async function testBrowserType(
66
66
  console.log(' ✓ Browser initialized successfully');
67
67
 
68
68
  // Get endpoint URL
69
- const endpointUrl = session.browser.getEndpointUrl();
69
+ const endpointUrl = await session.browser.getEndpointUrl();
70
70
  console.log(`\n3. CDP endpoint: ${String(endpointUrl).substring(0, 50)}...`);
71
71
 
72
72
  // Connect Playwright and verify browser
@@ -179,7 +179,7 @@ async function quickExample(): Promise<void> {
179
179
  console.log('✓ Chrome browser initialized successfully');
180
180
 
181
181
  // Get endpoint and use with Playwright
182
- const endpointUrl = session.browser.getEndpointUrl();
182
+ const endpointUrl = await session.browser.getEndpointUrl();
183
183
  const browser = await chromium.connectOverCDP(endpointUrl);
184
184
  const page = await browser.contexts()[0].newPage();
185
185
 
@@ -262,5 +262,4 @@ main()
262
262
  .catch(error => {
263
263
  console.error('Error:', error.message);
264
264
  process.exit(1);
265
- });
266
-
265
+ });
@@ -8,8 +8,30 @@
8
8
  */
9
9
 
10
10
  import { AgentBay, CreateSessionParams } from 'wuying-agentbay-sdk';
11
- import { BrowserOption } from 'wuying-agentbay-sdk';
12
- import { chromium } from 'playwright';
11
+ import { BrowserOption } from 'wuying-agentbay-sdk/dist/browser';
12
+ import { chromium, Page, ConsoleMessage } from 'playwright';
13
+
14
+ // Polling helper: repeatedly evaluates the condition until it is met or the timeout elapses
15
+ async function waitForCondition(
16
+ page: Page,
17
+ conditionCode: string | (() => any),
18
+ timeout: number = 30000,
19
+ interval: number = 200
20
+ ): Promise<boolean> {
21
+ const startTime = Date.now();
22
+ while (Date.now() - startTime < timeout) {
23
+ try {
24
+ const result = await page.evaluate(conditionCode);
25
+ if (result) {
26
+ return true;
27
+ }
28
+ } catch (error) {
29
+ // Ignore errors, continue polling
30
+ }
31
+ await new Promise(resolve => setTimeout(resolve, interval));
32
+ }
33
+ return false;
34
+ }
13
35
 
14
36
  async function main(): Promise<void> {
15
37
  // Get API key from environment variable
@@ -81,42 +103,52 @@ async function main(): Promise<void> {
81
103
  console.log('Clicking next step button...');
82
104
  await page.click('#next_step1');
83
105
 
84
- // Listen for captcha processing messages
85
- let captchaSolvingStarted = false;
86
- let captchaSolvingFinished = false;
87
-
88
106
  // Listen for console messages
89
- const handleConsole = (msg: any) => {
107
+ page.on('console', async (msg: ConsoleMessage) => {
90
108
  console.log(`🔍 Received console message: ${msg.text()}`);
109
+
91
110
  if (msg.text() === 'wuying-captcha-solving-started') {
92
- captchaSolvingStarted = true;
93
- console.log('🎯 Setting captchaSolvingStarted = true');
94
- page.evaluate('window.captchaSolvingStarted = true; window.captchaSolvingFinished = false;');
111
+ console.log('🎯 Captcha processing started');
112
+ await page.evaluate('window.captchaSolvingStarted = true; window.captchaSolvingFinished = false;');
95
113
  } else if (msg.text() === 'wuying-captcha-solving-finished') {
96
- captchaSolvingFinished = true;
97
- console.log('✅ Setting captchaSolvingFinished = true');
98
- page.evaluate('window.captchaSolvingFinished = true;');
114
+ console.log('✅ Captcha processing finished');
115
+ await page.evaluate('window.captchaSolvingFinished = true;');
99
116
  }
100
- };
101
-
102
- page.on('console', handleConsole);
117
+ });
103
118
 
104
119
  // Wait 1 second first, then check if captcha processing has started
105
- try {
106
- await page.waitForTimeout(1000);
107
- await page.waitForFunction('() => window.captchaSolvingStarted === true', { timeout: 1000 });
120
+ await page.waitForTimeout(1000);
121
+
122
+ // Use function form like JavaScript version
123
+ // The function will be serialized and executed in browser context where window exists
124
+ const started = await waitForCondition(
125
+ page,
126
+ () => {
127
+ // @ts-expect-error - window exists in browser context where this function will be executed
128
+ return window.captchaSolvingStarted === true;
129
+ },
130
+ 3000,
131
+ 200
132
+ );
133
+
134
+ if (started) {
108
135
  console.log('🎯 Detected captcha processing started, waiting for completion...');
109
-
110
- // If start is detected, wait for completion (max 30 seconds)
111
- try {
112
- await page.waitForFunction('() => window.captchaSolvingFinished === true', { timeout: 30000 });
136
+ const finished = await waitForCondition(
137
+ page,
138
+ () => {
139
+ // @ts-expect-error - window exists in browser context where this function will be executed
140
+ return window.captchaSolvingFinished === true;
141
+ },
142
+ 30000,
143
+ 200
144
+ );
145
+ if (finished) {
113
146
  console.log('✅ Captcha processing completed');
114
- } catch (error) {
115
- console.log('⚠️ Captcha processing timeout, may still be in progress');
147
+ } else {
148
+ console.log('⚠️ Captcha processing timeout, may still be in progress, continuing execution');
116
149
  }
117
-
118
- } catch (error) {
119
- console.log('⏭️ No captcha processing detected, continuing execution');
150
+ } else {
151
+ console.log('⏭️ No captcha processing detected, may not need to handle captcha');
120
152
  }
121
153
 
122
154
  await page.waitForTimeout(2000);
@@ -6,35 +6,29 @@
6
6
  * - Utilize PageUseAgent to run 2048 game
7
7
  */
8
8
 
9
- import { AgentBay, CreateSessionParams } from '../../../../typescript/src/agent-bay';
10
- import { BrowserOption, ExtractOptions, ActOptions } from '../../../../typescript/src/browser';
11
- // import { chromium } from 'playwright';
12
-
13
- class GameState {
14
- score?: number;
15
- highestTile?: number;
16
- grid: number[][] = [];
17
- }
9
+ import { AgentBay, CreateSessionParams } from "wuying-agentbay-sdk";
10
+ import { BrowserOption, ExtractOptions } from "wuying-agentbay-sdk";
11
+ import { z } from "zod";
18
12
 
19
- class MoveAnalysis {
20
- move?: number;
21
- confidence: number = 0;
22
- reasoning: string = "";
23
- }
13
+ const GameStateSchema = z.object({
14
+ score: z.number().optional(),
15
+ highestTile: z.number().optional(),
16
+ grid: z.array(z.array(z.number())),
17
+ });
24
18
 
25
19
  function transposeGrid(grid: number[][]): number[][] {
26
20
  if (!grid || grid.length === 0) {
27
21
  return [];
28
22
  }
29
- return grid[0].map((_, colIndex) => grid.map(row => row[colIndex]));
23
+ return grid[0].map((_, colIndex) => grid.map((row) => row[colIndex]));
30
24
  }
31
25
 
32
26
  function formatGridForLlmInstruction(gridData: number[][]): string {
33
27
  const formattedRows: string[] = [];
34
28
  for (let i = 0; i < gridData.length; i++) {
35
- formattedRows.push(`row${i + 1}: [${gridData[i].join(', ')}]`);
29
+ formattedRows.push(`row${i + 1}: [${gridData[i].join(", ")}]`);
36
30
  }
37
- return formattedRows.join('\n');
31
+ return formattedRows.join("\n");
38
32
  }
39
33
 
40
34
  async function main() {
@@ -66,17 +60,17 @@ async function main() {
66
60
  console.log("endpoint_url =", endpointUrl);
67
61
 
68
62
  // Note: Install playwright with: npm install playwright
69
- const { chromium } = require('playwright');
63
+ const { chromium } = require("playwright");
70
64
  const browser = await chromium.connectOverCDP(endpointUrl);
71
65
  let page = null;
72
66
 
73
67
  try {
74
- const context = browser.contexts()[0]
68
+ const context = browser.contexts()[0];
75
69
  page = await context.newPage();
76
70
  console.log("🌐 Navigating to 2048...");
77
71
  await page.goto("https://ovolve.github.io/2048-AI/", {
78
72
  waitUntil: "domcontentloaded",
79
- timeout: 180000
73
+ timeout: 180000,
80
74
  });
81
75
  console.log("🌐 Navigated to 2048 done");
82
76
  await page.waitForSelector(".grid-container", { timeout: 10000 });
@@ -87,11 +81,11 @@ async function main() {
87
81
 
88
82
  while (true) {
89
83
  console.log("🔄 Game loop iteration...");
90
- await new Promise(resolve => setTimeout(resolve, 300));
84
+ await new Promise((resolve) => setTimeout(resolve, 300));
91
85
 
92
86
  // Get current game state
93
87
  console.log("📊 Extracting game state...");
94
- const gameStateOptions: ExtractOptions<GameState> = {
88
+ const gameStateOptions: ExtractOptions<typeof GameStateSchema> = {
95
89
  instruction: `
96
90
  Extract the current game state:
97
91
  1. Score from the score counter
@@ -105,20 +99,25 @@ async function main() {
105
99
  For instance, if the only tiles present are the two above, the grid should be:[[0, 0, 0, 2], [0, 0, 0, 0], [0, 0, 0, 0], [2, 0, 0, 0]]
106
100
  3. Highest tile value present
107
101
  `,
108
- schema: GameState,
109
- use_text_extract: false
110
-
102
+ schema: GameStateSchema,
103
+ use_text_extract: false,
111
104
  };
112
105
 
113
- const [success, gameStates] = await session.browser.agent.extract(gameStateOptions, page);
114
- if (success && gameStates.length > 0) {
115
- const gameState = gameStates[0];
106
+ const [success, gameState] = await session.browser.agent.extract(
107
+ gameStateOptions,
108
+ page
109
+ );
110
+ if (success && gameState) {
116
111
  const transposedGrid = transposeGrid(gameState.grid);
117
112
  console.log(`transposed grid: ${JSON.stringify(transposedGrid)}`);
118
113
  console.log(`gameState: ${JSON.stringify(gameState)}`);
119
114
  const gridInstruction = formatGridForLlmInstruction(transposedGrid);
120
115
 
121
- if (lastTransposedGrid !== null && JSON.stringify(transposedGrid) === JSON.stringify(lastTransposedGrid)) {
116
+ if (
117
+ lastTransposedGrid !== null &&
118
+ JSON.stringify(transposedGrid) ===
119
+ JSON.stringify(lastTransposedGrid)
120
+ ) {
122
121
  transposedGridNotChangedTimes += 1;
123
122
  } else {
124
123
  transposedGridNotChangedTimes = 0;
@@ -144,22 +143,33 @@ async function main() {
144
143
 
145
144
  if (transposedGridNotChangedTimes >= 1) {
146
145
  instructionStr += `
147
- 9. Do not generate move value in ${JSON.stringify(lastMoveHistory)}
148
- 10. If last move value ${lastMoveHistory[lastMoveHistory.length - 1]} moves up or down, then generate move value with left or right direction, otherwise generate move value with up or down direction
146
+ 9. Do not generate move value in ${JSON.stringify(
147
+ lastMoveHistory
148
+ )}
149
+ 10. If last move value ${
150
+ lastMoveHistory[lastMoveHistory.length - 1]
151
+ } moves up or down, then generate move value with left or right direction, otherwise generate move value with up or down direction
149
152
  `;
150
153
  }
151
154
 
152
- const nextMoveOptions: ExtractOptions<MoveAnalysis> = {
155
+ const nextMoveOptions: ExtractOptions<any> = {
153
156
  instruction: instructionStr,
154
- schema: MoveAnalysis,
155
- use_text_extract: false
157
+ schema: z.object({
158
+ move: z.number().optional(),
159
+ confidence: z.number().optional(),
160
+ reasoning: z.string().optional(),
161
+ }),
162
+ use_text_extract: false,
156
163
  };
157
164
 
158
- const [moveSuccess, nextMove] = await session.browser.agent.extract(nextMoveOptions, page);
165
+ const [moveSuccess, nextMove] = await session.browser.agent.extract(
166
+ nextMoveOptions,
167
+ page
168
+ );
159
169
  let selectedMove = 4; // Default to no move
160
170
 
161
- if (moveSuccess && nextMove.length > 0) {
162
- selectedMove = nextMove[0].move ?? 4;
171
+ if (moveSuccess && nextMove && typeof nextMove === "object") {
172
+ selectedMove = (nextMove as any).move ?? 4;
163
173
  } else {
164
174
  console.log("❌ Failed to extract next move, retry observing");
165
175
  continue;