wuying-agentbay-sdk 0.10.2 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/dist/chunk-BVWUCG4J.mjs +3402 -0
  2. package/dist/chunk-BVWUCG4J.mjs.map +1 -0
  3. package/dist/chunk-SL5GCAQE.cjs +3393 -0
  4. package/dist/chunk-SL5GCAQE.cjs.map +1 -0
  5. package/dist/index.cjs +4623 -3978
  6. package/dist/index.cjs.map +1 -1
  7. package/dist/index.d.mts +3934 -1964
  8. package/dist/index.d.ts +3934 -1964
  9. package/dist/index.mjs +4601 -3956
  10. package/dist/index.mjs.map +1 -1
  11. package/dist/key-normalizer-AF7APGQZ.cjs +136 -0
  12. package/dist/key-normalizer-AF7APGQZ.cjs.map +1 -0
  13. package/dist/key-normalizer-QDBRLFHF.mjs +145 -0
  14. package/dist/key-normalizer-QDBRLFHF.mjs.map +1 -0
  15. package/dist/model-CNCGFWJH.cjs +200 -0
  16. package/dist/model-CNCGFWJH.cjs.map +1 -0
  17. package/dist/model-LGWQJWKQ.mjs +209 -0
  18. package/docs/api/README.md +26 -0
  19. package/docs/api/browser-use/browser.md +177 -0
  20. package/docs/api/browser-use/extension.md +284 -0
  21. package/docs/api/codespace/code.md +77 -0
  22. package/docs/api/common-features/advanced/agent.md +84 -0
  23. package/docs/api/common-features/advanced/oss.md +221 -0
  24. package/docs/api/common-features/basics/agentbay.md +277 -0
  25. package/docs/api/common-features/basics/command.md +83 -0
  26. package/docs/api/common-features/basics/context-manager.md +149 -0
  27. package/docs/api/common-features/basics/context-sync.md +51 -0
  28. package/docs/api/common-features/basics/context.md +348 -0
  29. package/docs/api/common-features/basics/filesystem.md +499 -0
  30. package/docs/api/common-features/basics/logging.md +77 -0
  31. package/docs/api/common-features/basics/session.md +542 -0
  32. package/docs/api/computer-use/computer.md +786 -0
  33. package/docs/api/mobile-use/mobile.md +395 -0
  34. package/docs/examples/README.md +332 -0
  35. package/docs/examples/basic-usage.ts +86 -0
  36. package/docs/examples/browser-use/browser/README.md +356 -0
  37. package/docs/examples/browser-use/browser/basic-usage.ts +136 -0
  38. package/docs/examples/browser-use/browser/browser-command-args.ts +117 -0
  39. package/docs/examples/browser-use/browser/browser-context-cookie-persistence.ts +348 -0
  40. package/docs/examples/browser-use/browser/browser-fingerprint-basic-usage.ts +121 -0
  41. package/docs/examples/browser-use/browser/browser-fingerprint-construct.ts +114 -0
  42. package/docs/examples/browser-use/browser/browser-fingerprint-local-sync.ts +98 -0
  43. package/docs/examples/browser-use/browser/browser-fingerprint-persistence.ts +242 -0
  44. package/docs/examples/browser-use/browser/browser-proxies.ts +149 -0
  45. package/docs/examples/browser-use/browser/browser-type-example.ts +266 -0
  46. package/docs/examples/browser-use/browser/browser-viewport.ts +129 -0
  47. package/docs/examples/browser-use/browser/call_for_user_jd.ts +184 -0
  48. package/docs/examples/browser-use/browser/captcha_tongcheng.ts +151 -0
  49. package/docs/examples/browser-use/browser/run-2048.ts +209 -0
  50. package/docs/examples/browser-use/browser/run-sudoku.ts +150 -0
  51. package/docs/examples/browser-use/browser/screenshot-example.ts +132 -0
  52. package/docs/examples/browser-use/extension-example/README.md +252 -0
  53. package/docs/examples/browser-use/extension-example/extension-example.ts +381 -0
  54. package/docs/examples/codespace/automation/automation-example.ts +322 -0
  55. package/docs/examples/common-features/advanced/agent-module-example/README.md +40 -0
  56. package/docs/examples/common-features/advanced/agent-module-example.ts +66 -0
  57. package/docs/examples/common-features/advanced/archive-upload-mode-example/README.md +212 -0
  58. package/docs/examples/common-features/advanced/archive-upload-mode-example/archive-upload-mode-example.ts +213 -0
  59. package/docs/examples/common-features/advanced/vpc-session-example/README.md +47 -0
  60. package/docs/examples/common-features/advanced/vpc-session-example.ts +106 -0
  61. package/docs/examples/common-features/basics/archive-upload-mode-example/README.md +236 -0
  62. package/docs/examples/common-features/basics/archive-upload-mode-example/main.ts +236 -0
  63. package/docs/examples/common-features/basics/command-example/README.md +47 -0
  64. package/docs/examples/common-features/basics/command-example/command-example.ts +153 -0
  65. package/docs/examples/common-features/basics/context-management/README.md +55 -0
  66. package/docs/examples/common-features/basics/context-management/context-management.ts +140 -0
  67. package/docs/examples/common-features/basics/data-persistence/README.md +129 -0
  68. package/docs/examples/common-features/basics/data-persistence/context-sync-demo.md +144 -0
  69. package/docs/examples/common-features/basics/data-persistence/context-sync-demo.ts +275 -0
  70. package/docs/examples/common-features/basics/data-persistence/data-persistence.ts +259 -0
  71. package/docs/examples/common-features/basics/data-persistence/recycle-policy-example.ts +294 -0
  72. package/docs/examples/common-features/basics/filesystem-example/README.md +57 -0
  73. package/docs/examples/common-features/basics/filesystem-example/filesystem-example.ts +164 -0
  74. package/docs/examples/common-features/basics/filesystem-example/filesystem-filetransfer-example.ts +153 -0
  75. package/docs/examples/common-features/basics/filesystem-example/watch-directory-example.ts +168 -0
  76. package/docs/examples/common-features/basics/get/README.md +136 -0
  77. package/docs/examples/common-features/basics/get/main.ts +79 -0
  78. package/docs/examples/common-features/basics/list_sessions/README.md +54 -0
  79. package/docs/examples/common-features/basics/list_sessions/main.ts +258 -0
  80. package/docs/examples/common-features/basics/mcp_tool_direct_call/README.md +142 -0
  81. package/docs/examples/common-features/basics/mcp_tool_direct_call/main.ts +135 -0
  82. package/docs/examples/common-features/basics/session-creation/README.md +28 -0
  83. package/docs/examples/common-features/basics/session-creation/session-creation.ts +295 -0
  84. package/docs/examples/common-features/basics/session-pause-resume/README.md +53 -0
  85. package/docs/examples/common-features/basics/session-pause-resume/session-pause-resume.ts +237 -0
  86. package/docs/examples/mobile-use/mobile-get-adb-url/README.md +92 -0
  87. package/docs/examples/mobile-use/mobile-get-adb-url/index.ts +80 -0
  88. package/docs/examples/mobile-use/mobile-get-adb-url/package-lock.json +279 -0
  89. package/docs/examples/mobile-use/mobile-get-adb-url/package.json +18 -0
  90. package/docs/examples/mobile-use/mobile-simulate-basic-usage.ts +202 -0
  91. package/docs/examples/mobile-use/mobile-simulate-with-ctx.ts +170 -0
  92. package/package.json +19 -12
  93. package/dist/application-KZWXH46T.mjs +0 -17
  94. package/dist/application-LMA7KSRH.cjs +0 -8
  95. package/dist/application-LMA7KSRH.cjs.map +0 -1
  96. package/dist/chunk-BQNGKBQF.mjs +0 -386
  97. package/dist/chunk-BQNGKBQF.mjs.map +0 -1
  98. package/dist/chunk-IOVGAAJL.cjs +0 -558
  99. package/dist/chunk-IOVGAAJL.cjs.map +0 -1
  100. package/dist/chunk-UF2TC2R4.mjs +0 -567
  101. package/dist/chunk-UF2TC2R4.mjs.map +0 -1
  102. package/dist/chunk-X6MS7Z5L.cjs +0 -377
  103. package/dist/chunk-X6MS7Z5L.cjs.map +0 -1
  104. package/dist/window-DH37ZDD5.mjs +0 -17
  105. package/dist/window-DH37ZDD5.mjs.map +0 -1
  106. package/dist/window-U7N3H735.cjs +0 -8
  107. package/dist/window-U7N3H735.cjs.map +0 -1
  108. /package/dist/{application-KZWXH46T.mjs.map → model-LGWQJWKQ.mjs.map} +0 -0
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Example demonstrating Browser Viewport configuration with AgentBay SDK.
3
+ *
4
+ * This example shows how to initialize the browser with custom viewport and user-agent settings:
5
+ * - Create AIBrowser session with custom viewport and user-agent.
6
+ * - Use playwright to connect to AIBrowser instance through CDP protocol
7
+ * - Verify custom viewport and screen size
8
+ * - Verify custom user agent
9
+ */
10
+
11
+ // @ts-nocheck
12
+ import { AgentBay, CreateSessionParams } from 'wuying-agentbay-sdk';
13
+ import { BrowserOption, BrowserViewport } from 'wuying-agentbay-sdk';
14
+ import { chromium } from 'playwright';
15
+
16
+ interface ScreenInfo { // Window and screen metrics that main() reads inside the page via page.evaluate
17
+ outerWidth: number; // filled from window.outerWidth
18
+ outerHeight: number; // filled from window.outerHeight
19
+ innerWidth: number; // filled from window.innerWidth
20
+ innerHeight: number; // filled from window.innerHeight
21
+ width: number; // filled from window.screen.width
22
+ height: number; // filled from window.screen.height
23
+ availWidth: number; // filled from window.screen.availWidth
24
+ availHeight: number; // filled from window.screen.availHeight
25
+ colorDepth: number; // filled from window.screen.colorDepth
26
+ pixelDepth: number; // filled from window.screen.pixelDepth
27
+ }
28
+
29
+ interface WindowInfo { // Return shape of the page.evaluate call that inspects the window
30
+ screen: ScreenInfo | null; // null when window.screen is falsy in the page
31
+ }
32
+
33
+ interface UserAgentResponse { // Object parsed from the body text of https://httpbin.org/user-agent
34
+ 'user-agent': string; // main() falls back to '' when this key is missing or empty
35
+ }
36
+
37
/**
 * Runs the browser viewport / user-agent example end to end.
 *
 * Steps:
 * 1. Read the API key from the AGENTBAY_API_KEY environment variable.
 * 2. Create a session from the `browser_latest` image.
 * 3. Initialize the session browser with a custom user agent and a
 *    1920x1080 viewport.
 * 4. Connect to the browser over CDP with Playwright, print the window and
 *    screen metrics, then print the user agent echoed by httpbin.org.
 *
 * The Playwright connection is closed and the session is deleted in
 * `finally` blocks, so both are released even when a step throws or the
 * browser fails to initialize. Errors are logged and not rethrown.
 */
async function main(): Promise<void> {
  // Get API key from environment variable
  const apiKey = process.env.AGENTBAY_API_KEY;
  if (!apiKey) {
    console.log('Error: AGENTBAY_API_KEY environment variable not set');
    return;
  }

  try {
    // Initialize AgentBay client
    console.log('Initializing AgentBay client...');
    const agentBay = new AgentBay({ apiKey });

    // Create a session
    console.log('Creating a new session...');
    const params: CreateSessionParams = {
      imageId: 'browser_latest',
    };
    const sessionResult = await agentBay.create(params);

    if (!sessionResult.success) {
      console.log('Failed to create session');
      return;
    }

    const session = sessionResult.session;
    console.log(`Session created with ID: ${session.sessionId}`);

    try {
      // Create browser option with viewport and user-agent
      const browserOption: BrowserOption = {
        userAgent: 'Mozilla/5.0 (Mocked Windows Desktop)',
        viewport: { width: 1920, height: 1080 },
        // screen: { width: 1920, height: 1080 },
      };

      const initialized = await session.browser.initializeAsync(browserOption);
      if (!initialized) {
        // The enclosing `finally` still deletes the session.
        console.log('Failed to initialize browser');
        return;
      }

      const endpointUrl = await session.browser.getEndpointUrl();
      console.log('endpoint_url =', endpointUrl);

      const browser = await chromium.connectOverCDP(endpointUrl);
      try {
        const context = browser.contexts()[0];
        const page = await context.newPage();

        // Check custom viewport and screen size
        console.log('\n--- Check window Properties ---');
        const windowInfo = await page.evaluate((): WindowInfo => {
          const screenInfo = window.screen ? {
            outerWidth: window.outerWidth,
            outerHeight: window.outerHeight,
            innerWidth: window.innerWidth,
            innerHeight: window.innerHeight,
            width: window.screen.width,
            height: window.screen.height,
            availWidth: window.screen.availWidth,
            availHeight: window.screen.availHeight,
            colorDepth: window.screen.colorDepth,
            pixelDepth: window.screen.pixelDepth
          } : null;

          return {
            screen: screenInfo
          };
        });
        console.log('Screen Info:', windowInfo.screen);

        // Check custom user agent
        console.log('\n--- Check User Agent ---');
        await page.goto('https://httpbin.org/user-agent');

        // The page body is parsed as JSON; an empty body yields {}.
        const response = await page.evaluate((): UserAgentResponse => {
          return JSON.parse(document.body.textContent || '{}');
        });
        const userAgent = response['user-agent'] || '';
        console.log('User Agent:', userAgent);

        await page.waitForTimeout(3000);
      } finally {
        // Close the CDP connection even if page creation or a check failed.
        await browser.close();
      }
    } finally {
      // Clean up session, whether or not the steps above succeeded.
      await agentBay.delete(session);
    }
  } catch (error) {
    console.error('Error:', error);
  }
}

// Run the example only when this file is executed directly.
if (require.main === module) {
  main().catch(console.error);
}
@@ -0,0 +1,184 @@
1
+ /**
2
+ * Example demonstrating wuying-call-for-user message handling with AgentBay SDK.
3
+ *
4
+ * This example shows how to handle the 'wuying-call-for-user' message that can be received
5
+ * during browser automation sessions. The wuying-call-for-user message is triggered when:
6
+ *
7
+ * 1. The browser encounters a situation that requires human intervention
8
+ * 2. Authentication challenges that cannot be automatically resolved
9
+ * 3. Complex verification processes that need user input
10
+ * 4. Security measures that require manual verification
11
+ *
12
+ * When you receive a 'wuying-call-for-user' message, the recommended handling flow is:
13
+ *
14
+ * 1. Parse the console message to identify the message type
15
+ * 2. When 'wuying-call-for-user' is detected, open the session resource URL in a browser
16
+ * 3. Allow the user to interact with the browser to complete the required action
17
+ * 4. Wait for the user to complete the interaction (typically 20-30 seconds)
18
+ * 5. Continue with the automation flow
19
+ *
20
+ * This example demonstrates:
21
+ * - Creating an AgentBay session with browser capabilities
22
+ * - Connecting to the browser via CDP protocol using Playwright
23
+ * - Setting up console message listeners to detect wuying-call-for-user messages
24
+ * - Opening the session resource URL for user interaction
25
+ * - Implementing a wait mechanism for user completion
26
+ * - Taking screenshots for debugging purposes
27
+ */
28
+
29
+ import { AgentBay, CreateSessionParams } from 'wuying-agentbay-sdk';
30
+ import { BrowserOption } from 'wuying-agentbay-sdk';
31
+ import { chromium } from 'playwright';
32
+ import { exec } from 'child_process';
33
+ import * as os from 'os';
34
+
35
/**
 * Main function demonstrating wuying-call-for-user message handling.
 *
 * Sets up a browser session, navigates to JD.com and clicks the login link
 * to trigger scenarios that might require user intervention. A console
 * listener watches for the 'wuying-call-for-user' message; when it arrives,
 * the session resource URL is opened in the local default browser and the
 * handler waits 20 seconds for the user to act.
 *
 * The Playwright connection is closed and the session is deleted in
 * `finally` blocks, so they are released on every path, including a failed
 * browser initialization and any thrown error. Errors are logged and not
 * rethrown.
 */
async function main(): Promise<void> {
  // Get API key from environment variable
  const apiKey = process.env.AGENTBAY_API_KEY;
  if (!apiKey) {
    console.log('Error: AGENTBAY_API_KEY environment variable not set');
    return;
  }

  try {
    // Initialize AgentBay client
    console.log('Initializing AgentBay client...');
    const agentBay = new AgentBay({ apiKey });

    // Create a session
    console.log('Creating a new session...');
    const params: CreateSessionParams = {
      imageId: 'browser_latest', // Specify the image ID
    };
    const sessionResult = await agentBay.create(params);

    if (!sessionResult.success) {
      console.log('Failed to create session');
      return;
    }

    const session = sessionResult.session;
    console.log(`Session created with ID: ${session.sessionId}`);

    try {
      const browserOption: BrowserOption = {};
      const initialized = await session.browser.initializeAsync(browserOption);
      if (!initialized) {
        // The enclosing `finally` still deletes the session.
        console.log('Failed to initialize browser');
        return;
      }

      console.log('Browser initialized successfully');
      const endpointUrl = await session.browser.getEndpointUrl();
      console.log('endpoint_url =', endpointUrl);

      const result = await session.info();
      const info = result.data;
      console.log(`session resource url is ${info.resourceUrl}`);

      // Connect to browser using Playwright
      const browser = await chromium.connectOverCDP(endpointUrl);
      try {
        const context = browser.contexts()[0];
        const page = await context.newPage();
        console.log('🌐 Navigating to jd site...');
        const url = 'https://www.jd.com/';
        await page.goto(url);

        // Listen for console messages
        const handleConsole = async (msg: any) => {
          const text = msg.text();
          console.log(`🔍 Received console message: ${text}`);

          // Parse JSON message
          let messageType: string;
          try {
            const messageData = JSON.parse(text);
            messageType = messageData.type || '';
            console.log(`📋 Parsed message type: ${messageType}`);
          } catch (error) {
            // If not JSON, treat as plain text
            messageType = text;
            console.log(`📋 Plain text message: ${messageType}`);
          }

          if (messageType !== 'wuying-call-for-user') {
            return;
          }

          console.log('📞 Received wuying-call-for-user message');
          console.log(`session resource url is ${info.resourceUrl}`);
          // You can skip this message or open the URL in a browser so the user can handle it.
          // The following sample code shows how to open the URL in the default browser.
          console.log('🌐 Opening browser with session resource URL...');

          // Open URL in default browser (cross-platform).
          // NOTE(review): resourceUrl is interpolated into a shell command line.
          // The quoting below assumes the URL contains no quote characters of
          // the kind used to wrap it — confirm the service guarantees this.
          const platformName = os.platform();

          let command: string;
          if (platformName === 'win32') {
            // Windows: use double quotes to handle special characters
            command = `start "" "${info.resourceUrl}"`;
          } else if (platformName === 'darwin') {
            // macOS: use single quotes to handle special characters
            command = `open '${info.resourceUrl}'`;
          } else {
            // Linux: use single quotes to handle special characters
            command = `xdg-open '${info.resourceUrl}'`;
          }

          exec(command, (error) => {
            if (error) {
              console.log('Failed to open browser:', error);
            } else {
              console.log('Browser opened successfully');
            }
          });

          // Wait for the user to interact with the browser
          console.log('⏳ Starting 20 second wait for user interaction...');
          // Delay this handler for 20 seconds; you could check for user input instead
          await new Promise<void>((resolve) => {
            setTimeout(() => {
              console.log('⏳ User interaction wait completed');
              resolve();
            }, 20000);
          });
        };

        page.on('console', handleConsole);

        await page.waitForTimeout(5000);
        console.log('click login');
        await page.click('.link-login');
        await page.waitForTimeout(25000);

        console.log('Test completed');

        // Keep browser open for a while to observe results
        await page.waitForTimeout(5000);

        // Take screenshot and print it as a data URL that can be pasted into the Chrome address bar
        try {
          const screenshotBytes = await page.screenshot({ fullPage: false });
          const b64 = screenshotBytes.toString('base64');
          // Single template string: passing b64 as a second console.log
          // argument would insert a space inside the data URL.
          console.log(`page_screenshot_base64 = data:image/png;base64,${b64}`);
        } catch (error) {
          console.log('screenshot failed:', error);
        }
      } finally {
        // Close the CDP connection even if a step above failed.
        await browser.close();
      }
    } finally {
      // Clean up session on every path once it has been created.
      await agentBay.delete(session);
      console.log('Session cleaned up successfully');
    }
  } catch (error) {
    console.error('Error in main function:', error);
  }
}

// Run the main function if this file is executed directly
if (typeof require !== 'undefined' && require.main === module) {
  main().catch(console.error);
}
@@ -0,0 +1,151 @@
1
+ /**
2
+ * Example demonstrating AIBrowser capabilities with AgentBay SDK.
3
+ * This example shows how to use AIBrowser to solve captcha automatically, including:
4
+ * - Create AIBrowser session
5
+ * - Use playwright to connect to AIBrowser instance through CDP protocol
6
+ * - Set solve_captchas to be True and goto tongcheng website
7
+ * - We will encounter a captcha and we will solve it automatically.
8
+ */
9
+
10
+ import { AgentBay, CreateSessionParams } from 'wuying-agentbay-sdk';
11
+ import { BrowserOption } from 'wuying-agentbay-sdk';
12
+ import { chromium } from 'playwright';
13
+
14
/**
 * Runs the automatic captcha-solving example against the tongcheng
 * password-recovery page.
 *
 * Creates a `browser_latest` session, initializes the browser with
 * `useStealth` and `solveCaptchas` enabled, connects over CDP with
 * Playwright, enters a phone number and clicks "next". Console messages
 * 'wuying-captcha-solving-started' / 'wuying-captcha-solving-finished' are
 * mirrored into `window` flags, which are then polled with
 * `page.waitForFunction`.
 *
 * The console listener is attached before the click that triggers the
 * captcha so the "started" message cannot be missed. The Playwright
 * connection is closed and the session is deleted in `finally` blocks.
 * Errors are logged and not rethrown.
 */
async function main(): Promise<void> {
  // Get API key from environment variable
  const apiKey = process.env.AGENTBAY_API_KEY;
  if (!apiKey) {
    console.log('Error: AGENTBAY_API_KEY environment variable not set');
    return;
  }

  try {
    // Initialize AgentBay client
    console.log('Initializing AgentBay client...');
    const agentBay = new AgentBay({ apiKey });

    // Create a session
    console.log('Creating a new session...');
    const params: CreateSessionParams = {
      imageId: 'browser_latest', // Specify the image ID
    };
    const sessionResult = await agentBay.create(params);

    if (!sessionResult.success) {
      console.log('Failed to create session');
      return;
    }

    const session = sessionResult.session;
    console.log(`Session created with ID: ${session.sessionId}`);

    try {
      const browserOption: BrowserOption = {
        useStealth: true,
        solveCaptchas: true,
      };

      const initialized = await session.browser.initializeAsync(browserOption);
      if (!initialized) {
        // The enclosing `finally` still deletes the session.
        console.log('Failed to initialize browser');
        return;
      }

      console.log('Browser initialized successfully');
      const endpointUrl = await session.browser.getEndpointUrl();
      console.log('endpoint_url =', endpointUrl);

      // Connect to browser using Playwright
      const browser = await chromium.connectOverCDP(endpointUrl);
      try {
        const context = browser.contexts()[0];
        const page = await context.newPage();
        console.log('🌐 Navigating to tongcheng site...');
        const url = 'https://passport.ly.com/Passport/GetPassword';
        await page.goto(url, { waitUntil: 'domcontentloaded' });

        // Use selector to locate input field
        const inputElement = await page.waitForSelector('#name_in', { timeout: 10000 });
        console.log('Found login name input field: #name_in');

        // Clear input field and enter phone number
        const phoneNumber = '15011556760';
        console.log(`Entering phone number: ${phoneNumber}`);

        await inputElement.click();
        await inputElement.fill(''); // Clear input field
        await inputElement.type(phoneNumber);
        console.log('Waiting for captcha');

        // Wait a moment to ensure input is complete
        await page.waitForTimeout(1000);

        // Mirrors a captcha state change into the page so waitForFunction can
        // observe it. A failure here is logged instead of being left as an
        // unhandled rejection.
        const recordInPage = (script: string): void => {
          page.evaluate(script).catch((error: unknown) => {
            console.log('Failed to record captcha state in page:', error);
          });
        };

        // Listen for captcha processing messages
        const handleConsole = (msg: any) => {
          const text = msg.text();
          console.log(`🔍 Received console message: ${text}`);
          if (text === 'wuying-captcha-solving-started') {
            console.log('🎯 Setting captchaSolvingStarted = true');
            recordInPage('window.captchaSolvingStarted = true; window.captchaSolvingFinished = false;');
          } else if (text === 'wuying-captcha-solving-finished') {
            console.log('✅ Setting captchaSolvingFinished = true');
            recordInPage('window.captchaSolvingFinished = true;');
          }
        };

        // Attach the listener BEFORE the click that triggers the captcha.
        page.on('console', handleConsole);

        console.log('Clicking next step button...');
        await page.click('#next_step1');

        // Wait 1 second first, then check if captcha processing has started
        try {
          await page.waitForTimeout(1000);
          await page.waitForFunction('() => window.captchaSolvingStarted === true', { timeout: 1000 });
          console.log('🎯 Detected captcha processing started, waiting for completion...');

          // If start is detected, wait for completion (max 30 seconds)
          try {
            await page.waitForFunction('() => window.captchaSolvingFinished === true', { timeout: 30000 });
            console.log('✅ Captcha processing completed');
          } catch (error) {
            console.log('⚠️ Captcha processing timeout, may still be in progress');
          }
        } catch (error) {
          console.log('⏭️ No captcha processing detected, continuing execution');
        }

        await page.waitForTimeout(2000);
        await page.type('#step2_yzm', '1234');
        console.log('Test completed');

        // Keep browser open for a while to observe results
        await page.waitForTimeout(5000);

        // Take screenshot and print it as a data URL that can be pasted into the Chrome address bar
        try {
          const screenshotBytes = await page.screenshot({ fullPage: false });
          const b64 = screenshotBytes.toString('base64');
          // Single template string: passing b64 as a second console.log
          // argument would insert a space inside the data URL.
          console.log(`page_screenshot_base64 = data:image/png;base64,${b64}`);
        } catch (error) {
          console.log('screenshot failed:', error);
        }
      } finally {
        // Close the CDP connection even if a step above failed.
        await browser.close();
      }
    } finally {
      // Clean up session on every path once it has been created.
      await agentBay.delete(session);
      console.log('Session cleaned up successfully');
    }
  } catch (error) {
    console.error('Error in main function:', error);
  }
}

// Run the example only when this file is executed directly.
if (require.main === module) {
  main().catch(console.error);
}
@@ -0,0 +1,209 @@
1
+ /**
2
+ * Example demonstrating AIBrowser capabilities with AgentBay SDK.
3
+ * This example shows how to use PageUseAgent to run 2048 game, including:
4
+ * - Create AIBrowser session
5
+ * - Use playwright to connect to AIBrowser instance through CDP protocol
6
+ * - Utilize PageUseAgent to run 2048 game
7
+ */
8
+
9
+ import { AgentBay, CreateSessionParams } from '../../../../typescript/src/agent-bay';
10
+ import { BrowserOption, ExtractOptions, ActOptions } from '../../../../typescript/src/browser';
11
+ // import { chromium } from 'playwright';
12
+
13
/**
 * Snapshot of the 2048 board, used as the `schema` for the game-state
 * extract call in main().
 */
class GameState {
  /** Current score, when it could be extracted. */
  score?: number;
  /** Largest tile value on the board, when it could be extracted. */
  highestTile?: number;
  /** Tile values as nested arrays; starts empty. main() transposes this before use. */
  grid: number[][] = new Array<number[]>();
}
18
+
19
/**
 * Suggested next move, used as the `schema` for the next-move extract call
 * in main().
 */
class MoveAnalysis {
  /** Numeric move code; main() maps 0-3 to arrow keys and 4 to Escape. */
  move?: number;
  /** Defaults to 0. */
  confidence = 0;
  /** Defaults to the empty string. */
  reasoning = '';
}
24
+
25
/**
 * Transposes a grid of numbers (rows become columns).
 *
 * Ragged input is tolerated: the result has as many rows as the longest
 * input row, and any missing cell is filled with 0, the value this example
 * uses for an empty tile. Rectangular grids transpose exactly as before.
 *
 * @param grid - Grid to transpose; may be empty, null or undefined.
 * @returns A new transposed grid, or `[]` when the input is empty or missing.
 */
function transposeGrid(grid: number[][]): number[][] {
  if (!grid || grid.length === 0) {
    return [];
  }
  // Size the result from the longest row so short rows do not produce
  // `undefined` cells and long rows are not truncated to the first row's length.
  const columnCount = Math.max(...grid.map(row => row?.length ?? 0));
  return Array.from({ length: columnCount }, (_, colIndex) =>
    grid.map(row => row?.[colIndex] ?? 0)
  );
}
31
+
32
/**
 * Renders a grid as text for an LLM prompt, one labelled line per row.
 *
 * Rows are numbered from 1 and cells are separated by ", ", for example
 * `row1: [0, 2, 0, 4]`. Lines are joined with "\n"; an empty grid yields "".
 *
 * @param gridData - Rows of tile values.
 * @returns The formatted multi-line string.
 */
function formatGridForLlmInstruction(gridData: number[][]): string {
  return gridData
    .map((cells, rowIndex) => `row${rowIndex + 1}: [${cells.join(', ')}]`)
    .join('\n');
}
39
+
40
/**
 * Plays the 2048 game in a remote browser using the session's browser agent.
 *
 * Each loop iteration extracts the board with `session.browser.agent.extract`,
 * asks a second extract call for the next move, and presses the matching
 * arrow key. The loop stops when the page contains a `.game-over` element.
 *
 * The Playwright connection is closed and the session is deleted in
 * `finally` blocks. Errors that are not explained by a finished game are
 * rethrown to the caller.
 */
async function main(): Promise<void> {
  // Get API key from environment variable
  const apiKey = process.env.AGENTBAY_API_KEY;
  if (!apiKey) {
    console.log("Error: AGENTBAY_API_KEY environment variable not set");
    return;
  }

  // Initialize AgentBay client
  console.log("Initializing AgentBay client...");
  const agentBay = new AgentBay({ apiKey });

  // Create a session
  console.log("Creating a new session...");
  const params: CreateSessionParams = {
    imageId: "browser_latest",
  };
  const sessionResult = await agentBay.create(params);

  if (!sessionResult.success) {
    console.log("Failed to create session");
    return;
  }

  const session = sessionResult.session;
  console.log(`Session created with ID: ${session.sessionId}`);

  // Move codes returned by the model, mapped to the key that is pressed.
  const moveKeyMap: { [key: number]: string } = {
    0: "ArrowUp",
    1: "ArrowDown",
    2: "ArrowLeft",
    3: "ArrowRight",
    4: "Escape",
  };

  // Loop-invariant prompt for reading the board.
  const gameStateInstruction = [
    "",
    "Extract the current game state:",
    "1. Score from the score counter",
    "2. All tile values and their positions in the 4x4 grid must be extracted.",
    "Each tile's value and position can be obtained from the tile-position-x-y class, where x (1 to 4) is the column and y (1 to 4) is the row.",
    "For example, tile-position-4-1 means the tile is in column 4, row 1.",
    "The value of the tile is given by the number in the tile's class.",
    "For example, <div class='tile tile-2 tile-position-1-4 tile-new'>2</div> means a tile with value 2 at column 1, row 4;",
    "and <div class='tile tile-2 tile-position-4-1 tile-new'>2</div> means a tile with value 2 at column 4, row 1.",
    "Empty spaces should be represented as 0 in the grid.",
    "For instance, if the only tiles present are the two above, the grid should be:[[0, 0, 0, 2], [0, 0, 0, 0], [0, 0, 0, 0], [2, 0, 0, 0]]",
    "3. Highest tile value present",
    "",
  ].join("\n");

  try {
    if (!(await session.browser.initializeAsync({} as BrowserOption))) {
      // The enclosing `finally` still deletes the session.
      console.log("Failed to initialize browser");
      return;
    }

    console.log("Browser initialized successfully");
    const endpointUrl = await session.browser.getEndpointUrl();
    console.log("endpoint_url =", endpointUrl);

    // Note: Install playwright with: npm install playwright
    const { chromium } = require('playwright');
    const browser = await chromium.connectOverCDP(endpointUrl);

    try {
      const context = browser.contexts()[0];
      const page = await context.newPage();

      // True once the page contains a `.game-over` element.
      const isGameOver = async (): Promise<boolean> =>
        Boolean(
          await page.evaluate("() => document.querySelector('.game-over') !== null")
        );

      try {
        console.log("🌐 Navigating to 2048...");
        await page.goto("https://ovolve.github.io/2048-AI/", {
          waitUntil: "domcontentloaded",
          timeout: 180000
        });
        console.log("🌐 Navigated to 2048 done");
        await page.waitForSelector(".grid-container", { timeout: 10000 });

        let lastTransposedGrid: number[][] | null = null;
        let transposedGridNotChangedTimes = 0;
        let lastMoveHistory: number[] = [];

        while (true) {
          console.log("🔄 Game loop iteration...");
          await new Promise(resolve => setTimeout(resolve, 300));

          // Stop cleanly once the game has ended rather than looping forever.
          if (await isGameOver()) {
            console.log("🏁 Game Over!");
            break;
          }

          // Get current game state
          console.log("📊 Extracting game state...");
          const gameStateOptions: ExtractOptions<GameState> = {
            instruction: gameStateInstruction,
            schema: GameState,
            use_text_extract: false
          };

          const [success, gameStates] = await session.browser.agent.extract(gameStateOptions, page);
          if (!(success && gameStates.length > 0)) {
            console.log("❌ Failed to extract game state, retry observing");
            continue;
          }

          const gameState = gameStates[0];
          // The extracted grid is indexed by column first (see the prompt's
          // example), so transpose it into row-major order for the next prompt.
          const transposedGrid = transposeGrid(gameState.grid);
          console.log(`transposed grid: ${JSON.stringify(transposedGrid)}`);
          console.log(`gameState: ${JSON.stringify(gameState)}`);
          const gridInstruction = formatGridForLlmInstruction(transposedGrid);

          if (lastTransposedGrid !== null && JSON.stringify(transposedGrid) === JSON.stringify(lastTransposedGrid)) {
            transposedGridNotChangedTimes += 1;
          } else {
            transposedGridNotChangedTimes = 0;
            lastMoveHistory = [];
          }
          lastTransposedGrid = transposedGrid;

          let instructionStr = [
            "",
            "Based on the current game state:",
            `- Score: ${gameState.score}`,
            `- Highest tile: ${gameState.highestTile}`,
            `- Grid: This is a 4x4 matrix ordered by row (top to bottom) and column (left to right). The rows are stacked vertically, and tiles can move vertically between rows or horizontally between columns:${gridInstruction}`,
            // State the numeric encoding that moveKeyMap expects.
            "- Moves are encoded as numbers: 0 = up, 1 = down, 2 = left, 3 = right. Return the chosen move as one of these numbers.",
            "What is the best move (up/down/left/right)? Consider:",
            "1. Keeping high value tiles in corners (bottom left, bottom right, top left, top right)",
            "2. Maintaining a clear path to merge tiles",
            "3. Avoiding moves that could block merges",
            "4. Avoiding moves that merge no blocks as possible",
            "5. Only adjacent tiles of the same value can merge",
            "6. Making a move will move all tiles in that direction until they hit a tile of a different value or the edge of the board",
            "7. Tiles cannot move past the edge of the board",
            "8. Each move must move at least one tile",
            "",
          ].join("\n");

          // Only steer away from previous moves when there are some;
          // otherwise rule 10 would mention "undefined".
          if (transposedGridNotChangedTimes >= 1 && lastMoveHistory.length > 0) {
            instructionStr += [
              "",
              `9. Do not generate move value in ${JSON.stringify(lastMoveHistory)}`,
              `10. If last move value ${lastMoveHistory[lastMoveHistory.length - 1]} moves up or down, then generate move value with left or right direction, otherwise generate move value with up or down direction`,
              "",
            ].join("\n");
          }

          const nextMoveOptions: ExtractOptions<MoveAnalysis> = {
            instruction: instructionStr,
            schema: MoveAnalysis,
            use_text_extract: false
          };

          const [moveSuccess, nextMove] = await session.browser.agent.extract(nextMoveOptions, page);
          if (!(moveSuccess && nextMove.length > 0)) {
            console.log("❌ Failed to extract next move, retry observing");
            continue;
          }

          // 4 means "no move" and maps to Escape.
          const selectedMove: number = nextMove[0].move ?? 4;
          lastMoveHistory.push(selectedMove);

          // Unknown move codes fall back to Escape instead of passing
          // `undefined` to keyboard.press.
          const moveKey = moveKeyMap[selectedMove] ?? "Escape";
          await page.keyboard.press(moveKey);
        }
      } catch (error) {
        console.log(`❌ Error in game loop: ${error}`);
        // An error may simply mean the game ended; check before rethrowing.
        try {
          if (await isGameOver()) {
            console.log("🏁 Game Over!");
            return;
          }
        } catch (innerError) {
          console.log(`Could not check game over status: ${innerError}`);
        }
        throw error;
      }
    } finally {
      // Close the CDP connection on every path.
      await browser.close();
    }
  } finally {
    // Delete the session on every path once it has been created.
    await agentBay.delete(session);
  }
}

// Run the example only when this file is executed directly.
if (require.main === module) {
  main().catch(console.error);
}