btcp-browser-agent 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/CLAUDE.md +230 -0
  2. package/LICENSE +21 -0
  3. package/README.md +309 -0
  4. package/SKILL.md +143 -0
  5. package/SNAPSHOT_IMPROVEMENTS.md +302 -0
  6. package/USAGE.md +146 -0
  7. package/dist/index.d.ts +34 -0
  8. package/dist/index.d.ts.map +1 -0
  9. package/dist/index.js +35 -0
  10. package/dist/index.js.map +1 -0
  11. package/docs/browser-cli-design.md +500 -0
  12. package/examples/chrome-extension/CHANGELOG.md +210 -0
  13. package/examples/chrome-extension/DEBUG.md +231 -0
  14. package/examples/chrome-extension/ERROR_FIXED.md +147 -0
  15. package/examples/chrome-extension/QUICK_TEST.md +189 -0
  16. package/examples/chrome-extension/README.md +149 -0
  17. package/examples/chrome-extension/SESSION_ONLY_MODE.md +305 -0
  18. package/examples/chrome-extension/TEST_WITH_YOUR_TABS.md +97 -0
  19. package/examples/chrome-extension/build.js +43 -0
  20. package/examples/chrome-extension/manifest.json +37 -0
  21. package/examples/chrome-extension/package-lock.json +1063 -0
  22. package/examples/chrome-extension/package.json +21 -0
  23. package/examples/chrome-extension/popup.html +195 -0
  24. package/examples/chrome-extension/src/background.ts +12 -0
  25. package/examples/chrome-extension/src/content.ts +7 -0
  26. package/examples/chrome-extension/src/popup.ts +303 -0
  27. package/examples/chrome-extension/src/scenario-google-github.ts +389 -0
  28. package/examples/chrome-extension/test-page.html +127 -0
  29. package/examples/chrome-extension/tests/README.md +206 -0
  30. package/examples/chrome-extension/tests/scenario-google-to-github-star.ts +380 -0
  31. package/examples/chrome-extension/tsconfig.json +14 -0
  32. package/examples/snapshots/README.md +207 -0
  33. package/examples/snapshots/amazon-com-detail.html +9528 -0
  34. package/examples/snapshots/amazon-com-detail.snapshot.txt +997 -0
  35. package/examples/snapshots/convert-snapshots.ts +97 -0
  36. package/examples/snapshots/edition-cnn-com.html +13292 -0
  37. package/examples/snapshots/edition-cnn-com.snapshot.txt +562 -0
  38. package/examples/snapshots/github-com-microsoft-vscode.html +2916 -0
  39. package/examples/snapshots/github-com-microsoft-vscode.snapshot.txt +455 -0
  40. package/examples/snapshots/google-search.html +20012 -0
  41. package/examples/snapshots/google-search.snapshot.txt +195 -0
  42. package/examples/snapshots/metadata.json +86 -0
  43. package/examples/snapshots/npr-org-templates.html +2031 -0
  44. package/examples/snapshots/npr-org-templates.snapshot.txt +224 -0
  45. package/examples/snapshots/stackoverflow-com.html +5216 -0
  46. package/examples/snapshots/stackoverflow-com.snapshot.txt +2404 -0
  47. package/examples/snapshots/test-all-mode.html +46 -0
  48. package/examples/snapshots/test-all-mode.snapshot.txt +5 -0
  49. package/examples/snapshots/validate.test.ts +296 -0
  50. package/package.json +65 -0
  51. package/packages/cli/package.json +42 -0
  52. package/packages/cli/src/__tests__/cli.test.ts +434 -0
  53. package/packages/cli/src/__tests__/errors.test.ts +226 -0
  54. package/packages/cli/src/__tests__/executor.test.ts +275 -0
  55. package/packages/cli/src/__tests__/formatter.test.ts +260 -0
  56. package/packages/cli/src/__tests__/parser.test.ts +288 -0
  57. package/packages/cli/src/__tests__/suggestions.test.ts +255 -0
  58. package/packages/cli/src/commands/back.ts +22 -0
  59. package/packages/cli/src/commands/check.ts +33 -0
  60. package/packages/cli/src/commands/clear.ts +33 -0
  61. package/packages/cli/src/commands/click.ts +32 -0
  62. package/packages/cli/src/commands/closetab.ts +31 -0
  63. package/packages/cli/src/commands/eval.ts +41 -0
  64. package/packages/cli/src/commands/fill.ts +30 -0
  65. package/packages/cli/src/commands/focus.ts +33 -0
  66. package/packages/cli/src/commands/forward.ts +22 -0
  67. package/packages/cli/src/commands/goto.ts +34 -0
  68. package/packages/cli/src/commands/help.ts +162 -0
  69. package/packages/cli/src/commands/hover.ts +34 -0
  70. package/packages/cli/src/commands/index.ts +129 -0
  71. package/packages/cli/src/commands/newtab.ts +35 -0
  72. package/packages/cli/src/commands/press.ts +40 -0
  73. package/packages/cli/src/commands/reload.ts +25 -0
  74. package/packages/cli/src/commands/screenshot.ts +27 -0
  75. package/packages/cli/src/commands/scroll.ts +64 -0
  76. package/packages/cli/src/commands/select.ts +35 -0
  77. package/packages/cli/src/commands/snapshot.ts +21 -0
  78. package/packages/cli/src/commands/tab.ts +32 -0
  79. package/packages/cli/src/commands/tabs.ts +26 -0
  80. package/packages/cli/src/commands/text.ts +27 -0
  81. package/packages/cli/src/commands/title.ts +17 -0
  82. package/packages/cli/src/commands/type.ts +38 -0
  83. package/packages/cli/src/commands/uncheck.ts +33 -0
  84. package/packages/cli/src/commands/url.ts +17 -0
  85. package/packages/cli/src/commands/wait.ts +54 -0
  86. package/packages/cli/src/errors.ts +164 -0
  87. package/packages/cli/src/executor.ts +68 -0
  88. package/packages/cli/src/formatter.ts +215 -0
  89. package/packages/cli/src/index.ts +257 -0
  90. package/packages/cli/src/parser.ts +195 -0
  91. package/packages/cli/src/suggestions.ts +207 -0
  92. package/packages/cli/src/terminal/Terminal.ts +365 -0
  93. package/packages/cli/src/terminal/index.ts +5 -0
  94. package/packages/cli/src/types.ts +155 -0
  95. package/packages/cli/tsconfig.json +20 -0
  96. package/packages/core/package.json +35 -0
  97. package/packages/core/src/actions.ts +1210 -0
  98. package/packages/core/src/errors.ts +296 -0
  99. package/packages/core/src/index.test.ts +638 -0
  100. package/packages/core/src/index.ts +220 -0
  101. package/packages/core/src/ref-map.ts +107 -0
  102. package/packages/core/src/snapshot.ts +873 -0
  103. package/packages/core/src/types.ts +536 -0
  104. package/packages/core/tsconfig.json +23 -0
  105. package/packages/extension/README.md +129 -0
  106. package/packages/extension/package.json +43 -0
  107. package/packages/extension/src/background.ts +888 -0
  108. package/packages/extension/src/content.ts +172 -0
  109. package/packages/extension/src/index.ts +579 -0
  110. package/packages/extension/src/session-manager.ts +385 -0
  111. package/packages/extension/src/session-types.ts +144 -0
  112. package/packages/extension/src/types.ts +162 -0
  113. package/packages/extension/tsconfig.json +28 -0
  114. package/src/index.ts +64 -0
  115. package/tsconfig.build.json +12 -0
  116. package/tsconfig.json +26 -0
  117. package/vitest.config.ts +13 -0
@@ -0,0 +1,206 @@
1
+ # Chrome Extension Scenario Tests
2
+
3
+ This directory contains demonstration scripts that showcase how AI agents would interact with the BTCP Browser Agent API in real-world scenarios.
4
+
5
+ ## Available Scenarios
6
+
7
+ ### 1. Google Search → GitHub Star (`scenario-google-to-github-star.ts`)
8
+
9
+ A complete workflow demonstrating AI agent reasoning patterns:
10
+
11
+ **Workflow:**
12
+ 1. Navigate to Google
13
+ 2. Search for "btcp-cowork"
14
+ 3. Find and click the first GitHub link in results
15
+ 4. Navigate to the repository
16
+ 5. Attempt to star the repository
17
+
18
+ **What it demonstrates:**
19
+ - AI reasoning simulation with detailed logging
20
+ - Snapshot-based page understanding
21
+ - Element selection strategies using accessibility tree
22
+ - Error recovery with fallback approaches
23
+ - Multi-step navigation and validation
24
+ - Authentication gate detection
25
+
26
+ ## Running the Scenarios
27
+
28
+ ### Prerequisites
29
+
30
+ 1. **Install Dependencies**
31
+ ```bash
32
+ cd examples/chrome-extension
33
+ npm install
34
+ ```
35
+
36
+ 2. **Build the Extension**
37
+ ```bash
38
+ npm run build
39
+ ```
40
+
41
+ 3. **Load Extension in Chrome**
42
+ - Open Chrome and navigate to `chrome://extensions`
43
+ - Enable "Developer mode" (toggle in top right)
44
+ - Click "Load unpacked"
45
+ - Select the `examples/chrome-extension/dist/` directory
46
+
47
+ 4. **Create a Session**
48
+ - Click the extension icon in Chrome toolbar
49
+ - Click "Start Session" button
50
+ - A new tab group will be created
51
+
52
+ ### Running the Demo
53
+
54
+ With the extension loaded and a session active:
55
+
56
+ ```bash
57
+ cd examples/chrome-extension
58
+ npm run demo:scenario
59
+ ```
60
+
61
+ ### Expected Output
62
+
63
+ The script will output detailed logs showing AI reasoning at each step:
64
+
65
+ ```
66
+ 🤔 AI Agent: Starting Google → GitHub → Star workflow demonstration
67
+
68
+ [Step 1/15] Initialize connection to browser extension
69
+ ✨ Success: Client connected
70
+
71
+ [Step 2/15] Navigate to Google
72
+ 🤔 AI Agent: Need to navigate to google.com to start search
73
+ 🎯 Action: Navigating to https://www.google.com...
74
+ ✅ Navigation complete
75
+
76
+ [Step 3/15] Take snapshot to analyze page structure
77
+ 🤔 AI Agent: Taking snapshot to identify interactive elements
78
+ 🎯 Action: Calling snapshot API...
79
+ 📊 Verification: Snapshot captured: 156 elements found
80
+
81
+ [Step 4/15] Locate search input field
82
+ šŸ” Analyzing: Looking for search box (role=searchbox or combobox)
83
+ āœ… Found: @ref:12 - role='combobox' name='Search'
84
+
85
+ ...
86
+ ```
87
+
88
+ ## Understanding the Output
89
+
90
+ The demo uses emoji prefixes to indicate different types of information:
91
+
92
+ - 🤔 **Thinking**: AI agent's reasoning process
93
+ - 🔍 **Analyzing**: Element search criteria
94
+ - ✅ **Found**: Successfully located element
95
+ - 🎯 **Action**: Executing a command
96
+ - 📊 **Verification**: Validating results
97
+ - ❌ **Error**: Something went wrong
98
+ - ⚠️ **Warning**: Non-critical issue or fallback triggered
99
+ - ✨ **Success**: Step completed successfully
100
+
101
+ ## Common Issues
102
+
103
+ ### "Could not find search input"
104
+
105
+ Google's page structure varies by region and personalization. The script includes fallback strategies, but may need adjustment for your specific Google layout.
106
+
107
+ **Solution**: The script will try multiple approaches automatically. Check the logs to see which strategy worked.
108
+
109
+ ### "No GitHub links found in search results"
110
+
111
+ This can happen if:
112
+ - Google blocks automated searches
113
+ - Search results don't include GitHub repositories
114
+ - Results are personalized differently
115
+
116
+ **Solution**: Try running the script again, or manually verify that searching "btcp-cowork" on Google returns GitHub results.
117
+
118
+ ### "Redirected to login page"
119
+
120
+ GitHub requires authentication to star repositories.
121
+
122
+ **Solution**: This is expected behavior. The script will detect the login redirect and explain the situation. In a real AI agent, this would trigger an OAuth flow or use stored credentials.
123
+
124
+ ### "Error: connect ECONNREFUSED"
125
+
126
+ The extension is not running or no session is active.
127
+
128
+ **Solution**:
129
+ 1. Verify the extension is loaded in Chrome
130
+ 2. Click the extension icon
131
+ 3. Click "Start Session"
132
+ 4. Run the script again
133
+
134
+ ## API Quality Insights
135
+
136
+ This scenario helps evaluate the BTCP API for AI agent use cases:
137
+
138
+ ### Strengths
139
+ ✅ **Clear accessibility tree**: Snapshot provides semantic element information
140
+ ✅ **Stable references**: `@ref:N` selectors work reliably within a session
141
+ ✅ **Simple API**: Navigation and interaction methods are intuitive
142
+ ✅ **Error messages**: Informative feedback when operations fail
143
+
144
+ ### Areas for Improvement
145
+ āš ļø **Complex pages**: Some dynamic content needs multiple snapshot strategies
146
+ āš ļø **Authentication**: OAuth flows require additional handling
147
+ āš ļø **Timing**: Some actions need explicit wait times (could use smart waiting)
148
+ āš ļø **Ref invalidation**: References become stale after navigation (expected, but needs handling)
149
+
150
+ ## Writing Your Own Scenarios
151
+
152
+ To create a new scenario test:
153
+
154
+ 1. **Create a new file** in this directory (e.g., `scenario-form-filling.ts`)
155
+
156
+ 2. **Import the client**:
157
+ ```typescript
158
+ import { createClient } from '../../../packages/extension/src/index.js';
159
+ ```
160
+
161
+ 3. **Structure your workflow**:
162
+ ```typescript
163
+ async function main() {
164
+ const client = createClient();
165
+
166
+ // Step 1: Navigate
167
+ await client.navigate('https://example.com');
168
+
169
+ // Step 2: Understand page
170
+ const snapshot = await client.snapshot({ format: 'tree' });
171
+
172
+ // Step 3: Find and interact with elements
173
+ const button = findElement(snapshot.tree, { role: 'button', name: 'Submit' });
174
+ await client.click(button);
175
+
176
+ // Step 4: Verify outcome
177
+ const newUrl = await client.getUrl();
178
+ console.log('Success:', newUrl.includes('success'));
179
+ }
180
+
181
+ main();
182
+ ```
183
+
184
+ 4. **Add logging** to show AI reasoning at each step
185
+
186
+ 5. **Add to package.json**:
187
+ ```json
188
+ "scripts": {
189
+ "demo:your-scenario": "tsx tests/scenario-your-name.ts"
190
+ }
191
+ ```
192
+
193
+ ## Contributing
194
+
195
+ When adding new scenarios, please:
196
+ - Include detailed AI reasoning logs
197
+ - Handle errors gracefully with fallbacks
198
+ - Document what the scenario tests/demonstrates
199
+ - Update this README with usage instructions
200
+ - Consider edge cases and authentication requirements
201
+
202
+ ## Further Reading
203
+
204
+ - [BTCP API Documentation](../../../CLAUDE.md)
205
+ - [Extension Architecture](../../../packages/extension/README.md)
206
+ - [Core Actions Reference](../../../packages/core/src/actions.ts)
@@ -0,0 +1,380 @@
1
+ /**
2
+ * Real-World AI Agent Scenario: Google Search → GitHub Repository → Star
3
+ *
4
+ * This demo script mimics how an AI agent would reason through a complete browser workflow:
5
+ * 1. Navigate to Google
6
+ * 2. Search for "btcp-cowork"
7
+ * 3. Find and click the first GitHub link in results
8
+ * 4. Attempt to star the repository
9
+ *
10
+ * The script demonstrates:
11
+ * - AI reasoning patterns (element selection, verification, error handling)
12
+ * - Snapshot-based navigation (accessibility tree parsing)
13
+ * - Graceful error recovery
14
+ * - Step-by-step validation
15
+ *
16
+ * Prerequisites:
17
+ * - Chrome extension must be loaded and running
18
+ * - Extension must have a session created
19
+ *
20
+ * Run with: npm run demo:scenario
21
+ */
22
+
23
+ import { createClient } from '../../../packages/extension/src/index.js';
24
+
25
/**
 * Resolve after `ms` milliseconds.
 * Used between steps to simulate agent think time and let pages settle.
 */
const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));

/**
 * Console formatting helpers.
 *
 * Each method writes a single message to stdout via `console.log`, prefixed
 * with an emoji that identifies the kind of message. `thinking` and `step`
 * additionally start with a blank line so that they visually open a new
 * section of the output.
 */
const log = {
  // Agent reasoning ("why am I doing this?")
  thinking: (msg: string) => console.log(`\n🤔 AI Agent: ${msg}`),
  // What the agent is searching for in a snapshot
  analyzing: (msg: string) => console.log(`🔍 Analyzing: ${msg}`),
  // An element that satisfied the search
  found: (msg: string) => console.log(`✅ Found: ${msg}`),
  // A command about to be sent to the browser
  action: (msg: string) => console.log(`🎯 Action: ${msg}`),
  // Observed state used to validate a step
  verify: (msg: string) => console.log(`📊 Verification: ${msg}`),
  // Failures (printed to stdout like everything else)
  error: (msg: string) => console.log(`❌ Error: ${msg}`),
  // Non-fatal problems or triggered fallbacks
  warning: (msg: string) => console.log(`⚠️ Warning: ${msg}`),
  // A step that finished as intended
  success: (msg: string) => console.log(`✨ Success: ${msg}`),
  // Step banner: "[Step <step>/<total>] <msg>"
  step: (step: number, total: number, msg: string) => console.log(`\n[Step ${step}/${total}] ${msg}`),
};
40
+
41
/**
 * Simulates AI reasoning to pick an element out of a snapshot tree.
 *
 * The tree is scanned line by line; each candidate line is expected to look
 * like "@ref:N role='...' name='...' ...". Lines without an "@ref:N" token are
 * ignored. A line is selected when it satisfies every criterion supplied:
 *   - role / name / type: the quoted attribute must equal the value exactly
 *   - nameContains: case-insensitive substring test against the name attribute
 * Criteria that are omitted (or empty strings) are not checked.
 *
 * @param tree      snapshot text, one element per line
 * @param criteria  attribute constraints the element must meet
 * @returns the "@ref:N" selector of the first matching line (which is also
 *          reported through `log.found`), or null when nothing matches
 */
function findElement(tree: string, criteria: {
  role?: string;
  name?: string;
  nameContains?: string;
  type?: string;
}): string | null {
  for (const entry of tree.split('\n')) {
    const refMatch = entry.match(/@ref:(\d+)/);
    if (!refMatch) continue;

    // Pull the first capture group of an attribute pattern out of this line.
    const attr = (pattern: RegExp): string | undefined => {
      const hit = entry.match(pattern);
      return hit ? hit[1] : undefined;
    };

    // Reject the line as soon as any requested criterion fails.
    if (criteria.role && attr(/role='([^']+)'/) !== criteria.role) continue;

    if (criteria.name && attr(/name='([^']+)'/) !== criteria.name) continue;

    if (criteria.nameContains) {
      const elementName = attr(/name='([^']+)'/);
      const wanted = criteria.nameContains.toLowerCase();
      if (elementName === undefined || !elementName.toLowerCase().includes(wanted)) continue;
    }

    if (criteria.type && attr(/type='([^']+)'/) !== criteria.type) continue;

    const ref = `@ref:${refMatch[1]}`;
    log.found(`${ref} - ${entry.trim()}`);
    return ref;
  }

  return null;
}
93
+
94
/**
 * Find the first link in a snapshot tree that mentions a given domain.
 *
 * Only lines containing "role='link'" and an "@ref:N" token are considered.
 * The domain is matched case-insensitively against the whole line, so it may
 * appear in the link name or in any other attribute printed on that line.
 *
 * @param tree    snapshot text, one element per line
 * @param domain  text to look for, e.g. "github.com"
 * @returns the "@ref:N" selector of the first matching link (also reported
 *          through `log.found`), or null when no link mentions the domain
 */
function findLinkByDomain(tree: string, domain: string): string | null {
  // Normalise once instead of on every line.
  const needle = domain.toLowerCase();

  for (const line of tree.split('\n')) {
    if (!line.includes("role='link'")) continue;

    const refMatch = line.match(/@ref:(\d+)/);
    if (!refMatch) continue;

    // Check if line contains the domain (in name or other attributes)
    if (line.toLowerCase().includes(needle)) {
      const ref = `@ref:${refMatch[1]}`;
      // Label the hit with the requested domain rather than a fixed "GitHub",
      // since this helper is not GitHub-specific.
      log.found(`${ref} - ${domain} link: ${line.trim()}`);
      return ref;
    }
  }

  return null;
}
116
+
117
/**
 * Run the Google → GitHub → Star demo from start to finish.
 *
 * The workflow is a fixed sequence of 15 logged steps: create the client,
 * open Google, snapshot the page, locate the search input, type the query,
 * submit it (button click, or Enter key when no button is found), snapshot
 * the results, click the first link mentioning "github.com", snapshot the
 * repository page, locate and click a star control, then inspect the final
 * URL and snapshot to report the outcome.
 *
 * Fixed `sleep` delays are inserted between steps to give pages time to load.
 * Any error thrown inside the workflow is logged and the process exits with
 * status code 1.
 */
async function main() {
  const TOTAL_STEPS = 15;
  let currentStep = 0;

  log.thinking('Starting Google → GitHub → Star workflow demonstration');
  console.log('This script simulates AI agent reasoning patterns for browser automation\n');

  try {
    // Step 1: Initialize client
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Initialize connection to browser extension');
    const client = createClient();
    log.success('Client connected');
    await sleep(500);

    // Step 2: Navigate to Google
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Navigate to Google');
    log.thinking('Need to navigate to google.com to start search');
    log.action('Navigating to https://www.google.com...');

    await client.navigate('https://www.google.com');
    log.success('Navigation complete');
    await sleep(1000); // Wait for page to settle

    // Step 3: Take snapshot to understand page structure
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Take snapshot to analyze page structure');
    log.thinking('Taking snapshot to identify interactive elements');
    log.action('Calling snapshot API...');

    const snapshot1 = await client.snapshot({ format: 'tree' });
    log.verify(`Snapshot captured: ${snapshot1.split('\n').length} elements found`);
    await sleep(500);

    // Step 4: Find search input
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Locate search input field');
    log.analyzing('Looking for search box (role=searchbox or combobox)');

    let searchInput = findElement(snapshot1, { role: 'combobox' });
    if (!searchInput) {
      searchInput = findElement(snapshot1, { role: 'searchbox' });
    }

    if (!searchInput) {
      log.error('Could not find search input on Google homepage');
      log.warning('This might be due to Google\'s dynamic content or region-specific layout');
      // Try alternative approach: look for input with name containing "search" or "q"
      log.analyzing('Trying alternative strategy: looking for textbox with search-related name');
      searchInput = findElement(snapshot1, { role: 'textbox', nameContains: 'search' });
    }

    if (!searchInput) {
      throw new Error('Unable to locate search input field');
    }

    await sleep(500);

    // Step 5: Type search query
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Type search query into input');
    log.thinking('Need to type "btcp-cowork" into the search box');
    log.action(`Typing "btcp-cowork" into ${searchInput}...`);

    await client.type(searchInput, 'btcp-cowork');
    log.success('Query typed successfully');
    await sleep(500);

    // Step 6: Find and click search button
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Locate and click search button');
    log.analyzing('Looking for search submit button (role=button, name contains "search")');

    // Reuses the snapshot taken before typing; no new snapshot is captured here.
    const searchButton = findElement(snapshot1, { role: 'button', nameContains: 'search' });

    if (!searchButton) {
      log.warning('Search button not found, trying Enter key instead');
      log.action('Pressing Enter key to submit search...');
      await client.execute({
        id: crypto.randomUUID(),
        action: 'press',
        key: 'Enter',
      });
    } else {
      log.action(`Clicking search button ${searchButton}...`);
      await client.click(searchButton);
    }

    log.success('Search submitted');
    await sleep(2000); // Wait for search results to load

    // Step 7: Wait for results page to load
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Wait for search results to load');
    log.thinking('Giving page time to load search results');

    const currentUrl = await client.getUrl();
    log.verify(`Current URL: ${currentUrl}`);

    // Only a warning: the workflow continues even if the URL looks unexpected.
    if (!currentUrl.includes('google.com/search')) {
      log.warning('URL does not appear to be a search results page');
    }
    await sleep(1000);

    // Step 8: Take snapshot of results page
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Snapshot search results page');
    log.action('Taking snapshot of search results...');

    const snapshot2 = await client.snapshot({ format: 'tree' });
    log.verify(`Results snapshot captured: ${snapshot2.split('\n').length} elements found`);
    await sleep(500);

    // Step 9: Find first GitHub link
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Locate first GitHub link in results');
    log.analyzing('Searching for links containing "github.com"');
    log.thinking('AI reasoning: GitHub links are likely to contain "github" in the URL or link text');

    const githubLink = findLinkByDomain(snapshot2, 'github.com');

    if (!githubLink) {
      log.error('No GitHub links found in search results');
      log.warning('Possible reasons:');
      console.log(' - Search results may not include GitHub repositories');
      console.log(' - Google may have blocked automated searches');
      console.log(' - Results structure may differ from expected format');
      throw new Error('Cannot proceed without GitHub link');
    }

    await sleep(500);

    // Step 10: Click GitHub link
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Navigate to GitHub repository');
    log.action(`Clicking GitHub link ${githubLink}...`);

    await client.click(githubLink);
    log.success('Clicked GitHub link, waiting for page load...');
    await sleep(3000); // Wait for GitHub page to load

    // Step 11: Verify we're on GitHub
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Verify navigation to GitHub');

    const githubUrl = await client.getUrl();
    log.verify(`Current URL: ${githubUrl}`);

    if (!githubUrl.includes('github.com')) {
      log.warning('URL does not appear to be github.com');
    } else {
      log.success('Successfully navigated to GitHub');
    }

    await sleep(1000);

    // Step 12: Take snapshot of GitHub repo page
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Snapshot GitHub repository page');
    log.action('Taking snapshot of repository page...');

    const snapshot3 = await client.snapshot({ format: 'tree' });
    log.verify(`GitHub page snapshot: ${snapshot3.split('\n').length} elements found`);
    await sleep(500);

    // Step 13: Find star button
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Locate star button');
    log.analyzing('Looking for Star/Unstar button (role=button, name contains "star")');
    log.thinking('AI reasoning: Star button typically has role=button and name includes "star" or "unstar"');

    let starButton = findElement(snapshot3, { role: 'button', nameContains: 'star' });

    if (!starButton) {
      log.warning('Star button not found in standard format');
      log.analyzing('Trying alternative: looking for any element with "star" in name');

      // Try to find any element with "star" in it.
      // This matches the substring anywhere on the line, regardless of role.
      const lines = snapshot3.split('\n');
      for (const line of lines) {
        if (line.toLowerCase().includes('star')) {
          const refMatch = line.match(/@ref:(\d+)/);
          if (refMatch) {
            starButton = `@ref:${refMatch[1]}`;
            log.found(`Possible star element: ${line.trim()}`);
            break;
          }
        }
      }
    }

    if (!starButton) {
      log.error('Could not locate star button');
      log.warning('Possible reasons:');
      console.log(' - User may not be logged into GitHub');
      console.log(' - Page structure may differ from expected format');
      console.log(' - Repository may have restricted star functionality');
      throw new Error('Cannot proceed without star button');
    }

    await sleep(500);

    // Step 14: Click star button
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Attempt to star the repository');
    log.thinking('Clicking star button - this may require GitHub login');
    log.action(`Clicking star button ${starButton}...`);

    await client.click(starButton);
    log.success('Star button clicked');
    await sleep(2000);

    // Step 15: Verify outcome
    currentStep++;
    log.step(currentStep, TOTAL_STEPS, 'Verify final state');

    const finalUrl = await client.getUrl();
    log.verify(`Final URL: ${finalUrl}`);

    if (finalUrl.includes('login')) {
      log.warning('Redirected to login page - authentication required to star repositories');
      log.thinking('AI reasoning: In a real scenario, the agent would need to handle OAuth flow or use stored credentials');
    } else if (finalUrl.includes('github.com')) {
      log.success('Still on GitHub page - star action may have succeeded');
      log.thinking('Taking final snapshot to verify star state...');

      // An "unstar" control in the new snapshot is taken as evidence that
      // the repository is now starred.
      const finalSnapshot = await client.snapshot({ format: 'tree' });
      const hasUnstar = finalSnapshot.toLowerCase().includes('unstar');

      if (hasUnstar) {
        log.success('✨ Repository successfully starred! (Found "Unstar" button)');
      } else {
        log.verify('Star action completed, but state verification inconclusive');
      }
    }

    console.log('\n' + '='.repeat(80));
    log.success('🎉 Demo workflow completed successfully!');
    console.log('='.repeat(80));

    console.log('\n📊 Workflow Summary:');
    console.log(` • Navigated to Google`);
    console.log(` • Searched for "btcp-cowork"`);
    console.log(` • Found and clicked GitHub link`);
    console.log(` • Attempted to star repository`);
    console.log(` • Total steps executed: ${currentStep}/${TOTAL_STEPS}`);

    console.log('\n💡 API Quality Assessment:');
    console.log(' ✅ Snapshot API provides clear accessibility tree');
    console.log(' ✅ Element refs (@ref:N) are stable and easy to use');
    console.log(' ✅ Navigation and interaction methods work reliably');
    console.log(' ⚠️ Some complex page structures need fallback strategies');
    console.log(' ⚠️ Authentication flows require additional handling');

  } catch (error) {
    // Report the failure, dump the raw error for debugging, and signal
    // failure to the caller through the exit status.
    log.error(`Workflow failed: ${error instanceof Error ? error.message : String(error)}`);
    console.error(error);
    process.exit(1);
  }
}
378
+
379
+ // Run the demo
380
+ main();
@@ -0,0 +1,14 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "ESNext",
5
+ "moduleResolution": "bundler",
6
+ "strict": true,
7
+ "esModuleInterop": true,
8
+ "skipLibCheck": true,
9
+ "outDir": "dist",
10
+ "rootDir": "src",
11
+ "types": ["chrome"]
12
+ },
13
+ "include": ["src/**/*"]
14
+ }