browser-pilot 0.0.11 → 0.0.13

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -33,6 +33,7 @@ await browser.close();
33
33
  | Bun CDP connection bugs | Custom CDP client that works everywhere |
34
34
  | Single-selector API (fragile) | Multi-selector by default: `['#primary', '.fallback']` |
35
35
  | No action batching (high latency) | Batch DSL: one call for entire sequences |
36
+ | No inline assertions (extra API calls to verify) | Built-in assertions: verify state within the same batch |
36
37
  | No AI-optimized snapshots | Built-in accessibility tree extraction |
37
38
  | No audio I/O for voice agents | Mic injection + output capture + Whisper transcription |
38
39
 
@@ -131,6 +132,25 @@ console.log(result.totalDurationMs); // total execution time
131
132
  console.log(result.steps[5].result); // snapshot from step 5
132
133
  ```
133
134
 
135
+ Assertion steps verify expected state within the same batch — no extra round trips. Available assertion actions: `assertVisible`, `assertExists`, `assertText`, `assertUrl`, `assertValue`.
136
+
137
+ ```typescript
138
+ const result = await page.batch([
139
+ { action: 'goto', url: 'https://example.com/login' },
140
+ { action: 'fill', selector: '#email', value: 'user@example.com' },
141
+ { action: 'fill', selector: '#password', value: 'secret' },
142
+ { action: 'submit', selector: '#login-btn' },
143
+ { action: 'assertUrl', expect: '/dashboard' },
144
+ { action: 'assertVisible', selector: '.welcome-message' },
145
+ ]);
146
+ ```
147
+
148
+ Any step accepts optional `retry` and `retryDelay` settings, useful for flaky or asynchronously loaded content:
149
+
150
+ ```typescript
151
+ { action: 'assertVisible', selector: '.async-content', retry: 3, retryDelay: 1000 }
152
+ ```
153
+
134
154
  ### AI-Optimized Snapshots
135
155
 
136
156
  Get the page state in a format perfect for LLMs:
@@ -147,10 +167,10 @@ console.log(snapshot.interactiveElements);
147
167
 
148
168
  // Text representation for LLMs
149
169
  console.log(snapshot.text);
150
- // - main [ref=e1]
151
- // - heading "Welcome" [ref=e2]
152
- // - button "Get Started" [ref=e3]
153
- // - textbox [ref=e4] placeholder="Email"
170
+ // - main ref:e1
171
+ // - heading "Welcome" ref:e2
172
+ // - button "Get Started" ref:e3
173
+ // - textbox ref:e4 placeholder="Email"
154
174
  ```
155
175
 
156
176
  ### Ref-Based Selectors
@@ -159,7 +179,7 @@ After taking a snapshot, use element refs directly as selectors:
159
179
 
160
180
  ```typescript
161
181
  const snapshot = await page.snapshot();
162
- // Output shows: button "Submit" [ref=e4]
182
+ // Output shows: button "Submit" ref:e4
163
183
 
164
184
  // Click using the ref - no fragile CSS needed
165
185
  await page.click('ref:e4');
@@ -363,13 +383,19 @@ bp exec -s my-session '[
363
383
 
364
384
  # Get page state (note the refs in output)
365
385
  bp snapshot -s my-session --format text
366
- # Output: button "Submit" [ref=e4], textbox "Email" [ref=e5], ...
386
+ # Output: button "Submit" ref:e4, textbox "Email" ref:e5, ...
367
387
 
368
388
  # Use refs from snapshot for reliable targeting
369
389
  # Refs are cached per session+URL after snapshot
370
390
  bp exec -s my-session '{"action":"click","selector":"ref:e4"}'
371
391
  bp exec -s my-session '{"action":"fill","selector":"ref:e5","value":"test@example.com"}'
372
392
 
393
+ # Quick discovery commands
394
+ bp page -s my-session # URL, title, headings, forms, interactive controls
395
+ bp forms -s my-session # Structured form metadata only
396
+ bp targets -s my-session # Browser tabs with targetIds
397
+ bp connect --new-tab --url https://example.com --name fresh
398
+
373
399
  # Handle native dialogs (alert/confirm/prompt)
374
400
  bp exec --dialog accept '{"action":"click","selector":"#delete-btn"}'
375
401
 
@@ -380,6 +406,16 @@ bp listen ws -m "*voice*" # monitor WebSocket traffic
380
406
  bp list # list all sessions
381
407
  bp close -s my-session # close session
382
408
  bp actions # show complete action reference
409
+ bp run workflow.json # run a workflow file
410
+
411
+ # Actions with inline assertions (no extra bp eval needed)
412
+ bp exec '[
413
+ {"action":"goto","url":"https://example.com/login"},
414
+ {"action":"fill","selector":"#email","value":"user@example.com"},
415
+ {"action":"submit","selector":"form"},
416
+ {"action":"assertUrl","expect":"/dashboard"},
417
+ {"action":"assertText","expect":"Welcome"}
418
+ ]'
383
419
  ```
384
420
 
385
421
  ### CLI for AI Agents
@@ -393,7 +429,7 @@ The CLI is designed for AI agent tool calls. The recommended workflow:
393
429
  ```bash
394
430
  # Step 1: Get page state with refs
395
431
  bp snapshot --format text
396
- # Output shows: button "Add to Cart" [ref=e12], textbox "Search" [ref=e5]
432
+ # Output shows: button "Add to Cart" ref:e12, textbox "Search" ref:e5
397
433
 
398
434
  # Step 2: Use refs to interact (stable, no CSS guessing)
399
435
  bp exec '[
@@ -572,7 +608,7 @@ const browserTool = {
572
608
  items: {
573
609
  type: 'object',
574
610
  properties: {
575
- action: { enum: ['goto', 'click', 'fill', 'submit', 'snapshot'] },
611
+ action: { enum: ['goto', 'click', 'fill', 'submit', 'snapshot', 'assertVisible', 'assertExists', 'assertText', 'assertUrl', 'assertValue'] },
576
612
  selector: { type: ['string', 'array'] },
577
613
  value: { type: 'string' },
578
614
  url: { type: 'string' },