sentienceapi 0.99.0 → 0.99.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/README.md +109 -1099
  2. package/dist/actions.d.ts +9 -0
  3. package/dist/actions.d.ts.map +1 -1
  4. package/dist/actions.js +126 -0
  5. package/dist/actions.js.map +1 -1
  6. package/dist/agent-runtime.d.ts +34 -3
  7. package/dist/agent-runtime.d.ts.map +1 -1
  8. package/dist/agent-runtime.js +239 -1
  9. package/dist/agent-runtime.js.map +1 -1
  10. package/dist/agent.d.ts +3 -1
  11. package/dist/agent.d.ts.map +1 -1
  12. package/dist/agent.js +44 -5
  13. package/dist/agent.js.map +1 -1
  14. package/dist/browser.d.ts +24 -1
  15. package/dist/browser.d.ts.map +1 -1
  16. package/dist/browser.js +88 -1
  17. package/dist/browser.js.map +1 -1
  18. package/dist/captcha/types.d.ts +6 -0
  19. package/dist/captcha/types.d.ts.map +1 -1
  20. package/dist/captcha/types.js.map +1 -1
  21. package/dist/cli.js +240 -0
  22. package/dist/cli.js.map +1 -1
  23. package/dist/index.d.ts +4 -3
  24. package/dist/index.d.ts.map +1 -1
  25. package/dist/index.js +6 -2
  26. package/dist/index.js.map +1 -1
  27. package/dist/read.d.ts +7 -0
  28. package/dist/read.d.ts.map +1 -1
  29. package/dist/read.js +42 -0
  30. package/dist/read.js.map +1 -1
  31. package/dist/runtime-agent.d.ts +4 -0
  32. package/dist/runtime-agent.d.ts.map +1 -1
  33. package/dist/runtime-agent.js +40 -4
  34. package/dist/runtime-agent.js.map +1 -1
  35. package/dist/tools/context.d.ts +18 -0
  36. package/dist/tools/context.d.ts.map +1 -0
  37. package/dist/tools/context.js +40 -0
  38. package/dist/tools/context.js.map +1 -0
  39. package/dist/tools/defaults.d.ts +5 -0
  40. package/dist/tools/defaults.d.ts.map +1 -0
  41. package/dist/tools/defaults.js +368 -0
  42. package/dist/tools/defaults.js.map +1 -0
  43. package/dist/tools/filesystem.d.ts +12 -0
  44. package/dist/tools/filesystem.d.ts.map +1 -0
  45. package/dist/tools/filesystem.js +137 -0
  46. package/dist/tools/filesystem.js.map +1 -0
  47. package/dist/tools/index.d.ts +5 -0
  48. package/dist/tools/index.d.ts.map +1 -0
  49. package/dist/tools/index.js +15 -0
  50. package/dist/tools/index.js.map +1 -0
  51. package/dist/tools/registry.d.ts +38 -0
  52. package/dist/tools/registry.d.ts.map +1 -0
  53. package/dist/tools/registry.js +100 -0
  54. package/dist/tools/registry.js.map +1 -0
  55. package/dist/types.d.ts +52 -0
  56. package/dist/types.d.ts.map +1 -1
  57. package/dist/utils/zod.d.ts +5 -0
  58. package/dist/utils/zod.d.ts.map +1 -0
  59. package/dist/utils/zod.js +80 -0
  60. package/dist/utils/zod.js.map +1 -0
  61. package/package.json +1 -1
  62. package/src/extension/content.js +4 -3
  63. package/src/extension/injected_api.js +83 -31
  64. package/src/extension/manifest.json +1 -1
  65. package/src/extension/pkg/sentience_core_bg.wasm +0 -0
  66. package/src/extension/release.json +46 -46
package/README.md CHANGED
@@ -1,1170 +1,180 @@
1
1
  # Sentience TypeScript SDK
2
2
 
3
- **Semantic snapshots and Jest-style assertions for reliable AI web agents with time-travel traces**
3
+ > **A verification & control layer for AI agents that operate browsers**
4
4
 
5
- ## 📦 Installation
5
+ Sentience is built for **AI agent developers** who already use Playwright / CDP / LangGraph and care about **flakiness, cost, determinism, evals, and debugging**.
6
6
 
7
- ```bash
8
- # Install from npm
9
- npm install sentienceapi
10
-
11
- # Install Playwright browsers (required)
12
- npx playwright install chromium
13
- ```
14
-
15
- **For local development:**
16
-
17
- ```bash
18
- npm install
19
- npm run build
20
- ```
21
-
22
- ## Jest for AI Web Agent
23
-
24
- ### Semantic snapshots and assertions that let agents act, verify, and know when they're done.
25
-
26
- Use `AgentRuntime` to add Jest-style assertions to your agent loops. Verify browser state, check task completion, and get clear feedback on what's working:
27
-
28
- ```typescript
29
- import {
30
- SentienceBrowser,
31
- AgentRuntime,
32
- HumanHandoffSolver,
33
- urlContains,
34
- exists,
35
- allOf,
36
- isEnabled,
37
- isChecked,
38
- valueEquals,
39
- } from 'sentienceapi';
40
- import { createTracer } from 'sentienceapi';
41
- import { Page } from 'playwright';
42
-
43
- // Create browser and tracer
44
- const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
45
- const tracer = await createTracer({ runId: 'my-run', uploadTrace: false });
46
-
47
- // Create browser adapter for AgentRuntime
48
- const browserAdapter = {
49
- snapshot: async (_page: Page, options?: Record<string, any>) => {
50
- return await browser.snapshot(options);
51
- },
52
- };
53
- const runtime = new AgentRuntime(browserAdapter, browser.getPage(), tracer);
54
-
55
- // Navigate and take snapshot
56
- await browser.getPage().goto('https://example.com');
57
- runtime.beginStep('Verify page loaded');
58
- await runtime.snapshot();
59
-
60
- // Run assertions (Jest-style)
61
- runtime.assert(urlContains('example.com'), 'on_correct_domain');
62
- runtime.assert(exists('role=heading'), 'has_heading');
63
- runtime.assert(allOf([exists('role=button'), exists('role=link')]), 'has_interactive_elements');
64
-
65
- // v1: state-aware assertions (when Gateway refinement is enabled)
66
- runtime.assert(isEnabled('role=button'), 'button_enabled');
67
- runtime.assert(isChecked("role=checkbox name~'subscribe'"), 'subscribe_checked_if_present');
68
- runtime.assert(
69
- valueEquals("role=textbox name~'email'", 'user@example.com'),
70
- 'email_value_if_present'
71
- );
72
-
73
- // v2: retry loop with snapshot confidence gating + exhaustion
74
- const ok = await runtime
75
- .check(exists('role=heading'), 'heading_eventually_visible', true)
76
- .eventually({ timeoutMs: 10_000, pollMs: 250, minConfidence: 0.7, maxSnapshotAttempts: 3 });
77
- console.log('eventually() result:', ok);
78
-
79
- // CAPTCHA handling (detection + handoff + verify)
80
- runtime.setCaptchaOptions({
81
- policy: 'callback',
82
- handler: HumanHandoffSolver(),
83
- });
84
-
85
- // Check task completion
86
- if (runtime.assertDone(exists("text~'Example'"), 'task_complete')) {
87
- console.log('✅ Task completed!');
88
- }
89
-
90
- console.log(`Task done: ${runtime.isTaskDone}`);
91
- ```
92
-
93
- #### CAPTCHA strategies (Batteries Included)
94
-
95
- ```typescript
96
- import { ExternalSolver, HumanHandoffSolver, VisionSolver } from 'sentienceapi';
97
-
98
- // Human-in-loop
99
- runtime.setCaptchaOptions({ policy: 'callback', handler: HumanHandoffSolver() });
100
-
101
- // Vision verification only
102
- runtime.setCaptchaOptions({ policy: 'callback', handler: VisionSolver() });
103
-
104
- // External system/webhook
105
- runtime.setCaptchaOptions({
106
- policy: 'callback',
107
- handler: ExternalSolver(async ctx => {
108
- await fetch(process.env.CAPTCHA_WEBHOOK_URL!, {
109
- method: 'POST',
110
- headers: { 'Content-Type': 'application/json' },
111
- body: JSON.stringify({ runId: ctx.runId, url: ctx.url }),
112
- });
113
- }),
114
- });
115
- ```
116
-
117
- ### Failure Artifact Buffer (Phase 1)
118
-
119
- Capture a short ring buffer of screenshots and persist them when a required assertion fails.
120
-
121
- ```typescript
122
- runtime.enableFailureArtifacts({ bufferSeconds: 15, captureOnAction: true, fps: 0 });
123
-
124
- // After each action, record it (best-effort).
125
- await runtime.recordAction('CLICK');
126
- ```
127
-
128
- ### Redaction callback (Phase 3)
129
-
130
- Provide a user-defined callback to redact snapshots and decide whether to persist frames. The SDK does not implement image/video redaction.
131
-
132
- ```typescript
133
- import { RedactionContext, RedactionResult } from 'sentienceapi';
134
-
135
- const redact = (_ctx: RedactionContext): RedactionResult => {
136
- return { dropFrames: true };
137
- };
138
-
139
- runtime.enableFailureArtifacts({ onBeforePersist: redact });
140
- ```
141
-
142
- **See examples:** [`examples/asserts/`](examples/asserts/)
143
-
144
- ## 🚀 Quick Start: Choose Your Abstraction Level
145
-
146
- Sentience SDK offers **4 levels of abstraction** - choose based on your needs:
147
-
148
- <details open>
149
- <summary><b>💬 Level 4: Conversational Agent (Highest Abstraction)</b> - NEW in v0.3.0</summary>
150
-
151
- Complete automation with natural conversation. Just describe what you want, and the agent plans and executes everything:
152
-
153
- ```typescript
154
- import { SentienceBrowser, ConversationalAgent, OpenAIProvider } from 'sentienceapi';
155
-
156
- const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
157
- const llm = new OpenAIProvider(process.env.OPENAI_API_KEY!, 'gpt-4o');
158
- const agent = new ConversationalAgent({ llmProvider: llm, browser });
159
-
160
- // Navigate to starting page
161
- await browser.getPage().goto('https://amazon.com');
162
-
163
- // ONE command does it all - automatic planning and execution!
164
- const response = await agent.execute(
165
- "Search for 'wireless mouse' and tell me the price of the top result"
166
- );
167
- console.log(response); // "I found the top result for wireless mouse on Amazon. It's priced at $24.99..."
168
-
169
- // Follow-up questions maintain context
170
- const followUp = await agent.chat('Add it to cart');
171
- console.log(followUp);
172
-
173
- await browser.close();
174
- ```
175
-
176
- **When to use:** Complex multi-step tasks, conversational interfaces, maximum convenience
177
- **Code reduction:** 99% less code - describe goals in natural language
178
- **Requirements:** OpenAI or Anthropic API key
179
-
180
- </details>
181
-
182
- <details>
183
- <summary><b>🤖 Level 3: Agent (Natural Language Commands)</b> - Recommended for Most Users</summary>
184
-
185
- Zero coding knowledge needed. Just write what you want in plain English:
186
-
187
- ```typescript
188
- import { SentienceBrowser, SentienceAgent, OpenAIProvider } from 'sentienceapi';
189
-
190
- const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
191
- const llm = new OpenAIProvider(process.env.OPENAI_API_KEY!, 'gpt-4o-mini');
192
- const agent = new SentienceAgent(browser, llm);
193
-
194
- await browser.getPage().goto('https://www.amazon.com');
195
-
196
- // Just natural language commands - agent handles everything!
197
- await agent.act('Click the search box');
198
- await agent.act("Type 'wireless mouse' into the search field");
199
- await agent.act('Press Enter key');
200
- await agent.act('Click the first product result');
201
-
202
- // Automatic token tracking
203
- console.log(`Tokens used: ${agent.getTokenStats().totalTokens}`);
204
- await browser.close();
205
- ```
206
-
207
- **When to use:** Quick automation, non-technical users, rapid prototyping
208
- **Code reduction:** 95-98% less code vs manual approach
209
- **Requirements:** OpenAI API key (or Anthropic for Claude)
210
-
211
- </details>
212
-
213
- <details>
214
- <summary><b>🔧 Level 2: Direct SDK (Technical Control)</b></summary>
215
-
216
- Full control with semantic selectors. For technical users who want precision:
217
-
218
- ```typescript
219
- import { SentienceBrowser, snapshot, find, click, typeText, press } from 'sentienceapi';
220
-
221
- const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
222
- await browser.getPage().goto('https://www.amazon.com');
223
-
224
- // Get semantic snapshot
225
- const snap = await snapshot(browser);
226
-
227
- // Find elements using query DSL
228
- const searchBox = find(snap, 'role=textbox text~"search"');
229
- await click(browser, searchBox!.id);
230
-
231
- // Type and submit
232
- await typeText(browser, searchBox!.id, 'wireless mouse');
233
- await press(browser, 'Enter');
234
-
235
- await browser.close();
236
- ```
237
-
238
- **When to use:** Need precise control, debugging, custom workflows
239
- **Code reduction:** Still 80% less code vs raw Playwright
240
- **Requirements:** Only Sentience API key
241
-
242
- </details>
243
-
244
- <details>
245
- <summary><b>⚙️ Level 1: Raw Playwright (Maximum Control)</b></summary>
246
-
247
- For when you need complete low-level control (rare):
248
-
249
- ```typescript
250
- import { chromium } from 'playwright';
251
-
252
- const browser = await chromium.launch();
253
- const page = await browser.newPage();
254
- await page.goto('https://www.amazon.com');
255
- await page.fill('#twotabsearchtextbox', 'wireless mouse');
256
- await page.press('#twotabsearchtextbox', 'Enter');
257
- await browser.close();
258
- ```
259
-
260
- **When to use:** Very specific edge cases, custom browser configs
261
- **Tradeoffs:** No semantic intelligence, brittle selectors, more code
262
-
263
- </details>
264
-
265
- ---
266
-
267
- ### Human-like Typing
268
-
269
- Add realistic delays between keystrokes to mimic human typing:
270
-
271
- ```typescript
272
- // Type instantly (default)
273
- await typeText(browser, elementId, 'Hello World');
7
+ Often described as _Jest for Browser AI Agents_ - but applied to end-to-end agent runs (not unit tests).
274
8
 
275
- // Type with human-like delay (~10ms between keystrokes)
276
- await typeText(browser, elementId, 'Hello World', false, 10);
277
- ```
278
-
279
- ### Scroll to Element
280
-
281
- Scroll elements into view with smooth animation:
282
-
283
- ```typescript
284
- const snap = await snapshot(browser);
285
- const button = find(snap, 'role=button text~"Submit"');
286
-
287
- // Scroll element into view with smooth animation
288
- await scrollTo(browser, button.id);
289
-
290
- // Scroll instantly to top of viewport
291
- await scrollTo(browser, button.id, 'instant', 'start');
292
- ```
293
-
294
- ---
295
-
296
- <details>
297
- <summary><h2>📊 Agent Execution Tracing (NEW in v0.3.1)</h2></summary>
298
-
299
- Record complete agent execution traces for debugging, analysis, and replay. Traces capture every step, snapshot, LLM decision, and action in a structured JSONL format.
300
-
301
- ### Quick Start: Agent with Tracing
302
-
303
- ```typescript
304
- import {
305
- SentienceBrowser,
306
- SentienceAgent,
307
- OpenAIProvider,
308
- Tracer,
309
- JsonlTraceSink,
310
- } from 'sentienceapi';
311
- import { randomUUID } from 'crypto';
312
-
313
- const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
314
- const llm = new OpenAIProvider(process.env.OPENAI_API_KEY!, 'gpt-4o');
315
-
316
- // Create a tracer
317
- const runId = randomUUID();
318
- const sink = new JsonlTraceSink(`traces/${runId}.jsonl`);
319
- const tracer = new Tracer(runId, sink);
320
-
321
- // Create agent with tracer
322
- const agent = new SentienceAgent(browser, llm, 50, true, tracer);
323
-
324
- // Emit run_start
325
- tracer.emitRunStart('SentienceAgent', 'gpt-4o');
326
-
327
- try {
328
- await browser.getPage().goto('https://google.com');
329
-
330
- // Every action is automatically traced!
331
- await agent.act('Click the search box');
332
- await agent.act("Type 'sentience ai' into the search field");
333
- await agent.act('Press Enter');
334
-
335
- tracer.emitRunEnd(3);
336
- } finally {
337
- // Flush trace to disk
338
- await agent.closeTracer();
339
- await browser.close();
340
- }
341
-
342
- console.log(`✅ Trace saved to: traces/${runId}.jsonl`);
343
- ```
9
+ The core loop is:
344
10
 
345
- ### What Gets Traced
11
+ > **Agent → Snapshot → Action → Verification → Artifact**
346
12
 
347
- Each agent action generates multiple events:
13
+ ## What Sentience is
348
14
 
349
- 1. **step_start** - Before action execution (goal, URL, attempt)
350
- 2. **snapshot** - Page state with all interactive elements
351
- 3. **llm_response** - LLM decision (model, tokens, response)
352
- 4. **action** - Executed action (type, element ID, success)
353
- 5. **error** - Any failures (error message, retry attempt)
15
+ - A **verification-first runtime** (`AgentRuntime`) for browser agents
16
+ - Treats the browser as an adapter (Playwright / CDP); **`AgentRuntime` is the product**
17
+ - A **controlled perception** layer (semantic snapshots; pruning/limits; lowers token usage by filtering noise from what models see)
18
+ - A **debugging layer** (structured traces + failure artifacts)
19
+ - Enables **local LLM small models (3B-7B)** for browser automation (privacy, compliance, and cost control)
20
+ - Keeps vision models **optional** (use as a fallback when DOM/snapshot structure falls short, e.g. `<canvas>`)
354
21
 
355
- ### Schema Compatibility
22
+ ## What Sentience is not
356
23
 
357
- Traces are **100% compatible** with Python SDK traces - use the same tools to analyze traces from both TypeScript and Python agents!
24
+ - Not a browser driver
25
+ - Not a Playwright replacement
26
+ - Not a vision-first agent framework
358
27
 
359
- **See full example:** [examples/agent-with-tracing.ts](examples/agent-with-tracing.ts)
28
+ ## Install
360
29
 
361
- </details>
362
-
363
- <details>
364
- <summary><h2>🔍 Agent Runtime Verification</h2></summary>
365
-
366
- `AgentRuntime` provides assertion predicates for runtime verification in agent loops, enabling programmatic verification of browser state during execution.
367
-
368
- ```typescript
369
- import { SentienceBrowser } from 'sentienceapi';
370
- import { AgentRuntime, urlContains, exists, allOf } from 'sentienceapi';
371
- import { createTracer } from 'sentienceapi';
372
-
373
- const browser = new SentienceBrowser();
374
- await browser.start();
375
- const tracer = await createTracer({ runId: 'my-run', uploadTrace: false });
376
- const runtime = new AgentRuntime(browser, browser.getPage(), tracer);
377
-
378
- // Navigate and take snapshot
379
- await browser.getPage().goto('https://example.com');
380
- runtime.beginStep('Verify page');
381
- await runtime.snapshot();
382
-
383
- // Run assertions
384
- runtime.assert(urlContains('example.com'), 'on_correct_domain');
385
- runtime.assert(exists('role=heading'), 'has_heading');
386
- runtime.assertDone(exists("text~'Example'"), 'task_complete');
387
-
388
- console.log(`Task done: ${runtime.isTaskDone}`);
30
+ ```bash
31
+ npm install sentienceapi
32
+ npx playwright install chromium
389
33
  ```
390
34
 
391
- **See example:** [examples/agent-runtime-verification.ts](examples/agent-runtime-verification.ts)
35
+ ## Conceptual example (why this exists)
392
36
 
393
- </details>
37
+ - Steps are **gated by verifiable UI assertions**
38
+ - If progress can’t be proven, the run **fails with evidence**
39
+ - This is how you make runs **reproducible** and **debuggable**, and how you run evals reliably
394
40
 
395
- ---
41
+ ## Quickstart: a verification-first loop
396
42
 
397
- <details>
398
- <summary><h2>💼 Real-World Example: Assertion-driven navigation</h2></summary>
43
+ ```ts
44
+ import { SentienceBrowser, AgentRuntime } from 'sentienceapi';
45
+ import { JsonlTraceSink, Tracer } from 'sentienceapi';
46
+ import { exists, urlContains } from 'sentienceapi';
47
+ import type { Page } from 'playwright';
399
48
 
400
- This example shows how to use **assertions + `.eventually()`** to make an agent loop resilient:
49
+ async function main(): Promise<void> {
50
+ const tracer = new Tracer('demo', new JsonlTraceSink('trace.jsonl'));
401
51
 
402
- ```typescript
403
- import { SentienceBrowser, AgentRuntime, urlContains, exists } from 'sentienceapi';
404
- import { createTracer } from 'sentienceapi';
52
+ const browser = new SentienceBrowser();
53
+ await browser.start();
54
+ const page = browser.getPage();
55
+ if (!page) throw new Error('no page');
405
56
 
406
- async function main() {
407
- const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
408
- const tracer = await createTracer({ runId: 'verified-run', uploadTrace: false });
57
+ await page.goto('https://example.com');
409
58
 
410
- const adapter = {
411
- snapshot: async (_page: any, options?: Record<string, any>) => {
412
- return await browser.snapshot(options);
413
- },
414
- };
415
- const runtime = new AgentRuntime(adapter as any, browser.getPage() as any, tracer);
59
+ // AgentRuntime needs a snapshot provider; SentienceBrowser.snapshot() does not depend on Page,
60
+ // so we wrap it to fit the runtime interface.
61
+ const runtime = new AgentRuntime(
62
+ { snapshot: async (_page: Page, options?: Record<string, any>) => browser.snapshot(options) },
63
+ page,
64
+ tracer
65
+ );
416
66
 
417
- await browser.getPage().goto('https://example.com');
418
- runtime.beginStep('Verify we are on the right page');
67
+ runtime.beginStep('Verify homepage');
68
+ await runtime.snapshot({ limit: 60 });
419
69
 
420
- await runtime
421
- .check(urlContains('example.com'), 'on_domain', true)
422
- .eventually({ timeoutMs: 10_000, pollMs: 250, minConfidence: 0.7, maxSnapshotAttempts: 3 });
70
+ runtime.assert(urlContains('example.com'), 'on_domain', true);
71
+ runtime.assert(exists('role=heading'), 'has_heading');
423
72
 
424
- runtime.assert(exists('role=heading'), 'heading_present');
73
+ runtime.assertDone(exists("text~'Example'"), 'task_complete');
425
74
 
426
- await tracer.close();
427
75
  await browser.close();
428
76
  }
429
77
 
430
- main().catch(console.error);
431
- ```
432
-
433
- </details>
434
-
435
- ---
436
-
437
- ## 📚 Core Features
438
-
439
- <details>
440
- <summary><h3>🌐 Browser Control</h3></summary>
441
-
442
- - **`SentienceBrowser`** - Playwright browser with Sentience extension pre-loaded
443
- - **`browser.goto(url)`** - Navigate with automatic extension readiness checks
444
- - Automatic bot evasion and stealth mode
445
- - Configurable headless/headed mode
446
-
447
- </details>
448
-
449
- <details>
450
- <summary><h3>📸 Snapshot - Intelligent Page Analysis</h3></summary>
451
-
452
- **`snapshot(browser, options?)`** - Capture page state with AI-ranked elements
453
-
454
- Features:
455
-
456
- - Returns semantic elements with roles, text, importance scores, and bounding boxes
457
- - Optional screenshot capture (PNG/JPEG)
458
- - Optional visual overlay to see what elements are detected
459
- - TypeScript types for type safety
460
-
461
- **Example:**
462
-
463
- ```typescript
464
- const snap = await snapshot(browser, { screenshot: true, show_overlay: true });
465
-
466
- // Access structured data
467
- console.log(`URL: ${snap.url}`);
468
- console.log(`Viewport: ${snap.viewport.width}x${snap.viewport.height}`);
469
- console.log(`Elements: ${snap.elements.length}`);
470
-
471
- // Iterate over elements
472
- for (const element of snap.elements) {
473
- console.log(`${element.role}: ${element.text} (importance: ${element.importance})`);
474
- }
475
- ```
476
-
477
- </details>
478
-
479
- <details>
480
- <summary><h3>🔍 Query Engine - Semantic Element Selection</h3></summary>
481
-
482
- - **`query(snapshot, selector)`** - Find all matching elements
483
- - **`find(snapshot, selector)`** - Find single best match (by importance)
484
- - Powerful query DSL with multiple operators
485
-
486
- **Query Examples:**
487
-
488
- ```typescript
489
- // Find by role and text
490
- const button = find(snap, 'role=button text="Sign in"');
491
-
492
- // Substring match (case-insensitive)
493
- const link = find(snap, 'role=link text~"more info"');
494
-
495
- // Spatial filtering
496
- const topLeft = find(snap, 'bbox.x<=100 bbox.y<=200');
497
-
498
- // Multiple conditions (AND logic)
499
- const primaryBtn = find(snap, 'role=button clickable=true visible=true importance>800');
500
-
501
- // Prefix/suffix matching
502
- const startsWith = find(snap, 'text^="Add"');
503
- const endsWith = find(snap, 'text$="Cart"');
504
-
505
- // Numeric comparisons
506
- const important = query(snap, 'importance>=700');
507
- const firstRow = query(snap, 'bbox.y<600');
508
- ```
509
-
510
- **📖 [Complete Query DSL Guide](docs/QUERY_DSL.md)** - All operators, fields, and advanced patterns
511
-
512
- </details>
513
-
514
- <details>
515
- <summary><h3>👆 Actions - Interact with Elements</h3></summary>
516
-
517
- - **`click(browser, elementId)`** - Click element by ID
518
- - **`clickRect(browser, rect)`** - Click at center of rectangle (coordinate-based)
519
- - **`typeText(browser, elementId, text)`** - Type into input fields
520
- - **`press(browser, key)`** - Press keyboard keys (Enter, Escape, Tab, etc.)
521
-
522
- All actions return `ActionResult` with success status, timing, and outcome:
523
-
524
- ```typescript
525
- const result = await click(browser, element.id);
526
-
527
- console.log(`Success: ${result.success}`);
528
- console.log(`Outcome: ${result.outcome}`); // "navigated", "dom_updated", "error"
529
- console.log(`Duration: ${result.duration_ms}ms`);
530
- console.log(`URL changed: ${result.url_changed}`);
531
- ```
532
-
533
- **Coordinate-based clicking:**
534
-
535
- ```typescript
536
- import { clickRect } from './src';
537
-
538
- // Click at center of rectangle (x, y, width, height)
539
- await clickRect(browser, { x: 100, y: 200, w: 50, h: 30 });
540
-
541
- // With visual highlight (default: red border for 2 seconds)
542
- await clickRect(browser, { x: 100, y: 200, w: 50, h: 30 }, true, 2.0);
543
-
544
- // Using element's bounding box
545
- const snap = await snapshot(browser);
546
- const element = find(snap, 'role=button');
547
- if (element) {
548
- await clickRect(browser, {
549
- x: element.bbox.x,
550
- y: element.bbox.y,
551
- w: element.bbox.width,
552
- h: element.bbox.height,
553
- });
554
- }
555
- ```
556
-
557
- </details>
558
-
559
- <details>
560
- <summary><h3>⏱️ Wait & Assertions</h3></summary>
561
-
562
- - **`waitFor(browser, selector, timeout?, interval?, useApi?)`** - Wait for element to appear
563
- - **`expect(browser, selector)`** - Assertion helper with fluent API
564
-
565
- **Examples:**
566
-
567
- ```typescript
568
- // Wait for element (auto-detects optimal interval based on API usage)
569
- const result = await waitFor(browser, 'role=button text="Submit"', 10000);
570
- if (result.found) {
571
- console.log(`Found after ${result.duration_ms}ms`);
572
- }
573
-
574
- // Use local extension with fast polling (250ms interval)
575
- const result = await waitFor(browser, 'role=button', 5000, undefined, false);
576
-
577
- // Use remote API with network-friendly polling (1500ms interval)
578
- const result = await waitFor(browser, 'role=button', 5000, undefined, true);
579
-
580
- // Custom interval override
581
- const result = await waitFor(browser, 'role=button', 5000, 500, false);
582
-
583
- // Semantic wait conditions
584
- await waitFor(browser, 'clickable=true', 5000); // Wait for clickable element
585
- await waitFor(browser, 'importance>100', 5000); // Wait for important element
586
- await waitFor(browser, 'role=link visible=true', 5000); // Wait for visible link
587
-
588
- // Assertions
589
- await expect(browser, 'role=button text="Submit"').toExist(5000);
590
- await expect(browser, 'role=heading').toBeVisible();
591
- await expect(browser, 'role=button').toHaveText('Submit');
592
- await expect(browser, 'role=link').toHaveCount(10);
593
- ```
594
-
595
- </details>
596
-
597
- <details>
598
- <summary><h3>🎨 Visual Overlay - Debug Element Detection</h3></summary>
599
-
600
- - **`showOverlay(browser, elements, targetElementId?)`** - Display visual overlay highlighting elements
601
- - **`clearOverlay(browser)`** - Clear overlay manually
602
-
603
- Show color-coded borders around detected elements to debug, validate, and understand what Sentience sees:
604
-
605
- ```typescript
606
- import { showOverlay, clearOverlay } from 'sentienceapi';
607
-
608
- // Take snapshot once
609
- const snap = await snapshot(browser);
610
-
611
- // Show overlay anytime without re-snapshotting
612
- await showOverlay(browser, snap); // Auto-clears after 5 seconds
613
-
614
- // Highlight specific target element in red
615
- const button = find(snap, 'role=button text~"Submit"');
616
- await showOverlay(browser, snap, button.id);
617
-
618
- // Clear manually before 5 seconds
619
- await new Promise(resolve => setTimeout(resolve, 2000));
620
- await clearOverlay(browser);
621
- ```
622
-
623
- **Color Coding:**
624
-
625
- - 🔴 Red: Target element
626
- - 🔵 Blue: Primary elements (`is_primary=true`)
627
- - 🟒 Green: Regular interactive elements
628
-
629
- **Visual Indicators:**
630
-
631
- - Border thickness/opacity scales with importance
632
- - Semi-transparent fill
633
- - Importance badges
634
- - Star icons for primary elements
635
- - Auto-clear after 5 seconds
636
-
637
- </details>
638
-
639
- <details>
640
- <summary><h3>📄 Content Reading</h3></summary>
641
-
642
- **`read(browser, options?)`** - Extract page content
643
-
644
- - `format: "text"` - Plain text extraction
645
- - `format: "markdown"` - High-quality markdown conversion (uses Turndown)
646
- - `format: "raw"` - Cleaned HTML (default)
647
-
648
- **Example:**
649
-
650
- ```typescript
651
- import { read } from './src';
652
-
653
- // Get markdown content
654
- const result = await read(browser, { format: 'markdown' });
655
- console.log(result.content); // Markdown text
656
-
657
- // Get plain text
658
- const result = await read(browser, { format: 'text' });
659
- console.log(result.content); // Plain text
660
- ```
661
-
662
- </details>
663
-
664
- <details>
665
- <summary><h3>📷 Screenshots</h3></summary>
666
-
667
- **`screenshot(browser, options?)`** - Standalone screenshot capture
668
-
669
- - Returns base64-encoded data URL
670
- - PNG or JPEG format
671
- - Quality control for JPEG (1-100)
672
-
673
- **Example:**
674
-
675
- ```typescript
676
- import { screenshot } from './src';
677
- import { writeFileSync } from 'fs';
678
-
679
- // Capture PNG screenshot
680
- const dataUrl = await screenshot(browser, { format: 'png' });
681
-
682
- // Save to file
683
- const base64Data = dataUrl.split(',')[1];
684
- const imageData = Buffer.from(base64Data, 'base64');
685
- writeFileSync('screenshot.png', imageData);
686
-
687
- // JPEG with quality control (smaller file size)
688
- const dataUrl = await screenshot(browser, { format: 'jpeg', quality: 85 });
689
- ```
690
-
691
- </details>
692
-
693
- <details>
694
- <summary><h3>🔎 Text Search - Find Elements by Visible Text</h3></summary>
695
-
696
- **`findTextRect(page, options)`** - Find text on page and get exact pixel coordinates
697
-
698
- Find buttons, links, or any UI elements by their visible text without needing element IDs or CSS selectors. Returns exact pixel coordinates for each match.
699
-
700
- **Example:**
701
-
702
- ```typescript
703
- import { SentienceBrowser, findTextRect, clickRect } from 'sentienceapi';
704
-
705
- const browser = await SentienceBrowser.create();
706
- await browser.getPage().goto('https://example.com');
707
-
708
- // Find "Sign In" button (simple string syntax)
709
- const result = await findTextRect(browser.getPage(), 'Sign In');
710
- if (result.status === 'success' && result.results) {
711
- const firstMatch = result.results[0];
712
- console.log(`Found at: (${firstMatch.rect.x}, ${firstMatch.rect.y})`);
713
- console.log(`In viewport: ${firstMatch.in_viewport}`);
714
-
715
- // Click on the found text
716
- if (firstMatch.in_viewport) {
717
- await clickRect(browser, {
718
- x: firstMatch.rect.x,
719
- y: firstMatch.rect.y,
720
- w: firstMatch.rect.width,
721
- h: firstMatch.rect.height,
722
- });
723
- }
724
- }
725
- ```
726
-
727
- **Advanced Options:**
728
-
729
- ```typescript
730
- // Case-sensitive search
731
- const result = await findTextRect(browser.getPage(), {
732
- text: 'LOGIN',
733
- caseSensitive: true,
734
- });
735
-
736
- // Whole word only (won't match "login" as part of "loginButton")
737
- const result = await findTextRect(browser.getPage(), {
738
- text: 'log',
739
- wholeWord: true,
740
- });
741
-
742
- // Find multiple matches
743
- const result = await findTextRect(browser.getPage(), {
744
- text: 'Buy',
745
- maxResults: 10,
746
- });
747
- for (const match of result.results || []) {
748
- if (match.in_viewport) {
749
- console.log(`Found '${match.text}' at (${match.rect.x}, ${match.rect.y})`);
750
- console.log(`Context: ...${match.context.before}[${match.text}]${match.context.after}...`);
751
- }
752
- }
78
+ void main();
753
79
  ```
754
80
 
755
- **Returns:** Promise<TextRectSearchResult> with:
756
-
757
- - **`status`**: "success" or "error"
758
- - **`results`**: Array of `TextMatch` objects with:
759
- - `text` - The matched text
760
- - `rect` - Absolute coordinates (with scroll offset)
761
- - `viewport_rect` - Viewport-relative coordinates
762
- - `context` - Surrounding text (before/after)
763
- - `in_viewport` - Whether visible in current viewport
764
-
765
- **Use Cases:**
766
-
767
- - Find buttons/links by visible text without CSS selectors
768
- - Get exact pixel coordinates for click automation
769
- - Verify text visibility and position on page
770
- - Search dynamic content that changes frequently
81
+ ## Capabilities (lifecycle guarantees)
771
82
 
772
- **Note:** Does not consume API credits (runs locally in browser)
83
+ ### Controlled perception
773
84
 
774
- **See example:** `examples/find-text-demo.ts`
85
+ - **Semantic snapshots** instead of raw DOM dumps
86
+ - **Pruning knobs** via `SnapshotOptions` (limit/filter)
87
+ - Snapshot diagnostics that help decide when “structure is insufficient”
775
88
 
776
- </details>
777
-
778
- ---
779
-
780
- ## 📋 Reference
781
-
782
- <details>
783
- <summary><h3>Element Properties</h3></summary>
784
-
785
- Elements returned by `snapshot()` have the following properties:
786
-
787
- ```typescript
788
- element.id; // Unique identifier for interactions
789
- element.role; // ARIA role (button, link, textbox, heading, etc.)
790
- element.text; // Visible text content
791
- element.importance; // AI importance score (0-1000)
792
- element.bbox; // Bounding box (x, y, width, height)
793
- element.visual_cues; // Visual analysis (is_primary, is_clickable, background_color)
794
- element.in_viewport; // Is element visible in current viewport?
795
- element.is_occluded; // Is element covered by other elements?
796
- element.z_index; // CSS stacking order
797
- ```
89
+ ### Constrained action space
798
90
 
799
- </details>
91
+ - Action primitives operate on **stable IDs / rects** derived from snapshots
92
+ - Optional helpers for ordinality (“click the 3rd result”)
800
93
 
801
- <details>
802
- <summary><h3>Query DSL Reference</h3></summary>
94
+ ### Verified progress
803
95
 
804
- ### Basic Operators
96
+ - Predicates like `exists(...)`, `urlMatches(...)`, `isEnabled(...)`, `valueEquals(...)`
97
+ - Fluent assertion DSL via `expect(...)`
98
+ - Retrying verification via `runtime.check(...).eventually(...)`
805
99
 
806
- | Operator | Description | Example |
807
- | --------- | ---------------------------- | ---------------- |
808
- | `=` | Exact match | `role=button` |
809
- | `!=` | Exclusion | `role!=link` |
810
- | `~` | Substring (case-insensitive) | `text~"sign in"` |
811
- | `^=` | Prefix match | `text^="Add"` |
812
- | `$=` | Suffix match | `text$="Cart"` |
813
- | `>`, `>=` | Greater than | `importance>500` |
814
- | `<`, `<=` | Less than | `bbox.y<600` |
100
+ ### Explained failure
815
101
 
816
- ### Supported Fields
817
-
818
- - **Role**: `role=button|link|textbox|heading|...`
819
- - **Text**: `text`, `text~`, `text^=`, `text$=`
820
- - **Visibility**: `clickable=true|false`, `visible=true|false`
821
- - **Importance**: `importance`, `importance>=N`, `importance<N`
822
- - **Position**: `bbox.x`, `bbox.y`, `bbox.width`, `bbox.height`
823
- - **Layering**: `z_index`
824
-
825
- </details>
826
-
827
- ---
828
-
829
- ## ⚙️ Configuration
830
-
831
- <details>
832
- <summary><h3>Viewport Size</h3></summary>
833
-
834
- Default viewport is **1280x800** pixels. You can customize it using Playwright's API:
835
-
836
- ```typescript
837
- const browser = new SentienceBrowser();
838
- await browser.start();
839
-
840
- // Set custom viewport before navigating
841
- await browser.getPage().setViewportSize({ width: 1920, height: 1080 });
842
-
843
- await browser.goto('https://example.com');
844
- ```
102
+ - JSONL trace events (`Tracer` + `JsonlTraceSink`)
103
+ - Optional failure artifact bundles (snapshots, diagnostics, step timelines, frames/clip)
104
+ - Deterministic failure semantics: when required assertions can’t be proven, the run fails with artifacts you can replay
845
105
 
846
- </details>
106
+ ### Framework interoperability
847
107
 
848
- <details>
849
- <summary><h3>Headless Mode</h3></summary>
108
+ - Bring your own LLM and orchestration (LangGraph, custom loops)
109
+ - Register explicit LLM-callable tools with `ToolRegistry`
850
110
 
851
- ```typescript
852
- // Headed mode (shows browser window)
853
- const browser = new SentienceBrowser(undefined, undefined, false);
111
+ ## ToolRegistry (LLM-callable tools)
854
112
 
855
- // Headless mode
856
- const browser = new SentienceBrowser(undefined, undefined, true);
113
+ ```ts
114
+ import { ToolRegistry, registerDefaultTools } from 'sentienceapi';
857
115
 
858
- // Auto-detect based on environment (default)
859
- const browser = new SentienceBrowser(); // headless=true if CI=true, else false
116
+ const registry = new ToolRegistry();
117
+ registerDefaultTools(registry);
118
+ const toolsForLLM = registry.llmTools();
860
119
  ```
861
120
 
862
- </details>
121
+ ## Permissions (avoid Chrome permission bubbles)
863
122
 
864
- <details>
865
- <summary><h3>🌍 Residential Proxy Support</h3></summary>
123
+ Chrome permission prompts are outside the DOM and can be invisible to snapshots. Prefer setting a policy **before navigation**.
866
124
 
867
- For users running from datacenters (AWS, DigitalOcean, etc.), you can configure a residential proxy to prevent IP-based detection by Cloudflare, Akamai, and other anti-bot services.
868
-
869
- **Supported Formats:**
870
-
871
- - HTTP: `http://username:password@host:port`
872
- - HTTPS: `https://username:password@host:port`
873
- - SOCKS5: `socks5://username:password@host:port`
125
+ ```ts
126
+ import { SentienceBrowser } from 'sentienceapi';
127
+ import type { PermissionPolicy } from 'sentienceapi';
874
128
 
875
- **Usage:**
129
+ const policy: PermissionPolicy = {
130
+ default: 'clear',
131
+ autoGrant: ['geolocation'],
132
+ geolocation: { latitude: 37.77, longitude: -122.41, accuracy: 50 },
133
+ origin: 'https://example.com',
134
+ };
876
135
 
877
- ```typescript
878
- // Via constructor parameter
136
+ // `permissionPolicy` is the last constructor argument; pass `keepAlive` right before it.
879
137
  const browser = new SentienceBrowser(
880
138
  undefined,
881
139
  undefined,
882
- false,
883
- 'http://username:password@residential-proxy.com:8000'
884
- );
885
- await browser.start();
886
-
887
- // Via environment variable
888
- process.env.SENTIENCE_PROXY = 'http://username:password@proxy.com:8000';
889
- const browser = new SentienceBrowser();
890
- await browser.start();
891
-
892
- // With agent
893
- import { SentienceAgent, OpenAIProvider } from 'sentienceapi';
894
-
895
- const browser = new SentienceBrowser(
896
- 'your-api-key',
140
+ undefined,
141
+ undefined,
142
+ undefined,
143
+ undefined,
144
+ undefined,
145
+ undefined,
146
+ undefined,
147
+ undefined,
148
+ undefined,
897
149
  undefined,
898
150
  false,
899
- 'http://user:pass@proxy.com:8000'
151
+ policy
900
152
  );
901
153
  await browser.start();
902
-
903
- const agent = new SentienceAgent(browser, new OpenAIProvider('openai-key'));
904
- await agent.act('Navigate to example.com');
905
- ```
906
-
907
- **WebRTC Protection:**
908
- The SDK automatically adds WebRTC leak protection flags when a proxy is configured, preventing your real datacenter IP from being exposed via WebRTC even when using proxies.
909
-
910
- **HTTPS Certificate Handling:**
911
- The SDK automatically ignores HTTPS certificate errors when a proxy is configured, as residential proxies often use self-signed certificates for SSL interception.
912
-
913
- </details>
914
-
915
- <details>
916
- <summary><h3>🔐 Authentication Session Injection</h3></summary>
917
-
918
- Inject pre-recorded authentication sessions (cookies + localStorage) to start your agent already logged in, bypassing login screens, 2FA, and CAPTCHAs. This saves tokens and reduces costs by eliminating login steps.
919
-
920
- ```typescript
921
- // Workflow 1: Inject pre-recorded session from file
922
- import { SentienceBrowser, saveStorageState } from 'sentienceapi';
923
-
924
- // Save session after manual login
925
- const browser = new SentienceBrowser();
926
- await browser.start();
927
- await browser.getPage().goto('https://example.com');
928
- // ... log in manually ...
929
- await saveStorageState(browser.getContext(), 'auth.json');
930
-
931
- // Use saved session in future runs
932
- const browser2 = new SentienceBrowser(
933
- undefined, // apiKey
934
- undefined, // apiUrl
935
- false, // headless
936
- undefined, // proxy
937
- undefined, // userDataDir
938
- 'auth.json' // storageState - inject saved session
939
- );
940
- await browser2.start();
941
- // Agent starts already logged in!
942
-
943
- // Workflow 2: Persistent sessions (cookies persist across runs)
944
- const browser3 = new SentienceBrowser(
945
- undefined, // apiKey
946
- undefined, // apiUrl
947
- false, // headless
948
- undefined, // proxy
949
- './chrome_profile', // userDataDir - persist cookies
950
- undefined // storageState
951
- );
952
- await browser3.start();
953
- // First run: Log in
954
- // Second run: Already logged in (cookies persist automatically)
955
- ```
956
-
957
- **Benefits:**
958
-
959
- - Bypass login screens and CAPTCHAs with valid sessions
960
- - Save 5-10 agent steps and hundreds of tokens per run
961
- - Maintain stateful sessions for accessing authenticated pages
962
- - Act as authenticated users (e.g., "Go to my Orders page")
963
-
964
- See `examples/auth-injection-agent.ts` for complete examples.
965
-
966
- </details>
967
-
968
- ---
969
-
970
- ## 💡 Best Practices
971
-
972
- <details>
973
- <summary>Click to expand best practices</summary>
974
-
975
- ### 1. Wait for Dynamic Content
976
-
977
- ```typescript
978
- await browser.goto('https://example.com');
979
- await browser.getPage().waitForLoadState('networkidle');
980
- await new Promise(resolve => setTimeout(resolve, 1000)); // Extra buffer
981
154
  ```
982
155
 
983
- ### 2. Use Multiple Strategies for Finding Elements
156
+ If your backend supports it, you can also use ToolRegistry permission tools (`grant_permissions`, `clear_permissions`, `set_geolocation`) mid-run.
984
157
 
985
- ```typescript
986
- // Try exact match first
987
- let btn = find(snap, 'role=button text="Add to Cart"');
158
+ ## Downloads (verification predicate)
988
159
 
989
- // Fallback to fuzzy match
990
- if (!btn) {
991
- btn = find(snap, 'role=button text~"cart"');
992
- }
993
- ```
994
-
995
- ### 3. Check Element Visibility Before Clicking
996
-
997
- ```typescript
998
- if (element.in_viewport && !element.is_occluded) {
999
- await click(browser, element.id);
1000
- }
1001
- ```
1002
-
1003
- ### 4. Handle Navigation
1004
-
1005
- ```typescript
1006
- const result = await click(browser, linkId);
1007
- if (result.url_changed) {
1008
- await browser.getPage().waitForLoadState('networkidle');
1009
- }
1010
- ```
1011
-
1012
- ### 5. Use Screenshots Sparingly
1013
-
1014
- ```typescript
1015
- // Fast - no screenshot (only element data)
1016
- const snap = await snapshot(browser);
160
+ ```ts
161
+ import { downloadCompleted } from 'sentienceapi';
1017
162
 
1018
- // Slower - with screenshot (for debugging/verification)
1019
- const snap = await snapshot(browser, { screenshot: true });
163
+ runtime.assert(downloadCompleted('report.csv'), 'download_ok', true);
1020
164
  ```
1021
165
 
1022
- ### 6. Always Close Browser
166
+ ## Debugging (fast)
1023
167
 
1024
- ```typescript
1025
- const browser = new SentienceBrowser();
1026
-
1027
- try {
1028
- await browser.start();
1029
- // ... your automation code
1030
- } finally {
1031
- await browser.close(); // Always clean up
1032
- }
1033
- ```
1034
-
1035
- </details>
1036
-
1037
- ---
1038
-
1039
- ## 🛠️ Troubleshooting
1040
-
1041
- <details>
1042
- <summary>Click to expand common issues and solutions</summary>
1043
-
1044
- ### "Extension failed to load"
1045
-
1046
- **Solution:** Build the extension first:
168
+ - **Manual driver CLI**:
1047
169
 
1048
170
  ```bash
1049
- cd sentience-chrome
1050
- ./build.sh
171
+ npx sentience driver --url https://example.com
1051
172
  ```
1052
173
 
1053
- ### "Cannot use import statement outside a module"
1054
-
1055
- **Solution:** Don't use `node` directly. Use `ts-node` or npm scripts:
1056
-
1057
- ```bash
1058
- npx ts-node examples/hello.ts
1059
- # or
1060
- npm run example:hello
1061
- ```
1062
-
1063
- ### "Element not found"
1064
-
1065
- **Solutions:**
1066
-
1067
- - Ensure page is loaded: `await browser.getPage().waitForLoadState('networkidle')`
1068
- - Use `waitFor()`: `await waitFor(browser, 'role=button', 10000)`
1069
- - Debug elements: `console.log(snap.elements.map(el => el.text))`
1070
-
1071
- ### Button not clickable
1072
-
1073
- **Solutions:**
1074
-
1075
- - Check visibility: `element.in_viewport && !element.is_occluded`
1076
- - Scroll to element: ``await browser.getPage().evaluate(`window.sentience_registry[${element.id}].scrollIntoView()`)``
1077
-
1078
- </details>
1079
-
1080
- ---
1081
-
1082
- ## 💻 Examples & Testing
1083
-
1084
- <details>
1085
- <summary><h3>Agent Layer Examples (Level 3 - Natural Language)</h3></summary>
1086
-
1087
- - **`agent-google-search.ts`** - Google search automation with natural language commands
1088
- - **`agent-amazon-shopping.ts`** - Amazon shopping bot (6 lines vs 350 lines manual code)
1089
- - **`agent-with-anthropic.ts`** - Using Anthropic Claude instead of OpenAI GPT
1090
- - **`agent-with-tracing.ts`** - Agent execution tracing for debugging and analysis
1091
-
1092
- </details>
1093
-
1094
- <details>
1095
- <summary><h3>Direct SDK Examples (Level 2 - Technical Control)</h3></summary>
1096
-
1097
- - **`hello.ts`** - Extension bridge verification
1098
- - **`basic-agent.ts`** - Basic snapshot and element inspection
1099
- - **`query-demo.ts`** - Query engine demonstrations
1100
- - **`wait-and-click.ts`** - Waiting for elements and performing actions
1101
- - **`read-markdown.ts`** - Content extraction and markdown conversion
1102
-
1103
- </details>
1104
-
1105
- <details>
1106
- <summary><h3>Running Examples</h3></summary>
1107
-
1108
- **⚠️ Important**: You cannot use `node` directly to run TypeScript files. Use one of these methods:
1109
-
1110
- ### Option 1: Using npm scripts (recommended)
1111
-
1112
- ```bash
1113
- npm run example:hello
1114
- npm run example:basic
1115
- npm run example:query
1116
- npm run example:wait
1117
- ```
1118
-
1119
- ### Option 2: Using ts-node directly
1120
-
1121
- ```bash
1122
- npx ts-node examples/hello.ts
1123
- # or if ts-node is installed globally:
1124
- ts-node examples/hello.ts
1125
- ```
1126
-
1127
- ### Option 3: Compile then run
1128
-
1129
- ```bash
1130
- npm run build
1131
- # Then use compiled JavaScript from dist/
1132
- ```
1133
-
1134
- </details>
1135
-
1136
- <details>
1137
- <summary><h3>Testing</h3></summary>
1138
-
1139
- ```bash
1140
- # Run all tests
1141
- npm test
1142
-
1143
- # Run with coverage
1144
- npm run test:coverage
1145
-
1146
- # Run specific test file
1147
- npm test -- snapshot.test.ts
1148
- ```
1149
-
1150
- </details>
1151
-
1152
- ---
1153
-
1154
- ## 📖 Documentation
1155
-
1156
- - **📖 [Amazon Shopping Guide](../docs/AMAZON_SHOPPING_GUIDE.md)** - Complete tutorial with real-world example
1157
- - **📖 [Query DSL Guide](docs/QUERY_DSL.md)** - Advanced query patterns and operators
1158
- - **📄 [API Contract](../spec/SNAPSHOT_V1.md)** - Snapshot API specification
1159
- - **📄 [Type Definitions](../spec/sdk-types.md)** - TypeScript/Python type definitions
1160
-
1161
- ---
1162
-
1163
- ## 📜 License
174
+ - **Verification + artifacts + debugging with time-travel traces (Sentience Studio demo)**:
1164
175
 
1165
- This project is licensed under either of:
176
+ <video src="https://github.com/user-attachments/assets/7ffde43b-1074-4d70-bb83-2eb8d0469307" controls muted playsinline></video>
1166
177
 
1167
- - Apache License, Version 2.0, ([LICENSE-APACHE](./LICENSE-APACHE))
1168
- - MIT license ([LICENSE-MIT](./LICENSE-MIT))
178
+ If the video tag doesn’t render in your GitHub README view, use this link: [`sentience-studio-demo.mp4`](https://github.com/user-attachments/assets/7ffde43b-1074-4d70-bb83-2eb8d0469307)
1169
179
 
1170
- at your option.
180
+ - **Sentience SDK Documentation**: https://www.sentienceapi.com/docs