sentienceapi 0.99.0 → 0.99.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +109 -1099
- package/dist/actions.d.ts +9 -0
- package/dist/actions.d.ts.map +1 -1
- package/dist/actions.js +126 -0
- package/dist/actions.js.map +1 -1
- package/dist/agent-runtime.d.ts +34 -3
- package/dist/agent-runtime.d.ts.map +1 -1
- package/dist/agent-runtime.js +239 -1
- package/dist/agent-runtime.js.map +1 -1
- package/dist/agent.d.ts +3 -1
- package/dist/agent.d.ts.map +1 -1
- package/dist/agent.js +44 -5
- package/dist/agent.js.map +1 -1
- package/dist/browser.d.ts +24 -1
- package/dist/browser.d.ts.map +1 -1
- package/dist/browser.js +88 -1
- package/dist/browser.js.map +1 -1
- package/dist/captcha/types.d.ts +6 -0
- package/dist/captcha/types.d.ts.map +1 -1
- package/dist/captcha/types.js.map +1 -1
- package/dist/cli.js +240 -0
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +4 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -2
- package/dist/index.js.map +1 -1
- package/dist/read.d.ts +7 -0
- package/dist/read.d.ts.map +1 -1
- package/dist/read.js +42 -0
- package/dist/read.js.map +1 -1
- package/dist/runtime-agent.d.ts +4 -0
- package/dist/runtime-agent.d.ts.map +1 -1
- package/dist/runtime-agent.js +40 -4
- package/dist/runtime-agent.js.map +1 -1
- package/dist/tools/context.d.ts +18 -0
- package/dist/tools/context.d.ts.map +1 -0
- package/dist/tools/context.js +40 -0
- package/dist/tools/context.js.map +1 -0
- package/dist/tools/defaults.d.ts +5 -0
- package/dist/tools/defaults.d.ts.map +1 -0
- package/dist/tools/defaults.js +368 -0
- package/dist/tools/defaults.js.map +1 -0
- package/dist/tools/filesystem.d.ts +12 -0
- package/dist/tools/filesystem.d.ts.map +1 -0
- package/dist/tools/filesystem.js +137 -0
- package/dist/tools/filesystem.js.map +1 -0
- package/dist/tools/index.d.ts +5 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +15 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/registry.d.ts +38 -0
- package/dist/tools/registry.d.ts.map +1 -0
- package/dist/tools/registry.js +100 -0
- package/dist/tools/registry.js.map +1 -0
- package/dist/types.d.ts +52 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/zod.d.ts +5 -0
- package/dist/utils/zod.d.ts.map +1 -0
- package/dist/utils/zod.js +80 -0
- package/dist/utils/zod.js.map +1 -0
- package/package.json +1 -1
- package/src/extension/content.js +4 -3
- package/src/extension/injected_api.js +83 -31
- package/src/extension/manifest.json +1 -1
- package/src/extension/pkg/sentience_core_bg.wasm +0 -0
- package/src/extension/release.json +46 -46
package/README.md
CHANGED
|
@@ -1,1170 +1,180 @@
|
|
|
1
1
|
# Sentience TypeScript SDK
|
|
2
2
|
|
|
3
|
-
**
|
|
3
|
+
> **A verification & control layer for AI agents that operate browsers**
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Sentience is built for **AI agent developers** who already use Playwright / CDP / LangGraph and care about **flakiness, cost, determinism, evals, and debugging**.
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
# Install from npm
|
|
9
|
-
npm install sentienceapi
|
|
10
|
-
|
|
11
|
-
# Install Playwright browsers (required)
|
|
12
|
-
npx playwright install chromium
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
**For local development:**
|
|
16
|
-
|
|
17
|
-
```bash
|
|
18
|
-
npm install
|
|
19
|
-
npm run build
|
|
20
|
-
```
|
|
21
|
-
|
|
22
|
-
## Jest for AI Web Agent
|
|
23
|
-
|
|
24
|
-
### Semantic snapshots and assertions that let agents act, verify, and know when they're done.
|
|
25
|
-
|
|
26
|
-
Use `AgentRuntime` to add Jest-style assertions to your agent loops. Verify browser state, check task completion, and get clear feedback on what's working:
|
|
27
|
-
|
|
28
|
-
```typescript
|
|
29
|
-
import {
|
|
30
|
-
SentienceBrowser,
|
|
31
|
-
AgentRuntime,
|
|
32
|
-
HumanHandoffSolver,
|
|
33
|
-
urlContains,
|
|
34
|
-
exists,
|
|
35
|
-
allOf,
|
|
36
|
-
isEnabled,
|
|
37
|
-
isChecked,
|
|
38
|
-
valueEquals,
|
|
39
|
-
} from 'sentienceapi';
|
|
40
|
-
import { createTracer } from 'sentienceapi';
|
|
41
|
-
import { Page } from 'playwright';
|
|
42
|
-
|
|
43
|
-
// Create browser and tracer
|
|
44
|
-
const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
|
|
45
|
-
const tracer = await createTracer({ runId: 'my-run', uploadTrace: false });
|
|
46
|
-
|
|
47
|
-
// Create browser adapter for AgentRuntime
|
|
48
|
-
const browserAdapter = {
|
|
49
|
-
snapshot: async (_page: Page, options?: Record<string, any>) => {
|
|
50
|
-
return await browser.snapshot(options);
|
|
51
|
-
},
|
|
52
|
-
};
|
|
53
|
-
const runtime = new AgentRuntime(browserAdapter, browser.getPage(), tracer);
|
|
54
|
-
|
|
55
|
-
// Navigate and take snapshot
|
|
56
|
-
await browser.getPage().goto('https://example.com');
|
|
57
|
-
runtime.beginStep('Verify page loaded');
|
|
58
|
-
await runtime.snapshot();
|
|
59
|
-
|
|
60
|
-
// Run assertions (Jest-style)
|
|
61
|
-
runtime.assert(urlContains('example.com'), 'on_correct_domain');
|
|
62
|
-
runtime.assert(exists('role=heading'), 'has_heading');
|
|
63
|
-
runtime.assert(allOf([exists('role=button'), exists('role=link')]), 'has_interactive_elements');
|
|
64
|
-
|
|
65
|
-
// v1: state-aware assertions (when Gateway refinement is enabled)
|
|
66
|
-
runtime.assert(isEnabled('role=button'), 'button_enabled');
|
|
67
|
-
runtime.assert(isChecked("role=checkbox name~'subscribe'"), 'subscribe_checked_if_present');
|
|
68
|
-
runtime.assert(
|
|
69
|
-
valueEquals("role=textbox name~'email'", 'user@example.com'),
|
|
70
|
-
'email_value_if_present'
|
|
71
|
-
);
|
|
72
|
-
|
|
73
|
-
// v2: retry loop with snapshot confidence gating + exhaustion
|
|
74
|
-
const ok = await runtime
|
|
75
|
-
.check(exists('role=heading'), 'heading_eventually_visible', true)
|
|
76
|
-
.eventually({ timeoutMs: 10_000, pollMs: 250, minConfidence: 0.7, maxSnapshotAttempts: 3 });
|
|
77
|
-
console.log('eventually() result:', ok);
|
|
78
|
-
|
|
79
|
-
// CAPTCHA handling (detection + handoff + verify)
|
|
80
|
-
runtime.setCaptchaOptions({
|
|
81
|
-
policy: 'callback',
|
|
82
|
-
handler: HumanHandoffSolver(),
|
|
83
|
-
});
|
|
84
|
-
|
|
85
|
-
// Check task completion
|
|
86
|
-
if (runtime.assertDone(exists("text~'Example'"), 'task_complete')) {
|
|
87
|
-
console.log('✅ Task completed!');
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
console.log(`Task done: ${runtime.isTaskDone}`);
|
|
91
|
-
```
|
|
92
|
-
|
|
93
|
-
#### CAPTCHA strategies (Batteries Included)
|
|
94
|
-
|
|
95
|
-
```typescript
|
|
96
|
-
import { ExternalSolver, HumanHandoffSolver, VisionSolver } from 'sentienceapi';
|
|
97
|
-
|
|
98
|
-
// Human-in-loop
|
|
99
|
-
runtime.setCaptchaOptions({ policy: 'callback', handler: HumanHandoffSolver() });
|
|
100
|
-
|
|
101
|
-
// Vision verification only
|
|
102
|
-
runtime.setCaptchaOptions({ policy: 'callback', handler: VisionSolver() });
|
|
103
|
-
|
|
104
|
-
// External system/webhook
|
|
105
|
-
runtime.setCaptchaOptions({
|
|
106
|
-
policy: 'callback',
|
|
107
|
-
handler: ExternalSolver(async ctx => {
|
|
108
|
-
await fetch(process.env.CAPTCHA_WEBHOOK_URL!, {
|
|
109
|
-
method: 'POST',
|
|
110
|
-
headers: { 'Content-Type': 'application/json' },
|
|
111
|
-
body: JSON.stringify({ runId: ctx.runId, url: ctx.url }),
|
|
112
|
-
});
|
|
113
|
-
}),
|
|
114
|
-
});
|
|
115
|
-
```
|
|
116
|
-
|
|
117
|
-
### Failure Artifact Buffer (Phase 1)
|
|
118
|
-
|
|
119
|
-
Capture a short ring buffer of screenshots and persist them when a required assertion fails.
|
|
120
|
-
|
|
121
|
-
```typescript
|
|
122
|
-
runtime.enableFailureArtifacts({ bufferSeconds: 15, captureOnAction: true, fps: 0 });
|
|
123
|
-
|
|
124
|
-
// After each action, record it (best-effort).
|
|
125
|
-
await runtime.recordAction('CLICK');
|
|
126
|
-
```
|
|
127
|
-
|
|
128
|
-
### Redaction callback (Phase 3)
|
|
129
|
-
|
|
130
|
-
Provide a user-defined callback to redact snapshots and decide whether to persist frames. The SDK does not implement image/video redaction.
|
|
131
|
-
|
|
132
|
-
```typescript
|
|
133
|
-
import { RedactionContext, RedactionResult } from 'sentienceapi';
|
|
134
|
-
|
|
135
|
-
const redact = (_ctx: RedactionContext): RedactionResult => {
|
|
136
|
-
return { dropFrames: true };
|
|
137
|
-
};
|
|
138
|
-
|
|
139
|
-
runtime.enableFailureArtifacts({ onBeforePersist: redact });
|
|
140
|
-
```
|
|
141
|
-
|
|
142
|
-
**See examples:** [`examples/asserts/`](examples/asserts/)
|
|
143
|
-
|
|
144
|
-
## π Quick Start: Choose Your Abstraction Level
|
|
145
|
-
|
|
146
|
-
Sentience SDK offers **4 levels of abstraction** - choose based on your needs:
|
|
147
|
-
|
|
148
|
-
<details open>
|
|
149
|
-
<summary><b>💬 Level 4: Conversational Agent (Highest Abstraction)</b> - NEW in v0.3.0</summary>
|
|
150
|
-
|
|
151
|
-
Complete automation with natural conversation. Just describe what you want, and the agent plans and executes everything:
|
|
152
|
-
|
|
153
|
-
```typescript
|
|
154
|
-
import { SentienceBrowser, ConversationalAgent, OpenAIProvider } from 'sentienceapi';
|
|
155
|
-
|
|
156
|
-
const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
|
|
157
|
-
const llm = new OpenAIProvider(process.env.OPENAI_API_KEY!, 'gpt-4o');
|
|
158
|
-
const agent = new ConversationalAgent({ llmProvider: llm, browser });
|
|
159
|
-
|
|
160
|
-
// Navigate to starting page
|
|
161
|
-
await browser.getPage().goto('https://amazon.com');
|
|
162
|
-
|
|
163
|
-
// ONE command does it all - automatic planning and execution!
|
|
164
|
-
const response = await agent.execute(
|
|
165
|
-
"Search for 'wireless mouse' and tell me the price of the top result"
|
|
166
|
-
);
|
|
167
|
-
console.log(response); // "I found the top result for wireless mouse on Amazon. It's priced at $24.99..."
|
|
168
|
-
|
|
169
|
-
// Follow-up questions maintain context
|
|
170
|
-
const followUp = await agent.chat('Add it to cart');
|
|
171
|
-
console.log(followUp);
|
|
172
|
-
|
|
173
|
-
await browser.close();
|
|
174
|
-
```
|
|
175
|
-
|
|
176
|
-
**When to use:** Complex multi-step tasks, conversational interfaces, maximum convenience
|
|
177
|
-
**Code reduction:** 99% less code - describe goals in natural language
|
|
178
|
-
**Requirements:** OpenAI or Anthropic API key
|
|
179
|
-
|
|
180
|
-
</details>
|
|
181
|
-
|
|
182
|
-
<details>
|
|
183
|
-
<summary><b>🤖 Level 3: Agent (Natural Language Commands)</b> - Recommended for Most Users</summary>
|
|
184
|
-
|
|
185
|
-
Zero coding knowledge needed. Just write what you want in plain English:
|
|
186
|
-
|
|
187
|
-
```typescript
|
|
188
|
-
import { SentienceBrowser, SentienceAgent, OpenAIProvider } from 'sentienceapi';
|
|
189
|
-
|
|
190
|
-
const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
|
|
191
|
-
const llm = new OpenAIProvider(process.env.OPENAI_API_KEY!, 'gpt-4o-mini');
|
|
192
|
-
const agent = new SentienceAgent(browser, llm);
|
|
193
|
-
|
|
194
|
-
await browser.getPage().goto('https://www.amazon.com');
|
|
195
|
-
|
|
196
|
-
// Just natural language commands - agent handles everything!
|
|
197
|
-
await agent.act('Click the search box');
|
|
198
|
-
await agent.act("Type 'wireless mouse' into the search field");
|
|
199
|
-
await agent.act('Press Enter key');
|
|
200
|
-
await agent.act('Click the first product result');
|
|
201
|
-
|
|
202
|
-
// Automatic token tracking
|
|
203
|
-
console.log(`Tokens used: ${agent.getTokenStats().totalTokens}`);
|
|
204
|
-
await browser.close();
|
|
205
|
-
```
|
|
206
|
-
|
|
207
|
-
**When to use:** Quick automation, non-technical users, rapid prototyping
|
|
208
|
-
**Code reduction:** 95-98% less code vs manual approach
|
|
209
|
-
**Requirements:** OpenAI API key (or Anthropic for Claude)
|
|
210
|
-
|
|
211
|
-
</details>
|
|
212
|
-
|
|
213
|
-
<details>
|
|
214
|
-
<summary><b>🔧 Level 2: Direct SDK (Technical Control)</b></summary>
|
|
215
|
-
|
|
216
|
-
Full control with semantic selectors. For technical users who want precision:
|
|
217
|
-
|
|
218
|
-
```typescript
|
|
219
|
-
import { SentienceBrowser, snapshot, find, click, typeText, press } from 'sentienceapi';
|
|
220
|
-
|
|
221
|
-
const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
|
|
222
|
-
await browser.getPage().goto('https://www.amazon.com');
|
|
223
|
-
|
|
224
|
-
// Get semantic snapshot
|
|
225
|
-
const snap = await snapshot(browser);
|
|
226
|
-
|
|
227
|
-
// Find elements using query DSL
|
|
228
|
-
const searchBox = find(snap, 'role=textbox text~"search"');
|
|
229
|
-
await click(browser, searchBox!.id);
|
|
230
|
-
|
|
231
|
-
// Type and submit
|
|
232
|
-
await typeText(browser, searchBox!.id, 'wireless mouse');
|
|
233
|
-
await press(browser, 'Enter');
|
|
234
|
-
|
|
235
|
-
await browser.close();
|
|
236
|
-
```
|
|
237
|
-
|
|
238
|
-
**When to use:** Need precise control, debugging, custom workflows
|
|
239
|
-
**Code reduction:** Still 80% less code vs raw Playwright
|
|
240
|
-
**Requirements:** Only Sentience API key
|
|
241
|
-
|
|
242
|
-
</details>
|
|
243
|
-
|
|
244
|
-
<details>
|
|
245
|
-
<summary><b>⚙️ Level 1: Raw Playwright (Maximum Control)</b></summary>
|
|
246
|
-
|
|
247
|
-
For when you need complete low-level control (rare):
|
|
248
|
-
|
|
249
|
-
```typescript
|
|
250
|
-
import { chromium } from 'playwright';
|
|
251
|
-
|
|
252
|
-
const browser = await chromium.launch();
|
|
253
|
-
const page = await browser.newPage();
|
|
254
|
-
await page.goto('https://www.amazon.com');
|
|
255
|
-
await page.fill('#twotabsearchtextbox', 'wireless mouse');
|
|
256
|
-
await page.press('#twotabsearchtextbox', 'Enter');
|
|
257
|
-
await browser.close();
|
|
258
|
-
```
|
|
259
|
-
|
|
260
|
-
**When to use:** Very specific edge cases, custom browser configs
|
|
261
|
-
**Tradeoffs:** No semantic intelligence, brittle selectors, more code
|
|
262
|
-
|
|
263
|
-
</details>
|
|
264
|
-
|
|
265
|
-
---
|
|
266
|
-
|
|
267
|
-
### Human-like Typing
|
|
268
|
-
|
|
269
|
-
Add realistic delays between keystrokes to mimic human typing:
|
|
270
|
-
|
|
271
|
-
```typescript
|
|
272
|
-
// Type instantly (default)
|
|
273
|
-
await typeText(browser, elementId, 'Hello World');
|
|
7
|
+
Often described as _Jest for Browser AI Agents_ - but applied to end-to-end agent runs (not unit tests).
|
|
274
8
|
|
|
275
|
-
|
|
276
|
-
await typeText(browser, elementId, 'Hello World', false, 10);
|
|
277
|
-
```
|
|
278
|
-
|
|
279
|
-
### Scroll to Element
|
|
280
|
-
|
|
281
|
-
Scroll elements into view with smooth animation:
|
|
282
|
-
|
|
283
|
-
```typescript
|
|
284
|
-
const snap = await snapshot(browser);
|
|
285
|
-
const button = find(snap, 'role=button text~"Submit"');
|
|
286
|
-
|
|
287
|
-
// Scroll element into view with smooth animation
|
|
288
|
-
await scrollTo(browser, button.id);
|
|
289
|
-
|
|
290
|
-
// Scroll instantly to top of viewport
|
|
291
|
-
await scrollTo(browser, button.id, 'instant', 'start');
|
|
292
|
-
```
|
|
293
|
-
|
|
294
|
-
---
|
|
295
|
-
|
|
296
|
-
<details>
|
|
297
|
-
<summary><h2>π Agent Execution Tracing (NEW in v0.3.1)</h2></summary>
|
|
298
|
-
|
|
299
|
-
Record complete agent execution traces for debugging, analysis, and replay. Traces capture every step, snapshot, LLM decision, and action in a structured JSONL format.
|
|
300
|
-
|
|
301
|
-
### Quick Start: Agent with Tracing
|
|
302
|
-
|
|
303
|
-
```typescript
|
|
304
|
-
import {
|
|
305
|
-
SentienceBrowser,
|
|
306
|
-
SentienceAgent,
|
|
307
|
-
OpenAIProvider,
|
|
308
|
-
Tracer,
|
|
309
|
-
JsonlTraceSink,
|
|
310
|
-
} from 'sentienceapi';
|
|
311
|
-
import { randomUUID } from 'crypto';
|
|
312
|
-
|
|
313
|
-
const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
|
|
314
|
-
const llm = new OpenAIProvider(process.env.OPENAI_API_KEY!, 'gpt-4o');
|
|
315
|
-
|
|
316
|
-
// Create a tracer
|
|
317
|
-
const runId = randomUUID();
|
|
318
|
-
const sink = new JsonlTraceSink(`traces/${runId}.jsonl`);
|
|
319
|
-
const tracer = new Tracer(runId, sink);
|
|
320
|
-
|
|
321
|
-
// Create agent with tracer
|
|
322
|
-
const agent = new SentienceAgent(browser, llm, 50, true, tracer);
|
|
323
|
-
|
|
324
|
-
// Emit run_start
|
|
325
|
-
tracer.emitRunStart('SentienceAgent', 'gpt-4o');
|
|
326
|
-
|
|
327
|
-
try {
|
|
328
|
-
await browser.getPage().goto('https://google.com');
|
|
329
|
-
|
|
330
|
-
// Every action is automatically traced!
|
|
331
|
-
await agent.act('Click the search box');
|
|
332
|
-
await agent.act("Type 'sentience ai' into the search field");
|
|
333
|
-
await agent.act('Press Enter');
|
|
334
|
-
|
|
335
|
-
tracer.emitRunEnd(3);
|
|
336
|
-
} finally {
|
|
337
|
-
// Flush trace to disk
|
|
338
|
-
await agent.closeTracer();
|
|
339
|
-
await browser.close();
|
|
340
|
-
}
|
|
341
|
-
|
|
342
|
-
console.log(`✅ Trace saved to: traces/${runId}.jsonl`);
|
|
343
|
-
```
|
|
9
|
+
The core loop is:
|
|
344
10
|
|
|
345
|
-
|
|
11
|
+
> **Agent → Snapshot → Action → Verification → Artifact**
|
|
346
12
|
|
|
347
|
-
|
|
13
|
+
## What Sentience is
|
|
348
14
|
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
15
|
+
- A **verification-first runtime** (`AgentRuntime`) for browser agents
|
|
16
|
+
- Treats the browser as an adapter (Playwright / CDP); **`AgentRuntime` is the product**
|
|
17
|
+
- A **controlled perception** layer (semantic snapshots; pruning/limits; lowers token usage by filtering noise from what models see)
|
|
18
|
+
- A **debugging layer** (structured traces + failure artifacts)
|
|
19
|
+
- Enables **local LLM small models (3B-7B)** for browser automation (privacy, compliance, and cost control)
|
|
20
|
+
- Keeps vision models **optional** (use as a fallback when DOM/snapshot structure falls short, e.g. `<canvas>`)
|
|
354
21
|
|
|
355
|
-
|
|
22
|
+
## What Sentience is not
|
|
356
23
|
|
|
357
|
-
|
|
24
|
+
- Not a browser driver
|
|
25
|
+
- Not a Playwright replacement
|
|
26
|
+
- Not a vision-first agent framework
|
|
358
27
|
|
|
359
|
-
|
|
28
|
+
## Install
|
|
360
29
|
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
<summary><h2>π Agent Runtime Verification</h2></summary>
|
|
365
|
-
|
|
366
|
-
`AgentRuntime` provides assertion predicates for runtime verification in agent loops, enabling programmatic verification of browser state during execution.
|
|
367
|
-
|
|
368
|
-
```typescript
|
|
369
|
-
import { SentienceBrowser } from 'sentienceapi';
|
|
370
|
-
import { AgentRuntime, urlContains, exists, allOf } from 'sentienceapi';
|
|
371
|
-
import { createTracer } from 'sentienceapi';
|
|
372
|
-
|
|
373
|
-
const browser = new SentienceBrowser();
|
|
374
|
-
await browser.start();
|
|
375
|
-
const tracer = await createTracer({ runId: 'my-run', uploadTrace: false });
|
|
376
|
-
const runtime = new AgentRuntime(browser, browser.getPage(), tracer);
|
|
377
|
-
|
|
378
|
-
// Navigate and take snapshot
|
|
379
|
-
await browser.getPage().goto('https://example.com');
|
|
380
|
-
runtime.beginStep('Verify page');
|
|
381
|
-
await runtime.snapshot();
|
|
382
|
-
|
|
383
|
-
// Run assertions
|
|
384
|
-
runtime.assert(urlContains('example.com'), 'on_correct_domain');
|
|
385
|
-
runtime.assert(exists('role=heading'), 'has_heading');
|
|
386
|
-
runtime.assertDone(exists("text~'Example'"), 'task_complete');
|
|
387
|
-
|
|
388
|
-
console.log(`Task done: ${runtime.isTaskDone}`);
|
|
30
|
+
```bash
|
|
31
|
+
npm install sentienceapi
|
|
32
|
+
npx playwright install chromium
|
|
389
33
|
```
|
|
390
34
|
|
|
391
|
-
|
|
35
|
+
## Conceptual example (why this exists)
|
|
392
36
|
|
|
393
|
-
|
|
37
|
+
- Steps are **gated by verifiable UI assertions**
|
|
38
|
+
- If progress can’t be proven, the run **fails with evidence**
|
|
39
|
+
- This is how you make runs **reproducible** and **debuggable**, and how you run evals reliably
|
|
394
40
|
|
|
395
|
-
|
|
41
|
+
## Quickstart: a verification-first loop
|
|
396
42
|
|
|
397
|
-
|
|
398
|
-
|
|
43
|
+
```ts
|
|
44
|
+
import { SentienceBrowser, AgentRuntime } from 'sentienceapi';
|
|
45
|
+
import { JsonlTraceSink, Tracer } from 'sentienceapi';
|
|
46
|
+
import { exists, urlContains } from 'sentienceapi';
|
|
47
|
+
import type { Page } from 'playwright';
|
|
399
48
|
|
|
400
|
-
|
|
49
|
+
async function main(): Promise<void> {
|
|
50
|
+
const tracer = new Tracer('demo', new JsonlTraceSink('trace.jsonl'));
|
|
401
51
|
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
52
|
+
const browser = new SentienceBrowser();
|
|
53
|
+
await browser.start();
|
|
54
|
+
const page = browser.getPage();
|
|
55
|
+
if (!page) throw new Error('no page');
|
|
405
56
|
|
|
406
|
-
|
|
407
|
-
const browser = await SentienceBrowser.create({ apiKey: process.env.SENTIENCE_API_KEY });
|
|
408
|
-
const tracer = await createTracer({ runId: 'verified-run', uploadTrace: false });
|
|
57
|
+
await page.goto('https://example.com');
|
|
409
58
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
},
|
|
414
|
-
|
|
415
|
-
|
|
59
|
+
// AgentRuntime needs a snapshot provider; SentienceBrowser.snapshot() does not depend on Page,
|
|
60
|
+
// so we wrap it to fit the runtime interface.
|
|
61
|
+
const runtime = new AgentRuntime(
|
|
62
|
+
{ snapshot: async (_page: Page, options?: Record<string, any>) => browser.snapshot(options) },
|
|
63
|
+
page,
|
|
64
|
+
tracer
|
|
65
|
+
);
|
|
416
66
|
|
|
417
|
-
|
|
418
|
-
runtime.
|
|
67
|
+
runtime.beginStep('Verify homepage');
|
|
68
|
+
await runtime.snapshot({ limit: 60 });
|
|
419
69
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
.eventually({ timeoutMs: 10_000, pollMs: 250, minConfidence: 0.7, maxSnapshotAttempts: 3 });
|
|
70
|
+
runtime.assert(urlContains('example.com'), 'on_domain', true);
|
|
71
|
+
runtime.assert(exists('role=heading'), 'has_heading');
|
|
423
72
|
|
|
424
|
-
runtime.
|
|
73
|
+
runtime.assertDone(exists("text~'Example'"), 'task_complete');
|
|
425
74
|
|
|
426
|
-
await tracer.close();
|
|
427
75
|
await browser.close();
|
|
428
76
|
}
|
|
429
77
|
|
|
430
|
-
main()
|
|
431
|
-
```
|
|
432
|
-
|
|
433
|
-
</details>
|
|
434
|
-
|
|
435
|
-
---
|
|
436
|
-
|
|
437
|
-
## π Core Features
|
|
438
|
-
|
|
439
|
-
<details>
|
|
440
|
-
<summary><h3>π Browser Control</h3></summary>
|
|
441
|
-
|
|
442
|
-
- **`SentienceBrowser`** - Playwright browser with Sentience extension pre-loaded
|
|
443
|
-
- **`browser.goto(url)`** - Navigate with automatic extension readiness checks
|
|
444
|
-
- Automatic bot evasion and stealth mode
|
|
445
|
-
- Configurable headless/headed mode
|
|
446
|
-
|
|
447
|
-
</details>
|
|
448
|
-
|
|
449
|
-
<details>
|
|
450
|
-
<summary><h3>📸 Snapshot - Intelligent Page Analysis</h3></summary>
|
|
451
|
-
|
|
452
|
-
**`snapshot(browser, options?)`** - Capture page state with AI-ranked elements
|
|
453
|
-
|
|
454
|
-
Features:
|
|
455
|
-
|
|
456
|
-
- Returns semantic elements with roles, text, importance scores, and bounding boxes
|
|
457
|
-
- Optional screenshot capture (PNG/JPEG)
|
|
458
|
-
- Optional visual overlay to see what elements are detected
|
|
459
|
-
- TypeScript types for type safety
|
|
460
|
-
|
|
461
|
-
**Example:**
|
|
462
|
-
|
|
463
|
-
```typescript
|
|
464
|
-
const snap = await snapshot(browser, { screenshot: true, show_overlay: true });
|
|
465
|
-
|
|
466
|
-
// Access structured data
|
|
467
|
-
console.log(`URL: ${snap.url}`);
|
|
468
|
-
console.log(`Viewport: ${snap.viewport.width}x${snap.viewport.height}`);
|
|
469
|
-
console.log(`Elements: ${snap.elements.length}`);
|
|
470
|
-
|
|
471
|
-
// Iterate over elements
|
|
472
|
-
for (const element of snap.elements) {
|
|
473
|
-
console.log(`${element.role}: ${element.text} (importance: ${element.importance})`);
|
|
474
|
-
}
|
|
475
|
-
```
|
|
476
|
-
|
|
477
|
-
</details>
|
|
478
|
-
|
|
479
|
-
<details>
|
|
480
|
-
<summary><h3>π Query Engine - Semantic Element Selection</h3></summary>
|
|
481
|
-
|
|
482
|
-
- **`query(snapshot, selector)`** - Find all matching elements
|
|
483
|
-
- **`find(snapshot, selector)`** - Find single best match (by importance)
|
|
484
|
-
- Powerful query DSL with multiple operators
|
|
485
|
-
|
|
486
|
-
**Query Examples:**
|
|
487
|
-
|
|
488
|
-
```typescript
|
|
489
|
-
// Find by role and text
|
|
490
|
-
const button = find(snap, 'role=button text="Sign in"');
|
|
491
|
-
|
|
492
|
-
// Substring match (case-insensitive)
|
|
493
|
-
const link = find(snap, 'role=link text~"more info"');
|
|
494
|
-
|
|
495
|
-
// Spatial filtering
|
|
496
|
-
const topLeft = find(snap, 'bbox.x<=100 bbox.y<=200');
|
|
497
|
-
|
|
498
|
-
// Multiple conditions (AND logic)
|
|
499
|
-
const primaryBtn = find(snap, 'role=button clickable=true visible=true importance>800');
|
|
500
|
-
|
|
501
|
-
// Prefix/suffix matching
|
|
502
|
-
const startsWith = find(snap, 'text^="Add"');
|
|
503
|
-
const endsWith = find(snap, 'text$="Cart"');
|
|
504
|
-
|
|
505
|
-
// Numeric comparisons
|
|
506
|
-
const important = query(snap, 'importance>=700');
|
|
507
|
-
const firstRow = query(snap, 'bbox.y<600');
|
|
508
|
-
```
|
|
509
|
-
|
|
510
|
-
**π [Complete Query DSL Guide](docs/QUERY_DSL.md)** - All operators, fields, and advanced patterns
|
|
511
|
-
|
|
512
|
-
</details>
|
|
513
|
-
|
|
514
|
-
<details>
|
|
515
|
-
<summary><h3>π Actions - Interact with Elements</h3></summary>
|
|
516
|
-
|
|
517
|
-
- **`click(browser, elementId)`** - Click element by ID
|
|
518
|
-
- **`clickRect(browser, rect)`** - Click at center of rectangle (coordinate-based)
|
|
519
|
-
- **`typeText(browser, elementId, text)`** - Type into input fields
|
|
520
|
-
- **`press(browser, key)`** - Press keyboard keys (Enter, Escape, Tab, etc.)
|
|
521
|
-
|
|
522
|
-
All actions return `ActionResult` with success status, timing, and outcome:
|
|
523
|
-
|
|
524
|
-
```typescript
|
|
525
|
-
const result = await click(browser, element.id);
|
|
526
|
-
|
|
527
|
-
console.log(`Success: ${result.success}`);
|
|
528
|
-
console.log(`Outcome: ${result.outcome}`); // "navigated", "dom_updated", "error"
|
|
529
|
-
console.log(`Duration: ${result.duration_ms}ms`);
|
|
530
|
-
console.log(`URL changed: ${result.url_changed}`);
|
|
531
|
-
```
|
|
532
|
-
|
|
533
|
-
**Coordinate-based clicking:**
|
|
534
|
-
|
|
535
|
-
```typescript
|
|
536
|
-
import { clickRect } from './src';
|
|
537
|
-
|
|
538
|
-
// Click at center of rectangle (x, y, width, height)
|
|
539
|
-
await clickRect(browser, { x: 100, y: 200, w: 50, h: 30 });
|
|
540
|
-
|
|
541
|
-
// With visual highlight (default: red border for 2 seconds)
|
|
542
|
-
await clickRect(browser, { x: 100, y: 200, w: 50, h: 30 }, true, 2.0);
|
|
543
|
-
|
|
544
|
-
// Using element's bounding box
|
|
545
|
-
const snap = await snapshot(browser);
|
|
546
|
-
const element = find(snap, 'role=button');
|
|
547
|
-
if (element) {
|
|
548
|
-
await clickRect(browser, {
|
|
549
|
-
x: element.bbox.x,
|
|
550
|
-
y: element.bbox.y,
|
|
551
|
-
w: element.bbox.width,
|
|
552
|
-
h: element.bbox.height,
|
|
553
|
-
});
|
|
554
|
-
}
|
|
555
|
-
```
|
|
556
|
-
|
|
557
|
-
</details>
|
|
558
|
-
|
|
559
|
-
<details>
|
|
560
|
-
<summary><h3>⏱️ Wait & Assertions</h3></summary>
|
|
561
|
-
|
|
562
|
-
- **`waitFor(browser, selector, timeout?, interval?, useApi?)`** - Wait for element to appear
|
|
563
|
-
- **`expect(browser, selector)`** - Assertion helper with fluent API
|
|
564
|
-
|
|
565
|
-
**Examples:**
|
|
566
|
-
|
|
567
|
-
```typescript
|
|
568
|
-
// Wait for element (auto-detects optimal interval based on API usage)
|
|
569
|
-
const result = await waitFor(browser, 'role=button text="Submit"', 10000);
|
|
570
|
-
if (result.found) {
|
|
571
|
-
console.log(`Found after ${result.duration_ms}ms`);
|
|
572
|
-
}
|
|
573
|
-
|
|
574
|
-
// Use local extension with fast polling (250ms interval)
|
|
575
|
-
const result = await waitFor(browser, 'role=button', 5000, undefined, false);
|
|
576
|
-
|
|
577
|
-
// Use remote API with network-friendly polling (1500ms interval)
|
|
578
|
-
const result = await waitFor(browser, 'role=button', 5000, undefined, true);
|
|
579
|
-
|
|
580
|
-
// Custom interval override
|
|
581
|
-
const result = await waitFor(browser, 'role=button', 5000, 500, false);
|
|
582
|
-
|
|
583
|
-
// Semantic wait conditions
|
|
584
|
-
await waitFor(browser, 'clickable=true', 5000); // Wait for clickable element
|
|
585
|
-
await waitFor(browser, 'importance>100', 5000); // Wait for important element
|
|
586
|
-
await waitFor(browser, 'role=link visible=true', 5000); // Wait for visible link
|
|
587
|
-
|
|
588
|
-
// Assertions
|
|
589
|
-
await expect(browser, 'role=button text="Submit"').toExist(5000);
|
|
590
|
-
await expect(browser, 'role=heading').toBeVisible();
|
|
591
|
-
await expect(browser, 'role=button').toHaveText('Submit');
|
|
592
|
-
await expect(browser, 'role=link').toHaveCount(10);
|
|
593
|
-
```
|
|
594
|
-
|
|
595
|
-
</details>
|
|
596
|
-
|
|
597
|
-
<details>
|
|
598
|
-
<summary><h3>🎨 Visual Overlay - Debug Element Detection</h3></summary>
|
|
599
|
-
|
|
600
|
-
- **`showOverlay(browser, elements, targetElementId?)`** - Display visual overlay highlighting elements
|
|
601
|
-
- **`clearOverlay(browser)`** - Clear overlay manually
|
|
602
|
-
|
|
603
|
-
Show color-coded borders around detected elements to debug, validate, and understand what Sentience sees:
|
|
604
|
-
|
|
605
|
-
```typescript
|
|
606
|
-
import { showOverlay, clearOverlay } from 'sentienceapi';
|
|
607
|
-
|
|
608
|
-
// Take snapshot once
|
|
609
|
-
const snap = await snapshot(browser);
|
|
610
|
-
|
|
611
|
-
// Show overlay anytime without re-snapshotting
|
|
612
|
-
await showOverlay(browser, snap); // Auto-clears after 5 seconds
|
|
613
|
-
|
|
614
|
-
// Highlight specific target element in red
|
|
615
|
-
const button = find(snap, 'role=button text~"Submit"');
|
|
616
|
-
await showOverlay(browser, snap, button.id);
|
|
617
|
-
|
|
618
|
-
// Clear manually before 5 seconds
|
|
619
|
-
await new Promise(resolve => setTimeout(resolve, 2000));
|
|
620
|
-
await clearOverlay(browser);
|
|
621
|
-
```
|
|
622
|
-
|
|
623
|
-
**Color Coding:**
|
|
624
|
-
|
|
625
|
-
- 🔴 Red: Target element
|
|
626
|
-
- 🔵 Blue: Primary elements (`is_primary=true`)
|
|
627
|
-
- 🟢 Green: Regular interactive elements
|
|
628
|
-
|
|
629
|
-
**Visual Indicators:**
|
|
630
|
-
|
|
631
|
-
- Border thickness/opacity scales with importance
|
|
632
|
-
- Semi-transparent fill
|
|
633
|
-
- Importance badges
|
|
634
|
-
- Star icons for primary elements
|
|
635
|
-
- Auto-clear after 5 seconds
|
|
636
|
-
|
|
637
|
-
</details>
|
|
638
|
-
|
|
639
|
-
<details>
|
|
640
|
-
<summary><h3>π Content Reading</h3></summary>
|
|
641
|
-
|
|
642
|
-
**`read(browser, options?)`** - Extract page content
|
|
643
|
-
|
|
644
|
-
- `format: "text"` - Plain text extraction
|
|
645
|
-
- `format: "markdown"` - High-quality markdown conversion (uses Turndown)
|
|
646
|
-
- `format: "raw"` - Cleaned HTML (default)
|
|
647
|
-
|
|
648
|
-
**Example:**
|
|
649
|
-
|
|
650
|
-
```typescript
|
|
651
|
-
import { read } from './src';
|
|
652
|
-
|
|
653
|
-
// Get markdown content
|
|
654
|
-
const result = await read(browser, { format: 'markdown' });
|
|
655
|
-
console.log(result.content); // Markdown text
|
|
656
|
-
|
|
657
|
-
// Get plain text
|
|
658
|
-
const result = await read(browser, { format: 'text' });
|
|
659
|
-
console.log(result.content); // Plain text
|
|
660
|
-
```
|
|
661
|
-
|
|
662
|
-
</details>
|
|
663
|
-
|
|
664
|
-
<details>
|
|
665
|
-
<summary><h3>📷 Screenshots</h3></summary>
|
|
666
|
-
|
|
667
|
-
**`screenshot(browser, options?)`** - Standalone screenshot capture
|
|
668
|
-
|
|
669
|
-
- Returns base64-encoded data URL
|
|
670
|
-
- PNG or JPEG format
|
|
671
|
-
- Quality control for JPEG (1-100)
|
|
672
|
-
|
|
673
|
-
**Example:**
|
|
674
|
-
|
|
675
|
-
```typescript
|
|
676
|
-
import { screenshot } from './src';
|
|
677
|
-
import { writeFileSync } from 'fs';
|
|
678
|
-
|
|
679
|
-
// Capture PNG screenshot
|
|
680
|
-
const dataUrl = await screenshot(browser, { format: 'png' });
|
|
681
|
-
|
|
682
|
-
// Save to file
|
|
683
|
-
const base64Data = dataUrl.split(',')[1];
|
|
684
|
-
const imageData = Buffer.from(base64Data, 'base64');
|
|
685
|
-
writeFileSync('screenshot.png', imageData);
|
|
686
|
-
|
|
687
|
-
// JPEG with quality control (smaller file size)
|
|
688
|
-
const dataUrl = await screenshot(browser, { format: 'jpeg', quality: 85 });
|
|
689
|
-
```
|
|
690
|
-
|
|
691
|
-
</details>
|
|
692
|
-
|
|
693
|
-
<details>
|
|
694
|
-
<summary><h3>π Text Search - Find Elements by Visible Text</h3></summary>
|
|
695
|
-
|
|
696
|
-
**`findTextRect(page, options)`** - Find text on page and get exact pixel coordinates
|
|
697
|
-
|
|
698
|
-
Find buttons, links, or any UI elements by their visible text without needing element IDs or CSS selectors. Returns exact pixel coordinates for each match.
|
|
699
|
-
|
|
700
|
-
**Example:**
|
|
701
|
-
|
|
702
|
-
```typescript
|
|
703
|
-
import { SentienceBrowser, findTextRect, clickRect } from 'sentienceapi';
|
|
704
|
-
|
|
705
|
-
const browser = await SentienceBrowser.create();
|
|
706
|
-
await browser.getPage().goto('https://example.com');
|
|
707
|
-
|
|
708
|
-
// Find "Sign In" button (simple string syntax)
|
|
709
|
-
const result = await findTextRect(browser.getPage(), 'Sign In');
|
|
710
|
-
if (result.status === 'success' && result.results) {
|
|
711
|
-
const firstMatch = result.results[0];
|
|
712
|
-
console.log(`Found at: (${firstMatch.rect.x}, ${firstMatch.rect.y})`);
|
|
713
|
-
console.log(`In viewport: ${firstMatch.in_viewport}`);
|
|
714
|
-
|
|
715
|
-
// Click on the found text
|
|
716
|
-
if (firstMatch.in_viewport) {
|
|
717
|
-
await clickRect(browser, {
|
|
718
|
-
x: firstMatch.rect.x,
|
|
719
|
-
y: firstMatch.rect.y,
|
|
720
|
-
w: firstMatch.rect.width,
|
|
721
|
-
h: firstMatch.rect.height,
|
|
722
|
-
});
|
|
723
|
-
}
|
|
724
|
-
}
|
|
725
|
-
```
|
|
726
|
-
|
|
727
|
-
**Advanced Options:**
|
|
728
|
-
|
|
729
|
-
```typescript
|
|
730
|
-
// Case-sensitive search
|
|
731
|
-
const result = await findTextRect(browser.getPage(), {
|
|
732
|
-
text: 'LOGIN',
|
|
733
|
-
caseSensitive: true,
|
|
734
|
-
});
|
|
735
|
-
|
|
736
|
-
// Whole word only (won't match "log" as part of "login")
|
|
737
|
-
const result = await findTextRect(browser.getPage(), {
|
|
738
|
-
text: 'log',
|
|
739
|
-
wholeWord: true,
|
|
740
|
-
});
|
|
741
|
-
|
|
742
|
-
// Find multiple matches
|
|
743
|
-
const result = await findTextRect(browser.getPage(), {
|
|
744
|
-
text: 'Buy',
|
|
745
|
-
maxResults: 10,
|
|
746
|
-
});
|
|
747
|
-
for (const match of result.results || []) {
|
|
748
|
-
if (match.in_viewport) {
|
|
749
|
-
console.log(`Found '${match.text}' at (${match.rect.x}, ${match.rect.y})`);
|
|
750
|
-
console.log(`Context: ...${match.context.before}[${match.text}]${match.context.after}...`);
|
|
751
|
-
}
|
|
752
|
-
}
|
|
78
|
+
void main();
|
|
753
79
|
```
|
|
754
80
|
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
- **`status`**: "success" or "error"
|
|
758
|
-
- **`results`**: Array of `TextMatch` objects with:
|
|
759
|
-
- `text` - The matched text
|
|
760
|
-
- `rect` - Absolute coordinates (with scroll offset)
|
|
761
|
-
- `viewport_rect` - Viewport-relative coordinates
|
|
762
|
-
- `context` - Surrounding text (before/after)
|
|
763
|
-
- `in_viewport` - Whether visible in current viewport
|
|
764
|
-
|
|
765
|
-
**Use Cases:**
|
|
766
|
-
|
|
767
|
-
- Find buttons/links by visible text without CSS selectors
|
|
768
|
-
- Get exact pixel coordinates for click automation
|
|
769
|
-
- Verify text visibility and position on page
|
|
770
|
-
- Search dynamic content that changes frequently
|
|
81
|
+
## Capabilities (lifecycle guarantees)
|
|
771
82
|
|
|
772
|
-
|
|
83
|
+
### Controlled perception
|
|
773
84
|
|
|
774
|
-
**
|
|
85
|
+
- **Semantic snapshots** instead of raw DOM dumps
|
|
86
|
+
- **Pruning knobs** via `SnapshotOptions` (limit/filter)
|
|
87
|
+
- Snapshot diagnostics that help decide when “structure is insufficient”
|
|
775
88
|
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
---
|
|
779
|
-
|
|
780
|
-
## π Reference
|
|
781
|
-
|
|
782
|
-
<details>
|
|
783
|
-
<summary><h3>Element Properties</h3></summary>
|
|
784
|
-
|
|
785
|
-
Elements returned by `snapshot()` have the following properties:
|
|
786
|
-
|
|
787
|
-
```typescript
|
|
788
|
-
element.id; // Unique identifier for interactions
|
|
789
|
-
element.role; // ARIA role (button, link, textbox, heading, etc.)
|
|
790
|
-
element.text; // Visible text content
|
|
791
|
-
element.importance; // AI importance score (0-1000)
|
|
792
|
-
element.bbox; // Bounding box (x, y, width, height)
|
|
793
|
-
element.visual_cues; // Visual analysis (is_primary, is_clickable, background_color)
|
|
794
|
-
element.in_viewport; // Is element visible in current viewport?
|
|
795
|
-
element.is_occluded; // Is element covered by other elements?
|
|
796
|
-
element.z_index; // CSS stacking order
|
|
797
|
-
```
|
|
89
|
+
### Constrained action space
|
|
798
90
|
|
|
799
|
-
|
|
91
|
+
- Action primitives operate on **stable IDs / rects** derived from snapshots
|
|
92
|
+
- Optional helpers for ordinality (“click the 3rd result”)
|
|
800
93
|
|
|
801
|
-
|
|
802
|
-
<summary><h3>Query DSL Reference</h3></summary>
|
|
94
|
+
### Verified progress
|
|
803
95
|
|
|
804
|
-
|
|
96
|
+
- Predicates like `exists(...)`, `urlMatches(...)`, `isEnabled(...)`, `valueEquals(...)`
|
|
97
|
+
- Fluent assertion DSL via `expect(...)`
|
|
98
|
+
- Retrying verification via `runtime.check(...).eventually(...)`
|
|
805
99
|
|
|
806
|
-
|
|
807
|
-
| --------- | ---------------------------- | ---------------- |
|
|
808
|
-
| `=` | Exact match | `role=button` |
|
|
809
|
-
| `!=` | Exclusion | `role!=link` |
|
|
810
|
-
| `~` | Substring (case-insensitive) | `text~"sign in"` |
|
|
811
|
-
| `^=` | Prefix match | `text^="Add"` |
|
|
812
|
-
| `$=` | Suffix match | `text$="Cart"` |
|
|
813
|
-
| `>`, `>=` | Greater than | `importance>500` |
|
|
814
|
-
| `<`, `<=` | Less than | `bbox.y<600` |
|
|
100
|
+
### Explained failure
|
|
815
101
|
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
-
|
|
819
|
-
- **Text**: `text`, `text~`, `text^=`, `text$=`
|
|
820
|
-
- **Visibility**: `clickable=true|false`, `visible=true|false`
|
|
821
|
-
- **Importance**: `importance`, `importance>=N`, `importance<N`
|
|
822
|
-
- **Position**: `bbox.x`, `bbox.y`, `bbox.width`, `bbox.height`
|
|
823
|
-
- **Layering**: `z_index`
|
|
824
|
-
|
|
825
|
-
</details>
|
|
826
|
-
|
|
827
|
-
---
|
|
828
|
-
|
|
829
|
-
## ⚙️ Configuration
|
|
830
|
-
|
|
831
|
-
<details>
|
|
832
|
-
<summary><h3>Viewport Size</h3></summary>
|
|
833
|
-
|
|
834
|
-
Default viewport is **1280x800** pixels. You can customize it using Playwright's API:
|
|
835
|
-
|
|
836
|
-
```typescript
|
|
837
|
-
const browser = new SentienceBrowser();
|
|
838
|
-
await browser.start();
|
|
839
|
-
|
|
840
|
-
// Set custom viewport before navigating
|
|
841
|
-
await browser.getPage().setViewportSize({ width: 1920, height: 1080 });
|
|
842
|
-
|
|
843
|
-
await browser.goto('https://example.com');
|
|
844
|
-
```
|
|
102
|
+
- JSONL trace events (`Tracer` + `JsonlTraceSink`)
|
|
103
|
+
- Optional failure artifact bundles (snapshots, diagnostics, step timelines, frames/clip)
|
|
104
|
+
- Deterministic failure semantics: when required assertions can’t be proven, the run fails with artifacts you can replay
|
|
845
105
|
|
|
846
|
-
|
|
106
|
+
### Framework interoperability
|
|
847
107
|
|
|
848
|
-
|
|
849
|
-
|
|
108
|
+
- Bring your own LLM and orchestration (LangGraph, custom loops)
|
|
109
|
+
- Register explicit LLM-callable tools with `ToolRegistry`
|
|
850
110
|
|
|
851
|
-
|
|
852
|
-
// Headed mode (shows browser window)
|
|
853
|
-
const browser = new SentienceBrowser(undefined, undefined, false);
|
|
111
|
+
## ToolRegistry (LLM-callable tools)
|
|
854
112
|
|
|
855
|
-
|
|
856
|
-
|
|
113
|
+
```ts
|
|
114
|
+
import { ToolRegistry, registerDefaultTools } from 'sentienceapi';
|
|
857
115
|
|
|
858
|
-
|
|
859
|
-
|
|
116
|
+
const registry = new ToolRegistry();
|
|
117
|
+
registerDefaultTools(registry);
|
|
118
|
+
const toolsForLLM = registry.llmTools();
|
|
860
119
|
```
|
|
861
120
|
|
|
862
|
-
|
|
121
|
+
## Permissions (avoid Chrome permission bubbles)
|
|
863
122
|
|
|
864
|
-
|
|
865
|
-
<summary><h3>π Residential Proxy Support</h3></summary>
|
|
123
|
+
Chrome permission prompts are outside the DOM and can be invisible to snapshots. Prefer setting a policy **before navigation**.
|
|
866
124
|
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
- HTTP: `http://username:password@host:port`
|
|
872
|
-
- HTTPS: `https://username:password@host:port`
|
|
873
|
-
- SOCKS5: `socks5://username:password@host:port`
|
|
125
|
+
```ts
|
|
126
|
+
import { SentienceBrowser } from 'sentienceapi';
|
|
127
|
+
import type { PermissionPolicy } from 'sentienceapi';
|
|
874
128
|
|
|
875
|
-
|
|
129
|
+
const policy: PermissionPolicy = {
|
|
130
|
+
default: 'clear',
|
|
131
|
+
autoGrant: ['geolocation'],
|
|
132
|
+
geolocation: { latitude: 37.77, longitude: -122.41, accuracy: 50 },
|
|
133
|
+
origin: 'https://example.com',
|
|
134
|
+
};
|
|
876
135
|
|
|
877
|
-
|
|
878
|
-
// Via constructor parameter
|
|
136
|
+
// `permissionPolicy` is the last constructor argument; pass `keepAlive` right before it.
|
|
879
137
|
const browser = new SentienceBrowser(
|
|
880
138
|
undefined,
|
|
881
139
|
undefined,
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
// With agent
|
|
893
|
-
import { SentienceAgent, OpenAIProvider } from 'sentienceapi';
|
|
894
|
-
|
|
895
|
-
const browser = new SentienceBrowser(
|
|
896
|
-
'your-api-key',
|
|
140
|
+
undefined,
|
|
141
|
+
undefined,
|
|
142
|
+
undefined,
|
|
143
|
+
undefined,
|
|
144
|
+
undefined,
|
|
145
|
+
undefined,
|
|
146
|
+
undefined,
|
|
147
|
+
undefined,
|
|
148
|
+
undefined,
|
|
897
149
|
undefined,
|
|
898
150
|
false,
|
|
899
|
-
|
|
151
|
+
policy
|
|
900
152
|
);
|
|
901
153
|
await browser.start();
|
|
902
|
-
|
|
903
|
-
const agent = new SentienceAgent(browser, new OpenAIProvider('openai-key'));
|
|
904
|
-
await agent.act('Navigate to example.com');
|
|
905
|
-
```
|
|
906
|
-
|
|
907
|
-
**WebRTC Protection:**
|
|
908
|
-
The SDK automatically adds WebRTC leak protection flags when a proxy is configured, preventing your real datacenter IP from being exposed via WebRTC even when using proxies.
|
|
909
|
-
|
|
910
|
-
**HTTPS Certificate Handling:**
|
|
911
|
-
The SDK automatically ignores HTTPS certificate errors when a proxy is configured, as residential proxies often use self-signed certificates for SSL interception.
|
|
912
|
-
|
|
913
|
-
</details>
|
|
914
|
-
|
|
915
|
-
<details>
|
|
916
|
-
<summary><h3>π Authentication Session Injection</h3></summary>
|
|
917
|
-
|
|
918
|
-
Inject pre-recorded authentication sessions (cookies + localStorage) to start your agent already logged in, bypassing login screens, 2FA, and CAPTCHAs. This saves tokens and reduces costs by eliminating login steps.
|
|
919
|
-
|
|
920
|
-
```typescript
|
|
921
|
-
// Workflow 1: Inject pre-recorded session from file
|
|
922
|
-
import { SentienceBrowser, saveStorageState } from 'sentienceapi';
|
|
923
|
-
|
|
924
|
-
// Save session after manual login
|
|
925
|
-
const browser = new SentienceBrowser();
|
|
926
|
-
await browser.start();
|
|
927
|
-
await browser.getPage().goto('https://example.com');
|
|
928
|
-
// ... log in manually ...
|
|
929
|
-
await saveStorageState(browser.getContext(), 'auth.json');
|
|
930
|
-
|
|
931
|
-
// Use saved session in future runs
|
|
932
|
-
const browser2 = new SentienceBrowser(
|
|
933
|
-
undefined, // apiKey
|
|
934
|
-
undefined, // apiUrl
|
|
935
|
-
false, // headless
|
|
936
|
-
undefined, // proxy
|
|
937
|
-
undefined, // userDataDir
|
|
938
|
-
'auth.json' // storageState - inject saved session
|
|
939
|
-
);
|
|
940
|
-
await browser2.start();
|
|
941
|
-
// Agent starts already logged in!
|
|
942
|
-
|
|
943
|
-
// Workflow 2: Persistent sessions (cookies persist across runs)
|
|
944
|
-
const browser3 = new SentienceBrowser(
|
|
945
|
-
undefined, // apiKey
|
|
946
|
-
undefined, // apiUrl
|
|
947
|
-
false, // headless
|
|
948
|
-
undefined, // proxy
|
|
949
|
-
'./chrome_profile', // userDataDir - persist cookies
|
|
950
|
-
undefined // storageState
|
|
951
|
-
);
|
|
952
|
-
await browser3.start();
|
|
953
|
-
// First run: Log in
|
|
954
|
-
// Second run: Already logged in (cookies persist automatically)
|
|
955
|
-
```
|
|
956
|
-
|
|
957
|
-
**Benefits:**
|
|
958
|
-
|
|
959
|
-
- Bypass login screens and CAPTCHAs with valid sessions
|
|
960
|
-
- Save 5-10 agent steps and hundreds of tokens per run
|
|
961
|
-
- Maintain stateful sessions for accessing authenticated pages
|
|
962
|
-
- Act as authenticated users (e.g., "Go to my Orders page")
|
|
963
|
-
|
|
964
|
-
See `examples/auth-injection-agent.ts` for complete examples.
|
|
965
|
-
|
|
966
|
-
</details>
|
|
967
|
-
|
|
968
|
-
---
|
|
969
|
-
|
|
970
|
-
## 💡 Best Practices
|
|
971
|
-
|
|
972
|
-
<details>
|
|
973
|
-
<summary>Click to expand best practices</summary>
|
|
974
|
-
|
|
975
|
-
### 1. Wait for Dynamic Content
|
|
976
|
-
|
|
977
|
-
```typescript
|
|
978
|
-
await browser.goto('https://example.com');
|
|
979
|
-
await browser.getPage().waitForLoadState('networkidle');
|
|
980
|
-
await new Promise(resolve => setTimeout(resolve, 1000)); // Extra buffer
|
|
981
154
|
```
|
|
982
155
|
|
|
983
|
-
|
|
156
|
+
If your backend supports it, you can also use ToolRegistry permission tools (`grant_permissions`, `clear_permissions`, `set_geolocation`) mid-run.
|
|
984
157
|
|
|
985
|
-
|
|
986
|
-
// Try exact match first
|
|
987
|
-
let btn = find(snap, 'role=button text="Add to Cart"');
|
|
158
|
+
## Downloads (verification predicate)
|
|
988
159
|
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
btn = find(snap, 'role=button text~"cart"');
|
|
992
|
-
}
|
|
993
|
-
```
|
|
994
|
-
|
|
995
|
-
### 3. Check Element Visibility Before Clicking
|
|
996
|
-
|
|
997
|
-
```typescript
|
|
998
|
-
if (element.in_viewport && !element.is_occluded) {
|
|
999
|
-
await click(browser, element.id);
|
|
1000
|
-
}
|
|
1001
|
-
```
|
|
1002
|
-
|
|
1003
|
-
### 4. Handle Navigation
|
|
1004
|
-
|
|
1005
|
-
```typescript
|
|
1006
|
-
const result = await click(browser, linkId);
|
|
1007
|
-
if (result.url_changed) {
|
|
1008
|
-
await browser.getPage().waitForLoadState('networkidle');
|
|
1009
|
-
}
|
|
1010
|
-
```
|
|
1011
|
-
|
|
1012
|
-
### 5. Use Screenshots Sparingly
|
|
1013
|
-
|
|
1014
|
-
```typescript
|
|
1015
|
-
// Fast - no screenshot (only element data)
|
|
1016
|
-
const snap = await snapshot(browser);
|
|
160
|
+
```ts
|
|
161
|
+
import { downloadCompleted } from 'sentienceapi';
|
|
1017
162
|
|
|
1018
|
-
|
|
1019
|
-
const snap = await snapshot(browser, { screenshot: true });
|
|
163
|
+
runtime.assert(downloadCompleted('report.csv'), 'download_ok', true);
|
|
1020
164
|
```
|
|
1021
165
|
|
|
1022
|
-
|
|
166
|
+
## Debugging (fast)
|
|
1023
167
|
|
|
1024
|
-
|
|
1025
|
-
const browser = new SentienceBrowser();
|
|
1026
|
-
|
|
1027
|
-
try {
|
|
1028
|
-
await browser.start();
|
|
1029
|
-
// ... your automation code
|
|
1030
|
-
} finally {
|
|
1031
|
-
await browser.close(); // Always clean up
|
|
1032
|
-
}
|
|
1033
|
-
```
|
|
1034
|
-
|
|
1035
|
-
</details>
|
|
1036
|
-
|
|
1037
|
-
---
|
|
1038
|
-
|
|
1039
|
-
## 🛠️ Troubleshooting
|
|
1040
|
-
|
|
1041
|
-
<details>
|
|
1042
|
-
<summary>Click to expand common issues and solutions</summary>
|
|
1043
|
-
|
|
1044
|
-
### "Extension failed to load"
|
|
1045
|
-
|
|
1046
|
-
**Solution:** Build the extension first:
|
|
168
|
+
- **Manual driver CLI**:
|
|
1047
169
|
|
|
1048
170
|
```bash
|
|
1049
|
-
|
|
1050
|
-
./build.sh
|
|
171
|
+
npx sentience driver --url https://example.com
|
|
1051
172
|
```
|
|
1052
173
|
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
**Solution:** Don't use `node` directly. Use `ts-node` or npm scripts:
|
|
1056
|
-
|
|
1057
|
-
```bash
|
|
1058
|
-
npx ts-node examples/hello.ts
|
|
1059
|
-
# or
|
|
1060
|
-
npm run example:hello
|
|
1061
|
-
```
|
|
1062
|
-
|
|
1063
|
-
### "Element not found"
|
|
1064
|
-
|
|
1065
|
-
**Solutions:**
|
|
1066
|
-
|
|
1067
|
-
- Ensure page is loaded: `await browser.getPage().waitForLoadState('networkidle')`
|
|
1068
|
-
- Use `waitFor()`: `await waitFor(browser, 'role=button', 10000)`
|
|
1069
|
-
- Debug elements: `console.log(snap.elements.map(el => el.text))`
|
|
1070
|
-
|
|
1071
|
-
### Button not clickable
|
|
1072
|
-
|
|
1073
|
-
**Solutions:**
|
|
1074
|
-
|
|
1075
|
-
- Check visibility: `element.in_viewport && !element.is_occluded`
|
|
1076
|
-
- Scroll to element: ``await browser.getPage().evaluate(`window.sentience_registry[${element.id}].scrollIntoView()`)``
|
|
1077
|
-
|
|
1078
|
-
</details>
|
|
1079
|
-
|
|
1080
|
-
---
|
|
1081
|
-
|
|
1082
|
-
## 💻 Examples & Testing
|
|
1083
|
-
|
|
1084
|
-
<details>
|
|
1085
|
-
<summary><h3>Agent Layer Examples (Level 3 - Natural Language)</h3></summary>
|
|
1086
|
-
|
|
1087
|
-
- **`agent-google-search.ts`** - Google search automation with natural language commands
|
|
1088
|
-
- **`agent-amazon-shopping.ts`** - Amazon shopping bot (6 lines vs 350 lines manual code)
|
|
1089
|
-
- **`agent-with-anthropic.ts`** - Using Anthropic Claude instead of OpenAI GPT
|
|
1090
|
-
- **`agent-with-tracing.ts`** - Agent execution tracing for debugging and analysis
|
|
1091
|
-
|
|
1092
|
-
</details>
|
|
1093
|
-
|
|
1094
|
-
<details>
|
|
1095
|
-
<summary><h3>Direct SDK Examples (Level 2 - Technical Control)</h3></summary>
|
|
1096
|
-
|
|
1097
|
-
- **`hello.ts`** - Extension bridge verification
|
|
1098
|
-
- **`basic-agent.ts`** - Basic snapshot and element inspection
|
|
1099
|
-
- **`query-demo.ts`** - Query engine demonstrations
|
|
1100
|
-
- **`wait-and-click.ts`** - Waiting for elements and performing actions
|
|
1101
|
-
- **`read-markdown.ts`** - Content extraction and markdown conversion
|
|
1102
|
-
|
|
1103
|
-
</details>
|
|
1104
|
-
|
|
1105
|
-
<details>
|
|
1106
|
-
<summary><h3>Running Examples</h3></summary>
|
|
1107
|
-
|
|
1108
|
-
**⚠️ Important**: You cannot use `node` directly to run TypeScript files. Use one of these methods:
|
|
1109
|
-
|
|
1110
|
-
### Option 1: Using npm scripts (recommended)
|
|
1111
|
-
|
|
1112
|
-
```bash
|
|
1113
|
-
npm run example:hello
|
|
1114
|
-
npm run example:basic
|
|
1115
|
-
npm run example:query
|
|
1116
|
-
npm run example:wait
|
|
1117
|
-
```
|
|
1118
|
-
|
|
1119
|
-
### Option 2: Using ts-node directly
|
|
1120
|
-
|
|
1121
|
-
```bash
|
|
1122
|
-
npx ts-node examples/hello.ts
|
|
1123
|
-
# or if ts-node is installed globally:
|
|
1124
|
-
ts-node examples/hello.ts
|
|
1125
|
-
```
|
|
1126
|
-
|
|
1127
|
-
### Option 3: Compile then run
|
|
1128
|
-
|
|
1129
|
-
```bash
|
|
1130
|
-
npm run build
|
|
1131
|
-
# Then use compiled JavaScript from dist/
|
|
1132
|
-
```
|
|
1133
|
-
|
|
1134
|
-
</details>
|
|
1135
|
-
|
|
1136
|
-
<details>
|
|
1137
|
-
<summary><h3>Testing</h3></summary>
|
|
1138
|
-
|
|
1139
|
-
```bash
|
|
1140
|
-
# Run all tests
|
|
1141
|
-
npm test
|
|
1142
|
-
|
|
1143
|
-
# Run with coverage
|
|
1144
|
-
npm run test:coverage
|
|
1145
|
-
|
|
1146
|
-
# Run specific test file
|
|
1147
|
-
npm test -- snapshot.test.ts
|
|
1148
|
-
```
|
|
1149
|
-
|
|
1150
|
-
</details>
|
|
1151
|
-
|
|
1152
|
-
---
|
|
1153
|
-
|
|
1154
|
-
## π Documentation
|
|
1155
|
-
|
|
1156
|
-
- **π [Amazon Shopping Guide](../docs/AMAZON_SHOPPING_GUIDE.md)** - Complete tutorial with real-world example
|
|
1157
|
-
- **π [Query DSL Guide](docs/QUERY_DSL.md)** - Advanced query patterns and operators
|
|
1158
|
-
- **π [API Contract](../spec/SNAPSHOT_V1.md)** - Snapshot API specification
|
|
1159
|
-
- **π [Type Definitions](../spec/sdk-types.md)** - TypeScript/Python type definitions
|
|
1160
|
-
|
|
1161
|
-
---
|
|
1162
|
-
|
|
1163
|
-
## π License
|
|
174
|
+
- **Verification + artifacts + debugging with time-travel traces (Sentience Studio demo)**:
|
|
1164
175
|
|
|
1165
|
-
|
|
176
|
+
<video src="https://github.com/user-attachments/assets/7ffde43b-1074-4d70-bb83-2eb8d0469307" controls muted playsinline></video>
|
|
1166
177
|
|
|
1167
|
-
|
|
1168
|
-
- MIT license ([LICENSE-MIT](./LICENSE-MIT))
|
|
178
|
+
If the video tag doesn’t render in your GitHub README view, use this link: [`sentience-studio-demo.mp4`](https://github.com/user-attachments/assets/7ffde43b-1074-4d70-bb83-2eb8d0469307)
|
|
1169
179
|
|
|
1170
|
-
|
|
180
|
+
- **Sentience SDK Documentation**: https://www.sentienceapi.com/docs
|