web-task-api 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/README.md +284 -0
- package/dist/scripts/demo.d.ts +1 -0
- package/dist/scripts/demo.js +32 -0
- package/dist/scripts/demo.js.map +1 -0
- package/dist/scripts/profile-login.d.ts +1 -0
- package/dist/scripts/profile-login.js +38 -0
- package/dist/scripts/profile-login.js.map +1 -0
- package/dist/src/agents/auto-agent.d.ts +22 -0
- package/dist/src/agents/auto-agent.js +54 -0
- package/dist/src/agents/auto-agent.js.map +1 -0
- package/dist/src/agents/cliproxy-agent.d.ts +18 -0
- package/dist/src/agents/cliproxy-agent.js +137 -0
- package/dist/src/agents/cliproxy-agent.js.map +1 -0
- package/dist/src/agents/index.d.ts +2 -0
- package/dist/src/agents/index.js +17 -0
- package/dist/src/agents/index.js.map +1 -0
- package/dist/src/agents/mock-agent.d.ts +15 -0
- package/dist/src/agents/mock-agent.js +132 -0
- package/dist/src/agents/mock-agent.js.map +1 -0
- package/dist/src/agents/opencode-agent.d.ts +20 -0
- package/dist/src/agents/opencode-agent.js +122 -0
- package/dist/src/agents/opencode-agent.js.map +1 -0
- package/dist/src/agents/planner-prompt.d.ts +6 -0
- package/dist/src/agents/planner-prompt.js +116 -0
- package/dist/src/agents/planner-prompt.js.map +1 -0
- package/dist/src/browser/session.d.ts +41 -0
- package/dist/src/browser/session.js +267 -0
- package/dist/src/browser/session.js.map +1 -0
- package/dist/src/client.d.ts +44 -0
- package/dist/src/client.js +59 -0
- package/dist/src/client.js.map +1 -0
- package/dist/src/config.d.ts +16 -0
- package/dist/src/config.js +18 -0
- package/dist/src/config.js.map +1 -0
- package/dist/src/index.d.ts +2 -0
- package/dist/src/index.js +15 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/lib.d.ts +6 -0
- package/dist/src/lib.js +5 -0
- package/dist/src/lib.js.map +1 -0
- package/dist/src/mcp-server.d.ts +3 -0
- package/dist/src/mcp-server.js +191 -0
- package/dist/src/mcp-server.js.map +1 -0
- package/dist/src/mcp.d.ts +2 -0
- package/dist/src/mcp.js +14 -0
- package/dist/src/mcp.js.map +1 -0
- package/dist/src/recipes/registry.d.ts +21 -0
- package/dist/src/recipes/registry.js +38 -0
- package/dist/src/recipes/registry.js.map +1 -0
- package/dist/src/server/app.d.ts +5 -0
- package/dist/src/server/app.js +89 -0
- package/dist/src/server/app.js.map +1 -0
- package/dist/src/sessions/store.d.ts +48 -0
- package/dist/src/sessions/store.js +84 -0
- package/dist/src/sessions/store.js.map +1 -0
- package/dist/src/storage/run-store.d.ts +12 -0
- package/dist/src/storage/run-store.js +30 -0
- package/dist/src/storage/run-store.js.map +1 -0
- package/dist/src/tasks/errors.d.ts +5 -0
- package/dist/src/tasks/errors.js +11 -0
- package/dist/src/tasks/errors.js.map +1 -0
- package/dist/src/tasks/output-validator.d.ts +1 -0
- package/dist/src/tasks/output-validator.js +21 -0
- package/dist/src/tasks/output-validator.js.map +1 -0
- package/dist/src/tasks/runner.d.ts +38 -0
- package/dist/src/tasks/runner.js +236 -0
- package/dist/src/tasks/runner.js.map +1 -0
- package/dist/src/tasks/schemas.d.ts +266 -0
- package/dist/src/tasks/schemas.js +67 -0
- package/dist/src/tasks/schemas.js.map +1 -0
- package/dist/tests/agent-adapters.test.d.ts +1 -0
- package/dist/tests/agent-adapters.test.js +87 -0
- package/dist/tests/agent-adapters.test.js.map +1 -0
- package/dist/tests/agent-selection.test.d.ts +1 -0
- package/dist/tests/agent-selection.test.js +26 -0
- package/dist/tests/agent-selection.test.js.map +1 -0
- package/dist/tests/auto-agent.test.d.ts +1 -0
- package/dist/tests/auto-agent.test.js +86 -0
- package/dist/tests/auto-agent.test.js.map +1 -0
- package/dist/tests/browser-session.test.d.ts +1 -0
- package/dist/tests/browser-session.test.js +41 -0
- package/dist/tests/browser-session.test.js.map +1 -0
- package/dist/tests/client.test.d.ts +1 -0
- package/dist/tests/client.test.js +35 -0
- package/dist/tests/client.test.js.map +1 -0
- package/dist/tests/fixture-site.d.ts +6 -0
- package/dist/tests/fixture-site.js +93 -0
- package/dist/tests/fixture-site.js.map +1 -0
- package/dist/tests/mcp.test.d.ts +1 -0
- package/dist/tests/mcp.test.js +186 -0
- package/dist/tests/mcp.test.js.map +1 -0
- package/dist/tests/output-validator.test.d.ts +1 -0
- package/dist/tests/output-validator.test.js +27 -0
- package/dist/tests/output-validator.test.js.map +1 -0
- package/dist/tests/request-validation.test.d.ts +1 -0
- package/dist/tests/request-validation.test.js +25 -0
- package/dist/tests/request-validation.test.js.map +1 -0
- package/dist/tests/runner-options.test.d.ts +1 -0
- package/dist/tests/runner-options.test.js +44 -0
- package/dist/tests/runner-options.test.js.map +1 -0
- package/dist/tests/session-api.test.d.ts +1 -0
- package/dist/tests/session-api.test.js +244 -0
- package/dist/tests/session-api.test.js.map +1 -0
- package/dist/tests/session-client.test.d.ts +1 -0
- package/dist/tests/session-client.test.js +28 -0
- package/dist/tests/session-client.test.js.map +1 -0
- package/dist/tests/task-api-failure.test.d.ts +1 -0
- package/dist/tests/task-api-failure.test.js +39 -0
- package/dist/tests/task-api-failure.test.js.map +1 -0
- package/dist/tests/task-api.test.d.ts +1 -0
- package/dist/tests/task-api.test.js +50 -0
- package/dist/tests/task-api.test.js.map +1 -0
- package/docs/design.md +513 -0
- package/docs/releasing.md +62 -0
- package/package.json +78 -0
- package/recipes/dexscreener-token-read.json +19 -0
- package/recipes/fixture-catalog.json +14 -0
- package/recipes/generic-search.json +14 -0
- package/recipes/gmgn-token-read.json +19 -0
- package/server.json +79 -0
package/docs/design.md
ADDED
|
@@ -0,0 +1,513 @@
|
|
|
1
|
+
# Web Task API Design Doc
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
Build a generalized browser-task platform inside our projects that turns websites into outcome-oriented APIs. Instead of hardcoding one adapter per site, the system runs a real browser, lets an agent choose actions from structured browser tools, validates output against a schema, and stores artifacts for replay and debugging.
|
|
6
|
+
|
|
7
|
+
This MVP intentionally favors the architecture we actually want long term:
|
|
8
|
+
|
|
9
|
+
- agent-first execution
|
|
10
|
+
- browser as the substrate
|
|
11
|
+
- recipes as optimization, not the default
|
|
12
|
+
- login/profile reuse
|
|
13
|
+
- task-level JSON results
|
|
14
|
+
- strong traces and verification
|
|
15
|
+
|
|
16
|
+
## Problem
|
|
17
|
+
|
|
18
|
+
Many useful websites have no API, weak APIs, or UI-only features. Per-site scrapers and automations work short term but do not scale:
|
|
19
|
+
|
|
20
|
+
- selectors break
|
|
21
|
+
- every site needs its own code path
|
|
22
|
+
- login/session handling becomes duplicated
|
|
23
|
+
- observability and replay are inconsistent
|
|
24
|
+
- unknown future websites require fresh engineering every time
|
|
25
|
+
|
|
26
|
+
We want a common layer that lets our projects say:
|
|
27
|
+
|
|
28
|
+
> Start from this URL, achieve this goal, return typed JSON.
|
|
29
|
+
|
|
30
|
+
## Goals
|
|
31
|
+
|
|
32
|
+
1. Expose a single task API for “read” and “act” use cases.
|
|
33
|
+
2. Run against a real browser with persistent login profiles.
|
|
34
|
+
3. Support freeform agent control without shipping per-site code first.
|
|
35
|
+
4. Validate outputs against a supplied JSON schema.
|
|
36
|
+
5. Persist step traces, screenshots, and final artifacts.
|
|
37
|
+
6. Allow repeated high-value flows to be promoted into recipes later.
|
|
38
|
+
7. Keep the runtime framework-light and provider-agnostic.
|
|
39
|
+
8. Preserve browser/session state across related tasks so agents can chain work over time.
|
|
40
|
+
|
|
41
|
+
## Non-goals for MVP
|
|
42
|
+
|
|
43
|
+
- distributed job queue
|
|
44
|
+
- multi-tenant billing/quotas
|
|
45
|
+
- residential proxy/captcha infrastructure
|
|
46
|
+
- human-in-the-loop approvals
|
|
47
|
+
- production-grade secret vault
|
|
48
|
+
- full workflow scheduling/webhooks
|
|
49
|
+
|
|
50
|
+
Those are expected future layers, not blockers for proving the core architecture.
|
|
51
|
+
|
|
52
|
+
## Key decisions
|
|
53
|
+
|
|
54
|
+
### 0) Session continuity is a first-class product feature
|
|
55
|
+
|
|
56
|
+
Users do not always want one isolated task. Real workflows look like:
|
|
57
|
+
|
|
58
|
+
- open Axiom with an authenticated profile
|
|
59
|
+
- inspect a token on GMGN as a guest
|
|
60
|
+
- carry findings into a later action task
|
|
61
|
+
|
|
62
|
+
So the product needs durable session records that survive across task runs, not just durable browser profiles.
|
|
63
|
+
|
|
64
|
+
## Session architecture choices
|
|
65
|
+
|
|
66
|
+
### Choice A — Stateless tasks only
|
|
67
|
+
|
|
68
|
+
- every task fully self-contained
|
|
69
|
+
- browser state supplied ad hoc with `profile`
|
|
70
|
+
- no cross-task context
|
|
71
|
+
|
|
72
|
+
Pros:
|
|
73
|
+
|
|
74
|
+
- simplest implementation
|
|
75
|
+
- easy horizontal scaling
|
|
76
|
+
|
|
77
|
+
Cons:
|
|
78
|
+
|
|
79
|
+
- weak for real agent workflows
|
|
80
|
+
- hard to chain research -> action -> verification
|
|
81
|
+
- no durable task memory except raw run files
|
|
82
|
+
|
|
83
|
+
### Choice B — Named sessions backed by file-backed metadata and browser profiles **(chosen)**
|
|
84
|
+
|
|
85
|
+
- create a session record once
|
|
86
|
+
- bind optional profile, start URL, default agent config, and notes
|
|
87
|
+
- append compact task history after each run
|
|
88
|
+
- allow future tasks to refer to `sessionId`
|
|
89
|
+
|
|
90
|
+
Pros:
|
|
91
|
+
|
|
92
|
+
- simple enough for local/product usage now
|
|
93
|
+
- supports guest sessions and authenticated/profile sessions
|
|
94
|
+
- enables connected tasks without building a full workflow engine
|
|
95
|
+
|
|
96
|
+
Cons:
|
|
97
|
+
|
|
98
|
+
- file-backed state is single-machine scoped
|
|
99
|
+
- not yet multi-worker safe
|
|
100
|
+
|
|
101
|
+
### Choice C — Full workflow engine with long-lived browser workers
|
|
102
|
+
|
|
103
|
+
- queue-backed sessions
|
|
104
|
+
- pinned worker/browser lifecycle
|
|
105
|
+
- richer inter-task memory and live state
|
|
106
|
+
|
|
107
|
+
Pros:
|
|
108
|
+
|
|
109
|
+
- strongest long-term orchestration model
|
|
110
|
+
|
|
111
|
+
Cons:
|
|
112
|
+
|
|
113
|
+
- too much infrastructure for this stage
|
|
114
|
+
- would slow delivery of the core product
|
|
115
|
+
|
|
116
|
+
### Chosen approach
|
|
117
|
+
|
|
118
|
+
Choose **B** now:
|
|
119
|
+
|
|
120
|
+
- it gives real cross-task continuity
|
|
121
|
+
- it composes with persistent browser profiles
|
|
122
|
+
- it can later evolve toward C without breaking the task API
|
|
123
|
+
|
|
124
|
+
### 1) Agent-first, recipe-assisted architecture
|
|
125
|
+
|
|
126
|
+
Default behavior is goal-driven browser control. Recipes are optional overlays that add hints, matching, and reusable assertions. This avoids recreating the brittle adapter trap.
|
|
127
|
+
|
|
128
|
+
### 2) Thin custom orchestrator over large agent frameworks
|
|
129
|
+
|
|
130
|
+
There is no universal “gold standard” agent framework that is both simple and future-proof. For the MVP we use a small internal loop:
|
|
131
|
+
|
|
132
|
+
- provider adapter
|
|
133
|
+
- browser tools
|
|
134
|
+
- run state
|
|
135
|
+
- structured task contract
|
|
136
|
+
|
|
137
|
+
Why not make LangChain the core?
|
|
138
|
+
|
|
139
|
+
- too much framework gravity for the moat we actually care about
|
|
140
|
+
- browser/session/runtime quality matters more than chain abstractions
|
|
141
|
+
- easier to keep provider compatibility with our own small interface
|
|
142
|
+
|
|
143
|
+
### 3) Pluggable planner backends: CLIProxyAPI first, OpenCode optional
|
|
144
|
+
|
|
145
|
+
The preferred production path uses CLIProxyAPI-managed auth and model routing, while keeping OpenCode as an optional adapter.
|
|
146
|
+
|
|
147
|
+
Why this split:
|
|
148
|
+
|
|
149
|
+
- OpenCode is powerful, but it is coding-native and should not be the only foundation for a general web agent platform.
|
|
150
|
+
- CLIProxyAPI is a better auth/routing layer for general LLM access.
|
|
151
|
+
- We still want OpenCode available where it already fits the local stack well.
|
|
152
|
+
|
|
153
|
+
CLIProxy path gives:
|
|
154
|
+
|
|
155
|
+
- proxy-managed auth instead of vendor-specific API keys
|
|
156
|
+
- one place to swap models/providers
|
|
157
|
+
- easy reuse of existing CLI/OAuth-backed setups
|
|
158
|
+
|
|
159
|
+
Important nuance: CLIProxyAPI is not treated here as “one API key for one provider”. It is treated as a routing/auth layer that may expose multiple providers, multiple accounts, and model aliases behind one compatible endpoint.
|
|
160
|
+
|
|
161
|
+
OpenCode path gives:
|
|
162
|
+
|
|
163
|
+
- a local programmable agent runtime we already use
|
|
164
|
+
- support for headless/server usage through `@opencode-ai/sdk`
|
|
165
|
+
- structured JSON output via session prompt formatting
|
|
166
|
+
- compatibility with existing provider routing, including CLIProxyAPI-backed setups
|
|
167
|
+
|
|
168
|
+
For practical local operation, we also support an `auto` mode:
|
|
169
|
+
|
|
170
|
+
- probe CLIProxy first and use it when reachable/authenticated
|
|
171
|
+
- if no planner model alias is configured for CLIProxy, fall back to OpenCode instead of failing late
|
|
172
|
+
- otherwise (CLIProxy unreachable or unauthenticated), fall back to OpenCode so the existing local GPT/OAuth setup still works
|
|
173
|
+
|
|
174
|
+
This is specifically useful when model auth is already solved on the machine via OpenCode but we still want the product surface to stay general-web focused rather than OpenCode-centric.
|
|
175
|
+
|
|
176
|
+
The code isolates this behind an `AgentAdapter` interface so other planners can still be added later.
|
|
177
|
+
|
|
178
|
+
### 4) Deterministic mock agent for tests and demos
|
|
179
|
+
|
|
180
|
+
We need end-to-end verification without external credentials. The MVP includes a local mock agent that can drive semantically-labeled pages and extract structured data from a fixture site. This proves the whole stack works now.
|
|
181
|
+
|
|
182
|
+
### 5) Element IDs instead of exposing selectors to the model
|
|
183
|
+
|
|
184
|
+
The snapshot tool returns normalized interactive elements with generated IDs. The agent acts on `elementId`, not raw selectors. Internally we still keep the selector/path mapping, but the model contract stays higher level.
|
|
185
|
+
|
|
186
|
+
## System overview
|
|
187
|
+
|
|
188
|
+
```text
|
|
189
|
+
HTTP task request
|
|
190
|
+
-> TaskRunner
|
|
191
|
+
-> RecipeRegistry resolve/match
|
|
192
|
+
-> BrowserSession bootstrap (persistent profile + browser hardening)
|
|
193
|
+
-> Agent loop
|
|
194
|
+
-> snapshot current page
|
|
195
|
+
-> agent selects browser tool
|
|
196
|
+
-> runtime executes tool
|
|
197
|
+
-> loop until finish/fail
|
|
198
|
+
-> JSON schema validation
|
|
199
|
+
-> artifacts + run record persisted
|
|
200
|
+
-> HTTP response
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Main components
|
|
204
|
+
|
|
205
|
+
### API server
|
|
206
|
+
|
|
207
|
+
Fastify server with endpoints for:
|
|
208
|
+
|
|
209
|
+
- `GET /health`
|
|
210
|
+
- `GET /v1/recipes`
|
|
211
|
+
- `GET /v1/sessions`
|
|
212
|
+
- `POST /v1/sessions`
|
|
213
|
+
- `GET /v1/sessions/:sessionId`
|
|
214
|
+
- `PATCH /v1/sessions/:sessionId`
|
|
215
|
+
- `POST /v1/tasks/run`
|
|
216
|
+
- `GET /v1/tasks/:taskId`
|
|
217
|
+
|
|
218
|
+
### Task runner
|
|
219
|
+
|
|
220
|
+
Owns the end-to-end execution lifecycle:
|
|
221
|
+
|
|
222
|
+
- creates run directory
|
|
223
|
+
- resolves recipe and session defaults
|
|
224
|
+
- starts browser session
|
|
225
|
+
- drives the agent loop
|
|
226
|
+
- validates output
|
|
227
|
+
- stores result record and session history
|
|
228
|
+
|
|
229
|
+
### Session store
|
|
230
|
+
|
|
231
|
+
File-backed session records with:
|
|
232
|
+
|
|
233
|
+
- session ID and name
|
|
234
|
+
- guest/profile mode
|
|
235
|
+
- optional bound browser profile
|
|
236
|
+
- optional default start URL
|
|
237
|
+
- optional default agent configuration
|
|
238
|
+
- notes
|
|
239
|
+
- compact recent task history
|
|
240
|
+
|
|
241
|
+
### Browser session
|
|
242
|
+
|
|
243
|
+
Playwright wrapper that provides structured browser tools:
|
|
244
|
+
|
|
245
|
+
- navigate
|
|
246
|
+
- snapshot
|
|
247
|
+
- click element
|
|
248
|
+
- fill element
|
|
249
|
+
- press element
|
|
250
|
+
- select option
|
|
251
|
+
- wait for text
|
|
252
|
+
- read page text
|
|
253
|
+
- capture screenshot
|
|
254
|
+
|
|
255
|
+
### Agent adapter
|
|
256
|
+
|
|
257
|
+
Interface:
|
|
258
|
+
|
|
259
|
+
- receive run state + tool results
|
|
260
|
+
- return assistant tool calls
|
|
261
|
+
|
|
262
|
+
Implementations:
|
|
263
|
+
|
|
264
|
+
- `CliProxyAgent`
|
|
265
|
+
- `OpencodeAgent`
|
|
266
|
+
- `MockAgent`
|
|
267
|
+
|
|
268
|
+
### Recipe registry
|
|
269
|
+
|
|
270
|
+
Recipes are JSON definitions with:
|
|
271
|
+
|
|
272
|
+
- id/name/description
|
|
273
|
+
- URL matching hints
|
|
274
|
+
- prompt augmentations
|
|
275
|
+
- preferred input aliases
|
|
276
|
+
- optional completion assertions
|
|
277
|
+
|
|
278
|
+
Recipes do not replace the agent. They sharpen it.
|
|
279
|
+
|
|
280
|
+
### File-backed run store
|
|
281
|
+
|
|
282
|
+
Stores:
|
|
283
|
+
|
|
284
|
+
- `runs/<taskId>/task.json`
|
|
285
|
+
- `runs/<taskId>/steps.jsonl`
|
|
286
|
+
- screenshots
|
|
287
|
+
- raw result/metadata
|
|
288
|
+
|
|
289
|
+
This is enough for local replay and debugging.
|
|
290
|
+
|
|
291
|
+
## Request contract
|
|
292
|
+
|
|
293
|
+
### Task request
|
|
294
|
+
|
|
295
|
+
```json
|
|
296
|
+
{
|
|
297
|
+
"goal": "Search for banana and return product, price, and stock.",
|
|
298
|
+
"startUrl": "http://127.0.0.1:4010/",
|
|
299
|
+
"sessionId": "d8dd1a8f-0f31-4d6b-b6bb-0ff1452b9352",
|
|
300
|
+
"profile": "default",
|
|
301
|
+
"mode": "act",
|
|
302
|
+
"input": {
|
|
303
|
+
"query": "banana"
|
|
304
|
+
},
|
|
305
|
+
"outputSchema": {
|
|
306
|
+
"type": "object",
|
|
307
|
+
"required": ["product", "price", "stock"],
|
|
308
|
+
"properties": {
|
|
309
|
+
"product": { "type": "string" },
|
|
310
|
+
"price": { "type": "string" },
|
|
311
|
+
"stock": { "type": "string" }
|
|
312
|
+
}
|
|
313
|
+
},
|
|
314
|
+
"agent": {
|
|
315
|
+
"kind": "cliproxy"
|
|
316
|
+
},
|
|
317
|
+
"limits": {
|
|
318
|
+
"maxSteps": 12,
|
|
319
|
+
"timeoutMs": 90000,
|
|
320
|
+
"headless": true
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
### Task response
|
|
326
|
+
|
|
327
|
+
Includes:
|
|
328
|
+
|
|
329
|
+
- task ID
|
|
330
|
+
- status
|
|
331
|
+
- validated result
|
|
332
|
+
- summary trace
|
|
333
|
+
- matched recipe
|
|
334
|
+
- artifact directory
|
|
335
|
+
- timing metadata
|
|
336
|
+
|
|
337
|
+
## Tool contract exposed to the agent
|
|
338
|
+
|
|
339
|
+
The agent receives structured tools, not arbitrary code execution.
|
|
340
|
+
|
|
341
|
+
### `snapshot`
|
|
342
|
+
|
|
343
|
+
Returns:
|
|
344
|
+
|
|
345
|
+
- current URL/title
|
|
346
|
+
- text preview
|
|
347
|
+
- visible interactive elements with stable `elementId`s
|
|
348
|
+
- forms and semantic labels
|
|
349
|
+
|
|
350
|
+
### `navigate`
|
|
351
|
+
|
|
352
|
+
Navigate to a URL.
|
|
353
|
+
|
|
354
|
+
### `click`
|
|
355
|
+
|
|
356
|
+
Click a visible interactive element by `elementId`.
|
|
357
|
+
|
|
358
|
+
### `fill`
|
|
359
|
+
|
|
360
|
+
Fill an input-like element by `elementId`.
|
|
361
|
+
|
|
362
|
+
### `press`
|
|
363
|
+
|
|
364
|
+
Press a keyboard key against an element.
|
|
365
|
+
|
|
366
|
+
### `select`
|
|
367
|
+
|
|
368
|
+
Select a value on a `<select>` element.
|
|
369
|
+
|
|
370
|
+
### `wait_for_text`
|
|
371
|
+
|
|
372
|
+
Wait until expected text appears.
|
|
373
|
+
|
|
374
|
+
### `read_page`
|
|
375
|
+
|
|
376
|
+
Read full visible text in a normalized form for extraction/verification.
|
|
377
|
+
|
|
378
|
+
### `finish`
|
|
379
|
+
|
|
380
|
+
Return the structured result.
|
|
381
|
+
|
|
382
|
+
### `fail`
|
|
383
|
+
|
|
384
|
+
Abort the run with a machine-readable reason.
|
|
385
|
+
|
|
386
|
+
## Safety model
|
|
387
|
+
|
|
388
|
+
The MVP keeps safety simple and explicit:
|
|
389
|
+
|
|
390
|
+
- tool sandbox is browser-only; no arbitrary shell/file tools exposed to the model
|
|
391
|
+
- max step limit
|
|
392
|
+
- wall-clock timeout
|
|
393
|
+
- output schema validation
|
|
394
|
+
- task trace for auditing
|
|
395
|
+
- manual login bootstrap instead of storing credentials in the request
|
|
396
|
+
|
|
397
|
+
## Observability
|
|
398
|
+
|
|
399
|
+
Each run stores:
|
|
400
|
+
|
|
401
|
+
- request metadata
|
|
402
|
+
- step-level tool calls/results
|
|
403
|
+
- page screenshots
|
|
404
|
+
- final JSON result or failure reason
|
|
405
|
+
|
|
406
|
+
This is critical because browser agents fail in ways that need replay, not just logs.
|
|
407
|
+
|
|
408
|
+
## Real-world constraint discovered during validation
|
|
409
|
+
|
|
410
|
+
Public targets like Dexscreener and GMGN currently sit behind Cloudflare/bot protection that blocks fresh headless sessions in this environment. That means a serious product cannot assume every public site is immediately automatable with a brand-new clean headless browser.
|
|
411
|
+
|
|
412
|
+
So the product design now explicitly supports three continuity paths:
|
|
413
|
+
|
|
414
|
+
- named persistent profiles for authenticated sites
|
|
415
|
+
- `BROWSER_USER_DATA_DIR` for “use my real Chrome profile” behavior
|
|
416
|
+
- session-bound browser storage for guest workflows that still need continuity across tasks
|
|
417
|
+
|
|
418
|
+
The bundled GMGN/Dexscreener recipes should therefore be treated as starter recipes that depend on warmed browser state, not as guaranteed out-of-the-box site integrations. Protected-site recipes now explicitly require either a named persistent profile, `BROWSER_USER_DATA_DIR`, or a **warmed** `sessionId` so the runtime does not pretend a fresh temp browser will behave like a human’s already-warmed Chrome profile.
|
|
419
|
+
|
|
420
|
+
## Planned directory structure
|
|
421
|
+
|
|
422
|
+
```text
|
|
423
|
+
src/
|
|
424
|
+
agents/
|
|
425
|
+
browser/
|
|
426
|
+
recipes/
|
|
427
|
+
sessions/
|
|
428
|
+
server/
|
|
429
|
+
storage/
|
|
430
|
+
tasks/
|
|
431
|
+
tests/
|
|
432
|
+
fixtures/
|
|
433
|
+
scripts/
|
|
434
|
+
docs/
|
|
435
|
+
examples/
|
|
436
|
+
profiles/
|
|
437
|
+
runs/
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
## Detailed implementation plan
|
|
441
|
+
|
|
442
|
+
### Phase 1 — Core contracts
|
|
443
|
+
|
|
444
|
+
1. Define request/response schemas.
|
|
445
|
+
2. Build file-backed run store.
|
|
446
|
+
3. Build recipe registry.
|
|
447
|
+
4. Build Fastify app + routes.
|
|
448
|
+
|
|
449
|
+
### Phase 2 — Browser runtime
|
|
450
|
+
|
|
451
|
+
1. Wrap Playwright launch/context/page.
|
|
452
|
+
2. Add snapshot extraction with element IDs.
|
|
453
|
+
3. Add browser tools and error handling.
|
|
454
|
+
4. Persist screenshots and basic metrics.
|
|
455
|
+
|
|
456
|
+
### Phase 3 — Agent loop
|
|
457
|
+
|
|
458
|
+
1. Define provider-neutral `AgentAdapter`.
|
|
459
|
+
2. Implement CLIProxy planner adapter.
|
|
460
|
+
3. Implement OpenCode adapter as an optional backend.
|
|
461
|
+
4. Implement mock adapter.
|
|
462
|
+
5. Add system prompt and loop orchestration.
|
|
463
|
+
|
|
464
|
+
### Phase 4 — Profiles, examples, docs
|
|
465
|
+
|
|
466
|
+
1. Add manual login bootstrap script.
|
|
467
|
+
2. Add demo script.
|
|
468
|
+
3. Add sample recipe and example request.
|
|
469
|
+
4. Write README and operating notes.
|
|
470
|
+
|
|
471
|
+
### Phase 5 — Session continuity
|
|
472
|
+
|
|
473
|
+
1. Add file-backed session store.
|
|
474
|
+
2. Add session create/read/update/list endpoints.
|
|
475
|
+
3. Merge session defaults into task runs.
|
|
476
|
+
4. Append compact task history back into sessions.
|
|
477
|
+
|
|
478
|
+
### Phase 6 — Verification
|
|
479
|
+
|
|
480
|
+
1. Create local fixture site.
|
|
481
|
+
2. Run end-to-end task against fixture site.
|
|
482
|
+
3. Verify schema validation and stored artifacts.
|
|
483
|
+
4. Verify session-backed connected tasks.
|
|
484
|
+
5. Run typecheck, tests, and build.
|
|
485
|
+
|
|
486
|
+
## Future roadmap
|
|
487
|
+
|
|
488
|
+
After the MVP proves the architecture, the highest-value next layers are:
|
|
489
|
+
|
|
490
|
+
1. async queue + webhook completion
|
|
491
|
+
2. profile/session service with encrypted secrets
|
|
492
|
+
3. browser pools and worker isolation
|
|
493
|
+
4. stronger DOM understanding and recovery policies
|
|
494
|
+
5. proxy and anti-detection layer
|
|
495
|
+
6. recipe learning/promotion from successful runs
|
|
496
|
+
7. policy/approval layer for sensitive actions
|
|
497
|
+
8. multi-tenant controls, quotas, and billing
|
|
498
|
+
|
|
499
|
+
## Why this design is the right starting point
|
|
500
|
+
|
|
501
|
+
It gives us a working generalized browser API now, without prematurely locking ourselves into:
|
|
502
|
+
|
|
503
|
+
- site-specific adapters
|
|
504
|
+
- heavyweight orchestration frameworks
|
|
505
|
+
- infra we do not yet need
|
|
506
|
+
|
|
507
|
+
At the same time, it leaves clean upgrade paths for the pieces that actually become moats later: reliability, session management, recipes, verification, and replay.
|
|
508
|
+
|
|
509
|
+
## References
|
|
510
|
+
|
|
511
|
+
[^1]: API Everything homepage, accessed 2026-03-26, for the core product framing of “read + act through one API.”
|
|
512
|
+
[^2]: Playwright documentation and common industry practice for browser automation runtimes.
|
|
513
|
+
[^3]: OpenCode SDK docs, accessed 2026-03-26, for headless server access, session prompting, and structured JSON output.
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Releasing
|
|
2
|
+
|
|
3
|
+
## Versioning
|
|
4
|
+
|
|
5
|
+
This project uses tagged releases.
|
|
6
|
+
|
|
7
|
+
Recommended release flow:
|
|
8
|
+
|
|
9
|
+
1. update `CHANGELOG.md`
|
|
10
|
+
2. bump `package.json`, `server.json`, and any versioned examples or metadata together
|
|
11
|
+
3. run `npm run check`
|
|
12
|
+
4. commit the release
|
|
13
|
+
5. push the release commit to `main`
|
|
14
|
+
6. create and push a tag like `v0.2.0`
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
git push origin main
|
|
18
|
+
git tag v0.2.0
|
|
19
|
+
git push origin v0.2.0
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
The tag is the release trigger. Once the tag is pushed, GitHub Actions owns npm publication, MCP registry publication, and GitHub release creation. This flow assumes `NPM_TOKEN` is configured for tagged releases; if it is missing, the release workflow now fails loudly instead of pretending npm publish is optional.
|
|
23
|
+
|
|
24
|
+
Versioned files checked by CI:
|
|
25
|
+
|
|
26
|
+
- `package.json`
|
|
27
|
+
- `server.json`
|
|
28
|
+
|
|
29
|
+
`src/mcp.ts` reads the runtime version from `package.json`, so there is no extra hard-coded runtime version to bump.
|
|
30
|
+
|
|
31
|
+
If you forget one of the versioned files, `npm run check:meta` should fail before release. `npm run check:dist` also proves the built MCP entrypoint starts and responds before a release ships.
|
|
32
|
+
|
|
33
|
+
## What GitHub Actions does
|
|
34
|
+
|
|
35
|
+
- `CI`: typecheck, tests, build, metadata checks, package dry-run
|
|
36
|
+
- `Release`: verify, require npm publication on tags, publish to the MCP Registry after npm propagation, and then create the GitHub release
|
|
37
|
+
|
|
38
|
+
## npm
|
|
39
|
+
|
|
40
|
+
Package name:
|
|
41
|
+
|
|
42
|
+
```text
|
|
43
|
+
web-task-api
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Published package page:
|
|
47
|
+
|
|
48
|
+
```text
|
|
49
|
+
https://www.npmjs.com/package/web-task-api
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## MCP Registry
|
|
53
|
+
|
|
54
|
+
Published registry name:
|
|
55
|
+
|
|
56
|
+
```text
|
|
57
|
+
io.github.rich-jojo/web-task-api
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## References
|
|
61
|
+
|
|
62
|
+
[^1]: `server.json` is the MCP registry source of truth for npm package mapping and runtime environment metadata.
|
package/package.json
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "web-task-api",
|
|
3
|
+
"mcpName": "io.github.rich-jojo/web-task-api",
|
|
4
|
+
"version": "0.2.1",
|
|
5
|
+
"packageManager": "npm@10.8.2",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"description": "General browser-task API that lets agents read and act on websites through a single runtime.",
|
|
8
|
+
"homepage": "https://github.com/rich-jojo/web-task-api",
|
|
9
|
+
"bugs": {
|
|
10
|
+
"url": "https://github.com/rich-jojo/web-task-api/issues"
|
|
11
|
+
},
|
|
12
|
+
"repository": {
|
|
13
|
+
"type": "git",
|
|
14
|
+
"url": "git+https://github.com/rich-jojo/web-task-api.git"
|
|
15
|
+
},
|
|
16
|
+
"main": "./dist/src/lib.js",
|
|
17
|
+
"exports": {
|
|
18
|
+
".": "./dist/src/lib.js",
|
|
19
|
+
"./mcp": "./dist/src/mcp-server.js"
|
|
20
|
+
},
|
|
21
|
+
"bin": {
|
|
22
|
+
"web-task-api": "dist/src/mcp.js",
|
|
23
|
+
"web-task-api-http": "dist/src/index.js"
|
|
24
|
+
},
|
|
25
|
+
"files": [
|
|
26
|
+
"dist",
|
|
27
|
+
"README.md",
|
|
28
|
+
"CHANGELOG.md",
|
|
29
|
+
"docs/design.md",
|
|
30
|
+
"docs/releasing.md",
|
|
31
|
+
"server.json",
|
|
32
|
+
"recipes"
|
|
33
|
+
],
|
|
34
|
+
"engines": {
|
|
35
|
+
"node": ">=22"
|
|
36
|
+
},
|
|
37
|
+
"scripts": {
|
|
38
|
+
"build": "tsc -p tsconfig.json",
|
|
39
|
+
"dev": "tsx src/index.ts",
|
|
40
|
+
"dev:mcp": "tsx src/mcp.ts",
|
|
41
|
+
"start": "node dist/src/index.js",
|
|
42
|
+
"start:mcp": "node dist/src/mcp.js",
|
|
43
|
+
"typecheck": "tsc -p tsconfig.json --noEmit",
|
|
44
|
+
"test": "node --import tsx --test tests/**/*.test.ts",
|
|
45
|
+
"check:meta": "node scripts/check-metadata.mjs",
|
|
46
|
+
"check:dist": "node scripts/check-dist-mcp.mjs",
|
|
47
|
+
"check": "npm run typecheck && npm run test && npm run build && npm run check:meta && npm run check:dist",
|
|
48
|
+
"playwright:install": "playwright install chromium",
|
|
49
|
+
"profile:login": "tsx scripts/profile-login.ts",
|
|
50
|
+
"demo": "tsx scripts/demo.ts",
|
|
51
|
+
"prepack": "npm run build"
|
|
52
|
+
},
|
|
53
|
+
"keywords": [
|
|
54
|
+
"mcp",
|
|
55
|
+
"browser-automation",
|
|
56
|
+
"playwright",
|
|
57
|
+
"web-tasks",
|
|
58
|
+
"structured-output",
|
|
59
|
+
"claude-code",
|
|
60
|
+
"opencode"
|
|
61
|
+
],
|
|
62
|
+
"author": "rich-jojo",
|
|
63
|
+
"dependencies": {
|
|
64
|
+
"@modelcontextprotocol/sdk": "1.18.1",
|
|
65
|
+
"@opencode-ai/sdk": "1.2.27",
|
|
66
|
+
"ajv": "8.17.1",
|
|
67
|
+
"ajv-formats": "3.0.1",
|
|
68
|
+
"dotenv": "16.4.7",
|
|
69
|
+
"fastify": "5.2.1",
|
|
70
|
+
"playwright": "1.52.0",
|
|
71
|
+
"zod": "3.24.2"
|
|
72
|
+
},
|
|
73
|
+
"devDependencies": {
|
|
74
|
+
"@types/node": "24.0.0",
|
|
75
|
+
"tsx": "4.19.3",
|
|
76
|
+
"typescript": "5.8.3"
|
|
77
|
+
}
|
|
78
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "dexscreener-token-read",
|
|
3
|
+
"name": "Dexscreener token read",
|
|
4
|
+
"description": "Read token or pair details from Dexscreener pages. Best used with a warmed persistent browser profile if anti-bot checks appear.",
|
|
5
|
+
"urlPatterns": ["dexscreener.com"],
|
|
6
|
+
"browserHints": {
|
|
7
|
+
"preferChrome": true,
|
|
8
|
+
"preferHeadful": true,
|
|
9
|
+
"preferPersistentProfile": true
|
|
10
|
+
},
|
|
11
|
+
"promptHints": [
|
|
12
|
+
"This site may present bot checks to fresh sessions; if the page is blocked, fail clearly and recommend running with a persistent profile.",
|
|
13
|
+
"Look for token, pair, chain, price, liquidity, volume, FDV, market cap, and contract address in visible page text.",
|
|
14
|
+
"Prefer read_page once the asset panel is visible instead of many small clicks."
|
|
15
|
+
],
|
|
16
|
+
"inputAliases": {
|
|
17
|
+
"query": ["symbol", "token", "pair", "contract"]
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "fixture-catalog",
|
|
3
|
+
"name": "Fixture catalog demo",
|
|
4
|
+
"description": "Optimized hints for the local demo catalog site.",
|
|
5
|
+
"urlPatterns": ["/", "/result"],
|
|
6
|
+
"promptHints": [
|
|
7
|
+
"The landing page has one query field and a Search button.",
|
|
8
|
+
"The result page contains Product, Price, and Stock lines in visible text.",
|
|
9
|
+
"When those lines appear, use them to finish the task."
|
|
10
|
+
],
|
|
11
|
+
"inputAliases": {
|
|
12
|
+
"query": ["query"]
|
|
13
|
+
}
|
|
14
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "generic-search",
|
|
3
|
+
"name": "Generic search flow",
|
|
4
|
+
"description": "Use when a page looks like a simple search or lookup form.",
|
|
5
|
+
"urlPatterns": [],
|
|
6
|
+
"promptHints": [
|
|
7
|
+
"If there is a single obvious search or query field, fill it from the input.",
|
|
8
|
+
"After filling the query, click the main submit/search button or press Enter.",
|
|
9
|
+
"Once result details are visible, read the page and finish with structured output."
|
|
10
|
+
],
|
|
11
|
+
"inputAliases": {
|
|
12
|
+
"query": ["search", "term", "q", "keyword"]
|
|
13
|
+
}
|
|
14
|
+
}
|