web-task-api 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. package/CHANGELOG.md +25 -0
  2. package/README.md +284 -0
  3. package/dist/scripts/demo.d.ts +1 -0
  4. package/dist/scripts/demo.js +32 -0
  5. package/dist/scripts/demo.js.map +1 -0
  6. package/dist/scripts/profile-login.d.ts +1 -0
  7. package/dist/scripts/profile-login.js +38 -0
  8. package/dist/scripts/profile-login.js.map +1 -0
  9. package/dist/src/agents/auto-agent.d.ts +22 -0
  10. package/dist/src/agents/auto-agent.js +54 -0
  11. package/dist/src/agents/auto-agent.js.map +1 -0
  12. package/dist/src/agents/cliproxy-agent.d.ts +18 -0
  13. package/dist/src/agents/cliproxy-agent.js +137 -0
  14. package/dist/src/agents/cliproxy-agent.js.map +1 -0
  15. package/dist/src/agents/index.d.ts +2 -0
  16. package/dist/src/agents/index.js +17 -0
  17. package/dist/src/agents/index.js.map +1 -0
  18. package/dist/src/agents/mock-agent.d.ts +15 -0
  19. package/dist/src/agents/mock-agent.js +132 -0
  20. package/dist/src/agents/mock-agent.js.map +1 -0
  21. package/dist/src/agents/opencode-agent.d.ts +20 -0
  22. package/dist/src/agents/opencode-agent.js +122 -0
  23. package/dist/src/agents/opencode-agent.js.map +1 -0
  24. package/dist/src/agents/planner-prompt.d.ts +6 -0
  25. package/dist/src/agents/planner-prompt.js +116 -0
  26. package/dist/src/agents/planner-prompt.js.map +1 -0
  27. package/dist/src/browser/session.d.ts +41 -0
  28. package/dist/src/browser/session.js +267 -0
  29. package/dist/src/browser/session.js.map +1 -0
  30. package/dist/src/client.d.ts +44 -0
  31. package/dist/src/client.js +59 -0
  32. package/dist/src/client.js.map +1 -0
  33. package/dist/src/config.d.ts +16 -0
  34. package/dist/src/config.js +18 -0
  35. package/dist/src/config.js.map +1 -0
  36. package/dist/src/index.d.ts +2 -0
  37. package/dist/src/index.js +15 -0
  38. package/dist/src/index.js.map +1 -0
  39. package/dist/src/lib.d.ts +6 -0
  40. package/dist/src/lib.js +5 -0
  41. package/dist/src/lib.js.map +1 -0
  42. package/dist/src/mcp-server.d.ts +3 -0
  43. package/dist/src/mcp-server.js +191 -0
  44. package/dist/src/mcp-server.js.map +1 -0
  45. package/dist/src/mcp.d.ts +2 -0
  46. package/dist/src/mcp.js +14 -0
  47. package/dist/src/mcp.js.map +1 -0
  48. package/dist/src/recipes/registry.d.ts +21 -0
  49. package/dist/src/recipes/registry.js +38 -0
  50. package/dist/src/recipes/registry.js.map +1 -0
  51. package/dist/src/server/app.d.ts +5 -0
  52. package/dist/src/server/app.js +89 -0
  53. package/dist/src/server/app.js.map +1 -0
  54. package/dist/src/sessions/store.d.ts +48 -0
  55. package/dist/src/sessions/store.js +84 -0
  56. package/dist/src/sessions/store.js.map +1 -0
  57. package/dist/src/storage/run-store.d.ts +12 -0
  58. package/dist/src/storage/run-store.js +30 -0
  59. package/dist/src/storage/run-store.js.map +1 -0
  60. package/dist/src/tasks/errors.d.ts +5 -0
  61. package/dist/src/tasks/errors.js +11 -0
  62. package/dist/src/tasks/errors.js.map +1 -0
  63. package/dist/src/tasks/output-validator.d.ts +1 -0
  64. package/dist/src/tasks/output-validator.js +21 -0
  65. package/dist/src/tasks/output-validator.js.map +1 -0
  66. package/dist/src/tasks/runner.d.ts +38 -0
  67. package/dist/src/tasks/runner.js +236 -0
  68. package/dist/src/tasks/runner.js.map +1 -0
  69. package/dist/src/tasks/schemas.d.ts +266 -0
  70. package/dist/src/tasks/schemas.js +67 -0
  71. package/dist/src/tasks/schemas.js.map +1 -0
  72. package/dist/tests/agent-adapters.test.d.ts +1 -0
  73. package/dist/tests/agent-adapters.test.js +87 -0
  74. package/dist/tests/agent-adapters.test.js.map +1 -0
  75. package/dist/tests/agent-selection.test.d.ts +1 -0
  76. package/dist/tests/agent-selection.test.js +26 -0
  77. package/dist/tests/agent-selection.test.js.map +1 -0
  78. package/dist/tests/auto-agent.test.d.ts +1 -0
  79. package/dist/tests/auto-agent.test.js +86 -0
  80. package/dist/tests/auto-agent.test.js.map +1 -0
  81. package/dist/tests/browser-session.test.d.ts +1 -0
  82. package/dist/tests/browser-session.test.js +41 -0
  83. package/dist/tests/browser-session.test.js.map +1 -0
  84. package/dist/tests/client.test.d.ts +1 -0
  85. package/dist/tests/client.test.js +35 -0
  86. package/dist/tests/client.test.js.map +1 -0
  87. package/dist/tests/fixture-site.d.ts +6 -0
  88. package/dist/tests/fixture-site.js +93 -0
  89. package/dist/tests/fixture-site.js.map +1 -0
  90. package/dist/tests/mcp.test.d.ts +1 -0
  91. package/dist/tests/mcp.test.js +186 -0
  92. package/dist/tests/mcp.test.js.map +1 -0
  93. package/dist/tests/output-validator.test.d.ts +1 -0
  94. package/dist/tests/output-validator.test.js +27 -0
  95. package/dist/tests/output-validator.test.js.map +1 -0
  96. package/dist/tests/request-validation.test.d.ts +1 -0
  97. package/dist/tests/request-validation.test.js +25 -0
  98. package/dist/tests/request-validation.test.js.map +1 -0
  99. package/dist/tests/runner-options.test.d.ts +1 -0
  100. package/dist/tests/runner-options.test.js +44 -0
  101. package/dist/tests/runner-options.test.js.map +1 -0
  102. package/dist/tests/session-api.test.d.ts +1 -0
  103. package/dist/tests/session-api.test.js +244 -0
  104. package/dist/tests/session-api.test.js.map +1 -0
  105. package/dist/tests/session-client.test.d.ts +1 -0
  106. package/dist/tests/session-client.test.js +28 -0
  107. package/dist/tests/session-client.test.js.map +1 -0
  108. package/dist/tests/task-api-failure.test.d.ts +1 -0
  109. package/dist/tests/task-api-failure.test.js +39 -0
  110. package/dist/tests/task-api-failure.test.js.map +1 -0
  111. package/dist/tests/task-api.test.d.ts +1 -0
  112. package/dist/tests/task-api.test.js +50 -0
  113. package/dist/tests/task-api.test.js.map +1 -0
  114. package/docs/design.md +513 -0
  115. package/docs/releasing.md +62 -0
  116. package/package.json +78 -0
  117. package/recipes/dexscreener-token-read.json +19 -0
  118. package/recipes/fixture-catalog.json +14 -0
  119. package/recipes/generic-search.json +14 -0
  120. package/recipes/gmgn-token-read.json +19 -0
  121. package/server.json +79 -0
package/docs/design.md ADDED
@@ -0,0 +1,513 @@
1
+ # Web Task API Design Doc
2
+
3
+ ## Summary
4
+
5
+ Build a generalized browser-task platform inside our projects that turns websites into outcome-oriented APIs. Instead of hardcoding one adapter per site, the system runs a real browser, lets an agent choose actions from structured browser tools, validates output against a schema, and stores artifacts for replay and debugging.
6
+
7
+ This MVP intentionally favors the architecture we actually want long term:
8
+
9
+ - agent-first execution
10
+ - browser as the substrate
11
+ - recipes as optimization, not the default
12
+ - login/profile reuse
13
+ - task-level JSON results
14
+ - strong traces and verification
15
+
16
+ ## Problem
17
+
18
+ Many useful websites have no API, weak APIs, or UI-only features. Per-site scrapers and automations work short term but do not scale:
19
+
20
+ - selectors break
21
+ - every site needs its own code path
22
+ - login/session handling becomes duplicated
23
+ - observability and replay are inconsistent
24
+ - unknown future websites require fresh engineering every time
25
+
26
+ We want a common layer that lets our projects say:
27
+
28
+ > Start from this URL, achieve this goal, return typed JSON.
29
+
30
+ ## Goals
31
+
32
+ 1. Expose a single task API for “read” and “act” use cases.
33
+ 2. Run against a real browser with persistent login profiles.
34
+ 3. Support freeform agent control without shipping per-site code first.
35
+ 4. Validate outputs against a supplied JSON schema.
36
+ 5. Persist step traces, screenshots, and final artifacts.
37
+ 6. Allow repeated high-value flows to be promoted into recipes later.
38
+ 7. Keep the runtime framework-light and provider-agnostic.
39
+ 8. Preserve browser/session state across related tasks so agents can chain work over time.
40
+
41
+ ## Non-goals for MVP
42
+
43
+ - distributed job queue
44
+ - multi-tenant billing/quotas
45
+ - residential proxy/captcha infrastructure
46
+ - human-in-the-loop approvals
47
+ - production-grade secret vault
48
+ - full workflow scheduling/webhooks
49
+
50
+ Those are expected future layers, not blockers for proving the core architecture.
51
+
52
+ ## Key decisions
53
+
54
+ ### 0) Session continuity is a first-class product feature
55
+
56
+ Users do not always want one isolated task. Real workflows look like:
57
+
58
+ - open Axiom with an authenticated profile
59
+ - inspect a token on GMGN as a guest
60
+ - carry findings into a later action task
61
+
62
+ So the product needs durable session records that survive across task runs, not just durable browser profiles.
63
+
64
+ ## Session architecture choices
65
+
66
+ ### Choice A — Stateless tasks only
67
+
68
+ - every task fully self-contained
69
+ - browser state supplied ad hoc with `profile`
70
+ - no cross-task context
71
+
72
+ Pros:
73
+
74
+ - simplest implementation
75
+ - easy horizontal scaling
76
+
77
+ Cons:
78
+
79
+ - weak for real agent workflows
80
+ - hard to chain research -> action -> verification
81
+ - no durable task memory except raw run files
82
+
83
+ ### Choice B — Named sessions backed by file-backed metadata and browser profiles **(chosen)**
84
+
85
+ - create a session record once
86
+ - bind optional profile, start URL, default agent config, and notes
87
+ - append compact task history after each run
88
+ - allow future tasks to refer to `sessionId`
89
+
90
+ Pros:
91
+
92
+ - simple enough for local/product usage now
93
+ - supports guest sessions and authenticated/profile sessions
94
+ - enables connected tasks without building a full workflow engine
95
+
96
+ Cons:
97
+
98
+ - file-backed state is single-machine scoped
99
+ - not yet multi-worker safe
100
+
101
+ ### Choice C — Full workflow engine with long-lived browser workers
102
+
103
+ - queue-backed sessions
104
+ - pinned worker/browser lifecycle
105
+ - richer inter-task memory and live state
106
+
107
+ Pros:
108
+
109
+ - strongest long-term orchestration model
110
+
111
+ Cons:
112
+
113
+ - too much infrastructure for this stage
114
+ - would slow delivery of the core product
115
+
116
+ ### Chosen approach
117
+
118
+ Choose **B** now:
119
+
120
+ - it gives real cross-task continuity
121
+ - it composes with persistent browser profiles
122
+ - it can later evolve toward C without breaking the task API
123
+
124
+ ### 1) Agent-first, recipe-assisted architecture
125
+
126
+ Default behavior is goal-driven browser control. Recipes are optional overlays that add hints, matching, and reusable assertions. This avoids recreating the brittle adapter trap.
127
+
128
+ ### 2) Thin custom orchestrator over large agent frameworks
129
+
130
+ There is no universal “gold standard” agent framework that is both simple and future-proof. For the MVP we use a small internal loop:
131
+
132
+ - provider adapter
133
+ - browser tools
134
+ - run state
135
+ - structured task contract
136
+
137
+ Why not make LangChain the core?
138
+
139
+ - too much framework gravity for the moat we actually care about
140
+ - browser/session/runtime quality matters more than chain abstractions
141
+ - easier to keep provider compatibility with our own small interface
142
+
143
+ ### 3) Pluggable planner backends: CLIProxyAPI first, OpenCode optional
144
+
145
+ The preferred production path uses CLIProxyAPI-managed auth and model routing, while keeping OpenCode as an optional adapter.
146
+
147
+ Why this split:
148
+
149
+ - OpenCode is powerful, but it is coding-native and should not be the only foundation for a general web agent platform.
150
+ - CLIProxyAPI is a better auth/routing layer for general LLM access.
151
+ - We still want OpenCode available where it already fits the local stack well.
152
+
153
+ CLIProxy path gives:
154
+
155
+ - proxy-managed auth instead of vendor-specific API keys
156
+ - one place to swap models/providers
157
+ - easy reuse of existing CLI/OAuth-backed setups
158
+
159
+ Important nuance: CLIProxyAPI is not treated here as “one API key for one provider”. It is treated as a routing/auth layer that may expose multiple providers, multiple accounts, and model aliases behind one compatible endpoint.
160
+
161
+ OpenCode path gives:
162
+
163
+ - a local programmable agent runtime we already use
164
+ - support for headless/server usage through `@opencode-ai/sdk`
165
+ - structured JSON output via session prompt formatting
166
+ - compatibility with existing provider routing, including CLIProxyAPI-backed setups
167
+
168
+ For practical local operation, we also support an `auto` mode:
169
+
170
+ - probe CLIProxy first and use it when reachable/authenticated
171
+ - if no planner model alias is configured for CLIProxy, fall back to OpenCode instead of failing late
172
+ - if CLIProxy is unreachable or unauthenticated, fall back to OpenCode so the existing local GPT/OAuth setup still works
173
+
174
+ This is specifically useful when model auth is already solved on the machine via OpenCode but we still want the product surface to stay general-web focused rather than OpenCode-centric.
175
+
176
+ The code isolates this behind an `AgentAdapter` interface so other planners can still be added later.
177
+
178
+ ### 4) Deterministic mock agent for tests and demos
179
+
180
+ We need end-to-end verification without external credentials. The MVP includes a local mock agent that can drive semantically-labeled pages and extract structured data from a fixture site. This proves the whole stack works now.
181
+
182
+ ### 5) Element IDs instead of exposing selectors to the model
183
+
184
+ The snapshot tool returns normalized interactive elements with generated IDs. The agent acts on `elementId`, not raw selectors. Internally we still keep the selector/path mapping, but the model contract stays higher level.
185
+
186
+ ## System overview
187
+
188
+ ```text
189
+ HTTP task request
190
+ -> TaskRunner
191
+ -> RecipeRegistry resolve/match
192
+ -> BrowserSession bootstrap (persistent profile + browser hardening)
193
+ -> Agent loop
194
+ -> snapshot current page
195
+ -> agent selects browser tool
196
+ -> runtime executes tool
197
+ -> loop until finish/fail
198
+ -> JSON schema validation
199
+ -> artifacts + run record persisted
200
+ -> HTTP response
201
+ ```
202
+
203
+ ## Main components
204
+
205
+ ### API server
206
+
207
+ Fastify server with endpoints for:
208
+
209
+ - `GET /health`
210
+ - `GET /v1/recipes`
211
+ - `GET /v1/sessions`
212
+ - `POST /v1/sessions`
213
+ - `GET /v1/sessions/:sessionId`
214
+ - `PATCH /v1/sessions/:sessionId`
215
+ - `POST /v1/tasks/run`
216
+ - `GET /v1/tasks/:taskId`
217
+
218
+ ### Task runner
219
+
220
+ Owns the end-to-end execution lifecycle:
221
+
222
+ - creates run directory
223
+ - resolves recipe and session defaults
224
+ - starts browser session
225
+ - drives the agent loop
226
+ - validates output
227
+ - stores result record and session history
228
+
229
+ ### Session store
230
+
231
+ File-backed session records with:
232
+
233
+ - session ID and name
234
+ - guest/profile mode
235
+ - optional bound browser profile
236
+ - optional default start URL
237
+ - optional default agent configuration
238
+ - notes
239
+ - compact recent task history
240
+
241
+ ### Browser session
242
+
243
+ Playwright wrapper that provides structured browser tools:
244
+
245
+ - navigate
246
+ - snapshot
247
+ - click element
248
+ - fill element
249
+ - press element
250
+ - select option
251
+ - wait for text
252
+ - read page text
253
+ - capture screenshot
254
+
255
+ ### Agent adapter
256
+
257
+ Interface:
258
+
259
+ - receive run state + tool results
260
+ - return assistant tool calls
261
+
262
+ Implementations:
263
+
264
+ - `CliProxyAgent`
265
+ - `OpencodeAgent`
266
+ - `MockAgent`
267
+
268
+ ### Recipe registry
269
+
270
+ Recipes are JSON definitions with:
271
+
272
+ - id/name/description
273
+ - URL matching hints
274
+ - prompt augmentations
275
+ - preferred input aliases
276
+ - optional completion assertions
277
+
278
+ Recipes do not replace the agent. They sharpen it.
279
+
280
+ ### File-backed run store
281
+
282
+ Stores:
283
+
284
+ - `runs/<taskId>/task.json`
285
+ - `runs/<taskId>/steps.jsonl`
286
+ - screenshots
287
+ - raw result/metadata
288
+
289
+ This is enough for local replay and debugging.
290
+
291
+ ## Request contract
292
+
293
+ ### Task request
294
+
295
+ ```json
296
+ {
297
+ "goal": "Search for banana and return product, price, and stock.",
298
+ "startUrl": "http://127.0.0.1:4010/",
299
+ "sessionId": "d8dd1a8f-0f31-4d6b-b6bb-0ff1452b9352",
300
+ "profile": "default",
301
+ "mode": "act",
302
+ "input": {
303
+ "query": "banana"
304
+ },
305
+ "outputSchema": {
306
+ "type": "object",
307
+ "required": ["product", "price", "stock"],
308
+ "properties": {
309
+ "product": { "type": "string" },
310
+ "price": { "type": "string" },
311
+ "stock": { "type": "string" }
312
+ }
313
+ },
314
+ "agent": {
315
+ "kind": "cliproxy"
316
+ },
317
+ "limits": {
318
+ "maxSteps": 12,
319
+ "timeoutMs": 90000,
320
+ "headless": true
321
+ }
322
+ }
323
+ ```
324
+
325
+ ### Task response
326
+
327
+ Includes:
328
+
329
+ - task ID
330
+ - status
331
+ - validated result
332
+ - summary trace
333
+ - matched recipe
334
+ - artifact directory
335
+ - timing metadata
336
+
337
+ ## Tool contract exposed to the agent
338
+
339
+ The agent receives structured tools, not arbitrary code execution.
340
+
341
+ ### `snapshot`
342
+
343
+ Returns:
344
+
345
+ - current URL/title
346
+ - text preview
347
+ - visible interactive elements with stable `elementId`s
348
+ - forms and semantic labels
349
+
350
+ ### `navigate`
351
+
352
+ Navigate to a URL.
353
+
354
+ ### `click`
355
+
356
+ Click a visible interactive element by `elementId`.
357
+
358
+ ### `fill`
359
+
360
+ Fill an input-like element by `elementId`.
361
+
362
+ ### `press`
363
+
364
+ Press a keyboard key against an element.
365
+
366
+ ### `select`
367
+
368
+ Select a value on a `<select>` element.
369
+
370
+ ### `wait_for_text`
371
+
372
+ Wait until expected text appears.
373
+
374
+ ### `read_page`
375
+
376
+ Read full visible text in a normalized form for extraction/verification.
377
+
378
+ ### `finish`
379
+
380
+ Return the structured result.
381
+
382
+ ### `fail`
383
+
384
+ Abort the run with a machine-readable reason.
385
+
386
+ ## Safety model
387
+
388
+ The MVP keeps safety simple and explicit:
389
+
390
+ - tool sandbox is browser-only; no arbitrary shell/file tools exposed to the model
391
+ - max step limit
392
+ - wall-clock timeout
393
+ - output schema validation
394
+ - task trace for auditing
395
+ - manual login bootstrap instead of storing credentials in the request
396
+
397
+ ## Observability
398
+
399
+ Each run stores:
400
+
401
+ - request metadata
402
+ - step-level tool calls/results
403
+ - page screenshots
404
+ - final JSON result or failure reason
405
+
406
+ This is critical because browser agents fail in ways that need replay, not just logs.
407
+
408
+ ## Real-world constraint discovered during validation
409
+
410
+ Public targets like Dexscreener and GMGN are currently fronted by Cloudflare/bot protection when accessed from fresh headless sessions in this environment. That means a serious product cannot assume every public site is immediately automatable with a brand-new clean headless browser.
411
+
412
+ So the product design now explicitly supports three continuity paths:
413
+
414
+ - named persistent profiles for authenticated sites
415
+ - `BROWSER_USER_DATA_DIR` for “use my real Chrome profile” behavior
416
+ - session-bound browser storage for guest workflows that still need continuity across tasks
417
+
418
+ The bundled GMGN/Dexscreener recipes should therefore be treated as starter recipes that depend on warmed browser state, not as guaranteed out-of-the-box site integrations. Protected-site recipes now explicitly require either a named persistent profile, `BROWSER_USER_DATA_DIR`, or a **warmed** `sessionId` so the runtime does not pretend a fresh temp browser will behave like a human’s already-warmed Chrome profile.
419
+
420
+ ## Planned directory structure
421
+
422
+ ```text
423
+ src/
424
+ agents/
425
+ browser/
426
+ recipes/
427
+ sessions/
428
+ server/
429
+ storage/
430
+ tasks/
431
+ tests/
432
+ fixtures/
433
+ scripts/
434
+ docs/
435
+ examples/
436
+ profiles/
437
+ runs/
438
+ ```
439
+
440
+ ## Detailed implementation plan
441
+
442
+ ### Phase 1 — Core contracts
443
+
444
+ 1. Define request/response schemas.
445
+ 2. Build file-backed run store.
446
+ 3. Build recipe registry.
447
+ 4. Build Fastify app + routes.
448
+
449
+ ### Phase 2 — Browser runtime
450
+
451
+ 1. Wrap Playwright launch/context/page.
452
+ 2. Add snapshot extraction with element IDs.
453
+ 3. Add browser tools and error handling.
454
+ 4. Persist screenshots and basic metrics.
455
+
456
+ ### Phase 3 — Agent loop
457
+
458
+ 1. Define provider-neutral `AgentAdapter`.
459
+ 2. Implement CLIProxy planner adapter.
460
+ 3. Implement OpenCode adapter as an optional backend.
461
+ 4. Implement mock adapter.
462
+ 5. Add system prompt and loop orchestration.
463
+
464
+ ### Phase 4 — Profiles, examples, docs
465
+
466
+ 1. Add manual login bootstrap script.
467
+ 2. Add demo script.
468
+ 3. Add sample recipe and example request.
469
+ 4. Write README and operating notes.
470
+
471
+ ### Phase 5 — Session continuity
472
+
473
+ 1. Add file-backed session store.
474
+ 2. Add session create/read/update/list endpoints.
475
+ 3. Merge session defaults into task runs.
476
+ 4. Append compact task history back into sessions.
477
+
478
+ ### Phase 6 — Verification
479
+
480
+ 1. Create local fixture site.
481
+ 2. Run end-to-end task against fixture site.
482
+ 3. Verify schema validation and stored artifacts.
483
+ 4. Verify session-backed connected tasks.
484
+ 5. Run typecheck, tests, and build.
485
+
486
+ ## Future roadmap
487
+
488
+ After the MVP proves the architecture, the highest-value next layers are:
489
+
490
+ 1. async queue + webhook completion
491
+ 2. profile/session service with encrypted secrets
492
+ 3. browser pools and worker isolation
493
+ 4. stronger DOM understanding and recovery policies
494
+ 5. proxy and anti-detection layer
495
+ 6. recipe learning/promotion from successful runs
496
+ 7. policy/approval layer for sensitive actions
497
+ 8. multi-tenant controls, quotas, and billing
498
+
499
+ ## Why this design is the right starting point
500
+
501
+ It gives us a working generalized browser API now, without prematurely locking ourselves into:
502
+
503
+ - site-specific adapters
504
+ - heavyweight orchestration frameworks
505
+ - infra we do not yet need
506
+
507
+ At the same time, it leaves clean upgrade paths for the pieces that actually become moats later: reliability, session management, recipes, verification, and replay.
508
+
509
+ ## References
510
+
511
+ [^1]: API Everything homepage, accessed 2026-03-26, for the core product framing of “read + act through one API.”
512
+ [^2]: Playwright documentation and common industry practice for browser automation runtimes.
513
+ [^3]: OpenCode SDK docs, accessed 2026-03-26, for headless server access, session prompting, and structured JSON output.
@@ -0,0 +1,62 @@
1
+ # Releasing
2
+
3
+ ## Versioning
4
+
5
+ This project uses tagged releases.
6
+
7
+ Recommended release flow:
8
+
9
+ 1. update `CHANGELOG.md`
10
+ 2. bump `package.json`, `server.json`, and any versioned examples or metadata together
11
+ 3. run `npm run check`
12
+ 4. commit the release
13
+ 5. push the release commit to `main`
14
+ 6. create and push a tag like `v0.2.0`
15
+
16
+ ```bash
17
+ git push origin main
18
+ git tag v0.2.0
19
+ git push origin v0.2.0
20
+ ```
21
+
22
+ The tag is the release trigger. Once the tag is pushed, GitHub Actions owns npm publication, MCP registry publication, and GitHub release creation. This flow assumes `NPM_TOKEN` is configured for tagged releases; if it is missing, the release workflow now fails loudly instead of pretending npm publish is optional.
23
+
24
+ Versioned files checked by CI:
25
+
26
+ - `package.json`
27
+ - `server.json`
28
+
29
+ `src/mcp.ts` reads the runtime version from `package.json`, so there is no extra hard-coded runtime version to bump.
30
+
31
+ If you forget one of the versioned files, `npm run check:meta` should fail before release. `npm run check:dist` also proves the built MCP entrypoint starts and responds before a release ships.
32
+
33
+ ## What GitHub Actions does
34
+
35
+ - `CI`: typecheck, tests, build, metadata checks, package dry-run
36
+ - `Release`: verify, require npm publication on tags, publish to the MCP Registry after npm propagation, and then create the GitHub release
37
+
38
+ ## npm
39
+
40
+ Package name:
41
+
42
+ ```text
43
+ web-task-api
44
+ ```
45
+
46
+ Published package page:
47
+
48
+ ```text
49
+ https://www.npmjs.com/package/web-task-api
50
+ ```
51
+
52
+ ## MCP Registry
53
+
54
+ Published registry name:
55
+
56
+ ```text
57
+ io.github.rich-jojo/web-task-api
58
+ ```
59
+
60
+ ## References
61
+
62
+ [^1]: `server.json` is the MCP registry source of truth for npm package mapping and runtime environment metadata.
package/package.json ADDED
@@ -0,0 +1,78 @@
1
+ {
2
+ "name": "web-task-api",
3
+ "mcpName": "io.github.rich-jojo/web-task-api",
4
+ "version": "0.2.1",
5
+ "packageManager": "npm@10.8.2",
6
+ "type": "module",
7
+ "description": "General browser-task API that lets agents read and act on websites through a single runtime.",
8
+ "homepage": "https://github.com/rich-jojo/web-task-api",
9
+ "bugs": {
10
+ "url": "https://github.com/rich-jojo/web-task-api/issues"
11
+ },
12
+ "repository": {
13
+ "type": "git",
14
+ "url": "git+https://github.com/rich-jojo/web-task-api.git"
15
+ },
16
+ "main": "./dist/src/lib.js",
17
+ "exports": {
18
+ ".": "./dist/src/lib.js",
19
+ "./mcp": "./dist/src/mcp-server.js"
20
+ },
21
+ "bin": {
22
+ "web-task-api": "dist/src/mcp.js",
23
+ "web-task-api-http": "dist/src/index.js"
24
+ },
25
+ "files": [
26
+ "dist",
27
+ "README.md",
28
+ "CHANGELOG.md",
29
+ "docs/design.md",
30
+ "docs/releasing.md",
31
+ "server.json",
32
+ "recipes"
33
+ ],
34
+ "engines": {
35
+ "node": ">=22"
36
+ },
37
+ "scripts": {
38
+ "build": "tsc -p tsconfig.json",
39
+ "dev": "tsx src/index.ts",
40
+ "dev:mcp": "tsx src/mcp.ts",
41
+ "start": "node dist/src/index.js",
42
+ "start:mcp": "node dist/src/mcp.js",
43
+ "typecheck": "tsc -p tsconfig.json --noEmit",
44
+ "test": "node --import tsx --test tests/**/*.test.ts",
45
+ "check:meta": "node scripts/check-metadata.mjs",
46
+ "check:dist": "node scripts/check-dist-mcp.mjs",
47
+ "check": "npm run typecheck && npm run test && npm run build && npm run check:meta && npm run check:dist",
48
+ "playwright:install": "playwright install chromium",
49
+ "profile:login": "tsx scripts/profile-login.ts",
50
+ "demo": "tsx scripts/demo.ts",
51
+ "prepack": "npm run build"
52
+ },
53
+ "keywords": [
54
+ "mcp",
55
+ "browser-automation",
56
+ "playwright",
57
+ "web-tasks",
58
+ "structured-output",
59
+ "claude-code",
60
+ "opencode"
61
+ ],
62
+ "author": "rich-jojo",
63
+ "dependencies": {
64
+ "@modelcontextprotocol/sdk": "1.18.1",
65
+ "@opencode-ai/sdk": "1.2.27",
66
+ "ajv": "8.17.1",
67
+ "ajv-formats": "3.0.1",
68
+ "dotenv": "16.4.7",
69
+ "fastify": "5.2.1",
70
+ "playwright": "1.52.0",
71
+ "zod": "3.24.2"
72
+ },
73
+ "devDependencies": {
74
+ "@types/node": "24.0.0",
75
+ "tsx": "4.19.3",
76
+ "typescript": "5.8.3"
77
+ }
78
+ }
@@ -0,0 +1,19 @@
1
+ {
2
+ "id": "dexscreener-token-read",
3
+ "name": "Dexscreener token read",
4
+ "description": "Read token or pair details from Dexscreener pages. Best used with a warmed persistent browser profile if anti-bot checks appear.",
5
+ "urlPatterns": ["dexscreener.com"],
6
+ "browserHints": {
7
+ "preferChrome": true,
8
+ "preferHeadful": true,
9
+ "preferPersistentProfile": true
10
+ },
11
+ "promptHints": [
12
+ "This site may present bot checks to fresh sessions; if the page is blocked, fail clearly and recommend running with a persistent profile.",
13
+ "Look for token, pair, chain, price, liquidity, volume, FDV, market cap, and contract address in visible page text.",
14
+ "Prefer read_page once the asset panel is visible instead of many small clicks."
15
+ ],
16
+ "inputAliases": {
17
+ "query": ["symbol", "token", "pair", "contract"]
18
+ }
19
+ }
@@ -0,0 +1,14 @@
1
+ {
2
+ "id": "fixture-catalog",
3
+ "name": "Fixture catalog demo",
4
+ "description": "Optimized hints for the local demo catalog site.",
5
+ "urlPatterns": ["/", "/result"],
6
+ "promptHints": [
7
+ "The landing page has one query field and a Search button.",
8
+ "The result page contains Product, Price, and Stock lines in visible text.",
9
+ "When those lines appear, use them to finish the task."
10
+ ],
11
+ "inputAliases": {
12
+ "query": ["query"]
13
+ }
14
+ }
@@ -0,0 +1,14 @@
1
+ {
2
+ "id": "generic-search",
3
+ "name": "Generic search flow",
4
+ "description": "Use when a page looks like a simple search or lookup form.",
5
+ "urlPatterns": [],
6
+ "promptHints": [
7
+ "If there is a single obvious search or query field, fill it from the input.",
8
+ "After filling the query, click the main submit/search button or press Enter.",
9
+ "Once result details are visible, read the page and finish with structured output."
10
+ ],
11
+ "inputAliases": {
12
+ "query": ["search", "term", "q", "keyword"]
13
+ }
14
+ }