@napster-corp/webmcp-toolkit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +531 -0
  3. package/bin/webmcp-toolkit.mjs +81 -0
  4. package/dist/debug.d.ts +5 -0
  5. package/dist/debug.d.ts.map +1 -0
  6. package/dist/debug.js +26 -0
  7. package/dist/debug.js.map +1 -0
  8. package/dist/dev-panel.d.ts +22 -0
  9. package/dist/dev-panel.d.ts.map +1 -0
  10. package/dist/dev-panel.js +1046 -0
  11. package/dist/dev-panel.js.map +1 -0
  12. package/dist/index.d.ts +6 -0
  13. package/dist/index.d.ts.map +1 -0
  14. package/dist/index.js +36 -0
  15. package/dist/index.js.map +1 -0
  16. package/dist/model-context.d.ts +13 -0
  17. package/dist/model-context.d.ts.map +1 -0
  18. package/dist/model-context.js +28 -0
  19. package/dist/model-context.js.map +1 -0
  20. package/dist/resources.d.ts +15 -0
  21. package/dist/resources.d.ts.map +1 -0
  22. package/dist/resources.js +179 -0
  23. package/dist/resources.js.map +1 -0
  24. package/dist/tiers.d.ts +31 -0
  25. package/dist/tiers.d.ts.map +1 -0
  26. package/dist/tiers.js +107 -0
  27. package/dist/tiers.js.map +1 -0
  28. package/dist/types.d.ts +145 -0
  29. package/dist/types.d.ts.map +1 -0
  30. package/dist/types.js +9 -0
  31. package/dist/types.js.map +1 -0
  32. package/hooks/post-commit +17 -0
  33. package/package.json +86 -0
  34. package/skills/add-edge-mcp-dev-panel/SKILL.md +206 -0
  35. package/skills/plan-capabilities-and-state/SKILL.md +168 -0
  36. package/skills/setup-edge-mcp/SKILL.md +546 -0
  37. package/skills/sync-webmcp-tools/SKILL.md +26 -0
  38. package/src/debug.ts +26 -0
  39. package/src/dev-panel.ts +1318 -0
  40. package/src/index.ts +66 -0
  41. package/src/model-context.ts +31 -0
  42. package/src/resources.ts +207 -0
  43. package/src/tiers.ts +132 -0
  44. package/src/types.ts +177 -0
  45. package/tools/generate-capabilities.mjs +266 -0
  46. package/tools/install-hook.mjs +81 -0
  47. package/tools/runners/anthropic.mjs +75 -0
  48. package/tools/runners/copilot.mjs +63 -0
@@ -0,0 +1,546 @@
1
+ ---
2
+ name: setup-edge-mcp
3
+ description: Wire WebMCP into an existing web app — register a curated, approved set of standard WebMCP tools (what the agent can DO) and live-state resources (what the agent can SEE) against the app's real code. Use when the developer says "add an AI agent to my web app", "expose my app to an agent", "set up WebMCP", "agentify this app", or otherwise wants a coding agent to operate their existing app's real operations. Stops once the tools are registered and the developer has been offered the optional dev panel — the actual agent integration (Napster Omniagent or any other vendor SDK) is a separate step that reads the standard `document.modelContext` at runtime.
4
+ ---
5
+
6
+ # setup-edge-mcp
7
+
8
+ Wire WebMCP into an existing web app: a tiny in-browser layer that declares which of the app's real operations a connected agent is allowed to invoke and which state slices it is allowed to perceive. It lives in the same JavaScript runtime as the app's UI. Importing `@napster-corp/webmcp-toolkit` polyfills the WebMCP standard so `document.modelContext` exists and installs a live-state resource extension on it; when an agent SDK initializes in the same page, it reads `document.modelContext` directly — no glue code from the developer.
9
+
10
+ This skill stops once the tools are registered and verified. Connecting an actual agent (Napster's Omniagent, or any other vendor that supports WebMCP) is a separate step handled by that vendor's own skills or SDK.
11
+
12
+ ## 0. Confirm you're reading the real target app
13
+
14
+ Before any code: skim the actual source — components, the store, the service calls the UI makes. Don't trust ambient context files (`CLAUDE.md`, READMEs, docs) if they describe a different project or contradict the code; the running code is the source of truth, and a stale context file has sent past runs down the wrong path.
15
+
16
+ ## 1. Run the planning skill
17
+
18
+ Setting up WebMCP is a **one-time act per app**: you plan what to expose, then you register it. Planning is the first half of that act — not an optional preamble.
19
+
20
+ **Invoke the `plan-capabilities-and-state` skill before doing anything else.** It analyzes the codebase, proposes a starter plan, and walks the developer through it until approved. The plan covers:
21
+
22
+ - The high-value workflows the agent should support
23
+ - The tool list (each with its side-effect tier and idempotency flag)
24
+ - The resource list (each with its out-of-band justification)
25
+ - The deliberate withholds
26
+
27
+ When the planning skill returns, control comes back here and continues at step 2 with an approved plan in hand.
28
+
29
+ **Speak the app's language, not WebMCP's.** Frame every question during planning in the app's own terms. Ask "should the agent be able to see the cart after the user edits it by hand?", not "should we lift the cart's component state into an observable store and add a resource subscriber?" Translate the mechanics yourself; the developer shouldn't have to learn WebMCP to answer.
30
+
31
+ ## 2. Install the package and create the `src/webmcp/` folder
32
+
33
+ ```bash
34
+ npm install @napster-corp/webmcp-toolkit
35
+ ```
36
+
37
+ Importing the package has a browser-only side effect: it polyfills the WebMCP standard so `document.modelContext` exists, and installs a live-state resource extension on it. Everything WebMCP-related lives in `src/webmcp/` (or wherever the app keeps app-wide modules — `src/lib/webmcp/`, `app/lib/webmcp/`, etc.). The folder contains three files: one entry file, plus one file for each side of the distinction at the core of the integration (what the agent can DO vs what the agent can SEE):
38
+
39
+ ```
40
+ src/webmcp/
41
+ ├── index.ts ← imports the toolkit (installs the polyfill), then imports ./tools and ./resources
42
+ ├── tools.ts ← every registerTool / registerStatefulTool call
43
+ └── resources.ts ← every registerResource call
44
+ ```
45
+
46
+ No "which file does this go in" decision — the type of registration tells you. No domain splits, no further subfolders. If a file gets long, it stays long; the surface is a flat list of registrations and a single file per kind makes it auditable in one read.
47
+
48
+ A fourth file, `dev-panel.ts`, appears later only if the developer opts into the optional `add-edge-mcp-dev-panel` skill. The setup itself never creates it.
49
+
50
+ ### `src/webmcp/index.ts`
51
+
52
+ The orchestration file. Importing the toolkit installs the polyfill on `document.modelContext`; importing the two leaf files runs their registrations.
53
+
54
+ ```ts
55
+ // src/webmcp/index.ts
56
+ import '@napster-corp/webmcp-toolkit'; // installs the WebMCP polyfill + resource extension on document.modelContext
57
+ import './tools';
58
+ import './resources';
59
+ ```
60
+
61
+ Import the toolkit **first**, before `./tools` and `./resources`. The leaf files call `document.modelContext.registerTool(...)` and `registerResource(...)` at module load, so the polyfill must already be installed when they run. Listing the toolkit import above them in `index.ts` guarantees that order.
62
+
63
+ ### `src/webmcp/tools.ts`
64
+
65
+ Every `document.modelContext.registerTool(...)` and `registerStatefulTool(...)` call. No resources in this file.
66
+
67
+ ```ts
68
+ // src/webmcp/tools.ts
69
+ import { registerStatefulTool } from '@napster-corp/webmcp-toolkit';
70
+ import { searchProducts } from '../api/products';
71
+ import { cartStore } from '../features/cart/store';
72
+
73
+ document.modelContext.registerTool({
74
+ name: 'products.search',
75
+ description: 'Search the catalog and return matching products.',
76
+ inputSchema: {
77
+ type: 'object',
78
+ properties: { query: { type: 'string' } },
79
+ required: ['query'],
80
+ },
81
+ annotations: { readOnlyHint: true },
82
+ async execute({ query }) {
83
+ const results = await searchProducts(query);
84
+ return { content: [{ type: 'text', text: JSON.stringify(results) }] };
85
+ },
86
+ });
87
+
88
+ registerStatefulTool({
89
+ name: 'cart.add',
90
+ description: 'Add a product to the cart.',
91
+ inputSchema: {
92
+ type: 'object',
93
+ properties: {
94
+ productId: { type: 'string' },
95
+ qty: { type: 'integer', minimum: 1 },
96
+ },
97
+ required: ['productId', 'qty'],
98
+ },
99
+ napsterTier: 'reversible',
100
+ async execute({ productId, qty }) {
101
+ await cartStore.addLine(productId, qty);
102
+ return { content: [{ type: 'text', text: 'Added to cart.' }] };
103
+ },
104
+ });
105
+
106
+ // ...every other tool here.
107
+ ```
108
+
109
+ ### `src/webmcp/resources.ts`
110
+
111
+ Every `registerResource` call. Resources are the exception, not the rule (see step 4) — this file is often short, sometimes empty. An empty file (just the toolkit's import side effect, no registrations) is the right outcome when the gate excludes everything; leave a one-line comment explaining the absence.
112
+
113
+ ```ts
114
+ // src/webmcp/resources.ts
115
+ import { registerResource } from '@napster-corp/webmcp-toolkit';
116
+ import { cartStore } from '../features/cart/store';
117
+
118
+ registerResource({
119
+ uri: 'state://cart',
120
+ name: 'cart',
121
+ get: () => cartStore.getCurrent(),
122
+ subscribe: (onChange) => cartStore.subscribe(onChange),
123
+ });
124
+ ```
125
+
126
+ ### Wire the folder into the app's entry point
127
+
128
+ The app's entry point imports the folder once. That single import installs the polyfill and runs every registration at startup.
129
+
130
+ ```ts
131
+ // src/main.ts (or whatever the app's entry file is)
132
+ import './webmcp';
133
+ ```
134
+
135
+ That single import is the only change to anything outside `src/webmcp/`.
136
+
137
+ #### Framework-specific entry-point wiring
138
+
139
+ The plain `import './webmcp';` above works for any bundler whose entry file already runs in the browser (Vite, vanilla Webpack, plain HTML + ES modules). For frameworks that mix server-side rendering with client hydration, the import has to live somewhere that runs **in the browser** — `document` doesn't exist on the server, so importing the toolkit there throws or no-ops and nothing registers.
140
+
141
+ **Next.js (App Router)** — the default `app/layout.tsx` is a server component. Putting `import './webmcp';` there runs on the server where `document` is undefined. Use a thin client wrapper:
142
+
143
+ ```tsx
144
+ // app/webmcp-loader.tsx
145
+ 'use client';
146
+
147
+ import { useEffect } from 'react';
148
+
149
+ export function WebMcpLoader() {
150
+ useEffect(() => {
151
+ void import('@/webmcp');
152
+ }, []);
153
+ return null;
154
+ }
155
+
156
+ // app/layout.tsx — server component, unchanged otherwise
157
+ import { WebMcpLoader } from './webmcp-loader';
158
+
159
+ export default function RootLayout({ children }: { children: React.ReactNode }) {
160
+ return (
161
+ <html>
162
+ <body>
163
+ <WebMcpLoader />
164
+ {children}
165
+ </body>
166
+ </html>
167
+ );
168
+ }
169
+ ```
170
+
171
+ The dynamic `import()` sits inside `useEffect`, which never runs during server rendering — it fires only in the browser, after the component mounts — so the polyfill installs and the tools register exactly where they should.
172
+
173
+ **Next.js (Pages Router)** — wrap the import in `useEffect` inside `pages/_app.tsx`:
174
+
175
+ ```tsx
176
+ useEffect(() => { void import('@/webmcp'); }, []);
177
+ ```
178
+
179
+ **Nuxt** — use a client plugin (the `.client` suffix gates it to the browser):
180
+
181
+ ```ts
182
+ // plugins/webmcp.client.ts
183
+ import '~/webmcp';
184
+ ```
185
+
186
+ **Remix / React Router** — same pattern as Next.js App Router: a small client component that `useEffect`s a dynamic import, mounted once at the top of the tree.
187
+
188
+ **Vite + React / Vue / Svelte / vanilla** — the plain `import './webmcp';` in `src/main.ts(x)` works as shown above with no wrapper needed.
189
+
190
+ **The polyfill installs once** — importing the toolkit a second time (under hot-module-reload, say) reuses the already-installed `document.modelContext` rather than replacing it. The standard registry lives on the document, so vendor SDKs discover it without a configuration step.
191
+
192
+ ## 3. Tools — the hands
193
+
194
+ One tool per real operation the developer chose to expose. Named in the app's **own domain terms**, as `domain.verb`. Every tool MUST declare a `description` (the agent reads it to decide WHEN to invoke) and an `inputSchema` (the agent reads it to decide WHAT arguments to send). A tool with no arguments still declares an explicit empty schema (`{ type: 'object', properties: {} }`); a registration missing either field is malformed.
195
+
196
+ > **Examples here show the API shape.** In actual code, every `document.modelContext.registerTool(...)` / `registerStatefulTool(...)` call lives in `src/webmcp/tools.ts`, and every `registerResource(...)` lives in `src/webmcp/resources.ts`. The snippets below are abbreviated; mentally place them in those files.
197
+
198
+ ```ts
199
+ document.modelContext.registerTool({
200
+ name: 'products.search',
201
+ description: 'Search the catalog and return matching products.', // the planner reads this
202
+ inputSchema: { // REQUIRED — the contract
203
+ type: 'object',
204
+ properties: {
205
+ query: { type: 'string' },
206
+ maxPrice: { type: 'number' },
207
+ },
208
+ required: ['query'],
209
+ },
210
+ annotations: { readOnlyHint: true }, // read-only ⇒ readOnlyHint (see step 5)
211
+ async execute(params) {
212
+ const results = await searchProducts(params); // the app's REAL search
213
+ return { content: [{ type: 'text', text: JSON.stringify(results) }] }; // standard result shape
214
+ },
215
+ });
216
+ ```
217
+
218
+ Rules:
219
+
220
+ - **`execute` calls the app's own code** — the same function the UI's button/box calls. Composing several real operations into one tool is fine and often necessary (a `checkout.placeOrder` may need to chain `startCheckout → updateCheckout → placeOrder → refresh`). What's forbidden is **re-deriving business logic** — recomputing prices, re-validating rules the app already owns, or running a parallel fetch whose result goes only to the agent. If every step is a real call the app already exposes, you're composing, not re-implementing.
221
+ - **Domain-named, intent-first.** `products.search`, `cart.add`, `cart.checkout`. The name and description are how the agent decides when to use it.
222
+ - **`description` and `inputSchema` are mandatory.** Description tells the agent WHEN to invoke; schema tells it WHAT to send. Derive the schema from the app's real types (TypeScript, Zod, OpenAPI) — see step 6.
223
+ - **Make cardinality obvious in the name.** A single-item fetch and a list are different operations: prefer `viewDetails` / `get` / `open` for one (`products.viewDetails`), and `search` / `list` for many (`products.search`). `products.view` is ambiguous — avoid it.
224
+ - **Named navigation IS a tool.** `docs.openPage({ slug })`, `account.openSettings()`, `cart.viewDrawer()` are legitimate first-class tools — especially for docs sites and content-heavy apps. **Strongly avoid** a generic `navigate({ url })` or `goto({ path })` tool that hands raw routes to the agent. The standard accepts such a tool, but the agent reasons better in domain terms than in URL paths, and domain-level tools survive route refactors and produce fewer broken calls. Parameterized navigation with a domain-level argument (`openPage({ slug })`, `viewDetails({ id })`) is the right shape; routes stay inside `execute`.
225
+ - **Expose only what the plan calls for.** Don't invent tools, don't wrap everything.
226
+
227
+ ### How a tool returns its result
228
+
229
+ A tool returns the standard WebMCP result shape: `{ content: [{ type: 'text', text: ... }] }`. **Return the answer whenever you can** — the agent gets it as `execute`'s return value and responds. If the operation needs a moment, `execute` can `await` and still return.
230
+
231
+ - `cart.add` → returns a short confirmation in its `content`. (The full cart isn't the result — it's available via the `cart` resource later if needed.)
232
+ - `products.search` → where it can, `await` the search and return the results as JSON text in `content`.
233
+
234
+ **When the result can't be returned inline** — the operation redirects or renders asynchronously and the data arrives out-of-band — return a short status in `content` that tells the agent what's happening and where the result will appear, so it narrates ("let me pull those up…") and waits, instead of reading the ack as "nothing found":
235
+
236
+ ```ts
237
+ async execute(params) {
238
+ showSearchResults(params); // navigates / renders asynchronously
239
+ return {
240
+ content: [{
241
+ type: 'text',
242
+ text: 'Searching the catalog — the results will appear in the product list in a moment. Stand by; this is not the final list.',
243
+ }],
244
+ };
245
+ }
246
+ ```
247
+
248
+ The result then reaches the agent through the matching **resource update** when its slice next changes. This is exactly the kind of out-of-band state that earns a resource under the gate in step 4.
249
+
250
+ > **Don't use a "pending" message for operations that feel synchronous to the user.** Client-side route changes, opening a modal, copying to clipboard, focusing a field — all of these complete *before* the agent has even started speaking its response. Return a plain confirmation (or whatever the real result is) for those. If you return a "stand by" status, the agent will narrate *"let me open that, stand by…"* for navigation that already finished, and the user sees the result before they hear the narration. Save the pending-style message for operations whose result genuinely arrives over time (a search that renders asynchronously, an upload, a long-running server call).
251
+
252
+ ### When `execute` needs framework context — the handle pattern
253
+
254
+ A tool's `execute` lives in a plain module: it can call functions, mutate stores, fetch from APIs — anything that doesn't require being *inside* the component tree. But some operations only exist inside the framework's runtime context: `useNavigate()`, `useRouter()`, `useQueryClient()`, toast / modal / dialog hooks, any `useContext`-based API. A plain `execute` can't call those directly.
255
+
256
+ The pattern: export a module-level slot from `tools.ts`, have a tiny in-tree component set it on mount, and let `execute` call through it.
257
+
258
+ ```ts
259
+ // src/webmcp/tools.ts
260
+
261
+ // Module-level slot. Set by <WebMcpHandles /> at mount.
262
+ let navHandle: ((slug: string) => void) | null = null;
263
+ export function setNavHandle(fn: (slug: string) => void): void {
264
+ navHandle = fn;
265
+ }
266
+
267
+ document.modelContext.registerTool({
268
+ name: 'docs.openPage',
269
+ description: 'Navigate to a documentation page by slug.',
270
+ inputSchema: {
271
+ type: 'object',
272
+ properties: { slug: { type: 'string' } },
273
+ required: ['slug'],
274
+ },
275
+ annotations: { readOnlyHint: false },
276
+ async execute({ slug }: { slug: string }) {
277
+ if (!navHandle) {
278
+ throw new Error(
279
+ 'Navigation handle not yet mounted — is <WebMcpHandles /> in the tree?',
280
+ );
281
+ }
282
+ navHandle(slug);
283
+ return { content: [{ type: 'text', text: 'Navigated.' }] }; // synchronous from the user; not pending.
284
+ },
285
+ });
286
+ ```
287
+
288
+ ```tsx
289
+ // src/components/WebMcpHandles.tsx
290
+ 'use client'; // ← required wherever React Server Components are in play (e.g. Next.js App Router); omit elsewhere
291
+
292
+ import { useEffect } from 'react';
293
+ import { useRouter } from 'next/navigation'; // or useNavigate() in React Router, etc.
294
+ import { setNavHandle } from '../webmcp/tools';
295
+
296
+ export function WebMcpHandles() {
297
+ const router = useRouter();
298
+ useEffect(() => {
299
+ setNavHandle((slug) => router.push(`/docs/${slug}`));
300
+ }, [router]);
301
+ return null;
302
+ }
303
+ ```
304
+
305
+ Mount `<WebMcpHandles />` once near the top of the tree (just inside the root layout / app shell). The tool is now callable from anywhere.
306
+
307
+ Same pattern applies to other framework-context APIs: export `setToastHandle`, `setDialogHandle`, `setQueryClientHandle`, etc., and set them all from the same in-tree component. One handles file keeps the whole pattern visible in one place.
308
+
309
+ ### Server endpoints are app code too
310
+
311
+ "Call the app's own code" doesn't require importing the function into the client bundle. If the app already exposes the operation as a server endpoint (`/api/search`, a Next.js Route Handler, a Remix loader, a SvelteKit form action), calling it from `execute` via `fetch` is fine — it's the same canonical implementation, just reached over a network boundary instead of a module boundary. This is often the *right* move when importing the function directly would pull a large server-only bundle (vector store clients, DB drivers, ORMs, Node-only file IO) into the client.
312
+
313
+ ```ts
314
+ document.modelContext.registerTool({
315
+ name: 'docs.search',
316
+ description: 'Search the documentation by query.',
317
+ inputSchema: {
318
+ type: 'object',
319
+ properties: { query: { type: 'string' } },
320
+ required: ['query'],
321
+ },
322
+ annotations: { readOnlyHint: true },
323
+ async execute({ query }: { query: string }) {
324
+ const res = await fetch(`/api/search?q=${encodeURIComponent(query)}`);
325
+ if (!res.ok) throw new Error(`Search failed: ${res.status}`);
326
+ const data = await res.json();
327
+ return { content: [{ type: 'text', text: JSON.stringify(data) }] };
328
+ },
329
+ });
330
+ ```
331
+
332
+ The tool still calls the app's real implementation — it just travels through the same endpoint your existing search UI uses, so there's one search path, not two.
333
+
334
+ ### Reuse schemas where it helps
335
+
336
+ If several tools share the same input shape, extract the schema as a constant and reuse it. The standard doesn't care; it makes maintenance easier and the contract more consistent.
337
+
338
+ ```ts
339
+ const SlugArgs = {
340
+ type: 'object',
341
+ properties: { slug: { type: 'string' } },
342
+ required: ['slug'],
343
+ } as const satisfies Record<string, unknown>;
344
+
345
+ document.modelContext.registerTool({ name: 'docs.openPage', inputSchema: SlugArgs, ... });
346
+ document.modelContext.registerTool({ name: 'docs.copyPrompt', inputSchema: SlugArgs, ... });
347
+ ```
348
+
349
+ ## 4. Resources — the eyes
350
+
351
+ Resources are the **exception, not the rule.** Most of what the agent needs to know comes back from the tool it just called. A resource only earns its place when the agent needs to see state it *didn't* get from a return. **Many apps — content sites, docs sites, marketing pages, read-mostly tools — need zero resources.** An empty `resources.ts` is a correct outcome, not a failure; leave a one-line comment explaining the absence (e.g. *"No out-of-band state in this app — search results and page content are returned inline by their tools."*).
352
+
353
+ ```ts
354
+ registerResource({
355
+ uri: 'state://cart', // a stable resource URI
356
+ name: 'cart', // a noun — the slice it returns
357
+ get: () => store.getState().cart.lines.map(l => ({
358
+ id: l.id, name: l.name, qty: l.qty, price: l.price,
359
+ })),
360
+ subscribe: (onChange) => store.subscribe(s => s.cart, onChange), // optional: enables push
361
+ });
362
+ ```
363
+
364
+ - **`uri`** — a stable identifier for the slice (modeled on MCP resource URIs), e.g. `state://cart`.
365
+ - **`name`** — a noun identifying the slice (`cart`, `currentOrder`, `orderStatus`).
366
+ - **`get`** — returns a current, serializable snapshot from the app's real reactive state. Cheap, side-effect-free.
367
+ - **`subscribe`** *(optional)* — wires the app's own change mechanism (store subscribe, signal, query-cache listener) to an `onChange` callback, returning an unsubscribe. Present ⇒ the slice participates in push; absent ⇒ pull-only.
368
+
369
+ `get` is the *value*; `subscribe` is the *when*. Don't pass the new value through `onChange` — on change, the extension re-reads `get()`.
370
+
371
+ ### When to add a resource (the gate)
372
+
373
+ Add a resource **only for state that changes out-of-band** — state the agent needs to see that changes *without the agent acting*:
374
+
375
+ - The **user** changes it by hand (edits a cart quantity, toggles a filter, navigates somewhere), or
376
+ - It changes **server-side over time** (an order status moves from `processing` to `shipped` while the conversation is open).
377
+
378
+ That's the whole test. **If a tool already returns the answer, do NOT add a resource to mirror it.** A search that returns its results inline needs no `searchResults` resource; a `products.viewDetails` that returns the product needs no `currentProduct` resource. Resources are for the state the return *can't* give you, not a shadow copy of the state it can. **Pure pull state — a value the agent could just ask for — is a read-only tool, not a resource;** reserve resources for state that genuinely changes out of band.
379
+
380
+ So `cart` is a good resource (the user can edit it directly in the UI) and `orderStatus` is a good resource (it moves server-side); `searchResults` and `currentProduct` usually are **not**, because the tool that produces them hands them straight back.
381
+
382
+ ### Keep the signal clean at the source
383
+
384
+ - **Publish only settled state — never transient.** Don't push loading spinners, empty placeholders, or `null` blips. Expose the value only once it's real.
385
+ - **Don't clear-on-unmount between related views.** If navigating from one order page to another briefly tears down and rebuilds the same slice, the agent sees a `null` flash in between. Keep the slice populated across related transitions instead of blinking through empty.
386
+ - **Remember most pushes during an agent action are echoes.** When the agent itself just acted, the resulting state change is something it already got in the return. The fewer resources you expose, the less echo the consumer has to suppress.
387
+
388
+ ## 5. Side-effect tiers (the one bit of governance)
389
+
390
+ **WebMCP never widens what's permitted.** Tools are allowlisted; the agent runs as the signed-in user; the app's existing auth, validation, and permissions all stay in the app. Tiers are how the connected vendor SDK commits each call carefully — not what makes the call permitted in the first place.
391
+
392
+ Two layers express the safety contract:
393
+
394
+ - **Standard annotations** on every tool: only `readOnlyHint` and `untrustedContentHint` exist. Set `annotations: { readOnlyHint: true }` for tools that change no durable state; set `untrustedContentHint` (via `untrustedContent: true` on `registerStatefulTool`, or the annotation directly) for tools returning third-party content the model should treat as untrusted.
395
+ - **The Napster tier** on tools that need safety gating: use `registerStatefulTool`, which registers a standard tool AND records a `napsterTier` of `'read' | 'reversible' | 'irreversible'` (default `'reversible'`). The model never overrides the tier.
396
+
397
+ Default ambiguous cases to the **safer** tier.
398
+
399
+ | Tier | What it is | Example | Consumer behavior |
400
+ |---|---|---|---|
401
+ | `read` | Returns information / shows something; changes no durable state | search, look up, compare | Runs freely, no confirmation |
402
+ | `reversible` | Changes state that's easy to undo | add to cart, save draft, apply filter | Brief announce |
403
+ | `irreversible` | Cannot be cleanly undone | place order, charge card, cancel, send | Requires explicit user confirmation before commit |
404
+
405
+ A pure-read tool can be registered with plain `registerTool` and `annotations: { readOnlyHint: true }`. Anything that mutates state should go through `registerStatefulTool` so its tier is recorded.
406
+
407
+ For irreversible tools, also set `idempotent: true` if the underlying operation can be retried safely (e.g. via an idempotency key on the server):
408
+
409
+ ```ts
410
+ registerStatefulTool({
411
+ name: 'orders.cancel',
412
+ description: 'Cancel an open order before it ships.',
413
+ inputSchema: {
414
+ type: 'object',
415
+ properties: { orderId: { type: 'string' } },
416
+ required: ['orderId'],
417
+ },
418
+ napsterTier: 'irreversible',
419
+ idempotent: true,
420
+ async execute({ orderId }) {
421
+ await api.cancelOrder(orderId);
422
+ return { content: [{ type: 'text', text: 'Order cancelled.' }] };
423
+ },
424
+ });
425
+ ```
426
+
427
+ ## 6. Schemas: required for input, derived from real types
428
+
429
+ `inputSchema` is mandatory on every tool. **Derive schemas from the app's real types** (TypeScript types, JSDoc, Zod/Yup, OpenAPI) rather than hand-guessing them. Real types keep the schema honest — if the underlying type changes, the mismatch surfaces in development instead of confusing the agent at runtime. Hand-written schemas drift.
430
+
431
+ Tighten what the type alone can't express (a `string` that's really an enum, numeric bounds, `format` for emails/UUIDs/URLs), and redact sensitive fields from what `execute` returns.
432
+
433
+ If the app uses a runtime schema library like Zod, derive JSON Schema from it and validate at the boundary:
434
+
435
+ ```ts
436
+ import { z } from 'zod';
437
+ import { zodToJsonSchema } from 'zod-to-json-schema';
438
+
439
+ const SearchArgs = z.object({ query: z.string(), maxPrice: z.number().optional() });
440
+
441
+ document.modelContext.registerTool({
442
+ name: 'products.search',
443
+ description: 'Search the catalog and return matching products.',
444
+ inputSchema: zodToJsonSchema(SearchArgs) as Record<string, unknown>,
445
+ annotations: { readOnlyHint: true },
446
+ async execute(args) {
447
+ const results = await searchProducts(SearchArgs.parse(args)); // validate at the boundary
448
+ return { content: [{ type: 'text', text: JSON.stringify(results) }] };
449
+ },
450
+ });
451
+ ```
452
+
453
+ ### What about tools with no arguments?
454
+
455
+ They still need a schema — an explicit empty one:
456
+
457
+ ```ts
458
+ registerStatefulTool({
459
+ name: 'cart.clear',
460
+ description: 'Empty the cart.',
461
+ inputSchema: { type: 'object', properties: {} },
462
+ napsterTier: 'reversible',
463
+ async execute() {
464
+ await cartStore.clear();
465
+ return { content: [{ type: 'text', text: 'Cart cleared.' }] };
466
+ },
467
+ });
468
+ ```
469
+
470
+ The empty schema tells the agent "call with `{}`" rather than leaving the input shape undefined.
471
+
472
+ ### `inputSchema` is the contract, not the checker
473
+
474
+ Declaring `inputSchema` does not by itself validate arguments at runtime — it's the contract the agent reads, not a runtime guard. Validate at the boundary inside `execute` yourself (e.g. `SearchArgs.parse(args)` as above) when you want a hard runtime check; otherwise args pass through and the underlying function decides.
475
+
476
+ **The return is not the log line.** What a tool returns is the standard `content` payload the agent receives. Any one-line summary a dev tool prints in passing (`5 products · top: …`) is a *display* concern, not the return. Keep them visibly distinct so a reviewer is never misled about what the agent actually receives.
477
+
478
+ ## 7. Hosting mechanics (things that bite in practice)
479
+
480
+ - **Imperative actions from outside the component tree need a registered handle.** A tool's `execute` lives in a plain module, but things like navigation often only exist *inside* the framework (e.g. React Router's `useNavigate` hook). Register a handle at mount — an in-tree component obtains the function from `useNavigate()` (or your framework's equivalent) and stores it in a module-level slot via a setter such as `setNavHandle(...)` — and have `execute` call through that slot. Without it, `execute` has no way to drive navigation.
481
+ - **`document.modelContext` is shared by the whole document.** The polyfill installs once and the registry lives on the document, so it survives HMR / fast-refresh without splitting state. Re-importing the toolkit reuses the installed context rather than replacing it. Under HMR, re-running `tools.ts` re-registers tools onto the same context; if your bundler does a full page reload on edits instead (the common default without `import.meta.hot.accept()`), everything restarts cleanly — same end state via a different path.
482
+ - **HMR can leave `execute` holding a stale module-scope reference.** A tool like `async execute({ id }) { await cartStore.addLine(id, 1); }` captures `cartStore` at the moment `tools.ts` evaluates. Most modern bundlers use ESM live bindings, so when `cartStore` is hot-reloaded, the binding updates and the captured reference tracks the new value. But this isn't universal — CommonJS interop, certain bundler configs, or non-Vite stacks can leave `execute` calling into the old, dead store while the rest of the app uses the new one. Symptom: the tool "runs" without errors but the UI never updates because nothing's listening to the store it touched. Mitigation, if you hit this: wrap the store access in a getter that re-resolves at call time (`getCartStore().addLine(id, 1)`) so each invocation looks up the current module export. Only reach for this if the symptom appears — most stacks handle live bindings correctly out of the box.
483
+ - **Server-side rendering (SSR) has no `document`.** Outside the browser — Next.js / Nuxt / Remix / SvelteKit SSR, workers, edge runtimes — `document` doesn't exist, so the toolkit can't install `document.modelContext` and registrations have nowhere to land. This is why the import must be gated to the browser (step 2's framework wiring): importing it server-side either throws or no-ops, and running it on the server would leak subscriptions across requests and bleed per-user state through the Node process's shared globals. The real context spins up the moment the bundle hydrates in the browser.
484
+
485
+ ## 8. Offer the dev panel
486
+
487
+ The tools are wired but untested. Offer the developer the opt-in dev panel:
488
+
489
+ > "Want me to install a dev-only panel for testing the integration? It mounts in dev mode, lists every registered tool with a form for its arguments (rendered from each tool's `inputSchema`), shows every resource with a live JSON view, and logs every invoke and resource update. Toggle with `Cmd+Shift+E`. Skip if you'd rather not — the tools work either way."
490
+
491
+ If yes, run the `add-edge-mcp-dev-panel` skill — it handles the install and walks through usage. If no, the setup is done; testing is the developer's call.
492
+
493
+ ## 9. Verify at runtime, then sign off
494
+
495
+ ### Runtime verification is REQUIRED before sign-off — a green build is not enough.
496
+
497
+ A successful TypeScript compile means the code parses and type-checks. It does **not** mean the integration works. Tools can look correct in source and still fail at runtime because of:
498
+
499
+ - A missing handle (`<WebMcpHandles />` not mounted in the tree).
500
+ - A stale module reference under HMR.
501
+ - An SSR boundary (the toolkit import ran on the server, where `document` is undefined, so nothing installed).
502
+ - A `useRouter` hook returning `undefined` outside its provider.
503
+
504
+ The five-minute runtime check catches all of these. **Do it before declaring done.** Specifically:
505
+
506
+ 1. `npm run dev` and load the app in a browser.
507
+ 2. Open the browser console. Confirm you see the `[webmcp dev panel] mounted` log line if the dev panel was installed, or run `document.modelContext.listTools?.()` (or inspect `document.modelContext`) to confirm the polyfill installed and the registry is populated. If it's empty, the registrations didn't run — investigate before going further.
508
+ 3. Invoke **at least one tool per side-effect tier** you registered (`read`, `reversible`, `irreversible` if present). From the dev panel: open the panel (Cmd+Shift+E), expand the tool, fill its form, click Run. From DevTools: call the tool's `execute` through `document.modelContext` (e.g. `await document.modelContext.callTool?.('cart.add', { productId, qty: 1 })`, or whatever the standard invoke entry point is in your toolkit version).
509
+ 4. Confirm for each: the returned `content` makes sense, and the UI reacts appropriately (cart drawer slides open, page navigates, modal appears).
510
+ 5. If you registered resources: trigger a real out-of-band change (click a UI button, or wait for an auto-updating resource to tick) and confirm that the dev panel's RESOURCE log fires (or that re-reading the resource's `get()` shows the new value).
511
+
512
+ Only after these all pass — proceed to sign-off below.
513
+
514
+ ### Sign-off (in conversation, no separate file)
515
+
516
+ Walk the developer through:
517
+
518
+ - What's exposed (with tiers and idempotency flags).
519
+ - What's deliberately withheld.
520
+ - The resources, with a one-line reason each cleared the out-of-band gate (or: explicit acknowledgment that there are zero resources and why that's right for this app).
521
+ - Especially every `irreversible` — confirm whether it's `idempotent` and whether the underlying op has the right server-side dedup.
522
+ - **Confirmation that runtime verification passed** — list which tools you actually invoked successfully and what reacted.
523
+
524
+ When you finish, present:
525
+
526
+ - The path to the `src/webmcp/` folder (with `index.ts`, `tools.ts`, `resources.ts`, plus `dev-panel.ts` and any handles file if applicable).
527
+ - The tool list with tiers and idempotency.
528
+ - The resources with their why-each-cleared-the-gate notes (or "none registered, by design").
529
+ - The withheld-by-choice list.
530
+ - Whether the dev panel was installed (and where to toggle it if so).
531
+ - The result of runtime verification.
532
+
533
+ ## What's next (out of scope for this skill)
534
+
535
+ You've wired WebMCP into the app. The agent integration is a separate concern handled by whichever vendor SDK the developer chose:
536
+
537
+ - **Napster Omniagent** — see the skills in `napster/omniagent-api-skills` (`create-agent`, `deploy-webrtc`, etc.). The Web SDK reads the standard `document.modelContext` at init — no glue code in the customer's app.
538
+ - **Any other vendor that supports WebMCP** — follow that vendor's SDK install instructions. The standard `document.modelContext` registry is the same across vendors.
539
+
540
+ ## What you will NOT do in this skill
541
+
542
+ - Expose more than the agreed plan.
543
+ - Expose the app's route structure or add a generic `navigate` tool.
544
+ - Re-derive business logic the app already owns.
545
+ - Add a resource that just mirrors what a tool already returns.
546
+ - Build the agent UI, install a specific vendor's SDK, or configure a vendor-side resource (that's the next step's job).
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: sync-webmcp-tools
3
+ description: Keep a web app's WebMCP tool registrations in sync with its actual code — analyze the app, reconcile the tools file (add / update / remove), and edit only that file. This is the exact task the `webmcp-toolkit generate` automation runs (post-commit hook or CI), and it can also be followed directly in chat. It names no fixed path and defers layout, safety tiers, and methodology to the setup-edge-mcp and plan-capabilities-and-state skills.
4
+ ---
5
+
6
+ # sync-webmcp-tools
7
+
8
+ Keep a web app's WebMCP tool registrations in sync with its code. You are running inside the app's repository — your working directory is the app root.
9
+
10
+ This task is **universal**: it is identical whether it runs from the automation (`webmcp-toolkit generate`, triggered by the post-commit hook or CI) or is followed as a skill in chat. It names no fixed path — where the tools file lives is decided by the methodology skills and the app's own conventions, not by this instruction.
11
+
12
+ Follow the **`plan-capabilities-and-state`** and **`setup-edge-mcp`** skills EXACTLY. They are your source of truth for the analysis approach, the safety tiers, naming, what to deliberately withhold, and where the tools file lives (conventional layout: a `src/webmcp/` folder with `tools.ts`, but adapt to the app's actual structure). When this task is run by the automation, those two skills are inlined below the instruction; in chat, load them as skills.
13
+
14
+ ## Steps
15
+
16
+ 1. **Read the app's real code** — service/API modules, stores, routes, components — to understand what operations actually exist. The running code is the source of truth; ignore stale docs that contradict it.
17
+ 2. **Locate the WebMCP tools file** per the skills' layout, or wherever the app already registers tools. If none exists yet, create it at the conventional location the skills describe.
18
+ 3. **Read that tools file** (if present) to see what's already registered.
19
+ 4. **Reconcile.** Add tools for real operations that should be exposed, update ones whose signature or safety tier changed, and remove ones whose underlying code no longer exists. Each tool's `execute` must call the app's real function — do not invent endpoints. Use standard `document.modelContext.registerTool(...)` for plain tools, and `registerStatefulTool(...)` (from `@napster-corp/webmcp-toolkit`) for tools that need a safety tier.
20
+ 5. **Edit ONLY that one tools file.** Do not edit any other file. Match its existing style and imports.
21
+
22
+ ## Constraints
23
+
24
+ - Edit the single tools file and nothing else.
25
+ - Do not run shell commands, install packages, or touch git.
26
+ - When done, output a short summary: tools ADDED / UPDATED / REMOVED, each with the `file:function` evidence behind it.
package/src/debug.ts ADDED
@@ -0,0 +1,26 @@
1
+ // Opt-in flow logging for the toolkit. OFF by default so the published package
2
+ // is quiet in production. Turn it on either from code — `setDebug(true)` — or at
3
+ // runtime from the browser console: `globalThis.__WEBMCP_DEBUG__ = true`.
4
+
5
+ let enabled = false; // In-code debug switch: written by setDebug(), read by isOn(); starts off.
6
+
7
+ /** Enable or disable the toolkit's `[webmcp-toolkit]` flow logs. Note: `setDebug(false)` only clears this in-code flag — logs still appear while `globalThis.__WEBMCP_DEBUG__` is truthy. */
8
+ export function setDebug(on: boolean): void {
9
+ enabled = on;
10
+ }
11
+
12
+ function isOn(): boolean { // True when either the in-code flag or the runtime global requests logging.
13
+ if (enabled) return true; // In-code flag wins; the global is not consulted.
14
+ try {
15
+ return Boolean((globalThis as { __WEBMCP_DEBUG__?: unknown }).__WEBMCP_DEBUG__); // Any truthy value counts, not just true.
16
+ } catch { // Defensive: if reading the global throws, treat debug as off.
17
+ return false;
18
+ }
19
+ }
20
+
21
+ /** Log a flow event when debug is on. No-op otherwise. Output goes to `console.info`, prefixed with `[webmcp-toolkit]`; `args` are forwarded unchanged. */
22
+ export function debugLog(...args: unknown[]): void {
23
+ if (!isOn()) return; // Checked on every call, so toggling takes effect immediately.
24
+ // eslint-disable-next-line no-console
25
+ console.info('[webmcp-toolkit]', ...args);
26
+ }