@klura/mcp 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/README.md +1 -1
  2. package/index.js +14 -316
  3. package/package.json +2 -3
  4. package/tools.js +0 -13
package/README.md CHANGED
@@ -34,7 +34,7 @@ Restart the client. The agent picks up the klura toolset automatically.
34
34
  Two surfaces land in the agent's context:
35
35
 
36
36
  - **Tools** — browser automation (`start_session`, `perform_action`, `get_screenshot`, `get_a11y_tree`), discovery + persistence (`save_strategy`, `execute`, `list_platform_skills`, `get_strategy`), network-log inspection (`get_network_log`, `find_in_page`), and the reverse-engineering escape hatches (`inspect_ws_frame`, `try_generator`, `js_eval`, `get_js_source`, `search_js_source`, `read_js_function`, `set_breakpoint`, `wait_for_pause`, and more). The runtime owns the canonical list; this server mirrors it one-for-one.
37
- - **Resource** `klura://reference` — the detailed reference doc, served section-by-section via URL fragments (`klura://reference#reverse-engineer-playbook`, `klura://reference#strategy-schemas-overview`, etc.) so each response fits inside the MCP output budget. Fetch `klura://reference` with no fragment for a table of contents.
37
+ - **Resource** `klura://reference` — the detailed reference doc, served section-by-section via URL fragments (`klura://reference#reverse-engineer-playbook`, `klura://reference#recorded-path-schema`, etc.) so each response fits inside the MCP output budget. Fetch `klura://reference` with no fragment for a table of contents.
38
38
 
39
39
  The always-loaded orientation is SKILL.md, passed as the server's `instructions` capability. Agents read SKILL.md on every conversation and pull detail on demand via the `klura://reference` fragments.
40
40
 
package/index.js CHANGED
@@ -15,325 +15,23 @@
15
15
  // }
16
16
  // }
17
17
  // }
18
+ //
19
+ // This package is a thin stdio wrapper. The server factory itself —
20
+ // `createKluraMcpServer()` — lives in `@klura/runtime` (runtime/mcp-server.js),
21
+ // so the runtime's optional CLI agent can build the same server without a
22
+ // dependency cycle (`@klura/mcp` depends on `@klura/runtime`, never the
23
+ // reverse). The factory is re-exported here for back-compatibility.
18
24
 
19
- // Build the klura MCP server — wires every tool + resource handler onto a
20
- // fresh Server instance and returns it unconnected. Callers pick a transport:
21
- // `main()` below attaches stdio for the CLI path; the field-reports harness
22
- // imports this directly and passes the instance to the Agent SDK via
23
- // `{type:'sdk', instance}` so the browser pool survives across SDK `resume`
24
- // queries (each resume would otherwise spawn a fresh stdio child and orphan
25
- // every in-memory session).
26
- async function createKluraMcpServer() {
27
- const { Server } = await import('@modelcontextprotocol/sdk/server/index.js');
28
- const { ListToolsRequestSchema, CallToolRequestSchema, ListResourcesRequestSchema, ReadResourceRequestSchema } = await import('@modelcontextprotocol/sdk/types.js');
29
- // Load klura runtime
30
- const klura = require('@klura/runtime');
31
-
32
- // SKILL.md (compact) is the always-loaded orientation.
33
- // REFERENCE.md (detailed schemas, examples) is served as an on-demand
34
- // resource via klura.resolveReferenceResource — see the ReadResource
35
- // handler below for the fragment-based section addressing.
36
- const skillMd = klura.getSkillMd()
37
- .replace(/^---[\s\S]*?---\s*/, ''); // strip frontmatter
38
-
39
- // Front-load a terse per-platform capability catalog so agents see what
40
- // klura already knows BEFORE the first tool call. The list_platform_skills
41
- // _hint only fires when the agent calls the tool, but the load-bearing
42
- // failure mode (observed in field) is the agent skipping that call entirely
43
- // and going straight to start_session for work an existing capability
44
- // already covers. The deliberate principle break + always-save framing
45
- // live in the rendered string itself (see getSavedSkillsSummaryMd).
46
- const savedSkills = klura.getSavedSkillsSummaryMd();
47
- const instructions = savedSkills ? `${skillMd}\n\n${savedSkills}` : skillMd;
48
-
49
- const server = new Server(
50
- { name: '@klura/mcp', version: '0.1.0' },
51
- { capabilities: { tools: {}, resources: {} }, instructions }
52
- );
53
-
54
- // -- Resources (on-demand reference docs) --
55
- //
56
- // REFERENCE.md is served section-by-section via URL fragments so each
57
- // response fits inside the MCP output budget. Fetching `klura://reference`
58
- // with no fragment returns a short table of contents listing every
59
- // addressable `#<slug>`; fetching `klura://reference#<slug>` returns only
60
- // that section. The section parser + budget logic lives in the runtime
61
- // module (`runtime/src/reference-sections.ts`) so a pre-commit test can
62
- // assert every section fits before a regression lands.
63
-
64
- server.setRequestHandler(ListResourcesRequestSchema, async () => {
65
- const sections = klura.listReferenceSections();
66
- return {
67
- resources: [
68
- {
69
- uri: 'klura://reference',
70
- name: 'Klura Reference — Table of Contents',
71
- description:
72
- 'Table of contents for the detailed reference. Fetch individual sections by appending a URL fragment, e.g. klura://reference#fetch-schema. ' +
73
- `Available sections: ${sections.map((s) => '#' + s.slug).join(', ')}.`,
74
- mimeType: 'text/markdown',
75
- },
76
- ],
77
- };
78
- });
79
-
80
- server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
81
- const uri = request.params.uri;
82
- try {
83
- const { text } = klura.resolveReferenceResource(uri);
84
- return {
85
- contents: [{ uri, mimeType: 'text/markdown', text }],
86
- };
87
- } catch (err) {
88
- // Surface the runtime's helpful error (which lists available slugs)
89
- // directly to the MCP client instead of swallowing it.
90
- throw new Error(err instanceof Error ? err.message : String(err));
91
- }
92
- });
93
-
94
- // -- Tool registry --
95
- //
96
- // Every tool's schema and handler live colocated in mcp/tools.js. The
97
- // ListTools handler reads the registry; the CallTool dispatcher looks up
98
- // the entry by name and invokes its handler. Adding a tool means adding
99
- // exactly one entry there — no separate switch case to keep in sync.
100
- const tools = require('./tools.js')(klura);
101
- const toolByName = new Map(tools.map((t) => [t.name, t]));
102
-
103
- server.setRequestHandler(ListToolsRequestSchema, async () => ({
104
- tools: tools.map(({ name, description, inputSchema }) => ({ name, description, inputSchema })),
105
- }));
106
-
107
- // OpenAI-style tool_calls deliver `arguments` as a JSON string parsed
108
- // once by the client. Many non-Anthropic models then JSON-encode nested
109
- // object/array fields a SECOND time, so we receive `args.foo === "{...}"`
110
- // where the schema declares `foo: {type: 'object'}`. Walk the inputSchema
111
- // and parse strings that the schema says shouldn't be strings. Only
112
- // strict JSON ('{', '['); leave anything else alone so we don't disturb
113
- // legitimately stringy fields. Best-effort: if parse fails or the value
114
- // doesn't match the declared type after parsing, leave it for the
115
- // runtime's own validators to reject with a useful error.
116
- function coerceArgs(toolName, args) {
117
- const tool = toolByName.get(toolName);
118
- const schema = tool && tool.inputSchema;
119
- if (!schema || !schema.properties || !args || typeof args !== 'object') return args;
120
- for (const [key, propSchema] of Object.entries(schema.properties)) {
121
- const v = args[key];
122
- if (typeof v !== 'string') continue;
123
- const expected = propSchema?.type;
124
- const wantsContainer =
125
- expected === 'object' ||
126
- expected === 'array' ||
127
- (Array.isArray(expected) && (expected.includes('object') || expected.includes('array')));
128
- if (!wantsContainer) continue;
129
- const trimmed = v.trim();
130
- if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) continue;
131
- try {
132
- const parsed = JSON.parse(trimmed);
133
- const parsedType = Array.isArray(parsed) ? 'array' : typeof parsed;
134
- const matches = Array.isArray(expected)
135
- ? expected.includes(parsedType)
136
- : expected === parsedType;
137
- if (matches) args[key] = parsed;
138
- } catch { /* leave for downstream validator */ }
139
- }
140
- return args;
141
- }
142
-
143
- // -- Tool execution --
144
-
145
- server.setRequestHandler(CallToolRequestSchema, async (request, extra) => {
146
- const { name, arguments: rawArgs } = request.params;
147
- const tool = toolByName.get(name);
148
- if (!tool) {
149
- return {
150
- content: [{ type: 'text', text: `Unknown tool: ${name}` }],
151
- isError: true,
152
- };
153
- }
154
- const args = coerceArgs(name, rawArgs);
155
-
156
- // Progress notifications. When the client request carried
157
- // `_meta.progressToken`, the SDK exposes it on `extra._meta` and gives us
158
- // `extra.sendNotification` for sending `notifications/progress` bound to
159
- // that token. Clients that honor this (Claude Desktop via MCP SDK with
160
- // `resetTimeoutOnProgress: true`) reset their per-request timeout each
161
- // time a progress arrives — turning a 4-minute hard deadline into a
162
- // sliding window that survives long-running tools (end_drive on a real
163
- // RE session does heavy synthesis + audit + handoff prose composition).
164
- //
165
- // Two emit paths:
166
- // - Explicit phase boundaries inside the tool (e.g. endDrive's
167
- // progress({stage: '...'}) calls). Names what's running so the user
168
- // sees specific status, not just "still working".
169
- // - 30s heartbeat for tools that don't emit explicit progress. Fires
170
- // only when no explicit progress arrived in the last interval, so
171
- // instrumented tools don't double-emit.
172
- let progressCount = 0;
173
- let lastProgressAt = Date.now();
174
- let progress;
175
- let heartbeat;
176
- const progressToken = extra && extra._meta ? extra._meta.progressToken : undefined;
177
- if (progressToken !== undefined && extra && typeof extra.sendNotification === 'function') {
178
- progress = ({ stage, current, total } = {}) => {
179
- progressCount += 1;
180
- lastProgressAt = Date.now();
181
- extra
182
- .sendNotification({
183
- method: 'notifications/progress',
184
- params: {
185
- progressToken,
186
- progress: typeof current === 'number' ? current : progressCount,
187
- ...(typeof total === 'number' ? { total } : {}),
188
- ...(typeof stage === 'string' ? { message: stage } : {}),
189
- },
190
- })
191
- .catch(() => { /* notification send failure is non-fatal */ });
192
- };
193
- heartbeat = setInterval(() => {
194
- if (Date.now() - lastProgressAt >= 30000) {
195
- progress({ stage: 'still working' });
196
- }
197
- }, 30000);
198
- }
199
-
200
- try {
201
- // Phase admissibility — hard tool blocking per the session-phase
202
- // state machine. Tools not in the current phase's allowedTools
203
- // (or, when budget is exhausted, not in allowedToolsWhenExhausted)
204
- // are rejected here without running. Universal tools (control
205
- // plane, memory reads, escape valve) bypass; tools called without
206
- // a session (start_session, etc.) bypass too. After admission,
207
- // tickPhaseCounter increments the per-phase round counter and
208
- // engages the soft-block flag when the budget is hit.
209
- if (args && args.session_id) {
210
- try {
211
- klura.assertToolAdmissibleBySessionId(args.session_id, name);
212
- } catch (err) {
213
- if (err instanceof klura.ToolNotAdmissibleError) {
214
- return {
215
- content: [
216
- {
217
- type: 'text',
218
- text: JSON.stringify({
219
- ok: false,
220
- error: 'tool_not_admissible',
221
- phase: err.phase,
222
- tool: err.toolName,
223
- message: err.reason,
224
- }, null, 2),
225
- },
226
- ],
227
- isError: true,
228
- };
229
- }
230
- throw err;
231
- }
232
- }
233
-
234
- // Pending-interruption / pending-checkpoint gates. A prior tool
235
- // call returned a handover resolution; every subsequent tool call
236
- // on the same session must echo the relevant token + an ack
237
- // (user_response / viewer_result) or cancel with {cancelled: true,
238
- // reason}. Tools that deliberately resolve the matching pending
239
- // state opt out via `skipInterruptionGate` / `skipCheckpointGate`
240
- // on their registry entry.
241
- if (args && args.session_id && !tool.skipInterruptionGate) {
242
- klura.assertNoPendingInterruption(args.session_id, {
243
- interruption_token: args.interruption_token,
244
- user_response: args.user_response,
245
- viewer_result: args.viewer_result,
246
- cancelled: args.cancelled,
247
- reason: args.reason,
248
- });
249
- }
250
- if (args && args.session_id && !tool.skipCheckpointGate) {
251
- klura.assertNoPendingCheckpoint(args.session_id, {
252
- checkpoint_token: args.checkpoint_token,
253
- user_response: args.user_response,
254
- viewer_result: args.viewer_result,
255
- cancelled: args.cancelled,
256
- reason: args.reason,
257
- });
258
- }
259
-
260
- let result = await tool.handler(args, { progress });
261
-
262
- // Inject sticky LIFT obligation reminder. Fires on every tool
263
- // response between the first mutating perform_action and either a
264
- // successful save_strategy or end_drive ok:true. Once-per-session
265
- // semantics → no token-binding needed (see runtime/docs/gates.md
266
- // §once-vs-many). klura.formatToolResult hoists the obligation
267
- // message into a leading [klura obligation]: <message> text block
268
- // so the model reads it as a top-level directive rather than buried
269
- // inside the JSON-stringified payload — that hoist + the imperative
270
- // wording in session-obligations.ts is the primary mechanism. If a
271
- // model still ends_turn with an open obligation despite reading the
272
- // hoisted block, treat that as a runtime weakness worth surfacing,
273
- // not a harness gap to paper over.
274
- if (args && args.session_id) {
275
- try {
276
- const obligation = klura.getSessionObligation(args.session_id);
277
- if (obligation && result && typeof result === 'object' && !Array.isArray(result)) {
278
- result = { ...result, _session_obligation: obligation };
279
- }
280
- } catch {
281
- /* non-fatal */
282
- }
283
- }
284
-
285
- // Convert to MCP content blocks (screenshots become image blocks)
286
- const blocks = klura.formatToolResult(name, result);
287
- return {
288
- content: blocks.map(b =>
289
- b.type === 'image'
290
- ? { type: 'image', data: b.data, mimeType: b.mediaType }
291
- : { type: 'text', text: b.text }
292
- ),
293
- };
294
- } catch (err) {
295
- // Attach the LIFT obligation to error responses too. Without this,
296
- // every save_strategy / end_drive rejection drops the "MUST be
297
- // end_drive" anchor exactly when the agent most needs it — agents
298
- // reading just the bare error treat the failure as a one-off shape
299
- // complaint and end the turn after the user-facing goal looks done.
300
- let obligationLine = '';
301
- if (args && args.session_id) {
302
- try {
303
- const obligation = klura.getSessionObligation(args.session_id);
304
- if (obligation && obligation.message) {
305
- obligationLine = `[klura obligation]: ${obligation.message}\n\n`;
306
- }
307
- } catch { /* non-fatal */ }
308
- }
309
-
310
- // klura's audit-style rejections (`invalid_<kind>: ...` and
311
- // `invalid_<kind>_rejected (<reason>)`) are iteration steps, not tool
312
- // errors — the agent's expected next move is to re-call the same tool
313
- // with audit_token + audit_answers. Returning these with
314
- // `isError: true` makes the SDK surface them as tool errors, which
315
- // Claude reads as "task failed, here's why" and reflexively wraps up
316
- // with text (end_turn) instead of continuing the iteration loop. Send
317
- // them as normal text results so the model treats them as data.
318
- const msg = typeof err.message === 'string' ? err.message : String(err);
319
- if (/^invalid_[a-z_]+:/.test(msg)) {
320
- return {
321
- content: [{ type: 'text', text: `${obligationLine}${msg}` }],
322
- };
323
- }
324
- return {
325
- content: [{ type: 'text', text: `${obligationLine}Error: ${msg}` }],
326
- isError: true,
327
- };
328
- } finally {
329
- if (heartbeat) clearInterval(heartbeat);
330
- }
331
- });
332
-
333
- return server;
334
- }
25
+ const { createKluraMcpServer } = require('@klura/runtime/mcp-server');
335
26
 
336
27
  async function main() {
28
+ // Latch this process as driven by an external MCP host BEFORE anything else.
29
+ // This is the load-bearing layer of the agent guardrail: with the flag set,
30
+ // the optional klura CLI LLM agent refuses to run, so it can never start a
31
+ // second LLM underneath the host that is already driving klura. Stdio is the
32
+ // external-host transport; the in-memory transport the CLI agent and the
33
+ // test harnesses use never reaches this path.
34
+ require('@klura/runtime').markExternalMcpHost();
337
35
  const { StdioServerTransport } = await import('@modelcontextprotocol/sdk/server/stdio.js');
338
36
  const server = await createKluraMcpServer();
339
37
  const transport = new StdioServerTransport();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@klura/mcp",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
@@ -11,7 +11,6 @@
11
11
  },
12
12
  "files": [
13
13
  "index.js",
14
- "tools.js",
15
14
  "LICENSE",
16
15
  "README.md"
17
16
  ],
@@ -31,7 +30,7 @@
31
30
  },
32
31
  "dependencies": {
33
32
  "@modelcontextprotocol/sdk": "^1.0.0",
34
- "@klura/runtime": "^0.2.0"
33
+ "@klura/runtime": "^0.3.0"
35
34
  },
36
35
  "devDependencies": {
37
36
  "@eslint/js": "^10.0.1",
package/tools.js DELETED
@@ -1,13 +0,0 @@
1
- // Klura MCP tool catalog — thin wrapper over the runtime's TOOL_REGISTRY.
2
- //
3
- // Every tool's name, description, inputSchema, and handler is defined
4
- // colocated with its implementation in `runtime/src/tools/*.ts` and
5
- // assembled into `TOOL_REGISTRY` (re-exported from `@klura/runtime`).
6
- // `mcp/index.js` calls this factory to get the array; tools that own a
7
- // runtime gate (interruption / checkpoint) opt out of the generic pre-call
8
- // assertion via `skipInterruptionGate` / `skipCheckpointGate` set on the
9
- // TOOL_DEF — see the dispatcher in `mcp/index.js`.
10
-
11
- module.exports = function defineTools(klura) {
12
- return klura.TOOL_REGISTRY;
13
- };