clickshot-mcp 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +6 -2
  2. package/package.json +1 -1
  3. package/server.js +139 -12
package/README.md CHANGED
@@ -49,8 +49,12 @@ extension ingest port.
49
49
 
50
50
  ## Tools
51
51
 
52
- - `get_recent_activity(limit)` — last N clicks as annotated screenshots + a log
53
- (URL, page title, clicked element, text)
52
+ - `get_recent_activity(limit)` — last N frames as screenshots + a log (URL, page
53
+ title, clicked element/text; periodic frames are labeled)
54
+ - `start_watching(task)` — Claude asks to watch the user perform a task. Turns on
55
+ recording + periodic frames in the extension (with a visible "Claude is watching"
56
+ indicator). The MCP client prompts the user to approve this.
57
+ - `stop_watching()` — end the watch session
54
58
  - `clear_activity()` — wipe the buffer
55
59
 
56
60
  ## Ingest API (used by the extension)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clickshot-mcp",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "Local MCP server that feeds ClickShot screenshots into your running Claude (Code/Desktop) on your own subscription.",
5
5
  "type": "module",
6
6
  "bin": {
package/server.js CHANGED
@@ -18,7 +18,9 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
18
18
  import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
19
19
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
20
20
 
21
- const VERSION = "0.1.1";
21
+ const VERSION = "0.1.3";
22
+
23
+ const WATCH_INTERVAL_MS = 4000; // periodic-frame cadence the extension uses while watching
22
24
 
23
25
  // stdout is the JSON-RPC channel in stdio mode, so all logging goes to stderr.
24
26
  const log = (...a) => console.error(...a);
@@ -52,6 +54,49 @@ const MAX_BUFFER = 200; // keep the most recent N captures in memory
52
54
  const captures = [];
53
55
  let nextId = 1;
54
56
 
57
+ // Claude-initiated "watch" session state. The extension holds an SSE stream
58
+ // (GET /events) and starts recording (+ periodic frames) when `on` is true.
59
+ // Turned on by the start_watching tool (which the MCP client gates with a user
60
+ // approval prompt) and off by stop_watching or by the user via the extension.
61
+ const watch = { on: false, task: null, since: null };
62
+
63
+ // SSE clients (the extension's background worker). The stream keeps the MV3
64
+ // service worker alive and lets the server push state changes + frame ticks.
65
+ const sseClients = new Set();
66
+
67
+ function sseSend(res, event, data) {
68
+ try {
69
+ res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
70
+ } catch (e) {
71
+ /* client gone; cleaned up on 'close' */
72
+ }
73
+ }
74
+
75
+ function broadcast(event, data) {
76
+ for (const res of sseClients) sseSend(res, event, data);
77
+ }
78
+
79
+ function watchState() {
80
+ return { watching: watch.on, task: watch.task, since: watch.since, intervalMs: WATCH_INTERVAL_MS };
81
+ }
82
+
83
+ function setWatch(on, task) {
84
+ watch.on = !!on;
85
+ watch.task = on ? task || null : null;
86
+ watch.since = on ? Date.now() : null;
87
+ broadcast("state", watchState()); // push the change to the extension immediately
88
+ }
89
+
90
+ // One process-wide ticker drives the push channel:
91
+ // • every second: while watching, emit a frame tick on the 4s cadence
92
+ // • every ~12s: emit a state heartbeat so the worker stays alive even when idle
93
+ let tick = 0;
94
+ setInterval(() => {
95
+ tick++;
96
+ if (watch.on && tick % Math.round(WATCH_INTERVAL_MS / 1000) === 0) broadcast("frame", { t: tick });
97
+ if (tick % 12 === 0) broadcast("state", watchState());
98
+ }, 1000);
99
+
55
100
  // ---------------------------------------------------------------------------
56
101
  // MCP server definition (tools)
57
102
  // ---------------------------------------------------------------------------
@@ -91,22 +136,26 @@ function buildMcpServer() {
91
136
  {
92
137
  type: "text",
93
138
  text:
94
- `Most recent ${recent.length} browser click(s), oldest first. ` +
95
- `Each screenshot has a red marker at the click point.`,
139
+ `Most recent ${recent.length} browser frame(s), oldest first. ` +
140
+ `Click frames have a red marker at the click point; periodic frames ` +
141
+ `(captured while watching) do not.`,
96
142
  },
97
143
  ];
98
144
 
99
145
  for (const c of recent) {
100
146
  const m = c.meta || {};
101
147
  const when = m.timestamp ? new Date(m.timestamp).toISOString() : "unknown time";
102
- const what =
103
- (m.tag ? `<${m.tag}>` : "element") +
104
- (m.text ? ` "${m.text}"` : "") +
105
- (m.selector ? ` [${m.selector}]` : "");
106
- content.push({
107
- type: "text",
108
- text: `#${c.id} ${when} ${m.title || ""}\n ${m.url || ""}\n clicked ${what}`,
109
- });
148
+ let line;
149
+ if (m.kind === "interval") {
150
+ line = `#${c.id} ${when} — ${m.title || ""}\n ${m.url || ""}\n (periodic frame)`;
151
+ } else {
152
+ const what =
153
+ (m.tag ? `<${m.tag}>` : "element") +
154
+ (m.text ? ` "${m.text}"` : "") +
155
+ (m.selector ? ` [${m.selector}]` : "");
156
+ line = `#${c.id} ${when} — ${m.title || ""}\n ${m.url || ""}\n clicked ${what}`;
157
+ }
158
+ content.push({ type: "text", text: line });
110
159
  content.push({ type: "image", data: c.image, mimeType: c.mimeType || "image/jpeg" });
111
160
  }
112
161
 
@@ -114,6 +163,56 @@ function buildMcpServer() {
114
163
  }
115
164
  );
116
165
 
166
+ server.registerTool(
167
+ "start_watching",
168
+ {
169
+ title: "Watch the user perform a task",
170
+ description:
171
+ "Begin observing the user's browser so you can see how they perform a task you've proposed. " +
172
+ "Use this AFTER suggesting a concrete task and getting the user's go-ahead: it turns on " +
173
+ "recording in their ClickShot extension (with a visible on-screen 'Claude is watching' " +
174
+ "indicator) and captures their clicks plus periodic screenshots. When the user says they're " +
175
+ "done, call get_recent_activity to review how they did it, then stop_watching. " +
176
+ "The user is shown an approval prompt for this action and can stop watching at any time.",
177
+ inputSchema: {
178
+ task: z.string().describe("The task you are about to watch the user perform, e.g. 'file an expense report'."),
179
+ },
180
+ },
181
+ async ({ task }) => {
182
+ setWatch(true, task);
183
+ return {
184
+ content: [
185
+ {
186
+ type: "text",
187
+ text:
188
+ `Now watching for: "${task}".\n` +
189
+ `The ClickShot extension will start recording within a couple of seconds and show a ` +
190
+ `"Claude is watching" indicator. Ask the user to perform the task now. When they say ` +
191
+ `they're done, call get_recent_activity to review the steps, then stop_watching.`,
192
+ },
193
+ ],
194
+ };
195
+ }
196
+ );
197
+
198
+ server.registerTool(
199
+ "stop_watching",
200
+ {
201
+ title: "Stop watching",
202
+ description: "End the current watch session. The extension stops recording and hides the watching indicator.",
203
+ inputSchema: {},
204
+ },
205
+ async () => {
206
+ const was = watch.task;
207
+ setWatch(false);
208
+ return {
209
+ content: [
210
+ { type: "text", text: was ? `Stopped watching "${was}".` : "Watching was not active." },
211
+ ],
212
+ };
213
+ }
214
+ );
215
+
117
216
  server.registerTool(
118
217
  "clear_activity",
119
218
  {
@@ -147,7 +246,35 @@ function buildIngestApp() {
147
246
  res.json({ ok: true, buffered: captures.length });
148
247
  });
149
248
 
150
- app.get("/health", (_req, res) => res.json({ ok: true, captures: captures.length, mode: STDIO ? "stdio" : "http" }));
249
+ app.get("/health", (_req, res) => res.json({ ok: true, captures: captures.length, mode: STDIO ? "stdio" : "http", watching: watch.on }));
250
+
251
+ // The extension polls this to know whether Claude has asked to watch.
252
+ app.get("/watch", (_req, res) =>
253
+ res.json({ watching: watch.on, task: watch.task, since: watch.since, intervalMs: WATCH_INTERVAL_MS })
254
+ );
255
+
256
+ // Lets the extension (the user's Stop button) revoke a watch session.
257
+ app.post("/watch", (req, res) => {
258
+ const on = !!(req.body && req.body.watching);
259
+ setWatch(on, req.body && req.body.task);
260
+ res.json({ ok: true, watching: watch.on });
261
+ });
262
+
263
+ // SSE push channel. The extension holds this open; it keeps the MV3 service
264
+ // worker alive and receives `state` (watch on/off) and `frame` (capture now)
265
+ // events. Replaces the old setInterval polling, which died when the worker slept.
266
+ app.get("/events", (req, res) => {
267
+ res.set({
268
+ "Content-Type": "text/event-stream",
269
+ "Cache-Control": "no-cache, no-transform",
270
+ Connection: "keep-alive",
271
+ });
272
+ res.flushHeaders?.();
273
+ sseClients.add(res);
274
+ sseSend(res, "state", watchState()); // sync current state on connect
275
+ req.on("close", () => sseClients.delete(res));
276
+ });
277
+
151
278
  return app;
152
279
  }
153
280