@thinkrun/mcp 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,12 @@
5
5
  * script injection attempts. The server-side also applies JSON.stringify()
6
6
  * escaping when embedding selectors in error messages (defense-in-depth).
7
7
  */
8
+ import { existsSync, readFileSync } from 'node:fs';
9
+ import { homedir } from 'node:os';
10
+ import { join } from 'node:path';
8
11
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
12
+ import { readTabAuditState, resolveTabAuditState, writeTabAuditState, } from '@thinkrun/shared/audit-state';
13
+ import { patchLocalAuditState } from '@thinkrun/shared/local-audit-sync';
9
14
  import { z } from 'zod';
10
15
  /** Zod schema for CSS selectors — rejects obvious script injection patterns. */
11
16
  const cssSelector = z
@@ -59,6 +64,75 @@ export const MAX_GET_HTML_CHARS = 200_000;
59
64
  export const MAX_SLEEP_MS = 30_000;
60
65
  const LOCAL_TOOL_ENRICH_TIMEOUT_MS = 300;
61
66
  const LOCAL_TOOL_ENRICH_BRIDGE_TIMEOUT_MS = 150;
67
/**
 * Resolve the directory that holds ThinkRun lock and local-session files.
 *
 * Resolution order:
 *   1. `THINKRUN_LOCK_DIR`; `THINKBROWSE_LOCK_DIR` is consulted only when the
 *      first variable is unset. If the selected value is empty, resolution
 *      continues with the home-directory defaults below.
 *   2. `~/.thinkbrowse`, but only when it exists and `~/.thinkrun` does not.
 *   3. `~/.thinkrun`.
 *
 * @returns {string} The resolved directory path. Nothing is created on disk.
 */
function getThinkrunLockDir() {
    // Keep this fallback sequence aligned with packages/shared/src/audit-state.js#getLockDir.
    const configuredDir = process.env.THINKRUN_LOCK_DIR ?? process.env.THINKBROWSE_LOCK_DIR;
    if (configuredDir) {
        return configuredDir;
    }
    const home = homedir();
    const thinkrunDir = join(home, '.thinkrun');
    const thinkbrowseDir = join(home, '.thinkbrowse');
    // Only fall back to the legacy directory when it is the sole one present.
    const onlyLegacyExists = !existsSync(thinkrunDir) && existsSync(thinkbrowseDir);
    return onlyLegacyExists ? thinkbrowseDir : thinkrunDir;
}
79
/**
 * Build the path of the local-session file for a tab.
 *
 * The tab ID is reduced to `[a-zA-Z0-9_-]` (every other character becomes `_`)
 * so it cannot introduce path separators into the file name. Numeric IDs are
 * accepted and stringified; an empty, `null` or `undefined` ID maps to
 * `unknown`.
 *
 * @param {string|number|null|undefined} tabId - Tab identifier.
 * @returns {string} `<lock dir>/local-session-<safe tab id>.json`.
 */
function getCliSessionFilePath(tabId) {
    // String() lets numeric tab IDs through; `?? ''` routes nullish values to 'unknown'.
    const safeTabId = String(tabId ?? '').replace(/[^a-zA-Z0-9_-]/g, '_') || 'unknown';
    return join(getThinkrunLockDir(), `local-session-${safeTabId}.json`);
}
83
/**
 * Read the session ID stored in a tab's local-session file.
 *
 * This is a best-effort lookup: a missing file, unreadable file, malformed
 * JSON, or a `sessionId` that is not a non-empty string all yield `undefined`.
 *
 * @param {string} tabId - Tab identifier used to locate the session file.
 * @returns {string|undefined} The stored session ID, if one is available.
 */
function readPersistedCliSessionId(tabId) {
    try {
        const filePath = getCliSessionFilePath(tabId);
        if (!existsSync(filePath)) {
            return undefined;
        }
        const parsed = JSON.parse(readFileSync(filePath, 'utf-8'));
        const candidate = parsed.sessionId;
        const isUsable = typeof candidate === 'string' && candidate.length > 0;
        return isUsable ? candidate : undefined;
    }
    catch {
        // Any failure (I/O, parse, `null` document) means "no persisted session".
        return undefined;
    }
}
97
/**
 * Look up the API key and base URL used for backend calls.
 *
 * Sources, in order:
 *   1. Environment: `THINKRUN_API_KEY` (trimmed) with `THINKRUN_BASE_URL`
 *      (trimmed) as the URL. The URL variable is only read on this path.
 *   2. `config.json` in `THINKRUN_CONFIG_DIR`, or `~/.config/thinkrun` when the
 *      variable is unset or empty.
 *   3. `~/.config/thinkbrowse/config.json`.
 *
 * The first source that supplies a non-empty key decides the outcome. If its
 * URL does not start with `https://`, the function returns `null` instead of
 * trying the remaining sources. A missing or blank URL falls back to
 * `https://api.thinkrun.ai`.
 *
 * @returns {{ apiKey: string, apiUrl: string } | null} Credentials, or `null`
 *   when no key is found or the selected URL is not https.
 */
function getThinkrunApiCredentials() {
    const defaultApiUrl = 'https://api.thinkrun.ai';
    // Never hand a key to a non-https endpoint.
    const credentialsFor = (apiKey, apiUrl) => (apiUrl.startsWith('https://') ? { apiKey, apiUrl } : null);
    const envApiKey = process.env.THINKRUN_API_KEY?.trim();
    const envApiUrl = process.env.THINKRUN_BASE_URL?.trim();
    if (envApiKey) {
        return credentialsFor(envApiKey, envApiUrl || defaultApiUrl);
    }
    const configPaths = [
        join(process.env.THINKRUN_CONFIG_DIR || join(homedir(), '.config', 'thinkrun'), 'config.json'),
        join(homedir(), '.config', 'thinkbrowse', 'config.json'),
    ];
    for (const configPath of configPaths) {
        try {
            if (!existsSync(configPath)) {
                continue;
            }
            const parsed = JSON.parse(readFileSync(configPath, 'utf-8'));
            const apiKey = typeof parsed.apiKey === 'string' ? parsed.apiKey.trim() : '';
            if (!apiKey) {
                continue;
            }
            const configuredUrl = typeof parsed.apiUrl === 'string' ? parsed.apiUrl.trim() : '';
            return credentialsFor(apiKey, configuredUrl || defaultApiUrl);
        }
        catch {
            // best-effort lookup only
        }
    }
    return null;
}
129
/**
 * Push a tab's audit toggle to the backend for a persisted CLI session.
 *
 * Declared `async` so the result is always a promise: a synchronous throw from
 * the credential lookup or from `patchLocalAuditState` becomes a rejection that
 * callers can handle with `.catch(...)`.
 *
 * @param {string} sessionId - Persisted CLI session ID to update.
 * @param {boolean} enabled - Desired audit state.
 * @param {*} updatedAt - Timestamp of the local write, forwarded unchanged.
 * @param {Function} [fetchFn=globalThis.fetch] - Fetch implementation forwarded
 *   to `patchLocalAuditState`.
 * @returns {Promise<*>} Resolves to `false` when no credentials are available,
 *   otherwise to whatever `patchLocalAuditState` resolves to.
 */
async function syncPersistedCliAuditState(sessionId, enabled, updatedAt, fetchFn = globalThis.fetch) {
    const creds = getThinkrunApiCredentials();
    if (!creds) {
        return false;
    }
    const { apiKey, apiUrl } = creds;
    return patchLocalAuditState({ sessionId, enabled, updatedAt, apiKey, apiUrl, fetchFn });
}
62
136
  function withTimeout(promise, timeoutMs, label) {
63
137
  let timer;
64
138
  const timeout = new Promise((_, reject) => {
@@ -251,6 +325,7 @@ export async function handleWindowNew(client, args, defaultSessionRef) {
251
325
  // Anchored to prevent false positives on cloud session IDs that end in digits
252
326
  // (e.g. `remote-session-42` would match the unanchored form and produce a tab ID).
253
327
  const LOCAL_SESSION_TAB_ID_RE = /^local-(\d+)$/;
328
+ const EXTENSION_PROXY_SESSION_TAB_ID_RE = /^ext-proxy-(\d+)$/;
254
329
  function extractLocalTabIdFromSessionId(sessionId) {
255
330
  // Local MCP session IDs are expected to end in the underlying tab ID.
256
331
  // `focus` depends on this convention to foreground the bound local tab
@@ -261,6 +336,84 @@ function extractLocalTabIdFromSessionId(sessionId) {
261
336
  const parsed = Number(match[1]);
262
337
  return Number.isInteger(parsed) && parsed > 0 ? parsed : undefined;
263
338
  }
339
/**
 * Extract the tab ID from a session ID that is backed by a local tab.
 *
 * Two session ID shapes are recognised, via `LOCAL_SESSION_TAB_ID_RE` and
 * `EXTENSION_PROXY_SESSION_TAB_ID_RE`. The captured digits must form a
 * positive safe integer; longer digit runs are rejected because converting
 * them back to a string would produce exponent notation (for example `1e+23`).
 *
 * @param {string} sessionId - Session ID to inspect.
 * @returns {string|undefined} The tab ID in canonical decimal form (leading
 *   zeros removed), or `undefined` when the session ID is not a string, matches
 *   neither pattern, or carries an invalid number.
 */
function extractAuditableTabIdFromSessionId(sessionId) {
    if (typeof sessionId !== 'string') {
        return undefined;
    }
    for (const pattern of [LOCAL_SESSION_TAB_ID_RE, EXTENSION_PROXY_SESSION_TAB_ID_RE]) {
        const match = sessionId.match(pattern);
        if (!match) {
            continue;
        }
        // The first matching pattern decides the result.
        const parsed = Number(match[1]);
        return Number.isSafeInteger(parsed) && parsed > 0 ? String(parsed) : undefined;
    }
    return undefined;
}
352
/**
 * Build the `audit` object that describes a session's audit state.
 *
 * @param {string} sessionId - Session ID to describe.
 * @returns {{
 *   tabId: string,
 *   enabled: *,
 *   source: *,
 *   storedEnabled: *,
 *   storedUpdatedAt: *,
 * } | undefined} `undefined` when no tab ID can be extracted from the session
 *   ID. Otherwise `enabled`/`source` come from `resolveTabAuditState` and the
 *   `stored*` fields from `readTabAuditState`; they are `undefined` when
 *   nothing is stored for the tab.
 */
function buildSessionAuditPayload(sessionId) {
    const tabId = extractAuditableTabIdFromSessionId(sessionId);
    if (!tabId) {
        return undefined;
    }
    const resolved = resolveTabAuditState({ tabId });
    const stored = readTabAuditState(tabId);
    return {
        tabId,
        enabled: resolved.enabled,
        source: resolved.source,
        storedEnabled: stored?.enabled,
        storedUpdatedAt: stored?.updatedAt,
    };
}
366
/**
 * Tool handler: enable or disable audit mode for a local tab-backed session.
 *
 * Steps:
 *   1. Resolve the session ID and extract its tab ID; sessions without one get
 *      an error result.
 *   2. Persist the toggle with `writeTabAuditState` (marked `explicit`).
 *   3. If a CLI session is persisted for the tab, start a backend sync without
 *      awaiting it. A sync failure, whether thrown synchronously or delivered
 *      as a rejection, is logged and does not change the tool result, because
 *      the local write has already happened.
 *   4. Report the state as resolved after the write.
 *
 * This function does not throw; every failure is returned as an `isError`
 * result.
 *
 * @param {{ sessionId?: string, enabled: boolean }} args - Tool arguments.
 * @param {(args: object) => string} sidResolver - Resolves the effective
 *   session ID from the arguments.
 * @returns {Promise<{ content: Array<{ type: 'text', text: string }>, isError: boolean }>}
 *   A tool result whose text is a pretty-printed JSON payload.
 */
export async function handleSetAuditing(args, sidResolver) {
    // All three outcomes share the same single-text-item envelope.
    const toResult = (payload, isError) => ({
        content: [{
                type: 'text',
                text: JSON.stringify(payload, null, 2),
            }],
        isError,
    });
    const reportSyncFailure = () => {
        console.error('[thinkrun-mcp] set_auditing backend sync failed');
    };
    try {
        const sessionId = sidResolver(args);
        const tabId = extractAuditableTabIdFromSessionId(sessionId);
        if (!tabId) {
            return toResult({
                success: false,
                error: `Session ${sessionId} is not backed by a local tab`,
            }, true);
        }
        const stored = writeTabAuditState(tabId, args.enabled, undefined, { explicit: true });
        const persistedCliSessionId = readPersistedCliSessionId(tabId);
        if (persistedCliSessionId) {
            const updatedAt = stored.updatedAt;
            try {
                // Fire-and-forget. Promise.resolve() tolerates a non-promise return value.
                Promise.resolve(syncPersistedCliAuditState(persistedCliSessionId, args.enabled, updatedAt))
                    .catch(reportSyncFailure);
            }
            catch {
                // A synchronous throw must not undo the successful local write.
                reportSyncFailure();
            }
        }
        const resolved = resolveTabAuditState({ tabId });
        return toResult({
            success: true,
            sessionId,
            tabId,
            enabled: resolved.enabled,
            source: resolved.source,
            storedEnabled: stored.enabled,
            storedUpdatedAt: stored.updatedAt,
        }, false);
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        return toResult({ success: false, error: message }, true);
    }
}
264
417
  export async function handleTabFocus(client, args, sidResolver, defaultSessionRef) {
265
418
  try {
266
419
  const sessionId = sidResolver(args);
@@ -475,10 +628,6 @@ export function createServer(client, options) {
475
628
  // The swap is not atomic with in-flight tool calls. MCP stdio uses sequential request/response,
476
629
  // so concurrent calls are not expected, but future transports may allow pipelining.
477
630
  const c = clientRef ? () => clientRef.current : () => client;
478
- const selectorScreenshotSupport = options?.selectorScreenshotSupport ?? (() => {
479
- const active = c();
480
- return active.capabilities?.selectorScreenshots !== false;
481
- });
482
631
  const requiredSessionIdSchema = z.string().describe('The session ID');
483
632
  const defaultableSessionIdSchema = z.string().optional().describe('The session ID (auto-injected if omitted when a default session exists)');
484
633
  /** Resolve session ID: use provided value, then ref, then initial default. */
@@ -539,11 +688,13 @@ export function createServer(client, options) {
539
688
  sessionId: defaultableSessionIdSchema,
540
689
  },
541
690
  }, async (args) => {
542
- const result = await c().getSession(sid(args));
691
+ const sessionId = sid(args);
692
+ const result = await c().getSession(sessionId);
693
+ const audit = buildSessionAuditPayload(sessionId);
543
694
  return {
544
695
  content: [{
545
696
  type: 'text',
546
- text: JSON.stringify(result, null, 2),
697
+ text: JSON.stringify(audit ? { ...result, audit } : result, null, 2),
547
698
  }],
548
699
  };
549
700
  });
@@ -663,6 +814,15 @@ export function createServer(client, options) {
663
814
  }],
664
815
  };
665
816
  });
817
// Tool: set_auditing. Toggles audit mode for a local tab-backed session.
// The handler delegates to handleSetAuditing, passing the session ID resolver
// `sid` so an omitted sessionId is resolved the same way as for other tools.
server.registerTool('set_auditing', {
    title: 'Set Audit Mode',
    description: 'Enable or disable local audit mode for the current local tab-backed session. ' +
        'Uses the same persisted audit toggle as the ThinkRun CLI.',
    inputSchema: {
        sessionId: defaultableSessionIdSchema,
        enabled: z.boolean().describe('Whether audit mode should be enabled for this local tab'),
    },
}, async (args) => handleSetAuditing(args, sid));
666
826
  // ================================================================
667
827
  // Mode switching (only registered when clientRef + onSetMode provided)
668
828
  // ================================================================
@@ -845,6 +1005,18 @@ export function createServer(client, options) {
845
1005
  .optional()
846
1006
  .describe('Mouse button (default: left)'),
847
1007
  clickCount: z.number().optional().describe('Number of clicks (2 for double-click)'),
1008
+ delay: z
1009
+ .number()
1010
+ .int()
1011
+ .min(0)
1012
+ .optional()
1013
+ .describe('Delay between mouse down and up in milliseconds'),
1014
+ timeout: z
1015
+ .number()
1016
+ .int()
1017
+ .min(1)
1018
+ .optional()
1019
+ .describe('Command timeout in milliseconds'),
848
1020
  captureHtml: z.boolean().optional().describe('Capture page HTML after click'),
849
1021
  },
850
1022
  }, async (args) => {
@@ -857,6 +1029,42 @@ export function createServer(client, options) {
857
1029
  }],
858
1030
  };
859
1031
  });
1032
// Tool: click_at. Clicks at integer viewport coordinates instead of a selector.
// Every argument except sessionId is forwarded to the active client's clickAt();
// the result is returned as pretty-printed JSON text.
// The description says "Local mode only", but no capability check is made here:
// the handler calls clickAt() on whatever client c() returns.
server.registerTool('click_at', {
    title: 'Click At Coordinates',
    description: 'Click at viewport-relative CSS pixel coordinates. Local mode only. ' +
        'Use this when semantic element targeting is unavailable and you need a precise coordinate click.',
    inputSchema: {
        sessionId: defaultableSessionIdSchema,
        x: z.number().int().describe('Viewport-relative X coordinate in CSS pixels'),
        y: z.number().int().describe('Viewport-relative Y coordinate in CSS pixels'),
        thought: z.string().optional().describe('Why you are clicking at these coordinates'),
        button: z
            .enum(['left', 'right', 'middle'])
            .optional()
            .describe('Mouse button (default: left)'),
        delay: z
            .number()
            .int()
            .min(0)
            .optional()
            .describe('Delay between mouse down and up in milliseconds'),
        timeout: z
            .number()
            .int()
            .min(1)
            .optional()
            .describe('Command timeout in milliseconds'),
    },
}, async (args) => {
    // sessionId is resolved through sid(); it is stripped from the forwarded params.
    const { sessionId: _sid, ...params } = args;
    const result = await c().clickAt(sid(args), params);
    return {
        content: [{
                type: 'text',
                text: JSON.stringify(result, null, 2),
            }],
    };
});
860
1068
  server.registerTool('type_text', {
861
1069
  title: 'Type Text',
862
1070
  description: 'Type text into an element (appends to existing text, triggers key events). ' +
@@ -865,6 +1073,12 @@ export function createServer(client, options) {
865
1073
  sessionId: defaultableSessionIdSchema,
866
1074
  selector: cssSelector.describe('CSS selector of the input element'),
867
1075
  text: z.string().describe('Text to type'),
1076
+ delay: z
1077
+ .number()
1078
+ .int()
1079
+ .min(0)
1080
+ .optional()
1081
+ .describe('Delay between typed characters in milliseconds'),
868
1082
  thought: z.string().optional().describe('Why you are typing this'),
869
1083
  },
870
1084
  }, async (args) => {
@@ -921,12 +1135,28 @@ export function createServer(client, options) {
921
1135
  inputSchema: {
922
1136
  sessionId: defaultableSessionIdSchema,
923
1137
  selector: cssSelector.optional().describe('CSS selector to scroll within (default: page)'),
1138
+ to: cssSelector.optional().describe('CLI-style alias: scroll this element into view'),
1139
+ direction: z
1140
+ .enum(['up', 'down'])
1141
+ .optional()
1142
+ .describe('CLI-style vertical scroll direction (use with amount)'),
1143
+ amount: z
1144
+ .number()
1145
+ .int()
1146
+ .positive()
1147
+ .optional()
1148
+ .describe('CLI-style vertical scroll distance in pixels (default: 500 when direction is set)'),
924
1149
  x: z.number().optional().describe('Horizontal scroll pixels'),
925
1150
  y: z.number().optional().describe('Vertical scroll pixels (positive = down)'),
926
1151
  thought: z.string().optional().describe('Why you are scrolling'),
927
1152
  },
928
1153
  }, async (args) => {
929
- const { sessionId: _sid, ...params } = args;
1154
+ const { sessionId: _sid, to, direction, amount, ...rest } = args;
1155
+ const params = {
1156
+ ...rest,
1157
+ selector: rest.selector ?? to,
1158
+ y: direction ? ((direction === 'up' ? -1 : 1) * (amount ?? 500)) : rest.y,
1159
+ };
930
1160
  const result = await c().scroll(sid(args), params);
931
1161
  return {
932
1162
  content: [{
@@ -1003,16 +1233,18 @@ export function createServer(client, options) {
1003
1233
  });
1004
1234
  server.registerTool('screenshot', {
1005
1235
  title: 'Take Screenshot',
1006
- description: 'Capture a screenshot of the current page. Returns a base64-encoded image. ' +
1236
+ description: 'Capture a screenshot of the current page. Returns an image plus any available artifact metadata. ' +
1007
1237
  'Use fullPage to capture the entire scrollable page.',
1008
1238
  inputSchema: {
1009
1239
  sessionId: defaultableSessionIdSchema,
1010
- selector: cssSelector.optional().describe('Optional CSS selector to capture a single element instead of the full viewport'),
1011
1240
  type: z
1012
1241
  .enum(['png', 'jpeg', 'webp'])
1013
1242
  .optional()
1014
1243
  .describe('Image format (default: png)'),
1015
1244
  fullPage: z.boolean().optional().describe('Capture full scrollable page'),
1245
+ selector: cssSelector
1246
+ .optional()
1247
+ .describe('Capture a specific element by CSS selector (mutually exclusive with fullPage)'),
1016
1248
  quality: z
1017
1249
  .number()
1018
1250
  .min(0)
@@ -1028,67 +1260,34 @@ export function createServer(client, options) {
1028
1260
  },
1029
1261
  }, async (args) => {
1030
1262
  const { sessionId: _sid, ...params } = args;
1031
- const activeClient = c();
1032
- if (params.selector && !selectorScreenshotSupport()) {
1033
- return {
1034
- content: [{
1035
- type: 'text',
1036
- text: JSON.stringify({
1037
- success: false,
1038
- error: 'Selector screenshots are currently unsupported in cloud mode. Use a default/full-page screenshot in cloud mode, or switch to local mode for selector capture.',
1039
- }, null, 2),
1040
- }],
1041
- isError: true,
1042
- };
1043
- }
1044
- let result;
1045
- try {
1046
- result = await activeClient.screenshot(sid(args), params);
1047
- }
1048
- catch (error) {
1049
- const retryable = typeof error === 'object'
1050
- && error !== null
1051
- && 'retryable' in error
1052
- && error.retryable === true;
1053
- return {
1054
- content: [{
1055
- type: 'text',
1056
- text: JSON.stringify({
1057
- success: false,
1058
- error: error instanceof Error ? error.message : String(error),
1059
- retryable,
1060
- }, null, 2),
1061
- }],
1062
- isError: true,
1063
- };
1064
- }
1263
+ const result = await c().screenshot(sid(args), params);
1065
1264
  if (result.screenshot) {
1066
- const metadata = {};
1067
- if (result.artifactId)
1068
- metadata.artifactId = result.artifactId;
1069
- if (result.url)
1070
- metadata.url = result.url;
1071
- if (result.format)
1072
- metadata.format = result.format;
1073
- if (typeof result.size === 'number')
1074
- metadata.size = result.size;
1075
- if (result.warning)
1076
- metadata.warning = result.warning;
1265
+ const format = result.format || params.type || 'png';
1266
+ const mimeType = result.mimeType || `image/${format}`;
1267
+ const metadata = {
1268
+ artifactId: result.artifactId,
1269
+ url: result.url ?? result.publicUrl,
1270
+ format: result.format,
1271
+ size: result.size,
1272
+ warning: result.warning,
1273
+ };
1274
+ const metadataText = Object.values(metadata).some((value) => value !== undefined)
1275
+ ? JSON.stringify(metadata, null, 2)
1276
+ : null;
1077
1277
  return {
1078
1278
  content: [
1079
1279
  {
1080
1280
  type: 'image',
1081
1281
  data: result.screenshot,
1082
- mimeType: result.mimeType || `image/${params.type || 'png'}`,
1282
+ mimeType,
1083
1283
  },
1084
- ...(Object.keys(metadata).length > 0
1284
+ ...(metadataText
1085
1285
  ? [{
1086
1286
  type: 'text',
1087
- text: JSON.stringify(metadata, null, 2),
1287
+ text: metadataText,
1088
1288
  }]
1089
1289
  : []),
1090
1290
  ],
1091
- isError: false,
1092
1291
  };
1093
1292
  }
1094
1293
  return {
@@ -1106,16 +1305,32 @@ export function createServer(client, options) {
1106
1305
  inputSchema: {
1107
1306
  sessionId: defaultableSessionIdSchema,
1108
1307
  type: z
1109
- .enum(['text', 'html', 'dom', 'evaluate'])
1308
+ .enum(['text', 'html', 'dom', 'evaluate', 'attribute'])
1110
1309
  .optional()
1111
1310
  .describe('Extraction type (default: text)'),
1311
+ format: z
1312
+ .enum(['text', 'json', 'html'])
1313
+ .optional()
1314
+ .describe('CLI-style alias for output mode. "json" keeps structured extraction results.'),
1112
1315
  selector: cssSelector.optional().describe('CSS selector to scope extraction'),
1113
1316
  script: z.string().optional().describe('JavaScript expression (when type=evaluate)'),
1114
1317
  attribute: z.string().optional().describe('HTML attribute to extract'),
1318
+ attr: z.string().optional().describe('CLI-style alias for attribute extraction'),
1115
1319
  multiple: z.boolean().optional().describe('Extract from all matching elements'),
1320
+ all: z.boolean().optional().describe('CLI-style alias for multiple extraction'),
1116
1321
  },
1117
1322
  }, async (args) => {
1118
- const { sessionId: _sid, ...params } = args;
1323
+ const { sessionId: _sid, format, attr, all, ...rest } = args;
1324
+ const attribute = rest.attribute ?? attr;
1325
+ const multiple = rest.multiple ?? all;
1326
+ const type = rest.type
1327
+ ?? (attribute ? 'attribute' : format === 'html' ? 'html' : format === 'json' ? 'text' : 'text');
1328
+ const params = {
1329
+ ...rest,
1330
+ type,
1331
+ attribute,
1332
+ multiple,
1333
+ };
1119
1334
  const result = await c().extract(sid(args), params);
1120
1335
  const extracted = result.text || result.html
1121
1336
  || (result.data != null ? JSON.stringify(result.data, null, 2) : null);
@@ -1132,9 +1347,15 @@ export function createServer(client, options) {
1132
1347
  inputSchema: {
1133
1348
  sessionId: defaultableSessionIdSchema,
1134
1349
  script: z.string().describe('JavaScript code to execute in the page'),
1350
+ timeout: z
1351
+ .number()
1352
+ .int()
1353
+ .min(1)
1354
+ .optional()
1355
+ .describe('Command timeout in milliseconds'),
1135
1356
  },
1136
1357
  }, async (args) => {
1137
- const result = await c().evaluate(sid(args), { script: args.script });
1358
+ const result = await c().evaluate(sid(args), { script: args.script, timeout: args.timeout });
1138
1359
  let text;
1139
1360
  if (result.error) {
1140
1361
  text = result.error;