@thinkrun/mcp 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,12 @@
5
5
  * script injection attempts. The server-side also applies JSON.stringify()
6
6
  * escaping when embedding selectors in error messages (defense-in-depth).
7
7
  */
8
+ import { existsSync, readFileSync } from 'node:fs';
9
+ import { homedir } from 'node:os';
10
+ import { join } from 'node:path';
8
11
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
12
+ import { readTabAuditState, resolveTabAuditState, writeTabAuditState, } from '@thinkrun/shared/audit-state';
13
+ import { patchLocalAuditState } from '@thinkrun/shared/local-audit-sync';
9
14
  import { z } from 'zod';
10
15
  /** Zod schema for CSS selectors — rejects obvious script injection patterns. */
11
16
  const cssSelector = z
@@ -59,6 +64,75 @@ export const MAX_GET_HTML_CHARS = 200_000;
59
64
  export const MAX_SLEEP_MS = 30_000;
60
65
  const LOCAL_TOOL_ENRICH_TIMEOUT_MS = 300;
61
66
  const LOCAL_TOOL_ENRICH_BRIDGE_TIMEOUT_MS = 150;
67
/**
 * Resolve the directory used for ThinkRun lock/session files.
 *
 * Resolution order:
 *   1. `THINKRUN_LOCK_DIR`, else `THINKBROWSE_LOCK_DIR` (only when the former is
 *      null/undefined), provided the chosen value is non-empty.
 *   2. `~/.thinkbrowse`, but only when it exists and `~/.thinkrun` does not.
 *   3. `~/.thinkrun` (returned even if it does not exist yet).
 *
 * @returns {string} The lock directory path.
 */
function getThinkrunLockDir() {
    // Keep this fallback sequence aligned with packages/shared/src/audit-state.js#getLockDir.
    const override = process.env.THINKRUN_LOCK_DIR ?? process.env.THINKBROWSE_LOCK_DIR;
    if (override) {
        return override;
    }
    const home = homedir();
    const currentDir = join(home, '.thinkrun');
    const legacyDir = join(home, '.thinkbrowse');
    // Only fall back to the legacy directory when the current one is absent.
    const preferLegacy = !existsSync(currentDir) && existsSync(legacyDir);
    return preferLegacy ? legacyDir : currentDir;
}
79
/**
 * Build the path of the per-tab CLI session file inside the lock directory.
 *
 * Every character outside `[a-zA-Z0-9_-]` is replaced with `_`, so the
 * resulting file name cannot contain path separators or dots taken from
 * `tabId`. An empty (or nullish) tab ID maps to `unknown`.
 *
 * @param {string | number | null | undefined} tabId Tab identifier. Non-string
 *   values are coerced with `String()` instead of throwing, because tab IDs are
 *   handled as numbers elsewhere in this file.
 * @returns {string} `<lockDir>/local-session-<safeTabId>.json`
 */
function getCliSessionFilePath(tabId) {
    const rawTabId = tabId == null ? '' : String(tabId);
    const safeTabId = rawTabId.replace(/[^a-zA-Z0-9_-]/g, '_') || 'unknown';
    return join(getThinkrunLockDir(), `local-session-${safeTabId}.json`);
}
83
/**
 * Read the CLI session ID stored in the session file for `tabId`.
 *
 * This is a best-effort lookup: a missing file, unreadable file, invalid JSON,
 * or a `sessionId` field that is not a non-empty string all yield `undefined`.
 *
 * @param {string} tabId Tab identifier passed to `getCliSessionFilePath`.
 * @returns {string | undefined} The stored session ID, if any.
 */
function readPersistedCliSessionId(tabId) {
    let sessionId;
    try {
        const sessionFile = getCliSessionFilePath(tabId);
        if (existsSync(sessionFile)) {
            const contents = readFileSync(sessionFile, 'utf-8');
            const candidate = JSON.parse(contents).sessionId;
            if (typeof candidate === 'string' && candidate.length > 0) {
                sessionId = candidate;
            }
        }
    }
    catch {
        // Any failure (I/O, parse, non-object JSON) means "no persisted session".
        sessionId = undefined;
    }
    return sessionId;
}
97
/**
 * Look up the ThinkRun API key and base URL.
 *
 * Sources, in order:
 *   1. `THINKRUN_API_KEY` (trimmed). When present, the URL comes from
 *      `THINKRUN_BASE_URL` (trimmed) or the default; config files are not read.
 *   2. `config.json` in `THINKRUN_CONFIG_DIR` (or `~/.config/thinkrun`), then
 *      `~/.config/thinkbrowse/config.json`. The first file with a non-empty
 *      string `apiKey` decides the outcome; unreadable or invalid files are skipped.
 *
 * A URL that does not start with `https://` is never returned: the function
 * yields `null` instead, without trying further sources.
 *
 * @returns {{ apiKey: string, apiUrl: string } | null}
 */
function getThinkrunApiCredentials() {
    const defaultApiUrl = 'https://api.thinkrun.ai';
    const isHttps = (url) => url.startsWith('https://');

    const keyFromEnv = process.env.THINKRUN_API_KEY?.trim();
    const urlFromEnv = process.env.THINKRUN_BASE_URL?.trim();
    if (keyFromEnv) {
        const apiUrl = urlFromEnv || defaultApiUrl;
        if (!isHttps(apiUrl)) {
            return null;
        }
        return { apiKey: keyFromEnv, apiUrl };
    }

    const primaryConfigDir = process.env.THINKRUN_CONFIG_DIR || join(homedir(), '.config', 'thinkrun');
    const candidateFiles = [
        join(primaryConfigDir, 'config.json'),
        join(homedir(), '.config', 'thinkbrowse', 'config.json'),
    ];
    for (const candidate of candidateFiles) {
        try {
            if (existsSync(candidate)) {
                const config = JSON.parse(readFileSync(candidate, 'utf-8'));
                const apiKey = typeof config.apiKey === 'string' ? config.apiKey.trim() : '';
                if (apiKey) {
                    let apiUrl = defaultApiUrl;
                    if (typeof config.apiUrl === 'string' && config.apiUrl.trim()) {
                        apiUrl = config.apiUrl.trim();
                    }
                    return isHttps(apiUrl) ? { apiKey, apiUrl } : null;
                }
            }
        }
        catch {
            // best-effort lookup only
        }
    }
    return null;
}
129
/**
 * Forward an audit-state change for a CLI session to `patchLocalAuditState`.
 *
 * When no API credentials can be resolved, nothing is sent and the returned
 * promise resolves to `false`. Otherwise the result of `patchLocalAuditState`
 * is returned as-is.
 *
 * @param {string} sessionId CLI session ID to patch.
 * @param {boolean} enabled New audit flag.
 * @param {*} updatedAt Timestamp of the change, passed through unchanged.
 * @param {Function} [fetchFn=globalThis.fetch] Fetch implementation passed through.
 * @returns {Promise<*>}
 */
function syncPersistedCliAuditState(sessionId, enabled, updatedAt, fetchFn = globalThis.fetch) {
    const credentials = getThinkrunApiCredentials();
    return credentials
        ? patchLocalAuditState({
            sessionId,
            enabled,
            updatedAt,
            apiKey: credentials.apiKey,
            apiUrl: credentials.apiUrl,
            fetchFn,
        })
        : Promise.resolve(false);
}
62
136
  function withTimeout(promise, timeoutMs, label) {
63
137
  let timer;
64
138
  const timeout = new Promise((_, reject) => {
@@ -251,6 +325,7 @@ export async function handleWindowNew(client, args, defaultSessionRef) {
251
325
  // Anchored to prevent false positives on cloud session IDs that end in digits
252
326
  // (e.g. `remote-session-42` would match the unanchored form and produce a tab ID).
253
327
  const LOCAL_SESSION_TAB_ID_RE = /^local-(\d+)$/;
328
+ const EXTENSION_PROXY_SESSION_TAB_ID_RE = /^ext-proxy-(\d+)$/;
254
329
  function extractLocalTabIdFromSessionId(sessionId) {
255
330
  // Local MCP session IDs are expected to end in the underlying tab ID.
256
331
  // `focus` depends on this convention to foreground the bound local tab
@@ -261,6 +336,84 @@ function extractLocalTabIdFromSessionId(sessionId) {
261
336
  const parsed = Number(match[1]);
262
337
  return Number.isInteger(parsed) && parsed > 0 ? parsed : undefined;
263
338
  }
339
/**
 * Extract the tab ID embedded in a tab-backed session ID, as a decimal string.
 *
 * Accepts session IDs matching `LOCAL_SESSION_TAB_ID_RE` or
 * `EXTENSION_PROXY_SESSION_TAB_ID_RE`. The numeric suffix is normalised
 * through `Number`, so leading zeros are dropped (`local-007` -> `"7"`).
 *
 * @param {string} sessionId Session ID to inspect.
 * @returns {string | undefined} The positive tab ID, or `undefined` when the
 *   session ID is not a string, matches neither pattern, or its suffix is not
 *   a positive safe integer.
 */
function extractAuditableTabIdFromSessionId(sessionId) {
    if (typeof sessionId !== 'string')
        return undefined;
    // Both patterns are anchored on different prefixes, so at most one can match.
    const match = LOCAL_SESSION_TAB_ID_RE.exec(sessionId)
        ?? EXTENSION_PROXY_SESSION_TAB_ID_RE.exec(sessionId);
    if (!match)
        return undefined;
    const tabId = Number(match[1]);
    // isSafeInteger rather than isInteger: digit runs beyond 2^53 - 1 would
    // otherwise be silently rounded to a different ID, or stringified in
    // exponent notation (e.g. "1e+23").
    return Number.isSafeInteger(tabId) && tabId > 0 ? String(tabId) : undefined;
}
352
/**
 * Build the `audit` object attached to session details for tab-backed sessions.
 *
 * Combines the resolved audit state (`enabled`, `source`) with the raw stored
 * record (`storedEnabled`, `storedUpdatedAt`; both `undefined` when nothing is
 * stored).
 *
 * @param {string} sessionId Session ID to describe.
 * @returns {object | undefined} The audit payload, or `undefined` when the
 *   session ID does not encode an auditable tab ID.
 */
function buildSessionAuditPayload(sessionId) {
    const tabId = extractAuditableTabIdFromSessionId(sessionId);
    if (tabId) {
        const { enabled, source } = resolveTabAuditState({ tabId });
        const storedState = readTabAuditState(tabId);
        return {
            tabId,
            enabled,
            source,
            storedEnabled: storedState?.enabled,
            storedUpdatedAt: storedState?.updatedAt,
        };
    }
    return undefined;
}
366
/**
 * Handler for the `set_auditing` tool: toggle audit mode for the tab behind a
 * session.
 *
 * Steps:
 *   1. Resolve the session ID and derive its tab ID; sessions that do not
 *      encode a tab ID are rejected with an error result.
 *   2. Write the new flag via `writeTabAuditState` (marked `explicit`).
 *   3. If a CLI session ID is persisted for the tab, start a backend sync in
 *      the background. The sync is fire-and-forget: the response does not wait
 *      for it, and any failure (rejection or synchronous throw) is only logged,
 *      because the local toggle has already been written at that point.
 *   4. Report the freshly resolved state alongside the stored record.
 *
 * @param {{ enabled: boolean, sessionId?: string }} args Tool arguments.
 * @param {(args: object) => string} sidResolver Maps the arguments to a session ID.
 * @returns {Promise<{ content: Array<{ type: 'text', text: string }>, isError: boolean }>}
 *   A tool result whose text is pretty-printed JSON. Never rejects: failures
 *   are returned as `{ success: false, error }` with `isError: true`.
 */
export async function handleSetAuditing(args, sidResolver) {
    // Shared envelope for every return path.
    const respond = (payload, isError) => ({
        content: [{
                type: 'text',
                text: JSON.stringify(payload, null, 2),
            }],
        isError,
    });
    const logSyncFailure = (err) => {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`[thinkrun-mcp] set_auditing backend sync failed: ${reason}`);
    };
    try {
        const sessionId = sidResolver(args);
        const tabId = extractAuditableTabIdFromSessionId(sessionId);
        if (!tabId) {
            return respond({
                success: false,
                error: `Session ${sessionId} is not backed by a local tab`,
            }, true);
        }
        const stored = writeTabAuditState(tabId, args.enabled, undefined, { explicit: true });
        const persistedCliSessionId = readPersistedCliSessionId(tabId);
        if (persistedCliSessionId) {
            // Best-effort: a sync failure must not turn an already-applied local
            // change into an error response, so synchronous throws are contained
            // here as well as asynchronous rejections.
            try {
                Promise.resolve(syncPersistedCliAuditState(persistedCliSessionId, args.enabled, stored.updatedAt))
                    .catch(logSyncFailure);
            }
            catch (syncErr) {
                logSyncFailure(syncErr);
            }
        }
        const resolved = resolveTabAuditState({ tabId });
        return respond({
            success: true,
            sessionId,
            tabId,
            enabled: resolved.enabled,
            source: resolved.source,
            storedEnabled: stored.enabled,
            storedUpdatedAt: stored.updatedAt,
        }, false);
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        return respond({ success: false, error: message }, true);
    }
}
264
417
  export async function handleTabFocus(client, args, sidResolver, defaultSessionRef) {
265
418
  try {
266
419
  const sessionId = sidResolver(args);
@@ -535,11 +688,13 @@ export function createServer(client, options) {
535
688
  sessionId: defaultableSessionIdSchema,
536
689
  },
537
690
  }, async (args) => {
538
- const result = await c().getSession(sid(args));
691
+ const sessionId = sid(args);
692
+ const result = await c().getSession(sessionId);
693
+ const audit = buildSessionAuditPayload(sessionId);
539
694
  return {
540
695
  content: [{
541
696
  type: 'text',
542
- text: JSON.stringify(result, null, 2),
697
+ text: JSON.stringify(audit ? { ...result, audit } : result, null, 2),
543
698
  }],
544
699
  };
545
700
  });
@@ -659,6 +814,15 @@ export function createServer(client, options) {
659
814
  }],
660
815
  };
661
816
  });
817
+ server.registerTool('set_auditing', {
818
+ title: 'Set Audit Mode',
819
+ description: 'Enable or disable local audit mode for the current local tab-backed session. ' +
820
+ 'Uses the same persisted audit toggle as the ThinkRun CLI.',
821
+ inputSchema: {
822
+ sessionId: defaultableSessionIdSchema,
823
+ enabled: z.boolean().describe('Whether audit mode should be enabled for this local tab'),
824
+ },
825
+ }, async (args) => handleSetAuditing(args, sid));
662
826
  // ================================================================
663
827
  // Mode switching (only registered when clientRef + onSetMode provided)
664
828
  // ================================================================
@@ -841,6 +1005,18 @@ export function createServer(client, options) {
841
1005
  .optional()
842
1006
  .describe('Mouse button (default: left)'),
843
1007
  clickCount: z.number().optional().describe('Number of clicks (2 for double-click)'),
1008
+ delay: z
1009
+ .number()
1010
+ .int()
1011
+ .min(0)
1012
+ .optional()
1013
+ .describe('Delay between mouse down and up in milliseconds'),
1014
+ timeout: z
1015
+ .number()
1016
+ .int()
1017
+ .min(1)
1018
+ .optional()
1019
+ .describe('Command timeout in milliseconds'),
844
1020
  captureHtml: z.boolean().optional().describe('Capture page HTML after click'),
845
1021
  },
846
1022
  }, async (args) => {
@@ -853,6 +1029,42 @@ export function createServer(client, options) {
853
1029
  }],
854
1030
  };
855
1031
  });
1032
+ server.registerTool('click_at', {
1033
+ title: 'Click At Coordinates',
1034
+ description: 'Click at viewport-relative CSS pixel coordinates. Local mode only. ' +
1035
+ 'Use this when semantic element targeting is unavailable and you need a precise coordinate click.',
1036
+ inputSchema: {
1037
+ sessionId: defaultableSessionIdSchema,
1038
+ x: z.number().int().describe('Viewport-relative X coordinate in CSS pixels'),
1039
+ y: z.number().int().describe('Viewport-relative Y coordinate in CSS pixels'),
1040
+ thought: z.string().optional().describe('Why you are clicking at these coordinates'),
1041
+ button: z
1042
+ .enum(['left', 'right', 'middle'])
1043
+ .optional()
1044
+ .describe('Mouse button (default: left)'),
1045
+ delay: z
1046
+ .number()
1047
+ .int()
1048
+ .min(0)
1049
+ .optional()
1050
+ .describe('Delay between mouse down and up in milliseconds'),
1051
+ timeout: z
1052
+ .number()
1053
+ .int()
1054
+ .min(1)
1055
+ .optional()
1056
+ .describe('Command timeout in milliseconds'),
1057
+ },
1058
+ }, async (args) => {
1059
+ const { sessionId: _sid, ...params } = args;
1060
+ const result = await c().clickAt(sid(args), params);
1061
+ return {
1062
+ content: [{
1063
+ type: 'text',
1064
+ text: JSON.stringify(result, null, 2),
1065
+ }],
1066
+ };
1067
+ });
856
1068
  server.registerTool('type_text', {
857
1069
  title: 'Type Text',
858
1070
  description: 'Type text into an element (appends to existing text, triggers key events). ' +
@@ -861,6 +1073,12 @@ export function createServer(client, options) {
861
1073
  sessionId: defaultableSessionIdSchema,
862
1074
  selector: cssSelector.describe('CSS selector of the input element'),
863
1075
  text: z.string().describe('Text to type'),
1076
+ delay: z
1077
+ .number()
1078
+ .int()
1079
+ .min(0)
1080
+ .optional()
1081
+ .describe('Delay between typed characters in milliseconds'),
864
1082
  thought: z.string().optional().describe('Why you are typing this'),
865
1083
  },
866
1084
  }, async (args) => {
@@ -917,12 +1135,28 @@ export function createServer(client, options) {
917
1135
  inputSchema: {
918
1136
  sessionId: defaultableSessionIdSchema,
919
1137
  selector: cssSelector.optional().describe('CSS selector to scroll within (default: page)'),
1138
+ to: cssSelector.optional().describe('CLI-style alias: scroll this element into view'),
1139
+ direction: z
1140
+ .enum(['up', 'down'])
1141
+ .optional()
1142
+ .describe('CLI-style vertical scroll direction (use with amount)'),
1143
+ amount: z
1144
+ .number()
1145
+ .int()
1146
+ .positive()
1147
+ .optional()
1148
+ .describe('CLI-style vertical scroll distance in pixels (default: 500 when direction is set)'),
920
1149
  x: z.number().optional().describe('Horizontal scroll pixels'),
921
1150
  y: z.number().optional().describe('Vertical scroll pixels (positive = down)'),
922
1151
  thought: z.string().optional().describe('Why you are scrolling'),
923
1152
  },
924
1153
  }, async (args) => {
925
- const { sessionId: _sid, ...params } = args;
1154
+ const { sessionId: _sid, to, direction, amount, ...rest } = args;
1155
+ const params = {
1156
+ ...rest,
1157
+ selector: rest.selector ?? to,
1158
+ y: direction ? ((direction === 'up' ? -1 : 1) * (amount ?? 500)) : rest.y,
1159
+ };
926
1160
  const result = await c().scroll(sid(args), params);
927
1161
  return {
928
1162
  content: [{
@@ -999,7 +1233,7 @@ export function createServer(client, options) {
999
1233
  });
1000
1234
  server.registerTool('screenshot', {
1001
1235
  title: 'Take Screenshot',
1002
- description: 'Capture a screenshot of the current page. Returns a base64-encoded image. ' +
1236
+ description: 'Capture a screenshot of the current page. Returns an image plus any available artifact metadata. ' +
1003
1237
  'Use fullPage to capture the entire scrollable page.',
1004
1238
  inputSchema: {
1005
1239
  sessionId: defaultableSessionIdSchema,
@@ -1008,6 +1242,9 @@ export function createServer(client, options) {
1008
1242
  .optional()
1009
1243
  .describe('Image format (default: png)'),
1010
1244
  fullPage: z.boolean().optional().describe('Capture full scrollable page'),
1245
+ selector: cssSelector
1246
+ .optional()
1247
+ .describe('Capture a specific element by CSS selector (mutually exclusive with fullPage)'),
1011
1248
  quality: z
1012
1249
  .number()
1013
1250
  .min(0)
@@ -1025,12 +1262,32 @@ export function createServer(client, options) {
1025
1262
  const { sessionId: _sid, ...params } = args;
1026
1263
  const result = await c().screenshot(sid(args), params);
1027
1264
  if (result.screenshot) {
1265
+ const format = result.format || params.type || 'png';
1266
+ const mimeType = result.mimeType || `image/${format}`;
1267
+ const metadata = {
1268
+ artifactId: result.artifactId,
1269
+ url: result.url ?? result.publicUrl,
1270
+ format: result.format,
1271
+ size: result.size,
1272
+ warning: result.warning,
1273
+ };
1274
+ const metadataText = Object.values(metadata).some((value) => value !== undefined)
1275
+ ? JSON.stringify(metadata, null, 2)
1276
+ : null;
1028
1277
  return {
1029
- content: [{
1278
+ content: [
1279
+ {
1030
1280
  type: 'image',
1031
1281
  data: result.screenshot,
1032
- mimeType: `image/${params.type || 'png'}`,
1033
- }],
1282
+ mimeType,
1283
+ },
1284
+ ...(metadataText
1285
+ ? [{
1286
+ type: 'text',
1287
+ text: metadataText,
1288
+ }]
1289
+ : []),
1290
+ ],
1034
1291
  };
1035
1292
  }
1036
1293
  return {
@@ -1048,16 +1305,32 @@ export function createServer(client, options) {
1048
1305
  inputSchema: {
1049
1306
  sessionId: defaultableSessionIdSchema,
1050
1307
  type: z
1051
- .enum(['text', 'html', 'dom', 'evaluate'])
1308
+ .enum(['text', 'html', 'dom', 'evaluate', 'attribute'])
1052
1309
  .optional()
1053
1310
  .describe('Extraction type (default: text)'),
1311
+ format: z
1312
+ .enum(['text', 'json', 'html'])
1313
+ .optional()
1314
+ .describe('CLI-style alias for output mode. "json" keeps structured extraction results.'),
1054
1315
  selector: cssSelector.optional().describe('CSS selector to scope extraction'),
1055
1316
  script: z.string().optional().describe('JavaScript expression (when type=evaluate)'),
1056
1317
  attribute: z.string().optional().describe('HTML attribute to extract'),
1318
+ attr: z.string().optional().describe('CLI-style alias for attribute extraction'),
1057
1319
  multiple: z.boolean().optional().describe('Extract from all matching elements'),
1320
+ all: z.boolean().optional().describe('CLI-style alias for multiple extraction'),
1058
1321
  },
1059
1322
  }, async (args) => {
1060
- const { sessionId: _sid, ...params } = args;
1323
+ const { sessionId: _sid, format, attr, all, ...rest } = args;
1324
+ const attribute = rest.attribute ?? attr;
1325
+ const multiple = rest.multiple ?? all;
1326
+ const type = rest.type
1327
+ ?? (attribute ? 'attribute' : format === 'html' ? 'html' : format === 'json' ? 'text' : 'text');
1328
+ const params = {
1329
+ ...rest,
1330
+ type,
1331
+ attribute,
1332
+ multiple,
1333
+ };
1061
1334
  const result = await c().extract(sid(args), params);
1062
1335
  const extracted = result.text || result.html
1063
1336
  || (result.data != null ? JSON.stringify(result.data, null, 2) : null);
@@ -1074,9 +1347,15 @@ export function createServer(client, options) {
1074
1347
  inputSchema: {
1075
1348
  sessionId: defaultableSessionIdSchema,
1076
1349
  script: z.string().describe('JavaScript code to execute in the page'),
1350
+ timeout: z
1351
+ .number()
1352
+ .int()
1353
+ .min(1)
1354
+ .optional()
1355
+ .describe('Command timeout in milliseconds'),
1077
1356
  },
1078
1357
  }, async (args) => {
1079
- const result = await c().evaluate(sid(args), { script: args.script });
1358
+ const result = await c().evaluate(sid(args), { script: args.script, timeout: args.timeout });
1080
1359
  let text;
1081
1360
  if (result.error) {
1082
1361
  text = result.error;