@imenam/simple-scraper 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2,6 +2,7 @@
2
2
  import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
3
3
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4
4
  import dotenv from 'dotenv';
5
+ import fs from 'fs';
5
6
  import path from 'path';
6
7
  import { fileURLToPath } from 'url';
7
8
  import { z } from 'zod';
@@ -9,6 +10,7 @@ import { setupLogging } from './logger.js';
9
10
  import { getBrowser, closeBrowser, getDefaultTimeout } from './browser.js';
10
11
  import { loadAllCookies } from './cookies.js';
11
12
  import { GuiLauncher } from '@imenam/mcp-gui-interface';
13
+ import { createSession, getSession, closeSession, closeAllSessions, listSessions, getSessionLogs, clearSessionLogs } from './sessions.js';
12
14
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
13
15
  const rootDir = path.resolve(__dirname, '..');
14
16
  const envPath = path.join(rootDir, '.env');
@@ -25,6 +27,7 @@ const server = new McpServer({
25
27
  name: 'simple-scraper-mcp',
26
28
  version: '1.0.0',
27
29
  });
30
+ // ─── One-shot helpers ─────────────────────────────────────────────────────────
28
31
  async function setCookies(page, cookies, pageUrl) {
29
32
  const prepared = cookies.map(c => {
30
33
  if (!c.domain) {
@@ -40,6 +43,129 @@ async function applyCookies(page, pageUrl) {
40
43
  await setCookies(page, cookies, pageUrl);
41
44
  }
42
45
  }
46
+ // ─── Page-level helpers (shared between one-shot and session tools) ───────────
47
/**
 * Collect the named form controls and the buttons of a page.
 *
 * The callback is handed to `page.evaluate` together with `opts`, so it only
 * relies on its argument and on the page's `document`; every helper it needs
 * is declared inside it.
 *
 * Field names are turned into a nested structure:
 *   - hyphens become underscores (`first-name` -> `first_name`);
 *   - bracket syntax nests (`user[address][city]` -> user.address.city);
 *   - a trailing `[]` collects values into an array (`tags[]` -> tags: [...]).
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @param {{ selector?: string, showHidden: boolean }} opts
 *   `selector` scopes the search to the first matching element (falls back to
 *   the whole document when nothing matches); `showHidden` also includes
 *   `input[type=hidden]`.
 * @returns {Promise<object>} The nested fields plus a `buttons` array.
 */
async function extractInputs(page, opts) {
    return page.evaluate(({ selector: sel, showHidden }) => {
        // Describe one control; selects expose their options, checkboxes and
        // radios expose their checked state.
        function extractMeta(el) {
            if (el.tagName === 'SELECT') {
                return {
                    tagName: 'SELECT',
                    type: el.type,
                    value: el.value,
                    options: Array.from(el.options).map(o => ({ value: o.value, text: o.text.trim() })),
                };
            }
            if (el.type === 'checkbox' || el.type === 'radio') {
                return { tagName: 'INPUT', type: el.type, value: el.value, checked: el.checked };
            }
            return { tagName: el.tagName, type: el.type || 'text', value: el.value };
        }
        // Return a copy of `obj` with `value` stored at the path `keys`.
        function nestValue(obj, keys, value, isArray) {
            if (!obj) {
                obj = {};
            }
            if (keys.length === 0) {
                return isArray ? [value] : value;
            }
            const [key, ...rest] = keys;
            if (rest.length > 0) {
                return { ...obj, [key]: nestValue(obj[key] || {}, rest, value, isArray) };
            }
            if (!isArray) {
                return { ...obj, [key]: value };
            }
            // The same name may already hold a single (non-array) entry, e.g.
            // `tags` followed by `tags[]`. Spreading that entry would throw, so
            // keep it as the first element of the list instead.
            const existing = obj[key];
            let list;
            if (Array.isArray(existing)) {
                list = existing;
            }
            else if (existing == null) {
                list = [];
            }
            else {
                list = [existing];
            }
            return { ...obj, [key]: [...list, value] };
        }
        const scope = sel ? document.querySelector(sel) ?? document : document;
        const query = showHidden ? 'input, select, textarea' : 'input:not([type=hidden]), select, textarea';
        // Controls without a name cannot be keyed and are ignored.
        const named = Array.from(scope.querySelectorAll(query)).filter(el => el.name);
        const fields = named.reduce((acc, el) => {
            const keys = el.name.replace(/-/g, '_').replace(/\]/g, '').split('[');
            const isArray = keys[keys.length - 1] === '';
            if (isArray) {
                keys.pop();
            }
            return nestValue(acc, keys, extractMeta(el), isArray);
        }, {});
        const buttons = Array.from(scope.querySelectorAll('button, input[type=submit]')).map(el => ({
            tagName: el.tagName,
            type: el.type || null,
            name: el.name || null,
            value: el.value || null,
            text: el.textContent?.trim() || null,
            id: el.id || null,
            class: el.className || null,
        }));
        return { ...fields, buttons };
    }, opts);
}
102
/**
 * Parse a "show/detail" page made of boxes into a plain object.
 *
 * The callback is handed to `page.evaluate` together with `opts`, so it only
 * relies on its argument and on the page's `document`.
 *
 * Each element matching `boxSelector` becomes one entry of the result, keyed
 * by the box title (or `section_<n>` when the box has no title):
 *   - boxes containing `<strong>` labels in `.box-body > .row > div` are read
 *     as label/value pairs (value taken from the sibling `<p>`);
 *   - otherwise, if the box holds a table, up to `tablesMaxItems` data rows
 *     are returned as objects keyed by column header;
 *   - otherwise the (empty) label/value result is returned.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`.
 * @param {{ keysMap: object, boxSelector: string, tablesMaxItems: number }} opts
 *   `keysMap` renames labels (label text -> output key).
 * @returns {Promise<object>} Parsed sections keyed by title.
 */
async function extractShowPage(page, opts) {
    return page.evaluate((payload) => {
        const { keysMap: km, boxSelector: bs, tablesMaxItems } = payload;
        // Collapse a multi-line text node into a single trimmed line.
        const cleanText = (text) => text
            .split('\n')
            .map(s => s.trim())
            .filter(Boolean)
            .join(' ')
            .trim();
        // Rename a label through keysMap. Only own entries count: a plain
        // `km[label]` would also hit inherited members such as `constructor`
        // or `toString` and turn the key into a function.
        const mapKey = (label) => {
            const isOwn = Object.prototype.hasOwnProperty.call(km, label);
            return isOwn && km[label] ? km[label] : label;
        };
        // Header cells -> column names, falling back to col_<n> for blanks.
        const headerNames = (cells) => cells.map((th, i) => cleanText(th.textContent || '') || `col_${i + 1}`);
        function parseKeyValueBox(box) {
            const pairs = {};
            for (const entry of Array.from(box.querySelectorAll('.box-body > .row > div'))) {
                const labelEl = entry.querySelector('strong');
                const valueEl = entry.querySelector('p');
                if (!labelEl) {
                    continue;
                }
                const key = mapKey(cleanText(labelEl.textContent || ''));
                pairs[key] = valueEl ? cleanText(valueEl.textContent || '') : '';
            }
            return pairs;
        }
        function parseTableBox(table) {
            // Rows made only of <th> cells are sub-headers, not data.
            const dataRows = Array.from(table.querySelectorAll('tbody tr'))
                .filter(row => row.querySelector('td') !== null);
            const theadThs = Array.from(table.querySelectorAll('thead tr th'));
            let colKeys = null;
            if (theadThs.length > 0) {
                colKeys = headerNames(theadThs);
            }
            else {
                // No <thead>: accept a first body row made only of <th> cells.
                const firstBodyRow = table.querySelector('tbody tr');
                if (firstBodyRow && firstBodyRow.querySelector('td') === null) {
                    const ths = Array.from(firstBodyRow.querySelectorAll('th'));
                    if (ths.length > 0) {
                        colKeys = headerNames(ths);
                    }
                }
            }
            return dataRows.slice(0, tablesMaxItems).map(row => {
                const output = {};
                Array.from(row.querySelectorAll('td')).forEach((td, i) => {
                    const key = colKeys?.[i] ?? `col_${i + 1}`;
                    output[key] = cleanText(td.textContent || '');
                });
                return output;
            });
        }
        const result = {};
        for (const box of Array.from(document.querySelectorAll(bs))) {
            // Title is looked up in the box header first, then directly in the body.
            const titleEl = box.querySelector('.box-header h3, .box-header h4') ??
                box.querySelector('.box-body > h3, .box-body > h4');
            const title = titleEl ? cleanText(titleEl.textContent || '') : '';
            const key = title || `section_${Object.keys(result).length + 1}`;
            const hasKvPairs = box.querySelector('.box-body > .row > div strong') !== null;
            // Prefer the table inside .dataTables_scrollBody when there is one.
            const table = box.querySelector('.dataTables_scrollBody table') ??
                box.querySelector('table');
            if (hasKvPairs || !table) {
                result[key] = parseKeyValueBox(box);
            }
            else {
                result[key] = parseTableBox(table);
            }
        }
        return result;
    }, opts);
}
43
169
  // ─── scrape_page ──────────────────────────────────────────────────────────────
44
170
  server.tool('scrape_page', 'Navigate to a URL using a headless browser and return the full rendered HTML content of the page.', {
45
171
  url: z.string().url().describe('URL of the page to scrape'),
@@ -67,9 +193,9 @@ server.tool('scrape_page', 'Navigate to a URL using a headless browser and retur
67
193
  }
68
194
  });
69
195
  // ─── execute_js ───────────────────────────────────────────────────────────────
70
- server.tool('execute_js', 'Navigate to a URL and execute custom JavaScript code in the page context. Returns the result of the script execution.', {
196
+ server.tool('execute_js', 'Navigate to a URL and execute custom JavaScript in the page context. The script is executed as a JavaScript function body with new Function(script)(), so it must use an explicit return statement to send data back to the tool. A bare expression such as document.title returns undefined. Return serializable values only; objects and arrays are returned as formatted JSON, primitives as text. For async work, return a Promise, for example: return (async () => { /* ... */ return data; })();', {
71
197
  url: z.string().url().describe('URL of the page'),
72
- script: z.string().describe('JavaScript code to execute in the page context'),
198
+ script: z.string().describe('JavaScript function body to execute in the page context. Use an explicit return statement to provide the tool result, e.g. return { title: document.title };. Bare expressions such as document.title evaluate to undefined because the code runs via new Function(script)(). For async code, return a Promise, e.g. return (async () => { return await fetch("/api/data").then(r => r.json()); })();'),
73
199
  wait_for: z.string().optional().describe('CSS selector to wait for before executing the script'),
74
200
  timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
75
201
  }, async ({ url, script, wait_for, timeout }) => {
@@ -99,14 +225,29 @@ server.tool('execute_js', 'Navigate to a URL and execute custom JavaScript code
99
225
  }
100
226
  });
101
227
  // ─── get_page_inputs ──────────────────────────────────────────────────────────
102
- server.tool('get_page_inputs', 'Navigate to a URL and return a structured JSON object representing all form inputs (input, select, textarea) found on the page, organized by their name attributes.', {
103
- url: z.string().url().describe('URL of the page'),
228
+ server.tool('get_page_inputs', 'Return a structured JSON object of all form inputs (input, select, textarea) found on a page. If session_id is provided, extracts inputs from the active session page without navigating. If url is provided, opens a one-shot page, extracts, and closes. Exactly one of session_id or url must be provided.', {
229
+ session_id: z.string().optional().describe('Session ID of an active browser session. If provided, url is ignored.'),
230
+ url: z.string().url().optional().describe('URL of the page (one-shot mode, required if session_id is not provided)'),
104
231
  selector: z.string().optional().describe('CSS selector to scope the search (e.g. "#my-form")'),
105
- wait_for: z.string().optional().describe('CSS selector to wait for before extracting inputs'),
232
+ wait_for: z.string().optional().describe('CSS selector to wait for before extracting inputs (one-shot mode only)'),
106
233
  show_hidden: z.boolean().optional().describe('Include input[type=hidden] fields (default: false)'),
107
234
  timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
108
- }, async ({ url, selector, wait_for, show_hidden, timeout }) => {
235
+ }, async ({ session_id, url, selector, wait_for, show_hidden, timeout }) => {
109
236
  const effectiveTimeout = timeout ?? getDefaultTimeout();
237
+ if (!session_id && !url) {
238
+ return { content: [{ type: 'text', text: 'Error: either session_id or url must be provided.' }], isError: true };
239
+ }
240
+ if (session_id) {
241
+ try {
242
+ const page = getSession(session_id);
243
+ const result = await extractInputs(page, { selector, showHidden: show_hidden ?? false });
244
+ return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
245
+ }
246
+ catch (error) {
247
+ const message = error instanceof Error ? error.message : String(error);
248
+ return { content: [{ type: 'text', text: `Error: ${message}` }], isError: true };
249
+ }
250
+ }
110
251
  const browser = await getBrowser();
111
252
  const page = await browser.newPage();
112
253
  try {
@@ -115,59 +256,7 @@ server.tool('get_page_inputs', 'Navigate to a URL and return a structured JSON o
115
256
  if (wait_for) {
116
257
  await page.waitForSelector(wait_for, { timeout: effectiveTimeout });
117
258
  }
118
- const result = await page.evaluate(({ selector: sel, showHidden }) => {
119
- function extractMeta(el) {
120
- if (el.tagName === 'SELECT') {
121
- const select = el;
122
- return {
123
- tagName: 'SELECT',
124
- type: select.type,
125
- value: select.value,
126
- options: Array.from(select.options).map(o => ({ value: o.value, text: o.text.trim() })),
127
- };
128
- }
129
- if (el.type === 'checkbox' || el.type === 'radio') {
130
- const input = el;
131
- return { tagName: 'INPUT', type: input.type, value: input.value, checked: input.checked };
132
- }
133
- return { tagName: el.tagName, type: el.type || 'text', value: el.value };
134
- }
135
- function nestValue(obj, keys, value, isArray) {
136
- if (!obj)
137
- obj = {};
138
- if (keys.length === 0)
139
- return isArray ? [value] : value;
140
- const key = keys[0];
141
- if (keys.length === 1) {
142
- if (isArray) {
143
- return { ...obj, [key]: [...(obj[key] || []), value] };
144
- }
145
- return { ...obj, [key]: value };
146
- }
147
- return { ...obj, [key]: nestValue(obj[key] || {}, keys.slice(1), value, isArray) };
148
- }
149
- const scope = sel ? document.querySelector(sel) ?? document : document;
150
- const query = showHidden ? 'input, select, textarea' : 'input:not([type=hidden]), select, textarea';
151
- const inputs = Array.from(scope.querySelectorAll(query))
152
- .filter(el => el.name);
153
- const fields = inputs.reduce((page, el) => {
154
- const keys = el.name.replace(/-/g, '_').replace(/\]/g, '').split('[');
155
- const isArray = keys[keys.length - 1] === '';
156
- if (isArray)
157
- keys.pop();
158
- return nestValue(page, keys, extractMeta(el), isArray);
159
- }, {});
160
- const buttons = Array.from((sel ? document.querySelector(sel) ?? document : document).querySelectorAll('button, input[type=submit]')).map(el => ({
161
- tagName: el.tagName,
162
- type: el.type || null,
163
- name: el.name || null,
164
- value: el.value || null,
165
- text: el.textContent?.trim() || null,
166
- id: el.id || null,
167
- class: el.className || null,
168
- }));
169
- return { ...fields, buttons };
170
- }, { selector, showHidden: show_hidden ?? false });
259
+ const result = await extractInputs(page, { selector, showHidden: show_hidden ?? false });
171
260
  return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
172
261
  }
173
262
  catch (error) {
@@ -202,89 +291,321 @@ server.tool('get_show_page', 'Navigate to a URL and automatically parse a show/d
202
291
  if (wait_for) {
203
292
  await page.waitForSelector(wait_for, { timeout: effectiveTimeout });
204
293
  }
205
- const result = await page.evaluate((payload) => {
206
- const { keysMap: km, boxSelector: bs, tablesMaxItems } = payload;
207
- function cleanText(text) {
208
- return text
209
- .split('\n')
210
- .map(s => s.trim())
211
- .filter(Boolean)
212
- .join(' ')
213
- .trim();
294
+ const result = await extractShowPage(page, {
295
+ keysMap: keys_map ?? {},
296
+ boxSelector: box_selector ?? '.box.box-primary',
297
+ tablesMaxItems: tables_max_items ?? 2,
298
+ });
299
+ return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
300
+ }
301
+ catch (error) {
302
+ const message = error instanceof Error ? error.message : String(error);
303
+ return { content: [{ type: 'text', text: `Error: ${message}` }], isError: true };
304
+ }
305
+ finally {
306
+ await page.close();
307
+ }
308
+ });
309
+ // ─── open_session ─────────────────────────────────────────────────────────────
310
// Tool `open_session`: asks createSession() to open a page on `url` and hands
// the resulting id back to the caller as `{ "session_id": ... }`. Any failure
// is reported as an MCP error result instead of being thrown.
server.tool(
    'open_session',
    'Open a persistent browser session on a URL. The page stays alive between tool calls, allowing you to interact with it (click, type, evaluate JS, etc.) before taking a screenshot or extracting data. Returns a session_id to use with all session_* tools and screenshot.',
    {
        url: z.string().url().describe('URL to navigate to when opening the session'),
        wait_for: z.string().optional().describe('CSS selector to wait for before the session is considered ready'),
        timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
    },
    async ({ url, wait_for: waitFor, timeout }) => {
        try {
            const newSessionId = await createSession(url, { waitFor, timeout });
            const payload = JSON.stringify({ session_id: newSessionId });
            return { content: [{ type: 'text', text: payload }] };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { content: [{ type: 'text', text: `Error: ${reason}` }], isError: true };
        }
    },
);
324
+ // ─── close_session ────────────────────────────────────────────────────────────
325
// Tool `close_session`: delegates to closeSession() and confirms with a short
// message; a failure from closeSession() is returned as an MCP error result.
server.tool(
    'close_session',
    'Close a persistent browser session and free its resources. Always call this when you are done with a session.',
    {
        session_id: z.string().describe('Session ID returned by open_session'),
    },
    async ({ session_id }) => {
        try {
            await closeSession(session_id);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { content: [{ type: 'text', text: `Error: ${reason}` }], isError: true };
        }
        const confirmation = `Session "${session_id}" closed.`;
        return { content: [{ type: 'text', text: confirmation }] };
    },
);
337
+ // ─── list_sessions ────────────────────────────────────────────────────────────
338
// Tool `list_sessions`: returns whatever listSessions() reports, pretty-printed
// as JSON. Takes no arguments.
server.tool(
    'list_sessions',
    'List all currently active browser sessions with their ID and activity timestamps.',
    {},
    async () => {
        const text = JSON.stringify(listSessions(), null, 2);
        return { content: [{ type: 'text', text }] };
    },
);
342
+ // ─── session_goto ─────────────────────────────────────────────────────────────
343
// Tool `session_goto`: navigates the page of an existing session to `url`
// (waiting for 'networkidle2'), then optionally waits for `wait_for`.
// The same timeout applies to the navigation and to the selector wait.
server.tool(
    'session_goto',
    'Navigate the browser session to a new URL without closing the session.',
    {
        session_id: z.string().describe('Session ID returned by open_session'),
        url: z.string().url().describe('URL to navigate to'),
        wait_for: z.string().optional().describe('CSS selector to wait for after navigation'),
        timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
    },
    async ({ session_id, url, wait_for, timeout }) => {
        const waitBudget = timeout ?? getDefaultTimeout();
        try {
            const sessionPage = getSession(session_id);
            await sessionPage.goto(url, { waitUntil: 'networkidle2', timeout: waitBudget });
            if (wait_for) {
                await sessionPage.waitForSelector(wait_for, { timeout: waitBudget });
            }
            return { content: [{ type: 'text', text: `Navigated to ${url}` }] };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { content: [{ type: 'text', text: `Error: ${reason}` }], isError: true };
        }
    },
);
363
+ // ─── session_click ────────────────────────────────────────────────────────────
364
// Tool `session_click`: waits until `selector` is present in the session page,
// then clicks it. Errors (unknown session, timeout, ...) become MCP error
// results.
server.tool(
    'session_click',
    'Click on an element in the browser session identified by a CSS selector.',
    {
        session_id: z.string().describe('Session ID returned by open_session'),
        selector: z.string().describe('CSS selector of the element to click'),
        timeout: z.number().optional().describe('Timeout in milliseconds to wait for the element (default: 30000)'),
    },
    async ({ session_id, selector, timeout }) => {
        const waitBudget = timeout ?? getDefaultTimeout();
        try {
            const sessionPage = getSession(session_id);
            await sessionPage.waitForSelector(selector, { timeout: waitBudget });
            await sessionPage.click(selector);
            const confirmation = `Clicked "${selector}"`;
            return { content: [{ type: 'text', text: confirmation }] };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { content: [{ type: 'text', text: `Error: ${reason}` }], isError: true };
        }
    },
);
381
+ // ─── session_type ─────────────────────────────────────────────────────────────
382
// Tool `session_type`: waits for `selector`, optionally clears the field, then
// types `text` into it.
//
// Clearing is done by triple-clicking the element to select its content and
// then pressing Backspace. Selecting alone is not enough: with an empty `text`
// nothing would be typed over the selection and the old value would remain.
// NOTE(review): a triple click selects a single line/paragraph, so a
// multi-line textarea may only be partially cleared - confirm if that matters.
server.tool(
    'session_type',
    'Type text into an input element in the browser session. Optionally clears the field first.',
    {
        session_id: z.string().describe('Session ID returned by open_session'),
        selector: z.string().describe('CSS selector of the input element'),
        text: z.string().describe('Text to type into the element'),
        clear: z.boolean().optional().describe('Clear the field before typing (default: false)'),
        timeout: z.number().optional().describe('Timeout in milliseconds to wait for the element (default: 30000)'),
    },
    async ({ session_id, selector, text, clear, timeout }) => {
        const effectiveTimeout = timeout ?? getDefaultTimeout();
        try {
            const page = getSession(session_id);
            await page.waitForSelector(selector, { timeout: effectiveTimeout });
            if (clear) {
                await page.click(selector, { clickCount: 3 });
                // Delete the selection explicitly so the field is emptied even
                // when there is nothing to type afterwards.
                await page.keyboard.press('Backspace');
            }
            await page.type(selector, text);
            return { content: [{ type: 'text', text: `Typed into "${selector}"` }] };
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            return { content: [{ type: 'text', text: `Error: ${message}` }], isError: true };
        }
    },
);
404
+ // ─── session_wait_for ─────────────────────────────────────────────────────────
405
// Tool `session_wait_for`: blocks until `selector` shows up in the session page
// or the timeout elapses; the timeout (or an unknown session) is reported as
// an MCP error result.
server.tool(
    'session_wait_for',
    'Wait for a CSS selector to appear in the active browser session page. Returns a confirmation message on success, or an error if the selector does not appear before the timeout.',
    {
        session_id: z.string().describe('Session ID returned by open_session'),
        selector: z.string().describe('CSS selector to wait for'),
        timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
    },
    async ({ session_id, selector, timeout }) => {
        const waitBudget = timeout ?? getDefaultTimeout();
        try {
            await getSession(session_id).waitForSelector(selector, { timeout: waitBudget });
            const confirmation = `Selector "${selector}" found.`;
            return { content: [{ type: 'text', text: confirmation }] };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { content: [{ type: 'text', text: `Error: ${reason}` }], isError: true };
        }
    },
);
421
+ // ─── session_evaluate ─────────────────────────────────────────────────────────
422
// Tool `session_evaluate`: runs caller-supplied JavaScript inside the session
// page and returns its result as text.
//
// The script is treated as a function body (`new Function(script)()`), which
// is why an explicit `return` is required. Objects and arrays are returned as
// pretty-printed JSON; everything else goes through String(), with null and
// undefined both rendered as "undefined".
//
// SECURITY NOTE: this executes arbitrary code by design; only expose the tool
// to callers that are trusted to script the page.
server.tool(
    'session_evaluate',
    'Execute custom JavaScript in the context of an active browser session page. Same conventions as execute_js: use an explicit return statement, return serializable values only.',
    {
        session_id: z.string().describe('Session ID returned by open_session'),
        script: z.string().describe('JavaScript function body to execute. Use an explicit return statement to get a result back.'),
        wait_for: z.string().optional().describe('CSS selector to wait for before executing the script'),
        timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
    },
    async ({ session_id, script, wait_for, timeout }) => {
        const waitBudget = timeout ?? getDefaultTimeout();
        try {
            const sessionPage = getSession(session_id);
            if (wait_for) {
                await sessionPage.waitForSelector(wait_for, { timeout: waitBudget });
            }
            const value = await sessionPage.evaluate((body) => new Function(body)(), script);
            const isStructured = value !== null && typeof value === 'object';
            let text;
            if (isStructured) {
                text = JSON.stringify(value, null, 2);
            }
            else {
                text = String(value ?? 'undefined');
            }
            return { content: [{ type: 'text', text }] };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { content: [{ type: 'text', text: `Error: ${reason}` }], isError: true };
        }
    },
);
447
+ // ─── session_html ─────────────────────────────────────────────────────────────
448
// Tool `session_html`: returns page.content() of the session page as text.
server.tool(
    'session_html',
    'Return the current full rendered HTML of an active browser session page.',
    {
        session_id: z.string().describe('Session ID returned by open_session'),
    },
    async ({ session_id }) => {
        try {
            const markup = await getSession(session_id).content();
            return { content: [{ type: 'text', text: markup }] };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { content: [{ type: 'text', text: `Error: ${reason}` }], isError: true };
        }
    },
);
461
+ // ─── session_query_selector ───────────────────────────────────────────────────
462
// Tool `session_query_selector`: lists the first `limit` (default 10) elements
// matching `selector` in the session page. For each element it reports a few
// identifying attributes; empty/missing ones are returned as null and the text
// content is trimmed and cut to 100 characters.
server.tool(
    'session_query_selector',
    'Query the active session page with a CSS selector and return matching elements with their key attributes (tagName, id, class, textContent, href, name, type, value). Useful for finding the right selector before clicking or typing.',
    {
        session_id: z.string().describe('Session ID returned by open_session'),
        selector: z.string().describe('CSS selector to query'),
        limit: z.number().optional().describe('Max number of elements to return (default: 10)'),
    },
    async ({ session_id, selector, limit }) => {
        try {
            const sessionPage = getSession(session_id);
            const maxElements = limit ?? 10;
            // The callback is passed to $$eval, so it must be self-contained.
            const matches = await sessionPage.$$eval(selector, (nodes, cap) => {
                const describe = (node) => {
                    const snippet = (node.textContent ?? '').trim().slice(0, 100);
                    return {
                        tagName: node.tagName,
                        id: node.id || null,
                        class: node.className || null,
                        textContent: snippet || null,
                        href: node.href || null,
                        name: node.name || null,
                        type: node.type || null,
                        value: node.value || null,
                    };
                };
                return nodes.slice(0, cap).map(node => describe(node));
            }, maxElements);
            return { content: [{ type: 'text', text: JSON.stringify(matches, null, 2) }] };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { content: [{ type: 'text', text: `Error: ${reason}` }], isError: true };
        }
    },
);
489
+ // ─── session_get_console_logs ─────────────────────────────────────────────────
490
// Tool `session_get_console_logs`: returns the console messages recorded for a
// session as `{ "logs": ... }`, optionally clearing the buffer afterwards.
//
// The response text is built BEFORE the buffer is cleared. That way the
// returned snapshot cannot be affected by the clear (e.g. if
// clearSessionLogs() empties the same array getSessionLogs() returned -
// NOTE(review): sessions.js is not visible here, confirm), and the logs are
// not dropped when serialization fails.
server.tool(
    'session_get_console_logs',
    'Return all console messages (log, warn, error, info, debug, …) captured from the active browser session page since it was opened or last cleared.',
    {
        session_id: z.string().describe('Session ID returned by open_session'),
        clear: z.boolean().optional().describe('Clear the log buffer after reading (default: false)'),
    },
    async ({ session_id, clear }) => {
        try {
            const logs = getSessionLogs(session_id);
            const text = JSON.stringify({ logs }, null, 2);
            if (clear) {
                clearSessionLogs(session_id);
            }
            return { content: [{ type: 'text', text }] };
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            return { content: [{ type: 'text', text: `Error: ${message}` }], isError: true };
        }
    },
);
505
+ // ─── session_scroll ───────────────────────────────────────────────────────────
506
// Tool `session_scroll`: scrolls the session page in one of three ways, checked
// in this order of precedence:
//   1. `selector`  - scroll that element into view;
//   2. `to_bottom` - jump to document.body.scrollHeight;
//   3. `x` / `y`   - scroll by a relative offset (missing axis counts as 0).
// At least one of the four must be given, otherwise an error result is
// returned without touching the session.
server.tool(
    'session_scroll',
    'Scroll the active session page. Use selector to scroll a specific element into view, or x/y to scroll by a pixel amount, or to_bottom: true to scroll to the page bottom (useful for triggering lazy-load).',
    {
        session_id: z.string().describe('Session ID returned by open_session'),
        selector: z.string().optional().describe('CSS selector of an element to scroll into view'),
        x: z.number().optional().describe('Horizontal scroll offset in pixels'),
        y: z.number().optional().describe('Vertical scroll offset in pixels'),
        to_bottom: z.boolean().optional().describe('Scroll to the very bottom of the page'),
    },
    async ({ session_id, selector, x, y, to_bottom }) => {
        const hasOffset = x != null || y != null;
        if (!(selector || to_bottom || hasOffset)) {
            return { content: [{ type: 'text', text: 'Error: provide at least one of selector, to_bottom, x, or y.' }], isError: true };
        }
        const reply = (text) => ({ content: [{ type: 'text', text }] });
        try {
            const sessionPage = getSession(session_id);
            if (selector) {
                await sessionPage.$eval(selector, node => node.scrollIntoView());
                return reply(`Scrolled "${selector}" into view.`);
            }
            if (to_bottom) {
                await sessionPage.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
                return reply('Scrolled to bottom of page.');
            }
            const dx = x ?? 0;
            const dy = y ?? 0;
            await sessionPage.evaluate((left, top) => window.scrollBy(left, top), dx, dy);
            return reply(`Scrolled by x=${dx}, y=${dy}.`);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { content: [{ type: 'text', text: `Error: ${reason}` }], isError: true };
        }
    },
);
534
+ // ─── screenshot ───────────────────────────────────────────────────────────────
535
+ server.tool('screenshot', 'Take a screenshot of a page. Provide either session_id (captures the page in its current interactive state) or url (one-shot: navigates, captures, closes). The selector parameter restricts the capture to a specific element. output controls the return format: "inline" returns a base64 image directly in the response, "file" writes the image to disk and returns the path, "both" does both.', {
536
+ session_id: z.string().optional().describe('Session ID of an active browser session. If provided, url is ignored and the current page state is captured.'),
537
+ url: z.string().url().optional().describe('URL to navigate to for a one-shot screenshot (required if session_id is not provided)'),
538
+ wait_for: z.string().optional().describe('CSS selector to wait for before capturing (one-shot mode only)'),
539
+ timeout: z.number().optional().describe('Timeout in milliseconds (default: 30000)'),
540
+ selector: z.string().optional().describe('CSS selector of a specific element to capture instead of the full page'),
541
+ full_page: z.boolean().optional().describe('Capture the full scrollable page height (default: false, ignored when selector is provided)'),
542
+ format: z.enum(['png', 'jpeg']).optional().describe('Image format (default: png)'),
543
+ output: z.enum(['inline', 'file', 'both']).describe('Return format: "inline" embeds the image in the response, "file" saves to disk and returns the path, "both" does both'),
544
+ path: z.string().optional().describe('Absolute or relative file path for the saved image (used when output is "file" or "both"). Relative paths are resolved from the MCP server\'s working directory. Defaults to <server_root>/screenshots/<timestamp>.<format>'),
545
+ }, async ({ session_id, url, wait_for, timeout, selector, full_page, format, output, path: filePath }) => {
546
+ const effectiveTimeout = timeout ?? getDefaultTimeout();
547
+ const fmt = format ?? 'png';
548
+ const mimeType = fmt === 'jpeg' ? 'image/jpeg' : 'image/png';
549
+ let page = null;
550
+ let ownedPage = false;
551
+ try {
552
+ if (session_id) {
553
+ page = getSession(session_id);
554
+ }
555
+ else {
556
+ if (!url) {
557
+ return { content: [{ type: 'text', text: 'Error: either session_id or url must be provided.' }], isError: true };
214
558
  }
215
- function parseKeyValueBox(box) {
216
- const result = {};
217
- Array.from(box.querySelectorAll('.box-body > .row > div')).forEach(entry => {
218
- const labelEl = entry.querySelector('strong');
219
- const valueEl = entry.querySelector('p');
220
- if (!labelEl)
221
- return;
222
- let key = cleanText(labelEl.textContent || '');
223
- if (km[key])
224
- key = km[key];
225
- result[key] = valueEl ? cleanText(valueEl.textContent || '') : '';
226
- });
227
- return result;
559
+ const browser = await getBrowser();
560
+ page = await browser.newPage();
561
+ ownedPage = true;
562
+ await applyCookies(page, url);
563
+ await page.goto(url, { waitUntil: 'networkidle2', timeout: effectiveTimeout });
564
+ if (wait_for) {
565
+ await page.waitForSelector(wait_for, { timeout: effectiveTimeout });
228
566
  }
229
- function parseTableBox(table) {
230
- // Rows with actual data (exclude <tbody tr> that contain only <th>, e.g. AdminLTE sub-headers)
231
- const dataRows = Array.from(table.querySelectorAll('tbody tr'))
232
- .filter(row => row.querySelector('td') !== null);
233
- // 1st attempt: standard <thead th>
234
- const theadThs = Array.from(table.querySelectorAll('thead tr th'));
235
- let colKeys = null;
236
- if (theadThs.length > 0) {
237
- colKeys = theadThs.map((th, i) => cleanText(th.textContent || '') || `col_${i + 1}`);
238
- }
239
- else {
240
- // 2nd attempt: first <tbody tr> containing only <th> (AdminLTE pattern)
241
- const firstBodyRow = table.querySelector('tbody tr');
242
- if (firstBodyRow && firstBodyRow.querySelector('td') === null) {
243
- const ths = Array.from(firstBodyRow.querySelectorAll('th'));
244
- if (ths.length > 0) {
245
- colKeys = ths.map((th, i) => cleanText(th.textContent || '') || `col_${i + 1}`);
246
- }
247
- }
248
- }
249
- return dataRows.slice(0, tablesMaxItems).map(row => {
250
- const cells = Array.from(row.querySelectorAll('td'));
251
- const output = {};
252
- cells.forEach((td, i) => {
253
- const key = colKeys ? (colKeys[i] ?? `col_${i + 1}`) : `col_${i + 1}`;
254
- output[key] = cleanText(td.textContent || '');
255
- });
256
- return output;
257
- });
567
+ }
568
+ let imageBuffer;
569
+ if (selector) {
570
+ await page.waitForSelector(selector, { timeout: effectiveTimeout });
571
+ const element = await page.$(selector);
572
+ if (!element) {
573
+ return { content: [{ type: 'text', text: `Error: selector "${selector}" not found.` }], isError: true };
258
574
  }
259
- const result = {};
260
- const boxes = Array.from(document.querySelectorAll(bs));
261
- boxes.forEach(box => {
262
- // Support both standard AdminLTE pattern (.box-header h3) and
263
- // inline title pattern where h3/h4 is placed directly inside .box-body.
264
- const titleEl = box.querySelector('.box-header h3, .box-header h4') ??
265
- box.querySelector('.box-body > h3, .box-body > h4');
266
- const title = titleEl ? cleanText(titleEl.textContent || '') : '';
267
- const key = title || `section_${Object.keys(result).length + 1}`;
268
- // Prefer key-value parsing if the box has strong/p pairs,
269
- // only fall back to table parsing when there are no such pairs.
270
- const hasKvPairs = box.querySelector('.box-body > .row > div strong') !== null;
271
- // DataTables with scrollY splits the table into two: a header-only table
272
- // inside .dataTables_scrollHeadInner and the actual data table inside
273
- // .dataTables_scrollBody. Always prefer the scroll-body table when present.
274
- const table = box.querySelector('.dataTables_scrollBody table') ??
275
- box.querySelector('table');
276
- result[key] = hasKvPairs ? parseKeyValueBox(box) : (table ? parseTableBox(table) : parseKeyValueBox(box));
277
- });
278
- return result;
279
- }, { keysMap: keys_map ?? {}, boxSelector: box_selector ?? '.box.box-primary', tablesMaxItems: tables_max_items ?? 2 });
280
- return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
575
+ imageBuffer = Buffer.from(await element.screenshot({ type: fmt }));
576
+ }
577
+ else {
578
+ imageBuffer = Buffer.from(await page.screenshot({ type: fmt, fullPage: full_page ?? false }));
579
+ }
580
+ const base64 = imageBuffer.toString('base64');
581
+ // Resolve file path
582
+ const resolvedPath = (() => {
583
+ if (filePath)
584
+ return path.resolve(filePath);
585
+ const ts = Date.now();
586
+ const dir = path.join(rootDir, 'screenshots');
587
+ return path.join(dir, `screenshot-${ts}.${fmt}`);
588
+ })();
589
+ const resultContent = [];
590
+ if (output === 'inline' || output === 'both') {
591
+ resultContent.push({ type: 'image', data: base64, mimeType });
592
+ }
593
+ if (output === 'file' || output === 'both') {
594
+ const dir = path.dirname(resolvedPath);
595
+ fs.mkdirSync(dir, { recursive: true });
596
+ fs.writeFileSync(resolvedPath, imageBuffer);
597
+ resultContent.push({ type: 'text', text: `Screenshot saved to: ${resolvedPath}` });
598
+ }
599
+ return { content: resultContent };
281
600
  }
282
601
  catch (error) {
283
602
  const message = error instanceof Error ? error.message : String(error);
284
603
  return { content: [{ type: 'text', text: `Error: ${message}` }], isError: true };
285
604
  }
286
605
  finally {
287
- await page.close();
606
+ if (ownedPage && page) {
607
+ await page.close();
608
+ }
288
609
  }
289
610
  });
290
611
  async function main() {
@@ -300,6 +621,7 @@ async function main() {
300
621
  }
301
622
  main().catch(async (err) => {
302
623
  console.error('[MCP] Fatal error:', err);
624
+ await closeAllSessions();
303
625
  await closeBrowser();
304
626
  process.exit(1);
305
627
  });