@imenam/simple-scraper 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
import { randomBytes } from 'node:crypto';
import { getBrowser } from './browser.js';
import { loadAllCookies } from './cookies.js';
3
/**
 * Reads a non-negative integer from an environment variable.
 *
 * A value that does not parse as an integer would otherwise become NaN, and
 * every comparison against NaN is false, which would silently disable the
 * limit or timeout it configures. Such values (and negative ones) fall back
 * to the default instead, with a warning on stderr.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or invalid.
 * @returns {number} The parsed value, or `fallback`.
 */
function readNonNegativeIntEnv(name, fallback) {
    const raw = process.env[name];
    if (raw === undefined) {
        return fallback;
    }
    const parsed = Number.parseInt(raw, 10);
    if (Number.isNaN(parsed) || parsed < 0) {
        console.error(`[Session] Ignoring invalid ${name}="${raw}"; using default ${fallback}`);
        return fallback;
    }
    return parsed;
}
// Maximum number of simultaneously open sessions (SCRAPER_MAX_SESSIONS, default 5).
const MAX_SESSIONS = readNonNegativeIntEnv('SCRAPER_MAX_SESSIONS', 5);
// Inactivity period after which the GC closes a session (SCRAPER_SESSION_TTL_MS, default 10 minutes).
const TTL_MS = readNonNegativeIntEnv('SCRAPER_SESSION_TTL_MS', 10 * 60 * 1000);
// Registry of open sessions: sessionId -> { page, createdAt, lastUsedAt, consoleLogs }.
const sessions = new Map();
6
/**
 * Generates a new session identifier.
 *
 * Uses the crypto RNG rather than Math.random(), so identifiers are not
 * predictable and always have the same length (Math.random().toString(36)
 * can produce fewer digits than the slice asks for).
 *
 * @returns {string} An id of the form `sess_` followed by 12 lowercase hex characters.
 */
function generateSessionId() {
    return `sess_${randomBytes(6).toString('hex')}`;
}
9
/**
 * Loads the stored cookies and sets them on the given page.
 *
 * Does nothing when no cookies are stored. A cookie without a `domain`
 * is copied with `url` set to `pageUrl`; cookies that have a `domain`
 * are passed through unchanged.
 *
 * @param {object} page - Page object exposing `setCookie(...cookies)`.
 * @param {string} pageUrl - URL attached to cookies that have no domain.
 * @returns {Promise<void>}
 */
async function applyCookies(page, pageUrl) {
    const stored = await loadAllCookies();
    if (stored.length) {
        // Attach the target URL only to cookies that carry no domain of their own.
        const bindToPage = (cookie) => {
            if (cookie.domain) {
                return cookie;
            }
            return { ...cookie, url: pageUrl };
        };
        const ready = stored.map(bindToPage);
        await page.setCookie(...ready);
    }
}
16
// Number of createSession() calls that have passed the limit check but have
// not yet registered their session (or failed). Counting them stops concurrent
// calls from all passing the check and exceeding MAX_SESSIONS.
let pendingSessionCount = 0;
/**
 * Opens a new page, applies stored cookies, navigates to `url` and registers
 * the page as a session.
 *
 * Console messages emitted by the page are collected into the session's
 * `consoleLogs` array. If cookie setup, navigation or the optional selector
 * wait fails, the page is closed and the error is rethrown.
 *
 * @param {string} url - URL to navigate to.
 * @param {object} [opts]
 * @param {number} [opts.timeout] - Timeout in ms for navigation and for the
 *   selector wait (each uses it separately); defaults to 30000.
 * @param {string} [opts.waitFor] - Selector to wait for after navigation.
 * @returns {Promise<string>} The id of the newly registered session.
 * @throws {Error} When the session limit is reached, or when opening or
 *   navigating the page fails.
 */
export async function createSession(url, opts = {}) {
    // Include in-flight creations: the session is only added to the map after
    // several awaits, so checking sessions.size alone lets parallel calls slip past.
    if (sessions.size + pendingSessionCount >= MAX_SESSIONS) {
        throw new Error(`Session limit reached (max ${MAX_SESSIONS}). Close an existing session before opening a new one.`);
    }
    pendingSessionCount += 1;
    try {
        const browser = await getBrowser();
        const page = await browser.newPage();
        const now = Date.now();
        const entry = { page, createdAt: now, lastUsedAt: now, consoleLogs: [] };
        page.on('console', (msg) => {
            entry.consoleLogs.push({ type: msg.type(), text: msg.text(), timestamp: Date.now() });
        });
        const timeout = opts.timeout ?? 30_000;
        try {
            await applyCookies(page, url);
            await page.goto(url, { waitUntil: 'networkidle2', timeout });
            if (opts.waitFor) {
                await page.waitForSelector(opts.waitFor, { timeout });
            }
        }
        catch (err) {
            // Best-effort cleanup so a failed creation does not leak the page.
            await page.close().catch(() => undefined);
            throw err;
        }
        // Pick the id at registration time and never reuse a live one:
        // overwriting an existing entry would orphan its page.
        let sessionId = generateSessionId();
        while (sessions.has(sessionId)) {
            sessionId = generateSessionId();
        }
        sessions.set(sessionId, entry);
        console.error(`[Session] Created ${sessionId} → ${url}`);
        return sessionId;
    }
    finally {
        // Runs synchronously after sessions.set() on success, so the slot is
        // always counted either as pending or as registered, never neither.
        pendingSessionCount -= 1;
    }
}
42
/**
 * Looks up a session's page and marks the session as just used.
 *
 * If the page turns out to be closed already, the session is removed from
 * the registry before the error is thrown.
 *
 * @param {string} sessionId - Id returned by createSession().
 * @returns {object} The session's page.
 * @throws {Error} When the id is unknown or the page has been closed.
 */
export function getSession(sessionId) {
    const record = sessions.get(sessionId);
    if (!record) {
        throw new Error(`Session "${sessionId}" not found or already closed.`);
    }
    const { page } = record;
    if (page.isClosed()) {
        // Drop the stale entry so it stops counting toward the session limit.
        sessions.delete(sessionId);
        throw new Error(`Session "${sessionId}" page was closed unexpectedly.`);
    }
    // Refresh the activity timestamp that the inactivity GC compares against.
    record.lastUsedAt = Date.now();
    return page;
}
54
/**
 * Closes a session's page and removes the session from the registry.
 *
 * Unknown ids are ignored. A rejection from closing the page is
 * deliberately discarded; the session is removed either way.
 *
 * @param {string} sessionId - Id of the session to close.
 * @returns {Promise<void>}
 */
export async function closeSession(sessionId) {
    const record = sessions.get(sessionId);
    if (record) {
        const closing = record.page.close();
        // Best effort: a rejected close must not prevent removing the session.
        await closing.catch(() => undefined);
        sessions.delete(sessionId);
        console.error(`[Session] Closed ${sessionId}`);
    }
}
62
/**
 * Closes every registered session, one after another.
 *
 * @returns {Promise<void>}
 */
export async function closeAllSessions() {
    // Snapshot the ids first: closeSession() removes entries from the map.
    const openIds = [...sessions.keys()];
    for (let i = 0; i < openIds.length; i += 1) {
        await closeSession(openIds[i]);
    }
    console.error('[Session] All sessions closed');
}
69
/**
 * Returns the console messages captured for a session so far.
 *
 * The result is a snapshot (a shallow copy of the internal array), so it is
 * unaffected by a later clearSessionLogs() call, which empties the internal
 * array in place, and by messages that arrive afterwards.
 *
 * @param {string} sessionId - Id returned by createSession().
 * @returns {Array<{type: string, text: string, timestamp: number}>} Captured
 *   log entries in arrival order.
 * @throws {Error} When the id is unknown.
 */
export function getSessionLogs(sessionId) {
    const entry = sessions.get(sessionId);
    if (!entry) {
        throw new Error(`Session "${sessionId}" not found or already closed.`);
    }
    return [...entry.consoleLogs];
}
75
/**
 * Discards all console messages captured for a session.
 *
 * The log array is emptied in place; the same array object keeps
 * receiving subsequent messages.
 *
 * @param {string} sessionId - Id returned by createSession().
 * @throws {Error} When the id is unknown.
 */
export function clearSessionLogs(sessionId) {
    const record = sessions.get(sessionId);
    if (!record) {
        throw new Error(`Session "${sessionId}" not found or already closed.`);
    }
    // Remove every element while keeping the array identity.
    record.consoleLogs.splice(0);
}
81
/**
 * Lists the registered sessions with their timestamps.
 *
 * @returns {Array<{sessionId: string, createdAt: number, lastUsedAt: number}>}
 *   One summary per session, in registry order.
 */
export function listSessions() {
    const summaries = [];
    for (const [sessionId, record] of sessions.entries()) {
        summaries.push({
            sessionId,
            createdAt: record.createdAt,
            lastUsedAt: record.lastUsedAt,
        });
    }
    return summaries;
}
88
// GC: once a minute, close sessions inactive for more than TTL_MS.
const sessionGcTimer = setInterval(async () => {
    try {
        const now = Date.now();
        for (const [id, entry] of sessions.entries()) {
            if (now - entry.lastUsedAt > TTL_MS) {
                console.error(`[Session] GC: closing inactive session ${id}`);
                await closeSession(id);
            }
        }
    }
    catch (err) {
        // The timer callback has no caller to report to; log the failure so it
        // does not surface as an unhandled promise rejection.
        console.error(`[Session] GC sweep failed: ${err?.message ?? err}`);
    }
}, 60_000);
// The sweep is housekeeping only: do not let this timer alone keep the
// process alive once everything else has finished.
sessionGcTimer.unref();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@imenam/simple-scraper",
3
- "version": "1.0.7",
3
+ "version": "1.0.8",
4
4
  "description": "MCP server for web scraping and JavaScript execution using Puppeteer",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",