arcfetch 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +1 -1
- package/package.json +1 -1
- package/src/core/fetch-links.ts +1 -1
- package/src/core/pipeline.ts +1 -0
- package/src/core/playwright/manager.ts +55 -5
package/index.ts
CHANGED
|
@@ -146,7 +146,7 @@ Returns summary with title, author, excerpt. Use Read tool to access full conten
|
|
|
146
146
|
{
|
|
147
147
|
name: 'fetch_links',
|
|
148
148
|
description:
|
|
149
|
-
'Fetch all links from a cached reference. Extracts links and fetches each one, caching as new references. Uses parallel fetching (max
|
|
149
|
+
'Fetch all links from a cached reference. Extracts links and fetches each one, caching as new references. Uses parallel fetching (max 3 concurrent).',
|
|
150
150
|
inputSchema: {
|
|
151
151
|
type: 'object',
|
|
152
152
|
properties: {
|
package/package.json
CHANGED
package/src/core/fetch-links.ts
CHANGED
|
@@ -31,7 +31,7 @@ export async function fetchLinksFromRef(
|
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
const results: FetchLinkResult[] = [];
|
|
34
|
-
const concurrency =
|
|
34
|
+
const concurrency = 3;
|
|
35
35
|
const urls = linksResult.links.map((l) => l.href);
|
|
36
36
|
const verbose = options?.verbose ?? false;
|
|
37
37
|
const refetch = options?.refetch ?? false;
|
package/src/core/pipeline.ts
CHANGED
|
@@ -30,6 +30,7 @@ async function simpleFetch(url: string, verbose = false): Promise<SimpleFetchRes
|
|
|
30
30
|
|
|
31
31
|
const response = await fetch(url, {
|
|
32
32
|
redirect: 'follow',
|
|
33
|
+
signal: AbortSignal.timeout(30_000),
|
|
33
34
|
headers: {
|
|
34
35
|
'User-Agent':
|
|
35
36
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
package/src/core/playwright/manager.ts
CHANGED
|
@@ -3,6 +3,7 @@ import { LocalBrowserManager } from './local';
|
|
|
3
3
|
import type { BrowserManager, FetchWithBrowserResult } from './types';
|
|
4
4
|
|
|
5
5
|
let currentManager: BrowserManager | null = null;
|
|
6
|
+
let activeContexts = 0;
|
|
6
7
|
|
|
7
8
|
export async function getBrowserManager(config: PlaywrightConfig): Promise<BrowserManager> {
|
|
8
9
|
if (currentManager) {
|
|
@@ -31,10 +32,50 @@ function pick<T>(arr: T[]): T {
|
|
|
31
32
|
return arr[Math.floor(Math.random() * arr.length)];
|
|
32
33
|
}
|
|
33
34
|
|
|
35
|
+
/** Hard timeout for the entire browser fetch operation (browser launch + navigation + content extraction) */
|
|
36
|
+
const BROWSER_FETCH_TIMEOUT = 45_000;
|
|
37
|
+
|
|
38
|
+
function withTimeout<T>(promise: Promise<T>, ms: number, label: string): Promise<T> {
|
|
39
|
+
return new Promise<T>((resolve, reject) => {
|
|
40
|
+
const timer = setTimeout(() => reject(new Error(`${label} timed out after ${ms}ms`)), ms);
|
|
41
|
+
promise.then(
|
|
42
|
+
(val) => {
|
|
43
|
+
clearTimeout(timer);
|
|
44
|
+
resolve(val);
|
|
45
|
+
},
|
|
46
|
+
(err) => {
|
|
47
|
+
clearTimeout(timer);
|
|
48
|
+
reject(err);
|
|
49
|
+
}
|
|
50
|
+
);
|
|
51
|
+
});
|
|
52
|
+
}
|
|
53
|
+
|
|
34
54
|
export async function fetchWithBrowser(
|
|
35
55
|
url: string,
|
|
36
56
|
config: PlaywrightConfig,
|
|
37
57
|
verbose = false
|
|
58
|
+
): Promise<FetchWithBrowserResult> {
|
|
59
|
+
activeContexts++;
|
|
60
|
+
|
|
61
|
+
try {
|
|
62
|
+
return await withTimeout(
|
|
63
|
+
doFetchWithBrowser(url, config, verbose),
|
|
64
|
+
BROWSER_FETCH_TIMEOUT,
|
|
65
|
+
`Playwright fetch ${url}`
|
|
66
|
+
);
|
|
67
|
+
} catch (error) {
|
|
68
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
69
|
+
return { html: '', error: message };
|
|
70
|
+
} finally {
|
|
71
|
+
activeContexts--;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
async function doFetchWithBrowser(
|
|
76
|
+
url: string,
|
|
77
|
+
config: PlaywrightConfig,
|
|
78
|
+
verbose: boolean
|
|
38
79
|
): Promise<FetchWithBrowserResult> {
|
|
39
80
|
const manager = await getBrowserManager(config);
|
|
40
81
|
const browser = await manager.getBrowser();
|
|
@@ -120,14 +161,23 @@ export async function fetchWithBrowser(
|
|
|
120
161
|
const message = error instanceof Error ? error.message : String(error);
|
|
121
162
|
return { html: '', error: message };
|
|
122
163
|
} finally {
|
|
123
|
-
await page.close();
|
|
124
|
-
await context.close();
|
|
164
|
+
await page.close().catch(() => {});
|
|
165
|
+
await context.close().catch(() => {});
|
|
125
166
|
}
|
|
126
167
|
}
|
|
127
168
|
|
|
128
169
|
export async function closeBrowser(): Promise<void> {
|
|
129
|
-
if (currentManager)
|
|
130
|
-
|
|
131
|
-
|
|
170
|
+
if (!currentManager) return;
|
|
171
|
+
|
|
172
|
+
// Don't close if other contexts are still active
|
|
173
|
+
if (activeContexts > 0) {
|
|
174
|
+
return;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
try {
|
|
178
|
+
await withTimeout(currentManager.closeBrowser(), 5_000, 'closeBrowser');
|
|
179
|
+
} catch {
|
|
180
|
+
// Force-clear even if close times out
|
|
132
181
|
}
|
|
182
|
+
currentManager = null;
|
|
133
183
|
}
|