opencode-webfetch-plugin 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,16 @@
1
1
  import type { ToolContext } from "@opencode-ai/plugin";
2
2
 
3
- import * as os from 'os';
4
- import * as path from 'path';
5
- import * as fs from 'fs';
6
3
  import { HumanInteractor } from './HumanInteractor.js';
7
4
  import { Extractor } from './Extractor.js';
8
- import type { BrowserContext, Page } from 'playwright';
5
+ import { BrowserServer } from './BrowserServer.js';
6
+ import type { Page } from 'playwright';
9
7
 
10
8
  type PlaywrightModule = typeof import('playwright');
11
9
 
12
10
  export class BrowserManager {
13
- private context: BrowserContext | null = null;
14
- private page: Page | null = null;
15
11
  private readonly playwright: PlaywrightModule;
16
12
  private readonly client: any;
13
+ private browserServer: BrowserServer | null = null;
17
14
 
18
15
  constructor(playwright: PlaywrightModule, client: any) {
19
16
  this.playwright = playwright;
@@ -21,125 +18,71 @@ export class BrowserManager {
21
18
  }
22
19
 
23
20
  /**
24
- * Initializes the persistent context if not already done.
21
+ * Ensures the browser server is initialized.
25
22
  */
26
- public async ensureContext(): Promise<void> {
27
- let isConnected = false;
28
- if (this.context && this.page) {
29
- try {
30
- // Simple check to see if the page is still open and connected
31
- isConnected = !this.page.isClosed();
32
- } catch (e) {
33
- isConnected = false;
34
- }
35
- }
36
-
37
- if (isConnected) {
38
- return;
39
- }
40
-
41
- // Clean up just in case
42
- await this.dispose();
43
-
44
- const userDataDir = path.resolve(os.homedir(), '.cache/opencode/user-data');
45
- if (!fs.existsSync(userDataDir)) {
46
- fs.mkdirSync(userDataDir, { recursive: true });
23
+ private async ensureBrowserServer(): Promise<BrowserServer> {
24
+ if (!this.browserServer) {
25
+ this.browserServer = await BrowserServer.getInstance(this.playwright, this.client);
47
26
  }
48
-
49
- // Launch a persistent context with extension support
50
- const extensionPath = path.resolve(os.homedir(), '.cache/opencode/extensions');
51
- const extensions: string[] = [];
52
- if (fs.existsSync(extensionPath)) {
53
- const dirs = fs.readdirSync(extensionPath).map(d => path.join(extensionPath, d));
54
- extensions.push(...dirs.filter(d => fs.statSync(d).isDirectory()));
55
- }
56
-
57
- const launchOptions: Parameters<typeof this.playwright.chromium.launchPersistentContext>[1] = {
58
- headless: false,
59
- args: [
60
- '--no-sandbox',
61
- '--disable-dev-shm-usage',
62
- '--disable-blink-features=AutomationControlled',
63
- '--disable-features=VizDisplayCompositor',
64
- '--window-size=1280,720',
65
- ...(extensions.length > 0 ? [
66
- `--disable-extensions-except=${extensions.join(',')}`,
67
- `--load-extension=${extensions.join(',')}`
68
- ] : []),
69
- ],
70
- viewport: { width: 1280, height: 720 },
71
- };
72
-
73
- this.context = await this.playwright.chromium.launchPersistentContext(userDataDir, launchOptions);
74
-
75
- // Create a new page or use the default one created by launchPersistentContext
76
- const pages = this.context.pages();
77
- if (pages.length > 0) {
78
- this.page = pages[0];
79
- } else {
80
- this.page = await this.context.newPage();
81
- }
82
-
83
- // Mask webdriver
84
- await this.page.addInitScript(() => {
85
- Object.defineProperty(navigator, 'webdriver', {
86
- get: () => false,
87
- });
88
-
89
- const chrome = (window as any).chrome;
90
- if (chrome && chrome.runtime && chrome.runtime.onConnect) {
91
- delete chrome.runtime.onConnect;
92
- }
93
- });
27
+ return this.browserServer;
94
28
  }
95
29
 
96
30
  /**
97
31
  * Navigates to a URL and tries to extract the content.
98
32
  * Prompts the user via terminal if it encounters a captcha or login screen.
33
+ * For concurrent calls, creates new pages in the same context.
99
34
  */
100
35
  public async fetchWebpage(url: string, timeout: number, ctx: ToolContext): Promise<string> {
101
- await this.ensureContext();
102
- if (!this.page) throw new Error('Page not initialized');
36
+ const browserServer = await this.ensureBrowserServer();
37
+ const context = browserServer.getContext();
38
+
39
+ if (!context) throw new Error('Browser context not initialized');
103
40
 
104
- console.log(`\nNavigating to: ${url}`);
41
+ // Create a new page for each concurrent request
42
+ const page = await context.newPage();
43
+
44
+ this.client?.logger?.info(`Navigating to: ${url}`);
105
45
 
106
46
  // Add a listener to handle abortions
107
47
  const onAbort = () => {
108
- console.log('Operation aborted by user or timeout.');
48
+ this.client?.logger?.info('Operation aborted by user or timeout.');
49
+ page.close().catch(() => undefined);
109
50
  };
110
51
  ctx.abort.addEventListener('abort', onAbort);
111
52
 
112
53
  try {
113
54
  // Go to the requested URL
114
- await this.page.goto(url, { waitUntil: 'domcontentloaded', timeout }).catch((e) => {
115
- console.warn(`Navigation might have timed out or failed partially: ${e.message}`);
55
+ await page.goto(url, { waitUntil: 'domcontentloaded', timeout }).catch((e) => {
56
+ this.client?.logger?.warn(`Navigation might have timed out or failed partially: ${e.message}`);
116
57
  });
117
58
 
118
59
  // too fast
119
- await this.page.waitForTimeout(2000);
60
+ await page.waitForTimeout(2000);
120
61
 
121
62
  // Basic heuristic to check if human intervention is needed
122
- const needsHelp = await this.detectBlockers(this.page);
63
+ const needsHelp = await this.detectBlockers(page);
123
64
 
124
65
  if (needsHelp.blocked) {
125
- await HumanInteractor.askForHumanHelp(`The page appears to be blocked or requires login.\nReason: ${needsHelp.reason}\nURL: ${this.page.url()}`, ctx, this.client, () => this.detectBlockers(this.page));
66
+ await HumanInteractor.askForHumanHelp(`The page appears to be blocked or requires login.\nReason: ${needsHelp.reason}\nURL: ${page.url()}`, ctx, this.client, () => this.detectBlockers(page));
126
67
  } else {
127
68
  // Wait a little bit for dynamic content if not blocked
128
- await this.page.waitForTimeout(2000);
69
+ await page.waitForTimeout(2000);
129
70
  }
130
71
 
131
72
  // Allow one more check in case the user didn't fully resolve it, or if it redirected
132
- const needsHelpAgain = await this.detectBlockers(this.page);
73
+ const needsHelpAgain = await this.detectBlockers(page);
133
74
  if (needsHelpAgain.blocked) {
134
- await HumanInteractor.askForHumanHelp(`Still detected a blocker.\nReason: ${needsHelpAgain.reason}\nPlease complete the action and try again.`, ctx, this.client, () => this.detectBlockers(this.page));
75
+ await HumanInteractor.askForHumanHelp(`Still detected a blocker.\nReason: ${needsHelpAgain.reason}\nPlease complete the action and try again.`, ctx, this.client, () => this.detectBlockers(page));
135
76
  }
136
77
 
137
78
  // Extract content as Markdown
138
- console.log('Extracting page content...');
139
- const markdown = await Extractor.extractMarkdown(this.page, this.page.url());
79
+ this.client?.logger?.info('Extracting page content...');
80
+ const markdown = await Extractor.extractMarkdown(page, page.url());
140
81
  return markdown;
141
82
  } finally {
142
83
  ctx.abort.removeEventListener('abort', onAbort);
84
+ // Close the page after extraction to free resources
85
+ await page.close().catch(() => undefined);
143
86
  }
144
87
  }
145
88
 
@@ -199,25 +142,19 @@ export class BrowserManager {
199
142
  }
200
143
 
201
144
  } catch (e) {
202
- console.error('Error detecting blockers:', e);
145
+ this.client?.logger?.error('Error detecting blockers:', e);
203
146
  }
204
147
 
205
148
  return { blocked: false };
206
149
  }
207
150
 
208
151
  /**
209
- * Close the browser context safely.
152
+ * Close the browser if this process owns it.
210
153
  */
211
154
  public async dispose(): Promise<void> {
212
- try {
213
- if (this.context) {
214
- await this.context.close().catch(() => {});
215
- }
216
- } catch (e) {
217
- // Ignore errors on close
218
- } finally {
219
- this.context = null;
220
- this.page = null;
155
+ if (this.browserServer) {
156
+ await this.browserServer.dispose();
221
157
  }
158
+ this.browserServer = null;
222
159
  }
223
160
  }
@@ -0,0 +1,226 @@
1
+ import * as os from 'os';
2
+ import * as path from 'path';
3
+ import * as fs from 'fs';
4
+ import * as http from 'http';
5
+ import { spawn } from 'child_process';
6
+ import { fileURLToPath } from 'url';
7
+ import type { BrowserContext } from 'playwright';
8
+
9
+ type PlaywrightModule = typeof import('playwright');
10
+
11
+ const __filename = fileURLToPath(import.meta.url);
12
+ const __dirname = path.dirname(__filename);
13
+ const PROJECT_DIR = path.resolve(__dirname, '..');
14
+
15
+ const CDP_PORT = parseInt(process.env.WEBFETCH_CDP_PORT || '9222', 10);
16
+ const USER_DATA_DIR = path.resolve(os.homedir(), `.cache/opencode/user-data-${CDP_PORT}`);
17
+ const LAUNCH_SCRIPT = path.resolve(os.homedir(), '.cache/opencode/launch-browser.ts');
18
+
19
+ export class BrowserServer {
20
+ private static instance: BrowserServer | null = null;
21
+ private static initPromise: Promise<BrowserServer> | null = null;
22
+ private context: BrowserContext | null = null;
23
+ private readonly playwright: PlaywrightModule;
24
+ private readonly client: any;
25
+
26
+ private constructor(playwright: PlaywrightModule, client: any) {
27
+ this.playwright = playwright;
28
+ this.client = client;
29
+ }
30
+
31
+ static async getInstance(playwright: PlaywrightModule, client: any): Promise<BrowserServer> {
32
+ if (BrowserServer.initPromise) {
33
+ return BrowserServer.initPromise;
34
+ }
35
+
36
+ if (!BrowserServer.instance) {
37
+ BrowserServer.initPromise = (async () => {
38
+ BrowserServer.instance = new BrowserServer(playwright, client);
39
+ await BrowserServer.instance.initialize();
40
+ return BrowserServer.instance;
41
+ })();
42
+
43
+ try {
44
+ const instance = await BrowserServer.initPromise;
45
+ return instance;
46
+ } finally {
47
+ BrowserServer.initPromise = null;
48
+ }
49
+ }
50
+ return BrowserServer.instance;
51
+ }
52
+
53
+ private async initialize(): Promise<void> {
54
+ if (!fs.existsSync(USER_DATA_DIR)) {
55
+ fs.mkdirSync(USER_DATA_DIR, { recursive: true });
56
+ }
57
+
58
+ this.ensureLaunchScript();
59
+
60
+ for (let attempt = 1; attempt <= 3; attempt++) {
61
+ if (await this.tryConnect()) {
62
+ return;
63
+ }
64
+
65
+ this.client?.logger?.info(`Attempt ${attempt}/3: Starting independent browser process...`);
66
+
67
+ await this.spawnIndependentBrowser();
68
+
69
+ const waitTime = Math.floor(Math.random() * 3000) + 2000;
70
+ this.client?.logger?.info(`Waiting ${waitTime}ms for browser to start...`);
71
+ await this.sleep(waitTime);
72
+
73
+ if (await this.tryConnect()) {
74
+ return;
75
+ }
76
+ }
77
+
78
+ throw new Error('Failed to start or connect to browser after 3 attempts');
79
+ }
80
+
81
+ private ensureLaunchScript(): void {
82
+ const scriptContent = `import { chromium } from 'playwright';
83
+ import * as fs from 'fs';
84
+ import * as path from 'path';
85
+ import * as os from 'os';
86
+
87
+ const CDP_PORT = ${CDP_PORT};
88
+ const USER_DATA_DIR = path.resolve(os.homedir(), '.cache/opencode/user-data-${CDP_PORT}');
89
+
90
+ async function main() {
91
+ const singletonLock = path.join(USER_DATA_DIR, 'SingletonLock');
92
+ if (fs.existsSync(singletonLock)) {
93
+ fs.unlinkSync(singletonLock);
94
+ }
95
+
96
+ const extensionPath = path.resolve(os.homedir(), '.cache/opencode/extensions');
97
+ const extensions: string[] = [];
98
+ if (fs.existsSync(extensionPath)) {
99
+ const dirs = fs.readdirSync(extensionPath).map(d => path.join(extensionPath, d));
100
+ extensions.push(...dirs.filter(d => fs.statSync(d).isDirectory()));
101
+ }
102
+
103
+ const context = await chromium.launchPersistentContext(USER_DATA_DIR, {
104
+ headless: false,
105
+ ignoreDefaultArgs: ['--remote-debugging-pipe'],
106
+ args: [
107
+ '--no-sandbox',
108
+ '--disable-dev-shm-usage',
109
+ '--remote-allow-origins="*"',
110
+ // '--disable-blink-features=AutomationControlled',
111
+ // '--disable-features=VizDisplayCompositor',
112
+ // '--window-size=1280,720',
113
+ '--remote-debugging-port=' + CDP_PORT,
114
+ // ...(extensions.length > 0 ? [
115
+ // '--disable-extensions-except=' + extensions.join(','),
116
+ // '--load-extension=' + extensions.join(',')
117
+ // ] : []),
118
+ ],
119
+ viewport: { width: 1280, height: 720 },
120
+ });
121
+
122
+ context.on('page', async (page) => {
123
+ await page.addInitScript(() => {
124
+ Object.defineProperty(navigator, 'webdriver', { get: () => false });
125
+ const chrome = (window as any).chrome;
126
+ if (chrome && chrome.runtime && chrome.runtime.onConnect) {
127
+ delete chrome.runtime.onConnect;
128
+ }
129
+ });
130
+ });
131
+
132
+ console.log('Browser launched on port ' + CDP_PORT);
133
+ }
134
+
135
+ main().catch(console.error);
136
+ `;
137
+
138
+ const scriptDir = path.dirname(LAUNCH_SCRIPT);
139
+ if (!fs.existsSync(scriptDir)) {
140
+ fs.mkdirSync(scriptDir, { recursive: true });
141
+ }
142
+ fs.writeFileSync(LAUNCH_SCRIPT, scriptContent);
143
+ }
144
+
145
+ private async spawnIndependentBrowser(): Promise<void> {
146
+ return new Promise((resolve) => {
147
+ const child = spawn('npx', ['tsx', LAUNCH_SCRIPT], {
148
+ cwd: PROJECT_DIR,
149
+ detached: true,
150
+ stdio: 'ignore',
151
+ windowsHide: true,
152
+ shell: true,
153
+ });
154
+
155
+ child.unref();
156
+
157
+ child.on('error', (e) => {
158
+ this.client?.logger?.warn(`Failed to spawn browser: ${e.message}`);
159
+ });
160
+
161
+ resolve();
162
+ });
163
+ }
164
+
165
+ private async tryConnect(): Promise<boolean> {
166
+ if (!(await this.isPortInUse(CDP_PORT))) {
167
+ return false;
168
+ }
169
+
170
+ try {
171
+ await this.connectToBrowser(`http://localhost:${CDP_PORT}`);
172
+ return true;
173
+ } catch (e) {
174
+ this.client?.logger?.warn(`Failed to connect: ${e}`);
175
+ return false;
176
+ }
177
+ }
178
+
179
+ private async isPortInUse(port: number): Promise<boolean> {
180
+ return new Promise((resolve) => {
181
+ const req = http.request({
182
+ hostname: 'localhost',
183
+ port: port,
184
+ path: '/json/version',
185
+ method: 'GET',
186
+ timeout: 1000
187
+ }, (res) => {
188
+ resolve(res.statusCode === 200);
189
+ });
190
+
191
+ req.on('error', () => resolve(false));
192
+ req.on('timeout', () => {
193
+ req.destroy();
194
+ resolve(false);
195
+ });
196
+
197
+ req.end();
198
+ });
199
+ }
200
+
201
+ private async connectToBrowser(wsEndpoint: string): Promise<void> {
202
+ this.client?.logger?.info(`Connecting to browser at ${wsEndpoint}...`);
203
+
204
+ const browser = await this.playwright.chromium.connectOverCDP(wsEndpoint);
205
+
206
+ const contexts = browser.contexts();
207
+ if (contexts.length > 0) {
208
+ this.context = contexts[0];
209
+ this.client?.logger?.info('Connected to existing browser');
210
+ } else {
211
+ throw new Error('No context available in connected browser');
212
+ }
213
+ }
214
+
215
+ private sleep(ms: number): Promise<void> {
216
+ return new Promise(resolve => setTimeout(resolve, ms));
217
+ }
218
+
219
+ getContext(): BrowserContext | null {
220
+ return this.context;
221
+ }
222
+
223
+ async dispose(): Promise<void> {
224
+ this.context = null;
225
+ }
226
+ }
@@ -0,0 +1,184 @@
1
+ import { fork, type ChildProcess } from 'child_process';
2
+ import { fileURLToPath } from 'url';
3
+ import * as path from 'path';
4
+
5
+ const __filename = fileURLToPath(import.meta.url);
6
+ const __dirname = path.dirname(__filename);
7
+
8
+ const MAX_TIMEOUT = 120_000;
9
+
10
+ interface WorkerRequest {
11
+ id: string;
12
+ type: 'fetch' | 'dispose';
13
+ url?: string;
14
+ timeout?: number;
15
+ }
16
+
17
+ interface WorkerResponse {
18
+ id: string;
19
+ success: boolean;
20
+ data?: string;
21
+ error?: string;
22
+ }
23
+
24
+ export class BrowserWorkerManager {
25
+ private worker: ChildProcess | null = null;
26
+ private requestId = 0;
27
+ private pendingRequests = new Map<string, { resolve: (value: any) => void; reject: (error: any) => void }>();
28
+ private client: any;
29
+ private initPromise: Promise<void> | null = null;
30
+
31
+ constructor(client: any) {
32
+ this.client = client;
33
+ }
34
+
35
+ private async ensureWorker(): Promise<void> {
36
+ if (this.initPromise) {
37
+ return this.initPromise;
38
+ }
39
+
40
+ if (this.worker) {
41
+ return;
42
+ }
43
+
44
+ this.initPromise = this.startWorker();
45
+ try {
46
+ await this.initPromise;
47
+ } finally {
48
+ this.initPromise = null;
49
+ }
50
+ }
51
+
52
+ private async startWorker(): Promise<void> {
53
+ return new Promise((resolve, reject) => {
54
+ const workerPath = path.resolve(__dirname, 'browser-worker.ts');
55
+
56
+ this.client?.logger?.info(`Starting browser worker: ${workerPath}`);
57
+
58
+ // Use tsx directly as the executable (it's in node_modules/.bin)
59
+ const tsxBin = path.resolve(__dirname, '../node_modules/.bin/tsx');
60
+
61
+ // Use tsx to execute TypeScript directly
62
+ this.worker = fork(workerPath, [], {
63
+ stdio: ['ignore', 'pipe', 'pipe', 'ipc'],
64
+ execPath: tsxBin,
65
+ execArgv: [],
66
+ });
67
+
68
+ // Forward stdout to logger for debugging
69
+ if (this.worker.stdout) {
70
+ this.worker.stdout.on('data', (data) => {
71
+ this.client?.logger?.info(`[Worker stdout] ${data.toString().trim()}`);
72
+ });
73
+ }
74
+
75
+ // Forward stderr to logger
76
+ if (this.worker.stderr) {
77
+ this.worker.stderr.on('data', (data) => {
78
+ this.client?.logger?.error(`[Worker stderr] ${data.toString().trim()}`);
79
+ });
80
+ }
81
+
82
+ // Forward stderr to logger
83
+ if (this.worker.stderr) {
84
+ this.worker.stderr.on('data', (data) => {
85
+ this.client?.logger?.error(`[Worker stderr] ${data.toString().trim()}`);
86
+ });
87
+ }
88
+
89
+ const onReady = (message: any) => {
90
+ if (message && message.type === 'ready') {
91
+ this.worker?.off('message', onReady);
92
+ resolve();
93
+ }
94
+ };
95
+
96
+ this.worker.on('message', onReady);
97
+
98
+ this.worker.on('message', (message: any) => {
99
+ if (message.type === 'toast') {
100
+ // Forward toast to client
101
+ this.client?.tui?.showToast(message.data);
102
+ } else if (message.id) {
103
+ // Handle response
104
+ const pending = this.pendingRequests.get(message.id);
105
+ if (pending) {
106
+ this.pendingRequests.delete(message.id);
107
+ if (message.success) {
108
+ pending.resolve(message.data);
109
+ } else {
110
+ pending.reject(new Error(message.error || 'Unknown worker error'));
111
+ }
112
+ }
113
+ }
114
+ });
115
+
116
+ this.worker.on('error', (error) => {
117
+ this.client?.logger?.error('Worker process error:', error);
118
+ reject(error);
119
+ });
120
+
121
+ this.worker.on('exit', (code) => {
122
+ this.client?.logger?.warn(`Worker process exited with code ${code}`);
123
+ this.worker = null;
124
+ // Reject all pending requests
125
+ for (const [id, pending] of this.pendingRequests.entries()) {
126
+ pending.reject(new Error('Worker process exited'));
127
+ }
128
+ this.pendingRequests.clear();
129
+ });
130
+
131
+ // Timeout for worker startup
132
+ setTimeout(() => {
133
+ if (this.initPromise) {
134
+ reject(new Error('Worker startup timeout'));
135
+ }
136
+ }, 10000);
137
+ });
138
+ }
139
+
140
+ async sendRequest(request: Omit<WorkerRequest, 'id'>): Promise<any> {
141
+ await this.ensureWorker();
142
+
143
+ if (!this.worker) {
144
+ throw new Error('Worker process not available');
145
+ }
146
+
147
+ const id = `req-${++this.requestId}`;
148
+ const fullRequest: WorkerRequest = { id, ...request };
149
+
150
+ return new Promise((resolve, reject) => {
151
+ this.pendingRequests.set(id, { resolve, reject });
152
+
153
+ this.worker!.send(fullRequest, (error) => {
154
+ if (error) {
155
+ this.pendingRequests.delete(id);
156
+ reject(error);
157
+ }
158
+ });
159
+
160
+ // Request timeout
161
+ setTimeout(() => {
162
+ if (this.pendingRequests.has(id)) {
163
+ this.pendingRequests.delete(id);
164
+ reject(new Error('Request timeout'));
165
+ }
166
+ }, MAX_TIMEOUT + 5000);
167
+ });
168
+ }
169
+
170
+ async dispose(): Promise<void> {
171
+ if (this.worker) {
172
+ try {
173
+ await this.sendRequest({ type: 'dispose' });
174
+ } catch (e) {
175
+ this.client?.logger?.warn('Error disposing worker:', e);
176
+ }
177
+
178
+ this.worker.kill();
179
+ this.worker = null;
180
+ }
181
+
182
+ this.pendingRequests.clear();
183
+ }
184
+ }
@@ -45,7 +45,7 @@ export class HumanInteractor {
45
45
  }
46
46
  }
47
47
  } catch (e) {
48
- console.error(e)
48
+ client?.logger?.error('Error in human interaction:', e);
49
49
  }
50
50
  }
51
51
  }