@opendatalabs/connect 0.8.1-canary.ff55fb0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. package/README.md +35 -33
  2. package/dist/cli/bin.js +8 -0
  3. package/dist/cli/bin.js.map +1 -1
  4. package/dist/cli/index.d.ts +87 -0
  5. package/dist/cli/index.d.ts.map +1 -1
  6. package/dist/cli/index.js +3582 -305
  7. package/dist/cli/index.js.map +1 -1
  8. package/dist/cli/main.d.ts.map +1 -1
  9. package/dist/cli/main.js +8 -0
  10. package/dist/cli/main.js.map +1 -1
  11. package/dist/cli/mcp-server.d.ts +15 -0
  12. package/dist/cli/mcp-server.d.ts.map +1 -0
  13. package/dist/cli/mcp-server.js +199 -0
  14. package/dist/cli/mcp-server.js.map +1 -0
  15. package/dist/cli/queries.d.ts +128 -0
  16. package/dist/cli/queries.d.ts.map +1 -0
  17. package/dist/cli/queries.js +415 -0
  18. package/dist/cli/queries.js.map +1 -0
  19. package/dist/cli/render/capabilities.d.ts +9 -0
  20. package/dist/cli/render/capabilities.d.ts.map +1 -0
  21. package/dist/cli/render/capabilities.js +24 -0
  22. package/dist/cli/render/capabilities.js.map +1 -0
  23. package/dist/cli/render/connect-renderer.d.ts +18 -0
  24. package/dist/cli/render/connect-renderer.d.ts.map +1 -0
  25. package/dist/cli/render/connect-renderer.js +255 -0
  26. package/dist/cli/render/connect-renderer.js.map +1 -0
  27. package/dist/cli/render/format.d.ts +27 -0
  28. package/dist/cli/render/format.d.ts.map +1 -0
  29. package/dist/cli/render/format.js +111 -0
  30. package/dist/cli/render/format.js.map +1 -0
  31. package/dist/cli/render/index.d.ts +7 -0
  32. package/dist/cli/render/index.d.ts.map +1 -0
  33. package/dist/cli/render/index.js +7 -0
  34. package/dist/cli/render/index.js.map +1 -0
  35. package/dist/cli/render/progress.d.ts +11 -0
  36. package/dist/cli/render/progress.d.ts.map +1 -0
  37. package/dist/cli/render/progress.js +56 -0
  38. package/dist/cli/render/progress.js.map +1 -0
  39. package/dist/cli/render/symbols.d.ts +11 -0
  40. package/dist/cli/render/symbols.d.ts.map +1 -0
  41. package/dist/cli/render/symbols.js +21 -0
  42. package/dist/cli/render/symbols.js.map +1 -0
  43. package/dist/cli/render/theme.d.ts +15 -0
  44. package/dist/cli/render/theme.d.ts.map +1 -0
  45. package/dist/cli/render/theme.js +41 -0
  46. package/dist/cli/render/theme.js.map +1 -0
  47. package/dist/cli/search-select.d.ts +17 -0
  48. package/dist/cli/search-select.d.ts.map +1 -0
  49. package/dist/cli/search-select.js +29 -0
  50. package/dist/cli/search-select.js.map +1 -0
  51. package/dist/cli/update-check-worker.d.ts +2 -0
  52. package/dist/cli/update-check-worker.d.ts.map +1 -0
  53. package/dist/cli/update-check-worker.js +55 -0
  54. package/dist/cli/update-check-worker.js.map +1 -0
  55. package/dist/cli/update-check.d.ts +21 -0
  56. package/dist/cli/update-check.d.ts.map +1 -0
  57. package/dist/cli/update-check.js +52 -0
  58. package/dist/cli/update-check.js.map +1 -0
  59. package/dist/connectors/registry.d.ts +27 -1
  60. package/dist/connectors/registry.d.ts.map +1 -1
  61. package/dist/connectors/registry.js +168 -4
  62. package/dist/connectors/registry.js.map +1 -1
  63. package/dist/core/cli-types.d.ts +583 -1
  64. package/dist/core/cli-types.d.ts.map +1 -1
  65. package/dist/core/cli-types.js +262 -1
  66. package/dist/core/cli-types.js.map +1 -1
  67. package/dist/core/index.d.ts +3 -2
  68. package/dist/core/index.d.ts.map +1 -1
  69. package/dist/core/index.js +2 -2
  70. package/dist/core/index.js.map +1 -1
  71. package/dist/core/paths.d.ts +22 -3
  72. package/dist/core/paths.d.ts.map +1 -1
  73. package/dist/core/paths.js +71 -10
  74. package/dist/core/paths.js.map +1 -1
  75. package/dist/core/state-store.d.ts +23 -0
  76. package/dist/core/state-store.d.ts.map +1 -1
  77. package/dist/core/state-store.js +83 -5
  78. package/dist/core/state-store.js.map +1 -1
  79. package/dist/personal-server/client.d.ts +34 -0
  80. package/dist/personal-server/client.d.ts.map +1 -0
  81. package/dist/personal-server/client.js +94 -0
  82. package/dist/personal-server/client.js.map +1 -0
  83. package/dist/personal-server/index.d.ts +10 -0
  84. package/dist/personal-server/index.d.ts.map +1 -1
  85. package/dist/personal-server/index.js +79 -32
  86. package/dist/personal-server/index.js.map +1 -1
  87. package/dist/personal-server/scope-resolver.d.ts +22 -0
  88. package/dist/personal-server/scope-resolver.d.ts.map +1 -0
  89. package/dist/personal-server/scope-resolver.js +68 -0
  90. package/dist/personal-server/scope-resolver.js.map +1 -0
  91. package/dist/runtime/core/contracts.d.ts +84 -0
  92. package/dist/runtime/core/contracts.d.ts.map +1 -0
  93. package/dist/runtime/core/contracts.js +2 -0
  94. package/dist/runtime/core/contracts.js.map +1 -0
  95. package/dist/runtime/core/index.d.ts +2 -0
  96. package/dist/runtime/core/index.d.ts.map +1 -0
  97. package/dist/runtime/core/index.js +2 -0
  98. package/dist/runtime/core/index.js.map +1 -0
  99. package/dist/runtime/index.d.ts +1 -0
  100. package/dist/runtime/index.d.ts.map +1 -1
  101. package/dist/runtime/index.js.map +1 -1
  102. package/dist/runtime/managed-playwright.d.ts +12 -3
  103. package/dist/runtime/managed-playwright.d.ts.map +1 -1
  104. package/dist/runtime/managed-playwright.js +124 -187
  105. package/dist/runtime/managed-playwright.js.map +1 -1
  106. package/dist/runtime/playwright/browser.d.ts +12 -0
  107. package/dist/runtime/playwright/browser.d.ts.map +1 -0
  108. package/dist/runtime/playwright/browser.js +229 -0
  109. package/dist/runtime/playwright/browser.js.map +1 -0
  110. package/dist/runtime/playwright/in-process-run.d.ts +6 -0
  111. package/dist/runtime/playwright/in-process-run.d.ts.map +1 -0
  112. package/dist/runtime/playwright/in-process-run.js +628 -0
  113. package/dist/runtime/playwright/in-process-run.js.map +1 -0
  114. package/dist/runtime/playwright/index.d.ts +3 -0
  115. package/dist/runtime/playwright/index.d.ts.map +1 -0
  116. package/dist/runtime/playwright/index.js +3 -0
  117. package/dist/runtime/playwright/index.js.map +1 -0
  118. package/dist/runtime/repo-paths.d.ts.map +1 -1
  119. package/dist/runtime/repo-paths.js +24 -7
  120. package/dist/runtime/repo-paths.js.map +1 -1
  121. package/dist/skills/index.d.ts +4 -0
  122. package/dist/skills/index.d.ts.map +1 -0
  123. package/dist/skills/index.js +3 -0
  124. package/dist/skills/index.js.map +1 -0
  125. package/dist/skills/paths.d.ts +33 -0
  126. package/dist/skills/paths.d.ts.map +1 -0
  127. package/dist/skills/paths.js +81 -0
  128. package/dist/skills/paths.js.map +1 -0
  129. package/dist/skills/registry.d.ts +48 -0
  130. package/dist/skills/registry.d.ts.map +1 -0
  131. package/dist/skills/registry.js +173 -0
  132. package/dist/skills/registry.js.map +1 -0
  133. package/package.json +34 -9
  134. package/runtime-assets/playwright-runner/entitlements.plist +0 -12
  135. package/runtime-assets/playwright-runner/index.cjs +0 -1011
  136. package/runtime-assets/playwright-runner/package-lock.json +0 -1242
  137. package/runtime-assets/playwright-runner/package.json +0 -29
  138. package/runtime-assets/playwright-runner/scripts/build.js +0 -182
  139. package/runtime-assets/run-connector.cjs +0 -275
@@ -1,1011 +0,0 @@
1
- /**
2
- * Playwright Runner for DataConnect
3
- *
4
- * Runs as a sidecar process, receives commands via stdin, sends results via stdout.
5
- *
6
- * Commands:
7
- * - { type: "run", runId, connectorPath, url, headless, allowHeaded }
8
- * - { type: "stop", runId }
9
- * - { type: "evaluate", runId, script }
10
- * - { type: "input-response", runId, requestId, data?, error? }
11
- * - { type: "screenshot", runId }
12
- * - { type: "quit" }
13
- *
14
- * Supports two-phase connectors:
15
- * - Phase 1 (Browser): Login detection + credential extraction
16
- * - Phase 2 (Background): Direct HTTP fetch without browser
17
- */
18
-
19
- const { chromium } = require('playwright');
20
- const fs = require('fs');
21
- const readline = require('readline');
22
- const path = require('path');
23
- const { execSync } = require('child_process');
24
-
25
- // System Chrome paths by platform
26
- const CHROME_PATHS = {
27
- darwin: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
28
- win32: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
29
- linux: '/usr/bin/google-chrome'
30
- };
31
-
32
- // Get browser cache directory - checks multiple candidate paths
33
- function getBrowserCacheDir() {
34
- if (process.env.PLAYWRIGHT_BROWSERS_PATH) {
35
- log(`Using PLAYWRIGHT_BROWSERS_PATH: ${process.env.PLAYWRIGHT_BROWSERS_PATH}`);
36
- return process.env.PLAYWRIGHT_BROWSERS_PATH;
37
- }
38
- const home = process.env.HOME || process.env.USERPROFILE || '';
39
- const candidates = [
40
- path.join(home, '.dataconnect', 'browsers'),
41
- path.join(home, '.dataconnect', 'playwright-runner', 'node_modules', 'playwright-core', '.local-browsers'),
42
- ];
43
- for (const dir of candidates) {
44
- if (fs.existsSync(dir)) {
45
- log(`Found browser cache: ${dir}`);
46
- return dir;
47
- }
48
- }
49
- return candidates[0];
50
- }
51
-
52
- // Check if system Chrome exists
53
- function getSystemChromePath() {
54
- const chromePath = CHROME_PATHS[process.platform];
55
- log(`Checking system Chrome at: ${chromePath}`);
56
- if (chromePath && fs.existsSync(chromePath)) {
57
- log(`Found system Chrome: ${chromePath}`);
58
- return chromePath;
59
- }
60
- log(`System Chrome not found at default path`);
61
- // Try alternative Windows paths
62
- if (process.platform === 'win32') {
63
- const altPaths = [
64
- 'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
65
- path.join(process.env.LOCALAPPDATA || '', 'Google\\Chrome\\Application\\chrome.exe')
66
- ];
67
- for (const p of altPaths) {
68
- if (fs.existsSync(p)) return p;
69
- }
70
- }
71
- // Try Edge on Windows
72
- if (process.platform === 'win32') {
73
- const edgePath = 'C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe';
74
- if (fs.existsSync(edgePath)) return edgePath;
75
- }
76
- return null;
77
- }
78
-
79
- // Check if Playwright Chromium is already downloaded (or bundled via PLAYWRIGHT_BROWSERS_PATH)
80
- function getDownloadedChromiumPath() {
81
- const cacheDir = getBrowserCacheDir();
82
- log(`Checking for Chromium in: ${cacheDir}`);
83
- if (!fs.existsSync(cacheDir)) {
84
- log(`Browser cache dir does not exist: ${cacheDir}`);
85
- return null;
86
- }
87
-
88
- // Look for chromium directory
89
- const entries = fs.readdirSync(cacheDir);
90
- const chromiumDir = entries.find(e => e.startsWith('chromium-') && !e.includes('headless'));
91
- if (!chromiumDir) return null;
92
-
93
- const chromiumPath = path.join(cacheDir, chromiumDir);
94
-
95
- // Platform-specific executable paths (Playwright's "Chrome for Testing" structure)
96
- if (process.platform === 'darwin') {
97
- // Try arm64 first, then x64
98
- const paths = [
99
- path.join(chromiumPath, 'chrome-mac-arm64', 'Google Chrome for Testing.app', 'Contents', 'MacOS', 'Google Chrome for Testing'),
100
- path.join(chromiumPath, 'chrome-mac', 'Google Chrome for Testing.app', 'Contents', 'MacOS', 'Google Chrome for Testing'),
101
- // Legacy paths
102
- path.join(chromiumPath, 'chrome-mac-arm64', 'Chromium.app', 'Contents', 'MacOS', 'Chromium'),
103
- path.join(chromiumPath, 'chrome-mac', 'Chromium.app', 'Contents', 'MacOS', 'Chromium'),
104
- ];
105
- for (const p of paths) {
106
- if (fs.existsSync(p)) return p;
107
- }
108
- } else if (process.platform === 'win32') {
109
- const paths = [
110
- path.join(chromiumPath, 'chrome-win', 'chrome.exe'),
111
- path.join(chromiumPath, 'chrome-win64', 'chrome.exe'),
112
- ];
113
- for (const p of paths) {
114
- if (fs.existsSync(p)) return p;
115
- }
116
- } else {
117
- const paths = [
118
- path.join(chromiumPath, 'chrome-linux', 'chrome'),
119
- path.join(chromiumPath, 'chrome-linux64', 'chrome'),
120
- ];
121
- for (const p of paths) {
122
- if (fs.existsSync(p)) return p;
123
- }
124
- }
125
-
126
- return null;
127
- }
128
-
129
- // Default Chrome user-data directories by platform
130
- const CHROME_PROFILE_DIRS = {
131
- darwin: path.join(process.env.HOME || '', 'Library', 'Application Support', 'Google', 'Chrome'),
132
- win32: path.join(process.env.LOCALAPPDATA || '', 'Google', 'Chrome', 'User Data'),
133
- linux: path.join(process.env.HOME || '', '.config', 'google-chrome'),
134
- };
135
-
136
- // Check whether a browser path points to system Chrome (not Playwright Chromium).
137
- function isSystemChrome(browserPath) {
138
- if (!browserPath) return false;
139
- const lower = browserPath.toLowerCase();
140
- if (lower.includes('.databridge') || lower.includes('chromium') || lower.includes('chrome for testing')) {
141
- return false;
142
- }
143
- return true;
144
- }
145
-
146
- // Get the Chrome last-used profile directory path.
147
- function getChromeProfileDir(chromeRoot) {
148
- const localStatePath = path.join(chromeRoot, 'Local State');
149
- if (fs.existsSync(localStatePath)) {
150
- try {
151
- const localState = JSON.parse(fs.readFileSync(localStatePath, 'utf-8'));
152
- const lastUsed = localState?.profile?.last_used;
153
- if (lastUsed) {
154
- const profileDir = path.join(chromeRoot, lastUsed);
155
- if (fs.existsSync(profileDir)) {
156
- log(`Chrome last-used profile: "${lastUsed}"`);
157
- return profileDir;
158
- }
159
- }
160
- } catch (e) {
161
- log(`Warning: could not read Chrome Local State: ${e.message}`);
162
- }
163
- }
164
-
165
- const defaultDir = path.join(chromeRoot, 'Default');
166
- if (fs.existsSync(defaultDir)) return defaultDir;
167
- return null;
168
- }
169
-
170
- // Import cookies from the user's Chrome profile into a running Playwright
171
- // browser context's Cookies database. This is done AFTER Chrome creates its
172
- // own fresh profile, so we INSERT into Chrome's own db rather than replacing it
173
- // (which Chrome would wipe on startup).
174
- //
175
- // The encrypted_value blobs use the same Keychain key (v10 format), so Chrome
176
- // can decrypt them transparently — no Keychain popup needed since Chrome itself
177
- // is the one reading them.
178
- function importChromecookies(userDataDir, browserPath) {
179
- if (!isSystemChrome(browserPath)) return;
180
-
181
- // Only import once
182
- const markerFile = path.join(userDataDir, '.cookies-imported');
183
- if (fs.existsSync(markerFile)) {
184
- log('Skipping cookie import — already done');
185
- return;
186
- }
187
-
188
- const chromeRoot = CHROME_PROFILE_DIRS[process.platform];
189
- if (!chromeRoot || !fs.existsSync(chromeRoot)) return;
190
-
191
- const sourceProfileDir = getChromeProfileDir(chromeRoot);
192
- if (!sourceProfileDir) return;
193
-
194
- const sourceCookies = path.join(sourceProfileDir, 'Cookies');
195
- if (!fs.existsSync(sourceCookies)) return;
196
-
197
- // Find the target Cookies db — Chrome creates it inside "Default/" by default
198
- const targetCookies = path.join(userDataDir, 'Default', 'Cookies');
199
- if (!fs.existsSync(targetCookies)) {
200
- log('Skipping cookie import — target Cookies db not found yet');
201
- return;
202
- }
203
-
204
- try {
205
- // Use sqlite3 to INSERT cookies from source into the target db.
206
- // ATTACH the source db, then INSERT OR IGNORE to avoid duplicates.
207
- const sql = `
208
- ATTACH DATABASE '${sourceCookies.replace(/'/g, "''")}' AS src;
209
- INSERT OR REPLACE INTO cookies
210
- SELECT * FROM src.cookies;
211
- DETACH DATABASE src;
212
- `;
213
- execSync(`sqlite3 "${targetCookies}" "${sql}"`, {
214
- encoding: 'utf-8',
215
- timeout: 10000,
216
- });
217
-
218
- // Verify
219
- const count = execSync(
220
- `sqlite3 "${targetCookies}" "SELECT COUNT(*) FROM cookies;"`,
221
- { encoding: 'utf-8' }
222
- ).trim();
223
- log(`Imported cookies into profile — total cookies now: ${count}`);
224
-
225
- fs.writeFileSync(markerFile, new Date().toISOString());
226
- } catch (e) {
227
- log(`Warning: could not import Chrome cookies: ${e.message}`);
228
- }
229
- }
230
-
231
- // Download Chromium using Playwright
232
- async function downloadChromium(sendStatus) {
233
- const cacheDir = getBrowserCacheDir();
234
-
235
- // Create cache directory
236
- if (!fs.existsSync(cacheDir)) {
237
- fs.mkdirSync(cacheDir, { recursive: true });
238
- }
239
-
240
- log('Downloading Chromium browser (one-time setup)...');
241
- if (sendStatus) {
242
- sendStatus('DOWNLOADING_BROWSER');
243
- }
244
-
245
- // Set environment for Playwright to use our cache dir
246
- process.env.PLAYWRIGHT_BROWSERS_PATH = cacheDir;
247
-
248
- try {
249
- // Use Playwright's CLI to download Chromium
250
- execSync('npx playwright install chromium', {
251
- stdio: 'inherit',
252
- env: { ...process.env, PLAYWRIGHT_BROWSERS_PATH: cacheDir }
253
- });
254
- log('Chromium download complete');
255
- return getDownloadedChromiumPath();
256
- } catch (error) {
257
- log('Failed to download Chromium:', error.message);
258
- throw new Error('Failed to download browser. Please install Google Chrome or try again.');
259
- }
260
- }
261
-
262
- // Active browser contexts by runId
263
- const activeRuns = new Map();
264
-
265
- // Send message to parent process
266
- function send(msg) {
267
- process.stdout.write(JSON.stringify(msg) + '\n');
268
- }
269
-
270
- function drainStdout() {
271
- return new Promise(resolve => {
272
- if (process.stdout.writableNeedDrain) {
273
- process.stdout.once('drain', resolve);
274
- } else {
275
- process.stdout.write('', resolve);
276
- }
277
- });
278
- }
279
-
280
- // Log to stderr (doesn't interfere with JSON protocol)
281
- function log(...args) {
282
- console.error('[PlaywrightRunner]', ...args);
283
- }
284
-
285
- // Resolve browser executable path
286
- function resolveBrowserPath() {
287
- let browserPath = null;
288
-
289
- if (!process.env.DATACONNECT_SIMULATE_NO_CHROME) {
290
- browserPath = getSystemChromePath();
291
- } else {
292
- log('DATACONNECT_SIMULATE_NO_CHROME is set, skipping system Chrome detection');
293
- }
294
-
295
- if (!browserPath) {
296
- browserPath = getDownloadedChromiumPath();
297
- }
298
-
299
- if (!browserPath) {
300
- throw new Error('No browser available. The Rust backend should have downloaded Chromium before starting the connector.');
301
- }
302
-
303
- return browserPath;
304
- }
305
-
306
- // Launch a persistent browser context
307
- async function launchPersistentContext(userDataDir, headless, browserPath) {
308
- // Ensure profile directory exists
309
- fs.mkdirSync(userDataDir, { recursive: true });
310
-
311
- const launchOptions = {
312
- headless,
313
- args: [
314
- '--disable-blink-features=AutomationControlled',
315
- '--disable-features=MediaRouter,DialMediaRouteProvider',
316
- ],
317
- viewport: { width: 1280, height: 800 },
318
- userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
319
- };
320
-
321
- if (browserPath) {
322
- launchOptions.executablePath = browserPath;
323
- }
324
-
325
- // When using system Chrome, disable Playwright's mock keychain so Chrome
326
- // uses the real macOS Keychain. This lets it decrypt cookies imported from
327
- // the user's real Chrome profile (both use the same "Chrome Safe Storage"
328
- // Keychain entry). No popup — Chrome itself is already authorized.
329
- if (isSystemChrome(browserPath)) {
330
- launchOptions.ignoreDefaultArgs = ['--use-mock-keychain'];
331
- }
332
-
333
- log(`Launching ${headless ? 'headless' : 'headed'} browser with profile: ${userDataDir}`);
334
- const context = await chromium.launchPersistentContext(userDataDir, launchOptions);
335
- log('Browser launched successfully');
336
- return context;
337
- }
338
-
339
- // Create the page API that connectors use
340
- function createPageApi(runState, runId) {
341
- const networkCaptures = new Map();
342
- const capturedResponses = new Map();
343
-
344
- // Helper to get current page, throw if browser is closed
345
- function requirePage() {
346
- if (runState.browserClosed || !runState.page) {
347
- throw new Error('Browser is closed. Use page.httpFetch() for HTTP requests.');
348
- }
349
- return runState.page;
350
- }
351
-
352
- // Set up network interception on current page
353
- function setupNetworkCapture(page) {
354
- page.on('response', async (response) => {
355
- const url = response.url();
356
-
357
- for (const [key, config] of networkCaptures.entries()) {
358
- if (config.urlPattern && !url.includes(config.urlPattern)) continue;
359
-
360
- try {
361
- const request = response.request();
362
- const postData = request.postData() || '';
363
-
364
- if (config.bodyPattern) {
365
- const patterns = config.bodyPattern.split('|');
366
- if (!patterns.some(p => postData.includes(p))) continue;
367
- }
368
-
369
- const body = await response.json().catch(() => null);
370
- if (body) {
371
- capturedResponses.set(key, { url, data: body, timestamp: Date.now() });
372
- send({ type: 'network-captured', runId, key, url });
373
- }
374
- } catch (e) {
375
- // Ignore errors for non-JSON responses
376
- }
377
- }
378
- });
379
- }
380
-
381
- // Set up network capture on initial page
382
- if (runState.page) {
383
- setupNetworkCapture(runState.page);
384
- }
385
-
386
- return {
387
- goto: async (url, options = {}) => {
388
- const page = requirePage();
389
- log(`pageApi.goto called with: ${url}`);
390
- send({ type: 'log', runId, message: `Navigating to: ${url}` });
391
- const { waitUntil = 'domcontentloaded', timeout } = options;
392
- const gotoOpts = { waitUntil };
393
- if (timeout != null) gotoOpts.timeout = timeout;
394
- try {
395
- await page.goto(url, gotoOpts);
396
- log('pageApi.goto completed successfully');
397
- } catch (err) {
398
- log(`pageApi.goto error: ${err.message}`);
399
- throw err;
400
- }
401
- },
402
-
403
- evaluate: async (script) => {
404
- const page = requirePage();
405
- return await page.evaluate(script);
406
- },
407
-
408
- screenshot: async () => {
409
- const page = requirePage();
410
- const buffer = await page.screenshot({ type: 'jpeg', quality: 70, timeout: 5000 });
411
- return buffer.toString('base64');
412
- },
413
-
414
- requestInput: async (payload) => {
415
- const requestId = `input-${++runState.requestCounter}`;
416
- send({ type: 'request-input', runId, requestId, payload });
417
- return new Promise((resolve, reject) => {
418
- runState.pendingInputs.set(requestId, { resolve, reject });
419
- });
420
- },
421
-
422
- sleep: (ms) => new Promise(resolve => setTimeout(resolve, ms)),
423
-
424
- setData: async (key, value) => {
425
- if (key === 'status') {
426
- send({ type: 'log', runId, message: value });
427
- log(`[status] ${value}`);
428
- } else if (key === 'error') {
429
- log(`[error] ${value}`);
430
- } else if (key === 'result') {
431
- runState.hasResult = true;
432
- }
433
- send({ type: 'data', runId, key, value });
434
- },
435
-
436
- // Structured progress update — drives the frontend progress UI
437
- setProgress: async ({ phase, message, count }) => {
438
- send({ type: 'status', runId, status: { type: 'COLLECTING', message, phase, count } });
439
- if (message) log(`[progress] ${message}`);
440
- },
441
-
442
- promptUser: async (message, checkFn, interval = 2000) => {
443
- send({ type: 'log', runId, message });
444
- send({ type: 'status', runId, status: 'WAITING_FOR_USER' });
445
-
446
- // Poll until condition is met
447
- while (true) {
448
- await new Promise(resolve => setTimeout(resolve, interval));
449
- try {
450
- const result = await checkFn();
451
- if (result) {
452
- send({ type: 'log', runId, message: 'User action completed' });
453
- return;
454
- }
455
- } catch (e) {
456
- // Keep waiting
457
- }
458
- }
459
- },
460
-
461
- captureNetwork: async (config) => {
462
- networkCaptures.set(config.key, {
463
- urlPattern: config.urlPattern || '',
464
- bodyPattern: config.bodyPattern || ''
465
- });
466
- log(`Registered network capture: ${config.key}`);
467
- },
468
-
469
- getCapturedResponse: async (key) => {
470
- const captured = capturedResponses.get(key);
471
- return captured ? captured : null;
472
- },
473
-
474
- clearNetworkCaptures: async () => {
475
- networkCaptures.clear();
476
- capturedResponses.clear();
477
- },
478
-
479
- hasCapturedResponse: (key) => {
480
- return capturedResponses.has(key);
481
- },
482
-
483
- // Close the browser but keep the Node.js process alive for background HTTP work.
484
- // Cookies/session persist in the profile directory for next run.
485
- closeBrowser: async () => {
486
- if (runState.browserClosed) {
487
- log('Browser already closed');
488
- return;
489
- }
490
-
491
- log('Closing browser (connector requested closeBrowser)');
492
-
493
- // Extract cookies before closing so httpFetch can use them
494
- if (runState.context) {
495
- try {
496
- runState.cookies = await runState.context.cookies();
497
- log(`Extracted ${runState.cookies.length} cookies for background HTTP requests`);
498
- } catch (e) {
499
- log('Warning: could not extract cookies:', e.message);
500
- runState.cookies = [];
501
- }
502
- }
503
-
504
- runState.browserClosed = true;
505
- runState.browserClosedByConnector = true;
506
-
507
- if (runState.context) {
508
- try {
509
- await runState.context.close();
510
- } catch (e) {
511
- log('Error closing context:', e.message);
512
- }
513
- runState.context = null;
514
- runState.page = null;
515
- }
516
-
517
- send({ type: 'log', runId, message: 'Browser closed, continuing in background...' });
518
- log('Browser closed, process stays alive for background work');
519
- },
520
-
521
- // Escalate to headed mode for live human interaction (e.g., interactive CAPTCHAs).
522
- // Gated by allowHeaded capability — if the driver doesn't support headed mode,
523
- // navigates in the existing headless browser and returns { headed: false }.
524
- showBrowser: async (url) => {
525
- log('showBrowser requested');
526
-
527
- if (runState.browserClosed) {
528
- log('showBrowser called but browser is already closed');
529
- return { headed: false };
530
- }
531
-
532
- if (!runState.allowHeaded) {
533
- log('Headed mode not available — navigating headless');
534
- if (url && runState.page) {
535
- try {
536
- await runState.page.goto(url, { waitUntil: 'domcontentloaded' });
537
- } catch (e) {
538
- log(`showBrowser headless navigation failed: ${e.message}`);
539
- }
540
- }
541
- send({ type: 'log', runId, message: 'Headed interaction unavailable — staying headless' });
542
- return { headed: false };
543
- }
544
-
545
- // Close existing browser if open
546
- if (runState.context && !runState.browserClosed) {
547
- log('Closing existing browser before reopening headed');
548
- runState.browserClosedByConnector = true;
549
- try {
550
- await runState.context.close();
551
- } catch (e) {
552
- log('Error closing existing context:', e.message);
553
- }
554
- runState.context = null;
555
- runState.page = null;
556
- }
557
-
558
- // Launch new headed browser with persistent context
559
- runState.browserClosed = false;
560
- runState.browserClosedByConnector = false;
561
- runState.headless = false;
562
- const context = await launchPersistentContext(runState.userDataDir, false, runState.browserPath);
563
- const page = context.pages()[0] || await context.newPage();
564
-
565
- // Set up disconnect handler
566
- context.browser().on('disconnected', () => {
567
- if (!runState.connectorCompleted && !runState.browserClosedByConnector) {
568
- log(`Browser disconnected for run ${runId} (user closed window)`);
569
- runState.browserClosed = true;
570
- runState.context = null;
571
- runState.page = null;
572
- activeRuns.delete(runId);
573
- send({ type: 'status', runId, status: 'STOPPED' });
574
- drainStdout().then(() => process.exit(0));
575
- }
576
- });
577
-
578
- // Update state
579
- runState.context = context;
580
- runState.page = page;
581
-
582
- // Re-setup network capture on new page
583
- setupNetworkCapture(page);
584
-
585
- // Navigate to URL
586
- if (url) {
587
- await page.goto(url, { waitUntil: 'domcontentloaded' });
588
- }
589
-
590
- send({ type: 'log', runId, message: 'Browser opened for user interaction' });
591
- log('Headed browser opened');
592
- return { headed: true };
593
- },
594
-
595
- // Switch to headless mode — browser becomes invisible but stays running.
596
- // Use this after credentials are captured so the user doesn't see the browser
597
- // during data collection, while preserving the TLS fingerprint for Cloudflare.
598
- goHeadless: async () => {
599
- if (runState.headless && !runState.browserClosed) {
600
- log('Already in headless mode');
601
- return;
602
- }
603
-
604
- log('Switching to headless mode');
605
-
606
- // Close existing headed browser
607
- if (runState.context && !runState.browserClosed) {
608
- runState.browserClosedByConnector = true;
609
- try {
610
- await runState.context.close();
611
- } catch (e) {
612
- log('Error closing headed context:', e.message);
613
- }
614
- runState.context = null;
615
- runState.page = null;
616
- }
617
-
618
- // Reopen headless browser with persistent context
619
- runState.browserClosed = false;
620
- runState.browserClosedByConnector = false;
621
- runState.headless = true;
622
- const context = await launchPersistentContext(runState.userDataDir, true, runState.browserPath);
623
- const page = context.pages()[0] || await context.newPage();
624
-
625
- // Set up disconnect handler
626
- context.browser().on('disconnected', () => {
627
- if (!runState.connectorCompleted && !runState.browserClosedByConnector) {
628
- log(`Browser disconnected for run ${runId}`);
629
- runState.browserClosed = true;
630
- runState.context = null;
631
- runState.page = null;
632
- activeRuns.delete(runId);
633
- send({ type: 'status', runId, status: 'STOPPED' });
634
- drainStdout().then(() => process.exit(0));
635
- }
636
- });
637
-
638
- // Update state
639
- runState.context = context;
640
- runState.page = page;
641
-
642
- // Re-setup network capture on new page
643
- setupNetworkCapture(page);
644
-
645
- // Navigate to establish browser context
646
- await page.goto('https://chatgpt.com/', { waitUntil: 'domcontentloaded' });
647
-
648
- send({ type: 'log', runId, message: 'Switched to headless mode for background data collection' });
649
- log('Switched to headless mode');
650
- },
651
-
652
- // Direct HTTP fetch from Node.js — no browser needed.
653
- // Works after closeBrowser() for background data collection.
654
- // Automatically includes cookies extracted from the browser session.
655
- httpFetch: async (url, options = {}) => {
656
- const { timeout = 30000, ...fetchOptions } = options;
657
-
658
- // Auto-include cookies from the closed browser session
659
- if (runState.cookies && runState.cookies.length > 0) {
660
- try {
661
- const urlObj = new URL(url);
662
- const relevantCookies = runState.cookies
663
- .filter(c => {
664
- const cookieDomain = c.domain.startsWith('.') ? c.domain.slice(1) : c.domain;
665
- return urlObj.hostname === cookieDomain || urlObj.hostname.endsWith('.' + cookieDomain);
666
- })
667
- .map(c => `${c.name}=${c.value}`)
668
- .join('; ');
669
- if (relevantCookies) {
670
- fetchOptions.headers = { ...fetchOptions.headers, cookie: relevantCookies };
671
- }
672
- } catch (e) {
673
- // Ignore cookie injection errors
674
- }
675
- }
676
-
677
- const controller = new AbortController();
678
- const timeoutId = setTimeout(() => controller.abort(), timeout);
679
- try {
680
- const response = await fetch(url, {
681
- ...fetchOptions,
682
- signal: controller.signal,
683
- });
684
- clearTimeout(timeoutId);
685
- const text = await response.text();
686
- let json = null;
687
- try { json = JSON.parse(text); } catch {}
688
- if (!response.ok) {
689
- log(`[httpFetch] ${response.status} ${response.statusText} for ${url.substring(0, 100)}`);
690
- log(`[httpFetch] Response body (first 200 chars): ${text.substring(0, 200)}`);
691
- }
692
- return {
693
- ok: response.ok,
694
- status: response.status,
695
- headers: Object.fromEntries(response.headers.entries()),
696
- text,
697
- json,
698
- error: null,
699
- };
700
- } catch (err) {
701
- clearTimeout(timeoutId);
702
- return {
703
- ok: false,
704
- status: 0,
705
- headers: {},
706
- text: '',
707
- json: null,
708
- error: err.message,
709
- };
710
- }
711
- },
712
- };
713
- }
714
-
715
// Run a connector
/**
 * Launches a browser with a per-connector persistent profile, executes the
 * connector script against the page API, reports progress through send(),
 * and then terminates the process.
 *
 * Messages emitted: status RUNNING after the initial navigation; on success an
 * optional `result` followed by status COMPLETE; on failure an `error`
 * followed by status ERROR. The process exits with code 0 on success and 1 on
 * failure, so this function does not return to its caller in either case.
 *
 * @param {string} runId - Identifier echoed in every message for this run.
 * @param {string} connectorPath - Path of the connector script; its base name
 *   (without extension) selects the browser profile directory.
 * @param {string} url - Initial URL to navigate to before the connector runs.
 * @param {boolean} [headless=true] - Whether the main browser launch is headless.
 * @param {boolean} [allowHeaded=true] - Stored on the run state for the page API.
 * @returns {Promise<void>}
 */
async function runConnector(runId, connectorPath, url, headless = true, allowHeaded = true) {
  log(`Starting run ${runId} with connector ${connectorPath} (headless: ${headless}, allowHeaded: ${allowHeaded})`);

  // Derive connector ID for persistent browser profile
  const connectorFileName = path.basename(connectorPath, path.extname(connectorPath));
  const home = process.env.HOME || process.env.USERPROFILE || '';
  const userDataDir = path.join(home, '.dataconnect', 'browser-profiles', connectorFileName);

  // Mutable state shared with pageApi
  const runState = {
    context: null,
    page: null,
    browserClosed: false,
    browserClosedByConnector: false,
    connectorCompleted: false,
    headless,
    allowHeaded,
    userDataDir,
    browserPath: null,
    requestCounter: 0,
    pendingInputs: new Map(),
  };

  try {
    // Read connector script
    const connectorCode = fs.readFileSync(connectorPath, 'utf-8');

    // Resolve browser executable
    runState.browserPath = resolveBrowserPath();
    log(`Using browser: ${runState.browserPath}`);

    // On first run, we need to:
    // 1. Launch Chrome briefly so it creates its profile/Cookies db
    // 2. Close it
    // 3. INSERT cookies from the user's Chrome profile into the db
    // 4. Relaunch — now Chrome loads the imported cookies from disk
    // NOTE(review): the marker file is presumably written by
    // importChromecookies(); that is not visible from here — confirm.
    const markerFile = path.join(userDataDir, '.cookies-imported');
    if (isSystemChrome(runState.browserPath) && !fs.existsSync(markerFile)) {
      log('First run: launching browser to initialize profile...');
      const tempCtx = await launchPersistentContext(userDataDir, true, runState.browserPath);
      await tempCtx.close();
      log('Profile initialized, importing cookies...');
      importChromecookies(userDataDir, runState.browserPath);
    }

    // Launch browser with persistent context (cookies already in db on first run)
    const context = await launchPersistentContext(userDataDir, headless, runState.browserPath);
    const page = context.pages()[0] || await context.newPage();

    runState.context = context;
    runState.page = page;

    // Handle browser disconnect (user closed browser window)
    context.browser().on('disconnected', () => {
      if (!runState.connectorCompleted && !runState.browserClosedByConnector && activeRuns.has(runId)) {
        log(`Browser disconnected for run ${runId} (user closed window)`);
        runState.browserClosed = true;
        runState.context = null;
        runState.page = null;
        activeRuns.delete(runId);
        send({ type: 'status', runId, status: 'STOPPED' });
        drainStdout().then(() => process.exit(0));
      }
    });

    // Store for cleanup
    activeRuns.set(runId, {
      runState,
      setCompleted: () => { runState.connectorCompleted = true; },
    });

    // Create page API
    const pageApi = createPageApi(runState, runId);

    // Navigate to starting URL
    log(`Navigating to initial URL: ${url}`);
    await page.goto(url, { waitUntil: 'domcontentloaded' });
    log('Initial navigation complete');
    send({ type: 'status', runId, status: 'RUNNING' });

    // Build the connector execution wrapper
    // The connector has an IIFE at the end - we need to return its Promise
    // Find the LAST IIFE and add 'return' before it (there may be inner IIFEs in helpers)
    let modifiedCode = connectorCode;

    // Find all occurrences and replace the last one
    const iifePattern = /\n\(async\s*\(\)\s*=>\s*\{/g;
    const matches = [...modifiedCode.matchAll(iifePattern)];

    if (matches.length > 0) {
      const lastMatch = matches[matches.length - 1];
      const insertPos = lastMatch.index;
      modifiedCode = modifiedCode.substring(0, insertPos) +
        '\nreturn (async () => {' +
        modifiedCode.substring(insertPos + lastMatch[0].length);
      log(`Added return before IIFE (match ${matches.length} of ${matches.length})`);
    } else {
      log('WARNING: Could not find IIFE pattern in connector code');
    }

    // Execute connector with page API in scope using AsyncFunction
    // NOTE(security): this evaluates the connector file as code inside this
    // Node.js process, with this process's privileges. Only run connector
    // scripts from a trusted source.
    log('Starting connector execution...');
    const AsyncFunction = Object.getPrototypeOf(async function(){}).constructor;
    const runConnectorFn = new AsyncFunction('page', modifiedCode);

    log('Calling connector function...');
    const result = await runConnectorFn.call(null, pageApi);
    log('Connector function completed with result:', result ? 'has result' : 'undefined');

    // Only forward the return value when runState.hasResult is not set
    // (it is never assigned in this function).
    if (!runState.hasResult && result != null) {
      // Unwrap { success, data } envelopes; otherwise forward the value as-is.
      const exportData = (result && result.success && result.data) ? result.data : result;
      send({ type: 'result', runId, data: exportData });
    }
    send({ type: 'status', runId, status: 'COMPLETE' });

    // Mark as completed to prevent disconnect handler from sending STOPPED
    runState.connectorCompleted = true;

    // Close browser if still open
    if (!runState.browserClosed && runState.context) {
      await new Promise(resolve => setTimeout(resolve, 2000));
      try {
        await runState.context.close();
      } catch (e) {
        // Browser may already be closed
      }
    }

    activeRuns.delete(runId);

    // Exit process after successful completion
    log('Connector completed successfully, exiting');
    await drainStdout();
    process.exit(0);

  } catch (error) {
    // Connectors may throw non-Error values; always report a usable message.
    const message = error && error.message != null ? error.message : String(error);
    log(`Error in run ${runId}:`, message);
    send({ type: 'error', runId, message });
    send({ type: 'status', runId, status: 'ERROR' });

    // Unregister the run BEFORE closing the context. Closing triggers the
    // 'disconnected' handler above, which otherwise would still see the run as
    // active, emit a spurious STOPPED after ERROR and race process.exit(0)
    // against the exit(1) below.
    activeRuns.delete(runId);

    // Cleanup on error
    if (runState.context && !runState.browserClosed) {
      try {
        await runState.context.close();
      } catch (closeError) {
        // Best effort: the browser may already be gone.
        log(`Failed to close browser for run ${runId}:`, closeError && closeError.message);
      }
    }

    // Exit process after error
    log('Connector failed, exiting');
    await drainStdout();
    process.exit(1);
  }
}
870
-
871
// Stop a run
/**
 * Stops the active run registered under `runId`; does nothing when no such
 * run exists.
 *
 * Every pending input request is rejected with 'Run stopped', the browser
 * context is closed when one is still open (close failures are ignored), the
 * run is unregistered, and a STOPPED status is sent.
 *
 * @param {string} runId - Identifier of the run to stop.
 * @returns {Promise<void>}
 */
async function stopRun(runId) {
  const entry = activeRuns.get(runId);
  if (!entry) {
    return;
  }

  log(`Stopping run ${runId}`);

  const { runState } = entry;

  // Reject any pending requestInput promises so the connector doesn't hang
  runState.pendingInputs.forEach((waiter) => {
    waiter.reject(new Error('Run stopped'));
  });
  runState.pendingInputs.clear();

  const browserStillOpen = runState.context && !runState.browserClosed;
  if (browserStillOpen) {
    await runState.context.close().catch(() => {});
  }

  activeRuns.delete(runId);
  send({ type: 'status', runId, status: 'STOPPED' });
}
888
-
889
// Main loop - read commands from stdin
/**
 * Announces readiness, then reads newline-delimited JSON commands from stdin
 * and dispatches on `cmd.type`:
 *
 *   run            - start runConnector() without awaiting it
 *   stop           - stop the given run
 *   quit           - close every open browser context and exit(0)
 *   test           - report Node.js / host information
 *   evaluate       - run cmd.script in the run's page, reply 'evaluate-result'
 *   input-response - settle a pending input request of the run
 *   screenshot     - capture a JPEG of the run's page, reply 'screenshot-result'
 *
 * A malformed line or a failing handler is logged and the loop continues.
 * Resolves when stdin ends.
 *
 * @returns {Promise<void>}
 */
async function main() {
  log('Playwright runner started');
  send({ type: 'ready' });

  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
    terminal: false
  });

  for await (const line of rl) {
    // Blank lines carry no command; skip them instead of logging a parse error.
    if (line.trim() === '') {
      continue;
    }

    let cmd;
    try {
      cmd = JSON.parse(line);
    } catch (error) {
      log(`Error parsing command: ${error.message}`);
      continue;
    }
    if (cmd === null || typeof cmd !== 'object') {
      log('Error parsing command: expected a JSON object');
      continue;
    }

    // Handler failures are reported separately from parse failures so the log
    // does not blame the input for a bug or runtime error in a handler.
    try {
      switch (cmd.type) {
        case 'run':
          // Deliberately not awaited so the stdin loop keeps processing other
          // commands while the connector runs. runConnector reports its own
          // failures; the catch only guards against a rejection escaping it.
          runConnector(cmd.runId, cmd.connectorPath, cmd.url, cmd.headless !== false, cmd.allowHeaded !== false)
            .catch((error) => log(`Unhandled error in run ${cmd.runId}:`, error));
          break;

        case 'stop':
          await stopRun(cmd.runId);
          break;

        case 'quit':
          log('Quitting...');
          for (const run of activeRuns.values()) {
            if (run.runState && run.runState.context && !run.runState.browserClosed) {
              await run.runState.context.close().catch(() => {});
            }
          }
          process.exit(0);
          break;

        case 'test': {
          // Simple test to prove Node.js is working
          const os = require('os');
          send({
            type: 'test-result',
            data: {
              nodejs: process.version,
              platform: process.platform,
              arch: process.arch,
              hostname: os.hostname(),
              cpus: os.cpus().length,
              memory: Math.round(os.totalmem() / 1024 / 1024 / 1024) + ' GB',
              uptime: Math.round(os.uptime() / 60) + ' minutes'
            }
          });
          break;
        }

        case 'evaluate': {
          const evalRun = activeRuns.get(cmd.runId);
          if (!evalRun) {
            send({ type: 'evaluate-result', runId: cmd.runId, error: `No active run: ${cmd.runId}` });
            break;
          }
          const { runState: evalState } = evalRun;
          if (evalState.browserClosed || !evalState.page) {
            send({ type: 'evaluate-result', runId: cmd.runId, error: 'Browser is closed' });
            break;
          }
          // Non-blocking: don't await so stdin loop keeps processing other commands.
          // Wrapped in try so synchronous throws (e.g. page torn down mid-call)
          // always produce an evaluate-result instead of hanging the driver.
          try {
            evalState.page.evaluate(cmd.script)
              .then(result => send({ type: 'evaluate-result', runId: cmd.runId, result }))
              .catch(e => send({ type: 'evaluate-result', runId: cmd.runId, error: e.stack || e.message }));
          } catch (e) {
            send({ type: 'evaluate-result', runId: cmd.runId, error: e.stack || e.message });
          }
          break;
        }

        case 'input-response': {
          const inputRun = activeRuns.get(cmd.runId);
          if (!inputRun) break;
          const pending = inputRun.runState.pendingInputs.get(cmd.requestId);
          if (!pending) break;
          // Remove first so a duplicate response for the same request is ignored.
          inputRun.runState.pendingInputs.delete(cmd.requestId);
          if (cmd.error) {
            pending.reject(new Error(typeof cmd.error === 'string' ? cmd.error : JSON.stringify(cmd.error)));
          } else {
            pending.resolve(cmd.data);
          }
          break;
        }

        case 'screenshot': {
          const ssRun = activeRuns.get(cmd.runId);
          if (!ssRun) {
            send({ type: 'screenshot-result', runId: cmd.runId, error: `No active run: ${cmd.runId}` });
            break;
          }
          const { runState: ssState } = ssRun;
          if (ssState.browserClosed || !ssState.page) {
            send({ type: 'screenshot-result', runId: cmd.runId, error: 'Browser is closed' });
            break;
          }
          // Same non-blocking pattern as 'evaluate': reply when the capture settles.
          try {
            ssState.page.screenshot({ type: 'jpeg', quality: 70, timeout: 5000 })
              .then(buffer => send({ type: 'screenshot-result', runId: cmd.runId, data: buffer.toString('base64') }))
              .catch(e => send({ type: 'screenshot-result', runId: cmd.runId, error: e.stack || e.message }));
          } catch (e) {
            send({ type: 'screenshot-result', runId: cmd.runId, error: e.stack || e.message });
          }
          break;
        }

        default:
          log(`Unknown command: ${cmd.type}`);
      }
    } catch (error) {
      log(`Error handling command ${cmd.type}: ${error && error.message != null ? error.message : error}`);
    }
  }
}
1007
-
1008
// Entry point: start the command loop. A rejection that escapes main() is
// treated as fatal: it is logged and the process exits with code 1.
main().catch((fatalError) => {
  log('Fatal error:', fatalError);
  process.exit(1);
});