autokap 1.3.27 → 1.3.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.d.ts CHANGED
@@ -99,6 +99,8 @@ export declare class Browser {
99
99
  private elementMap;
100
100
  private akNodeIndex;
101
101
  private poolContext;
102
+ private persistentContext;
103
+ private ownedChromiumProfileDir;
102
104
  /**
103
105
  * Xvfb instance backing the headed Chromium used by clip capture on Cloud
104
106
  * Run with NVIDIA L4. Set when forClipCapture spawns Xvfb; null otherwise
@@ -134,10 +136,10 @@ export declare class Browser {
134
136
  */
135
137
  static forClipCapture(options: BrowserOptions, cursorScript: string): Promise<Browser>;
136
138
  /**
137
- * Close only the browser context (not the browser process).
138
- * Used by clip capture to release the context promptly after the CDP loop
139
- * has stopped, while keeping the browser process alive for any pending teardown.
140
- * Call browser.close() afterwards to shut down the browser process.
139
+ * Close the active capture context promptly after recording stops. Regular
140
+ * clip capture leaves the browser process alive for teardown; persistent
141
+ * Cloud Run contexts close their owned browser process with the context.
142
+ * Call browser.close() afterwards to stop Xvfb and remove owned temp state.
141
143
  */
142
144
  closeContext(): Promise<void>;
143
145
  launch(): Promise<void>;
package/dist/browser.js CHANGED
@@ -1,6 +1,8 @@
1
1
  import { chromium } from 'playwright';
2
2
  import sharp from 'sharp';
3
3
  import { createHash } from 'crypto';
4
+ import { cp, mkdir, readFile, rm, writeFile } from 'fs/promises';
5
+ import { join } from 'path';
4
6
  import { buildAKNodeRuntimeIndex, deriveInteractiveElementsFromAKTree, disambiguateFingerprint, focusAKTree, fingerprintAKNode, serializeAKTree, } from './ak-tree.js';
5
7
  /**
6
8
  * Set-of-Marks (SoM) annotation: overlays colored [N] badges on each visible
@@ -100,6 +102,110 @@ import { CAPTURE_HIDE_STYLE_ID, dismissCookiesAndWidgets, ensureCaptureHideStyle
100
102
  import { CHROMIUM_ARGS, browserPool } from './browser-pool.js';
101
103
  import { isDebugEnabled, logger } from './logger.js';
102
104
  import { XvfbProcess } from './xvfb-process.js';
/**
 * Default filesystem locations for the Cloud Run Chromium profile.
 * CLOUD_CHROMIUM_PROFILE_TEMPLATE_DIR is the directory that
 * prepareCloudChromiumProfile() copies into each new profile, and
 * CLOUD_CHROMIUM_PROFILE_ROOT is the parent directory under which those
 * per-launch profile directories are created. Both are fallbacks only: the
 * AUTOKAP_CHROMIUM_PROFILE_TEMPLATE_DIR and AUTOKAP_CHROMIUM_PROFILE_ROOT
 * environment variables win whenever they hold a non-empty value.
 */
105
+ const CLOUD_CHROMIUM_PROFILE_TEMPLATE_DIR = '/opt/chromium-profile';
106
+ const CLOUD_CHROMIUM_PROFILE_ROOT = '/tmp/autokap-chromium-profiles';
/**
 * Language codes that seedCloudChromiumPreferences() writes unchanged into
 * the translate_blocked_languages key of the seeded Preferences file.
 * NOTE(review): presumably these are the languages Chromium should never
 * offer to translate - confirm against the Chromium translate preferences.
 */
107
+ const CLOUD_CHROMIUM_TRANSLATE_BLOCKED_LANGUAGES = [
108
+ 'fr',
109
+ 'en',
110
+ 'de',
111
+ 'es',
112
+ 'it',
113
+ 'pt',
114
+ 'nl',
115
+ 'ja',
116
+ 'zh',
117
+ 'ko',
118
+ 'ar',
119
+ 'ru',
120
+ ];
/**
 * Locale tags that seedCloudChromiumPreferences() joins with commas and
 * stores under intl.accept_languages in the seeded Preferences file.
 * NOTE(review): the xvfbWindowArgs launch arguments later in this file
 * repeat the same list as a hard-coded --accept-lang= string literal
 * instead of deriving it from this array, so the two copies have to be
 * edited together to stay in sync.
 */
121
+ const CLOUD_CHROMIUM_ACCEPT_LANGUAGES = [
122
+ 'fr-FR',
123
+ 'fr',
124
+ 'en-US',
125
+ 'en',
126
+ 'en-GB',
127
+ 'es-ES',
128
+ 'es',
129
+ 'de-DE',
130
+ 'de',
131
+ 'it-IT',
132
+ 'it',
133
+ 'pt-PT',
134
+ 'pt-BR',
135
+ 'pt',
136
+ 'nl-NL',
137
+ 'nl',
138
+ 'ja-JP',
139
+ 'ja',
140
+ 'zh-CN',
141
+ 'zh-TW',
142
+ 'zh',
143
+ 'ko-KR',
144
+ 'ko',
145
+ 'ar',
146
+ 'ru-RU',
147
+ 'ru',
148
+ 'he',
149
+ 'th',
150
+ 'tr',
151
+ 'vi',
152
+ 'sv',
153
+ 'no',
154
+ 'da',
155
+ 'fi',
156
+ 'pl',
157
+ 'cs',
158
+ ];
/**
 * Reports whether a value is a non-null object that is not an array.
 * seedCloudChromiumPreferences() uses it to validate parsed Preferences
 * JSON (and its nested translate / intl entries) before reading or
 * spreading them as key/value records. No prototype check is performed,
 * so any non-array object passes.
 *
 * @param value - Any value, e.g. the result of JSON.parse.
 * @returns true for non-null, non-array objects; false for everything else.
 */
159
+ function isPlainRecord(value) {
160
+ return typeof value === 'object' && value !== null && !Array.isArray(value);
161
+ }
/**
 * Creates a fresh Chromium user data directory for one capture launch and
 * seeds its preferences via seedCloudChromiumPreferences().
 *
 * The directory is created under AUTOKAP_CHROMIUM_PROFILE_ROOT (or the
 * built-in default root) and starts as a recursive copy of
 * AUTOKAP_CHROMIUM_PROFILE_TEMPLATE_DIR (or the built-in default template).
 *
 * @returns Promise resolving to the path of the new user data directory.
 * @throws Rethrows any copy error whose code is not ENOENT; errors from
 *   mkdir and from the preference seeding propagate unchanged.
 *
 * NOTE(review): nothing here removes userDataDir when the copy fails with a
 * non-ENOENT error or when seeding throws. The path is only handed to the
 * caller on success, so a failure leaves the directory on disk with no
 * owner to delete it.
 */
162
+ async function prepareCloudChromiumProfile() {
163
+ const rootDir = process.env.AUTOKAP_CHROMIUM_PROFILE_ROOT || CLOUD_CHROMIUM_PROFILE_ROOT;
164
+ const templateDir = process.env.AUTOKAP_CHROMIUM_PROFILE_TEMPLATE_DIR || CLOUD_CHROMIUM_PROFILE_TEMPLATE_DIR;
// Directory name: process id, millisecond timestamp, and up to six random
// base-36 characters, so separate launches get separate profiles.
165
+ const userDataDir = join(rootDir, `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`);
166
+ await mkdir(rootDir, { recursive: true });
167
+ try {
168
+ await cp(templateDir, userDataDir, { recursive: true, force: true });
169
+ }
170
+ catch (err) {
// Only ENOENT (for example, the template directory does not exist) is
// tolerated; the profile then starts from an empty directory instead.
171
+ if (err.code !== 'ENOENT') {
172
+ throw err;
173
+ }
174
+ await mkdir(userDataDir, { recursive: true });
175
+ }
176
+ await seedCloudChromiumPreferences(userDataDir);
177
+ return userDataDir;
178
+ }
/**
 * Writes the Default/Preferences file inside the given user data directory
 * so that it carries the settings this module forces:
 *   - translate.enabled is set to false,
 *   - translate_blocked_languages is set to
 *     CLOUD_CHROMIUM_TRANSLATE_BLOCKED_LANGUAGES,
 *   - intl.accept_languages is set to the comma-joined
 *     CLOUD_CHROMIUM_ACCEPT_LANGUAGES list.
 * Any other keys already present in the file, including other keys inside
 * the existing translate and intl objects, are kept.
 *
 * @param userDataDir - Path of the Chromium user data directory to seed.
 * @returns Promise that resolves once the file has been written.
 * @throws Errors from mkdir and writeFile propagate; read and parse errors
 *   are handled internally as described in the comments in the body.
 */
179
+ async function seedCloudChromiumPreferences(userDataDir) {
180
+ const defaultDir = join(userDataDir, 'Default');
181
+ const preferencesPath = join(defaultDir, 'Preferences');
182
+ await mkdir(defaultDir, { recursive: true });
// Start from an empty object; it is replaced only when the existing file
// parses to a non-array object. Other parsed values are ignored silently.
183
+ let preferences = {};
184
+ try {
185
+ const raw = await readFile(preferencesPath, 'utf8');
186
+ const parsed = JSON.parse(raw);
187
+ if (isPlainRecord(parsed)) {
188
+ preferences = parsed;
189
+ }
190
+ }
191
+ catch (err) {
// A missing file (ENOENT) is expected and stays quiet. Every other
// failure, including JSON syntax errors (which carry no ENOENT code), is
// logged as a warning and the file is rebuilt from the empty object.
192
+ if (err.code !== 'ENOENT') {
193
+ logger.warn(`[capture] Cloud clip capture: replacing unreadable Chromium Preferences: ${err.message}`);
194
+ }
195
+ }
// Merge instead of overwrite: existing translate keys survive, but enabled
// is always forced to false because it is spread last.
196
+ const translate = isPlainRecord(preferences.translate) ? preferences.translate : {};
197
+ preferences.translate = {
198
+ ...translate,
199
+ enabled: false,
200
+ };
201
+ preferences.translate_blocked_languages = CLOUD_CHROMIUM_TRANSLATE_BLOCKED_LANGUAGES;
// Same merge pattern for intl: only accept_languages is overridden.
202
+ const intl = isPlainRecord(preferences.intl) ? preferences.intl : {};
203
+ preferences.intl = {
204
+ ...intl,
205
+ accept_languages: CLOUD_CHROMIUM_ACCEPT_LANGUAGES.join(','),
206
+ };
// Serialized as single-line JSON followed by one trailing newline.
207
+ await writeFile(preferencesPath, `${JSON.stringify(preferences)}\n`, 'utf8');
208
+ }
103
209
  async function withHelperTimeout(label, timeoutMs, work) {
104
210
  if (!timeoutMs || timeoutMs <= 0) {
105
211
  return work();
@@ -776,6 +882,8 @@ export class Browser {
776
882
  elementMap = new Map();
777
883
  akNodeIndex = new Map();
778
884
  poolContext = false;
885
+ persistentContext = false;
886
+ ownedChromiumProfileDir = null;
779
887
  /**
780
888
  * Xvfb instance backing the headed Chromium used by clip capture on Cloud
781
889
  * Run with NVIDIA L4. Set when forClipCapture spawns Xvfb; null otherwise
@@ -883,27 +991,28 @@ export class Browser {
883
991
  process.env.DISPLAY = instance.xvfb.display;
884
992
  logger.info(`[capture] Cloud clip capture: Chromium → Xvfb ${instance.xvfb.display} → ffmpeg x11grab + h264_nvenc path enabled`);
885
993
  }
994
+ const cloudChromiumProfileDir = isLinuxWithGpu ? await prepareCloudChromiumProfile() : null;
995
+ instance.ownedChromiumProfileDir = cloudChromiumProfileDir;
886
996
  // Kiosk + zero-position anchor for Xvfb: Chromium normally renders its
887
997
  // own toolbar/tabbar in headed mode, which would appear at the top of
888
998
  // every clip. `--kiosk` removes the address bar + tab strip;
889
999
  // `--window-position=0,0` and `--window-size` make the page fill the
890
- // Xvfb screen exactly. The `--disable-features` block kills the
891
- // separate "infobar" surfaces (translate suggestion, save-password
892
- // prompt, autofill banner, "Chrome is being controlled by automated
893
- // software" warning) these render OUTSIDE kiosk's chrome and would
894
- // otherwise show up at the top of every clip captured via x11grab.
895
- // CDP screenshot capture (Mac/Win/local Linux) hits the page surface
896
- // directly so it never sees these; only ffmpeg x11grab does.
1000
+ // Xvfb screen exactly. Chrome UI surfaces render OUTSIDE kiosk's chrome
1001
+ // and would otherwise show up in clips captured via x11grab. Translate is
1002
+ // controlled by the seeded Chromium preferences below; avoid passing our
1003
+ // own --disable-features list here because it replaces Playwright's
1004
+ // default disabled-feature map instead of merging with it.
897
1005
  const xvfbWindowArgs = isLinuxWithGpu ? [
898
1006
  '--kiosk',
899
1007
  '--window-position=0,0',
900
- '--disable-features=Translate,TranslateUI,AutofillServerCommunication,InfoBars',
901
- '--disable-infobars',
902
1008
  '--disable-blink-features=AutomationControlled',
903
- '--disable-translate',
904
1009
  '--no-default-browser-check',
905
1010
  '--no-first-run',
906
1011
  '--noerrdialogs',
1012
+ // Belt-and-suspenders with the seeded translate prefs: keep the browser's
1013
+ // preferred-language list broad enough that Chrome has no reason to offer
1014
+ // translation on the marketing/demo languages we capture most often.
1015
+ '--accept-lang=fr-FR,fr,en-US,en,en-GB,es-ES,es,de-DE,de,it-IT,it,pt-PT,pt-BR,pt,nl-NL,nl,ja-JP,ja,zh-CN,zh-TW,zh,ko-KR,ko,ar,ru-RU,ru,he,th,tr,vi,sv,no,da,fi,pl,cs',
907
1016
  ] : [];
908
1017
  const clipArgs = [
909
1018
  ...baseArgs,
@@ -913,14 +1022,6 @@ export class Browser {
913
1022
  ...cloudGpuArgs,
914
1023
  ...xvfbWindowArgs,
915
1024
  ];
916
- // Dedicated browser process for clip capture. Not pooled because clip
917
- // capture installs context-level init scripts (cursor overlay).
918
- // Cloud Run with Xvfb: launch headed (headless: false) so Chromium
919
- // renders to the Xvfb framebuffer that ffmpeg captures.
920
- instance.browser = await chromium.launch({
921
- headless: isLinuxWithGpu ? false : !options.headed,
922
- args: clipArgs,
923
- });
924
1025
  const contextOptions = {
925
1026
  viewport: options.viewport,
926
1027
  deviceScaleFactor,
@@ -928,7 +1029,33 @@ export class Browser {
928
1029
  colorScheme: options.colorScheme ?? 'light',
929
1030
  storageState: options.storageState,
930
1031
  };
931
- instance.context = await instance.browser.newContext(contextOptions);
1032
+ // Dedicated browser process for clip capture. Not pooled because clip
1033
+ // capture installs context-level init scripts (cursor overlay). Cloud Run
1034
+ // uses a seeded persistent profile so Chromium reads translate.enabled=false
1035
+ // from Default/Preferences. Policy files and flags are not reliable against
1036
+ // the Chromium 127+ TranslateUI2024 bubble; the user-data-dir pref is.
1037
+ if (isLinuxWithGpu && cloudChromiumProfileDir) {
1038
+ instance.context = await chromium.launchPersistentContext(cloudChromiumProfileDir, {
1039
+ ...contextOptions,
1040
+ headless: false,
1041
+ args: clipArgs,
1042
+ });
1043
+ instance.browser = instance.context.browser();
1044
+ instance.persistentContext = true;
1045
+ for (const page of instance.context.pages()) {
1046
+ await page.close().catch(() => undefined);
1047
+ }
1048
+ logger.info(`[capture] Cloud clip capture: persistent Chromium profile seeded at ${cloudChromiumProfileDir}`);
1049
+ }
1050
+ else {
1051
+ // Non-cloud clip capture keeps the regular browser + incognito context
1052
+ // model used by local/macOS/Windows frame capture.
1053
+ instance.browser = await chromium.launch({
1054
+ headless: !options.headed,
1055
+ args: clipArgs,
1056
+ });
1057
+ instance.context = await instance.browser.newContext(contextOptions);
1058
+ }
932
1059
  // Cloud Run only: inject the notranslate meta on every navigation so
933
1060
  // Chromium's translate UI never prompts. The --disable-features=Translate*
934
1061
  // launch flags are unreliable across Chromium versions (some translate
@@ -1026,40 +1153,14 @@ export class Browser {
1026
1153
  catch (err) {
1027
1154
  console.warn('[gpu-check] WebGL query failed:', err.message);
1028
1155
  }
1029
- // Diagnostic — verify the enterprise managed policy file shipped in
1030
- // cloud-runner/Dockerfile is actually loaded by Chromium. If
1031
- // TranslateEnabled doesn't appear in chrome://policy, Playwright's
1032
- // bundled Chromium isn't reading our policy directory (issue
1033
- // microsoft/playwright#32324) and we need to fall back to a
1034
- // pre-seeded user-data-dir for the translate.enabled pref.
1035
- try {
1036
- const policyPage = await instance.context.newPage();
1037
- await policyPage.goto('chrome://policy', { timeout: 5000, waitUntil: 'load' });
1038
- await policyPage.waitForTimeout(300);
1039
- const policyInfo = await policyPage.evaluate(() => {
1040
- const text = document.body?.innerText ?? '';
1041
- const lines = text.split('\n').filter(l => l.toLowerCase().includes('translate'));
1042
- return {
1043
- hasTranslateEnabled: text.includes('TranslateEnabled'),
1044
- translateLines: lines.slice(0, 10),
1045
- // chrome://policy renders a "No policies set" message when none load
1046
- noPoliciesMessage: text.includes('No policies set'),
1047
- };
1048
- });
1049
- console.info('[policy-check] chrome://policy:', JSON.stringify(policyInfo));
1050
- await policyPage.close();
1051
- }
1052
- catch (err) {
1053
- console.warn('[policy-check] chrome://policy query failed:', err.message);
1054
- }
1055
1156
  }
1056
1157
  return instance;
1057
1158
  }
1058
1159
  /**
1059
- * Close only the browser context (not the browser process).
1060
- * Used by clip capture to release the context promptly after the CDP loop
1061
- * has stopped, while keeping the browser process alive for any pending teardown.
1062
- * Call browser.close() afterwards to shut down the browser process.
1160
+ * Close the active capture context promptly after recording stops. Regular
1161
+ * clip capture leaves the browser process alive for teardown; persistent
1162
+ * Cloud Run contexts close their owned browser process with the context.
1163
+ * Call browser.close() afterwards to stop Xvfb and remove owned temp state.
1063
1164
  */
1064
1165
  async closeContext() {
1065
1166
  if (this.context) {
@@ -1068,6 +1169,10 @@ export class Browser {
1068
1169
  }
1069
1170
  catch { /* ignore */ }
1070
1171
  this.context = null;
1172
+ this.page = null;
1173
+ if (this.persistentContext) {
1174
+ this.browser = null;
1175
+ }
1071
1176
  }
1072
1177
  }
1073
1178
  async launch() {
@@ -1216,13 +1321,32 @@ export class Browser {
1216
1321
  this.poolContext = false;
1217
1322
  return;
1218
1323
  }
1219
- // Standalone mode (CLI): close the entire browser process
1220
- if (this.browser) {
1324
+ // Standalone mode (CLI): close the entire browser process. Persistent
1325
+ // contexts own their browser process, so closing the context is enough.
1326
+ if (this.persistentContext && this.context) {
1327
+ try {
1328
+ await this.context.close();
1329
+ }
1330
+ catch { /* ignore */ }
1331
+ this.context = null;
1332
+ this.browser = null;
1333
+ this.page = null;
1334
+ }
1335
+ else if (this.browser) {
1221
1336
  await this.browser.close();
1222
1337
  this.browser = null;
1223
1338
  this.context = null;
1224
1339
  this.page = null;
1225
1340
  }
1341
+ else if (this.context) {
1342
+ try {
1343
+ await this.context.close();
1344
+ }
1345
+ catch { /* ignore */ }
1346
+ this.context = null;
1347
+ this.page = null;
1348
+ }
1349
+ this.persistentContext = false;
1226
1350
  // Tear down Xvfb only after Chromium is fully gone — Chromium needs the
1227
1351
  // X display for its own teardown (releasing GL contexts, X resources).
1228
1352
  if (this.xvfb) {
@@ -1234,6 +1358,15 @@ export class Browser {
1234
1358
  }
1235
1359
  this.xvfb = null;
1236
1360
  }
1361
+ if (this.ownedChromiumProfileDir) {
1362
+ try {
1363
+ await rm(this.ownedChromiumProfileDir, { recursive: true, force: true });
1364
+ }
1365
+ catch (err) {
1366
+ logger.warn(`[capture] Cloud clip capture: failed to remove Chromium profile ${this.ownedChromiumProfileDir}: ${err.message}`);
1367
+ }
1368
+ this.ownedChromiumProfileDir = null;
1369
+ }
1237
1370
  }
1238
1371
  async navigateTo(url) {
1239
1372
  const page = this.ensurePage();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "autokap",
3
- "version": "1.3.27",
3
+ "version": "1.3.29",
4
4
  "description": "AI-powered CLI tool for capturing clean screenshots of websites",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",