donobu 5.34.0 → 5.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -92,6 +92,13 @@ export type LocateCacheEntryWithRunner = LocateCacheEntry & {
92
92
  };
93
93
  export type LocateCacheExecutor = (context: {
94
94
  page: DonobuExtendedPage;
95
+ /**
96
+ * Optional env mapping used to interpolate `{{$.env.X}}` placeholders that
97
+ * the AI may have embedded into `LocatorStep.text`/`name`/`testId` fields.
98
+ * Absent → steps run unchanged (backwards compatible with cache entries
99
+ * recorded before env-aware caching).
100
+ */
101
+ envData?: Record<string, string>;
95
102
  }) => Locator;
96
103
  /**
97
104
  * Builds a cache executor that mechanically reconstructs a Playwright
@@ -172,6 +172,6 @@ function buildAssertExecutor(steps) {
172
172
  * {@link Locator} from a cached {@link LocateResult}.
173
173
  */
174
174
  function buildLocateExecutor(result) {
175
- return ({ page }) => (0, buildLocator_1.buildLocator)(page, result);
175
+ return ({ page, envData }) => (0, buildLocator_1.buildLocator)(page, result, envData);
176
176
  }
177
177
  //# sourceMappingURL=assertCache.js.map
@@ -4,6 +4,10 @@ import type { LocateResult } from './locateTypes';
4
4
  * Mechanically construct a Playwright {@link Locator} from a structured
5
5
  * {@link LocateResult}. No `eval` or string parsing — every branch maps to a
6
6
  * direct Playwright API call.
7
+ *
8
+ * When `envData` is supplied, `{{$.env.X}}` placeholders inside `text`,
9
+ * `name`, and `testId` step fields are resolved against it before being
10
+ * applied. `selector` and `frames[]` are left untouched.
7
11
  */
8
- export declare function buildLocator(page: Page, result: LocateResult): Locator;
12
+ export declare function buildLocator(page: Page, result: LocateResult, envData?: Record<string, string>): Locator;
9
13
  //# sourceMappingURL=buildLocator.d.ts.map
@@ -1,12 +1,54 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.buildLocator = buildLocator;
4
+ const TemplateInterpolator_1 = require("../../../utils/TemplateInterpolator");
5
+ /**
6
+ * Resolves any `{{$.env.X}}` placeholders in a step field against the
7
+ * supplied env data. Returns the input verbatim when no env data is given,
8
+ * or when the field has no placeholder syntax — backwards compatible with
9
+ * cached entries that contain literal values only.
10
+ *
11
+ * Only applied to `text`, `name`, and `testId` step fields. `selector`
12
+ * (CSS/XPath) and `frames[]` entries are left literal because raw env
13
+ * values cannot be safely embedded into a CSS selector without escaping.
14
+ */
15
+ function resolveStepField(value, envData) {
16
+ if (!envData || !value.includes('{{')) {
17
+ return value;
18
+ }
19
+ return (0, TemplateInterpolator_1.interpolateString)(value, { env: envData, calls: [] });
20
+ }
21
+ /**
22
+ * Interpolate env placeholders, then optionally compile the result as a
23
+ * regex. Mirrors the order used by `buildAssertExecutor` so env-var × regex
24
+ * semantics stay consistent across cache executors.
25
+ *
26
+ * On `new RegExp(...)` failure (invalid pattern) the interpolated string is
27
+ * returned uncompiled, letting Playwright apply literal substring matching
28
+ * rather than throwing inside the cache replay path.
29
+ */
30
+ function resolveAndCompile(value, isRegex, envData) {
31
+ const resolved = resolveStepField(value, envData);
32
+ if (!isRegex) {
33
+ return resolved;
34
+ }
35
+ try {
36
+ return new RegExp(resolved);
37
+ }
38
+ catch {
39
+ return resolved;
40
+ }
41
+ }
4
42
  /**
5
43
  * Mechanically construct a Playwright {@link Locator} from a structured
6
44
  * {@link LocateResult}. No `eval` or string parsing — every branch maps to a
7
45
  * direct Playwright API call.
46
+ *
47
+ * When `envData` is supplied, `{{$.env.X}}` placeholders inside `text`,
48
+ * `name`, and `testId` step fields are resolved against it before being
49
+ * applied. `selector` and `frames[]` are left untouched.
8
50
  */
9
- function buildLocator(page, result) {
51
+ function buildLocator(page, result, envData) {
10
52
  // 1. Resolve frame chain (if any)
11
53
  let frameScope;
12
54
  if (result.frames && result.frames.length > 0) {
@@ -16,9 +58,9 @@ function buildLocator(page, result) {
16
58
  }
17
59
  // 2. Apply locator steps
18
60
  const base = frameScope ?? page;
19
- let locator = applyStep(base, result.steps[0]);
61
+ let locator = applyStep(base, result.steps[0], envData);
20
62
  for (let i = 1; i < result.steps.length; i++) {
21
- locator = applyStepToLocator(locator, result.steps[i]);
63
+ locator = applyStepToLocator(locator, result.steps[i], envData);
22
64
  }
23
65
  // 3. nth disambiguation
24
66
  if (result.nth !== undefined) {
@@ -39,34 +81,41 @@ function applyFrameStep(parent, step) {
39
81
  throw new Error(`Unknown frame method: ${step.method}`);
40
82
  }
41
83
  }
42
- function applyStep(base, step) {
43
- return applyStepTo(base, step);
84
+ function applyStep(base, step, envData) {
85
+ return applyStepTo(base, step, envData);
44
86
  }
45
- function applyStepToLocator(parent, step) {
46
- return applyStepTo(parent, step);
87
+ function applyStepToLocator(parent, step, envData) {
88
+ return applyStepTo(parent, step, envData);
47
89
  }
48
- function applyStepTo(parent, step) {
90
+ function applyStepTo(parent, step, envData) {
91
+ // `exact` and `*IsRegex` are mutually exclusive. If the AI emits both
92
+ // (shouldn't happen — the prompt forbids it), regex wins because passing
93
+ // `exact: true` with a `RegExp` matcher to Playwright is meaningless.
49
94
  const exactOpt = step.exact !== undefined ? { exact: step.exact } : undefined;
50
95
  switch (step.method) {
51
96
  case 'getByRole': {
52
97
  const roleOpts = {};
53
98
  if (step.name !== undefined) {
54
- roleOpts.name = step.name;
99
+ roleOpts.name = resolveAndCompile(step.name, step.nameIsRegex, envData);
55
100
  }
56
- if (step.exact !== undefined) {
101
+ if (step.exact !== undefined && !step.nameIsRegex) {
57
102
  roleOpts.exact = step.exact;
58
103
  }
59
104
  return parent.getByRole((step.role ?? 'generic'), Object.keys(roleOpts).length > 0 ? roleOpts : undefined);
60
105
  }
61
106
  case 'getByText':
62
- return parent.getByText(step.text ?? '', exactOpt);
107
+ return parent.getByText(resolveAndCompile(step.text ?? '', step.textIsRegex, envData), step.textIsRegex ? undefined : exactOpt);
63
108
  case 'getByLabel':
64
- return parent.getByLabel(step.text ?? '', exactOpt);
109
+ return parent.getByLabel(resolveAndCompile(step.text ?? '', step.textIsRegex, envData), step.textIsRegex ? undefined : exactOpt);
65
110
  case 'getByPlaceholder':
66
- return parent.getByPlaceholder(step.text ?? '', exactOpt);
111
+ return parent.getByPlaceholder(resolveAndCompile(step.text ?? '', step.textIsRegex, envData), step.textIsRegex ? undefined : exactOpt);
67
112
  case 'getByTestId':
68
- return parent.getByTestId(step.testId ?? '');
113
+ return parent.getByTestId(resolveStepField(step.testId ?? '', envData));
69
114
  case 'locator':
115
+ // `selector` is a raw CSS/XPath string — interpolating env values into
116
+ // it can produce invalid syntax silently. The locate prompt steers the
117
+ // AI toward semantic locators when env values are involved; cached
118
+ // selectors stay literal.
70
119
  return parent.locator(step.selector ?? '*');
71
120
  default:
72
121
  throw new Error(`Unknown locator method: ${step.method}`);
@@ -17,6 +17,7 @@ import type { LocateResult } from './locateTypes';
17
17
  */
18
18
  export declare function locateElement(page: Page, description: string, gptClient: GptClient, options?: {
19
19
  signal?: AbortSignal;
20
+ envData?: Record<string, string>;
20
21
  }): Promise<{
21
22
  locator: Locator;
22
23
  result: LocateResult;
@@ -4,6 +4,7 @@ exports.locateElement = locateElement;
4
4
  const v4_1 = require("zod/v4");
5
5
  const Logger_1 = require("../../../utils/Logger");
6
6
  const PlaywrightUtils_1 = require("../../../utils/PlaywrightUtils");
7
+ const TemplateInterpolator_1 = require("../../../utils/TemplateInterpolator");
7
8
  const buildLocator_1 = require("./buildLocator");
8
9
  const domSnapshot_1 = require("./domSnapshot");
9
10
  const LocateException_1 = require("./LocateException");
@@ -27,14 +28,15 @@ const SNIPPET_MAX_CHARS = 200;
27
28
  * callers can cache the result for deterministic replay.
28
29
  */
29
30
  async function locateElement(page, description, gptClient, options) {
31
+ const envData = options?.envData;
30
32
  const screenshot = await PlaywrightUtils_1.PlaywrightUtils.takeViewportScreenshot(page);
31
33
  const domSnapshot = await (0, domSnapshot_1.captureDomSnapshot)(page);
32
34
  Logger_1.appLogger.debug(`locate: DOM snapshot captured (${domSnapshot.html.length} chars, ${domSnapshot.omittedCount} nodes omitted)`);
33
- const systemMessage = buildSystemMessage(page.url(), await page.title());
35
+ const systemMessage = buildSystemMessage(page.url(), await page.title(), description, envData);
34
36
  const userMessage = buildUserMessage(description, screenshot, domSnapshot.html);
35
37
  // First attempt
36
38
  const firstResult = await callLlm(gptClient, systemMessage, userMessage, options?.signal);
37
- const firstLocator = (0, buildLocator_1.buildLocator)(page, firstResult);
39
+ const firstLocator = (0, buildLocator_1.buildLocator)(page, firstResult, envData);
38
40
  const firstCount = await safeCount(firstLocator);
39
41
  Logger_1.appLogger.debug(`locate: first attempt matched ${firstCount} element(s)`);
40
42
  if (firstCount === 1) {
@@ -42,7 +44,7 @@ async function locateElement(page, description, gptClient, options) {
42
44
  }
43
45
  // Disambiguation: small number of matches — show snippets and let LLM pick
44
46
  if (firstCount > 1 && firstCount <= DISAMBIGUATE_THRESHOLD) {
45
- return await disambiguate(page, description, gptClient, firstLocator, firstResult, firstCount, options?.signal);
47
+ return await disambiguate(page, description, gptClient, firstLocator, firstResult, firstCount, envData, options?.signal);
46
48
  }
47
49
  // Retry: zero matches or too many
48
50
  const previousAttempt = summarizeLocateResult(firstResult);
@@ -58,14 +60,14 @@ async function locateElement(page, description, gptClient, options) {
58
60
  : `Your locator matched ${firstCount} elements, which is too many to disambiguate. Your previous attempt was: ${previousAttempt}. Write a more specific locator.`;
59
61
  const retryMessage = buildRetryMessage(description, feedback, screenshot, retryDomHtml);
60
62
  const retryResult = await callLlm(gptClient, systemMessage, retryMessage, options?.signal);
61
- const retryLocator = (0, buildLocator_1.buildLocator)(page, retryResult);
63
+ const retryLocator = (0, buildLocator_1.buildLocator)(page, retryResult, envData);
62
64
  const retryCount = await safeCount(retryLocator);
63
65
  Logger_1.appLogger.debug(`locate: retry matched ${retryCount} element(s)`);
64
66
  if (retryCount === 1) {
65
67
  return { locator: retryLocator, result: retryResult };
66
68
  }
67
69
  if (retryCount > 1 && retryCount <= DISAMBIGUATE_THRESHOLD) {
68
- return await disambiguate(page, description, gptClient, retryLocator, retryResult, retryCount, options?.signal);
70
+ return await disambiguate(page, description, gptClient, retryLocator, retryResult, retryCount, envData, options?.signal);
69
71
  }
70
72
  // Give up
71
73
  const reason = retryCount === 0 ? 'no_matches' : 'too_many_matches';
@@ -77,7 +79,7 @@ async function locateElement(page, description, gptClient, options) {
77
79
  * Show HTML snippets of each match to the LLM and ask it to pick the
78
80
  * correct one. Returns the original locator with `.nth(n)` appended.
79
81
  */
80
- async function disambiguate(page, description, gptClient, locator, locateResult, count, signal) {
82
+ async function disambiguate(page, description, gptClient, locator, locateResult, count, envData, signal) {
81
83
  const snippets = [];
82
84
  for (let i = 0; i < count; i++) {
83
85
  const nth = locator.nth(i);
@@ -111,6 +113,12 @@ async function disambiguate(page, description, gptClient, locator, locateResult,
111
113
  .max(count - 1)
112
114
  .describe('Zero-based index of the element that best matches the description.'),
113
115
  });
116
+ // Disambiguation output is just an index (applied as `nth`) — the LLM emits
117
+ // no step text here that `buildLocator` would interpolate. Show it the resolved
118
+ // description so it can match candidate HTML without mental env-var substitution.
119
+ const resolvedDescription = envData && description.includes('{{')
120
+ ? (0, TemplateInterpolator_1.interpolateString)(description, { env: envData, calls: [] })
121
+ : description;
114
122
  const systemMsg = {
115
123
  type: 'system',
116
124
  text: `You are resolving an ambiguous element lookup. The user described an element and your locator matched ${count} candidates. Choose the one that best matches the description.`,
@@ -120,7 +128,7 @@ async function disambiguate(page, description, gptClient, locator, locateResult,
120
128
  items: [
121
129
  {
122
130
  type: 'text',
123
- text: `Description: "${description}"\n\nCandidates:\n${snippetText}\n\nReturn the index of the best match.`,
131
+ text: `Description: "${resolvedDescription}"\n\nCandidates:\n${snippetText}\n\nReturn the index of the best match.`,
124
132
  },
125
133
  ],
126
134
  };
@@ -131,7 +139,7 @@ async function disambiguate(page, description, gptClient, locator, locateResult,
131
139
  nth: resp.output.index,
132
140
  };
133
141
  return {
134
- locator: (0, buildLocator_1.buildLocator)(page, disambiguatedResult),
142
+ locator: (0, buildLocator_1.buildLocator)(page, disambiguatedResult, envData),
135
143
  result: disambiguatedResult,
136
144
  };
137
145
  }
@@ -139,7 +147,54 @@ async function callLlm(gptClient, systemMessage, userMessage, signal) {
139
147
  const resp = await gptClient.getStructuredOutput([systemMessage, userMessage], locateSchema_1.LocateResultSchema, { signal });
140
148
  return resp.output;
141
149
  }
142
- function buildSystemMessage(pageUrl, pageTitle) {
150
+ function buildSystemMessage(pageUrl, pageTitle, description, envData) {
151
+ // Only annotate the prompt with env-var guidance when the raw description
152
+ // actually references at least one provided env var. Keeps the prompt small
153
+ // for the common case.
154
+ const envEntries = Object.entries(envData ?? {});
155
+ const referencedEnvEntries = envEntries.filter(([name]) => description.includes(`{{$.env.${name}}}`));
156
+ const envBlock = referencedEnvEntries.length > 0
157
+ ? `
158
+
159
+ The user's description contains environment variable references using the syntax
160
+ \`{{$.env.NAME}}\`. To keep cached locators valid across runs with different env
161
+ values, you MUST emit those same placeholders in any LocatorStep \`text\`,
162
+ \`name\`, or \`testId\` field whose contents come from an env var. Do NOT bake
163
+ the literal current value into the step.
164
+
165
+ Original (uninterpolated) description: "${description}"
166
+
167
+ Current env mapping (use these to identify which substrings on the page came
168
+ from which env var, then emit the placeholder rather than the literal):
169
+ ${referencedEnvEntries.map(([name, value]) => ` - {{$.env.${name}}} = ${JSON.stringify(value)}`).join('\n')}
170
+
171
+ Hard rules for env-var emission:
172
+ - Use placeholders ONLY in \`text\`, \`name\`, or \`testId\` fields.
173
+ - NEVER emit \`{{$.env.*}}\` inside \`selector\` (CSS/XPath) — interpolating
174
+ raw values into a CSS selector can produce invalid syntax. Use a semantic
175
+ locator (getByRole/getByText/getByLabel/getByPlaceholder/getByTestId)
176
+ instead when an env-derived value is involved.
177
+ - NEVER emit \`{{$.env.*}}\` inside any \`frames[]\` entry (iframe selectors
178
+ or iframe \`name\` attributes are not env-driven).
179
+
180
+ Examples:
181
+ - Description "The user row for {{$.env.TEST_EMAIL}}", TEST_EMAIL="alice@x.com",
182
+ page text shows "alice@x.com" →
183
+ [{ method: "getByText", text: "{{$.env.TEST_EMAIL}}" }]
184
+ - Description "The {{$.env.PROJECT_NAME}} tab", PROJECT_NAME="Apollo" →
185
+ [{ method: "getByRole", role: "tab", name: "{{$.env.PROJECT_NAME}}" }]
186
+ - Description "The submit button" (no env vars referenced) → emit literal text
187
+ as you normally would.
188
+
189
+ Combining env vars with regex: env interpolation runs BEFORE regex compilation,
190
+ so you can mix them. Prefer this when the env value should be matched alongside
191
+ dynamic page content. Example — description "The row for {{$.env.USER}} with
192
+ their score", USER="alice" →
193
+ [{ method: "getByText", text: "alice — \\\\d+ pts", textIsRegex: true }]
194
+ (Here the AI substituted the env value because it's part of a regex pattern;
195
+ the placeholder syntax also works — \`text: "{{$.env.USER}} — \\\\d+ pts"\` —
196
+ and is preferred when you want cache stability across env value changes.)`
197
+ : '';
143
198
  return {
144
199
  type: 'system',
145
200
  text: `You are a Playwright locator expert. Given a viewport screenshot and a pruned DOM snapshot of a webpage, return a structured locator that targets the element matching the user's description.
@@ -151,8 +206,50 @@ Rules:
151
206
  - If the element is inside an iframe, specify the frame(s) in the "frames" field.
152
207
  - Do NOT set "nth" unless you are certain the chain matches multiple elements and you know which index is correct. When unsure, omit it — the system will handle disambiguation.
153
208
 
209
+ Stability rules — locators are CACHED and replayed across runs. The page may
210
+ change between runs (vote counts increment, "3 hours ago" becomes "5 hours ago",
211
+ new posts shift positions, prices fluctuate). Choose locators that survive these
212
+ drifts:
213
+
214
+ - POSITIONAL DESCRIPTIONS: when the description references position ("first",
215
+ "third", "fourth from the top", "last"), translate that into a structural
216
+ chain plus \`nth\` rather than baking position-specific page text into a step.
217
+ Example — "the fourth comments link" should be a locator over ALL comment
218
+ links with \`nth: 3\`, not the literal "36 comments" you happen to see today.
219
+
220
+ - DYNAMIC TEXT: if the value you would put into \`name\` or \`text\` looks
221
+ dynamic — contains digits, timestamps, "X ago", "$X.XX", counts, scores,
222
+ vote totals — emit a regex pattern via \`nameIsRegex: true\` (for getByRole)
223
+ or \`textIsRegex: true\` (for getByText/getByLabel/getByPlaceholder) instead
224
+ of the literal value. Anchor the pattern with \`^\` / \`$\` when the whole
225
+ string should match, otherwise it acts as a substring match.
226
+
227
+ - DO NOT combine \`exact: true\` with \`nameIsRegex\`/\`textIsRegex\`. They are
228
+ mutually exclusive — set \`exact\` only for literal-string steps with stable
229
+ fixed labels like "Submit" or "Sign In".
230
+
231
+ - SAFE LITERALS: keep literal values for genuinely stable strings — fixed UI
232
+ labels, button text like "Submit"/"Cancel", section headings, unique
233
+ test-ids. Only escape to regex when stability is at risk.
234
+
235
+ Examples:
236
+ - "The fourth comments link" →
237
+ steps: [{ method: "getByRole", role: "link", name: "\\\\d+\\\\s+comments?$", nameIsRegex: true }]
238
+ nth: 3
239
+ - "The headline of the third story" → structural row selector + nth: 2 (literal name)
240
+ - "The submit button" → literal name: "Submit", optionally exact: true
241
+ - "The price tag for the cart total" →
242
+ steps: [{ method: "getByText", text: "\\\\$\\\\d+(\\\\.\\\\d+)?", textIsRegex: true }]
243
+ - "The 'posted 5 hours ago' label" →
244
+ steps: [{ method: "getByText", text: "posted \\\\d+ (minute|hour|day)s? ago", textIsRegex: true }]
245
+
246
+ Regex format: emit a JS-style regex source string (no leading/trailing slash,
247
+ no flags). Backslashes inside JSON must be doubled (\`\\\\d+\` not \`\\d+\`).
248
+ Invalid patterns silently fall back to literal matching, so prefer simple,
249
+ well-tested patterns.
250
+
154
251
  Page URL: ${pageUrl}
155
- Page title: ${pageTitle}`,
252
+ Page title: ${pageTitle}${envBlock}`,
156
253
  };
157
254
  }
158
255
  function buildUserMessage(description, screenshot, domHtml) {
@@ -27,6 +27,8 @@ export declare const LocateResultSchema: z.ZodObject<{
27
27
  testId: z.ZodOptional<z.ZodString>;
28
28
  selector: z.ZodOptional<z.ZodString>;
29
29
  exact: z.ZodOptional<z.ZodBoolean>;
30
+ nameIsRegex: z.ZodOptional<z.ZodBoolean>;
31
+ textIsRegex: z.ZodOptional<z.ZodBoolean>;
30
32
  }, z.core.$strip>>;
31
33
  nth: z.ZodOptional<z.ZodNumber>;
32
34
  }, z.core.$strip>;
@@ -46,7 +46,15 @@ const LocatorStepSchema = v4_1.z
46
46
  exact: v4_1.z
47
47
  .boolean()
48
48
  .optional()
49
- .describe('Whether text/name matching should be exact. Applies to getByRole (name), getByText, getByLabel, getByPlaceholder.'),
49
+ .describe('Whether text/name matching should be exact. Applies to getByRole (name), getByText, getByLabel, getByPlaceholder. Mutually exclusive with nameIsRegex / textIsRegex.'),
50
+ nameIsRegex: v4_1.z
51
+ .boolean()
52
+ .optional()
53
+ .describe('Set true when "name" is a regex pattern (compiled via new RegExp(name)). Use this for dynamic accessible names — e.g. "\\d+ comments" matches any "N comments" link. Used with getByRole. Do not combine with exact:true.'),
54
+ textIsRegex: v4_1.z
55
+ .boolean()
56
+ .optional()
57
+ .describe('Set true when "text" is a regex pattern (compiled via new RegExp(text)). Use this for dynamic page text — counts, dates, prices, "X ago" timestamps. Used with getByText / getByLabel / getByPlaceholder. Do not combine with exact:true.'),
50
58
  })
51
59
  .describe('A single Playwright locator step.');
52
60
  const FrameStepSchema = v4_1.z
@@ -20,6 +20,24 @@ export type LocatorStep = {
20
20
  selector?: string;
21
21
  /** Whether text/name matching should be exact. */
22
22
  exact?: boolean;
23
+ /**
24
+ * When true, `name` is treated as a regex pattern compiled via
25
+ * `new RegExp(name)` rather than a literal string. Mutually exclusive
26
+ * with `exact: true`. Used with `getByRole`.
27
+ *
28
+ * Env-var placeholders are interpolated **before** regex compilation, so
29
+ * `'\\d+ {{$.env.NOUN}}'` with `NOUN='comments'` compiles as
30
+ * `/\d+ comments/`.
31
+ */
32
+ nameIsRegex?: boolean;
33
+ /**
34
+ * When true, `text` is treated as a regex pattern compiled via
35
+ * `new RegExp(text)` rather than a literal string. Mutually exclusive with
36
+ * `exact: true`. Used with `getByText`, `getByLabel`, `getByPlaceholder`.
37
+ *
38
+ * Env-var placeholders are interpolated **before** regex compilation.
39
+ */
40
+ textIsRegex?: boolean;
23
41
  };
24
42
  /**
25
43
  * Identifies an iframe to scope into before applying {@link LocatorStep}s.
@@ -49,9 +67,48 @@ export type LocateResult = {
49
67
  */
50
68
  export type LocateOptions = {
51
69
  gptClient?: GptClient | Exclude<LanguageModel, string>;
52
- /** Timeout in milliseconds for the entire locate operation (default: 30 000). */
70
+ /**
71
+ * Timeout in milliseconds for the entire locate operation (default: 30 000).
72
+ *
73
+ * On cache hit this budgets the hydration patience window — the cached
74
+ * locator gets up to this long to attach to a matching element before the
75
+ * cache is treated as stale and the AI is re-run. On cache miss (or
76
+ * stale-cache fallthrough) this budgets the AI call. Whatever the cache
77
+ * path consumes is deducted from the AI path's remaining budget; the total
78
+ * never exceeds `timeout`.
79
+ */
53
80
  timeout?: number;
54
- /** Whether to use the on-disk cache. Defaults to true. */
81
+ /**
82
+ * Whether to use the on-disk cache. Defaults to true.
83
+ *
84
+ * Cached `LocateResult` step fields preserve `{{$.env.*}}` placeholders for
85
+ * any value that came from an env var, so changing an env value between
86
+ * runs replays the same cached locator with the new value rather than
87
+ * re-invoking the AI.
88
+ */
55
89
  cache?: boolean;
90
+ /**
91
+ * Explicit environment variable names (in addition to the heuristically
92
+ * derived ones) that the description may read via `{{$.env.*}}`
93
+ * interpolations.
94
+ */
95
+ envVars?: string[];
96
+ /**
97
+ * Explicitly supply environment variable values that amend (or override)
98
+ * the environment observed by this `page.ai.locate` call. Keys are merged
99
+ * with any names derived from {@link LocateOptions.envVars} and from
100
+ * `{{$.env.*}}` interpolations in the description.
101
+ *
102
+ * - A `string` value sets or overrides the variable for this invocation.
103
+ * - An `undefined` value *removes* the variable, even if it would
104
+ * otherwise be resolved from persistence.
105
+ *
106
+ * Only the **names** (keys) influence cache lookup; changing a value
107
+ * replays the cached locator with the new value via `{{$.env.*}}`
108
+ * placeholder substitution rather than busting the cache. If a referenced
109
+ * env var is absent at replay, the placeholder is left literal — the
110
+ * locator will then match zero elements and fail loudly.
111
+ */
112
+ envVals?: Record<string, string | undefined>;
56
113
  };
57
114
  //# sourceMappingURL=locateTypes.d.ts.map
@@ -408,33 +408,86 @@ Use this information to return an appropriate JSON object.`,
408
408
  const useCache = options?.cache !== false;
409
409
  const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
410
410
  const pageUrl = (0, cacheLocator_1.extractCacheKeyHostname)(page.url());
411
- // --- Cache lookup (when enabled and not clearing) ---
412
- if (useCache && !clearCache) {
413
- const cache = getOrInitPageAiCache();
414
- const cached = await cache.getLocate({ pageUrl, description });
415
- if (cached) {
416
- Logger_1.appLogger.debug(`Locate cache HIT for: "${description}" — rebuilding locator from cache`);
417
- return cached.run({ page });
411
+ // Distill env var names referenced by the description plus any
412
+ // explicitly provided names/overrides. Resolve env data locally — locate
413
+ // does not flow through `runTool`, so we don't mutate sharedState here.
414
+ const envVarNames = (0, DonobuFlowsManager_1.distillAllowedEnvVariableNames)(description, [
415
+ ...(options?.envVars ?? []),
416
+ ...Object.keys(options?.envVals ?? {}),
417
+ ]);
418
+ const hasEnvRefs = envVarNames.length > 0;
419
+ const resolveEnvData = async () => {
420
+ if (!hasEnvRefs) {
421
+ return undefined;
418
422
  }
419
- }
420
- // --- Cache invalidation (when clearing) ---
421
- if (useCache && clearCache) {
422
- const cache = getOrInitPageAiCache();
423
- await cache.deleteLocate({ pageUrl, description });
424
- Logger_1.appLogger.debug(`Locate cache invalidated for: "${description}"`);
425
- }
426
- // --- Cache miss or cache disabled: run AI locate ---
427
- const gptClient = getGptClient(page, options?.gptClient);
428
- if (!gptClient) {
429
- throw new ToolRequiresGptException_1.ToolRequiresGptException('locate');
430
- }
423
+ const envData = await sharedState.donobuStack.envDataManager.getByNames(envVarNames);
424
+ if (options?.envVals) {
425
+ for (const [k, v] of Object.entries(options.envVals)) {
426
+ if (v === undefined) {
427
+ delete envData[k];
428
+ }
429
+ else {
430
+ envData[k] = v;
431
+ }
432
+ }
433
+ }
434
+ return envData;
435
+ };
436
+ // The user-supplied `timeout` (default 30s) is the budget for the
437
+ // ENTIRE locate operation — cache-hit hydration wait + AI fallback.
438
+ // We start the abort timer here so the cache path's `waitFor` and the
439
+ // AI path share one bounded clock.
431
440
  const timeoutMillis = options?.timeout ?? 30_000;
441
+ const startedAt = Date.now();
432
442
  const abortController = new AbortController();
433
443
  const timeoutId = setTimeout(() => {
434
444
  abortController.abort(`Locate operation timed out after ${timeoutMillis} milliseconds`);
435
445
  }, timeoutMillis);
436
446
  try {
437
- const { locator, result } = await (0, locateElement_1.locateElement)(page, description, gptClient, { signal: abortController.signal });
447
+ // --- Cache lookup (when enabled and not clearing) ---
448
+ if (useCache && !clearCache) {
449
+ const cache = getOrInitPageAiCache();
450
+ const cached = await cache.getLocate({ pageUrl, description });
451
+ if (cached) {
452
+ const envData = await resolveEnvData();
453
+ const candidate = cached.run({ page, envData });
454
+ // Cache replay can outrun page hydration — the no-cache path
455
+ // gets an implicit hydration window from the AI round-trip
456
+ // latency, but a cache hit fires immediately and may see a
457
+ // partially-mounted DOM. Wait (within the operation's overall
458
+ // budget) for the locator to attach before validating.
459
+ const remaining = Math.max(timeoutMillis - (Date.now() - startedAt), 100);
460
+ try {
461
+ await candidate.first().waitFor({
462
+ state: 'attached',
463
+ timeout: remaining,
464
+ });
465
+ Logger_1.appLogger.debug(`Locate cache HIT for: "${description}" — rebuilt locator from cache`);
466
+ return candidate;
467
+ }
468
+ catch {
469
+ // Locator did not attach within the patience window. Either
470
+ // the page has drifted or the cache is genuinely stale.
471
+ // Invalidate and fall through to the AI path; the AI call
472
+ // gets whatever budget remains on the abort timer.
473
+ Logger_1.appLogger.debug(`Locate cache STALE for "${description}" (no match within ${remaining}ms) — re-running AI`);
474
+ await cache.deleteLocate({ pageUrl, description });
475
+ }
476
+ }
477
+ }
478
+ // --- Cache invalidation (when clearing) ---
479
+ if (useCache && clearCache) {
480
+ const cache = getOrInitPageAiCache();
481
+ await cache.deleteLocate({ pageUrl, description });
482
+ Logger_1.appLogger.debug(`Locate cache invalidated for: "${description}"`);
483
+ }
484
+ // --- Cache miss / cache disabled / stale-cache fallthrough: run AI ---
485
+ const gptClient = getGptClient(page, options?.gptClient);
486
+ if (!gptClient) {
487
+ throw new ToolRequiresGptException_1.ToolRequiresGptException('locate');
488
+ }
489
+ const envData = await resolveEnvData();
490
+ const { locator, result } = await (0, locateElement_1.locateElement)(page, description, gptClient, { signal: abortController.signal, envData });
438
491
  // --- Cache the result for future runs ---
439
492
  if (useCache) {
440
493
  try {
@@ -92,6 +92,13 @@ export type LocateCacheEntryWithRunner = LocateCacheEntry & {
92
92
  };
93
93
  export type LocateCacheExecutor = (context: {
94
94
  page: DonobuExtendedPage;
95
+ /**
96
+ * Optional env mapping used to interpolate `{{$.env.X}}` placeholders that
97
+ * the AI may have embedded into `LocatorStep.text`/`name`/`testId` fields.
98
+ * Absent → steps run unchanged (backwards compatible with cache entries
99
+ * recorded before env-aware caching).
100
+ */
101
+ envData?: Record<string, string>;
95
102
  }) => Locator;
96
103
  /**
97
104
  * Builds a cache executor that mechanically reconstructs a Playwright
@@ -172,6 +172,6 @@ function buildAssertExecutor(steps) {
172
172
  * {@link Locator} from a cached {@link LocateResult}.
173
173
  */
174
174
  function buildLocateExecutor(result) {
175
- return ({ page }) => (0, buildLocator_1.buildLocator)(page, result);
175
+ return ({ page, envData }) => (0, buildLocator_1.buildLocator)(page, result, envData);
176
176
  }
177
177
  //# sourceMappingURL=assertCache.js.map
@@ -4,6 +4,10 @@ import type { LocateResult } from './locateTypes';
4
4
  * Mechanically construct a Playwright {@link Locator} from a structured
5
5
  * {@link LocateResult}. No `eval` or string parsing — every branch maps to a
6
6
  * direct Playwright API call.
7
+ *
8
+ * When `envData` is supplied, `{{$.env.X}}` placeholders inside `text`,
9
+ * `name`, and `testId` step fields are resolved against it before being
10
+ * applied. `selector` and `frames[]` are left untouched.
7
11
  */
8
- export declare function buildLocator(page: Page, result: LocateResult): Locator;
12
+ export declare function buildLocator(page: Page, result: LocateResult, envData?: Record<string, string>): Locator;
9
13
  //# sourceMappingURL=buildLocator.d.ts.map
@@ -1,12 +1,54 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.buildLocator = buildLocator;
4
+ const TemplateInterpolator_1 = require("../../../utils/TemplateInterpolator");
5
+ /**
6
+ * Resolves any `{{$.env.X}}` placeholders in a step field against the
7
+ * supplied env data. Returns the input verbatim when no env data is given,
8
+ * or when the field has no placeholder syntax — backwards compatible with
9
+ * cached entries that contain literal values only.
10
+ *
11
+ * Only applied to `text`, `name`, and `testId` step fields. `selector`
12
+ * (CSS/XPath) and `frames[]` entries are left literal because raw env
13
+ * values cannot be safely embedded into a CSS selector without escaping.
14
+ */
15
+ function resolveStepField(value, envData) {
16
+ if (!envData || !value.includes('{{')) {
17
+ return value;
18
+ }
19
+ return (0, TemplateInterpolator_1.interpolateString)(value, { env: envData, calls: [] });
20
+ }
21
+ /**
22
+ * Interpolate env placeholders, then optionally compile the result as a
23
+ * regex. Mirrors the order used by `buildAssertExecutor` so env-var × regex
24
+ * semantics stay consistent across cache executors.
25
+ *
26
+ * On `new RegExp(...)` failure (invalid pattern) the original string is
27
+ * returned, letting Playwright apply literal substring matching rather than
28
+ * throwing inside the cache replay path.
29
+ */
30
+ function resolveAndCompile(value, isRegex, envData) {
31
+ const resolved = resolveStepField(value, envData);
32
+ if (!isRegex) {
33
+ return resolved;
34
+ }
35
+ try {
36
+ return new RegExp(resolved);
37
+ }
38
+ catch {
39
+ return resolved;
40
+ }
41
+ }
4
42
  /**
5
43
  * Mechanically construct a Playwright {@link Locator} from a structured
6
44
  * {@link LocateResult}. No `eval` or string parsing — every branch maps to a
7
45
  * direct Playwright API call.
46
+ *
47
+ * When `envData` is supplied, `{{$.env.X}}` placeholders inside `text`,
48
+ * `name`, and `testId` step fields are resolved against it before being
49
+ * applied. `selector` and `frames[]` are left untouched.
8
50
  */
9
- function buildLocator(page, result) {
51
+ function buildLocator(page, result, envData) {
10
52
  // 1. Resolve frame chain (if any)
11
53
  let frameScope;
12
54
  if (result.frames && result.frames.length > 0) {
@@ -16,9 +58,9 @@ function buildLocator(page, result) {
16
58
  }
17
59
  // 2. Apply locator steps
18
60
  const base = frameScope ?? page;
19
- let locator = applyStep(base, result.steps[0]);
61
+ let locator = applyStep(base, result.steps[0], envData);
20
62
  for (let i = 1; i < result.steps.length; i++) {
21
- locator = applyStepToLocator(locator, result.steps[i]);
63
+ locator = applyStepToLocator(locator, result.steps[i], envData);
22
64
  }
23
65
  // 3. nth disambiguation
24
66
  if (result.nth !== undefined) {
@@ -39,34 +81,41 @@ function applyFrameStep(parent, step) {
39
81
  throw new Error(`Unknown frame method: ${step.method}`);
40
82
  }
41
83
  }
42
- function applyStep(base, step) {
43
- return applyStepTo(base, step);
84
+ function applyStep(base, step, envData) {
85
+ return applyStepTo(base, step, envData);
44
86
  }
45
- function applyStepToLocator(parent, step) {
46
- return applyStepTo(parent, step);
87
+ function applyStepToLocator(parent, step, envData) {
88
+ return applyStepTo(parent, step, envData);
47
89
  }
48
- function applyStepTo(parent, step) {
90
+ function applyStepTo(parent, step, envData) {
91
+ // `exact` and `*IsRegex` are mutually exclusive. If the AI emits both
92
+ // (shouldn't happen — the prompt forbids it), regex wins because passing
93
+ // `exact: true` with a `RegExp` matcher to Playwright is meaningless.
49
94
  const exactOpt = step.exact !== undefined ? { exact: step.exact } : undefined;
50
95
  switch (step.method) {
51
96
  case 'getByRole': {
52
97
  const roleOpts = {};
53
98
  if (step.name !== undefined) {
54
- roleOpts.name = step.name;
99
+ roleOpts.name = resolveAndCompile(step.name, step.nameIsRegex, envData);
55
100
  }
56
- if (step.exact !== undefined) {
101
+ if (step.exact !== undefined && !step.nameIsRegex) {
57
102
  roleOpts.exact = step.exact;
58
103
  }
59
104
  return parent.getByRole((step.role ?? 'generic'), Object.keys(roleOpts).length > 0 ? roleOpts : undefined);
60
105
  }
61
106
  case 'getByText':
62
- return parent.getByText(step.text ?? '', exactOpt);
107
+ return parent.getByText(resolveAndCompile(step.text ?? '', step.textIsRegex, envData), step.textIsRegex ? undefined : exactOpt);
63
108
  case 'getByLabel':
64
- return parent.getByLabel(step.text ?? '', exactOpt);
109
+ return parent.getByLabel(resolveAndCompile(step.text ?? '', step.textIsRegex, envData), step.textIsRegex ? undefined : exactOpt);
65
110
  case 'getByPlaceholder':
66
- return parent.getByPlaceholder(step.text ?? '', exactOpt);
111
+ return parent.getByPlaceholder(resolveAndCompile(step.text ?? '', step.textIsRegex, envData), step.textIsRegex ? undefined : exactOpt);
67
112
  case 'getByTestId':
68
- return parent.getByTestId(step.testId ?? '');
113
+ return parent.getByTestId(resolveStepField(step.testId ?? '', envData));
69
114
  case 'locator':
115
+ // `selector` is a raw CSS/XPath string — interpolating env values into
116
+ // it can produce invalid syntax silently. The locate prompt steers the
117
+ // AI toward semantic locators when env values are involved; cached
118
+ // selectors stay literal.
70
119
  return parent.locator(step.selector ?? '*');
71
120
  default:
72
121
  throw new Error(`Unknown locator method: ${step.method}`);
@@ -17,6 +17,7 @@ import type { LocateResult } from './locateTypes';
17
17
  */
18
18
  export declare function locateElement(page: Page, description: string, gptClient: GptClient, options?: {
19
19
  signal?: AbortSignal;
20
+ envData?: Record<string, string>;
20
21
  }): Promise<{
21
22
  locator: Locator;
22
23
  result: LocateResult;
@@ -4,6 +4,7 @@ exports.locateElement = locateElement;
4
4
  const v4_1 = require("zod/v4");
5
5
  const Logger_1 = require("../../../utils/Logger");
6
6
  const PlaywrightUtils_1 = require("../../../utils/PlaywrightUtils");
7
+ const TemplateInterpolator_1 = require("../../../utils/TemplateInterpolator");
7
8
  const buildLocator_1 = require("./buildLocator");
8
9
  const domSnapshot_1 = require("./domSnapshot");
9
10
  const LocateException_1 = require("./LocateException");
@@ -27,14 +28,15 @@ const SNIPPET_MAX_CHARS = 200;
27
28
  * callers can cache the result for deterministic replay.
28
29
  */
29
30
  async function locateElement(page, description, gptClient, options) {
31
+ const envData = options?.envData;
30
32
  const screenshot = await PlaywrightUtils_1.PlaywrightUtils.takeViewportScreenshot(page);
31
33
  const domSnapshot = await (0, domSnapshot_1.captureDomSnapshot)(page);
32
34
  Logger_1.appLogger.debug(`locate: DOM snapshot captured (${domSnapshot.html.length} chars, ${domSnapshot.omittedCount} nodes omitted)`);
33
- const systemMessage = buildSystemMessage(page.url(), await page.title());
35
+ const systemMessage = buildSystemMessage(page.url(), await page.title(), description, envData);
34
36
  const userMessage = buildUserMessage(description, screenshot, domSnapshot.html);
35
37
  // First attempt
36
38
  const firstResult = await callLlm(gptClient, systemMessage, userMessage, options?.signal);
37
- const firstLocator = (0, buildLocator_1.buildLocator)(page, firstResult);
39
+ const firstLocator = (0, buildLocator_1.buildLocator)(page, firstResult, envData);
38
40
  const firstCount = await safeCount(firstLocator);
39
41
  Logger_1.appLogger.debug(`locate: first attempt matched ${firstCount} element(s)`);
40
42
  if (firstCount === 1) {
@@ -42,7 +44,7 @@ async function locateElement(page, description, gptClient, options) {
42
44
  }
43
45
  // Disambiguation: small number of matches — show snippets and let LLM pick
44
46
  if (firstCount > 1 && firstCount <= DISAMBIGUATE_THRESHOLD) {
45
- return await disambiguate(page, description, gptClient, firstLocator, firstResult, firstCount, options?.signal);
47
+ return await disambiguate(page, description, gptClient, firstLocator, firstResult, firstCount, envData, options?.signal);
46
48
  }
47
49
  // Retry: zero matches or too many
48
50
  const previousAttempt = summarizeLocateResult(firstResult);
@@ -58,14 +60,14 @@ async function locateElement(page, description, gptClient, options) {
58
60
  : `Your locator matched ${firstCount} elements, which is too many to disambiguate. Your previous attempt was: ${previousAttempt}. Write a more specific locator.`;
59
61
  const retryMessage = buildRetryMessage(description, feedback, screenshot, retryDomHtml);
60
62
  const retryResult = await callLlm(gptClient, systemMessage, retryMessage, options?.signal);
61
- const retryLocator = (0, buildLocator_1.buildLocator)(page, retryResult);
63
+ const retryLocator = (0, buildLocator_1.buildLocator)(page, retryResult, envData);
62
64
  const retryCount = await safeCount(retryLocator);
63
65
  Logger_1.appLogger.debug(`locate: retry matched ${retryCount} element(s)`);
64
66
  if (retryCount === 1) {
65
67
  return { locator: retryLocator, result: retryResult };
66
68
  }
67
69
  if (retryCount > 1 && retryCount <= DISAMBIGUATE_THRESHOLD) {
68
- return await disambiguate(page, description, gptClient, retryLocator, retryResult, retryCount, options?.signal);
70
+ return await disambiguate(page, description, gptClient, retryLocator, retryResult, retryCount, envData, options?.signal);
69
71
  }
70
72
  // Give up
71
73
  const reason = retryCount === 0 ? 'no_matches' : 'too_many_matches';
@@ -77,7 +79,7 @@ async function locateElement(page, description, gptClient, options) {
77
79
  * Show HTML snippets of each match to the LLM and ask it to pick the
78
80
  * correct one. Returns the original locator with `.nth(n)` appended.
79
81
  */
80
- async function disambiguate(page, description, gptClient, locator, locateResult, count, signal) {
82
+ async function disambiguate(page, description, gptClient, locator, locateResult, count, envData, signal) {
81
83
  const snippets = [];
82
84
  for (let i = 0; i < count; i++) {
83
85
  const nth = locator.nth(i);
@@ -111,6 +113,12 @@ async function disambiguate(page, description, gptClient, locator, locateResult,
111
113
  .max(count - 1)
112
114
  .describe('Zero-based index of the element that best matches the description.'),
113
115
  });
116
+ // Disambiguation output is just an index — never cached and never fed back
117
+ // through `buildLocator`. Show the LLM the resolved description so it can
118
+ // match candidate HTML directly without doing mental env-var substitution.
119
+ const resolvedDescription = envData && description.includes('{{')
120
+ ? (0, TemplateInterpolator_1.interpolateString)(description, { env: envData, calls: [] })
121
+ : description;
114
122
  const systemMsg = {
115
123
  type: 'system',
116
124
  text: `You are resolving an ambiguous element lookup. The user described an element and your locator matched ${count} candidates. Choose the one that best matches the description.`,
@@ -120,7 +128,7 @@ async function disambiguate(page, description, gptClient, locator, locateResult,
120
128
  items: [
121
129
  {
122
130
  type: 'text',
123
- text: `Description: "${description}"\n\nCandidates:\n${snippetText}\n\nReturn the index of the best match.`,
131
+ text: `Description: "${resolvedDescription}"\n\nCandidates:\n${snippetText}\n\nReturn the index of the best match.`,
124
132
  },
125
133
  ],
126
134
  };
@@ -131,7 +139,7 @@ async function disambiguate(page, description, gptClient, locator, locateResult,
131
139
  nth: resp.output.index,
132
140
  };
133
141
  return {
134
- locator: (0, buildLocator_1.buildLocator)(page, disambiguatedResult),
142
+ locator: (0, buildLocator_1.buildLocator)(page, disambiguatedResult, envData),
135
143
  result: disambiguatedResult,
136
144
  };
137
145
  }
@@ -139,7 +147,54 @@ async function callLlm(gptClient, systemMessage, userMessage, signal) {
139
147
  const resp = await gptClient.getStructuredOutput([systemMessage, userMessage], locateSchema_1.LocateResultSchema, { signal });
140
148
  return resp.output;
141
149
  }
142
- function buildSystemMessage(pageUrl, pageTitle) {
150
+ function buildSystemMessage(pageUrl, pageTitle, description, envData) {
151
+ // Only annotate the prompt with env-var guidance when the raw description
152
+ // actually references at least one provided env var. Keeps the prompt small
153
+ // for the common case.
154
+ const envEntries = Object.entries(envData ?? {});
155
+ const referencedEnvEntries = envEntries.filter(([name]) => description.includes(`{{$.env.${name}}}`));
156
+ const envBlock = referencedEnvEntries.length > 0
157
+ ? `
158
+
159
+ The user's description contains environment variable references using the syntax
160
+ \`{{$.env.NAME}}\`. To keep cached locators valid across runs with different env
161
+ values, you MUST emit those same placeholders in any LocatorStep \`text\`,
162
+ \`name\`, or \`testId\` field whose contents come from an env var. Do NOT bake
163
+ the literal current value into the step.
164
+
165
+ Original (uninterpolated) description: "${description}"
166
+
167
+ Current env mapping (use these to identify which substrings on the page came
168
+ from which env var, then emit the placeholder rather than the literal):
169
+ ${referencedEnvEntries.map(([name, value]) => ` - {{$.env.${name}}} = ${JSON.stringify(value)}`).join('\n')}
170
+
171
+ Hard rules for env-var emission:
172
+ - Use placeholders ONLY in \`text\`, \`name\`, or \`testId\` fields.
173
+ - NEVER emit \`{{$.env.*}}\` inside \`selector\` (CSS/XPath) — interpolating
174
+ raw values into a CSS selector can produce invalid syntax. Use a semantic
175
+ locator (getByRole/getByText/getByLabel/getByPlaceholder/getByTestId)
176
+ instead when an env-derived value is involved.
177
+ - NEVER emit \`{{$.env.*}}\` inside any \`frames[]\` entry (iframe selectors
178
+ or iframe \`name\` attributes are not env-driven).
179
+
180
+ Examples:
181
+ - Description "The user row for {{$.env.TEST_EMAIL}}", TEST_EMAIL="alice@x.com",
182
+ page text shows "alice@x.com" →
183
+ [{ method: "getByText", text: "{{$.env.TEST_EMAIL}}" }]
184
+ - Description "The {{$.env.PROJECT_NAME}} tab", PROJECT_NAME="Apollo" →
185
+ [{ method: "getByRole", role: "tab", name: "{{$.env.PROJECT_NAME}}" }]
186
+ - Description "The submit button" (no env vars referenced) → emit literal text
187
+ as you normally would.
188
+
189
+ Combining env vars with regex: env interpolation runs BEFORE regex compilation,
190
+ so you can mix them. Prefer this when the env value should be matched alongside
191
+ dynamic page content. Example — description "The row for {{$.env.USER}} with
192
+ their score", USER="alice" →
193
+ [{ method: "getByText", text: "alice — \\\\d+ pts", textIsRegex: true }]
194
+ (Here the AI substituted the env value because it's part of a regex pattern;
195
+ the placeholder syntax also works — \`text: "{{$.env.USER}} — \\\\d+ pts"\` —
196
+ and is preferred when you want cache stability across env value changes.)`
197
+ : '';
143
198
  return {
144
199
  type: 'system',
145
200
  text: `You are a Playwright locator expert. Given a viewport screenshot and a pruned DOM snapshot of a webpage, return a structured locator that targets the element matching the user's description.
@@ -151,8 +206,50 @@ Rules:
151
206
  - If the element is inside an iframe, specify the frame(s) in the "frames" field.
152
207
  - Do NOT set "nth" unless you are certain the chain matches multiple elements and you know which index is correct. When unsure, omit it — the system will handle disambiguation.
153
208
 
209
+ Stability rules — locators are CACHED and replayed across runs. The page may
210
+ change between runs (vote counts increment, "3 hours ago" becomes "5 hours ago",
211
+ new posts shift positions, prices fluctuate). Choose locators that survive these
212
+ drifts:
213
+
214
+ - POSITIONAL DESCRIPTIONS: when the description references position ("first",
215
+ "third", "fourth from the top", "last"), translate that into a structural
216
+ chain plus \`nth\` rather than baking position-specific page text into a step.
217
+ Example — "the fourth comments link" should be a locator over ALL comment
218
+ links with \`nth: 3\`, not the literal "36 comments" you happen to see today.
219
+
220
+ - DYNAMIC TEXT: if the value you would put into \`name\` or \`text\` looks
221
+ dynamic — contains digits, timestamps, "X ago", "$X.XX", counts, scores,
222
+ vote totals — emit a regex pattern via \`nameIsRegex: true\` (for getByRole)
223
+ or \`textIsRegex: true\` (for getByText/getByLabel/getByPlaceholder) instead
224
+ of the literal value. Anchor the pattern with \`^\` / \`$\` when the whole
225
+ string should match, otherwise it acts as a substring match.
226
+
227
+ - DO NOT combine \`exact: true\` with \`nameIsRegex\`/\`textIsRegex\`. They are
228
+ mutually exclusive — set \`exact\` only for literal-string steps with stable
229
+ fixed labels like "Submit" or "Sign In".
230
+
231
+ - SAFE LITERALS: keep literal values for genuinely stable strings — fixed UI
232
+ labels, button text like "Submit"/"Cancel", section headings, unique
233
+ test-ids. Only escape to regex when stability is at risk.
234
+
235
+ Examples:
236
+ - "The fourth comments link" →
237
+ steps: [{ method: "getByRole", role: "link", name: "\\\\d+\\\\s+comments?$", nameIsRegex: true }]
238
+ nth: 3
239
+ - "The headline of the third story" → structural row selector + nth: 2 (literal name)
240
+ - "The submit button" → literal name: "Submit", optionally exact: true
241
+ - "The price tag for the cart total" →
242
+ steps: [{ method: "getByText", text: "\\\\$\\\\d+(\\\\.\\\\d+)?", textIsRegex: true }]
243
+ - "The 'posted 5 hours ago' label" →
244
+ steps: [{ method: "getByText", text: "posted \\\\d+ (minute|hour|day)s? ago", textIsRegex: true }]
245
+
246
+ Regex format: emit a JS-style regex source string (no leading/trailing slash,
247
+ no flags). Backslashes inside JSON must be doubled (\`\\\\d+\` not \`\\d+\`).
248
+ Invalid patterns silently fall back to literal matching, so prefer simple,
249
+ well-tested patterns.
250
+
154
251
  Page URL: ${pageUrl}
155
- Page title: ${pageTitle}`,
252
+ Page title: ${pageTitle}${envBlock}`,
156
253
  };
157
254
  }
158
255
  function buildUserMessage(description, screenshot, domHtml) {
@@ -27,6 +27,8 @@ export declare const LocateResultSchema: z.ZodObject<{
27
27
  testId: z.ZodOptional<z.ZodString>;
28
28
  selector: z.ZodOptional<z.ZodString>;
29
29
  exact: z.ZodOptional<z.ZodBoolean>;
30
+ nameIsRegex: z.ZodOptional<z.ZodBoolean>;
31
+ textIsRegex: z.ZodOptional<z.ZodBoolean>;
30
32
  }, z.core.$strip>>;
31
33
  nth: z.ZodOptional<z.ZodNumber>;
32
34
  }, z.core.$strip>;
@@ -46,7 +46,15 @@ const LocatorStepSchema = v4_1.z
46
46
  exact: v4_1.z
47
47
  .boolean()
48
48
  .optional()
49
- .describe('Whether text/name matching should be exact. Applies to getByRole (name), getByText, getByLabel, getByPlaceholder.'),
49
+ .describe('Whether text/name matching should be exact. Applies to getByRole (name), getByText, getByLabel, getByPlaceholder. Mutually exclusive with nameIsRegex / textIsRegex.'),
50
+ nameIsRegex: v4_1.z
51
+ .boolean()
52
+ .optional()
53
+ .describe('Set true when "name" is a regex pattern (compiled via new RegExp(name)). Use this for dynamic accessible names — e.g. "\\d+ comments" matches any "N comments" link. Used with getByRole. Do not combine with exact:true.'),
54
+ textIsRegex: v4_1.z
55
+ .boolean()
56
+ .optional()
57
+ .describe('Set true when "text" is a regex pattern (compiled via new RegExp(text)). Use this for dynamic page text — counts, dates, prices, "X ago" timestamps. Used with getByText / getByLabel / getByPlaceholder. Do not combine with exact:true.'),
50
58
  })
51
59
  .describe('A single Playwright locator step.');
52
60
  const FrameStepSchema = v4_1.z
@@ -20,6 +20,24 @@ export type LocatorStep = {
20
20
  selector?: string;
21
21
  /** Whether text/name matching should be exact. */
22
22
  exact?: boolean;
23
+ /**
24
+ * When true, `name` is treated as a regex pattern compiled via
25
+ * `new RegExp(name)` rather than a literal string. Mutually exclusive
26
+ * with `exact: true`. Used with `getByRole`.
27
+ *
28
+ * Env-var placeholders are interpolated **before** regex compilation, so
29
+ * `'\\d+ {{$.env.NOUN}}'` with `NOUN='comments'` compiles as
30
+ * `/\d+ comments/`.
31
+ */
32
+ nameIsRegex?: boolean;
33
+ /**
34
+ * When true, `text` is treated as a regex pattern compiled via
35
+ * `new RegExp(text)` rather than a literal string. Mutually exclusive with
36
+ * `exact: true`. Used with `getByText`, `getByLabel`, `getByPlaceholder`.
37
+ *
38
+ * Env-var placeholders are interpolated **before** regex compilation.
39
+ */
40
+ textIsRegex?: boolean;
23
41
  };
24
42
  /**
25
43
  * Identifies an iframe to scope into before applying {@link LocatorStep}s.
@@ -49,9 +67,48 @@ export type LocateResult = {
49
67
  */
50
68
  export type LocateOptions = {
51
69
  gptClient?: GptClient | Exclude<LanguageModel, string>;
52
- /** Timeout in milliseconds for the entire locate operation (default: 30 000). */
70
+ /**
71
+ * Timeout in milliseconds for the entire locate operation (default: 30 000).
72
+ *
73
+ * On cache hit this budgets the hydration patience window — the cached
74
+ * locator gets up to this long to attach to a matching element before the
75
+ * cache is treated as stale and the AI is re-run. On cache miss (or
76
+ * stale-cache fallthrough) this budgets the AI call. Whatever the cache
77
+ * path consumes is deducted from the AI path's remaining budget; the total
78
+ * never exceeds `timeout`.
79
+ */
53
80
  timeout?: number;
54
- /** Whether to use the on-disk cache. Defaults to true. */
81
+ /**
82
+ * Whether to use the on-disk cache. Defaults to true.
83
+ *
84
+ * Cached `LocateResult` step fields preserve `{{$.env.*}}` placeholders for
85
+ * any value that came from an env var, so changing an env value between
86
+ * runs replays the same cached locator with the new value rather than
87
+ * re-invoking the AI.
88
+ */
55
89
  cache?: boolean;
90
+ /**
91
+ * Explicit environment variable names (in addition to the heuristically
92
+ * derived ones) that the description may read via `{{$.env.*}}`
93
+ * interpolations.
94
+ */
95
+ envVars?: string[];
96
+ /**
97
+ * Explicitly supply environment variable values that amend (or override)
98
+ * the environment observed by this `page.ai.locate` call. Keys are merged
99
+ * with any names derived from {@link LocateOptions.envVars} and from
100
+ * `{{$.env.*}}` interpolations in the description.
101
+ *
102
+ * - A `string` value sets or overrides the variable for this invocation.
103
+ * - An `undefined` value *removes* the variable, even if it would
104
+ * otherwise be resolved from persistence.
105
+ *
106
+ * Only the **names** (keys) influence cache lookup; changing a value
107
+ * replays the cached locator with the new value via `{{$.env.*}}`
108
+ * placeholder substitution rather than busting the cache. If a referenced
109
+ * env var is absent at replay, the placeholder is left literal — the
110
+ * locator will then match zero elements and fail loudly.
111
+ */
112
+ envVals?: Record<string, string | undefined>;
56
113
  };
57
114
  //# sourceMappingURL=locateTypes.d.ts.map
@@ -408,33 +408,86 @@ Use this information to return an appropriate JSON object.`,
408
408
  const useCache = options?.cache !== false;
409
409
  const clearCache = sharedState.runtimeDirectives?.clearPageAiCache ?? false;
410
410
  const pageUrl = (0, cacheLocator_1.extractCacheKeyHostname)(page.url());
411
- // --- Cache lookup (when enabled and not clearing) ---
412
- if (useCache && !clearCache) {
413
- const cache = getOrInitPageAiCache();
414
- const cached = await cache.getLocate({ pageUrl, description });
415
- if (cached) {
416
- Logger_1.appLogger.debug(`Locate cache HIT for: "${description}" — rebuilding locator from cache`);
417
- return cached.run({ page });
411
+ // Distill env var names referenced by the description plus any
412
+ // explicitly provided names/overrides. Resolve env data locally — locate
413
+ // does not flow through `runTool`, so we don't mutate sharedState here.
414
+ const envVarNames = (0, DonobuFlowsManager_1.distillAllowedEnvVariableNames)(description, [
415
+ ...(options?.envVars ?? []),
416
+ ...Object.keys(options?.envVals ?? {}),
417
+ ]);
418
+ const hasEnvRefs = envVarNames.length > 0;
419
+ const resolveEnvData = async () => {
420
+ if (!hasEnvRefs) {
421
+ return undefined;
418
422
  }
419
- }
420
- // --- Cache invalidation (when clearing) ---
421
- if (useCache && clearCache) {
422
- const cache = getOrInitPageAiCache();
423
- await cache.deleteLocate({ pageUrl, description });
424
- Logger_1.appLogger.debug(`Locate cache invalidated for: "${description}"`);
425
- }
426
- // --- Cache miss or cache disabled: run AI locate ---
427
- const gptClient = getGptClient(page, options?.gptClient);
428
- if (!gptClient) {
429
- throw new ToolRequiresGptException_1.ToolRequiresGptException('locate');
430
- }
423
+ const envData = await sharedState.donobuStack.envDataManager.getByNames(envVarNames);
424
+ if (options?.envVals) {
425
+ for (const [k, v] of Object.entries(options.envVals)) {
426
+ if (v === undefined) {
427
+ delete envData[k];
428
+ }
429
+ else {
430
+ envData[k] = v;
431
+ }
432
+ }
433
+ }
434
+ return envData;
435
+ };
436
+ // The user-supplied `timeout` (default 30s) is the budget for the
437
+ // ENTIRE locate operation — cache-hit hydration wait + AI fallback.
438
+ // We start the abort timer here so the cache path's `waitFor` and the
439
+ // AI path share one bounded clock.
431
440
  const timeoutMillis = options?.timeout ?? 30_000;
441
+ const startedAt = Date.now();
432
442
  const abortController = new AbortController();
433
443
  const timeoutId = setTimeout(() => {
434
444
  abortController.abort(`Locate operation timed out after ${timeoutMillis} milliseconds`);
435
445
  }, timeoutMillis);
436
446
  try {
437
- const { locator, result } = await (0, locateElement_1.locateElement)(page, description, gptClient, { signal: abortController.signal });
447
+ // --- Cache lookup (when enabled and not clearing) ---
448
+ if (useCache && !clearCache) {
449
+ const cache = getOrInitPageAiCache();
450
+ const cached = await cache.getLocate({ pageUrl, description });
451
+ if (cached) {
452
+ const envData = await resolveEnvData();
453
+ const candidate = cached.run({ page, envData });
454
+ // Cache replay can outrun page hydration — the no-cache path
455
+ // gets an implicit hydration window from the AI round-trip
456
+ // latency, but a cache hit fires immediately and may see a
457
+ // partially-mounted DOM. Wait (within the operation's overall
458
+ // budget) for the locator to attach before validating.
459
+ const remaining = Math.max(timeoutMillis - (Date.now() - startedAt), 100);
460
+ try {
461
+ await candidate.first().waitFor({
462
+ state: 'attached',
463
+ timeout: remaining,
464
+ });
465
+ Logger_1.appLogger.debug(`Locate cache HIT for: "${description}" — rebuilt locator from cache`);
466
+ return candidate;
467
+ }
468
+ catch {
469
+ // Locator did not attach within the patience window. Either
470
+ // the page has drifted or the cache is genuinely stale.
471
+ // Invalidate and fall through to the AI path; the AI call
472
+ // gets whatever budget remains on the abort timer.
473
+ Logger_1.appLogger.debug(`Locate cache STALE for "${description}" (no match within ${remaining}ms) — re-running AI`);
474
+ await cache.deleteLocate({ pageUrl, description });
475
+ }
476
+ }
477
+ }
478
+ // --- Cache invalidation (when clearing) ---
479
+ if (useCache && clearCache) {
480
+ const cache = getOrInitPageAiCache();
481
+ await cache.deleteLocate({ pageUrl, description });
482
+ Logger_1.appLogger.debug(`Locate cache invalidated for: "${description}"`);
483
+ }
484
+ // --- Cache miss / cache disabled / stale-cache fallthrough: run AI ---
485
+ const gptClient = getGptClient(page, options?.gptClient);
486
+ if (!gptClient) {
487
+ throw new ToolRequiresGptException_1.ToolRequiresGptException('locate');
488
+ }
489
+ const envData = await resolveEnvData();
490
+ const { locator, result } = await (0, locateElement_1.locateElement)(page, description, gptClient, { signal: abortController.signal, envData });
438
491
  // --- Cache the result for future runs ---
439
492
  if (useCache) {
440
493
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "donobu",
3
- "version": "5.34.0",
3
+ "version": "5.35.0",
4
4
  "description": "Create browser automations with an LLM agent and replay them as Playwright scripts.",
5
5
  "main": "dist/main.js",
6
6
  "module": "dist/esm/main.js",