@trusty-squire/mcp 0.9.9 → 0.9.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,7 +42,7 @@
42
42
  // - It doesn't sandbox the browser (T14 lives at the caller layer —
43
43
  // the router decides whether to spawn a fresh BrowserController
44
44
  // or reuse one).
45
- import { appendFileSync, mkdirSync } from "node:fs";
45
+ import { appendFileSync, mkdirSync, writeFileSync } from "node:fs";
46
46
  import { join } from "node:path";
47
47
  import { loggedInProviders } from "./login-state.js";
48
48
  import { isTruncatedCapture, extractApiKeyFromText, findOAuthButton, isCredentialNoiseCandidate, } from "./agent.js";
@@ -98,11 +98,18 @@ export async function replaySkill(input) {
98
98
  // once every named extract has run successfully — not on the first
99
99
  // extract like the single-cred path. Detected by skill content:
100
100
  // any *_named step → multi mode, else → single.
101
+ // extract_labeled is ALSO a multi-cred extract (pusher: application_id /
102
+ // app_key / secret) — it returns extract_named_ok and accumulates into the
103
+ // bundle just like the *_named kinds. Omitting it here left isMultiCred false,
104
+ // so a pusher skill accumulated all 3 values but never returned ok_multi and
105
+ // fell through to "walked entire graph without producing a credential."
101
106
  const isMultiCred = skill.steps.some((s) => s.kind === "extract_via_copy_button_named" ||
102
- s.kind === "extract_via_regex_named");
107
+ s.kind === "extract_via_regex_named" ||
108
+ s.kind === "extract_labeled");
103
109
  const expectedProduces = new Set(skill.steps
104
110
  .filter((s) => s.kind === "extract_via_copy_button_named" ||
105
- s.kind === "extract_via_regex_named")
111
+ s.kind === "extract_via_regex_named" ||
112
+ s.kind === "extract_labeled")
106
113
  .map((s) => s.produces));
107
114
  const credentialBundle = {};
108
115
  const viaBundle = {};
@@ -198,7 +205,25 @@ export async function replaySkill(input) {
198
205
  skippedOnboardingFill = true;
199
206
  continue;
200
207
  }
208
+ else if (step.kind === "select" &&
209
+ isSkippableAbsentSelect(step, validation.reason, i, skill.steps)) {
210
+ // Account-state-dependent ONBOARDING select (porter "Role" /
211
+ // railway "Workspace" class): the wizard dropdown only exists for a
212
+ // brand-new account. The verifier's operator account is already
213
+ // registered, so the service skips the onboarding form and the
214
+ // <select> is wholly absent — exactly the fill case above, just a
215
+ // different control. A later extract step still reaches the
216
+ // credential and the credential validator is the backstop, so skip
217
+ // rather than false-failing the whole replay.
218
+ console.error(`[replay] step ${i} (select label_hint=${JSON.stringify(step.label_hint)}) ` +
219
+ `select absent — skipping as account-state-dependent onboarding ` +
220
+ `(account already registered; signup form gone). A later extract ` +
221
+ `step still reaches the credential. Reason: ${validation.reason}`);
222
+ skippedOnboardingFill = true;
223
+ continue;
224
+ }
201
225
  else {
226
+ await maybeDumpReplayDebug(browser, skill, i, validation.reason);
202
227
  return {
203
228
  kind: "step_failed",
204
229
  stepIndex: i,
@@ -224,6 +249,18 @@ export async function replaySkill(input) {
224
249
  const credSpec = skill.credentials[0];
225
250
  const validatorResult = await validateCredential(execOutcome.value, credSpec, input.fetchFn);
226
251
  if (!validatorResult.ok) {
252
+ if (process.env.REPLAY_DEBUG) {
253
+ try {
254
+ const cands = await browser.extractCredentialCandidates().catch(() => []);
255
+ const txt = (await browser.extractText().catch(() => "")).slice(0, 2000);
256
+ writeFileSync(`/tmp/replay-validator-${skill.service}.txt`, `url=${browser.currentUrl()}\ngot=${execOutcome.value}\nreason=${validatorResult.reason}\n` +
257
+ `candidates=${JSON.stringify(cands.slice(0, 20))}\n\nTEXT:\n${txt}`);
258
+ console.error(`[replay-debug] dumped /tmp/replay-validator-${skill.service}.txt`);
259
+ }
260
+ catch {
261
+ /* best-effort */
262
+ }
263
+ }
227
264
  return {
228
265
  kind: "validator_failed",
229
266
  stepIndex: i,
@@ -357,9 +394,15 @@ async function preValidateStep(step, browser, templateValues) {
357
394
  case "click": {
358
395
  const inventory = await browser.extractInteractiveElements();
359
396
  const matches = inventory.filter((el) => matchesClickHint(el, step.text_match));
360
- const filtered = step.role_hint
397
+ // role_hint is a SOFT preference, not a hard gate. When it filters out
398
+ // every text-match — imagekit's live "Next" renders as an <a>, not the
399
+ // captured <button> — fall back to the text matches and let the
400
+ // disambiguator below pick. A genuinely-absent target (no text match at
401
+ // all) still falls through to the href/token fallbacks.
402
+ const roleFiltered = step.role_hint
361
403
  ? matches.filter((el) => matchesRole(el, step.role_hint))
362
404
  : matches;
405
+ const filtered = roleFiltered.length > 0 ? roleFiltered : matches;
363
406
  if (filtered.length === 0) {
364
407
  // href fallback: a nav-link target whose text rendered as an icon
365
408
  // on replay (or whose URL slug differs) won't match by text but
@@ -370,13 +413,17 @@ async function preValidateStep(step, browser, templateValues) {
370
413
  if (byHref.length === 1)
371
414
  return { ok: true, match: byHref[0] };
372
415
  }
373
- if (matches.length > 0) {
374
- return {
375
- ok: false,
376
- reason: `text_match=${JSON.stringify(step.text_match)} matched ${matches.length} elements, ` +
377
- `but role_hint=${step.role_hint} filtered them all out.`,
378
- };
379
- }
416
+ // Last-resort token-subset fallback: the captured text_match is a
417
+ // planner gloss ("Create Token") that doesn't substring-match the live
418
+ // button ("Create API Token"). Resolve by token containment, honoring
419
+ // role_hint, and accept ONLY a unique match — ambiguity is unsafe for a
420
+ // click that may mint a credential (the validator is the backstop).
421
+ const tokenPool = step.role_hint
422
+ ? inventory.filter((el) => matchesRole(el, step.role_hint))
423
+ : inventory;
424
+ const byTokens = tokenPool.filter((el) => matchesClickHintTokens(el, step.text_match));
425
+ if (byTokens.length === 1)
426
+ return { ok: true, match: byTokens[0] };
380
427
  return {
381
428
  ok: false,
382
429
  reason: `No element matches text_match=${JSON.stringify(step.text_match)}` +
@@ -410,6 +457,15 @@ async function preValidateStep(step, browser, templateValues) {
410
457
  const buttons = filtered.filter((el) => el.tag === "button");
411
458
  if (buttons.length === 1)
412
459
  return { ok: true, match: buttons[0] };
460
+ // Multiple BUTTONS with the same text (imagekit's onboarding renders
461
+ // two "Next" buttons): pick the first. preValidate used to hard-fail
462
+ // while execute would happily pickClickPriority — an inconsistency that
463
+ // failed the replay before it tried. Clicking either advances a wizard
464
+ // and the credential validator backstops a wrong pick. Ambiguous
465
+ // NON-button elements (two same-text links) stay a hard fail — that's
466
+ // genuine rot the skill can't pin, not a wizard button.
467
+ if (buttons.length > 1)
468
+ return { ok: true, match: buttons[0] };
413
469
  return {
414
470
  ok: false,
415
471
  reason: `text_match=${JSON.stringify(step.text_match)} matched ${filtered.length} elements; ` +
@@ -425,6 +481,15 @@ async function preValidateStep(step, browser, templateValues) {
425
481
  const inventory = await browser.extractInteractiveElements();
426
482
  const matches = inventory.filter((el) => isFillable(el) && matchesLabelHint(el, step.label_hint));
427
483
  if (matches.length === 0) {
484
+ // Fuzzy last-resort: the label_hint is a verbose gloss ("Name your
485
+ // key:") that didn't match the live input labeled "Name". Match on
486
+ // significant-token overlap, unique only — so a present-but-glossed
487
+ // field is filled rather than wrongly skipped (which left anthropic's
488
+ // submit disabled). A genuinely-absent onboarding field still matches
489
+ // nothing here and falls through to the absent-skip path.
490
+ const fuzzy = inventory.filter((el) => isFillable(el) && el.tag !== "select" && matchesLabelHintFuzzy(el, step.label_hint));
491
+ if (fuzzy.length === 1)
492
+ return { ok: true, match: fuzzy[0] };
428
493
  return {
429
494
  ok: false,
430
495
  reason: `No input matches label_hint=${JSON.stringify(step.label_hint)}.`,
@@ -515,10 +580,16 @@ async function preValidateStep(step, browser, templateValues) {
515
580
  reason: `${copyButtons.length} Copy buttons visible; none near text ${JSON.stringify(step.near_text_hint)}.`,
516
581
  };
517
582
  }
518
- return {
519
- ok: false,
520
- reason: `${copyButtons.length} Copy buttons visible; ${disambiguated.length} match near_text_hint ambiguous.`,
521
- };
583
+ // Ambiguous (2+ match) on a SINGLE-cred extract: the synthesizer's
584
+ // near_text_hint was unique at capture, but the returning-user keys page
585
+ // shows extra copyable values near the same label (planetscale renders a
586
+ // password + a connection string under one heading). Pick the FIRST
587
+ // match in DOM order — the credential's own copy button typically leads —
588
+ // rather than hard-failing a reachable credential. The post-extract
589
+ // credential validator is the backstop if the first one is wrong.
590
+ console.error(`[replay] ${copyButtons.length} Copy buttons match near_text_hint=${JSON.stringify(step.near_text_hint)} — ` +
591
+ `taking the first (validator backstops a wrong pick).`);
592
+ return { ok: true, match: disambiguated[0] };
522
593
  }
523
594
  case "extract_via_regex": {
524
595
  // Pre-validation for regex extraction: confirm the page text
@@ -575,6 +646,17 @@ async function preValidateStep(step, browser, templateValues) {
575
646
  const candidates = await browser.extractLabeledCredentialCandidates();
576
647
  const match = candidates.find((c) => labelMatchesHint(c.label, step.label_hint));
577
648
  if (match === undefined) {
649
+ if (process.env.REPLAY_DEBUG) {
650
+ try {
651
+ const txt = (await browser.extractText().catch(() => "")).slice(0, 2500);
652
+ writeFileSync(`/tmp/replay-labeled-${step.produces}.txt`, `url=${browser.currentUrl()}\nlabel_hint=${step.label_hint}\n` +
653
+ `candidates=${JSON.stringify(candidates.map((c) => ({ label: c.label, val: (c.value ?? "").slice(0, 6) })))}\n\nTEXT:\n${txt}`);
654
+ console.error(`[replay-debug] dumped /tmp/replay-labeled-${step.produces}.txt`);
655
+ }
656
+ catch {
657
+ /* best-effort */
658
+ }
659
+ }
578
660
  return {
579
661
  ok: false,
580
662
  reason: `No labeled credential matches label_hint=${JSON.stringify(step.label_hint)} ` +
@@ -594,7 +676,15 @@ async function preValidateStep(step, browser, templateValues) {
594
676
  export function labelMatchesHint(label, hint) {
595
677
  if (label === null)
596
678
  return false;
597
- const norm = (s) => s.toLowerCase().replace(/[^a-z0-9]+/g, "");
679
+ // Collapse common credential-label synonyms so a skill's hint matches the
680
+ // page's variant: pusher renders "app_id" while the skill asks for
681
+ // "application id". Apply on the already-stripped alphanumeric string so
682
+ // underscores ("application_id") don't defeat a word boundary.
683
+ const norm = (s) => s
684
+ .toLowerCase()
685
+ .replace(/[^a-z0-9]+/g, "")
686
+ .replace(/application/g, "app")
687
+ .replace(/identifier/g, "id");
598
688
  const a = norm(label);
599
689
  const b = norm(hint);
600
690
  if (a.length === 0 || b.length === 0)
@@ -605,10 +695,13 @@ async function executeStep(step, browser, templateValues, skill) {
605
695
  switch (step.kind) {
606
696
  case "navigate": {
607
697
  await browser.goto(step.url);
608
- // Tiny settle for SPA-style apps that fire route handlers
609
- // post-DOMContentLoaded. The bot's runPrewarm waits 2s
610
- // post-navigate too.
698
+ // Settle for SPA-style apps that fire route handlers post-
699
+ // DOMContentLoaded. A fixed 2s under-waits heavy authenticated
700
+ // dashboards (pusher's App Keys, imagekit's onboarding step rendered
701
+ // blank → "0 elements" at the next step). Poll for real interactive
702
+ // content first, with the 2s as a floor for fast/static pages.
611
703
  await browser.wait(2);
704
+ await browser.waitForInteractiveDom().catch(() => undefined);
612
705
  // 0.8.2-rc.22 — URL drift detection. When a skill's signup_url
613
706
  // assumes the user is authenticated (Railway's /account/tokens
614
707
  // captured after OAuth was done in a prior session), the
@@ -672,9 +765,12 @@ async function executeStep(step, browser, templateValues, skill) {
672
765
  case "click": {
673
766
  const inventory = await browser.extractInteractiveElements();
674
767
  const matches = inventory.filter((el) => matchesClickHint(el, step.text_match));
675
- const filtered = step.role_hint
768
+ // role_hint soft-fallback (mirrors preValidate): if it filters out every
769
+ // text-match, trust the text matches and let the disambiguator pick.
770
+ const roleFiltered = step.role_hint
676
771
  ? matches.filter((el) => matchesRole(el, step.role_hint))
677
772
  : matches;
773
+ const filtered = roleFiltered.length > 0 ? roleFiltered : matches;
678
774
  if (filtered.length === 0) {
679
775
  // href fallback (mirrors preValidate): resolve a nav link by its
680
776
  // stable href path tail when text matching finds nothing. If even
@@ -696,6 +792,17 @@ async function executeStep(step, browser, templateValues, skill) {
696
792
  return { kind: "clicked" };
697
793
  }
698
794
  }
795
+ // Token-subset fallback — mirrors preValidate so execute clicks the
796
+ // same gloss-resolved element preValidate approved. Unique match only.
797
+ const tokenPool = step.role_hint
798
+ ? inventory.filter((el) => matchesRole(el, step.role_hint))
799
+ : inventory;
800
+ const byTokens = tokenPool.filter((el) => matchesClickHintTokens(el, step.text_match));
801
+ if (byTokens.length === 1) {
802
+ await browser.click(byTokens[0].selector);
803
+ await browser.wait(1);
804
+ return { kind: "clicked" };
805
+ }
699
806
  throw new Error(`No element matches text_match=${step.text_match}` +
700
807
  (step.href_hint !== undefined ? ` (nor href_hint=${step.href_hint})` : ""));
701
808
  }
@@ -709,15 +816,25 @@ async function executeStep(step, browser, templateValues, skill) {
709
816
  : filterByNearTextHint(filtered, step.near_text_hint, inventory);
710
817
  const target = narrowed.length === 1 ? narrowed[0] : pickClickPriority(narrowed);
711
818
  await browser.click(target.selector);
712
- // Settle so any post-click navigation finishes before the next
713
- // pre-validation reads inventory.
819
+ // Settle so any post-click navigation/SPA route render finishes before
820
+ // the next step reads inventory (pusher's App Keys page, imagekit's
821
+ // onboarding step render a beat after the click → blank "0 elements").
714
822
  await browser.wait(1);
823
+ await browser.waitForInteractiveDom().catch(() => undefined);
715
824
  return { kind: "clicked" };
716
825
  }
717
826
  case "fill": {
718
827
  const inventory = await browser.extractInteractiveElements();
719
828
  const matches = inventory.filter((el) => isFillable(el) && matchesLabelHint(el, step.label_hint));
720
829
  if (matches.length === 0) {
830
+ // Fuzzy fallback (mirrors preValidate): fill a present-but-glossed
831
+ // input matched by significant-token overlap, unique only.
832
+ const fuzzy = inventory.filter((el) => isFillable(el) && el.tag !== "select" && matchesLabelHintFuzzy(el, step.label_hint));
833
+ if (fuzzy.length === 1) {
834
+ const value = substituteTemplate(step.value_template, templateValues);
835
+ await browser.type(fuzzy[0].selector, value);
836
+ return { kind: "filled" };
837
+ }
721
838
  throw new Error(`No input matches label_hint=${step.label_hint}`);
722
839
  }
723
840
  // rc.25 — share the disambiguator with preValidate so execute
@@ -1438,12 +1555,33 @@ function writeSkillUpdateCandidate(candidatesDir, skill, stepIndex, capturedStep
1438
1555
  }
1439
1556
  }
1440
1557
  // ── Inventory matching helpers ──────────────────────────────────────
1558
// Substring test that only accepts a hit sitting at a WORD boundary.
//
// A plain `includes` let a short hint such as "next" match "next.js"
// (framework chrome in a dashboard footer), so a stale onboarding "Next" step
// false-matched instead of being skipped as absent. Multi-word hints still
// match across their internal whitespace because only the characters
// immediately outside the hit are inspected.
//
// Boundary rules:
//   - before the hit: must not be a letter, a digit, or a dot
//     ("foo.next" is an identifier, not the word "next");
//   - after the hit: must not be a letter or digit, and must not be a dot that
//     is itself followed by a letter/digit ("next.js"). A dot that merely ends
//     a sentence ("click next.") IS a boundary.
// Letters and digits are matched Unicode-aware, so an accented neighbour
// ("así" vs the hint "as") also blocks the match.
//
// haystack: text to search (callers pass it already lower-cased).
// needle:   text to find (same casing as haystack); "" never matches.
// Returns true when at least one occurrence satisfies both boundary rules.
function includesAtWordBoundary(haystack, needle) {
    if (needle.length === 0)
        return false;
    // `undefined` (index off either end of the string) counts as a boundary.
    const isAlnum = (c) => c !== undefined && /[\p{L}\p{N}]/u.test(c);
    for (let idx = haystack.indexOf(needle); idx !== -1; idx = haystack.indexOf(needle, idx + 1)) {
        const afterIdx = idx + needle.length;
        const before = haystack[idx - 1];
        const after = haystack[afterIdx];
        const gluedBefore = before === "." || isAlnum(before);
        // A trailing dot only glues when it continues into another word char.
        const gluedAfter = isAlnum(after) || (after === "." && isAlnum(haystack[afterIdx + 1]));
        if (!gluedBefore && !gluedAfter)
            return true;
    }
    return false;
}
1441
1578
  function matchesClickHint(el, hint) {
1442
1579
  const lowerHint = hint.toLowerCase();
1443
1580
  const text = (el.visibleText ?? "").toLowerCase();
1444
1581
  const aria = (el.ariaLabel ?? "").toLowerCase();
1445
- if (text.includes(lowerHint) || aria.includes(lowerHint))
1582
+ if (includesAtWordBoundary(text, lowerHint) || includesAtWordBoundary(aria, lowerHint)) {
1446
1583
  return true;
1584
+ }
1447
1585
  // 0.8.3-rc.1 — stable-attribute fallback. Form-control elements
1448
1586
  // routinely have a stable `name` attribute (mistral's ToS checkbox
1449
1587
  // ships as `<input name="terms">`) even when their visible text is
@@ -1459,6 +1597,56 @@ function matchesClickHint(el, hint) {
1459
1597
  return true;
1460
1598
  return false;
1461
1599
  }
1600
// REPLAY_DEBUG diagnostic: on a step failure, dump the current URL plus the
// visible clickable/fillable inventory to
// /tmp/replay-debug-<service>-step<N>.json so a returning-user divergence can
// be diagnosed against the real page the replay was on.
//
// No-op unless the REPLAY_DEBUG environment variable is set. Best-effort: any
// error from the browser or the file write is swallowed so a diagnostic can
// never fail the replay itself.
//
// browser:   must expose extractInteractiveElements() and currentUrl().
// skill:     only skill.service is read (file name + payload).
// stepIndex: index of the failing step (file name + payload).
// reason:    the validation failure reason, recorded verbatim in the payload.
async function maybeDumpReplayDebug(browser, skill, stepIndex, reason) {
    if (!process.env.REPLAY_DEBUG)
        return;
    try {
        const inv = await browser.extractInteractiveElements();
        const interesting = inv
            .filter((e) => e.visible)
            .map((e) => ({
            tag: e.tag,
            role: e.role,
            text: (e.visibleText ?? "").slice(0, 60),
            aria: e.ariaLabel,
            label: e.labelText,
            placeholder: e.placeholder,
            href: e.href ?? null,
        }))
            // Drop elements that carry no identifying text at all.
            .filter((e) => e.text || e.aria || e.label || e.placeholder || e.href);
        // The service name becomes part of a file name: neutralise path
        // separators and other odd characters so the dump always lands
        // directly under /tmp. The payload keeps the raw name.
        const safeService = String(skill.service).replace(/[^a-z0-9._-]+/gi, "_");
        const path = `/tmp/replay-debug-${safeService}-step${stepIndex}.json`;
        writeFileSync(path, JSON.stringify({ service: skill.service, stepIndex, reason, url: browser.currentUrl(), interesting }, null, 2));
        console.error(`[replay-debug] dumped ${path} (${interesting.length} elements)`);
    }
    catch {
        // best-effort diagnostic only
    }
}
// Token-subset fallback for a credential-creating click whose captured
// text_match is a planner GLOSS that doesn't substring-match the live button
// ("Create Token" vs the page's "Create API Token" / "+ Create new token").
// Matches when EVERY meaningful token (len>=3) of the hint appears among the
// element's text/aria tokens, order-independent. Deliberately looser than a
// substring rule, so it is meant ONLY as a last resort and ONLY when it
// resolves to a UNIQUE element (the call site is responsible for that) —
// pinning the wrong control on a click that may mint a credential is the risk.
//
// el:   inventory element; visibleText and ariaLabel are read (nullish ok).
// hint: the step's text_match.
// Returns false when the hint has no token of length >= 3.
function matchesClickHintTokens(el, hint) {
    const tokenize = (s) => (s.toLowerCase().match(/[a-z0-9]+/g) ?? []).filter((t) => t.length >= 3);
    const want = tokenize(hint);
    if (want.length === 0)
        return false;
    const have = new Set([
        ...tokenize(el.visibleText ?? ""),
        ...tokenize(el.ariaLabel ?? ""),
    ]);
    return want.every((t) => have.has(t));
}
1462
1650
  // 2026-06-07 — href-tail match for nav-link clicks. The synthesizer
1463
1651
  // records a link target's href path (href_hint); on replay the link's
1464
1652
  // visible text may render as an icon and its URL's leading workspace/org
@@ -1560,6 +1748,28 @@ function matchesLabelHint(el, hint) {
1560
1748
  return true;
1561
1749
  return false;
1562
1750
  }
1751
// Filler words that carry no identifying meaning in a form label; they are
// ignored when comparing a label_hint against a live control.
const LABEL_STOPWORDS = new Set([
    "your", "the", "for", "and", "please", "enter", "field", "input", "this",
]);
// Split `s` into lower-cased alphanumeric words and keep only the significant
// ones: at least 3 characters long and not a stopword.
function significantTokens(s) {
    const words = s.toLowerCase().match(/[a-z0-9]+/g);
    if (words === null)
        return [];
    return words.filter((word) => word.length >= 3 && !LABEL_STOPWORDS.has(word));
}
// Fuzzy label match for a fill/select whose captured label_hint is a verbose
// gloss that the exact/substring matcher misses — e.g. a hint of
// "Name your key:" against a live input labeled just "Name".
//
// Returns true when the hint and the element share at least ONE significant
// token, looking at the element's labelText, placeholder, ariaLabel and name.
// A hint with no significant tokens never matches. This is intentionally
// loose: call sites use it as a last resort and accept a unique match only.
function matchesLabelHintFuzzy(el, hint) {
    const hintTokens = new Set(significantTokens(hint));
    if (hintTokens.size === 0)
        return false;
    const describedBy = [
        el.labelText ?? "",
        el.placeholder ?? "",
        el.ariaLabel ?? "",
        el.name ?? "",
    ].join(" ");
    for (const token of significantTokens(describedBy)) {
        if (hintTokens.has(token))
            return true;
    }
    return false;
}
1563
1773
  function isRuntimeId(id) {
1564
1774
  // Mirror promote-to-skill.ts:looksLikeRuntimeId. Inline here to keep
1565
1775
  // the replay engine self-contained — the patterns rarely change and
@@ -1783,6 +1993,20 @@ function isSkippableAbsentFill(step, validationReason, stepIndex, steps) {
1783
1993
  return false;
1784
1994
  return hasLaterCredentialStep(steps, stepIndex);
1785
1995
  }
1996
// Decides whether a failed onboarding SELECT step may be skipped instead of
// failing the replay — the <select> counterpart of isSkippableAbsentFill.
//
// Wizard dropdowns (porter "Role", railway "Workspace") only exist for a
// brand-new account; on a returning-user replay the onboarding form is gone
// and pre-validation reports "No select matches…". All three must hold:
//   1. the step is a select;
//   2. the validation reason says the select is wholly absent (a select that
//      is present but unresolvable is genuine rot and must NOT be skipped);
//   3. a later step still yields a credential.
function isSkippableAbsentSelect(step, validationReason, stepIndex, steps) {
    return (step.kind === "select" &&
        /no select matches/i.test(validationReason) &&
        hasLaterCredentialStep(steps, stepIndex));
}
1786
2010
  // Does the recipe still reach a credential after stepIndex — a later
1787
2011
  // extract step, or the credential-creating click still ahead?
1788
2012
  function hasLaterCredentialStep(steps, stepIndex) {
@@ -1791,7 +2015,8 @@ function hasLaterCredentialStep(steps, stepIndex) {
1791
2015
  if (k === "extract_via_copy_button" ||
1792
2016
  k === "extract_via_regex" ||
1793
2017
  k === "extract_via_copy_button_named" ||
1794
- k === "extract_via_regex_named") {
2018
+ k === "extract_via_regex_named" ||
2019
+ k === "extract_labeled") {
1795
2020
  return true;
1796
2021
  }
1797
2022
  }
@@ -1847,6 +2072,21 @@ const OAUTH_PROVIDER_HOSTS = new Set([
1847
2072
  "auth0.com",
1848
2073
  "login.microsoftonline.com",
1849
2074
  ]);
2075
// Heuristic: does `hostname` look like a service's OWN auth/login host — the
// first hop when the replay session has expired (porter's
// dashboard.porter.run → auth.porter.run)? This is distinct from
// OAUTH_PROVIDER_HOSTS (the social IdPs): here the service itself is bouncing
// us to log in.
//
// Case-insensitive. True when either:
//   - the host starts with an auth-shaped label
//     (auth. / login. / accounts. / signin. / sign-in. / sso. / id.), or
//   - the host is, or is a subdomain of, a hosted-auth vendor domain
//     (workos / auth0 / okta / clerk / stytch / onelogin / duosecurity under
//     .com, .io, .dev or .app).
function looksLikeAuthHost(hostname) {
    const host = hostname.toLowerCase();
    const authShapedSubdomain = /^(auth|login|accounts|signin|sign-in|sso|id)\./;
    const hostedAuthVendor = /(^|\.)(workos|auth0|okta|clerk|stytch|onelogin|duosecurity)\.(com|io|dev|app)$/;
    return authShapedSubdomain.test(host) || hostedAuthVendor.test(host);
}
1850
2090
  // Returns null when the current URL is consistent with the requested
1851
2091
  // URL (same origin, no login-path redirect). Returns a short reason
1852
2092
  // string when drift is detected. Exported for unit tests.
@@ -1867,6 +2107,14 @@ export function detectNavigationDrift(currentUrl, expectedUrl) {
1867
2107
  OAUTH_PROVIDER_HOSTS.has(cur.hostname)) {
1868
2108
  return `redirected to OAuth provider ${cur.hostname}`;
1869
2109
  }
2110
+ // Cross-domain landing on the service's OWN auth host (auth.porter.run,
2111
+ // a WorkOS/Auth0/etc. tenant) — the session expired, so we got bounced to
2112
+ // log in. Classify as drift so attemptOAuthRecovery can re-auth via the
2113
+ // cached provider session (or, failing that, return needs_login) instead of
2114
+ // replaying the skill onto the login page.
2115
+ if (cur.hostname !== exp.hostname && looksLikeAuthHost(cur.hostname)) {
2116
+ return `redirected to login host ${cur.hostname} (session expired / not authenticated)`;
2117
+ }
1870
2118
  // Same-origin redirect to a login-shaped path — covers Railway's
1871
2119
  // /login fallback when /account/tokens is hit unauthenticated.
1872
2120
  if (cur.hostname === exp.hostname && cur.pathname !== exp.pathname) {
@@ -1945,11 +2193,13 @@ async function clickConsentAffordance(browser) {
1945
2193
  const page = pageOf(browser);
1946
2194
  if (page === null)
1947
2195
  return false;
1948
- const name = /^(continue|allow|authorize|approve|accept|agree|i agree)$/i;
2196
+ // Exact-name match first (safest): the approve control's accessible name IS
2197
+ // just the verb.
2198
+ const exact = /^(continue|allow|authorize|approve|accept|agree|i agree)$/i;
1949
2199
  for (const role of ["button", "link"]) {
1950
2200
  try {
1951
- const loc = page.getByRole(role, { name }).first();
1952
- await loc.waitFor({ state: "visible", timeout: 2000 });
2201
+ const loc = page.getByRole(role, { name: exact }).first();
2202
+ await loc.waitFor({ state: "visible", timeout: 3000 });
1953
2203
  await loc.click({ timeout: 3000 });
1954
2204
  return true;
1955
2205
  }
@@ -1957,6 +2207,45 @@ async function clickConsentAffordance(browser) {
1957
2207
  /* try the next role */
1958
2208
  }
1959
2209
  }
2210
+ // Fallback: Google's modern consent button carries extra accessible-name
2211
+ // text ("Continue", "Continue to kinde", a nested span) that the exact match
2212
+ // misses — so kinde/imagekit reached state=consent but this returned false
2213
+ // and the verifier bailed needs_login. Match an approve verb at the START of
2214
+ // the name, and explicitly skip negatives ("Cancel", "Don't allow", "Back").
2215
+ const approve = /^(continue|allow|authorize|approve|accept|agree)/i;
2216
+ const negative = /(cancel|deny|don'?t\s*allow|no\s*thanks|go\s*back|^back$|reject)/i;
2217
+ for (const role of ["button", "link"]) {
2218
+ const loc = page.getByRole(role, { name: approve });
2219
+ const count = await loc.count().catch(() => 0);
2220
+ for (let i = 0; i < count; i++) {
2221
+ const el = loc.nth(i);
2222
+ const txt = ((await el.textContent().catch(() => "")) ?? "").trim();
2223
+ if (negative.test(txt))
2224
+ continue;
2225
+ try {
2226
+ await el.waitFor({ state: "visible", timeout: 2000 });
2227
+ await el.click({ timeout: 3000 });
2228
+ return true;
2229
+ }
2230
+ catch {
2231
+ /* next candidate */
2232
+ }
2233
+ }
2234
+ }
2235
+ if (process.env.REPLAY_DEBUG) {
2236
+ try {
2237
+ const btns = await page
2238
+ .getByRole("button")
2239
+ .all()
2240
+ .then((ls) => Promise.all(ls.slice(0, 25).map((l) => l.textContent().catch(() => ""))))
2241
+ .catch(() => []);
2242
+ writeFileSync(`/tmp/replay-consent-buttons.txt`, `url=${page.url()}\nbuttons=${JSON.stringify(btns)}`);
2243
+ console.error(`[replay-oauth-debug] consent affordance not found — dumped /tmp/replay-consent-buttons.txt`);
2244
+ }
2245
+ catch {
2246
+ /* best-effort */
2247
+ }
2248
+ }
1960
2249
  return false;
1961
2250
  }
1962
2251
  // Deterministically walk the provider's account-chooser + consent screens
@@ -2003,24 +2292,54 @@ export async function walkOAuthConsent(browser, providerId) {
2003
2292
  console.error(`[replay-oauth] state=${state} url=${url.slice(0, 100)}`);
2004
2293
  if (state === "not_provider")
2005
2294
  return "ok"; // flow left the provider
2006
- if (state === "challenge" || state === "needs_login")
2295
+ if (state === "challenge" || state === "needs_login") {
2296
+ if (process.env.REPLAY_DEBUG) {
2297
+ try {
2298
+ writeFileSync(`/tmp/replay-oauth-${providerId}-${state}.txt`, `url=${url}\n\n${body}`);
2299
+ console.error(`[replay-oauth-debug] dumped /tmp/replay-oauth-${providerId}-${state}.txt`);
2300
+ }
2301
+ catch {
2302
+ // best-effort
2303
+ }
2304
+ }
2007
2305
  return "needs_login";
2306
+ }
2008
2307
  // state === "consent": scope-gate it. Only auto-approve identity-basic
2009
2308
  // scopes — verify must never grant a sensitive scope blind.
2010
2309
  const scopes = extractOAuthScopes(url);
2310
+ // GitHub sensitive-scope phrases — repo/org/write/admin access. A consent
2311
+ // showing NONE is identity-basic (login). pusher's 2nd github consent
2312
+ // screen carries no scope= param (extractOAuthScopes → null), so without a
2313
+ // DOM fallback github fell straight to "not basic" and bailed.
2314
+ const githubSensitive = /\b(repositor|organization|act on your behalf|write|delete|admin|workflow|manage|gist|webhook|deploy)/i.test(body);
2011
2315
  const basic = scopes !== null
2012
2316
  ? provider.scopesAreBasic(scopes)
2013
- : // Google hides scopes behind an opaque part= token; fall back to
2014
- // the visible DOM — basic only when NO scope-grant phrases show.
2015
- providerId === "google" && scrapeGoogleScopePhrases(body).length === 0;
2317
+ : // Scopes unreadable from the URL fall back to the visible DOM.
2318
+ // Basic only when NO scope-grant phrases show (mirrors per-provider).
2319
+ providerId === "google"
2320
+ ? scrapeGoogleScopePhrases(body).length === 0
2321
+ : providerId === "github"
2322
+ ? !githubSensitive
2323
+ : false;
2016
2324
  if (!basic) {
2017
2325
  console.error("[replay-oauth] consent scopes not basic/unreadable — needs_login");
2018
2326
  return "needs_login";
2019
2327
  }
2020
2328
  const beforeUrl = browser.currentUrl();
2021
2329
  const clicked = await clickConsentAffordance(browser);
2022
- if (!clicked)
2023
- return "needs_login";
2330
+ if (!clicked) {
2331
+ // The consent may be auto-completing and navigating away before we can
2332
+ // click — Google's GIS flow (kinde/imagekit) redirects the consent to
2333
+ // /gsi/transform on its own for basic, previously-seen scopes, and the
2334
+ // popup then closes. Don't bail needs_login on a flow that's finishing:
2335
+ // wait a beat, then let the loop re-evaluate (oauthPageClosed /
2336
+ // not_provider → ok). If it's genuinely stuck on the consent, the loop
2337
+ // retries the click, bounded by MAX_NAV before the final needs_login.
2338
+ for (let w = 0; w < 6 && browser.currentUrl() === beforeUrl && !browser.oauthPageClosed(); w++) {
2339
+ await browser.wait(1);
2340
+ }
2341
+ continue;
2342
+ }
2024
2343
  // Same race as the chooser: the approve click navigates after a beat.
2025
2344
  // Wait for the URL to move before re-reading, or the next pass sees the
2026
2345
  // same consent URL, finds the affordance already consumed, and bails.
@@ -2032,10 +2351,15 @@ export async function walkOAuthConsent(browser, providerId) {
2032
2351
  return browser.oauthPageClosed() ? "ok" : "needs_login";
2033
2352
  }
2034
2353
  async function attemptOAuthRecovery(browser, expectedUrl) {
2035
- const profiles = loggedInProviders();
2036
- if (profiles.length === 0) {
2354
+ const rawProfiles = loggedInProviders();
2355
+ if (rawProfiles.length === 0) {
2037
2356
  return { kind: "needs_login", provider: "google" };
2038
2357
  }
2358
+ // Prefer Google over GitHub when a service offers both. GitHub OAuth
2359
+ // callbacks are rejected by more anti-bot services (pusher bounces a
2360
+ // github sign-in back to /accounts/sign_in with no session, while the
2361
+ // google round-trip completes). Try the more-reliable provider first.
2362
+ const profiles = [...rawProfiles].sort((a, b) => a === "google" ? -1 : b === "google" ? 1 : 0);
2039
2363
  // Find an OAuth button matching a provider we have a cached session for.
2040
2364
  // Retry: SPA login pages (posthog, kinde) render the OAuth buttons a beat
2041
2365
  // after domcontentloaded, so a single inventory races them → false
@@ -2059,6 +2383,19 @@ async function attemptOAuthRecovery(browser, expectedUrl) {
2059
2383
  // The page may genuinely be a non-OAuth login form (some services
2060
2384
  // also offer password auth). The replay can't synthesize a
2061
2385
  // password; surface needs_login with a guess based on the URL.
2386
+ if (process.env.REPLAY_DEBUG) {
2387
+ try {
2388
+ const inv = await browser.extractInteractiveElements();
2389
+ const clickable = inv
2390
+ .filter((e) => e.visible && (e.tag === "button" || e.tag === "a" || e.role === "button"))
2391
+ .map((e) => ({ tag: e.tag, text: (e.visibleText ?? "").slice(0, 40), aria: e.ariaLabel, href: (e.href ?? "").slice(0, 60) }));
2392
+ writeFileSync(`/tmp/replay-nobutton-${browser.currentUrl().replace(/[^a-z0-9]+/gi, "_").slice(-30)}.txt`, `url=${browser.currentUrl()}\nprofiles=${JSON.stringify(profiles)}\nclickable=${JSON.stringify(clickable, null, 1)}`);
2393
+ console.error(`[replay-oauth-debug] no OAuth button — dumped page affordances`);
2394
+ }
2395
+ catch {
2396
+ /* best-effort */
2397
+ }
2398
+ }
2062
2399
  const guess = inferProviderFromUrl(browser.currentUrl()) ?? "google";
2063
2400
  return { kind: "needs_login", provider: guess };
2064
2401
  }
@@ -2068,6 +2405,7 @@ async function attemptOAuthRecovery(browser, expectedUrl) {
2068
2405
  await browser.startOAuth(pickedButton.selector);
2069
2406
  const walk = await walkOAuthConsent(browser, pickedProvider);
2070
2407
  if (walk === "needs_login") {
2408
+ await browser.settleAfterOAuth().catch(() => undefined);
2071
2409
  return { kind: "needs_login", provider: pickedProvider };
2072
2410
  }
2073
2411
  // Confirm we're back: poll for the round-trip, then re-navigate to the
@@ -2089,6 +2427,12 @@ async function attemptOAuthRecovery(browser, expectedUrl) {
2089
2427
  if (host === expectedHost)
2090
2428
  break;
2091
2429
  }
2430
+ // Restore this.page to the product page. The GIS popup flow (kinde/imagekit)
2431
+ // closes the OAuth popup on its own; without this, this.page stays the CLOSED
2432
+ // popup and the re-navigate below throws "Target page has been closed". Only
2433
+ // the discovery bot called settleAfterOAuth before — the replay recovery
2434
+ // never did, so every popup-based OAuth crashed here.
2435
+ await browser.settleAfterOAuth().catch(() => undefined);
2092
2436
  await browser.goto(expectedUrl);
2093
2437
  await browser.wait(2);
2094
2438
  const drift = detectNavigationDrift(browser.currentUrl(), expectedUrl);