@trusty-squire/mcp 0.8.2-rc.9 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -45,7 +45,7 @@
45
45
  import { appendFileSync, mkdirSync } from "node:fs";
46
46
  import { join } from "node:path";
47
47
  import { loggedInProviders } from "./login-state.js";
48
- import { isTruncatedCapture, extractApiKeyFromText } from "./agent.js";
48
+ import { isTruncatedCapture, extractApiKeyFromText, findOAuthButton } from "./agent.js";
49
49
  // ── Entry point ──────────────────────────────────────────────────────
50
50
  export async function replaySkill(input) {
51
51
  const { skill, browser } = input;
@@ -53,14 +53,25 @@ export async function replaySkill(input) {
53
53
  const candidatesDir = input.candidatesDir;
54
54
  const llmFallback = input.llmFallback;
55
55
  const templateValues = input.templateValues ?? {};
56
- // Router-level guard: a demoted or pending-review skill is not
57
- // replay-eligible. The router should have filtered these out, but
58
- // we double-check at the boundary in case something hand-feeds us
59
- // a skill record from a stale cache.
60
- if (skill.status !== "active") {
56
+ // Router-level guard: a demoted, pending-review, or superseded
57
+ // skill is not replay-eligible for end-user provisions. The router
58
+ // should have filtered these out, but we double-check at the
59
+ // boundary in case something hand-feeds us a skill record from a
60
+ // stale cache.
61
+ //
62
+ // The verifier loop bypasses this guard via bypassStatusGuard=true
63
+ // (set by housekeeper-loop on the verifier queue) so it can gather
64
+ // replay outcomes that drive promote/demote transitions. Even with
65
+ // bypass, `superseded` stays gated — a newer version is canonical
66
+ // and replaying the older one is wasted effort.
67
+ const bypass = input.bypassStatusGuard === true;
68
+ const guardBlocks = skill.status === "superseded" || (!bypass && skill.status !== "active");
69
+ if (guardBlocks) {
61
70
  return {
62
71
  kind: "skill_demoted",
63
- reason: `Skill status is ${skill.status}; replay is only valid for status=active.`,
72
+ reason: bypass
73
+ ? `Skill status is ${skill.status}; verifier replay still rejects superseded versions.`
74
+ : `Skill status is ${skill.status}; replay is only valid for status=active.`,
64
75
  };
65
76
  }
66
77
  // Walk the step graph. Dry mode stops before the last action that
@@ -395,7 +406,15 @@ async function preValidateStep(step, browser, templateValues) {
395
406
  const inventory = await browser.extractInteractiveElements();
396
407
  const copyButtons = inventory.filter(isCopyButton);
397
408
  if (copyButtons.length === 0) {
398
- return { ok: false, reason: "No Copy button visible on page." };
409
+ // 0.8.2-rc.22 pre-validation no longer hard-fails when the
410
+ // Copy button is missing. The executor's text-extraction
411
+ // fallback (extractCredentialCandidates + body-text regex +
412
+ // validator-blind tier) can still recover the credential when
413
+ // it's rendered on the page without a Copy affordance.
414
+ // Architecturally: pre-validation ranges over "is this step
415
+ // attempt-able"; the executor decides if attempt-able means
416
+ // "click and read" or "scan page text and validate."
417
+ return { ok: true };
399
418
  }
400
419
  if (copyButtons.length === 1) {
401
420
  return { ok: true, match: copyButtons[0] };
@@ -471,13 +490,46 @@ async function preValidateStep(step, browser, templateValues) {
471
490
  }
472
491
  async function executeStep(step, browser, templateValues, skill) {
473
492
  switch (step.kind) {
474
- case "navigate":
493
+ case "navigate": {
475
494
  await browser.goto(step.url);
476
495
  // Tiny settle for SPA-style apps that fire route handlers
477
496
  // post-DOMContentLoaded. The bot's runPrewarm waits 2s
478
497
  // post-navigate too.
479
498
  await browser.wait(2);
499
+ // 0.8.2-rc.22 — URL drift detection. When a skill's signup_url
500
+ // assumes the user is authenticated (Railway's /account/tokens
501
+ // captured after OAuth was done in a prior session), the
502
+ // unauthenticated bot lands on a login page instead. Downstream
503
+ // label_hint resolution then matches login-page elements that
504
+ // coincidentally share names with the captured page ("Name"
505
+ // input, "Workspace" select, "Create" button — all common on
506
+ // signup OR login forms), producing false-positive step
507
+ // successes. The replay then fails at the LAST step ("No Copy
508
+ // button visible") with a misleading reason. Catch the drift at
509
+ // step 0 so the verifier reports the real cause: this skill
510
+ // needs an OAuth step it doesn't have.
511
+ const landedUrl = browser.currentUrl();
512
+ const driftReason = detectNavigationDrift(landedUrl, step.url);
513
+ if (driftReason !== null) {
514
+ // 0.8.2-rc.22 — drive the OAuth handshake. Captured skills
515
+ // for OAuth-protected services (Railway, Sentry, etc.) often
516
+ // assume an authenticated session because the original capture
517
+ // was recorded in a profile that already had OAuth cookies.
518
+ // At replay time the persistent profile usually has the same
519
+ // cookies (subsequent OAuth round-trips through the provider
520
+ // auto-approve from the cached session). Click the OAuth
521
+ // button, wait for the round-trip to complete, re-navigate to
522
+ // the expected URL, and continue. Only bail to needs_login
523
+ // when no OAuth path is recoverable (no provider session, no
524
+ // OAuth button on the page).
525
+ const recovered = await attemptOAuthRecovery(browser, step.url);
526
+ if (recovered.kind === "ok") {
527
+ return { kind: "navigated" };
528
+ }
529
+ return { kind: "needs_login", provider: recovered.provider };
530
+ }
480
531
  return { kind: "navigated" };
532
+ }
481
533
  case "click_oauth_button": {
482
534
  // Profile-session guard. If the user hasn't run `mcp login` for
483
535
  // this provider, the click would still happen but we'd land on
@@ -553,7 +605,16 @@ async function executeStep(step, browser, templateValues, skill) {
553
605
  // 0.8.2-rc.3 — apply near_text_hint filter when present so
554
606
  // Sentry-grid rows land on the right <select>. The original
555
607
  // `inventory.find` would unilaterally pick the first match.
556
- const allMatches = inventory.filter((el) => matchesLabelHint(el, step.label_hint));
608
+ //
609
+ // 0.8.2-rc.21 — also restrict to fillable elements (input /
610
+ // textarea / select). Without this, a Railway-class form where
611
+ // a `<label for="select-X">` shares labelText with its
612
+ // `<select id="select-X">` would silently pick the label —
613
+ // and selectOption(label, …) would then route into the
614
+ // combobox path and fail because native selects don't reveal
615
+ // options via DOM patterns. Pre-validation already filters
616
+ // this way; the executor was lagging.
617
+ const allMatches = inventory.filter((el) => isFillable(el) && matchesLabelHint(el, step.label_hint));
557
618
  if (allMatches.length === 0) {
558
619
  throw new Error(`No select matches label_hint=${step.label_hint}`);
559
620
  }
@@ -569,23 +630,64 @@ async function executeStep(step, browser, templateValues, skill) {
569
630
  return { kind: "selected" };
570
631
  }
571
632
  case "extract_via_copy_button": {
572
- const inventory = await browser.extractInteractiveElements();
573
- const copyButtons = inventory.filter(isCopyButton);
574
- const target = copyButtons.length === 1
633
+ // 0.8.2-rc.22 poll for the Copy button OR a validator-passing
634
+ // candidate to appear, up to 8s. The captured skill assumes the
635
+ // post-Create UI renders synchronously, but services like
636
+ // Railway take 1-3s to paint the new-token row. Pre-rc.22 the
637
+ // executor ran a single inventory inspection and gave up; that
638
+ // cost us every replay where the credential needed a beat to
639
+ // appear.
640
+ //
641
+ // Loop exits on whichever happens first:
642
+ // (a) target Copy button materialises → break, click + run
643
+ // the normal extraction tiers.
644
+ // (b) a credential-shaped candidate appears in
645
+ // extractCredentialCandidates that satisfies the skill's
646
+ // post_extract_validator → return it directly without
647
+ // needing a Copy click.
648
+ // If neither shows up in 8s, fall through to the existing
649
+ // candidate/body/clipboard/fallback chain with the LAST polled
650
+ // inventory + emptiness, ending in the diagnostic throw.
651
+ const fallbackValidatorPoll = skill.credentials[0]?.post_extract_validator;
652
+ const pollDeadline = Date.now() + 8000;
653
+ let inventory = await browser.extractInteractiveElements();
654
+ let copyButtons = inventory.filter(isCopyButton);
655
+ let target = copyButtons.length === 1
575
656
  ? copyButtons[0]
576
657
  : copyButtons.find((btn) => nearTextHintMatches(btn, step.near_text_hint, inventory));
577
- if (target === undefined) {
578
- throw new Error("Copy button disappeared between pre-validation and execution.");
658
+ while (target === undefined && Date.now() < pollDeadline) {
659
+ // Bail-on-found: a validator-passing candidate appearing first
660
+ // is the credential. We don't need the Copy button anymore.
661
+ if (fallbackValidatorPoll !== undefined) {
662
+ try {
663
+ const polled = await browser.extractCredentialCandidates();
664
+ for (const cand of polled) {
665
+ if (cand.length < fallbackValidatorPoll.min_length)
666
+ continue;
667
+ if (cand.length > fallbackValidatorPoll.max_length)
668
+ continue;
669
+ if (!/\d/.test(cand))
670
+ continue;
671
+ if (!/^[a-zA-Z0-9_\-]+$/.test(cand))
672
+ continue;
673
+ return { kind: "extract_ok", value: cand, via: "copy_button" };
674
+ }
675
+ }
676
+ catch {
677
+ // Non-fatal — fall through to next poll tick.
678
+ }
679
+ }
680
+ await browser.wait(0.5);
681
+ inventory = await browser.extractInteractiveElements();
682
+ copyButtons = inventory.filter(isCopyButton);
683
+ target = copyButtons.length === 1
684
+ ? copyButtons[0]
685
+ : copyButtons.find((btn) => nearTextHintMatches(btn, step.near_text_hint, inventory));
686
+ }
687
+ if (target !== undefined) {
688
+ await browser.click(target.selector);
689
+ await browser.wait(1);
579
690
  }
580
- // Click the Copy button. The bot already does this in
581
- // tryCopyButtonExtraction; we mirror the contract: click, brief
582
- // wait, then read navigator.clipboard.readText() via the page
583
- // context. clipboardText() on BrowserController would be ideal
584
- // but doesn't exist yet — we use page.evaluate via the
585
- // extractCredentialCandidates pathway, falling back to text
586
- // scan if clipboard access is denied.
587
- await browser.click(target.selector);
588
- await browser.wait(1);
589
691
  // BrowserController.extractCredentialCandidates pulls visible
590
692
  // candidates (input values + direct text); it does NOT read the
591
693
  // clipboard yet. We use it as the primary source and fall back
@@ -642,7 +744,44 @@ async function executeStep(step, browser, templateValues, skill) {
642
744
  // Clipboard read failed (permission denied, no clipboard
643
745
  // contents). Fall through to the canonical error.
644
746
  }
645
- throw new Error("Copy button clicked but no credential matched the regex library in candidates, body text, or clipboard.");
747
+ // 0.8.2-rc.22 validator-filtered candidate scan. Mirrors the
748
+ // identical tier in `extract_via_regex` so that copy_button
749
+ // steps can recover when (a) the Copy button isn't on the
750
+ // page at all (replay reached this step without a Copy
751
+ // affordance — Railway-class pages where the token renders
752
+ // inline) or (b) the click + clipboard contract didn't yield
753
+ // a recognised prefix but a credential-shaped string IS
754
+ // sitting on the page.
755
+ const fallbackValidator = skill.credentials[0]?.post_extract_validator;
756
+ if (fallbackValidator !== undefined) {
757
+ try {
758
+ const cands = await browser.extractCredentialCandidates();
759
+ for (const cand of cands) {
760
+ if (cand.length < fallbackValidator.min_length)
761
+ continue;
762
+ if (cand.length > fallbackValidator.max_length)
763
+ continue;
764
+ if (!/\d/.test(cand))
765
+ continue;
766
+ if (!/^[a-zA-Z0-9_\-]+$/.test(cand))
767
+ continue;
768
+ return { kind: "extract_ok", value: cand, via: "copy_button" };
769
+ }
770
+ }
771
+ catch {
772
+ // Fall through to the canonical error below.
773
+ }
774
+ }
775
+ // Diagnostic context — keeps a short trail of "what did the bot
776
+ // see when extract failed" so we can iterate without re-running.
777
+ // url + inventory.length is enough to triage 90% of cases; full
778
+ // snapshots would require a new sink and aren't worth the
779
+ // complexity here.
780
+ const diag = ` [url=${browser.currentUrl()} inventory=${inventory.length} copyButtons=${copyButtons.length}]`;
781
+ const failureReason = target === undefined
782
+ ? `No Copy button on page and no credential-shaped string passed the validator.${diag}`
783
+ : `Copy button clicked but no credential matched the regex library in candidates, body text, or clipboard.${diag}`;
784
+ throw new Error(failureReason);
646
785
  }
647
786
  case "extract_via_regex": {
648
787
  // rc.18 — poll the page text for the credential. The previous
@@ -728,6 +867,50 @@ async function executeStep(step, browser, templateValues, skill) {
728
867
  // Fall through to the canonical error below.
729
868
  }
730
869
  }
870
+ // 0.8.2-rc.21 — validator-blind last-resort tier for uuid_token.
871
+ // The synthesizer's `uuid_token` is its FALLBACK pattern when no
872
+ // recognised prefix matches the captured HTML. inferShapeHint
873
+ // then sets the validator to {36, 36} if ANY uuid-shaped string
874
+ // appears on the page — even an unrelated request/session ID.
875
+ // On IPInfo's dashboard the actual API key is a bare 14-char
876
+ // hex string in a <code> element AND the HTML also contains
877
+ // an unrelated 36-char tracking UUID, so the validator above
878
+ // narrows to 36/36 and the real 14-char value is filtered out.
879
+ // This tier fires only when:
880
+ // - the captured pattern was the fallback uuid_token (so we
881
+ // KNOW the synthesizer guessed about the shape — never for
882
+ // prefix-anchored patterns like sk-or-v1-, re_, etc.)
883
+ // - every prior tier (labeled regex, UUID poll, copy-button
884
+ // colocation, validator-filtered candidate scan) failed
885
+ // Scans structural <code>/<pre>/<kbd>/<samp>-class candidates
886
+ // (extractCredentialCandidates filters to these explicitly so
887
+ // page chrome / nav strings don't appear here) with a wider
888
+ // 8-128 char range, digit-required, alphanumeric-only. The
889
+ // registry's post_extract_validator runs downstream and rejects
890
+ // shapes that don't satisfy the credential's published shape,
891
+ // so a false-positive surfaces as a validator-reject rather
892
+ // than a published bad credential.
893
+ if (step.pattern_name === "uuid_token") {
894
+ try {
895
+ const candidates = await browser.extractCredentialCandidates();
896
+ for (const cand of candidates) {
897
+ if (cand.length < 8 || cand.length > 128)
898
+ continue;
899
+ if (!/\d/.test(cand))
900
+ continue;
901
+ if (!/^[a-zA-Z0-9_\-]+$/.test(cand))
902
+ continue;
903
+ // Skip values that look like a URL/path/route — those
904
+ // show up in <code> blocks for documentation snippets.
905
+ if (cand.includes("/") || cand.includes("."))
906
+ continue;
907
+ return { kind: "extract_ok", value: cand, via: "regex" };
908
+ }
909
+ }
910
+ catch {
911
+ // Fall through to the canonical error below.
912
+ }
913
+ }
731
914
  throw new Error(`No credential matching pattern ${step.pattern_name} found on page.`);
732
915
  }
733
916
  // Multi-cred extract: mirrors the single-cred copy_button executor
@@ -1202,4 +1385,150 @@ async function preValidateAllExtractsInDryMode(steps, dryStopAt, browser, templa
1202
1385
  }
1203
1386
  return null;
1204
1387
  }
1388
// ── URL-drift detection (0.8.2-rc.22) ────────────────────────────────
// Patterns that indicate the bot landed on a login/auth page instead
// of the expected target. Catches:
//   - redirects to a login-shaped path segment (/login, /signin,
//     /sign-in, /sign_in, /auth, /sso), whether the landing host is the
//     expected one or a different one (e.g. an `auth.` sibling host)
//   - cross-host redirects to a known OAuth provider host or any of
//     its subdomains (Auth0 tenants live at <tenant>.auth0.com, so an
//     exact-hostname match on "auth0.com" would never fire)
// `/signup` is NOT part of the pattern, so landing on a signup page is
// never reported as drift.
// False-positive risk is low: a page whose own expected path is
// login-shaped is left alone (same host + same path is not drift), so
// only an actual redirect onto one of these paths is reported.
const LOGIN_PATH_RE = /\/(login|signin|sign[-_]in|auth|sso)(?:[/?#]|$)/i;
const OAUTH_PROVIDER_HOSTS = new Set([
    "accounts.google.com",
    "github.com",
    "auth0.com",
    "login.microsoftonline.com",
]);
// True when `hostname` is one of OAUTH_PROVIDER_HOSTS or a subdomain of
// one. The leading "." in the suffix test keeps look-alike hosts such
// as "evilauth0.com" from matching "auth0.com".
function isOAuthProviderHost(hostname) {
    if (OAUTH_PROVIDER_HOSTS.has(hostname)) {
        return true;
    }
    for (const providerHost of OAUTH_PROVIDER_HOSTS) {
        if (hostname.endsWith(`.${providerHost}`)) {
            return true;
        }
    }
    return false;
}
/**
 * Decides whether the page the browser landed on is an auth wall
 * rather than the page that was requested. Exported for unit tests.
 *
 * Only hostnames and pathnames are compared; scheme, port, query and
 * fragment are ignored.
 *
 * @param {string} currentUrl - URL the browser is on after navigating.
 * @param {string} expectedUrl - URL the navigate step asked for.
 * @returns {string | null} A short human-readable reason when drift is
 *   detected, or null when the landing URL is consistent with the
 *   requested one (or when either URL cannot be parsed).
 */
export function detectNavigationDrift(currentUrl, expectedUrl) {
    let cur;
    let exp;
    try {
        cur = new URL(currentUrl);
        exp = new URL(expectedUrl);
    }
    catch {
        // If either URL is unparseable, don't claim drift — the caller's
        // next step will fail with a clearer error.
        return null;
    }
    const sameHost = cur.hostname === exp.hostname;
    // Cross-host landing on a known OAuth provider — unambiguous.
    if (!sameHost && isOAuthProviderHost(cur.hostname)) {
        return `redirected to OAuth provider ${cur.hostname}`;
    }
    if (!LOGIN_PATH_RE.test(cur.pathname)) {
        return null;
    }
    if (sameHost) {
        // Same-host redirect to a login-shaped path — covers Railway's
        // /login fallback when /account/tokens is hit unauthenticated.
        // An identical pathname means the expected page itself is
        // login-shaped, which is not a redirect.
        return cur.pathname !== exp.pathname
            ? `same-origin redirect to login path ${cur.pathname}`
            : null;
    }
    // Different host AND a login-shaped path: the service bounced us to
    // a separate auth host (auth.example.com/login, www. vs apex, …).
    return `cross-origin redirect to login path ${cur.hostname}${cur.pathname}`;
}
1434
/**
 * Maps a URL to the OAuth provider whose domain it lives on.
 *
 * @param {string} url - Absolute URL to classify.
 * @returns {"google" | "github" | null} "google" for google.com and
 *   its subdomains, "github" for github.com and its subdomains, null
 *   for every other host or when `url` cannot be parsed.
 */
export function inferProviderFromUrl(url) {
    let hostname;
    try {
        hostname = new URL(url).hostname;
    }
    catch {
        // Unparseable input simply has no provider.
        return null;
    }
    // Checked in order; the first domain pattern that matches wins.
    const providerDomains = [
        ["google", /^(?:.+\.)?google\.com$/i],
        ["github", /^(?:.+\.)?github\.com$/i],
    ];
    const hit = providerDomains.find(([, domainRe]) => domainRe.test(hostname));
    return hit === undefined ? null : hit[0];
}
1447
+ // ── OAuth recovery during replay (0.8.2-rc.22) ───────────────────────
1448
+ // When a navigate step lands on a login page (URL drift detected),
1449
+ // the replay engine attempts to drive the OAuth handshake using the
1450
+ // bot's persistent profile's cached session cookies. This is the
1451
+ // non-failing path for skills captured against authenticated services
1452
+ // — Railway, Sentry, Anthropic, etc. — whose synthesizer didn't emit
1453
+ // an explicit `click_oauth_button` step because the original signup
1454
+ // rode an existing browser session.
1455
+ //
1456
+ // Recovery succeeds (returns ok) when:
1457
+ // - the current page has an OAuth button matching one of the
1458
+ // profile's logged-in providers
1459
+ // - clicking the button + waiting for the round-trip leaves the
1460
+ // bot back on the expected service domain
1461
+ // - re-navigating to the expected URL doesn't drift again
1462
+ //
1463
+ // Otherwise returns needs_login with the best-guess provider so the
1464
+ // caller surfaces a real "give the user a way to log in" signal.
1465
+ //
1466
+ // Cookie-driven OAuth typically completes in 2-5s end-to-end (provider
1467
+ // auto-approves from the cached session). 30s budget covers slower
1468
+ // providers + the rare "show the account chooser" interstitial. If the
1469
+ // provider demands real user interaction (2FA challenge, missing-scope
1470
+ // consent), the budget will tick down without resolution and we bail
1471
+ // to needs_login — that's the "laws of physics" boundary: a verifier
1472
+ // process running without a human can't complete a challenge.
1473
/**
 * Tries to get past an auth wall hit by a navigate step by clicking an
 * OAuth button on the current page and waiting for the round-trip.
 *
 * @param {object} browser - Browser controller. This function calls
 *   currentUrl(), extractInteractiveElements(), startOAuth(selector),
 *   wait(seconds), oauthPageClosed() and goto(url) on it.
 * @param {string} expectedUrl - URL the navigate step was aiming for.
 * @returns {Promise<{kind: "ok"} | {kind: "needs_login", provider: string}>}
 *   `ok` when, after the handshake and a fresh navigation, the browser
 *   no longer reads as drifted from `expectedUrl`; otherwise
 *   `needs_login` with the provider that was tried, or a best guess
 *   when nothing could be tried.
 */
async function attemptOAuthRecovery(browser, expectedUrl) {
    const profiles = loggedInProviders();
    if (profiles.length === 0) {
        // No cached provider session at all. Guess the provider from
        // the page we are stuck on (same rule as the no-button branch
        // below) instead of always reporting "google".
        const guess = inferProviderFromUrl(browser.currentUrl()) ?? "google";
        return { kind: "needs_login", provider: guess };
    }
    // Inventory once. Look for an OAuth button matching any provider
    // we have a cached session for. Prefer the first match in profile
    // order so a Google-first user doesn't end up bound to GitHub on
    // services that offer both.
    const inventory = await browser.extractInteractiveElements();
    let pickedProvider = null;
    let pickedButton = null;
    for (const p of profiles) {
        const btn = findOAuthButton(inventory, p);
        if (btn !== null) {
            pickedProvider = p;
            pickedButton = btn;
            break;
        }
    }
    if (pickedProvider === null || pickedButton === null) {
        // The page may genuinely be a non-OAuth login form (some services
        // also offer password auth). The replay can't synthesize a
        // password; surface needs_login with a guess based on the URL.
        const guess = inferProviderFromUrl(browser.currentUrl()) ?? "google";
        return { kind: "needs_login", provider: guess };
    }
    // Parse the target BEFORE clicking so a malformed expectedUrl
    // cannot throw after the OAuth side effect has already happened.
    const expectedHost = new URL(expectedUrl).hostname;
    // Drive the click, then poll for the round-trip to complete: either
    // the OAuth page closes, OR the current URL is back on the expected
    // host AND no longer looks like an auth wall.
    await browser.startOAuth(pickedButton.selector);
    const deadline = Date.now() + 30_000;
    while (Date.now() < deadline) {
        await browser.wait(1);
        if (browser.oauthPageClosed())
            break;
        const landedUrl = browser.currentUrl();
        let host;
        try {
            host = new URL(landedUrl).hostname;
        }
        catch {
            continue;
        }
        // Host equality alone is not enough: when the login page lives
        // on the expected host (the /login case), the host already
        // matches before the OAuth redirect has even started, and
        // breaking here would re-navigate mid-handshake. Require that
        // the URL has also stopped reading as drift.
        if (host === expectedHost &&
            detectNavigationDrift(landedUrl, expectedUrl) === null) {
            break;
        }
    }
    // Verify we're actually back. Re-navigate to the exact expected URL
    // so the rest of the skill executes against the page it was
    // captured against (not, e.g., a /welcome or /dashboard landing).
    await browser.goto(expectedUrl);
    await browser.wait(2);
    const drift = detectNavigationDrift(browser.currentUrl(), expectedUrl);
    if (drift !== null) {
        // OAuth round-trip didn't unlock the destination — likely
        // expired cookies. The user needs to re-run `mcp login`.
        return { kind: "needs_login", provider: pickedProvider };
    }
    return { kind: "ok" };
}
1205
1534
  //# sourceMappingURL=replay-skill.js.map