@trusty-squire/mcp 0.9.13 → 0.9.14-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/dist/bot/agent.d.ts +5 -1
  2. package/dist/bot/agent.d.ts.map +1 -1
  3. package/dist/bot/agent.js +496 -20
  4. package/dist/bot/agent.js.map +1 -1
  5. package/dist/bot/browser.d.ts +12 -0
  6. package/dist/bot/browser.d.ts.map +1 -1
  7. package/dist/bot/browser.js +838 -83
  8. package/dist/bot/browser.js.map +1 -1
  9. package/dist/bot/captcha-solver-2captcha.d.ts +18 -0
  10. package/dist/bot/captcha-solver-2captcha.d.ts.map +1 -1
  11. package/dist/bot/captcha-solver-2captcha.js +21 -0
  12. package/dist/bot/captcha-solver-2captcha.js.map +1 -1
  13. package/dist/bot/email-code-fetcher.d.ts +5 -0
  14. package/dist/bot/email-code-fetcher.d.ts.map +1 -0
  15. package/dist/bot/email-code-fetcher.js +33 -0
  16. package/dist/bot/email-code-fetcher.js.map +1 -0
  17. package/dist/bot/inbox-client.d.ts +1 -0
  18. package/dist/bot/inbox-client.d.ts.map +1 -1
  19. package/dist/bot/inbox-client.js +55 -15
  20. package/dist/bot/inbox-client.js.map +1 -1
  21. package/dist/bot/index.d.ts +0 -1
  22. package/dist/bot/index.d.ts.map +1 -1
  23. package/dist/bot/index.js +45 -19
  24. package/dist/bot/index.js.map +1 -1
  25. package/dist/bot/promote-to-skill.d.ts +2 -1
  26. package/dist/bot/promote-to-skill.d.ts.map +1 -1
  27. package/dist/bot/promote-to-skill.js +115 -6
  28. package/dist/bot/promote-to-skill.js.map +1 -1
  29. package/dist/bot/replay-skill.d.ts +17 -0
  30. package/dist/bot/replay-skill.d.ts.map +1 -1
  31. package/dist/bot/replay-skill.js +243 -10
  32. package/dist/bot/replay-skill.js.map +1 -1
  33. package/dist/bot/signup-lock.d.ts +17 -0
  34. package/dist/bot/signup-lock.d.ts.map +1 -0
  35. package/dist/bot/signup-lock.js +174 -0
  36. package/dist/bot/signup-lock.js.map +1 -0
  37. package/dist/tools/provision-any.d.ts.map +1 -1
  38. package/dist/tools/provision-any.js +25 -12
  39. package/dist/tools/provision-any.js.map +1 -1
  40. package/package.json +2 -2
  41. package/dist/bot/oauth-lock.d.ts +0 -2
  42. package/dist/bot/oauth-lock.d.ts.map +0 -1
  43. package/dist/bot/oauth-lock.js +0 -28
  44. package/dist/bot/oauth-lock.js.map +0 -1
@@ -129,6 +129,23 @@ export async function replaySkill(input) {
129
129
  // marker too, so the verifier doesn't DEMOTE an active skill over it (which
130
130
  // was eroding OF#1 — measured: brevo demoted on a returning-user nav click).
131
131
  let authedViaOAuth = false;
132
+ // Form-readiness parity with the live bot. Until the FIRST form control
133
+ // fills/selects successfully, an "input absent" on a fill/select is far
134
+ // more likely the SPA signup form still hydrating than a genuinely-absent
135
+ // (already-registered) onboarding field — so we wait + reload + re-validate
136
+ // before treating it as skippable. Once a form control succeeds the form is
137
+ // present, and from then on absent fields keep the account-state skip.
138
+ let reachedForm = false;
139
+ // Post-click settle parity with the live bot. A click can kick off server
140
+ // work BEFORE the SPA navigates (zilliz's onboarding Continue provisions a
141
+ // default org/project/cluster, then routes to the dashboard — several
142
+ // seconds). The live bot's LLM round-trip gave that window for free; the
143
+ // replay engine reads the next inventory ~2s after the click, sees the OLD
144
+ // page, and wrongly skips/fails subsequent steps as "absent". When a step
145
+ // doesn't resolve and the most recent EXECUTED step was a click/navigate,
146
+ // poll re-validation before the skip/fail cascade decides. Iteration-
147
+ // bounded (not wall-clock) so stubbed tests don't spin.
148
+ let lastExecutedWasClick = false;
132
149
  for (let i = 0; i < skill.steps.length; i++) {
133
150
  const step = skill.steps[i];
134
151
  // Dry-mode short circuit: walk every step before the credential-
@@ -172,7 +189,31 @@ export async function replaySkill(input) {
172
189
  }
173
190
  // Pre-validate: would this step resolve cleanly against the
174
191
  // current page? If not, hand to the LLM fallback.
175
- const validation = await preValidateStep(step, browser, templateValues);
192
+ let validation = await preValidateStep(step, browser, templateValues);
193
+ // Form-readiness parity: a fill/select that doesn't resolve BEFORE we've
194
+ // reached the form is usually the SPA still hydrating (zilliz /signup
195
+ // renders marketing chrome then the form). Wait for hydration + reload
196
+ // once + re-validate — mirroring the live bot's waitForFormReady +
197
+ // reload-on-shell loop — before the skip/fail cascade decides it's a
198
+ // genuinely-absent (already-registered) field. A fresh signup's form
199
+ // appears; an already-registered one never does and the skip still fires.
200
+ if (!validation.ok &&
201
+ !reachedForm &&
202
+ (step.kind === "fill" || step.kind === "select")) {
203
+ validation = await waitForFormThenRevalidate(step, browser, templateValues);
204
+ }
205
+ if (!validation.ok && lastExecutedWasClick) {
206
+ for (let poll = 0; poll < 6 && !validation.ok; poll++) {
207
+ await browser.wait(2);
208
+ validation = await preValidateStep(step, browser, templateValues);
209
+ }
210
+ // One settle window per click. If the page didn't produce this step's
211
+ // target within it, later steps shouldn't each re-pay the wait — a
212
+ // genuinely-diverged page (returning-user skips) would otherwise
213
+ // crawl through every remaining step at +12s apiece.
214
+ if (!validation.ok)
215
+ lastExecutedWasClick = false;
216
+ }
176
217
  let stepToExecute = step;
177
218
  if (!validation.ok) {
178
219
  const fallbackResult = await tryFallback(step, validation.reason, browser, i, skill, llmFallback, candidatesDir);
@@ -245,7 +286,7 @@ export async function replaySkill(input) {
245
286
  // the router can decide whether to retry or fall through to the
246
287
  // universal bot.
247
288
  try {
248
- const execOutcome = await executeStep(stepToExecute, browser, templateValues, skill);
289
+ const execOutcome = await executeStep(stepToExecute, browser, templateValues, skill, input.fetchEmailCode);
249
290
  if (execOutcome.kind === "needs_login") {
250
291
  return { kind: "needs_login", provider: execOutcome.provider, stepIndex: i };
251
292
  }
@@ -253,6 +294,22 @@ export async function replaySkill(input) {
253
294
  // an authenticated returning-user session for the rest of the replay.
254
295
  if (stepToExecute.kind === "click_oauth_button")
255
296
  authedViaOAuth = true;
297
+ // Track form-readiness across DISTINCT forms. A successful fill/select
298
+ // means the CURRENT form is present; a click/navigate may move us to a
299
+ // NEW page whose form (zilliz's /information onboarding after the OTP)
300
+ // can itself still be hydrating — so re-arm the retry. Without the
301
+ // re-arm, the signup form hydrates but the next form's fields get
302
+ // eagerly skipped as "already registered".
303
+ if (execOutcome.kind === "filled" || execOutcome.kind === "selected") {
304
+ reachedForm = true;
305
+ lastExecutedWasClick = false;
306
+ }
307
+ else if (execOutcome.kind === "clicked" || execOutcome.kind === "navigated") {
308
+ reachedForm = false;
309
+ // Stays true across SKIPPED steps (they don't execute), so a step
310
+ // two slots after the click still gets the settle grace.
311
+ lastExecutedWasClick = true;
312
+ }
256
313
  if (execOutcome.kind === "extract_ok") {
257
314
  // We extracted a credential successfully. Validate it before
258
315
  // declaring victory — the synthesizer's shape inference is a
@@ -371,6 +428,38 @@ export async function replaySkill(input) {
371
428
  reason: "Walked entire skill graph without producing a credential.",
372
429
  };
373
430
  }
431
/**
 * Give a still-hydrating SPA signup form a bounded chance to appear, then
 * re-run pre-validation for `step`.
 *
 * This is the replay-engine counterpart of the live bot's waitForFormReady +
 * reload-on-shell loop: a flaky SPA (zilliz /signup) paints marketing chrome
 * first, so a one-shot validation right after navigation can read a
 * form-less inventory. Up to three short attempts are made, with a single
 * reload of the current URL on the second attempt to unstick a wedged
 * loading shell.
 *
 * @param step Skill step to re-validate.
 * @param browser Browser driver; `waitForAuthWidgetHydration` and
 *   `waitForInteractiveDom` are optional and skipped when absent.
 * @param templateValues Template values forwarded to `preValidateStep`.
 * @returns The first passing validation, otherwise the last failing one so
 *   the caller can run its skip/fail cascade. On an already-registered
 *   account the form never appears, so this ends as a bounded no-op.
 */
async function waitForFormThenRevalidate(step, browser, templateValues) {
    let latest = { ok: false, reason: "form not ready" };
    let attempt = 0;
    while (attempt < 3) {
        await browser.waitForAuthWidgetHydration?.().catch(() => undefined);
        await browser.wait(1.5);
        const isReloadAttempt = attempt === 1;
        if (isReloadAttempt) {
            // Exactly one reload per call (oauthShellReloads parity).
            try {
                const here = browser.currentUrl();
                await browser.goto(here);
                await browser.wait(2);
                await browser.waitForInteractiveDom?.().catch(() => undefined);
            }
            catch {
                // navigation hiccup — the re-validation below still runs
            }
        }
        latest = await preValidateStep(step, browser, templateValues);
        if (latest.ok) {
            break;
        }
        attempt += 1;
    }
    return latest;
}
374
463
  async function preValidateStep(step, browser, templateValues) {
375
464
  switch (step.kind) {
376
465
  case "navigate": {
@@ -386,6 +475,15 @@ async function preValidateStep(step, browser, templateValues) {
386
475
  return { ok: false, reason: `Invalid URL in navigate step: ${step.url}` };
387
476
  }
388
477
  }
478
+ case "await_email_code": {
479
+ // No meaningful DOM pre-check: the code input is found heuristically
480
+ // at execute time (it may be unlabeled), and the email may not have
481
+ // arrived yet. Accept; the executor polls the inbox and fails cleanly
482
+ // if no code arrives or no input is found. No useful LLM fallback
483
+ // exists for this step (there's no captured selector to substitute).
484
+ void templateValues;
485
+ return { ok: true };
486
+ }
389
487
  case "click_oauth_button": {
390
488
  const inventory = await browser.extractInteractiveElements();
391
489
  const matches = inventory.filter((el) => matchesClickHint(el, step.text_match));
@@ -550,7 +648,7 @@ async function preValidateStep(step, browser, templateValues) {
550
648
  }
551
649
  case "select": {
552
650
  const inventory = await browser.extractInteractiveElements();
553
- const matches = inventory.filter((el) => isFillable(el) && matchesLabelHint(el, step.label_hint));
651
+ const matches = inventory.filter((el) => isSelectTarget(el) && matchesLabelHint(el, step.label_hint));
554
652
  if (matches.length === 0) {
555
653
  return {
556
654
  ok: false,
@@ -713,7 +811,7 @@ export function labelMatchesHint(label, hint) {
713
811
  return false;
714
812
  return a === b || a.includes(b) || b.includes(a);
715
813
  }
716
- async function executeStep(step, browser, templateValues, skill) {
814
+ async function executeStep(step, browser, templateValues, skill, fetchEmailCode) {
717
815
  switch (step.kind) {
718
816
  case "navigate": {
719
817
  // Rebase a captured per-account subdomain onto the live session's
@@ -730,6 +828,13 @@ async function executeStep(step, browser, templateValues, skill) {
730
828
  // content first, with the 2s as a floor for fast/static pages.
731
829
  await browser.wait(2);
732
830
  await browser.waitForInteractiveDom().catch(() => undefined);
831
+ // Parity with the live bot's waitForFormReady: an SPA signup page can
832
+ // render marketing chrome (so waitForInteractiveDom is satisfied)
833
+ // while the actual auth form is still an async spinner. Without this
834
+ // the replay reads a form-less inventory and skips the email/password
835
+ // fills as "absent" (zilliz /signup). Bounded; no-op once the form
836
+ // is present.
837
+ await browser.waitForAuthWidgetHydration?.().catch(() => undefined);
733
838
  // 0.8.2-rc.22 — URL drift detection. When a skill's signup_url
734
839
  // assumes the user is authenticated (Railway's /account/tokens
735
840
  // captured after OAuth was done in a prior session), the
@@ -897,21 +1002,53 @@ async function executeStep(step, browser, templateValues, skill) {
897
1002
  await browser.type(match.selector, value);
898
1003
  return { kind: "filled" };
899
1004
  }
1005
+ case "await_email_code": {
1006
+ if (fetchEmailCode === undefined) {
1007
+ throw new Error("await_email_code step requires a fetchEmailCode callback, but the " +
1008
+ "caller did not wire inbox access into the replay.");
1009
+ }
1010
+ const alias = templateValues.EMAIL_ALIAS;
1011
+ if (alias === undefined || alias.length === 0) {
1012
+ throw new Error("await_email_code step requires templateValues.EMAIL_ALIAS (the run's " +
1013
+ "inbox alias) to poll for the verification email.");
1014
+ }
1015
+ const code = await fetchEmailCode({ alias });
1016
+ if (code === null || code.length === 0) {
1017
+ throw new Error(`No email verification code arrived for ${alias} within the poll window.`);
1018
+ }
1019
+ const inventory = await browser.extractInteractiveElements();
1020
+ const target = findCodeInput(inventory, step.label_hint);
1021
+ if (target === null) {
1022
+ throw new Error("await_email_code: could not find a verification-code input on the page.");
1023
+ }
1024
+ // browser.type clicks-then-pressSequentially, which auto-distributes
1025
+ // across multi-box single-digit OTP inputs (Porter/Koyeb class) as
1026
+ // well as a single combined box.
1027
+ const otpPageUrl = browser.currentUrl();
1028
+ await browser.type(target.selector, code);
1029
+ // Auto-advance is racy: a keystroke landing during the widget's focus
1030
+ // transition gets dropped by the controlled input, leaving N-1 boxes
1031
+ // filled and the submit disabled (zilliz Verify, observed 2026-06-11).
1032
+ // Read the boxes back and re-type per-box — explicit targeting, no
1033
+ // auto-advance dependency — anything that didn't stick.
1034
+ await fixupOtpDistribution(browser, code, otpPageUrl);
1035
+ return { kind: "filled" };
1036
+ }
900
1037
  case "select": {
901
1038
  const inventory = await browser.extractInteractiveElements();
902
1039
  // 0.8.2-rc.3 — apply near_text_hint filter when present so
903
1040
  // Sentry-grid rows land on the right <select>. The original
904
1041
  // `inventory.find` would unilaterally pick the first match.
905
1042
  //
906
- // 0.8.2-rc.21 — also restrict to fillable elements (input /
907
- // textarea / select). Without this, a Railway-class form where
908
- // a `<label for="select-X">` shares labelText with its
1043
+ // 0.8.2-rc.21 — also restrict to select targets (input /
1044
+ // textarea / select / role=combobox). Without this, a Railway-class
1045
+ // form where a `<label for="select-X">` shares labelText with its
909
1046
  // `<select id="select-X">` would silently pick the label —
910
1047
  // and selectOption(label, …) would then route into the
911
1048
  // combobox path and fail because native selects don't reveal
912
1049
  // options via DOM patterns. Pre-validation already filters
913
1050
  // this way; the executor was lagging.
914
- const allMatches = inventory.filter((el) => isFillable(el) && matchesLabelHint(el, step.label_hint));
1051
+ const allMatches = inventory.filter((el) => isSelectTarget(el) && matchesLabelHint(el, step.label_hint));
915
1052
  if (allMatches.length === 0) {
916
1053
  throw new Error(`No select matches label_hint=${step.label_hint}`);
917
1054
  }
@@ -1425,6 +1562,11 @@ async function findValidatedCandidate(browser, validator) {
1425
1562
  try {
1426
1563
  const candidates = await browser.extractCredentialCandidates();
1427
1564
  for (const cand of candidates) {
1565
+ // Same noise gate the heuristic tiers apply — a password-manager
1566
+ // affordance or consent-widget word that happens to satisfy a
1567
+ // length-only validator must not shadow the real key.
1568
+ if (isCredentialNoiseCandidate(cand))
1569
+ continue;
1428
1570
  if (candidateSatisfiesValidatorShape(cand, validator))
1429
1571
  return cand;
1430
1572
  }
@@ -1669,16 +1811,24 @@ async function maybeDumpReplayDebug(browser, skill, stepIndex, reason) {
1669
1811
  .filter((e) => e.visible)
1670
1812
  .map((e) => ({
1671
1813
  tag: e.tag,
1814
+ type: e.type,
1672
1815
  role: e.role,
1673
1816
  text: (e.visibleText ?? "").slice(0, 60),
1674
1817
  aria: e.ariaLabel,
1675
1818
  label: e.labelText,
1676
1819
  placeholder: e.placeholder,
1677
1820
  href: e.href ?? null,
1821
+ selector: e.selector,
1822
+ // Field state is the diagnostic for "submit stays disabled" failures
1823
+ // (which box is actually empty?). Password values stay redacted.
1824
+ value: e.type === "password" ? (e.value ? "<redacted>" : "") : (e.value ?? null),
1678
1825
  }))
1679
- .filter((e) => e.text || e.aria || e.label || e.placeholder || e.href);
1826
+ .filter((e) => e.text || e.aria || e.label || e.placeholder || e.href || e.value);
1827
+ // Visible page text (toasts, validation errors, "code expired" banners)
1828
+ // — interactive inventory alone can't show WHY a page refused to move.
1829
+ const pageText = (await browser.extractText().catch(() => "")).slice(0, 1500);
1680
1830
  const path = `/tmp/replay-debug-${skill.service}-step${stepIndex}.json`;
1681
- writeFileSync(path, JSON.stringify({ service: skill.service, stepIndex, reason, url: browser.currentUrl(), interesting }, null, 2));
1831
+ writeFileSync(path, JSON.stringify({ service: skill.service, stepIndex, reason, url: browser.currentUrl(), pageText, interesting }, null, 2));
1682
1832
  console.error(`[replay-debug] dumped ${path} (${interesting.length} elements)`);
1683
1833
  }
1684
1834
  catch {
@@ -1855,6 +2005,89 @@ function isRuntimeId(id) {
1855
2005
  function isFillable(el) {
1856
2006
  return el.tag === "input" || el.tag === "textarea" || el.tag === "select";
1857
2007
  }
2008
/**
 * Decide whether an inventory element can be the target of a `select` step.
 *
 * This is deliberately broader than `isFillable`: MUI/Radix-class dropdowns
 * render as a non-input element carrying role="combobox" (zilliz's Job Title
 * is a <div id="mui-component-select-jobTitle" role="combobox">), and
 * browser.selectOption already drives those by clicking and picking from the
 * popup. Requiring a native form tag here made every MUI select look
 * "absent" and get skipped as account-state onboarding (measured live
 * 2026-06-11: zilliz replay left Job Title unselected, Continue no-opped,
 * and the failure surfaced 5 steps later as a bogus returning-user
 * divergence on "API Keys").
 */
function isSelectTarget(el) {
    if (isFillable(el)) {
        return true;
    }
    return el.role === "combobox";
}
2021
/**
 * Collect every inventory element that could plausibly take a typed
 * verification code for an `await_email_code` step.
 *
 * OTP inputs are frequently UNLABELED (single-digit boxes, headless inputs)
 * — which is why a plain `fill` step can't carry them — so candidates are
 * picked by shape, not by label: an `<input>` that is not reported invisible
 * and whose `type` is a plain text-entry type. `findCodeInput` narrows this
 * list to one target (explicit label_hint, then a code-like attribute, then
 * the first candidate) and `fixupOtpDistribution` reads the same list back
 * after typing. Exported for unit tests.
 *
 * @param {Array<object>} inventory Interactive-element records; only `tag`,
 *   `visible` and `type` are read here.
 * @returns {Array<object>} Matching records in inventory order. Always an
 *   array — empty (never null) when nothing qualifies.
 */
export function codeInputCandidates(inventory) {
    // Allow-list of text-entry types. email / password / checkbox / radio /
    // etc. are excluded simply by not being members, so no separate
    // deny-checks are needed.
    const TEXT_ENTRY = new Set(["text", "tel", "number", "", "search"]);
    return inventory.filter((el) => {
        if (el.tag !== "input" || el.visible === false) {
            return false;
        }
        // A missing type — null OR undefined — is a headless OTP box. Treat
        // both the same way `visible` already tolerates a missing field.
        const type = el.type ?? "";
        // HTML `type` is case-insensitive, so normalise before the lookup.
        return typeof type === "string" && TEXT_ENTRY.has(type.toLowerCase());
    });
}
2040
/**
 * Post-typing readback for an `await_email_code` step.
 *
 * browser.type relies on the widget's auto-advance to distribute digits
 * across multi-box OTP inputs; a keystroke that fires during the focus
 * transition is silently dropped by the controlled input (React setState
 * hasn't moved focus yet), leaving a box empty and the submit button
 * disabled. This re-reads the boxes and re-types any digit that didn't
 * stick, targeting each box explicitly so the corrective pass has no
 * auto-advance dependency.
 *
 * It is a no-op when:
 *  - the URL no longer equals `otpPageUrl` (the widget auto-submitted — the
 *    new page's inputs are NOT OTP boxes). This is checked up front AND
 *    before every corrective keystroke, because a re-typed digit can itself
 *    complete the code and trigger the auto-submit mid-loop;
 *  - the boxes↔digits mapping isn't unambiguous (box count differs from the
 *    code length and isn't exactly one).
 *
 * Exported for unit tests.
 *
 * @param browser Browser driver providing `wait`, `currentUrl`,
 *   `extractInteractiveElements` and `type`.
 * @param {string} code The verification code that was just typed.
 * @param {string} otpPageUrl URL of the page captured before typing.
 * @returns {Promise<void>}
 */
export async function fixupOtpDistribution(browser, code, otpPageUrl) {
    // Let the widget's controlled-input state settle before reading back.
    await browser.wait(1);
    if (browser.currentUrl() !== otpPageUrl) {
        return;
    }
    const boxes = codeInputCandidates(await browser.extractInteractiveElements());
    if (boxes.length === 1) {
        // Single combined input: its value should be the whole code.
        const [only] = boxes;
        if ((only.value ?? "") !== code) {
            await browser.type(only.selector, code);
        }
        return;
    }
    if (boxes.length !== code.length) {
        return;
    }
    for (let i = 0; i < boxes.length; i++) {
        const expected = code.charAt(i);
        if ((boxes[i].value ?? "") === expected) {
            continue;
        }
        // A previous corrective keystroke may have completed the code and
        // navigated away; the remaining selectors would then be stale.
        if (browser.currentUrl() !== otpPageUrl) {
            return;
        }
        console.error(`[replay] await_email_code: OTP box ${i + 1}/${boxes.length} holds ` +
            `${JSON.stringify(boxes[i].value ?? "")} after auto-advance typing — re-typing it directly.`);
        await browser.type(boxes[i].selector, expected);
    }
}
2073
/**
 * Locate the verification-code input for an `await_email_code` step.
 *
 * Resolution order over the code-shaped candidates:
 *  1. the first candidate matching an explicit `labelHint`, when one is given;
 *  2. the first candidate whose name / id / placeholder / aria-label / label
 *     text names it a code field;
 *  3. the first candidate on the page.
 *
 * Tier 2 tests the attributes both as written and with camelCase humps and
 * underscores turned into spaces. `\b` treats `_` and an inner capital as
 * part of the same word, so without that split "emailVerificationCode",
 * "signup_otp" or "email_code" would miss this tier and resolution would
 * fall through to tier 3, which can be an unrelated text input.
 *
 * @param {Array<object>} inventory Interactive-element records.
 * @param {string} [labelHint] Optional captured label; a missing, null or
 *   empty hint skips tier 1.
 * @returns {object|null} The chosen input record, or null when the page has
 *   no code-shaped input at all.
 */
export function findCodeInput(inventory, labelHint) {
    const candidates = codeInputCandidates(inventory);
    if (candidates.length === 0) {
        return null;
    }
    if (typeof labelHint === "string" && labelHint.length > 0) {
        const byLabel = candidates.find((el) => matchesLabelHint(el, labelHint));
        if (byLabel !== undefined) {
            return byLabel;
        }
    }
    // Word-START boundary only (no trailing \b): "verif" must prefix-match
    // "verification", which a trailing \b would break (it'd require "verif"
    // to be a whole word).
    const codeRe = /\b(code|otp|verif|pin|one[\s-]?time|2fa|mfa)/i;
    const byAttr = candidates.find((el) => {
        const raw = `${el.name ?? ""} ${el.id ?? ""} ${el.placeholder ?? ""} ${el.ariaLabel ?? ""} ${el.labelText ?? ""}`;
        // Second view with identifier-style separators opened up. The raw
        // text is still tested first so nothing that matched before
        // (e.g. "2FA") can stop matching.
        const opened = raw.replace(/([a-z])([A-Z])/g, "$1 $2").replace(/_/g, " ");
        return codeRe.test(raw) || codeRe.test(opened);
    });
    return byAttr ?? candidates[0];
}
1858
2091
  // rc.24/rc.25 — cascading fill-target disambiguator. Shared by
1859
2092
  // preValidate and executeStep so both arrive at the same input when
1860
2093
  // a label matches more than once (OpenRouter's "Name" input ships