@trusty-squire/mcp 0.6.14-rc.3 → 0.6.14-rc.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bot/agent.d.ts +3 -0
- package/dist/bot/agent.d.ts.map +1 -1
- package/dist/bot/agent.js +204 -2
- package/dist/bot/agent.js.map +1 -1
- package/dist/bot/browser.d.ts +1 -0
- package/dist/bot/browser.d.ts.map +1 -1
- package/dist/bot/browser.js +64 -0
- package/dist/bot/browser.js.map +1 -1
- package/dist/install/cli.d.ts +14 -0
- package/dist/install/cli.d.ts.map +1 -1
- package/dist/install/cli.js +25 -4
- package/dist/install/cli.js.map +1 -1
- package/package.json +2 -1
package/dist/bot/agent.d.ts
CHANGED
|
@@ -21,6 +21,7 @@ export declare class LLMCallBudgetExceeded extends Error {
|
|
|
21
21
|
constructor(budget: number);
|
|
22
22
|
}
|
|
23
23
|
export declare function guessSignupUrl(service: string): string;
|
|
24
|
+
export declare function isKnownDomainFullUrlMatch(service: string, url: string): boolean;
|
|
24
25
|
export declare function isGoogleSearchUrl(url: string): boolean;
|
|
25
26
|
export interface SignupTask {
|
|
26
27
|
service: string;
|
|
@@ -125,6 +126,8 @@ declare const FILL_VALUE_KINDS: readonly ["email", "password", "name", "username
|
|
|
125
126
|
type FillValueKind = (typeof FILL_VALUE_KINDS)[number];
|
|
126
127
|
export declare function parseSignupPlan(raw: string, allowedSelectors?: ReadonlySet<string>): SignupPlan;
|
|
127
128
|
export declare function formatInventory(inventory: readonly InteractiveElement[]): string;
|
|
129
|
+
export declare function hostMatchesServiceDomain(hostname: string, serviceSlug: string): boolean;
|
|
130
|
+
export declare function isAntiBotInterstitialText(visibleText: string): boolean;
|
|
128
131
|
export declare function detectAntiBotBlock(html: string): string | null;
|
|
129
132
|
export declare function detectAlreadySignedIn(inventory: readonly InteractiveElement[]): boolean;
|
|
130
133
|
export declare function isOauthOnlyChooser(inventory: readonly InteractiveElement[]): boolean;
|
package/dist/bot/agent.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent.d.ts","sourceRoot":"","sources":["../../src/bot/agent.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EACV,iBAAiB,EAGjB,cAAc,EACd,kBAAkB,EACnB,MAAM,cAAc,CAAC;AAEtB,OAAO,EAGL,KAAK,eAAe,EACrB,MAAM,sBAAsB,CAAC;AAM9B,OAAO,EAGL,KAAK,SAAS,EACd,KAAK,OAAO,EACb,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"agent.d.ts","sourceRoot":"","sources":["../../src/bot/agent.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EACV,iBAAiB,EAGjB,cAAc,EACd,kBAAkB,EACnB,MAAM,cAAc,CAAC;AAEtB,OAAO,EAGL,KAAK,eAAe,EACrB,MAAM,sBAAsB,CAAC;AAM9B,OAAO,EAGL,KAAK,SAAS,EACd,KAAK,OAAO,EACb,MAAM,iBAAiB,CAAC;AAMzB,MAAM,WAAW,UAAU;IACzB,YAAY,CAAC,KAAK,EAAE;QAClB,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE;YAAE,OAAO,CAAC,EAAE,MAAM,CAAC;YAAC,IAAI,CAAC,EAAE,MAAM,CAAC;YAAC,aAAa,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;QACrE,eAAe,EAAE,MAAM,CAAC;KACzB,GAAG,OAAO,CAAC;QACV,OAAO,EAAE,MAAM,CAAC;QAChB,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;KACrC,CAAC,CAAC;CACJ;AAsED,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAGlE;AAED,qBAAa,qBAAsB,SAAQ,KAAK;gBAClC,MAAM,EAAE,MAAM;CAI3B;AAwCD,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAMtD;AAQD,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAI/E;AAID,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAUtD;AAED,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,gBAAgB,EAAE,MAAM,MAAM,CAAC;IAC/B,KAAK,CAAC,EAAE,UAAU,GAAG,SAAS,CAAC;IAI/B,0BAA0B,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAIhD,mBAAmB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAOzC,aAAa,CAAC,EAAE,eAAe,GAAG,SAAS,CAAC;IAO5C,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAK/B,SAAS,CAAC,EAAE,MAAM,EAAE,GAAG,SAAS,CAAC;IAQjC,qBAAqB,CAAC,EAAE,SAAS,MAAM,EAAE,GAAG,SAAS,CAAC;IAUtD,sBAAsB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CAC9C;AAED,MAAM,WAAW,YAAY;IAC3B,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,CAAC,EAAE;QACZ,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;KACnC,CAAC;IACF,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,EAAE,CAAC;IAIhB,SAAS,CAAC,EAAE,MAAM,CAAC;IAMnB,YAAY,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAMjC,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAOhC,OAAO,CAAC,EAAE,OAAO,CAAC;IAQlB,GAAG,CAAC,EAAE,KAAK,GAAG,OAAO,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;IAKvB,OAAO,CAAC,EAAE;QACR,IAAI,EAA
E,WAAW,GAAG,WAAW,CAAC;QAIhC,OAAO,EAAE,cAAc,CAAC;QACxB,kBAAkB,EAAE,OAAO,CAAC;QAI5B,OAAO,EAAE,OAAO,CAAC;KAClB,CAAC;CACH;AAGD,MAAM,MAAM,UAAU,GAClB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,aAAa,CAAC;IAAC,OAAO,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GAC/F;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACnD;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC;AAExD,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,UAAU,EAAE,CAAC;IACtB,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAkCD,MAAM,MAAM,cAAc,GACtB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GAChC;IAAE,IAAI,EAAE,SAAS,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACnC;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACjC;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACnD;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GAOjE;IACE,IAAI,EAAE,QAAQ,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB,GAKD;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GAOnD;IAAE,IAAI,EAAE,QAAQ,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACrD;IAAE,IAAI,EAAE,UAAU,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACjD;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC;AAKtD,QAAA,MAAM,gBAAgB,0EAOZ,CAAC;AACX,KAAK,aAAa,GAAG,CAAC,OAAO,gBAAgB,CAAC,CAAC,MAAM,CAAC,CAAC;AAkHvD,wBAAgB,eAAe,CAC7B,GAAG,EAAE,MAAM,EACX,gBAAgB,CAAC,EAAE,WAAW,CAAC,MAAM,CAAC,GACrC,UAAU,CAoCZ;AAKD,wBAAgB,eAAe,CAAC,SAAS,EAAE,SAAS,kBAAkB,EAAE,GAAG,MAAM,CA4ChF;AA6ED,wBAAgB,wBAAwB,CACtC,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,MAAM,GAClB,OAAO,CAgCT;AASD,wBAAgB,yBAAyB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAItE;AAOD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAe9D;AAYD,wBAAgB,qBAAqB,CACnC,SAAS,EAAE,SAAS,kBAAkB,EAAE
,GACvC,OAAO,CAaT;AAKD,wBAAgB,kBAAkB,CAChC,SAAS,EAAE,SAAS,kBAAkB,EAAE,GACvC,OAAO,CAmBT;AAgBD,wBAAgB,eAAe,CAC7B,SAAS,EAAE,SAAS,kBAAkB,EAAE,EACxC,QAAQ,EAAE,eAAe,GACxB,kBAAkB,GAAG,IAAI,CAkC3B;AAKD,wBAAgB,oBAAoB,CAClC,SAAS,EAAE,SAAS,kBAAkB,EAAE,EACxC,SAAS,EAAE,SAAS,eAAe,EAAE,GACpC;IAAE,QAAQ,EAAE,eAAe,CAAC;IAAC,MAAM,EAAE,kBAAkB,CAAA;CAAE,GAAG,IAAI,CAMlE;AAQD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,gBAAgB,CAAC,EAAE,WAAW,CAAC,MAAM,CAAC,GACrC,cAAc,CAqFhB;AAgED,wBAAgB,kBAAkB,CAAC,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,OAAO,CAYnF;AAED,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAmEjE;AASD,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,SAAS,MAAM,EAAE,GAAG,MAAM,GAAG,IAAI,CAa5E;AASD,qBAAa,WAAW;IA8hBpB,OAAO,CAAC,OAAO;IA1hBjB,OAAO,CAAC,YAAY,CAAK;IAIzB,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAgB;IAC7C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAU;IAOlC,OAAO,CAAC,gBAAgB,CAAsC;IAM9D,OAAO,CAAC,UAAU;YAuBJ,cAAc;YAgCd,WAAW;YAsDX,oBAAoB;YAgRpB,cAAc;YA8Ed,sBAAsB;YAqBtB,UAAU;gBAwCd,OAAO,EAAE,iBAAiB,EAClC,GAAG,CAAC,EAAE,SAAS,GAAG,OAAO;IAkB3B,IAAI,QAAQ,IAAI,SAAS,MAAM,EAAE,CAEhC;YAOa,OAAO;YAcP,YAAY;YA+CZ,UAAU;IA6ClB,MAAM,CAAC,IAAI,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;YAkCvC,SAAS;YAgWT,YAAY;IA8R1B,OAAO,CAAC,UAAU;YAeJ,sBAAsB;YA0BtB,qBAAqB;YAerB,cAAc;IAkE5B,OAAO,CAAC,0BAA0B;IAYlC,OAAO,CAAC,oBAAoB;YAQd,wBAAwB;YA4CxB,cAAc;YAqUd,oBAAoB;YA6CpB,kBAAkB;YA6JlB,cAAc;YA2Ed,mBAAmB;YAsDnB,kBAAkB;YAuHlB,uBAAuB;CA4CtC"}
|
package/dist/bot/agent.js
CHANGED
|
@@ -15,6 +15,7 @@ import { saveDebugSnapshot } from "./debug.js";
|
|
|
15
15
|
import { captureOnboardingRound } from "./onboarding-capture.js";
|
|
16
16
|
import { wasRecentlyPrewarmed, recordPrewarmSuccess } from "./prewarm-cache.js";
|
|
17
17
|
import { pickLLMPair, } from "./llm-client.js";
|
|
18
|
+
import { getDomain } from "tldts";
|
|
18
19
|
// Hard cap on LLM calls per signup. A signup that runs away to 20+ calls
|
|
19
20
|
// is both expensive and almost certainly stuck in a planning loop. 15
|
|
20
21
|
// covers: 2 initial form plans, 1 re-plan pair on validation, plus 6
|
|
@@ -131,6 +132,17 @@ export function guessSignupUrl(service) {
|
|
|
131
132
|
const host = entry ?? `${slug}.com`;
|
|
132
133
|
return `https://${host}/signup`;
|
|
133
134
|
}
|
|
135
|
+
// BUG-2 GUARD — did `url` come from KNOWN_DOMAINS as a hardcoded full
|
|
136
|
+
// URL (vs the default /signup convention)? These were explicitly
|
|
137
|
+
// chosen because the default 404s and the real entry is non-obvious
|
|
138
|
+
// — e.g. Railway's /login, Cloudflare's dash.cloudflare.com/sign-up.
|
|
139
|
+
// Trust the mapping rather than falling back to a Google search.
|
|
140
|
+
// Exported for unit testing.
|
|
141
|
+
export function isKnownDomainFullUrlMatch(service, url) {
|
|
142
|
+
const slug = service.toLowerCase().replace(/[^a-z0-9]/g, "");
|
|
143
|
+
const entry = KNOWN_DOMAINS[slug];
|
|
144
|
+
return entry !== undefined && /^https?:\/\//i.test(entry) && entry === url;
|
|
145
|
+
}
|
|
134
146
|
// True when the URL is a Google search results page — used to gate
|
|
135
147
|
// the prewarm + the post-load "did we land somewhere useful?" check.
|
|
136
148
|
export function isGoogleSearchUrl(url) {
|
|
@@ -331,6 +343,120 @@ export function formatInventory(inventory) {
|
|
|
331
343
|
})
|
|
332
344
|
.join("\n");
|
|
333
345
|
}
|
|
346
|
+
// Platform-as-a-service customer-tenant suffixes that the bundled PSL
|
|
347
|
+
// in `tldts` does NOT (yet) classify as public suffixes, but functionally
|
|
348
|
+
// behave like one: every label to the left is a distinct customer site,
|
|
349
|
+
// not an extension of the platform's own brand.
|
|
350
|
+
//
|
|
351
|
+
// Without this override, `getDomain("storysite-production.up.railway.app")`
|
|
352
|
+
// returns `"railway.app"` (first label "railway") and the guard wrongly
|
|
353
|
+
// matches it to slug "railway" — which is exactly the Railway bug this
|
|
354
|
+
// guard is meant to prevent.
|
|
355
|
+
//
|
|
356
|
+
// Keep this list short: only platforms where serving arbitrary 3rd-party
|
|
357
|
+
// content on `*.<suffix>` is the platform's primary purpose. Custom-domain-
|
|
358
|
+
// only platforms (e.g. heroku custom domains) don't belong here.
|
|
359
|
+
//
|
|
360
|
+
// Order matters — most-specific first. We pick the longest suffix the
|
|
361
|
+
// hostname ends with.
|
|
362
|
+
const PLATFORM_TENANT_SUFFIXES = [
|
|
363
|
+
"up.railway.app",
|
|
364
|
+
"railway.app",
|
|
365
|
+
"vercel.app",
|
|
366
|
+
"netlify.app",
|
|
367
|
+
"pages.dev",
|
|
368
|
+
"fly.dev",
|
|
369
|
+
"onrender.com",
|
|
370
|
+
"herokuapp.com",
|
|
371
|
+
"github.io",
|
|
372
|
+
"gitlab.io",
|
|
373
|
+
"workers.dev",
|
|
374
|
+
];
|
|
375
|
+
// Treat `hostname` as if `suffix` were a public suffix: return the label
|
|
376
|
+
// immediately to the left of the suffix, lowercased. Returns null if the
|
|
377
|
+
// hostname doesn't end with the suffix.
|
|
378
|
+
function tenantLabelUnderPlatformSuffix(hostname, suffix) {
|
|
379
|
+
const lc = hostname.toLowerCase();
|
|
380
|
+
const dotSuffix = `.${suffix}`;
|
|
381
|
+
if (!lc.endsWith(dotSuffix))
|
|
382
|
+
return null;
|
|
383
|
+
const head = lc.slice(0, -dotSuffix.length);
|
|
384
|
+
if (head.length === 0)
|
|
385
|
+
return null;
|
|
386
|
+
// The tenant label is the LAST label of head (rightmost-before-suffix).
|
|
387
|
+
const parts = head.split(".");
|
|
388
|
+
return parts[parts.length - 1] ?? null;
|
|
389
|
+
}
|
|
390
|
+
// BUG-1 GUARD — does `hostname` belong to the same registered domain as
|
|
391
|
+
// `serviceSlug` (the alphanumeric squashed service name like "railway",
|
|
392
|
+
// "postmark")?
|
|
393
|
+
//
|
|
394
|
+
// Uses PSL-aware eTLD+1 (via tldts) AND a hardcoded override for
|
|
395
|
+
// platform-tenant suffixes the bundled PSL doesn't cover yet, so platform
|
|
396
|
+
// subdomains like `*.up.railway.app` and `*.vercel.app` are correctly
|
|
397
|
+
// classified as distinct customer sites.
|
|
398
|
+
//
|
|
399
|
+
// railway.com ↔ slug "railway" → MATCH
|
|
400
|
+
// docs.railway.com ↔ slug "railway" → MATCH
|
|
401
|
+
// storysite-production.up.railway.app ↔ slug "railway" → REJECT
|
|
402
|
+
// (matched by platform override —
|
|
403
|
+
// tenant label is "storysite-production",
|
|
404
|
+
// not "railway")
|
|
405
|
+
// railway.app ↔ slug "railway" → MATCH
|
|
406
|
+
// (the apex itself is the platform's
|
|
407
|
+
// own brand; only labels to the left
|
|
408
|
+
// are tenant sites)
|
|
409
|
+
// railway.io (typosquat) ↔ slug "railway" → MATCH
|
|
410
|
+
// (intentional — we can't disambiguate
|
|
411
|
+
// typosquats from TLD variants like
|
|
412
|
+
// sentry.com vs sentry.io)
|
|
413
|
+
//
|
|
414
|
+
// Empty slug → permissive (return true), preserving prior behavior when
|
|
415
|
+
// no service name was provided to findSignupLink.
|
|
416
|
+
//
|
|
417
|
+
// Exported for unit testing.
|
|
418
|
+
export function hostMatchesServiceDomain(hostname, serviceSlug) {
|
|
419
|
+
if (serviceSlug.length === 0)
|
|
420
|
+
return true;
|
|
421
|
+
const lcHost = hostname.toLowerCase();
|
|
422
|
+
// Platform-tenant override: if hostname is `*.<platform-suffix>`, the
|
|
423
|
+
// tenant label (left of the suffix) is the "site name", not the
|
|
424
|
+
// platform's brand. Pick the LONGEST matching suffix so e.g.
|
|
425
|
+
// "x.up.railway.app" picks "up.railway.app" before "railway.app".
|
|
426
|
+
let bestSuffix = null;
|
|
427
|
+
for (const sfx of PLATFORM_TENANT_SUFFIXES) {
|
|
428
|
+
if (lcHost.endsWith(`.${sfx}`) &&
|
|
429
|
+
(bestSuffix === null || sfx.length > bestSuffix.length)) {
|
|
430
|
+
bestSuffix = sfx;
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
if (bestSuffix !== null) {
|
|
434
|
+
const tenant = tenantLabelUnderPlatformSuffix(lcHost, bestSuffix);
|
|
435
|
+
if (tenant === null)
|
|
436
|
+
return false;
|
|
437
|
+
const normalizedTenant = tenant.replace(/[^a-z0-9]/g, "");
|
|
438
|
+
return normalizedTenant === serviceSlug;
|
|
439
|
+
}
|
|
440
|
+
const registered = getDomain(lcHost);
|
|
441
|
+
if (registered === null)
|
|
442
|
+
return false;
|
|
443
|
+
// The first label of the eTLD+1 is the "site name". For railway.com
|
|
444
|
+
// that's "railway".
|
|
445
|
+
const firstLabel = registered.split(".")[0]?.toLowerCase() ?? "";
|
|
446
|
+
// Normalize: strip hyphens so "trusty-squire" matches slug "trustysquire".
|
|
447
|
+
const normalized = firstLabel.replace(/[^a-z0-9]/g, "");
|
|
448
|
+
return normalized === serviceSlug;
|
|
449
|
+
}
|
|
450
|
+
// BUG-3 GUARD — diagnostic flag for the Inventory snapshot. Deliberately
// stricter than detectAntiBotBlock: it does not look for "cf-turnstile" /
// "recaptcha" markers in raw HTML, because that earlier check produced
// false positives on legitimate signup pages that merely embed a
// Turnstile/reCAPTCHA widget script. Only visible-text phrases are matched.
//
//   visibleText — user-visible page text to scan (case-insensitive).
//
// Returns true when any known interstitial phrase occurs in the text.
//
// Exported for unit testing.
export function isAntiBotInterstitialText(visibleText) {
    const interstitialPhrases = /just a moment|verify you are human|attention required|are you a robot|checking your browser/i;
    return interstitialPhrases.test(visibleText);
}
|
|
334
460
|
// Recognize a full-page anti-bot interstitial that's still up. Returns
|
|
335
461
|
// the vendor name (for the status message) or null. Pattern matching
|
|
336
462
|
// on visible text rather than markers — most vendors use the same UX
|
|
@@ -1089,7 +1215,11 @@ export class SignupAgent {
|
|
|
1089
1215
|
.replace(/\s+/g, " ")
|
|
1090
1216
|
.trim()
|
|
1091
1217
|
.slice(0, 240);
|
|
1092
|
-
|
|
1218
|
+
// BUG-3 FIX: match on user-visible text only. Previous regex
|
|
1219
|
+
// hit `cf-turnstile` / `recaptcha` / `cloudflare` in raw HTML,
|
|
1220
|
+
// producing false positives on legitimate signup pages that embed
|
|
1221
|
+
// a Turnstile widget script.
|
|
1222
|
+
const antiBot = isAntiBotInterstitialText(text);
|
|
1093
1223
|
steps.push(`Inventory diagnostic: title=${JSON.stringify(state.title.slice(0, 80))} ` +
|
|
1094
1224
|
`url=${state.url.slice(0, 120)} text=${JSON.stringify(text)}` +
|
|
1095
1225
|
(antiBot ? " ⚠ anti-bot interstitial detected" : ""));
|
|
@@ -1385,7 +1515,18 @@ export class SignupAgent {
|
|
|
1385
1515
|
// search-and-find-link path. This is the safety net that lets
|
|
1386
1516
|
// the bot recover from a wrong canonical guess (e.g. a service
|
|
1387
1517
|
// that uses /register or a non-`.com` TLD).
|
|
1388
|
-
|
|
1518
|
+
//
|
|
1519
|
+
// BUG-2 GUARD: when the guessed URL came from KNOWN_DOMAINS as a
|
|
1520
|
+
// full hardcoded URL (e.g. Railway → https://railway.com/login,
|
|
1521
|
+
// Cloudflare → https://dash.cloudflare.com/sign-up), trust the
|
|
1522
|
+
// mapping. These were explicitly chosen because the default
|
|
1523
|
+
// /signup path 404s and the real entry is non-obvious — falling
|
|
1524
|
+
// back to a Google search has produced cross-domain bugs (the
|
|
1525
|
+
// Railway run that ended up on storysite-production.up.railway.app).
|
|
1526
|
+
const usedKnownFullUrl = isKnownDomainFullUrlMatch(task.service, guessed);
|
|
1527
|
+
if (task.signupUrl === undefined &&
|
|
1528
|
+
!usedKnownFullUrl &&
|
|
1529
|
+
!(await this.looksLikeSignupPage())) {
|
|
1389
1530
|
steps.push(`${guessed} didn't look like a signup page — searching for the real one`);
|
|
1390
1531
|
const fallbackSearch = `https://www.google.com/search?q=${encodeURIComponent(`${task.service} signup`)}`;
|
|
1391
1532
|
await this.browser.goto(fallbackSearch);
|
|
@@ -1403,6 +1544,25 @@ export class SignupAgent {
|
|
|
1403
1544
|
await this.browser.goto(found);
|
|
1404
1545
|
await this.browser.wait(2);
|
|
1405
1546
|
}
|
|
1547
|
+
else {
|
|
1548
|
+
// BUG-1 GUARD: findSignupLink filters off-domain candidates
|
|
1549
|
+
// (registered-domain match against the service slug). If
|
|
1550
|
+
// nothing remained AND we'd been sent here from a Google
|
|
1551
|
+
// fallback, the bot is sitting on a SERP with no usable
|
|
1552
|
+
// destination — abort rather than let the form-fill planner
|
|
1553
|
+
// happily fill the Google search box.
|
|
1554
|
+
if (isGoogleSearchUrl(signupUrl)) {
|
|
1555
|
+
return {
|
|
1556
|
+
success: false,
|
|
1557
|
+
error: `no_signup_link: searched for ${task.service}'s signup page and ` +
|
|
1558
|
+
`found no on-domain candidates. The service likely doesn't have ` +
|
|
1559
|
+
`a public self-serve signup, or the bot's domain guard rejected ` +
|
|
1560
|
+
`every match. Sign up manually.`,
|
|
1561
|
+
steps,
|
|
1562
|
+
...this.resultTail(),
|
|
1563
|
+
};
|
|
1564
|
+
}
|
|
1565
|
+
}
|
|
1406
1566
|
}
|
|
1407
1567
|
// Steps 2-5: plan the form, fill it, submit — via the
|
|
1408
1568
|
// verify-and-replan loop (F3). The planner picks selectors from
|
|
@@ -2496,6 +2656,16 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
|
|
|
2496
2656
|
// Negative: signin/login/logout in host+path.
|
|
2497
2657
|
if (/(?:^|\/)(?:signin|login|logout|sign-in|log-in)\b/.test(hostPath))
|
|
2498
2658
|
continue;
|
|
2659
|
+
// BUG-1 GUARD: registered-domain match against the target service.
|
|
2660
|
+
// Without this, a Google search for "Railway signup" returned a
|
|
2661
|
+
// link to storysite-production.up.railway.app/signup/ — somebody's
|
|
2662
|
+
// hobby Django app hosted on Railway — and the bot filled out the
|
|
2663
|
+
// form, creating a junk account on the wrong website. PSL-aware
|
|
2664
|
+
// eTLD+1 comparison handles platform suffixes like .up.railway.app
|
|
2665
|
+
// and .vercel.app (where each customer subdomain is its own
|
|
2666
|
+
// "registered" entity) correctly.
|
|
2667
|
+
if (!hostMatchesServiceDomain(url.hostname, serviceSlug))
|
|
2668
|
+
continue;
|
|
2499
2669
|
// Score: a host containing the service slug is a strong match.
|
|
2500
2670
|
// Without a slug to compare against, every match scores 1.
|
|
2501
2671
|
const hostLower = url.hostname.toLowerCase();
|
|
@@ -2639,6 +2809,38 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
|
|
|
2639
2809
|
// with whatever we had (or null).
|
|
2640
2810
|
}
|
|
2641
2811
|
}
|
|
2812
|
+
// Pass 4 — Copy-button colocation scan. Railway's "New Token"
|
|
2813
|
+
// modal shows the UUID inside a <code> built character-by-span,
|
|
2814
|
+
// which Pass 1's direct-text walk can't reassemble. Walk every
|
|
2815
|
+
// visible "Copy" affordance's ancestor subtree, tokenize its
|
|
2816
|
+
// innerText, and accept the first token that looks like a
|
|
2817
|
+
// credential. Strict on shape (length 16-256, isolated token)
|
|
2818
|
+
// to avoid false positives on copy-blog-post-link buttons.
|
|
2819
|
+
if (apiKey === null) {
|
|
2820
|
+
try {
|
|
2821
|
+
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
2822
|
+
for (const candidate of await this.browser.extractCredentialsNearCopyButtons()) {
|
|
2823
|
+
// The candidate is a bare whitespace-isolated token. If it's
|
|
2824
|
+
// a UUID, accept it directly — the Copy-button colocation
|
|
2825
|
+
// is the credential signal we'd otherwise demand a textual
|
|
2826
|
+
// "api key" label for.
|
|
2827
|
+
if (UUID_RE.test(candidate)) {
|
|
2828
|
+
apiKey = candidate;
|
|
2829
|
+
break;
|
|
2830
|
+
}
|
|
2831
|
+
// Otherwise route through the normal extractor — accepts
|
|
2832
|
+
// gh*_*, sk_*, pk_*, Stripe/AWS-style prefixes, JWTs, etc.
|
|
2833
|
+
const hit = extractApiKeyFromText(candidate);
|
|
2834
|
+
if (hit !== null && !isTruncatedCapture(candidate, hit)) {
|
|
2835
|
+
apiKey = hit;
|
|
2836
|
+
break;
|
|
2837
|
+
}
|
|
2838
|
+
}
|
|
2839
|
+
}
|
|
2840
|
+
catch {
|
|
2841
|
+
// Non-fatal — leave apiKey as null and fall through.
|
|
2842
|
+
}
|
|
2843
|
+
}
|
|
2642
2844
|
// Last resort: if every path returned a truncated value, persist
|
|
2643
2845
|
// it with a `_truncated` suffix so the host agent can surface the
|
|
2644
2846
|
// partial result to the user (better than reporting "no key
|