unbrowse 3.1.0-experiments.995f8bb → 3.1.0-experiments.9cbcb13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/dist/cli.js +79 -33
  2. package/dist/index.js +2 -6
  3. package/dist/mcp.js +73 -22
  4. package/dist/server.js +25994 -0
  5. package/package.json +1 -2
  6. package/vendor/kuri/manifest.json +1 -1
  7. package/runtime-src/agent-outcome.ts +0 -166
  8. package/runtime-src/analytics-session.ts +0 -55
  9. package/runtime-src/api/browse-index.ts +0 -343
  10. package/runtime-src/api/browse-session.ts +0 -572
  11. package/runtime-src/api/browse-submit-prereqs.ts +0 -48
  12. package/runtime-src/api/browse-submit.ts +0 -1184
  13. package/runtime-src/api/routes.ts +0 -1833
  14. package/runtime-src/auth/browser-cookies.ts +0 -423
  15. package/runtime-src/auth/index.ts +0 -535
  16. package/runtime-src/auth/runtime.ts +0 -116
  17. package/runtime-src/browser/index.ts +0 -659
  18. package/runtime-src/browser/types.ts +0 -41
  19. package/runtime-src/build-info.generated.ts +0 -6
  20. package/runtime-src/capture/index.ts +0 -1929
  21. package/runtime-src/capture/prefetch.ts +0 -95
  22. package/runtime-src/capture/rsc.ts +0 -45
  23. package/runtime-src/cli/shortcuts.ts +0 -273
  24. package/runtime-src/cli.ts +0 -1734
  25. package/runtime-src/client/graph-client.ts +0 -100
  26. package/runtime-src/client/index.ts +0 -1425
  27. package/runtime-src/debug-trace.ts +0 -18
  28. package/runtime-src/domain.ts +0 -38
  29. package/runtime-src/execution/index.ts +0 -3407
  30. package/runtime-src/execution/retry.ts +0 -46
  31. package/runtime-src/execution/robots.ts +0 -167
  32. package/runtime-src/execution/search-forms.ts +0 -188
  33. package/runtime-src/execution/token-resolver.ts +0 -135
  34. package/runtime-src/extraction/index.ts +0 -1507
  35. package/runtime-src/foundry/publish-bundle.ts +0 -392
  36. package/runtime-src/graph/agent-augment.ts +0 -315
  37. package/runtime-src/graph/index.ts +0 -1532
  38. package/runtime-src/graph/local-fixtures.ts +0 -393
  39. package/runtime-src/graph/local-harness.ts +0 -646
  40. package/runtime-src/graph/planner.ts +0 -411
  41. package/runtime-src/graph/session.ts +0 -294
  42. package/runtime-src/graph/trace-store.ts +0 -136
  43. package/runtime-src/impact-log.ts +0 -227
  44. package/runtime-src/index.ts +0 -24
  45. package/runtime-src/indexer/index.ts +0 -465
  46. package/runtime-src/intent-match.ts +0 -1515
  47. package/runtime-src/kuri/client.ts +0 -1839
  48. package/runtime-src/logger.ts +0 -30
  49. package/runtime-src/marketplace/index.ts +0 -111
  50. package/runtime-src/mcp.ts +0 -1911
  51. package/runtime-src/orchestrator/browser-agent.ts +0 -374
  52. package/runtime-src/orchestrator/dag-advisor.ts +0 -59
  53. package/runtime-src/orchestrator/dag-feedback.ts +0 -257
  54. package/runtime-src/orchestrator/first-pass-action.ts +0 -403
  55. package/runtime-src/orchestrator/index.ts +0 -4480
  56. package/runtime-src/orchestrator/passive-publish.ts +0 -187
  57. package/runtime-src/orchestrator/timing-economics.ts +0 -80
  58. package/runtime-src/payments/cascade.ts +0 -137
  59. package/runtime-src/payments/index.ts +0 -270
  60. package/runtime-src/payments/lobster-pay.ts +0 -182
  61. package/runtime-src/payments/wallet.ts +0 -98
  62. package/runtime-src/publish/review-context.ts +0 -93
  63. package/runtime-src/publish/sanitize.ts +0 -197
  64. package/runtime-src/publish/schema-review.ts +0 -192
  65. package/runtime-src/publish-admission.ts +0 -388
  66. package/runtime-src/ratelimit/index.ts +0 -23
  67. package/runtime-src/reverse-engineer/bundle-scanner.ts +0 -127
  68. package/runtime-src/reverse-engineer/description-prompt.ts +0 -213
  69. package/runtime-src/reverse-engineer/index.ts +0 -1551
  70. package/runtime-src/reverse-engineer/token-sources.ts +0 -379
  71. package/runtime-src/router.ts +0 -17
  72. package/runtime-src/routing-telemetry.ts +0 -395
  73. package/runtime-src/runtime/browser-access.ts +0 -11
  74. package/runtime-src/runtime/browser-auth.ts +0 -12
  75. package/runtime-src/runtime/browser-host.ts +0 -48
  76. package/runtime-src/runtime/lifecycle.ts +0 -17
  77. package/runtime-src/runtime/local-server.ts +0 -311
  78. package/runtime-src/runtime/paths.ts +0 -99
  79. package/runtime-src/runtime/setup.ts +0 -251
  80. package/runtime-src/runtime/supervisor.ts +0 -69
  81. package/runtime-src/runtime/update-hints.ts +0 -351
  82. package/runtime-src/server.ts +0 -100
  83. package/runtime-src/session-logs.ts +0 -142
  84. package/runtime-src/settings.ts +0 -221
  85. package/runtime-src/single-binary.ts +0 -143
  86. package/runtime-src/site-policy.ts +0 -54
  87. package/runtime-src/stale-cleanup-runner.ts +0 -144
  88. package/runtime-src/stale-cleanup.ts +0 -133
  89. package/runtime-src/telemetry-attribution.ts +0 -120
  90. package/runtime-src/telemetry.ts +0 -253
  91. package/runtime-src/template-params.ts +0 -141
  92. package/runtime-src/transform/drift.ts +0 -60
  93. package/runtime-src/transform/index.ts +0 -277
  94. package/runtime-src/types/index.ts +0 -1
  95. package/runtime-src/types/skill.ts +0 -931
  96. package/runtime-src/vault/index.ts +0 -196
  97. package/runtime-src/verification/auth-gate.ts +0 -8
  98. package/runtime-src/verification/candidates.ts +0 -27
  99. package/runtime-src/verification/index.ts +0 -120
  100. package/runtime-src/verification/matrix.ts +0 -30
  101. package/runtime-src/version.ts +0 -148
  102. package/runtime-src/workflow/artifact.ts +0 -161
  103. package/runtime-src/workflow/compile.ts +0 -808
  104. package/runtime-src/workflow/publish.ts +0 -225
  105. package/runtime-src/workflow/runtime.ts +0 -213
@@ -1,379 +0,0 @@
1
- /**
2
- * Token source discovery - finds where a captured token value lives in HTML / JS
3
- * so the value can be re-scraped fresh at replay time.
4
- *
5
- * Input: a concrete token value we saw in a captured request header
6
- * Output: an ordered list of AuthTokenSource locators (meta tag, inline script regex,
7
- * or JS bundle regex) that will re-produce the same value when re-evaluated
8
- *
9
- * The resolver (src/execution/token-resolver.ts) walks these sources in order at
10
- * replay time and injects the freshest value into the outgoing request. This closes
11
- * the gap for sites that rotate CSRF tokens per page-load, keep bearer tokens in JS
12
- * bundles (Twitter public API), or hydrate tokens into window.__INITIAL_STATE__.
13
- */
14
-
15
- import type { AuthTokenSource } from "../types/index.js";
16
-
17
// Tokens shorter than this are ignored: with short values false-positive
// matches dominate (common 8-char hex substrings appear in every page).
const MIN_TOKEN_LENGTH = 16;

// Upper bound on the number of locators returned per token. More sources make
// resolution slower without meaningfully improving robustness.
const MAX_SOURCES = 5;

/**
 * Collects locators that currently produce `tokenValue` from the supplied HTML
 * and JS bundle contents, so the value can be re-scraped at replay time.
 *
 * Candidates are gathered in a fixed order - HTML meta tags, inline scripts,
 * then JS bundles - de-duplicated by their JSON serialization, and truncated
 * to MAX_SOURCES entries.
 *
 * @param tokenValue Concrete token value observed in a captured request.
 * @param html Page HTML to scan; skipped when undefined or empty.
 * @param jsBundles Bundle URL -> bundle text; skipped when absent or empty.
 * @returns Ordered, de-duplicated locators; empty when the token is missing or
 *          shorter than MIN_TOKEN_LENGTH.
 */
export function findTokenSources(
  tokenValue: string,
  html: string | undefined,
  jsBundles?: Map<string, string>,
): AuthTokenSource[] {
  const tooShort = !tokenValue || tokenValue.length < MIN_TOKEN_LENGTH;
  if (tooShort) return [];

  const candidates: AuthTokenSource[] = html
    ? [
        ...findHtmlMetaSources(tokenValue, html),
        ...findInlineScriptSources(tokenValue, html),
      ]
    : [];
  if (jsBundles && jsBundles.size > 0) {
    candidates.push(...findJsBundleSources(tokenValue, jsBundles));
  }

  // A Map keeps first-seen insertion order, so it serves as both the
  // "already seen" set and the ordered result list.
  const unique = new Map<string, AuthTokenSource>();
  for (const candidate of candidates) {
    if (unique.size >= MAX_SOURCES) break;
    const fingerprint = JSON.stringify(candidate);
    if (!unique.has(fingerprint)) unique.set(fingerprint, candidate);
  }
  return [...unique.values()];
}
62
-
63
// HTML <meta> scanner
// Matches patterns like:
//   <meta name="csrf-token" content="ABC123...">
//   <meta name="csrfToken" value="ABC123..." />
//   <meta property="csrf" content="ABC123...">

const META_TAG_PATTERN = /<meta\s+([^>]+?)\s*\/?>/gi;

/**
 * Scans every <meta> tag in `html` and emits an "html-meta" locator for each
 * tag whose `content` or `value` attribute equals `tokenValue` exactly.
 *
 * A tag is only considered when it carries a non-empty identifying attribute
 * (`name`, then `property`, then `itemprop`); that identifier becomes the
 * locator's `meta_name`.
 */
function findHtmlMetaSources(tokenValue: string, html: string): AuthTokenSource[] {
  const found: AuthTokenSource[] = [];
  META_TAG_PATTERN.lastIndex = 0;
  for (const tag of html.matchAll(META_TAG_PATTERN)) {
    const attrText = tag[1];
    // Cheap substring pre-filter before any attribute parsing.
    if (!attrText.includes(tokenValue)) continue;

    const metaName =
      extractAttr(attrText, "name") ??
      extractAttr(attrText, "property") ??
      extractAttr(attrText, "itemprop");
    if (!metaName) continue;

    // `content` takes precedence over `value` when both hold the token.
    const holder = (["content", "value"] as const).find(
      (candidate) => extractAttr(attrText, candidate) === tokenValue,
    );
    if (holder !== undefined) {
      found.push({ kind: "html-meta", meta_name: metaName, meta_attr: holder });
    }
  }
  return found;
}
99
-
100
// One HTML attribute: a name, optionally followed by `=` and a double-quoted,
// single-quoted, or unquoted value. Only ever used through matchAll, which
// works on a clone, so the shared instance's lastIndex is never advanced.
const ATTRIBUTE_PATTERN = /([^\s"'=<>\/]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s>]+)))?/g;

/**
 * Returns the value of the first attribute called `name` (case-insensitive)
 * in a tag's attribute text, or undefined when no such attribute has a value.
 *
 * Attributes are walked one at a time instead of searching for `name=` with a
 * word boundary. A word boundary also matches after a hyphen, which made
 * `data-name="..."` answer a lookup for `name`, and a bare search could match
 * text inside another attribute's quoted value (`content="x name=y"`).
 *
 * @param attrs Attribute text of a single tag, without the tag name.
 * @param name Attribute name to look up.
 * @returns The attribute value (quotes stripped), or undefined.
 */
function extractAttr(attrs: string, name: string): string | undefined {
  const wanted = name.toLowerCase();
  for (const attribute of attrs.matchAll(ATTRIBUTE_PATTERN)) {
    if (attribute[1].toLowerCase() !== wanted) continue;
    const value = attribute[2] ?? attribute[3] ?? attribute[4];
    // A valueless attribute (`<meta name>`) is skipped so that a later,
    // valued occurrence can still be found.
    if (value !== undefined) return value;
  }
  return undefined;
}
106
-
107
// Inline <script> scanner

const INLINE_SCRIPT_PATTERN = /<script(?:\s[^>]*)?>([\s\S]*?)<\/script>/gi;
// Number of characters before the token that are inspected when building a
// context regex.
const CONTEXT_WINDOW = 48;

/**
 * Emits one "html-inline-script" locator for every <script> body that
 * contains `tokenValue` and for which a context regex can be built around the
 * token's first occurrence in that body.
 */
function findInlineScriptSources(tokenValue: string, html: string): AuthTokenSource[] {
  const locators: AuthTokenSource[] = [];
  INLINE_SCRIPT_PATTERN.lastIndex = 0;
  for (const script of html.matchAll(INLINE_SCRIPT_PATTERN)) {
    const body = script[1];
    const position = body.indexOf(tokenValue);
    const pattern = position >= 0 ? buildContextRegex(body, position, tokenValue) : null;
    if (pattern) {
      locators.push({ kind: "html-inline-script", inline_script_regex: pattern });
    }
  }
  return locators;
}
131
-
132
// JS bundle scanner

/**
 * Emits one "js-bundle" locator for every bundle whose text contains
 * `tokenValue` and for which a context regex can be built around the token's
 * first occurrence. The bundle URL is reduced to a pattern via
 * bundleUrlPattern before being stored.
 */
function findJsBundleSources(
  tokenValue: string,
  bundles: Map<string, string>,
): AuthTokenSource[] {
  const locators: AuthTokenSource[] = [];
  bundles.forEach((text, url) => {
    const position = text.indexOf(tokenValue);
    if (position < 0) return;

    const pattern = buildContextRegex(text, position, tokenValue);
    if (!pattern) return;

    locators.push({
      kind: "js-bundle",
      bundle_url_pattern: bundleUrlPattern(url),
      bundle_regex: pattern,
    });
  });
  return locators;
}
154
-
155
/**
 * Reduces a bundle URL to a pattern: the URL's path (or the raw string when it
 * does not parse as an absolute URL) with the first `.<hex>.` / `-<hex>.` run
 * of six or more lowercase hex digits collapsed to `.`, and a trailing
 * `.js` / `.mjs` extension removed.
 */
function bundleUrlPattern(bundleUrl: string): string {
  let target = bundleUrl;
  try {
    target = new URL(bundleUrl).pathname;
  } catch {
    // Not an absolute URL: fall back to the raw string.
  }
  const withoutHash = target.replace(/[.-][a-f0-9]{6,}\./, ".");
  return withoutHash.replace(/\.(m?js)$/i, "");
}
164
-
165
// Context regex builder

/**
 * Builds the source of a regex that re-locates the token at `tokenIdx` in
 * `content` from the text immediately around it. Capture group 1 is the token.
 *
 * The prefix is taken from the CONTEXT_WINDOW characters before the token:
 *  - when they end in a `key: "` / `key = '` style assignment, the prefix
 *    starts at that assignment's leading delimiter and runs up to the token;
 *  - otherwise the whole (left-trimmed) window is used, provided it is at
 *    least 6 characters long.
 * The character right after the token must be a quote, backtick, `,`, `}`,
 * `]` or `)`; it becomes a lookahead terminator.
 *
 * The finished pattern is run against `content` and only returned when it
 * captures exactly `tokenValue`. Previously the assignment branch discarded
 * any text between the key and the token (for example `"Bearer "`), which
 * produced patterns that could not re-capture the token they were built for.
 *
 * @returns The regex source, or null when no reliable pattern can be built.
 */
function buildContextRegex(
  content: string,
  tokenIdx: number,
  tokenValue: string,
): string | null {
  const start = Math.max(0, tokenIdx - CONTEXT_WINDOW);
  let context = content.slice(start, tokenIdx);

  const delimMatch = context.match(/([{,\[(]\s*["'`]?[\w$-]+\s*["'`]?\s*[:=]\s*["'`])[^{,\[(]*$/);
  if (delimMatch) {
    // Keep the whole match, not just group 1: the trailing part is literal
    // text that sits between the assignment and the token.
    context = delimMatch[0];
  } else {
    context = context.trimStart();
    if (context.length < 6) return null;
  }

  const afterIdx = tokenIdx + tokenValue.length;
  const terminator = content.charAt(afterIdx);

  let terminatorPattern: string;
  switch (terminator) {
    case '"':
    case "'":
    case "`":
      terminatorPattern = terminator;
      break;
    case ",":
    case "}":
    case "]":
    case ")":
      terminatorPattern = `[${escapeRegExp(terminator)}]`;
      break;
    default:
      return null;
  }

  const tokenCharset = inferTokenCharset(tokenValue);
  const pattern = `${escapeRegExp(context)}(${tokenCharset}+?)(?=${terminatorPattern})`;

  // Reject patterns that do not reproduce the token right now (e.g. the same
  // prefix occurs earlier with another value, or the token contains a
  // character outside the inferred class).
  const probe = new RegExp(pattern).exec(content);
  return probe !== null && probe[1] === tokenValue ? pattern : null;
}
199
-
200
/**
 * Picks the regex character class used to re-capture a token shaped like
 * `tokenValue`.
 *
 * The hex test runs first: it was previously unreachable because every hex
 * string also passes the base64 test. The former alphanumeric-only branch was
 * unreachable for the same reason and is removed rather than promoted, so
 * alphanumeric tokens keep the broader base64 class they resolve to today.
 *
 * @returns A character class without a quantifier. The final fallback
 *          excludes quotes, backticks, `,`, `}`, `]`, `)` and whitespace.
 */
function inferTokenCharset(tokenValue: string): string {
  if (/^[A-Fa-f0-9]+$/.test(tokenValue)) return "[A-Fa-f0-9]";
  if (/^[A-Za-z0-9+/=]+$/.test(tokenValue)) return "[A-Za-z0-9+/=]";
  if (/^[A-Za-z0-9._-]+$/.test(tokenValue)) return "[A-Za-z0-9._-]";
  return "[^\"'`,}\\]\\s)]";
}
207
-
208
/**
 * Backslash-escapes the regex metacharacters `. * + ? ^ $ { } ( ) | [ ] \`
 * in `s` so the result can be embedded in a RegExp source as literal text.
 */
function escapeRegExp(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => "\\" + metaChar);
}
211
-
212
- // Resolver-side helpers
213
-
214
/**
 * Re-evaluates an HTML-based locator against freshly fetched HTML.
 *
 * - "html-meta": returns the non-empty `meta_attr` value (default `content`)
 *   of the first <meta> tag whose `name`, `property` or `itemprop` equals
 *   `meta_name` (case-insensitive). Tags are parsed with extractAttr, the
 *   same helper the capture side uses, so attribute order, quoting style and
 *   unquoted values are handled the same way on both sides. The previous
 *   regex lookup required quoted values and could read `data-content=` as
 *   `content=`.
 * - "html-inline-script": returns capture group 1 of `inline_script_regex`
 *   from the first <script> body in which it matches with a non-empty group.
 *
 * @returns The extracted token, or undefined when the locator is of another
 *          kind, is incomplete, or matches nothing.
 */
export function extractTokenFromHtml(
  source: AuthTokenSource,
  html: string,
): string | undefined {
  if (source.kind === "html-meta" && source.meta_name) {
    const valueAttr = source.meta_attr ?? "content";
    const wantedName = source.meta_name.toLowerCase();
    for (const tag of html.matchAll(/<meta\s+([^>]+?)\s*\/?>/gi)) {
      const attrs = tag[1];
      const isNamed = ["name", "property", "itemprop"].some(
        (key) => extractAttr(attrs, key)?.toLowerCase() === wantedName,
      );
      if (!isNamed) continue;
      const value = extractAttr(attrs, valueAttr);
      // An empty value is treated as "not found" so a later tag can supply it.
      if (value) return value;
    }
    return undefined;
  }

  if (source.kind === "html-inline-script" && source.inline_script_regex) {
    const inner = new RegExp(source.inline_script_regex);
    for (const script of html.matchAll(/<script(?:\s[^>]*)?>([\s\S]*?)<\/script>/gi)) {
      const hit = inner.exec(script[1]);
      if (hit && hit[1]) return hit[1];
    }
    return undefined;
  }

  return undefined;
}
249
-
250
/**
 * Re-evaluates a "js-bundle" locator against freshly fetched bundle text.
 *
 * @returns Capture group 1 of `bundle_regex` at its first match in
 *          `bundleContent`; undefined when the locator is not a "js-bundle"
 *          source, carries no regex, or the regex does not match.
 */
export function extractTokenFromBundle(
  source: AuthTokenSource,
  bundleContent: string,
): string | undefined {
  if (source.kind === "js-bundle" && source.bundle_regex) {
    const hit = new RegExp(source.bundle_regex).exec(bundleContent);
    if (hit !== null) return hit[1];
  }
  return undefined;
}
258
-
259
- // ─── Endpoint enrichment ──────────────────────────────────────────────────
260
- //
261
- // Walks captured requests, extracts auth-sensitive headers, runs findTokenSources
262
- // for each, and attaches matching TokenBindings to the corresponding endpoints.
263
- //
264
- // This is the capture-side glue that wires HTML/JS token discovery into the DAG.
265
-
266
- import type { EndpointDescriptor, AuthTokenBinding, AuthTokenSource as _TS } from "../types/index.js";
267
-
268
/** Request header names that are scanned for token values: bearer auth,
 *  CSRF tokens, API keys, transaction IDs and similar. */
const TOKEN_HEADER_PATTERN =
  /^(authorization|x-csrf-token|x-xsrf-token|x-csrftoken|csrf-token|x-api-key|x-auth-token|x-access-token|x-session-token|x-shopify-storefront-access-token|x-twitter-auth-type|x-client-transaction-id|x-requested-with|x-client-version)$/i;

/** The subset of a captured request this module reads. */
interface MinimalRequest {
  url: string;
  method: string;
  request_headers: Record<string, string>;
}

/**
 * Guesses sources for a token that was not found in the captured HTML or JS:
 *  - CSRF/XSRF headers fall back to a list of well-known cookie names;
 *  - `authorization` falls back to up to three <script src> URLs whose path
 *    contains main/app/client/bundle/vendor (no regex attached).
 * Any other header yields no guesses.
 */
function fallbackTokenSources(lowerName: string, html: string | undefined): AuthTokenSource[] {
  if (/csrf|xsrf/i.test(lowerName)) {
    return [{ kind: "cookie", cookie_names: ["ct0", "csrf_token", "_csrf", "csrftoken", "XSRF-TOKEN"] }];
  }

  const guesses: AuthTokenSource[] = [];
  if (lowerName !== "authorization" || !html) return guesses;

  for (const script of html.matchAll(/<script[^>]+src=["']([^"']+)["']/gi)) {
    if (!/main|app|client|bundle|vendor/i.test(script[1])) continue;
    guesses.push({ kind: "js-bundle", bundle_url_pattern: script[1] });
    if (guesses.length >= 3) break;
  }
  return guesses;
}

/**
 * Attaches token bindings to endpoints based on captured requests.
 *
 * For every request that matches at least one endpoint (URL + method), each
 * header accepted by TOKEN_HEADER_PATTERN is resolved to a token value and
 * looked up in the captured HTML / JS bundles. When nothing is found, a
 * header-specific fallback guess is used. A header that ends up with at least
 * one source produces a binding (keyed by the lower-cased header name) that
 * replaces any existing binding of the same name on every matching endpoint.
 *
 * Runs in-place: mutates `endpoint.auth_tokens`.
 *
 * @returns The number of (endpoint, header) bindings written.
 */
export function enrichEndpointsWithTokenSources(
  endpoints: EndpointDescriptor[],
  requests: MinimalRequest[],
  html: string | undefined,
  jsBundles: Map<string, string> | undefined,
): number {
  // Whether there is any captured text to search at all; constant per call.
  const searchable = Boolean(html || (jsBundles && jsBundles.size > 0));
  let bindingsWritten = 0;

  for (const request of requests) {
    const targets = endpoints.filter((ep) => endpointMatchesRequest(ep, request));
    if (targets.length === 0) continue;

    for (const [rawName, rawValue] of Object.entries(request.request_headers)) {
      if (!TOKEN_HEADER_PATTERN.test(rawName)) continue;
      const token = extractTokenValue(rawName, rawValue);
      if (!token) continue;

      const paramName = rawName.toLowerCase();
      let sources: AuthTokenSource[] = searchable
        ? findTokenSources(token, html, jsBundles)
        : [];
      if (sources.length === 0) sources = fallbackTokenSources(paramName, html);
      if (sources.length === 0) continue;

      const binding: AuthTokenBinding = {
        param_name: paramName,
        param_location: "header",
        sources,
        refresh_on_401: true,
      };

      for (const endpoint of targets) {
        if (!endpoint.auth_tokens) endpoint.auth_tokens = [];
        // Always replace with the fresh binding so stale sources from cached
        // merges get overwritten.
        const slot = endpoint.auth_tokens.findIndex((b) => b.param_name === binding.param_name);
        if (slot < 0) {
          endpoint.auth_tokens.push(binding);
        } else {
          endpoint.auth_tokens[slot] = binding;
        }
        bindingsWritten++;
      }
    }
  }
  return bindingsWritten;
}
348
-
349
/**
 * Extracts the token portion of a header value.
 *
 * `Authorization: Bearer XYZ` and `Authorization: Token XYZ` yield "XYZ"; any
 * other Authorization value, and every other header, is returned unchanged.
 * An empty header value yields null.
 */
function extractTokenValue(headerName: string, headerValue: string): string | null {
  if (!headerValue) return null;
  if (headerName.toLowerCase() !== "authorization") return headerValue;

  const [scheme, credential] = headerValue.split(/\s+/, 2);
  const hasTokenScheme = credential !== undefined && /^(bearer|token)$/i.test(scheme);
  return hasTokenScheme ? credential : headerValue;
}
359
-
360
// Stand-in substituted for `{placeholder}` groups before URL parsing. It
// survives URL parsing unchanged and is unlikely to occur in a real path,
// unlike a bare "x", which turned literal `/x/` template segments into
// wildcards.
const TEMPLATE_PLACEHOLDER = "__tpl_param__";

/**
 * Matches one request path segment against one template segment in which
 * every placeholder has been replaced by TEMPLATE_PLACEHOLDER.
 *
 * A segment without placeholders must match exactly. Otherwise the literal
 * pieces around the placeholders must appear in order, with the first piece
 * as a prefix and the last as a suffix, and every placeholder must cover at
 * least one character.
 */
function templateSegmentMatches(templateSeg: string, requestSeg: string): boolean {
  const literals = templateSeg.split(TEMPLATE_PLACEHOLDER);
  if (literals.length === 1) return templateSeg === requestSeg;

  const head = literals[0];
  const tail = literals[literals.length - 1];
  if (!requestSeg.startsWith(head) || !requestSeg.endsWith(tail)) return false;

  let cursor = head.length;
  const limit = requestSeg.length - tail.length;
  // Place each inner literal at its leftmost position that still leaves one
  // character for the placeholder before it.
  for (const literal of literals.slice(1, -1)) {
    const at = requestSeg.indexOf(literal, cursor + 1);
    if (at < 0 || at + literal.length > limit) return false;
    cursor = at + literal.length;
  }
  // The last placeholder also needs at least one character.
  return limit - cursor >= 1;
}

/**
 * Loose match between a captured request and an endpoint: same method, same
 * hostname, same number of path segments, and every template path segment
 * matches the corresponding request segment (placeholders match any non-empty
 * text, including inside a segment such as `report-{id}.json`).
 *
 * @returns false when they differ or when either URL cannot be parsed.
 */
function endpointMatchesRequest(ep: EndpointDescriptor, req: MinimalRequest): boolean {
  if (ep.method !== req.method) return false;
  try {
    const reqUrl = new URL(req.url);
    const tplUrl = new URL(ep.url_template.replace(/\{[^}]+\}/g, TEMPLATE_PLACEHOLDER));
    if (reqUrl.hostname !== tplUrl.hostname) return false;

    const reqSegs = reqUrl.pathname.split("/").filter(Boolean);
    const tplSegs = tplUrl.pathname.split("/").filter(Boolean);
    if (reqSegs.length !== tplSegs.length) return false;

    return tplSegs.every((segment, i) => templateSegmentMatches(segment, reqSegs[i]));
  } catch {
    // Unparseable request URL or template: treat as "no match".
    return false;
  }
}
@@ -1,17 +0,0 @@
1
- import type { EndpointDescriptor } from "./types/index.js";
2
-
3
/** Score cut-off exported alongside computeUnsafeActionScore for callers that gate unsafe actions. */
export const UNSAFE_ACTION_BLOCK_THRESHOLD = 0.6;

/**
 * Scores how risky it is to execute an endpoint, as a number in [0, 1].
 *
 * Risk signals add to the score (unsafe idempotency, a POST/PUT/DELETE
 * method, a description saying it was inferred from a JS bundle, a missing
 * response schema, failed verification, reliability below 0.3); trust signals
 * subtract from it (a known trigger URL, verified status). The adjustments
 * are applied in the listed order and the total is clamped to [0, 1].
 */
export function computeUnsafeActionScore(endpoint: EndpointDescriptor): number {
  const isMutatingMethod = ["POST", "PUT", "DELETE"].includes(endpoint.method);
  const adjustments: Array<[applies: boolean, delta: number]> = [
    [endpoint.idempotency === "unsafe", 0.4],
    [isMutatingMethod, 0.2],
    [/inferred from js bundle/i.test(endpoint.description ?? ""), 0.2],
    [!endpoint.response_schema, 0.1],
    [endpoint.verification_status === "failed", 0.1],
    [endpoint.reliability_score < 0.3, 0.1],
    [Boolean(endpoint.trigger_url), -0.1],
    [endpoint.verification_status === "verified", -0.15],
  ];

  let total = 0;
  for (const [applies, delta] of adjustments) {
    if (applies) total += delta;
  }
  return Math.min(1, Math.max(0, total));
}