imprint-mcp 0.4.4 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "imprint-mcp",
|
|
3
|
-
"version": "0.4.4",
|
|
3
|
+
"version": "0.4.6",
|
|
4
4
|
"description": "Teach an AI agent how to use any website. Once. Records a real browser session + narration; generates a deterministic MCP tool plus a DOM-replay playbook fallback.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
@@ -13,6 +13,9 @@ You receive a JSON object:
|
|
|
13
13
|
"narration": [
|
|
14
14
|
{ "timestamp": ms, "text": "what the user said they were doing" }
|
|
15
15
|
],
|
|
16
|
+
"events": [
|
|
17
|
+
{ "seq": int, "timestamp": ms, "type": "click|input|change|submit|navigation", "detail": "truncated browser event detail" }
|
|
18
|
+
],
|
|
16
19
|
"requests": [
|
|
17
20
|
{
|
|
18
21
|
"seq": int,
|
|
@@ -35,6 +38,9 @@ You receive a JSON object:
|
|
|
35
38
|
```
|
|
36
39
|
|
|
37
40
|
The narration is the user's own description of what they did. Use it to understand the workflow's intent, then select the requests that serve that intent.
|
|
41
|
+
The events are the browser actions captured during recording. Use input/change/submit
|
|
42
|
+
event timestamps to disambiguate repeated endpoint calls when narration was spoken
|
|
43
|
+
after the action.
|
|
38
44
|
|
|
39
45
|
Request entries may include `repeatCount`, `repeatedSeqs`, and `lastTimestamp` when identical requests were compacted. Select the representative `seq` unless a specific repeated seq is needed for an intentional multi-step workflow.
|
|
40
46
|
|
|
@@ -46,6 +52,7 @@ Request entries may include `repeatCount`, `repeatedSeqs`, and `lastTimestamp` w
|
|
|
46
52
|
- Data fetches that populate the page the user cared about
|
|
47
53
|
- Navigation documents (the HTML pages the user visited)
|
|
48
54
|
- Lookup or resolution endpoints (anything that converts user input into structured data -- e.g. returning locations, IDs, or options the user selects from)
|
|
55
|
+
- **Credential-bearing requests** -- any request whose body or headers contain `${credential.username}`, `${credential.password}`, or other `${credential.*}` placeholders. These are login/auth requests critical for downstream compilation. Always include them, even if they look like duplicates of other login requests to the same endpoint.
|
|
49
56
|
|
|
50
57
|
**What to EXCLUDE** (even if same-origin):
|
|
51
58
|
- Analytics and telemetry (`/collect`, `/event`, `/track`, `/log`, `/beacon`, `/pixel`, `analytics`, `telemetry`, `metrics`)
|
|
@@ -61,9 +68,12 @@ Request entries may include `repeatCount`, `repeatedSeqs`, and `lastTimestamp` w
|
|
|
61
68
|
|
|
62
69
|
1. **Read the narration first.** It tells you the user's goal -- "searching for flights," "booking a hotel," "checking prices." Every request you select should serve that goal.
|
|
63
70
|
2. **Correlate timestamps.** The narration has timestamps; the requests have timestamps. A request whose timestamp falls near a narration event ("now I clicked search") is likely load-bearing.
|
|
64
|
-
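3. **Prefer POST/PUT/PATCH over GET** when both exist for the same endpoint -- the mutation is usually the load-bearing one.
|
|
65
|
-
4. **When in doubt, include it.** A false positive (including a noise request) is cheaper than a false negative (excluding the result-bearing XHR). The downstream compilation LLM can ignore noise, but it can't work with data it never sees.
|
|
66
|
-
5. **Aim for 5-50 requests** out of potentially hundreds. If you're selecting more than 50, you're probably not filtering aggressively enough. If fewer than 3, double-check you haven't dropped the key data-fetch.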
|
|
71
|
+
3. **Use browser events for repeated calls.** If the same endpoint appears more
|
|
72
|
+
than once with different user-controlled values, keep the request closest to
|
|
73
|
+
the input/change/submit event, even if narration came later.
|
|
74
|
+
4. **Prefer POST/PUT/PATCH over GET** when both exist for the same endpoint -- the mutation is usually the load-bearing one.
|
|
75
|
+
5. **When in doubt, include it.** A false positive (including a noise request) is cheaper than a false negative (excluding the result-bearing XHR). The downstream compilation LLM can ignore noise, but it can't work with data it never sees.
|
|
76
|
+
6. **Aim for 5-50 requests** out of potentially hundreds. If you're selecting more than 50, you're probably not filtering aggressively enough. If fewer than 3, double-check you haven't dropped the key data-fetch.
|
|
67
77
|
|
|
68
78
|
## Output
|
|
69
79
|
|
package/src/imprint/compile.ts
CHANGED
|
@@ -31,6 +31,7 @@ import { redactSession } from './redact.ts';
|
|
|
31
31
|
import { compactRequestContexts, requestContextDigest } from './request-context.ts';
|
|
32
32
|
import { ensureImprintRuntimeLink } from './runtime-link.ts';
|
|
33
33
|
import type { ClassifiedValue } from './session-diff.ts';
|
|
34
|
+
import { isTelemetryRequest } from './telemetry.ts';
|
|
34
35
|
import type { SharedCompileContext, ToolCandidate } from './tool-candidates.ts';
|
|
35
36
|
import { setSpanAttributes, traced } from './tracing.ts';
|
|
36
37
|
import {
|
|
@@ -280,6 +281,19 @@ function safeUrl(s: string): URL | null {
|
|
|
280
281
|
}
|
|
281
282
|
}
|
|
282
283
|
|
|
284
|
+
// ─── Credential-bearing request detection ───────────────────────────────────
|
|
285
|
+
|
|
286
|
+
const CREDENTIAL_PLACEHOLDER_RE = /\$\{credential\.[^}]+\}/;
|
|
287
|
+
|
|
288
|
+
export function findCredentialBearingSeqs(session: Session): number[] {
|
|
289
|
+
const seqs: number[] = [];
|
|
290
|
+
for (const r of session.requests) {
|
|
291
|
+
const text = `${r.url}\n${JSON.stringify(r.headers)}\n${r.body ?? ''}`;
|
|
292
|
+
if (CREDENTIAL_PLACEHOLDER_RE.test(text)) seqs.push(r.seq);
|
|
293
|
+
}
|
|
294
|
+
return seqs;
|
|
295
|
+
}
|
|
296
|
+
|
|
283
297
|
// ─── triageRequests (LLM-based request filtering) ───────────────────────────
|
|
284
298
|
|
|
285
299
|
const TRIAGE_RESOURCE_TYPES = new Set(['XHR', 'Fetch', 'Document']);
|
|
@@ -288,6 +302,20 @@ const HEADER_TRUNCATE_LIMIT = 200;
|
|
|
288
302
|
// data-bearing POSTs (search/booking) from telemetry; full bodies on a busy
|
|
289
303
|
// site can total >1MB and blow the 200K-token cap on `claude-opus-4-8`.
|
|
290
304
|
const TRIAGE_BODY_LIMIT = 500;
|
|
305
|
+
const TRIAGE_ACTION_ALIGNMENT_BEFORE_MS = 1000;
|
|
306
|
+
const TRIAGE_ACTION_ALIGNMENT_AFTER_MS = 5000;
|
|
307
|
+
const TRIAGE_CONTEXT_EVENT_TYPES = new Set<Session['events'][number]['type']>([
|
|
308
|
+
'navigation',
|
|
309
|
+
'click',
|
|
310
|
+
'input',
|
|
311
|
+
'change',
|
|
312
|
+
'submit',
|
|
313
|
+
]);
|
|
314
|
+
const TRIAGE_ACTION_EVENT_TYPES = new Set<Session['events'][number]['type']>([
|
|
315
|
+
'input',
|
|
316
|
+
'change',
|
|
317
|
+
'submit',
|
|
318
|
+
]);
|
|
291
319
|
|
|
292
320
|
export interface TriageResult {
|
|
293
321
|
session: Session;
|
|
@@ -317,6 +345,13 @@ interface TriageRequestContext {
|
|
|
317
345
|
lastTimestamp?: number;
|
|
318
346
|
}
|
|
319
347
|
|
|
348
|
+
interface TriageEventContext {
|
|
349
|
+
seq: number;
|
|
350
|
+
timestamp: number;
|
|
351
|
+
type: Session['events'][number]['type'];
|
|
352
|
+
detail: string;
|
|
353
|
+
}
|
|
354
|
+
|
|
320
355
|
export async function triageRequests(
|
|
321
356
|
session: Session,
|
|
322
357
|
llmConfig?: LLMOptions,
|
|
@@ -369,6 +404,7 @@ export async function triageRequests(
|
|
|
369
404
|
site: session.site,
|
|
370
405
|
url: session.url,
|
|
371
406
|
narration: session.narration,
|
|
407
|
+
events: buildTriageEventContexts(session),
|
|
372
408
|
requests: metadata,
|
|
373
409
|
};
|
|
374
410
|
|
|
@@ -408,7 +444,8 @@ export async function triageRequests(
|
|
|
408
444
|
);
|
|
409
445
|
}
|
|
410
446
|
|
|
411
|
-
const selectedSet = new Set([...(seqs as number[]), ...preserveSeqs]);
|
|
447
|
+
const rescuedSeqs = rescueActionAlignedRepeatedSeqs(session, seqs as number[], compacted);
|
|
448
|
+
const selectedSet = new Set([...(seqs as number[]), ...rescuedSeqs, ...preserveSeqs]);
|
|
412
449
|
const triaged: Session = {
|
|
413
450
|
...session,
|
|
414
451
|
requests: session.requests.filter((r) => selectedSet.has(r.seq)),
|
|
@@ -436,6 +473,63 @@ export async function triageRequests(
|
|
|
436
473
|
);
|
|
437
474
|
}
|
|
438
475
|
|
|
476
|
+
export function buildTriageEventContexts(session: Session): TriageEventContext[] {
|
|
477
|
+
return session.events
|
|
478
|
+
.filter((event) => TRIAGE_CONTEXT_EVENT_TYPES.has(event.type))
|
|
479
|
+
.map((event) => ({
|
|
480
|
+
seq: event.seq,
|
|
481
|
+
timestamp: event.timestamp,
|
|
482
|
+
type: event.type,
|
|
483
|
+
detail: truncate(event.detail, TRIAGE_BODY_LIMIT) ?? '',
|
|
484
|
+
}));
|
|
485
|
+
}
|
|
486
|
+
|
|
487
|
+
export function rescueActionAlignedRepeatedSeqs(
|
|
488
|
+
session: Session,
|
|
489
|
+
selectedSeqs: Iterable<number>,
|
|
490
|
+
compactedRequests: TriageRequestContext[],
|
|
491
|
+
): number[] {
|
|
492
|
+
const selectedSet = new Set(selectedSeqs);
|
|
493
|
+
const requestBySeq = new Map(session.requests.map((request) => [request.seq, request]));
|
|
494
|
+
const actionTimestamps = session.events
|
|
495
|
+
.filter((event) => TRIAGE_ACTION_EVENT_TYPES.has(event.type))
|
|
496
|
+
.map((event) => event.timestamp);
|
|
497
|
+
if (actionTimestamps.length === 0) return [];
|
|
498
|
+
|
|
499
|
+
const rescued = new Set<number>();
|
|
500
|
+
for (const request of compactedRequests) {
|
|
501
|
+
const repeatedSeqs = request.repeatedSeqs ?? [];
|
|
502
|
+
if (repeatedSeqs.length === 0) continue;
|
|
503
|
+
if (!selectedSet.has(request.seq) && !repeatedSeqs.some((seq) => selectedSet.has(seq))) {
|
|
504
|
+
continue;
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
for (const seq of repeatedSeqs) {
|
|
508
|
+
if (selectedSet.has(seq)) continue;
|
|
509
|
+
const original = requestBySeq.get(seq);
|
|
510
|
+
if (!original) continue;
|
|
511
|
+
if (!isTriageRescueCandidate(original)) continue;
|
|
512
|
+
if (!isNearActionEvent(original.timestamp, actionTimestamps)) continue;
|
|
513
|
+
rescued.add(seq);
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
return [...rescued].sort((a, b) => a - b);
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
function isTriageRescueCandidate(request: Session['requests'][number]): boolean {
|
|
521
|
+
if (request.resourceType !== 'XHR' && request.resourceType !== 'Fetch') return false;
|
|
522
|
+
return !isTelemetryRequest(request);
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
function isNearActionEvent(timestamp: number, actionTimestamps: number[]): boolean {
|
|
526
|
+
return actionTimestamps.some(
|
|
527
|
+
(eventTimestamp) =>
|
|
528
|
+
timestamp >= eventTimestamp - TRIAGE_ACTION_ALIGNMENT_BEFORE_MS &&
|
|
529
|
+
timestamp <= eventTimestamp + TRIAGE_ACTION_ALIGNMENT_AFTER_MS,
|
|
530
|
+
);
|
|
531
|
+
}
|
|
532
|
+
|
|
439
533
|
function triageRequestGroupKey(request: TriageRequestContext): unknown[] {
|
|
440
534
|
let urlKey: string = request.url;
|
|
441
535
|
let paramSignature = '';
|
package/src/imprint/teach.ts
CHANGED
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
type CompileAgentProgress,
|
|
28
28
|
type TriageResult,
|
|
29
29
|
compilePlaybook,
|
|
30
|
+
findCredentialBearingSeqs,
|
|
30
31
|
generate,
|
|
31
32
|
triageRequests,
|
|
32
33
|
} from './compile.ts';
|
|
@@ -826,11 +827,25 @@ export async function teach(opts: TeachOptions): Promise<TeachResult> {
|
|
|
826
827
|
const model = await getModel();
|
|
827
828
|
mp.pause();
|
|
828
829
|
mp.clear();
|
|
830
|
+
const credentialSeqs = findCredentialBearingSeqs(triageSession);
|
|
829
831
|
spinner.start('Triaging requests');
|
|
830
|
-
localTriageResult = await triageRequests(triageSession, {
|
|
831
|
-
provider: providerName,
|
|
832
|
-
model,
|
|
833
|
-
});
|
|
832
|
+
localTriageResult = await triageRequests(
|
|
833
|
+
triageSession,
|
|
834
|
+
{
|
|
835
|
+
provider: providerName,
|
|
836
|
+
model,
|
|
837
|
+
},
|
|
838
|
+
credentialSeqs.length > 0
|
|
839
|
+
? {
|
|
840
|
+
sharedContext: {
|
|
841
|
+
loginRequestSeqs: credentialSeqs,
|
|
842
|
+
credentialNames: [],
|
|
843
|
+
tokenExtractionNotes: '',
|
|
844
|
+
sharedHelperNotes: '',
|
|
845
|
+
},
|
|
846
|
+
}
|
|
847
|
+
: {},
|
|
848
|
+
);
|
|
834
849
|
spinner.stop(
|
|
835
850
|
`Triaged to ${localTriageResult.selectedSeqs.length} requests (from ${triageSession.requests.length}).`,
|
|
836
851
|
);
|
|
@@ -868,6 +883,7 @@ export async function teach(opts: TeachOptions): Promise<TeachResult> {
|
|
|
868
883
|
sessionPath: compileSessionPath,
|
|
869
884
|
providerName,
|
|
870
885
|
model,
|
|
886
|
+
trustSessionScope: !!localTriagedPath,
|
|
871
887
|
});
|
|
872
888
|
spinner.stop(
|
|
873
889
|
`Detected ${detection.candidates.length} candidate tool${detection.candidates.length === 1 ? '' : 's'}.`,
|
|
@@ -1288,6 +1304,7 @@ async function detectTeachCandidates(opts: {
|
|
|
1288
1304
|
sessionPath: string;
|
|
1289
1305
|
providerName: ProviderName;
|
|
1290
1306
|
model?: string;
|
|
1307
|
+
trustSessionScope?: boolean;
|
|
1291
1308
|
}): Promise<Awaited<ReturnType<typeof detectToolCandidates>>> {
|
|
1292
1309
|
const session = loadJsonFile(
|
|
1293
1310
|
opts.sessionPath,
|
|
@@ -1298,7 +1315,11 @@ async function detectTeachCandidates(opts: {
|
|
|
1298
1315
|
},
|
|
1299
1316
|
'session',
|
|
1300
1317
|
);
|
|
1301
|
-
return await detectToolCandidates(session, { provider: opts.providerName, model: opts.model });
|
|
1318
|
+
return await detectToolCandidates(
|
|
1319
|
+
session,
|
|
1320
|
+
{ provider: opts.providerName, model: opts.model },
|
|
1321
|
+
{ trustSessionScope: opts.trustSessionScope },
|
|
1322
|
+
);
|
|
1302
1323
|
}
|
|
1303
1324
|
|
|
1304
1325
|
async function selectTeachCandidates(
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
interface TelemetryRequestLike {
|
|
2
|
+
method: string;
|
|
3
|
+
url: string;
|
|
4
|
+
body?: string;
|
|
5
|
+
response?: {
|
|
6
|
+
body?: string;
|
|
7
|
+
};
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
const HARD_TELEMETRY_PATH_PATTERN =
|
|
11
|
+
/\/(log|gen_204|jserror|ping|beacon|csi|batchlog|metrics|stats|collect|analytics|adsct|pagead|ccm)(?=$|[/?])/i;
|
|
12
|
+
const TERMINAL_EVENT_PATH_PATTERN = /\/events?\/?$/i;
|
|
13
|
+
const EVENT_COLLECTOR_BODY_PATTERNS = [
|
|
14
|
+
/"app_(?:version|build)"/i,
|
|
15
|
+
/"browser_(?:name|version)"/i,
|
|
16
|
+
/"device_(?:environment|locale|make|model)"/i,
|
|
17
|
+
/"event_(?:id|name|type)"/i,
|
|
18
|
+
/"os(?:_version)?"/i,
|
|
19
|
+
/"screen_(?:height|scale_factor|width)"/i,
|
|
20
|
+
];
|
|
21
|
+
|
|
22
|
+
function isTelemetryPath(pathname: string): boolean {
|
|
23
|
+
return HARD_TELEMETRY_PATH_PATTERN.test(pathname);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export function isTelemetryRequest(request: TelemetryRequestLike): boolean {
|
|
27
|
+
let url: URL;
|
|
28
|
+
try {
|
|
29
|
+
url = new URL(request.url);
|
|
30
|
+
} catch {
|
|
31
|
+
return false;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
if (isTelemetryPath(url.pathname)) return true;
|
|
35
|
+
if (!TERMINAL_EVENT_PATH_PATTERN.test(url.pathname)) return false;
|
|
36
|
+
if (request.method.toUpperCase() !== 'POST') return false;
|
|
37
|
+
if (!hasEmptyResponse(request)) return false;
|
|
38
|
+
|
|
39
|
+
const body = request.body ?? '';
|
|
40
|
+
return EVENT_COLLECTOR_BODY_PATTERNS.filter((pattern) => pattern.test(body)).length >= 2;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function hasEmptyResponse(request: TelemetryRequestLike): boolean {
|
|
44
|
+
const body = request.response?.body;
|
|
45
|
+
return body === undefined || body.trim().length === 0;
|
|
46
|
+
}
|
|
@@ -14,6 +14,7 @@ import { isSameRegistrableDomain, registrableDomain } from './etld.ts';
|
|
|
14
14
|
import { type LLMOptions, extractJsonObject, resolveProvider } from './llm.ts';
|
|
15
15
|
import { createLog } from './log.ts';
|
|
16
16
|
import { compactRequestContexts, requestContextDigest } from './request-context.ts';
|
|
17
|
+
import { isTelemetryRequest } from './telemetry.ts';
|
|
17
18
|
import { setSpanAttributes, traced } from './tracing.ts';
|
|
18
19
|
import type { CapturedRequest, Session } from './types.ts';
|
|
19
20
|
|
|
@@ -120,9 +121,10 @@ export type ToolCandidate = z.infer<typeof ToolCandidateSchema>;
|
|
|
120
121
|
const ToolCandidateDetectionSchema = z
|
|
121
122
|
.object({
|
|
122
123
|
sharedContext: SharedCompileContextSchema.default({}),
|
|
123
|
-
candidates: z.array(ToolCandidateSchema)
|
|
124
|
+
candidates: z.array(ToolCandidateSchema),
|
|
124
125
|
})
|
|
125
126
|
.superRefine((value, ctx) => {
|
|
127
|
+
if (value.candidates.length === 0) return;
|
|
126
128
|
const primaryCount = value.candidates.filter((c) => c.primary).length;
|
|
127
129
|
if (primaryCount !== 1) {
|
|
128
130
|
ctx.addIssue({
|
|
@@ -151,9 +153,19 @@ interface DetectToolCandidatesResult extends ToolCandidateDetection {
|
|
|
151
153
|
durationMs: number;
|
|
152
154
|
}
|
|
153
155
|
|
|
156
|
+
interface DetectToolCandidatesOptions {
|
|
157
|
+
/**
|
|
158
|
+
* The input session has already been reduced by request triage. Trust that
|
|
159
|
+
* selected XHR/Fetch scope instead of re-applying the raw-session origin
|
|
160
|
+
* heuristic, which would drop public cross-origin APIs such as api.remitly.io.
|
|
161
|
+
*/
|
|
162
|
+
trustSessionScope?: boolean;
|
|
163
|
+
}
|
|
164
|
+
|
|
154
165
|
export async function detectToolCandidates(
|
|
155
166
|
session: Session,
|
|
156
167
|
llmConfig?: LLMOptions,
|
|
168
|
+
opts: DetectToolCandidatesOptions = {},
|
|
157
169
|
): Promise<DetectToolCandidatesResult> {
|
|
158
170
|
return await traced(
|
|
159
171
|
'teach.detect_tool_candidates',
|
|
@@ -171,13 +183,25 @@ export async function detectToolCandidates(
|
|
|
171
183
|
);
|
|
172
184
|
}
|
|
173
185
|
const systemPrompt = readFileSync(promptPath, 'utf8');
|
|
174
|
-
const payload = buildToolCandidatePayload(session);
|
|
186
|
+
const payload = buildToolCandidatePayload(session, {
|
|
187
|
+
trustSessionScope: opts.trustSessionScope,
|
|
188
|
+
});
|
|
175
189
|
|
|
176
190
|
setSpanAttributes(span, {
|
|
177
191
|
'imprint.events_considered': payload.events.length,
|
|
178
192
|
'imprint.requests_considered': payload.requests.length,
|
|
179
193
|
});
|
|
180
194
|
|
|
195
|
+
if (payload.requests.length === 0) {
|
|
196
|
+
throw new Error(
|
|
197
|
+
[
|
|
198
|
+
'Candidate detection received no eligible XHR/Fetch requests.',
|
|
199
|
+
'Imprint needs at least one data-bearing request to compile a tool.',
|
|
200
|
+
'This usually means triage removed the load-bearing API call, the recording only captured page/static traffic, or the workflow uses a browser-local calculation with no backend request.',
|
|
201
|
+
].join('\n'),
|
|
202
|
+
);
|
|
203
|
+
}
|
|
204
|
+
|
|
181
205
|
log(
|
|
182
206
|
`detecting candidate tools from ${payload.events.length} event(s), ${payload.requests.length} request(s)…`,
|
|
183
207
|
);
|
|
@@ -250,6 +274,14 @@ export async function detectToolCandidates(
|
|
|
250
274
|
export function validateToolCandidateDetection(input: unknown): ToolCandidateDetection {
|
|
251
275
|
const raw = ToolCandidateDetectionSchema.parse(input);
|
|
252
276
|
const before = raw.candidates.length;
|
|
277
|
+
if (before === 0) {
|
|
278
|
+
throw new Error(
|
|
279
|
+
[
|
|
280
|
+
'Candidate detector did not identify any tool candidates backed by requests.',
|
|
281
|
+
'Imprint needs at least one candidate with requestSeqs so the compiler has an API call to replay.',
|
|
282
|
+
].join('\n'),
|
|
283
|
+
);
|
|
284
|
+
}
|
|
253
285
|
raw.candidates = raw.candidates.filter((c) => c.requestSeqs.length > 0);
|
|
254
286
|
if (raw.candidates.length === 0) {
|
|
255
287
|
throw new Error(
|
|
@@ -316,12 +348,19 @@ interface ToolCandidatePayload {
|
|
|
316
348
|
requests: CandidateRequestPayload[];
|
|
317
349
|
}
|
|
318
350
|
|
|
319
|
-
export function buildToolCandidatePayload(session: Session): ToolCandidatePayload {
|
|
351
|
+
export function buildToolCandidatePayload(
|
|
352
|
+
session: Session,
|
|
353
|
+
opts: DetectToolCandidatesOptions = {},
|
|
354
|
+
): ToolCandidatePayload {
|
|
320
355
|
const startRoot = candidateStartRoot(session);
|
|
321
356
|
const appApiHosts = inferAppApiHosts(session, startRoot);
|
|
322
357
|
const requests = compactRequestContexts(
|
|
323
358
|
session.requests
|
|
324
|
-
.filter((request) => isCandidateRequest(request, startRoot, appApiHosts))
|
|
359
|
+
.filter((request) =>
|
|
360
|
+
isCandidateRequest(request, startRoot, appApiHosts, {
|
|
361
|
+
trustSessionScope: opts.trustSessionScope,
|
|
362
|
+
}),
|
|
363
|
+
)
|
|
325
364
|
.map((request) => {
|
|
326
365
|
const body = truncate(request.body, BODY_LIMIT);
|
|
327
366
|
const responsePreview = truncate(request.response?.body, RESPONSE_PREVIEW_LIMIT);
|
|
@@ -408,9 +447,6 @@ function candidateRequestGroupKey(request: CandidateRequestPayload): unknown[] {
|
|
|
408
447
|
* and — worse — the detector can anchor a candidate's `requestSeqs` on one
|
|
409
448
|
* (e.g. Google's `/log`), sending compile to reverse-engineer a beacon. Excluded
|
|
410
449
|
* entirely. The boundary lookahead keeps `/login`, `/catalog`, etc. safe. */
|
|
411
|
-
const TELEMETRY_PATH =
|
|
412
|
-
/\/(log|gen_204|jserror|ping|beacon|csi|batchlog|metrics|stats|collect|analytics|adsct|pagead|ccm)(?=$|[/?])/i;
|
|
413
|
-
|
|
414
450
|
/** Count distinct endpoint families (batchexecute rpcid, else METHOD+path) that
|
|
415
451
|
* carry a non-trivial number of requests. ≥2 means the session genuinely hit
|
|
416
452
|
* multiple backends — a single detected candidate there signals under-
|
|
@@ -433,11 +469,13 @@ function isCandidateRequest(
|
|
|
433
469
|
request: CapturedRequest,
|
|
434
470
|
startRoot: string | null,
|
|
435
471
|
appApiHosts: Set<string>,
|
|
472
|
+
opts: DetectToolCandidatesOptions = {},
|
|
436
473
|
): boolean {
|
|
437
474
|
if (request.resourceType !== 'XHR' && request.resourceType !== 'Fetch') return false;
|
|
438
475
|
const url = safeUrl(request.url);
|
|
439
476
|
if (!url) return false;
|
|
440
|
-
if (TELEMETRY_PATH.test(url.pathname)) return false;
|
|
477
|
+
if (isTelemetryRequest(request)) return false;
|
|
478
|
+
if (opts.trustSessionScope) return true;
|
|
441
479
|
if (startRoot && !isSameRegistrableDomain(url.hostname, startRoot)) {
|
|
442
480
|
return appApiHosts.has(url.hostname);
|
|
443
481
|
}
|