@apitap/core 1.5.2 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,10 +3,129 @@
3
3
  const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
4
4
  const PURE_NUMERIC_RE = /^\d+$/;
5
5
  const LONG_DIGITS_RE = /\d{8,}/;
6
+ const LONG_HEX_RE = /^[0-9a-f]{16,}$/i;
6
7
  const NEXT_DATA_PREFIX_RE = /^\/_next\/data\/[^/]+\//;
7
8
 
8
9
  /**
9
- * Check if a path segment is a dynamic value that should be parameterized.
10
+ * Resource nouns: segments that name a collection of resources.
11
+ * The value is the param name(s) for the slot(s) that follow.
12
+ * e.g. "repos" expects two dynamic segments: :owner/:repo
13
+ */
14
+ const RESOURCE_NOUNS = new Map<string, string[]>([
15
+ // Code hosting / generic REST
16
+ ['repos', [':owner', ':repo']],
17
+ ['users', [':username']],
18
+ ['orgs', [':org']],
19
+ ['organizations', [':org']],
20
+ ['teams', [':team']],
21
+ ['members', [':member']],
22
+ ['projects', [':project']],
23
+ ['gists', [':gist_id']],
24
+ ['issues', [':issue_number']],
25
+ ['pulls', [':pull_number']],
26
+ ['commits', [':sha']],
27
+ ['branches', [':branch']],
28
+ ['tags', [':tag']],
29
+ ['releases', [':release_id']],
30
+ ['milestones', [':milestone']],
31
+ ['labels', [':label']],
32
+ ['hooks', [':hook_id']],
33
+ ['keys', [':key_id']],
34
+ ['deployments', [':deployment_id']],
35
+ ['environments', [':env']],
36
+ ['runs', [':run_id']],
37
+ ['jobs', [':job_id']],
38
+ ['artifacts', [':artifact_id']],
39
+ ['packages', [':package']],
40
+
41
+ // Content / social
42
+ ['posts', [':post_id']],
43
+ ['comments', [':comment_id']],
44
+ ['articles', [':article_id']],
45
+ ['stories', [':story_id']],
46
+ ['threads', [':thread_id']],
47
+ ['messages', [':message_id']],
48
+ ['channels', [':channel']],
49
+ ['videos', [':video_id']],
50
+ ['playlists', [':playlist_id']],
51
+ ['tracks', [':track_id']],
52
+ ['albums', [':album_id']],
53
+ ['artists', [':artist_id']],
54
+ ['images', [':image_id']],
55
+ ['files', [':file_id']],
56
+ ['documents', [':doc_id']],
57
+ ['folders', [':folder_id']],
58
+ ['collections', [':collection_id']],
59
+ ['categories', [':category']],
60
+
61
+ // E-commerce
62
+ ['products', [':product_id']],
63
+ ['items', [':item_id']],
64
+ ['orders', [':order_id']],
65
+ ['customers', [':customer_id']],
66
+ ['carts', [':cart_id']],
67
+ ['stores', [':store_id']],
68
+ ['reviews', [':review_id']],
69
+
70
+ // Infrastructure / ops
71
+ ['accounts', [':account_id']],
72
+ ['workspaces', [':workspace']],
73
+ ['databases', [':database']],
74
+ ['tables', [':table']],
75
+ ['namespaces', [':namespace']],
76
+ ['clusters', [':cluster']],
77
+ ['instances', [':instance']],
78
+ ['regions', [':region']],
79
+ ['zones', [':zone']],
80
+ ['resources', [':resource_id']],
81
+ ['subscriptions', [':subscription_id']],
82
+ ['tenants', [':tenant_id']],
83
+ ['groups', [':group_id']],
84
+ ['roles', [':role']],
85
+ ['policies', [':policy']],
86
+ ['tokens', [':token_id']],
87
+ ['sessions', [':session_id']],
88
+ ['events', [':event_id']],
89
+ ['logs', [':log_id']],
90
+ ['metrics', [':metric']],
91
+ ['alerts', [':alert_id']],
92
+ ['notifications', [':notification_id']],
93
+ ['webhooks', [':webhook_id']],
94
+
95
+ // Media
96
+ ['media', [':media_id']],
97
+ ['assets', [':asset_id']],
98
+ ['uploads', [':upload_id']],
99
+ ]);
100
+
101
+ /**
102
+ * Segments that are always structural (never parameterized).
103
+ * Includes version prefixes, action verbs, and all RESOURCE_NOUNS keys.
104
+ */
105
+ const STRUCTURAL_SEGMENTS = new Set<string>([
106
+ // Version prefixes
107
+ 'api', 'v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7', 'v8', 'v9', 'v10',
108
+ // Actions / sub-resources
109
+ 'search', 'filter', 'sort', 'query', 'list', 'create', 'update', 'delete',
110
+ 'status', 'config', 'settings', 'preferences', 'profile', 'info', 'details',
111
+ 'stats', 'analytics', 'count', 'batch', 'bulk', 'export', 'import',
112
+ 'auth', 'login', 'logout', 'register', 'signup', 'signin', 'callback',
113
+ 'oauth', 'token', 'refresh', 'verify', 'confirm', 'reset', 'activate',
114
+ 'public', 'private', 'internal', 'external', 'admin', 'management',
115
+ 'graphql', 'gql', 'rest', 'rpc', 'ws', 'websocket', 'stream', 'feed',
116
+ 'health', 'ping', 'version', 'manifest', 'metadata', 'schema',
117
+ 'upload', 'download', 'preview', 'thumbnail', 'embed',
118
+ 'latest', 'trending', 'popular', 'featured', 'recommended', 'top', 'new',
119
+ 'web', 'app', 'mobile', 'desktop', 'data', 'raw', 'render',
120
+ 'consent', 'wrapper', 'widget', 'integrity', 'pathfinder', 'rum',
121
+ // All resource noun keys are also structural — this means a noun after itself
122
+ // (e.g. /repos/repos) treats the second "repos" as structural, not a param slot.
123
+ // That's a degenerate case and the correct behavior: we preserve the literal.
124
+ ...RESOURCE_NOUNS.keys(),
125
+ ]);
126
+
127
+ /**
128
+ * Check if a path segment is a dynamic value based on its structure alone.
10
129
  * Returns the parameter name (:id, :hash, :slug) or null if static.
11
130
  */
12
131
  function classifySegment(segment: string): string | null {
@@ -16,6 +135,9 @@ function classifySegment(segment: string): string | null {
16
135
  // UUID → :id
17
136
  if (UUID_RE.test(segment)) return ':id';
18
137
 
138
+ // Long hex string (16+ hex chars) → :hash
139
+ if (LONG_HEX_RE.test(segment)) return ':hash';
140
+
19
141
  // Slug with embedded long number (8+ consecutive digits) — check before hash
20
142
  // because slugs like "btc-updown-15m-1770254100" would also match the hash rule
21
143
  if (LONG_DIGITS_RE.test(segment)) {
@@ -34,20 +156,94 @@ function classifySegment(segment: string): string | null {
34
156
  }
35
157
 
36
158
  /**
37
- * Replace dynamic path segments with :param placeholders.
159
+ * Check if a segment looks like a lowercase word or hyphenated compound word.
160
+ * e.g. "search", "location-metadata", "top-rated" → true
161
+ * e.g. "n1byn1kt", "OxItOzEC", "ABC-123" → false
162
+ */
163
+ function looksLikeWord(segment: string): boolean {
164
+ return /^[a-z][a-z-]*[a-z]$/.test(segment)
165
+ && segment.split('-').every(part => /^[a-z]{2,}$/.test(part));
166
+ }
167
+
168
/**
 * Replace dynamic path segments with named :param placeholders.
 *
 * Each non-empty segment is tested in this order, first match wins:
 *  1. UUID → `:id`
 *  2. Pure digits → next queued noun slot name, or `:id` when none is queued
 *  3. Long hex → `:hash`; contains a long digit run → `:slug`;
 *     otherwise whatever classifySegment() reports
 *  4. Known resource noun → kept literally, and its slot names are queued
 *     for the segments that follow (e.g. /repos/:owner/:repo)
 *  5. Structural segment → kept literally, queue cleared
 *  6. A queued noun slot, if any, names the segment
 *  7. Heuristic: a segment of length >= 2 that does not look like a plain
 *     lowercase word and directly follows a structural segment → `:id`
 *  8. Anything else is kept literally and clears the queue
 *
 * Empty segments (leading/trailing/double slashes) are passed through.
 *
 * @param path URL path, split on "/".
 * @returns The path with dynamic segments replaced by placeholders.
 */
export function parameterizePath(path: string): string {
  const parts = path.split('/');
  const out: string[] = [];
  // Param names queued by the most recent resource noun, consumed left to right.
  let pending: string[] = [];

  for (const [index, part] of parts.entries()) {
    // Empty segments carry the slashes; keep them untouched.
    if (part === '') {
      out.push(part);
      continue;
    }

    const key = part.toLowerCase();

    if (UUID_RE.test(part)) {
      out.push(':id');
      pending = [];
      continue;
    }

    // Pure numeric is checked before the long-digit rule so that a value like
    // 1770254100 is treated as a number, not a slug. The queue is not cleared
    // here, so a remaining slot can still name the next segment.
    if (PURE_NUMERIC_RE.test(part)) {
      out.push(pending.shift() ?? ':id');
      continue;
    }

    // Remaining structural shapes, in the same precedence as before:
    // long hex, embedded long digit run, then classifySegment's own rules.
    const structuralName = LONG_HEX_RE.test(part)
      ? ':hash'
      : LONG_DIGITS_RE.test(part)
        ? ':slug'
        : classifySegment(part);
    if (structuralName) {
      out.push(structuralName);
      pending = [];
      continue;
    }

    // Resource noun: keep the literal and queue names for what follows.
    const slotNames = RESOURCE_NOUNS.get(key);
    if (slotNames !== undefined) {
      out.push(part);
      pending = [...slotNames];
      continue;
    }

    if (STRUCTURAL_SEGMENTS.has(key)) {
      out.push(part);
      pending = [];
      continue;
    }

    // Fill a queued noun slot (e.g. "n1byn1kt" after "repos").
    const queued = pending.shift();
    if (queued !== undefined) {
      out.push(queued);
      continue;
    }

    // Heuristic fallback: look at the raw previous segment.
    const before = index > 0 ? (parts[index - 1] ?? '').toLowerCase() : '';
    const followsStructural = STRUCTURAL_SEGMENTS.has(before) || RESOURCE_NOUNS.has(before);
    const looksDynamic = part.length >= 2 && !looksLikeWord(part);

    if (followsStructural && looksDynamic) {
      out.push(':id');
      continue;
    }

    // Default: keep the segment as written.
    out.push(part);
    pending = [];
  }

  return out.join('/');
}
53
249
 
@@ -4,7 +4,7 @@ import { randomUUID } from 'node:crypto';
4
4
  import { shouldCapture } from './filter.js';
5
5
  import { launchBrowser, normalizeCookiesForStorageState } from './browser.js';
6
6
  import { isDomainMatch } from './domain.js';
7
- import { SkillGenerator, type GeneratorOptions } from '../skill/generator.js';
7
+ import { SkillGenerator, deduplicateAuth, type GeneratorOptions } from '../skill/generator.js';
8
8
  import { detectCaptcha } from '../auth/refresh.js';
9
9
  import { verifyEndpoints } from './verifier.js';
10
10
  import { signSkillFile } from '../skill/signing.js';
@@ -133,11 +133,21 @@ export class CaptureSession {
133
133
  case 'navigate': {
134
134
  if (!action.url) return { success: false, error: 'url required for navigate', snapshot: await this.takeSnapshot() };
135
135
 
136
- // M7 fix: Full SSRF validation on navigate URLs (same checks as ssrf.ts)
137
- const { validateUrl: validateNavUrl } = await import('../skill/ssrf.js');
138
- const navResult = validateNavUrl(action.url);
139
- if (!navResult.safe) {
140
- return { success: false, error: `Navigation blocked: ${navResult.reason}`, snapshot: await this.takeSnapshot() };
136
+ // M7 fix: SSRF validation on navigate URLs is skipped for same-origin navigations
137
+ // (session.start() already navigated to targetUrl, so same-origin is trusted)
138
+ let sameOrigin = false;
139
+ try {
140
+ sameOrigin = this.targetUrl !== '' && new URL(action.url).origin === new URL(this.targetUrl).origin;
141
+ } catch {
142
+ // Invalid URL — let validateUrl produce the error
143
+ }
144
+
145
+ if (!sameOrigin) {
146
+ const { validateUrl: validateNavUrl } = await import('../skill/ssrf.js');
147
+ const navResult = validateNavUrl(action.url);
148
+ if (!navResult.safe) {
149
+ return { success: false, error: `Navigation blocked: ${navResult.reason}`, snapshot: await this.takeSnapshot() };
150
+ }
141
151
  }
142
152
 
143
153
  await this.page.goto(action.url, { waitUntil: 'domcontentloaded' });
@@ -208,10 +218,10 @@ export class CaptureSession {
208
218
 
209
219
  if (skill.endpoints.length === 0) continue;
210
220
 
211
- // Store extracted auth
212
- const extractedAuth = generator.getExtractedAuth();
213
- if (extractedAuth.length > 0) {
214
- await authManager.store(domain, extractedAuth[0]);
221
+ // Store extracted auth credentials
222
+ const auth = deduplicateAuth(generator.getExtractedAuth());
223
+ if (auth) {
224
+ await authManager.store(domain, auth);
215
225
  }
216
226
 
217
227
  // Store OAuth credentials if detected
package/src/cli.ts CHANGED
@@ -26,6 +26,7 @@ import { readFileSync } from 'node:fs';
26
26
  import { stat, unlink } from 'node:fs/promises';
27
27
  import { fileURLToPath } from 'node:url';
28
28
  import { createMcpServer } from './mcp.js';
29
+ import { attach, parseDomainPatterns } from './capture/cdp-attach.js';
29
30
 
30
31
  const __dirname = fileURLToPath(new URL('.', import.meta.url));
31
32
  const pkg = JSON.parse(readFileSync(join(__dirname, '..', 'package.json'), 'utf-8'));
@@ -66,6 +67,8 @@ function printUsage(): void {
66
67
 
67
68
  Usage:
68
69
  apitap capture <url> Capture API traffic from a website
70
+ apitap attach [--port 9222] [--domain *.github.com]
71
+ Attach to running Chrome and capture API traffic
69
72
  apitap discover <url> Detect APIs without a browser (fast recon)
70
73
  apitap inspect <url> Discover APIs without saving (X-ray vision)
71
74
  apitap search <query> Search skill files for a domain or endpoint
@@ -1233,6 +1236,20 @@ async function handleExtension(positional: string[], flags: Record<string, strin
1233
1236
  process.exit(1);
1234
1237
  }
1235
1238
 
1239
/**
 * CLI handler for `apitap attach`.
 *
 * Reads `--port` (default 9222), `--domain` and `--json` from the parsed
 * flags, calls attach() with them, and prints the result as pretty-printed
 * JSON when `--json` is set.
 *
 * A `--port` value that is not a whole number between 1 and 65535 is rejected
 * with an error message and exit code 1 instead of being forwarded as NaN or
 * an impossible port.
 *
 * @param _positional Positional arguments (unused).
 * @param flags Parsed command-line flags.
 */
async function handleAttach(_positional: string[], flags: Record<string, string | boolean>): Promise<void> {
  let port = 9222;
  if (typeof flags.port === 'string') {
    const rawPort = flags.port.trim();
    // Require digits only: parseInt alone would accept "9222abc" and turn "abc" into NaN.
    const parsedPort = /^\d+$/.test(rawPort) ? Number.parseInt(rawPort, 10) : Number.NaN;
    if (!Number.isInteger(parsedPort) || parsedPort < 1 || parsedPort > 65535) {
      console.error(`Error: invalid --port value "${flags.port}" (expected an integer between 1 and 65535)`);
      process.exit(1);
    }
    port = parsedPort;
  }

  const domainPatterns = parseDomainPatterns(
    typeof flags.domain === 'string' ? flags.domain : undefined,
  );
  const json = flags.json === true;

  const result = await attach({ port, domainPatterns, json });

  if (json) {
    console.log(JSON.stringify(result, null, 2));
  }
}
1252
+
1236
1253
  async function main(): Promise<void> {
1237
1254
  const { command, positional, flags } = parseArgs(process.argv.slice(2));
1238
1255
 
@@ -1300,6 +1317,9 @@ async function main(): Promise<void> {
1300
1317
  case 'extension':
1301
1318
  await handleExtension(positional, flags);
1302
1319
  break;
1320
+ case 'attach':
1321
+ await handleAttach(positional, flags);
1322
+ break;
1303
1323
  default:
1304
1324
  printUsage();
1305
1325
  }
package/src/index.ts CHANGED
@@ -14,6 +14,7 @@ export { peek, read, type PeekOptions, type ReadOptions } from './read/index.js'
14
14
  export type { PeekResult, ReadResult, Decoder } from './read/types.js';
15
15
  export { AuthManager, getMachineId } from './auth/manager.js';
16
16
  export { parameterizePath, cleanFrameworkPath } from './capture/parameterize.js';
17
+ export { attach, matchesDomainGlob, parseDomainPatterns } from './capture/cdp-attach.js';
17
18
  export { detectPagination } from './capture/pagination.js';
18
19
  export { verifyEndpoints } from './capture/verifier.js';
19
20
  export { IdleTracker } from './capture/idle.js';
package/src/mcp.ts CHANGED
@@ -265,6 +265,12 @@ export function createMcpServer(options: McpServerOptions = {}): McpServer {
265
265
  },
266
266
  },
267
267
  async ({ requests, maxBytes }) => {
268
+ // Consume one rate-limit token per batch item
269
+ for (let i = 0; i < requests.length; i++) {
270
+ if (!rateLimiter.check()) {
271
+ return { content: [{ type: 'text' as const, text: `Rate limit exceeded after ${i} of ${requests.length} items. Try again in a moment.` }], isError: true };
272
+ }
273
+ }
268
274
  const { replayMultiple } = await import('./replay/engine.js');
269
275
  const typed = requests.map(r => ({
270
276
  domain: r.domain,
@@ -334,6 +340,9 @@ export function createMcpServer(options: McpServerOptions = {}): McpServer {
334
340
  },
335
341
  },
336
342
  async ({ url }) => {
343
+ if (!rateLimiter.check()) {
344
+ return { content: [{ type: 'text' as const, text: 'Rate limit exceeded. Try again in a moment.' }], isError: true };
345
+ }
337
346
  try {
338
347
  if (!options._skipSsrfCheck) {
339
348
  const validation = await resolveAndValidateUrl(url);
@@ -415,6 +424,9 @@ export function createMcpServer(options: McpServerOptions = {}): McpServer {
415
424
  },
416
425
  },
417
426
  async ({ url, duration }) => {
427
+ if (!rateLimiter.check()) {
428
+ return { content: [{ type: 'text' as const, text: 'Rate limit exceeded. Try again in a moment.' }], isError: true };
429
+ }
418
430
  if (!options._skipSsrfCheck) {
419
431
  const validation = await resolveAndValidateUrl(url);
420
432
  if (!validation.safe) {
@@ -237,6 +237,13 @@ export async function startSocketServer(
237
237
 
238
238
  conn.on('data', (chunk) => {
239
239
  buffer += chunk.toString();
240
+
241
+ // Guard against unbounded buffer growth (max 10MB)
242
+ if (buffer.length > 10 * 1024 * 1024) {
243
+ conn.destroy();
244
+ return;
245
+ }
246
+
240
247
  const newlineIdx = buffer.indexOf('\n');
241
248
  if (newlineIdx === -1) return;
242
249
 
@@ -299,7 +306,7 @@ function readMessage(): Promise<NativeRequest | null> {
299
306
  }
300
307
 
301
308
  const messageLength = headerBuf.readUInt32LE(0);
302
- if (messageLength > 1024 * 1024) {
309
+ if (messageLength > 10 * 1024 * 1024) {
303
310
  process.stderr.write(`Message too large: ${messageLength}\n`);
304
311
  resolve(null);
305
312
  return;
@@ -416,6 +423,10 @@ if (isMainModule) {
416
423
 
417
424
  // Otherwise, handle as a direct extension message (save_skill, etc.)
418
425
  const response = await handleNativeMessage(message);
426
+ // Echo _portMsgId so extension can match response to request
427
+ if ((message as any)._portMsgId) {
428
+ (response as any)._portMsgId = (message as any)._portMsgId;
429
+ }
419
430
  sendMessage(response);
420
431
  }
421
432
 
package/src/plugin.ts CHANGED
@@ -3,6 +3,7 @@ import { searchSkills } from './skill/search.js';
3
3
  import { readSkillFile } from './skill/store.js';
4
4
  import { replayEndpoint } from './replay/engine.js';
5
5
  import { AuthManager, getMachineId } from './auth/manager.js';
6
+ import { deriveSigningKey } from './auth/crypto.js';
6
7
  import { homedir } from 'node:os';
7
8
  import { join } from 'node:path';
8
9
 
@@ -29,7 +30,11 @@ const APITAP_DIR = join(homedir(), '.apitap');
29
30
 
30
31
  /** M20: Mark plugin responses as untrusted external content */
31
32
  function wrapUntrusted(data: unknown): unknown {
32
- return { ...data as Record<string, unknown>, _meta: { externalContent: { untrusted: true } } };
33
+ const meta = { externalContent: { untrusted: true } };
34
+ if (Array.isArray(data)) {
35
+ return { results: data, _meta: meta };
36
+ }
37
+ return { ...data as Record<string, unknown>, _meta: meta };
33
38
  }
34
39
 
35
40
  export function createPlugin(options: PluginOptions = {}): Plugin {
@@ -93,7 +98,9 @@ export function createPlugin(options: PluginOptions = {}): Plugin {
93
98
  const endpointId = args.endpointId as string;
94
99
  const params = args.params as Record<string, string> | undefined;
95
100
 
96
- const skill = await readSkillFile(domain, skillsDir, { trustUnsigned: true });
101
+ const machineId = await getMachineId();
102
+ const signingKey = deriveSigningKey(machineId);
103
+ const skill = await readSkillFile(domain, skillsDir, { verifySignature: true, signingKey, trustUnsigned: true });
97
104
  if (!skill) {
98
105
  return {
99
106
  error: `No skill file found for "${domain}". Use apitap_capture to capture it first.`,
@@ -159,7 +166,7 @@ export function createPlugin(options: PluginOptions = {}): Plugin {
159
166
  const { promisify } = await import('node:util');
160
167
  const execFileAsync = promisify(execFile);
161
168
 
162
- const cliArgs = ['--import', 'tsx', 'src/cli.ts', 'capture', url, '--duration', String(duration), '--json', '--no-verify'];
169
+ const cliArgs = ['--import', 'tsx', 'src/cli.ts', 'capture', url, '--duration', String(duration), '--json'];
163
170
  if (allDomains) cliArgs.push('--all-domains');
164
171
 
165
172
  try {
@@ -46,6 +46,18 @@ const STRIP_HEADERS = new Set([
46
46
  'cookie',
47
47
  ]);
48
48
 
49
+ /** Headers that should never be treated as auth by entropy detection. */
50
+ const NOT_AUTH_HEADERS = new Set([
51
+ 'referer', 'user-agent', 'content-type', 'accept', 'accept-language',
52
+ 'origin', 'host', 'content-length', 'cache-control', 'pragma', 'if-none-match',
53
+ 'if-modified-since', 'dnt', 'upgrade-insecure-requests',
54
+ // Observability/tracing (high entropy but not auth)
55
+ 'traceparent', 'tracestate', 'tracecontext', 'newrelic', 'sentry-trace',
56
+ 'baggage', 'x-request-id', 'x-correlation-id', 'x-trace-id', 'x-span-id',
57
+ 'x-datadog-trace-id', 'x-datadog-parent-id', 'x-datadog-sampling-priority',
58
+ 'x-amzn-trace-id', 'x-cloud-trace-context',
59
+ ]);
60
+
49
61
  const AUTH_HEADERS = new Set([
50
62
  'authorization',
51
63
  'x-api-key',
@@ -107,7 +119,8 @@ function extractAuth(headers: Record<string, string>): [StoredAuth[], Set<string
107
119
  });
108
120
  } else if (lower === 'x-api-key' && value) {
109
121
  auth.push({ type: 'api-key', header: lower, value });
110
- } else if (!AUTH_HEADERS.has(lower) && value) {
122
+ } else if (!AUTH_HEADERS.has(lower) && !STRIP_HEADERS.has(lower) && !NOT_AUTH_HEADERS.has(lower)
123
+ && !lower.startsWith('sec-') && value) {
111
124
  // Entropy-based detection for non-standard headers
112
125
  const classification = isLikelyToken(lower, value);
113
126
  if (classification.isToken) {
@@ -119,6 +132,25 @@ function extractAuth(headers: Record<string, string>): [StoredAuth[], Set<string
119
132
  return [auth, entropyDetected];
120
133
  }
121
134
 
135
/**
 * Collapse a list of extracted auth entries into a single StoredAuth.
 *
 * The first entry supplies the primary `type`, `header` and `value`. The
 * `headers` array lists every distinct header name once, in first-seen
 * order, keeping the value from its first occurrence. Callers are expected
 * to pass the list already ordered by priority (bearer > api-key > custom),
 * as getExtractedAuth() returns it.
 *
 * @param extractedAuth Auth entries, highest priority first.
 * @returns The merged entry, or null when the list is empty.
 */
export function deduplicateAuth(extractedAuth: StoredAuth[]): StoredAuth | null {
  if (extractedAuth.length === 0) return null;

  // Map keeps insertion order, so the first value seen for a header wins.
  const firstByHeader = new Map<string, { header: string; value: string }>();
  for (const { header, value } of extractedAuth) {
    if (!firstByHeader.has(header)) {
      firstByHeader.set(header, { header, value });
    }
  }

  const { type, header, value } = extractedAuth[0];
  return { type, header, value, headers: [...firstByHeader.values()] };
}
153
+
122
154
  function generateEndpointId(method: string, parameterizedPath: string): string {
123
155
  // Clean framework noise for the ID (but not for the stored path)
124
156
  let cleaned = cleanFrameworkPath(parameterizedPath);
@@ -476,9 +508,12 @@ export class SkillGenerator {
476
508
  this.filteredCount++;
477
509
  }
478
510
 
479
- /** Get auth credentials extracted during capture. */
511
/**
 * Get auth credentials extracted during capture, prioritized by type.
 *
 * Returns a sorted copy (the internal list is not reordered): bearer first,
 * then api-key, then custom, then any other type.
 */
getExtractedAuth(): StoredAuth[] {
  const rank: Record<string, number> = { bearer: 0, 'api-key': 1, custom: 2 };
  const rankOf = (entry: StoredAuth): number => rank[entry.type] ?? 3;

  const ordered = Array.from(this.extractedAuthList);
  ordered.sort((left, right) => rankOf(left) - rankOf(right));
  return ordered;
}
483
518
 
484
519
  /** Mark this domain as having captcha risk (detected during capture). */