@apitap/core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +60 -0
- package/README.md +362 -0
- package/SKILL.md +270 -0
- package/dist/auth/crypto.d.ts +31 -0
- package/dist/auth/crypto.js +66 -0
- package/dist/auth/crypto.js.map +1 -0
- package/dist/auth/handoff.d.ts +29 -0
- package/dist/auth/handoff.js +180 -0
- package/dist/auth/handoff.js.map +1 -0
- package/dist/auth/manager.d.ts +46 -0
- package/dist/auth/manager.js +127 -0
- package/dist/auth/manager.js.map +1 -0
- package/dist/auth/oauth-refresh.d.ts +16 -0
- package/dist/auth/oauth-refresh.js +91 -0
- package/dist/auth/oauth-refresh.js.map +1 -0
- package/dist/auth/refresh.d.ts +43 -0
- package/dist/auth/refresh.js +217 -0
- package/dist/auth/refresh.js.map +1 -0
- package/dist/capture/anti-bot.d.ts +15 -0
- package/dist/capture/anti-bot.js +43 -0
- package/dist/capture/anti-bot.js.map +1 -0
- package/dist/capture/blocklist.d.ts +6 -0
- package/dist/capture/blocklist.js +70 -0
- package/dist/capture/blocklist.js.map +1 -0
- package/dist/capture/body-diff.d.ts +8 -0
- package/dist/capture/body-diff.js +102 -0
- package/dist/capture/body-diff.js.map +1 -0
- package/dist/capture/body-variables.d.ts +13 -0
- package/dist/capture/body-variables.js +142 -0
- package/dist/capture/body-variables.js.map +1 -0
- package/dist/capture/domain.d.ts +8 -0
- package/dist/capture/domain.js +34 -0
- package/dist/capture/domain.js.map +1 -0
- package/dist/capture/entropy.d.ts +33 -0
- package/dist/capture/entropy.js +100 -0
- package/dist/capture/entropy.js.map +1 -0
- package/dist/capture/filter.d.ts +11 -0
- package/dist/capture/filter.js +49 -0
- package/dist/capture/filter.js.map +1 -0
- package/dist/capture/graphql.d.ts +21 -0
- package/dist/capture/graphql.js +99 -0
- package/dist/capture/graphql.js.map +1 -0
- package/dist/capture/idle.d.ts +23 -0
- package/dist/capture/idle.js +44 -0
- package/dist/capture/idle.js.map +1 -0
- package/dist/capture/monitor.d.ts +26 -0
- package/dist/capture/monitor.js +183 -0
- package/dist/capture/monitor.js.map +1 -0
- package/dist/capture/oauth-detector.d.ts +18 -0
- package/dist/capture/oauth-detector.js +96 -0
- package/dist/capture/oauth-detector.js.map +1 -0
- package/dist/capture/pagination.d.ts +9 -0
- package/dist/capture/pagination.js +40 -0
- package/dist/capture/pagination.js.map +1 -0
- package/dist/capture/parameterize.d.ts +17 -0
- package/dist/capture/parameterize.js +63 -0
- package/dist/capture/parameterize.js.map +1 -0
- package/dist/capture/scrubber.d.ts +5 -0
- package/dist/capture/scrubber.js +38 -0
- package/dist/capture/scrubber.js.map +1 -0
- package/dist/capture/session.d.ts +46 -0
- package/dist/capture/session.js +445 -0
- package/dist/capture/session.js.map +1 -0
- package/dist/capture/token-detector.d.ts +16 -0
- package/dist/capture/token-detector.js +62 -0
- package/dist/capture/token-detector.js.map +1 -0
- package/dist/capture/verifier.d.ts +17 -0
- package/dist/capture/verifier.js +147 -0
- package/dist/capture/verifier.js.map +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +930 -0
- package/dist/cli.js.map +1 -0
- package/dist/discovery/auth.d.ts +17 -0
- package/dist/discovery/auth.js +81 -0
- package/dist/discovery/auth.js.map +1 -0
- package/dist/discovery/fetch.d.ts +17 -0
- package/dist/discovery/fetch.js +59 -0
- package/dist/discovery/fetch.js.map +1 -0
- package/dist/discovery/frameworks.d.ts +11 -0
- package/dist/discovery/frameworks.js +249 -0
- package/dist/discovery/frameworks.js.map +1 -0
- package/dist/discovery/index.d.ts +21 -0
- package/dist/discovery/index.js +219 -0
- package/dist/discovery/index.js.map +1 -0
- package/dist/discovery/openapi.d.ts +13 -0
- package/dist/discovery/openapi.js +175 -0
- package/dist/discovery/openapi.js.map +1 -0
- package/dist/discovery/probes.d.ts +9 -0
- package/dist/discovery/probes.js +70 -0
- package/dist/discovery/probes.js.map +1 -0
- package/dist/index.d.ts +25 -0
- package/dist/index.js +25 -0
- package/dist/index.js.map +1 -0
- package/dist/inspect/report.d.ts +52 -0
- package/dist/inspect/report.js +191 -0
- package/dist/inspect/report.js.map +1 -0
- package/dist/mcp.d.ts +8 -0
- package/dist/mcp.js +526 -0
- package/dist/mcp.js.map +1 -0
- package/dist/orchestration/browse.d.ts +38 -0
- package/dist/orchestration/browse.js +198 -0
- package/dist/orchestration/browse.js.map +1 -0
- package/dist/orchestration/cache.d.ts +15 -0
- package/dist/orchestration/cache.js +24 -0
- package/dist/orchestration/cache.js.map +1 -0
- package/dist/plugin.d.ts +17 -0
- package/dist/plugin.js +158 -0
- package/dist/plugin.js.map +1 -0
- package/dist/read/decoders/deepwiki.d.ts +2 -0
- package/dist/read/decoders/deepwiki.js +148 -0
- package/dist/read/decoders/deepwiki.js.map +1 -0
- package/dist/read/decoders/grokipedia.d.ts +2 -0
- package/dist/read/decoders/grokipedia.js +210 -0
- package/dist/read/decoders/grokipedia.js.map +1 -0
- package/dist/read/decoders/hackernews.d.ts +2 -0
- package/dist/read/decoders/hackernews.js +168 -0
- package/dist/read/decoders/hackernews.js.map +1 -0
- package/dist/read/decoders/index.d.ts +2 -0
- package/dist/read/decoders/index.js +12 -0
- package/dist/read/decoders/index.js.map +1 -0
- package/dist/read/decoders/reddit.d.ts +2 -0
- package/dist/read/decoders/reddit.js +142 -0
- package/dist/read/decoders/reddit.js.map +1 -0
- package/dist/read/decoders/twitter.d.ts +12 -0
- package/dist/read/decoders/twitter.js +187 -0
- package/dist/read/decoders/twitter.js.map +1 -0
- package/dist/read/decoders/wikipedia.d.ts +2 -0
- package/dist/read/decoders/wikipedia.js +66 -0
- package/dist/read/decoders/wikipedia.js.map +1 -0
- package/dist/read/decoders/youtube.d.ts +2 -0
- package/dist/read/decoders/youtube.js +69 -0
- package/dist/read/decoders/youtube.js.map +1 -0
- package/dist/read/extract.d.ts +25 -0
- package/dist/read/extract.js +320 -0
- package/dist/read/extract.js.map +1 -0
- package/dist/read/index.d.ts +14 -0
- package/dist/read/index.js +66 -0
- package/dist/read/index.js.map +1 -0
- package/dist/read/peek.d.ts +9 -0
- package/dist/read/peek.js +137 -0
- package/dist/read/peek.js.map +1 -0
- package/dist/read/types.d.ts +44 -0
- package/dist/read/types.js +3 -0
- package/dist/read/types.js.map +1 -0
- package/dist/replay/engine.d.ts +53 -0
- package/dist/replay/engine.js +441 -0
- package/dist/replay/engine.js.map +1 -0
- package/dist/replay/truncate.d.ts +16 -0
- package/dist/replay/truncate.js +92 -0
- package/dist/replay/truncate.js.map +1 -0
- package/dist/serve.d.ts +31 -0
- package/dist/serve.js +149 -0
- package/dist/serve.js.map +1 -0
- package/dist/skill/generator.d.ts +44 -0
- package/dist/skill/generator.js +419 -0
- package/dist/skill/generator.js.map +1 -0
- package/dist/skill/importer.d.ts +26 -0
- package/dist/skill/importer.js +80 -0
- package/dist/skill/importer.js.map +1 -0
- package/dist/skill/search.d.ts +19 -0
- package/dist/skill/search.js +51 -0
- package/dist/skill/search.js.map +1 -0
- package/dist/skill/signing.d.ts +16 -0
- package/dist/skill/signing.js +34 -0
- package/dist/skill/signing.js.map +1 -0
- package/dist/skill/ssrf.d.ts +27 -0
- package/dist/skill/ssrf.js +210 -0
- package/dist/skill/ssrf.js.map +1 -0
- package/dist/skill/store.d.ts +7 -0
- package/dist/skill/store.js +93 -0
- package/dist/skill/store.js.map +1 -0
- package/dist/stats/report.d.ts +26 -0
- package/dist/stats/report.js +157 -0
- package/dist/stats/report.js.map +1 -0
- package/dist/types.d.ts +214 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +58 -0
- package/src/auth/crypto.ts +92 -0
- package/src/auth/handoff.ts +229 -0
- package/src/auth/manager.ts +140 -0
- package/src/auth/oauth-refresh.ts +120 -0
- package/src/auth/refresh.ts +300 -0
- package/src/capture/anti-bot.ts +63 -0
- package/src/capture/blocklist.ts +75 -0
- package/src/capture/body-diff.ts +109 -0
- package/src/capture/body-variables.ts +156 -0
- package/src/capture/domain.ts +34 -0
- package/src/capture/entropy.ts +121 -0
- package/src/capture/filter.ts +56 -0
- package/src/capture/graphql.ts +124 -0
- package/src/capture/idle.ts +45 -0
- package/src/capture/monitor.ts +224 -0
- package/src/capture/oauth-detector.ts +106 -0
- package/src/capture/pagination.ts +49 -0
- package/src/capture/parameterize.ts +68 -0
- package/src/capture/scrubber.ts +49 -0
- package/src/capture/session.ts +502 -0
- package/src/capture/token-detector.ts +76 -0
- package/src/capture/verifier.ts +171 -0
- package/src/cli.ts +1031 -0
- package/src/discovery/auth.ts +99 -0
- package/src/discovery/fetch.ts +85 -0
- package/src/discovery/frameworks.ts +231 -0
- package/src/discovery/index.ts +256 -0
- package/src/discovery/openapi.ts +230 -0
- package/src/discovery/probes.ts +76 -0
- package/src/index.ts +26 -0
- package/src/inspect/report.ts +247 -0
- package/src/mcp.ts +618 -0
- package/src/orchestration/browse.ts +250 -0
- package/src/orchestration/cache.ts +37 -0
- package/src/plugin.ts +188 -0
- package/src/read/decoders/deepwiki.ts +180 -0
- package/src/read/decoders/grokipedia.ts +246 -0
- package/src/read/decoders/hackernews.ts +198 -0
- package/src/read/decoders/index.ts +15 -0
- package/src/read/decoders/reddit.ts +158 -0
- package/src/read/decoders/twitter.ts +211 -0
- package/src/read/decoders/wikipedia.ts +75 -0
- package/src/read/decoders/youtube.ts +75 -0
- package/src/read/extract.ts +396 -0
- package/src/read/index.ts +78 -0
- package/src/read/peek.ts +175 -0
- package/src/read/types.ts +37 -0
- package/src/replay/engine.ts +559 -0
- package/src/replay/truncate.ts +116 -0
- package/src/serve.ts +189 -0
- package/src/skill/generator.ts +473 -0
- package/src/skill/importer.ts +107 -0
- package/src/skill/search.ts +76 -0
- package/src/skill/signing.ts +36 -0
- package/src/skill/ssrf.ts +238 -0
- package/src/skill/store.ts +107 -0
- package/src/stats/report.ts +208 -0
- package/src/types.ts +233 -0
|
@@ -0,0 +1,502 @@
|
|
|
1
|
+
// src/capture/session.ts
|
|
2
|
+
import { chromium, type Browser, type Page } from 'playwright';
|
|
3
|
+
import { randomUUID } from 'node:crypto';
|
|
4
|
+
import { shouldCapture } from './filter.js';
|
|
5
|
+
import { isDomainMatch } from './domain.js';
|
|
6
|
+
import { SkillGenerator, type GeneratorOptions } from '../skill/generator.js';
|
|
7
|
+
import { detectCaptcha } from '../auth/refresh.js';
|
|
8
|
+
import { verifyEndpoints } from './verifier.js';
|
|
9
|
+
import { signSkillFile } from '../skill/signing.js';
|
|
10
|
+
import { writeSkillFile } from '../skill/store.js';
|
|
11
|
+
import { AuthManager, getMachineId } from '../auth/manager.js';
|
|
12
|
+
import { deriveKey } from '../auth/crypto.js';
|
|
13
|
+
import { homedir } from 'node:os';
|
|
14
|
+
import { join } from 'node:path';
|
|
15
|
+
import type { CapturedExchange, PageSnapshot, PageElement, InteractionResult, FinishResult } from '../types.js';
|
|
16
|
+
|
|
17
|
+
/** Snapshots list at most this many interactive elements. */
const MAX_ELEMENTS = 100;

/** Element text is cut to this many characters before it goes into a snapshot. */
const MAX_TEXT_LENGTH = 200;

/** Fallback session lifetime when `SessionOptions.timeoutMs` is not given: 5 minutes. */
const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000;

/** Default base directory for apitap state: `.apitap` inside the user's home directory. */
const APITAP_DIR = join(homedir(), '.apitap');
|
|
21
|
+
|
|
22
|
+
/** Optional settings accepted by the `CaptureSession` constructor. */
export interface SessionOptions {
  /** Passed to `chromium.launch`; treated as `true` when omitted. */
  headless?: boolean;

  /**
   * When falsy, responses whose hostname does not match the target URL's
   * domain are counted as filtered and not captured.
   */
  allDomains?: boolean;

  /** Milliseconds before the session expires and closes its browser (defaults to 5 minutes). */
  timeoutMs?: number;

  /** Forwarded to `writeSkillFile` as the output directory for generated skill files. */
  skillsDir?: string;

  /** Base dir for auth storage (defaults to ~/.apitap). */
  authDir?: string;
}
|
|
29
|
+
|
|
30
|
+
/**
 * One interactive capture session.
 *
 * Launches a Chromium page via Playwright, records every response that passes
 * the domain and capture filters into a per-hostname `SkillGenerator`, and lets
 * the caller drive the page through `interact()`. `finish()` closes the browser
 * and turns the captured traffic into verified, signed skill files.
 */
export class CaptureSession {
  /**
   * Selector for the elements exposed as `e<N>` refs. Shared by
   * `extractElements()` and `resolveRef()` so both enumerate the same nodes.
   */
  private static readonly INTERACTIVE_SELECTOR =
    'a[href], button, input, select, textarea, [role="button"], [role="link"], [onclick], [tabindex]';

  readonly id: string;
  private browser: Browser | null = null;
  private page: Page | null = null;
  /** One generator per captured hostname. */
  private generators = new Map<string, SkillGenerator>();
  private totalRequests = 0;
  private filteredRequests = 0;
  private targetUrl = '';
  private options: SessionOptions;
  /** Hostnames that served an HTML body flagged by `detectCaptcha`. */
  private captchaDetectedDomains = new Set<string>();
  /** Rolling window of the last five captured endpoints ("METHOD path"). */
  private recentEndpoints: string[] = [];
  private timeoutTimer: ReturnType<typeof setTimeout> | null = null;
  private expired = false;
  private closed = false;

  constructor(options: SessionOptions = {}) {
    this.id = randomUUID();
    this.options = options;
  }

  /**
   * Launch the browser, restore cached cookies when available, start capturing
   * responses and navigate to `url`.
   *
   * @param url - Target URL; `https://` is prepended when it has no http(s) scheme.
   * @returns Snapshot of the page after `domcontentloaded`.
   * @throws If the session is already closed, or if launch/navigation fails.
   */
  async start(url: string): Promise<PageSnapshot> {
    if (this.closed) throw new Error('Session already closed');

    // Only a real "http://" or "https://" prefix counts as a scheme; a bare host
    // that merely starts with "http" (e.g. "httpbin.org") still needs the prefix.
    this.targetUrl = /^https?:\/\//i.test(url) ? url : `https://${url}`;
    const headless = this.options.headless ?? true;

    this.browser = await chromium.launch({ headless });
    const context = await this.browser.newContext();

    // Inject cached session cookies if available
    try {
      const authDir = this.options.authDir ?? APITAP_DIR;
      const machineId = await getMachineId();
      const authManager = new AuthManager(authDir, machineId);
      const domain = new URL(this.targetUrl).hostname;
      const cachedSession = await authManager.retrieveSession(domain);
      if (cachedSession?.cookies?.length) {
        await context.addCookies(cachedSession.cookies);
      }
    } catch {
      // Auth retrieval failed — proceed without cached session
    }

    this.page = await context.newPage();
    this.setupResponseListener();

    // Auto-timeout to prevent leaked browsers (unref so it doesn't block process exit)
    const timeoutMs = this.options.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    this.timeoutTimer = setTimeout(() => {
      this.expired = true;
      this.cleanup().catch(() => {});
    }, timeoutMs);
    if (this.timeoutTimer.unref) this.timeoutTimer.unref();

    await this.page.goto(this.targetUrl, { waitUntil: 'domcontentloaded' });
    return this.takeSnapshot();
  }

  /**
   * Perform one action on the page and report the resulting page state.
   *
   * Never throws: validation failures and Playwright errors are returned as
   * `{ success: false, error }` together with a snapshot.
   *
   * @param action - The action to perform and its arguments.
   * @returns Outcome plus a fresh snapshot (an empty one when the session is
   *   expired, closed, or not started).
   */
  async interact(action: InteractionAction): Promise<InteractionResult> {
    if (this.expired) return { success: false, error: 'Session expired', snapshot: this.emptySnapshot() };
    if (this.closed) return { success: false, error: 'Session closed', snapshot: this.emptySnapshot() };
    if (!this.page) return { success: false, error: 'Session not started', snapshot: this.emptySnapshot() };

    const page = this.page;
    const ok = async (): Promise<InteractionResult> => ({
      success: true,
      snapshot: await this.takeSnapshot(),
    });
    const fail = async (error: string): Promise<InteractionResult> => ({
      success: false,
      error,
      snapshot: await this.takeSnapshot(),
    });

    try {
      switch (action.action) {
        case 'snapshot':
          return await ok();

        case 'click': {
          if (!action.ref) return await fail('ref required for click');
          const el = await this.resolveRef(action.ref);
          if (!el) return await fail(`Element ${action.ref} not found`);
          await el.click();
          // A click may or may not navigate; a failed wait is not an error.
          await page.waitForLoadState('domcontentloaded').catch(() => {});
          return await ok();
        }

        case 'type': {
          if (!action.ref) return await fail('ref required for type');
          if (action.text === undefined) return await fail('text required for type');
          const el = await this.resolveRef(action.ref);
          if (!el) return await fail(`Element ${action.ref} not found`);
          await el.fill(action.text);
          if (action.submit) {
            await el.press('Enter');
            await page.waitForLoadState('domcontentloaded').catch(() => {});
          }
          return await ok();
        }

        case 'select': {
          if (!action.ref) return await fail('ref required for select');
          if (action.value === undefined) return await fail('value required for select');
          const el = await this.resolveRef(action.ref);
          if (!el) return await fail(`Element ${action.ref} not found`);
          await el.selectOption(action.value);
          return await ok();
        }

        case 'navigate': {
          if (!action.url) return await fail('url required for navigate');

          // Basic URL validation — block non-HTTP schemes and cloud metadata
          let parsed: URL;
          try {
            parsed = new URL(action.url);
          } catch {
            return await fail('Invalid URL');
          }

          // Block non-HTTP schemes (file://, ftp://, etc.)
          if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
            return await fail(`Blocked scheme: ${parsed.protocol}`);
          }

          // Block cloud metadata endpoint specifically (high-value target)
          if (parsed.hostname === '169.254.169.254') {
            return await fail('Navigation blocked: cloud metadata endpoint');
          }

          await page.goto(action.url, { waitUntil: 'domcontentloaded' });
          return await ok();
        }

        case 'scroll': {
          const dir = action.direction ?? 'down';
          const delta = dir === 'up' ? -500 : 500;
          await page.mouse.wheel(0, delta);
          // Wait a bit for lazy-loaded content
          await page.waitForTimeout(500);
          return await ok();
        }

        case 'wait': {
          // Default 2s, capped at 10s.
          const seconds = Math.min(action.seconds ?? 2, 10);
          await page.waitForTimeout(seconds * 1000);
          return await ok();
        }

        default:
          // Reachable only when an untyped caller passes an action outside the union.
          return await fail(`Unknown action: ${String((action as { action: unknown }).action)}`);
      }
    } catch (err: unknown) {
      const message = err instanceof Error ? err.message : String(err);
      // takeSnapshot() never rejects (it falls back to an empty snapshot itself),
      // so no further guarding is needed here.
      return fail(message);
    }
  }

  /**
   * Close the browser and persist what was captured.
   *
   * For every hostname with at least one endpoint: store extracted auth and any
   * OAuth client secret, verify the endpoints, sign the skill file and write it.
   *
   * @returns `{ aborted: true }` when the session was already closed (including
   *   by timeout); otherwise one entry per written skill file.
   */
  async finish(): Promise<FinishResult> {
    if (this.closed) return { aborted: true, domains: [] };

    // Measure DOM size before closing (length of the serialized HTML string).
    let domBytes: number | undefined;
    if (this.page) {
      try {
        const html = await this.page.content();
        domBytes = html.length;
      } catch { /* page may have navigated away */ }
    }

    await this.cleanup();

    // Finalize: generate skill files, verify, sign, write
    const machineId = await getMachineId();
    const key = deriveKey(machineId);
    // Use the same auth directory start() read from, so a custom authDir is honored.
    const authManager = new AuthManager(this.options.authDir ?? APITAP_DIR, machineId);

    const domains: FinishResult['domains'] = [];

    for (const [domain, generator] of this.generators) {
      // Mark captcha risk before the skill file is generated.
      if (this.captchaDetectedDomains.has(domain)) {
        generator.setCaptchaRisk(true);
      }

      let skill = generator.toSkillFile(domain, {
        domBytes,
        totalRequests: this.totalRequests,
      });

      if (skill.endpoints.length === 0) continue;

      // Store extracted auth (only the first entry is persisted)
      const extractedAuth = generator.getExtractedAuth();
      if (extractedAuth.length > 0) {
        await authManager.store(domain, extractedAuth[0]);
      }

      // Store OAuth credentials if detected
      const oauthConfig = generator.getOAuthConfig();
      if (oauthConfig) {
        const clientSecret = generator.getOAuthClientSecret();
        if (clientSecret) {
          await authManager.storeOAuthCredentials(domain, { clientSecret });
        }
      }

      // Verify, then sign, then write
      skill = await verifyEndpoints(skill);
      skill = signSkillFile(skill, key);
      const path = await writeSkillFile(skill, this.options.skillsDir);

      // Tally tiers
      const tiers: Record<string, number> = {};
      for (const ep of skill.endpoints) {
        const t = ep.replayability?.tier ?? 'unknown';
        tiers[t] = (tiers[t] ?? 0) + 1;
      }

      domains.push({
        domain,
        endpointCount: skill.endpoints.length,
        tiers,
        skillFile: path,
      });
    }

    return { aborted: false, domains };
  }

  /** Close the browser without writing any skill files. */
  async abort(): Promise<void> {
    await this.cleanup();
  }

  /** Whether the session is still usable, i.e. neither closed/aborted nor expired. */
  get isActive(): boolean {
    return !this.closed && !this.expired;
  }

  // --- private ---

  /** Attach the `response` handler that filters and records network traffic. */
  private setupResponseListener(): void {
    if (!this.page) return;

    const generatorOptions: GeneratorOptions = {
      enablePreview: false,
      scrub: true,
    };

    this.page.on('response', async (response) => {
      this.totalRequests++;

      const url = response.url();
      const status = response.status();
      const headers = response.headers();
      const contentType = headers['content-type'] ?? '';
      // Parsed once and reused by every branch below; null for unparseable URLs.
      const hostname = safeHostname(url);

      // Domain filtering
      if (!this.options.allDomains && hostname && !isDomainMatch(hostname, this.targetUrl)) {
        this.filteredRequests++;
        return;
      }

      if (!shouldCapture({ url, status, contentType })) {
        this.filteredRequests++;
        // Only hostnames that already have a generator get filtered stats.
        const gen = hostname ? this.generators.get(hostname) : undefined;
        if (gen) {
          gen.recordFiltered();
          // Track network bytes
          const contentLength = parseInt(headers['content-length'] ?? '0', 10);
          if (contentLength > 0) {
            gen.addNetworkBytes(contentLength);
          }
        }
        return;
      }

      // Without a hostname there is no generator to attribute the exchange to.
      if (!hostname) return;

      try {
        const body = await response.text();

        // Captcha detection
        if (contentType.includes('text/html') && detectCaptcha(body)) {
          this.captchaDetectedDomains.add(hostname);
        }

        let gen = this.generators.get(hostname);
        if (!gen) {
          gen = new SkillGenerator(generatorOptions);
          this.generators.set(hostname, gen);
        }

        const request = response.request();
        const exchange: CapturedExchange = {
          request: {
            url,
            method: request.method(),
            headers: request.headers(),
            postData: request.postData() ?? undefined,
          },
          response: {
            status,
            headers,
            body,
            contentType,
          },
          timestamp: new Date().toISOString(),
        };

        const endpoint = gen.addExchange(exchange);
        if (endpoint) {
          this.recentEndpoints.push(`${endpoint.method} ${endpoint.path}`);
          if (this.recentEndpoints.length > 5) {
            this.recentEndpoints.shift();
          }
        }
      } catch {
        // Response body may not be available
      }
    });
  }

  /**
   * Describe the current page: URL, title, interactive elements and capture
   * counters. Never rejects; any failure yields `emptySnapshot()`.
   */
  private async takeSnapshot(): Promise<PageSnapshot> {
    if (!this.page) return this.emptySnapshot();

    try {
      const url = this.page.url();
      const title = await this.page.title();
      const elements = await this.extractElements();

      // Sum endpoint counts across all generators
      let endpointsCaptured = 0;
      for (const gen of this.generators.values()) {
        endpointsCaptured += gen.endpointCount;
      }

      return {
        url,
        title,
        elements,
        endpointsCaptured,
        totalRequests: this.totalRequests,
        filteredRequests: this.filteredRequests,
        recentEndpoints: [...this.recentEndpoints],
      };
    } catch {
      return this.emptySnapshot();
    }
  }

  /**
   * List up to `MAX_ELEMENTS` interactive elements in document order, assigning
   * refs `e0`, `e1`, ... to those that pass the visibility filter.
   *
   * The filter here must stay identical to the one in `resolveRef()`.
   */
  private async extractElements(): Promise<PageElement[]> {
    if (!this.page) return [];

    return this.page.evaluate(
      ({ selector, maxElements, maxText }) => {
        const results: PageElement[] = [];

        for (const node of document.querySelectorAll(selector)) {
          if (results.length >= maxElements) break;

          const el = node as HTMLElement;
          // Skip hidden/tiny elements
          const rect = el.getBoundingClientRect();
          if (rect.width === 0 && rect.height === 0) continue;
          const style = window.getComputedStyle(el);
          if (style.display === 'none' || style.visibility === 'hidden') continue;

          const tag = el.tagName.toLowerCase();
          const text = (el.textContent || '').trim().slice(0, maxText);
          const role = el.getAttribute('role') || undefined;
          const name = (el as HTMLInputElement).name || undefined;
          const placeholder = (el as HTMLInputElement).placeholder || undefined;
          const href = (el as HTMLAnchorElement).href || undefined;
          const type = (el as HTMLInputElement).type || undefined;
          const disabled = (el as HTMLInputElement).disabled || undefined;

          results.push({
            ref: `e${results.length}`,
            tag,
            ...(role ? { role } : {}),
            text,
            ...(name ? { name } : {}),
            ...(placeholder ? { placeholder } : {}),
            ...(href ? { href } : {}),
            ...(type ? { type } : {}),
            ...(disabled ? { disabled } : {}),
          });
        }

        return results;
      },
      {
        selector: CaptureSession.INTERACTIVE_SELECTOR,
        maxElements: MAX_ELEMENTS,
        maxText: MAX_TEXT_LENGTH,
      },
    );
  }

  /**
   * Map a snapshot ref such as `e3` back to its element handle.
   *
   * The DOM is re-queried and elements are counted with the same visibility
   * filter `extractElements()` uses, so the ref resolves to the element the
   * snapshot described as long as the page has not changed in between.
   *
   * @param ref - Ref string from a snapshot.
   * @returns The element handle, or `null` when the ref is malformed or no
   *   element has that index.
   */
  private async resolveRef(ref: string) {
    if (!this.page) return null;

    const index = parseInt(ref.replace('e', ''), 10);
    if (Number.isNaN(index) || index < 0) return null;

    const handle = await this.page.evaluateHandle(
      ({ selector, target }) => {
        let visibleIndex = 0;
        for (const node of document.querySelectorAll(selector)) {
          const el = node as HTMLElement;
          // Keep in sync with the filter in extractElements().
          const rect = el.getBoundingClientRect();
          if (rect.width === 0 && rect.height === 0) continue;
          const style = window.getComputedStyle(el);
          if (style.display === 'none' || style.visibility === 'hidden') continue;

          if (visibleIndex === target) return el;
          visibleIndex++;
        }
        return null;
      },
      { selector: CaptureSession.INTERACTIVE_SELECTOR, target: index },
    );

    const element = handle.asElement();
    if (!element) {
      // The handle wraps `null`; release it rather than leaving it to the page.
      await handle.dispose();
      return null;
    }
    return element;
  }

  /** Snapshot with no page data, carrying only the current capture counters. */
  private emptySnapshot(): PageSnapshot {
    return {
      url: '',
      title: '',
      elements: [],
      endpointsCaptured: 0,
      totalRequests: this.totalRequests,
      filteredRequests: this.filteredRequests,
      recentEndpoints: [...this.recentEndpoints],
    };
  }

  /** Idempotently stop the timeout timer and close the browser. */
  private async cleanup(): Promise<void> {
    if (this.closed) return;
    this.closed = true;

    if (this.timeoutTimer) {
      clearTimeout(this.timeoutTimer);
      this.timeoutTimer = null;
    }

    if (this.browser) {
      try {
        await this.browser.close();
      } catch { /* already closed */ }
      this.browser = null;
      this.page = null;
    }
  }
}
|
|
484
|
+
|
|
485
|
+
/**
 * A single step for `CaptureSession.interact()`. `action` selects the
 * operation; the remaining fields are arguments for specific operations.
 */
export interface InteractionAction {
  action:
    | 'snapshot'
    | 'click'
    | 'type'
    | 'select'
    | 'navigate'
    | 'scroll'
    | 'wait';
  /** Element ref from a snapshot (e.g. `e3`); required for click, type and select. */
  ref?: string;
  /** Text to fill in; required for type. */
  text?: string;
  /** After typing, press Enter on the element. */
  submit?: boolean;
  /** Option value to choose; required for select. */
  value?: string;
  /** Destination for navigate; must be an absolute http(s) URL. */
  url?: string;
  /** Scroll direction; `down` when omitted. */
  direction?: 'up' | 'down';
  /** Wait duration in seconds; 2 when omitted, capped at 10. */
  seconds?: number;
}
|
|
495
|
+
|
|
496
|
+
/**
 * Extract the hostname from a URL string without throwing.
 *
 * @param url - Absolute URL to parse.
 * @returns The hostname, or `null` when `url` cannot be parsed.
 */
function safeHostname(url: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return null;
  }
  return parsed.hostname;
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
// src/capture/token-detector.ts
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Token Detection for Auth Refresh
|
|
5
|
+
*
|
|
6
|
+
* Identifies tokens in request bodies that are:
|
|
7
|
+
* 1. Session-generated (CSRF, nonces) — need refresh via browser
|
|
8
|
+
* 2. NOT user credentials (access tokens, API keys) — should not auto-refresh
|
|
9
|
+
*
|
|
10
|
+
* Detection uses pattern matching on names and value heuristics (hex, base64).
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
// Field names that indicate a session-generated value. The match is
// case-insensitive and unanchored, so "token" also covers names such as
// "csrf_token" or "_token".
const TOKEN_NAME_PATTERNS = /csrf|token|nonce|xsrf/i;

// Field names that denote user-provided credentials; these must not be
// auto-refreshed even though they match the pattern above.
const TOKEN_NAME_EXCLUDE = /access.?token|auth.?token|api.?token|bearer/i;

// Value shapes accepted as high-entropy session tokens.
// Hex: 32 to 64 hex digits, nothing else.
const TOKEN_VALUE_HEX = /^[a-f0-9]{32,64}$/i;
// Base64: at least 20 characters from the standard (+ /) or URL-safe (- _)
// alphabet, with up to two trailing "=" padding characters.
const TOKEN_VALUE_BASE64 = /^[A-Za-z0-9+/_-]{20,}={0,2}$/;
|
|
22
|
+
|
|
23
|
+
/**
 * Decide whether a field looks like a session token that can be refreshed.
 *
 * The name must match the token name pattern and must not match the
 * credential exclusion pattern; the value must match the hex or base64
 * value pattern.
 *
 * @param name - Field name (e.g., "csrf_token", "nonce")
 * @param value - Field value
 * @returns true when both the name and the value qualify
 */
export function isRefreshableToken(name: string, value: string): boolean {
  const nameQualifies = TOKEN_NAME_PATTERNS.test(name) && !TOKEN_NAME_EXCLUDE.test(name);
  if (!nameQualifies) {
    return false;
  }

  return TOKEN_VALUE_HEX.test(value) || TOKEN_VALUE_BASE64.test(value);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Collect the paths of all refreshable tokens inside a request body.
 *
 * Walks the body depth-first in key order. String values are tested with
 * `isRefreshableToken` using their own key as the name; nested objects and
 * arrays are descended into. Anything that is not a non-null object yields
 * an empty result.
 *
 * @param body - Parsed request body
 * @param prefix - Dot-separated path of the enclosing object, empty at the root
 * @returns Dot-separated paths to refreshable tokens (e.g., ["csrf_token", "data.nonce"])
 */
export function detectRefreshableTokens(
  body: unknown,
  prefix = ''
): string[] {
  if (typeof body !== 'object' || body === null) {
    return [];
  }

  return Object.entries(body as Record<string, unknown>).flatMap(([key, value]) => {
    const path = prefix ? `${prefix}.${key}` : key;

    if (typeof value === 'string') {
      return isRefreshableToken(key, value) ? [path] : [];
    }
    // Non-object values produce [] through the guard at the top of the call.
    return detectRefreshableTokens(value, path);
  });
}
|