@apitap/core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +60 -0
- package/README.md +362 -0
- package/SKILL.md +270 -0
- package/dist/auth/crypto.d.ts +31 -0
- package/dist/auth/crypto.js +66 -0
- package/dist/auth/crypto.js.map +1 -0
- package/dist/auth/handoff.d.ts +29 -0
- package/dist/auth/handoff.js +180 -0
- package/dist/auth/handoff.js.map +1 -0
- package/dist/auth/manager.d.ts +46 -0
- package/dist/auth/manager.js +127 -0
- package/dist/auth/manager.js.map +1 -0
- package/dist/auth/oauth-refresh.d.ts +16 -0
- package/dist/auth/oauth-refresh.js +91 -0
- package/dist/auth/oauth-refresh.js.map +1 -0
- package/dist/auth/refresh.d.ts +43 -0
- package/dist/auth/refresh.js +217 -0
- package/dist/auth/refresh.js.map +1 -0
- package/dist/capture/anti-bot.d.ts +15 -0
- package/dist/capture/anti-bot.js +43 -0
- package/dist/capture/anti-bot.js.map +1 -0
- package/dist/capture/blocklist.d.ts +6 -0
- package/dist/capture/blocklist.js +70 -0
- package/dist/capture/blocklist.js.map +1 -0
- package/dist/capture/body-diff.d.ts +8 -0
- package/dist/capture/body-diff.js +102 -0
- package/dist/capture/body-diff.js.map +1 -0
- package/dist/capture/body-variables.d.ts +13 -0
- package/dist/capture/body-variables.js +142 -0
- package/dist/capture/body-variables.js.map +1 -0
- package/dist/capture/domain.d.ts +8 -0
- package/dist/capture/domain.js +34 -0
- package/dist/capture/domain.js.map +1 -0
- package/dist/capture/entropy.d.ts +33 -0
- package/dist/capture/entropy.js +100 -0
- package/dist/capture/entropy.js.map +1 -0
- package/dist/capture/filter.d.ts +11 -0
- package/dist/capture/filter.js +49 -0
- package/dist/capture/filter.js.map +1 -0
- package/dist/capture/graphql.d.ts +21 -0
- package/dist/capture/graphql.js +99 -0
- package/dist/capture/graphql.js.map +1 -0
- package/dist/capture/idle.d.ts +23 -0
- package/dist/capture/idle.js +44 -0
- package/dist/capture/idle.js.map +1 -0
- package/dist/capture/monitor.d.ts +26 -0
- package/dist/capture/monitor.js +183 -0
- package/dist/capture/monitor.js.map +1 -0
- package/dist/capture/oauth-detector.d.ts +18 -0
- package/dist/capture/oauth-detector.js +96 -0
- package/dist/capture/oauth-detector.js.map +1 -0
- package/dist/capture/pagination.d.ts +9 -0
- package/dist/capture/pagination.js +40 -0
- package/dist/capture/pagination.js.map +1 -0
- package/dist/capture/parameterize.d.ts +17 -0
- package/dist/capture/parameterize.js +63 -0
- package/dist/capture/parameterize.js.map +1 -0
- package/dist/capture/scrubber.d.ts +5 -0
- package/dist/capture/scrubber.js +38 -0
- package/dist/capture/scrubber.js.map +1 -0
- package/dist/capture/session.d.ts +46 -0
- package/dist/capture/session.js +445 -0
- package/dist/capture/session.js.map +1 -0
- package/dist/capture/token-detector.d.ts +16 -0
- package/dist/capture/token-detector.js +62 -0
- package/dist/capture/token-detector.js.map +1 -0
- package/dist/capture/verifier.d.ts +17 -0
- package/dist/capture/verifier.js +147 -0
- package/dist/capture/verifier.js.map +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +930 -0
- package/dist/cli.js.map +1 -0
- package/dist/discovery/auth.d.ts +17 -0
- package/dist/discovery/auth.js +81 -0
- package/dist/discovery/auth.js.map +1 -0
- package/dist/discovery/fetch.d.ts +17 -0
- package/dist/discovery/fetch.js +59 -0
- package/dist/discovery/fetch.js.map +1 -0
- package/dist/discovery/frameworks.d.ts +11 -0
- package/dist/discovery/frameworks.js +249 -0
- package/dist/discovery/frameworks.js.map +1 -0
- package/dist/discovery/index.d.ts +21 -0
- package/dist/discovery/index.js +219 -0
- package/dist/discovery/index.js.map +1 -0
- package/dist/discovery/openapi.d.ts +13 -0
- package/dist/discovery/openapi.js +175 -0
- package/dist/discovery/openapi.js.map +1 -0
- package/dist/discovery/probes.d.ts +9 -0
- package/dist/discovery/probes.js +70 -0
- package/dist/discovery/probes.js.map +1 -0
- package/dist/index.d.ts +25 -0
- package/dist/index.js +25 -0
- package/dist/index.js.map +1 -0
- package/dist/inspect/report.d.ts +52 -0
- package/dist/inspect/report.js +191 -0
- package/dist/inspect/report.js.map +1 -0
- package/dist/mcp.d.ts +8 -0
- package/dist/mcp.js +526 -0
- package/dist/mcp.js.map +1 -0
- package/dist/orchestration/browse.d.ts +38 -0
- package/dist/orchestration/browse.js +198 -0
- package/dist/orchestration/browse.js.map +1 -0
- package/dist/orchestration/cache.d.ts +15 -0
- package/dist/orchestration/cache.js +24 -0
- package/dist/orchestration/cache.js.map +1 -0
- package/dist/plugin.d.ts +17 -0
- package/dist/plugin.js +158 -0
- package/dist/plugin.js.map +1 -0
- package/dist/read/decoders/deepwiki.d.ts +2 -0
- package/dist/read/decoders/deepwiki.js +148 -0
- package/dist/read/decoders/deepwiki.js.map +1 -0
- package/dist/read/decoders/grokipedia.d.ts +2 -0
- package/dist/read/decoders/grokipedia.js +210 -0
- package/dist/read/decoders/grokipedia.js.map +1 -0
- package/dist/read/decoders/hackernews.d.ts +2 -0
- package/dist/read/decoders/hackernews.js +168 -0
- package/dist/read/decoders/hackernews.js.map +1 -0
- package/dist/read/decoders/index.d.ts +2 -0
- package/dist/read/decoders/index.js +12 -0
- package/dist/read/decoders/index.js.map +1 -0
- package/dist/read/decoders/reddit.d.ts +2 -0
- package/dist/read/decoders/reddit.js +142 -0
- package/dist/read/decoders/reddit.js.map +1 -0
- package/dist/read/decoders/twitter.d.ts +12 -0
- package/dist/read/decoders/twitter.js +187 -0
- package/dist/read/decoders/twitter.js.map +1 -0
- package/dist/read/decoders/wikipedia.d.ts +2 -0
- package/dist/read/decoders/wikipedia.js +66 -0
- package/dist/read/decoders/wikipedia.js.map +1 -0
- package/dist/read/decoders/youtube.d.ts +2 -0
- package/dist/read/decoders/youtube.js +69 -0
- package/dist/read/decoders/youtube.js.map +1 -0
- package/dist/read/extract.d.ts +25 -0
- package/dist/read/extract.js +320 -0
- package/dist/read/extract.js.map +1 -0
- package/dist/read/index.d.ts +14 -0
- package/dist/read/index.js +66 -0
- package/dist/read/index.js.map +1 -0
- package/dist/read/peek.d.ts +9 -0
- package/dist/read/peek.js +137 -0
- package/dist/read/peek.js.map +1 -0
- package/dist/read/types.d.ts +44 -0
- package/dist/read/types.js +3 -0
- package/dist/read/types.js.map +1 -0
- package/dist/replay/engine.d.ts +53 -0
- package/dist/replay/engine.js +441 -0
- package/dist/replay/engine.js.map +1 -0
- package/dist/replay/truncate.d.ts +16 -0
- package/dist/replay/truncate.js +92 -0
- package/dist/replay/truncate.js.map +1 -0
- package/dist/serve.d.ts +31 -0
- package/dist/serve.js +149 -0
- package/dist/serve.js.map +1 -0
- package/dist/skill/generator.d.ts +44 -0
- package/dist/skill/generator.js +419 -0
- package/dist/skill/generator.js.map +1 -0
- package/dist/skill/importer.d.ts +26 -0
- package/dist/skill/importer.js +80 -0
- package/dist/skill/importer.js.map +1 -0
- package/dist/skill/search.d.ts +19 -0
- package/dist/skill/search.js +51 -0
- package/dist/skill/search.js.map +1 -0
- package/dist/skill/signing.d.ts +16 -0
- package/dist/skill/signing.js +34 -0
- package/dist/skill/signing.js.map +1 -0
- package/dist/skill/ssrf.d.ts +27 -0
- package/dist/skill/ssrf.js +210 -0
- package/dist/skill/ssrf.js.map +1 -0
- package/dist/skill/store.d.ts +7 -0
- package/dist/skill/store.js +93 -0
- package/dist/skill/store.js.map +1 -0
- package/dist/stats/report.d.ts +26 -0
- package/dist/stats/report.js +157 -0
- package/dist/stats/report.js.map +1 -0
- package/dist/types.d.ts +214 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +58 -0
- package/src/auth/crypto.ts +92 -0
- package/src/auth/handoff.ts +229 -0
- package/src/auth/manager.ts +140 -0
- package/src/auth/oauth-refresh.ts +120 -0
- package/src/auth/refresh.ts +300 -0
- package/src/capture/anti-bot.ts +63 -0
- package/src/capture/blocklist.ts +75 -0
- package/src/capture/body-diff.ts +109 -0
- package/src/capture/body-variables.ts +156 -0
- package/src/capture/domain.ts +34 -0
- package/src/capture/entropy.ts +121 -0
- package/src/capture/filter.ts +56 -0
- package/src/capture/graphql.ts +124 -0
- package/src/capture/idle.ts +45 -0
- package/src/capture/monitor.ts +224 -0
- package/src/capture/oauth-detector.ts +106 -0
- package/src/capture/pagination.ts +49 -0
- package/src/capture/parameterize.ts +68 -0
- package/src/capture/scrubber.ts +49 -0
- package/src/capture/session.ts +502 -0
- package/src/capture/token-detector.ts +76 -0
- package/src/capture/verifier.ts +171 -0
- package/src/cli.ts +1031 -0
- package/src/discovery/auth.ts +99 -0
- package/src/discovery/fetch.ts +85 -0
- package/src/discovery/frameworks.ts +231 -0
- package/src/discovery/index.ts +256 -0
- package/src/discovery/openapi.ts +230 -0
- package/src/discovery/probes.ts +76 -0
- package/src/index.ts +26 -0
- package/src/inspect/report.ts +247 -0
- package/src/mcp.ts +618 -0
- package/src/orchestration/browse.ts +250 -0
- package/src/orchestration/cache.ts +37 -0
- package/src/plugin.ts +188 -0
- package/src/read/decoders/deepwiki.ts +180 -0
- package/src/read/decoders/grokipedia.ts +246 -0
- package/src/read/decoders/hackernews.ts +198 -0
- package/src/read/decoders/index.ts +15 -0
- package/src/read/decoders/reddit.ts +158 -0
- package/src/read/decoders/twitter.ts +211 -0
- package/src/read/decoders/wikipedia.ts +75 -0
- package/src/read/decoders/youtube.ts +75 -0
- package/src/read/extract.ts +396 -0
- package/src/read/index.ts +78 -0
- package/src/read/peek.ts +175 -0
- package/src/read/types.ts +37 -0
- package/src/replay/engine.ts +559 -0
- package/src/replay/truncate.ts +116 -0
- package/src/serve.ts +189 -0
- package/src/skill/generator.ts +473 -0
- package/src/skill/importer.ts +107 -0
- package/src/skill/search.ts +76 -0
- package/src/skill/signing.ts +36 -0
- package/src/skill/ssrf.ts +238 -0
- package/src/skill/store.ts +107 -0
- package/src/stats/report.ts +208 -0
- package/src/types.ts +233 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
// src/capture/domain.ts
|
|
2
|
+
|
|
3
|
+
/**
 * Check whether a hostname belongs to the target domain.
 *
 * A hostname matches when it equals the target host or is a subdomain of it.
 * The subdomain test is a dot-prefixed suffix match, so "evil-example.com"
 * never matches "example.com". Both sides are compared case-insensitively
 * and a single trailing root dot ("example.com.") is ignored.
 *
 * @param hostname - The hostname to check (e.g. "api.example.com")
 * @param target - The target domain or URL (e.g. "example.com",
 *   "example.com:8080/path" or "https://example.com/path")
 * @returns true when `hostname` is the target host or one of its subdomains;
 *   false otherwise, including when no target host can be derived.
 */
export function isDomainMatch(hostname: string, target: string): boolean {
  // DNS names are case-insensitive and may carry a trailing root dot.
  const normalizeHost = (host: string): string => host.trim().toLowerCase().replace(/\.$/, '');

  let targetHost: string;
  try {
    if (target.includes('://')) {
      targetHost = new URL(target).hostname;
    } else {
      // Scheme-less target: drop any path/query/fragment, then a ":port" suffix.
      targetHost = target.split(/[/?#]/, 1)[0] ?? '';
      // Only strip when there is exactly one colon, so bare IPv6 literals are left alone.
      const hostAndPort = /^([^:]+):\d+$/.exec(targetHost);
      if (hostAndPort) targetHost = hostAndPort[1] ?? targetHost;
    }
  } catch {
    // Unparseable URL: fall back to treating the raw target as a hostname.
    targetHost = target;
  }

  targetHost = normalizeHost(targetHost);

  // Strip www. prefix from target for broader matching
  if (targetHost.startsWith('www.')) {
    targetHost = targetHost.slice(4);
  }

  // An empty target would otherwise match any hostname ending in ".".
  if (targetHost === '') return false;

  const host = normalizeHost(hostname);

  // Exact match, or dot-prefix suffix match (prevents evil-example.com matching example.com).
  return host === targetHost || host.endsWith('.' + targetHost);
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
// src/capture/entropy.ts
|
|
2
|
+
|
|
3
|
+
const MIN_TOKEN_LENGTH = 16;
|
|
4
|
+
const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
5
|
+
|
|
6
|
+
export interface TokenClassification {
|
|
7
|
+
isToken: boolean;
|
|
8
|
+
confidence: 'high' | 'medium';
|
|
9
|
+
format: 'jwt' | 'opaque';
|
|
10
|
+
jwtClaims?: JwtClaims;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export interface JwtClaims {
|
|
14
|
+
exp?: number;
|
|
15
|
+
iat?: number;
|
|
16
|
+
iss?: string;
|
|
17
|
+
aud?: string;
|
|
18
|
+
scope?: string;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
/**
 * Calculate Shannon entropy (bits per symbol) of a string.
 * Higher values indicate more randomness.
 *
 * Symbols are Unicode code points: the string is iterated with `for…of`,
 * and the same iteration supplies the symbol total, so the per-symbol
 * probabilities always sum to 1 (using `value.length` would over-count
 * characters outside the Basic Multilingual Plane).
 *
 * @param value - The string to measure
 * @returns Entropy in bits per symbol; 0 for an empty string
 */
export function shannonEntropy(value: string): number {
  if (value.length === 0) return 0;

  const freq = new Map<string, number>();
  let total = 0;
  for (const ch of value) {
    freq.set(ch, (freq.get(ch) ?? 0) + 1);
    total++;
  }

  let entropy = 0;
  for (const count of freq.values()) {
    const p = count / total;
    entropy -= p * Math.log2(p);
  }

  return entropy;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Parse a subset of JWT claims from a token string.
 *
 * The token must start with "eyJ" and consist of exactly three dot-separated
 * parts. Only the payload (second part) is decoded; the signature is NOT
 * verified. The payload must decode to a JSON object — arrays and scalars
 * are rejected.
 *
 * @param token - Candidate token (without any "Bearer " prefix)
 * @returns The recognised claims (`exp`, `iat`, `iss`, `aud`, `scope`) whose
 *   values have the expected primitive type, or null if the token does not
 *   have a valid JWT structure. Claims of another type (e.g. an array `aud`)
 *   are omitted from the result.
 */
export function parseJwtClaims(token: string): JwtClaims | null {
  // JWT: starts with eyJ, has exactly 2 dots
  if (!token.startsWith('eyJ')) return null;
  const parts = token.split('.');
  if (parts.length !== 3) return null;

  try {
    // Decode payload (second part): map the base64url alphabet to base64, then parse JSON.
    const payload = parts[1]!;
    const base64 = payload.replace(/-/g, '+').replace(/_/g, '/');
    const json = Buffer.from(base64, 'base64').toString('utf-8');
    const claims: unknown = JSON.parse(json);

    // A JWT claims set is a JSON object; `typeof [] === 'object'`, so exclude arrays explicitly.
    if (typeof claims !== 'object' || claims === null || Array.isArray(claims)) return null;
    const record = claims as Record<string, unknown>;

    const result: JwtClaims = {};
    if (typeof record.exp === 'number') result.exp = record.exp;
    if (typeof record.iat === 'number') result.iat = record.iat;
    if (typeof record.iss === 'string') result.iss = record.iss;
    if (typeof record.aud === 'string') result.aud = record.aud;
    if (typeof record.scope === 'string') result.scope = record.scope;

    return result;
  } catch {
    // Payload was not valid JSON.
    return null;
  }
}
|
|
74
|
+
|
|
75
|
+
/**
 * Classify whether a header/cookie value is likely an auth token.
 *
 * A leading "Bearer" auth scheme is stripped before analysis; the scheme is
 * matched case-insensitively ("bearer x" and "Bearer x" are treated alike).
 *
 * Detection hierarchy:
 * 1. JWT (eyJ prefix, 2 dots) → decode and classify with rich metadata
 * 2. UUID → skip (entity ID, not token)
 * 3. Short values (<16 chars) → skip
 * 4. High-entropy opaque string → classify by entropy threshold
 *    (>= 4.5 bits/char: high confidence, >= 3.5: medium confidence)
 *
 * @param name - Header or cookie name; not consulted by the current heuristics
 * @param value - Header or cookie value to classify
 * @returns The classification; `jwtClaims` is present only for JWTs
 */
export function isLikelyToken(name: string, value: string): TokenClassification {
  // Strip the "Bearer" scheme (case-insensitive) and the spaces that follow it.
  const raw = value.replace(/^Bearer +/i, '');

  // JWT detection — takes priority
  const jwtClaims = parseJwtClaims(raw);
  if (jwtClaims) {
    return {
      isToken: true,
      confidence: 'high',
      format: 'jwt',
      jwtClaims,
    };
  }

  // UUID exclusion — almost always entity IDs, not tokens
  if (UUID_PATTERN.test(raw)) {
    return { isToken: false, confidence: 'medium', format: 'opaque' };
  }

  // Minimum length gate
  if (raw.length < MIN_TOKEN_LENGTH) {
    return { isToken: false, confidence: 'medium', format: 'opaque' };
  }

  // Entropy-based classification
  const entropy = shannonEntropy(raw);

  if (entropy >= 4.5) {
    return { isToken: true, confidence: 'high', format: 'opaque' };
  }
  if (entropy >= 3.5) {
    return { isToken: true, confidence: 'medium', format: 'opaque' };
  }

  // Below threshold — not a token
  return { isToken: false, confidence: 'medium', format: 'opaque' };
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// src/capture/filter.ts
|
|
2
|
+
import { isBlocklisted } from './blocklist.js';
|
|
3
|
+
|
|
4
|
+
export interface FilterableResponse {
|
|
5
|
+
url: string;
|
|
6
|
+
status: number;
|
|
7
|
+
contentType: string;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
const JSON_CONTENT_TYPES = [
|
|
11
|
+
'application/json',
|
|
12
|
+
'application/vnd.api+json',
|
|
13
|
+
'text/json',
|
|
14
|
+
];
|
|
15
|
+
|
|
16
|
+
/** Paths that, on an exact match, are treated as telemetry/framework noise. */
const NOISE_PATHS = new Set([
  '/monitoring',
  '/telemetry',
  '/track',
  '/manifest.json',
]);

/**
 * Decide whether a URL path is framework or telemetry noise.
 *
 * A path is noise when it exactly equals one of NOISE_PATHS or lies under
 * the Next.js static build asset prefix (static assets, not data routes).
 * Exported for testing.
 *
 * @param pathname - The path component of a URL (e.g. "/api/items")
 * @returns true when the path should be ignored
 */
export function isPathNoise(pathname: string): boolean {
  const isKnownNoisePath = NOISE_PATHS.has(pathname);
  return isKnownNoisePath || pathname.startsWith('/_next/static/');
}
|
|
37
|
+
|
|
38
|
+
/**
 * Decide whether a response is worth capturing as an API exchange.
 *
 * A response is kept only when all of the following hold:
 * - its status is a 2xx success,
 * - its media type (content-type without parameters, lowercased) is one of
 *   JSON_CONTENT_TYPES,
 * - its URL parses, its hostname is not blocklisted and its path is not noise.
 *
 * @param response - URL, status and content-type of the response
 * @returns true when the response should be captured
 */
export function shouldCapture(response: FilterableResponse): boolean {
  const { url, status, contentType } = response;

  // Reject anything outside the 2xx range.
  if (status < 200 || status >= 300) return false;

  // Compare only the media type, ignoring parameters such as charset.
  const mediaType = contentType.toLowerCase().split(';')[0].trim();
  if (!JSON_CONTENT_TYPES.includes(mediaType)) return false;

  // Hostname and path checks; an unparseable URL is never captured.
  try {
    const { hostname, pathname } = new URL(url);
    return !(isBlocklisted(hostname) || isPathNoise(pathname));
  } catch {
    return false;
  }
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
// src/capture/graphql.ts
|
|
2
|
+
|
|
3
|
+
export interface GraphQLParsed {
|
|
4
|
+
operationName: string | null;
|
|
5
|
+
query: string;
|
|
6
|
+
variables: Record<string, unknown> | null;
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
/**
 * Detect if a request is to a GraphQL endpoint.
 *
 * A request is considered GraphQL when any of these holds:
 * - the path contains "/graphql",
 * - the content type contains "application/graphql" (case-insensitive,
 *   since media types are case-insensitive),
 * - the body is a JSON object with a string "query" field.
 *
 * @param path - Request path
 * @param contentType - Request Content-Type header value ('' if absent)
 * @param body - Raw request body, or null when there is none
 * @returns true when the request looks like GraphQL
 */
export function isGraphQLEndpoint(
  path: string,
  contentType: string,
  body: string | null,
): boolean {
  // Path contains /graphql
  if (path.includes('/graphql')) {
    return true;
  }

  // Content-Type is application/graphql
  if (contentType.toLowerCase().includes('application/graphql')) {
    return true;
  }

  // Body contains a "query" field (GraphQL-style)
  if (!body) {
    return false;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(body);
  } catch {
    // Not JSON
    return false;
  }

  // Guard explicitly: JSON.parse may yield null or a scalar.
  return (
    typeof parsed === 'object' &&
    parsed !== null &&
    typeof (parsed as { query?: unknown }).query === 'string'
  );
}
|
|
41
|
+
|
|
42
|
+
/**
 * Parse a GraphQL request body.
 *
 * The body must be a JSON object with a string `query`. The optional fields
 * are validated so the result honours its declared types: `operationName`
 * is kept only when it is a string, and `variables` only when it is a
 * non-array object; anything else becomes null.
 *
 * @param body - Raw JSON request body
 * @returns The parsed request, or null when the body is not JSON, not an
 *   object, or has no string `query`
 */
export function parseGraphQLBody(body: string): GraphQLParsed | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(body);
  } catch {
    return null;
  }

  if (typeof parsed !== 'object' || parsed === null) {
    return null;
  }

  const { query, operationName, variables } = parsed as Record<string, unknown>;
  if (typeof query !== 'string') {
    return null;
  }

  const hasVariableMap =
    typeof variables === 'object' && variables !== null && !Array.isArray(variables);

  return {
    operationName: typeof operationName === 'string' ? operationName : null,
    query,
    variables: hasVariableMap ? (variables as Record<string, unknown>) : null,
  };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Extract operation name from query string or explicit operationName.
 *
 * A non-empty explicit name always wins. Otherwise the query document is
 * inspected: leading whitespace, commas and `#` line comments are skipped,
 * and if the first real token is `query`, `mutation` or `subscription`
 * followed by a name, that name is returned.
 *
 * @param query - GraphQL document text
 * @param explicitName - The request's operationName, or null
 * @returns The operation name, or 'Anonymous' when none can be determined
 */
export function extractOperationName(
  query: string,
  explicitName: string | null,
): string {
  if (explicitName) {
    return explicitName;
  }

  // Skip leading whitespace, commas and "#" comments with a linear scan
  // (avoids a nested-quantifier regex on request-supplied text).
  const whitespace = /\s/;
  let pos = 0;
  while (pos < query.length) {
    const ch = query.charAt(pos);
    if (ch === '#') {
      // Comment runs to the end of the line.
      while (pos < query.length && query.charAt(pos) !== '\n' && query.charAt(pos) !== '\r') {
        pos++;
      }
    } else if (ch === ',' || whitespace.test(ch)) {
      pos++;
    } else {
      break;
    }
  }

  // Match "query Name", "mutation Name" or "subscription Name" exactly at `pos`
  // (sticky flag anchors the match to lastIndex).
  const operation = /(query|mutation|subscription)\s+(\w+)/y;
  operation.lastIndex = pos;
  const match = operation.exec(query);

  return match?.[2] ?? 'Anonymous';
}
|
|
80
|
+
|
|
81
|
+
/**
 * Find the variables that look dynamic (IDs, cursors, pagination values).
 *
 * Walks the variables object depth-first and returns dot-separated paths
 * for every numeric value and every string that isLikelyCursor accepts.
 * Nested plain objects are descended into; arrays, booleans and nulls are
 * ignored.
 *
 * @param variables - GraphQL variables object, or null
 * @param prefix - Path of the enclosing object ('' at the top level)
 * @returns Paths of the detected variables, in traversal order
 */
export function detectGraphQLVariables(
  variables: Record<string, unknown> | null,
  prefix = '',
): string[] {
  if (!variables || typeof variables !== 'object') {
    return [];
  }

  return Object.entries(variables).flatMap(([name, entry]): string[] => {
    const fullPath = prefix ? `${prefix}.${name}` : name;

    // Numbers are often IDs or pagination values.
    if (typeof entry === 'number') {
      return [fullPath];
    }

    // Strings count only when they look like a cursor (base64, long alphanumeric).
    if (typeof entry === 'string') {
      return isLikelyCursor(entry) ? [fullPath] : [];
    }

    // Descend into nested plain objects.
    if (entry && typeof entry === 'object' && !Array.isArray(entry)) {
      return detectGraphQLVariables(entry as Record<string, unknown>, fullPath);
    }

    return [];
  });
}
|
|
113
|
+
|
|
114
|
+
/**
 * Heuristic: does this string look like an opaque cursor or identifier?
 *
 * True for strings longer than 10 characters made up solely of base64 /
 * base64url characters (letters, digits, "+", "/", "=", "_", "-"), and for
 * UUID-shaped strings.
 */
function isLikelyCursor(value: string): boolean {
  // Base64-ish: long alphanumeric, possibly with = padding
  const looksBase64 = value.length > 10 && /^[a-zA-Z0-9+/=_-]+$/.test(value);

  // UUID-like
  return (
    looksBase64 ||
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(value)
  );
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
// src/capture/idle.ts
|
|
2
|
+
|
|
3
|
+
/**
 * Remembers which endpoint keys have been seen and reports when no new one
 * has appeared for a configurable amount of time.
 *
 * Used during interactive capture to nudge the user once discovery has
 * gone quiet. The clock is injectable so the behavior can be tested
 * deterministically.
 */
export class IdleTracker {
  /** Endpoint keys recorded so far. */
  private readonly knownEndpoints = new Set<string>();
  /** Clock reading at construction or at the most recent new endpoint. */
  private lastDiscoveryAt: number;
  /** Quiet time (ms) after which the tracker reports idle. */
  private readonly idleAfterMs: number;
  /** Whether the current idle period has already been reported. */
  private alreadyNotified = false;
  /** Time source, in milliseconds. */
  private readonly clock: () => number;

  /**
   * @param thresholdMs - Quiet time in milliseconds before idle is reported
   * @param now - Time source returning milliseconds (defaults to Date.now)
   */
  constructor(thresholdMs = 15000, now: () => number = Date.now) {
    this.idleAfterMs = thresholdMs;
    this.clock = now;
    this.lastDiscoveryAt = this.clock();
  }

  /**
   * Record an endpoint key (e.g. "GET /api/items").
   * A previously unseen key restarts the idle timer and re-arms checkIdle.
   *
   * @returns true if the key is genuinely new, false if it was seen before
   */
  recordEndpoint(key: string): boolean {
    const isKnown = this.knownEndpoints.has(key);
    if (isKnown) {
      return false;
    }

    this.knownEndpoints.add(key);
    this.lastDiscoveryAt = this.clock();
    this.alreadyNotified = false;
    return true;
  }

  /**
   * Check whether the idle threshold has been reached.
   *
   * @returns true exactly once per idle period; further calls return false
   *   until a new endpoint is recorded
   */
  checkIdle(): boolean {
    if (this.alreadyNotified) {
      return false;
    }

    const quietFor = this.clock() - this.lastDiscoveryAt;
    const isIdle = quietFor >= this.idleAfterMs;
    if (isIdle) {
      this.alreadyNotified = true;
    }
    return isIdle;
  }
}
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
// src/capture/monitor.ts
|
|
2
|
+
import { chromium, type Browser, type Page } from 'playwright';
|
|
3
|
+
import { shouldCapture } from './filter.js';
|
|
4
|
+
import { isDomainMatch } from './domain.js';
|
|
5
|
+
import { SkillGenerator, type GeneratorOptions } from '../skill/generator.js';
|
|
6
|
+
import { IdleTracker } from './idle.js';
|
|
7
|
+
import { detectCaptcha } from '../auth/refresh.js';
|
|
8
|
+
import type { CapturedExchange } from '../types.js';
|
|
9
|
+
|
|
10
|
+
export interface CaptureOptions {
|
|
11
|
+
url: string;
|
|
12
|
+
port?: number;
|
|
13
|
+
launch?: boolean;
|
|
14
|
+
attach?: boolean;
|
|
15
|
+
headless?: boolean; // default: false (interactive capture shows browser)
|
|
16
|
+
duration?: number;
|
|
17
|
+
allDomains?: boolean;
|
|
18
|
+
enablePreview?: boolean;
|
|
19
|
+
scrub?: boolean;
|
|
20
|
+
onEndpoint?: (endpoint: { id: string; method: string; path: string }) => void;
|
|
21
|
+
onFiltered?: () => void;
|
|
22
|
+
onIdle?: () => void;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export interface CaptureResult {
|
|
26
|
+
generators: Map<string, SkillGenerator>;
|
|
27
|
+
totalRequests: number;
|
|
28
|
+
filteredRequests: number;
|
|
29
|
+
domBytes?: number; // v1.0: measured DOM size for browser cost comparison
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const DEFAULT_CDP_PORTS = [18792, 18800, 9222];
|
|
33
|
+
|
|
34
|
+
/**
 * Obtain a browser for capture, preferring an already-running one.
 *
 * Unless `options.launch` is set, each candidate CDP port (the explicit
 * `options.port`, or DEFAULT_CDP_PORTS) is tried in order with a 3 second
 * timeout and the first successful connection is returned. If none
 * connects and `options.attach` is set, an error is thrown; otherwise a
 * new Chromium instance is launched.
 *
 * Headless default when `options.headless` is not given: headed on Windows
 * and macOS (which never set DISPLAY); on other platforms headed only when
 * the DISPLAY environment variable is set.
 *
 * @param options - Capture options (port, launch, attach, headless are read)
 * @returns The browser and whether this call launched it (and so owns it)
 * @throws Error when `attach` is set and no CDP endpoint could be reached
 */
async function connectToBrowser(options: CaptureOptions): Promise<{ browser: Browser; launched: boolean }> {
  const ports = options.port ? [options.port] : DEFAULT_CDP_PORTS;

  if (!options.launch) {
    for (const port of ports) {
      try {
        const browser = await chromium.connectOverCDP(`http://localhost:${port}`, { timeout: 3000 });
        return { browser, launched: false };
      } catch {
        // Nothing reachable on this port; try the next one.
        continue;
      }
    }
  }

  if (options.attach) {
    throw new Error(`No browser found on CDP ports: ${ports.join(', ')}. Is a Chromium browser running with remote debugging?`);
  }

  // DISPLAY is only meaningful on X11-style platforms; Windows and macOS
  // have a GUI without it, so they must not fall back to headless.
  const hasNativeGui = process.platform === 'win32' || process.platform === 'darwin';
  const defaultHeadless = hasNativeGui ? false : !process.env.DISPLAY;

  const browser = await chromium.launch({ headless: options.headless ?? defaultHeadless });
  return { browser, launched: true };
}
|
|
55
|
+
|
|
56
|
+
/**
 * Capture API traffic from a browser page and feed it to per-host skill generators.
 *
 * Flow:
 * 1. Connect to (or launch) a browser and pick a page: a fresh page when the
 *    browser was launched here, otherwise the first existing page of the
 *    first context (or a new page if there is none).
 * 2. Listen to every response. Responses from other domains are dropped
 *    unless `allDomains` is set; responses rejected by shouldCapture are
 *    counted as filtered; the rest are recorded as exchanges on the
 *    generator for their hostname.
 * 3. Navigate to `options.url`, then wait for `duration` seconds or SIGINT
 *    (SIGINT only when no duration is given).
 * 4. Measure the final DOM size and flag generators whose host served a
 *    captcha page.
 *
 * Captcha detection runs on filtered `text/html` responses: shouldCapture
 * only admits JSON content types, so HTML never reaches the capture path.
 *
 * Cleanup (idle interval, launched browser) runs even when navigation or
 * waiting throws. A browser that was attached to rather than launched is
 * left open.
 *
 * @param options - Capture configuration and progress callbacks
 * @returns Generators keyed by hostname, request counters, and the UTF-8
 *   byte size of the final page HTML (undefined if it could not be read)
 */
export async function capture(options: CaptureOptions): Promise<CaptureResult> {
  const { browser, launched } = await connectToBrowser(options);
  const generators = new Map<string, SkillGenerator>();
  let totalRequests = 0;
  let filteredRequests = 0;
  const captchaDetectedDomains = new Set<string>();

  // The capture URL doubles as the target for domain-only filtering.
  const targetUrl = options.url;

  const generatorOptions: GeneratorOptions = {
    enablePreview: options.enablePreview ?? false,
    scrub: options.scrub ?? true,
  };

  // Idle tracking: only active during interactive capture (no --duration)
  const idleTracker = !options.duration ? new IdleTracker() : null;
  let idleInterval: ReturnType<typeof setInterval> | null = null;

  let domBytes: number | undefined;

  try {
    let page: Page;
    if (launched) {
      const context = await browser.newContext();
      page = await context.newPage();
    } else {
      const contexts = browser.contexts();
      if (contexts.length > 0 && contexts[0].pages().length > 0) {
        page = contexts[0].pages()[0];
      } else {
        const context = contexts[0] ?? await browser.newContext();
        page = await context.newPage();
      }
    }

    page.on('response', async (response) => {
      totalRequests++;

      const url = response.url();
      const status = response.status();
      const contentType = response.headers()['content-type'] ?? '';
      const hostname = safeHostname(url);

      // Domain-only filtering (before any other processing)
      if (!options.allDomains && hostname && !isDomainMatch(hostname, targetUrl)) {
        filteredRequests++;
        options.onFiltered?.();
        return;
      }

      if (!shouldCapture({ url, status, contentType })) {
        filteredRequests++;

        // Only hosts that already have a generator accumulate filtered stats.
        const gen = hostname ? generators.get(hostname) : undefined;
        if (gen) {
          gen.recordFiltered();
          // Track network bytes from headers for filtered responses (browser cost measurement)
          const contentLength = parseInt(response.headers()['content-length'] ?? '0', 10);
          if (contentLength > 0) {
            gen.addNetworkBytes(contentLength);
          }
        }
        options.onFiltered?.();

        // Captcha risk detection: HTML is always filtered out above, so this
        // is the only place an HTML body can be inspected.
        if (hostname && contentType.toLowerCase().includes('text/html')) {
          try {
            if (detectCaptcha(await response.text())) {
              captchaDetectedDomains.add(hostname);
            }
          } catch {
            // Body may not be available (e.g. redirects); skip silently
          }
        }
        return;
      }

      // shouldCapture only accepts parseable URLs, so hostname is set here;
      // the guard keeps the type checker informed.
      if (!hostname) return;

      try {
        const body = await response.text();

        let gen = generators.get(hostname);
        if (!gen) {
          gen = new SkillGenerator(generatorOptions);
          generators.set(hostname, gen);
        }

        const request = response.request();
        const exchange: CapturedExchange = {
          request: {
            url,
            method: request.method(),
            headers: request.headers(),
            postData: request.postData() ?? undefined,
          },
          response: {
            status,
            headers: response.headers(),
            body,
            contentType,
          },
          timestamp: new Date().toISOString(),
        };

        const endpoint = gen.addExchange(exchange);
        if (endpoint) {
          options.onEndpoint?.({ id: endpoint.id, method: endpoint.method, path: endpoint.path });

          // Track for idle detection using parameterized key
          if (idleTracker) {
            idleTracker.recordEndpoint(`${endpoint.method} ${endpoint.path}`);
          }
        }
      } catch {
        // Response body may not be available (e.g. redirects); skip silently
      }
    });

    await page.goto(options.url, { waitUntil: 'domcontentloaded' });

    // Start idle check interval (every 5s) for interactive capture
    const onIdle = options.onIdle;
    if (idleTracker && onIdle) {
      idleInterval = setInterval(() => {
        if (idleTracker.checkIdle()) {
          onIdle();
        }
      }, 5000);
    }

    // Wait for duration or until interrupted.
    // SIGINT always resolves gracefully so skill files get written.
    const durationSec = options.duration;
    await new Promise<void>(resolve => {
      let timer: ReturnType<typeof setTimeout> | undefined;
      const onSigint = (): void => {
        if (timer !== undefined) clearTimeout(timer);
        resolve();
      };
      process.once('SIGINT', onSigint);
      if (durationSec) {
        timer = setTimeout(() => {
          // Remove the handler so a later Ctrl-C is not swallowed after capture ends.
          process.removeListener('SIGINT', onSigint);
          resolve();
        }, durationSec * 1000);
      }
    });

    // Stop idle nudges before the final measurements.
    if (idleInterval) {
      clearInterval(idleInterval);
      idleInterval = null;
    }

    // Measure DOM size for browser cost comparison (v1.0)
    try {
      const html = await page.content();
      // Byte size, not UTF-16 length, so it is comparable with content-length figures.
      domBytes = Buffer.byteLength(html, 'utf8');
    } catch { /* page may have navigated away */ }
  } finally {
    if (idleInterval) clearInterval(idleInterval);
    // Only close browsers this call launched; attached browsers belong to the user.
    if (launched) {
      await browser.close();
    }
  }

  // Mark generators for domains where captcha was detected
  for (const [hostname, gen] of generators) {
    if (captchaDetectedDomains.has(hostname)) {
      gen.setCaptchaRisk(true);
    }
  }

  return { generators, totalRequests, filteredRequests, domBytes };
}
|
|
217
|
+
|
|
218
|
+
/**
 * Extract the hostname from a URL string without throwing.
 *
 * @param url - Absolute URL to parse
 * @returns The URL's hostname, or null when the string is not a valid URL
 */
function safeHostname(url: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return null;
  }
  return parsed.hostname;
}
|