@apitap/core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +60 -0
- package/README.md +362 -0
- package/SKILL.md +270 -0
- package/dist/auth/crypto.d.ts +31 -0
- package/dist/auth/crypto.js +66 -0
- package/dist/auth/crypto.js.map +1 -0
- package/dist/auth/handoff.d.ts +29 -0
- package/dist/auth/handoff.js +180 -0
- package/dist/auth/handoff.js.map +1 -0
- package/dist/auth/manager.d.ts +46 -0
- package/dist/auth/manager.js +127 -0
- package/dist/auth/manager.js.map +1 -0
- package/dist/auth/oauth-refresh.d.ts +16 -0
- package/dist/auth/oauth-refresh.js +91 -0
- package/dist/auth/oauth-refresh.js.map +1 -0
- package/dist/auth/refresh.d.ts +43 -0
- package/dist/auth/refresh.js +217 -0
- package/dist/auth/refresh.js.map +1 -0
- package/dist/capture/anti-bot.d.ts +15 -0
- package/dist/capture/anti-bot.js +43 -0
- package/dist/capture/anti-bot.js.map +1 -0
- package/dist/capture/blocklist.d.ts +6 -0
- package/dist/capture/blocklist.js +70 -0
- package/dist/capture/blocklist.js.map +1 -0
- package/dist/capture/body-diff.d.ts +8 -0
- package/dist/capture/body-diff.js +102 -0
- package/dist/capture/body-diff.js.map +1 -0
- package/dist/capture/body-variables.d.ts +13 -0
- package/dist/capture/body-variables.js +142 -0
- package/dist/capture/body-variables.js.map +1 -0
- package/dist/capture/domain.d.ts +8 -0
- package/dist/capture/domain.js +34 -0
- package/dist/capture/domain.js.map +1 -0
- package/dist/capture/entropy.d.ts +33 -0
- package/dist/capture/entropy.js +100 -0
- package/dist/capture/entropy.js.map +1 -0
- package/dist/capture/filter.d.ts +11 -0
- package/dist/capture/filter.js +49 -0
- package/dist/capture/filter.js.map +1 -0
- package/dist/capture/graphql.d.ts +21 -0
- package/dist/capture/graphql.js +99 -0
- package/dist/capture/graphql.js.map +1 -0
- package/dist/capture/idle.d.ts +23 -0
- package/dist/capture/idle.js +44 -0
- package/dist/capture/idle.js.map +1 -0
- package/dist/capture/monitor.d.ts +26 -0
- package/dist/capture/monitor.js +183 -0
- package/dist/capture/monitor.js.map +1 -0
- package/dist/capture/oauth-detector.d.ts +18 -0
- package/dist/capture/oauth-detector.js +96 -0
- package/dist/capture/oauth-detector.js.map +1 -0
- package/dist/capture/pagination.d.ts +9 -0
- package/dist/capture/pagination.js +40 -0
- package/dist/capture/pagination.js.map +1 -0
- package/dist/capture/parameterize.d.ts +17 -0
- package/dist/capture/parameterize.js +63 -0
- package/dist/capture/parameterize.js.map +1 -0
- package/dist/capture/scrubber.d.ts +5 -0
- package/dist/capture/scrubber.js +38 -0
- package/dist/capture/scrubber.js.map +1 -0
- package/dist/capture/session.d.ts +46 -0
- package/dist/capture/session.js +445 -0
- package/dist/capture/session.js.map +1 -0
- package/dist/capture/token-detector.d.ts +16 -0
- package/dist/capture/token-detector.js +62 -0
- package/dist/capture/token-detector.js.map +1 -0
- package/dist/capture/verifier.d.ts +17 -0
- package/dist/capture/verifier.js +147 -0
- package/dist/capture/verifier.js.map +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +930 -0
- package/dist/cli.js.map +1 -0
- package/dist/discovery/auth.d.ts +17 -0
- package/dist/discovery/auth.js +81 -0
- package/dist/discovery/auth.js.map +1 -0
- package/dist/discovery/fetch.d.ts +17 -0
- package/dist/discovery/fetch.js +59 -0
- package/dist/discovery/fetch.js.map +1 -0
- package/dist/discovery/frameworks.d.ts +11 -0
- package/dist/discovery/frameworks.js +249 -0
- package/dist/discovery/frameworks.js.map +1 -0
- package/dist/discovery/index.d.ts +21 -0
- package/dist/discovery/index.js +219 -0
- package/dist/discovery/index.js.map +1 -0
- package/dist/discovery/openapi.d.ts +13 -0
- package/dist/discovery/openapi.js +175 -0
- package/dist/discovery/openapi.js.map +1 -0
- package/dist/discovery/probes.d.ts +9 -0
- package/dist/discovery/probes.js +70 -0
- package/dist/discovery/probes.js.map +1 -0
- package/dist/index.d.ts +25 -0
- package/dist/index.js +25 -0
- package/dist/index.js.map +1 -0
- package/dist/inspect/report.d.ts +52 -0
- package/dist/inspect/report.js +191 -0
- package/dist/inspect/report.js.map +1 -0
- package/dist/mcp.d.ts +8 -0
- package/dist/mcp.js +526 -0
- package/dist/mcp.js.map +1 -0
- package/dist/orchestration/browse.d.ts +38 -0
- package/dist/orchestration/browse.js +198 -0
- package/dist/orchestration/browse.js.map +1 -0
- package/dist/orchestration/cache.d.ts +15 -0
- package/dist/orchestration/cache.js +24 -0
- package/dist/orchestration/cache.js.map +1 -0
- package/dist/plugin.d.ts +17 -0
- package/dist/plugin.js +158 -0
- package/dist/plugin.js.map +1 -0
- package/dist/read/decoders/deepwiki.d.ts +2 -0
- package/dist/read/decoders/deepwiki.js +148 -0
- package/dist/read/decoders/deepwiki.js.map +1 -0
- package/dist/read/decoders/grokipedia.d.ts +2 -0
- package/dist/read/decoders/grokipedia.js +210 -0
- package/dist/read/decoders/grokipedia.js.map +1 -0
- package/dist/read/decoders/hackernews.d.ts +2 -0
- package/dist/read/decoders/hackernews.js +168 -0
- package/dist/read/decoders/hackernews.js.map +1 -0
- package/dist/read/decoders/index.d.ts +2 -0
- package/dist/read/decoders/index.js +12 -0
- package/dist/read/decoders/index.js.map +1 -0
- package/dist/read/decoders/reddit.d.ts +2 -0
- package/dist/read/decoders/reddit.js +142 -0
- package/dist/read/decoders/reddit.js.map +1 -0
- package/dist/read/decoders/twitter.d.ts +12 -0
- package/dist/read/decoders/twitter.js +187 -0
- package/dist/read/decoders/twitter.js.map +1 -0
- package/dist/read/decoders/wikipedia.d.ts +2 -0
- package/dist/read/decoders/wikipedia.js +66 -0
- package/dist/read/decoders/wikipedia.js.map +1 -0
- package/dist/read/decoders/youtube.d.ts +2 -0
- package/dist/read/decoders/youtube.js +69 -0
- package/dist/read/decoders/youtube.js.map +1 -0
- package/dist/read/extract.d.ts +25 -0
- package/dist/read/extract.js +320 -0
- package/dist/read/extract.js.map +1 -0
- package/dist/read/index.d.ts +14 -0
- package/dist/read/index.js +66 -0
- package/dist/read/index.js.map +1 -0
- package/dist/read/peek.d.ts +9 -0
- package/dist/read/peek.js +137 -0
- package/dist/read/peek.js.map +1 -0
- package/dist/read/types.d.ts +44 -0
- package/dist/read/types.js +3 -0
- package/dist/read/types.js.map +1 -0
- package/dist/replay/engine.d.ts +53 -0
- package/dist/replay/engine.js +441 -0
- package/dist/replay/engine.js.map +1 -0
- package/dist/replay/truncate.d.ts +16 -0
- package/dist/replay/truncate.js +92 -0
- package/dist/replay/truncate.js.map +1 -0
- package/dist/serve.d.ts +31 -0
- package/dist/serve.js +149 -0
- package/dist/serve.js.map +1 -0
- package/dist/skill/generator.d.ts +44 -0
- package/dist/skill/generator.js +419 -0
- package/dist/skill/generator.js.map +1 -0
- package/dist/skill/importer.d.ts +26 -0
- package/dist/skill/importer.js +80 -0
- package/dist/skill/importer.js.map +1 -0
- package/dist/skill/search.d.ts +19 -0
- package/dist/skill/search.js +51 -0
- package/dist/skill/search.js.map +1 -0
- package/dist/skill/signing.d.ts +16 -0
- package/dist/skill/signing.js +34 -0
- package/dist/skill/signing.js.map +1 -0
- package/dist/skill/ssrf.d.ts +27 -0
- package/dist/skill/ssrf.js +210 -0
- package/dist/skill/ssrf.js.map +1 -0
- package/dist/skill/store.d.ts +7 -0
- package/dist/skill/store.js +93 -0
- package/dist/skill/store.js.map +1 -0
- package/dist/stats/report.d.ts +26 -0
- package/dist/stats/report.js +157 -0
- package/dist/stats/report.js.map +1 -0
- package/dist/types.d.ts +214 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +58 -0
- package/src/auth/crypto.ts +92 -0
- package/src/auth/handoff.ts +229 -0
- package/src/auth/manager.ts +140 -0
- package/src/auth/oauth-refresh.ts +120 -0
- package/src/auth/refresh.ts +300 -0
- package/src/capture/anti-bot.ts +63 -0
- package/src/capture/blocklist.ts +75 -0
- package/src/capture/body-diff.ts +109 -0
- package/src/capture/body-variables.ts +156 -0
- package/src/capture/domain.ts +34 -0
- package/src/capture/entropy.ts +121 -0
- package/src/capture/filter.ts +56 -0
- package/src/capture/graphql.ts +124 -0
- package/src/capture/idle.ts +45 -0
- package/src/capture/monitor.ts +224 -0
- package/src/capture/oauth-detector.ts +106 -0
- package/src/capture/pagination.ts +49 -0
- package/src/capture/parameterize.ts +68 -0
- package/src/capture/scrubber.ts +49 -0
- package/src/capture/session.ts +502 -0
- package/src/capture/token-detector.ts +76 -0
- package/src/capture/verifier.ts +171 -0
- package/src/cli.ts +1031 -0
- package/src/discovery/auth.ts +99 -0
- package/src/discovery/fetch.ts +85 -0
- package/src/discovery/frameworks.ts +231 -0
- package/src/discovery/index.ts +256 -0
- package/src/discovery/openapi.ts +230 -0
- package/src/discovery/probes.ts +76 -0
- package/src/index.ts +26 -0
- package/src/inspect/report.ts +247 -0
- package/src/mcp.ts +618 -0
- package/src/orchestration/browse.ts +250 -0
- package/src/orchestration/cache.ts +37 -0
- package/src/plugin.ts +188 -0
- package/src/read/decoders/deepwiki.ts +180 -0
- package/src/read/decoders/grokipedia.ts +246 -0
- package/src/read/decoders/hackernews.ts +198 -0
- package/src/read/decoders/index.ts +15 -0
- package/src/read/decoders/reddit.ts +158 -0
- package/src/read/decoders/twitter.ts +211 -0
- package/src/read/decoders/wikipedia.ts +75 -0
- package/src/read/decoders/youtube.ts +75 -0
- package/src/read/extract.ts +396 -0
- package/src/read/index.ts +78 -0
- package/src/read/peek.ts +175 -0
- package/src/read/types.ts +37 -0
- package/src/replay/engine.ts +559 -0
- package/src/replay/truncate.ts +116 -0
- package/src/serve.ts +189 -0
- package/src/skill/generator.ts +473 -0
- package/src/skill/importer.ts +107 -0
- package/src/skill/search.ts +76 -0
- package/src/skill/signing.ts +36 -0
- package/src/skill/ssrf.ts +238 -0
- package/src/skill/store.ts +107 -0
- package/src/stats/report.ts +208 -0
- package/src/types.ts +233 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
// src/discovery/auth.ts
|
|
2
|
+
|
|
3
|
+
/** Outcome of scanning a page's HTML and response headers for authentication signals. */
export interface AuthDetectionResult {
  /** True when at least one auth signal was found. */
  authRequired: boolean;
  /** Human-readable description of every signal that matched, in detection order. */
  signals: string[];
  loginUrl?: string; // detected login page URL if found (copied verbatim from the header/markup, so it may be relative)
}
|
|
8
|
+
|
|
9
|
+
// Paths that indicate auth/login
const AUTH_PATH_PATTERNS = [
  /\/login/i, /\/signin/i, /\/sign-in/i, /\/auth\//i,
  /\/sso/i, /\/saml/i, /\/oauth/i, /\/cas\/login/i,
];

// OAuth provider patterns in URLs
const OAUTH_PATTERNS = [
  /accounts\.google\.com\/o\/oauth/i,
  /github\.com\/login\/oauth/i,
  /login\.microsoftonline\.com/i,
  /facebook\.com\/v\d+.*\/dialog\/oauth/i,
  /appleid\.apple\.com\/auth/i,
];

/**
 * Scan fetched HTML and response headers for indicators that a site requires authentication.
 *
 * Checks, in order:
 * 1. WWW-Authenticate response header
 * 2. Location header redirecting to an auth path
 * 3. Login forms (password inputs)
 * 4. Meta refresh redirects to auth paths
 * 5. OAuth provider login links
 * 6. SAML/SSO form patterns
 *
 * @param html - Page markup to scan.
 * @param url - URL the page was fetched from. Accepted for API compatibility; the
 *   current checks do not consult it, so any `loginUrl` is returned exactly as it
 *   appeared in the header or markup (possibly relative).
 * @param headers - Response headers. Names are matched case-insensitively.
 * @returns The matched signals; `authRequired` is true when at least one matched.
 *   `loginUrl` is the first candidate found, in the order of the checks above.
 */
export function detectAuthRequired(
  html: string,
  url: string,
  headers: Record<string, string>,
): AuthDetectionResult {
  const signals: string[] = [];
  let loginUrl: string | undefined;

  // Header names are case-insensitive; normalise so callers need not pre-lowercase them.
  const lowerHeaders: Record<string, string> = {};
  for (const [name, value] of Object.entries(headers)) {
    lowerHeaders[name.toLowerCase()] = value;
  }

  // 1. WWW-Authenticate header
  const challenge = lowerHeaders['www-authenticate'];
  if (challenge) {
    signals.push(`WWW-Authenticate header: ${challenge}`);
  }

  // 2. Location header redirecting to auth path
  const location = lowerHeaders['location'];
  if (location && AUTH_PATH_PATTERNS.some(p => p.test(location))) {
    signals.push(`Location redirect to auth path: ${location}`);
    loginUrl = loginUrl ?? location;
  }

  // 3. Login form with password input.
  // Accepts quoted values and the HTML5 unquoted form (`type=password`).
  const hasPasswordInput =
    /<input[^>]*type\s*=\s*(?:["']password["']|password(?=[\s\/>]))[^>]*>/i.test(html);
  if (hasPasswordInput) {
    signals.push('Detected login form with password input');
    // Try to extract login URL from a complete <form> tag whose action looks like an auth endpoint
    const formMatch = html.match(
      /<form[^>]*action\s*=\s*["']([^"']*(?:login|signin|sign-in|auth)[^"']*)["'][^>]*>/i,
    );
    if (formMatch) {
      loginUrl = loginUrl ?? formMatch[1];
    }
  }

  // 4. Meta redirect to auth path
  const metaRefresh = html.match(
    /<meta[^>]*http-equiv\s*=\s*["']refresh["'][^>]*content\s*=\s*["'][^"']*url\s*=\s*([^"'\s>]+)/i,
  );
  if (metaRefresh) {
    const redirectUrl = metaRefresh[1];
    if (AUTH_PATH_PATTERNS.some(p => p.test(redirectUrl))) {
      signals.push(`Meta redirect to auth path: ${redirectUrl}`);
      loginUrl = loginUrl ?? redirectUrl;
    }
  }

  // 5. OAuth provider links
  if (OAUTH_PATTERNS.some(p => p.test(html))) {
    signals.push('OAuth provider login link detected');
  }

  // 6. SAML/SSO form: the page must mention SAML *and* post a form to an sso/saml action
  const mentionsSaml = /saml/i.test(html);
  const hasSsoForm = /<form[^>]*action\s*=\s*["'][^"']*(?:sso|saml)[^"']*["'][^>]*>/i.test(html);
  if (mentionsSaml && hasSsoForm) {
    signals.push('SSO/SAML authentication form detected');
  }

  return {
    authRequired: signals.length > 0,
    signals,
    loginUrl,
  };
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
// src/discovery/fetch.ts
|
|
2
|
+
import { validateUrl } from '../skill/ssrf.js';
|
|
3
|
+
|
|
4
|
+
/** Result of a completed `safeFetch` request. */
export interface FetchResult {
  /** HTTP status code of the response. */
  status: number;
  /** Response headers keyed by lower-cased header name. */
  headers: Record<string, string>;
  /** Response body as text; empty for HEAD requests, truncated to the configured size limit. */
  body: string;
  /** Value of the `content-type` response header, or an empty string when absent. */
  contentType: string;
}
|
|
10
|
+
|
|
11
|
+
/** Options accepted by `safeFetch`. Every field is optional. */
export interface SafeFetchOptions {
  /** Milliseconds before the request is aborted; falls back to DEFAULT_TIMEOUT. */
  timeout?: number;
  /** HTTP method; falls back to 'GET'. No body is read for 'HEAD'. */
  method?: 'GET' | 'HEAD';
  /** Upper bound on the returned body size; falls back to DEFAULT_MAX_BODY. */
  maxBodySize?: number;
  skipSsrf?: boolean; // bypass SSRF check (for testing with local servers)
}
|
|
17
|
+
|
|
18
|
+
/** Request timeout in milliseconds used when the caller does not supply one. */
const DEFAULT_TIMEOUT = 5_000;
/** Body size cap used when the caller does not supply one: 512KB. */
const DEFAULT_MAX_BODY = 1024 * 512;
/** User-Agent header value sent with every discovery request. */
const USER_AGENT = 'ApiTap-Discovery/1.0';
|
|
21
|
+
|
|
22
|
+
/**
 * Fetch a URL with SSRF protection, timeout, and size limits.
 * Returns null on any failure (network error, SSRF blocked, timeout, too many redirects).
 *
 * Redirects are followed manually so that every hop — not just the initial URL —
 * is passed through the SSRF check; otherwise a public URL could redirect the
 * request to an internal address. The timeout is a single budget that covers the
 * whole redirect chain and the body download.
 *
 * @param url - Absolute URL to request.
 * @param options - Timeout, method, body size cap and SSRF bypass; see SafeFetchOptions.
 * @returns Status, lower-cased headers, (possibly truncated) body text and content type
 *   of the final response, or null on failure.
 */
export async function safeFetch(
  url: string,
  options: SafeFetchOptions = {},
): Promise<FetchResult | null> {
  const timeout = options.timeout ?? DEFAULT_TIMEOUT;
  const method = options.method ?? 'GET';
  const maxBody = options.maxBodySize ?? DEFAULT_MAX_BODY;

  // Upper bound on redirect hops before the request is treated as failed.
  const maxRedirects = 10;
  const redirectStatuses = new Set([301, 302, 303, 307, 308]);

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);

  try {
    let currentUrl = url;

    for (let hop = 0; hop <= maxRedirects; hop++) {
      // SSRF check on the initial URL and on every redirect target
      if (!options.skipSsrf && !validateUrl(currentUrl).safe) return null;

      const response = await fetch(currentUrl, {
        method,
        signal: controller.signal,
        headers: {
          'User-Agent': USER_AGENT,
          'Accept': 'text/html,application/json,*/*',
        },
        redirect: 'manual',
      });

      const location = response.headers.get('location');
      if (redirectStatuses.has(response.status) && location !== null) {
        // Discard the redirect body so the connection can be reused, then follow.
        await response.body?.cancel().catch(() => undefined);
        currentUrl = new URL(location, currentUrl).toString();
        continue;
      }

      // Extract headers
      const headers: Record<string, string> = {};
      response.headers.forEach((value, key) => {
        headers[key.toLowerCase()] = value;
      });

      const contentType = headers['content-type'] || '';

      // For HEAD requests, don't read body; otherwise read it with the size limit
      const body = method === 'HEAD' ? '' : await readBodyLimited(response, maxBody);

      return { status: response.status, headers, body, contentType };
    }

    // Redirect chain longer than maxRedirects
    return null;
  } catch {
    // Best-effort by contract: network errors, aborts and malformed redirect targets all yield null.
    return null;
  } finally {
    // Always release the timer, including on failure, so it cannot keep the process alive.
    clearTimeout(timer);
  }
}
|
|
77
|
+
|
|
78
|
+
/**
 * Read a response body as UTF-8 text, consuming at most `maxSize` bytes.
 *
 * The body is streamed and the download is cancelled as soon as the limit is
 * reached, so an oversized (or endless) response is never buffered in full.
 * A multi-byte character cut in half by the limit is dropped rather than
 * emitted as a replacement character.
 *
 * @param response - Response whose body has not been consumed yet.
 * @param maxSize - Maximum number of body bytes to read; values <= 0 yield ''.
 * @returns The decoded text, truncated to the limit.
 */
async function readBodyLimited(response: Response, maxSize: number): Promise<string> {
  const stream = response.body;
  if (stream === null) return '';

  const reader = stream.getReader();
  const decoder = new TextDecoder();
  let remaining = Math.max(0, maxSize);
  let text = '';

  try {
    while (remaining > 0) {
      const { done, value } = await reader.read();
      if (done || value === undefined) {
        // Natural end of body: flush any bytes the decoder is still holding.
        text += decoder.decode();
        break;
      }
      const chunk = value.byteLength > remaining ? value.subarray(0, remaining) : value;
      text += decoder.decode(chunk, { stream: true });
      remaining -= chunk.byteLength;
    }
  } finally {
    // Stop the download; harmless when the stream has already finished.
    await reader.cancel().catch(() => undefined);
  }

  return text;
}
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
// src/discovery/frameworks.ts
|
|
2
|
+
import type { DetectedFramework } from '../types.js';
|
|
3
|
+
|
|
4
|
+
/** Fetched page data handed to each framework detector. */
export interface PageInfo {
  /** Page markup that detectors search for framework fingerprints. */
  html: string;
  /** Response headers; `detectFrameworks` lower-cases the names before detectors see them. */
  headers: Record<string, string>;
  /** URL the page was fetched from. */
  url: string;
}
|
|
9
|
+
|
|
10
|
+
/** One framework fingerprint: an identifier plus the function that looks for it. */
interface FrameworkDetector {
  /** Internal identifier of the detector. */
  name: string;
  /** Returns the detected framework, or null when none of its signals are present. */
  detect(page: PageInfo): DetectedFramework | null;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Framework fingerprints, evaluated in order by `detectFrameworks`.
 *
 * Each detector collects the signals it recognises in the page's HTML and/or
 * response headers (header names are expected to be lower-case) and returns null
 * when none match. Unless noted otherwise, two or more signals give 'high'
 * confidence and a single signal gives 'medium'.
 */
const detectors: FrameworkDetector[] = [
  {
    name: 'wordpress',
    detect({ html, headers }) {
      const signals: string[] = [];
      if (html.includes('/wp-json/')) signals.push('wp-json link');
      if (html.includes('/wp-content/')) signals.push('wp-content');
      if (html.includes('/wp-includes/')) signals.push('wp-includes');
      if (headers['link']?.includes('/wp-json/')) signals.push('Link header');
      if (headers['x-powered-by']?.toLowerCase().includes('wordpress')) signals.push('X-Powered-By');

      if (signals.length === 0) return null;
      return {
        name: 'WordPress',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: [
          '/wp-json/wp/v2/posts',
          '/wp-json/wp/v2/pages',
          '/wp-json/wp/v2/categories',
          '/wp-json/wp/v2/tags',
          '/wp-json/wp/v2/media',
          '/wp-json/wp/v2/users',
          '/wp-json/wp/v2/comments',
          '/wp-json/wp/v2/search',
        ],
      };
    },
  },
  {
    name: 'shopify',
    // Only the HTML is inspected; the page URL is not needed (and is not parsed,
    // so a malformed URL cannot make detection throw).
    detect({ html }) {
      const signals: string[] = [];
      if (html.includes('cdn.shopify.com')) signals.push('Shopify CDN');
      if (html.includes('Shopify.theme')) signals.push('Shopify.theme');
      if (html.includes('myshopify.com')) signals.push('myshopify domain');
      if (html.includes('shopify-section')) signals.push('shopify-section');

      if (signals.length === 0) return null;
      return {
        name: 'Shopify',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: [
          '/products.json',
          '/collections.json',
          '/cart.json',
          '/search/suggest.json',
        ],
      };
    },
  },
  {
    name: 'nextjs',
    detect({ html, headers }) {
      const signals: string[] = [];
      if (html.includes('__NEXT_DATA__')) signals.push('__NEXT_DATA__');
      if (html.includes('/_next/')) signals.push('_next assets');
      if (headers['x-nextjs-cache']) signals.push('X-Nextjs-Cache');
      if (headers['x-powered-by']?.toLowerCase().includes('next.js')) signals.push('X-Powered-By');

      if (signals.length === 0) return null;

      // Extract build ID from __NEXT_DATA__ if available; it yields the data-route prefix
      const buildIdMatch = html.match(/"buildId"\s*:\s*"([^"]+)"/);
      const patterns: string[] = ['/api/'];
      if (buildIdMatch) {
        patterns.push(`/_next/data/${buildIdMatch[1]}/`);
      }

      return {
        name: 'Next.js',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: patterns,
      };
    },
  },
  {
    name: 'nuxt',
    // `headers` is accepted but not consulted; every Nuxt signal comes from the HTML.
    detect({ html, headers }) {
      const signals: string[] = [];
      if (html.includes('__NUXT__')) signals.push('__NUXT__');
      if (html.includes('/_nuxt/')) signals.push('_nuxt assets');
      if (html.includes('_payload.json')) signals.push('_payload.json');
      if (html.includes('nuxt-link')) signals.push('nuxt-link');

      if (signals.length === 0) return null;
      return {
        name: 'Nuxt',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: ['/api/', '/_payload.json'],
      };
    },
  },
  {
    name: 'graphql',
    detect({ html }) {
      const signals: string[] = [];
      if (html.includes('/graphql')) signals.push('/graphql reference');
      if (html.includes('__APOLLO_STATE__')) signals.push('Apollo state');
      if (html.includes('apollo-client')) signals.push('apollo-client');
      if (html.includes('relay-')) signals.push('Relay');
      if (html.includes('urql')) signals.push('urql');

      if (signals.length === 0) return null;
      return {
        name: 'GraphQL',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: ['/graphql', '/gql', '/api/graphql'],
      };
    },
  },
  {
    name: 'drupal',
    detect({ html, headers }) {
      const signals: string[] = [];
      if (headers['x-drupal-cache']) signals.push('X-Drupal-Cache');
      if (headers['x-drupal-dynamic-cache']) signals.push('X-Drupal-Dynamic-Cache');
      if (headers['x-generator']?.toLowerCase().includes('drupal')) signals.push('X-Generator');
      if (html.includes('/jsonapi/')) signals.push('jsonapi');
      if (html.includes('drupal-settings-json')) signals.push('drupal-settings');

      if (signals.length === 0) return null;
      return {
        name: 'Drupal',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: [
          '/jsonapi/node/article',
          '/jsonapi/node/page',
          '/jsonapi/taxonomy_term',
        ],
      };
    },
  },
  {
    name: 'rails',
    // Header-only heuristic; always reported with 'low' confidence.
    detect({ headers }) {
      const signals: string[] = [];
      if (headers['x-request-id'] && headers['x-runtime']) signals.push('Rails headers');
      if (headers['x-powered-by']?.toLowerCase().includes('phusion')) signals.push('Phusion');

      if (signals.length === 0) return null;
      return {
        name: 'Rails',
        confidence: 'low',
        apiPatterns: ['/api/v1/'],
      };
    },
  },
  {
    name: 'django-rest',
    // Always reported with 'low' confidence, regardless of how many signals match.
    detect({ headers, html }) {
      const signals: string[] = [];
      if (headers['x-frame-options'] && headers['vary']?.includes('Cookie')) signals.push('Django-like headers');
      if (html.includes('csrfmiddlewaretoken')) signals.push('CSRF middleware');
      if (html.includes('django')) signals.push('django reference');

      if (signals.length === 0) return null;
      return {
        name: 'Django',
        confidence: 'low',
        apiPatterns: ['/api/', '/api/v1/', '/rest/'],
      };
    },
  },
  {
    name: 'laravel',
    // Always reported with 'medium' confidence.
    detect({ html, headers }) {
      const signals: string[] = [];
      if (html.includes('csrf-token') && html.includes('laravel')) signals.push('Laravel meta');
      if (headers['set-cookie']?.includes('laravel_session')) signals.push('laravel_session');

      if (signals.length === 0) return null;
      return {
        name: 'Laravel',
        confidence: 'medium',
        apiPatterns: ['/api/', '/api/v1/'],
      };
    },
  },
  {
    name: 'strapi',
    // A single explicit X-Powered-By match is reported with 'high' confidence.
    detect({ headers }) {
      const signals: string[] = [];
      if (headers['x-powered-by']?.toLowerCase().includes('strapi')) signals.push('X-Powered-By');

      if (signals.length === 0) return null;
      return {
        name: 'Strapi',
        confidence: 'high',
        apiPatterns: ['/api/', '/api/content-types', '/api/articles', '/api/pages'],
      };
    },
  },
];
|
|
209
|
+
|
|
210
|
+
/**
 * Run every registered framework detector against a page.
 *
 * Header names are lower-cased first so detectors can look them up by a single
 * spelling. The matches are returned ordered by confidence: high, then medium,
 * then low.
 */
export function detectFrameworks(page: PageInfo): DetectedFramework[] {
  // Build a copy of the headers keyed by lower-case name
  const lowered: Record<string, string> = {};
  Object.keys(page.headers).forEach((headerName) => {
    lowered[headerName.toLowerCase()] = page.headers[headerName];
  });
  const candidate = { ...page, headers: lowered };

  const matches: DetectedFramework[] = [];
  detectors.forEach((entry) => {
    const hit = entry.detect(candidate);
    if (hit) {
      matches.push(hit);
    }
  });

  const rank = { high: 0, medium: 1, low: 2 };
  matches.sort((left, right) => rank[left.confidence] - rank[right.confidence]);
  return matches;
}
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
// src/discovery/index.ts
|
|
2
|
+
import type { DiscoveryResult, SkillFile, SkillEndpoint, DetectedFramework } from '../types.js';
|
|
3
|
+
import { validateUrl } from '../skill/ssrf.js';
|
|
4
|
+
import { safeFetch } from './fetch.js';
|
|
5
|
+
import { detectFrameworks } from './frameworks.js';
|
|
6
|
+
import { discoverSpecs, parseSpecToSkillFile } from './openapi.js';
|
|
7
|
+
import { probeApiPaths } from './probes.js';
|
|
8
|
+
import { detectAuthRequired } from './auth.js';
|
|
9
|
+
|
|
10
|
+
/** Options accepted by `discover`. Every field is optional. */
export interface DiscoveryOptions {
  timeout?: number;       // timeout in ms; discover() applies it to the homepage fetch (default there: 10000)
  skipProbes?: boolean;   // skip API path probing
  skipSpecs?: boolean;    // skip OpenAPI spec discovery
  skipFrameworks?: boolean; // skip framework detection
  skipSsrf?: boolean;     // bypass SSRF check (for testing with local servers)
}
|
|
17
|
+
|
|
18
|
+
/**
 * Run smart discovery on a URL to detect APIs without launching a browser.
 *
 * Flow:
 * 1. SSRF validation
 * 2. Fetch homepage HTML + headers
 * 3. Run detection strategies in parallel:
 *    - Framework detection (from HTML/headers)
 *    - OpenAPI spec discovery (probe common paths)
 *    - Common API pattern probing
 * 4. Synthesize results into a DiscoveryResult
 *
 * @param url - Site to inspect. `https://` is prepended unless the value already
 *   starts with an explicit `http://` or `https://` scheme (case-insensitive).
 * @param options - Timeout and switches for skipping individual strategies.
 * @returns A DiscoveryResult. Failures (SSRF block, invalid URL, unreachable
 *   homepage) are reported as `confidence: 'none'` with an explanatory hint
 *   rather than thrown.
 */
export async function discover(
  url: string,
  options: DiscoveryOptions = {},
): Promise<DiscoveryResult> {
  const start = Date.now();
  // Require the full scheme separator: a bare host such as "httpbin.org" starts
  // with "http" but still needs a scheme.
  const fullUrl = /^https?:\/\//i.test(url) ? url : `https://${url}`;

  // SSRF check
  if (!options.skipSsrf) {
    const ssrfResult = validateUrl(fullUrl);
    if (!ssrfResult.safe) {
      return {
        confidence: 'none',
        hints: [`SSRF blocked: ${ssrfResult.reason}`],
        duration: Date.now() - start,
      };
    }
  }

  let domain: string;
  let origin: string;
  try {
    const parsed = new URL(fullUrl);
    domain = parsed.hostname;
    origin = parsed.origin;
  } catch {
    return {
      confidence: 'none',
      hints: ['Invalid URL'],
      duration: Date.now() - start,
    };
  }

  // Fetch homepage
  const homepage = await safeFetch(fullUrl, { timeout: options.timeout ?? 10000, skipSsrf: options.skipSsrf });
  if (!homepage) {
    return {
      confidence: 'none',
      hints: ['Failed to fetch homepage — site may be down or blocking requests'],
      duration: Date.now() - start,
    };
  }

  const ssrfOpts = { skipSsrf: options.skipSsrf };

  // Auth detection (runs on homepage HTML + headers).
  // The auth fields are only added to the result when a signal was found.
  const authResult = detectAuthRequired(homepage.body, fullUrl, homepage.headers);
  const authFields = authResult.authRequired ? {
    authRequired: true as const,
    authSignals: authResult.signals,
    ...(authResult.loginUrl ? { loginUrl: authResult.loginUrl } : {}),
  } : {};

  // Run all detection strategies in parallel
  const [frameworks, specs, probes] = await Promise.all([
    options.skipFrameworks
      ? []
      : detectFrameworks({ html: homepage.body, headers: homepage.headers, url: fullUrl }),
    options.skipSpecs
      ? []
      : discoverSpecs(origin, homepage.headers, ssrfOpts),
    options.skipProbes
      ? []
      : probeApiPaths(origin, ssrfOpts),
  ]);

  const hints: string[] = [];

  // Strategy 1: OpenAPI spec found → parse into skill file (highest confidence)
  if (specs.length > 0) {
    const bestSpec = specs[0];
    const skillFile = await parseSpecToSkillFile(bestSpec.url, domain, origin, ssrfOpts);
    if (skillFile && skillFile.endpoints.length > 0) {
      hints.push(`OpenAPI spec found at ${bestSpec.url} (${bestSpec.version})`);
      if (frameworks.length > 0) hints.push(`Framework: ${frameworks.map(f => f.name).join(', ')}`);
      addProbeHints(hints, probes);

      return {
        confidence: 'high',
        skillFile,
        hints,
        frameworks: frameworks.length > 0 ? frameworks : undefined,
        specs,
        probes: probes.length > 0 ? probes : undefined,
        duration: Date.now() - start,
        ...authFields,
      };
    }
    // Spec was unusable: fall through to the framework/probe strategies.
  }

  // Strategy 2: Framework detected → generate skeleton skill file
  const highConfidence = frameworks.filter(f => f.confidence === 'high');
  if (highConfidence.length > 0) {
    const skillFile = buildFrameworkSkillFile(domain, origin, highConfidence);
    hints.push(`Detected: ${highConfidence.map(f => f.name).join(', ')}`);
    addProbeHints(hints, probes);
    // Reaching this point with specs present means strategy 1 could not use them.
    if (specs.length > 0) hints.push(`Spec found but could not parse: ${specs.map(s => s.url).join(', ')}`);

    return {
      confidence: 'medium',
      skillFile,
      hints,
      frameworks,
      specs: specs.length > 0 ? specs : undefined,
      probes: probes.length > 0 ? probes : undefined,
      duration: Date.now() - start,
      ...authFields,
    };
  }

  // Strategy 3: Medium-confidence framework or API probes found → hints only
  const apiProbes = probes.filter(p => p.isApi);
  const mediumFrameworks = frameworks.filter(f => f.confidence === 'medium');

  if (mediumFrameworks.length > 0 || apiProbes.length > 0) {
    if (mediumFrameworks.length > 0) {
      const skillFile = buildFrameworkSkillFile(domain, origin, mediumFrameworks);
      hints.push(`Possibly: ${mediumFrameworks.map(f => f.name).join(', ')}`);
      addProbeHints(hints, probes);

      return {
        confidence: 'low',
        skillFile,
        hints,
        frameworks,
        probes: probes.length > 0 ? probes : undefined,
        duration: Date.now() - start,
        ...authFields,
      };
    }

    // Only probes found
    hints.push('API paths detected via probing');
    addProbeHints(hints, probes);

    return {
      confidence: 'low',
      hints,
      frameworks: frameworks.length > 0 ? frameworks : undefined,
      probes,
      duration: Date.now() - start,
      ...authFields,
    };
  }

  // Nothing found
  if (frameworks.length > 0) {
    hints.push(`Low-confidence signals: ${frameworks.map(f => f.name).join(', ')}`);
  }
  hints.push('No API patterns detected — auto-capture recommended');

  return {
    confidence: 'none',
    hints,
    frameworks: frameworks.length > 0 ? frameworks : undefined,
    probes: probes.length > 0 ? probes : undefined,
    duration: Date.now() - start,
    ...authFields,
  };
}
|
|
190
|
+
|
|
191
|
+
/**
 * Append a one-line summary of API-looking probe results to `hints`.
 *
 * Only probes whose `isApi` flag is truthy are reported, each rendered as
 * `<path> (<status>)`. When no probe qualifies, `hints` is left untouched.
 *
 * @param hints  Hint list to extend; mutated in place.
 * @param probes Probe results to summarise.
 */
function addProbeHints(hints: string[], probes: import('../types.js').ProbeResult[]): void {
  const described: string[] = [];
  for (const probe of probes) {
    if (probe.isApi) {
      described.push(`${probe.path} (${probe.status})`);
    }
  }

  // Nothing API-like was probed: do not emit an empty "found" line.
  if (described.length === 0) return;

  hints.push(`API paths found: ${described.join(', ')}`);
}
|
|
197
|
+
|
|
198
|
+
/**
 * Assemble a placeholder skill file purely from framework detections.
 *
 * Every API pattern advertised by the given frameworks becomes one GET
 * endpoint. Nothing here has been observed on the wire, so each endpoint is
 * marked unverified with replayability tier 'unknown', and the capture
 * counters in the metadata are zero.
 *
 * @param domain     Domain recorded in the skill file.
 * @param baseUrl    Base URL recorded in the skill file and prefixed to each
 *                   pattern to form the example request URL.
 * @param frameworks Detected frameworks whose `apiPatterns` are turned into endpoints.
 * @returns An unsigned skill file containing one endpoint per distinct pattern.
 */
function buildFrameworkSkillFile(
  domain: string,
  baseUrl: string,
  frameworks: DetectedFramework[],
): SkillFile {
  const registered = new Set<string>();
  const endpoints: SkillEndpoint[] = [];

  for (const fw of frameworks) {
    for (const route of fw.apiPatterns) {
      const dedupeKey = `GET ${route}`;

      // A pattern shared by several frameworks is emitted once, attributed
      // to whichever framework listed it first.
      if (!registered.has(dedupeKey)) {
        registered.add(dedupeKey);

        const endpointId = generateId('GET', route);
        const endpoint: SkillEndpoint = {
          id: endpointId,
          method: 'GET',
          path: route,
          queryParams: {},
          headers: {},
          responseShape: { type: 'unknown' },
          examples: {
            request: { url: `${baseUrl}${route}`, headers: {} },
            responsePreview: null,
          },
          replayability: {
            tier: 'unknown',
            verified: false,
            signals: [`discovered-from-${fw.name.toLowerCase()}`],
          },
        };
        endpoints.push(endpoint);
      }
    }
  }

  const skeleton: SkillFile = {
    version: '1.2',
    domain,
    capturedAt: new Date().toISOString(),
    baseUrl,
    endpoints,
    metadata: {
      captureCount: 0,
      filteredCount: 0,
      toolVersion: '1.0.0',
    },
    provenance: 'unsigned',
  };
  return skeleton;
}
|
|
251
|
+
|
|
252
|
+
/**
 * Derive an endpoint id of the form `<method>-<slug>` from a route.
 *
 * The slug is built from the path segments, skipping empty segments and
 * `:param` placeholders, joined with '-', stripped of every character other
 * than ASCII letters, digits and '-', and lower-cased. If nothing remains,
 * the slug is 'root'.
 *
 * @param method HTTP method; lower-cased in the result.
 * @param path   Route path or pattern, e.g. `/api/v1/users/:id`.
 * @returns The generated identifier, e.g. `get-api-v1-users`.
 */
function generateId(method: string, path: string): string {
  const kept: string[] = [];
  for (const part of path.split('/')) {
    // Skip the blanks produced by leading/trailing/double slashes and ':param' segments.
    if (part === '' || part.startsWith(':')) continue;
    kept.push(part);
  }

  const cleaned = kept.join('-').replace(/[^a-z0-9-]/gi, '').toLowerCase();
  const slug = cleaned || 'root';
  return `${method.toLowerCase()}-${slug}`;
}
|