@apitap/core 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236)
  1. package/LICENSE +60 -0
  2. package/README.md +362 -0
  3. package/SKILL.md +270 -0
  4. package/dist/auth/crypto.d.ts +31 -0
  5. package/dist/auth/crypto.js +66 -0
  6. package/dist/auth/crypto.js.map +1 -0
  7. package/dist/auth/handoff.d.ts +29 -0
  8. package/dist/auth/handoff.js +180 -0
  9. package/dist/auth/handoff.js.map +1 -0
  10. package/dist/auth/manager.d.ts +46 -0
  11. package/dist/auth/manager.js +127 -0
  12. package/dist/auth/manager.js.map +1 -0
  13. package/dist/auth/oauth-refresh.d.ts +16 -0
  14. package/dist/auth/oauth-refresh.js +91 -0
  15. package/dist/auth/oauth-refresh.js.map +1 -0
  16. package/dist/auth/refresh.d.ts +43 -0
  17. package/dist/auth/refresh.js +217 -0
  18. package/dist/auth/refresh.js.map +1 -0
  19. package/dist/capture/anti-bot.d.ts +15 -0
  20. package/dist/capture/anti-bot.js +43 -0
  21. package/dist/capture/anti-bot.js.map +1 -0
  22. package/dist/capture/blocklist.d.ts +6 -0
  23. package/dist/capture/blocklist.js +70 -0
  24. package/dist/capture/blocklist.js.map +1 -0
  25. package/dist/capture/body-diff.d.ts +8 -0
  26. package/dist/capture/body-diff.js +102 -0
  27. package/dist/capture/body-diff.js.map +1 -0
  28. package/dist/capture/body-variables.d.ts +13 -0
  29. package/dist/capture/body-variables.js +142 -0
  30. package/dist/capture/body-variables.js.map +1 -0
  31. package/dist/capture/domain.d.ts +8 -0
  32. package/dist/capture/domain.js +34 -0
  33. package/dist/capture/domain.js.map +1 -0
  34. package/dist/capture/entropy.d.ts +33 -0
  35. package/dist/capture/entropy.js +100 -0
  36. package/dist/capture/entropy.js.map +1 -0
  37. package/dist/capture/filter.d.ts +11 -0
  38. package/dist/capture/filter.js +49 -0
  39. package/dist/capture/filter.js.map +1 -0
  40. package/dist/capture/graphql.d.ts +21 -0
  41. package/dist/capture/graphql.js +99 -0
  42. package/dist/capture/graphql.js.map +1 -0
  43. package/dist/capture/idle.d.ts +23 -0
  44. package/dist/capture/idle.js +44 -0
  45. package/dist/capture/idle.js.map +1 -0
  46. package/dist/capture/monitor.d.ts +26 -0
  47. package/dist/capture/monitor.js +183 -0
  48. package/dist/capture/monitor.js.map +1 -0
  49. package/dist/capture/oauth-detector.d.ts +18 -0
  50. package/dist/capture/oauth-detector.js +96 -0
  51. package/dist/capture/oauth-detector.js.map +1 -0
  52. package/dist/capture/pagination.d.ts +9 -0
  53. package/dist/capture/pagination.js +40 -0
  54. package/dist/capture/pagination.js.map +1 -0
  55. package/dist/capture/parameterize.d.ts +17 -0
  56. package/dist/capture/parameterize.js +63 -0
  57. package/dist/capture/parameterize.js.map +1 -0
  58. package/dist/capture/scrubber.d.ts +5 -0
  59. package/dist/capture/scrubber.js +38 -0
  60. package/dist/capture/scrubber.js.map +1 -0
  61. package/dist/capture/session.d.ts +46 -0
  62. package/dist/capture/session.js +445 -0
  63. package/dist/capture/session.js.map +1 -0
  64. package/dist/capture/token-detector.d.ts +16 -0
  65. package/dist/capture/token-detector.js +62 -0
  66. package/dist/capture/token-detector.js.map +1 -0
  67. package/dist/capture/verifier.d.ts +17 -0
  68. package/dist/capture/verifier.js +147 -0
  69. package/dist/capture/verifier.js.map +1 -0
  70. package/dist/cli.d.ts +2 -0
  71. package/dist/cli.js +930 -0
  72. package/dist/cli.js.map +1 -0
  73. package/dist/discovery/auth.d.ts +17 -0
  74. package/dist/discovery/auth.js +81 -0
  75. package/dist/discovery/auth.js.map +1 -0
  76. package/dist/discovery/fetch.d.ts +17 -0
  77. package/dist/discovery/fetch.js +59 -0
  78. package/dist/discovery/fetch.js.map +1 -0
  79. package/dist/discovery/frameworks.d.ts +11 -0
  80. package/dist/discovery/frameworks.js +249 -0
  81. package/dist/discovery/frameworks.js.map +1 -0
  82. package/dist/discovery/index.d.ts +21 -0
  83. package/dist/discovery/index.js +219 -0
  84. package/dist/discovery/index.js.map +1 -0
  85. package/dist/discovery/openapi.d.ts +13 -0
  86. package/dist/discovery/openapi.js +175 -0
  87. package/dist/discovery/openapi.js.map +1 -0
  88. package/dist/discovery/probes.d.ts +9 -0
  89. package/dist/discovery/probes.js +70 -0
  90. package/dist/discovery/probes.js.map +1 -0
  91. package/dist/index.d.ts +25 -0
  92. package/dist/index.js +25 -0
  93. package/dist/index.js.map +1 -0
  94. package/dist/inspect/report.d.ts +52 -0
  95. package/dist/inspect/report.js +191 -0
  96. package/dist/inspect/report.js.map +1 -0
  97. package/dist/mcp.d.ts +8 -0
  98. package/dist/mcp.js +526 -0
  99. package/dist/mcp.js.map +1 -0
  100. package/dist/orchestration/browse.d.ts +38 -0
  101. package/dist/orchestration/browse.js +198 -0
  102. package/dist/orchestration/browse.js.map +1 -0
  103. package/dist/orchestration/cache.d.ts +15 -0
  104. package/dist/orchestration/cache.js +24 -0
  105. package/dist/orchestration/cache.js.map +1 -0
  106. package/dist/plugin.d.ts +17 -0
  107. package/dist/plugin.js +158 -0
  108. package/dist/plugin.js.map +1 -0
  109. package/dist/read/decoders/deepwiki.d.ts +2 -0
  110. package/dist/read/decoders/deepwiki.js +148 -0
  111. package/dist/read/decoders/deepwiki.js.map +1 -0
  112. package/dist/read/decoders/grokipedia.d.ts +2 -0
  113. package/dist/read/decoders/grokipedia.js +210 -0
  114. package/dist/read/decoders/grokipedia.js.map +1 -0
  115. package/dist/read/decoders/hackernews.d.ts +2 -0
  116. package/dist/read/decoders/hackernews.js +168 -0
  117. package/dist/read/decoders/hackernews.js.map +1 -0
  118. package/dist/read/decoders/index.d.ts +2 -0
  119. package/dist/read/decoders/index.js +12 -0
  120. package/dist/read/decoders/index.js.map +1 -0
  121. package/dist/read/decoders/reddit.d.ts +2 -0
  122. package/dist/read/decoders/reddit.js +142 -0
  123. package/dist/read/decoders/reddit.js.map +1 -0
  124. package/dist/read/decoders/twitter.d.ts +12 -0
  125. package/dist/read/decoders/twitter.js +187 -0
  126. package/dist/read/decoders/twitter.js.map +1 -0
  127. package/dist/read/decoders/wikipedia.d.ts +2 -0
  128. package/dist/read/decoders/wikipedia.js +66 -0
  129. package/dist/read/decoders/wikipedia.js.map +1 -0
  130. package/dist/read/decoders/youtube.d.ts +2 -0
  131. package/dist/read/decoders/youtube.js +69 -0
  132. package/dist/read/decoders/youtube.js.map +1 -0
  133. package/dist/read/extract.d.ts +25 -0
  134. package/dist/read/extract.js +320 -0
  135. package/dist/read/extract.js.map +1 -0
  136. package/dist/read/index.d.ts +14 -0
  137. package/dist/read/index.js +66 -0
  138. package/dist/read/index.js.map +1 -0
  139. package/dist/read/peek.d.ts +9 -0
  140. package/dist/read/peek.js +137 -0
  141. package/dist/read/peek.js.map +1 -0
  142. package/dist/read/types.d.ts +44 -0
  143. package/dist/read/types.js +3 -0
  144. package/dist/read/types.js.map +1 -0
  145. package/dist/replay/engine.d.ts +53 -0
  146. package/dist/replay/engine.js +441 -0
  147. package/dist/replay/engine.js.map +1 -0
  148. package/dist/replay/truncate.d.ts +16 -0
  149. package/dist/replay/truncate.js +92 -0
  150. package/dist/replay/truncate.js.map +1 -0
  151. package/dist/serve.d.ts +31 -0
  152. package/dist/serve.js +149 -0
  153. package/dist/serve.js.map +1 -0
  154. package/dist/skill/generator.d.ts +44 -0
  155. package/dist/skill/generator.js +419 -0
  156. package/dist/skill/generator.js.map +1 -0
  157. package/dist/skill/importer.d.ts +26 -0
  158. package/dist/skill/importer.js +80 -0
  159. package/dist/skill/importer.js.map +1 -0
  160. package/dist/skill/search.d.ts +19 -0
  161. package/dist/skill/search.js +51 -0
  162. package/dist/skill/search.js.map +1 -0
  163. package/dist/skill/signing.d.ts +16 -0
  164. package/dist/skill/signing.js +34 -0
  165. package/dist/skill/signing.js.map +1 -0
  166. package/dist/skill/ssrf.d.ts +27 -0
  167. package/dist/skill/ssrf.js +210 -0
  168. package/dist/skill/ssrf.js.map +1 -0
  169. package/dist/skill/store.d.ts +7 -0
  170. package/dist/skill/store.js +93 -0
  171. package/dist/skill/store.js.map +1 -0
  172. package/dist/stats/report.d.ts +26 -0
  173. package/dist/stats/report.js +157 -0
  174. package/dist/stats/report.js.map +1 -0
  175. package/dist/types.d.ts +214 -0
  176. package/dist/types.js +3 -0
  177. package/dist/types.js.map +1 -0
  178. package/package.json +58 -0
  179. package/src/auth/crypto.ts +92 -0
  180. package/src/auth/handoff.ts +229 -0
  181. package/src/auth/manager.ts +140 -0
  182. package/src/auth/oauth-refresh.ts +120 -0
  183. package/src/auth/refresh.ts +300 -0
  184. package/src/capture/anti-bot.ts +63 -0
  185. package/src/capture/blocklist.ts +75 -0
  186. package/src/capture/body-diff.ts +109 -0
  187. package/src/capture/body-variables.ts +156 -0
  188. package/src/capture/domain.ts +34 -0
  189. package/src/capture/entropy.ts +121 -0
  190. package/src/capture/filter.ts +56 -0
  191. package/src/capture/graphql.ts +124 -0
  192. package/src/capture/idle.ts +45 -0
  193. package/src/capture/monitor.ts +224 -0
  194. package/src/capture/oauth-detector.ts +106 -0
  195. package/src/capture/pagination.ts +49 -0
  196. package/src/capture/parameterize.ts +68 -0
  197. package/src/capture/scrubber.ts +49 -0
  198. package/src/capture/session.ts +502 -0
  199. package/src/capture/token-detector.ts +76 -0
  200. package/src/capture/verifier.ts +171 -0
  201. package/src/cli.ts +1031 -0
  202. package/src/discovery/auth.ts +99 -0
  203. package/src/discovery/fetch.ts +85 -0
  204. package/src/discovery/frameworks.ts +231 -0
  205. package/src/discovery/index.ts +256 -0
  206. package/src/discovery/openapi.ts +230 -0
  207. package/src/discovery/probes.ts +76 -0
  208. package/src/index.ts +26 -0
  209. package/src/inspect/report.ts +247 -0
  210. package/src/mcp.ts +618 -0
  211. package/src/orchestration/browse.ts +250 -0
  212. package/src/orchestration/cache.ts +37 -0
  213. package/src/plugin.ts +188 -0
  214. package/src/read/decoders/deepwiki.ts +180 -0
  215. package/src/read/decoders/grokipedia.ts +246 -0
  216. package/src/read/decoders/hackernews.ts +198 -0
  217. package/src/read/decoders/index.ts +15 -0
  218. package/src/read/decoders/reddit.ts +158 -0
  219. package/src/read/decoders/twitter.ts +211 -0
  220. package/src/read/decoders/wikipedia.ts +75 -0
  221. package/src/read/decoders/youtube.ts +75 -0
  222. package/src/read/extract.ts +396 -0
  223. package/src/read/index.ts +78 -0
  224. package/src/read/peek.ts +175 -0
  225. package/src/read/types.ts +37 -0
  226. package/src/replay/engine.ts +559 -0
  227. package/src/replay/truncate.ts +116 -0
  228. package/src/serve.ts +189 -0
  229. package/src/skill/generator.ts +473 -0
  230. package/src/skill/importer.ts +107 -0
  231. package/src/skill/search.ts +76 -0
  232. package/src/skill/signing.ts +36 -0
  233. package/src/skill/ssrf.ts +238 -0
  234. package/src/skill/store.ts +107 -0
  235. package/src/stats/report.ts +208 -0
  236. package/src/types.ts +233 -0
@@ -0,0 +1,99 @@
1
+ // src/discovery/auth.ts
2
+
3
/** Outcome of scanning a fetched page for signs that it requires authentication. */
export interface AuthDetectionResult {
  /** True when at least one auth signal was recorded. */
  authRequired: boolean;
  /** Human-readable description of every signal that fired. */
  signals: string[];
  loginUrl?: string; // detected login page URL if found
}
8
+
9
// URL path fragments that suggest a login / authentication page.
// All patterns are case-insensitive and unanchored.
const AUTH_PATH_PATTERNS = [
  /\/login/i,
  /\/signin/i,
  /\/sign-in/i,
  /\/auth\//i,
  /\/sso/i,
  /\/saml/i,
  /\/oauth/i,
  /\/cas\/login/i,
];
14
+
15
// Well-known OAuth provider endpoints (Google, GitHub, Microsoft, Facebook, Apple).
// A match anywhere in the text counts; all patterns are case-insensitive.
const OAUTH_PATTERNS = [
  /accounts\.google\.com\/o\/oauth/i, // Google
  /github\.com\/login\/oauth/i, // GitHub
  /login\.microsoftonline\.com/i, // Microsoft
  /facebook\.com\/v\d+.*\/dialog\/oauth/i, // Facebook (versioned dialog URL)
  /appleid\.apple\.com\/auth/i, // Apple
];
23
+
24
/**
 * Look for evidence in a fetched page that the site sits behind a login.
 *
 * Signals, evaluated in this order:
 *  1. a `www-authenticate` response header
 *  2. a `location` header pointing at an auth-looking path
 *  3. a password `<input>` (optionally inside a form whose action looks like login)
 *  4. a `<meta http-equiv="refresh">` pointing at an auth-looking path
 *  5. a link to a known OAuth provider
 *  6. a SAML/SSO form
 *
 * @param html    Page markup to scan.
 * @param url     URL the page was fetched from (not consulted by the current checks).
 * @param headers Response headers; looked up by lower-case name.
 * @returns `authRequired` is true when any signal fired; `loginUrl` is the first
 *          login URL found (Location header, then form action, then meta refresh).
 */
export function detectAuthRequired(
  html: string,
  url: string,
  headers: Record<string, string>,
): AuthDetectionResult {
  const signals: string[] = [];
  let loginUrl: string | undefined;

  const looksLikeAuthPath = (candidate: string): boolean =>
    AUTH_PATH_PATTERNS.some(pattern => pattern.test(candidate));

  // 1. WWW-Authenticate header
  const challenge = headers['www-authenticate'];
  if (challenge) {
    signals.push(`WWW-Authenticate header: ${challenge}`);
  }

  // 2. Location header redirecting to auth path
  const location = headers['location'];
  if (location && looksLikeAuthPath(location)) {
    signals.push(`Location redirect to auth path: ${location}`);
    loginUrl = loginUrl ?? location;
  }

  // 3. Password input — the signal fires with or without a login-looking form;
  //    the form action is only used to fill in loginUrl.
  if (/<input[^>]*type\s*=\s*["']password["'][^>]*>/i.test(html)) {
    signals.push('Detected login form with password input');

    const hasLoginForm =
      /<form[^>]*action\s*=\s*["'][^"']*(?:login|signin|sign-in|auth)[^"']*["'][^>]*>/i.test(html);
    if (hasLoginForm) {
      const actionMatch = html.match(
        /<form[^>]*action\s*=\s*["']([^"']*(?:login|signin|sign-in|auth)[^"']*)["']/i,
      );
      if (actionMatch) {
        loginUrl = loginUrl ?? actionMatch[1];
      }
    }
  }

  // 4. Meta redirect to auth path
  const metaMatch = html.match(
    /<meta[^>]*http-equiv\s*=\s*["']refresh["'][^>]*content\s*=\s*["'][^"']*url\s*=\s*([^"'\s>]+)/i,
  );
  if (metaMatch) {
    const target = metaMatch[1];
    if (looksLikeAuthPath(target)) {
      signals.push(`Meta redirect to auth path: ${target}`);
      loginUrl = loginUrl ?? target;
    }
  }

  // 5. OAuth provider links
  if (OAUTH_PATTERNS.some(pattern => pattern.test(html))) {
    signals.push('OAuth provider login link detected');
  }

  // 6. SAML/SSO form — needs both a SAML mention and a form posting to an sso/saml action
  const mentionsSaml = /SAMLRequest/i.test(html) || /saml/i.test(html);
  const postsToSso = /<form[^>]*action\s*=\s*["'][^"']*(?:sso|saml)[^"']*["'][^>]*>/i.test(html);
  if (mentionsSaml && postsToSso) {
    signals.push('SSO/SAML authentication form detected');
  }

  const authRequired = signals.length > 0;
  return { authRequired, signals, loginUrl };
}
@@ -0,0 +1,85 @@
1
+ // src/discovery/fetch.ts
2
+ import { validateUrl } from '../skill/ssrf.js';
3
+
4
/** Simplified view of an HTTP response as returned by `safeFetch`. */
export interface FetchResult {
  /** HTTP status code of the response. */
  status: number;
  /** Response headers, keyed by lower-cased header name. */
  headers: Record<string, string>;
  /** Response body text (truncated to the size limit); empty string for HEAD requests. */
  body: string;
  /** Value of the `content-type` header, or an empty string when absent. */
  contentType: string;
}

/** Optional knobs for `safeFetch`. */
export interface SafeFetchOptions {
  /** Abort the request after this many milliseconds (defaults to DEFAULT_TIMEOUT). */
  timeout?: number;
  /** HTTP method to use (defaults to 'GET'). */
  method?: 'GET' | 'HEAD';
  /** Maximum length of the returned body (defaults to DEFAULT_MAX_BODY). */
  maxBodySize?: number;
  skipSsrf?: boolean; // bypass SSRF check (for testing with local servers)
}

// Request timeout in milliseconds when the caller supplies none.
const DEFAULT_TIMEOUT = 5000;
const DEFAULT_MAX_BODY = 512 * 1024; // 512KB
// User-Agent header sent with every discovery request.
const USER_AGENT = 'ApiTap-Discovery/1.0';
21
+
22
/**
 * Fetch a URL with SSRF protection, timeout, and size limits.
 * Returns null on any failure (network error, SSRF blocked, timeout).
 *
 * The timeout covers the whole exchange, including reading the body, and the
 * abort timer is always released — also when the request fails.
 *
 * @param url     Absolute URL to request.
 * @param options Timeout, method, body-size limit and SSRF bypass flag.
 * @returns Status, lower-cased headers, (possibly truncated) body and content type,
 *          or null when the request was blocked or failed.
 */
export async function safeFetch(
  url: string,
  options: SafeFetchOptions = {},
): Promise<FetchResult | null> {
  // SSRF check
  if (!options.skipSsrf) {
    const ssrfResult = validateUrl(url);
    if (!ssrfResult.safe) return null;
  }

  const timeout = options.timeout ?? DEFAULT_TIMEOUT;
  const method = options.method ?? 'GET';
  const maxBody = options.maxBodySize ?? DEFAULT_MAX_BODY;

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);

  try {
    const response = await fetch(url, {
      method,
      signal: controller.signal,
      headers: {
        'User-Agent': USER_AGENT,
        'Accept': 'text/html,application/json,*/*',
      },
      redirect: 'follow',
    });

    // Redirects are followed automatically, so the final URL may differ from the
    // one validated above. Never hand back content from a destination that
    // fails the SSRF check.
    if (!options.skipSsrf && response.redirected && !validateUrl(response.url).safe) {
      await response.body?.cancel();
      return null;
    }

    // Extract headers
    const headers: Record<string, string> = {};
    response.headers.forEach((value, key) => {
      headers[key.toLowerCase()] = value;
    });

    const contentType = headers['content-type'] || '';

    // For HEAD requests, don't read body
    if (method === 'HEAD') {
      return { status: response.status, headers, body: '', contentType };
    }

    // Read body with size limit. The abort timer is still armed here, so a
    // server that stalls mid-body is cut off by the same timeout.
    const body = await readBodyLimited(response, maxBody);

    return { status: response.status, headers, body, contentType };
  } catch {
    // Deliberately best-effort: every failure mode maps to null.
    return null;
  } finally {
    // Release the timer on every path so it cannot fire late or keep the
    // event loop alive after a failed request.
    clearTimeout(timer);
  }
}
77
+
78
/**
 * Read a response body as UTF-8 text, returning at most `maxSize` characters.
 *
 * The body is consumed as a stream and reading stops as soon as enough text
 * has been decoded, so an oversized response is neither fully downloaded nor
 * fully buffered. The rest of the stream is cancelled.
 *
 * @param response Response whose body has not been consumed yet.
 * @param maxSize  Maximum length (in string characters) of the returned text.
 * @returns The decoded body, truncated to `maxSize` characters.
 */
async function readBodyLimited(response: Response, maxSize: number): Promise<string> {
  const stream = response.body;
  if (!stream) {
    // No body stream available (e.g. null body) — fall back to text().
    const whole = await response.text();
    return whole.length > maxSize ? whole.slice(0, maxSize) : whole;
  }

  const reader = stream.getReader();
  const decoder = new TextDecoder();
  let text = '';

  try {
    while (text.length < maxSize) {
      const { done, value } = await reader.read();
      if (done) {
        // Flush any bytes the decoder is still holding.
        text += decoder.decode();
        break;
      }
      // stream: true keeps multi-byte sequences split across chunks intact.
      text += decoder.decode(value, { stream: true });
    }
  } finally {
    // Stop the transfer; cancelling an already finished or errored stream is harmless.
    await reader.cancel().catch(() => undefined);
  }

  return text.length > maxSize ? text.slice(0, maxSize) : text;
}
@@ -0,0 +1,231 @@
1
+ // src/discovery/frameworks.ts
2
+ import type { DetectedFramework } from '../types.js';
3
+
4
/** Everything a framework detector gets to look at for one page. */
export interface PageInfo {
  /** Page markup; detectors search it for framework-specific substrings. */
  html: string;
  /** Response headers; detectors look them up by lower-case name. */
  headers: Record<string, string>;
  /** URL of the page. */
  url: string;
}

/** A named heuristic that recognises one framework. */
interface FrameworkDetector {
  /** Short identifier of the detector. */
  name: string;
  /** Returns a description of the framework when recognised, otherwise null. */
  detect(page: PageInfo): DetectedFramework | null;
}
14
+
15
/**
 * Framework heuristics, evaluated in array order by `detectFrameworks`.
 *
 * Each detector collects the signals it recognises in the page HTML and/or
 * response headers (header names are expected in lower case) and returns null
 * when none fired. Unless a detector uses a fixed confidence, two or more
 * signals yield 'high' and a single signal yields 'medium'.
 */
const detectors: FrameworkDetector[] = [
  {
    name: 'wordpress',
    detect({ html, headers }) {
      const signals: string[] = [];
      if (html.includes('/wp-json/')) signals.push('wp-json link');
      if (html.includes('/wp-content/')) signals.push('wp-content');
      if (html.includes('/wp-includes/')) signals.push('wp-includes');
      if (headers['link']?.includes('/wp-json/')) signals.push('Link header');
      if (headers['x-powered-by']?.toLowerCase().includes('wordpress')) signals.push('X-Powered-By');

      if (signals.length === 0) return null;
      return {
        name: 'WordPress',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: [
          '/wp-json/wp/v2/posts',
          '/wp-json/wp/v2/pages',
          '/wp-json/wp/v2/categories',
          '/wp-json/wp/v2/tags',
          '/wp-json/wp/v2/media',
          '/wp-json/wp/v2/users',
          '/wp-json/wp/v2/comments',
          '/wp-json/wp/v2/search',
        ],
      };
    },
  },
  {
    name: 'shopify',
    // Only the HTML is inspected. The page URL is intentionally not parsed:
    // doing so would throw on a non-absolute URL and discard a valid detection.
    detect({ html }) {
      const signals: string[] = [];
      if (html.includes('cdn.shopify.com')) signals.push('Shopify CDN');
      if (html.includes('Shopify.theme')) signals.push('Shopify.theme');
      if (html.includes('myshopify.com')) signals.push('myshopify domain');
      if (html.includes('shopify-section')) signals.push('shopify-section');

      if (signals.length === 0) return null;
      return {
        name: 'Shopify',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: [
          '/products.json',
          '/collections.json',
          '/cart.json',
          '/search/suggest.json',
        ],
      };
    },
  },
  {
    name: 'nextjs',
    detect({ html, headers }) {
      const signals: string[] = [];
      if (html.includes('__NEXT_DATA__')) signals.push('__NEXT_DATA__');
      if (html.includes('/_next/')) signals.push('_next assets');
      if (headers['x-nextjs-cache']) signals.push('X-Nextjs-Cache');
      if (headers['x-powered-by']?.toLowerCase().includes('next.js')) signals.push('X-Powered-By');

      if (signals.length === 0) return null;

      // Extract build ID from __NEXT_DATA__ if available
      const buildIdMatch = html.match(/"buildId"\s*:\s*"([^"]+)"/);
      const patterns: string[] = ['/api/'];
      if (buildIdMatch) {
        patterns.push(`/_next/data/${buildIdMatch[1]}/`);
      }

      return {
        name: 'Next.js',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: patterns,
      };
    },
  },
  {
    name: 'nuxt',
    detect({ html }) {
      const signals: string[] = [];
      if (html.includes('__NUXT__')) signals.push('__NUXT__');
      if (html.includes('/_nuxt/')) signals.push('_nuxt assets');
      if (html.includes('_payload.json')) signals.push('_payload.json');
      if (html.includes('nuxt-link')) signals.push('nuxt-link');

      if (signals.length === 0) return null;
      return {
        name: 'Nuxt',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: ['/api/', '/_payload.json'],
      };
    },
  },
  {
    name: 'graphql',
    detect({ html }) {
      const signals: string[] = [];
      if (html.includes('/graphql')) signals.push('/graphql reference');
      if (html.includes('__APOLLO_STATE__')) signals.push('Apollo state');
      if (html.includes('apollo-client')) signals.push('apollo-client');
      if (html.includes('relay-')) signals.push('Relay');
      if (html.includes('urql')) signals.push('urql');

      if (signals.length === 0) return null;
      return {
        name: 'GraphQL',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: ['/graphql', '/gql', '/api/graphql'],
      };
    },
  },
  {
    name: 'drupal',
    detect({ html, headers }) {
      const signals: string[] = [];
      if (headers['x-drupal-cache']) signals.push('X-Drupal-Cache');
      if (headers['x-drupal-dynamic-cache']) signals.push('X-Drupal-Dynamic-Cache');
      if (headers['x-generator']?.toLowerCase().includes('drupal')) signals.push('X-Generator');
      if (html.includes('/jsonapi/')) signals.push('jsonapi');
      if (html.includes('drupal-settings-json')) signals.push('drupal-settings');

      if (signals.length === 0) return null;
      return {
        name: 'Drupal',
        confidence: signals.length >= 2 ? 'high' : 'medium',
        apiPatterns: [
          '/jsonapi/node/article',
          '/jsonapi/node/page',
          '/jsonapi/taxonomy_term',
        ],
      };
    },
  },
  {
    name: 'rails',
    // Header-only heuristic; always reported with 'low' confidence.
    detect({ headers }) {
      const signals: string[] = [];
      if (headers['x-request-id'] && headers['x-runtime']) signals.push('Rails headers');
      if (headers['x-powered-by']?.toLowerCase().includes('phusion')) signals.push('Phusion');

      if (signals.length === 0) return null;
      return {
        name: 'Rails',
        confidence: 'low',
        apiPatterns: ['/api/v1/'],
      };
    },
  },
  {
    name: 'django-rest',
    // Always reported with 'low' confidence, however many signals fire.
    detect({ headers, html }) {
      const signals: string[] = [];
      if (headers['x-frame-options'] && headers['vary']?.includes('Cookie')) signals.push('Django-like headers');
      if (html.includes('csrfmiddlewaretoken')) signals.push('CSRF middleware');
      if (html.includes('django')) signals.push('django reference');

      if (signals.length === 0) return null;
      return {
        name: 'Django',
        confidence: 'low',
        apiPatterns: ['/api/', '/api/v1/', '/rest/'],
      };
    },
  },
  {
    name: 'laravel',
    // Always reported with 'medium' confidence.
    detect({ html, headers }) {
      const signals: string[] = [];
      if (html.includes('csrf-token') && html.includes('laravel')) signals.push('Laravel meta');
      if (headers['set-cookie']?.includes('laravel_session')) signals.push('laravel_session');

      if (signals.length === 0) return null;
      return {
        name: 'Laravel',
        confidence: 'medium',
        apiPatterns: ['/api/', '/api/v1/'],
      };
    },
  },
  {
    name: 'strapi',
    // A single X-Powered-By match is reported with 'high' confidence.
    detect({ headers }) {
      const signals: string[] = [];
      if (headers['x-powered-by']?.toLowerCase().includes('strapi')) signals.push('X-Powered-By');

      if (signals.length === 0) return null;
      return {
        name: 'Strapi',
        confidence: 'high',
        apiPatterns: ['/api/', '/api/content-types', '/api/articles', '/api/pages'],
      };
    },
  },
];
209
+
210
/**
 * Run every registered framework detector against a page.
 *
 * Header names are lower-cased first so detectors can look them up
 * consistently. The matches are returned ordered by confidence
 * ('high', then 'medium', then 'low').
 *
 * @param page HTML, headers and URL of the page to inspect.
 * @returns All frameworks that were recognised; empty when none matched.
 */
export function detectFrameworks(page: PageInfo): DetectedFramework[] {
  // Lower-case header names once, up front.
  const lowered: Record<string, string> = {};
  Object.entries(page.headers).forEach(([name, value]) => {
    lowered[name.toLowerCase()] = value;
  });
  const candidate = { ...page, headers: lowered };

  const matches = detectors
    .map(detector => detector.detect(candidate))
    .filter((match): match is DetectedFramework => Boolean(match));

  const rank = { high: 0, medium: 1, low: 2 };
  return matches.sort((left, right) => rank[left.confidence] - rank[right.confidence]);
}
@@ -0,0 +1,256 @@
1
+ // src/discovery/index.ts
2
+ import type { DiscoveryResult, SkillFile, SkillEndpoint, DetectedFramework } from '../types.js';
3
+ import { validateUrl } from '../skill/ssrf.js';
4
+ import { safeFetch } from './fetch.js';
5
+ import { detectFrameworks } from './frameworks.js';
6
+ import { discoverSpecs, parseSpecToSkillFile } from './openapi.js';
7
+ import { probeApiPaths } from './probes.js';
8
+ import { detectAuthRequired } from './auth.js';
9
+
10
/** Options accepted by `discover`. */
export interface DiscoveryOptions {
  // Timeout in ms for the homepage fetch (default: 10000). It is not an overall
  // budget: discover() does not forward it to spec discovery or path probing.
  timeout?: number;
  skipProbes?: boolean; // skip API path probing
  skipSpecs?: boolean; // skip OpenAPI spec discovery
  skipFrameworks?: boolean; // skip framework detection
  skipSsrf?: boolean; // bypass SSRF check (for testing with local servers)
}
17
+
18
/**
 * Run smart discovery on a URL to detect APIs without launching a browser.
 *
 * Flow:
 * 1. Normalise the input (prefix `https://` when no http/https scheme is given)
 * 2. SSRF validation
 * 3. Fetch homepage HTML + headers and scan them for auth signals
 * 4. Run detection strategies in parallel:
 *    - Framework detection (from HTML/headers)
 *    - OpenAPI spec discovery (probe common paths)
 *    - Common API pattern probing
 * 5. Synthesize results into a DiscoveryResult
 *
 * Result confidence:
 * - 'high'   — an OpenAPI spec was found and parsed into a skill file with endpoints
 * - 'medium' — at least one high-confidence framework; skeleton skill file generated
 * - 'low'    — medium-confidence frameworks (skeleton skill file) or API probes only
 * - 'none'   — blocked, invalid URL, homepage unreachable, or nothing detected
 *
 * @param url     Site URL or bare host name (e.g. `example.com`).
 * @param options Strategy toggles, homepage fetch timeout and SSRF bypass flag.
 * @returns Never rejects for the handled failure modes; problems are reported via
 *          `confidence: 'none'` plus an explanatory hint.
 */
export async function discover(
  url: string,
  options: DiscoveryOptions = {},
): Promise<DiscoveryResult> {
  const start = Date.now();
  // Test for an actual http(s) scheme. A plain startsWith('http') would treat
  // hosts such as "httpbin.org" as already absolute and then reject them.
  const fullUrl = /^https?:\/\//i.test(url) ? url : `https://${url}`;

  // SSRF check
  if (!options.skipSsrf) {
    const ssrfResult = validateUrl(fullUrl);
    if (!ssrfResult.safe) {
      return {
        confidence: 'none',
        hints: [`SSRF blocked: ${ssrfResult.reason}`],
        duration: Date.now() - start,
      };
    }
  }

  let domain: string;
  let origin: string;
  try {
    const parsed = new URL(fullUrl);
    domain = parsed.hostname;
    origin = parsed.origin;
  } catch {
    return {
      confidence: 'none',
      hints: ['Invalid URL'],
      duration: Date.now() - start,
    };
  }

  // Fetch homepage
  const homepage = await safeFetch(fullUrl, { timeout: options.timeout ?? 10000, skipSsrf: options.skipSsrf });
  if (!homepage) {
    return {
      confidence: 'none',
      hints: ['Failed to fetch homepage — site may be down or blocking requests'],
      duration: Date.now() - start,
    };
  }

  const ssrfOpts = { skipSsrf: options.skipSsrf };

  // Auth detection (runs on homepage HTML + headers). The fields are only
  // spread into results when authentication appears to be required.
  const authResult = detectAuthRequired(homepage.body, fullUrl, homepage.headers);
  const authFields = authResult.authRequired ? {
    authRequired: true as const,
    authSignals: authResult.signals,
    ...(authResult.loginUrl ? { loginUrl: authResult.loginUrl } : {}),
  } : {};

  // Run all detection strategies in parallel
  const [frameworks, specs, probes] = await Promise.all([
    options.skipFrameworks
      ? []
      : detectFrameworks({ html: homepage.body, headers: homepage.headers, url: fullUrl }),
    options.skipSpecs
      ? []
      : discoverSpecs(origin, homepage.headers, ssrfOpts),
    options.skipProbes
      ? []
      : probeApiPaths(origin, ssrfOpts),
  ]);

  const hints: string[] = [];

  // Strategy 1: OpenAPI spec found → parse into skill file (highest confidence)
  if (specs.length > 0) {
    const bestSpec = specs[0];
    const skillFile = await parseSpecToSkillFile(bestSpec.url, domain, origin, ssrfOpts);
    if (skillFile && skillFile.endpoints.length > 0) {
      hints.push(`OpenAPI spec found at ${bestSpec.url} (${bestSpec.version})`);
      if (frameworks.length > 0) hints.push(`Framework: ${frameworks.map(f => f.name).join(', ')}`);
      addProbeHints(hints, probes);

      return {
        confidence: 'high',
        skillFile,
        hints,
        frameworks: frameworks.length > 0 ? frameworks : undefined,
        specs,
        probes: probes.length > 0 ? probes : undefined,
        duration: Date.now() - start,
        ...authFields,
      };
    }
    // Spec could not be parsed or had no endpoints — fall through.
  }

  // Strategy 2: Framework detected → generate skeleton skill file
  const highConfidence = frameworks.filter(f => f.confidence === 'high');
  if (highConfidence.length > 0) {
    const skillFile = buildFrameworkSkillFile(domain, origin, highConfidence);
    hints.push(`Detected: ${highConfidence.map(f => f.name).join(', ')}`);
    addProbeHints(hints, probes);
    if (specs.length > 0) hints.push(`Spec found but could not parse: ${specs.map(s => s.url).join(', ')}`);

    return {
      confidence: 'medium',
      skillFile,
      hints,
      frameworks,
      specs: specs.length > 0 ? specs : undefined,
      probes: probes.length > 0 ? probes : undefined,
      duration: Date.now() - start,
      ...authFields,
    };
  }

  // Strategy 3: Medium-confidence framework or API probes found → hints only
  const apiProbes = probes.filter(p => p.isApi);
  const mediumFrameworks = frameworks.filter(f => f.confidence === 'medium');

  if (mediumFrameworks.length > 0 || apiProbes.length > 0) {
    if (mediumFrameworks.length > 0) {
      const skillFile = buildFrameworkSkillFile(domain, origin, mediumFrameworks);
      hints.push(`Possibly: ${mediumFrameworks.map(f => f.name).join(', ')}`);
      addProbeHints(hints, probes);

      return {
        confidence: 'low',
        skillFile,
        hints,
        frameworks,
        probes: probes.length > 0 ? probes : undefined,
        duration: Date.now() - start,
        ...authFields,
      };
    }

    // Only probes found
    hints.push('API paths detected via probing');
    addProbeHints(hints, probes);

    return {
      confidence: 'low',
      hints,
      frameworks: frameworks.length > 0 ? frameworks : undefined,
      probes,
      duration: Date.now() - start,
      ...authFields,
    };
  }

  // Nothing found
  if (frameworks.length > 0) {
    hints.push(`Low-confidence signals: ${frameworks.map(f => f.name).join(', ')}`);
  }
  hints.push('No API patterns detected — auto-capture recommended');

  return {
    confidence: 'none',
    hints,
    frameworks: frameworks.length > 0 ? frameworks : undefined,
    probes: probes.length > 0 ? probes : undefined,
    duration: Date.now() - start,
    ...authFields,
  };
}
190
+
191
/**
 * Append one summary hint listing every probe that looked like an API.
 * Leaves `hints` untouched when no probe is flagged `isApi`.
 *
 * @param hints  Hint list to append to (mutated in place).
 * @param probes Probe results to summarise as `path (status)`.
 */
function addProbeHints(hints: string[], probes: import('../types.js').ProbeResult[]): void {
  const described = probes
    .filter(probe => probe.isApi)
    .map(probe => `${probe.path} (${probe.status})`);

  if (described.length === 0) return;

  hints.push(`API paths found: ${described.join(', ')}`);
}
197
+
198
/**
 * Turn detected frameworks into a skeleton skill file.
 *
 * Every API pattern of every framework becomes one unverified GET endpoint
 * (replayability tier 'unknown'). A pattern shared by several frameworks is
 * emitted once, attributed to the first framework that listed it.
 *
 * @param domain     Host name recorded in the skill file.
 * @param baseUrl    Origin that patterns are appended to for example request URLs.
 * @param frameworks Frameworks whose `apiPatterns` should be converted.
 * @returns An unsigned skill file stamped with the current time.
 */
function buildFrameworkSkillFile(
  domain: string,
  baseUrl: string,
  frameworks: DetectedFramework[],
): SkillFile {
  // Keyed by "GET <pattern>"; insertion order is the output order.
  const byKey = new Map<string, SkillEndpoint>();

  frameworks.forEach(framework => {
    framework.apiPatterns.forEach(pattern => {
      const key = `GET ${pattern}`;
      if (byKey.has(key)) return;

      byKey.set(key, {
        id: generateId('GET', pattern),
        method: 'GET',
        path: pattern,
        queryParams: {},
        headers: {},
        responseShape: { type: 'unknown' },
        examples: {
          request: { url: `${baseUrl}${pattern}`, headers: {} },
          responsePreview: null,
        },
        replayability: {
          tier: 'unknown',
          verified: false,
          signals: [`discovered-from-${framework.name.toLowerCase()}`],
        },
      });
    });
  });

  const endpoints: SkillEndpoint[] = [...byKey.values()];
  const capturedAt = new Date().toISOString();

  return {
    version: '1.2',
    domain,
    capturedAt,
    baseUrl,
    endpoints,
    metadata: {
      captureCount: 0,
      filteredCount: 0,
      toolVersion: '1.0.0',
    },
    provenance: 'unsigned',
  };
}
251
+
252
/**
 * Derive an endpoint id of the form `<method>-<slug>` from a method and path.
 *
 * Empty path segments and `:param` placeholders are dropped; the remaining
 * segments are joined with '-', stripped of everything except letters, digits
 * and '-', and lower-cased. When nothing is left the slug is 'root'.
 *
 * @param method HTTP method; lower-cased in the result.
 * @param path   URL path such as `/api/v1/users/:id`.
 * @returns The generated id, e.g. `get-api-v1-users`.
 */
function generateId(method: string, path: string): string {
  const kept: string[] = [];
  for (const part of path.split('/')) {
    if (part.length > 0 && !part.startsWith(':')) {
      kept.push(part);
    }
  }

  const cleaned = kept.join('-').replace(/[^a-z0-9-]/gi, '').toLowerCase();
  const slug = cleaned === '' ? 'root' : cleaned;

  return `${method.toLowerCase()}-${slug}`;
}