@apitap/core 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/LICENSE +60 -0
  2. package/README.md +362 -0
  3. package/SKILL.md +270 -0
  4. package/dist/auth/crypto.d.ts +31 -0
  5. package/dist/auth/crypto.js +66 -0
  6. package/dist/auth/crypto.js.map +1 -0
  7. package/dist/auth/handoff.d.ts +29 -0
  8. package/dist/auth/handoff.js +180 -0
  9. package/dist/auth/handoff.js.map +1 -0
  10. package/dist/auth/manager.d.ts +46 -0
  11. package/dist/auth/manager.js +127 -0
  12. package/dist/auth/manager.js.map +1 -0
  13. package/dist/auth/oauth-refresh.d.ts +16 -0
  14. package/dist/auth/oauth-refresh.js +91 -0
  15. package/dist/auth/oauth-refresh.js.map +1 -0
  16. package/dist/auth/refresh.d.ts +43 -0
  17. package/dist/auth/refresh.js +217 -0
  18. package/dist/auth/refresh.js.map +1 -0
  19. package/dist/capture/anti-bot.d.ts +15 -0
  20. package/dist/capture/anti-bot.js +43 -0
  21. package/dist/capture/anti-bot.js.map +1 -0
  22. package/dist/capture/blocklist.d.ts +6 -0
  23. package/dist/capture/blocklist.js +70 -0
  24. package/dist/capture/blocklist.js.map +1 -0
  25. package/dist/capture/body-diff.d.ts +8 -0
  26. package/dist/capture/body-diff.js +102 -0
  27. package/dist/capture/body-diff.js.map +1 -0
  28. package/dist/capture/body-variables.d.ts +13 -0
  29. package/dist/capture/body-variables.js +142 -0
  30. package/dist/capture/body-variables.js.map +1 -0
  31. package/dist/capture/domain.d.ts +8 -0
  32. package/dist/capture/domain.js +34 -0
  33. package/dist/capture/domain.js.map +1 -0
  34. package/dist/capture/entropy.d.ts +33 -0
  35. package/dist/capture/entropy.js +100 -0
  36. package/dist/capture/entropy.js.map +1 -0
  37. package/dist/capture/filter.d.ts +11 -0
  38. package/dist/capture/filter.js +49 -0
  39. package/dist/capture/filter.js.map +1 -0
  40. package/dist/capture/graphql.d.ts +21 -0
  41. package/dist/capture/graphql.js +99 -0
  42. package/dist/capture/graphql.js.map +1 -0
  43. package/dist/capture/idle.d.ts +23 -0
  44. package/dist/capture/idle.js +44 -0
  45. package/dist/capture/idle.js.map +1 -0
  46. package/dist/capture/monitor.d.ts +26 -0
  47. package/dist/capture/monitor.js +183 -0
  48. package/dist/capture/monitor.js.map +1 -0
  49. package/dist/capture/oauth-detector.d.ts +18 -0
  50. package/dist/capture/oauth-detector.js +96 -0
  51. package/dist/capture/oauth-detector.js.map +1 -0
  52. package/dist/capture/pagination.d.ts +9 -0
  53. package/dist/capture/pagination.js +40 -0
  54. package/dist/capture/pagination.js.map +1 -0
  55. package/dist/capture/parameterize.d.ts +17 -0
  56. package/dist/capture/parameterize.js +63 -0
  57. package/dist/capture/parameterize.js.map +1 -0
  58. package/dist/capture/scrubber.d.ts +5 -0
  59. package/dist/capture/scrubber.js +38 -0
  60. package/dist/capture/scrubber.js.map +1 -0
  61. package/dist/capture/session.d.ts +46 -0
  62. package/dist/capture/session.js +445 -0
  63. package/dist/capture/session.js.map +1 -0
  64. package/dist/capture/token-detector.d.ts +16 -0
  65. package/dist/capture/token-detector.js +62 -0
  66. package/dist/capture/token-detector.js.map +1 -0
  67. package/dist/capture/verifier.d.ts +17 -0
  68. package/dist/capture/verifier.js +147 -0
  69. package/dist/capture/verifier.js.map +1 -0
  70. package/dist/cli.d.ts +2 -0
  71. package/dist/cli.js +930 -0
  72. package/dist/cli.js.map +1 -0
  73. package/dist/discovery/auth.d.ts +17 -0
  74. package/dist/discovery/auth.js +81 -0
  75. package/dist/discovery/auth.js.map +1 -0
  76. package/dist/discovery/fetch.d.ts +17 -0
  77. package/dist/discovery/fetch.js +59 -0
  78. package/dist/discovery/fetch.js.map +1 -0
  79. package/dist/discovery/frameworks.d.ts +11 -0
  80. package/dist/discovery/frameworks.js +249 -0
  81. package/dist/discovery/frameworks.js.map +1 -0
  82. package/dist/discovery/index.d.ts +21 -0
  83. package/dist/discovery/index.js +219 -0
  84. package/dist/discovery/index.js.map +1 -0
  85. package/dist/discovery/openapi.d.ts +13 -0
  86. package/dist/discovery/openapi.js +175 -0
  87. package/dist/discovery/openapi.js.map +1 -0
  88. package/dist/discovery/probes.d.ts +9 -0
  89. package/dist/discovery/probes.js +70 -0
  90. package/dist/discovery/probes.js.map +1 -0
  91. package/dist/index.d.ts +25 -0
  92. package/dist/index.js +25 -0
  93. package/dist/index.js.map +1 -0
  94. package/dist/inspect/report.d.ts +52 -0
  95. package/dist/inspect/report.js +191 -0
  96. package/dist/inspect/report.js.map +1 -0
  97. package/dist/mcp.d.ts +8 -0
  98. package/dist/mcp.js +526 -0
  99. package/dist/mcp.js.map +1 -0
  100. package/dist/orchestration/browse.d.ts +38 -0
  101. package/dist/orchestration/browse.js +198 -0
  102. package/dist/orchestration/browse.js.map +1 -0
  103. package/dist/orchestration/cache.d.ts +15 -0
  104. package/dist/orchestration/cache.js +24 -0
  105. package/dist/orchestration/cache.js.map +1 -0
  106. package/dist/plugin.d.ts +17 -0
  107. package/dist/plugin.js +158 -0
  108. package/dist/plugin.js.map +1 -0
  109. package/dist/read/decoders/deepwiki.d.ts +2 -0
  110. package/dist/read/decoders/deepwiki.js +148 -0
  111. package/dist/read/decoders/deepwiki.js.map +1 -0
  112. package/dist/read/decoders/grokipedia.d.ts +2 -0
  113. package/dist/read/decoders/grokipedia.js +210 -0
  114. package/dist/read/decoders/grokipedia.js.map +1 -0
  115. package/dist/read/decoders/hackernews.d.ts +2 -0
  116. package/dist/read/decoders/hackernews.js +168 -0
  117. package/dist/read/decoders/hackernews.js.map +1 -0
  118. package/dist/read/decoders/index.d.ts +2 -0
  119. package/dist/read/decoders/index.js +12 -0
  120. package/dist/read/decoders/index.js.map +1 -0
  121. package/dist/read/decoders/reddit.d.ts +2 -0
  122. package/dist/read/decoders/reddit.js +142 -0
  123. package/dist/read/decoders/reddit.js.map +1 -0
  124. package/dist/read/decoders/twitter.d.ts +12 -0
  125. package/dist/read/decoders/twitter.js +187 -0
  126. package/dist/read/decoders/twitter.js.map +1 -0
  127. package/dist/read/decoders/wikipedia.d.ts +2 -0
  128. package/dist/read/decoders/wikipedia.js +66 -0
  129. package/dist/read/decoders/wikipedia.js.map +1 -0
  130. package/dist/read/decoders/youtube.d.ts +2 -0
  131. package/dist/read/decoders/youtube.js +69 -0
  132. package/dist/read/decoders/youtube.js.map +1 -0
  133. package/dist/read/extract.d.ts +25 -0
  134. package/dist/read/extract.js +320 -0
  135. package/dist/read/extract.js.map +1 -0
  136. package/dist/read/index.d.ts +14 -0
  137. package/dist/read/index.js +66 -0
  138. package/dist/read/index.js.map +1 -0
  139. package/dist/read/peek.d.ts +9 -0
  140. package/dist/read/peek.js +137 -0
  141. package/dist/read/peek.js.map +1 -0
  142. package/dist/read/types.d.ts +44 -0
  143. package/dist/read/types.js +3 -0
  144. package/dist/read/types.js.map +1 -0
  145. package/dist/replay/engine.d.ts +53 -0
  146. package/dist/replay/engine.js +441 -0
  147. package/dist/replay/engine.js.map +1 -0
  148. package/dist/replay/truncate.d.ts +16 -0
  149. package/dist/replay/truncate.js +92 -0
  150. package/dist/replay/truncate.js.map +1 -0
  151. package/dist/serve.d.ts +31 -0
  152. package/dist/serve.js +149 -0
  153. package/dist/serve.js.map +1 -0
  154. package/dist/skill/generator.d.ts +44 -0
  155. package/dist/skill/generator.js +419 -0
  156. package/dist/skill/generator.js.map +1 -0
  157. package/dist/skill/importer.d.ts +26 -0
  158. package/dist/skill/importer.js +80 -0
  159. package/dist/skill/importer.js.map +1 -0
  160. package/dist/skill/search.d.ts +19 -0
  161. package/dist/skill/search.js +51 -0
  162. package/dist/skill/search.js.map +1 -0
  163. package/dist/skill/signing.d.ts +16 -0
  164. package/dist/skill/signing.js +34 -0
  165. package/dist/skill/signing.js.map +1 -0
  166. package/dist/skill/ssrf.d.ts +27 -0
  167. package/dist/skill/ssrf.js +210 -0
  168. package/dist/skill/ssrf.js.map +1 -0
  169. package/dist/skill/store.d.ts +7 -0
  170. package/dist/skill/store.js +93 -0
  171. package/dist/skill/store.js.map +1 -0
  172. package/dist/stats/report.d.ts +26 -0
  173. package/dist/stats/report.js +157 -0
  174. package/dist/stats/report.js.map +1 -0
  175. package/dist/types.d.ts +214 -0
  176. package/dist/types.js +3 -0
  177. package/dist/types.js.map +1 -0
  178. package/package.json +58 -0
  179. package/src/auth/crypto.ts +92 -0
  180. package/src/auth/handoff.ts +229 -0
  181. package/src/auth/manager.ts +140 -0
  182. package/src/auth/oauth-refresh.ts +120 -0
  183. package/src/auth/refresh.ts +300 -0
  184. package/src/capture/anti-bot.ts +63 -0
  185. package/src/capture/blocklist.ts +75 -0
  186. package/src/capture/body-diff.ts +109 -0
  187. package/src/capture/body-variables.ts +156 -0
  188. package/src/capture/domain.ts +34 -0
  189. package/src/capture/entropy.ts +121 -0
  190. package/src/capture/filter.ts +56 -0
  191. package/src/capture/graphql.ts +124 -0
  192. package/src/capture/idle.ts +45 -0
  193. package/src/capture/monitor.ts +224 -0
  194. package/src/capture/oauth-detector.ts +106 -0
  195. package/src/capture/pagination.ts +49 -0
  196. package/src/capture/parameterize.ts +68 -0
  197. package/src/capture/scrubber.ts +49 -0
  198. package/src/capture/session.ts +502 -0
  199. package/src/capture/token-detector.ts +76 -0
  200. package/src/capture/verifier.ts +171 -0
  201. package/src/cli.ts +1031 -0
  202. package/src/discovery/auth.ts +99 -0
  203. package/src/discovery/fetch.ts +85 -0
  204. package/src/discovery/frameworks.ts +231 -0
  205. package/src/discovery/index.ts +256 -0
  206. package/src/discovery/openapi.ts +230 -0
  207. package/src/discovery/probes.ts +76 -0
  208. package/src/index.ts +26 -0
  209. package/src/inspect/report.ts +247 -0
  210. package/src/mcp.ts +618 -0
  211. package/src/orchestration/browse.ts +250 -0
  212. package/src/orchestration/cache.ts +37 -0
  213. package/src/plugin.ts +188 -0
  214. package/src/read/decoders/deepwiki.ts +180 -0
  215. package/src/read/decoders/grokipedia.ts +246 -0
  216. package/src/read/decoders/hackernews.ts +198 -0
  217. package/src/read/decoders/index.ts +15 -0
  218. package/src/read/decoders/reddit.ts +158 -0
  219. package/src/read/decoders/twitter.ts +211 -0
  220. package/src/read/decoders/wikipedia.ts +75 -0
  221. package/src/read/decoders/youtube.ts +75 -0
  222. package/src/read/extract.ts +396 -0
  223. package/src/read/index.ts +78 -0
  224. package/src/read/peek.ts +175 -0
  225. package/src/read/types.ts +37 -0
  226. package/src/replay/engine.ts +559 -0
  227. package/src/replay/truncate.ts +116 -0
  228. package/src/serve.ts +189 -0
  229. package/src/skill/generator.ts +473 -0
  230. package/src/skill/importer.ts +107 -0
  231. package/src/skill/search.ts +76 -0
  232. package/src/skill/signing.ts +36 -0
  233. package/src/skill/ssrf.ts +238 -0
  234. package/src/skill/store.ts +107 -0
  235. package/src/stats/report.ts +208 -0
  236. package/src/types.ts +233 -0
package/src/serve.ts ADDED
@@ -0,0 +1,189 @@
1
+ // src/serve.ts
2
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
3
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4
+ import { z } from 'zod';
5
+ import { readSkillFile } from './skill/store.js';
6
+ import { replayEndpoint } from './replay/engine.js';
7
+ import { AuthManager, getMachineId } from './auth/manager.js';
8
+ import { homedir } from 'node:os';
9
+ import { join } from 'node:path';
10
+ import type { SkillFile, SkillEndpoint } from './types.js';
11
+
12
// Directory handed to AuthManager below: the APITAP_DIR environment variable
// when it is set to a non-empty value, otherwise `.apitap` in the home directory.
const APITAP_DIR = process.env['APITAP_DIR'] || join(homedir(), '.apitap');
13
+
14
/** One MCP tool definition derived from a single skill endpoint. */
export interface ServeTool {
  /** Tool name, built by buildServeTools as `<domain>_<endpointId>`. */
  name: string;
  /** Human-readable summary of the endpoint. */
  description: string;
  /** Id of the skill endpoint this tool replays. */
  endpointId: string;
  /** JSON-Schema-like description of the tool's arguments. */
  inputSchema: {
    type: 'object';
    properties: { [paramName: string]: { type: string; description: string } };
    required: string[];
  };
}
24
+
25
/**
 * Build MCP tool definitions from a skill file's endpoints.
 * Each endpoint becomes one tool named `domain_endpointId`.
 *
 * - `:name` placeholders in the endpoint path become required string arguments.
 * - Captured query parameters become optional arguments (the captured value is
 *   shown as an example in the description).
 * - Request-body variable paths become optional arguments.
 *
 * @param skill Skill file whose endpoints should be exposed.
 * @returns One tool definition per endpoint, in endpoint order.
 */
export function buildServeTools(skill: SkillFile): ServeTool[] {
  return skill.endpoints.map(ep => {
    const properties: Record<string, { type: string; description: string }> = {};
    // A Set keeps `required` free of duplicates when a path repeats a
    // placeholder (e.g. `/a/:id/b/:id`); JSON Schema requires unique entries.
    const required = new Set<string>();

    // Path params → required
    for (const raw of ep.path.match(/:([a-zA-Z_]+)/g) ?? []) {
      const name = raw.slice(1);
      properties[name] = { type: 'string', description: `Path parameter` };
      required.add(name);
    }

    // Query params → optional with examples. Tolerate a missing map so a
    // hand-edited skill file does not crash tool generation.
    for (const [key, val] of Object.entries(ep.queryParams ?? {})) {
      properties[key] = {
        type: 'string',
        description: `Query param (example: ${val.example})`,
      };
    }

    // Body variables → optional
    for (const varPath of ep.requestBody?.variables ?? []) {
      properties[varPath] = {
        type: 'string',
        description: `Body variable`,
      };
    }

    return {
      name: `${skill.domain}_${ep.id}`,
      description: `${ep.method} ${ep.path} on ${skill.domain}`,
      endpointId: ep.id,
      inputSchema: {
        type: 'object' as const,
        properties,
        required: [...required],
      },
    };
  });
}
74
+
75
/** Options accepted by createServeServer. */
export interface ServeOptions {
  /** When true, no AuthManager is created and no stored auth is injected. */
  noAuth?: boolean;
  /** Forwarded to readSkillFile as the directory argument. */
  skillsDir?: string;
  /** @internal Skip SSRF validation — for testing only */
  _skipSsrfCheck?: boolean;
}
81
+
82
/**
 * Create an MCP server that exposes a skill file's endpoints as tools.
 * Each endpoint becomes a callable tool that delegates to the replay engine.
 *
 * @param domain  Domain whose skill file should be served.
 * @param options See ServeOptions.
 * @returns A configured (not yet connected) McpServer.
 * @throws Error when no skill file exists for `domain` or it has no endpoints.
 */
export async function createServeServer(
  domain: string,
  options: ServeOptions = {},
): Promise<McpServer> {
  const skill = await readSkillFile(domain, options.skillsDir);
  if (!skill) {
    throw new Error(`No skill file found for "${domain}". Run: apitap capture ${domain}`);
  }

  if (skill.endpoints.length === 0) {
    throw new Error(`Skill file for "${domain}" has no endpoints.`);
  }

  const tools = buildServeTools(skill);

  // Load auth manager unless --no-auth
  let authManager: AuthManager | undefined;
  if (!options.noAuth) {
    const machineId = await getMachineId();
    authManager = new AuthManager(APITAP_DIR, machineId);
  }

  const server = new McpServer({
    name: `apitap-serve-${domain}`,
    version: '1.0.0',
  });

  // Register one tool per endpoint
  for (const tool of tools) {
    // Build zod schema from tool.inputSchema
    const requiredKeys = new Set(tool.inputSchema.required);
    const shape: Record<string, z.ZodTypeAny> = {};
    for (const [key, prop] of Object.entries(tool.inputSchema.properties)) {
      const field = z.string().describe(prop.description);
      shape[key] = requiredKeys.has(key) ? field : field.optional();
    }

    const endpointId = tool.endpointId;

    server.registerTool(
      tool.name,
      {
        description: tool.description,
        inputSchema: z.object(shape),
        annotations: {
          readOnlyHint: true,
          openWorldHint: true,
        },
      },
      async (args: Record<string, unknown>) => {
        try {
          const endpoint = skill.endpoints.find(e => e.id === endpointId);
          if (!endpoint) {
            throw new Error(`Endpoint "${endpointId}" not found in skill file for "${domain}"`);
          }

          // Inject stored auth into a per-call copy of the skill. The shared
          // `skill` object is never written to, so overlapping tool calls
          // cannot observe (or permanently keep) another call's credentials,
          // and the `[stored]` placeholders stay in place for the next call.
          let callSkill: SkillFile = skill;
          const hasStoredPlaceholder =
            authManager !== undefined &&
            Object.values(endpoint.headers).some(v => v === '[stored]');
          if (authManager && hasStoredPlaceholder) {
            try {
              const storedAuth = await authManager.retrieve(domain);
              if (storedAuth) {
                const endpointWithAuth: SkillEndpoint = {
                  ...endpoint,
                  headers: { ...endpoint.headers, [storedAuth.header]: storedAuth.value },
                };
                callSkill = {
                  ...skill,
                  endpoints: skill.endpoints.map(e => (e === endpoint ? endpointWithAuth : e)),
                };
              }
            } catch {
              // Auth retrieval failed — proceed without
            }
          }

          // Convert args to string params
          const params: Record<string, string> = {};
          for (const [k, v] of Object.entries(args)) {
            if (v !== undefined) params[k] = String(v);
          }

          const result = await replayEndpoint(callSkill, endpointId, {
            params,
            authManager,
            domain,
            _skipSsrfCheck: options._skipSsrfCheck,
          });

          return {
            content: [{
              type: 'text' as const,
              text: JSON.stringify({ status: result.status, data: result.data }),
            }],
          };
        } catch (err: unknown) {
          // Report failures as a tool error instead of rejecting the handler.
          const message = err instanceof Error ? err.message : String(err);
          return {
            content: [{
              type: 'text' as const,
              text: `Replay failed: ${message}`,
            }],
            isError: true,
          };
        }
      },
    );
  }

  return server;
}
package/src/skill/generator.ts ADDED
@@ -0,0 +1,473 @@
1
+ // src/skill/generator.ts
2
+ import type { CapturedExchange, SkillEndpoint, SkillFile, StoredAuth, RequestBody, OAuthConfig } from '../types.js';
3
+ import { scrubPII } from '../capture/scrubber.js';
4
+ import { parameterizePath, cleanFrameworkPath } from '../capture/parameterize.js';
5
+ import { detectPagination } from '../capture/pagination.js';
6
+ import { detectBodyVariables } from '../capture/body-variables.js';
7
+ import { isGraphQLEndpoint, parseGraphQLBody, extractOperationName, detectGraphQLVariables } from '../capture/graphql.js';
8
+ import { detectRefreshableTokens } from '../capture/token-detector.js';
9
+ import { isLikelyToken } from '../capture/entropy.js';
10
+ import { isOAuthTokenRequest, type OAuthInfo } from '../capture/oauth-detector.js';
11
+ import { diffBodies } from '../capture/body-diff.js';
12
+
13
// Lowercased request-header names that filterHeaders always retains.
const KEEP_HEADERS = new Set<string>([
  'authorization', 'content-type', 'accept',
  'x-api-key', 'x-csrf-token', 'x-requested-with',
]);
21
+
22
// Lowercased header names whose values stripAuth always replaces with '[stored]'.
const AUTH_HEADERS = new Set<string>(['authorization', 'x-api-key']);
26
+
27
/** Options for SkillGenerator. */
export interface GeneratorOptions {
  /** Run captured values through scrubPII before storing them (SkillGenerator defaults this to true). */
  scrub?: boolean;
  /** Store a truncated response preview on each endpoint (SkillGenerator defaults this to false). */
  enablePreview?: boolean;
}
31
+
32
/**
 * Keep only the request headers worth storing: names listed in KEEP_HEADERS,
 * plus any `x-` header that is not an `x-forwarded*` header. Matching is
 * case-insensitive; the original key casing is preserved in the result.
 */
function filterHeaders(headers: Record<string, string>): Record<string, string> {
  const isWanted = (name: string): boolean => {
    const lc = name.toLowerCase();
    if (KEEP_HEADERS.has(lc)) return true;
    return lc.startsWith('x-') && !lc.startsWith('x-forwarded');
  };

  return Object.fromEntries(
    Object.entries(headers).filter(([name]) => isWanted(name)),
  );
}
42
+
43
/**
 * Copy `headers`, replacing the value of every credential-bearing header with
 * the literal placeholder `[stored]`. A header counts as credential-bearing
 * when its lowercased name is in AUTH_HEADERS or in `entropyDetected`.
 */
function stripAuth(headers: Record<string, string>, entropyDetected?: Set<string>): Record<string, string> {
  const result: Record<string, string> = {};
  for (const [name, original] of Object.entries(headers)) {
    const lc = name.toLowerCase();
    const redact = AUTH_HEADERS.has(lc) || Boolean(entropyDetected?.has(lc));
    result[name] = redact ? '[stored]' : original;
  }
  return result;
}
55
+
56
/**
 * Pull credentials out of a set of request headers.
 *
 * `authorization` and `x-api-key` are recognised by name; every other
 * non-empty header is passed to isLikelyToken and treated as a custom
 * credential when it is classified as a token.
 *
 * @returns [auth list, set of entropy-detected header names (lowercased)]
 */
function extractAuth(headers: Record<string, string>): [StoredAuth[], Set<string>] {
  const credentials: StoredAuth[] = [];
  const flagged = new Set<string>();

  for (const [name, value] of Object.entries(headers)) {
    // Empty values never yield a credential, whatever the header name.
    if (!value) continue;

    const header = name.toLowerCase();
    switch (header) {
      case 'authorization': {
        const type = value.toLowerCase().startsWith('bearer') ? 'bearer' : 'custom';
        credentials.push({ type, header, value });
        break;
      }
      case 'x-api-key':
        credentials.push({ type: 'api-key', header, value });
        break;
      default:
        // Entropy-based detection for non-standard headers
        if (!AUTH_HEADERS.has(header) && isLikelyToken(header, value).isToken) {
          credentials.push({ type: 'custom', header, value });
          flagged.add(header);
        }
    }
  }

  return [credentials, flagged];
}
88
+
89
/**
 * Derive an endpoint id of the form `<method>-<slug>` from a parameterized path.
 * Framework noise is removed via cleanFrameworkPath, `:param` segments are
 * dropped, and the remaining segments are joined with `-`, stripped of
 * anything other than letters, digits and `-`, and lowercased. A path with no
 * remaining segments yields the slug `root`.
 */
function generateEndpointId(method: string, parameterizedPath: string): string {
  const meaningful: string[] = [];
  for (const part of cleanFrameworkPath(parameterizedPath).split('/')) {
    if (part.length > 0 && !part.startsWith(':')) {
      meaningful.push(part);
    }
  }

  const slug = meaningful.join('-').replace(/[^a-z0-9-]/gi, '').toLowerCase();
  return `${method.toLowerCase()}-${slug || 'root'}`;
}
99
+
100
/**
 * Summarise the shape of a JSON response body.
 *
 * - array  → `{ type: 'array', fields }` where `fields` are the keys of the
 *            first element when it is a non-null object, otherwise undefined
 * - object → `{ type: 'object', fields }` with the top-level keys
 * - other  → `{ type: typeof value }`
 * - unparseable body → `{ type: 'unknown' }`
 */
function detectResponseShape(body: string): { type: string; fields?: string[] } {
  let decoded: unknown;
  try {
    decoded = JSON.parse(body);
  } catch {
    return { type: 'unknown' };
  }

  const isObjectLike = (v: unknown): v is object => typeof v === 'object' && v !== null;

  if (Array.isArray(decoded)) {
    const head: unknown = decoded[0];
    return { type: 'array', fields: isObjectLike(head) ? Object.keys(head) : undefined };
  }
  if (isObjectLike(decoded)) {
    return { type: 'object', fields: Object.keys(decoded) };
  }
  return { type: typeof decoded };
}
120
+
121
/**
 * Build a small preview of a response body. JSON arrays are cut to their first
 * `maxItems` elements, other JSON values are returned as parsed, and bodies
 * that are not valid JSON are returned as their first 500 characters.
 */
function truncatePreview(body: string, maxItems = 2): unknown {
  let decoded: unknown;
  try {
    decoded = JSON.parse(body);
  } catch {
    return body.slice(0, 500);
  }
  return Array.isArray(decoded) ? decoded.slice(0, maxItems) : decoded;
}
132
+
133
/**
 * Turn a URL's query string into a parameter map. Every parameter is typed as
 * `string` with the captured value as its example; when a key repeats, the
 * last occurrence wins.
 */
function extractQueryParams(url: URL): Record<string, { type: string; example: string }> {
  const collected: Record<string, { type: string; example: string }> = {};
  url.searchParams.forEach((value, name) => {
    collected[name] = { type: 'string', example: value };
  });
  return collected;
}
140
+
141
/**
 * Return a copy of a query-parameter map in which every example value has
 * been passed through scrubPII. Parameter types are carried over unchanged.
 */
function scrubQueryParams(
  params: Record<string, { type: string; example: string }>,
): Record<string, { type: string; example: string }> {
  return Object.entries(params).reduce<Record<string, { type: string; example: string }>>(
    (out, [name, { type, example }]) => {
      out[name] = { type, example: scrubPII(example) };
      return out;
    },
    {},
  );
}
150
+
151
/**
 * Recursively scrub PII from a request body.
 *
 * Strings are passed through scrubPII; arrays and plain objects are rebuilt
 * with every nested value scrubbed the same way; all other values (numbers,
 * booleans, null, undefined) are returned untouched.
 *
 * Arrays are rebuilt as arrays. They must not fall through to the generic
 * object branch, which would turn `[a, b]` into `{ "0": a, "1": b }` and
 * corrupt the stored body template.
 *
 * @param body    Parsed body (or raw string) to scrub.
 * @param doScrub When false, `body` is returned as-is (same reference).
 * @returns The scrubbed copy, or `body` itself when nothing needs scrubbing.
 */
function scrubBody(body: unknown, doScrub: boolean): unknown {
  if (!doScrub) return body;

  if (typeof body === 'string') {
    return scrubPII(body);
  }
  if (Array.isArray(body)) {
    return body.map(item => scrubBody(item, doScrub));
  }
  if (body && typeof body === 'object') {
    const scrubbed: Record<string, unknown> = {};
    for (const [key, value] of Object.entries(body)) {
      scrubbed[key] = scrubBody(value, doScrub);
    }
    return scrubbed;
  }
  return body;
}
171
+
172
/**
 * Accumulates captured HTTP exchanges and turns them into a skill file.
 *
 * Exchanges are deduplicated by `METHOD path` (framework-cleaned, parameterized
 * path) or, for GraphQL, by `METHOD graphql:operationName`. The first exchange
 * for a key defines the endpoint; later ones only contribute their request
 * bodies for cross-request variable detection in toSkillFile().
 */
export class SkillGenerator {
  private endpoints = new Map<string, SkillEndpoint>();
  private exchangeBodies = new Map<string, string[]>(); // v1.0: store bodies for cross-request diffing
  private captureCount = 0;
  private filteredCount = 0;
  private baseUrl: string | null = null;
  private extractedAuthList: StoredAuth[] = [];
  private options: Required<GeneratorOptions>;
  private captchaRisk = false;
  private oauthConfig: OAuthConfig | null = null;
  private oauthClientSecret: string | undefined;
  private totalNetworkBytes = 0; // v1.0: accumulate all response sizes

  /** Number of unique endpoints captured so far */
  get endpointCount(): number {
    return this.endpoints.size;
  }

  constructor(options: GeneratorOptions = {}) {
    this.options = {
      enablePreview: options.enablePreview ?? false,
      scrub: options.scrub ?? true,
    };
  }

  /**
   * Return `base` if no stored endpoint uses it yet, otherwise the first free
   * `base-2`, `base-3`, … Endpoint ids drop `:param` segments, so distinct
   * paths such as `/users` and `/users/:id` would otherwise share one id and
   * be indistinguishable to anything that looks endpoints up by id.
   */
  private uniqueEndpointId(base: string): string {
    const taken = new Set<string>();
    for (const existing of this.endpoints.values()) taken.add(existing.id);
    if (!taken.has(base)) return base;

    let suffix = 2;
    while (taken.has(`${base}-${suffix}`)) suffix++;
    return `${base}-${suffix}`;
  }

  /** Deep-scrub a response preview: strings via scrubPII, arrays and objects recursively. */
  private scrubPreview(value: unknown): unknown {
    if (typeof value === 'string') return scrubPII(value);
    if (Array.isArray(value)) return value.map(item => this.scrubPreview(item));
    if (value && typeof value === 'object') {
      const out: Record<string, unknown> = {};
      for (const [k, v] of Object.entries(value)) out[k] = this.scrubPreview(v);
      return out;
    }
    return value;
  }

  /** Add a captured exchange. Returns the new endpoint if first seen, null if duplicate. */
  addExchange(exchange: CapturedExchange): SkillEndpoint | null {
    this.captureCount++;

    const url = new URL(exchange.request.url);
    const method = exchange.request.method;
    const contentType = exchange.request.headers['content-type'] ?? '';

    // Track baseUrl from the first captured exchange
    if (!this.baseUrl) {
      this.baseUrl = url.origin;
    }

    // Check for GraphQL
    const isGraphQL = isGraphQLEndpoint(url.pathname, contentType, exchange.request.postData ?? null);
    let graphqlInfo: { operationName: string; query: string; variables: Record<string, unknown> | null } | null = null;

    if (isGraphQL && exchange.request.postData) {
      const parsed = parseGraphQLBody(exchange.request.postData);
      if (parsed) {
        const opName = extractOperationName(parsed.query, parsed.operationName);
        graphqlInfo = {
          operationName: opName,
          query: parsed.query,
          variables: parsed.variables,
        };
      }
    }

    // Parameterize path for dedup and storage
    const paramPath = parameterizePath(url.pathname);
    // Use framework-cleaned path for dedup key so _next/data routes with different build hashes collapse
    const dedupPath = cleanFrameworkPath(paramPath);

    // For GraphQL, dedup by operation name instead of path
    const key = graphqlInfo
      ? `${method} graphql:${graphqlInfo.operationName}`
      : `${method} ${dedupPath}`;

    // Track response bytes for all exchanges (for browser cost measurement)
    this.totalNetworkBytes += exchange.response.body.length;

    if (this.endpoints.has(key)) {
      // Store duplicate body for cross-request diffing (Strategy 1)
      if (exchange.request.postData) {
        const bodies = this.exchangeBodies.get(key);
        if (bodies) bodies.push(exchange.request.postData);
      }
      return null;
    }

    // Detect OAuth token requests from captured traffic
    const oauthInfo = isOAuthTokenRequest(exchange.request);
    if (oauthInfo && !this.oauthConfig) {
      this.oauthConfig = {
        tokenEndpoint: oauthInfo.tokenEndpoint,
        clientId: oauthInfo.clientId,
        grantType: oauthInfo.grantType,
        ...(oauthInfo.scope ? { scope: oauthInfo.scope } : {}),
      };
      this.oauthClientSecret = oauthInfo.clientSecret;
    }

    // Extract auth before filtering headers (includes entropy-based detection)
    const [auth, entropyDetected] = extractAuth(exchange.request.headers);
    this.extractedAuthList.push(...auth);

    // Filter headers, then strip auth values (including entropy-detected tokens)
    const filtered = filterHeaders(exchange.request.headers);
    const safeHeaders = stripAuth(filtered, entropyDetected);

    // Build query params, optionally scrub PII
    let queryParams = extractQueryParams(url);
    if (this.options.scrub) {
      queryParams = scrubQueryParams(queryParams);
    }

    // Build example URL, optionally scrub PII
    let exampleUrl = exchange.request.url;
    if (this.options.scrub) {
      exampleUrl = scrubPII(exampleUrl);
    }

    // Response preview: null by default, populated with --preview.
    // When scrubbing is on, structured JSON previews are scrubbed as well as
    // plain-text ones, so PII inside parsed responses is not written out.
    let responsePreview: unknown = null;
    if (this.options.enablePreview) {
      const preview = truncatePreview(exchange.response.body);
      responsePreview = this.options.scrub ? this.scrubPreview(preview) : preview;
    }

    // Detect pagination patterns
    const pagination = detectPagination(queryParams) ?? undefined;

    // Process request body for POST/PUT/PATCH
    let requestBody: RequestBody | undefined;
    if (['POST', 'PUT', 'PATCH'].includes(method) && exchange.request.postData) {
      const bodyContentType = exchange.request.headers['content-type'] ?? 'application/octet-stream';
      const rawBody = exchange.request.postData;

      if (bodyContentType.includes('json')) {
        try {
          const parsed = JSON.parse(rawBody);
          const scrubbedTemplate = scrubBody(parsed, this.options.scrub) as Record<string, unknown>;

          // For GraphQL, detect variables in the variables object specifically
          let variables: string[];
          if (graphqlInfo && graphqlInfo.variables) {
            variables = detectGraphQLVariables(graphqlInfo.variables, 'variables');
          } else {
            variables = detectBodyVariables(parsed);
          }

          // Detect refreshable tokens (CSRF, nonces) for v0.8 auth refresh
          const refreshable = detectRefreshableTokens(parsed);

          requestBody = {
            contentType: 'application/json',
            template: scrubbedTemplate,
            ...(variables.length > 0 ? { variables } : {}),
            ...(refreshable.length > 0 ? { refreshableTokens: refreshable } : {}),
          };
        } catch {
          // Invalid JSON - store as string
          requestBody = {
            contentType: bodyContentType,
            template: this.options.scrub ? scrubPII(rawBody) : rawBody,
          };
        }
      } else {
        // Non-JSON body - store as string
        requestBody = {
          contentType: bodyContentType,
          template: this.options.scrub ? scrubPII(rawBody) : rawBody,
        };
      }
    }

    // Generate endpoint ID - use GraphQL operation name if applicable - and
    // disambiguate it against ids already handed out.
    const endpointId = this.uniqueEndpointId(
      graphqlInfo
        ? `${method.toLowerCase()}-graphql-${graphqlInfo.operationName}`
        : generateEndpointId(method, paramPath),
    );

    const endpoint: SkillEndpoint = {
      id: endpointId,
      method: exchange.request.method,
      path: paramPath,
      queryParams,
      headers: safeHeaders,
      responseShape: detectResponseShape(exchange.response.body),
      examples: {
        request: {
          url: exampleUrl,
          // Separate object from `headers`, with name-based and
          // entropy-detected credentials both replaced by placeholders.
          headers: stripAuth(filtered, entropyDetected),
        },
        responsePreview,
      },
      ...(pagination ? { pagination } : {}),
      ...(requestBody ? { requestBody } : {}),
      // Store response bytes on endpoint
      responseBytes: exchange.response.body.length,
    };

    this.endpoints.set(key, endpoint);

    // Store first body for cross-request diffing
    if (exchange.request.postData) {
      this.exchangeBodies.set(key, [exchange.request.postData]);
    }

    return endpoint;
  }

  /** Record a filtered-out request (for metadata tracking). */
  recordFiltered(): void {
    this.filteredCount++;
  }

  /** Get auth credentials extracted during capture. */
  getExtractedAuth(): StoredAuth[] {
    return this.extractedAuthList;
  }

  /** Mark this domain as having captcha risk (detected during capture). */
  setCaptchaRisk(detected: boolean): void {
    this.captchaRisk = detected;
  }

  /** Get detected OAuth configuration (non-secret, for skill file). */
  getOAuthConfig(): OAuthConfig | null {
    return this.oauthConfig;
  }

  /** Get the client secret captured from OAuth traffic (for encrypted storage). */
  getOAuthClientSecret(): string | undefined {
    return this.oauthClientSecret;
  }

  /** Check if any endpoint has refreshable tokens. */
  private hasRefreshableTokens(): boolean {
    for (const endpoint of this.endpoints.values()) {
      if (endpoint.requestBody?.refreshableTokens?.length) {
        return true;
      }
    }
    return false;
  }

  /** Get total network bytes seen during capture (all responses, before filtering). */
  getTotalNetworkBytes(): number {
    return this.totalNetworkBytes;
  }

  /** Add network bytes from a response that was filtered out (not added as exchange). */
  addNetworkBytes(bytes: number): void {
    this.totalNetworkBytes += bytes;
  }

  /** Generate the complete skill file for a domain. */
  toSkillFile(domain: string, options?: { domBytes?: number; totalRequests?: number }): SkillFile {
    // Apply cross-request diffing (Strategy 1) to endpoints with multiple bodies
    for (const [key, bodies] of this.exchangeBodies) {
      if (bodies.length < 2) continue;
      const endpoint = this.endpoints.get(key);
      if (!endpoint?.requestBody) continue;

      const diffedVars = diffBodies(bodies);
      if (diffedVars.length > 0) {
        const existing = new Set(endpoint.requestBody.variables ?? []);
        for (const v of diffedVars) existing.add(v);
        endpoint.requestBody.variables = [...existing];
      }
    }

    const skill: SkillFile = {
      version: '1.2',
      domain,
      capturedAt: new Date().toISOString(),
      baseUrl: this.baseUrl ?? `https://${domain}`,
      endpoints: Array.from(this.endpoints.values()),
      metadata: {
        captureCount: this.captureCount,
        filteredCount: this.filteredCount,
        toolVersion: '1.0.0',
        ...(options?.domBytes != null ? {
          browserCost: {
            domBytes: options.domBytes,
            totalNetworkBytes: this.totalNetworkBytes,
            totalRequests: options.totalRequests ?? this.captureCount + this.filteredCount,
          },
        } : {}),
      },
      provenance: 'unsigned' as const,
    };

    // Add auth config if captcha risk detected, refreshable tokens present, or OAuth detected
    if (this.captchaRisk || this.hasRefreshableTokens() || this.oauthConfig) {
      skill.auth = {
        browserMode: this.captchaRisk ? 'visible' : 'headless',
        captchaRisk: this.captchaRisk,
        ...(this.oauthConfig ? { oauthConfig: this.oauthConfig } : {}),
      };
    }

    return skill;
  }
}