@apitap/core 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/LICENSE +60 -0
  2. package/README.md +362 -0
  3. package/SKILL.md +270 -0
  4. package/dist/auth/crypto.d.ts +31 -0
  5. package/dist/auth/crypto.js +66 -0
  6. package/dist/auth/crypto.js.map +1 -0
  7. package/dist/auth/handoff.d.ts +29 -0
  8. package/dist/auth/handoff.js +180 -0
  9. package/dist/auth/handoff.js.map +1 -0
  10. package/dist/auth/manager.d.ts +46 -0
  11. package/dist/auth/manager.js +127 -0
  12. package/dist/auth/manager.js.map +1 -0
  13. package/dist/auth/oauth-refresh.d.ts +16 -0
  14. package/dist/auth/oauth-refresh.js +91 -0
  15. package/dist/auth/oauth-refresh.js.map +1 -0
  16. package/dist/auth/refresh.d.ts +43 -0
  17. package/dist/auth/refresh.js +217 -0
  18. package/dist/auth/refresh.js.map +1 -0
  19. package/dist/capture/anti-bot.d.ts +15 -0
  20. package/dist/capture/anti-bot.js +43 -0
  21. package/dist/capture/anti-bot.js.map +1 -0
  22. package/dist/capture/blocklist.d.ts +6 -0
  23. package/dist/capture/blocklist.js +70 -0
  24. package/dist/capture/blocklist.js.map +1 -0
  25. package/dist/capture/body-diff.d.ts +8 -0
  26. package/dist/capture/body-diff.js +102 -0
  27. package/dist/capture/body-diff.js.map +1 -0
  28. package/dist/capture/body-variables.d.ts +13 -0
  29. package/dist/capture/body-variables.js +142 -0
  30. package/dist/capture/body-variables.js.map +1 -0
  31. package/dist/capture/domain.d.ts +8 -0
  32. package/dist/capture/domain.js +34 -0
  33. package/dist/capture/domain.js.map +1 -0
  34. package/dist/capture/entropy.d.ts +33 -0
  35. package/dist/capture/entropy.js +100 -0
  36. package/dist/capture/entropy.js.map +1 -0
  37. package/dist/capture/filter.d.ts +11 -0
  38. package/dist/capture/filter.js +49 -0
  39. package/dist/capture/filter.js.map +1 -0
  40. package/dist/capture/graphql.d.ts +21 -0
  41. package/dist/capture/graphql.js +99 -0
  42. package/dist/capture/graphql.js.map +1 -0
  43. package/dist/capture/idle.d.ts +23 -0
  44. package/dist/capture/idle.js +44 -0
  45. package/dist/capture/idle.js.map +1 -0
  46. package/dist/capture/monitor.d.ts +26 -0
  47. package/dist/capture/monitor.js +183 -0
  48. package/dist/capture/monitor.js.map +1 -0
  49. package/dist/capture/oauth-detector.d.ts +18 -0
  50. package/dist/capture/oauth-detector.js +96 -0
  51. package/dist/capture/oauth-detector.js.map +1 -0
  52. package/dist/capture/pagination.d.ts +9 -0
  53. package/dist/capture/pagination.js +40 -0
  54. package/dist/capture/pagination.js.map +1 -0
  55. package/dist/capture/parameterize.d.ts +17 -0
  56. package/dist/capture/parameterize.js +63 -0
  57. package/dist/capture/parameterize.js.map +1 -0
  58. package/dist/capture/scrubber.d.ts +5 -0
  59. package/dist/capture/scrubber.js +38 -0
  60. package/dist/capture/scrubber.js.map +1 -0
  61. package/dist/capture/session.d.ts +46 -0
  62. package/dist/capture/session.js +445 -0
  63. package/dist/capture/session.js.map +1 -0
  64. package/dist/capture/token-detector.d.ts +16 -0
  65. package/dist/capture/token-detector.js +62 -0
  66. package/dist/capture/token-detector.js.map +1 -0
  67. package/dist/capture/verifier.d.ts +17 -0
  68. package/dist/capture/verifier.js +147 -0
  69. package/dist/capture/verifier.js.map +1 -0
  70. package/dist/cli.d.ts +2 -0
  71. package/dist/cli.js +930 -0
  72. package/dist/cli.js.map +1 -0
  73. package/dist/discovery/auth.d.ts +17 -0
  74. package/dist/discovery/auth.js +81 -0
  75. package/dist/discovery/auth.js.map +1 -0
  76. package/dist/discovery/fetch.d.ts +17 -0
  77. package/dist/discovery/fetch.js +59 -0
  78. package/dist/discovery/fetch.js.map +1 -0
  79. package/dist/discovery/frameworks.d.ts +11 -0
  80. package/dist/discovery/frameworks.js +249 -0
  81. package/dist/discovery/frameworks.js.map +1 -0
  82. package/dist/discovery/index.d.ts +21 -0
  83. package/dist/discovery/index.js +219 -0
  84. package/dist/discovery/index.js.map +1 -0
  85. package/dist/discovery/openapi.d.ts +13 -0
  86. package/dist/discovery/openapi.js +175 -0
  87. package/dist/discovery/openapi.js.map +1 -0
  88. package/dist/discovery/probes.d.ts +9 -0
  89. package/dist/discovery/probes.js +70 -0
  90. package/dist/discovery/probes.js.map +1 -0
  91. package/dist/index.d.ts +25 -0
  92. package/dist/index.js +25 -0
  93. package/dist/index.js.map +1 -0
  94. package/dist/inspect/report.d.ts +52 -0
  95. package/dist/inspect/report.js +191 -0
  96. package/dist/inspect/report.js.map +1 -0
  97. package/dist/mcp.d.ts +8 -0
  98. package/dist/mcp.js +526 -0
  99. package/dist/mcp.js.map +1 -0
  100. package/dist/orchestration/browse.d.ts +38 -0
  101. package/dist/orchestration/browse.js +198 -0
  102. package/dist/orchestration/browse.js.map +1 -0
  103. package/dist/orchestration/cache.d.ts +15 -0
  104. package/dist/orchestration/cache.js +24 -0
  105. package/dist/orchestration/cache.js.map +1 -0
  106. package/dist/plugin.d.ts +17 -0
  107. package/dist/plugin.js +158 -0
  108. package/dist/plugin.js.map +1 -0
  109. package/dist/read/decoders/deepwiki.d.ts +2 -0
  110. package/dist/read/decoders/deepwiki.js +148 -0
  111. package/dist/read/decoders/deepwiki.js.map +1 -0
  112. package/dist/read/decoders/grokipedia.d.ts +2 -0
  113. package/dist/read/decoders/grokipedia.js +210 -0
  114. package/dist/read/decoders/grokipedia.js.map +1 -0
  115. package/dist/read/decoders/hackernews.d.ts +2 -0
  116. package/dist/read/decoders/hackernews.js +168 -0
  117. package/dist/read/decoders/hackernews.js.map +1 -0
  118. package/dist/read/decoders/index.d.ts +2 -0
  119. package/dist/read/decoders/index.js +12 -0
  120. package/dist/read/decoders/index.js.map +1 -0
  121. package/dist/read/decoders/reddit.d.ts +2 -0
  122. package/dist/read/decoders/reddit.js +142 -0
  123. package/dist/read/decoders/reddit.js.map +1 -0
  124. package/dist/read/decoders/twitter.d.ts +12 -0
  125. package/dist/read/decoders/twitter.js +187 -0
  126. package/dist/read/decoders/twitter.js.map +1 -0
  127. package/dist/read/decoders/wikipedia.d.ts +2 -0
  128. package/dist/read/decoders/wikipedia.js +66 -0
  129. package/dist/read/decoders/wikipedia.js.map +1 -0
  130. package/dist/read/decoders/youtube.d.ts +2 -0
  131. package/dist/read/decoders/youtube.js +69 -0
  132. package/dist/read/decoders/youtube.js.map +1 -0
  133. package/dist/read/extract.d.ts +25 -0
  134. package/dist/read/extract.js +320 -0
  135. package/dist/read/extract.js.map +1 -0
  136. package/dist/read/index.d.ts +14 -0
  137. package/dist/read/index.js +66 -0
  138. package/dist/read/index.js.map +1 -0
  139. package/dist/read/peek.d.ts +9 -0
  140. package/dist/read/peek.js +137 -0
  141. package/dist/read/peek.js.map +1 -0
  142. package/dist/read/types.d.ts +44 -0
  143. package/dist/read/types.js +3 -0
  144. package/dist/read/types.js.map +1 -0
  145. package/dist/replay/engine.d.ts +53 -0
  146. package/dist/replay/engine.js +441 -0
  147. package/dist/replay/engine.js.map +1 -0
  148. package/dist/replay/truncate.d.ts +16 -0
  149. package/dist/replay/truncate.js +92 -0
  150. package/dist/replay/truncate.js.map +1 -0
  151. package/dist/serve.d.ts +31 -0
  152. package/dist/serve.js +149 -0
  153. package/dist/serve.js.map +1 -0
  154. package/dist/skill/generator.d.ts +44 -0
  155. package/dist/skill/generator.js +419 -0
  156. package/dist/skill/generator.js.map +1 -0
  157. package/dist/skill/importer.d.ts +26 -0
  158. package/dist/skill/importer.js +80 -0
  159. package/dist/skill/importer.js.map +1 -0
  160. package/dist/skill/search.d.ts +19 -0
  161. package/dist/skill/search.js +51 -0
  162. package/dist/skill/search.js.map +1 -0
  163. package/dist/skill/signing.d.ts +16 -0
  164. package/dist/skill/signing.js +34 -0
  165. package/dist/skill/signing.js.map +1 -0
  166. package/dist/skill/ssrf.d.ts +27 -0
  167. package/dist/skill/ssrf.js +210 -0
  168. package/dist/skill/ssrf.js.map +1 -0
  169. package/dist/skill/store.d.ts +7 -0
  170. package/dist/skill/store.js +93 -0
  171. package/dist/skill/store.js.map +1 -0
  172. package/dist/stats/report.d.ts +26 -0
  173. package/dist/stats/report.js +157 -0
  174. package/dist/stats/report.js.map +1 -0
  175. package/dist/types.d.ts +214 -0
  176. package/dist/types.js +3 -0
  177. package/dist/types.js.map +1 -0
  178. package/package.json +58 -0
  179. package/src/auth/crypto.ts +92 -0
  180. package/src/auth/handoff.ts +229 -0
  181. package/src/auth/manager.ts +140 -0
  182. package/src/auth/oauth-refresh.ts +120 -0
  183. package/src/auth/refresh.ts +300 -0
  184. package/src/capture/anti-bot.ts +63 -0
  185. package/src/capture/blocklist.ts +75 -0
  186. package/src/capture/body-diff.ts +109 -0
  187. package/src/capture/body-variables.ts +156 -0
  188. package/src/capture/domain.ts +34 -0
  189. package/src/capture/entropy.ts +121 -0
  190. package/src/capture/filter.ts +56 -0
  191. package/src/capture/graphql.ts +124 -0
  192. package/src/capture/idle.ts +45 -0
  193. package/src/capture/monitor.ts +224 -0
  194. package/src/capture/oauth-detector.ts +106 -0
  195. package/src/capture/pagination.ts +49 -0
  196. package/src/capture/parameterize.ts +68 -0
  197. package/src/capture/scrubber.ts +49 -0
  198. package/src/capture/session.ts +502 -0
  199. package/src/capture/token-detector.ts +76 -0
  200. package/src/capture/verifier.ts +171 -0
  201. package/src/cli.ts +1031 -0
  202. package/src/discovery/auth.ts +99 -0
  203. package/src/discovery/fetch.ts +85 -0
  204. package/src/discovery/frameworks.ts +231 -0
  205. package/src/discovery/index.ts +256 -0
  206. package/src/discovery/openapi.ts +230 -0
  207. package/src/discovery/probes.ts +76 -0
  208. package/src/index.ts +26 -0
  209. package/src/inspect/report.ts +247 -0
  210. package/src/mcp.ts +618 -0
  211. package/src/orchestration/browse.ts +250 -0
  212. package/src/orchestration/cache.ts +37 -0
  213. package/src/plugin.ts +188 -0
  214. package/src/read/decoders/deepwiki.ts +180 -0
  215. package/src/read/decoders/grokipedia.ts +246 -0
  216. package/src/read/decoders/hackernews.ts +198 -0
  217. package/src/read/decoders/index.ts +15 -0
  218. package/src/read/decoders/reddit.ts +158 -0
  219. package/src/read/decoders/twitter.ts +211 -0
  220. package/src/read/decoders/wikipedia.ts +75 -0
  221. package/src/read/decoders/youtube.ts +75 -0
  222. package/src/read/extract.ts +396 -0
  223. package/src/read/index.ts +78 -0
  224. package/src/read/peek.ts +175 -0
  225. package/src/read/types.ts +37 -0
  226. package/src/replay/engine.ts +559 -0
  227. package/src/replay/truncate.ts +116 -0
  228. package/src/serve.ts +189 -0
  229. package/src/skill/generator.ts +473 -0
  230. package/src/skill/importer.ts +107 -0
  231. package/src/skill/search.ts +76 -0
  232. package/src/skill/signing.ts +36 -0
  233. package/src/skill/ssrf.ts +238 -0
  234. package/src/skill/store.ts +107 -0
  235. package/src/stats/report.ts +208 -0
  236. package/src/types.ts +233 -0
@@ -0,0 +1,34 @@
1
+ // src/capture/domain.ts
2
+
3
/**
 * Check whether a hostname belongs to the target domain.
 *
 * A hostname matches when it equals the target host or is a subdomain of it
 * on a dot boundary, so "evil-example.com" never matches "example.com".
 *
 * The target is normalized before comparison: surrounding whitespace is
 * trimmed, the host is extracted whether or not a scheme is present
 * ("example.com:8080/path" works as well as "https://example.com/path"),
 * it is lower-cased, and a leading "www." is dropped for broader matching.
 * An empty target never matches.
 *
 * @param hostname - The hostname to check (e.g. "api.example.com")
 * @param target - The target domain or URL (e.g. "example.com" or "https://example.com/path")
 * @returns true when hostname is the target host or one of its subdomains
 */
export function isDomainMatch(hostname: string, target: string): boolean {
  const candidate = target.trim();

  let targetHost: string;
  try {
    // Scheme-less targets get a placeholder scheme so the URL parser can
    // still discard any port, path or query that follows the host.
    const parseable = candidate.includes('://') ? candidate : `http://${candidate}`;
    targetHost = new URL(parseable).hostname;
  } catch {
    // Not parseable as a URL: fall back to comparing against the raw text.
    targetHost = candidate;
  }

  targetHost = targetHost.toLowerCase();
  if (targetHost.startsWith('www.')) {
    targetHost = targetHost.slice(4);
  }

  // Without this guard an empty target would match any hostname ending in ".".
  if (targetHost === '') return false;

  const host = hostname.toLowerCase();
  // The leading dot keeps the suffix match on a label boundary.
  return host === targetHost || host.endsWith('.' + targetHost);
}
@@ -0,0 +1,121 @@
1
+ // src/capture/entropy.ts
2
+
3
/** Values shorter than this many characters are never classified as tokens. */
const MIN_TOKEN_LENGTH = 16;

/** Canonical 8-4-4-4-12 hexadecimal UUID, matched case-insensitively. */
const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
5
+
6
/** Result of classifying a header/cookie value with `isLikelyToken`. */
export interface TokenClassification {
  /** Whether the value looks like an auth token. */
  isToken: boolean;
  /** How strongly the heuristics support the verdict. */
  confidence: 'high' | 'medium';
  /** 'jwt' when the value decoded as a JWT, 'opaque' otherwise. */
  format: 'jwt' | 'opaque';
  /** Decoded claims; only set when `format` is 'jwt'. */
  jwtClaims?: JwtClaims;
}
12
+
13
/**
 * Subset of JWT payload claims extracted by `parseJwtClaims`.
 * A field is present only when the payload carried it with the listed type.
 */
export interface JwtClaims {
  /** `exp` claim as a number. */
  exp?: number;
  /** `iat` claim as a number. */
  iat?: number;
  /** `iss` claim as a string. */
  iss?: string;
  /** `aud` claim as a string. */
  aud?: string;
  /** `scope` claim as a string. */
  scope?: string;
}
20
+
21
/**
 * Calculate Shannon entropy (bits per symbol) of a string.
 * Higher values indicate more randomness.
 *
 * Symbols are Unicode code points. The symbol total is counted during the
 * same iteration that builds the frequency table, so probabilities always
 * sum to 1 even when the string contains surrogate pairs (where
 * `value.length` would over-count).
 *
 * @param value - The string to measure.
 * @returns Entropy in bits per symbol; 0 for the empty string.
 */
export function shannonEntropy(value: string): number {
  const counts = new Map<string, number>();
  let total = 0;
  for (const symbol of value) {
    counts.set(symbol, (counts.get(symbol) ?? 0) + 1);
    total++;
  }

  if (total === 0) return 0;

  let entropy = 0;
  for (const count of counts.values()) {
    const p = count / total;
    entropy -= p * Math.log2(p);
  }

  return entropy;
}
42
+
43
/**
 * Parse JWT claims from a token string.
 *
 * The token must start with "eyJ" and consist of exactly three dot-separated
 * segments. Only the payload (second segment) is decoded; the signature is
 * not verified. The payload must decode to a JSON object: arrays and
 * scalars are rejected. Claims with an unexpected type (for example an
 * array-valued `aud`) are omitted from the result.
 *
 * @param token - Candidate token, without any "Bearer " prefix.
 * @returns The recognized claims, or null if the value is not JWT-shaped.
 */
export function parseJwtClaims(token: string): JwtClaims | null {
  // JWT: starts with eyJ, has exactly 2 dots
  if (!token.startsWith('eyJ')) return null;
  const segments = token.split('.');
  if (segments.length !== 3) return null;

  try {
    // Decode payload (second segment): base64url → base64 → JSON
    const payload = segments[1]!;
    const base64 = payload.replace(/-/g, '+').replace(/_/g, '/');
    const decoded: unknown = JSON.parse(Buffer.from(base64, 'base64').toString('utf-8'));

    // A claims set is a JSON object; arrays are objects to typeof, so reject them explicitly.
    if (typeof decoded !== 'object' || decoded === null || Array.isArray(decoded)) return null;
    const claims = decoded as Record<string, unknown>;

    const result: JwtClaims = {};
    if (typeof claims.exp === 'number') result.exp = claims.exp;
    if (typeof claims.iat === 'number') result.iat = claims.iat;
    if (typeof claims.iss === 'string') result.iss = claims.iss;
    if (typeof claims.aud === 'string') result.aud = claims.aud;
    if (typeof claims.scope === 'string') result.scope = claims.scope;

    return result;
  } catch {
    // Payload was not valid JSON
    return null;
  }
}
74
+
75
/**
 * Classify whether a header/cookie value is likely an auth token.
 *
 * Detection hierarchy:
 * 1. JWT (eyJ prefix, 2 dots) → decode and classify with rich metadata
 * 2. UUID → skip (entity ID, not token)
 * 3. Short values (<16 chars) → skip
 * 4. High-entropy opaque string → classify by entropy threshold
 *    (>= 4.5 bits/char is high confidence, >= 3.5 is medium)
 *
 * A leading "Bearer " scheme is stripped before analysis. The scheme is
 * matched case-insensitively, so "bearer <token>" is handled like
 * "Bearer <token>".
 *
 * @param name - Header or cookie name; not consulted by the current heuristics.
 * @param value - Raw header or cookie value.
 * @returns The classification; `jwtClaims` is set only for JWTs.
 */
export function isLikelyToken(name: string, value: string): TokenClassification {
  // "Bearer " is 7 characters regardless of letter case.
  const raw = /^bearer /i.test(value) ? value.slice(7) : value;

  // JWT detection — takes priority
  const jwtClaims = parseJwtClaims(raw);
  if (jwtClaims) {
    return {
      isToken: true,
      confidence: 'high',
      format: 'jwt',
      jwtClaims,
    };
  }

  // UUIDs are almost always entity IDs; short values cannot be tokens.
  if (UUID_PATTERN.test(raw) || raw.length < MIN_TOKEN_LENGTH) {
    return { isToken: false, confidence: 'medium', format: 'opaque' };
  }

  // Entropy-based classification
  const entropy = shannonEntropy(raw);
  if (entropy >= 4.5) {
    return { isToken: true, confidence: 'high', format: 'opaque' };
  }
  if (entropy >= 3.5) {
    return { isToken: true, confidence: 'medium', format: 'opaque' };
  }

  // Below threshold — not a token
  return { isToken: false, confidence: 'medium', format: 'opaque' };
}
@@ -0,0 +1,56 @@
1
+ // src/capture/filter.ts
2
+ import { isBlocklisted } from './blocklist.js';
3
+
4
/** The response fields `shouldCapture` needs to make its decision. */
export interface FilterableResponse {
  /** Absolute URL of the response. */
  url: string;
  /** HTTP status code. */
  status: number;
  /** Raw Content-Type header value; may include parameters such as charset. */
  contentType: string;
}
9
+
10
/** Media types (lower-case, without parameters) that are treated as JSON. */
const JSON_CONTENT_TYPES: readonly string[] = [
  'application/json',
  'application/vnd.api+json',
  'text/json',
];
15
+
16
/** Paths that are telemetry/framework noise when matched exactly. */
const NOISE_PATHS = new Set([
  '/monitoring',
  '/telemetry',
  '/track',
  '/manifest.json',
]);

/**
 * Decide whether a URL path is framework or telemetry noise.
 * Exported for testing.
 *
 * @param pathname - The path component of a URL.
 * @returns true for an exact noise path or anything under "/_next/static/"
 *   (Next.js static build assets, as opposed to data routes).
 */
export function isPathNoise(pathname: string): boolean {
  return NOISE_PATHS.has(pathname) || pathname.startsWith('/_next/static/');
}
37
+
38
/**
 * Decide whether a response is worth recording.
 *
 * A response is kept only when it has a 2xx status, a JSON media type, a
 * parseable URL, a hostname that is not blocklisted, and a path that is not
 * framework/telemetry noise.
 *
 * @param response - URL, status and content type of the response.
 * @returns true when the response should be captured.
 */
export function shouldCapture(response: FilterableResponse): boolean {
  const { url, status, contentType } = response;

  // Anything outside 2xx is dropped
  if (status < 200 || status >= 300) return false;

  // Compare the bare media type, ignoring parameters such as charset
  const [rawType = ''] = contentType.toLowerCase().split(';');
  const mediaType = rawType.trim();
  if (!JSON_CONTENT_TYPES.includes(mediaType)) return false;

  // Domain and path checks; an unparseable URL is rejected
  try {
    const { hostname, pathname } = new URL(url);
    if (isBlocklisted(hostname)) return false;
    if (isPathNoise(pathname)) return false;
  } catch {
    return false;
  }

  return true;
}
@@ -0,0 +1,124 @@
1
+ // src/capture/graphql.ts
2
+
3
/** The fields of a GraphQL request body, as returned by `parseGraphQLBody`. */
export interface GraphQLParsed {
  /** The body's `operationName`, or null when it is absent. */
  operationName: string | null;
  /** The GraphQL document text. */
  query: string;
  /** The body's `variables`, or null when it is absent. */
  variables: Record<string, unknown> | null;
}
8
+
9
/**
 * Detect if a request is to a GraphQL endpoint.
 *
 * Any one of three signals is enough: the path contains "/graphql", the
 * content type contains "application/graphql", or the body is JSON with a
 * string `query` field.
 *
 * @param path - Request path.
 * @param contentType - Request Content-Type header value.
 * @param body - Request body, or null when there is none.
 * @returns true when the request looks like GraphQL.
 */
export function isGraphQLEndpoint(
  path: string,
  contentType: string,
  body: string | null,
): boolean {
  const addressedAsGraphQL =
    path.includes('/graphql') || contentType.includes('application/graphql');
  if (addressedAsGraphQL) return true;

  if (!body) return false;

  try {
    // GraphQL-over-HTTP bodies carry the document in a "query" string
    return typeof JSON.parse(body).query === 'string';
  } catch {
    // Not JSON, or JSON that has no properties to read (e.g. null)
    return false;
  }
}
41
+
42
/**
 * Parse a GraphQL request body.
 *
 * The body must be a JSON object with a string `query`. `operationName` is
 * kept only when it is a string, and `variables` only when it is a plain
 * object (not an array or scalar); anything else becomes null so the result
 * always conforms to `GraphQLParsed`.
 *
 * @param body - Raw request body text.
 * @returns The parsed fields, or null when the body is not a GraphQL request.
 */
export function parseGraphQLBody(body: string): GraphQLParsed | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(body);
  } catch {
    return null;
  }

  if (typeof parsed !== 'object' || parsed === null) return null;

  const { query, operationName, variables } = parsed as Record<string, unknown>;
  if (typeof query !== 'string') return null;

  const hasVariableMap =
    typeof variables === 'object' && variables !== null && !Array.isArray(variables);

  return {
    operationName: typeof operationName === 'string' ? operationName : null,
    query,
    variables: hasVariableMap ? (variables as Record<string, unknown>) : null,
  };
}
60
+
61
/**
 * Resolve the operation name for a GraphQL request.
 *
 * @param query - The GraphQL document text.
 * @param explicitName - The request's `operationName`, if any.
 * @returns `explicitName` when it is non-empty; otherwise the name that
 *   follows a leading `query`/`mutation`/`subscription` keyword; otherwise
 *   "Anonymous".
 */
export function extractOperationName(
  query: string,
  explicitName: string | null,
): string {
  if (explicitName) return explicitName;

  // Capture group 2 is the identifier after the operation keyword
  const declared = /^\s*(query|mutation|subscription)\s+(\w+)/.exec(query);
  return declared ? declared[2] : 'Anonymous';
}
80
+
81
/**
 * List the variables that look dynamic (IDs, cursors, pagination values).
 *
 * Every number is reported, every cursor-like string is reported, and nested
 * plain objects are walked recursively. Results are dot-separated paths in
 * the order the entries are encountered.
 *
 * @param variables - The request's variables, or null.
 * @param prefix - Path of the enclosing object; empty at the top level.
 * @returns Paths of the variables judged to be dynamic.
 */
export function detectGraphQLVariables(
  variables: Record<string, unknown> | null,
  prefix = '',
): string[] {
  if (!variables || typeof variables !== 'object') {
    return [];
  }

  return Object.entries(variables).flatMap(([key, value]): string[] => {
    const path = prefix ? `${prefix}.${key}` : key;

    switch (typeof value) {
      case 'number':
        // Numbers are often IDs or pagination values
        return [path];
      case 'string':
        return isLikelyCursor(value) ? [path] : [];
      case 'object':
        // Descend into plain objects only; null and arrays are ignored
        return value !== null && !Array.isArray(value)
          ? detectGraphQLVariables(value as Record<string, unknown>, path)
          : [];
      default:
        return [];
    }
  });
}

/**
 * Heuristic for cursor-like strings: either longer than 10 characters and
 * made only of base64/base64url characters, or shaped like a UUID.
 */
function isLikelyCursor(value: string): boolean {
  const looksBase64 = value.length > 10 && /^[a-zA-Z0-9+/=_-]+$/.test(value);
  const looksUuid =
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(value);
  return looksBase64 || looksUuid;
}
@@ -0,0 +1,45 @@
1
+ // src/capture/idle.ts
2
+
3
/**
 * Remembers which endpoint keys have been seen and reports when no new key
 * has arrived for a configurable time. Interactive capture uses it to nudge
 * the user once discovery has gone quiet.
 */
export class IdleTracker {
  private readonly knownKeys = new Set<string>();
  private lastDiscoveryAt: number;
  private alreadyNotified = false;

  /**
   * @param thresholdMs - Quiet time, in milliseconds, that counts as idle.
   * @param now - Clock returning milliseconds; injectable for tests.
   */
  constructor(
    private readonly thresholdMs = 15000,
    private readonly now: () => number = Date.now,
  ) {
    this.lastDiscoveryAt = this.now();
  }

  /**
   * Record an endpoint key (e.g. "GET /api/items").
   * A key not seen before restarts the idle clock and re-arms `checkIdle`.
   *
   * @returns true if the key is new, false if it was already recorded.
   */
  recordEndpoint(key: string): boolean {
    const isNew = !this.knownKeys.has(key);
    if (isNew) {
      this.knownKeys.add(key);
      this.lastDiscoveryAt = this.now();
      this.alreadyNotified = false;
    }
    return isNew;
  }

  /**
   * Report whether the idle threshold has been reached.
   *
   * @returns true at most once per idle period; further calls return false
   *   until a new endpoint is recorded.
   */
  checkIdle(): boolean {
    if (this.alreadyNotified) return false;

    const isIdle = this.now() - this.lastDiscoveryAt >= this.thresholdMs;
    if (isIdle) {
      this.alreadyNotified = true;
    }
    return isIdle;
  }
}
@@ -0,0 +1,224 @@
1
+ // src/capture/monitor.ts
2
+ import { chromium, type Browser, type Page } from 'playwright';
3
+ import { shouldCapture } from './filter.js';
4
+ import { isDomainMatch } from './domain.js';
5
+ import { SkillGenerator, type GeneratorOptions } from '../skill/generator.js';
6
+ import { IdleTracker } from './idle.js';
7
+ import { detectCaptcha } from '../auth/refresh.js';
8
+ import type { CapturedExchange } from '../types.js';
9
+
10
/** Configuration and progress callbacks for `capture`. */
export interface CaptureOptions {
  /** Page to open; also the target for domain-only filtering. */
  url: string;
  /** CDP port to try instead of the default port list. */
  port?: number;
  /** When true, skip attaching over CDP and go straight to launching. */
  launch?: boolean;
  /** When true, fail instead of launching if no CDP browser is found. */
  attach?: boolean;
  /** Headless flag for a launched browser; when omitted, connectToBrowser picks the default. */
  headless?: boolean;
  /** Capture length in seconds; when omitted, capture runs until SIGINT. */
  duration?: number;
  /** When true, responses from every domain are considered, not just the target's. */
  allDomains?: boolean;
  /** Forwarded to the skill generator; defaults to false. */
  enablePreview?: boolean;
  /** Forwarded to the skill generator; defaults to true. */
  scrub?: boolean;
  /** Called each time a captured exchange yields an endpoint. */
  onEndpoint?: (endpoint: { id: string; method: string; path: string }) => void;
  /** Called for each response that is filtered out. */
  onFiltered?: () => void;
  /** Called when no new endpoint has appeared for a while (only without `duration`). */
  onIdle?: () => void;
}
24
+
25
/** What `capture` returns once the session ends. */
export interface CaptureResult {
  /** One skill generator per response hostname that produced captured traffic. */
  generators: Map<string, SkillGenerator>;
  /** Number of responses observed on the page. */
  totalRequests: number;
  /** Number of observed responses that were filtered out. */
  filteredRequests: number;
  /** v1.0: measured DOM size for browser cost comparison; undefined if the page could not be read. */
  domBytes?: number;
}
31
+
32
/** CDP ports probed, in order, when the caller does not name one. */
const DEFAULT_CDP_PORTS = [18792, 18800, 9222];

/**
 * Obtain a browser for capture, preferring an already-running one.
 *
 * Unless `options.launch` is set, each candidate CDP port on localhost is
 * tried in turn (3 s timeout each). If none answers, a Chromium instance is
 * launched, unless `options.attach` is set, in which case an error is thrown.
 *
 * Headless default when `options.headless` is omitted: headed, so the user
 * can interact with the page. The only exception is an X11-style platform
 * (anything other than macOS or Windows) with no DISPLAY set, where a headed
 * launch could not open a window. DISPLAY is normally unset on macOS and
 * Windows, so keying off it alone made interactive capture headless there.
 *
 * @param options - Capture options; reads `launch`, `attach`, `port`, `headless`.
 * @returns The browser and whether this call launched it.
 * @throws Error when `attach` is set and no CDP browser was found.
 */
async function connectToBrowser(options: CaptureOptions): Promise<{ browser: Browser; launched: boolean }> {
  const cdpPorts = options.port ? [options.port] : DEFAULT_CDP_PORTS;

  if (!options.launch) {
    for (const port of cdpPorts) {
      try {
        const browser = await chromium.connectOverCDP(`http://localhost:${port}`, { timeout: 3000 });
        return { browser, launched: false };
      } catch {
        // Nothing reachable on this port; try the next candidate.
      }
    }
  }

  if (options.attach) {
    throw new Error(`No browser found on CDP ports: ${cdpPorts.join(', ')}. Is a Chromium browser running with remote debugging?`);
  }

  const needsDisplayVar = process.platform !== 'darwin' && process.platform !== 'win32';
  const noDisplayAvailable = needsDisplayVar && !process.env.DISPLAY;
  const browser = await chromium.launch({ headless: options.headless ?? noDisplayAvailable });
  return { browser, launched: true };
}
55
+
56
/**
 * Capture API traffic from a page and group it into per-hostname skill generators.
 *
 * Connects to (or launches) a browser and listens to every response on one
 * page. Responses outside the target domain, or rejected by `shouldCapture`,
 * are dropped; the rest are fed to a `SkillGenerator` keyed by the response
 * hostname. Capture runs for `options.duration` seconds, or until SIGINT
 * when no duration is given; SIGINT always ends the wait gracefully so the
 * caller can still write skill files.
 *
 * A browser launched by this call is closed, and the idle timer cleared,
 * even when navigation or capture throws.
 *
 * @param options - Capture configuration and progress callbacks.
 * @returns The generators, the observed/filtered response counts, and the
 *   final page HTML size in UTF-8 bytes (undefined if it could not be read).
 */
export async function capture(options: CaptureOptions): Promise<CaptureResult> {
  const { browser, launched } = await connectToBrowser(options);
  const generators = new Map<string, SkillGenerator>();
  const captchaDetectedDomains = new Set<string>();
  let totalRequests = 0;
  let filteredRequests = 0;
  let domBytes: number | undefined;

  // Target for domain-only filtering
  const targetUrl = options.url;

  const generatorOptions: GeneratorOptions = {
    enablePreview: options.enablePreview ?? false,
    scrub: options.scrub ?? true,
  };

  // Idle tracking: only active during interactive capture (no --duration)
  const idleTracker = !options.duration ? new IdleTracker() : null;
  let idleInterval: ReturnType<typeof setInterval> | null = null;

  try {
    let page: Page;
    if (launched) {
      const context = await browser.newContext();
      page = await context.newPage();
    } else {
      // Attached browser: reuse its first open page when there is one
      const contexts = browser.contexts();
      if (contexts.length > 0 && contexts[0].pages().length > 0) {
        page = contexts[0].pages()[0];
      } else {
        const context = contexts[0] ?? await browser.newContext();
        page = await context.newPage();
      }
    }

    page.on('response', async (response) => {
      totalRequests++;

      const url = response.url();
      const status = response.status();
      const headers = response.headers();
      const contentType = headers['content-type'] ?? '';
      const hostname = safeHostname(url);

      // Domain-only filtering (before any other processing)
      if (!options.allDomains && hostname && !isDomainMatch(hostname, targetUrl)) {
        filteredRequests++;
        options.onFiltered?.();
        return;
      }

      if (!shouldCapture({ url, status, contentType })) {
        filteredRequests++;
        const gen = hostname ? generators.get(hostname) : undefined;
        if (gen) {
          gen.recordFiltered();
          // Track network bytes from headers for filtered responses (browser cost measurement)
          const contentLength = parseInt(headers['content-length'] ?? '0', 10);
          if (contentLength > 0) gen.addNetworkBytes(contentLength);
        }
        options.onFiltered?.();

        // Captcha risk detection. HTML never passes shouldCapture (it only
        // accepts JSON), so the check has to live on the filtered path to
        // be reachable. Done last so counters and callbacks stay synchronous.
        if (hostname && contentType.includes('text/html')) {
          try {
            if (detectCaptcha(await response.text())) {
              captchaDetectedDomains.add(hostname);
            }
          } catch {
            // Body unavailable (e.g. redirect or closed page); nothing to inspect
          }
        }
        return;
      }

      try {
        const body = await response.text();
        const exchangeHost = new URL(url).hostname;

        let gen = generators.get(exchangeHost);
        if (!gen) {
          gen = new SkillGenerator(generatorOptions);
          generators.set(exchangeHost, gen);
        }

        const request = response.request();
        const exchange: CapturedExchange = {
          request: {
            url,
            method: request.method(),
            headers: request.headers(),
            postData: request.postData() ?? undefined,
          },
          response: {
            status,
            headers,
            body,
            contentType,
          },
          timestamp: new Date().toISOString(),
        };

        const endpoint = gen.addExchange(exchange);
        if (endpoint) {
          options.onEndpoint?.({ id: endpoint.id, method: endpoint.method, path: endpoint.path });

          // Track for idle detection using parameterized key
          if (idleTracker) {
            idleTracker.recordEndpoint(`${endpoint.method} ${endpoint.path}`);
          }
        }
      } catch {
        // Response body may not be available (e.g. redirects); skip silently
      }
    });

    await page.goto(options.url, { waitUntil: 'domcontentloaded' });

    // Start idle check interval (every 5s) for interactive capture
    const onIdle = options.onIdle;
    if (idleTracker && onIdle) {
      idleInterval = setInterval(() => {
        if (idleTracker.checkIdle()) {
          onIdle();
        }
      }, 5000);
    }

    // Wait for duration or until interrupted
    await new Promise<void>((resolve) => {
      let timer: ReturnType<typeof setTimeout> | undefined;
      const onSigint = (): void => {
        if (timer) clearTimeout(timer);
        resolve();
      };
      process.once('SIGINT', onSigint);

      if (options.duration) {
        timer = setTimeout(() => {
          // Drop the handler so it does not accumulate across calls or
          // swallow a later Ctrl+C once capture is over.
          process.removeListener('SIGINT', onSigint);
          resolve();
        }, options.duration * 1000);
      }
    });

    // Stop idle nudges before the final measurement
    if (idleInterval) {
      clearInterval(idleInterval);
      idleInterval = null;
    }

    // Measure DOM size for browser cost comparison (v1.0)
    try {
      const html = await page.content();
      // Encoded size, not UTF-16 length, so non-ASCII pages are not under-counted
      domBytes = Buffer.byteLength(html, 'utf8');
    } catch { /* page may have navigated away */ }
  } finally {
    if (idleInterval) clearInterval(idleInterval);
    if (launched) {
      await browser.close();
    }
  }

  // Mark generators for domains where captcha was detected
  for (const [hostname, gen] of generators) {
    if (captchaDetectedDomains.has(hostname)) {
      gen.setCaptchaRisk(true);
    }
  }

  return { generators, totalRequests, filteredRequests, domBytes };
}
217
+
218
/**
 * Extract the hostname from a URL without throwing.
 *
 * @param url - Absolute URL text.
 * @returns The hostname, or null when `url` cannot be parsed.
 */
function safeHostname(url: string): string | null {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return null;
  }
  return parsed.hostname;
}