@apitap/core 1.5.3 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/README.md +28 -8
  2. package/dist/auth/handoff.js +1 -1
  3. package/dist/auth/handoff.js.map +1 -1
  4. package/dist/capture/cdp-attach.d.ts +60 -0
  5. package/dist/capture/cdp-attach.js +422 -0
  6. package/dist/capture/cdp-attach.js.map +1 -0
  7. package/dist/capture/filter.js +6 -0
  8. package/dist/capture/filter.js.map +1 -1
  9. package/dist/capture/parameterize.d.ts +7 -6
  10. package/dist/capture/parameterize.js +204 -12
  11. package/dist/capture/parameterize.js.map +1 -1
  12. package/dist/capture/session.js +20 -10
  13. package/dist/capture/session.js.map +1 -1
  14. package/dist/cli.js +387 -20
  15. package/dist/cli.js.map +1 -1
  16. package/dist/discovery/openapi.js +23 -50
  17. package/dist/discovery/openapi.js.map +1 -1
  18. package/dist/index.d.ts +1 -0
  19. package/dist/index.js +1 -0
  20. package/dist/index.js.map +1 -1
  21. package/dist/mcp.js +12 -0
  22. package/dist/mcp.js.map +1 -1
  23. package/dist/native-host.js +5 -0
  24. package/dist/native-host.js.map +1 -1
  25. package/dist/plugin.js +10 -3
  26. package/dist/plugin.js.map +1 -1
  27. package/dist/replay/engine.d.ts +13 -0
  28. package/dist/replay/engine.js +20 -0
  29. package/dist/replay/engine.js.map +1 -1
  30. package/dist/skill/apis-guru.d.ts +35 -0
  31. package/dist/skill/apis-guru.js +128 -0
  32. package/dist/skill/apis-guru.js.map +1 -0
  33. package/dist/skill/generator.d.ts +7 -1
  34. package/dist/skill/generator.js +35 -3
  35. package/dist/skill/generator.js.map +1 -1
  36. package/dist/skill/merge.d.ts +29 -0
  37. package/dist/skill/merge.js +252 -0
  38. package/dist/skill/merge.js.map +1 -0
  39. package/dist/skill/openapi-converter.d.ts +31 -0
  40. package/dist/skill/openapi-converter.js +383 -0
  41. package/dist/skill/openapi-converter.js.map +1 -0
  42. package/dist/types.d.ts +41 -0
  43. package/package.json +1 -1
  44. package/src/auth/handoff.ts +1 -1
  45. package/src/capture/cdp-attach.ts +501 -0
  46. package/src/capture/filter.ts +5 -0
  47. package/src/capture/parameterize.ts +207 -11
  48. package/src/capture/session.ts +20 -10
  49. package/src/cli.ts +420 -18
  50. package/src/discovery/openapi.ts +25 -56
  51. package/src/index.ts +1 -0
  52. package/src/mcp.ts +12 -0
  53. package/src/native-host.ts +7 -0
  54. package/src/plugin.ts +10 -3
  55. package/src/replay/engine.ts +19 -0
  56. package/src/skill/apis-guru.ts +163 -0
  57. package/src/skill/generator.ts +38 -3
  58. package/src/skill/merge.ts +281 -0
  59. package/src/skill/openapi-converter.ts +426 -0
  60. package/src/types.ts +42 -1
@@ -3,10 +3,129 @@
3
3
  const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
4
4
  const PURE_NUMERIC_RE = /^\d+$/;
5
5
  const LONG_DIGITS_RE = /\d{8,}/;
6
+ const LONG_HEX_RE = /^[0-9a-f]{16,}$/i;
6
7
  const NEXT_DATA_PREFIX_RE = /^\/_next\/data\/[^/]+\//;
7
8
 
8
9
  /**
9
- * Check if a path segment is a dynamic value that should be parameterized.
10
+ * Resource nouns: segments that name a collection of resources.
11
+ * The value is the param name(s) for the slot(s) that follow.
12
+ * e.g. "repos" expects two dynamic segments: :owner/:repo
13
+ */
14
+ const RESOURCE_NOUNS = new Map<string, string[]>([
15
+ // Code hosting / generic REST
16
+ ['repos', [':owner', ':repo']],
17
+ ['users', [':username']],
18
+ ['orgs', [':org']],
19
+ ['organizations', [':org']],
20
+ ['teams', [':team']],
21
+ ['members', [':member']],
22
+ ['projects', [':project']],
23
+ ['gists', [':gist_id']],
24
+ ['issues', [':issue_number']],
25
+ ['pulls', [':pull_number']],
26
+ ['commits', [':sha']],
27
+ ['branches', [':branch']],
28
+ ['tags', [':tag']],
29
+ ['releases', [':release_id']],
30
+ ['milestones', [':milestone']],
31
+ ['labels', [':label']],
32
+ ['hooks', [':hook_id']],
33
+ ['keys', [':key_id']],
34
+ ['deployments', [':deployment_id']],
35
+ ['environments', [':env']],
36
+ ['runs', [':run_id']],
37
+ ['jobs', [':job_id']],
38
+ ['artifacts', [':artifact_id']],
39
+ ['packages', [':package']],
40
+
41
+ // Content / social
42
+ ['posts', [':post_id']],
43
+ ['comments', [':comment_id']],
44
+ ['articles', [':article_id']],
45
+ ['stories', [':story_id']],
46
+ ['threads', [':thread_id']],
47
+ ['messages', [':message_id']],
48
+ ['channels', [':channel']],
49
+ ['videos', [':video_id']],
50
+ ['playlists', [':playlist_id']],
51
+ ['tracks', [':track_id']],
52
+ ['albums', [':album_id']],
53
+ ['artists', [':artist_id']],
54
+ ['images', [':image_id']],
55
+ ['files', [':file_id']],
56
+ ['documents', [':doc_id']],
57
+ ['folders', [':folder_id']],
58
+ ['collections', [':collection_id']],
59
+ ['categories', [':category']],
60
+
61
+ // E-commerce
62
+ ['products', [':product_id']],
63
+ ['items', [':item_id']],
64
+ ['orders', [':order_id']],
65
+ ['customers', [':customer_id']],
66
+ ['carts', [':cart_id']],
67
+ ['stores', [':store_id']],
68
+ ['reviews', [':review_id']],
69
+
70
+ // Infrastructure / ops
71
+ ['accounts', [':account_id']],
72
+ ['workspaces', [':workspace']],
73
+ ['databases', [':database']],
74
+ ['tables', [':table']],
75
+ ['namespaces', [':namespace']],
76
+ ['clusters', [':cluster']],
77
+ ['instances', [':instance']],
78
+ ['regions', [':region']],
79
+ ['zones', [':zone']],
80
+ ['resources', [':resource_id']],
81
+ ['subscriptions', [':subscription_id']],
82
+ ['tenants', [':tenant_id']],
83
+ ['groups', [':group_id']],
84
+ ['roles', [':role']],
85
+ ['policies', [':policy']],
86
+ ['tokens', [':token_id']],
87
+ ['sessions', [':session_id']],
88
+ ['events', [':event_id']],
89
+ ['logs', [':log_id']],
90
+ ['metrics', [':metric']],
91
+ ['alerts', [':alert_id']],
92
+ ['notifications', [':notification_id']],
93
+ ['webhooks', [':webhook_id']],
94
+
95
+ // Media
96
+ ['media', [':media_id']],
97
+ ['assets', [':asset_id']],
98
+ ['uploads', [':upload_id']],
99
+ ]);
100
+
101
+ /**
102
+ * Segments that are always structural (never parameterized).
103
+ * Includes version prefixes, action verbs, and all RESOURCE_NOUNS keys.
104
+ */
105
+ const STRUCTURAL_SEGMENTS = new Set<string>([
106
+ // Version prefixes
107
+ 'api', 'v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7', 'v8', 'v9', 'v10',
108
+ // Actions / sub-resources
109
+ 'search', 'filter', 'sort', 'query', 'list', 'create', 'update', 'delete',
110
+ 'status', 'config', 'settings', 'preferences', 'profile', 'info', 'details',
111
+ 'stats', 'analytics', 'count', 'batch', 'bulk', 'export', 'import',
112
+ 'auth', 'login', 'logout', 'register', 'signup', 'signin', 'callback',
113
+ 'oauth', 'token', 'refresh', 'verify', 'confirm', 'reset', 'activate',
114
+ 'public', 'private', 'internal', 'external', 'admin', 'management',
115
+ 'graphql', 'gql', 'rest', 'rpc', 'ws', 'websocket', 'stream', 'feed',
116
+ 'health', 'ping', 'version', 'manifest', 'metadata', 'schema',
117
+ 'upload', 'download', 'preview', 'thumbnail', 'embed',
118
+ 'latest', 'trending', 'popular', 'featured', 'recommended', 'top', 'new',
119
+ 'web', 'app', 'mobile', 'desktop', 'data', 'raw', 'render',
120
+ 'consent', 'wrapper', 'widget', 'integrity', 'pathfinder', 'rum',
121
+ // All resource noun keys are also structural — this means a noun after itself
122
+ // (e.g. /repos/repos) treats the second "repos" as structural, not a param slot.
123
+ // That's a degenerate case and the correct behavior: we preserve the literal.
124
+ ...RESOURCE_NOUNS.keys(),
125
+ ]);
126
+
127
+ /**
128
+ * Check if a path segment is a dynamic value based on its structure alone.
10
129
  * Returns the parameter name (:id, :hash, :slug) or null if static.
11
130
  */
12
131
  function classifySegment(segment: string): string | null {
@@ -16,6 +135,9 @@ function classifySegment(segment: string): string | null {
16
135
  // UUID → :id
17
136
  if (UUID_RE.test(segment)) return ':id';
18
137
 
138
+ // Long hex string (16+ hex chars) → :hash
139
+ if (LONG_HEX_RE.test(segment)) return ':hash';
140
+
19
141
  // Slug with embedded long number (8+ consecutive digits) — check before hash
20
142
  // because slugs like "btc-updown-15m-1770254100" would also match the hash rule
21
143
  if (LONG_DIGITS_RE.test(segment)) {
@@ -34,20 +156,94 @@ function classifySegment(segment: string): string | null {
34
156
  }
35
157
 
36
158
  /**
37
- * Replace dynamic path segments with :param placeholders.
159
+ * Check if a segment looks like a lowercase word or hyphenated compound word.
160
+ * e.g. "search", "location-metadata", "top-rated" → true
161
+ * e.g. "n1byn1kt", "OxItOzEC", "ABC-123" → false
162
+ */
163
+ function looksLikeWord(segment: string): boolean {
164
+ return /^[a-z][a-z-]*[a-z]$/.test(segment)
165
+ && segment.split('-').every(part => /^[a-z]{2,}$/.test(part));
166
+ }
167
+
168
+ /**
169
+ * Replace dynamic path segments with named :param placeholders.
38
170
  *
39
- * Rules:
40
- * - Pure numeric → :id
41
- * - UUID → :id
42
- * - 12+ alphanum with mixed letters+digits → :hash
43
- * - Contains 8+ consecutive digits → :slug
171
+ * Three-layer approach:
172
+ * 1. Structural detection: UUIDs, numbers, hashes, long-digit slugs
173
+ * 2. Context-aware: segments following a known resource noun get a
174
+ * semantically named param (e.g. /repos/:owner/:repo)
175
+ * 3. Heuristic fallback: non-word segments after structural segments
176
+ * are parameterized as :id
44
177
  */
45
178
  export function parameterizePath(path: string): string {
46
179
  const segments = path.split('/');
47
- const result = segments.map(seg => {
48
- if (seg === '') return seg;
49
- return classifySegment(seg) ?? seg;
50
- });
180
+ const result: string[] = [];
181
+ let nounSlots: string[] = [];
182
+
183
+ for (let i = 0; i < segments.length; i++) {
184
+ const seg = segments[i];
185
+
186
+ // Preserve empty segments (leading/trailing slashes)
187
+ if (seg === '') { result.push(seg); continue; }
188
+
189
+ const lower = seg.toLowerCase();
190
+
191
+ // Layer 1: Always parameterize structurally obvious dynamic values
192
+ // Order matters: pure numeric before long-digits (1770254100 is numeric, not a slug)
193
+ if (UUID_RE.test(seg)) { result.push(':id'); nounSlots = []; continue; }
194
+
195
+ if (PURE_NUMERIC_RE.test(seg)) {
196
+ // Use noun-derived name if available, otherwise :id
197
+ const name = nounSlots.length > 0 ? nounSlots.shift()! : ':id';
198
+ result.push(name);
199
+ continue;
200
+ }
201
+
202
+ if (LONG_HEX_RE.test(seg)) { result.push(':hash'); nounSlots = []; continue; }
203
+
204
+ // Slug with embedded long number (8+ digits mixed with text)
205
+ // Pure-numeric already handled above, so this only fires on mixed segments
206
+ if (LONG_DIGITS_RE.test(seg)) { result.push(':slug'); nounSlots = []; continue; }
207
+
208
+ // Hash-like: 12+ mixed alphanum (catches remaining patterns)
209
+ const structural = classifySegment(seg);
210
+ if (structural) { result.push(structural); nounSlots = []; continue; }
211
+
212
+ // Layer 2: Known resource noun → keep it, queue param names for following segments
213
+ if (RESOURCE_NOUNS.has(lower)) {
214
+ result.push(seg);
215
+ nounSlots = [...RESOURCE_NOUNS.get(lower)!];
216
+ continue;
217
+ }
218
+
219
+ // Structural segment → keep as-is, reset slots
220
+ if (STRUCTURAL_SEGMENTS.has(lower)) {
221
+ result.push(seg);
222
+ nounSlots = [];
223
+ continue;
224
+ }
225
+
226
+ // Fill a queued noun slot (e.g. "n1byn1kt" after "repos")
227
+ if (nounSlots.length > 0) {
228
+ result.push(nounSlots.shift()!);
229
+ continue;
230
+ }
231
+
232
+ // Layer 3: Heuristic — segment after a structural segment that doesn't
233
+ // look like a plain English word is likely a dynamic value
234
+ const prevSeg = i > 0 ? segments[i - 1]?.toLowerCase() : '';
235
+ const prevIsStructural = STRUCTURAL_SEGMENTS.has(prevSeg) || RESOURCE_NOUNS.has(prevSeg);
236
+
237
+ if (prevIsStructural && seg.length >= 2 && !looksLikeWord(seg)) {
238
+ result.push(':id');
239
+ continue;
240
+ }
241
+
242
+ // Default: keep as-is
243
+ result.push(seg);
244
+ nounSlots = [];
245
+ }
246
+
51
247
  return result.join('/');
52
248
  }
53
249
 
@@ -4,7 +4,7 @@ import { randomUUID } from 'node:crypto';
4
4
  import { shouldCapture } from './filter.js';
5
5
  import { launchBrowser, normalizeCookiesForStorageState } from './browser.js';
6
6
  import { isDomainMatch } from './domain.js';
7
- import { SkillGenerator, type GeneratorOptions } from '../skill/generator.js';
7
+ import { SkillGenerator, deduplicateAuth, type GeneratorOptions } from '../skill/generator.js';
8
8
  import { detectCaptcha } from '../auth/refresh.js';
9
9
  import { verifyEndpoints } from './verifier.js';
10
10
  import { signSkillFile } from '../skill/signing.js';
@@ -133,11 +133,21 @@ export class CaptureSession {
133
133
  case 'navigate': {
134
134
  if (!action.url) return { success: false, error: 'url required for navigate', snapshot: await this.takeSnapshot() };
135
135
 
136
- // M7 fix: Full SSRF validation on navigate URLs (same checks as ssrf.ts)
137
- const { validateUrl: validateNavUrl } = await import('../skill/ssrf.js');
138
- const navResult = validateNavUrl(action.url);
139
- if (!navResult.safe) {
140
- return { success: false, error: `Navigation blocked: ${navResult.reason}`, snapshot: await this.takeSnapshot() };
136
+ // M7 fix: SSRF validation on navigate URLs — skip for same-origin
137
+ // (session.start() already navigated to targetUrl, so same-origin is trusted)
138
+ let sameOrigin = false;
139
+ try {
140
+ sameOrigin = this.targetUrl !== '' && new URL(action.url).origin === new URL(this.targetUrl).origin;
141
+ } catch {
142
+ // Invalid URL — let validateUrl produce the error
143
+ }
144
+
145
+ if (!sameOrigin) {
146
+ const { validateUrl: validateNavUrl } = await import('../skill/ssrf.js');
147
+ const navResult = validateNavUrl(action.url);
148
+ if (!navResult.safe) {
149
+ return { success: false, error: `Navigation blocked: ${navResult.reason}`, snapshot: await this.takeSnapshot() };
150
+ }
141
151
  }
142
152
 
143
153
  await this.page.goto(action.url, { waitUntil: 'domcontentloaded' });
@@ -208,10 +218,10 @@ export class CaptureSession {
208
218
 
209
219
  if (skill.endpoints.length === 0) continue;
210
220
 
211
- // Store extracted auth
212
- const extractedAuth = generator.getExtractedAuth();
213
- if (extractedAuth.length > 0) {
214
- await authManager.store(domain, extractedAuth[0]);
221
+ // Store extracted auth credentials
222
+ const auth = deduplicateAuth(generator.getExtractedAuth());
223
+ if (auth) {
224
+ await authManager.store(domain, auth);
215
225
  }
216
226
 
217
227
  // Store OAuth credentials if detected