@apitap/core 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@apitap/core",
3
- "version": "1.0.7",
3
+ "version": "1.0.8",
4
4
  "description": "Intercept web API traffic during browsing. Generate portable skill files so AI agents can call APIs directly instead of scraping.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1,6 +1,7 @@
1
1
  // src/auth/handoff.ts
2
2
  import type { AuthManager } from './manager.js';
3
3
  import type { StoredSession, StoredAuth } from '../types.js';
4
+ import { launchBrowser } from '../capture/browser.js';
4
5
 
5
6
  export interface HandoffOptions {
6
7
  domain: string;
@@ -98,15 +99,12 @@ async function doHandoff(
98
99
  const loginUrl = options.loginUrl || `https://${domain}`;
99
100
  const timeout = options.timeout ?? 300_000; // 5 minutes
100
101
 
101
- const { chromium } = await import('playwright');
102
-
103
- const browser = await chromium.launch({ headless: false });
102
+ const { browser, context } = await launchBrowser({ headless: false });
104
103
 
105
104
  try {
106
- const context = await browser.newContext();
107
105
 
108
106
  // Restore existing session cookies if available (warm start)
109
- const cachedSession = await authManager.retrieveSession(domain);
107
+ const cachedSession = await authManager.retrieveSessionWithFallback(domain);
110
108
  if (cachedSession?.cookies?.length) {
111
109
  await context.addCookies(cachedSession.cookies);
112
110
  }
@@ -161,8 +159,13 @@ async function doHandoff(
161
159
  );
162
160
 
163
161
  if (hasSessionCookie || authDetected) {
164
- // Wait a bit more for any final redirects/requests
165
- await page.waitForTimeout(2000);
162
+ // Grace period: 4 additional polls (~8–20s total); each waits up to 3s for network idle, then a fixed 2s
163
+ // Allows time for MFA, CAPTCHAs, and post-login redirects
164
+ for (let grace = 0; grace < 4; grace++) {
165
+ if (page.isClosed()) break;
166
+ await page.waitForLoadState('networkidle', { timeout: 3000 }).catch(() => {});
167
+ await page.waitForTimeout(2000);
168
+ }
166
169
  loginDetected = true;
167
170
  break;
168
171
  }
@@ -66,6 +66,25 @@ export class AuthManager {
66
66
  return all[domain]?.session ?? null;
67
67
  }
68
68
 
69
+ /**
70
+ * Retrieve session with subdomain fallback.
71
+ * Tries exact match first, then walks up parent domains.
72
+ * e.g., dashboard.twitch.tv → twitch.tv
73
+ */
74
+ async retrieveSessionWithFallback(domain: string): Promise<StoredSession | null> {
75
+ // Try exact match first
76
+ const exact = await this.retrieveSession(domain);
77
+ if (exact) return exact;
78
+
79
+ // Try parent domains
80
+ for (const parent of getParentDomains(domain)) {
81
+ const session = await this.retrieveSession(parent);
82
+ if (session) return session;
83
+ }
84
+
85
+ return null;
86
+ }
87
+
69
88
  /** Store OAuth credentials for a domain (merges with existing auth). */
70
89
  async storeOAuthCredentials(domain: string, creds: { refreshToken?: string; clientSecret?: string }): Promise<void> {
71
90
  const all = await this.loadAll();
@@ -122,6 +141,24 @@ export class AuthManager {
122
141
  }
123
142
  }
124
143
 
144
+ /**
145
+ * Get parent domains for subdomain fallback.
146
+ * dashboard.twitch.tv → ["twitch.tv"]
147
+ * a.b.example.com → ["b.example.com", "example.com"]
148
+ * twitch.tv → [] (already base, 2 labels)
149
+ */
150
+ export function getParentDomains(domain: string): string[] {
151
+ const parts = domain.split('.');
152
+ const parents: string[] = [];
153
+
154
+ // Stop at 2 labels (e.g., "example.com" is the minimum); label-count based, not public-suffix aware, so "a.example.co.uk" also yields "co.uk"
155
+ for (let i = 1; i < parts.length - 1; i++) {
156
+ parents.push(parts.slice(i).join('.'));
157
+ }
158
+
159
+ return parents;
160
+ }
161
+
125
162
  /**
126
163
  * Get the machine ID for key derivation.
127
164
  * Linux: /etc/machine-id
@@ -56,9 +56,11 @@ export async function refreshOAuth(
56
56
  'oauth2.googleapis.com', 'accounts.google.com',
57
57
  'login.microsoftonline.com', 'github.com',
58
58
  'oauth.reddit.com', 'api.twitter.com',
59
+ 'auth0.com', 'okta.com',
60
+ 'securetoken.googleapis.com',
59
61
  ];
60
62
  const tokenHost = new URL(oauthConfig.tokenEndpoint).hostname;
61
- if (tokenHost !== domain && !tokenHost.endsWith('.' + domain) && !KNOWN_OAUTH_HOSTS.includes(tokenHost)) {
63
+ if (tokenHost !== domain && !tokenHost.endsWith('.' + domain) && !isKnownOAuthHost(tokenHost, KNOWN_OAUTH_HOSTS)) {
62
64
  return { success: false, error: `Token endpoint domain mismatch: ${tokenHost} vs ${domain}` };
63
65
  }
64
66
 
@@ -118,3 +120,13 @@ export async function refreshOAuth(
118
120
  };
119
121
  }
120
122
  }
123
+
124
+ /**
125
+ * Check if a hostname matches a known OAuth provider.
126
+ * Supports exact match and subdomain match (e.g., tenant.auth0.com matches auth0.com).
127
+ */
128
+ function isKnownOAuthHost(tokenHost: string, knownHosts: string[]): boolean {
129
+ return knownHosts.some(known =>
130
+ tokenHost === known || tokenHost.endsWith('.' + known)
131
+ );
132
+ }
@@ -2,6 +2,7 @@
2
2
  import type { SkillFile, StoredToken, StoredSession } from '../types.js';
3
3
  import type { AuthManager } from './manager.js';
4
4
  import { refreshOAuth, type OAuthRefreshResult } from './oauth-refresh.js';
5
+ import { launchBrowser } from '../capture/browser.js';
5
6
 
6
7
  export interface RefreshOptions {
7
8
  domain: string;
@@ -197,22 +198,19 @@ async function doBrowserRefresh(
197
198
  return { success: oauthRefreshed, tokens: {}, oauthRefreshed: oauthRefreshed || undefined };
198
199
  }
199
200
 
200
- const { chromium } = await import('playwright');
201
-
202
201
  const browserMode = options.browserMode || skill.auth?.browserMode || 'headless';
203
202
  const refreshUrl = options.refreshUrl || skill.auth?.refreshUrl || skill.baseUrl;
204
203
  const timeout = options.timeout || (skill.auth?.captchaRisk ? 300_000 : 30_000);
205
204
 
206
205
  // Try to restore session from cache
207
- const cachedSession = await authManager.retrieveSession(options.domain);
206
+ const cachedSession = await authManager.retrieveSessionWithFallback(options.domain);
208
207
  const sessionValid = cachedSession && isSessionValid(cachedSession);
209
208
 
210
- const browser = await chromium.launch({
209
+ const { browser, context } = await launchBrowser({
211
210
  headless: browserMode === 'headless',
212
211
  });
213
212
 
214
213
  try {
215
- const context = await browser.newContext();
216
214
 
217
215
  // Restore cookies if session is valid
218
216
  if (sessionValid && cachedSession) {
@@ -0,0 +1,51 @@
1
+ // src/capture/browser.ts
2
+ import type { Browser, BrowserContext } from 'playwright';
3
+
4
+ const CHROME_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
5
+
6
+ /**
7
+ * Launch args that reduce Playwright's automation fingerprint.
8
+ */
9
+ export function getLaunchArgs(): string[] {
10
+ return [
11
+ '--disable-blink-features=AutomationControlled',
12
+ ];
13
+ }
14
+
15
+ /**
16
+ * Realistic Chrome user-agent string for anti-detection.
17
+ */
18
+ export function getChromeUserAgent(): string {
19
+ return CHROME_USER_AGENT;
20
+ }
21
+
22
+ /**
23
+ * Launch a Chromium browser with anti-detection measures.
24
+ *
25
+ * Four layers:
26
+ * 1. --disable-blink-features=AutomationControlled in launch args
27
+ * 2. Realistic Chrome UA on context
28
+ * 3. navigator.webdriver = false via addInitScript
29
+ * 4. Viewport 1920x1080
30
+ */
31
+ export async function launchBrowser(options: { headless: boolean }): Promise<{ browser: Browser; context: BrowserContext }> {
32
+ const { chromium } = await import('playwright');
33
+
34
+ const browser = await chromium.launch({
35
+ headless: options.headless,
36
+ args: getLaunchArgs(),
37
+ });
38
+
39
+ const context = await browser.newContext({
40
+ userAgent: CHROME_USER_AGENT,
41
+ viewport: { width: 1920, height: 1080 },
42
+ });
43
+
44
+ await context.addInitScript(() => {
45
+ Object.defineProperty(navigator, 'webdriver', {
46
+ get: () => false,
47
+ });
48
+ });
49
+
50
+ return { browser, context };
51
+ }
@@ -5,6 +5,7 @@ import { isDomainMatch } from './domain.js';
5
5
  import { SkillGenerator, type GeneratorOptions } from '../skill/generator.js';
6
6
  import { IdleTracker } from './idle.js';
7
7
  import { detectCaptcha } from '../auth/refresh.js';
8
+ import { launchBrowser } from './browser.js';
8
9
  import type { CapturedExchange } from '../types.js';
9
10
 
10
11
  export interface CaptureOptions {
@@ -31,7 +32,7 @@ export interface CaptureResult {
31
32
 
32
33
  const DEFAULT_CDP_PORTS = [18792, 18800, 9222];
33
34
 
34
- async function connectToBrowser(options: CaptureOptions): Promise<{ browser: Browser; launched: boolean }> {
35
+ async function connectToBrowser(options: CaptureOptions): Promise<{ browser: Browser; launched: boolean; launchContext?: import('playwright').BrowserContext }> {
35
36
  if (!options.launch) {
36
37
  const ports = options.port ? [options.port] : DEFAULT_CDP_PORTS;
37
38
  for (const port of ports) {
@@ -49,12 +50,12 @@ async function connectToBrowser(options: CaptureOptions): Promise<{ browser: Bro
49
50
  throw new Error(`No browser found on CDP ports: ${ports.join(', ')}. Is a Chromium browser running with remote debugging?`);
50
51
  }
51
52
 
52
- const browser = await chromium.launch({ headless: options.headless ?? (process.env.DISPLAY ? false : true) });
53
- return { browser, launched: true };
53
+ const { browser, context } = await launchBrowser({ headless: options.headless ?? (process.env.DISPLAY ? false : true) });
54
+ return { browser, launched: true, launchContext: context };
54
55
  }
55
56
 
56
57
  export async function capture(options: CaptureOptions): Promise<CaptureResult> {
57
- const { browser, launched } = await connectToBrowser(options);
58
+ const { browser, launched, launchContext } = await connectToBrowser(options);
58
59
  const generators = new Map<string, SkillGenerator>();
59
60
  let totalRequests = 0;
60
61
  let filteredRequests = 0;
@@ -73,7 +74,10 @@ export async function capture(options: CaptureOptions): Promise<CaptureResult> {
73
74
  let idleInterval: ReturnType<typeof setInterval> | null = null;
74
75
 
75
76
  let page: Page;
76
- if (launched) {
77
+ if (launched && launchContext) {
78
+ page = await launchContext.newPage();
79
+ } else if (launched) {
80
+ // Fallback: shouldn't happen, but handle gracefully
77
81
  const context = await browser.newContext();
78
82
  page = await context.newPage();
79
83
  } else {
@@ -27,18 +27,48 @@ export function isOAuthTokenRequest(req: {
27
27
  const urlLower = req.url.toLowerCase();
28
28
  if (!urlLower.includes('/token') && !urlLower.includes('/oauth')) return null;
29
29
 
30
+ // Parse URL once — reused for query param fallback and Firebase detection
31
+ let parsedUrl: URL;
32
+ try {
33
+ parsedUrl = new URL(req.url);
34
+ } catch {
35
+ return null;
36
+ }
37
+
30
38
  if (!req.postData) return null;
31
39
 
32
40
  // Parse body — support URL-encoded and JSON
33
41
  const params = parseBody(req.postData, req.headers['content-type'] ?? '');
34
42
  if (!params) return null;
35
43
 
36
- const grantType = params.get('grant_type');
44
+ // grant_type: body takes precedence, URL query param as fallback (Supabase GoTrue)
45
+ let grantType = params.get('grant_type');
46
+ if (!grantType) {
47
+ grantType = parsedUrl.searchParams.get('grant_type') ?? undefined;
48
+ }
37
49
  if (!grantType) return null;
38
50
 
39
51
  // Only refreshable flows
40
52
  if (grantType !== 'refresh_token' && grantType !== 'client_credentials') return null;
41
53
 
54
+ // Firebase provider-specific detection:
55
+ // securetoken.googleapis.com uses ?key= instead of client_id
56
+ if (
57
+ parsedUrl.hostname === 'securetoken.googleapis.com' &&
58
+ parsedUrl.searchParams.has('key') &&
59
+ grantType === 'refresh_token'
60
+ ) {
61
+ const firebaseKey = parsedUrl.searchParams.get('key')!;
62
+ const result: OAuthInfo = {
63
+ tokenEndpoint: req.url, // Keep full URL with ?key= param
64
+ clientId: firebaseKey,
65
+ grantType: 'refresh_token',
66
+ };
67
+ const refreshToken = params.get('refresh_token');
68
+ if (refreshToken) result.refreshToken = refreshToken;
69
+ return result;
70
+ }
71
+
42
72
  // Extract client_id — may also be in Basic auth header
43
73
  let clientId = params.get('client_id') ?? '';
44
74
  let clientSecret = params.get('client_secret');
@@ -1,7 +1,8 @@
1
1
  // src/capture/session.ts
2
- import { chromium, type Browser, type Page } from 'playwright';
2
+ import { type Browser, type Page } from 'playwright';
3
3
  import { randomUUID } from 'node:crypto';
4
4
  import { shouldCapture } from './filter.js';
5
+ import { launchBrowser } from './browser.js';
5
6
  import { isDomainMatch } from './domain.js';
6
7
  import { SkillGenerator, type GeneratorOptions } from '../skill/generator.js';
7
8
  import { detectCaptcha } from '../auth/refresh.js';
@@ -53,8 +54,8 @@ export class CaptureSession {
53
54
  this.targetUrl = url.startsWith('http') ? url : `https://${url}`;
54
55
  const headless = this.options.headless ?? true;
55
56
 
56
- this.browser = await chromium.launch({ headless });
57
- const context = await this.browser.newContext();
57
+ const { browser, context } = await launchBrowser({ headless });
58
+ this.browser = browser;
58
59
 
59
60
  // Inject cached session cookies if available
60
61
  try {
@@ -62,7 +63,7 @@ export class CaptureSession {
62
63
  const machineId = await getMachineId();
63
64
  const authManager = new AuthManager(authDir, machineId);
64
65
  const domain = new URL(this.targetUrl).hostname;
65
- const cachedSession = await authManager.retrieveSession(domain);
66
+ const cachedSession = await authManager.retrieveSessionWithFallback(domain);
66
67
  if (cachedSession?.cookies?.length) {
67
68
  await context.addCookies(cachedSession.cookies);
68
69
  }