@cosmocoder/mcp-web-docs 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/README.md +193 -18
  2. package/build/crawler/auth.d.ts +17 -6
  3. package/build/crawler/auth.js +166 -31
  4. package/build/crawler/auth.js.map +1 -1
  5. package/build/crawler/auth.test.js +197 -33
  6. package/build/crawler/auth.test.js.map +1 -1
  7. package/build/index.js +189 -22
  8. package/build/index.js.map +1 -1
  9. package/build/index.test.js +232 -1
  10. package/build/index.test.js.map +1 -1
  11. package/build/storage/storage.d.ts +45 -0
  12. package/build/storage/storage.js +282 -8
  13. package/build/storage/storage.js.map +1 -1
  14. package/build/storage/storage.test.js +254 -0
  15. package/build/storage/storage.test.js.map +1 -1
  16. package/build/types.d.ts +17 -0
  17. package/build/util/security.d.ts +10 -0
  18. package/build/util/security.js +25 -0
  19. package/build/util/security.js.map +1 -1
  20. package/build/util/security.test.js +18 -0
  21. package/build/util/security.test.js.map +1 -1
  22. package/package.json +11 -2
  23. package/build/crawler/cheerio.d.ts +0 -11
  24. package/build/crawler/cheerio.js +0 -134
  25. package/build/crawler/cheerio.js.map +0 -1
  26. package/build/crawler/chromium.d.ts +0 -21
  27. package/build/crawler/chromium.js +0 -596
  28. package/build/crawler/chromium.js.map +0 -1
  29. package/build/crawler/default.d.ts +0 -11
  30. package/build/crawler/default.js +0 -138
  31. package/build/crawler/default.js.map +0 -1
  32. package/build/crawler/factory.d.ts +0 -6
  33. package/build/crawler/factory.js +0 -83
  34. package/build/crawler/factory.js.map +0 -1
  35. package/build/crawler/puppeteer.d.ts +0 -16
  36. package/build/crawler/puppeteer.js +0 -191
  37. package/build/crawler/puppeteer.js.map +0 -1
  38. package/build/embeddings/openai.d.ts +0 -8
  39. package/build/embeddings/openai.js +0 -56
  40. package/build/embeddings/openai.js.map +0 -1
  41. package/build/rag/cache.d.ts +0 -10
  42. package/build/rag/cache.js +0 -10
  43. package/build/rag/cache.js.map +0 -1
  44. package/build/rag/code-generator.d.ts +0 -11
  45. package/build/rag/code-generator.js +0 -30
  46. package/build/rag/code-generator.js.map +0 -1
  47. package/build/rag/context-assembler.d.ts +0 -23
  48. package/build/rag/context-assembler.js +0 -113
  49. package/build/rag/context-assembler.js.map +0 -1
  50. package/build/rag/docs-search.d.ts +0 -55
  51. package/build/rag/docs-search.js +0 -380
  52. package/build/rag/docs-search.js.map +0 -1
  53. package/build/rag/pipeline.d.ts +0 -26
  54. package/build/rag/pipeline.js +0 -91
  55. package/build/rag/pipeline.js.map +0 -1
  56. package/build/rag/query-processor.d.ts +0 -14
  57. package/build/rag/query-processor.js +0 -57
  58. package/build/rag/query-processor.js.map +0 -1
  59. package/build/rag/reranker.d.ts +0 -55
  60. package/build/rag/reranker.js +0 -210
  61. package/build/rag/reranker.js.map +0 -1
  62. package/build/rag/response-generator.d.ts +0 -20
  63. package/build/rag/response-generator.js +0 -101
  64. package/build/rag/response-generator.js.map +0 -1
  65. package/build/rag/retriever.d.ts +0 -19
  66. package/build/rag/retriever.js +0 -111
  67. package/build/rag/retriever.js.map +0 -1
  68. package/build/rag/validator.d.ts +0 -22
  69. package/build/rag/validator.js +0 -128
  70. package/build/rag/validator.js.map +0 -1
  71. package/build/rag/version-manager.d.ts +0 -23
  72. package/build/rag/version-manager.js +0 -98
  73. package/build/rag/version-manager.js.map +0 -1
  74. package/build/types/rag.d.ts +0 -27
  75. package/build/types/rag.js +0 -2
  76. package/build/types/rag.js.map +0 -1
  77. package/build/util/content-utils.d.ts +0 -31
  78. package/build/util/content-utils.js +0 -120
  79. package/build/util/content-utils.js.map +0 -1
  80. package/build/util/content.d.ts +0 -1
  81. package/build/util/content.js +0 -16
  82. package/build/util/content.js.map +0 -1
  83. package/build/util/site-detector.d.ts +0 -22
  84. package/build/util/site-detector.js +0 -42
  85. package/build/util/site-detector.js.map +0 -1
package/README.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # MCP Web Docs
2
2
 
3
+ [![npm version](https://img.shields.io/npm/v/@cosmocoder/mcp-web-docs.svg)](https://www.npmjs.com/package/@cosmocoder/mcp-web-docs)
4
+ [![npm downloads](https://img.shields.io/npm/dm/@cosmocoder/mcp-web-docs.svg)](https://www.npmjs.com/package/@cosmocoder/mcp-web-docs)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6
+ [![Node.js](https://img.shields.io/badge/node-%3E%3D22.19.0-brightgreen.svg)](https://nodejs.org/)
7
+ [![CI](https://github.com/cosmocoder/mcp-web-docs/actions/workflows/release.yml/badge.svg)](https://github.com/cosmocoder/mcp-web-docs/actions/workflows/release.yml)
8
+
3
9
  **Index Any Documentation. Search Locally. Stay Private.**
4
10
 
5
11
  A self-hosted Model Context Protocol (MCP) server that crawls, indexes, and searches documentation from *any* website. Unlike remote MCP servers limited to GitHub repos or pre-indexed libraries, web-docs gives you full control over what gets indexed — including private documentation behind authentication.
@@ -32,6 +38,7 @@ AI assistants struggle with documentation:
32
38
 
33
39
  - **🌐 Universal Crawler** - Works with any documentation site, not just GitHub
34
40
  - **🔍 Hybrid Search** - Combines full-text search (FTS) with semantic vector search
41
+ - **🏷️ Tags & Categories** - Organize docs with tags and filter searches by project, team, or category
35
42
  - **🔐 Authentication Support** - Crawl private/protected docs with interactive browser login (auto-detects your default browser)
36
43
  - **📊 Smart Extraction** - Automatically extracts code blocks, props tables, and structured content
37
44
  - **⚡ Local Embeddings** - Uses FastEmbed for fast, private embedding generation (no API keys)
@@ -46,11 +53,21 @@ AI assistants struggle with documentation:
46
53
 
47
54
  - Node.js >= 22.19.0
48
55
 
49
- ### Setup
56
+ ### Option 1: Install from NPM (Recommended)
57
+
58
+ ```bash
59
+ npm install -g @cosmocoder/mcp-web-docs
60
+ ```
61
+
62
+ ### Option 2: Run with npx
63
+
64
+ No installation required - just configure your MCP client to use npx (see below).
65
+
66
+ ### Option 3: Build from Source
50
67
 
51
68
  ```bash
52
69
  # Clone the repository
53
- git clone https://github.com/user/mcp-web-docs.git
70
+ git clone https://github.com/cosmocoder/mcp-web-docs.git
54
71
  cd mcp-web-docs
55
72
 
56
73
  # Install dependencies (automatically installs Playwright browsers)
@@ -67,6 +84,30 @@ npm run build
67
84
 
68
85
  Add to your Cursor MCP settings (`~/.cursor/mcp.json`):
69
86
 
87
+ **Using npx (no install required):**
88
+ ```json
89
+ {
90
+ "mcpServers": {
91
+ "web-docs": {
92
+ "command": "npx",
93
+ "args": ["-y", "@cosmocoder/mcp-web-docs"]
94
+ }
95
+ }
96
+ }
97
+ ```
98
+
99
+ **Using global install:**
100
+ ```json
101
+ {
102
+ "mcpServers": {
103
+ "web-docs": {
104
+ "command": "mcp-web-docs"
105
+ }
106
+ }
107
+ }
108
+ ```
109
+
110
+ **Using local build:**
70
111
  ```json
71
112
  {
72
113
  "mcpServers": {
@@ -85,12 +126,24 @@ Add to your Cursor MCP settings (`~/.cursor/mcp.json`):
85
126
 
86
127
  Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
87
128
 
129
+ **Using npx:**
88
130
  ```json
89
131
  {
90
132
  "mcpServers": {
91
133
  "web-docs": {
92
- "command": "node",
93
- "args": ["/path/to/mcp-web-docs/build/index.js"]
134
+ "command": "npx",
135
+ "args": ["-y", "@cosmocoder/mcp-web-docs"]
136
+ }
137
+ }
138
+ }
139
+ ```
140
+
141
+ **Using global install:**
142
+ ```json
143
+ {
144
+ "mcpServers": {
145
+ "web-docs": {
146
+ "command": "mcp-web-docs"
94
147
  }
95
148
  }
96
149
  }
@@ -103,12 +156,24 @@ Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_
103
156
 
104
157
  Add to `.vscode/mcp.json` in your workspace:
105
158
 
159
+ **Using npx:**
106
160
  ```json
107
161
  {
108
162
  "servers": {
109
163
  "web-docs": {
110
- "command": "node",
111
- "args": ["/path/to/mcp-web-docs/build/index.js"]
164
+ "command": "npx",
165
+ "args": ["-y", "@cosmocoder/mcp-web-docs"]
166
+ }
167
+ }
168
+ }
169
+ ```
170
+
171
+ **Using global install:**
172
+ ```json
173
+ {
174
+ "servers": {
175
+ "web-docs": {
176
+ "command": "mcp-web-docs"
112
177
  }
113
178
  }
114
179
  }
@@ -121,12 +186,24 @@ Add to `.vscode/mcp.json` in your workspace:
121
186
 
122
187
  Add to `~/.codeium/windsurf/mcp_config.json`:
123
188
 
189
+ **Using npx:**
124
190
  ```json
125
191
  {
126
192
  "mcpServers": {
127
193
  "web-docs": {
128
- "command": "node",
129
- "args": ["/path/to/mcp-web-docs/build/index.js"]
194
+ "command": "npx",
195
+ "args": ["-y", "@cosmocoder/mcp-web-docs"]
196
+ }
197
+ }
198
+ }
199
+ ```
200
+
201
+ **Using global install:**
202
+ ```json
203
+ {
204
+ "mcpServers": {
205
+ "web-docs": {
206
+ "command": "mcp-web-docs"
130
207
  }
131
208
  }
132
209
  }
@@ -139,12 +216,26 @@ Add to `~/.codeium/windsurf/mcp_config.json`:
139
216
 
140
217
  Add to `~/Library/Application Support/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json`:
141
218
 
219
+ **Using npx:**
142
220
  ```json
143
221
  {
144
222
  "mcpServers": {
145
223
  "web-docs": {
146
- "command": "node",
147
- "args": ["/path/to/mcp-web-docs/build/index.js"],
224
+ "command": "npx",
225
+ "args": ["-y", "@cosmocoder/mcp-web-docs"],
226
+ "disabled": false,
227
+ "autoApprove": []
228
+ }
229
+ }
230
+ }
231
+ ```
232
+
233
+ **Using global install:**
234
+ ```json
235
+ {
236
+ "mcpServers": {
237
+ "web-docs": {
238
+ "command": "mcp-web-docs",
148
239
  "disabled": false,
149
240
  "autoApprove": []
150
241
  }
@@ -154,6 +245,38 @@ Add to `~/Library/Application Support/Code/User/globalStorage/saoudrizwan.claude
154
245
 
155
246
  </details>
156
247
 
248
+ <details>
249
+ <summary><b>RooCode</b></summary>
250
+
251
+ **Global configuration:** Open RooCode → Click MCP icon → "Edit Global MCP"
252
+
253
+ **Project-level configuration:** Create `.roo/mcp.json` at your project root
254
+
255
+ **Using npx:**
256
+ ```json
257
+ {
258
+ "mcpServers": {
259
+ "web-docs": {
260
+ "command": "npx",
261
+ "args": ["-y", "@cosmocoder/mcp-web-docs"]
262
+ }
263
+ }
264
+ }
265
+ ```
266
+
267
+ **Using global install:**
268
+ ```json
269
+ {
270
+ "mcpServers": {
271
+ "web-docs": {
272
+ "command": "mcp-web-docs"
273
+ }
274
+ }
275
+ }
276
+ ```
277
+
278
+ </details>
279
+
157
280
  ---
158
281
 
159
282
  ## ⚡ Quick Start
@@ -194,9 +317,10 @@ Add a new documentation site for indexing.
194
317
  ```typescript
195
318
  add_documentation({
196
319
  url: "https://docs.example.com/",
197
- title: "Example Docs", // Optional
198
- id: "example-docs", // Optional custom ID
199
- auth: { // Optional authentication
320
+ title: "Example Docs", // Optional
321
+ id: "example-docs", // Optional custom ID
322
+ tags: ["frontend", "mycompany"], // Optional tags for categorization
323
+ auth: { // Optional authentication
200
324
  requiresAuth: true,
201
325
  // browser auto-detected from OS settings if omitted
202
326
  loginTimeoutSecs: 300
@@ -211,8 +335,9 @@ Search through indexed documentation using hybrid search (FTS + semantic).
211
335
  ```typescript
212
336
  search_documentation({
213
337
  query: "how to configure authentication",
214
- url: "https://docs.example.com/", // Optional: filter to specific site
215
- limit: 10 // Optional: max results
338
+ url: "https://docs.example.com/", // Optional: filter to specific site
339
+ tags: ["frontend", "mycompany"], // Optional: filter by tags
340
+ limit: 10 // Optional: max results
216
341
  })
217
342
  ```
218
343
 
@@ -230,7 +355,22 @@ authenticate({
230
355
 
231
356
  ### `list_documentation`
232
357
 
233
- List all indexed documentation sites.
358
+ List all indexed documentation sites with their metadata including tags.
359
+
360
+ ### `set_tags`
361
+
362
+ Set or update tags for a documentation site. Tags help categorize and filter documentation.
363
+
364
+ ```typescript
365
+ set_tags({
366
+ url: "https://docs.example.com/",
367
+ tags: ["frontend", "react", "mycompany"] // Replaces existing tags
368
+ })
369
+ ```
370
+
371
+ ### `list_tags`
372
+
373
+ List all available tags with usage counts. Useful to see what tags exist across your indexed docs.
234
374
 
235
375
  ### `reindex_documentation`
236
376
 
@@ -285,15 +425,50 @@ API references, configuration, or library usage.
285
425
 
286
426
  ### Scoping Searches
287
427
 
288
- If you have multiple sites indexed, filter by URL to search within a specific site:
428
+ If you have multiple sites indexed, filter by URL or tags:
289
429
 
290
430
  ```typescript
431
+ // Filter by specific site URL
291
432
  search_documentation({
292
433
  query: "routing",
293
- url: "https://nextjs.org/docs/" // Only search Next.js docs
434
+ url: "https://nextjs.org/docs/"
435
+ })
436
+
437
+ // Filter by tags (searches all docs with matching tags)
438
+ search_documentation({
439
+ query: "Button component",
440
+ tags: ["frontend", "mycompany"] // Only docs tagged with BOTH tags
441
+ })
442
+ ```
443
+
444
+ ### Organizing with Tags
445
+
446
+ Tags help organize documentation when you have multiple related sites. Add tags when indexing:
447
+
448
+ ```typescript
449
+ // Index frontend package docs
450
+ add_documentation({
451
+ url: "https://docs.mycompany.com/ui-components/",
452
+ tags: ["frontend", "mycompany", "react"]
453
+ })
454
+
455
+ // Index backend API docs
456
+ add_documentation({
457
+ url: "https://docs.mycompany.com/api/",
458
+ tags: ["backend", "mycompany", "api"]
294
459
  })
295
460
  ```
296
461
 
462
+ Later, search across all frontend docs:
463
+ ```typescript
464
+ search_documentation({
465
+ query: "authentication",
466
+ tags: ["frontend"] // Searches all frontend-tagged docs
467
+ })
468
+ ```
469
+
470
+ You can also add tags to existing documentation with `set_tags`.
471
+
297
472
  ---
298
473
 
299
474
  ## 🚨 Troubleshooting
@@ -55,13 +55,21 @@ export declare class AuthManager {
55
55
  */
56
56
  clearSession(url: string): Promise<void>;
57
57
  /**
58
- * Validate that a stored session is still valid by making a test request.
59
- * This detects expired sessions by checking for:
60
- * 1. Redirects to login/auth pages
61
- * 2. Login page content in the response
58
+ * Check if stored cookies have expired based on their expiration timestamps.
59
+ * This is a fast check that doesn't require launching a browser.
60
+ *
61
+ * @param storageStateJson - The decrypted storage state JSON
62
+ * @param domain - The domain to check cookies for
63
+ * @returns Object with expiration status and details
64
+ */
65
+ private checkCookieExpiration;
66
+ /**
67
+ * Validate that a stored session is still valid.
68
+ * First checks cookie expiration timestamps (fast, no network).
69
+ * Falls back to browser-based validation for edge cases.
62
70
  *
63
71
  * @param url - The protected URL to validate against
64
- * @param browserType - Browser type to use for validation
72
+ * @param browserType - Browser type to use for browser-based validation (if needed)
65
73
  * @returns Validation result indicating if session is still valid
66
74
  */
67
75
  validateSession(url: string, browserType?: BrowserType): Promise<{
@@ -111,7 +119,10 @@ export declare class AuthManager {
111
119
  * Detection methods (in order of priority):
112
120
  * 1. If successPattern is provided: wait for URL to match the regex
113
121
  * 2. If successSelector is provided: wait for the CSS selector to appear
114
- * 3. Default: poll for common login success indicators (logout button, user menu, URL change)
122
+ * 3. Default: poll for common login success indicators or return to target domain
123
+ *
124
+ * For multi-step OAuth flows (e.g., GitHub Pages → GitHub Login → Okta → back),
125
+ * the method tracks when the user returns to the original target domain.
115
126
  */
116
127
  private waitForLogin;
117
128
  /**
@@ -164,13 +164,72 @@ export class AuthManager {
164
164
  }
165
165
  }
166
166
  /**
167
- * Validate that a stored session is still valid by making a test request.
168
- * This detects expired sessions by checking for:
169
- * 1. Redirects to login/auth pages
170
- * 2. Login page content in the response
167
+ * Check if stored cookies have expired based on their expiration timestamps.
168
+ * This is a fast check that doesn't require launching a browser.
169
+ *
170
+ * @param storageStateJson - The decrypted storage state JSON
171
+ * @param domain - The domain to check cookies for
172
+ * @returns Object with expiration status and details
173
+ */
174
+ checkCookieExpiration(storageStateJson, domain) {
175
+ try {
176
+ const storageState = safeJsonParse(storageStateJson, StorageStateSchema);
177
+ const cookies = storageState.cookies || [];
178
+ const now = Date.now() / 1000; // Convert to seconds (cookie expires is in seconds)
179
+ // Filter cookies relevant to this domain
180
+ const domainLower = domain.toLowerCase();
181
+ const relevantCookies = cookies.filter((cookie) => {
182
+ const cookieDomain = cookie.domain.toLowerCase().replace(/^\./, ''); // Remove leading dot
183
+ return domainLower === cookieDomain || domainLower.endsWith('.' + cookieDomain);
184
+ });
185
+ if (relevantCookies.length === 0) {
186
+ // No domain-specific cookies, check all cookies
187
+ // This handles cases where auth cookies are on a different domain (e.g., github.com for github.io)
188
+ logger.debug(`[AuthManager] No cookies found for ${domain}, checking all ${cookies.length} cookies`);
189
+ }
190
+ const cookiesToCheck = relevantCookies.length > 0 ? relevantCookies : cookies;
191
+ let expiredCount = 0;
192
+ const details = [];
193
+ for (const cookie of cookiesToCheck) {
194
+ // Skip cookies without expiration (session cookies)
195
+ if (cookie.expires === undefined || cookie.expires === -1 || cookie.expires === 0) {
196
+ continue;
197
+ }
198
+ if (cookie.expires < now) {
199
+ expiredCount++;
200
+ const expiredAgo = Math.round((now - cookie.expires) / 3600); // Hours ago
201
+ details.push(`Cookie "${cookie.name}" expired ${expiredAgo}h ago`);
202
+ }
203
+ }
204
+ // Identify expired auth-related cookies so their messages are prioritized in the reported details
205
+ // Common auth cookie names
206
+ const authCookiePatterns = /session|auth|token|jwt|sid|login|user|identity|sso|saml|oauth/i;
207
+ const expiredAuthCookies = cookiesToCheck.filter((cookie) => {
208
+ if (!cookie.expires || cookie.expires === -1 || cookie.expires === 0)
209
+ return false;
210
+ return cookie.expires < now && authCookiePatterns.test(cookie.name);
211
+ });
212
+ return {
213
+ hasExpiredCookies: expiredCount > 0,
214
+ expiredCount,
215
+ totalCount: cookiesToCheck.length,
216
+ details: expiredAuthCookies.length > 0
217
+ ? details.filter((d) => expiredAuthCookies.some((c) => d.includes(c.name)))
218
+ : details.slice(0, 3), // Limit details
219
+ };
220
+ }
221
+ catch (error) {
222
+ logger.debug(`[AuthManager] Error checking cookie expiration:`, error);
223
+ return { hasExpiredCookies: false, expiredCount: 0, totalCount: 0, details: [] };
224
+ }
225
+ }
226
+ /**
227
+ * Validate that a stored session is still valid.
228
+ * First checks cookie expiration timestamps (fast, no network).
229
+ * Falls back to browser-based validation for edge cases.
171
230
  *
172
231
  * @param url - The protected URL to validate against
173
- * @param browserType - Browser type to use for validation
232
+ * @param browserType - Browser type to use for browser-based validation (if needed)
174
233
  * @returns Validation result indicating if session is still valid
175
234
  */
176
235
  async validateSession(url, browserType = 'chromium') {
@@ -181,6 +240,25 @@ export class AuthManager {
181
240
  logger.info(`[AuthManager] No stored session found for ${domain}`);
182
241
  return { isValid: false, reason: 'No stored session found' };
183
242
  }
243
+ // Fast check: Look at cookie expiration timestamps
244
+ const cookieCheck = this.checkCookieExpiration(storageStateJson, domain);
245
+ logger.debug(`[AuthManager] Cookie check: ${cookieCheck.expiredCount}/${cookieCheck.totalCount} expired`);
246
+ if (cookieCheck.hasExpiredCookies) {
247
+ const reason = `Session cookies have expired (${cookieCheck.expiredCount} expired). ${cookieCheck.details.join('; ')}`;
248
+ logger.warn(`[AuthManager] Session expired based on cookie timestamps: ${reason}`);
249
+ return {
250
+ isValid: false,
251
+ reason,
252
+ loginDetection: {
253
+ isLoginPage: false,
254
+ confidence: 1.0,
255
+ reasons: [`Cookie expiration check: ${cookieCheck.details.join(', ')}`],
256
+ },
257
+ };
258
+ }
259
+ // If no cookies have explicit expiration, or all have valid timestamps,
260
+ // do a quick browser-based check to be sure
261
+ logger.debug(`[AuthManager] Cookie timestamps look valid, performing browser-based validation...`);
184
262
  let browser = null;
185
263
  let context = null;
186
264
  try {
@@ -200,17 +278,26 @@ export class AuthManager {
200
278
  waitUntil: 'domcontentloaded',
201
279
  timeout: 30000,
202
280
  });
281
+ // Wait for potential JavaScript redirects
282
+ await page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => { });
283
+ // Additional wait for JS-based auth redirects (GitHub Pages, etc.)
284
+ await page.waitForTimeout(2000);
203
285
  const finalUrl = page.url();
204
286
  logger.debug(`[AuthManager] Final URL after navigation: ${finalUrl}`);
205
- // Check 1: Were we redirected to a login page?
206
- if (isLoginPageUrl(finalUrl) && finalUrl !== url) {
207
- logger.warn(`[AuthManager] Session appears expired - redirected to login page: ${finalUrl}`);
208
- return {
209
- isValid: false,
210
- reason: 'Redirected to login page - session has expired',
211
- finalUrl,
212
- loginDetection: { isLoginPage: true, confidence: 1.0, reasons: ['Redirected to login URL'] },
213
- };
287
+ // Check 1: Were we redirected to a different domain (likely auth)?
288
+ const finalDomain = new URL(finalUrl).hostname.toLowerCase();
289
+ const expectedDomain = domain.toLowerCase();
290
+ if (finalDomain !== expectedDomain && !finalDomain.endsWith('.' + expectedDomain)) {
291
+ // Redirected to a different domain - check if it's a login page
292
+ if (isLoginPageUrl(finalUrl)) {
293
+ logger.warn(`[AuthManager] Session appears expired - redirected to login page: ${finalUrl}`);
294
+ return {
295
+ isValid: false,
296
+ reason: `Redirected to login page on different domain (${finalDomain})`,
297
+ finalUrl,
298
+ loginDetection: { isLoginPage: true, confidence: 1.0, reasons: ['Redirected to external login URL'] },
299
+ };
300
+ }
214
301
  }
215
302
  // Check 2: Did we get an auth-related HTTP status?
216
303
  const status = response?.status();
@@ -223,8 +310,6 @@ export class AuthManager {
223
310
  };
224
311
  }
225
312
  // Check 3: Does the page content look like a login page?
226
- // Wait for content to load
227
- await page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => { });
228
313
  const pageContent = await page.content();
229
314
  const bodyText = await page.evaluate(() => document.body?.textContent || '');
230
315
  const loginDetection = detectLoginPage(bodyText + pageContent, finalUrl);
@@ -378,6 +463,7 @@ export class AuthManager {
378
463
  logger.info(`[AuthManager] ⏳ You have ${loginTimeoutSecs} seconds to complete login.`);
379
464
  // Wait for successful login
380
465
  const loginSuccess = await this.waitForLogin(page, {
466
+ targetUrl: url, // The original target URL to return to
381
467
  successPattern: loginSuccessPattern,
382
468
  successSelector: loginSuccessSelector,
383
469
  timeoutSecs: loginTimeoutSecs,
@@ -405,17 +491,28 @@ export class AuthManager {
405
491
  * Detection methods (in order of priority):
406
492
  * 1. If successPattern is provided: wait for URL to match the regex
407
493
  * 2. If successSelector is provided: wait for the CSS selector to appear
408
- * 3. Default: poll for common login success indicators (logout button, user menu, URL change)
494
+ * 3. Default: poll for common login success indicators or return to target domain
495
+ *
496
+ * For multi-step OAuth flows (e.g., GitHub Pages → GitHub Login → Okta → back),
497
+ * the method tracks when the user returns to the original target domain.
409
498
  */
410
499
  async waitForLogin(page, options) {
411
- const { successPattern, successSelector, timeoutSecs } = options;
500
+ const { targetUrl, successPattern, successSelector, timeoutSecs } = options;
412
501
  const startTime = Date.now();
413
502
  const timeoutMs = timeoutSecs * 1000;
503
+ // Extract target domain for multi-step OAuth flow detection
504
+ let targetDomain;
505
+ try {
506
+ targetDomain = new URL(targetUrl).hostname.toLowerCase();
507
+ }
508
+ catch {
509
+ targetDomain = '';
510
+ }
414
511
  logger.debug(`[AuthManager] Login detection method: ${successPattern
415
512
  ? `URL pattern: ${successPattern}`
416
513
  : successSelector
417
514
  ? `CSS selector: ${successSelector}`
418
- : 'auto-detect (looking for logout button, user menu, or URL change)'}`);
515
+ : `auto-detect (target domain: ${targetDomain})`}`);
419
516
  // If we have specific success criteria, wait for them
420
517
  if (successPattern) {
421
518
  // Validate the regex pattern to prevent ReDoS attacks
@@ -450,53 +547,90 @@ export class AuthManager {
450
547
  // Default: wait for navigation away from login page or for page to show logged-in state
451
548
  // Poll for changes that indicate successful login
452
549
  logger.info(`[AuthManager] Using auto-detection for login success...`);
550
+ logger.info(`[AuthManager] Target domain: ${targetDomain}`);
453
551
  logger.info(`[AuthManager] The browser will stay open until you login or ${timeoutSecs} seconds pass.`);
454
552
  let lastLogTime = 0;
455
553
  // Track the initial URL to detect navigation
456
554
  const initialUrl = page.url();
457
555
  let hasNavigatedAway = false;
458
556
  let wasOnLoginPage = false;
557
+ const visitedDomains = new Set();
558
+ // Enhanced login page URL pattern including common IdPs
559
+ const loginPagePattern = /login|signin|sign-in|auth|sso|oauth|session|okta|oktapreview|auth0|onelogin|pingone|pingidentity|pingfederate|duosecurity|adfs|saml|idp/i;
459
560
  while (Date.now() - startTime < timeoutMs) {
460
561
  try {
461
562
  const currentUrl = page.url();
462
563
  const elapsed = Math.round((Date.now() - startTime) / 1000);
564
+ // Extract current domain
565
+ let currentDomain;
566
+ try {
567
+ currentDomain = new URL(currentUrl).hostname.toLowerCase();
568
+ }
569
+ catch {
570
+ currentDomain = '';
571
+ }
572
+ // Track visited domains for debugging
573
+ if (currentDomain && !visitedDomains.has(currentDomain)) {
574
+ visitedDomains.add(currentDomain);
575
+ logger.debug(`[AuthManager] Visited new domain: ${currentDomain}`);
576
+ }
463
577
  // Log status every 10 seconds
464
578
  if (elapsed - lastLogTime >= 10) {
465
579
  logger.info(`[AuthManager] Still waiting for login... (${elapsed}s elapsed, current URL: ${currentUrl})`);
466
580
  lastLogTime = elapsed;
467
581
  }
468
- // Check if we're on a login-like page
469
- const isLoginPage = /login|signin|sign-in|auth|sso|oauth|session/i.test(currentUrl);
582
+ // Check if we're on a login-like page (URL-based detection)
583
+ const isLoginPageUrl = loginPagePattern.test(currentUrl);
584
+ // Check if we're on a known identity provider domain
585
+ const isIdpDomain = /okta|auth0|onelogin|pingidentity|duosecurity|microsoftonline|accounts\.google/i.test(currentDomain);
586
+ const isLoginPage = isLoginPageUrl || isIdpDomain;
470
587
  // Track if we've been to a login page (to know when we've successfully logged in)
471
588
  if (isLoginPage) {
472
589
  wasOnLoginPage = true;
473
- logger.debug(`[AuthManager] Detected login page: ${currentUrl}`);
590
+ logger.debug(`[AuthManager] Detected login/IdP page: ${currentUrl}`);
474
591
  }
475
592
  // Track navigation away from initial URL
476
593
  if (currentUrl !== initialUrl && !hasNavigatedAway) {
477
594
  hasNavigatedAway = true;
478
595
  logger.debug(`[AuthManager] Navigation detected: ${initialUrl} → ${currentUrl}`);
479
596
  }
597
+ // Check if we've returned to the target domain after visiting login pages
598
+ const isBackAtTargetDomain = targetDomain && (currentDomain === targetDomain || currentDomain.endsWith('.' + targetDomain));
480
599
  // Check for common logged-in indicators
481
600
  const hasLogoutButton = (await page.locator('text=/log\\s*out|sign\\s*out/i').count()) > 0;
482
601
  const hasUserMenu = (await page.locator('[class*="user"], [class*="avatar"], [class*="profile"]').count()) > 0;
483
- // Only consider login successful if:
484
- // 1. We're not on a login page, AND
485
- // 2. We have logged-in indicators OR we were on a login page and navigated away
602
+ // Success condition 1: Found logout button or user menu (and not on login page)
486
603
  if (!isLoginPage && (hasLogoutButton || hasUserMenu)) {
487
604
  logger.info(`[AuthManager] ✓ Login indicators found (logout button or user menu)`);
488
605
  await page.waitForTimeout(1000);
489
606
  return true;
490
607
  }
491
- // For GitHub Pages: only consider successful if we were on login page and came back
492
- if (currentUrl.includes('github.io') && wasOnLoginPage && !isLoginPage) {
493
- // We were redirected to login and now we're back on the github.io page
608
+ // Success condition 2: Returned to target domain after visiting login page(s)
609
+ // This handles multi-step OAuth flows (GitHub Pages → GitHub → Okta → back to GitHub Pages)
610
+ if (isBackAtTargetDomain && wasOnLoginPage && !isLoginPage) {
611
+ // We were redirected to login/IdP and now we're back on the target domain
494
612
  const bodyText = (await page.locator('body').textContent()) || '';
495
613
  // Make sure it's not an error page
496
614
  if (bodyText.length > 100 && !bodyText.includes('404') && !bodyText.includes('not found')) {
497
- logger.info(`[AuthManager] ✓ Returned to GitHub Pages after login`);
498
- await page.waitForTimeout(1000);
499
- return true;
615
+ logger.info(`[AuthManager] ✓ Returned to target domain (${currentDomain}) after login. Visited ${visitedDomains.size} domains during auth flow.`);
616
+ // Wait a bit longer for any post-login redirects to settle
617
+ await page.waitForTimeout(2000);
618
+ // Double-check we're still on target domain after waiting
619
+ const finalUrl = page.url();
620
+ let finalDomain;
621
+ try {
622
+ finalDomain = new URL(finalUrl).hostname.toLowerCase();
623
+ }
624
+ catch {
625
+ finalDomain = '';
626
+ }
627
+ if (finalDomain === targetDomain || finalDomain.endsWith('.' + targetDomain)) {
628
+ logger.info(`[AuthManager] ✓ Confirmed on target domain: ${finalUrl}`);
629
+ return true;
630
+ }
631
+ else {
632
+ logger.debug(`[AuthManager] Redirected away from target domain after waiting, continuing...`);
633
+ }
500
634
  }
501
635
  }
502
636
  // Wait a bit before checking again
@@ -509,6 +643,7 @@ export class AuthManager {
509
643
  }
510
644
  }
511
645
  logger.warn(`[AuthManager] Login detection timed out after ${timeoutSecs} seconds`);
646
+ logger.debug(`[AuthManager] Visited ${visitedDomains.size} domains during auth flow`);
512
647
  return false;
513
648
  }
514
649
  /**