@cosmocoder/mcp-web-docs 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +193 -18
- package/build/crawler/auth.d.ts +17 -6
- package/build/crawler/auth.js +166 -31
- package/build/crawler/auth.js.map +1 -1
- package/build/crawler/auth.test.js +197 -33
- package/build/crawler/auth.test.js.map +1 -1
- package/build/index.js +189 -22
- package/build/index.js.map +1 -1
- package/build/index.test.js +232 -1
- package/build/index.test.js.map +1 -1
- package/build/storage/storage.d.ts +45 -0
- package/build/storage/storage.js +282 -8
- package/build/storage/storage.js.map +1 -1
- package/build/storage/storage.test.js +254 -0
- package/build/storage/storage.test.js.map +1 -1
- package/build/types.d.ts +17 -0
- package/build/util/security.d.ts +10 -0
- package/build/util/security.js +25 -0
- package/build/util/security.js.map +1 -1
- package/build/util/security.test.js +18 -0
- package/build/util/security.test.js.map +1 -1
- package/package.json +11 -2
- package/build/crawler/cheerio.d.ts +0 -11
- package/build/crawler/cheerio.js +0 -134
- package/build/crawler/cheerio.js.map +0 -1
- package/build/crawler/chromium.d.ts +0 -21
- package/build/crawler/chromium.js +0 -596
- package/build/crawler/chromium.js.map +0 -1
- package/build/crawler/default.d.ts +0 -11
- package/build/crawler/default.js +0 -138
- package/build/crawler/default.js.map +0 -1
- package/build/crawler/factory.d.ts +0 -6
- package/build/crawler/factory.js +0 -83
- package/build/crawler/factory.js.map +0 -1
- package/build/crawler/puppeteer.d.ts +0 -16
- package/build/crawler/puppeteer.js +0 -191
- package/build/crawler/puppeteer.js.map +0 -1
- package/build/embeddings/openai.d.ts +0 -8
- package/build/embeddings/openai.js +0 -56
- package/build/embeddings/openai.js.map +0 -1
- package/build/rag/cache.d.ts +0 -10
- package/build/rag/cache.js +0 -10
- package/build/rag/cache.js.map +0 -1
- package/build/rag/code-generator.d.ts +0 -11
- package/build/rag/code-generator.js +0 -30
- package/build/rag/code-generator.js.map +0 -1
- package/build/rag/context-assembler.d.ts +0 -23
- package/build/rag/context-assembler.js +0 -113
- package/build/rag/context-assembler.js.map +0 -1
- package/build/rag/docs-search.d.ts +0 -55
- package/build/rag/docs-search.js +0 -380
- package/build/rag/docs-search.js.map +0 -1
- package/build/rag/pipeline.d.ts +0 -26
- package/build/rag/pipeline.js +0 -91
- package/build/rag/pipeline.js.map +0 -1
- package/build/rag/query-processor.d.ts +0 -14
- package/build/rag/query-processor.js +0 -57
- package/build/rag/query-processor.js.map +0 -1
- package/build/rag/reranker.d.ts +0 -55
- package/build/rag/reranker.js +0 -210
- package/build/rag/reranker.js.map +0 -1
- package/build/rag/response-generator.d.ts +0 -20
- package/build/rag/response-generator.js +0 -101
- package/build/rag/response-generator.js.map +0 -1
- package/build/rag/retriever.d.ts +0 -19
- package/build/rag/retriever.js +0 -111
- package/build/rag/retriever.js.map +0 -1
- package/build/rag/validator.d.ts +0 -22
- package/build/rag/validator.js +0 -128
- package/build/rag/validator.js.map +0 -1
- package/build/rag/version-manager.d.ts +0 -23
- package/build/rag/version-manager.js +0 -98
- package/build/rag/version-manager.js.map +0 -1
- package/build/types/rag.d.ts +0 -27
- package/build/types/rag.js +0 -2
- package/build/types/rag.js.map +0 -1
- package/build/util/content-utils.d.ts +0 -31
- package/build/util/content-utils.js +0 -120
- package/build/util/content-utils.js.map +0 -1
- package/build/util/content.d.ts +0 -1
- package/build/util/content.js +0 -16
- package/build/util/content.js.map +0 -1
- package/build/util/site-detector.d.ts +0 -22
- package/build/util/site-detector.js +0 -42
- package/build/util/site-detector.js.map +0 -1
package/README.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# MCP Web Docs
|
|
2
2
|
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@cosmocoder/mcp-web-docs)
|
|
4
|
+
[npm registry page](https://www.npmjs.com/package/@cosmocoder/mcp-web-docs)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Node.js](https://nodejs.org/)
|
|
7
|
+
[Release workflow](https://github.com/cosmocoder/mcp-web-docs/actions/workflows/release.yml)
|
|
8
|
+
|
|
3
9
|
**Index Any Documentation. Search Locally. Stay Private.**
|
|
4
10
|
|
|
5
11
|
A self-hosted Model Context Protocol (MCP) server that crawls, indexes, and searches documentation from *any* website. Unlike remote MCP servers limited to GitHub repos or pre-indexed libraries, web-docs gives you full control over what gets indexed — including private documentation behind authentication.
|
|
@@ -32,6 +38,7 @@ AI assistants struggle with documentation:
|
|
|
32
38
|
|
|
33
39
|
- **🌐 Universal Crawler** - Works with any documentation site, not just GitHub
|
|
34
40
|
- **🔍 Hybrid Search** - Combines full-text search (FTS) with semantic vector search
|
|
41
|
+
- **🏷️ Tags & Categories** - Organize docs with tags and filter searches by project, team, or category
|
|
35
42
|
- **🔐 Authentication Support** - Crawl private/protected docs with interactive browser login (auto-detects your default browser)
|
|
36
43
|
- **📊 Smart Extraction** - Automatically extracts code blocks, props tables, and structured content
|
|
37
44
|
- **⚡ Local Embeddings** - Uses FastEmbed for fast, private embedding generation (no API keys)
|
|
@@ -46,11 +53,21 @@ AI assistants struggle with documentation:
|
|
|
46
53
|
|
|
47
54
|
- Node.js >= 22.19.0
|
|
48
55
|
|
|
49
|
-
###
|
|
56
|
+
### Option 1: Install from NPM (Recommended)
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
npm install -g @cosmocoder/mcp-web-docs
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Option 2: Run with npx
|
|
63
|
+
|
|
64
|
+
No installation required - just configure your MCP client to use npx (see below).
|
|
65
|
+
|
|
66
|
+
### Option 3: Build from Source
|
|
50
67
|
|
|
51
68
|
```bash
|
|
52
69
|
# Clone the repository
|
|
53
|
-
git clone https://github.com/
|
|
70
|
+
git clone https://github.com/cosmocoder/mcp-web-docs.git
|
|
54
71
|
cd mcp-web-docs
|
|
55
72
|
|
|
56
73
|
# Install dependencies (automatically installs Playwright browsers)
|
|
@@ -67,6 +84,30 @@ npm run build
|
|
|
67
84
|
|
|
68
85
|
Add to your Cursor MCP settings (`~/.cursor/mcp.json`):
|
|
69
86
|
|
|
87
|
+
**Using npx (no install required):**
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"mcpServers": {
|
|
91
|
+
"web-docs": {
|
|
92
|
+
"command": "npx",
|
|
93
|
+
"args": ["-y", "@cosmocoder/mcp-web-docs"]
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
**Using global install:**
|
|
100
|
+
```json
|
|
101
|
+
{
|
|
102
|
+
"mcpServers": {
|
|
103
|
+
"web-docs": {
|
|
104
|
+
"command": "mcp-web-docs"
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
**Using local build:**
|
|
70
111
|
```json
|
|
71
112
|
{
|
|
72
113
|
"mcpServers": {
|
|
@@ -85,12 +126,24 @@ Add to your Cursor MCP settings (`~/.cursor/mcp.json`):
|
|
|
85
126
|
|
|
86
127
|
Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
|
|
87
128
|
|
|
129
|
+
**Using npx:**
|
|
88
130
|
```json
|
|
89
131
|
{
|
|
90
132
|
"mcpServers": {
|
|
91
133
|
"web-docs": {
|
|
92
|
-
"command": "
|
|
93
|
-
"args": ["/
|
|
134
|
+
"command": "npx",
|
|
135
|
+
"args": ["-y", "@cosmocoder/mcp-web-docs"]
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**Using global install:**
|
|
142
|
+
```json
|
|
143
|
+
{
|
|
144
|
+
"mcpServers": {
|
|
145
|
+
"web-docs": {
|
|
146
|
+
"command": "mcp-web-docs"
|
|
94
147
|
}
|
|
95
148
|
}
|
|
96
149
|
}
|
|
@@ -103,12 +156,24 @@ Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_
|
|
|
103
156
|
|
|
104
157
|
Add to `.vscode/mcp.json` in your workspace:
|
|
105
158
|
|
|
159
|
+
**Using npx:**
|
|
106
160
|
```json
|
|
107
161
|
{
|
|
108
162
|
"servers": {
|
|
109
163
|
"web-docs": {
|
|
110
|
-
"command": "
|
|
111
|
-
"args": ["/
|
|
164
|
+
"command": "npx",
|
|
165
|
+
"args": ["-y", "@cosmocoder/mcp-web-docs"]
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
**Using global install:**
|
|
172
|
+
```json
|
|
173
|
+
{
|
|
174
|
+
"servers": {
|
|
175
|
+
"web-docs": {
|
|
176
|
+
"command": "mcp-web-docs"
|
|
112
177
|
}
|
|
113
178
|
}
|
|
114
179
|
}
|
|
@@ -121,12 +186,24 @@ Add to `.vscode/mcp.json` in your workspace:
|
|
|
121
186
|
|
|
122
187
|
Add to `~/.codeium/windsurf/mcp_config.json`:
|
|
123
188
|
|
|
189
|
+
**Using npx:**
|
|
124
190
|
```json
|
|
125
191
|
{
|
|
126
192
|
"mcpServers": {
|
|
127
193
|
"web-docs": {
|
|
128
|
-
"command": "
|
|
129
|
-
"args": ["/
|
|
194
|
+
"command": "npx",
|
|
195
|
+
"args": ["-y", "@cosmocoder/mcp-web-docs"]
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
**Using global install:**
|
|
202
|
+
```json
|
|
203
|
+
{
|
|
204
|
+
"mcpServers": {
|
|
205
|
+
"web-docs": {
|
|
206
|
+
"command": "mcp-web-docs"
|
|
130
207
|
}
|
|
131
208
|
}
|
|
132
209
|
}
|
|
@@ -139,12 +216,26 @@ Add to `~/.codeium/windsurf/mcp_config.json`:
|
|
|
139
216
|
|
|
140
217
|
Add to `~/Library/Application Support/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json`:
|
|
141
218
|
|
|
219
|
+
**Using npx:**
|
|
142
220
|
```json
|
|
143
221
|
{
|
|
144
222
|
"mcpServers": {
|
|
145
223
|
"web-docs": {
|
|
146
|
-
"command": "
|
|
147
|
-
"args": ["/
|
|
224
|
+
"command": "npx",
|
|
225
|
+
"args": ["-y", "@cosmocoder/mcp-web-docs"],
|
|
226
|
+
"disabled": false,
|
|
227
|
+
"autoApprove": []
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
**Using global install:**
|
|
234
|
+
```json
|
|
235
|
+
{
|
|
236
|
+
"mcpServers": {
|
|
237
|
+
"web-docs": {
|
|
238
|
+
"command": "mcp-web-docs",
|
|
148
239
|
"disabled": false,
|
|
149
240
|
"autoApprove": []
|
|
150
241
|
}
|
|
@@ -154,6 +245,38 @@ Add to `~/Library/Application Support/Code/User/globalStorage/saoudrizwan.claude
|
|
|
154
245
|
|
|
155
246
|
</details>
|
|
156
247
|
|
|
248
|
+
<details>
|
|
249
|
+
<summary><b>RooCode</b></summary>
|
|
250
|
+
|
|
251
|
+
**Global configuration:** Open RooCode → Click MCP icon → "Edit Global MCP"
|
|
252
|
+
|
|
253
|
+
**Project-level configuration:** Create `.roo/mcp.json` at your project root
|
|
254
|
+
|
|
255
|
+
**Using npx:**
|
|
256
|
+
```json
|
|
257
|
+
{
|
|
258
|
+
"mcpServers": {
|
|
259
|
+
"web-docs": {
|
|
260
|
+
"command": "npx",
|
|
261
|
+
"args": ["-y", "@cosmocoder/mcp-web-docs"]
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
}
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
**Using global install:**
|
|
268
|
+
```json
|
|
269
|
+
{
|
|
270
|
+
"mcpServers": {
|
|
271
|
+
"web-docs": {
|
|
272
|
+
"command": "mcp-web-docs"
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
</details>
|
|
279
|
+
|
|
157
280
|
---
|
|
158
281
|
|
|
159
282
|
## ⚡ Quick Start
|
|
@@ -194,9 +317,10 @@ Add a new documentation site for indexing.
|
|
|
194
317
|
```typescript
|
|
195
318
|
add_documentation({
|
|
196
319
|
url: "https://docs.example.com/",
|
|
197
|
-
title: "Example Docs",
|
|
198
|
-
id: "example-docs",
|
|
199
|
-
|
|
320
|
+
title: "Example Docs", // Optional
|
|
321
|
+
id: "example-docs", // Optional custom ID
|
|
322
|
+
tags: ["frontend", "mycompany"], // Optional tags for categorization
|
|
323
|
+
auth: { // Optional authentication
|
|
200
324
|
requiresAuth: true,
|
|
201
325
|
// browser auto-detected from OS settings if omitted
|
|
202
326
|
loginTimeoutSecs: 300
|
|
@@ -211,8 +335,9 @@ Search through indexed documentation using hybrid search (FTS + semantic).
|
|
|
211
335
|
```typescript
|
|
212
336
|
search_documentation({
|
|
213
337
|
query: "how to configure authentication",
|
|
214
|
-
url: "https://docs.example.com/",
|
|
215
|
-
|
|
338
|
+
url: "https://docs.example.com/", // Optional: filter to specific site
|
|
339
|
+
tags: ["frontend", "mycompany"], // Optional: filter by tags
|
|
340
|
+
limit: 10 // Optional: max results
|
|
216
341
|
})
|
|
217
342
|
```
|
|
218
343
|
|
|
@@ -230,7 +355,22 @@ authenticate({
|
|
|
230
355
|
|
|
231
356
|
### `list_documentation`
|
|
232
357
|
|
|
233
|
-
List all indexed documentation sites.
|
|
358
|
+
List all indexed documentation sites with their metadata including tags.
|
|
359
|
+
|
|
360
|
+
### `set_tags`
|
|
361
|
+
|
|
362
|
+
Set or update tags for a documentation site. Tags help categorize and filter documentation.
|
|
363
|
+
|
|
364
|
+
```typescript
|
|
365
|
+
set_tags({
|
|
366
|
+
url: "https://docs.example.com/",
|
|
367
|
+
tags: ["frontend", "react", "mycompany"] // Replaces existing tags
|
|
368
|
+
})
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
### `list_tags`
|
|
372
|
+
|
|
373
|
+
List all available tags with usage counts. Useful to see what tags exist across your indexed docs.
|
|
234
374
|
|
|
235
375
|
### `reindex_documentation`
|
|
236
376
|
|
|
@@ -285,15 +425,50 @@ API references, configuration, or library usage.
|
|
|
285
425
|
|
|
286
426
|
### Scoping Searches
|
|
287
427
|
|
|
288
|
-
If you have multiple sites indexed, filter by URL
|
|
428
|
+
If you have multiple sites indexed, filter by URL or tags:
|
|
289
429
|
|
|
290
430
|
```typescript
|
|
431
|
+
// Filter by specific site URL
|
|
291
432
|
search_documentation({
|
|
292
433
|
query: "routing",
|
|
293
|
-
url: "https://nextjs.org/docs/"
|
|
434
|
+
url: "https://nextjs.org/docs/"
|
|
435
|
+
})
|
|
436
|
+
|
|
437
|
+
// Filter by tags (searches all docs with matching tags)
|
|
438
|
+
search_documentation({
|
|
439
|
+
query: "Button component",
|
|
440
|
+
tags: ["frontend", "mycompany"] // Only docs tagged with BOTH tags
|
|
441
|
+
})
|
|
442
|
+
```
|
|
443
|
+
|
|
444
|
+
### Organizing with Tags
|
|
445
|
+
|
|
446
|
+
Tags help organize documentation when you have multiple related sites. Add tags when indexing:
|
|
447
|
+
|
|
448
|
+
```typescript
|
|
449
|
+
// Index frontend package docs
|
|
450
|
+
add_documentation({
|
|
451
|
+
url: "https://docs.mycompany.com/ui-components/",
|
|
452
|
+
tags: ["frontend", "mycompany", "react"]
|
|
453
|
+
})
|
|
454
|
+
|
|
455
|
+
// Index backend API docs
|
|
456
|
+
add_documentation({
|
|
457
|
+
url: "https://docs.mycompany.com/api/",
|
|
458
|
+
tags: ["backend", "mycompany", "api"]
|
|
294
459
|
})
|
|
295
460
|
```
|
|
296
461
|
|
|
462
|
+
Later, search across all frontend docs:
|
|
463
|
+
```typescript
|
|
464
|
+
search_documentation({
|
|
465
|
+
query: "authentication",
|
|
466
|
+
tags: ["frontend"] // Searches all frontend-tagged docs
|
|
467
|
+
})
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
You can also add tags to existing documentation with `set_tags`.
|
|
471
|
+
|
|
297
472
|
---
|
|
298
473
|
|
|
299
474
|
## 🚨 Troubleshooting
|
package/build/crawler/auth.d.ts
CHANGED
|
@@ -55,13 +55,21 @@ export declare class AuthManager {
|
|
|
55
55
|
*/
|
|
56
56
|
clearSession(url: string): Promise<void>;
|
|
57
57
|
/**
|
|
58
|
-
*
|
|
59
|
-
* This
|
|
60
|
-
*
|
|
61
|
-
*
|
|
58
|
+
* Check if stored cookies have expired based on their expiration timestamps.
|
|
59
|
+
* This is a fast check that doesn't require launching a browser.
|
|
60
|
+
*
|
|
61
|
+
* @param storageStateJson - The decrypted storage state JSON
|
|
62
|
+
* @param domain - The domain to check cookies for
|
|
63
|
+
* @returns Object with expiration status and details
|
|
64
|
+
*/
|
|
65
|
+
private checkCookieExpiration;
|
|
66
|
+
/**
|
|
67
|
+
* Validate that a stored session is still valid.
|
|
68
|
+
* First checks cookie expiration timestamps (fast, no network).
|
|
69
|
+
* Falls back to browser-based validation for edge cases.
|
|
62
70
|
*
|
|
63
71
|
* @param url - The protected URL to validate against
|
|
64
|
-
* @param browserType - Browser type to use for validation
|
|
72
|
+
* @param browserType - Browser type to use for browser-based validation (if needed)
|
|
65
73
|
* @returns Validation result indicating if session is still valid
|
|
66
74
|
*/
|
|
67
75
|
validateSession(url: string, browserType?: BrowserType): Promise<{
|
|
@@ -111,7 +119,10 @@ export declare class AuthManager {
|
|
|
111
119
|
* Detection methods (in order of priority):
|
|
112
120
|
* 1. If successPattern is provided: wait for URL to match the regex
|
|
113
121
|
* 2. If successSelector is provided: wait for the CSS selector to appear
|
|
114
|
-
* 3. Default: poll for common login success indicators
|
|
122
|
+
* 3. Default: poll for common login success indicators or return to target domain
|
|
123
|
+
*
|
|
124
|
+
* For multi-step OAuth flows (e.g., GitHub Pages → GitHub Login → Okta → back),
|
|
125
|
+
* the method tracks when the user returns to the original target domain.
|
|
115
126
|
*/
|
|
116
127
|
private waitForLogin;
|
|
117
128
|
/**
|
package/build/crawler/auth.js
CHANGED
|
@@ -164,13 +164,72 @@ export class AuthManager {
|
|
|
164
164
|
}
|
|
165
165
|
}
|
|
166
166
|
/**
|
|
167
|
-
*
|
|
168
|
-
* This
|
|
169
|
-
*
|
|
170
|
-
*
|
|
167
|
+
* Check if stored cookies have expired based on their expiration timestamps.
|
|
168
|
+
* This is a fast check that doesn't require launching a browser.
|
|
169
|
+
*
|
|
170
|
+
* @param storageStateJson - The decrypted storage state JSON
|
|
171
|
+
* @param domain - The domain to check cookies for
|
|
172
|
+
* @returns Object with expiration status and details
|
|
173
|
+
*/
|
|
174
|
+
checkCookieExpiration(storageStateJson, domain) {
|
|
175
|
+
try {
|
|
176
|
+
const storageState = safeJsonParse(storageStateJson, StorageStateSchema);
|
|
177
|
+
const cookies = storageState.cookies || [];
|
|
178
|
+
const now = Date.now() / 1000; // Convert to seconds (cookie expires is in seconds)
|
|
179
|
+
// Filter cookies relevant to this domain
|
|
180
|
+
const domainLower = domain.toLowerCase();
|
|
181
|
+
const relevantCookies = cookies.filter((cookie) => {
|
|
182
|
+
const cookieDomain = cookie.domain.toLowerCase().replace(/^\./, ''); // Remove leading dot
|
|
183
|
+
return domainLower === cookieDomain || domainLower.endsWith('.' + cookieDomain);
|
|
184
|
+
});
|
|
185
|
+
if (relevantCookies.length === 0) {
|
|
186
|
+
// No domain-specific cookies, check all cookies
|
|
187
|
+
// This handles cases where auth cookies are on a different domain (e.g., github.com for github.io)
|
|
188
|
+
logger.debug(`[AuthManager] No cookies found for ${domain}, checking all ${cookies.length} cookies`);
|
|
189
|
+
}
|
|
190
|
+
const cookiesToCheck = relevantCookies.length > 0 ? relevantCookies : cookies;
|
|
191
|
+
let expiredCount = 0;
|
|
192
|
+
const details = [];
|
|
193
|
+
for (const cookie of cookiesToCheck) {
|
|
194
|
+
// Skip cookies without expiration (session cookies)
|
|
195
|
+
if (cookie.expires === undefined || cookie.expires === -1 || cookie.expires === 0) {
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
198
|
+
if (cookie.expires < now) {
|
|
199
|
+
expiredCount++;
|
|
200
|
+
const expiredAgo = Math.round((now - cookie.expires) / 3600); // Hours ago
|
|
201
|
+
details.push(`Cookie "${cookie.name}" expired ${expiredAgo}h ago`);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
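// Collect expired auth-related cookies (matched by common name patterns) so they are prioritized in `details`; note that hasExpiredCookies reflects ANY expired cookie, not only these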
|
|
205
|
+
// Common auth cookie names
|
|
206
|
+
const authCookiePatterns = /session|auth|token|jwt|sid|login|user|identity|sso|saml|oauth/i;
|
|
207
|
+
const expiredAuthCookies = cookiesToCheck.filter((cookie) => {
|
|
208
|
+
if (!cookie.expires || cookie.expires === -1 || cookie.expires === 0)
|
|
209
|
+
return false;
|
|
210
|
+
return cookie.expires < now && authCookiePatterns.test(cookie.name);
|
|
211
|
+
});
|
|
212
|
+
return {
|
|
213
|
+
hasExpiredCookies: expiredCount > 0,
|
|
214
|
+
expiredCount,
|
|
215
|
+
totalCount: cookiesToCheck.length,
|
|
216
|
+
details: expiredAuthCookies.length > 0
|
|
217
|
+
? details.filter((d) => expiredAuthCookies.some((c) => d.includes(c.name)))
|
|
218
|
+
: details.slice(0, 3), // Limit details
|
|
219
|
+
};
|
|
220
|
+
}
|
|
221
|
+
catch (error) {
|
|
222
|
+
logger.debug(`[AuthManager] Error checking cookie expiration:`, error);
|
|
223
|
+
return { hasExpiredCookies: false, expiredCount: 0, totalCount: 0, details: [] };
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
/**
|
|
227
|
+
* Validate that a stored session is still valid.
|
|
228
|
+
* First checks cookie expiration timestamps (fast, no network).
|
|
229
|
+
* Falls back to browser-based validation for edge cases.
|
|
171
230
|
*
|
|
172
231
|
* @param url - The protected URL to validate against
|
|
173
|
-
* @param browserType - Browser type to use for validation
|
|
232
|
+
* @param browserType - Browser type to use for browser-based validation (if needed)
|
|
174
233
|
* @returns Validation result indicating if session is still valid
|
|
175
234
|
*/
|
|
176
235
|
async validateSession(url, browserType = 'chromium') {
|
|
@@ -181,6 +240,25 @@ export class AuthManager {
|
|
|
181
240
|
logger.info(`[AuthManager] No stored session found for ${domain}`);
|
|
182
241
|
return { isValid: false, reason: 'No stored session found' };
|
|
183
242
|
}
|
|
243
|
+
// Fast check: Look at cookie expiration timestamps
|
|
244
|
+
const cookieCheck = this.checkCookieExpiration(storageStateJson, domain);
|
|
245
|
+
logger.debug(`[AuthManager] Cookie check: ${cookieCheck.expiredCount}/${cookieCheck.totalCount} expired`);
|
|
246
|
+
if (cookieCheck.hasExpiredCookies) {
|
|
247
|
+
const reason = `Session cookies have expired (${cookieCheck.expiredCount} expired). ${cookieCheck.details.join('; ')}`;
|
|
248
|
+
logger.warn(`[AuthManager] Session expired based on cookie timestamps: ${reason}`);
|
|
249
|
+
return {
|
|
250
|
+
isValid: false,
|
|
251
|
+
reason,
|
|
252
|
+
loginDetection: {
|
|
253
|
+
isLoginPage: false,
|
|
254
|
+
confidence: 1.0,
|
|
255
|
+
reasons: [`Cookie expiration check: ${cookieCheck.details.join(', ')}`],
|
|
256
|
+
},
|
|
257
|
+
};
|
|
258
|
+
}
|
|
259
|
+
// If no cookies have explicit expiration, or all have valid timestamps,
|
|
260
|
+
// do a quick browser-based check to be sure
|
|
261
|
+
logger.debug(`[AuthManager] Cookie timestamps look valid, performing browser-based validation...`);
|
|
184
262
|
let browser = null;
|
|
185
263
|
let context = null;
|
|
186
264
|
try {
|
|
@@ -200,17 +278,26 @@ export class AuthManager {
|
|
|
200
278
|
waitUntil: 'domcontentloaded',
|
|
201
279
|
timeout: 30000,
|
|
202
280
|
});
|
|
281
|
+
// Wait for potential JavaScript redirects
|
|
282
|
+
await page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => { });
|
|
283
|
+
// Additional wait for JS-based auth redirects (GitHub Pages, etc.)
|
|
284
|
+
await page.waitForTimeout(2000);
|
|
203
285
|
const finalUrl = page.url();
|
|
204
286
|
logger.debug(`[AuthManager] Final URL after navigation: ${finalUrl}`);
|
|
205
|
-
// Check 1: Were we redirected to a
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
finalUrl
|
|
212
|
-
|
|
213
|
-
|
|
287
|
+
// Check 1: Were we redirected to a different domain (likely auth)?
|
|
288
|
+
const finalDomain = new URL(finalUrl).hostname.toLowerCase();
|
|
289
|
+
const expectedDomain = domain.toLowerCase();
|
|
290
|
+
if (finalDomain !== expectedDomain && !finalDomain.endsWith('.' + expectedDomain)) {
|
|
291
|
+
// Redirected to a different domain - check if it's a login page
|
|
292
|
+
if (isLoginPageUrl(finalUrl)) {
|
|
293
|
+
logger.warn(`[AuthManager] Session appears expired - redirected to login page: ${finalUrl}`);
|
|
294
|
+
return {
|
|
295
|
+
isValid: false,
|
|
296
|
+
reason: `Redirected to login page on different domain (${finalDomain})`,
|
|
297
|
+
finalUrl,
|
|
298
|
+
loginDetection: { isLoginPage: true, confidence: 1.0, reasons: ['Redirected to external login URL'] },
|
|
299
|
+
};
|
|
300
|
+
}
|
|
214
301
|
}
|
|
215
302
|
// Check 2: Did we get an auth-related HTTP status?
|
|
216
303
|
const status = response?.status();
|
|
@@ -223,8 +310,6 @@ export class AuthManager {
|
|
|
223
310
|
};
|
|
224
311
|
}
|
|
225
312
|
// Check 3: Does the page content look like a login page?
|
|
226
|
-
// Wait for content to load
|
|
227
|
-
await page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => { });
|
|
228
313
|
const pageContent = await page.content();
|
|
229
314
|
const bodyText = await page.evaluate(() => document.body?.textContent || '');
|
|
230
315
|
const loginDetection = detectLoginPage(bodyText + pageContent, finalUrl);
|
|
@@ -378,6 +463,7 @@ export class AuthManager {
|
|
|
378
463
|
logger.info(`[AuthManager] ⏳ You have ${loginTimeoutSecs} seconds to complete login.`);
|
|
379
464
|
// Wait for successful login
|
|
380
465
|
const loginSuccess = await this.waitForLogin(page, {
|
|
466
|
+
targetUrl: url, // The original target URL to return to
|
|
381
467
|
successPattern: loginSuccessPattern,
|
|
382
468
|
successSelector: loginSuccessSelector,
|
|
383
469
|
timeoutSecs: loginTimeoutSecs,
|
|
@@ -405,17 +491,28 @@ export class AuthManager {
|
|
|
405
491
|
* Detection methods (in order of priority):
|
|
406
492
|
* 1. If successPattern is provided: wait for URL to match the regex
|
|
407
493
|
* 2. If successSelector is provided: wait for the CSS selector to appear
|
|
408
|
-
* 3. Default: poll for common login success indicators
|
|
494
|
+
* 3. Default: poll for common login success indicators or return to target domain
|
|
495
|
+
*
|
|
496
|
+
* For multi-step OAuth flows (e.g., GitHub Pages → GitHub Login → Okta → back),
|
|
497
|
+
* the method tracks when the user returns to the original target domain.
|
|
409
498
|
*/
|
|
410
499
|
async waitForLogin(page, options) {
|
|
411
|
-
const { successPattern, successSelector, timeoutSecs } = options;
|
|
500
|
+
const { targetUrl, successPattern, successSelector, timeoutSecs } = options;
|
|
412
501
|
const startTime = Date.now();
|
|
413
502
|
const timeoutMs = timeoutSecs * 1000;
|
|
503
|
+
// Extract target domain for multi-step OAuth flow detection
|
|
504
|
+
let targetDomain;
|
|
505
|
+
try {
|
|
506
|
+
targetDomain = new URL(targetUrl).hostname.toLowerCase();
|
|
507
|
+
}
|
|
508
|
+
catch {
|
|
509
|
+
targetDomain = '';
|
|
510
|
+
}
|
|
414
511
|
logger.debug(`[AuthManager] Login detection method: ${successPattern
|
|
415
512
|
? `URL pattern: ${successPattern}`
|
|
416
513
|
: successSelector
|
|
417
514
|
? `CSS selector: ${successSelector}`
|
|
418
|
-
:
|
|
515
|
+
: `auto-detect (target domain: ${targetDomain})`}`);
|
|
419
516
|
// If we have specific success criteria, wait for them
|
|
420
517
|
if (successPattern) {
|
|
421
518
|
// Validate the regex pattern to prevent ReDoS attacks
|
|
@@ -450,53 +547,90 @@ export class AuthManager {
|
|
|
450
547
|
// Default: wait for navigation away from login page or for page to show logged-in state
|
|
451
548
|
// Poll for changes that indicate successful login
|
|
452
549
|
logger.info(`[AuthManager] Using auto-detection for login success...`);
|
|
550
|
+
logger.info(`[AuthManager] Target domain: ${targetDomain}`);
|
|
453
551
|
logger.info(`[AuthManager] The browser will stay open until you login or ${timeoutSecs} seconds pass.`);
|
|
454
552
|
let lastLogTime = 0;
|
|
455
553
|
// Track the initial URL to detect navigation
|
|
456
554
|
const initialUrl = page.url();
|
|
457
555
|
let hasNavigatedAway = false;
|
|
458
556
|
let wasOnLoginPage = false;
|
|
557
|
+
const visitedDomains = new Set();
|
|
558
|
+
// Enhanced login page URL pattern including common IdPs
|
|
559
|
+
const loginPagePattern = /login|signin|sign-in|auth|sso|oauth|session|okta|oktapreview|auth0|onelogin|pingone|pingidentity|pingfederate|duosecurity|adfs|saml|idp/i;
|
|
459
560
|
while (Date.now() - startTime < timeoutMs) {
|
|
460
561
|
try {
|
|
461
562
|
const currentUrl = page.url();
|
|
462
563
|
const elapsed = Math.round((Date.now() - startTime) / 1000);
|
|
564
|
+
// Extract current domain
|
|
565
|
+
let currentDomain;
|
|
566
|
+
try {
|
|
567
|
+
currentDomain = new URL(currentUrl).hostname.toLowerCase();
|
|
568
|
+
}
|
|
569
|
+
catch {
|
|
570
|
+
currentDomain = '';
|
|
571
|
+
}
|
|
572
|
+
// Track visited domains for debugging
|
|
573
|
+
if (currentDomain && !visitedDomains.has(currentDomain)) {
|
|
574
|
+
visitedDomains.add(currentDomain);
|
|
575
|
+
logger.debug(`[AuthManager] Visited new domain: ${currentDomain}`);
|
|
576
|
+
}
|
|
463
577
|
// Log status every 10 seconds
|
|
464
578
|
if (elapsed - lastLogTime >= 10) {
|
|
465
579
|
logger.info(`[AuthManager] Still waiting for login... (${elapsed}s elapsed, current URL: ${currentUrl})`);
|
|
466
580
|
lastLogTime = elapsed;
|
|
467
581
|
}
|
|
468
|
-
// Check if we're on a login-like page
|
|
469
|
-
const
|
|
582
|
+
// Check if we're on a login-like page (URL-based detection)
|
|
583
|
+
const isLoginPageUrl = loginPagePattern.test(currentUrl);
|
|
584
|
+
// Check if we're on a known identity provider domain
|
|
585
|
+
const isIdpDomain = /okta|auth0|onelogin|pingidentity|duosecurity|microsoftonline|accounts\.google/i.test(currentDomain);
|
|
586
|
+
const isLoginPage = isLoginPageUrl || isIdpDomain;
|
|
470
587
|
// Track if we've been to a login page (to know when we've successfully logged in)
|
|
471
588
|
if (isLoginPage) {
|
|
472
589
|
wasOnLoginPage = true;
|
|
473
|
-
logger.debug(`[AuthManager] Detected login page: ${currentUrl}`);
|
|
590
|
+
logger.debug(`[AuthManager] Detected login/IdP page: ${currentUrl}`);
|
|
474
591
|
}
|
|
475
592
|
// Track navigation away from initial URL
|
|
476
593
|
if (currentUrl !== initialUrl && !hasNavigatedAway) {
|
|
477
594
|
hasNavigatedAway = true;
|
|
478
595
|
logger.debug(`[AuthManager] Navigation detected: ${initialUrl} → ${currentUrl}`);
|
|
479
596
|
}
|
|
597
|
+
// Check if we've returned to the target domain after visiting login pages
|
|
598
|
+
const isBackAtTargetDomain = targetDomain && (currentDomain === targetDomain || currentDomain.endsWith('.' + targetDomain));
|
|
480
599
|
// Check for common logged-in indicators
|
|
481
600
|
const hasLogoutButton = (await page.locator('text=/log\\s*out|sign\\s*out/i').count()) > 0;
|
|
482
601
|
const hasUserMenu = (await page.locator('[class*="user"], [class*="avatar"], [class*="profile"]').count()) > 0;
|
|
483
|
-
//
|
|
484
|
-
// 1. We're not on a login page, AND
|
|
485
|
-
// 2. We have logged-in indicators OR we were on a login page and navigated away
|
|
602
|
+
// Success condition 1: Found logout button or user menu (and not on login page)
|
|
486
603
|
if (!isLoginPage && (hasLogoutButton || hasUserMenu)) {
|
|
487
604
|
logger.info(`[AuthManager] ✓ Login indicators found (logout button or user menu)`);
|
|
488
605
|
await page.waitForTimeout(1000);
|
|
489
606
|
return true;
|
|
490
607
|
}
|
|
491
|
-
//
|
|
492
|
-
|
|
493
|
-
|
|
608
|
+
// Success condition 2: Returned to target domain after visiting login page(s)
|
|
609
|
+
// This handles multi-step OAuth flows (GitHub Pages → GitHub → Okta → back to GitHub Pages)
|
|
610
|
+
if (isBackAtTargetDomain && wasOnLoginPage && !isLoginPage) {
|
|
611
|
+
// We were redirected to login/IdP and now we're back on the target domain
|
|
494
612
|
const bodyText = (await page.locator('body').textContent()) || '';
|
|
495
613
|
// Make sure it's not an error page
|
|
496
614
|
if (bodyText.length > 100 && !bodyText.includes('404') && !bodyText.includes('not found')) {
|
|
497
|
-
logger.info(`[AuthManager] ✓ Returned to
|
|
498
|
-
|
|
499
|
-
|
|
615
|
+
logger.info(`[AuthManager] ✓ Returned to target domain (${currentDomain}) after login. Visited ${visitedDomains.size} domains during auth flow.`);
|
|
616
|
+
// Wait a bit longer for any post-login redirects to settle
|
|
617
|
+
await page.waitForTimeout(2000);
|
|
618
|
+
// Double-check we're still on target domain after waiting
|
|
619
|
+
const finalUrl = page.url();
|
|
620
|
+
let finalDomain;
|
|
621
|
+
try {
|
|
622
|
+
finalDomain = new URL(finalUrl).hostname.toLowerCase();
|
|
623
|
+
}
|
|
624
|
+
catch {
|
|
625
|
+
finalDomain = '';
|
|
626
|
+
}
|
|
627
|
+
if (finalDomain === targetDomain || finalDomain.endsWith('.' + targetDomain)) {
|
|
628
|
+
logger.info(`[AuthManager] ✓ Confirmed on target domain: ${finalUrl}`);
|
|
629
|
+
return true;
|
|
630
|
+
}
|
|
631
|
+
else {
|
|
632
|
+
logger.debug(`[AuthManager] Redirected away from target domain after waiting, continuing...`);
|
|
633
|
+
}
|
|
500
634
|
}
|
|
501
635
|
}
|
|
502
636
|
// Wait a bit before checking again
|
|
@@ -509,6 +643,7 @@ export class AuthManager {
|
|
|
509
643
|
}
|
|
510
644
|
}
|
|
511
645
|
logger.warn(`[AuthManager] Login detection timed out after ${timeoutSecs} seconds`);
|
|
646
|
+
logger.debug(`[AuthManager] Visited ${visitedDomains.size} domains during auth flow`);
|
|
512
647
|
return false;
|
|
513
648
|
}
|
|
514
649
|
/**
|