@arabold/docs-mcp-server 1.20.0 → 1.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -0
- package/dist/{DocumentManagementService-BH02TJEe.js → DocumentManagementService-C1xAzouZ.js} +127 -18
- package/dist/DocumentManagementService-C1xAzouZ.js.map +1 -0
- package/dist/assets/main.css +1 -1
- package/dist/assets/main.js +47 -47
- package/dist/assets/main.js.map +1 -1
- package/dist/index.js +2017 -644
- package/dist/index.js.map +1 -1
- package/package.json +3 -1
- package/public/assets/main.css +1 -1
- package/public/assets/main.js +47 -47
- package/public/assets/main.js.map +1 -1
- package/dist/DocumentManagementService-BH02TJEe.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,9 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
1
2
|
import "dotenv/config";
|
|
2
|
-
import {
|
|
3
|
+
import { PostHog } from "posthog-node";
|
|
4
|
+
import crypto, { randomUUID } from "node:crypto";
|
|
5
|
+
import fs, { readFileSync, existsSync } from "node:fs";
|
|
3
6
|
import path from "node:path";
|
|
7
|
+
import envPaths from "env-paths";
|
|
8
|
+
import { Option, Command } from "commander";
|
|
4
9
|
import formBody from "@fastify/formbody";
|
|
5
10
|
import fastifyStatic from "@fastify/static";
|
|
6
11
|
import Fastify from "fastify";
|
|
12
|
+
import { ProxyOAuthServerProvider } from "@modelcontextprotocol/sdk/server/auth/providers/proxyProvider.js";
|
|
13
|
+
import { createRemoteJWKSet, jwtVerify } from "jose";
|
|
7
14
|
import { SSEServerTransport } from "@modelcontextprotocol/sdk/server/sse.js";
|
|
8
15
|
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
9
16
|
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
@@ -17,7 +24,7 @@ import { chromium } from "playwright";
|
|
|
17
24
|
import { gfm } from "@joplin/turndown-plugin-gfm";
|
|
18
25
|
import TurndownService from "turndown";
|
|
19
26
|
import iconv from "iconv-lite";
|
|
20
|
-
import fs from "node:fs/promises";
|
|
27
|
+
import fs$1 from "node:fs/promises";
|
|
21
28
|
import * as mime from "mime-types";
|
|
22
29
|
import axios from "axios";
|
|
23
30
|
import { HeaderGenerator } from "header-generator";
|
|
@@ -25,7 +32,6 @@ import { initTRPC } from "@trpc/server";
|
|
|
25
32
|
import { fastifyTRPCPlugin } from "@trpc/server/adapters/fastify";
|
|
26
33
|
import { z as z$1 } from "zod";
|
|
27
34
|
import { jsxs, jsx, Fragment } from "@kitajs/html/jsx-runtime";
|
|
28
|
-
import fs$1, { readFileSync, existsSync } from "node:fs";
|
|
29
35
|
import { unified } from "unified";
|
|
30
36
|
import remarkParse from "remark-parse";
|
|
31
37
|
import remarkGfm from "remark-gfm";
|
|
@@ -34,14 +40,13 @@ import DOMPurify from "dompurify";
|
|
|
34
40
|
import { fileURLToPath, URL as URL$1 } from "node:url";
|
|
35
41
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
36
42
|
import { createTRPCProxyClient, httpBatchLink } from "@trpc/client";
|
|
37
|
-
import "env-paths";
|
|
38
43
|
import "fuse.js";
|
|
39
44
|
import "langchain/text_splitter";
|
|
40
45
|
import "better-sqlite3";
|
|
41
46
|
import "sqlite-vec";
|
|
42
47
|
import { execSync } from "node:child_process";
|
|
43
48
|
import { v4 } from "uuid";
|
|
44
|
-
import "psl";
|
|
49
|
+
import psl from "psl";
|
|
45
50
|
import { minimatch } from "minimatch";
|
|
46
51
|
const LogLevel = {
|
|
47
52
|
ERROR: 0,
|
|
@@ -101,10 +106,806 @@ const logger = {
|
|
|
101
106
|
}
|
|
102
107
|
}
|
|
103
108
|
};
|
|
104
|
-
|
|
109
|
+
/**
 * Thin wrapper around the PostHog SDK client.
 *
 * Every SDK interaction is guarded: failures are logged at debug level and
 * never propagate to the caller.
 */
class PostHogClient {
  client;
  enabled;
  // PostHog configuration
  static CONFIG = {
    host: "https://app.posthog.com",
    // Batching: send after 20 events or after 10 seconds
    flushAt: 20,
    flushInterval: 1e4,
    // Privacy settings: no IP geolocation, no session recording, no surveys
    disableGeoip: true,
    disableSessionRecording: true,
    disableSurveys: true,
    // Data handling: keep state in memory only
    persistence: "memory"
  };
  /**
   * @param enabled - When falsy no SDK client is created and the wrapper is inert.
   */
  constructor(enabled) {
    this.enabled = enabled;
    if (!this.enabled) {
      this.enabled = false;
      logger.debug("PostHog client disabled");
      return;
    }
    const { host, flushAt, flushInterval, disableGeoip } = PostHogClient.CONFIG;
    try {
      this.client = new PostHog("phc_g7pXZZdUiAQXdnwUANjloQWMvO0amEDTBaeDSWgXgrQ", {
        host,
        flushAt,
        flushInterval,
        disableGeoip
      });
      logger.debug("PostHog client initialized");
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      logger.debug(`PostHog initialization failed: ${reason}`);
      // A client that could not be constructed leaves the wrapper disabled.
      this.enabled = false;
    }
  }
  /**
   * Send event to PostHog. Does nothing when disabled or when no client exists.
   */
  capture(distinctId, event, properties) {
    const ready = this.enabled && this.client;
    if (!ready) return;
    try {
      this.client.capture({ distinctId, event, properties });
      logger.debug(`PostHog event captured: ${event}`);
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      logger.debug(`PostHog capture error: ${reason}`);
    }
  }
  /**
   * Graceful shutdown: awaits the SDK client's own shutdown() when a client exists.
   */
  async shutdown() {
    if (!this.client) return;
    try {
      await this.client.shutdown();
      logger.debug("PostHog client shutdown complete");
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      logger.debug(`PostHog shutdown error: ${reason}`);
    }
  }
  /**
   * Check if client is enabled and ready
   */
  isEnabled() {
    return this.enabled && Boolean(this.client);
  }
}
|
|
193
|
+
/**
 * Holds the context object of the current session (if any) and merges it
 * into event properties.
 */
class SessionTracker {
  sessionContext;
  /**
   * Start a new session with context (replaces any stored context).
   */
  startSession(context) {
    this.sessionContext = context;
  }
  /**
   * End current session and return duration.
   *
   * @returns `null` when no session is active, otherwise `{ duration, interface }`
   *   where `duration` is the milliseconds elapsed since `startTime`.
   */
  endSession() {
    const active = this.sessionContext;
    if (!active) return null;
    const { startTime, interface: sessionInterface } = active;
    const duration = Date.now() - startTime.getTime();
    this.sessionContext = void 0;
    return { duration, interface: sessionInterface };
  }
  /**
   * Get current session context (undefined when no session is active).
   */
  getSessionContext() {
    return this.sessionContext;
  }
  /**
   * Get enriched properties with session context.
   *
   * Session fields come first, caller properties override them, and a fresh
   * ISO `timestamp` overrides both.
   */
  getEnrichedProperties(properties = {}) {
    const merged = { ...this.sessionContext, ...properties };
    return { ...merged, timestamp: (/* @__PURE__ */ new Date()).toISOString() };
  }
}
|
|
228
|
+
/**
 * Process-wide telemetry switch (lazily created singleton).
 */
class TelemetryConfig {
  static instance;
  enabled;
  constructor() {
    this.enabled = this.determineEnabledState();
  }
  /**
   * Determines if telemetry should be enabled based on CLI flags and environment variables.
   *
   * Telemetry defaults to enabled and is turned off when EITHER opt-out is present:
   *  - the `--no-telemetry` CLI flag, or
   *  - `DOCS_MCP_TELEMETRY` set to a disabling value.
   *
   * Besides the exact string "false", the values "0", "no" and "off" are
   * accepted, case-insensitively and ignoring surrounding whitespace, so that
   * common spellings of an opt-out are not silently ignored.
   *
   * @returns {boolean} true when telemetry should be active.
   */
  determineEnabledState() {
    if (process.argv.includes("--no-telemetry")) {
      return false;
    }
    const raw = process.env.DOCS_MCP_TELEMETRY;
    if (typeof raw === "string") {
      const normalized = raw.trim().toLowerCase();
      if (["false", "0", "no", "off"].includes(normalized)) {
        return false;
      }
    }
    return true;
  }
  /** @returns {boolean} the current enabled state. */
  isEnabled() {
    return this.enabled;
  }
  /** Turn telemetry off for this config instance. */
  disable() {
    this.enabled = false;
  }
  /** Turn telemetry on for this config instance. */
  enable() {
    this.enabled = true;
  }
  /** @returns {TelemetryConfig} the shared instance, created on first use. */
  static getInstance() {
    if (!TelemetryConfig.instance) {
      TelemetryConfig.instance = new TelemetryConfig();
    }
    return TelemetryConfig.instance;
  }
}
|
|
264
|
+
/**
 * Returns the installation identifier, creating and storing one if needed.
 *
 * The ID is kept in `installation.id` inside `DOCS_MCP_STORE_PATH` when that
 * variable is set, otherwise inside the env-paths data directory for
 * "docs-mcp-server". Any failure (unreadable or unwritable location) falls
 * back to a fresh random UUID that is not stored.
 *
 * @returns {string} the stored (trimmed) ID or a newly generated UUID.
 */
function generateInstallationId() {
  try {
    const dataDir = process.env.DOCS_MCP_STORE_PATH || envPaths("docs-mcp-server", { suffix: "" }).data;
    const idFile = path.join(dataDir, "installation.id");
    const stored = fs.existsSync(idFile) ? fs.readFileSync(idFile, "utf8").trim() : "";
    if (stored) {
      return stored;
    }
    // Missing or blank file: mint a new ID and store it.
    const freshId = randomUUID();
    fs.mkdirSync(dataDir, { recursive: true });
    fs.writeFileSync(idFile, freshId, "utf8");
    return freshId;
  } catch {
    return randomUUID();
  }
}
|
|
283
|
+
/**
 * Reports the enabled state of the shared TelemetryConfig instance.
 *
 * @returns {boolean} whatever `TelemetryConfig.getInstance().isEnabled()` returns.
 */
function shouldEnableTelemetry() {
  const config = TelemetryConfig.getInstance();
  return config.isEnabled();
}
|
|
286
|
+
// Map of telemetry event identifiers (constant name -> event name string).
// `var` is kept on purpose: the initializer reads the hoisted binding.
var TelemetryEvent = /* @__PURE__ */ ((events) => Object.assign(events, {
  SESSION_STARTED: "session_started",
  SESSION_ENDED: "session_ended",
  APP_STARTED: "app_started",
  APP_SHUTDOWN: "app_shutdown",
  COMMAND_EXECUTED: "command_executed",
  TOOL_USED: "tool_used",
  HTTP_REQUEST_COMPLETED: "http_request_completed",
  PIPELINE_JOB_PROGRESS: "pipeline_job_progress",
  PIPELINE_JOB_COMPLETED: "pipeline_job_completed",
  DOCUMENT_PROCESSED: "document_processed",
  DOCUMENT_PROCESSING_FAILED: "document_processing_failed",
  ERROR_OCCURRED: "error_occurred"
}))(TelemetryEvent || {});
|
|
301
|
+
/**
 * Telemetry facade: combines a PostHogClient (transport), a SessionTracker
 * (session context) and an installation ID used as the PostHog distinct ID.
 */
class Analytics {
  postHogClient;
  sessionTracker;
  enabled = true;
  distinctId;
  /**
   * @param enabled - Explicit on/off switch; when null or undefined the state
   *   is taken from `TelemetryConfig.getInstance().isEnabled()`.
   */
  constructor(enabled) {
    this.enabled = enabled ?? TelemetryConfig.getInstance().isEnabled();
    this.distinctId = generateInstallationId();
    this.postHogClient = new PostHogClient(this.enabled);
    this.sessionTracker = new SessionTracker();
    if (this.enabled) {
      logger.debug("Analytics enabled");
    } else {
      logger.debug("Analytics disabled");
    }
  }
  /**
   * Initialize session context - call once per session.
   * Stores the context and emits a "session_started" event. No-op when disabled.
   */
  startSession(context) {
    if (!this.enabled) return;
    this.sessionTracker.startSession(context);
    this.track("session_started", {
      interface: context.interface,
      version: context.version,
      platform: context.platform,
      sessionDurationTarget: context.interface === "cli" ? "short" : "long",
      authEnabled: context.authEnabled,
      readOnly: context.readOnly,
      // A context without `servicesEnabled` must not make telemetry throw
      // into the caller; report zero services instead.
      servicesCount: context.servicesEnabled?.length ?? 0
    });
  }
  /**
   * Track an event with automatic session context inclusion. No-op when disabled.
   */
  track(event, properties = {}) {
    if (!this.enabled) return;
    const eventProperties = this.sessionTracker.getEnrichedProperties(properties);
    this.postHogClient.capture(this.distinctId, event, eventProperties);
  }
  /**
   * Track session end with duration. Emits "session_ended" only when a
   * session was active. No-op when disabled.
   */
  endSession() {
    if (!this.enabled) return;
    const sessionInfo = this.sessionTracker.endSession();
    if (sessionInfo) {
      this.track("session_ended", {
        durationMs: sessionInfo.duration,
        interface: sessionInfo.interface
      });
    }
  }
  /**
   * Graceful shutdown with event flushing (delegates to the PostHog client wrapper).
   */
  async shutdown() {
    await this.postHogClient.shutdown();
  }
  /**
   * Check if analytics is enabled
   */
  isEnabled() {
    return this.enabled && this.postHogClient.isEnabled();
  }
  /**
   * Get current session context
   */
  getSessionContext() {
    return this.sessionTracker.getSessionContext();
  }
}
|
|
373
|
+
// Module-level Analytics instance; constructed without an argument, so its
// enabled state comes from TelemetryConfig.getInstance().isEnabled().
const analytics = new Analytics();
|
|
374
|
+
/**
 * Runs `operation` and records a "tool_used" telemetry event describing it.
 *
 * The operation's outcome is never altered by telemetry: if building or
 * sending the success event fails (for example because `getProperties`
 * throws), the failure is logged at debug level and the operation's result is
 * still returned. Previously such a failure was caught by the same `catch` as
 * the operation, recorded as a tool failure and rethrown.
 *
 * @param toolName - Value reported as the event's `tool` property.
 * @param operation - Async (or sync) function to execute; called with no arguments.
 * @param getProperties - Optional function mapping the result to extra event properties.
 * @returns the value produced by `operation`.
 * @throws whatever `operation` throws, after a failure event has been tracked.
 */
async function trackTool(toolName, operation, getProperties) {
  const startTime = Date.now();
  let result;
  try {
    result = await operation();
  } catch (error) {
    analytics.track("tool_used", {
      tool: toolName,
      success: false,
      durationMs: Date.now() - startTime,
      errorType: error instanceof Error ? error.constructor.name : "UnknownError"
    });
    throw error;
  }
  const durationMs = Date.now() - startTime;
  try {
    analytics.track("tool_used", {
      tool: toolName,
      success: true,
      durationMs,
      ...getProperties ? getProperties(result) : {}
    });
  } catch (error) {
    // Telemetry is best-effort; a reporting problem must not fail the tool call.
    logger.debug(
      `Failed to track tool usage: ${error instanceof Error ? error.message : "Unknown error"}`
    );
  }
  return result;
}
|
|
395
|
+
/**
 * Returns the hostname component of `url`.
 *
 * @param url - Value handed to the `URL` constructor.
 * @returns {string} the parsed hostname, or "invalid-hostname" when parsing throws.
 */
function extractHostname(url) {
  let hostname = "invalid-hostname";
  try {
    ({ hostname } = new URL(url));
  } catch {
    // Unparseable input: keep the placeholder value.
  }
  return hostname;
}
|
|
403
|
+
/**
 * Classifies a URL or filesystem path by protocol.
 *
 * Windows drive paths (`C:\dir`, `C:/dir`) are detected BEFORE URL parsing:
 * the URL parser accepts them as a URL whose scheme is the drive letter, so
 * checking only after a parse failure reported the protocol as e.g. "c"
 * instead of "file".
 *
 * @param urlOrPath - URL string or local path.
 * @returns {string} the URL scheme without the trailing colon, "file" for
 *   absolute POSIX or Windows drive paths, or "unknown" otherwise.
 */
function extractProtocol(urlOrPath) {
  if (/^[A-Za-z]:[\\/]/.test(urlOrPath)) {
    return "file";
  }
  try {
    const parsed = new URL(urlOrPath);
    return parsed.protocol.replace(":", "");
  } catch {
    if (urlOrPath.startsWith("/") || /^[A-Za-z]:/.test(urlOrPath)) {
      return "file";
    }
    return "unknown";
  }
}
|
|
414
|
+
/**
 * Derives coarse characteristics of a search query without keeping its text.
 *
 * @param {string} query - Raw search query.
 * @returns {{length: number, wordCount: number, hasCodeTerms: boolean, hasSpecialChars: boolean}}
 *   `length` is the untrimmed string length; `wordCount` counts
 *   whitespace-separated words and is 0 for an empty or whitespace-only query
 *   (splitting an empty string yields one empty element, which used to be
 *   counted as a word).
 */
function analyzeSearchQuery(query) {
  const trimmed = query.trim();
  return {
    length: query.length,
    wordCount: trimmed === "" ? 0 : trimmed.split(/\s+/).length,
    hasCodeTerms: /\b(function|class|import|export|const|let|var|def|async|await)\b/i.test(query),
    hasSpecialChars: /[^\w\s]/.test(query)
  };
}
|
|
422
|
+
/**
 * Redacts URLs, paths and credential-like fragments from an error message and
 * caps the result at 200 characters.
 *
 * Rules are applied in the listed order; each works on the output of the
 * previous one.
 *
 * @param {string} message - Raw error message.
 * @returns {string} the redacted message, at most 200 characters long.
 */
function sanitizeErrorMessage(message) {
  const rules = [
    [/https?:\/\/[^\s]+/gi, "[url]"],
    [/file:\/\/[^\s]+/gi, "[file-url]"],
    [/\/[^\s]*\.[a-z]{2,4}/gi, "[path]"],
    [/[A-Za-z]:\\[^\s]+/g, "[path]"],
    [/Bearer\s+[^\s]+/gi, "Bearer [token]"],
    [/api[_-]?key[=:]\s*[^\s]+/gi, "api_key=[redacted]"],
    [/token[=:]\s*[^\s]+/gi, "token=[redacted]"]
  ];
  let redacted = message;
  for (const [pattern, replacement] of rules) {
    redacted = redacted.replace(pattern, replacement);
  }
  return redacted.substring(0, 200);
}
|
|
425
|
+
/**
 * Produces a telemetry-safe summary of a thrown value.
 *
 * JavaScript allows throwing anything, so non-object values (strings, null,
 * undefined, ...) are summarized as type "UnknownError" with their string form
 * as the message instead of raising a TypeError inside the sanitizer.
 *
 * @param error - The thrown value, normally an Error.
 * @returns {{type: string, message: string, hasStack: boolean}}
 *   `type` is the constructor name, `message` is the message passed through
 *   sanitizeErrorMessage, `hasStack` tells whether a stack was present.
 */
function sanitizeError(error) {
  const isObject = error !== null && typeof error === "object";
  let rawMessage = "";
  if (typeof error?.message === "string") {
    rawMessage = error.message;
  } else if (!isObject) {
    rawMessage = String(error);
  }
  return {
    type: isObject ? error.constructor?.name ?? "UnknownError" : "UnknownError",
    message: sanitizeErrorMessage(rawMessage),
    hasStack: isObject && Boolean(error.stack)
  };
}
|
|
432
|
+
/**
 * Selects the flag-like entries of an argument vector.
 *
 * @param {string[]} argv - Command-line arguments.
 * @returns {string[]} the entries beginning with "-" (which includes "--"
 *   long flags), in their original order and unmodified.
 */
function extractCliFlags(argv) {
  const flags = [];
  for (const arg of argv) {
    if (arg.startsWith("-")) {
      flags.push(arg);
    }
  }
  return flags;
}
|
|
435
|
+
// Version string inlined into this bundle and exposed as `packageJson.version`.
// NOTE(review): the diff header names release 1.21.1 while this literal is
// "1.21.0" - confirm against package.json.
const version = "1.21.0";
const packageJson = { version };
|
|
439
|
+
/**
 * @returns the `version` field of the module-level `packageJson` object.
 */
function getPackageVersion() {
  const { version: packageVersion } = packageJson;
  return packageVersion;
}
|
|
442
|
+
/**
 * Builds the session context object for a CLI invocation.
 *
 * @param command - Command name; any falsy value is reported as "unknown".
 * @param options - Optional object; `authEnabled` and `readOnly` are copied
 *   as-is (undefined when absent).
 * @returns a context with a fresh random `sessionId`, the current time as
 *   `startTime`, runtime details from `process`, and `servicesEnabled` fixed
 *   to `["worker"]`.
 */
function createCliSession(command, options) {
  const { authEnabled, readOnly } = options ?? {};
  const commandName = command || "unknown";
  return {
    sessionId: randomUUID(),
    interface: "cli",
    startTime: /* @__PURE__ */ new Date(),
    version: getPackageVersion(),
    platform: process.platform,
    nodeVersion: process.version,
    command: commandName,
    authEnabled,
    readOnly,
    // CLI typically runs embedded worker
    servicesEnabled: ["worker"]
  };
}
|
|
457
|
+
/**
 * Builds the session context object for an MCP server session.
 *
 * @param options - Required object. `protocol` falls back to "stdio" when
 *   falsy; `authEnabled` / `readOnly` fall back to false and `servicesEnabled`
 *   to `["mcp"]` when null or undefined; `transport` is copied as-is.
 * @returns a context with a fresh random `sessionId`, the current time as
 *   `startTime` and runtime details from `process`.
 */
function createMcpSession(options) {
  const { protocol, transport, authEnabled, readOnly, servicesEnabled } = options;
  return {
    sessionId: randomUUID(),
    interface: "mcp",
    startTime: /* @__PURE__ */ new Date(),
    version: getPackageVersion(),
    platform: process.platform,
    nodeVersion: process.version,
    protocol: protocol || "stdio",
    transport,
    authEnabled: authEnabled ?? false,
    readOnly: readOnly ?? false,
    servicesEnabled: servicesEnabled ?? ["mcp"]
  };
}
|
|
472
|
+
/**
 * Creates a small service object that forwards to the module-level
 * `analytics` instance.
 *
 * @returns an object with `startSession(context)`, `endSession()` and async
 *   `shutdown()`; none of them return a value.
 */
function createTelemetryService() {
  return {
    startSession(context) {
      analytics.startSession(context);
    },
    endSession() {
      analytics.endSession();
    },
    async shutdown() {
      await analytics.shutdown();
    }
  };
}
|
|
485
|
+
// Module-level telemetry service built by createTelemetryService().
const telemetryService = createTelemetryService();
|
|
486
|
+
/**
 * Builds an async request handler that authenticates requests through
 * `authManager`.
 *
 * The handler always asks `authManager.createAuthContext` for a context
 * (passing the Authorization header, or "" when absent) and stores it on
 * `request.auth`. When `authManager.authConfig.enabled` is falsy the request
 * is let through. Otherwise an unauthenticated context produces a 401 with a
 * `WWW-Authenticate` Bearer challenge: "invalid_token" when a header was
 * sent, "unauthorized" when it was missing. Any exception raised while
 * authenticating also produces a 401 "invalid_token" response.
 *
 * @param authManager - Object exposing `createAuthContext(header, request)`
 *   and `authConfig.enabled`.
 * @returns {(request, reply) => Promise<void>} the handler.
 */
function createAuthMiddleware(authManager) {
  // Sends a 401 carrying the given Bearer challenge and error body.
  const deny = (reply, challenge, error, error_description) => {
    reply.status(401).header("WWW-Authenticate", challenge).send({ error, error_description });
  };
  return async (request, reply) => {
    try {
      const authContext = await authManager.createAuthContext(
        request.headers.authorization || "",
        request
      );
      request.auth = authContext;
      if (!authManager.authConfig.enabled) {
        logger.debug("Authentication disabled, allowing request");
        return;
      }
      if (authContext.authenticated) {
        logger.debug(
          `Authentication successful for subject: ${authContext.subject || "anonymous"}`
        );
        return;
      }
      if (request.headers.authorization) {
        logger.debug("Token validation failed");
        deny(
          reply,
          'Bearer realm="MCP Server", error="invalid_token"',
          "invalid_token",
          "The access token is invalid"
        );
      } else {
        logger.debug("Missing authorization header");
        deny(reply, 'Bearer realm="MCP Server"', "unauthorized", "Authorization header required");
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : "Authentication failed";
      logger.debug(`Authentication error: ${message}`);
      deny(
        reply,
        'Bearer realm="MCP Server", error="invalid_token"',
        "invalid_token",
        "Token validation failed"
      );
    }
  };
}
|
|
533
|
+
class ProxyAuthManager {
|
|
534
|
+
constructor(config) {
|
|
535
|
+
this.config = config;
|
|
536
|
+
}
|
|
537
|
+
proxyProvider = null;
|
|
538
|
+
discoveredEndpoints = null;
|
|
539
|
+
jwks = null;
|
|
540
|
+
/**
|
|
541
|
+
* Get the authentication configuration
|
|
542
|
+
*/
|
|
543
|
+
get authConfig() {
|
|
544
|
+
return this.config;
|
|
545
|
+
}
|
|
546
|
+
/**
|
|
547
|
+
* Initialize the proxy auth manager with the configured OAuth provider.
|
|
548
|
+
*/
|
|
549
|
+
async initialize() {
|
|
550
|
+
if (!this.config.enabled) {
|
|
551
|
+
logger.debug("Authentication disabled, skipping proxy auth manager initialization");
|
|
552
|
+
return;
|
|
553
|
+
}
|
|
554
|
+
if (!this.config.issuerUrl || !this.config.audience) {
|
|
555
|
+
throw new Error("Issuer URL and Audience are required when auth is enabled");
|
|
556
|
+
}
|
|
557
|
+
try {
|
|
558
|
+
logger.info("🔐 Initializing OAuth2 proxy authentication...");
|
|
559
|
+
this.discoveredEndpoints = await this.discoverEndpoints();
|
|
560
|
+
if (this.discoveredEndpoints.jwksUri) {
|
|
561
|
+
this.jwks = createRemoteJWKSet(new URL(this.discoveredEndpoints.jwksUri));
|
|
562
|
+
logger.debug(`JWKS configured from: ${this.discoveredEndpoints.jwksUri}`);
|
|
563
|
+
}
|
|
564
|
+
const capabilities = [];
|
|
565
|
+
if (this.discoveredEndpoints.jwksUri) capabilities.push("JWT validation via JWKS");
|
|
566
|
+
if (this.discoveredEndpoints.userinfoUrl)
|
|
567
|
+
capabilities.push("opaque token validation via userinfo");
|
|
568
|
+
logger.debug(`Token validation capabilities: ${capabilities.join(", ")}`);
|
|
569
|
+
if (capabilities.length === 0) {
|
|
570
|
+
logger.warn(
|
|
571
|
+
"⚠️ No token validation mechanisms available - authentication may fail"
|
|
572
|
+
);
|
|
573
|
+
}
|
|
574
|
+
this.proxyProvider = new ProxyOAuthServerProvider({
|
|
575
|
+
endpoints: {
|
|
576
|
+
authorizationUrl: this.discoveredEndpoints.authorizationUrl,
|
|
577
|
+
tokenUrl: this.discoveredEndpoints.tokenUrl,
|
|
578
|
+
revocationUrl: this.discoveredEndpoints.revocationUrl,
|
|
579
|
+
registrationUrl: this.discoveredEndpoints.registrationUrl
|
|
580
|
+
},
|
|
581
|
+
verifyAccessToken: this.verifyAccessToken.bind(this),
|
|
582
|
+
getClient: this.getClient.bind(this)
|
|
583
|
+
});
|
|
584
|
+
logger.info("✅ OAuth2 proxy authentication initialized successfully");
|
|
585
|
+
} catch (error) {
|
|
586
|
+
const message = error instanceof Error ? error.message : "Unknown error";
|
|
587
|
+
logger.error(`❌ Failed to initialize OAuth2 proxy authentication: ${message}`);
|
|
588
|
+
throw new Error(`Proxy authentication initialization failed: ${message}`);
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
/**
|
|
592
|
+
* Register OAuth2 endpoints on the Fastify server.
|
|
593
|
+
* This manually implements the necessary OAuth2 endpoints using the proxy provider.
|
|
594
|
+
*/
|
|
595
|
+
registerRoutes(server, baseUrl) {
|
|
596
|
+
if (!this.proxyProvider) {
|
|
597
|
+
throw new Error("Proxy provider not initialized");
|
|
598
|
+
}
|
|
599
|
+
server.get("/.well-known/oauth-authorization-server", async (_request, reply) => {
|
|
600
|
+
const metadata = {
|
|
601
|
+
issuer: baseUrl.origin,
|
|
602
|
+
authorization_endpoint: `${baseUrl.origin}/oauth/authorize`,
|
|
603
|
+
token_endpoint: `${baseUrl.origin}/oauth/token`,
|
|
604
|
+
revocation_endpoint: `${baseUrl.origin}/oauth/revoke`,
|
|
605
|
+
registration_endpoint: `${baseUrl.origin}/oauth/register`,
|
|
606
|
+
scopes_supported: ["profile", "email"],
|
|
607
|
+
response_types_supported: ["code"],
|
|
608
|
+
grant_types_supported: ["authorization_code", "refresh_token"],
|
|
609
|
+
token_endpoint_auth_methods_supported: [
|
|
610
|
+
"client_secret_basic",
|
|
611
|
+
"client_secret_post",
|
|
612
|
+
"none"
|
|
613
|
+
],
|
|
614
|
+
code_challenge_methods_supported: ["S256"]
|
|
615
|
+
};
|
|
616
|
+
reply.type("application/json").send(metadata);
|
|
617
|
+
});
|
|
618
|
+
server.get("/.well-known/oauth-protected-resource", async (request, reply) => {
|
|
619
|
+
const baseUrl2 = `${request.protocol}://${request.headers.host}`;
|
|
620
|
+
const metadata = {
|
|
621
|
+
resource: `${baseUrl2}/sse`,
|
|
622
|
+
authorization_servers: [this.config.issuerUrl],
|
|
623
|
+
scopes_supported: ["profile", "email"],
|
|
624
|
+
bearer_methods_supported: ["header"],
|
|
625
|
+
resource_name: "Documentation MCP Server",
|
|
626
|
+
resource_documentation: "https://github.com/arabold/docs-mcp-server#readme",
|
|
627
|
+
// Enhanced metadata for better discoverability
|
|
628
|
+
resource_server_metadata_url: `${baseUrl2}/.well-known/oauth-protected-resource`,
|
|
629
|
+
authorization_server_metadata_url: `${this.config.issuerUrl}/.well-known/openid-configuration`,
|
|
630
|
+
jwks_uri: `${this.config.issuerUrl}/.well-known/jwks.json`,
|
|
631
|
+
// Supported MCP transports
|
|
632
|
+
mcp_transports: [
|
|
633
|
+
{
|
|
634
|
+
transport: "sse",
|
|
635
|
+
endpoint: `${baseUrl2}/sse`,
|
|
636
|
+
description: "Server-Sent Events transport"
|
|
637
|
+
},
|
|
638
|
+
{
|
|
639
|
+
transport: "http",
|
|
640
|
+
endpoint: `${baseUrl2}/mcp`,
|
|
641
|
+
description: "Streaming HTTP transport"
|
|
642
|
+
}
|
|
643
|
+
]
|
|
644
|
+
};
|
|
645
|
+
reply.type("application/json").send(metadata);
|
|
646
|
+
});
|
|
647
|
+
server.get("/oauth/authorize", async (request, reply) => {
|
|
648
|
+
const endpoints = await this.discoverEndpoints();
|
|
649
|
+
const params = new URLSearchParams(request.query);
|
|
650
|
+
if (!params.has("resource")) {
|
|
651
|
+
const resourceUrl = `${request.protocol}://${request.headers.host}/sse`;
|
|
652
|
+
params.set("resource", resourceUrl);
|
|
653
|
+
}
|
|
654
|
+
const redirectUrl = `${endpoints.authorizationUrl}?${params.toString()}`;
|
|
655
|
+
reply.redirect(redirectUrl);
|
|
656
|
+
});
|
|
657
|
+
server.post("/oauth/token", async (request, reply) => {
|
|
658
|
+
const endpoints = await this.discoverEndpoints();
|
|
659
|
+
const tokenBody = new URLSearchParams(request.body);
|
|
660
|
+
if (!tokenBody.has("resource")) {
|
|
661
|
+
const resourceUrl = `${request.protocol}://${request.headers.host}/sse`;
|
|
662
|
+
tokenBody.set("resource", resourceUrl);
|
|
663
|
+
}
|
|
664
|
+
const response = await fetch(endpoints.tokenUrl, {
|
|
665
|
+
method: "POST",
|
|
666
|
+
headers: {
|
|
667
|
+
"Content-Type": "application/x-www-form-urlencoded"
|
|
668
|
+
},
|
|
669
|
+
body: tokenBody.toString()
|
|
670
|
+
});
|
|
671
|
+
const data = await response.json();
|
|
672
|
+
reply.status(response.status).type("application/json").send(data);
|
|
673
|
+
});
|
|
674
|
+
server.post("/oauth/revoke", async (request, reply) => {
|
|
675
|
+
const endpoints = await this.discoverEndpoints();
|
|
676
|
+
if (endpoints.revocationUrl) {
|
|
677
|
+
const response = await fetch(endpoints.revocationUrl, {
|
|
678
|
+
method: "POST",
|
|
679
|
+
headers: {
|
|
680
|
+
"Content-Type": "application/x-www-form-urlencoded"
|
|
681
|
+
},
|
|
682
|
+
body: new URLSearchParams(request.body).toString()
|
|
683
|
+
});
|
|
684
|
+
reply.status(response.status).send();
|
|
685
|
+
} else {
|
|
686
|
+
reply.status(404).send({ error: "Revocation not supported" });
|
|
687
|
+
}
|
|
688
|
+
});
|
|
689
|
+
server.post("/oauth/register", async (request, reply) => {
|
|
690
|
+
const endpoints = await this.discoverEndpoints();
|
|
691
|
+
if (endpoints.registrationUrl) {
|
|
692
|
+
const response = await fetch(endpoints.registrationUrl, {
|
|
693
|
+
method: "POST",
|
|
694
|
+
headers: {
|
|
695
|
+
"Content-Type": "application/json"
|
|
696
|
+
},
|
|
697
|
+
body: JSON.stringify(request.body)
|
|
698
|
+
});
|
|
699
|
+
const data = await response.json();
|
|
700
|
+
reply.status(response.status).type("application/json").send(data);
|
|
701
|
+
} else {
|
|
702
|
+
reply.status(404).send({ error: "Dynamic client registration not supported" });
|
|
703
|
+
}
|
|
704
|
+
});
|
|
705
|
+
logger.debug("OAuth2 endpoints registered on Fastify server");
|
|
706
|
+
}
|
|
707
|
+
/**
|
|
708
|
+
* Discover OAuth endpoints from the OAuth2 authorization server.
|
|
709
|
+
* Uses OAuth2 discovery (RFC 8414) with OIDC discovery fallback.
|
|
710
|
+
* Supports both JWT and opaque token validation methods.
|
|
711
|
+
*/
|
|
712
|
+
async discoverEndpoints() {
|
|
713
|
+
const oauthDiscoveryUrl = `${this.config.issuerUrl}/.well-known/oauth-authorization-server`;
|
|
714
|
+
try {
|
|
715
|
+
const oauthResponse = await fetch(oauthDiscoveryUrl);
|
|
716
|
+
if (oauthResponse.ok) {
|
|
717
|
+
const config2 = await oauthResponse.json();
|
|
718
|
+
logger.debug(
|
|
719
|
+
`Successfully discovered OAuth2 endpoints from: ${oauthDiscoveryUrl}`
|
|
720
|
+
);
|
|
721
|
+
const userinfoEndpoint = await this.discoverUserinfoEndpoint();
|
|
722
|
+
if (userinfoEndpoint) {
|
|
723
|
+
config2.userinfo_endpoint = userinfoEndpoint;
|
|
724
|
+
}
|
|
725
|
+
return this.buildEndpointsFromConfig(config2);
|
|
726
|
+
}
|
|
727
|
+
} catch (error) {
|
|
728
|
+
logger.debug(`OAuth2 discovery failed: ${error}, trying OIDC discovery`);
|
|
729
|
+
}
|
|
730
|
+
const oidcDiscoveryUrl = `${this.config.issuerUrl}/.well-known/openid-configuration`;
|
|
731
|
+
const oidcResponse = await fetch(oidcDiscoveryUrl);
|
|
732
|
+
if (!oidcResponse.ok) {
|
|
733
|
+
throw new Error(
|
|
734
|
+
`Failed to fetch configuration from both ${oauthDiscoveryUrl} and ${oidcDiscoveryUrl}`
|
|
735
|
+
);
|
|
736
|
+
}
|
|
737
|
+
const config = await oidcResponse.json();
|
|
738
|
+
logger.debug(`Successfully discovered OIDC endpoints from: ${oidcDiscoveryUrl}`);
|
|
739
|
+
return this.buildEndpointsFromConfig(config);
|
|
740
|
+
}
|
|
741
|
+
/**
|
|
742
|
+
* Try to discover userinfo endpoint for opaque token validation
|
|
743
|
+
*/
|
|
744
|
+
async discoverUserinfoEndpoint() {
|
|
745
|
+
try {
|
|
746
|
+
const oidcDiscoveryUrl = `${this.config.issuerUrl}/.well-known/openid-configuration`;
|
|
747
|
+
const response = await fetch(oidcDiscoveryUrl);
|
|
748
|
+
if (response.ok) {
|
|
749
|
+
const config = await response.json();
|
|
750
|
+
return config.userinfo_endpoint || null;
|
|
751
|
+
}
|
|
752
|
+
} catch (error) {
|
|
753
|
+
logger.debug(`Failed to fetch userinfo endpoint: ${error}`);
|
|
754
|
+
}
|
|
755
|
+
return null;
|
|
756
|
+
}
|
|
757
|
+
/**
|
|
758
|
+
* Build endpoint configuration from discovery response.
|
|
759
|
+
*/
|
|
760
|
+
buildEndpointsFromConfig(config) {
|
|
761
|
+
return {
|
|
762
|
+
authorizationUrl: config.authorization_endpoint,
|
|
763
|
+
tokenUrl: config.token_endpoint,
|
|
764
|
+
revocationUrl: config.revocation_endpoint,
|
|
765
|
+
registrationUrl: config.registration_endpoint,
|
|
766
|
+
jwksUri: config.jwks_uri,
|
|
767
|
+
userinfoUrl: config.userinfo_endpoint
|
|
768
|
+
};
|
|
769
|
+
}
|
|
770
|
+
/**
|
|
771
|
+
* Get supported resource URLs for this MCP server instance.
|
|
772
|
+
* This enables self-discovering resource validation per MCP Authorization spec.
|
|
773
|
+
*/
|
|
774
|
+
getSupportedResources(request) {
|
|
775
|
+
const baseUrl = `${request.protocol}://${request.headers.host}`;
|
|
776
|
+
return [
|
|
777
|
+
`${baseUrl}/sse`,
|
|
778
|
+
// SSE transport
|
|
779
|
+
`${baseUrl}/mcp`,
|
|
780
|
+
// Streaming HTTP transport
|
|
781
|
+
`${baseUrl}`
|
|
782
|
+
// Server root
|
|
783
|
+
];
|
|
784
|
+
}
|
|
785
|
+
/**
|
|
786
|
+
* Verify an access token using hybrid validation approach.
|
|
787
|
+
* First tries JWT validation with JWKS, falls back to userinfo endpoint for opaque tokens.
|
|
788
|
+
* This provides universal compatibility with all OAuth2 providers and token formats.
|
|
789
|
+
*/
|
|
790
|
+
async verifyAccessToken(token, request) {
|
|
791
|
+
logger.debug(`Attempting to verify token: ${token.substring(0, 20)}...`);
|
|
792
|
+
if (this.jwks) {
|
|
793
|
+
try {
|
|
794
|
+
logger.debug("Attempting JWT validation with JWKS...");
|
|
795
|
+
const { payload } = await jwtVerify(token, this.jwks, {
|
|
796
|
+
issuer: this.config.issuerUrl,
|
|
797
|
+
audience: this.config.audience
|
|
798
|
+
});
|
|
799
|
+
logger.debug(
|
|
800
|
+
`JWT validation successful. Subject: ${payload.sub}, Audience: ${payload.aud}`
|
|
801
|
+
);
|
|
802
|
+
if (!payload.sub) {
|
|
803
|
+
throw new Error("JWT payload missing subject claim");
|
|
804
|
+
}
|
|
805
|
+
return {
|
|
806
|
+
token,
|
|
807
|
+
clientId: payload.sub,
|
|
808
|
+
scopes: ["*"]
|
|
809
|
+
// Full access for all authenticated users
|
|
810
|
+
};
|
|
811
|
+
} catch (error) {
|
|
812
|
+
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
|
813
|
+
logger.debug(
|
|
814
|
+
`JWT validation failed: ${errorMessage}, trying userinfo fallback...`
|
|
815
|
+
);
|
|
816
|
+
}
|
|
817
|
+
}
|
|
818
|
+
if (this.discoveredEndpoints?.userinfoUrl) {
|
|
819
|
+
try {
|
|
820
|
+
logger.debug("Attempting userinfo endpoint validation...");
|
|
821
|
+
const response = await fetch(this.discoveredEndpoints.userinfoUrl, {
|
|
822
|
+
method: "GET",
|
|
823
|
+
headers: {
|
|
824
|
+
Authorization: `Bearer ${token}`,
|
|
825
|
+
Accept: "application/json"
|
|
826
|
+
}
|
|
827
|
+
});
|
|
828
|
+
if (!response.ok) {
|
|
829
|
+
throw new Error(
|
|
830
|
+
`Userinfo request failed: ${response.status} ${response.statusText}`
|
|
831
|
+
);
|
|
832
|
+
}
|
|
833
|
+
const userinfo = await response.json();
|
|
834
|
+
logger.debug(
|
|
835
|
+
`Token validation successful. User: ${userinfo.sub}, Email: ${userinfo.email}`
|
|
836
|
+
);
|
|
837
|
+
if (!userinfo.sub) {
|
|
838
|
+
throw new Error("Userinfo response missing subject");
|
|
839
|
+
}
|
|
840
|
+
if (request) {
|
|
841
|
+
const supportedResources = this.getSupportedResources(request);
|
|
842
|
+
logger.debug(`Supported resources: ${JSON.stringify(supportedResources)}`);
|
|
843
|
+
}
|
|
844
|
+
return {
|
|
845
|
+
token,
|
|
846
|
+
clientId: userinfo.sub,
|
|
847
|
+
scopes: ["*"]
|
|
848
|
+
// Full access for all authenticated users
|
|
849
|
+
};
|
|
850
|
+
} catch (error) {
|
|
851
|
+
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
|
852
|
+
logger.debug(`Userinfo validation failed: ${errorMessage}`);
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
logger.debug("All token validation strategies exhausted");
|
|
856
|
+
throw new Error("Invalid access token");
|
|
857
|
+
}
|
|
858
|
+
/**
|
|
859
|
+
* Get client information for the given client ID.
|
|
860
|
+
* This is called by the proxy provider for client validation.
|
|
861
|
+
*/
|
|
862
|
+
async getClient(clientId) {
|
|
863
|
+
return {
|
|
864
|
+
client_id: clientId,
|
|
865
|
+
redirect_uris: [`${this.config.audience}/callback`]
|
|
866
|
+
// Add other client metadata as needed
|
|
867
|
+
};
|
|
868
|
+
}
|
|
869
|
+
/**
|
|
870
|
+
* Create an authentication context from a token (for compatibility with existing middleware).
|
|
871
|
+
* Uses binary authentication - valid token grants full access.
|
|
872
|
+
*/
|
|
873
|
+
async createAuthContext(authorization, request) {
|
|
874
|
+
if (!this.config.enabled) {
|
|
875
|
+
return {
|
|
876
|
+
authenticated: false,
|
|
877
|
+
scopes: /* @__PURE__ */ new Set()
|
|
878
|
+
};
|
|
879
|
+
}
|
|
880
|
+
try {
|
|
881
|
+
logger.debug(
|
|
882
|
+
`Processing authorization header: ${authorization.substring(0, 20)}...`
|
|
883
|
+
);
|
|
884
|
+
const match = authorization.match(/^Bearer\s+(.+)$/i);
|
|
885
|
+
if (!match) {
|
|
886
|
+
logger.debug("Authorization header does not match Bearer token pattern");
|
|
887
|
+
throw new Error("Invalid authorization header format");
|
|
888
|
+
}
|
|
889
|
+
const token = match[1];
|
|
890
|
+
logger.debug(`Extracted token: ${token.substring(0, 20)}...`);
|
|
891
|
+
const authInfo = await this.verifyAccessToken(token, request);
|
|
892
|
+
logger.debug(`Authentication successful for client: ${authInfo.clientId}`);
|
|
893
|
+
return {
|
|
894
|
+
authenticated: true,
|
|
895
|
+
scopes: /* @__PURE__ */ new Set(["*"]),
|
|
896
|
+
// Full access for authenticated users
|
|
897
|
+
subject: authInfo.clientId
|
|
898
|
+
};
|
|
899
|
+
} catch (error) {
|
|
900
|
+
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
|
901
|
+
logger.debug(`Authentication failed: ${errorMessage}`);
|
|
902
|
+
return {
|
|
903
|
+
authenticated: false,
|
|
904
|
+
scopes: /* @__PURE__ */ new Set()
|
|
905
|
+
};
|
|
906
|
+
}
|
|
907
|
+
}
|
|
908
|
+
}
|
|
108
909
|
var PipelineJobStatus = /* @__PURE__ */ ((PipelineJobStatus2) => {
|
|
109
910
|
PipelineJobStatus2["QUEUED"] = "queued";
|
|
110
911
|
PipelineJobStatus2["RUNNING"] = "running";
|
|
@@ -185,7 +986,7 @@ class ClearCompletedJobsTool {
|
|
|
185
986
|
try {
|
|
186
987
|
const clearedCount = await this.pipeline.clearCompletedJobs();
|
|
187
988
|
const message = clearedCount > 0 ? `Successfully cleared ${clearedCount} completed job${clearedCount === 1 ? "" : "s"} from the queue.` : "No completed jobs to clear.";
|
|
188
|
-
logger.debug(
|
|
989
|
+
logger.debug(message);
|
|
189
990
|
return {
|
|
190
991
|
message,
|
|
191
992
|
success: true,
|
|
@@ -193,7 +994,7 @@ class ClearCompletedJobsTool {
|
|
|
193
994
|
};
|
|
194
995
|
} catch (error) {
|
|
195
996
|
const errorMessage = `Failed to clear completed jobs: ${error instanceof Error ? error.message : String(error)}`;
|
|
196
|
-
logger.error(`❌
|
|
997
|
+
logger.error(`❌ ${errorMessage}`);
|
|
197
998
|
return {
|
|
198
999
|
message: errorMessage,
|
|
199
1000
|
success: false,
|
|
@@ -1139,7 +1940,7 @@ class FetchUrlTool {
|
|
|
1139
1940
|
async execute(options) {
|
|
1140
1941
|
const { url, scrapeMode = ScrapeMode.Auto, headers } = options;
|
|
1141
1942
|
const canFetchResults = this.fetchers.map((f) => f.canFetch(url));
|
|
1142
|
-
const fetcherIndex = canFetchResults.
|
|
1943
|
+
const fetcherIndex = canFetchResults.indexOf(true);
|
|
1143
1944
|
if (fetcherIndex === -1) {
|
|
1144
1945
|
throw new ToolError(
|
|
1145
1946
|
`Invalid URL: ${url}. Must be an HTTP/HTTPS URL or a file:// URL.`,
|
|
@@ -1372,14 +2173,13 @@ class RemoveTool {
|
|
|
1372
2173
|
this.pipeline = pipeline;
|
|
1373
2174
|
}
|
|
1374
2175
|
/**
|
|
1375
|
-
* Executes the tool to remove the specified library version
|
|
2176
|
+
* Executes the tool to remove the specified library version completely.
|
|
1376
2177
|
* Aborts any QUEUED/RUNNING job for the same library+version before deleting.
|
|
2178
|
+
* Removes all documents, the version record, and the library if no other versions exist.
|
|
1377
2179
|
*/
|
|
1378
2180
|
async execute(args) {
|
|
1379
2181
|
const { library, version: version2 } = args;
|
|
1380
|
-
logger.info(
|
|
1381
|
-
`🗑️ Removing library: ${library}${version2 ? `, version: ${version2}` : " (unversioned)"}`
|
|
1382
|
-
);
|
|
2182
|
+
logger.info(`🗑️ Removing library: ${library}${version2 ? `@${version2}` : ""}`);
|
|
1383
2183
|
try {
|
|
1384
2184
|
const allJobs = await this.pipeline.getJobs();
|
|
1385
2185
|
const jobs = allJobs.filter(
|
|
@@ -1392,12 +2192,12 @@ class RemoveTool {
|
|
|
1392
2192
|
await this.pipeline.cancelJob(job.id);
|
|
1393
2193
|
await this.pipeline.waitForJobCompletion(job.id);
|
|
1394
2194
|
}
|
|
1395
|
-
await this.documentManagementService.
|
|
1396
|
-
const message = `Successfully removed
|
|
2195
|
+
await this.documentManagementService.removeVersion(library, version2);
|
|
2196
|
+
const message = `Successfully removed ${library}${version2 ? `@${version2}` : ""}.`;
|
|
1397
2197
|
logger.info(`✅ ${message}`);
|
|
1398
2198
|
return { message };
|
|
1399
2199
|
} catch (error) {
|
|
1400
|
-
const errorMessage = `Failed to remove
|
|
2200
|
+
const errorMessage = `Failed to remove ${library}${version2 ? `@${version2}` : ""}: ${error instanceof Error ? error.message : String(error)}`;
|
|
1401
2201
|
logger.error(`❌ Error removing library: ${errorMessage}`);
|
|
1402
2202
|
throw new ToolError(errorMessage, this.constructor.name);
|
|
1403
2203
|
}
|
|
@@ -1546,7 +2346,7 @@ function createError(text) {
|
|
|
1546
2346
|
isError: true
|
|
1547
2347
|
};
|
|
1548
2348
|
}
|
|
1549
|
-
function createMcpServerInstance(tools) {
|
|
2349
|
+
function createMcpServerInstance(tools, readOnly = false) {
|
|
1550
2350
|
const server = new McpServer(
|
|
1551
2351
|
{
|
|
1552
2352
|
name: "docs-mcp-server",
|
|
@@ -1560,54 +2360,56 @@ function createMcpServerInstance(tools) {
|
|
|
1560
2360
|
}
|
|
1561
2361
|
}
|
|
1562
2362
|
);
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
2363
|
+
if (!readOnly) {
|
|
2364
|
+
server.tool(
|
|
2365
|
+
"scrape_docs",
|
|
2366
|
+
"Scrape and index documentation from a URL for a library. Use this tool to index a new library or a new version.",
|
|
2367
|
+
{
|
|
2368
|
+
url: z.string().url().describe("Documentation root URL to scrape."),
|
|
2369
|
+
library: z.string().describe("Library name."),
|
|
2370
|
+
version: z.string().optional().describe("Library version (optional)."),
|
|
2371
|
+
maxPages: z.number().optional().default(DEFAULT_MAX_PAGES).describe(`Maximum number of pages to scrape (default: ${DEFAULT_MAX_PAGES}).`),
|
|
2372
|
+
maxDepth: z.number().optional().default(DEFAULT_MAX_DEPTH$1).describe(`Maximum navigation depth (default: ${DEFAULT_MAX_DEPTH$1}).`),
|
|
2373
|
+
scope: z.enum(["subpages", "hostname", "domain"]).optional().default("subpages").describe("Crawling boundary: 'subpages', 'hostname', or 'domain'."),
|
|
2374
|
+
followRedirects: z.boolean().optional().default(true).describe("Follow HTTP redirects (3xx responses).")
|
|
2375
|
+
},
|
|
2376
|
+
{
|
|
2377
|
+
title: "Scrape New Library Documentation",
|
|
2378
|
+
destructiveHint: true,
|
|
2379
|
+
// replaces existing docs
|
|
2380
|
+
openWorldHint: true
|
|
2381
|
+
// requires internet access
|
|
2382
|
+
},
|
|
2383
|
+
async ({ url, library, version: version2, maxPages, maxDepth, scope, followRedirects }) => {
|
|
2384
|
+
try {
|
|
2385
|
+
const result = await tools.scrape.execute({
|
|
2386
|
+
url,
|
|
2387
|
+
library,
|
|
2388
|
+
version: version2,
|
|
2389
|
+
waitForCompletion: false,
|
|
2390
|
+
// Don't wait for completion
|
|
2391
|
+
// onProgress: undefined, // Explicitly undefined or omitted
|
|
2392
|
+
options: {
|
|
2393
|
+
maxPages,
|
|
2394
|
+
maxDepth,
|
|
2395
|
+
scope,
|
|
2396
|
+
followRedirects
|
|
2397
|
+
}
|
|
2398
|
+
});
|
|
2399
|
+
if ("jobId" in result) {
|
|
2400
|
+
return createResponse(`🚀 Scraping job started with ID: ${result.jobId}.`);
|
|
1596
2401
|
}
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
2402
|
+
return createResponse(
|
|
2403
|
+
`Scraping finished immediately (unexpectedly) with ${result.pagesScraped} pages.`
|
|
2404
|
+
);
|
|
2405
|
+
} catch (error) {
|
|
2406
|
+
return createError(
|
|
2407
|
+
`Failed to scrape documentation: ${error instanceof Error ? error.message : String(error)}`
|
|
2408
|
+
);
|
|
1600
2409
|
}
|
|
1601
|
-
return createResponse(
|
|
1602
|
-
`Scraping finished immediately (unexpectedly) with ${result.pagesScraped} pages.`
|
|
1603
|
-
);
|
|
1604
|
-
} catch (error) {
|
|
1605
|
-
return createError(
|
|
1606
|
-
`Failed to scrape documentation: ${error instanceof Error ? error.message : String(error)}`
|
|
1607
|
-
);
|
|
1608
2410
|
}
|
|
1609
|
-
|
|
1610
|
-
|
|
2411
|
+
);
|
|
2412
|
+
}
|
|
1611
2413
|
server.tool(
|
|
1612
2414
|
"search_docs",
|
|
1613
2415
|
'Search up-to-date documentation for a library or package. Examples:\n\n- {library: "react", query: "hooks lifecycle"} -> matches latest version of React\n- {library: "react", version: "18.0.0", query: "hooks lifecycle"} -> matches React 18.0.0 or earlier\n- {library: "typescript", version: "5.x", query: "ReturnType example"} -> any TypeScript 5.x.x version\n- {library: "typescript", version: "5.2.x", query: "ReturnType example"} -> any TypeScript 5.2.x version',
|
|
@@ -1728,24 +2530,25 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
|
|
|
1728
2530
|
}
|
|
1729
2531
|
}
|
|
1730
2532
|
);
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
2533
|
+
if (!readOnly) {
|
|
2534
|
+
server.tool(
|
|
2535
|
+
"list_jobs",
|
|
2536
|
+
"List all indexing jobs. Optionally filter by status.",
|
|
2537
|
+
{
|
|
2538
|
+
status: z.enum(["queued", "running", "completed", "failed", "cancelling", "cancelled"]).optional().describe("Filter jobs by status (optional).")
|
|
2539
|
+
},
|
|
2540
|
+
{
|
|
2541
|
+
title: "List Indexing Jobs",
|
|
2542
|
+
readOnlyHint: true,
|
|
2543
|
+
destructiveHint: false
|
|
2544
|
+
},
|
|
2545
|
+
async ({ status }) => {
|
|
2546
|
+
try {
|
|
2547
|
+
const result = await tools.listJobs.execute({
|
|
2548
|
+
status
|
|
2549
|
+
});
|
|
2550
|
+
const formattedJobs = result.jobs.map(
|
|
2551
|
+
(job) => `- ID: ${job.id}
|
|
1749
2552
|
Status: ${job.status}
|
|
1750
2553
|
Library: ${job.library}
|
|
1751
2554
|
Version: ${job.version}
|
|
@@ -1753,100 +2556,101 @@ ${result.libraries.map((lib) => `- ${lib.name}`).join("\n")}`
|
|
|
1753
2556
|
Started: ${job.startedAt}` : ""}${job.finishedAt ? `
|
|
1754
2557
|
Finished: ${job.finishedAt}` : ""}${job.error ? `
|
|
1755
2558
|
Error: ${job.error}` : ""}`
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
2559
|
+
).join("\n\n");
|
|
2560
|
+
return createResponse(
|
|
2561
|
+
result.jobs.length > 0 ? `Current Jobs:
|
|
1759
2562
|
|
|
1760
2563
|
${formattedJobs}` : "No jobs found."
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
}
|
|
1767
|
-
}
|
|
1768
|
-
);
|
|
1769
|
-
server.tool(
|
|
1770
|
-
"get_job_info",
|
|
1771
|
-
"Get details for a specific indexing job. Use the 'list_jobs' tool to find the job ID.",
|
|
1772
|
-
{
|
|
1773
|
-
jobId: z.string().uuid().describe("Job ID to query.")
|
|
1774
|
-
},
|
|
1775
|
-
{
|
|
1776
|
-
title: "Get Indexing Job Info",
|
|
1777
|
-
readOnlyHint: true,
|
|
1778
|
-
destructiveHint: false
|
|
1779
|
-
},
|
|
1780
|
-
async ({ jobId }) => {
|
|
1781
|
-
try {
|
|
1782
|
-
const result = await tools.getJobInfo.execute({ jobId });
|
|
1783
|
-
if (!result.job) {
|
|
1784
|
-
return createError(`Job with ID ${jobId} not found.`);
|
|
2564
|
+
);
|
|
2565
|
+
} catch (error) {
|
|
2566
|
+
return createError(
|
|
2567
|
+
`Failed to list jobs: ${error instanceof Error ? error.message : String(error)}`
|
|
2568
|
+
);
|
|
1785
2569
|
}
|
|
1786
|
-
|
|
1787
|
-
|
|
2570
|
+
}
|
|
2571
|
+
);
|
|
2572
|
+
server.tool(
|
|
2573
|
+
"get_job_info",
|
|
2574
|
+
"Get details for a specific indexing job. Use the 'list_jobs' tool to find the job ID.",
|
|
2575
|
+
{
|
|
2576
|
+
jobId: z.string().uuid().describe("Job ID to query.")
|
|
2577
|
+
},
|
|
2578
|
+
{
|
|
2579
|
+
title: "Get Indexing Job Info",
|
|
2580
|
+
readOnlyHint: true,
|
|
2581
|
+
destructiveHint: false
|
|
2582
|
+
},
|
|
2583
|
+
async ({ jobId }) => {
|
|
2584
|
+
try {
|
|
2585
|
+
const result = await tools.getJobInfo.execute({ jobId });
|
|
2586
|
+
if (!result.job) {
|
|
2587
|
+
return createError(`Job with ID ${jobId} not found.`);
|
|
2588
|
+
}
|
|
2589
|
+
const job = result.job;
|
|
2590
|
+
const formattedJob = `- ID: ${job.id}
|
|
1788
2591
|
Status: ${job.status}
|
|
1789
2592
|
Library: ${job.library}@${job.version}
|
|
1790
2593
|
Created: ${job.createdAt}${job.startedAt ? `
|
|
1791
2594
|
Started: ${job.startedAt}` : ""}${job.finishedAt ? `
|
|
1792
2595
|
Finished: ${job.finishedAt}` : ""}${job.error ? `
|
|
1793
2596
|
Error: ${job.error}` : ""}`;
|
|
1794
|
-
|
|
2597
|
+
return createResponse(`Job Info:
|
|
1795
2598
|
|
|
1796
2599
|
${formattedJob}`);
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
2600
|
+
} catch (error) {
|
|
2601
|
+
return createError(
|
|
2602
|
+
`Failed to get job info for ${jobId}: ${error instanceof Error ? error.message : String(error)}`
|
|
2603
|
+
);
|
|
2604
|
+
}
|
|
2605
|
+
}
|
|
2606
|
+
);
|
|
2607
|
+
server.tool(
|
|
2608
|
+
"cancel_job",
|
|
2609
|
+
"Cancel a queued or running indexing job. Use the 'list_jobs' tool to find the job ID.",
|
|
2610
|
+
{
|
|
2611
|
+
jobId: z.string().uuid().describe("Job ID to cancel.")
|
|
2612
|
+
},
|
|
2613
|
+
{
|
|
2614
|
+
title: "Cancel Indexing Job",
|
|
2615
|
+
destructiveHint: true
|
|
2616
|
+
},
|
|
2617
|
+
async ({ jobId }) => {
|
|
2618
|
+
try {
|
|
2619
|
+
const result = await tools.cancelJob.execute({ jobId });
|
|
2620
|
+
if (result.success) {
|
|
2621
|
+
return createResponse(result.message);
|
|
2622
|
+
}
|
|
2623
|
+
return createError(result.message);
|
|
2624
|
+
} catch (error) {
|
|
2625
|
+
return createError(
|
|
2626
|
+
`Failed to cancel job ${jobId}: ${error instanceof Error ? error.message : String(error)}`
|
|
2627
|
+
);
|
|
2628
|
+
}
|
|
1801
2629
|
}
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
if (result.success) {
|
|
2630
|
+
);
|
|
2631
|
+
server.tool(
|
|
2632
|
+
"remove_docs",
|
|
2633
|
+
"Remove indexed documentation for a library version. Use only if explicitly instructed.",
|
|
2634
|
+
{
|
|
2635
|
+
library: z.string().describe("Library name."),
|
|
2636
|
+
version: z.string().optional().describe("Library version (optional, removes unversioned if omitted).")
|
|
2637
|
+
},
|
|
2638
|
+
{
|
|
2639
|
+
title: "Remove Library Documentation",
|
|
2640
|
+
destructiveHint: true
|
|
2641
|
+
},
|
|
2642
|
+
async ({ library, version: version2 }) => {
|
|
2643
|
+
try {
|
|
2644
|
+
const result = await tools.remove.execute({ library, version: version2 });
|
|
1818
2645
|
return createResponse(result.message);
|
|
2646
|
+
} catch (error) {
|
|
2647
|
+
return createError(
|
|
2648
|
+
`Failed to remove documents: ${error instanceof Error ? error.message : String(error)}`
|
|
2649
|
+
);
|
|
1819
2650
|
}
|
|
1820
|
-
return createError(result.message);
|
|
1821
|
-
} catch (error) {
|
|
1822
|
-
return createError(
|
|
1823
|
-
`Failed to cancel job ${jobId}: ${error instanceof Error ? error.message : String(error)}`
|
|
1824
|
-
);
|
|
1825
|
-
}
|
|
1826
|
-
}
|
|
1827
|
-
);
|
|
1828
|
-
server.tool(
|
|
1829
|
-
"remove_docs",
|
|
1830
|
-
"Remove indexed documentation for a library version. Use only if explicitly instructed.",
|
|
1831
|
-
{
|
|
1832
|
-
library: z.string().describe("Library name."),
|
|
1833
|
-
version: z.string().optional().describe("Library version (optional, removes unversioned if omitted).")
|
|
1834
|
-
},
|
|
1835
|
-
{
|
|
1836
|
-
title: "Remove Library Documentation",
|
|
1837
|
-
destructiveHint: true
|
|
1838
|
-
},
|
|
1839
|
-
async ({ library, version: version2 }) => {
|
|
1840
|
-
try {
|
|
1841
|
-
const result = await tools.remove.execute({ library, version: version2 });
|
|
1842
|
-
return createResponse(result.message);
|
|
1843
|
-
} catch (error) {
|
|
1844
|
-
return createError(
|
|
1845
|
-
`Failed to remove documents: ${error instanceof Error ? error.message : String(error)}`
|
|
1846
|
-
);
|
|
1847
2651
|
}
|
|
1848
|
-
|
|
1849
|
-
|
|
2652
|
+
);
|
|
2653
|
+
}
|
|
1850
2654
|
server.tool(
|
|
1851
2655
|
"fetch_url",
|
|
1852
2656
|
"Fetch a single URL and convert its content to Markdown. Use this tool to read the content of any web page.",
|
|
@@ -1910,74 +2714,76 @@ ${formattedJob}`);
|
|
|
1910
2714
|
};
|
|
1911
2715
|
}
|
|
1912
2716
|
);
|
|
1913
|
-
|
|
1914
|
-
|
|
1915
|
-
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
|
-
|
|
2717
|
+
if (!readOnly) {
|
|
2718
|
+
server.resource(
|
|
2719
|
+
"jobs",
|
|
2720
|
+
"docs://jobs",
|
|
2721
|
+
{
|
|
2722
|
+
description: "List indexing jobs, optionally filtering by status.",
|
|
2723
|
+
mimeType: "application/json"
|
|
2724
|
+
},
|
|
2725
|
+
async (uri) => {
|
|
2726
|
+
const statusParam = uri.searchParams.get("status");
|
|
2727
|
+
let statusFilter;
|
|
2728
|
+
if (statusParam) {
|
|
2729
|
+
const validation = z.nativeEnum(PipelineJobStatus).safeParse(statusParam);
|
|
2730
|
+
if (validation.success) {
|
|
2731
|
+
statusFilter = validation.data;
|
|
2732
|
+
} else {
|
|
2733
|
+
logger.warn(`⚠️ Invalid status parameter received: ${statusParam}`);
|
|
2734
|
+
}
|
|
1929
2735
|
}
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
uri: new URL(job.id, uri).href,
|
|
1935
|
-
mimeType: "application/json",
|
|
1936
|
-
text: JSON.stringify({
|
|
1937
|
-
id: job.id,
|
|
1938
|
-
library: job.library,
|
|
1939
|
-
version: job.version,
|
|
1940
|
-
status: job.status,
|
|
1941
|
-
error: job.error || void 0
|
|
1942
|
-
})
|
|
1943
|
-
}))
|
|
1944
|
-
};
|
|
1945
|
-
}
|
|
1946
|
-
);
|
|
1947
|
-
server.resource(
|
|
1948
|
-
"job",
|
|
1949
|
-
// A distinct name for this specific resource type
|
|
1950
|
-
new ResourceTemplate("docs://jobs/{jobId}", { list: void 0 }),
|
|
1951
|
-
{
|
|
1952
|
-
description: "Get details for a specific indexing job by ID.",
|
|
1953
|
-
mimeType: "application/json"
|
|
1954
|
-
},
|
|
1955
|
-
async (uri, { jobId }) => {
|
|
1956
|
-
if (typeof jobId !== "string" || jobId.length === 0) {
|
|
1957
|
-
logger.warn(`⚠️ Invalid jobId received in URI: ${jobId}`);
|
|
1958
|
-
return { contents: [] };
|
|
1959
|
-
}
|
|
1960
|
-
const result = await tools.getJobInfo.execute({ jobId });
|
|
1961
|
-
if (!result.job) {
|
|
1962
|
-
return { contents: [] };
|
|
1963
|
-
}
|
|
1964
|
-
return {
|
|
1965
|
-
contents: [
|
|
1966
|
-
{
|
|
1967
|
-
uri: uri.href,
|
|
2736
|
+
const result = await tools.listJobs.execute({ status: statusFilter });
|
|
2737
|
+
return {
|
|
2738
|
+
contents: result.jobs.map((job) => ({
|
|
2739
|
+
uri: new URL(job.id, uri).href,
|
|
1968
2740
|
mimeType: "application/json",
|
|
1969
2741
|
text: JSON.stringify({
|
|
1970
|
-
id:
|
|
1971
|
-
library:
|
|
1972
|
-
version:
|
|
1973
|
-
status:
|
|
1974
|
-
error:
|
|
2742
|
+
id: job.id,
|
|
2743
|
+
library: job.library,
|
|
2744
|
+
version: job.version,
|
|
2745
|
+
status: job.status,
|
|
2746
|
+
error: job.error || void 0
|
|
1975
2747
|
})
|
|
1976
|
-
}
|
|
1977
|
-
|
|
1978
|
-
}
|
|
1979
|
-
|
|
1980
|
-
|
|
2748
|
+
}))
|
|
2749
|
+
};
|
|
2750
|
+
}
|
|
2751
|
+
);
|
|
2752
|
+
server.resource(
|
|
2753
|
+
"job",
|
|
2754
|
+
// A distinct name for this specific resource type
|
|
2755
|
+
new ResourceTemplate("docs://jobs/{jobId}", { list: void 0 }),
|
|
2756
|
+
{
|
|
2757
|
+
description: "Get details for a specific indexing job by ID.",
|
|
2758
|
+
mimeType: "application/json"
|
|
2759
|
+
},
|
|
2760
|
+
async (uri, { jobId }) => {
|
|
2761
|
+
if (typeof jobId !== "string" || jobId.length === 0) {
|
|
2762
|
+
logger.warn(`⚠️ Invalid jobId received in URI: ${jobId}`);
|
|
2763
|
+
return { contents: [] };
|
|
2764
|
+
}
|
|
2765
|
+
const result = await tools.getJobInfo.execute({ jobId });
|
|
2766
|
+
if (!result.job) {
|
|
2767
|
+
return { contents: [] };
|
|
2768
|
+
}
|
|
2769
|
+
return {
|
|
2770
|
+
contents: [
|
|
2771
|
+
{
|
|
2772
|
+
uri: uri.href,
|
|
2773
|
+
mimeType: "application/json",
|
|
2774
|
+
text: JSON.stringify({
|
|
2775
|
+
id: result.job.id,
|
|
2776
|
+
library: result.job.library,
|
|
2777
|
+
version: result.job.version,
|
|
2778
|
+
status: result.job.status,
|
|
2779
|
+
error: result.job.error || void 0
|
|
2780
|
+
})
|
|
2781
|
+
}
|
|
2782
|
+
]
|
|
2783
|
+
};
|
|
2784
|
+
}
|
|
2785
|
+
);
|
|
2786
|
+
}
|
|
1981
2787
|
return server;
|
|
1982
2788
|
}
|
|
1983
2789
|
class FileFetcher {
|
|
@@ -1992,7 +2798,7 @@ class FileFetcher {
|
|
|
1992
2798
|
const rawPath = source.replace("file://", "");
|
|
1993
2799
|
const filePath = decodeURIComponent(rawPath);
|
|
1994
2800
|
try {
|
|
1995
|
-
const content = await fs.readFile(filePath);
|
|
2801
|
+
const content = await fs$1.readFile(filePath);
|
|
1996
2802
|
const ext = path.extname(filePath).toLowerCase();
|
|
1997
2803
|
const mimeType = mime.lookup(ext) || "application/octet-stream";
|
|
1998
2804
|
return {
|
|
@@ -2083,9 +2889,49 @@ class HttpFetcher {
|
|
|
2083
2889
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
2084
2890
|
}
|
|
2085
2891
|
async fetch(source, options) {
|
|
2892
|
+
const startTime = performance.now();
|
|
2086
2893
|
const maxRetries = options?.maxRetries ?? FETCHER_MAX_RETRIES;
|
|
2087
2894
|
const baseDelay = options?.retryDelay ?? FETCHER_BASE_DELAY;
|
|
2088
2895
|
const followRedirects = options?.followRedirects ?? true;
|
|
2896
|
+
try {
|
|
2897
|
+
const result = await this.performFetch(
|
|
2898
|
+
source,
|
|
2899
|
+
options,
|
|
2900
|
+
maxRetries,
|
|
2901
|
+
baseDelay,
|
|
2902
|
+
followRedirects
|
|
2903
|
+
);
|
|
2904
|
+
const duration = performance.now() - startTime;
|
|
2905
|
+
analytics.track("http_request_completed", {
|
|
2906
|
+
success: true,
|
|
2907
|
+
hostname: extractHostname(source),
|
|
2908
|
+
protocol: extractProtocol(source),
|
|
2909
|
+
duration_ms: Math.round(duration),
|
|
2910
|
+
content_size_bytes: result.content.length,
|
|
2911
|
+
mime_type: result.mimeType,
|
|
2912
|
+
has_encoding: !!result.encoding,
|
|
2913
|
+
follow_redirects: followRedirects,
|
|
2914
|
+
had_redirects: result.source !== source
|
|
2915
|
+
});
|
|
2916
|
+
return result;
|
|
2917
|
+
} catch (error) {
|
|
2918
|
+
const duration = performance.now() - startTime;
|
|
2919
|
+
const axiosError = error;
|
|
2920
|
+
const status = axiosError.response?.status;
|
|
2921
|
+
analytics.track("http_request_completed", {
|
|
2922
|
+
success: false,
|
|
2923
|
+
hostname: extractHostname(source),
|
|
2924
|
+
protocol: extractProtocol(source),
|
|
2925
|
+
duration_ms: Math.round(duration),
|
|
2926
|
+
status_code: status,
|
|
2927
|
+
error_type: error instanceof CancellationError ? "cancellation" : error instanceof RedirectError ? "redirect" : error instanceof ScraperError ? "scraper" : "unknown",
|
|
2928
|
+
error_code: axiosError.code,
|
|
2929
|
+
follow_redirects: followRedirects
|
|
2930
|
+
});
|
|
2931
|
+
throw error;
|
|
2932
|
+
}
|
|
2933
|
+
}
|
|
2934
|
+
async performFetch(source, options, maxRetries = FETCHER_MAX_RETRIES, baseDelay = FETCHER_BASE_DELAY, followRedirects = true) {
|
|
2089
2935
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
2090
2936
|
try {
|
|
2091
2937
|
const fingerprint = this.fingerprintGenerator.generateHeaders();
|
|
@@ -2185,20 +3031,35 @@ async function initializeTools(docService, pipeline) {
|
|
|
2185
3031
|
};
|
|
2186
3032
|
return tools;
|
|
2187
3033
|
}
|
|
2188
|
-
async function registerMcpService(server, docService, pipeline) {
|
|
3034
|
+
async function registerMcpService(server, docService, pipeline, readOnly = false, authManager) {
|
|
2189
3035
|
const mcpTools = await initializeTools(docService, pipeline);
|
|
2190
|
-
const mcpServer = createMcpServerInstance(mcpTools);
|
|
3036
|
+
const mcpServer = createMcpServerInstance(mcpTools, readOnly);
|
|
3037
|
+
const authMiddleware = authManager ? createAuthMiddleware(authManager) : null;
|
|
2191
3038
|
const sseTransports = {};
|
|
2192
3039
|
server.route({
|
|
2193
3040
|
method: "GET",
|
|
2194
3041
|
url: "/sse",
|
|
3042
|
+
preHandler: authMiddleware ? [authMiddleware] : void 0,
|
|
2195
3043
|
handler: async (_request, reply) => {
|
|
2196
3044
|
try {
|
|
2197
3045
|
const transport = new SSEServerTransport("/messages", reply.raw);
|
|
2198
3046
|
sseTransports[transport.sessionId] = transport;
|
|
3047
|
+
if (analytics.isEnabled()) {
|
|
3048
|
+
const session = createMcpSession({
|
|
3049
|
+
protocol: "http",
|
|
3050
|
+
transport: "sse",
|
|
3051
|
+
authEnabled: !!authManager,
|
|
3052
|
+
readOnly,
|
|
3053
|
+
servicesEnabled: ["mcp"]
|
|
3054
|
+
});
|
|
3055
|
+
analytics.startSession(session);
|
|
3056
|
+
}
|
|
2199
3057
|
reply.raw.on("close", () => {
|
|
2200
3058
|
delete sseTransports[transport.sessionId];
|
|
2201
3059
|
transport.close();
|
|
3060
|
+
if (analytics.isEnabled()) {
|
|
3061
|
+
analytics.endSession();
|
|
3062
|
+
}
|
|
2202
3063
|
});
|
|
2203
3064
|
await mcpServer.connect(transport);
|
|
2204
3065
|
} catch (error) {
|
|
@@ -2233,16 +3094,30 @@ async function registerMcpService(server, docService, pipeline) {
|
|
|
2233
3094
|
server.route({
|
|
2234
3095
|
method: "POST",
|
|
2235
3096
|
url: "/mcp",
|
|
3097
|
+
preHandler: authMiddleware ? [authMiddleware] : void 0,
|
|
2236
3098
|
handler: async (request, reply) => {
|
|
2237
3099
|
try {
|
|
2238
|
-
const requestServer = createMcpServerInstance(mcpTools);
|
|
3100
|
+
const requestServer = createMcpServerInstance(mcpTools, readOnly);
|
|
2239
3101
|
const requestTransport = new StreamableHTTPServerTransport({
|
|
2240
3102
|
sessionIdGenerator: void 0
|
|
2241
3103
|
});
|
|
3104
|
+
if (analytics.isEnabled()) {
|
|
3105
|
+
const session = createMcpSession({
|
|
3106
|
+
protocol: "http",
|
|
3107
|
+
transport: "streamable",
|
|
3108
|
+
authEnabled: !!authManager,
|
|
3109
|
+
readOnly,
|
|
3110
|
+
servicesEnabled: ["mcp"]
|
|
3111
|
+
});
|
|
3112
|
+
analytics.startSession(session);
|
|
3113
|
+
}
|
|
2242
3114
|
reply.raw.on("close", () => {
|
|
2243
3115
|
logger.debug("Streamable HTTP request closed");
|
|
2244
3116
|
requestTransport.close();
|
|
2245
3117
|
requestServer.close();
|
|
3118
|
+
if (analytics.isEnabled()) {
|
|
3119
|
+
analytics.endSession();
|
|
3120
|
+
}
|
|
2246
3121
|
});
|
|
2247
3122
|
await requestServer.connect(requestTransport);
|
|
2248
3123
|
await requestTransport.handleRequest(request.raw, reply.raw, request.body);
|
|
@@ -3066,410 +3941,475 @@ const Tooltip = ({ text, position = "top" }) => {
|
|
|
3066
3941
|
}
|
|
3067
3942
|
);
|
|
3068
3943
|
};
|
|
3069
|
-
const ScrapeFormContent = () =>
|
|
3070
|
-
|
|
3071
|
-
/* @__PURE__ */ jsxs(
|
|
3072
|
-
"
|
|
3073
|
-
|
|
3074
|
-
"
|
|
3075
|
-
|
|
3076
|
-
|
|
3077
|
-
|
|
3078
|
-
|
|
3079
|
-
|
|
3080
|
-
|
|
3081
|
-
|
|
3082
|
-
|
|
3083
|
-
|
|
3084
|
-
|
|
3085
|
-
|
|
3086
|
-
|
|
3087
|
-
|
|
3088
|
-
|
|
3089
|
-
|
|
3090
|
-
|
|
3091
|
-
|
|
3092
|
-
|
|
3093
|
-
|
|
3094
|
-
/* @__PURE__ */ jsx("p", { children: "Enter the URL of the documentation you want to scrape." }),
|
|
3095
|
-
/* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
|
|
3096
|
-
"For local files/folders, you must use the ",
|
|
3097
|
-
/* @__PURE__ */ jsx("code", { children: "file://" }),
|
|
3098
|
-
" ",
|
|
3099
|
-
"prefix and ensure the path is accessible to the server."
|
|
3100
|
-
] }),
|
|
3101
|
-
/* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
|
|
3102
|
-
"If running in Docker, ",
|
|
3103
|
-
/* @__PURE__ */ jsx("b", { children: "mount the folder" }),
|
|
3104
|
-
" (see README for details)."
|
|
3105
|
-
] })
|
|
3106
|
-
] })
|
|
3107
|
-
}
|
|
3108
|
-
)
|
|
3109
|
-
] }),
|
|
3110
|
-
/* @__PURE__ */ jsx(
|
|
3111
|
-
"input",
|
|
3112
|
-
{
|
|
3113
|
-
type: "url",
|
|
3114
|
-
name: "url",
|
|
3115
|
-
id: "url",
|
|
3116
|
-
required: true,
|
|
3117
|
-
"x-model": "url",
|
|
3118
|
-
"x-on:input": "checkUrlPath",
|
|
3119
|
-
"x-on:paste": "$nextTick(() => checkUrlPath())",
|
|
3120
|
-
class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
3121
|
-
}
|
|
3122
|
-
),
|
|
3123
|
-
/* @__PURE__ */ jsx(
|
|
3124
|
-
"div",
|
|
3125
|
-
{
|
|
3126
|
-
"x-show": "hasPath && !(url.startsWith('file://'))",
|
|
3127
|
-
"x-cloak": true,
|
|
3128
|
-
"x-transition:enter": "transition ease-out duration-300",
|
|
3129
|
-
"x-transition:enter-start": "opacity-0 transform -translate-y-2",
|
|
3130
|
-
"x-transition:enter-end": "opacity-100 transform translate-y-0",
|
|
3131
|
-
class: "mt-2",
|
|
3132
|
-
children: /* @__PURE__ */ jsx(
|
|
3133
|
-
Alert,
|
|
3944
|
+
const ScrapeFormContent = ({ defaultExcludePatterns }) => {
|
|
3945
|
+
const defaultExcludePatternsText = defaultExcludePatterns?.join("\n") || "";
|
|
3946
|
+
return /* @__PURE__ */ jsxs("div", { class: "mt-4 p-4 bg-white dark:bg-gray-800 rounded-lg shadow border border-gray-300 dark:border-gray-600", children: [
|
|
3947
|
+
/* @__PURE__ */ jsx("h3", { class: "text-xl font-semibold text-gray-900 dark:text-white mb-2", children: "Queue New Scrape Job" }),
|
|
3948
|
+
/* @__PURE__ */ jsxs(
|
|
3949
|
+
"form",
|
|
3950
|
+
{
|
|
3951
|
+
"hx-post": "/web/jobs/scrape",
|
|
3952
|
+
"hx-target": "#job-response",
|
|
3953
|
+
"hx-swap": "innerHTML",
|
|
3954
|
+
class: "space-y-2",
|
|
3955
|
+
"x-data": "{\n url: '',\n hasPath: false,\n headers: [],\n checkUrlPath() {\n try {\n const url = new URL(this.url);\n this.hasPath = url.pathname !== '/' && url.pathname !== '';\n } catch (e) {\n this.hasPath = false;\n }\n }\n }",
|
|
3956
|
+
children: [
|
|
3957
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
3958
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
3959
|
+
/* @__PURE__ */ jsx(
|
|
3960
|
+
"label",
|
|
3961
|
+
{
|
|
3962
|
+
for: "url",
|
|
3963
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
3964
|
+
children: "URL"
|
|
3965
|
+
}
|
|
3966
|
+
),
|
|
3967
|
+
/* @__PURE__ */ jsx(
|
|
3968
|
+
Tooltip,
|
|
3134
3969
|
{
|
|
3135
|
-
|
|
3136
|
-
|
|
3970
|
+
text: /* @__PURE__ */ jsxs("div", { children: [
|
|
3971
|
+
/* @__PURE__ */ jsx("p", { children: "Enter the URL of the documentation you want to scrape." }),
|
|
3972
|
+
/* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
|
|
3973
|
+
"For local files/folders, you must use the ",
|
|
3974
|
+
/* @__PURE__ */ jsx("code", { children: "file://" }),
|
|
3975
|
+
" ",
|
|
3976
|
+
"prefix and ensure the path is accessible to the server."
|
|
3977
|
+
] }),
|
|
3978
|
+
/* @__PURE__ */ jsxs("p", { class: "mt-2", children: [
|
|
3979
|
+
"If running in Docker, ",
|
|
3980
|
+
/* @__PURE__ */ jsx("b", { children: "mount the folder" }),
|
|
3981
|
+
" (see README for details)."
|
|
3982
|
+
] })
|
|
3983
|
+
] })
|
|
3137
3984
|
}
|
|
3138
3985
|
)
|
|
3139
|
-
}
|
|
3140
|
-
)
|
|
3141
|
-
] }),
|
|
3142
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
3143
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
3986
|
+
] }),
|
|
3144
3987
|
/* @__PURE__ */ jsx(
|
|
3145
|
-
"
|
|
3988
|
+
"input",
|
|
3146
3989
|
{
|
|
3147
|
-
|
|
3148
|
-
|
|
3149
|
-
|
|
3990
|
+
type: "url",
|
|
3991
|
+
name: "url",
|
|
3992
|
+
id: "url",
|
|
3993
|
+
required: true,
|
|
3994
|
+
"x-model": "url",
|
|
3995
|
+
"x-on:input": "checkUrlPath",
|
|
3996
|
+
"x-on:paste": "$nextTick(() => checkUrlPath())",
|
|
3997
|
+
class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
3150
3998
|
}
|
|
3151
3999
|
),
|
|
3152
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "The name of the library you're documenting. This will be used when searching." })
|
|
3153
|
-
] }),
|
|
3154
|
-
/* @__PURE__ */ jsx(
|
|
3155
|
-
"input",
|
|
3156
|
-
{
|
|
3157
|
-
type: "text",
|
|
3158
|
-
name: "library",
|
|
3159
|
-
id: "library",
|
|
3160
|
-
required: true,
|
|
3161
|
-
class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
3162
|
-
}
|
|
3163
|
-
)
|
|
3164
|
-
] }),
|
|
3165
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
3166
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
3167
4000
|
/* @__PURE__ */ jsx(
|
|
3168
|
-
"
|
|
4001
|
+
"div",
|
|
3169
4002
|
{
|
|
3170
|
-
|
|
3171
|
-
|
|
3172
|
-
|
|
3173
|
-
|
|
3174
|
-
|
|
3175
|
-
|
|
3176
|
-
|
|
3177
|
-
|
|
3178
|
-
"input",
|
|
3179
|
-
{
|
|
3180
|
-
type: "text",
|
|
3181
|
-
name: "version",
|
|
3182
|
-
id: "version",
|
|
3183
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
3184
|
-
}
|
|
3185
|
-
)
|
|
3186
|
-
] }),
|
|
3187
|
-
/* @__PURE__ */ jsxs("details", { class: "bg-gray-50 dark:bg-gray-900 p-2 rounded-md", children: [
|
|
3188
|
-
/* @__PURE__ */ jsx("summary", { class: "cursor-pointer text-sm font-medium text-gray-600 dark:text-gray-400", children: "Advanced Options" }),
|
|
3189
|
-
/* @__PURE__ */ jsxs("div", { class: "mt-2 space-y-2", "x-data": "{ headers: [] }", children: [
|
|
3190
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
3191
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
3192
|
-
/* @__PURE__ */ jsx(
|
|
3193
|
-
"label",
|
|
4003
|
+
"x-show": "hasPath && !(url.startsWith('file://'))",
|
|
4004
|
+
"x-cloak": true,
|
|
4005
|
+
"x-transition:enter": "transition ease-out duration-300",
|
|
4006
|
+
"x-transition:enter-start": "opacity-0 transform -translate-y-2",
|
|
4007
|
+
"x-transition:enter-end": "opacity-100 transform translate-y-0",
|
|
4008
|
+
class: "mt-2",
|
|
4009
|
+
children: /* @__PURE__ */ jsx(
|
|
4010
|
+
Alert,
|
|
3194
4011
|
{
|
|
3195
|
-
|
|
3196
|
-
|
|
3197
|
-
children: "Max Pages"
|
|
4012
|
+
type: "info",
|
|
4013
|
+
message: "By default, only subpages under the given URL will be scraped. To scrape the whole website, adjust the 'Scope' option in Advanced Options."
|
|
3198
4014
|
}
|
|
3199
|
-
)
|
|
3200
|
-
|
|
3201
|
-
|
|
4015
|
+
)
|
|
4016
|
+
}
|
|
4017
|
+
)
|
|
4018
|
+
] }),
|
|
4019
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
4020
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
3202
4021
|
/* @__PURE__ */ jsx(
|
|
3203
|
-
"
|
|
4022
|
+
"label",
|
|
3204
4023
|
{
|
|
3205
|
-
|
|
3206
|
-
|
|
3207
|
-
|
|
3208
|
-
min: "1",
|
|
3209
|
-
placeholder: "1000",
|
|
3210
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
4024
|
+
for: "library",
|
|
4025
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
4026
|
+
children: "Library Name"
|
|
3211
4027
|
}
|
|
3212
|
-
)
|
|
4028
|
+
),
|
|
4029
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "The name of the library you're documenting. This will be used when searching." })
|
|
3213
4030
|
] }),
|
|
3214
|
-
/* @__PURE__ */
|
|
3215
|
-
|
|
3216
|
-
|
|
3217
|
-
|
|
3218
|
-
|
|
3219
|
-
|
|
3220
|
-
|
|
3221
|
-
|
|
3222
|
-
|
|
3223
|
-
|
|
3224
|
-
|
|
3225
|
-
|
|
4031
|
+
/* @__PURE__ */ jsx(
|
|
4032
|
+
"input",
|
|
4033
|
+
{
|
|
4034
|
+
type: "text",
|
|
4035
|
+
name: "library",
|
|
4036
|
+
id: "library",
|
|
4037
|
+
required: true,
|
|
4038
|
+
class: "mt-0.5 block w-full px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
4039
|
+
}
|
|
4040
|
+
)
|
|
4041
|
+
] }),
|
|
4042
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
4043
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
3226
4044
|
/* @__PURE__ */ jsx(
|
|
3227
|
-
"
|
|
4045
|
+
"label",
|
|
3228
4046
|
{
|
|
3229
|
-
|
|
3230
|
-
|
|
3231
|
-
|
|
3232
|
-
min: "0",
|
|
3233
|
-
placeholder: "3",
|
|
3234
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
4047
|
+
for: "version",
|
|
4048
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
4049
|
+
children: "Version (optional)"
|
|
3235
4050
|
}
|
|
3236
|
-
)
|
|
4051
|
+
),
|
|
4052
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "Specify the version of the library documentation you're indexing. This allows for version-specific searches." })
|
|
3237
4053
|
] }),
|
|
3238
|
-
/* @__PURE__ */
|
|
3239
|
-
|
|
3240
|
-
|
|
3241
|
-
|
|
3242
|
-
|
|
3243
|
-
|
|
3244
|
-
|
|
3245
|
-
|
|
3246
|
-
|
|
3247
|
-
|
|
4054
|
+
/* @__PURE__ */ jsx(
|
|
4055
|
+
"input",
|
|
4056
|
+
{
|
|
4057
|
+
type: "text",
|
|
4058
|
+
name: "version",
|
|
4059
|
+
id: "version",
|
|
4060
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
4061
|
+
}
|
|
4062
|
+
)
|
|
4063
|
+
] }),
|
|
4064
|
+
/* @__PURE__ */ jsxs("details", { class: "bg-gray-50 dark:bg-gray-900 p-2 rounded-md", children: [
|
|
4065
|
+
/* @__PURE__ */ jsx("summary", { class: "cursor-pointer text-sm font-medium text-gray-600 dark:text-gray-400", children: "Advanced Options" }),
|
|
4066
|
+
/* @__PURE__ */ jsxs("div", { class: "mt-2 space-y-2", "x-data": "{ headers: [] }", children: [
|
|
4067
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
4068
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
4069
|
+
/* @__PURE__ */ jsx(
|
|
4070
|
+
"label",
|
|
4071
|
+
{
|
|
4072
|
+
for: "maxPages",
|
|
4073
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
4074
|
+
children: "Max Pages"
|
|
4075
|
+
}
|
|
4076
|
+
),
|
|
4077
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "The maximum number of pages to scrape. Default is 1000. Setting this too high may result in longer processing times." })
|
|
4078
|
+
] }),
|
|
3248
4079
|
/* @__PURE__ */ jsx(
|
|
3249
|
-
|
|
4080
|
+
"input",
|
|
3250
4081
|
{
|
|
3251
|
-
|
|
3252
|
-
|
|
3253
|
-
|
|
3254
|
-
|
|
3255
|
-
|
|
3256
|
-
|
|
3257
|
-
] })
|
|
3258
|
-
] })
|
|
4082
|
+
type: "number",
|
|
4083
|
+
name: "maxPages",
|
|
4084
|
+
id: "maxPages",
|
|
4085
|
+
min: "1",
|
|
4086
|
+
placeholder: "1000",
|
|
4087
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
3259
4088
|
}
|
|
3260
4089
|
)
|
|
3261
4090
|
] }),
|
|
3262
|
-
/* @__PURE__ */ jsxs(
|
|
3263
|
-
"
|
|
3264
|
-
|
|
3265
|
-
|
|
3266
|
-
|
|
3267
|
-
|
|
3268
|
-
|
|
3269
|
-
|
|
3270
|
-
|
|
3271
|
-
|
|
3272
|
-
|
|
3273
|
-
}
|
|
3274
|
-
)
|
|
3275
|
-
] }),
|
|
3276
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
3277
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
3278
|
-
/* @__PURE__ */ jsx(
|
|
3279
|
-
"label",
|
|
3280
|
-
{
|
|
3281
|
-
for: "includePatterns",
|
|
3282
|
-
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
3283
|
-
children: "Include Patterns"
|
|
3284
|
-
}
|
|
3285
|
-
),
|
|
3286
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
|
|
3287
|
-
] }),
|
|
3288
|
-
/* @__PURE__ */ jsx(
|
|
3289
|
-
"textarea",
|
|
3290
|
-
{
|
|
3291
|
-
name: "includePatterns",
|
|
3292
|
-
id: "includePatterns",
|
|
3293
|
-
rows: "2",
|
|
3294
|
-
placeholder: "e.g. docs/* or /api\\/v1.*/",
|
|
3295
|
-
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
3296
|
-
}
|
|
3297
|
-
)
|
|
3298
|
-
] }),
|
|
3299
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
3300
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
4091
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
4092
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
4093
|
+
/* @__PURE__ */ jsx(
|
|
4094
|
+
"label",
|
|
4095
|
+
{
|
|
4096
|
+
for: "maxDepth",
|
|
4097
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
4098
|
+
children: "Max Depth"
|
|
4099
|
+
}
|
|
4100
|
+
),
|
|
4101
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "How many links deep the scraper should follow. Default is 3. Higher values capture more content but increase processing time." })
|
|
4102
|
+
] }),
|
|
3301
4103
|
/* @__PURE__ */ jsx(
|
|
3302
|
-
"
|
|
4104
|
+
"input",
|
|
3303
4105
|
{
|
|
3304
|
-
|
|
3305
|
-
|
|
3306
|
-
|
|
4106
|
+
type: "number",
|
|
4107
|
+
name: "maxDepth",
|
|
4108
|
+
id: "maxDepth",
|
|
4109
|
+
min: "0",
|
|
4110
|
+
placeholder: "3",
|
|
4111
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
3307
4112
|
}
|
|
3308
|
-
)
|
|
3309
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
|
|
4113
|
+
)
|
|
3310
4114
|
] }),
|
|
3311
|
-
/* @__PURE__ */
|
|
3312
|
-
"
|
|
3313
|
-
|
|
3314
|
-
|
|
3315
|
-
|
|
3316
|
-
|
|
3317
|
-
|
|
3318
|
-
|
|
3319
|
-
|
|
3320
|
-
|
|
3321
|
-
|
|
3322
|
-
|
|
3323
|
-
|
|
3324
|
-
|
|
3325
|
-
|
|
4115
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
4116
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
4117
|
+
/* @__PURE__ */ jsx(
|
|
4118
|
+
"label",
|
|
4119
|
+
{
|
|
4120
|
+
for: "scope",
|
|
4121
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
4122
|
+
children: "Scope"
|
|
4123
|
+
}
|
|
4124
|
+
),
|
|
4125
|
+
/* @__PURE__ */ jsx(
|
|
4126
|
+
Tooltip,
|
|
4127
|
+
{
|
|
4128
|
+
text: /* @__PURE__ */ jsxs("div", { children: [
|
|
4129
|
+
"Controls which pages are scraped:",
|
|
4130
|
+
/* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
|
|
4131
|
+
/* @__PURE__ */ jsx("li", { children: "'Subpages' only scrapes under the given URL path," }),
|
|
4132
|
+
/* @__PURE__ */ jsx("li", { children: "'Hostname' scrapes all content on the same host (e.g., all of docs.example.com)," }),
|
|
4133
|
+
/* @__PURE__ */ jsx("li", { children: "'Domain' scrapes all content on the domain and its subdomains (e.g., all of example.com)." })
|
|
4134
|
+
] })
|
|
4135
|
+
] })
|
|
4136
|
+
}
|
|
4137
|
+
)
|
|
4138
|
+
] }),
|
|
4139
|
+
/* @__PURE__ */ jsxs(
|
|
4140
|
+
"select",
|
|
3326
4141
|
{
|
|
3327
|
-
|
|
3328
|
-
|
|
3329
|
-
|
|
4142
|
+
name: "scope",
|
|
4143
|
+
id: "scope",
|
|
4144
|
+
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
|
|
4145
|
+
children: [
|
|
4146
|
+
/* @__PURE__ */ jsx("option", { value: "subpages", selected: true, children: "Subpages (Default)" }),
|
|
4147
|
+
/* @__PURE__ */ jsx("option", { value: "hostname", children: "Hostname" }),
|
|
4148
|
+
/* @__PURE__ */ jsx("option", { value: "domain", children: "Domain" })
|
|
4149
|
+
]
|
|
3330
4150
|
}
|
|
3331
|
-
)
|
|
4151
|
+
)
|
|
4152
|
+
] }),
|
|
4153
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
4154
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
4155
|
+
/* @__PURE__ */ jsx(
|
|
4156
|
+
"label",
|
|
4157
|
+
{
|
|
4158
|
+
for: "includePatterns",
|
|
4159
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
4160
|
+
children: "Include Patterns"
|
|
4161
|
+
}
|
|
4162
|
+
),
|
|
4163
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to include. One per line or comma-separated. Regex patterns must be wrapped in slashes, e.g. /pattern/." })
|
|
4164
|
+
] }),
|
|
3332
4165
|
/* @__PURE__ */ jsx(
|
|
3333
|
-
|
|
4166
|
+
"textarea",
|
|
3334
4167
|
{
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
|
|
4168
|
+
name: "includePatterns",
|
|
4169
|
+
id: "includePatterns",
|
|
4170
|
+
rows: "2",
|
|
4171
|
+
placeholder: "e.g. docs/* or /api\\/v1.*/",
|
|
4172
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white"
|
|
3340
4173
|
}
|
|
3341
4174
|
)
|
|
3342
4175
|
] }),
|
|
3343
|
-
/* @__PURE__ */ jsxs(
|
|
3344
|
-
"select",
|
|
3345
|
-
{
|
|
3346
|
-
name: "scrapeMode",
|
|
3347
|
-
id: "scrapeMode",
|
|
3348
|
-
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
|
|
3349
|
-
children: [
|
|
3350
|
-
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
|
|
3351
|
-
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
|
|
3352
|
-
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
|
|
3353
|
-
]
|
|
3354
|
-
}
|
|
3355
|
-
)
|
|
3356
|
-
] }),
|
|
3357
|
-
/* @__PURE__ */ jsxs("div", { children: [
|
|
3358
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
|
|
3359
|
-
/* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
|
|
3360
|
-
/* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
|
|
3361
|
-
] }),
|
|
3362
4176
|
/* @__PURE__ */ jsxs("div", { children: [
|
|
3363
|
-
/* @__PURE__ */
|
|
4177
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
3364
4178
|
/* @__PURE__ */ jsx(
|
|
3365
|
-
"
|
|
4179
|
+
"label",
|
|
3366
4180
|
{
|
|
3367
|
-
|
|
3368
|
-
class: "
|
|
3369
|
-
|
|
3370
|
-
"x-model": "header.name",
|
|
3371
|
-
required: true
|
|
4181
|
+
for: "excludePatterns",
|
|
4182
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
4183
|
+
children: "Exclude Patterns"
|
|
3372
4184
|
}
|
|
3373
4185
|
),
|
|
3374
|
-
/* @__PURE__ */ jsx(
|
|
4186
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "Glob or regex patterns for URLs to exclude. One per line or comma-separated. Exclude takes precedence over include. Regex patterns must be wrapped in slashes, e.g. /pattern/. Edit or clear this field to customize exclusions." })
|
|
4187
|
+
] }),
|
|
4188
|
+
/* @__PURE__ */ jsx(
|
|
4189
|
+
"textarea",
|
|
4190
|
+
{
|
|
4191
|
+
name: "excludePatterns",
|
|
4192
|
+
id: "excludePatterns",
|
|
4193
|
+
rows: "5",
|
|
4194
|
+
class: "mt-0.5 block w-full max-w-sm px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm bg-white dark:bg-gray-700 text-gray-900 dark:text-white font-mono text-xs",
|
|
4195
|
+
children: defaultExcludePatternsText
|
|
4196
|
+
}
|
|
4197
|
+
),
|
|
4198
|
+
/* @__PURE__ */ jsx("p", { class: "mt-1 text-xs text-gray-500 dark:text-gray-400", children: "Default patterns are pre-filled. Edit to customize or clear to exclude nothing." })
|
|
4199
|
+
] }),
|
|
4200
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
4201
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
3375
4202
|
/* @__PURE__ */ jsx(
|
|
3376
|
-
"
|
|
4203
|
+
"label",
|
|
3377
4204
|
{
|
|
3378
|
-
|
|
3379
|
-
class: "
|
|
3380
|
-
|
|
3381
|
-
"x-model": "header.value",
|
|
3382
|
-
required: true
|
|
4205
|
+
for: "scrapeMode",
|
|
4206
|
+
class: "block text-sm font-medium text-gray-700 dark:text-gray-300",
|
|
4207
|
+
children: "Scrape Mode"
|
|
3383
4208
|
}
|
|
3384
4209
|
),
|
|
3385
4210
|
/* @__PURE__ */ jsx(
|
|
3386
|
-
|
|
4211
|
+
Tooltip,
|
|
3387
4212
|
{
|
|
3388
|
-
|
|
3389
|
-
|
|
3390
|
-
|
|
3391
|
-
|
|
4213
|
+
text: /* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsxs("ul", { class: "list-disc pl-5", children: [
|
|
4214
|
+
/* @__PURE__ */ jsx("li", { children: "'Auto' automatically selects the best method," }),
|
|
4215
|
+
/* @__PURE__ */ jsx("li", { children: "'Fetch' uses simple HTTP requests (faster but may miss dynamic content)," }),
|
|
4216
|
+
/* @__PURE__ */ jsx("li", { children: "'Playwright' uses a headless browser (slower but better for JS-heavy sites)." })
|
|
4217
|
+
] }) })
|
|
3392
4218
|
}
|
|
3393
|
-
)
|
|
4219
|
+
)
|
|
4220
|
+
] }),
|
|
4221
|
+
/* @__PURE__ */ jsxs(
|
|
4222
|
+
"select",
|
|
4223
|
+
{
|
|
4224
|
+
name: "scrapeMode",
|
|
4225
|
+
id: "scrapeMode",
|
|
4226
|
+
class: "mt-0.5 block w-full max-w-sm pl-2 pr-10 py-1 text-base border border-gray-300 dark:border-gray-600 focus:outline-none focus:ring-indigo-500 focus:border-indigo-500 sm:text-sm rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white",
|
|
4227
|
+
children: [
|
|
4228
|
+
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Auto, selected: true, children: "Auto (Default)" }),
|
|
4229
|
+
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Fetch, children: "Fetch" }),
|
|
4230
|
+
/* @__PURE__ */ jsx("option", { value: ScrapeMode.Playwright, children: "Playwright" })
|
|
4231
|
+
]
|
|
4232
|
+
}
|
|
4233
|
+
)
|
|
4234
|
+
] }),
|
|
4235
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
4236
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center mb-1", children: [
|
|
4237
|
+
/* @__PURE__ */ jsx("label", { class: "block text-sm font-medium text-gray-700 dark:text-gray-300", children: "Custom HTTP Headers" }),
|
|
4238
|
+
/* @__PURE__ */ jsx(Tooltip, { text: "Add custom HTTP headers (e.g., for authentication). These will be sent with every HTTP request." })
|
|
4239
|
+
] }),
|
|
4240
|
+
/* @__PURE__ */ jsxs("div", { children: [
|
|
4241
|
+
/* @__PURE__ */ jsx("template", { "x-for": "(header, idx) in headers", children: /* @__PURE__ */ jsxs("div", { class: "flex space-x-2 mb-1", children: [
|
|
4242
|
+
/* @__PURE__ */ jsx(
|
|
4243
|
+
"input",
|
|
4244
|
+
{
|
|
4245
|
+
type: "text",
|
|
4246
|
+
class: "w-1/3 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
|
|
4247
|
+
placeholder: "Header Name",
|
|
4248
|
+
"x-model": "header.name",
|
|
4249
|
+
required: true
|
|
4250
|
+
}
|
|
4251
|
+
),
|
|
4252
|
+
/* @__PURE__ */ jsx("span", { class: "text-gray-500", children: ":" }),
|
|
4253
|
+
/* @__PURE__ */ jsx(
|
|
4254
|
+
"input",
|
|
4255
|
+
{
|
|
4256
|
+
type: "text",
|
|
4257
|
+
class: "w-1/2 px-2 py-1 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-white text-xs",
|
|
4258
|
+
placeholder: "Header Value",
|
|
4259
|
+
"x-model": "header.value",
|
|
4260
|
+
required: true
|
|
4261
|
+
}
|
|
4262
|
+
),
|
|
4263
|
+
/* @__PURE__ */ jsx(
|
|
4264
|
+
"button",
|
|
4265
|
+
{
|
|
4266
|
+
type: "button",
|
|
4267
|
+
class: "text-red-500 hover:text-red-700 text-xs",
|
|
4268
|
+
"x-on:click": "headers.splice(idx, 1)",
|
|
4269
|
+
children: "Remove"
|
|
4270
|
+
}
|
|
4271
|
+
),
|
|
4272
|
+
/* @__PURE__ */ jsx(
|
|
4273
|
+
"input",
|
|
4274
|
+
{
|
|
4275
|
+
type: "hidden",
|
|
4276
|
+
name: "header[]",
|
|
4277
|
+
"x-bind:value": "header.name && header.value ? header.name + ':' + header.value : ''"
|
|
4278
|
+
}
|
|
4279
|
+
)
|
|
4280
|
+
] }) }),
|
|
3394
4281
|
/* @__PURE__ */ jsx(
|
|
3395
|
-
"
|
|
4282
|
+
"button",
|
|
3396
4283
|
{
|
|
3397
|
-
type: "
|
|
3398
|
-
|
|
3399
|
-
"x-
|
|
4284
|
+
type: "button",
|
|
4285
|
+
class: "mt-1 px-2 py-0.5 bg-indigo-100 dark:bg-indigo-900 text-indigo-700 dark:text-indigo-200 rounded text-xs",
|
|
4286
|
+
"x-on:click": "headers.push({ name: '', value: '' })",
|
|
4287
|
+
children: "+ Add Header"
|
|
3400
4288
|
}
|
|
3401
4289
|
)
|
|
3402
|
-
] })
|
|
4290
|
+
] })
|
|
4291
|
+
] }),
|
|
4292
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
4293
|
+
/* @__PURE__ */ jsx(
|
|
4294
|
+
"input",
|
|
4295
|
+
{
|
|
4296
|
+
id: "followRedirects",
|
|
4297
|
+
name: "followRedirects",
|
|
4298
|
+
type: "checkbox",
|
|
4299
|
+
checked: true,
|
|
4300
|
+
class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
4301
|
+
}
|
|
4302
|
+
),
|
|
4303
|
+
/* @__PURE__ */ jsx(
|
|
4304
|
+
"label",
|
|
4305
|
+
{
|
|
4306
|
+
for: "followRedirects",
|
|
4307
|
+
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
4308
|
+
children: "Follow Redirects"
|
|
4309
|
+
}
|
|
4310
|
+
)
|
|
4311
|
+
] }),
|
|
4312
|
+
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
4313
|
+
/* @__PURE__ */ jsx(
|
|
4314
|
+
"input",
|
|
4315
|
+
{
|
|
4316
|
+
id: "ignoreErrors",
|
|
4317
|
+
name: "ignoreErrors",
|
|
4318
|
+
type: "checkbox",
|
|
4319
|
+
checked: true,
|
|
4320
|
+
class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
4321
|
+
}
|
|
4322
|
+
),
|
|
3403
4323
|
/* @__PURE__ */ jsx(
|
|
3404
|
-
"
|
|
4324
|
+
"label",
|
|
3405
4325
|
{
|
|
3406
|
-
|
|
3407
|
-
class: "
|
|
3408
|
-
|
|
3409
|
-
children: "+ Add Header"
|
|
4326
|
+
for: "ignoreErrors",
|
|
4327
|
+
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
4328
|
+
children: "Ignore Errors During Scraping"
|
|
3410
4329
|
}
|
|
3411
4330
|
)
|
|
3412
4331
|
] })
|
|
3413
|
-
] }),
|
|
3414
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
3415
|
-
/* @__PURE__ */ jsx(
|
|
3416
|
-
"input",
|
|
3417
|
-
{
|
|
3418
|
-
id: "followRedirects",
|
|
3419
|
-
name: "followRedirects",
|
|
3420
|
-
type: "checkbox",
|
|
3421
|
-
checked: true,
|
|
3422
|
-
class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
3423
|
-
}
|
|
3424
|
-
),
|
|
3425
|
-
/* @__PURE__ */ jsx(
|
|
3426
|
-
"label",
|
|
3427
|
-
{
|
|
3428
|
-
for: "followRedirects",
|
|
3429
|
-
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
3430
|
-
children: "Follow Redirects"
|
|
3431
|
-
}
|
|
3432
|
-
)
|
|
3433
|
-
] }),
|
|
3434
|
-
/* @__PURE__ */ jsxs("div", { class: "flex items-center", children: [
|
|
3435
|
-
/* @__PURE__ */ jsx(
|
|
3436
|
-
"input",
|
|
3437
|
-
{
|
|
3438
|
-
id: "ignoreErrors",
|
|
3439
|
-
name: "ignoreErrors",
|
|
3440
|
-
type: "checkbox",
|
|
3441
|
-
checked: true,
|
|
3442
|
-
class: "h-4 w-4 text-indigo-600 focus:ring-indigo-500 border-gray-300 dark:border-gray-600 rounded bg-white dark:bg-gray-700"
|
|
3443
|
-
}
|
|
3444
|
-
),
|
|
3445
|
-
/* @__PURE__ */ jsx(
|
|
3446
|
-
"label",
|
|
3447
|
-
{
|
|
3448
|
-
for: "ignoreErrors",
|
|
3449
|
-
class: "ml-1 block text-sm text-gray-900 dark:text-gray-300",
|
|
3450
|
-
children: "Ignore Errors During Scraping"
|
|
3451
|
-
}
|
|
3452
|
-
)
|
|
3453
4332
|
] })
|
|
3454
|
-
] })
|
|
3455
|
-
|
|
3456
|
-
|
|
3457
|
-
|
|
3458
|
-
|
|
3459
|
-
|
|
3460
|
-
|
|
3461
|
-
|
|
3462
|
-
}
|
|
3463
|
-
|
|
3464
|
-
|
|
3465
|
-
|
|
3466
|
-
|
|
3467
|
-
|
|
3468
|
-
|
|
3469
|
-
const ScrapeForm = () => /* @__PURE__ */ jsx("div", { id: "scrape-form-container", children: /* @__PURE__ */ jsx(ScrapeFormContent, {}) });
|
|
4333
|
+
] }),
|
|
4334
|
+
/* @__PURE__ */ jsx("div", { children: /* @__PURE__ */ jsx(
|
|
4335
|
+
"button",
|
|
4336
|
+
{
|
|
4337
|
+
type: "submit",
|
|
4338
|
+
class: "w-full flex justify-center py-1.5 px-3 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500",
|
|
4339
|
+
children: "Queue Job"
|
|
4340
|
+
}
|
|
4341
|
+
) })
|
|
4342
|
+
]
|
|
4343
|
+
}
|
|
4344
|
+
),
|
|
4345
|
+
/* @__PURE__ */ jsx("div", { id: "job-response", class: "mt-2 text-sm" })
|
|
4346
|
+
] });
|
|
4347
|
+
};
|
|
4348
|
+
const ScrapeForm = ({ defaultExcludePatterns }) => /* @__PURE__ */ jsx("div", { id: "scrape-form-container", children: /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns }) });
|
|
4349
|
+
const DEFAULT_FILE_EXCLUSIONS = [
|
|
4350
|
+
// CHANGELOG files (case variations)
|
|
4351
|
+
"**/CHANGELOG.md",
|
|
4352
|
+
"**/changelog.md",
|
|
4353
|
+
"**/CHANGELOG.mdx",
|
|
4354
|
+
"**/changelog.mdx",
|
|
4355
|
+
// LICENSE files (case variations)
|
|
4356
|
+
"**/LICENSE",
|
|
4357
|
+
"**/LICENSE.md",
|
|
4358
|
+
"**/license.md",
|
|
4359
|
+
// CODE_OF_CONDUCT files (case variations)
|
|
4360
|
+
"**/CODE_OF_CONDUCT.md",
|
|
4361
|
+
"**/code_of_conduct.md"
|
|
4362
|
+
];
|
|
4363
|
+
const DEFAULT_FOLDER_EXCLUSIONS = [
|
|
4364
|
+
// Archive and deprecated content (matches anywhere in path)
|
|
4365
|
+
"**/archive/**",
|
|
4366
|
+
"**/archived/**",
|
|
4367
|
+
"**/deprecated/**",
|
|
4368
|
+
"**/legacy/**",
|
|
4369
|
+
"**/old/**",
|
|
4370
|
+
"**/outdated/**",
|
|
4371
|
+
"**/previous/**",
|
|
4372
|
+
"**/superseded/**",
|
|
4373
|
+
// Specific paths that don't follow the general pattern
|
|
4374
|
+
"docs/old/**",
|
|
4375
|
+
// Internationalization folders - non-English locales
|
|
4376
|
+
"**/i18n/ar*/**",
|
|
4377
|
+
"**/i18n/de*/**",
|
|
4378
|
+
"**/i18n/es*/**",
|
|
4379
|
+
"**/i18n/fr*/**",
|
|
4380
|
+
"**/i18n/hi*/**",
|
|
4381
|
+
"**/i18n/it*/**",
|
|
4382
|
+
"**/i18n/ja*/**",
|
|
4383
|
+
"**/i18n/ko*/**",
|
|
4384
|
+
"**/i18n/nl*/**",
|
|
4385
|
+
"**/i18n/pl*/**",
|
|
4386
|
+
"**/i18n/pt*/**",
|
|
4387
|
+
"**/i18n/ru*/**",
|
|
4388
|
+
"**/i18n/sv*/**",
|
|
4389
|
+
"**/i18n/th*/**",
|
|
4390
|
+
"**/i18n/tr*/**",
|
|
4391
|
+
"**/i18n/vi*/**",
|
|
4392
|
+
"**/i18n/zh*/**",
|
|
4393
|
+
// Common locale folder patterns
|
|
4394
|
+
"**/zh-cn/**",
|
|
4395
|
+
"**/zh-hk/**",
|
|
4396
|
+
"**/zh-mo/**",
|
|
4397
|
+
"**/zh-sg/**",
|
|
4398
|
+
"**/zh-tw/**"
|
|
4399
|
+
];
|
|
4400
|
+
const DEFAULT_EXCLUSION_PATTERNS = [
|
|
4401
|
+
...DEFAULT_FILE_EXCLUSIONS,
|
|
4402
|
+
...DEFAULT_FOLDER_EXCLUSIONS
|
|
4403
|
+
];
|
|
4404
|
+
function getEffectiveExclusionPatterns(userPatterns) {
|
|
4405
|
+
if (userPatterns !== void 0) {
|
|
4406
|
+
return userPatterns;
|
|
4407
|
+
}
|
|
4408
|
+
return DEFAULT_EXCLUSION_PATTERNS;
|
|
4409
|
+
}
|
|
3470
4410
|
function registerNewJobRoutes(server, scrapeTool) {
|
|
3471
4411
|
server.get("/web/jobs/new", async () => {
|
|
3472
|
-
return /* @__PURE__ */ jsx(ScrapeForm, {});
|
|
4412
|
+
return /* @__PURE__ */ jsx(ScrapeForm, { defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS });
|
|
3473
4413
|
});
|
|
3474
4414
|
server.post(
|
|
3475
4415
|
"/web/jobs/scrape",
|
|
@@ -3540,7 +4480,7 @@ function registerNewJobRoutes(server, scrapeTool) {
|
|
|
3540
4480
|
] })
|
|
3541
4481
|
}
|
|
3542
4482
|
),
|
|
3543
|
-
/* @__PURE__ */ jsx("div", { id: "scrape-form-container", "hx-swap-oob": "innerHTML", children: /* @__PURE__ */ jsx(ScrapeFormContent, {}) })
|
|
4483
|
+
/* @__PURE__ */ jsx("div", { id: "scrape-form-container", "hx-swap-oob": "innerHTML", children: /* @__PURE__ */ jsx(ScrapeFormContent, { defaultExcludePatterns: DEFAULT_EXCLUSION_PATTERNS }) })
|
|
3544
4484
|
] });
|
|
3545
4485
|
}
|
|
3546
4486
|
return /* @__PURE__ */ jsx(Alert, { type: "warning", message: "Job finished unexpectedly quickly." });
|
|
@@ -3959,16 +4899,59 @@ async function registerWorkerService(pipeline) {
|
|
|
3959
4899
|
pipeline.setCallbacks({
|
|
3960
4900
|
onJobProgress: async (job, progress) => {
|
|
3961
4901
|
logger.debug(
|
|
3962
|
-
|
|
4902
|
+
`Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
|
|
3963
4903
|
);
|
|
4904
|
+
analytics.track(TelemetryEvent.PIPELINE_JOB_PROGRESS, {
|
|
4905
|
+
jobId: job.id,
|
|
4906
|
+
// Job IDs are already anonymous
|
|
4907
|
+
library: job.library,
|
|
4908
|
+
pagesScraped: progress.pagesScraped,
|
|
4909
|
+
totalPages: progress.totalPages,
|
|
4910
|
+
totalDiscovered: progress.totalDiscovered,
|
|
4911
|
+
progressPercent: Math.round(progress.pagesScraped / progress.totalPages * 100),
|
|
4912
|
+
currentDepth: progress.depth,
|
|
4913
|
+
maxDepth: progress.maxDepth,
|
|
4914
|
+
discoveryRatio: Math.round(
|
|
4915
|
+
progress.totalDiscovered / progress.totalPages * 100
|
|
4916
|
+
),
|
|
4917
|
+
// How much we discovered vs limited total
|
|
4918
|
+
queue_efficiency: progress.totalPages > 0 ? Math.round(progress.pagesScraped / progress.totalPages * 100) : 0
|
|
4919
|
+
});
|
|
3964
4920
|
},
|
|
3965
4921
|
onJobStatusChange: async (job) => {
|
|
3966
|
-
logger.debug(
|
|
4922
|
+
logger.debug(`Job ${job.id} status changed to: ${job.status}`);
|
|
4923
|
+
const duration = job.startedAt ? Date.now() - job.startedAt.getTime() : null;
|
|
4924
|
+
const queueWaitTime = job.startedAt && job.createdAt ? job.startedAt.getTime() - job.createdAt.getTime() : null;
|
|
4925
|
+
analytics.track(TelemetryEvent.PIPELINE_JOB_COMPLETED, {
|
|
4926
|
+
jobId: job.id,
|
|
4927
|
+
// Job IDs are already anonymous
|
|
4928
|
+
library: job.library,
|
|
4929
|
+
status: job.status,
|
|
4930
|
+
duration_ms: duration,
|
|
4931
|
+
queue_wait_time_ms: queueWaitTime,
|
|
4932
|
+
pages_processed: job.progressPages || 0,
|
|
4933
|
+
max_pages_configured: job.progressMaxPages || 0,
|
|
4934
|
+
has_version: !!job.version,
|
|
4935
|
+
has_error: !!job.error,
|
|
4936
|
+
throughput_pages_per_second: duration && job.progressPages ? Math.round(job.progressPages / duration * 1e3) : 0
|
|
4937
|
+
});
|
|
3967
4938
|
},
|
|
3968
4939
|
// Error callback: logs a warning (naming the document URL when one is given)
// and emits an ERROR_OCCURRED telemetry event built from the sanitized error.
onJobError: async (job, error, document) => {
  const documentSuffix = document ? `on document ${document.metadata.url}` : "";
  logger.warn(`⚠️  Job ${job.id} error ${documentSuffix}: ${error.message}`);
  const { type, message, hasStack } = sanitizeError(error);
  // Errors without an associated document are attributed to job setup.
  const stage = document ? "document_processing" : "job_setup";
  analytics.track(TelemetryEvent.ERROR_OCCURRED, {
    jobId: job.id,
    // Job IDs are already anonymous
    library: job.library,
    errorType: type,
    errorMessage: message,
    hasDocument: !!document,
    stage,
    hasStack,
    pages_processed_before_error: job.progressPages || 0
  });
}
|
|
3973
4956
|
});
|
|
3974
4957
|
await pipeline.start();
|
|
@@ -3987,7 +4970,7 @@ function getProjectRoot() {
|
|
|
3987
4970
|
let currentDir = path.dirname(currentFilePath);
|
|
3988
4971
|
while (true) {
|
|
3989
4972
|
const packageJsonPath = path.join(currentDir, "package.json");
|
|
3990
|
-
if (fs
|
|
4973
|
+
if (fs.existsSync(packageJsonPath)) {
|
|
3991
4974
|
projectRoot = currentDir;
|
|
3992
4975
|
return projectRoot;
|
|
3993
4976
|
}
|
|
@@ -4010,6 +4993,7 @@ class AppServer {
|
|
|
4010
4993
|
}
|
|
4011
4994
|
server;
|
|
4012
4995
|
mcpServer = null;
|
|
4996
|
+
authManager = null;
|
|
4013
4997
|
config;
|
|
4014
4998
|
/**
|
|
4015
4999
|
* Validate the server configuration for invalid service combinations.
|
|
@@ -4040,15 +5024,48 @@ class AppServer {
|
|
|
4040
5024
|
*/
|
|
4041
5025
|
async start() {
|
|
4042
5026
|
this.validateConfig();
|
|
5027
|
+
if (this.config.telemetry !== false && shouldEnableTelemetry()) {
|
|
5028
|
+
try {
|
|
5029
|
+
telemetryService.startSession({
|
|
5030
|
+
sessionId: crypto.randomUUID(),
|
|
5031
|
+
interface: "web",
|
|
5032
|
+
startTime: /* @__PURE__ */ new Date(),
|
|
5033
|
+
version: process.env.npm_package_version || "unknown",
|
|
5034
|
+
platform: process.platform,
|
|
5035
|
+
servicesEnabled: this.getActiveServicesList(),
|
|
5036
|
+
authEnabled: Boolean(this.config.auth),
|
|
5037
|
+
readOnly: Boolean(this.config.readOnly)
|
|
5038
|
+
});
|
|
5039
|
+
} catch (error) {
|
|
5040
|
+
logger.debug(`Failed to initialize telemetry: ${error}`);
|
|
5041
|
+
}
|
|
5042
|
+
}
|
|
4043
5043
|
await this.setupServer();
|
|
4044
5044
|
try {
|
|
5045
|
+
const startupStartTime = performance.now();
|
|
4045
5046
|
const address = await this.server.listen({
|
|
4046
5047
|
port: this.config.port,
|
|
4047
5048
|
host: "0.0.0.0"
|
|
4048
5049
|
});
|
|
5050
|
+
const startupDuration = performance.now() - startupStartTime;
|
|
5051
|
+
if (analytics.isEnabled()) {
|
|
5052
|
+
analytics.track(TelemetryEvent.APP_STARTED, {
|
|
5053
|
+
startup_success: true,
|
|
5054
|
+
startup_duration_ms: Math.round(startupDuration),
|
|
5055
|
+
listen_address: address,
|
|
5056
|
+
active_services: this.getActiveServicesList()
|
|
5057
|
+
});
|
|
5058
|
+
}
|
|
4049
5059
|
this.logStartupInfo(address);
|
|
4050
5060
|
return this.server;
|
|
4051
5061
|
} catch (error) {
|
|
5062
|
+
if (analytics.isEnabled()) {
|
|
5063
|
+
analytics.track(TelemetryEvent.APP_STARTED, {
|
|
5064
|
+
startup_success: false,
|
|
5065
|
+
error_type: error instanceof Error ? error.constructor.name : "UnknownError",
|
|
5066
|
+
error_message: error instanceof Error ? error.message : String(error)
|
|
5067
|
+
});
|
|
5068
|
+
}
|
|
4052
5069
|
logger.error(`❌ Failed to start AppServer: ${error}`);
|
|
4053
5070
|
await this.server.close();
|
|
4054
5071
|
throw error;
|
|
@@ -4059,24 +5076,121 @@ class AppServer {
|
|
|
4059
5076
|
*/
|
|
4060
5077
|
async stop() {
|
|
4061
5078
|
try {
|
|
5079
|
+
if (analytics.isEnabled()) {
|
|
5080
|
+
analytics.track(TelemetryEvent.APP_SHUTDOWN, {
|
|
5081
|
+
graceful: true
|
|
5082
|
+
});
|
|
5083
|
+
}
|
|
4062
5084
|
if (this.config.enableWorker) {
|
|
4063
5085
|
await stopWorkerService(this.pipeline);
|
|
4064
5086
|
}
|
|
4065
5087
|
if (this.mcpServer) {
|
|
4066
5088
|
await cleanupMcpService(this.mcpServer);
|
|
4067
5089
|
}
|
|
5090
|
+
telemetryService.endSession();
|
|
5091
|
+
await telemetryService.shutdown();
|
|
4068
5092
|
await this.server.close();
|
|
4069
5093
|
logger.info("🛑 AppServer stopped");
|
|
4070
5094
|
} catch (error) {
|
|
4071
5095
|
logger.error(`❌ Failed to stop AppServer gracefully: ${error}`);
|
|
5096
|
+
if (analytics.isEnabled()) {
|
|
5097
|
+
analytics.track(TelemetryEvent.APP_SHUTDOWN, {
|
|
5098
|
+
graceful: false,
|
|
5099
|
+
error: error instanceof Error ? error.constructor.name : "UnknownError"
|
|
5100
|
+
});
|
|
5101
|
+
await telemetryService.shutdown();
|
|
5102
|
+
}
|
|
4072
5103
|
throw error;
|
|
4073
5104
|
}
|
|
4074
5105
|
}
|
|
5106
|
+
/**
|
|
5107
|
+
* Setup global error handling for telemetry
|
|
5108
|
+
*/
|
|
5109
|
+
setupErrorHandling() {
|
|
5110
|
+
if (!process.listenerCount("unhandledRejection")) {
|
|
5111
|
+
process.on("unhandledRejection", (reason) => {
|
|
5112
|
+
logger.error(`Unhandled Promise Rejection: ${reason}`);
|
|
5113
|
+
if (analytics.isEnabled()) {
|
|
5114
|
+
analytics.track(TelemetryEvent.ERROR_OCCURRED, {
|
|
5115
|
+
error_type: "UnhandledPromiseRejection",
|
|
5116
|
+
error_category: "system",
|
|
5117
|
+
component: "AppServer",
|
|
5118
|
+
severity: "critical",
|
|
5119
|
+
context: "process_unhandled_rejection"
|
|
5120
|
+
});
|
|
5121
|
+
}
|
|
5122
|
+
});
|
|
5123
|
+
}
|
|
5124
|
+
if (!process.listenerCount("uncaughtException")) {
|
|
5125
|
+
process.on("uncaughtException", (error) => {
|
|
5126
|
+
logger.error(`Uncaught Exception: ${error.message}`);
|
|
5127
|
+
if (analytics.isEnabled()) {
|
|
5128
|
+
analytics.track(TelemetryEvent.ERROR_OCCURRED, {
|
|
5129
|
+
error_type: error.constructor.name,
|
|
5130
|
+
error_category: "system",
|
|
5131
|
+
component: "AppServer",
|
|
5132
|
+
severity: "critical",
|
|
5133
|
+
context: "process_uncaught_exception"
|
|
5134
|
+
});
|
|
5135
|
+
}
|
|
5136
|
+
});
|
|
5137
|
+
}
|
|
5138
|
+
if (typeof this.server.setErrorHandler === "function") {
|
|
5139
|
+
this.server.setErrorHandler(async (error, request, reply) => {
|
|
5140
|
+
if (analytics.isEnabled()) {
|
|
5141
|
+
analytics.track(TelemetryEvent.ERROR_OCCURRED, {
|
|
5142
|
+
error_type: error.constructor.name,
|
|
5143
|
+
error_category: "http",
|
|
5144
|
+
component: "FastifyServer",
|
|
5145
|
+
severity: "high",
|
|
5146
|
+
status_code: error.statusCode || 500,
|
|
5147
|
+
method: request.method,
|
|
5148
|
+
route: request.routeOptions?.url || request.url,
|
|
5149
|
+
context: "http_request_error"
|
|
5150
|
+
});
|
|
5151
|
+
}
|
|
5152
|
+
logger.error(`HTTP Error on ${request.method} ${request.url}: ${error.message}`);
|
|
5153
|
+
const statusCode = error.statusCode || 500;
|
|
5154
|
+
reply.status(statusCode).send({
|
|
5155
|
+
error: "Internal Server Error",
|
|
5156
|
+
statusCode,
|
|
5157
|
+
message: statusCode < 500 ? error.message : "An unexpected error occurred"
|
|
5158
|
+
});
|
|
5159
|
+
});
|
|
5160
|
+
}
|
|
5161
|
+
}
|
|
5162
|
+
/**
|
|
5163
|
+
* Get list of currently active services for telemetry
|
|
5164
|
+
*/
|
|
5165
|
+
getActiveServicesList() {
|
|
5166
|
+
const services = [];
|
|
5167
|
+
if (this.config.enableMcpServer) services.push("mcp");
|
|
5168
|
+
if (this.config.enableWebInterface) services.push("web");
|
|
5169
|
+
if (this.config.enableApiServer) services.push("api");
|
|
5170
|
+
if (this.config.enableWorker) services.push("worker");
|
|
5171
|
+
return services;
|
|
5172
|
+
}
|
|
4075
5173
|
/**
|
|
4076
5174
|
* Setup the server with plugins and conditionally enabled services.
|
|
4077
5175
|
*/
|
|
4078
5176
|
async setupServer() {
|
|
5177
|
+
this.setupErrorHandling();
|
|
5178
|
+
if (this.config.auth?.enabled) {
|
|
5179
|
+
await this.initializeAuth();
|
|
5180
|
+
}
|
|
4079
5181
|
await this.server.register(formBody);
|
|
5182
|
+
if (this.config.auth?.enabled) {
|
|
5183
|
+
this.server.addHook("onRequest", async (request) => {
|
|
5184
|
+
if (request.url.includes("/oauth") || request.url.includes("/auth") || request.url.includes("/register")) {
|
|
5185
|
+
logger.debug(
|
|
5186
|
+
`${request.method} ${request.url} - Headers: ${JSON.stringify(request.headers)}`
|
|
5187
|
+
);
|
|
5188
|
+
}
|
|
5189
|
+
});
|
|
5190
|
+
}
|
|
5191
|
+
if (this.config.auth?.enabled && this.authManager) {
|
|
5192
|
+
await this.setupAuthMetadataEndpoint();
|
|
5193
|
+
}
|
|
4080
5194
|
if (this.config.enableWebInterface) {
|
|
4081
5195
|
await this.enableWebInterface();
|
|
4082
5196
|
}
|
|
@@ -4107,7 +5221,9 @@ class AppServer {
|
|
|
4107
5221
|
this.mcpServer = await registerMcpService(
|
|
4108
5222
|
this.server,
|
|
4109
5223
|
this.docService,
|
|
4110
|
-
this.pipeline
|
|
5224
|
+
this.pipeline,
|
|
5225
|
+
this.config.readOnly,
|
|
5226
|
+
this.authManager || void 0
|
|
4111
5227
|
);
|
|
4112
5228
|
logger.debug("MCP server service enabled");
|
|
4113
5229
|
}
|
|
@@ -4135,6 +5251,28 @@ class AppServer {
|
|
|
4135
5251
|
index: false
|
|
4136
5252
|
});
|
|
4137
5253
|
}
|
|
5254
|
+
/**
|
|
5255
|
+
* Initialize OAuth2/OIDC authentication manager.
|
|
5256
|
+
*/
|
|
5257
|
+
async initializeAuth() {
|
|
5258
|
+
if (!this.config.auth) {
|
|
5259
|
+
return;
|
|
5260
|
+
}
|
|
5261
|
+
this.authManager = new ProxyAuthManager(this.config.auth);
|
|
5262
|
+
await this.authManager.initialize();
|
|
5263
|
+
logger.debug("Proxy auth manager initialized");
|
|
5264
|
+
}
|
|
5265
|
+
/**
|
|
5266
|
+
* Setup OAuth2 endpoints using ProxyAuthManager.
|
|
5267
|
+
*/
|
|
5268
|
+
async setupAuthMetadataEndpoint() {
|
|
5269
|
+
if (!this.authManager) {
|
|
5270
|
+
return;
|
|
5271
|
+
}
|
|
5272
|
+
const baseUrl = new URL(`http://localhost:${this.config.port}`);
|
|
5273
|
+
this.authManager.registerRoutes(this.server, baseUrl);
|
|
5274
|
+
logger.debug("OAuth2 proxy endpoints registered");
|
|
5275
|
+
}
|
|
4138
5276
|
/**
|
|
4139
5277
|
* Log startup information showing which services are enabled.
|
|
4140
5278
|
*/
|
|
@@ -4165,9 +5303,9 @@ async function startAppServer(docService, pipeline, config) {
|
|
|
4165
5303
|
await appServer.start();
|
|
4166
5304
|
return appServer;
|
|
4167
5305
|
}
|
|
4168
|
-
async function startStdioServer(tools) {
|
|
5306
|
+
async function startStdioServer(tools, readOnly = false) {
|
|
4169
5307
|
setLogLevel(LogLevel.ERROR);
|
|
4170
|
-
const server = createMcpServerInstance(tools);
|
|
5308
|
+
const server = createMcpServerInstance(tools, readOnly);
|
|
4171
5309
|
const transport = new StdioServerTransport();
|
|
4172
5310
|
await server.connect(transport);
|
|
4173
5311
|
logger.info("🤖 MCP server listening on stdio");
|
|
@@ -4226,10 +5364,10 @@ async function applyMigrations(db) {
|
|
|
4226
5364
|
logger.debug("Checking database migrations...");
|
|
4227
5365
|
ensureMigrationsTable(db);
|
|
4228
5366
|
const appliedMigrations = getAppliedMigrations(db);
|
|
4229
|
-
if (!fs
|
|
5367
|
+
if (!fs.existsSync(MIGRATIONS_DIR)) {
|
|
4230
5368
|
throw new StoreError("Migrations directory not found");
|
|
4231
5369
|
}
|
|
4232
|
-
const migrationFiles = fs
|
|
5370
|
+
const migrationFiles = fs.readdirSync(MIGRATIONS_DIR).filter((file) => file.endsWith(".sql")).sort();
|
|
4233
5371
|
const pendingMigrations = migrationFiles.filter(
|
|
4234
5372
|
(filename) => !appliedMigrations.has(filename)
|
|
4235
5373
|
);
|
|
@@ -4240,12 +5378,12 @@ async function applyMigrations(db) {
|
|
|
4240
5378
|
for (const filename of pendingMigrations) {
|
|
4241
5379
|
logger.debug(`Applying migration: ${filename}`);
|
|
4242
5380
|
const filePath = path.join(MIGRATIONS_DIR, filename);
|
|
4243
|
-
const sql = fs
|
|
5381
|
+
const sql = fs.readFileSync(filePath, "utf8");
|
|
4244
5382
|
try {
|
|
4245
5383
|
db.exec(sql);
|
|
4246
5384
|
const insertStmt = db.prepare(`INSERT INTO ${MIGRATIONS_TABLE} (id) VALUES (?)`);
|
|
4247
5385
|
insertStmt.run(filename);
|
|
4248
|
-
logger.debug(
|
|
5386
|
+
logger.debug(`Applied migration: ${filename}`);
|
|
4249
5387
|
appliedCount++;
|
|
4250
5388
|
} catch (error) {
|
|
4251
5389
|
logger.error(`❌ Failed to apply migration: ${filename} - ${error}`);
|
|
@@ -4319,12 +5457,12 @@ async function createDocumentManagement(options = {}) {
|
|
|
4319
5457
|
await client.initialize();
|
|
4320
5458
|
return client;
|
|
4321
5459
|
}
|
|
4322
|
-
const service = new (await import("./DocumentManagementService-
|
|
5460
|
+
const service = new (await import("./DocumentManagementService-C1xAzouZ.js")).DocumentManagementService();
|
|
4323
5461
|
await service.initialize();
|
|
4324
5462
|
return service;
|
|
4325
5463
|
}
|
|
4326
5464
|
/**
 * Creates a DocumentManagementService, loading its module on demand, and
 * initializes it.
 *
 * @returns {Promise<object>} The initialized service instance.
 */
async function createLocalDocumentManagement() {
  const { DocumentManagementService } = await import("./DocumentManagementService-C1xAzouZ.js");
  const service = new DocumentManagementService();
  await service.initialize();
  return service;
}
|
|
@@ -4494,6 +5632,16 @@ function validateUrl(url) {
|
|
|
4494
5632
|
throw new InvalidUrlError(url, error instanceof Error ? error : void 0);
|
|
4495
5633
|
}
|
|
4496
5634
|
}
|
|
5635
|
+
/**
 * Reduces a hostname to the domain reported by `psl.get`.
 *
 * Hostnames that look like IPv4 addresses, consist only of hex digits and
 * colons, or contain no dot are returned unchanged. The original hostname is
 * also returned when `psl.get` yields no domain.
 *
 * @param {string} hostname - Hostname to reduce.
 * @returns {string} The primary domain, or `hostname` itself.
 */
function extractPrimaryDomain(hostname) {
  const isIpLiteral = /^\d+\.\d+\.\d+\.\d+$/.test(hostname) || /^[0-9a-fA-F:]+$/.test(hostname);
  if (isIpLiteral || !hostname.includes(".")) {
    return hostname;
  }
  // The lookup uses the lowercased name; the fallback keeps the input as given.
  return psl.get(hostname.toLowerCase()) || hostname;
}
|
|
4497
5645
|
function computeBaseDirectory(pathname) {
|
|
4498
5646
|
if (pathname === "") return "/";
|
|
4499
5647
|
if (pathname.endsWith("/")) return pathname;
|
|
@@ -4515,8 +5663,7 @@ function isInScope(baseUrl, targetUrl, scope) {
|
|
|
4515
5663
|
case "hostname":
|
|
4516
5664
|
return baseUrl.hostname === targetUrl.hostname;
|
|
4517
5665
|
case "domain": {
|
|
4518
|
-
|
|
4519
|
-
return getDomain(baseUrl.hostname) === getDomain(targetUrl.hostname);
|
|
5666
|
+
return extractPrimaryDomain(baseUrl.hostname) === extractPrimaryDomain(targetUrl.hostname);
|
|
4520
5667
|
}
|
|
4521
5668
|
default:
|
|
4522
5669
|
return false;
|
|
@@ -4563,7 +5710,8 @@ function shouldIncludeUrl(url, includePatterns, excludePatterns) {
|
|
|
4563
5710
|
}
|
|
4564
5711
|
}
|
|
4565
5712
|
const stripSlash = (patterns) => patterns?.map((p) => p.startsWith("/") ? p.slice(1) : p);
|
|
4566
|
-
|
|
5713
|
+
const effectiveExcludePatterns = getEffectiveExclusionPatterns(excludePatterns);
|
|
5714
|
+
if (matchesAnyPattern(normalizedPath, effectiveExcludePatterns) || basename && matchesAnyPattern(basename, stripSlash(effectiveExcludePatterns)))
|
|
4567
5715
|
return false;
|
|
4568
5716
|
if (!includePatterns || includePatterns.length === 0) return true;
|
|
4569
5717
|
return matchesAnyPattern(normalizedPath, includePatterns) || (basename ? matchesAnyPattern(basename, stripSlash(includePatterns)) : false);
|
|
@@ -4889,9 +6037,9 @@ class LocalFileStrategy extends BaseScraperStrategy {
|
|
|
4889
6037
|
}
|
|
4890
6038
|
async processItem(item, options, _progressCallback, _signal) {
|
|
4891
6039
|
const filePath = decodeURIComponent(item.url.replace(/^file:\/\//, ""));
|
|
4892
|
-
const stats = await fs.stat(filePath);
|
|
6040
|
+
const stats = await fs$1.stat(filePath);
|
|
4893
6041
|
if (stats.isDirectory()) {
|
|
4894
|
-
const contents = await fs.readdir(filePath);
|
|
6042
|
+
const contents = await fs$1.readdir(filePath);
|
|
4895
6043
|
const links = contents.map((name) => `file://${path.join(filePath, name)}`).filter((url) => this.shouldProcessUrl(url, options));
|
|
4896
6044
|
return { links };
|
|
4897
6045
|
}
|
|
@@ -5722,11 +6870,11 @@ async function createPipelineWithCallbacks(docService, options = {}) {
|
|
|
5722
6870
|
pipeline.setCallbacks({
|
|
5723
6871
|
onJobProgress: async (job, progress) => {
|
|
5724
6872
|
logger.debug(
|
|
5725
|
-
|
|
6873
|
+
`Job ${job.id} progress: ${progress.pagesScraped}/${progress.totalPages} pages`
|
|
5726
6874
|
);
|
|
5727
6875
|
},
|
|
5728
6876
|
onJobStatusChange: async (job) => {
|
|
5729
|
-
logger.debug(
|
|
6877
|
+
logger.debug(`Job ${job.id} status changed to: ${job.status}`);
|
|
5730
6878
|
},
|
|
5731
6879
|
onJobError: async (job, error, document) => {
|
|
5732
6880
|
logger.warn(
|
|
@@ -5743,7 +6891,9 @@ function createAppServerConfig(options) {
|
|
|
5743
6891
|
enableApiServer: options.enableApiServer ?? false,
|
|
5744
6892
|
enableWorker: options.enableWorker ?? true,
|
|
5745
6893
|
port: options.port,
|
|
5746
|
-
externalWorkerUrl: options.externalWorkerUrl
|
|
6894
|
+
externalWorkerUrl: options.externalWorkerUrl,
|
|
6895
|
+
readOnly: options.readOnly ?? false,
|
|
6896
|
+
auth: options.auth
|
|
5747
6897
|
};
|
|
5748
6898
|
}
|
|
5749
6899
|
function parseHeaders(headerOptions) {
|
|
@@ -5764,8 +6914,84 @@ const CLI_DEFAULTS = {
|
|
|
5764
6914
|
PROTOCOL: DEFAULT_PROTOCOL,
|
|
5765
6915
|
HTTP_PORT: DEFAULT_HTTP_PORT,
|
|
5766
6916
|
WEB_PORT: DEFAULT_WEB_PORT,
|
|
5767
|
-
MAX_CONCURRENCY: DEFAULT_MAX_CONCURRENCY
|
|
6917
|
+
MAX_CONCURRENCY: DEFAULT_MAX_CONCURRENCY,
|
|
6918
|
+
TELEMETRY: true
|
|
5768
6919
|
};
|
|
6920
|
+
/**
 * Builds the auth configuration from CLI options, falling back to the
 * DOCS_MCP_AUTH_* environment variables.
 *
 * Auth is enabled when `options.authEnabled` is truthy OR
 * DOCS_MCP_AUTH_ENABLED equals "true" (case-insensitive). The `--auth-enabled`
 * flag is declared with a default of `false`, so a `false` here means "flag
 * not given" and must not mask the environment variable.
 *
 * @param {{authEnabled?: boolean, authIssuerUrl?: string, authAudience?: string}} options
 * @returns {{enabled: boolean, issuerUrl: (string|undefined), audience: (string|undefined), scopes: string[]}|undefined}
 *   The auth configuration, or `undefined` when auth is disabled.
 */
function parseAuthConfig(options) {
  const enabledByEnv = process.env.DOCS_MCP_AUTH_ENABLED?.toLowerCase() === "true";
  const enabled = Boolean(options.authEnabled) || enabledByEnv;
  if (!enabled) {
    return void 0;
  }
  // Explicit CLI values take precedence over the environment.
  const issuerUrl = options.authIssuerUrl ?? process.env.DOCS_MCP_AUTH_ISSUER_URL;
  const audience = options.authAudience ?? process.env.DOCS_MCP_AUTH_AUDIENCE;
  return {
    enabled,
    issuerUrl,
    audience,
    // Default scopes for OAuth2/OIDC
    scopes: ["openid", "profile"]
  };
}
|
|
6935
|
+
/**
 * Validates an auth configuration, collecting every problem before throwing.
 *
 * Rules:
 * - The issuer URL is required and must be a valid `https:` URL.
 * - The audience is required and must be an absolute URL or a URN of the form
 *   `urn:namespace:specific-string`; it must not contain a fragment.
 * - An `http:` audience on a host other than `localhost` only logs a warning.
 *
 * Does nothing when `authConfig.enabled` is falsy.
 *
 * @param {{enabled: boolean, issuerUrl?: string, audience?: string}} authConfig
 * @throws {Error} When validation fails; the message lists each problem on its own line.
 */
function validateAuthConfig(authConfig) {
  if (!authConfig.enabled) {
    return;
  }
  const errors = [];
  if (!authConfig.issuerUrl) {
    errors.push("--auth-issuer-url is required when auth is enabled");
  } else {
    try {
      const url = new URL(authConfig.issuerUrl);
      if (url.protocol !== "https:") {
        errors.push("Issuer URL must use HTTPS protocol");
      }
    } catch {
      errors.push("Issuer URL must be a valid URL");
    }
  }
  if (!authConfig.audience) {
    errors.push("--auth-audience is required when auth is enabled");
  } else {
    // `urn:` strings parse successfully with `new URL`, so the URN format
    // must be checked up front rather than in the URL parse-failure path.
    const isUrn = authConfig.audience.startsWith("urn:");
    if (isUrn) {
      const urnParts = authConfig.audience.split(":");
      if (urnParts.length < 3 || !urnParts[1] || !urnParts[2]) {
        errors.push("URN audience must follow format: urn:namespace:specific-string");
      }
    }
    try {
      const url = new URL(authConfig.audience);
      if (url.protocol === "http:" && url.hostname !== "localhost") {
        logger.warn(
          "⚠️  Audience uses HTTP protocol - consider using HTTPS for production"
        );
      }
      if (url.hash) {
        errors.push("Audience must not contain URL fragments");
      }
    } catch {
      // A URN that fails to parse was already judged by the format check above.
      if (!isUrn) {
        errors.push(
          "Audience must be a valid absolute URL or URN (e.g., https://api.example.com or urn:company:service)"
        );
      }
    }
  }
  if (errors.length > 0) {
    throw new Error(`Auth configuration validation failed:\n${errors.join("\n")}`);
  }
}
|
|
6983
|
+
/**
 * Logs a warning when authentication is enabled in what looks like a
 * production deployment.
 *
 * No warning is logged when auth is disabled, when NODE_ENV is not
 * "production", when the port is 6280 (the default dev port), or when the
 * HOSTNAME environment variable contains "localhost".
 *
 * @param {{enabled?: boolean}|undefined} authConfig - Parsed auth configuration.
 * @param {number} port - Port the server listens on.
 */
function warnHttpUsage(authConfig, port) {
  if (!authConfig?.enabled) return;
  // Each guard below marks the environment as local/dev.
  if (process.env.NODE_ENV !== "production") return;
  if (port === 6280) return;
  if (process.env.HOSTNAME?.includes("localhost")) return;
  logger.warn(
    "⚠️  Authentication is enabled but running over HTTP in production. Consider using HTTPS for security."
  );
}
|
|
5769
6995
|
function createDefaultAction(program) {
|
|
5770
6996
|
return program.addOption(
|
|
5771
6997
|
new Option("--protocol <protocol>", "Protocol for MCP server").choices(["auto", "stdio", "http"]).default("auto")
|
|
@@ -5777,13 +7003,33 @@ function createDefaultAction(program) {
|
|
|
5777
7003
|
}
|
|
5778
7004
|
return String(n);
|
|
5779
7005
|
}).default(CLI_DEFAULTS.HTTP_PORT.toString())
|
|
5780
|
-
).option("--resume", "Resume interrupted jobs on startup", false).option("--no-resume", "Do not resume jobs on startup").
|
|
7006
|
+
).option("--resume", "Resume interrupted jobs on startup", false).option("--no-resume", "Do not resume jobs on startup").option(
|
|
7007
|
+
"--read-only",
|
|
7008
|
+
"Run in read-only mode (only expose read tools, disable write/job tools)",
|
|
7009
|
+
false
|
|
7010
|
+
).option(
|
|
7011
|
+
"--auth-enabled",
|
|
7012
|
+
"Enable OAuth2/OIDC authentication for MCP endpoints",
|
|
7013
|
+
false
|
|
7014
|
+
).option("--auth-issuer-url <url>", "Issuer/discovery URL for OAuth2/OIDC provider").option(
|
|
7015
|
+
"--auth-audience <id>",
|
|
7016
|
+
"JWT audience claim (identifies this protected resource)"
|
|
7017
|
+
).action(
|
|
5781
7018
|
async (options, command) => {
|
|
5782
7019
|
const globalOptions = command.opts();
|
|
5783
7020
|
const resolvedProtocol = resolveProtocol(options.protocol);
|
|
5784
7021
|
setupLogging(globalOptions, resolvedProtocol);
|
|
5785
7022
|
logger.debug("No subcommand specified, starting unified server by default...");
|
|
5786
7023
|
const port = validatePort(options.port);
|
|
7024
|
+
const authConfig = parseAuthConfig({
|
|
7025
|
+
authEnabled: options.authEnabled,
|
|
7026
|
+
authIssuerUrl: options.authIssuerUrl,
|
|
7027
|
+
authAudience: options.authAudience
|
|
7028
|
+
});
|
|
7029
|
+
if (authConfig) {
|
|
7030
|
+
validateAuthConfig(authConfig);
|
|
7031
|
+
warnHttpUsage(authConfig, port);
|
|
7032
|
+
}
|
|
5787
7033
|
ensurePlaywrightBrowsersInstalled();
|
|
5788
7034
|
const docService = await createLocalDocumentManagement();
|
|
5789
7035
|
const pipelineOptions = {
|
|
@@ -5793,14 +7039,14 @@ function createDefaultAction(program) {
|
|
|
5793
7039
|
};
|
|
5794
7040
|
const pipeline = await createPipelineWithCallbacks(docService, pipelineOptions);
|
|
5795
7041
|
if (resolvedProtocol === "stdio") {
|
|
5796
|
-
logger.debug(
|
|
7042
|
+
logger.debug(`Auto-detected stdio protocol (no TTY)`);
|
|
5797
7043
|
await pipeline.start();
|
|
5798
7044
|
const mcpTools = await initializeTools(docService, pipeline);
|
|
5799
|
-
await startStdioServer(mcpTools);
|
|
7045
|
+
await startStdioServer(mcpTools, options.readOnly);
|
|
5800
7046
|
await new Promise(() => {
|
|
5801
7047
|
});
|
|
5802
7048
|
} else {
|
|
5803
|
-
logger.debug(
|
|
7049
|
+
logger.debug(`Auto-detected http protocol (TTY available)`);
|
|
5804
7050
|
const config = createAppServerConfig({
|
|
5805
7051
|
enableWebInterface: true,
|
|
5806
7052
|
// Enable web interface in http mode
|
|
@@ -5810,7 +7056,9 @@ function createDefaultAction(program) {
|
|
|
5810
7056
|
// Enable API (tRPC) in http mode
|
|
5811
7057
|
enableWorker: true,
|
|
5812
7058
|
// Always enable in-process worker for unified server
|
|
5813
|
-
port
|
|
7059
|
+
port,
|
|
7060
|
+
readOnly: options.readOnly,
|
|
7061
|
+
auth: authConfig
|
|
5814
7062
|
});
|
|
5815
7063
|
await startAppServer(docService, pipeline, config);
|
|
5816
7064
|
await new Promise(() => {
|
|
@@ -5824,12 +7072,24 @@ async function fetchUrlAction(url, options, command) {
|
|
|
5824
7072
|
setupLogging(globalOptions);
|
|
5825
7073
|
const headers = parseHeaders(options.header);
|
|
5826
7074
|
const fetchUrlTool = new FetchUrlTool(new HttpFetcher(), new FileFetcher());
|
|
5827
|
-
const content = await
|
|
5828
|
-
|
|
5829
|
-
|
|
5830
|
-
|
|
5831
|
-
|
|
5832
|
-
|
|
7075
|
+
const content = await trackTool(
|
|
7076
|
+
"fetch_url",
|
|
7077
|
+
() => fetchUrlTool.execute({
|
|
7078
|
+
url,
|
|
7079
|
+
followRedirects: options.followRedirects,
|
|
7080
|
+
scrapeMode: options.scrapeMode,
|
|
7081
|
+
headers: Object.keys(headers).length > 0 ? headers : void 0
|
|
7082
|
+
}),
|
|
7083
|
+
(content2) => ({
|
|
7084
|
+
url_protocol: extractProtocol(url),
|
|
7085
|
+
// Safe: only protocol, not full URL
|
|
7086
|
+
follow_redirects: options.followRedirects,
|
|
7087
|
+
scrape_mode: options.scrapeMode,
|
|
7088
|
+
has_custom_headers: Object.keys(headers).length > 0,
|
|
7089
|
+
content_length: content2.length,
|
|
7090
|
+
cli_flags: extractCliFlags(process.argv)
|
|
7091
|
+
})
|
|
7092
|
+
);
|
|
5833
7093
|
console.log(content);
|
|
5834
7094
|
}
|
|
5835
7095
|
function createFetchUrlCommand(program) {
|
|
@@ -5864,10 +7124,22 @@ async function findVersionAction(library, options, command) {
|
|
|
5864
7124
|
const docService = await createDocumentManagement({ serverUrl });
|
|
5865
7125
|
try {
|
|
5866
7126
|
const findVersionTool = new FindVersionTool(docService);
|
|
5867
|
-
const versionInfo = await
|
|
5868
|
-
|
|
5869
|
-
|
|
5870
|
-
|
|
7127
|
+
const versionInfo = await trackTool(
|
|
7128
|
+
"find_version",
|
|
7129
|
+
() => findVersionTool.execute({
|
|
7130
|
+
library,
|
|
7131
|
+
targetVersion: options.version
|
|
7132
|
+
}),
|
|
7133
|
+
(versionInfo2) => ({
|
|
7134
|
+
library,
|
|
7135
|
+
// Safe: library names are public
|
|
7136
|
+
has_target_version: !!options.version,
|
|
7137
|
+
result_type: typeof versionInfo2,
|
|
7138
|
+
// 'string'
|
|
7139
|
+
using_remote_server: !!serverUrl,
|
|
7140
|
+
cli_flags: extractCliFlags(process.argv)
|
|
7141
|
+
})
|
|
7142
|
+
);
|
|
5871
7143
|
if (!versionInfo) throw new Error("Failed to get version information");
|
|
5872
7144
|
console.log(versionInfo);
|
|
5873
7145
|
} finally {
|
|
@@ -5887,7 +7159,15 @@ async function listAction(options, command) {
|
|
|
5887
7159
|
const docService = await createDocumentManagement({ serverUrl });
|
|
5888
7160
|
try {
|
|
5889
7161
|
const listLibrariesTool = new ListLibrariesTool(docService);
|
|
5890
|
-
const result = await
|
|
7162
|
+
const result = await trackTool(
|
|
7163
|
+
"list_libraries",
|
|
7164
|
+
() => listLibrariesTool.execute(),
|
|
7165
|
+
(result2) => ({
|
|
7166
|
+
library_count: result2.libraries.length,
|
|
7167
|
+
using_remote_server: !!serverUrl,
|
|
7168
|
+
cli_flags: extractCliFlags(process.argv)
|
|
7169
|
+
})
|
|
7170
|
+
);
|
|
5891
7171
|
console.log(formatOutput(result.libraries));
|
|
5892
7172
|
} finally {
|
|
5893
7173
|
await docService.shutdown();
|
|
@@ -5913,6 +7193,17 @@ function createMcpCommand(program) {
|
|
|
5913
7193
|
).option(
|
|
5914
7194
|
"--server-url <url>",
|
|
5915
7195
|
"URL of external pipeline worker RPC (e.g., http://localhost:6280/api)"
|
|
7196
|
+
).option(
|
|
7197
|
+
"--read-only",
|
|
7198
|
+
"Run in read-only mode (only expose read tools, disable write/job tools)",
|
|
7199
|
+
false
|
|
7200
|
+
).option(
|
|
7201
|
+
"--auth-enabled",
|
|
7202
|
+
"Enable OAuth2/OIDC authentication for MCP endpoints",
|
|
7203
|
+
false
|
|
7204
|
+
).option("--auth-issuer-url <url>", "Issuer/discovery URL for OAuth2/OIDC provider").option(
|
|
7205
|
+
"--auth-audience <id>",
|
|
7206
|
+
"JWT audience claim (identifies this protected resource)"
|
|
5916
7207
|
).action(
|
|
5917
7208
|
async (cmdOptions, command) => {
|
|
5918
7209
|
const globalOptions = command.parent?.opts() || {};
|
|
@@ -5920,6 +7211,14 @@ function createMcpCommand(program) {
|
|
|
5920
7211
|
const serverUrl = cmdOptions.serverUrl;
|
|
5921
7212
|
const resolvedProtocol = resolveProtocol(cmdOptions.protocol);
|
|
5922
7213
|
setupLogging(globalOptions, resolvedProtocol);
|
|
7214
|
+
const authConfig = parseAuthConfig({
|
|
7215
|
+
authEnabled: cmdOptions.authEnabled,
|
|
7216
|
+
authIssuerUrl: cmdOptions.authIssuerUrl,
|
|
7217
|
+
authAudience: cmdOptions.authAudience
|
|
7218
|
+
});
|
|
7219
|
+
if (authConfig) {
|
|
7220
|
+
validateAuthConfig(authConfig);
|
|
7221
|
+
}
|
|
5923
7222
|
try {
|
|
5924
7223
|
const docService = await createDocumentManagement({
|
|
5925
7224
|
serverUrl
|
|
@@ -5935,15 +7234,15 @@ function createMcpCommand(program) {
|
|
|
5935
7234
|
pipelineOptions
|
|
5936
7235
|
);
|
|
5937
7236
|
if (resolvedProtocol === "stdio") {
|
|
5938
|
-
logger.debug(
|
|
7237
|
+
logger.debug(`Auto-detected stdio protocol (no TTY)`);
|
|
5939
7238
|
logger.info("🚀 Starting MCP server (stdio mode)");
|
|
5940
7239
|
await pipeline.start();
|
|
5941
7240
|
const mcpTools = await initializeTools(docService, pipeline);
|
|
5942
|
-
await startStdioServer(mcpTools);
|
|
7241
|
+
await startStdioServer(mcpTools, cmdOptions.readOnly);
|
|
5943
7242
|
await new Promise(() => {
|
|
5944
7243
|
});
|
|
5945
7244
|
} else {
|
|
5946
|
-
logger.debug(
|
|
7245
|
+
logger.debug(`Auto-detected http protocol (TTY available)`);
|
|
5947
7246
|
logger.info("🚀 Starting MCP server (http mode)");
|
|
5948
7247
|
const config = createAppServerConfig({
|
|
5949
7248
|
enableWebInterface: false,
|
|
@@ -5953,7 +7252,9 @@ function createMcpCommand(program) {
|
|
|
5953
7252
|
// Never enable API in mcp command
|
|
5954
7253
|
enableWorker: !serverUrl,
|
|
5955
7254
|
port,
|
|
5956
|
-
externalWorkerUrl: serverUrl
|
|
7255
|
+
externalWorkerUrl: serverUrl,
|
|
7256
|
+
readOnly: cmdOptions.readOnly,
|
|
7257
|
+
auth: authConfig
|
|
5957
7258
|
});
|
|
5958
7259
|
await startAppServer(docService, pipeline, config);
|
|
5959
7260
|
await new Promise(() => {
|
|
@@ -5973,13 +7274,21 @@ async function removeAction(library, options, command) {
|
|
|
5973
7274
|
const docService = await createDocumentManagement({ serverUrl });
|
|
5974
7275
|
const { version: version2 } = options;
|
|
5975
7276
|
try {
|
|
5976
|
-
await
|
|
5977
|
-
|
|
5978
|
-
|
|
7277
|
+
await trackTool(
|
|
7278
|
+
"remove_documents",
|
|
7279
|
+
() => docService.removeAllDocuments(library, version2),
|
|
7280
|
+
() => ({
|
|
7281
|
+
library,
|
|
7282
|
+
// Safe: library names are public
|
|
7283
|
+
has_version: !!version2,
|
|
7284
|
+
using_remote_server: !!serverUrl,
|
|
7285
|
+
cli_flags: extractCliFlags(process.argv)
|
|
7286
|
+
})
|
|
5979
7287
|
);
|
|
7288
|
+
console.log(`✅ Successfully removed ${library}${version2 ? `@${version2}` : ""}.`);
|
|
5980
7289
|
} catch (error) {
|
|
5981
7290
|
console.error(
|
|
5982
|
-
`❌ Failed to remove
|
|
7291
|
+
`❌ Failed to remove ${library}${version2 ? `@${version2}` : ""}:`,
|
|
5983
7292
|
error instanceof Error ? error.message : String(error)
|
|
5984
7293
|
);
|
|
5985
7294
|
throw error;
|
|
@@ -6015,23 +7324,48 @@ async function scrapeAction(library, url, options, command) {
|
|
|
6015
7324
|
await pipeline.start();
|
|
6016
7325
|
const scrapeTool = new ScrapeTool(pipeline);
|
|
6017
7326
|
const headers = parseHeaders(options.header);
|
|
6018
|
-
const result = await
|
|
6019
|
-
|
|
6020
|
-
|
|
6021
|
-
|
|
6022
|
-
|
|
6023
|
-
|
|
6024
|
-
|
|
6025
|
-
|
|
6026
|
-
|
|
7327
|
+
const result = await trackTool(
|
|
7328
|
+
"scrape_docs",
|
|
7329
|
+
() => scrapeTool.execute({
|
|
7330
|
+
url,
|
|
7331
|
+
library,
|
|
7332
|
+
version: options.version,
|
|
7333
|
+
options: {
|
|
7334
|
+
maxPages: Number.parseInt(options.maxPages, 10),
|
|
7335
|
+
maxDepth: Number.parseInt(options.maxDepth, 10),
|
|
7336
|
+
maxConcurrency: Number.parseInt(options.maxConcurrency, 10),
|
|
7337
|
+
ignoreErrors: options.ignoreErrors,
|
|
7338
|
+
scope: options.scope,
|
|
7339
|
+
followRedirects: options.followRedirects,
|
|
7340
|
+
scrapeMode: options.scrapeMode,
|
|
7341
|
+
includePatterns: Array.isArray(options.includePattern) && options.includePattern.length > 0 ? options.includePattern : void 0,
|
|
7342
|
+
excludePatterns: Array.isArray(options.excludePattern) && options.excludePattern.length > 0 ? options.excludePattern : void 0,
|
|
7343
|
+
headers: Object.keys(headers).length > 0 ? headers : void 0
|
|
7344
|
+
}
|
|
7345
|
+
}),
|
|
7346
|
+
(result2) => ({
|
|
7347
|
+
library,
|
|
7348
|
+
// Safe: library names are public
|
|
7349
|
+
url_protocol: extractProtocol(url),
|
|
7350
|
+
// Safe: only protocol, not full URL
|
|
7351
|
+
max_pages: Number.parseInt(options.maxPages, 10),
|
|
7352
|
+
max_depth: Number.parseInt(options.maxDepth, 10),
|
|
7353
|
+
max_concurrency: Number.parseInt(options.maxConcurrency, 10),
|
|
7354
|
+
has_version: !!options.version,
|
|
6027
7355
|
scope: options.scope,
|
|
6028
|
-
|
|
6029
|
-
|
|
6030
|
-
|
|
6031
|
-
|
|
6032
|
-
|
|
6033
|
-
|
|
6034
|
-
|
|
7356
|
+
scrape_mode: options.scrapeMode,
|
|
7357
|
+
ignore_errors: options.ignoreErrors,
|
|
7358
|
+
follow_redirects: options.followRedirects,
|
|
7359
|
+
has_include_patterns: Array.isArray(options.includePattern) && options.includePattern.length > 0,
|
|
7360
|
+
has_exclude_patterns: Array.isArray(options.excludePattern) && options.excludePattern.length > 0,
|
|
7361
|
+
has_custom_headers: Object.keys(headers).length > 0,
|
|
7362
|
+
using_remote_server: !!serverUrl,
|
|
7363
|
+
cli_flags: extractCliFlags(process.argv),
|
|
7364
|
+
is_async_job: !("pagesScraped" in result2),
|
|
7365
|
+
// Pipeline mode vs direct mode
|
|
7366
|
+
pages_scraped: "pagesScraped" in result2 ? result2.pagesScraped : void 0
|
|
7367
|
+
})
|
|
7368
|
+
);
|
|
6035
7369
|
if ("pagesScraped" in result) {
|
|
6036
7370
|
console.log(`✅ Successfully scraped ${result.pagesScraped} pages`);
|
|
6037
7371
|
} else {
|
|
@@ -6113,13 +7447,28 @@ async function searchAction(library, query, options, command) {
|
|
|
6113
7447
|
const docService = await createDocumentManagement({ serverUrl });
|
|
6114
7448
|
try {
|
|
6115
7449
|
const searchTool = new SearchTool(docService);
|
|
6116
|
-
const result = await
|
|
6117
|
-
|
|
6118
|
-
|
|
6119
|
-
|
|
6120
|
-
|
|
6121
|
-
|
|
6122
|
-
|
|
7450
|
+
const result = await trackTool(
|
|
7451
|
+
"search_docs",
|
|
7452
|
+
() => searchTool.execute({
|
|
7453
|
+
library,
|
|
7454
|
+
version: options.version,
|
|
7455
|
+
query,
|
|
7456
|
+
limit: Number.parseInt(options.limit, 10),
|
|
7457
|
+
exactMatch: options.exactMatch
|
|
7458
|
+
}),
|
|
7459
|
+
(result2) => ({
|
|
7460
|
+
library,
|
|
7461
|
+
// Safe: library names are public
|
|
7462
|
+
query_analysis: analyzeSearchQuery(query),
|
|
7463
|
+
// Analyzed, not raw query
|
|
7464
|
+
result_count: result2.results.length,
|
|
7465
|
+
limit_used: Number.parseInt(options.limit, 10),
|
|
7466
|
+
has_version_filter: !!options.version,
|
|
7467
|
+
exact_match: options.exactMatch,
|
|
7468
|
+
using_remote_server: !!serverUrl,
|
|
7469
|
+
cli_flags: extractCliFlags(process.argv)
|
|
7470
|
+
})
|
|
7471
|
+
);
|
|
6123
7472
|
console.log(formatOutput(result.results));
|
|
6124
7473
|
} finally {
|
|
6125
7474
|
await docService.shutdown();
|
|
@@ -6232,11 +7581,28 @@ function createCliProgram() {
|
|
|
6232
7581
|
const program = new Command();
|
|
6233
7582
|
program.name("docs-mcp-server").description("Unified CLI, MCP Server, and Web Interface for Docs MCP Server.").version(packageJson.version).addOption(
|
|
6234
7583
|
new Option("--verbose", "Enable verbose (debug) logging").conflicts("silent")
|
|
6235
|
-
).addOption(new Option("--silent", "Disable all logging except errors")).enablePositionalOptions().allowExcessArguments(false).showHelpAfterError(true);
|
|
6236
|
-
program.hook("preAction", (thisCommand,
|
|
7584
|
+
).addOption(new Option("--silent", "Disable all logging except errors")).addOption(new Option("--no-telemetry", "Disable telemetry collection")).enablePositionalOptions().allowExcessArguments(false).showHelpAfterError(true);
|
|
7585
|
+
program.hook("preAction", (thisCommand, actionCommand) => {
|
|
6237
7586
|
const globalOptions = thisCommand.opts();
|
|
6238
7587
|
if (globalOptions.silent) setLogLevel(LogLevel.ERROR);
|
|
6239
7588
|
else if (globalOptions.verbose) setLogLevel(LogLevel.DEBUG);
|
|
7589
|
+
if (shouldEnableTelemetry()) {
|
|
7590
|
+
const commandName = actionCommand.name();
|
|
7591
|
+
const session = createCliSession(commandName, {
|
|
7592
|
+
authEnabled: false,
|
|
7593
|
+
// CLI doesn't use auth
|
|
7594
|
+
readOnly: false
|
|
7595
|
+
});
|
|
7596
|
+
analytics.startSession(session);
|
|
7597
|
+
} else {
|
|
7598
|
+
TelemetryConfig.getInstance().disable();
|
|
7599
|
+
}
|
|
7600
|
+
});
|
|
7601
|
+
program.hook("postAction", async () => {
|
|
7602
|
+
if (analytics.isEnabled()) {
|
|
7603
|
+
analytics.endSession();
|
|
7604
|
+
await analytics.shutdown();
|
|
7605
|
+
}
|
|
6240
7606
|
});
|
|
6241
7607
|
createMcpCommand(program);
|
|
6242
7608
|
createWebCommand(program);
|
|
@@ -6284,6 +7650,10 @@ const sigintHandler = async () => {
|
|
|
6284
7650
|
activeDocService = null;
|
|
6285
7651
|
logger.debug("SIGINT: DocumentManagementService shut down.");
|
|
6286
7652
|
}
|
|
7653
|
+
if (analytics.isEnabled()) {
|
|
7654
|
+
await analytics.shutdown();
|
|
7655
|
+
logger.debug("SIGINT: Analytics shut down.");
|
|
7656
|
+
}
|
|
6287
7657
|
logger.info("✅ Graceful shutdown completed");
|
|
6288
7658
|
process.exit(0);
|
|
6289
7659
|
} catch (error) {
|
|
@@ -6359,6 +7729,7 @@ export {
|
|
|
6359
7729
|
EMBEDDING_BATCH_CHARS as E,
|
|
6360
7730
|
LibraryNotFoundError as L,
|
|
6361
7731
|
StoreError as S,
|
|
7732
|
+
TelemetryEvent as T,
|
|
6362
7733
|
VECTOR_DIMENSION as V,
|
|
6363
7734
|
applyMigrations as a,
|
|
6364
7735
|
EMBEDDING_BATCH_SIZE as b,
|
|
@@ -6368,7 +7739,9 @@ export {
|
|
|
6368
7739
|
SPLITTER_MAX_CHUNK_SIZE as f,
|
|
6369
7740
|
getProjectRoot as g,
|
|
6370
7741
|
VersionNotFoundError as h,
|
|
6371
|
-
|
|
7742
|
+
analytics as i,
|
|
7743
|
+
extractHostname as j,
|
|
7744
|
+
SPLITTER_MIN_CHUNK_SIZE as k,
|
|
6372
7745
|
logger as l,
|
|
6373
7746
|
mapDbDocumentToDocument as m,
|
|
6374
7747
|
normalizeVersionName as n
|